aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNatanael Copa <ncopa@alpinelinux.org>2012-01-04 15:26:25 +0000
committerNatanael Copa <ncopa@alpinelinux.org>2012-01-05 15:24:16 +0000
commit10cf6cb980dbf5eec28927f4903192e7e492fba9 (patch)
tree2f2c7ed84392a1c948e3035a682077d4946749ad
parent7ebd7c164c6eb1c6a062eccf21227b5657bc65eb (diff)
downloadalpine_aports-10cf6cb980dbf5eec28927f4903192e7e492fba9.tar.bz2
alpine_aports-10cf6cb980dbf5eec28927f4903192e7e492fba9.tar.xz
alpine_aports-10cf6cb980dbf5eec28927f4903192e7e492fba9.zip
main/linux-vserver: upgrade to 3.0.15
(cherry picked from commit e51dfbabf64c99af5b97165df6da8eb93e212a0f) Conflicts: main/linux-vserver/APKBUILD main/linux-vserver/patch-3.0.15-vs2.3.2.1-unofficial.diff
-rw-r--r--main/linux-vserver/APKBUILD8
-rw-r--r--main/linux-vserver/patch-3.0.15-vs2.3.2.1-unofficial.diff26517
2 files changed, 26521 insertions, 4 deletions
diff --git a/main/linux-vserver/APKBUILD b/main/linux-vserver/APKBUILD
index c0643b7348..3dc916c246 100644
--- a/main/linux-vserver/APKBUILD
+++ b/main/linux-vserver/APKBUILD
@@ -2,7 +2,7 @@
2 2
3_flavor=vserver 3_flavor=vserver
4pkgname=linux-${_flavor} 4pkgname=linux-${_flavor}
5pkgver=3.0.14 5pkgver=3.0.15
6pkgrel=0 6pkgrel=0
7_vsver=vs2.3.2.1 7_vsver=vs2.3.2.1
8 8
@@ -21,7 +21,7 @@ _config=${config:-kernelconfig.${CARCH}}
21install= 21install=
22source="http://www.kernel.org/pub/linux/kernel/v3.0/linux-$_kernver.tar.bz2 22source="http://www.kernel.org/pub/linux/kernel/v3.0/linux-$_kernver.tar.bz2
23 http://www.kernel.org/pub/linux/kernel/v3.0/patch-$pkgver.bz2 23 http://www.kernel.org/pub/linux/kernel/v3.0/patch-$pkgver.bz2
24 http://vserver.13thfloor.at/Experimental/patch-$pkgver-$_vsver.diff 24 patch-3.0.15-vs2.3.2.1-unofficial.diff
25 kernelconfig.x86 25 kernelconfig.x86
26 kernelconfig.x86_64 26 kernelconfig.x86_64
27 " 27 "
@@ -132,7 +132,7 @@ dev() {
132} 132}
133 133
134md5sums="398e95866794def22b12dfbc15ce89c0 linux-3.0.tar.bz2 134md5sums="398e95866794def22b12dfbc15ce89c0 linux-3.0.tar.bz2
135f2745bd4dcb3267414713adff403b54c patch-3.0.14.bz2 135759f5efe7eb8e8672041c1fe388d1ebe patch-3.0.15.bz2
13614f6e261b1742121534b42cf149043e7 patch-3.0.14-vs2.3.2.1.diff 13617856f9963c56b31d18f6d7cc77ace5a patch-3.0.15-vs2.3.2.1-unofficial.diff
1374baec73eeb0ddc1148d580a6d1e29b1f kernelconfig.x86 1374baec73eeb0ddc1148d580a6d1e29b1f kernelconfig.x86
138148b84ad28167290f2f7f0420945cb92 kernelconfig.x86_64" 138148b84ad28167290f2f7f0420945cb92 kernelconfig.x86_64"
diff --git a/main/linux-vserver/patch-3.0.15-vs2.3.2.1-unofficial.diff b/main/linux-vserver/patch-3.0.15-vs2.3.2.1-unofficial.diff
new file mode 100644
index 0000000000..d2e2f0d647
--- /dev/null
+++ b/main/linux-vserver/patch-3.0.15-vs2.3.2.1-unofficial.diff
@@ -0,0 +1,26517 @@
1diff -NurpP --minimal linux-3.0.9/Documentation/vserver/debug.txt linux-3.0.9-vs2.3.2.1/Documentation/vserver/debug.txt
2--- linux-3.0.9/Documentation/vserver/debug.txt 1970-01-01 01:00:00.000000000 +0100
3+++ linux-3.0.9-vs2.3.2.1/Documentation/vserver/debug.txt 2011-06-10 22:11:24.000000000 +0200
4@@ -0,0 +1,154 @@
5+
6+debug_cvirt:
7+
8+ 2 4 "vx_map_tgid: %p/%llx: %d -> %d"
9+ "vx_rmap_tgid: %p/%llx: %d -> %d"
10+
11+debug_dlim:
12+
13+ 0 1 "ALLOC (%p,#%d)%c inode (%d)"
14+ "FREE (%p,#%d)%c inode"
15+ 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)"
16+ "FREE (%p,#%d)%c %lld bytes"
17+ 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]"
18+ 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d"
19+ "ext3_has_free_blocks(%p): free=%lu, root=%lu"
20+ "rcu_free_dl_info(%p)"
21+ 4 10 "alloc_dl_info(%p,%d) = %p"
22+ "dealloc_dl_info(%p)"
23+ "get_dl_info(%p[#%d.%d])"
24+ "put_dl_info(%p[#%d.%d])"
25+ 5 20 "alloc_dl_info(%p,%d)*"
26+ 6 40 "__hash_dl_info: %p[#%d]"
27+ "__unhash_dl_info: %p[#%d]"
28+ 7 80 "locate_dl_info(%p,#%d) = %p"
29+
30+debug_misc:
31+
32+ 0 1 "destroy_dqhash: %p [#0x%08x] c=%d"
33+ "new_dqhash: %p [#0x%08x]"
34+ "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]"
35+ "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]"
36+ "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]"
37+ "vroot_get_real_bdev not set"
38+ 1 2 "cow_break_link(»%s«)"
39+ "temp copy »%s«"
40+ 2 4 "dentry_open(new): %p"
41+ "dentry_open(old): %p"
42+ "lookup_create(new): %p"
43+ "old path »%s«"
44+ "path_lookup(old): %d"
45+ "vfs_create(new): %d"
46+ "vfs_rename: %d"
47+ "vfs_sendfile: %d"
48+ 3 8 "fput(new_file=%p[#%d])"
49+ "fput(old_file=%p[#%d])"
50+ 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d"
51+ "vx_info_kill(%p[#%d],%d,%d)*"
52+ 5 20 "vs_reboot(%p[#%d],%d)"
53+ 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]"
54+
55+debug_net:
56+
57+ 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d"
58+ 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d"
59+ "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d"
60+ 4 10 "ip_route_connect(%p) %p,%p;%lx"
61+ 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx"
62+ 6 40 "sk,egf: %p [#%d] (from %d)"
63+ "sk,egn: %p [#%d] (from %d)"
64+ "sk,req: %p [#%d] (from %d)"
65+ "sk: %p [#%d] (from %d)"
66+ "tw: %p [#%d] (from %d)"
67+ 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d"
68+ "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d"
69+
70+debug_nid:
71+
72+ 0 1 "__lookup_nx_info(#%u): %p[#%u]"
73+ "alloc_nx_info(%d) = %p"
74+ "create_nx_info(%d) (dynamic rejected)"
75+ "create_nx_info(%d) = %p (already there)"
76+ "create_nx_info(%d) = %p (new)"
77+ "dealloc_nx_info(%p)"
78+ 1 2 "alloc_nx_info(%d)*"
79+ "create_nx_info(%d)*"
80+ 2 4 "get_nx_info(%p[#%d.%d])"
81+ "put_nx_info(%p[#%d.%d])"
82+ 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p"
83+ "clr_nx_info(%p[#%d.%d])"
84+ "init_nx_info(%p[#%d.%d])"
85+ "release_nx_info(%p[#%d.%d.%d]) %p"
86+ "set_nx_info(%p[#%d.%d])"
87+ 4 10 "__hash_nx_info: %p[#%d]"
88+ "__nx_dynamic_id: [#%d]"
89+ "__unhash_nx_info: %p[#%d.%d.%d]"
90+ 5 20 "moved task %p into nxi:%p[#%d]"
91+ "nx_migrate_task(%p,%p[#%d.%d.%d])"
92+ "task_get_nx_info(%p)"
93+ 6 40 "nx_clear_persistent(%p[#%d])"
94+
95+debug_quota:
96+
97+ 0 1 "quota_sync_dqh(%p,%d) discard inode %p"
98+ 1 2 "quota_sync_dqh(%p,%d)"
99+ "sync_dquots(%p,%d)"
100+ "sync_dquots_dqh(%p,%d)"
101+ 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)"
102+
103+debug_switch:
104+
105+ 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]"
106+ 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]"
107+ 4 10 "%s: (%s %s) returned %s with %d"
108+
109+debug_tag:
110+
111+ 7 80 "dx_parse_tag(»%s«): %d:#%d"
112+ "dx_propagate_tag(%p[#%lu.%d]): %d,%d"
113+
114+debug_xid:
115+
116+ 0 1 "__lookup_vx_info(#%u): %p[#%u]"
117+ "alloc_vx_info(%d) = %p"
118+ "alloc_vx_info(%d)*"
119+ "create_vx_info(%d) (dynamic rejected)"
120+ "create_vx_info(%d) = %p (already there)"
121+ "create_vx_info(%d) = %p (new)"
122+ "dealloc_vx_info(%p)"
123+ "loc_vx_info(%d) = %p (found)"
124+ "loc_vx_info(%d) = %p (new)"
125+ "loc_vx_info(%d) = %p (not available)"
126+ 1 2 "create_vx_info(%d)*"
127+ "loc_vx_info(%d)*"
128+ 2 4 "get_vx_info(%p[#%d.%d])"
129+ "put_vx_info(%p[#%d.%d])"
130+ 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p"
131+ "clr_vx_info(%p[#%d.%d])"
132+ "init_vx_info(%p[#%d.%d])"
133+ "release_vx_info(%p[#%d.%d.%d]) %p"
134+ "set_vx_info(%p[#%d.%d])"
135+ 4 10 "__hash_vx_info: %p[#%d]"
136+ "__unhash_vx_info: %p[#%d.%d.%d]"
137+ "__vx_dynamic_id: [#%d]"
138+ 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]"
139+ "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]"
140+ "moved task %p into vxi:%p[#%d]"
141+ "task_get_vx_info(%p)"
142+ "vx_migrate_task(%p,%p[#%d.%d])"
143+ 6 40 "vx_clear_persistent(%p[#%d])"
144+ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])"
145+ "vx_set_init(%p[#%d],%p[#%d,%d,%d])"
146+ "vx_set_persistent(%p[#%d])"
147+ "vx_set_reaper(%p[#%d],%p[#%d,%d])"
148+ 7 80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]"
149+
150+
151+debug_limit:
152+
153+ n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s"
154+ "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
155+
156+ m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s"
157+ "vx_acc_pages[%5d,%s,%2d]: %5d += %5d"
158+ "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
159diff -NurpP --minimal linux-3.0.9/Makefile linux-3.0.9-vs2.3.2.1/Makefile
160--- linux-3.0.9/Makefile 2011-11-15 16:40:41.000000000 +0100
161+++ linux-3.0.9-vs2.3.2.1/Makefile 2011-11-17 23:18:51.000000000 +0100
162@@ -1,7 +1,7 @@
163 VERSION = 3
164 PATCHLEVEL = 0
165 SUBLEVEL = 15
166-EXTRAVERSION =
167+EXTRAVERSION = -vs2.3.2.1
168 NAME = Sneaky Weasel
169
170 # *DOCUMENTATION*
171diff -NurpP --minimal linux-3.0.9/arch/alpha/Kconfig linux-3.0.9-vs2.3.2.1/arch/alpha/Kconfig
172--- linux-3.0.9/arch/alpha/Kconfig 2011-07-22 11:17:32.000000000 +0200
173+++ linux-3.0.9-vs2.3.2.1/arch/alpha/Kconfig 2011-06-10 22:11:24.000000000 +0200
174@@ -668,6 +668,8 @@ config DUMMY_CONSOLE
175 depends on VGA_HOSE
176 default y
177
178+source "kernel/vserver/Kconfig"
179+
180 source "security/Kconfig"
181
182 source "crypto/Kconfig"
183diff -NurpP --minimal linux-3.0.9/arch/alpha/kernel/entry.S linux-3.0.9-vs2.3.2.1/arch/alpha/kernel/entry.S
184--- linux-3.0.9/arch/alpha/kernel/entry.S 2010-10-21 13:06:45.000000000 +0200
185+++ linux-3.0.9-vs2.3.2.1/arch/alpha/kernel/entry.S 2011-06-10 22:11:24.000000000 +0200
186@@ -860,24 +860,15 @@ sys_getxgid:
187 .globl sys_getxpid
188 .ent sys_getxpid
189 sys_getxpid:
190+ lda $sp, -16($sp)
191+ stq $26, 0($sp)
192 .prologue 0
193- ldq $2, TI_TASK($8)
194
195- /* See linux/kernel/timer.c sys_getppid for discussion
196- about this loop. */
197- ldq $3, TASK_GROUP_LEADER($2)
198- ldq $4, TASK_REAL_PARENT($3)
199- ldl $0, TASK_TGID($2)
200-1: ldl $1, TASK_TGID($4)
201-#ifdef CONFIG_SMP
202- mov $4, $5
203- mb
204- ldq $3, TASK_GROUP_LEADER($2)
205- ldq $4, TASK_REAL_PARENT($3)
206- cmpeq $4, $5, $5
207- beq $5, 1b
208-#endif
209- stq $1, 80($sp)
210+ lda $16, 96($sp)
211+ jsr $26, do_getxpid
212+ ldq $26, 0($sp)
213+
214+ lda $sp, 16($sp)
215 ret
216 .end sys_getxpid
217
218diff -NurpP --minimal linux-3.0.9/arch/alpha/kernel/ptrace.c linux-3.0.9-vs2.3.2.1/arch/alpha/kernel/ptrace.c
219--- linux-3.0.9/arch/alpha/kernel/ptrace.c 2011-01-05 21:48:40.000000000 +0100
220+++ linux-3.0.9-vs2.3.2.1/arch/alpha/kernel/ptrace.c 2011-06-10 22:11:24.000000000 +0200
221@@ -13,6 +13,7 @@
222 #include <linux/user.h>
223 #include <linux/security.h>
224 #include <linux/signal.h>
225+#include <linux/vs_base.h>
226
227 #include <asm/uaccess.h>
228 #include <asm/pgtable.h>
229diff -NurpP --minimal linux-3.0.9/arch/alpha/kernel/systbls.S linux-3.0.9-vs2.3.2.1/arch/alpha/kernel/systbls.S
230--- linux-3.0.9/arch/alpha/kernel/systbls.S 2011-07-22 11:17:32.000000000 +0200
231+++ linux-3.0.9-vs2.3.2.1/arch/alpha/kernel/systbls.S 2011-06-10 22:11:24.000000000 +0200
232@@ -446,7 +446,7 @@ sys_call_table:
233 .quad sys_stat64 /* 425 */
234 .quad sys_lstat64
235 .quad sys_fstat64
236- .quad sys_ni_syscall /* sys_vserver */
237+ .quad sys_vserver /* sys_vserver */
238 .quad sys_ni_syscall /* sys_mbind */
239 .quad sys_ni_syscall /* sys_get_mempolicy */
240 .quad sys_ni_syscall /* sys_set_mempolicy */
241diff -NurpP --minimal linux-3.0.9/arch/alpha/kernel/traps.c linux-3.0.9-vs2.3.2.1/arch/alpha/kernel/traps.c
242--- linux-3.0.9/arch/alpha/kernel/traps.c 2010-10-21 13:06:46.000000000 +0200
243+++ linux-3.0.9-vs2.3.2.1/arch/alpha/kernel/traps.c 2011-06-10 22:11:24.000000000 +0200
244@@ -183,7 +183,8 @@ die_if_kernel(char * str, struct pt_regs
245 #ifdef CONFIG_SMP
246 printk("CPU %d ", hard_smp_processor_id());
247 #endif
248- printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
249+ printk("%s(%d[#%u]): %s %ld\n", current->comm,
250+ task_pid_nr(current), current->xid, str, err);
251 dik_show_regs(regs, r9_15);
252 add_taint(TAINT_DIE);
253 dik_show_trace((unsigned long *)(regs+1));
254diff -NurpP --minimal linux-3.0.9/arch/arm/Kconfig linux-3.0.9-vs2.3.2.1/arch/arm/Kconfig
255--- linux-3.0.9/arch/arm/Kconfig 2011-11-15 16:40:41.000000000 +0100
256+++ linux-3.0.9-vs2.3.2.1/arch/arm/Kconfig 2011-10-18 13:51:13.000000000 +0200
257@@ -2049,6 +2049,8 @@ source "fs/Kconfig"
258
259 source "arch/arm/Kconfig.debug"
260
261+source "kernel/vserver/Kconfig"
262+
263 source "security/Kconfig"
264
265 source "crypto/Kconfig"
266diff -NurpP --minimal linux-3.0.9/arch/arm/kernel/calls.S linux-3.0.9-vs2.3.2.1/arch/arm/kernel/calls.S
267--- linux-3.0.9/arch/arm/kernel/calls.S 2011-07-22 11:17:32.000000000 +0200
268+++ linux-3.0.9-vs2.3.2.1/arch/arm/kernel/calls.S 2011-06-10 22:11:24.000000000 +0200
269@@ -322,7 +322,7 @@
270 /* 310 */ CALL(sys_request_key)
271 CALL(sys_keyctl)
272 CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
273-/* vserver */ CALL(sys_ni_syscall)
274+ CALL(sys_vserver)
275 CALL(sys_ioprio_set)
276 /* 315 */ CALL(sys_ioprio_get)
277 CALL(sys_inotify_init)
278diff -NurpP --minimal linux-3.0.9/arch/arm/kernel/process.c linux-3.0.9-vs2.3.2.1/arch/arm/kernel/process.c
279--- linux-3.0.9/arch/arm/kernel/process.c 2011-05-22 16:16:47.000000000 +0200
280+++ linux-3.0.9-vs2.3.2.1/arch/arm/kernel/process.c 2011-06-10 22:11:24.000000000 +0200
281@@ -315,7 +315,8 @@ void __show_regs(struct pt_regs *regs)
282 void show_regs(struct pt_regs * regs)
283 {
284 printk("\n");
285- printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
286+ printk("Pid: %d[#%u], comm: %20s\n",
287+ task_pid_nr(current), current->xid, current->comm);
288 __show_regs(regs);
289 __backtrace();
290 }
291diff -NurpP --minimal linux-3.0.9/arch/arm/kernel/traps.c linux-3.0.9-vs2.3.2.1/arch/arm/kernel/traps.c
292--- linux-3.0.9/arch/arm/kernel/traps.c 2011-07-22 11:17:32.000000000 +0200
293+++ linux-3.0.9-vs2.3.2.1/arch/arm/kernel/traps.c 2011-06-22 12:39:12.000000000 +0200
294@@ -242,8 +242,8 @@ static int __die(const char *str, int er
295
296 print_modules();
297 __show_regs(regs);
298- printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n",
299- TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1);
300+ printk(KERN_EMERG "Process %.*s (pid: %d:#%u, stack limit = 0x%p)\n",
301+ TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), tsk->xid, thread + 1);
302
303 if (!user_mode(regs) || in_interrupt()) {
304 dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
305diff -NurpP --minimal linux-3.0.9/arch/cris/Kconfig linux-3.0.9-vs2.3.2.1/arch/cris/Kconfig
306--- linux-3.0.9/arch/cris/Kconfig 2011-07-22 11:17:35.000000000 +0200
307+++ linux-3.0.9-vs2.3.2.1/arch/cris/Kconfig 2011-06-10 22:11:24.000000000 +0200
308@@ -678,6 +678,8 @@ source "drivers/staging/Kconfig"
309
310 source "arch/cris/Kconfig.debug"
311
312+source "kernel/vserver/Kconfig"
313+
314 source "security/Kconfig"
315
316 source "crypto/Kconfig"
317diff -NurpP --minimal linux-3.0.9/arch/frv/kernel/kernel_thread.S linux-3.0.9-vs2.3.2.1/arch/frv/kernel/kernel_thread.S
318--- linux-3.0.9/arch/frv/kernel/kernel_thread.S 2008-12-25 00:26:37.000000000 +0100
319+++ linux-3.0.9-vs2.3.2.1/arch/frv/kernel/kernel_thread.S 2011-06-10 22:11:24.000000000 +0200
320@@ -37,7 +37,7 @@ kernel_thread:
321
322 # start by forking the current process, but with shared VM
323 setlos.p #__NR_clone,gr7 ; syscall number
324- ori gr10,#CLONE_VM,gr8 ; first syscall arg [clone_flags]
325+ ori gr10,#CLONE_KT,gr8 ; first syscall arg [clone_flags]
326 sethi.p #0xe4e4,gr9 ; second syscall arg [newsp]
327 setlo #0xe4e4,gr9
328 setlos.p #0,gr10 ; third syscall arg [parent_tidptr]
329diff -NurpP --minimal linux-3.0.9/arch/h8300/Kconfig linux-3.0.9-vs2.3.2.1/arch/h8300/Kconfig
330--- linux-3.0.9/arch/h8300/Kconfig 2011-07-22 11:17:35.000000000 +0200
331+++ linux-3.0.9-vs2.3.2.1/arch/h8300/Kconfig 2011-06-10 22:11:24.000000000 +0200
332@@ -213,6 +213,8 @@ source "fs/Kconfig"
333
334 source "arch/h8300/Kconfig.debug"
335
336+source "kernel/vserver/Kconfig"
337+
338 source "security/Kconfig"
339
340 source "crypto/Kconfig"
341diff -NurpP --minimal linux-3.0.9/arch/ia64/Kconfig linux-3.0.9-vs2.3.2.1/arch/ia64/Kconfig
342--- linux-3.0.9/arch/ia64/Kconfig 2011-07-22 11:17:35.000000000 +0200
343+++ linux-3.0.9-vs2.3.2.1/arch/ia64/Kconfig 2011-06-10 22:11:24.000000000 +0200
344@@ -671,6 +671,8 @@ source "fs/Kconfig"
345
346 source "arch/ia64/Kconfig.debug"
347
348+source "kernel/vserver/Kconfig"
349+
350 source "security/Kconfig"
351
352 source "crypto/Kconfig"
353diff -NurpP --minimal linux-3.0.9/arch/ia64/include/asm/tlb.h linux-3.0.9-vs2.3.2.1/arch/ia64/include/asm/tlb.h
354--- linux-3.0.9/arch/ia64/include/asm/tlb.h 2011-07-22 11:17:35.000000000 +0200
355+++ linux-3.0.9-vs2.3.2.1/arch/ia64/include/asm/tlb.h 2011-06-10 22:11:24.000000000 +0200
356@@ -40,6 +40,7 @@
357 #include <linux/mm.h>
358 #include <linux/pagemap.h>
359 #include <linux/swap.h>
360+#include <linux/vs_memory.h>
361
362 #include <asm/pgalloc.h>
363 #include <asm/processor.h>
364diff -NurpP --minimal linux-3.0.9/arch/ia64/kernel/entry.S linux-3.0.9-vs2.3.2.1/arch/ia64/kernel/entry.S
365--- linux-3.0.9/arch/ia64/kernel/entry.S 2011-07-22 11:17:35.000000000 +0200
366+++ linux-3.0.9-vs2.3.2.1/arch/ia64/kernel/entry.S 2011-06-10 22:11:24.000000000 +0200
367@@ -1714,7 +1714,7 @@ sys_call_table:
368 data8 sys_mq_notify
369 data8 sys_mq_getsetattr
370 data8 sys_kexec_load
371- data8 sys_ni_syscall // reserved for vserver
372+ data8 sys_vserver
373 data8 sys_waitid // 1270
374 data8 sys_add_key
375 data8 sys_request_key
376diff -NurpP --minimal linux-3.0.9/arch/ia64/kernel/perfmon.c linux-3.0.9-vs2.3.2.1/arch/ia64/kernel/perfmon.c
377--- linux-3.0.9/arch/ia64/kernel/perfmon.c 2011-03-15 18:06:39.000000000 +0100
378+++ linux-3.0.9-vs2.3.2.1/arch/ia64/kernel/perfmon.c 2011-06-10 22:11:24.000000000 +0200
379@@ -42,6 +42,7 @@
380 #include <linux/completion.h>
381 #include <linux/tracehook.h>
382 #include <linux/slab.h>
383+#include <linux/vs_memory.h>
384
385 #include <asm/errno.h>
386 #include <asm/intrinsics.h>
387diff -NurpP --minimal linux-3.0.9/arch/ia64/kernel/process.c linux-3.0.9-vs2.3.2.1/arch/ia64/kernel/process.c
388--- linux-3.0.9/arch/ia64/kernel/process.c 2011-03-15 18:06:39.000000000 +0100
389+++ linux-3.0.9-vs2.3.2.1/arch/ia64/kernel/process.c 2011-06-10 22:11:24.000000000 +0200
390@@ -109,8 +109,8 @@ show_regs (struct pt_regs *regs)
391 unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
392
393 print_modules();
394- printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current),
395- smp_processor_id(), current->comm);
396+ printk("\nPid: %d[#%u], CPU %d, comm: %20s\n", task_pid_nr(current),
397+ current->xid, smp_processor_id(), current->comm);
398 printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n",
399 regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(),
400 init_utsname()->release);
401diff -NurpP --minimal linux-3.0.9/arch/ia64/kernel/ptrace.c linux-3.0.9-vs2.3.2.1/arch/ia64/kernel/ptrace.c
402--- linux-3.0.9/arch/ia64/kernel/ptrace.c 2011-01-05 21:48:59.000000000 +0100
403+++ linux-3.0.9-vs2.3.2.1/arch/ia64/kernel/ptrace.c 2011-06-10 22:11:24.000000000 +0200
404@@ -21,6 +21,7 @@
405 #include <linux/regset.h>
406 #include <linux/elf.h>
407 #include <linux/tracehook.h>
408+#include <linux/vs_base.h>
409
410 #include <asm/pgtable.h>
411 #include <asm/processor.h>
412diff -NurpP --minimal linux-3.0.9/arch/ia64/kernel/traps.c linux-3.0.9-vs2.3.2.1/arch/ia64/kernel/traps.c
413--- linux-3.0.9/arch/ia64/kernel/traps.c 2010-07-07 18:31:01.000000000 +0200
414+++ linux-3.0.9-vs2.3.2.1/arch/ia64/kernel/traps.c 2011-06-10 22:11:24.000000000 +0200
415@@ -59,8 +59,9 @@ die (const char *str, struct pt_regs *re
416 put_cpu();
417
418 if (++die.lock_owner_depth < 3) {
419- printk("%s[%d]: %s %ld [%d]\n",
420- current->comm, task_pid_nr(current), str, err, ++die_counter);
421+ printk("%s[%d[#%u]]: %s %ld [%d]\n",
422+ current->comm, task_pid_nr(current), current->xid,
423+ str, err, ++die_counter);
424 if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
425 != NOTIFY_STOP)
426 show_regs(regs);
427@@ -323,8 +324,9 @@ handle_fpu_swa (int fp_fault, struct pt_
428 if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
429 last.time = current_jiffies + 5 * HZ;
430 printk(KERN_WARNING
431- "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
432- current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
433+ "%s(%d[#%u]): floating-point assist fault at ip %016lx, isr %016lx\n",
434+ current->comm, task_pid_nr(current), current->xid,
435+ regs->cr_iip + ia64_psr(regs)->ri, isr);
436 }
437 }
438 }
439diff -NurpP --minimal linux-3.0.9/arch/ia64/mm/fault.c linux-3.0.9-vs2.3.2.1/arch/ia64/mm/fault.c
440--- linux-3.0.9/arch/ia64/mm/fault.c 2011-07-22 11:17:35.000000000 +0200
441+++ linux-3.0.9-vs2.3.2.1/arch/ia64/mm/fault.c 2011-06-10 22:28:23.000000000 +0200
442@@ -11,6 +11,7 @@
443 #include <linux/kprobes.h>
444 #include <linux/kdebug.h>
445 #include <linux/prefetch.h>
446+#include <linux/vs_memory.h>
447
448 #include <asm/pgtable.h>
449 #include <asm/processor.h>
450diff -NurpP --minimal linux-3.0.9/arch/m32r/kernel/traps.c linux-3.0.9-vs2.3.2.1/arch/m32r/kernel/traps.c
451--- linux-3.0.9/arch/m32r/kernel/traps.c 2009-12-03 20:01:57.000000000 +0100
452+++ linux-3.0.9-vs2.3.2.1/arch/m32r/kernel/traps.c 2011-06-10 22:11:24.000000000 +0200
453@@ -196,8 +196,9 @@ static void show_registers(struct pt_reg
454 } else {
455 printk("SPI: %08lx\n", sp);
456 }
457- printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
458- current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
459+ printk("Process %s (pid: %d[#%u], process nr: %d, stackpage=%08lx)",
460+ current->comm, task_pid_nr(current), current->xid,
461+ 0xffff & i, 4096+(unsigned long)current);
462
463 /*
464 * When in-kernel, we also print out the stack and code at the
465diff -NurpP --minimal linux-3.0.9/arch/m68k/Kconfig linux-3.0.9-vs2.3.2.1/arch/m68k/Kconfig
466--- linux-3.0.9/arch/m68k/Kconfig 2011-07-22 11:17:35.000000000 +0200
467+++ linux-3.0.9-vs2.3.2.1/arch/m68k/Kconfig 2011-06-10 22:11:24.000000000 +0200
468@@ -241,6 +241,8 @@ source "fs/Kconfig"
469
470 source "arch/m68k/Kconfig.debug"
471
472+source "kernel/vserver/Kconfig"
473+
474 source "security/Kconfig"
475
476 source "crypto/Kconfig"
477diff -NurpP --minimal linux-3.0.9/arch/mips/Kconfig linux-3.0.9-vs2.3.2.1/arch/mips/Kconfig
478--- linux-3.0.9/arch/mips/Kconfig 2011-07-22 11:17:35.000000000 +0200
479+++ linux-3.0.9-vs2.3.2.1/arch/mips/Kconfig 2011-06-10 22:11:24.000000000 +0200
480@@ -2485,6 +2485,8 @@ source "fs/Kconfig"
481
482 source "arch/mips/Kconfig.debug"
483
484+source "kernel/vserver/Kconfig"
485+
486 source "security/Kconfig"
487
488 source "crypto/Kconfig"
489diff -NurpP --minimal linux-3.0.9/arch/mips/kernel/ptrace.c linux-3.0.9-vs2.3.2.1/arch/mips/kernel/ptrace.c
490--- linux-3.0.9/arch/mips/kernel/ptrace.c 2011-07-22 11:17:36.000000000 +0200
491+++ linux-3.0.9-vs2.3.2.1/arch/mips/kernel/ptrace.c 2011-06-10 22:11:24.000000000 +0200
492@@ -25,6 +25,7 @@
493 #include <linux/security.h>
494 #include <linux/audit.h>
495 #include <linux/seccomp.h>
496+#include <linux/vs_base.h>
497
498 #include <asm/byteorder.h>
499 #include <asm/cpu.h>
500@@ -263,6 +264,9 @@ long arch_ptrace(struct task_struct *chi
501 void __user *datavp = (void __user *) data;
502 unsigned long __user *datalp = (void __user *) data;
503
504+ if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
505+ goto out;
506+
507 switch (request) {
508 /* when I and D space are separate, these will need to be fixed. */
509 case PTRACE_PEEKTEXT: /* read word at location addr. */
510diff -NurpP --minimal linux-3.0.9/arch/mips/kernel/scall32-o32.S linux-3.0.9-vs2.3.2.1/arch/mips/kernel/scall32-o32.S
511--- linux-3.0.9/arch/mips/kernel/scall32-o32.S 2011-07-22 11:17:36.000000000 +0200
512+++ linux-3.0.9-vs2.3.2.1/arch/mips/kernel/scall32-o32.S 2011-06-10 22:11:24.000000000 +0200
513@@ -523,7 +523,7 @@ einval: li v0, -ENOSYS
514 sys sys_mq_timedreceive 5
515 sys sys_mq_notify 2 /* 4275 */
516 sys sys_mq_getsetattr 3
517- sys sys_ni_syscall 0 /* sys_vserver */
518+ sys sys_vserver 3
519 sys sys_waitid 5
520 sys sys_ni_syscall 0 /* available, was setaltroot */
521 sys sys_add_key 5 /* 4280 */
522diff -NurpP --minimal linux-3.0.9/arch/mips/kernel/scall64-64.S linux-3.0.9-vs2.3.2.1/arch/mips/kernel/scall64-64.S
523--- linux-3.0.9/arch/mips/kernel/scall64-64.S 2011-07-22 11:17:36.000000000 +0200
524+++ linux-3.0.9-vs2.3.2.1/arch/mips/kernel/scall64-64.S 2011-06-10 22:11:24.000000000 +0200
525@@ -362,7 +362,7 @@ sys_call_table:
526 PTR sys_mq_timedreceive
527 PTR sys_mq_notify
528 PTR sys_mq_getsetattr /* 5235 */
529- PTR sys_ni_syscall /* sys_vserver */
530+ PTR sys_vserver
531 PTR sys_waitid
532 PTR sys_ni_syscall /* available, was setaltroot */
533 PTR sys_add_key
534diff -NurpP --minimal linux-3.0.9/arch/mips/kernel/scall64-n32.S linux-3.0.9-vs2.3.2.1/arch/mips/kernel/scall64-n32.S
535--- linux-3.0.9/arch/mips/kernel/scall64-n32.S 2011-07-22 11:17:36.000000000 +0200
536+++ linux-3.0.9-vs2.3.2.1/arch/mips/kernel/scall64-n32.S 2011-06-10 22:11:24.000000000 +0200
537@@ -361,7 +361,7 @@ EXPORT(sysn32_call_table)
538 PTR compat_sys_mq_timedreceive
539 PTR compat_sys_mq_notify
540 PTR compat_sys_mq_getsetattr
541- PTR sys_ni_syscall /* 6240, sys_vserver */
542+ PTR sys32_vserver /* 6240 */
543 PTR compat_sys_waitid
544 PTR sys_ni_syscall /* available, was setaltroot */
545 PTR sys_add_key
546diff -NurpP --minimal linux-3.0.9/arch/mips/kernel/scall64-o32.S linux-3.0.9-vs2.3.2.1/arch/mips/kernel/scall64-o32.S
547--- linux-3.0.9/arch/mips/kernel/scall64-o32.S 2011-07-22 11:17:36.000000000 +0200
548+++ linux-3.0.9-vs2.3.2.1/arch/mips/kernel/scall64-o32.S 2011-06-10 22:11:24.000000000 +0200
549@@ -480,7 +480,7 @@ sys_call_table:
550 PTR compat_sys_mq_timedreceive
551 PTR compat_sys_mq_notify /* 4275 */
552 PTR compat_sys_mq_getsetattr
553- PTR sys_ni_syscall /* sys_vserver */
554+ PTR sys32_vserver
555 PTR sys_32_waitid
556 PTR sys_ni_syscall /* available, was setaltroot */
557 PTR sys_add_key /* 4280 */
558diff -NurpP --minimal linux-3.0.9/arch/mips/kernel/traps.c linux-3.0.9-vs2.3.2.1/arch/mips/kernel/traps.c
559--- linux-3.0.9/arch/mips/kernel/traps.c 2011-05-22 16:17:00.000000000 +0200
560+++ linux-3.0.9-vs2.3.2.1/arch/mips/kernel/traps.c 2011-06-10 22:11:24.000000000 +0200
561@@ -343,9 +343,10 @@ void show_registers(struct pt_regs *regs
562
563 __show_regs(regs);
564 print_modules();
565- printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n",
566- current->comm, current->pid, current_thread_info(), current,
567- field, current_thread_info()->tp_value);
568+ printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n",
569+ current->comm, task_pid_nr(current), current->xid,
570+ current_thread_info(), current,
571+ field, current_thread_info()->tp_value);
572 if (cpu_has_userlocal) {
573 unsigned long tls;
574
575diff -NurpP --minimal linux-3.0.9/arch/parisc/Kconfig linux-3.0.9-vs2.3.2.1/arch/parisc/Kconfig
576--- linux-3.0.9/arch/parisc/Kconfig 2011-07-22 11:17:36.000000000 +0200
577+++ linux-3.0.9-vs2.3.2.1/arch/parisc/Kconfig 2011-06-10 22:11:24.000000000 +0200
578@@ -279,6 +279,8 @@ source "fs/Kconfig"
579
580 source "arch/parisc/Kconfig.debug"
581
582+source "kernel/vserver/Kconfig"
583+
584 source "security/Kconfig"
585
586 source "crypto/Kconfig"
587diff -NurpP --minimal linux-3.0.9/arch/parisc/kernel/syscall_table.S linux-3.0.9-vs2.3.2.1/arch/parisc/kernel/syscall_table.S
588--- linux-3.0.9/arch/parisc/kernel/syscall_table.S 2011-11-15 16:40:42.000000000 +0100
589+++ linux-3.0.9-vs2.3.2.1/arch/parisc/kernel/syscall_table.S 2011-08-29 03:45:07.000000000 +0200
590@@ -361,7 +361,7 @@
591 ENTRY_COMP(mbind) /* 260 */
592 ENTRY_COMP(get_mempolicy)
593 ENTRY_COMP(set_mempolicy)
594- ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */
595+ ENTRY_DIFF(vserver)
596 ENTRY_SAME(add_key)
597 ENTRY_SAME(request_key) /* 265 */
598 ENTRY_SAME(keyctl)
599diff -NurpP --minimal linux-3.0.9/arch/parisc/kernel/traps.c linux-3.0.9-vs2.3.2.1/arch/parisc/kernel/traps.c
600--- linux-3.0.9/arch/parisc/kernel/traps.c 2009-09-10 15:25:40.000000000 +0200
601+++ linux-3.0.9-vs2.3.2.1/arch/parisc/kernel/traps.c 2011-06-10 22:11:24.000000000 +0200
602@@ -236,8 +236,9 @@ void die_if_kernel(char *str, struct pt_
603 if (err == 0)
604 return; /* STFU */
605
606- printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
607- current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
608+ printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n",
609+ current->comm, task_pid_nr(current), current->xid,
610+ str, err, regs->iaoq[0]);
611 #ifdef PRINT_USER_FAULTS
612 /* XXX for debugging only */
613 show_regs(regs);
614@@ -270,8 +271,8 @@ void die_if_kernel(char *str, struct pt_
615 pdc_console_restart();
616
617 if (err)
618- printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
619- current->comm, task_pid_nr(current), str, err);
620+ printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n",
621+ current->comm, task_pid_nr(current), current->xid, str, err);
622
623 /* Wot's wrong wif bein' racy? */
624 if (current->thread.flags & PARISC_KERNEL_DEATH) {
625diff -NurpP --minimal linux-3.0.9/arch/parisc/mm/fault.c linux-3.0.9-vs2.3.2.1/arch/parisc/mm/fault.c
626--- linux-3.0.9/arch/parisc/mm/fault.c 2010-08-02 16:52:06.000000000 +0200
627+++ linux-3.0.9-vs2.3.2.1/arch/parisc/mm/fault.c 2011-06-10 22:11:24.000000000 +0200
628@@ -237,8 +237,9 @@ bad_area:
629
630 #ifdef PRINT_USER_FAULTS
631 printk(KERN_DEBUG "\n");
632- printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n",
633- task_pid_nr(tsk), tsk->comm, code, address);
634+ printk(KERN_DEBUG "do_page_fault() pid=%d:#%u "
635+ "command='%s' type=%lu address=0x%08lx\n",
636+ task_pid_nr(tsk), tsk->xid, tsk->comm, code, address);
637 if (vma) {
638 printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n",
639 vma->vm_start, vma->vm_end);
640diff -NurpP --minimal linux-3.0.9/arch/powerpc/Kconfig linux-3.0.9-vs2.3.2.1/arch/powerpc/Kconfig
641--- linux-3.0.9/arch/powerpc/Kconfig 2011-07-22 11:17:36.000000000 +0200
642+++ linux-3.0.9-vs2.3.2.1/arch/powerpc/Kconfig 2011-06-10 22:11:24.000000000 +0200
643@@ -978,6 +978,8 @@ source "lib/Kconfig"
644
645 source "arch/powerpc/Kconfig.debug"
646
647+source "kernel/vserver/Kconfig"
648+
649 source "security/Kconfig"
650
651 config KEYS_COMPAT
652diff -NurpP --minimal linux-3.0.9/arch/powerpc/include/asm/unistd.h linux-3.0.9-vs2.3.2.1/arch/powerpc/include/asm/unistd.h
653--- linux-3.0.9/arch/powerpc/include/asm/unistd.h 2011-07-22 11:17:40.000000000 +0200
654+++ linux-3.0.9-vs2.3.2.1/arch/powerpc/include/asm/unistd.h 2011-06-10 22:11:24.000000000 +0200
655@@ -275,7 +275,7 @@
656 #endif
657 #define __NR_rtas 255
658 #define __NR_sys_debug_setcontext 256
659-/* Number 257 is reserved for vserver */
660+#define __NR_vserver 257
661 #define __NR_migrate_pages 258
662 #define __NR_mbind 259
663 #define __NR_get_mempolicy 260
664diff -NurpP --minimal linux-3.0.9/arch/powerpc/kernel/process.c linux-3.0.9-vs2.3.2.1/arch/powerpc/kernel/process.c
665--- linux-3.0.9/arch/powerpc/kernel/process.c 2011-07-22 11:17:40.000000000 +0200
666+++ linux-3.0.9-vs2.3.2.1/arch/powerpc/kernel/process.c 2011-06-10 22:11:24.000000000 +0200
667@@ -656,8 +656,9 @@ void show_regs(struct pt_regs * regs)
668 #else
669 printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr);
670 #endif
671- printk("TASK = %p[%d] '%s' THREAD: %p",
672- current, task_pid_nr(current), current->comm, task_thread_info(current));
673+ printk("TASK = %p[%d,#%u] '%s' THREAD: %p",
674+ current, task_pid_nr(current), current->xid,
675+ current->comm, task_thread_info(current));
676
677 #ifdef CONFIG_SMP
678 printk(" CPU: %d", raw_smp_processor_id());
679diff -NurpP --minimal linux-3.0.9/arch/powerpc/kernel/traps.c linux-3.0.9-vs2.3.2.1/arch/powerpc/kernel/traps.c
680--- linux-3.0.9/arch/powerpc/kernel/traps.c 2011-07-22 11:17:40.000000000 +0200
681+++ linux-3.0.9-vs2.3.2.1/arch/powerpc/kernel/traps.c 2011-07-19 00:44:39.000000000 +0200
682@@ -1075,8 +1075,9 @@ void nonrecoverable_exception(struct pt_
683
684 void trace_syscall(struct pt_regs *regs)
685 {
686- printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
687- current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
688+ printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
689+ current, task_pid_nr(current), current->xid,
690+ regs->nip, regs->link, regs->gpr[0],
691 regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
692 }
693
694diff -NurpP --minimal linux-3.0.9/arch/powerpc/kernel/vdso.c linux-3.0.9-vs2.3.2.1/arch/powerpc/kernel/vdso.c
695--- linux-3.0.9/arch/powerpc/kernel/vdso.c 2011-05-22 16:17:02.000000000 +0200
696+++ linux-3.0.9-vs2.3.2.1/arch/powerpc/kernel/vdso.c 2011-06-10 22:11:24.000000000 +0200
697@@ -23,6 +23,7 @@
698 #include <linux/security.h>
699 #include <linux/bootmem.h>
700 #include <linux/memblock.h>
701+#include <linux/vs_memory.h>
702
703 #include <asm/pgtable.h>
704 #include <asm/system.h>
705diff -NurpP --minimal linux-3.0.9/arch/s390/Kconfig linux-3.0.9-vs2.3.2.1/arch/s390/Kconfig
706--- linux-3.0.9/arch/s390/Kconfig 2011-07-22 11:17:41.000000000 +0200
707+++ linux-3.0.9-vs2.3.2.1/arch/s390/Kconfig 2011-07-01 11:35:34.000000000 +0200
708@@ -628,6 +628,8 @@ source "fs/Kconfig"
709
710 source "arch/s390/Kconfig.debug"
711
712+source "kernel/vserver/Kconfig"
713+
714 source "security/Kconfig"
715
716 source "crypto/Kconfig"
717diff -NurpP --minimal linux-3.0.9/arch/s390/include/asm/tlb.h linux-3.0.9-vs2.3.2.1/arch/s390/include/asm/tlb.h
718--- linux-3.0.9/arch/s390/include/asm/tlb.h 2011-07-22 11:17:41.000000000 +0200
719+++ linux-3.0.9-vs2.3.2.1/arch/s390/include/asm/tlb.h 2011-06-15 02:40:14.000000000 +0200
720@@ -24,6 +24,8 @@
721 #include <linux/mm.h>
722 #include <linux/pagemap.h>
723 #include <linux/swap.h>
724+#include <linux/vs_memory.h>
725+
726 #include <asm/processor.h>
727 #include <asm/pgalloc.h>
728 #include <asm/tlbflush.h>
729diff -NurpP --minimal linux-3.0.9/arch/s390/include/asm/unistd.h linux-3.0.9-vs2.3.2.1/arch/s390/include/asm/unistd.h
730--- linux-3.0.9/arch/s390/include/asm/unistd.h 2011-07-22 11:17:41.000000000 +0200
731+++ linux-3.0.9-vs2.3.2.1/arch/s390/include/asm/unistd.h 2011-06-10 22:11:24.000000000 +0200
732@@ -202,7 +202,7 @@
733 #define __NR_clock_gettime (__NR_timer_create+6)
734 #define __NR_clock_getres (__NR_timer_create+7)
735 #define __NR_clock_nanosleep (__NR_timer_create+8)
736-/* Number 263 is reserved for vserver */
737+#define __NR_vserver 263
738 #define __NR_statfs64 265
739 #define __NR_fstatfs64 266
740 #define __NR_remap_file_pages 267
741diff -NurpP --minimal linux-3.0.9/arch/s390/kernel/ptrace.c linux-3.0.9-vs2.3.2.1/arch/s390/kernel/ptrace.c
742--- linux-3.0.9/arch/s390/kernel/ptrace.c 2011-11-15 16:40:42.000000000 +0100
743+++ linux-3.0.9-vs2.3.2.1/arch/s390/kernel/ptrace.c 2011-11-15 17:37:04.000000000 +0100
744@@ -20,6 +20,7 @@
745 #include <linux/regset.h>
746 #include <linux/tracehook.h>
747 #include <linux/seccomp.h>
748+#include <linux/vs_base.h>
749 #include <trace/syscall.h>
750 #include <asm/compat.h>
751 #include <asm/segment.h>
752diff -NurpP --minimal linux-3.0.9/arch/s390/kernel/syscalls.S linux-3.0.9-vs2.3.2.1/arch/s390/kernel/syscalls.S
753--- linux-3.0.9/arch/s390/kernel/syscalls.S 2011-07-22 11:17:41.000000000 +0200
754+++ linux-3.0.9-vs2.3.2.1/arch/s390/kernel/syscalls.S 2011-06-10 22:11:24.000000000 +0200
755@@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett
756 SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */
757 SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper)
758 SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper)
759-NI_SYSCALL /* reserved for vserver */
760+SYSCALL(sys_vserver,sys_vserver,sys32_vserver)
761 SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper)
762 SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper)
763 SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper)
764diff -NurpP --minimal linux-3.0.9/arch/sh/Kconfig linux-3.0.9-vs2.3.2.1/arch/sh/Kconfig
765--- linux-3.0.9/arch/sh/Kconfig 2011-07-22 11:17:41.000000000 +0200
766+++ linux-3.0.9-vs2.3.2.1/arch/sh/Kconfig 2011-07-19 00:44:39.000000000 +0200
767@@ -893,6 +893,8 @@ source "fs/Kconfig"
768
769 source "arch/sh/Kconfig.debug"
770
771+source "kernel/vserver/Kconfig"
772+
773 source "security/Kconfig"
774
775 source "crypto/Kconfig"
776diff -NurpP --minimal linux-3.0.9/arch/sh/kernel/irq.c linux-3.0.9-vs2.3.2.1/arch/sh/kernel/irq.c
777--- linux-3.0.9/arch/sh/kernel/irq.c 2011-07-22 11:17:41.000000000 +0200
778+++ linux-3.0.9-vs2.3.2.1/arch/sh/kernel/irq.c 2011-07-19 00:45:06.000000000 +0200
779@@ -14,6 +14,7 @@
780 #include <linux/ftrace.h>
781 #include <linux/delay.h>
782 #include <linux/ratelimit.h>
783+// #include <linux/vs_context.h>
784 #include <asm/processor.h>
785 #include <asm/machvec.h>
786 #include <asm/uaccess.h>
787diff -NurpP --minimal linux-3.0.9/arch/sh/kernel/vsyscall/vsyscall.c linux-3.0.9-vs2.3.2.1/arch/sh/kernel/vsyscall/vsyscall.c
788--- linux-3.0.9/arch/sh/kernel/vsyscall/vsyscall.c 2011-05-22 16:17:07.000000000 +0200
789+++ linux-3.0.9-vs2.3.2.1/arch/sh/kernel/vsyscall/vsyscall.c 2011-06-10 22:11:24.000000000 +0200
790@@ -18,6 +18,7 @@
791 #include <linux/elf.h>
792 #include <linux/sched.h>
793 #include <linux/err.h>
794+#include <linux/vs_memory.h>
795
796 /*
797 * Should the kernel map a VDSO page into processes and pass its
798diff -NurpP --minimal linux-3.0.9/arch/sparc/Kconfig linux-3.0.9-vs2.3.2.1/arch/sparc/Kconfig
799--- linux-3.0.9/arch/sparc/Kconfig 2011-11-15 16:40:42.000000000 +0100
800+++ linux-3.0.9-vs2.3.2.1/arch/sparc/Kconfig 2011-10-18 13:51:13.000000000 +0200
801@@ -602,6 +602,8 @@ source "fs/Kconfig"
802
803 source "arch/sparc/Kconfig.debug"
804
805+source "kernel/vserver/Kconfig"
806+
807 source "security/Kconfig"
808
809 source "crypto/Kconfig"
810diff -NurpP --minimal linux-3.0.9/arch/sparc/include/asm/tlb_64.h linux-3.0.9-vs2.3.2.1/arch/sparc/include/asm/tlb_64.h
811--- linux-3.0.9/arch/sparc/include/asm/tlb_64.h 2011-07-22 11:17:42.000000000 +0200
812+++ linux-3.0.9-vs2.3.2.1/arch/sparc/include/asm/tlb_64.h 2011-06-10 22:11:24.000000000 +0200
813@@ -3,6 +3,7 @@
814
815 #include <linux/swap.h>
816 #include <linux/pagemap.h>
817+#include <linux/vs_memory.h>
818 #include <asm/pgalloc.h>
819 #include <asm/tlbflush.h>
820 #include <asm/mmu_context.h>
821diff -NurpP --minimal linux-3.0.9/arch/sparc/include/asm/unistd.h linux-3.0.9-vs2.3.2.1/arch/sparc/include/asm/unistd.h
822--- linux-3.0.9/arch/sparc/include/asm/unistd.h 2011-07-22 11:17:42.000000000 +0200
823+++ linux-3.0.9-vs2.3.2.1/arch/sparc/include/asm/unistd.h 2011-06-10 22:11:24.000000000 +0200
824@@ -335,7 +335,7 @@
825 #define __NR_timer_getoverrun 264
826 #define __NR_timer_delete 265
827 #define __NR_timer_create 266
828-/* #define __NR_vserver 267 Reserved for VSERVER */
829+#define __NR_vserver 267
830 #define __NR_io_setup 268
831 #define __NR_io_destroy 269
832 #define __NR_io_submit 270
833diff -NurpP --minimal linux-3.0.9/arch/sparc/kernel/systbls_32.S linux-3.0.9-vs2.3.2.1/arch/sparc/kernel/systbls_32.S
834--- linux-3.0.9/arch/sparc/kernel/systbls_32.S 2011-07-22 11:17:42.000000000 +0200
835+++ linux-3.0.9-vs2.3.2.1/arch/sparc/kernel/systbls_32.S 2011-06-10 22:11:24.000000000 +0200
836@@ -70,7 +70,7 @@ sys_call_table:
837 /*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
838 /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
839 /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
840-/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
841+/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
842 /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
843 /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
844 /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
845diff -NurpP --minimal linux-3.0.9/arch/sparc/kernel/systbls_64.S linux-3.0.9-vs2.3.2.1/arch/sparc/kernel/systbls_64.S
846--- linux-3.0.9/arch/sparc/kernel/systbls_64.S 2011-07-22 11:17:42.000000000 +0200
847+++ linux-3.0.9-vs2.3.2.1/arch/sparc/kernel/systbls_64.S 2011-06-10 22:11:24.000000000 +0200
848@@ -71,7 +71,7 @@ sys_call_table32:
849 /*250*/ .word sys_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
850 .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
851 /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
852- .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
853+ .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy
854 /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
855 .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
856 /*280*/ .word sys32_tee, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat
857@@ -148,7 +148,7 @@ sys_call_table:
858 /*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
859 .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
860 /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
861- .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
862+ .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
863 /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
864 .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
865 /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
866diff -NurpP --minimal linux-3.0.9/arch/um/Kconfig.rest linux-3.0.9-vs2.3.2.1/arch/um/Kconfig.rest
867--- linux-3.0.9/arch/um/Kconfig.rest 2009-06-11 17:12:19.000000000 +0200
868+++ linux-3.0.9-vs2.3.2.1/arch/um/Kconfig.rest 2011-06-10 22:11:24.000000000 +0200
869@@ -18,6 +18,8 @@ source "drivers/connector/Kconfig"
870
871 source "fs/Kconfig"
872
873+source "kernel/vserver/Kconfig"
874+
875 source "security/Kconfig"
876
877 source "crypto/Kconfig"
878diff -NurpP --minimal linux-3.0.9/arch/um/include/asm/tlb.h linux-3.0.9-vs2.3.2.1/arch/um/include/asm/tlb.h
879--- linux-3.0.9/arch/um/include/asm/tlb.h 2011-07-22 11:17:42.000000000 +0200
880+++ linux-3.0.9-vs2.3.2.1/arch/um/include/asm/tlb.h 2011-06-10 22:11:24.000000000 +0200
881@@ -3,6 +3,7 @@
882
883 #include <linux/pagemap.h>
884 #include <linux/swap.h>
885+#include <linux/vs_memory.h>
886 #include <asm/percpu.h>
887 #include <asm/pgalloc.h>
888 #include <asm/tlbflush.h>
889diff -NurpP --minimal linux-3.0.9/arch/um/include/shared/kern_constants.h linux-3.0.9-vs2.3.2.1/arch/um/include/shared/kern_constants.h
890--- linux-3.0.9/arch/um/include/shared/kern_constants.h 1970-01-01 01:00:00.000000000 +0100
891+++ linux-3.0.9-vs2.3.2.1/arch/um/include/shared/kern_constants.h 2011-06-10 22:11:24.000000000 +0200
892@@ -0,0 +1 @@
893+#include "../../../../include/generated/asm-offsets.h"
894diff -NurpP --minimal linux-3.0.9/arch/um/include/shared/user_constants.h linux-3.0.9-vs2.3.2.1/arch/um/include/shared/user_constants.h
895--- linux-3.0.9/arch/um/include/shared/user_constants.h 1970-01-01 01:00:00.000000000 +0100
896+++ linux-3.0.9-vs2.3.2.1/arch/um/include/shared/user_constants.h 2011-06-10 22:11:24.000000000 +0200
897@@ -0,0 +1,40 @@
898+/*
899+ * DO NOT MODIFY.
900+ *
901+ * This file was generated by arch/um/Makefile
902+ *
903+ */
904+
905+#define HOST_SC_CR2 176 /* offsetof(struct sigcontext, cr2) # */
906+#define HOST_SC_ERR 152 /* offsetof(struct sigcontext, err) # */
907+#define HOST_SC_TRAPNO 160 /* offsetof(struct sigcontext, trapno) # */
908+#define HOST_FP_SIZE 64 /* sizeof(struct _fpstate) / sizeof(unsigned long) # */
909+#define HOST_RBX 5 /* RBX # */
910+#define HOST_RCX 11 /* RCX # */
911+#define HOST_RDI 14 /* RDI # */
912+#define HOST_RSI 13 /* RSI # */
913+#define HOST_RDX 12 /* RDX # */
914+#define HOST_RBP 4 /* RBP # */
915+#define HOST_RAX 10 /* RAX # */
916+#define HOST_R8 9 /* R8 # */
917+#define HOST_R9 8 /* R9 # */
918+#define HOST_R10 7 /* R10 # */
919+#define HOST_R11 6 /* R11 # */
920+#define HOST_R12 3 /* R12 # */
921+#define HOST_R13 2 /* R13 # */
922+#define HOST_R14 1 /* R14 # */
923+#define HOST_R15 0 /* R15 # */
924+#define HOST_ORIG_RAX 15 /* ORIG_RAX # */
925+#define HOST_CS 17 /* CS # */
926+#define HOST_SS 20 /* SS # */
927+#define HOST_EFLAGS 18 /* EFLAGS # */
928+#define HOST_IP 16 /* RIP # */
929+#define HOST_SP 19 /* RSP # */
930+#define UM_FRAME_SIZE 216 /* sizeof(struct user_regs_struct) # */
931+#define UM_POLLIN 1 /* POLLIN # */
932+#define UM_POLLPRI 2 /* POLLPRI # */
933+#define UM_POLLOUT 4 /* POLLOUT # */
934+#define UM_PROT_READ 1 /* PROT_READ # */
935+#define UM_PROT_WRITE 2 /* PROT_WRITE # */
936+#define UM_PROT_EXEC 4 /* PROT_EXEC # */
937+
938diff -NurpP --minimal linux-3.0.9/arch/x86/Kconfig linux-3.0.9-vs2.3.2.1/arch/x86/Kconfig
939--- linux-3.0.9/arch/x86/Kconfig 2011-07-22 11:17:42.000000000 +0200
940+++ linux-3.0.9-vs2.3.2.1/arch/x86/Kconfig 2011-07-22 11:20:39.000000000 +0200
941@@ -2159,6 +2159,8 @@ source "fs/Kconfig"
942
943 source "arch/x86/Kconfig.debug"
944
945+source "kernel/vserver/Kconfig"
946+
947 source "security/Kconfig"
948
949 source "crypto/Kconfig"
950diff -NurpP --minimal linux-3.0.9/arch/x86/ia32/ia32entry.S linux-3.0.9-vs2.3.2.1/arch/x86/ia32/ia32entry.S
951--- linux-3.0.9/arch/x86/ia32/ia32entry.S 2011-07-22 11:17:42.000000000 +0200
952+++ linux-3.0.9-vs2.3.2.1/arch/x86/ia32/ia32entry.S 2011-06-10 22:11:24.000000000 +0200
953@@ -776,7 +776,7 @@ ia32_sys_call_table:
954 .quad sys_tgkill /* 270 */
955 .quad compat_sys_utimes
956 .quad sys32_fadvise64_64
957- .quad quiet_ni_syscall /* sys_vserver */
958+ .quad sys32_vserver
959 .quad sys_mbind
960 .quad compat_sys_get_mempolicy /* 275 */
961 .quad sys_set_mempolicy
962diff -NurpP --minimal linux-3.0.9/arch/x86/include/asm/unistd_64.h linux-3.0.9-vs2.3.2.1/arch/x86/include/asm/unistd_64.h
963--- linux-3.0.9/arch/x86/include/asm/unistd_64.h 2011-07-22 11:17:43.000000000 +0200
964+++ linux-3.0.9-vs2.3.2.1/arch/x86/include/asm/unistd_64.h 2011-06-10 22:11:24.000000000 +0200
965@@ -535,7 +535,7 @@ __SYSCALL(__NR_tgkill, sys_tgkill)
966 #define __NR_utimes 235
967 __SYSCALL(__NR_utimes, sys_utimes)
968 #define __NR_vserver 236
969-__SYSCALL(__NR_vserver, sys_ni_syscall)
970+__SYSCALL(__NR_vserver, sys_vserver)
971 #define __NR_mbind 237
972 __SYSCALL(__NR_mbind, sys_mbind)
973 #define __NR_set_mempolicy 238
974diff -NurpP --minimal linux-3.0.9/arch/x86/kernel/syscall_table_32.S linux-3.0.9-vs2.3.2.1/arch/x86/kernel/syscall_table_32.S
975--- linux-3.0.9/arch/x86/kernel/syscall_table_32.S 2011-07-22 11:17:43.000000000 +0200
976+++ linux-3.0.9-vs2.3.2.1/arch/x86/kernel/syscall_table_32.S 2011-06-10 22:11:24.000000000 +0200
977@@ -272,7 +272,7 @@ ENTRY(sys_call_table)
978 .long sys_tgkill /* 270 */
979 .long sys_utimes
980 .long sys_fadvise64_64
981- .long sys_ni_syscall /* sys_vserver */
982+ .long sys_vserver
983 .long sys_mbind
984 .long sys_get_mempolicy
985 .long sys_set_mempolicy
986diff -NurpP --minimal linux-3.0.9/block/genhd.c linux-3.0.9-vs2.3.2.1/block/genhd.c
987--- linux-3.0.9/block/genhd.c 2011-11-15 16:40:42.000000000 +0100
988+++ linux-3.0.9-vs2.3.2.1/block/genhd.c 2011-11-15 17:37:04.000000000 +0100
989@@ -1162,17 +1162,17 @@ static int diskstats_show(struct seq_fil
990 cpu = part_stat_lock();
991 part_round_stats(cpu, hd);
992 part_stat_unlock();
993- seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
994- "%u %lu %lu %llu %u %u %u %u\n",
995+ seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
996+ "%u %lu %lu %lu %u %u %u %u\n",
997 MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
998 disk_name(gp, hd->partno, buf),
999 part_stat_read(hd, ios[READ]),
1000 part_stat_read(hd, merges[READ]),
1001- (unsigned long long)part_stat_read(hd, sectors[READ]),
1002+ part_stat_read(hd, sectors[READ]),
1003 jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
1004 part_stat_read(hd, ios[WRITE]),
1005 part_stat_read(hd, merges[WRITE]),
1006- (unsigned long long)part_stat_read(hd, sectors[WRITE]),
1007+ part_stat_read(hd, sectors[WRITE]),
1008 jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
1009 part_in_flight(hd),
1010 jiffies_to_msecs(part_stat_read(hd, io_ticks)),
1011diff -NurpP --minimal linux-3.0.9/drivers/block/Kconfig linux-3.0.9-vs2.3.2.1/drivers/block/Kconfig
1012--- linux-3.0.9/drivers/block/Kconfig 2011-07-22 11:17:44.000000000 +0200
1013+++ linux-3.0.9-vs2.3.2.1/drivers/block/Kconfig 2011-06-10 22:11:24.000000000 +0200
1014@@ -273,6 +273,13 @@ config BLK_DEV_CRYPTOLOOP
1015
1016 source "drivers/block/drbd/Kconfig"
1017
1018+config BLK_DEV_VROOT
1019+ tristate "Virtual Root device support"
1020+ depends on QUOTACTL
1021+ ---help---
1022+ Saying Y here will allow you to use quota/fs ioctls on a shared
1023+ partition within a virtual server without compromising security.
1024+
1025 config BLK_DEV_NBD
1026 tristate "Network block device support"
1027 depends on NET
1028diff -NurpP --minimal linux-3.0.9/drivers/block/Makefile linux-3.0.9-vs2.3.2.1/drivers/block/Makefile
1029--- linux-3.0.9/drivers/block/Makefile 2011-07-22 11:17:44.000000000 +0200
1030+++ linux-3.0.9-vs2.3.2.1/drivers/block/Makefile 2011-06-10 22:11:24.000000000 +0200
1031@@ -34,6 +34,7 @@ obj-$(CONFIG_VIODASD) += viodasd.o
1032 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
1033 obj-$(CONFIG_BLK_DEV_UB) += ub.o
1034 obj-$(CONFIG_BLK_DEV_HD) += hd.o
1035+obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o
1036
1037 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
1038 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
1039diff -NurpP --minimal linux-3.0.9/drivers/block/loop.c linux-3.0.9-vs2.3.2.1/drivers/block/loop.c
1040--- linux-3.0.9/drivers/block/loop.c 2011-11-15 16:40:42.000000000 +0100
1041+++ linux-3.0.9-vs2.3.2.1/drivers/block/loop.c 2011-08-31 19:37:44.000000000 +0200
1042@@ -75,6 +75,7 @@
1043 #include <linux/kthread.h>
1044 #include <linux/splice.h>
1045 #include <linux/sysfs.h>
1046+#include <linux/vs_context.h>
1047
1048 #include <asm/uaccess.h>
1049
1050@@ -891,6 +892,7 @@ static int loop_set_fd(struct loop_devic
1051 lo->lo_blocksize = lo_blocksize;
1052 lo->lo_device = bdev;
1053 lo->lo_flags = lo_flags;
1054+ lo->lo_xid = vx_current_xid();
1055 lo->lo_backing_file = file;
1056 lo->transfer = transfer_none;
1057 lo->ioctl = NULL;
1058@@ -1021,6 +1023,7 @@ static int loop_clr_fd(struct loop_devic
1059 lo->lo_encrypt_key_size = 0;
1060 lo->lo_flags = 0;
1061 lo->lo_thread = NULL;
1062+ lo->lo_xid = 0;
1063 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
1064 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
1065 memset(lo->lo_file_name, 0, LO_NAME_SIZE);
1066@@ -1059,7 +1062,7 @@ loop_set_status(struct loop_device *lo,
1067
1068 if (lo->lo_encrypt_key_size &&
1069 lo->lo_key_owner != uid &&
1070- !capable(CAP_SYS_ADMIN))
1071+ !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP))
1072 return -EPERM;
1073 if (lo->lo_state != Lo_bound)
1074 return -ENXIO;
1075@@ -1143,7 +1146,8 @@ loop_get_status(struct loop_device *lo,
1076 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
1077 info->lo_encrypt_type =
1078 lo->lo_encryption ? lo->lo_encryption->number : 0;
1079- if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
1080+ if (lo->lo_encrypt_key_size &&
1081+ vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) {
1082 info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
1083 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1084 lo->lo_encrypt_key_size);
1085@@ -1489,6 +1493,9 @@ static int lo_open(struct block_device *
1086 {
1087 struct loop_device *lo = bdev->bd_disk->private_data;
1088
1089+ if (!vx_check(lo->lo_xid, VS_IDENT|VS_HOSTID|VS_ADMIN_P))
1090+ return -EACCES;
1091+
1092 mutex_lock(&lo->lo_ctl_mutex);
1093 lo->lo_refcnt++;
1094 mutex_unlock(&lo->lo_ctl_mutex);
1095diff -NurpP --minimal linux-3.0.9/drivers/block/vroot.c linux-3.0.9-vs2.3.2.1/drivers/block/vroot.c
1096--- linux-3.0.9/drivers/block/vroot.c 1970-01-01 01:00:00.000000000 +0100
1097+++ linux-3.0.9-vs2.3.2.1/drivers/block/vroot.c 2011-06-10 22:11:24.000000000 +0200
1098@@ -0,0 +1,292 @@
1099+/*
1100+ * linux/drivers/block/vroot.c
1101+ *
1102+ * written by Herbert Pötzl, 9/11/2002
1103+ * ported to 2.6.10 by Herbert Pötzl, 30/12/2004
1104+ *
1105+ * based on the loop.c code by Theodore Ts'o.
1106+ *
1107+ * Copyright (C) 2002-2007 by Herbert Pötzl.
1108+ * Redistribution of this file is permitted under the
1109+ * GNU General Public License.
1110+ *
1111+ */
1112+
1113+#include <linux/module.h>
1114+#include <linux/moduleparam.h>
1115+#include <linux/file.h>
1116+#include <linux/major.h>
1117+#include <linux/blkdev.h>
1118+#include <linux/slab.h>
1119+
1120+#include <linux/vroot.h>
1121+#include <linux/vs_context.h>
1122+
1123+
1124+static int max_vroot = 8;
1125+
1126+static struct vroot_device *vroot_dev;
1127+static struct gendisk **disks;
1128+
1129+
1130+static int vroot_set_dev(
1131+ struct vroot_device *vr,
1132+ struct block_device *bdev,
1133+ unsigned int arg)
1134+{
1135+ struct block_device *real_bdev;
1136+ struct file *file;
1137+ struct inode *inode;
1138+ int error;
1139+
1140+ error = -EBUSY;
1141+ if (vr->vr_state != Vr_unbound)
1142+ goto out;
1143+
1144+ error = -EBADF;
1145+ file = fget(arg);
1146+ if (!file)
1147+ goto out;
1148+
1149+ error = -EINVAL;
1150+ inode = file->f_dentry->d_inode;
1151+
1152+
1153+ if (S_ISBLK(inode->i_mode)) {
1154+ real_bdev = inode->i_bdev;
1155+ vr->vr_device = real_bdev;
1156+ __iget(real_bdev->bd_inode);
1157+ } else
1158+ goto out_fput;
1159+
1160+ vxdprintk(VXD_CBIT(misc, 0),
1161+ "vroot[%d]_set_dev: dev=" VXF_DEV,
1162+ vr->vr_number, VXD_DEV(real_bdev));
1163+
1164+ vr->vr_state = Vr_bound;
1165+ error = 0;
1166+
1167+ out_fput:
1168+ fput(file);
1169+ out:
1170+ return error;
1171+}
1172+
1173+static int vroot_clr_dev(
1174+ struct vroot_device *vr,
1175+ struct block_device *bdev)
1176+{
1177+ struct block_device *real_bdev;
1178+
1179+ if (vr->vr_state != Vr_bound)
1180+ return -ENXIO;
1181+ if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */
1182+ return -EBUSY;
1183+
1184+ real_bdev = vr->vr_device;
1185+
1186+ vxdprintk(VXD_CBIT(misc, 0),
1187+ "vroot[%d]_clr_dev: dev=" VXF_DEV,
1188+ vr->vr_number, VXD_DEV(real_bdev));
1189+
1190+ bdput(real_bdev);
1191+ vr->vr_state = Vr_unbound;
1192+ vr->vr_device = NULL;
1193+ return 0;
1194+}
1195+
1196+
1197+static int vr_ioctl(struct block_device *bdev, fmode_t mode,
1198+ unsigned int cmd, unsigned long arg)
1199+{
1200+ struct vroot_device *vr = bdev->bd_disk->private_data;
1201+ int err;
1202+
1203+ down(&vr->vr_ctl_mutex);
1204+ switch (cmd) {
1205+ case VROOT_SET_DEV:
1206+ err = vroot_set_dev(vr, bdev, arg);
1207+ break;
1208+ case VROOT_CLR_DEV:
1209+ err = vroot_clr_dev(vr, bdev);
1210+ break;
1211+ default:
1212+ err = -EINVAL;
1213+ break;
1214+ }
1215+ up(&vr->vr_ctl_mutex);
1216+ return err;
1217+}
1218+
1219+static int vr_open(struct block_device *bdev, fmode_t mode)
1220+{
1221+ struct vroot_device *vr = bdev->bd_disk->private_data;
1222+
1223+ down(&vr->vr_ctl_mutex);
1224+ vr->vr_refcnt++;
1225+ up(&vr->vr_ctl_mutex);
1226+ return 0;
1227+}
1228+
1229+static int vr_release(struct gendisk *disk, fmode_t mode)
1230+{
1231+ struct vroot_device *vr = disk->private_data;
1232+
1233+ down(&vr->vr_ctl_mutex);
1234+ --vr->vr_refcnt;
1235+ up(&vr->vr_ctl_mutex);
1236+ return 0;
1237+}
1238+
1239+static struct block_device_operations vr_fops = {
1240+ .owner = THIS_MODULE,
1241+ .open = vr_open,
1242+ .release = vr_release,
1243+ .ioctl = vr_ioctl,
1244+};
1245+
1246+static int vroot_make_request(struct request_queue *q, struct bio *bio)
1247+{
1248+ printk("vroot_make_request %p, %p\n", q, bio);
1249+ bio_io_error(bio);
1250+ return 0;
1251+}
1252+
1253+struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
1254+{
1255+ struct inode *inode = bdev->bd_inode;
1256+ struct vroot_device *vr;
1257+ struct block_device *real_bdev;
1258+ int minor = iminor(inode);
1259+
1260+ vr = &vroot_dev[minor];
1261+ real_bdev = vr->vr_device;
1262+
1263+ vxdprintk(VXD_CBIT(misc, 0),
1264+ "vroot[%d]_get_real_bdev: dev=" VXF_DEV,
1265+ vr->vr_number, VXD_DEV(real_bdev));
1266+
1267+ if (vr->vr_state != Vr_bound)
1268+ return ERR_PTR(-ENXIO);
1269+
1270+ __iget(real_bdev->bd_inode);
1271+ return real_bdev;
1272+}
1273+
1274+
1275+
1276+/*
1277+ * And now the modules code and kernel interface.
1278+ */
1279+
1280+module_param(max_vroot, int, 0);
1281+
1282+MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
1283+MODULE_LICENSE("GPL");
1284+MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
1285+
1286+MODULE_AUTHOR ("Herbert Pötzl");
1287+MODULE_DESCRIPTION ("Virtual Root Device Mapper");
1288+
1289+
1290+int __init vroot_init(void)
1291+{
1292+ int err, i;
1293+
1294+ if (max_vroot < 1 || max_vroot > 256) {
1295+ max_vroot = MAX_VROOT_DEFAULT;
1296+ printk(KERN_WARNING "vroot: invalid max_vroot "
1297+ "(must be between 1 and 256), "
1298+ "using default (%d)\n", max_vroot);
1299+ }
1300+
1301+ if (register_blkdev(VROOT_MAJOR, "vroot"))
1302+ return -EIO;
1303+
1304+ err = -ENOMEM;
1305+ vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL);
1306+ if (!vroot_dev)
1307+ goto out_mem1;
1308+ memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device));
1309+
1310+ disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
1311+ if (!disks)
1312+ goto out_mem2;
1313+
1314+ for (i = 0; i < max_vroot; i++) {
1315+ disks[i] = alloc_disk(1);
1316+ if (!disks[i])
1317+ goto out_mem3;
1318+ disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
1319+ if (!disks[i]->queue)
1320+ goto out_mem3;
1321+ blk_queue_make_request(disks[i]->queue, vroot_make_request);
1322+ }
1323+
1324+ for (i = 0; i < max_vroot; i++) {
1325+ struct vroot_device *vr = &vroot_dev[i];
1326+ struct gendisk *disk = disks[i];
1327+
1328+ memset(vr, 0, sizeof(*vr));
1329+ sema_init(&vr->vr_ctl_mutex, 1);
1330+ vr->vr_number = i;
1331+ disk->major = VROOT_MAJOR;
1332+ disk->first_minor = i;
1333+ disk->fops = &vr_fops;
1334+ sprintf(disk->disk_name, "vroot%d", i);
1335+ disk->private_data = vr;
1336+ }
1337+
1338+ err = register_vroot_grb(&__vroot_get_real_bdev);
1339+ if (err)
1340+ goto out_mem3;
1341+
1342+ for (i = 0; i < max_vroot; i++)
1343+ add_disk(disks[i]);
1344+ printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
1345+ return 0;
1346+
1347+out_mem3:
1348+ while (i--)
1349+ put_disk(disks[i]);
1350+ kfree(disks);
1351+out_mem2:
1352+ kfree(vroot_dev);
1353+out_mem1:
1354+ unregister_blkdev(VROOT_MAJOR, "vroot");
1355+ printk(KERN_ERR "vroot: ran out of memory\n");
1356+ return err;
1357+}
1358+
1359+void vroot_exit(void)
1360+{
1361+ int i;
1362+
1363+ if (unregister_vroot_grb(&__vroot_get_real_bdev))
1364+ printk(KERN_WARNING "vroot: cannot unregister grb\n");
1365+
1366+ for (i = 0; i < max_vroot; i++) {
1367+ del_gendisk(disks[i]);
1368+ put_disk(disks[i]);
1369+ }
1370+ unregister_blkdev(VROOT_MAJOR, "vroot");
1371+
1372+ kfree(disks);
1373+ kfree(vroot_dev);
1374+}
1375+
1376+module_init(vroot_init);
1377+module_exit(vroot_exit);
1378+
1379+#ifndef MODULE
1380+
1381+static int __init max_vroot_setup(char *str)
1382+{
1383+ max_vroot = simple_strtol(str, NULL, 0);
1384+ return 1;
1385+}
1386+
1387+__setup("max_vroot=", max_vroot_setup);
1388+
1389+#endif
1390+
1391diff -NurpP --minimal linux-3.0.9/drivers/infiniband/core/addr.c linux-3.0.9-vs2.3.2.1/drivers/infiniband/core/addr.c
1392--- linux-3.0.9/drivers/infiniband/core/addr.c 2011-07-22 11:17:45.000000000 +0200
1393+++ linux-3.0.9-vs2.3.2.1/drivers/infiniband/core/addr.c 2011-06-16 14:16:51.000000000 +0200
1394@@ -252,7 +252,7 @@ static int addr6_resolve(struct sockaddr
1395
1396 if (ipv6_addr_any(&fl6.saddr)) {
1397 ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
1398- &fl6.daddr, 0, &fl6.saddr);
1399+ &fl6.daddr, 0, &fl6.saddr, NULL);
1400 if (ret)
1401 goto put;
1402
1403diff -NurpP --minimal linux-3.0.9/drivers/infiniband/hw/ipath/ipath_user_pages.c linux-3.0.9-vs2.3.2.1/drivers/infiniband/hw/ipath/ipath_user_pages.c
1404--- linux-3.0.9/drivers/infiniband/hw/ipath/ipath_user_pages.c 2011-05-22 16:17:16.000000000 +0200
1405+++ linux-3.0.9-vs2.3.2.1/drivers/infiniband/hw/ipath/ipath_user_pages.c 2011-06-10 22:11:24.000000000 +0200
1406@@ -35,6 +35,7 @@
1407 #include <linux/device.h>
1408 #include <linux/slab.h>
1409 #include <linux/sched.h>
1410+#include <linux/vs_memory.h>
1411
1412 #include "ipath_kernel.h"
1413
1414diff -NurpP --minimal linux-3.0.9/drivers/md/dm-ioctl.c linux-3.0.9-vs2.3.2.1/drivers/md/dm-ioctl.c
1415--- linux-3.0.9/drivers/md/dm-ioctl.c 2011-05-22 16:17:18.000000000 +0200
1416+++ linux-3.0.9-vs2.3.2.1/drivers/md/dm-ioctl.c 2011-06-10 22:11:24.000000000 +0200
1417@@ -16,6 +16,7 @@
1418 #include <linux/dm-ioctl.h>
1419 #include <linux/hdreg.h>
1420 #include <linux/compat.h>
1421+#include <linux/vs_context.h>
1422
1423 #include <asm/uaccess.h>
1424
1425@@ -106,7 +107,8 @@ static struct hash_cell *__get_name_cell
1426 unsigned int h = hash_str(str);
1427
1428 list_for_each_entry (hc, _name_buckets + h, name_list)
1429- if (!strcmp(hc->name, str)) {
1430+ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1431+ !strcmp(hc->name, str)) {
1432 dm_get(hc->md);
1433 return hc;
1434 }
1435@@ -120,7 +122,8 @@ static struct hash_cell *__get_uuid_cell
1436 unsigned int h = hash_str(str);
1437
1438 list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
1439- if (!strcmp(hc->uuid, str)) {
1440+ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1441+ !strcmp(hc->uuid, str)) {
1442 dm_get(hc->md);
1443 return hc;
1444 }
1445@@ -427,6 +430,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl
1446
1447 static int remove_all(struct dm_ioctl *param, size_t param_size)
1448 {
1449+ if (!vx_check(0, VS_ADMIN))
1450+ return -EPERM;
1451+
1452 dm_hash_remove_all(1);
1453 param->data_size = 0;
1454 return 0;
1455@@ -474,6 +480,8 @@ static int list_devices(struct dm_ioctl
1456 */
1457 for (i = 0; i < NUM_BUCKETS; i++) {
1458 list_for_each_entry (hc, _name_buckets + i, name_list) {
1459+ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1460+ continue;
1461 needed += sizeof(struct dm_name_list);
1462 needed += strlen(hc->name) + 1;
1463 needed += ALIGN_MASK;
1464@@ -497,6 +505,8 @@ static int list_devices(struct dm_ioctl
1465 */
1466 for (i = 0; i < NUM_BUCKETS; i++) {
1467 list_for_each_entry (hc, _name_buckets + i, name_list) {
1468+ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1469+ continue;
1470 if (old_nl)
1471 old_nl->next = (uint32_t) ((void *) nl -
1472 (void *) old_nl);
1473@@ -731,10 +741,11 @@ static struct hash_cell *__find_device_h
1474 if (!md)
1475 goto out;
1476
1477- mdptr = dm_get_mdptr(md);
1478+ if (vx_check(dm_get_xid(md), VS_WATCH_P | VS_IDENT))
1479+ mdptr = dm_get_mdptr(md);
1480+
1481 if (!mdptr)
1482 dm_put(md);
1483-
1484 out:
1485 return mdptr;
1486 }
1487@@ -1577,8 +1588,8 @@ static int ctl_ioctl(uint command, struc
1488 ioctl_fn fn = NULL;
1489 size_t input_param_size;
1490
1491- /* only root can play with this */
1492- if (!capable(CAP_SYS_ADMIN))
1493+ /* only root and certain contexts can play with this */
1494+ if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER))
1495 return -EACCES;
1496
1497 if (_IOC_TYPE(command) != DM_IOCTL)
1498diff -NurpP --minimal linux-3.0.9/drivers/md/dm.c linux-3.0.9-vs2.3.2.1/drivers/md/dm.c
1499--- linux-3.0.9/drivers/md/dm.c 2011-11-15 16:40:44.000000000 +0100
1500+++ linux-3.0.9-vs2.3.2.1/drivers/md/dm.c 2011-08-08 23:04:47.000000000 +0200
1501@@ -20,6 +20,7 @@
1502 #include <linux/idr.h>
1503 #include <linux/hdreg.h>
1504 #include <linux/delay.h>
1505+#include <linux/vs_base.h>
1506
1507 #include <trace/events/block.h>
1508
1509@@ -121,6 +122,7 @@ struct mapped_device {
1510 rwlock_t map_lock;
1511 atomic_t holders;
1512 atomic_t open_count;
1513+ xid_t xid;
1514
1515 unsigned long flags;
1516
1517@@ -334,6 +336,7 @@ int dm_deleting_md(struct mapped_device
1518 static int dm_blk_open(struct block_device *bdev, fmode_t mode)
1519 {
1520 struct mapped_device *md;
1521+ int ret = -ENXIO;
1522
1523 spin_lock(&_minor_lock);
1524
1525@@ -342,18 +345,19 @@ static int dm_blk_open(struct block_devi
1526 goto out;
1527
1528 if (test_bit(DMF_FREEING, &md->flags) ||
1529- dm_deleting_md(md)) {
1530- md = NULL;
1531+ dm_deleting_md(md))
1532+ goto out;
1533+
1534+ ret = -EACCES;
1535+ if (!vx_check(md->xid, VS_IDENT|VS_HOSTID))
1536 goto out;
1537- }
1538
1539 dm_get(md);
1540 atomic_inc(&md->open_count);
1541-
1542+ ret = 0;
1543 out:
1544 spin_unlock(&_minor_lock);
1545-
1546- return md ? 0 : -ENXIO;
1547+ return ret;
1548 }
1549
1550 static int dm_blk_close(struct gendisk *disk, fmode_t mode)
1551@@ -574,6 +578,14 @@ int dm_set_geometry(struct mapped_device
1552 return 0;
1553 }
1554
1555+/*
1556+ * Get the xid associated with a dm device
1557+ */
1558+xid_t dm_get_xid(struct mapped_device *md)
1559+{
1560+ return md->xid;
1561+}
1562+
1563 /*-----------------------------------------------------------------
1564 * CRUD START:
1565 * A more elegant soln is in the works that uses the queue
1566@@ -1847,6 +1859,7 @@ static struct mapped_device *alloc_dev(i
1567 INIT_LIST_HEAD(&md->uevent_list);
1568 spin_lock_init(&md->uevent_lock);
1569
1570+ md->xid = vx_current_xid();
1571 md->queue = blk_alloc_queue(GFP_KERNEL);
1572 if (!md->queue)
1573 goto bad_queue;
1574diff -NurpP --minimal linux-3.0.9/drivers/md/dm.h linux-3.0.9-vs2.3.2.1/drivers/md/dm.h
1575--- linux-3.0.9/drivers/md/dm.h 2011-05-22 16:17:18.000000000 +0200
1576+++ linux-3.0.9-vs2.3.2.1/drivers/md/dm.h 2011-06-10 22:11:24.000000000 +0200
1577@@ -41,6 +41,8 @@ struct dm_dev_internal {
1578 struct dm_table;
1579 struct dm_md_mempools;
1580
1581+xid_t dm_get_xid(struct mapped_device *md);
1582+
1583 /*-----------------------------------------------------------------
1584 * Internal table functions.
1585 *---------------------------------------------------------------*/
1586diff -NurpP --minimal linux-3.0.9/drivers/net/tun.c linux-3.0.9-vs2.3.2.1/drivers/net/tun.c
1587--- linux-3.0.9/drivers/net/tun.c 2011-11-15 16:40:45.000000000 +0100
1588+++ linux-3.0.9-vs2.3.2.1/drivers/net/tun.c 2011-08-29 03:45:08.000000000 +0200
1589@@ -64,6 +64,7 @@
1590 #include <linux/nsproxy.h>
1591 #include <linux/virtio_net.h>
1592 #include <linux/rcupdate.h>
1593+#include <linux/vs_network.h>
1594 #include <net/net_namespace.h>
1595 #include <net/netns/generic.h>
1596 #include <net/rtnetlink.h>
1597@@ -121,6 +122,7 @@ struct tun_struct {
1598 unsigned int flags;
1599 uid_t owner;
1600 gid_t group;
1601+ nid_t nid;
1602
1603 struct net_device *dev;
1604 u32 set_features;
1605@@ -905,6 +907,7 @@ static void tun_setup(struct net_device
1606
1607 tun->owner = -1;
1608 tun->group = -1;
1609+ tun->nid = current->nid;
1610
1611 dev->ethtool_ops = &tun_ethtool_ops;
1612 dev->destructor = tun_free_netdev;
1613@@ -1055,7 +1058,7 @@ static int tun_set_iff(struct net *net,
1614
1615 if (((tun->owner != -1 && cred->euid != tun->owner) ||
1616 (tun->group != -1 && !in_egroup_p(tun->group))) &&
1617- !capable(CAP_NET_ADMIN))
1618+ !cap_raised(current_cap(), CAP_NET_ADMIN))
1619 return -EPERM;
1620 err = security_tun_dev_attach(tun->socket.sk);
1621 if (err < 0)
1622@@ -1069,7 +1072,7 @@ static int tun_set_iff(struct net *net,
1623 char *name;
1624 unsigned long flags = 0;
1625
1626- if (!capable(CAP_NET_ADMIN))
1627+ if (!nx_capable(CAP_NET_ADMIN, NXC_TUN_CREATE))
1628 return -EPERM;
1629 err = security_tun_dev_create();
1630 if (err < 0)
1631@@ -1137,6 +1140,9 @@ static int tun_set_iff(struct net *net,
1632
1633 sk->sk_destruct = tun_sock_destruct;
1634
1635+ if (!nx_check(tun->nid, VS_IDENT | VS_HOSTID | VS_ADMIN_P))
1636+ return -EPERM;
1637+
1638 err = tun_attach(tun, file);
1639 if (err < 0)
1640 goto failed;
1641@@ -1318,6 +1324,16 @@ static long __tun_chr_ioctl(struct file
1642 tun_debug(KERN_INFO, tun, "group set to %d\n", tun->group);
1643 break;
1644
1645+ case TUNSETNID:
1646+ if (!capable(CAP_CONTEXT))
1647+ return -EPERM;
1648+
1649+ /* Set nid owner of the device */
1650+ tun->nid = (nid_t) arg;
1651+
1652+ tun_debug(KERN_INFO, tun, "nid owner set to %u\n", tun->nid);
1653+ break;
1654+
1655 case TUNSETLINK:
1656 /* Only allow setting the type when the interface is down */
1657 if (tun->dev->flags & IFF_UP) {
1658diff -NurpP --minimal linux-3.0.9/drivers/tty/sysrq.c linux-3.0.9-vs2.3.2.1/drivers/tty/sysrq.c
1659--- linux-3.0.9/drivers/tty/sysrq.c 2011-05-22 16:17:44.000000000 +0200
1660+++ linux-3.0.9-vs2.3.2.1/drivers/tty/sysrq.c 2011-06-10 22:11:24.000000000 +0200
1661@@ -41,6 +41,7 @@
1662 #include <linux/oom.h>
1663 #include <linux/slab.h>
1664 #include <linux/input.h>
1665+#include <linux/vserver/debug.h>
1666
1667 #include <asm/ptrace.h>
1668 #include <asm/irq_regs.h>
1669@@ -395,6 +396,21 @@ static struct sysrq_key_op sysrq_unrt_op
1670 .enable_mask = SYSRQ_ENABLE_RTNICE,
1671 };
1672
1673+
1674+#ifdef CONFIG_VSERVER_DEBUG
1675+static void sysrq_handle_vxinfo(int key)
1676+{
1677+ dump_vx_info_inactive((key == 'x') ? 0 : 1);
1678+}
1679+
1680+static struct sysrq_key_op sysrq_showvxinfo_op = {
1681+ .handler = sysrq_handle_vxinfo,
1682+ .help_msg = "conteXt",
1683+ .action_msg = "Show Context Info",
1684+ .enable_mask = SYSRQ_ENABLE_DUMP,
1685+};
1686+#endif
1687+
1688 /* Key Operations table and lock */
1689 static DEFINE_SPINLOCK(sysrq_key_table_lock);
1690
1691@@ -449,7 +465,11 @@ static struct sysrq_key_op *sysrq_key_ta
1692 NULL, /* v */
1693 &sysrq_showstate_blocked_op, /* w */
1694 /* x: May be registered on ppc/powerpc for xmon */
1695+#ifdef CONFIG_VSERVER_DEBUG
1696+ &sysrq_showvxinfo_op, /* x */
1697+#else
1698 NULL, /* x */
1699+#endif
1700 /* y: May be registered on sparc64 for global register dump */
1701 NULL, /* y */
1702 &sysrq_ftrace_dump_op, /* z */
1703@@ -464,6 +484,8 @@ static int sysrq_key_table_key2index(int
1704 retval = key - '0';
1705 else if ((key >= 'a') && (key <= 'z'))
1706 retval = key + 10 - 'a';
1707+ else if ((key >= 'A') && (key <= 'Z'))
1708+ retval = key + 10 - 'A';
1709 else
1710 retval = -1;
1711 return retval;
1712diff -NurpP --minimal linux-3.0.9/drivers/tty/tty_io.c linux-3.0.9-vs2.3.2.1/drivers/tty/tty_io.c
1713--- linux-3.0.9/drivers/tty/tty_io.c 2011-11-15 16:40:46.000000000 +0100
1714+++ linux-3.0.9-vs2.3.2.1/drivers/tty/tty_io.c 2011-11-15 17:37:05.000000000 +0100
1715@@ -104,6 +104,7 @@
1716
1717 #include <linux/kmod.h>
1718 #include <linux/nsproxy.h>
1719+#include <linux/vs_pid.h>
1720
1721 #undef TTY_DEBUG_HANGUP
1722
1723@@ -2080,7 +2081,8 @@ static int tiocsti(struct tty_struct *tt
1724 char ch, mbz = 0;
1725 struct tty_ldisc *ld;
1726
1727- if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
1728+ if (((current->signal->tty != tty) &&
1729+ !vx_capable(CAP_SYS_ADMIN, VXC_TIOCSTI)))
1730 return -EPERM;
1731 if (get_user(ch, p))
1732 return -EFAULT;
1733@@ -2368,6 +2370,7 @@ static int tiocspgrp(struct tty_struct *
1734 return -ENOTTY;
1735 if (get_user(pgrp_nr, p))
1736 return -EFAULT;
1737+ pgrp_nr = vx_rmap_pid(pgrp_nr);
1738 if (pgrp_nr < 0)
1739 return -EINVAL;
1740 rcu_read_lock();
1741diff -NurpP --minimal linux-3.0.9/fs/attr.c linux-3.0.9-vs2.3.2.1/fs/attr.c
1742--- linux-3.0.9/fs/attr.c 2011-07-22 11:18:05.000000000 +0200
1743+++ linux-3.0.9-vs2.3.2.1/fs/attr.c 2011-06-10 22:11:24.000000000 +0200
1744@@ -13,6 +13,9 @@
1745 #include <linux/fsnotify.h>
1746 #include <linux/fcntl.h>
1747 #include <linux/security.h>
1748+#include <linux/proc_fs.h>
1749+#include <linux/devpts_fs.h>
1750+#include <linux/vs_tag.h>
1751
1752 /**
1753 * inode_change_ok - check if attribute changes to an inode are allowed
1754@@ -73,6 +76,10 @@ int inode_change_ok(const struct inode *
1755 return -EPERM;
1756 }
1757
1758+ /* check for inode tag permission */
1759+ if (dx_permission(inode, MAY_WRITE))
1760+ return -EACCES;
1761+
1762 return 0;
1763 }
1764 EXPORT_SYMBOL(inode_change_ok);
1765@@ -143,6 +150,8 @@ void setattr_copy(struct inode *inode, c
1766 inode->i_uid = attr->ia_uid;
1767 if (ia_valid & ATTR_GID)
1768 inode->i_gid = attr->ia_gid;
1769+ if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode))
1770+ inode->i_tag = attr->ia_tag;
1771 if (ia_valid & ATTR_ATIME)
1772 inode->i_atime = timespec_trunc(attr->ia_atime,
1773 inode->i_sb->s_time_gran);
1774@@ -170,7 +179,8 @@ int notify_change(struct dentry * dentry
1775 struct timespec now;
1776 unsigned int ia_valid = attr->ia_valid;
1777
1778- if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
1779+ if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
1780+ ATTR_TAG | ATTR_TIMES_SET)) {
1781 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1782 return -EPERM;
1783 }
1784diff -NurpP --minimal linux-3.0.9/fs/binfmt_aout.c linux-3.0.9-vs2.3.2.1/fs/binfmt_aout.c
1785--- linux-3.0.9/fs/binfmt_aout.c 2010-10-21 13:07:47.000000000 +0200
1786+++ linux-3.0.9-vs2.3.2.1/fs/binfmt_aout.c 2011-06-10 22:11:24.000000000 +0200
1787@@ -25,6 +25,7 @@
1788 #include <linux/init.h>
1789 #include <linux/coredump.h>
1790 #include <linux/slab.h>
1791+#include <linux/vs_memory.h>
1792
1793 #include <asm/system.h>
1794 #include <asm/uaccess.h>
1795diff -NurpP --minimal linux-3.0.9/fs/binfmt_elf.c linux-3.0.9-vs2.3.2.1/fs/binfmt_elf.c
1796--- linux-3.0.9/fs/binfmt_elf.c 2011-11-15 16:40:46.000000000 +0100
1797+++ linux-3.0.9-vs2.3.2.1/fs/binfmt_elf.c 2011-11-15 17:37:06.000000000 +0100
1798@@ -32,6 +32,7 @@
1799 #include <linux/elf.h>
1800 #include <linux/utsname.h>
1801 #include <linux/coredump.h>
1802+#include <linux/vs_memory.h>
1803 #include <asm/uaccess.h>
1804 #include <asm/param.h>
1805 #include <asm/page.h>
1806diff -NurpP --minimal linux-3.0.9/fs/binfmt_flat.c linux-3.0.9-vs2.3.2.1/fs/binfmt_flat.c
1807--- linux-3.0.9/fs/binfmt_flat.c 2011-07-22 11:18:05.000000000 +0200
1808+++ linux-3.0.9-vs2.3.2.1/fs/binfmt_flat.c 2011-06-10 22:11:24.000000000 +0200
1809@@ -35,6 +35,7 @@
1810 #include <linux/init.h>
1811 #include <linux/flat.h>
1812 #include <linux/syscalls.h>
1813+#include <linux/vs_memory.h>
1814
1815 #include <asm/byteorder.h>
1816 #include <asm/system.h>
1817diff -NurpP --minimal linux-3.0.9/fs/binfmt_som.c linux-3.0.9-vs2.3.2.1/fs/binfmt_som.c
1818--- linux-3.0.9/fs/binfmt_som.c 2010-02-25 11:52:04.000000000 +0100
1819+++ linux-3.0.9-vs2.3.2.1/fs/binfmt_som.c 2011-06-10 22:11:24.000000000 +0200
1820@@ -28,6 +28,7 @@
1821 #include <linux/shm.h>
1822 #include <linux/personality.h>
1823 #include <linux/init.h>
1824+#include <linux/vs_memory.h>
1825
1826 #include <asm/uaccess.h>
1827 #include <asm/pgtable.h>
1828diff -NurpP --minimal linux-3.0.9/fs/block_dev.c linux-3.0.9-vs2.3.2.1/fs/block_dev.c
1829--- linux-3.0.9/fs/block_dev.c 2011-11-15 16:40:46.000000000 +0100
1830+++ linux-3.0.9-vs2.3.2.1/fs/block_dev.c 2011-11-15 17:37:06.000000000 +0100
1831@@ -25,6 +25,7 @@
1832 #include <linux/namei.h>
1833 #include <linux/log2.h>
1834 #include <linux/kmemleak.h>
1835+#include <linux/vs_device.h>
1836 #include <asm/uaccess.h>
1837 #include "internal.h"
1838
1839@@ -553,6 +554,7 @@ struct block_device *bdget(dev_t dev)
1840 bdev->bd_invalidated = 0;
1841 inode->i_mode = S_IFBLK;
1842 inode->i_rdev = dev;
1843+ inode->i_mdev = dev;
1844 inode->i_bdev = bdev;
1845 inode->i_data.a_ops = &def_blk_aops;
1846 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
1847@@ -599,6 +601,11 @@ EXPORT_SYMBOL(bdput);
1848 static struct block_device *bd_acquire(struct inode *inode)
1849 {
1850 struct block_device *bdev;
1851+ dev_t mdev;
1852+
1853+ if (!vs_map_blkdev(inode->i_rdev, &mdev, DATTR_OPEN))
1854+ return NULL;
1855+ inode->i_mdev = mdev;
1856
1857 spin_lock(&bdev_lock);
1858 bdev = inode->i_bdev;
1859@@ -609,7 +616,7 @@ static struct block_device *bd_acquire(s
1860 }
1861 spin_unlock(&bdev_lock);
1862
1863- bdev = bdget(inode->i_rdev);
1864+ bdev = bdget(mdev);
1865 if (bdev) {
1866 spin_lock(&bdev_lock);
1867 if (!inode->i_bdev) {
1868diff -NurpP --minimal linux-3.0.9/fs/btrfs/ctree.h linux-3.0.9-vs2.3.2.1/fs/btrfs/ctree.h
1869--- linux-3.0.9/fs/btrfs/ctree.h 2011-07-22 11:18:05.000000000 +0200
1870+++ linux-3.0.9-vs2.3.2.1/fs/btrfs/ctree.h 2011-07-19 00:44:39.000000000 +0200
1871@@ -600,11 +600,14 @@ struct btrfs_inode_item {
1872 /* modification sequence number for NFS */
1873 __le64 sequence;
1874
1875+ __le16 tag;
1876 /*
1877 * a little future expansion, for more than this we can
1878 * just grow the inode item and version it
1879 */
1880- __le64 reserved[4];
1881+ __le16 reserved16;
1882+ __le32 reserved32;
1883+ __le64 reserved[3];
1884 struct btrfs_timespec atime;
1885 struct btrfs_timespec ctime;
1886 struct btrfs_timespec mtime;
1887@@ -1359,6 +1362,8 @@ struct btrfs_ioctl_defrag_range_args {
1888 #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
1889 #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
1890
1891+#define BTRFS_MOUNT_TAGGED (1 << 24)
1892+
1893 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1894 #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
1895 #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \
1896@@ -1568,6 +1573,7 @@ BTRFS_SETGET_FUNCS(inode_block_group, st
1897 BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
1898 BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
1899 BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
1900+BTRFS_SETGET_FUNCS(inode_tag, struct btrfs_inode_item, tag, 16);
1901 BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
1902 BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
1903 BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
1904@@ -1621,6 +1627,10 @@ BTRFS_SETGET_FUNCS(extent_flags, struct
1905
1906 BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
1907
1908+#define BTRFS_INODE_IXUNLINK (1 << 24)
1909+#define BTRFS_INODE_BARRIER (1 << 25)
1910+#define BTRFS_INODE_COW (1 << 26)
1911+
1912
1913 BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
1914
1915@@ -2595,6 +2605,7 @@ extern const struct dentry_operations bt
1916 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1917 void btrfs_update_iflags(struct inode *inode);
1918 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
1919+int btrfs_sync_flags(struct inode *inode, int, int);
1920 int btrfs_defrag_file(struct inode *inode, struct file *file,
1921 struct btrfs_ioctl_defrag_range_args *range,
1922 u64 newer_than, unsigned long max_pages);
1923diff -NurpP --minimal linux-3.0.9/fs/btrfs/disk-io.c linux-3.0.9-vs2.3.2.1/fs/btrfs/disk-io.c
1924--- linux-3.0.9/fs/btrfs/disk-io.c 2011-07-22 11:18:05.000000000 +0200
1925+++ linux-3.0.9-vs2.3.2.1/fs/btrfs/disk-io.c 2011-06-22 12:39:15.000000000 +0200
1926@@ -1765,6 +1765,9 @@ struct btrfs_root *open_ctree(struct sup
1927 goto fail_alloc;
1928 }
1929
1930+ if (btrfs_test_opt(tree_root, TAGGED))
1931+ sb->s_flags |= MS_TAGGED;
1932+
1933 features = btrfs_super_incompat_flags(disk_super) &
1934 ~BTRFS_FEATURE_INCOMPAT_SUPP;
1935 if (features) {
1936diff -NurpP --minimal linux-3.0.9/fs/btrfs/inode.c linux-3.0.9-vs2.3.2.1/fs/btrfs/inode.c
1937--- linux-3.0.9/fs/btrfs/inode.c 2011-11-15 16:40:46.000000000 +0100
1938+++ linux-3.0.9-vs2.3.2.1/fs/btrfs/inode.c 2011-10-18 13:51:13.000000000 +0200
1939@@ -38,6 +38,7 @@
1940 #include <linux/falloc.h>
1941 #include <linux/slab.h>
1942 #include <linux/ratelimit.h>
1943+#include <linux/vs_tag.h>
1944 #include "compat.h"
1945 #include "ctree.h"
1946 #include "disk-io.h"
1947@@ -2508,6 +2509,8 @@ static void btrfs_read_locked_inode(stru
1948 struct btrfs_key location;
1949 int maybe_acls;
1950 u32 rdev;
1951+ uid_t uid;
1952+ gid_t gid;
1953 int ret;
1954 bool filled = false;
1955
1956@@ -2540,8 +2543,13 @@ static void btrfs_read_locked_inode(stru
1957
1958 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
1959 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
1960- inode->i_uid = btrfs_inode_uid(leaf, inode_item);
1961- inode->i_gid = btrfs_inode_gid(leaf, inode_item);
1962+
1963+ uid = btrfs_inode_uid(leaf, inode_item);
1964+ gid = btrfs_inode_gid(leaf, inode_item);
1965+ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
1966+ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
1967+ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
1968+ btrfs_inode_tag(leaf, inode_item));
1969 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
1970
1971 tspec = btrfs_inode_atime(inode_item);
1972@@ -2624,6 +2632,9 @@ static void fill_inode_item(struct btrfs
1973 struct btrfs_inode_item *item,
1974 struct inode *inode)
1975 {
1976+ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
1977+ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
1978+
1979 if (!leaf->map_token)
1980 map_private_extent_buffer(leaf, (unsigned long)item,
1981 sizeof(struct btrfs_inode_item),
1982@@ -2631,8 +2642,11 @@ static void fill_inode_item(struct btrfs
1983 &leaf->map_start, &leaf->map_len,
1984 KM_USER1);
1985
1986- btrfs_set_inode_uid(leaf, item, inode->i_uid);
1987- btrfs_set_inode_gid(leaf, item, inode->i_gid);
1988+ btrfs_set_inode_uid(leaf, item, uid);
1989+ btrfs_set_inode_gid(leaf, item, gid);
1990+#ifdef CONFIG_TAGGING_INTERN
1991+ btrfs_set_inode_tag(leaf, item, inode->i_tag);
1992+#endif
1993 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
1994 btrfs_set_inode_mode(leaf, item, inode->i_mode);
1995 btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
1996@@ -7360,6 +7374,7 @@ static const struct inode_operations btr
1997 .listxattr = btrfs_listxattr,
1998 .removexattr = btrfs_removexattr,
1999 .permission = btrfs_permission,
2000+ .sync_flags = btrfs_sync_flags,
2001 };
2002 static const struct inode_operations btrfs_dir_ro_inode_operations = {
2003 .lookup = btrfs_lookup,
2004@@ -7432,6 +7447,7 @@ static const struct inode_operations btr
2005 .removexattr = btrfs_removexattr,
2006 .permission = btrfs_permission,
2007 .fiemap = btrfs_fiemap,
2008+ .sync_flags = btrfs_sync_flags,
2009 };
2010 static const struct inode_operations btrfs_special_inode_operations = {
2011 .getattr = btrfs_getattr,
2012diff -NurpP --minimal linux-3.0.9/fs/btrfs/ioctl.c linux-3.0.9-vs2.3.2.1/fs/btrfs/ioctl.c
2013--- linux-3.0.9/fs/btrfs/ioctl.c 2011-07-22 11:18:05.000000000 +0200
2014+++ linux-3.0.9-vs2.3.2.1/fs/btrfs/ioctl.c 2011-06-22 12:39:15.000000000 +0200
2015@@ -70,10 +70,13 @@ static unsigned int btrfs_flags_to_ioctl
2016 {
2017 unsigned int iflags = 0;
2018
2019- if (flags & BTRFS_INODE_SYNC)
2020- iflags |= FS_SYNC_FL;
2021 if (flags & BTRFS_INODE_IMMUTABLE)
2022 iflags |= FS_IMMUTABLE_FL;
2023+ if (flags & BTRFS_INODE_IXUNLINK)
2024+ iflags |= FS_IXUNLINK_FL;
2025+
2026+ if (flags & BTRFS_INODE_SYNC)
2027+ iflags |= FS_SYNC_FL;
2028 if (flags & BTRFS_INODE_APPEND)
2029 iflags |= FS_APPEND_FL;
2030 if (flags & BTRFS_INODE_NODUMP)
2031@@ -90,28 +93,78 @@ static unsigned int btrfs_flags_to_ioctl
2032 else if (flags & BTRFS_INODE_NOCOMPRESS)
2033 iflags |= FS_NOCOMP_FL;
2034
2035+ if (flags & BTRFS_INODE_BARRIER)
2036+ iflags |= FS_BARRIER_FL;
2037+ if (flags & BTRFS_INODE_COW)
2038+ iflags |= FS_COW_FL;
2039 return iflags;
2040 }
2041
2042 /*
2043- * Update inode->i_flags based on the btrfs internal flags.
2044+ * Update inode->i_(v)flags based on the btrfs internal flags.
2045 */
2046 void btrfs_update_iflags(struct inode *inode)
2047 {
2048 struct btrfs_inode *ip = BTRFS_I(inode);
2049
2050- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
2051+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
2052+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2053
2054- if (ip->flags & BTRFS_INODE_SYNC)
2055- inode->i_flags |= S_SYNC;
2056 if (ip->flags & BTRFS_INODE_IMMUTABLE)
2057 inode->i_flags |= S_IMMUTABLE;
2058+ if (ip->flags & BTRFS_INODE_IXUNLINK)
2059+ inode->i_flags |= S_IXUNLINK;
2060+
2061+ if (ip->flags & BTRFS_INODE_SYNC)
2062+ inode->i_flags |= S_SYNC;
2063 if (ip->flags & BTRFS_INODE_APPEND)
2064 inode->i_flags |= S_APPEND;
2065 if (ip->flags & BTRFS_INODE_NOATIME)
2066 inode->i_flags |= S_NOATIME;
2067 if (ip->flags & BTRFS_INODE_DIRSYNC)
2068 inode->i_flags |= S_DIRSYNC;
2069+
2070+ inode->i_vflags &= ~(V_BARRIER | V_COW);
2071+
2072+ if (ip->flags & BTRFS_INODE_BARRIER)
2073+ inode->i_vflags |= V_BARRIER;
2074+ if (ip->flags & BTRFS_INODE_COW)
2075+ inode->i_vflags |= V_COW;
2076+}
2077+
2078+/*
2079+ * Update btrfs internal flags from inode->i_(v)flags.
2080+ */
2081+void btrfs_update_flags(struct inode *inode)
2082+{
2083+ struct btrfs_inode *ip = BTRFS_I(inode);
2084+
2085+ unsigned int flags = inode->i_flags;
2086+ unsigned int vflags = inode->i_vflags;
2087+
2088+ ip->flags &= ~(BTRFS_INODE_SYNC | BTRFS_INODE_APPEND |
2089+ BTRFS_INODE_IMMUTABLE | BTRFS_INODE_IXUNLINK |
2090+ BTRFS_INODE_NOATIME | BTRFS_INODE_DIRSYNC |
2091+ BTRFS_INODE_BARRIER | BTRFS_INODE_COW);
2092+
2093+ if (flags & S_IMMUTABLE)
2094+ ip->flags |= BTRFS_INODE_IMMUTABLE;
2095+ if (flags & S_IXUNLINK)
2096+ ip->flags |= BTRFS_INODE_IXUNLINK;
2097+
2098+ if (flags & S_SYNC)
2099+ ip->flags |= BTRFS_INODE_SYNC;
2100+ if (flags & S_APPEND)
2101+ ip->flags |= BTRFS_INODE_APPEND;
2102+ if (flags & S_NOATIME)
2103+ ip->flags |= BTRFS_INODE_NOATIME;
2104+ if (flags & S_DIRSYNC)
2105+ ip->flags |= BTRFS_INODE_DIRSYNC;
2106+
2107+ if (vflags & V_BARRIER)
2108+ ip->flags |= BTRFS_INODE_BARRIER;
2109+ if (vflags & V_COW)
2110+ ip->flags |= BTRFS_INODE_COW;
2111 }
2112
2113 /*
2114@@ -129,7 +182,7 @@ void btrfs_inherit_iflags(struct inode *
2115 flags = BTRFS_I(dir)->flags;
2116
2117 if (S_ISREG(inode->i_mode))
2118- flags &= ~BTRFS_INODE_DIRSYNC;
2119+ flags &= ~(BTRFS_INODE_DIRSYNC | BTRFS_INODE_BARRIER);
2120 else if (!S_ISDIR(inode->i_mode))
2121 flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME);
2122
2123@@ -137,6 +190,30 @@ void btrfs_inherit_iflags(struct inode *
2124 btrfs_update_iflags(inode);
2125 }
2126
2127+int btrfs_sync_flags(struct inode *inode, int flags, int vflags)
2128+{
2129+ struct btrfs_inode *ip = BTRFS_I(inode);
2130+ struct btrfs_root *root = ip->root;
2131+ struct btrfs_trans_handle *trans;
2132+ int ret;
2133+
2134+ trans = btrfs_join_transaction(root);
2135+ BUG_ON(!trans);
2136+
2137+ inode->i_flags = flags;
2138+ inode->i_vflags = vflags;
2139+ btrfs_update_flags(inode);
2140+
2141+ ret = btrfs_update_inode(trans, root, inode);
2142+ BUG_ON(ret);
2143+
2144+ btrfs_update_iflags(inode);
2145+ inode->i_ctime = CURRENT_TIME;
2146+ btrfs_end_transaction(trans, root);
2147+
2148+ return 0;
2149+}
2150+
2151 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
2152 {
2153 struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
2154@@ -188,7 +265,8 @@ static int btrfs_ioctl_setflags(struct f
2155
2156 flags = btrfs_mask_flags(inode->i_mode, flags);
2157 oldflags = btrfs_flags_to_ioctl(ip->flags);
2158- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
2159+ if ((flags ^ oldflags) & (FS_APPEND_FL |
2160+ FS_IMMUTABLE_FL | FS_IXUNLINK_FL)) {
2161 if (!capable(CAP_LINUX_IMMUTABLE)) {
2162 ret = -EPERM;
2163 goto out_unlock;
2164@@ -199,14 +277,19 @@ static int btrfs_ioctl_setflags(struct f
2165 if (ret)
2166 goto out_unlock;
2167
2168- if (flags & FS_SYNC_FL)
2169- ip->flags |= BTRFS_INODE_SYNC;
2170- else
2171- ip->flags &= ~BTRFS_INODE_SYNC;
2172 if (flags & FS_IMMUTABLE_FL)
2173 ip->flags |= BTRFS_INODE_IMMUTABLE;
2174 else
2175 ip->flags &= ~BTRFS_INODE_IMMUTABLE;
2176+ if (flags & FS_IXUNLINK_FL)
2177+ ip->flags |= BTRFS_INODE_IXUNLINK;
2178+ else
2179+ ip->flags &= ~BTRFS_INODE_IXUNLINK;
2180+
2181+ if (flags & FS_SYNC_FL)
2182+ ip->flags |= BTRFS_INODE_SYNC;
2183+ else
2184+ ip->flags &= ~BTRFS_INODE_SYNC;
2185 if (flags & FS_APPEND_FL)
2186 ip->flags |= BTRFS_INODE_APPEND;
2187 else
2188diff -NurpP --minimal linux-3.0.9/fs/btrfs/super.c linux-3.0.9-vs2.3.2.1/fs/btrfs/super.c
2189--- linux-3.0.9/fs/btrfs/super.c 2011-07-22 11:18:05.000000000 +0200
2190+++ linux-3.0.9-vs2.3.2.1/fs/btrfs/super.c 2011-07-19 00:44:39.000000000 +0200
2191@@ -162,7 +162,7 @@ enum {
2192 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
2193 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
2194 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
2195- Opt_inode_cache, Opt_err,
2196+ Opt_inode_cache, Opt_tag, Opt_notag, Opt_tagid, Opt_err,
2197 };
2198
2199 static match_table_t tokens = {
2200@@ -195,6 +195,9 @@ static match_table_t tokens = {
2201 {Opt_subvolrootid, "subvolrootid=%d"},
2202 {Opt_defrag, "autodefrag"},
2203 {Opt_inode_cache, "inode_cache"},
2204+ {Opt_tag, "tag"},
2205+ {Opt_notag, "notag"},
2206+ {Opt_tagid, "tagid=%u"},
2207 {Opt_err, NULL},
2208 };
2209
2210@@ -381,6 +384,22 @@ int btrfs_parse_options(struct btrfs_roo
2211 printk(KERN_INFO "btrfs: enabling auto defrag");
2212 btrfs_set_opt(info->mount_opt, AUTO_DEFRAG);
2213 break;
2214+#ifndef CONFIG_TAGGING_NONE
2215+ case Opt_tag:
2216+ printk(KERN_INFO "btrfs: use tagging\n");
2217+ btrfs_set_opt(info->mount_opt, TAGGED);
2218+ break;
2219+ case Opt_notag:
2220+ printk(KERN_INFO "btrfs: disabled tagging\n");
2221+ btrfs_clear_opt(info->mount_opt, TAGGED);
2222+ break;
2223+#endif
2224+#ifdef CONFIG_PROPAGATE
2225+ case Opt_tagid:
2226+ /* use args[0] */
2227+ btrfs_set_opt(info->mount_opt, TAGGED);
2228+ break;
2229+#endif
2230 case Opt_err:
2231 printk(KERN_INFO "btrfs: unrecognized mount option "
2232 "'%s'\n", p);
2233@@ -907,6 +926,12 @@ static int btrfs_remount(struct super_bl
2234 if (ret)
2235 return -EINVAL;
2236
2237+ if (btrfs_test_opt(root, TAGGED) && !(sb->s_flags & MS_TAGGED)) {
2238+ printk("btrfs: %s: tagging not permitted on remount.\n",
2239+ sb->s_id);
2240+ return -EINVAL;
2241+ }
2242+
2243 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
2244 return 0;
2245
2246diff -NurpP --minimal linux-3.0.9/fs/char_dev.c linux-3.0.9-vs2.3.2.1/fs/char_dev.c
2247--- linux-3.0.9/fs/char_dev.c 2011-03-15 18:07:31.000000000 +0100
2248+++ linux-3.0.9-vs2.3.2.1/fs/char_dev.c 2011-06-10 22:11:24.000000000 +0200
2249@@ -21,6 +21,8 @@
2250 #include <linux/mutex.h>
2251 #include <linux/backing-dev.h>
2252 #include <linux/tty.h>
2253+#include <linux/vs_context.h>
2254+#include <linux/vs_device.h>
2255
2256 #include "internal.h"
2257
2258@@ -371,14 +373,21 @@ static int chrdev_open(struct inode *ino
2259 struct cdev *p;
2260 struct cdev *new = NULL;
2261 int ret = 0;
2262+ dev_t mdev;
2263+
2264+ if (!vs_map_chrdev(inode->i_rdev, &mdev, DATTR_OPEN))
2265+ return -EPERM;
2266+ inode->i_mdev = mdev;
2267
2268 spin_lock(&cdev_lock);
2269 p = inode->i_cdev;
2270 if (!p) {
2271 struct kobject *kobj;
2272 int idx;
2273+
2274 spin_unlock(&cdev_lock);
2275- kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
2276+
2277+ kobj = kobj_lookup(cdev_map, mdev, &idx);
2278 if (!kobj)
2279 return -ENXIO;
2280 new = container_of(kobj, struct cdev, kobj);
2281diff -NurpP --minimal linux-3.0.9/fs/dcache.c linux-3.0.9-vs2.3.2.1/fs/dcache.c
2282--- linux-3.0.9/fs/dcache.c 2011-07-22 11:18:05.000000000 +0200
2283+++ linux-3.0.9-vs2.3.2.1/fs/dcache.c 2011-07-22 11:20:39.000000000 +0200
2284@@ -36,6 +36,7 @@
2285 #include <linux/bit_spinlock.h>
2286 #include <linux/rculist_bl.h>
2287 #include <linux/prefetch.h>
2288+#include <linux/vs_limit.h>
2289 #include "internal.h"
2290
2291 /*
2292@@ -479,6 +480,8 @@ int d_invalidate(struct dentry * dentry)
2293 spin_lock(&dentry->d_lock);
2294 }
2295
2296+ vx_dentry_dec(dentry);
2297+
2298 /*
2299 * Somebody else still using it?
2300 *
2301@@ -506,6 +509,7 @@ EXPORT_SYMBOL(d_invalidate);
2302 static inline void __dget_dlock(struct dentry *dentry)
2303 {
2304 dentry->d_count++;
2305+ vx_dentry_inc(dentry);
2306 }
2307
2308 static inline void __dget(struct dentry *dentry)
2309@@ -1266,6 +1270,9 @@ struct dentry *d_alloc(struct dentry * p
2310 struct dentry *dentry;
2311 char *dname;
2312
2313+ if (!vx_dentry_avail(1))
2314+ return NULL;
2315+
2316 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
2317 if (!dentry)
2318 return NULL;
2319@@ -1288,6 +1295,7 @@ struct dentry *d_alloc(struct dentry * p
2320
2321 dentry->d_count = 1;
2322 dentry->d_flags = 0;
2323+ vx_dentry_inc(dentry);
2324 spin_lock_init(&dentry->d_lock);
2325 seqcount_init(&dentry->d_seq);
2326 dentry->d_inode = NULL;
2327@@ -1945,6 +1953,7 @@ struct dentry *__d_lookup(struct dentry
2328 }
2329
2330 dentry->d_count++;
2331+ vx_dentry_inc(dentry);
2332 found = dentry;
2333 spin_unlock(&dentry->d_lock);
2334 break;
2335diff -NurpP --minimal linux-3.0.9/fs/devpts/inode.c linux-3.0.9-vs2.3.2.1/fs/devpts/inode.c
2336--- linux-3.0.9/fs/devpts/inode.c 2011-05-22 16:17:50.000000000 +0200
2337+++ linux-3.0.9-vs2.3.2.1/fs/devpts/inode.c 2011-06-10 22:11:24.000000000 +0200
2338@@ -25,6 +25,7 @@
2339 #include <linux/parser.h>
2340 #include <linux/fsnotify.h>
2341 #include <linux/seq_file.h>
2342+#include <linux/vs_base.h>
2343
2344 #define DEVPTS_DEFAULT_MODE 0600
2345 /*
2346@@ -36,6 +37,20 @@
2347 #define DEVPTS_DEFAULT_PTMX_MODE 0000
2348 #define PTMX_MINOR 2
2349
2350+static int devpts_permission(struct inode *inode, int mask, unsigned int flags)
2351+{
2352+ int ret = -EACCES;
2353+
2354+ /* devpts is xid tagged */
2355+ if (vx_check((xid_t)inode->i_tag, VS_WATCH_P | VS_IDENT))
2356+ ret = generic_permission(inode, mask, flags, NULL);
2357+ return ret;
2358+}
2359+
2360+static struct inode_operations devpts_file_inode_operations = {
2361+ .permission = devpts_permission,
2362+};
2363+
2364 extern int pty_limit; /* Config limit on Unix98 ptys */
2365 static DEFINE_MUTEX(allocated_ptys_lock);
2366
2367@@ -263,6 +278,34 @@ static int devpts_show_options(struct se
2368 return 0;
2369 }
2370
2371+static int devpts_filter(struct dentry *de)
2372+{
2373+ xid_t xid = 0;
2374+
2375+ /* devpts is xid tagged */
2376+ if (de && de->d_inode)
2377+ xid = (xid_t)de->d_inode->i_tag;
2378+#ifdef CONFIG_VSERVER_WARN_DEVPTS
2379+ else
2380+ vxwprintk_task(1, "devpts " VS_Q("%.*s") " without inode.",
2381+ de->d_name.len, de->d_name.name);
2382+#endif
2383+ return vx_check(xid, VS_WATCH_P | VS_IDENT);
2384+}
2385+
2386+static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir)
2387+{
2388+ return dcache_readdir_filter(filp, dirent, filldir, devpts_filter);
2389+}
2390+
2391+static struct file_operations devpts_dir_operations = {
2392+ .open = dcache_dir_open,
2393+ .release = dcache_dir_close,
2394+ .llseek = dcache_dir_lseek,
2395+ .read = generic_read_dir,
2396+ .readdir = devpts_readdir,
2397+};
2398+
2399 static const struct super_operations devpts_sops = {
2400 .statfs = simple_statfs,
2401 .remount_fs = devpts_remount,
2402@@ -302,12 +345,15 @@ devpts_fill_super(struct super_block *s,
2403 inode = new_inode(s);
2404 if (!inode)
2405 goto free_fsi;
2406+
2407 inode->i_ino = 1;
2408 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2409 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
2410 inode->i_op = &simple_dir_inode_operations;
2411- inode->i_fop = &simple_dir_operations;
2412+ inode->i_fop = &devpts_dir_operations;
2413 inode->i_nlink = 2;
2414+ /* devpts is xid tagged */
2415+ inode->i_tag = (tag_t)vx_current_xid();
2416
2417 s->s_root = d_alloc_root(inode);
2418 if (s->s_root)
2419@@ -494,6 +540,9 @@ int devpts_pty_new(struct inode *ptmx_in
2420 inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
2421 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2422 init_special_inode(inode, S_IFCHR|opts->mode, device);
2423+ /* devpts is xid tagged */
2424+ inode->i_tag = (tag_t)vx_current_xid();
2425+ inode->i_op = &devpts_file_inode_operations;
2426 inode->i_private = tty;
2427 tty->driver_data = inode;
2428
2429diff -NurpP --minimal linux-3.0.9/fs/ext2/balloc.c linux-3.0.9-vs2.3.2.1/fs/ext2/balloc.c
2430--- linux-3.0.9/fs/ext2/balloc.c 2011-05-22 16:17:51.000000000 +0200
2431+++ linux-3.0.9-vs2.3.2.1/fs/ext2/balloc.c 2011-06-10 22:11:24.000000000 +0200
2432@@ -701,7 +701,6 @@ ext2_try_to_allocate(struct super_block
2433 start = 0;
2434 end = EXT2_BLOCKS_PER_GROUP(sb);
2435 }
2436-
2437 BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb));
2438
2439 repeat:
2440diff -NurpP --minimal linux-3.0.9/fs/ext2/ext2.h linux-3.0.9-vs2.3.2.1/fs/ext2/ext2.h
2441--- linux-3.0.9/fs/ext2/ext2.h 2011-05-22 16:17:51.000000000 +0200
2442+++ linux-3.0.9-vs2.3.2.1/fs/ext2/ext2.h 2011-06-10 22:11:24.000000000 +0200
2443@@ -126,6 +126,7 @@ extern void ext2_set_inode_flags(struct
2444 extern void ext2_get_inode_flags(struct ext2_inode_info *);
2445 extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2446 u64 start, u64 len);
2447+extern int ext2_sync_flags(struct inode *, int, int);
2448
2449 /* ioctl.c */
2450 extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
2451diff -NurpP --minimal linux-3.0.9/fs/ext2/file.c linux-3.0.9-vs2.3.2.1/fs/ext2/file.c
2452--- linux-3.0.9/fs/ext2/file.c 2010-08-02 16:52:48.000000000 +0200
2453+++ linux-3.0.9-vs2.3.2.1/fs/ext2/file.c 2011-06-10 22:11:24.000000000 +0200
2454@@ -104,4 +104,5 @@ const struct inode_operations ext2_file_
2455 .setattr = ext2_setattr,
2456 .check_acl = ext2_check_acl,
2457 .fiemap = ext2_fiemap,
2458+ .sync_flags = ext2_sync_flags,
2459 };
2460diff -NurpP --minimal linux-3.0.9/fs/ext2/ialloc.c linux-3.0.9-vs2.3.2.1/fs/ext2/ialloc.c
2461--- linux-3.0.9/fs/ext2/ialloc.c 2011-05-22 16:17:51.000000000 +0200
2462+++ linux-3.0.9-vs2.3.2.1/fs/ext2/ialloc.c 2011-06-10 22:11:24.000000000 +0200
2463@@ -17,6 +17,7 @@
2464 #include <linux/backing-dev.h>
2465 #include <linux/buffer_head.h>
2466 #include <linux/random.h>
2467+#include <linux/vs_tag.h>
2468 #include "ext2.h"
2469 #include "xattr.h"
2470 #include "acl.h"
2471@@ -549,6 +550,7 @@ got:
2472 inode->i_mode = mode;
2473 inode->i_uid = current_fsuid();
2474 inode->i_gid = dir->i_gid;
2475+ inode->i_tag = dx_current_fstag(sb);
2476 } else
2477 inode_init_owner(inode, dir, mode);
2478
2479diff -NurpP --minimal linux-3.0.9/fs/ext2/inode.c linux-3.0.9-vs2.3.2.1/fs/ext2/inode.c
2480--- linux-3.0.9/fs/ext2/inode.c 2011-05-22 16:17:51.000000000 +0200
2481+++ linux-3.0.9-vs2.3.2.1/fs/ext2/inode.c 2011-06-10 22:11:24.000000000 +0200
2482@@ -32,6 +32,7 @@
2483 #include <linux/mpage.h>
2484 #include <linux/fiemap.h>
2485 #include <linux/namei.h>
2486+#include <linux/vs_tag.h>
2487 #include "ext2.h"
2488 #include "acl.h"
2489 #include "xip.h"
2490@@ -1167,7 +1168,7 @@ static void ext2_truncate_blocks(struct
2491 return;
2492 if (ext2_inode_is_fast_symlink(inode))
2493 return;
2494- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2495+ if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
2496 return;
2497 __ext2_truncate_blocks(inode, offset);
2498 }
2499@@ -1256,36 +1257,61 @@ void ext2_set_inode_flags(struct inode *
2500 {
2501 unsigned int flags = EXT2_I(inode)->i_flags;
2502
2503- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
2504+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
2505+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2506+
2507+
2508+ if (flags & EXT2_IMMUTABLE_FL)
2509+ inode->i_flags |= S_IMMUTABLE;
2510+ if (flags & EXT2_IXUNLINK_FL)
2511+ inode->i_flags |= S_IXUNLINK;
2512+
2513 if (flags & EXT2_SYNC_FL)
2514 inode->i_flags |= S_SYNC;
2515 if (flags & EXT2_APPEND_FL)
2516 inode->i_flags |= S_APPEND;
2517- if (flags & EXT2_IMMUTABLE_FL)
2518- inode->i_flags |= S_IMMUTABLE;
2519 if (flags & EXT2_NOATIME_FL)
2520 inode->i_flags |= S_NOATIME;
2521 if (flags & EXT2_DIRSYNC_FL)
2522 inode->i_flags |= S_DIRSYNC;
2523+
2524+ inode->i_vflags &= ~(V_BARRIER | V_COW);
2525+
2526+ if (flags & EXT2_BARRIER_FL)
2527+ inode->i_vflags |= V_BARRIER;
2528+ if (flags & EXT2_COW_FL)
2529+ inode->i_vflags |= V_COW;
2530 }
2531
2532 /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
2533 void ext2_get_inode_flags(struct ext2_inode_info *ei)
2534 {
2535 unsigned int flags = ei->vfs_inode.i_flags;
2536+ unsigned int vflags = ei->vfs_inode.i_vflags;
2537+
2538+ ei->i_flags &= ~(EXT2_SYNC_FL | EXT2_APPEND_FL |
2539+ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL |
2540+ EXT2_NOATIME_FL | EXT2_DIRSYNC_FL |
2541+ EXT2_BARRIER_FL | EXT2_COW_FL);
2542+
2543+ if (flags & S_IMMUTABLE)
2544+ ei->i_flags |= EXT2_IMMUTABLE_FL;
2545+ if (flags & S_IXUNLINK)
2546+ ei->i_flags |= EXT2_IXUNLINK_FL;
2547
2548- ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
2549- EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
2550 if (flags & S_SYNC)
2551 ei->i_flags |= EXT2_SYNC_FL;
2552 if (flags & S_APPEND)
2553 ei->i_flags |= EXT2_APPEND_FL;
2554- if (flags & S_IMMUTABLE)
2555- ei->i_flags |= EXT2_IMMUTABLE_FL;
2556 if (flags & S_NOATIME)
2557 ei->i_flags |= EXT2_NOATIME_FL;
2558 if (flags & S_DIRSYNC)
2559 ei->i_flags |= EXT2_DIRSYNC_FL;
2560+
2561+ if (vflags & V_BARRIER)
2562+ ei->i_flags |= EXT2_BARRIER_FL;
2563+ if (vflags & V_COW)
2564+ ei->i_flags |= EXT2_COW_FL;
2565 }
2566
2567 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
2568@@ -1295,6 +1321,8 @@ struct inode *ext2_iget (struct super_bl
2569 struct ext2_inode *raw_inode;
2570 struct inode *inode;
2571 long ret = -EIO;
2572+ uid_t uid;
2573+ gid_t gid;
2574 int n;
2575
2576 inode = iget_locked(sb, ino);
2577@@ -1313,12 +1341,17 @@ struct inode *ext2_iget (struct super_bl
2578 }
2579
2580 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
2581- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2582- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2583+ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2584+ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2585 if (!(test_opt (inode->i_sb, NO_UID32))) {
2586- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2587- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2588+ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2589+ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2590 }
2591+ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
2592+ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
2593+ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
2594+ le16_to_cpu(raw_inode->i_raw_tag));
2595+
2596 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
2597 inode->i_size = le32_to_cpu(raw_inode->i_size);
2598 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2599@@ -1416,8 +1449,8 @@ static int __ext2_write_inode(struct ino
2600 struct ext2_inode_info *ei = EXT2_I(inode);
2601 struct super_block *sb = inode->i_sb;
2602 ino_t ino = inode->i_ino;
2603- uid_t uid = inode->i_uid;
2604- gid_t gid = inode->i_gid;
2605+ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
2606+ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
2607 struct buffer_head * bh;
2608 struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
2609 int n;
2610@@ -1453,6 +1486,9 @@ static int __ext2_write_inode(struct ino
2611 raw_inode->i_uid_high = 0;
2612 raw_inode->i_gid_high = 0;
2613 }
2614+#ifdef CONFIG_TAGGING_INTERN
2615+ raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
2616+#endif
2617 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2618 raw_inode->i_size = cpu_to_le32(inode->i_size);
2619 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
2620@@ -1533,7 +1569,8 @@ int ext2_setattr(struct dentry *dentry,
2621 if (is_quota_modification(inode, iattr))
2622 dquot_initialize(inode);
2623 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
2624- (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
2625+ (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) ||
2626+ (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) {
2627 error = dquot_transfer(inode, iattr);
2628 if (error)
2629 return error;
2630diff -NurpP --minimal linux-3.0.9/fs/ext2/ioctl.c linux-3.0.9-vs2.3.2.1/fs/ext2/ioctl.c
2631--- linux-3.0.9/fs/ext2/ioctl.c 2011-05-22 16:17:51.000000000 +0200
2632+++ linux-3.0.9-vs2.3.2.1/fs/ext2/ioctl.c 2011-06-10 22:11:24.000000000 +0200
2633@@ -17,6 +17,16 @@
2634 #include <asm/uaccess.h>
2635
2636
2637+int ext2_sync_flags(struct inode *inode, int flags, int vflags)
2638+{
2639+ inode->i_flags = flags;
2640+ inode->i_vflags = vflags;
2641+ ext2_get_inode_flags(EXT2_I(inode));
2642+ inode->i_ctime = CURRENT_TIME_SEC;
2643+ mark_inode_dirty(inode);
2644+ return 0;
2645+}
2646+
2647 long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2648 {
2649 struct inode *inode = filp->f_dentry->d_inode;
2650@@ -51,6 +61,11 @@ long ext2_ioctl(struct file *filp, unsig
2651
2652 flags = ext2_mask_flags(inode->i_mode, flags);
2653
2654+ if (IS_BARRIER(inode)) {
2655+ vxwprintk_task(1, "messing with the barrier.");
2656+ return -EACCES;
2657+ }
2658+
2659 mutex_lock(&inode->i_mutex);
2660 /* Is it quota file? Do not allow user to mess with it */
2661 if (IS_NOQUOTA(inode)) {
2662@@ -66,7 +81,9 @@ long ext2_ioctl(struct file *filp, unsig
2663 *
2664 * This test looks nicer. Thanks to Pauline Middelink
2665 */
2666- if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
2667+ if ((oldflags & EXT2_IMMUTABLE_FL) ||
2668+ ((flags ^ oldflags) & (EXT2_APPEND_FL |
2669+ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL))) {
2670 if (!capable(CAP_LINUX_IMMUTABLE)) {
2671 mutex_unlock(&inode->i_mutex);
2672 ret = -EPERM;
2673@@ -74,7 +91,7 @@ long ext2_ioctl(struct file *filp, unsig
2674 }
2675 }
2676
2677- flags = flags & EXT2_FL_USER_MODIFIABLE;
2678+ flags &= EXT2_FL_USER_MODIFIABLE;
2679 flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
2680 ei->i_flags = flags;
2681 mutex_unlock(&inode->i_mutex);
2682diff -NurpP --minimal linux-3.0.9/fs/ext2/namei.c linux-3.0.9-vs2.3.2.1/fs/ext2/namei.c
2683--- linux-3.0.9/fs/ext2/namei.c 2011-05-22 16:17:51.000000000 +0200
2684+++ linux-3.0.9-vs2.3.2.1/fs/ext2/namei.c 2011-06-10 22:11:24.000000000 +0200
2685@@ -32,6 +32,7 @@
2686
2687 #include <linux/pagemap.h>
2688 #include <linux/quotaops.h>
2689+#include <linux/vs_tag.h>
2690 #include "ext2.h"
2691 #include "xattr.h"
2692 #include "acl.h"
2693@@ -75,6 +76,7 @@ static struct dentry *ext2_lookup(struct
2694 return ERR_PTR(-EIO);
2695 } else {
2696 return ERR_CAST(inode);
2697+ dx_propagate_tag(nd, inode);
2698 }
2699 }
2700 }
2701@@ -413,6 +415,7 @@ const struct inode_operations ext2_dir_i
2702 #endif
2703 .setattr = ext2_setattr,
2704 .check_acl = ext2_check_acl,
2705+ .sync_flags = ext2_sync_flags,
2706 };
2707
2708 const struct inode_operations ext2_special_inode_operations = {
2709diff -NurpP --minimal linux-3.0.9/fs/ext2/super.c linux-3.0.9-vs2.3.2.1/fs/ext2/super.c
2710--- linux-3.0.9/fs/ext2/super.c 2011-07-22 11:18:05.000000000 +0200
2711+++ linux-3.0.9-vs2.3.2.1/fs/ext2/super.c 2011-06-10 22:11:24.000000000 +0200
2712@@ -394,7 +394,8 @@ enum {
2713 Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
2714 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
2715 Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
2716- Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
2717+ Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation,
2718+ Opt_tag, Opt_notag, Opt_tagid
2719 };
2720
2721 static const match_table_t tokens = {
2722@@ -422,6 +423,9 @@ static const match_table_t tokens = {
2723 {Opt_acl, "acl"},
2724 {Opt_noacl, "noacl"},
2725 {Opt_xip, "xip"},
2726+ {Opt_tag, "tag"},
2727+ {Opt_notag, "notag"},
2728+ {Opt_tagid, "tagid=%u"},
2729 {Opt_grpquota, "grpquota"},
2730 {Opt_ignore, "noquota"},
2731 {Opt_quota, "quota"},
2732@@ -492,6 +496,20 @@ static int parse_options(char *options,
2733 case Opt_nouid32:
2734 set_opt (sbi->s_mount_opt, NO_UID32);
2735 break;
2736+#ifndef CONFIG_TAGGING_NONE
2737+ case Opt_tag:
2738+ set_opt (sbi->s_mount_opt, TAGGED);
2739+ break;
2740+ case Opt_notag:
2741+ clear_opt (sbi->s_mount_opt, TAGGED);
2742+ break;
2743+#endif
2744+#ifdef CONFIG_PROPAGATE
2745+ case Opt_tagid:
2746+ /* use args[0] */
2747+ set_opt (sbi->s_mount_opt, TAGGED);
2748+ break;
2749+#endif
2750 case Opt_nocheck:
2751 clear_opt (sbi->s_mount_opt, CHECK);
2752 break;
2753@@ -850,6 +868,8 @@ static int ext2_fill_super(struct super_
2754 if (!parse_options((char *) data, sb))
2755 goto failed_mount;
2756
2757+ if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED)
2758+ sb->s_flags |= MS_TAGGED;
2759 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2760 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
2761 MS_POSIXACL : 0);
2762@@ -1224,6 +1244,14 @@ static int ext2_remount (struct super_bl
2763 goto restore_opts;
2764 }
2765
2766+ if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) &&
2767+ !(sb->s_flags & MS_TAGGED)) {
2768+ printk("EXT2-fs: %s: tagging not permitted on remount.\n",
2769+ sb->s_id);
2770+ err = -EINVAL;
2771+ goto restore_opts;
2772+ }
2773+
2774 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2775 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2776
2777diff -NurpP --minimal linux-3.0.9/fs/ext3/file.c linux-3.0.9-vs2.3.2.1/fs/ext3/file.c
2778--- linux-3.0.9/fs/ext3/file.c 2010-07-07 18:31:51.000000000 +0200
2779+++ linux-3.0.9-vs2.3.2.1/fs/ext3/file.c 2011-06-10 22:11:24.000000000 +0200
2780@@ -81,5 +81,6 @@ const struct inode_operations ext3_file_
2781 #endif
2782 .check_acl = ext3_check_acl,
2783 .fiemap = ext3_fiemap,
2784+ .sync_flags = ext3_sync_flags,
2785 };
2786
2787diff -NurpP --minimal linux-3.0.9/fs/ext3/ialloc.c linux-3.0.9-vs2.3.2.1/fs/ext3/ialloc.c
2788--- linux-3.0.9/fs/ext3/ialloc.c 2011-05-22 16:17:52.000000000 +0200
2789+++ linux-3.0.9-vs2.3.2.1/fs/ext3/ialloc.c 2011-06-10 22:11:24.000000000 +0200
2790@@ -23,6 +23,7 @@
2791 #include <linux/buffer_head.h>
2792 #include <linux/random.h>
2793 #include <linux/bitops.h>
2794+#include <linux/vs_tag.h>
2795
2796 #include <asm/byteorder.h>
2797
2798@@ -532,6 +533,7 @@ got:
2799 inode->i_mode = mode;
2800 inode->i_uid = current_fsuid();
2801 inode->i_gid = dir->i_gid;
2802+ inode->i_tag = dx_current_fstag(sb);
2803 } else
2804 inode_init_owner(inode, dir, mode);
2805
2806diff -NurpP --minimal linux-3.0.9/fs/ext3/inode.c linux-3.0.9-vs2.3.2.1/fs/ext3/inode.c
2807--- linux-3.0.9/fs/ext3/inode.c 2011-07-22 11:18:05.000000000 +0200
2808+++ linux-3.0.9-vs2.3.2.1/fs/ext3/inode.c 2011-06-10 22:11:24.000000000 +0200
2809@@ -38,6 +38,7 @@
2810 #include <linux/bio.h>
2811 #include <linux/fiemap.h>
2812 #include <linux/namei.h>
2813+#include <linux/vs_tag.h>
2814 #include "xattr.h"
2815 #include "acl.h"
2816
2817@@ -2391,7 +2392,7 @@ static void ext3_free_branches(handle_t
2818
2819 int ext3_can_truncate(struct inode *inode)
2820 {
2821- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2822+ if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
2823 return 0;
2824 if (S_ISREG(inode->i_mode))
2825 return 1;
2826@@ -2775,36 +2776,60 @@ void ext3_set_inode_flags(struct inode *
2827 {
2828 unsigned int flags = EXT3_I(inode)->i_flags;
2829
2830- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
2831+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
2832+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2833+
2834+ if (flags & EXT3_IMMUTABLE_FL)
2835+ inode->i_flags |= S_IMMUTABLE;
2836+ if (flags & EXT3_IXUNLINK_FL)
2837+ inode->i_flags |= S_IXUNLINK;
2838+
2839 if (flags & EXT3_SYNC_FL)
2840 inode->i_flags |= S_SYNC;
2841 if (flags & EXT3_APPEND_FL)
2842 inode->i_flags |= S_APPEND;
2843- if (flags & EXT3_IMMUTABLE_FL)
2844- inode->i_flags |= S_IMMUTABLE;
2845 if (flags & EXT3_NOATIME_FL)
2846 inode->i_flags |= S_NOATIME;
2847 if (flags & EXT3_DIRSYNC_FL)
2848 inode->i_flags |= S_DIRSYNC;
2849+
2850+ inode->i_vflags &= ~(V_BARRIER | V_COW);
2851+
2852+ if (flags & EXT3_BARRIER_FL)
2853+ inode->i_vflags |= V_BARRIER;
2854+ if (flags & EXT3_COW_FL)
2855+ inode->i_vflags |= V_COW;
2856 }
2857
2858 /* Propagate flags from i_flags to EXT3_I(inode)->i_flags */
2859 void ext3_get_inode_flags(struct ext3_inode_info *ei)
2860 {
2861 unsigned int flags = ei->vfs_inode.i_flags;
2862+ unsigned int vflags = ei->vfs_inode.i_vflags;
2863+
2864+ ei->i_flags &= ~(EXT3_SYNC_FL | EXT3_APPEND_FL |
2865+ EXT3_IMMUTABLE_FL | EXT3_IXUNLINK_FL |
2866+ EXT3_NOATIME_FL | EXT3_DIRSYNC_FL |
2867+ EXT3_BARRIER_FL | EXT3_COW_FL);
2868+
2869+ if (flags & S_IMMUTABLE)
2870+ ei->i_flags |= EXT3_IMMUTABLE_FL;
2871+ if (flags & S_IXUNLINK)
2872+ ei->i_flags |= EXT3_IXUNLINK_FL;
2873
2874- ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL|
2875- EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL);
2876 if (flags & S_SYNC)
2877 ei->i_flags |= EXT3_SYNC_FL;
2878 if (flags & S_APPEND)
2879 ei->i_flags |= EXT3_APPEND_FL;
2880- if (flags & S_IMMUTABLE)
2881- ei->i_flags |= EXT3_IMMUTABLE_FL;
2882 if (flags & S_NOATIME)
2883 ei->i_flags |= EXT3_NOATIME_FL;
2884 if (flags & S_DIRSYNC)
2885 ei->i_flags |= EXT3_DIRSYNC_FL;
2886+
2887+ if (vflags & V_BARRIER)
2888+ ei->i_flags |= EXT3_BARRIER_FL;
2889+ if (vflags & V_COW)
2890+ ei->i_flags |= EXT3_COW_FL;
2891 }
2892
2893 struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2894@@ -2818,6 +2843,8 @@ struct inode *ext3_iget(struct super_blo
2895 transaction_t *transaction;
2896 long ret;
2897 int block;
2898+ uid_t uid;
2899+ gid_t gid;
2900
2901 inode = iget_locked(sb, ino);
2902 if (!inode)
2903@@ -2834,12 +2861,17 @@ struct inode *ext3_iget(struct super_blo
2904 bh = iloc.bh;
2905 raw_inode = ext3_raw_inode(&iloc);
2906 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
2907- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2908- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2909+ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
2910+ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
2911 if(!(test_opt (inode->i_sb, NO_UID32))) {
2912- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2913- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2914+ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2915+ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2916 }
2917+ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
2918+ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
2919+ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
2920+ le16_to_cpu(raw_inode->i_raw_tag));
2921+
2922 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
2923 inode->i_size = le32_to_cpu(raw_inode->i_size);
2924 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2925@@ -2994,6 +3026,8 @@ static int ext3_do_update_inode(handle_t
2926 struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
2927 struct ext3_inode_info *ei = EXT3_I(inode);
2928 struct buffer_head *bh = iloc->bh;
2929+ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
2930+ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
2931 int err = 0, rc, block;
2932
2933 again:
2934@@ -3008,29 +3042,32 @@ again:
2935 ext3_get_inode_flags(ei);
2936 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
2937 if(!(test_opt(inode->i_sb, NO_UID32))) {
2938- raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
2939- raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
2940+ raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
2941+ raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
2942 /*
2943 * Fix up interoperability with old kernels. Otherwise, old inodes get
2944 * re-used with the upper 16 bits of the uid/gid intact
2945 */
2946 if(!ei->i_dtime) {
2947 raw_inode->i_uid_high =
2948- cpu_to_le16(high_16_bits(inode->i_uid));
2949+ cpu_to_le16(high_16_bits(uid));
2950 raw_inode->i_gid_high =
2951- cpu_to_le16(high_16_bits(inode->i_gid));
2952+ cpu_to_le16(high_16_bits(gid));
2953 } else {
2954 raw_inode->i_uid_high = 0;
2955 raw_inode->i_gid_high = 0;
2956 }
2957 } else {
2958 raw_inode->i_uid_low =
2959- cpu_to_le16(fs_high2lowuid(inode->i_uid));
2960+ cpu_to_le16(fs_high2lowuid(uid));
2961 raw_inode->i_gid_low =
2962- cpu_to_le16(fs_high2lowgid(inode->i_gid));
2963+ cpu_to_le16(fs_high2lowgid(gid));
2964 raw_inode->i_uid_high = 0;
2965 raw_inode->i_gid_high = 0;
2966 }
2967+#ifdef CONFIG_TAGGING_INTERN
2968+ raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
2969+#endif
2970 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2971 raw_inode->i_size = cpu_to_le32(ei->i_disksize);
2972 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
2973@@ -3190,7 +3227,8 @@ int ext3_setattr(struct dentry *dentry,
2974 if (is_quota_modification(inode, attr))
2975 dquot_initialize(inode);
2976 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
2977- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
2978+ (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
2979+ (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
2980 handle_t *handle;
2981
2982 /* (user+group)*(old+new) structure, inode write (sb,
2983@@ -3212,6 +3250,8 @@ int ext3_setattr(struct dentry *dentry,
2984 inode->i_uid = attr->ia_uid;
2985 if (attr->ia_valid & ATTR_GID)
2986 inode->i_gid = attr->ia_gid;
2987+ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
2988+ inode->i_tag = attr->ia_tag;
2989 error = ext3_mark_inode_dirty(handle, inode);
2990 ext3_journal_stop(handle);
2991 }
2992diff -NurpP --minimal linux-3.0.9/fs/ext3/ioctl.c linux-3.0.9-vs2.3.2.1/fs/ext3/ioctl.c
2993--- linux-3.0.9/fs/ext3/ioctl.c 2011-05-22 16:17:52.000000000 +0200
2994+++ linux-3.0.9-vs2.3.2.1/fs/ext3/ioctl.c 2011-06-10 22:11:24.000000000 +0200
2995@@ -8,6 +8,7 @@
2996 */
2997
2998 #include <linux/fs.h>
2999+#include <linux/mount.h>
3000 #include <linux/jbd.h>
3001 #include <linux/capability.h>
3002 #include <linux/ext3_fs.h>
3003@@ -17,6 +18,34 @@
3004 #include <linux/compat.h>
3005 #include <asm/uaccess.h>
3006
3007+
3008+int ext3_sync_flags(struct inode *inode, int flags, int vflags)
3009+{
3010+ handle_t *handle = NULL;
3011+ struct ext3_iloc iloc;
3012+ int err;
3013+
3014+ handle = ext3_journal_start(inode, 1);
3015+ if (IS_ERR(handle))
3016+ return PTR_ERR(handle);
3017+
3018+ if (IS_SYNC(inode))
3019+ handle->h_sync = 1;
3020+ err = ext3_reserve_inode_write(handle, inode, &iloc);
3021+ if (err)
3022+ goto flags_err;
3023+
3024+ inode->i_flags = flags;
3025+ inode->i_vflags = vflags;
3026+ ext3_get_inode_flags(EXT3_I(inode));
3027+ inode->i_ctime = CURRENT_TIME_SEC;
3028+
3029+ err = ext3_mark_iloc_dirty(handle, inode, &iloc);
3030+flags_err:
3031+ ext3_journal_stop(handle);
3032+ return err;
3033+}
3034+
3035 long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3036 {
3037 struct inode *inode = filp->f_dentry->d_inode;
3038@@ -50,6 +79,11 @@ long ext3_ioctl(struct file *filp, unsig
3039
3040 flags = ext3_mask_flags(inode->i_mode, flags);
3041
3042+ if (IS_BARRIER(inode)) {
3043+ vxwprintk_task(1, "messing with the barrier.");
3044+ return -EACCES;
3045+ }
3046+
3047 mutex_lock(&inode->i_mutex);
3048
3049 /* Is it quota file? Do not allow user to mess with it */
3050@@ -68,7 +102,9 @@ long ext3_ioctl(struct file *filp, unsig
3051 *
3052 * This test looks nicer. Thanks to Pauline Middelink
3053 */
3054- if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
3055+ if ((oldflags & EXT3_IMMUTABLE_FL) ||
3056+ ((flags ^ oldflags) & (EXT3_APPEND_FL |
3057+ EXT3_IMMUTABLE_FL | EXT3_IXUNLINK_FL))) {
3058 if (!capable(CAP_LINUX_IMMUTABLE))
3059 goto flags_out;
3060 }
3061@@ -93,7 +129,7 @@ long ext3_ioctl(struct file *filp, unsig
3062 if (err)
3063 goto flags_err;
3064
3065- flags = flags & EXT3_FL_USER_MODIFIABLE;
3066+ flags &= EXT3_FL_USER_MODIFIABLE;
3067 flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE;
3068 ei->i_flags = flags;
3069
3070diff -NurpP --minimal linux-3.0.9/fs/ext3/namei.c linux-3.0.9-vs2.3.2.1/fs/ext3/namei.c
3071--- linux-3.0.9/fs/ext3/namei.c 2011-11-15 16:40:46.000000000 +0100
3072+++ linux-3.0.9-vs2.3.2.1/fs/ext3/namei.c 2011-08-29 03:45:09.000000000 +0200
3073@@ -36,6 +36,7 @@
3074 #include <linux/quotaops.h>
3075 #include <linux/buffer_head.h>
3076 #include <linux/bio.h>
3077+#include <linux/vs_tag.h>
3078
3079 #include "namei.h"
3080 #include "xattr.h"
3081@@ -923,6 +924,7 @@ restart:
3082 if (bh)
3083 ll_rw_block(READ_META, 1, &bh);
3084 }
3085+ dx_propagate_tag(nd, inode);
3086 }
3087 if ((bh = bh_use[ra_ptr++]) == NULL)
3088 goto next;
3089@@ -2536,6 +2538,7 @@ const struct inode_operations ext3_dir_i
3090 .removexattr = generic_removexattr,
3091 #endif
3092 .check_acl = ext3_check_acl,
3093+ .sync_flags = ext3_sync_flags,
3094 };
3095
3096 const struct inode_operations ext3_special_inode_operations = {
3097diff -NurpP --minimal linux-3.0.9/fs/ext3/super.c linux-3.0.9-vs2.3.2.1/fs/ext3/super.c
3098--- linux-3.0.9/fs/ext3/super.c 2011-07-22 11:18:05.000000000 +0200
3099+++ linux-3.0.9-vs2.3.2.1/fs/ext3/super.c 2011-06-10 22:11:24.000000000 +0200
3100@@ -821,7 +821,8 @@ enum {
3101 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
3102 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
3103 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
3104- Opt_resize, Opt_usrquota, Opt_grpquota
3105+ Opt_resize, Opt_usrquota, Opt_grpquota,
3106+ Opt_tag, Opt_notag, Opt_tagid
3107 };
3108
3109 static const match_table_t tokens = {
3110@@ -878,6 +879,9 @@ static const match_table_t tokens = {
3111 {Opt_barrier, "barrier"},
3112 {Opt_nobarrier, "nobarrier"},
3113 {Opt_resize, "resize"},
3114+ {Opt_tag, "tag"},
3115+ {Opt_notag, "notag"},
3116+ {Opt_tagid, "tagid=%u"},
3117 {Opt_err, NULL},
3118 };
3119
3120@@ -1030,6 +1034,20 @@ static int parse_options (char *options,
3121 case Opt_nouid32:
3122 set_opt (sbi->s_mount_opt, NO_UID32);
3123 break;
3124+#ifndef CONFIG_TAGGING_NONE
3125+ case Opt_tag:
3126+ set_opt (sbi->s_mount_opt, TAGGED);
3127+ break;
3128+ case Opt_notag:
3129+ clear_opt (sbi->s_mount_opt, TAGGED);
3130+ break;
3131+#endif
3132+#ifdef CONFIG_PROPAGATE
3133+ case Opt_tagid:
3134+ /* use args[0] */
3135+ set_opt (sbi->s_mount_opt, TAGGED);
3136+ break;
3137+#endif
3138 case Opt_nocheck:
3139 clear_opt (sbi->s_mount_opt, CHECK);
3140 break;
3141@@ -1724,6 +1742,9 @@ static int ext3_fill_super (struct super
3142 NULL, 0))
3143 goto failed_mount;
3144
3145+ if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGGED)
3146+ sb->s_flags |= MS_TAGGED;
3147+
3148 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3149 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3150
3151@@ -2604,6 +2625,14 @@ static int ext3_remount (struct super_bl
3152 if (test_opt(sb, ABORT))
3153 ext3_abort(sb, __func__, "Abort forced by user");
3154
3155+ if ((sbi->s_mount_opt & EXT3_MOUNT_TAGGED) &&
3156+ !(sb->s_flags & MS_TAGGED)) {
3157+ printk("EXT3-fs: %s: tagging not permitted on remount.\n",
3158+ sb->s_id);
3159+ err = -EINVAL;
3160+ goto restore_opts;
3161+ }
3162+
3163 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3164 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3165
3166diff -NurpP --minimal linux-3.0.9/fs/ext4/ext4.h linux-3.0.9-vs2.3.2.1/fs/ext4/ext4.h
3167--- linux-3.0.9/fs/ext4/ext4.h 2011-11-15 16:40:46.000000000 +0100
3168+++ linux-3.0.9-vs2.3.2.1/fs/ext4/ext4.h 2011-11-15 17:37:06.000000000 +0100
3169@@ -351,8 +351,12 @@ struct flex_groups {
3170 #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
3171 #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
3172 #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
3173+#define EXT4_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
3174 #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
3175
3176+#define EXT4_BARRIER_FL 0x04000000 /* Barrier for chroot() */
3177+#define EXT4_COW_FL 0x20000000 /* Copy on Write marker */
3178+
3179 #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
3180 #define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */
3181
3182@@ -609,7 +613,8 @@ struct ext4_inode {
3183 __le16 l_i_file_acl_high;
3184 __le16 l_i_uid_high; /* these 2 fields */
3185 __le16 l_i_gid_high; /* were reserved2[0] */
3186- __u32 l_i_reserved2;
3187+ __le16 l_i_tag; /* Context Tag */
3188+ __u16 l_i_reserved2;
3189 } linux2;
3190 struct {
3191 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
3192@@ -727,6 +732,7 @@ do { \
3193 #define i_gid_low i_gid
3194 #define i_uid_high osd2.linux2.l_i_uid_high
3195 #define i_gid_high osd2.linux2.l_i_gid_high
3196+#define i_raw_tag osd2.linux2.l_i_tag
3197 #define i_reserved2 osd2.linux2.l_i_reserved2
3198
3199 #elif defined(__GNU__)
3200@@ -903,6 +909,7 @@ struct ext4_inode_info {
3201 #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
3202 #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
3203 #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
3204+#define EXT4_MOUNT_TAGGED 0x40000 /* Enable Context Tags */
3205 #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
3206 #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
3207 #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
3208@@ -2173,6 +2180,7 @@ extern int ext4_map_blocks(handle_t *han
3209 struct ext4_map_blocks *map, int flags);
3210 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3211 __u64 start, __u64 len);
3212+extern int ext4_sync_flags(struct inode *, int, int);
3213 /* move_extent.c */
3214 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
3215 __u64 start_orig, __u64 start_donor,
3216diff -NurpP --minimal linux-3.0.9/fs/ext4/file.c linux-3.0.9-vs2.3.2.1/fs/ext4/file.c
3217--- linux-3.0.9/fs/ext4/file.c 2011-07-22 11:18:05.000000000 +0200
3218+++ linux-3.0.9-vs2.3.2.1/fs/ext4/file.c 2011-06-10 22:11:24.000000000 +0200
3219@@ -282,5 +282,6 @@ const struct inode_operations ext4_file_
3220 #endif
3221 .check_acl = ext4_check_acl,
3222 .fiemap = ext4_fiemap,
3223+ .sync_flags = ext4_sync_flags,
3224 };
3225
3226diff -NurpP --minimal linux-3.0.9/fs/ext4/ialloc.c linux-3.0.9-vs2.3.2.1/fs/ext4/ialloc.c
3227--- linux-3.0.9/fs/ext4/ialloc.c 2011-05-22 16:17:52.000000000 +0200
3228+++ linux-3.0.9-vs2.3.2.1/fs/ext4/ialloc.c 2011-06-10 22:11:24.000000000 +0200
3229@@ -22,6 +22,7 @@
3230 #include <linux/random.h>
3231 #include <linux/bitops.h>
3232 #include <linux/blkdev.h>
3233+#include <linux/vs_tag.h>
3234 #include <asm/byteorder.h>
3235
3236 #include "ext4.h"
3237@@ -992,6 +993,7 @@ got:
3238 inode->i_mode = mode;
3239 inode->i_uid = current_fsuid();
3240 inode->i_gid = dir->i_gid;
3241+ inode->i_tag = dx_current_fstag(sb);
3242 } else
3243 inode_init_owner(inode, dir, mode);
3244
3245diff -NurpP --minimal linux-3.0.9/fs/ext4/inode.c linux-3.0.9-vs2.3.2.1/fs/ext4/inode.c
3246--- linux-3.0.9/fs/ext4/inode.c 2011-11-15 16:40:46.000000000 +0100
3247+++ linux-3.0.9-vs2.3.2.1/fs/ext4/inode.c 2011-11-15 17:37:06.000000000 +0100
3248@@ -42,6 +42,7 @@
3249 #include <linux/printk.h>
3250 #include <linux/slab.h>
3251 #include <linux/ratelimit.h>
3252+#include <linux/vs_tag.h>
3253
3254 #include "ext4_jbd2.h"
3255 #include "xattr.h"
3256@@ -4816,41 +4817,64 @@ void ext4_set_inode_flags(struct inode *
3257 {
3258 unsigned int flags = EXT4_I(inode)->i_flags;
3259
3260- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
3261+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
3262+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
3263+
3264+ if (flags & EXT4_IMMUTABLE_FL)
3265+ inode->i_flags |= S_IMMUTABLE;
3266+ if (flags & EXT4_IXUNLINK_FL)
3267+ inode->i_flags |= S_IXUNLINK;
3268+
3269 if (flags & EXT4_SYNC_FL)
3270 inode->i_flags |= S_SYNC;
3271 if (flags & EXT4_APPEND_FL)
3272 inode->i_flags |= S_APPEND;
3273- if (flags & EXT4_IMMUTABLE_FL)
3274- inode->i_flags |= S_IMMUTABLE;
3275 if (flags & EXT4_NOATIME_FL)
3276 inode->i_flags |= S_NOATIME;
3277 if (flags & EXT4_DIRSYNC_FL)
3278 inode->i_flags |= S_DIRSYNC;
3279+
3280+ inode->i_vflags &= ~(V_BARRIER | V_COW);
3281+
3282+ if (flags & EXT4_BARRIER_FL)
3283+ inode->i_vflags |= V_BARRIER;
3284+ if (flags & EXT4_COW_FL)
3285+ inode->i_vflags |= V_COW;
3286 }
3287
3288 /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
3289 void ext4_get_inode_flags(struct ext4_inode_info *ei)
3290 {
3291- unsigned int vfs_fl;
3292+ unsigned int vfs_fl, vfs_vf;
3293 unsigned long old_fl, new_fl;
3294
3295 do {
3296 vfs_fl = ei->vfs_inode.i_flags;
3297+ vfs_vf = ei->vfs_inode.i_vflags;
3298 old_fl = ei->i_flags;
3299 new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
3300 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
3301- EXT4_DIRSYNC_FL);
3302+ EXT4_DIRSYNC_FL|EXT4_BARRIER_FL|
3303+ EXT4_COW_FL|EXT4_IXUNLINK_FL);
3304+ /* every bit re-derived below is cleared above; without IXUNLINK in the mask it could never be unset */
3305+ if (vfs_fl & S_IMMUTABLE)
3306+ new_fl |= EXT4_IMMUTABLE_FL;
3307+ if (vfs_fl & S_IXUNLINK)
3308+ new_fl |= EXT4_IXUNLINK_FL;
3309+
3310 if (vfs_fl & S_SYNC)
3311 new_fl |= EXT4_SYNC_FL;
3312 if (vfs_fl & S_APPEND)
3313 new_fl |= EXT4_APPEND_FL;
3314- if (vfs_fl & S_IMMUTABLE)
3315- new_fl |= EXT4_IMMUTABLE_FL;
3316 if (vfs_fl & S_NOATIME)
3317 new_fl |= EXT4_NOATIME_FL;
3318 if (vfs_fl & S_DIRSYNC)
3319 new_fl |= EXT4_DIRSYNC_FL;
3320+
3321+ if (vfs_vf & V_BARRIER)
3322+ new_fl |= EXT4_BARRIER_FL;
3323+ if (vfs_vf & V_COW)
3324+ new_fl |= EXT4_COW_FL;
3325 } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
3326 }
3327
3328@@ -4886,6 +4910,8 @@ struct inode *ext4_iget(struct super_blo
3329 journal_t *journal = EXT4_SB(sb)->s_journal;
3330 long ret;
3331 int block;
3332+ uid_t uid;
3333+ gid_t gid;
3334
3335 inode = iget_locked(sb, ino);
3336 if (!inode)
3337@@ -4901,12 +4927,16 @@ struct inode *ext4_iget(struct super_blo
3338 goto bad_inode;
3339 raw_inode = ext4_raw_inode(&iloc);
3340 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
3341- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
3342- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
3343+ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
3344+ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
3345 if (!(test_opt(inode->i_sb, NO_UID32))) {
3346- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3347- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3348+ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3349+ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3350 }
3351+ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
3352+ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
3353+ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
3354+ le16_to_cpu(raw_inode->i_raw_tag));
3355 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
3356
3357 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
3358@@ -5125,6 +5155,8 @@ static int ext4_do_update_inode(handle_t
3359 struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
3360 struct ext4_inode_info *ei = EXT4_I(inode);
3361 struct buffer_head *bh = iloc->bh;
3362+ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
3363+ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
3364 int err = 0, rc, block;
3365
3366 /* For fields not not tracking in the in-memory inode,
3367@@ -5135,29 +5167,32 @@ static int ext4_do_update_inode(handle_t
3368 ext4_get_inode_flags(ei);
3369 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
3370 if (!(test_opt(inode->i_sb, NO_UID32))) {
3371- raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
3372- raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
3373+ raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
3374+ raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
3375 /*
3376 * Fix up interoperability with old kernels. Otherwise, old inodes get
3377 * re-used with the upper 16 bits of the uid/gid intact
3378 */
3379 if (!ei->i_dtime) {
3380 raw_inode->i_uid_high =
3381- cpu_to_le16(high_16_bits(inode->i_uid));
3382+ cpu_to_le16(high_16_bits(uid));
3383 raw_inode->i_gid_high =
3384- cpu_to_le16(high_16_bits(inode->i_gid));
3385+ cpu_to_le16(high_16_bits(gid));
3386 } else {
3387 raw_inode->i_uid_high = 0;
3388 raw_inode->i_gid_high = 0;
3389 }
3390 } else {
3391 raw_inode->i_uid_low =
3392- cpu_to_le16(fs_high2lowuid(inode->i_uid));
3393+ cpu_to_le16(fs_high2lowuid(uid));
3394 raw_inode->i_gid_low =
3395- cpu_to_le16(fs_high2lowgid(inode->i_gid));
3396+ cpu_to_le16(fs_high2lowgid(gid));
3397 raw_inode->i_uid_high = 0;
3398 raw_inode->i_gid_high = 0;
3399 }
3400+#ifdef CONFIG_TAGGING_INTERN
3401+ raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
3402+#endif
3403 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
3404
3405 EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
3406@@ -5343,7 +5378,8 @@ int ext4_setattr(struct dentry *dentry,
3407 if (is_quota_modification(inode, attr))
3408 dquot_initialize(inode);
3409 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3410- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3411+ (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
3412+ (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
3413 handle_t *handle;
3414
3415 /* (user+group)*(old+new) structure, inode write (sb,
3416@@ -5365,6 +5401,8 @@ int ext4_setattr(struct dentry *dentry,
3417 inode->i_uid = attr->ia_uid;
3418 if (attr->ia_valid & ATTR_GID)
3419 inode->i_gid = attr->ia_gid;
3420+ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
3421+ inode->i_tag = attr->ia_tag;
3422 error = ext4_mark_inode_dirty(handle, inode);
3423 ext4_journal_stop(handle);
3424 }
3425diff -NurpP --minimal linux-3.0.9/fs/ext4/ioctl.c linux-3.0.9-vs2.3.2.1/fs/ext4/ioctl.c
3426--- linux-3.0.9/fs/ext4/ioctl.c 2011-05-22 16:17:52.000000000 +0200
3427+++ linux-3.0.9-vs2.3.2.1/fs/ext4/ioctl.c 2011-06-10 22:11:24.000000000 +0200
3428@@ -14,10 +14,39 @@
3429 #include <linux/compat.h>
3430 #include <linux/mount.h>
3431 #include <linux/file.h>
3432+#include <linux/vs_tag.h>
3433 #include <asm/uaccess.h>
3434 #include "ext4_jbd2.h"
3435 #include "ext4.h"
3436
3437+/* Overwrite the inode's VFS flags/vflags, fold them into the ext4 in-core flags, stamp ctime and dirty the inode under a journal handle. Returns the first error reported by a helper, otherwise the result of ext4_mark_iloc_dirty(). */
3438+int ext4_sync_flags(struct inode *inode, int flags, int vflags)
3439+{
3440+ handle_t *handle = NULL;
3441+ struct ext4_iloc iloc;
3442+ int err;
3443+
3444+ handle = ext4_journal_start(inode, 1); /* NOTE(review): the 1 is presumably the journal credit count for the inode block - confirm */
3445+ if (IS_ERR(handle))
3446+ return PTR_ERR(handle);
3447+
3448+ if (IS_SYNC(inode))
3449+ ext4_handle_sync(handle);
3450+ err = ext4_reserve_inode_write(handle, inode, &iloc);
3451+ if (err)
3452+ goto flags_err;
3453+
3454+ inode->i_flags = flags; /* wholesale overwrite, not a merge */
3455+ inode->i_vflags = vflags;
3456+ ext4_get_inode_flags(EXT4_I(inode)); /* mirror i_flags/i_vflags into EXT4_I(inode)->i_flags */
3457+ inode->i_ctime = ext4_current_time(inode);
3458+
3459+ err = ext4_mark_iloc_dirty(handle, inode, &iloc);
3460+flags_err: /* also reached on success: the handle is stopped on every path past journal_start */
3461+ ext4_journal_stop(handle);
3462+ return err;
3463+}
3464+
3465 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3466 {
3467 struct inode *inode = filp->f_dentry->d_inode;
3468@@ -50,6 +79,11 @@ long ext4_ioctl(struct file *filp, unsig
3469
3470 flags = ext4_mask_flags(inode->i_mode, flags);
3471
3472+ if (IS_BARRIER(inode)) {
3473+ vxwprintk_task(1, "messing with the barrier.");
3474+ return -EACCES;
3475+ }
3476+
3477 err = -EPERM;
3478 mutex_lock(&inode->i_mutex);
3479 /* Is it quota file? Do not allow user to mess with it */
3480@@ -67,7 +101,9 @@ long ext4_ioctl(struct file *filp, unsig
3481 *
3482 * This test looks nicer. Thanks to Pauline Middelink
3483 */
3484- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
3485+ if ((oldflags & EXT4_IMMUTABLE_FL) ||
3486+ ((flags ^ oldflags) & (EXT4_APPEND_FL |
3487+ EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL))) {
3488 if (!capable(CAP_LINUX_IMMUTABLE))
3489 goto flags_out;
3490 }
3491diff -NurpP --minimal linux-3.0.9/fs/ext4/namei.c linux-3.0.9-vs2.3.2.1/fs/ext4/namei.c
3492--- linux-3.0.9/fs/ext4/namei.c 2011-11-15 16:40:46.000000000 +0100
3493+++ linux-3.0.9-vs2.3.2.1/fs/ext4/namei.c 2011-11-15 17:37:06.000000000 +0100
3494@@ -34,6 +34,7 @@
3495 #include <linux/quotaops.h>
3496 #include <linux/buffer_head.h>
3497 #include <linux/bio.h>
3498+#include <linux/vs_tag.h>
3499 #include "ext4.h"
3500 #include "ext4_jbd2.h"
3501
3502@@ -924,6 +925,7 @@ restart:
3503 if (bh)
3504 ll_rw_block(READ_META, 1, &bh);
3505 }
3506+ dx_propagate_tag(nd, inode);
3507 }
3508 if ((bh = bh_use[ra_ptr++]) == NULL)
3509 goto next;
3510@@ -2598,6 +2600,7 @@ const struct inode_operations ext4_dir_i
3511 #endif
3512 .check_acl = ext4_check_acl,
3513 .fiemap = ext4_fiemap,
3514+ .sync_flags = ext4_sync_flags,
3515 };
3516
3517 const struct inode_operations ext4_special_inode_operations = {
3518diff -NurpP --minimal linux-3.0.9/fs/ext4/super.c linux-3.0.9-vs2.3.2.1/fs/ext4/super.c
3519--- linux-3.0.9/fs/ext4/super.c 2011-11-15 16:40:46.000000000 +0100
3520+++ linux-3.0.9-vs2.3.2.1/fs/ext4/super.c 2011-08-31 19:37:44.000000000 +0200
3521@@ -1293,6 +1293,7 @@ enum {
3522 Opt_inode_readahead_blks, Opt_journal_ioprio,
3523 Opt_dioread_nolock, Opt_dioread_lock,
3524 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
3525+ Opt_tag, Opt_notag, Opt_tagid
3526 };
3527
3528 static const match_table_t tokens = {
3529@@ -1368,6 +1369,9 @@ static const match_table_t tokens = {
3530 {Opt_init_itable, "init_itable=%u"},
3531 {Opt_init_itable, "init_itable"},
3532 {Opt_noinit_itable, "noinit_itable"},
3533+ {Opt_tag, "tag"},
3534+ {Opt_notag, "notag"},
3535+ {Opt_tagid, "tagid=%u"},
3536 {Opt_err, NULL},
3537 };
3538
3539@@ -1536,6 +1540,20 @@ static int parse_options(char *options,
3540 case Opt_nouid32:
3541 set_opt(sb, NO_UID32);
3542 break;
3543+#ifndef CONFIG_TAGGING_NONE
3544+ case Opt_tag:
3545+ set_opt(sb, TAGGED);
3546+ break;
3547+ case Opt_notag:
3548+ clear_opt(sb, TAGGED);
3549+ break;
3550+#endif
3551+#ifdef CONFIG_PROPAGATE
3552+ case Opt_tagid:
3553+ /* use args[0] */
3554+ set_opt(sb, TAGGED);
3555+ break;
3556+#endif
3557 case Opt_debug:
3558 set_opt(sb, DEBUG);
3559 break;
3560@@ -3193,6 +3211,9 @@ static int ext4_fill_super(struct super_
3561 &journal_ioprio, NULL, 0))
3562 goto failed_mount;
3563
3564+ if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED)
3565+ sb->s_flags |= MS_TAGGED;
3566+
3567 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3568 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3569
3570@@ -4323,6 +4344,14 @@ static int ext4_remount(struct super_blo
3571 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
3572 ext4_abort(sb, "Abort forced by user");
3573
3574+ if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) &&
3575+ !(sb->s_flags & MS_TAGGED)) {
3576+ printk("EXT4-fs: %s: tagging not permitted on remount.\n",
3577+ sb->s_id);
3578+ err = -EINVAL;
3579+ goto restore_opts;
3580+ }
3581+
3582 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3583 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3584
3585diff -NurpP --minimal linux-3.0.9/fs/fcntl.c linux-3.0.9-vs2.3.2.1/fs/fcntl.c
3586--- linux-3.0.9/fs/fcntl.c 2011-05-22 16:17:52.000000000 +0200
3587+++ linux-3.0.9-vs2.3.2.1/fs/fcntl.c 2011-06-10 22:11:24.000000000 +0200
3588@@ -20,6 +20,7 @@
3589 #include <linux/signal.h>
3590 #include <linux/rcupdate.h>
3591 #include <linux/pid_namespace.h>
3592+#include <linux/vs_limit.h>
3593
3594 #include <asm/poll.h>
3595 #include <asm/siginfo.h>
3596@@ -103,6 +104,8 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldf
3597
3598 if (tofree)
3599 filp_close(tofree, files);
3600+ else
3601+ vx_openfd_inc(newfd); /* fd was unused */
3602
3603 return newfd;
3604
3605@@ -447,6 +450,8 @@ SYSCALL_DEFINE3(fcntl, unsigned int, fd,
3606+ if (!vx_files_avail(1)) /* checked before fget_raw() so bailing out to 'out' holds no file reference */
3607+ goto out;
3608 filp = fget_raw(fd);
3609 if (!filp)
3610 goto out;
3611
3612 if (unlikely(filp->f_mode & FMODE_PATH)) {
3613 if (!check_fcntl_cmd(cmd)) {
3614diff -NurpP --minimal linux-3.0.9/fs/file.c linux-3.0.9-vs2.3.2.1/fs/file.c
3615--- linux-3.0.9/fs/file.c 2011-05-22 16:17:52.000000000 +0200
3616+++ linux-3.0.9-vs2.3.2.1/fs/file.c 2011-06-10 22:11:24.000000000 +0200
3617@@ -21,6 +21,7 @@
3618 #include <linux/spinlock.h>
3619 #include <linux/rcupdate.h>
3620 #include <linux/workqueue.h>
3621+#include <linux/vs_limit.h>
3622
3623 struct fdtable_defer {
3624 spinlock_t lock;
3625@@ -359,6 +360,8 @@ struct files_struct *dup_fd(struct files
3626 struct file *f = *old_fds++;
3627 if (f) {
3628 get_file(f);
3629+ /* TODO: sum it first for check and performance */
3630+ vx_openfd_inc(open_files - i);
3631 } else {
3632 /*
3633 * The fd may be claimed in the fd bitmap but not yet
3634@@ -466,6 +469,7 @@ repeat:
3635 else
3636 FD_CLR(fd, fdt->close_on_exec);
3637 error = fd;
3638+ vx_openfd_inc(fd);
3639 #if 1
3640 /* Sanity check */
3641 if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
3642diff -NurpP --minimal linux-3.0.9/fs/file_table.c linux-3.0.9-vs2.3.2.1/fs/file_table.c
3643--- linux-3.0.9/fs/file_table.c 2011-05-22 16:17:52.000000000 +0200
3644+++ linux-3.0.9-vs2.3.2.1/fs/file_table.c 2011-06-10 22:11:24.000000000 +0200
3645@@ -24,6 +24,8 @@
3646 #include <linux/percpu_counter.h>
3647 #include <linux/percpu.h>
3648 #include <linux/ima.h>
3649+#include <linux/vs_limit.h>
3650+#include <linux/vs_context.h>
3651
3652 #include <asm/atomic.h>
3653
3654@@ -135,6 +137,8 @@ struct file *get_empty_filp(void)
3655 spin_lock_init(&f->f_lock);
3656 eventpoll_init_file(f);
3657 /* f->f_version: 0 */
3658+ f->f_xid = vx_current_xid();
3659+ vx_files_inc(f);
3660 return f;
3661
3662 over:
3663@@ -253,6 +257,8 @@ static void __fput(struct file *file)
3664 }
3665 fops_put(file->f_op);
3666 put_pid(file->f_owner.pid);
3667+ vx_files_dec(file);
3668+ file->f_xid = 0;
3669 file_sb_list_del(file);
3670 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
3671 i_readcount_dec(inode);
3672@@ -383,6 +389,8 @@ void put_filp(struct file *file)
3673 {
3674 if (atomic_long_dec_and_test(&file->f_count)) {
3675 security_file_free(file);
3676+ vx_files_dec(file);
3677+ file->f_xid = 0;
3678 file_sb_list_del(file);
3679 file_free(file);
3680 }
3681diff -NurpP --minimal linux-3.0.9/fs/fs_struct.c linux-3.0.9-vs2.3.2.1/fs/fs_struct.c
3682--- linux-3.0.9/fs/fs_struct.c 2011-03-15 18:07:31.000000000 +0100
3683+++ linux-3.0.9-vs2.3.2.1/fs/fs_struct.c 2011-06-10 22:11:24.000000000 +0200
3684@@ -4,6 +4,7 @@
3685 #include <linux/path.h>
3686 #include <linux/slab.h>
3687 #include <linux/fs_struct.h>
3688+#include <linux/vserver/global.h>
3689 #include "internal.h"
3690
3691 static inline void path_get_longterm(struct path *path)
3692@@ -96,6 +97,7 @@ void free_fs_struct(struct fs_struct *fs
3693 {
3694 path_put_longterm(&fs->root);
3695 path_put_longterm(&fs->pwd);
3696+ atomic_dec(&vs_global_fs);
3697 kmem_cache_free(fs_cachep, fs);
3698 }
3699
3700@@ -135,6 +137,7 @@ struct fs_struct *copy_fs_struct(struct
3701 fs->pwd = old->pwd;
3702 path_get_longterm(&fs->pwd);
3703 spin_unlock(&old->lock);
3704+ atomic_inc(&vs_global_fs);
3705 }
3706 return fs;
3707 }
3708diff -NurpP --minimal linux-3.0.9/fs/gfs2/file.c linux-3.0.9-vs2.3.2.1/fs/gfs2/file.c
3709--- linux-3.0.9/fs/gfs2/file.c 2011-07-22 11:18:05.000000000 +0200
3710+++ linux-3.0.9-vs2.3.2.1/fs/gfs2/file.c 2011-06-10 22:11:24.000000000 +0200
3711@@ -134,6 +134,9 @@ static const u32 fsflags_to_gfs2[32] = {
3712 [7] = GFS2_DIF_NOATIME,
3713 [12] = GFS2_DIF_EXHASH,
3714 [14] = GFS2_DIF_INHERIT_JDATA,
3715+ [27] = GFS2_DIF_IXUNLINK,
3716+ [26] = GFS2_DIF_BARRIER,
3717+ [29] = GFS2_DIF_COW,
3718 };
3719
3720 static const u32 gfs2_to_fsflags[32] = {
3721@@ -143,6 +146,9 @@ static const u32 gfs2_to_fsflags[32] = {
3722 [gfs2fl_NoAtime] = FS_NOATIME_FL,
3723 [gfs2fl_ExHash] = FS_INDEX_FL,
3724 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
3725+ [gfs2fl_IXUnlink] = FS_IXUNLINK_FL,
3726+ [gfs2fl_Barrier] = FS_BARRIER_FL,
3727+ [gfs2fl_Cow] = FS_COW_FL,
3728 };
3729
3730 static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
3731@@ -173,10 +179,16 @@ void gfs2_set_inode_flags(struct inode *
3732 {
3733 struct gfs2_inode *ip = GFS2_I(inode);
3734 unsigned int flags = inode->i_flags;
3735+ unsigned int vflags = inode->i_vflags;
3736+
3737+ flags &= ~(S_IMMUTABLE | S_IXUNLINK |
3738+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
3739
3740- flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
3741 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
3742 flags |= S_IMMUTABLE;
3743+ if (ip->i_diskflags & GFS2_DIF_IXUNLINK)
3744+ flags |= S_IXUNLINK;
3745+
3746 if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
3747 flags |= S_APPEND;
3748 if (ip->i_diskflags & GFS2_DIF_NOATIME)
3749@@ -184,6 +196,43 @@ void gfs2_set_inode_flags(struct inode *
3750 if (ip->i_diskflags & GFS2_DIF_SYNC)
3751 flags |= S_SYNC;
3752 inode->i_flags = flags;
3753+
3754+ vflags &= ~(V_BARRIER | V_COW);
3755+
3756+ if (ip->i_diskflags & GFS2_DIF_BARRIER)
3757+ vflags |= V_BARRIER;
3758+ if (ip->i_diskflags & GFS2_DIF_COW)
3759+ vflags |= V_COW;
3760+ inode->i_vflags = vflags;
3761+}
3762+
3763+void gfs2_get_inode_flags(struct inode *inode) /* inverse of gfs2_set_inode_flags(): VFS i_flags/i_vflags -> ip->i_diskflags */
3764+{
3765+ struct gfs2_inode *ip = GFS2_I(inode);
3766+ unsigned int flags = inode->i_flags;
3767+ unsigned int vflags = inode->i_vflags;
3768+
3769+ ip->i_diskflags &= ~(GFS2_DIF_APPENDONLY |
3770+ GFS2_DIF_NOATIME | GFS2_DIF_SYNC |
3771+ GFS2_DIF_IMMUTABLE | GFS2_DIF_IXUNLINK |
3772+ GFS2_DIF_BARRIER | GFS2_DIF_COW); /* clear exactly the bits that are re-derived below */
3773+
3774+ if (flags & S_IMMUTABLE)
3775+ ip->i_diskflags |= GFS2_DIF_IMMUTABLE;
3776+ if (flags & S_IXUNLINK)
3777+ ip->i_diskflags |= GFS2_DIF_IXUNLINK;
3778+
3779+ if (flags & S_APPEND)
3780+ ip->i_diskflags |= GFS2_DIF_APPENDONLY;
3781+ if (flags & S_NOATIME)
3782+ ip->i_diskflags |= GFS2_DIF_NOATIME;
3783+ if (flags & S_SYNC)
3784+ ip->i_diskflags |= GFS2_DIF_SYNC;
3785+
3786+ if (vflags & V_BARRIER) /* barrier/COW live in i_vflags, not i_flags */
3787+ ip->i_diskflags |= GFS2_DIF_BARRIER;
3788+ if (vflags & V_COW)
3789+ ip->i_diskflags |= GFS2_DIF_COW;
3790 }
3791
3792 /* Flags that can be set by user space */
3793@@ -295,6 +344,37 @@ static int gfs2_set_flags(struct file *f
3794 return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
3795 }
3796
3797+int gfs2_sync_flags(struct inode *inode, int flags, int vflags) /* store new VFS flags/vflags and write them into the dinode buffer; returns 0 or the first helper error */
3798+{
3799+ struct gfs2_inode *ip = GFS2_I(inode);
3800+ struct gfs2_sbd *sdp = GFS2_SB(inode);
3801+ struct buffer_head *bh;
3802+ struct gfs2_holder gh;
3803+ int error;
3804+
3805+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); /* inode glock, exclusive state */
3806+ if (error)
3807+ return error;
3808+ error = gfs2_trans_begin(sdp, RES_DINODE, 0);
3809+ if (error)
3810+ goto out;
3811+ error = gfs2_meta_inode_buffer(ip, &bh);
3812+ if (error)
3813+ goto out_trans_end;
3814+ gfs2_trans_add_bh(ip->i_gl, bh, 1);
3815+ inode->i_flags = flags; /* wholesale overwrite, not a merge */
3816+ inode->i_vflags = vflags;
3817+ gfs2_get_inode_flags(inode); /* fold i_flags/i_vflags into ip->i_diskflags */
3818+ gfs2_dinode_out(ip, bh->b_data); /* NOTE(review): presumably serialises the in-core inode into the buffer - confirm */
3819+ brelse(bh);
3820+ gfs2_set_aops(inode);
3821+out_trans_end: /* success falls through both labels with error == 0 */
3822+ gfs2_trans_end(sdp);
3823+out:
3824+ gfs2_glock_dq_uninit(&gh);
3825+ return error;
3826+}
3827+
3828 static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3829 {
3830 switch(cmd) {
3831diff -NurpP --minimal linux-3.0.9/fs/gfs2/inode.h linux-3.0.9-vs2.3.2.1/fs/gfs2/inode.h
3832--- linux-3.0.9/fs/gfs2/inode.h 2011-07-22 11:18:05.000000000 +0200
3833+++ linux-3.0.9-vs2.3.2.1/fs/gfs2/inode.h 2011-06-10 22:11:24.000000000 +0200
3834@@ -120,6 +120,7 @@ extern const struct file_operations gfs2
3835 extern const struct file_operations gfs2_dir_fops_nolock;
3836
3837 extern void gfs2_set_inode_flags(struct inode *inode);
3838+extern int gfs2_sync_flags(struct inode *inode, int flags, int vflags);
3839
3840 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
3841 extern const struct file_operations gfs2_file_fops;
3842diff -NurpP --minimal linux-3.0.9/fs/inode.c linux-3.0.9-vs2.3.2.1/fs/inode.c
3843--- linux-3.0.9/fs/inode.c 2011-07-22 11:18:05.000000000 +0200
3844+++ linux-3.0.9-vs2.3.2.1/fs/inode.c 2011-07-01 11:35:34.000000000 +0200
3845@@ -26,6 +26,7 @@
3846 #include <linux/ima.h>
3847 #include <linux/cred.h>
3848 #include <linux/buffer_head.h> /* for inode_has_buffers */
3849+#include <linux/vs_tag.h>
3850 #include "internal.h"
3851
3852 /*
3853@@ -146,6 +147,9 @@ int inode_init_always(struct super_block
3854 struct address_space *const mapping = &inode->i_data;
3855
3856 inode->i_sb = sb;
3857+
3858+ /* essential because of inode slab reuse */
3859+ inode->i_tag = 0;
3860 inode->i_blkbits = sb->s_blocksize_bits;
3861 inode->i_flags = 0;
3862 atomic_set(&inode->i_count, 1);
3863@@ -166,6 +170,7 @@ int inode_init_always(struct super_block
3864 inode->i_bdev = NULL;
3865 inode->i_cdev = NULL;
3866 inode->i_rdev = 0;
3867+ inode->i_mdev = 0;
3868 inode->dirtied_when = 0;
3869
3870 if (security_inode_alloc(inode))
3871@@ -404,6 +409,8 @@ void __insert_inode_hash(struct inode *i
3872 }
3873 EXPORT_SYMBOL(__insert_inode_hash);
3874
3875+EXPORT_SYMBOL_GPL(__iget);
3876+
3877 /**
3878 * remove_inode_hash - remove an inode from the hash
3879 * @inode: inode to unhash
3880@@ -1643,9 +1650,11 @@ void init_special_inode(struct inode *in
3881 if (S_ISCHR(mode)) {
3882 inode->i_fop = &def_chr_fops;
3883 inode->i_rdev = rdev;
3884+ inode->i_mdev = rdev;
3885 } else if (S_ISBLK(mode)) {
3886 inode->i_fop = &def_blk_fops;
3887 inode->i_rdev = rdev;
3888+ inode->i_mdev = rdev;
3889 } else if (S_ISFIFO(mode))
3890 inode->i_fop = &def_fifo_fops;
3891 else if (S_ISSOCK(mode))
3892@@ -1674,6 +1683,7 @@ void inode_init_owner(struct inode *inod
3893 } else
3894 inode->i_gid = current_fsgid();
3895 inode->i_mode = mode;
3896+ inode->i_tag = dx_current_fstag(inode->i_sb);
3897 }
3898 EXPORT_SYMBOL(inode_init_owner);
3899
3900diff -NurpP --minimal linux-3.0.9/fs/ioctl.c linux-3.0.9-vs2.3.2.1/fs/ioctl.c
3901--- linux-3.0.9/fs/ioctl.c 2011-05-22 16:17:52.000000000 +0200
3902+++ linux-3.0.9-vs2.3.2.1/fs/ioctl.c 2011-06-10 22:11:24.000000000 +0200
3903@@ -15,6 +15,9 @@
3904 #include <linux/writeback.h>
3905 #include <linux/buffer_head.h>
3906 #include <linux/falloc.h>
3907+#include <linux/proc_fs.h>
3908+#include <linux/vserver/inode.h>
3909+#include <linux/vs_tag.h>
3910
3911 #include <asm/ioctls.h>
3912
3913diff -NurpP --minimal linux-3.0.9/fs/ioprio.c linux-3.0.9-vs2.3.2.1/fs/ioprio.c
3914--- linux-3.0.9/fs/ioprio.c 2011-01-05 21:50:24.000000000 +0100
3915+++ linux-3.0.9-vs2.3.2.1/fs/ioprio.c 2011-06-10 22:11:24.000000000 +0200
3916@@ -27,6 +27,7 @@
3917 #include <linux/syscalls.h>
3918 #include <linux/security.h>
3919 #include <linux/pid_namespace.h>
3920+#include <linux/vs_base.h>
3921
3922 int set_task_ioprio(struct task_struct *task, int ioprio)
3923 {
3924@@ -119,6 +120,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which,
3925 else
3926 pgrp = find_vpid(who);
3927 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
3928+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
3929+ continue;
3930 ret = set_task_ioprio(p, ioprio);
3931 if (ret)
3932 break;
3933@@ -208,6 +211,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which,
3934 else
3935 pgrp = find_vpid(who);
3936 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
3937+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
3938+ continue;
3939 tmpio = get_task_ioprio(p);
3940 if (tmpio < 0)
3941 continue;
3942diff -NurpP --minimal linux-3.0.9/fs/jfs/file.c linux-3.0.9-vs2.3.2.1/fs/jfs/file.c
3943--- linux-3.0.9/fs/jfs/file.c 2011-07-22 11:18:05.000000000 +0200
3944+++ linux-3.0.9-vs2.3.2.1/fs/jfs/file.c 2011-07-01 11:35:34.000000000 +0200
3945@@ -102,7 +102,8 @@ int jfs_setattr(struct dentry *dentry, s
3946 if (is_quota_modification(inode, iattr))
3947 dquot_initialize(inode);
3948 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
3949- (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
3950+ (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) ||
3951+ (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) {
3952 rc = dquot_transfer(inode, iattr);
3953 if (rc)
3954 return rc;
3955@@ -133,6 +134,7 @@ const struct inode_operations jfs_file_i
3956 #ifdef CONFIG_JFS_POSIX_ACL
3957 .check_acl = jfs_check_acl,
3958 #endif
3959+ .sync_flags = jfs_sync_flags,
3960 };
3961
3962 const struct file_operations jfs_file_operations = {
3963diff -NurpP --minimal linux-3.0.9/fs/jfs/ioctl.c linux-3.0.9-vs2.3.2.1/fs/jfs/ioctl.c
3964--- linux-3.0.9/fs/jfs/ioctl.c 2011-05-22 16:17:52.000000000 +0200
3965+++ linux-3.0.9-vs2.3.2.1/fs/jfs/ioctl.c 2011-06-10 22:11:24.000000000 +0200
3966@@ -11,6 +11,7 @@
3967 #include <linux/mount.h>
3968 #include <linux/time.h>
3969 #include <linux/sched.h>
3970+#include <linux/mount.h>
3971 #include <asm/current.h>
3972 #include <asm/uaccess.h>
3973
3974@@ -52,6 +53,16 @@ static long jfs_map_ext2(unsigned long f
3975 }
3976
3977
3978+int jfs_sync_flags(struct inode *inode, int flags, int vflags) /* store new VFS flags/vflags on a JFS inode and mark it dirty */
3979+{
3980+ inode->i_flags = flags; /* wholesale overwrite, not a merge */
3981+ inode->i_vflags = vflags;
3982+ jfs_get_inode_flags(JFS_IP(inode)); /* fold i_flags/i_vflags into jfs_ip->mode2 */
3983+ inode->i_ctime = CURRENT_TIME_SEC;
3984+ mark_inode_dirty(inode);
3985+ return 0; /* no failure path: nothing above reports an error */
3986+}
3987+
3988 long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3989 {
3990 struct inode *inode = filp->f_dentry->d_inode;
3991@@ -85,6 +96,11 @@ long jfs_ioctl(struct file *filp, unsign
3992 if (!S_ISDIR(inode->i_mode))
3993 flags &= ~JFS_DIRSYNC_FL;
3994
3995+ if (IS_BARRIER(inode)) {
3996+ vxwprintk_task(1, "messing with the barrier.");
3997+ return -EACCES;
3998+ }
3999+
4000 /* Is it quota file? Do not allow user to mess with it */
4001 if (IS_NOQUOTA(inode)) {
4002 err = -EPERM;
4003@@ -102,8 +118,8 @@ long jfs_ioctl(struct file *filp, unsign
4004 * the relevant capability.
4005 */
4006 if ((oldflags & JFS_IMMUTABLE_FL) ||
4007- ((flags ^ oldflags) &
4008- (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
4009+ ((flags ^ oldflags) & (JFS_APPEND_FL |
4010+ JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL))) {
4011 if (!capable(CAP_LINUX_IMMUTABLE)) {
4012 mutex_unlock(&inode->i_mutex);
4013 err = -EPERM;
4014@@ -111,7 +127,7 @@ long jfs_ioctl(struct file *filp, unsign
4015 }
4016 }
4017
4018- flags = flags & JFS_FL_USER_MODIFIABLE;
4019+ flags &= JFS_FL_USER_MODIFIABLE;
4020 flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
4021 jfs_inode->mode2 = flags;
4022
4023diff -NurpP --minimal linux-3.0.9/fs/jfs/jfs_dinode.h linux-3.0.9-vs2.3.2.1/fs/jfs/jfs_dinode.h
4024--- linux-3.0.9/fs/jfs/jfs_dinode.h 2008-12-25 00:26:37.000000000 +0100
4025+++ linux-3.0.9-vs2.3.2.1/fs/jfs/jfs_dinode.h 2011-06-10 22:11:24.000000000 +0200
4026@@ -161,9 +161,13 @@ struct dinode {
4027
4028 #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */
4029 #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */
4030+#define JFS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
4031
4032-#define JFS_FL_USER_VISIBLE 0x03F80000
4033-#define JFS_FL_USER_MODIFIABLE 0x03F80000
4034+#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
4035+#define JFS_COW_FL 0x20000000 /* Copy on Write marker */
4036+
4037+#define JFS_FL_USER_VISIBLE 0x07F80000
4038+#define JFS_FL_USER_MODIFIABLE 0x07F80000
4039 #define JFS_FL_INHERIT 0x03C80000
4040
4041 /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */
4042diff -NurpP --minimal linux-3.0.9/fs/jfs/jfs_filsys.h linux-3.0.9-vs2.3.2.1/fs/jfs/jfs_filsys.h
4043--- linux-3.0.9/fs/jfs/jfs_filsys.h 2008-12-25 00:26:37.000000000 +0100
4044+++ linux-3.0.9-vs2.3.2.1/fs/jfs/jfs_filsys.h 2011-06-10 22:11:24.000000000 +0200
4045@@ -263,6 +263,7 @@
4046 #define JFS_NAME_MAX 255
4047 #define JFS_PATH_MAX BPSIZE
4048
4049+#define JFS_TAGGED 0x00800000 /* Context Tagging */
4050
4051 /*
4052 * file system state (superblock state)
4053diff -NurpP --minimal linux-3.0.9/fs/jfs/jfs_imap.c linux-3.0.9-vs2.3.2.1/fs/jfs/jfs_imap.c
4054--- linux-3.0.9/fs/jfs/jfs_imap.c 2011-07-22 11:18:05.000000000 +0200
4055+++ linux-3.0.9-vs2.3.2.1/fs/jfs/jfs_imap.c 2011-07-01 11:35:34.000000000 +0200
4056@@ -46,6 +46,7 @@
4057 #include <linux/pagemap.h>
4058 #include <linux/quotaops.h>
4059 #include <linux/slab.h>
4060+#include <linux/vs_tag.h>
4061
4062 #include "jfs_incore.h"
4063 #include "jfs_inode.h"
4064@@ -3058,6 +3059,8 @@ static int copy_from_dinode(struct dinod
4065 {
4066 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
4067 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
4068+ uid_t uid;
4069+ gid_t gid;
4070
4071 jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
4072 jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
4073@@ -3078,14 +3081,18 @@ static int copy_from_dinode(struct dinod
4074 }
4075 ip->i_nlink = le32_to_cpu(dip->di_nlink);
4076
4077- jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
4078+ uid = le32_to_cpu(dip->di_uid);
4079+ gid = le32_to_cpu(dip->di_gid);
4080+ ip->i_tag = INOTAG_TAG(DX_TAG(ip), uid, gid, 0);
4081+
4082+ jfs_ip->saved_uid = INOTAG_UID(DX_TAG(ip), uid, gid);
4083 if (sbi->uid == -1)
4084 ip->i_uid = jfs_ip->saved_uid;
4085 else {
4086 ip->i_uid = sbi->uid;
4087 }
4088
4089- jfs_ip->saved_gid = le32_to_cpu(dip->di_gid);
4090+ jfs_ip->saved_gid = INOTAG_GID(DX_TAG(ip), uid, gid);
4091 if (sbi->gid == -1)
4092 ip->i_gid = jfs_ip->saved_gid;
4093 else {
4094@@ -3150,14 +3157,12 @@ static void copy_to_dinode(struct dinode
4095 dip->di_size = cpu_to_le64(ip->i_size);
4096 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
4097 dip->di_nlink = cpu_to_le32(ip->i_nlink);
4098- if (sbi->uid == -1)
4099- dip->di_uid = cpu_to_le32(ip->i_uid);
4100- else
4101- dip->di_uid = cpu_to_le32(jfs_ip->saved_uid);
4102- if (sbi->gid == -1)
4103- dip->di_gid = cpu_to_le32(ip->i_gid);
4104- else
4105- dip->di_gid = cpu_to_le32(jfs_ip->saved_gid);
4106+
4107+ dip->di_uid = cpu_to_le32(TAGINO_UID(DX_TAG(ip),
4108+ (sbi->uid == -1) ? ip->i_uid : jfs_ip->saved_uid, ip->i_tag));
4109+ dip->di_gid = cpu_to_le32(TAGINO_GID(DX_TAG(ip),
4110+ (sbi->gid == -1) ? ip->i_gid : jfs_ip->saved_gid, ip->i_tag));
4111+
4112 jfs_get_inode_flags(jfs_ip);
4113 /*
4114 * mode2 is only needed for storing the higher order bits.
4115diff -NurpP --minimal linux-3.0.9/fs/jfs/jfs_inode.c linux-3.0.9-vs2.3.2.1/fs/jfs/jfs_inode.c
4116--- linux-3.0.9/fs/jfs/jfs_inode.c 2010-08-02 16:52:49.000000000 +0200
4117+++ linux-3.0.9-vs2.3.2.1/fs/jfs/jfs_inode.c 2011-06-10 22:11:24.000000000 +0200
4118@@ -18,6 +18,7 @@
4119
4120 #include <linux/fs.h>
4121 #include <linux/quotaops.h>
4122+#include <linux/vs_tag.h>
4123 #include "jfs_incore.h"
4124 #include "jfs_inode.h"
4125 #include "jfs_filsys.h"
4126@@ -30,29 +31,46 @@ void jfs_set_inode_flags(struct inode *i
4127 {
4128 unsigned int flags = JFS_IP(inode)->mode2;
4129
4130- inode->i_flags &= ~(S_IMMUTABLE | S_APPEND |
4131- S_NOATIME | S_DIRSYNC | S_SYNC);
4132+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
4133+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
4134
4135 if (flags & JFS_IMMUTABLE_FL)
4136 inode->i_flags |= S_IMMUTABLE;
4137+ if (flags & JFS_IXUNLINK_FL)
4138+ inode->i_flags |= S_IXUNLINK;
4139+
4140+ if (flags & JFS_SYNC_FL)
4141+ inode->i_flags |= S_SYNC;
4142 if (flags & JFS_APPEND_FL)
4143 inode->i_flags |= S_APPEND;
4144 if (flags & JFS_NOATIME_FL)
4145 inode->i_flags |= S_NOATIME;
4146 if (flags & JFS_DIRSYNC_FL)
4147 inode->i_flags |= S_DIRSYNC;
4148- if (flags & JFS_SYNC_FL)
4149- inode->i_flags |= S_SYNC;
4150+
4151+ inode->i_vflags &= ~(V_BARRIER | V_COW);
4152+
4153+ if (flags & JFS_BARRIER_FL)
4154+ inode->i_vflags |= V_BARRIER;
4155+ if (flags & JFS_COW_FL)
4156+ inode->i_vflags |= V_COW;
4157 }
4158
4159 void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip)
4160 {
4161 unsigned int flags = jfs_ip->vfs_inode.i_flags;
4162+ unsigned int vflags = jfs_ip->vfs_inode.i_vflags;
4163+
4164+ jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL |
4165+ JFS_APPEND_FL | JFS_NOATIME_FL |
4166+ JFS_DIRSYNC_FL | JFS_SYNC_FL |
4167+ JFS_BARRIER_FL | JFS_COW_FL);
4168
4169- jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL |
4170- JFS_DIRSYNC_FL | JFS_SYNC_FL);
4171 if (flags & S_IMMUTABLE)
4172 jfs_ip->mode2 |= JFS_IMMUTABLE_FL;
4173+ if (flags & S_IXUNLINK)
4174+ jfs_ip->mode2 |= JFS_IXUNLINK_FL;
4175+
4176 if (flags & S_APPEND)
4177 jfs_ip->mode2 |= JFS_APPEND_FL;
4178 if (flags & S_NOATIME)
4179@@ -61,6 +79,11 @@ void jfs_get_inode_flags(struct jfs_inod
4180 jfs_ip->mode2 |= JFS_DIRSYNC_FL;
4181 if (flags & S_SYNC)
4182 jfs_ip->mode2 |= JFS_SYNC_FL;
4183+
4184+ if (vflags & V_BARRIER)
4185+ jfs_ip->mode2 |= JFS_BARRIER_FL;
4186+ if (vflags & V_COW)
4187+ jfs_ip->mode2 |= JFS_COW_FL;
4188 }
4189
4190 /*
4191diff -NurpP --minimal linux-3.0.9/fs/jfs/jfs_inode.h linux-3.0.9-vs2.3.2.1/fs/jfs/jfs_inode.h
4192--- linux-3.0.9/fs/jfs/jfs_inode.h 2011-07-22 11:18:05.000000000 +0200
4193+++ linux-3.0.9-vs2.3.2.1/fs/jfs/jfs_inode.h 2011-06-10 22:11:24.000000000 +0200
4194@@ -39,6 +39,7 @@ extern struct dentry *jfs_fh_to_dentry(s
4195 extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
4196 int fh_len, int fh_type);
4197 extern void jfs_set_inode_flags(struct inode *);
4198+extern int jfs_sync_flags(struct inode *, int, int);
4199 extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
4200 extern int jfs_setattr(struct dentry *, struct iattr *);
4201
4202diff -NurpP --minimal linux-3.0.9/fs/jfs/namei.c linux-3.0.9-vs2.3.2.1/fs/jfs/namei.c
4203--- linux-3.0.9/fs/jfs/namei.c 2011-05-22 16:17:53.000000000 +0200
4204+++ linux-3.0.9-vs2.3.2.1/fs/jfs/namei.c 2011-06-10 22:11:24.000000000 +0200
4205@@ -22,6 +22,7 @@
4206 #include <linux/ctype.h>
4207 #include <linux/quotaops.h>
4208 #include <linux/exportfs.h>
4209+#include <linux/vs_tag.h>
4210 #include "jfs_incore.h"
4211 #include "jfs_superblock.h"
4212 #include "jfs_inode.h"
4213@@ -1486,6 +1487,7 @@ static struct dentry *jfs_lookup(struct
4214 return ERR_CAST(ip);
4215 }
4216
4217+ dx_propagate_tag(nd, ip);
4218 return d_splice_alias(ip, dentry);
4219 }
4220
4221@@ -1550,6 +1552,7 @@ const struct inode_operations jfs_dir_in
4222 #ifdef CONFIG_JFS_POSIX_ACL
4223 .check_acl = jfs_check_acl,
4224 #endif
4225+ .sync_flags = jfs_sync_flags,
4226 };
4227
4228 const struct file_operations jfs_dir_operations = {
4229diff -NurpP --minimal linux-3.0.9/fs/jfs/super.c linux-3.0.9-vs2.3.2.1/fs/jfs/super.c
4230--- linux-3.0.9/fs/jfs/super.c 2011-05-22 16:17:53.000000000 +0200
4231+++ linux-3.0.9-vs2.3.2.1/fs/jfs/super.c 2011-06-10 22:11:24.000000000 +0200
4232@@ -198,7 +198,8 @@ static void jfs_put_super(struct super_b
4233 enum {
4234 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
4235 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
4236- Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask
4237+ Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
4238+ Opt_tag, Opt_notag, Opt_tagid
4239 };
4240
4241 static const match_table_t tokens = {
4242@@ -208,6 +209,10 @@ static const match_table_t tokens = {
4243 {Opt_resize, "resize=%u"},
4244 {Opt_resize_nosize, "resize"},
4245 {Opt_errors, "errors=%s"},
4246+ {Opt_tag, "tag"},
4247+ {Opt_notag, "notag"},
4248+ {Opt_tagid, "tagid=%u"},
4249+ {Opt_tag, "tagxid"},
4250 {Opt_ignore, "noquota"},
4251 {Opt_ignore, "quota"},
4252 {Opt_usrquota, "usrquota"},
4253@@ -342,6 +347,20 @@ static int parse_options(char *options,
4254 }
4255 break;
4256 }
4257+#ifndef CONFIG_TAGGING_NONE
4258+ case Opt_tag: /* matched by both "tag" and "tagxid" in tokens[] */
4259+ *flag |= JFS_TAGGED;
4260+ break;
4261+ case Opt_notag: /* clear only the tagging bit, keep all other flags */
4262+ *flag &= ~JFS_TAGGED;
4263+ break;
4264+#endif
4265+#ifdef CONFIG_PROPAGATE
4266+ case Opt_tagid:
4267+ /* use args[0] */
4268+ *flag |= JFS_TAGGED;
4269+ break;
4270+#endif
4271 default:
4272 printk("jfs: Unrecognized mount option \"%s\" "
4273 " or missing value\n", p);
4274@@ -373,6 +392,12 @@ static int jfs_remount(struct super_bloc
4275 return -EINVAL;
4276 }
4277
4278+ if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) {
4279+ printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n",
4280+ sb->s_id);
4281+ return -EINVAL;
4282+ }
4283+
4284 if (newLVSize) {
4285 if (sb->s_flags & MS_RDONLY) {
4286 printk(KERN_ERR
4287@@ -455,6 +480,9 @@ static int jfs_fill_super(struct super_b
4288 #ifdef CONFIG_JFS_POSIX_ACL
4289 sb->s_flags |= MS_POSIXACL;
4290 #endif
4291+ /* map mount option tagxid */
4292+ if (sbi->flag & JFS_TAGGED)
4293+ sb->s_flags |= MS_TAGGED;
4294
4295 if (newLVSize) {
4296 printk(KERN_ERR "resize option for remount only\n");
4297diff -NurpP --minimal linux-3.0.9/fs/libfs.c linux-3.0.9-vs2.3.2.1/fs/libfs.c
4298--- linux-3.0.9/fs/libfs.c 2011-07-22 11:18:05.000000000 +0200
4299+++ linux-3.0.9-vs2.3.2.1/fs/libfs.c 2011-07-22 11:20:39.000000000 +0200
4300@@ -133,7 +133,8 @@ static inline unsigned char dt_type(stru
4301 * both impossible due to the lock on directory.
4302 */
4303
4304-int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
4305+static inline int do_dcache_readdir_filter(struct file *filp,
4306+ void *dirent, filldir_t filldir, int (*filter)(struct dentry *dentry))
4307 {
4308 struct dentry *dentry = filp->f_path.dentry;
4309 struct dentry *cursor = filp->private_data;
4310@@ -164,6 +165,8 @@ int dcache_readdir(struct file * filp, v
4311 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
4312 struct dentry *next;
4313 next = list_entry(p, struct dentry, d_u.d_child);
4314+ if (filter && !filter(next))
4315+ continue;
4316 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
4317 if (!simple_positive(next)) {
4318 spin_unlock(&next->d_lock);
4319@@ -190,6 +193,17 @@ int dcache_readdir(struct file * filp, v
4320 return 0;
4321 }
4322
4323+int dcache_readdir(struct file *filp, void *dirent, filldir_t filldir)
4324+{ /* NULL filter: the filter hook never skips a child dentry */
4325+ return do_dcache_readdir_filter(filp, dirent, filldir, NULL);
4326+}
4327+
4328+int dcache_readdir_filter(struct file *filp, void *dirent, filldir_t filldir,
4329+ int (*filter)(struct dentry *))
4330+{ /* children for which filter() returns 0 are left out of the listing */
4331+ return do_dcache_readdir_filter(filp, dirent, filldir, filter);
4332+}
4333+
4334 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
4335 {
4336 return -EISDIR;
4337@@ -965,6 +979,7 @@ EXPORT_SYMBOL(dcache_dir_close);
4338 EXPORT_SYMBOL(dcache_dir_lseek);
4339 EXPORT_SYMBOL(dcache_dir_open);
4340 EXPORT_SYMBOL(dcache_readdir);
4341+EXPORT_SYMBOL(dcache_readdir_filter);
4342 EXPORT_SYMBOL(generic_read_dir);
4343 EXPORT_SYMBOL(mount_pseudo);
4344 EXPORT_SYMBOL(simple_write_begin);
4345diff -NurpP --minimal linux-3.0.9/fs/locks.c linux-3.0.9-vs2.3.2.1/fs/locks.c
4346--- linux-3.0.9/fs/locks.c 2011-07-22 11:18:05.000000000 +0200
4347+++ linux-3.0.9-vs2.3.2.1/fs/locks.c 2011-07-19 00:51:58.000000000 +0200
4348@@ -126,6 +126,8 @@
4349 #include <linux/time.h>
4350 #include <linux/rcupdate.h>
4351 #include <linux/pid_namespace.h>
4352+#include <linux/vs_base.h>
4353+#include <linux/vs_limit.h>
4354
4355 #include <asm/uaccess.h>
4356
4357@@ -171,13 +173,19 @@ static void locks_init_lock_always(struc
4358 fl->fl_flags = 0;
4359 fl->fl_type = 0;
4360 fl->fl_start = fl->fl_end = 0;
4361+ fl->fl_xid = -1;
4362 }
4363
4364+
4365 /* Allocate an empty lock structure. */
4366 struct file_lock *locks_alloc_lock(void)
4367 {
4368- struct file_lock *fl = kmem_cache_alloc(filelock_cache, GFP_KERNEL);
4369+ struct file_lock *fl;
4370+
4371+ if (!vx_locks_avail(1))
4372+ return NULL;
4373
4374+ fl = kmem_cache_alloc(filelock_cache, GFP_KERNEL);
4375 if (fl)
4376 locks_init_lock_always(fl);
4377
4378@@ -208,6 +216,7 @@ void locks_free_lock(struct file_lock *f
4379 BUG_ON(!list_empty(&fl->fl_block));
4380 BUG_ON(!list_empty(&fl->fl_link));
4381
4382+ vx_locks_dec(fl);
4383 locks_release_private(fl);
4384 kmem_cache_free(filelock_cache, fl);
4385 }
4386@@ -272,6 +281,7 @@ void locks_copy_lock(struct file_lock *n
4387 new->fl_file = fl->fl_file;
4388 new->fl_ops = fl->fl_ops;
4389 new->fl_lmops = fl->fl_lmops;
4390+ new->fl_xid = fl->fl_xid;
4391
4392 locks_copy_private(new, fl);
4393 }
4394@@ -310,6 +320,11 @@ static int flock_make_lock(struct file *
4395 fl->fl_flags = FL_FLOCK;
4396 fl->fl_type = type;
4397 fl->fl_end = OFFSET_MAX;
4398+
4399+ vxd_assert(filp->f_xid == vx_current_xid(),
4400+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
4401+ fl->fl_xid = filp->f_xid;
4402+ vx_locks_inc(fl);
4403
4404 *lock = fl;
4405 return 0;
4406@@ -459,6 +474,7 @@ static int lease_init(struct file *filp,
4407
4408 fl->fl_owner = current->files;
4409 fl->fl_pid = current->tgid;
4410+ fl->fl_xid = vx_current_xid();
4411
4412 fl->fl_file = filp;
4413 fl->fl_flags = FL_LEASE;
4414@@ -478,6 +494,11 @@ static struct file_lock *lease_alloc(str
4415 if (fl == NULL)
4416 return ERR_PTR(error);
4417
4418+ fl->fl_xid = vx_current_xid();
4419+ if (filp)
4420+ vxd_assert(filp->f_xid == fl->fl_xid,
4421+ "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid);
4422+ vx_locks_inc(fl);
4423 error = lease_init(filp, type, fl);
4424 if (error) {
4425 locks_free_lock(fl);
4426@@ -779,6 +800,7 @@ static int flock_lock_file(struct file *
4427 lock_flocks();
4428 }
4429
4430+ new_fl->fl_xid = -1;
4431 find_conflict:
4432 for_each_lock(inode, before) {
4433 struct file_lock *fl = *before;
4434@@ -799,6 +821,7 @@ find_conflict:
4435 goto out;
4436 locks_copy_lock(new_fl, request);
4437 locks_insert_lock(before, new_fl);
4438+ vx_locks_inc(new_fl);
4439 new_fl = NULL;
4440 error = 0;
4441
4442@@ -809,7 +832,8 @@ out:
4443 return error;
4444 }
4445
4446-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
4447+static int __posix_lock_file(struct inode *inode, struct file_lock *request,
4448+ struct file_lock *conflock, xid_t xid)
4449 {
4450 struct file_lock *fl;
4451 struct file_lock *new_fl = NULL;
4452@@ -819,6 +843,8 @@ static int __posix_lock_file(struct inod
4453 struct file_lock **before;
4454 int error, added = 0;
4455
4456+ vxd_assert(xid == vx_current_xid(),
4457+ "xid(%d) == current(%d)", xid, vx_current_xid());
4458 /*
4459 * We may need two file_lock structures for this operation,
4460 * so we get them in advance to avoid races.
4461@@ -829,7 +855,11 @@ static int __posix_lock_file(struct inod
4462 (request->fl_type != F_UNLCK ||
4463 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
4464 new_fl = locks_alloc_lock();
4465+ new_fl->fl_xid = xid;
4466+ vx_locks_inc(new_fl);
4467 new_fl2 = locks_alloc_lock();
4468+ new_fl2->fl_xid = xid;
4469+ vx_locks_inc(new_fl2);
4470 }
4471
4472 lock_flocks();
4473@@ -1028,7 +1058,8 @@ static int __posix_lock_file(struct inod
4474 int posix_lock_file(struct file *filp, struct file_lock *fl,
4475 struct file_lock *conflock)
4476 {
4477- return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock);
4478+ return __posix_lock_file(filp->f_path.dentry->d_inode,
4479+ fl, conflock, filp->f_xid);
4480 }
4481 EXPORT_SYMBOL(posix_lock_file);
4482
4483@@ -1118,7 +1149,7 @@ int locks_mandatory_area(int read_write,
4484 fl.fl_end = offset + count - 1;
4485
4486 for (;;) {
4487- error = __posix_lock_file(inode, &fl, NULL);
4488+ error = __posix_lock_file(inode, &fl, NULL, filp->f_xid);
4489 if (error != FILE_LOCK_DEFERRED)
4490 break;
4491 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
4492@@ -1431,6 +1462,7 @@ int generic_setlease(struct file *filp,
4493 goto out;
4494
4495 locks_insert_lock(before, lease);
4496+ vx_locks_inc(lease);
4497 return 0;
4498
4499 out:
4500@@ -1815,6 +1847,11 @@ int fcntl_setlk(unsigned int fd, struct
4501 if (file_lock == NULL)
4502 return -ENOLCK;
4503
4504+ vxd_assert(filp->f_xid == vx_current_xid(),
4505+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
4506+ file_lock->fl_xid = filp->f_xid;
4507+ vx_locks_inc(file_lock);
4508+
4509 /*
4510 * This might block, so we do it before checking the inode.
4511 */
4512@@ -1933,6 +1970,11 @@ int fcntl_setlk64(unsigned int fd, struc
4513 if (file_lock == NULL)
4514 return -ENOLCK;
4515
4516+ vxd_assert(filp->f_xid == vx_current_xid(),
4517+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
4518+ file_lock->fl_xid = filp->f_xid;
4519+ vx_locks_inc(file_lock);
4520+
4521 /*
4522 * This might block, so we do it before checking the inode.
4523 */
4524@@ -2198,8 +2240,11 @@ static int locks_show(struct seq_file *f
4525
4526 lock_get_status(f, fl, *((loff_t *)f->private), "");
4527
4528- list_for_each_entry(bfl, &fl->fl_block, fl_block)
4529+ list_for_each_entry(bfl, &fl->fl_block, fl_block) {
4530+ if (!vx_check(fl->fl_xid, VS_WATCH_P | VS_IDENT))
4531+ continue;
4532 lock_get_status(f, bfl, *((loff_t *)f->private), " ->");
4533+ }
4534
4535 return 0;
4536 }
4537diff -NurpP --minimal linux-3.0.9/fs/namei.c linux-3.0.9-vs2.3.2.1/fs/namei.c
4538--- linux-3.0.9/fs/namei.c 2011-11-15 16:40:47.000000000 +0100
4539+++ linux-3.0.9-vs2.3.2.1/fs/namei.c 2011-11-16 14:27:47.000000000 +0100
4540@@ -32,6 +32,14 @@
4541 #include <linux/fcntl.h>
4542 #include <linux/device_cgroup.h>
4543 #include <linux/fs_struct.h>
4544+#include <linux/proc_fs.h>
4545+#include <linux/vserver/inode.h>
4546+#include <linux/vs_base.h>
4547+#include <linux/vs_tag.h>
4548+#include <linux/vs_cowbl.h>
4549+#include <linux/vs_device.h>
4550+#include <linux/vs_context.h>
4551+#include <linux/pid_namespace.h>
4552 #include <asm/uaccess.h>
4553
4554 #include "internal.h"
4555@@ -175,6 +183,89 @@ void putname(const char *name)
4556 EXPORT_SYMBOL(putname);
4557 #endif
4558
4559+static inline int dx_barrier(const struct inode *inode)
4560+{
4561+ /* only a barrier inode seen without VS_ADMIN/VS_WATCH counts as a hit */
4562+ if (!IS_BARRIER(inode) || vx_check(0, VS_ADMIN | VS_WATCH))
4563+ return 0;
4564+ vxwprintk_task(1, "did hit the barrier.");
4565+ return 1;
4566+}
4567+
4568+static int __dx_permission(const struct inode *inode, int mask)
4569+{ /* returns 0 to allow, -EACCES or -ENOENT to refuse */
4570+ if (dx_barrier(inode))
4571+ return -EACCES;
4572+
4573+ if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) {
4574+ /* devpts is xid tagged */
4575+ if (S_ISDIR(inode->i_mode) ||
4576+ vx_check((xid_t)inode->i_tag, VS_IDENT | VS_WATCH_P))
4577+ return 0;
4578+
4579+ /* just pretend we didn't find anything */
4580+ return -ENOENT;
4581+ }
4582+ else if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
4583+ struct proc_dir_entry *de = PDE(inode);
4584+
4585+ if (de && !vx_hide_check(0, de->vx_flags))
4586+ goto out;
4587+
4588+ if ((mask & (MAY_WRITE | MAY_APPEND))) { /* write-type access to /proc */
4589+ struct pid *pid;
4590+ struct task_struct *tsk;
4591+
4592+ if (vx_check(0, VS_ADMIN | VS_WATCH_P) ||
4593+ vx_flags(VXF_STATE_SETUP, 0))
4594+ return 0;
4595+
4596+ pid = PROC_I(inode)->pid;
4597+ if (!pid)
4598+ goto out;
4599+
4600+ rcu_read_lock(); /* tsk is only dereferenced inside this RCU section */
4601+ tsk = pid_task(pid, PIDTYPE_PID);
4602+ vxdprintk(VXD_CBIT(tag, 0), "accessing %p[#%u]",
4603+ tsk, (tsk ? vx_task_xid(tsk) : 0));
4604+ if (tsk &&
4605+ vx_check(vx_task_xid(tsk), VS_IDENT | VS_WATCH_P)) {
4606+ rcu_read_unlock();
4607+ return 0;
4608+ }
4609+ rcu_read_unlock();
4610+ }
4611+ else {
4612+ /* FIXME: Should we block some entries here? */
4613+ return 0;
4614+ }
4615+ }
4616+ else {
4617+ if (dx_notagcheck(inode->i_sb) ||
4618+ dx_check(inode->i_tag, DX_HOSTID | DX_ADMIN | DX_WATCH |
4619+ DX_IDENT))
4620+ return 0;
4621+ }
4622+
4623+out: /* every path that reaches this label is refused */
4624+ return -EACCES;
4625+}
4626+
4627+int dx_permission(const struct inode *inode, int mask)
4628+{ /* runs __dx_permission() and emits a warning when it refuses */
4629+ int ret = __dx_permission(inode, mask);
4630+ if (unlikely(ret)) {
4631+#ifndef CONFIG_VSERVER_WARN_DEVPTS
4632+ if (inode->i_sb->s_magic != DEVPTS_SUPER_MAGIC)
4633+#endif /* without WARN_DEVPTS the 'if' above silences devpts denials */
4634+ vxwprintk_task(1,
4635+ "denied [0x%x] access to inode %s:%p[#%d,%lu]",
4636+ mask, inode->i_sb->s_id, inode, inode->i_tag,
4637+ inode->i_ino);
4638+ }
4639+ return ret; /* result of __dx_permission() is passed through unchanged */
4640+}
4641+
4642 /*
4643 * This does basic POSIX ACL permission checking
4644 */
4645@@ -285,10 +376,14 @@ int inode_permission(struct inode *inode
4646 /*
4647 * Nobody gets write access to an immutable file.
4648 */
4649- if (IS_IMMUTABLE(inode))
4650+ if (IS_IMMUTABLE(inode) && !IS_COW(inode))
4651 return -EACCES;
4652 }
4653
4654+ retval = dx_permission(inode, mask);
4655+ if (retval)
4656+ return retval;
4657+
4658 if (inode->i_op->permission)
4659 retval = inode->i_op->permission(inode, mask, 0);
4660 else
4661@@ -584,6 +679,9 @@ static inline int exec_permission(struct
4662 int ret;
4663 struct user_namespace *ns = inode_userns(inode);
4664
4665+ if (dx_barrier(inode))
4666+ return -EACCES;
4667+
4668 if (inode->i_op->permission) {
4669 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
4670 } else {
4671@@ -1095,7 +1193,8 @@ static void follow_dotdot(struct nameida
4672
4673 if (nd->path.dentry == nd->root.dentry &&
4674 nd->path.mnt == nd->root.mnt) {
4675- break;
4676+ /* for sane '/' avoid follow_mount() */
4677+ return;
4678 }
4679 if (nd->path.dentry != nd->path.mnt->mnt_root) {
4680 /* rare case of legitimate dget_parent()... */
4681@@ -1178,6 +1277,9 @@ static int do_lookup(struct nameidata *n
4682 goto unlazy;
4683 }
4684 }
4685+
4686+ /* FIXME: check dx permission */
4687+
4688 path->mnt = mnt;
4689 path->dentry = dentry;
4690 if (unlikely(!__follow_mount_rcu(nd, path, inode)))
4691@@ -1226,6 +1328,8 @@ retry:
4692 }
4693 }
4694
4695+ /* FIXME: check dx permission */
4696+
4697 path->mnt = mnt;
4698 path->dentry = dentry;
4699 err = follow_managed(path, nd->flags);
4700@@ -1853,7 +1957,7 @@ static int may_delete(struct inode *dir,
4701 if (IS_APPEND(dir))
4702 return -EPERM;
4703 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
4704- IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
4705+ IS_IXORUNLINK(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
4706 return -EPERM;
4707 if (isdir) {
4708 if (!S_ISDIR(victim->d_inode->i_mode))
4709@@ -1980,6 +2084,15 @@ static int may_open(struct path *path, i
4710 break;
4711 }
4712
4713+#ifdef CONFIG_VSERVER_COWBL
4714+ if (IS_COW(inode) &&
4715+ ((flag & O_ACCMODE) != O_RDONLY)) {
4716+ if (IS_COW_LINK(inode))
4717+ return -EMLINK;
4718+ inode->i_flags &= ~(S_IXUNLINK|S_IMMUTABLE);
4719+ mark_inode_dirty(inode);
4720+ }
4721+#endif
4722 error = inode_permission(inode, acc_mode);
4723 if (error)
4724 return error;
4725@@ -2224,6 +2337,16 @@ ok:
4726 }
4727 common:
4728 error = may_open(&nd->path, acc_mode, open_flag);
4729+#ifdef CONFIG_VSERVER_COWBL
4730+ if (error == -EMLINK) {
4731+ struct dentry *dentry;
4732+ dentry = cow_break_link(pathname);
4733+ if (IS_ERR(dentry))
4734+ error = PTR_ERR(dentry);
4735+ else
4736+ dput(dentry);
4737+ }
4738+#endif
4739 if (error)
4740 goto exit;
4741 filp = nameidata_to_filp(nd);
4742@@ -2266,6 +2389,7 @@ static struct file *path_openat(int dfd,
4743 struct path path;
4744 int error;
4745
4746+restart:
4747 filp = get_empty_filp();
4748 if (!filp)
4749 return ERR_PTR(-ENFILE);
4750@@ -2303,6 +2427,17 @@ static struct file *path_openat(int dfd,
4751 filp = do_last(nd, &path, op, pathname);
4752 put_link(nd, &link, cookie);
4753 }
4754+
4755+#ifdef CONFIG_VSERVER_COWBL
4756+ if (filp == ERR_PTR(-EMLINK)) {
4757+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
4758+ path_put(&nd->root);
4759+ if (base)
4760+ fput(base);
4761+ release_open_intent(nd);
4762+ goto restart;
4763+ }
4764+#endif
4765 out:
4766 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
4767 path_put(&nd->root);
4768@@ -2886,7 +3021,7 @@ int vfs_link(struct dentry *old_dentry,
4769 /*
4770 * A link to an append-only or immutable file cannot be created.
4771 */
4772- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4773+ if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
4774 return -EPERM;
4775 if (!dir->i_op->link)
4776 return -EPERM;
4777@@ -3274,6 +3409,227 @@ int vfs_follow_link(struct nameidata *nd
4778 return __vfs_follow_link(nd, link);
4779 }
4780
4781+
4782+#ifdef CONFIG_VSERVER_COWBL
4783+
4784+static inline
4785+long do_cow_splice(struct file *in, struct file *out, size_t len)
4786+{
4787+ loff_t src_pos = 0; /* the copy always starts at offset 0 of 'in' */
4788+
4789+ return do_splice_direct(in, &src_pos, out, len, 0);
4790+}
4791+
4792+struct dentry *cow_break_link(const char *pathname)
4793+{ /* copies the file to a temp name, renames it over the original; returns dentry or ERR_PTR */
4794+ int ret, mode, pathlen, redo = 0;
4795+ struct nameidata old_nd, dir_nd;
4796+ struct path old_path;
4797+ struct dentry *dir, *old_dentry, *new_dentry = NULL;
4798+ struct file *old_file;
4799+ struct file *new_file;
4800+ char *to, *path, pad='\251';
4801+ loff_t size;
4802+
4803+ vxdprintk(VXD_CBIT(misc, 1),
4804+ "cow_break_link(" VS_Q("%s") ")", pathname);
4805+ path = kmalloc(PATH_MAX, GFP_KERNEL);
4806+ ret = -ENOMEM;
4807+ if (!path)
4808+ goto out;
4809+
4810+ /* old_nd will have refs to dentry and mnt */
4811+ ret = do_path_lookup(AT_FDCWD, pathname, LOOKUP_FOLLOW, &old_nd);
4812+ vxdprintk(VXD_CBIT(misc, 2),
4813+ "do_path_lookup(old): %d [r=%d]",
4814+ ret, mnt_get_count(old_nd.path.mnt));
4815+ if (ret < 0)
4816+ goto out_free_path;
4817+
4818+ old_path = old_nd.path;
4819+ old_dentry = old_path.dentry;
4820+ mode = old_dentry->d_inode->i_mode;
4821+
4822+ to = d_path(&old_path, path, PATH_MAX-2); /* NOTE(review): result is not IS_ERR()-checked */
4823+ pathlen = strlen(to);
4824+ vxdprintk(VXD_CBIT(misc, 2),
4825+ "old path " VS_Q("%s") " [%p:" VS_Q("%.*s") ":%d]", to,
4826+ old_dentry,
4827+ old_dentry->d_name.len, old_dentry->d_name.name,
4828+ old_dentry->d_name.len);
4829+
4830+ to[pathlen + 1] = 0;
4831+retry:
4832+ new_dentry = NULL;
4833+ to[pathlen] = pad--; /* temp name = old path plus one pad byte, a new byte per retry */
4834+ ret = -ELOOP;
4835+ if (pad <= '\240')
4836+ goto out_rel_old;
4837+
4838+ vxdprintk(VXD_CBIT(misc, 1), "temp copy " VS_Q("%s"), to);
4839+ /* dir_nd will have refs to dentry and mnt */
4840+ ret = do_path_lookup(AT_FDCWD, to,
4841+ LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &dir_nd);
4842+ vxdprintk(VXD_CBIT(misc, 2), "do_path_lookup(new): %d", ret);
4843+ if (ret < 0)
4844+ goto retry;
4845+
4846+ /* this puppy downs the inode mutex */
4847+ new_dentry = lookup_create(&dir_nd, 0);
4848+ if (!new_dentry || IS_ERR(new_dentry)) {
4849+ vxdprintk(VXD_CBIT(misc, 2),
4850+ "lookup_create(new): failed with %ld",
4851+ PTR_ERR(new_dentry));
4852+ mutex_unlock(&dir_nd.path.dentry->d_inode->i_mutex);
4853+ path_put(&dir_nd.path); /* drop the ref only after its last use above */
4854+ goto retry;
4855+ }
4856+ vxdprintk(VXD_CBIT(misc, 2),
4857+ "lookup_create(new): %p [" VS_Q("%.*s") ":%d]",
4858+ new_dentry,
4859+ new_dentry->d_name.len, new_dentry->d_name.name,
4860+ new_dentry->d_name.len);
4861+
4862+ dir = dir_nd.path.dentry;
4863+
4864+ ret = vfs_create(dir->d_inode, new_dentry, mode, &dir_nd);
4865+ vxdprintk(VXD_CBIT(misc, 2),
4866+ "vfs_create(new): %d", ret);
4867+ if (ret == -EEXIST) {
4868+ mutex_unlock(&dir->d_inode->i_mutex);
4869+ path_put(&dir_nd.path);
4870+ dput(new_dentry);
4871+ goto retry;
4872+ }
4873+ else if (ret < 0)
4874+ goto out_unlock_new;
4875+
4876+ /* drop out early, ret passes ENOENT */
4877+ ret = -ENOENT;
4878+ if ((redo = d_unhashed(old_dentry)))
4879+ goto out_unlock_new;
4880+
4881+ path_get(&old_path);
4882+ /* this one cleans up the dentry/mnt in case of failure */
4883+ old_file = dentry_open(old_dentry, old_path.mnt,
4884+ O_RDONLY, current_cred());
4885+ vxdprintk(VXD_CBIT(misc, 2),
4886+ "dentry_open(old): %p", old_file);
4887+ if (IS_ERR(old_file)) {
4888+ ret = PTR_ERR(old_file);
4889+ goto out_unlock_new;
4890+ }
4891+
4892+ dget(new_dentry);
4893+ mntget(old_path.mnt);
4894+ /* this one cleans up the dentry/mnt in case of failure */
4895+ new_file = dentry_open(new_dentry, old_path.mnt,
4896+ O_WRONLY, current_cred());
4897+ vxdprintk(VXD_CBIT(misc, 2),
4898+ "dentry_open(new): %p", new_file);
4899+ if (IS_ERR(new_file)) {
4900+ ret = PTR_ERR(new_file);
4901+ goto out_fput_old;
4902+ }
4903+
4904+ size = i_size_read(old_file->f_dentry->d_inode);
4905+ ret = do_cow_splice(old_file, new_file, size);
4906+ vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %d", ret);
4907+ if (ret < 0) {
4908+ goto out_fput_both;
4909+ } else if (ret < size) { /* short copy is reported as out-of-space */
4910+ ret = -ENOSPC;
4911+ goto out_fput_both;
4912+ } else {
4913+ struct inode *old_inode = old_dentry->d_inode;
4914+ struct inode *new_inode = new_dentry->d_inode;
4915+ struct iattr attr = {
4916+ .ia_uid = old_inode->i_uid,
4917+ .ia_gid = old_inode->i_gid,
4918+ .ia_valid = ATTR_UID | ATTR_GID
4919+ };
4920+
4921+ setattr_copy(new_inode, &attr);
4922+ mark_inode_dirty(new_inode);
4923+ }
4924+
4925+ mutex_lock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
4926+
4927+ /* drop out late */
4928+ ret = -ENOENT;
4929+ if ((redo = d_unhashed(old_dentry)))
4930+ goto out_unlock;
4931+
4932+ vxdprintk(VXD_CBIT(misc, 2),
4933+ "vfs_rename: [" VS_Q("%.*s") ":%d] -> [" VS_Q("%.*s") ":%d]",
4934+ new_dentry->d_name.len, new_dentry->d_name.name,
4935+ new_dentry->d_name.len,
4936+ old_dentry->d_name.len, old_dentry->d_name.name,
4937+ old_dentry->d_name.len);
4938+ ret = vfs_rename(dir_nd.path.dentry->d_inode, new_dentry,
4939+ old_dentry->d_parent->d_inode, old_dentry);
4940+ vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
4941+
4942+out_unlock:
4943+ mutex_unlock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
4944+
4945+out_fput_both:
4946+ vxdprintk(VXD_CBIT(misc, 3),
4947+ "fput(new_file=%p[#%ld])", new_file,
4948+ atomic_long_read(&new_file->f_count));
4949+ fput(new_file);
4950+
4951+out_fput_old:
4952+ vxdprintk(VXD_CBIT(misc, 3),
4953+ "fput(old_file=%p[#%ld])", old_file,
4954+ atomic_long_read(&old_file->f_count));
4955+ fput(old_file);
4956+
4957+out_unlock_new:
4958+ mutex_unlock(&dir->d_inode->i_mutex);
4959+ if (!ret)
4960+ goto out_redo;
4961+
4962+ /* error path cleanup */
4963+ vfs_unlink(dir->d_inode, new_dentry);
4964+
4965+out_redo:
4966+ if (!redo)
4967+ goto out_rel_both;
4968+ /* lookup dentry once again */
4969+ /* old_nd.path is freed as old_path in out_rel_old */
4970+ ret = do_path_lookup(AT_FDCWD, pathname, LOOKUP_FOLLOW, &old_nd);
4971+ if (ret)
4972+ goto out_rel_both;
4973+
4974+ dput(new_dentry);
4975+ new_dentry = old_nd.path.dentry;
4976+ vxdprintk(VXD_CBIT(misc, 2),
4977+ "do_path_lookup(redo): %p [" VS_Q("%.*s") ":%d]",
4978+ new_dentry,
4979+ new_dentry->d_name.len, new_dentry->d_name.name,
4980+ new_dentry->d_name.len);
4981+ dget(new_dentry);
4982+
4983+out_rel_both:
4984+ path_put(&dir_nd.path);
4985+out_rel_old:
4986+ path_put(&old_path);
4987+out_free_path:
4988+ kfree(path);
4989+out:
4990+ if (ret) {
4991+ dput(new_dentry);
4992+ new_dentry = ERR_PTR(ret);
4993+ }
4994+ vxdprintk(VXD_CBIT(misc, 3),
4995+ "cow_break_link returning with %p [r=%d]",
4996+ new_dentry, mnt_get_count(old_nd.path.mnt));
4997+ return new_dentry;
4998+}
4999+
5000+#endif
5001+
5002 /* get the link contents into pagecache */
5003 static char *page_getlink(struct dentry * dentry, struct page **ppage)
5004 {
5005diff -NurpP --minimal linux-3.0.9/fs/namespace.c linux-3.0.9-vs2.3.2.1/fs/namespace.c
5006--- linux-3.0.9/fs/namespace.c 2011-11-15 16:40:47.000000000 +0100
5007+++ linux-3.0.9-vs2.3.2.1/fs/namespace.c 2011-11-15 17:37:39.000000000 +0100
5008@@ -31,6 +31,11 @@
5009 #include <linux/idr.h>
5010 #include <linux/fs_struct.h>
5011 #include <linux/fsnotify.h>
5012+#include <linux/vs_base.h>
5013+#include <linux/vs_context.h>
5014+#include <linux/vs_tag.h>
5015+#include <linux/vserver/space.h>
5016+#include <linux/vserver/global.h>
5017 #include <asm/uaccess.h>
5018 #include <asm/unistd.h>
5019 #include "pnode.h"
5020@@ -679,6 +684,10 @@ vfs_kern_mount(struct file_system_type *
5021 if (!type)
5022 return ERR_PTR(-ENODEV);
5023
5024+ if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
5025+ !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
5026+ return ERR_PTR(-EPERM);
5027+
5028 mnt = alloc_vfsmnt(name);
5029 if (!mnt)
5030 return ERR_PTR(-ENOMEM);
5031@@ -724,6 +733,7 @@ static struct vfsmount *clone_mnt(struct
5032 mnt->mnt_root = dget(root);
5033 mnt->mnt_mountpoint = mnt->mnt_root;
5034 mnt->mnt_parent = mnt;
5035+ mnt->mnt_tag = old->mnt_tag;
5036
5037 if (flag & CL_SLAVE) {
5038 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
5039@@ -852,6 +862,31 @@ static inline void mangle(struct seq_fil
5040 seq_escape(m, s, " \t\n\\");
5041 }
5042
5043+static int mnt_is_reachable(struct vfsmount *mnt)
5044+{ /* non-zero if mnt is the namespace root or hangs below current->fs->root */
5045+ struct path root;
5046+ struct dentry *point;
5047+ int ret;
5048+
5049+ if (mnt == mnt->mnt_ns->root)
5050+ return 1;
5051+
5052+ br_read_lock(vfsmount_lock);
5053+ root = current->fs->root;
5054+ point = root.dentry;
5055+
5056+ while ((mnt != mnt->mnt_parent) && (mnt != root.mnt)) { /* climb until the top mount or the root's mount */
5057+ point = mnt->mnt_mountpoint;
5058+ mnt = mnt->mnt_parent;
5059+ }
5060+
5061+ ret = (mnt == root.mnt) && is_subdir(point, root.dentry); /* last mountpoint must lie under the root dentry */
5062+
5063+ br_read_unlock(vfsmount_lock);
5064+
5065+ return ret;
5066+}
5067+
5068 /*
5069 * Simple .show_options callback for filesystems which don't want to
5070 * implement more complex mount option showing.
5071@@ -954,6 +989,8 @@ static int show_sb_opts(struct seq_file
5072 { MS_SYNCHRONOUS, ",sync" },
5073 { MS_DIRSYNC, ",dirsync" },
5074 { MS_MANDLOCK, ",mand" },
5075+ { MS_TAGGED, ",tag" },
5076+ { MS_NOTAGCHECK, ",notagcheck" },
5077 { 0, NULL }
5078 };
5079 const struct proc_fs_info *fs_infop;
5080@@ -1000,16 +1037,26 @@ static int show_vfsmnt(struct seq_file *
5081 int err = 0;
5082 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
5083
5084- if (mnt->mnt_sb->s_op->show_devname) {
5085- err = mnt->mnt_sb->s_op->show_devname(m, mnt);
5086- if (err)
5087- goto out;
5088+ if (vx_flags(VXF_HIDE_MOUNT, 0))
5089+ return SEQ_SKIP;
5090+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
5091+ return SEQ_SKIP;
5092+
5093+ if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
5094+ mnt == current->fs->root.mnt) {
5095+ seq_puts(m, "/dev/root / ");
5096 } else {
5097- mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
5098+ if (mnt->mnt_sb->s_op->show_devname) {
5099+ err = mnt->mnt_sb->s_op->show_devname(m, mnt);
5100+ if (err)
5101+ goto out;
5102+ } else {
5103+ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
5104+ }
5105+ seq_putc(m, ' ');
5106+ seq_path(m, &mnt_path, " \t\n\\");
5107+ seq_putc(m, ' ');
5108 }
5109- seq_putc(m, ' ');
5110- seq_path(m, &mnt_path, " \t\n\\");
5111- seq_putc(m, ' ');
5112 show_type(m, mnt->mnt_sb);
5113 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
5114 err = show_sb_opts(m, mnt->mnt_sb);
5115@@ -1039,6 +1086,11 @@ static int show_mountinfo(struct seq_fil
5116 struct path root = p->root;
5117 int err = 0;
5118
5119+ if (vx_flags(VXF_HIDE_MOUNT, 0))
5120+ return SEQ_SKIP;
5121+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
5122+ return SEQ_SKIP;
5123+
5124 seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
5125 MAJOR(sb->s_dev), MINOR(sb->s_dev));
5126 if (sb->s_op->show_path)
5127@@ -1107,22 +1159,32 @@ static int show_vfsstat(struct seq_file
5128 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
5129 int err = 0;
5130
5131- /* device */
5132- if (mnt->mnt_sb->s_op->show_devname) {
5133- seq_puts(m, "device ");
5134- err = mnt->mnt_sb->s_op->show_devname(m, mnt);
5135+ if (vx_flags(VXF_HIDE_MOUNT, 0))
5136+ return SEQ_SKIP;
5137+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
5138+ return SEQ_SKIP;
5139+
5140+ if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
5141+ mnt == current->fs->root.mnt) {
5142+ seq_puts(m, "device /dev/root mounted on / ");
5143 } else {
5144- if (mnt->mnt_devname) {
5145+ /* device */
5146+ if (mnt->mnt_sb->s_op->show_devname) {
5147 seq_puts(m, "device ");
5148- mangle(m, mnt->mnt_devname);
5149- } else
5150- seq_puts(m, "no device");
5151- }
5152+ err = mnt->mnt_sb->s_op->show_devname(m, mnt);
5153+ } else {
5154+ if (mnt->mnt_devname) {
5155+ seq_puts(m, "device ");
5156+ mangle(m, mnt->mnt_devname);
5157+ } else
5158+ seq_puts(m, "no device");
5159+ }
5160
5161- /* mount point */
5162- seq_puts(m, " mounted on ");
5163- seq_path(m, &mnt_path, " \t\n\\");
5164- seq_putc(m, ' ');
5165+ /* mount point */
5166+ seq_puts(m, " mounted on ");
5167+ seq_path(m, &mnt_path, " \t\n\\");
5168+ seq_putc(m, ' ');
5169+ }
5170
5171 /* file system type */
5172 seq_puts(m, "with fstype ");
5173@@ -1381,7 +1443,7 @@ SYSCALL_DEFINE2(umount, char __user *, n
5174 goto dput_and_out;
5175
5176 retval = -EPERM;
5177- if (!capable(CAP_SYS_ADMIN))
5178+ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
5179 goto dput_and_out;
5180
5181 retval = do_umount(path.mnt, flags);
5182@@ -1407,7 +1469,7 @@ SYSCALL_DEFINE1(oldumount, char __user *
5183
5184 static int mount_is_safe(struct path *path)
5185 {
5186- if (capable(CAP_SYS_ADMIN))
5187+ if (vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
5188 return 0;
5189 return -EPERM;
5190 #ifdef notyet
5191@@ -1717,7 +1779,7 @@ static int do_change_type(struct path *p
5192 int type;
5193 int err = 0;
5194
5195- if (!capable(CAP_SYS_ADMIN))
5196+ if (!vx_capable(CAP_SYS_ADMIN, VXC_NAMESPACE))
5197 return -EPERM;
5198
5199 if (path->dentry != path->mnt->mnt_root)
5200@@ -1733,6 +1795,7 @@ static int do_change_type(struct path *p
5201 if (err)
5202 goto out_unlock;
5203 }
5204+ // mnt->mnt_flags = mnt_flags;
5205
5206 br_write_lock(vfsmount_lock);
5207 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
5208@@ -1748,12 +1811,14 @@ static int do_change_type(struct path *p
5209 * do loopback mount.
5210 */
5211 static int do_loopback(struct path *path, char *old_name,
5212- int recurse)
5213+ tag_t tag, unsigned long flags, int mnt_flags)
5214 {
5215 LIST_HEAD(umount_list);
5216 struct path old_path;
5217 struct vfsmount *mnt = NULL;
5218 int err = mount_is_safe(path);
5219+ int recurse = flags & MS_REC;
5220+
5221 if (err)
5222 return err;
5223 if (!old_name || !*old_name)
5224@@ -1819,12 +1884,12 @@ static int change_mount_flags(struct vfs
5225 * on it - tough luck.
5226 */
5227 static int do_remount(struct path *path, int flags, int mnt_flags,
5228- void *data)
5229+ void *data, xid_t xid)
5230 {
5231 int err;
5232 struct super_block *sb = path->mnt->mnt_sb;
5233
5234- if (!capable(CAP_SYS_ADMIN))
5235+ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_REMOUNT))
5236 return -EPERM;
5237
5238 if (!check_mnt(path->mnt))
5239@@ -1872,7 +1937,7 @@ static int do_move_mount(struct path *pa
5240 struct path old_path, parent_path;
5241 struct vfsmount *p;
5242 int err = 0;
5243- if (!capable(CAP_SYS_ADMIN))
5244+ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
5245 return -EPERM;
5246 if (!old_name || !*old_name)
5247 return -EINVAL;
5248@@ -2023,7 +2088,7 @@ static int do_new_mount(struct path *pat
5249 return -EINVAL;
5250
5251 /* we need capabilities... */
5252- if (!capable(CAP_SYS_ADMIN))
5253+ if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
5254 return -EPERM;
5255
5256 mnt = do_kern_mount(type, flags, name, data);
5257@@ -2292,6 +2357,7 @@ long do_mount(char *dev_name, char *dir_
5258 struct path path;
5259 int retval = 0;
5260 int mnt_flags = 0;
5261+ tag_t tag = 0;
5262
5263 /* Discard magic */
5264 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
5265@@ -2319,6 +2385,12 @@ long do_mount(char *dev_name, char *dir_
5266 if (!(flags & MS_NOATIME))
5267 mnt_flags |= MNT_RELATIME;
5268
5269+ if (dx_parse_tag(data_page, &tag, 1, &mnt_flags, &flags)) {
5270+ /* FIXME: bind and re-mounts get the tag flag? */
5271+ if (flags & (MS_BIND|MS_REMOUNT))
5272+ flags |= MS_TAGID;
5273+ }
5274+
5275 /* Separate the per-mountpoint flags */
5276 if (flags & MS_NOSUID)
5277 mnt_flags |= MNT_NOSUID;
5278@@ -2335,15 +2407,17 @@ long do_mount(char *dev_name, char *dir_
5279 if (flags & MS_RDONLY)
5280 mnt_flags |= MNT_READONLY;
5281
5282+ if (!capable(CAP_SYS_ADMIN))
5283+ mnt_flags |= MNT_NODEV;
5284 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
5285 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
5286 MS_STRICTATIME);
5287
5288 if (flags & MS_REMOUNT)
5289 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
5290- data_page);
5291+ data_page, tag);
5292 else if (flags & MS_BIND)
5293- retval = do_loopback(&path, dev_name, flags & MS_REC);
5294+ retval = do_loopback(&path, dev_name, tag, flags, mnt_flags);
5295 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
5296 retval = do_change_type(&path, flags);
5297 else if (flags & MS_MOVE)
5298@@ -2443,6 +2517,7 @@ static struct mnt_namespace *dup_mnt_ns(
5299 q = next_mnt(q, new_ns->root);
5300 }
5301 up_write(&namespace_sem);
5302+ atomic_inc(&vs_global_mnt_ns);
5303
5304 if (rootmnt)
5305 mntput(rootmnt);
5306@@ -2582,9 +2657,10 @@ SYSCALL_DEFINE2(pivot_root, const char _
5307 goto out3;
5308
5309 error = -EINVAL;
5310- if (IS_MNT_SHARED(old.mnt) ||
5311- IS_MNT_SHARED(new.mnt->mnt_parent) ||
5312- IS_MNT_SHARED(root.mnt->mnt_parent))
5313+ if ((IS_MNT_SHARED(old.mnt) ||
5314+ IS_MNT_SHARED(new.mnt->mnt_parent) ||
5315+ IS_MNT_SHARED(root.mnt->mnt_parent)) &&
5316+ !vx_flags(VXF_STATE_SETUP, 0))
5317 goto out4;
5318 if (!check_mnt(root.mnt) || !check_mnt(new.mnt))
5319 goto out4;
5320@@ -2716,6 +2792,7 @@ void put_mnt_ns(struct mnt_namespace *ns
5321 br_write_unlock(vfsmount_lock);
5322 up_write(&namespace_sem);
5323 release_mounts(&umount_list);
5324+ atomic_dec(&vs_global_mnt_ns);
5325 kfree(ns);
5326 }
5327 EXPORT_SYMBOL(put_mnt_ns);
5328diff -NurpP --minimal linux-3.0.9/fs/nfs/client.c linux-3.0.9-vs2.3.2.1/fs/nfs/client.c
5329--- linux-3.0.9/fs/nfs/client.c 2011-07-22 11:18:05.000000000 +0200
5330+++ linux-3.0.9-vs2.3.2.1/fs/nfs/client.c 2011-06-10 22:11:24.000000000 +0200
5331@@ -780,6 +780,9 @@ static int nfs_init_server_rpcclient(str
5332 if (server->flags & NFS_MOUNT_SOFT)
5333 server->client->cl_softrtry = 1;
5334
5335+ server->client->cl_tag = 0;
5336+ if (server->flags & NFS_MOUNT_TAGGED)
5337+ server->client->cl_tag = 1;
5338 return 0;
5339 }
5340
5341@@ -951,6 +954,10 @@ static void nfs_server_set_fsinfo(struct
5342 server->acdirmin = server->acdirmax = 0;
5343 }
5344
5345+ /* FIXME: needs fsinfo
5346+ if (server->flags & NFS_MOUNT_TAGGED)
5347+ sb->s_flags |= MS_TAGGED; */
5348+
5349 server->maxfilesize = fsinfo->maxfilesize;
5350
5351 server->time_delta = fsinfo->time_delta;
5352diff -NurpP --minimal linux-3.0.9/fs/nfs/dir.c linux-3.0.9-vs2.3.2.1/fs/nfs/dir.c
5353--- linux-3.0.9/fs/nfs/dir.c 2011-11-15 16:40:47.000000000 +0100
5354+++ linux-3.0.9-vs2.3.2.1/fs/nfs/dir.c 2011-08-08 23:04:47.000000000 +0200
5355@@ -35,6 +35,7 @@
5356 #include <linux/sched.h>
5357 #include <linux/kmemleak.h>
5358 #include <linux/xattr.h>
5359+#include <linux/vs_tag.h>
5360
5361 #include "delegation.h"
5362 #include "iostat.h"
5363@@ -1308,6 +1309,7 @@ static struct dentry *nfs_lookup(struct
5364 if (IS_ERR(res))
5365 goto out_unblock_sillyrename;
5366
5367+ dx_propagate_tag(nd, inode);
5368 no_entry:
5369 res = d_materialise_unique(dentry, inode);
5370 if (res != NULL) {
5371diff -NurpP --minimal linux-3.0.9/fs/nfs/inode.c linux-3.0.9-vs2.3.2.1/fs/nfs/inode.c
5372--- linux-3.0.9/fs/nfs/inode.c 2011-07-22 11:18:05.000000000 +0200
5373+++ linux-3.0.9-vs2.3.2.1/fs/nfs/inode.c 2011-10-11 21:21:33.000000000 +0200
5374@@ -38,6 +38,7 @@
5375 #include <linux/nfs_xdr.h>
5376 #include <linux/slab.h>
5377 #include <linux/compat.h>
5378+#include <linux/vs_tag.h>
5379
5380 #include <asm/system.h>
5381 #include <asm/uaccess.h>
5382@@ -273,6 +274,8 @@ nfs_fhget(struct super_block *sb, struct
5383 if (inode->i_state & I_NEW) {
5384 struct nfs_inode *nfsi = NFS_I(inode);
5385 unsigned long now = jiffies;
5386+ uid_t uid;
5387+ gid_t gid;
5388
5389 /* We set i_ino for the few things that still rely on it,
5390 * such as stat(2) */
5391@@ -321,8 +324,8 @@ nfs_fhget(struct super_block *sb, struct
5392 nfsi->change_attr = 0;
5393 inode->i_size = 0;
5394 inode->i_nlink = 0;
5395- inode->i_uid = -2;
5396- inode->i_gid = -2;
5397+ uid = -2;
5398+ gid = -2;
5399 inode->i_blocks = 0;
5400 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
5401
5402@@ -359,13 +362,13 @@ nfs_fhget(struct super_block *sb, struct
5403 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
5404 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
5405 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
5406- inode->i_uid = fattr->uid;
5407+ uid = fattr->uid;
5408 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
5409 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
5410 | NFS_INO_INVALID_ACCESS
5411 | NFS_INO_INVALID_ACL;
5412 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
5413- inode->i_gid = fattr->gid;
5414+ gid = fattr->gid;
5415 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
5416 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
5417 | NFS_INO_INVALID_ACCESS
5418@@ -378,6 +381,11 @@ nfs_fhget(struct super_block *sb, struct
5419 */
5420 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
5421 }
5422+ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
5423+ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
5424+ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0);
5425+ /* maybe fattr->xid someday */
5426+
5427 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
5428 nfsi->attrtimeo_timestamp = now;
5429 nfsi->access_cache = RB_ROOT;
5430@@ -494,6 +502,8 @@ void nfs_setattr_update_inode(struct ino
5431 inode->i_uid = attr->ia_uid;
5432 if ((attr->ia_valid & ATTR_GID) != 0)
5433 inode->i_gid = attr->ia_gid;
5434+ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
5435+ inode->i_tag = attr->ia_tag;
5436 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
5437 spin_unlock(&inode->i_lock);
5438 }
5439@@ -941,6 +951,9 @@ static int nfs_check_inode_attributes(st
5440 struct nfs_inode *nfsi = NFS_I(inode);
5441 loff_t cur_size, new_isize;
5442 unsigned long invalid = 0;
5443+ uid_t uid;
5444+ gid_t gid;
5445+ tag_t tag;
5446
5447
5448 /* Has the inode gone and changed behind our back? */
5449@@ -964,13 +977,18 @@ static int nfs_check_inode_attributes(st
5450 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
5451 }
5452
5453+ uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
5454+ gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
5455+ tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
5456+
5457 /* Have any file permissions changed? */
5458 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
5459 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
5460- if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid)
5461+ if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && uid != fattr->uid)
5462 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
5463- if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid)
5464+ if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && gid != fattr->gid)
5465 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
5466+ /* maybe check for tag too? */
5467
5468 /* Has the link count changed? */
5469 if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
5470@@ -1205,6 +1223,9 @@ static int nfs_update_inode(struct inode
5471 unsigned long invalid = 0;
5472 unsigned long now = jiffies;
5473 unsigned long save_cache_validity;
5474+ uid_t uid;
5475+ gid_t gid;
5476+ tag_t tag;
5477
5478 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
5479 __func__, inode->i_sb->s_id, inode->i_ino,
5480@@ -1312,6 +1333,9 @@ static int nfs_update_inode(struct inode
5481 | NFS_INO_REVAL_PAGECACHE
5482 | NFS_INO_REVAL_FORCED);
5483
5484+ uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
5485+ gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
5486+ tag = inode->i_tag;
5487
5488 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
5489 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
5490@@ -1333,9 +1357,9 @@ static int nfs_update_inode(struct inode
5491 | NFS_INO_REVAL_FORCED);
5492
5493 if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
5494- if (inode->i_uid != fattr->uid) {
5495+ if (uid != fattr->uid) {
5496 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
5497- inode->i_uid = fattr->uid;
5498+ uid = fattr->uid;
5499 }
5500 } else if (server->caps & NFS_CAP_OWNER)
5501 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
5502@@ -1344,9 +1368,9 @@ static int nfs_update_inode(struct inode
5503 | NFS_INO_REVAL_FORCED);
5504
5505 if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
5506- if (inode->i_gid != fattr->gid) {
5507+ if (gid != fattr->gid) {
5508 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
5509- inode->i_gid = fattr->gid;
5510+ gid = fattr->gid;
5511 }
5512 } else if (server->caps & NFS_CAP_OWNER_GROUP)
5513 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
5514@@ -1354,6 +1378,10 @@ static int nfs_update_inode(struct inode
5515 | NFS_INO_INVALID_ACL
5516 | NFS_INO_REVAL_FORCED);
5517
5518+ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
5519+ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
5520+ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, tag);
5521+
5522 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
5523 if (inode->i_nlink != fattr->nlink) {
5524 invalid |= NFS_INO_INVALID_ATTR;
5525diff -NurpP --minimal linux-3.0.9/fs/nfs/nfs3xdr.c linux-3.0.9-vs2.3.2.1/fs/nfs/nfs3xdr.c
5526--- linux-3.0.9/fs/nfs/nfs3xdr.c 2011-03-15 18:07:32.000000000 +0100
5527+++ linux-3.0.9-vs2.3.2.1/fs/nfs/nfs3xdr.c 2011-06-10 22:11:24.000000000 +0200
5528@@ -20,6 +20,7 @@
5529 #include <linux/nfs3.h>
5530 #include <linux/nfs_fs.h>
5531 #include <linux/nfsacl.h>
5532+#include <linux/vs_tag.h>
5533 #include "internal.h"
5534
5535 #define NFSDBG_FACILITY NFSDBG_XDR
5536@@ -562,7 +563,8 @@ static __be32 *xdr_decode_nfstime3(__be3
5537 * set_mtime mtime;
5538 * };
5539 */
5540-static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
5541+static void encode_sattr3(struct xdr_stream *xdr,
5542+ const struct iattr *attr, int tag)
5543 {
5544 u32 nbytes;
5545 __be32 *p;
5546@@ -594,15 +596,19 @@ static void encode_sattr3(struct xdr_str
5547 } else
5548 *p++ = xdr_zero;
5549
5550- if (attr->ia_valid & ATTR_UID) {
5551+ if (attr->ia_valid & ATTR_UID ||
5552+ (tag && (attr->ia_valid & ATTR_TAG))) {
5553 *p++ = xdr_one;
5554- *p++ = cpu_to_be32(attr->ia_uid);
5555+ *p++ = cpu_to_be32(TAGINO_UID(tag,
5556+ attr->ia_uid, attr->ia_tag));
5557 } else
5558 *p++ = xdr_zero;
5559
5560- if (attr->ia_valid & ATTR_GID) {
5561+ if (attr->ia_valid & ATTR_GID ||
5562+ (tag && (attr->ia_valid & ATTR_TAG))) {
5563 *p++ = xdr_one;
5564- *p++ = cpu_to_be32(attr->ia_gid);
5565+ *p++ = cpu_to_be32(TAGINO_GID(tag,
5566+ attr->ia_gid, attr->ia_tag));
5567 } else
5568 *p++ = xdr_zero;
5569
5570@@ -878,7 +884,7 @@ static void nfs3_xdr_enc_setattr3args(st
5571 const struct nfs3_sattrargs *args)
5572 {
5573 encode_nfs_fh3(xdr, args->fh);
5574- encode_sattr3(xdr, args->sattr);
5575+ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
5576 encode_sattrguard3(xdr, args);
5577 }
5578
5579@@ -1028,13 +1034,13 @@ static void nfs3_xdr_enc_write3args(stru
5580 * };
5581 */
5582 static void encode_createhow3(struct xdr_stream *xdr,
5583- const struct nfs3_createargs *args)
5584+ const struct nfs3_createargs *args, int tag)
5585 {
5586 encode_uint32(xdr, args->createmode);
5587 switch (args->createmode) {
5588 case NFS3_CREATE_UNCHECKED:
5589 case NFS3_CREATE_GUARDED:
5590- encode_sattr3(xdr, args->sattr);
5591+ encode_sattr3(xdr, args->sattr, tag);
5592 break;
5593 case NFS3_CREATE_EXCLUSIVE:
5594 encode_createverf3(xdr, args->verifier);
5595@@ -1049,7 +1055,7 @@ static void nfs3_xdr_enc_create3args(str
5596 const struct nfs3_createargs *args)
5597 {
5598 encode_diropargs3(xdr, args->fh, args->name, args->len);
5599- encode_createhow3(xdr, args);
5600+ encode_createhow3(xdr, args, req->rq_task->tk_client->cl_tag);
5601 }
5602
5603 /*
5604@@ -1065,7 +1071,7 @@ static void nfs3_xdr_enc_mkdir3args(stru
5605 const struct nfs3_mkdirargs *args)
5606 {
5607 encode_diropargs3(xdr, args->fh, args->name, args->len);
5608- encode_sattr3(xdr, args->sattr);
5609+ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
5610 }
5611
5612 /*
5613@@ -1082,9 +1088,9 @@ static void nfs3_xdr_enc_mkdir3args(stru
5614 * };
5615 */
5616 static void encode_symlinkdata3(struct xdr_stream *xdr,
5617- const struct nfs3_symlinkargs *args)
5618+ const struct nfs3_symlinkargs *args, int tag)
5619 {
5620- encode_sattr3(xdr, args->sattr);
5621+ encode_sattr3(xdr, args->sattr, tag);
5622 encode_nfspath3(xdr, args->pages, args->pathlen);
5623 }
5624
5625@@ -1093,7 +1099,7 @@ static void nfs3_xdr_enc_symlink3args(st
5626 const struct nfs3_symlinkargs *args)
5627 {
5628 encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
5629- encode_symlinkdata3(xdr, args);
5630+ encode_symlinkdata3(xdr, args, req->rq_task->tk_client->cl_tag);
5631 }
5632
5633 /*
5634@@ -1121,24 +1127,24 @@ static void nfs3_xdr_enc_symlink3args(st
5635 * };
5636 */
5637 static void encode_devicedata3(struct xdr_stream *xdr,
5638- const struct nfs3_mknodargs *args)
5639+ const struct nfs3_mknodargs *args, int tag)
5640 {
5641- encode_sattr3(xdr, args->sattr);
5642+ encode_sattr3(xdr, args->sattr, tag);
5643 encode_specdata3(xdr, args->rdev);
5644 }
5645
5646 static void encode_mknoddata3(struct xdr_stream *xdr,
5647- const struct nfs3_mknodargs *args)
5648+ const struct nfs3_mknodargs *args, int tag)
5649 {
5650 encode_ftype3(xdr, args->type);
5651 switch (args->type) {
5652 case NF3CHR:
5653 case NF3BLK:
5654- encode_devicedata3(xdr, args);
5655+ encode_devicedata3(xdr, args, tag);
5656 break;
5657 case NF3SOCK:
5658 case NF3FIFO:
5659- encode_sattr3(xdr, args->sattr);
5660+ encode_sattr3(xdr, args->sattr, tag);
5661 break;
5662 case NF3REG:
5663 case NF3DIR:
5664@@ -1153,7 +1159,7 @@ static void nfs3_xdr_enc_mknod3args(stru
5665 const struct nfs3_mknodargs *args)
5666 {
5667 encode_diropargs3(xdr, args->fh, args->name, args->len);
5668- encode_mknoddata3(xdr, args);
5669+ encode_mknoddata3(xdr, args, req->rq_task->tk_client->cl_tag);
5670 }
5671
5672 /*
5673diff -NurpP --minimal linux-3.0.9/fs/nfs/super.c linux-3.0.9-vs2.3.2.1/fs/nfs/super.c
5674--- linux-3.0.9/fs/nfs/super.c 2011-11-15 16:40:47.000000000 +0100
5675+++ linux-3.0.9-vs2.3.2.1/fs/nfs/super.c 2011-11-15 17:37:07.000000000 +0100
5676@@ -53,6 +53,7 @@
5677 #include <linux/nfs_xdr.h>
5678 #include <linux/magic.h>
5679 #include <linux/parser.h>
5680+#include <linux/vs_tag.h>
5681
5682 #include <asm/system.h>
5683 #include <asm/uaccess.h>
5684@@ -87,6 +88,7 @@ enum {
5685 Opt_sharecache, Opt_nosharecache,
5686 Opt_resvport, Opt_noresvport,
5687 Opt_fscache, Opt_nofscache,
5688+ Opt_tag, Opt_notag,
5689
5690 /* Mount options that take integer arguments */
5691 Opt_port,
5692@@ -100,6 +102,7 @@ enum {
5693 Opt_mountvers,
5694 Opt_nfsvers,
5695 Opt_minorversion,
5696+ Opt_tagid,
5697
5698 /* Mount options that take string arguments */
5699 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
5700@@ -180,6 +183,10 @@ static const match_table_t nfs_mount_opt
5701 { Opt_fscache_uniq, "fsc=%s" },
5702 { Opt_local_lock, "local_lock=%s" },
5703
5704+ { Opt_tag, "tag" },
5705+ { Opt_notag, "notag" },
5706+ { Opt_tagid, "tagid=%u" },
5707+
5708 { Opt_err, NULL }
5709 };
5710
5711@@ -650,6 +657,7 @@ static void nfs_show_mount_options(struc
5712 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
5713 { NFS_MOUNT_UNSHARED, ",nosharecache", "" },
5714 { NFS_MOUNT_NORESVPORT, ",noresvport", "" },
5715+ { NFS_MOUNT_TAGGED, ",tag", "" },
5716 { 0, NULL, NULL }
5717 };
5718 const struct proc_nfs_info *nfs_infop;
5719@@ -1198,6 +1206,14 @@ static int nfs_parse_mount_options(char
5720 kfree(mnt->fscache_uniq);
5721 mnt->fscache_uniq = NULL;
5722 break;
5723+#ifndef CONFIG_TAGGING_NONE
5724+ case Opt_tag:
5725+ mnt->flags |= NFS_MOUNT_TAGGED;
5726+ break;
5727+ case Opt_notag:
5728+ mnt->flags &= ~NFS_MOUNT_TAGGED;
5729+ break;
5730+#endif
5731
5732 /*
5733 * options that take numeric values
5734@@ -1304,6 +1320,12 @@ static int nfs_parse_mount_options(char
5735 goto out_invalid_value;
5736 mnt->minorversion = option;
5737 break;
5738+#ifdef CONFIG_PROPAGATE
5739+ case Opt_tagid:
5740+ /* use args[0] */
5741+ nfs_data.flags |= NFS_MOUNT_TAGGED;
5742+ break;
5743+#endif
5744
5745 /*
5746 * options that take text values
5747diff -NurpP --minimal linux-3.0.9/fs/nfsd/auth.c linux-3.0.9-vs2.3.2.1/fs/nfsd/auth.c
5748--- linux-3.0.9/fs/nfsd/auth.c 2010-02-25 11:52:05.000000000 +0100
5749+++ linux-3.0.9-vs2.3.2.1/fs/nfsd/auth.c 2011-06-10 22:11:24.000000000 +0200
5750@@ -1,6 +1,7 @@
5751 /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
5752
5753 #include <linux/sched.h>
5754+#include <linux/vs_tag.h>
5755 #include "nfsd.h"
5756 #include "auth.h"
5757
5758@@ -36,6 +37,9 @@ int nfsd_setuser(struct svc_rqst *rqstp,
5759
5760 new->fsuid = rqstp->rq_cred.cr_uid;
5761 new->fsgid = rqstp->rq_cred.cr_gid;
5762+ /* FIXME: this desperately needs a tag :)
5763+ new->xid = (xid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0);
5764+ */
5765
5766 rqgi = rqstp->rq_cred.cr_group_info;
5767
5768diff -NurpP --minimal linux-3.0.9/fs/nfsd/nfs3xdr.c linux-3.0.9-vs2.3.2.1/fs/nfsd/nfs3xdr.c
5769--- linux-3.0.9/fs/nfsd/nfs3xdr.c 2011-07-22 11:18:05.000000000 +0200
5770+++ linux-3.0.9-vs2.3.2.1/fs/nfsd/nfs3xdr.c 2011-06-10 22:11:24.000000000 +0200
5771@@ -7,6 +7,7 @@
5772 */
5773
5774 #include <linux/namei.h>
5775+#include <linux/vs_tag.h>
5776 #include "xdr3.h"
5777 #include "auth.h"
5778
5779@@ -95,6 +96,8 @@ static __be32 *
5780 decode_sattr3(__be32 *p, struct iattr *iap)
5781 {
5782 u32 tmp;
5783+ uid_t uid = 0;
5784+ gid_t gid = 0;
5785
5786 iap->ia_valid = 0;
5787
5788@@ -104,12 +107,15 @@ decode_sattr3(__be32 *p, struct iattr *i
5789 }
5790 if (*p++) {
5791 iap->ia_valid |= ATTR_UID;
5792- iap->ia_uid = ntohl(*p++);
5793+ uid = ntohl(*p++);
5794 }
5795 if (*p++) {
5796 iap->ia_valid |= ATTR_GID;
5797- iap->ia_gid = ntohl(*p++);
5798+ gid = ntohl(*p++);
5799 }
5800+ iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid);
5801+ iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid);
5802+ iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0);
5803 if (*p++) {
5804 u64 newsize;
5805
5806@@ -165,8 +171,12 @@ encode_fattr3(struct svc_rqst *rqstp, __
5807 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
5808 *p++ = htonl((u32) stat->mode);
5809 *p++ = htonl((u32) stat->nlink);
5810- *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
5811- *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
5812+ *p++ = htonl((u32) nfsd_ruid(rqstp,
5813+ TAGINO_UID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
5814+ stat->uid, stat->tag)));
5815+ *p++ = htonl((u32) nfsd_rgid(rqstp,
5816+ TAGINO_GID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
5817+ stat->gid, stat->tag)));
5818 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
5819 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
5820 } else {
5821diff -NurpP --minimal linux-3.0.9/fs/nfsd/nfs4xdr.c linux-3.0.9-vs2.3.2.1/fs/nfsd/nfs4xdr.c
5822--- linux-3.0.9/fs/nfsd/nfs4xdr.c 2011-11-15 16:40:47.000000000 +0100
5823+++ linux-3.0.9-vs2.3.2.1/fs/nfsd/nfs4xdr.c 2011-11-15 17:37:07.000000000 +0100
5824@@ -45,6 +45,7 @@
5825 #include <linux/statfs.h>
5826 #include <linux/utsname.h>
5827 #include <linux/sunrpc/svcauth_gss.h>
5828+#include <linux/vs_tag.h>
5829
5830 #include "idmap.h"
5831 #include "acl.h"
5832@@ -2102,14 +2103,18 @@ out_acl:
5833 WRITE32(stat.nlink);
5834 }
5835 if (bmval1 & FATTR4_WORD1_OWNER) {
5836- status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
5837+ status = nfsd4_encode_user(rqstp,
5838+ TAGINO_UID(DX_TAG(dentry->d_inode),
5839+ stat.uid, stat.tag), &p, &buflen);
5840 if (status == nfserr_resource)
5841 goto out_resource;
5842 if (status)
5843 goto out;
5844 }
5845 if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
5846- status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
5847+ status = nfsd4_encode_group(rqstp,
5848+ TAGINO_GID(DX_TAG(dentry->d_inode),
5849+ stat.gid, stat.tag), &p, &buflen);
5850 if (status == nfserr_resource)
5851 goto out_resource;
5852 if (status)
5853diff -NurpP --minimal linux-3.0.9/fs/nfsd/nfsxdr.c linux-3.0.9-vs2.3.2.1/fs/nfsd/nfsxdr.c
5854--- linux-3.0.9/fs/nfsd/nfsxdr.c 2011-05-22 16:17:53.000000000 +0200
5855+++ linux-3.0.9-vs2.3.2.1/fs/nfsd/nfsxdr.c 2011-06-10 22:11:24.000000000 +0200
5856@@ -6,6 +6,7 @@
5857
5858 #include "xdr.h"
5859 #include "auth.h"
5860+#include <linux/vs_tag.h>
5861
5862 #define NFSDDBG_FACILITY NFSDDBG_XDR
5863
5864@@ -88,6 +89,8 @@ static __be32 *
5865 decode_sattr(__be32 *p, struct iattr *iap)
5866 {
5867 u32 tmp, tmp1;
5868+ uid_t uid = 0;
5869+ gid_t gid = 0;
5870
5871 iap->ia_valid = 0;
5872
5873@@ -101,12 +104,15 @@ decode_sattr(__be32 *p, struct iattr *ia
5874 }
5875 if ((tmp = ntohl(*p++)) != (u32)-1) {
5876 iap->ia_valid |= ATTR_UID;
5877- iap->ia_uid = tmp;
5878+ uid = tmp;
5879 }
5880 if ((tmp = ntohl(*p++)) != (u32)-1) {
5881 iap->ia_valid |= ATTR_GID;
5882- iap->ia_gid = tmp;
5883+ gid = tmp;
5884 }
5885+ iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid);
5886+ iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid);
5887+ iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0);
5888 if ((tmp = ntohl(*p++)) != (u32)-1) {
5889 iap->ia_valid |= ATTR_SIZE;
5890 iap->ia_size = tmp;
5891@@ -151,8 +157,10 @@ encode_fattr(struct svc_rqst *rqstp, __b
5892 *p++ = htonl(nfs_ftypes[type >> 12]);
5893 *p++ = htonl((u32) stat->mode);
5894 *p++ = htonl((u32) stat->nlink);
5895- *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
5896- *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
5897+ *p++ = htonl((u32) nfsd_ruid(rqstp,
5898+ TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
5899+ *p++ = htonl((u32) nfsd_rgid(rqstp,
5900+ TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
5901
5902 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
5903 *p++ = htonl(NFS_MAXPATHLEN);
5904diff -NurpP --minimal linux-3.0.9/fs/ocfs2/dlmglue.c linux-3.0.9-vs2.3.2.1/fs/ocfs2/dlmglue.c
5905--- linux-3.0.9/fs/ocfs2/dlmglue.c 2011-05-22 16:17:53.000000000 +0200
5906+++ linux-3.0.9-vs2.3.2.1/fs/ocfs2/dlmglue.c 2011-06-10 22:11:24.000000000 +0200
5907@@ -2041,6 +2041,7 @@ static void __ocfs2_stuff_meta_lvb(struc
5908 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
5909 lvb->lvb_iuid = cpu_to_be32(inode->i_uid);
5910 lvb->lvb_igid = cpu_to_be32(inode->i_gid);
5911+ lvb->lvb_itag = cpu_to_be16(inode->i_tag);
5912 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
5913 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
5914 lvb->lvb_iatime_packed =
5915@@ -2091,6 +2092,7 @@ static void ocfs2_refresh_inode_from_lvb
5916
5917 inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
5918 inode->i_gid = be32_to_cpu(lvb->lvb_igid);
5919+ inode->i_tag = be16_to_cpu(lvb->lvb_itag);
5920 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
5921 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink);
5922 ocfs2_unpack_timespec(&inode->i_atime,
5923diff -NurpP --minimal linux-3.0.9/fs/ocfs2/dlmglue.h linux-3.0.9-vs2.3.2.1/fs/ocfs2/dlmglue.h
5924--- linux-3.0.9/fs/ocfs2/dlmglue.h 2010-10-21 13:07:50.000000000 +0200
5925+++ linux-3.0.9-vs2.3.2.1/fs/ocfs2/dlmglue.h 2011-06-10 22:11:24.000000000 +0200
5926@@ -46,7 +46,8 @@ struct ocfs2_meta_lvb {
5927 __be16 lvb_inlink;
5928 __be32 lvb_iattr;
5929 __be32 lvb_igeneration;
5930- __be32 lvb_reserved2;
5931+ __be16 lvb_itag;
5932+ __be16 lvb_reserved2;
5933 };
5934
5935 #define OCFS2_QINFO_LVB_VERSION 1
5936diff -NurpP --minimal linux-3.0.9/fs/ocfs2/file.c linux-3.0.9-vs2.3.2.1/fs/ocfs2/file.c
5937--- linux-3.0.9/fs/ocfs2/file.c 2011-07-22 11:18:06.000000000 +0200
5938+++ linux-3.0.9-vs2.3.2.1/fs/ocfs2/file.c 2011-06-10 22:43:33.000000000 +0200
5939@@ -1111,7 +1111,7 @@ int ocfs2_setattr(struct dentry *dentry,
5940 attr->ia_valid &= ~ATTR_SIZE;
5941
5942 #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
5943- | ATTR_GID | ATTR_UID | ATTR_MODE)
5944+ | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE)
5945 if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
5946 return 0;
5947
5948diff -NurpP --minimal linux-3.0.9/fs/ocfs2/inode.c linux-3.0.9-vs2.3.2.1/fs/ocfs2/inode.c
5949--- linux-3.0.9/fs/ocfs2/inode.c 2011-05-22 16:17:53.000000000 +0200
5950+++ linux-3.0.9-vs2.3.2.1/fs/ocfs2/inode.c 2011-06-10 22:11:24.000000000 +0200
5951@@ -28,6 +28,7 @@
5952 #include <linux/highmem.h>
5953 #include <linux/pagemap.h>
5954 #include <linux/quotaops.h>
5955+#include <linux/vs_tag.h>
5956
5957 #include <asm/byteorder.h>
5958
5959@@ -78,11 +79,13 @@ void ocfs2_set_inode_flags(struct inode
5960 {
5961 unsigned int flags = OCFS2_I(inode)->ip_attr;
5962
5963- inode->i_flags &= ~(S_IMMUTABLE |
5964+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
5965 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
5966
5967 if (flags & OCFS2_IMMUTABLE_FL)
5968 inode->i_flags |= S_IMMUTABLE;
5969+ if (flags & OCFS2_IXUNLINK_FL)
5970+ inode->i_flags |= S_IXUNLINK;
5971
5972 if (flags & OCFS2_SYNC_FL)
5973 inode->i_flags |= S_SYNC;
5974@@ -92,25 +95,44 @@ void ocfs2_set_inode_flags(struct inode
5975 inode->i_flags |= S_NOATIME;
5976 if (flags & OCFS2_DIRSYNC_FL)
5977 inode->i_flags |= S_DIRSYNC;
5978+
5979+ inode->i_vflags &= ~(V_BARRIER | V_COW);
5980+
5981+ if (flags & OCFS2_BARRIER_FL)
5982+ inode->i_vflags |= V_BARRIER;
5983+ if (flags & OCFS2_COW_FL)
5984+ inode->i_vflags |= V_COW;
5985 }
5986
5987 /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
5988 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
5989 {
5990 unsigned int flags = oi->vfs_inode.i_flags;
5991+ unsigned int vflags = oi->vfs_inode.i_vflags;
5992+
5993+ oi->ip_attr &= ~(OCFS2_SYNC_FL | OCFS2_APPEND_FL |
5994+ OCFS2_IMMUTABLE_FL | OCFS2_IXUNLINK_FL |
5995+ OCFS2_NOATIME_FL | OCFS2_DIRSYNC_FL |
5996+ OCFS2_BARRIER_FL | OCFS2_COW_FL);
5997+
5998+ if (flags & S_IMMUTABLE)
5999+ oi->ip_attr |= OCFS2_IMMUTABLE_FL;
6000+ if (flags & S_IXUNLINK)
6001+ oi->ip_attr |= OCFS2_IXUNLINK_FL;
6002
6003- oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL|
6004- OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL);
6005 if (flags & S_SYNC)
6006 oi->ip_attr |= OCFS2_SYNC_FL;
6007 if (flags & S_APPEND)
6008 oi->ip_attr |= OCFS2_APPEND_FL;
6009- if (flags & S_IMMUTABLE)
6010- oi->ip_attr |= OCFS2_IMMUTABLE_FL;
6011 if (flags & S_NOATIME)
6012 oi->ip_attr |= OCFS2_NOATIME_FL;
6013 if (flags & S_DIRSYNC)
6014 oi->ip_attr |= OCFS2_DIRSYNC_FL;
6015+
6016+ if (vflags & V_BARRIER)
6017+ oi->ip_attr |= OCFS2_BARRIER_FL;
6018+ if (vflags & V_COW)
6019+ oi->ip_attr |= OCFS2_COW_FL;
6020 }
6021
6022 struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
6023@@ -241,6 +263,8 @@ void ocfs2_populate_inode(struct inode *
6024 struct super_block *sb;
6025 struct ocfs2_super *osb;
6026 int use_plocks = 1;
6027+ uid_t uid;
6028+ gid_t gid;
6029
6030 sb = inode->i_sb;
6031 osb = OCFS2_SB(sb);
6032@@ -269,8 +293,12 @@ void ocfs2_populate_inode(struct inode *
6033 inode->i_generation = le32_to_cpu(fe->i_generation);
6034 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
6035 inode->i_mode = le16_to_cpu(fe->i_mode);
6036- inode->i_uid = le32_to_cpu(fe->i_uid);
6037- inode->i_gid = le32_to_cpu(fe->i_gid);
6038+ uid = le32_to_cpu(fe->i_uid);
6039+ gid = le32_to_cpu(fe->i_gid);
6040+ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
6041+ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
6042+ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
6043+ /* le16_to_cpu(raw_inode->i_raw_tag)i */ 0);
6044
6045 /* Fast symlinks will have i_size but no allocated clusters. */
6046 if (S_ISLNK(inode->i_mode) && !fe->i_clusters)
6047diff -NurpP --minimal linux-3.0.9/fs/ocfs2/inode.h linux-3.0.9-vs2.3.2.1/fs/ocfs2/inode.h
6048--- linux-3.0.9/fs/ocfs2/inode.h 2011-01-05 21:50:26.000000000 +0100
6049+++ linux-3.0.9-vs2.3.2.1/fs/ocfs2/inode.h 2011-06-10 22:11:24.000000000 +0200
6050@@ -151,6 +151,7 @@ struct buffer_head *ocfs2_bread(struct i
6051
6052 void ocfs2_set_inode_flags(struct inode *inode);
6053 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
6054+int ocfs2_sync_flags(struct inode *inode, int, int);
6055
6056 static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
6057 {
6058diff -NurpP --minimal linux-3.0.9/fs/ocfs2/ioctl.c linux-3.0.9-vs2.3.2.1/fs/ocfs2/ioctl.c
6059--- linux-3.0.9/fs/ocfs2/ioctl.c 2011-07-22 11:18:06.000000000 +0200
6060+++ linux-3.0.9-vs2.3.2.1/fs/ocfs2/ioctl.c 2011-06-10 22:11:24.000000000 +0200
6061@@ -78,7 +78,41 @@ static int ocfs2_get_inode_attr(struct i
6062 return status;
6063 }
6064
6065-static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
6066+int ocfs2_sync_flags(struct inode *inode, int flags, int vflags)
6067+{
6068+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6069+ struct buffer_head *bh = NULL;
6070+ handle_t *handle = NULL;
6071+ int status;
6072+
6073+ status = ocfs2_inode_lock(inode, &bh, 1);
6074+ if (status < 0) {
6075+ mlog_errno(status);
6076+ return status;
6077+ }
6078+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
6079+ if (IS_ERR(handle)) {
6080+ status = PTR_ERR(handle);
6081+ mlog_errno(status);
6082+ goto bail_unlock;
6083+ }
6084+
6085+ inode->i_flags = flags;
6086+ inode->i_vflags = vflags;
6087+ ocfs2_get_inode_flags(OCFS2_I(inode));
6088+
6089+ status = ocfs2_mark_inode_dirty(handle, inode, bh);
6090+ if (status < 0)
6091+ mlog_errno(status);
6092+
6093+ ocfs2_commit_trans(osb, handle);
6094+bail_unlock:
6095+ ocfs2_inode_unlock(inode, 1);
6096+ brelse(bh);
6097+ return status;
6098+}
6099+
6100+int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
6101 unsigned mask)
6102 {
6103 struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
6104@@ -103,6 +137,11 @@ static int ocfs2_set_inode_attr(struct i
6105 if (!S_ISDIR(inode->i_mode))
6106 flags &= ~OCFS2_DIRSYNC_FL;
6107
6108+ if (IS_BARRIER(inode)) {
6109+ vxwprintk_task(1, "messing with the barrier.");
6110+ goto bail_unlock;
6111+ }
6112+
6113 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
6114 if (IS_ERR(handle)) {
6115 status = PTR_ERR(handle);
6116@@ -880,6 +919,7 @@ bail:
6117 return status;
6118 }
6119
6120+
6121 long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
6122 {
6123 struct inode *inode = filp->f_path.dentry->d_inode;
6124diff -NurpP --minimal linux-3.0.9/fs/ocfs2/namei.c linux-3.0.9-vs2.3.2.1/fs/ocfs2/namei.c
6125--- linux-3.0.9/fs/ocfs2/namei.c 2011-05-22 16:17:53.000000000 +0200
6126+++ linux-3.0.9-vs2.3.2.1/fs/ocfs2/namei.c 2011-06-10 22:11:24.000000000 +0200
6127@@ -41,6 +41,7 @@
6128 #include <linux/slab.h>
6129 #include <linux/highmem.h>
6130 #include <linux/quotaops.h>
6131+#include <linux/vs_tag.h>
6132
6133 #include <cluster/masklog.h>
6134
6135@@ -477,6 +478,7 @@ static int __ocfs2_mknod_locked(struct i
6136 struct ocfs2_dinode *fe = NULL;
6137 struct ocfs2_extent_list *fel;
6138 u16 feat;
6139+ tag_t tag;
6140
6141 *new_fe_bh = NULL;
6142
6143@@ -514,8 +516,11 @@ static int __ocfs2_mknod_locked(struct i
6144 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
6145 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
6146 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
6147- fe->i_uid = cpu_to_le32(inode->i_uid);
6148- fe->i_gid = cpu_to_le32(inode->i_gid);
6149+
6150+ tag = dx_current_fstag(osb->sb);
6151+ fe->i_uid = cpu_to_le32(TAGINO_UID(DX_TAG(inode), inode->i_uid, tag));
6152+ fe->i_gid = cpu_to_le32(TAGINO_GID(DX_TAG(inode), inode->i_gid, tag));
6153+ inode->i_tag = tag;
6154 fe->i_mode = cpu_to_le16(inode->i_mode);
6155 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
6156 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
6157diff -NurpP --minimal linux-3.0.9/fs/ocfs2/ocfs2.h linux-3.0.9-vs2.3.2.1/fs/ocfs2/ocfs2.h
6158--- linux-3.0.9/fs/ocfs2/ocfs2.h 2011-05-22 16:17:53.000000000 +0200
6159+++ linux-3.0.9-vs2.3.2.1/fs/ocfs2/ocfs2.h 2011-06-10 22:11:24.000000000 +0200
6160@@ -272,6 +272,7 @@ enum ocfs2_mount_options
6161 writes */
6162 OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */
6163 OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
6164+ OCFS2_MOUNT_TAGGED = 1 << 15, /* use tagging */
6165 };
6166
6167 #define OCFS2_OSB_SOFT_RO 0x0001
6168diff -NurpP --minimal linux-3.0.9/fs/ocfs2/ocfs2_fs.h linux-3.0.9-vs2.3.2.1/fs/ocfs2/ocfs2_fs.h
6169--- linux-3.0.9/fs/ocfs2/ocfs2_fs.h 2011-05-22 16:17:53.000000000 +0200
6170+++ linux-3.0.9-vs2.3.2.1/fs/ocfs2/ocfs2_fs.h 2011-06-10 22:11:24.000000000 +0200
6171@@ -266,6 +266,11 @@
6172 #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
6173 #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
6174
6175+#define OCFS2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
6176+
6177+#define OCFS2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
6178+#define OCFS2_COW_FL FS_COW_FL /* Copy on Write marker */
6179+
6180 #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
6181 #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
6182
6183diff -NurpP --minimal linux-3.0.9/fs/ocfs2/super.c linux-3.0.9-vs2.3.2.1/fs/ocfs2/super.c
6184--- linux-3.0.9/fs/ocfs2/super.c 2011-07-22 11:18:06.000000000 +0200
6185+++ linux-3.0.9-vs2.3.2.1/fs/ocfs2/super.c 2011-06-15 02:40:14.000000000 +0200
6186@@ -184,6 +184,7 @@ enum {
6187 Opt_coherency_full,
6188 Opt_resv_level,
6189 Opt_dir_resv_level,
6190+ Opt_tag, Opt_notag, Opt_tagid,
6191 Opt_err,
6192 };
6193
6194@@ -215,6 +216,9 @@ static const match_table_t tokens = {
6195 {Opt_coherency_full, "coherency=full"},
6196 {Opt_resv_level, "resv_level=%u"},
6197 {Opt_dir_resv_level, "dir_resv_level=%u"},
6198+ {Opt_tag, "tag"},
6199+ {Opt_notag, "notag"},
6200+ {Opt_tagid, "tagid=%u"},
6201 {Opt_err, NULL}
6202 };
6203
6204@@ -662,6 +666,13 @@ static int ocfs2_remount(struct super_bl
6205 goto out;
6206 }
6207
6208+ if ((osb->s_mount_opt & OCFS2_MOUNT_TAGGED) !=
6209+ (parsed_options.mount_opt & OCFS2_MOUNT_TAGGED)) {
6210+ ret = -EINVAL;
6211+ mlog(ML_ERROR, "Cannot change tagging on remount\n");
6212+ goto out;
6213+ }
6214+
6215 /* We're going to/from readonly mode. */
6216 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
6217 /* Disable quota accounting before remounting RO */
6218@@ -1177,6 +1188,9 @@ static int ocfs2_fill_super(struct super
6219
6220 ocfs2_complete_mount_recovery(osb);
6221
6222+ if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED)
6223+ sb->s_flags |= MS_TAGGED;
6224+
6225 if (ocfs2_mount_local(osb))
6226 snprintf(nodestr, sizeof(nodestr), "local");
6227 else
6228@@ -1506,6 +1520,20 @@ static int ocfs2_parse_options(struct su
6229 option < OCFS2_MAX_RESV_LEVEL)
6230 mopt->dir_resv_level = option;
6231 break;
6232+#ifndef CONFIG_TAGGING_NONE
6233+ case Opt_tag:
6234+ mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
6235+ break;
6236+ case Opt_notag:
6237+ mopt->mount_opt &= ~OCFS2_MOUNT_TAGGED;
6238+ break;
6239+#endif
6240+#ifdef CONFIG_PROPAGATE
6241+ case Opt_tagid:
6242+ /* use args[0] */
6243+ mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
6244+ break;
6245+#endif
6246 default:
6247 mlog(ML_ERROR,
6248 "Unrecognized mount option \"%s\" "
6249diff -NurpP --minimal linux-3.0.9/fs/open.c linux-3.0.9-vs2.3.2.1/fs/open.c
6250--- linux-3.0.9/fs/open.c 2011-05-22 16:17:53.000000000 +0200
6251+++ linux-3.0.9-vs2.3.2.1/fs/open.c 2011-06-10 22:11:24.000000000 +0200
6252@@ -30,6 +30,11 @@
6253 #include <linux/fs_struct.h>
6254 #include <linux/ima.h>
6255 #include <linux/dnotify.h>
6256+#include <linux/vs_base.h>
6257+#include <linux/vs_limit.h>
6258+#include <linux/vs_tag.h>
6259+#include <linux/vs_cowbl.h>
6260+#include <linux/vserver/dlimit.h>
6261
6262 #include "internal.h"
6263
6264@@ -494,6 +499,12 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons
6265 error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
6266 if (error)
6267 goto out;
6268+
6269+#ifdef CONFIG_VSERVER_COWBL
6270+ error = cow_check_and_break(&path);
6271+ if (error)
6272+ goto dput_and_out;
6273+#endif
6274 inode = path.dentry->d_inode;
6275
6276 error = mnt_want_write(path.mnt);
6277@@ -531,11 +542,11 @@ static int chown_common(struct path *pat
6278 newattrs.ia_valid = ATTR_CTIME;
6279 if (user != (uid_t) -1) {
6280 newattrs.ia_valid |= ATTR_UID;
6281- newattrs.ia_uid = user;
6282+ newattrs.ia_uid = dx_map_uid(user);
6283 }
6284 if (group != (gid_t) -1) {
6285 newattrs.ia_valid |= ATTR_GID;
6286- newattrs.ia_gid = group;
6287+ newattrs.ia_gid = dx_map_gid(group);
6288 }
6289 if (!S_ISDIR(inode->i_mode))
6290 newattrs.ia_valid |=
6291@@ -560,6 +571,10 @@ SYSCALL_DEFINE3(chown, const char __user
6292 error = mnt_want_write(path.mnt);
6293 if (error)
6294 goto out_release;
6295+#ifdef CONFIG_VSERVER_COWBL
6296+ error = cow_check_and_break(&path);
6297+ if (!error)
6298+#endif
6299 error = chown_common(&path, user, group);
6300 mnt_drop_write(path.mnt);
6301 out_release:
6302@@ -587,6 +602,10 @@ SYSCALL_DEFINE5(fchownat, int, dfd, cons
6303 error = mnt_want_write(path.mnt);
6304 if (error)
6305 goto out_release;
6306+#ifdef CONFIG_VSERVER_COWBL
6307+ error = cow_check_and_break(&path);
6308+ if (!error)
6309+#endif
6310 error = chown_common(&path, user, group);
6311 mnt_drop_write(path.mnt);
6312 out_release:
6313@@ -606,6 +625,10 @@ SYSCALL_DEFINE3(lchown, const char __use
6314 error = mnt_want_write(path.mnt);
6315 if (error)
6316 goto out_release;
6317+#ifdef CONFIG_VSERVER_COWBL
6318+ error = cow_check_and_break(&path);
6319+ if (!error)
6320+#endif
6321 error = chown_common(&path, user, group);
6322 mnt_drop_write(path.mnt);
6323 out_release:
6324@@ -857,6 +880,7 @@ static void __put_unused_fd(struct files
6325 __FD_CLR(fd, fdt->open_fds);
6326 if (fd < files->next_fd)
6327 files->next_fd = fd;
6328+ vx_openfd_dec(fd);
6329 }
6330
6331 void put_unused_fd(unsigned int fd)
6332diff -NurpP --minimal linux-3.0.9/fs/proc/array.c linux-3.0.9-vs2.3.2.1/fs/proc/array.c
6333--- linux-3.0.9/fs/proc/array.c 2011-07-22 11:18:06.000000000 +0200
6334+++ linux-3.0.9-vs2.3.2.1/fs/proc/array.c 2011-06-10 22:11:24.000000000 +0200
6335@@ -81,6 +81,8 @@
6336 #include <linux/pid_namespace.h>
6337 #include <linux/ptrace.h>
6338 #include <linux/tracehook.h>
6339+#include <linux/vs_context.h>
6340+#include <linux/vs_network.h>
6341
6342 #include <asm/pgtable.h>
6343 #include <asm/processor.h>
6344@@ -170,6 +172,9 @@ static inline void task_state(struct seq
6345 rcu_read_lock();
6346 ppid = pid_alive(p) ?
6347 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
6348+ if (unlikely(vx_current_initpid(p->pid)))
6349+ ppid = 0;
6350+
6351 tpid = 0;
6352 if (pid_alive(p)) {
6353 struct task_struct *tracer = tracehook_tracer_task(p);
6354@@ -287,7 +292,7 @@ static inline void task_sig(struct seq_f
6355 }
6356
6357 static void render_cap_t(struct seq_file *m, const char *header,
6358- kernel_cap_t *a)
6359+ struct vx_info *vxi, kernel_cap_t *a)
6360 {
6361 unsigned __capi;
6362
6363@@ -312,10 +317,11 @@ static inline void task_cap(struct seq_f
6364 cap_bset = cred->cap_bset;
6365 rcu_read_unlock();
6366
6367- render_cap_t(m, "CapInh:\t", &cap_inheritable);
6368- render_cap_t(m, "CapPrm:\t", &cap_permitted);
6369- render_cap_t(m, "CapEff:\t", &cap_effective);
6370- render_cap_t(m, "CapBnd:\t", &cap_bset);
6371+ /* FIXME: maybe move the p->vx_info masking to __task_cred() ? */
6372+ render_cap_t(m, "CapInh:\t", p->vx_info, &cap_inheritable);
6373+ render_cap_t(m, "CapPrm:\t", p->vx_info, &cap_permitted);
6374+ render_cap_t(m, "CapEff:\t", p->vx_info, &cap_effective);
6375+ render_cap_t(m, "CapBnd:\t", p->vx_info, &cap_bset);
6376 }
6377
6378 static inline void task_context_switch_counts(struct seq_file *m,
6379@@ -337,6 +343,42 @@ static void task_cpus_allowed(struct seq
6380 seq_putc(m, '\n');
6381 }
6382
6383+int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
6384+ struct pid *pid, struct task_struct *task)
6385+{
6386+ seq_printf(m, "Proxy:\t%p(%c)\n"
6387+ "Count:\t%u\n"
6388+ "uts:\t%p(%c)\n"
6389+ "ipc:\t%p(%c)\n"
6390+ "mnt:\t%p(%c)\n"
6391+ "pid:\t%p(%c)\n"
6392+ "net:\t%p(%c)\n",
6393+ task->nsproxy,
6394+ (task->nsproxy == init_task.nsproxy ? 'I' : '-'),
6395+ atomic_read(&task->nsproxy->count),
6396+ task->nsproxy->uts_ns,
6397+ (task->nsproxy->uts_ns == init_task.nsproxy->uts_ns ? 'I' : '-'),
6398+ task->nsproxy->ipc_ns,
6399+ (task->nsproxy->ipc_ns == init_task.nsproxy->ipc_ns ? 'I' : '-'),
6400+ task->nsproxy->mnt_ns,
6401+ (task->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns ? 'I' : '-'),
6402+ task->nsproxy->pid_ns,
6403+ (task->nsproxy->pid_ns == init_task.nsproxy->pid_ns ? 'I' : '-'),
6404+ task->nsproxy->net_ns,
6405+ (task->nsproxy->net_ns == init_task.nsproxy->net_ns ? 'I' : '-'));
6406+ return 0;
6407+}
6408+
6409+void task_vs_id(struct seq_file *m, struct task_struct *task)
6410+{
6411+ if (task_vx_flags(task, VXF_HIDE_VINFO, 0))
6412+ return;
6413+
6414+ seq_printf(m, "VxID: %d\n", vx_task_xid(task));
6415+ seq_printf(m, "NxID: %d\n", nx_task_nid(task));
6416+}
6417+
6418+
6419 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
6420 struct pid *pid, struct task_struct *task)
6421 {
6422@@ -353,6 +395,7 @@ int proc_pid_status(struct seq_file *m,
6423 task_cap(m, task);
6424 task_cpus_allowed(m, task);
6425 cpuset_task_status_allowed(m, task);
6426+ task_vs_id(m, task);
6427 task_context_switch_counts(m, task);
6428 return 0;
6429 }
6430@@ -462,6 +505,17 @@ static int do_task_stat(struct seq_file
6431 /* convert nsec -> ticks */
6432 start_time = nsec_to_clock_t(start_time);
6433
6434+ /* fixup start time for virt uptime */
6435+ if (vx_flags(VXF_VIRT_UPTIME, 0)) {
6436+ unsigned long long bias =
6437+ current->vx_info->cvirt.bias_clock;
6438+
6439+ if (start_time > bias)
6440+ start_time -= bias;
6441+ else
6442+ start_time = 0;
6443+ }
6444+
6445 seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
6446 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
6447 %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
6448diff -NurpP --minimal linux-3.0.9/fs/proc/base.c linux-3.0.9-vs2.3.2.1/fs/proc/base.c
6449--- linux-3.0.9/fs/proc/base.c 2011-11-15 16:40:47.000000000 +0100
6450+++ linux-3.0.9-vs2.3.2.1/fs/proc/base.c 2011-11-15 17:37:07.000000000 +0100
6451@@ -83,6 +83,8 @@
6452 #include <linux/pid_namespace.h>
6453 #include <linux/fs_struct.h>
6454 #include <linux/slab.h>
6455+#include <linux/vs_context.h>
6456+#include <linux/vs_network.h>
6457 #ifdef CONFIG_HARDWALL
6458 #include <asm/hardwall.h>
6459 #endif
6460@@ -1102,11 +1104,16 @@ static ssize_t oom_adjust_write(struct f
6461 goto err_task_lock;
6462 }
6463
6464- if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
6465+ if (oom_adjust < task->signal->oom_adj &&
6466+ !vx_capable(CAP_SYS_RESOURCE, VXC_OOM_ADJUST)) {
6467 err = -EACCES;
6468 goto err_sighand;
6469 }
6470
6471+ /* prevent guest processes from circumventing the oom killer */
6472+ if (vx_current_xid() && (oom_adjust == OOM_DISABLE))
6473+ oom_adjust = OOM_ADJUST_MIN;
6474+
6475 if (oom_adjust != task->signal->oom_adj) {
6476 if (oom_adjust == OOM_DISABLE)
6477 atomic_inc(&task->mm->oom_disable_count);
6478@@ -1275,7 +1282,7 @@ static ssize_t proc_loginuid_write(struc
6479 ssize_t length;
6480 uid_t loginuid;
6481
6482- if (!capable(CAP_AUDIT_CONTROL))
6483+ if (!vx_capable(CAP_AUDIT_CONTROL, VXC_AUDIT_CONTROL))
6484 return -EPERM;
6485
6486 rcu_read_lock();
6487@@ -1722,6 +1729,8 @@ struct inode *proc_pid_make_inode(struct
6488 inode->i_gid = cred->egid;
6489 rcu_read_unlock();
6490 }
6491+ /* procfs is xid tagged */
6492+ inode->i_tag = (tag_t)vx_task_xid(task);
6493 security_task_to_inode(task, inode);
6494
6495 out:
6496@@ -1758,6 +1767,8 @@ int pid_getattr(struct vfsmount *mnt, st
6497
6498 /* dentry stuff */
6499
6500+static unsigned name_to_int(struct dentry *dentry);
6501+
6502 /*
6503 * Exceptional case: normally we are not allowed to unhash a busy
6504 * directory. In this case, however, we can do it - no aliasing problems
6505@@ -1786,6 +1797,12 @@ int pid_revalidate(struct dentry *dentry
6506 task = get_proc_task(inode);
6507
6508 if (task) {
6509+ unsigned pid = name_to_int(dentry);
6510+
6511+ if (pid != ~0U && pid != vx_map_pid(task->pid)) {
6512+ put_task_struct(task);
6513+ goto drop;
6514+ }
6515 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
6516 task_dumpable(task)) {
6517 rcu_read_lock();
6518@@ -1802,6 +1819,7 @@ int pid_revalidate(struct dentry *dentry
6519 put_task_struct(task);
6520 return 1;
6521 }
6522+drop:
6523 d_drop(dentry);
6524 return 0;
6525 }
6526@@ -2291,6 +2309,13 @@ static struct dentry *proc_pident_lookup
6527 if (!task)
6528 goto out_no_task;
6529
6530+ /* TODO: maybe we can come up with a generic approach? */
6531+ if (task_vx_flags(task, VXF_HIDE_VINFO, 0) &&
6532+ (dentry->d_name.len == 5) &&
6533+ (!memcmp(dentry->d_name.name, "vinfo", 5) ||
6534+ !memcmp(dentry->d_name.name, "ninfo", 5)))
6535+ goto out;
6536+
6537 /*
6538 * Yes, it does not scale. And it should not. Don't add
6539 * new entries into /proc/<tgid>/ without very good reasons.
6540@@ -2676,7 +2701,7 @@ out_iput:
6541 static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
6542 {
6543 struct dentry *error;
6544- struct task_struct *task = get_proc_task(dir);
6545+ struct task_struct *task = get_proc_task_real(dir);
6546 const struct pid_entry *p, *last;
6547
6548 error = ERR_PTR(-ENOENT);
6549@@ -2783,6 +2808,9 @@ static int proc_pid_personality(struct s
6550 static const struct file_operations proc_task_operations;
6551 static const struct inode_operations proc_task_inode_operations;
6552
6553+extern int proc_pid_vx_info(struct task_struct *, char *);
6554+extern int proc_pid_nx_info(struct task_struct *, char *);
6555+
6556 static const struct pid_entry tgid_base_stuff[] = {
6557 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
6558 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
6559@@ -2846,6 +2874,8 @@ static const struct pid_entry tgid_base_
6560 #ifdef CONFIG_CGROUPS
6561 REG("cgroup", S_IRUGO, proc_cgroup_operations),
6562 #endif
6563+ INF("vinfo", S_IRUGO, proc_pid_vx_info),
6564+ INF("ninfo", S_IRUGO, proc_pid_nx_info),
6565 INF("oom_score", S_IRUGO, proc_oom_score),
6566 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
6567 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
6568@@ -2865,6 +2895,7 @@ static const struct pid_entry tgid_base_
6569 #ifdef CONFIG_HARDWALL
6570 INF("hardwall", S_IRUGO, proc_pid_hardwall),
6571 #endif
6572+ ONE("nsproxy", S_IRUGO, proc_pid_nsproxy),
6573 };
6574
6575 static int proc_tgid_base_readdir(struct file * filp,
6576@@ -3057,7 +3088,7 @@ retry:
6577 iter.task = NULL;
6578 pid = find_ge_pid(iter.tgid, ns);
6579 if (pid) {
6580- iter.tgid = pid_nr_ns(pid, ns);
6581+ iter.tgid = pid_unmapped_nr_ns(pid, ns);
6582 iter.task = pid_task(pid, PIDTYPE_PID);
6583 /* What we to know is if the pid we have find is the
6584 * pid of a thread_group_leader. Testing for task
6585@@ -3087,7 +3118,7 @@ static int proc_pid_fill_cache(struct fi
6586 struct tgid_iter iter)
6587 {
6588 char name[PROC_NUMBUF];
6589- int len = snprintf(name, sizeof(name), "%d", iter.tgid);
6590+ int len = snprintf(name, sizeof(name), "%d", vx_map_tgid(iter.tgid));
6591 return proc_fill_cache(filp, dirent, filldir, name, len,
6592 proc_pid_instantiate, iter.task, NULL);
6593 }
6594@@ -3104,7 +3135,7 @@ int proc_pid_readdir(struct file * filp,
6595 goto out_no_task;
6596 nr = filp->f_pos - FIRST_PROCESS_ENTRY;
6597
6598- reaper = get_proc_task(filp->f_path.dentry->d_inode);
6599+ reaper = get_proc_task_real(filp->f_path.dentry->d_inode);
6600 if (!reaper)
6601 goto out_no_task;
6602
6603@@ -3121,6 +3152,8 @@ int proc_pid_readdir(struct file * filp,
6604 iter.task;
6605 iter.tgid += 1, iter = next_tgid(ns, iter)) {
6606 filp->f_pos = iter.tgid + TGID_OFFSET;
6607+ if (!vx_proc_task_visible(iter.task))
6608+ continue;
6609 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
6610 put_task_struct(iter.task);
6611 goto out;
6612@@ -3274,6 +3307,8 @@ static struct dentry *proc_task_lookup(s
6613 tid = name_to_int(dentry);
6614 if (tid == ~0U)
6615 goto out;
6616+ if (vx_current_initpid(tid))
6617+ goto out;
6618
6619 ns = dentry->d_sb->s_fs_info;
6620 rcu_read_lock();
6621diff -NurpP --minimal linux-3.0.9/fs/proc/generic.c linux-3.0.9-vs2.3.2.1/fs/proc/generic.c
6622--- linux-3.0.9/fs/proc/generic.c 2011-07-22 11:18:06.000000000 +0200
6623+++ linux-3.0.9-vs2.3.2.1/fs/proc/generic.c 2011-06-10 22:11:24.000000000 +0200
6624@@ -22,6 +22,7 @@
6625 #include <linux/bitops.h>
6626 #include <linux/spinlock.h>
6627 #include <linux/completion.h>
6628+#include <linux/vserver/inode.h>
6629 #include <asm/uaccess.h>
6630
6631 #include "internal.h"
6632@@ -424,11 +425,15 @@ struct dentry *proc_lookup_de(struct pro
6633 for (de = de->subdir; de ; de = de->next) {
6634 if (de->namelen != dentry->d_name.len)
6635 continue;
6636+ if (!vx_hide_check(0, de->vx_flags))
6637+ continue;
6638 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
6639 pde_get(de);
6640 spin_unlock(&proc_subdir_lock);
6641 error = -EINVAL;
6642 inode = proc_get_inode(dir->i_sb, de);
6643+ /* generic proc entries belong to the host */
6644+ inode->i_tag = 0;
6645 goto out_unlock;
6646 }
6647 }
6648@@ -506,6 +511,8 @@ int proc_readdir_de(struct proc_dir_entr
6649
6650 /* filldir passes info to user space */
6651 pde_get(de);
6652+ if (!vx_hide_check(0, de->vx_flags))
6653+ goto skip;
6654 spin_unlock(&proc_subdir_lock);
6655 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
6656 de->low_ino, de->mode >> 12) < 0) {
6657@@ -513,6 +520,7 @@ int proc_readdir_de(struct proc_dir_entr
6658 goto out;
6659 }
6660 spin_lock(&proc_subdir_lock);
6661+ skip:
6662 filp->f_pos++;
6663 next = de->next;
6664 pde_put(de);
6665@@ -627,6 +635,7 @@ static struct proc_dir_entry *__proc_cre
6666 ent->nlink = nlink;
6667 atomic_set(&ent->count, 1);
6668 ent->pde_users = 0;
6669+ ent->vx_flags = IATTR_PROC_DEFAULT;
6670 spin_lock_init(&ent->pde_unload_lock);
6671 ent->pde_unload_completion = NULL;
6672 INIT_LIST_HEAD(&ent->pde_openers);
6673@@ -650,7 +659,8 @@ struct proc_dir_entry *proc_symlink(cons
6674 kfree(ent->data);
6675 kfree(ent);
6676 ent = NULL;
6677- }
6678+ } else
6679+ ent->vx_flags = IATTR_PROC_SYMLINK;
6680 } else {
6681 kfree(ent);
6682 ent = NULL;
6683diff -NurpP --minimal linux-3.0.9/fs/proc/inode.c linux-3.0.9-vs2.3.2.1/fs/proc/inode.c
6684--- linux-3.0.9/fs/proc/inode.c 2011-07-22 11:18:06.000000000 +0200
6685+++ linux-3.0.9-vs2.3.2.1/fs/proc/inode.c 2011-06-10 22:11:24.000000000 +0200
6686@@ -442,6 +442,8 @@ struct inode *proc_get_inode(struct supe
6687 inode->i_uid = de->uid;
6688 inode->i_gid = de->gid;
6689 }
6690+ if (de->vx_flags)
6691+ PROC_I(inode)->vx_flags = de->vx_flags;
6692 if (de->size)
6693 inode->i_size = de->size;
6694 if (de->nlink)
6695diff -NurpP --minimal linux-3.0.9/fs/proc/internal.h linux-3.0.9-vs2.3.2.1/fs/proc/internal.h
6696--- linux-3.0.9/fs/proc/internal.h 2011-07-22 11:18:06.000000000 +0200
6697+++ linux-3.0.9-vs2.3.2.1/fs/proc/internal.h 2011-06-10 22:11:24.000000000 +0200
6698@@ -10,6 +10,7 @@
6699 */
6700
6701 #include <linux/proc_fs.h>
6702+#include <linux/vs_pid.h>
6703
6704 extern struct proc_dir_entry proc_root;
6705 #ifdef CONFIG_PROC_SYSCTL
6706@@ -51,6 +52,9 @@ extern int proc_pid_status(struct seq_fi
6707 struct pid *pid, struct task_struct *task);
6708 extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
6709 struct pid *pid, struct task_struct *task);
6710+extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
6711+ struct pid *pid, struct task_struct *task);
6712+
6713 extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
6714
6715 extern const struct file_operations proc_maps_operations;
6716@@ -76,11 +80,16 @@ static inline struct pid *proc_pid(struc
6717 return PROC_I(inode)->pid;
6718 }
6719
6720-static inline struct task_struct *get_proc_task(struct inode *inode)
6721+static inline struct task_struct *get_proc_task_real(struct inode *inode)
6722 {
6723 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
6724 }
6725
6726+static inline struct task_struct *get_proc_task(struct inode *inode)
6727+{
6728+ return vx_get_proc_task(inode, proc_pid(inode));
6729+}
6730+
6731 static inline int proc_fd(struct inode *inode)
6732 {
6733 return PROC_I(inode)->fd;
6734diff -NurpP --minimal linux-3.0.9/fs/proc/loadavg.c linux-3.0.9-vs2.3.2.1/fs/proc/loadavg.c
6735--- linux-3.0.9/fs/proc/loadavg.c 2009-09-10 15:26:23.000000000 +0200
6736+++ linux-3.0.9-vs2.3.2.1/fs/proc/loadavg.c 2011-06-10 22:11:24.000000000 +0200
6737@@ -12,15 +12,27 @@
6738
6739 static int loadavg_proc_show(struct seq_file *m, void *v)
6740 {
6741+ unsigned long running;
6742+ unsigned int threads;
6743 unsigned long avnrun[3];
6744
6745 get_avenrun(avnrun, FIXED_1/200, 0);
6746
6747+ if (vx_flags(VXF_VIRT_LOAD, 0)) {
6748+ struct vx_info *vxi = current_vx_info();
6749+
6750+ running = atomic_read(&vxi->cvirt.nr_running);
6751+ threads = atomic_read(&vxi->cvirt.nr_threads);
6752+ } else {
6753+ running = nr_running();
6754+ threads = nr_threads;
6755+ }
6756+
6757 seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
6758 LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
6759 LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
6760 LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
6761- nr_running(), nr_threads,
6762+ running, threads,
6763 task_active_pid_ns(current)->last_pid);
6764 return 0;
6765 }
6766diff -NurpP --minimal linux-3.0.9/fs/proc/meminfo.c linux-3.0.9-vs2.3.2.1/fs/proc/meminfo.c
6767--- linux-3.0.9/fs/proc/meminfo.c 2011-03-15 18:07:33.000000000 +0100
6768+++ linux-3.0.9-vs2.3.2.1/fs/proc/meminfo.c 2011-06-10 22:11:24.000000000 +0200
6769@@ -39,7 +39,8 @@ static int meminfo_proc_show(struct seq_
6770 allowed = ((totalram_pages - hugetlb_total_pages())
6771 * sysctl_overcommit_ratio / 100) + total_swap_pages;
6772
6773- cached = global_page_state(NR_FILE_PAGES) -
6774+ cached = vx_flags(VXF_VIRT_MEM, 0) ?
6775+ vx_vsi_cached(&i) : global_page_state(NR_FILE_PAGES) -
6776 total_swapcache_pages - i.bufferram;
6777 if (cached < 0)
6778 cached = 0;
6779diff -NurpP --minimal linux-3.0.9/fs/proc/root.c linux-3.0.9-vs2.3.2.1/fs/proc/root.c
6780--- linux-3.0.9/fs/proc/root.c 2011-07-22 11:18:06.000000000 +0200
6781+++ linux-3.0.9-vs2.3.2.1/fs/proc/root.c 2011-06-22 12:39:15.000000000 +0200
6782@@ -18,9 +18,14 @@
6783 #include <linux/bitops.h>
6784 #include <linux/mount.h>
6785 #include <linux/pid_namespace.h>
6786+#include <linux/vserver/inode.h>
6787
6788 #include "internal.h"
6789
6790+struct proc_dir_entry *proc_virtual;
6791+
6792+extern void proc_vx_init(void);
6793+
6794 static int proc_test_super(struct super_block *sb, void *data)
6795 {
6796 return sb->s_fs_info == data;
6797@@ -125,6 +130,7 @@ void __init proc_root_init(void)
6798 #endif
6799 proc_mkdir("bus", NULL);
6800 proc_sys_init();
6801+ proc_vx_init();
6802 }
6803
6804 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
6805@@ -193,6 +199,7 @@ struct proc_dir_entry proc_root = {
6806 .proc_iops = &proc_root_inode_operations,
6807 .proc_fops = &proc_root_operations,
6808 .parent = &proc_root,
6809+ .vx_flags = IATTR_ADMIN | IATTR_WATCH,
6810 };
6811
6812 int pid_ns_prepare_proc(struct pid_namespace *ns)
6813diff -NurpP --minimal linux-3.0.9/fs/proc/uptime.c linux-3.0.9-vs2.3.2.1/fs/proc/uptime.c
6814--- linux-3.0.9/fs/proc/uptime.c 2009-12-03 20:02:53.000000000 +0100
6815+++ linux-3.0.9-vs2.3.2.1/fs/proc/uptime.c 2011-06-10 22:11:24.000000000 +0200
6816@@ -4,22 +4,22 @@
6817 #include <linux/sched.h>
6818 #include <linux/seq_file.h>
6819 #include <linux/time.h>
6820-#include <linux/kernel_stat.h>
6821+#include <linux/vserver/cvirt.h>
6822 #include <asm/cputime.h>
6823
6824 static int uptime_proc_show(struct seq_file *m, void *v)
6825 {
6826 struct timespec uptime;
6827 struct timespec idle;
6828- int i;
6829- cputime_t idletime = cputime_zero;
6830-
6831- for_each_possible_cpu(i)
6832- idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
6833+ cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
6834
6835 do_posix_clock_monotonic_gettime(&uptime);
6836 monotonic_to_bootbased(&uptime);
6837 cputime_to_timespec(idletime, &idle);
6838+
6839+ if (vx_flags(VXF_VIRT_UPTIME, 0))
6840+ vx_vsi_uptime(&uptime, &idle);
6841+
6842 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
6843 (unsigned long) uptime.tv_sec,
6844 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
6845diff -NurpP --minimal linux-3.0.9/fs/quota/dquot.c linux-3.0.9-vs2.3.2.1/fs/quota/dquot.c
6846--- linux-3.0.9/fs/quota/dquot.c 2011-07-22 11:18:06.000000000 +0200
6847+++ linux-3.0.9-vs2.3.2.1/fs/quota/dquot.c 2011-06-10 22:11:24.000000000 +0200
6848@@ -1548,6 +1548,9 @@ int __dquot_alloc_space(struct inode *in
6849 int reserve = flags & DQUOT_SPACE_RESERVE;
6850 int nofail = flags & DQUOT_SPACE_NOFAIL;
6851
6852+ if ((ret = dl_alloc_space(inode, number)))
6853+ return ret;
6854+
6855 /*
6856 * First test before acquiring mutex - solves deadlocks when we
6857 * re-enter the quota code and are already holding the mutex
6858@@ -1602,6 +1605,9 @@ int dquot_alloc_inode(const struct inode
6859 int cnt, ret = 0;
6860 char warntype[MAXQUOTAS];
6861
6862+ if ((ret = dl_alloc_inode(inode)))
6863+ return ret;
6864+
6865 /* First test before acquiring mutex - solves deadlocks when we
6866 * re-enter the quota code and are already holding the mutex */
6867 if (!dquot_active(inode))
6868@@ -1672,6 +1678,8 @@ void __dquot_free_space(struct inode *in
6869 char warntype[MAXQUOTAS];
6870 int reserve = flags & DQUOT_SPACE_RESERVE;
6871
6872+ dl_free_space(inode, number);
6873+
6874 /* First test before acquiring mutex - solves deadlocks when we
6875 * re-enter the quota code and are already holding the mutex */
6876 if (!dquot_active(inode)) {
6877@@ -1710,6 +1718,8 @@ void dquot_free_inode(const struct inode
6878 unsigned int cnt;
6879 char warntype[MAXQUOTAS];
6880
6881+ dl_free_inode(inode);
6882+
6883 /* First test before acquiring mutex - solves deadlocks when we
6884 * re-enter the quota code and are already holding the mutex */
6885 if (!dquot_active(inode))
6886diff -NurpP --minimal linux-3.0.9/fs/quota/quota.c linux-3.0.9-vs2.3.2.1/fs/quota/quota.c
6887--- linux-3.0.9/fs/quota/quota.c 2011-11-15 16:40:47.000000000 +0100
6888+++ linux-3.0.9-vs2.3.2.1/fs/quota/quota.c 2011-11-15 17:37:07.000000000 +0100
6889@@ -8,6 +8,7 @@
6890 #include <linux/fs.h>
6891 #include <linux/namei.h>
6892 #include <linux/slab.h>
6893+#include <linux/vs_context.h>
6894 #include <asm/current.h>
6895 #include <asm/uaccess.h>
6896 #include <linux/kernel.h>
6897@@ -38,7 +39,7 @@ static int check_quotactl_permission(str
6898 break;
6899 /*FALLTHROUGH*/
6900 default:
6901- if (!capable(CAP_SYS_ADMIN))
6902+ if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
6903 return -EPERM;
6904 }
6905
6906@@ -293,6 +294,46 @@ static int do_quotactl(struct super_bloc
6907 }
6908 }
6909
6910+#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6911+
6912+#include <linux/vroot.h>
6913+#include <linux/major.h>
6914+#include <linux/module.h>
6915+#include <linux/kallsyms.h>
6916+#include <linux/vserver/debug.h>
6917+
6918+static vroot_grb_func *vroot_get_real_bdev = NULL;
6919+
6920+static DEFINE_SPINLOCK(vroot_grb_lock);
6921+
6922+int register_vroot_grb(vroot_grb_func *func) {
6923+ int ret = -EBUSY;
6924+
6925+ spin_lock(&vroot_grb_lock);
6926+ if (!vroot_get_real_bdev) {
6927+ vroot_get_real_bdev = func;
6928+ ret = 0;
6929+ }
6930+ spin_unlock(&vroot_grb_lock);
6931+ return ret;
6932+}
6933+EXPORT_SYMBOL(register_vroot_grb);
6934+
6935+int unregister_vroot_grb(vroot_grb_func *func) {
6936+ int ret = -EINVAL;
6937+
6938+ spin_lock(&vroot_grb_lock);
6939+ if (vroot_get_real_bdev) {
6940+ vroot_get_real_bdev = NULL;
6941+ ret = 0;
6942+ }
6943+ spin_unlock(&vroot_grb_lock);
6944+ return ret;
6945+}
6946+EXPORT_SYMBOL(unregister_vroot_grb);
6947+
6948+#endif
6949+
6950 /*
6951 * look up a superblock on which quota ops will be performed
6952 * - use the name of a block device to find the superblock thereon
6953@@ -310,6 +351,22 @@ static struct super_block *quotactl_bloc
6954 putname(tmp);
6955 if (IS_ERR(bdev))
6956 return ERR_CAST(bdev);
6957+#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6958+ if (bdev && bdev->bd_inode &&
6959+ imajor(bdev->bd_inode) == VROOT_MAJOR) {
6960+ struct block_device *bdnew = (void *)-EINVAL;
6961+
6962+ if (vroot_get_real_bdev)
6963+ bdnew = vroot_get_real_bdev(bdev);
6964+ else
6965+ vxdprintk(VXD_CBIT(misc, 0),
6966+ "vroot_get_real_bdev not set");
6967+ bdput(bdev);
6968+ if (IS_ERR(bdnew))
6969+ return ERR_PTR(PTR_ERR(bdnew));
6970+ bdev = bdnew;
6971+ }
6972+#endif
6973 sb = get_super(bdev);
6974 bdput(bdev);
6975 if (!sb)
6976diff -NurpP --minimal linux-3.0.9/fs/reiserfs/file.c linux-3.0.9-vs2.3.2.1/fs/reiserfs/file.c
6977--- linux-3.0.9/fs/reiserfs/file.c 2011-01-05 21:50:26.000000000 +0100
6978+++ linux-3.0.9-vs2.3.2.1/fs/reiserfs/file.c 2011-06-10 22:11:24.000000000 +0200
6979@@ -312,4 +312,5 @@ const struct inode_operations reiserfs_f
6980 .listxattr = reiserfs_listxattr,
6981 .removexattr = reiserfs_removexattr,
6982 .permission = reiserfs_permission,
6983+ .sync_flags = reiserfs_sync_flags,
6984 };
6985diff -NurpP --minimal linux-3.0.9/fs/reiserfs/inode.c linux-3.0.9-vs2.3.2.1/fs/reiserfs/inode.c
6986--- linux-3.0.9/fs/reiserfs/inode.c 2011-05-22 16:17:53.000000000 +0200
6987+++ linux-3.0.9-vs2.3.2.1/fs/reiserfs/inode.c 2011-06-10 22:11:24.000000000 +0200
6988@@ -18,6 +18,7 @@
6989 #include <linux/writeback.h>
6990 #include <linux/quotaops.h>
6991 #include <linux/swap.h>
6992+#include <linux/vs_tag.h>
6993
6994 int reiserfs_commit_write(struct file *f, struct page *page,
6995 unsigned from, unsigned to);
6996@@ -1131,6 +1132,8 @@ static void init_inode(struct inode *ino
6997 struct buffer_head *bh;
6998 struct item_head *ih;
6999 __u32 rdev;
7000+ uid_t uid;
7001+ gid_t gid;
7002 //int version = ITEM_VERSION_1;
7003
7004 bh = PATH_PLAST_BUFFER(path);
7005@@ -1151,12 +1154,13 @@ static void init_inode(struct inode *ino
7006 (struct stat_data_v1 *)B_I_PITEM(bh, ih);
7007 unsigned long blocks;
7008
7009+ uid = sd_v1_uid(sd);
7010+ gid = sd_v1_gid(sd);
7011+
7012 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
7013 set_inode_sd_version(inode, STAT_DATA_V1);
7014 inode->i_mode = sd_v1_mode(sd);
7015 inode->i_nlink = sd_v1_nlink(sd);
7016- inode->i_uid = sd_v1_uid(sd);
7017- inode->i_gid = sd_v1_gid(sd);
7018 inode->i_size = sd_v1_size(sd);
7019 inode->i_atime.tv_sec = sd_v1_atime(sd);
7020 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
7021@@ -1198,11 +1202,12 @@ static void init_inode(struct inode *ino
7022 // (directories and symlinks)
7023 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
7024
7025+ uid = sd_v2_uid(sd);
7026+ gid = sd_v2_gid(sd);
7027+
7028 inode->i_mode = sd_v2_mode(sd);
7029 inode->i_nlink = sd_v2_nlink(sd);
7030- inode->i_uid = sd_v2_uid(sd);
7031 inode->i_size = sd_v2_size(sd);
7032- inode->i_gid = sd_v2_gid(sd);
7033 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
7034 inode->i_atime.tv_sec = sd_v2_atime(sd);
7035 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
7036@@ -1232,6 +1237,10 @@ static void init_inode(struct inode *ino
7037 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
7038 }
7039
7040+ inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
7041+ inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
7042+ inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0);
7043+
7044 pathrelse(path);
7045 if (S_ISREG(inode->i_mode)) {
7046 inode->i_op = &reiserfs_file_inode_operations;
7047@@ -1254,13 +1263,15 @@ static void init_inode(struct inode *ino
7048 static void inode2sd(void *sd, struct inode *inode, loff_t size)
7049 {
7050 struct stat_data *sd_v2 = (struct stat_data *)sd;
7051+ uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
7052+ gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
7053 __u16 flags;
7054
7055+ set_sd_v2_uid(sd_v2, uid);
7056+ set_sd_v2_gid(sd_v2, gid);
7057 set_sd_v2_mode(sd_v2, inode->i_mode);
7058 set_sd_v2_nlink(sd_v2, inode->i_nlink);
7059- set_sd_v2_uid(sd_v2, inode->i_uid);
7060 set_sd_v2_size(sd_v2, size);
7061- set_sd_v2_gid(sd_v2, inode->i_gid);
7062 set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
7063 set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
7064 set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
7065@@ -2863,14 +2874,19 @@ int reiserfs_commit_write(struct file *f
7066 void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
7067 {
7068 if (reiserfs_attrs(inode->i_sb)) {
7069- if (sd_attrs & REISERFS_SYNC_FL)
7070- inode->i_flags |= S_SYNC;
7071- else
7072- inode->i_flags &= ~S_SYNC;
7073 if (sd_attrs & REISERFS_IMMUTABLE_FL)
7074 inode->i_flags |= S_IMMUTABLE;
7075 else
7076 inode->i_flags &= ~S_IMMUTABLE;
7077+ if (sd_attrs & REISERFS_IXUNLINK_FL)
7078+ inode->i_flags |= S_IXUNLINK;
7079+ else
7080+ inode->i_flags &= ~S_IXUNLINK;
7081+
7082+ if (sd_attrs & REISERFS_SYNC_FL)
7083+ inode->i_flags |= S_SYNC;
7084+ else
7085+ inode->i_flags &= ~S_SYNC;
7086 if (sd_attrs & REISERFS_APPEND_FL)
7087 inode->i_flags |= S_APPEND;
7088 else
7089@@ -2883,6 +2899,15 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs,
7090 REISERFS_I(inode)->i_flags |= i_nopack_mask;
7091 else
7092 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
7093+
7094+ if (sd_attrs & REISERFS_BARRIER_FL)
7095+ inode->i_vflags |= V_BARRIER;
7096+ else
7097+ inode->i_vflags &= ~V_BARRIER;
7098+ if (sd_attrs & REISERFS_COW_FL)
7099+ inode->i_vflags |= V_COW;
7100+ else
7101+ inode->i_vflags &= ~V_COW;
7102 }
7103 }
7104
7105@@ -2893,6 +2918,11 @@ void i_attrs_to_sd_attrs(struct inode *i
7106 *sd_attrs |= REISERFS_IMMUTABLE_FL;
7107 else
7108 *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
7109+ if (inode->i_flags & S_IXUNLINK)
7110+ *sd_attrs |= REISERFS_IXUNLINK_FL;
7111+ else
7112+ *sd_attrs &= ~REISERFS_IXUNLINK_FL;
7113+
7114 if (inode->i_flags & S_SYNC)
7115 *sd_attrs |= REISERFS_SYNC_FL;
7116 else
7117@@ -2905,6 +2935,15 @@ void i_attrs_to_sd_attrs(struct inode *i
7118 *sd_attrs |= REISERFS_NOTAIL_FL;
7119 else
7120 *sd_attrs &= ~REISERFS_NOTAIL_FL;
7121+
7122+ if (inode->i_vflags & V_BARRIER)
7123+ *sd_attrs |= REISERFS_BARRIER_FL;
7124+ else
7125+ *sd_attrs &= ~REISERFS_BARRIER_FL;
7126+ if (inode->i_vflags & V_COW)
7127+ *sd_attrs |= REISERFS_COW_FL;
7128+ else
7129+ *sd_attrs &= ~REISERFS_COW_FL;
7130 }
7131 }
7132
7133@@ -3148,7 +3187,8 @@ int reiserfs_setattr(struct dentry *dent
7134 }
7135
7136 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
7137- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
7138+ (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
7139+ (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
7140 struct reiserfs_transaction_handle th;
7141 int jbegin_count =
7142 2 *
7143@@ -3177,6 +3217,9 @@ int reiserfs_setattr(struct dentry *dent
7144 inode->i_uid = attr->ia_uid;
7145 if (attr->ia_valid & ATTR_GID)
7146 inode->i_gid = attr->ia_gid;
7147+ if ((attr->ia_valid & ATTR_TAG) &&
7148+ IS_TAGGED(inode))
7149+ inode->i_tag = attr->ia_tag;
7150 mark_inode_dirty(inode);
7151 error = journal_end(&th, inode->i_sb, jbegin_count);
7152 if (error)
7153diff -NurpP --minimal linux-3.0.9/fs/reiserfs/ioctl.c linux-3.0.9-vs2.3.2.1/fs/reiserfs/ioctl.c
7154--- linux-3.0.9/fs/reiserfs/ioctl.c 2011-05-22 16:17:53.000000000 +0200
7155+++ linux-3.0.9-vs2.3.2.1/fs/reiserfs/ioctl.c 2011-06-10 22:11:24.000000000 +0200
7156@@ -11,6 +11,21 @@
7157 #include <linux/pagemap.h>
7158 #include <linux/compat.h>
7159
7160+/* Propagate new VFS (flags) and vserver (vflags) inode flags into i_attrs. */
7161+int reiserfs_sync_flags(struct inode *inode, int flags, int vflags)
7162+{
7163+ __u16 sd_attrs = REISERFS_I(inode)->i_attrs; /* seed, as GETFLAGS does */
7164+
7165+ inode->i_flags = flags;
7166+ inode->i_vflags = vflags;
7167+ /* overlay flag-derived bits; bits the helper does not touch keep the seed */
7168+ i_attrs_to_sd_attrs(inode, &sd_attrs);
7169+ REISERFS_I(inode)->i_attrs = sd_attrs;
7170+ inode->i_ctime = CURRENT_TIME_SEC;
7171+ mark_inode_dirty(inode);
7172+ return 0;
7173+}
7174+
7175 /*
7176 * reiserfs_ioctl - handler for ioctl for inode
7177 * supported commands:
7178@@ -22,7 +37,7 @@
7179 long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
7180 {
7181 struct inode *inode = filp->f_path.dentry->d_inode;
7182- unsigned int flags;
7183+ unsigned int flags, oldflags;
7184 int err = 0;
7185
7186 reiserfs_write_lock(inode->i_sb);
7187@@ -47,6 +62,7 @@ long reiserfs_ioctl(struct file *filp, u
7188
7189 flags = REISERFS_I(inode)->i_attrs;
7190 i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
7191+ flags &= REISERFS_FL_USER_VISIBLE;
7192 err = put_user(flags, (int __user *)arg);
7193 break;
7194 case REISERFS_IOC_SETFLAGS:{
7195@@ -67,6 +83,10 @@ long reiserfs_ioctl(struct file *filp, u
7196 err = -EFAULT;
7197 goto setflags_out;
7198 }
7199+ if (IS_BARRIER(inode)) { /* exit via setflags_out: a bare return would leave reiserfs_write_lock() held */
7200+ vxwprintk_task(1, "messing with the barrier.");
7201+ err = -EACCES; goto setflags_out;
7202+ }
7203 /*
7204 * Is it quota file? Do not allow user to mess with it
7205 */
7206@@ -91,6 +111,10 @@ long reiserfs_ioctl(struct file *filp, u
7207 goto setflags_out;
7208 }
7209 }
7210+
7211+ oldflags = REISERFS_I(inode)->i_attrs;
7212+ flags &= REISERFS_FL_USER_MODIFIABLE;
7213+ flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE;
7214 sd_attrs_to_i_attrs(flags, inode);
7215 REISERFS_I(inode)->i_attrs = flags;
7216 inode->i_ctime = CURRENT_TIME_SEC;
7217diff -NurpP --minimal linux-3.0.9/fs/reiserfs/namei.c linux-3.0.9-vs2.3.2.1/fs/reiserfs/namei.c
7218--- linux-3.0.9/fs/reiserfs/namei.c 2011-05-22 16:17:53.000000000 +0200
7219+++ linux-3.0.9-vs2.3.2.1/fs/reiserfs/namei.c 2011-06-10 22:11:24.000000000 +0200
7220@@ -18,6 +18,7 @@
7221 #include <linux/reiserfs_acl.h>
7222 #include <linux/reiserfs_xattr.h>
7223 #include <linux/quotaops.h>
7224+#include <linux/vs_tag.h>
7225
7226 #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
7227 #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
7228@@ -362,6 +363,7 @@ static struct dentry *reiserfs_lookup(st
7229 if (retval == IO_ERROR) {
7230 return ERR_PTR(-EIO);
7231 }
7232+ dx_propagate_tag(nd, inode);
7233
7234 return d_splice_alias(inode, dentry);
7235 }
7236@@ -1529,6 +1531,7 @@ const struct inode_operations reiserfs_d
7237 .listxattr = reiserfs_listxattr,
7238 .removexattr = reiserfs_removexattr,
7239 .permission = reiserfs_permission,
7240+ .sync_flags = reiserfs_sync_flags,
7241 };
7242
7243 /*
7244diff -NurpP --minimal linux-3.0.9/fs/reiserfs/super.c linux-3.0.9-vs2.3.2.1/fs/reiserfs/super.c
7245--- linux-3.0.9/fs/reiserfs/super.c 2011-07-22 11:18:06.000000000 +0200
7246+++ linux-3.0.9-vs2.3.2.1/fs/reiserfs/super.c 2011-06-10 22:11:24.000000000 +0200
7247@@ -899,6 +899,14 @@ static int reiserfs_parse_options(struct
7248 {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
7249 {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
7250 #endif
7251+#ifndef CONFIG_TAGGING_NONE
7252+ {"tagxid",.setmask = 1 << REISERFS_TAGGED},
7253+ {"tag",.setmask = 1 << REISERFS_TAGGED},
7254+ {"notag",.clrmask = 1 << REISERFS_TAGGED},
7255+#endif
7256+#ifdef CONFIG_PROPAGATE
7257+ {"tag",.arg_required = 'T',.values = NULL},
7258+#endif
7259 #ifdef CONFIG_REISERFS_FS_POSIX_ACL
7260 {"acl",.setmask = 1 << REISERFS_POSIXACL},
7261 {"noacl",.clrmask = 1 << REISERFS_POSIXACL},
7262@@ -1208,6 +1216,14 @@ static int reiserfs_remount(struct super
7263 handle_quota_files(s, qf_names, &qfmt);
7264 #endif
7265
7266+ if ((mount_options & (1 << REISERFS_TAGGED)) &&
7267+ !(s->s_flags & MS_TAGGED)) {
7268+ reiserfs_warning(s, "super-vs01",
7269+ "reiserfs: tagging not permitted on remount.");
7270+ err = -EINVAL;
7271+ goto out_err;
7272+ }
7273+
7274 handle_attrs(s);
7275
7276 /* Add options that are safe here */
7277@@ -1690,6 +1706,10 @@ static int reiserfs_fill_super(struct su
7278 goto error;
7279 }
7280
7281+ /* map mount option tagxid */
7282+ if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGGED))
7283+ s->s_flags |= MS_TAGGED;
7284+
7285 rs = SB_DISK_SUPER_BLOCK(s);
7286 /* Let's do basic sanity check to verify that underlying device is not
7287 smaller than the filesystem. If the check fails then abort and scream,
7288diff -NurpP --minimal linux-3.0.9/fs/reiserfs/xattr.c linux-3.0.9-vs2.3.2.1/fs/reiserfs/xattr.c
7289--- linux-3.0.9/fs/reiserfs/xattr.c 2011-07-22 11:18:06.000000000 +0200
7290+++ linux-3.0.9-vs2.3.2.1/fs/reiserfs/xattr.c 2011-06-22 12:39:15.000000000 +0200
7291@@ -40,6 +40,7 @@
7292 #include <linux/errno.h>
7293 #include <linux/gfp.h>
7294 #include <linux/fs.h>
7295+#include <linux/mount.h>
7296 #include <linux/file.h>
7297 #include <linux/pagemap.h>
7298 #include <linux/xattr.h>
7299diff -NurpP --minimal linux-3.0.9/fs/stat.c linux-3.0.9-vs2.3.2.1/fs/stat.c
7300--- linux-3.0.9/fs/stat.c 2011-11-15 16:40:47.000000000 +0100
7301+++ linux-3.0.9-vs2.3.2.1/fs/stat.c 2011-11-15 17:37:07.000000000 +0100
7302@@ -26,6 +26,7 @@ void generic_fillattr(struct inode *inod
7303 stat->nlink = inode->i_nlink;
7304 stat->uid = inode->i_uid;
7305 stat->gid = inode->i_gid;
7306+ stat->tag = inode->i_tag;
7307 stat->rdev = inode->i_rdev;
7308 stat->atime = inode->i_atime;
7309 stat->mtime = inode->i_mtime;
7310diff -NurpP --minimal linux-3.0.9/fs/statfs.c linux-3.0.9-vs2.3.2.1/fs/statfs.c
7311--- linux-3.0.9/fs/statfs.c 2011-11-15 16:40:47.000000000 +0100
7312+++ linux-3.0.9-vs2.3.2.1/fs/statfs.c 2011-11-15 17:37:07.000000000 +0100
7313@@ -7,6 +7,8 @@
7314 #include <linux/statfs.h>
7315 #include <linux/security.h>
7316 #include <linux/uaccess.h>
7317+#include <linux/vs_base.h>
7318+#include <linux/vs_dlimit.h>
7319
7320 static int flags_by_mnt(int mnt_flags)
7321 {
7322@@ -59,6 +61,8 @@ int statfs_by_dentry(struct dentry *dent
7323 retval = dentry->d_sb->s_op->statfs(dentry, buf);
7324 if (retval == 0 && buf->f_frsize == 0)
7325 buf->f_frsize = buf->f_bsize;
7326+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
7327+ vx_vsi_statfs(dentry->d_sb, buf);
7328 return retval;
7329 }
7330
7331diff -NurpP --minimal linux-3.0.9/fs/super.c linux-3.0.9-vs2.3.2.1/fs/super.c
7332--- linux-3.0.9/fs/super.c 2011-07-22 11:18:06.000000000 +0200
7333+++ linux-3.0.9-vs2.3.2.1/fs/super.c 2011-07-23 16:49:09.000000000 +0200
7334@@ -32,6 +32,9 @@
7335 #include <linux/backing-dev.h>
7336 #include <linux/rculist_bl.h>
7337 #include <linux/cleancache.h>
7338+#include <linux/devpts_fs.h>
7339+#include <linux/proc_fs.h>
7340+#include <linux/vs_context.h>
7341 #include "internal.h"
7342
7343
7344@@ -943,6 +946,13 @@ mount_fs(struct file_system_type *type,
7345 WARN_ON(sb->s_bdi == &default_backing_dev_info);
7346 sb->s_flags |= MS_BORN;
7347
7348+ error = -EPERM;
7349+ if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) &&
7350+ !sb->s_bdev &&
7351+ (sb->s_magic != PROC_SUPER_MAGIC) &&
7352+ (sb->s_magic != DEVPTS_SUPER_MAGIC))
7353+ goto out_sb;
7354+
7355 error = security_sb_kern_mount(sb, flags, secdata);
7356 if (error)
7357 goto out_sb;
7358diff -NurpP --minimal linux-3.0.9/fs/sysfs/mount.c linux-3.0.9-vs2.3.2.1/fs/sysfs/mount.c
7359--- linux-3.0.9/fs/sysfs/mount.c 2011-07-22 11:18:06.000000000 +0200
7360+++ linux-3.0.9-vs2.3.2.1/fs/sysfs/mount.c 2011-06-22 12:39:15.000000000 +0200
7361@@ -47,7 +47,7 @@ static int sysfs_fill_super(struct super
7362
7363 sb->s_blocksize = PAGE_CACHE_SIZE;
7364 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
7365- sb->s_magic = SYSFS_MAGIC;
7366+ sb->s_magic = SYSFS_SUPER_MAGIC;
7367 sb->s_op = &sysfs_ops;
7368 sb->s_time_gran = 1;
7369
7370diff -NurpP --minimal linux-3.0.9/fs/utimes.c linux-3.0.9-vs2.3.2.1/fs/utimes.c
7371--- linux-3.0.9/fs/utimes.c 2011-05-22 16:17:54.000000000 +0200
7372+++ linux-3.0.9-vs2.3.2.1/fs/utimes.c 2011-06-10 22:11:24.000000000 +0200
7373@@ -8,6 +8,8 @@
7374 #include <linux/stat.h>
7375 #include <linux/utime.h>
7376 #include <linux/syscalls.h>
7377+#include <linux/mount.h>
7378+#include <linux/vs_cowbl.h>
7379 #include <asm/uaccess.h>
7380 #include <asm/unistd.h>
7381
7382@@ -52,12 +54,18 @@ static int utimes_common(struct path *pa
7383 {
7384 int error;
7385 struct iattr newattrs;
7386- struct inode *inode = path->dentry->d_inode;
7387+ struct inode *inode;
7388
7389 error = mnt_want_write(path->mnt);
7390 if (error)
7391 goto out;
7392
7393+ error = cow_check_and_break(path);
7394+ if (error)
7395+ goto mnt_drop_write_and_out;
7396+
7397+ inode = path->dentry->d_inode;
7398+
7399 if (times && times[0].tv_nsec == UTIME_NOW &&
7400 times[1].tv_nsec == UTIME_NOW)
7401 times = NULL;
7402diff -NurpP --minimal linux-3.0.9/fs/xattr.c linux-3.0.9-vs2.3.2.1/fs/xattr.c
7403--- linux-3.0.9/fs/xattr.c 2011-07-22 11:18:09.000000000 +0200
7404+++ linux-3.0.9-vs2.3.2.1/fs/xattr.c 2011-06-10 23:10:19.000000000 +0200
7405@@ -18,6 +18,7 @@
7406 #include <linux/module.h>
7407 #include <linux/fsnotify.h>
7408 #include <linux/audit.h>
7409+#include <linux/mount.h>
7410 #include <asm/uaccess.h>
7411
7412
7413@@ -49,7 +50,7 @@ xattr_permission(struct inode *inode, co
7414 * The trusted.* namespace can only be accessed by privileged users.
7415 */
7416 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
7417- if (!capable(CAP_SYS_ADMIN))
7418+ if (!vx_capable(CAP_SYS_ADMIN, VXC_FS_TRUSTED))
7419 return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
7420 return 0;
7421 }
7422diff -NurpP --minimal linux-3.0.9/fs/xfs/linux-2.6/xfs_ioctl.c linux-3.0.9-vs2.3.2.1/fs/xfs/linux-2.6/xfs_ioctl.c
7423--- linux-3.0.9/fs/xfs/linux-2.6/xfs_ioctl.c 2011-05-22 16:17:54.000000000 +0200
7424+++ linux-3.0.9-vs2.3.2.1/fs/xfs/linux-2.6/xfs_ioctl.c 2011-06-10 22:11:24.000000000 +0200
7425@@ -28,7 +28,7 @@
7426 #include "xfs_bmap_btree.h"
7427 #include "xfs_dinode.h"
7428 #include "xfs_inode.h"
7429-#include "xfs_ioctl.h"
7430+// #include "xfs_ioctl.h"
7431 #include "xfs_rtalloc.h"
7432 #include "xfs_itable.h"
7433 #include "xfs_error.h"
7434@@ -748,6 +748,10 @@ xfs_merge_ioc_xflags(
7435 xflags |= XFS_XFLAG_IMMUTABLE;
7436 else
7437 xflags &= ~XFS_XFLAG_IMMUTABLE;
7438+ if (flags & FS_IXUNLINK_FL)
7439+ xflags |= XFS_XFLAG_IXUNLINK;
7440+ else
7441+ xflags &= ~XFS_XFLAG_IXUNLINK;
7442 if (flags & FS_APPEND_FL)
7443 xflags |= XFS_XFLAG_APPEND;
7444 else
7445@@ -776,6 +780,8 @@ xfs_di2lxflags(
7446
7447 if (di_flags & XFS_DIFLAG_IMMUTABLE)
7448 flags |= FS_IMMUTABLE_FL;
7449+ if (di_flags & XFS_DIFLAG_IXUNLINK)
7450+ flags |= FS_IXUNLINK_FL;
7451 if (di_flags & XFS_DIFLAG_APPEND)
7452 flags |= FS_APPEND_FL;
7453 if (di_flags & XFS_DIFLAG_SYNC)
7454@@ -836,6 +842,8 @@ xfs_set_diflags(
7455 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
7456 if (xflags & XFS_XFLAG_IMMUTABLE)
7457 di_flags |= XFS_DIFLAG_IMMUTABLE;
7458+ if (xflags & XFS_XFLAG_IXUNLINK)
7459+ di_flags |= XFS_DIFLAG_IXUNLINK;
7460 if (xflags & XFS_XFLAG_APPEND)
7461 di_flags |= XFS_DIFLAG_APPEND;
7462 if (xflags & XFS_XFLAG_SYNC)
7463@@ -878,6 +886,10 @@ xfs_diflags_to_linux(
7464 inode->i_flags |= S_IMMUTABLE;
7465 else
7466 inode->i_flags &= ~S_IMMUTABLE;
7467+ if (xflags & XFS_XFLAG_IXUNLINK)
7468+ inode->i_flags |= S_IXUNLINK;
7469+ else
7470+ inode->i_flags &= ~S_IXUNLINK;
7471 if (xflags & XFS_XFLAG_APPEND)
7472 inode->i_flags |= S_APPEND;
7473 else
7474@@ -1370,10 +1382,18 @@ xfs_file_ioctl(
7475 case XFS_IOC_FSGETXATTRA:
7476 return xfs_ioc_fsgetxattr(ip, 1, arg);
7477 case XFS_IOC_FSSETXATTR:
7478+ if (IS_BARRIER(inode)) {
7479+ vxwprintk_task(1, "messing with the barrier.");
7480+ return -XFS_ERROR(EACCES);
7481+ }
7482 return xfs_ioc_fssetxattr(ip, filp, arg);
7483 case XFS_IOC_GETXFLAGS:
7484 return xfs_ioc_getxflags(ip, arg);
7485 case XFS_IOC_SETXFLAGS:
7486+ if (IS_BARRIER(inode)) {
7487+ vxwprintk_task(1, "messing with the barrier.");
7488+ return -XFS_ERROR(EACCES);
7489+ }
7490 return xfs_ioc_setxflags(ip, filp, arg);
7491
7492 case XFS_IOC_FSSETDM: {
7493diff -NurpP --minimal linux-3.0.9/fs/xfs/linux-2.6/xfs_ioctl.h linux-3.0.9-vs2.3.2.1/fs/xfs/linux-2.6/xfs_ioctl.h
7494--- linux-3.0.9/fs/xfs/linux-2.6/xfs_ioctl.h 2010-07-07 18:31:54.000000000 +0200
7495+++ linux-3.0.9-vs2.3.2.1/fs/xfs/linux-2.6/xfs_ioctl.h 2011-06-10 22:11:24.000000000 +0200
7496@@ -70,6 +70,12 @@ xfs_handle_to_dentry(
7497 void __user *uhandle,
7498 u32 hlen);
7499
7500+extern int
7501+xfs_sync_flags(
7502+ struct inode *inode,
7503+ int flags,
7504+ int vflags);
7505+
7506 extern long
7507 xfs_file_ioctl(
7508 struct file *filp,
7509diff -NurpP --minimal linux-3.0.9/fs/xfs/linux-2.6/xfs_iops.c linux-3.0.9-vs2.3.2.1/fs/xfs/linux-2.6/xfs_iops.c
7510--- linux-3.0.9/fs/xfs/linux-2.6/xfs_iops.c 2011-07-22 11:18:09.000000000 +0200
7511+++ linux-3.0.9-vs2.3.2.1/fs/xfs/linux-2.6/xfs_iops.c 2011-06-22 12:39:15.000000000 +0200
7512@@ -30,6 +30,7 @@
7513 #include "xfs_bmap_btree.h"
7514 #include "xfs_dinode.h"
7515 #include "xfs_inode.h"
7516+#include "xfs_ioctl.h"
7517 #include "xfs_bmap.h"
7518 #include "xfs_rtalloc.h"
7519 #include "xfs_error.h"
7520@@ -48,6 +49,7 @@
7521 #include <linux/security.h>
7522 #include <linux/fiemap.h>
7523 #include <linux/slab.h>
7524+#include <linux/vs_tag.h>
7525
7526 /*
7527 * Bring the timestamps in the XFS inode uptodate.
7528@@ -464,6 +466,7 @@ xfs_vn_getattr(
7529 stat->nlink = ip->i_d.di_nlink;
7530 stat->uid = ip->i_d.di_uid;
7531 stat->gid = ip->i_d.di_gid;
7532+ stat->tag = ip->i_d.di_tag;
7533 stat->ino = ip->i_ino;
7534 stat->atime = inode->i_atime;
7535 stat->mtime = inode->i_mtime;
7536@@ -599,6 +602,7 @@ static const struct inode_operations xfs
7537 .removexattr = generic_removexattr,
7538 .listxattr = xfs_vn_listxattr,
7539 .fiemap = xfs_vn_fiemap,
7540+ .sync_flags = xfs_sync_flags,
7541 };
7542
7543 static const struct inode_operations xfs_dir_inode_operations = {
7544@@ -624,6 +628,7 @@ static const struct inode_operations xfs
7545 .getxattr = generic_getxattr,
7546 .removexattr = generic_removexattr,
7547 .listxattr = xfs_vn_listxattr,
7548+ .sync_flags = xfs_sync_flags,
7549 };
7550
7551 static const struct inode_operations xfs_dir_ci_inode_operations = {
7552@@ -673,6 +678,10 @@ xfs_diflags_to_iflags(
7553 inode->i_flags |= S_IMMUTABLE;
7554 else
7555 inode->i_flags &= ~S_IMMUTABLE;
7556+ if (ip->i_d.di_flags & XFS_DIFLAG_IXUNLINK)
7557+ inode->i_flags |= S_IXUNLINK;
7558+ else
7559+ inode->i_flags &= ~S_IXUNLINK;
7560 if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
7561 inode->i_flags |= S_APPEND;
7562 else
7563@@ -685,6 +694,15 @@ xfs_diflags_to_iflags(
7564 inode->i_flags |= S_NOATIME;
7565 else
7566 inode->i_flags &= ~S_NOATIME;
7567+
7568+ if (ip->i_d.di_vflags & XFS_DIVFLAG_BARRIER)
7569+ inode->i_vflags |= V_BARRIER;
7570+ else
7571+ inode->i_vflags &= ~V_BARRIER;
7572+ if (ip->i_d.di_vflags & XFS_DIVFLAG_COW)
7573+ inode->i_vflags |= V_COW;
7574+ else
7575+ inode->i_vflags &= ~V_COW;
7576 }
7577
7578 /*
7579@@ -716,6 +734,7 @@ xfs_setup_inode(
7580 inode->i_nlink = ip->i_d.di_nlink;
7581 inode->i_uid = ip->i_d.di_uid;
7582 inode->i_gid = ip->i_d.di_gid;
7583+ inode->i_tag = ip->i_d.di_tag;
7584
7585 switch (inode->i_mode & S_IFMT) {
7586 case S_IFBLK:
7587diff -NurpP --minimal linux-3.0.9/fs/xfs/linux-2.6/xfs_linux.h linux-3.0.9-vs2.3.2.1/fs/xfs/linux-2.6/xfs_linux.h
7588--- linux-3.0.9/fs/xfs/linux-2.6/xfs_linux.h 2011-11-15 16:40:47.000000000 +0100
7589+++ linux-3.0.9-vs2.3.2.1/fs/xfs/linux-2.6/xfs_linux.h 2011-11-15 17:37:07.000000000 +0100
7590@@ -117,6 +117,7 @@
7591
7592 #define current_cpu() (raw_smp_processor_id())
7593 #define current_pid() (current->pid)
7594+#define current_fstag(vp) (dx_current_fstag((vp)->i_sb))
7595 #define current_test_flags(f) (current->flags & (f))
7596 #define current_set_flags_nested(sp, f) \
7597 (*(sp) = current->flags, current->flags |= (f))
7598diff -NurpP --minimal linux-3.0.9/fs/xfs/linux-2.6/xfs_super.c linux-3.0.9-vs2.3.2.1/fs/xfs/linux-2.6/xfs_super.c
7599--- linux-3.0.9/fs/xfs/linux-2.6/xfs_super.c 2011-11-15 16:40:47.000000000 +0100
7600+++ linux-3.0.9-vs2.3.2.1/fs/xfs/linux-2.6/xfs_super.c 2011-11-15 17:37:07.000000000 +0100
7601@@ -114,6 +114,9 @@ mempool_t *xfs_ioend_pool;
7602 #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */
7603 #define MNTOPT_DISCARD "discard" /* Discard unused blocks */
7604 #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */
7605+#define MNTOPT_TAGXID "tagxid" /* context tagging for inodes */
7606+#define MNTOPT_TAGGED "tag" /* context tagging for inodes */
7607+#define MNTOPT_NOTAGTAG "notag" /* do not use context tagging */
7608
7609 /*
7610 * Table driven mount option parser.
7611@@ -122,10 +125,14 @@ mempool_t *xfs_ioend_pool;
7612 * in the future, too.
7613 */
7614 enum {
7615+ Opt_tag, Opt_notag,
7616 Opt_barrier, Opt_nobarrier, Opt_err
7617 };
7618
7619 static const match_table_t tokens = {
7620+ {Opt_tag, "tagxid"},
7621+ {Opt_tag, "tag"},
7622+ {Opt_notag, "notag"},
7623 {Opt_barrier, "barrier"},
7624 {Opt_nobarrier, "nobarrier"},
7625 {Opt_err, NULL}
7626@@ -373,6 +380,19 @@ xfs_parseargs(
7627 } else if (!strcmp(this_char, "irixsgid")) {
7628 xfs_warn(mp,
7629 "irixsgid is now a sysctl(2) variable, option is deprecated.");
7630+#ifndef CONFIG_TAGGING_NONE
7631+ } else if (!strcmp(this_char, MNTOPT_TAGGED)) {
7632+ mp->m_flags |= XFS_MOUNT_TAGGED;
7633+ } else if (!strcmp(this_char, MNTOPT_NOTAGTAG)) {
7634+ mp->m_flags &= ~XFS_MOUNT_TAGGED;
7635+ } else if (!strcmp(this_char, MNTOPT_TAGXID)) {
7636+ mp->m_flags |= XFS_MOUNT_TAGGED;
7637+#endif
7638+#ifdef CONFIG_PROPAGATE
7639+ } else if (!strcmp(this_char, MNTOPT_TAGGED)) {
7640+ /* use value */
7641+ mp->m_flags |= XFS_MOUNT_TAGGED;
7642+#endif
7643 } else {
7644 xfs_warn(mp, "unknown mount option [%s].", this_char);
7645 return EINVAL;
7646@@ -1182,6 +1202,16 @@ xfs_fs_remount(
7647 case Opt_nobarrier:
7648 mp->m_flags &= ~XFS_MOUNT_BARRIER;
7649 break;
7650+ case Opt_tag:
7651+ if (!(sb->s_flags & MS_TAGGED)) {
7652+ printk(KERN_INFO
7653+ "XFS: %s: tagging not permitted on remount.\n",
7654+ sb->s_id);
7655+ return -EINVAL;
7656+ }
7657+ break;
7658+ case Opt_notag:
7659+ break;
7660 default:
7661 /*
7662 * Logically we would return an error here to prevent
7663@@ -1397,6 +1427,9 @@ xfs_fs_fill_super(
7664 if (error)
7665 goto out_free_sb;
7666
7667+ if (mp->m_flags & XFS_MOUNT_TAGGED)
7668+ sb->s_flags |= MS_TAGGED;
7669+
7670 /*
7671 * we must configure the block size in the superblock before we run the
7672 * full mount process as the mount process can lookup and cache inodes.
7673diff -NurpP --minimal linux-3.0.9/fs/xfs/xfs_dinode.h linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_dinode.h
7674--- linux-3.0.9/fs/xfs/xfs_dinode.h 2011-01-05 21:50:28.000000000 +0100
7675+++ linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_dinode.h 2011-06-10 22:11:24.000000000 +0200
7676@@ -51,7 +51,9 @@ typedef struct xfs_dinode {
7677 __be32 di_nlink; /* number of links to file */
7678 __be16 di_projid_lo; /* lower part of owner's project id */
7679 __be16 di_projid_hi; /* higher part owner's project id */
7680- __u8 di_pad[6]; /* unused, zeroed space */
7681+ __u8 di_pad[2]; /* unused, zeroed space */
7682+ __be16 di_tag; /* context tagging */
7683+ __be16 di_vflags; /* vserver specific flags */
7684 __be16 di_flushiter; /* incremented on flush */
7685 xfs_timestamp_t di_atime; /* time last accessed */
7686 xfs_timestamp_t di_mtime; /* time last modified */
7687@@ -184,6 +186,8 @@ static inline void xfs_dinode_put_rdev(s
7688 #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
7689 #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */
7690 #define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */
7691+#define XFS_DIFLAG_IXUNLINK_BIT 15 /* Immutable invert on unlink */
7692+
7693 #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
7694 #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
7695 #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
7696@@ -199,6 +203,7 @@ static inline void xfs_dinode_put_rdev(s
7697 #define XFS_DIFLAG_EXTSZINHERIT (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
7698 #define XFS_DIFLAG_NODEFRAG (1 << XFS_DIFLAG_NODEFRAG_BIT)
7699 #define XFS_DIFLAG_FILESTREAM (1 << XFS_DIFLAG_FILESTREAM_BIT)
7700+#define XFS_DIFLAG_IXUNLINK (1 << XFS_DIFLAG_IXUNLINK_BIT)
7701
7702 #ifdef CONFIG_XFS_RT
7703 #define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
7704@@ -211,6 +216,10 @@ static inline void xfs_dinode_put_rdev(s
7705 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
7706 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
7707 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
7708- XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
7709+ XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM | \
7710+ XFS_DIFLAG_IXUNLINK)
7711+
7712+#define XFS_DIVFLAG_BARRIER 0x01
7713+#define XFS_DIVFLAG_COW 0x02
7714
7715 #endif /* __XFS_DINODE_H__ */
7716diff -NurpP --minimal linux-3.0.9/fs/xfs/xfs_fs.h linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_fs.h
7717--- linux-3.0.9/fs/xfs/xfs_fs.h 2011-01-05 21:50:28.000000000 +0100
7718+++ linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_fs.h 2011-06-10 22:11:24.000000000 +0200
7719@@ -67,6 +67,9 @@ struct fsxattr {
7720 #define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
7721 #define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
7722 #define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
7723+#define XFS_XFLAG_IXUNLINK 0x00008000 /* immutable invert on unlink */
7724+#define XFS_XFLAG_BARRIER 0x10000000 /* chroot() barrier */
7725+#define XFS_XFLAG_COW 0x20000000 /* copy on write mark */
7726 #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
7727
7728 /*
7729@@ -297,7 +300,8 @@ typedef struct xfs_bstat {
7730 #define bs_projid bs_projid_lo /* (previously just bs_projid) */
7731 __u16 bs_forkoff; /* inode fork offset in bytes */
7732 __u16 bs_projid_hi; /* higher part of project id */
7733- unsigned char bs_pad[10]; /* pad space, unused */
7734+ unsigned char bs_pad[8]; /* pad space, unused */
7735+ __u16 bs_tag; /* context tagging */
7736 __u32 bs_dmevmask; /* DMIG event mask */
7737 __u16 bs_dmstate; /* DMIG state info */
7738 __u16 bs_aextents; /* attribute number of extents */
7739diff -NurpP --minimal linux-3.0.9/fs/xfs/xfs_ialloc.c linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_ialloc.c
7740--- linux-3.0.9/fs/xfs/xfs_ialloc.c 2011-05-22 16:17:54.000000000 +0200
7741+++ linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_ialloc.c 2011-06-10 22:11:24.000000000 +0200
7742@@ -37,7 +37,6 @@
7743 #include "xfs_error.h"
7744 #include "xfs_bmap.h"
7745
7746-
7747 /*
7748 * Allocation group level functions.
7749 */
7750diff -NurpP --minimal linux-3.0.9/fs/xfs/xfs_inode.c linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_inode.c
7751--- linux-3.0.9/fs/xfs/xfs_inode.c 2011-07-22 11:18:10.000000000 +0200
7752+++ linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_inode.c 2011-06-10 22:11:24.000000000 +0200
7753@@ -243,6 +243,7 @@ xfs_inotobp(
7754 return 0;
7755 }
7756
7757+#include <linux/vs_tag.h>
7758
7759 /*
7760 * This routine is called to map an inode to the buffer containing
7761@@ -641,15 +642,25 @@ xfs_iformat_btree(
7762 STATIC void
7763 xfs_dinode_from_disk(
7764 xfs_icdinode_t *to,
7765- xfs_dinode_t *from)
7766+ xfs_dinode_t *from,
7767+ int tagged)
7768 {
7769+ uint32_t uid, gid, tag;
7770+
7771 to->di_magic = be16_to_cpu(from->di_magic);
7772 to->di_mode = be16_to_cpu(from->di_mode);
7773 to->di_version = from ->di_version;
7774 to->di_format = from->di_format;
7775 to->di_onlink = be16_to_cpu(from->di_onlink);
7776- to->di_uid = be32_to_cpu(from->di_uid);
7777- to->di_gid = be32_to_cpu(from->di_gid);
7778+
7779+ uid = be32_to_cpu(from->di_uid);
7780+ gid = be32_to_cpu(from->di_gid);
7781+ tag = be16_to_cpu(from->di_tag);
7782+
7783+ to->di_uid = INOTAG_UID(tagged, uid, gid);
7784+ to->di_gid = INOTAG_GID(tagged, uid, gid);
7785+ to->di_tag = INOTAG_TAG(tagged, uid, gid, tag);
7786+
7787 to->di_nlink = be32_to_cpu(from->di_nlink);
7788 to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
7789 to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
7790@@ -671,21 +682,26 @@ xfs_dinode_from_disk(
7791 to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
7792 to->di_dmstate = be16_to_cpu(from->di_dmstate);
7793 to->di_flags = be16_to_cpu(from->di_flags);
7794+ to->di_vflags = be16_to_cpu(from->di_vflags);
7795 to->di_gen = be32_to_cpu(from->di_gen);
7796 }
7797
7798 void
7799 xfs_dinode_to_disk(
7800 xfs_dinode_t *to,
7801- xfs_icdinode_t *from)
7802+ xfs_icdinode_t *from,
7803+ int tagged)
7804 {
7805 to->di_magic = cpu_to_be16(from->di_magic);
7806 to->di_mode = cpu_to_be16(from->di_mode);
7807 to->di_version = from ->di_version;
7808 to->di_format = from->di_format;
7809 to->di_onlink = cpu_to_be16(from->di_onlink);
7810- to->di_uid = cpu_to_be32(from->di_uid);
7811- to->di_gid = cpu_to_be32(from->di_gid);
7812+
7813+ to->di_uid = cpu_to_be32(TAGINO_UID(tagged, from->di_uid, from->di_tag));
7814+ to->di_gid = cpu_to_be32(TAGINO_GID(tagged, from->di_gid, from->di_tag));
7815+ to->di_tag = cpu_to_be16(TAGINO_TAG(tagged, from->di_tag));
7816+
7817 to->di_nlink = cpu_to_be32(from->di_nlink);
7818 to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
7819 to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
7820@@ -707,12 +723,14 @@ xfs_dinode_to_disk(
7821 to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
7822 to->di_dmstate = cpu_to_be16(from->di_dmstate);
7823 to->di_flags = cpu_to_be16(from->di_flags);
7824+ to->di_vflags = cpu_to_be16(from->di_vflags);
7825 to->di_gen = cpu_to_be32(from->di_gen);
7826 }
7827
7828 STATIC uint
7829 _xfs_dic2xflags(
7830- __uint16_t di_flags)
7831+ __uint16_t di_flags,
7832+ __uint16_t di_vflags)
7833 {
7834 uint flags = 0;
7835
7836@@ -723,6 +741,8 @@ _xfs_dic2xflags(
7837 flags |= XFS_XFLAG_PREALLOC;
7838 if (di_flags & XFS_DIFLAG_IMMUTABLE)
7839 flags |= XFS_XFLAG_IMMUTABLE;
7840+ if (di_flags & XFS_DIFLAG_IXUNLINK)
7841+ flags |= XFS_XFLAG_IXUNLINK;
7842 if (di_flags & XFS_DIFLAG_APPEND)
7843 flags |= XFS_XFLAG_APPEND;
7844 if (di_flags & XFS_DIFLAG_SYNC)
7845@@ -747,6 +767,10 @@ _xfs_dic2xflags(
7846 flags |= XFS_XFLAG_FILESTREAM;
7847 }
7848
7849+ if (di_vflags & XFS_DIVFLAG_BARRIER)
7850+ flags |= FS_BARRIER_FL;
7851+ if (di_vflags & XFS_DIVFLAG_COW)
7852+ flags |= FS_COW_FL;
7853 return flags;
7854 }
7855
7856@@ -756,7 +780,7 @@ xfs_ip2xflags(
7857 {
7858 xfs_icdinode_t *dic = &ip->i_d;
7859
7860- return _xfs_dic2xflags(dic->di_flags) |
7861+ return _xfs_dic2xflags(dic->di_flags, dic->di_vflags) |
7862 (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
7863 }
7864
7865@@ -764,7 +788,8 @@ uint
7866 xfs_dic2xflags(
7867 xfs_dinode_t *dip)
7868 {
7869- return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
7870+ return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
7871+ be16_to_cpu(dip->di_vflags)) |
7872 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
7873 }
7874
7875@@ -797,7 +822,6 @@ xfs_iread(
7876 if (error)
7877 return error;
7878 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
7879-
7880 /*
7881 * If we got something that isn't an inode it means someone
7882 * (nfs or dmi) has a stale handle.
7883@@ -820,7 +844,8 @@ xfs_iread(
7884 * Otherwise, just get the truly permanent information.
7885 */
7886 if (dip->di_mode) {
7887- xfs_dinode_from_disk(&ip->i_d, dip);
7888+ xfs_dinode_from_disk(&ip->i_d, dip,
7889+ mp->m_flags & XFS_MOUNT_TAGGED);
7890 error = xfs_iformat(ip, dip);
7891 if (error) {
7892 #ifdef DEBUG
7893@@ -1015,6 +1040,7 @@ xfs_ialloc(
7894 ASSERT(ip->i_d.di_nlink == nlink);
7895 ip->i_d.di_uid = current_fsuid();
7896 ip->i_d.di_gid = current_fsgid();
7897+ ip->i_d.di_tag = current_fstag(&ip->i_vnode);
7898 xfs_set_projid(ip, prid);
7899 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
7900
7901@@ -1075,6 +1101,7 @@ xfs_ialloc(
7902 ip->i_d.di_dmevmask = 0;
7903 ip->i_d.di_dmstate = 0;
7904 ip->i_d.di_flags = 0;
7905+ ip->i_d.di_vflags = 0;
7906 flags = XFS_ILOG_CORE;
7907 switch (mode & S_IFMT) {
7908 case S_IFIFO:
7909@@ -2108,6 +2135,7 @@ xfs_ifree(
7910 }
7911 ip->i_d.di_mode = 0; /* mark incore inode as free */
7912 ip->i_d.di_flags = 0;
7913+ ip->i_d.di_vflags = 0;
7914 ip->i_d.di_dmevmask = 0;
7915 ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
7916 ip->i_df.if_ext_max =
7917@@ -2987,7 +3015,8 @@ xfs_iflush_int(
7918 * because if the inode is dirty at all the core must
7919 * be.
7920 */
7921- xfs_dinode_to_disk(dip, &ip->i_d);
7922+ xfs_dinode_to_disk(dip, &ip->i_d,
7923+ mp->m_flags & XFS_MOUNT_TAGGED);
7924
7925 /* Wrap, we never let the log put out DI_MAX_FLUSH */
7926 if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
7927diff -NurpP --minimal linux-3.0.9/fs/xfs/xfs_inode.h linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_inode.h
7928--- linux-3.0.9/fs/xfs/xfs_inode.h 2011-07-22 11:18:10.000000000 +0200
7929+++ linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_inode.h 2011-07-01 11:35:35.000000000 +0200
7930@@ -135,7 +135,9 @@ typedef struct xfs_icdinode {
7931 __uint32_t di_nlink; /* number of links to file */
7932 __uint16_t di_projid_lo; /* lower part of owner's project id */
7933 __uint16_t di_projid_hi; /* higher part of owner's project id */
7934- __uint8_t di_pad[6]; /* unused, zeroed space */
7935+ __uint8_t di_pad[2]; /* unused, zeroed space */
7936+ __uint16_t di_tag; /* context tagging */
7937+ __uint16_t di_vflags; /* vserver specific flags */
7938 __uint16_t di_flushiter; /* incremented on flush */
7939 xfs_ictimestamp_t di_atime; /* time last accessed */
7940 xfs_ictimestamp_t di_mtime; /* time last modified */
7941@@ -546,7 +548,7 @@ int xfs_itobp(struct xfs_mount *, struc
7942 int xfs_iread(struct xfs_mount *, struct xfs_trans *,
7943 struct xfs_inode *, uint);
7944 void xfs_dinode_to_disk(struct xfs_dinode *,
7945- struct xfs_icdinode *);
7946+ struct xfs_icdinode *, int);
7947 void xfs_idestroy_fork(struct xfs_inode *, int);
7948 void xfs_idata_realloc(struct xfs_inode *, int, int);
7949 void xfs_iroot_realloc(struct xfs_inode *, int, int);
7950diff -NurpP --minimal linux-3.0.9/fs/xfs/xfs_itable.c linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_itable.c
7951--- linux-3.0.9/fs/xfs/xfs_itable.c 2011-05-22 16:17:54.000000000 +0200
7952+++ linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_itable.c 2011-06-10 22:11:24.000000000 +0200
7953@@ -98,6 +98,7 @@ xfs_bulkstat_one_int(
7954 buf->bs_mode = dic->di_mode;
7955 buf->bs_uid = dic->di_uid;
7956 buf->bs_gid = dic->di_gid;
7957+ buf->bs_tag = dic->di_tag;
7958 buf->bs_size = dic->di_size;
7959
7960 /*
7961diff -NurpP --minimal linux-3.0.9/fs/xfs/xfs_log_recover.c linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_log_recover.c
7962--- linux-3.0.9/fs/xfs/xfs_log_recover.c 2011-07-22 11:18:10.000000000 +0200
7963+++ linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_log_recover.c 2011-06-10 22:11:24.000000000 +0200
7964@@ -2343,7 +2343,8 @@ xlog_recover_inode_pass2(
7965 }
7966
7967 /* The core is in in-core format */
7968- xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr);
7969+ xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr,
7970+ mp->m_flags & XFS_MOUNT_TAGGED);
7971
7972 /* the rest is in on-disk format */
7973 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
7974diff -NurpP --minimal linux-3.0.9/fs/xfs/xfs_mount.h linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_mount.h
7975--- linux-3.0.9/fs/xfs/xfs_mount.h 2011-07-22 11:18:10.000000000 +0200
7976+++ linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_mount.h 2011-06-10 22:11:24.000000000 +0200
7977@@ -249,6 +249,7 @@ typedef struct xfs_mount {
7978 allocator */
7979 #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
7980
7981+#define XFS_MOUNT_TAGGED (1ULL << 31) /* context tagging */
7982
7983 /*
7984 * Default minimum read and write sizes.
7985diff -NurpP --minimal linux-3.0.9/fs/xfs/xfs_vnodeops.c linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_vnodeops.c
7986--- linux-3.0.9/fs/xfs/xfs_vnodeops.c 2011-07-22 11:18:10.000000000 +0200
7987+++ linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_vnodeops.c 2011-07-01 11:35:35.000000000 +0200
7988@@ -50,6 +50,78 @@
7989 #include "xfs_vnodeops.h"
7990 #include "xfs_trace.h"
7991
7992+/* Mirror the VFS inode's IMMUTABLE/IXUNLINK (i_flags) and BARRIER/COW (i_vflags) bits into ip->i_d. */
7993+STATIC void
7994+xfs_get_inode_flags(
7995+ xfs_inode_t *ip)
7996+{
7997+ struct inode *inode = VFS_I(ip);
7998+ unsigned int flags = inode->i_flags;
7999+ unsigned int vflags = inode->i_vflags;
8000+ /* i_flags bits -> di_flags; each bit is set or cleared to match the VFS inode. */
8001+ if (flags & S_IMMUTABLE)
8002+ ip->i_d.di_flags |= XFS_DIFLAG_IMMUTABLE;
8003+ else
8004+ ip->i_d.di_flags &= ~XFS_DIFLAG_IMMUTABLE;
8005+ if (flags & S_IXUNLINK)
8006+ ip->i_d.di_flags |= XFS_DIFLAG_IXUNLINK;
8007+ else
8008+ ip->i_d.di_flags &= ~XFS_DIFLAG_IXUNLINK;
8009+ /* i_vflags bits -> di_vflags, same set-or-clear treatment. */
8010+ if (vflags & V_BARRIER)
8011+ ip->i_d.di_vflags |= XFS_DIVFLAG_BARRIER;
8012+ else
8013+ ip->i_d.di_vflags &= ~XFS_DIVFLAG_BARRIER;
8014+ if (vflags & V_COW)
8015+ ip->i_d.di_vflags |= XFS_DIVFLAG_COW;
8016+ else
8017+ ip->i_d.di_vflags &= ~XFS_DIVFLAG_COW;
8018+}
8019+/* Store flags/vflags on the VFS inode and log the matching inode core in one transaction; returns 0 or the reserve/commit error. */
8020+int
8021+xfs_sync_flags(
8022+ struct inode *inode,
8023+ int flags,
8024+ int vflags)
8025+{
8026+ struct xfs_inode *ip = XFS_I(inode);
8027+ struct xfs_mount *mp = ip->i_mount;
8028+ struct xfs_trans *tp;
8029+ unsigned int lock_flags = 0; /* NOTE(review): never assigned again in this function */
8030+ int code;
8031+
8032+ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
8033+ code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
8034+ if (code)
8035+ goto error_out; /* the inode lock has not been taken yet at this point */
8036+
8037+ xfs_ilock(ip, XFS_ILOCK_EXCL);
8038+
8039+ xfs_trans_ijoin(tp, ip);
8040+ /* Overwrite the VFS flag words, then mirror them into ip->i_d via xfs_get_inode_flags(). */
8041+ inode->i_flags = flags;
8042+ inode->i_vflags = vflags;
8043+ xfs_get_inode_flags(ip);
8044+
8045+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
8046+ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
8047+
8048+ XFS_STATS_INC(xs_ig_attrchg);
8049+
8050+ if (mp->m_flags & XFS_MOUNT_WSYNC)
8051+ xfs_trans_set_sync(tp);
8052+ code = xfs_trans_commit(tp, 0);
8053+ xfs_iunlock(ip, XFS_ILOCK_EXCL); /* unlocked here whether or not the commit succeeded */
8054+ return code;
8055+
8056+error_out:
8057+ xfs_trans_cancel(tp, 0);
8058+ if (lock_flags) /* NOTE(review): always false as written, so the unlock below is dead code */
8059+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
8060+ return code;
8061+}
8062+
8063+
8064 int
8065 xfs_setattr(
8066 struct xfs_inode *ip,
8067@@ -65,6 +137,7 @@ xfs_setattr(
8068 uint commit_flags=0;
8069 uid_t uid=0, iuid=0;
8070 gid_t gid=0, igid=0;
8071+ tag_t tag=0, itag=0;
8072 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
8073 int need_iolock = 1;
8074
8075@@ -147,7 +220,7 @@ xfs_setattr(
8076 /*
8077 * Change file ownership. Must be the owner or privileged.
8078 */
8079- if (mask & (ATTR_UID|ATTR_GID)) {
8080+ if (mask & (ATTR_UID|ATTR_GID|ATTR_TAG)) {
8081 /*
8082 * These IDs could have changed since we last looked at them.
8083 * But, we're assured that if the ownership did change
8084@@ -156,8 +229,10 @@ xfs_setattr(
8085 */
8086 iuid = ip->i_d.di_uid;
8087 igid = ip->i_d.di_gid;
8088+ itag = ip->i_d.di_tag;
8089 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
8090 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
8091+ tag = (mask & ATTR_TAG) ? iattr->ia_tag : itag;
8092
8093 /*
8094 * Do a quota reservation only if uid/gid is actually
8095@@ -165,7 +240,8 @@ xfs_setattr(
8096 */
8097 if (XFS_IS_QUOTA_RUNNING(mp) &&
8098 ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
8099- (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
8100+ (XFS_IS_GQUOTA_ON(mp) && igid != gid) ||
8101+ (XFS_IS_GQUOTA_ON(mp) && itag != tag))) {
8102 ASSERT(tp);
8103 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
8104 capable(CAP_FOWNER) ?
8105@@ -329,7 +405,7 @@ xfs_setattr(
8106 /*
8107 * Change file ownership. Must be the owner or privileged.
8108 */
8109- if (mask & (ATTR_UID|ATTR_GID)) {
8110+ if (mask & (ATTR_UID|ATTR_GID|ATTR_TAG)) {
8111 /*
8112 * CAP_FSETID overrides the following restrictions:
8113 *
8114@@ -345,6 +421,10 @@ xfs_setattr(
8115 * Change the ownerships and register quota modifications
8116 * in the transaction.
8117 */
8118+ if (itag != tag) {
8119+ ip->i_d.di_tag = tag;
8120+ inode->i_tag = tag;
8121+ }
8122 if (iuid != uid) {
8123 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
8124 ASSERT(mask & ATTR_UID);
8125diff -NurpP --minimal linux-3.0.9/fs/xfs/xfs_vnodeops.h linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_vnodeops.h
8126--- linux-3.0.9/fs/xfs/xfs_vnodeops.h 2011-05-22 16:17:54.000000000 +0200
8127+++ linux-3.0.9-vs2.3.2.1/fs/xfs/xfs_vnodeops.h 2011-06-10 22:11:24.000000000 +0200
8128@@ -13,6 +13,7 @@ struct xfs_inode;
8129 struct xfs_iomap;
8130
8131
8132+int xfs_sync_xflags(struct xfs_inode *ip); /* NOTE(review): the xfs_vnodeops.c hunk defines xfs_sync_flags(struct inode *, int, int); confirm this prototype is still wanted */
8133 int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
8134 #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
8135 #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
8136diff -NurpP --minimal linux-3.0.9/include/asm-generic/tlb.h linux-3.0.9-vs2.3.2.1/include/asm-generic/tlb.h
8137--- linux-3.0.9/include/asm-generic/tlb.h 2011-07-22 11:18:10.000000000 +0200
8138+++ linux-3.0.9-vs2.3.2.1/include/asm-generic/tlb.h 2011-06-10 22:11:24.000000000 +0200
8139@@ -16,6 +16,7 @@
8140 #define _ASM_GENERIC__TLB_H
8141
8142 #include <linux/swap.h>
8143+#include <linux/vs_memory.h>
8144 #include <asm/pgalloc.h>
8145 #include <asm/tlbflush.h>
8146
8147diff -NurpP --minimal linux-3.0.9/include/linux/Kbuild linux-3.0.9-vs2.3.2.1/include/linux/Kbuild
8148--- linux-3.0.9/include/linux/Kbuild 2011-07-22 11:18:10.000000000 +0200
8149+++ linux-3.0.9-vs2.3.2.1/include/linux/Kbuild 2011-06-10 22:11:24.000000000 +0200
8150@@ -17,6 +17,7 @@ header-y += netfilter_bridge/
8151 header-y += netfilter_ipv4/
8152 header-y += netfilter_ipv6/
8153 header-y += usb/
8154+header-y += vserver/
8155 header-y += wimax/
8156
8157 objhdr-y += version.h
8158diff -NurpP --minimal linux-3.0.9/include/linux/capability.h linux-3.0.9-vs2.3.2.1/include/linux/capability.h
8159--- linux-3.0.9/include/linux/capability.h 2011-07-22 11:18:10.000000000 +0200
8160+++ linux-3.0.9-vs2.3.2.1/include/linux/capability.h 2011-06-10 22:11:24.000000000 +0200
8161@@ -279,6 +279,7 @@ struct cpu_vfs_cap_data {
8162 arbitrary SCSI commands */
8163 /* Allow setting encryption key on loopback filesystem */
8164 /* Allow setting zone reclaim policy */
8165+/* Allow the selection of a security context */
8166
8167 #define CAP_SYS_ADMIN 21
8168
8169@@ -362,7 +363,12 @@ struct cpu_vfs_cap_data {
8170
8171 #define CAP_LAST_CAP CAP_WAKE_ALARM
8172
8173-#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
8174+/* Allow context manipulations */
8175+/* Allow changing context info on files */
8176+
8177+#define CAP_CONTEXT 63
8178+
8179+#define cap_valid(x) ((x) >= 0 && ((x) <= CAP_LAST_CAP || (x) == CAP_CONTEXT))
8180
8181 /*
8182 * Bit location of each capability (used by user-space library and kernel)
8183diff -NurpP --minimal linux-3.0.9/include/linux/cred.h linux-3.0.9-vs2.3.2.1/include/linux/cred.h
8184--- linux-3.0.9/include/linux/cred.h 2011-07-22 11:18:10.000000000 +0200
8185+++ linux-3.0.9-vs2.3.2.1/include/linux/cred.h 2011-06-10 22:11:24.000000000 +0200
8186@@ -156,6 +156,7 @@ extern void exit_creds(struct task_struc
8187 extern int copy_creds(struct task_struct *, unsigned long);
8188 extern const struct cred *get_task_cred(struct task_struct *);
8189 extern struct cred *cred_alloc_blank(void);
8190+extern struct cred *__prepare_creds(const struct cred *);
8191 extern struct cred *prepare_creds(void);
8192 extern struct cred *prepare_exec_creds(void);
8193 extern int commit_creds(struct cred *);
8194@@ -209,6 +210,31 @@ static inline void validate_process_cred
8195 }
8196 #endif
8197
8198+static inline void set_cred_subscribers(struct cred *cred, int n)
8199+{
8200+#ifdef CONFIG_DEBUG_CREDENTIALS /* without it this helper has an empty body */
8201+ atomic_set(&cred->subscribers, n); /* overwrite the subscribers counter with n */
8202+#endif /* CONFIG_DEBUG_CREDENTIALS */
8203+}
8204+/* Return the current cred->subscribers count; always 0 when CONFIG_DEBUG_CREDENTIALS is not set. */
8205+static inline int read_cred_subscribers(const struct cred *cred)
8206+{
8207+#ifdef CONFIG_DEBUG_CREDENTIALS
8208+ return atomic_read(&cred->subscribers);
8209+#else
8210+ return 0;
8211+#endif /* CONFIG_DEBUG_CREDENTIALS */
8212+}
8213+/* Atomically add n to _cred->subscribers; does nothing when CONFIG_DEBUG_CREDENTIALS is not set. */
8214+static inline void alter_cred_subscribers(const struct cred *_cred, int n)
8215+{
8216+#ifdef CONFIG_DEBUG_CREDENTIALS
8217+ struct cred *cred = (struct cred *) _cred; /* const is cast away so the counter can be updated */
8218+
8219+ atomic_add(n, &cred->subscribers);
8220+#endif /* CONFIG_DEBUG_CREDENTIALS */
8221+}
8222+
8223 /**
8224 * get_new_cred - Get a reference on a new set of credentials
8225 * @cred: The new credentials to reference
8226diff -NurpP --minimal linux-3.0.9/include/linux/devpts_fs.h linux-3.0.9-vs2.3.2.1/include/linux/devpts_fs.h
8227--- linux-3.0.9/include/linux/devpts_fs.h 2008-12-25 00:26:37.000000000 +0100
8228+++ linux-3.0.9-vs2.3.2.1/include/linux/devpts_fs.h 2011-06-10 22:11:24.000000000 +0200
8229@@ -45,5 +45,4 @@ static inline void devpts_pty_kill(struc
8230
8231 #endif
8232
8233-
8234 #endif /* _LINUX_DEVPTS_FS_H */
8235diff -NurpP --minimal linux-3.0.9/include/linux/ext2_fs.h linux-3.0.9-vs2.3.2.1/include/linux/ext2_fs.h
8236--- linux-3.0.9/include/linux/ext2_fs.h 2011-11-15 16:40:47.000000000 +0100
8237+++ linux-3.0.9-vs2.3.2.1/include/linux/ext2_fs.h 2011-11-15 17:37:07.000000000 +0100
8238@@ -189,8 +189,12 @@ struct ext2_group_desc
8239 #define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
8240 #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
8241 #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
8242+#define EXT2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
8243 #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
8244
8245+#define EXT2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
8246+#define EXT2_COW_FL FS_COW_FL /* Copy on Write marker */
8247+
8248 #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
8249 #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
8250
8251@@ -274,7 +278,8 @@ struct ext2_inode {
8252 __u16 i_pad1;
8253 __le16 l_i_uid_high; /* these 2 fields */
8254 __le16 l_i_gid_high; /* were reserved2[0] */
8255- __u32 l_i_reserved2;
8256+ __le16 l_i_tag; /* Context Tag */
8257+ __u16 l_i_reserved2;
8258 } linux2;
8259 struct {
8260 __u8 h_i_frag; /* Fragment number */
8261@@ -303,6 +308,7 @@ struct ext2_inode {
8262 #define i_gid_low i_gid
8263 #define i_uid_high osd2.linux2.l_i_uid_high
8264 #define i_gid_high osd2.linux2.l_i_gid_high
8265+#define i_raw_tag osd2.linux2.l_i_tag
8266 #define i_reserved2 osd2.linux2.l_i_reserved2
8267 #endif
8268
8269@@ -347,6 +353,7 @@ struct ext2_inode {
8270 #define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */
8271 #define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */
8272 #define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */
8273+#define EXT2_MOUNT_TAGGED (1<<24) /* Enable Context Tags */
8274
8275
8276 #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
8277diff -NurpP --minimal linux-3.0.9/include/linux/ext3_fs.h linux-3.0.9-vs2.3.2.1/include/linux/ext3_fs.h
8278--- linux-3.0.9/include/linux/ext3_fs.h 2011-11-15 16:40:47.000000000 +0100
8279+++ linux-3.0.9-vs2.3.2.1/include/linux/ext3_fs.h 2011-11-15 17:37:07.000000000 +0100
8280@@ -173,10 +173,14 @@ struct ext3_group_desc
8281 #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */
8282 #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
8283 #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
8284+#define EXT3_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
8285 #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
8286
8287-#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
8288-#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
8289+#define EXT3_BARRIER_FL 0x04000000 /* Barrier for chroot() */
8290+#define EXT3_COW_FL 0x20000000 /* Copy on Write marker */
8291+
8292+#define EXT3_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */
8293+#define EXT3_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */
8294
8295 /* Flags that should be inherited by new inodes from their parent. */
8296 #define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
8297@@ -312,7 +316,8 @@ struct ext3_inode {
8298 __u16 i_pad1;
8299 __le16 l_i_uid_high; /* these 2 fields */
8300 __le16 l_i_gid_high; /* were reserved2[0] */
8301- __u32 l_i_reserved2;
8302+ __le16 l_i_tag; /* Context Tag */
8303+ __u16 l_i_reserved2;
8304 } linux2;
8305 struct {
8306 __u8 h_i_frag; /* Fragment number */
8307@@ -343,6 +348,7 @@ struct ext3_inode {
8308 #define i_gid_low i_gid
8309 #define i_uid_high osd2.linux2.l_i_uid_high
8310 #define i_gid_high osd2.linux2.l_i_gid_high
8311+#define i_raw_tag osd2.linux2.l_i_tag
8312 #define i_reserved2 osd2.linux2.l_i_reserved2
8313
8314 #elif defined(__GNU__)
8315@@ -405,6 +411,7 @@ struct ext3_inode {
8316 #define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
8317 #define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write
8318 * error in ordered mode */
8319+#define EXT3_MOUNT_TAGGED (1<<24) /* Enable Context Tags */
8320
8321 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
8322 #ifndef _LINUX_EXT2_FS_H
8323@@ -919,6 +926,7 @@ extern void ext3_get_inode_flags(struct
8324 extern void ext3_set_aops(struct inode *inode);
8325 extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8326 u64 start, u64 len);
8327+extern int ext3_sync_flags(struct inode *, int, int);
8328
8329 /* ioctl.c */
8330 extern long ext3_ioctl(struct file *, unsigned int, unsigned long);
8331diff -NurpP --minimal linux-3.0.9/include/linux/fs.h linux-3.0.9-vs2.3.2.1/include/linux/fs.h
8332--- linux-3.0.9/include/linux/fs.h 2011-07-22 11:18:10.000000000 +0200
8333+++ linux-3.0.9-vs2.3.2.1/include/linux/fs.h 2011-07-01 11:35:35.000000000 +0200
8334@@ -208,6 +208,9 @@ struct inodes_stat_t {
8335 #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
8336 #define MS_I_VERSION (1<<23) /* Update inode I_version field */
8337 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
8338+#define MS_TAGGED (1<<25) /* use generic inode tagging */
8339+#define MS_TAGID (1<<26) /* use specific tag for this mount */
8340+#define MS_NOTAGCHECK (1<<27) /* don't check tags */
8341 #define MS_NOSEC (1<<28)
8342 #define MS_BORN (1<<29)
8343 #define MS_ACTIVE (1<<30)
8344@@ -239,6 +242,14 @@ struct inodes_stat_t {
8345 #define S_IMA 1024 /* Inode has an associated IMA struct */
8346 #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */
8347 #define S_NOSEC 4096 /* no suid or xattr security attributes */
8348+#define S_IXUNLINK 8192 /* Immutable Invert on unlink */
8349+
8350+/* Linux-VServer related Inode flags */
8351+
8352+#define V_VALID 1
8353+#define V_XATTR 2
8354+#define V_BARRIER 4 /* Barrier for chroot() */
8355+#define V_COW 8 /* Copy on Write */
8356
8357 /*
8358 * Note that nosuid etc flags are inode-specific: setting some file-system
8359@@ -261,12 +272,15 @@ struct inodes_stat_t {
8360 #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
8361 ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
8362 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
8363-#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
8364-#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
8365+#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
8366+#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
8367+#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED)
8368
8369 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
8370 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
8371 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
8372+#define IS_IXUNLINK(inode) ((inode)->i_flags & S_IXUNLINK)
8373+#define IS_IXORUNLINK(inode) ((IS_IXUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
8374 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
8375
8376 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
8377@@ -277,6 +291,16 @@ struct inodes_stat_t {
8378 #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
8379 #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
8380
8381+#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER))
8382+
8383+#ifdef CONFIG_VSERVER_COWBL
8384+# define IS_COW(inode) (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode))
8385+# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1))
8386+#else
8387+# define IS_COW(inode) (0)
8388+# define IS_COW_LINK(inode) (0)
8389+#endif
8390+
8391 /* the read-only stuff doesn't really belong here, but any other place is
8392 probably as bad and I don't want to create yet another include file. */
8393
8394@@ -362,11 +386,14 @@ struct inodes_stat_t {
8395 #define FS_EXTENT_FL 0x00080000 /* Extents */
8396 #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */
8397 #define FS_NOCOW_FL 0x00800000 /* Do not cow file */
8398+#define FS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
8399 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
8400
8401-#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
8402-#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
8403-
8404+#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
8405+#define FS_COW_FL 0x20000000 /* Copy on Write marker */
8406+
8407+#define FS_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */
8408+#define FS_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */
8409
8410 #define SYNC_FILE_RANGE_WAIT_BEFORE 1
8411 #define SYNC_FILE_RANGE_WRITE 2
8412@@ -447,6 +474,7 @@ typedef void (dio_iodone_t)(struct kiocb
8413 #define ATTR_KILL_PRIV (1 << 14)
8414 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
8415 #define ATTR_TIMES_SET (1 << 16)
8416+#define ATTR_TAG (1 << 17)
8417
8418 /*
8419 * This is the Inode Attributes structure, used for notify_change(). It
8420@@ -462,6 +490,7 @@ struct iattr {
8421 umode_t ia_mode;
8422 uid_t ia_uid;
8423 gid_t ia_gid;
8424+ tag_t ia_tag;
8425 loff_t ia_size;
8426 struct timespec ia_atime;
8427 struct timespec ia_mtime;
8428@@ -475,6 +504,9 @@ struct iattr {
8429 struct file *ia_file;
8430 };
8431
8432+#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */
8433+#define ATTR_FLAG_IXUNLINK 1024 /* Immutable invert on unlink */
8434+
8435 /*
8436 * Includes for diskquotas.
8437 */
8438@@ -740,11 +772,13 @@ struct inode {
8439 umode_t i_mode;
8440 uid_t i_uid;
8441 gid_t i_gid;
8442+ tag_t i_tag;
8443 const struct inode_operations *i_op;
8444 struct super_block *i_sb;
8445
8446 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
8447- unsigned int i_flags;
8448+ unsigned short i_flags;
8449+ unsigned short i_vflags;
8450 unsigned long i_state;
8451 #ifdef CONFIG_SECURITY
8452 void *i_security;
8453@@ -766,6 +800,7 @@ struct inode {
8454 atomic_t i_count;
8455 unsigned int i_nlink;
8456 dev_t i_rdev;
8457+ dev_t i_mdev;
8458 unsigned int i_blkbits;
8459 u64 i_version;
8460 loff_t i_size;
8461@@ -890,12 +925,12 @@ static inline void i_size_write(struct i
8462
8463 static inline unsigned iminor(const struct inode *inode)
8464 {
8465- return MINOR(inode->i_rdev);
8466+ return MINOR(inode->i_mdev);
8467 }
8468
8469 static inline unsigned imajor(const struct inode *inode)
8470 {
8471- return MAJOR(inode->i_rdev);
8472+ return MAJOR(inode->i_mdev);
8473 }
8474
8475 extern struct block_device *I_BDEV(struct inode *inode);
8476@@ -957,6 +992,7 @@ struct file {
8477 loff_t f_pos;
8478 struct fown_struct f_owner;
8479 const struct cred *f_cred;
8480+ xid_t f_xid;
8481 struct file_ra_state f_ra;
8482
8483 u64 f_version;
8484@@ -1101,6 +1137,7 @@ struct file_lock {
8485 struct file *fl_file;
8486 loff_t fl_start;
8487 loff_t fl_end;
8488+ xid_t fl_xid;
8489
8490 struct fasync_struct * fl_fasync; /* for lease break notifications */
8491 unsigned long fl_break_time; /* for nonblocking lease breaks */
8492@@ -1600,6 +1637,7 @@ struct inode_operations {
8493 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
8494 ssize_t (*listxattr) (struct dentry *, char *, size_t);
8495 int (*removexattr) (struct dentry *, const char *);
8496+ int (*sync_flags) (struct inode *, int, int);
8497 void (*truncate_range)(struct inode *, loff_t, loff_t);
8498 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
8499 u64 len);
8500@@ -1618,6 +1656,7 @@ extern ssize_t vfs_readv(struct file *,
8501 unsigned long, loff_t *);
8502 extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
8503 unsigned long, loff_t *);
8504+ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
8505
8506 struct super_operations {
8507 struct inode *(*alloc_inode)(struct super_block *sb);
8508@@ -2437,6 +2476,7 @@ extern int dcache_dir_open(struct inode
8509 extern int dcache_dir_close(struct inode *, struct file *);
8510 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
8511 extern int dcache_readdir(struct file *, void *, filldir_t);
8512+extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *));
8513 extern int simple_setattr(struct dentry *, struct iattr *);
8514 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
8515 extern int simple_statfs(struct dentry *, struct kstatfs *);
8516diff -NurpP --minimal linux-3.0.9/include/linux/gfs2_ondisk.h linux-3.0.9-vs2.3.2.1/include/linux/gfs2_ondisk.h
8517--- linux-3.0.9/include/linux/gfs2_ondisk.h 2010-07-07 18:31:55.000000000 +0200
8518+++ linux-3.0.9-vs2.3.2.1/include/linux/gfs2_ondisk.h 2011-06-10 22:11:24.000000000 +0200
8519@@ -211,6 +211,9 @@ enum {
8520 gfs2fl_NoAtime = 7,
8521 gfs2fl_Sync = 8,
8522 gfs2fl_System = 9,
8523+ gfs2fl_IXUnlink = 16,
8524+ gfs2fl_Barrier = 17,
8525+ gfs2fl_Cow = 18,
8526 gfs2fl_TruncInProg = 29,
8527 gfs2fl_InheritDirectio = 30,
8528 gfs2fl_InheritJdata = 31,
8529@@ -227,6 +230,9 @@ enum {
8530 #define GFS2_DIF_NOATIME 0x00000080
8531 #define GFS2_DIF_SYNC 0x00000100
8532 #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */
8533+#define GFS2_DIF_IXUNLINK 0x00010000
8534+#define GFS2_DIF_BARRIER 0x00020000
8535+#define GFS2_DIF_COW 0x00040000
8536 #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */
8537 #define GFS2_DIF_INHERIT_DIRECTIO 0x40000000
8538 #define GFS2_DIF_INHERIT_JDATA 0x80000000
8539diff -NurpP --minimal linux-3.0.9/include/linux/if_tun.h linux-3.0.9-vs2.3.2.1/include/linux/if_tun.h
8540--- linux-3.0.9/include/linux/if_tun.h 2010-08-02 16:52:54.000000000 +0200
8541+++ linux-3.0.9-vs2.3.2.1/include/linux/if_tun.h 2011-06-10 22:11:24.000000000 +0200
8542@@ -53,6 +53,7 @@
8543 #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
8544 #define TUNGETVNETHDRSZ _IOR('T', 215, int)
8545 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
8546+#define TUNSETNID _IOW('T', 217, int)
8547
8548 /* TUNSETIFF ifr flags */
8549 #define IFF_TUN 0x0001
8550diff -NurpP --minimal linux-3.0.9/include/linux/init_task.h linux-3.0.9-vs2.3.2.1/include/linux/init_task.h
8551--- linux-3.0.9/include/linux/init_task.h 2011-07-22 11:18:10.000000000 +0200
8552+++ linux-3.0.9-vs2.3.2.1/include/linux/init_task.h 2011-06-10 22:11:24.000000000 +0200
8553@@ -193,6 +193,10 @@ extern struct cred init_cred;
8554 INIT_FTRACE_GRAPH \
8555 INIT_TRACE_RECURSION \
8556 INIT_TASK_RCU_PREEMPT(tsk) \
8557+ .xid = 0, \
8558+ .vx_info = NULL, \
8559+ .nid = 0, \
8560+ .nx_info = NULL, \
8561 }
8562
8563
8564diff -NurpP --minimal linux-3.0.9/include/linux/ipc.h linux-3.0.9-vs2.3.2.1/include/linux/ipc.h
8565--- linux-3.0.9/include/linux/ipc.h 2009-12-03 20:02:55.000000000 +0100
8566+++ linux-3.0.9-vs2.3.2.1/include/linux/ipc.h 2011-06-10 22:11:24.000000000 +0200
8567@@ -91,6 +91,7 @@ struct kern_ipc_perm
8568 key_t key;
8569 uid_t uid;
8570 gid_t gid;
8571+ xid_t xid;
8572 uid_t cuid;
8573 gid_t cgid;
8574 mode_t mode;
8575diff -NurpP --minimal linux-3.0.9/include/linux/ipc_namespace.h linux-3.0.9-vs2.3.2.1/include/linux/ipc_namespace.h
8576--- linux-3.0.9/include/linux/ipc_namespace.h 2011-05-22 16:17:55.000000000 +0200
8577+++ linux-3.0.9-vs2.3.2.1/include/linux/ipc_namespace.h 2011-06-13 14:09:44.000000000 +0200
8578@@ -94,7 +94,8 @@ static inline int mq_init_ns(struct ipc_
8579
8580 #if defined(CONFIG_IPC_NS)
8581 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
8582- struct task_struct *tsk);
8583+ struct ipc_namespace *old_ns,
8584+ struct user_namespace *user_ns);
8585 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
8586 {
8587 if (ns)
8588@@ -105,12 +106,13 @@ static inline struct ipc_namespace *get_
8589 extern void put_ipc_ns(struct ipc_namespace *ns);
8590 #else
8591 static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
8592- struct task_struct *tsk)
8593+ struct ipc_namespace *old_ns,
8594+ struct user_namespace *user_ns)
8595 {
8596 if (flags & CLONE_NEWIPC)
8597 return ERR_PTR(-EINVAL);
8598
8599- return tsk->nsproxy->ipc_ns;
8600+ return old_ns;
8601 }
8602
8603 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
8604diff -NurpP --minimal linux-3.0.9/include/linux/loop.h linux-3.0.9-vs2.3.2.1/include/linux/loop.h
8605--- linux-3.0.9/include/linux/loop.h 2009-09-10 15:26:25.000000000 +0200
8606+++ linux-3.0.9-vs2.3.2.1/include/linux/loop.h 2011-06-10 22:11:24.000000000 +0200
8607@@ -45,6 +45,7 @@ struct loop_device {
8608 struct loop_func_table *lo_encryption;
8609 __u32 lo_init[2];
8610 uid_t lo_key_owner; /* Who set the key */
8611+ xid_t lo_xid;
8612 int (*ioctl)(struct loop_device *, int cmd,
8613 unsigned long arg);
8614
8615diff -NurpP --minimal linux-3.0.9/include/linux/magic.h linux-3.0.9-vs2.3.2.1/include/linux/magic.h
8616--- linux-3.0.9/include/linux/magic.h 2011-05-22 16:17:55.000000000 +0200
8617+++ linux-3.0.9-vs2.3.2.1/include/linux/magic.h 2011-06-10 22:11:24.000000000 +0200
8618@@ -3,7 +3,7 @@
8619
8620 #define ADFS_SUPER_MAGIC 0xadf5
8621 #define AFFS_SUPER_MAGIC 0xadff
8622-#define AFS_SUPER_MAGIC 0x5346414F
8623+#define AFS_SUPER_MAGIC 0x5346414F
8624 #define AUTOFS_SUPER_MAGIC 0x0187
8625 #define CODA_SUPER_MAGIC 0x73757245
8626 #define CRAMFS_MAGIC 0x28cd3d45 /* some random number */
8627@@ -41,6 +41,7 @@
8628 #define NFS_SUPER_MAGIC 0x6969
8629 #define OPENPROM_SUPER_MAGIC 0x9fa1
8630 #define PROC_SUPER_MAGIC 0x9fa0
8631+#define DEVPTS_SUPER_MAGIC 0x1cd1
8632 #define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */
8633
8634 #define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */
8635diff -NurpP --minimal linux-3.0.9/include/linux/major.h linux-3.0.9-vs2.3.2.1/include/linux/major.h
8636--- linux-3.0.9/include/linux/major.h 2009-09-10 15:26:25.000000000 +0200
8637+++ linux-3.0.9-vs2.3.2.1/include/linux/major.h 2011-06-10 22:11:24.000000000 +0200
8638@@ -15,6 +15,7 @@
8639 #define HD_MAJOR IDE0_MAJOR
8640 #define PTY_SLAVE_MAJOR 3
8641 #define TTY_MAJOR 4
8642+#define VROOT_MAJOR 4
8643 #define TTYAUX_MAJOR 5
8644 #define LP_MAJOR 6
8645 #define VCS_MAJOR 7
8646diff -NurpP --minimal linux-3.0.9/include/linux/memcontrol.h linux-3.0.9-vs2.3.2.1/include/linux/memcontrol.h
8647--- linux-3.0.9/include/linux/memcontrol.h 2011-07-22 11:18:10.000000000 +0200
8648+++ linux-3.0.9-vs2.3.2.1/include/linux/memcontrol.h 2011-06-22 12:39:15.000000000 +0200
8649@@ -86,6 +86,13 @@ extern struct mem_cgroup *try_get_mem_cg
8650 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
8651 extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
8652
8653+extern u64 mem_cgroup_res_read_u64(struct mem_cgroup *mem, int member);
8654+extern u64 mem_cgroup_memsw_read_u64(struct mem_cgroup *mem, int member);
8655+
8656+extern s64 mem_cgroup_stat_read_cache(struct mem_cgroup *mem);
8657+extern s64 mem_cgroup_stat_read_anon(struct mem_cgroup *mem);
8658+extern s64 mem_cgroup_stat_read_mapped(struct mem_cgroup *mem);
8659+
8660 static inline
8661 int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
8662 {
8663diff -NurpP --minimal linux-3.0.9/include/linux/mm_types.h linux-3.0.9-vs2.3.2.1/include/linux/mm_types.h
8664--- linux-3.0.9/include/linux/mm_types.h 2011-11-15 16:40:47.000000000 +0100
8665+++ linux-3.0.9-vs2.3.2.1/include/linux/mm_types.h 2011-11-15 17:37:07.000000000 +0100
8666@@ -282,6 +282,7 @@ struct mm_struct {
8667
8668 /* Architecture-specific MM context */
8669 mm_context_t context;
8670+ struct vx_info *mm_vx_info;
8671
8672 /* Swap token stuff */
8673 /*
8674diff -NurpP --minimal linux-3.0.9/include/linux/mmzone.h linux-3.0.9-vs2.3.2.1/include/linux/mmzone.h
8675--- linux-3.0.9/include/linux/mmzone.h 2011-07-22 11:18:11.000000000 +0200
8676+++ linux-3.0.9-vs2.3.2.1/include/linux/mmzone.h 2011-07-01 11:35:35.000000000 +0200
8677@@ -654,6 +654,13 @@ typedef struct pglist_data {
8678 __pgdat->node_start_pfn + __pgdat->node_spanned_pages;\
8679 })
8680
8681+#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
8682+
8683+#define node_end_pfn(nid) ({\
8684+ pg_data_t *__pgdat = NODE_DATA(nid);\
8685+ __pgdat->node_start_pfn + __pgdat->node_spanned_pages;\
8686+})
8687+
8688 #include <linux/memory_hotplug.h>
8689
8690 extern struct mutex zonelists_mutex;
8691diff -NurpP --minimal linux-3.0.9/include/linux/mount.h linux-3.0.9-vs2.3.2.1/include/linux/mount.h
8692--- linux-3.0.9/include/linux/mount.h 2011-03-15 18:07:39.000000000 +0100
8693+++ linux-3.0.9-vs2.3.2.1/include/linux/mount.h 2011-06-10 22:11:24.000000000 +0200
8694@@ -52,6 +52,9 @@ struct mnt_pcp {
8695 int mnt_writers;
8696 };
8697
8698+#define MNT_TAGID 0x10000
8699+#define MNT_NOTAG 0x20000
8700+
8701 struct vfsmount {
8702 struct list_head mnt_hash;
8703 struct vfsmount *mnt_parent; /* fs we are mounted on */
8704@@ -86,6 +89,7 @@ struct vfsmount {
8705 int mnt_expiry_mark; /* true if marked for expiry */
8706 int mnt_pinned;
8707 int mnt_ghosts;
8708+ tag_t mnt_tag; /* tagging used for vfsmount */
8709 };
8710
8711 struct file; /* forward dec */
8712diff -NurpP --minimal linux-3.0.9/include/linux/net.h linux-3.0.9-vs2.3.2.1/include/linux/net.h
8713--- linux-3.0.9/include/linux/net.h 2011-07-22 11:18:11.000000000 +0200
8714+++ linux-3.0.9-vs2.3.2.1/include/linux/net.h 2011-06-10 22:11:24.000000000 +0200
8715@@ -72,6 +72,7 @@ struct net;
8716 #define SOCK_NOSPACE 2
8717 #define SOCK_PASSCRED 3
8718 #define SOCK_PASSSEC 4
8719+#define SOCK_USER_SOCKET 5
8720
8721 #ifndef ARCH_HAS_SOCKET_TYPES
8722 /**
8723diff -NurpP --minimal linux-3.0.9/include/linux/netdevice.h linux-3.0.9-vs2.3.2.1/include/linux/netdevice.h
8724--- linux-3.0.9/include/linux/netdevice.h 2011-11-15 16:40:47.000000000 +0100
8725+++ linux-3.0.9-vs2.3.2.1/include/linux/netdevice.h 2011-09-17 19:22:49.000000000 +0200
8726@@ -1650,6 +1650,7 @@ extern void netdev_resync_ops(struct ne
8727 extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
8728 extern struct net_device *dev_get_by_index(struct net *net, int ifindex);
8729 extern struct net_device *__dev_get_by_index(struct net *net, int ifindex);
8730+extern struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex);
8731 extern struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
8732 extern int dev_restart(struct net_device *dev);
8733 #ifdef CONFIG_NETPOLL_TRAP
8734diff -NurpP --minimal linux-3.0.9/include/linux/nfs_mount.h linux-3.0.9-vs2.3.2.1/include/linux/nfs_mount.h
8735--- linux-3.0.9/include/linux/nfs_mount.h 2011-01-05 21:50:31.000000000 +0100
8736+++ linux-3.0.9-vs2.3.2.1/include/linux/nfs_mount.h 2011-06-10 22:11:24.000000000 +0200
8737@@ -63,7 +63,8 @@ struct nfs_mount_data {
8738 #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */
8739 #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */
8740 #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
8741-#define NFS_MOUNT_FLAGMASK 0xFFFF
8742+#define NFS_MOUNT_TAGGED 0x10000 /* context tagging */
8743+#define NFS_MOUNT_FLAGMASK 0x1FFFF
8744
8745 /* The following are for internal use only */
8746 #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
8747diff -NurpP --minimal linux-3.0.9/include/linux/nsproxy.h linux-3.0.9-vs2.3.2.1/include/linux/nsproxy.h
8748--- linux-3.0.9/include/linux/nsproxy.h 2011-07-22 11:18:11.000000000 +0200
8749+++ linux-3.0.9-vs2.3.2.1/include/linux/nsproxy.h 2011-06-10 22:11:24.000000000 +0200
8750@@ -3,6 +3,7 @@
8751
8752 #include <linux/spinlock.h>
8753 #include <linux/sched.h>
8754+#include <linux/vserver/debug.h>
8755
8756 struct mnt_namespace;
8757 struct uts_namespace;
8758@@ -63,22 +64,33 @@ static inline struct nsproxy *task_nspro
8759 }
8760
8761 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
8762+struct nsproxy *copy_nsproxy(struct nsproxy *orig);
8763 void exit_task_namespaces(struct task_struct *tsk);
8764 void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
8765 void free_nsproxy(struct nsproxy *ns);
8766 int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
8767 struct fs_struct *);
8768
8769-static inline void put_nsproxy(struct nsproxy *ns)
8770+#define get_nsproxy(n) __get_nsproxy(n, __FILE__, __LINE__)
8771+
8772+static inline void __get_nsproxy(struct nsproxy *ns,
8773+ const char *_file, int _line)
8774 {
8775- if (atomic_dec_and_test(&ns->count)) {
8776- free_nsproxy(ns);
8777- }
8778+ vxlprintk(VXD_CBIT(space, 0), "get_nsproxy(%p[%u])",
8779+ ns, atomic_read(&ns->count), _file, _line);
8780+ atomic_inc(&ns->count);
8781 }
8782
8783-static inline void get_nsproxy(struct nsproxy *ns)
8784+#define put_nsproxy(n) __put_nsproxy(n, __FILE__, __LINE__)
8785+
8786+static inline void __put_nsproxy(struct nsproxy *ns,
8787+ const char *_file, int _line)
8788 {
8789- atomic_inc(&ns->count);
8790+ vxlprintk(VXD_CBIT(space, 0), "put_nsproxy(%p[%u])",
8791+ ns, atomic_read(&ns->count), _file, _line);
8792+ if (atomic_dec_and_test(&ns->count)) {
8793+ free_nsproxy(ns);
8794+ }
8795 }
8796
8797 #endif
8798diff -NurpP --minimal linux-3.0.9/include/linux/pid.h linux-3.0.9-vs2.3.2.1/include/linux/pid.h
8799--- linux-3.0.9/include/linux/pid.h 2011-07-22 11:18:11.000000000 +0200
8800+++ linux-3.0.9-vs2.3.2.1/include/linux/pid.h 2011-06-10 22:11:24.000000000 +0200
8801@@ -8,7 +8,8 @@ enum pid_type
8802 PIDTYPE_PID,
8803 PIDTYPE_PGID,
8804 PIDTYPE_SID,
8805- PIDTYPE_MAX
8806+ PIDTYPE_MAX,
8807+ PIDTYPE_REALPID
8808 };
8809
8810 /*
8811@@ -171,6 +172,7 @@ static inline pid_t pid_nr(struct pid *p
8812 }
8813
8814 pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
8815+pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns);
8816 pid_t pid_vnr(struct pid *pid);
8817
8818 #define do_each_pid_task(pid, type, task) \
8819diff -NurpP --minimal linux-3.0.9/include/linux/proc_fs.h linux-3.0.9-vs2.3.2.1/include/linux/proc_fs.h
8820--- linux-3.0.9/include/linux/proc_fs.h 2011-07-22 11:18:11.000000000 +0200
8821+++ linux-3.0.9-vs2.3.2.1/include/linux/proc_fs.h 2011-06-10 22:11:24.000000000 +0200
8822@@ -56,6 +56,7 @@ struct proc_dir_entry {
8823 nlink_t nlink;
8824 uid_t uid;
8825 gid_t gid;
8826+ int vx_flags;
8827 loff_t size;
8828 const struct inode_operations *proc_iops;
8829 /*
8830@@ -252,12 +253,18 @@ extern const struct proc_ns_operations n
8831 extern const struct proc_ns_operations utsns_operations;
8832 extern const struct proc_ns_operations ipcns_operations;
8833
8834+struct vx_info;
8835+struct nx_info;
8836+
8837 union proc_op {
8838 int (*proc_get_link)(struct inode *, struct path *);
8839 int (*proc_read)(struct task_struct *task, char *page);
8840 int (*proc_show)(struct seq_file *m,
8841 struct pid_namespace *ns, struct pid *pid,
8842 struct task_struct *task);
8843+ int (*proc_vs_read)(char *page);
8844+ int (*proc_vxi_read)(struct vx_info *vxi, char *page);
8845+ int (*proc_nxi_read)(struct nx_info *nxi, char *page);
8846 };
8847
8848 struct ctl_table_header;
8849@@ -265,6 +272,7 @@ struct ctl_table;
8850
8851 struct proc_inode {
8852 struct pid *pid;
8853+ int vx_flags;
8854 int fd;
8855 union proc_op op;
8856 struct proc_dir_entry *pde;
8857diff -NurpP --minimal linux-3.0.9/include/linux/quotaops.h linux-3.0.9-vs2.3.2.1/include/linux/quotaops.h
8858--- linux-3.0.9/include/linux/quotaops.h 2011-05-22 16:17:57.000000000 +0200
8859+++ linux-3.0.9-vs2.3.2.1/include/linux/quotaops.h 2011-06-10 22:11:24.000000000 +0200
8860@@ -8,6 +8,7 @@
8861 #define _LINUX_QUOTAOPS_
8862
8863 #include <linux/fs.h>
8864+#include <linux/vs_dlimit.h>
8865
8866 #define DQUOT_SPACE_WARN 0x1
8867 #define DQUOT_SPACE_RESERVE 0x2
8868@@ -204,11 +205,12 @@ static inline void dquot_drop(struct ino
8869
8870 static inline int dquot_alloc_inode(const struct inode *inode)
8871 {
8872- return 0;
8873+ return dl_alloc_inode(inode);
8874 }
8875
8876 static inline void dquot_free_inode(const struct inode *inode)
8877 {
8878+ dl_free_inode(inode);
8879 }
8880
8881 static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
8882@@ -219,6 +221,10 @@ static inline int dquot_transfer(struct
8883 static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
8884 int flags)
8885 {
8886+ int ret = 0;
8887+
8888+ if ((ret = dl_alloc_space(inode, number)))
8889+ return ret;
8890 if (!(flags & DQUOT_SPACE_RESERVE))
8891 inode_add_bytes(inode, number);
8892 return 0;
8893@@ -229,6 +235,7 @@ static inline void __dquot_free_space(st
8894 {
8895 if (!(flags & DQUOT_SPACE_RESERVE))
8896 inode_sub_bytes(inode, number);
8897+ dl_free_space(inode, number);
8898 }
8899
8900 static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
8901diff -NurpP --minimal linux-3.0.9/include/linux/reboot.h linux-3.0.9-vs2.3.2.1/include/linux/reboot.h
8902--- linux-3.0.9/include/linux/reboot.h 2010-07-07 18:31:56.000000000 +0200
8903+++ linux-3.0.9-vs2.3.2.1/include/linux/reboot.h 2011-06-10 22:11:24.000000000 +0200
8904@@ -33,6 +33,7 @@
8905 #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
8906 #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2
8907 #define LINUX_REBOOT_CMD_KEXEC 0x45584543
8908+#define LINUX_REBOOT_CMD_OOM 0xDEADBEEF
8909
8910
8911 #ifdef __KERNEL__
8912diff -NurpP --minimal linux-3.0.9/include/linux/reiserfs_fs.h linux-3.0.9-vs2.3.2.1/include/linux/reiserfs_fs.h
8913--- linux-3.0.9/include/linux/reiserfs_fs.h 2011-05-22 16:17:58.000000000 +0200
8914+++ linux-3.0.9-vs2.3.2.1/include/linux/reiserfs_fs.h 2011-06-10 22:11:24.000000000 +0200
8915@@ -976,6 +976,11 @@ struct stat_data_v1 {
8916 #define REISERFS_COMPR_FL FS_COMPR_FL
8917 #define REISERFS_NOTAIL_FL FS_NOTAIL_FL
8918
8919+/* unfortunately reiserfs sdattr is only 16 bit */
8920+#define REISERFS_IXUNLINK_FL (FS_IXUNLINK_FL >> 16)
8921+#define REISERFS_BARRIER_FL (FS_BARRIER_FL >> 16)
8922+#define REISERFS_COW_FL (FS_COW_FL >> 16)
8923+
8924 /* persistent flags that file inherits from the parent directory */
8925 #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \
8926 REISERFS_SYNC_FL | \
8927@@ -985,6 +990,9 @@ struct stat_data_v1 {
8928 REISERFS_COMPR_FL | \
8929 REISERFS_NOTAIL_FL )
8930
8931+#define REISERFS_FL_USER_VISIBLE 0x80FF
8932+#define REISERFS_FL_USER_MODIFIABLE 0x80FF
8933+
8934 /* Stat Data on disk (reiserfs version of UFS disk inode minus the
8935 address blocks) */
8936 struct stat_data {
8937@@ -2073,6 +2081,7 @@ static inline void reiserfs_update_sd(st
8938 void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
8939 void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
8940 int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
8941+int reiserfs_sync_flags(struct inode *inode, int, int);
8942
8943 int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
8944
8945diff -NurpP --minimal linux-3.0.9/include/linux/reiserfs_fs_sb.h linux-3.0.9-vs2.3.2.1/include/linux/reiserfs_fs_sb.h
8946--- linux-3.0.9/include/linux/reiserfs_fs_sb.h 2010-02-25 11:52:07.000000000 +0100
8947+++ linux-3.0.9-vs2.3.2.1/include/linux/reiserfs_fs_sb.h 2011-06-10 22:11:24.000000000 +0200
8948@@ -476,6 +476,7 @@ enum reiserfs_mount_options {
8949 REISERFS_EXPOSE_PRIVROOT,
8950 REISERFS_BARRIER_NONE,
8951 REISERFS_BARRIER_FLUSH,
8952+ REISERFS_TAGGED,
8953
8954 /* Actions on error */
8955 REISERFS_ERROR_PANIC,
8956diff -NurpP --minimal linux-3.0.9/include/linux/sched.h linux-3.0.9-vs2.3.2.1/include/linux/sched.h
8957--- linux-3.0.9/include/linux/sched.h 2011-11-15 16:40:47.000000000 +0100
8958+++ linux-3.0.9-vs2.3.2.1/include/linux/sched.h 2011-10-18 13:51:13.000000000 +0200
8959@@ -1406,6 +1406,14 @@ struct task_struct {
8960 #endif
8961 seccomp_t seccomp;
8962
8963+/* vserver context data */
8964+ struct vx_info *vx_info;
8965+ struct nx_info *nx_info;
8966+
8967+ xid_t xid;
8968+ nid_t nid;
8969+ tag_t tag;
8970+
8971 /* Thread group tracking */
8972 u32 parent_exec_id;
8973 u32 self_exec_id;
8974@@ -1649,6 +1657,11 @@ struct pid_namespace;
8975 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
8976 struct pid_namespace *ns);
8977
8978+#include <linux/vserver/base.h>
8979+#include <linux/vserver/context.h>
8980+#include <linux/vserver/debug.h>
8981+#include <linux/vserver/pid.h>
8982+
8983 static inline pid_t task_pid_nr(struct task_struct *tsk)
8984 {
8985 return tsk->pid;
8986@@ -1662,7 +1675,8 @@ static inline pid_t task_pid_nr_ns(struc
8987
8988 static inline pid_t task_pid_vnr(struct task_struct *tsk)
8989 {
8990- return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
8991+ // return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
8992+ return vx_map_pid(__task_pid_nr_ns(tsk, PIDTYPE_PID, NULL));
8993 }
8994
8995
8996@@ -1675,7 +1689,7 @@ pid_t task_tgid_nr_ns(struct task_struct
8997
8998 static inline pid_t task_tgid_vnr(struct task_struct *tsk)
8999 {
9000- return pid_vnr(task_tgid(tsk));
9001+ return vx_map_tgid(pid_vnr(task_tgid(tsk)));
9002 }
9003
9004
9005diff -NurpP --minimal linux-3.0.9/include/linux/shmem_fs.h linux-3.0.9-vs2.3.2.1/include/linux/shmem_fs.h
9006--- linux-3.0.9/include/linux/shmem_fs.h 2011-07-22 11:18:11.000000000 +0200
9007+++ linux-3.0.9-vs2.3.2.1/include/linux/shmem_fs.h 2011-07-01 11:35:35.000000000 +0200
9008@@ -12,6 +12,9 @@
9009
9010 #define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t))
9011
9012+#define TMPFS_SUPER_MAGIC 0x01021994
9013+
9014+
9015 struct shmem_inode_info {
9016 spinlock_t lock;
9017 unsigned long flags;
9018diff -NurpP --minimal linux-3.0.9/include/linux/stat.h linux-3.0.9-vs2.3.2.1/include/linux/stat.h
9019--- linux-3.0.9/include/linux/stat.h 2008-12-25 00:26:37.000000000 +0100
9020+++ linux-3.0.9-vs2.3.2.1/include/linux/stat.h 2011-06-10 22:11:24.000000000 +0200
9021@@ -66,6 +66,7 @@ struct kstat {
9022 unsigned int nlink;
9023 uid_t uid;
9024 gid_t gid;
9025+ tag_t tag;
9026 dev_t rdev;
9027 loff_t size;
9028 struct timespec atime;
9029diff -NurpP --minimal linux-3.0.9/include/linux/sunrpc/auth.h linux-3.0.9-vs2.3.2.1/include/linux/sunrpc/auth.h
9030--- linux-3.0.9/include/linux/sunrpc/auth.h 2011-03-15 18:07:39.000000000 +0100
9031+++ linux-3.0.9-vs2.3.2.1/include/linux/sunrpc/auth.h 2011-06-10 22:11:24.000000000 +0200
9032@@ -25,6 +25,7 @@
9033 struct auth_cred {
9034 uid_t uid;
9035 gid_t gid;
9036+ tag_t tag;
9037 struct group_info *group_info;
9038 unsigned char machine_cred : 1;
9039 };
9040diff -NurpP --minimal linux-3.0.9/include/linux/sunrpc/clnt.h linux-3.0.9-vs2.3.2.1/include/linux/sunrpc/clnt.h
9041--- linux-3.0.9/include/linux/sunrpc/clnt.h 2011-05-22 16:17:58.000000000 +0200
9042+++ linux-3.0.9-vs2.3.2.1/include/linux/sunrpc/clnt.h 2011-06-10 22:11:24.000000000 +0200
9043@@ -49,7 +49,8 @@ struct rpc_clnt {
9044 unsigned int cl_softrtry : 1,/* soft timeouts */
9045 cl_discrtry : 1,/* disconnect before retry */
9046 cl_autobind : 1,/* use getport() */
9047- cl_chatty : 1;/* be verbose */
9048+ cl_chatty : 1,/* be verbose */
9049+ cl_tag : 1;/* context tagging */
9050
9051 struct rpc_rtt * cl_rtt; /* RTO estimator data */
9052 const struct rpc_timeout *cl_timeout; /* Timeout strategy */
9053diff -NurpP --minimal linux-3.0.9/include/linux/syscalls.h linux-3.0.9-vs2.3.2.1/include/linux/syscalls.h
9054--- linux-3.0.9/include/linux/syscalls.h 2011-07-22 11:18:11.000000000 +0200
9055+++ linux-3.0.9-vs2.3.2.1/include/linux/syscalls.h 2011-06-10 22:11:24.000000000 +0200
9056@@ -483,6 +483,8 @@ asmlinkage long sys_symlink(const char _
9057 asmlinkage long sys_unlink(const char __user *pathname);
9058 asmlinkage long sys_rename(const char __user *oldname,
9059 const char __user *newname);
9060+asmlinkage long sys_copyfile(const char __user *from, const char __user *to,
9061+ umode_t mode);
9062 asmlinkage long sys_chmod(const char __user *filename, mode_t mode);
9063 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode);
9064
9065diff -NurpP --minimal linux-3.0.9/include/linux/sysctl.h linux-3.0.9-vs2.3.2.1/include/linux/sysctl.h
9066--- linux-3.0.9/include/linux/sysctl.h 2011-03-15 18:07:40.000000000 +0100
9067+++ linux-3.0.9-vs2.3.2.1/include/linux/sysctl.h 2011-06-10 22:11:24.000000000 +0200
9068@@ -60,6 +60,7 @@ enum
9069 CTL_ABI=9, /* Binary emulation */
9070 CTL_CPU=10, /* CPU stuff (speed scaling, etc) */
9071 CTL_ARLAN=254, /* arlan wireless driver */
9072+ CTL_VSERVER=4242, /* Linux-VServer debug */
9073 CTL_S390DBF=5677, /* s390 debug */
9074 CTL_SUNRPC=7249, /* sunrpc debug */
9075 CTL_PM=9899, /* frv power management */
9076@@ -94,6 +95,7 @@ enum
9077
9078 KERN_PANIC=15, /* int: panic timeout */
9079 KERN_REALROOTDEV=16, /* real root device to mount after initrd */
9080+ KERN_VSHELPER=17, /* string: path to vshelper policy agent */
9081
9082 KERN_SPARC_REBOOT=21, /* reboot command on Sparc */
9083 KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */
9084diff -NurpP --minimal linux-3.0.9/include/linux/sysfs.h linux-3.0.9-vs2.3.2.1/include/linux/sysfs.h
9085--- linux-3.0.9/include/linux/sysfs.h 2011-07-22 11:18:11.000000000 +0200
9086+++ linux-3.0.9-vs2.3.2.1/include/linux/sysfs.h 2011-06-22 12:39:15.000000000 +0200
9087@@ -19,6 +19,8 @@
9088 #include <linux/kobject_ns.h>
9089 #include <asm/atomic.h>
9090
9091+#define SYSFS_SUPER_MAGIC 0x62656572
9092+
9093 struct kobject;
9094 struct module;
9095 enum kobj_ns_type;
9096diff -NurpP --minimal linux-3.0.9/include/linux/time.h linux-3.0.9-vs2.3.2.1/include/linux/time.h
9097--- linux-3.0.9/include/linux/time.h 2011-07-22 11:18:11.000000000 +0200
9098+++ linux-3.0.9-vs2.3.2.1/include/linux/time.h 2011-06-10 22:11:24.000000000 +0200
9099@@ -256,6 +256,9 @@ static __always_inline void timespec_add
9100 a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
9101 a->tv_nsec = ns;
9102 }
9103+
9104+#include <linux/vs_time.h>
9105+
9106 #endif /* __KERNEL__ */
9107
9108 #define NFDBITS __NFDBITS
9109diff -NurpP --minimal linux-3.0.9/include/linux/types.h linux-3.0.9-vs2.3.2.1/include/linux/types.h
9110--- linux-3.0.9/include/linux/types.h 2011-05-22 16:17:58.000000000 +0200
9111+++ linux-3.0.9-vs2.3.2.1/include/linux/types.h 2011-06-10 22:11:24.000000000 +0200
9112@@ -40,6 +40,9 @@ typedef __kernel_uid32_t uid_t;
9113 typedef __kernel_gid32_t gid_t;
9114 typedef __kernel_uid16_t uid16_t;
9115 typedef __kernel_gid16_t gid16_t;
9116+typedef unsigned int xid_t;
9117+typedef unsigned int nid_t;
9118+typedef unsigned int tag_t;
9119
9120 typedef unsigned long uintptr_t;
9121
9122diff -NurpP --minimal linux-3.0.9/include/linux/utsname.h linux-3.0.9-vs2.3.2.1/include/linux/utsname.h
9123--- linux-3.0.9/include/linux/utsname.h 2011-05-22 16:17:58.000000000 +0200
9124+++ linux-3.0.9-vs2.3.2.1/include/linux/utsname.h 2011-06-13 14:36:48.000000000 +0200
9125@@ -54,7 +54,8 @@ static inline void get_uts_ns(struct uts
9126 }
9127
9128 extern struct uts_namespace *copy_utsname(unsigned long flags,
9129- struct task_struct *tsk);
9130+ struct uts_namespace *old_ns,
9131+ struct user_namespace *user_ns);
9132 extern void free_uts_ns(struct kref *kref);
9133
9134 static inline void put_uts_ns(struct uts_namespace *ns)
9135@@ -71,12 +72,13 @@ static inline void put_uts_ns(struct uts
9136 }
9137
9138 static inline struct uts_namespace *copy_utsname(unsigned long flags,
9139- struct task_struct *tsk)
9140+ struct uts_namespace *old_ns,
9141+ struct user_namespace *user_ns)
9142 {
9143 if (flags & CLONE_NEWUTS)
9144 return ERR_PTR(-EINVAL);
9145
9146- return tsk->nsproxy->uts_ns;
9147+ return old_ns;
9148 }
9149 #endif
9150
9151diff -NurpP --minimal linux-3.0.9/include/linux/vroot.h linux-3.0.9-vs2.3.2.1/include/linux/vroot.h
9152--- linux-3.0.9/include/linux/vroot.h 1970-01-01 01:00:00.000000000 +0100
9153+++ linux-3.0.9-vs2.3.2.1/include/linux/vroot.h 2011-06-10 22:11:24.000000000 +0200
9154@@ -0,0 +1,51 @@
9155+
9156+/*
9157+ * include/linux/vroot.h
9158+ *
9159+ * written by Herbert Pötzl, 9/11/2002
9160+ * ported to 2.6 by Herbert Pötzl, 30/12/2004
9161+ *
9162+ * Copyright (C) 2002-2007 by Herbert Pötzl.
9163+ * Redistribution of this file is permitted under the
9164+ * GNU General Public License.
9165+ */
9166+
9167+#ifndef _LINUX_VROOT_H
9168+#define _LINUX_VROOT_H
9169+
9170+
9171+#ifdef __KERNEL__
9172+
9173+/* Possible states of device */
9174+enum {
9175+ Vr_unbound,
9176+ Vr_bound,
9177+};
9178+
9179+struct vroot_device {
9180+ int vr_number;
9181+ int vr_refcnt;
9182+
9183+ struct semaphore vr_ctl_mutex;
9184+ struct block_device *vr_device;
9185+ int vr_state;
9186+};
9187+
9188+
9189+typedef struct block_device *(vroot_grb_func)(struct block_device *);
9190+
9191+extern int register_vroot_grb(vroot_grb_func *);
9192+extern int unregister_vroot_grb(vroot_grb_func *);
9193+
9194+#endif /* __KERNEL__ */
9195+
9196+#define MAX_VROOT_DEFAULT 8
9197+
9198+/*
9199+ * IOCTL commands --- we will commandeer 0x56 ('V')
9200+ */
9201+
9202+#define VROOT_SET_DEV 0x5600
9203+#define VROOT_CLR_DEV 0x5601
9204+
9205+#endif /* _LINUX_VROOT_H */
9206diff -NurpP --minimal linux-3.0.9/include/linux/vs_base.h linux-3.0.9-vs2.3.2.1/include/linux/vs_base.h
9207--- linux-3.0.9/include/linux/vs_base.h 1970-01-01 01:00:00.000000000 +0100
9208+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_base.h 2011-06-10 22:11:24.000000000 +0200
9209@@ -0,0 +1,10 @@
9210+#ifndef _VS_BASE_H
9211+#define _VS_BASE_H
9212+
9213+#include "vserver/base.h"
9214+#include "vserver/check.h"
9215+#include "vserver/debug.h"
9216+
9217+#else
9218+#warning duplicate inclusion
9219+#endif
9220diff -NurpP --minimal linux-3.0.9/include/linux/vs_context.h linux-3.0.9-vs2.3.2.1/include/linux/vs_context.h
9221--- linux-3.0.9/include/linux/vs_context.h 1970-01-01 01:00:00.000000000 +0100
9222+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_context.h 2011-06-10 22:11:24.000000000 +0200
9223@@ -0,0 +1,242 @@
9224+#ifndef _VS_CONTEXT_H
9225+#define _VS_CONTEXT_H
9226+
9227+#include "vserver/base.h"
9228+#include "vserver/check.h"
9229+#include "vserver/context.h"
9230+#include "vserver/history.h"
9231+#include "vserver/debug.h"
9232+
9233+#include <linux/sched.h>
9234+
9235+
9236+#define get_vx_info(i) __get_vx_info(i, __FILE__, __LINE__, __HERE__)
9237+
9238+static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
9239+ const char *_file, int _line, void *_here)
9240+{
9241+ if (!vxi)
9242+ return NULL;
9243+
9244+ vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
9245+ vxi, vxi ? vxi->vx_id : 0,
9246+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9247+ _file, _line);
9248+ __vxh_get_vx_info(vxi, _here);
9249+
9250+ atomic_inc(&vxi->vx_usecnt);
9251+ return vxi;
9252+}
9253+
9254+
9255+extern void free_vx_info(struct vx_info *);
9256+
9257+#define put_vx_info(i) __put_vx_info(i, __FILE__, __LINE__, __HERE__)
9258+
9259+static inline void __put_vx_info(struct vx_info *vxi,
9260+ const char *_file, int _line, void *_here)
9261+{
9262+ if (!vxi)
9263+ return;
9264+
9265+ vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
9266+ vxi, vxi ? vxi->vx_id : 0,
9267+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9268+ _file, _line);
9269+ __vxh_put_vx_info(vxi, _here);
9270+
9271+ if (atomic_dec_and_test(&vxi->vx_usecnt))
9272+ free_vx_info(vxi);
9273+}
9274+
9275+
9276+#define init_vx_info(p, i) \
9277+ __init_vx_info(p, i, __FILE__, __LINE__, __HERE__)
9278+
9279+static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi,
9280+ const char *_file, int _line, void *_here)
9281+{
9282+ if (vxi) {
9283+ vxlprintk(VXD_CBIT(xid, 3),
9284+ "init_vx_info(%p[#%d.%d])",
9285+ vxi, vxi ? vxi->vx_id : 0,
9286+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9287+ _file, _line);
9288+ __vxh_init_vx_info(vxi, vxp, _here);
9289+
9290+ atomic_inc(&vxi->vx_usecnt);
9291+ }
9292+ *vxp = vxi;
9293+}
9294+
9295+
9296+#define set_vx_info(p, i) \
9297+ __set_vx_info(p, i, __FILE__, __LINE__, __HERE__)
9298+
9299+static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
9300+ const char *_file, int _line, void *_here)
9301+{
9302+ struct vx_info *vxo;
9303+
9304+ if (!vxi)
9305+ return;
9306+
9307+ vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])",
9308+ vxi, vxi ? vxi->vx_id : 0,
9309+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9310+ _file, _line);
9311+ __vxh_set_vx_info(vxi, vxp, _here);
9312+
9313+ atomic_inc(&vxi->vx_usecnt);
9314+ vxo = xchg(vxp, vxi);
9315+ BUG_ON(vxo);
9316+}
9317+
9318+
9319+#define clr_vx_info(p) __clr_vx_info(p, __FILE__, __LINE__, __HERE__)
9320+
9321+static inline void __clr_vx_info(struct vx_info **vxp,
9322+ const char *_file, int _line, void *_here)
9323+{
9324+ struct vx_info *vxo;
9325+
9326+ vxo = xchg(vxp, NULL);
9327+ if (!vxo)
9328+ return;
9329+
9330+ vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])",
9331+ vxo, vxo ? vxo->vx_id : 0,
9332+ vxo ? atomic_read(&vxo->vx_usecnt) : 0,
9333+ _file, _line);
9334+ __vxh_clr_vx_info(vxo, vxp, _here);
9335+
9336+ if (atomic_dec_and_test(&vxo->vx_usecnt))
9337+ free_vx_info(vxo);
9338+}
9339+
9340+
9341+#define claim_vx_info(v, p) \
9342+ __claim_vx_info(v, p, __FILE__, __LINE__, __HERE__)
9343+
9344+static inline void __claim_vx_info(struct vx_info *vxi,
9345+ struct task_struct *task,
9346+ const char *_file, int _line, void *_here)
9347+{
9348+ vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p",
9349+ vxi, vxi ? vxi->vx_id : 0,
9350+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9351+ vxi ? atomic_read(&vxi->vx_tasks) : 0,
9352+ task, _file, _line);
9353+ __vxh_claim_vx_info(vxi, task, _here);
9354+
9355+ atomic_inc(&vxi->vx_tasks);
9356+}
9357+
9358+
9359+extern void unhash_vx_info(struct vx_info *);
9360+
9361+#define release_vx_info(v, p) \
9362+ __release_vx_info(v, p, __FILE__, __LINE__, __HERE__)
9363+
9364+static inline void __release_vx_info(struct vx_info *vxi,
9365+ struct task_struct *task,
9366+ const char *_file, int _line, void *_here)
9367+{
9368+ vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p",
9369+ vxi, vxi ? vxi->vx_id : 0,
9370+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
9371+ vxi ? atomic_read(&vxi->vx_tasks) : 0,
9372+ task, _file, _line);
9373+ __vxh_release_vx_info(vxi, task, _here);
9374+
9375+ might_sleep();
9376+
9377+ if (atomic_dec_and_test(&vxi->vx_tasks))
9378+ unhash_vx_info(vxi);
9379+}
9380+
9381+
9382+#define task_get_vx_info(p) \
9383+ __task_get_vx_info(p, __FILE__, __LINE__, __HERE__)
9384+
9385+static inline struct vx_info *__task_get_vx_info(struct task_struct *p,
9386+ const char *_file, int _line, void *_here)
9387+{
9388+ struct vx_info *vxi;
9389+
9390+ task_lock(p);
9391+ vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)",
9392+ p, _file, _line);
9393+ vxi = __get_vx_info(p->vx_info, _file, _line, _here);
9394+ task_unlock(p);
9395+ return vxi;
9396+}
9397+
9398+
9399+static inline void __wakeup_vx_info(struct vx_info *vxi)
9400+{
9401+ if (waitqueue_active(&vxi->vx_wait))
9402+ wake_up_interruptible(&vxi->vx_wait);
9403+}
9404+
9405+
9406+#define enter_vx_info(v, s) __enter_vx_info(v, s, __FILE__, __LINE__)
9407+
9408+static inline void __enter_vx_info(struct vx_info *vxi,
9409+ struct vx_info_save *vxis, const char *_file, int _line)
9410+{
9411+ vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]",
9412+ vxi, vxi ? vxi->vx_id : 0, vxis, current,
9413+ current->xid, current->vx_info, _file, _line);
9414+ vxis->vxi = xchg(&current->vx_info, vxi);
9415+ vxis->xid = current->xid;
9416+ current->xid = vxi ? vxi->vx_id : 0;
9417+}
9418+
9419+#define leave_vx_info(s) __leave_vx_info(s, __FILE__, __LINE__)
9420+
9421+static inline void __leave_vx_info(struct vx_info_save *vxis,
9422+ const char *_file, int _line)
9423+{
9424+ vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]",
9425+ vxis, vxis->xid, vxis->vxi, current,
9426+ current->xid, current->vx_info, _file, _line);
9427+ (void)xchg(&current->vx_info, vxis->vxi);
9428+ current->xid = vxis->xid;
9429+}
9430+
9431+
9432+static inline void __enter_vx_admin(struct vx_info_save *vxis)
9433+{
9434+ vxis->vxi = xchg(&current->vx_info, NULL);
9435+ vxis->xid = xchg(&current->xid, (xid_t)0);
9436+}
9437+
9438+static inline void __leave_vx_admin(struct vx_info_save *vxis)
9439+{
9440+ (void)xchg(&current->xid, vxis->xid);
9441+ (void)xchg(&current->vx_info, vxis->vxi);
9442+}
9443+
9444+#define task_is_init(p) \
9445+ __task_is_init(p, __FILE__, __LINE__, __HERE__)
9446+
9447+static inline int __task_is_init(struct task_struct *p,
9448+ const char *_file, int _line, void *_here)
9449+{
9450+ int is_init = is_global_init(p);
9451+
9452+ task_lock(p);
9453+ if (p->vx_info)
9454+ is_init = p->vx_info->vx_initpid == p->pid;
9455+ task_unlock(p);
9456+ return is_init;
9457+}
9458+
9459+extern void exit_vx_info(struct task_struct *, int);
9460+extern void exit_vx_info_early(struct task_struct *, int);
9461+
9462+
9463+#else
9464+#warning duplicate inclusion
9465+#endif
9466diff -NurpP --minimal linux-3.0.9/include/linux/vs_cowbl.h linux-3.0.9-vs2.3.2.1/include/linux/vs_cowbl.h
9467--- linux-3.0.9/include/linux/vs_cowbl.h 1970-01-01 01:00:00.000000000 +0100
9468+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_cowbl.h 2011-06-10 22:11:24.000000000 +0200
9469@@ -0,0 +1,48 @@
9470+#ifndef _VS_COWBL_H
9471+#define _VS_COWBL_H
9472+
9473+#include <linux/fs.h>
9474+#include <linux/dcache.h>
9475+#include <linux/namei.h>
9476+#include <linux/slab.h>
9477+
9478+extern struct dentry *cow_break_link(const char *pathname);
9479+/* Lift COW protection from path's inode (breaking the link if needed); returns 0 or -errno. */
9480+static inline int cow_check_and_break(struct path *path)
9481+{
9482+ struct inode *inode = path->dentry->d_inode;
9483+ struct dentry *new_dentry, *old_dentry = path->dentry;
9484+ char *pp, *buf;
9485+
9486+ /* do we need this check? */
9487+ if (IS_RDONLY(inode))
9488+  return -EROFS;
9489+ if (!IS_COW(inode))
9490+  return 0;
9491+
9492+ if (!IS_COW_LINK(inode)) {
9493+  /* not a COW link: clear the protection flags on this inode in place */
9494+  inode->i_flags &= ~(S_IXUNLINK | S_IMMUTABLE);
9495+  inode->i_ctime = CURRENT_TIME;
9496+  mark_inode_dirty(inode);
9497+  return 0;
9498+ }
9499+
9500+ buf = kmalloc(PATH_MAX, GFP_KERNEL);
9501+ if (!buf)
9502+  return -ENOMEM;
9503+ pp = d_path(path, buf, PATH_MAX);
9504+ /* d_path() reports failure as an ERR_PTR; never pass that on as a pathname */
9505+ new_dentry = IS_ERR(pp) ? ERR_CAST(pp) : cow_break_link(pp);
9506+ kfree(buf);
9507+ if (IS_ERR(new_dentry))
9508+  return PTR_ERR(new_dentry);
9509+
9510+ path->dentry = new_dentry;
9511+ dput(old_dentry);
9512+ return 0;
9513+}
9514+
9515+#else
9516+#warning duplicate inclusion
9517+#endif
9518diff -NurpP --minimal linux-3.0.9/include/linux/vs_cvirt.h linux-3.0.9-vs2.3.2.1/include/linux/vs_cvirt.h
9519--- linux-3.0.9/include/linux/vs_cvirt.h 1970-01-01 01:00:00.000000000 +0100
9520+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_cvirt.h 2011-06-10 22:11:24.000000000 +0200
9521@@ -0,0 +1,50 @@
9522+#ifndef _VS_CVIRT_H
9523+#define _VS_CVIRT_H
9524+
9525+#include "vserver/cvirt.h"
9526+#include "vserver/context.h"
9527+#include "vserver/base.h"
9528+#include "vserver/check.h"
9529+#include "vserver/debug.h"
9530+
9531+/* If p has a vx_info: call vx_update_load() on it, then increment its cvirt.nr_running. */
9532+static inline void vx_activate_task(struct task_struct *p)
9533+{
9534+ struct vx_info *vxi = p->vx_info;
9535+
9536+ if (!vxi)
9537+  return;
9538+ vx_update_load(vxi);
9539+ atomic_inc(&vxi->cvirt.nr_running);
9540+}
9541+/* If p has a vx_info: call vx_update_load() on it, then decrement its cvirt.nr_running. */
9542+static inline void vx_deactivate_task(struct task_struct *p)
9543+{
9544+ struct vx_info *vxi = p->vx_info;
9545+
9546+ if (!vxi)
9547+  return;
9548+ vx_update_load(vxi);
9549+ atomic_dec(&vxi->cvirt.nr_running);
9550+}
9551+/* Increment cvirt.nr_uninterruptible of p's vx_info; tasks without one are ignored. */
9552+static inline void vx_uninterruptible_inc(struct task_struct *p)
9553+{
9554+ struct vx_info *vxi = p->vx_info;
9555+
9556+ if (vxi)
9557+  atomic_inc(&vxi->cvirt.nr_uninterruptible);
9558+}
9559+/* Decrement cvirt.nr_uninterruptible of p's vx_info; tasks without one are ignored. */
9560+static inline void vx_uninterruptible_dec(struct task_struct *p)
9561+{
9562+ struct vx_info *vxi = p->vx_info;
9563+
9564+ if (vxi)
9565+  atomic_dec(&vxi->cvirt.nr_uninterruptible);
9566+}
9567+
9568+
9569+#else
9570+#warning duplicate inclusion
9571+#endif
9572diff -NurpP --minimal linux-3.0.9/include/linux/vs_device.h linux-3.0.9-vs2.3.2.1/include/linux/vs_device.h
9573--- linux-3.0.9/include/linux/vs_device.h 1970-01-01 01:00:00.000000000 +0100
9574+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_device.h 2011-06-10 22:11:24.000000000 +0200
9575@@ -0,0 +1,45 @@
9576+#ifndef _VS_DEVICE_H
9577+#define _VS_DEVICE_H
9578+
9579+#include "vserver/base.h"
9580+#include "vserver/device.h"
9581+#include "vserver/debug.h"
9582+
9583+
9584+#ifdef CONFIG_VSERVER_DEVICE
9585+
9586+int vs_map_device(struct vx_info *, dev_t, dev_t *, umode_t);
9587+/* NOTE(review): the v argument is not used; the lookup always goes through current_vx_info(). */
9588+#define vs_device_perm(v, d, m, p) \
9589+ ((vs_map_device(current_vx_info(), d, NULL, m) & (p)) == (p))
9590+
9591+#else
9592+/* !CONFIG_VSERVER_DEVICE stub: copies device to *target (if given) and returns all bits set. */
9593+static inline
9594+int vs_map_device(struct vx_info *vxi,
9595+ dev_t device, dev_t *target, umode_t mode)
9596+{
9597+ if (target)
9598+ *target = device;
9599+ return ~0;
9600+}
9601+
9602+#define vs_device_perm(v, d, m, p) ((p) == (p))
9603+
9604+#endif
9605+
9606+/* vs_map_*: map d into *t via vs_map_device() for current's context; true if all bits of p are in the result. */
9607+#define vs_map_chrdev(d, t, p) \
9608+ ((vs_map_device(current_vx_info(), d, t, S_IFCHR) & (p)) == (p))
9609+#define vs_map_blkdev(d, t, p) \
9610+ ((vs_map_device(current_vx_info(), d, t, S_IFBLK) & (p)) == (p))
9611+
9612+#define vs_chrdev_perm(d, p) \
9613+ vs_device_perm(current_vx_info(), d, S_IFCHR, p)
9614+#define vs_blkdev_perm(d, p) \
9615+ vs_device_perm(current_vx_info(), d, S_IFBLK, p)
9616+
9617+
9618+#else
9619+#warning duplicate inclusion
9620+#endif
9621diff -NurpP --minimal linux-3.0.9/include/linux/vs_dlimit.h linux-3.0.9-vs2.3.2.1/include/linux/vs_dlimit.h
9622--- linux-3.0.9/include/linux/vs_dlimit.h 1970-01-01 01:00:00.000000000 +0100
9623+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_dlimit.h 2011-06-10 22:11:24.000000000 +0200
9624@@ -0,0 +1,215 @@
9625+#ifndef _VS_DLIMIT_H
9626+#define _VS_DLIMIT_H
9627+
9628+#include <linux/fs.h>
9629+
9630+#include "vserver/dlimit.h"
9631+#include "vserver/base.h"
9632+#include "vserver/debug.h"
9633+
9634+/* get_dl_info(i): increment i's dl_usecnt and return i; a NULL i is returned untouched. */
9635+#define get_dl_info(i) __get_dl_info(i, __FILE__, __LINE__)
9636+
9637+static inline struct dl_info *__get_dl_info(struct dl_info *dli,
9638+ const char *_file, int _line)
9639+{
9640+ if (dli) {
9641+  /* the count logged is the value before this reference is added */
9642+  vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])",
9643+   dli, dli->dl_tag, atomic_read(&dli->dl_usecnt),
9644+   _file, _line);
9645+  atomic_inc(&dli->dl_usecnt);
9646+ }
9647+ return dli;
9648+}
9649+
9650+/* put_dl_info(i): decrement i's dl_usecnt; at zero, rcu_free_dl_info is queued via call_rcu(). */
9651+#define free_dl_info(i) \
9652+ call_rcu(&(i)->dl_rcu, rcu_free_dl_info)
9653+
9654+#define put_dl_info(i) __put_dl_info(i, __FILE__, __LINE__)
9655+
9656+static inline void __put_dl_info(struct dl_info *dli,
9657+ const char *_file, int _line)
9658+{
9659+ if (!dli)
9660+  return;
9661+ vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])",
9662+  dli, dli->dl_tag, atomic_read(&dli->dl_usecnt),
9663+  _file, _line);
9664+ /* the put that reaches zero schedules the free */
9665+ if (atomic_dec_and_test(&dli->dl_usecnt))
9666+  free_dl_info(dli);
9667+}
9668+
9669+/* Charge nr bytes to the (sb, tag) dl_info; -ENOSPC if that would exceed dl_space_total, else 0. */
9670+#define __dlimit_char(d) ((d) ? '*' : ' ')
9671+
9672+static inline int __dl_alloc_space(struct super_block *sb,
9673+ tag_t tag, dlsize_t nr, const char *file, int line)
9674+{
9675+ struct dl_info *dli = NULL;
9676+ int ret = 0;
9677+
9678+ if (nr == 0)
9679+ goto out;
9680+ dli = locate_dl_info(sb, tag);
9681+ if (!dli) /* no dl_info found for (sb, tag): request succeeds */
9682+ goto out;
9683+
9684+ spin_lock(&dli->dl_lock);
9685+ ret = (dli->dl_space_used + nr > dli->dl_space_total);
9686+ if (!ret) /* usage only grows when the request fits */
9687+ dli->dl_space_used += nr;
9688+ spin_unlock(&dli->dl_lock);
9689+ put_dl_info(dli);
9690+out:
9691+ vxlprintk(VXD_CBIT(dlim, 1),
9692+ "ALLOC (%p,#%d)%c %lld bytes (%d)",
9693+ sb, tag, __dlimit_char(dli), (long long)nr,
9694+ ret, file, line);
9695+ return ret ? -ENOSPC : 0;
9696+}
9697+/* Give nr bytes back to the (sb, tag) dl_info; dl_space_used is clamped at 0. */
9698+static inline void __dl_free_space(struct super_block *sb,
9699+ tag_t tag, dlsize_t nr, const char *_file, int _line)
9700+{
9701+ struct dl_info *dli = NULL;
9702+
9703+ if (nr == 0)
9704+ goto out;
9705+ dli = locate_dl_info(sb, tag);
9706+ if (!dli)
9707+ goto out;
9708+
9709+ spin_lock(&dli->dl_lock);
9710+ if (dli->dl_space_used > nr)
9711+ dli->dl_space_used -= nr;
9712+ else /* releasing at least as much as is recorded: reset to zero */
9713+ dli->dl_space_used = 0;
9714+ spin_unlock(&dli->dl_lock);
9715+ put_dl_info(dli);
9716+out:
9717+ vxlprintk(VXD_CBIT(dlim, 1),
9718+ "FREE (%p,#%d)%c %lld bytes",
9719+ sb, tag, __dlimit_char(dli), (long long)nr,
9720+ _file, _line);
9721+}
9722+/* Count one inode against the (sb, tag) dl_info; -ENOSPC if dl_inodes_used then exceeds dl_inodes_total. */
9723+static inline int __dl_alloc_inode(struct super_block *sb,
9724+ tag_t tag, const char *_file, int _line)
9725+{
9726+ struct dl_info *dli;
9727+ int ret = 0;
9728+
9729+ dli = locate_dl_info(sb, tag);
9730+ if (!dli)
9731+ goto out;
9732+
9733+ spin_lock(&dli->dl_lock);
9734+ dli->dl_inodes_used++; /* NOTE(review): not undone when the limit check fails - confirm callers release it */
9735+ ret = (dli->dl_inodes_used > dli->dl_inodes_total);
9736+ spin_unlock(&dli->dl_lock);
9737+ put_dl_info(dli);
9738+out:
9739+ vxlprintk(VXD_CBIT(dlim, 0),
9740+ "ALLOC (%p,#%d)%c inode (%d)",
9741+ sb, tag, __dlimit_char(dli), ret, _file, _line);
9742+ return ret ? -ENOSPC : 0;
9743+}
9744+/* Remove one inode from the (sb, tag) dl_info count; dl_inodes_used never goes below 0. */
9745+static inline void __dl_free_inode(struct super_block *sb,
9746+ tag_t tag, const char *_file, int _line)
9747+{
9748+ struct dl_info *dli;
9749+
9750+ dli = locate_dl_info(sb, tag);
9751+ if (!dli)
9752+ goto out;
9753+
9754+ spin_lock(&dli->dl_lock);
9755+ if (dli->dl_inodes_used > 1)
9756+ dli->dl_inodes_used--;
9757+ else /* a count of 0 or 1 both end up at 0 */
9758+ dli->dl_inodes_used = 0;
9759+ spin_unlock(&dli->dl_lock);
9760+ put_dl_info(dli);
9761+out:
9762+ vxlprintk(VXD_CBIT(dlim, 0),
9763+ "FREE (%p,#%d)%c inode",
9764+ sb, tag, __dlimit_char(dli), _file, _line);
9765+}
9766+/* Clamp *free_blocks / *root_blocks (either may be NULL) to the block counts the (sb, tag) limit allows. */
9767+static inline void __dl_adjust_block(struct super_block *sb, tag_t tag,
9768+ unsigned long long *free_blocks, unsigned long long *root_blocks,
9769+ const char *_file, int _line)
9770+{
9771+ struct dl_info *dli;
9772+ uint64_t broot, bfree;
9773+
9774+ dli = locate_dl_info(sb, tag);
9775+ if (!dli)
9776+  return;
9777+
9778+ spin_lock(&dli->dl_lock);
9779+ broot = (dli->dl_space_total -
9780+  (dli->dl_space_total >> 10) * dli->dl_nrlmult)
9781+  >> sb->s_blocksize_bits;
9782+ bfree = (dli->dl_space_total - dli->dl_space_used)
9783+  >> sb->s_blocksize_bits;
9784+ spin_unlock(&dli->dl_lock);
9785+
9786+ /* the out-pointers are optional: guard them in the log call as well */
9787+ vxlprintk(VXD_CBIT(dlim, 2),
9788+  "ADJUST: %lld,%lld on %lld,%lld [mult=%d]",
9789+  (long long)bfree, (long long)broot,
9790+  free_blocks ? *free_blocks : 0ULL,
9791+  root_blocks ? *root_blocks : 0ULL,
9792+  dli->dl_nrlmult, _file, _line);
9793+
9794+ if (free_blocks && *free_blocks > bfree)
9795+  *free_blocks = bfree;
9796+ if (root_blocks && *root_blocks > broot)
9797+  *root_blocks = broot;
9798+
9799+ put_dl_info(dli);
9800+}
9801+/* dl_prealloc_space, dl_alloc_space and dl_reserve_space expand to the same __dl_alloc_space() call. */
9802+#define dl_prealloc_space(in, bytes) \
9803+ __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
9804+ __FILE__, __LINE__ )
9805+
9806+#define dl_alloc_space(in, bytes) \
9807+ __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
9808+ __FILE__, __LINE__ )
9809+
9810+#define dl_reserve_space(in, bytes) \
9811+ __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
9812+ __FILE__, __LINE__ )
9813+
9814+#define dl_claim_space(in, bytes) (0)
9815+
9816+#define dl_release_space(in, bytes) \
9817+ __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
9818+ __FILE__, __LINE__ )
9819+
9820+#define dl_free_space(in, bytes) \
9821+ __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
9822+ __FILE__, __LINE__ )
9823+
9824+
9825+
9826+#define dl_alloc_inode(in) \
9827+ __dl_alloc_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ )
9828+
9829+#define dl_free_inode(in) \
9830+ __dl_free_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ )
9831+
9832+
9833+#define dl_adjust_block(sb, tag, fb, rb) \
9834+ __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ )
9835+
9836+
9837+#else
9838+#warning duplicate inclusion
9839+#endif
9840diff -NurpP --minimal linux-3.0.9/include/linux/vs_inet.h linux-3.0.9-vs2.3.2.1/include/linux/vs_inet.h
9841--- linux-3.0.9/include/linux/vs_inet.h 1970-01-01 01:00:00.000000000 +0100
9842+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_inet.h 2011-08-18 17:01:00.000000000 +0200
9843@@ -0,0 +1,353 @@
9844+#ifndef _VS_INET_H
9845+#define _VS_INET_H
9846+
9847+#include "vserver/base.h"
9848+#include "vserver/network.h"
9849+#include "vserver/debug.h"
9850+
9851+#define IPI_LOOPBACK htonl(INADDR_LOOPBACK)
9852+/* Format helpers: NIPQUAD(addr) expands to the four bytes of addr in memory order (addr is evaluated 4 times). */
9853+#define NXAV4(a) NIPQUAD((a)->ip[0]), NIPQUAD((a)->ip[1]), \
9854+ NIPQUAD((a)->mask), (a)->type
9855+#define NXAV4_FMT "[" NIPQUAD_FMT "-" NIPQUAD_FMT "/" NIPQUAD_FMT ":%04x]"
9856+
9857+#define NIPQUAD(addr) \
9858+ ((unsigned char *)&addr)[0], \
9859+ ((unsigned char *)&addr)[1], \
9860+ ((unsigned char *)&addr)[2], \
9861+ ((unsigned char *)&addr)[3]
9862+
9863+#define NIPQUAD_FMT "%u.%u.%u.%u"
9864+
9865+/* Match addr against one nx_addr_v4 entry; only entry types selected by tmask can match. Nonzero = match. */
9866+static inline
9867+int v4_addr_match(struct nx_addr_v4 *nxa, __be32 addr, uint16_t tmask)
9868+{
9869+ __be32 ip = nxa->ip[0].s_addr;
9870+ __be32 mask = nxa->mask.s_addr;
9871+ __be32 bcast = ip | ~mask;
9872+ int ret = 0;
9873+
9874+ switch (nxa->type & tmask) {
9875+ case NXA_TYPE_MASK:
9876+ ret = (ip == (addr & mask));
9877+ break;
9878+ case NXA_TYPE_ADDR:
9879+ ret = 3;
9880+ if (addr == ip)
9881+ break;
9882+ /* fall through to broadcast */
9883+ case NXA_MOD_BCAST:
9884+ ret = ((tmask & NXA_MOD_BCAST) && (addr == bcast));
9885+ break;
9886+ case NXA_TYPE_RANGE: /* ip[0] <= addr < ip[1]; ordering is only meaningful in host byte order */
9887+ ret = ((ntohl(nxa->ip[0].s_addr) <= ntohl(addr)) &&
9888+ (ntohl(nxa->ip[1].s_addr) > ntohl(addr)));
9889+ break;
9890+ case NXA_TYPE_ANY:
9891+ ret = 2;
9892+ break;
9893+ }
9894+
9895+ vxdprintk(VXD_CBIT(net, 0),
9896+ "v4_addr_match(%p" NXAV4_FMT "," NIPQUAD_FMT ",%04x) = %d",
9897+ nxa, NXAV4(nxa), NIPQUAD(addr), tmask, ret);
9898+ return ret;
9899+}
9900+/* Result: 1 no nxi, 2 remapped loopback, 3 nxi's lback, 4 nxi's bcast, 5 listed address, 0 no match. */
9901+static inline
9902+int v4_addr_in_nx_info(struct nx_info *nxi, __be32 addr, uint16_t tmask)
9903+{
9904+ struct nx_addr_v4 *nxa;
9905+ int ret = 1;
9906+
9907+ if (!nxi)
9908+ goto out;
9909+
9910+ ret = 2;
9911+ /* allow 127.0.0.1 when remapping lback */
9912+ if ((tmask & NXA_LOOPBACK) &&
9913+ (addr == IPI_LOOPBACK) &&
9914+ nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
9915+ goto out;
9916+ ret = 3;
9917+ /* check for lback address */
9918+ if ((tmask & NXA_MOD_LBACK) &&
9919+ (nxi->v4_lback.s_addr == addr))
9920+ goto out;
9921+ ret = 4;
9922+ /* check for broadcast address */
9923+ if ((tmask & NXA_MOD_BCAST) &&
9924+ (nxi->v4_bcast.s_addr == addr))
9925+ goto out;
9926+ ret = 5;
9927+ /* check for v4 addresses */
9928+ for (nxa = &nxi->v4; nxa; nxa = nxa->next)
9929+ if (v4_addr_match(nxa, addr, tmask))
9930+ goto out;
9931+ ret = 0;
9932+out:
9933+ vxdprintk(VXD_CBIT(net, 0),
9934+ "v4_addr_in_nx_info(%p[#%u]," NIPQUAD_FMT ",%04x) = %d",
9935+ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(addr), tmask, ret);
9936+ return ret;
9937+}
9938+/* Entry-vs-entry match; currently only addr->ip[0] is tested against nxa. */
9939+static inline
9940+int v4_nx_addr_match(struct nx_addr_v4 *nxa, struct nx_addr_v4 *addr, uint16_t mask)
9941+{
9942+ /* FIXME: needs full range checks */
9943+ return v4_addr_match(nxa, addr->ip[0].s_addr, mask);
9944+}
9945+/* 1 if any entry on nxi's v4 list matches nxa, else 0. nxi is dereferenced without a NULL check. */
9946+static inline
9947+int v4_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v4 *nxa, uint16_t mask)
9948+{
9949+ struct nx_addr_v4 *ptr;
9950+
9951+ for (ptr = &nxi->v4; ptr; ptr = ptr->next)
9952+ if (v4_nx_addr_match(ptr, nxa, mask))
9953+ return 1;
9954+ return 0;
9955+}
9956+
9957+#include <net/inet_sock.h>
9958+
9959+/*
9960+ * Check if a given address matches for a socket
9961+ * inet: the socket; its inet_rcv_saddr is what addr is compared with
9962+ * nxi: the socket's nx_info if any
9963+ * addr: to be verified address
9964+ */
9965+static inline
9966+int v4_sock_addr_match (
9967+ struct nx_info *nxi,
9968+ struct inet_sock *inet,
9969+ __be32 addr)
9970+{
9971+ __be32 saddr = inet->inet_rcv_saddr;
9972+ __be32 bcast = nxi ? nxi->v4_bcast.s_addr : INADDR_BROADCAST;
9973+
9974+ if (addr && (saddr == addr || bcast == addr))
9975+ return 1;
9976+ if (!saddr) /* no receive address set: defer to the nx_info check */
9977+ return v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND);
9978+ return 0;
9979+}
9980+
9981+
9982+/* inet related checks and helpers */
9983+
9984+
9985+struct in_ifaddr;
9986+struct net_device;
9987+struct sock;
9988+
9989+#ifdef CONFIG_INET
9990+
9991+#include <linux/netdevice.h>
9992+#include <linux/inetdevice.h>
9993+#include <net/inet_sock.h>
9994+#include <net/inet_timewait_sock.h>
9995+
9996+
9997+int dev_in_nx_info(struct net_device *, struct nx_info *);
9998+int v4_dev_in_nx_info(struct net_device *, struct nx_info *);
9999+int nx_v4_addr_conflict(struct nx_info *, struct nx_info *);
10000+
10001+
10002+/*
10003+ * check if address is covered by socket
10004+ *
10005+ * sk: the socket to check against
10006+ * nxa: the address entry in question
10007+ */
10008+
10009+static inline
10010+int __v4_addr_match_socket(const struct sock *sk, struct nx_addr_v4 *nxa)
10011+{
10012+ struct nx_info *nxi = sk->sk_nx_info;
10013+ __be32 saddr = sk_rcv_saddr(sk);
10014+
10015+ vxdprintk(VXD_CBIT(net, 5),
10016+ "__v4_addr_in_socket(%p," NXAV4_FMT ") %p:" NIPQUAD_FMT " %p;%lx",
10017+ sk, NXAV4(nxa), nxi, NIPQUAD(saddr), sk->sk_socket,
10018+ (sk->sk_socket?sk->sk_socket->flags:0));
10019+
10020+ if (saddr) { /* direct address match */
10021+ return v4_addr_match(nxa, saddr, -1);
10022+ } else if (nxi) { /* match against nx_info */
10023+ return v4_nx_addr_in_nx_info(nxi, nxa, -1);
10024+ } else { /* unrestricted any socket */
10025+ return 1;
10026+ }
10027+}
10028+
10029+
10030+/* 1 when the NXF_HIDE_NETIF flag test on nxi fails or dev_in_nx_info() accepts dev; otherwise 0. */
10031+static inline
10032+int nx_dev_visible(struct nx_info *nxi, struct net_device *dev)
10033+{
10034+ vxdprintk(VXD_CBIT(net, 1),
10035+ "nx_dev_visible(%p[#%u],%p " VS_Q("%s") ") %d",
10036+ nxi, nxi ? nxi->nx_id : 0, dev, dev->name,
10037+ nxi ? dev_in_nx_info(dev, nxi) : 0);
10038+
10039+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
10040+ return 1;
10041+ if (dev_in_nx_info(dev, nxi))
10042+ return 1;
10043+ return 0;
10044+}
10045+
10046+/* 1 for a NULL nxi, 0 for a NULL ifa, else v4_addr_in_nx_info() on ifa->ifa_local with NXA_MASK_SHOW. */
10047+static inline
10048+int v4_ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
10049+{
10050+ int visible = 1; /* result when there is no nx_info */
10051+ if (nxi)
10052+  visible = ifa ?
10053+   v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW) : 0;
10054+ return visible;
10055+}
10056+/* 1 when the NXF_HIDE_NETIF flag test on nxi fails or v4_ifa_in_nx_info() accepts ifa; otherwise 0. */
10057+static inline
10058+int nx_v4_ifa_visible(struct nx_info *nxi, struct in_ifaddr *ifa)
10059+{
10060+ vxdprintk(VXD_CBIT(net, 1), "nx_v4_ifa_visible(%p[#%u],%p) %d",
10061+ nxi, nxi ? nxi->nx_id : 0, ifa,
10062+ nxi ? v4_ifa_in_nx_info(ifa, nxi) : 0);
10063+
10064+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
10065+ return 1;
10066+ if (v4_ifa_in_nx_info(ifa, nxi))
10067+ return 1;
10068+ return 0;
10069+}
10070+
10071+
10072+struct nx_v4_sock_addr {
10073+ __be32 saddr; /* Address used for validation */
10074+ __be32 baddr; /* Address used for socket bind */
10075+};
10076+/* Fill *nsa from addr: saddr as requested, baddr possibly remapped by the socket's nx_info. 0 or -EADDRNOTAVAIL. */
10077+static inline
10078+int v4_map_sock_addr(struct inet_sock *inet, struct sockaddr_in *addr,
10079+ struct nx_v4_sock_addr *nsa)
10080+{
10081+ struct sock *sk = &inet->sk;
10082+ struct nx_info *nxi = sk->sk_nx_info;
10083+ __be32 saddr = addr->sin_addr.s_addr;
10084+ __be32 baddr = saddr;
10085+
10086+ vxdprintk(VXD_CBIT(net, 3),
10087+ "inet_bind(%p)* %p,%p;%lx " NIPQUAD_FMT,
10088+ sk, sk->sk_nx_info, sk->sk_socket,
10089+ (sk->sk_socket ? sk->sk_socket->flags : 0),
10090+ NIPQUAD(saddr));
10091+
10092+ if (nxi) {
10093+ if (saddr == INADDR_ANY) {
10094+ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0)) /* bind to the first listed v4 address instead */
10095+ baddr = nxi->v4.ip[0].s_addr;
10096+ } else if (saddr == IPI_LOOPBACK) {
10097+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) /* bind to nxi's own lback address instead */
10098+ baddr = nxi->v4_lback.s_addr;
10099+ } else if (!ipv4_is_multicast(saddr) ||
10100+ !nx_info_ncaps(nxi, NXC_MULTICAST)) { /* skipped only for multicast with NXC_MULTICAST */
10101+ /* normal address bind */
10102+ if (!v4_addr_in_nx_info(nxi, saddr, NXA_MASK_BIND))
10103+ return -EADDRNOTAVAIL;
10104+ }
10105+ }
10106+
10107+ vxdprintk(VXD_CBIT(net, 3),
10108+ "inet_bind(%p) " NIPQUAD_FMT ", " NIPQUAD_FMT,
10109+ sk, NIPQUAD(saddr), NIPQUAD(baddr));
10110+
10111+ nsa->saddr = saddr;
10112+ nsa->baddr = baddr;
10113+ return 0;
10114+}
10115+/* Store nsa->baddr as both inet_saddr and inet_rcv_saddr of inet. */
10116+static inline
10117+void v4_set_sock_addr(struct inet_sock *inet, struct nx_v4_sock_addr *nsa)
10118+{
10119+ inet->inet_saddr = nsa->baddr;
10120+ inet->inet_rcv_saddr = nsa->baddr;
10121+}
10122+
10123+
10124+/*
10125+ * helper to simplify inet_lookup_listener
10126+ *
10127+ * nxi: the socket's nx_info if any
10128+ * addr: to be verified address
10129+ * saddr: socket address
10130+ */
10131+static inline int v4_inet_addr_match (
10132+ struct nx_info *nxi,
10133+ __be32 addr,
10134+ __be32 saddr)
10135+{
10136+ if (addr && (saddr == addr))
10137+ return 1;
10138+ if (!saddr) /* zero socket address: accept when nxi is NULL, else ask the nx_info */
10139+ return nxi ? v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND) : 1;
10140+ return 0;
10141+}
10142+/* Returns IPI_LOOPBACK in place of nxi's v4_lback when the NXF_HIDE_LBACK flag test passes; else addr. */
10143+static inline __be32 nx_map_sock_lback(struct nx_info *nxi, __be32 addr)
10144+{
10145+ if (nx_info_flags(nxi, NXF_HIDE_LBACK, 0) &&
10146+ (addr == nxi->v4_lback.s_addr))
10147+ return IPI_LOOPBACK;
10148+ return addr;
10149+}
10150+/* 1 for a NULL nxi, when NX_IPV4(nxi) holds, or when the NXF_LBACK_REMAP flag test passes; else 0. */
10151+static inline
10152+int nx_info_has_v4(struct nx_info *nxi)
10153+{
10154+ if (!nxi)
10155+ return 1;
10156+ if (NX_IPV4(nxi))
10157+ return 1;
10158+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
10159+ return 1;
10160+ return 0;
10161+}
10162+
10163+#else /* CONFIG_INET */
10164+/* !CONFIG_INET stubs. NOTE(review): nx_v4_addr_conflict() takes 3 arguments here, 2 in the CONFIG_INET prototype. */
10165+static inline
10166+int nx_dev_visible(struct nx_info *n, struct net_device *d)
10167+{
10168+ return 1;
10169+}
10170+
10171+static inline
10172+int nx_v4_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
10173+{
10174+ return 1;
10175+}
10176+
10177+static inline
10178+int v4_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
10179+{
10180+ return 1;
10181+}
10182+
10183+static inline
10184+int nx_info_has_v4(struct nx_info *nxi)
10185+{
10186+ return 0;
10187+}
10188+
10189+#endif /* CONFIG_INET */
10190+
10191+#define current_nx_info_has_v4() \
10192+ nx_info_has_v4(current_nx_info())
10193+
10194+#else
10195+// #warning duplicate inclusion
10196+#endif
10197diff -NurpP --minimal linux-3.0.9/include/linux/vs_inet6.h linux-3.0.9-vs2.3.2.1/include/linux/vs_inet6.h
10198--- linux-3.0.9/include/linux/vs_inet6.h 1970-01-01 01:00:00.000000000 +0100
10199+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_inet6.h 2011-06-10 22:11:24.000000000 +0200
10200@@ -0,0 +1,246 @@
10201+#ifndef _VS_INET6_H
10202+#define _VS_INET6_H
10203+
10204+#include "vserver/base.h"
10205+#include "vserver/network.h"
10206+#include "vserver/debug.h"
10207+
10208+#include <net/ipv6.h>
10209+
10210+#define NXAV6(a) &(a)->ip, &(a)->mask, (a)->prefix, (a)->type
10211+#define NXAV6_FMT "[%pI6/%pI6/%d:%04x]"
10212+
10213+
10214+#ifdef CONFIG_IPV6
10215+/* Nonzero if addr matches entry nxa; only entry types selected by mask are considered. */
10216+static inline
10217+int v6_addr_match(struct nx_addr_v6 *nxa,
10218+ const struct in6_addr *addr, uint16_t mask)
10219+{
10220+ int ret = 0;
10221+
10222+ switch (nxa->type & mask) {
10223+ case NXA_TYPE_MASK:
10224+ ret = ipv6_masked_addr_cmp(&nxa->ip, &nxa->mask, addr);
10225+ break;
10226+ case NXA_TYPE_ADDR:
10227+ ret = ipv6_addr_equal(&nxa->ip, addr);
10228+ break;
10229+ case NXA_TYPE_ANY:
10230+ ret = 1;
10231+ break;
10232+ }
10233+ vxdprintk(VXD_CBIT(net, 0),
10234+ "v6_addr_match(%p" NXAV6_FMT ",%pI6,%04x) = %d",
10235+ nxa, NXAV6(nxa), addr, mask, ret);
10236+ return ret;
10237+}
10238+/* 1 if nxi is NULL or any entry on its v6 list matches addr under mask; otherwise 0. */
10239+static inline
10240+int v6_addr_in_nx_info(struct nx_info *nxi,
10241+ const struct in6_addr *addr, uint16_t mask)
10242+{
10243+ struct nx_addr_v6 *nxa;
10244+ int ret = 1;
10245+
10246+ if (!nxi)
10247+ goto out;
10248+ for (nxa = &nxi->v6; nxa; nxa = nxa->next)
10249+ if (v6_addr_match(nxa, addr, mask))
10250+ goto out;
10251+ ret = 0;
10252+out:
10253+ vxdprintk(VXD_CBIT(net, 0),
10254+ "v6_addr_in_nx_info(%p[#%u],%pI6,%04x) = %d",
10255+ nxi, nxi ? nxi->nx_id : 0, addr, mask, ret);
10256+ return ret;
10257+}
10258+/* Entry-vs-entry match; currently only addr->ip is tested against nxa. */
10259+static inline
10260+int v6_nx_addr_match(struct nx_addr_v6 *nxa, struct nx_addr_v6 *addr, uint16_t mask)
10261+{
10262+ /* FIXME: needs full range checks */
10263+ return v6_addr_match(nxa, &addr->ip, mask);
10264+}
10265+/* 1 if any entry on nxi's v6 list matches nxa, else 0. nxi is dereferenced without a NULL check. */
10266+static inline
10267+int v6_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v6 *nxa, uint16_t mask)
10268+{
10269+ struct nx_addr_v6 *ptr;
10270+
10271+ for (ptr = &nxi->v6; ptr; ptr = ptr->next)
10272+ if (v6_nx_addr_match(ptr, nxa, mask))
10273+ return 1;
10274+ return 0;
10275+}
10276+
10277+
10278+/*
10279+ * Check if a given address matches for a socket
10280+ * inet: the socket; its inet6_rcv_saddr() is what addr is compared with
10281+ * nxi: the socket's nx_info if any
10282+ * addr: to be verified address
10283+ */
10284+static inline
10285+int v6_sock_addr_match (
10286+ struct nx_info *nxi,
10287+ struct inet_sock *inet,
10288+ struct in6_addr *addr)
10289+{
10290+ struct sock *sk = &inet->sk;
10291+ struct in6_addr *saddr = inet6_rcv_saddr(sk);
10292+
10293+ if (!ipv6_addr_any(addr) &&
10294+ ipv6_addr_equal(saddr, addr))
10295+ return 1;
10296+ if (ipv6_addr_any(saddr)) /* socket address is "any": defer to the nx_info check */
10297+ return v6_addr_in_nx_info(nxi, addr, -1);
10298+ return 0;
10299+}
10300+
10301+/*
10302+ * check if address is covered by socket
10303+ *
10304+ * sk: the socket to check against
10305+ * nxa: the address entry in question
10306+ */
10307+
10308+static inline
10309+int __v6_addr_match_socket(const struct sock *sk, struct nx_addr_v6 *nxa)
10310+{
10311+ struct nx_info *nxi = sk->sk_nx_info;
10312+ struct in6_addr *saddr = inet6_rcv_saddr(sk);
10313+
10314+ vxdprintk(VXD_CBIT(net, 5),
10315+ "__v6_addr_in_socket(%p," NXAV6_FMT ") %p:%pI6 %p;%lx",
10316+ sk, NXAV6(nxa), nxi, saddr, sk->sk_socket,
10317+ (sk->sk_socket?sk->sk_socket->flags:0));
10318+
10319+ if (!ipv6_addr_any(saddr)) { /* direct address match */
10320+ return v6_addr_match(nxa, saddr, -1);
10321+ } else if (nxi) { /* match against nx_info */
10322+ return v6_nx_addr_in_nx_info(nxi, nxa, -1);
10323+ } else { /* unrestricted any socket */
10324+ return 1;
10325+ }
10326+}
10327+
10328+
10329+/* inet related checks and helpers */
10330+
10331+
10332+struct in_ifaddr;
10333+struct net_device;
10334+struct sock;
10335+
10336+
10337+#include <linux/netdevice.h>
10338+#include <linux/inetdevice.h>
10339+#include <net/inet_timewait_sock.h>
10340+
10341+
10342+int dev_in_nx_info(struct net_device *, struct nx_info *);
10343+int v6_dev_in_nx_info(struct net_device *, struct nx_info *);
10344+int nx_v6_addr_conflict(struct nx_info *, struct nx_info *);
10345+
10346+
10347+/* 1 for a NULL nxi, 0 for a NULL ifa, else v6_addr_in_nx_info() on &ifa->addr with a full mask. */
10348+static inline
10349+int v6_ifa_in_nx_info(struct inet6_ifaddr *ifa, struct nx_info *nxi)
10350+{
10351+ int visible = 1; /* result when there is no nx_info */
10352+ if (nxi)
10353+  visible = ifa ?
10354+   v6_addr_in_nx_info(nxi, &ifa->addr, -1) : 0;
10355+ return visible;
10356+}
10357+/* 1 when the NXF_HIDE_NETIF flag test on nxi fails or v6_ifa_in_nx_info() accepts ifa; otherwise 0. */
10358+static inline
10359+int nx_v6_ifa_visible(struct nx_info *nxi, struct inet6_ifaddr *ifa)
10360+{
10361+ vxdprintk(VXD_CBIT(net, 1), "nx_v6_ifa_visible(%p[#%u],%p) %d",
10362+ nxi, nxi ? nxi->nx_id : 0, ifa,
10363+ nxi ? v6_ifa_in_nx_info(ifa, nxi) : 0);
10364+
10365+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
10366+ return 1;
10367+ if (v6_ifa_in_nx_info(ifa, nxi))
10368+ return 1;
10369+ return 0;
10370+}
10371+
10372+
10373+struct nx_v6_sock_addr {
10374+ struct in6_addr saddr; /* Address used for validation */
10375+ struct in6_addr baddr; /* Address used for socket bind */
10376+};
10377+/* Pass-through: both nsa->saddr and nsa->baddr become addr->sin6_addr; always returns 0. */
10378+static inline
10379+int v6_map_sock_addr(struct inet_sock *inet, struct sockaddr_in6 *addr,
10380+ struct nx_v6_sock_addr *nsa)
10381+{
10382+ /*
10383+  * Nothing is remapped here: the socket's nx_info is not
10384+  * consulted, unlike the IPv4 variant of this helper.
10385+  */
10386+
10387+ nsa->saddr = addr->sin6_addr;
10388+ nsa->baddr = addr->sin6_addr;
10389+ return 0;
10390+}
10391+/* Currently a no-op: every statement in the body is commented out. */
10392+static inline
10393+void v6_set_sock_addr(struct inet_sock *inet, struct nx_v6_sock_addr *nsa)
10394+{
10395+ // struct sock *sk = &inet->sk;
10396+ // struct in6_addr *saddr = inet6_rcv_saddr(sk);
10397+
10398+ // *saddr = nsa->baddr;
10399+ // inet->inet_saddr = nsa->baddr;
10400+}
10401+/* 1 for a NULL nxi or when NX_IPV6(nxi) holds; otherwise 0. */
10402+static inline
10403+int nx_info_has_v6(struct nx_info *nxi)
10404+{
10405+ if (!nxi)
10406+ return 1;
10407+ if (NX_IPV6(nxi))
10408+ return 1;
10409+ return 0;
10410+}
10411+
10412+#else /* CONFIG_IPV6 */
10413+/* !CONFIG_IPV6 stubs. NOTE(review): nx_v6_addr_conflict and v6_ifa_in_nx_info take other parameter types than with CONFIG_IPV6. */
10414+static inline
10415+int nx_v6_dev_visible(struct nx_info *n, struct net_device *d)
10416+{
10417+ return 1;
10418+}
10419+
10420+
10421+static inline
10422+int nx_v6_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
10423+{
10424+ return 1;
10425+}
10426+
10427+static inline
10428+int v6_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
10429+{
10430+ return 1;
10431+}
10432+
10433+static inline
10434+int nx_info_has_v6(struct nx_info *nxi)
10435+{
10436+ return 0;
10437+}
10438+
10439+#endif /* CONFIG_IPV6 */
10440+
10441+#define current_nx_info_has_v6() \
10442+ nx_info_has_v6(current_nx_info())
10443+
10444+#else
10445+#warning duplicate inclusion
10446+#endif
10447diff -NurpP --minimal linux-3.0.9/include/linux/vs_limit.h linux-3.0.9-vs2.3.2.1/include/linux/vs_limit.h
10448--- linux-3.0.9/include/linux/vs_limit.h 1970-01-01 01:00:00.000000000 +0100
10449+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_limit.h 2011-06-10 22:11:24.000000000 +0200
10450@@ -0,0 +1,140 @@
10451+#ifndef _VS_LIMIT_H
10452+#define _VS_LIMIT_H
10453+
10454+#include "vserver/limit.h"
10455+#include "vserver/base.h"
10456+#include "vserver/context.h"
10457+#include "vserver/debug.h"
10458+#include "vserver/context.h"
10459+#include "vserver/limit_int.h"
10460+
10461+/* Argument order changes: the resource id r is last here but second in the __vx_acc_cres() call. */
10462+#define vx_acc_cres(v, d, p, r) \
10463+ __vx_acc_cres(v, r, d, p, __FILE__, __LINE__)
10464+
10465+#define vx_acc_cres_cond(x, d, p, r) \
10466+ __vx_acc_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
10467+ r, d, p, __FILE__, __LINE__)
10468+
10469+
10470+#define vx_add_cres(v, a, p, r) \
10471+ __vx_add_cres(v, r, a, p, __FILE__, __LINE__)
10472+#define vx_sub_cres(v, a, p, r) vx_add_cres(v, -(a), p, r)
10473+
10474+#define vx_add_cres_cond(x, a, p, r) \
10475+ __vx_add_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
10476+ r, a, p, __FILE__, __LINE__)
10477+#define vx_sub_cres_cond(x, a, p, r) vx_add_cres_cond(x, -(a), p, r)
10478+
10479+
10480+/* process and file limits */
10481+
10482+#define vx_nproc_inc(p) \
10483+ vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC)
10484+
10485+#define vx_nproc_dec(p) \
10486+ vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC)
10487+
10488+#define vx_files_inc(f) \
10489+ vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE)
10490+
10491+#define vx_files_dec(f) \
10492+ vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE)
10493+
10494+#define vx_locks_inc(l) \
10495+ vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS)
10496+
10497+#define vx_locks_dec(l) \
10498+ vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS)
10499+
10500+#define vx_openfd_inc(f) \
10501+ vx_acc_cres(current_vx_info(), 1, (void *)(long)(f), VLIMIT_OPENFD)
10502+
10503+#define vx_openfd_dec(f) \
10504+ vx_acc_cres(current_vx_info(),-1, (void *)(long)(f), VLIMIT_OPENFD)
10505+
10506+
10507+#define vx_cres_avail(v, n, r) \
10508+ __vx_cres_avail(v, r, n, __FILE__, __LINE__)
10509+
10510+
10511+#define vx_nproc_avail(n) \
10512+ vx_cres_avail(current_vx_info(), n, RLIMIT_NPROC)
10513+
10514+#define vx_files_avail(n) \
10515+ vx_cres_avail(current_vx_info(), n, RLIMIT_NOFILE)
10516+
10517+#define vx_locks_avail(n) \
10518+ vx_cres_avail(current_vx_info(), n, RLIMIT_LOCKS)
10519+
10520+#define vx_openfd_avail(n) \
10521+ vx_cres_avail(current_vx_info(), n, VLIMIT_OPENFD)
10522+
10523+
10524+/* dentry limits */
10525+
10526+#define vx_dentry_inc(d) do { \
10527+ if ((d)->d_count == 1) \
10528+ vx_acc_cres(current_vx_info(), 1, d, VLIMIT_DENTRY); \
10529+ } while (0)
10530+
10531+#define vx_dentry_dec(d) do { \
10532+ if ((d)->d_count == 0) \
10533+ vx_acc_cres(current_vx_info(),-1, d, VLIMIT_DENTRY); \
10534+ } while (0)
10535+
10536+#define vx_dentry_avail(n) \
10537+ vx_cres_avail(current_vx_info(), n, VLIMIT_DENTRY)
10538+
10539+
10540+/* socket limits */
10541+
10542+#define vx_sock_inc(s) \
10543+ vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK)
10544+
10545+#define vx_sock_dec(s) \
10546+ vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK)
10547+
10548+#define vx_sock_avail(n) \
10549+ vx_cres_avail(current_vx_info(), n, VLIMIT_NSOCK)
10550+
10551+
10552+/* ipc resource limits */
10553+
10554+#define vx_ipcmsg_add(v, u, a) \
10555+ vx_add_cres(v, a, u, RLIMIT_MSGQUEUE)
10556+
10557+#define vx_ipcmsg_sub(v, u, a) \
10558+ vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE)
10559+
10560+#define vx_ipcmsg_avail(v, a) \
10561+ vx_cres_avail(v, a, RLIMIT_MSGQUEUE)
10562+
10563+
10564+#define vx_ipcshm_add(v, k, a) \
10565+ vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
10566+
10567+#define vx_ipcshm_sub(v, k, a) \
10568+ vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
10569+
10570+#define vx_ipcshm_avail(v, a) \
10571+ vx_cres_avail(v, a, VLIMIT_SHMEM)
10572+
10573+
10574+#define vx_semary_inc(a) \
10575+ vx_acc_cres(current_vx_info(), 1, a, VLIMIT_SEMARY)
10576+
10577+#define vx_semary_dec(a) \
10578+ vx_acc_cres(current_vx_info(), -1, a, VLIMIT_SEMARY)
10579+
10580+
10581+#define vx_nsems_add(a,n) \
10582+ vx_add_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
10583+
10584+#define vx_nsems_sub(a,n) \
10585+ vx_sub_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
10586+
10587+
10588+#else
10589+#warning duplicate inclusion
10590+#endif
10591diff -NurpP --minimal linux-3.0.9/include/linux/vs_memory.h linux-3.0.9-vs2.3.2.1/include/linux/vs_memory.h
10592--- linux-3.0.9/include/linux/vs_memory.h 1970-01-01 01:00:00.000000000 +0100
10593+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_memory.h 2011-06-10 22:11:24.000000000 +0200
10594@@ -0,0 +1,58 @@
10595+#ifndef _VS_MEMORY_H
10596+#define _VS_MEMORY_H
10597+
10598+#include "vserver/limit.h"
10599+#include "vserver/base.h"
10600+#include "vserver/context.h"
10601+#include "vserver/debug.h"
10602+#include "vserver/context.h"
10603+#include "vserver/limit_int.h"
10604+
10605+enum {
10606+ VXPT_UNKNOWN = 0,
10607+ VXPT_ANON,
10608+ VXPT_NONE,
10609+ VXPT_FILE,
10610+ VXPT_SWAP,
10611+ VXPT_WRITE
10612+};
10613+
10614+#if 0
10615+#define vx_page_fault(mm, vma, type, ret)
10616+#else
10617+/* Increment cacct.page[type][what] of mm's vx_info; what = ret & 3, plus 4 if VM_FAULT_WRITE is set in ret. */
10618+static inline
10619+void __vx_page_fault(struct mm_struct *mm,
10620+ struct vm_area_struct *vma, int type, int ret)
10621+{
10622+ struct vx_info *vxi = mm->mm_vx_info;
10623+ int what;
10624+/*
10625+ static char *page_type[6] =
10626+ { "UNKNOWN", "ANON", "NONE", "FILE", "SWAP", "WRITE" };
10627+ static char *page_what[4] =
10628+ { "FAULT_OOM", "FAULT_SIGBUS", "FAULT_MINOR", "FAULT_MAJOR" };
10629+*/
10630+
10631+ if (!vxi)
10632+ return;
10633+
10634+ what = (ret & 0x3);
10635+
10636+/* printk("[%d] page[%d][%d] %2x %s %s\n", vxi->vx_id,
10637+ type, what, ret, page_type[type], page_what[what]);
10638+*/
10639+ if (ret & VM_FAULT_WRITE)
10640+ what |= 0x4;
10641+ atomic_inc(&vxi->cacct.page[type][what]); /* type is used as an index without a range check; vma is unused */
10642+}
10643+
10644+#define vx_page_fault(mm, vma, type, ret) __vx_page_fault(mm, vma, type, ret)
10645+#endif
10646+
10647+
10648+extern unsigned long vx_badness(struct task_struct *task, struct mm_struct *mm);
10649+
10650+#else
10651+#warning duplicate inclusion
10652+#endif
10653diff -NurpP --minimal linux-3.0.9/include/linux/vs_network.h linux-3.0.9-vs2.3.2.1/include/linux/vs_network.h
10654--- linux-3.0.9/include/linux/vs_network.h 1970-01-01 01:00:00.000000000 +0100
10655+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_network.h 2011-06-10 22:11:24.000000000 +0200
10656@@ -0,0 +1,169 @@
10657+#ifndef _NX_VS_NETWORK_H
10658+#define _NX_VS_NETWORK_H
10659+
10660+#include "vserver/context.h"
10661+#include "vserver/network.h"
10662+#include "vserver/base.h"
10663+#include "vserver/check.h"
10664+#include "vserver/debug.h"
10665+
10666+#include <linux/sched.h>
10667+
10668+/* get_nx_info(i): wrapper that passes the call site (__FILE__, __LINE__) to __get_nx_info. */
10669+#define get_nx_info(i) __get_nx_info(i, __FILE__, __LINE__)
10670+/* Take one usage reference: increments nxi->nx_usecnt and returns nxi; a NULL nxi yields NULL and nothing is touched. */
10671+static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
10672+ const char *_file, int _line)
10673+{
10674+ if (!nxi)
10675+ return NULL;
10676+/* Trace line; the nxi ? ... : 0 tests below cannot fail after the NULL return above. */
10677+ vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])",
10678+ nxi, nxi ? nxi->nx_id : 0,
10679+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
10680+ _file, _line);
10681+
10682+ atomic_inc(&nxi->nx_usecnt);
10683+ return nxi;
10684+}
10685+
10686+/* free_nx_info is only declared here; it is invoked below once the usage count reaches zero. */
10687+extern void free_nx_info(struct nx_info *);
10688+/* put_nx_info(i): wrapper that passes the call site to __put_nx_info. */
10689+#define put_nx_info(i) __put_nx_info(i, __FILE__, __LINE__)
10690+/* Drop one usage reference on nxi (NULL is ignored); the decrement that brings nx_usecnt to zero calls free_nx_info. */
10691+static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
10692+{
10693+ if (!nxi)
10694+ return;
10695+
10696+ vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])",
10697+ nxi, nxi ? nxi->nx_id : 0,
10698+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
10699+ _file, _line);
10700+/* The trace above therefore shows the count before this decrement. */
10701+ if (atomic_dec_and_test(&nxi->nx_usecnt))
10702+ free_nx_info(nxi);
10703+}
10704+
10705+/* init_nx_info(p, i): wrapper that passes the call site to __init_nx_info. */
10706+#define init_nx_info(p, i) __init_nx_info(p, i, __FILE__, __LINE__)
10707+/* Store nxi into *nxp with a plain assignment, taking a usage reference first when nxi is non-NULL; any previous *nxp value is overwritten without being released. */
10708+static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi,
10709+ const char *_file, int _line)
10710+{
10711+ if (nxi) {
10712+ vxlprintk(VXD_CBIT(nid, 3),
10713+ "init_nx_info(%p[#%d.%d])",
10714+ nxi, nxi ? nxi->nx_id : 0,
10715+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
10716+ _file, _line);
10717+
10718+ atomic_inc(&nxi->nx_usecnt);
10719+ }
10720+ *nxp = nxi;
10721+}
10722+
10723+/* set_nx_info(p, i): wrapper that passes the call site to __set_nx_info. */
10724+#define set_nx_info(p, i) __set_nx_info(p, i, __FILE__, __LINE__)
10725+/* Publish nxi into *nxp: takes a usage reference, swaps it in with xchg, and BUGs if the slot already held a non-NULL pointer. A NULL nxi leaves *nxp untouched. */
10726+static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
10727+ const char *_file, int _line)
10728+{
10729+ struct nx_info *nxo;
10730+
10731+ if (!nxi)
10732+ return;
10733+
10734+ vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])",
10735+ nxi, nxi ? nxi->nx_id : 0,
10736+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
10737+ _file, _line);
10738+/* The reference is taken before the pointer becomes visible through *nxp. */
10739+ atomic_inc(&nxi->nx_usecnt);
10740+ nxo = xchg(nxp, nxi);
10741+ BUG_ON(nxo);
10742+}
10743+/* clr_nx_info(p): wrapper that passes the call site to __clr_nx_info. */
10744+#define clr_nx_info(p) __clr_nx_info(p, __FILE__, __LINE__)
10745+/* Detach whatever *nxp holds: swaps in NULL with xchg and drops the usage reference of the old pointer, calling free_nx_info when the count reaches zero. An already-NULL slot is a no-op. */
10746+static inline void __clr_nx_info(struct nx_info **nxp,
10747+ const char *_file, int _line)
10748+{
10749+ struct nx_info *nxo;
10750+
10751+ nxo = xchg(nxp, NULL);
10752+ if (!nxo)
10753+ return;
10754+/* From here on nxo is non-NULL, so the nxo ? ... : 0 tests in the trace are redundant. */
10755+ vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])",
10756+ nxo, nxo ? nxo->nx_id : 0,
10757+ nxo ? atomic_read(&nxo->nx_usecnt) : 0,
10758+ _file, _line);
10759+
10760+ if (atomic_dec_and_test(&nxo->nx_usecnt))
10761+ free_nx_info(nxo);
10762+}
10763+
10764+/* claim_nx_info(v, p): wrapper that passes the call site to __claim_nx_info (v is the context, p the task). */
10765+#define claim_nx_info(v, p) __claim_nx_info(v, p, __FILE__, __LINE__)
10766+/* Account one more task for nxi by incrementing nxi->nx_tasks; task is only used in the trace line. */
10767+static inline void __claim_nx_info(struct nx_info *nxi,
10768+ struct task_struct *task, const char *_file, int _line)
10769+{
10770+ vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p",
10771+ nxi, nxi ? nxi->nx_id : 0,
10772+ nxi?atomic_read(&nxi->nx_usecnt):0,
10773+ nxi?atomic_read(&nxi->nx_tasks):0,
10774+ task, _file, _line);
10775+/* NOTE(review): unlike the trace above, the increment dereferences nxi without a NULL test, so callers must pass a non-NULL context. */
10776+ atomic_inc(&nxi->nx_tasks);
10777+}
10778+
10779+/* unhash_nx_info is only declared here; it is called below when the task count reaches zero. */
10780+extern void unhash_nx_info(struct nx_info *);
10781+/* release_nx_info(v, p): wrapper that passes the call site to __release_nx_info (v is the context, p the task). */
10782+#define release_nx_info(v, p) __release_nx_info(v, p, __FILE__, __LINE__)
10783+/* Drop one task from nxi->nx_tasks and call unhash_nx_info when the count reaches zero; task is only used in the trace line. */
10784+static inline void __release_nx_info(struct nx_info *nxi,
10785+ struct task_struct *task, const char *_file, int _line)
10786+{
10787+ vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p",
10788+ nxi, nxi ? nxi->nx_id : 0,
10789+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
10790+ nxi ? atomic_read(&nxi->nx_tasks) : 0,
10791+ task, _file, _line);
10792+/* NOTE(review): nxi is dereferenced below without a NULL test even though the trace above guards against NULL. */
10793+ might_sleep();
10794+
10795+ if (atomic_dec_and_test(&nxi->nx_tasks))
10796+ unhash_nx_info(nxi);
10797+}
10798+
10799+/* task_get_nx_info(i): wrapper that passes the call site to __task_get_nx_info. */
10800+#define task_get_nx_info(i) __task_get_nx_info(i, __FILE__, __LINE__)
10801+/* Return the network context of task p with a usage reference taken, or NULL if p has none. */
10802+static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
10803+ const char *_file, int _line)
10804+{
10805+ struct nx_info *nxi;
10806+/* p->nx_info is read and referenced between task_lock and task_unlock. */
10807+ task_lock(p);
10808+ vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)",
10809+ p, _file, _line);
10810+ nxi = __get_nx_info(p->nx_info, _file, _line);
10811+ task_unlock(p);
10812+ return nxi;
10813+}
10814+
10815+/* Release the task count that p holds on its network context, if it has one; p->nx_info itself is not cleared here. */
10816+static inline void exit_nx_info(struct task_struct *p)
10817+{
10818+ if (p->nx_info)
10819+ release_nx_info(p->nx_info, p);
10820+}
10821+
10822+
10823+#else
10824+#warning duplicate inclusion
10825+#endif
10826diff -NurpP --minimal linux-3.0.9/include/linux/vs_pid.h linux-3.0.9-vs2.3.2.1/include/linux/vs_pid.h
10827--- linux-3.0.9/include/linux/vs_pid.h 1970-01-01 01:00:00.000000000 +0100
10828+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_pid.h 2011-06-10 22:11:24.000000000 +0200
10829@@ -0,0 +1,50 @@
10830+#ifndef _VS_PID_H
10831+#define _VS_PID_H
10832+
10833+#include "vserver/base.h"
10834+#include "vserver/check.h"
10835+#include "vserver/context.h"
10836+#include "vserver/debug.h"
10837+#include "vserver/pid.h"
10838+#include <linux/pid_namespace.h>
10839+
10840+/* VXF_FAKE_INIT: the two init-related context flag bits combined. */
10841+#define VXF_FAKE_INIT (VXF_INFO_INIT | VXF_STATE_INIT)
10842+/* Returns 1 when task may be shown to the current context, else 0. vx_flags expands to a masked XOR that is zero on an exact match, so the first test lets pid 1 through only when the current context has both VXF_FAKE_INIT bits set; every other task must pass vx_check on its xid with VS_WATCH | VS_IDENT. */
10843+static inline
10844+int vx_proc_task_visible(struct task_struct *task)
10845+{
10846+ if ((task->pid == 1) &&
10847+ !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
10848+ /* show a blend through init */
10849+ goto visible;
10850+ if (vx_check(vx_task_xid(task), VS_WATCH | VS_IDENT))
10851+ goto visible;
10852+ return 0;
10853+visible:
10854+ return 1;
10855+}
10856+
10857+#define find_task_by_real_pid(pid) find_task_by_pid_ns(pid, &init_pid_ns)
10858+
10859+/* Look up the task for pid via get_pid_task and return it only if vx_proc_task_visible accepts it; inode is not used in this body. */
10860+static inline
10861+struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid)
10862+{
10863+ struct task_struct *task = get_pid_task(pid, PIDTYPE_PID);
10864+/* A hidden task is handed back to put_task_struct and NULL is returned, exactly as if the lookup had failed. */
10865+ if (task && !vx_proc_task_visible(task)) {
10866+ vxdprintk(VXD_CBIT(misc, 6),
10867+ "dropping task (get) %p[#%u,%u] for %p[#%u,%u]",
10868+ task, task->xid, task->pid,
10869+ current, current->xid, current->pid);
10870+ put_task_struct(task);
10871+ task = NULL;
10872+ }
10873+ return task;
10874+}
10875+
10876+
10877+#else
10878+#warning duplicate inclusion
10879+#endif
10880diff -NurpP --minimal linux-3.0.9/include/linux/vs_sched.h linux-3.0.9-vs2.3.2.1/include/linux/vs_sched.h
10881--- linux-3.0.9/include/linux/vs_sched.h 1970-01-01 01:00:00.000000000 +0100
10882+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_sched.h 2011-06-10 22:11:24.000000000 +0200
10883@@ -0,0 +1,40 @@
10884+#ifndef _VS_SCHED_H
10885+#define _VS_SCHED_H
10886+
10887+#include "vserver/base.h"
10888+#include "vserver/context.h"
10889+#include "vserver/sched.h"
10890+
10891+
10892+#define MAX_PRIO_BIAS 20
10893+#define MIN_PRIO_BIAS -20
10894+/* Return prio plus the prio_bias stored for the executing CPU in the context attached to p; prio comes back unchanged when p has no context. */
10895+static inline
10896+int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
10897+{
10898+ struct vx_info *vxi = p->vx_info;
10899+/* NOTE(review): max_user is not used in this body, and the result is not clamped to MIN_PRIO_BIAS/MAX_PRIO_BIAS here. */
10900+ if (vxi)
10901+ prio += vx_cpu(vxi, sched_pc).prio_bias;
10902+ return prio;
10903+}
10904+/* Add cputime to the user_ticks counter that vxi keeps for the executing CPU; a NULL vxi is ignored and nice is not used. */
10905+static inline void vx_account_user(struct vx_info *vxi,
10906+ cputime_t cputime, int nice)
10907+{
10908+ if (!vxi)
10909+ return;
10910+ vx_cpu(vxi, sched_pc).user_ticks += cputime;
10911+}
10912+/* Add cputime to the sys_ticks counter that vxi keeps for the executing CPU; a NULL vxi is ignored and idle is not used. */
10913+static inline void vx_account_system(struct vx_info *vxi,
10914+ cputime_t cputime, int idle)
10915+{
10916+ if (!vxi)
10917+ return;
10918+ vx_cpu(vxi, sched_pc).sys_ticks += cputime;
10919+}
10920+
10921+#else
10922+#warning duplicate inclusion
10923+#endif
10924diff -NurpP --minimal linux-3.0.9/include/linux/vs_socket.h linux-3.0.9-vs2.3.2.1/include/linux/vs_socket.h
10925--- linux-3.0.9/include/linux/vs_socket.h 1970-01-01 01:00:00.000000000 +0100
10926+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_socket.h 2011-06-10 22:11:24.000000000 +0200
10927@@ -0,0 +1,67 @@
10928+#ifndef _VS_SOCKET_H
10929+#define _VS_SOCKET_H
10930+
10931+#include "vserver/debug.h"
10932+#include "vserver/base.h"
10933+#include "vserver/cacct.h"
10934+#include "vserver/context.h"
10935+#include "vserver/tag.h"
10936+
10937+
10938+/* socket accounting */
10939+
10940+#include <linux/socket.h>
10941+/* Map a PF_* protocol family to the VXA_SOCK_* value that __vx_acc_sock uses as the first index into cacct.sock; any family not listed maps to VXA_SOCK_OTHER. */
10942+static inline int vx_sock_type(int family)
10943+{
10944+ switch (family) {
10945+ case PF_UNSPEC:
10946+ return VXA_SOCK_UNSPEC;
10947+ case PF_UNIX:
10948+ return VXA_SOCK_UNIX;
10949+ case PF_INET:
10950+ return VXA_SOCK_INET;
10951+ case PF_INET6:
10952+ return VXA_SOCK_INET6;
10953+ case PF_PACKET:
10954+ return VXA_SOCK_PACKET;
10955+ default:
10956+ return VXA_SOCK_OTHER;
10957+ }
10958+}
10959+/* vx_acc_sock(v, f, p, s): wrapper that passes the call site (__FILE__, __LINE__) to __vx_acc_sock. */
10960+#define vx_acc_sock(v, f, p, s) \
10961+ __vx_acc_sock(v, f, p, s, __FILE__, __LINE__)
10962+/* Record one socket event for context vxi: increments count and adds size to total in cacct.sock[type][pos], with type taken from vx_sock_type(family). A NULL vxi is ignored. file and line follow the call-site convention but are not read; file is const because it receives the __FILE__ literal. */
10963+static inline void __vx_acc_sock(struct vx_info *vxi,
10964+ int family, int pos, int size, const char *file, int line)
10965+{
10966+ if (vxi) {
10967+ int type = vx_sock_type(family);
10968+/* pos is used unchecked; the wrappers in this header pass 0 (recv), 1 (send) or 2 (fail). */
10969+ atomic_long_inc(&vxi->cacct.sock[type][pos].count);
10970+ atomic_long_add(size, &vxi->cacct.sock[type][pos].total);
10971+ }
10972+}
10973+/* Per-direction accounting hooks: charge s to the context and family stored in socket sk, using column 0 for receive, 1 for send and 2 for failure. */
10974+#define vx_sock_recv(sk, s) \
10975+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, s)
10976+#define vx_sock_send(sk, s) \
10977+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, s)
10978+#define vx_sock_fail(sk, s) \
10979+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, s)
10980+
10981+/* Reset the context id and context pointer of socket s to 0 / NULL. */
10982+#define sock_vx_init(s) do { \
10983+ (s)->sk_xid = 0; \
10984+ (s)->sk_vx_info = NULL; \
10985+ } while (0)
10986+/* Reset the network-context id and pointer of socket s to 0 / NULL. */
10987+#define sock_nx_init(s) do { \
10988+ (s)->sk_nid = 0; \
10989+ (s)->sk_nx_info = NULL; \
10990+ } while (0)
10991+
10992+#else
10993+#warning duplicate inclusion
10994+#endif
10995diff -NurpP --minimal linux-3.0.9/include/linux/vs_tag.h linux-3.0.9-vs2.3.2.1/include/linux/vs_tag.h
10996--- linux-3.0.9/include/linux/vs_tag.h 1970-01-01 01:00:00.000000000 +0100
10997+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_tag.h 2011-06-10 22:11:24.000000000 +0200
10998@@ -0,0 +1,47 @@
10999+#ifndef _VS_TAG_H
11000+#define _VS_TAG_H
11001+
11002+#include <linux/vserver/tag.h>
11003+
11004+/* check conditions */
11005+
11006+#define DX_ADMIN 0x0001
11007+#define DX_WATCH 0x0002
11008+#define DX_HOSTID 0x0008
11009+
11010+#define DX_IDENT 0x0010
11011+
11012+#define DX_ARG_MASK 0x0010
11013+
11014+
11015+#define dx_task_tag(t) ((t)->tag)
11016+
11017+#define dx_current_tag() dx_task_tag(current)
11018+
11019+#define dx_check(c, m) __dx_check(dx_current_tag(), c, m)
11020+
11021+#define dx_weak_check(c, m) ((m) ? dx_check(c, m) : 1)
11022+
11023+/* Returns nonzero when any test selected by mode passes, with cid as the caller tag and id as the target tag: DX_IDENT (id == cid), DX_ADMIN (cid == 0), DX_WATCH (cid == 1), DX_HOSTID (id == 0). */
11024+/*
11025+ * check current context for ADMIN/WATCH and
11026+ * optionally against supplied argument
11027+ */
11028+static inline int __dx_check(tag_t cid, tag_t id, unsigned int mode)
11029+{
11030+ if (mode & DX_ARG_MASK) {
11031+ if ((mode & DX_IDENT) && (id == cid))
11032+ return 1;
11033+ }
11034+ return (((mode & DX_ADMIN) && (cid == 0)) ||
11035+ ((mode & DX_WATCH) && (cid == 1)) ||
11036+ ((mode & DX_HOSTID) && (id == 0)));
11037+}
11038+
11039+struct inode;
11040+int dx_permission(const struct inode *inode, int mask);
11041+
11042+
11043+#else
11044+#warning duplicate inclusion
11045+#endif
11046diff -NurpP --minimal linux-3.0.9/include/linux/vs_time.h linux-3.0.9-vs2.3.2.1/include/linux/vs_time.h
11047--- linux-3.0.9/include/linux/vs_time.h 1970-01-01 01:00:00.000000000 +0100
11048+++ linux-3.0.9-vs2.3.2.1/include/linux/vs_time.h 2011-06-13 14:57:45.000000000 +0200
11049@@ -0,0 +1,19 @@
11050+#ifndef _VS_TIME_H
11051+#define _VS_TIME_H
11052+
11053+
11054+/* time faking stuff */
11055+/* With CONFIG_VSERVER_VTIME both hooks are functions defined elsewhere; without it vx_adjust_timespec expands to an empty statement and vx_settimeofday forwards to do_settimeofday. */
11056+#ifdef CONFIG_VSERVER_VTIME
11057+
11058+extern void vx_adjust_timespec(struct timespec *ts);
11059+extern int vx_settimeofday(const struct timespec *ts);
11060+
11061+#else
11062+#define vx_adjust_timespec(t) do { } while (0)
11063+#define vx_settimeofday(t) do_settimeofday(t)
11064+#endif
11065+
11066+#else
11067+#warning duplicate inclusion
11068+#endif
11069diff -NurpP --minimal linux-3.0.9/include/linux/vserver/Kbuild linux-3.0.9-vs2.3.2.1/include/linux/vserver/Kbuild
11070--- linux-3.0.9/include/linux/vserver/Kbuild 1970-01-01 01:00:00.000000000 +0100
11071+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/Kbuild 2011-08-08 18:02:46.000000000 +0200
11072@@ -0,0 +1,8 @@
11073+
11074+header-y += context_cmd.h network_cmd.h space_cmd.h \
11075+ cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \
11076+ inode_cmd.h tag_cmd.h sched_cmd.h signal_cmd.h \
11077+ debug_cmd.h device_cmd.h
11078+
11079+header-y += switch.h network.h monitor.h inode.h device.h
11080+
11081diff -NurpP --minimal linux-3.0.9/include/linux/vserver/base.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/base.h
11082--- linux-3.0.9/include/linux/vserver/base.h 1970-01-01 01:00:00.000000000 +0100
11083+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/base.h 2011-08-01 18:26:07.000000000 +0200
11084@@ -0,0 +1,178 @@
11085+#ifndef _VX_BASE_H
11086+#define _VX_BASE_H
11087+
11088+
11089+/* context state changes */
11090+
11091+enum {
11092+ VSC_STARTUP = 1,
11093+ VSC_SHUTDOWN,
11094+
11095+ VSC_NETUP,
11096+ VSC_NETDOWN,
11097+};
11098+
11099+
11100+
11101+#define vx_task_xid(t) ((t)->xid)
11102+
11103+#define vx_current_xid() vx_task_xid(current)
11104+
11105+#define current_vx_info() (current->vx_info)
11106+
11107+
11108+#define nx_task_nid(t) ((t)->nid)
11109+
11110+#define nx_current_nid() nx_task_nid(current)
11111+
11112+#define current_nx_info() (current->nx_info)
11113+
11114+
11115+/* generic flag merging */
11116+/* vs_check_flags: XOR of the masked value with f, so the result is zero exactly when (v & m) == f and nonzero otherwise. */
11117+#define vs_check_flags(v, m, f) (((v) & (m)) ^ (f))
11118+/* vs_mask_flags: v with the bits selected by m replaced by the corresponding bits of f. */
11119+#define vs_mask_flags(v, f, m) (((v) & ~(m)) | ((f) & (m)))
11120+/* vs_mask_mask: v with the m-selected bits additionally ANDed with f, so bits inside m can be cleared but never set. */
11121+#define vs_mask_mask(v, f, m) (((v) & ~(m)) | ((v) & (f) & (m)))
11122+/* vs_check_bit: nonzero when bit n of v is set; the shift is done on a long long constant. */
11123+#define vs_check_bit(v, n) ((v) & (1LL << (n)))
11124+
11125+
11126+/* context flags */
11127+/* __vx_flags: flag word of context v, or 0 for a NULL context. */
11128+#define __vx_flags(v) ((v) ? (v)->vx_flags : 0)
11129+
11130+#define vx_current_flags() __vx_flags(current_vx_info())
11131+/* vx_info_flags inherits the XOR polarity of vs_check_flags: zero means the masked flags equal f. */
11132+#define vx_info_flags(v, m, f) \
11133+ vs_check_flags(__vx_flags(v), m, f)
11134+/* NOTE(review): as written task_vx_flags is true when t is non-NULL and the masked flags differ from f; confirm callers expect that polarity. */
11135+#define task_vx_flags(t, m, f) \
11136+ ((t) && vx_info_flags((t)->vx_info, m, f))
11137+
11138+#define vx_flags(m, f) vx_info_flags(current_vx_info(), m, f)
11139+
11140+
11141+/* context caps */
11142+
11143+#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0)
11144+
11145+#define vx_current_ccaps() __vx_ccaps(current_vx_info())
11146+
11147+#define vx_info_ccaps(v, c) (__vx_ccaps(v) & (c))
11148+
11149+#define vx_ccaps(c) vx_info_ccaps(current_vx_info(), (c))
11150+
11151+
11152+
11153+/* network flags */
11154+
11155+#define __nx_flags(n) ((n) ? (n)->nx_flags : 0)
11156+
11157+#define nx_current_flags() __nx_flags(current_nx_info())
11158+
11159+#define nx_info_flags(n, m, f) \
11160+ vs_check_flags(__nx_flags(n), m, f)
11161+
11162+#define task_nx_flags(t, m, f) \
11163+ ((t) && nx_info_flags((t)->nx_info, m, f))
11164+
11165+#define nx_flags(m, f) nx_info_flags(current_nx_info(), m, f)
11166+
11167+
11168+/* network caps */
11169+
11170+#define __nx_ncaps(n) ((n) ? (n)->nx_ncaps : 0)
11171+
11172+#define nx_current_ncaps() __nx_ncaps(current_nx_info())
11173+
11174+#define nx_info_ncaps(n, c) (__nx_ncaps(n) & (c))
11175+
11176+#define nx_ncaps(c) nx_info_ncaps(current_nx_info(), c)
11177+
11178+
11179+/* context mask capabilities */
11180+
11181+#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
11182+
11183+#define vx_info_mcaps(v, c) (__vx_mcaps(v) & (c))
11184+
11185+#define vx_mcaps(c) vx_info_mcaps(current_vx_info(), c)
11186+
11187+
11188+/* context bcap mask */
11189+
11190+#define __vx_bcaps(v) ((v)->vx_bcaps)
11191+
11192+#define vx_current_bcaps() __vx_bcaps(current_vx_info())
11193+
11194+
11195+/* mask given bcaps */
11196+
11197+#define vx_info_mbcaps(v, c) ((v) ? cap_intersect(__vx_bcaps(v), c) : c)
11198+
11199+#define vx_mbcaps(c) vx_info_mbcaps(current_vx_info(), c)
11200+
11201+
11202+/* masked cap_bset */
11203+
11204+#define vx_info_cap_bset(v) vx_info_mbcaps(v, current->cap_bset)
11205+
11206+#define vx_current_cap_bset() vx_info_cap_bset(current_vx_info())
11207+
11208+#if 0
11209+#define vx_info_mbcap(v, b) \
11210+ (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \
11211+ vx_info_bcaps(v, b) : (b))
11212+
11213+#define task_vx_mbcap(t, b) \
11214+ vx_info_mbcap((t)->vx_info, (t)->b)
11215+
11216+#define vx_mbcap(b) task_vx_mbcap(current, b)
11217+#endif
11218+
11219+#define vx_cap_raised(v, c, f) cap_raised(vx_info_mbcaps(v, c), f)
11220+
11221+#define vx_capable(b, c) (capable(b) || \
11222+ (cap_raised(current_cap(), b) && vx_ccaps(c)))
11223+
11224+#define vx_ns_capable(n, b, c) (ns_capable(n, b) || \
11225+ (cap_raised(current_cap(), b) && vx_ccaps(c)))
11226+
11227+#define nx_capable(b, c) (capable(b) || \
11228+ (cap_raised(current_cap(), b) && nx_ncaps(c)))
11229+
11230+#define vx_task_initpid(t, n) \
11231+ ((t)->vx_info && \
11232+ ((t)->vx_info->vx_initpid == (n)))
11233+
11234+#define vx_current_initpid(n) vx_task_initpid(current, n)
11235+
11236+
11237+/* context unshare mask */
11238+
11239+#define __vx_umask(v) ((v)->vx_umask)
11240+
11241+#define vx_current_umask() __vx_umask(current_vx_info())
11242+
11243+#define vx_can_unshare(b, f) (capable(b) || \
11244+ (cap_raised(current_cap(), b) && \
11245+ !((f) & ~vx_current_umask())))
11246+
11247+
11248+#define __vx_wmask(v) ((v)->vx_wmask)
11249+
11250+#define vx_current_wmask() __vx_wmask(current_vx_info())
11251+
11252+
11253+#define __vx_state(v) ((v) ? ((v)->vx_state) : 0)
11254+
11255+#define vx_info_state(v, m) (__vx_state(v) & (m))
11256+
11257+
11258+#define __nx_state(n) ((n) ? ((n)->nx_state) : 0)
11259+
11260+#define nx_info_state(n, m) (__nx_state(n) & (m))
11261+
11262+#endif
11263diff -NurpP --minimal linux-3.0.9/include/linux/vserver/cacct.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/cacct.h
11264--- linux-3.0.9/include/linux/vserver/cacct.h 1970-01-01 01:00:00.000000000 +0100
11265+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/cacct.h 2011-06-10 22:11:24.000000000 +0200
11266@@ -0,0 +1,15 @@
11267+#ifndef _VX_CACCT_H
11268+#define _VX_CACCT_H
11269+
11270+
11271+enum sock_acc_field {
11272+ VXA_SOCK_UNSPEC = 0,
11273+ VXA_SOCK_UNIX,
11274+ VXA_SOCK_INET,
11275+ VXA_SOCK_INET6,
11276+ VXA_SOCK_PACKET,
11277+ VXA_SOCK_OTHER,
11278+ VXA_SOCK_SIZE /* array size */
11279+};
11280+
11281+#endif /* _VX_CACCT_H */
11282diff -NurpP --minimal linux-3.0.9/include/linux/vserver/cacct_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/cacct_cmd.h
11283--- linux-3.0.9/include/linux/vserver/cacct_cmd.h 1970-01-01 01:00:00.000000000 +0100
11284+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/cacct_cmd.h 2011-06-10 22:11:24.000000000 +0200
11285@@ -0,0 +1,23 @@
11286+#ifndef _VX_CACCT_CMD_H
11287+#define _VX_CACCT_CMD_H
11288+
11289+
11290+/* virtual host info name commands */
11291+
11292+#define VCMD_sock_stat VC_CMD(VSTAT, 5, 0)
11293+
11294+struct vcmd_sock_stat_v0 {
11295+ uint32_t field;
11296+ uint32_t count[3];
11297+ uint64_t total[3];
11298+};
11299+
11300+
11301+#ifdef __KERNEL__
11302+
11303+#include <linux/compiler.h>
11304+
11305+extern int vc_sock_stat(struct vx_info *, void __user *);
11306+
11307+#endif /* __KERNEL__ */
11308+#endif /* _VX_CACCT_CMD_H */
11309diff -NurpP --minimal linux-3.0.9/include/linux/vserver/cacct_def.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/cacct_def.h
11310--- linux-3.0.9/include/linux/vserver/cacct_def.h 1970-01-01 01:00:00.000000000 +0100
11311+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/cacct_def.h 2011-06-10 22:11:24.000000000 +0200
11312@@ -0,0 +1,43 @@
11313+#ifndef _VX_CACCT_DEF_H
11314+#define _VX_CACCT_DEF_H
11315+
11316+#include <asm/atomic.h>
11317+#include <linux/vserver/cacct.h>
11318+
11319+
11320+struct _vx_sock_acc {
11321+ atomic_long_t count;
11322+ atomic_long_t total;
11323+};
11324+
11325+/* context sub struct */
11326+
11327+struct _vx_cacct {
11328+ struct _vx_sock_acc sock[VXA_SOCK_SIZE][3];
11329+ atomic_t slab[8];
11330+ atomic_t page[6][8];
11331+};
11332+
11333+#ifdef CONFIG_VSERVER_DEBUG
11334+/* Debug helper: print count and total of every cacct->sock[i][j] entry, one output line per socket type i. The outer bound is VXA_SOCK_SIZE, the declared first dimension of sock[], rather than a literal 6. */
11335+static inline void __dump_vx_cacct(struct _vx_cacct *cacct)
11336+{
11337+ int i, j;
11338+/* The header text has no newline, so the first row continues on the same output line. */
11339+ printk("\t_vx_cacct:");
11340+ for (i = 0; i < VXA_SOCK_SIZE; i++) {
11341+ struct _vx_sock_acc *ptr = cacct->sock[i];
11342+/* ptr walks the three columns of row i (declared as sock[VXA_SOCK_SIZE][3]). */
11343+ printk("\t [%d] =", i);
11344+ for (j = 0; j < 3; j++) {
11345+ printk(" [%d] = %8lu, %8lu", j,
11346+ atomic_long_read(&ptr[j].count),
11347+ atomic_long_read(&ptr[j].total));
11348+ }
11349+ printk("\n");
11350+ }
11351+}
11352+
11353+#endif
11354+
11355+#endif /* _VX_CACCT_DEF_H */
11356diff -NurpP --minimal linux-3.0.9/include/linux/vserver/cacct_int.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/cacct_int.h
11357--- linux-3.0.9/include/linux/vserver/cacct_int.h 1970-01-01 01:00:00.000000000 +0100
11358+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/cacct_int.h 2011-06-10 22:11:24.000000000 +0200
11359@@ -0,0 +1,21 @@
11360+#ifndef _VX_CACCT_INT_H
11361+#define _VX_CACCT_INT_H
11362+
11363+
11364+#ifdef __KERNEL__
11365+/* Read the event counter stored at cacct->sock[type][pos]; type and pos are used as indices without range checks. */
11366+static inline
11367+unsigned long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
11368+{
11369+ return atomic_long_read(&cacct->sock[type][pos].count);
11370+}
11371+
11372+/* Read the accumulated size stored at cacct->sock[type][pos]; type and pos are used as indices without range checks. */
11373+static inline
11374+unsigned long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
11375+{
11376+ return atomic_long_read(&cacct->sock[type][pos].total);
11377+}
11378+
11379+#endif /* __KERNEL__ */
11380+#endif /* _VX_CACCT_INT_H */
11381diff -NurpP --minimal linux-3.0.9/include/linux/vserver/check.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/check.h
11382--- linux-3.0.9/include/linux/vserver/check.h 1970-01-01 01:00:00.000000000 +0100
11383+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/check.h 2011-06-10 22:11:24.000000000 +0200
11384@@ -0,0 +1,89 @@
11385+#ifndef _VS_CHECK_H
11386+#define _VS_CHECK_H
11387+
11388+
11389+#define MAX_S_CONTEXT 65535 /* Arbitrary limit */
11390+
11391+#ifdef CONFIG_VSERVER_DYNAMIC_IDS
11392+#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */
11393+#else
11394+#define MIN_D_CONTEXT 65536
11395+#endif
11396+
11397+/* check conditions */
11398+
11399+#define VS_ADMIN 0x0001
11400+#define VS_WATCH 0x0002
11401+#define VS_HIDE 0x0004
11402+#define VS_HOSTID 0x0008
11403+
11404+#define VS_IDENT 0x0010
11405+#define VS_EQUIV 0x0020
11406+#define VS_PARENT 0x0040
11407+#define VS_CHILD 0x0080
11408+
11409+#define VS_ARG_MASK 0x00F0
11410+
11411+#define VS_DYNAMIC 0x0100
11412+#define VS_STATIC 0x0200
11413+
11414+#define VS_ATR_MASK 0x0F00
11415+
11416+#ifdef CONFIG_VSERVER_PRIVACY
11417+#define VS_ADMIN_P (0)
11418+#define VS_WATCH_P (0)
11419+#else
11420+#define VS_ADMIN_P VS_ADMIN
11421+#define VS_WATCH_P VS_WATCH
11422+#endif
11423+
11424+#define VS_HARDIRQ 0x1000
11425+#define VS_SOFTIRQ 0x2000
11426+#define VS_IRQ 0x4000
11427+
11428+#define VS_IRQ_MASK 0xF000
11429+
11430+#include <linux/hardirq.h>
11431+/* Returns nonzero when any test selected by mode passes, with cid as the caller id and id as the target id: VS_IDENT (id == cid), VS_DYNAMIC (MIN_D_CONTEXT <= id <= MAX_S_CONTEXT), VS_STATIC (1 < id < MIN_D_CONTEXT), VS_IRQ / VS_HARDIRQ / VS_SOFTIRQ (in_interrupt / in_irq / in_softirq), VS_ADMIN (cid == 0), VS_WATCH (cid == 1), VS_HOSTID (id == 0). VS_HIDE, VS_EQUIV, VS_PARENT and VS_CHILD are not evaluated here. When MIN_D_CONTEXT is 65536 (no CONFIG_VSERVER_DYNAMIC_IDS) the VS_DYNAMIC range is empty. */
11432+/*
11433+ * check current context for ADMIN/WATCH and
11434+ * optionally against supplied argument
11435+ */
11436+static inline int __vs_check(int cid, int id, unsigned int mode)
11437+{
11438+ if (mode & VS_ARG_MASK) {
11439+ if ((mode & VS_IDENT) && (id == cid))
11440+ return 1;
11441+ }
11442+ if (mode & VS_ATR_MASK) {
11443+ if ((mode & VS_DYNAMIC) &&
11444+ (id >= MIN_D_CONTEXT) &&
11445+ (id <= MAX_S_CONTEXT))
11446+ return 1;
11447+ if ((mode & VS_STATIC) &&
11448+ (id > 1) && (id < MIN_D_CONTEXT))
11449+ return 1;
11450+ }
11451+ if (mode & VS_IRQ_MASK) {
11452+ if ((mode & VS_IRQ) && unlikely(in_interrupt()))
11453+ return 1;
11454+ if ((mode & VS_HARDIRQ) && unlikely(in_irq()))
11455+ return 1;
11456+ if ((mode & VS_SOFTIRQ) && unlikely(in_softirq()))
11457+ return 1;
11458+ }
11459+ return (((mode & VS_ADMIN) && (cid == 0)) ||
11460+ ((mode & VS_WATCH) && (cid == 1)) ||
11461+ ((mode & VS_HOSTID) && (id == 0)));
11462+}
11463+/* vx_check compares against the xid of current and always ORs VS_IRQ into the mode, so it succeeds unconditionally whenever in_interrupt() is true. */
11464+#define vx_check(c, m) __vs_check(vx_current_xid(), c, (m) | VS_IRQ)
11465+/* The weak variants treat an empty mode (0) as an automatic pass. */
11466+#define vx_weak_check(c, m) ((m) ? vx_check(c, m) : 1)
11467+
11468+/* nx_check compares against the nid of current and, unlike vx_check, does not add VS_IRQ. */
11469+#define nx_check(c, m) __vs_check(nx_current_nid(), c, m)
11470+
11471+#define nx_weak_check(c, m) ((m) ? nx_check(c, m) : 1)
11472+
11473+#endif
11474diff -NurpP --minimal linux-3.0.9/include/linux/vserver/context.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/context.h
11475--- linux-3.0.9/include/linux/vserver/context.h 1970-01-01 01:00:00.000000000 +0100
11476+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/context.h 2011-10-27 13:59:29.000000000 +0200
11477@@ -0,0 +1,188 @@
11478+#ifndef _VX_CONTEXT_H
11479+#define _VX_CONTEXT_H
11480+
11481+#include <linux/types.h>
11482+#include <linux/capability.h>
11483+
11484+
11485+/* context flags */
11486+
11487+#define VXF_INFO_SCHED 0x00000002
11488+#define VXF_INFO_NPROC 0x00000004
11489+#define VXF_INFO_PRIVATE 0x00000008
11490+
11491+#define VXF_INFO_INIT 0x00000010
11492+#define VXF_INFO_HIDE 0x00000020
11493+#define VXF_INFO_ULIMIT 0x00000040
11494+#define VXF_INFO_NSPACE 0x00000080
11495+
11496+#define VXF_SCHED_HARD 0x00000100
11497+#define VXF_SCHED_PRIO 0x00000200
11498+#define VXF_SCHED_PAUSE 0x00000400
11499+
11500+#define VXF_VIRT_MEM 0x00010000
11501+#define VXF_VIRT_UPTIME 0x00020000
11502+#define VXF_VIRT_CPU 0x00040000
11503+#define VXF_VIRT_LOAD 0x00080000
11504+#define VXF_VIRT_TIME 0x00100000
11505+
11506+#define VXF_HIDE_MOUNT 0x01000000
11507+/* was VXF_HIDE_NETIF 0x02000000 */
11508+#define VXF_HIDE_VINFO 0x04000000
11509+
11510+#define VXF_STATE_SETUP (1ULL << 32)
11511+#define VXF_STATE_INIT (1ULL << 33)
11512+#define VXF_STATE_ADMIN (1ULL << 34)
11513+
11514+#define VXF_SC_HELPER (1ULL << 36)
11515+#define VXF_REBOOT_KILL (1ULL << 37)
11516+#define VXF_PERSISTENT (1ULL << 38)
11517+
11518+#define VXF_FORK_RSS (1ULL << 48)
11519+#define VXF_PROLIFIC (1ULL << 49)
11520+
11521+#define VXF_IGNEG_NICE (1ULL << 52)
11522+
11523+#define VXF_ONE_TIME (0x0007ULL << 32)
11524+
11525+#define VXF_INIT_SET (VXF_STATE_SETUP | VXF_STATE_INIT | VXF_STATE_ADMIN)
11526+
11527+
11528+/* context migration */
11529+
11530+#define VXM_SET_INIT 0x00000001
11531+#define VXM_SET_REAPER 0x00000002
11532+
11533+/* context caps */
11534+
11535+#define VXC_SET_UTSNAME 0x00000001
11536+#define VXC_SET_RLIMIT 0x00000002
11537+#define VXC_FS_SECURITY 0x00000004
11538+#define VXC_FS_TRUSTED 0x00000008
11539+#define VXC_TIOCSTI 0x00000010
11540+
11541+/* was VXC_RAW_ICMP 0x00000100 */
11542+#define VXC_SYSLOG 0x00001000
11543+#define VXC_OOM_ADJUST 0x00002000
11544+#define VXC_AUDIT_CONTROL 0x00004000
11545+
11546+#define VXC_SECURE_MOUNT 0x00010000
11547+#define VXC_SECURE_REMOUNT 0x00020000
11548+#define VXC_BINARY_MOUNT 0x00040000
11549+
11550+#define VXC_QUOTA_CTL 0x00100000
11551+#define VXC_ADMIN_MAPPER 0x00200000
11552+#define VXC_ADMIN_CLOOP 0x00400000
11553+
11554+#define VXC_KTHREAD 0x01000000
11555+#define VXC_NAMESPACE 0x02000000
11556+
11557+
11558+#ifdef __KERNEL__
11559+
11560+#include <linux/list.h>
11561+#include <linux/spinlock.h>
11562+#include <linux/rcupdate.h>
11563+
11564+#include "limit_def.h"
11565+#include "sched_def.h"
11566+#include "cvirt_def.h"
11567+#include "cacct_def.h"
11568+#include "device_def.h"
11569+
11570+#define VX_SPACES 2
11571+
11572+struct _vx_info_pc {
11573+ struct _vx_sched_pc sched_pc;
11574+ struct _vx_cvirt_pc cvirt_pc;
11575+};
11576+
11577+struct _vx_space {
11578+ unsigned long vx_nsmask; /* assignment mask */
11579+ struct nsproxy *vx_nsproxy; /* private namespaces */
11580+ struct fs_struct *vx_fs; /* private namespace fs */
11581+ const struct cred *vx_cred; /* task credentials */
11582+};
11583+/* Per-context record: identity and reference counts, namespace store, flag and capability masks, reaper/init bookkeeping, the limit/sched/cvirt/cacct sub-structures, per-CPU data, exit information and the context name. */
11584+struct vx_info {
11585+ struct hlist_node vx_hlist; /* linked list of contexts */
11586+ xid_t vx_id; /* context id */
11587+ atomic_t vx_usecnt; /* usage count */
11588+ atomic_t vx_tasks; /* tasks count */
11589+ struct vx_info *vx_parent; /* parent context */
11590+ int vx_state; /* context state */
11591+/* vx_state presumably carries the VXS_* status bits defined later in this header - TODO confirm. */
11592+ struct _vx_space space[VX_SPACES]; /* namespace store */
11593+/* base.h additionally exposes vx_ccaps >> 32 as the context mask capabilities (__vx_mcaps). */
11594+ uint64_t vx_flags; /* context flags */
11595+ uint64_t vx_ccaps; /* context caps (vserver) */
11596+ uint64_t vx_umask; /* unshare mask (guest) */
11597+ uint64_t vx_wmask; /* warn mask (guest) */
11598+ kernel_cap_t vx_bcaps; /* bounding caps (system) */
11599+
11600+ struct task_struct *vx_reaper; /* guest reaper process */
11601+ pid_t vx_initpid; /* PID of guest init */
11602+ int64_t vx_badness_bias; /* OOM points bias */
11603+
11604+ struct _vx_limit limit; /* vserver limits */
11605+ struct _vx_sched sched; /* vserver scheduler */
11606+ struct _vx_cvirt cvirt; /* virtual/bias stuff */
11607+ struct _vx_cacct cacct; /* context accounting */
11608+
11609+ struct _vx_device dmap; /* default device map targets */
11610+/* Per-CPU block: embedded directly without CONFIG_SMP, otherwise a pointer that vx_per_cpu passes to per_cpu_ptr. */
11611+#ifndef CONFIG_SMP
11612+ struct _vx_info_pc info_pc; /* per cpu data */
11613+#else
11614+ struct _vx_info_pc *ptr_pc; /* per cpu array */
11615+#endif
11616+
11617+ wait_queue_head_t vx_wait; /* context exit waitqueue */
11618+ int reboot_cmd; /* last sys_reboot() cmd */
11619+ int exit_code; /* last process exit code */
11620+
11621+ char vx_name[65]; /* vserver name */
11622+};
11623+/* Accessors for the per-CPU part of struct vx_info: without CONFIG_SMP the id argument is ignored and the embedded info_pc is used; with it, field v is reached through per_cpu_ptr on ptr_pc for CPU id. */
11624+#ifndef CONFIG_SMP
11625+#define vx_ptr_pc(vxi) (&(vxi)->info_pc)
11626+#define vx_per_cpu(vxi, v, id) vx_ptr_pc(vxi)->v
11627+#else
11628+#define vx_ptr_pc(vxi) ((vxi)->ptr_pc)
11629+#define vx_per_cpu(vxi, v, id) per_cpu_ptr(vx_ptr_pc(vxi), id)->v
11630+#endif
11631+/* vx_cpu: same access for the CPU reported by smp_processor_id(). */
11632+#define vx_cpu(vxi, v) vx_per_cpu(vxi, v, smp_processor_id())
11633+
11634+
11635+struct vx_info_save {
11636+ struct vx_info *vxi;
11637+ xid_t xid;
11638+};
11639+
11640+
11641+/* status flags */
11642+
11643+#define VXS_HASHED 0x0001
11644+#define VXS_PAUSED 0x0010
11645+#define VXS_SHUTDOWN 0x0100
11646+#define VXS_HELPER 0x1000
11647+#define VXS_RELEASED 0x8000
11648+
11649+
11650+extern void claim_vx_info(struct vx_info *, struct task_struct *);
11651+extern void release_vx_info(struct vx_info *, struct task_struct *);
11652+
11653+extern struct vx_info *lookup_vx_info(int);
11654+extern struct vx_info *lookup_or_create_vx_info(int);
11655+
11656+extern int get_xid_list(int, unsigned int *, int);
11657+extern int xid_is_hashed(xid_t);
11658+
11659+extern int vx_migrate_task(struct task_struct *, struct vx_info *, int);
11660+
11661+extern long vs_state_change(struct vx_info *, unsigned int);
11662+
11663+
11664+#endif /* __KERNEL__ */
11665+#endif /* _VX_CONTEXT_H */
11666diff -NurpP --minimal linux-3.0.9/include/linux/vserver/context_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/context_cmd.h
11667--- linux-3.0.9/include/linux/vserver/context_cmd.h 1970-01-01 01:00:00.000000000 +0100
11668+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/context_cmd.h 2011-08-01 18:25:07.000000000 +0200
11669@@ -0,0 +1,162 @@
11670+#ifndef _VX_CONTEXT_CMD_H
11671+#define _VX_CONTEXT_CMD_H
11672+
11673+
11674+/* vinfo commands */
11675+
11676+#define VCMD_task_xid VC_CMD(VINFO, 1, 0)
11677+
11678+#ifdef __KERNEL__
11679+extern int vc_task_xid(uint32_t);
11680+
11681+#endif /* __KERNEL__ */
11682+
11683+#define VCMD_vx_info VC_CMD(VINFO, 5, 0)
11684+
11685+struct vcmd_vx_info_v0 {
11686+ uint32_t xid;
11687+ uint32_t initpid;
11688+ /* more to come */
11689+};
11690+
11691+#ifdef __KERNEL__
11692+extern int vc_vx_info(struct vx_info *, void __user *);
11693+
11694+#endif /* __KERNEL__ */
11695+
11696+#define VCMD_ctx_stat VC_CMD(VSTAT, 0, 0)
11697+
11698+struct vcmd_ctx_stat_v0 {
11699+ uint32_t usecnt;
11700+ uint32_t tasks;
11701+ /* more to come */
11702+};
11703+
11704+#ifdef __KERNEL__
11705+extern int vc_ctx_stat(struct vx_info *, void __user *);
11706+
11707+#endif /* __KERNEL__ */
11708+
11709+/* context commands */
11710+
11711+#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0)
11712+#define VCMD_ctx_create VC_CMD(VPROC, 1, 1)
11713+
11714+struct vcmd_ctx_create {
11715+ uint64_t flagword;
11716+};
11717+
11718+#define VCMD_ctx_migrate_v0 VC_CMD(PROCMIG, 1, 0)
11719+#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 1)
11720+
11721+struct vcmd_ctx_migrate {
11722+ uint64_t flagword;
11723+};
11724+
11725+#ifdef __KERNEL__
11726+extern int vc_ctx_create(uint32_t, void __user *);
11727+extern int vc_ctx_migrate(struct vx_info *, void __user *);
11728+
11729+#endif /* __KERNEL__ */
11730+
11731+
11732+/* flag commands */
11733+
11734+#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0)
11735+#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0)
11736+
11737+struct vcmd_ctx_flags_v0 {
11738+ uint64_t flagword;
11739+ uint64_t mask;
11740+};
11741+
11742+#ifdef __KERNEL__
11743+extern int vc_get_cflags(struct vx_info *, void __user *);
11744+extern int vc_set_cflags(struct vx_info *, void __user *);
11745+
11746+#endif /* __KERNEL__ */
11747+
11748+
11749+/* context caps commands */
11750+
11751+#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 1)
11752+#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 1)
11753+
11754+struct vcmd_ctx_caps_v1 {
11755+ uint64_t ccaps;
11756+ uint64_t cmask;
11757+};
11758+
11759+#ifdef __KERNEL__
11760+extern int vc_get_ccaps(struct vx_info *, void __user *);
11761+extern int vc_set_ccaps(struct vx_info *, void __user *);
11762+
11763+#endif /* __KERNEL__ */
11764+
11765+
11766+/* bcaps commands */
11767+
11768+#define VCMD_get_bcaps VC_CMD(FLAGS, 9, 0)
11769+#define VCMD_set_bcaps VC_CMD(FLAGS, 10, 0)
11770+
11771+struct vcmd_bcaps {
11772+ uint64_t bcaps;
11773+ uint64_t bmask;
11774+};
11775+
11776+#ifdef __KERNEL__
11777+extern int vc_get_bcaps(struct vx_info *, void __user *);
11778+extern int vc_set_bcaps(struct vx_info *, void __user *);
11779+
11780+#endif /* __KERNEL__ */
11781+
11782+
11783+/* umask commands */
11784+
11785+#define VCMD_get_umask VC_CMD(FLAGS, 13, 0)
11786+#define VCMD_set_umask VC_CMD(FLAGS, 14, 0)
11787+
11788+struct vcmd_umask {
11789+ uint64_t umask;
11790+ uint64_t mask;
11791+};
11792+
11793+#ifdef __KERNEL__
11794+extern int vc_get_umask(struct vx_info *, void __user *);
11795+extern int vc_set_umask(struct vx_info *, void __user *);
11796+
11797+#endif /* __KERNEL__ */
11798+
11799+
11800+/* wmask commands */
11801+
11802+#define VCMD_get_wmask VC_CMD(FLAGS, 15, 0)
11803+#define VCMD_set_wmask VC_CMD(FLAGS, 16, 0)
11804+
11805+struct vcmd_wmask {
11806+ uint64_t wmask;
11807+ uint64_t mask;
11808+};
11809+
11810+#ifdef __KERNEL__
11811+extern int vc_get_wmask(struct vx_info *, void __user *);
11812+extern int vc_set_wmask(struct vx_info *, void __user *);
11813+
11814+#endif /* __KERNEL__ */
11815+
11816+
11817+/* OOM badness */
11818+
11819+#define VCMD_get_badness VC_CMD(MEMCTRL, 5, 0)
11820+#define VCMD_set_badness VC_CMD(MEMCTRL, 6, 0)
11821+
11822+struct vcmd_badness_v0 {
11823+ int64_t bias;
11824+};
11825+
11826+#ifdef __KERNEL__
11827+extern int vc_get_badness(struct vx_info *, void __user *);
11828+extern int vc_set_badness(struct vx_info *, void __user *);
11829+
11830+#endif /* __KERNEL__ */
11831+#endif /* _VX_CONTEXT_CMD_H */
11832diff -NurpP --minimal linux-3.0.9/include/linux/vserver/cvirt.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/cvirt.h
11833--- linux-3.0.9/include/linux/vserver/cvirt.h 1970-01-01 01:00:00.000000000 +0100
11834+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/cvirt.h 2011-06-10 22:11:24.000000000 +0200
11835@@ -0,0 +1,20 @@
11836+#ifndef _VX_CVIRT_H
11837+#define _VX_CVIRT_H
11838+
11839+
11840+#ifdef __KERNEL__
11841+
11842+struct timespec;
11843+
11844+void vx_vsi_uptime(struct timespec *, struct timespec *);
11845+
11846+
11847+struct vx_info;
11848+
11849+void vx_update_load(struct vx_info *);
11850+
11851+
11852+int vx_do_syslog(int, char __user *, int);
11853+
11854+#endif /* __KERNEL__ */
11855+#endif /* _VX_CVIRT_H */
11856diff -NurpP --minimal linux-3.0.9/include/linux/vserver/cvirt_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/cvirt_cmd.h
11857--- linux-3.0.9/include/linux/vserver/cvirt_cmd.h 1970-01-01 01:00:00.000000000 +0100
11858+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/cvirt_cmd.h 2011-06-10 22:11:24.000000000 +0200
11859@@ -0,0 +1,53 @@
11860+#ifndef _VX_CVIRT_CMD_H
11861+#define _VX_CVIRT_CMD_H
11862+
11863+
11864+/* virtual host info name commands */
11865+
11866+#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0)
11867+#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0)
11868+
11869+struct vcmd_vhi_name_v0 {
11870+ uint32_t field;
11871+ char name[65];
11872+};
11873+
11874+
11875+enum vhi_name_field {
11876+ VHIN_CONTEXT = 0,
11877+ VHIN_SYSNAME,
11878+ VHIN_NODENAME,
11879+ VHIN_RELEASE,
11880+ VHIN_VERSION,
11881+ VHIN_MACHINE,
11882+ VHIN_DOMAINNAME,
11883+};
11884+
11885+
11886+#ifdef __KERNEL__
11887+
11888+#include <linux/compiler.h>
11889+
11890+extern int vc_set_vhi_name(struct vx_info *, void __user *);
11891+extern int vc_get_vhi_name(struct vx_info *, void __user *);
11892+
11893+#endif /* __KERNEL__ */
11894+
11895+#define VCMD_virt_stat VC_CMD(VSTAT, 3, 0)
11896+
11897+struct vcmd_virt_stat_v0 {
11898+ uint64_t offset;
11899+ uint64_t uptime;
11900+ uint32_t nr_threads;
11901+ uint32_t nr_running;
11902+ uint32_t nr_uninterruptible;
11903+ uint32_t nr_onhold;
11904+ uint32_t nr_forks;
11905+ uint32_t load[3];
11906+};
11907+
11908+#ifdef __KERNEL__
11909+extern int vc_virt_stat(struct vx_info *, void __user *);
11910+
11911+#endif /* __KERNEL__ */
11912+#endif /* _VX_CVIRT_CMD_H */
11913diff -NurpP --minimal linux-3.0.9/include/linux/vserver/cvirt_def.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/cvirt_def.h
11914--- linux-3.0.9/include/linux/vserver/cvirt_def.h 1970-01-01 01:00:00.000000000 +0100
11915+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/cvirt_def.h 2011-06-10 22:11:24.000000000 +0200
11916@@ -0,0 +1,80 @@
11917+#ifndef _VX_CVIRT_DEF_H
11918+#define _VX_CVIRT_DEF_H
11919+
11920+#include <linux/jiffies.h>
11921+#include <linux/spinlock.h>
11922+#include <linux/wait.h>
11923+#include <linux/time.h>
11924+#include <asm/atomic.h>
11925+
11926+
11927+struct _vx_usage_stat {
11928+ uint64_t user;
11929+ uint64_t nice;
11930+ uint64_t system;
11931+ uint64_t softirq;
11932+ uint64_t irq;
11933+ uint64_t idle;
11934+ uint64_t iowait;
11935+};
11936+
11937+struct _vx_syslog {
11938+ wait_queue_head_t log_wait;
11939+ spinlock_t logbuf_lock; /* lock for the log buffer */
11940+
11941+ unsigned long log_start; /* next char to be read by syslog() */
11942+ unsigned long con_start; /* next char to be sent to consoles */
11943+ unsigned long log_end; /* most-recently-written-char + 1 */
11944+ unsigned long logged_chars; /* #chars since last read+clear operation */
11945+
11946+ char log_buf[1024];
11947+};
11948+
11949+
11950+/* context sub struct */
11951+
11952+struct _vx_cvirt {
11953+ atomic_t nr_threads; /* number of current threads */
11954+ atomic_t nr_running; /* number of running threads */
11955+ atomic_t nr_uninterruptible; /* number of uninterruptible threads */
11956+
11957+ atomic_t nr_onhold; /* processes on hold */
11958+ uint32_t onhold_last; /* jiffies when put on hold */
11959+
11960+ struct timespec bias_ts; /* time offset to the host */
11961+ struct timespec bias_idle;
11962+ struct timespec bias_uptime; /* context creation point */
11963+ uint64_t bias_clock; /* offset in clock_t */
11964+
11965+ spinlock_t load_lock; /* lock for the load averages */
11966+ atomic_t load_updates; /* nr of load updates done so far */
11967+ uint32_t load_last; /* last time load was calculated */
11968+ uint32_t load[3]; /* load averages 1,5,15 */
11969+
11970+ atomic_t total_forks; /* number of forks so far */
11971+
11972+ struct _vx_syslog syslog;
11973+};
11974+
11975+struct _vx_cvirt_pc {
11976+ struct _vx_usage_stat cpustat;
11977+};
11978+
11979+
11980+#ifdef CONFIG_VSERVER_DEBUG
11981+
11982+static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt)
11983+{
11984+ printk("\t_vx_cvirt:\n");
11985+ printk("\t threads: %4d, %4d, %4d, %4d\n",
11986+ atomic_read(&cvirt->nr_threads),
11987+ atomic_read(&cvirt->nr_running),
11988+ atomic_read(&cvirt->nr_uninterruptible),
11989+ atomic_read(&cvirt->nr_onhold));
11990+ /* add rest here */
11991+ printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks));
11992+}
11993+
11994+#endif
11995+
11996+#endif /* _VX_CVIRT_DEF_H */
11997diff -NurpP --minimal linux-3.0.9/include/linux/vserver/debug.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/debug.h
11998--- linux-3.0.9/include/linux/vserver/debug.h 1970-01-01 01:00:00.000000000 +0100
11999+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/debug.h 2011-11-17 21:25:04.000000000 +0100
12000@@ -0,0 +1,145 @@
12001+#ifndef _VX_DEBUG_H
12002+#define _VX_DEBUG_H
12003+
12004+
12005+#define VXD_CBIT(n, m) (vs_debug_ ## n & (1 << (m)))
12006+#define VXD_CMIN(n, m) (vs_debug_ ## n > (m))
12007+#define VXD_MASK(n, m) (vs_debug_ ## n & (m))
12008+
12009+#define VXD_DEV(d) (d), (d)->bd_inode->i_ino, \
12010+ imajor((d)->bd_inode), iminor((d)->bd_inode)
12011+#define VXF_DEV "%p[%lu,%d:%d]"
12012+
12013+#if defined(CONFIG_QUOTES_UTF8)
12014+#define VS_Q_LQM "\xc2\xbb"
12015+#define VS_Q_RQM "\xc2\xab"
12016+#elif defined(CONFIG_QUOTES_ASCII)
12017+#define VS_Q_LQM "\x27"
12018+#define VS_Q_RQM "\x27"
12019+#else
12020+#define VS_Q_LQM "\xbb"
12021+#define VS_Q_RQM "\xab"
12022+#endif
12023+
12024+#define VS_Q(f) VS_Q_LQM f VS_Q_RQM
12025+
12026+
12027+#define vxd_path(p) \
12028+ ({ static char _buffer[PATH_MAX]; \
12029+ d_path(p, _buffer, sizeof(_buffer)); })
12030+
12031+#define vxd_cond_path(n) \
12032+ ((n) ? vxd_path(&(n)->path) : "<null>" )
12033+
12034+
12035+#ifdef CONFIG_VSERVER_DEBUG
12036+
12037+extern unsigned int vs_debug_switch;
12038+extern unsigned int vs_debug_xid;
12039+extern unsigned int vs_debug_nid;
12040+extern unsigned int vs_debug_tag;
12041+extern unsigned int vs_debug_net;
12042+extern unsigned int vs_debug_limit;
12043+extern unsigned int vs_debug_cres;
12044+extern unsigned int vs_debug_dlim;
12045+extern unsigned int vs_debug_quota;
12046+extern unsigned int vs_debug_cvirt;
12047+extern unsigned int vs_debug_space;
12048+extern unsigned int vs_debug_perm;
12049+extern unsigned int vs_debug_misc;
12050+
12051+
12052+#define VX_LOGLEVEL "vxD: "
12053+#define VX_PROC_FMT "%p: "
12054+#define VX_PROCESS current
12055+
12056+#define vxdprintk(c, f, x...) \
12057+ do { \
12058+ if (c) \
12059+ printk(VX_LOGLEVEL VX_PROC_FMT f "\n", \
12060+ VX_PROCESS , ##x); \
12061+ } while (0)
12062+
12063+#define vxlprintk(c, f, x...) \
12064+ do { \
12065+ if (c) \
12066+ printk(VX_LOGLEVEL f " @%s:%d\n", x); \
12067+ } while (0)
12068+
12069+#define vxfprintk(c, f, x...) \
12070+ do { \
12071+ if (c) \
12072+ printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
12073+ } while (0)
12074+
12075+
12076+struct vx_info;
12077+
12078+void dump_vx_info(struct vx_info *, int);
12079+void dump_vx_info_inactive(int);
12080+
12081+#else /* CONFIG_VSERVER_DEBUG */
12082+
12083+#define vs_debug_switch 0
12084+#define vs_debug_xid 0
12085+#define vs_debug_nid 0
12086+#define vs_debug_tag 0
12087+#define vs_debug_net 0
12088+#define vs_debug_limit 0
12089+#define vs_debug_cres 0
12090+#define vs_debug_dlim 0
12091+#define vs_debug_quota 0
12092+#define vs_debug_cvirt 0
12093+#define vs_debug_space 0
12094+#define vs_debug_perm 0
12095+#define vs_debug_misc 0
12096+
12097+#define vxdprintk(x...) do { } while (0)
12098+#define vxlprintk(x...) do { } while (0)
12099+#define vxfprintk(x...) do { } while (0)
12100+
12101+#endif /* CONFIG_VSERVER_DEBUG */
12102+
12103+
12104+#ifdef CONFIG_VSERVER_WARN
12105+
12106+#define VX_WARNLEVEL KERN_WARNING "vxW: "
12107+#define VX_WARN_TASK "[" VS_Q("%s") ",%u:#%u|%u|%u] "
12108+#define VX_WARN_XID "[xid #%u] "
12109+#define VX_WARN_NID "[nid #%u] "
12110+#define VX_WARN_TAG "[tag #%u] "
12111+
12112+#define vxwprintk(c, f, x...) \
12113+ do { \
12114+ if (c) \
12115+ printk(VX_WARNLEVEL f "\n", ##x); \
12116+ } while (0)
12117+
12118+#else /* CONFIG_VSERVER_WARN */
12119+
12120+#define vxwprintk(x...) do { } while (0)
12121+
12122+#endif /* CONFIG_VSERVER_WARN */
12123+
/*
 * Conditional warning helpers that prepend the caller's identity
 * (task, xid, nid or tag of `current`) to the message and forward it
 * to vxwprintk().
 *
 *   c     condition handed through to vxwprintk()
 *   f     printf-style format string literal (pasted after the prefix)
 *   x...  optional arguments for f
 *
 * All variants use `##x` so that a call without any extra arguments
 * does not leave a dangling comma in the expansion.
 */
#define vxwprintk_task(c, f, x...)				\
	vxwprintk(c, VX_WARN_TASK f,				\
		current->comm, current->pid,			\
		current->xid, current->nid, current->tag, ##x)
#define vxwprintk_xid(c, f, x...)				\
	vxwprintk(c, VX_WARN_XID f, current->xid, ##x)
#define vxwprintk_nid(c, f, x...)				\
	vxwprintk(c, VX_WARN_NID f, current->nid, ##x)
#define vxwprintk_tag(c, f, x...)				\
	vxwprintk(c, VX_WARN_TAG f, current->tag, ##x)
12134+
12135+#ifdef CONFIG_VSERVER_DEBUG
12136+#define vxd_assert_lock(l) assert_spin_locked(l)
12137+#define vxd_assert(c, f, x...) vxlprintk(!(c), \
12138+ "assertion [" f "] failed.", ##x, __FILE__, __LINE__)
12139+#else
12140+#define vxd_assert_lock(l) do { } while (0)
12141+#define vxd_assert(c, f, x...) do { } while (0)
12142+#endif
12143+
12144+
12145+#endif /* _VX_DEBUG_H */
12146diff -NurpP --minimal linux-3.0.9/include/linux/vserver/debug_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/debug_cmd.h
12147--- linux-3.0.9/include/linux/vserver/debug_cmd.h 1970-01-01 01:00:00.000000000 +0100
12148+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/debug_cmd.h 2011-06-10 22:11:24.000000000 +0200
12149@@ -0,0 +1,58 @@
12150+#ifndef _VX_DEBUG_CMD_H
12151+#define _VX_DEBUG_CMD_H
12152+
12153+
12154+/* debug commands */
12155+
12156+#define VCMD_dump_history VC_CMD(DEBUG, 1, 0)
12157+
12158+#define VCMD_read_history VC_CMD(DEBUG, 5, 0)
12159+#define VCMD_read_monitor VC_CMD(DEBUG, 6, 0)
12160+
12161+struct vcmd_read_history_v0 {
12162+ uint32_t index;
12163+ uint32_t count;
12164+ char __user *data;
12165+};
12166+
12167+struct vcmd_read_monitor_v0 {
12168+ uint32_t index;
12169+ uint32_t count;
12170+ char __user *data;
12171+};
12172+
12173+
12174+#ifdef __KERNEL__
12175+
12176+#ifdef CONFIG_COMPAT
12177+
12178+#include <asm/compat.h>
12179+
12180+struct vcmd_read_history_v0_x32 {
12181+ uint32_t index;
12182+ uint32_t count;
12183+ compat_uptr_t data_ptr;
12184+};
12185+
12186+struct vcmd_read_monitor_v0_x32 {
12187+ uint32_t index;
12188+ uint32_t count;
12189+ compat_uptr_t data_ptr;
12190+};
12191+
12192+#endif /* CONFIG_COMPAT */
12193+
12194+extern int vc_dump_history(uint32_t);
12195+
12196+extern int vc_read_history(uint32_t, void __user *);
12197+extern int vc_read_monitor(uint32_t, void __user *);
12198+
12199+#ifdef CONFIG_COMPAT
12200+
12201+extern int vc_read_history_x32(uint32_t, void __user *);
12202+extern int vc_read_monitor_x32(uint32_t, void __user *);
12203+
12204+#endif /* CONFIG_COMPAT */
12205+
12206+#endif /* __KERNEL__ */
12207+#endif /* _VX_DEBUG_CMD_H */
12208diff -NurpP --minimal linux-3.0.9/include/linux/vserver/device.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/device.h
12209--- linux-3.0.9/include/linux/vserver/device.h 1970-01-01 01:00:00.000000000 +0100
12210+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/device.h 2011-06-10 22:11:24.000000000 +0200
12211@@ -0,0 +1,15 @@
12212+#ifndef _VX_DEVICE_H
12213+#define _VX_DEVICE_H
12214+
12215+
12216+#define DATTR_CREATE 0x00000001
12217+#define DATTR_OPEN 0x00000002
12218+
12219+#define DATTR_REMAP 0x00000010
12220+
12221+#define DATTR_MASK 0x00000013
12222+
12223+
12224+#else /* _VX_DEVICE_H */
12225+#warning duplicate inclusion
12226+#endif /* _VX_DEVICE_H */
12227diff -NurpP --minimal linux-3.0.9/include/linux/vserver/device_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/device_cmd.h
12228--- linux-3.0.9/include/linux/vserver/device_cmd.h 1970-01-01 01:00:00.000000000 +0100
12229+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/device_cmd.h 2011-06-10 22:11:24.000000000 +0200
12230@@ -0,0 +1,44 @@
12231+#ifndef _VX_DEVICE_CMD_H
12232+#define _VX_DEVICE_CMD_H
12233+
12234+
12235+/* device vserver commands */
12236+
12237+#define VCMD_set_mapping VC_CMD(DEVICE, 1, 0)
12238+#define VCMD_unset_mapping VC_CMD(DEVICE, 2, 0)
12239+
12240+struct vcmd_set_mapping_v0 {
12241+ const char __user *device;
12242+ const char __user *target;
12243+ uint32_t flags;
12244+};
12245+
12246+
12247+#ifdef __KERNEL__
12248+
12249+#ifdef CONFIG_COMPAT
12250+
12251+#include <asm/compat.h>
12252+
12253+struct vcmd_set_mapping_v0_x32 {
12254+ compat_uptr_t device_ptr;
12255+ compat_uptr_t target_ptr;
12256+ uint32_t flags;
12257+};
12258+
12259+#endif /* CONFIG_COMPAT */
12260+
12261+#include <linux/compiler.h>
12262+
12263+extern int vc_set_mapping(struct vx_info *, void __user *);
12264+extern int vc_unset_mapping(struct vx_info *, void __user *);
12265+
12266+#ifdef CONFIG_COMPAT
12267+
12268+extern int vc_set_mapping_x32(struct vx_info *, void __user *);
12269+extern int vc_unset_mapping_x32(struct vx_info *, void __user *);
12270+
12271+#endif /* CONFIG_COMPAT */
12272+
12273+#endif /* __KERNEL__ */
12274+#endif /* _VX_DEVICE_CMD_H */
12275diff -NurpP --minimal linux-3.0.9/include/linux/vserver/device_def.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/device_def.h
12276--- linux-3.0.9/include/linux/vserver/device_def.h 1970-01-01 01:00:00.000000000 +0100
12277+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/device_def.h 2011-06-10 22:11:24.000000000 +0200
12278@@ -0,0 +1,17 @@
12279+#ifndef _VX_DEVICE_DEF_H
12280+#define _VX_DEVICE_DEF_H
12281+
12282+#include <linux/types.h>
12283+
12284+struct vx_dmap_target {
12285+ dev_t target;
12286+ uint32_t flags;
12287+};
12288+
12289+struct _vx_device {
12290+#ifdef CONFIG_VSERVER_DEVICE
12291+ struct vx_dmap_target targets[2];
12292+#endif
12293+};
12294+
12295+#endif /* _VX_DEVICE_DEF_H */
12296diff -NurpP --minimal linux-3.0.9/include/linux/vserver/dlimit.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/dlimit.h
12297--- linux-3.0.9/include/linux/vserver/dlimit.h 1970-01-01 01:00:00.000000000 +0100
12298+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/dlimit.h 2011-06-10 22:11:24.000000000 +0200
12299@@ -0,0 +1,54 @@
12300+#ifndef _VX_DLIMIT_H
12301+#define _VX_DLIMIT_H
12302+
12303+#include "switch.h"
12304+
12305+
12306+#ifdef __KERNEL__
12307+
12308+/* keep in sync with CDLIM_INFINITY */
12309+
12310+#define DLIM_INFINITY (~0ULL)
12311+
12312+#include <linux/spinlock.h>
12313+#include <linux/rcupdate.h>
12314+
12315+struct super_block;
12316+
12317+struct dl_info {
12318+ struct hlist_node dl_hlist; /* linked list of contexts */
12319+ struct rcu_head dl_rcu; /* the rcu head */
12320+ tag_t dl_tag; /* context tag */
12321+ atomic_t dl_usecnt; /* usage count */
12322+ atomic_t dl_refcnt; /* reference count */
12323+
12324+ struct super_block *dl_sb; /* associated superblock */
12325+
12326+ spinlock_t dl_lock; /* protect the values */
12327+
12328+ unsigned long long dl_space_used; /* used space in bytes */
12329+ unsigned long long dl_space_total; /* maximum space in bytes */
12330+ unsigned long dl_inodes_used; /* used inodes */
12331+ unsigned long dl_inodes_total; /* maximum inodes */
12332+
12333+ unsigned int dl_nrlmult; /* non root limit mult */
12334+};
12335+
12336+struct rcu_head;
12337+
12338+extern void rcu_free_dl_info(struct rcu_head *);
12339+extern void unhash_dl_info(struct dl_info *);
12340+
12341+extern struct dl_info *locate_dl_info(struct super_block *, tag_t);
12342+
12343+
12344+struct kstatfs;
12345+
12346+extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
12347+
12348+typedef uint64_t dlsize_t;
12349+
12350+#endif /* __KERNEL__ */
12351+#else /* _VX_DLIMIT_H */
12352+#warning duplicate inclusion
12353+#endif /* _VX_DLIMIT_H */
12354diff -NurpP --minimal linux-3.0.9/include/linux/vserver/dlimit_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/dlimit_cmd.h
12355--- linux-3.0.9/include/linux/vserver/dlimit_cmd.h 1970-01-01 01:00:00.000000000 +0100
12356+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/dlimit_cmd.h 2011-06-10 22:11:24.000000000 +0200
12357@@ -0,0 +1,109 @@
12358+#ifndef _VX_DLIMIT_CMD_H
12359+#define _VX_DLIMIT_CMD_H
12360+
12361+
12362+/* dlimit vserver commands */
12363+
12364+#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0)
12365+#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0)
12366+
12367+#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0)
12368+#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0)
12369+
12370+struct vcmd_ctx_dlimit_base_v0 {
12371+ const char __user *name;
12372+ uint32_t flags;
12373+};
12374+
12375+struct vcmd_ctx_dlimit_v0 {
12376+ const char __user *name;
12377+ uint32_t space_used; /* used space in kbytes */
12378+ uint32_t space_total; /* maximum space in kbytes */
12379+ uint32_t inodes_used; /* used inodes */
12380+ uint32_t inodes_total; /* maximum inodes */
12381+ uint32_t reserved; /* reserved for root in % */
12382+ uint32_t flags;
12383+};
12384+
12385+#define CDLIM_UNSET ((uint32_t)0UL)
12386+#define CDLIM_INFINITY ((uint32_t)~0UL)
12387+#define CDLIM_KEEP ((uint32_t)~1UL)
12388+
/*
 * Unit exponents: a 32-bit space value encoded with exponent e stands
 * for (value << (10 * e)) bytes.
 */
#define DLIME_UNIT	0
#define DLIME_KILO	1
#define DLIME_MEGA	2
#define DLIME_GIGA	3

/* flag bit: the flags word carries per-field unit exponents */
#define DLIMF_SHIFT	0x10

/*
 * Bit offsets handed in as `shift` below -- presumably selecting the
 * exponent of the space_used / space_total field; verify against callers.
 */
#define DLIMS_USED	0
#define DLIMS_TOTAL	2

/*
 * Expand a 32-bit space value to bytes.
 *
 * With DLIMF_SHIFT set in flags, the two-bit exponent found at bit
 * offset `shift` selects the unit; otherwise the value is in kbytes.
 */
static inline
uint64_t dlimit_space_32to64(uint32_t val, uint32_t flags, int shift)
{
	int exp = (flags & DLIMF_SHIFT) ?
		(flags >> shift) & DLIME_GIGA : DLIME_KILO;
	return ((uint64_t)val) << (10 * exp);
}

/*
 * Compress a byte count into a 32-bit space value.
 *
 * With DLIMF_SHIFT set in *flags, the value is scaled down by 1024
 * until it fits into 32 bits (at most DLIME_GIGA times) and the chosen
 * exponent is stored in *flags at bit offset `shift`.  Without it, the
 * value is converted to kbytes and *flags is left alone.
 *
 * Anything still wider than 32 bits after scaling is truncated by the
 * return conversion.
 */
static inline
uint32_t dlimit_space_64to32(uint64_t val, uint32_t *flags, int shift)
{
	int exp = 0;

	if (*flags & DLIMF_SHIFT) {
		/*
		 * '>=' rather than '>': exactly 2^32 does not fit into
		 * 32 bits either and would otherwise be returned as 0.
		 */
		while (val >= (1ULL << 32) && (exp < DLIME_GIGA)) {
			val >>= 10;
			exp++;
		}
		*flags &= ~((uint32_t)DLIME_GIGA << shift);
		*flags |= (uint32_t)exp << shift;
	} else
		val >>= 10;
	return val;
}
12423+
12424+#ifdef __KERNEL__
12425+
12426+#ifdef CONFIG_COMPAT
12427+
12428+#include <asm/compat.h>
12429+
12430+struct vcmd_ctx_dlimit_base_v0_x32 {
12431+ compat_uptr_t name_ptr;
12432+ uint32_t flags;
12433+};
12434+
12435+struct vcmd_ctx_dlimit_v0_x32 {
12436+ compat_uptr_t name_ptr;
12437+ uint32_t space_used; /* used space in kbytes */
12438+ uint32_t space_total; /* maximum space in kbytes */
12439+ uint32_t inodes_used; /* used inodes */
12440+ uint32_t inodes_total; /* maximum inodes */
12441+ uint32_t reserved; /* reserved for root in % */
12442+ uint32_t flags;
12443+};
12444+
12445+#endif /* CONFIG_COMPAT */
12446+
12447+#include <linux/compiler.h>
12448+
12449+extern int vc_add_dlimit(uint32_t, void __user *);
12450+extern int vc_rem_dlimit(uint32_t, void __user *);
12451+
12452+extern int vc_set_dlimit(uint32_t, void __user *);
12453+extern int vc_get_dlimit(uint32_t, void __user *);
12454+
12455+#ifdef CONFIG_COMPAT
12456+
12457+extern int vc_add_dlimit_x32(uint32_t, void __user *);
12458+extern int vc_rem_dlimit_x32(uint32_t, void __user *);
12459+
12460+extern int vc_set_dlimit_x32(uint32_t, void __user *);
12461+extern int vc_get_dlimit_x32(uint32_t, void __user *);
12462+
12463+#endif /* CONFIG_COMPAT */
12464+
12465+#endif /* __KERNEL__ */
12466+#endif /* _VX_DLIMIT_CMD_H */
12467diff -NurpP --minimal linux-3.0.9/include/linux/vserver/global.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/global.h
12468--- linux-3.0.9/include/linux/vserver/global.h 1970-01-01 01:00:00.000000000 +0100
12469+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/global.h 2011-06-10 22:11:24.000000000 +0200
12470@@ -0,0 +1,19 @@
12471+#ifndef _VX_GLOBAL_H
12472+#define _VX_GLOBAL_H
12473+
12474+
12475+extern atomic_t vx_global_ctotal;
12476+extern atomic_t vx_global_cactive;
12477+
12478+extern atomic_t nx_global_ctotal;
12479+extern atomic_t nx_global_cactive;
12480+
12481+extern atomic_t vs_global_nsproxy;
12482+extern atomic_t vs_global_fs;
12483+extern atomic_t vs_global_mnt_ns;
12484+extern atomic_t vs_global_uts_ns;
12485+extern atomic_t vs_global_user_ns;
12486+extern atomic_t vs_global_pid_ns;
12487+
12488+
12489+#endif /* _VX_GLOBAL_H */
12490diff -NurpP --minimal linux-3.0.9/include/linux/vserver/history.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/history.h
12491--- linux-3.0.9/include/linux/vserver/history.h 1970-01-01 01:00:00.000000000 +0100
12492+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/history.h 2011-06-10 22:11:24.000000000 +0200
12493@@ -0,0 +1,197 @@
12494+#ifndef _VX_HISTORY_H
12495+#define _VX_HISTORY_H
12496+
12497+
12498+enum {
12499+ VXH_UNUSED = 0,
12500+ VXH_THROW_OOPS = 1,
12501+
12502+ VXH_GET_VX_INFO,
12503+ VXH_PUT_VX_INFO,
12504+ VXH_INIT_VX_INFO,
12505+ VXH_SET_VX_INFO,
12506+ VXH_CLR_VX_INFO,
12507+ VXH_CLAIM_VX_INFO,
12508+ VXH_RELEASE_VX_INFO,
12509+ VXH_ALLOC_VX_INFO,
12510+ VXH_DEALLOC_VX_INFO,
12511+ VXH_HASH_VX_INFO,
12512+ VXH_UNHASH_VX_INFO,
12513+ VXH_LOC_VX_INFO,
12514+ VXH_LOOKUP_VX_INFO,
12515+ VXH_CREATE_VX_INFO,
12516+};
12517+
12518+struct _vxhe_vxi {
12519+ struct vx_info *ptr;
12520+ unsigned xid;
12521+ unsigned usecnt;
12522+ unsigned tasks;
12523+};
12524+
12525+struct _vxhe_set_clr {
12526+ void *data;
12527+};
12528+
12529+struct _vxhe_loc_lookup {
12530+ unsigned arg;
12531+};
12532+
12533+struct _vx_hist_entry {
12534+ void *loc;
12535+ unsigned short seq;
12536+ unsigned short type;
12537+ struct _vxhe_vxi vxi;
12538+ union {
12539+ struct _vxhe_set_clr sc;
12540+ struct _vxhe_loc_lookup ll;
12541+ };
12542+};
12543+
12544+#ifdef CONFIG_VSERVER_HISTORY
12545+
12546+extern unsigned volatile int vxh_active;
12547+
12548+struct _vx_hist_entry *vxh_advance(void *loc);
12549+
12550+
12551+static inline
12552+void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
12553+{
12554+ entry->vxi.ptr = vxi;
12555+ if (vxi) {
12556+ entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
12557+ entry->vxi.tasks = atomic_read(&vxi->vx_tasks);
12558+ entry->vxi.xid = vxi->vx_id;
12559+ }
12560+}
12561+
12562+
12563+#define __HERE__ current_text_addr()
12564+
12565+#define __VXH_BODY(__type, __data, __here) \
12566+ struct _vx_hist_entry *entry; \
12567+ \
12568+ preempt_disable(); \
12569+ entry = vxh_advance(__here); \
12570+ __data; \
12571+ entry->type = __type; \
12572+ preempt_enable();
12573+
12574+
12575+ /* pass vxi only */
12576+
12577+#define __VXH_SMPL \
12578+ __vxh_copy_vxi(entry, vxi)
12579+
12580+static inline
12581+void __vxh_smpl(struct vx_info *vxi, int __type, void *__here)
12582+{
12583+ __VXH_BODY(__type, __VXH_SMPL, __here)
12584+}
12585+
12586+ /* pass vxi and data (void *) */
12587+
12588+#define __VXH_DATA \
12589+ __vxh_copy_vxi(entry, vxi); \
12590+ entry->sc.data = data
12591+
12592+static inline
12593+void __vxh_data(struct vx_info *vxi, void *data,
12594+ int __type, void *__here)
12595+{
12596+ __VXH_BODY(__type, __VXH_DATA, __here)
12597+}
12598+
12599+ /* pass vxi and arg (long) */
12600+
12601+#define __VXH_LONG \
12602+ __vxh_copy_vxi(entry, vxi); \
12603+ entry->ll.arg = arg
12604+
12605+static inline
12606+void __vxh_long(struct vx_info *vxi, long arg,
12607+ int __type, void *__here)
12608+{
12609+ __VXH_BODY(__type, __VXH_LONG, __here)
12610+}
12611+
12612+
12613+static inline
12614+void __vxh_throw_oops(void *__here)
12615+{
12616+ __VXH_BODY(VXH_THROW_OOPS, {}, __here);
12617+ /* prevent further acquisition */
12618+ vxh_active = 0;
12619+}
12620+
12621+
/*
 * History recording wrappers.  Each one tags the event with its VXH_*
 * type and forwards to __vxh_smpl(), __vxh_data() or __vxh_long().
 *
 * None of the expansions carries a trailing semicolon: the caller
 * supplies it, so the macros behave like ordinary statements (also in
 * an unbraced if/else) and match the do { } while (0) variants used
 * when CONFIG_VSERVER_HISTORY is disabled.
 */
#define vxh_throw_oops()	__vxh_throw_oops(__HERE__)

#define __vxh_get_vx_info(v, h)	__vxh_smpl(v, VXH_GET_VX_INFO, h)
#define __vxh_put_vx_info(v, h)	__vxh_smpl(v, VXH_PUT_VX_INFO, h)

#define __vxh_init_vx_info(v, d, h) \
	__vxh_data(v, d, VXH_INIT_VX_INFO, h)
#define __vxh_set_vx_info(v, d, h) \
	__vxh_data(v, d, VXH_SET_VX_INFO, h)
#define __vxh_clr_vx_info(v, d, h) \
	__vxh_data(v, d, VXH_CLR_VX_INFO, h)

#define __vxh_claim_vx_info(v, d, h) \
	__vxh_data(v, d, VXH_CLAIM_VX_INFO, h)
#define __vxh_release_vx_info(v, d, h) \
	__vxh_data(v, d, VXH_RELEASE_VX_INFO, h)

#define vxh_alloc_vx_info(v) \
	__vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__)
#define vxh_dealloc_vx_info(v) \
	__vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__)

#define vxh_hash_vx_info(v) \
	__vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__)
#define vxh_unhash_vx_info(v) \
	__vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__)

#define vxh_loc_vx_info(v, l) \
	__vxh_long(v, l, VXH_LOC_VX_INFO, __HERE__)
#define vxh_lookup_vx_info(v, l) \
	__vxh_long(v, l, VXH_LOOKUP_VX_INFO, __HERE__)
#define vxh_create_vx_info(v, l) \
	__vxh_long(v, l, VXH_CREATE_VX_INFO, __HERE__)
12655+
12656+extern void vxh_dump_history(void);
12657+
12658+
12659+#else /* CONFIG_VSERVER_HISTORY */
12660+
12661+#define __HERE__ 0
12662+
12663+#define vxh_throw_oops() do { } while (0)
12664+
12665+#define __vxh_get_vx_info(v, h) do { } while (0)
12666+#define __vxh_put_vx_info(v, h) do { } while (0)
12667+
12668+#define __vxh_init_vx_info(v, d, h) do { } while (0)
12669+#define __vxh_set_vx_info(v, d, h) do { } while (0)
12670+#define __vxh_clr_vx_info(v, d, h) do { } while (0)
12671+
12672+#define __vxh_claim_vx_info(v, d, h) do { } while (0)
12673+#define __vxh_release_vx_info(v, d, h) do { } while (0)
12674+
12675+#define vxh_alloc_vx_info(v) do { } while (0)
12676+#define vxh_dealloc_vx_info(v) do { } while (0)
12677+
12678+#define vxh_hash_vx_info(v) do { } while (0)
12679+#define vxh_unhash_vx_info(v) do { } while (0)
12680+
12681+#define vxh_loc_vx_info(v, l) do { } while (0)
12682+#define vxh_lookup_vx_info(v, l) do { } while (0)
12683+#define vxh_create_vx_info(v, l) do { } while (0)
12684+
12685+#define vxh_dump_history() do { } while (0)
12686+
12687+
12688+#endif /* CONFIG_VSERVER_HISTORY */
12689+
12690+#endif /* _VX_HISTORY_H */
12691diff -NurpP --minimal linux-3.0.9/include/linux/vserver/inode.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/inode.h
12692--- linux-3.0.9/include/linux/vserver/inode.h 1970-01-01 01:00:00.000000000 +0100
12693+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/inode.h 2011-06-10 22:11:24.000000000 +0200
12694@@ -0,0 +1,39 @@
12695+#ifndef _VX_INODE_H
12696+#define _VX_INODE_H
12697+
12698+
12699+#define IATTR_TAG 0x01000000
12700+
12701+#define IATTR_ADMIN 0x00000001
12702+#define IATTR_WATCH 0x00000002
12703+#define IATTR_HIDE 0x00000004
12704+#define IATTR_FLAGS 0x00000007
12705+
12706+#define IATTR_BARRIER 0x00010000
12707+#define IATTR_IXUNLINK 0x00020000
12708+#define IATTR_IMMUTABLE 0x00040000
12709+#define IATTR_COW 0x00080000
12710+
12711+#ifdef __KERNEL__
12712+
12713+
12714+#ifdef CONFIG_VSERVER_PROC_SECURE
12715+#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE )
12716+#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
12717+#else
12718+#define IATTR_PROC_DEFAULT ( IATTR_ADMIN )
12719+#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
12720+#endif
12721+
12722+#define vx_hide_check(c, m) (((m) & IATTR_HIDE) ? vx_check(c, m) : 1)
12723+
12724+#endif /* __KERNEL__ */
12725+
12726+/* inode ioctls */
12727+
12728+#define FIOC_GETXFLG _IOR('x', 5, long)
12729+#define FIOC_SETXFLG _IOW('x', 6, long)
12730+
12731+#else /* _VX_INODE_H */
12732+#warning duplicate inclusion
12733+#endif /* _VX_INODE_H */
12734diff -NurpP --minimal linux-3.0.9/include/linux/vserver/inode_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/inode_cmd.h
12735--- linux-3.0.9/include/linux/vserver/inode_cmd.h 1970-01-01 01:00:00.000000000 +0100
12736+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/inode_cmd.h 2011-06-10 22:11:24.000000000 +0200
12737@@ -0,0 +1,59 @@
12738+#ifndef _VX_INODE_CMD_H
12739+#define _VX_INODE_CMD_H
12740+
12741+
12742+/* inode vserver commands */
12743+
12744+#define VCMD_get_iattr VC_CMD(INODE, 1, 1)
12745+#define VCMD_set_iattr VC_CMD(INODE, 2, 1)
12746+
12747+#define VCMD_fget_iattr VC_CMD(INODE, 3, 0)
12748+#define VCMD_fset_iattr VC_CMD(INODE, 4, 0)
12749+
12750+struct vcmd_ctx_iattr_v1 {
12751+ const char __user *name;
12752+ uint32_t tag;
12753+ uint32_t flags;
12754+ uint32_t mask;
12755+};
12756+
12757+struct vcmd_ctx_fiattr_v0 {
12758+ uint32_t tag;
12759+ uint32_t flags;
12760+ uint32_t mask;
12761+};
12762+
12763+
12764+#ifdef __KERNEL__
12765+
12766+
12767+#ifdef CONFIG_COMPAT
12768+
12769+#include <asm/compat.h>
12770+
12771+struct vcmd_ctx_iattr_v1_x32 {
12772+ compat_uptr_t name_ptr;
12773+ uint32_t tag;
12774+ uint32_t flags;
12775+ uint32_t mask;
12776+};
12777+
12778+#endif /* CONFIG_COMPAT */
12779+
12780+#include <linux/compiler.h>
12781+
12782+extern int vc_get_iattr(void __user *);
12783+extern int vc_set_iattr(void __user *);
12784+
12785+extern int vc_fget_iattr(uint32_t, void __user *);
12786+extern int vc_fset_iattr(uint32_t, void __user *);
12787+
12788+#ifdef CONFIG_COMPAT
12789+
12790+extern int vc_get_iattr_x32(void __user *);
12791+extern int vc_set_iattr_x32(void __user *);
12792+
12793+#endif /* CONFIG_COMPAT */
12794+
12795+#endif /* __KERNEL__ */
12796+#endif /* _VX_INODE_CMD_H */
12797diff -NurpP --minimal linux-3.0.9/include/linux/vserver/limit.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/limit.h
12798--- linux-3.0.9/include/linux/vserver/limit.h 1970-01-01 01:00:00.000000000 +0100
12799+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/limit.h 2011-06-10 22:11:24.000000000 +0200
12800@@ -0,0 +1,71 @@
12801+#ifndef _VX_LIMIT_H
12802+#define _VX_LIMIT_H
12803+
12804+#define VLIMIT_NSOCK 16
12805+#define VLIMIT_OPENFD 17
12806+#define VLIMIT_ANON 18
12807+#define VLIMIT_SHMEM 19
12808+#define VLIMIT_SEMARY 20
12809+#define VLIMIT_NSEMS 21
12810+#define VLIMIT_DENTRY 22
12811+#define VLIMIT_MAPPED 23
12812+
12813+
12814+#ifdef __KERNEL__
12815+
12816+#define VLIM_NOCHECK ((1L << VLIMIT_DENTRY) | (1L << RLIMIT_RSS))
12817+
12818+/* keep in sync with CRLIM_INFINITY */
12819+
12820+#define VLIM_INFINITY (~0ULL)
12821+
12822+#include <asm/atomic.h>
12823+#include <asm/resource.h>
12824+
12825+#ifndef RLIM_INFINITY
12826+#warning RLIM_INFINITY is undefined
12827+#endif
12828+
12829+#define __rlim_val(l, r, v) ((l)->res[r].v)
12830+
12831+#define __rlim_soft(l, r) __rlim_val(l, r, soft)
12832+#define __rlim_hard(l, r) __rlim_val(l, r, hard)
12833+
12834+#define __rlim_rcur(l, r) __rlim_val(l, r, rcur)
12835+#define __rlim_rmin(l, r) __rlim_val(l, r, rmin)
12836+#define __rlim_rmax(l, r) __rlim_val(l, r, rmax)
12837+
12838+#define __rlim_lhit(l, r) __rlim_val(l, r, lhit)
12839+#define __rlim_hit(l, r) atomic_inc(&__rlim_lhit(l, r))
12840+
12841+typedef atomic_long_t rlim_atomic_t;
12842+typedef unsigned long rlim_t;
12843+
12844+#define __rlim_get(l, r) atomic_long_read(&__rlim_rcur(l, r))
12845+#define __rlim_set(l, r, v) atomic_long_set(&__rlim_rcur(l, r), v)
12846+#define __rlim_inc(l, r) atomic_long_inc(&__rlim_rcur(l, r))
12847+#define __rlim_dec(l, r) atomic_long_dec(&__rlim_rcur(l, r))
12848+#define __rlim_add(l, r, v) atomic_long_add(v, &__rlim_rcur(l, r))
12849+#define __rlim_sub(l, r, v) atomic_long_sub(v, &__rlim_rcur(l, r))
12850+
12851+
12852+#if (RLIM_INFINITY == VLIM_INFINITY)
12853+#define VX_VLIM(r) ((long long)(long)(r))
12854+#define VX_RLIM(v) ((rlim_t)(v))
12855+#else
12856+#define VX_VLIM(r) (((r) == RLIM_INFINITY) \
12857+ ? VLIM_INFINITY : (long long)(r))
12858+#define VX_RLIM(v) (((v) == VLIM_INFINITY) \
12859+ ? RLIM_INFINITY : (rlim_t)(v))
12860+#endif
12861+
12862+struct sysinfo;
12863+
12864+void vx_vsi_meminfo(struct sysinfo *);
12865+void vx_vsi_swapinfo(struct sysinfo *);
12866+long vx_vsi_cached(struct sysinfo *);
12867+
12868+#define NUM_LIMITS 24
12869+
12870+#endif /* __KERNEL__ */
12871+#endif /* _VX_LIMIT_H */
12872diff -NurpP --minimal linux-3.0.9/include/linux/vserver/limit_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/limit_cmd.h
12873--- linux-3.0.9/include/linux/vserver/limit_cmd.h 1970-01-01 01:00:00.000000000 +0100
12874+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/limit_cmd.h 2011-06-10 22:11:24.000000000 +0200
12875@@ -0,0 +1,71 @@
12876+#ifndef _VX_LIMIT_CMD_H
12877+#define _VX_LIMIT_CMD_H
12878+
12879+
12880+/* rlimit vserver commands */
12881+
12882+#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0)
12883+#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0)
12884+#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0)
12885+#define VCMD_reset_hits VC_CMD(RLIMIT, 7, 0)
12886+#define VCMD_reset_minmax VC_CMD(RLIMIT, 9, 0)
12887+
12888+struct vcmd_ctx_rlimit_v0 {
12889+ uint32_t id;
12890+ uint64_t minimum;
12891+ uint64_t softlimit;
12892+ uint64_t maximum;
12893+};
12894+
12895+struct vcmd_ctx_rlimit_mask_v0 {
12896+ uint32_t minimum;
12897+ uint32_t softlimit;
12898+ uint32_t maximum;
12899+};
12900+
12901+#define VCMD_rlimit_stat VC_CMD(VSTAT, 1, 0)
12902+
12903+struct vcmd_rlimit_stat_v0 {
12904+ uint32_t id;
12905+ uint32_t hits;
12906+ uint64_t value;
12907+ uint64_t minimum;
12908+ uint64_t maximum;
12909+};
12910+
12911+#define CRLIM_UNSET (0ULL)
12912+#define CRLIM_INFINITY (~0ULL)
12913+#define CRLIM_KEEP (~1ULL)
12914+
12915+#ifdef __KERNEL__
12916+
12917+#ifdef CONFIG_IA32_EMULATION
12918+
12919+struct vcmd_ctx_rlimit_v0_x32 {
12920+ uint32_t id;
12921+ uint64_t minimum;
12922+ uint64_t softlimit;
12923+ uint64_t maximum;
12924+} __attribute__ ((packed));
12925+
12926+#endif /* CONFIG_IA32_EMULATION */
12927+
12928+#include <linux/compiler.h>
12929+
12930+extern int vc_get_rlimit_mask(uint32_t, void __user *);
12931+extern int vc_get_rlimit(struct vx_info *, void __user *);
12932+extern int vc_set_rlimit(struct vx_info *, void __user *);
12933+extern int vc_reset_hits(struct vx_info *, void __user *);
12934+extern int vc_reset_minmax(struct vx_info *, void __user *);
12935+
12936+extern int vc_rlimit_stat(struct vx_info *, void __user *);
12937+
12938+#ifdef CONFIG_IA32_EMULATION
12939+
12940+extern int vc_get_rlimit_x32(struct vx_info *, void __user *);
12941+extern int vc_set_rlimit_x32(struct vx_info *, void __user *);
12942+
12943+#endif /* CONFIG_IA32_EMULATION */
12944+
12945+#endif /* __KERNEL__ */
12946+#endif /* _VX_LIMIT_CMD_H */
12947diff -NurpP --minimal linux-3.0.9/include/linux/vserver/limit_def.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/limit_def.h
12948--- linux-3.0.9/include/linux/vserver/limit_def.h 1970-01-01 01:00:00.000000000 +0100
12949+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/limit_def.h 2011-06-10 22:11:24.000000000 +0200
12950@@ -0,0 +1,47 @@
12951+#ifndef _VX_LIMIT_DEF_H
12952+#define _VX_LIMIT_DEF_H
12953+
12954+#include <asm/atomic.h>
12955+#include <asm/resource.h>
12956+
12957+#include "limit.h"
12958+
12959+
12960+struct _vx_res_limit {
12961+ rlim_t soft; /* Context soft limit */
12962+ rlim_t hard; /* Context hard limit */
12963+
12964+ rlim_atomic_t rcur; /* Current value */
12965+ rlim_t rmin; /* Context minimum */
12966+ rlim_t rmax; /* Context maximum */
12967+
12968+ atomic_t lhit; /* Limit hits */
12969+};
12970+
12971+/* context sub struct */
12972+
12973+struct _vx_limit {
12974+ struct _vx_res_limit res[NUM_LIMITS];
12975+};
12976+
12977+#ifdef CONFIG_VSERVER_DEBUG
12978+
12979+static inline void __dump_vx_limit(struct _vx_limit *limit)
12980+{
12981+ int i;
12982+
12983+ printk("\t_vx_limit:");
12984+ for (i = 0; i < NUM_LIMITS; i++) {
12985+ printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n",
12986+ i, (unsigned long)__rlim_get(limit, i),
12987+ (unsigned long)__rlim_rmin(limit, i),
12988+ (unsigned long)__rlim_rmax(limit, i),
12989+ (long)__rlim_soft(limit, i),
12990+ (long)__rlim_hard(limit, i),
12991+ atomic_read(&__rlim_lhit(limit, i)));
12992+ }
12993+}
12994+
12995+#endif
12996+
12997+#endif /* _VX_LIMIT_DEF_H */
12998diff -NurpP --minimal linux-3.0.9/include/linux/vserver/limit_int.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/limit_int.h
12999--- linux-3.0.9/include/linux/vserver/limit_int.h 1970-01-01 01:00:00.000000000 +0100
13000+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/limit_int.h 2011-06-10 22:11:24.000000000 +0200
13001@@ -0,0 +1,198 @@
13002+#ifndef _VX_LIMIT_INT_H
13003+#define _VX_LIMIT_INT_H
13004+
13005+#include "context.h"
13006+
13007+#ifdef __KERNEL__
13008+
13009+#define VXD_RCRES_COND(r) VXD_CBIT(cres, r)
13010+#define VXD_RLIMIT_COND(r) VXD_CBIT(limit, r)
13011+
13012+extern const char *vlimit_name[NUM_LIMITS];
13013+
13014+static inline void __vx_acc_cres(struct vx_info *vxi,
13015+ int res, int dir, void *_data, char *_file, int _line)
13016+{
13017+ if (VXD_RCRES_COND(res))
13018+ vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)",
13019+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
13020+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
13021+ (dir > 0) ? "++" : "--", _data, _file, _line);
13022+ if (!vxi)
13023+ return;
13024+
13025+ if (dir > 0)
13026+ __rlim_inc(&vxi->limit, res);
13027+ else
13028+ __rlim_dec(&vxi->limit, res);
13029+}
13030+
13031+static inline void __vx_add_cres(struct vx_info *vxi,
13032+ int res, int amount, void *_data, char *_file, int _line)
13033+{
13034+ if (VXD_RCRES_COND(res))
13035+ vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)",
13036+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
13037+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
13038+ amount, _data, _file, _line);
13039+ if (amount == 0)
13040+ return;
13041+ if (!vxi)
13042+ return;
13043+ __rlim_add(&vxi->limit, res, amount);
13044+}
13045+
13046+static inline
13047+int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value)
13048+{
13049+ int cond = (value > __rlim_rmax(limit, res));
13050+
13051+ if (cond)
13052+ __rlim_rmax(limit, res) = value;
13053+ return cond;
13054+}
13055+
13056+static inline
13057+int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value)
13058+{
13059+ int cond = (value < __rlim_rmin(limit, res));
13060+
13061+ if (cond)
13062+ __rlim_rmin(limit, res) = value;
13063+ return cond;
13064+}
13065+
13066+static inline
13067+void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value)
13068+{
13069+ if (!__vx_cres_adjust_max(limit, res, value))
13070+ __vx_cres_adjust_min(limit, res, value);
13071+}
13072+
13073+
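13074+/* return values:
13075+ +1 ... over soft limit, but hard limit not hit (also: no context or num == 0)
13076+ -1 ... within soft limit (or no soft limit set)
13077+ 0 ... over hard limit (hit counter incremented) */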
13078+
13079+static inline int __vx_cres_avail(struct vx_info *vxi,
13080+ int res, int num, char *_file, int _line)
13081+{
13082+ struct _vx_limit *limit;
13083+ rlim_t value;
13084+
13085+ if (VXD_RLIMIT_COND(res))
13086+ vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d",
13087+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
13088+ (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1),
13089+ (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1),
13090+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
13091+ num, _file, _line);
13092+ if (!vxi)
13093+ return 1;
13094+
13095+ limit = &vxi->limit;
13096+ value = __rlim_get(limit, res);
13097+
13098+ if (!__vx_cres_adjust_max(limit, res, value))
13099+ __vx_cres_adjust_min(limit, res, value);
13100+
13101+ if (num == 0)
13102+ return 1;
13103+
13104+ if (__rlim_soft(limit, res) == RLIM_INFINITY)
13105+ return -1;
13106+ if (value + num <= __rlim_soft(limit, res))
13107+ return -1;
13108+
13109+ if (__rlim_hard(limit, res) == RLIM_INFINITY)
13110+ return 1;
13111+ if (value + num <= __rlim_hard(limit, res))
13112+ return 1;
13113+
13114+ __rlim_hit(limit, res);
13115+ return 0;
13116+}
13117+
13118+
13119+static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 };
13120+
13121+static inline
13122+rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array)
13123+{
13124+ rlim_t value, sum = 0;
13125+ int res;
13126+
13127+ while ((res = *array++)) {
13128+ value = __rlim_get(limit, res);
13129+ __vx_cres_fixup(limit, res, value);
13130+ sum += value;
13131+ }
13132+ return sum;
13133+}
13134+
13135+static inline
13136+rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array)
13137+{
13138+ rlim_t value = __vx_cres_array_sum(limit, array + 1);
13139+ int res = *array;
13140+
13141+ if (value == __rlim_get(limit, res))
13142+ return value;
13143+
13144+ __rlim_set(limit, res, value);
13145+ /* now adjust min/max */
13146+ if (!__vx_cres_adjust_max(limit, res, value))
13147+ __vx_cres_adjust_min(limit, res, value);
13148+
13149+ return value;
13150+}
13151+
13152+static inline int __vx_cres_array_avail(struct vx_info *vxi,
13153+ const int *array, int num, char *_file, int _line)
13154+{
13155+ struct _vx_limit *limit;
13156+ rlim_t value = 0;
13157+ int res;
13158+
13159+ if (num == 0)
13160+ return 1;
13161+ if (!vxi)
13162+ return 1;
13163+
13164+ limit = &vxi->limit;
13165+ res = *array;
13166+ value = __vx_cres_array_sum(limit, array + 1);
13167+
13168+ __rlim_set(limit, res, value);
13169+ __vx_cres_fixup(limit, res, value);
13170+
13171+ return __vx_cres_avail(vxi, res, num, _file, _line);
13172+}
13173+
13174+
13175+static inline void vx_limit_fixup(struct _vx_limit *limit, int id)
13176+{
13177+ rlim_t value;
13178+ int res;
13179+
13180+ /* complex resources first */
13181+ if ((id < 0) || (id == RLIMIT_RSS))
13182+ __vx_cres_array_fixup(limit, VLA_RSS);
13183+
13184+ for (res = 0; res < NUM_LIMITS; res++) {
13185+ if ((id > 0) && (res != id))
13186+ continue;
13187+
13188+ value = __rlim_get(limit, res);
13189+ __vx_cres_fixup(limit, res, value);
13190+
13191+ /* not supposed to happen, maybe warn? */
13192+ if (__rlim_rmax(limit, res) > __rlim_hard(limit, res))
13193+ __rlim_rmax(limit, res) = __rlim_hard(limit, res);
13194+ }
13195+}
13196+
13197+
13198+#endif /* __KERNEL__ */
13199+#endif /* _VX_LIMIT_INT_H */
13200diff -NurpP --minimal linux-3.0.9/include/linux/vserver/monitor.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/monitor.h
13201--- linux-3.0.9/include/linux/vserver/monitor.h 1970-01-01 01:00:00.000000000 +0100
13202+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/monitor.h 2011-06-10 22:11:24.000000000 +0200
13203@@ -0,0 +1,96 @@
13204+#ifndef _VX_MONITOR_H
13205+#define _VX_MONITOR_H
13206+
13207+#include <linux/types.h>
13208+
13209+enum {
13210+ VXM_UNUSED = 0,
13211+
13212+ VXM_SYNC = 0x10,
13213+
13214+ VXM_UPDATE = 0x20,
13215+ VXM_UPDATE_1,
13216+ VXM_UPDATE_2,
13217+
13218+ VXM_RQINFO_1 = 0x24,
13219+ VXM_RQINFO_2,
13220+
13221+ VXM_ACTIVATE = 0x40,
13222+ VXM_DEACTIVATE,
13223+ VXM_IDLE,
13224+
13225+ VXM_HOLD = 0x44,
13226+ VXM_UNHOLD,
13227+
13228+ VXM_MIGRATE = 0x48,
13229+ VXM_RESCHED,
13230+
13231+ /* all other bits are flags */
13232+ VXM_SCHED = 0x80,
13233+};
13234+
13235+struct _vxm_update_1 {
13236+ uint32_t tokens_max;
13237+ uint32_t fill_rate;
13238+ uint32_t interval;
13239+};
13240+
13241+struct _vxm_update_2 {
13242+ uint32_t tokens_min;
13243+ uint32_t fill_rate;
13244+ uint32_t interval;
13245+};
13246+
13247+struct _vxm_rqinfo_1 {
13248+ uint16_t running;
13249+ uint16_t onhold;
13250+ uint16_t iowait;
13251+ uint16_t uintr;
13252+ uint32_t idle_tokens;
13253+};
13254+
13255+struct _vxm_rqinfo_2 {
13256+ uint32_t norm_time;
13257+ uint32_t idle_time;
13258+ uint32_t idle_skip;
13259+};
13260+
13261+struct _vxm_sched {
13262+ uint32_t tokens;
13263+ uint32_t norm_time;
13264+ uint32_t idle_time;
13265+};
13266+
13267+struct _vxm_task {
13268+ uint16_t pid;
13269+ uint16_t state;
13270+};
13271+
13272+struct _vxm_event {
13273+ uint32_t jif;
13274+ union {
13275+ uint32_t seq;
13276+ uint32_t sec;
13277+ };
13278+ union {
13279+ uint32_t tokens;
13280+ uint32_t nsec;
13281+ struct _vxm_task tsk;
13282+ };
13283+};
13284+
13285+struct _vx_mon_entry {
13286+ uint16_t type;
13287+ uint16_t xid;
13288+ union {
13289+ struct _vxm_event ev;
13290+ struct _vxm_sched sd;
13291+ struct _vxm_update_1 u1;
13292+ struct _vxm_update_2 u2;
13293+ struct _vxm_rqinfo_1 q1;
13294+ struct _vxm_rqinfo_2 q2;
13295+ };
13296+};
13297+
13298+
13299+#endif /* _VX_MONITOR_H */
13300diff -NurpP --minimal linux-3.0.9/include/linux/vserver/network.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/network.h
13301--- linux-3.0.9/include/linux/vserver/network.h 1970-01-01 01:00:00.000000000 +0100
13302+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/network.h 2011-08-18 16:30:48.000000000 +0200
13303@@ -0,0 +1,148 @@
13304+#ifndef _VX_NETWORK_H
13305+#define _VX_NETWORK_H
13306+
13307+#include <linux/types.h>
13308+
13309+
13310+#define MAX_N_CONTEXT 65535 /* Arbitrary limit */
13311+
13312+
13313+/* network flags */
13314+
13315+#define NXF_INFO_PRIVATE 0x00000008
13316+
13317+#define NXF_SINGLE_IP 0x00000100
13318+#define NXF_LBACK_REMAP 0x00000200
13319+#define NXF_LBACK_ALLOW 0x00000400
13320+
13321+#define NXF_HIDE_NETIF 0x02000000
13322+#define NXF_HIDE_LBACK 0x04000000
13323+
13324+#define NXF_STATE_SETUP (1ULL << 32)
13325+#define NXF_STATE_ADMIN (1ULL << 34)
13326+
13327+#define NXF_SC_HELPER (1ULL << 36)
13328+#define NXF_PERSISTENT (1ULL << 38)
13329+
13330+#define NXF_ONE_TIME (0x0005ULL << 32)
13331+
13332+
13333+#define NXF_INIT_SET (__nxf_init_set())
13334+
13335+static inline uint64_t __nxf_init_set(void) {
13336+ return NXF_STATE_ADMIN
13337+#ifdef CONFIG_VSERVER_AUTO_LBACK
13338+ | NXF_LBACK_REMAP
13339+ | NXF_HIDE_LBACK
13340+#endif
13341+#ifdef CONFIG_VSERVER_AUTO_SINGLE
13342+ | NXF_SINGLE_IP
13343+#endif
13344+ | NXF_HIDE_NETIF;
13345+}
13346+
13347+
13348+/* network caps */
13349+
13350+#define NXC_TUN_CREATE 0x00000001
13351+
13352+#define NXC_RAW_ICMP 0x00000100
13353+
13354+#define NXC_MULTICAST 0x00001000
13355+
13356+
13357+/* address types */
13358+
13359+#define NXA_TYPE_IPV4 0x0001
13360+#define NXA_TYPE_IPV6 0x0002
13361+
13362+#define NXA_TYPE_NONE 0x0000
13363+#define NXA_TYPE_ANY 0x00FF
13364+
13365+#define NXA_TYPE_ADDR 0x0010
13366+#define NXA_TYPE_MASK 0x0020
13367+#define NXA_TYPE_RANGE 0x0040
13368+
13369+#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE)
13370+
13371+#define NXA_MOD_BCAST 0x0100
13372+#define NXA_MOD_LBACK 0x0200
13373+
13374+#define NXA_LOOPBACK 0x1000
13375+
13376+#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK)
13377+#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK)
13378+
13379+#ifdef __KERNEL__
13380+
13381+#include <linux/list.h>
13382+#include <linux/spinlock.h>
13383+#include <linux/rcupdate.h>
13384+#include <linux/in.h>
13385+#include <linux/in6.h>
13386+#include <asm/atomic.h>
13387+
13388+struct nx_addr_v4 {
13389+ struct nx_addr_v4 *next;
13390+ struct in_addr ip[2];
13391+ struct in_addr mask;
13392+ uint16_t type;
13393+ uint16_t flags;
13394+};
13395+
13396+struct nx_addr_v6 {
13397+ struct nx_addr_v6 *next;
13398+ struct in6_addr ip;
13399+ struct in6_addr mask;
13400+ uint32_t prefix;
13401+ uint16_t type;
13402+ uint16_t flags;
13403+};
13404+
13405+struct nx_info {
13406+ struct hlist_node nx_hlist; /* linked list of nxinfos */
13407+ nid_t nx_id; /* vnet id */
13408+ atomic_t nx_usecnt; /* usage count */
13409+ atomic_t nx_tasks; /* tasks count */
13410+ int nx_state; /* context state */
13411+
13412+ uint64_t nx_flags; /* network flag word */
13413+ uint64_t nx_ncaps; /* network capabilities */
13414+
13415+ struct in_addr v4_lback; /* Loopback address */
13416+ struct in_addr v4_bcast; /* Broadcast address */
13417+ struct nx_addr_v4 v4; /* First/Single ipv4 address */
13418+#ifdef CONFIG_IPV6
13419+ struct nx_addr_v6 v6; /* First/Single ipv6 address */
13420+#endif
13421+ char nx_name[65]; /* network context name */
13422+};
13423+
13424+
13425+/* status flags */
13426+
13427+#define NXS_HASHED 0x0001
13428+#define NXS_SHUTDOWN 0x0100
13429+#define NXS_RELEASED 0x8000
13430+
13431+extern struct nx_info *lookup_nx_info(int);
13432+
13433+extern int get_nid_list(int, unsigned int *, int);
13434+extern int nid_is_hashed(nid_t);
13435+
13436+extern int nx_migrate_task(struct task_struct *, struct nx_info *);
13437+
13438+extern long vs_net_change(struct nx_info *, unsigned int);
13439+
13440+struct sock;
13441+
13442+
13443+#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE)
13444+#ifdef CONFIG_IPV6
13445+#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE)
13446+#else
13447+#define NX_IPV6(n) (0)
13448+#endif
13449+
13450+#endif /* __KERNEL__ */
13451+#endif /* _VX_NETWORK_H */
13452diff -NurpP --minimal linux-3.0.9/include/linux/vserver/network_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/network_cmd.h
13453--- linux-3.0.9/include/linux/vserver/network_cmd.h 1970-01-01 01:00:00.000000000 +0100
13454+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/network_cmd.h 2011-06-10 22:11:24.000000000 +0200
13455@@ -0,0 +1,164 @@
13456+#ifndef _VX_NETWORK_CMD_H
13457+#define _VX_NETWORK_CMD_H
13458+
13459+
13460+/* vinfo commands */
13461+
13462+#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
13463+
13464+#ifdef __KERNEL__
13465+extern int vc_task_nid(uint32_t);
13466+
13467+#endif /* __KERNEL__ */
13468+
13469+#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
13470+
13471+struct vcmd_nx_info_v0 {
13472+ uint32_t nid;
13473+ /* more to come */
13474+};
13475+
13476+#ifdef __KERNEL__
13477+extern int vc_nx_info(struct nx_info *, void __user *);
13478+
13479+#endif /* __KERNEL__ */
13480+
13481+#include <linux/in.h>
13482+#include <linux/in6.h>
13483+
13484+#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0)
13485+#define VCMD_net_create VC_CMD(VNET, 1, 1)
13486+
13487+struct vcmd_net_create {
13488+ uint64_t flagword;
13489+};
13490+
13491+#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0)
13492+
13493+#define VCMD_net_add VC_CMD(NETALT, 1, 0)
13494+#define VCMD_net_remove VC_CMD(NETALT, 2, 0)
13495+
13496+struct vcmd_net_addr_v0 {
13497+ uint16_t type;
13498+ uint16_t count;
13499+ struct in_addr ip[4];
13500+ struct in_addr mask[4];
13501+};
13502+
13503+#define VCMD_net_add_ipv4_v1 VC_CMD(NETALT, 1, 1)
13504+#define VCMD_net_rem_ipv4_v1 VC_CMD(NETALT, 2, 1)
13505+
13506+struct vcmd_net_addr_ipv4_v1 {
13507+ uint16_t type;
13508+ uint16_t flags;
13509+ struct in_addr ip;
13510+ struct in_addr mask;
13511+};
13512+
13513+#define VCMD_net_add_ipv4 VC_CMD(NETALT, 1, 2)
13514+#define VCMD_net_rem_ipv4 VC_CMD(NETALT, 2, 2)
13515+
13516+struct vcmd_net_addr_ipv4_v2 {
13517+ uint16_t type;
13518+ uint16_t flags;
13519+ struct in_addr ip;
13520+ struct in_addr ip2;
13521+ struct in_addr mask;
13522+};
13523+
13524+#define VCMD_net_add_ipv6 VC_CMD(NETALT, 3, 1)
13525+#define VCMD_net_remove_ipv6 VC_CMD(NETALT, 4, 1)
13526+
13527+struct vcmd_net_addr_ipv6_v1 {
13528+ uint16_t type;
13529+ uint16_t flags;
13530+ uint32_t prefix;
13531+ struct in6_addr ip;
13532+ struct in6_addr mask;
13533+};
13534+
13535+#define VCMD_add_match_ipv4 VC_CMD(NETALT, 5, 0)
13536+#define VCMD_get_match_ipv4 VC_CMD(NETALT, 6, 0)
13537+
13538+struct vcmd_match_ipv4_v0 {
13539+ uint16_t type;
13540+ uint16_t flags;
13541+ uint16_t parent;
13542+ uint16_t prefix;
13543+ struct in_addr ip;
13544+ struct in_addr ip2;
13545+ struct in_addr mask;
13546+};
13547+
13548+#define VCMD_add_match_ipv6 VC_CMD(NETALT, 7, 0)
13549+#define VCMD_get_match_ipv6 VC_CMD(NETALT, 8, 0)
13550+
13551+struct vcmd_match_ipv6_v0 {
13552+ uint16_t type;
13553+ uint16_t flags;
13554+ uint16_t parent;
13555+ uint16_t prefix;
13556+ struct in6_addr ip;
13557+ struct in6_addr ip2;
13558+ struct in6_addr mask;
13559+};
13560+
13561+
13562+#ifdef __KERNEL__
13563+extern int vc_net_create(uint32_t, void __user *);
13564+extern int vc_net_migrate(struct nx_info *, void __user *);
13565+
13566+extern int vc_net_add(struct nx_info *, void __user *);
13567+extern int vc_net_remove(struct nx_info *, void __user *);
13568+
13569+extern int vc_net_add_ipv4_v1(struct nx_info *, void __user *);
13570+extern int vc_net_add_ipv4(struct nx_info *, void __user *);
13571+
13572+extern int vc_net_rem_ipv4_v1(struct nx_info *, void __user *);
13573+extern int vc_net_rem_ipv4(struct nx_info *, void __user *);
13574+
13575+extern int vc_net_add_ipv6(struct nx_info *, void __user *);
13576+extern int vc_net_remove_ipv6(struct nx_info *, void __user *);
13577+
13578+extern int vc_add_match_ipv4(struct nx_info *, void __user *);
13579+extern int vc_get_match_ipv4(struct nx_info *, void __user *);
13580+
13581+extern int vc_add_match_ipv6(struct nx_info *, void __user *);
13582+extern int vc_get_match_ipv6(struct nx_info *, void __user *);
13583+
13584+#endif /* __KERNEL__ */
13585+
13586+
13587+/* flag commands */
13588+
13589+#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0)
13590+#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0)
13591+
13592+struct vcmd_net_flags_v0 {
13593+ uint64_t flagword;
13594+ uint64_t mask;
13595+};
13596+
13597+#ifdef __KERNEL__
13598+extern int vc_get_nflags(struct nx_info *, void __user *);
13599+extern int vc_set_nflags(struct nx_info *, void __user *);
13600+
13601+#endif /* __KERNEL__ */
13602+
13603+
13604+/* network caps commands */
13605+
13606+#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0)
13607+#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0)
13608+
13609+struct vcmd_net_caps_v0 {
13610+ uint64_t ncaps;
13611+ uint64_t cmask;
13612+};
13613+
13614+#ifdef __KERNEL__
13615+extern int vc_get_ncaps(struct nx_info *, void __user *);
13616+extern int vc_set_ncaps(struct nx_info *, void __user *);
13617+
13618+#endif /* __KERNEL__ */
13619+#endif /* _VX_NETWORK_CMD_H */
13620diff -NurpP --minimal linux-3.0.9/include/linux/vserver/percpu.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/percpu.h
13621--- linux-3.0.9/include/linux/vserver/percpu.h 1970-01-01 01:00:00.000000000 +0100
13622+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/percpu.h 2011-06-10 22:11:24.000000000 +0200
13623@@ -0,0 +1,14 @@
13624+#ifndef _VX_PERCPU_H
13625+#define _VX_PERCPU_H
13626+
13627+#include "cvirt_def.h"
13628+#include "sched_def.h"
13629+
13630+struct _vx_percpu {
13631+ struct _vx_cvirt_pc cvirt;
13632+ struct _vx_sched_pc sched;
13633+};
13634+
13635+#define PERCPU_PERCTX (sizeof(struct _vx_percpu))
13636+
13637+#endif /* _VX_PERCPU_H */
13638diff -NurpP --minimal linux-3.0.9/include/linux/vserver/pid.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/pid.h
13639--- linux-3.0.9/include/linux/vserver/pid.h 1970-01-01 01:00:00.000000000 +0100
13640+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/pid.h 2011-06-10 22:11:24.000000000 +0200
13641@@ -0,0 +1,51 @@
13642+#ifndef _VSERVER_PID_H
13643+#define _VSERVER_PID_H
13644+
13645+/* pid faking stuff */
13646+
13647+#define vx_info_map_pid(v, p) \
13648+ __vx_info_map_pid((v), (p), __func__, __FILE__, __LINE__)
13649+#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p)
13650+#define vx_map_pid(p) vx_info_map_pid(current_vx_info(), p)
13651+#define vx_map_tgid(p) vx_map_pid(p)
13652+
13653+static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
13654+ const char *func, const char *file, int line)
13655+{
13656+ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
13657+ vxfprintk(VXD_CBIT(cvirt, 2),
13658+ "vx_map_tgid: %p/%llx: %d -> %d",
13659+ vxi, (long long)vxi->vx_flags, pid,
13660+ (pid && pid == vxi->vx_initpid) ? 1 : pid,
13661+ func, file, line);
13662+ if (pid == 0)
13663+ return 0;
13664+ if (pid == vxi->vx_initpid)
13665+ return 1;
13666+ }
13667+ return pid;
13668+}
13669+
13670+#define vx_info_rmap_pid(v, p) \
13671+ __vx_info_rmap_pid((v), (p), __func__, __FILE__, __LINE__)
13672+#define vx_rmap_pid(p) vx_info_rmap_pid(current_vx_info(), p)
13673+#define vx_rmap_tgid(p) vx_rmap_pid(p)
13674+
13675+static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
13676+ const char *func, const char *file, int line)
13677+{
13678+ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
13679+ vxfprintk(VXD_CBIT(cvirt, 2),
13680+ "vx_rmap_tgid: %p/%llx: %d -> %d",
13681+ vxi, (long long)vxi->vx_flags, pid,
13682+ (pid == 1) ? vxi->vx_initpid : pid,
13683+ func, file, line);
13684+ if ((pid == 1) && vxi->vx_initpid)
13685+ return vxi->vx_initpid;
13686+ if (pid == vxi->vx_initpid)
13687+ return ~0U;
13688+ }
13689+ return pid;
13690+}
13691+
13692+#endif
13693diff -NurpP --minimal linux-3.0.9/include/linux/vserver/sched.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/sched.h
13694--- linux-3.0.9/include/linux/vserver/sched.h 1970-01-01 01:00:00.000000000 +0100
13695+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/sched.h 2011-06-10 22:11:24.000000000 +0200
13696@@ -0,0 +1,23 @@
13697+#ifndef _VX_SCHED_H
13698+#define _VX_SCHED_H
13699+
13700+
13701+#ifdef __KERNEL__
13702+
13703+struct timespec;
13704+
13705+void vx_vsi_uptime(struct timespec *, struct timespec *);
13706+
13707+
13708+struct vx_info;
13709+
13710+void vx_update_load(struct vx_info *);
13711+
13712+
13713+void vx_update_sched_param(struct _vx_sched *sched,
13714+ struct _vx_sched_pc *sched_pc);
13715+
13716+#endif /* __KERNEL__ */
13717+#else /* _VX_SCHED_H */
13718+#warning duplicate inclusion
13719+#endif /* _VX_SCHED_H */
13720diff -NurpP --minimal linux-3.0.9/include/linux/vserver/sched_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/sched_cmd.h
13721--- linux-3.0.9/include/linux/vserver/sched_cmd.h 1970-01-01 01:00:00.000000000 +0100
13722+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/sched_cmd.h 2011-06-10 22:11:24.000000000 +0200
13723@@ -0,0 +1,21 @@
13724+#ifndef _VX_SCHED_CMD_H
13725+#define _VX_SCHED_CMD_H
13726+
13727+
13728+struct vcmd_prio_bias {
13729+ int32_t cpu_id;
13730+ int32_t prio_bias;
13731+};
13732+
13733+#define VCMD_set_prio_bias VC_CMD(SCHED, 4, 0)
13734+#define VCMD_get_prio_bias VC_CMD(SCHED, 5, 0)
13735+
13736+#ifdef __KERNEL__
13737+
13738+#include <linux/compiler.h>
13739+
13740+extern int vc_set_prio_bias(struct vx_info *, void __user *);
13741+extern int vc_get_prio_bias(struct vx_info *, void __user *);
13742+
13743+#endif /* __KERNEL__ */
13744+#endif /* _VX_SCHED_CMD_H */
13745diff -NurpP --minimal linux-3.0.9/include/linux/vserver/sched_def.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/sched_def.h
13746--- linux-3.0.9/include/linux/vserver/sched_def.h 1970-01-01 01:00:00.000000000 +0100
13747+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/sched_def.h 2011-06-10 22:11:24.000000000 +0200
13748@@ -0,0 +1,38 @@
13749+#ifndef _VX_SCHED_DEF_H
13750+#define _VX_SCHED_DEF_H
13751+
13752+#include <linux/spinlock.h>
13753+#include <linux/jiffies.h>
13754+#include <linux/cpumask.h>
13755+#include <asm/atomic.h>
13756+#include <asm/param.h>
13757+
13758+
13759+/* context sub struct */
13760+
13761+struct _vx_sched {
13762+ int prio_bias; /* bias offset for priority */
13763+
13764+ cpumask_t update; /* CPUs which should update */
13765+};
13766+
13767+struct _vx_sched_pc {
13768+ int prio_bias; /* bias offset for priority */
13769+
13770+ uint64_t user_ticks; /* token tick events */
13771+ uint64_t sys_ticks; /* token tick events */
13772+ uint64_t hold_ticks; /* token ticks paused */
13773+};
13774+
13775+
13776+#ifdef CONFIG_VSERVER_DEBUG
13777+
13778+static inline void __dump_vx_sched(struct _vx_sched *sched)
13779+{
13780+ printk("\t_vx_sched:\n");
13781+ printk("\t priority = %4d\n", sched->prio_bias);
13782+}
13783+
13784+#endif
13785+
13786+#endif /* _VX_SCHED_DEF_H */
13787diff -NurpP --minimal linux-3.0.9/include/linux/vserver/signal.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/signal.h
13788--- linux-3.0.9/include/linux/vserver/signal.h 1970-01-01 01:00:00.000000000 +0100
13789+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/signal.h 2011-06-10 22:11:24.000000000 +0200
13790@@ -0,0 +1,14 @@
13791+#ifndef _VX_SIGNAL_H
13792+#define _VX_SIGNAL_H
13793+
13794+
13795+#ifdef __KERNEL__
13796+
13797+struct vx_info;
13798+
13799+int vx_info_kill(struct vx_info *, int, int);
13800+
13801+#endif /* __KERNEL__ */
13802+#else /* _VX_SIGNAL_H */
13803+#warning duplicate inclusion
13804+#endif /* _VX_SIGNAL_H */
13805diff -NurpP --minimal linux-3.0.9/include/linux/vserver/signal_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/signal_cmd.h
13806--- linux-3.0.9/include/linux/vserver/signal_cmd.h 1970-01-01 01:00:00.000000000 +0100
13807+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/signal_cmd.h 2011-06-10 22:11:24.000000000 +0200
13808@@ -0,0 +1,43 @@
13809+#ifndef _VX_SIGNAL_CMD_H
13810+#define _VX_SIGNAL_CMD_H
13811+
13812+
13813+/* signalling vserver commands */
13814+
13815+#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0)
13816+#define VCMD_wait_exit VC_CMD(EVENT, 99, 0)
13817+
13818+struct vcmd_ctx_kill_v0 {
13819+ int32_t pid;
13820+ int32_t sig;
13821+};
13822+
13823+struct vcmd_wait_exit_v0 {
13824+ int32_t reboot_cmd;
13825+ int32_t exit_code;
13826+};
13827+
13828+#ifdef __KERNEL__
13829+
13830+extern int vc_ctx_kill(struct vx_info *, void __user *);
13831+extern int vc_wait_exit(struct vx_info *, void __user *);
13832+
13833+#endif /* __KERNEL__ */
13834+
13835+/* process alteration commands */
13836+
13837+#define VCMD_get_pflags VC_CMD(PROCALT, 5, 0)
13838+#define VCMD_set_pflags VC_CMD(PROCALT, 6, 0)
13839+
13840+struct vcmd_pflags_v0 {
13841+ uint32_t flagword;
13842+ uint32_t mask;
13843+};
13844+
13845+#ifdef __KERNEL__
13846+
13847+extern int vc_get_pflags(uint32_t pid, void __user *);
13848+extern int vc_set_pflags(uint32_t pid, void __user *);
13849+
13850+#endif /* __KERNEL__ */
13851+#endif /* _VX_SIGNAL_CMD_H */
13852diff -NurpP --minimal linux-3.0.9/include/linux/vserver/space.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/space.h
13853--- linux-3.0.9/include/linux/vserver/space.h 1970-01-01 01:00:00.000000000 +0100
13854+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/space.h 2011-06-10 22:11:24.000000000 +0200
13855@@ -0,0 +1,12 @@
13856+#ifndef _VX_SPACE_H
13857+#define _VX_SPACE_H
13858+
13859+#include <linux/types.h>
13860+
13861+struct vx_info;
13862+
13863+int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index);
13864+
13865+#else /* _VX_SPACE_H */
13866+#warning duplicate inclusion
13867+#endif /* _VX_SPACE_H */
13868diff -NurpP --minimal linux-3.0.9/include/linux/vserver/space_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/space_cmd.h
13869--- linux-3.0.9/include/linux/vserver/space_cmd.h 1970-01-01 01:00:00.000000000 +0100
13870+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/space_cmd.h 2011-06-10 22:11:24.000000000 +0200
13871@@ -0,0 +1,38 @@
13872+#ifndef _VX_SPACE_CMD_H
13873+#define _VX_SPACE_CMD_H
13874+
13875+
13876+#define VCMD_enter_space_v0 VC_CMD(PROCALT, 1, 0)
13877+#define VCMD_enter_space_v1 VC_CMD(PROCALT, 1, 1)
13878+#define VCMD_enter_space VC_CMD(PROCALT, 1, 2)
13879+
13880+#define VCMD_set_space_v0 VC_CMD(PROCALT, 3, 0)
13881+#define VCMD_set_space_v1 VC_CMD(PROCALT, 3, 1)
13882+#define VCMD_set_space VC_CMD(PROCALT, 3, 2)
13883+
13884+#define VCMD_get_space_mask_v0 VC_CMD(PROCALT, 4, 0)
13885+
13886+#define VCMD_get_space_mask VC_CMD(VSPACE, 0, 1)
13887+#define VCMD_get_space_default VC_CMD(VSPACE, 1, 0)
13888+
13889+
13890+struct vcmd_space_mask_v1 {
13891+ uint64_t mask;
13892+};
13893+
13894+struct vcmd_space_mask_v2 {
13895+ uint64_t mask;
13896+ uint32_t index;
13897+};
13898+
13899+
13900+#ifdef __KERNEL__
13901+
13902+extern int vc_enter_space_v1(struct vx_info *, void __user *);
13903+extern int vc_set_space_v1(struct vx_info *, void __user *);
13904+extern int vc_enter_space(struct vx_info *, void __user *);
13905+extern int vc_set_space(struct vx_info *, void __user *);
13906+extern int vc_get_space_mask(void __user *, int);
13907+
13908+#endif /* __KERNEL__ */
13909+#endif /* _VX_SPACE_CMD_H */
13910diff -NurpP --minimal linux-3.0.9/include/linux/vserver/switch.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/switch.h
13911--- linux-3.0.9/include/linux/vserver/switch.h 1970-01-01 01:00:00.000000000 +0100
13912+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/switch.h 2011-06-10 22:11:24.000000000 +0200
13913@@ -0,0 +1,98 @@
13914+#ifndef _VX_SWITCH_H
13915+#define _VX_SWITCH_H
13916+
13917+#include <linux/types.h>
13918+
13919+
13920+#define VC_CATEGORY(c) (((c) >> 24) & 0x3F)
13921+#define VC_COMMAND(c) (((c) >> 16) & 0xFF)
13922+#define VC_VERSION(c) ((c) & 0xFFF)
13923+
13924+#define VC_CMD(c, i, v) ((((VC_CAT_ ## c) & 0x3F) << 24) \
13925+ | (((i) & 0xFF) << 16) | ((v) & 0xFFF))
13926+
13927+/*
13928+
13929+ Syscall Matrix V2.8
13930+
13931+ |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
13932+ |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | |
13933+ |INFO |SETUP | |MOVE | | | | | |
13934+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
13935+ SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICE | |
13936+ HOST | 00| 01| 02| 03| 04| 05| | 06| 07|
13937+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
13938+ CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | |
13939+ PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15|
13940+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
13941+ MEMORY | | | | |MEMCTRL| | |SWAP | |
13942+ | 16| 17| 18| 19| 20| 21| | 22| 23|
13943+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
13944+ NETWORK| |VNET |NETALT |NETMIG |NETCTRL| | |SERIAL | |
13945+ | 24| 25| 26| 27| 28| 29| | 30| 31|
13946+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
13947+ DISK | | | |TAGMIG |DLIMIT | | |INODE | |
13948+ VFS | 32| 33| 34| 35| 36| 37| | 38| 39|
13949+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
13950+ OTHER |VSTAT | | | | | | |VINFO | |
13951+ | 40| 41| 42| 43| 44| 45| | 46| 47|
13952+ =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
13953+ SPECIAL|EVENT | | | |FLAGS | | |VSPACE | |
13954+ | 48| 49| 50| 51| 52| 53| | 54| 55|
13955+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
13956+ SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT |
13957+ | 56| 57| 58| 59| 60|TEST 61| | 62| 63|
13958+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
13959+
13960+*/
13961+
13962+#define VC_CAT_VERSION 0
13963+
13964+#define VC_CAT_VSETUP 1
13965+#define VC_CAT_VHOST 2
13966+
13967+#define VC_CAT_DEVICE 6
13968+
13969+#define VC_CAT_VPROC 9
13970+#define VC_CAT_PROCALT 10
13971+#define VC_CAT_PROCMIG 11
13972+#define VC_CAT_PROCTRL 12
13973+
13974+#define VC_CAT_SCHED 14
13975+#define VC_CAT_MEMCTRL 20
13976+
13977+#define VC_CAT_VNET 25
13978+#define VC_CAT_NETALT 26
13979+#define VC_CAT_NETMIG 27
13980+#define VC_CAT_NETCTRL 28
13981+
13982+#define VC_CAT_TAGMIG 35
13983+#define VC_CAT_DLIMIT 36
13984+#define VC_CAT_INODE 38
13985+
13986+#define VC_CAT_VSTAT 40
13987+#define VC_CAT_VINFO 46
13988+#define VC_CAT_EVENT 48
13989+
13990+#define VC_CAT_FLAGS 52
13991+#define VC_CAT_VSPACE 54
13992+#define VC_CAT_DEBUG 56
13993+#define VC_CAT_RLIMIT 60
13994+
13995+#define VC_CAT_SYSTEST 61
13996+#define VC_CAT_COMPAT 63
13997+
13998+/* query version */
13999+
14000+#define VCMD_get_version VC_CMD(VERSION, 0, 0)
14001+#define VCMD_get_vci VC_CMD(VERSION, 1, 0)
14002+
14003+
14004+#ifdef __KERNEL__
14005+
14006+#include <linux/errno.h>
14007+
14008+#endif /* __KERNEL__ */
14009+
14010+#endif /* _VX_SWITCH_H */
14011+
14012diff -NurpP --minimal linux-3.0.9/include/linux/vserver/tag.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/tag.h
14013--- linux-3.0.9/include/linux/vserver/tag.h 1970-01-01 01:00:00.000000000 +0100
14014+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/tag.h 2011-06-10 22:11:24.000000000 +0200
14015@@ -0,0 +1,143 @@
14016+#ifndef _DX_TAG_H
14017+#define _DX_TAG_H
14018+
14019+#include <linux/types.h>
14020+
14021+
14022+#define DX_TAG(in) (IS_TAGGED(in))
14023+
14024+
14025+#ifdef CONFIG_TAG_NFSD
14026+#define DX_TAG_NFSD 1
14027+#else
14028+#define DX_TAG_NFSD 0
14029+#endif
14030+
14031+
14032+#ifdef CONFIG_TAGGING_NONE
14033+
14034+#define MAX_UID 0xFFFFFFFF
14035+#define MAX_GID 0xFFFFFFFF
14036+
14037+#define INOTAG_TAG(cond, uid, gid, tag) (0)
14038+
14039+#define TAGINO_UID(cond, uid, tag) (uid)
14040+#define TAGINO_GID(cond, gid, tag) (gid)
14041+
14042+#endif
14043+
14044+
14045+#ifdef CONFIG_TAGGING_GID16
14046+
14047+#define MAX_UID 0xFFFFFFFF
14048+#define MAX_GID 0x0000FFFF
14049+
14050+#define INOTAG_TAG(cond, uid, gid, tag) \
14051+ ((cond) ? (((gid) >> 16) & 0xFFFF) : 0)
14052+
14053+#define TAGINO_UID(cond, uid, tag) (uid)
14054+#define TAGINO_GID(cond, gid, tag) \
14055+ ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid))
14056+
14057+#endif
14058+
14059+
14060+#ifdef CONFIG_TAGGING_ID24
14061+
14062+#define MAX_UID 0x00FFFFFF
14063+#define MAX_GID 0x00FFFFFF
14064+
14065+#define INOTAG_TAG(cond, uid, gid, tag) \
14066+ ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
14067+
14068+#define TAGINO_UID(cond, uid, tag) \
14069+ ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid))
14070+#define TAGINO_GID(cond, gid, tag) \
14071+ ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid))
14072+
14073+#endif
14074+
14075+
14076+#ifdef CONFIG_TAGGING_UID16
14077+
14078+#define MAX_UID 0x0000FFFF
14079+#define MAX_GID 0xFFFFFFFF
14080+
14081+#define INOTAG_TAG(cond, uid, gid, tag) \
14082+ ((cond) ? (((uid) >> 16) & 0xFFFF) : 0)
14083+
14084+#define TAGINO_UID(cond, uid, tag) \
14085+ ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid))
14086+#define TAGINO_GID(cond, gid, tag) (gid)
14087+
14088+#endif
14089+
14090+
14091+#ifdef CONFIG_TAGGING_INTERN
14092+
14093+#define MAX_UID 0xFFFFFFFF
14094+#define MAX_GID 0xFFFFFFFF
14095+
14096+#define INOTAG_TAG(cond, uid, gid, tag) \
14097+ ((cond) ? (tag) : 0)
14098+
14099+#define TAGINO_UID(cond, uid, tag) (uid)
14100+#define TAGINO_GID(cond, gid, tag) (gid)
14101+
14102+#endif
14103+
14104+
14105+#ifndef CONFIG_TAGGING_NONE
14106+#define dx_current_fstag(sb) \
14107+ ((sb)->s_flags & MS_TAGGED ? dx_current_tag() : 0)
14108+#else
14109+#define dx_current_fstag(sb) (0)
14110+#endif
14111+
14112+#ifndef CONFIG_TAGGING_INTERN
14113+#define TAGINO_TAG(cond, tag) (0)
14114+#else
14115+#define TAGINO_TAG(cond, tag) ((cond) ? (tag) : 0)
14116+#endif
14117+
14118+#define INOTAG_UID(cond, uid, gid) \
14119+ ((cond) ? ((uid) & MAX_UID) : (uid))
14120+#define INOTAG_GID(cond, uid, gid) \
14121+ ((cond) ? ((gid) & MAX_GID) : (gid))
14122+
14123+
14124+static inline uid_t dx_map_uid(uid_t uid)
14125+{
14126+ if ((uid > MAX_UID) && (uid != -1))
14127+ uid = -2;
14128+ return (uid & MAX_UID);
14129+}
14130+
14131+static inline gid_t dx_map_gid(gid_t gid)
14132+{
14133+ if ((gid > MAX_GID) && (gid != -1))
14134+ gid = -2;
14135+ return (gid & MAX_GID);
14136+}
14137+
14138+struct peer_tag {
14139+ int32_t xid;
14140+ int32_t nid;
14141+};
14142+
14143+#define dx_notagcheck(sb) ((sb) && ((sb)->s_flags & MS_NOTAGCHECK))
14144+
14145+int dx_parse_tag(char *string, tag_t *tag, int remove, int *mnt_flags,
14146+ unsigned long *flags);
14147+
14148+#ifdef CONFIG_PROPAGATE
14149+
14150+void __dx_propagate_tag(struct nameidata *nd, struct inode *inode);
14151+
14152+#define dx_propagate_tag(n, i) __dx_propagate_tag(n, i)
14153+
14154+#else
14155+#define dx_propagate_tag(n, i) do { } while (0)
14156+#endif
14157+
14158+#endif /* _DX_TAG_H */
14159diff -NurpP --minimal linux-3.0.9/include/linux/vserver/tag_cmd.h linux-3.0.9-vs2.3.2.1/include/linux/vserver/tag_cmd.h
14160--- linux-3.0.9/include/linux/vserver/tag_cmd.h 1970-01-01 01:00:00.000000000 +0100
14161+++ linux-3.0.9-vs2.3.2.1/include/linux/vserver/tag_cmd.h 2011-06-10 22:11:24.000000000 +0200
14162@@ -0,0 +1,22 @@
14163+#ifndef _VX_TAG_CMD_H
14164+#define _VX_TAG_CMD_H
14165+
14166+
14167+/* vinfo commands */
14168+
14169+#define VCMD_task_tag VC_CMD(VINFO, 3, 0)
14170+
14171+#ifdef __KERNEL__
14172+extern int vc_task_tag(uint32_t);
14173+
14174+#endif /* __KERNEL__ */
14175+
14176+/* context commands */
14177+
14178+#define VCMD_tag_migrate VC_CMD(TAGMIG, 1, 0)
14179+
14180+#ifdef __KERNEL__
14181+extern int vc_tag_migrate(uint32_t);
14182+
14183+#endif /* __KERNEL__ */
14184+#endif /* _VX_TAG_CMD_H */
14185diff -NurpP --minimal linux-3.0.9/include/net/addrconf.h linux-3.0.9-vs2.3.2.1/include/net/addrconf.h
14186--- linux-3.0.9/include/net/addrconf.h 2011-07-22 11:18:11.000000000 +0200
14187+++ linux-3.0.9-vs2.3.2.1/include/net/addrconf.h 2011-06-10 22:11:24.000000000 +0200
14188@@ -80,7 +80,8 @@ extern int ipv6_dev_get_saddr(struct n
14189 struct net_device *dev,
14190 const struct in6_addr *daddr,
14191 unsigned int srcprefs,
14192- struct in6_addr *saddr);
14193+ struct in6_addr *saddr,
14194+ struct nx_info *nxi);
14195 extern int ipv6_get_lladdr(struct net_device *dev,
14196 struct in6_addr *addr,
14197 unsigned char banned_flags);
14198diff -NurpP --minimal linux-3.0.9/include/net/af_unix.h linux-3.0.9-vs2.3.2.1/include/net/af_unix.h
14199--- linux-3.0.9/include/net/af_unix.h 2011-07-22 11:18:11.000000000 +0200
14200+++ linux-3.0.9-vs2.3.2.1/include/net/af_unix.h 2011-06-10 22:11:24.000000000 +0200
14201@@ -4,6 +4,7 @@
14202 #include <linux/socket.h>
14203 #include <linux/un.h>
14204 #include <linux/mutex.h>
14205+#include <linux/vs_base.h>
14206 #include <net/sock.h>
14207
14208 extern void unix_inflight(struct file *fp);
14209diff -NurpP --minimal linux-3.0.9/include/net/inet_timewait_sock.h linux-3.0.9-vs2.3.2.1/include/net/inet_timewait_sock.h
14210--- linux-3.0.9/include/net/inet_timewait_sock.h 2011-03-15 18:07:40.000000000 +0100
14211+++ linux-3.0.9-vs2.3.2.1/include/net/inet_timewait_sock.h 2011-06-10 22:11:24.000000000 +0200
14212@@ -113,6 +113,10 @@ struct inet_timewait_sock {
14213 #define tw_net __tw_common.skc_net
14214 #define tw_daddr __tw_common.skc_daddr
14215 #define tw_rcv_saddr __tw_common.skc_rcv_saddr
14216+#define tw_xid __tw_common.skc_xid
14217+#define tw_vx_info __tw_common.skc_vx_info
14218+#define tw_nid __tw_common.skc_nid
14219+#define tw_nx_info __tw_common.skc_nx_info
14220 int tw_timeout;
14221 volatile unsigned char tw_substate;
14222 unsigned char tw_rcv_wscale;
14223diff -NurpP --minimal linux-3.0.9/include/net/ip6_route.h linux-3.0.9-vs2.3.2.1/include/net/ip6_route.h
14224--- linux-3.0.9/include/net/ip6_route.h 2011-07-22 11:18:11.000000000 +0200
14225+++ linux-3.0.9-vs2.3.2.1/include/net/ip6_route.h 2011-06-16 14:16:51.000000000 +0200
14226@@ -86,7 +86,8 @@ extern int ip6_route_get_saddr(struct
14227 struct rt6_info *rt,
14228 const struct in6_addr *daddr,
14229 unsigned int prefs,
14230- struct in6_addr *saddr);
14231+ struct in6_addr *saddr,
14232+ struct nx_info *nxi);
14233
14234 extern struct rt6_info *rt6_lookup(struct net *net,
14235 const struct in6_addr *daddr,
14236diff -NurpP --minimal linux-3.0.9/include/net/route.h linux-3.0.9-vs2.3.2.1/include/net/route.h
14237--- linux-3.0.9/include/net/route.h 2011-07-22 11:18:11.000000000 +0200
14238+++ linux-3.0.9-vs2.3.2.1/include/net/route.h 2011-07-27 19:42:59.000000000 +0200
14239@@ -202,6 +202,9 @@ static inline void ip_rt_put(struct rtab
14240 dst_release(&rt->dst);
14241 }
14242
14243+#include <linux/vs_base.h>
14244+#include <linux/vs_inet.h>
14245+
14246 #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3)
14247
14248 extern const __u8 ip_tos2prio[16];
14249@@ -253,6 +256,9 @@ static inline void ip_route_connect_init
14250 protocol, flow_flags, dst, src, dport, sport);
14251 }
14252
14253+extern struct rtable *ip_v4_find_src(struct net *net, struct nx_info *,
14254+ struct flowi4 *);
14255+
14256 static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
14257 __be32 dst, __be32 src, u32 tos,
14258 int oif, u8 protocol,
14259@@ -261,11 +267,25 @@ static inline struct rtable *ip_route_co
14260 {
14261 struct net *net = sock_net(sk);
14262 struct rtable *rt;
14263+ struct nx_info *nx_info = current_nx_info();
14264
14265 ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
14266 sport, dport, sk, can_sleep);
14267
14268- if (!dst || !src) {
14269+ if (sk)
14270+ nx_info = sk->sk_nx_info;
14271+
14272+ vxdprintk(VXD_CBIT(net, 4),
14273+ "ip_route_connect(%p) %p,%p;%lx",
14274+ sk, nx_info, sk->sk_socket,
14275+ (sk->sk_socket?sk->sk_socket->flags:0));
14276+
14277+ rt = ip_v4_find_src(net, nx_info, fl4);
14278+ if (IS_ERR(rt))
14279+ return rt;
14280+ ip_rt_put(rt);
14281+
14282+ if (!fl4->daddr || !fl4->saddr) {
14283 rt = __ip_route_output_key(net, fl4);
14284 if (IS_ERR(rt))
14285 return rt;
14286diff -NurpP --minimal linux-3.0.9/include/net/sock.h linux-3.0.9-vs2.3.2.1/include/net/sock.h
14287--- linux-3.0.9/include/net/sock.h 2011-07-22 11:18:12.000000000 +0200
14288+++ linux-3.0.9-vs2.3.2.1/include/net/sock.h 2011-07-01 11:35:35.000000000 +0200
14289@@ -149,6 +149,10 @@ struct sock_common {
14290 #ifdef CONFIG_NET_NS
14291 struct net *skc_net;
14292 #endif
14293+ xid_t skc_xid;
14294+ struct vx_info *skc_vx_info;
14295+ nid_t skc_nid;
14296+ struct nx_info *skc_nx_info;
14297 /*
14298 * fields between dontcopy_begin/dontcopy_end
14299 * are not copied in sock_copy()
14300@@ -256,6 +260,10 @@ struct sock {
14301 #define sk_bind_node __sk_common.skc_bind_node
14302 #define sk_prot __sk_common.skc_prot
14303 #define sk_net __sk_common.skc_net
14304+#define sk_xid __sk_common.skc_xid
14305+#define sk_vx_info __sk_common.skc_vx_info
14306+#define sk_nid __sk_common.skc_nid
14307+#define sk_nx_info __sk_common.skc_nx_info
14308 socket_lock_t sk_lock;
14309 struct sk_buff_head sk_receive_queue;
14310 /*
14311diff -NurpP --minimal linux-3.0.9/init/Kconfig linux-3.0.9-vs2.3.2.1/init/Kconfig
14312--- linux-3.0.9/init/Kconfig 2011-07-22 11:18:12.000000000 +0200
14313+++ linux-3.0.9-vs2.3.2.1/init/Kconfig 2011-08-08 18:08:57.000000000 +0200
14314@@ -574,6 +574,7 @@ config HAVE_UNSTABLE_SCHED_CLOCK
14315 menuconfig CGROUPS
14316 boolean "Control Group support"
14317 depends on EVENTFD
14318+ default y
14319 help
14320 This option adds support for grouping sets of processes together, for
14321 use with process control subsystems such as Cpusets, CFS, memory
14322@@ -790,6 +791,7 @@ config IPC_NS
14323 config USER_NS
14324 bool "User namespace (EXPERIMENTAL)"
14325 depends on EXPERIMENTAL
14326+ depends on VSERVER_DISABLED
14327 default y
14328 help
14329 This allows containers, i.e. vservers, to use user namespaces
14330diff -NurpP --minimal linux-3.0.9/init/main.c linux-3.0.9-vs2.3.2.1/init/main.c
14331--- linux-3.0.9/init/main.c 2011-07-22 11:18:12.000000000 +0200
14332+++ linux-3.0.9-vs2.3.2.1/init/main.c 2011-06-22 12:39:15.000000000 +0200
14333@@ -68,6 +68,7 @@
14334 #include <linux/shmem_fs.h>
14335 #include <linux/slab.h>
14336 #include <linux/perf_event.h>
14337+#include <linux/vserver/percpu.h>
14338
14339 #include <asm/io.h>
14340 #include <asm/bugs.h>
14341diff -NurpP --minimal linux-3.0.9/ipc/mqueue.c linux-3.0.9-vs2.3.2.1/ipc/mqueue.c
14342--- linux-3.0.9/ipc/mqueue.c 2011-11-15 16:40:47.000000000 +0100
14343+++ linux-3.0.9-vs2.3.2.1/ipc/mqueue.c 2011-10-18 14:03:46.000000000 +0200
14344@@ -33,6 +33,8 @@
14345 #include <linux/pid.h>
14346 #include <linux/ipc_namespace.h>
14347 #include <linux/slab.h>
14348+#include <linux/vs_context.h>
14349+#include <linux/vs_limit.h>
14350
14351 #include <net/sock.h>
14352 #include "util.h"
14353@@ -66,6 +68,7 @@ struct mqueue_inode_info {
14354 struct sigevent notify;
14355 struct pid* notify_owner;
14356 struct user_struct *user; /* user who created, for accounting */
14357+ struct vx_info *vxi;
14358 struct sock *notify_sock;
14359 struct sk_buff *notify_cookie;
14360
14361@@ -128,6 +131,7 @@ static struct inode *mqueue_get_inode(st
14362 if (S_ISREG(mode)) {
14363 struct mqueue_inode_info *info;
14364 struct task_struct *p = current;
14365+ struct vx_info *vxi = p->vx_info;
14366 unsigned long mq_bytes, mq_msg_tblsz;
14367
14368 inode->i_fop = &mqueue_file_operations;
14369@@ -141,6 +145,7 @@ static struct inode *mqueue_get_inode(st
14370 info->notify_owner = NULL;
14371 info->qsize = 0;
14372 info->user = NULL; /* set when all is ok */
14373+ info->vxi = NULL;
14374 memset(&info->attr, 0, sizeof(info->attr));
14375 info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
14376 info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
14377@@ -158,17 +163,20 @@ static struct inode *mqueue_get_inode(st
14378
14379 spin_lock(&mq_lock);
14380 if (u->mq_bytes + mq_bytes < u->mq_bytes ||
14381- u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE)) {
14382+ u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE) ||
14383+ !vx_ipcmsg_avail(vxi, mq_bytes)) {
14384 spin_unlock(&mq_lock);
14385 /* mqueue_evict_inode() releases info->messages */
14386 ret = -EMFILE;
14387 goto out_inode;
14388 }
14389 u->mq_bytes += mq_bytes;
14390+ vx_ipcmsg_add(vxi, u, mq_bytes);
14391 spin_unlock(&mq_lock);
14392
14393 /* all is ok */
14394 info->user = get_uid(u);
14395+ info->vxi = get_vx_info(vxi);
14396 } else if (S_ISDIR(mode)) {
14397 inc_nlink(inode);
14398 /* Some things misbehave if size == 0 on a directory */
14399@@ -278,8 +286,11 @@ static void mqueue_evict_inode(struct in
14400 + info->attr.mq_msgsize);
14401 user = info->user;
14402 if (user) {
14403+ struct vx_info *vxi = info->vxi;
14404+
14405 spin_lock(&mq_lock);
14406 user->mq_bytes -= mq_bytes;
14407+ vx_ipcmsg_sub(vxi, user, mq_bytes);
14408 /*
14409 * get_ns_from_inode() ensures that the
14410 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
14411@@ -289,6 +300,7 @@ static void mqueue_evict_inode(struct in
14412 if (ipc_ns)
14413 ipc_ns->mq_queues_count--;
14414 spin_unlock(&mq_lock);
14415+ put_vx_info(vxi);
14416 free_uid(user);
14417 }
14418 if (ipc_ns)
14419diff -NurpP --minimal linux-3.0.9/ipc/msg.c linux-3.0.9-vs2.3.2.1/ipc/msg.c
14420--- linux-3.0.9/ipc/msg.c 2011-05-22 16:17:59.000000000 +0200
14421+++ linux-3.0.9-vs2.3.2.1/ipc/msg.c 2011-06-10 22:11:24.000000000 +0200
14422@@ -37,6 +37,7 @@
14423 #include <linux/rwsem.h>
14424 #include <linux/nsproxy.h>
14425 #include <linux/ipc_namespace.h>
14426+#include <linux/vs_base.h>
14427
14428 #include <asm/current.h>
14429 #include <asm/uaccess.h>
14430@@ -190,6 +191,7 @@ static int newque(struct ipc_namespace *
14431
14432 msq->q_perm.mode = msgflg & S_IRWXUGO;
14433 msq->q_perm.key = key;
14434+ msq->q_perm.xid = vx_current_xid();
14435
14436 msq->q_perm.security = NULL;
14437 retval = security_msg_queue_alloc(msq);
14438diff -NurpP --minimal linux-3.0.9/ipc/namespace.c linux-3.0.9-vs2.3.2.1/ipc/namespace.c
14439--- linux-3.0.9/ipc/namespace.c 2011-07-22 11:18:12.000000000 +0200
14440+++ linux-3.0.9-vs2.3.2.1/ipc/namespace.c 2011-06-13 14:09:44.000000000 +0200
14441@@ -13,11 +13,12 @@
14442 #include <linux/mount.h>
14443 #include <linux/user_namespace.h>
14444 #include <linux/proc_fs.h>
14445+#include <linux/vs_base.h>
14446+#include <linux/vserver/global.h>
14447
14448 #include "util.h"
14449
14450-static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
14451- struct ipc_namespace *old_ns)
14452+static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns)
14453 {
14454 struct ipc_namespace *ns;
14455 int err;
14456@@ -46,19 +47,18 @@ static struct ipc_namespace *create_ipc_
14457 ipcns_notify(IPCNS_CREATED);
14458 register_ipcns_notifier(ns);
14459
14460- ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
14461+ ns->user_ns = get_user_ns(user_ns);
14462
14463 return ns;
14464 }
14465
14466 struct ipc_namespace *copy_ipcs(unsigned long flags,
14467- struct task_struct *tsk)
14468+ struct ipc_namespace *old_ns,
14469+ struct user_namespace *user_ns)
14470 {
14471- struct ipc_namespace *ns = tsk->nsproxy->ipc_ns;
14472-
14473 if (!(flags & CLONE_NEWIPC))
14474- return get_ipc_ns(ns);
14475- return create_ipc_ns(tsk, ns);
14476+ return get_ipc_ns(old_ns);
14477+ return create_ipc_ns(user_ns);
14478 }
14479
14480 /*
14481diff -NurpP --minimal linux-3.0.9/ipc/sem.c linux-3.0.9-vs2.3.2.1/ipc/sem.c
14482--- linux-3.0.9/ipc/sem.c 2011-11-15 16:40:47.000000000 +0100
14483+++ linux-3.0.9-vs2.3.2.1/ipc/sem.c 2011-08-08 23:04:47.000000000 +0200
14484@@ -86,6 +86,8 @@
14485 #include <linux/rwsem.h>
14486 #include <linux/nsproxy.h>
14487 #include <linux/ipc_namespace.h>
14488+#include <linux/vs_base.h>
14489+#include <linux/vs_limit.h>
14490
14491 #include <asm/uaccess.h>
14492 #include "util.h"
14493@@ -260,6 +262,7 @@ static int newary(struct ipc_namespace *
14494
14495 sma->sem_perm.mode = (semflg & S_IRWXUGO);
14496 sma->sem_perm.key = key;
14497+ sma->sem_perm.xid = vx_current_xid();
14498
14499 sma->sem_perm.security = NULL;
14500 retval = security_sem_alloc(sma);
14501@@ -275,6 +278,9 @@ static int newary(struct ipc_namespace *
14502 return id;
14503 }
14504 ns->used_sems += nsems;
14505+ /* FIXME: obsoleted? */
14506+ vx_semary_inc(sma);
14507+ vx_nsems_add(sma, nsems);
14508
14509 sma->sem_base = (struct sem *) &sma[1];
14510
14511@@ -730,6 +736,9 @@ static void freeary(struct ipc_namespace
14512
14513 wake_up_sem_queue_do(&tasks);
14514 ns->used_sems -= sma->sem_nsems;
14515+ /* FIXME: obsoleted? */
14516+ vx_nsems_sub(sma, sma->sem_nsems);
14517+ vx_semary_dec(sma);
14518 security_sem_free(sma);
14519 ipc_rcu_putref(sma);
14520 }
14521diff -NurpP --minimal linux-3.0.9/ipc/shm.c linux-3.0.9-vs2.3.2.1/ipc/shm.c
14522--- linux-3.0.9/ipc/shm.c 2011-07-22 11:18:12.000000000 +0200
14523+++ linux-3.0.9-vs2.3.2.1/ipc/shm.c 2011-06-10 22:11:24.000000000 +0200
14524@@ -39,6 +39,8 @@
14525 #include <linux/nsproxy.h>
14526 #include <linux/mount.h>
14527 #include <linux/ipc_namespace.h>
14528+#include <linux/vs_context.h>
14529+#include <linux/vs_limit.h>
14530
14531 #include <asm/uaccess.h>
14532
14533@@ -173,7 +175,12 @@ static void shm_open(struct vm_area_stru
14534 */
14535 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
14536 {
14537- ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
14538+ struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
14539+ int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
14540+
14541+ vx_ipcshm_sub(vxi, shp, numpages);
14542+ ns->shm_tot -= numpages;
14543+
14544 shm_rmid(ns, shp);
14545 shm_unlock(shp);
14546 if (!is_file_hugepages(shp->shm_file))
14547@@ -183,6 +190,7 @@ static void shm_destroy(struct ipc_names
14548 shp->mlock_user);
14549 fput (shp->shm_file);
14550 security_shm_free(shp);
14551+ put_vx_info(vxi);
14552 ipc_rcu_putref(shp);
14553 }
14554
14555@@ -355,11 +363,15 @@ static int newseg(struct ipc_namespace *
14556 if (ns->shm_tot + numpages > ns->shm_ctlall)
14557 return -ENOSPC;
14558
14559+ if (!vx_ipcshm_avail(current_vx_info(), numpages))
14560+ return -ENOSPC;
14561+
14562 shp = ipc_rcu_alloc(sizeof(*shp));
14563 if (!shp)
14564 return -ENOMEM;
14565
14566 shp->shm_perm.key = key;
14567+ shp->shm_perm.xid = vx_current_xid();
14568 shp->shm_perm.mode = (shmflg & S_IRWXUGO);
14569 shp->mlock_user = NULL;
14570
14571@@ -413,6 +425,7 @@ static int newseg(struct ipc_namespace *
14572 ns->shm_tot += numpages;
14573 error = shp->shm_perm.id;
14574 shm_unlock(shp);
14575+ vx_ipcshm_add(current_vx_info(), key, numpages);
14576 return error;
14577
14578 no_id:
14579diff -NurpP --minimal linux-3.0.9/kernel/Makefile linux-3.0.9-vs2.3.2.1/kernel/Makefile
14580--- linux-3.0.9/kernel/Makefile 2011-07-22 11:18:12.000000000 +0200
14581+++ linux-3.0.9-vs2.3.2.1/kernel/Makefile 2011-06-10 22:11:24.000000000 +0200
14582@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
14583 CFLAGS_REMOVE_irq_work.o = -pg
14584 endif
14585
14586+obj-y += vserver/
14587 obj-$(CONFIG_FREEZER) += freezer.o
14588 obj-$(CONFIG_PROFILING) += profile.o
14589 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
14590diff -NurpP --minimal linux-3.0.9/kernel/capability.c linux-3.0.9-vs2.3.2.1/kernel/capability.c
14591--- linux-3.0.9/kernel/capability.c 2011-07-22 11:18:12.000000000 +0200
14592+++ linux-3.0.9-vs2.3.2.1/kernel/capability.c 2011-10-27 13:59:20.000000000 +0200
14593@@ -15,6 +15,7 @@
14594 #include <linux/syscalls.h>
14595 #include <linux/pid_namespace.h>
14596 #include <linux/user_namespace.h>
14597+#include <linux/vs_context.h>
14598 #include <asm/uaccess.h>
14599
14600 /*
14601@@ -116,6 +117,7 @@ static int cap_validate_magic(cap_user_h
14602 return 0;
14603 }
14604
14605+
14606 /*
14607 * The only thing that can change the capabilities of the current
14608 * process is the current process. As such, we can't be in this code
14609@@ -340,6 +342,8 @@ bool has_capability_noaudit(struct task_
14610 return (ret == 0);
14611 }
14612
14613+#include <linux/vserver/base.h>
14614+
14615 /**
14616 * capable - Determine if the current task has a superior capability in effect
14617 * @cap: The capability to be tested for
14618diff -NurpP --minimal linux-3.0.9/kernel/compat.c linux-3.0.9-vs2.3.2.1/kernel/compat.c
14619--- linux-3.0.9/kernel/compat.c 2011-07-22 11:18:12.000000000 +0200
14620+++ linux-3.0.9-vs2.3.2.1/kernel/compat.c 2011-06-10 22:11:24.000000000 +0200
14621@@ -970,7 +970,7 @@ asmlinkage long compat_sys_stime(compat_
14622 if (err)
14623 return err;
14624
14625- do_settimeofday(&tv);
14626+ vx_settimeofday(&tv);
14627 return 0;
14628 }
14629
14630diff -NurpP --minimal linux-3.0.9/kernel/cred.c linux-3.0.9-vs2.3.2.1/kernel/cred.c
14631--- linux-3.0.9/kernel/cred.c 2011-07-22 11:18:12.000000000 +0200
14632+++ linux-3.0.9-vs2.3.2.1/kernel/cred.c 2011-06-10 22:11:24.000000000 +0200
14633@@ -61,31 +61,6 @@ struct cred init_cred = {
14634 #endif
14635 };
14636
14637-static inline void set_cred_subscribers(struct cred *cred, int n)
14638-{
14639-#ifdef CONFIG_DEBUG_CREDENTIALS
14640- atomic_set(&cred->subscribers, n);
14641-#endif
14642-}
14643-
14644-static inline int read_cred_subscribers(const struct cred *cred)
14645-{
14646-#ifdef CONFIG_DEBUG_CREDENTIALS
14647- return atomic_read(&cred->subscribers);
14648-#else
14649- return 0;
14650-#endif
14651-}
14652-
14653-static inline void alter_cred_subscribers(const struct cred *_cred, int n)
14654-{
14655-#ifdef CONFIG_DEBUG_CREDENTIALS
14656- struct cred *cred = (struct cred *) _cred;
14657-
14658- atomic_add(n, &cred->subscribers);
14659-#endif
14660-}
14661-
14662 /*
14663 * Dispose of the shared task group credentials
14664 */
14665@@ -281,21 +256,16 @@ error:
14666 *
14667 * Call commit_creds() or abort_creds() to clean up.
14668 */
14669-struct cred *prepare_creds(void)
14670+struct cred *__prepare_creds(const struct cred *old)
14671 {
14672- struct task_struct *task = current;
14673- const struct cred *old;
14674 struct cred *new;
14675
14676- validate_process_creds();
14677-
14678 new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
14679 if (!new)
14680 return NULL;
14681
14682 kdebug("prepare_creds() alloc %p", new);
14683
14684- old = task->cred;
14685 memcpy(new, old, sizeof(struct cred));
14686
14687 atomic_set(&new->usage, 1);
14688@@ -322,6 +292,13 @@ error:
14689 abort_creds(new);
14690 return NULL;
14691 }
14692+
14693+struct cred *prepare_creds(void)
14694+{
14695+ validate_process_creds();
14696+
14697+ return __prepare_creds(current->cred);
14698+}
14699 EXPORT_SYMBOL(prepare_creds);
14700
14701 /*
14702diff -NurpP --minimal linux-3.0.9/kernel/exit.c linux-3.0.9-vs2.3.2.1/kernel/exit.c
14703--- linux-3.0.9/kernel/exit.c 2011-07-22 11:18:12.000000000 +0200
14704+++ linux-3.0.9-vs2.3.2.1/kernel/exit.c 2011-06-22 12:39:15.000000000 +0200
14705@@ -48,6 +48,10 @@
14706 #include <linux/fs_struct.h>
14707 #include <linux/init_task.h>
14708 #include <linux/perf_event.h>
14709+#include <linux/vs_limit.h>
14710+#include <linux/vs_context.h>
14711+#include <linux/vs_network.h>
14712+#include <linux/vs_pid.h>
14713 #include <trace/events/sched.h>
14714 #include <linux/hw_breakpoint.h>
14715 #include <linux/oom.h>
14716@@ -494,9 +498,11 @@ static void close_files(struct files_str
14717 filp_close(file, files);
14718 cond_resched();
14719 }
14720+ vx_openfd_dec(i);
14721 }
14722 i++;
14723 set >>= 1;
14724+ cond_resched();
14725 }
14726 }
14727 }
14728@@ -1047,11 +1053,16 @@ NORET_TYPE void do_exit(long code)
14729
14730 validate_creds_for_do_exit(tsk);
14731
14732+ /* needs to stay after exit_notify() */
14733+ exit_vx_info(tsk, code);
14734+ exit_nx_info(tsk);
14735+
14736 preempt_disable();
14737 exit_rcu();
14738 /* causes final put_task_struct in finish_task_switch(). */
14739 tsk->state = TASK_DEAD;
14740 schedule();
14741+ printk("bad task: %p [%lx]\n", current, current->state);
14742 BUG();
14743 /* Avoid "noreturn function does return". */
14744 for (;;)
14745diff -NurpP --minimal linux-3.0.9/kernel/fork.c linux-3.0.9-vs2.3.2.1/kernel/fork.c
14746--- linux-3.0.9/kernel/fork.c 2011-07-22 11:18:12.000000000 +0200
14747+++ linux-3.0.9-vs2.3.2.1/kernel/fork.c 2011-06-10 22:21:02.000000000 +0200
14748@@ -67,6 +67,10 @@
14749 #include <linux/user-return-notifier.h>
14750 #include <linux/oom.h>
14751 #include <linux/khugepaged.h>
14752+#include <linux/vs_context.h>
14753+#include <linux/vs_network.h>
14754+#include <linux/vs_limit.h>
14755+#include <linux/vs_memory.h>
14756
14757 #include <asm/pgtable.h>
14758 #include <asm/pgalloc.h>
14759@@ -167,6 +171,8 @@ void free_task(struct task_struct *tsk)
14760 account_kernel_stack(tsk->stack, -1);
14761 free_thread_info(tsk->stack);
14762 rt_mutex_debug_task_free(tsk);
14763+ clr_vx_info(&tsk->vx_info);
14764+ clr_nx_info(&tsk->nx_info);
14765 ftrace_graph_exit_task(tsk);
14766 free_task_struct(tsk);
14767 }
14768@@ -505,6 +511,7 @@ static struct mm_struct * mm_init(struct
14769 if (likely(!mm_alloc_pgd(mm))) {
14770 mm->def_flags = 0;
14771 mmu_notifier_mm_init(mm);
14772+ set_vx_info(&mm->mm_vx_info, p->vx_info);
14773 return mm;
14774 }
14775
14776@@ -542,6 +549,7 @@ void __mmdrop(struct mm_struct *mm)
14777 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
14778 VM_BUG_ON(mm->pmd_huge_pte);
14779 #endif
14780+ clr_vx_info(&mm->mm_vx_info);
14781 free_mm(mm);
14782 }
14783 EXPORT_SYMBOL_GPL(__mmdrop);
14784@@ -729,6 +737,7 @@ struct mm_struct *dup_mm(struct task_str
14785 goto fail_nomem;
14786
14787 memcpy(mm, oldmm, sizeof(*mm));
14788+ mm->mm_vx_info = NULL;
14789 mm_init_cpumask(mm);
14790
14791 /* Initializing for Swap token stuff */
14792@@ -772,6 +781,7 @@ fail_nocontext:
14793 * If init_new_context() failed, we cannot use mmput() to free the mm
14794 * because it calls destroy_context()
14795 */
14796+ clr_vx_info(&mm->mm_vx_info);
14797 mm_free_pgd(mm);
14798 free_mm(mm);
14799 return NULL;
14800@@ -1057,6 +1067,8 @@ static struct task_struct *copy_process(
14801 int retval;
14802 struct task_struct *p;
14803 int cgroup_callbacks_done = 0;
14804+ struct vx_info *vxi;
14805+ struct nx_info *nxi;
14806
14807 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
14808 return ERR_PTR(-EINVAL);
14809@@ -1103,7 +1115,12 @@ static struct task_struct *copy_process(
14810 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
14811 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
14812 #endif
14813+ init_vx_info(&p->vx_info, current_vx_info());
14814+ init_nx_info(&p->nx_info, current_nx_info());
14815+
14816 retval = -EAGAIN;
14817+ if (!vx_nproc_avail(1))
14818+ goto bad_fork_free;
14819 if (atomic_read(&p->real_cred->user->processes) >=
14820 task_rlimit(p, RLIMIT_NPROC)) {
14821 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
14822@@ -1360,6 +1377,18 @@ static struct task_struct *copy_process(
14823
14824 total_forks++;
14825 spin_unlock(&current->sighand->siglock);
14826+
14827+ /* p is copy of current */
14828+ vxi = p->vx_info;
14829+ if (vxi) {
14830+ claim_vx_info(vxi, p);
14831+ atomic_inc(&vxi->cvirt.nr_threads);
14832+ atomic_inc(&vxi->cvirt.total_forks);
14833+ vx_nproc_inc(p);
14834+ }
14835+ nxi = p->nx_info;
14836+ if (nxi)
14837+ claim_nx_info(nxi, p);
14838 write_unlock_irq(&tasklist_lock);
14839 proc_fork_connector(p);
14840 cgroup_post_fork(p);
14841diff -NurpP --minimal linux-3.0.9/kernel/kthread.c linux-3.0.9-vs2.3.2.1/kernel/kthread.c
14842--- linux-3.0.9/kernel/kthread.c 2011-07-22 11:18:12.000000000 +0200
14843+++ linux-3.0.9-vs2.3.2.1/kernel/kthread.c 2011-06-10 22:11:24.000000000 +0200
14844@@ -16,6 +16,7 @@
14845 #include <linux/mutex.h>
14846 #include <linux/slab.h>
14847 #include <linux/freezer.h>
14848+#include <linux/vs_pid.h>
14849 #include <trace/events/sched.h>
14850
14851 static DEFINE_SPINLOCK(kthread_create_lock);
14852diff -NurpP --minimal linux-3.0.9/kernel/nsproxy.c linux-3.0.9-vs2.3.2.1/kernel/nsproxy.c
14853--- linux-3.0.9/kernel/nsproxy.c 2011-07-22 11:18:12.000000000 +0200
14854+++ linux-3.0.9-vs2.3.2.1/kernel/nsproxy.c 2011-06-13 14:09:44.000000000 +0200
14855@@ -20,6 +20,8 @@
14856 #include <linux/mnt_namespace.h>
14857 #include <linux/utsname.h>
14858 #include <linux/pid_namespace.h>
14859+#include <linux/vserver/global.h>
14860+#include <linux/vserver/debug.h>
14861 #include <net/net_namespace.h>
14862 #include <linux/ipc_namespace.h>
14863 #include <linux/proc_fs.h>
14864@@ -46,8 +48,11 @@ static inline struct nsproxy *create_nsp
14865 struct nsproxy *nsproxy;
14866
14867 nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
14868- if (nsproxy)
14869+ if (nsproxy) {
14870 atomic_set(&nsproxy->count, 1);
14871+ atomic_inc(&vs_global_nsproxy);
14872+ }
14873+ vxdprintk(VXD_CBIT(space, 2), "create_nsproxy = %p[1]", nsproxy);
14874 return nsproxy;
14875 }
14876
14877@@ -56,8 +61,11 @@ static inline struct nsproxy *create_nsp
14878 * Return the newly created nsproxy. Do not attach this to the task,
14879 * leave it to the caller to do proper locking and attach it to task.
14880 */
14881-static struct nsproxy *create_new_namespaces(unsigned long flags,
14882- struct task_struct *tsk, struct fs_struct *new_fs)
14883+static struct nsproxy *unshare_namespaces(unsigned long flags,
14884+ struct nsproxy *orig,
14885+ struct fs_struct *new_fs,
14886+ struct user_namespace *new_user,
14887+ struct pid_namespace *new_pid)
14888 {
14889 struct nsproxy *new_nsp;
14890 int err;
14891@@ -66,31 +74,31 @@ static struct nsproxy *create_new_namesp
14892 if (!new_nsp)
14893 return ERR_PTR(-ENOMEM);
14894
14895- new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
14896+ new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_fs);
14897 if (IS_ERR(new_nsp->mnt_ns)) {
14898 err = PTR_ERR(new_nsp->mnt_ns);
14899 goto out_ns;
14900 }
14901
14902- new_nsp->uts_ns = copy_utsname(flags, tsk);
14903+ new_nsp->uts_ns = copy_utsname(flags, orig->uts_ns, new_user);
14904 if (IS_ERR(new_nsp->uts_ns)) {
14905 err = PTR_ERR(new_nsp->uts_ns);
14906 goto out_uts;
14907 }
14908
14909- new_nsp->ipc_ns = copy_ipcs(flags, tsk);
14910+ new_nsp->ipc_ns = copy_ipcs(flags, orig->ipc_ns, new_user);
14911 if (IS_ERR(new_nsp->ipc_ns)) {
14912 err = PTR_ERR(new_nsp->ipc_ns);
14913 goto out_ipc;
14914 }
14915
14916- new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
14917+ new_nsp->pid_ns = copy_pid_ns(flags, new_pid);
14918 if (IS_ERR(new_nsp->pid_ns)) {
14919 err = PTR_ERR(new_nsp->pid_ns);
14920 goto out_pid;
14921 }
14922
14923- new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
14924+ new_nsp->net_ns = copy_net_ns(flags, orig->net_ns);
14925 if (IS_ERR(new_nsp->net_ns)) {
14926 err = PTR_ERR(new_nsp->net_ns);
14927 goto out_net;
14928@@ -115,6 +123,40 @@ out_ns:
14929 return ERR_PTR(err);
14930 }
14931
14932+static struct nsproxy *create_new_namespaces(unsigned long flags,
14933+ struct task_struct *tsk, struct fs_struct *new_fs)
14934+{
14935+ return unshare_namespaces(flags, tsk->nsproxy,
14936+ new_fs, task_cred_xxx(tsk, user)->user_ns,
14937+ task_active_pid_ns(tsk));
14938+}
14939+
14940+/*
14941+ * copies the nsproxy, setting refcount to 1, and grabbing a
14942+ * reference to all contained namespaces.
14943+ */
14944+struct nsproxy *copy_nsproxy(struct nsproxy *orig)
14945+{
14946+ struct nsproxy *ns = create_nsproxy();
14947+
14948+ if (ns) {
14949+ memcpy(ns, orig, sizeof(struct nsproxy));
14950+ atomic_set(&ns->count, 1);
14951+
14952+ if (ns->mnt_ns)
14953+ get_mnt_ns(ns->mnt_ns);
14954+ if (ns->uts_ns)
14955+ get_uts_ns(ns->uts_ns);
14956+ if (ns->ipc_ns)
14957+ get_ipc_ns(ns->ipc_ns);
14958+ if (ns->pid_ns)
14959+ get_pid_ns(ns->pid_ns);
14960+ if (ns->net_ns)
14961+ get_net(ns->net_ns);
14962+ }
14963+ return ns;
14964+}
14965+
14966 /*
14967 * called from clone. This now handles copy for nsproxy and all
14968 * namespaces therein.
14969@@ -122,9 +164,12 @@ out_ns:
14970 int copy_namespaces(unsigned long flags, struct task_struct *tsk)
14971 {
14972 struct nsproxy *old_ns = tsk->nsproxy;
14973- struct nsproxy *new_ns;
14974+ struct nsproxy *new_ns = NULL;
14975 int err = 0;
14976
14977+ vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08lx,%p[%p])",
14978+ flags, tsk, old_ns);
14979+
14980 if (!old_ns)
14981 return 0;
14982
14983@@ -134,7 +179,7 @@ int copy_namespaces(unsigned long flags,
14984 CLONE_NEWPID | CLONE_NEWNET)))
14985 return 0;
14986
14987- if (!capable(CAP_SYS_ADMIN)) {
14988+ if (!vx_can_unshare(CAP_SYS_ADMIN, flags)) {
14989 err = -EPERM;
14990 goto out;
14991 }
14992@@ -161,6 +206,9 @@ int copy_namespaces(unsigned long flags,
14993
14994 out:
14995 put_nsproxy(old_ns);
14996+ vxdprintk(VXD_CBIT(space, 3),
14997+ "copy_namespaces(0x%08lx,%p[%p]) = %d [%p]",
14998+ flags, tsk, old_ns, err, new_ns);
14999 return err;
15000 }
15001
15002@@ -174,7 +222,9 @@ void free_nsproxy(struct nsproxy *ns)
15003 put_ipc_ns(ns->ipc_ns);
15004 if (ns->pid_ns)
15005 put_pid_ns(ns->pid_ns);
15006- put_net(ns->net_ns);
15007+ if (ns->net_ns)
15008+ put_net(ns->net_ns);
15009+ atomic_dec(&vs_global_nsproxy);
15010 kmem_cache_free(nsproxy_cachep, ns);
15011 }
15012
15013@@ -187,11 +237,15 @@ int unshare_nsproxy_namespaces(unsigned
15014 {
15015 int err = 0;
15016
15017+ vxdprintk(VXD_CBIT(space, 4),
15018+ "unshare_nsproxy_namespaces(0x%08lx,[%p])",
15019+ unshare_flags, current->nsproxy);
15020+
15021 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
15022 CLONE_NEWNET)))
15023 return 0;
15024
15025- if (!capable(CAP_SYS_ADMIN))
15026+ if (!vx_can_unshare(CAP_SYS_ADMIN, unshare_flags))
15027 return -EPERM;
15028
15029 *new_nsp = create_new_namespaces(unshare_flags, current,
15030diff -NurpP --minimal linux-3.0.9/kernel/pid.c linux-3.0.9-vs2.3.2.1/kernel/pid.c
15031--- linux-3.0.9/kernel/pid.c 2011-05-22 16:17:59.000000000 +0200
15032+++ linux-3.0.9-vs2.3.2.1/kernel/pid.c 2011-06-10 22:11:24.000000000 +0200
15033@@ -36,6 +36,7 @@
15034 #include <linux/pid_namespace.h>
15035 #include <linux/init_task.h>
15036 #include <linux/syscalls.h>
15037+#include <linux/vs_pid.h>
15038
15039 #define pid_hashfn(nr, ns) \
15040 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
15041@@ -342,7 +343,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
15042
15043 struct pid *find_vpid(int nr)
15044 {
15045- return find_pid_ns(nr, current->nsproxy->pid_ns);
15046+ return find_pid_ns(vx_rmap_pid(nr), current->nsproxy->pid_ns);
15047 }
15048 EXPORT_SYMBOL_GPL(find_vpid);
15049
15050@@ -402,6 +403,9 @@ void transfer_pid(struct task_struct *ol
15051 struct task_struct *pid_task(struct pid *pid, enum pid_type type)
15052 {
15053 struct task_struct *result = NULL;
15054+
15055+ if (type == PIDTYPE_REALPID)
15056+ type = PIDTYPE_PID;
15057 if (pid) {
15058 struct hlist_node *first;
15059 first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
15060@@ -420,7 +424,7 @@ EXPORT_SYMBOL(pid_task);
15061 struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
15062 {
15063 rcu_lockdep_assert(rcu_read_lock_held());
15064- return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
15065+ return pid_task(find_pid_ns(vx_rmap_pid(nr), ns), PIDTYPE_PID);
15066 }
15067
15068 struct task_struct *find_task_by_vpid(pid_t vnr)
15069@@ -464,7 +468,7 @@ struct pid *find_get_pid(pid_t nr)
15070 }
15071 EXPORT_SYMBOL_GPL(find_get_pid);
15072
15073-pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
15074+pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns)
15075 {
15076 struct upid *upid;
15077 pid_t nr = 0;
15078@@ -477,6 +481,11 @@ pid_t pid_nr_ns(struct pid *pid, struct
15079 return nr;
15080 }
15081
15082+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
15083+{
15084+ return vx_map_pid(pid_unmapped_nr_ns(pid, ns));
15085+}
15086+
15087 pid_t pid_vnr(struct pid *pid)
15088 {
15089 return pid_nr_ns(pid, current->nsproxy->pid_ns);
15090diff -NurpP --minimal linux-3.0.9/kernel/pid_namespace.c linux-3.0.9-vs2.3.2.1/kernel/pid_namespace.c
15091--- linux-3.0.9/kernel/pid_namespace.c 2011-05-22 16:17:59.000000000 +0200
15092+++ linux-3.0.9-vs2.3.2.1/kernel/pid_namespace.c 2011-06-10 22:17:45.000000000 +0200
15093@@ -15,6 +15,7 @@
15094 #include <linux/acct.h>
15095 #include <linux/slab.h>
15096 #include <linux/proc_fs.h>
15097+#include <linux/vserver/global.h>
15098
15099 #define BITS_PER_PAGE (PAGE_SIZE*8)
15100
15101@@ -88,6 +89,7 @@ static struct pid_namespace *create_pid_
15102 goto out_free_map;
15103
15104 kref_init(&ns->kref);
15105+ atomic_inc(&vs_global_pid_ns);
15106 ns->level = level;
15107 ns->parent = get_pid_ns(parent_pid_ns);
15108
15109@@ -119,6 +121,7 @@ static void destroy_pid_namespace(struct
15110
15111 for (i = 0; i < PIDMAP_ENTRIES; i++)
15112 kfree(ns->pidmap[i].page);
15113+ atomic_dec(&vs_global_pid_ns);
15114 kmem_cache_free(pid_ns_cachep, ns);
15115 }
15116
15117diff -NurpP --minimal linux-3.0.9/kernel/posix-timers.c linux-3.0.9-vs2.3.2.1/kernel/posix-timers.c
15118--- linux-3.0.9/kernel/posix-timers.c 2011-07-22 11:18:12.000000000 +0200
15119+++ linux-3.0.9-vs2.3.2.1/kernel/posix-timers.c 2011-06-10 22:11:24.000000000 +0200
15120@@ -47,6 +47,7 @@
15121 #include <linux/wait.h>
15122 #include <linux/workqueue.h>
15123 #include <linux/module.h>
15124+#include <linux/vs_context.h>
15125
15126 /*
15127 * Management arrays for POSIX timers. Timers are kept in slab memory
15128@@ -340,6 +341,7 @@ int posix_timer_event(struct k_itimer *t
15129 {
15130 struct task_struct *task;
15131 int shared, ret = -1;
15132+
15133 /*
15134 * FIXME: if ->sigq is queued we can race with
15135 * dequeue_signal()->do_schedule_next_timer().
15136@@ -356,10 +358,18 @@ int posix_timer_event(struct k_itimer *t
15137 rcu_read_lock();
15138 task = pid_task(timr->it_pid, PIDTYPE_PID);
15139 if (task) {
15140+ struct vx_info_save vxis;
15141+ struct vx_info *vxi;
15142+
15143+ vxi = get_vx_info(task->vx_info);
15144+ enter_vx_info(vxi, &vxis);
15145 shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
15146 ret = send_sigqueue(timr->sigq, task, shared);
15147+ leave_vx_info(&vxis);
15148+ put_vx_info(vxi);
15149 }
15150 rcu_read_unlock();
15151+
15152 /* If we failed to send the signal the timer stops. */
15153 return ret > 0;
15154 }
15155diff -NurpP --minimal linux-3.0.9/kernel/printk.c linux-3.0.9-vs2.3.2.1/kernel/printk.c
15156--- linux-3.0.9/kernel/printk.c 2011-11-15 16:40:47.000000000 +0100
15157+++ linux-3.0.9-vs2.3.2.1/kernel/printk.c 2011-10-18 13:51:13.000000000 +0200
15158@@ -41,6 +41,7 @@
15159 #include <linux/cpu.h>
15160 #include <linux/notifier.h>
15161 #include <linux/rculist.h>
15162+#include <linux/vs_cvirt.h>
15163
15164 #include <asm/uaccess.h>
15165
15166@@ -314,7 +315,7 @@ static int check_syslog_permissions(int
15167 return 0;
15168
15169 if (syslog_action_restricted(type)) {
15170- if (capable(CAP_SYSLOG))
15171+ if (vx_capable(CAP_SYSLOG, VXC_SYSLOG))
15172 return 0;
15173 /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
15174 if (capable(CAP_SYS_ADMIN)) {
15175@@ -342,12 +343,9 @@ int do_syslog(int type, char __user *buf
15176 if (error)
15177 return error;
15178
15179- switch (type) {
15180- case SYSLOG_ACTION_CLOSE: /* Close log */
15181- break;
15182- case SYSLOG_ACTION_OPEN: /* Open log */
15183- break;
15184- case SYSLOG_ACTION_READ: /* Read from log */
15185+ if ((type == SYSLOG_ACTION_READ) ||
15186+ (type == SYSLOG_ACTION_READ_ALL) ||
15187+ (type == SYSLOG_ACTION_READ_CLEAR)) {
15188 error = -EINVAL;
15189 if (!buf || len < 0)
15190 goto out;
15191@@ -358,6 +356,16 @@ int do_syslog(int type, char __user *buf
15192 error = -EFAULT;
15193 goto out;
15194 }
15195+ }
15196+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
15197+ return vx_do_syslog(type, buf, len);
15198+
15199+ switch (type) {
15200+ case SYSLOG_ACTION_CLOSE: /* Close log */
15201+ break;
15202+ case SYSLOG_ACTION_OPEN: /* Open log */
15203+ break;
15204+ case SYSLOG_ACTION_READ: /* Read from log */
15205 error = wait_event_interruptible(log_wait,
15206 (log_start - log_end));
15207 if (error)
15208@@ -384,16 +392,6 @@ int do_syslog(int type, char __user *buf
15209 /* FALL THRU */
15210 /* Read last kernel messages */
15211 case SYSLOG_ACTION_READ_ALL:
15212- error = -EINVAL;
15213- if (!buf || len < 0)
15214- goto out;
15215- error = 0;
15216- if (!len)
15217- goto out;
15218- if (!access_ok(VERIFY_WRITE, buf, len)) {
15219- error = -EFAULT;
15220- goto out;
15221- }
15222 count = len;
15223 if (count > log_buf_len)
15224 count = log_buf_len;
15225diff -NurpP --minimal linux-3.0.9/kernel/ptrace.c linux-3.0.9-vs2.3.2.1/kernel/ptrace.c
15226--- linux-3.0.9/kernel/ptrace.c 2011-07-22 11:18:12.000000000 +0200
15227+++ linux-3.0.9-vs2.3.2.1/kernel/ptrace.c 2011-10-27 16:12:46.000000000 +0200
15228@@ -22,6 +22,7 @@
15229 #include <linux/syscalls.h>
15230 #include <linux/uaccess.h>
15231 #include <linux/regset.h>
15232+#include <linux/vs_context.h>
15233 #include <linux/hw_breakpoint.h>
15234
15235
15236@@ -145,9 +146,15 @@ int __ptrace_may_access(struct task_stru
15237 * or halting the specified task is impossible.
15238 */
15239 int dumpable = 0;
15240+
15241 /* Don't let security modules deny introspection */
15242 if (task == current)
15243 return 0;
15244+
15245+ vxdprintk(VXD_CBIT(perm, 8),
15246+ "__ptrace_may_access(%p[#%d,%d,%d], %d)",
15247+ task, task->xid, task->pid, task->tgid, mode);
15248+
15249 rcu_read_lock();
15250 tcred = __task_cred(task);
15251 if (cred->user->user_ns == tcred->user->user_ns &&
15252@@ -161,6 +168,8 @@ int __ptrace_may_access(struct task_stru
15253 if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
15254 goto ok;
15255 rcu_read_unlock();
15256+ vxdprintk(VXD_CBIT(perm, 8),
15257+ "__ptrace_may_access(%p) cred/cap failed", task);
15258 return -EPERM;
15259 ok:
15260 rcu_read_unlock();
15261@@ -169,6 +178,24 @@ ok:
15262 dumpable = get_dumpable(task->mm);
15263 if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE))
15264 return -EPERM;
15265+ vxdprintk(VXD_CBIT(perm, 8),
15266+ "__ptrace_may_access(%p) cap/dump ok", task);
15267+
15268+ if (!vx_check(task->xid, VS_ADMIN_P|VS_WATCH_P|VS_IDENT))
15269+ return -EPERM;
15270+ vxdprintk(VXD_CBIT(perm, 8),
15271+ "__ptrace_may_access(%p) check ok", task);
15272+
15273+ printk("%d,%d %d,%d\n",
15274+ vx_check(task->xid, VS_IDENT),
15275+ task_vx_flags(task, VXF_STATE_ADMIN, 0),
15276+ current->xid, task->xid);
15277+
15278+ if (!vx_check(task->xid, VS_IDENT) &&
15279+ !task_vx_flags(task, VXF_STATE_ADMIN, 0))
15280+ return -EACCES;
15281+ vxdprintk(VXD_CBIT(perm, 8),
15282+ "__ptrace_may_access(%p) admin ok", task);
15283
15284 return security_ptrace_access_check(task, mode);
15285 }
15286diff -NurpP --minimal linux-3.0.9/kernel/sched.c linux-3.0.9-vs2.3.2.1/kernel/sched.c
15287--- linux-3.0.9/kernel/sched.c 2011-11-15 16:40:47.000000000 +0100
15288+++ linux-3.0.9-vs2.3.2.1/kernel/sched.c 2011-10-18 13:51:13.000000000 +0200
15289@@ -71,6 +71,8 @@
15290 #include <linux/ctype.h>
15291 #include <linux/ftrace.h>
15292 #include <linux/slab.h>
15293+#include <linux/vs_sched.h>
15294+#include <linux/vs_cvirt.h>
15295
15296 #include <asm/tlb.h>
15297 #include <asm/irq_regs.h>
15298@@ -3461,9 +3463,17 @@ static void calc_global_nohz(unsigned lo
15299 */
15300 void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
15301 {
15302- loads[0] = (avenrun[0] + offset) << shift;
15303- loads[1] = (avenrun[1] + offset) << shift;
15304- loads[2] = (avenrun[2] + offset) << shift;
15305+ if (vx_flags(VXF_VIRT_LOAD, 0)) {
15306+ struct vx_info *vxi = current_vx_info();
15307+
15308+ loads[0] = (vxi->cvirt.load[0] + offset) << shift;
15309+ loads[1] = (vxi->cvirt.load[1] + offset) << shift;
15310+ loads[2] = (vxi->cvirt.load[2] + offset) << shift;
15311+ } else {
15312+ loads[0] = (avenrun[0] + offset) << shift;
15313+ loads[1] = (avenrun[1] + offset) << shift;
15314+ loads[2] = (avenrun[2] + offset) << shift;
15315+ }
15316 }
15317
15318 /*
15319@@ -3722,16 +3732,19 @@ void account_user_time(struct task_struc
15320 cputime_t cputime_scaled)
15321 {
15322 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
15323+ struct vx_info *vxi = p->vx_info; /* p is _always_ current */
15324 cputime64_t tmp;
15325+ int nice = (TASK_NICE(p) > 0);
15326
15327 /* Add user time to process. */
15328 p->utime = cputime_add(p->utime, cputime);
15329 p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
15330+ vx_account_user(vxi, cputime, nice);
15331 account_group_user_time(p, cputime);
15332
15333 /* Add user time to cpustat. */
15334 tmp = cputime_to_cputime64(cputime);
15335- if (TASK_NICE(p) > 0)
15336+ if (nice)
15337 cpustat->nice = cputime64_add(cpustat->nice, tmp);
15338 else
15339 cpustat->user = cputime64_add(cpustat->user, tmp);
15340@@ -3783,10 +3796,12 @@ void __account_system_time(struct task_s
15341 cputime_t cputime_scaled, cputime64_t *target_cputime64)
15342 {
15343 cputime64_t tmp = cputime_to_cputime64(cputime);
15344+ struct vx_info *vxi = p->vx_info; /* p is _always_ current */
15345
15346 /* Add system time to process. */
15347 p->stime = cputime_add(p->stime, cputime);
15348 p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
15349+ vx_account_system(vxi, cputime, 0 /* do we have idle time? */);
15350 account_group_system_time(p, cputime);
15351
15352 /* Add system time to cpustat. */
15353@@ -4954,7 +4969,7 @@ SYSCALL_DEFINE1(nice, int, increment)
15354 nice = 19;
15355
15356 if (increment < 0 && !can_nice(current, nice))
15357- return -EPERM;
15358+ return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM;
15359
15360 retval = security_task_setnice(current, nice);
15361 if (retval)
15362diff -NurpP --minimal linux-3.0.9/kernel/sched_fair.c linux-3.0.9-vs2.3.2.1/kernel/sched_fair.c
15363--- linux-3.0.9/kernel/sched_fair.c 2011-07-22 11:18:12.000000000 +0200
15364+++ linux-3.0.9-vs2.3.2.1/kernel/sched_fair.c 2011-07-22 11:20:39.000000000 +0200
15365@@ -998,6 +998,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
15366 __enqueue_entity(cfs_rq, se);
15367 se->on_rq = 1;
15368
15369+ if (entity_is_task(se))
15370+ vx_activate_task(task_of(se));
15371 if (cfs_rq->nr_running == 1)
15372 list_add_leaf_cfs_rq(cfs_rq);
15373 }
15374@@ -1074,6 +1076,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
15375 if (se != cfs_rq->curr)
15376 __dequeue_entity(cfs_rq, se);
15377 se->on_rq = 0;
15378+ if (entity_is_task(se))
15379+ vx_deactivate_task(task_of(se));
15380 update_cfs_load(cfs_rq, 0);
15381 account_entity_dequeue(cfs_rq, se);
15382
15383diff -NurpP --minimal linux-3.0.9/kernel/signal.c linux-3.0.9-vs2.3.2.1/kernel/signal.c
15384--- linux-3.0.9/kernel/signal.c 2011-11-15 16:40:47.000000000 +0100
15385+++ linux-3.0.9-vs2.3.2.1/kernel/signal.c 2011-11-15 17:37:07.000000000 +0100
15386@@ -28,6 +28,8 @@
15387 #include <linux/freezer.h>
15388 #include <linux/pid_namespace.h>
15389 #include <linux/nsproxy.h>
15390+#include <linux/vs_context.h>
15391+#include <linux/vs_pid.h>
15392 #define CREATE_TRACE_POINTS
15393 #include <trace/events/signal.h>
15394
15395@@ -744,9 +746,18 @@ static int check_kill_permission(int sig
15396 struct pid *sid;
15397 int error;
15398
15399+ vxdprintk(VXD_CBIT(misc, 7),
15400+ "check_kill_permission(%d,%p,%p[#%u,%u])",
15401+ sig, info, t, vx_task_xid(t), t->pid);
15402+
15403 if (!valid_signal(sig))
15404 return -EINVAL;
15405
15406+/* FIXME: needed? if so, why?
15407+ if ((info != SEND_SIG_NOINFO) &&
15408+ (is_si_special(info) || !si_fromuser(info)))
15409+ goto skip; */
15410+
15411 if (!si_fromuser(info))
15412 return 0;
15413
15414@@ -770,6 +781,20 @@ static int check_kill_permission(int sig
15415 }
15416 }
15417
15418+ error = -EPERM;
15419+ if (t->pid == 1 && current->xid)
15420+ return error;
15421+
15422+ error = -ESRCH;
15423+ /* FIXME: we shouldn't return ESRCH ever, to avoid
15424+ loops, maybe ENOENT or EACCES? */
15425+ if (!vx_check(vx_task_xid(t), VS_WATCH_P | VS_IDENT)) {
15426+ vxdprintk(current->xid || VXD_CBIT(misc, 7),
15427+ "signal %d[%p] xid mismatch %p[#%u,%u] xid=#%u",
15428+ sig, info, t, vx_task_xid(t), t->pid, current->xid);
15429+ return error;
15430+ }
15431+/* skip: */
15432 return security_task_kill(t, info, sig, 0);
15433 }
15434
15435@@ -1246,7 +1271,7 @@ int kill_pid_info(int sig, struct siginf
15436 rcu_read_lock();
15437 retry:
15438 p = pid_task(pid, PIDTYPE_PID);
15439- if (p) {
15440+ if (p && vx_check(vx_task_xid(p), VS_IDENT)) {
15441 error = group_send_sig_info(sig, info, p);
15442 if (unlikely(error == -ESRCH))
15443 /*
15444@@ -1285,7 +1310,7 @@ int kill_pid_info_as_uid(int sig, struct
15445
15446 rcu_read_lock();
15447 p = pid_task(pid, PIDTYPE_PID);
15448- if (!p) {
15449+ if (!p || !vx_check(vx_task_xid(p), VS_IDENT)) {
15450 ret = -ESRCH;
15451 goto out_unlock;
15452 }
15453@@ -1340,8 +1365,10 @@ static int kill_something_info(int sig,
15454 struct task_struct * p;
15455
15456 for_each_process(p) {
15457- if (task_pid_vnr(p) > 1 &&
15458- !same_thread_group(p, current)) {
15459+ if (vx_check(vx_task_xid(p), VS_ADMIN|VS_IDENT) &&
15460+ task_pid_vnr(p) > 1 &&
15461+ !same_thread_group(p, current) &&
15462+ !vx_current_initpid(p->pid)) {
15463 int err = group_send_sig_info(sig, info, p);
15464 ++count;
15465 if (err != -EPERM)
15466@@ -2138,6 +2165,11 @@ relock:
15467 !sig_kernel_only(signr))
15468 continue;
15469
15470+ /* virtual init is protected against user signals */
15471+ if ((info->si_code == SI_USER) &&
15472+ vx_current_initpid(current->pid))
15473+ continue;
15474+
15475 if (sig_kernel_stop(signr)) {
15476 /*
15477 * The default action is to stop all threads in
15478diff -NurpP --minimal linux-3.0.9/kernel/softirq.c linux-3.0.9-vs2.3.2.1/kernel/softirq.c
15479--- linux-3.0.9/kernel/softirq.c 2011-07-22 11:18:12.000000000 +0200
15480+++ linux-3.0.9-vs2.3.2.1/kernel/softirq.c 2011-07-22 11:20:39.000000000 +0200
15481@@ -24,6 +24,7 @@
15482 #include <linux/ftrace.h>
15483 #include <linux/smp.h>
15484 #include <linux/tick.h>
15485+#include <linux/vs_context.h>
15486
15487 #define CREATE_TRACE_POINTS
15488 #include <trace/events/irq.h>
15489diff -NurpP --minimal linux-3.0.9/kernel/sys.c linux-3.0.9-vs2.3.2.1/kernel/sys.c
15490--- linux-3.0.9/kernel/sys.c 2011-11-15 16:40:47.000000000 +0100
15491+++ linux-3.0.9-vs2.3.2.1/kernel/sys.c 2011-11-15 17:37:07.000000000 +0100
15492@@ -45,6 +45,7 @@
15493 #include <linux/syscalls.h>
15494 #include <linux/kprobes.h>
15495 #include <linux/user_namespace.h>
15496+#include <linux/vs_pid.h>
15497
15498 #include <linux/kmsg_dump.h>
15499 /* Move somewhere else to avoid recompiling? */
15500@@ -155,7 +156,10 @@ static int set_one_prio(struct task_stru
15501 goto out;
15502 }
15503 if (niceval < task_nice(p) && !can_nice(p, niceval)) {
15504- error = -EACCES;
15505+ if (vx_flags(VXF_IGNEG_NICE, 0))
15506+ error = 0;
15507+ else
15508+ error = -EACCES;
15509 goto out;
15510 }
15511 no_nice = security_task_setnice(p, niceval);
15512@@ -205,6 +209,8 @@ SYSCALL_DEFINE3(setpriority, int, which,
15513 else
15514 pgrp = task_pgrp(current);
15515 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
15516+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
15517+ continue;
15518 error = set_one_prio(p, niceval, error);
15519 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
15520 break;
15521@@ -268,6 +274,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
15522 else
15523 pgrp = task_pgrp(current);
15524 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
15525+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
15526+ continue;
15527 niceval = 20 - task_nice(p);
15528 if (niceval > retval)
15529 retval = niceval;
15530@@ -387,6 +395,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off);
15531
15532 static DEFINE_MUTEX(reboot_mutex);
15533
15534+long vs_reboot(unsigned int, void __user *);
15535+
15536 /*
15537 * Reboot system call: for obvious reasons only root may call it,
15538 * and even root needs to set up some magic numbers in the registers
15539@@ -419,6 +429,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int
15540 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
15541 cmd = LINUX_REBOOT_CMD_HALT;
15542
15543+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
15544+ return vs_reboot(cmd, arg);
15545+
15546 mutex_lock(&reboot_mutex);
15547 switch (cmd) {
15548 case LINUX_REBOOT_CMD_RESTART:
15549@@ -1235,7 +1248,8 @@ SYSCALL_DEFINE2(sethostname, char __user
15550 int errno;
15551 char tmp[__NEW_UTS_LEN];
15552
15553- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
15554+ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
15555+ CAP_SYS_ADMIN, VXC_SET_UTSNAME))
15556 return -EPERM;
15557
15558 if (len < 0 || len > __NEW_UTS_LEN)
15559@@ -1285,7 +1299,8 @@ SYSCALL_DEFINE2(setdomainname, char __us
15560 int errno;
15561 char tmp[__NEW_UTS_LEN];
15562
15563- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
15564+ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
15565+ CAP_SYS_ADMIN, VXC_SET_UTSNAME))
15566 return -EPERM;
15567 if (len < 0 || len > __NEW_UTS_LEN)
15568 return -EINVAL;
15569@@ -1403,7 +1418,7 @@ int do_prlimit(struct task_struct *tsk,
15570 /* Keep the capable check against init_user_ns until
15571 cgroups can contain all limits */
15572 if (new_rlim->rlim_max > rlim->rlim_max &&
15573- !capable(CAP_SYS_RESOURCE))
15574+ !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
15575 retval = -EPERM;
15576 if (!retval)
15577 retval = security_task_setrlimit(tsk->group_leader,
15578@@ -1457,7 +1472,8 @@ static int check_prlimit_permission(stru
15579 cred->gid == tcred->sgid &&
15580 cred->gid == tcred->gid))
15581 return 0;
15582- if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE))
15583+ if (vx_ns_capable(tcred->user->user_ns,
15584+ CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
15585 return 0;
15586
15587 return -EPERM;
15588diff -NurpP --minimal linux-3.0.9/kernel/sysctl.c linux-3.0.9-vs2.3.2.1/kernel/sysctl.c
15589--- linux-3.0.9/kernel/sysctl.c 2011-07-22 11:18:12.000000000 +0200
15590+++ linux-3.0.9-vs2.3.2.1/kernel/sysctl.c 2011-06-15 02:40:14.000000000 +0200
15591@@ -75,6 +75,7 @@
15592 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
15593 #include <linux/lockdep.h>
15594 #endif
15595+extern char vshelper_path[];
15596 #ifdef CONFIG_CHR_DEV_SG
15597 #include <scsi/sg.h>
15598 #endif
15599@@ -568,6 +569,13 @@ static struct ctl_table kern_table[] = {
15600 .proc_handler = proc_dostring,
15601 },
15602 #endif
15603+ {
15604+ .procname = "vshelper",
15605+ .data = &vshelper_path,
15606+ .maxlen = 256,
15607+ .mode = 0644,
15608+ .proc_handler = &proc_dostring,
15609+ },
15610 #ifdef CONFIG_CHR_DEV_SG
15611 {
15612 .procname = "sg-big-buff",
15613diff -NurpP --minimal linux-3.0.9/kernel/sysctl_binary.c linux-3.0.9-vs2.3.2.1/kernel/sysctl_binary.c
15614--- linux-3.0.9/kernel/sysctl_binary.c 2011-05-22 16:17:59.000000000 +0200
15615+++ linux-3.0.9-vs2.3.2.1/kernel/sysctl_binary.c 2011-06-10 22:11:24.000000000 +0200
15616@@ -73,6 +73,7 @@ static const struct bin_table bin_kern_t
15617
15618 { CTL_INT, KERN_PANIC, "panic" },
15619 { CTL_INT, KERN_REALROOTDEV, "real-root-dev" },
15620+ { CTL_STR, KERN_VSHELPER, "vshelper" },
15621
15622 { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" },
15623 { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" },
15624diff -NurpP --minimal linux-3.0.9/kernel/time/timekeeping.c linux-3.0.9-vs2.3.2.1/kernel/time/timekeeping.c
15625--- linux-3.0.9/kernel/time/timekeeping.c 2011-07-22 11:18:12.000000000 +0200
15626+++ linux-3.0.9-vs2.3.2.1/kernel/time/timekeeping.c 2011-06-10 22:11:24.000000000 +0200
15627@@ -233,6 +233,7 @@ void getnstimeofday(struct timespec *ts)
15628 } while (read_seqretry(&xtime_lock, seq));
15629
15630 timespec_add_ns(ts, nsecs);
15631+ vx_adjust_timespec(ts);
15632 }
15633
15634 EXPORT_SYMBOL(getnstimeofday);
15635diff -NurpP --minimal linux-3.0.9/kernel/time.c linux-3.0.9-vs2.3.2.1/kernel/time.c
15636--- linux-3.0.9/kernel/time.c 2011-11-15 16:40:47.000000000 +0100
15637+++ linux-3.0.9-vs2.3.2.1/kernel/time.c 2011-11-15 17:37:07.000000000 +0100
15638@@ -92,7 +92,7 @@ SYSCALL_DEFINE1(stime, time_t __user *,
15639 if (err)
15640 return err;
15641
15642- do_settimeofday(&tv);
15643+ vx_settimeofday(&tv);
15644 return 0;
15645 }
15646
15647@@ -177,7 +177,7 @@ int do_sys_settimeofday(const struct tim
15648 /* SMP safe, again the code in arch/foo/time.c should
15649 * globally block out interrupts when it runs.
15650 */
15651- return do_settimeofday(tv);
15652+ return vx_settimeofday(tv);
15653 }
15654 return 0;
15655 }
15656diff -NurpP --minimal linux-3.0.9/kernel/timer.c linux-3.0.9-vs2.3.2.1/kernel/timer.c
15657--- linux-3.0.9/kernel/timer.c 2011-07-22 11:18:12.000000000 +0200
15658+++ linux-3.0.9-vs2.3.2.1/kernel/timer.c 2011-06-15 02:40:14.000000000 +0200
15659@@ -40,6 +40,10 @@
15660 #include <linux/irq_work.h>
15661 #include <linux/sched.h>
15662 #include <linux/slab.h>
15663+#include <linux/vs_base.h>
15664+#include <linux/vs_cvirt.h>
15665+#include <linux/vs_pid.h>
15666+#include <linux/vserver/sched.h>
15667
15668 #include <asm/uaccess.h>
15669 #include <asm/unistd.h>
15670@@ -1336,12 +1340,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, sec
15671
15672 #endif
15673
15674-#ifndef __alpha__
15675-
15676-/*
15677- * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
15678- * should be moved into arch/i386 instead?
15679- */
15680
15681 /**
15682 * sys_getpid - return the thread group id of the current process
15683@@ -1370,10 +1368,23 @@ SYSCALL_DEFINE0(getppid)
15684 rcu_read_lock();
15685 pid = task_tgid_vnr(current->real_parent);
15686 rcu_read_unlock();
15687+ return vx_map_pid(pid);
15688+}
15689
15690- return pid;
15691+#ifdef __alpha__
15692+
15693+/*
15694+ * The Alpha uses getxpid, getxuid, and getxgid instead.
15695+ */
15696+
15697+asmlinkage long do_getxpid(long *ppid)
15698+{
15699+ *ppid = sys_getppid();
15700+ return sys_getpid();
15701 }
15702
15703+#else /* __alpha__ */
15704+
15705 SYSCALL_DEFINE0(getuid)
15706 {
15707 /* Only we change this so SMP safe */
15708diff -NurpP --minimal linux-3.0.9/kernel/user_namespace.c linux-3.0.9-vs2.3.2.1/kernel/user_namespace.c
15709--- linux-3.0.9/kernel/user_namespace.c 2011-03-15 18:07:42.000000000 +0100
15710+++ linux-3.0.9-vs2.3.2.1/kernel/user_namespace.c 2011-06-10 22:11:24.000000000 +0200
15711@@ -11,6 +11,7 @@
15712 #include <linux/user_namespace.h>
15713 #include <linux/highuid.h>
15714 #include <linux/cred.h>
15715+#include <linux/vserver/global.h>
15716
15717 static struct kmem_cache *user_ns_cachep __read_mostly;
15718
15719@@ -33,6 +34,7 @@ int create_user_ns(struct cred *new)
15720 return -ENOMEM;
15721
15722 kref_init(&ns->kref);
15723+ atomic_inc(&vs_global_user_ns);
15724
15725 for (n = 0; n < UIDHASH_SZ; ++n)
15726 INIT_HLIST_HEAD(ns->uidhash_table + n);
15727@@ -81,6 +83,8 @@ void free_user_ns(struct kref *kref)
15728 struct user_namespace *ns =
15729 container_of(kref, struct user_namespace, kref);
15730
15731+ /* FIXME: maybe move into destroyer? */
15732+ atomic_dec(&vs_global_user_ns);
15733 INIT_WORK(&ns->destroyer, free_user_ns_work);
15734 schedule_work(&ns->destroyer);
15735 }
15736diff -NurpP --minimal linux-3.0.9/kernel/utsname.c linux-3.0.9-vs2.3.2.1/kernel/utsname.c
15737--- linux-3.0.9/kernel/utsname.c 2011-07-22 11:18:12.000000000 +0200
15738+++ linux-3.0.9-vs2.3.2.1/kernel/utsname.c 2011-06-13 14:09:44.000000000 +0200
15739@@ -16,14 +16,17 @@
15740 #include <linux/slab.h>
15741 #include <linux/user_namespace.h>
15742 #include <linux/proc_fs.h>
15743+#include <linux/vserver/global.h>
15744
15745 static struct uts_namespace *create_uts_ns(void)
15746 {
15747 struct uts_namespace *uts_ns;
15748
15749 uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
15750- if (uts_ns)
15751+ if (uts_ns) {
15752 kref_init(&uts_ns->kref);
15753+ atomic_inc(&vs_global_uts_ns);
15754+ }
15755 return uts_ns;
15756 }
15757
15758@@ -32,8 +35,8 @@ static struct uts_namespace *create_uts_
15759 * @old_ns: namespace to clone
15760 * Return NULL on error (failure to kmalloc), new ns otherwise
15761 */
15762-static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
15763- struct uts_namespace *old_ns)
15764+static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns,
15765+ struct user_namespace *old_user)
15766 {
15767 struct uts_namespace *ns;
15768
15769@@ -43,7 +46,7 @@ static struct uts_namespace *clone_uts_n
15770
15771 down_read(&uts_sem);
15772 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
15773- ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
15774+ ns->user_ns = get_user_ns(old_user);
15775 up_read(&uts_sem);
15776 return ns;
15777 }
15778@@ -55,9 +58,9 @@ static struct uts_namespace *clone_uts_n
15779 * versa.
15780 */
15781 struct uts_namespace *copy_utsname(unsigned long flags,
15782- struct task_struct *tsk)
15783+ struct uts_namespace *old_ns,
15784+ struct user_namespace *user_ns)
15785 {
15786- struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
15787 struct uts_namespace *new_ns;
15788
15789 BUG_ON(!old_ns);
15790@@ -66,7 +69,7 @@ struct uts_namespace *copy_utsname(unsig
15791 if (!(flags & CLONE_NEWUTS))
15792 return old_ns;
15793
15794- new_ns = clone_uts_ns(tsk, old_ns);
15795+ new_ns = clone_uts_ns(old_ns, user_ns);
15796
15797 put_uts_ns(old_ns);
15798 return new_ns;
15799@@ -78,6 +81,7 @@ void free_uts_ns(struct kref *kref)
15800
15801 ns = container_of(kref, struct uts_namespace, kref);
15802 put_user_ns(ns->user_ns);
15803+ atomic_dec(&vs_global_uts_ns);
15804 kfree(ns);
15805 }
15806
15807diff -NurpP --minimal linux-3.0.9/kernel/vserver/Kconfig linux-3.0.9-vs2.3.2.1/kernel/vserver/Kconfig
15808--- linux-3.0.9/kernel/vserver/Kconfig 1970-01-01 01:00:00.000000000 +0100
15809+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/Kconfig 2011-08-08 18:06:22.000000000 +0200
15810@@ -0,0 +1,232 @@
15811+#
15812+# Linux VServer configuration
15813+#
15814+
15815+menu "Linux VServer"
15816+
15817+config VSERVER_AUTO_LBACK
15818+ bool "Automatically Assign Loopback IP"
15819+ default y
15820+ help
15821+ Automatically assign a guest specific loopback
15822+ IP and add it to the kernel network stack on
15823+ startup.
15824+
15825+config VSERVER_AUTO_SINGLE
15826+ bool "Automatic Single IP Special Casing"
15827+ depends on EXPERIMENTAL
15828+ default y
15829+ help
15830+ This allows network contexts with a single IP to
15831+ automatically remap 0.0.0.0 bindings to that IP,
15832+ avoiding further network checks and improving
15833+ performance.
15834+
15835+ (note: such guests do not allow changing the IP
15836+ on the fly and do not show loopback addresses)
15837+
15838+config VSERVER_COWBL
15839+ bool "Enable COW Immutable Link Breaking"
15840+ default y
15841+ help
15842+ This enables the COW (Copy-On-Write) link break code.
15843+ It allows you to treat unified files like normal files
15844+ when writing to them (which will implicitly break the
15845+ link and create a copy of the unified file)
15846+
15847+config VSERVER_VTIME
15848+ bool "Enable Virtualized Guest Time"
15849+ depends on EXPERIMENTAL
15850+ default n
15851+ help
15852+ This enables per guest time offsets to allow for
15853+ adjusting the system clock individually per guest.
15854+ This adds some overhead to the time functions and
15855+ therefore should not be enabled without good reason.
15856+
15857+config VSERVER_DEVICE
15858+ bool "Enable Guest Device Mapping"
15859+ depends on EXPERIMENTAL
15860+ default n
15861+ help
15862+ This enables generic device remapping.
15863+
15864+config VSERVER_PROC_SECURE
15865+ bool "Enable Proc Security"
15866+ depends on PROC_FS
15867+ default y
15868+ help
15869+ This configures ProcFS security to initially hide
15870+ non-process entries for all contexts except the main and
15871+ spectator context (i.e. for all guests), which is a secure
15872+ default.
15873+
15874+ (note: on 1.2x the entries were visible by default)
15875+
15876+choice
15877+ prompt "Persistent Inode Tagging"
15878+ default TAGGING_ID24
15879+ help
15880+ This adds persistent context information to filesystems
15881+ mounted with the tagxid option. Tagging is a requirement
15882+ for per-context disk limits and per-context quota.
15883+
15884+
15885+config TAGGING_NONE
15886+ bool "Disabled"
15887+ help
15888+ do not store per-context information in inodes.
15889+
15890+config TAGGING_UID16
15891+ bool "UID16/GID32"
15892+ help
15893+ reduces UID to 16 bit, but leaves GID at 32 bit.
15894+
15895+config TAGGING_GID16
15896+ bool "UID32/GID16"
15897+ help
15898+ reduces GID to 16 bit, but leaves UID at 32 bit.
15899+
15900+config TAGGING_ID24
15901+ bool "UID24/GID24"
15902+ help
15903+ uses the upper 8bit from UID and GID for XID tagging
15904+ which leaves 24bit for UID/GID each, which should be
15905+ more than sufficient for normal use.
15906+
15907+config TAGGING_INTERN
15908+ bool "UID32/GID32"
15909+ help
15910+ this uses otherwise reserved inode fields in the on
15911+ disk representation, which limits the use to a few
15912+ filesystems (currently ext2 and ext3)
15913+
15914+endchoice
15915+
15916+config TAG_NFSD
15917+ bool "Tag NFSD User Auth and Files"
15918+ default n
15919+ help
15920+ Enable this if you do want the in-kernel NFS
15921+ Server to use the tagging specified above.
15922+ (will require patched clients too)
15923+
15924+config VSERVER_PRIVACY
15925+ bool "Honor Privacy Aspects of Guests"
15926+ default n
15927+ help
15928+ When enabled, most context checks will disallow
15929+ access to structures assigned to a specific context,
15930+ like ptys or loop devices.
15931+
15932+config VSERVER_CONTEXTS
15933+ int "Maximum number of Contexts (1-65533)" if EMBEDDED
15934+ range 1 65533
15935+ default "768" if 64BIT
15936+ default "256"
15937+ help
15938+ This setting will optimize certain data structures
15939+ and memory allocations according to the expected
15940+ maximum.
15941+
15942+ note: this is not a strict upper limit.
15943+
15944+config VSERVER_WARN
15945+ bool "VServer Warnings"
15946+ default y
15947+ help
15948+ This enables various runtime warnings, which will
15949+ notify about potential manipulation attempts or
15950+ resource shortage. It is generally considered to
15951+ be a good idea to have that enabled.
15952+
15953+config VSERVER_WARN_DEVPTS
15954+ bool "VServer DevPTS Warnings"
15955+ depends on VSERVER_WARN
15956+ default y
15957+ help
15958+ This enables DevPTS related warnings, issued when a
15959+ process inside a context tries to lookup or access
15960+ a dynamic pts from the host or a different context.
15961+
15962+config VSERVER_DEBUG
15963+ bool "VServer Debugging Code"
15964+ default n
15965+ help
15966+ Set this to yes if you want to be able to activate
15967+ debugging output at runtime. It adds a very small
15968+ overhead to all vserver related functions and
15969+ increases the kernel size by about 20k.
15970+
15971+config VSERVER_HISTORY
15972+ bool "VServer History Tracing"
15973+ depends on VSERVER_DEBUG
15974+ default n
15975+ help
15976+ Set this to yes if you want to record the history of
15977+ linux-vserver activities, so they can be replayed in
15978+ the event of a kernel panic or oops.
15979+
15980+config VSERVER_HISTORY_SIZE
15981+ int "Per-CPU History Size (32-65536)"
15982+ depends on VSERVER_HISTORY
15983+ range 32 65536
15984+ default 64
15985+ help
15986+ This allows you to specify the number of entries in
15987+ the per-CPU history buffer.
15988+
15989+config VSERVER_LEGACY_MEM
15990+ bool "Legacy Memory Limits"
15991+ default n
15992+ help
15993+ This provides fake memory limits to keep
15994+ older tools happy in the face of memory
15995+ cgroups
15996+
15997+choice
15998+ prompt "Quotes used in debug and warn messages"
15999+ default QUOTES_ISO8859
16000+
16001+config QUOTES_ISO8859
16002+ bool "Extended ASCII (ISO 8859) angle quotes"
16003+ help
16004+ This uses the extended ASCII characters \xbb
16005+ and \xab for quoting file and process names.
16006+
16007+config QUOTES_UTF8
16008+ bool "UTF-8 angle quotes"
16009+ help
16010+ This uses the UTF-8 sequences for angle
16011+ quotes to quote file and process names.
16012+
16013+config QUOTES_ASCII
16014+ bool "ASCII single quotes"
16015+ help
16016+ This uses the ASCII single quote character
16017+ (\x27) to quote file and process names.
16018+
16019+endchoice
16020+
16021+endmenu
16022+
16023+
16024+config VSERVER
16025+ bool
16026+ default y
16027+ select NAMESPACES
16028+ select UTS_NS
16029+ select IPC_NS
16030+# select USER_NS
16031+ select SYSVIPC
16032+
16033+config VSERVER_SECURITY
16034+ bool
16035+ depends on SECURITY
16036+ default y
16037+ select SECURITY_CAPABILITIES
16038+
16039+config VSERVER_DISABLED
16040+ bool
16041+ default n
16042+
16043diff -NurpP --minimal linux-3.0.9/kernel/vserver/Makefile linux-3.0.9-vs2.3.2.1/kernel/vserver/Makefile
16044--- linux-3.0.9/kernel/vserver/Makefile 1970-01-01 01:00:00.000000000 +0100
16045+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/Makefile 2011-06-10 22:11:24.000000000 +0200
16046@@ -0,0 +1,18 @@
16047+#
16048+# Makefile for the Linux vserver routines.
16049+#
16050+
16051+
16052+obj-y += vserver.o
16053+
16054+vserver-y := switch.o context.o space.o sched.o network.o inode.o \
16055+ limit.o cvirt.o cacct.o signal.o helper.o init.o \
16056+ dlimit.o tag.o
16057+
16058+vserver-$(CONFIG_INET) += inet.o
16059+vserver-$(CONFIG_PROC_FS) += proc.o
16060+vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o
16061+vserver-$(CONFIG_VSERVER_HISTORY) += history.o
16062+vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o
16063+vserver-$(CONFIG_VSERVER_DEVICE) += device.o
16064+
16065diff -NurpP --minimal linux-3.0.9/kernel/vserver/cacct.c linux-3.0.9-vs2.3.2.1/kernel/vserver/cacct.c
16066--- linux-3.0.9/kernel/vserver/cacct.c 1970-01-01 01:00:00.000000000 +0100
16067+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/cacct.c 2011-06-10 22:11:24.000000000 +0200
16068@@ -0,0 +1,42 @@
16069+/*
16070+ * linux/kernel/vserver/cacct.c
16071+ *
16072+ * Virtual Server: Context Accounting
16073+ *
16074+ * Copyright (C) 2006-2007 Herbert Pötzl
16075+ *
16076+ * V0.01 added accounting stats
16077+ *
16078+ */
16079+
16080+#include <linux/types.h>
16081+#include <linux/vs_context.h>
16082+#include <linux/vserver/cacct_cmd.h>
16083+#include <linux/vserver/cacct_int.h>
16084+
16085+#include <asm/errno.h>
16086+#include <asm/uaccess.h>
16087+
16088+
16089+int vc_sock_stat(struct vx_info *vxi, void __user *data)
16090+{
16091+ struct vcmd_sock_stat_v0 vc_data;
16092+ int j, field;
16093+
16094+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16095+ return -EFAULT;
16096+
16097+ field = vc_data.field;
16098+ if ((field < 0) || (field >= VXA_SOCK_SIZE))
16099+ return -EINVAL;
16100+
16101+ for (j = 0; j < 3; j++) {
16102+ vc_data.count[j] = vx_sock_count(&vxi->cacct, field, j);
16103+ vc_data.total[j] = vx_sock_total(&vxi->cacct, field, j);
16104+ }
16105+
16106+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
16107+ return -EFAULT;
16108+ return 0;
16109+}
16110+
16111diff -NurpP --minimal linux-3.0.9/kernel/vserver/cacct_init.h linux-3.0.9-vs2.3.2.1/kernel/vserver/cacct_init.h
16112--- linux-3.0.9/kernel/vserver/cacct_init.h 1970-01-01 01:00:00.000000000 +0100
16113+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/cacct_init.h 2011-06-10 22:11:24.000000000 +0200
16114@@ -0,0 +1,25 @@
16115+
16116+
16117+static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
16118+{
16119+ int i, j;
16120+
16121+
16122+ for (i = 0; i < VXA_SOCK_SIZE; i++) {
16123+ for (j = 0; j < 3; j++) {
16124+ atomic_long_set(&cacct->sock[i][j].count, 0);
16125+ atomic_long_set(&cacct->sock[i][j].total, 0);
16126+ }
16127+ }
16128+ for (i = 0; i < 8; i++)
16129+ atomic_set(&cacct->slab[i], 0);
16130+ for (i = 0; i < 5; i++)
16131+ for (j = 0; j < 4; j++)
16132+ atomic_set(&cacct->page[i][j], 0);
16133+}
16134+
16135+static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
16136+{
16137+ return;
16138+}
16139+
16140diff -NurpP --minimal linux-3.0.9/kernel/vserver/cacct_proc.h linux-3.0.9-vs2.3.2.1/kernel/vserver/cacct_proc.h
16141--- linux-3.0.9/kernel/vserver/cacct_proc.h 1970-01-01 01:00:00.000000000 +0100
16142+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/cacct_proc.h 2011-06-10 22:11:24.000000000 +0200
16143@@ -0,0 +1,53 @@
16144+#ifndef _VX_CACCT_PROC_H
16145+#define _VX_CACCT_PROC_H
16146+
16147+#include <linux/vserver/cacct_int.h>
16148+
16149+
16150+#define VX_SOCKA_TOP \
16151+ "Type\t recv #/bytes\t\t send #/bytes\t\t fail #/bytes\n"
16152+
16153+static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
16154+{
16155+ int i, j, length = 0;
16156+ static char *type[VXA_SOCK_SIZE] = {
16157+ "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER"
16158+ };
16159+
16160+ length += sprintf(buffer + length, VX_SOCKA_TOP);
16161+ for (i = 0; i < VXA_SOCK_SIZE; i++) {
16162+ length += sprintf(buffer + length, "%s:", type[i]);
16163+ for (j = 0; j < 3; j++) {
16164+ length += sprintf(buffer + length,
16165+ "\t%10lu/%-10lu",
16166+ vx_sock_count(cacct, i, j),
16167+ vx_sock_total(cacct, i, j));
16168+ }
16169+ buffer[length++] = '\n';
16170+ }
16171+
16172+ length += sprintf(buffer + length, "\n");
16173+ length += sprintf(buffer + length,
16174+ "slab:\t %8u %8u %8u %8u\n",
16175+ atomic_read(&cacct->slab[1]),
16176+ atomic_read(&cacct->slab[4]),
16177+ atomic_read(&cacct->slab[0]),
16178+ atomic_read(&cacct->slab[2]));
16179+
16180+ length += sprintf(buffer + length, "\n");
16181+ for (i = 0; i < 5; i++) {
16182+ length += sprintf(buffer + length,
16183+ "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n", i,
16184+ atomic_read(&cacct->page[i][0]),
16185+ atomic_read(&cacct->page[i][1]),
16186+ atomic_read(&cacct->page[i][2]),
16187+ atomic_read(&cacct->page[i][3]),
16188+ atomic_read(&cacct->page[i][4]),
16189+ atomic_read(&cacct->page[i][5]),
16190+ atomic_read(&cacct->page[i][6]),
16191+ atomic_read(&cacct->page[i][7]));
16192+ }
16193+ return length;
16194+}
16195+
16196+#endif /* _VX_CACCT_PROC_H */
16197diff -NurpP --minimal linux-3.0.9/kernel/vserver/context.c linux-3.0.9-vs2.3.2.1/kernel/vserver/context.c
16198--- linux-3.0.9/kernel/vserver/context.c 1970-01-01 01:00:00.000000000 +0100
16199+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/context.c 2011-08-01 18:28:12.000000000 +0200
16200@@ -0,0 +1,1107 @@
16201+/*
16202+ * linux/kernel/vserver/context.c
16203+ *
16204+ * Virtual Server: Context Support
16205+ *
16206+ * Copyright (C) 2003-2011 Herbert Pötzl
16207+ *
16208+ * V0.01 context helper
16209+ * V0.02 vx_ctx_kill syscall command
16210+ * V0.03 replaced context_info calls
16211+ * V0.04 redesign of struct (de)alloc
16212+ * V0.05 rlimit basic implementation
16213+ * V0.06 task_xid and info commands
16214+ * V0.07 context flags and caps
16215+ * V0.08 switch to RCU based hash
16216+ * V0.09 revert to non RCU for now
16217+ * V0.10 and back to working RCU hash
16218+ * V0.11 and back to locking again
16219+ * V0.12 referenced context store
16220+ * V0.13 separate per cpu data
16221+ * V0.14 changed vcmds to vxi arg
16222+ * V0.15 added context stat
16223+ * V0.16 have __create claim() the vxi
16224+ * V0.17 removed older and legacy stuff
16225+ * V0.18 added user credentials
16226+ * V0.19 added warn mask
16227+ *
16228+ */
16229+
16230+#include <linux/slab.h>
16231+#include <linux/types.h>
16232+#include <linux/security.h>
16233+#include <linux/pid_namespace.h>
16234+#include <linux/capability.h>
16235+
16236+#include <linux/vserver/context.h>
16237+#include <linux/vserver/network.h>
16238+#include <linux/vserver/debug.h>
16239+#include <linux/vserver/limit.h>
16240+#include <linux/vserver/limit_int.h>
16241+#include <linux/vserver/space.h>
16242+#include <linux/init_task.h>
16243+#include <linux/fs_struct.h>
16244+#include <linux/cred.h>
16245+
16246+#include <linux/vs_context.h>
16247+#include <linux/vs_limit.h>
16248+#include <linux/vs_pid.h>
16249+#include <linux/vserver/context_cmd.h>
16250+
16251+#include "cvirt_init.h"
16252+#include "cacct_init.h"
16253+#include "limit_init.h"
16254+#include "sched_init.h"
16255+
16256+
16257+atomic_t vx_global_ctotal = ATOMIC_INIT(0);
16258+atomic_t vx_global_cactive = ATOMIC_INIT(0);
16259+
16260+
16261+/* now inactive context structures */
16262+
16263+static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
16264+
16265+static DEFINE_SPINLOCK(vx_info_inactive_lock);
16266+
16267+
16268+/* __alloc_vx_info()
16269+
16270+ * allocate an initialized vx_info struct
16271+ * doesn't make it visible (hash) */
16272+
16273+static struct vx_info *__alloc_vx_info(xid_t xid)
16274+{
16275+ struct vx_info *new = NULL;
16276+ int cpu, index;
16277+
16278+ vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
16279+
16280+ /* would this benefit from a slab cache? */
16281+ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
16282+ if (!new)
16283+ return 0;
16284+
16285+ memset(new, 0, sizeof(struct vx_info));
16286+#ifdef CONFIG_SMP
16287+ new->ptr_pc = alloc_percpu(struct _vx_info_pc);
16288+ if (!new->ptr_pc)
16289+ goto error;
16290+#endif
16291+ new->vx_id = xid;
16292+ INIT_HLIST_NODE(&new->vx_hlist);
16293+ atomic_set(&new->vx_usecnt, 0);
16294+ atomic_set(&new->vx_tasks, 0);
16295+ new->vx_parent = NULL;
16296+ new->vx_state = 0;
16297+ init_waitqueue_head(&new->vx_wait);
16298+
16299+ /* prepare reaper */
16300+ get_task_struct(init_pid_ns.child_reaper);
16301+ new->vx_reaper = init_pid_ns.child_reaper;
16302+ new->vx_badness_bias = 0;
16303+
16304+ /* rest of init goes here */
16305+ vx_info_init_limit(&new->limit);
16306+ vx_info_init_sched(&new->sched);
16307+ vx_info_init_cvirt(&new->cvirt);
16308+ vx_info_init_cacct(&new->cacct);
16309+
16310+ /* per cpu data structures */
16311+ for_each_possible_cpu(cpu) {
16312+ vx_info_init_sched_pc(
16313+ &vx_per_cpu(new, sched_pc, cpu), cpu);
16314+ vx_info_init_cvirt_pc(
16315+ &vx_per_cpu(new, cvirt_pc, cpu), cpu);
16316+ }
16317+
16318+ new->vx_flags = VXF_INIT_SET;
16319+ new->vx_bcaps = CAP_FULL_SET; // maybe ~CAP_SETPCAP
16320+ new->vx_ccaps = 0;
16321+ new->vx_umask = 0;
16322+ new->vx_wmask = 0;
16323+
16324+ new->reboot_cmd = 0;
16325+ new->exit_code = 0;
16326+
16327+ // preconfig spaces
16328+ for (index = 0; index < VX_SPACES; index++) {
16329+ struct _vx_space *space = &new->space[index];
16330+
16331+ // filesystem
16332+ spin_lock(&init_fs.lock);
16333+ init_fs.users++;
16334+ spin_unlock(&init_fs.lock);
16335+ space->vx_fs = &init_fs;
16336+
16337+ /* FIXME: do we want defaults? */
16338+ // space->vx_real_cred = 0;
16339+ // space->vx_cred = 0;
16340+ }
16341+
16342+
16343+ vxdprintk(VXD_CBIT(xid, 0),
16344+ "alloc_vx_info(%d) = %p", xid, new);
16345+ vxh_alloc_vx_info(new);
16346+ atomic_inc(&vx_global_ctotal);
16347+ return new;
16348+#ifdef CONFIG_SMP
16349+error:
16350+ kfree(new);
16351+ return 0;
16352+#endif
16353+}
16354+
16355+/* __dealloc_vx_info()
16356+
16357+ * final disposal of vx_info */
16358+
16359+static void __dealloc_vx_info(struct vx_info *vxi)
16360+{
16361+#ifdef CONFIG_VSERVER_WARN
16362+ struct vx_info_save vxis;
16363+ int cpu;
16364+#endif
16365+ vxdprintk(VXD_CBIT(xid, 0),
16366+ "dealloc_vx_info(%p)", vxi);
16367+ vxh_dealloc_vx_info(vxi);
16368+
16369+#ifdef CONFIG_VSERVER_WARN
16370+ enter_vx_info(vxi, &vxis);
16371+ vx_info_exit_limit(&vxi->limit);
16372+ vx_info_exit_sched(&vxi->sched);
16373+ vx_info_exit_cvirt(&vxi->cvirt);
16374+ vx_info_exit_cacct(&vxi->cacct);
16375+
16376+ for_each_possible_cpu(cpu) {
16377+ vx_info_exit_sched_pc(
16378+ &vx_per_cpu(vxi, sched_pc, cpu), cpu);
16379+ vx_info_exit_cvirt_pc(
16380+ &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
16381+ }
16382+ leave_vx_info(&vxis);
16383+#endif
16384+
16385+ vxi->vx_id = -1;
16386+ vxi->vx_state |= VXS_RELEASED;
16387+
16388+#ifdef CONFIG_SMP
16389+ free_percpu(vxi->ptr_pc);
16390+#endif
16391+ kfree(vxi);
16392+ atomic_dec(&vx_global_ctotal);
16393+}
16394+
16395+static void __shutdown_vx_info(struct vx_info *vxi)
16396+{
16397+ struct nsproxy *nsproxy;
16398+ struct fs_struct *fs;
16399+ struct cred *cred;
16400+ int index, kill;
16401+
16402+ might_sleep();
16403+
16404+ vxi->vx_state |= VXS_SHUTDOWN;
16405+ vs_state_change(vxi, VSC_SHUTDOWN);
16406+
16407+ for (index = 0; index < VX_SPACES; index++) {
16408+ struct _vx_space *space = &vxi->space[index];
16409+
16410+ nsproxy = xchg(&space->vx_nsproxy, NULL);
16411+ if (nsproxy)
16412+ put_nsproxy(nsproxy);
16413+
16414+ fs = xchg(&space->vx_fs, NULL);
16415+ spin_lock(&fs->lock);
16416+ kill = !--fs->users;
16417+ spin_unlock(&fs->lock);
16418+ if (kill)
16419+ free_fs_struct(fs);
16420+
16421+ cred = (struct cred *)xchg(&space->vx_cred, NULL);
16422+ if (cred)
16423+ abort_creds(cred);
16424+ }
16425+}
16426+
16427+/* exported stuff */
16428+
16429+void free_vx_info(struct vx_info *vxi)
16430+{
16431+ unsigned long flags;
16432+ unsigned index;
16433+
16434+ /* check for reference counts first */
16435+ BUG_ON(atomic_read(&vxi->vx_usecnt));
16436+ BUG_ON(atomic_read(&vxi->vx_tasks));
16437+
16438+ /* context must not be hashed */
16439+ BUG_ON(vx_info_state(vxi, VXS_HASHED));
16440+
16441+ /* context shutdown is mandatory */
16442+ BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
16443+
16444+ /* spaces check */
16445+ for (index = 0; index < VX_SPACES; index++) {
16446+ struct _vx_space *space = &vxi->space[index];
16447+
16448+ BUG_ON(space->vx_nsproxy);
16449+ BUG_ON(space->vx_fs);
16450+ // BUG_ON(space->vx_real_cred);
16451+ // BUG_ON(space->vx_cred);
16452+ }
16453+
16454+ spin_lock_irqsave(&vx_info_inactive_lock, flags);
16455+ hlist_del(&vxi->vx_hlist);
16456+ spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
16457+
16458+ __dealloc_vx_info(vxi);
16459+}
16460+
16461+
16462+/* hash table for vx_info hash */
16463+
16464+#define VX_HASH_SIZE 13
16465+
16466+static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
16467+ { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
16468+
16469+static DEFINE_SPINLOCK(vx_info_hash_lock);
16470+
16471+
16472+static inline unsigned int __hashval(xid_t xid)
16473+{
16474+ return (xid % VX_HASH_SIZE);
16475+}
16476+
16477+
16478+
16479+/* __hash_vx_info()
16480+
16481+ * add the vxi to the global hash table
16482+ * requires the hash_lock to be held */
16483+
16484+static inline void __hash_vx_info(struct vx_info *vxi)
16485+{
16486+ struct hlist_head *head;
16487+
16488+ vxd_assert_lock(&vx_info_hash_lock);
16489+ vxdprintk(VXD_CBIT(xid, 4),
16490+ "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
16491+ vxh_hash_vx_info(vxi);
16492+
16493+ /* context must not be hashed */
16494+ BUG_ON(vx_info_state(vxi, VXS_HASHED));
16495+
16496+ vxi->vx_state |= VXS_HASHED;
16497+ head = &vx_info_hash[__hashval(vxi->vx_id)];
16498+ hlist_add_head(&vxi->vx_hlist, head);
16499+ atomic_inc(&vx_global_cactive);
16500+}
16501+
16502+/* __unhash_vx_info()
16503+
16504+ * remove the vxi from the global hash table
16505+ * requires the hash_lock to be held */
16506+
16507+static inline void __unhash_vx_info(struct vx_info *vxi)
16508+{
16509+ unsigned long flags;
16510+
16511+ vxd_assert_lock(&vx_info_hash_lock);
16512+ vxdprintk(VXD_CBIT(xid, 4),
16513+ "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id,
16514+ atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks));
16515+ vxh_unhash_vx_info(vxi);
16516+
16517+ /* context must be hashed */
16518+ BUG_ON(!vx_info_state(vxi, VXS_HASHED));
16519+ /* but without tasks */
16520+ BUG_ON(atomic_read(&vxi->vx_tasks));
16521+
16522+ vxi->vx_state &= ~VXS_HASHED;
16523+ hlist_del_init(&vxi->vx_hlist);
16524+ spin_lock_irqsave(&vx_info_inactive_lock, flags);
16525+ hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
16526+ spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
16527+ atomic_dec(&vx_global_cactive);
16528+}
16529+
16530+
16531+/* __lookup_vx_info()
16532+
16533+ * requires the hash_lock to be held
16534+ * doesn't increment the vx_refcnt */
16535+
16536+static inline struct vx_info *__lookup_vx_info(xid_t xid)
16537+{
16538+ struct hlist_head *head = &vx_info_hash[__hashval(xid)];
16539+ struct hlist_node *pos;
16540+ struct vx_info *vxi;
16541+
16542+ vxd_assert_lock(&vx_info_hash_lock);
16543+ hlist_for_each(pos, head) {
16544+ vxi = hlist_entry(pos, struct vx_info, vx_hlist);
16545+
16546+ if (vxi->vx_id == xid)
16547+ goto found;
16548+ }
16549+ vxi = NULL;
16550+found:
16551+ vxdprintk(VXD_CBIT(xid, 0),
16552+ "__lookup_vx_info(#%u): %p[#%u]",
16553+ xid, vxi, vxi ? vxi->vx_id : 0);
16554+ vxh_lookup_vx_info(vxi, xid);
16555+ return vxi;
16556+}
16557+
16558+
16559+/* __create_vx_info()
16560+
16561+ * create the requested context
16562+ * get(), claim() and hash it */
16563+
16564+static struct vx_info *__create_vx_info(int id)
16565+{
16566+ struct vx_info *new, *vxi = NULL;
16567+
16568+ vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
16569+
16570+ if (!(new = __alloc_vx_info(id)))
16571+ return ERR_PTR(-ENOMEM);
16572+
16573+ /* required to make dynamic xids unique */
16574+ spin_lock(&vx_info_hash_lock);
16575+
16576+ /* static context requested */
16577+ if ((vxi = __lookup_vx_info(id))) {
16578+ vxdprintk(VXD_CBIT(xid, 0),
16579+ "create_vx_info(%d) = %p (already there)", id, vxi);
16580+ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
16581+ vxi = ERR_PTR(-EBUSY);
16582+ else
16583+ vxi = ERR_PTR(-EEXIST);
16584+ goto out_unlock;
16585+ }
16586+ /* new context */
16587+ vxdprintk(VXD_CBIT(xid, 0),
16588+ "create_vx_info(%d) = %p (new)", id, new);
16589+ claim_vx_info(new, NULL);
16590+ __hash_vx_info(get_vx_info(new));
16591+ vxi = new, new = NULL;
16592+
16593+out_unlock:
16594+ spin_unlock(&vx_info_hash_lock);
16595+ vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id);
16596+ if (new)
16597+ __dealloc_vx_info(new);
16598+ return vxi;
16599+}
16600+
16601+
16602+/* exported stuff */
16603+
16604+
16605+void unhash_vx_info(struct vx_info *vxi)
16606+{
16607+ spin_lock(&vx_info_hash_lock);
16608+ __unhash_vx_info(vxi);
16609+ spin_unlock(&vx_info_hash_lock);
16610+ __shutdown_vx_info(vxi);
16611+ __wakeup_vx_info(vxi);
16612+}
16613+
16614+
16615+/* lookup_vx_info()
16616+
16617+ * search for a vx_info and get() it
16618+ * negative id means current */
16619+
16620+struct vx_info *lookup_vx_info(int id)
16621+{
16622+ struct vx_info *vxi = NULL;
16623+
16624+ if (id < 0) {
16625+ vxi = get_vx_info(current_vx_info());
16626+ } else if (id > 1) {
16627+ spin_lock(&vx_info_hash_lock);
16628+ vxi = get_vx_info(__lookup_vx_info(id));
16629+ spin_unlock(&vx_info_hash_lock);
16630+ }
16631+ return vxi;
16632+}
16633+
16634+/* xid_is_hashed()
16635+
16636+ * verify that xid is still hashed */
16637+
16638+int xid_is_hashed(xid_t xid)
16639+{
16640+ int hashed;
16641+
16642+ spin_lock(&vx_info_hash_lock);
16643+ hashed = (__lookup_vx_info(xid) != NULL);
16644+ spin_unlock(&vx_info_hash_lock);
16645+ return hashed;
16646+}
16647+
16648+#ifdef CONFIG_PROC_FS
16649+
16650+/* get_xid_list()
16651+
16652+ * get a subset of hashed xids for proc
16653+ * assumes size is at least one */
16654+
16655+int get_xid_list(int index, unsigned int *xids, int size)
16656+{
16657+ int hindex, nr_xids = 0;
16658+
16659+ /* only show current and children */
16660+ if (!vx_check(0, VS_ADMIN | VS_WATCH)) {
16661+ if (index > 0)
16662+ return 0;
16663+ xids[nr_xids] = vx_current_xid();
16664+ return 1;
16665+ }
16666+
16667+ for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
16668+ struct hlist_head *head = &vx_info_hash[hindex];
16669+ struct hlist_node *pos;
16670+
16671+ spin_lock(&vx_info_hash_lock);
16672+ hlist_for_each(pos, head) {
16673+ struct vx_info *vxi;
16674+
16675+ if (--index > 0)
16676+ continue;
16677+
16678+ vxi = hlist_entry(pos, struct vx_info, vx_hlist);
16679+ xids[nr_xids] = vxi->vx_id;
16680+ if (++nr_xids >= size) {
16681+ spin_unlock(&vx_info_hash_lock);
16682+ goto out;
16683+ }
16684+ }
16685+ /* keep the lock time short */
16686+ spin_unlock(&vx_info_hash_lock);
16687+ }
16688+out:
16689+ return nr_xids;
16690+}
16691+#endif
16692+
16693+#ifdef CONFIG_VSERVER_DEBUG
16694+
16695+void dump_vx_info_inactive(int level)
16696+{
16697+ struct hlist_node *entry, *next;
16698+
16699+ hlist_for_each_safe(entry, next, &vx_info_inactive) {
16700+ struct vx_info *vxi =
16701+ list_entry(entry, struct vx_info, vx_hlist);
16702+
16703+ dump_vx_info(vxi, level);
16704+ }
16705+}
16706+
16707+#endif
16708+
16709+#if 0
16710+int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
16711+{
16712+ struct user_struct *new_user, *old_user;
16713+
16714+ if (!p || !vxi)
16715+ BUG();
16716+
16717+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
16718+ return -EACCES;
16719+
16720+ new_user = alloc_uid(vxi->vx_id, p->uid);
16721+ if (!new_user)
16722+ return -ENOMEM;
16723+
16724+ old_user = p->user;
16725+ if (new_user != old_user) {
16726+ atomic_inc(&new_user->processes);
16727+ atomic_dec(&old_user->processes);
16728+ p->user = new_user;
16729+ }
16730+ free_uid(old_user);
16731+ return 0;
16732+}
16733+#endif
16734+
16735+#if 0
16736+void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
16737+{
16738+ // p->cap_effective &= vxi->vx_cap_bset;
16739+ p->cap_effective =
16740+ cap_intersect(p->cap_effective, vxi->cap_bset);
16741+ // p->cap_inheritable &= vxi->vx_cap_bset;
16742+ p->cap_inheritable =
16743+ cap_intersect(p->cap_inheritable, vxi->cap_bset);
16744+ // p->cap_permitted &= vxi->vx_cap_bset;
16745+ p->cap_permitted =
16746+ cap_intersect(p->cap_permitted, vxi->cap_bset);
16747+}
16748+#endif
16749+
16750+
16751+#include <linux/file.h>
16752+#include <linux/fdtable.h>
16753+
16754+static int vx_openfd_task(struct task_struct *tsk)
16755+{
16756+ struct files_struct *files = tsk->files;
16757+ struct fdtable *fdt;
16758+ const unsigned long *bptr;
16759+ int count, total;
16760+
16761+ /* no rcu_read_lock() because of spin_lock() */
16762+ spin_lock(&files->file_lock);
16763+ fdt = files_fdtable(files);
16764+ bptr = fdt->open_fds->fds_bits;
16765+ count = fdt->max_fds / (sizeof(unsigned long) * 8);
16766+ for (total = 0; count > 0; count--) {
16767+ if (*bptr)
16768+ total += hweight_long(*bptr);
16769+ bptr++;
16770+ }
16771+ spin_unlock(&files->file_lock);
16772+ return total;
16773+}
16774+
16775+
16776+/* for *space compatibility */
16777+
16778+asmlinkage long sys_unshare(unsigned long);
16779+
16780+/*
16781+ * migrate task to new context
16782+ * gets vxi, puts old_vxi on change
16783+ * optionally unshares namespaces (hack)
16784+ */
16785+
16786+int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
16787+{
16788+ struct vx_info *old_vxi;
16789+ int ret = 0;
16790+
16791+ if (!p || !vxi)
16792+ BUG();
16793+
16794+ vxdprintk(VXD_CBIT(xid, 5),
16795+ "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
16796+ vxi->vx_id, atomic_read(&vxi->vx_usecnt));
16797+
16798+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
16799+ !vx_info_flags(vxi, VXF_STATE_SETUP, 0))
16800+ return -EACCES;
16801+
16802+ if (vx_info_state(vxi, VXS_SHUTDOWN))
16803+ return -EFAULT;
16804+
16805+ old_vxi = task_get_vx_info(p);
16806+ if (old_vxi == vxi)
16807+ goto out;
16808+
16809+// if (!(ret = vx_migrate_user(p, vxi))) {
16810+ {
16811+ int openfd;
16812+
16813+ task_lock(p);
16814+ openfd = vx_openfd_task(p);
16815+
16816+ if (old_vxi) {
16817+ atomic_dec(&old_vxi->cvirt.nr_threads);
16818+ atomic_dec(&old_vxi->cvirt.nr_running);
16819+ __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
16820+ /* FIXME: what about the struct files here? */
16821+ __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
16822+ /* account for the executable */
16823+ __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
16824+ }
16825+ atomic_inc(&vxi->cvirt.nr_threads);
16826+ atomic_inc(&vxi->cvirt.nr_running);
16827+ __rlim_inc(&vxi->limit, RLIMIT_NPROC);
16828+ /* FIXME: what about the struct files here? */
16829+ __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
16830+ /* account for the executable */
16831+ __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
16832+
16833+ if (old_vxi) {
16834+ release_vx_info(old_vxi, p);
16835+ clr_vx_info(&p->vx_info);
16836+ }
16837+ claim_vx_info(vxi, p);
16838+ set_vx_info(&p->vx_info, vxi);
16839+ p->xid = vxi->vx_id;
16840+
16841+ vxdprintk(VXD_CBIT(xid, 5),
16842+ "moved task %p into vxi:%p[#%d]",
16843+ p, vxi, vxi->vx_id);
16844+
16845+ // vx_mask_cap_bset(vxi, p);
16846+ task_unlock(p);
16847+
16848+ /* hack for *spaces to provide compatibility */
16849+ if (unshare) {
16850+ struct nsproxy *old_nsp, *new_nsp;
16851+
16852+ ret = unshare_nsproxy_namespaces(
16853+ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER,
16854+ &new_nsp, NULL);
16855+ if (ret)
16856+ goto out;
16857+
16858+ old_nsp = xchg(&p->nsproxy, new_nsp);
16859+ vx_set_space(vxi,
16860+ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, 0);
16861+ put_nsproxy(old_nsp);
16862+ }
16863+ }
16864+out:
16865+ put_vx_info(old_vxi);
16866+ return ret;
16867+}
16868+
16869+int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
16870+{
16871+ struct task_struct *old_reaper;
16872+
16873+ if (!vxi)
16874+ return -EINVAL;
16875+
16876+ vxdprintk(VXD_CBIT(xid, 6),
16877+ "vx_set_reaper(%p[#%d],%p[#%d,%d])",
16878+ vxi, vxi->vx_id, p, p->xid, p->pid);
16879+
16880+ old_reaper = vxi->vx_reaper;
16881+ if (old_reaper == p)
16882+ return 0;
16883+
16884+ /* set new child reaper */
16885+ get_task_struct(p);
16886+ vxi->vx_reaper = p;
16887+ put_task_struct(old_reaper);
16888+ return 0;
16889+}
16890+
16891+int vx_set_init(struct vx_info *vxi, struct task_struct *p)
16892+{
16893+ if (!vxi)
16894+ return -EINVAL;
16895+
16896+ vxdprintk(VXD_CBIT(xid, 6),
16897+ "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
16898+ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
16899+
16900+ vxi->vx_flags &= ~VXF_STATE_INIT;
16901+ // vxi->vx_initpid = p->tgid;
16902+ vxi->vx_initpid = p->pid;
16903+ return 0;
16904+}
16905+
16906+void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
16907+{
16908+ vxdprintk(VXD_CBIT(xid, 6),
16909+ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
16910+ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
16911+
16912+ vxi->exit_code = code;
16913+ vxi->vx_initpid = 0;
16914+}
16915+
16916+
16917+void vx_set_persistent(struct vx_info *vxi)
16918+{
16919+ vxdprintk(VXD_CBIT(xid, 6),
16920+ "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
16921+
16922+ get_vx_info(vxi);
16923+ claim_vx_info(vxi, NULL);
16924+}
16925+
16926+void vx_clear_persistent(struct vx_info *vxi)
16927+{
16928+ vxdprintk(VXD_CBIT(xid, 6),
16929+ "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);
16930+
16931+ release_vx_info(vxi, NULL);
16932+ put_vx_info(vxi);
16933+}
16934+
16935+void vx_update_persistent(struct vx_info *vxi)
16936+{
16937+ if (vx_info_flags(vxi, VXF_PERSISTENT, 0))
16938+ vx_set_persistent(vxi);
16939+ else
16940+ vx_clear_persistent(vxi);
16941+}
16942+
16943+
16944+/* task must be current or locked */
16945+
16946+void exit_vx_info(struct task_struct *p, int code)
16947+{
16948+ struct vx_info *vxi = p->vx_info;
16949+
16950+ if (vxi) {
16951+ atomic_dec(&vxi->cvirt.nr_threads);
16952+ vx_nproc_dec(p);
16953+
16954+ vxi->exit_code = code;
16955+ release_vx_info(vxi, p);
16956+ }
16957+}
16958+
16959+void exit_vx_info_early(struct task_struct *p, int code)
16960+{
16961+ struct vx_info *vxi = p->vx_info;
16962+
16963+ if (vxi) {
16964+ if (vxi->vx_initpid == p->pid)
16965+ vx_exit_init(vxi, p, code);
16966+ if (vxi->vx_reaper == p)
16967+ vx_set_reaper(vxi, init_pid_ns.child_reaper);
16968+ }
16969+}
16970+
16971+
16972+/* vserver syscall commands below here */
16973+
16974+/* task xid and vx_info functions */
16975+
16976+#include <asm/uaccess.h>
16977+
16978+
16979+int vc_task_xid(uint32_t id)
16980+{
16981+ xid_t xid;
16982+
16983+ if (id) {
16984+ struct task_struct *tsk;
16985+
16986+ rcu_read_lock();
16987+ tsk = find_task_by_real_pid(id);
16988+ xid = (tsk) ? tsk->xid : -ESRCH;
16989+ rcu_read_unlock();
16990+ } else
16991+ xid = vx_current_xid();
16992+ return xid;
16993+}
16994+
16995+
16996+int vc_vx_info(struct vx_info *vxi, void __user *data)
16997+{
16998+ struct vcmd_vx_info_v0 vc_data;
16999+
17000+ vc_data.xid = vxi->vx_id;
17001+ vc_data.initpid = vxi->vx_initpid;
17002+
17003+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17004+ return -EFAULT;
17005+ return 0;
17006+}
17007+
17008+
17009+int vc_ctx_stat(struct vx_info *vxi, void __user *data)
17010+{
17011+ struct vcmd_ctx_stat_v0 vc_data;
17012+
17013+ vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
17014+ vc_data.tasks = atomic_read(&vxi->vx_tasks);
17015+
17016+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17017+ return -EFAULT;
17018+ return 0;
17019+}
17020+
17021+
17022+/* context functions */
17023+
17024+int vc_ctx_create(uint32_t xid, void __user *data)
17025+{
17026+ struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
17027+ struct vx_info *new_vxi;
17028+ int ret;
17029+
17030+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
17031+ return -EFAULT;
17032+
17033+ if ((xid > MAX_S_CONTEXT) || (xid < 2))
17034+ return -EINVAL;
17035+
17036+ new_vxi = __create_vx_info(xid);
17037+ if (IS_ERR(new_vxi))
17038+ return PTR_ERR(new_vxi);
17039+
17040+ /* initial flags */
17041+ new_vxi->vx_flags = vc_data.flagword;
17042+
17043+ ret = -ENOEXEC;
17044+ if (vs_state_change(new_vxi, VSC_STARTUP))
17045+ goto out;
17046+
17047+ ret = vx_migrate_task(current, new_vxi, (!data));
17048+ if (ret)
17049+ goto out;
17050+
17051+ /* return context id on success */
17052+ ret = new_vxi->vx_id;
17053+
17054+ /* get a reference for persistent contexts */
17055+ if ((vc_data.flagword & VXF_PERSISTENT))
17056+ vx_set_persistent(new_vxi);
17057+out:
17058+ release_vx_info(new_vxi, NULL);
17059+ put_vx_info(new_vxi);
17060+ return ret;
17061+}
17062+
17063+
17064+int vc_ctx_migrate(struct vx_info *vxi, void __user *data)
17065+{
17066+ struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
17067+ int ret;
17068+
17069+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
17070+ return -EFAULT;
17071+
17072+ ret = vx_migrate_task(current, vxi, 0);
17073+ if (ret)
17074+ return ret;
17075+ if (vc_data.flagword & VXM_SET_INIT)
17076+ ret = vx_set_init(vxi, current);
17077+ if (ret)
17078+ return ret;
17079+ if (vc_data.flagword & VXM_SET_REAPER)
17080+ ret = vx_set_reaper(vxi, current);
17081+ return ret;
17082+}
17083+
17084+
17085+int vc_get_cflags(struct vx_info *vxi, void __user *data)
17086+{
17087+ struct vcmd_ctx_flags_v0 vc_data;
17088+
17089+ vc_data.flagword = vxi->vx_flags;
17090+
17091+ /* special STATE flag handling */
17092+ vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME);
17093+
17094+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17095+ return -EFAULT;
17096+ return 0;
17097+}
17098+
17099+int vc_set_cflags(struct vx_info *vxi, void __user *data)
17100+{
17101+ struct vcmd_ctx_flags_v0 vc_data;
17102+ uint64_t mask, trigger;
17103+
17104+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17105+ return -EFAULT;
17106+
17107+ /* special STATE flag handling */
17108+ mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
17109+ trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
17110+
17111+ if (vxi == current_vx_info()) {
17112+ /* if (trigger & VXF_STATE_SETUP)
17113+ vx_mask_cap_bset(vxi, current); */
17114+ if (trigger & VXF_STATE_INIT) {
17115+ int ret;
17116+
17117+ ret = vx_set_init(vxi, current);
17118+ if (ret)
17119+ return ret;
17120+ ret = vx_set_reaper(vxi, current);
17121+ if (ret)
17122+ return ret;
17123+ }
17124+ }
17125+
17126+ vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
17127+ vc_data.flagword, mask);
17128+ if (trigger & VXF_PERSISTENT)
17129+ vx_update_persistent(vxi);
17130+
17131+ return 0;
17132+}
17133+
17134+
17135+static inline uint64_t caps_from_cap_t(kernel_cap_t c)
17136+{
17137+ uint64_t v = c.cap[0] | ((uint64_t)c.cap[1] << 32);
17138+
17139+ // printk("caps_from_cap_t(%08x:%08x) = %016llx\n", c.cap[1], c.cap[0], v);
17140+ return v;
17141+}
17142+
17143+static inline kernel_cap_t cap_t_from_caps(uint64_t v)
17144+{
17145+ kernel_cap_t c = __cap_empty_set;
17146+
17147+ c.cap[0] = v & 0xFFFFFFFF;
17148+ c.cap[1] = (v >> 32) & 0xFFFFFFFF;
17149+
17150+ // printk("cap_t_from_caps(%016llx) = %08x:%08x\n", v, c.cap[1], c.cap[0]);
17151+ return c;
17152+}
17153+
17154+
17155+static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps)
17156+{
17157+ if (bcaps)
17158+ *bcaps = caps_from_cap_t(vxi->vx_bcaps);
17159+ if (ccaps)
17160+ *ccaps = vxi->vx_ccaps;
17161+
17162+ return 0;
17163+}
17164+
17165+int vc_get_ccaps(struct vx_info *vxi, void __user *data)
17166+{
17167+ struct vcmd_ctx_caps_v1 vc_data;
17168+ int ret;
17169+
17170+ ret = do_get_caps(vxi, NULL, &vc_data.ccaps);
17171+ if (ret)
17172+ return ret;
17173+ vc_data.cmask = ~0ULL;
17174+
17175+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17176+ return -EFAULT;
17177+ return 0;
17178+}
17179+
17180+static int do_set_caps(struct vx_info *vxi,
17181+ uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask)
17182+{
17183+ uint64_t bcold = caps_from_cap_t(vxi->vx_bcaps);
17184+
17185+#if 0
17186+ printk("do_set_caps(%16llx, %16llx, %16llx, %16llx)\n",
17187+ bcaps, bmask, ccaps, cmask);
17188+#endif
17189+ vxi->vx_bcaps = cap_t_from_caps(
17190+ vs_mask_flags(bcold, bcaps, bmask));
17191+ vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask);
17192+
17193+ return 0;
17194+}
17195+
17196+int vc_set_ccaps(struct vx_info *vxi, void __user *data)
17197+{
17198+ struct vcmd_ctx_caps_v1 vc_data;
17199+
17200+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17201+ return -EFAULT;
17202+
17203+ return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask);
17204+}
17205+
17206+int vc_get_bcaps(struct vx_info *vxi, void __user *data)
17207+{
17208+ struct vcmd_bcaps vc_data;
17209+ int ret;
17210+
17211+ ret = do_get_caps(vxi, &vc_data.bcaps, NULL);
17212+ if (ret)
17213+ return ret;
17214+ vc_data.bmask = ~0ULL;
17215+
17216+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17217+ return -EFAULT;
17218+ return 0;
17219+}
17220+
17221+int vc_set_bcaps(struct vx_info *vxi, void __user *data)
17222+{
17223+ struct vcmd_bcaps vc_data;
17224+
17225+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17226+ return -EFAULT;
17227+
17228+ return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0);
17229+}
17230+
17231+
17232+int vc_get_umask(struct vx_info *vxi, void __user *data)
17233+{
17234+ struct vcmd_umask vc_data;
17235+
17236+ vc_data.umask = vxi->vx_umask;
17237+ vc_data.mask = ~0ULL;
17238+
17239+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17240+ return -EFAULT;
17241+ return 0;
17242+}
17243+
17244+int vc_set_umask(struct vx_info *vxi, void __user *data)
17245+{
17246+ struct vcmd_umask vc_data;
17247+
17248+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17249+ return -EFAULT;
17250+
17251+ vxi->vx_umask = vs_mask_flags(vxi->vx_umask,
17252+ vc_data.umask, vc_data.mask);
17253+ return 0;
17254+}
17255+
17256+
17257+int vc_get_wmask(struct vx_info *vxi, void __user *data)
17258+{
17259+ struct vcmd_wmask vc_data;
17260+
17261+ vc_data.wmask = vxi->vx_wmask;
17262+ vc_data.mask = ~0ULL;
17263+
17264+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17265+ return -EFAULT;
17266+ return 0;
17267+}
17268+
17269+int vc_set_wmask(struct vx_info *vxi, void __user *data)
17270+{
17271+ struct vcmd_wmask vc_data;
17272+
17273+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17274+ return -EFAULT;
17275+
17276+ vxi->vx_wmask = vs_mask_flags(vxi->vx_wmask,
17277+ vc_data.wmask, vc_data.mask);
17278+ return 0;
17279+}
17280+
17281+
17282+int vc_get_badness(struct vx_info *vxi, void __user *data)
17283+{
17284+ struct vcmd_badness_v0 vc_data;
17285+
17286+ vc_data.bias = vxi->vx_badness_bias;
17287+
17288+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17289+ return -EFAULT;
17290+ return 0;
17291+}
17292+
17293+int vc_set_badness(struct vx_info *vxi, void __user *data)
17294+{
17295+ struct vcmd_badness_v0 vc_data;
17296+
17297+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17298+ return -EFAULT;
17299+
17300+ vxi->vx_badness_bias = vc_data.bias;
17301+ return 0;
17302+}
17303+
17304+#include <linux/module.h>
17305+
17306+EXPORT_SYMBOL_GPL(free_vx_info);
17307+
17308diff -NurpP --minimal linux-3.0.9/kernel/vserver/cvirt.c linux-3.0.9-vs2.3.2.1/kernel/vserver/cvirt.c
17309--- linux-3.0.9/kernel/vserver/cvirt.c 1970-01-01 01:00:00.000000000 +0100
17310+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/cvirt.c 2011-06-13 14:57:26.000000000 +0200
17311@@ -0,0 +1,303 @@
17312+/*
17313+ * linux/kernel/vserver/cvirt.c
17314+ *
17315+ * Virtual Server: Context Virtualization
17316+ *
17317+ * Copyright (C) 2004-2007 Herbert Pötzl
17318+ *
17319+ * V0.01 broken out from limit.c
17320+ * V0.02 added utsname stuff
17321+ * V0.03 changed vcmds to vxi arg
17322+ *
17323+ */
17324+
17325+#include <linux/types.h>
17326+#include <linux/utsname.h>
17327+#include <linux/vs_cvirt.h>
17328+#include <linux/vserver/switch.h>
17329+#include <linux/vserver/cvirt_cmd.h>
17330+
17331+#include <asm/uaccess.h>
17332+
17333+
17334+void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
17335+{
17336+ struct vx_info *vxi = current_vx_info();
17337+
17338+ set_normalized_timespec(uptime,
17339+ uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec,
17340+ uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec);
17341+ if (!idle)
17342+ return;
17343+ set_normalized_timespec(idle,
17344+ idle->tv_sec - vxi->cvirt.bias_idle.tv_sec,
17345+ idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec);
17346+ return;
17347+}
17348+
17349+uint64_t vx_idle_jiffies(void)
17350+{
17351+ return init_task.utime + init_task.stime;
17352+}
17353+
17354+
17355+
17356+static inline uint32_t __update_loadavg(uint32_t load,
17357+ int wsize, int delta, int n)
17358+{
17359+ unsigned long long calc, prev;
17360+
17361+ /* just set it to n */
17362+ if (unlikely(delta >= wsize))
17363+ return (n << FSHIFT);
17364+
17365+ calc = delta * n;
17366+ calc <<= FSHIFT;
17367+ prev = (wsize - delta);
17368+ prev *= load;
17369+ calc += prev;
17370+ do_div(calc, wsize);
17371+ return calc;
17372+}
17373+
17374+
17375+void vx_update_load(struct vx_info *vxi)
17376+{
17377+ uint32_t now, last, delta;
17378+ unsigned int nr_running, nr_uninterruptible;
17379+ unsigned int total;
17380+ unsigned long flags;
17381+
17382+ spin_lock_irqsave(&vxi->cvirt.load_lock, flags);
17383+
17384+ now = jiffies;
17385+ last = vxi->cvirt.load_last;
17386+ delta = now - last;
17387+
17388+ if (delta < 5*HZ)
17389+ goto out;
17390+
17391+ nr_running = atomic_read(&vxi->cvirt.nr_running);
17392+ nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
17393+ total = nr_running + nr_uninterruptible;
17394+
17395+ vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
17396+ 60*HZ, delta, total);
17397+ vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
17398+ 5*60*HZ, delta, total);
17399+ vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
17400+ 15*60*HZ, delta, total);
17401+
17402+ vxi->cvirt.load_last = now;
17403+out:
17404+ atomic_inc(&vxi->cvirt.load_updates);
17405+ spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags);
17406+}
17407+
17408+
17409+/*
17410+ * Commands to do_syslog:
17411+ *
17412+ * 0 -- Close the log. Currently a NOP.
17413+ * 1 -- Open the log. Currently a NOP.
17414+ * 2 -- Read from the log.
17415+ * 3 -- Read all messages remaining in the ring buffer.
17416+ * 4 -- Read and clear all messages remaining in the ring buffer.
17417+ * 5 -- Clear ring buffer.
17418+ * 6 -- Disable printk's to console
17419+ * 7 -- Enable printk's to console
17420+ * 8 -- Set level of messages printed to console
17421+ * 9 -- Return number of unread characters in the log buffer
17422+ * 10 -- Return size of the log buffer
17423+ */
17424+int vx_do_syslog(int type, char __user *buf, int len)
17425+{
17426+ int error = 0;
17427+ int do_clear = 0;
17428+ struct vx_info *vxi = current_vx_info();
17429+ struct _vx_syslog *log;
17430+
17431+ if (!vxi)
17432+ return -EINVAL;
17433+ log = &vxi->cvirt.syslog;
17434+
17435+ switch (type) {
17436+ case 0: /* Close log */
17437+ case 1: /* Open log */
17438+ break;
17439+ case 2: /* Read from log */
17440+ error = wait_event_interruptible(log->log_wait,
17441+ (log->log_start - log->log_end));
17442+ if (error)
17443+ break;
17444+ spin_lock_irq(&log->logbuf_lock);
17445+ spin_unlock_irq(&log->logbuf_lock);
17446+ break;
17447+ case 4: /* Read/clear last kernel messages */
17448+ do_clear = 1;
17449+ /* fall through */
17450+ case 3: /* Read last kernel messages */
17451+ return 0;
17452+
17453+ case 5: /* Clear ring buffer */
17454+ return 0;
17455+
17456+ case 6: /* Disable logging to console */
17457+ case 7: /* Enable logging to console */
17458+ case 8: /* Set level of messages printed to console */
17459+ break;
17460+
17461+ case 9: /* Number of chars in the log buffer */
17462+ return 0;
17463+ case 10: /* Size of the log buffer */
17464+ return 0;
17465+ default:
17466+ error = -EINVAL;
17467+ break;
17468+ }
17469+ return error;
17470+}
17471+
17472+
17473+/* virtual host info names */
17474+
17475+static char *vx_vhi_name(struct vx_info *vxi, int id)
17476+{
17477+ struct nsproxy *nsproxy;
17478+ struct uts_namespace *uts;
17479+
17480+ if (id == VHIN_CONTEXT)
17481+ return vxi->vx_name;
17482+
17483+ nsproxy = vxi->space[0].vx_nsproxy;
17484+ if (!nsproxy)
17485+ return NULL;
17486+
17487+ uts = nsproxy->uts_ns;
17488+ if (!uts)
17489+ return NULL;
17490+
17491+ switch (id) {
17492+ case VHIN_SYSNAME:
17493+ return uts->name.sysname;
17494+ case VHIN_NODENAME:
17495+ return uts->name.nodename;
17496+ case VHIN_RELEASE:
17497+ return uts->name.release;
17498+ case VHIN_VERSION:
17499+ return uts->name.version;
17500+ case VHIN_MACHINE:
17501+ return uts->name.machine;
17502+ case VHIN_DOMAINNAME:
17503+ return uts->name.domainname;
17504+ default:
17505+ return NULL;
17506+ }
17507+ return NULL;
17508+}
17509+
17510+int vc_set_vhi_name(struct vx_info *vxi, void __user *data)
17511+{
17512+ struct vcmd_vhi_name_v0 vc_data;
17513+ char *name;
17514+
17515+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17516+ return -EFAULT;
17517+
17518+ name = vx_vhi_name(vxi, vc_data.field);
17519+ if (!name)
17520+ return -EINVAL;
17521+
17522+ memcpy(name, vc_data.name, 65);
17523+ return 0;
17524+}
17525+
17526+int vc_get_vhi_name(struct vx_info *vxi, void __user *data)
17527+{
17528+ struct vcmd_vhi_name_v0 vc_data;
17529+ char *name;
17530+
17531+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17532+ return -EFAULT;
17533+
17534+ name = vx_vhi_name(vxi, vc_data.field);
17535+ if (!name)
17536+ return -EINVAL;
17537+
17538+ memcpy(vc_data.name, name, 65);
17539+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17540+ return -EFAULT;
17541+ return 0;
17542+}
17543+
17544+
17545+int vc_virt_stat(struct vx_info *vxi, void __user *data)
17546+{
17547+ struct vcmd_virt_stat_v0 vc_data;
17548+ struct _vx_cvirt *cvirt = &vxi->cvirt;
17549+ struct timespec uptime;
17550+
17551+ do_posix_clock_monotonic_gettime(&uptime);
17552+ set_normalized_timespec(&uptime,
17553+ uptime.tv_sec - cvirt->bias_uptime.tv_sec,
17554+ uptime.tv_nsec - cvirt->bias_uptime.tv_nsec);
17555+
17556+ vc_data.offset = timespec_to_ns(&cvirt->bias_ts);
17557+ vc_data.uptime = timespec_to_ns(&uptime);
17558+ vc_data.nr_threads = atomic_read(&cvirt->nr_threads);
17559+ vc_data.nr_running = atomic_read(&cvirt->nr_running);
17560+ vc_data.nr_uninterruptible = atomic_read(&cvirt->nr_uninterruptible);
17561+ vc_data.nr_onhold = atomic_read(&cvirt->nr_onhold);
17562+ vc_data.nr_forks = atomic_read(&cvirt->total_forks);
17563+ vc_data.load[0] = cvirt->load[0];
17564+ vc_data.load[1] = cvirt->load[1];
17565+ vc_data.load[2] = cvirt->load[2];
17566+
17567+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17568+ return -EFAULT;
17569+ return 0;
17570+}
17571+
17572+
17573+#ifdef CONFIG_VSERVER_VTIME
17574+
17575+/* virtualized time base */
17576+
17577+void vx_adjust_timespec(struct timespec *ts)
17578+{
17579+ struct vx_info *vxi;
17580+
17581+ if (!vx_flags(VXF_VIRT_TIME, 0))
17582+ return;
17583+
17584+ vxi = current_vx_info();
17585+ ts->tv_sec += vxi->cvirt.bias_ts.tv_sec;
17586+ ts->tv_nsec += vxi->cvirt.bias_ts.tv_nsec;
17587+
17588+ if (ts->tv_nsec >= NSEC_PER_SEC) {
17589+ ts->tv_sec++;
17590+ ts->tv_nsec -= NSEC_PER_SEC;
17591+ } else if (ts->tv_nsec < 0) {
17592+ ts->tv_sec--;
17593+ ts->tv_nsec += NSEC_PER_SEC;
17594+ }
17595+}
17596+
17597+int vx_settimeofday(const struct timespec *ts)
17598+{
17599+ struct timespec ats, delta;
17600+ struct vx_info *vxi;
17601+
17602+ if (!vx_flags(VXF_VIRT_TIME, 0))
17603+ return do_settimeofday(ts);
17604+
17605+ getnstimeofday(&ats);
17606+ delta = timespec_sub(*ts, ats);
17607+
17608+ vxi = current_vx_info();
17609+ vxi->cvirt.bias_ts = timespec_add(vxi->cvirt.bias_ts, delta);
17610+ return 0;
17611+}
17612+
17613+#endif
17614+
17615diff -NurpP --minimal linux-3.0.9/kernel/vserver/cvirt_init.h linux-3.0.9-vs2.3.2.1/kernel/vserver/cvirt_init.h
17616--- linux-3.0.9/kernel/vserver/cvirt_init.h 1970-01-01 01:00:00.000000000 +0100
17617+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/cvirt_init.h 2011-06-10 22:11:24.000000000 +0200
17618@@ -0,0 +1,70 @@
17619+
17620+
17621+extern uint64_t vx_idle_jiffies(void);
17622+
17623+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
17624+{
17625+ uint64_t idle_jiffies = vx_idle_jiffies();
17626+ uint64_t nsuptime;
17627+
17628+ do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
17629+ nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
17630+ * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
17631+ cvirt->bias_clock = nsec_to_clock_t(nsuptime);
17632+ cvirt->bias_ts.tv_sec = 0;
17633+ cvirt->bias_ts.tv_nsec = 0;
17634+
17635+ jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
17636+ atomic_set(&cvirt->nr_threads, 0);
17637+ atomic_set(&cvirt->nr_running, 0);
17638+ atomic_set(&cvirt->nr_uninterruptible, 0);
17639+ atomic_set(&cvirt->nr_onhold, 0);
17640+
17641+ spin_lock_init(&cvirt->load_lock);
17642+ cvirt->load_last = jiffies;
17643+ atomic_set(&cvirt->load_updates, 0);
17644+ cvirt->load[0] = 0;
17645+ cvirt->load[1] = 0;
17646+ cvirt->load[2] = 0;
17647+ atomic_set(&cvirt->total_forks, 0);
17648+
17649+ spin_lock_init(&cvirt->syslog.logbuf_lock);
17650+ init_waitqueue_head(&cvirt->syslog.log_wait);
17651+ cvirt->syslog.log_start = 0;
17652+ cvirt->syslog.log_end = 0;
17653+ cvirt->syslog.con_start = 0;
17654+ cvirt->syslog.logged_chars = 0;
17655+}
17656+
17657+static inline
17658+void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
17659+{
17660+ // cvirt_pc->cpustat = { 0 };
17661+}
17662+
17663+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
17664+{
17665+#ifdef CONFIG_VSERVER_WARN
17666+ int value;
17667+#endif
17668+ vxwprintk_xid((value = atomic_read(&cvirt->nr_threads)),
17669+ "!!! cvirt: %p[nr_threads] = %d on exit.",
17670+ cvirt, value);
17671+ vxwprintk_xid((value = atomic_read(&cvirt->nr_running)),
17672+ "!!! cvirt: %p[nr_running] = %d on exit.",
17673+ cvirt, value);
17674+ vxwprintk_xid((value = atomic_read(&cvirt->nr_uninterruptible)),
17675+ "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
17676+ cvirt, value);
17677+ vxwprintk_xid((value = atomic_read(&cvirt->nr_onhold)),
17678+ "!!! cvirt: %p[nr_onhold] = %d on exit.",
17679+ cvirt, value);
17680+ return;
17681+}
17682+
17683+static inline
17684+void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
17685+{
17686+ return;
17687+}
17688+
17689diff -NurpP --minimal linux-3.0.9/kernel/vserver/cvirt_proc.h linux-3.0.9-vs2.3.2.1/kernel/vserver/cvirt_proc.h
17690--- linux-3.0.9/kernel/vserver/cvirt_proc.h 1970-01-01 01:00:00.000000000 +0100
17691+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/cvirt_proc.h 2011-06-10 22:11:24.000000000 +0200
17692@@ -0,0 +1,135 @@
17693+#ifndef _VX_CVIRT_PROC_H
17694+#define _VX_CVIRT_PROC_H
17695+
17696+#include <linux/nsproxy.h>
17697+#include <linux/mnt_namespace.h>
17698+#include <linux/ipc_namespace.h>
17699+#include <linux/utsname.h>
17700+#include <linux/ipc.h>
17701+
17702+
17703+static inline
17704+int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer)
17705+{
17706+ struct mnt_namespace *ns;
17707+ struct uts_namespace *uts;
17708+ struct ipc_namespace *ipc;
17709+ struct path path;
17710+ char *pstr, *root;
17711+ int length = 0;
17712+
17713+ if (!nsproxy)
17714+ goto out;
17715+
17716+ length += sprintf(buffer + length,
17717+ "NSProxy:\t%p [%p,%p,%p]\n",
17718+ nsproxy, nsproxy->mnt_ns,
17719+ nsproxy->uts_ns, nsproxy->ipc_ns);
17720+
17721+ ns = nsproxy->mnt_ns;
17722+ if (!ns)
17723+ goto skip_ns;
17724+
17725+ pstr = kmalloc(PATH_MAX, GFP_KERNEL);
17726+ if (!pstr)
17727+ goto skip_ns;
17728+
17729+ path.mnt = ns->root;
17730+ path.dentry = ns->root->mnt_root;
17731+ root = d_path(&path, pstr, PATH_MAX - 2);
17732+ length += sprintf(buffer + length,
17733+ "Namespace:\t%p [#%u]\n"
17734+ "RootPath:\t%s\n",
17735+ ns, atomic_read(&ns->count),
17736+ root);
17737+ kfree(pstr);
17738+skip_ns:
17739+
17740+ uts = nsproxy->uts_ns;
17741+ if (!uts)
17742+ goto skip_uts;
17743+
17744+ length += sprintf(buffer + length,
17745+ "SysName:\t%.*s\n"
17746+ "NodeName:\t%.*s\n"
17747+ "Release:\t%.*s\n"
17748+ "Version:\t%.*s\n"
17749+ "Machine:\t%.*s\n"
17750+ "DomainName:\t%.*s\n",
17751+ __NEW_UTS_LEN, uts->name.sysname,
17752+ __NEW_UTS_LEN, uts->name.nodename,
17753+ __NEW_UTS_LEN, uts->name.release,
17754+ __NEW_UTS_LEN, uts->name.version,
17755+ __NEW_UTS_LEN, uts->name.machine,
17756+ __NEW_UTS_LEN, uts->name.domainname);
17757+skip_uts:
17758+
17759+ ipc = nsproxy->ipc_ns;
17760+ if (!ipc)
17761+ goto skip_ipc;
17762+
17763+ length += sprintf(buffer + length,
17764+ "SEMS:\t\t%d %d %d %d %d\n"
17765+ "MSG:\t\t%d %d %d\n"
17766+ "SHM:\t\t%lu %lu %d %d\n",
17767+ ipc->sem_ctls[0], ipc->sem_ctls[1],
17768+ ipc->sem_ctls[2], ipc->sem_ctls[3],
17769+ ipc->used_sems,
17770+ ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni,
17771+ (unsigned long)ipc->shm_ctlmax,
17772+ (unsigned long)ipc->shm_ctlall,
17773+ ipc->shm_ctlmni, ipc->shm_tot);
17774+skip_ipc:
17775+out:
17776+ return length;
17777+}
17778+
17779+
17780+#include <linux/sched.h>
17781+
17782+#define LOAD_INT(x) ((x) >> FSHIFT)
17783+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)
17784+
17785+static inline
17786+int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
17787+{
17788+ int length = 0;
17789+ int a, b, c;
17790+
17791+ length += sprintf(buffer + length,
17792+ "BiasUptime:\t%lu.%02lu\n",
17793+ (unsigned long)cvirt->bias_uptime.tv_sec,
17794+ (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
17795+
17796+ a = cvirt->load[0] + (FIXED_1 / 200);
17797+ b = cvirt->load[1] + (FIXED_1 / 200);
17798+ c = cvirt->load[2] + (FIXED_1 / 200);
17799+ length += sprintf(buffer + length,
17800+ "nr_threads:\t%d\n"
17801+ "nr_running:\t%d\n"
17802+ "nr_unintr:\t%d\n"
17803+ "nr_onhold:\t%d\n"
17804+ "load_updates:\t%d\n"
17805+ "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
17806+ "total_forks:\t%d\n",
17807+ atomic_read(&cvirt->nr_threads),
17808+ atomic_read(&cvirt->nr_running),
17809+ atomic_read(&cvirt->nr_uninterruptible),
17810+ atomic_read(&cvirt->nr_onhold),
17811+ atomic_read(&cvirt->load_updates),
17812+ LOAD_INT(a), LOAD_FRAC(a),
17813+ LOAD_INT(b), LOAD_FRAC(b),
17814+ LOAD_INT(c), LOAD_FRAC(c),
17815+ atomic_read(&cvirt->total_forks));
17816+ return length;
17817+}
17818+
17819+static inline
17820+int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
17821+ char *buffer, int cpu)
17822+{
17823+ int length = 0;
17824+ return length;
17825+}
17826+
17827+#endif /* _VX_CVIRT_PROC_H */
17828diff -NurpP --minimal linux-3.0.9/kernel/vserver/debug.c linux-3.0.9-vs2.3.2.1/kernel/vserver/debug.c
17829--- linux-3.0.9/kernel/vserver/debug.c 1970-01-01 01:00:00.000000000 +0100
17830+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/debug.c 2011-06-10 22:11:24.000000000 +0200
17831@@ -0,0 +1,32 @@
17832+/*
17833+ * kernel/vserver/debug.c
17834+ *
17835+ * Copyright (C) 2005-2007 Herbert Pötzl
17836+ *
17837+ * V0.01 vx_info dump support
17838+ *
17839+ */
17840+
17841+#include <linux/module.h>
17842+
17843+#include <linux/vserver/context.h>
17844+
17845+
17846+void dump_vx_info(struct vx_info *vxi, int level)
17847+{
17848+ printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id,
17849+ atomic_read(&vxi->vx_usecnt),
17850+ atomic_read(&vxi->vx_tasks),
17851+ vxi->vx_state);
17852+ if (level > 0) {
17853+ __dump_vx_limit(&vxi->limit);
17854+ __dump_vx_sched(&vxi->sched);
17855+ __dump_vx_cvirt(&vxi->cvirt);
17856+ __dump_vx_cacct(&vxi->cacct);
17857+ }
17858+ printk("---\n");
17859+}
17860+
17861+
17862+EXPORT_SYMBOL_GPL(dump_vx_info);
17863+
17864diff -NurpP --minimal linux-3.0.9/kernel/vserver/device.c linux-3.0.9-vs2.3.2.1/kernel/vserver/device.c
17865--- linux-3.0.9/kernel/vserver/device.c 1970-01-01 01:00:00.000000000 +0100
17866+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/device.c 2011-06-10 23:20:56.000000000 +0200
17867@@ -0,0 +1,443 @@
17868+/*
17869+ * linux/kernel/vserver/device.c
17870+ *
17871+ * Linux-VServer: Device Support
17872+ *
17873+ * Copyright (C) 2006 Herbert Pötzl
17874+ * Copyright (C) 2007 Daniel Hokka Zakrisson
17875+ *
17876+ * V0.01 device mapping basics
17877+ * V0.02 added defaults
17878+ *
17879+ */
17880+
17881+#include <linux/slab.h>
17882+#include <linux/rcupdate.h>
17883+#include <linux/fs.h>
17884+#include <linux/namei.h>
17885+#include <linux/hash.h>
17886+
17887+#include <asm/errno.h>
17888+#include <asm/uaccess.h>
17889+#include <linux/vserver/base.h>
17890+#include <linux/vserver/debug.h>
17891+#include <linux/vserver/context.h>
17892+#include <linux/vserver/device.h>
17893+#include <linux/vserver/device_cmd.h>
17894+
17895+
17896+#define DMAP_HASH_BITS 4
17897+
17898+
17899+struct vs_mapping {
17900+ union {
17901+ struct hlist_node hlist;
17902+ struct list_head list;
17903+ } u;
17904+#define dm_hlist u.hlist
17905+#define dm_list u.list
17906+ xid_t xid;
17907+ dev_t device;
17908+ struct vx_dmap_target target;
17909+};
17910+
17911+
17912+static struct hlist_head dmap_main_hash[1 << DMAP_HASH_BITS];
17913+
17914+static DEFINE_SPINLOCK(dmap_main_hash_lock);
17915+
17916+static struct vx_dmap_target dmap_defaults[2] = {
17917+ { .flags = DATTR_OPEN },
17918+ { .flags = DATTR_OPEN },
17919+};
17920+
17921+
17922+struct kmem_cache *dmap_cachep __read_mostly;
17923+
17924+int __init dmap_cache_init(void)
17925+{
17926+ dmap_cachep = kmem_cache_create("dmap_cache",
17927+ sizeof(struct vs_mapping), 0,
17928+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
17929+ return 0;
17930+}
17931+
17932+__initcall(dmap_cache_init);
17933+
17934+
17935+static inline unsigned int __hashval(dev_t dev, int bits)
17936+{
17937+ return hash_long((unsigned long)dev, bits);
17938+}
17939+
17940+
17941+/* __hash_mapping()
17942+ * add the mapping to the hash table
17943+ */
17944+static inline void __hash_mapping(struct vx_info *vxi, struct vs_mapping *vdm)
17945+{
17946+ spinlock_t *hash_lock = &dmap_main_hash_lock;
17947+ struct hlist_head *head, *hash = dmap_main_hash;
17948+ int device = vdm->device;
17949+
17950+ spin_lock(hash_lock);
17951+ vxdprintk(VXD_CBIT(misc, 8), "__hash_mapping: %p[#%d] %08x:%08x",
17952+ vxi, vxi ? vxi->vx_id : 0, device, vdm->target.target);
17953+
17954+ head = &hash[__hashval(device, DMAP_HASH_BITS)];
17955+ hlist_add_head(&vdm->dm_hlist, head);
17956+ spin_unlock(hash_lock);
17957+}
17958+
17959+
17960+static inline int __mode_to_default(umode_t mode)
17961+{
17962+ switch (mode) {
17963+ case S_IFBLK:
17964+ return 0;
17965+ case S_IFCHR:
17966+ return 1;
17967+ default:
17968+ BUG();
17969+ }
17970+}
17971+
17972+
17973+/* __set_default()
17974+ * set a default
17975+ */
17976+static inline void __set_default(struct vx_info *vxi, umode_t mode,
17977+ struct vx_dmap_target *vdmt)
17978+{
17979+ spinlock_t *hash_lock = &dmap_main_hash_lock;
17980+ spin_lock(hash_lock);
17981+
17982+ if (vxi)
17983+ vxi->dmap.targets[__mode_to_default(mode)] = *vdmt;
17984+ else
17985+ dmap_defaults[__mode_to_default(mode)] = *vdmt;
17986+
17987+
17988+ spin_unlock(hash_lock);
17989+
17990+ vxdprintk(VXD_CBIT(misc, 8), "__set_default: %p[#%u] %08x %04x",
17991+ vxi, vxi ? vxi->vx_id : 0, vdmt->target, vdmt->flags);
17992+}
17993+
17994+
17995+/* __remove_default()
17996+ * remove a default
17997+ */
17998+static inline int __remove_default(struct vx_info *vxi, umode_t mode)
17999+{
18000+ spinlock_t *hash_lock = &dmap_main_hash_lock;
18001+ spin_lock(hash_lock);
18002+
18003+ if (vxi)
18004+ vxi->dmap.targets[__mode_to_default(mode)].flags = 0;
18005+ else /* remove == reset */
18006+ dmap_defaults[__mode_to_default(mode)].flags = DATTR_OPEN | mode;
18007+
18008+ spin_unlock(hash_lock);
18009+ return 0;
18010+}
18011+
18012+
18013+/* __find_mapping()
18014+ * find a mapping in the hash table
18015+ *
18016+ * caller must hold hash_lock
18017+ */
18018+static inline int __find_mapping(xid_t xid, dev_t device, umode_t mode,
18019+ struct vs_mapping **local, struct vs_mapping **global)
18020+{
18021+ struct hlist_head *hash = dmap_main_hash;
18022+ struct hlist_head *head = &hash[__hashval(device, DMAP_HASH_BITS)];
18023+ struct hlist_node *pos;
18024+ struct vs_mapping *vdm;
18025+
18026+ *local = NULL;
18027+ if (global)
18028+ *global = NULL;
18029+
18030+ hlist_for_each(pos, head) {
18031+ vdm = hlist_entry(pos, struct vs_mapping, dm_hlist);
18032+
18033+ if ((vdm->device == device) &&
18034+ !((vdm->target.flags ^ mode) & S_IFMT)) {
18035+ if (vdm->xid == xid) {
18036+ *local = vdm;
18037+ return 1;
18038+ } else if (global && vdm->xid == 0)
18039+ *global = vdm;
18040+ }
18041+ }
18042+
18043+ if (global && *global)
18044+ return 0;
18045+ else
18046+ return -ENOENT;
18047+}
18048+
18049+
18050+/* __lookup_mapping()
18051+ * find a mapping and store the result in target and flags
18052+ */
18053+static inline int __lookup_mapping(struct vx_info *vxi,
18054+ dev_t device, dev_t *target, int *flags, umode_t mode)
18055+{
18056+ spinlock_t *hash_lock = &dmap_main_hash_lock;
18057+ struct vs_mapping *vdm, *global;
18058+ struct vx_dmap_target *vdmt;
18059+ int ret = 0;
18060+ xid_t xid = vxi->vx_id;
18061+ int index;
18062+
18063+ spin_lock(hash_lock);
18064+ if (__find_mapping(xid, device, mode, &vdm, &global) > 0) {
18065+ ret = 1;
18066+ vdmt = &vdm->target;
18067+ goto found;
18068+ }
18069+
18070+ index = __mode_to_default(mode);
18071+ if (vxi && vxi->dmap.targets[index].flags) {
18072+ ret = 2;
18073+ vdmt = &vxi->dmap.targets[index];
18074+ } else if (global) {
18075+ ret = 3;
18076+ vdmt = &global->target;
18077+ goto found;
18078+ } else {
18079+ ret = 4;
18080+ vdmt = &dmap_defaults[index];
18081+ }
18082+
18083+found:
18084+ if (target && (vdmt->flags & DATTR_REMAP))
18085+ *target = vdmt->target;
18086+ else if (target)
18087+ *target = device;
18088+ if (flags)
18089+ *flags = vdmt->flags;
18090+
18091+ spin_unlock(hash_lock);
18092+
18093+ return ret;
18094+}
18095+
18096+
18097+/* __remove_mapping()
18098+ * remove a mapping from the hash table
18099+ */
18100+static inline int __remove_mapping(struct vx_info *vxi, dev_t device,
18101+ umode_t mode)
18102+{
18103+ spinlock_t *hash_lock = &dmap_main_hash_lock;
18104+ struct vs_mapping *vdm = NULL;
18105+ int ret = 0;
18106+
18107+ spin_lock(hash_lock);
18108+
18109+ ret = __find_mapping((vxi ? vxi->vx_id : 0), device, mode, &vdm,
18110+ NULL);
18111+ vxdprintk(VXD_CBIT(misc, 8), "__remove_mapping: %p[#%d] %08x %04x",
18112+ vxi, vxi ? vxi->vx_id : 0, device, mode);
18113+ if (ret < 0)
18114+ goto out;
18115+ hlist_del(&vdm->dm_hlist);
18116+
18117+out:
18118+ spin_unlock(hash_lock);
18119+ if (vdm)
18120+ kmem_cache_free(dmap_cachep, vdm);
18121+ return ret;
18122+}
18123+
18124+
18125+
18126+int vs_map_device(struct vx_info *vxi,
18127+ dev_t device, dev_t *target, umode_t mode)
18128+{
18129+ int ret, flags = DATTR_MASK;
18130+
18131+ if (!vxi) {
18132+ if (target)
18133+ *target = device;
18134+ goto out;
18135+ }
18136+ ret = __lookup_mapping(vxi, device, target, &flags, mode);
18137+ vxdprintk(VXD_CBIT(misc, 8), "vs_map_device: %08x target: %08x flags: %04x mode: %04x mapped=%d",
18138+ device, target ? *target : 0, flags, mode, ret);
18139+out:
18140+ return (flags & DATTR_MASK);
18141+}
18142+
18143+
18144+
18145+static int do_set_mapping(struct vx_info *vxi,
18146+ dev_t device, dev_t target, int flags, umode_t mode)
18147+{
18148+ if (device) {
18149+ struct vs_mapping *new;
18150+
18151+ new = kmem_cache_alloc(dmap_cachep, GFP_KERNEL);
18152+ if (!new)
18153+ return -ENOMEM;
18154+
18155+ INIT_HLIST_NODE(&new->dm_hlist);
18156+ new->device = device;
18157+ new->target.target = target;
18158+ new->target.flags = flags | mode;
18159+ new->xid = (vxi ? vxi->vx_id : 0);
18160+
18161+ vxdprintk(VXD_CBIT(misc, 8), "do_set_mapping: %08x target: %08x flags: %04x", device, target, flags);
18162+ __hash_mapping(vxi, new);
18163+ } else {
18164+ struct vx_dmap_target new = {
18165+ .target = target,
18166+ .flags = flags | mode,
18167+ };
18168+ __set_default(vxi, mode, &new);
18169+ }
18170+ return 0;
18171+}
18172+
18173+
18174+static int do_unset_mapping(struct vx_info *vxi,
18175+ dev_t device, dev_t target, int flags, umode_t mode)
18176+{
18177+ int ret = -EINVAL;
18178+
18179+ if (device) {
18180+ ret = __remove_mapping(vxi, device, mode);
18181+ if (ret < 0)
18182+ goto out;
18183+ } else {
18184+ ret = __remove_default(vxi, mode);
18185+ if (ret < 0)
18186+ goto out;
18187+ }
18188+
18189+out:
18190+ return ret;
18191+}
18192+
18193+
18194+static inline int __user_device(const char __user *name, dev_t *dev,
18195+ umode_t *mode)
18196+{
18197+ struct nameidata nd;
18198+ int ret;
18199+
18200+ if (!name) {
18201+ *dev = 0;
18202+ return 0;
18203+ }
18204+ ret = user_lpath(name, &nd.path);
18205+ if (ret)
18206+ return ret;
18207+ if (nd.path.dentry->d_inode) {
18208+ *dev = nd.path.dentry->d_inode->i_rdev;
18209+ *mode = nd.path.dentry->d_inode->i_mode;
18210+ }
18211+ path_put(&nd.path);
18212+ return 0;
18213+}
18214+
18215+static inline int __mapping_mode(dev_t device, dev_t target,
18216+ umode_t device_mode, umode_t target_mode, umode_t *mode)
18217+{
18218+ if (device)
18219+ *mode = device_mode & S_IFMT;
18220+ else if (target)
18221+ *mode = target_mode & S_IFMT;
18222+ else
18223+ return -EINVAL;
18224+
18225+ /* if both given, device and target mode have to match */
18226+ if (device && target &&
18227+ ((device_mode ^ target_mode) & S_IFMT))
18228+ return -EINVAL;
18229+ return 0;
18230+}
18231+
18232+
18233+static inline int do_mapping(struct vx_info *vxi, const char __user *device_path,
18234+ const char __user *target_path, int flags, int set)
18235+{
18236+ dev_t device = ~0, target = ~0;
18237+ umode_t device_mode = 0, target_mode = 0, mode;
18238+ int ret;
18239+
18240+ ret = __user_device(device_path, &device, &device_mode);
18241+ if (ret)
18242+ return ret;
18243+ ret = __user_device(target_path, &target, &target_mode);
18244+ if (ret)
18245+ return ret;
18246+
18247+ ret = __mapping_mode(device, target,
18248+ device_mode, target_mode, &mode);
18249+ if (ret)
18250+ return ret;
18251+
18252+ if (set)
18253+ return do_set_mapping(vxi, device, target,
18254+ flags, mode);
18255+ else
18256+ return do_unset_mapping(vxi, device, target,
18257+ flags, mode);
18258+}
18259+
18260+
18261+int vc_set_mapping(struct vx_info *vxi, void __user *data)
18262+{
18263+ struct vcmd_set_mapping_v0 vc_data;
18264+
18265+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18266+ return -EFAULT;
18267+
18268+ return do_mapping(vxi, vc_data.device, vc_data.target,
18269+ vc_data.flags, 1);
18270+}
18271+
18272+int vc_unset_mapping(struct vx_info *vxi, void __user *data)
18273+{
18274+ struct vcmd_set_mapping_v0 vc_data;
18275+
18276+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18277+ return -EFAULT;
18278+
18279+ return do_mapping(vxi, vc_data.device, vc_data.target,
18280+ vc_data.flags, 0);
18281+}
18282+
18283+
18284+#ifdef CONFIG_COMPAT
18285+
18286+int vc_set_mapping_x32(struct vx_info *vxi, void __user *data)
18287+{
18288+ struct vcmd_set_mapping_v0_x32 vc_data;
18289+
18290+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18291+ return -EFAULT;
18292+
18293+ return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
18294+ compat_ptr(vc_data.target_ptr), vc_data.flags, 1);
18295+}
18296+
18297+int vc_unset_mapping_x32(struct vx_info *vxi, void __user *data)
18298+{
18299+ struct vcmd_set_mapping_v0_x32 vc_data;
18300+
18301+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18302+ return -EFAULT;
18303+
18304+ return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
18305+ compat_ptr(vc_data.target_ptr), vc_data.flags, 0);
18306+}
18307+
18308+#endif /* CONFIG_COMPAT */
18309+
18310+
18311diff -NurpP --minimal linux-3.0.9/kernel/vserver/dlimit.c linux-3.0.9-vs2.3.2.1/kernel/vserver/dlimit.c
18312--- linux-3.0.9/kernel/vserver/dlimit.c 1970-01-01 01:00:00.000000000 +0100
18313+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/dlimit.c 2011-06-10 23:20:56.000000000 +0200
18314@@ -0,0 +1,531 @@
18315+/*
18316+ * linux/kernel/vserver/dlimit.c
18317+ *
18318+ * Virtual Server: Context Disk Limits
18319+ *
18320+ * Copyright (C) 2004-2009 Herbert Pötzl
18321+ *
18322+ * V0.01 initial version
18323+ * V0.02 compat32 splitup
18324+ * V0.03 extended interface
18325+ *
18326+ */
18327+
18328+#include <linux/statfs.h>
18329+#include <linux/sched.h>
18330+#include <linux/namei.h>
18331+#include <linux/vs_tag.h>
18332+#include <linux/vs_dlimit.h>
18333+#include <linux/vserver/dlimit_cmd.h>
18334+#include <linux/slab.h>
18335+// #include <linux/gfp.h>
18336+
18337+#include <asm/uaccess.h>
18338+
18339+/* __alloc_dl_info()
18340+
18341+ * allocate an initialized dl_info struct
18342+ * doesn't make it visible (hash) */
18343+
18344+static struct dl_info *__alloc_dl_info(struct super_block *sb, tag_t tag)
18345+{
18346+ struct dl_info *new = NULL;
18347+
18348+ vxdprintk(VXD_CBIT(dlim, 5),
18349+ "alloc_dl_info(%p,%d)*", sb, tag);
18350+
18351+ /* would this benefit from a slab cache? */
18352+ new = kmalloc(sizeof(struct dl_info), GFP_KERNEL);
18353+ if (!new)
18354+ return 0;
18355+
18356+ memset(new, 0, sizeof(struct dl_info));
18357+ new->dl_tag = tag;
18358+ new->dl_sb = sb;
18359+ // INIT_RCU_HEAD(&new->dl_rcu);
18360+ INIT_HLIST_NODE(&new->dl_hlist);
18361+ spin_lock_init(&new->dl_lock);
18362+ atomic_set(&new->dl_refcnt, 0);
18363+ atomic_set(&new->dl_usecnt, 0);
18364+
18365+ /* rest of init goes here */
18366+
18367+ vxdprintk(VXD_CBIT(dlim, 4),
18368+ "alloc_dl_info(%p,%d) = %p", sb, tag, new);
18369+ return new;
18370+}
18371+
18372+/* __dealloc_dl_info()
18373+
18374+ * final disposal of dl_info */
18375+
18376+static void __dealloc_dl_info(struct dl_info *dli)
18377+{
18378+ vxdprintk(VXD_CBIT(dlim, 4),
18379+ "dealloc_dl_info(%p)", dli);
18380+
18381+ dli->dl_hlist.next = LIST_POISON1;
18382+ dli->dl_tag = -1;
18383+ dli->dl_sb = 0;
18384+
18385+ BUG_ON(atomic_read(&dli->dl_usecnt));
18386+ BUG_ON(atomic_read(&dli->dl_refcnt));
18387+
18388+ kfree(dli);
18389+}
18390+
18391+
18392+/* hash table for dl_info hash */
18393+
18394+#define DL_HASH_SIZE 13
18395+
18396+struct hlist_head dl_info_hash[DL_HASH_SIZE];
18397+
18398+static DEFINE_SPINLOCK(dl_info_hash_lock);
18399+
18400+
18401+static inline unsigned int __hashval(struct super_block *sb, tag_t tag)
18402+{
18403+ return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE);
18404+}
18405+
18406+
18407+
18408+/* __hash_dl_info()
18409+
18410+ * add the dli to the global hash table
18411+ * requires the hash_lock to be held */
18412+
18413+static inline void __hash_dl_info(struct dl_info *dli)
18414+{
18415+ struct hlist_head *head;
18416+
18417+ vxdprintk(VXD_CBIT(dlim, 6),
18418+ "__hash_dl_info: %p[#%d]", dli, dli->dl_tag);
18419+ get_dl_info(dli);
18420+ head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)];
18421+ hlist_add_head_rcu(&dli->dl_hlist, head);
18422+}
18423+
18424+/* __unhash_dl_info()
18425+
18426+ * remove the dli from the global hash table
18427+ * requires the hash_lock to be held */
18428+
18429+static inline void __unhash_dl_info(struct dl_info *dli)
18430+{
18431+ vxdprintk(VXD_CBIT(dlim, 6),
18432+ "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag);
18433+ hlist_del_rcu(&dli->dl_hlist);
18434+ put_dl_info(dli);
18435+}
18436+
18437+
18438+/* __lookup_dl_info()
18439+
18440+ * requires the rcu_read_lock()
18441+ * doesn't increment the dl_refcnt */
18442+
18443+static inline struct dl_info *__lookup_dl_info(struct super_block *sb, tag_t tag)
18444+{
18445+ struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)];
18446+ struct hlist_node *pos;
18447+ struct dl_info *dli;
18448+
18449+ hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) {
18450+
18451+ if (dli->dl_tag == tag && dli->dl_sb == sb) {
18452+ return dli;
18453+ }
18454+ }
18455+ return NULL;
18456+}
18457+
18458+
18459+struct dl_info *locate_dl_info(struct super_block *sb, tag_t tag)
18460+{
18461+ struct dl_info *dli;
18462+
18463+ rcu_read_lock();
18464+ dli = get_dl_info(__lookup_dl_info(sb, tag));
18465+ vxdprintk(VXD_CBIT(dlim, 7),
18466+ "locate_dl_info(%p,#%d) = %p", sb, tag, dli);
18467+ rcu_read_unlock();
18468+ return dli;
18469+}
18470+
18471+void rcu_free_dl_info(struct rcu_head *head)
18472+{
18473+ struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
18474+ int usecnt, refcnt;
18475+
18476+ BUG_ON(!dli || !head);
18477+
18478+ usecnt = atomic_read(&dli->dl_usecnt);
18479+ BUG_ON(usecnt < 0);
18480+
18481+ refcnt = atomic_read(&dli->dl_refcnt);
18482+ BUG_ON(refcnt < 0);
18483+
18484+ vxdprintk(VXD_CBIT(dlim, 3),
18485+ "rcu_free_dl_info(%p)", dli);
18486+ if (!usecnt)
18487+ __dealloc_dl_info(dli);
18488+ else
18489+ printk("!!! rcu didn't free\n");
18490+}
18491+
18492+
18493+
18494+
18495+static int do_addrem_dlimit(uint32_t id, const char __user *name,
18496+ uint32_t flags, int add)
18497+{
18498+ struct path path;
18499+ int ret;
18500+
18501+ ret = user_lpath(name, &path);
18502+ if (!ret) {
18503+ struct super_block *sb;
18504+ struct dl_info *dli;
18505+
18506+ ret = -EINVAL;
18507+ if (!path.dentry->d_inode)
18508+ goto out_release;
18509+ if (!(sb = path.dentry->d_inode->i_sb))
18510+ goto out_release;
18511+
18512+ if (add) {
18513+ dli = __alloc_dl_info(sb, id);
18514+ spin_lock(&dl_info_hash_lock);
18515+
18516+ ret = -EEXIST;
18517+ if (__lookup_dl_info(sb, id))
18518+ goto out_unlock;
18519+ __hash_dl_info(dli);
18520+ dli = NULL;
18521+ } else {
18522+ spin_lock(&dl_info_hash_lock);
18523+ dli = __lookup_dl_info(sb, id);
18524+
18525+ ret = -ESRCH;
18526+ if (!dli)
18527+ goto out_unlock;
18528+ __unhash_dl_info(dli);
18529+ }
18530+ ret = 0;
18531+ out_unlock:
18532+ spin_unlock(&dl_info_hash_lock);
18533+ if (add && dli)
18534+ __dealloc_dl_info(dli);
18535+ out_release:
18536+ path_put(&path);
18537+ }
18538+ return ret;
18539+}
18540+
18541+int vc_add_dlimit(uint32_t id, void __user *data)
18542+{
18543+ struct vcmd_ctx_dlimit_base_v0 vc_data;
18544+
18545+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18546+ return -EFAULT;
18547+
18548+ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);
18549+}
18550+
18551+int vc_rem_dlimit(uint32_t id, void __user *data)
18552+{
18553+ struct vcmd_ctx_dlimit_base_v0 vc_data;
18554+
18555+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18556+ return -EFAULT;
18557+
18558+ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);
18559+}
18560+
18561+#ifdef CONFIG_COMPAT
18562+
18563+int vc_add_dlimit_x32(uint32_t id, void __user *data)
18564+{
18565+ struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
18566+
18567+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18568+ return -EFAULT;
18569+
18570+ return do_addrem_dlimit(id,
18571+ compat_ptr(vc_data.name_ptr), vc_data.flags, 1);
18572+}
18573+
18574+int vc_rem_dlimit_x32(uint32_t id, void __user *data)
18575+{
18576+ struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
18577+
18578+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18579+ return -EFAULT;
18580+
18581+ return do_addrem_dlimit(id,
18582+ compat_ptr(vc_data.name_ptr), vc_data.flags, 0);
18583+}
18584+
18585+#endif /* CONFIG_COMPAT */
18586+
18587+
18588+static inline
18589+int do_set_dlimit(uint32_t id, const char __user *name,
18590+ uint32_t space_used, uint32_t space_total,
18591+ uint32_t inodes_used, uint32_t inodes_total,
18592+ uint32_t reserved, uint32_t flags)
18593+{
18594+ struct path path;
18595+ int ret;
18596+
18597+ ret = user_lpath(name, &path);
18598+ if (!ret) {
18599+ struct super_block *sb;
18600+ struct dl_info *dli;
18601+
18602+ ret = -EINVAL;
18603+ if (!path.dentry->d_inode)
18604+ goto out_release;
18605+ if (!(sb = path.dentry->d_inode->i_sb))
18606+ goto out_release;
18607+
18608+ /* sanity checks */
18609+ if ((reserved != CDLIM_KEEP &&
18610+ reserved > 100) ||
18611+ (inodes_used != CDLIM_KEEP &&
18612+ inodes_used > inodes_total) ||
18613+ (space_used != CDLIM_KEEP &&
18614+ space_used > space_total))
18615+ goto out_release;
18616+
18617+ ret = -ESRCH;
18618+ dli = locate_dl_info(sb, id);
18619+ if (!dli)
18620+ goto out_release;
18621+
18622+ spin_lock(&dli->dl_lock);
18623+
18624+ if (inodes_used != CDLIM_KEEP)
18625+ dli->dl_inodes_used = inodes_used;
18626+ if (inodes_total != CDLIM_KEEP)
18627+ dli->dl_inodes_total = inodes_total;
18628+ if (space_used != CDLIM_KEEP)
18629+ dli->dl_space_used = dlimit_space_32to64(
18630+ space_used, flags, DLIMS_USED);
18631+
18632+ if (space_total == CDLIM_INFINITY)
18633+ dli->dl_space_total = DLIM_INFINITY;
18634+ else if (space_total != CDLIM_KEEP)
18635+ dli->dl_space_total = dlimit_space_32to64(
18636+ space_total, flags, DLIMS_TOTAL);
18637+
18638+ if (reserved != CDLIM_KEEP)
18639+ dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;
18640+
18641+ spin_unlock(&dli->dl_lock);
18642+
18643+ put_dl_info(dli);
18644+ ret = 0;
18645+
18646+ out_release:
18647+ path_put(&path);
18648+ }
18649+ return ret;
18650+}
18651+
18652+int vc_set_dlimit(uint32_t id, void __user *data)
18653+{
18654+ struct vcmd_ctx_dlimit_v0 vc_data;
18655+
18656+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18657+ return -EFAULT;
18658+
18659+ return do_set_dlimit(id, vc_data.name,
18660+ vc_data.space_used, vc_data.space_total,
18661+ vc_data.inodes_used, vc_data.inodes_total,
18662+ vc_data.reserved, vc_data.flags);
18663+}
18664+
18665+#ifdef CONFIG_COMPAT
18666+
18667+int vc_set_dlimit_x32(uint32_t id, void __user *data)
18668+{
18669+ struct vcmd_ctx_dlimit_v0_x32 vc_data;
18670+
18671+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18672+ return -EFAULT;
18673+
18674+ return do_set_dlimit(id, compat_ptr(vc_data.name_ptr),
18675+ vc_data.space_used, vc_data.space_total,
18676+ vc_data.inodes_used, vc_data.inodes_total,
18677+ vc_data.reserved, vc_data.flags);
18678+}
18679+
18680+#endif /* CONFIG_COMPAT */
18681+
18682+
18683+static inline
18684+int do_get_dlimit(uint32_t id, const char __user *name,
18685+ uint32_t *space_used, uint32_t *space_total,
18686+ uint32_t *inodes_used, uint32_t *inodes_total,
18687+ uint32_t *reserved, uint32_t *flags)
18688+{
18689+ struct path path;
18690+ int ret;
18691+
18692+ ret = user_lpath(name, &path);
18693+ if (!ret) {
18694+ struct super_block *sb;
18695+ struct dl_info *dli;
18696+
18697+ ret = -EINVAL;
18698+ if (!path.dentry->d_inode)
18699+ goto out_release;
18700+ if (!(sb = path.dentry->d_inode->i_sb))
18701+ goto out_release;
18702+
18703+ ret = -ESRCH;
18704+ dli = locate_dl_info(sb, id);
18705+ if (!dli)
18706+ goto out_release;
18707+
18708+ spin_lock(&dli->dl_lock);
18709+ *inodes_used = dli->dl_inodes_used;
18710+ *inodes_total = dli->dl_inodes_total;
18711+
18712+ *space_used = dlimit_space_64to32(
18713+ dli->dl_space_used, flags, DLIMS_USED);
18714+
18715+ if (dli->dl_space_total == DLIM_INFINITY)
18716+ *space_total = CDLIM_INFINITY;
18717+ else
18718+ *space_total = dlimit_space_64to32(
18719+ dli->dl_space_total, flags, DLIMS_TOTAL);
18720+
18721+ *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
18722+ spin_unlock(&dli->dl_lock);
18723+
18724+ put_dl_info(dli);
18725+ ret = -EFAULT;
18726+
18727+ ret = 0;
18728+ out_release:
18729+ path_put(&path);
18730+ }
18731+ return ret;
18732+}
18733+
18734+
18735+int vc_get_dlimit(uint32_t id, void __user *data)
18736+{
18737+ struct vcmd_ctx_dlimit_v0 vc_data;
18738+ int ret;
18739+
18740+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18741+ return -EFAULT;
18742+
18743+ ret = do_get_dlimit(id, vc_data.name,
18744+ &vc_data.space_used, &vc_data.space_total,
18745+ &vc_data.inodes_used, &vc_data.inodes_total,
18746+ &vc_data.reserved, &vc_data.flags);
18747+ if (ret)
18748+ return ret;
18749+
18750+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18751+ return -EFAULT;
18752+ return 0;
18753+}
18754+
18755+#ifdef CONFIG_COMPAT
18756+
18757+int vc_get_dlimit_x32(uint32_t id, void __user *data)
18758+{
18759+ struct vcmd_ctx_dlimit_v0_x32 vc_data;
18760+ int ret;
18761+
18762+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18763+ return -EFAULT;
18764+
18765+ ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),
18766+ &vc_data.space_used, &vc_data.space_total,
18767+ &vc_data.inodes_used, &vc_data.inodes_total,
18768+ &vc_data.reserved, &vc_data.flags);
18769+ if (ret)
18770+ return ret;
18771+
18772+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18773+ return -EFAULT;
18774+ return 0;
18775+}
18776+
18777+#endif /* CONFIG_COMPAT */
18778+
18779+
18780+void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
18781+{
18782+ struct dl_info *dli;
18783+ __u64 blimit, bfree, bavail;
18784+ __u32 ifree;
18785+
18786+ dli = locate_dl_info(sb, dx_current_tag());
18787+ if (!dli)
18788+ return;
18789+
18790+ spin_lock(&dli->dl_lock);
18791+ if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY)
18792+ goto no_ilim;
18793+
18794+ /* reduce max inodes available to limit */
18795+ if (buf->f_files > dli->dl_inodes_total)
18796+ buf->f_files = dli->dl_inodes_total;
18797+
18798+ ifree = dli->dl_inodes_total - dli->dl_inodes_used;
18799+ /* reduce free inodes to min */
18800+ if (ifree < buf->f_ffree)
18801+ buf->f_ffree = ifree;
18802+
18803+no_ilim:
18804+ if (dli->dl_space_total == DLIM_INFINITY)
18805+ goto no_blim;
18806+
18807+ blimit = dli->dl_space_total >> sb->s_blocksize_bits;
18808+
18809+ if (dli->dl_space_total < dli->dl_space_used)
18810+ bfree = 0;
18811+ else
18812+ bfree = (dli->dl_space_total - dli->dl_space_used)
18813+ >> sb->s_blocksize_bits;
18814+
18815+ bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
18816+ if (bavail < dli->dl_space_used)
18817+ bavail = 0;
18818+ else
18819+ bavail = (bavail - dli->dl_space_used)
18820+ >> sb->s_blocksize_bits;
18821+
18822+ /* reduce max space available to limit */
18823+ if (buf->f_blocks > blimit)
18824+ buf->f_blocks = blimit;
18825+
18826+ /* reduce free space to min */
18827+ if (bfree < buf->f_bfree)
18828+ buf->f_bfree = bfree;
18829+
18830+ /* reduce avail space to min */
18831+ if (bavail < buf->f_bavail)
18832+ buf->f_bavail = bavail;
18833+
18834+no_blim:
18835+ spin_unlock(&dli->dl_lock);
18836+ put_dl_info(dli);
18837+
18838+ return;
18839+}
18840+
18841+#include <linux/module.h>
18842+
18843+EXPORT_SYMBOL_GPL(locate_dl_info);
18844+EXPORT_SYMBOL_GPL(rcu_free_dl_info);
18845+
18846diff -NurpP --minimal linux-3.0.9/kernel/vserver/helper.c linux-3.0.9-vs2.3.2.1/kernel/vserver/helper.c
18847--- linux-3.0.9/kernel/vserver/helper.c 1970-01-01 01:00:00.000000000 +0100
18848+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/helper.c 2011-06-10 22:11:24.000000000 +0200
18849@@ -0,0 +1,223 @@
18850+/*
18851+ * linux/kernel/vserver/helper.c
18852+ *
18853+ * Virtual Context Support
18854+ *
18855+ * Copyright (C) 2004-2007 Herbert Pötzl
18856+ *
18857+ * V0.01 basic helper
18858+ *
18859+ */
18860+
18861+#include <linux/kmod.h>
18862+#include <linux/reboot.h>
18863+#include <linux/vs_context.h>
18864+#include <linux/vs_network.h>
18865+#include <linux/vserver/signal.h>
18866+
18867+
18868+char vshelper_path[255] = "/sbin/vshelper";
18869+
18870+
18871+static int do_vshelper(char *name, char *argv[], char *envp[], int sync)
18872+{
18873+ int ret;
18874+
18875+ if ((ret = call_usermodehelper(name, argv, envp, sync))) {
18876+ printk( KERN_WARNING
18877+ "%s: (%s %s) returned %s with %d\n",
18878+ name, argv[1], argv[2],
18879+ sync ? "sync" : "async", ret);
18880+ }
18881+ vxdprintk(VXD_CBIT(switch, 4),
18882+ "%s: (%s %s) returned %s with %d",
18883+ name, argv[1], argv[2], sync ? "sync" : "async", ret);
18884+ return ret;
18885+}
18886+
18887+/*
18888+ * vshelper path is set via /proc/sys
18889+ * invoked by vserver sys_reboot(), with
18890+ * the following arguments
18891+ *
18892+ * argv [0] = vshelper_path;
18893+ * argv [1] = action: "restart", "halt", "poweroff", ...
18894+ * argv [2] = context identifier
18895+ *
18896+ * envp [*] = type-specific parameters
18897+ */
18898+
18899+long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg)
18900+{
18901+ char id_buf[8], cmd_buf[16];
18902+ char uid_buf[16], pid_buf[16];
18903+ int ret;
18904+
18905+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
18906+ char *envp[] = {"HOME=/", "TERM=linux",
18907+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
18908+ uid_buf, pid_buf, cmd_buf, 0};
18909+
18910+ if (vx_info_state(vxi, VXS_HELPER))
18911+ return -EAGAIN;
18912+ vxi->vx_state |= VXS_HELPER;
18913+
18914+ snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id);
18915+
18916+ snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
18917+ snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current_uid());
18918+ snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid);
18919+
18920+ switch (cmd) {
18921+ case LINUX_REBOOT_CMD_RESTART:
18922+ argv[1] = "restart";
18923+ break;
18924+
18925+ case LINUX_REBOOT_CMD_HALT:
18926+ argv[1] = "halt";
18927+ break;
18928+
18929+ case LINUX_REBOOT_CMD_POWER_OFF:
18930+ argv[1] = "poweroff";
18931+ break;
18932+
18933+ case LINUX_REBOOT_CMD_SW_SUSPEND:
18934+ argv[1] = "swsusp";
18935+ break;
18936+
18937+ case LINUX_REBOOT_CMD_OOM:
18938+ argv[1] = "oom";
18939+ break;
18940+
18941+ default:
18942+ vxi->vx_state &= ~VXS_HELPER;
18943+ return 0;
18944+ }
18945+
18946+ ret = do_vshelper(vshelper_path, argv, envp, 0);
18947+ vxi->vx_state &= ~VXS_HELPER;
18948+ __wakeup_vx_info(vxi);
18949+ return (ret) ? -EPERM : 0;
18950+}
18951+
18952+
18953+long vs_reboot(unsigned int cmd, void __user *arg)
18954+{
18955+ struct vx_info *vxi = current_vx_info();
18956+ long ret = 0;
18957+
18958+ vxdprintk(VXD_CBIT(misc, 5),
18959+ "vs_reboot(%p[#%d],%u)",
18960+ vxi, vxi ? vxi->vx_id : 0, cmd);
18961+
18962+ ret = vs_reboot_helper(vxi, cmd, arg);
18963+ if (ret)
18964+ return ret;
18965+
18966+ vxi->reboot_cmd = cmd;
18967+ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
18968+ switch (cmd) {
18969+ case LINUX_REBOOT_CMD_RESTART:
18970+ case LINUX_REBOOT_CMD_HALT:
18971+ case LINUX_REBOOT_CMD_POWER_OFF:
18972+ vx_info_kill(vxi, 0, SIGKILL);
18973+ vx_info_kill(vxi, 1, SIGKILL);
18974+ default:
18975+ break;
18976+ }
18977+ }
18978+ return 0;
18979+}
18980+
18981+long vs_oom_action(unsigned int cmd)
18982+{
18983+ struct vx_info *vxi = current_vx_info();
18984+ long ret = 0;
18985+
18986+ vxdprintk(VXD_CBIT(misc, 5),
18987+ "vs_oom_action(%p[#%d],%u)",
18988+ vxi, vxi ? vxi->vx_id : 0, cmd);
18989+
18990+ ret = vs_reboot_helper(vxi, cmd, NULL);
18991+ if (ret)
18992+ return ret;
18993+
18994+ vxi->reboot_cmd = cmd;
18995+ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
18996+ vx_info_kill(vxi, 0, SIGKILL);
18997+ vx_info_kill(vxi, 1, SIGKILL);
18998+ }
18999+ return 0;
19000+}
19001+
19002+/*
19003+ * argv [0] = vshelper_path;
19004+ * argv [1] = action: "startup", "shutdown"
19005+ * argv [2] = context identifier
19006+ *
19007+ * envp [*] = type-specific parameters
19008+ */
19009+
19010+long vs_state_change(struct vx_info *vxi, unsigned int cmd)
19011+{
19012+ char id_buf[8], cmd_buf[16];
19013+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
19014+ char *envp[] = {"HOME=/", "TERM=linux",
19015+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
19016+
19017+ if (!vx_info_flags(vxi, VXF_SC_HELPER, 0))
19018+ return 0;
19019+
19020+ snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id);
19021+ snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
19022+
19023+ switch (cmd) {
19024+ case VSC_STARTUP:
19025+ argv[1] = "startup";
19026+ break;
19027+ case VSC_SHUTDOWN:
19028+ argv[1] = "shutdown";
19029+ break;
19030+ default:
19031+ return 0;
19032+ }
19033+
19034+ return do_vshelper(vshelper_path, argv, envp, 1);
19035+}
19036+
19037+
19038+/*
19039+ * argv [0] = vshelper_path;
19040+ * argv [1] = action: "netup", "netdown"
19041+ * argv [2] = context identifier
19042+ *
19043+ * envp [*] = type-specific parameters
19044+ */
19045+
19046+long vs_net_change(struct nx_info *nxi, unsigned int cmd)
19047+{
19048+ char id_buf[8], cmd_buf[16];
19049+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
19050+ char *envp[] = {"HOME=/", "TERM=linux",
19051+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
19052+
19053+ if (!nx_info_flags(nxi, NXF_SC_HELPER, 0))
19054+ return 0;
19055+
19056+ snprintf(id_buf, sizeof(id_buf)-1, "%d", nxi->nx_id);
19057+ snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
19058+
19059+ switch (cmd) {
19060+ case VSC_NETUP:
19061+ argv[1] = "netup";
19062+ break;
19063+ case VSC_NETDOWN:
19064+ argv[1] = "netdown";
19065+ break;
19066+ default:
19067+ return 0;
19068+ }
19069+
19070+ return do_vshelper(vshelper_path, argv, envp, 1);
19071+}
19072+
19073diff -NurpP --minimal linux-3.0.9/kernel/vserver/history.c linux-3.0.9-vs2.3.2.1/kernel/vserver/history.c
19074--- linux-3.0.9/kernel/vserver/history.c 1970-01-01 01:00:00.000000000 +0100
19075+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/history.c 2011-06-10 22:11:24.000000000 +0200
19076@@ -0,0 +1,258 @@
19077+/*
19078+ * kernel/vserver/history.c
19079+ *
19080+ * Virtual Context History Backtrace
19081+ *
19082+ * Copyright (C) 2004-2007 Herbert Pötzl
19083+ *
19084+ * V0.01 basic structure
19085+ * V0.02 hash/unhash and trace
19086+ * V0.03 preemption fixes
19087+ *
19088+ */
19089+
19090+#include <linux/module.h>
19091+#include <asm/uaccess.h>
19092+
19093+#include <linux/vserver/context.h>
19094+#include <linux/vserver/debug.h>
19095+#include <linux/vserver/debug_cmd.h>
19096+#include <linux/vserver/history.h>
19097+
19098+
19099+#ifdef CONFIG_VSERVER_HISTORY
19100+#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE
19101+#else
19102+#define VXH_SIZE 64
19103+#endif
19104+
19105+struct _vx_history {
19106+ unsigned int counter;
19107+
19108+ struct _vx_hist_entry entry[VXH_SIZE + 1];
19109+};
19110+
19111+
19112+DEFINE_PER_CPU(struct _vx_history, vx_history_buffer);
19113+
19114+unsigned volatile int vxh_active = 1;
19115+
19116+static atomic_t sequence = ATOMIC_INIT(0);
19117+
19118+
19119+/* vxh_advance()
19120+
19121+ * requires disabled preemption */
19122+
19123+struct _vx_hist_entry *vxh_advance(void *loc)
19124+{
19125+ unsigned int cpu = smp_processor_id();
19126+ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
19127+ struct _vx_hist_entry *entry;
19128+ unsigned int index;
19129+
19130+ index = vxh_active ? (hist->counter++ % VXH_SIZE) : VXH_SIZE;
19131+ entry = &hist->entry[index];
19132+
19133+ entry->seq = atomic_inc_return(&sequence);
19134+ entry->loc = loc;
19135+ return entry;
19136+}
19137+
19138+EXPORT_SYMBOL_GPL(vxh_advance);
19139+
19140+
19141+#define VXH_LOC_FMTS "(#%04x,*%d):%p"
19142+
19143+#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc
19144+
19145+
19146+#define VXH_VXI_FMTS "%p[#%d,%d.%d]"
19147+
19148+#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \
19149+ (e)->vxi.ptr ? (e)->vxi.xid : 0, \
19150+ (e)->vxi.ptr ? (e)->vxi.usecnt : 0, \
19151+ (e)->vxi.ptr ? (e)->vxi.tasks : 0
19152+
19153+void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
19154+{
19155+ switch (e->type) {
19156+ case VXH_THROW_OOPS:
19157+ printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
19158+ break;
19159+
19160+ case VXH_GET_VX_INFO:
19161+ case VXH_PUT_VX_INFO:
19162+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
19163+ VXH_LOC_ARGS(e),
19164+ (e->type == VXH_GET_VX_INFO) ? "get" : "put",
19165+ VXH_VXI_ARGS(e));
19166+ break;
19167+
19168+ case VXH_INIT_VX_INFO:
19169+ case VXH_SET_VX_INFO:
19170+ case VXH_CLR_VX_INFO:
19171+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
19172+ VXH_LOC_ARGS(e),
19173+ (e->type == VXH_INIT_VX_INFO) ? "init" :
19174+ ((e->type == VXH_SET_VX_INFO) ? "set" : "clr"),
19175+ VXH_VXI_ARGS(e), e->sc.data);
19176+ break;
19177+
19178+ case VXH_CLAIM_VX_INFO:
19179+ case VXH_RELEASE_VX_INFO:
19180+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
19181+ VXH_LOC_ARGS(e),
19182+ (e->type == VXH_CLAIM_VX_INFO) ? "claim" : "release",
19183+ VXH_VXI_ARGS(e), e->sc.data);
19184+ break;
19185+
19186+ case VXH_ALLOC_VX_INFO:
19187+ case VXH_DEALLOC_VX_INFO:
19188+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
19189+ VXH_LOC_ARGS(e),
19190+ (e->type == VXH_ALLOC_VX_INFO) ? "alloc" : "dealloc",
19191+ VXH_VXI_ARGS(e));
19192+ break;
19193+
19194+ case VXH_HASH_VX_INFO:
19195+ case VXH_UNHASH_VX_INFO:
19196+ printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
19197+ VXH_LOC_ARGS(e),
19198+ (e->type == VXH_HASH_VX_INFO) ? "hash" : "unhash",
19199+ VXH_VXI_ARGS(e));
19200+ break;
19201+
19202+ case VXH_LOC_VX_INFO:
19203+ case VXH_LOOKUP_VX_INFO:
19204+ case VXH_CREATE_VX_INFO:
19205+ printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
19206+ VXH_LOC_ARGS(e),
19207+ (e->type == VXH_CREATE_VX_INFO) ? "create" :
19208+ ((e->type == VXH_LOC_VX_INFO) ? "loc" : "lookup"),
19209+ e->ll.arg, VXH_VXI_ARGS(e));
19210+ break;
19211+ }
19212+}
19213+
19214+static void __vxh_dump_history(void)
19215+{
19216+ unsigned int i, cpu;
19217+
19218+ printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
19219+ atomic_read(&sequence), NR_CPUS);
19220+
19221+ for (i = 0; i < VXH_SIZE; i++) {
19222+ for_each_online_cpu(cpu) {
19223+ struct _vx_history *hist =
19224+ &per_cpu(vx_history_buffer, cpu);
19225+ unsigned int index = (hist->counter - i) % VXH_SIZE;
19226+ struct _vx_hist_entry *entry = &hist->entry[index];
19227+
19228+ vxh_dump_entry(entry, cpu);
19229+ }
19230+ }
19231+}
19232+
19233+void vxh_dump_history(void)
19234+{
19235+ vxh_active = 0;
19236+#ifdef CONFIG_SMP
19237+ local_irq_enable();
19238+ smp_send_stop();
19239+ local_irq_disable();
19240+#endif
19241+ __vxh_dump_history();
19242+}
19243+
19244+
19245+/* vserver syscall commands below here */
19246+
19247+
19248+int vc_dump_history(uint32_t id)
19249+{
19250+ vxh_active = 0;
19251+ __vxh_dump_history();
19252+ vxh_active = 1;
19253+
19254+ return 0;
19255+}
19256+
19257+
19258+int do_read_history(struct __user _vx_hist_entry *data,
19259+ int cpu, uint32_t *index, uint32_t *count)
19260+{
19261+ int pos, ret = 0;
19262+ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
19263+ int end = hist->counter;
19264+ int start = end - VXH_SIZE + 2;
19265+ int idx = *index;
19266+
19267+ /* special case: get current pos */
19268+ if (!*count) {
19269+ *index = end;
19270+ return 0;
19271+ }
19272+
19273+ /* have we lost some data? */
19274+ if (idx < start)
19275+ idx = start;
19276+
19277+ for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
19278+ struct _vx_hist_entry *entry =
19279+ &hist->entry[idx % VXH_SIZE];
19280+
19281+ /* send entry to userspace */
19282+ ret = copy_to_user(&data[pos], entry, sizeof(*entry));
19283+ if (ret)
19284+ break;
19285+ }
19286+ /* save new index and count */
19287+ *index = idx;
19288+ *count = pos;
19289+ return ret ? ret : (*index < end);
19290+}
19291+
19292+int vc_read_history(uint32_t id, void __user *data)
19293+{
19294+ struct vcmd_read_history_v0 vc_data;
19295+ int ret;
19296+
19297+ if (id >= NR_CPUS)
19298+ return -EINVAL;
19299+
19300+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19301+ return -EFAULT;
19302+
19303+ ret = do_read_history((struct __user _vx_hist_entry *)vc_data.data,
19304+ id, &vc_data.index, &vc_data.count);
19305+
19306+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19307+ return -EFAULT;
19308+ return ret;
19309+}
19310+
19311+#ifdef CONFIG_COMPAT
19312+
19313+int vc_read_history_x32(uint32_t id, void __user *data)
19314+{
19315+ struct vcmd_read_history_v0_x32 vc_data;
19316+ int ret;
19317+
19318+ if (id >= NR_CPUS)
19319+ return -EINVAL;
19320+
19321+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19322+ return -EFAULT;
19323+
19324+ ret = do_read_history((struct __user _vx_hist_entry *)
19325+ compat_ptr(vc_data.data_ptr),
19326+ id, &vc_data.index, &vc_data.count);
19327+
19328+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19329+ return -EFAULT;
19330+ return ret;
19331+}
19332+
19333+#endif /* CONFIG_COMPAT */
19334+
19335diff -NurpP --minimal linux-3.0.9/kernel/vserver/inet.c linux-3.0.9-vs2.3.2.1/kernel/vserver/inet.c
19336--- linux-3.0.9/kernel/vserver/inet.c 1970-01-01 01:00:00.000000000 +0100
19337+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/inet.c 2011-08-30 20:14:33.000000000 +0200
19338@@ -0,0 +1,225 @@
19339+
19340+#include <linux/in.h>
19341+#include <linux/inetdevice.h>
19342+#include <linux/vs_inet.h>
19343+#include <linux/vs_inet6.h>
19344+#include <linux/vserver/debug.h>
19345+#include <net/route.h>
19346+#include <net/addrconf.h>
19347+
19348+
19349+int nx_v4_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
19350+{
19351+ int ret = 0;
19352+
19353+ if (!nxi1 || !nxi2 || nxi1 == nxi2)
19354+ ret = 1;
19355+ else {
19356+ struct nx_addr_v4 *ptr;
19357+
19358+ for (ptr = &nxi1->v4; ptr; ptr = ptr->next) {
19359+ if (v4_nx_addr_in_nx_info(nxi2, ptr, -1)) {
19360+ ret = 1;
19361+ break;
19362+ }
19363+ }
19364+ }
19365+
19366+ vxdprintk(VXD_CBIT(net, 2),
19367+ "nx_v4_addr_conflict(%p,%p): %d",
19368+ nxi1, nxi2, ret);
19369+
19370+ return ret;
19371+}
19372+
19373+
19374+#ifdef CONFIG_IPV6
19375+
19376+int nx_v6_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
19377+{
19378+ int ret = 0;
19379+
19380+ if (!nxi1 || !nxi2 || nxi1 == nxi2)
19381+ ret = 1;
19382+ else {
19383+ struct nx_addr_v6 *ptr;
19384+
19385+ for (ptr = &nxi1->v6; ptr; ptr = ptr->next) {
19386+ if (v6_nx_addr_in_nx_info(nxi2, ptr, -1)) {
19387+ ret = 1;
19388+ break;
19389+ }
19390+ }
19391+ }
19392+
19393+ vxdprintk(VXD_CBIT(net, 2),
19394+ "nx_v6_addr_conflict(%p,%p): %d",
19395+ nxi1, nxi2, ret);
19396+
19397+ return ret;
19398+}
19399+
19400+#endif
19401+
19402+int v4_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
19403+{
19404+ struct in_device *in_dev;
19405+ struct in_ifaddr **ifap;
19406+ struct in_ifaddr *ifa;
19407+ int ret = 0;
19408+
19409+ if (!dev)
19410+ goto out;
19411+ in_dev = in_dev_get(dev);
19412+ if (!in_dev)
19413+ goto out;
19414+
19415+ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
19416+ ifap = &ifa->ifa_next) {
19417+ if (v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW)) {
19418+ ret = 1;
19419+ break;
19420+ }
19421+ }
19422+ in_dev_put(in_dev);
19423+out:
19424+ return ret;
19425+}
19426+
19427+
19428+#ifdef CONFIG_IPV6
19429+
19430+int v6_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
19431+{
19432+ struct inet6_dev *in_dev;
19433+ struct inet6_ifaddr *ifa;
19434+ int ret = 0;
19435+
19436+ if (!dev)
19437+ goto out;
19438+ in_dev = in6_dev_get(dev);
19439+ if (!in_dev)
19440+ goto out;
19441+
19442+ // for (ifap = &in_dev->addr_list; (ifa = *ifap) != NULL;
19443+ list_for_each_entry(ifa, &in_dev->addr_list, if_list) {
19444+ if (v6_addr_in_nx_info(nxi, &ifa->addr, -1)) {
19445+ ret = 1;
19446+ break;
19447+ }
19448+ }
19449+ in6_dev_put(in_dev);
19450+out:
19451+ return ret;
19452+}
19453+
19454+#endif
19455+
19456+int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
19457+{
19458+ int ret = 1;
19459+
19460+ if (!nxi)
19461+ goto out;
19462+ if (nxi->v4.type && v4_dev_in_nx_info(dev, nxi))
19463+ goto out;
19464+#ifdef CONFIG_IPV6
19465+ ret = 2;
19466+ if (nxi->v6.type && v6_dev_in_nx_info(dev, nxi))
19467+ goto out;
19468+#endif
19469+ ret = 0;
19470+out:
19471+ vxdprintk(VXD_CBIT(net, 3),
19472+ "dev_in_nx_info(%p,%p[#%d]) = %d",
19473+ dev, nxi, nxi ? nxi->nx_id : 0, ret);
19474+ return ret;
19475+}
19476+
19477+struct rtable *ip_v4_find_src(struct net *net, struct nx_info *nxi,
19478+ struct flowi4 *fl4)
19479+{
19480+ struct rtable *rt;
19481+
19482+ if (!nxi)
19483+ return NULL;
19484+
19485+ /* FIXME: handle lback only case */
19486+ if (!NX_IPV4(nxi))
19487+ return ERR_PTR(-EPERM);
19488+
19489+ vxdprintk(VXD_CBIT(net, 4),
19490+ "ip_v4_find_src(%p[#%u]) " NIPQUAD_FMT " -> " NIPQUAD_FMT,
19491+ nxi, nxi ? nxi->nx_id : 0,
19492+ NIPQUAD(fl4->saddr), NIPQUAD(fl4->daddr));
19493+
19494+ /* single IP is unconditional */
19495+ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0) &&
19496+ (fl4->saddr == INADDR_ANY))
19497+ fl4->saddr = nxi->v4.ip[0].s_addr;
19498+
19499+ if (fl4->saddr == INADDR_ANY) {
19500+ struct nx_addr_v4 *ptr;
19501+ __be32 found = 0;
19502+
19503+ rt = __ip_route_output_key(net, fl4);
19504+ if (!IS_ERR(rt)) {
19505+ found = fl4->saddr;
19506+ ip_rt_put(rt);
19507+ vxdprintk(VXD_CBIT(net, 4),
19508+ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
19509+ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(found));
19510+ if (v4_addr_in_nx_info(nxi, found, NXA_MASK_BIND))
19511+ goto found;
19512+ }
19513+
19514+ for (ptr = &nxi->v4; ptr; ptr = ptr->next) {
19515+ __be32 primary = ptr->ip[0].s_addr;
19516+ __be32 mask = ptr->mask.s_addr;
19517+ __be32 neta = primary & mask;
19518+
19519+ vxdprintk(VXD_CBIT(net, 4), "ip_v4_find_src(%p[#%u]) chk: "
19520+ NIPQUAD_FMT "/" NIPQUAD_FMT "/" NIPQUAD_FMT,
19521+ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(primary),
19522+ NIPQUAD(mask), NIPQUAD(neta));
19523+ if ((found & mask) != neta)
19524+ continue;
19525+
19526+ fl4->saddr = primary;
19527+ rt = __ip_route_output_key(net, fl4);
19528+ vxdprintk(VXD_CBIT(net, 4),
19529+ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
19530+ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(primary));
19531+ if (!IS_ERR(rt)) {
19532+ found = fl4->saddr;
19533+ ip_rt_put(rt);
19534+ if (found == primary)
19535+ goto found;
19536+ }
19537+ }
19538+ /* still no source ip? */
19539+ found = ipv4_is_loopback(fl4->daddr)
19540+ ? IPI_LOOPBACK : nxi->v4.ip[0].s_addr;
19541+ found:
19542+ /* assign src ip to flow */
19543+ fl4->saddr = found;
19544+
19545+ } else {
19546+ if (!v4_addr_in_nx_info(nxi, fl4->saddr, NXA_MASK_BIND))
19547+ return ERR_PTR(-EPERM);
19548+ }
19549+
19550+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) {
19551+ if (ipv4_is_loopback(fl4->daddr))
19552+ fl4->daddr = nxi->v4_lback.s_addr;
19553+ if (ipv4_is_loopback(fl4->saddr))
19554+ fl4->saddr = nxi->v4_lback.s_addr;
19555+ } else if (ipv4_is_loopback(fl4->daddr) &&
19556+ !nx_info_flags(nxi, NXF_LBACK_ALLOW, 0))
19557+ return ERR_PTR(-EPERM);
19558+
19559+ return NULL;
19560+}
19561+
19562+EXPORT_SYMBOL_GPL(ip_v4_find_src);
19563+
19564diff -NurpP --minimal linux-3.0.9/kernel/vserver/init.c linux-3.0.9-vs2.3.2.1/kernel/vserver/init.c
19565--- linux-3.0.9/kernel/vserver/init.c 1970-01-01 01:00:00.000000000 +0100
19566+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/init.c 2011-06-10 22:11:24.000000000 +0200
19567@@ -0,0 +1,45 @@
19568+/*
19569+ * linux/kernel/init.c
19570+ *
19571+ * Virtual Server Init
19572+ *
19573+ * Copyright (C) 2004-2007 Herbert Pötzl
19574+ *
19575+ * V0.01 basic structure
19576+ *
19577+ */
19578+
19579+#include <linux/init.h>
19580+
19581+int vserver_register_sysctl(void);
19582+void vserver_unregister_sysctl(void);
19583+
19584+
19585+static int __init init_vserver(void)
19586+{
19587+ int ret = 0;
19588+
19589+#ifdef CONFIG_VSERVER_DEBUG
19590+ vserver_register_sysctl();
19591+#endif
19592+ return ret;
19593+}
19594+
19595+
19596+static void __exit exit_vserver(void)
19597+{
19598+
19599+#ifdef CONFIG_VSERVER_DEBUG
19600+ vserver_unregister_sysctl();
19601+#endif
19602+ return;
19603+}
19604+
/*
 * FIXME: GFP_ZONETYPES gone, so the former per-zone array
 *	long vx_slab[GFP_ZONETYPES];
 * is disabled.
 */
long vx_area;


/* hook the init/exit functions into the module machinery */
module_init(init_vserver);
module_exit(exit_vserver);
19612+
19613diff -NurpP --minimal linux-3.0.9/kernel/vserver/inode.c linux-3.0.9-vs2.3.2.1/kernel/vserver/inode.c
19614--- linux-3.0.9/kernel/vserver/inode.c 1970-01-01 01:00:00.000000000 +0100
19615+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/inode.c 2011-06-10 22:11:24.000000000 +0200
19616@@ -0,0 +1,437 @@
19617+/*
19618+ * linux/kernel/vserver/inode.c
19619+ *
19620+ * Virtual Server: File System Support
19621+ *
19622+ * Copyright (C) 2004-2007 Herbert Pötzl
19623+ *
19624+ * V0.01 separated from vcontext V0.05
19625+ * V0.02 moved to tag (instead of xid)
19626+ *
19627+ */
19628+
19629+#include <linux/tty.h>
19630+#include <linux/proc_fs.h>
19631+#include <linux/devpts_fs.h>
19632+#include <linux/fs.h>
19633+#include <linux/file.h>
19634+#include <linux/mount.h>
19635+#include <linux/parser.h>
19636+#include <linux/namei.h>
19637+#include <linux/vserver/inode.h>
19638+#include <linux/vserver/inode_cmd.h>
19639+#include <linux/vs_base.h>
19640+#include <linux/vs_tag.h>
19641+
19642+#include <asm/uaccess.h>
19643+
19644+
19645+static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask)
19646+{
19647+ struct proc_dir_entry *entry;
19648+
19649+ if (!in || !in->i_sb)
19650+ return -ESRCH;
19651+
19652+ *flags = IATTR_TAG
19653+ | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0)
19654+ | (IS_IXUNLINK(in) ? IATTR_IXUNLINK : 0)
19655+ | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
19656+ | (IS_COW(in) ? IATTR_COW : 0);
19657+ *mask = IATTR_IXUNLINK | IATTR_IMMUTABLE | IATTR_COW;
19658+
19659+ if (S_ISDIR(in->i_mode))
19660+ *mask |= IATTR_BARRIER;
19661+
19662+ if (IS_TAGGED(in)) {
19663+ *tag = in->i_tag;
19664+ *mask |= IATTR_TAG;
19665+ }
19666+
19667+ switch (in->i_sb->s_magic) {
19668+ case PROC_SUPER_MAGIC:
19669+ entry = PROC_I(in)->pde;
19670+
19671+ /* check for specific inodes? */
19672+ if (entry)
19673+ *mask |= IATTR_FLAGS;
19674+ if (entry)
19675+ *flags |= (entry->vx_flags & IATTR_FLAGS);
19676+ else
19677+ *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
19678+ break;
19679+
19680+ case DEVPTS_SUPER_MAGIC:
19681+ *tag = in->i_tag;
19682+ *mask |= IATTR_TAG;
19683+ break;
19684+
19685+ default:
19686+ break;
19687+ }
19688+ return 0;
19689+}
19690+
19691+int vc_get_iattr(void __user *data)
19692+{
19693+ struct path path;
19694+ struct vcmd_ctx_iattr_v1 vc_data = { .tag = -1 };
19695+ int ret;
19696+
19697+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19698+ return -EFAULT;
19699+
19700+ ret = user_lpath(vc_data.name, &path);
19701+ if (!ret) {
19702+ ret = __vc_get_iattr(path.dentry->d_inode,
19703+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
19704+ path_put(&path);
19705+ }
19706+ if (ret)
19707+ return ret;
19708+
19709+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19710+ ret = -EFAULT;
19711+ return ret;
19712+}
19713+
19714+#ifdef CONFIG_COMPAT
19715+
19716+int vc_get_iattr_x32(void __user *data)
19717+{
19718+ struct path path;
19719+ struct vcmd_ctx_iattr_v1_x32 vc_data = { .tag = -1 };
19720+ int ret;
19721+
19722+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19723+ return -EFAULT;
19724+
19725+ ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
19726+ if (!ret) {
19727+ ret = __vc_get_iattr(path.dentry->d_inode,
19728+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
19729+ path_put(&path);
19730+ }
19731+ if (ret)
19732+ return ret;
19733+
19734+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19735+ ret = -EFAULT;
19736+ return ret;
19737+}
19738+
19739+#endif /* CONFIG_COMPAT */
19740+
19741+
19742+int vc_fget_iattr(uint32_t fd, void __user *data)
19743+{
19744+ struct file *filp;
19745+ struct vcmd_ctx_fiattr_v0 vc_data = { .tag = -1 };
19746+ int ret;
19747+
19748+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19749+ return -EFAULT;
19750+
19751+ filp = fget(fd);
19752+ if (!filp || !filp->f_dentry || !filp->f_dentry->d_inode)
19753+ return -EBADF;
19754+
19755+ ret = __vc_get_iattr(filp->f_dentry->d_inode,
19756+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
19757+
19758+ fput(filp);
19759+
19760+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19761+ ret = -EFAULT;
19762+ return ret;
19763+}
19764+
19765+
19766+static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask)
19767+{
19768+ struct inode *in = de->d_inode;
19769+ int error = 0, is_proc = 0, has_tag = 0;
19770+ struct iattr attr = { 0 };
19771+
19772+ if (!in || !in->i_sb)
19773+ return -ESRCH;
19774+
19775+ is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
19776+ if ((*mask & IATTR_FLAGS) && !is_proc)
19777+ return -EINVAL;
19778+
19779+ has_tag = IS_TAGGED(in) ||
19780+ (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
19781+ if ((*mask & IATTR_TAG) && !has_tag)
19782+ return -EINVAL;
19783+
19784+ mutex_lock(&in->i_mutex);
19785+ if (*mask & IATTR_TAG) {
19786+ attr.ia_tag = *tag;
19787+ attr.ia_valid |= ATTR_TAG;
19788+ }
19789+
19790+ if (*mask & IATTR_FLAGS) {
19791+ struct proc_dir_entry *entry = PROC_I(in)->pde;
19792+ unsigned int iflags = PROC_I(in)->vx_flags;
19793+
19794+ iflags = (iflags & ~(*mask & IATTR_FLAGS))
19795+ | (*flags & IATTR_FLAGS);
19796+ PROC_I(in)->vx_flags = iflags;
19797+ if (entry)
19798+ entry->vx_flags = iflags;
19799+ }
19800+
19801+ if (*mask & (IATTR_IMMUTABLE | IATTR_IXUNLINK |
19802+ IATTR_BARRIER | IATTR_COW)) {
19803+ int iflags = in->i_flags;
19804+ int vflags = in->i_vflags;
19805+
19806+ if (*mask & IATTR_IMMUTABLE) {
19807+ if (*flags & IATTR_IMMUTABLE)
19808+ iflags |= S_IMMUTABLE;
19809+ else
19810+ iflags &= ~S_IMMUTABLE;
19811+ }
19812+ if (*mask & IATTR_IXUNLINK) {
19813+ if (*flags & IATTR_IXUNLINK)
19814+ iflags |= S_IXUNLINK;
19815+ else
19816+ iflags &= ~S_IXUNLINK;
19817+ }
19818+ if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
19819+ if (*flags & IATTR_BARRIER)
19820+ vflags |= V_BARRIER;
19821+ else
19822+ vflags &= ~V_BARRIER;
19823+ }
19824+ if (S_ISREG(in->i_mode) && (*mask & IATTR_COW)) {
19825+ if (*flags & IATTR_COW)
19826+ vflags |= V_COW;
19827+ else
19828+ vflags &= ~V_COW;
19829+ }
19830+ if (in->i_op && in->i_op->sync_flags) {
19831+ error = in->i_op->sync_flags(in, iflags, vflags);
19832+ if (error)
19833+ goto out;
19834+ }
19835+ }
19836+
19837+ if (attr.ia_valid) {
19838+ if (in->i_op && in->i_op->setattr)
19839+ error = in->i_op->setattr(de, &attr);
19840+ else {
19841+ error = inode_change_ok(in, &attr);
19842+ if (!error) {
19843+ setattr_copy(in, &attr);
19844+ mark_inode_dirty(in);
19845+ }
19846+ }
19847+ }
19848+
19849+out:
19850+ mutex_unlock(&in->i_mutex);
19851+ return error;
19852+}
19853+
19854+int vc_set_iattr(void __user *data)
19855+{
19856+ struct path path;
19857+ struct vcmd_ctx_iattr_v1 vc_data;
19858+ int ret;
19859+
19860+ if (!capable(CAP_LINUX_IMMUTABLE))
19861+ return -EPERM;
19862+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19863+ return -EFAULT;
19864+
19865+ ret = user_lpath(vc_data.name, &path);
19866+ if (!ret) {
19867+ ret = __vc_set_iattr(path.dentry,
19868+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
19869+ path_put(&path);
19870+ }
19871+
19872+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19873+ ret = -EFAULT;
19874+ return ret;
19875+}
19876+
19877+#ifdef CONFIG_COMPAT
19878+
19879+int vc_set_iattr_x32(void __user *data)
19880+{
19881+ struct path path;
19882+ struct vcmd_ctx_iattr_v1_x32 vc_data;
19883+ int ret;
19884+
19885+ if (!capable(CAP_LINUX_IMMUTABLE))
19886+ return -EPERM;
19887+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19888+ return -EFAULT;
19889+
19890+ ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
19891+ if (!ret) {
19892+ ret = __vc_set_iattr(path.dentry,
19893+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
19894+ path_put(&path);
19895+ }
19896+
19897+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19898+ ret = -EFAULT;
19899+ return ret;
19900+}
19901+
19902+#endif /* CONFIG_COMPAT */
19903+
19904+int vc_fset_iattr(uint32_t fd, void __user *data)
19905+{
19906+ struct file *filp;
19907+ struct vcmd_ctx_fiattr_v0 vc_data;
19908+ int ret;
19909+
19910+ if (!capable(CAP_LINUX_IMMUTABLE))
19911+ return -EPERM;
19912+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19913+ return -EFAULT;
19914+
19915+ filp = fget(fd);
19916+ if (!filp || !filp->f_dentry || !filp->f_dentry->d_inode)
19917+ return -EBADF;
19918+
19919+ ret = __vc_set_iattr(filp->f_dentry, &vc_data.tag,
19920+ &vc_data.flags, &vc_data.mask);
19921+
19922+ fput(filp);
19923+
19924+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19925+ return -EFAULT;
19926+ return ret;
19927+}
19928+
19929+
19930+enum { Opt_notagcheck, Opt_tag, Opt_notag, Opt_tagid, Opt_err };
19931+
19932+static match_table_t tokens = {
19933+ {Opt_notagcheck, "notagcheck"},
19934+#ifdef CONFIG_PROPAGATE
19935+ {Opt_notag, "notag"},
19936+ {Opt_tag, "tag"},
19937+ {Opt_tagid, "tagid=%u"},
19938+#endif
19939+ {Opt_err, NULL}
19940+};
19941+
19942+
/*
 * __dx_parse_remove - cut one option out of a comma separated list
 * @string: option list, modified in place
 * @opt:    option name to remove (without "=value")
 *
 * Only a whole option is matched: the name has to start at the
 * beginning of the list or right after a ',' and has to be followed
 * by ',', '=' or the end of the list.  A plain substring search would
 * hit e.g. the "tag" inside "notagcheck" and mangle that option.
 *
 * The option text including any "=value" part is removed; the
 * separating comma that followed it is kept, so "a,tag,b" becomes
 * "a,,b".  The freed tail of the buffer is zero filled.  Nothing
 * happens when @opt is empty or not present.
 */
static void __dx_parse_remove(char *string, char *opt)
{
	size_t len = strlen(opt);
	char *p = string;
	char *q;

	if (!len)
		return;

	/* look for an occurrence that sits on option boundaries */
	while ((p = strstr(p, opt)) != NULL) {
		char next = p[len];

		if ((p == string || p[-1] == ',') &&
		    (next == '\0' || next == ',' || next == '='))
			break;
		p++;
	}
	if (!p)
		return;

	/* q: end of this option (next ',' or end of the list) */
	q = p + len;
	while (*q != '\0' && *q != ',')
		q++;

	/* move the remainder down and clear what is left over */
	while (*q)
		*p++ = *q++;
	while (*p)
		*p++ = '\0';
}
19957+
19958+int dx_parse_tag(char *string, tag_t *tag, int remove, int *mnt_flags,
19959+ unsigned long *flags)
19960+{
19961+ int set = 0;
19962+ substring_t args[MAX_OPT_ARGS];
19963+ int token;
19964+ char *s, *p, *opts;
19965+#if defined(CONFIG_PROPAGATE) || defined(CONFIG_VSERVER_WARN)
19966+ int option = 0;
19967+#endif
19968+
19969+ if (!string)
19970+ return 0;
19971+ s = kstrdup(string, GFP_KERNEL | GFP_ATOMIC);
19972+ if (!s)
19973+ return 0;
19974+
19975+ opts = s;
19976+ while ((p = strsep(&opts, ",")) != NULL) {
19977+ token = match_token(p, tokens, args);
19978+
19979+ switch (token) {
19980+#ifdef CONFIG_PROPAGATE
19981+ case Opt_tag:
19982+ if (tag)
19983+ *tag = 0;
19984+ if (remove)
19985+ __dx_parse_remove(s, "tag");
19986+ *mnt_flags |= MNT_TAGID;
19987+ set |= MNT_TAGID;
19988+ break;
19989+ case Opt_notag:
19990+ if (remove)
19991+ __dx_parse_remove(s, "notag");
19992+ *mnt_flags |= MNT_NOTAG;
19993+ set |= MNT_NOTAG;
19994+ break;
19995+ case Opt_tagid:
19996+ if (tag && !match_int(args, &option))
19997+ *tag = option;
19998+ if (remove)
19999+ __dx_parse_remove(s, "tagid");
20000+ *mnt_flags |= MNT_TAGID;
20001+ set |= MNT_TAGID;
20002+ break;
20003+#endif
20004+ case Opt_notagcheck:
20005+ if (remove)
20006+ __dx_parse_remove(s, "notagcheck");
20007+ *flags |= MS_NOTAGCHECK;
20008+ set |= MS_NOTAGCHECK;
20009+ break;
20010+ }
20011+ vxdprintk(VXD_CBIT(tag, 7),
20012+ "dx_parse_tag(" VS_Q("%s") "): %d:#%d",
20013+ p, token, option);
20014+ }
20015+ if (set)
20016+ strcpy(string, s);
20017+ kfree(s);
20018+ return set;
20019+}
20020+
20021+#ifdef CONFIG_PROPAGATE
20022+
20023+void __dx_propagate_tag(struct nameidata *nd, struct inode *inode)
20024+{
20025+ tag_t new_tag = 0;
20026+ struct vfsmount *mnt;
20027+ int propagate;
20028+
20029+ if (!nd)
20030+ return;
20031+ mnt = nd->path.mnt;
20032+ if (!mnt)
20033+ return;
20034+
20035+ propagate = (mnt->mnt_flags & MNT_TAGID);
20036+ if (propagate)
20037+ new_tag = mnt->mnt_tag;
20038+
20039+ vxdprintk(VXD_CBIT(tag, 7),
20040+ "dx_propagate_tag(%p[#%lu.%d]): %d,%d",
20041+ inode, inode->i_ino, inode->i_tag,
20042+ new_tag, (propagate) ? 1 : 0);
20043+
20044+ if (propagate)
20045+ inode->i_tag = new_tag;
20046+}
20047+
20048+#include <linux/module.h>
20049+
20050+EXPORT_SYMBOL_GPL(__dx_propagate_tag);
20051+
20052+#endif /* CONFIG_PROPAGATE */
20053+
20054diff -NurpP --minimal linux-3.0.9/kernel/vserver/limit.c linux-3.0.9-vs2.3.2.1/kernel/vserver/limit.c
20055--- linux-3.0.9/kernel/vserver/limit.c 1970-01-01 01:00:00.000000000 +0100
20056+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/limit.c 2011-10-11 20:48:08.000000000 +0200
20057@@ -0,0 +1,360 @@
20058+/*
20059+ * linux/kernel/vserver/limit.c
20060+ *
20061+ * Virtual Server: Context Limits
20062+ *
20063+ * Copyright (C) 2004-2010 Herbert Pötzl
20064+ *
20065+ * V0.01 broken out from vcontext V0.05
20066+ * V0.02 changed vcmds to vxi arg
20067+ * V0.03 added memory cgroup support
20068+ *
20069+ */
20070+
20071+#include <linux/sched.h>
20072+#include <linux/module.h>
20073+#include <linux/memcontrol.h>
20074+#include <linux/res_counter.h>
20075+#include <linux/vs_limit.h>
20076+#include <linux/vserver/limit.h>
20077+#include <linux/vserver/limit_cmd.h>
20078+
20079+#include <asm/uaccess.h>
20080+
20081+
20082+const char *vlimit_name[NUM_LIMITS] = {
20083+#ifdef CONFIG_VSERVER_LEGACY_MEM
20084+ [RLIMIT_RSS] = "RSS",
20085+ [RLIMIT_AS] = "VM",
20086+#endif /* CONFIG_VSERVER_LEGACY_MEM */
20087+ [RLIMIT_CPU] = "CPU",
20088+ [RLIMIT_NPROC] = "NPROC",
20089+ [RLIMIT_NOFILE] = "NOFILE",
20090+ [RLIMIT_LOCKS] = "LOCKS",
20091+ [RLIMIT_SIGPENDING] = "SIGP",
20092+ [RLIMIT_MSGQUEUE] = "MSGQ",
20093+
20094+ [VLIMIT_NSOCK] = "NSOCK",
20095+ [VLIMIT_OPENFD] = "OPENFD",
20096+ [VLIMIT_SHMEM] = "SHMEM",
20097+ [VLIMIT_DENTRY] = "DENTRY",
20098+};
20099+
20100+EXPORT_SYMBOL_GPL(vlimit_name);
20101+
20102+#define MASK_ENTRY(x) (1 << (x))
20103+
20104+const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = {
20105+ /* minimum */
20106+ 0
20107+ , /* softlimit */
20108+#ifdef CONFIG_VSERVER_LEGACY_MEM
20109+ MASK_ENTRY( RLIMIT_RSS ) |
20110+#endif /* CONFIG_VSERVER_LEGACY_MEM */
20111+ 0
20112+ , /* maximum */
20113+#ifdef CONFIG_VSERVER_LEGACY_MEM
20114+ MASK_ENTRY( RLIMIT_RSS ) |
20115+ MASK_ENTRY( RLIMIT_AS ) |
20116+#endif /* CONFIG_VSERVER_LEGACY_MEM */
20117+ MASK_ENTRY( RLIMIT_NPROC ) |
20118+ MASK_ENTRY( RLIMIT_NOFILE ) |
20119+ MASK_ENTRY( RLIMIT_LOCKS ) |
20120+ MASK_ENTRY( RLIMIT_MSGQUEUE ) |
20121+
20122+ MASK_ENTRY( VLIMIT_NSOCK ) |
20123+ MASK_ENTRY( VLIMIT_OPENFD ) |
20124+ MASK_ENTRY( VLIMIT_SHMEM ) |
20125+ MASK_ENTRY( VLIMIT_DENTRY ) |
20126+ 0
20127+};
20128+ /* accounting only */
20129+uint32_t account_mask =
20130+ MASK_ENTRY( VLIMIT_SEMARY ) |
20131+ MASK_ENTRY( VLIMIT_NSEMS ) |
20132+ MASK_ENTRY( VLIMIT_MAPPED ) |
20133+ 0;
20134+
20135+
20136+static int is_valid_vlimit(int id)
20137+{
20138+ uint32_t mask = vlimit_mask.minimum |
20139+ vlimit_mask.softlimit | vlimit_mask.maximum;
20140+ return mask & (1 << id);
20141+}
20142+
20143+static int is_accounted_vlimit(int id)
20144+{
20145+ if (is_valid_vlimit(id))
20146+ return 1;
20147+ return account_mask & (1 << id);
20148+}
20149+
20150+
20151+static inline uint64_t vc_get_soft(struct vx_info *vxi, int id)
20152+{
20153+ rlim_t limit = __rlim_soft(&vxi->limit, id);
20154+ return VX_VLIM(limit);
20155+}
20156+
20157+static inline uint64_t vc_get_hard(struct vx_info *vxi, int id)
20158+{
20159+ rlim_t limit = __rlim_hard(&vxi->limit, id);
20160+ return VX_VLIM(limit);
20161+}
20162+
20163+static int do_get_rlimit(struct vx_info *vxi, uint32_t id,
20164+ uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum)
20165+{
20166+ if (!is_valid_vlimit(id))
20167+ return -EINVAL;
20168+
20169+ if (minimum)
20170+ *minimum = CRLIM_UNSET;
20171+ if (softlimit)
20172+ *softlimit = vc_get_soft(vxi, id);
20173+ if (maximum)
20174+ *maximum = vc_get_hard(vxi, id);
20175+ return 0;
20176+}
20177+
20178+int vc_get_rlimit(struct vx_info *vxi, void __user *data)
20179+{
20180+ struct vcmd_ctx_rlimit_v0 vc_data;
20181+ int ret;
20182+
20183+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20184+ return -EFAULT;
20185+
20186+ ret = do_get_rlimit(vxi, vc_data.id,
20187+ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
20188+ if (ret)
20189+ return ret;
20190+
20191+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20192+ return -EFAULT;
20193+ return 0;
20194+}
20195+
20196+static int do_set_rlimit(struct vx_info *vxi, uint32_t id,
20197+ uint64_t minimum, uint64_t softlimit, uint64_t maximum)
20198+{
20199+ if (!is_valid_vlimit(id))
20200+ return -EINVAL;
20201+
20202+ if (maximum != CRLIM_KEEP)
20203+ __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum);
20204+ if (softlimit != CRLIM_KEEP)
20205+ __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit);
20206+
20207+ /* clamp soft limit */
20208+ if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id))
20209+ __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id);
20210+
20211+ return 0;
20212+}
20213+
20214+int vc_set_rlimit(struct vx_info *vxi, void __user *data)
20215+{
20216+ struct vcmd_ctx_rlimit_v0 vc_data;
20217+
20218+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20219+ return -EFAULT;
20220+
20221+ return do_set_rlimit(vxi, vc_data.id,
20222+ vc_data.minimum, vc_data.softlimit, vc_data.maximum);
20223+}
20224+
20225+#ifdef CONFIG_IA32_EMULATION
20226+
20227+int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data)
20228+{
20229+ struct vcmd_ctx_rlimit_v0_x32 vc_data;
20230+
20231+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20232+ return -EFAULT;
20233+
20234+ return do_set_rlimit(vxi, vc_data.id,
20235+ vc_data.minimum, vc_data.softlimit, vc_data.maximum);
20236+}
20237+
20238+int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data)
20239+{
20240+ struct vcmd_ctx_rlimit_v0_x32 vc_data;
20241+ int ret;
20242+
20243+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20244+ return -EFAULT;
20245+
20246+ ret = do_get_rlimit(vxi, vc_data.id,
20247+ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
20248+ if (ret)
20249+ return ret;
20250+
20251+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20252+ return -EFAULT;
20253+ return 0;
20254+}
20255+
20256+#endif /* CONFIG_IA32_EMULATION */
20257+
20258+
20259+int vc_get_rlimit_mask(uint32_t id, void __user *data)
20260+{
20261+ if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask)))
20262+ return -EFAULT;
20263+ return 0;
20264+}
20265+
20266+
20267+static inline void vx_reset_hits(struct _vx_limit *limit)
20268+{
20269+ int lim;
20270+
20271+ for (lim = 0; lim < NUM_LIMITS; lim++) {
20272+ atomic_set(&__rlim_lhit(limit, lim), 0);
20273+ }
20274+}
20275+
20276+int vc_reset_hits(struct vx_info *vxi, void __user *data)
20277+{
20278+ vx_reset_hits(&vxi->limit);
20279+ return 0;
20280+}
20281+
20282+static inline void vx_reset_minmax(struct _vx_limit *limit)
20283+{
20284+ rlim_t value;
20285+ int lim;
20286+
20287+ for (lim = 0; lim < NUM_LIMITS; lim++) {
20288+ value = __rlim_get(limit, lim);
20289+ __rlim_rmax(limit, lim) = value;
20290+ __rlim_rmin(limit, lim) = value;
20291+ }
20292+}
20293+
20294+int vc_reset_minmax(struct vx_info *vxi, void __user *data)
20295+{
20296+ vx_reset_minmax(&vxi->limit);
20297+ return 0;
20298+}
20299+
20300+
20301+int vc_rlimit_stat(struct vx_info *vxi, void __user *data)
20302+{
20303+ struct vcmd_rlimit_stat_v0 vc_data;
20304+ struct _vx_limit *limit = &vxi->limit;
20305+ int id;
20306+
20307+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20308+ return -EFAULT;
20309+
20310+ id = vc_data.id;
20311+ if (!is_accounted_vlimit(id))
20312+ return -EINVAL;
20313+
20314+ vx_limit_fixup(limit, id);
20315+ vc_data.hits = atomic_read(&__rlim_lhit(limit, id));
20316+ vc_data.value = __rlim_get(limit, id);
20317+ vc_data.minimum = __rlim_rmin(limit, id);
20318+ vc_data.maximum = __rlim_rmax(limit, id);
20319+
20320+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
20321+ return -EFAULT;
20322+ return 0;
20323+}
20324+
20325+
/*
 * vx_vsi_meminfo() - adjust the memory fields of @val to the memory
 * cgroup of the current task
 *
 * Without CONFIG_CGROUP_MEM_RES_CTLR, or when the current task has no
 * memory cgroup, @val is left untouched.  Otherwise totalram is replaced
 * by the cgroup limit (unless that limit is RESOURCE_MAX), freeram is
 * totalram minus the cgroup usage, and bufferram, totalhigh and freehigh
 * are zeroed.
 */
void vx_vsi_meminfo(struct sysinfo *val)
{
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
	struct mem_cgroup *mcg = mem_cgroup_from_task(current);
	u64 mem_limit, mem_usage;

	if (mcg == NULL)
		return;

	mem_limit = mem_cgroup_res_read_u64(mcg, RES_LIMIT);
	mem_usage = mem_cgroup_res_read_u64(mcg, RES_USAGE);

	if (mem_limit != RESOURCE_MAX)
		val->totalram = (mem_limit >> PAGE_SHIFT);
	val->freeram = val->totalram - (mem_usage >> PAGE_SHIFT);
	val->bufferram = 0;
	val->totalhigh = 0;
	val->freehigh = 0;
#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
}
20347+
/*
 * vx_vsi_swapinfo() - adjust the swap fields of @val to the memory
 * cgroup of the current task
 *
 * Without CONFIG_CGROUP_MEM_RES_CTLR nothing is changed.  With it but
 * without CONFIG_CGROUP_MEM_RES_CTLR_SWAP, totalswap and freeswap are
 * zeroed.  With both, and only when the task has a memory cgroup whose
 * memory limit is not RESOURCE_MAX, the swap share is derived as the
 * difference between the mem+swap and the memory counters.
 */
void vx_vsi_swapinfo(struct sysinfo *val)
{
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
	struct mem_cgroup *mcg = mem_cgroup_from_task(current);
	u64 mem_limit, mem_usage, memsw_limit, memsw_usage;
	s64 swap_limit, swap_usage;

	if (mcg == NULL)
		return;

	mem_limit = mem_cgroup_res_read_u64(mcg, RES_LIMIT);
	mem_usage = mem_cgroup_res_read_u64(mcg, RES_USAGE);
	memsw_limit = mem_cgroup_memsw_read_u64(mcg, RES_LIMIT);
	memsw_usage = mem_cgroup_memsw_read_u64(mcg, RES_USAGE);

	/* memory unlimited */
	if (mem_limit == RESOURCE_MAX)
		return;

	swap_limit = memsw_limit - mem_limit;
	/* only override totalswap when a mem+swap limit is set */
	if (memsw_limit != RESOURCE_MAX)
		val->totalswap = swap_limit >> PAGE_SHIFT;

	/* swap in use is whatever mem+swap usage exceeds memory usage by */
	if (memsw_usage > mem_usage)
		swap_usage = memsw_usage - mem_usage;
	else
		swap_usage = 0;

	/* total shown minus usage gives free swap */
	if (swap_usage < swap_limit)
		val->freeswap = val->totalswap - (swap_usage >> PAGE_SHIFT);
	else
		val->freeswap = 0;
#else /* !CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
	val->totalswap = 0;
	val->freeswap = 0;
#endif /* !CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
}
20387+
/*
 * vx_vsi_cached() - cache statistic of the current task's memory cgroup
 *
 * Returns mem_cgroup_stat_read_cache() for the memory cgroup of the
 * current task.  Returns 0 when CONFIG_CGROUP_MEM_RES_CTLR is disabled
 * or when the task has no memory cgroup; the latter mirrors the NULL
 * check done by vx_vsi_meminfo() and vx_vsi_swapinfo() so that a NULL
 * cgroup is never handed to the stat helper.  @val is not used.
 */
long vx_vsi_cached(struct sysinfo *val)
{
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
	struct mem_cgroup *mcg = mem_cgroup_from_task(current);

	if (!mcg)
		return 0;

	return mem_cgroup_stat_read_cache(mcg);
#else
	return 0;
#endif
}
20398+
20399+
20400+unsigned long vx_badness(struct task_struct *task, struct mm_struct *mm)
20401+{
20402+ struct vx_info *vxi = mm->mm_vx_info;
20403+ unsigned long points;
20404+ rlim_t v, w;
20405+
20406+ if (!vxi)
20407+ return 0;
20408+
20409+ points = vxi->vx_badness_bias;
20410+
20411+ v = __vx_cres_array_fixup(&vxi->limit, VLA_RSS);
20412+ w = __rlim_soft(&vxi->limit, RLIMIT_RSS);
20413+ points += (v > w) ? (v - w) : 0;
20414+
20415+ return points;
20416+}
20417+
20418diff -NurpP --minimal linux-3.0.9/kernel/vserver/limit_init.h linux-3.0.9-vs2.3.2.1/kernel/vserver/limit_init.h
20419--- linux-3.0.9/kernel/vserver/limit_init.h 1970-01-01 01:00:00.000000000 +0100
20420+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/limit_init.h 2011-06-10 22:11:24.000000000 +0200
20421@@ -0,0 +1,31 @@
20422+
20423+
20424+static inline void vx_info_init_limit(struct _vx_limit *limit)
20425+{
20426+ int lim;
20427+
20428+ for (lim = 0; lim < NUM_LIMITS; lim++) {
20429+ __rlim_soft(limit, lim) = RLIM_INFINITY;
20430+ __rlim_hard(limit, lim) = RLIM_INFINITY;
20431+ __rlim_set(limit, lim, 0);
20432+ atomic_set(&__rlim_lhit(limit, lim), 0);
20433+ __rlim_rmin(limit, lim) = 0;
20434+ __rlim_rmax(limit, lim) = 0;
20435+ }
20436+}
20437+
20438+static inline void vx_info_exit_limit(struct _vx_limit *limit)
20439+{
20440+ rlim_t value;
20441+ int lim;
20442+
20443+ for (lim = 0; lim < NUM_LIMITS; lim++) {
20444+ if ((1 << lim) & VLIM_NOCHECK)
20445+ continue;
20446+ value = __rlim_get(limit, lim);
20447+ vxwprintk_xid(value,
20448+ "!!! limit: %p[%s,%d] = %ld on exit.",
20449+ limit, vlimit_name[lim], lim, (long)value);
20450+ }
20451+}
20452+
20453diff -NurpP --minimal linux-3.0.9/kernel/vserver/limit_proc.h linux-3.0.9-vs2.3.2.1/kernel/vserver/limit_proc.h
20454--- linux-3.0.9/kernel/vserver/limit_proc.h 1970-01-01 01:00:00.000000000 +0100
20455+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/limit_proc.h 2011-06-10 22:11:24.000000000 +0200
20456@@ -0,0 +1,57 @@
20457+#ifndef _VX_LIMIT_PROC_H
20458+#define _VX_LIMIT_PROC_H
20459+
20460+#include <linux/vserver/limit_int.h>
20461+
20462+
/* one table row: current, min/max, soft/hard and hit count */
#define VX_LIMIT_FMT	":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n"

/* column headings printed above the rows */
#define VX_LIMIT_TOP	\
	"Limit\t current\t min/max\t\t soft/hard\t\thits\n"

/*
 * the six arguments consumed by one VX_LIMIT_FMT row for resource r;
 * expects a variable named limit in the expanding scope
 */
#define VX_LIMIT_ARG(r)					\
	(unsigned long)__rlim_get(limit, r),		\
	(unsigned long)__rlim_rmin(limit, r),		\
	(unsigned long)__rlim_rmax(limit, r),		\
	VX_VLIM(__rlim_soft(limit, r)),			\
	VX_VLIM(__rlim_hard(limit, r)),			\
	atomic_read(&__rlim_lhit(limit, r))
20474+
20475+static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
20476+{
20477+ vx_limit_fixup(limit, -1);
20478+ return sprintf(buffer, VX_LIMIT_TOP
20479+ "PROC" VX_LIMIT_FMT
20480+ "VM" VX_LIMIT_FMT
20481+ "VML" VX_LIMIT_FMT
20482+ "RSS" VX_LIMIT_FMT
20483+ "ANON" VX_LIMIT_FMT
20484+ "RMAP" VX_LIMIT_FMT
20485+ "FILES" VX_LIMIT_FMT
20486+ "OFD" VX_LIMIT_FMT
20487+ "LOCKS" VX_LIMIT_FMT
20488+ "SOCK" VX_LIMIT_FMT
20489+ "MSGQ" VX_LIMIT_FMT
20490+ "SHM" VX_LIMIT_FMT
20491+ "SEMA" VX_LIMIT_FMT
20492+ "SEMS" VX_LIMIT_FMT
20493+ "DENT" VX_LIMIT_FMT,
20494+ VX_LIMIT_ARG(RLIMIT_NPROC),
20495+ VX_LIMIT_ARG(RLIMIT_AS),
20496+ VX_LIMIT_ARG(RLIMIT_MEMLOCK),
20497+ VX_LIMIT_ARG(RLIMIT_RSS),
20498+ VX_LIMIT_ARG(VLIMIT_ANON),
20499+ VX_LIMIT_ARG(VLIMIT_MAPPED),
20500+ VX_LIMIT_ARG(RLIMIT_NOFILE),
20501+ VX_LIMIT_ARG(VLIMIT_OPENFD),
20502+ VX_LIMIT_ARG(RLIMIT_LOCKS),
20503+ VX_LIMIT_ARG(VLIMIT_NSOCK),
20504+ VX_LIMIT_ARG(RLIMIT_MSGQUEUE),
20505+ VX_LIMIT_ARG(VLIMIT_SHMEM),
20506+ VX_LIMIT_ARG(VLIMIT_SEMARY),
20507+ VX_LIMIT_ARG(VLIMIT_NSEMS),
20508+ VX_LIMIT_ARG(VLIMIT_DENTRY));
20509+}
20510+
20511+#endif /* _VX_LIMIT_PROC_H */
20512+
20513+
20514diff -NurpP --minimal linux-3.0.9/kernel/vserver/network.c linux-3.0.9-vs2.3.2.1/kernel/vserver/network.c
20515--- linux-3.0.9/kernel/vserver/network.c 1970-01-01 01:00:00.000000000 +0100
20516+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/network.c 2011-06-10 23:20:56.000000000 +0200
20517@@ -0,0 +1,912 @@
20518+/*
20519+ * linux/kernel/vserver/network.c
20520+ *
20521+ * Virtual Server: Network Support
20522+ *
20523+ * Copyright (C) 2003-2007 Herbert Pötzl
20524+ *
20525+ * V0.01 broken out from vcontext V0.05
20526+ * V0.02 cleaned up implementation
20527+ * V0.03 added equiv nx commands
20528+ * V0.04 switch to RCU based hash
20529+ * V0.05 and back to locking again
20530+ * V0.06 changed vcmds to nxi arg
20531+ * V0.07 have __create claim() the nxi
20532+ *
20533+ */
20534+
20535+#include <linux/err.h>
20536+#include <linux/slab.h>
20537+#include <linux/rcupdate.h>
20538+
20539+#include <linux/vs_network.h>
20540+#include <linux/vs_pid.h>
20541+#include <linux/vserver/network_cmd.h>
20542+
20543+
20544+atomic_t nx_global_ctotal = ATOMIC_INIT(0);
20545+atomic_t nx_global_cactive = ATOMIC_INIT(0);
20546+
20547+static struct kmem_cache *nx_addr_v4_cachep = NULL;
20548+static struct kmem_cache *nx_addr_v6_cachep = NULL;
20549+
20550+
20551+static int __init init_network(void)
20552+{
20553+ nx_addr_v4_cachep = kmem_cache_create("nx_v4_addr_cache",
20554+ sizeof(struct nx_addr_v4), 0,
20555+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
20556+ nx_addr_v6_cachep = kmem_cache_create("nx_v6_addr_cache",
20557+ sizeof(struct nx_addr_v6), 0,
20558+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
20559+ return 0;
20560+}
20561+
20562+
20563+/* __alloc_nx_addr_v4() */
20564+
20565+static inline struct nx_addr_v4 *__alloc_nx_addr_v4(void)
20566+{
20567+ struct nx_addr_v4 *nxa = kmem_cache_alloc(
20568+ nx_addr_v4_cachep, GFP_KERNEL);
20569+
20570+ if (!IS_ERR(nxa))
20571+ memset(nxa, 0, sizeof(*nxa));
20572+ return nxa;
20573+}
20574+
20575+/* __dealloc_nx_addr_v4() */
20576+
20577+static inline void __dealloc_nx_addr_v4(struct nx_addr_v4 *nxa)
20578+{
20579+ kmem_cache_free(nx_addr_v4_cachep, nxa);
20580+}
20581+
20582+/* __dealloc_nx_addr_v4_all() */
20583+
20584+static inline void __dealloc_nx_addr_v4_all(struct nx_addr_v4 *nxa)
20585+{
20586+ while (nxa) {
20587+ struct nx_addr_v4 *next = nxa->next;
20588+
20589+ __dealloc_nx_addr_v4(nxa);
20590+ nxa = next;
20591+ }
20592+}
20593+
20594+
20595+#ifdef CONFIG_IPV6
20596+
20597+/* __alloc_nx_addr_v6() */
20598+
20599+static inline struct nx_addr_v6 *__alloc_nx_addr_v6(void)
20600+{
20601+ struct nx_addr_v6 *nxa = kmem_cache_alloc(
20602+ nx_addr_v6_cachep, GFP_KERNEL);
20603+
20604+ if (!IS_ERR(nxa))
20605+ memset(nxa, 0, sizeof(*nxa));
20606+ return nxa;
20607+}
20608+
20609+/* __dealloc_nx_addr_v6() */
20610+
20611+static inline void __dealloc_nx_addr_v6(struct nx_addr_v6 *nxa)
20612+{
20613+ kmem_cache_free(nx_addr_v6_cachep, nxa);
20614+}
20615+
20616+/* __dealloc_nx_addr_v6_all() */
20617+
20618+static inline void __dealloc_nx_addr_v6_all(struct nx_addr_v6 *nxa)
20619+{
20620+ while (nxa) {
20621+ struct nx_addr_v6 *next = nxa->next;
20622+
20623+ __dealloc_nx_addr_v6(nxa);
20624+ nxa = next;
20625+ }
20626+}
20627+
20628+#endif /* CONFIG_IPV6 */
20629+
20630+/* __alloc_nx_info()
20631+
20632+ * allocate an initialized nx_info struct
20633+ * doesn't make it visible (hash) */
20634+
20635+static struct nx_info *__alloc_nx_info(nid_t nid)
20636+{
20637+ struct nx_info *new = NULL;
20638+
20639+ vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
20640+
20641+ /* would this benefit from a slab cache? */
20642+ new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
20643+ if (!new)
20644+ return 0;
20645+
20646+ memset(new, 0, sizeof(struct nx_info));
20647+ new->nx_id = nid;
20648+ INIT_HLIST_NODE(&new->nx_hlist);
20649+ atomic_set(&new->nx_usecnt, 0);
20650+ atomic_set(&new->nx_tasks, 0);
20651+ new->nx_state = 0;
20652+
20653+ new->nx_flags = NXF_INIT_SET;
20654+
20655+ /* rest of init goes here */
20656+
20657+ new->v4_lback.s_addr = htonl(INADDR_LOOPBACK);
20658+ new->v4_bcast.s_addr = htonl(INADDR_BROADCAST);
20659+
20660+ vxdprintk(VXD_CBIT(nid, 0),
20661+ "alloc_nx_info(%d) = %p", nid, new);
20662+ atomic_inc(&nx_global_ctotal);
20663+ return new;
20664+}
20665+
20666+/* __dealloc_nx_info()
20667+
20668+ * final disposal of nx_info */
20669+
20670+static void __dealloc_nx_info(struct nx_info *nxi)
20671+{
20672+ vxdprintk(VXD_CBIT(nid, 0),
20673+ "dealloc_nx_info(%p)", nxi);
20674+
20675+ nxi->nx_hlist.next = LIST_POISON1;
20676+ nxi->nx_id = -1;
20677+
20678+ BUG_ON(atomic_read(&nxi->nx_usecnt));
20679+ BUG_ON(atomic_read(&nxi->nx_tasks));
20680+
20681+ __dealloc_nx_addr_v4_all(nxi->v4.next);
20682+
20683+ nxi->nx_state |= NXS_RELEASED;
20684+ kfree(nxi);
20685+ atomic_dec(&nx_global_ctotal);
20686+}
20687+
20688+static void __shutdown_nx_info(struct nx_info *nxi)
20689+{
20690+ nxi->nx_state |= NXS_SHUTDOWN;
20691+ vs_net_change(nxi, VSC_NETDOWN);
20692+}
20693+
20694+/* exported stuff */
20695+
20696+void free_nx_info(struct nx_info *nxi)
20697+{
20698+ /* context shutdown is mandatory */
20699+ BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
20700+
20701+ /* context must not be hashed */
20702+ BUG_ON(nxi->nx_state & NXS_HASHED);
20703+
20704+ BUG_ON(atomic_read(&nxi->nx_usecnt));
20705+ BUG_ON(atomic_read(&nxi->nx_tasks));
20706+
20707+ __dealloc_nx_info(nxi);
20708+}
20709+
20710+
20711+void __nx_set_lback(struct nx_info *nxi)
20712+{
20713+ int nid = nxi->nx_id;
20714+ __be32 lback = htonl(INADDR_LOOPBACK ^ ((nid & 0xFFFF) << 8));
20715+
20716+ nxi->v4_lback.s_addr = lback;
20717+}
20718+
20719+extern int __nx_inet_add_lback(__be32 addr);
20720+extern int __nx_inet_del_lback(__be32 addr);
20721+
20722+
20723+/* hash table for nx_info hash */
20724+
20725+#define NX_HASH_SIZE 13
20726+
20727+struct hlist_head nx_info_hash[NX_HASH_SIZE];
20728+
20729+static DEFINE_SPINLOCK(nx_info_hash_lock);
20730+
20731+
20732+static inline unsigned int __hashval(nid_t nid)
20733+{
20734+ return (nid % NX_HASH_SIZE);
20735+}
20736+
20737+
20738+
20739+/* __hash_nx_info()
20740+
20741+ * add the nxi to the global hash table
20742+ * requires the hash_lock to be held */
20743+
20744+static inline void __hash_nx_info(struct nx_info *nxi)
20745+{
20746+ struct hlist_head *head;
20747+
20748+ vxd_assert_lock(&nx_info_hash_lock);
20749+ vxdprintk(VXD_CBIT(nid, 4),
20750+ "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
20751+
20752+ /* context must not be hashed */
20753+ BUG_ON(nx_info_state(nxi, NXS_HASHED));
20754+
20755+ nxi->nx_state |= NXS_HASHED;
20756+ head = &nx_info_hash[__hashval(nxi->nx_id)];
20757+ hlist_add_head(&nxi->nx_hlist, head);
20758+ atomic_inc(&nx_global_cactive);
20759+}
20760+
20761+/* __unhash_nx_info()
20762+
20763+ * remove the nxi from the global hash table
20764+ * requires the hash_lock to be held */
20765+
20766+static inline void __unhash_nx_info(struct nx_info *nxi)
20767+{
20768+ vxd_assert_lock(&nx_info_hash_lock);
20769+ vxdprintk(VXD_CBIT(nid, 4),
20770+ "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
20771+ atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));
20772+
20773+ /* context must be hashed */
20774+ BUG_ON(!nx_info_state(nxi, NXS_HASHED));
20775+ /* but without tasks */
20776+ BUG_ON(atomic_read(&nxi->nx_tasks));
20777+
20778+ nxi->nx_state &= ~NXS_HASHED;
20779+ hlist_del(&nxi->nx_hlist);
20780+ atomic_dec(&nx_global_cactive);
20781+}
20782+
20783+
20784+/* __lookup_nx_info()
20785+
20786+ * requires the hash_lock to be held
20787+ * doesn't increment the nx_refcnt */
20788+
20789+static inline struct nx_info *__lookup_nx_info(nid_t nid)
20790+{
20791+ struct hlist_head *head = &nx_info_hash[__hashval(nid)];
20792+ struct hlist_node *pos;
20793+ struct nx_info *nxi;
20794+
20795+ vxd_assert_lock(&nx_info_hash_lock);
20796+ hlist_for_each(pos, head) {
20797+ nxi = hlist_entry(pos, struct nx_info, nx_hlist);
20798+
20799+ if (nxi->nx_id == nid)
20800+ goto found;
20801+ }
20802+ nxi = NULL;
20803+found:
20804+ vxdprintk(VXD_CBIT(nid, 0),
20805+ "__lookup_nx_info(#%u): %p[#%u]",
20806+ nid, nxi, nxi ? nxi->nx_id : 0);
20807+ return nxi;
20808+}
20809+
20810+
20811+/* __create_nx_info()
20812+
20813+ * create the requested context
20814+ * get(), claim() and hash it */
20815+
20816+static struct nx_info *__create_nx_info(int id)
20817+{
20818+ struct nx_info *new, *nxi = NULL;
20819+
20820+ vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
20821+
20822+ if (!(new = __alloc_nx_info(id)))
20823+ return ERR_PTR(-ENOMEM);
20824+
20825+ /* required to make dynamic xids unique */
20826+ spin_lock(&nx_info_hash_lock);
20827+
20828+ /* static context requested */
20829+ if ((nxi = __lookup_nx_info(id))) {
20830+ vxdprintk(VXD_CBIT(nid, 0),
20831+ "create_nx_info(%d) = %p (already there)", id, nxi);
20832+ if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
20833+ nxi = ERR_PTR(-EBUSY);
20834+ else
20835+ nxi = ERR_PTR(-EEXIST);
20836+ goto out_unlock;
20837+ }
20838+ /* new context */
20839+ vxdprintk(VXD_CBIT(nid, 0),
20840+ "create_nx_info(%d) = %p (new)", id, new);
20841+ claim_nx_info(new, NULL);
20842+ __nx_set_lback(new);
20843+ __hash_nx_info(get_nx_info(new));
20844+ nxi = new, new = NULL;
20845+
20846+out_unlock:
20847+ spin_unlock(&nx_info_hash_lock);
20848+ if (new)
20849+ __dealloc_nx_info(new);
20850+ return nxi;
20851+}
20852+
20853+
20854+
20855+/* exported stuff */
20856+
20857+
20858+void unhash_nx_info(struct nx_info *nxi)
20859+{
20860+ __shutdown_nx_info(nxi);
20861+ spin_lock(&nx_info_hash_lock);
20862+ __unhash_nx_info(nxi);
20863+ spin_unlock(&nx_info_hash_lock);
20864+}
20865+
20866+/* lookup_nx_info()
20867+
20868+ * search for a nx_info and get() it
20869+ * negative id means current */
20870+
20871+struct nx_info *lookup_nx_info(int id)
20872+{
20873+ struct nx_info *nxi = NULL;
20874+
20875+ if (id < 0) {
20876+ nxi = get_nx_info(current_nx_info());
20877+ } else if (id > 1) {
20878+ spin_lock(&nx_info_hash_lock);
20879+ nxi = get_nx_info(__lookup_nx_info(id));
20880+ spin_unlock(&nx_info_hash_lock);
20881+ }
20882+ return nxi;
20883+}
20884+
20885+/* nid_is_hashed()
20886+
20887+ * verify that nid is still hashed */
20888+
20889+int nid_is_hashed(nid_t nid)
20890+{
20891+ int hashed;
20892+
20893+ spin_lock(&nx_info_hash_lock);
20894+ hashed = (__lookup_nx_info(nid) != NULL);
20895+ spin_unlock(&nx_info_hash_lock);
20896+ return hashed;
20897+}
20898+
20899+
20900+#ifdef CONFIG_PROC_FS
20901+
20902+/* get_nid_list()
20903+
20904+ * get a subset of hashed nids for proc
20905+ * assumes size is at least one */
20906+
20907+int get_nid_list(int index, unsigned int *nids, int size)
20908+{
20909+ int hindex, nr_nids = 0;
20910+
20911+ /* only show current and children */
20912+ if (!nx_check(0, VS_ADMIN | VS_WATCH)) {
20913+ if (index > 0)
20914+ return 0;
20915+ nids[nr_nids] = nx_current_nid();
20916+ return 1;
20917+ }
20918+
20919+ for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
20920+ struct hlist_head *head = &nx_info_hash[hindex];
20921+ struct hlist_node *pos;
20922+
20923+ spin_lock(&nx_info_hash_lock);
20924+ hlist_for_each(pos, head) {
20925+ struct nx_info *nxi;
20926+
20927+ if (--index > 0)
20928+ continue;
20929+
20930+ nxi = hlist_entry(pos, struct nx_info, nx_hlist);
20931+ nids[nr_nids] = nxi->nx_id;
20932+ if (++nr_nids >= size) {
20933+ spin_unlock(&nx_info_hash_lock);
20934+ goto out;
20935+ }
20936+ }
20937+ /* keep the lock time short */
20938+ spin_unlock(&nx_info_hash_lock);
20939+ }
20940+out:
20941+ return nr_nids;
20942+}
20943+#endif
20944+
20945+
20946+/*
20947+ * migrate task to new network
20948+ * gets nxi, puts old_nxi on change
20949+ */
20950+
20951+int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
20952+{
20953+ struct nx_info *old_nxi;
20954+ int ret = 0;
20955+
20956+ if (!p || !nxi)
20957+ BUG();
20958+
20959+ vxdprintk(VXD_CBIT(nid, 5),
20960+ "nx_migrate_task(%p,%p[#%d.%d.%d])",
20961+ p, nxi, nxi->nx_id,
20962+ atomic_read(&nxi->nx_usecnt),
20963+ atomic_read(&nxi->nx_tasks));
20964+
20965+ if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
20966+ !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
20967+ return -EACCES;
20968+
20969+ if (nx_info_state(nxi, NXS_SHUTDOWN))
20970+ return -EFAULT;
20971+
20972+ /* maybe disallow this completely? */
20973+ old_nxi = task_get_nx_info(p);
20974+ if (old_nxi == nxi)
20975+ goto out;
20976+
20977+ task_lock(p);
20978+ if (old_nxi)
20979+ clr_nx_info(&p->nx_info);
20980+ claim_nx_info(nxi, p);
20981+ set_nx_info(&p->nx_info, nxi);
20982+ p->nid = nxi->nx_id;
20983+ task_unlock(p);
20984+
20985+ vxdprintk(VXD_CBIT(nid, 5),
20986+ "moved task %p into nxi:%p[#%d]",
20987+ p, nxi, nxi->nx_id);
20988+
20989+ if (old_nxi)
20990+ release_nx_info(old_nxi, p);
20991+ ret = 0;
20992+out:
20993+ put_nx_info(old_nxi);
20994+ return ret;
20995+}
20996+
20997+
20998+void nx_set_persistent(struct nx_info *nxi)
20999+{
21000+ vxdprintk(VXD_CBIT(nid, 6),
21001+ "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);
21002+
21003+ get_nx_info(nxi);
21004+ claim_nx_info(nxi, NULL);
21005+}
21006+
21007+void nx_clear_persistent(struct nx_info *nxi)
21008+{
21009+ vxdprintk(VXD_CBIT(nid, 6),
21010+ "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
21011+
21012+ release_nx_info(nxi, NULL);
21013+ put_nx_info(nxi);
21014+}
21015+
21016+void nx_update_persistent(struct nx_info *nxi)
21017+{
21018+ if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
21019+ nx_set_persistent(nxi);
21020+ else
21021+ nx_clear_persistent(nxi);
21022+}
21023+
21024+/* vserver syscall commands below here */
21025+
21026+/* task nid and nx_info functions */
21027+
21028+#include <asm/uaccess.h>
21029+
21030+
21031+int vc_task_nid(uint32_t id)
21032+{
21033+ nid_t nid;
21034+
21035+ if (id) {
21036+ struct task_struct *tsk;
21037+
21038+ rcu_read_lock();
21039+ tsk = find_task_by_real_pid(id);
21040+ nid = (tsk) ? tsk->nid : -ESRCH;
21041+ rcu_read_unlock();
21042+ } else
21043+ nid = nx_current_nid();
21044+ return nid;
21045+}
21046+
21047+
21048+int vc_nx_info(struct nx_info *nxi, void __user *data)
21049+{
21050+ struct vcmd_nx_info_v0 vc_data;
21051+
21052+ vc_data.nid = nxi->nx_id;
21053+
21054+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21055+ return -EFAULT;
21056+ return 0;
21057+}
21058+
21059+
21060+/* network functions */
21061+
21062+int vc_net_create(uint32_t nid, void __user *data)
21063+{
21064+ struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
21065+ struct nx_info *new_nxi;
21066+ int ret;
21067+
21068+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21069+ return -EFAULT;
21070+
21071+ if ((nid > MAX_S_CONTEXT) || (nid < 2))
21072+ return -EINVAL;
21073+
21074+ new_nxi = __create_nx_info(nid);
21075+ if (IS_ERR(new_nxi))
21076+ return PTR_ERR(new_nxi);
21077+
21078+ /* initial flags */
21079+ new_nxi->nx_flags = vc_data.flagword;
21080+
21081+ ret = -ENOEXEC;
21082+ if (vs_net_change(new_nxi, VSC_NETUP))
21083+ goto out;
21084+
21085+ ret = nx_migrate_task(current, new_nxi);
21086+ if (ret)
21087+ goto out;
21088+
21089+ /* return context id on success */
21090+ ret = new_nxi->nx_id;
21091+
21092+ /* get a reference for persistent contexts */
21093+ if ((vc_data.flagword & NXF_PERSISTENT))
21094+ nx_set_persistent(new_nxi);
21095+out:
21096+ release_nx_info(new_nxi, NULL);
21097+ put_nx_info(new_nxi);
21098+ return ret;
21099+}
21100+
21101+
21102+int vc_net_migrate(struct nx_info *nxi, void __user *data)
21103+{
21104+ return nx_migrate_task(current, nxi);
21105+}
21106+
21107+
21108+
21109+int do_add_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
21110+ uint16_t type, uint16_t flags)
21111+{
21112+ struct nx_addr_v4 *nxa = &nxi->v4;
21113+
21114+ if (NX_IPV4(nxi)) {
21115+ /* locate last entry */
21116+ for (; nxa->next; nxa = nxa->next);
21117+ nxa->next = __alloc_nx_addr_v4();
21118+ nxa = nxa->next;
21119+
21120+ if (IS_ERR(nxa))
21121+ return PTR_ERR(nxa);
21122+ }
21123+
21124+ if (nxi->v4.next)
21125+ /* remove single ip for ip list */
21126+ nxi->nx_flags &= ~NXF_SINGLE_IP;
21127+
21128+ nxa->ip[0].s_addr = ip;
21129+ nxa->ip[1].s_addr = ip2;
21130+ nxa->mask.s_addr = mask;
21131+ nxa->type = type;
21132+ nxa->flags = flags;
21133+ return 0;
21134+}
21135+
21136+int do_remove_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
21137+ uint16_t type, uint16_t flags)
21138+{
21139+ struct nx_addr_v4 *nxa = &nxi->v4;
21140+
21141+ switch (type) {
21142+/* case NXA_TYPE_ADDR:
21143+ break; */
21144+
21145+ case NXA_TYPE_ANY:
21146+ __dealloc_nx_addr_v4_all(xchg(&nxa->next, NULL));
21147+ memset(nxa, 0, sizeof(*nxa));
21148+ break;
21149+
21150+ default:
21151+ return -EINVAL;
21152+ }
21153+ return 0;
21154+}
21155+
21156+
21157+int vc_net_add(struct nx_info *nxi, void __user *data)
21158+{
21159+ struct vcmd_net_addr_v0 vc_data;
21160+ int index, ret = 0;
21161+
21162+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21163+ return -EFAULT;
21164+
21165+ switch (vc_data.type) {
21166+ case NXA_TYPE_IPV4:
21167+ if ((vc_data.count < 1) || (vc_data.count > 4))
21168+ return -EINVAL;
21169+
21170+ index = 0;
21171+ while (index < vc_data.count) {
21172+ ret = do_add_v4_addr(nxi, vc_data.ip[index].s_addr, 0,
21173+ vc_data.mask[index].s_addr, NXA_TYPE_ADDR, 0);
21174+ if (ret)
21175+ return ret;
21176+ index++;
21177+ }
21178+ ret = index;
21179+ break;
21180+
21181+ case NXA_TYPE_IPV4|NXA_MOD_BCAST:
21182+ nxi->v4_bcast = vc_data.ip[0];
21183+ ret = 1;
21184+ break;
21185+
21186+ case NXA_TYPE_IPV4|NXA_MOD_LBACK:
21187+ nxi->v4_lback = vc_data.ip[0];
21188+ ret = 1;
21189+ break;
21190+
21191+ default:
21192+ ret = -EINVAL;
21193+ break;
21194+ }
21195+ return ret;
21196+}
21197+
21198+int vc_net_remove(struct nx_info *nxi, void __user *data)
21199+{
21200+ struct vcmd_net_addr_v0 vc_data;
21201+
21202+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21203+ return -EFAULT;
21204+
21205+ switch (vc_data.type) {
21206+ case NXA_TYPE_ANY:
21207+ __dealloc_nx_addr_v4_all(xchg(&nxi->v4.next, NULL));
21208+ memset(&nxi->v4, 0, sizeof(nxi->v4));
21209+ break;
21210+
21211+ default:
21212+ return -EINVAL;
21213+ }
21214+ return 0;
21215+}
21216+
21217+
21218+int vc_net_add_ipv4_v1(struct nx_info *nxi, void __user *data)
21219+{
21220+ struct vcmd_net_addr_ipv4_v1 vc_data;
21221+
21222+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21223+ return -EFAULT;
21224+
21225+ switch (vc_data.type) {
21226+ case NXA_TYPE_ADDR:
21227+ case NXA_TYPE_MASK:
21228+ return do_add_v4_addr(nxi, vc_data.ip.s_addr, 0,
21229+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
21230+
21231+ case NXA_TYPE_ADDR | NXA_MOD_BCAST:
21232+ nxi->v4_bcast = vc_data.ip;
21233+ break;
21234+
21235+ case NXA_TYPE_ADDR | NXA_MOD_LBACK:
21236+ nxi->v4_lback = vc_data.ip;
21237+ break;
21238+
21239+ default:
21240+ return -EINVAL;
21241+ }
21242+ return 0;
21243+}
21244+
21245+int vc_net_add_ipv4(struct nx_info *nxi, void __user *data)
21246+{
21247+ struct vcmd_net_addr_ipv4_v2 vc_data;
21248+
21249+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21250+ return -EFAULT;
21251+
21252+ switch (vc_data.type) {
21253+ case NXA_TYPE_ADDR:
21254+ case NXA_TYPE_MASK:
21255+ case NXA_TYPE_RANGE:
21256+ return do_add_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
21257+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
21258+
21259+ case NXA_TYPE_ADDR | NXA_MOD_BCAST:
21260+ nxi->v4_bcast = vc_data.ip;
21261+ break;
21262+
21263+ case NXA_TYPE_ADDR | NXA_MOD_LBACK:
21264+ nxi->v4_lback = vc_data.ip;
21265+ break;
21266+
21267+ default:
21268+ return -EINVAL;
21269+ }
21270+ return 0;
21271+}
21272+
21273+int vc_net_rem_ipv4_v1(struct nx_info *nxi, void __user *data)
21274+{
21275+ struct vcmd_net_addr_ipv4_v1 vc_data;
21276+
21277+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21278+ return -EFAULT;
21279+
21280+ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, 0,
21281+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
21282+}
21283+
21284+int vc_net_rem_ipv4(struct nx_info *nxi, void __user *data)
21285+{
21286+ struct vcmd_net_addr_ipv4_v2 vc_data;
21287+
21288+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21289+ return -EFAULT;
21290+
21291+ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
21292+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
21293+}
21294+
21295+#ifdef CONFIG_IPV6
21296+
21297+int do_add_v6_addr(struct nx_info *nxi,
21298+ struct in6_addr *ip, struct in6_addr *mask,
21299+ uint32_t prefix, uint16_t type, uint16_t flags)
21300+{
21301+ struct nx_addr_v6 *nxa = &nxi->v6;
21302+
21303+ if (NX_IPV6(nxi)) {
21304+ /* locate last entry */
21305+ for (; nxa->next; nxa = nxa->next);
21306+ nxa->next = __alloc_nx_addr_v6();
21307+ nxa = nxa->next;
21308+
21309+ if (IS_ERR(nxa))
21310+ return PTR_ERR(nxa);
21311+ }
21312+
21313+ nxa->ip = *ip;
21314+ nxa->mask = *mask;
21315+ nxa->prefix = prefix;
21316+ nxa->type = type;
21317+ nxa->flags = flags;
21318+ return 0;
21319+}
21320+
21321+
21322+int vc_net_add_ipv6(struct nx_info *nxi, void __user *data)
21323+{
21324+ struct vcmd_net_addr_ipv6_v1 vc_data;
21325+
21326+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21327+ return -EFAULT;
21328+
21329+ switch (vc_data.type) {
21330+ case NXA_TYPE_ADDR:
21331+ memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
21332+ /* fallthrough */
21333+ case NXA_TYPE_MASK:
21334+ return do_add_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
21335+ vc_data.prefix, vc_data.type, vc_data.flags);
21336+ default:
21337+ return -EINVAL;
21338+ }
21339+ return 0;
21340+}
21341+
21342+int vc_net_remove_ipv6(struct nx_info *nxi, void __user *data)
21343+{
21344+ struct vcmd_net_addr_ipv6_v1 vc_data;
21345+
21346+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21347+ return -EFAULT;
21348+
21349+ switch (vc_data.type) {
21350+ case NXA_TYPE_ANY:
21351+ __dealloc_nx_addr_v6_all(xchg(&nxi->v6.next, NULL));
21352+ memset(&nxi->v6, 0, sizeof(nxi->v6));
21353+ break;
21354+
21355+ default:
21356+ return -EINVAL;
21357+ }
21358+ return 0;
21359+}
21360+
21361+#endif /* CONFIG_IPV6 */
21362+
21363+
21364+int vc_get_nflags(struct nx_info *nxi, void __user *data)
21365+{
21366+ struct vcmd_net_flags_v0 vc_data;
21367+
21368+ vc_data.flagword = nxi->nx_flags;
21369+
21370+ /* special STATE flag handling */
21371+ vc_data.mask = vs_mask_flags(~0ULL, nxi->nx_flags, NXF_ONE_TIME);
21372+
21373+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21374+ return -EFAULT;
21375+ return 0;
21376+}
21377+
21378+int vc_set_nflags(struct nx_info *nxi, void __user *data)
21379+{
21380+ struct vcmd_net_flags_v0 vc_data;
21381+ uint64_t mask, trigger;
21382+
21383+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21384+ return -EFAULT;
21385+
21386+ /* special STATE flag handling */
21387+ mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
21388+ trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
21389+
21390+ nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
21391+ vc_data.flagword, mask);
21392+ if (trigger & NXF_PERSISTENT)
21393+ nx_update_persistent(nxi);
21394+
21395+ return 0;
21396+}
21397+
21398+int vc_get_ncaps(struct nx_info *nxi, void __user *data)
21399+{
21400+ struct vcmd_net_caps_v0 vc_data;
21401+
21402+ vc_data.ncaps = nxi->nx_ncaps;
21403+ vc_data.cmask = ~0ULL;
21404+
21405+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21406+ return -EFAULT;
21407+ return 0;
21408+}
21409+
21410+int vc_set_ncaps(struct nx_info *nxi, void __user *data)
21411+{
21412+ struct vcmd_net_caps_v0 vc_data;
21413+
21414+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21415+ return -EFAULT;
21416+
21417+ nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
21418+ vc_data.ncaps, vc_data.cmask);
21419+ return 0;
21420+}
21421+
21422+
21423+#include <linux/module.h>
21424+
/* register init_network() as this file's initcall */
module_init(init_network);

/* symbols made available to GPL modules */
EXPORT_SYMBOL_GPL(unhash_nx_info);
EXPORT_SYMBOL_GPL(free_nx_info);
21429+
21430diff -NurpP --minimal linux-3.0.9/kernel/vserver/proc.c linux-3.0.9-vs2.3.2.1/kernel/vserver/proc.c
21431--- linux-3.0.9/kernel/vserver/proc.c 1970-01-01 01:00:00.000000000 +0100
21432+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/proc.c 2011-08-01 19:57:21.000000000 +0200
21433@@ -0,0 +1,1103 @@
21434+/*
21435+ * linux/kernel/vserver/proc.c
21436+ *
21437+ * Virtual Context Support
21438+ *
21439+ * Copyright (C) 2003-2011 Herbert Pötzl
21440+ *
21441+ * V0.01 basic structure
21442+ * V0.02 adaptation vs1.3.0
21443+ * V0.03 proc permissions
21444+ * V0.04 locking/generic
21445+ * V0.05 next generation procfs
21446+ * V0.06 inode validation
21447+ * V0.07 generic rewrite vid
21448+ * V0.08 remove inode type
21449+ * V0.09 added u/wmask info
21450+ *
21451+ */
21452+
21453+#include <linux/proc_fs.h>
21454+#include <linux/fs_struct.h>
21455+#include <linux/mount.h>
21456+#include <asm/unistd.h>
21457+
21458+#include <linux/vs_context.h>
21459+#include <linux/vs_network.h>
21460+#include <linux/vs_cvirt.h>
21461+
21462+#include <linux/in.h>
21463+#include <linux/inetdevice.h>
21464+#include <linux/vs_inet.h>
21465+#include <linux/vs_inet6.h>
21466+
21467+#include <linux/vserver/global.h>
21468+
21469+#include "cvirt_proc.h"
21470+#include "cacct_proc.h"
21471+#include "limit_proc.h"
21472+#include "sched_proc.h"
21473+#include "vci_config.h"
21474+
21475+
21476+static inline char *print_cap_t(char *buffer, kernel_cap_t *c)
21477+{
21478+ unsigned __capi;
21479+
21480+ CAP_FOR_EACH_U32(__capi) {
21481+ buffer += sprintf(buffer, "%08x",
21482+ c->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
21483+ }
21484+ return buffer;
21485+}
21486+
21487+
/* procfs directory entries created by proc_vx_init() */
static struct proc_dir_entry *proc_virtual;	/* "virtual" */
static struct proc_dir_entry *proc_virtnet;	/* "virtnet" */
21491+
21492+
21493+/* first the actual feeds */
21494+
21495+
21496+static int proc_vci(char *buffer)
21497+{
21498+ return sprintf(buffer,
21499+ "VCIVersion:\t%04x:%04x\n"
21500+ "VCISyscall:\t%d\n"
21501+ "VCIKernel:\t%08x\n",
21502+ VCI_VERSION >> 16,
21503+ VCI_VERSION & 0xFFFF,
21504+ __NR_vserver,
21505+ vci_kernel_config());
21506+}
21507+
/* feed for the top-level "info" file under "virtual": same as proc_vci() */
static int proc_virtual_info(char *buffer)
{
	int length = proc_vci(buffer);

	return length;
}
21512+
21513+static int proc_virtual_status(char *buffer)
21514+{
21515+ return sprintf(buffer,
21516+ "#CTotal:\t%d\n"
21517+ "#CActive:\t%d\n"
21518+ "#NSProxy:\t%d\t%d %d %d %d %d %d\n"
21519+ "#InitTask:\t%d\t%d %d\n",
21520+ atomic_read(&vx_global_ctotal),
21521+ atomic_read(&vx_global_cactive),
21522+ atomic_read(&vs_global_nsproxy),
21523+ atomic_read(&vs_global_fs),
21524+ atomic_read(&vs_global_mnt_ns),
21525+ atomic_read(&vs_global_uts_ns),
21526+ atomic_read(&nr_ipc_ns),
21527+ atomic_read(&vs_global_user_ns),
21528+ atomic_read(&vs_global_pid_ns),
21529+ atomic_read(&init_task.usage),
21530+ atomic_read(&init_task.nsproxy->count),
21531+ init_task.fs->users);
21532+}
21533+
21534+
21535+int proc_vxi_info(struct vx_info *vxi, char *buffer)
21536+{
21537+ int length;
21538+
21539+ length = sprintf(buffer,
21540+ "ID:\t%d\n"
21541+ "Info:\t%p\n"
21542+ "Init:\t%d\n"
21543+ "OOM:\t%lld\n",
21544+ vxi->vx_id,
21545+ vxi,
21546+ vxi->vx_initpid,
21547+ vxi->vx_badness_bias);
21548+ return length;
21549+}
21550+
21551+int proc_vxi_status(struct vx_info *vxi, char *buffer)
21552+{
21553+ char *orig = buffer;
21554+
21555+ buffer += sprintf(buffer,
21556+ "UseCnt:\t%d\n"
21557+ "Tasks:\t%d\n"
21558+ "Flags:\t%016llx\n",
21559+ atomic_read(&vxi->vx_usecnt),
21560+ atomic_read(&vxi->vx_tasks),
21561+ (unsigned long long)vxi->vx_flags);
21562+
21563+ buffer += sprintf(buffer, "BCaps:\t");
21564+ buffer = print_cap_t(buffer, &vxi->vx_bcaps);
21565+ buffer += sprintf(buffer, "\n");
21566+
21567+ buffer += sprintf(buffer,
21568+ "CCaps:\t%016llx\n"
21569+ "Umask:\t%16llx\n"
21570+ "Wmask:\t%16llx\n"
21571+ "Spaces:\t%08lx %08lx\n",
21572+ (unsigned long long)vxi->vx_ccaps,
21573+ (unsigned long long)vxi->vx_umask,
21574+ (unsigned long long)vxi->vx_wmask,
21575+ vxi->space[0].vx_nsmask, vxi->space[1].vx_nsmask);
21576+ return buffer - orig;
21577+}
21578+
21579+int proc_vxi_limit(struct vx_info *vxi, char *buffer)
21580+{
21581+ return vx_info_proc_limit(&vxi->limit, buffer);
21582+}
21583+
21584+int proc_vxi_sched(struct vx_info *vxi, char *buffer)
21585+{
21586+ int cpu, length;
21587+
21588+ length = vx_info_proc_sched(&vxi->sched, buffer);
21589+ for_each_online_cpu(cpu) {
21590+ length += vx_info_proc_sched_pc(
21591+ &vx_per_cpu(vxi, sched_pc, cpu),
21592+ buffer + length, cpu);
21593+ }
21594+ return length;
21595+}
21596+
21597+int proc_vxi_nsproxy0(struct vx_info *vxi, char *buffer)
21598+{
21599+ return vx_info_proc_nsproxy(vxi->space[0].vx_nsproxy, buffer);
21600+}
21601+
21602+int proc_vxi_nsproxy1(struct vx_info *vxi, char *buffer)
21603+{
21604+ return vx_info_proc_nsproxy(vxi->space[1].vx_nsproxy, buffer);
21605+}
21606+
21607+int proc_vxi_cvirt(struct vx_info *vxi, char *buffer)
21608+{
21609+ int cpu, length;
21610+
21611+ vx_update_load(vxi);
21612+ length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
21613+ for_each_online_cpu(cpu) {
21614+ length += vx_info_proc_cvirt_pc(
21615+ &vx_per_cpu(vxi, cvirt_pc, cpu),
21616+ buffer + length, cpu);
21617+ }
21618+ return length;
21619+}
21620+
21621+int proc_vxi_cacct(struct vx_info *vxi, char *buffer)
21622+{
21623+ return vx_info_proc_cacct(&vxi->cacct, buffer);
21624+}
21625+
21626+
/* feed for the top-level "info" file under "virtnet": same as proc_vci() */
static int proc_virtnet_info(char *buffer)
{
	int length = proc_vci(buffer);

	return length;
}
21631+
21632+static int proc_virtnet_status(char *buffer)
21633+{
21634+ return sprintf(buffer,
21635+ "#CTotal:\t%d\n"
21636+ "#CActive:\t%d\n",
21637+ atomic_read(&nx_global_ctotal),
21638+ atomic_read(&nx_global_cactive));
21639+}
21640+
21641+int proc_nxi_info(struct nx_info *nxi, char *buffer)
21642+{
21643+ struct nx_addr_v4 *v4a;
21644+#ifdef CONFIG_IPV6
21645+ struct nx_addr_v6 *v6a;
21646+#endif
21647+ int length, i;
21648+
21649+ length = sprintf(buffer,
21650+ "ID:\t%d\n"
21651+ "Info:\t%p\n"
21652+ "Bcast:\t" NIPQUAD_FMT "\n"
21653+ "Lback:\t" NIPQUAD_FMT "\n",
21654+ nxi->nx_id,
21655+ nxi,
21656+ NIPQUAD(nxi->v4_bcast.s_addr),
21657+ NIPQUAD(nxi->v4_lback.s_addr));
21658+
21659+ if (!NX_IPV4(nxi))
21660+ goto skip_v4;
21661+ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
21662+ length += sprintf(buffer + length, "%d:\t" NXAV4_FMT "\n",
21663+ i, NXAV4(v4a));
21664+skip_v4:
21665+#ifdef CONFIG_IPV6
21666+ if (!NX_IPV6(nxi))
21667+ goto skip_v6;
21668+ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
21669+ length += sprintf(buffer + length, "%d:\t" NXAV6_FMT "\n",
21670+ i, NXAV6(v6a));
21671+skip_v6:
21672+#endif
21673+ return length;
21674+}
21675+
21676+int proc_nxi_status(struct nx_info *nxi, char *buffer)
21677+{
21678+ int length;
21679+
21680+ length = sprintf(buffer,
21681+ "UseCnt:\t%d\n"
21682+ "Tasks:\t%d\n"
21683+ "Flags:\t%016llx\n"
21684+ "NCaps:\t%016llx\n",
21685+ atomic_read(&nxi->nx_usecnt),
21686+ atomic_read(&nxi->nx_tasks),
21687+ (unsigned long long)nxi->nx_flags,
21688+ (unsigned long long)nxi->nx_ncaps);
21689+ return length;
21690+}
21691+
21692+
21693+
21694+/* here the inode helpers */
21695+
21696+struct vs_entry {
21697+ int len;
21698+ char *name;
21699+ mode_t mode;
21700+ struct inode_operations *iop;
21701+ struct file_operations *fop;
21702+ union proc_op op;
21703+};
21704+
21705+static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p)
21706+{
21707+ struct inode *inode = new_inode(sb);
21708+
21709+ if (!inode)
21710+ goto out;
21711+
21712+ inode->i_mode = p->mode;
21713+ if (p->iop)
21714+ inode->i_op = p->iop;
21715+ if (p->fop)
21716+ inode->i_fop = p->fop;
21717+
21718+ inode->i_nlink = (p->mode & S_IFDIR) ? 2 : 1;
21719+ inode->i_flags |= S_IMMUTABLE;
21720+
21721+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
21722+
21723+ inode->i_uid = 0;
21724+ inode->i_gid = 0;
21725+ inode->i_tag = 0;
21726+out:
21727+ return inode;
21728+}
21729+
21730+static struct dentry *vs_proc_instantiate(struct inode *dir,
21731+ struct dentry *dentry, int id, void *ptr)
21732+{
21733+ struct vs_entry *p = ptr;
21734+ struct inode *inode = vs_proc_make_inode(dir->i_sb, p);
21735+ struct dentry *error = ERR_PTR(-EINVAL);
21736+
21737+ if (!inode)
21738+ goto out;
21739+
21740+ PROC_I(inode)->op = p->op;
21741+ PROC_I(inode)->fd = id;
21742+ d_add(dentry, inode);
21743+ error = NULL;
21744+out:
21745+ return error;
21746+}
21747+
21748+/* Lookups */
21749+
21750+typedef struct dentry *instantiate_t(struct inode *, struct dentry *, int, void *);
21751+
21752+/*
21753+ * Fill a directory entry.
21754+ *
21755+ * If possible create the dcache entry and derive our inode number and
21756+ * file type from dcache entry.
21757+ *
21758+ * Since all of the proc inode numbers are dynamically generated, the inode
21759+ * numbers do not exist until the inode is cache. This means creating the
21760+ * the dcache entry in readdir is necessary to keep the inode numbers
21761+ * reported by readdir in sync with the inode numbers reported
21762+ * by stat.
21763+ */
21764+static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
21765+ char *name, int len, instantiate_t instantiate, int id, void *ptr)
21766+{
21767+ struct dentry *child, *dir = filp->f_dentry;
21768+ struct inode *inode;
21769+ struct qstr qname;
21770+ ino_t ino = 0;
21771+ unsigned type = DT_UNKNOWN;
21772+
21773+ qname.name = name;
21774+ qname.len = len;
21775+ qname.hash = full_name_hash(name, len);
21776+
21777+ child = d_lookup(dir, &qname);
21778+ if (!child) {
21779+ struct dentry *new;
21780+ new = d_alloc(dir, &qname);
21781+ if (new) {
21782+ child = instantiate(dir->d_inode, new, id, ptr);
21783+ if (child)
21784+ dput(new);
21785+ else
21786+ child = new;
21787+ }
21788+ }
21789+ if (!child || IS_ERR(child) || !child->d_inode)
21790+ goto end_instantiate;
21791+ inode = child->d_inode;
21792+ if (inode) {
21793+ ino = inode->i_ino;
21794+ type = inode->i_mode >> 12;
21795+ }
21796+ dput(child);
21797+end_instantiate:
21798+ if (!ino)
21799+ ino = find_inode_number(dir, &qname);
21800+ if (!ino)
21801+ ino = 1;
21802+ return filldir(dirent, name, len, filp->f_pos, ino, type);
21803+}
21804+
21805+
21806+
21807+/* get and revalidate vx_info/xid */
21808+
21809+static inline
21810+struct vx_info *get_proc_vx_info(struct inode *inode)
21811+{
21812+ return lookup_vx_info(PROC_I(inode)->fd);
21813+}
21814+
21815+static int proc_xid_revalidate(struct dentry *dentry, struct nameidata *nd)
21816+{
21817+ struct inode *inode = dentry->d_inode;
21818+ xid_t xid = PROC_I(inode)->fd;
21819+
21820+ if (!xid || xid_is_hashed(xid))
21821+ return 1;
21822+ d_drop(dentry);
21823+ return 0;
21824+}
21825+
21826+
21827+/* get and revalidate nx_info/nid */
21828+
21829+static int proc_nid_revalidate(struct dentry *dentry, struct nameidata *nd)
21830+{
21831+ struct inode *inode = dentry->d_inode;
21832+ nid_t nid = PROC_I(inode)->fd;
21833+
21834+ if (!nid || nid_is_hashed(nid))
21835+ return 1;
21836+ d_drop(dentry);
21837+ return 0;
21838+}
21839+
21840+
21841+
21842+#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
21843+
21844+static ssize_t proc_vs_info_read(struct file *file, char __user *buf,
21845+ size_t count, loff_t *ppos)
21846+{
21847+ struct inode *inode = file->f_dentry->d_inode;
21848+ unsigned long page;
21849+ ssize_t length = 0;
21850+
21851+ if (count > PROC_BLOCK_SIZE)
21852+ count = PROC_BLOCK_SIZE;
21853+
21854+ /* fade that out as soon as stable */
21855+ WARN_ON(PROC_I(inode)->fd);
21856+
21857+ if (!(page = __get_free_page(GFP_KERNEL)))
21858+ return -ENOMEM;
21859+
21860+ BUG_ON(!PROC_I(inode)->op.proc_vs_read);
21861+ length = PROC_I(inode)->op.proc_vs_read((char *)page);
21862+
21863+ if (length >= 0)
21864+ length = simple_read_from_buffer(buf, count, ppos,
21865+ (char *)page, length);
21866+
21867+ free_page(page);
21868+ return length;
21869+}
21870+
21871+static ssize_t proc_vx_info_read(struct file *file, char __user *buf,
21872+ size_t count, loff_t *ppos)
21873+{
21874+ struct inode *inode = file->f_dentry->d_inode;
21875+ struct vx_info *vxi = NULL;
21876+ xid_t xid = PROC_I(inode)->fd;
21877+ unsigned long page;
21878+ ssize_t length = 0;
21879+
21880+ if (count > PROC_BLOCK_SIZE)
21881+ count = PROC_BLOCK_SIZE;
21882+
21883+ /* fade that out as soon as stable */
21884+ WARN_ON(!xid);
21885+ vxi = lookup_vx_info(xid);
21886+ if (!vxi)
21887+ goto out;
21888+
21889+ length = -ENOMEM;
21890+ if (!(page = __get_free_page(GFP_KERNEL)))
21891+ goto out_put;
21892+
21893+ BUG_ON(!PROC_I(inode)->op.proc_vxi_read);
21894+ length = PROC_I(inode)->op.proc_vxi_read(vxi, (char *)page);
21895+
21896+ if (length >= 0)
21897+ length = simple_read_from_buffer(buf, count, ppos,
21898+ (char *)page, length);
21899+
21900+ free_page(page);
21901+out_put:
21902+ put_vx_info(vxi);
21903+out:
21904+ return length;
21905+}
21906+
21907+static ssize_t proc_nx_info_read(struct file *file, char __user *buf,
21908+ size_t count, loff_t *ppos)
21909+{
21910+ struct inode *inode = file->f_dentry->d_inode;
21911+ struct nx_info *nxi = NULL;
21912+ nid_t nid = PROC_I(inode)->fd;
21913+ unsigned long page;
21914+ ssize_t length = 0;
21915+
21916+ if (count > PROC_BLOCK_SIZE)
21917+ count = PROC_BLOCK_SIZE;
21918+
21919+ /* fade that out as soon as stable */
21920+ WARN_ON(!nid);
21921+ nxi = lookup_nx_info(nid);
21922+ if (!nxi)
21923+ goto out;
21924+
21925+ length = -ENOMEM;
21926+ if (!(page = __get_free_page(GFP_KERNEL)))
21927+ goto out_put;
21928+
21929+ BUG_ON(!PROC_I(inode)->op.proc_nxi_read);
21930+ length = PROC_I(inode)->op.proc_nxi_read(nxi, (char *)page);
21931+
21932+ if (length >= 0)
21933+ length = simple_read_from_buffer(buf, count, ppos,
21934+ (char *)page, length);
21935+
21936+ free_page(page);
21937+out_put:
21938+ put_nx_info(nxi);
21939+out:
21940+ return length;
21941+}
21942+
21943+
21944+
21945+/* here comes the lower level */
21946+
21947+
/*
 * Initializer helpers for struct vs_entry tables.
 *
 * NOD  - generic entry; NAME must be a string literal (sizeof is used
 *        for the length) or NULL
 * DIR  - directory using proc_<OTYPE>_inode_operations and
 *        proc_<OTYPE>_file_operations
 * INF  - regular file read through proc_vs_info_read / proc_<OTYPE>()
 * VINF - regular file read through proc_vx_info_read / proc_<OTYPE>()
 * NINF - regular file read through proc_nx_info_read / proc_<OTYPE>()
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {			\
	.len  = sizeof(NAME) - 1,			\
	.name = (NAME),					\
	.mode = MODE,					\
	.iop  = IOP,					\
	.fop  = FOP,					\
	.op   = OP,					\
}

#define DIR(NAME, MODE, OTYPE)				\
	NOD(NAME, (S_IFDIR | (MODE)),			\
		&proc_ ## OTYPE ## _inode_operations,	\
		&proc_ ## OTYPE ## _file_operations, { } )

#define INF(NAME, MODE, OTYPE)				\
	NOD(NAME, (S_IFREG | (MODE)), NULL,		\
		&proc_vs_info_file_operations,		\
		{ .proc_vs_read = &proc_##OTYPE } )

#define VINF(NAME, MODE, OTYPE)				\
	NOD(NAME, (S_IFREG | (MODE)), NULL,		\
		&proc_vx_info_file_operations,		\
		{ .proc_vxi_read = &proc_##OTYPE } )

#define NINF(NAME, MODE, OTYPE)				\
	NOD(NAME, (S_IFREG | (MODE)), NULL,		\
		&proc_nx_info_file_operations,		\
		{ .proc_nxi_read = &proc_##OTYPE } )
21977+
21978+
21979+static struct file_operations proc_vs_info_file_operations = {
21980+ .read = proc_vs_info_read,
21981+};
21982+
21983+static struct file_operations proc_vx_info_file_operations = {
21984+ .read = proc_vx_info_read,
21985+};
21986+
21987+static struct dentry_operations proc_xid_dentry_operations = {
21988+ .d_revalidate = proc_xid_revalidate,
21989+};
21990+
21991+static struct vs_entry vx_base_stuff[] = {
21992+ VINF("info", S_IRUGO, vxi_info),
21993+ VINF("status", S_IRUGO, vxi_status),
21994+ VINF("limit", S_IRUGO, vxi_limit),
21995+ VINF("sched", S_IRUGO, vxi_sched),
21996+ VINF("nsproxy", S_IRUGO, vxi_nsproxy0),
21997+ VINF("nsproxy1",S_IRUGO, vxi_nsproxy1),
21998+ VINF("cvirt", S_IRUGO, vxi_cvirt),
21999+ VINF("cacct", S_IRUGO, vxi_cacct),
22000+ {}
22001+};
22002+
22003+
22004+
22005+
22006+static struct dentry *proc_xid_instantiate(struct inode *dir,
22007+ struct dentry *dentry, int id, void *ptr)
22008+{
22009+ dentry->d_op = &proc_xid_dentry_operations;
22010+ return vs_proc_instantiate(dir, dentry, id, ptr);
22011+}
22012+
22013+static struct dentry *proc_xid_lookup(struct inode *dir,
22014+ struct dentry *dentry, struct nameidata *nd)
22015+{
22016+ struct vs_entry *p = vx_base_stuff;
22017+ struct dentry *error = ERR_PTR(-ENOENT);
22018+
22019+ for (; p->name; p++) {
22020+ if (p->len != dentry->d_name.len)
22021+ continue;
22022+ if (!memcmp(dentry->d_name.name, p->name, p->len))
22023+ break;
22024+ }
22025+ if (!p->name)
22026+ goto out;
22027+
22028+ error = proc_xid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
22029+out:
22030+ return error;
22031+}
22032+
22033+static int proc_xid_readdir(struct file *filp,
22034+ void *dirent, filldir_t filldir)
22035+{
22036+ struct dentry *dentry = filp->f_dentry;
22037+ struct inode *inode = dentry->d_inode;
22038+ struct vs_entry *p = vx_base_stuff;
22039+ int size = sizeof(vx_base_stuff) / sizeof(struct vs_entry);
22040+ int pos, index;
22041+ u64 ino;
22042+
22043+ pos = filp->f_pos;
22044+ switch (pos) {
22045+ case 0:
22046+ ino = inode->i_ino;
22047+ if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
22048+ goto out;
22049+ pos++;
22050+ /* fall through */
22051+ case 1:
22052+ ino = parent_ino(dentry);
22053+ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
22054+ goto out;
22055+ pos++;
22056+ /* fall through */
22057+ default:
22058+ index = pos - 2;
22059+ if (index >= size)
22060+ goto out;
22061+ for (p += index; p->name; p++) {
22062+ if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
22063+ vs_proc_instantiate, PROC_I(inode)->fd, p))
22064+ goto out;
22065+ pos++;
22066+ }
22067+ }
22068+out:
22069+ filp->f_pos = pos;
22070+ return 1;
22071+}
22072+
22073+
22074+
22075+static struct file_operations proc_nx_info_file_operations = {
22076+ .read = proc_nx_info_read,
22077+};
22078+
22079+static struct dentry_operations proc_nid_dentry_operations = {
22080+ .d_revalidate = proc_nid_revalidate,
22081+};
22082+
22083+static struct vs_entry nx_base_stuff[] = {
22084+ NINF("info", S_IRUGO, nxi_info),
22085+ NINF("status", S_IRUGO, nxi_status),
22086+ {}
22087+};
22088+
22089+
22090+static struct dentry *proc_nid_instantiate(struct inode *dir,
22091+ struct dentry *dentry, int id, void *ptr)
22092+{
22093+ dentry->d_op = &proc_nid_dentry_operations;
22094+ return vs_proc_instantiate(dir, dentry, id, ptr);
22095+}
22096+
22097+static struct dentry *proc_nid_lookup(struct inode *dir,
22098+ struct dentry *dentry, struct nameidata *nd)
22099+{
22100+ struct vs_entry *p = nx_base_stuff;
22101+ struct dentry *error = ERR_PTR(-ENOENT);
22102+
22103+ for (; p->name; p++) {
22104+ if (p->len != dentry->d_name.len)
22105+ continue;
22106+ if (!memcmp(dentry->d_name.name, p->name, p->len))
22107+ break;
22108+ }
22109+ if (!p->name)
22110+ goto out;
22111+
22112+ error = proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
22113+out:
22114+ return error;
22115+}
22116+
22117+static int proc_nid_readdir(struct file *filp,
22118+ void *dirent, filldir_t filldir)
22119+{
22120+ struct dentry *dentry = filp->f_dentry;
22121+ struct inode *inode = dentry->d_inode;
22122+ struct vs_entry *p = nx_base_stuff;
22123+ int size = sizeof(nx_base_stuff) / sizeof(struct vs_entry);
22124+ int pos, index;
22125+ u64 ino;
22126+
22127+ pos = filp->f_pos;
22128+ switch (pos) {
22129+ case 0:
22130+ ino = inode->i_ino;
22131+ if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
22132+ goto out;
22133+ pos++;
22134+ /* fall through */
22135+ case 1:
22136+ ino = parent_ino(dentry);
22137+ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
22138+ goto out;
22139+ pos++;
22140+ /* fall through */
22141+ default:
22142+ index = pos - 2;
22143+ if (index >= size)
22144+ goto out;
22145+ for (p += index; p->name; p++) {
22146+ if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
22147+ vs_proc_instantiate, PROC_I(inode)->fd, p))
22148+ goto out;
22149+ pos++;
22150+ }
22151+ }
22152+out:
22153+ filp->f_pos = pos;
22154+ return 1;
22155+}
22156+
22157+
/* largest value that can still take another decimal digit */
#define MAX_MULBY10	((~0U - 9) / 10)

/*
 * atovid - parse a context id from a directory entry name
 * @str: name to parse, not necessarily NUL terminated
 * @len: number of characters in @str
 *
 * Accepts plain decimal numbers without a leading zero.  Each digit is
 * held in an unsigned variable so that characters below '0' wrap to a
 * large value and are rejected together with those above '9'; the
 * accumulator is unsigned as well, so it cannot overflow a signed int.
 *
 * Returns the id, 0 for an empty name, or -1 if the name contains a
 * non-digit, starts with '0' or does not fit into a positive int.
 */
static inline int atovid(const char *str, int len)
{
	unsigned int vid = 0;

	while (len-- > 0) {
		unsigned int c = *str++ - '0';

		if (c > 9)
			return -1;
		if (vid >= MAX_MULBY10)
			return -1;
		vid = vid * 10 + c;
		/* zero after a digit means the name started with '0' */
		if (!vid)
			return -1;
		/* ~0U >> 1 equals INT_MAX: the result must fit an int */
		if (vid > (~0U >> 1))
			return -1;
	}
	return vid;
}
22179+
22180+/* now the upper level (virtual) */
22181+
22182+
22183+static struct file_operations proc_xid_file_operations = {
22184+ .read = generic_read_dir,
22185+ .readdir = proc_xid_readdir,
22186+};
22187+
22188+static struct inode_operations proc_xid_inode_operations = {
22189+ .lookup = proc_xid_lookup,
22190+};
22191+
22192+static struct vs_entry vx_virtual_stuff[] = {
22193+ INF("info", S_IRUGO, virtual_info),
22194+ INF("status", S_IRUGO, virtual_status),
22195+ DIR(NULL, S_IRUGO | S_IXUGO, xid),
22196+};
22197+
22198+
22199+static struct dentry *proc_virtual_lookup(struct inode *dir,
22200+ struct dentry *dentry, struct nameidata *nd)
22201+{
22202+ struct vs_entry *p = vx_virtual_stuff;
22203+ struct dentry *error = ERR_PTR(-ENOENT);
22204+ int id = 0;
22205+
22206+ for (; p->name; p++) {
22207+ if (p->len != dentry->d_name.len)
22208+ continue;
22209+ if (!memcmp(dentry->d_name.name, p->name, p->len))
22210+ break;
22211+ }
22212+ if (p->name)
22213+ goto instantiate;
22214+
22215+ id = atovid(dentry->d_name.name, dentry->d_name.len);
22216+ if ((id < 0) || !xid_is_hashed(id))
22217+ goto out;
22218+
22219+instantiate:
22220+ error = proc_xid_instantiate(dir, dentry, id, p);
22221+out:
22222+ return error;
22223+}
22224+
22225+static struct file_operations proc_nid_file_operations = {
22226+ .read = generic_read_dir,
22227+ .readdir = proc_nid_readdir,
22228+};
22229+
22230+static struct inode_operations proc_nid_inode_operations = {
22231+ .lookup = proc_nid_lookup,
22232+};
22233+
22234+static struct vs_entry nx_virtnet_stuff[] = {
22235+ INF("info", S_IRUGO, virtnet_info),
22236+ INF("status", S_IRUGO, virtnet_status),
22237+ DIR(NULL, S_IRUGO | S_IXUGO, nid),
22238+};
22239+
22240+
22241+static struct dentry *proc_virtnet_lookup(struct inode *dir,
22242+ struct dentry *dentry, struct nameidata *nd)
22243+{
22244+ struct vs_entry *p = nx_virtnet_stuff;
22245+ struct dentry *error = ERR_PTR(-ENOENT);
22246+ int id = 0;
22247+
22248+ for (; p->name; p++) {
22249+ if (p->len != dentry->d_name.len)
22250+ continue;
22251+ if (!memcmp(dentry->d_name.name, p->name, p->len))
22252+ break;
22253+ }
22254+ if (p->name)
22255+ goto instantiate;
22256+
22257+ id = atovid(dentry->d_name.name, dentry->d_name.len);
22258+ if ((id < 0) || !nid_is_hashed(id))
22259+ goto out;
22260+
22261+instantiate:
22262+ error = proc_nid_instantiate(dir, dentry, id, p);
22263+out:
22264+ return error;
22265+}
22266+
22267+
22268+#define PROC_MAXVIDS 32
22269+
22270+int proc_virtual_readdir(struct file *filp,
22271+ void *dirent, filldir_t filldir)
22272+{
22273+ struct dentry *dentry = filp->f_dentry;
22274+ struct inode *inode = dentry->d_inode;
22275+ struct vs_entry *p = vx_virtual_stuff;
22276+ int size = sizeof(vx_virtual_stuff) / sizeof(struct vs_entry);
22277+ int pos, index;
22278+ unsigned int xid_array[PROC_MAXVIDS];
22279+ char buf[PROC_NUMBUF];
22280+ unsigned int nr_xids, i;
22281+ u64 ino;
22282+
22283+ pos = filp->f_pos;
22284+ switch (pos) {
22285+ case 0:
22286+ ino = inode->i_ino;
22287+ if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
22288+ goto out;
22289+ pos++;
22290+ /* fall through */
22291+ case 1:
22292+ ino = parent_ino(dentry);
22293+ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
22294+ goto out;
22295+ pos++;
22296+ /* fall through */
22297+ default:
22298+ index = pos - 2;
22299+ if (index >= size)
22300+ goto entries;
22301+ for (p += index; p->name; p++) {
22302+ if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
22303+ vs_proc_instantiate, 0, p))
22304+ goto out;
22305+ pos++;
22306+ }
22307+ entries:
22308+ index = pos - size;
22309+ p = &vx_virtual_stuff[size - 1];
22310+ nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS);
22311+ for (i = 0; i < nr_xids; i++) {
22312+ int n, xid = xid_array[i];
22313+ unsigned int j = PROC_NUMBUF;
22314+
22315+ n = xid;
22316+ do
22317+ buf[--j] = '0' + (n % 10);
22318+ while (n /= 10);
22319+
22320+ if (proc_fill_cache(filp, dirent, filldir,
22321+ buf + j, PROC_NUMBUF - j,
22322+ vs_proc_instantiate, xid, p))
22323+ goto out;
22324+ pos++;
22325+ }
22326+ }
22327+out:
22328+ filp->f_pos = pos;
22329+ return 0;
22330+}
22331+
22332+static int proc_virtual_getattr(struct vfsmount *mnt,
22333+ struct dentry *dentry, struct kstat *stat)
22334+{
22335+ struct inode *inode = dentry->d_inode;
22336+
22337+ generic_fillattr(inode, stat);
22338+ stat->nlink = 2 + atomic_read(&vx_global_cactive);
22339+ return 0;
22340+}
22341+
22342+static struct file_operations proc_virtual_dir_operations = {
22343+ .read = generic_read_dir,
22344+ .readdir = proc_virtual_readdir,
22345+};
22346+
22347+static struct inode_operations proc_virtual_dir_inode_operations = {
22348+ .getattr = proc_virtual_getattr,
22349+ .lookup = proc_virtual_lookup,
22350+};
22351+
22352+
22353+
22354+
22355+
22356+int proc_virtnet_readdir(struct file *filp,
22357+ void *dirent, filldir_t filldir)
22358+{
22359+ struct dentry *dentry = filp->f_dentry;
22360+ struct inode *inode = dentry->d_inode;
22361+ struct vs_entry *p = nx_virtnet_stuff;
22362+ int size = sizeof(nx_virtnet_stuff) / sizeof(struct vs_entry);
22363+ int pos, index;
22364+ unsigned int nid_array[PROC_MAXVIDS];
22365+ char buf[PROC_NUMBUF];
22366+ unsigned int nr_nids, i;
22367+ u64 ino;
22368+
22369+ pos = filp->f_pos;
22370+ switch (pos) {
22371+ case 0:
22372+ ino = inode->i_ino;
22373+ if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
22374+ goto out;
22375+ pos++;
22376+ /* fall through */
22377+ case 1:
22378+ ino = parent_ino(dentry);
22379+ if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
22380+ goto out;
22381+ pos++;
22382+ /* fall through */
22383+ default:
22384+ index = pos - 2;
22385+ if (index >= size)
22386+ goto entries;
22387+ for (p += index; p->name; p++) {
22388+ if (proc_fill_cache(filp, dirent, filldir, p->name, p->len,
22389+ vs_proc_instantiate, 0, p))
22390+ goto out;
22391+ pos++;
22392+ }
22393+ entries:
22394+ index = pos - size;
22395+ p = &nx_virtnet_stuff[size - 1];
22396+ nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS);
22397+ for (i = 0; i < nr_nids; i++) {
22398+ int n, nid = nid_array[i];
22399+ unsigned int j = PROC_NUMBUF;
22400+
22401+ n = nid;
22402+ do
22403+ buf[--j] = '0' + (n % 10);
22404+ while (n /= 10);
22405+
22406+ if (proc_fill_cache(filp, dirent, filldir,
22407+ buf + j, PROC_NUMBUF - j,
22408+ vs_proc_instantiate, nid, p))
22409+ goto out;
22410+ pos++;
22411+ }
22412+ }
22413+out:
22414+ filp->f_pos = pos;
22415+ return 0;
22416+}
22417+
22418+static int proc_virtnet_getattr(struct vfsmount *mnt,
22419+ struct dentry *dentry, struct kstat *stat)
22420+{
22421+ struct inode *inode = dentry->d_inode;
22422+
22423+ generic_fillattr(inode, stat);
22424+ stat->nlink = 2 + atomic_read(&nx_global_cactive);
22425+ return 0;
22426+}
22427+
22428+static struct file_operations proc_virtnet_dir_operations = {
22429+ .read = generic_read_dir,
22430+ .readdir = proc_virtnet_readdir,
22431+};
22432+
22433+static struct inode_operations proc_virtnet_dir_inode_operations = {
22434+ .getattr = proc_virtnet_getattr,
22435+ .lookup = proc_virtnet_lookup,
22436+};
22437+
22438+
22439+
22440+void proc_vx_init(void)
22441+{
22442+ struct proc_dir_entry *ent;
22443+
22444+ ent = proc_mkdir("virtual", 0);
22445+ if (ent) {
22446+ ent->proc_fops = &proc_virtual_dir_operations;
22447+ ent->proc_iops = &proc_virtual_dir_inode_operations;
22448+ }
22449+ proc_virtual = ent;
22450+
22451+ ent = proc_mkdir("virtnet", 0);
22452+ if (ent) {
22453+ ent->proc_fops = &proc_virtnet_dir_operations;
22454+ ent->proc_iops = &proc_virtnet_dir_inode_operations;
22455+ }
22456+ proc_virtnet = ent;
22457+}
22458+
22459+
22460+
22461+
22462+/* per pid info */
22463+
22464+
22465+int proc_pid_vx_info(struct task_struct *p, char *buffer)
22466+{
22467+ struct vx_info *vxi;
22468+ char *orig = buffer;
22469+
22470+ buffer += sprintf(buffer, "XID:\t%d\n", vx_task_xid(p));
22471+
22472+ vxi = task_get_vx_info(p);
22473+ if (!vxi)
22474+ goto out;
22475+
22476+ buffer += sprintf(buffer, "BCaps:\t");
22477+ buffer = print_cap_t(buffer, &vxi->vx_bcaps);
22478+ buffer += sprintf(buffer, "\n");
22479+ buffer += sprintf(buffer, "CCaps:\t%016llx\n",
22480+ (unsigned long long)vxi->vx_ccaps);
22481+ buffer += sprintf(buffer, "CFlags:\t%016llx\n",
22482+ (unsigned long long)vxi->vx_flags);
22483+ buffer += sprintf(buffer, "CIPid:\t%d\n", vxi->vx_initpid);
22484+
22485+ put_vx_info(vxi);
22486+out:
22487+ return buffer - orig;
22488+}
22489+
22490+
22491+int proc_pid_nx_info(struct task_struct *p, char *buffer)
22492+{
22493+ struct nx_info *nxi;
22494+ struct nx_addr_v4 *v4a;
22495+#ifdef CONFIG_IPV6
22496+ struct nx_addr_v6 *v6a;
22497+#endif
22498+ char *orig = buffer;
22499+ int i;
22500+
22501+ buffer += sprintf(buffer, "NID:\t%d\n", nx_task_nid(p));
22502+
22503+ nxi = task_get_nx_info(p);
22504+ if (!nxi)
22505+ goto out;
22506+
22507+ buffer += sprintf(buffer, "NCaps:\t%016llx\n",
22508+ (unsigned long long)nxi->nx_ncaps);
22509+ buffer += sprintf(buffer, "NFlags:\t%016llx\n",
22510+ (unsigned long long)nxi->nx_flags);
22511+
22512+ buffer += sprintf(buffer,
22513+ "V4Root[bcast]:\t" NIPQUAD_FMT "\n",
22514+ NIPQUAD(nxi->v4_bcast.s_addr));
22515+ buffer += sprintf (buffer,
22516+ "V4Root[lback]:\t" NIPQUAD_FMT "\n",
22517+ NIPQUAD(nxi->v4_lback.s_addr));
22518+ if (!NX_IPV4(nxi))
22519+ goto skip_v4;
22520+ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
22521+ buffer += sprintf(buffer, "V4Root[%d]:\t" NXAV4_FMT "\n",
22522+ i, NXAV4(v4a));
22523+skip_v4:
22524+#ifdef CONFIG_IPV6
22525+ if (!NX_IPV6(nxi))
22526+ goto skip_v6;
22527+ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
22528+ buffer += sprintf(buffer, "V6Root[%d]:\t" NXAV6_FMT "\n",
22529+ i, NXAV6(v6a));
22530+skip_v6:
22531+#endif
22532+ put_nx_info(nxi);
22533+out:
22534+ return buffer - orig;
22535+}
22536+
22537diff -NurpP --minimal linux-3.0.9/kernel/vserver/sched.c linux-3.0.9-vs2.3.2.1/kernel/vserver/sched.c
22538--- linux-3.0.9/kernel/vserver/sched.c 1970-01-01 01:00:00.000000000 +0100
22539+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/sched.c 2011-06-10 22:11:24.000000000 +0200
22540@@ -0,0 +1,82 @@
22541+/*
22542+ * linux/kernel/vserver/sched.c
22543+ *
22544+ * Virtual Server: Scheduler Support
22545+ *
22546+ * Copyright (C) 2004-2010 Herbert Pötzl
22547+ *
22548+ * V0.01 adapted Sam Vilains version to 2.6.3
22549+ * V0.02 removed legacy interface
22550+ * V0.03 changed vcmds to vxi arg
22551+ * V0.04 removed older and legacy interfaces
22552+ * V0.05 removed scheduler code/commands
22553+ *
22554+ */
22555+
22556+#include <linux/vs_context.h>
22557+#include <linux/vs_sched.h>
22558+#include <linux/vserver/sched_cmd.h>
22559+
22560+#include <asm/uaccess.h>
22561+
22562+
/*
 * vx_update_sched_param - push context scheduler settings to one cpu
 * @sched:    context-wide scheduler data (source)
 * @sched_pc: per-cpu scheduler data (destination)
 *
 * Only prio_bias is copied.
 */
void vx_update_sched_param(struct _vx_sched *sched,
	struct _vx_sched_pc *sched_pc)
{
	sched_pc->prio_bias = sched->prio_bias;
}
22568+
22569+static int do_set_prio_bias(struct vx_info *vxi, struct vcmd_prio_bias *data)
22570+{
22571+ int cpu;
22572+
22573+ if (data->prio_bias > MAX_PRIO_BIAS)
22574+ data->prio_bias = MAX_PRIO_BIAS;
22575+ if (data->prio_bias < MIN_PRIO_BIAS)
22576+ data->prio_bias = MIN_PRIO_BIAS;
22577+
22578+ if (data->cpu_id != ~0) {
22579+ vxi->sched.update = cpumask_of_cpu(data->cpu_id);
22580+ cpus_and(vxi->sched.update, cpu_online_map,
22581+ vxi->sched.update);
22582+ } else
22583+ vxi->sched.update = cpu_online_map;
22584+
22585+ for_each_cpu_mask(cpu, vxi->sched.update)
22586+ vx_update_sched_param(&vxi->sched,
22587+ &vx_per_cpu(vxi, sched_pc, cpu));
22588+ return 0;
22589+}
22590+
22591+int vc_set_prio_bias(struct vx_info *vxi, void __user *data)
22592+{
22593+ struct vcmd_prio_bias vc_data;
22594+
22595+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
22596+ return -EFAULT;
22597+
22598+ return do_set_prio_bias(vxi, &vc_data);
22599+}
22600+
22601+int vc_get_prio_bias(struct vx_info *vxi, void __user *data)
22602+{
22603+ struct vcmd_prio_bias vc_data;
22604+ struct _vx_sched_pc *pcd;
22605+ int cpu;
22606+
22607+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
22608+ return -EFAULT;
22609+
22610+ cpu = vc_data.cpu_id;
22611+
22612+ if (!cpu_possible(cpu))
22613+ return -EINVAL;
22614+
22615+ pcd = &vx_per_cpu(vxi, sched_pc, cpu);
22616+ vc_data.prio_bias = pcd->prio_bias;
22617+
22618+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22619+ return -EFAULT;
22620+ return 0;
22621+}
22622+
22623diff -NurpP --minimal linux-3.0.9/kernel/vserver/sched_init.h linux-3.0.9-vs2.3.2.1/kernel/vserver/sched_init.h
22624--- linux-3.0.9/kernel/vserver/sched_init.h 1970-01-01 01:00:00.000000000 +0100
22625+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/sched_init.h 2011-06-10 22:11:24.000000000 +0200
22626@@ -0,0 +1,27 @@
22627+
/* Initialise the context-wide scheduler data: the bias starts at zero. */
static inline void vx_info_init_sched(struct _vx_sched *sched)
{
	/* scheduling; hard code starting values as constants */
	sched->prio_bias = 0;
}
22633+
22634+static inline
22635+void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
22636+{
22637+ sched_pc->prio_bias = 0;
22638+
22639+ sched_pc->user_ticks = 0;
22640+ sched_pc->sys_ticks = 0;
22641+ sched_pc->hold_ticks = 0;
22642+}
22643+
/* Teardown hook for the context-wide scheduler data; nothing to do. */
static inline void vx_info_exit_sched(struct _vx_sched *sched)
{
}
22648+
/* Teardown hook for the per-cpu scheduler data; nothing to do. */
static inline
void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
{
}
22654diff -NurpP --minimal linux-3.0.9/kernel/vserver/sched_proc.h linux-3.0.9-vs2.3.2.1/kernel/vserver/sched_proc.h
22655--- linux-3.0.9/kernel/vserver/sched_proc.h 1970-01-01 01:00:00.000000000 +0100
22656+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/sched_proc.h 2011-06-10 22:11:24.000000000 +0200
22657@@ -0,0 +1,32 @@
22658+#ifndef _VX_SCHED_PROC_H
22659+#define _VX_SCHED_PROC_H
22660+
22661+
22662+static inline
22663+int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
22664+{
22665+ int length = 0;
22666+
22667+ length += sprintf(buffer,
22668+ "PrioBias:\t%8d\n",
22669+ sched->prio_bias);
22670+ return length;
22671+}
22672+
22673+static inline
22674+int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc,
22675+ char *buffer, int cpu)
22676+{
22677+ int length = 0;
22678+
22679+ length += sprintf(buffer + length,
22680+ "cpu %d: %lld %lld %lld", cpu,
22681+ (unsigned long long)sched_pc->user_ticks,
22682+ (unsigned long long)sched_pc->sys_ticks,
22683+ (unsigned long long)sched_pc->hold_ticks);
22684+ length += sprintf(buffer + length,
22685+ " %d\n", sched_pc->prio_bias);
22686+ return length;
22687+}
22688+
22689+#endif /* _VX_SCHED_PROC_H */
22690diff -NurpP --minimal linux-3.0.9/kernel/vserver/signal.c linux-3.0.9-vs2.3.2.1/kernel/vserver/signal.c
22691--- linux-3.0.9/kernel/vserver/signal.c 1970-01-01 01:00:00.000000000 +0100
22692+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/signal.c 2011-06-10 22:11:24.000000000 +0200
22693@@ -0,0 +1,134 @@
22694+/*
22695+ * linux/kernel/vserver/signal.c
22696+ *
22697+ * Virtual Server: Signal Support
22698+ *
22699+ * Copyright (C) 2003-2007 Herbert Pötzl
22700+ *
22701+ * V0.01 broken out from vcontext V0.05
22702+ * V0.02 changed vcmds to vxi arg
22703+ * V0.03 adjusted siginfo for kill
22704+ *
22705+ */
22706+
22707+#include <asm/uaccess.h>
22708+
22709+#include <linux/vs_context.h>
22710+#include <linux/vs_pid.h>
22711+#include <linux/vserver/signal_cmd.h>
22712+
22713+
/*
 * vx_info_kill - send a signal to tasks of a context
 * @vxi: context whose tasks are targeted
 * @pid: 0 or -1 for "every task in the context", 1 for the context's
 *       init (if one is recorded), anything else a real pid
 * @sig: signal number
 *
 * Signals are sent with SEND_SIG_PRIV while holding tasklist_lock for
 * reading.  Returns -ESRCH if nothing was signalled, otherwise the
 * result of group_send_sig_info(); in the broadcast case the last
 * result other than -EPERM wins.
 */
int vx_info_kill(struct vx_info *vxi, int pid, int sig)
{
	int retval, count = 0;
	struct task_struct *p;
	struct siginfo *sip = SEND_SIG_PRIV;

	retval = -ESRCH;
	vxdprintk(VXD_CBIT(misc, 4),
		"vx_info_kill(%p[#%d],%d,%d)*",
		vxi, vxi->vx_id, pid, sig);
	read_lock(&tasklist_lock);
	switch (pid) {
	case 0:
	case -1:
		for_each_process(p) {
			int err = 0;

			/*
			 * Skip tasks of other contexts and pids <= 1; for
			 * pid == -1 (non-zero) the context's init is
			 * spared as well.
			 */
			if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 ||
				(pid && vxi->vx_initpid == p->pid))
				continue;

			err = group_send_sig_info(sig, sip, p);
			/* count is maintained but never read in this function */
			++count;
			if (err != -EPERM)
				retval = err;
		}
		break;

	case 1:
		/* pid 1 is redirected to the context's init, if recorded */
		if (vxi->vx_initpid) {
			pid = vxi->vx_initpid;
			/* for now, only SIGINT to private init ... */
			if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
				/* ... as long as there are tasks left */
				(atomic_read(&vxi->vx_tasks) > 1))
				sig = SIGINT;
		}
		/* fallthrough */
	default:
		rcu_read_lock();
		p = find_task_by_real_pid(pid);
		rcu_read_unlock();
		if (p) {
			/* only tasks that belong to this context are signalled */
			if (vx_task_xid(p) == vxi->vx_id)
				retval = group_send_sig_info(sig, sip, p);
		}
		break;
	}
	read_unlock(&tasklist_lock);
	vxdprintk(VXD_CBIT(misc, 4),
		"vx_info_kill(%p[#%d],%d,%d,%ld) = %d",
		vxi, vxi->vx_id, pid, sig, (long)sip, retval);
	return retval;
}
22768+
22769+int vc_ctx_kill(struct vx_info *vxi, void __user *data)
22770+{
22771+ struct vcmd_ctx_kill_v0 vc_data;
22772+
22773+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
22774+ return -EFAULT;
22775+
22776+ /* special check to allow guest shutdown */
22777+ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
22778+ /* forbid killall pid=0 when init is present */
22779+ (((vc_data.pid < 1) && vxi->vx_initpid) ||
22780+ (vc_data.pid > 1)))
22781+ return -EACCES;
22782+
22783+ return vx_info_kill(vxi, vc_data.pid, vc_data.sig);
22784+}
22785+
22786+
22787+static int __wait_exit(struct vx_info *vxi)
22788+{
22789+ DECLARE_WAITQUEUE(wait, current);
22790+ int ret = 0;
22791+
22792+ add_wait_queue(&vxi->vx_wait, &wait);
22793+ set_current_state(TASK_INTERRUPTIBLE);
22794+
22795+wait:
22796+ if (vx_info_state(vxi,
22797+ VXS_SHUTDOWN | VXS_HASHED | VXS_HELPER) == VXS_SHUTDOWN)
22798+ goto out;
22799+ if (signal_pending(current)) {
22800+ ret = -ERESTARTSYS;
22801+ goto out;
22802+ }
22803+ schedule();
22804+ goto wait;
22805+
22806+out:
22807+ set_current_state(TASK_RUNNING);
22808+ remove_wait_queue(&vxi->vx_wait, &wait);
22809+ return ret;
22810+}
22811+
22812+
22813+
22814+int vc_wait_exit(struct vx_info *vxi, void __user *data)
22815+{
22816+ struct vcmd_wait_exit_v0 vc_data;
22817+ int ret;
22818+
22819+ ret = __wait_exit(vxi);
22820+ vc_data.reboot_cmd = vxi->reboot_cmd;
22821+ vc_data.exit_code = vxi->exit_code;
22822+
22823+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22824+ ret = -EFAULT;
22825+ return ret;
22826+}
22827+
22828diff -NurpP --minimal linux-3.0.9/kernel/vserver/space.c linux-3.0.9-vs2.3.2.1/kernel/vserver/space.c
22829--- linux-3.0.9/kernel/vserver/space.c 1970-01-01 01:00:00.000000000 +0100
22830+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/space.c 2011-07-20 02:11:49.000000000 +0200
22831@@ -0,0 +1,435 @@
22832+/*
22833+ * linux/kernel/vserver/space.c
22834+ *
22835+ * Virtual Server: Context Space Support
22836+ *
22837+ * Copyright (C) 2003-2010 Herbert Pötzl
22838+ *
22839+ * V0.01 broken out from context.c 0.07
22840+ * V0.02 added task locking for namespace
22841+ * V0.03 broken out vx_enter_namespace
22842+ * V0.04 added *space support and commands
22843+ * V0.05 added credential support
22844+ *
22845+ */
22846+
22847+#include <linux/utsname.h>
22848+#include <linux/nsproxy.h>
22849+#include <linux/err.h>
22850+#include <linux/fs_struct.h>
22851+#include <linux/cred.h>
22852+#include <asm/uaccess.h>
22853+
22854+#include <linux/vs_context.h>
22855+#include <linux/vserver/space.h>
22856+#include <linux/vserver/space_cmd.h>
22857+
/*
 * Global counters, one per namespace-related object type, all starting
 * at zero.  They are not modified in this part of the file.
 * NOTE(review): presumably usage statistics maintained by the
 * namespace code elsewhere -- confirm against the users.
 */
atomic_t vs_global_nsproxy	= ATOMIC_INIT(0);
atomic_t vs_global_fs		= ATOMIC_INIT(0);
atomic_t vs_global_mnt_ns	= ATOMIC_INIT(0);
atomic_t vs_global_uts_ns	= ATOMIC_INIT(0);
atomic_t vs_global_user_ns	= ATOMIC_INIT(0);
atomic_t vs_global_pid_ns	= ATOMIC_INIT(0);
22864+
22865+
22866+/* namespace functions */
22867+
22868+#include <linux/mnt_namespace.h>
22869+#include <linux/user_namespace.h>
22870+#include <linux/pid_namespace.h>
22871+#include <linux/ipc_namespace.h>
22872+#include <net/net_namespace.h>
22873+
22874+
/*
 * Space mask reported by vc_get_space_mask() for type 0: CLONE_FS and
 * CLONE_NEWNS plus whichever of UTS/IPC/USER namespaces are configured.
 */
static const struct vcmd_space_mask_v1 space_mask_v0 = {
	.mask = CLONE_FS |
		CLONE_NEWNS |
#ifdef CONFIG_UTS_NS
		CLONE_NEWUTS |
#endif
#ifdef CONFIG_IPC_NS
		CLONE_NEWIPC |
#endif
#ifdef CONFIG_USER_NS
		CLONE_NEWUSER |
#endif
		0
};
22889+
/*
 * Full space mask: everything in space_mask_v0 plus PID and NET
 * namespaces when configured.  Reported by vc_get_space_mask() for
 * type 1 and used by vx_set_space() to validate the requested mask.
 */
static const struct vcmd_space_mask_v1 space_mask = {
	.mask = CLONE_FS |
		CLONE_NEWNS |
#ifdef CONFIG_UTS_NS
		CLONE_NEWUTS |
#endif
#ifdef CONFIG_IPC_NS
		CLONE_NEWIPC |
#endif
#ifdef CONFIG_USER_NS
		CLONE_NEWUSER |
#endif
#ifdef CONFIG_PID_NS
		CLONE_NEWPID |
#endif
#ifdef CONFIG_NET_NS
		CLONE_NEWNET |
#endif
		0
};
22910+
/*
 * Default space mask, reported by vc_get_space_mask() for any type other
 * than 0 or 1.  CLONE_NEWPID is currently disabled here (commented out)
 * and CLONE_NEWNET is not listed at all.
 */
static const struct vcmd_space_mask_v1 default_space_mask = {
	.mask = CLONE_FS |
		CLONE_NEWNS |
#ifdef CONFIG_UTS_NS
		CLONE_NEWUTS |
#endif
#ifdef CONFIG_IPC_NS
		CLONE_NEWIPC |
#endif
#ifdef CONFIG_USER_NS
		CLONE_NEWUSER |
#endif
#ifdef CONFIG_PID_NS
//		CLONE_NEWPID |
#endif
		0
};
22928+
/*
 * build a new nsproxy mix
 * assumes that both proxies are 'const'
 * does not touch nsproxy refcounts
 * will hold a reference on the result.
 *
 * The result starts as a copy of old_nsproxy; for every CLONE_NEW* bit
 * set in mask (NS, UTS, IPC and, when configured, PID and NET) the
 * corresponding namespace pointer is replaced by the one from
 * new_nsproxy.  Returns whatever copy_nsproxy() returned if that value
 * is false (NULL), so callers must check for NULL.
 */

struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy,
	struct nsproxy *new_nsproxy, unsigned long mask)
{
	struct mnt_namespace *old_ns;
	struct uts_namespace *old_uts;
	struct ipc_namespace *old_ipc;
#ifdef CONFIG_PID_NS
	struct pid_namespace *old_pid;
#endif
#ifdef CONFIG_NET_NS
	struct net *old_net;
#endif
	struct nsproxy *nsproxy;

	nsproxy = copy_nsproxy(old_nsproxy);
	if (!nsproxy)
		goto out;

	/*
	 * For each selected namespace: remember the pointer held by the
	 * copy, install the replacement and take a reference on it.  The
	 * remembered pointers are put only after all replacements are in.
	 */
	if (mask & CLONE_NEWNS) {
		old_ns = nsproxy->mnt_ns;
		nsproxy->mnt_ns = new_nsproxy->mnt_ns;
		if (nsproxy->mnt_ns)
			get_mnt_ns(nsproxy->mnt_ns);
	} else
		old_ns = NULL;

	if (mask & CLONE_NEWUTS) {
		old_uts = nsproxy->uts_ns;
		nsproxy->uts_ns = new_nsproxy->uts_ns;
		if (nsproxy->uts_ns)
			get_uts_ns(nsproxy->uts_ns);
	} else
		old_uts = NULL;

	if (mask & CLONE_NEWIPC) {
		old_ipc = nsproxy->ipc_ns;
		nsproxy->ipc_ns = new_nsproxy->ipc_ns;
		if (nsproxy->ipc_ns)
			get_ipc_ns(nsproxy->ipc_ns);
	} else
		old_ipc = NULL;

#ifdef CONFIG_PID_NS
	if (mask & CLONE_NEWPID) {
		old_pid = nsproxy->pid_ns;
		nsproxy->pid_ns = new_nsproxy->pid_ns;
		if (nsproxy->pid_ns)
			get_pid_ns(nsproxy->pid_ns);
	} else
		old_pid = NULL;
#endif
#ifdef CONFIG_NET_NS
	if (mask & CLONE_NEWNET) {
		old_net = nsproxy->net_ns;
		nsproxy->net_ns = new_nsproxy->net_ns;
		if (nsproxy->net_ns)
			get_net(nsproxy->net_ns);
	} else
		old_net = NULL;
#endif
	/* release the namespaces that were replaced in the copy */
	if (old_ns)
		put_mnt_ns(old_ns);
	if (old_uts)
		put_uts_ns(old_uts);
	if (old_ipc)
		put_ipc_ns(old_ipc);
#ifdef CONFIG_PID_NS
	if (old_pid)
		put_pid_ns(old_pid);
#endif
#ifdef CONFIG_NET_NS
	if (old_net)
		put_net(old_net);
#endif
out:
	return nsproxy;
}
23013+
23014+
23015+/*
23016+ * merge two nsproxy structs into a new one.
23017+ * will hold a reference on the result.
23018+ */
23019+
23020+static inline
23021+struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old,
23022+ struct nsproxy *proxy, unsigned long mask)
23023+{
23024+ struct nsproxy null_proxy = { .mnt_ns = NULL };
23025+
23026+ if (!proxy)
23027+ return NULL;
23028+
23029+ if (mask) {
23030+ /* vs_mix_nsproxy returns with reference */
23031+ return vs_mix_nsproxy(old ? old : &null_proxy,
23032+ proxy, mask);
23033+ }
23034+ get_nsproxy(proxy);
23035+ return proxy;
23036+}
23037+
23038+
23039+int vx_enter_space(struct vx_info *vxi, unsigned long mask, unsigned index)
23040+{
23041+ struct nsproxy *proxy, *proxy_cur, *proxy_new;
23042+ struct fs_struct *fs_cur, *fs = NULL;
23043+ struct _vx_space *space;
23044+ int ret, kill = 0;
23045+
23046+ vxdprintk(VXD_CBIT(space, 8), "vx_enter_space(%p[#%u],0x%08lx,%d)",
23047+ vxi, vxi->vx_id, mask, index);
23048+
23049+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
23050+ return -EACCES;
23051+
23052+ if (index >= VX_SPACES)
23053+ return -EINVAL;
23054+
23055+ space = &vxi->space[index];
23056+
23057+ if (!mask)
23058+ mask = space->vx_nsmask;
23059+
23060+ if ((mask & space->vx_nsmask) != mask)
23061+ return -EINVAL;
23062+
23063+ if (mask & CLONE_FS) {
23064+ fs = copy_fs_struct(space->vx_fs);
23065+ if (!fs)
23066+ return -ENOMEM;
23067+ }
23068+ proxy = space->vx_nsproxy;
23069+
23070+ vxdprintk(VXD_CBIT(space, 9),
23071+ "vx_enter_space(%p[#%u],0x%08lx,%d) -> (%p,%p)",
23072+ vxi, vxi->vx_id, mask, index, proxy, fs);
23073+
23074+ task_lock(current);
23075+ fs_cur = current->fs;
23076+
23077+ if (mask & CLONE_FS) {
23078+ spin_lock(&fs_cur->lock);
23079+ current->fs = fs;
23080+ kill = !--fs_cur->users;
23081+ spin_unlock(&fs_cur->lock);
23082+ }
23083+
23084+ proxy_cur = current->nsproxy;
23085+ get_nsproxy(proxy_cur);
23086+ task_unlock(current);
23087+
23088+ if (kill)
23089+ free_fs_struct(fs_cur);
23090+
23091+ proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask);
23092+ if (IS_ERR(proxy_new)) {
23093+ ret = PTR_ERR(proxy_new);
23094+ goto out_put;
23095+ }
23096+
23097+ proxy_new = xchg(&current->nsproxy, proxy_new);
23098+
23099+ if (mask & CLONE_NEWUSER) {
23100+ struct cred *cred;
23101+
23102+ vxdprintk(VXD_CBIT(space, 10),
23103+ "vx_enter_space(%p[#%u],%p) cred (%p,%p)",
23104+ vxi, vxi->vx_id, space->vx_cred,
23105+ current->real_cred, current->cred);
23106+
23107+ if (space->vx_cred) {
23108+ cred = __prepare_creds(space->vx_cred);
23109+ if (cred)
23110+ commit_creds(cred);
23111+ }
23112+ }
23113+
23114+ ret = 0;
23115+
23116+ if (proxy_new)
23117+ put_nsproxy(proxy_new);
23118+out_put:
23119+ if (proxy_cur)
23120+ put_nsproxy(proxy_cur);
23121+ return ret;
23122+}
23123+
23124+
23125+int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index)
23126+{
23127+ struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new;
23128+ struct fs_struct *fs_vxi, *fs;
23129+ struct _vx_space *space;
23130+ int ret, kill = 0;
23131+
23132+ vxdprintk(VXD_CBIT(space, 8), "vx_set_space(%p[#%u],0x%08lx,%d)",
23133+ vxi, vxi->vx_id, mask, index);
23134+
23135+ if ((mask & space_mask.mask) != mask)
23136+ return -EINVAL;
23137+
23138+ if (index >= VX_SPACES)
23139+ return -EINVAL;
23140+
23141+ space = &vxi->space[index];
23142+
23143+ proxy_vxi = space->vx_nsproxy;
23144+ fs_vxi = space->vx_fs;
23145+
23146+ if (mask & CLONE_FS) {
23147+ fs = copy_fs_struct(current->fs);
23148+ if (!fs)
23149+ return -ENOMEM;
23150+ }
23151+
23152+ task_lock(current);
23153+
23154+ if (mask & CLONE_FS) {
23155+ spin_lock(&fs_vxi->lock);
23156+ space->vx_fs = fs;
23157+ kill = !--fs_vxi->users;
23158+ spin_unlock(&fs_vxi->lock);
23159+ }
23160+
23161+ proxy_cur = current->nsproxy;
23162+ get_nsproxy(proxy_cur);
23163+ task_unlock(current);
23164+
23165+ if (kill)
23166+ free_fs_struct(fs_vxi);
23167+
23168+ proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask);
23169+ if (IS_ERR(proxy_new)) {
23170+ ret = PTR_ERR(proxy_new);
23171+ goto out_put;
23172+ }
23173+
23174+ proxy_new = xchg(&space->vx_nsproxy, proxy_new);
23175+ space->vx_nsmask |= mask;
23176+
23177+ if (mask & CLONE_NEWUSER) {
23178+ struct cred *cred;
23179+
23180+ vxdprintk(VXD_CBIT(space, 10),
23181+ "vx_set_space(%p[#%u],%p) cred (%p,%p)",
23182+ vxi, vxi->vx_id, space->vx_cred,
23183+ current->real_cred, current->cred);
23184+
23185+ cred = prepare_creds();
23186+ cred = (struct cred *)xchg(&space->vx_cred, cred);
23187+ if (cred)
23188+ abort_creds(cred);
23189+ }
23190+
23191+ ret = 0;
23192+
23193+ if (proxy_new)
23194+ put_nsproxy(proxy_new);
23195+out_put:
23196+ if (proxy_cur)
23197+ put_nsproxy(proxy_cur);
23198+ return ret;
23199+}
23200+
23201+
23202+int vc_enter_space_v1(struct vx_info *vxi, void __user *data)
23203+{
23204+ struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
23205+
23206+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
23207+ return -EFAULT;
23208+
23209+ return vx_enter_space(vxi, vc_data.mask, 0);
23210+}
23211+
23212+int vc_enter_space(struct vx_info *vxi, void __user *data)
23213+{
23214+ struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
23215+
23216+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
23217+ return -EFAULT;
23218+
23219+ if (vc_data.index >= VX_SPACES)
23220+ return -EINVAL;
23221+
23222+ return vx_enter_space(vxi, vc_data.mask, vc_data.index);
23223+}
23224+
23225+int vc_set_space_v1(struct vx_info *vxi, void __user *data)
23226+{
23227+ struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
23228+
23229+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
23230+ return -EFAULT;
23231+
23232+ return vx_set_space(vxi, vc_data.mask, 0);
23233+}
23234+
23235+int vc_set_space(struct vx_info *vxi, void __user *data)
23236+{
23237+ struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
23238+
23239+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
23240+ return -EFAULT;
23241+
23242+ if (vc_data.index >= VX_SPACES)
23243+ return -EINVAL;
23244+
23245+ return vx_set_space(vxi, vc_data.mask, vc_data.index);
23246+}
23247+
23248+int vc_get_space_mask(void __user *data, int type)
23249+{
23250+ const struct vcmd_space_mask_v1 *mask;
23251+
23252+ if (type == 0)
23253+ mask = &space_mask_v0;
23254+ else if (type == 1)
23255+ mask = &space_mask;
23256+ else
23257+ mask = &default_space_mask;
23258+
23259+ vxdprintk(VXD_CBIT(space, 10),
23260+ "vc_get_space_mask(%d) = %08llx", type, mask->mask);
23261+
23262+ if (copy_to_user(data, mask, sizeof(*mask)))
23263+ return -EFAULT;
23264+ return 0;
23265+}
23266+
23267diff -NurpP --minimal linux-3.0.9/kernel/vserver/switch.c linux-3.0.9-vs2.3.2.1/kernel/vserver/switch.c
23268--- linux-3.0.9/kernel/vserver/switch.c 1970-01-01 01:00:00.000000000 +0100
23269+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/switch.c 2011-08-01 18:28:26.000000000 +0200
23270@@ -0,0 +1,556 @@
23271+/*
23272+ * linux/kernel/vserver/switch.c
23273+ *
23274+ * Virtual Server: Syscall Switch
23275+ *
23276+ * Copyright (C) 2003-2011 Herbert Pötzl
23277+ *
23278+ * V0.01 syscall switch
23279+ * V0.02 added signal to context
23280+ * V0.03 added rlimit functions
23281+ * V0.04 added iattr, task/xid functions
23282+ * V0.05 added debug/history stuff
23283+ * V0.06 added compat32 layer
23284+ * V0.07 vcmd args and perms
23285+ * V0.08 added status commands
23286+ * V0.09 added tag commands
23287+ * V0.10 added oom bias
23288+ * V0.11 added device commands
23289+ * V0.12 added warn mask
23290+ *
23291+ */
23292+
23293+#include <linux/vs_context.h>
23294+#include <linux/vs_network.h>
23295+#include <linux/vserver/switch.h>
23296+
23297+#include "vci_config.h"
23298+
23299+
/* VCMD_get_version handler: returns VCI_VERSION; @id is ignored. */
static inline
int vc_get_version(uint32_t id)
{
	return VCI_VERSION;
}
23305+
/*
 * VCMD_get_vci handler: returns the value of vci_kernel_config();
 * @id is ignored.
 */
static inline
int vc_get_vci(uint32_t id)
{
	return vci_kernel_config();
}
23311+
23312+#include <linux/vserver/context_cmd.h>
23313+#include <linux/vserver/cvirt_cmd.h>
23314+#include <linux/vserver/cacct_cmd.h>
23315+#include <linux/vserver/limit_cmd.h>
23316+#include <linux/vserver/network_cmd.h>
23317+#include <linux/vserver/sched_cmd.h>
23318+#include <linux/vserver/debug_cmd.h>
23319+#include <linux/vserver/inode_cmd.h>
23320+#include <linux/vserver/dlimit_cmd.h>
23321+#include <linux/vserver/signal_cmd.h>
23322+#include <linux/vserver/space_cmd.h>
23323+#include <linux/vserver/tag_cmd.h>
23324+#include <linux/vserver/device_cmd.h>
23325+
23326+#include <linux/vserver/inode.h>
23327+#include <linux/vserver/dlimit.h>
23328+
23329+
/*
 * Call helpers for commands that have a 32bit-compat variant.
 *
 * With CONFIG_COMPAT, a true @compat selects the handler named
 * <name>_x32, otherwise <name> is called.  Without CONFIG_COMPAT,
 * @compat is ignored.  The expansions are fully parenthesised so that
 * they can be used inside larger expressions.
 */
#ifdef CONFIG_COMPAT
#define __COMPAT(name, id, data, compat)	\
	((compat) ? name ## _x32(id, data) : name(id, data))
#define __COMPAT_NO_ID(name, data, compat)	\
	((compat) ? name ## _x32(data) : name(data))
#else
#define __COMPAT(name, id, data, compat)	\
	(name(id, data))
#define __COMPAT_NO_ID(name, data, compat)	\
	(name(data))
#endif
23341+
23342+
/*
 * do_vcmd - dispatch a vserver command to its handler
 * @cmd:    VCMD_* command code
 * @id:     id argument, passed to handlers that take an id
 * @vxi:    context, passed to handlers that take a vx_info
 * @nxi:    network context, passed to handlers that take an nx_info
 * @data:   userspace argument pointer; the *_v0 create/migrate/enter
 *          variants pass NULL to the handler instead
 * @compat: selects the _x32 handler variants through __COMPAT() when
 *          CONFIG_COMPAT is set
 *
 * Returns the handler's result.  Commands without a case are logged and
 * yield -ENOSYS.
 */
static inline
long do_vcmd(uint32_t cmd, uint32_t id,
	struct vx_info *vxi, struct nx_info *nxi,
	void __user *data, int compat)
{
	switch (cmd) {

	case VCMD_get_version:
		return vc_get_version(id);
	case VCMD_get_vci:
		return vc_get_vci(id);

	case VCMD_task_xid:
		return vc_task_xid(id);
	case VCMD_vx_info:
		return vc_vx_info(vxi, data);

	case VCMD_task_nid:
		return vc_task_nid(id);
	case VCMD_nx_info:
		return vc_nx_info(nxi, data);

	case VCMD_task_tag:
		return vc_task_tag(id);

	case VCMD_set_space_v1:
		return vc_set_space_v1(vxi, data);
	/* this is version 2 */
	case VCMD_set_space:
		return vc_set_space(vxi, data);

	/* the second argument picks the mask table in vc_get_space_mask() */
	case VCMD_get_space_mask_v0:
		return vc_get_space_mask(data, 0);
	/* this is version 1 */
	case VCMD_get_space_mask:
		return vc_get_space_mask(data, 1);

	case VCMD_get_space_default:
		return vc_get_space_mask(data, -1);

	case VCMD_set_umask:
		return vc_set_umask(vxi, data);

	case VCMD_get_umask:
		return vc_get_umask(vxi, data);

	case VCMD_set_wmask:
		return vc_set_wmask(vxi, data);

	case VCMD_get_wmask:
		return vc_get_wmask(vxi, data);
	/* rlimit get/set only use the compat variants with IA32 emulation */
#ifdef	CONFIG_IA32_EMULATION
	case VCMD_get_rlimit:
		return __COMPAT(vc_get_rlimit, vxi, data, compat);
	case VCMD_set_rlimit:
		return __COMPAT(vc_set_rlimit, vxi, data, compat);
#else
	case VCMD_get_rlimit:
		return vc_get_rlimit(vxi, data);
	case VCMD_set_rlimit:
		return vc_set_rlimit(vxi, data);
#endif
	case VCMD_get_rlimit_mask:
		return vc_get_rlimit_mask(id, data);
	case VCMD_reset_hits:
		return vc_reset_hits(vxi, data);
	case VCMD_reset_minmax:
		return vc_reset_minmax(vxi, data);

	case VCMD_get_vhi_name:
		return vc_get_vhi_name(vxi, data);
	case VCMD_set_vhi_name:
		return vc_set_vhi_name(vxi, data);

	case VCMD_ctx_stat:
		return vc_ctx_stat(vxi, data);
	case VCMD_virt_stat:
		return vc_virt_stat(vxi, data);
	case VCMD_sock_stat:
		return vc_sock_stat(vxi, data);
	case VCMD_rlimit_stat:
		return vc_rlimit_stat(vxi, data);

	case VCMD_set_cflags:
		return vc_set_cflags(vxi, data);
	case VCMD_get_cflags:
		return vc_get_cflags(vxi, data);

	/* this is version 1 */
	case VCMD_set_ccaps:
		return vc_set_ccaps(vxi, data);
	/* this is version 1 */
	case VCMD_get_ccaps:
		return vc_get_ccaps(vxi, data);
	case VCMD_set_bcaps:
		return vc_set_bcaps(vxi, data);
	case VCMD_get_bcaps:
		return vc_get_bcaps(vxi, data);

	case VCMD_set_badness:
		return vc_set_badness(vxi, data);
	case VCMD_get_badness:
		return vc_get_badness(vxi, data);

	case VCMD_set_nflags:
		return vc_set_nflags(nxi, data);
	case VCMD_get_nflags:
		return vc_get_nflags(nxi, data);

	case VCMD_set_ncaps:
		return vc_set_ncaps(nxi, data);
	case VCMD_get_ncaps:
		return vc_get_ncaps(nxi, data);

	case VCMD_set_prio_bias:
		return vc_set_prio_bias(vxi, data);
	case VCMD_get_prio_bias:
		return vc_get_prio_bias(vxi, data);
	case VCMD_add_dlimit:
		return __COMPAT(vc_add_dlimit, id, data, compat);
	case VCMD_rem_dlimit:
		return __COMPAT(vc_rem_dlimit, id, data, compat);
	case VCMD_set_dlimit:
		return __COMPAT(vc_set_dlimit, id, data, compat);
	case VCMD_get_dlimit:
		return __COMPAT(vc_get_dlimit, id, data, compat);

	case VCMD_ctx_kill:
		return vc_ctx_kill(vxi, data);

	case VCMD_wait_exit:
		return vc_wait_exit(vxi, data);

	case VCMD_get_iattr:
		return __COMPAT_NO_ID(vc_get_iattr, data, compat);
	case VCMD_set_iattr:
		return __COMPAT_NO_ID(vc_set_iattr, data, compat);

	case VCMD_fget_iattr:
		return vc_fget_iattr(id, data);
	case VCMD_fset_iattr:
		return vc_fset_iattr(id, data);

	/* v0 commands carry no argument block: the handler gets NULL */
	case VCMD_enter_space_v0:
		return vc_enter_space_v1(vxi, NULL);
	case VCMD_enter_space_v1:
		return vc_enter_space_v1(vxi, data);
	/* this is version 2 */
	case VCMD_enter_space:
		return vc_enter_space(vxi, data);

	case VCMD_ctx_create_v0:
		return vc_ctx_create(id, NULL);
	case VCMD_ctx_create:
		return vc_ctx_create(id, data);
	case VCMD_ctx_migrate_v0:
		return vc_ctx_migrate(vxi, NULL);
	case VCMD_ctx_migrate:
		return vc_ctx_migrate(vxi, data);

	case VCMD_net_create_v0:
		return vc_net_create(id, NULL);
	case VCMD_net_create:
		return vc_net_create(id, data);
	case VCMD_net_migrate:
		return vc_net_migrate(nxi, data);

	case VCMD_tag_migrate:
		return vc_tag_migrate(id);

	case VCMD_net_add:
		return vc_net_add(nxi, data);
	case VCMD_net_remove:
		return vc_net_remove(nxi, data);

	case VCMD_net_add_ipv4_v1:
		return vc_net_add_ipv4_v1(nxi, data);
	/* this is version 2 */
	case VCMD_net_add_ipv4:
		return vc_net_add_ipv4(nxi, data);

	case VCMD_net_rem_ipv4_v1:
		return vc_net_rem_ipv4_v1(nxi, data);
	/* this is version 2 */
	case VCMD_net_rem_ipv4:
		return vc_net_rem_ipv4(nxi, data);
#ifdef	CONFIG_IPV6
	case VCMD_net_add_ipv6:
		return vc_net_add_ipv6(nxi, data);
	case VCMD_net_remove_ipv6:
		return vc_net_remove_ipv6(nxi, data);
#endif
/*	case VCMD_add_match_ipv4:
		return vc_add_match_ipv4(nxi, data);
	case VCMD_get_match_ipv4:
		return vc_get_match_ipv4(nxi, data);
#ifdef	CONFIG_IPV6
	case VCMD_add_match_ipv6:
		return vc_add_match_ipv6(nxi, data);
	case VCMD_get_match_ipv6:
		return vc_get_match_ipv6(nxi, data);
#endif	*/

#ifdef	CONFIG_VSERVER_DEVICE
	case VCMD_set_mapping:
		return __COMPAT(vc_set_mapping, vxi, data, compat);
	case VCMD_unset_mapping:
		return __COMPAT(vc_unset_mapping, vxi, data, compat);
#endif
#ifdef	CONFIG_VSERVER_HISTORY
	case VCMD_dump_history:
		return vc_dump_history(id);
	case VCMD_read_history:
		return __COMPAT(vc_read_history, id, data, compat);
#endif
	default:
		/* log the unknown command, then fall out to -ENOSYS below */
		vxwprintk_task(1, "unimplemented VCMD_%02d_%d[%d]",
			VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
	}
	return -ENOSYS;
}
23564+
23565+
/*
 * One permission-table entry: expands to a complete "case" for
 * VCMD_<vcmd> that sets the local variables perm, args and flags and
 * leaves the switch.  The terminating semicolon is supplied at the
 * point of use.
 */
#define __VCMD(vcmd, _perm, _args, _flags)	\
	case VCMD_ ## vcmd:			\
		perm = _perm;			\
		args = _args;			\
		flags = _flags;			\
		break
23569+
23570+
/* argument kinds used in the "args" column of the __VCMD() table */
#define VCA_NONE	0x00
#define VCA_VXI		(1 << 0)
#define VCA_NXI		(1 << 1)

/* flag bits used in the "flags" column of the __VCMD() table */
#define VCF_NONE	0x00
#define VCF_INFO	(1 << 0)
#define VCF_ADMIN	(1 << 1)
#define VCF_ARES	(VCF_ADMIN | (1 << 2))	/* includes admin */
#define VCF_SETUP	(1 << 3)

#define VCF_ZIDOK	(1 << 4)	/* zero id okay */
23582+
23583+
23584+static inline
23585+long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
23586+{
23587+ long ret;
23588+ int permit = -1, state = 0;
23589+ int perm = -1, args = 0, flags = 0;
23590+ struct vx_info *vxi = NULL;
23591+ struct nx_info *nxi = NULL;
23592+
23593+ switch (cmd) {
23594+ /* unpriviledged commands */
23595+ __VCMD(get_version, 0, VCA_NONE, 0);
23596+ __VCMD(get_vci, 0, VCA_NONE, 0);
23597+ __VCMD(get_rlimit_mask, 0, VCA_NONE, 0);
23598+ __VCMD(get_space_mask_v0,0, VCA_NONE, 0);
23599+ __VCMD(get_space_mask, 0, VCA_NONE, 0);
23600+ __VCMD(get_space_default,0, VCA_NONE, 0);
23601+
23602+ /* info commands */
23603+ __VCMD(task_xid, 2, VCA_NONE, 0);
23604+ __VCMD(reset_hits, 2, VCA_VXI, 0);
23605+ __VCMD(reset_minmax, 2, VCA_VXI, 0);
23606+ __VCMD(vx_info, 3, VCA_VXI, VCF_INFO);
23607+ __VCMD(get_bcaps, 3, VCA_VXI, VCF_INFO);
23608+ __VCMD(get_ccaps, 3, VCA_VXI, VCF_INFO);
23609+ __VCMD(get_cflags, 3, VCA_VXI, VCF_INFO);
23610+ __VCMD(get_umask, 3, VCA_VXI, VCF_INFO);
23611+ __VCMD(get_wmask, 3, VCA_VXI, VCF_INFO);
23612+ __VCMD(get_badness, 3, VCA_VXI, VCF_INFO);
23613+ __VCMD(get_vhi_name, 3, VCA_VXI, VCF_INFO);
23614+ __VCMD(get_rlimit, 3, VCA_VXI, VCF_INFO);
23615+
23616+ __VCMD(ctx_stat, 3, VCA_VXI, VCF_INFO);
23617+ __VCMD(virt_stat, 3, VCA_VXI, VCF_INFO);
23618+ __VCMD(sock_stat, 3, VCA_VXI, VCF_INFO);
23619+ __VCMD(rlimit_stat, 3, VCA_VXI, VCF_INFO);
23620+
23621+ __VCMD(task_nid, 2, VCA_NONE, 0);
23622+ __VCMD(nx_info, 3, VCA_NXI, VCF_INFO);
23623+ __VCMD(get_ncaps, 3, VCA_NXI, VCF_INFO);
23624+ __VCMD(get_nflags, 3, VCA_NXI, VCF_INFO);
23625+
23626+ __VCMD(task_tag, 2, VCA_NONE, 0);
23627+
23628+ __VCMD(get_iattr, 2, VCA_NONE, 0);
23629+ __VCMD(fget_iattr, 2, VCA_NONE, 0);
23630+ __VCMD(get_dlimit, 3, VCA_NONE, VCF_INFO);
23631+ __VCMD(get_prio_bias, 3, VCA_VXI, VCF_INFO);
23632+
23633+ /* lower admin commands */
23634+ __VCMD(wait_exit, 4, VCA_VXI, VCF_INFO);
23635+ __VCMD(ctx_create_v0, 5, VCA_NONE, 0);
23636+ __VCMD(ctx_create, 5, VCA_NONE, 0);
23637+ __VCMD(ctx_migrate_v0, 5, VCA_VXI, VCF_ADMIN);
23638+ __VCMD(ctx_migrate, 5, VCA_VXI, VCF_ADMIN);
23639+ __VCMD(enter_space_v0, 5, VCA_VXI, VCF_ADMIN);
23640+ __VCMD(enter_space_v1, 5, VCA_VXI, VCF_ADMIN);
23641+ __VCMD(enter_space, 5, VCA_VXI, VCF_ADMIN);
23642+
23643+ __VCMD(net_create_v0, 5, VCA_NONE, 0);
23644+ __VCMD(net_create, 5, VCA_NONE, 0);
23645+ __VCMD(net_migrate, 5, VCA_NXI, VCF_ADMIN);
23646+
23647+ __VCMD(tag_migrate, 5, VCA_NONE, VCF_ADMIN);
23648+
23649+ /* higher admin commands */
23650+ __VCMD(ctx_kill, 6, VCA_VXI, VCF_ARES);
23651+ __VCMD(set_space_v1, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23652+ __VCMD(set_space, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23653+
23654+ __VCMD(set_ccaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23655+ __VCMD(set_bcaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23656+ __VCMD(set_cflags, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23657+ __VCMD(set_umask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23658+ __VCMD(set_wmask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23659+ __VCMD(set_badness, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23660+
23661+ __VCMD(set_vhi_name, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23662+ __VCMD(set_rlimit, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23663+ __VCMD(set_prio_bias, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
23664+
23665+ __VCMD(set_ncaps, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
23666+ __VCMD(set_nflags, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
23667+ __VCMD(net_add, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
23668+ __VCMD(net_remove, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
23669+ __VCMD(net_add_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
23670+ __VCMD(net_rem_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
23671+ __VCMD(net_add_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
23672+ __VCMD(net_rem_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
23673+#ifdef CONFIG_IPV6
23674+ __VCMD(net_add_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
23675+ __VCMD(net_remove_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
23676+#endif
23677+ __VCMD(set_iattr, 7, VCA_NONE, 0);
23678+ __VCMD(fset_iattr, 7, VCA_NONE, 0);
23679+ __VCMD(set_dlimit, 7, VCA_NONE, VCF_ARES);
23680+ __VCMD(add_dlimit, 8, VCA_NONE, VCF_ARES);
23681+ __VCMD(rem_dlimit, 8, VCA_NONE, VCF_ARES);
23682+
23683+#ifdef CONFIG_VSERVER_DEVICE
23684+ __VCMD(set_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
23685+ __VCMD(unset_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
23686+#endif
23687+ /* debug level admin commands */
23688+#ifdef CONFIG_VSERVER_HISTORY
23689+ __VCMD(dump_history, 9, VCA_NONE, 0);
23690+ __VCMD(read_history, 9, VCA_NONE, 0);
23691+#endif
23692+
23693+ default:
23694+ perm = -1;
23695+ }
23696+
23697+ vxdprintk(VXD_CBIT(switch, 0),
23698+ "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
23699+ VC_CATEGORY(cmd), VC_COMMAND(cmd),
23700+ VC_VERSION(cmd), id, data, compat,
23701+ perm, args, flags);
23702+
23703+ ret = -ENOSYS;
23704+ if (perm < 0)
23705+ goto out;
23706+
23707+ state = 1;
23708+ if (!capable(CAP_CONTEXT))
23709+ goto out;
23710+
23711+ state = 2;
23712+ /* moved here from the individual commands */
23713+ ret = -EPERM;
23714+ if ((perm > 1) && !capable(CAP_SYS_ADMIN))
23715+ goto out;
23716+
23717+ state = 3;
23718+ /* vcmd involves resource management */
23719+ ret = -EPERM;
23720+ if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE))
23721+ goto out;
23722+
23723+ state = 4;
23724+ /* various legacy exceptions */
23725+ switch (cmd) {
23726+ /* will go away when spectator is a cap */
23727+ case VCMD_ctx_migrate_v0:
23728+ case VCMD_ctx_migrate:
23729+ if (id == 1) {
23730+ current->xid = 1;
23731+ ret = 1;
23732+ goto out;
23733+ }
23734+ break;
23735+
23736+ /* will go away when spectator is a cap */
23737+ case VCMD_net_migrate:
23738+ if (id == 1) {
23739+ current->nid = 1;
23740+ ret = 1;
23741+ goto out;
23742+ }
23743+ break;
23744+ }
23745+
23746+ /* vcmds are fine by default */
23747+ permit = 1;
23748+
23749+ /* admin type vcmds require admin ... */
23750+ if (flags & VCF_ADMIN)
23751+ permit = vx_check(0, VS_ADMIN) ? 1 : 0;
23752+
23753+ /* ... but setup type vcmds override that */
23754+ if (!permit && (flags & VCF_SETUP))
23755+ permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0;
23756+
23757+ state = 5;
23758+ ret = -EPERM;
23759+ if (!permit)
23760+ goto out;
23761+
23762+ state = 6;
23763+ if (!id && (flags & VCF_ZIDOK))
23764+ goto skip_id;
23765+
23766+ ret = -ESRCH;
23767+ if (args & VCA_VXI) {
23768+ vxi = lookup_vx_info(id);
23769+ if (!vxi)
23770+ goto out;
23771+
23772+ if ((flags & VCF_ADMIN) &&
23773+ /* special case kill for shutdown */
23774+ (cmd != VCMD_ctx_kill) &&
23775+ /* can context be administrated? */
23776+ !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
23777+ ret = -EACCES;
23778+ goto out_vxi;
23779+ }
23780+ }
23781+ state = 7;
23782+ if (args & VCA_NXI) {
23783+ nxi = lookup_nx_info(id);
23784+ if (!nxi)
23785+ goto out_vxi;
23786+
23787+ if ((flags & VCF_ADMIN) &&
23788+ /* can context be administrated? */
23789+ !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
23790+ ret = -EACCES;
23791+ goto out_nxi;
23792+ }
23793+ }
23794+skip_id:
23795+ state = 8;
23796+ ret = do_vcmd(cmd, id, vxi, nxi, data, compat);
23797+
23798+out_nxi:
23799+ if ((args & VCA_NXI) && nxi)
23800+ put_nx_info(nxi);
23801+out_vxi:
23802+ if ((args & VCA_VXI) && vxi)
23803+ put_vx_info(vxi);
23804+out:
23805+ vxdprintk(VXD_CBIT(switch, 1),
23806+ "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
23807+ VC_CATEGORY(cmd), VC_COMMAND(cmd),
23808+ VC_VERSION(cmd), ret, ret, state, permit);
23809+ return ret;
23810+}
23811+/* Native syscall entry point: hands cmd, id and data to do_vserver() with a final argument of 0. */
23812+asmlinkage long
23813+sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
23814+{
23815+ return do_vserver(cmd, id, data, 0);
23816+}
23817+
23818+#ifdef CONFIG_COMPAT
23819+/* CONFIG_COMPAT entry point: same forwarding as sys_vserver(), but passes 1 as the final do_vserver() argument. */
23820+asmlinkage long
23821+sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
23822+{
23823+ return do_vserver(cmd, id, data, 1);
23824+}
23825+
23826+#endif /* CONFIG_COMPAT */
23827diff -NurpP --minimal linux-3.0.9/kernel/vserver/sysctl.c linux-3.0.9-vs2.3.2.1/kernel/vserver/sysctl.c
23828--- linux-3.0.9/kernel/vserver/sysctl.c 1970-01-01 01:00:00.000000000 +0100
23829+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/sysctl.c 2011-10-27 15:18:40.000000000 +0200
23830@@ -0,0 +1,247 @@
23831+/*
23832+ * kernel/vserver/sysctl.c
23833+ *
23834+ * Virtual Context Support
23835+ *
23836+ * Copyright (C) 2004-2007 Herbert Pötzl
23837+ *
23838+ * V0.01 basic structure
23839+ *
23840+ */
23841+
23842+#include <linux/module.h>
23843+#include <linux/ctype.h>
23844+#include <linux/sysctl.h>
23845+#include <linux/parser.h>
23846+#include <asm/uaccess.h>
23847+/* Ids for the debug words; CTL_DEBUG_ERROR (0) is the id of the terminating entry of the vsdebug= token table. */
23848+enum {
23849+ CTL_DEBUG_ERROR = 0,
23850+ CTL_DEBUG_SWITCH = 1,
23851+ CTL_DEBUG_XID,
23852+ CTL_DEBUG_NID,
23853+ CTL_DEBUG_TAG,
23854+ CTL_DEBUG_NET,
23855+ CTL_DEBUG_LIMIT,
23856+ CTL_DEBUG_CRES,
23857+ CTL_DEBUG_DLIM,
23858+ CTL_DEBUG_QUOTA,
23859+ CTL_DEBUG_CVIRT,
23860+ CTL_DEBUG_SPACE,
23861+ CTL_DEBUG_PERM,
23862+ CTL_DEBUG_MISC,
23863+};
23864+
23865+/* Per-area debug words, all 0 by default; each one gets a sysctl entry and can be preset with vsdebug=. */
23866+unsigned int vs_debug_switch = 0;
23867+unsigned int vs_debug_xid = 0;
23868+unsigned int vs_debug_nid = 0;
23869+unsigned int vs_debug_tag = 0;
23870+unsigned int vs_debug_net = 0;
23871+unsigned int vs_debug_limit = 0;
23872+unsigned int vs_debug_cres = 0;
23873+unsigned int vs_debug_dlim = 0;
23874+unsigned int vs_debug_quota = 0;
23875+unsigned int vs_debug_cvirt = 0;
23876+unsigned int vs_debug_space = 0;
23877+unsigned int vs_debug_perm = 0;
23878+unsigned int vs_debug_misc = 0;
23879+
23880+
23881+static struct ctl_table_header *vserver_table_header;
23882+static ctl_table vserver_root_table[];
23883+
23884+/* Register vserver_root_table with sysctl, unless a table header is already stored. */
23885+void vserver_register_sysctl(void)
23886+{
23887+ if (!vserver_table_header) {
23888+ vserver_table_header = register_sysctl_table(vserver_root_table);
23889+ }
23890+
23891+}
23892+/* Unregister the sysctl table if one is registered and reset the stored header to NULL. */
23893+void vserver_unregister_sysctl(void)
23894+{
23895+ if (vserver_table_header) {
23896+ unregister_sysctl_table(vserver_table_header);
23897+ vserver_table_header = NULL;
23898+ }
23899+}
23900+/* sysctl handler for the vs_debug_* words: a write parses one unsigned decimal number into table->data, */
23901+/* a read emits the word as unsigned decimal plus '\n' while room remains; returns 0, -EINVAL or -EFAULT. */
23902+static int proc_dodebug(ctl_table *table, int write,
23903+ void __user *buffer, size_t *lenp, loff_t *ppos)
23904+{
23905+ char tmpbuf[20], *p, c;
23906+ unsigned int value;
23907+ size_t left, len;
23908+
23909+ if ((*ppos && !write) || !*lenp) { /* read at a non-zero offset, or zero-length request */
23910+ *lenp = 0;
23911+ return 0;
23912+ }
23913+
23914+ left = *lenp;
23915+
23916+ if (write) {
23917+ if (!access_ok(VERIFY_READ, buffer, left))
23918+ return -EFAULT;
23919+ p = (char *)buffer;
23920+ while (left && __get_user(c, p) >= 0 && isspace(c)) /* skip leading whitespace in the user buffer */
23921+ left--, p++;
23922+ if (!left)
23923+ goto done;
23924+
23925+ if (left > sizeof(tmpbuf) - 1) /* keep one byte for the terminating NUL */
23926+ return -EINVAL;
23927+ if (copy_from_user(tmpbuf, p, left))
23928+ return -EFAULT;
23929+ tmpbuf[left] = '\0';
23930+
23931+ for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
23932+ value = 10 * value + (*p - '0');
23933+ if (*p && !isspace(*p)) /* only whitespace may follow the digits */
23934+ return -EINVAL;
23935+ while (left && isspace(*p))
23936+ left--, p++;
23937+ *(unsigned int *)table->data = value;
23938+ } else {
23939+ if (!access_ok(VERIFY_WRITE, buffer, left))
23940+ return -EFAULT;
23941+ len = sprintf(tmpbuf, "%u", *(unsigned int *)table->data); /* %u: the word is unsigned, and the write path accepts digits only */
23942+ if (len > left)
23943+ len = left;
23944+ if (__copy_to_user(buffer, tmpbuf, len))
23945+ return -EFAULT;
23946+ if ((left -= len) > 0) {
23947+ if (put_user('\n', (char *)buffer + len))
23948+ return -EFAULT;
23949+ left--;
23950+ }
23951+ }
23952+
23953+done:
23954+ *lenp -= left; /* report the number of bytes consumed or produced */
23955+ *ppos += *lenp;
23956+ return 0;
23957+}
23958+/* Single int that every CTL_ENTRY points both .extra1 and .extra2 at. */
23959+static int zero;
23960+/* Build the ctl_table entry for vs_<name> (mode 0644, handled by proc_dodebug); the ctl argument is not used. */
23961+#define CTL_ENTRY(ctl, name) \
23962+ { \
23963+ .procname = #name, \
23964+ .data = &vs_ ## name, \
23965+ .maxlen = sizeof(int), \
23966+ .mode = 0644, \
23967+ .proc_handler = &proc_dodebug, \
23968+ .extra1 = &zero, \
23969+ .extra2 = &zero, \
23970+ }
23971+/* One sysctl entry per vs_debug_* word, terminated by a zeroed entry. */
23972+static ctl_table vserver_debug_table[] = {
23973+ CTL_ENTRY(CTL_DEBUG_SWITCH, debug_switch),
23974+ CTL_ENTRY(CTL_DEBUG_XID, debug_xid),
23975+ CTL_ENTRY(CTL_DEBUG_NID, debug_nid),
23976+ CTL_ENTRY(CTL_DEBUG_TAG, debug_tag),
23977+ CTL_ENTRY(CTL_DEBUG_NET, debug_net),
23978+ CTL_ENTRY(CTL_DEBUG_LIMIT, debug_limit),
23979+ CTL_ENTRY(CTL_DEBUG_CRES, debug_cres),
23980+ CTL_ENTRY(CTL_DEBUG_DLIM, debug_dlim),
23981+ CTL_ENTRY(CTL_DEBUG_QUOTA, debug_quota),
23982+ CTL_ENTRY(CTL_DEBUG_CVIRT, debug_cvirt),
23983+ CTL_ENTRY(CTL_DEBUG_SPACE, debug_space),
23984+ CTL_ENTRY(CTL_DEBUG_PERM, debug_perm),
23985+ CTL_ENTRY(CTL_DEBUG_MISC, debug_misc),
23986+ { 0 }
23987+};
23988+/* Root table: a "vserver" directory (mode 0555) whose children are the debug entries. */
23989+static ctl_table vserver_root_table[] = {
23990+ {
23991+ .procname = "vserver",
23992+ .mode = 0555,
23993+ .child = vserver_debug_table
23994+ },
23995+ { 0 }
23996+};
23997+
23998+/* Patterns for the vsdebug= option: "<area>=%x" mapped to the matching CTL_DEBUG_* id; the NULL pattern ends the table. */
23999+static match_table_t tokens = {
24000+ { CTL_DEBUG_SWITCH, "switch=%x" },
24001+ { CTL_DEBUG_XID, "xid=%x" },
24002+ { CTL_DEBUG_NID, "nid=%x" },
24003+ { CTL_DEBUG_TAG, "tag=%x" },
24004+ { CTL_DEBUG_NET, "net=%x" },
24005+ { CTL_DEBUG_LIMIT, "limit=%x" },
24006+ { CTL_DEBUG_CRES, "cres=%x" },
24007+ { CTL_DEBUG_DLIM, "dlim=%x" },
24008+ { CTL_DEBUG_QUOTA, "quota=%x" },
24009+ { CTL_DEBUG_CVIRT, "cvirt=%x" },
24010+ { CTL_DEBUG_SPACE, "space=%x" },
24011+ { CTL_DEBUG_PERM, "perm=%x" },
24012+ { CTL_DEBUG_MISC, "misc=%x" },
24013+ { CTL_DEBUG_ERROR, NULL }
24014+};
24015+/* Expand to a switch case for CTL_DEBUG_<id> that stores val in vs_debug_<name> and logs the new value with printk(). */
24016+#define HANDLE_CASE(id, name, val) \
24017+ case CTL_DEBUG_ ## id: \
24018+ vs_debug_ ## name = val; \
24019+ printk("vs_debug_" #name "=0x%x\n", val); \
24020+ break
24021+
24022+/* Parse the comma-separated vsdebug= string into the vs_debug_* words; returns 1, or -EINVAL at the first unmatched item. */
24023+static int __init vs_debug_setup(char *str)
24024+{
24025+ char *p;
24026+ int token;
24027+
24028+ printk("vs_debug_setup(%s)\n", str);
24029+ while ((p = strsep(&str, ",")) != NULL) {
24030+ substring_t args[MAX_OPT_ARGS];
24031+ unsigned int value;
24032+
24033+ if (!*p) /* skip empty items such as ",," */
24034+ continue;
24035+
24036+ token = match_token(p, tokens, args);
24037+ value = (token > 0) ? simple_strtoul(args[0].from, NULL, 0) : 0; /* NOTE(review): patterns are %x but the number is re-parsed with base 0, so hex digits without a 0x prefix are not read as hex - confirm intended */
24038+
24039+ switch (token) {
24040+ HANDLE_CASE(SWITCH, switch, value);
24041+ HANDLE_CASE(XID, xid, value);
24042+ HANDLE_CASE(NID, nid, value);
24043+ HANDLE_CASE(TAG, tag, value);
24044+ HANDLE_CASE(NET, net, value);
24045+ HANDLE_CASE(LIMIT, limit, value);
24046+ HANDLE_CASE(CRES, cres, value);
24047+ HANDLE_CASE(DLIM, dlim, value);
24048+ HANDLE_CASE(QUOTA, quota, value);
24049+ HANDLE_CASE(CVIRT, cvirt, value);
24050+ HANDLE_CASE(SPACE, space, value);
24051+ HANDLE_CASE(PERM, perm, value);
24052+ HANDLE_CASE(MISC, misc, value);
24053+ default: /* items handled before this one stay applied */
24054+ return -EINVAL;
24055+ break;
24056+ }
24057+ }
24058+ return 1;
24059+}
24060+/* Attach vs_debug_setup() to the "vsdebug=" boot parameter. */
24061+__setup("vsdebug=", vs_debug_setup);
24062+
24063+
24064+/* GPL-only exports of the debug words. NOTE(review): vs_debug_tag is defined in this file but not exported here - confirm that is intended. */
24065+EXPORT_SYMBOL_GPL(vs_debug_switch);
24066+EXPORT_SYMBOL_GPL(vs_debug_xid);
24067+EXPORT_SYMBOL_GPL(vs_debug_nid);
24068+EXPORT_SYMBOL_GPL(vs_debug_net);
24069+EXPORT_SYMBOL_GPL(vs_debug_limit);
24070+EXPORT_SYMBOL_GPL(vs_debug_cres);
24071+EXPORT_SYMBOL_GPL(vs_debug_dlim);
24072+EXPORT_SYMBOL_GPL(vs_debug_quota);
24073+EXPORT_SYMBOL_GPL(vs_debug_cvirt);
24074+EXPORT_SYMBOL_GPL(vs_debug_space);
24075+EXPORT_SYMBOL_GPL(vs_debug_perm);
24076+EXPORT_SYMBOL_GPL(vs_debug_misc);
24077+
24078diff -NurpP --minimal linux-3.0.9/kernel/vserver/tag.c linux-3.0.9-vs2.3.2.1/kernel/vserver/tag.c
24079--- linux-3.0.9/kernel/vserver/tag.c 1970-01-01 01:00:00.000000000 +0100
24080+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/tag.c 2011-06-10 22:11:24.000000000 +0200
24081@@ -0,0 +1,63 @@
24082+/*
24083+ * linux/kernel/vserver/tag.c
24084+ *
24085+ * Virtual Server: Shallow Tag Space
24086+ *
24087+ * Copyright (C) 2007 Herbert Pötzl
24088+ *
24089+ * V0.01 basic implementation
24090+ *
24091+ */
24092+
24093+#include <linux/sched.h>
24094+#include <linux/vserver/debug.h>
24095+#include <linux/vs_pid.h>
24096+#include <linux/vs_tag.h>
24097+
24098+#include <linux/vserver/tag_cmd.h>
24099+
24100+/* Set p->tag to tag while holding task_lock(p); BUG()s on a NULL task and always returns 0. */
24101+int dx_migrate_task(struct task_struct *p, tag_t tag)
24102+{
24103+ if (!p)
24104+ BUG();
24105+
24106+ vxdprintk(VXD_CBIT(tag, 5),
24107+ "dx_migrate_task(%p[#%d],#%d)", p, p->tag, tag);
24108+
24109+ task_lock(p);
24110+ p->tag = tag;
24111+ task_unlock(p);
24112+
24113+ vxdprintk(VXD_CBIT(tag, 5),
24114+ "moved task %p into [#%d]", p, tag);
24115+ return 0;
24116+}
24117+
24118+/* vserver syscall commands below here */
24119+
24120+/* task tag query and migration functions */
24121+
24122+/* id == 0: return dx_current_tag(); otherwise return the tag of the task found by real pid under RCU, or -ESRCH if none. */
24123+int vc_task_tag(uint32_t id)
24124+{
24125+ tag_t tag;
24126+
24127+ if (id) {
24128+ struct task_struct *tsk;
24129+ rcu_read_lock();
24130+ tsk = find_task_by_real_pid(id);
24131+ tag = (tsk) ? tsk->tag : -ESRCH;
24132+ rcu_read_unlock();
24133+ } else
24134+ tag = dx_current_tag();
24135+ return tag;
24136+}
24137+
24138+/* Migrate the current task to the given tag; only the low 16 bits of tag are passed on. */
24139+int vc_tag_migrate(uint32_t tag)
24140+{
24141+ return dx_migrate_task(current, tag & 0xFFFF);
24142+}
24143+
24144+
24145diff -NurpP --minimal linux-3.0.9/kernel/vserver/vci_config.h linux-3.0.9-vs2.3.2.1/kernel/vserver/vci_config.h
24146--- linux-3.0.9/kernel/vserver/vci_config.h 1970-01-01 01:00:00.000000000 +0100
24147+++ linux-3.0.9-vs2.3.2.1/kernel/vserver/vci_config.h 2011-06-10 22:11:24.000000000 +0200
24148@@ -0,0 +1,76 @@
24149+
24150+/* interface version */
24151+
24152+#define VCI_VERSION 0x00020308
24153+
24154+/* Bit positions used by vci_kernel_config() to build the kernel configuration word. */
24155+enum {
24156+ VCI_KCBIT_NO_DYNAMIC = 0,
24157+
24158+ VCI_KCBIT_PROC_SECURE = 4,
24159+ /* VCI_KCBIT_HARDCPU = 5, */
24160+ /* VCI_KCBIT_IDLELIMIT = 6, */
24161+ /* VCI_KCBIT_IDLETIME = 7, */
24162+
24163+ VCI_KCBIT_COWBL = 8,
24164+ VCI_KCBIT_FULLCOWBL = 9,
24165+ VCI_KCBIT_SPACES = 10,
24166+ VCI_KCBIT_NETV2 = 11,
24167+ VCI_KCBIT_MEMCG = 12,
24168+
24169+ VCI_KCBIT_DEBUG = 16,
24170+ VCI_KCBIT_HISTORY = 20,
24171+ VCI_KCBIT_TAGGED = 24,
24172+ VCI_KCBIT_PPTAG = 28,
24173+
24174+ VCI_KCBIT_MORE = 31,
24175+};
24176+
24177+/* Return the config word: NO_DYNAMIC, SPACES, NETV2 and PPTAG are always set, other bits follow CONFIG_* options, the tagging mode is a number shifted to VCI_KCBIT_TAGGED. */
24178+static inline uint32_t vci_kernel_config(void)
24179+{
24180+ return
24181+ (1 << VCI_KCBIT_NO_DYNAMIC) |
24182+
24183+ /* configured features */
24184+#ifdef CONFIG_VSERVER_PROC_SECURE
24185+ (1 << VCI_KCBIT_PROC_SECURE) |
24186+#endif
24187+#ifdef CONFIG_VSERVER_COWBL
24188+ (1 << VCI_KCBIT_COWBL) |
24189+ (1 << VCI_KCBIT_FULLCOWBL) |
24190+#endif
24191+ (1 << VCI_KCBIT_SPACES) |
24192+ (1 << VCI_KCBIT_NETV2) |
24193+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
24194+ (1 << VCI_KCBIT_MEMCG) |
24195+#endif
24196+
24197+ /* debug options */
24198+#ifdef CONFIG_VSERVER_DEBUG
24199+ (1 << VCI_KCBIT_DEBUG) |
24200+#endif
24201+#ifdef CONFIG_VSERVER_HISTORY
24202+ (1 << VCI_KCBIT_HISTORY) |
24203+#endif
24204+
24205+ /* inode context tagging */
24206+#if defined(CONFIG_TAGGING_NONE)
24207+ (0 << VCI_KCBIT_TAGGED) |
24208+#elif defined(CONFIG_TAGGING_UID16)
24209+ (1 << VCI_KCBIT_TAGGED) |
24210+#elif defined(CONFIG_TAGGING_GID16)
24211+ (2 << VCI_KCBIT_TAGGED) |
24212+#elif defined(CONFIG_TAGGING_ID24)
24213+ (3 << VCI_KCBIT_TAGGED) |
24214+#elif defined(CONFIG_TAGGING_INTERN)
24215+ (4 << VCI_KCBIT_TAGGED) |
24216+#elif defined(CONFIG_TAGGING_RUNTIME)
24217+ (5 << VCI_KCBIT_TAGGED) |
24218+#else
24219+ (7 << VCI_KCBIT_TAGGED) |
24220+#endif
24221+ (1 << VCI_KCBIT_PPTAG) |
24222+ 0;
24223+}
24224+
24225diff -NurpP --minimal linux-3.0.9/mm/filemap_xip.c linux-3.0.9-vs2.3.2.1/mm/filemap_xip.c
24226--- linux-3.0.9/mm/filemap_xip.c 2011-07-22 11:18:12.000000000 +0200
24227+++ linux-3.0.9-vs2.3.2.1/mm/filemap_xip.c 2011-06-10 22:11:24.000000000 +0200
24228@@ -18,6 +18,7 @@
24229 #include <linux/seqlock.h>
24230 #include <linux/mutex.h>
24231 #include <linux/gfp.h>
24232+#include <linux/vs_memory.h>
24233 #include <asm/tlbflush.h>
24234 #include <asm/io.h>
24235
24236diff -NurpP --minimal linux-3.0.9/mm/fremap.c linux-3.0.9-vs2.3.2.1/mm/fremap.c
24237--- linux-3.0.9/mm/fremap.c 2011-07-22 11:18:12.000000000 +0200
24238+++ linux-3.0.9-vs2.3.2.1/mm/fremap.c 2011-06-10 22:11:24.000000000 +0200
24239@@ -16,6 +16,7 @@
24240 #include <linux/module.h>
24241 #include <linux/syscalls.h>
24242 #include <linux/mmu_notifier.h>
24243+#include <linux/vs_memory.h>
24244
24245 #include <asm/mmu_context.h>
24246 #include <asm/cacheflush.h>
24247diff -NurpP --minimal linux-3.0.9/mm/hugetlb.c linux-3.0.9-vs2.3.2.1/mm/hugetlb.c
24248--- linux-3.0.9/mm/hugetlb.c 2011-07-22 11:18:12.000000000 +0200
24249+++ linux-3.0.9-vs2.3.2.1/mm/hugetlb.c 2011-06-22 12:39:16.000000000 +0200
24250@@ -28,6 +28,7 @@
24251
24252 #include <linux/hugetlb.h>
24253 #include <linux/node.h>
24254+#include <linux/vs_memory.h>
24255 #include "internal.h"
24256
24257 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
24258diff -NurpP --minimal linux-3.0.9/mm/memcontrol.c linux-3.0.9-vs2.3.2.1/mm/memcontrol.c
24259--- linux-3.0.9/mm/memcontrol.c 2011-11-15 16:40:47.000000000 +0100
24260+++ linux-3.0.9-vs2.3.2.1/mm/memcontrol.c 2011-08-08 23:04:47.000000000 +0200
24261@@ -741,6 +741,31 @@ struct mem_cgroup *mem_cgroup_from_task(
24262 struct mem_cgroup, css);
24263 }
24264
24265+u64 mem_cgroup_res_read_u64(struct mem_cgroup *mem, int member) /* read one member of the mem->res counter */
24266+{
24267+ return res_counter_read_u64(&mem->res, member);
24268+}
24269+/* Read one member of the mem->memsw resource counter. */
24270+u64 mem_cgroup_memsw_read_u64(struct mem_cgroup *mem, int member)
24271+{
24272+ return res_counter_read_u64(&mem->memsw, member);
24273+}
24274+/* Return the MEM_CGROUP_STAT_CACHE statistic of mem via mem_cgroup_read_stat(). */
24275+s64 mem_cgroup_stat_read_cache(struct mem_cgroup *mem)
24276+{
24277+ return mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
24278+}
24279+/* Return the MEM_CGROUP_STAT_RSS statistic of mem (the "anon" in the name maps to the RSS counter). */
24280+s64 mem_cgroup_stat_read_anon(struct mem_cgroup *mem)
24281+{
24282+ return mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
24283+}
24284+/* Return the MEM_CGROUP_STAT_FILE_MAPPED statistic of mem via mem_cgroup_read_stat(). */
24285+s64 mem_cgroup_stat_read_mapped(struct mem_cgroup *mem)
24286+{
24287+ return mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED);
24288+}
24289+
24290 struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
24291 {
24292 struct mem_cgroup *mem = NULL;
24293diff -NurpP --minimal linux-3.0.9/mm/memory.c linux-3.0.9-vs2.3.2.1/mm/memory.c
24294--- linux-3.0.9/mm/memory.c 2011-11-15 16:40:47.000000000 +0100
24295+++ linux-3.0.9-vs2.3.2.1/mm/memory.c 2011-11-15 17:37:07.000000000 +0100
24296@@ -3388,6 +3388,7 @@ int handle_pte_fault(struct mm_struct *m
24297 {
24298 pte_t entry;
24299 spinlock_t *ptl;
24300+ int ret = 0, type = VXPT_UNKNOWN;
24301
24302 entry = *pte;
24303 if (!pte_present(entry)) {
24304@@ -3412,9 +3413,12 @@ int handle_pte_fault(struct mm_struct *m
24305 if (unlikely(!pte_same(*pte, entry)))
24306 goto unlock;
24307 if (flags & FAULT_FLAG_WRITE) {
24308- if (!pte_write(entry))
24309- return do_wp_page(mm, vma, address,
24310+ if (!pte_write(entry)) {
24311+ ret = do_wp_page(mm, vma, address,
24312 pte, pmd, ptl, entry);
24313+ type = VXPT_WRITE;
24314+ goto out;
24315+ }
24316 entry = pte_mkdirty(entry);
24317 }
24318 entry = pte_mkyoung(entry);
24319@@ -3432,7 +3436,10 @@ int handle_pte_fault(struct mm_struct *m
24320 }
24321 unlock:
24322 pte_unmap_unlock(pte, ptl);
24323- return 0;
24324+ ret = 0;
24325+out:
24326+ vx_page_fault(mm, vma, type, ret);
24327+ return ret;
24328 }
24329
24330 /*
24331diff -NurpP --minimal linux-3.0.9/mm/mremap.c linux-3.0.9-vs2.3.2.1/mm/mremap.c
24332--- linux-3.0.9/mm/mremap.c 2011-07-22 11:18:12.000000000 +0200
24333+++ linux-3.0.9-vs2.3.2.1/mm/mremap.c 2011-06-10 22:11:24.000000000 +0200
24334@@ -19,6 +19,7 @@
24335 #include <linux/security.h>
24336 #include <linux/syscalls.h>
24337 #include <linux/mmu_notifier.h>
24338+#include <linux/vs_memory.h>
24339
24340 #include <asm/uaccess.h>
24341 #include <asm/cacheflush.h>
24342diff -NurpP --minimal linux-3.0.9/mm/oom_kill.c linux-3.0.9-vs2.3.2.1/mm/oom_kill.c
24343--- linux-3.0.9/mm/oom_kill.c 2011-11-15 16:40:47.000000000 +0100
24344+++ linux-3.0.9-vs2.3.2.1/mm/oom_kill.c 2011-08-08 23:04:47.000000000 +0200
24345@@ -32,6 +32,9 @@
24346 #include <linux/mempolicy.h>
24347 #include <linux/security.h>
24348 #include <linux/ptrace.h>
24349+#include <linux/reboot.h>
24350+#include <linux/vs_memory.h>
24351+#include <linux/vs_context.h>
24352
24353 int sysctl_panic_on_oom;
24354 int sysctl_oom_kill_allocating_task;
24355@@ -134,11 +137,18 @@ struct task_struct *find_lock_task_mm(st
24356 static bool oom_unkillable_task(struct task_struct *p,
24357 const struct mem_cgroup *mem, const nodemask_t *nodemask)
24358 {
24359- if (is_global_init(p))
24360+ unsigned xid = vx_current_xid();
24361+
24362+ /* skip the init task, global and per guest */
24363+ if (task_is_init(p))
24364 return true;
24365 if (p->flags & PF_KTHREAD)
24366 return true;
24367
24368+ /* skip other guest and host processes if oom in guest */
24369+ if (xid && vx_task_xid(p) != xid)
24370+ return true;
24371+
24372 /* When mem_cgroup_out_of_memory() and p is not member of the group */
24373 if (mem && !task_in_mem_cgroup(p, mem))
24374 return true;
24375@@ -214,6 +224,18 @@ unsigned int oom_badness(struct task_str
24376 points += p->signal->oom_score_adj;
24377
24378 /*
24379+ * add points for context badness and
24380+ * reduce badness for processes belonging to
24381+ * a different context
24382+ */
24383+
24384+ points += vx_badness(p, p->mm);
24385+
24386+ if ((vx_current_xid() > 1) &&
24387+ vx_current_xid() != vx_task_xid(p))
24388+ points /= 16;
24389+
24390+ /*
24391 * Never return 0 for an eligible task that may be killed since it's
24392 * possible that no single user task uses more than 0.1% of memory and
24393 * no single admin tasks uses more than 3.0%.
24394@@ -429,8 +451,8 @@ static int oom_kill_task(struct task_str
24395 /* mm cannot be safely dereferenced after task_unlock(p) */
24396 mm = p->mm;
24397
24398- pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
24399- task_pid_nr(p), p->comm, K(p->mm->total_vm),
24400+ pr_err("Killed process %d:#%u (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
24401+ task_pid_nr(p), p->xid, p->comm, K(p->mm->total_vm),
24402 K(get_mm_counter(p->mm, MM_ANONPAGES)),
24403 K(get_mm_counter(p->mm, MM_FILEPAGES)));
24404 task_unlock(p);
24405@@ -484,8 +506,8 @@ static int oom_kill_process(struct task_
24406 }
24407
24408 task_lock(p);
24409- pr_err("%s: Kill process %d (%s) score %d or sacrifice child\n",
24410- message, task_pid_nr(p), p->comm, points);
24411+ pr_err("%s: Kill process %d:#%u (%s) score %d or sacrifice child\n",
24412+ message, task_pid_nr(p), p->xid, p->comm, points);
24413 task_unlock(p);
24414
24415 /*
24416@@ -586,6 +608,8 @@ int unregister_oom_notifier(struct notif
24417 }
24418 EXPORT_SYMBOL_GPL(unregister_oom_notifier);
24419
24420+long vs_oom_action(unsigned int);
24421+
24422 /*
24423 * Try to acquire the OOM killer lock for the zones in zonelist. Returns zero
24424 * if a parallel OOM killing is already taking place that includes a zone in
24425@@ -744,7 +768,12 @@ retry:
24426 if (!p) {
24427 dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
24428 read_unlock(&tasklist_lock);
24429- panic("Out of memory and no killable processes...\n");
24430+
24431+ /* avoid panic for guest OOM */
24432+ if (current->xid)
24433+ vs_oom_action(LINUX_REBOOT_CMD_OOM);
24434+ else
24435+ panic("Out of memory and no killable processes...\n");
24436 }
24437
24438 if (oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
24439diff -NurpP --minimal linux-3.0.9/mm/page_alloc.c linux-3.0.9-vs2.3.2.1/mm/page_alloc.c
24440--- linux-3.0.9/mm/page_alloc.c 2011-11-15 16:40:47.000000000 +0100
24441+++ linux-3.0.9-vs2.3.2.1/mm/page_alloc.c 2011-10-18 13:51:13.000000000 +0200
24442@@ -57,6 +57,8 @@
24443 #include <linux/ftrace_event.h>
24444 #include <linux/memcontrol.h>
24445 #include <linux/prefetch.h>
24446+#include <linux/vs_base.h>
24447+#include <linux/vs_limit.h>
24448
24449 #include <asm/tlbflush.h>
24450 #include <asm/div64.h>
24451@@ -2502,6 +2504,9 @@ void si_meminfo(struct sysinfo *val)
24452 val->totalhigh = totalhigh_pages;
24453 val->freehigh = nr_free_highpages();
24454 val->mem_unit = PAGE_SIZE;
24455+
24456+ if (vx_flags(VXF_VIRT_MEM, 0))
24457+ vx_vsi_meminfo(val);
24458 }
24459
24460 EXPORT_SYMBOL(si_meminfo);
24461@@ -2522,6 +2527,9 @@ void si_meminfo_node(struct sysinfo *val
24462 val->freehigh = 0;
24463 #endif
24464 val->mem_unit = PAGE_SIZE;
24465+
24466+ if (vx_flags(VXF_VIRT_MEM, 0))
24467+ vx_vsi_meminfo(val);
24468 }
24469 #endif
24470
24471diff -NurpP --minimal linux-3.0.9/mm/pgtable-generic.c linux-3.0.9-vs2.3.2.1/mm/pgtable-generic.c
24472--- linux-3.0.9/mm/pgtable-generic.c 2011-03-15 18:07:42.000000000 +0100
24473+++ linux-3.0.9-vs2.3.2.1/mm/pgtable-generic.c 2011-06-10 22:11:24.000000000 +0200
24474@@ -6,6 +6,8 @@
24475 * Copyright (C) 2010 Linus Torvalds
24476 */
24477
24478+#include <linux/mm.h>
24479+
24480 #include <linux/pagemap.h>
24481 #include <asm/tlb.h>
24482 #include <asm-generic/pgtable.h>
24483diff -NurpP --minimal linux-3.0.9/mm/rmap.c linux-3.0.9-vs2.3.2.1/mm/rmap.c
24484--- linux-3.0.9/mm/rmap.c 2011-07-22 11:18:12.000000000 +0200
24485+++ linux-3.0.9-vs2.3.2.1/mm/rmap.c 2011-07-01 11:35:35.000000000 +0200
24486@@ -57,6 +57,7 @@
24487 #include <linux/mmu_notifier.h>
24488 #include <linux/migrate.h>
24489 #include <linux/hugetlb.h>
24490+#include <linux/vs_memory.h>
24491
24492 #include <asm/tlbflush.h>
24493
24494diff -NurpP --minimal linux-3.0.9/mm/shmem.c linux-3.0.9-vs2.3.2.1/mm/shmem.c
24495--- linux-3.0.9/mm/shmem.c 2011-07-22 11:18:12.000000000 +0200
24496+++ linux-3.0.9-vs2.3.2.1/mm/shmem.c 2011-07-01 11:35:35.000000000 +0200
24497@@ -1850,7 +1850,7 @@ static int shmem_statfs(struct dentry *d
24498 {
24499 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
24500
24501- buf->f_type = TMPFS_MAGIC;
24502+ buf->f_type = TMPFS_SUPER_MAGIC;
24503 buf->f_bsize = PAGE_CACHE_SIZE;
24504 buf->f_namelen = NAME_MAX;
24505 if (sbinfo->max_blocks) {
24506@@ -2605,7 +2605,7 @@ int shmem_fill_super(struct super_block
24507 sb->s_maxbytes = SHMEM_MAX_BYTES;
24508 sb->s_blocksize = PAGE_CACHE_SIZE;
24509 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
24510- sb->s_magic = TMPFS_MAGIC;
24511+ sb->s_magic = TMPFS_SUPER_MAGIC;
24512 sb->s_op = &shmem_ops;
24513 sb->s_time_gran = 1;
24514 #ifdef CONFIG_TMPFS_XATTR
24515diff -NurpP --minimal linux-3.0.9/mm/slab.c linux-3.0.9-vs2.3.2.1/mm/slab.c
24516--- linux-3.0.9/mm/slab.c 2011-07-22 11:18:12.000000000 +0200
24517+++ linux-3.0.9-vs2.3.2.1/mm/slab.c 2011-06-15 02:41:23.000000000 +0200
24518@@ -411,6 +411,8 @@ static void kmem_list3_init(struct kmem_
24519 #define STATS_INC_FREEMISS(x) do { } while (0)
24520 #endif
24521
24522+#include "slab_vs.h"
24523+
24524 #if DEBUG
24525
24526 /*
24527@@ -3348,6 +3350,7 @@ retry:
24528
24529 obj = slab_get_obj(cachep, slabp, nodeid);
24530 check_slabp(cachep, slabp);
24531+ vx_slab_alloc(cachep, flags);
24532 l3->free_objects--;
24533 /* move slabp to correct slabp list: */
24534 list_del(&slabp->list);
24535@@ -3425,6 +3428,7 @@ __cache_alloc_node(struct kmem_cache *ca
24536 /* ___cache_alloc_node can fall back to other nodes */
24537 ptr = ____cache_alloc_node(cachep, flags, nodeid);
24538 out:
24539+ vx_slab_alloc(cachep, flags);
24540 local_irq_restore(save_flags);
24541 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
24542 kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
24543@@ -3612,6 +3616,7 @@ static inline void __cache_free(struct k
24544 check_irq_off();
24545 kmemleak_free_recursive(objp, cachep->flags);
24546 objp = cache_free_debugcheck(cachep, objp, caller);
24547+ vx_slab_free(cachep);
24548
24549 kmemcheck_slab_free(cachep, objp, obj_size(cachep));
24550
24551diff -NurpP --minimal linux-3.0.9/mm/slab_vs.h linux-3.0.9-vs2.3.2.1/mm/slab_vs.h
24552--- linux-3.0.9/mm/slab_vs.h 1970-01-01 01:00:00.000000000 +0100
24553+++ linux-3.0.9-vs2.3.2.1/mm/slab_vs.h 2011-06-10 22:11:24.000000000 +0200
24554@@ -0,0 +1,29 @@
24555+
24556+#include <linux/vserver/context.h>
24557+
24558+#include <linux/vs_context.h>
24559+/* Add cachep->buffer_size to the current context's cacct.slab[] slot for the cache's gfp zone; no-op without a context. flags is unused. */
24560+static inline
24561+void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags)
24562+{
24563+ int what = gfp_zone(cachep->gfpflags);
24564+ struct vx_info *vxi = current_vx_info();
24565+
24566+ if (!vxi)
24567+ return;
24568+
24569+ atomic_add(cachep->buffer_size, &vxi->cacct.slab[what]);
24570+}
24571+/* Subtract cachep->buffer_size from the current context's cacct.slab[] slot for the cache's gfp zone; no-op without a context. */
24572+static inline
24573+void vx_slab_free(struct kmem_cache *cachep)
24574+{
24575+ int what = gfp_zone(cachep->gfpflags);
24576+ struct vx_info *vxi = current_vx_info();
24577+
24578+ if (!vxi)
24579+ return;
24580+
24581+ atomic_sub(cachep->buffer_size, &vxi->cacct.slab[what]);
24582+}
24583+
24584diff -NurpP --minimal linux-3.0.9/mm/swapfile.c linux-3.0.9-vs2.3.2.1/mm/swapfile.c
24585--- linux-3.0.9/mm/swapfile.c 2011-07-22 11:18:12.000000000 +0200
24586+++ linux-3.0.9-vs2.3.2.1/mm/swapfile.c 2011-07-01 11:35:35.000000000 +0200
24587@@ -37,6 +37,8 @@
24588 #include <asm/tlbflush.h>
24589 #include <linux/swapops.h>
24590 #include <linux/page_cgroup.h>
24591+#include <linux/vs_base.h>
24592+#include <linux/vs_memory.h>
24593
24594 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
24595 unsigned char);
24596@@ -1759,6 +1761,16 @@ static int swap_show(struct seq_file *sw
24597
24598 if (si == SEQ_START_TOKEN) {
24599 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
24600+ if (vx_flags(VXF_VIRT_MEM, 0)) {
24601+ struct sysinfo si;
24602+
24603+ vx_vsi_swapinfo(&si);
24604+ if (si.totalswap < (1 << 10))
24605+ return 0;
24606+ seq_printf(swap, "%s\t\t\t\t\t%s\t%lu\t%lu\t%d\n",
24607+ "hdv0", "partition", si.totalswap >> 10,
24608+ (si.totalswap - si.freeswap) >> 10, -1);
24609+ }
24610 return 0;
24611 }
24612
24613@@ -2186,6 +2198,8 @@ void si_swapinfo(struct sysinfo *val)
24614 val->freeswap = nr_swap_pages + nr_to_be_unused;
24615 val->totalswap = total_swap_pages + nr_to_be_unused;
24616 spin_unlock(&swap_lock);
24617+ if (vx_flags(VXF_VIRT_MEM, 0))
24618+ vx_vsi_swapinfo(val);
24619 }
24620
24621 /*
24622diff -NurpP --minimal linux-3.0.9/net/bridge/br_multicast.c linux-3.0.9-vs2.3.2.1/net/bridge/br_multicast.c
24623--- linux-3.0.9/net/bridge/br_multicast.c 2011-11-15 16:40:47.000000000 +0100
24624+++ linux-3.0.9-vs2.3.2.1/net/bridge/br_multicast.c 2011-10-18 13:51:13.000000000 +0200
24625@@ -447,7 +447,7 @@ static struct sk_buff *br_ip6_multicast_
24626 ip6h->hop_limit = 1;
24627 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
24628 ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
24629- &ip6h->saddr);
24630+ &ip6h->saddr, NULL);
24631 ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
24632
24633 hopopt = (u8 *)(ip6h + 1);
24634diff -NurpP --minimal linux-3.0.9/net/core/dev.c linux-3.0.9-vs2.3.2.1/net/core/dev.c
24635--- linux-3.0.9/net/core/dev.c 2011-11-15 16:40:47.000000000 +0100
24636+++ linux-3.0.9-vs2.3.2.1/net/core/dev.c 2011-11-15 17:37:07.000000000 +0100
24637@@ -127,6 +127,7 @@
24638 #include <linux/in.h>
24639 #include <linux/jhash.h>
24640 #include <linux/random.h>
24641+#include <linux/vs_inet.h>
24642 #include <trace/events/napi.h>
24643 #include <trace/events/net.h>
24644 #include <trace/events/skb.h>
24645@@ -609,7 +610,8 @@ struct net_device *__dev_get_by_name(str
24646 struct hlist_head *head = dev_name_hash(net, name);
24647
24648 hlist_for_each_entry(dev, p, head, name_hlist)
24649- if (!strncmp(dev->name, name, IFNAMSIZ))
24650+ if (!strncmp(dev->name, name, IFNAMSIZ) &&
24651+ nx_dev_visible(current_nx_info(), dev))
24652 return dev;
24653
24654 return NULL;
24655@@ -635,7 +637,8 @@ struct net_device *dev_get_by_name_rcu(s
24656 struct hlist_head *head = dev_name_hash(net, name);
24657
24658 hlist_for_each_entry_rcu(dev, p, head, name_hlist)
24659- if (!strncmp(dev->name, name, IFNAMSIZ))
24660+ if (!strncmp(dev->name, name, IFNAMSIZ) &&
24661+ nx_dev_visible(current_nx_info(), dev))
24662 return dev;
24663
24664 return NULL;
24665@@ -686,7 +689,8 @@ struct net_device *__dev_get_by_index(st
24666 struct hlist_head *head = dev_index_hash(net, ifindex);
24667
24668 hlist_for_each_entry(dev, p, head, index_hlist)
24669- if (dev->ifindex == ifindex)
24670+ if ((dev->ifindex == ifindex) &&
24671+ nx_dev_visible(current_nx_info(), dev))
24672 return dev;
24673
24674 return NULL;
24675@@ -704,7 +708,7 @@ EXPORT_SYMBOL(__dev_get_by_index);
24676 * about locking. The caller must hold RCU lock.
24677 */
24678
24679-struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
24680+struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex)
24681 {
24682 struct hlist_node *p;
24683 struct net_device *dev;
24684@@ -716,6 +720,16 @@ struct net_device *dev_get_by_index_rcu(
24685
24686 return NULL;
24687 }
24688+EXPORT_SYMBOL(dev_get_by_index_real_rcu);
24689+
24690+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
24691+{
24692+ struct net_device *dev = dev_get_by_index_real_rcu(net, ifindex);
24693+
24694+ if (nx_dev_visible(current_nx_info(), dev))
24695+ return dev;
24696+ return NULL;
24697+}
24698 EXPORT_SYMBOL(dev_get_by_index_rcu);
24699
24700
24701@@ -764,7 +778,8 @@ struct net_device *dev_getbyhwaddr_rcu(s
24702
24703 for_each_netdev_rcu(net, dev)
24704 if (dev->type == type &&
24705- !memcmp(dev->dev_addr, ha, dev->addr_len))
24706+ !memcmp(dev->dev_addr, ha, dev->addr_len) &&
24707+ nx_dev_visible(current_nx_info(), dev))
24708 return dev;
24709
24710 return NULL;
24711@@ -776,9 +791,11 @@ struct net_device *__dev_getfirstbyhwtyp
24712 struct net_device *dev;
24713
24714 ASSERT_RTNL();
24715- for_each_netdev(net, dev)
24716- if (dev->type == type)
24717+ for_each_netdev(net, dev) {
24718+ if ((dev->type == type) &&
24719+ nx_dev_visible(current_nx_info(), dev))
24720 return dev;
24721+ }
24722
24723 return NULL;
24724 }
24725@@ -896,6 +913,8 @@ static int __dev_alloc_name(struct net *
24726 continue;
24727 if (i < 0 || i >= max_netdevices)
24728 continue;
24729+ if (!nx_dev_visible(current_nx_info(), d))
24730+ continue;
24731
24732 /* avoid cases where sscanf is not exact inverse of printf */
24733 snprintf(buf, IFNAMSIZ, name, i);
24734@@ -3937,6 +3956,8 @@ static int dev_ifconf(struct net *net, c
24735
24736 total = 0;
24737 for_each_netdev(net, dev) {
24738+ if (!nx_dev_visible(current_nx_info(), dev))
24739+ continue;
24740 for (i = 0; i < NPROTO; i++) {
24741 if (gifconf_list[i]) {
24742 int done;
24743@@ -4011,6 +4032,10 @@ static void dev_seq_printf_stats(struct
24744 struct rtnl_link_stats64 temp;
24745 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
24746
24747+ /* device visible inside network context? */
24748+ if (!nx_dev_visible(current_nx_info(), dev))
24749+ return;
24750+
24751 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
24752 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
24753 dev->name, stats->rx_bytes, stats->rx_packets,
24754diff -NurpP --minimal linux-3.0.9/net/core/rtnetlink.c linux-3.0.9-vs2.3.2.1/net/core/rtnetlink.c
24755--- linux-3.0.9/net/core/rtnetlink.c 2011-07-22 11:18:13.000000000 +0200
24756+++ linux-3.0.9-vs2.3.2.1/net/core/rtnetlink.c 2011-06-10 22:11:24.000000000 +0200
24757@@ -1015,6 +1015,8 @@ static int rtnl_dump_ifinfo(struct sk_bu
24758 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
24759 if (idx < s_idx)
24760 goto cont;
24761+ if (!nx_dev_visible(skb->sk->sk_nx_info, dev))
24762+ continue;
24763 if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
24764 NETLINK_CB(cb->skb).pid,
24765 cb->nlh->nlmsg_seq, 0,
24766@@ -1848,6 +1850,9 @@ void rtmsg_ifinfo(int type, struct net_d
24767 struct sk_buff *skb;
24768 int err = -ENOBUFS;
24769
24770+ if (!nx_dev_visible(current_nx_info(), dev))
24771+ return;
24772+
24773 skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
24774 if (skb == NULL)
24775 goto errout;
24776diff -NurpP --minimal linux-3.0.9/net/core/sock.c linux-3.0.9-vs2.3.2.1/net/core/sock.c
24777--- linux-3.0.9/net/core/sock.c 2011-11-15 16:40:47.000000000 +0100
24778+++ linux-3.0.9-vs2.3.2.1/net/core/sock.c 2011-11-15 17:37:07.000000000 +0100
24779@@ -127,6 +127,10 @@
24780 #include <net/cls_cgroup.h>
24781
24782 #include <linux/filter.h>
24783+#include <linux/vs_socket.h>
24784+#include <linux/vs_limit.h>
24785+#include <linux/vs_context.h>
24786+#include <linux/vs_network.h>
24787
24788 #ifdef CONFIG_INET
24789 #include <net/tcp.h>
24790@@ -1070,6 +1074,8 @@ static struct sock *sk_prot_alloc(struct
24791 goto out_free_sec;
24792 sk_tx_queue_clear(sk);
24793 }
24794+ sock_vx_init(sk);
24795+ sock_nx_init(sk);
24796
24797 return sk;
24798
24799@@ -1169,6 +1175,11 @@ static void __sk_free(struct sock *sk)
24800 put_cred(sk->sk_peer_cred);
24801 put_pid(sk->sk_peer_pid);
24802 put_net(sock_net(sk));
24803+ vx_sock_dec(sk);
24804+ clr_vx_info(&sk->sk_vx_info);
24805+ sk->sk_xid = -1;
24806+ clr_nx_info(&sk->sk_nx_info);
24807+ sk->sk_nid = -1;
24808 sk_prot_free(sk->sk_prot_creator, sk);
24809 }
24810
24811@@ -1216,6 +1227,8 @@ struct sock *sk_clone(const struct sock
24812
24813 /* SANITY */
24814 get_net(sock_net(newsk));
24815+ sock_vx_init(newsk);
24816+ sock_nx_init(newsk);
24817 sk_node_init(&newsk->sk_node);
24818 sock_lock_init(newsk);
24819 bh_lock_sock(newsk);
24820@@ -1272,6 +1285,12 @@ struct sock *sk_clone(const struct sock
24821 smp_wmb();
24822 atomic_set(&newsk->sk_refcnt, 2);
24823
24824+ set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
24825+ newsk->sk_xid = sk->sk_xid;
24826+ vx_sock_inc(newsk);
24827+ set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
24828+ newsk->sk_nid = sk->sk_nid;
24829+
24830 /*
24831 * Increment the counter in the same struct proto as the master
24832 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
24833@@ -2018,6 +2037,12 @@ void sock_init_data(struct socket *sock,
24834
24835 sk->sk_stamp = ktime_set(-1L, 0);
24836
24837+ set_vx_info(&sk->sk_vx_info, current_vx_info());
24838+ sk->sk_xid = vx_current_xid();
24839+ vx_sock_inc(sk);
24840+ set_nx_info(&sk->sk_nx_info, current_nx_info());
24841+ sk->sk_nid = nx_current_nid();
24842+
24843 /*
24844 * Before updating sk_refcnt, we must commit prior changes to memory
24845 * (Documentation/RCU/rculist_nulls.txt for details)
24846diff -NurpP --minimal linux-3.0.9/net/ipv4/af_inet.c linux-3.0.9-vs2.3.2.1/net/ipv4/af_inet.c
24847--- linux-3.0.9/net/ipv4/af_inet.c 2011-07-22 11:18:13.000000000 +0200
24848+++ linux-3.0.9-vs2.3.2.1/net/ipv4/af_inet.c 2011-07-19 00:44:39.000000000 +0200
24849@@ -117,6 +117,7 @@
24850 #ifdef CONFIG_IP_MROUTE
24851 #include <linux/mroute.h>
24852 #endif
24853+#include <linux/vs_limit.h>
24854
24855
24856 /* The inetsw table contains everything that inet_create needs to
24857@@ -326,9 +327,13 @@ lookup_protocol:
24858 }
24859
24860 err = -EPERM;
24861+ if ((protocol == IPPROTO_ICMP) &&
24862+ nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
24863+ goto override;
24864+
24865 if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
24866 goto out_rcu_unlock;
24867-
24868+override:
24869 err = -EAFNOSUPPORT;
24870 if (!inet_netns_ok(net, protocol))
24871 goto out_rcu_unlock;
24872@@ -452,6 +457,7 @@ int inet_bind(struct socket *sock, struc
24873 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
24874 struct sock *sk = sock->sk;
24875 struct inet_sock *inet = inet_sk(sk);
24876+ struct nx_v4_sock_addr nsa;
24877 unsigned short snum;
24878 int chk_addr_ret;
24879 int err;
24880@@ -470,7 +476,11 @@ int inet_bind(struct socket *sock, struc
24881 goto out;
24882 }
24883
24884- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
24885+ err = v4_map_sock_addr(inet, addr, &nsa);
24886+ if (err)
24887+ goto out;
24888+
24889+ chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
24890
24891 /* Not specified by any standard per-se, however it breaks too
24892 * many applications when removed. It is unfortunate since
24893@@ -482,7 +492,7 @@ int inet_bind(struct socket *sock, struc
24894 err = -EADDRNOTAVAIL;
24895 if (!sysctl_ip_nonlocal_bind &&
24896 !(inet->freebind || inet->transparent) &&
24897- addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
24898+ nsa.saddr != htonl(INADDR_ANY) &&
24899 chk_addr_ret != RTN_LOCAL &&
24900 chk_addr_ret != RTN_MULTICAST &&
24901 chk_addr_ret != RTN_BROADCAST)
24902@@ -507,7 +517,7 @@ int inet_bind(struct socket *sock, struc
24903 if (sk->sk_state != TCP_CLOSE || inet->inet_num)
24904 goto out_release_sock;
24905
24906- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
24907+ v4_set_sock_addr(inet, &nsa);
24908 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
24909 inet->inet_saddr = 0; /* Use device */
24910
24911@@ -710,11 +720,13 @@ int inet_getname(struct socket *sock, st
24912 peer == 1))
24913 return -ENOTCONN;
24914 sin->sin_port = inet->inet_dport;
24915- sin->sin_addr.s_addr = inet->inet_daddr;
24916+ sin->sin_addr.s_addr =
24917+ nx_map_sock_lback(sk->sk_nx_info, inet->inet_daddr);
24918 } else {
24919 __be32 addr = inet->inet_rcv_saddr;
24920 if (!addr)
24921 addr = inet->inet_saddr;
24922+ addr = nx_map_sock_lback(sk->sk_nx_info, addr);
24923 sin->sin_port = inet->inet_sport;
24924 sin->sin_addr.s_addr = addr;
24925 }
24926diff -NurpP --minimal linux-3.0.9/net/ipv4/arp.c linux-3.0.9-vs2.3.2.1/net/ipv4/arp.c
24927--- linux-3.0.9/net/ipv4/arp.c 2011-05-22 16:18:00.000000000 +0200
24928+++ linux-3.0.9-vs2.3.2.1/net/ipv4/arp.c 2011-08-29 05:14:51.000000000 +0200
24929@@ -1365,6 +1365,7 @@ static void arp_format_neigh_entry(struc
24930 struct net_device *dev = n->dev;
24931 int hatype = dev->type;
24932
24933+ /* FIXME: check for network context */
24934 read_lock(&n->lock);
24935 /* Convert hardware address to XX:XX:XX:XX ... form. */
24936 #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
24937@@ -1396,6 +1397,7 @@ static void arp_format_pneigh_entry(stru
24938 int hatype = dev ? dev->type : 0;
24939 char tbuf[16];
24940
24941+ /* FIXME: check for network context */
24942 sprintf(tbuf, "%pI4", n->key);
24943 seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
24944 tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00",
24945diff -NurpP --minimal linux-3.0.9/net/ipv4/devinet.c linux-3.0.9-vs2.3.2.1/net/ipv4/devinet.c
24946--- linux-3.0.9/net/ipv4/devinet.c 2011-11-15 16:40:47.000000000 +0100
24947+++ linux-3.0.9-vs2.3.2.1/net/ipv4/devinet.c 2011-08-29 03:45:09.000000000 +0200
24948@@ -518,6 +518,7 @@ struct in_device *inetdev_by_index(struc
24949 }
24950 EXPORT_SYMBOL(inetdev_by_index);
24951
24952+
24953 /* Called only from RTNL semaphored context. No locks. */
24954
24955 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
24956@@ -759,6 +760,8 @@ int devinet_ioctl(struct net *net, unsig
24957
24958 in_dev = __in_dev_get_rtnl(dev);
24959 if (in_dev) {
24960+ struct nx_info *nxi = current_nx_info();
24961+
24962 if (tryaddrmatch) {
24963 /* Matthias Andree */
24964 /* compare label and address (4.4BSD style) */
24965@@ -767,6 +770,8 @@ int devinet_ioctl(struct net *net, unsig
24966 This is checked above. */
24967 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
24968 ifap = &ifa->ifa_next) {
24969+ if (!nx_v4_ifa_visible(nxi, ifa))
24970+ continue;
24971 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
24972 sin_orig.sin_addr.s_addr ==
24973 ifa->ifa_local) {
24974@@ -779,9 +784,12 @@ int devinet_ioctl(struct net *net, unsig
24975 comparing just the label */
24976 if (!ifa) {
24977 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
24978- ifap = &ifa->ifa_next)
24979+ ifap = &ifa->ifa_next) {
24980+ if (!nx_v4_ifa_visible(nxi, ifa))
24981+ continue;
24982 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
24983 break;
24984+ }
24985 }
24986 }
24987
24988@@ -934,6 +942,8 @@ static int inet_gifconf(struct net_devic
24989 goto out;
24990
24991 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
24992+ if (!nx_v4_ifa_visible(current_nx_info(), ifa))
24993+ continue;
24994 if (!buf) {
24995 done += sizeof(ifr);
24996 continue;
24997@@ -1294,6 +1304,7 @@ static int inet_dump_ifaddr(struct sk_bu
24998 struct net_device *dev;
24999 struct in_device *in_dev;
25000 struct in_ifaddr *ifa;
25001+ struct sock *sk = skb->sk;
25002 struct hlist_head *head;
25003 struct hlist_node *node;
25004
25005@@ -1316,6 +1327,8 @@ static int inet_dump_ifaddr(struct sk_bu
25006
25007 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
25008 ifa = ifa->ifa_next, ip_idx++) {
25009+ if (sk && !nx_v4_ifa_visible(sk->sk_nx_info, ifa))
25010+ continue;
25011 if (ip_idx < s_ip_idx)
25012 continue;
25013 if (inet_fill_ifaddr(skb, ifa,
25014diff -NurpP --minimal linux-3.0.9/net/ipv4/fib_trie.c linux-3.0.9-vs2.3.2.1/net/ipv4/fib_trie.c
25015--- linux-3.0.9/net/ipv4/fib_trie.c 2011-07-22 11:18:13.000000000 +0200
25016+++ linux-3.0.9-vs2.3.2.1/net/ipv4/fib_trie.c 2011-08-29 04:56:47.000000000 +0200
25017@@ -2554,6 +2554,7 @@ static int fib_route_seq_show(struct seq
25018 || fa->fa_type == RTN_MULTICAST)
25019 continue;
25020
25021+ /* FIXME: check for network context? */
25022 if (fi)
25023 seq_printf(seq,
25024 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
25025diff -NurpP --minimal linux-3.0.9/net/ipv4/inet_connection_sock.c linux-3.0.9-vs2.3.2.1/net/ipv4/inet_connection_sock.c
25026--- linux-3.0.9/net/ipv4/inet_connection_sock.c 2011-07-22 11:18:13.000000000 +0200
25027+++ linux-3.0.9-vs2.3.2.1/net/ipv4/inet_connection_sock.c 2011-06-10 22:11:24.000000000 +0200
25028@@ -52,6 +52,37 @@ void inet_get_local_port_range(int *low,
25029 }
25030 EXPORT_SYMBOL(inet_get_local_port_range);
25031
25032+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
25033+{
25034+ __be32 sk1_rcv_saddr = sk_rcv_saddr(sk1),
25035+ sk2_rcv_saddr = sk_rcv_saddr(sk2);
25036+
25037+ if (inet_v6_ipv6only(sk2))
25038+ return 0;
25039+
25040+ if (sk1_rcv_saddr &&
25041+ sk2_rcv_saddr &&
25042+ sk1_rcv_saddr == sk2_rcv_saddr)
25043+ return 1;
25044+
25045+ if (sk1_rcv_saddr &&
25046+ !sk2_rcv_saddr &&
25047+ v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, NXA_MASK_BIND))
25048+ return 1;
25049+
25050+ if (sk2_rcv_saddr &&
25051+ !sk1_rcv_saddr &&
25052+ v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, NXA_MASK_BIND))
25053+ return 1;
25054+
25055+ if (!sk1_rcv_saddr &&
25056+ !sk2_rcv_saddr &&
25057+ nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info))
25058+ return 1;
25059+
25060+ return 0;
25061+}
25062+
25063 int inet_csk_bind_conflict(const struct sock *sk,
25064 const struct inet_bind_bucket *tb)
25065 {
25066@@ -74,9 +105,7 @@ int inet_csk_bind_conflict(const struct
25067 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
25068 if (!reuse || !sk2->sk_reuse ||
25069 sk2->sk_state == TCP_LISTEN) {
25070- const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
25071- if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
25072- sk2_rcv_saddr == sk_rcv_saddr(sk))
25073+ if (ipv4_rcv_saddr_equal(sk, sk2))
25074 break;
25075 }
25076 }
25077diff -NurpP --minimal linux-3.0.9/net/ipv4/inet_diag.c linux-3.0.9-vs2.3.2.1/net/ipv4/inet_diag.c
25078--- linux-3.0.9/net/ipv4/inet_diag.c 2011-07-22 11:18:13.000000000 +0200
25079+++ linux-3.0.9-vs2.3.2.1/net/ipv4/inet_diag.c 2011-06-22 12:39:16.000000000 +0200
25080@@ -33,6 +33,8 @@
25081 #include <linux/stddef.h>
25082
25083 #include <linux/inet_diag.h>
25084+#include <linux/vs_network.h>
25085+#include <linux/vs_inet.h>
25086
25087 static const struct inet_diag_handler **inet_diag_table;
25088
25089@@ -119,8 +121,10 @@ static int inet_csk_diag_fill(struct soc
25090
25091 r->id.idiag_sport = inet->inet_sport;
25092 r->id.idiag_dport = inet->inet_dport;
25093- r->id.idiag_src[0] = inet->inet_rcv_saddr;
25094- r->id.idiag_dst[0] = inet->inet_daddr;
25095+ r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info,
25096+ inet->inet_rcv_saddr);
25097+ r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info,
25098+ inet->inet_daddr);
25099
25100 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
25101 if (r->idiag_family == AF_INET6) {
25102@@ -205,8 +209,8 @@ static int inet_twsk_diag_fill(struct in
25103 r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
25104 r->id.idiag_sport = tw->tw_sport;
25105 r->id.idiag_dport = tw->tw_dport;
25106- r->id.idiag_src[0] = tw->tw_rcv_saddr;
25107- r->id.idiag_dst[0] = tw->tw_daddr;
25108+ r->id.idiag_src[0] = nx_map_sock_lback(tw->tw_nx_info, tw->tw_rcv_saddr);
25109+ r->id.idiag_dst[0] = nx_map_sock_lback(tw->tw_nx_info, tw->tw_daddr);
25110 r->idiag_state = tw->tw_substate;
25111 r->idiag_timer = 3;
25112 r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ);
25113@@ -263,6 +267,7 @@ static int inet_diag_get_exact(struct sk
25114 err = -EINVAL;
25115
25116 if (req->idiag_family == AF_INET) {
25117+ /* TODO: lback */
25118 sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0],
25119 req->id.idiag_dport, req->id.idiag_src[0],
25120 req->id.idiag_sport, req->id.idiag_if);
25121@@ -505,6 +510,7 @@ static int inet_csk_diag_dump(struct soc
25122 } else
25123 #endif
25124 {
25125+ /* TODO: lback */
25126 entry.saddr = &inet->inet_rcv_saddr;
25127 entry.daddr = &inet->inet_daddr;
25128 }
25129@@ -543,6 +549,7 @@ static int inet_twsk_diag_dump(struct in
25130 } else
25131 #endif
25132 {
25133+ /* TODO: lback */
25134 entry.saddr = &tw->tw_rcv_saddr;
25135 entry.daddr = &tw->tw_daddr;
25136 }
25137@@ -589,8 +596,8 @@ static int inet_diag_fill_req(struct sk_
25138
25139 r->id.idiag_sport = inet->inet_sport;
25140 r->id.idiag_dport = ireq->rmt_port;
25141- r->id.idiag_src[0] = ireq->loc_addr;
25142- r->id.idiag_dst[0] = ireq->rmt_addr;
25143+ r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, ireq->loc_addr);
25144+ r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, ireq->rmt_addr);
25145 r->idiag_expires = jiffies_to_msecs(tmo);
25146 r->idiag_rqueue = 0;
25147 r->idiag_wqueue = 0;
25148@@ -661,6 +668,7 @@ static int inet_diag_dump_reqs(struct sk
25149 continue;
25150
25151 if (bc) {
25152+ /* TODO: lback */
25153 entry.saddr =
25154 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
25155 (entry.family == AF_INET6) ?
25156@@ -731,6 +739,8 @@ static int inet_diag_dump(struct sk_buff
25157 sk_nulls_for_each(sk, node, &ilb->head) {
25158 struct inet_sock *inet = inet_sk(sk);
25159
25160+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25161+ continue;
25162 if (num < s_num) {
25163 num++;
25164 continue;
25165@@ -797,6 +807,8 @@ skip_listen_ht:
25166 sk_nulls_for_each(sk, node, &head->chain) {
25167 struct inet_sock *inet = inet_sk(sk);
25168
25169+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25170+ continue;
25171 if (num < s_num)
25172 goto next_normal;
25173 if (!(r->idiag_states & (1 << sk->sk_state)))
25174@@ -821,6 +833,8 @@ next_normal:
25175 inet_twsk_for_each(tw, node,
25176 &head->twchain) {
25177
25178+ if (!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))
25179+ continue;
25180 if (num < s_num)
25181 goto next_dying;
25182 if (r->id.idiag_sport != tw->tw_sport &&
25183diff -NurpP --minimal linux-3.0.9/net/ipv4/inet_hashtables.c linux-3.0.9-vs2.3.2.1/net/ipv4/inet_hashtables.c
25184--- linux-3.0.9/net/ipv4/inet_hashtables.c 2011-11-15 16:40:47.000000000 +0100
25185+++ linux-3.0.9-vs2.3.2.1/net/ipv4/inet_hashtables.c 2011-08-29 03:45:56.000000000 +0200
25186@@ -22,6 +22,7 @@
25187 #include <net/inet_connection_sock.h>
25188 #include <net/inet_hashtables.h>
25189 #include <net/secure_seq.h>
25190+#include <net/route.h>
25191 #include <net/ip.h>
25192
25193 /*
25194@@ -156,6 +157,11 @@ static inline int compute_score(struct s
25195 if (rcv_saddr != daddr)
25196 return -1;
25197 score += 2;
25198+ } else {
25199+ /* block non nx_info ips */
25200+ if (!v4_addr_in_nx_info(sk->sk_nx_info,
25201+ daddr, NXA_MASK_BIND))
25202+ return -1;
25203 }
25204 if (sk->sk_bound_dev_if) {
25205 if (sk->sk_bound_dev_if != dif)
25206@@ -173,7 +179,6 @@ static inline int compute_score(struct s
25207 * wildcarded during the search since they can never be otherwise.
25208 */
25209
25210-
25211 struct sock *__inet_lookup_listener(struct net *net,
25212 struct inet_hashinfo *hashinfo,
25213 const __be32 daddr, const unsigned short hnum,
25214@@ -196,6 +201,7 @@ begin:
25215 hiscore = score;
25216 }
25217 }
25218+
25219 /*
25220 * if the nulls value we got at the end of this lookup is
25221 * not the expected one, we must restart lookup.
25222diff -NurpP --minimal linux-3.0.9/net/ipv4/netfilter/nf_nat_helper.c linux-3.0.9-vs2.3.2.1/net/ipv4/netfilter/nf_nat_helper.c
25223--- linux-3.0.9/net/ipv4/netfilter/nf_nat_helper.c 2011-07-22 11:18:13.000000000 +0200
25224+++ linux-3.0.9-vs2.3.2.1/net/ipv4/netfilter/nf_nat_helper.c 2011-06-15 02:40:14.000000000 +0200
25225@@ -20,6 +20,7 @@
25226 #include <net/route.h>
25227
25228 #include <linux/netfilter_ipv4.h>
25229+#include <net/route.h>
25230 #include <net/netfilter/nf_conntrack.h>
25231 #include <net/netfilter/nf_conntrack_helper.h>
25232 #include <net/netfilter/nf_conntrack_ecache.h>
25233diff -NurpP --minimal linux-3.0.9/net/ipv4/netfilter.c linux-3.0.9-vs2.3.2.1/net/ipv4/netfilter.c
25234--- linux-3.0.9/net/ipv4/netfilter.c 2011-11-15 16:40:47.000000000 +0100
25235+++ linux-3.0.9-vs2.3.2.1/net/ipv4/netfilter.c 2011-10-18 13:51:13.000000000 +0200
25236@@ -5,7 +5,7 @@
25237 #include <linux/ip.h>
25238 #include <linux/skbuff.h>
25239 #include <linux/gfp.h>
25240-#include <net/route.h>
25241+// #include <net/route.h>
25242 #include <net/xfrm.h>
25243 #include <net/ip.h>
25244 #include <net/netfilter/nf_queue.h>
25245diff -NurpP --minimal linux-3.0.9/net/ipv4/raw.c linux-3.0.9-vs2.3.2.1/net/ipv4/raw.c
25246--- linux-3.0.9/net/ipv4/raw.c 2011-07-22 11:18:13.000000000 +0200
25247+++ linux-3.0.9-vs2.3.2.1/net/ipv4/raw.c 2011-07-27 20:26:04.000000000 +0200
25248@@ -117,7 +117,7 @@ static struct sock *__raw_v4_lookup(stru
25249
25250 if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
25251 !(inet->inet_daddr && inet->inet_daddr != raddr) &&
25252- !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
25253+ v4_sock_addr_match(sk->sk_nx_info, inet, laddr) &&
25254 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
25255 goto found; /* gotcha */
25256 }
25257@@ -385,6 +385,12 @@ static int raw_send_hdrinc(struct sock *
25258 icmp_out_count(net, ((struct icmphdr *)
25259 skb_transport_header(skb))->type);
25260
25261+ err = -EPERM;
25262+ if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
25263+ sk->sk_nx_info &&
25264+ !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
25265+ goto error_free;
25266+
25267 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
25268 rt->dst.dev, dst_output);
25269 if (err > 0)
25270@@ -571,6 +577,16 @@ static int raw_sendmsg(struct kiocb *ioc
25271 goto done;
25272 }
25273
25274+ if (sk->sk_nx_info) {
25275+ rt = ip_v4_find_src(sock_net(sk), sk->sk_nx_info, &fl4);
25276+ if (IS_ERR(rt)) {
25277+ err = PTR_ERR(rt);
25278+ rt = NULL;
25279+ goto done;
25280+ }
25281+ ip_rt_put(rt);
25282+ }
25283+
25284 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
25285 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
25286 if (IS_ERR(rt)) {
25287@@ -647,17 +663,19 @@ static int raw_bind(struct sock *sk, str
25288 {
25289 struct inet_sock *inet = inet_sk(sk);
25290 struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
25291+ struct nx_v4_sock_addr nsa = { 0 };
25292 int ret = -EINVAL;
25293 int chk_addr_ret;
25294
25295 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
25296 goto out;
25297- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
25298+ v4_map_sock_addr(inet, addr, &nsa);
25299+ chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
25300 ret = -EADDRNOTAVAIL;
25301- if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
25302+ if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
25303 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
25304 goto out;
25305- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
25306+ v4_set_sock_addr(inet, &nsa);
25307 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
25308 inet->inet_saddr = 0; /* Use device */
25309 sk_dst_reset(sk);
25310@@ -709,7 +727,8 @@ static int raw_recvmsg(struct kiocb *ioc
25311 /* Copy the address. */
25312 if (sin) {
25313 sin->sin_family = AF_INET;
25314- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
25315+ sin->sin_addr.s_addr =
25316+ nx_map_sock_lback(sk->sk_nx_info, ip_hdr(skb)->saddr);
25317 sin->sin_port = 0;
25318 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
25319 }
25320@@ -905,7 +924,8 @@ static struct sock *raw_get_first(struct
25321 struct hlist_node *node;
25322
25323 sk_for_each(sk, node, &state->h->ht[state->bucket])
25324- if (sock_net(sk) == seq_file_net(seq))
25325+ if ((sock_net(sk) == seq_file_net(seq)) &&
25326+ nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25327 goto found;
25328 }
25329 sk = NULL;
25330@@ -921,7 +941,8 @@ static struct sock *raw_get_next(struct
25331 sk = sk_next(sk);
25332 try_again:
25333 ;
25334- } while (sk && sock_net(sk) != seq_file_net(seq));
25335+ } while (sk && ((sock_net(sk) != seq_file_net(seq)) ||
25336+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
25337
25338 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
25339 sk = sk_head(&state->h->ht[state->bucket]);
25340diff -NurpP --minimal linux-3.0.9/net/ipv4/route.c linux-3.0.9-vs2.3.2.1/net/ipv4/route.c
25341--- linux-3.0.9/net/ipv4/route.c 2011-11-15 16:40:47.000000000 +0100
25342+++ linux-3.0.9-vs2.3.2.1/net/ipv4/route.c 2011-10-18 13:51:13.000000000 +0200
25343@@ -2523,7 +2523,7 @@ static struct rtable *ip_route_output_sl
25344
25345
25346 if (fl4->flowi4_oif) {
25347- dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
25348+ dev_out = dev_get_by_index_real_rcu(net, fl4->flowi4_oif);
25349 rth = ERR_PTR(-ENODEV);
25350 if (dev_out == NULL)
25351 goto out;
25352diff -NurpP --minimal linux-3.0.9/net/ipv4/tcp.c linux-3.0.9-vs2.3.2.1/net/ipv4/tcp.c
25353--- linux-3.0.9/net/ipv4/tcp.c 2011-07-22 11:18:13.000000000 +0200
25354+++ linux-3.0.9-vs2.3.2.1/net/ipv4/tcp.c 2011-07-19 00:44:39.000000000 +0200
25355@@ -266,6 +266,7 @@
25356 #include <linux/crypto.h>
25357 #include <linux/time.h>
25358 #include <linux/slab.h>
25359+#include <linux/in.h>
25360
25361 #include <net/icmp.h>
25362 #include <net/tcp.h>
25363diff -NurpP --minimal linux-3.0.9/net/ipv4/tcp_ipv4.c linux-3.0.9-vs2.3.2.1/net/ipv4/tcp_ipv4.c
25364--- linux-3.0.9/net/ipv4/tcp_ipv4.c 2011-11-15 16:40:47.000000000 +0100
25365+++ linux-3.0.9-vs2.3.2.1/net/ipv4/tcp_ipv4.c 2011-11-15 17:37:07.000000000 +0100
25366@@ -2006,6 +2006,12 @@ static void *listening_get_next(struct s
25367 req = req->dl_next;
25368 while (1) {
25369 while (req) {
25370+ vxdprintk(VXD_CBIT(net, 6),
25371+ "sk,req: %p [#%d] (from %d)", req->sk,
25372+ (req->sk)?req->sk->sk_nid:0, nx_current_nid());
25373+ if (req->sk &&
25374+ !nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT))
25375+ continue;
25376 if (req->rsk_ops->family == st->family) {
25377 cur = req;
25378 goto out;
25379@@ -2030,6 +2036,10 @@ get_req:
25380 }
25381 get_sk:
25382 sk_nulls_for_each_from(sk, node) {
25383+ vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
25384+ sk, sk->sk_nid, nx_current_nid());
25385+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25386+ continue;
25387 if (!net_eq(sock_net(sk), net))
25388 continue;
25389 if (sk->sk_family == st->family) {
25390@@ -2106,6 +2116,11 @@ static void *established_get_first(struc
25391
25392 spin_lock_bh(lock);
25393 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
25394+ vxdprintk(VXD_CBIT(net, 6),
25395+ "sk,egf: %p [#%d] (from %d)",
25396+ sk, sk->sk_nid, nx_current_nid());
25397+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25398+ continue;
25399 if (sk->sk_family != st->family ||
25400 !net_eq(sock_net(sk), net)) {
25401 continue;
25402@@ -2116,6 +2131,11 @@ static void *established_get_first(struc
25403 st->state = TCP_SEQ_STATE_TIME_WAIT;
25404 inet_twsk_for_each(tw, node,
25405 &tcp_hashinfo.ehash[st->bucket].twchain) {
25406+ vxdprintk(VXD_CBIT(net, 6),
25407+ "tw: %p [#%d] (from %d)",
25408+ tw, tw->tw_nid, nx_current_nid());
25409+ if (!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))
25410+ continue;
25411 if (tw->tw_family != st->family ||
25412 !net_eq(twsk_net(tw), net)) {
25413 continue;
25414@@ -2145,7 +2165,9 @@ static void *established_get_next(struct
25415 tw = cur;
25416 tw = tw_next(tw);
25417 get_tw:
25418- while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
25419+ while (tw && (tw->tw_family != st->family ||
25420+ !net_eq(twsk_net(tw), net) ||
25421+ !nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) {
25422 tw = tw_next(tw);
25423 }
25424 if (tw) {
25425@@ -2169,6 +2191,11 @@ get_tw:
25426 sk = sk_nulls_next(sk);
25427
25428 sk_nulls_for_each_from(sk, node) {
25429+ vxdprintk(VXD_CBIT(net, 6),
25430+ "sk,egn: %p [#%d] (from %d)",
25431+ sk, sk->sk_nid, nx_current_nid());
25432+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25433+ continue;
25434 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
25435 goto found;
25436 }
25437@@ -2378,9 +2405,9 @@ static void get_openreq4(struct sock *sk
25438 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
25439 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
25440 i,
25441- ireq->loc_addr,
25442+ nx_map_sock_lback(current_nx_info(), ireq->loc_addr),
25443 ntohs(inet_sk(sk)->inet_sport),
25444- ireq->rmt_addr,
25445+ nx_map_sock_lback(current_nx_info(), ireq->rmt_addr),
25446 ntohs(ireq->rmt_port),
25447 TCP_SYN_RECV,
25448 0, 0, /* could print option size, but that is af dependent. */
25449@@ -2402,8 +2429,8 @@ static void get_tcp4_sock(struct sock *s
25450 struct tcp_sock *tp = tcp_sk(sk);
25451 const struct inet_connection_sock *icsk = inet_csk(sk);
25452 struct inet_sock *inet = inet_sk(sk);
25453- __be32 dest = inet->inet_daddr;
25454- __be32 src = inet->inet_rcv_saddr;
25455+ __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
25456+ __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
25457 __u16 destp = ntohs(inet->inet_dport);
25458 __u16 srcp = ntohs(inet->inet_sport);
25459 int rx_queue;
25460@@ -2460,8 +2487,8 @@ static void get_timewait4_sock(struct in
25461 if (ttd < 0)
25462 ttd = 0;
25463
25464- dest = tw->tw_daddr;
25465- src = tw->tw_rcv_saddr;
25466+ dest = nx_map_sock_lback(current_nx_info(), tw->tw_daddr);
25467+ src = nx_map_sock_lback(current_nx_info(), tw->tw_rcv_saddr);
25468 destp = ntohs(tw->tw_dport);
25469 srcp = ntohs(tw->tw_sport);
25470
25471diff -NurpP --minimal linux-3.0.9/net/ipv4/tcp_minisocks.c linux-3.0.9-vs2.3.2.1/net/ipv4/tcp_minisocks.c
25472--- linux-3.0.9/net/ipv4/tcp_minisocks.c 2011-03-15 18:07:45.000000000 +0100
25473+++ linux-3.0.9-vs2.3.2.1/net/ipv4/tcp_minisocks.c 2011-06-10 22:11:24.000000000 +0200
25474@@ -23,6 +23,9 @@
25475 #include <linux/slab.h>
25476 #include <linux/sysctl.h>
25477 #include <linux/workqueue.h>
25478+#include <linux/vs_limit.h>
25479+#include <linux/vs_socket.h>
25480+#include <linux/vs_context.h>
25481 #include <net/tcp.h>
25482 #include <net/inet_common.h>
25483 #include <net/xfrm.h>
25484@@ -335,6 +338,11 @@ void tcp_time_wait(struct sock *sk, int
25485 tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
25486 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
25487
25488+ tw->tw_xid = sk->sk_xid;
25489+ tw->tw_vx_info = NULL;
25490+ tw->tw_nid = sk->sk_nid;
25491+ tw->tw_nx_info = NULL;
25492+
25493 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
25494 if (tw->tw_family == PF_INET6) {
25495 struct ipv6_pinfo *np = inet6_sk(sk);
25496diff -NurpP --minimal linux-3.0.9/net/ipv4/udp.c linux-3.0.9-vs2.3.2.1/net/ipv4/udp.c
25497--- linux-3.0.9/net/ipv4/udp.c 2011-07-22 11:18:13.000000000 +0200
25498+++ linux-3.0.9-vs2.3.2.1/net/ipv4/udp.c 2011-07-27 20:26:14.000000000 +0200
25499@@ -296,14 +296,7 @@ fail:
25500 }
25501 EXPORT_SYMBOL(udp_lib_get_port);
25502
25503-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
25504-{
25505- struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
25506-
25507- return (!ipv6_only_sock(sk2) &&
25508- (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
25509- inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
25510-}
25511+extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
25512
25513 static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
25514 unsigned int port)
25515@@ -338,6 +331,11 @@ static inline int compute_score(struct s
25516 if (inet->inet_rcv_saddr != daddr)
25517 return -1;
25518 score += 2;
25519+ } else {
25520+ /* block non nx_info ips */
25521+ if (!v4_addr_in_nx_info(sk->sk_nx_info,
25522+ daddr, NXA_MASK_BIND))
25523+ return -1;
25524 }
25525 if (inet->inet_daddr) {
25526 if (inet->inet_daddr != saddr)
25527@@ -441,6 +439,7 @@ exact_match:
25528 return result;
25529 }
25530
25531+
25532 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
25533 * harder than this. -DaveM
25534 */
25535@@ -486,6 +485,11 @@ begin:
25536 sk_nulls_for_each_rcu(sk, node, &hslot->head) {
25537 score = compute_score(sk, net, saddr, hnum, sport,
25538 daddr, dport, dif);
25539+ /* FIXME: disabled?
25540+ if (score == 9) {
25541+ result = sk;
25542+ break;
25543+ } else */
25544 if (score > badness) {
25545 result = sk;
25546 badness = score;
25547@@ -499,6 +503,7 @@ begin:
25548 if (get_nulls_value(node) != slot)
25549 goto begin;
25550
25551+
25552 if (result) {
25553 if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
25554 result = NULL;
25555@@ -508,6 +513,7 @@ begin:
25556 goto begin;
25557 }
25558 }
25559+
25560 rcu_read_unlock();
25561 return result;
25562 }
25563@@ -550,8 +556,7 @@ static inline struct sock *udp_v4_mcast_
25564 udp_sk(s)->udp_port_hash != hnum ||
25565 (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
25566 (inet->inet_dport != rmt_port && inet->inet_dport) ||
25567- (inet->inet_rcv_saddr &&
25568- inet->inet_rcv_saddr != loc_addr) ||
25569+ !v4_sock_addr_match(sk->sk_nx_info, inet, loc_addr) ||
25570 ipv6_only_sock(s) ||
25571 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
25572 continue;
25573@@ -929,6 +934,16 @@ int udp_sendmsg(struct kiocb *iocb, stru
25574 inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
25575 faddr, saddr, dport, inet->inet_sport);
25576
25577+ if (sk->sk_nx_info) {
25578+ rt = ip_v4_find_src(net, sk->sk_nx_info, fl4);
25579+ if (IS_ERR(rt)) {
25580+ err = PTR_ERR(rt);
25581+ rt = NULL;
25582+ goto out;
25583+ }
25584+ ip_rt_put(rt);
25585+ }
25586+
25587 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
25588 rt = ip_route_output_flow(net, fl4, sk);
25589 if (IS_ERR(rt)) {
25590@@ -1226,7 +1241,8 @@ try_again:
25591 if (sin) {
25592 sin->sin_family = AF_INET;
25593 sin->sin_port = udp_hdr(skb)->source;
25594- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
25595+ sin->sin_addr.s_addr = nx_map_sock_lback(
25596+ skb->sk->sk_nx_info, ip_hdr(skb)->saddr);
25597 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
25598 }
25599 if (inet->cmsg_flags)
25600@@ -1970,6 +1986,8 @@ static struct sock *udp_get_first(struct
25601 sk_nulls_for_each(sk, node, &hslot->head) {
25602 if (!net_eq(sock_net(sk), net))
25603 continue;
25604+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
25605+ continue;
25606 if (sk->sk_family == state->family)
25607 goto found;
25608 }
25609@@ -1987,7 +2005,9 @@ static struct sock *udp_get_next(struct
25610
25611 do {
25612 sk = sk_nulls_next(sk);
25613- } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
25614+ } while (sk && (!net_eq(sock_net(sk), net) ||
25615+ sk->sk_family != state->family ||
25616+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
25617
25618 if (!sk) {
25619 if (state->bucket <= state->udp_table->mask)
25620diff -NurpP --minimal linux-3.0.9/net/ipv6/Kconfig linux-3.0.9-vs2.3.2.1/net/ipv6/Kconfig
25621--- linux-3.0.9/net/ipv6/Kconfig 2010-08-02 16:52:59.000000000 +0200
25622+++ linux-3.0.9-vs2.3.2.1/net/ipv6/Kconfig 2011-06-10 22:11:24.000000000 +0200
25623@@ -4,8 +4,8 @@
25624
25625 # IPv6 as module will cause a CRASH if you try to unload it
25626 menuconfig IPV6
25627- tristate "The IPv6 protocol"
25628- default m
25629+ bool "The IPv6 protocol"
25630+ default n
25631 ---help---
25632 This is complemental support for the IP version 6.
25633 You will still be able to do traditional IPv4 networking as well.
25634diff -NurpP --minimal linux-3.0.9/net/ipv6/addrconf.c linux-3.0.9-vs2.3.2.1/net/ipv6/addrconf.c
25635--- linux-3.0.9/net/ipv6/addrconf.c 2011-07-22 11:18:13.000000000 +0200
25636+++ linux-3.0.9-vs2.3.2.1/net/ipv6/addrconf.c 2011-06-10 22:11:24.000000000 +0200
25637@@ -87,6 +87,8 @@
25638
25639 #include <linux/proc_fs.h>
25640 #include <linux/seq_file.h>
25641+#include <linux/vs_network.h>
25642+#include <linux/vs_inet6.h>
25643
25644 /* Set to 3 to get tracing... */
25645 #define ACONF_DEBUG 2
25646@@ -1108,7 +1110,7 @@ out:
25647
25648 int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
25649 const struct in6_addr *daddr, unsigned int prefs,
25650- struct in6_addr *saddr)
25651+ struct in6_addr *saddr, struct nx_info *nxi)
25652 {
25653 struct ipv6_saddr_score scores[2],
25654 *score = &scores[0], *hiscore = &scores[1];
25655@@ -1180,6 +1182,8 @@ int ipv6_dev_get_saddr(struct net *net,
25656 dev->name);
25657 continue;
25658 }
25659+ if (!v6_addr_in_nx_info(nxi, &score->ifa->addr, -1))
25660+ continue;
25661
25662 score->rule = -1;
25663 bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
25664@@ -3048,7 +3052,10 @@ static void if6_seq_stop(struct seq_file
25665 static int if6_seq_show(struct seq_file *seq, void *v)
25666 {
25667 struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
25668- seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
25669+
25670+ if (nx_check(0, VS_ADMIN|VS_WATCH) ||
25671+ v6_addr_in_nx_info(current_nx_info(), &ifp->addr, -1))
25672+ seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
25673 &ifp->addr,
25674 ifp->idev->dev->ifindex,
25675 ifp->prefix_len,
25676@@ -3554,6 +3561,11 @@ static int in6_dump_addrs(struct inet6_d
25677 struct ifacaddr6 *ifaca;
25678 int err = 1;
25679 int ip_idx = *p_ip_idx;
25680+ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
25681+
25682+ /* disable ipv6 on non v6 guests */
25683+ if (nxi && !nx_info_has_v6(nxi))
25684+ return skb->len;
25685
25686 read_lock_bh(&idev->lock);
25687 switch (type) {
25688@@ -3564,6 +3576,8 @@ static int in6_dump_addrs(struct inet6_d
25689 list_for_each_entry(ifa, &idev->addr_list, if_list) {
25690 if (++ip_idx < s_ip_idx)
25691 continue;
25692+ if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1))
25693+ continue;
25694 err = inet6_fill_ifaddr(skb, ifa,
25695 NETLINK_CB(cb->skb).pid,
25696 cb->nlh->nlmsg_seq,
25697@@ -3580,6 +3594,8 @@ static int in6_dump_addrs(struct inet6_d
25698 ifmca = ifmca->next, ip_idx++) {
25699 if (ip_idx < s_ip_idx)
25700 continue;
25701+ if (!v6_addr_in_nx_info(nxi, &ifmca->mca_addr, -1))
25702+ continue;
25703 err = inet6_fill_ifmcaddr(skb, ifmca,
25704 NETLINK_CB(cb->skb).pid,
25705 cb->nlh->nlmsg_seq,
25706@@ -3595,6 +3611,8 @@ static int in6_dump_addrs(struct inet6_d
25707 ifaca = ifaca->aca_next, ip_idx++) {
25708 if (ip_idx < s_ip_idx)
25709 continue;
25710+ if (!v6_addr_in_nx_info(nxi, &ifaca->aca_addr, -1))
25711+ continue;
25712 err = inet6_fill_ifacaddr(skb, ifaca,
25713 NETLINK_CB(cb->skb).pid,
25714 cb->nlh->nlmsg_seq,
25715@@ -3980,6 +3998,11 @@ static int inet6_dump_ifinfo(struct sk_b
25716 struct inet6_dev *idev;
25717 struct hlist_head *head;
25718 struct hlist_node *node;
25719+ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
25720+
25721+ /* FIXME: maybe disable ipv6 on non v6 guests?
25722+ if (skb->sk && skb->sk->sk_vx_info)
25723+ return skb->len; */
25724
25725 s_h = cb->args[0];
25726 s_idx = cb->args[1];
25727@@ -3991,6 +4014,8 @@ static int inet6_dump_ifinfo(struct sk_b
25728 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
25729 if (idx < s_idx)
25730 goto cont;
25731+ if (!v6_dev_in_nx_info(dev, nxi))
25732+ goto cont;
25733 idev = __in6_dev_get(dev);
25734 if (!idev)
25735 goto cont;
25736diff -NurpP --minimal linux-3.0.9/net/ipv6/af_inet6.c linux-3.0.9-vs2.3.2.1/net/ipv6/af_inet6.c
25737--- linux-3.0.9/net/ipv6/af_inet6.c 2011-11-15 16:40:47.000000000 +0100
25738+++ linux-3.0.9-vs2.3.2.1/net/ipv6/af_inet6.c 2011-08-29 03:45:10.000000000 +0200
25739@@ -42,6 +42,8 @@
25740 #include <linux/netdevice.h>
25741 #include <linux/icmpv6.h>
25742 #include <linux/netfilter_ipv6.h>
25743+#include <linux/vs_inet.h>
25744+#include <linux/vs_inet6.h>
25745
25746 #include <net/ip.h>
25747 #include <net/ipv6.h>
25748@@ -160,9 +162,12 @@ lookup_protocol:
25749 }
25750
25751 err = -EPERM;
25752+ if ((protocol == IPPROTO_ICMPV6) &&
25753+ nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
25754+ goto override;
25755 if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
25756 goto out_rcu_unlock;
25757-
25758+override:
25759 sock->ops = answer->ops;
25760 answer_prot = answer->prot;
25761 answer_no_check = answer->no_check;
25762@@ -261,6 +266,7 @@ int inet6_bind(struct socket *sock, stru
25763 struct inet_sock *inet = inet_sk(sk);
25764 struct ipv6_pinfo *np = inet6_sk(sk);
25765 struct net *net = sock_net(sk);
25766+ struct nx_v6_sock_addr nsa;
25767 __be32 v4addr = 0;
25768 unsigned short snum;
25769 int addr_type = 0;
25770@@ -276,6 +282,10 @@ int inet6_bind(struct socket *sock, stru
25771 if (addr->sin6_family != AF_INET6)
25772 return -EAFNOSUPPORT;
25773
25774+ err = v6_map_sock_addr(inet, addr, &nsa);
25775+ if (err)
25776+ return err;
25777+
25778 addr_type = ipv6_addr_type(&addr->sin6_addr);
25779 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
25780 return -EINVAL;
25781@@ -307,6 +317,7 @@ int inet6_bind(struct socket *sock, stru
25782 /* Reproduce AF_INET checks to make the bindings consistent */
25783 v4addr = addr->sin6_addr.s6_addr32[3];
25784 chk_addr_ret = inet_addr_type(net, v4addr);
25785+
25786 if (!sysctl_ip_nonlocal_bind &&
25787 !(inet->freebind || inet->transparent) &&
25788 v4addr != htonl(INADDR_ANY) &&
25789@@ -316,6 +327,10 @@ int inet6_bind(struct socket *sock, stru
25790 err = -EADDRNOTAVAIL;
25791 goto out;
25792 }
25793+ if (!v4_addr_in_nx_info(sk->sk_nx_info, v4addr, NXA_MASK_BIND)) {
25794+ err = -EADDRNOTAVAIL;
25795+ goto out;
25796+ }
25797 } else {
25798 if (addr_type != IPV6_ADDR_ANY) {
25799 struct net_device *dev = NULL;
25800@@ -342,6 +357,11 @@ int inet6_bind(struct socket *sock, stru
25801 }
25802 }
25803
25804+ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
25805+ err = -EADDRNOTAVAIL;
25806+ goto out;
25807+ }
25808+
25809 /* ipv4 addr of the socket is invalid. Only the
25810 * unspecified and mapped address have a v4 equivalent.
25811 */
25812@@ -358,6 +378,9 @@ int inet6_bind(struct socket *sock, stru
25813 }
25814 }
25815
25816+ /* what's that for? */
25817+ v6_set_sock_addr(inet, &nsa);
25818+
25819 inet->inet_rcv_saddr = v4addr;
25820 inet->inet_saddr = v4addr;
25821
25822@@ -459,9 +482,11 @@ int inet6_getname(struct socket *sock, s
25823 return -ENOTCONN;
25824 sin->sin6_port = inet->inet_dport;
25825 ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
25826+ /* FIXME: remap lback? */
25827 if (np->sndflow)
25828 sin->sin6_flowinfo = np->flow_label;
25829 } else {
25830+ /* FIXME: remap lback? */
25831 if (ipv6_addr_any(&np->rcv_saddr))
25832 ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
25833 else
25834diff -NurpP --minimal linux-3.0.9/net/ipv6/datagram.c linux-3.0.9-vs2.3.2.1/net/ipv6/datagram.c
25835--- linux-3.0.9/net/ipv6/datagram.c 2011-05-22 16:18:00.000000000 +0200
25836+++ linux-3.0.9-vs2.3.2.1/net/ipv6/datagram.c 2011-09-17 19:24:15.000000000 +0200
25837@@ -639,7 +639,7 @@ int datagram_send_ctl(struct net *net,
25838
25839 rcu_read_lock();
25840 if (fl6->flowi6_oif) {
25841- dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
25842+ dev = dev_get_by_index_real_rcu(net, fl6->flowi6_oif);
25843 if (!dev) {
25844 rcu_read_unlock();
25845 return -ENODEV;
25846diff -NurpP --minimal linux-3.0.9/net/ipv6/fib6_rules.c linux-3.0.9-vs2.3.2.1/net/ipv6/fib6_rules.c
25847--- linux-3.0.9/net/ipv6/fib6_rules.c 2011-05-22 16:18:00.000000000 +0200
25848+++ linux-3.0.9-vs2.3.2.1/net/ipv6/fib6_rules.c 2011-06-10 22:11:24.000000000 +0200
25849@@ -90,7 +90,7 @@ static int fib6_rule_action(struct fib_r
25850 ip6_dst_idev(&rt->dst)->dev,
25851 &flp6->daddr,
25852 rt6_flags2srcprefs(flags),
25853- &saddr))
25854+ &saddr, NULL))
25855 goto again;
25856 if (!ipv6_prefix_equal(&saddr, &r->src.addr,
25857 r->src.plen))
25858diff -NurpP --minimal linux-3.0.9/net/ipv6/inet6_hashtables.c linux-3.0.9-vs2.3.2.1/net/ipv6/inet6_hashtables.c
25859--- linux-3.0.9/net/ipv6/inet6_hashtables.c 2011-11-15 16:40:47.000000000 +0100
25860+++ linux-3.0.9-vs2.3.2.1/net/ipv6/inet6_hashtables.c 2011-08-29 03:45:10.000000000 +0200
25861@@ -16,6 +16,7 @@
25862
25863 #include <linux/module.h>
25864 #include <linux/random.h>
25865+#include <linux/vs_inet6.h>
25866
25867 #include <net/inet_connection_sock.h>
25868 #include <net/inet_hashtables.h>
25869@@ -83,7 +84,6 @@ struct sock *__inet6_lookup_established(
25870 unsigned int slot = hash & hashinfo->ehash_mask;
25871 struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
25872
25873-
25874 rcu_read_lock();
25875 begin:
25876 sk_nulls_for_each_rcu(sk, node, &head->chain) {
25877@@ -95,7 +95,7 @@ begin:
25878 sock_put(sk);
25879 goto begin;
25880 }
25881- goto out;
25882+ goto out;
25883 }
25884 }
25885 if (get_nulls_value(node) != slot)
25886@@ -141,6 +141,9 @@ static inline int compute_score(struct s
25887 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
25888 return -1;
25889 score++;
25890+ } else {
25891+ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
25892+ return -1;
25893 }
25894 if (sk->sk_bound_dev_if) {
25895 if (sk->sk_bound_dev_if != dif)
25896diff -NurpP --minimal linux-3.0.9/net/ipv6/ip6_output.c linux-3.0.9-vs2.3.2.1/net/ipv6/ip6_output.c
25897--- linux-3.0.9/net/ipv6/ip6_output.c 2011-11-15 16:40:47.000000000 +0100
25898+++ linux-3.0.9-vs2.3.2.1/net/ipv6/ip6_output.c 2011-10-18 13:51:13.000000000 +0200
25899@@ -962,7 +962,8 @@ static int ip6_dst_lookup_tail(struct so
25900 struct rt6_info *rt = (struct rt6_info *) *dst;
25901 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
25902 sk ? inet6_sk(sk)->srcprefs : 0,
25903- &fl6->saddr);
25904+ &fl6->saddr,
25905+ sk ? sk->sk_nx_info : NULL);
25906 if (err)
25907 goto out_err_release;
25908 }
25909diff -NurpP --minimal linux-3.0.9/net/ipv6/ndisc.c linux-3.0.9-vs2.3.2.1/net/ipv6/ndisc.c
25910--- linux-3.0.9/net/ipv6/ndisc.c 2011-07-22 11:18:13.000000000 +0200
25911+++ linux-3.0.9-vs2.3.2.1/net/ipv6/ndisc.c 2011-06-10 22:11:24.000000000 +0200
25912@@ -597,7 +597,7 @@ static void ndisc_send_na(struct net_dev
25913 } else {
25914 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
25915 inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
25916- &tmpaddr))
25917+ &tmpaddr, NULL))
25918 return;
25919 src_addr = &tmpaddr;
25920 }
25921diff -NurpP --minimal linux-3.0.9/net/ipv6/raw.c linux-3.0.9-vs2.3.2.1/net/ipv6/raw.c
25922--- linux-3.0.9/net/ipv6/raw.c 2011-07-22 11:18:13.000000000 +0200
25923+++ linux-3.0.9-vs2.3.2.1/net/ipv6/raw.c 2011-06-10 22:11:24.000000000 +0200
25924@@ -30,6 +30,7 @@
25925 #include <linux/icmpv6.h>
25926 #include <linux/netfilter.h>
25927 #include <linux/netfilter_ipv6.h>
25928+#include <linux/vs_inet6.h>
25929 #include <linux/skbuff.h>
25930 #include <linux/compat.h>
25931 #include <asm/uaccess.h>
25932@@ -284,6 +285,13 @@ static int rawv6_bind(struct sock *sk, s
25933 goto out_unlock;
25934 }
25935
25936+ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
25937+ err = -EADDRNOTAVAIL;
25938+ if (dev)
25939+ dev_put(dev);
25940+ goto out;
25941+ }
25942+
25943 /* ipv4 addr of the socket is invalid. Only the
25944 * unspecified and mapped address have a v4 equivalent.
25945 */
25946diff -NurpP --minimal linux-3.0.9/net/ipv6/route.c linux-3.0.9-vs2.3.2.1/net/ipv6/route.c
25947--- linux-3.0.9/net/ipv6/route.c 2011-07-22 11:18:13.000000000 +0200
25948+++ linux-3.0.9-vs2.3.2.1/net/ipv6/route.c 2011-08-29 05:05:08.000000000 +0200
25949@@ -54,6 +54,7 @@
25950 #include <net/xfrm.h>
25951 #include <net/netevent.h>
25952 #include <net/netlink.h>
25953+#include <linux/vs_inet6.h>
25954
25955 #include <asm/uaccess.h>
25956
25957@@ -2050,15 +2051,17 @@ int ip6_route_get_saddr(struct net *net,
25958 struct rt6_info *rt,
25959 const struct in6_addr *daddr,
25960 unsigned int prefs,
25961- struct in6_addr *saddr)
25962+ struct in6_addr *saddr,
25963+ struct nx_info *nxi)
25964 {
25965 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
25966 int err = 0;
25967- if (rt->rt6i_prefsrc.plen)
25968+ if (rt->rt6i_prefsrc.plen && (!nxi ||
25969+ v6_addr_in_nx_info(nxi, &rt->rt6i_prefsrc.addr, NXA_TYPE_ADDR)))
25970 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
25971 else
25972 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
25973- daddr, prefs, saddr);
25974+ daddr, prefs, saddr, nxi);
25975 return err;
25976 }
25977
25978@@ -2387,7 +2390,8 @@ static int rt6_fill_node(struct net *net
25979 NLA_PUT_U32(skb, RTA_IIF, iif);
25980 } else if (dst) {
25981 struct in6_addr saddr_buf;
25982- if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
25983+ if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf,
25984+ (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0)
25985 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
25986 }
25987
25988@@ -2586,6 +2590,7 @@ static int rt6_info_route(struct rt6_inf
25989 {
25990 struct seq_file *m = p_arg;
25991
25992+ /* FIXME: check for network context? */
25993 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
25994
25995 #ifdef CONFIG_IPV6_SUBTREES
25996diff -NurpP --minimal linux-3.0.9/net/ipv6/tcp_ipv6.c linux-3.0.9-vs2.3.2.1/net/ipv6/tcp_ipv6.c
25997--- linux-3.0.9/net/ipv6/tcp_ipv6.c 2011-11-15 16:40:47.000000000 +0100
25998+++ linux-3.0.9-vs2.3.2.1/net/ipv6/tcp_ipv6.c 2011-11-15 17:37:07.000000000 +0100
25999@@ -70,6 +70,7 @@
26000
26001 #include <linux/crypto.h>
26002 #include <linux/scatterlist.h>
26003+#include <linux/vs_inet6.h>
26004
26005 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
26006 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
26007@@ -162,8 +163,15 @@ static int tcp_v6_connect(struct sock *s
26008 * connect() to INADDR_ANY means loopback (BSD'ism).
26009 */
26010
26011- if(ipv6_addr_any(&usin->sin6_addr))
26012- usin->sin6_addr.s6_addr[15] = 0x1;
26013+ if(ipv6_addr_any(&usin->sin6_addr)) {
26014+ struct nx_info *nxi = sk->sk_nx_info;
26015+
26016+ if (nxi && nx_info_has_v6(nxi))
26017+ /* FIXME: remap lback? */
26018+ usin->sin6_addr = nxi->v6.ip;
26019+ else
26020+ usin->sin6_addr.s6_addr[15] = 0x1;
26021+ }
26022
26023 addr_type = ipv6_addr_type(&usin->sin6_addr);
26024
26025diff -NurpP --minimal linux-3.0.9/net/ipv6/udp.c linux-3.0.9-vs2.3.2.1/net/ipv6/udp.c
26026--- linux-3.0.9/net/ipv6/udp.c 2011-11-15 16:40:47.000000000 +0100
26027+++ linux-3.0.9-vs2.3.2.1/net/ipv6/udp.c 2011-10-18 13:51:13.000000000 +0200
26028@@ -45,41 +45,67 @@
26029 #include <net/tcp_states.h>
26030 #include <net/ip6_checksum.h>
26031 #include <net/xfrm.h>
26032+#include <linux/vs_inet6.h>
26033
26034 #include <linux/proc_fs.h>
26035 #include <linux/seq_file.h>
26036 #include "udp_impl.h"
26037
26038-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
26039+int ipv6_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
26040 {
26041- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
26042+ const struct in6_addr *sk1_rcv_saddr6 = &inet6_sk(sk1)->rcv_saddr;
26043 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
26044- __be32 sk1_rcv_saddr = sk_rcv_saddr(sk);
26045+ __be32 sk1_rcv_saddr = sk_rcv_saddr(sk1);
26046 __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
26047- int sk_ipv6only = ipv6_only_sock(sk);
26048+ int sk1_ipv6only = ipv6_only_sock(sk1);
26049 int sk2_ipv6only = inet_v6_ipv6only(sk2);
26050- int addr_type = ipv6_addr_type(sk_rcv_saddr6);
26051+ int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
26052 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
26053
26054 /* if both are mapped, treat as IPv4 */
26055- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
26056- return (!sk2_ipv6only &&
26057+ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
26058+ if (!sk2_ipv6only &&
26059 (!sk1_rcv_saddr || !sk2_rcv_saddr ||
26060- sk1_rcv_saddr == sk2_rcv_saddr));
26061+ sk1_rcv_saddr == sk2_rcv_saddr))
26062+ goto vs_v4;
26063+ else
26064+ return 0;
26065+ }
26066
26067 if (addr_type2 == IPV6_ADDR_ANY &&
26068 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
26069- return 1;
26070+ goto vs;
26071
26072 if (addr_type == IPV6_ADDR_ANY &&
26073- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
26074- return 1;
26075+ !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
26076+ goto vs;
26077
26078 if (sk2_rcv_saddr6 &&
26079- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
26080- return 1;
26081+ ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
26082+ goto vs;
26083
26084 return 0;
26085+
26086+vs_v4:
26087+ if (!sk1_rcv_saddr && !sk2_rcv_saddr)
26088+ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
26089+ if (!sk2_rcv_saddr)
26090+ return v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, -1);
26091+ if (!sk1_rcv_saddr)
26092+ return v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, -1);
26093+ return 1;
26094+vs:
26095+ if (addr_type2 == IPV6_ADDR_ANY && addr_type == IPV6_ADDR_ANY)
26096+ return nx_v6_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
26097+ else if (addr_type2 == IPV6_ADDR_ANY)
26098+ return v6_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr6, -1);
26099+ else if (addr_type == IPV6_ADDR_ANY) {
26100+ if (addr_type2 == IPV6_ADDR_MAPPED)
26101+ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
26102+ else
26103+ return v6_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr6, -1);
26104+ }
26105+ return 1;
26106 }
26107
26108 static unsigned int udp6_portaddr_hash(struct net *net,
26109@@ -143,6 +169,10 @@ static inline int compute_score(struct s
26110 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
26111 return -1;
26112 score++;
26113+ } else {
26114+ /* block non nx_info ips */
26115+ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
26116+ return -1;
26117 }
26118 if (!ipv6_addr_any(&np->daddr)) {
26119 if (!ipv6_addr_equal(&np->daddr, saddr))
26120diff -NurpP --minimal linux-3.0.9/net/ipv6/xfrm6_policy.c linux-3.0.9-vs2.3.2.1/net/ipv6/xfrm6_policy.c
26121--- linux-3.0.9/net/ipv6/xfrm6_policy.c 2011-07-22 11:18:13.000000000 +0200
26122+++ linux-3.0.9-vs2.3.2.1/net/ipv6/xfrm6_policy.c 2011-06-10 22:11:24.000000000 +0200
26123@@ -63,7 +63,7 @@ static int xfrm6_get_saddr(struct net *n
26124 dev = ip6_dst_idev(dst)->dev;
26125 ipv6_dev_get_saddr(dev_net(dev), dev,
26126 (struct in6_addr *)&daddr->a6, 0,
26127- (struct in6_addr *)&saddr->a6);
26128+ (struct in6_addr *)&saddr->a6, NULL);
26129 dst_release(dst);
26130 return 0;
26131 }
26132diff -NurpP --minimal linux-3.0.9/net/netfilter/ipvs/ip_vs_xmit.c linux-3.0.9-vs2.3.2.1/net/netfilter/ipvs/ip_vs_xmit.c
26133--- linux-3.0.9/net/netfilter/ipvs/ip_vs_xmit.c 2011-07-22 11:18:13.000000000 +0200
26134+++ linux-3.0.9-vs2.3.2.1/net/netfilter/ipvs/ip_vs_xmit.c 2011-07-22 15:27:52.000000000 +0200
26135@@ -226,7 +226,7 @@ __ip_vs_route_output_v6(struct net *net,
26136 return dst;
26137 if (ipv6_addr_any(&fl6.saddr) &&
26138 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
26139- &fl6.daddr, 0, &fl6.saddr) < 0)
26140+ &fl6.daddr, 0, &fl6.saddr, NULL) < 0)
26141 goto out_err;
26142 if (do_xfrm) {
26143 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
26144diff -NurpP --minimal linux-3.0.9/net/netlink/af_netlink.c linux-3.0.9-vs2.3.2.1/net/netlink/af_netlink.c
26145--- linux-3.0.9/net/netlink/af_netlink.c 2011-07-22 11:18:13.000000000 +0200
26146+++ linux-3.0.9-vs2.3.2.1/net/netlink/af_netlink.c 2011-06-10 22:11:24.000000000 +0200
26147@@ -55,6 +55,9 @@
26148 #include <linux/types.h>
26149 #include <linux/audit.h>
26150 #include <linux/mutex.h>
26151+#include <linux/vs_context.h>
26152+#include <linux/vs_network.h>
26153+#include <linux/vs_limit.h>
26154
26155 #include <net/net_namespace.h>
26156 #include <net/sock.h>
26157@@ -1907,6 +1910,8 @@ static struct sock *netlink_seq_socket_i
26158 sk_for_each(s, node, &hash->table[j]) {
26159 if (sock_net(s) != seq_file_net(seq))
26160 continue;
26161+ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
26162+ continue;
26163 if (off == pos) {
26164 iter->link = i;
26165 iter->hash_idx = j;
26166@@ -1941,7 +1946,8 @@ static void *netlink_seq_next(struct seq
26167 s = v;
26168 do {
26169 s = sk_next(s);
26170- } while (s && sock_net(s) != seq_file_net(seq));
26171+ } while (s && (sock_net(s) != seq_file_net(seq) ||
26172+ !nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)));
26173 if (s)
26174 return s;
26175
26176@@ -1953,7 +1959,8 @@ static void *netlink_seq_next(struct seq
26177
26178 for (; j <= hash->mask; j++) {
26179 s = sk_head(&hash->table[j]);
26180- while (s && sock_net(s) != seq_file_net(seq))
26181+ while (s && (sock_net(s) != seq_file_net(seq) ||
26182+ !nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)))
26183 s = sk_next(s);
26184 if (s) {
26185 iter->link = i;
26186diff -NurpP --minimal linux-3.0.9/net/socket.c linux-3.0.9-vs2.3.2.1/net/socket.c
26187--- linux-3.0.9/net/socket.c 2011-11-15 16:40:48.000000000 +0100
26188+++ linux-3.0.9-vs2.3.2.1/net/socket.c 2011-10-18 13:51:13.000000000 +0200
26189@@ -98,6 +98,10 @@
26190
26191 #include <net/sock.h>
26192 #include <linux/netfilter.h>
26193+#include <linux/vs_base.h>
26194+#include <linux/vs_socket.h>
26195+#include <linux/vs_inet.h>
26196+#include <linux/vs_inet6.h>
26197
26198 #include <linux/if_tun.h>
26199 #include <linux/ipv6_route.h>
26200@@ -546,6 +550,7 @@ static inline int __sock_sendmsg_nosec(s
26201 struct msghdr *msg, size_t size)
26202 {
26203 struct sock_iocb *si = kiocb_to_siocb(iocb);
26204+ size_t len;
26205
26206 sock_update_classid(sock->sk);
26207
26208@@ -554,7 +559,22 @@ static inline int __sock_sendmsg_nosec(s
26209 si->msg = msg;
26210 si->size = size;
26211
26212- return sock->ops->sendmsg(iocb, sock, msg, size);
26213+ len = sock->ops->sendmsg(iocb, sock, msg, size);
26214+ if (sock->sk) {
26215+ if (len == size)
26216+ vx_sock_send(sock->sk, size);
26217+ else
26218+ vx_sock_fail(sock->sk, size);
26219+ }
26220+ vxdprintk(VXD_CBIT(net, 7),
26221+ "__sock_sendmsg: %p[%p,%p,%p;%d/%d]:%d/%zu",
26222+ sock, sock->sk,
26223+ (sock->sk)?sock->sk->sk_nx_info:0,
26224+ (sock->sk)?sock->sk->sk_vx_info:0,
26225+ (sock->sk)?sock->sk->sk_xid:0,
26226+ (sock->sk)?sock->sk->sk_nid:0,
26227+ (unsigned int)size, len);
26228+ return len;
26229 }
26230
26231 static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
26232@@ -694,6 +714,7 @@ static inline int __sock_recvmsg_nosec(s
26233 struct msghdr *msg, size_t size, int flags)
26234 {
26235 struct sock_iocb *si = kiocb_to_siocb(iocb);
26236+ int len;
26237
26238 sock_update_classid(sock->sk);
26239
26240@@ -703,7 +724,18 @@ static inline int __sock_recvmsg_nosec(s
26241 si->size = size;
26242 si->flags = flags;
26243
26244- return sock->ops->recvmsg(iocb, sock, msg, size, flags);
26245+ len = sock->ops->recvmsg(iocb, sock, msg, size, flags);
26246+ if ((len >= 0) && sock->sk)
26247+ vx_sock_recv(sock->sk, len);
26248+ vxdprintk(VXD_CBIT(net, 7),
26249+ "__sock_recvmsg: %p[%p,%p,%p;%d/%d]:%d/%d",
26250+ sock, sock->sk,
26251+ (sock->sk)?sock->sk->sk_nx_info:0,
26252+ (sock->sk)?sock->sk->sk_vx_info:0,
26253+ (sock->sk)?sock->sk->sk_xid:0,
26254+ (sock->sk)?sock->sk->sk_nid:0,
26255+ (unsigned int)size, len);
26256+ return len;
26257 }
26258
26259 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
26260@@ -1188,6 +1220,13 @@ int __sock_create(struct net *net, int f
26261 if (type < 0 || type >= SOCK_MAX)
26262 return -EINVAL;
26263
26264+ if (!nx_check(0, VS_ADMIN)) {
26265+ if (family == PF_INET && !current_nx_info_has_v4())
26266+ return -EAFNOSUPPORT;
26267+ if (family == PF_INET6 && !current_nx_info_has_v6())
26268+ return -EAFNOSUPPORT;
26269+ }
26270+
26271 /* Compatibility.
26272
26273 This uglymoron is moved from INET layer to here to avoid
26274@@ -1323,6 +1362,7 @@ SYSCALL_DEFINE3(socket, int, family, int
26275 if (retval < 0)
26276 goto out;
26277
26278+ set_bit(SOCK_USER_SOCKET, &sock->flags);
26279 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
26280 if (retval < 0)
26281 goto out_release;
26282@@ -1364,10 +1404,12 @@ SYSCALL_DEFINE4(socketpair, int, family,
26283 err = sock_create(family, type, protocol, &sock1);
26284 if (err < 0)
26285 goto out;
26286+ set_bit(SOCK_USER_SOCKET, &sock1->flags);
26287
26288 err = sock_create(family, type, protocol, &sock2);
26289 if (err < 0)
26290 goto out_release_1;
26291+ set_bit(SOCK_USER_SOCKET, &sock2->flags);
26292
26293 err = sock1->ops->socketpair(sock1, sock2);
26294 if (err < 0)
26295diff -NurpP --minimal linux-3.0.9/net/sunrpc/auth.c linux-3.0.9-vs2.3.2.1/net/sunrpc/auth.c
26296--- linux-3.0.9/net/sunrpc/auth.c 2011-07-22 11:18:13.000000000 +0200
26297+++ linux-3.0.9-vs2.3.2.1/net/sunrpc/auth.c 2011-06-10 22:11:24.000000000 +0200
26298@@ -14,6 +14,7 @@
26299 #include <linux/hash.h>
26300 #include <linux/sunrpc/clnt.h>
26301 #include <linux/spinlock.h>
26302+#include <linux/vs_tag.h>
26303
26304 #ifdef RPC_DEBUG
26305 # define RPCDBG_FACILITY RPCDBG_AUTH
26306@@ -427,6 +428,7 @@ rpcauth_lookupcred(struct rpc_auth *auth
26307 memset(&acred, 0, sizeof(acred));
26308 acred.uid = cred->fsuid;
26309 acred.gid = cred->fsgid;
26310+ acred.tag = dx_current_tag();
26311 acred.group_info = get_group_info(((struct cred *)cred)->group_info);
26312
26313 ret = auth->au_ops->lookup_cred(auth, &acred, flags);
26314@@ -467,6 +469,7 @@ rpcauth_bind_root_cred(struct rpc_task *
26315 struct auth_cred acred = {
26316 .uid = 0,
26317 .gid = 0,
26318+ .tag = dx_current_tag(),
26319 };
26320
26321 dprintk("RPC: %5u looking up %s cred\n",
26322diff -NurpP --minimal linux-3.0.9/net/sunrpc/auth_unix.c linux-3.0.9-vs2.3.2.1/net/sunrpc/auth_unix.c
26323--- linux-3.0.9/net/sunrpc/auth_unix.c 2011-11-15 16:40:48.000000000 +0100
26324+++ linux-3.0.9-vs2.3.2.1/net/sunrpc/auth_unix.c 2011-11-15 17:37:07.000000000 +0100
26325@@ -12,12 +12,14 @@
26326 #include <linux/module.h>
26327 #include <linux/sunrpc/clnt.h>
26328 #include <linux/sunrpc/auth.h>
26329+#include <linux/vs_tag.h>
26330
26331 #define NFS_NGROUPS 16
26332
26333 struct unx_cred {
26334 struct rpc_cred uc_base;
26335 gid_t uc_gid;
26336+ tag_t uc_tag;
26337 gid_t uc_gids[NFS_NGROUPS];
26338 };
26339 #define uc_uid uc_base.cr_uid
26340@@ -78,6 +80,7 @@ unx_create_cred(struct rpc_auth *auth, s
26341 groups = NFS_NGROUPS;
26342
26343 cred->uc_gid = acred->gid;
26344+ cred->uc_tag = acred->tag;
26345 for (i = 0; i < groups; i++)
26346 cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
26347 if (i < NFS_NGROUPS)
26348@@ -119,7 +122,9 @@ unx_match(struct auth_cred *acred, struc
26349 unsigned int i;
26350
26351
26352- if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid)
26353+ if (cred->uc_uid != acred->uid ||
26354+ cred->uc_gid != acred->gid ||
26355+ cred->uc_tag != acred->tag)
26356 return 0;
26357
26358 if (acred->group_info != NULL)
26359@@ -145,7 +150,7 @@ unx_marshal(struct rpc_task *task, __be3
26360 struct rpc_clnt *clnt = task->tk_client;
26361 struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
26362 __be32 *base, *hold;
26363- int i;
26364+ int i, tag;
26365
26366 *p++ = htonl(RPC_AUTH_UNIX);
26367 base = p++;
26368@@ -155,9 +160,12 @@ unx_marshal(struct rpc_task *task, __be3
26369 * Copy the UTS nodename captured when the client was created.
26370 */
26371 p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
26372+ tag = task->tk_client->cl_tag;
26373
26374- *p++ = htonl((u32) cred->uc_uid);
26375- *p++ = htonl((u32) cred->uc_gid);
26376+ *p++ = htonl((u32) TAGINO_UID(tag,
26377+ cred->uc_uid, cred->uc_tag));
26378+ *p++ = htonl((u32) TAGINO_GID(tag,
26379+ cred->uc_gid, cred->uc_tag));
26380 hold = p++;
26381 for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
26382 *p++ = htonl((u32) cred->uc_gids[i]);
26383diff -NurpP --minimal linux-3.0.9/net/sunrpc/clnt.c linux-3.0.9-vs2.3.2.1/net/sunrpc/clnt.c
26384--- linux-3.0.9/net/sunrpc/clnt.c 2011-07-22 11:18:13.000000000 +0200
26385+++ linux-3.0.9-vs2.3.2.1/net/sunrpc/clnt.c 2011-07-01 11:35:35.000000000 +0200
26386@@ -31,6 +31,7 @@
26387 #include <linux/in.h>
26388 #include <linux/in6.h>
26389 #include <linux/un.h>
26390+#include <linux/vs_cvirt.h>
26391
26392 #include <linux/sunrpc/clnt.h>
26393 #include <linux/sunrpc/rpc_pipe_fs.h>
26394@@ -362,6 +363,9 @@ struct rpc_clnt *rpc_create(struct rpc_c
26395 if (!(args->flags & RPC_CLNT_CREATE_QUIET))
26396 clnt->cl_chatty = 1;
26397
26398+ /* TODO: handle RPC_CLNT_CREATE_TAGGED
26399+ if (args->flags & RPC_CLNT_CREATE_TAGGED)
26400+ clnt->cl_tag = 1; */
26401 return clnt;
26402 }
26403 EXPORT_SYMBOL_GPL(rpc_create);
26404diff -NurpP --minimal linux-3.0.9/net/unix/af_unix.c linux-3.0.9-vs2.3.2.1/net/unix/af_unix.c
26405--- linux-3.0.9/net/unix/af_unix.c 2011-07-22 11:18:13.000000000 +0200
26406+++ linux-3.0.9-vs2.3.2.1/net/unix/af_unix.c 2011-06-10 22:11:24.000000000 +0200
26407@@ -114,6 +114,8 @@
26408 #include <linux/mount.h>
26409 #include <net/checksum.h>
26410 #include <linux/security.h>
26411+#include <linux/vs_context.h>
26412+#include <linux/vs_limit.h>
26413
26414 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
26415 static DEFINE_SPINLOCK(unix_table_lock);
26416@@ -258,6 +260,8 @@ static struct sock *__unix_find_socket_b
26417 if (!net_eq(sock_net(s), net))
26418 continue;
26419
26420+ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
26421+ continue;
26422 if (u->addr->len == len &&
26423 !memcmp(u->addr->name, sunname, len))
26424 goto found;
26425@@ -2208,6 +2212,8 @@ static struct sock *unix_seq_idx(struct
26426 for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
26427 if (sock_net(s) != seq_file_net(seq))
26428 continue;
26429+ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
26430+ continue;
26431 if (off == pos)
26432 return s;
26433 ++off;
26434@@ -2232,7 +2238,8 @@ static void *unix_seq_next(struct seq_fi
26435 sk = first_unix_socket(&iter->i);
26436 else
26437 sk = next_unix_socket(&iter->i, sk);
26438- while (sk && (sock_net(sk) != seq_file_net(seq)))
26439+ while (sk && (sock_net(sk) != seq_file_net(seq) ||
26440+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)))
26441 sk = next_unix_socket(&iter->i, sk);
26442 return sk;
26443 }
26444diff -NurpP --minimal linux-3.0.9/scripts/checksyscalls.sh linux-3.0.9-vs2.3.2.1/scripts/checksyscalls.sh
26445--- linux-3.0.9/scripts/checksyscalls.sh 2011-03-15 18:07:46.000000000 +0100
26446+++ linux-3.0.9-vs2.3.2.1/scripts/checksyscalls.sh 2011-06-10 22:11:24.000000000 +0200
26447@@ -193,7 +193,6 @@ cat << EOF
26448 #define __IGNORE_afs_syscall
26449 #define __IGNORE_getpmsg
26450 #define __IGNORE_putpmsg
26451-#define __IGNORE_vserver
26452 EOF
26453 }
26454
26455diff -NurpP --minimal linux-3.0.9/security/commoncap.c linux-3.0.9-vs2.3.2.1/security/commoncap.c
26456--- linux-3.0.9/security/commoncap.c 2011-07-22 11:18:14.000000000 +0200
26457+++ linux-3.0.9-vs2.3.2.1/security/commoncap.c 2011-08-30 15:42:43.000000000 +0200
26458@@ -62,6 +62,7 @@ int cap_netlink_recv(struct sk_buff *skb
26459 return -EPERM;
26460 return 0;
26461 }
26462+
26463 EXPORT_SYMBOL(cap_netlink_recv);
26464
26465 /**
26466@@ -83,14 +84,20 @@ EXPORT_SYMBOL(cap_netlink_recv);
26467 int cap_capable(struct task_struct *tsk, const struct cred *cred,
26468 struct user_namespace *targ_ns, int cap, int audit)
26469 {
26470+ struct vx_info *vxi = tsk->vx_info;
26471+
26472 for (;;) {
26473 /* The creator of the user namespace has all caps. */
26474 if (targ_ns != &init_user_ns && targ_ns->creator == cred->user)
26475 return 0;
26476
26477 /* Do we have the necessary capabilities? */
26478- if (targ_ns == cred->user->user_ns)
26479- return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
26480+ if (targ_ns == cred->user->user_ns) {
26481+ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) &&
26482+ cap_raised(cred->cap_effective, cap))
26483+ return 0;
26484+ return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM;
26485+ }
26486
26487 /* Have we tried all of the parent namespaces? */
26488 if (targ_ns == &init_user_ns)
26489@@ -611,7 +618,7 @@ int cap_inode_setxattr(struct dentry *de
26490
26491 if (!strncmp(name, XATTR_SECURITY_PREFIX,
26492 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
26493- !capable(CAP_SYS_ADMIN))
26494+ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
26495 return -EPERM;
26496 return 0;
26497 }
26498@@ -637,7 +644,7 @@ int cap_inode_removexattr(struct dentry
26499
26500 if (!strncmp(name, XATTR_SECURITY_PREFIX,
26501 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
26502- !capable(CAP_SYS_ADMIN))
26503+ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
26504 return -EPERM;
26505 return 0;
26506 }
26507diff -NurpP --minimal linux-3.0.9/security/selinux/hooks.c linux-3.0.9-vs2.3.2.1/security/selinux/hooks.c
26508--- linux-3.0.9/security/selinux/hooks.c 2011-07-22 11:18:14.000000000 +0200
26509+++ linux-3.0.9-vs2.3.2.1/security/selinux/hooks.c 2011-06-15 02:40:14.000000000 +0200
26510@@ -67,7 +67,6 @@
26511 #include <linux/dccp.h>
26512 #include <linux/quota.h>
26513 #include <linux/un.h> /* for Unix socket types */
26514-#include <net/af_unix.h> /* for Unix socket types */
26515 #include <linux/parser.h>
26516 #include <linux/nfs_mount.h>
26517 #include <net/ipv6.h>