ia64/linux-2.6.18-xen.hg

view kernel/sysctl.c @ 907:cad6f60f0506

Transcendent memory ("tmem") for Linux

Tmem, when called from a tmem-capable (paravirtualized) guest, makes
use of otherwise unutilized ("fallow") memory to create and manage
pools of pages that can be accessed from the guest either as
"ephemeral" pages or as "persistent" pages. In either case, the pages
are not directly addressable by the guest, only copied to and fro via
the tmem interface. Ephemeral pages are a nice place for a guest to
put recently evicted clean pages that it might need again; these pages
can be reclaimed synchronously by Xen for other guests or other uses.
Persistent pages are a nice place for a guest to put "swap" pages to
avoid sending them to disk. These pages retain data as long as the
guest lives, but count against the guest memory allocation.

This patch contains the Linux paravirtualization changes to
complement the tmem Xen patch (xen-unstable c/s 19646). It
implements "precache" (ext3 only as of now), "preswap",
and limited "shared precache" (ocfs2 only as of now) support.
CONFIG options are required to turn on
the support (but in this patch they default to "y"). If
the underlying Xen does not have tmem support or has it
turned off, this is sensed early to avoid nearly all
hypercalls.

Lots of useful prose about tmem can be found at
http://oss.oracle.com/projects/tmem

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 18 10:24:18 2009 +0100 (2009-06-18)
parents a9f723af5e18
children
line source
1 /*
2 * sysctl.c: General linux system control interface
3 *
4 * Begun 24 March 1995, Stephen Tweedie
5 * Added /proc support, Dec 1995
6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9 * Dynamic registration fixes, Stephen Tweedie.
10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12 * Horn.
13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16 * Wendling.
17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
19 */
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/capability.h>
31 #include <linux/smp_lock.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
49 #include <asm/uaccess.h>
50 #include <asm/processor.h>
52 extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
53 void __user *buffer, size_t *lenp, loff_t *ppos);
55 #if defined(CONFIG_SYSCTL)
57 /* External variables not in a header file. */
58 extern int C_A_D;
59 extern int sysctl_overcommit_memory;
60 extern int sysctl_overcommit_ratio;
61 extern int sysctl_panic_on_oom;
62 extern int max_threads;
63 extern int sysrq_enabled;
64 extern int core_uses_pid;
65 extern int suid_dumpable;
66 extern char core_pattern[];
67 extern int cad_pid;
68 extern int pid_max;
69 extern int min_free_kbytes;
70 extern int printk_ratelimit_jiffies;
71 extern int printk_ratelimit_burst;
72 extern int pid_max_min, pid_max_max;
73 extern int sysctl_drop_caches;
74 extern int percpu_pagelist_fraction;
75 extern int compat_log;
77 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
78 int unknown_nmi_panic;
79 extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
80 void __user *, size_t *, loff_t *);
81 #endif
83 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
84 static int maxolduid = 65535;
85 static int minolduid;
86 static int min_percpu_pagelist_fract = 8;
88 static int ngroups_max = NGROUPS_MAX;
90 #ifdef CONFIG_KMOD
91 extern char modprobe_path[];
92 #endif
93 #ifdef CONFIG_CHR_DEV_SG
94 extern int sg_big_buff;
95 #endif
96 #ifdef CONFIG_SYSVIPC
97 extern size_t shm_ctlmax;
98 extern size_t shm_ctlall;
99 extern int shm_ctlmni;
100 extern int msg_ctlmax;
101 extern int msg_ctlmnb;
102 extern int msg_ctlmni;
103 extern int sem_ctls[];
104 #endif
106 #ifdef __sparc__
107 extern char reboot_command [];
108 extern int stop_a_enabled;
109 extern int scons_pwroff;
110 #endif
112 #ifdef __hppa__
113 extern int pwrsw_enabled;
114 extern int unaligned_enabled;
115 #endif
117 #ifdef CONFIG_S390
118 #ifdef CONFIG_MATHEMU
119 extern int sysctl_ieee_emulation_warnings;
120 #endif
121 extern int sysctl_userprocess_debug;
122 extern int spin_retry;
123 #endif
125 extern int sysctl_hz_timer;
127 #ifdef CONFIG_BSD_PROCESS_ACCT
128 extern int acct_parm[];
129 #endif
131 #ifdef CONFIG_IA64
132 extern int no_unaligned_warning;
133 #endif
135 #ifdef CONFIG_RT_MUTEXES
136 extern int max_lock_depth;
137 #endif
139 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
140 ctl_table *, void **);
141 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
142 void __user *buffer, size_t *lenp, loff_t *ppos);
144 static ctl_table root_table[];
145 static struct ctl_table_header root_table_header =
146 { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
148 static ctl_table kern_table[];
149 static ctl_table vm_table[];
150 static ctl_table fs_table[];
151 static ctl_table debug_table[];
152 static ctl_table dev_table[];
153 extern ctl_table random_table[];
154 #ifdef CONFIG_UNIX98_PTYS
155 extern ctl_table pty_table[];
156 #endif
157 #ifdef CONFIG_INOTIFY_USER
158 extern ctl_table inotify_table[];
159 #endif
161 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
162 int sysctl_legacy_va_layout;
163 #endif
165 /* /proc declarations: */
167 #ifdef CONFIG_PROC_FS
169 static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
170 static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
171 static int proc_opensys(struct inode *, struct file *);
173 struct file_operations proc_sys_file_operations = {
174 .open = proc_opensys,
175 .read = proc_readsys,
176 .write = proc_writesys,
177 };
179 extern struct proc_dir_entry *proc_sys_root;
181 static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
182 static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
183 #endif
185 /* The default sysctl tables: */
187 static ctl_table root_table[] = {
188 {
189 .ctl_name = CTL_KERN,
190 .procname = "kernel",
191 .mode = 0555,
192 .child = kern_table,
193 },
194 {
195 .ctl_name = CTL_VM,
196 .procname = "vm",
197 .mode = 0555,
198 .child = vm_table,
199 },
200 #ifdef CONFIG_NET
201 {
202 .ctl_name = CTL_NET,
203 .procname = "net",
204 .mode = 0555,
205 .child = net_table,
206 },
207 #endif
208 {
209 .ctl_name = CTL_FS,
210 .procname = "fs",
211 .mode = 0555,
212 .child = fs_table,
213 },
214 {
215 .ctl_name = CTL_DEBUG,
216 .procname = "debug",
217 .mode = 0555,
218 .child = debug_table,
219 },
220 {
221 .ctl_name = CTL_DEV,
222 .procname = "dev",
223 .mode = 0555,
224 .child = dev_table,
225 },
227 { .ctl_name = 0 }
228 };
230 static ctl_table kern_table[] = {
231 {
232 .ctl_name = KERN_OSTYPE,
233 .procname = "ostype",
234 .data = system_utsname.sysname,
235 .maxlen = sizeof(system_utsname.sysname),
236 .mode = 0444,
237 .proc_handler = &proc_doutsstring,
238 .strategy = &sysctl_string,
239 },
240 {
241 .ctl_name = KERN_OSRELEASE,
242 .procname = "osrelease",
243 .data = system_utsname.release,
244 .maxlen = sizeof(system_utsname.release),
245 .mode = 0444,
246 .proc_handler = &proc_doutsstring,
247 .strategy = &sysctl_string,
248 },
249 {
250 .ctl_name = KERN_VERSION,
251 .procname = "version",
252 .data = system_utsname.version,
253 .maxlen = sizeof(system_utsname.version),
254 .mode = 0444,
255 .proc_handler = &proc_doutsstring,
256 .strategy = &sysctl_string,
257 },
258 {
259 .ctl_name = KERN_NODENAME,
260 .procname = "hostname",
261 .data = system_utsname.nodename,
262 .maxlen = sizeof(system_utsname.nodename),
263 .mode = 0644,
264 .proc_handler = &proc_doutsstring,
265 .strategy = &sysctl_string,
266 },
267 {
268 .ctl_name = KERN_DOMAINNAME,
269 .procname = "domainname",
270 .data = system_utsname.domainname,
271 .maxlen = sizeof(system_utsname.domainname),
272 .mode = 0644,
273 .proc_handler = &proc_doutsstring,
274 .strategy = &sysctl_string,
275 },
276 {
277 .ctl_name = KERN_PANIC,
278 .procname = "panic",
279 .data = &panic_timeout,
280 .maxlen = sizeof(int),
281 .mode = 0644,
282 .proc_handler = &proc_dointvec,
283 },
284 {
285 .ctl_name = KERN_CORE_USES_PID,
286 .procname = "core_uses_pid",
287 .data = &core_uses_pid,
288 .maxlen = sizeof(int),
289 .mode = 0644,
290 .proc_handler = &proc_dointvec,
291 },
292 {
293 .ctl_name = KERN_CORE_PATTERN,
294 .procname = "core_pattern",
295 .data = core_pattern,
296 .maxlen = 64,
297 .mode = 0644,
298 .proc_handler = &proc_dostring,
299 .strategy = &sysctl_string,
300 },
301 {
302 .ctl_name = KERN_TAINTED,
303 .procname = "tainted",
304 .data = &tainted,
305 .maxlen = sizeof(int),
306 .mode = 0444,
307 .proc_handler = &proc_dointvec,
308 },
309 {
310 .ctl_name = KERN_CAP_BSET,
311 .procname = "cap-bound",
312 .data = &cap_bset,
313 .maxlen = sizeof(kernel_cap_t),
314 .mode = 0600,
315 .proc_handler = &proc_dointvec_bset,
316 },
317 #ifdef CONFIG_BLK_DEV_INITRD
318 {
319 .ctl_name = KERN_REALROOTDEV,
320 .procname = "real-root-dev",
321 .data = &real_root_dev,
322 .maxlen = sizeof(int),
323 .mode = 0644,
324 .proc_handler = &proc_dointvec,
325 },
326 #endif
327 #ifdef __sparc__
328 {
329 .ctl_name = KERN_SPARC_REBOOT,
330 .procname = "reboot-cmd",
331 .data = reboot_command,
332 .maxlen = 256,
333 .mode = 0644,
334 .proc_handler = &proc_dostring,
335 .strategy = &sysctl_string,
336 },
337 {
338 .ctl_name = KERN_SPARC_STOP_A,
339 .procname = "stop-a",
340 .data = &stop_a_enabled,
341 .maxlen = sizeof (int),
342 .mode = 0644,
343 .proc_handler = &proc_dointvec,
344 },
345 {
346 .ctl_name = KERN_SPARC_SCONS_PWROFF,
347 .procname = "scons-poweroff",
348 .data = &scons_pwroff,
349 .maxlen = sizeof (int),
350 .mode = 0644,
351 .proc_handler = &proc_dointvec,
352 },
353 #endif
354 #ifdef __hppa__
355 {
356 .ctl_name = KERN_HPPA_PWRSW,
357 .procname = "soft-power",
358 .data = &pwrsw_enabled,
359 .maxlen = sizeof (int),
360 .mode = 0644,
361 .proc_handler = &proc_dointvec,
362 },
363 {
364 .ctl_name = KERN_HPPA_UNALIGNED,
365 .procname = "unaligned-trap",
366 .data = &unaligned_enabled,
367 .maxlen = sizeof (int),
368 .mode = 0644,
369 .proc_handler = &proc_dointvec,
370 },
371 #endif
372 {
373 .ctl_name = KERN_CTLALTDEL,
374 .procname = "ctrl-alt-del",
375 .data = &C_A_D,
376 .maxlen = sizeof(int),
377 .mode = 0644,
378 .proc_handler = &proc_dointvec,
379 },
380 {
381 .ctl_name = KERN_PRINTK,
382 .procname = "printk",
383 .data = &console_loglevel,
384 .maxlen = 4*sizeof(int),
385 .mode = 0644,
386 .proc_handler = &proc_dointvec,
387 },
388 #ifdef CONFIG_KMOD
389 {
390 .ctl_name = KERN_MODPROBE,
391 .procname = "modprobe",
392 .data = &modprobe_path,
393 .maxlen = KMOD_PATH_LEN,
394 .mode = 0644,
395 .proc_handler = &proc_dostring,
396 .strategy = &sysctl_string,
397 },
398 #endif
399 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
400 {
401 .ctl_name = KERN_HOTPLUG,
402 .procname = "hotplug",
403 .data = &uevent_helper,
404 .maxlen = UEVENT_HELPER_PATH_LEN,
405 .mode = 0644,
406 .proc_handler = &proc_dostring,
407 .strategy = &sysctl_string,
408 },
409 #endif
410 #ifdef CONFIG_CHR_DEV_SG
411 {
412 .ctl_name = KERN_SG_BIG_BUFF,
413 .procname = "sg-big-buff",
414 .data = &sg_big_buff,
415 .maxlen = sizeof (int),
416 .mode = 0444,
417 .proc_handler = &proc_dointvec,
418 },
419 #endif
420 #ifdef CONFIG_BSD_PROCESS_ACCT
421 {
422 .ctl_name = KERN_ACCT,
423 .procname = "acct",
424 .data = &acct_parm,
425 .maxlen = 3*sizeof(int),
426 .mode = 0644,
427 .proc_handler = &proc_dointvec,
428 },
429 #endif
430 #ifdef CONFIG_SYSVIPC
431 {
432 .ctl_name = KERN_SHMMAX,
433 .procname = "shmmax",
434 .data = &shm_ctlmax,
435 .maxlen = sizeof (size_t),
436 .mode = 0644,
437 .proc_handler = &proc_doulongvec_minmax,
438 },
439 {
440 .ctl_name = KERN_SHMALL,
441 .procname = "shmall",
442 .data = &shm_ctlall,
443 .maxlen = sizeof (size_t),
444 .mode = 0644,
445 .proc_handler = &proc_doulongvec_minmax,
446 },
447 {
448 .ctl_name = KERN_SHMMNI,
449 .procname = "shmmni",
450 .data = &shm_ctlmni,
451 .maxlen = sizeof (int),
452 .mode = 0644,
453 .proc_handler = &proc_dointvec,
454 },
455 {
456 .ctl_name = KERN_MSGMAX,
457 .procname = "msgmax",
458 .data = &msg_ctlmax,
459 .maxlen = sizeof (int),
460 .mode = 0644,
461 .proc_handler = &proc_dointvec,
462 },
463 {
464 .ctl_name = KERN_MSGMNI,
465 .procname = "msgmni",
466 .data = &msg_ctlmni,
467 .maxlen = sizeof (int),
468 .mode = 0644,
469 .proc_handler = &proc_dointvec,
470 },
471 {
472 .ctl_name = KERN_MSGMNB,
473 .procname = "msgmnb",
474 .data = &msg_ctlmnb,
475 .maxlen = sizeof (int),
476 .mode = 0644,
477 .proc_handler = &proc_dointvec,
478 },
479 {
480 .ctl_name = KERN_SEM,
481 .procname = "sem",
482 .data = &sem_ctls,
483 .maxlen = 4*sizeof (int),
484 .mode = 0644,
485 .proc_handler = &proc_dointvec,
486 },
487 #endif
488 #ifdef CONFIG_MAGIC_SYSRQ
489 {
490 .ctl_name = KERN_SYSRQ,
491 .procname = "sysrq",
492 .data = &sysrq_enabled,
493 .maxlen = sizeof (int),
494 .mode = 0644,
495 .proc_handler = &proc_dointvec,
496 },
497 #endif
498 {
499 .ctl_name = KERN_CADPID,
500 .procname = "cad_pid",
501 .data = &cad_pid,
502 .maxlen = sizeof (int),
503 .mode = 0600,
504 .proc_handler = &proc_dointvec,
505 },
506 {
507 .ctl_name = KERN_MAX_THREADS,
508 .procname = "threads-max",
509 .data = &max_threads,
510 .maxlen = sizeof(int),
511 .mode = 0644,
512 .proc_handler = &proc_dointvec,
513 },
514 {
515 .ctl_name = KERN_RANDOM,
516 .procname = "random",
517 .mode = 0555,
518 .child = random_table,
519 },
520 #ifdef CONFIG_UNIX98_PTYS
521 {
522 .ctl_name = KERN_PTY,
523 .procname = "pty",
524 .mode = 0555,
525 .child = pty_table,
526 },
527 #endif
528 {
529 .ctl_name = KERN_OVERFLOWUID,
530 .procname = "overflowuid",
531 .data = &overflowuid,
532 .maxlen = sizeof(int),
533 .mode = 0644,
534 .proc_handler = &proc_dointvec_minmax,
535 .strategy = &sysctl_intvec,
536 .extra1 = &minolduid,
537 .extra2 = &maxolduid,
538 },
539 {
540 .ctl_name = KERN_OVERFLOWGID,
541 .procname = "overflowgid",
542 .data = &overflowgid,
543 .maxlen = sizeof(int),
544 .mode = 0644,
545 .proc_handler = &proc_dointvec_minmax,
546 .strategy = &sysctl_intvec,
547 .extra1 = &minolduid,
548 .extra2 = &maxolduid,
549 },
550 #ifdef CONFIG_S390
551 #ifdef CONFIG_MATHEMU
552 {
553 .ctl_name = KERN_IEEE_EMULATION_WARNINGS,
554 .procname = "ieee_emulation_warnings",
555 .data = &sysctl_ieee_emulation_warnings,
556 .maxlen = sizeof(int),
557 .mode = 0644,
558 .proc_handler = &proc_dointvec,
559 },
560 #endif
561 #ifdef CONFIG_NO_IDLE_HZ
562 {
563 .ctl_name = KERN_HZ_TIMER,
564 .procname = "hz_timer",
565 .data = &sysctl_hz_timer,
566 .maxlen = sizeof(int),
567 .mode = 0644,
568 .proc_handler = &proc_dointvec,
569 },
570 #endif
571 {
572 .ctl_name = KERN_S390_USER_DEBUG_LOGGING,
573 .procname = "userprocess_debug",
574 .data = &sysctl_userprocess_debug,
575 .maxlen = sizeof(int),
576 .mode = 0644,
577 .proc_handler = &proc_dointvec,
578 },
579 #endif
580 {
581 .ctl_name = KERN_PIDMAX,
582 .procname = "pid_max",
583 .data = &pid_max,
584 .maxlen = sizeof (int),
585 .mode = 0644,
586 .proc_handler = &proc_dointvec_minmax,
587 .strategy = sysctl_intvec,
588 .extra1 = &pid_max_min,
589 .extra2 = &pid_max_max,
590 },
591 {
592 .ctl_name = KERN_PANIC_ON_OOPS,
593 .procname = "panic_on_oops",
594 .data = &panic_on_oops,
595 .maxlen = sizeof(int),
596 .mode = 0644,
597 .proc_handler = &proc_dointvec,
598 },
599 {
600 .ctl_name = KERN_PRINTK_RATELIMIT,
601 .procname = "printk_ratelimit",
602 .data = &printk_ratelimit_jiffies,
603 .maxlen = sizeof(int),
604 .mode = 0644,
605 .proc_handler = &proc_dointvec_jiffies,
606 .strategy = &sysctl_jiffies,
607 },
608 {
609 .ctl_name = KERN_PRINTK_RATELIMIT_BURST,
610 .procname = "printk_ratelimit_burst",
611 .data = &printk_ratelimit_burst,
612 .maxlen = sizeof(int),
613 .mode = 0644,
614 .proc_handler = &proc_dointvec,
615 },
616 {
617 .ctl_name = KERN_NGROUPS_MAX,
618 .procname = "ngroups_max",
619 .data = &ngroups_max,
620 .maxlen = sizeof (int),
621 .mode = 0444,
622 .proc_handler = &proc_dointvec,
623 },
624 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
625 {
626 .ctl_name = KERN_UNKNOWN_NMI_PANIC,
627 .procname = "unknown_nmi_panic",
628 .data = &unknown_nmi_panic,
629 .maxlen = sizeof (int),
630 .mode = 0644,
631 .proc_handler = &proc_unknown_nmi_panic,
632 },
633 #endif
634 #if defined(CONFIG_X86)
635 {
636 .ctl_name = KERN_BOOTLOADER_TYPE,
637 .procname = "bootloader_type",
638 .data = &bootloader_type,
639 .maxlen = sizeof (int),
640 .mode = 0444,
641 .proc_handler = &proc_dointvec,
642 },
643 #endif
644 #if defined(CONFIG_MMU)
645 {
646 .ctl_name = KERN_RANDOMIZE,
647 .procname = "randomize_va_space",
648 .data = &randomize_va_space,
649 .maxlen = sizeof(int),
650 .mode = 0644,
651 .proc_handler = &proc_dointvec,
652 },
653 #endif
654 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
655 {
656 .ctl_name = KERN_SPIN_RETRY,
657 .procname = "spin_retry",
658 .data = &spin_retry,
659 .maxlen = sizeof (int),
660 .mode = 0644,
661 .proc_handler = &proc_dointvec,
662 },
663 #endif
664 #if defined(CONFIG_ACPI_SLEEP) && !defined(CONFIG_ACPI_PV_SLEEP)
665 {
666 .ctl_name = KERN_ACPI_VIDEO_FLAGS,
667 .procname = "acpi_video_flags",
668 .data = &acpi_video_flags,
669 .maxlen = sizeof (unsigned long),
670 .mode = 0644,
671 .proc_handler = &proc_doulongvec_minmax,
672 },
673 #endif
674 #ifdef CONFIG_IA64
675 {
676 .ctl_name = KERN_IA64_UNALIGNED,
677 .procname = "ignore-unaligned-usertrap",
678 .data = &no_unaligned_warning,
679 .maxlen = sizeof (int),
680 .mode = 0644,
681 .proc_handler = &proc_dointvec,
682 },
683 #endif
684 #ifdef CONFIG_COMPAT
685 {
686 .ctl_name = KERN_COMPAT_LOG,
687 .procname = "compat-log",
688 .data = &compat_log,
689 .maxlen = sizeof (int),
690 .mode = 0644,
691 .proc_handler = &proc_dointvec,
692 },
693 #endif
694 #ifdef CONFIG_RT_MUTEXES
695 {
696 .ctl_name = KERN_MAX_LOCK_DEPTH,
697 .procname = "max_lock_depth",
698 .data = &max_lock_depth,
699 .maxlen = sizeof(int),
700 .mode = 0644,
701 .proc_handler = &proc_dointvec,
702 },
703 #endif
705 { .ctl_name = 0 }
706 };
708 /* Constants for minimum and maximum testing in vm_table.
709 We use these as one-element integer vectors. */
710 static int zero;
711 static int one_hundred = 100;
714 static ctl_table vm_table[] = {
715 {
716 .ctl_name = VM_OVERCOMMIT_MEMORY,
717 .procname = "overcommit_memory",
718 .data = &sysctl_overcommit_memory,
719 .maxlen = sizeof(sysctl_overcommit_memory),
720 .mode = 0644,
721 .proc_handler = &proc_dointvec,
722 },
723 {
724 .ctl_name = VM_PANIC_ON_OOM,
725 .procname = "panic_on_oom",
726 .data = &sysctl_panic_on_oom,
727 .maxlen = sizeof(sysctl_panic_on_oom),
728 .mode = 0644,
729 .proc_handler = &proc_dointvec,
730 },
731 {
732 .ctl_name = VM_OVERCOMMIT_RATIO,
733 .procname = "overcommit_ratio",
734 .data = &sysctl_overcommit_ratio,
735 .maxlen = sizeof(sysctl_overcommit_ratio),
736 .mode = 0644,
737 .proc_handler = &proc_dointvec,
738 },
739 {
740 .ctl_name = VM_PAGE_CLUSTER,
741 .procname = "page-cluster",
742 .data = &page_cluster,
743 .maxlen = sizeof(int),
744 .mode = 0644,
745 .proc_handler = &proc_dointvec,
746 },
747 {
748 .ctl_name = VM_DIRTY_BACKGROUND,
749 .procname = "dirty_background_ratio",
750 .data = &dirty_background_ratio,
751 .maxlen = sizeof(dirty_background_ratio),
752 .mode = 0644,
753 .proc_handler = &proc_dointvec_minmax,
754 .strategy = &sysctl_intvec,
755 .extra1 = &zero,
756 .extra2 = &one_hundred,
757 },
758 {
759 .ctl_name = VM_DIRTY_RATIO,
760 .procname = "dirty_ratio",
761 .data = &vm_dirty_ratio,
762 .maxlen = sizeof(vm_dirty_ratio),
763 .mode = 0644,
764 .proc_handler = &proc_dointvec_minmax,
765 .strategy = &sysctl_intvec,
766 .extra1 = &zero,
767 .extra2 = &one_hundred,
768 },
769 {
770 .ctl_name = VM_DIRTY_WB_CS,
771 .procname = "dirty_writeback_centisecs",
772 .data = &dirty_writeback_interval,
773 .maxlen = sizeof(dirty_writeback_interval),
774 .mode = 0644,
775 .proc_handler = &dirty_writeback_centisecs_handler,
776 },
777 {
778 .ctl_name = VM_DIRTY_EXPIRE_CS,
779 .procname = "dirty_expire_centisecs",
780 .data = &dirty_expire_interval,
781 .maxlen = sizeof(dirty_expire_interval),
782 .mode = 0644,
783 .proc_handler = &proc_dointvec_userhz_jiffies,
784 },
785 {
786 .ctl_name = VM_NR_PDFLUSH_THREADS,
787 .procname = "nr_pdflush_threads",
788 .data = &nr_pdflush_threads,
789 .maxlen = sizeof nr_pdflush_threads,
790 .mode = 0444 /* read-only*/,
791 .proc_handler = &proc_dointvec,
792 },
793 {
794 .ctl_name = VM_SWAPPINESS,
795 .procname = "swappiness",
796 .data = &vm_swappiness,
797 .maxlen = sizeof(vm_swappiness),
798 .mode = 0644,
799 .proc_handler = &proc_dointvec_minmax,
800 .strategy = &sysctl_intvec,
801 .extra1 = &zero,
802 .extra2 = &one_hundred,
803 },
804 #ifdef CONFIG_HUGETLB_PAGE
805 {
806 .ctl_name = VM_HUGETLB_PAGES,
807 .procname = "nr_hugepages",
808 .data = &max_huge_pages,
809 .maxlen = sizeof(unsigned long),
810 .mode = 0644,
811 .proc_handler = &hugetlb_sysctl_handler,
812 .extra1 = (void *)&hugetlb_zero,
813 .extra2 = (void *)&hugetlb_infinity,
814 },
815 {
816 .ctl_name = VM_HUGETLB_GROUP,
817 .procname = "hugetlb_shm_group",
818 .data = &sysctl_hugetlb_shm_group,
819 .maxlen = sizeof(gid_t),
820 .mode = 0644,
821 .proc_handler = &proc_dointvec,
822 },
823 #endif
824 {
825 .ctl_name = VM_LOWMEM_RESERVE_RATIO,
826 .procname = "lowmem_reserve_ratio",
827 .data = &sysctl_lowmem_reserve_ratio,
828 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
829 .mode = 0644,
830 .proc_handler = &lowmem_reserve_ratio_sysctl_handler,
831 .strategy = &sysctl_intvec,
832 },
833 {
834 .ctl_name = VM_DROP_PAGECACHE,
835 .procname = "drop_caches",
836 .data = &sysctl_drop_caches,
837 .maxlen = sizeof(int),
838 .mode = 0644,
839 .proc_handler = drop_caches_sysctl_handler,
840 .strategy = &sysctl_intvec,
841 },
842 {
843 .ctl_name = VM_MIN_FREE_KBYTES,
844 .procname = "min_free_kbytes",
845 .data = &min_free_kbytes,
846 .maxlen = sizeof(min_free_kbytes),
847 .mode = 0644,
848 .proc_handler = &min_free_kbytes_sysctl_handler,
849 .strategy = &sysctl_intvec,
850 .extra1 = &zero,
851 },
852 {
853 .ctl_name = VM_PERCPU_PAGELIST_FRACTION,
854 .procname = "percpu_pagelist_fraction",
855 .data = &percpu_pagelist_fraction,
856 .maxlen = sizeof(percpu_pagelist_fraction),
857 .mode = 0644,
858 .proc_handler = &percpu_pagelist_fraction_sysctl_handler,
859 .strategy = &sysctl_intvec,
860 .extra1 = &min_percpu_pagelist_fract,
861 },
862 #ifdef CONFIG_MMU
863 {
864 .ctl_name = VM_MAX_MAP_COUNT,
865 .procname = "max_map_count",
866 .data = &sysctl_max_map_count,
867 .maxlen = sizeof(sysctl_max_map_count),
868 .mode = 0644,
869 .proc_handler = &proc_dointvec
870 },
871 #endif
872 {
873 .ctl_name = VM_LAPTOP_MODE,
874 .procname = "laptop_mode",
875 .data = &laptop_mode,
876 .maxlen = sizeof(laptop_mode),
877 .mode = 0644,
878 .proc_handler = &proc_dointvec_jiffies,
879 .strategy = &sysctl_jiffies,
880 },
881 {
882 .ctl_name = VM_BLOCK_DUMP,
883 .procname = "block_dump",
884 .data = &block_dump,
885 .maxlen = sizeof(block_dump),
886 .mode = 0644,
887 .proc_handler = &proc_dointvec,
888 .strategy = &sysctl_intvec,
889 .extra1 = &zero,
890 },
891 {
892 .ctl_name = VM_VFS_CACHE_PRESSURE,
893 .procname = "vfs_cache_pressure",
894 .data = &sysctl_vfs_cache_pressure,
895 .maxlen = sizeof(sysctl_vfs_cache_pressure),
896 .mode = 0644,
897 .proc_handler = &proc_dointvec,
898 .strategy = &sysctl_intvec,
899 .extra1 = &zero,
900 },
901 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
902 {
903 .ctl_name = VM_LEGACY_VA_LAYOUT,
904 .procname = "legacy_va_layout",
905 .data = &sysctl_legacy_va_layout,
906 .maxlen = sizeof(sysctl_legacy_va_layout),
907 .mode = 0644,
908 .proc_handler = &proc_dointvec,
909 .strategy = &sysctl_intvec,
910 .extra1 = &zero,
911 },
912 #endif
913 #ifdef CONFIG_SWAP
914 {
915 .ctl_name = VM_SWAP_TOKEN_TIMEOUT,
916 .procname = "swap_token_timeout",
917 .data = &swap_token_default_timeout,
918 .maxlen = sizeof(swap_token_default_timeout),
919 .mode = 0644,
920 .proc_handler = &proc_dointvec_jiffies,
921 .strategy = &sysctl_jiffies,
922 },
923 #endif
924 #ifdef CONFIG_NUMA
925 {
926 .ctl_name = VM_ZONE_RECLAIM_MODE,
927 .procname = "zone_reclaim_mode",
928 .data = &zone_reclaim_mode,
929 .maxlen = sizeof(zone_reclaim_mode),
930 .mode = 0644,
931 .proc_handler = &proc_dointvec,
932 .strategy = &sysctl_intvec,
933 .extra1 = &zero,
934 },
935 {
936 .ctl_name = VM_MIN_UNMAPPED,
937 .procname = "min_unmapped_ratio",
938 .data = &sysctl_min_unmapped_ratio,
939 .maxlen = sizeof(sysctl_min_unmapped_ratio),
940 .mode = 0644,
941 .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler,
942 .strategy = &sysctl_intvec,
943 .extra1 = &zero,
944 .extra2 = &one_hundred,
945 },
946 {
947 .ctl_name = VM_MIN_SLAB,
948 .procname = "min_slab_ratio",
949 .data = &sysctl_min_slab_ratio,
950 .maxlen = sizeof(sysctl_min_slab_ratio),
951 .mode = 0644,
952 .proc_handler = &sysctl_min_slab_ratio_sysctl_handler,
953 .strategy = &sysctl_intvec,
954 .extra1 = &zero,
955 .extra2 = &one_hundred,
956 },
957 #endif
958 #ifdef CONFIG_X86_32
959 {
960 .ctl_name = VM_VDSO_ENABLED,
961 .procname = "vdso_enabled",
962 .data = &vdso_enabled,
963 .maxlen = sizeof(vdso_enabled),
964 .mode = 0644,
965 .proc_handler = &proc_dointvec,
966 .strategy = &sysctl_intvec,
967 .extra1 = &zero,
968 },
969 #endif
970 #ifdef CONFIG_PRESWAP
971 {
972 .ctl_name = VM_PRESWAP_PAGES,
973 .procname = "preswap",
974 .data = NULL,
975 .maxlen = sizeof(unsigned long),
976 .mode = 0644,
977 .proc_handler = &preswap_sysctl_handler,
978 .extra1 = (void *)&preswap_zero,
979 .extra2 = (void *)&preswap_infinity,
980 },
981 #endif
982 { .ctl_name = 0 }
983 };
985 static ctl_table fs_table[] = {
986 {
987 .ctl_name = FS_NRINODE,
988 .procname = "inode-nr",
989 .data = &inodes_stat,
990 .maxlen = 2*sizeof(int),
991 .mode = 0444,
992 .proc_handler = &proc_dointvec,
993 },
994 {
995 .ctl_name = FS_STATINODE,
996 .procname = "inode-state",
997 .data = &inodes_stat,
998 .maxlen = 7*sizeof(int),
999 .mode = 0444,
1000 .proc_handler = &proc_dointvec,
1001 },
1003 .ctl_name = FS_NRFILE,
1004 .procname = "file-nr",
1005 .data = &files_stat,
1006 .maxlen = 3*sizeof(int),
1007 .mode = 0444,
1008 .proc_handler = &proc_nr_files,
1009 },
1011 .ctl_name = FS_MAXFILE,
1012 .procname = "file-max",
1013 .data = &files_stat.max_files,
1014 .maxlen = sizeof(int),
1015 .mode = 0644,
1016 .proc_handler = &proc_dointvec,
1017 },
1019 .ctl_name = FS_DENTRY,
1020 .procname = "dentry-state",
1021 .data = &dentry_stat,
1022 .maxlen = 6*sizeof(int),
1023 .mode = 0444,
1024 .proc_handler = &proc_dointvec,
1025 },
1027 .ctl_name = FS_OVERFLOWUID,
1028 .procname = "overflowuid",
1029 .data = &fs_overflowuid,
1030 .maxlen = sizeof(int),
1031 .mode = 0644,
1032 .proc_handler = &proc_dointvec_minmax,
1033 .strategy = &sysctl_intvec,
1034 .extra1 = &minolduid,
1035 .extra2 = &maxolduid,
1036 },
1038 .ctl_name = FS_OVERFLOWGID,
1039 .procname = "overflowgid",
1040 .data = &fs_overflowgid,
1041 .maxlen = sizeof(int),
1042 .mode = 0644,
1043 .proc_handler = &proc_dointvec_minmax,
1044 .strategy = &sysctl_intvec,
1045 .extra1 = &minolduid,
1046 .extra2 = &maxolduid,
1047 },
1049 .ctl_name = FS_LEASES,
1050 .procname = "leases-enable",
1051 .data = &leases_enable,
1052 .maxlen = sizeof(int),
1053 .mode = 0644,
1054 .proc_handler = &proc_dointvec,
1055 },
1056 #ifdef CONFIG_DNOTIFY
1058 .ctl_name = FS_DIR_NOTIFY,
1059 .procname = "dir-notify-enable",
1060 .data = &dir_notify_enable,
1061 .maxlen = sizeof(int),
1062 .mode = 0644,
1063 .proc_handler = &proc_dointvec,
1064 },
1065 #endif
1066 #ifdef CONFIG_MMU
1068 .ctl_name = FS_LEASE_TIME,
1069 .procname = "lease-break-time",
1070 .data = &lease_break_time,
1071 .maxlen = sizeof(int),
1072 .mode = 0644,
1073 .proc_handler = &proc_dointvec,
1074 },
1076 .ctl_name = FS_AIO_NR,
1077 .procname = "aio-nr",
1078 .data = &aio_nr,
1079 .maxlen = sizeof(aio_nr),
1080 .mode = 0444,
1081 .proc_handler = &proc_doulongvec_minmax,
1082 },
1084 .ctl_name = FS_AIO_MAX_NR,
1085 .procname = "aio-max-nr",
1086 .data = &aio_max_nr,
1087 .maxlen = sizeof(aio_max_nr),
1088 .mode = 0644,
1089 .proc_handler = &proc_doulongvec_minmax,
1090 },
1091 #ifdef CONFIG_INOTIFY_USER
1093 .ctl_name = FS_INOTIFY,
1094 .procname = "inotify",
1095 .mode = 0555,
1096 .child = inotify_table,
1097 },
1098 #endif
1099 #endif
1101 .ctl_name = KERN_SETUID_DUMPABLE,
1102 .procname = "suid_dumpable",
1103 .data = &suid_dumpable,
1104 .maxlen = sizeof(int),
1105 .mode = 0644,
1106 .proc_handler = &proc_dointvec,
1107 },
1108 { .ctl_name = 0 }
1109 };
1111 static ctl_table debug_table[] = {
1112 { .ctl_name = 0 }
1113 };
1115 static ctl_table dev_table[] = {
1116 { .ctl_name = 0 }
1117 };
1119 extern void init_irq_proc (void);
1121 static DEFINE_SPINLOCK(sysctl_lock);
1123 /* called under sysctl_lock */
1124 static int use_table(struct ctl_table_header *p)
1126 if (unlikely(p->unregistering))
1127 return 0;
1128 p->used++;
1129 return 1;
1132 /* called under sysctl_lock */
1133 static void unuse_table(struct ctl_table_header *p)
1135 if (!--p->used)
1136 if (unlikely(p->unregistering))
1137 complete(p->unregistering);
1140 /* called under sysctl_lock, will reacquire if has to wait */
1141 static void start_unregistering(struct ctl_table_header *p)
1143 /*
1144 * if p->used is 0, nobody will ever touch that entry again;
1145 * we'll eliminate all paths to it before dropping sysctl_lock
1146 */
1147 if (unlikely(p->used)) {
1148 struct completion wait;
1149 init_completion(&wait);
1150 p->unregistering = &wait;
1151 spin_unlock(&sysctl_lock);
1152 wait_for_completion(&wait);
1153 spin_lock(&sysctl_lock);
1155 /*
1156 * do not remove from the list until nobody holds it; walking the
1157 * list in do_sysctl() relies on that.
1158 */
1159 list_del_init(&p->ctl_entry);
1162 void __init sysctl_init(void)
1164 #ifdef CONFIG_PROC_FS
1165 register_proc_table(root_table, proc_sys_root, &root_table_header);
1166 init_irq_proc();
1167 #endif
1170 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1171 void __user *newval, size_t newlen)
1173 struct list_head *tmp;
1174 int error = -ENOTDIR;
1176 if (nlen <= 0 || nlen >= CTL_MAXNAME)
1177 return -ENOTDIR;
1178 if (oldval) {
1179 int old_len;
1180 if (!oldlenp || get_user(old_len, oldlenp))
1181 return -EFAULT;
1183 spin_lock(&sysctl_lock);
1184 tmp = &root_table_header.ctl_entry;
1185 do {
1186 struct ctl_table_header *head =
1187 list_entry(tmp, struct ctl_table_header, ctl_entry);
1188 void *context = NULL;
1190 if (!use_table(head))
1191 continue;
1193 spin_unlock(&sysctl_lock);
1195 error = parse_table(name, nlen, oldval, oldlenp,
1196 newval, newlen, head->ctl_table,
1197 &context);
1198 kfree(context);
1200 spin_lock(&sysctl_lock);
1201 unuse_table(head);
1202 if (error != -ENOTDIR)
1203 break;
1204 } while ((tmp = tmp->next) != &root_table_header.ctl_entry);
1205 spin_unlock(&sysctl_lock);
1206 return error;
1209 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1211 struct __sysctl_args tmp;
1212 int error;
1214 if (copy_from_user(&tmp, args, sizeof(tmp)))
1215 return -EFAULT;
1217 lock_kernel();
1218 error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1219 tmp.newval, tmp.newlen);
1220 unlock_kernel();
1221 return error;
1224 /*
1225 * ctl_perm does NOT grant the superuser all rights automatically, because
1226 * some sysctl variables are readonly even to root.
1227 */
1229 static int test_perm(int mode, int op)
1231 if (!current->euid)
1232 mode >>= 6;
1233 else if (in_egroup_p(0))
1234 mode >>= 3;
1235 if ((mode & op & 0007) == op)
1236 return 0;
1237 return -EACCES;
1240 static inline int ctl_perm(ctl_table *table, int op)
1242 int error;
1243 error = security_sysctl(table, op);
1244 if (error)
1245 return error;
1246 return test_perm(table->mode, op);
1249 static int parse_table(int __user *name, int nlen,
1250 void __user *oldval, size_t __user *oldlenp,
1251 void __user *newval, size_t newlen,
1252 ctl_table *table, void **context)
1254 int n;
1255 repeat:
1256 if (!nlen)
1257 return -ENOTDIR;
1258 if (get_user(n, name))
1259 return -EFAULT;
1260 for ( ; table->ctl_name; table++) {
1261 if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
1262 int error;
1263 if (table->child) {
1264 if (ctl_perm(table, 001))
1265 return -EPERM;
1266 if (table->strategy) {
1267 error = table->strategy(
1268 table, name, nlen,
1269 oldval, oldlenp,
1270 newval, newlen, context);
1271 if (error)
1272 return error;
1274 name++;
1275 nlen--;
1276 table = table->child;
1277 goto repeat;
1279 error = do_sysctl_strategy(table, name, nlen,
1280 oldval, oldlenp,
1281 newval, newlen, context);
1282 return error;
1285 return -ENOTDIR;
1288 /* Perform the actual read/write of a sysctl table entry. */
1289 int do_sysctl_strategy (ctl_table *table,
1290 int __user *name, int nlen,
1291 void __user *oldval, size_t __user *oldlenp,
1292 void __user *newval, size_t newlen, void **context)
1294 int op = 0, rc;
1295 size_t len;
1297 if (oldval)
1298 op |= 004;
1299 if (newval)
1300 op |= 002;
1301 if (ctl_perm(table, op))
1302 return -EPERM;
1304 if (table->strategy) {
1305 rc = table->strategy(table, name, nlen, oldval, oldlenp,
1306 newval, newlen, context);
1307 if (rc < 0)
1308 return rc;
1309 if (rc > 0)
1310 return 0;
1313 /* If there is no strategy routine, or if the strategy returns
1314 * zero, proceed with automatic r/w */
1315 if (table->data && table->maxlen) {
1316 if (oldval && oldlenp) {
1317 if (get_user(len, oldlenp))
1318 return -EFAULT;
1319 if (len) {
1320 if (len > table->maxlen)
1321 len = table->maxlen;
1322 if(copy_to_user(oldval, table->data, len))
1323 return -EFAULT;
1324 if(put_user(len, oldlenp))
1325 return -EFAULT;
1328 if (newval && newlen) {
1329 len = newlen;
1330 if (len > table->maxlen)
1331 len = table->maxlen;
1332 if(copy_from_user(table->data, newval, len))
1333 return -EFAULT;
1336 return 0;
1339 /**
1340 * register_sysctl_table - register a sysctl hierarchy
1341 * @table: the top-level table structure
1342 * @insert_at_head: whether the entry should be inserted in front or at the end
1344 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1345 * array. An entry with a ctl_name of 0 terminates the table.
1347 * The members of the &ctl_table structure are used as follows:
1349 * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1350 * must be unique within that level of sysctl
1352 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1353 * enter a sysctl file
1355 * data - a pointer to data for use by proc_handler
1357 * maxlen - the maximum size in bytes of the data
1359 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1361 * child - a pointer to the child sysctl table if this entry is a directory, or
1362 * %NULL.
1364 * proc_handler - the text handler routine (described below)
1366 * strategy - the strategy routine (described below)
1368 * de - for internal use by the sysctl routines
1370 * extra1, extra2 - extra pointers usable by the proc handler routines
1372 * Leaf nodes in the sysctl tree will be represented by a single file
1373 * under /proc; non-leaf nodes will be represented by directories.
1375 * sysctl(2) can automatically manage read and write requests through
1376 * the sysctl table. The data and maxlen fields of the ctl_table
1377 * struct enable minimal validation of the values being written to be
1378 * performed, and the mode field allows minimal authentication.
1380 * More sophisticated management can be enabled by the provision of a
1381 * strategy routine with the table entry. This will be called before
1382 * any automatic read or write of the data is performed.
1384 * The strategy routine may return
1386 * < 0 - Error occurred (error is passed to user process)
1388 * 0 - OK - proceed with automatic read or write.
1390 * > 0 - OK - read or write has been done by the strategy routine, so
1391 * return immediately.
1393 * There must be a proc_handler routine for any terminal nodes
1394 * mirrored under /proc/sys (non-terminals are handled by a built-in
1395 * directory handler). Several default handlers are available to
1396 * cover common cases -
1398 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1399 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1400 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1402 * It is the handler's job to read the input buffer from user memory
1403 * and process it. The handler should return 0 on success.
1405 * This routine returns %NULL on a failure to register, and a pointer
1406 * to the table header on success.
1407 */
1408 struct ctl_table_header *register_sysctl_table(ctl_table * table,
1409 int insert_at_head)
1411 struct ctl_table_header *tmp;
1412 tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1413 if (!tmp)
1414 return NULL;
1415 tmp->ctl_table = table;
1416 INIT_LIST_HEAD(&tmp->ctl_entry);
1417 tmp->used = 0;
1418 tmp->unregistering = NULL;
1419 spin_lock(&sysctl_lock);
1420 if (insert_at_head)
1421 list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
1422 else
1423 list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1424 spin_unlock(&sysctl_lock);
1425 #ifdef CONFIG_PROC_FS
1426 register_proc_table(table, proc_sys_root, tmp);
1427 #endif
1428 return tmp;
1431 /**
1432 * unregister_sysctl_table - unregister a sysctl table hierarchy
1433 * @header: the header returned from register_sysctl_table
1435 * Unregisters the sysctl table and all children. proc entries may not
1436 * actually be removed until they are no longer used by anyone.
1437 */
1438 void unregister_sysctl_table(struct ctl_table_header * header)
1440 might_sleep();
1441 spin_lock(&sysctl_lock);
1442 start_unregistering(header);
1443 #ifdef CONFIG_PROC_FS
1444 unregister_proc_table(header->ctl_table, proc_sys_root);
1445 #endif
1446 spin_unlock(&sysctl_lock);
1447 kfree(header);
1450 /*
1451 * /proc/sys support
1452 */
1454 #ifdef CONFIG_PROC_FS
1456 /* Scan the sysctl entries in table and add them all into /proc */
1457 static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
1459 struct proc_dir_entry *de;
1460 int len;
1461 mode_t mode;
1463 for (; table->ctl_name; table++) {
1464 /* Can't do anything without a proc name. */
1465 if (!table->procname)
1466 continue;
1467 /* Maybe we can't do anything with it... */
1468 if (!table->proc_handler && !table->child) {
1469 printk(KERN_WARNING "SYSCTL: Can't register %s\n",
1470 table->procname);
1471 continue;
1474 len = strlen(table->procname);
1475 mode = table->mode;
1477 de = NULL;
1478 if (table->proc_handler)
1479 mode |= S_IFREG;
1480 else {
1481 mode |= S_IFDIR;
1482 for (de = root->subdir; de; de = de->next) {
1483 if (proc_match(len, table->procname, de))
1484 break;
1486 /* If the subdir exists already, de is non-NULL */
1489 if (!de) {
1490 de = create_proc_entry(table->procname, mode, root);
1491 if (!de)
1492 continue;
1493 de->set = set;
1494 de->data = (void *) table;
1495 if (table->proc_handler)
1496 de->proc_fops = &proc_sys_file_operations;
1498 table->de = de;
1499 if (de->mode & S_IFDIR)
1500 register_proc_table(table->child, de, set);
1504 /*
1505 * Unregister a /proc sysctl table and any subdirectories.
1506 */
1507 static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
1509 struct proc_dir_entry *de;
1510 for (; table->ctl_name; table++) {
1511 if (!(de = table->de))
1512 continue;
1513 if (de->mode & S_IFDIR) {
1514 if (!table->child) {
1515 printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
1516 continue;
1518 unregister_proc_table(table->child, de);
1520 /* Don't unregister directories which still have entries.. */
1521 if (de->subdir)
1522 continue;
1525 /*
1526 * In any case, mark the entry as goner; we'll keep it
1527 * around if it's busy, but we'll know to do nothing with
1528 * its fields. We are under sysctl_lock here.
1529 */
1530 de->data = NULL;
1532 /* Don't unregister proc entries that are still being used.. */
1533 if (atomic_read(&de->count))
1534 continue;
1536 table->de = NULL;
1537 remove_proc_entry(table->procname, root);
1541 static ssize_t do_rw_proc(int write, struct file * file, char __user * buf,
1542 size_t count, loff_t *ppos)
1544 int op;
1545 struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
1546 struct ctl_table *table;
1547 size_t res;
1548 ssize_t error = -ENOTDIR;
1550 spin_lock(&sysctl_lock);
1551 if (de && de->data && use_table(de->set)) {
1552 /*
1553 * at that point we know that sysctl was not unregistered
1554 * and won't be until we finish
1555 */
1556 spin_unlock(&sysctl_lock);
1557 table = (struct ctl_table *) de->data;
1558 if (!table || !table->proc_handler)
1559 goto out;
1560 error = -EPERM;
1561 op = (write ? 002 : 004);
1562 if (ctl_perm(table, op))
1563 goto out;
1565 /* careful: calling conventions are nasty here */
1566 res = count;
1567 error = (*table->proc_handler)(table, write, file,
1568 buf, &res, ppos);
1569 if (!error)
1570 error = res;
1571 out:
1572 spin_lock(&sysctl_lock);
1573 unuse_table(de->set);
1575 spin_unlock(&sysctl_lock);
1576 return error;
1579 static int proc_opensys(struct inode *inode, struct file *file)
1581 if (file->f_mode & FMODE_WRITE) {
1582 /*
1583 * sysctl entries that are not writable,
1584 * are _NOT_ writable, capabilities or not.
1585 */
1586 if (!(inode->i_mode & S_IWUSR))
1587 return -EPERM;
1590 return 0;
1593 static ssize_t proc_readsys(struct file * file, char __user * buf,
1594 size_t count, loff_t *ppos)
1596 return do_rw_proc(0, file, buf, count, ppos);
1599 static ssize_t proc_writesys(struct file * file, const char __user * buf,
1600 size_t count, loff_t *ppos)
1602 return do_rw_proc(1, file, (char __user *) buf, count, ppos);
1605 /**
1606 * proc_dostring - read a string sysctl
1607 * @table: the sysctl table
1608 * @write: %TRUE if this is a write to the sysctl file
1609 * @filp: the file structure
1610 * @buffer: the user buffer
1611 * @lenp: the size of the user buffer
1612 * @ppos: file position
1614 * Reads/writes a string from/to the user buffer. If the kernel
1615 * buffer provided is not large enough to hold the string, the
1616 * string is truncated. The copied string is %NULL-terminated.
1617 * If the string is being read by the user process, it is copied
1618 * and a newline '\n' is added. It is truncated if the buffer is
1619 * not large enough.
1621 * Returns 0 on success.
1622 */
1623 int proc_dostring(ctl_table *table, int write, struct file *filp,
1624 void __user *buffer, size_t *lenp, loff_t *ppos)
1626 size_t len;
1627 char __user *p;
1628 char c;
1630 if (!table->data || !table->maxlen || !*lenp ||
1631 (*ppos && !write)) {
1632 *lenp = 0;
1633 return 0;
1636 if (write) {
1637 len = 0;
1638 p = buffer;
1639 while (len < *lenp) {
1640 if (get_user(c, p++))
1641 return -EFAULT;
1642 if (c == 0 || c == '\n')
1643 break;
1644 len++;
1646 if (len >= table->maxlen)
1647 len = table->maxlen-1;
1648 if(copy_from_user(table->data, buffer, len))
1649 return -EFAULT;
1650 ((char *) table->data)[len] = 0;
1651 *ppos += *lenp;
1652 } else {
1653 len = strlen(table->data);
1654 if (len > table->maxlen)
1655 len = table->maxlen;
1656 if (len > *lenp)
1657 len = *lenp;
1658 if (len)
1659 if(copy_to_user(buffer, table->data, len))
1660 return -EFAULT;
1661 if (len < *lenp) {
1662 if(put_user('\n', ((char __user *) buffer) + len))
1663 return -EFAULT;
1664 len++;
1666 *lenp = len;
1667 *ppos += len;
1669 return 0;
1672 /*
1673 * Special case of dostring for the UTS structure. This has locks
1674 * to observe. Should this be in kernel/sys.c ????
1675 */
1677 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
1678 void __user *buffer, size_t *lenp, loff_t *ppos)
1680 int r;
1682 if (!write) {
1683 down_read(&uts_sem);
1684 r=proc_dostring(table,0,filp,buffer,lenp, ppos);
1685 up_read(&uts_sem);
1686 } else {
1687 down_write(&uts_sem);
1688 r=proc_dostring(table,1,filp,buffer,lenp, ppos);
1689 up_write(&uts_sem);
1691 return r;
/*
 * Default conversion between the (sign flag, magnitude) pair used by the
 * text parser/printer and the stored int.
 *
 * @negp:  sign flag; non-zero means negative
 * @lvalp: magnitude
 * @valp:  the stored integer
 * @write: non-zero to convert (negp, lvalp) into *valp, zero for the
 *         opposite direction
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: -val on the int
			 * overflows for INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1713 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1714 void __user *buffer, size_t *lenp, loff_t *ppos,
1715 int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1716 int write, void *data),
1717 void *data)
1719 #define TMPBUFLEN 21
1720 int *i, vleft, first=1, neg, val;
1721 unsigned long lval;
1722 size_t left, len;
1724 char buf[TMPBUFLEN], *p;
1725 char __user *s = buffer;
1727 if (!table->data || !table->maxlen || !*lenp ||
1728 (*ppos && !write)) {
1729 *lenp = 0;
1730 return 0;
1733 i = (int *) table->data;
1734 vleft = table->maxlen / sizeof(*i);
1735 left = *lenp;
1737 if (!conv)
1738 conv = do_proc_dointvec_conv;
1740 for (; left && vleft--; i++, first=0) {
1741 if (write) {
1742 while (left) {
1743 char c;
1744 if (get_user(c, s))
1745 return -EFAULT;
1746 if (!isspace(c))
1747 break;
1748 left--;
1749 s++;
1751 if (!left)
1752 break;
1753 neg = 0;
1754 len = left;
1755 if (len > sizeof(buf) - 1)
1756 len = sizeof(buf) - 1;
1757 if (copy_from_user(buf, s, len))
1758 return -EFAULT;
1759 buf[len] = 0;
1760 p = buf;
1761 if (*p == '-' && left > 1) {
1762 neg = 1;
1763 left--, p++;
1765 if (*p < '0' || *p > '9')
1766 break;
1768 lval = simple_strtoul(p, &p, 0);
1770 len = p-buf;
1771 if ((len < left) && *p && !isspace(*p))
1772 break;
1773 if (neg)
1774 val = -val;
1775 s += len;
1776 left -= len;
1778 if (conv(&neg, &lval, i, 1, data))
1779 break;
1780 } else {
1781 p = buf;
1782 if (!first)
1783 *p++ = '\t';
1785 if (conv(&neg, &lval, i, 0, data))
1786 break;
1788 sprintf(p, "%s%lu", neg ? "-" : "", lval);
1789 len = strlen(buf);
1790 if (len > left)
1791 len = left;
1792 if(copy_to_user(s, buf, len))
1793 return -EFAULT;
1794 left -= len;
1795 s += len;
1799 if (!write && !first && left) {
1800 if(put_user('\n', s))
1801 return -EFAULT;
1802 left--, s++;
1804 if (write) {
1805 while (left) {
1806 char c;
1807 if (get_user(c, s++))
1808 return -EFAULT;
1809 if (!isspace(c))
1810 break;
1811 left--;
1814 if (write && first)
1815 return -EINVAL;
1816 *lenp -= left;
1817 *ppos += *lenp;
1818 return 0;
1819 #undef TMPBUFLEN
1822 /**
1823 * proc_dointvec - read a vector of integers
1824 * @table: the sysctl table
1825 * @write: %TRUE if this is a write to the sysctl file
1826 * @filp: the file structure
1827 * @buffer: the user buffer
1828 * @lenp: the size of the user buffer
1829 * @ppos: file position
1831 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1832 * values from/to the user buffer, treated as an ASCII string.
1834 * Returns 0 on success.
1835 */
1836 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1837 void __user *buffer, size_t *lenp, loff_t *ppos)
1839 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1840 NULL,NULL);
/* How a written value is combined with the stored one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2
#define OP_MAX	3
#define OP_MIN	4

/*
 * Conversion routine that merges a written value into *valp according
 * to the OP_* code that @data points to.  Reads behave like
 * do_proc_dointvec_conv().  Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:
			*valp = val;
			break;
		case OP_AND:
			*valp &= val;
			break;
		case OP_OR:
			*valp |= val;
			break;
		case OP_MAX:
			if (*valp < val)
				*valp = val;
			break;
		case OP_MIN:
			if (*valp > val)
				*valp = val;
			break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: -val on the int
			 * overflows for INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1880 /*
1881 * init may raise the set.
1882 */
1884 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1885 void __user *buffer, size_t *lenp, loff_t *ppos)
1887 int op;
1889 if (!capable(CAP_SYS_MODULE)) {
1890 return -EPERM;
1893 op = (current->pid == 1) ? OP_SET : OP_AND;
1894 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1895 do_proc_dointvec_bset_conv,&op);
/* Optional bounds handed to do_proc_dointvec_minmax_conv() via @data. */
struct do_proc_dointvec_minmax_conv_param {
	int *min;	/* lower bound, or NULL for none */
	int *max;	/* upper bound, or NULL for none */
};

/*
 * Conversion routine that rejects written values outside the optional
 * [*min, *max] range with -EINVAL, leaving *valp untouched.  Reads
 * behave like do_proc_dointvec_conv() and return 0.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: -val on the int
			 * overflows for INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1927 /**
1928 * proc_dointvec_minmax - read a vector of integers with min/max values
1929 * @table: the sysctl table
1930 * @write: %TRUE if this is a write to the sysctl file
1931 * @filp: the file structure
1932 * @buffer: the user buffer
1933 * @lenp: the size of the user buffer
1934 * @ppos: file position
1936 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1937 * values from/to the user buffer, treated as an ASCII string.
1939 * This routine will ensure the values are within the range specified by
1940 * table->extra1 (min) and table->extra2 (max).
1942 * Returns 0 on success.
1943 */
1944 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1945 void __user *buffer, size_t *lenp, loff_t *ppos)
1947 struct do_proc_dointvec_minmax_conv_param param = {
1948 .min = (int *) table->extra1,
1949 .max = (int *) table->extra2,
1950 };
1951 return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1952 do_proc_dointvec_minmax_conv, &param);
1955 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
1956 struct file *filp,
1957 void __user *buffer,
1958 size_t *lenp, loff_t *ppos,
1959 unsigned long convmul,
1960 unsigned long convdiv)
1962 #define TMPBUFLEN 21
1963 unsigned long *i, *min, *max, val;
1964 int vleft, first=1, neg;
1965 size_t len, left;
1966 char buf[TMPBUFLEN], *p;
1967 char __user *s = buffer;
1969 if (!table->data || !table->maxlen || !*lenp ||
1970 (*ppos && !write)) {
1971 *lenp = 0;
1972 return 0;
1975 i = (unsigned long *) table->data;
1976 min = (unsigned long *) table->extra1;
1977 max = (unsigned long *) table->extra2;
1978 vleft = table->maxlen / sizeof(unsigned long);
1979 left = *lenp;
1981 for (; left && vleft--; i++, min++, max++, first=0) {
1982 if (write) {
1983 while (left) {
1984 char c;
1985 if (get_user(c, s))
1986 return -EFAULT;
1987 if (!isspace(c))
1988 break;
1989 left--;
1990 s++;
1992 if (!left)
1993 break;
1994 neg = 0;
1995 len = left;
1996 if (len > TMPBUFLEN-1)
1997 len = TMPBUFLEN-1;
1998 if (copy_from_user(buf, s, len))
1999 return -EFAULT;
2000 buf[len] = 0;
2001 p = buf;
2002 if (*p == '-' && left > 1) {
2003 neg = 1;
2004 left--, p++;
2006 if (*p < '0' || *p > '9')
2007 break;
2008 val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2009 len = p-buf;
2010 if ((len < left) && *p && !isspace(*p))
2011 break;
2012 if (neg)
2013 val = -val;
2014 s += len;
2015 left -= len;
2017 if(neg)
2018 continue;
2019 if ((min && val < *min) || (max && val > *max))
2020 continue;
2021 *i = val;
2022 } else {
2023 p = buf;
2024 if (!first)
2025 *p++ = '\t';
2026 sprintf(p, "%lu", convdiv * (*i) / convmul);
2027 len = strlen(buf);
2028 if (len > left)
2029 len = left;
2030 if(copy_to_user(s, buf, len))
2031 return -EFAULT;
2032 left -= len;
2033 s += len;
2037 if (!write && !first && left) {
2038 if(put_user('\n', s))
2039 return -EFAULT;
2040 left--, s++;
2042 if (write) {
2043 while (left) {
2044 char c;
2045 if (get_user(c, s++))
2046 return -EFAULT;
2047 if (!isspace(c))
2048 break;
2049 left--;
2052 if (write && first)
2053 return -EINVAL;
2054 *lenp -= left;
2055 *ppos += *lenp;
2056 return 0;
2057 #undef TMPBUFLEN
2060 /**
2061 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2062 * @table: the sysctl table
2063 * @write: %TRUE if this is a write to the sysctl file
2064 * @filp: the file structure
2065 * @buffer: the user buffer
2066 * @lenp: the size of the user buffer
2067 * @ppos: file position
2069 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2070 * values from/to the user buffer, treated as an ASCII string.
2072 * This routine will ensure the values are within the range specified by
2073 * table->extra1 (min) and table->extra2 (max).
2075 * Returns 0 on success.
2076 */
2077 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2078 void __user *buffer, size_t *lenp, loff_t *ppos)
2080 return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2083 /**
2084 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2085 * @table: the sysctl table
2086 * @write: %TRUE if this is a write to the sysctl file
2087 * @filp: the file structure
2088 * @buffer: the user buffer
2089 * @lenp: the size of the user buffer
2090 * @ppos: file position
2092 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2093 * values from/to the user buffer, treated as an ASCII string. The values
2094 * are treated as milliseconds, and converted to jiffies when they are stored.
2096 * This routine will ensure the values are within the range specified by
2097 * table->extra1 (min) and table->extra2 (max).
2099 * Returns 0 on success.
2100 */
2101 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2102 struct file *filp,
2103 void __user *buffer,
2104 size_t *lenp, loff_t *ppos)
2106 return do_proc_doulongvec_minmax(table, write, filp, buffer,
2107 lenp, ppos, HZ, 1000l);
2111 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2112 int *valp,
2113 int write, void *data)
2115 if (write) {
2116 if (*lvalp > LONG_MAX / HZ)
2117 return 1;
2118 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2119 } else {
2120 int val = *valp;
2121 unsigned long lval;
2122 if (val < 0) {
2123 *negp = -1;
2124 lval = (unsigned long)-val;
2125 } else {
2126 *negp = 0;
2127 lval = (unsigned long)val;
2129 *lvalp = lval / HZ;
2131 return 0;
2134 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2135 int *valp,
2136 int write, void *data)
2138 if (write) {
2139 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2140 return 1;
2141 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2142 } else {
2143 int val = *valp;
2144 unsigned long lval;
2145 if (val < 0) {
2146 *negp = -1;
2147 lval = (unsigned long)-val;
2148 } else {
2149 *negp = 0;
2150 lval = (unsigned long)val;
2152 *lvalp = jiffies_to_clock_t(lval);
2154 return 0;
/*
 * Conversion routine for values exchanged with user space in
 * milliseconds and stored in jiffies.  Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: -val on the int
			 * overflows for INT_MIN.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2178 /**
2179 * proc_dointvec_jiffies - read a vector of integers as seconds
2180 * @table: the sysctl table
2181 * @write: %TRUE if this is a write to the sysctl file
2182 * @filp: the file structure
2183 * @buffer: the user buffer
2184 * @lenp: the size of the user buffer
2185 * @ppos: file position
2187 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2188 * values from/to the user buffer, treated as an ASCII string.
2189 * The values read are assumed to be in seconds, and are converted into
2190 * jiffies.
2192 * Returns 0 on success.
2193 */
2194 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2195 void __user *buffer, size_t *lenp, loff_t *ppos)
2197 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2198 do_proc_dointvec_jiffies_conv,NULL);
2201 /**
2202 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2203 * @table: the sysctl table
2204 * @write: %TRUE if this is a write to the sysctl file
2205 * @filp: the file structure
2206 * @buffer: the user buffer
2207 * @lenp: the size of the user buffer
2208 * @ppos: pointer to the file position
2210 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2211 * values from/to the user buffer, treated as an ASCII string.
2212 * The values read are assumed to be in 1/USER_HZ seconds, and
2213 * are converted into jiffies.
2215 * Returns 0 on success.
2216 */
2217 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2218 void __user *buffer, size_t *lenp, loff_t *ppos)
2220 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2221 do_proc_dointvec_userhz_jiffies_conv,NULL);
2224 /**
2225 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2226 * @table: the sysctl table
2227 * @write: %TRUE if this is a write to the sysctl file
2228 * @filp: the file structure
2229 * @buffer: the user buffer
2230 * @lenp: the size of the user buffer
2231 * @ppos: file position
2232 * @ppos: the current position in the file
2234 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2235 * values from/to the user buffer, treated as an ASCII string.
2236 * The values read are assumed to be in 1/1000 seconds, and
2237 * are converted into jiffies.
2239 * Returns 0 on success.
2240 */
2241 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2242 void __user *buffer, size_t *lenp, loff_t *ppos)
2244 return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2245 do_proc_dointvec_ms_jiffies_conv, NULL);
2248 #else /* CONFIG_PROC_FS */
2250 int proc_dostring(ctl_table *table, int write, struct file *filp,
2251 void __user *buffer, size_t *lenp, loff_t *ppos)
2253 return -ENOSYS;
2256 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
2257 void __user *buffer, size_t *lenp, loff_t *ppos)
2259 return -ENOSYS;
2262 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2263 void __user *buffer, size_t *lenp, loff_t *ppos)
2265 return -ENOSYS;
2268 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2269 void __user *buffer, size_t *lenp, loff_t *ppos)
2271 return -ENOSYS;
2274 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2275 void __user *buffer, size_t *lenp, loff_t *ppos)
2277 return -ENOSYS;
2280 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2281 void __user *buffer, size_t *lenp, loff_t *ppos)
2283 return -ENOSYS;
2286 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2287 void __user *buffer, size_t *lenp, loff_t *ppos)
2289 return -ENOSYS;
2292 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2293 void __user *buffer, size_t *lenp, loff_t *ppos)
2295 return -ENOSYS;
2298 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2299 void __user *buffer, size_t *lenp, loff_t *ppos)
2301 return -ENOSYS;
2304 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2305 struct file *filp,
2306 void __user *buffer,
2307 size_t *lenp, loff_t *ppos)
2309 return -ENOSYS;
2313 #endif /* CONFIG_PROC_FS */
2316 /*
2317 * General sysctl support routines
2318 */
2320 /* The generic string strategy routine: */
2321 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2322 void __user *oldval, size_t __user *oldlenp,
2323 void __user *newval, size_t newlen, void **context)
2325 if (!table->data || !table->maxlen)
2326 return -ENOTDIR;
2328 if (oldval && oldlenp) {
2329 size_t bufsize;
2330 if (get_user(bufsize, oldlenp))
2331 return -EFAULT;
2332 if (bufsize) {
2333 size_t len = strlen(table->data), copied;
2335 /* This shouldn't trigger for a well-formed sysctl */
2336 if (len > table->maxlen)
2337 len = table->maxlen;
2339 /* Copy up to a max of bufsize-1 bytes of the string */
2340 copied = (len >= bufsize) ? bufsize - 1 : len;
2342 if (copy_to_user(oldval, table->data, copied) ||
2343 put_user(0, (char __user *)(oldval + copied)))
2344 return -EFAULT;
2345 if (put_user(len, oldlenp))
2346 return -EFAULT;
2349 if (newval && newlen) {
2350 size_t len = newlen;
2351 if (len > table->maxlen)
2352 len = table->maxlen;
2353 if(copy_from_user(table->data, newval, len))
2354 return -EFAULT;
2355 if (len == table->maxlen)
2356 len--;
2357 ((char *) table->data)[len] = 0;
2359 return 1;
2362 /*
2363 * This function makes sure that all of the integers in the vector
2364 * are between the minimum and maximum values given in the arrays
2365 * table->extra1 and table->extra2, respectively.
2366 */
2367 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2368 void __user *oldval, size_t __user *oldlenp,
2369 void __user *newval, size_t newlen, void **context)
2372 if (newval && newlen) {
2373 int __user *vec = (int __user *) newval;
2374 int *min = (int *) table->extra1;
2375 int *max = (int *) table->extra2;
2376 size_t length;
2377 int i;
2379 if (newlen % sizeof(int) != 0)
2380 return -EINVAL;
2382 if (!table->extra1 && !table->extra2)
2383 return 0;
2385 if (newlen > table->maxlen)
2386 newlen = table->maxlen;
2387 length = newlen / sizeof(int);
2389 for (i = 0; i < length; i++) {
2390 int value;
2391 if (get_user(value, vec + i))
2392 return -EFAULT;
2393 if (min && value < min[i])
2394 return -EINVAL;
2395 if (max && value > max[i])
2396 return -EINVAL;
2399 return 0;
2402 /* Strategy function to convert jiffies to seconds */
2403 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2404 void __user *oldval, size_t __user *oldlenp,
2405 void __user *newval, size_t newlen, void **context)
2407 if (oldval) {
2408 size_t olen;
2409 if (oldlenp) {
2410 if (get_user(olen, oldlenp))
2411 return -EFAULT;
2412 if (olen!=sizeof(int))
2413 return -EINVAL;
2415 if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) ||
2416 (oldlenp && put_user(sizeof(int),oldlenp)))
2417 return -EFAULT;
2419 if (newval && newlen) {
2420 int new;
2421 if (newlen != sizeof(int))
2422 return -EINVAL;
2423 if (get_user(new, (int __user *)newval))
2424 return -EFAULT;
2425 *(int *)(table->data) = new*HZ;
2427 return 1;
2430 /* Strategy function to convert jiffies to seconds */
2431 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2432 void __user *oldval, size_t __user *oldlenp,
2433 void __user *newval, size_t newlen, void **context)
2435 if (oldval) {
2436 size_t olen;
2437 if (oldlenp) {
2438 if (get_user(olen, oldlenp))
2439 return -EFAULT;
2440 if (olen!=sizeof(int))
2441 return -EINVAL;
2443 if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) ||
2444 (oldlenp && put_user(sizeof(int),oldlenp)))
2445 return -EFAULT;
2447 if (newval && newlen) {
2448 int new;
2449 if (newlen != sizeof(int))
2450 return -EINVAL;
2451 if (get_user(new, (int __user *)newval))
2452 return -EFAULT;
2453 *(int *)(table->data) = msecs_to_jiffies(new);
2455 return 1;
2458 #else /* CONFIG_SYSCTL */
2461 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2463 return -ENOSYS;
2466 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2467 void __user *oldval, size_t __user *oldlenp,
2468 void __user *newval, size_t newlen, void **context)
2470 return -ENOSYS;
2473 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2474 void __user *oldval, size_t __user *oldlenp,
2475 void __user *newval, size_t newlen, void **context)
2477 return -ENOSYS;
2480 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2481 void __user *oldval, size_t __user *oldlenp,
2482 void __user *newval, size_t newlen, void **context)
2484 return -ENOSYS;
2487 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2488 void __user *oldval, size_t __user *oldlenp,
2489 void __user *newval, size_t newlen, void **context)
2491 return -ENOSYS;
2494 int proc_dostring(ctl_table *table, int write, struct file *filp,
2495 void __user *buffer, size_t *lenp, loff_t *ppos)
2497 return -ENOSYS;
2500 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2501 void __user *buffer, size_t *lenp, loff_t *ppos)
2503 return -ENOSYS;
2506 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2507 void __user *buffer, size_t *lenp, loff_t *ppos)
2509 return -ENOSYS;
2512 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2513 void __user *buffer, size_t *lenp, loff_t *ppos)
2515 return -ENOSYS;
2518 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2519 void __user *buffer, size_t *lenp, loff_t *ppos)
2521 return -ENOSYS;
2524 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2525 void __user *buffer, size_t *lenp, loff_t *ppos)
2527 return -ENOSYS;
2530 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2531 void __user *buffer, size_t *lenp, loff_t *ppos)
2533 return -ENOSYS;
2536 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2537 void __user *buffer, size_t *lenp, loff_t *ppos)
2539 return -ENOSYS;
2542 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2543 struct file *filp,
2544 void __user *buffer,
2545 size_t *lenp, loff_t *ppos)
2547 return -ENOSYS;
2550 struct ctl_table_header * register_sysctl_table(ctl_table * table,
2551 int insert_at_head)
2553 return NULL;
/*
 * Stub for the #else (no CONFIG_SYSCTL) branch: does nothing.
 */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
2560 #endif /* CONFIG_SYSCTL */
/*
 * Each of these symbols is defined twice in this file, once on either
 * side of the CONFIG_SYSCTL #else, so the exports are gathered here
 * in one place rather than repeated after every definition.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(unregister_sysctl_table);