Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                      |    4
-rw-r--r--  kernel/cpuset.c                      |   27
-rw-r--r--  kernel/exit.c                        |   27
-rw-r--r--  kernel/fork.c                        |    5
-rw-r--r--  kernel/hw_breakpoint.c               |  423
-rw-r--r--  kernel/kallsyms.c                    |    1
-rw-r--r--  kernel/kgdb.c                        |    2
-rw-r--r--  kernel/kprobes.c                     |   68
-rw-r--r--  kernel/lockdep.c                     |    2
-rw-r--r--  kernel/notifier.c                    |    2
-rw-r--r--  kernel/perf_event.c                  |  627
-rw-r--r--  kernel/sched.c                       |  275
-rw-r--r--  kernel/sched_debug.c                 |    4
-rw-r--r--  kernel/sched_fair.c                  |   65
-rw-r--r--  kernel/sched_rt.c                    |   61
-rw-r--r--  kernel/signal.c                      |   27
-rw-r--r--  kernel/slow-work.c                   |    7
-rw-r--r--  kernel/sys.c                         |   21
-rw-r--r--  kernel/sys_ni.c                      |    1
-rw-r--r--  kernel/sysctl.c                      |  893
-rw-r--r--  kernel/sysctl_binary.c               | 1507
-rw-r--r--  kernel/sysctl_check.c                | 1377
-rw-r--r--  kernel/time.c                        |   30
-rw-r--r--  kernel/trace/Kconfig                 |   38
-rw-r--r--  kernel/trace/Makefile                |    2
-rw-r--r--  kernel/trace/ring_buffer.c           |   15
-rw-r--r--  kernel/trace/trace.h                 |   38
-rw-r--r--  kernel/trace/trace_entries.h         |   16
-rw-r--r--  kernel/trace/trace_event_profile.c   |   43
-rw-r--r--  kernel/trace/trace_events.c          |  168
-rw-r--r--  kernel/trace/trace_events_filter.c   |  310
-rw-r--r--  kernel/trace/trace_export.c          |   39
-rw-r--r--  kernel/trace/trace_kprobe.c          | 1523
-rw-r--r--  kernel/trace/trace_ksym.c            |  550
-rw-r--r--  kernel/trace/trace_selftest.c        |   55
-rw-r--r--  kernel/trace/trace_syscalls.c        |  195
-rw-r--r--  kernel/utsname_sysctl.c              |   31
37 files changed, 5564 insertions(+), 2915 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index dcf6789bf547..9943202b4355 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ 5obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
6 cpu.o exit.o itimer.o time.o softirq.o resource.o \ 6 cpu.o exit.o itimer.o time.o softirq.o resource.o \
7 sysctl.o capability.o ptrace.o timer.o user.o \ 7 sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
@@ -21,6 +21,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
21CFLAGS_REMOVE_rtmutex-debug.o = -pg 21CFLAGS_REMOVE_rtmutex-debug.o = -pg
22CFLAGS_REMOVE_cgroup-debug.o = -pg 22CFLAGS_REMOVE_cgroup-debug.o = -pg
23CFLAGS_REMOVE_sched_clock.o = -pg 23CFLAGS_REMOVE_sched_clock.o = -pg
24CFLAGS_REMOVE_perf_event.o = -pg
24endif 25endif
25 26
26obj-$(CONFIG_FREEZER) += freezer.o 27obj-$(CONFIG_FREEZER) += freezer.o
@@ -97,6 +98,7 @@ obj-$(CONFIG_SMP) += sched_cpupri.o
97obj-$(CONFIG_SLOW_WORK) += slow-work.o 98obj-$(CONFIG_SLOW_WORK) += slow-work.o
98obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o 99obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
99obj-$(CONFIG_PERF_EVENTS) += perf_event.o 100obj-$(CONFIG_PERF_EVENTS) += perf_event.o
101obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
100 102
101ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 103ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
102# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 104# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b5cb469d2545..3cf2183b472d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -537,8 +537,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
537 * element of the partition (one sched domain) to be passed to 537 * element of the partition (one sched domain) to be passed to
538 * partition_sched_domains(). 538 * partition_sched_domains().
539 */ 539 */
540/* FIXME: see the FIXME in partition_sched_domains() */ 540static int generate_sched_domains(cpumask_var_t **domains,
541static int generate_sched_domains(struct cpumask **domains,
542 struct sched_domain_attr **attributes) 541 struct sched_domain_attr **attributes)
543{ 542{
544 LIST_HEAD(q); /* queue of cpusets to be scanned */ 543 LIST_HEAD(q); /* queue of cpusets to be scanned */
@@ -546,7 +545,7 @@ static int generate_sched_domains(struct cpumask **domains,
546 struct cpuset **csa; /* array of all cpuset ptrs */ 545 struct cpuset **csa; /* array of all cpuset ptrs */
547 int csn; /* how many cpuset ptrs in csa so far */ 546 int csn; /* how many cpuset ptrs in csa so far */
548 int i, j, k; /* indices for partition finding loops */ 547 int i, j, k; /* indices for partition finding loops */
549 struct cpumask *doms; /* resulting partition; i.e. sched domains */ 548 cpumask_var_t *doms; /* resulting partition; i.e. sched domains */
550 struct sched_domain_attr *dattr; /* attributes for custom domains */ 549 struct sched_domain_attr *dattr; /* attributes for custom domains */
551 int ndoms = 0; /* number of sched domains in result */ 550 int ndoms = 0; /* number of sched domains in result */
552 int nslot; /* next empty doms[] struct cpumask slot */ 551 int nslot; /* next empty doms[] struct cpumask slot */
@@ -557,7 +556,8 @@ static int generate_sched_domains(struct cpumask **domains,
557 556
558 /* Special case for the 99% of systems with one, full, sched domain */ 557 /* Special case for the 99% of systems with one, full, sched domain */
559 if (is_sched_load_balance(&top_cpuset)) { 558 if (is_sched_load_balance(&top_cpuset)) {
560 doms = kmalloc(cpumask_size(), GFP_KERNEL); 559 ndoms = 1;
560 doms = alloc_sched_domains(ndoms);
561 if (!doms) 561 if (!doms)
562 goto done; 562 goto done;
563 563
@@ -566,9 +566,8 @@ static int generate_sched_domains(struct cpumask **domains,
566 *dattr = SD_ATTR_INIT; 566 *dattr = SD_ATTR_INIT;
567 update_domain_attr_tree(dattr, &top_cpuset); 567 update_domain_attr_tree(dattr, &top_cpuset);
568 } 568 }
569 cpumask_copy(doms, top_cpuset.cpus_allowed); 569 cpumask_copy(doms[0], top_cpuset.cpus_allowed);
570 570
571 ndoms = 1;
572 goto done; 571 goto done;
573 } 572 }
574 573
@@ -636,7 +635,7 @@ restart:
636 * Now we know how many domains to create. 635 * Now we know how many domains to create.
637 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. 636 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
638 */ 637 */
639 doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL); 638 doms = alloc_sched_domains(ndoms);
640 if (!doms) 639 if (!doms)
641 goto done; 640 goto done;
642 641
@@ -656,7 +655,7 @@ restart:
656 continue; 655 continue;
657 } 656 }
658 657
659 dp = doms + nslot; 658 dp = doms[nslot];
660 659
661 if (nslot == ndoms) { 660 if (nslot == ndoms) {
662 static int warnings = 10; 661 static int warnings = 10;
@@ -718,7 +717,7 @@ done:
718static void do_rebuild_sched_domains(struct work_struct *unused) 717static void do_rebuild_sched_domains(struct work_struct *unused)
719{ 718{
720 struct sched_domain_attr *attr; 719 struct sched_domain_attr *attr;
721 struct cpumask *doms; 720 cpumask_var_t *doms;
722 int ndoms; 721 int ndoms;
723 722
724 get_online_cpus(); 723 get_online_cpus();
@@ -2052,7 +2051,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
2052 unsigned long phase, void *unused_cpu) 2051 unsigned long phase, void *unused_cpu)
2053{ 2052{
2054 struct sched_domain_attr *attr; 2053 struct sched_domain_attr *attr;
2055 struct cpumask *doms; 2054 cpumask_var_t *doms;
2056 int ndoms; 2055 int ndoms;
2057 2056
2058 switch (phase) { 2057 switch (phase) {
@@ -2537,15 +2536,9 @@ const struct file_operations proc_cpuset_operations = {
2537}; 2536};
2538#endif /* CONFIG_PROC_PID_CPUSET */ 2537#endif /* CONFIG_PROC_PID_CPUSET */
2539 2538
2540/* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */ 2539/* Display task mems_allowed in /proc/<pid>/status file. */
2541void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) 2540void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
2542{ 2541{
2543 seq_printf(m, "Cpus_allowed:\t");
2544 seq_cpumask(m, &task->cpus_allowed);
2545 seq_printf(m, "\n");
2546 seq_printf(m, "Cpus_allowed_list:\t");
2547 seq_cpumask_list(m, &task->cpus_allowed);
2548 seq_printf(m, "\n");
2549 seq_printf(m, "Mems_allowed:\t"); 2542 seq_printf(m, "Mems_allowed:\t");
2550 seq_nodemask(m, &task->mems_allowed); 2543 seq_nodemask(m, &task->mems_allowed);
2551 seq_printf(m, "\n"); 2544 seq_printf(m, "\n");
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac2ecc1..80ae941cfd2e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
49#include <linux/init_task.h> 49#include <linux/init_task.h>
50#include <linux/perf_event.h> 50#include <linux/perf_event.h>
51#include <trace/events/sched.h> 51#include <trace/events/sched.h>
52#include <linux/hw_breakpoint.h>
52 53
53#include <asm/uaccess.h> 54#include <asm/uaccess.h>
54#include <asm/unistd.h> 55#include <asm/unistd.h>
@@ -110,9 +111,9 @@ static void __exit_signal(struct task_struct *tsk)
110 * We won't ever get here for the group leader, since it 111 * We won't ever get here for the group leader, since it
111 * will have been the last reference on the signal_struct. 112 * will have been the last reference on the signal_struct.
112 */ 113 */
113 sig->utime = cputime_add(sig->utime, task_utime(tsk)); 114 sig->utime = cputime_add(sig->utime, tsk->utime);
114 sig->stime = cputime_add(sig->stime, task_stime(tsk)); 115 sig->stime = cputime_add(sig->stime, tsk->stime);
115 sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); 116 sig->gtime = cputime_add(sig->gtime, tsk->gtime);
116 sig->min_flt += tsk->min_flt; 117 sig->min_flt += tsk->min_flt;
117 sig->maj_flt += tsk->maj_flt; 118 sig->maj_flt += tsk->maj_flt;
118 sig->nvcsw += tsk->nvcsw; 119 sig->nvcsw += tsk->nvcsw;
@@ -978,6 +979,10 @@ NORET_TYPE void do_exit(long code)
978 proc_exit_connector(tsk); 979 proc_exit_connector(tsk);
979 980
980 /* 981 /*
982 * FIXME: do that only when needed, using sched_exit tracepoint
983 */
984 flush_ptrace_hw_breakpoint(tsk);
985 /*
981 * Flush inherited counters to the parent - before the parent 986 * Flush inherited counters to the parent - before the parent
982 * gets woken up by child-exit notifications. 987 * gets woken up by child-exit notifications.
983 */ 988 */
@@ -1205,6 +1210,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1205 struct signal_struct *psig; 1210 struct signal_struct *psig;
1206 struct signal_struct *sig; 1211 struct signal_struct *sig;
1207 unsigned long maxrss; 1212 unsigned long maxrss;
1213 cputime_t tgutime, tgstime;
1208 1214
1209 /* 1215 /*
1210 * The resource counters for the group leader are in its 1216 * The resource counters for the group leader are in its
@@ -1220,20 +1226,23 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1220 * need to protect the access to parent->signal fields, 1226 * need to protect the access to parent->signal fields,
1221 * as other threads in the parent group can be right 1227 * as other threads in the parent group can be right
1222 * here reaping other children at the same time. 1228 * here reaping other children at the same time.
1229 *
1230 * We use thread_group_times() to get times for the thread
1231 * group, which consolidates times for all threads in the
1232 * group including the group leader.
1223 */ 1233 */
1234 thread_group_times(p, &tgutime, &tgstime);
1224 spin_lock_irq(&p->real_parent->sighand->siglock); 1235 spin_lock_irq(&p->real_parent->sighand->siglock);
1225 psig = p->real_parent->signal; 1236 psig = p->real_parent->signal;
1226 sig = p->signal; 1237 sig = p->signal;
1227 psig->cutime = 1238 psig->cutime =
1228 cputime_add(psig->cutime, 1239 cputime_add(psig->cutime,
1229 cputime_add(p->utime, 1240 cputime_add(tgutime,
1230 cputime_add(sig->utime, 1241 sig->cutime));
1231 sig->cutime)));
1232 psig->cstime = 1242 psig->cstime =
1233 cputime_add(psig->cstime, 1243 cputime_add(psig->cstime,
1234 cputime_add(p->stime, 1244 cputime_add(tgstime,
1235 cputime_add(sig->stime, 1245 sig->cstime));
1236 sig->cstime)));
1237 psig->cgtime = 1246 psig->cgtime =
1238 cputime_add(psig->cgtime, 1247 cputime_add(psig->cgtime,
1239 cputime_add(p->gtime, 1248 cputime_add(p->gtime,
diff --git a/kernel/fork.c b/kernel/fork.c
index 166b8c49257c..3d6f121bbe8a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -884,6 +884,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
884 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; 884 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
885 sig->gtime = cputime_zero; 885 sig->gtime = cputime_zero;
886 sig->cgtime = cputime_zero; 886 sig->cgtime = cputime_zero;
887#ifndef CONFIG_VIRT_CPU_ACCOUNTING
888 sig->prev_utime = sig->prev_stime = cputime_zero;
889#endif
887 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; 890 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
888 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; 891 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
889 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; 892 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
@@ -1066,8 +1069,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1066 p->gtime = cputime_zero; 1069 p->gtime = cputime_zero;
1067 p->utimescaled = cputime_zero; 1070 p->utimescaled = cputime_zero;
1068 p->stimescaled = cputime_zero; 1071 p->stimescaled = cputime_zero;
1072#ifndef CONFIG_VIRT_CPU_ACCOUNTING
1069 p->prev_utime = cputime_zero; 1073 p->prev_utime = cputime_zero;
1070 p->prev_stime = cputime_zero; 1074 p->prev_stime = cputime_zero;
1075#endif
1071 1076
1072 p->default_timer_slack_ns = current->timer_slack_ns; 1077 p->default_timer_slack_ns = current->timer_slack_ns;
1073 1078
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..cf5ee1628411
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,423 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) IBM Corporation, 2009
18 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
19 *
20 * Thanks to Ingo Molnar for his many suggestions.
21 *
22 * Authors: Alan Stern <stern@rowland.harvard.edu>
23 * K.Prasad <prasad@linux.vnet.ibm.com>
24 * Frederic Weisbecker <fweisbec@gmail.com>
25 */
26
27/*
28 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
29 * using the CPU's debug registers.
30 * This file contains the arch-independent routines.
31 */
32
33#include <linux/irqflags.h>
34#include <linux/kallsyms.h>
35#include <linux/notifier.h>
36#include <linux/kprobes.h>
37#include <linux/kdebug.h>
38#include <linux/kernel.h>
39#include <linux/module.h>
40#include <linux/percpu.h>
41#include <linux/sched.h>
42#include <linux/init.h>
43#include <linux/smp.h>
44
45#include <linux/hw_breakpoint.h>
46
47/*
48 * Constraints data
49 */
50
51/* Number of pinned cpu breakpoints in a cpu */
52static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
53
54/* Number of pinned task breakpoints in a cpu */
55static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
56
57/* Number of non-pinned cpu/task breakpoints in a cpu */
58static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
59
60/* Gather the number of total pinned and un-pinned bp in a cpuset */
61struct bp_busy_slots {
62 unsigned int pinned;
63 unsigned int flexible;
64};
65
66/* Serialize accesses to the above constraints */
67static DEFINE_MUTEX(nr_bp_mutex);
68
69/*
70 * Report the maximum number of pinned breakpoints a task
71 * have in this cpu
72 */
73static unsigned int max_task_bp_pinned(int cpu)
74{
75 int i;
76 unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
77
78 for (i = HBP_NUM -1; i >= 0; i--) {
79 if (tsk_pinned[i] > 0)
80 return i + 1;
81 }
82
83 return 0;
84}
85
86/*
87 * Report the number of pinned/un-pinned breakpoints we have in
88 * a given cpu (cpu > -1) or in all of them (cpu = -1).
89 */
90static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
91{
92 if (cpu >= 0) {
93 slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
94 slots->pinned += max_task_bp_pinned(cpu);
95 slots->flexible = per_cpu(nr_bp_flexible, cpu);
96
97 return;
98 }
99
100 for_each_online_cpu(cpu) {
101 unsigned int nr;
102
103 nr = per_cpu(nr_cpu_bp_pinned, cpu);
104 nr += max_task_bp_pinned(cpu);
105
106 if (nr > slots->pinned)
107 slots->pinned = nr;
108
109 nr = per_cpu(nr_bp_flexible, cpu);
110
111 if (nr > slots->flexible)
112 slots->flexible = nr;
113 }
114}
115
116/*
117 * Add a pinned breakpoint for the given task in our constraint table
118 */
119static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
120{
121 int count = 0;
122 struct perf_event *bp;
123 struct perf_event_context *ctx = tsk->perf_event_ctxp;
124 unsigned int *tsk_pinned;
125 struct list_head *list;
126 unsigned long flags;
127
128 if (WARN_ONCE(!ctx, "No perf context for this task"))
129 return;
130
131 list = &ctx->event_list;
132
133 spin_lock_irqsave(&ctx->lock, flags);
134
135 /*
136 * The current breakpoint counter is not included in the list
137 * at the open() callback time
138 */
139 list_for_each_entry(bp, list, event_entry) {
140 if (bp->attr.type == PERF_TYPE_BREAKPOINT)
141 count++;
142 }
143
144 spin_unlock_irqrestore(&ctx->lock, flags);
145
146 if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
147 return;
148
149 tsk_pinned = per_cpu(task_bp_pinned, cpu);
150 if (enable) {
151 tsk_pinned[count]++;
152 if (count > 0)
153 tsk_pinned[count-1]--;
154 } else {
155 tsk_pinned[count]--;
156 if (count > 0)
157 tsk_pinned[count-1]++;
158 }
159}
160
161/*
162 * Add/remove the given breakpoint in our constraint table
163 */
164static void toggle_bp_slot(struct perf_event *bp, bool enable)
165{
166 int cpu = bp->cpu;
167 struct task_struct *tsk = bp->ctx->task;
168
169 /* Pinned counter task profiling */
170 if (tsk) {
171 if (cpu >= 0) {
172 toggle_bp_task_slot(tsk, cpu, enable);
173 return;
174 }
175
176 for_each_online_cpu(cpu)
177 toggle_bp_task_slot(tsk, cpu, enable);
178 return;
179 }
180
181 /* Pinned counter cpu profiling */
182 if (enable)
183 per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
184 else
185 per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
186}
187
188/*
189 * Contraints to check before allowing this new breakpoint counter:
190 *
191 * == Non-pinned counter == (Considered as pinned for now)
192 *
193 * - If attached to a single cpu, check:
194 *
195 * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
196 * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
197 *
198 * -> If there are already non-pinned counters in this cpu, it means
199 * there is already a free slot for them.
200 * Otherwise, we check that the maximum number of per task
201 * breakpoints (for this cpu) plus the number of per cpu breakpoint
202 * (for this cpu) doesn't cover every registers.
203 *
204 * - If attached to every cpus, check:
205 *
206 * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
207 * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
208 *
209 * -> This is roughly the same, except we check the number of per cpu
210 * bp for every cpu and we keep the max one. Same for the per tasks
211 * breakpoints.
212 *
213 *
214 * == Pinned counter ==
215 *
216 * - If attached to a single cpu, check:
217 *
218 * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
219 * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
220 *
221 * -> Same checks as before. But now the nr_bp_flexible, if any, must keep
222 * one register at least (or they will never be fed).
223 *
224 * - If attached to every cpus, check:
225 *
226 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
227 * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
228 */
229int reserve_bp_slot(struct perf_event *bp)
230{
231 struct bp_busy_slots slots = {0};
232 int ret = 0;
233
234 mutex_lock(&nr_bp_mutex);
235
236 fetch_bp_busy_slots(&slots, bp->cpu);
237
238 /* Flexible counters need to keep at least one slot */
239 if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
240 ret = -ENOSPC;
241 goto end;
242 }
243
244 toggle_bp_slot(bp, true);
245
246end:
247 mutex_unlock(&nr_bp_mutex);
248
249 return ret;
250}
251
252void release_bp_slot(struct perf_event *bp)
253{
254 mutex_lock(&nr_bp_mutex);
255
256 toggle_bp_slot(bp, false);
257
258 mutex_unlock(&nr_bp_mutex);
259}
260
261
262int __register_perf_hw_breakpoint(struct perf_event *bp)
263{
264 int ret;
265
266 ret = reserve_bp_slot(bp);
267 if (ret)
268 return ret;
269
270 /*
271 * Ptrace breakpoints can be temporary perf events only
272 * meant to reserve a slot. In this case, it is created disabled and
273 * we don't want to check the params right now (as we put a null addr)
274 * But perf tools create events as disabled and we want to check
275 * the params for them.
276 * This is a quick hack that will be removed soon, once we remove
277 * the tmp breakpoints from ptrace
278 */
279 if (!bp->attr.disabled || bp->callback == perf_bp_event)
280 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
281
282 return ret;
283}
284
285int register_perf_hw_breakpoint(struct perf_event *bp)
286{
287 bp->callback = perf_bp_event;
288
289 return __register_perf_hw_breakpoint(bp);
290}
291
292/**
293 * register_user_hw_breakpoint - register a hardware breakpoint for user space
294 * @attr: breakpoint attributes
295 * @triggered: callback to trigger when we hit the breakpoint
296 * @tsk: pointer to 'task_struct' of the process to which the address belongs
297 */
298struct perf_event *
299register_user_hw_breakpoint(struct perf_event_attr *attr,
300 perf_callback_t triggered,
301 struct task_struct *tsk)
302{
303 return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
304}
305EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
306
307/**
308 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
309 * @bp: the breakpoint structure to modify
310 * @attr: new breakpoint attributes
311 * @triggered: callback to trigger when we hit the breakpoint
312 * @tsk: pointer to 'task_struct' of the process to which the address belongs
313 */
314struct perf_event *
315modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
316 perf_callback_t triggered,
317 struct task_struct *tsk)
318{
319 /*
320 * FIXME: do it without unregistering
321 * - We don't want to lose our slot
322 * - If the new bp is incorrect, don't lose the older one
323 */
324 unregister_hw_breakpoint(bp);
325
326 return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
327}
328EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
329
330/**
331 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
332 * @bp: the breakpoint structure to unregister
333 */
334void unregister_hw_breakpoint(struct perf_event *bp)
335{
336 if (!bp)
337 return;
338 perf_event_release_kernel(bp);
339}
340EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
341
342/**
343 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
344 * @attr: breakpoint attributes
345 * @triggered: callback to trigger when we hit the breakpoint
346 *
347 * @return a set of per_cpu pointers to perf events
348 */
349struct perf_event **
350register_wide_hw_breakpoint(struct perf_event_attr *attr,
351 perf_callback_t triggered)
352{
353 struct perf_event **cpu_events, **pevent, *bp;
354 long err;
355 int cpu;
356
357 cpu_events = alloc_percpu(typeof(*cpu_events));
358 if (!cpu_events)
359 return ERR_PTR(-ENOMEM);
360
361 for_each_possible_cpu(cpu) {
362 pevent = per_cpu_ptr(cpu_events, cpu);
363 bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
364
365 *pevent = bp;
366
367 if (IS_ERR(bp)) {
368 err = PTR_ERR(bp);
369 goto fail;
370 }
371 }
372
373 return cpu_events;
374
375fail:
376 for_each_possible_cpu(cpu) {
377 pevent = per_cpu_ptr(cpu_events, cpu);
378 if (IS_ERR(*pevent))
379 break;
380 unregister_hw_breakpoint(*pevent);
381 }
382 free_percpu(cpu_events);
383 /* return the error if any */
384 return ERR_PTR(err);
385}
386EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
387
388/**
389 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
390 * @cpu_events: the per cpu set of events to unregister
391 */
392void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
393{
394 int cpu;
395 struct perf_event **pevent;
396
397 for_each_possible_cpu(cpu) {
398 pevent = per_cpu_ptr(cpu_events, cpu);
399 unregister_hw_breakpoint(*pevent);
400 }
401 free_percpu(cpu_events);
402}
403EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
404
405static struct notifier_block hw_breakpoint_exceptions_nb = {
406 .notifier_call = hw_breakpoint_exceptions_notify,
407 /* we need to be notified first */
408 .priority = 0x7fffffff
409};
410
411static int __init init_hw_breakpoint(void)
412{
413 return register_die_notifier(&hw_breakpoint_exceptions_nb);
414}
415core_initcall(init_hw_breakpoint);
416
417
418struct pmu perf_ops_bp = {
419 .enable = arch_install_hw_breakpoint,
420 .disable = arch_uninstall_hw_breakpoint,
421 .read = hw_breakpoint_pmu_read,
422 .unthrottle = hw_breakpoint_pmu_unthrottle
423};
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8b6b8b697c68..8e5288a8a355 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name)
181 } 181 }
182 return module_kallsyms_lookup_name(name); 182 return module_kallsyms_lookup_name(name);
183} 183}
184EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
184 185
185int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, 186int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
186 unsigned long), 187 unsigned long),
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 9147a3190c9d..7d7014634022 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -870,7 +870,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks)
870 870
871 /* 871 /*
872 * All threads that don't have debuggerinfo should be 872 * All threads that don't have debuggerinfo should be
873 * in __schedule() sleeping, since all other CPUs 873 * in schedule() sleeping, since all other CPUs
874 * are in kgdb_wait, and thus have debuggerinfo. 874 * are in kgdb_wait, and thus have debuggerinfo.
875 */ 875 */
876 if (local_debuggerinfo) { 876 if (local_debuggerinfo) {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1494e85b35f2..e5342a344c43 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -90,6 +90,9 @@ static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
90 */ 90 */
91static struct kprobe_blackpoint kprobe_blacklist[] = { 91static struct kprobe_blackpoint kprobe_blacklist[] = {
92 {"preempt_schedule",}, 92 {"preempt_schedule",},
93 {"native_get_debugreg",},
94 {"irq_entries_start",},
95 {"common_interrupt",},
93 {NULL} /* Terminator */ 96 {NULL} /* Terminator */
94}; 97};
95 98
@@ -673,6 +676,40 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
673 return (kprobe_opcode_t *)(((char *)addr) + p->offset); 676 return (kprobe_opcode_t *)(((char *)addr) + p->offset);
674} 677}
675 678
679/* Check passed kprobe is valid and return kprobe in kprobe_table. */
680static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
681{
682 struct kprobe *old_p, *list_p;
683
684 old_p = get_kprobe(p->addr);
685 if (unlikely(!old_p))
686 return NULL;
687
688 if (p != old_p) {
689 list_for_each_entry_rcu(list_p, &old_p->list, list)
690 if (list_p == p)
691 /* kprobe p is a valid probe */
692 goto valid;
693 return NULL;
694 }
695valid:
696 return old_p;
697}
698
699/* Return error if the kprobe is being re-registered */
700static inline int check_kprobe_rereg(struct kprobe *p)
701{
702 int ret = 0;
703 struct kprobe *old_p;
704
705 mutex_lock(&kprobe_mutex);
706 old_p = __get_valid_kprobe(p);
707 if (old_p)
708 ret = -EINVAL;
709 mutex_unlock(&kprobe_mutex);
710 return ret;
711}
712
676int __kprobes register_kprobe(struct kprobe *p) 713int __kprobes register_kprobe(struct kprobe *p)
677{ 714{
678 int ret = 0; 715 int ret = 0;
@@ -685,6 +722,10 @@ int __kprobes register_kprobe(struct kprobe *p)
685 return -EINVAL; 722 return -EINVAL;
686 p->addr = addr; 723 p->addr = addr;
687 724
725 ret = check_kprobe_rereg(p);
726 if (ret)
727 return ret;
728
688 preempt_disable(); 729 preempt_disable();
689 if (!kernel_text_address((unsigned long) p->addr) || 730 if (!kernel_text_address((unsigned long) p->addr) ||
690 in_kprobes_functions((unsigned long) p->addr)) { 731 in_kprobes_functions((unsigned long) p->addr)) {
@@ -754,26 +795,6 @@ out:
754} 795}
755EXPORT_SYMBOL_GPL(register_kprobe); 796EXPORT_SYMBOL_GPL(register_kprobe);
756 797
757/* Check passed kprobe is valid and return kprobe in kprobe_table. */
758static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
759{
760 struct kprobe *old_p, *list_p;
761
762 old_p = get_kprobe(p->addr);
763 if (unlikely(!old_p))
764 return NULL;
765
766 if (p != old_p) {
767 list_for_each_entry_rcu(list_p, &old_p->list, list)
768 if (list_p == p)
769 /* kprobe p is a valid probe */
770 goto valid;
771 return NULL;
772 }
773valid:
774 return old_p;
775}
776
777/* 798/*
778 * Unregister a kprobe without a scheduler synchronization. 799 * Unregister a kprobe without a scheduler synchronization.
779 */ 800 */
@@ -1141,6 +1162,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
1141 arch_remove_kprobe(p); 1162 arch_remove_kprobe(p);
1142} 1163}
1143 1164
1165void __kprobes dump_kprobe(struct kprobe *kp)
1166{
1167 printk(KERN_WARNING "Dumping kprobe:\n");
1168 printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
1169 kp->symbol_name, kp->addr, kp->offset);
1170}
1171
1144/* Module notifier call back, checking kprobes on the module */ 1172/* Module notifier call back, checking kprobes on the module */
1145static int __kprobes kprobes_module_callback(struct notifier_block *nb, 1173static int __kprobes kprobes_module_callback(struct notifier_block *nb,
1146 unsigned long val, void *data) 1174 unsigned long val, void *data)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 9af56723c096..f5dcd36d3151 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -49,7 +49,7 @@
49#include "lockdep_internals.h" 49#include "lockdep_internals.h"
50 50
51#define CREATE_TRACE_POINTS 51#define CREATE_TRACE_POINTS
52#include <trace/events/lockdep.h> 52#include <trace/events/lock.h>
53 53
54#ifdef CONFIG_PROVE_LOCKING 54#ifdef CONFIG_PROVE_LOCKING
55int prove_locking = 1; 55int prove_locking = 1;
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 61d5aa5eced3..acd24e7643eb 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -558,7 +558,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
558 558
559static ATOMIC_NOTIFIER_HEAD(die_chain); 559static ATOMIC_NOTIFIER_HEAD(die_chain);
560 560
561int notrace notify_die(enum die_val val, const char *str, 561int notrace __kprobes notify_die(enum die_val val, const char *str,
562 struct pt_regs *regs, long err, int trap, int sig) 562 struct pt_regs *regs, long err, int trap, int sig)
563{ 563{
564 struct die_args args = { 564 struct die_args args = {
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 7f29643c8985..6b7ddba1dd64 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -28,6 +28,8 @@
28#include <linux/anon_inodes.h> 28#include <linux/anon_inodes.h>
29#include <linux/kernel_stat.h> 29#include <linux/kernel_stat.h>
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/ftrace_event.h>
32#include <linux/hw_breakpoint.h>
31 33
32#include <asm/irq_regs.h> 34#include <asm/irq_regs.h>
33 35
@@ -244,6 +246,49 @@ static void perf_unpin_context(struct perf_event_context *ctx)
244 put_ctx(ctx); 246 put_ctx(ctx);
245} 247}
246 248
249static inline u64 perf_clock(void)
250{
251 return cpu_clock(smp_processor_id());
252}
253
254/*
255 * Update the record of the current time in a context.
256 */
257static void update_context_time(struct perf_event_context *ctx)
258{
259 u64 now = perf_clock();
260
261 ctx->time += now - ctx->timestamp;
262 ctx->timestamp = now;
263}
264
265/*
266 * Update the total_time_enabled and total_time_running fields for a event.
267 */
268static void update_event_times(struct perf_event *event)
269{
270 struct perf_event_context *ctx = event->ctx;
271 u64 run_end;
272
273 if (event->state < PERF_EVENT_STATE_INACTIVE ||
274 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
275 return;
276
277 if (ctx->is_active)
278 run_end = ctx->time;
279 else
280 run_end = event->tstamp_stopped;
281
282 event->total_time_enabled = run_end - event->tstamp_enabled;
283
284 if (event->state == PERF_EVENT_STATE_INACTIVE)
285 run_end = event->tstamp_stopped;
286 else
287 run_end = ctx->time;
288
289 event->total_time_running = run_end - event->tstamp_running;
290}
291
247/* 292/*
248 * Add a event from the lists for its context. 293 * Add a event from the lists for its context.
249 * Must be called with ctx->mutex and ctx->lock held. 294 * Must be called with ctx->mutex and ctx->lock held.
@@ -292,6 +337,18 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
292 if (event->group_leader != event) 337 if (event->group_leader != event)
293 event->group_leader->nr_siblings--; 338 event->group_leader->nr_siblings--;
294 339
340 update_event_times(event);
341
342 /*
343 * If event was in error state, then keep it
344 * that way, otherwise bogus counts will be
345 * returned on read(). The only way to get out
346 * of error state is by explicit re-enabling
347 * of the event
348 */
349 if (event->state > PERF_EVENT_STATE_OFF)
350 event->state = PERF_EVENT_STATE_OFF;
351
295 /* 352 /*
296 * If this was a group event with sibling events then 353 * If this was a group event with sibling events then
297 * upgrade the siblings to singleton events by adding them 354 * upgrade the siblings to singleton events by adding them
@@ -445,50 +502,11 @@ retry:
445 * can remove the event safely, if the call above did not 502 * can remove the event safely, if the call above did not
446 * succeed. 503 * succeed.
447 */ 504 */
448 if (!list_empty(&event->group_entry)) { 505 if (!list_empty(&event->group_entry))
449 list_del_event(event, ctx); 506 list_del_event(event, ctx);
450 }
451 spin_unlock_irq(&ctx->lock); 507 spin_unlock_irq(&ctx->lock);
452} 508}
453 509
454static inline u64 perf_clock(void)
455{
456 return cpu_clock(smp_processor_id());
457}
458
459/*
460 * Update the record of the current time in a context.
461 */
462static void update_context_time(struct perf_event_context *ctx)
463{
464 u64 now = perf_clock();
465
466 ctx->time += now - ctx->timestamp;
467 ctx->timestamp = now;
468}
469
470/*
471 * Update the total_time_enabled and total_time_running fields for a event.
472 */
473static void update_event_times(struct perf_event *event)
474{
475 struct perf_event_context *ctx = event->ctx;
476 u64 run_end;
477
478 if (event->state < PERF_EVENT_STATE_INACTIVE ||
479 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
480 return;
481
482 event->total_time_enabled = ctx->time - event->tstamp_enabled;
483
484 if (event->state == PERF_EVENT_STATE_INACTIVE)
485 run_end = event->tstamp_stopped;
486 else
487 run_end = ctx->time;
488
489 event->total_time_running = run_end - event->tstamp_running;
490}
491
492/* 510/*
493 * Update total_time_enabled and total_time_running for all events in a group. 511 * Update total_time_enabled and total_time_running for all events in a group.
494 */ 512 */
@@ -1031,10 +1049,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
1031 update_context_time(ctx); 1049 update_context_time(ctx);
1032 1050
1033 perf_disable(); 1051 perf_disable();
1034 if (ctx->nr_active) 1052 if (ctx->nr_active) {
1035 list_for_each_entry(event, &ctx->group_list, group_entry) 1053 list_for_each_entry(event, &ctx->group_list, group_entry)
1036 group_sched_out(event, cpuctx, ctx); 1054 group_sched_out(event, cpuctx, ctx);
1037 1055 }
1038 perf_enable(); 1056 perf_enable();
1039 out: 1057 out:
1040 spin_unlock(&ctx->lock); 1058 spin_unlock(&ctx->lock);
@@ -1059,8 +1077,6 @@ static int context_equiv(struct perf_event_context *ctx1,
1059 && !ctx1->pin_count && !ctx2->pin_count; 1077 && !ctx1->pin_count && !ctx2->pin_count;
1060} 1078}
1061 1079
1062static void __perf_event_read(void *event);
1063
1064static void __perf_event_sync_stat(struct perf_event *event, 1080static void __perf_event_sync_stat(struct perf_event *event,
1065 struct perf_event *next_event) 1081 struct perf_event *next_event)
1066{ 1082{
@@ -1078,8 +1094,8 @@ static void __perf_event_sync_stat(struct perf_event *event,
1078 */ 1094 */
1079 switch (event->state) { 1095 switch (event->state) {
1080 case PERF_EVENT_STATE_ACTIVE: 1096 case PERF_EVENT_STATE_ACTIVE:
1081 __perf_event_read(event); 1097 event->pmu->read(event);
1082 break; 1098 /* fall-through */
1083 1099
1084 case PERF_EVENT_STATE_INACTIVE: 1100 case PERF_EVENT_STATE_INACTIVE:
1085 update_event_times(event); 1101 update_event_times(event);
@@ -1118,6 +1134,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1118 if (!ctx->nr_stat) 1134 if (!ctx->nr_stat)
1119 return; 1135 return;
1120 1136
1137 update_context_time(ctx);
1138
1121 event = list_first_entry(&ctx->event_list, 1139 event = list_first_entry(&ctx->event_list,
1122 struct perf_event, event_entry); 1140 struct perf_event, event_entry);
1123 1141
@@ -1161,8 +1179,6 @@ void perf_event_task_sched_out(struct task_struct *task,
1161 if (likely(!ctx || !cpuctx->task_ctx)) 1179 if (likely(!ctx || !cpuctx->task_ctx))
1162 return; 1180 return;
1163 1181
1164 update_context_time(ctx);
1165
1166 rcu_read_lock(); 1182 rcu_read_lock();
1167 parent = rcu_dereference(ctx->parent_ctx); 1183 parent = rcu_dereference(ctx->parent_ctx);
1168 next_ctx = next->perf_event_ctxp; 1184 next_ctx = next->perf_event_ctxp;
@@ -1515,7 +1531,6 @@ static void __perf_event_read(void *info)
1515 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 1531 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1516 struct perf_event *event = info; 1532 struct perf_event *event = info;
1517 struct perf_event_context *ctx = event->ctx; 1533 struct perf_event_context *ctx = event->ctx;
1518 unsigned long flags;
1519 1534
1520 /* 1535 /*
1521 * If this is a task context, we need to check whether it is 1536 * If this is a task context, we need to check whether it is
@@ -1527,12 +1542,12 @@ static void __perf_event_read(void *info)
1527 if (ctx->task && cpuctx->task_ctx != ctx) 1542 if (ctx->task && cpuctx->task_ctx != ctx)
1528 return; 1543 return;
1529 1544
1530 local_irq_save(flags); 1545 spin_lock(&ctx->lock);
1531 if (ctx->is_active) 1546 update_context_time(ctx);
1532 update_context_time(ctx);
1533 event->pmu->read(event);
1534 update_event_times(event); 1547 update_event_times(event);
1535 local_irq_restore(flags); 1548 spin_unlock(&ctx->lock);
1549
1550 event->pmu->read(event);
1536} 1551}
1537 1552
1538static u64 perf_event_read(struct perf_event *event) 1553static u64 perf_event_read(struct perf_event *event)
@@ -1545,7 +1560,13 @@ static u64 perf_event_read(struct perf_event *event)
1545 smp_call_function_single(event->oncpu, 1560 smp_call_function_single(event->oncpu,
1546 __perf_event_read, event, 1); 1561 __perf_event_read, event, 1);
1547 } else if (event->state == PERF_EVENT_STATE_INACTIVE) { 1562 } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
1563 struct perf_event_context *ctx = event->ctx;
1564 unsigned long flags;
1565
1566 spin_lock_irqsave(&ctx->lock, flags);
1567 update_context_time(ctx);
1548 update_event_times(event); 1568 update_event_times(event);
1569 spin_unlock_irqrestore(&ctx->lock, flags);
1549 } 1570 }
1550 1571
1551 return atomic64_read(&event->count); 1572 return atomic64_read(&event->count);
@@ -1658,6 +1679,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
1658 return ERR_PTR(err); 1679 return ERR_PTR(err);
1659} 1680}
1660 1681
1682static void perf_event_free_filter(struct perf_event *event);
1683
1661static void free_event_rcu(struct rcu_head *head) 1684static void free_event_rcu(struct rcu_head *head)
1662{ 1685{
1663 struct perf_event *event; 1686 struct perf_event *event;
@@ -1665,6 +1688,7 @@ static void free_event_rcu(struct rcu_head *head)
1665 event = container_of(head, struct perf_event, rcu_head); 1688 event = container_of(head, struct perf_event, rcu_head);
1666 if (event->ns) 1689 if (event->ns)
1667 put_pid_ns(event->ns); 1690 put_pid_ns(event->ns);
1691 perf_event_free_filter(event);
1668 kfree(event); 1692 kfree(event);
1669} 1693}
1670 1694
@@ -1696,16 +1720,10 @@ static void free_event(struct perf_event *event)
1696 call_rcu(&event->rcu_head, free_event_rcu); 1720 call_rcu(&event->rcu_head, free_event_rcu);
1697} 1721}
1698 1722
1699/* 1723int perf_event_release_kernel(struct perf_event *event)
1700 * Called when the last reference to the file is gone.
1701 */
1702static int perf_release(struct inode *inode, struct file *file)
1703{ 1724{
1704 struct perf_event *event = file->private_data;
1705 struct perf_event_context *ctx = event->ctx; 1725 struct perf_event_context *ctx = event->ctx;
1706 1726
1707 file->private_data = NULL;
1708
1709 WARN_ON_ONCE(ctx->parent_ctx); 1727 WARN_ON_ONCE(ctx->parent_ctx);
1710 mutex_lock(&ctx->mutex); 1728 mutex_lock(&ctx->mutex);
1711 perf_event_remove_from_context(event); 1729 perf_event_remove_from_context(event);
@@ -1720,6 +1738,19 @@ static int perf_release(struct inode *inode, struct file *file)
1720 1738
1721 return 0; 1739 return 0;
1722} 1740}
1741EXPORT_SYMBOL_GPL(perf_event_release_kernel);
1742
1743/*
1744 * Called when the last reference to the file is gone.
1745 */
1746static int perf_release(struct inode *inode, struct file *file)
1747{
1748 struct perf_event *event = file->private_data;
1749
1750 file->private_data = NULL;
1751
1752 return perf_event_release_kernel(event);
1753}
1723 1754
1724static int perf_event_read_size(struct perf_event *event) 1755static int perf_event_read_size(struct perf_event *event)
1725{ 1756{
@@ -1746,91 +1777,94 @@ static int perf_event_read_size(struct perf_event *event)
1746 return size; 1777 return size;
1747} 1778}
1748 1779
1749static u64 perf_event_read_value(struct perf_event *event) 1780u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
1750{ 1781{
1751 struct perf_event *child; 1782 struct perf_event *child;
1752 u64 total = 0; 1783 u64 total = 0;
1753 1784
1785 *enabled = 0;
1786 *running = 0;
1787
1788 mutex_lock(&event->child_mutex);
1754 total += perf_event_read(event); 1789 total += perf_event_read(event);
1755 list_for_each_entry(child, &event->child_list, child_list) 1790 *enabled += event->total_time_enabled +
1791 atomic64_read(&event->child_total_time_enabled);
1792 *running += event->total_time_running +
1793 atomic64_read(&event->child_total_time_running);
1794
1795 list_for_each_entry(child, &event->child_list, child_list) {
1756 total += perf_event_read(child); 1796 total += perf_event_read(child);
1797 *enabled += child->total_time_enabled;
1798 *running += child->total_time_running;
1799 }
1800 mutex_unlock(&event->child_mutex);
1757 1801
1758 return total; 1802 return total;
1759} 1803}
1760 1804EXPORT_SYMBOL_GPL(perf_event_read_value);
1761static int perf_event_read_entry(struct perf_event *event,
1762 u64 read_format, char __user *buf)
1763{
1764 int n = 0, count = 0;
1765 u64 values[2];
1766
1767 values[n++] = perf_event_read_value(event);
1768 if (read_format & PERF_FORMAT_ID)
1769 values[n++] = primary_event_id(event);
1770
1771 count = n * sizeof(u64);
1772
1773 if (copy_to_user(buf, values, count))
1774 return -EFAULT;
1775
1776 return count;
1777}
1778 1805
1779static int perf_event_read_group(struct perf_event *event, 1806static int perf_event_read_group(struct perf_event *event,
1780 u64 read_format, char __user *buf) 1807 u64 read_format, char __user *buf)
1781{ 1808{
1782 struct perf_event *leader = event->group_leader, *sub; 1809 struct perf_event *leader = event->group_leader, *sub;
1783 int n = 0, size = 0, err = -EFAULT; 1810 int n = 0, size = 0, ret = -EFAULT;
1784 u64 values[3]; 1811 struct perf_event_context *ctx = leader->ctx;
1812 u64 values[5];
1813 u64 count, enabled, running;
1814
1815 mutex_lock(&ctx->mutex);
1816 count = perf_event_read_value(leader, &enabled, &running);
1785 1817
1786 values[n++] = 1 + leader->nr_siblings; 1818 values[n++] = 1 + leader->nr_siblings;
1787 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1819 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1788 values[n++] = leader->total_time_enabled + 1820 values[n++] = enabled;
1789 atomic64_read(&leader->child_total_time_enabled); 1821 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1790 } 1822 values[n++] = running;
1791 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { 1823 values[n++] = count;
1792 values[n++] = leader->total_time_running + 1824 if (read_format & PERF_FORMAT_ID)
1793 atomic64_read(&leader->child_total_time_running); 1825 values[n++] = primary_event_id(leader);
1794 }
1795 1826
1796 size = n * sizeof(u64); 1827 size = n * sizeof(u64);
1797 1828
1798 if (copy_to_user(buf, values, size)) 1829 if (copy_to_user(buf, values, size))
1799 return -EFAULT; 1830 goto unlock;
1800
1801 err = perf_event_read_entry(leader, read_format, buf + size);
1802 if (err < 0)
1803 return err;
1804 1831
1805 size += err; 1832 ret = size;
1806 1833
1807 list_for_each_entry(sub, &leader->sibling_list, group_entry) { 1834 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1808 err = perf_event_read_entry(sub, read_format, 1835 n = 0;
1809 buf + size); 1836
1810 if (err < 0) 1837 values[n++] = perf_event_read_value(sub, &enabled, &running);
1811 return err; 1838 if (read_format & PERF_FORMAT_ID)
1839 values[n++] = primary_event_id(sub);
1840
1841 size = n * sizeof(u64);
1812 1842
1813 size += err; 1843 if (copy_to_user(buf + ret, values, size)) {
1844 ret = -EFAULT;
1845 goto unlock;
1846 }
1847
1848 ret += size;
1814 } 1849 }
1850unlock:
1851 mutex_unlock(&ctx->mutex);
1815 1852
1816 return size; 1853 return ret;
1817} 1854}
1818 1855
1819static int perf_event_read_one(struct perf_event *event, 1856static int perf_event_read_one(struct perf_event *event,
1820 u64 read_format, char __user *buf) 1857 u64 read_format, char __user *buf)
1821{ 1858{
1859 u64 enabled, running;
1822 u64 values[4]; 1860 u64 values[4];
1823 int n = 0; 1861 int n = 0;
1824 1862
1825 values[n++] = perf_event_read_value(event); 1863 values[n++] = perf_event_read_value(event, &enabled, &running);
1826 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 1864 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1827 values[n++] = event->total_time_enabled + 1865 values[n++] = enabled;
1828 atomic64_read(&event->child_total_time_enabled); 1866 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1829 } 1867 values[n++] = running;
1830 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1831 values[n++] = event->total_time_running +
1832 atomic64_read(&event->child_total_time_running);
1833 }
1834 if (read_format & PERF_FORMAT_ID) 1868 if (read_format & PERF_FORMAT_ID)
1835 values[n++] = primary_event_id(event); 1869 values[n++] = primary_event_id(event);
1836 1870
@@ -1861,12 +1895,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
1861 return -ENOSPC; 1895 return -ENOSPC;
1862 1896
1863 WARN_ON_ONCE(event->ctx->parent_ctx); 1897 WARN_ON_ONCE(event->ctx->parent_ctx);
1864 mutex_lock(&event->child_mutex);
1865 if (read_format & PERF_FORMAT_GROUP) 1898 if (read_format & PERF_FORMAT_GROUP)
1866 ret = perf_event_read_group(event, read_format, buf); 1899 ret = perf_event_read_group(event, read_format, buf);
1867 else 1900 else
1868 ret = perf_event_read_one(event, read_format, buf); 1901 ret = perf_event_read_one(event, read_format, buf);
1869 mutex_unlock(&event->child_mutex);
1870 1902
1871 return ret; 1903 return ret;
1872} 1904}
@@ -1974,7 +2006,8 @@ unlock:
1974 return ret; 2006 return ret;
1975} 2007}
1976 2008
1977int perf_event_set_output(struct perf_event *event, int output_fd); 2009static int perf_event_set_output(struct perf_event *event, int output_fd);
2010static int perf_event_set_filter(struct perf_event *event, void __user *arg);
1978 2011
1979static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2012static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1980{ 2013{
@@ -2002,6 +2035,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2002 case PERF_EVENT_IOC_SET_OUTPUT: 2035 case PERF_EVENT_IOC_SET_OUTPUT:
2003 return perf_event_set_output(event, arg); 2036 return perf_event_set_output(event, arg);
2004 2037
2038 case PERF_EVENT_IOC_SET_FILTER:
2039 return perf_event_set_filter(event, (void __user *)arg);
2040
2005 default: 2041 default:
2006 return -ENOTTY; 2042 return -ENOTTY;
2007 } 2043 }
@@ -2174,6 +2210,7 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
2174 perf_mmap_free_page((unsigned long)data->user_page); 2210 perf_mmap_free_page((unsigned long)data->user_page);
2175 for (i = 0; i < data->nr_pages; i++) 2211 for (i = 0; i < data->nr_pages; i++)
2176 perf_mmap_free_page((unsigned long)data->data_pages[i]); 2212 perf_mmap_free_page((unsigned long)data->data_pages[i]);
2213 kfree(data);
2177} 2214}
2178 2215
2179#else 2216#else
@@ -2214,6 +2251,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
2214 perf_mmap_unmark_page(base + (i * PAGE_SIZE)); 2251 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
2215 2252
2216 vfree(base); 2253 vfree(base);
2254 kfree(data);
2217} 2255}
2218 2256
2219static void perf_mmap_data_free(struct perf_mmap_data *data) 2257static void perf_mmap_data_free(struct perf_mmap_data *data)
@@ -2307,7 +2345,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2307 } 2345 }
2308 2346
2309 if (!data->watermark) 2347 if (!data->watermark)
2310 data->watermark = max_t(long, PAGE_SIZE, max_size / 2); 2348 data->watermark = max_size / 2;
2311 2349
2312 2350
2313 rcu_assign_pointer(event->data, data); 2351 rcu_assign_pointer(event->data, data);
@@ -2319,7 +2357,6 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2319 2357
2320 data = container_of(rcu_head, struct perf_mmap_data, rcu_head); 2358 data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
2321 perf_mmap_data_free(data); 2359 perf_mmap_data_free(data);
2322 kfree(data);
2323} 2360}
2324 2361
2325static void perf_mmap_data_release(struct perf_event *event) 2362static void perf_mmap_data_release(struct perf_event *event)
@@ -2666,20 +2703,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
2666static void perf_output_lock(struct perf_output_handle *handle) 2703static void perf_output_lock(struct perf_output_handle *handle)
2667{ 2704{
2668 struct perf_mmap_data *data = handle->data; 2705 struct perf_mmap_data *data = handle->data;
2669 int cpu; 2706 int cur, cpu = get_cpu();
2670 2707
2671 handle->locked = 0; 2708 handle->locked = 0;
2672 2709
2673 local_irq_save(handle->flags); 2710 for (;;) {
2674 cpu = smp_processor_id(); 2711 cur = atomic_cmpxchg(&data->lock, -1, cpu);
2675 2712 if (cur == -1) {
2676 if (in_nmi() && atomic_read(&data->lock) == cpu) 2713 handle->locked = 1;
2677 return; 2714 break;
2715 }
2716 if (cur == cpu)
2717 break;
2678 2718
2679 while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
2680 cpu_relax(); 2719 cpu_relax();
2681 2720 }
2682 handle->locked = 1;
2683} 2721}
2684 2722
2685static void perf_output_unlock(struct perf_output_handle *handle) 2723static void perf_output_unlock(struct perf_output_handle *handle)
@@ -2725,7 +2763,7 @@ again:
2725 if (atomic_xchg(&data->wakeup, 0)) 2763 if (atomic_xchg(&data->wakeup, 0))
2726 perf_output_wakeup(handle); 2764 perf_output_wakeup(handle);
2727out: 2765out:
2728 local_irq_restore(handle->flags); 2766 put_cpu();
2729} 2767}
2730 2768
2731void perf_output_copy(struct perf_output_handle *handle, 2769void perf_output_copy(struct perf_output_handle *handle,
@@ -3236,15 +3274,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
3236{ 3274{
3237 struct perf_event *event; 3275 struct perf_event *event;
3238 3276
3239 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3240 return;
3241
3242 rcu_read_lock();
3243 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3277 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3244 if (perf_event_task_match(event)) 3278 if (perf_event_task_match(event))
3245 perf_event_task_output(event, task_event); 3279 perf_event_task_output(event, task_event);
3246 } 3280 }
3247 rcu_read_unlock();
3248} 3281}
3249 3282
3250static void perf_event_task_event(struct perf_task_event *task_event) 3283static void perf_event_task_event(struct perf_task_event *task_event)
@@ -3252,11 +3285,11 @@ static void perf_event_task_event(struct perf_task_event *task_event)
3252 struct perf_cpu_context *cpuctx; 3285 struct perf_cpu_context *cpuctx;
3253 struct perf_event_context *ctx = task_event->task_ctx; 3286 struct perf_event_context *ctx = task_event->task_ctx;
3254 3287
3288 rcu_read_lock();
3255 cpuctx = &get_cpu_var(perf_cpu_context); 3289 cpuctx = &get_cpu_var(perf_cpu_context);
3256 perf_event_task_ctx(&cpuctx->ctx, task_event); 3290 perf_event_task_ctx(&cpuctx->ctx, task_event);
3257 put_cpu_var(perf_cpu_context); 3291 put_cpu_var(perf_cpu_context);
3258 3292
3259 rcu_read_lock();
3260 if (!ctx) 3293 if (!ctx)
3261 ctx = rcu_dereference(task_event->task->perf_event_ctxp); 3294 ctx = rcu_dereference(task_event->task->perf_event_ctxp);
3262 if (ctx) 3295 if (ctx)
@@ -3348,15 +3381,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx,
3348{ 3381{
3349 struct perf_event *event; 3382 struct perf_event *event;
3350 3383
3351 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3352 return;
3353
3354 rcu_read_lock();
3355 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3384 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3356 if (perf_event_comm_match(event)) 3385 if (perf_event_comm_match(event))
3357 perf_event_comm_output(event, comm_event); 3386 perf_event_comm_output(event, comm_event);
3358 } 3387 }
3359 rcu_read_unlock();
3360} 3388}
3361 3389
3362static void perf_event_comm_event(struct perf_comm_event *comm_event) 3390static void perf_event_comm_event(struct perf_comm_event *comm_event)
@@ -3367,7 +3395,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3367 char comm[TASK_COMM_LEN]; 3395 char comm[TASK_COMM_LEN];
3368 3396
3369 memset(comm, 0, sizeof(comm)); 3397 memset(comm, 0, sizeof(comm));
3370 strncpy(comm, comm_event->task->comm, sizeof(comm)); 3398 strlcpy(comm, comm_event->task->comm, sizeof(comm));
3371 size = ALIGN(strlen(comm)+1, sizeof(u64)); 3399 size = ALIGN(strlen(comm)+1, sizeof(u64));
3372 3400
3373 comm_event->comm = comm; 3401 comm_event->comm = comm;
@@ -3375,11 +3403,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3375 3403
3376 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 3404 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3377 3405
3406 rcu_read_lock();
3378 cpuctx = &get_cpu_var(perf_cpu_context); 3407 cpuctx = &get_cpu_var(perf_cpu_context);
3379 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 3408 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
3380 put_cpu_var(perf_cpu_context); 3409 put_cpu_var(perf_cpu_context);
3381 3410
3382 rcu_read_lock();
3383 /* 3411 /*
3384 * doesn't really matter which of the child contexts the 3412 * doesn't really matter which of the child contexts the
3385 * events ends up in. 3413 * events ends up in.
@@ -3472,15 +3500,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx,
3472{ 3500{
3473 struct perf_event *event; 3501 struct perf_event *event;
3474 3502
3475 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3476 return;
3477
3478 rcu_read_lock();
3479 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3503 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3480 if (perf_event_mmap_match(event, mmap_event)) 3504 if (perf_event_mmap_match(event, mmap_event))
3481 perf_event_mmap_output(event, mmap_event); 3505 perf_event_mmap_output(event, mmap_event);
3482 } 3506 }
3483 rcu_read_unlock();
3484} 3507}
3485 3508
3486static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) 3509static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
@@ -3536,11 +3559,11 @@ got_name:
3536 3559
3537 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; 3560 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
3538 3561
3562 rcu_read_lock();
3539 cpuctx = &get_cpu_var(perf_cpu_context); 3563 cpuctx = &get_cpu_var(perf_cpu_context);
3540 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); 3564 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
3541 put_cpu_var(perf_cpu_context); 3565 put_cpu_var(perf_cpu_context);
3542 3566
3543 rcu_read_lock();
3544 /* 3567 /*
3545 * doesn't really matter which of the child contexts the 3568 * doesn't really matter which of the child contexts the
3546 * events ends up in. 3569 * events ends up in.
@@ -3679,7 +3702,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
3679 perf_event_disable(event); 3702 perf_event_disable(event);
3680 } 3703 }
3681 3704
3682 perf_event_output(event, nmi, data, regs); 3705 if (event->overflow_handler)
3706 event->overflow_handler(event, nmi, data, regs);
3707 else
3708 perf_event_output(event, nmi, data, regs);
3709
3683 return ret; 3710 return ret;
3684} 3711}
3685 3712
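The hunk above turns the overflow path into a hook: when event->overflow_handler is set, the sample is handed to it; otherwise the default perf_event_output() ring-buffer path runs exactly as before (and inherit_event(), further down, copies the handler into child events). A minimal, hypothetical in-kernel consumer only needs a function matching the call site shown above; everything below is illustrative and not part of the patch.

#include <linux/perf_event.h>
#include <linux/kernel.h>

/* Signature follows the call event->overflow_handler(event, nmi, data, regs). */
static void my_overflow_handler(struct perf_event *event, int nmi,
				struct perf_sample_data *data,
				struct pt_regs *regs)
{
	/* Consume the sample in place instead of writing it to the mmap buffer. */
	pr_debug("perf overflow: config=%llu period=%llu\n",
		 (unsigned long long)event->attr.config,
		 (unsigned long long)data->period);
}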
@@ -3724,16 +3751,16 @@ again:
3724 return nr; 3751 return nr;
3725} 3752}
3726 3753
3727static void perf_swevent_overflow(struct perf_event *event, 3754static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
3728 int nmi, struct perf_sample_data *data, 3755 int nmi, struct perf_sample_data *data,
3729 struct pt_regs *regs) 3756 struct pt_regs *regs)
3730{ 3757{
3731 struct hw_perf_event *hwc = &event->hw; 3758 struct hw_perf_event *hwc = &event->hw;
3732 int throttle = 0; 3759 int throttle = 0;
3733 u64 overflow;
3734 3760
3735 data->period = event->hw.last_period; 3761 data->period = event->hw.last_period;
3736 overflow = perf_swevent_set_period(event); 3762 if (!overflow)
3763 overflow = perf_swevent_set_period(event);
3737 3764
3738 if (hwc->interrupts == MAX_INTERRUPTS) 3765 if (hwc->interrupts == MAX_INTERRUPTS)
3739 return; 3766 return;
@@ -3766,14 +3793,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
3766 3793
3767 atomic64_add(nr, &event->count); 3794 atomic64_add(nr, &event->count);
3768 3795
3796 if (!regs)
3797 return;
3798
3769 if (!hwc->sample_period) 3799 if (!hwc->sample_period)
3770 return; 3800 return;
3771 3801
3772 if (!regs) 3802 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
3803 return perf_swevent_overflow(event, 1, nmi, data, regs);
3804
3805 if (atomic64_add_negative(nr, &hwc->period_left))
3773 return; 3806 return;
3774 3807
3775 if (!atomic64_add_negative(nr, &hwc->period_left)) 3808 perf_swevent_overflow(event, 0, nmi, data, regs);
3776 perf_swevent_overflow(event, nmi, data, regs);
3777} 3809}
3778 3810
3779static int perf_swevent_is_counting(struct perf_event *event) 3811static int perf_swevent_is_counting(struct perf_event *event)
@@ -3806,25 +3838,44 @@ static int perf_swevent_is_counting(struct perf_event *event)
3806 return 1; 3838 return 1;
3807} 3839}
3808 3840
3841static int perf_tp_event_match(struct perf_event *event,
3842 struct perf_sample_data *data);
3843
3844static int perf_exclude_event(struct perf_event *event,
3845 struct pt_regs *regs)
3846{
3847 if (regs) {
3848 if (event->attr.exclude_user && user_mode(regs))
3849 return 1;
3850
3851 if (event->attr.exclude_kernel && !user_mode(regs))
3852 return 1;
3853 }
3854
3855 return 0;
3856}
3857
3809static int perf_swevent_match(struct perf_event *event, 3858static int perf_swevent_match(struct perf_event *event,
3810 enum perf_type_id type, 3859 enum perf_type_id type,
3811 u32 event_id, struct pt_regs *regs) 3860 u32 event_id,
3861 struct perf_sample_data *data,
3862 struct pt_regs *regs)
3812{ 3863{
3813 if (!perf_swevent_is_counting(event)) 3864 if (!perf_swevent_is_counting(event))
3814 return 0; 3865 return 0;
3815 3866
3816 if (event->attr.type != type) 3867 if (event->attr.type != type)
3817 return 0; 3868 return 0;
3869
3818 if (event->attr.config != event_id) 3870 if (event->attr.config != event_id)
3819 return 0; 3871 return 0;
3820 3872
3821 if (regs) { 3873 if (perf_exclude_event(event, regs))
3822 if (event->attr.exclude_user && user_mode(regs)) 3874 return 0;
3823 return 0;
3824 3875
3825 if (event->attr.exclude_kernel && !user_mode(regs)) 3876 if (event->attr.type == PERF_TYPE_TRACEPOINT &&
3826 return 0; 3877 !perf_tp_event_match(event, data))
3827 } 3878 return 0;
3828 3879
3829 return 1; 3880 return 1;
3830} 3881}
@@ -3837,49 +3888,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
3837{ 3888{
3838 struct perf_event *event; 3889 struct perf_event *event;
3839 3890
3840 if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
3841 return;
3842
3843 rcu_read_lock();
3844 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3891 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3845 if (perf_swevent_match(event, type, event_id, regs)) 3892 if (perf_swevent_match(event, type, event_id, data, regs))
3846 perf_swevent_add(event, nr, nmi, data, regs); 3893 perf_swevent_add(event, nr, nmi, data, regs);
3847 } 3894 }
3848 rcu_read_unlock();
3849} 3895}
3850 3896
3851static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) 3897int perf_swevent_get_recursion_context(void)
3852{ 3898{
3899 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
3900 int rctx;
3901
3853 if (in_nmi()) 3902 if (in_nmi())
3854 return &cpuctx->recursion[3]; 3903 rctx = 3;
3904 else if (in_irq())
3905 rctx = 2;
3906 else if (in_softirq())
3907 rctx = 1;
3908 else
3909 rctx = 0;
3910
3911 if (cpuctx->recursion[rctx]) {
3912 put_cpu_var(perf_cpu_context);
3913 return -1;
3914 }
3855 3915
3856 if (in_irq()) 3916 cpuctx->recursion[rctx]++;
3857 return &cpuctx->recursion[2]; 3917 barrier();
3858 3918
3859 if (in_softirq()) 3919 return rctx;
3860 return &cpuctx->recursion[1]; 3920}
3921EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
3861 3922
3862 return &cpuctx->recursion[0]; 3923void perf_swevent_put_recursion_context(int rctx)
3924{
3925 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
3926 barrier();
3927 cpuctx->recursion[rctx]--;
3928 put_cpu_var(perf_cpu_context);
3863} 3929}
3930EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
3864 3931
3865static void do_perf_sw_event(enum perf_type_id type, u32 event_id, 3932static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3866 u64 nr, int nmi, 3933 u64 nr, int nmi,
3867 struct perf_sample_data *data, 3934 struct perf_sample_data *data,
3868 struct pt_regs *regs) 3935 struct pt_regs *regs)
3869{ 3936{
3870 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 3937 struct perf_cpu_context *cpuctx;
3871 int *recursion = perf_swevent_recursion_context(cpuctx);
3872 struct perf_event_context *ctx; 3938 struct perf_event_context *ctx;
3873 3939
3874 if (*recursion) 3940 cpuctx = &__get_cpu_var(perf_cpu_context);
3875 goto out; 3941 rcu_read_lock();
3876
3877 (*recursion)++;
3878 barrier();
3879
3880 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, 3942 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
3881 nr, nmi, data, regs); 3943 nr, nmi, data, regs);
3882 rcu_read_lock();
3883 /* 3944 /*
3884 * doesn't really matter which of the child contexts the 3945 * doesn't really matter which of the child contexts the
3885 * events ends up in. 3946 * events ends up in.
@@ -3888,23 +3949,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
3888 if (ctx) 3949 if (ctx)
3889 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); 3950 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
3890 rcu_read_unlock(); 3951 rcu_read_unlock();
3891
3892 barrier();
3893 (*recursion)--;
3894
3895out:
3896 put_cpu_var(perf_cpu_context);
3897} 3952}
3898 3953
3899void __perf_sw_event(u32 event_id, u64 nr, int nmi, 3954void __perf_sw_event(u32 event_id, u64 nr, int nmi,
3900 struct pt_regs *regs, u64 addr) 3955 struct pt_regs *regs, u64 addr)
3901{ 3956{
3902 struct perf_sample_data data = { 3957 struct perf_sample_data data;
3903 .addr = addr, 3958 int rctx;
3904 };
3905 3959
3906 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, 3960 rctx = perf_swevent_get_recursion_context();
3907 &data, regs); 3961 if (rctx < 0)
3962 return;
3963
3964 data.addr = addr;
3965 data.raw = NULL;
3966
3967 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
3968
3969 perf_swevent_put_recursion_context(rctx);
3908} 3970}
3909 3971
3910static void perf_swevent_read(struct perf_event *event) 3972static void perf_swevent_read(struct perf_event *event)
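__perf_sw_event() above is the template for the new exported pair: perf_swevent_get_recursion_context() reserves the recursion slot for the current context level (task, softirq, hardirq or NMI) and returns its index, or -1 if that level is already emitting an event, and perf_swevent_put_recursion_context() releases it. A caller outside this file (the trace-event glue is the intended user) brackets its emission the same way; this is a sketch with illustrative names around perf_tp_event(), which appears in a later hunk, not code from the patch.

#include <linux/perf_event.h>

static void profile_my_tracepoint(int event_id, u64 addr, u64 count,
				  void *record, int entry_size)
{
	int rctx;

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		return;		/* already emitting at this context level, drop it */

	/* the "already protected against recursion" case noted in perf_tp_event() */
	perf_tp_event(event_id, addr, count, record, entry_size);

	perf_swevent_put_recursion_context(rctx);
}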
@@ -3949,6 +4011,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
3949 event->pmu->read(event); 4011 event->pmu->read(event);
3950 4012
3951 data.addr = 0; 4013 data.addr = 0;
4014 data.period = event->hw.last_period;
3952 regs = get_irq_regs(); 4015 regs = get_irq_regs();
3953 /* 4016 /*
3954 * In case we exclude kernel IPs or are somehow not in interrupt 4017 * In case we exclude kernel IPs or are somehow not in interrupt
@@ -4108,6 +4171,7 @@ static const struct pmu perf_ops_task_clock = {
4108}; 4171};
4109 4172
4110#ifdef CONFIG_EVENT_PROFILE 4173#ifdef CONFIG_EVENT_PROFILE
4174
4111void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4175void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4112 int entry_size) 4176 int entry_size)
4113{ 4177{
@@ -4126,13 +4190,21 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4126 if (!regs) 4190 if (!regs)
4127 regs = task_pt_regs(current); 4191 regs = task_pt_regs(current);
4128 4192
4193 /* Trace events already protected against recursion */
4129 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, 4194 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
4130 &data, regs); 4195 &data, regs);
4131} 4196}
4132EXPORT_SYMBOL_GPL(perf_tp_event); 4197EXPORT_SYMBOL_GPL(perf_tp_event);
4133 4198
4134extern int ftrace_profile_enable(int); 4199static int perf_tp_event_match(struct perf_event *event,
4135extern void ftrace_profile_disable(int); 4200 struct perf_sample_data *data)
4201{
4202 void *record = data->raw->data;
4203
4204 if (likely(!event->filter) || filter_match_preds(event->filter, record))
4205 return 1;
4206 return 0;
4207}
4136 4208
4137static void tp_perf_event_destroy(struct perf_event *event) 4209static void tp_perf_event_destroy(struct perf_event *event)
4138{ 4210{
@@ -4157,11 +4229,99 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4157 4229
4158 return &perf_ops_generic; 4230 return &perf_ops_generic;
4159} 4231}
4232
4233static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4234{
4235 char *filter_str;
4236 int ret;
4237
4238 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4239 return -EINVAL;
4240
4241 filter_str = strndup_user(arg, PAGE_SIZE);
4242 if (IS_ERR(filter_str))
4243 return PTR_ERR(filter_str);
4244
4245 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
4246
4247 kfree(filter_str);
4248 return ret;
4249}
4250
4251static void perf_event_free_filter(struct perf_event *event)
4252{
4253 ftrace_profile_free_filter(event);
4254}
4255
4160#else 4256#else
4257
4258static int perf_tp_event_match(struct perf_event *event,
4259 struct perf_sample_data *data)
4260{
4261 return 1;
4262}
4263
4161static const struct pmu *tp_perf_event_init(struct perf_event *event) 4264static const struct pmu *tp_perf_event_init(struct perf_event *event)
4162{ 4265{
4163 return NULL; 4266 return NULL;
4164} 4267}
4268
4269static int perf_event_set_filter(struct perf_event *event, void __user *arg)
4270{
4271 return -ENOENT;
4272}
4273
4274static void perf_event_free_filter(struct perf_event *event)
4275{
4276}
4277
4278#endif /* CONFIG_EVENT_PROFILE */
4279
4280#ifdef CONFIG_HAVE_HW_BREAKPOINT
4281static void bp_perf_event_destroy(struct perf_event *event)
4282{
4283 release_bp_slot(event);
4284}
4285
4286static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4287{
4288 int err;
4289 /*
4290 * The breakpoint is already filled if we haven't created the counter
4291 * through perf syscall
 4293 * FIXME: manage to get triggered to NULL if it comes from syscalls
4293 */
4294 if (!bp->callback)
4295 err = register_perf_hw_breakpoint(bp);
4296 else
4297 err = __register_perf_hw_breakpoint(bp);
4298 if (err)
4299 return ERR_PTR(err);
4300
4301 bp->destroy = bp_perf_event_destroy;
4302
4303 return &perf_ops_bp;
4304}
4305
4306void perf_bp_event(struct perf_event *bp, void *data)
4307{
4308 struct perf_sample_data sample;
4309 struct pt_regs *regs = data;
4310
4311 sample.addr = bp->attr.bp_addr;
4312
4313 if (!perf_exclude_event(bp, regs))
4314 perf_swevent_add(bp, 1, 1, &sample, regs);
4315}
4316#else
4317static const struct pmu *bp_perf_event_init(struct perf_event *bp)
4318{
4319 return NULL;
4320}
4321
4322void perf_bp_event(struct perf_event *bp, void *regs)
4323{
4324}
4165#endif 4325#endif
4166 4326
4167atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; 4327atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
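perf_event_set_filter() above is only reachable for PERF_TYPE_TRACEPOINT events and hands the user-supplied string to ftrace_profile_set_filter(); perf_event_free_filter() drops the filter when the event is destroyed. Assuming the ioctl wired up to it elsewhere in this series is PERF_EVENT_IOC_SET_FILTER (the wiring is not visible in this hunk), a userspace caller would look roughly like the sketch below; the fd and the filter expression are placeholders.

#include <sys/ioctl.h>
#include <linux/perf_event.h>

/* fd comes from perf_event_open() on a tracepoint event. */
static int set_tracepoint_filter(int fd)
{
	/* Same filter syntax as debugfs tracing/events/.../filter. */
	return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, "common_pid != 0");
}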
@@ -4208,6 +4368,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4208 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 4368 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
4209 case PERF_COUNT_SW_CONTEXT_SWITCHES: 4369 case PERF_COUNT_SW_CONTEXT_SWITCHES:
4210 case PERF_COUNT_SW_CPU_MIGRATIONS: 4370 case PERF_COUNT_SW_CPU_MIGRATIONS:
4371 case PERF_COUNT_SW_ALIGNMENT_FAULTS:
4372 case PERF_COUNT_SW_EMULATION_FAULTS:
4211 if (!event->parent) { 4373 if (!event->parent) {
4212 atomic_inc(&perf_swevent_enabled[event_id]); 4374 atomic_inc(&perf_swevent_enabled[event_id]);
4213 event->destroy = sw_perf_event_destroy; 4375 event->destroy = sw_perf_event_destroy;
@@ -4228,6 +4390,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4228 struct perf_event_context *ctx, 4390 struct perf_event_context *ctx,
4229 struct perf_event *group_leader, 4391 struct perf_event *group_leader,
4230 struct perf_event *parent_event, 4392 struct perf_event *parent_event,
4393 perf_callback_t callback,
4231 gfp_t gfpflags) 4394 gfp_t gfpflags)
4232{ 4395{
4233 const struct pmu *pmu; 4396 const struct pmu *pmu;
@@ -4270,6 +4433,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4270 4433
4271 event->state = PERF_EVENT_STATE_INACTIVE; 4434 event->state = PERF_EVENT_STATE_INACTIVE;
4272 4435
4436 if (!callback && parent_event)
4437 callback = parent_event->callback;
4438
4439 event->callback = callback;
4440
4273 if (attr->disabled) 4441 if (attr->disabled)
4274 event->state = PERF_EVENT_STATE_OFF; 4442 event->state = PERF_EVENT_STATE_OFF;
4275 4443
@@ -4304,6 +4472,11 @@ perf_event_alloc(struct perf_event_attr *attr,
4304 pmu = tp_perf_event_init(event); 4472 pmu = tp_perf_event_init(event);
4305 break; 4473 break;
4306 4474
4475 case PERF_TYPE_BREAKPOINT:
4476 pmu = bp_perf_event_init(event);
4477 break;
4478
4479
4307 default: 4480 default:
4308 break; 4481 break;
4309 } 4482 }
@@ -4416,7 +4589,7 @@ err_size:
4416 goto out; 4589 goto out;
4417} 4590}
4418 4591
4419int perf_event_set_output(struct perf_event *event, int output_fd) 4592static int perf_event_set_output(struct perf_event *event, int output_fd)
4420{ 4593{
4421 struct perf_event *output_event = NULL; 4594 struct perf_event *output_event = NULL;
4422 struct file *output_file = NULL; 4595 struct file *output_file = NULL;
@@ -4546,7 +4719,7 @@ SYSCALL_DEFINE5(perf_event_open,
4546 } 4719 }
4547 4720
4548 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 4721 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
4549 NULL, GFP_KERNEL); 4722 NULL, NULL, GFP_KERNEL);
4550 err = PTR_ERR(event); 4723 err = PTR_ERR(event);
4551 if (IS_ERR(event)) 4724 if (IS_ERR(event))
4552 goto err_put_context; 4725 goto err_put_context;
@@ -4594,6 +4767,60 @@ err_put_context:
4594 return err; 4767 return err;
4595} 4768}
4596 4769
4770/**
4771 * perf_event_create_kernel_counter
4772 *
4773 * @attr: attributes of the counter to create
 4774 * @cpu: cpu on which the counter is bound
4775 * @pid: task to profile
4776 */
4777struct perf_event *
4778perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
4779 pid_t pid, perf_callback_t callback)
4780{
4781 struct perf_event *event;
4782 struct perf_event_context *ctx;
4783 int err;
4784
4785 /*
4786 * Get the target context (task or percpu):
4787 */
4788
4789 ctx = find_get_context(pid, cpu);
4790 if (IS_ERR(ctx)) {
4791 err = PTR_ERR(ctx);
4792 goto err_exit;
4793 }
4794
4795 event = perf_event_alloc(attr, cpu, ctx, NULL,
4796 NULL, callback, GFP_KERNEL);
4797 if (IS_ERR(event)) {
4798 err = PTR_ERR(event);
4799 goto err_put_context;
4800 }
4801
4802 event->filp = NULL;
4803 WARN_ON_ONCE(ctx->parent_ctx);
4804 mutex_lock(&ctx->mutex);
4805 perf_install_in_context(ctx, event, cpu);
4806 ++ctx->generation;
4807 mutex_unlock(&ctx->mutex);
4808
4809 event->owner = current;
4810 get_task_struct(current);
4811 mutex_lock(&current->perf_event_mutex);
4812 list_add_tail(&event->owner_entry, &current->perf_event_list);
4813 mutex_unlock(&current->perf_event_mutex);
4814
4815 return event;
4816
4817 err_put_context:
4818 put_ctx(ctx);
4819 err_exit:
4820 return ERR_PTR(err);
4821}
4822EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
4823
4597/* 4824/*
4598 * inherit a event from parent task to child task: 4825 * inherit a event from parent task to child task:
4599 */ 4826 */
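perf_event_create_kernel_counter() gives in-kernel users (the hw_breakpoint layer added elsewhere in this series is the intended first user) the same setup that the perf_event_open() syscall performs: find the task or per-cpu context, allocate the event with an optional callback, and install it. Below is a hedged caller-side sketch; the attribute values are illustrative and the callback signature is the perf_callback_t typedef assumed by perf_event_alloc() above.

#include <linux/perf_event.h>

static void my_counter_callback(struct perf_event *event, void *data)
{
	/* invoked by the event's consumer path, e.g. when a breakpoint triggers */
}

static struct perf_event *start_cycle_counter(int cpu)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.size		= sizeof(attr),
		.sample_period	= 1000000,
	};

	/* pid == -1 binds the counter to the cpu rather than to a task;
	 * the return value is an ERR_PTR() on failure. */
	return perf_event_create_kernel_counter(&attr, cpu, -1, my_counter_callback);
}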
@@ -4619,7 +4846,7 @@ inherit_event(struct perf_event *parent_event,
4619 child_event = perf_event_alloc(&parent_event->attr, 4846 child_event = perf_event_alloc(&parent_event->attr,
4620 parent_event->cpu, child_ctx, 4847 parent_event->cpu, child_ctx,
4621 group_leader, parent_event, 4848 group_leader, parent_event,
4622 GFP_KERNEL); 4849 NULL, GFP_KERNEL);
4623 if (IS_ERR(child_event)) 4850 if (IS_ERR(child_event))
4624 return child_event; 4851 return child_event;
4625 get_ctx(child_ctx); 4852 get_ctx(child_ctx);
@@ -4637,6 +4864,8 @@ inherit_event(struct perf_event *parent_event,
4637 if (parent_event->attr.freq) 4864 if (parent_event->attr.freq)
4638 child_event->hw.sample_period = parent_event->hw.sample_period; 4865 child_event->hw.sample_period = parent_event->hw.sample_period;
4639 4866
4867 child_event->overflow_handler = parent_event->overflow_handler;
4868
4640 /* 4869 /*
4641 * Link it up in the child's context: 4870 * Link it up in the child's context:
4642 */ 4871 */
@@ -4726,7 +4955,6 @@ __perf_event_exit_task(struct perf_event *child_event,
4726{ 4955{
4727 struct perf_event *parent_event; 4956 struct perf_event *parent_event;
4728 4957
4729 update_event_times(child_event);
4730 perf_event_remove_from_context(child_event); 4958 perf_event_remove_from_context(child_event);
4731 4959
4732 parent_event = child_event->parent; 4960 parent_event = child_event->parent;
@@ -4778,6 +5006,7 @@ void perf_event_exit_task(struct task_struct *child)
4778 * the events from it. 5006 * the events from it.
4779 */ 5007 */
4780 unclone_ctx(child_ctx); 5008 unclone_ctx(child_ctx);
5009 update_context_time(child_ctx);
4781 spin_unlock_irqrestore(&child_ctx->lock, flags); 5010 spin_unlock_irqrestore(&child_ctx->lock, flags);
4782 5011
4783 /* 5012 /*
diff --git a/kernel/sched.c b/kernel/sched.c
index 6ae2739b8f19..e7f2cfa6a257 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -535,14 +535,12 @@ struct rq {
535 #define CPU_LOAD_IDX_MAX 5 535 #define CPU_LOAD_IDX_MAX 5
536 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; 536 unsigned long cpu_load[CPU_LOAD_IDX_MAX];
537#ifdef CONFIG_NO_HZ 537#ifdef CONFIG_NO_HZ
538 unsigned long last_tick_seen;
539 unsigned char in_nohz_recently; 538 unsigned char in_nohz_recently;
540#endif 539#endif
541 /* capture load from *all* tasks on this cpu: */ 540 /* capture load from *all* tasks on this cpu: */
542 struct load_weight load; 541 struct load_weight load;
543 unsigned long nr_load_updates; 542 unsigned long nr_load_updates;
544 u64 nr_switches; 543 u64 nr_switches;
545 u64 nr_migrations_in;
546 544
547 struct cfs_rq cfs; 545 struct cfs_rq cfs;
548 struct rt_rq rt; 546 struct rt_rq rt;
@@ -591,6 +589,8 @@ struct rq {
591 589
592 u64 rt_avg; 590 u64 rt_avg;
593 u64 age_stamp; 591 u64 age_stamp;
592 u64 idle_stamp;
593 u64 avg_idle;
594#endif 594#endif
595 595
596 /* calc_load related fields */ 596 /* calc_load related fields */
@@ -772,7 +772,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
772 if (!sched_feat_names[i]) 772 if (!sched_feat_names[i])
773 return -EINVAL; 773 return -EINVAL;
774 774
775 filp->f_pos += cnt; 775 *ppos += cnt;
776 776
777 return cnt; 777 return cnt;
778} 778}
@@ -2017,6 +2017,7 @@ void kthread_bind(struct task_struct *p, unsigned int cpu)
2017 } 2017 }
2018 2018
2019 spin_lock_irqsave(&rq->lock, flags); 2019 spin_lock_irqsave(&rq->lock, flags);
2020 update_rq_clock(rq);
2020 set_task_cpu(p, cpu); 2021 set_task_cpu(p, cpu);
2021 p->cpus_allowed = cpumask_of_cpu(cpu); 2022 p->cpus_allowed = cpumask_of_cpu(cpu);
2022 p->rt.nr_cpus_allowed = 1; 2023 p->rt.nr_cpus_allowed = 1;
@@ -2078,7 +2079,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2078#endif 2079#endif
2079 if (old_cpu != new_cpu) { 2080 if (old_cpu != new_cpu) {
2080 p->se.nr_migrations++; 2081 p->se.nr_migrations++;
2081 new_rq->nr_migrations_in++;
2082#ifdef CONFIG_SCHEDSTATS 2082#ifdef CONFIG_SCHEDSTATS
2083 if (task_hot(p, old_rq->clock, NULL)) 2083 if (task_hot(p, old_rq->clock, NULL))
2084 schedstat_inc(p, se.nr_forced2_migrations); 2084 schedstat_inc(p, se.nr_forced2_migrations);
@@ -2115,6 +2115,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
2115 * it is sufficient to simply update the task's cpu field. 2115 * it is sufficient to simply update the task's cpu field.
2116 */ 2116 */
2117 if (!p->se.on_rq && !task_running(rq, p)) { 2117 if (!p->se.on_rq && !task_running(rq, p)) {
2118 update_rq_clock(rq);
2118 set_task_cpu(p, dest_cpu); 2119 set_task_cpu(p, dest_cpu);
2119 return 0; 2120 return 0;
2120 } 2121 }
@@ -2376,14 +2377,15 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2376 task_rq_unlock(rq, &flags); 2377 task_rq_unlock(rq, &flags);
2377 2378
2378 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); 2379 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2379 if (cpu != orig_cpu) 2380 if (cpu != orig_cpu) {
2381 local_irq_save(flags);
2382 rq = cpu_rq(cpu);
2383 update_rq_clock(rq);
2380 set_task_cpu(p, cpu); 2384 set_task_cpu(p, cpu);
2381 2385 local_irq_restore(flags);
2386 }
2382 rq = task_rq_lock(p, &flags); 2387 rq = task_rq_lock(p, &flags);
2383 2388
2384 if (rq != orig_rq)
2385 update_rq_clock(rq);
2386
2387 WARN_ON(p->state != TASK_WAKING); 2389 WARN_ON(p->state != TASK_WAKING);
2388 cpu = task_cpu(p); 2390 cpu = task_cpu(p);
2389 2391
@@ -2440,6 +2442,17 @@ out_running:
2440#ifdef CONFIG_SMP 2442#ifdef CONFIG_SMP
2441 if (p->sched_class->task_wake_up) 2443 if (p->sched_class->task_wake_up)
2442 p->sched_class->task_wake_up(rq, p); 2444 p->sched_class->task_wake_up(rq, p);
2445
2446 if (unlikely(rq->idle_stamp)) {
2447 u64 delta = rq->clock - rq->idle_stamp;
2448 u64 max = 2*sysctl_sched_migration_cost;
2449
2450 if (delta > max)
2451 rq->avg_idle = max;
2452 else
2453 update_avg(&rq->avg_idle, delta);
2454 rq->idle_stamp = 0;
2455 }
2443#endif 2456#endif
2444out: 2457out:
2445 task_rq_unlock(rq, &flags); 2458 task_rq_unlock(rq, &flags);
@@ -2545,6 +2558,7 @@ static void __sched_fork(struct task_struct *p)
2545void sched_fork(struct task_struct *p, int clone_flags) 2558void sched_fork(struct task_struct *p, int clone_flags)
2546{ 2559{
2547 int cpu = get_cpu(); 2560 int cpu = get_cpu();
2561 unsigned long flags;
2548 2562
2549 __sched_fork(p); 2563 __sched_fork(p);
2550 2564
@@ -2581,7 +2595,10 @@ void sched_fork(struct task_struct *p, int clone_flags)
2581#ifdef CONFIG_SMP 2595#ifdef CONFIG_SMP
2582 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); 2596 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
2583#endif 2597#endif
2598 local_irq_save(flags);
2599 update_rq_clock(cpu_rq(cpu));
2584 set_task_cpu(p, cpu); 2600 set_task_cpu(p, cpu);
2601 local_irq_restore(flags);
2585 2602
2586#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2603#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
2587 if (likely(sched_info_on())) 2604 if (likely(sched_info_on()))
@@ -2848,14 +2865,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
2848 */ 2865 */
2849 arch_start_context_switch(prev); 2866 arch_start_context_switch(prev);
2850 2867
2851 if (unlikely(!mm)) { 2868 if (likely(!mm)) {
2852 next->active_mm = oldmm; 2869 next->active_mm = oldmm;
2853 atomic_inc(&oldmm->mm_count); 2870 atomic_inc(&oldmm->mm_count);
2854 enter_lazy_tlb(oldmm, next); 2871 enter_lazy_tlb(oldmm, next);
2855 } else 2872 } else
2856 switch_mm(oldmm, mm, next); 2873 switch_mm(oldmm, mm, next);
2857 2874
2858 if (unlikely(!prev->mm)) { 2875 if (likely(!prev->mm)) {
2859 prev->active_mm = NULL; 2876 prev->active_mm = NULL;
2860 rq->prev_mm = oldmm; 2877 rq->prev_mm = oldmm;
2861 } 2878 }
@@ -3018,15 +3035,6 @@ static void calc_load_account_active(struct rq *this_rq)
3018} 3035}
3019 3036
3020/* 3037/*
3021 * Externally visible per-cpu scheduler statistics:
3022 * cpu_nr_migrations(cpu) - number of migrations into that cpu
3023 */
3024u64 cpu_nr_migrations(int cpu)
3025{
3026 return cpu_rq(cpu)->nr_migrations_in;
3027}
3028
3029/*
3030 * Update rq->cpu_load[] statistics. This function is usually called every 3038 * Update rq->cpu_load[] statistics. This function is usually called every
3031 * scheduler tick (TICK_NSEC). 3039 * scheduler tick (TICK_NSEC).
3032 */ 3040 */
@@ -4126,7 +4134,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
4126 unsigned long flags; 4134 unsigned long flags;
4127 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); 4135 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
4128 4136
4129 cpumask_setall(cpus); 4137 cpumask_copy(cpus, cpu_online_mask);
4130 4138
4131 /* 4139 /*
4132 * When power savings policy is enabled for the parent domain, idle 4140 * When power savings policy is enabled for the parent domain, idle
@@ -4289,7 +4297,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
4289 int all_pinned = 0; 4297 int all_pinned = 0;
4290 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); 4298 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
4291 4299
4292 cpumask_setall(cpus); 4300 cpumask_copy(cpus, cpu_online_mask);
4293 4301
4294 /* 4302 /*
4295 * When power savings policy is enabled for the parent domain, idle 4303 * When power savings policy is enabled for the parent domain, idle
@@ -4429,6 +4437,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
4429 int pulled_task = 0; 4437 int pulled_task = 0;
4430 unsigned long next_balance = jiffies + HZ; 4438 unsigned long next_balance = jiffies + HZ;
4431 4439
4440 this_rq->idle_stamp = this_rq->clock;
4441
4442 if (this_rq->avg_idle < sysctl_sched_migration_cost)
4443 return;
4444
4432 for_each_domain(this_cpu, sd) { 4445 for_each_domain(this_cpu, sd) {
4433 unsigned long interval; 4446 unsigned long interval;
4434 4447
@@ -4443,8 +4456,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
4443 interval = msecs_to_jiffies(sd->balance_interval); 4456 interval = msecs_to_jiffies(sd->balance_interval);
4444 if (time_after(next_balance, sd->last_balance + interval)) 4457 if (time_after(next_balance, sd->last_balance + interval))
4445 next_balance = sd->last_balance + interval; 4458 next_balance = sd->last_balance + interval;
4446 if (pulled_task) 4459 if (pulled_task) {
4460 this_rq->idle_stamp = 0;
4447 break; 4461 break;
4462 }
4448 } 4463 }
4449 if (pulled_task || time_after(jiffies, this_rq->next_balance)) { 4464 if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
4450 /* 4465 /*
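Taken together, the wakeup-side hunk earlier and the idle_balance() hunk above implement a cheap estimate of how long this runqueue tends to stay idle: idle_stamp is set when the cpu enters idle_balance(), and on the next wakeup the observed idle time (clamped to 2*sysctl_sched_migration_cost) is folded into avg_idle; newidle balancing is then skipped outright when avg_idle says the cpu will not stay idle long enough to amortize a migration. update_avg() itself is not shown in this diff; assuming it is the usual 1/8-weight running average used elsewhere in sched.c, it is roughly:

/* Assumed shape of update_avg(); not part of this patch. */
static void update_avg(u64 *avg, u64 sample)
{
	s64 diff = sample - *avg;

	*avg += diff >> 3;	/* move 1/8 of the way toward the new sample */
}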
@@ -5046,8 +5061,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
5046 p->gtime = cputime_add(p->gtime, cputime); 5061 p->gtime = cputime_add(p->gtime, cputime);
5047 5062
5048 /* Add guest time to cpustat. */ 5063 /* Add guest time to cpustat. */
5049 cpustat->user = cputime64_add(cpustat->user, tmp); 5064 if (TASK_NICE(p) > 0) {
5050 cpustat->guest = cputime64_add(cpustat->guest, tmp); 5065 cpustat->nice = cputime64_add(cpustat->nice, tmp);
5066 cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp);
5067 } else {
5068 cpustat->user = cputime64_add(cpustat->user, tmp);
5069 cpustat->guest = cputime64_add(cpustat->guest, tmp);
5070 }
5051} 5071}
5052 5072
5053/* 5073/*
@@ -5162,60 +5182,86 @@ void account_idle_ticks(unsigned long ticks)
5162 * Use precise platform statistics if available: 5182 * Use precise platform statistics if available:
5163 */ 5183 */
5164#ifdef CONFIG_VIRT_CPU_ACCOUNTING 5184#ifdef CONFIG_VIRT_CPU_ACCOUNTING
5165cputime_t task_utime(struct task_struct *p) 5185void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
5166{ 5186{
5167 return p->utime; 5187 *ut = p->utime;
5188 *st = p->stime;
5168} 5189}
5169 5190
5170cputime_t task_stime(struct task_struct *p) 5191void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
5171{ 5192{
5172 return p->stime; 5193 struct task_cputime cputime;
5194
5195 thread_group_cputime(p, &cputime);
5196
5197 *ut = cputime.utime;
5198 *st = cputime.stime;
5173} 5199}
5174#else 5200#else
5175cputime_t task_utime(struct task_struct *p) 5201
5202#ifndef nsecs_to_cputime
5203# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
5204#endif
5205
5206void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
5176{ 5207{
5177 clock_t utime = cputime_to_clock_t(p->utime), 5208 cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
5178 total = utime + cputime_to_clock_t(p->stime);
5179 u64 temp;
5180 5209
5181 /* 5210 /*
5182 * Use CFS's precise accounting: 5211 * Use CFS's precise accounting:
5183 */ 5212 */
5184 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); 5213 rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
5185 5214
5186 if (total) { 5215 if (total) {
5187 temp *= utime; 5216 u64 temp;
5217
5218 temp = (u64)(rtime * utime);
5188 do_div(temp, total); 5219 do_div(temp, total);
5189 } 5220 utime = (cputime_t)temp;
5190 utime = (clock_t)temp; 5221 } else
5222 utime = rtime;
5223
5224 /*
5225 * Compare with previous values, to keep monotonicity:
5226 */
5227 p->prev_utime = max(p->prev_utime, utime);
5228 p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
5191 5229
5192 p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); 5230 *ut = p->prev_utime;
5193 return p->prev_utime; 5231 *st = p->prev_stime;
5194} 5232}
5195 5233
5196cputime_t task_stime(struct task_struct *p) 5234/*
5235 * Must be called with siglock held.
5236 */
5237void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
5197{ 5238{
5198 clock_t stime; 5239 struct signal_struct *sig = p->signal;
5240 struct task_cputime cputime;
5241 cputime_t rtime, utime, total;
5199 5242
5200 /* 5243 thread_group_cputime(p, &cputime);
5201 * Use CFS's precise accounting. (we subtract utime from
5202 * the total, to make sure the total observed by userspace
5203 * grows monotonically - apps rely on that):
5204 */
5205 stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
5206 cputime_to_clock_t(task_utime(p));
5207 5244
5208 if (stime >= 0) 5245 total = cputime_add(cputime.utime, cputime.stime);
5209 p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); 5246 rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
5210 5247
5211 return p->prev_stime; 5248 if (total) {
5212} 5249 u64 temp;
5213#endif
5214 5250
5215inline cputime_t task_gtime(struct task_struct *p) 5251 temp = (u64)(rtime * cputime.utime);
5216{ 5252 do_div(temp, total);
5217 return p->gtime; 5253 utime = (cputime_t)temp;
5254 } else
5255 utime = rtime;
5256
5257 sig->prev_utime = max(sig->prev_utime, utime);
5258 sig->prev_stime = max(sig->prev_stime,
5259 cputime_sub(rtime, sig->prev_utime));
5260
5261 *ut = sig->prev_utime;
5262 *st = sig->prev_stime;
5218} 5263}
5264#endif
5219 5265
5220/* 5266/*
5221 * This function gets called by the timer code, with HZ frequency. 5267 * This function gets called by the timer code, with HZ frequency.
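The non-VIRT_CPU_ACCOUNTING task_times() and thread_group_times() above apply the same split: take CFS's precise runtime (rtime), divide it between user and system time in the ratio of the sampled tick counters, and clamp against prev_utime/prev_stime so the reported values never go backwards. A standalone userspace sketch of the arithmetic, with made-up numbers and the monotonicity clamp omitted:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t utime = 6, stime = 4;		/* tick-sampled user/system time */
	uint64_t rtime = 8;			/* precise runtime from sum_exec_runtime */
	uint64_t total = utime + stime;
	uint64_t ut = total ? rtime * utime / total : rtime;

	/* 8 * 6 / 10 == 4, so the split becomes utime=4, stime=4 */
	printf("utime=%llu stime=%llu\n",
	       (unsigned long long)ut,
	       (unsigned long long)(rtime - ut));
	return 0;
}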
@@ -6175,22 +6221,14 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
6175 BUG_ON(p->se.on_rq); 6221 BUG_ON(p->se.on_rq);
6176 6222
6177 p->policy = policy; 6223 p->policy = policy;
6178 switch (p->policy) {
6179 case SCHED_NORMAL:
6180 case SCHED_BATCH:
6181 case SCHED_IDLE:
6182 p->sched_class = &fair_sched_class;
6183 break;
6184 case SCHED_FIFO:
6185 case SCHED_RR:
6186 p->sched_class = &rt_sched_class;
6187 break;
6188 }
6189
6190 p->rt_priority = prio; 6224 p->rt_priority = prio;
6191 p->normal_prio = normal_prio(p); 6225 p->normal_prio = normal_prio(p);
6192 /* we are holding p->pi_lock already */ 6226 /* we are holding p->pi_lock already */
6193 p->prio = rt_mutex_getprio(p); 6227 p->prio = rt_mutex_getprio(p);
6228 if (rt_prio(p->prio))
6229 p->sched_class = &rt_sched_class;
6230 else
6231 p->sched_class = &fair_sched_class;
6194 set_load_weight(p); 6232 set_load_weight(p);
6195} 6233}
6196 6234
@@ -6935,7 +6973,7 @@ void show_state_filter(unsigned long state_filter)
6935 /* 6973 /*
6936 * Only show locks if all tasks are dumped: 6974 * Only show locks if all tasks are dumped:
6937 */ 6975 */
6938 if (state_filter == -1) 6976 if (!state_filter)
6939 debug_show_all_locks(); 6977 debug_show_all_locks();
6940} 6978}
6941 6979
@@ -7406,17 +7444,16 @@ static struct ctl_table sd_ctl_dir[] = {
7406 .procname = "sched_domain", 7444 .procname = "sched_domain",
7407 .mode = 0555, 7445 .mode = 0555,
7408 }, 7446 },
7409 {0, }, 7447 {}
7410}; 7448};
7411 7449
7412static struct ctl_table sd_ctl_root[] = { 7450static struct ctl_table sd_ctl_root[] = {
7413 { 7451 {
7414 .ctl_name = CTL_KERN,
7415 .procname = "kernel", 7452 .procname = "kernel",
7416 .mode = 0555, 7453 .mode = 0555,
7417 .child = sd_ctl_dir, 7454 .child = sd_ctl_dir,
7418 }, 7455 },
7419 {0, }, 7456 {}
7420}; 7457};
7421 7458
7422static struct ctl_table *sd_alloc_ctl_entry(int n) 7459static struct ctl_table *sd_alloc_ctl_entry(int n)
@@ -7740,6 +7777,16 @@ early_initcall(migration_init);
7740 7777
7741#ifdef CONFIG_SCHED_DEBUG 7778#ifdef CONFIG_SCHED_DEBUG
7742 7779
7780static __read_mostly int sched_domain_debug_enabled;
7781
7782static int __init sched_domain_debug_setup(char *str)
7783{
7784 sched_domain_debug_enabled = 1;
7785
7786 return 0;
7787}
7788early_param("sched_debug", sched_domain_debug_setup);
7789
7743static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, 7790static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7744 struct cpumask *groupmask) 7791 struct cpumask *groupmask)
7745{ 7792{
@@ -7826,6 +7873,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
7826 cpumask_var_t groupmask; 7873 cpumask_var_t groupmask;
7827 int level = 0; 7874 int level = 0;
7828 7875
7876 if (!sched_domain_debug_enabled)
7877 return;
7878
7829 if (!sd) { 7879 if (!sd) {
7830 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); 7880 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
7831 return; 7881 return;
@@ -7905,6 +7955,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
7905 7955
7906static void free_rootdomain(struct root_domain *rd) 7956static void free_rootdomain(struct root_domain *rd)
7907{ 7957{
7958 synchronize_sched();
7959
7908 cpupri_cleanup(&rd->cpupri); 7960 cpupri_cleanup(&rd->cpupri);
7909 7961
7910 free_cpumask_var(rd->rto_mask); 7962 free_cpumask_var(rd->rto_mask);
@@ -8045,6 +8097,7 @@ static cpumask_var_t cpu_isolated_map;
8045/* Setup the mask of cpus configured for isolated domains */ 8097/* Setup the mask of cpus configured for isolated domains */
8046static int __init isolated_cpu_setup(char *str) 8098static int __init isolated_cpu_setup(char *str)
8047{ 8099{
8100 alloc_bootmem_cpumask_var(&cpu_isolated_map);
8048 cpulist_parse(str, cpu_isolated_map); 8101 cpulist_parse(str, cpu_isolated_map);
8049 return 1; 8102 return 1;
8050} 8103}
@@ -8881,7 +8934,7 @@ static int build_sched_domains(const struct cpumask *cpu_map)
8881 return __build_sched_domains(cpu_map, NULL); 8934 return __build_sched_domains(cpu_map, NULL);
8882} 8935}
8883 8936
8884static struct cpumask *doms_cur; /* current sched domains */ 8937static cpumask_var_t *doms_cur; /* current sched domains */
8885static int ndoms_cur; /* number of sched domains in 'doms_cur' */ 8938static int ndoms_cur; /* number of sched domains in 'doms_cur' */
8886static struct sched_domain_attr *dattr_cur; 8939static struct sched_domain_attr *dattr_cur;
8887 /* attribues of custom domains in 'doms_cur' */ 8940 /* attribues of custom domains in 'doms_cur' */
@@ -8903,6 +8956,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void)
8903 return 0; 8956 return 0;
8904} 8957}
8905 8958
8959cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
8960{
8961 int i;
8962 cpumask_var_t *doms;
8963
8964 doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL);
8965 if (!doms)
8966 return NULL;
8967 for (i = 0; i < ndoms; i++) {
8968 if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
8969 free_sched_domains(doms, i);
8970 return NULL;
8971 }
8972 }
8973 return doms;
8974}
8975
8976void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
8977{
8978 unsigned int i;
8979 for (i = 0; i < ndoms; i++)
8980 free_cpumask_var(doms[i]);
8981 kfree(doms);
8982}
8983
8906/* 8984/*
8907 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 8985 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
8908 * For now this just excludes isolated cpus, but could be used to 8986 * For now this just excludes isolated cpus, but could be used to
@@ -8914,12 +8992,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map)
8914 8992
8915 arch_update_cpu_topology(); 8993 arch_update_cpu_topology();
8916 ndoms_cur = 1; 8994 ndoms_cur = 1;
8917 doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); 8995 doms_cur = alloc_sched_domains(ndoms_cur);
8918 if (!doms_cur) 8996 if (!doms_cur)
8919 doms_cur = fallback_doms; 8997 doms_cur = &fallback_doms;
8920 cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); 8998 cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
8921 dattr_cur = NULL; 8999 dattr_cur = NULL;
8922 err = build_sched_domains(doms_cur); 9000 err = build_sched_domains(doms_cur[0]);
8923 register_sched_domain_sysctl(); 9001 register_sched_domain_sysctl();
8924 9002
8925 return err; 9003 return err;
@@ -8969,19 +9047,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
8969 * doms_new[] to the current sched domain partitioning, doms_cur[]. 9047 * doms_new[] to the current sched domain partitioning, doms_cur[].
8970 * It destroys each deleted domain and builds each new domain. 9048 * It destroys each deleted domain and builds each new domain.
8971 * 9049 *
8972 * 'doms_new' is an array of cpumask's of length 'ndoms_new'. 9050 * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'.
8973 * The masks don't intersect (don't overlap.) We should setup one 9051 * The masks don't intersect (don't overlap.) We should setup one
8974 * sched domain for each mask. CPUs not in any of the cpumasks will 9052 * sched domain for each mask. CPUs not in any of the cpumasks will
8975 * not be load balanced. If the same cpumask appears both in the 9053 * not be load balanced. If the same cpumask appears both in the
8976 * current 'doms_cur' domains and in the new 'doms_new', we can leave 9054 * current 'doms_cur' domains and in the new 'doms_new', we can leave
8977 * it as it is. 9055 * it as it is.
8978 * 9056 *
8979 * The passed in 'doms_new' should be kmalloc'd. This routine takes 9057 * The passed in 'doms_new' should be allocated using
8980 * ownership of it and will kfree it when done with it. If the caller 9058 * alloc_sched_domains. This routine takes ownership of it and will
8981 * failed the kmalloc call, then it can pass in doms_new == NULL && 9059 * free_sched_domains it when done with it. If the caller failed the
8982 * ndoms_new == 1, and partition_sched_domains() will fallback to 9060 * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1,
8983 * the single partition 'fallback_doms', it also forces the domains 9061 * and partition_sched_domains() will fallback to the single partition
8984 * to be rebuilt. 9062 * 'fallback_doms', it also forces the domains to be rebuilt.
8985 * 9063 *
8986 * If doms_new == NULL it will be replaced with cpu_online_mask. 9064 * If doms_new == NULL it will be replaced with cpu_online_mask.
8987 * ndoms_new == 0 is a special case for destroying existing domains, 9065 * ndoms_new == 0 is a special case for destroying existing domains,
@@ -8989,8 +9067,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
8989 * 9067 *
8990 * Call with hotplug lock held 9068 * Call with hotplug lock held
8991 */ 9069 */
8992/* FIXME: Change to struct cpumask *doms_new[] */ 9070void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
8993void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
8994 struct sched_domain_attr *dattr_new) 9071 struct sched_domain_attr *dattr_new)
8995{ 9072{
8996 int i, j, n; 9073 int i, j, n;
@@ -9009,40 +9086,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
9009 /* Destroy deleted domains */ 9086 /* Destroy deleted domains */
9010 for (i = 0; i < ndoms_cur; i++) { 9087 for (i = 0; i < ndoms_cur; i++) {
9011 for (j = 0; j < n && !new_topology; j++) { 9088 for (j = 0; j < n && !new_topology; j++) {
9012 if (cpumask_equal(&doms_cur[i], &doms_new[j]) 9089 if (cpumask_equal(doms_cur[i], doms_new[j])
9013 && dattrs_equal(dattr_cur, i, dattr_new, j)) 9090 && dattrs_equal(dattr_cur, i, dattr_new, j))
9014 goto match1; 9091 goto match1;
9015 } 9092 }
9016 /* no match - a current sched domain not in new doms_new[] */ 9093 /* no match - a current sched domain not in new doms_new[] */
9017 detach_destroy_domains(doms_cur + i); 9094 detach_destroy_domains(doms_cur[i]);
9018match1: 9095match1:
9019 ; 9096 ;
9020 } 9097 }
9021 9098
9022 if (doms_new == NULL) { 9099 if (doms_new == NULL) {
9023 ndoms_cur = 0; 9100 ndoms_cur = 0;
9024 doms_new = fallback_doms; 9101 doms_new = &fallback_doms;
9025 cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); 9102 cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map);
9026 WARN_ON_ONCE(dattr_new); 9103 WARN_ON_ONCE(dattr_new);
9027 } 9104 }
9028 9105
9029 /* Build new domains */ 9106 /* Build new domains */
9030 for (i = 0; i < ndoms_new; i++) { 9107 for (i = 0; i < ndoms_new; i++) {
9031 for (j = 0; j < ndoms_cur && !new_topology; j++) { 9108 for (j = 0; j < ndoms_cur && !new_topology; j++) {
9032 if (cpumask_equal(&doms_new[i], &doms_cur[j]) 9109 if (cpumask_equal(doms_new[i], doms_cur[j])
9033 && dattrs_equal(dattr_new, i, dattr_cur, j)) 9110 && dattrs_equal(dattr_new, i, dattr_cur, j))
9034 goto match2; 9111 goto match2;
9035 } 9112 }
9036 /* no match - add a new doms_new */ 9113 /* no match - add a new doms_new */
9037 __build_sched_domains(doms_new + i, 9114 __build_sched_domains(doms_new[i],
9038 dattr_new ? dattr_new + i : NULL); 9115 dattr_new ? dattr_new + i : NULL);
9039match2: 9116match2:
9040 ; 9117 ;
9041 } 9118 }
9042 9119
9043 /* Remember the new sched domains */ 9120 /* Remember the new sched domains */
9044 if (doms_cur != fallback_doms) 9121 if (doms_cur != &fallback_doms)
9045 kfree(doms_cur); 9122 free_sched_domains(doms_cur, ndoms_cur);
9046 kfree(dattr_cur); /* kfree(NULL) is safe */ 9123 kfree(dattr_cur); /* kfree(NULL) is safe */
9047 doms_cur = doms_new; 9124 doms_cur = doms_new;
9048 dattr_cur = dattr_new; 9125 dattr_cur = dattr_new;
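With doms_cur now an array of cpumask_var_t, callers build partitions through alloc_sched_domains()/free_sched_domains() instead of kmalloc'ing one flat cpumask. Below is a hedged caller-side sketch (the split chosen is arbitrary, and cpuset.c is the real in-tree caller); on success, ownership of the array passes to the scheduler, as the kerneldoc above states.

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/sched.h>

static void rebuild_two_partitions(void)
{
	cpumask_var_t *doms = alloc_sched_domains(2);

	if (!doms)
		return;

	/* illustrative split: cpu 0 on its own, every other online cpu together */
	cpumask_copy(doms[0], cpumask_of(0));
	cpumask_andnot(doms[1], cpu_online_mask, doms[0]);

	get_online_cpus();			/* "Call with hotplug lock held" */
	partition_sched_domains(2, doms, NULL);	/* takes ownership of doms */
	put_online_cpus();
}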
@@ -9364,10 +9441,6 @@ void __init sched_init(void)
9364#ifdef CONFIG_CPUMASK_OFFSTACK 9441#ifdef CONFIG_CPUMASK_OFFSTACK
9365 alloc_size += num_possible_cpus() * cpumask_size(); 9442 alloc_size += num_possible_cpus() * cpumask_size();
9366#endif 9443#endif
9367 /*
9368 * As sched_init() is called before page_alloc is setup,
9369 * we use alloc_bootmem().
9370 */
9371 if (alloc_size) { 9444 if (alloc_size) {
9372 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); 9445 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
9373 9446
@@ -9522,6 +9595,8 @@ void __init sched_init(void)
9522 rq->cpu = i; 9595 rq->cpu = i;
9523 rq->online = 0; 9596 rq->online = 0;
9524 rq->migration_thread = NULL; 9597 rq->migration_thread = NULL;
9598 rq->idle_stamp = 0;
9599 rq->avg_idle = 2*sysctl_sched_migration_cost;
9525 INIT_LIST_HEAD(&rq->migration_queue); 9600 INIT_LIST_HEAD(&rq->migration_queue);
9526 rq_attach_root(rq, &def_root_domain); 9601 rq_attach_root(rq, &def_root_domain);
9527#endif 9602#endif
@@ -9571,7 +9646,9 @@ void __init sched_init(void)
9571 zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); 9646 zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
9572 alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); 9647 alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
9573#endif 9648#endif
9574 zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); 9649 /* May be allocated at isolcpus cmdline parse time */
9650 if (cpu_isolated_map == NULL)
9651 zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
9575#endif /* SMP */ 9652#endif /* SMP */
9576 9653
9577 perf_event_init(); 9654 perf_event_init();
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index efb84409bc43..6988cf08f705 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -285,12 +285,16 @@ static void print_cpu(struct seq_file *m, int cpu)
285 285
286#ifdef CONFIG_SCHEDSTATS 286#ifdef CONFIG_SCHEDSTATS
287#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n); 287#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
288#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
288 289
289 P(yld_count); 290 P(yld_count);
290 291
291 P(sched_switch); 292 P(sched_switch);
292 P(sched_count); 293 P(sched_count);
293 P(sched_goidle); 294 P(sched_goidle);
295#ifdef CONFIG_SMP
296 P64(avg_idle);
297#endif
294 298
295 P(ttwu_count); 299 P(ttwu_count);
296 P(ttwu_local); 300 P(ttwu_local);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 37087a7fac22..f61837ad336d 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1345,6 +1345,37 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
1345} 1345}
1346 1346
1347/* 1347/*
1348 * Try and locate an idle CPU in the sched_domain.
1349 */
1350static int
1351select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target)
1352{
1353 int cpu = smp_processor_id();
1354 int prev_cpu = task_cpu(p);
1355 int i;
1356
1357 /*
1358 * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE
1359 * test in select_task_rq_fair) and the prev_cpu is idle then that's
1360 * always a better target than the current cpu.
1361 */
1362 if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running)
1363 return prev_cpu;
1364
1365 /*
 1366 * Otherwise, iterate the domain and find an eligible idle cpu.
1367 */
1368 for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
1369 if (!cpu_rq(i)->cfs.nr_running) {
1370 target = i;
1371 break;
1372 }
1373 }
1374
1375 return target;
1376}
1377
1378/*
1348 * sched_balance_self: balance the current task (running on cpu) in domains 1379 * sched_balance_self: balance the current task (running on cpu) in domains
1349 * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and 1380 * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
1350 * SD_BALANCE_EXEC. 1381 * SD_BALANCE_EXEC.
@@ -1398,11 +1429,35 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1398 want_sd = 0; 1429 want_sd = 0;
1399 } 1430 }
1400 1431
1401 if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && 1432 /*
1402 cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { 1433 * While iterating the domains looking for a spanning
1434 * WAKE_AFFINE domain, adjust the affine target to any idle cpu
1435 * in cache sharing domains along the way.
1436 */
1437 if (want_affine) {
1438 int target = -1;
1403 1439
1404 affine_sd = tmp; 1440 /*
1405 want_affine = 0; 1441 * If both cpu and prev_cpu are part of this domain,
1442 * cpu is a valid SD_WAKE_AFFINE target.
1443 */
1444 if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
1445 target = cpu;
1446
1447 /*
1448 * If there's an idle sibling in this domain, make that
1449 * the wake_affine target instead of the current cpu.
1450 */
1451 if (tmp->flags & SD_PREFER_SIBLING)
1452 target = select_idle_sibling(p, tmp, target);
1453
1454 if (target >= 0) {
1455 if (tmp->flags & SD_WAKE_AFFINE) {
1456 affine_sd = tmp;
1457 want_affine = 0;
1458 }
1459 cpu = target;
1460 }
1406 } 1461 }
1407 1462
1408 if (!want_sd && !want_affine) 1463 if (!want_sd && !want_affine)
@@ -1679,7 +1734,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
1679 struct cfs_rq *cfs_rq = &rq->cfs; 1734 struct cfs_rq *cfs_rq = &rq->cfs;
1680 struct sched_entity *se; 1735 struct sched_entity *se;
1681 1736
1682 if (unlikely(!cfs_rq->nr_running)) 1737 if (!cfs_rq->nr_running)
1683 return NULL; 1738 return NULL;
1684 1739
1685 do { 1740 do {
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index a4d790cddb19..5c5fef378415 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1153,29 +1153,12 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
1153 1153
1154static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); 1154static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
1155 1155
1156static inline int pick_optimal_cpu(int this_cpu,
1157 const struct cpumask *mask)
1158{
1159 int first;
1160
1161 /* "this_cpu" is cheaper to preempt than a remote processor */
1162 if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask))
1163 return this_cpu;
1164
1165 first = cpumask_first(mask);
1166 if (first < nr_cpu_ids)
1167 return first;
1168
1169 return -1;
1170}
1171
1172static int find_lowest_rq(struct task_struct *task) 1156static int find_lowest_rq(struct task_struct *task)
1173{ 1157{
1174 struct sched_domain *sd; 1158 struct sched_domain *sd;
1175 struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); 1159 struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
1176 int this_cpu = smp_processor_id(); 1160 int this_cpu = smp_processor_id();
1177 int cpu = task_cpu(task); 1161 int cpu = task_cpu(task);
1178 cpumask_var_t domain_mask;
1179 1162
1180 if (task->rt.nr_cpus_allowed == 1) 1163 if (task->rt.nr_cpus_allowed == 1)
1181 return -1; /* No other targets possible */ 1164 return -1; /* No other targets possible */
@@ -1198,28 +1181,26 @@ static int find_lowest_rq(struct task_struct *task)
1198 * Otherwise, we consult the sched_domains span maps to figure 1181 * Otherwise, we consult the sched_domains span maps to figure
1199 * out which cpu is logically closest to our hot cache data. 1182 * out which cpu is logically closest to our hot cache data.
1200 */ 1183 */
1201 if (this_cpu == cpu) 1184 if (!cpumask_test_cpu(this_cpu, lowest_mask))
1202 this_cpu = -1; /* Skip this_cpu opt if the same */ 1185 this_cpu = -1; /* Skip this_cpu opt if not among lowest */
1203
1204 if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
1205 for_each_domain(cpu, sd) {
1206 if (sd->flags & SD_WAKE_AFFINE) {
1207 int best_cpu;
1208 1186
1209 cpumask_and(domain_mask, 1187 for_each_domain(cpu, sd) {
1210 sched_domain_span(sd), 1188 if (sd->flags & SD_WAKE_AFFINE) {
1211 lowest_mask); 1189 int best_cpu;
1212 1190
1213 best_cpu = pick_optimal_cpu(this_cpu, 1191 /*
1214 domain_mask); 1192 * "this_cpu" is cheaper to preempt than a
1215 1193 * remote processor.
1216 if (best_cpu != -1) { 1194 */
1217 free_cpumask_var(domain_mask); 1195 if (this_cpu != -1 &&
1218 return best_cpu; 1196 cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
1219 } 1197 return this_cpu;
1220 } 1198
1199 best_cpu = cpumask_first_and(lowest_mask,
1200 sched_domain_span(sd));
1201 if (best_cpu < nr_cpu_ids)
1202 return best_cpu;
1221 } 1203 }
1222 free_cpumask_var(domain_mask);
1223 } 1204 }
1224 1205
1225 /* 1206 /*
@@ -1227,7 +1208,13 @@ static int find_lowest_rq(struct task_struct *task)
1227 * just give the caller *something* to work with from the compatible 1208 * just give the caller *something* to work with from the compatible
1228 * locations. 1209 * locations.
1229 */ 1210 */
1230 return pick_optimal_cpu(this_cpu, lowest_mask); 1211 if (this_cpu != -1)
1212 return this_cpu;
1213
1214 cpu = cpumask_any(lowest_mask);
1215 if (cpu < nr_cpu_ids)
1216 return cpu;
1217 return -1;
1231} 1218}
1232 1219
1233/* Will lock the rq it finds */ 1220/* Will lock the rq it finds */
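The find_lowest_rq() rewrite above drops both pick_optimal_cpu() and the per-call GFP_ATOMIC cpumask allocation: rather than building a temporary domain_mask and scanning it, it tests this_cpu directly against the domain span and otherwise asks for the first cpu in the intersection. The allocation-free primitive it leans on, shown in isolation as an illustrative helper (not from the patch):

#include <linux/cpumask.h>

/* First cpu present in both masks, or -1; no temporary mask required. */
static int first_common_cpu(const struct cpumask *a, const struct cpumask *b)
{
	int cpu = cpumask_first_and(a, b);

	return cpu < nr_cpu_ids ? cpu : -1;
}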
diff --git a/kernel/signal.c b/kernel/signal.c
index fe08008133da..6b982f2cf524 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -28,7 +28,8 @@
28#include <linux/freezer.h> 28#include <linux/freezer.h>
29#include <linux/pid_namespace.h> 29#include <linux/pid_namespace.h>
30#include <linux/nsproxy.h> 30#include <linux/nsproxy.h>
31#include <trace/events/sched.h> 31#define CREATE_TRACE_POINTS
32#include <trace/events/signal.h>
32 33
33#include <asm/param.h> 34#include <asm/param.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -856,7 +857,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
856 struct sigqueue *q; 857 struct sigqueue *q;
857 int override_rlimit; 858 int override_rlimit;
858 859
859 trace_sched_signal_send(sig, t); 860 trace_signal_generate(sig, info, t);
860 861
861 assert_spin_locked(&t->sighand->siglock); 862 assert_spin_locked(&t->sighand->siglock);
862 863
@@ -918,12 +919,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
918 break; 919 break;
919 } 920 }
920 } else if (!is_si_special(info)) { 921 } else if (!is_si_special(info)) {
921 if (sig >= SIGRTMIN && info->si_code != SI_USER) 922 if (sig >= SIGRTMIN && info->si_code != SI_USER) {
922 /* 923 /*
923 * Queue overflow, abort. We may abort if the signal was rt 924 * Queue overflow, abort. We may abort if the
924 * and sent by user using something other than kill(). 925 * signal was rt and sent by user using something
925 */ 926 * other than kill().
927 */
928 trace_signal_overflow_fail(sig, group, info);
926 return -EAGAIN; 929 return -EAGAIN;
930 } else {
931 /*
932 * This is a silent loss of information. We still
933 * send the signal, but the *info bits are lost.
934 */
935 trace_signal_lose_info(sig, group, info);
936 }
927 } 937 }
928 938
929out_set: 939out_set:
@@ -1859,6 +1869,9 @@ relock:
1859 ka = &sighand->action[signr-1]; 1869 ka = &sighand->action[signr-1];
1860 } 1870 }
1861 1871
1872 /* Trace actually delivered signals. */
1873 trace_signal_deliver(signr, info, ka);
1874
1862 if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ 1875 if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */
1863 continue; 1876 continue;
1864 if (ka->sa.sa_handler != SIG_DFL) { 1877 if (ka->sa.sa_handler != SIG_DFL) {
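The #define CREATE_TRACE_POINTS ahead of the include is what instantiates the new signal tracepoints: every other user of trace/events/signal.h only sees declarations, while exactly one .c file must define CREATE_TRACE_POINTS so the trace/define_trace.h machinery emits the tracepoint bodies. The general shape of such a header, using a hypothetical event (this is not the contents of trace/events/signal.h):

/* include/trace/events/foo.h -- hypothetical */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM foo

#if !defined(_TRACE_FOO_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_FOO_H

#include <linux/tracepoint.h>

TRACE_EVENT(foo_fired,

	TP_PROTO(int sig),

	TP_ARGS(sig),

	TP_STRUCT__entry(
		__field(int, sig)
	),

	TP_fast_assign(
		__entry->sig = sig;
	),

	TP_printk("sig=%d", __entry->sig)
);

#endif /* _TRACE_FOO_H */

/* This part must be outside protection */
#include <trace/define_trace.h>

With that in place, the one .c file that defines CREATE_TRACE_POINTS before the include (as signal.c does above) gets the tracepoint bodies, and any caller can then invoke trace_foo_fired(sig).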
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 00889bd3c590..7494bbf5a270 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -49,7 +49,6 @@ static const int slow_work_max_vslow = 99;
49 49
50ctl_table slow_work_sysctls[] = { 50ctl_table slow_work_sysctls[] = {
51 { 51 {
52 .ctl_name = CTL_UNNUMBERED,
53 .procname = "min-threads", 52 .procname = "min-threads",
54 .data = &slow_work_min_threads, 53 .data = &slow_work_min_threads,
55 .maxlen = sizeof(unsigned), 54 .maxlen = sizeof(unsigned),
@@ -59,7 +58,6 @@ ctl_table slow_work_sysctls[] = {
59 .extra2 = &slow_work_max_threads, 58 .extra2 = &slow_work_max_threads,
60 }, 59 },
61 { 60 {
62 .ctl_name = CTL_UNNUMBERED,
63 .procname = "max-threads", 61 .procname = "max-threads",
64 .data = &slow_work_max_threads, 62 .data = &slow_work_max_threads,
65 .maxlen = sizeof(unsigned), 63 .maxlen = sizeof(unsigned),
@@ -69,16 +67,15 @@ ctl_table slow_work_sysctls[] = {
69 .extra2 = (void *) &slow_work_max_max_threads, 67 .extra2 = (void *) &slow_work_max_max_threads,
70 }, 68 },
71 { 69 {
72 .ctl_name = CTL_UNNUMBERED,
73 .procname = "vslow-percentage", 70 .procname = "vslow-percentage",
74 .data = &vslow_work_proportion, 71 .data = &vslow_work_proportion,
75 .maxlen = sizeof(unsigned), 72 .maxlen = sizeof(unsigned),
76 .mode = 0644, 73 .mode = 0644,
77 .proc_handler = &proc_dointvec_minmax, 74 .proc_handler = proc_dointvec_minmax,
78 .extra1 = (void *) &slow_work_min_vslow, 75 .extra1 = (void *) &slow_work_min_vslow,
79 .extra2 = (void *) &slow_work_max_vslow, 76 .extra2 = (void *) &slow_work_max_vslow,
80 }, 77 },
81 { .ctl_name = 0 } 78 {}
82}; 79};
83#endif 80#endif
84 81
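The slow-work table above shows the conversion applied to every sysctl table in this commit: the binary-interface .ctl_name member goes away (binary sysctl handling moves to the new kernel/sysctl_binary.c), .proc_handler loses the redundant address-of operator, and the array terminator shrinks from { .ctl_name = 0 } to an empty initializer. A minimal sketch of a converted entry, with a hypothetical knob name and assuming the zero/one bounds are in scope as in kernel/sysctl.c:

static int example_enabled;	/* hypothetical knob backing /proc/sys/.../example-enabled */

static struct ctl_table example_sysctls[] = {
	{
		.procname	= "example-enabled",
		.data		= &example_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,	/* plain function name, no '&' */
		.extra1		= &zero,
		.extra2		= &one,
	},
	{ }	/* empty terminator replaces { .ctl_name = 0 } */
};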
diff --git a/kernel/sys.c b/kernel/sys.c
index ce17760d9c51..9968c5fb55b9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -911,16 +911,15 @@ change_okay:
911 911
912void do_sys_times(struct tms *tms) 912void do_sys_times(struct tms *tms)
913{ 913{
914 struct task_cputime cputime; 914 cputime_t tgutime, tgstime, cutime, cstime;
915 cputime_t cutime, cstime;
916 915
917 thread_group_cputime(current, &cputime);
918 spin_lock_irq(&current->sighand->siglock); 916 spin_lock_irq(&current->sighand->siglock);
917 thread_group_times(current, &tgutime, &tgstime);
919 cutime = current->signal->cutime; 918 cutime = current->signal->cutime;
920 cstime = current->signal->cstime; 919 cstime = current->signal->cstime;
921 spin_unlock_irq(&current->sighand->siglock); 920 spin_unlock_irq(&current->sighand->siglock);
922 tms->tms_utime = cputime_to_clock_t(cputime.utime); 921 tms->tms_utime = cputime_to_clock_t(tgutime);
923 tms->tms_stime = cputime_to_clock_t(cputime.stime); 922 tms->tms_stime = cputime_to_clock_t(tgstime);
924 tms->tms_cutime = cputime_to_clock_t(cutime); 923 tms->tms_cutime = cputime_to_clock_t(cutime);
925 tms->tms_cstime = cputime_to_clock_t(cstime); 924 tms->tms_cstime = cputime_to_clock_t(cstime);
926} 925}
@@ -1338,16 +1337,14 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1338{ 1337{
1339 struct task_struct *t; 1338 struct task_struct *t;
1340 unsigned long flags; 1339 unsigned long flags;
1341 cputime_t utime, stime; 1340 cputime_t tgutime, tgstime, utime, stime;
1342 struct task_cputime cputime;
1343 unsigned long maxrss = 0; 1341 unsigned long maxrss = 0;
1344 1342
1345 memset((char *) r, 0, sizeof *r); 1343 memset((char *) r, 0, sizeof *r);
1346 utime = stime = cputime_zero; 1344 utime = stime = cputime_zero;
1347 1345
1348 if (who == RUSAGE_THREAD) { 1346 if (who == RUSAGE_THREAD) {
1349 utime = task_utime(current); 1347 task_times(current, &utime, &stime);
1350 stime = task_stime(current);
1351 accumulate_thread_rusage(p, r); 1348 accumulate_thread_rusage(p, r);
1352 maxrss = p->signal->maxrss; 1349 maxrss = p->signal->maxrss;
1353 goto out; 1350 goto out;
@@ -1373,9 +1370,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1373 break; 1370 break;
1374 1371
1375 case RUSAGE_SELF: 1372 case RUSAGE_SELF:
1376 thread_group_cputime(p, &cputime); 1373 thread_group_times(p, &tgutime, &tgstime);
1377 utime = cputime_add(utime, cputime.utime); 1374 utime = cputime_add(utime, tgutime);
1378 stime = cputime_add(stime, cputime.stime); 1375 stime = cputime_add(stime, tgstime);
1379 r->ru_nvcsw += p->signal->nvcsw; 1376 r->ru_nvcsw += p->signal->nvcsw;
1380 r->ru_nivcsw += p->signal->nivcsw; 1377 r->ru_nivcsw += p->signal->nivcsw;
1381 r->ru_minflt += p->signal->min_flt; 1378 r->ru_minflt += p->signal->min_flt;
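Both sys.c hunks move from thread_group_cputime()/task_utime()/task_stime() to the newer thread_group_times() and task_times() helpers, which hand back the utime/stime pair directly. Restated without the diff markup, the resulting do_sys_times() reads roughly as follows (this mirrors the hunk above rather than adding anything new):

void do_sys_times(struct tms *tms)
{
	cputime_t tgutime, tgstime, cutime, cstime;

	spin_lock_irq(&current->sighand->siglock);
	/* Group totals for the calling thread group, taken under siglock. */
	thread_group_times(current, &tgutime, &tgstime);
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	spin_unlock_irq(&current->sighand->siglock);

	tms->tms_utime  = cputime_to_clock_t(tgutime);
	tms->tms_stime  = cputime_to_clock_t(tgstime);
	tms->tms_cutime = cputime_to_clock_t(cutime);
	tms->tms_cstime = cputime_to_clock_t(cstime);
}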
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index f050ba85d420..695384f12a7d 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -141,7 +141,6 @@ cond_syscall(sys_pciconfig_read);
141cond_syscall(sys_pciconfig_write); 141cond_syscall(sys_pciconfig_write);
142cond_syscall(sys_pciconfig_iobase); 142cond_syscall(sys_pciconfig_iobase);
143cond_syscall(sys32_ipc); 143cond_syscall(sys32_ipc);
144cond_syscall(sys32_sysctl);
145cond_syscall(ppc_rtas); 144cond_syscall(ppc_rtas);
146cond_syscall(sys_spu_run); 145cond_syscall(sys_spu_run);
147cond_syscall(sys_spu_create); 146cond_syscall(sys_spu_create);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4dbf93a52ee9..9327a26765c5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -27,7 +27,6 @@
27#include <linux/security.h> 27#include <linux/security.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/kmemcheck.h> 29#include <linux/kmemcheck.h>
30#include <linux/smp_lock.h>
31#include <linux/fs.h> 30#include <linux/fs.h>
32#include <linux/init.h> 31#include <linux/init.h>
33#include <linux/kernel.h> 32#include <linux/kernel.h>
@@ -61,7 +60,6 @@
61#include <asm/io.h> 60#include <asm/io.h>
62#endif 61#endif
63 62
64static int deprecated_sysctl_warning(struct __sysctl_args *args);
65 63
66#if defined(CONFIG_SYSCTL) 64#if defined(CONFIG_SYSCTL)
67 65
@@ -210,31 +208,26 @@ extern int lock_stat;
210 208
211static struct ctl_table root_table[] = { 209static struct ctl_table root_table[] = {
212 { 210 {
213 .ctl_name = CTL_KERN,
214 .procname = "kernel", 211 .procname = "kernel",
215 .mode = 0555, 212 .mode = 0555,
216 .child = kern_table, 213 .child = kern_table,
217 }, 214 },
218 { 215 {
219 .ctl_name = CTL_VM,
220 .procname = "vm", 216 .procname = "vm",
221 .mode = 0555, 217 .mode = 0555,
222 .child = vm_table, 218 .child = vm_table,
223 }, 219 },
224 { 220 {
225 .ctl_name = CTL_FS,
226 .procname = "fs", 221 .procname = "fs",
227 .mode = 0555, 222 .mode = 0555,
228 .child = fs_table, 223 .child = fs_table,
229 }, 224 },
230 { 225 {
231 .ctl_name = CTL_DEBUG,
232 .procname = "debug", 226 .procname = "debug",
233 .mode = 0555, 227 .mode = 0555,
234 .child = debug_table, 228 .child = debug_table,
235 }, 229 },
236 { 230 {
237 .ctl_name = CTL_DEV,
238 .procname = "dev", 231 .procname = "dev",
239 .mode = 0555, 232 .mode = 0555,
240 .child = dev_table, 233 .child = dev_table,
@@ -243,7 +236,7 @@ static struct ctl_table root_table[] = {
243 * NOTE: do not add new entries to this table unless you have read 236 * NOTE: do not add new entries to this table unless you have read
244 * Documentation/sysctl/ctl_unnumbered.txt 237 * Documentation/sysctl/ctl_unnumbered.txt
245 */ 238 */
246 { .ctl_name = 0 } 239 { }
247}; 240};
248 241
249#ifdef CONFIG_SCHED_DEBUG 242#ifdef CONFIG_SCHED_DEBUG
@@ -255,192 +248,166 @@ static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
255 248
256static struct ctl_table kern_table[] = { 249static struct ctl_table kern_table[] = {
257 { 250 {
258 .ctl_name = CTL_UNNUMBERED,
259 .procname = "sched_child_runs_first", 251 .procname = "sched_child_runs_first",
260 .data = &sysctl_sched_child_runs_first, 252 .data = &sysctl_sched_child_runs_first,
261 .maxlen = sizeof(unsigned int), 253 .maxlen = sizeof(unsigned int),
262 .mode = 0644, 254 .mode = 0644,
263 .proc_handler = &proc_dointvec, 255 .proc_handler = proc_dointvec,
264 }, 256 },
265#ifdef CONFIG_SCHED_DEBUG 257#ifdef CONFIG_SCHED_DEBUG
266 { 258 {
267 .ctl_name = CTL_UNNUMBERED,
268 .procname = "sched_min_granularity_ns", 259 .procname = "sched_min_granularity_ns",
269 .data = &sysctl_sched_min_granularity, 260 .data = &sysctl_sched_min_granularity,
270 .maxlen = sizeof(unsigned int), 261 .maxlen = sizeof(unsigned int),
271 .mode = 0644, 262 .mode = 0644,
272 .proc_handler = &sched_nr_latency_handler, 263 .proc_handler = sched_nr_latency_handler,
273 .strategy = &sysctl_intvec,
274 .extra1 = &min_sched_granularity_ns, 264 .extra1 = &min_sched_granularity_ns,
275 .extra2 = &max_sched_granularity_ns, 265 .extra2 = &max_sched_granularity_ns,
276 }, 266 },
277 { 267 {
278 .ctl_name = CTL_UNNUMBERED,
279 .procname = "sched_latency_ns", 268 .procname = "sched_latency_ns",
280 .data = &sysctl_sched_latency, 269 .data = &sysctl_sched_latency,
281 .maxlen = sizeof(unsigned int), 270 .maxlen = sizeof(unsigned int),
282 .mode = 0644, 271 .mode = 0644,
283 .proc_handler = &sched_nr_latency_handler, 272 .proc_handler = sched_nr_latency_handler,
284 .strategy = &sysctl_intvec,
285 .extra1 = &min_sched_granularity_ns, 273 .extra1 = &min_sched_granularity_ns,
286 .extra2 = &max_sched_granularity_ns, 274 .extra2 = &max_sched_granularity_ns,
287 }, 275 },
288 { 276 {
289 .ctl_name = CTL_UNNUMBERED,
290 .procname = "sched_wakeup_granularity_ns", 277 .procname = "sched_wakeup_granularity_ns",
291 .data = &sysctl_sched_wakeup_granularity, 278 .data = &sysctl_sched_wakeup_granularity,
292 .maxlen = sizeof(unsigned int), 279 .maxlen = sizeof(unsigned int),
293 .mode = 0644, 280 .mode = 0644,
294 .proc_handler = &proc_dointvec_minmax, 281 .proc_handler = proc_dointvec_minmax,
295 .strategy = &sysctl_intvec,
296 .extra1 = &min_wakeup_granularity_ns, 282 .extra1 = &min_wakeup_granularity_ns,
297 .extra2 = &max_wakeup_granularity_ns, 283 .extra2 = &max_wakeup_granularity_ns,
298 }, 284 },
299 { 285 {
300 .ctl_name = CTL_UNNUMBERED,
301 .procname = "sched_shares_ratelimit", 286 .procname = "sched_shares_ratelimit",
302 .data = &sysctl_sched_shares_ratelimit, 287 .data = &sysctl_sched_shares_ratelimit,
303 .maxlen = sizeof(unsigned int), 288 .maxlen = sizeof(unsigned int),
304 .mode = 0644, 289 .mode = 0644,
305 .proc_handler = &proc_dointvec, 290 .proc_handler = proc_dointvec,
306 }, 291 },
307 { 292 {
308 .ctl_name = CTL_UNNUMBERED,
309 .procname = "sched_shares_thresh", 293 .procname = "sched_shares_thresh",
310 .data = &sysctl_sched_shares_thresh, 294 .data = &sysctl_sched_shares_thresh,
311 .maxlen = sizeof(unsigned int), 295 .maxlen = sizeof(unsigned int),
312 .mode = 0644, 296 .mode = 0644,
313 .proc_handler = &proc_dointvec_minmax, 297 .proc_handler = proc_dointvec_minmax,
314 .strategy = &sysctl_intvec,
315 .extra1 = &zero, 298 .extra1 = &zero,
316 }, 299 },
317 { 300 {
318 .ctl_name = CTL_UNNUMBERED,
319 .procname = "sched_features", 301 .procname = "sched_features",
320 .data = &sysctl_sched_features, 302 .data = &sysctl_sched_features,
321 .maxlen = sizeof(unsigned int), 303 .maxlen = sizeof(unsigned int),
322 .mode = 0644, 304 .mode = 0644,
323 .proc_handler = &proc_dointvec, 305 .proc_handler = proc_dointvec,
324 }, 306 },
325 { 307 {
326 .ctl_name = CTL_UNNUMBERED,
327 .procname = "sched_migration_cost", 308 .procname = "sched_migration_cost",
328 .data = &sysctl_sched_migration_cost, 309 .data = &sysctl_sched_migration_cost,
329 .maxlen = sizeof(unsigned int), 310 .maxlen = sizeof(unsigned int),
330 .mode = 0644, 311 .mode = 0644,
331 .proc_handler = &proc_dointvec, 312 .proc_handler = proc_dointvec,
332 }, 313 },
333 { 314 {
334 .ctl_name = CTL_UNNUMBERED,
335 .procname = "sched_nr_migrate", 315 .procname = "sched_nr_migrate",
336 .data = &sysctl_sched_nr_migrate, 316 .data = &sysctl_sched_nr_migrate,
337 .maxlen = sizeof(unsigned int), 317 .maxlen = sizeof(unsigned int),
338 .mode = 0644, 318 .mode = 0644,
339 .proc_handler = &proc_dointvec, 319 .proc_handler = proc_dointvec,
340 }, 320 },
341 { 321 {
342 .ctl_name = CTL_UNNUMBERED,
343 .procname = "sched_time_avg", 322 .procname = "sched_time_avg",
344 .data = &sysctl_sched_time_avg, 323 .data = &sysctl_sched_time_avg,
345 .maxlen = sizeof(unsigned int), 324 .maxlen = sizeof(unsigned int),
346 .mode = 0644, 325 .mode = 0644,
347 .proc_handler = &proc_dointvec, 326 .proc_handler = proc_dointvec,
348 }, 327 },
349 { 328 {
350 .ctl_name = CTL_UNNUMBERED,
351 .procname = "timer_migration", 329 .procname = "timer_migration",
352 .data = &sysctl_timer_migration, 330 .data = &sysctl_timer_migration,
353 .maxlen = sizeof(unsigned int), 331 .maxlen = sizeof(unsigned int),
354 .mode = 0644, 332 .mode = 0644,
355 .proc_handler = &proc_dointvec_minmax, 333 .proc_handler = proc_dointvec_minmax,
356 .strategy = &sysctl_intvec,
357 .extra1 = &zero, 334 .extra1 = &zero,
358 .extra2 = &one, 335 .extra2 = &one,
359 }, 336 },
360#endif 337#endif
361 { 338 {
362 .ctl_name = CTL_UNNUMBERED,
363 .procname = "sched_rt_period_us", 339 .procname = "sched_rt_period_us",
364 .data = &sysctl_sched_rt_period, 340 .data = &sysctl_sched_rt_period,
365 .maxlen = sizeof(unsigned int), 341 .maxlen = sizeof(unsigned int),
366 .mode = 0644, 342 .mode = 0644,
367 .proc_handler = &sched_rt_handler, 343 .proc_handler = sched_rt_handler,
368 }, 344 },
369 { 345 {
370 .ctl_name = CTL_UNNUMBERED,
371 .procname = "sched_rt_runtime_us", 346 .procname = "sched_rt_runtime_us",
372 .data = &sysctl_sched_rt_runtime, 347 .data = &sysctl_sched_rt_runtime,
373 .maxlen = sizeof(int), 348 .maxlen = sizeof(int),
374 .mode = 0644, 349 .mode = 0644,
375 .proc_handler = &sched_rt_handler, 350 .proc_handler = sched_rt_handler,
376 }, 351 },
377 { 352 {
378 .ctl_name = CTL_UNNUMBERED,
379 .procname = "sched_compat_yield", 353 .procname = "sched_compat_yield",
380 .data = &sysctl_sched_compat_yield, 354 .data = &sysctl_sched_compat_yield,
381 .maxlen = sizeof(unsigned int), 355 .maxlen = sizeof(unsigned int),
382 .mode = 0644, 356 .mode = 0644,
383 .proc_handler = &proc_dointvec, 357 .proc_handler = proc_dointvec,
384 }, 358 },
385#ifdef CONFIG_PROVE_LOCKING 359#ifdef CONFIG_PROVE_LOCKING
386 { 360 {
387 .ctl_name = CTL_UNNUMBERED,
388 .procname = "prove_locking", 361 .procname = "prove_locking",
389 .data = &prove_locking, 362 .data = &prove_locking,
390 .maxlen = sizeof(int), 363 .maxlen = sizeof(int),
391 .mode = 0644, 364 .mode = 0644,
392 .proc_handler = &proc_dointvec, 365 .proc_handler = proc_dointvec,
393 }, 366 },
394#endif 367#endif
395#ifdef CONFIG_LOCK_STAT 368#ifdef CONFIG_LOCK_STAT
396 { 369 {
397 .ctl_name = CTL_UNNUMBERED,
398 .procname = "lock_stat", 370 .procname = "lock_stat",
399 .data = &lock_stat, 371 .data = &lock_stat,
400 .maxlen = sizeof(int), 372 .maxlen = sizeof(int),
401 .mode = 0644, 373 .mode = 0644,
402 .proc_handler = &proc_dointvec, 374 .proc_handler = proc_dointvec,
403 }, 375 },
404#endif 376#endif
405 { 377 {
406 .ctl_name = KERN_PANIC,
407 .procname = "panic", 378 .procname = "panic",
408 .data = &panic_timeout, 379 .data = &panic_timeout,
409 .maxlen = sizeof(int), 380 .maxlen = sizeof(int),
410 .mode = 0644, 381 .mode = 0644,
411 .proc_handler = &proc_dointvec, 382 .proc_handler = proc_dointvec,
412 }, 383 },
413 { 384 {
414 .ctl_name = KERN_CORE_USES_PID,
415 .procname = "core_uses_pid", 385 .procname = "core_uses_pid",
416 .data = &core_uses_pid, 386 .data = &core_uses_pid,
417 .maxlen = sizeof(int), 387 .maxlen = sizeof(int),
418 .mode = 0644, 388 .mode = 0644,
419 .proc_handler = &proc_dointvec, 389 .proc_handler = proc_dointvec,
420 }, 390 },
421 { 391 {
422 .ctl_name = KERN_CORE_PATTERN,
423 .procname = "core_pattern", 392 .procname = "core_pattern",
424 .data = core_pattern, 393 .data = core_pattern,
425 .maxlen = CORENAME_MAX_SIZE, 394 .maxlen = CORENAME_MAX_SIZE,
426 .mode = 0644, 395 .mode = 0644,
427 .proc_handler = &proc_dostring, 396 .proc_handler = proc_dostring,
428 .strategy = &sysctl_string,
429 }, 397 },
430 { 398 {
431 .ctl_name = CTL_UNNUMBERED,
432 .procname = "core_pipe_limit", 399 .procname = "core_pipe_limit",
433 .data = &core_pipe_limit, 400 .data = &core_pipe_limit,
434 .maxlen = sizeof(unsigned int), 401 .maxlen = sizeof(unsigned int),
435 .mode = 0644, 402 .mode = 0644,
436 .proc_handler = &proc_dointvec, 403 .proc_handler = proc_dointvec,
437 }, 404 },
438#ifdef CONFIG_PROC_SYSCTL 405#ifdef CONFIG_PROC_SYSCTL
439 { 406 {
440 .procname = "tainted", 407 .procname = "tainted",
441 .maxlen = sizeof(long), 408 .maxlen = sizeof(long),
442 .mode = 0644, 409 .mode = 0644,
443 .proc_handler = &proc_taint, 410 .proc_handler = proc_taint,
444 }, 411 },
445#endif 412#endif
446#ifdef CONFIG_LATENCYTOP 413#ifdef CONFIG_LATENCYTOP
@@ -449,181 +416,160 @@ static struct ctl_table kern_table[] = {
449 .data = &latencytop_enabled, 416 .data = &latencytop_enabled,
450 .maxlen = sizeof(int), 417 .maxlen = sizeof(int),
451 .mode = 0644, 418 .mode = 0644,
452 .proc_handler = &proc_dointvec, 419 .proc_handler = proc_dointvec,
453 }, 420 },
454#endif 421#endif
455#ifdef CONFIG_BLK_DEV_INITRD 422#ifdef CONFIG_BLK_DEV_INITRD
456 { 423 {
457 .ctl_name = KERN_REALROOTDEV,
458 .procname = "real-root-dev", 424 .procname = "real-root-dev",
459 .data = &real_root_dev, 425 .data = &real_root_dev,
460 .maxlen = sizeof(int), 426 .maxlen = sizeof(int),
461 .mode = 0644, 427 .mode = 0644,
462 .proc_handler = &proc_dointvec, 428 .proc_handler = proc_dointvec,
463 }, 429 },
464#endif 430#endif
465 { 431 {
466 .ctl_name = CTL_UNNUMBERED,
467 .procname = "print-fatal-signals", 432 .procname = "print-fatal-signals",
468 .data = &print_fatal_signals, 433 .data = &print_fatal_signals,
469 .maxlen = sizeof(int), 434 .maxlen = sizeof(int),
470 .mode = 0644, 435 .mode = 0644,
471 .proc_handler = &proc_dointvec, 436 .proc_handler = proc_dointvec,
472 }, 437 },
473#ifdef CONFIG_SPARC 438#ifdef CONFIG_SPARC
474 { 439 {
475 .ctl_name = KERN_SPARC_REBOOT,
476 .procname = "reboot-cmd", 440 .procname = "reboot-cmd",
477 .data = reboot_command, 441 .data = reboot_command,
478 .maxlen = 256, 442 .maxlen = 256,
479 .mode = 0644, 443 .mode = 0644,
480 .proc_handler = &proc_dostring, 444 .proc_handler = proc_dostring,
481 .strategy = &sysctl_string,
482 }, 445 },
483 { 446 {
484 .ctl_name = KERN_SPARC_STOP_A,
485 .procname = "stop-a", 447 .procname = "stop-a",
486 .data = &stop_a_enabled, 448 .data = &stop_a_enabled,
487 .maxlen = sizeof (int), 449 .maxlen = sizeof (int),
488 .mode = 0644, 450 .mode = 0644,
489 .proc_handler = &proc_dointvec, 451 .proc_handler = proc_dointvec,
490 }, 452 },
491 { 453 {
492 .ctl_name = KERN_SPARC_SCONS_PWROFF,
493 .procname = "scons-poweroff", 454 .procname = "scons-poweroff",
494 .data = &scons_pwroff, 455 .data = &scons_pwroff,
495 .maxlen = sizeof (int), 456 .maxlen = sizeof (int),
496 .mode = 0644, 457 .mode = 0644,
497 .proc_handler = &proc_dointvec, 458 .proc_handler = proc_dointvec,
498 }, 459 },
499#endif 460#endif
500#ifdef CONFIG_SPARC64 461#ifdef CONFIG_SPARC64
501 { 462 {
502 .ctl_name = CTL_UNNUMBERED,
503 .procname = "tsb-ratio", 463 .procname = "tsb-ratio",
504 .data = &sysctl_tsb_ratio, 464 .data = &sysctl_tsb_ratio,
505 .maxlen = sizeof (int), 465 .maxlen = sizeof (int),
506 .mode = 0644, 466 .mode = 0644,
507 .proc_handler = &proc_dointvec, 467 .proc_handler = proc_dointvec,
508 }, 468 },
509#endif 469#endif
510#ifdef __hppa__ 470#ifdef __hppa__
511 { 471 {
512 .ctl_name = KERN_HPPA_PWRSW,
513 .procname = "soft-power", 472 .procname = "soft-power",
514 .data = &pwrsw_enabled, 473 .data = &pwrsw_enabled,
515 .maxlen = sizeof (int), 474 .maxlen = sizeof (int),
516 .mode = 0644, 475 .mode = 0644,
517 .proc_handler = &proc_dointvec, 476 .proc_handler = proc_dointvec,
518 }, 477 },
519 { 478 {
520 .ctl_name = KERN_HPPA_UNALIGNED,
521 .procname = "unaligned-trap", 479 .procname = "unaligned-trap",
522 .data = &unaligned_enabled, 480 .data = &unaligned_enabled,
523 .maxlen = sizeof (int), 481 .maxlen = sizeof (int),
524 .mode = 0644, 482 .mode = 0644,
525 .proc_handler = &proc_dointvec, 483 .proc_handler = proc_dointvec,
526 }, 484 },
527#endif 485#endif
528 { 486 {
529 .ctl_name = KERN_CTLALTDEL,
530 .procname = "ctrl-alt-del", 487 .procname = "ctrl-alt-del",
531 .data = &C_A_D, 488 .data = &C_A_D,
532 .maxlen = sizeof(int), 489 .maxlen = sizeof(int),
533 .mode = 0644, 490 .mode = 0644,
534 .proc_handler = &proc_dointvec, 491 .proc_handler = proc_dointvec,
535 }, 492 },
536#ifdef CONFIG_FUNCTION_TRACER 493#ifdef CONFIG_FUNCTION_TRACER
537 { 494 {
538 .ctl_name = CTL_UNNUMBERED,
539 .procname = "ftrace_enabled", 495 .procname = "ftrace_enabled",
540 .data = &ftrace_enabled, 496 .data = &ftrace_enabled,
541 .maxlen = sizeof(int), 497 .maxlen = sizeof(int),
542 .mode = 0644, 498 .mode = 0644,
543 .proc_handler = &ftrace_enable_sysctl, 499 .proc_handler = ftrace_enable_sysctl,
544 }, 500 },
545#endif 501#endif
546#ifdef CONFIG_STACK_TRACER 502#ifdef CONFIG_STACK_TRACER
547 { 503 {
548 .ctl_name = CTL_UNNUMBERED,
549 .procname = "stack_tracer_enabled", 504 .procname = "stack_tracer_enabled",
550 .data = &stack_tracer_enabled, 505 .data = &stack_tracer_enabled,
551 .maxlen = sizeof(int), 506 .maxlen = sizeof(int),
552 .mode = 0644, 507 .mode = 0644,
553 .proc_handler = &stack_trace_sysctl, 508 .proc_handler = stack_trace_sysctl,
554 }, 509 },
555#endif 510#endif
556#ifdef CONFIG_TRACING 511#ifdef CONFIG_TRACING
557 { 512 {
558 .ctl_name = CTL_UNNUMBERED,
559 .procname = "ftrace_dump_on_oops", 513 .procname = "ftrace_dump_on_oops",
560 .data = &ftrace_dump_on_oops, 514 .data = &ftrace_dump_on_oops,
561 .maxlen = sizeof(int), 515 .maxlen = sizeof(int),
562 .mode = 0644, 516 .mode = 0644,
563 .proc_handler = &proc_dointvec, 517 .proc_handler = proc_dointvec,
564 }, 518 },
565#endif 519#endif
566#ifdef CONFIG_MODULES 520#ifdef CONFIG_MODULES
567 { 521 {
568 .ctl_name = KERN_MODPROBE,
569 .procname = "modprobe", 522 .procname = "modprobe",
570 .data = &modprobe_path, 523 .data = &modprobe_path,
571 .maxlen = KMOD_PATH_LEN, 524 .maxlen = KMOD_PATH_LEN,
572 .mode = 0644, 525 .mode = 0644,
573 .proc_handler = &proc_dostring, 526 .proc_handler = proc_dostring,
574 .strategy = &sysctl_string,
575 }, 527 },
576 { 528 {
577 .ctl_name = CTL_UNNUMBERED,
578 .procname = "modules_disabled", 529 .procname = "modules_disabled",
579 .data = &modules_disabled, 530 .data = &modules_disabled,
580 .maxlen = sizeof(int), 531 .maxlen = sizeof(int),
581 .mode = 0644, 532 .mode = 0644,
582 /* only handle a transition from default "0" to "1" */ 533 /* only handle a transition from default "0" to "1" */
583 .proc_handler = &proc_dointvec_minmax, 534 .proc_handler = proc_dointvec_minmax,
584 .extra1 = &one, 535 .extra1 = &one,
585 .extra2 = &one, 536 .extra2 = &one,
586 }, 537 },
587#endif 538#endif
588#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) 539#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
589 { 540 {
590 .ctl_name = KERN_HOTPLUG,
591 .procname = "hotplug", 541 .procname = "hotplug",
592 .data = &uevent_helper, 542 .data = &uevent_helper,
593 .maxlen = UEVENT_HELPER_PATH_LEN, 543 .maxlen = UEVENT_HELPER_PATH_LEN,
594 .mode = 0644, 544 .mode = 0644,
595 .proc_handler = &proc_dostring, 545 .proc_handler = proc_dostring,
596 .strategy = &sysctl_string,
597 }, 546 },
598#endif 547#endif
599#ifdef CONFIG_CHR_DEV_SG 548#ifdef CONFIG_CHR_DEV_SG
600 { 549 {
601 .ctl_name = KERN_SG_BIG_BUFF,
602 .procname = "sg-big-buff", 550 .procname = "sg-big-buff",
603 .data = &sg_big_buff, 551 .data = &sg_big_buff,
604 .maxlen = sizeof (int), 552 .maxlen = sizeof (int),
605 .mode = 0444, 553 .mode = 0444,
606 .proc_handler = &proc_dointvec, 554 .proc_handler = proc_dointvec,
607 }, 555 },
608#endif 556#endif
609#ifdef CONFIG_BSD_PROCESS_ACCT 557#ifdef CONFIG_BSD_PROCESS_ACCT
610 { 558 {
611 .ctl_name = KERN_ACCT,
612 .procname = "acct", 559 .procname = "acct",
613 .data = &acct_parm, 560 .data = &acct_parm,
614 .maxlen = 3*sizeof(int), 561 .maxlen = 3*sizeof(int),
615 .mode = 0644, 562 .mode = 0644,
616 .proc_handler = &proc_dointvec, 563 .proc_handler = proc_dointvec,
617 }, 564 },
618#endif 565#endif
619#ifdef CONFIG_MAGIC_SYSRQ 566#ifdef CONFIG_MAGIC_SYSRQ
620 { 567 {
621 .ctl_name = KERN_SYSRQ,
622 .procname = "sysrq", 568 .procname = "sysrq",
623 .data = &__sysrq_enabled, 569 .data = &__sysrq_enabled,
624 .maxlen = sizeof (int), 570 .maxlen = sizeof (int),
625 .mode = 0644, 571 .mode = 0644,
626 .proc_handler = &proc_dointvec, 572 .proc_handler = proc_dointvec,
627 }, 573 },
628#endif 574#endif
629#ifdef CONFIG_PROC_SYSCTL 575#ifdef CONFIG_PROC_SYSCTL
@@ -632,215 +578,188 @@ static struct ctl_table kern_table[] = {
632 .data = NULL, 578 .data = NULL,
633 .maxlen = sizeof (int), 579 .maxlen = sizeof (int),
634 .mode = 0600, 580 .mode = 0600,
635 .proc_handler = &proc_do_cad_pid, 581 .proc_handler = proc_do_cad_pid,
636 }, 582 },
637#endif 583#endif
638 { 584 {
639 .ctl_name = KERN_MAX_THREADS,
640 .procname = "threads-max", 585 .procname = "threads-max",
641 .data = &max_threads, 586 .data = &max_threads,
642 .maxlen = sizeof(int), 587 .maxlen = sizeof(int),
643 .mode = 0644, 588 .mode = 0644,
644 .proc_handler = &proc_dointvec, 589 .proc_handler = proc_dointvec,
645 }, 590 },
646 { 591 {
647 .ctl_name = KERN_RANDOM,
648 .procname = "random", 592 .procname = "random",
649 .mode = 0555, 593 .mode = 0555,
650 .child = random_table, 594 .child = random_table,
651 }, 595 },
652 { 596 {
653 .ctl_name = KERN_OVERFLOWUID,
654 .procname = "overflowuid", 597 .procname = "overflowuid",
655 .data = &overflowuid, 598 .data = &overflowuid,
656 .maxlen = sizeof(int), 599 .maxlen = sizeof(int),
657 .mode = 0644, 600 .mode = 0644,
658 .proc_handler = &proc_dointvec_minmax, 601 .proc_handler = proc_dointvec_minmax,
659 .strategy = &sysctl_intvec,
660 .extra1 = &minolduid, 602 .extra1 = &minolduid,
661 .extra2 = &maxolduid, 603 .extra2 = &maxolduid,
662 }, 604 },
663 { 605 {
664 .ctl_name = KERN_OVERFLOWGID,
665 .procname = "overflowgid", 606 .procname = "overflowgid",
666 .data = &overflowgid, 607 .data = &overflowgid,
667 .maxlen = sizeof(int), 608 .maxlen = sizeof(int),
668 .mode = 0644, 609 .mode = 0644,
669 .proc_handler = &proc_dointvec_minmax, 610 .proc_handler = proc_dointvec_minmax,
670 .strategy = &sysctl_intvec,
671 .extra1 = &minolduid, 611 .extra1 = &minolduid,
672 .extra2 = &maxolduid, 612 .extra2 = &maxolduid,
673 }, 613 },
674#ifdef CONFIG_S390 614#ifdef CONFIG_S390
675#ifdef CONFIG_MATHEMU 615#ifdef CONFIG_MATHEMU
676 { 616 {
677 .ctl_name = KERN_IEEE_EMULATION_WARNINGS,
678 .procname = "ieee_emulation_warnings", 617 .procname = "ieee_emulation_warnings",
679 .data = &sysctl_ieee_emulation_warnings, 618 .data = &sysctl_ieee_emulation_warnings,
680 .maxlen = sizeof(int), 619 .maxlen = sizeof(int),
681 .mode = 0644, 620 .mode = 0644,
682 .proc_handler = &proc_dointvec, 621 .proc_handler = proc_dointvec,
683 }, 622 },
684#endif 623#endif
685 { 624 {
686 .ctl_name = KERN_S390_USER_DEBUG_LOGGING,
687 .procname = "userprocess_debug", 625 .procname = "userprocess_debug",
688 .data = &sysctl_userprocess_debug, 626 .data = &sysctl_userprocess_debug,
689 .maxlen = sizeof(int), 627 .maxlen = sizeof(int),
690 .mode = 0644, 628 .mode = 0644,
691 .proc_handler = &proc_dointvec, 629 .proc_handler = proc_dointvec,
692 }, 630 },
693#endif 631#endif
694 { 632 {
695 .ctl_name = KERN_PIDMAX,
696 .procname = "pid_max", 633 .procname = "pid_max",
697 .data = &pid_max, 634 .data = &pid_max,
698 .maxlen = sizeof (int), 635 .maxlen = sizeof (int),
699 .mode = 0644, 636 .mode = 0644,
700 .proc_handler = &proc_dointvec_minmax, 637 .proc_handler = proc_dointvec_minmax,
701 .strategy = sysctl_intvec,
702 .extra1 = &pid_max_min, 638 .extra1 = &pid_max_min,
703 .extra2 = &pid_max_max, 639 .extra2 = &pid_max_max,
704 }, 640 },
705 { 641 {
706 .ctl_name = KERN_PANIC_ON_OOPS,
707 .procname = "panic_on_oops", 642 .procname = "panic_on_oops",
708 .data = &panic_on_oops, 643 .data = &panic_on_oops,
709 .maxlen = sizeof(int), 644 .maxlen = sizeof(int),
710 .mode = 0644, 645 .mode = 0644,
711 .proc_handler = &proc_dointvec, 646 .proc_handler = proc_dointvec,
712 }, 647 },
713#if defined CONFIG_PRINTK 648#if defined CONFIG_PRINTK
714 { 649 {
715 .ctl_name = KERN_PRINTK,
716 .procname = "printk", 650 .procname = "printk",
717 .data = &console_loglevel, 651 .data = &console_loglevel,
718 .maxlen = 4*sizeof(int), 652 .maxlen = 4*sizeof(int),
719 .mode = 0644, 653 .mode = 0644,
720 .proc_handler = &proc_dointvec, 654 .proc_handler = proc_dointvec,
721 }, 655 },
722 { 656 {
723 .ctl_name = KERN_PRINTK_RATELIMIT,
724 .procname = "printk_ratelimit", 657 .procname = "printk_ratelimit",
725 .data = &printk_ratelimit_state.interval, 658 .data = &printk_ratelimit_state.interval,
726 .maxlen = sizeof(int), 659 .maxlen = sizeof(int),
727 .mode = 0644, 660 .mode = 0644,
728 .proc_handler = &proc_dointvec_jiffies, 661 .proc_handler = proc_dointvec_jiffies,
729 .strategy = &sysctl_jiffies,
730 }, 662 },
731 { 663 {
732 .ctl_name = KERN_PRINTK_RATELIMIT_BURST,
733 .procname = "printk_ratelimit_burst", 664 .procname = "printk_ratelimit_burst",
734 .data = &printk_ratelimit_state.burst, 665 .data = &printk_ratelimit_state.burst,
735 .maxlen = sizeof(int), 666 .maxlen = sizeof(int),
736 .mode = 0644, 667 .mode = 0644,
737 .proc_handler = &proc_dointvec, 668 .proc_handler = proc_dointvec,
738 }, 669 },
739 { 670 {
740 .ctl_name = CTL_UNNUMBERED,
741 .procname = "printk_delay", 671 .procname = "printk_delay",
742 .data = &printk_delay_msec, 672 .data = &printk_delay_msec,
743 .maxlen = sizeof(int), 673 .maxlen = sizeof(int),
744 .mode = 0644, 674 .mode = 0644,
745 .proc_handler = &proc_dointvec_minmax, 675 .proc_handler = proc_dointvec_minmax,
746 .strategy = &sysctl_intvec,
747 .extra1 = &zero, 676 .extra1 = &zero,
748 .extra2 = &ten_thousand, 677 .extra2 = &ten_thousand,
749 }, 678 },
750#endif 679#endif
751 { 680 {
752 .ctl_name = KERN_NGROUPS_MAX,
753 .procname = "ngroups_max", 681 .procname = "ngroups_max",
754 .data = &ngroups_max, 682 .data = &ngroups_max,
755 .maxlen = sizeof (int), 683 .maxlen = sizeof (int),
756 .mode = 0444, 684 .mode = 0444,
757 .proc_handler = &proc_dointvec, 685 .proc_handler = proc_dointvec,
758 }, 686 },
759#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 687#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
760 { 688 {
761 .ctl_name = KERN_UNKNOWN_NMI_PANIC,
762 .procname = "unknown_nmi_panic", 689 .procname = "unknown_nmi_panic",
763 .data = &unknown_nmi_panic, 690 .data = &unknown_nmi_panic,
764 .maxlen = sizeof (int), 691 .maxlen = sizeof (int),
765 .mode = 0644, 692 .mode = 0644,
766 .proc_handler = &proc_dointvec, 693 .proc_handler = proc_dointvec,
767 }, 694 },
768 { 695 {
769 .procname = "nmi_watchdog", 696 .procname = "nmi_watchdog",
770 .data = &nmi_watchdog_enabled, 697 .data = &nmi_watchdog_enabled,
771 .maxlen = sizeof (int), 698 .maxlen = sizeof (int),
772 .mode = 0644, 699 .mode = 0644,
773 .proc_handler = &proc_nmi_enabled, 700 .proc_handler = proc_nmi_enabled,
774 }, 701 },
775#endif 702#endif
776#if defined(CONFIG_X86) 703#if defined(CONFIG_X86)
777 { 704 {
778 .ctl_name = KERN_PANIC_ON_NMI,
779 .procname = "panic_on_unrecovered_nmi", 705 .procname = "panic_on_unrecovered_nmi",
780 .data = &panic_on_unrecovered_nmi, 706 .data = &panic_on_unrecovered_nmi,
781 .maxlen = sizeof(int), 707 .maxlen = sizeof(int),
782 .mode = 0644, 708 .mode = 0644,
783 .proc_handler = &proc_dointvec, 709 .proc_handler = proc_dointvec,
784 }, 710 },
785 { 711 {
786 .ctl_name = CTL_UNNUMBERED,
787 .procname = "panic_on_io_nmi", 712 .procname = "panic_on_io_nmi",
788 .data = &panic_on_io_nmi, 713 .data = &panic_on_io_nmi,
789 .maxlen = sizeof(int), 714 .maxlen = sizeof(int),
790 .mode = 0644, 715 .mode = 0644,
791 .proc_handler = &proc_dointvec, 716 .proc_handler = proc_dointvec,
792 }, 717 },
793 { 718 {
794 .ctl_name = KERN_BOOTLOADER_TYPE,
795 .procname = "bootloader_type", 719 .procname = "bootloader_type",
796 .data = &bootloader_type, 720 .data = &bootloader_type,
797 .maxlen = sizeof (int), 721 .maxlen = sizeof (int),
798 .mode = 0444, 722 .mode = 0444,
799 .proc_handler = &proc_dointvec, 723 .proc_handler = proc_dointvec,
800 }, 724 },
801 { 725 {
802 .ctl_name = CTL_UNNUMBERED,
803 .procname = "bootloader_version", 726 .procname = "bootloader_version",
804 .data = &bootloader_version, 727 .data = &bootloader_version,
805 .maxlen = sizeof (int), 728 .maxlen = sizeof (int),
806 .mode = 0444, 729 .mode = 0444,
807 .proc_handler = &proc_dointvec, 730 .proc_handler = proc_dointvec,
808 }, 731 },
809 { 732 {
810 .ctl_name = CTL_UNNUMBERED,
811 .procname = "kstack_depth_to_print", 733 .procname = "kstack_depth_to_print",
812 .data = &kstack_depth_to_print, 734 .data = &kstack_depth_to_print,
813 .maxlen = sizeof(int), 735 .maxlen = sizeof(int),
814 .mode = 0644, 736 .mode = 0644,
815 .proc_handler = &proc_dointvec, 737 .proc_handler = proc_dointvec,
816 }, 738 },
817 { 739 {
818 .ctl_name = CTL_UNNUMBERED,
819 .procname = "io_delay_type", 740 .procname = "io_delay_type",
820 .data = &io_delay_type, 741 .data = &io_delay_type,
821 .maxlen = sizeof(int), 742 .maxlen = sizeof(int),
822 .mode = 0644, 743 .mode = 0644,
823 .proc_handler = &proc_dointvec, 744 .proc_handler = proc_dointvec,
824 }, 745 },
825#endif 746#endif
826#if defined(CONFIG_MMU) 747#if defined(CONFIG_MMU)
827 { 748 {
828 .ctl_name = KERN_RANDOMIZE,
829 .procname = "randomize_va_space", 749 .procname = "randomize_va_space",
830 .data = &randomize_va_space, 750 .data = &randomize_va_space,
831 .maxlen = sizeof(int), 751 .maxlen = sizeof(int),
832 .mode = 0644, 752 .mode = 0644,
833 .proc_handler = &proc_dointvec, 753 .proc_handler = proc_dointvec,
834 }, 754 },
835#endif 755#endif
836#if defined(CONFIG_S390) && defined(CONFIG_SMP) 756#if defined(CONFIG_S390) && defined(CONFIG_SMP)
837 { 757 {
838 .ctl_name = KERN_SPIN_RETRY,
839 .procname = "spin_retry", 758 .procname = "spin_retry",
840 .data = &spin_retry, 759 .data = &spin_retry,
841 .maxlen = sizeof (int), 760 .maxlen = sizeof (int),
842 .mode = 0644, 761 .mode = 0644,
843 .proc_handler = &proc_dointvec, 762 .proc_handler = proc_dointvec,
844 }, 763 },
845#endif 764#endif
846#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) 765#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
@@ -849,123 +768,104 @@ static struct ctl_table kern_table[] = {
849 .data = &acpi_realmode_flags, 768 .data = &acpi_realmode_flags,
850 .maxlen = sizeof (unsigned long), 769 .maxlen = sizeof (unsigned long),
851 .mode = 0644, 770 .mode = 0644,
852 .proc_handler = &proc_doulongvec_minmax, 771 .proc_handler = proc_doulongvec_minmax,
853 }, 772 },
854#endif 773#endif
855#ifdef CONFIG_IA64 774#ifdef CONFIG_IA64
856 { 775 {
857 .ctl_name = KERN_IA64_UNALIGNED,
858 .procname = "ignore-unaligned-usertrap", 776 .procname = "ignore-unaligned-usertrap",
859 .data = &no_unaligned_warning, 777 .data = &no_unaligned_warning,
860 .maxlen = sizeof (int), 778 .maxlen = sizeof (int),
861 .mode = 0644, 779 .mode = 0644,
862 .proc_handler = &proc_dointvec, 780 .proc_handler = proc_dointvec,
863 }, 781 },
864 { 782 {
865 .ctl_name = CTL_UNNUMBERED,
866 .procname = "unaligned-dump-stack", 783 .procname = "unaligned-dump-stack",
867 .data = &unaligned_dump_stack, 784 .data = &unaligned_dump_stack,
868 .maxlen = sizeof (int), 785 .maxlen = sizeof (int),
869 .mode = 0644, 786 .mode = 0644,
870 .proc_handler = &proc_dointvec, 787 .proc_handler = proc_dointvec,
871 }, 788 },
872#endif 789#endif
873#ifdef CONFIG_DETECT_SOFTLOCKUP 790#ifdef CONFIG_DETECT_SOFTLOCKUP
874 { 791 {
875 .ctl_name = CTL_UNNUMBERED,
876 .procname = "softlockup_panic", 792 .procname = "softlockup_panic",
877 .data = &softlockup_panic, 793 .data = &softlockup_panic,
878 .maxlen = sizeof(int), 794 .maxlen = sizeof(int),
879 .mode = 0644, 795 .mode = 0644,
880 .proc_handler = &proc_dointvec_minmax, 796 .proc_handler = proc_dointvec_minmax,
881 .strategy = &sysctl_intvec,
882 .extra1 = &zero, 797 .extra1 = &zero,
883 .extra2 = &one, 798 .extra2 = &one,
884 }, 799 },
885 { 800 {
886 .ctl_name = CTL_UNNUMBERED,
887 .procname = "softlockup_thresh", 801 .procname = "softlockup_thresh",
888 .data = &softlockup_thresh, 802 .data = &softlockup_thresh,
889 .maxlen = sizeof(int), 803 .maxlen = sizeof(int),
890 .mode = 0644, 804 .mode = 0644,
891 .proc_handler = &proc_dosoftlockup_thresh, 805 .proc_handler = proc_dosoftlockup_thresh,
892 .strategy = &sysctl_intvec,
893 .extra1 = &neg_one, 806 .extra1 = &neg_one,
894 .extra2 = &sixty, 807 .extra2 = &sixty,
895 }, 808 },
896#endif 809#endif
897#ifdef CONFIG_DETECT_HUNG_TASK 810#ifdef CONFIG_DETECT_HUNG_TASK
898 { 811 {
899 .ctl_name = CTL_UNNUMBERED,
900 .procname = "hung_task_panic", 812 .procname = "hung_task_panic",
901 .data = &sysctl_hung_task_panic, 813 .data = &sysctl_hung_task_panic,
902 .maxlen = sizeof(int), 814 .maxlen = sizeof(int),
903 .mode = 0644, 815 .mode = 0644,
904 .proc_handler = &proc_dointvec_minmax, 816 .proc_handler = proc_dointvec_minmax,
905 .strategy = &sysctl_intvec,
906 .extra1 = &zero, 817 .extra1 = &zero,
907 .extra2 = &one, 818 .extra2 = &one,
908 }, 819 },
909 { 820 {
910 .ctl_name = CTL_UNNUMBERED,
911 .procname = "hung_task_check_count", 821 .procname = "hung_task_check_count",
912 .data = &sysctl_hung_task_check_count, 822 .data = &sysctl_hung_task_check_count,
913 .maxlen = sizeof(unsigned long), 823 .maxlen = sizeof(unsigned long),
914 .mode = 0644, 824 .mode = 0644,
915 .proc_handler = &proc_doulongvec_minmax, 825 .proc_handler = proc_doulongvec_minmax,
916 .strategy = &sysctl_intvec,
917 }, 826 },
918 { 827 {
919 .ctl_name = CTL_UNNUMBERED,
920 .procname = "hung_task_timeout_secs", 828 .procname = "hung_task_timeout_secs",
921 .data = &sysctl_hung_task_timeout_secs, 829 .data = &sysctl_hung_task_timeout_secs,
922 .maxlen = sizeof(unsigned long), 830 .maxlen = sizeof(unsigned long),
923 .mode = 0644, 831 .mode = 0644,
924 .proc_handler = &proc_dohung_task_timeout_secs, 832 .proc_handler = proc_dohung_task_timeout_secs,
925 .strategy = &sysctl_intvec,
926 }, 833 },
927 { 834 {
928 .ctl_name = CTL_UNNUMBERED,
929 .procname = "hung_task_warnings", 835 .procname = "hung_task_warnings",
930 .data = &sysctl_hung_task_warnings, 836 .data = &sysctl_hung_task_warnings,
931 .maxlen = sizeof(unsigned long), 837 .maxlen = sizeof(unsigned long),
932 .mode = 0644, 838 .mode = 0644,
933 .proc_handler = &proc_doulongvec_minmax, 839 .proc_handler = proc_doulongvec_minmax,
934 .strategy = &sysctl_intvec,
935 }, 840 },
936#endif 841#endif
937#ifdef CONFIG_COMPAT 842#ifdef CONFIG_COMPAT
938 { 843 {
939 .ctl_name = KERN_COMPAT_LOG,
940 .procname = "compat-log", 844 .procname = "compat-log",
941 .data = &compat_log, 845 .data = &compat_log,
942 .maxlen = sizeof (int), 846 .maxlen = sizeof (int),
943 .mode = 0644, 847 .mode = 0644,
944 .proc_handler = &proc_dointvec, 848 .proc_handler = proc_dointvec,
945 }, 849 },
946#endif 850#endif
947#ifdef CONFIG_RT_MUTEXES 851#ifdef CONFIG_RT_MUTEXES
948 { 852 {
949 .ctl_name = KERN_MAX_LOCK_DEPTH,
950 .procname = "max_lock_depth", 853 .procname = "max_lock_depth",
951 .data = &max_lock_depth, 854 .data = &max_lock_depth,
952 .maxlen = sizeof(int), 855 .maxlen = sizeof(int),
953 .mode = 0644, 856 .mode = 0644,
954 .proc_handler = &proc_dointvec, 857 .proc_handler = proc_dointvec,
955 }, 858 },
956#endif 859#endif
957 { 860 {
958 .ctl_name = CTL_UNNUMBERED,
959 .procname = "poweroff_cmd", 861 .procname = "poweroff_cmd",
960 .data = &poweroff_cmd, 862 .data = &poweroff_cmd,
961 .maxlen = POWEROFF_CMD_PATH_LEN, 863 .maxlen = POWEROFF_CMD_PATH_LEN,
962 .mode = 0644, 864 .mode = 0644,
963 .proc_handler = &proc_dostring, 865 .proc_handler = proc_dostring,
964 .strategy = &sysctl_string,
965 }, 866 },
966#ifdef CONFIG_KEYS 867#ifdef CONFIG_KEYS
967 { 868 {
968 .ctl_name = CTL_UNNUMBERED,
969 .procname = "keys", 869 .procname = "keys",
970 .mode = 0555, 870 .mode = 0555,
971 .child = key_sysctls, 871 .child = key_sysctls,
@@ -973,17 +873,15 @@ static struct ctl_table kern_table[] = {
973#endif 873#endif
974#ifdef CONFIG_RCU_TORTURE_TEST 874#ifdef CONFIG_RCU_TORTURE_TEST
975 { 875 {
976 .ctl_name = CTL_UNNUMBERED,
977 .procname = "rcutorture_runnable", 876 .procname = "rcutorture_runnable",
978 .data = &rcutorture_runnable, 877 .data = &rcutorture_runnable,
979 .maxlen = sizeof(int), 878 .maxlen = sizeof(int),
980 .mode = 0644, 879 .mode = 0644,
981 .proc_handler = &proc_dointvec, 880 .proc_handler = proc_dointvec,
982 }, 881 },
983#endif 882#endif
984#ifdef CONFIG_SLOW_WORK 883#ifdef CONFIG_SLOW_WORK
985 { 884 {
986 .ctl_name = CTL_UNNUMBERED,
987 .procname = "slow-work", 885 .procname = "slow-work",
988 .mode = 0555, 886 .mode = 0555,
989 .child = slow_work_sysctls, 887 .child = slow_work_sysctls,
@@ -991,146 +889,127 @@ static struct ctl_table kern_table[] = {
991#endif 889#endif
992#ifdef CONFIG_PERF_EVENTS 890#ifdef CONFIG_PERF_EVENTS
993 { 891 {
994 .ctl_name = CTL_UNNUMBERED,
995 .procname = "perf_event_paranoid", 892 .procname = "perf_event_paranoid",
996 .data = &sysctl_perf_event_paranoid, 893 .data = &sysctl_perf_event_paranoid,
997 .maxlen = sizeof(sysctl_perf_event_paranoid), 894 .maxlen = sizeof(sysctl_perf_event_paranoid),
998 .mode = 0644, 895 .mode = 0644,
999 .proc_handler = &proc_dointvec, 896 .proc_handler = proc_dointvec,
1000 }, 897 },
1001 { 898 {
1002 .ctl_name = CTL_UNNUMBERED,
1003 .procname = "perf_event_mlock_kb", 899 .procname = "perf_event_mlock_kb",
1004 .data = &sysctl_perf_event_mlock, 900 .data = &sysctl_perf_event_mlock,
1005 .maxlen = sizeof(sysctl_perf_event_mlock), 901 .maxlen = sizeof(sysctl_perf_event_mlock),
1006 .mode = 0644, 902 .mode = 0644,
1007 .proc_handler = &proc_dointvec, 903 .proc_handler = proc_dointvec,
1008 }, 904 },
1009 { 905 {
1010 .ctl_name = CTL_UNNUMBERED,
1011 .procname = "perf_event_max_sample_rate", 906 .procname = "perf_event_max_sample_rate",
1012 .data = &sysctl_perf_event_sample_rate, 907 .data = &sysctl_perf_event_sample_rate,
1013 .maxlen = sizeof(sysctl_perf_event_sample_rate), 908 .maxlen = sizeof(sysctl_perf_event_sample_rate),
1014 .mode = 0644, 909 .mode = 0644,
1015 .proc_handler = &proc_dointvec, 910 .proc_handler = proc_dointvec,
1016 }, 911 },
1017#endif 912#endif
1018#ifdef CONFIG_KMEMCHECK 913#ifdef CONFIG_KMEMCHECK
1019 { 914 {
1020 .ctl_name = CTL_UNNUMBERED,
1021 .procname = "kmemcheck", 915 .procname = "kmemcheck",
1022 .data = &kmemcheck_enabled, 916 .data = &kmemcheck_enabled,
1023 .maxlen = sizeof(int), 917 .maxlen = sizeof(int),
1024 .mode = 0644, 918 .mode = 0644,
1025 .proc_handler = &proc_dointvec, 919 .proc_handler = proc_dointvec,
1026 }, 920 },
1027#endif 921#endif
1028#ifdef CONFIG_BLOCK 922#ifdef CONFIG_BLOCK
1029 { 923 {
1030 .ctl_name = CTL_UNNUMBERED,
1031 .procname = "blk_iopoll", 924 .procname = "blk_iopoll",
1032 .data = &blk_iopoll_enabled, 925 .data = &blk_iopoll_enabled,
1033 .maxlen = sizeof(int), 926 .maxlen = sizeof(int),
1034 .mode = 0644, 927 .mode = 0644,
1035 .proc_handler = &proc_dointvec, 928 .proc_handler = proc_dointvec,
1036 }, 929 },
1037#endif 930#endif
1038/* 931/*
1039 * NOTE: do not add new entries to this table unless you have read 932 * NOTE: do not add new entries to this table unless you have read
1040 * Documentation/sysctl/ctl_unnumbered.txt 933 * Documentation/sysctl/ctl_unnumbered.txt
1041 */ 934 */
1042 { .ctl_name = 0 } 935 { }
1043}; 936};
1044 937
1045static struct ctl_table vm_table[] = { 938static struct ctl_table vm_table[] = {
1046 { 939 {
1047 .ctl_name = VM_OVERCOMMIT_MEMORY,
1048 .procname = "overcommit_memory", 940 .procname = "overcommit_memory",
1049 .data = &sysctl_overcommit_memory, 941 .data = &sysctl_overcommit_memory,
1050 .maxlen = sizeof(sysctl_overcommit_memory), 942 .maxlen = sizeof(sysctl_overcommit_memory),
1051 .mode = 0644, 943 .mode = 0644,
1052 .proc_handler = &proc_dointvec, 944 .proc_handler = proc_dointvec,
1053 }, 945 },
1054 { 946 {
1055 .ctl_name = VM_PANIC_ON_OOM,
1056 .procname = "panic_on_oom", 947 .procname = "panic_on_oom",
1057 .data = &sysctl_panic_on_oom, 948 .data = &sysctl_panic_on_oom,
1058 .maxlen = sizeof(sysctl_panic_on_oom), 949 .maxlen = sizeof(sysctl_panic_on_oom),
1059 .mode = 0644, 950 .mode = 0644,
1060 .proc_handler = &proc_dointvec, 951 .proc_handler = proc_dointvec,
1061 }, 952 },
1062 { 953 {
1063 .ctl_name = CTL_UNNUMBERED,
1064 .procname = "oom_kill_allocating_task", 954 .procname = "oom_kill_allocating_task",
1065 .data = &sysctl_oom_kill_allocating_task, 955 .data = &sysctl_oom_kill_allocating_task,
1066 .maxlen = sizeof(sysctl_oom_kill_allocating_task), 956 .maxlen = sizeof(sysctl_oom_kill_allocating_task),
1067 .mode = 0644, 957 .mode = 0644,
1068 .proc_handler = &proc_dointvec, 958 .proc_handler = proc_dointvec,
1069 }, 959 },
1070 { 960 {
1071 .ctl_name = CTL_UNNUMBERED,
1072 .procname = "oom_dump_tasks", 961 .procname = "oom_dump_tasks",
1073 .data = &sysctl_oom_dump_tasks, 962 .data = &sysctl_oom_dump_tasks,
1074 .maxlen = sizeof(sysctl_oom_dump_tasks), 963 .maxlen = sizeof(sysctl_oom_dump_tasks),
1075 .mode = 0644, 964 .mode = 0644,
1076 .proc_handler = &proc_dointvec, 965 .proc_handler = proc_dointvec,
1077 }, 966 },
1078 { 967 {
1079 .ctl_name = VM_OVERCOMMIT_RATIO,
1080 .procname = "overcommit_ratio", 968 .procname = "overcommit_ratio",
1081 .data = &sysctl_overcommit_ratio, 969 .data = &sysctl_overcommit_ratio,
1082 .maxlen = sizeof(sysctl_overcommit_ratio), 970 .maxlen = sizeof(sysctl_overcommit_ratio),
1083 .mode = 0644, 971 .mode = 0644,
1084 .proc_handler = &proc_dointvec, 972 .proc_handler = proc_dointvec,
1085 }, 973 },
1086 { 974 {
1087 .ctl_name = VM_PAGE_CLUSTER,
1088 .procname = "page-cluster", 975 .procname = "page-cluster",
1089 .data = &page_cluster, 976 .data = &page_cluster,
1090 .maxlen = sizeof(int), 977 .maxlen = sizeof(int),
1091 .mode = 0644, 978 .mode = 0644,
1092 .proc_handler = &proc_dointvec, 979 .proc_handler = proc_dointvec,
1093 }, 980 },
1094 { 981 {
1095 .ctl_name = VM_DIRTY_BACKGROUND,
1096 .procname = "dirty_background_ratio", 982 .procname = "dirty_background_ratio",
1097 .data = &dirty_background_ratio, 983 .data = &dirty_background_ratio,
1098 .maxlen = sizeof(dirty_background_ratio), 984 .maxlen = sizeof(dirty_background_ratio),
1099 .mode = 0644, 985 .mode = 0644,
1100 .proc_handler = &dirty_background_ratio_handler, 986 .proc_handler = dirty_background_ratio_handler,
1101 .strategy = &sysctl_intvec,
1102 .extra1 = &zero, 987 .extra1 = &zero,
1103 .extra2 = &one_hundred, 988 .extra2 = &one_hundred,
1104 }, 989 },
1105 { 990 {
1106 .ctl_name = CTL_UNNUMBERED,
1107 .procname = "dirty_background_bytes", 991 .procname = "dirty_background_bytes",
1108 .data = &dirty_background_bytes, 992 .data = &dirty_background_bytes,
1109 .maxlen = sizeof(dirty_background_bytes), 993 .maxlen = sizeof(dirty_background_bytes),
1110 .mode = 0644, 994 .mode = 0644,
1111 .proc_handler = &dirty_background_bytes_handler, 995 .proc_handler = dirty_background_bytes_handler,
1112 .strategy = &sysctl_intvec,
1113 .extra1 = &one_ul, 996 .extra1 = &one_ul,
1114 }, 997 },
1115 { 998 {
1116 .ctl_name = VM_DIRTY_RATIO,
1117 .procname = "dirty_ratio", 999 .procname = "dirty_ratio",
1118 .data = &vm_dirty_ratio, 1000 .data = &vm_dirty_ratio,
1119 .maxlen = sizeof(vm_dirty_ratio), 1001 .maxlen = sizeof(vm_dirty_ratio),
1120 .mode = 0644, 1002 .mode = 0644,
1121 .proc_handler = &dirty_ratio_handler, 1003 .proc_handler = dirty_ratio_handler,
1122 .strategy = &sysctl_intvec,
1123 .extra1 = &zero, 1004 .extra1 = &zero,
1124 .extra2 = &one_hundred, 1005 .extra2 = &one_hundred,
1125 }, 1006 },
1126 { 1007 {
1127 .ctl_name = CTL_UNNUMBERED,
1128 .procname = "dirty_bytes", 1008 .procname = "dirty_bytes",
1129 .data = &vm_dirty_bytes, 1009 .data = &vm_dirty_bytes,
1130 .maxlen = sizeof(vm_dirty_bytes), 1010 .maxlen = sizeof(vm_dirty_bytes),
1131 .mode = 0644, 1011 .mode = 0644,
1132 .proc_handler = &dirty_bytes_handler, 1012 .proc_handler = dirty_bytes_handler,
1133 .strategy = &sysctl_intvec,
1134 .extra1 = &dirty_bytes_min, 1013 .extra1 = &dirty_bytes_min,
1135 }, 1014 },
1136 { 1015 {
@@ -1138,31 +1017,28 @@ static struct ctl_table vm_table[] = {
1138 .data = &dirty_writeback_interval, 1017 .data = &dirty_writeback_interval,
1139 .maxlen = sizeof(dirty_writeback_interval), 1018 .maxlen = sizeof(dirty_writeback_interval),
1140 .mode = 0644, 1019 .mode = 0644,
1141 .proc_handler = &dirty_writeback_centisecs_handler, 1020 .proc_handler = dirty_writeback_centisecs_handler,
1142 }, 1021 },
1143 { 1022 {
1144 .procname = "dirty_expire_centisecs", 1023 .procname = "dirty_expire_centisecs",
1145 .data = &dirty_expire_interval, 1024 .data = &dirty_expire_interval,
1146 .maxlen = sizeof(dirty_expire_interval), 1025 .maxlen = sizeof(dirty_expire_interval),
1147 .mode = 0644, 1026 .mode = 0644,
1148 .proc_handler = &proc_dointvec, 1027 .proc_handler = proc_dointvec,
1149 }, 1028 },
1150 { 1029 {
1151 .ctl_name = VM_NR_PDFLUSH_THREADS,
1152 .procname = "nr_pdflush_threads", 1030 .procname = "nr_pdflush_threads",
1153 .data = &nr_pdflush_threads, 1031 .data = &nr_pdflush_threads,
1154 .maxlen = sizeof nr_pdflush_threads, 1032 .maxlen = sizeof nr_pdflush_threads,
1155 .mode = 0444 /* read-only*/, 1033 .mode = 0444 /* read-only*/,
1156 .proc_handler = &proc_dointvec, 1034 .proc_handler = proc_dointvec,
1157 }, 1035 },
1158 { 1036 {
1159 .ctl_name = VM_SWAPPINESS,
1160 .procname = "swappiness", 1037 .procname = "swappiness",
1161 .data = &vm_swappiness, 1038 .data = &vm_swappiness,
1162 .maxlen = sizeof(vm_swappiness), 1039 .maxlen = sizeof(vm_swappiness),
1163 .mode = 0644, 1040 .mode = 0644,
1164 .proc_handler = &proc_dointvec_minmax, 1041 .proc_handler = proc_dointvec_minmax,
1165 .strategy = &sysctl_intvec,
1166 .extra1 = &zero, 1042 .extra1 = &zero,
1167 .extra2 = &one_hundred, 1043 .extra2 = &one_hundred,
1168 }, 1044 },
@@ -1172,255 +1048,213 @@ static struct ctl_table vm_table[] = {
1172 .data = NULL, 1048 .data = NULL,
1173 .maxlen = sizeof(unsigned long), 1049 .maxlen = sizeof(unsigned long),
1174 .mode = 0644, 1050 .mode = 0644,
1175 .proc_handler = &hugetlb_sysctl_handler, 1051 .proc_handler = hugetlb_sysctl_handler,
1176 .extra1 = (void *)&hugetlb_zero, 1052 .extra1 = (void *)&hugetlb_zero,
1177 .extra2 = (void *)&hugetlb_infinity, 1053 .extra2 = (void *)&hugetlb_infinity,
1178 }, 1054 },
1179 { 1055 {
1180 .ctl_name = VM_HUGETLB_GROUP,
1181 .procname = "hugetlb_shm_group", 1056 .procname = "hugetlb_shm_group",
1182 .data = &sysctl_hugetlb_shm_group, 1057 .data = &sysctl_hugetlb_shm_group,
1183 .maxlen = sizeof(gid_t), 1058 .maxlen = sizeof(gid_t),
1184 .mode = 0644, 1059 .mode = 0644,
1185 .proc_handler = &proc_dointvec, 1060 .proc_handler = proc_dointvec,
1186 }, 1061 },
1187 { 1062 {
1188 .ctl_name = CTL_UNNUMBERED,
1189 .procname = "hugepages_treat_as_movable", 1063 .procname = "hugepages_treat_as_movable",
1190 .data = &hugepages_treat_as_movable, 1064 .data = &hugepages_treat_as_movable,
1191 .maxlen = sizeof(int), 1065 .maxlen = sizeof(int),
1192 .mode = 0644, 1066 .mode = 0644,
1193 .proc_handler = &hugetlb_treat_movable_handler, 1067 .proc_handler = hugetlb_treat_movable_handler,
1194 }, 1068 },
1195 { 1069 {
1196 .ctl_name = CTL_UNNUMBERED,
1197 .procname = "nr_overcommit_hugepages", 1070 .procname = "nr_overcommit_hugepages",
1198 .data = NULL, 1071 .data = NULL,
1199 .maxlen = sizeof(unsigned long), 1072 .maxlen = sizeof(unsigned long),
1200 .mode = 0644, 1073 .mode = 0644,
1201 .proc_handler = &hugetlb_overcommit_handler, 1074 .proc_handler = hugetlb_overcommit_handler,
1202 .extra1 = (void *)&hugetlb_zero, 1075 .extra1 = (void *)&hugetlb_zero,
1203 .extra2 = (void *)&hugetlb_infinity, 1076 .extra2 = (void *)&hugetlb_infinity,
1204 }, 1077 },
1205#endif 1078#endif
1206 { 1079 {
1207 .ctl_name = VM_LOWMEM_RESERVE_RATIO,
1208 .procname = "lowmem_reserve_ratio", 1080 .procname = "lowmem_reserve_ratio",
1209 .data = &sysctl_lowmem_reserve_ratio, 1081 .data = &sysctl_lowmem_reserve_ratio,
1210 .maxlen = sizeof(sysctl_lowmem_reserve_ratio), 1082 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
1211 .mode = 0644, 1083 .mode = 0644,
1212 .proc_handler = &lowmem_reserve_ratio_sysctl_handler, 1084 .proc_handler = lowmem_reserve_ratio_sysctl_handler,
1213 .strategy = &sysctl_intvec,
1214 }, 1085 },
1215 { 1086 {
1216 .ctl_name = VM_DROP_PAGECACHE,
1217 .procname = "drop_caches", 1087 .procname = "drop_caches",
1218 .data = &sysctl_drop_caches, 1088 .data = &sysctl_drop_caches,
1219 .maxlen = sizeof(int), 1089 .maxlen = sizeof(int),
1220 .mode = 0644, 1090 .mode = 0644,
1221 .proc_handler = drop_caches_sysctl_handler, 1091 .proc_handler = drop_caches_sysctl_handler,
1222 .strategy = &sysctl_intvec,
1223 }, 1092 },
1224 { 1093 {
1225 .ctl_name = VM_MIN_FREE_KBYTES,
1226 .procname = "min_free_kbytes", 1094 .procname = "min_free_kbytes",
1227 .data = &min_free_kbytes, 1095 .data = &min_free_kbytes,
1228 .maxlen = sizeof(min_free_kbytes), 1096 .maxlen = sizeof(min_free_kbytes),
1229 .mode = 0644, 1097 .mode = 0644,
1230 .proc_handler = &min_free_kbytes_sysctl_handler, 1098 .proc_handler = min_free_kbytes_sysctl_handler,
1231 .strategy = &sysctl_intvec,
1232 .extra1 = &zero, 1099 .extra1 = &zero,
1233 }, 1100 },
1234 { 1101 {
1235 .ctl_name = VM_PERCPU_PAGELIST_FRACTION,
1236 .procname = "percpu_pagelist_fraction", 1102 .procname = "percpu_pagelist_fraction",
1237 .data = &percpu_pagelist_fraction, 1103 .data = &percpu_pagelist_fraction,
1238 .maxlen = sizeof(percpu_pagelist_fraction), 1104 .maxlen = sizeof(percpu_pagelist_fraction),
1239 .mode = 0644, 1105 .mode = 0644,
1240 .proc_handler = &percpu_pagelist_fraction_sysctl_handler, 1106 .proc_handler = percpu_pagelist_fraction_sysctl_handler,
1241 .strategy = &sysctl_intvec,
1242 .extra1 = &min_percpu_pagelist_fract, 1107 .extra1 = &min_percpu_pagelist_fract,
1243 }, 1108 },
1244#ifdef CONFIG_MMU 1109#ifdef CONFIG_MMU
1245 { 1110 {
1246 .ctl_name = VM_MAX_MAP_COUNT,
1247 .procname = "max_map_count", 1111 .procname = "max_map_count",
1248 .data = &sysctl_max_map_count, 1112 .data = &sysctl_max_map_count,
1249 .maxlen = sizeof(sysctl_max_map_count), 1113 .maxlen = sizeof(sysctl_max_map_count),
1250 .mode = 0644, 1114 .mode = 0644,
1251 .proc_handler = &proc_dointvec 1115 .proc_handler = proc_dointvec
1252 }, 1116 },
1253#else 1117#else
1254 { 1118 {
1255 .ctl_name = CTL_UNNUMBERED,
1256 .procname = "nr_trim_pages", 1119 .procname = "nr_trim_pages",
1257 .data = &sysctl_nr_trim_pages, 1120 .data = &sysctl_nr_trim_pages,
1258 .maxlen = sizeof(sysctl_nr_trim_pages), 1121 .maxlen = sizeof(sysctl_nr_trim_pages),
1259 .mode = 0644, 1122 .mode = 0644,
1260 .proc_handler = &proc_dointvec_minmax, 1123 .proc_handler = proc_dointvec_minmax,
1261 .strategy = &sysctl_intvec,
1262 .extra1 = &zero, 1124 .extra1 = &zero,
1263 }, 1125 },
1264#endif 1126#endif
1265 { 1127 {
1266 .ctl_name = VM_LAPTOP_MODE,
1267 .procname = "laptop_mode", 1128 .procname = "laptop_mode",
1268 .data = &laptop_mode, 1129 .data = &laptop_mode,
1269 .maxlen = sizeof(laptop_mode), 1130 .maxlen = sizeof(laptop_mode),
1270 .mode = 0644, 1131 .mode = 0644,
1271 .proc_handler = &proc_dointvec_jiffies, 1132 .proc_handler = proc_dointvec_jiffies,
1272 .strategy = &sysctl_jiffies,
1273 }, 1133 },
1274 { 1134 {
1275 .ctl_name = VM_BLOCK_DUMP,
1276 .procname = "block_dump", 1135 .procname = "block_dump",
1277 .data = &block_dump, 1136 .data = &block_dump,
1278 .maxlen = sizeof(block_dump), 1137 .maxlen = sizeof(block_dump),
1279 .mode = 0644, 1138 .mode = 0644,
1280 .proc_handler = &proc_dointvec, 1139 .proc_handler = proc_dointvec,
1281 .strategy = &sysctl_intvec,
1282 .extra1 = &zero, 1140 .extra1 = &zero,
1283 }, 1141 },
1284 { 1142 {
1285 .ctl_name = VM_VFS_CACHE_PRESSURE,
1286 .procname = "vfs_cache_pressure", 1143 .procname = "vfs_cache_pressure",
1287 .data = &sysctl_vfs_cache_pressure, 1144 .data = &sysctl_vfs_cache_pressure,
1288 .maxlen = sizeof(sysctl_vfs_cache_pressure), 1145 .maxlen = sizeof(sysctl_vfs_cache_pressure),
1289 .mode = 0644, 1146 .mode = 0644,
1290 .proc_handler = &proc_dointvec, 1147 .proc_handler = proc_dointvec,
1291 .strategy = &sysctl_intvec,
1292 .extra1 = &zero, 1148 .extra1 = &zero,
1293 }, 1149 },
1294#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT 1150#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1295 { 1151 {
1296 .ctl_name = VM_LEGACY_VA_LAYOUT,
1297 .procname = "legacy_va_layout", 1152 .procname = "legacy_va_layout",
1298 .data = &sysctl_legacy_va_layout, 1153 .data = &sysctl_legacy_va_layout,
1299 .maxlen = sizeof(sysctl_legacy_va_layout), 1154 .maxlen = sizeof(sysctl_legacy_va_layout),
1300 .mode = 0644, 1155 .mode = 0644,
1301 .proc_handler = &proc_dointvec, 1156 .proc_handler = proc_dointvec,
1302 .strategy = &sysctl_intvec,
1303 .extra1 = &zero, 1157 .extra1 = &zero,
1304 }, 1158 },
1305#endif 1159#endif
1306#ifdef CONFIG_NUMA 1160#ifdef CONFIG_NUMA
1307 { 1161 {
1308 .ctl_name = VM_ZONE_RECLAIM_MODE,
1309 .procname = "zone_reclaim_mode", 1162 .procname = "zone_reclaim_mode",
1310 .data = &zone_reclaim_mode, 1163 .data = &zone_reclaim_mode,
1311 .maxlen = sizeof(zone_reclaim_mode), 1164 .maxlen = sizeof(zone_reclaim_mode),
1312 .mode = 0644, 1165 .mode = 0644,
1313 .proc_handler = &proc_dointvec, 1166 .proc_handler = proc_dointvec,
1314 .strategy = &sysctl_intvec,
1315 .extra1 = &zero, 1167 .extra1 = &zero,
1316 }, 1168 },
1317 { 1169 {
1318 .ctl_name = VM_MIN_UNMAPPED,
1319 .procname = "min_unmapped_ratio", 1170 .procname = "min_unmapped_ratio",
1320 .data = &sysctl_min_unmapped_ratio, 1171 .data = &sysctl_min_unmapped_ratio,
1321 .maxlen = sizeof(sysctl_min_unmapped_ratio), 1172 .maxlen = sizeof(sysctl_min_unmapped_ratio),
1322 .mode = 0644, 1173 .mode = 0644,
1323 .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler, 1174 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
1324 .strategy = &sysctl_intvec,
1325 .extra1 = &zero, 1175 .extra1 = &zero,
1326 .extra2 = &one_hundred, 1176 .extra2 = &one_hundred,
1327 }, 1177 },
1328 { 1178 {
1329 .ctl_name = VM_MIN_SLAB,
1330 .procname = "min_slab_ratio", 1179 .procname = "min_slab_ratio",
1331 .data = &sysctl_min_slab_ratio, 1180 .data = &sysctl_min_slab_ratio,
1332 .maxlen = sizeof(sysctl_min_slab_ratio), 1181 .maxlen = sizeof(sysctl_min_slab_ratio),
1333 .mode = 0644, 1182 .mode = 0644,
1334 .proc_handler = &sysctl_min_slab_ratio_sysctl_handler, 1183 .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
1335 .strategy = &sysctl_intvec,
1336 .extra1 = &zero, 1184 .extra1 = &zero,
1337 .extra2 = &one_hundred, 1185 .extra2 = &one_hundred,
1338 }, 1186 },
1339#endif 1187#endif
1340#ifdef CONFIG_SMP 1188#ifdef CONFIG_SMP
1341 { 1189 {
1342 .ctl_name = CTL_UNNUMBERED,
1343 .procname = "stat_interval", 1190 .procname = "stat_interval",
1344 .data = &sysctl_stat_interval, 1191 .data = &sysctl_stat_interval,
1345 .maxlen = sizeof(sysctl_stat_interval), 1192 .maxlen = sizeof(sysctl_stat_interval),
1346 .mode = 0644, 1193 .mode = 0644,
1347 .proc_handler = &proc_dointvec_jiffies, 1194 .proc_handler = proc_dointvec_jiffies,
1348 .strategy = &sysctl_jiffies,
1349 }, 1195 },
1350#endif 1196#endif
1351 { 1197 {
1352 .ctl_name = CTL_UNNUMBERED,
1353 .procname = "mmap_min_addr", 1198 .procname = "mmap_min_addr",
1354 .data = &dac_mmap_min_addr, 1199 .data = &dac_mmap_min_addr,
1355 .maxlen = sizeof(unsigned long), 1200 .maxlen = sizeof(unsigned long),
1356 .mode = 0644, 1201 .mode = 0644,
1357 .proc_handler = &mmap_min_addr_handler, 1202 .proc_handler = mmap_min_addr_handler,
1358 }, 1203 },
1359#ifdef CONFIG_NUMA 1204#ifdef CONFIG_NUMA
1360 { 1205 {
1361 .ctl_name = CTL_UNNUMBERED,
1362 .procname = "numa_zonelist_order", 1206 .procname = "numa_zonelist_order",
1363 .data = &numa_zonelist_order, 1207 .data = &numa_zonelist_order,
1364 .maxlen = NUMA_ZONELIST_ORDER_LEN, 1208 .maxlen = NUMA_ZONELIST_ORDER_LEN,
1365 .mode = 0644, 1209 .mode = 0644,
1366 .proc_handler = &numa_zonelist_order_handler, 1210 .proc_handler = numa_zonelist_order_handler,
1367 .strategy = &sysctl_string,
1368 }, 1211 },
1369#endif 1212#endif
1370#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \ 1213#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1371 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) 1214 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1372 { 1215 {
1373 .ctl_name = VM_VDSO_ENABLED,
1374 .procname = "vdso_enabled", 1216 .procname = "vdso_enabled",
1375 .data = &vdso_enabled, 1217 .data = &vdso_enabled,
1376 .maxlen = sizeof(vdso_enabled), 1218 .maxlen = sizeof(vdso_enabled),
1377 .mode = 0644, 1219 .mode = 0644,
1378 .proc_handler = &proc_dointvec, 1220 .proc_handler = proc_dointvec,
1379 .strategy = &sysctl_intvec,
1380 .extra1 = &zero, 1221 .extra1 = &zero,
1381 }, 1222 },
1382#endif 1223#endif
1383#ifdef CONFIG_HIGHMEM 1224#ifdef CONFIG_HIGHMEM
1384 { 1225 {
1385 .ctl_name = CTL_UNNUMBERED,
1386 .procname = "highmem_is_dirtyable", 1226 .procname = "highmem_is_dirtyable",
1387 .data = &vm_highmem_is_dirtyable, 1227 .data = &vm_highmem_is_dirtyable,
1388 .maxlen = sizeof(vm_highmem_is_dirtyable), 1228 .maxlen = sizeof(vm_highmem_is_dirtyable),
1389 .mode = 0644, 1229 .mode = 0644,
1390 .proc_handler = &proc_dointvec_minmax, 1230 .proc_handler = proc_dointvec_minmax,
1391 .strategy = &sysctl_intvec,
1392 .extra1 = &zero, 1231 .extra1 = &zero,
1393 .extra2 = &one, 1232 .extra2 = &one,
1394 }, 1233 },
1395#endif 1234#endif
1396 { 1235 {
1397 .ctl_name = CTL_UNNUMBERED,
1398 .procname = "scan_unevictable_pages", 1236 .procname = "scan_unevictable_pages",
1399 .data = &scan_unevictable_pages, 1237 .data = &scan_unevictable_pages,
1400 .maxlen = sizeof(scan_unevictable_pages), 1238 .maxlen = sizeof(scan_unevictable_pages),
1401 .mode = 0644, 1239 .mode = 0644,
1402 .proc_handler = &scan_unevictable_handler, 1240 .proc_handler = scan_unevictable_handler,
1403 }, 1241 },
1404#ifdef CONFIG_MEMORY_FAILURE 1242#ifdef CONFIG_MEMORY_FAILURE
1405 { 1243 {
1406 .ctl_name = CTL_UNNUMBERED,
1407 .procname = "memory_failure_early_kill", 1244 .procname = "memory_failure_early_kill",
1408 .data = &sysctl_memory_failure_early_kill, 1245 .data = &sysctl_memory_failure_early_kill,
1409 .maxlen = sizeof(sysctl_memory_failure_early_kill), 1246 .maxlen = sizeof(sysctl_memory_failure_early_kill),
1410 .mode = 0644, 1247 .mode = 0644,
1411 .proc_handler = &proc_dointvec_minmax, 1248 .proc_handler = proc_dointvec_minmax,
1412 .strategy = &sysctl_intvec,
1413 .extra1 = &zero, 1249 .extra1 = &zero,
1414 .extra2 = &one, 1250 .extra2 = &one,
1415 }, 1251 },
1416 { 1252 {
1417 .ctl_name = CTL_UNNUMBERED,
1418 .procname = "memory_failure_recovery", 1253 .procname = "memory_failure_recovery",
1419 .data = &sysctl_memory_failure_recovery, 1254 .data = &sysctl_memory_failure_recovery,
1420 .maxlen = sizeof(sysctl_memory_failure_recovery), 1255 .maxlen = sizeof(sysctl_memory_failure_recovery),
1421 .mode = 0644, 1256 .mode = 0644,
1422 .proc_handler = &proc_dointvec_minmax, 1257 .proc_handler = proc_dointvec_minmax,
1423 .strategy = &sysctl_intvec,
1424 .extra1 = &zero, 1258 .extra1 = &zero,
1425 .extra2 = &one, 1259 .extra2 = &one,
1426 }, 1260 },
@@ -1430,116 +1264,104 @@ static struct ctl_table vm_table[] = {
1430 * NOTE: do not add new entries to this table unless you have read 1264 * NOTE: do not add new entries to this table unless you have read
1431 * Documentation/sysctl/ctl_unnumbered.txt 1265 * Documentation/sysctl/ctl_unnumbered.txt
1432 */ 1266 */
1433 { .ctl_name = 0 } 1267 { }
1434}; 1268};
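
With .ctl_name gone, a table now ends in a bare empty initializer and walkers key off .procname alone. A minimal sketch of the post-patch idiom (illustrative helper, not part of this commit):

/* Illustrative only: count the entries of a table terminated by { }. */
static int count_entries(const struct ctl_table *table)
{
	int n = 0;

	/* The empty sentinel has .procname == NULL, so the walk stops there. */
	for (; table->procname; table++)
		n++;
	return n;
}
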
1435 1269
1436#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) 1270#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1437static struct ctl_table binfmt_misc_table[] = { 1271static struct ctl_table binfmt_misc_table[] = {
1438 { .ctl_name = 0 } 1272 { }
1439}; 1273};
1440#endif 1274#endif
1441 1275
1442static struct ctl_table fs_table[] = { 1276static struct ctl_table fs_table[] = {
1443 { 1277 {
1444 .ctl_name = FS_NRINODE,
1445 .procname = "inode-nr", 1278 .procname = "inode-nr",
1446 .data = &inodes_stat, 1279 .data = &inodes_stat,
1447 .maxlen = 2*sizeof(int), 1280 .maxlen = 2*sizeof(int),
1448 .mode = 0444, 1281 .mode = 0444,
1449 .proc_handler = &proc_dointvec, 1282 .proc_handler = proc_dointvec,
1450 }, 1283 },
1451 { 1284 {
1452 .ctl_name = FS_STATINODE,
1453 .procname = "inode-state", 1285 .procname = "inode-state",
1454 .data = &inodes_stat, 1286 .data = &inodes_stat,
1455 .maxlen = 7*sizeof(int), 1287 .maxlen = 7*sizeof(int),
1456 .mode = 0444, 1288 .mode = 0444,
1457 .proc_handler = &proc_dointvec, 1289 .proc_handler = proc_dointvec,
1458 }, 1290 },
1459 { 1291 {
1460 .procname = "file-nr", 1292 .procname = "file-nr",
1461 .data = &files_stat, 1293 .data = &files_stat,
1462 .maxlen = 3*sizeof(int), 1294 .maxlen = 3*sizeof(int),
1463 .mode = 0444, 1295 .mode = 0444,
1464 .proc_handler = &proc_nr_files, 1296 .proc_handler = proc_nr_files,
1465 }, 1297 },
1466 { 1298 {
1467 .ctl_name = FS_MAXFILE,
1468 .procname = "file-max", 1299 .procname = "file-max",
1469 .data = &files_stat.max_files, 1300 .data = &files_stat.max_files,
1470 .maxlen = sizeof(int), 1301 .maxlen = sizeof(int),
1471 .mode = 0644, 1302 .mode = 0644,
1472 .proc_handler = &proc_dointvec, 1303 .proc_handler = proc_dointvec,
1473 }, 1304 },
1474 { 1305 {
1475 .ctl_name = CTL_UNNUMBERED,
1476 .procname = "nr_open", 1306 .procname = "nr_open",
1477 .data = &sysctl_nr_open, 1307 .data = &sysctl_nr_open,
1478 .maxlen = sizeof(int), 1308 .maxlen = sizeof(int),
1479 .mode = 0644, 1309 .mode = 0644,
1480 .proc_handler = &proc_dointvec_minmax, 1310 .proc_handler = proc_dointvec_minmax,
1481 .extra1 = &sysctl_nr_open_min, 1311 .extra1 = &sysctl_nr_open_min,
1482 .extra2 = &sysctl_nr_open_max, 1312 .extra2 = &sysctl_nr_open_max,
1483 }, 1313 },
1484 { 1314 {
1485 .ctl_name = FS_DENTRY,
1486 .procname = "dentry-state", 1315 .procname = "dentry-state",
1487 .data = &dentry_stat, 1316 .data = &dentry_stat,
1488 .maxlen = 6*sizeof(int), 1317 .maxlen = 6*sizeof(int),
1489 .mode = 0444, 1318 .mode = 0444,
1490 .proc_handler = &proc_dointvec, 1319 .proc_handler = proc_dointvec,
1491 }, 1320 },
1492 { 1321 {
1493 .ctl_name = FS_OVERFLOWUID,
1494 .procname = "overflowuid", 1322 .procname = "overflowuid",
1495 .data = &fs_overflowuid, 1323 .data = &fs_overflowuid,
1496 .maxlen = sizeof(int), 1324 .maxlen = sizeof(int),
1497 .mode = 0644, 1325 .mode = 0644,
1498 .proc_handler = &proc_dointvec_minmax, 1326 .proc_handler = proc_dointvec_minmax,
1499 .strategy = &sysctl_intvec,
1500 .extra1 = &minolduid, 1327 .extra1 = &minolduid,
1501 .extra2 = &maxolduid, 1328 .extra2 = &maxolduid,
1502 }, 1329 },
1503 { 1330 {
1504 .ctl_name = FS_OVERFLOWGID,
1505 .procname = "overflowgid", 1331 .procname = "overflowgid",
1506 .data = &fs_overflowgid, 1332 .data = &fs_overflowgid,
1507 .maxlen = sizeof(int), 1333 .maxlen = sizeof(int),
1508 .mode = 0644, 1334 .mode = 0644,
1509 .proc_handler = &proc_dointvec_minmax, 1335 .proc_handler = proc_dointvec_minmax,
1510 .strategy = &sysctl_intvec,
1511 .extra1 = &minolduid, 1336 .extra1 = &minolduid,
1512 .extra2 = &maxolduid, 1337 .extra2 = &maxolduid,
1513 }, 1338 },
1514#ifdef CONFIG_FILE_LOCKING 1339#ifdef CONFIG_FILE_LOCKING
1515 { 1340 {
1516 .ctl_name = FS_LEASES,
1517 .procname = "leases-enable", 1341 .procname = "leases-enable",
1518 .data = &leases_enable, 1342 .data = &leases_enable,
1519 .maxlen = sizeof(int), 1343 .maxlen = sizeof(int),
1520 .mode = 0644, 1344 .mode = 0644,
1521 .proc_handler = &proc_dointvec, 1345 .proc_handler = proc_dointvec,
1522 }, 1346 },
1523#endif 1347#endif
1524#ifdef CONFIG_DNOTIFY 1348#ifdef CONFIG_DNOTIFY
1525 { 1349 {
1526 .ctl_name = FS_DIR_NOTIFY,
1527 .procname = "dir-notify-enable", 1350 .procname = "dir-notify-enable",
1528 .data = &dir_notify_enable, 1351 .data = &dir_notify_enable,
1529 .maxlen = sizeof(int), 1352 .maxlen = sizeof(int),
1530 .mode = 0644, 1353 .mode = 0644,
1531 .proc_handler = &proc_dointvec, 1354 .proc_handler = proc_dointvec,
1532 }, 1355 },
1533#endif 1356#endif
1534#ifdef CONFIG_MMU 1357#ifdef CONFIG_MMU
1535#ifdef CONFIG_FILE_LOCKING 1358#ifdef CONFIG_FILE_LOCKING
1536 { 1359 {
1537 .ctl_name = FS_LEASE_TIME,
1538 .procname = "lease-break-time", 1360 .procname = "lease-break-time",
1539 .data = &lease_break_time, 1361 .data = &lease_break_time,
1540 .maxlen = sizeof(int), 1362 .maxlen = sizeof(int),
1541 .mode = 0644, 1363 .mode = 0644,
1542 .proc_handler = &proc_dointvec, 1364 .proc_handler = proc_dointvec,
1543 }, 1365 },
1544#endif 1366#endif
1545#ifdef CONFIG_AIO 1367#ifdef CONFIG_AIO
@@ -1548,19 +1370,18 @@ static struct ctl_table fs_table[] = {
1548 .data = &aio_nr, 1370 .data = &aio_nr,
1549 .maxlen = sizeof(aio_nr), 1371 .maxlen = sizeof(aio_nr),
1550 .mode = 0444, 1372 .mode = 0444,
1551 .proc_handler = &proc_doulongvec_minmax, 1373 .proc_handler = proc_doulongvec_minmax,
1552 }, 1374 },
1553 { 1375 {
1554 .procname = "aio-max-nr", 1376 .procname = "aio-max-nr",
1555 .data = &aio_max_nr, 1377 .data = &aio_max_nr,
1556 .maxlen = sizeof(aio_max_nr), 1378 .maxlen = sizeof(aio_max_nr),
1557 .mode = 0644, 1379 .mode = 0644,
1558 .proc_handler = &proc_doulongvec_minmax, 1380 .proc_handler = proc_doulongvec_minmax,
1559 }, 1381 },
1560#endif /* CONFIG_AIO */ 1382#endif /* CONFIG_AIO */
1561#ifdef CONFIG_INOTIFY_USER 1383#ifdef CONFIG_INOTIFY_USER
1562 { 1384 {
1563 .ctl_name = FS_INOTIFY,
1564 .procname = "inotify", 1385 .procname = "inotify",
1565 .mode = 0555, 1386 .mode = 0555,
1566 .child = inotify_table, 1387 .child = inotify_table,
@@ -1575,19 +1396,16 @@ static struct ctl_table fs_table[] = {
1575#endif 1396#endif
1576#endif 1397#endif
1577 { 1398 {
1578 .ctl_name = KERN_SETUID_DUMPABLE,
1579 .procname = "suid_dumpable", 1399 .procname = "suid_dumpable",
1580 .data = &suid_dumpable, 1400 .data = &suid_dumpable,
1581 .maxlen = sizeof(int), 1401 .maxlen = sizeof(int),
1582 .mode = 0644, 1402 .mode = 0644,
1583 .proc_handler = &proc_dointvec_minmax, 1403 .proc_handler = proc_dointvec_minmax,
1584 .strategy = &sysctl_intvec,
1585 .extra1 = &zero, 1404 .extra1 = &zero,
1586 .extra2 = &two, 1405 .extra2 = &two,
1587 }, 1406 },
1588#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) 1407#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1589 { 1408 {
1590 .ctl_name = CTL_UNNUMBERED,
1591 .procname = "binfmt_misc", 1409 .procname = "binfmt_misc",
1592 .mode = 0555, 1410 .mode = 0555,
1593 .child = binfmt_misc_table, 1411 .child = binfmt_misc_table,
@@ -1597,13 +1415,12 @@ static struct ctl_table fs_table[] = {
1597 * NOTE: do not add new entries to this table unless you have read 1415 * NOTE: do not add new entries to this table unless you have read
1598 * Documentation/sysctl/ctl_unnumbered.txt 1416 * Documentation/sysctl/ctl_unnumbered.txt
1599 */ 1417 */
1600 { .ctl_name = 0 } 1418 { }
1601}; 1419};
1602 1420
1603static struct ctl_table debug_table[] = { 1421static struct ctl_table debug_table[] = {
1604#if defined(CONFIG_X86) || defined(CONFIG_PPC) 1422#if defined(CONFIG_X86) || defined(CONFIG_PPC)
1605 { 1423 {
1606 .ctl_name = CTL_UNNUMBERED,
1607 .procname = "exception-trace", 1424 .procname = "exception-trace",
1608 .data = &show_unhandled_signals, 1425 .data = &show_unhandled_signals,
1609 .maxlen = sizeof(int), 1426 .maxlen = sizeof(int),
@@ -1611,11 +1428,11 @@ static struct ctl_table debug_table[] = {
1611 .proc_handler = proc_dointvec 1428 .proc_handler = proc_dointvec
1612 }, 1429 },
1613#endif 1430#endif
1614 { .ctl_name = 0 } 1431 { }
1615}; 1432};
1616 1433
1617static struct ctl_table dev_table[] = { 1434static struct ctl_table dev_table[] = {
1618 { .ctl_name = 0 } 1435 { }
1619}; 1436};
1620 1437
1621static DEFINE_SPINLOCK(sysctl_lock); 1438static DEFINE_SPINLOCK(sysctl_lock);
@@ -1769,122 +1586,6 @@ void register_sysctl_root(struct ctl_table_root *root)
1769 spin_unlock(&sysctl_lock); 1586 spin_unlock(&sysctl_lock);
1770} 1587}
1771 1588
1772#ifdef CONFIG_SYSCTL_SYSCALL
1773/* Perform the actual read/write of a sysctl table entry. */
1774static int do_sysctl_strategy(struct ctl_table_root *root,
1775 struct ctl_table *table,
1776 void __user *oldval, size_t __user *oldlenp,
1777 void __user *newval, size_t newlen)
1778{
1779 int op = 0, rc;
1780
1781 if (oldval)
1782 op |= MAY_READ;
1783 if (newval)
1784 op |= MAY_WRITE;
1785 if (sysctl_perm(root, table, op))
1786 return -EPERM;
1787
1788 if (table->strategy) {
1789 rc = table->strategy(table, oldval, oldlenp, newval, newlen);
1790 if (rc < 0)
1791 return rc;
1792 if (rc > 0)
1793 return 0;
1794 }
1795
1796 /* If there is no strategy routine, or if the strategy returns
1797 * zero, proceed with automatic r/w */
1798 if (table->data && table->maxlen) {
1799 rc = sysctl_data(table, oldval, oldlenp, newval, newlen);
1800 if (rc < 0)
1801 return rc;
1802 }
1803 return 0;
1804}
1805
1806static int parse_table(int __user *name, int nlen,
1807 void __user *oldval, size_t __user *oldlenp,
1808 void __user *newval, size_t newlen,
1809 struct ctl_table_root *root,
1810 struct ctl_table *table)
1811{
1812 int n;
1813repeat:
1814 if (!nlen)
1815 return -ENOTDIR;
1816 if (get_user(n, name))
1817 return -EFAULT;
1818 for ( ; table->ctl_name || table->procname; table++) {
1819 if (!table->ctl_name)
1820 continue;
1821 if (n == table->ctl_name) {
1822 int error;
1823 if (table->child) {
1824 if (sysctl_perm(root, table, MAY_EXEC))
1825 return -EPERM;
1826 name++;
1827 nlen--;
1828 table = table->child;
1829 goto repeat;
1830 }
1831 error = do_sysctl_strategy(root, table,
1832 oldval, oldlenp,
1833 newval, newlen);
1834 return error;
1835 }
1836 }
1837 return -ENOTDIR;
1838}
1839
1840int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1841 void __user *newval, size_t newlen)
1842{
1843 struct ctl_table_header *head;
1844 int error = -ENOTDIR;
1845
1846 if (nlen <= 0 || nlen >= CTL_MAXNAME)
1847 return -ENOTDIR;
1848 if (oldval) {
1849 int old_len;
1850 if (!oldlenp || get_user(old_len, oldlenp))
1851 return -EFAULT;
1852 }
1853
1854 for (head = sysctl_head_next(NULL); head;
1855 head = sysctl_head_next(head)) {
1856 error = parse_table(name, nlen, oldval, oldlenp,
1857 newval, newlen,
1858 head->root, head->ctl_table);
1859 if (error != -ENOTDIR) {
1860 sysctl_head_finish(head);
1861 break;
1862 }
1863 }
1864 return error;
1865}
1866
1867SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
1868{
1869 struct __sysctl_args tmp;
1870 int error;
1871
1872 if (copy_from_user(&tmp, args, sizeof(tmp)))
1873 return -EFAULT;
1874
1875 error = deprecated_sysctl_warning(&tmp);
1876 if (error)
1877 goto out;
1878
1879 lock_kernel();
1880 error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1881 tmp.newval, tmp.newlen);
1882 unlock_kernel();
1883out:
1884 return error;
1885}
1886#endif /* CONFIG_SYSCTL_SYSCALL */
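
The block removed above is the in-kernel half of the numeric sysctl(2) interface; after this commit the numeric names are translated in kernel/sysctl_binary.c instead. For context, a hedged userspace sketch of what such a caller looked like (it assumes a glibc of this era that still declares the Linux-only sysctl() wrapper in <sys/sysctl.h>):

/* Hypothetical caller of the deprecated binary interface. */
#include <stdio.h>
#include <sys/sysctl.h>		/* pulls in CTL_KERN, KERN_OSTYPE */

int main(void)
{
	int name[] = { CTL_KERN, KERN_OSTYPE };	/* numeric path for kernel.ostype */
	char buf[64];
	size_t len = sizeof(buf);

	/* Reads the same value as /proc/sys/kernel/ostype; the kernel also
	 * logs the deprecated-sysctl warning shown further down in this diff. */
	if (sysctl(name, 2, buf, &len, NULL, 0) == 0)
		printf("%.*s\n", (int)len, buf);
	return 0;
}
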
1887
1888/* 1589/*
1889 * sysctl_perm does NOT grant the superuser all rights automatically, because 1590 * sysctl_perm does NOT grant the superuser all rights automatically, because
1890 * some sysctl variables are readonly even to root. 1591 * some sysctl variables are readonly even to root.
@@ -1920,7 +1621,7 @@ int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1920 1621
1921static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) 1622static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1922{ 1623{
1923 for (; table->ctl_name || table->procname; table++) { 1624 for (; table->procname; table++) {
1924 table->parent = parent; 1625 table->parent = parent;
1925 if (table->child) 1626 if (table->child)
1926 sysctl_set_parent(table, table->child); 1627 sysctl_set_parent(table, table->child);
@@ -1952,11 +1653,11 @@ static struct ctl_table *is_branch_in(struct ctl_table *branch,
1952 return NULL; 1653 return NULL;
1953 1654
1954 /* ... and nothing else */ 1655 /* ... and nothing else */
1955 if (branch[1].procname || branch[1].ctl_name) 1656 if (branch[1].procname)
1956 return NULL; 1657 return NULL;
1957 1658
1958 /* table should contain subdirectory with the same name */ 1659 /* table should contain subdirectory with the same name */
1959 for (p = table; p->procname || p->ctl_name; p++) { 1660 for (p = table; p->procname; p++) {
1960 if (!p->child) 1661 if (!p->child)
1961 continue; 1662 continue;
1962 if (p->procname && strcmp(p->procname, s) == 0) 1663 if (p->procname && strcmp(p->procname, s) == 0)
@@ -2001,9 +1702,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
2001 * 1702 *
2002 * The members of the &struct ctl_table structure are used as follows: 1703 * The members of the &struct ctl_table structure are used as follows:
2003 * 1704 *
2004 * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
2005 * must be unique within that level of sysctl
2006 *
2007 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not 1705 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
2008 * enter a sysctl file 1706 * enter a sysctl file
2009 * 1707 *
@@ -2018,8 +1716,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
2018 * 1716 *
2019 * proc_handler - the text handler routine (described below) 1717 * proc_handler - the text handler routine (described below)
2020 * 1718 *
2021 * strategy - the strategy routine (described below)
2022 *
2023 * de - for internal use by the sysctl routines 1719 * de - for internal use by the sysctl routines
2024 * 1720 *
2025 * extra1, extra2 - extra pointers usable by the proc handler routines 1721 * extra1, extra2 - extra pointers usable by the proc handler routines
@@ -2032,19 +1728,6 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
2032 * struct enable minimal validation of the values being written to be 1728 * struct enable minimal validation of the values being written to be
2033 * performed, and the mode field allows minimal authentication. 1729 * performed, and the mode field allows minimal authentication.
2034 * 1730 *
2035 * More sophisticated management can be enabled by the provision of a
2036 * strategy routine with the table entry. This will be called before
2037 * any automatic read or write of the data is performed.
2038 *
2039 * The strategy routine may return
2040 *
2041 * < 0 - Error occurred (error is passed to user process)
2042 *
2043 * 0 - OK - proceed with automatic read or write.
2044 *
2045 * > 0 - OK - read or write has been done by the strategy routine, so
2046 * return immediately.
2047 *
2048 * There must be a proc_handler routine for any terminal nodes 1731 * There must be a proc_handler routine for any terminal nodes
2049 * mirrored under /proc/sys (non-terminals are handled by a built-in 1732 * mirrored under /proc/sys (non-terminals are handled by a built-in
2050 * directory handler). Several default handlers are available to 1733 * directory handler). Several default handlers are available to
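
With the strategy machinery gone, a terminal entry needs nothing beyond procname, data, maxlen, mode and a proc_handler. A hypothetical post-patch registration, sketched here for contrast (the example_* names are made up; zero and one_hundred are the statics already used throughout this file):

static int example_value;

static struct ctl_table example_table[] = {
	{
		.procname	= "example_value",
		.data		= &example_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &one_hundred,
	},
	{ }	/* empty sentinel, no .ctl_name needed */
};

static const struct ctl_path example_path[] = {
	{ .procname = "kernel" },
	{ .procname = "example" },
	{ }
};

/* register_sysctl_paths(example_path, example_table) would create
 * /proc/sys/kernel/example/example_value; there is no binary-name
 * counterpart left to declare. */
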
@@ -2071,13 +1754,13 @@ struct ctl_table_header *__register_sysctl_paths(
2071 struct ctl_table_set *set; 1754 struct ctl_table_set *set;
2072 1755
2073 /* Count the path components */ 1756 /* Count the path components */
2074 for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath) 1757 for (npath = 0; path[npath].procname; ++npath)
2075 ; 1758 ;
2076 1759
2077 /* 1760 /*
2078 * For each path component, allocate a 2-element ctl_table array. 1761 * For each path component, allocate a 2-element ctl_table array.
2079 * The first array element will be filled with the sysctl entry 1762 * The first array element will be filled with the sysctl entry
2080 * for this, the second will be the sentinel (ctl_name == 0). 1763 * for this, the second will be the sentinel (procname == 0).
2081 * 1764 *
2082 * We allocate everything in one go so that we don't have to 1765 * We allocate everything in one go so that we don't have to
2083 * worry about freeing additional memory in unregister_sysctl_table. 1766 * worry about freeing additional memory in unregister_sysctl_table.
@@ -2094,7 +1777,6 @@ struct ctl_table_header *__register_sysctl_paths(
2094 for (n = 0; n < npath; ++n, ++path) { 1777 for (n = 0; n < npath; ++n, ++path) {
2095 /* Copy the procname */ 1778 /* Copy the procname */
2096 new->procname = path->procname; 1779 new->procname = path->procname;
2097 new->ctl_name = path->ctl_name;
2098 new->mode = 0555; 1780 new->mode = 0555;
2099 1781
2100 *prevp = new; 1782 *prevp = new;
@@ -2956,286 +2638,6 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2956 2638
2957#endif /* CONFIG_PROC_FS */ 2639#endif /* CONFIG_PROC_FS */
2958 2640
2959
2960#ifdef CONFIG_SYSCTL_SYSCALL
2961/*
2962 * General sysctl support routines
2963 */
2964
2965/* The generic sysctl data routine (used if no strategy routine supplied) */
2966int sysctl_data(struct ctl_table *table,
2967 void __user *oldval, size_t __user *oldlenp,
2968 void __user *newval, size_t newlen)
2969{
2970 size_t len;
2971
2972	/* Bail out when there is no variable to read or write */
2973 if (!table->data || !table->maxlen)
2974 return -ENOTDIR;
2975
2976 if (oldval && oldlenp) {
2977 if (get_user(len, oldlenp))
2978 return -EFAULT;
2979 if (len) {
2980 if (len > table->maxlen)
2981 len = table->maxlen;
2982 if (copy_to_user(oldval, table->data, len))
2983 return -EFAULT;
2984 if (put_user(len, oldlenp))
2985 return -EFAULT;
2986 }
2987 }
2988
2989 if (newval && newlen) {
2990 if (newlen > table->maxlen)
2991 newlen = table->maxlen;
2992
2993 if (copy_from_user(table->data, newval, newlen))
2994 return -EFAULT;
2995 }
2996 return 1;
2997}
2998
2999/* The generic string strategy routine: */
3000int sysctl_string(struct ctl_table *table,
3001 void __user *oldval, size_t __user *oldlenp,
3002 void __user *newval, size_t newlen)
3003{
3004 if (!table->data || !table->maxlen)
3005 return -ENOTDIR;
3006
3007 if (oldval && oldlenp) {
3008 size_t bufsize;
3009 if (get_user(bufsize, oldlenp))
3010 return -EFAULT;
3011 if (bufsize) {
3012 size_t len = strlen(table->data), copied;
3013
3014 /* This shouldn't trigger for a well-formed sysctl */
3015 if (len > table->maxlen)
3016 len = table->maxlen;
3017
3018 /* Copy up to a max of bufsize-1 bytes of the string */
3019 copied = (len >= bufsize) ? bufsize - 1 : len;
3020
3021 if (copy_to_user(oldval, table->data, copied) ||
3022 put_user(0, (char __user *)(oldval + copied)))
3023 return -EFAULT;
3024 if (put_user(len, oldlenp))
3025 return -EFAULT;
3026 }
3027 }
3028 if (newval && newlen) {
3029 size_t len = newlen;
3030 if (len > table->maxlen)
3031 len = table->maxlen;
3032 if(copy_from_user(table->data, newval, len))
3033 return -EFAULT;
3034 if (len == table->maxlen)
3035 len--;
3036 ((char *) table->data)[len] = 0;
3037 }
3038 return 1;
3039}
3040
3041/*
3042 * This function makes sure that all of the integers in the vector
3043 * are between the minimum and maximum values given in the arrays
3044 * table->extra1 and table->extra2, respectively.
3045 */
3046int sysctl_intvec(struct ctl_table *table,
3047 void __user *oldval, size_t __user *oldlenp,
3048 void __user *newval, size_t newlen)
3049{
3050
3051 if (newval && newlen) {
3052 int __user *vec = (int __user *) newval;
3053 int *min = (int *) table->extra1;
3054 int *max = (int *) table->extra2;
3055 size_t length;
3056 int i;
3057
3058 if (newlen % sizeof(int) != 0)
3059 return -EINVAL;
3060
3061 if (!table->extra1 && !table->extra2)
3062 return 0;
3063
3064 if (newlen > table->maxlen)
3065 newlen = table->maxlen;
3066 length = newlen / sizeof(int);
3067
3068 for (i = 0; i < length; i++) {
3069 int value;
3070 if (get_user(value, vec + i))
3071 return -EFAULT;
3072 if (min && value < min[i])
3073 return -EINVAL;
3074 if (max && value > max[i])
3075 return -EINVAL;
3076 }
3077 }
3078 return 0;
3079}
3080
3081/* Strategy function to convert jiffies to seconds */
3082int sysctl_jiffies(struct ctl_table *table,
3083 void __user *oldval, size_t __user *oldlenp,
3084 void __user *newval, size_t newlen)
3085{
3086 if (oldval && oldlenp) {
3087 size_t olen;
3088
3089 if (get_user(olen, oldlenp))
3090 return -EFAULT;
3091 if (olen) {
3092 int val;
3093
3094 if (olen < sizeof(int))
3095 return -EINVAL;
3096
3097 val = *(int *)(table->data) / HZ;
3098 if (put_user(val, (int __user *)oldval))
3099 return -EFAULT;
3100 if (put_user(sizeof(int), oldlenp))
3101 return -EFAULT;
3102 }
3103 }
3104 if (newval && newlen) {
3105 int new;
3106 if (newlen != sizeof(int))
3107 return -EINVAL;
3108 if (get_user(new, (int __user *)newval))
3109 return -EFAULT;
3110 *(int *)(table->data) = new*HZ;
3111 }
3112 return 1;
3113}
3114
3115/* Strategy function to convert jiffies to milliseconds */
3116int sysctl_ms_jiffies(struct ctl_table *table,
3117 void __user *oldval, size_t __user *oldlenp,
3118 void __user *newval, size_t newlen)
3119{
3120 if (oldval && oldlenp) {
3121 size_t olen;
3122
3123 if (get_user(olen, oldlenp))
3124 return -EFAULT;
3125 if (olen) {
3126 int val;
3127
3128 if (olen < sizeof(int))
3129 return -EINVAL;
3130
3131 val = jiffies_to_msecs(*(int *)(table->data));
3132 if (put_user(val, (int __user *)oldval))
3133 return -EFAULT;
3134 if (put_user(sizeof(int), oldlenp))
3135 return -EFAULT;
3136 }
3137 }
3138 if (newval && newlen) {
3139 int new;
3140 if (newlen != sizeof(int))
3141 return -EINVAL;
3142 if (get_user(new, (int __user *)newval))
3143 return -EFAULT;
3144 *(int *)(table->data) = msecs_to_jiffies(new);
3145 }
3146 return 1;
3147}
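
A short worked example of the two strategies above, assuming HZ == 250: writing 5 through sysctl_jiffies stores 5 * HZ = 1250 jiffies and a read divides back to 1250 / HZ = 5 seconds, while sysctl_ms_jiffies goes through msecs_to_jiffies()/jiffies_to_msecs(), so a written 5 lands at roughly 2 jiffies (5 ms at 4 ms per tick, rounded up).
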
3148
3149
3150
3151#else /* CONFIG_SYSCTL_SYSCALL */
3152
3153
3154SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
3155{
3156 struct __sysctl_args tmp;
3157 int error;
3158
3159 if (copy_from_user(&tmp, args, sizeof(tmp)))
3160 return -EFAULT;
3161
3162 error = deprecated_sysctl_warning(&tmp);
3163
3164 /* If no error reading the parameters then just -ENOSYS ... */
3165 if (!error)
3166 error = -ENOSYS;
3167
3168 return error;
3169}
3170
3171int sysctl_data(struct ctl_table *table,
3172 void __user *oldval, size_t __user *oldlenp,
3173 void __user *newval, size_t newlen)
3174{
3175 return -ENOSYS;
3176}
3177
3178int sysctl_string(struct ctl_table *table,
3179 void __user *oldval, size_t __user *oldlenp,
3180 void __user *newval, size_t newlen)
3181{
3182 return -ENOSYS;
3183}
3184
3185int sysctl_intvec(struct ctl_table *table,
3186 void __user *oldval, size_t __user *oldlenp,
3187 void __user *newval, size_t newlen)
3188{
3189 return -ENOSYS;
3190}
3191
3192int sysctl_jiffies(struct ctl_table *table,
3193 void __user *oldval, size_t __user *oldlenp,
3194 void __user *newval, size_t newlen)
3195{
3196 return -ENOSYS;
3197}
3198
3199int sysctl_ms_jiffies(struct ctl_table *table,
3200 void __user *oldval, size_t __user *oldlenp,
3201 void __user *newval, size_t newlen)
3202{
3203 return -ENOSYS;
3204}
3205
3206#endif /* CONFIG_SYSCTL_SYSCALL */
3207
3208static int deprecated_sysctl_warning(struct __sysctl_args *args)
3209{
3210 static int msg_count;
3211 int name[CTL_MAXNAME];
3212 int i;
3213
3214 /* Check args->nlen. */
3215 if (args->nlen < 0 || args->nlen > CTL_MAXNAME)
3216 return -ENOTDIR;
3217
3218 /* Read in the sysctl name for better debug message logging */
3219 for (i = 0; i < args->nlen; i++)
3220 if (get_user(name[i], args->name + i))
3221 return -EFAULT;
3222
3223 /* Ignore accesses to kernel.version */
3224 if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
3225 return 0;
3226
3227 if (msg_count < 5) {
3228 msg_count++;
3229 printk(KERN_INFO
3230 "warning: process `%s' used the deprecated sysctl "
3231 "system call with ", current->comm);
3232 for (i = 0; i < args->nlen; i++)
3233 printk("%d.", name[i]);
3234 printk("\n");
3235 }
3236 return 0;
3237}
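
For example, a process named "foo" probing { CTL_KERN, KERN_OSTYPE } (numerically 1.1) makes the printk above emit a line of the form "warning: process `foo' used the deprecated sysctl system call with 1.1.", while kernel.version lookups (1.4) stay silent, and only the first five such calls are logged at all.
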
3238
3239/* 2641/*
3240 * No sense putting this after each symbol definition, twice, 2642 * No sense putting this after each symbol definition, twice,
3241 * exception granted :-) 2643 * exception granted :-)
@@ -3250,9 +2652,4 @@ EXPORT_SYMBOL(proc_doulongvec_minmax);
3250EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); 2652EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
3251EXPORT_SYMBOL(register_sysctl_table); 2653EXPORT_SYMBOL(register_sysctl_table);
3252EXPORT_SYMBOL(register_sysctl_paths); 2654EXPORT_SYMBOL(register_sysctl_paths);
3253EXPORT_SYMBOL(sysctl_intvec);
3254EXPORT_SYMBOL(sysctl_jiffies);
3255EXPORT_SYMBOL(sysctl_ms_jiffies);
3256EXPORT_SYMBOL(sysctl_string);
3257EXPORT_SYMBOL(sysctl_data);
3258EXPORT_SYMBOL(unregister_sysctl_table); 2655EXPORT_SYMBOL(unregister_sysctl_table);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
new file mode 100644
index 000000000000..b75dbf40f573
--- /dev/null
+++ b/kernel/sysctl_binary.c
@@ -0,0 +1,1507 @@
1#include <linux/stat.h>
2#include <linux/sysctl.h>
3#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
4#include <linux/sunrpc/debug.h>
5#include <linux/string.h>
6#include <net/ip_vs.h>
7#include <linux/syscalls.h>
8#include <linux/namei.h>
9#include <linux/mount.h>
10#include <linux/fs.h>
11#include <linux/nsproxy.h>
12#include <linux/pid_namespace.h>
13#include <linux/file.h>
14#include <linux/ctype.h>
15#include <linux/netdevice.h>
16
17#ifdef CONFIG_SYSCTL_SYSCALL
18
19struct bin_table;
20typedef ssize_t bin_convert_t(struct file *file,
21 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen);
22
23static bin_convert_t bin_dir;
24static bin_convert_t bin_string;
25static bin_convert_t bin_intvec;
26static bin_convert_t bin_ulongvec;
27static bin_convert_t bin_uuid;
28static bin_convert_t bin_dn_node_address;
29
30#define CTL_DIR bin_dir
31#define CTL_STR bin_string
32#define CTL_INT bin_intvec
33#define CTL_ULONG bin_ulongvec
34#define CTL_UUID bin_uuid
35#define CTL_DNADR bin_dn_node_address
36
37#define BUFSZ 256
38
39struct bin_table {
40 bin_convert_t *convert;
41 int ctl_name;
42 const char *procname;
43 const struct bin_table *child;
44};
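
Each entry below pairs a legacy numeric ctl_name with the procname of its /proc/sys file, and CTL_DIR entries point at a child table. A rough sketch of how a numeric vector resolves against these tables (illustrative only; the actual lookup code appears later in this file and also copes with the wildcard { CTL_DIR, 0, NULL, ... } entries):

/* Illustrative resolver: walk a numeric name vector, e.g.
 * { KERN_RANDOM, RANDOM_UUID } against bin_kern_table, collecting the
 * procname components "random/uuid".  The caller passes path as an
 * empty, NUL-terminated buffer. */
static const struct bin_table *resolve_name(const struct bin_table *table,
					    const int *name, int nlen,
					    char *path, size_t pathlen)
{
	int i;

	for (i = 0; i < nlen; i++) {
		for (; table->convert; table++)
			if (table->ctl_name == name[i])
				break;
		if (!table->convert)
			return NULL;			/* unknown component */
		if (table->procname) {
			strlcat(path, table->procname, pathlen);
			if (i + 1 < nlen)
				strlcat(path, "/", pathlen);
		}
		if (table->child)
			table = table->child;		/* descend into a CTL_DIR */
		else if (i + 1 < nlen)
			return NULL;			/* leaf reached too early */
	}
	return table;					/* the final matched entry */
}
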
45
46static const struct bin_table bin_random_table[] = {
47 { CTL_INT, RANDOM_POOLSIZE, "poolsize" },
48 { CTL_INT, RANDOM_ENTROPY_COUNT, "entropy_avail" },
49 { CTL_INT, RANDOM_READ_THRESH, "read_wakeup_threshold" },
50 { CTL_INT, RANDOM_WRITE_THRESH, "write_wakeup_threshold" },
51 { CTL_UUID, RANDOM_BOOT_ID, "boot_id" },
52 { CTL_UUID, RANDOM_UUID, "uuid" },
53 {}
54};
55
56static const struct bin_table bin_pty_table[] = {
57 { CTL_INT, PTY_MAX, "max" },
58 { CTL_INT, PTY_NR, "nr" },
59 {}
60};
61
62static const struct bin_table bin_kern_table[] = {
63 { CTL_STR, KERN_OSTYPE, "ostype" },
64 { CTL_STR, KERN_OSRELEASE, "osrelease" },
65 /* KERN_OSREV not used */
66 { CTL_STR, KERN_VERSION, "version" },
67 /* KERN_SECUREMASK not used */
68 /* KERN_PROF not used */
69 { CTL_STR, KERN_NODENAME, "hostname" },
70 { CTL_STR, KERN_DOMAINNAME, "domainname" },
71
72 { CTL_INT, KERN_PANIC, "panic" },
73 { CTL_INT, KERN_REALROOTDEV, "real-root-dev" },
74
75 { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" },
76 { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" },
77 { CTL_INT, KERN_PRINTK, "printk" },
78
79 /* KERN_NAMETRANS not used */
80 /* KERN_PPC_HTABRECLAIM not used */
81 /* KERN_PPC_ZEROPAGED not used */
82 { CTL_INT, KERN_PPC_POWERSAVE_NAP, "powersave-nap" },
83
84 { CTL_STR, KERN_MODPROBE, "modprobe" },
85 { CTL_INT, KERN_SG_BIG_BUFF, "sg-big-buff" },
86 { CTL_INT, KERN_ACCT, "acct" },
87 /* KERN_PPC_L2CR "l2cr" no longer used */
88
89 /* KERN_RTSIGNR not used */
90 /* KERN_RTSIGMAX not used */
91
92 { CTL_ULONG, KERN_SHMMAX, "shmmax" },
93 { CTL_INT, KERN_MSGMAX, "msgmax" },
94 { CTL_INT, KERN_MSGMNB, "msgmnb" },
95 /* KERN_MSGPOOL not used*/
96 { CTL_INT, KERN_SYSRQ, "sysrq" },
97 { CTL_INT, KERN_MAX_THREADS, "threads-max" },
98 { CTL_DIR, KERN_RANDOM, "random", bin_random_table },
99 { CTL_ULONG, KERN_SHMALL, "shmall" },
100 { CTL_INT, KERN_MSGMNI, "msgmni" },
101 { CTL_INT, KERN_SEM, "sem" },
102 { CTL_INT, KERN_SPARC_STOP_A, "stop-a" },
103 { CTL_INT, KERN_SHMMNI, "shmmni" },
104
105 { CTL_INT, KERN_OVERFLOWUID, "overflowuid" },
106 { CTL_INT, KERN_OVERFLOWGID, "overflowgid" },
107
108 { CTL_STR, KERN_HOTPLUG, "hotplug", },
109 { CTL_INT, KERN_IEEE_EMULATION_WARNINGS, "ieee_emulation_warnings" },
110
111 { CTL_INT, KERN_S390_USER_DEBUG_LOGGING, "userprocess_debug" },
112 { CTL_INT, KERN_CORE_USES_PID, "core_uses_pid" },
113 /* KERN_TAINTED "tainted" no longer used */
114 { CTL_INT, KERN_CADPID, "cad_pid" },
115 { CTL_INT, KERN_PIDMAX, "pid_max" },
116 { CTL_STR, KERN_CORE_PATTERN, "core_pattern" },
117 { CTL_INT, KERN_PANIC_ON_OOPS, "panic_on_oops" },
118 { CTL_INT, KERN_HPPA_PWRSW, "soft-power" },
119 { CTL_INT, KERN_HPPA_UNALIGNED, "unaligned-trap" },
120
121 { CTL_INT, KERN_PRINTK_RATELIMIT, "printk_ratelimit" },
122 { CTL_INT, KERN_PRINTK_RATELIMIT_BURST, "printk_ratelimit_burst" },
123
124 { CTL_DIR, KERN_PTY, "pty", bin_pty_table },
125 { CTL_INT, KERN_NGROUPS_MAX, "ngroups_max" },
126 { CTL_INT, KERN_SPARC_SCONS_PWROFF, "scons-poweroff" },
127 /* KERN_HZ_TIMER "hz_timer" no longer used */
128 { CTL_INT, KERN_UNKNOWN_NMI_PANIC, "unknown_nmi_panic" },
129 { CTL_INT, KERN_BOOTLOADER_TYPE, "bootloader_type" },
130 { CTL_INT, KERN_RANDOMIZE, "randomize_va_space" },
131
132 { CTL_INT, KERN_SPIN_RETRY, "spin_retry" },
133 /* KERN_ACPI_VIDEO_FLAGS "acpi_video_flags" no longer used */
134 { CTL_INT, KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" },
135 { CTL_INT, KERN_COMPAT_LOG, "compat-log" },
136 { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" },
137 { CTL_INT, KERN_NMI_WATCHDOG, "nmi_watchdog" },
138 { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" },
139 {}
140};
141
142static const struct bin_table bin_vm_table[] = {
143 { CTL_INT, VM_OVERCOMMIT_MEMORY, "overcommit_memory" },
144 { CTL_INT, VM_PAGE_CLUSTER, "page-cluster" },
145 { CTL_INT, VM_DIRTY_BACKGROUND, "dirty_background_ratio" },
146 { CTL_INT, VM_DIRTY_RATIO, "dirty_ratio" },
147 /* VM_DIRTY_WB_CS "dirty_writeback_centisecs" no longer used */
148 /* VM_DIRTY_EXPIRE_CS "dirty_expire_centisecs" no longer used */
149 { CTL_INT, VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads" },
150 { CTL_INT, VM_OVERCOMMIT_RATIO, "overcommit_ratio" },
151 /* VM_PAGEBUF unused */
152 /* VM_HUGETLB_PAGES "nr_hugepages" no longer used */
153 { CTL_INT, VM_SWAPPINESS, "swappiness" },
154 { CTL_INT, VM_LOWMEM_RESERVE_RATIO, "lowmem_reserve_ratio" },
155 { CTL_INT, VM_MIN_FREE_KBYTES, "min_free_kbytes" },
156 { CTL_INT, VM_MAX_MAP_COUNT, "max_map_count" },
157 { CTL_INT, VM_LAPTOP_MODE, "laptop_mode" },
158 { CTL_INT, VM_BLOCK_DUMP, "block_dump" },
159 { CTL_INT, VM_HUGETLB_GROUP, "hugetlb_shm_group" },
160 { CTL_INT, VM_VFS_CACHE_PRESSURE, "vfs_cache_pressure" },
161 { CTL_INT, VM_LEGACY_VA_LAYOUT, "legacy_va_layout" },
162 /* VM_SWAP_TOKEN_TIMEOUT unused */
163 { CTL_INT, VM_DROP_PAGECACHE, "drop_caches" },
164 { CTL_INT, VM_PERCPU_PAGELIST_FRACTION, "percpu_pagelist_fraction" },
165 { CTL_INT, VM_ZONE_RECLAIM_MODE, "zone_reclaim_mode" },
166 { CTL_INT, VM_MIN_UNMAPPED, "min_unmapped_ratio" },
167 { CTL_INT, VM_PANIC_ON_OOM, "panic_on_oom" },
168 { CTL_INT, VM_VDSO_ENABLED, "vdso_enabled" },
169 { CTL_INT, VM_MIN_SLAB, "min_slab_ratio" },
170
171 {}
172};
173
174static const struct bin_table bin_net_core_table[] = {
175 { CTL_INT, NET_CORE_WMEM_MAX, "wmem_max" },
176 { CTL_INT, NET_CORE_RMEM_MAX, "rmem_max" },
177 { CTL_INT, NET_CORE_WMEM_DEFAULT, "wmem_default" },
178 { CTL_INT, NET_CORE_RMEM_DEFAULT, "rmem_default" },
179 /* NET_CORE_DESTROY_DELAY unused */
180 { CTL_INT, NET_CORE_MAX_BACKLOG, "netdev_max_backlog" },
181 /* NET_CORE_FASTROUTE unused */
182 { CTL_INT, NET_CORE_MSG_COST, "message_cost" },
183 { CTL_INT, NET_CORE_MSG_BURST, "message_burst" },
184 { CTL_INT, NET_CORE_OPTMEM_MAX, "optmem_max" },
185 /* NET_CORE_HOT_LIST_LENGTH unused */
186 /* NET_CORE_DIVERT_VERSION unused */
187 /* NET_CORE_NO_CONG_THRESH unused */
188 /* NET_CORE_NO_CONG unused */
189 /* NET_CORE_LO_CONG unused */
190 /* NET_CORE_MOD_CONG unused */
191 { CTL_INT, NET_CORE_DEV_WEIGHT, "dev_weight" },
192 { CTL_INT, NET_CORE_SOMAXCONN, "somaxconn" },
193 { CTL_INT, NET_CORE_BUDGET, "netdev_budget" },
194 { CTL_INT, NET_CORE_AEVENT_ETIME, "xfrm_aevent_etime" },
195 { CTL_INT, NET_CORE_AEVENT_RSEQTH, "xfrm_aevent_rseqth" },
196 { CTL_INT, NET_CORE_WARNINGS, "warnings" },
197 {},
198};
199
200static const struct bin_table bin_net_unix_table[] = {
201 /* NET_UNIX_DESTROY_DELAY unused */
202 /* NET_UNIX_DELETE_DELAY unused */
203 { CTL_INT, NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen" },
204 {}
205};
206
207static const struct bin_table bin_net_ipv4_route_table[] = {
208 { CTL_INT, NET_IPV4_ROUTE_FLUSH, "flush" },
209 /* NET_IPV4_ROUTE_MIN_DELAY "min_delay" no longer used */
210 /* NET_IPV4_ROUTE_MAX_DELAY "max_delay" no longer used */
211 { CTL_INT, NET_IPV4_ROUTE_GC_THRESH, "gc_thresh" },
212 { CTL_INT, NET_IPV4_ROUTE_MAX_SIZE, "max_size" },
213 { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" },
214 { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" },
215 { CTL_INT, NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout" },
216 { CTL_INT, NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval" },
217 { CTL_INT, NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load" },
218 { CTL_INT, NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number" },
219 { CTL_INT, NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence" },
220 { CTL_INT, NET_IPV4_ROUTE_ERROR_COST, "error_cost" },
221 { CTL_INT, NET_IPV4_ROUTE_ERROR_BURST, "error_burst" },
222 { CTL_INT, NET_IPV4_ROUTE_GC_ELASTICITY, "gc_elasticity" },
223 { CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" },
224 { CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" },
225 { CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" },
226 { CTL_INT, NET_IPV4_ROUTE_SECRET_INTERVAL, "secret_interval" },
227 {}
228};
229
230static const struct bin_table bin_net_ipv4_conf_vars_table[] = {
231 { CTL_INT, NET_IPV4_CONF_FORWARDING, "forwarding" },
232 { CTL_INT, NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding" },
233
234 { CTL_INT, NET_IPV4_CONF_ACCEPT_REDIRECTS, "accept_redirects" },
235 { CTL_INT, NET_IPV4_CONF_SECURE_REDIRECTS, "secure_redirects" },
236 { CTL_INT, NET_IPV4_CONF_SEND_REDIRECTS, "send_redirects" },
237 { CTL_INT, NET_IPV4_CONF_SHARED_MEDIA, "shared_media" },
238 { CTL_INT, NET_IPV4_CONF_RP_FILTER, "rp_filter" },
239 { CTL_INT, NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, "accept_source_route" },
240 { CTL_INT, NET_IPV4_CONF_PROXY_ARP, "proxy_arp" },
241 { CTL_INT, NET_IPV4_CONF_MEDIUM_ID, "medium_id" },
242 { CTL_INT, NET_IPV4_CONF_BOOTP_RELAY, "bootp_relay" },
243 { CTL_INT, NET_IPV4_CONF_LOG_MARTIANS, "log_martians" },
244 { CTL_INT, NET_IPV4_CONF_TAG, "tag" },
245 { CTL_INT, NET_IPV4_CONF_ARPFILTER, "arp_filter" },
246 { CTL_INT, NET_IPV4_CONF_ARP_ANNOUNCE, "arp_announce" },
247 { CTL_INT, NET_IPV4_CONF_ARP_IGNORE, "arp_ignore" },
248 { CTL_INT, NET_IPV4_CONF_ARP_ACCEPT, "arp_accept" },
249 { CTL_INT, NET_IPV4_CONF_ARP_NOTIFY, "arp_notify" },
250
251 { CTL_INT, NET_IPV4_CONF_NOXFRM, "disable_xfrm" },
252 { CTL_INT, NET_IPV4_CONF_NOPOLICY, "disable_policy" },
253 { CTL_INT, NET_IPV4_CONF_FORCE_IGMP_VERSION, "force_igmp_version" },
254 { CTL_INT, NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" },
255 {}
256};
257
258static const struct bin_table bin_net_ipv4_conf_table[] = {
259 { CTL_DIR, NET_PROTO_CONF_ALL, "all", bin_net_ipv4_conf_vars_table },
260 { CTL_DIR, NET_PROTO_CONF_DEFAULT, "default", bin_net_ipv4_conf_vars_table },
261 { CTL_DIR, 0, NULL, bin_net_ipv4_conf_vars_table },
262 {}
263};
264
265static const struct bin_table bin_net_neigh_vars_table[] = {
266 { CTL_INT, NET_NEIGH_MCAST_SOLICIT, "mcast_solicit" },
267 { CTL_INT, NET_NEIGH_UCAST_SOLICIT, "ucast_solicit" },
268 { CTL_INT, NET_NEIGH_APP_SOLICIT, "app_solicit" },
269 /* NET_NEIGH_RETRANS_TIME "retrans_time" no longer used */
270 { CTL_INT, NET_NEIGH_REACHABLE_TIME, "base_reachable_time" },
271 { CTL_INT, NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time" },
272 { CTL_INT, NET_NEIGH_GC_STALE_TIME, "gc_stale_time" },
273 { CTL_INT, NET_NEIGH_UNRES_QLEN, "unres_qlen" },
274 { CTL_INT, NET_NEIGH_PROXY_QLEN, "proxy_qlen" },
275 /* NET_NEIGH_ANYCAST_DELAY "anycast_delay" no longer used */
276 /* NET_NEIGH_PROXY_DELAY "proxy_delay" no longer used */
277 /* NET_NEIGH_LOCKTIME "locktime" no longer used */
278 { CTL_INT, NET_NEIGH_GC_INTERVAL, "gc_interval" },
279 { CTL_INT, NET_NEIGH_GC_THRESH1, "gc_thresh1" },
280 { CTL_INT, NET_NEIGH_GC_THRESH2, "gc_thresh2" },
281 { CTL_INT, NET_NEIGH_GC_THRESH3, "gc_thresh3" },
282 { CTL_INT, NET_NEIGH_RETRANS_TIME_MS, "retrans_time_ms" },
283 { CTL_INT, NET_NEIGH_REACHABLE_TIME_MS, "base_reachable_time_ms" },
284 {}
285};
286
287static const struct bin_table bin_net_neigh_table[] = {
288 { CTL_DIR, NET_PROTO_CONF_DEFAULT, "default", bin_net_neigh_vars_table },
289 { CTL_DIR, 0, NULL, bin_net_neigh_vars_table },
290 {}
291};
292
293static const struct bin_table bin_net_ipv4_netfilter_table[] = {
294 { CTL_INT, NET_IPV4_NF_CONNTRACK_MAX, "ip_conntrack_max" },
295
296 /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT "ip_conntrack_tcp_timeout_syn_sent" no longer used */
297 /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV "ip_conntrack_tcp_timeout_syn_recv" no longer used */
298 /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED "ip_conntrack_tcp_timeout_established" no longer used */
299 /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT "ip_conntrack_tcp_timeout_fin_wait" no longer used */
300 /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT "ip_conntrack_tcp_timeout_close_wait" no longer used */
301 /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK "ip_conntrack_tcp_timeout_last_ack" no longer used */
302 /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT "ip_conntrack_tcp_timeout_time_wait" no longer used */
303 /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE "ip_conntrack_tcp_timeout_close" no longer used */
304
305 /* NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT "ip_conntrack_udp_timeout" no longer used */
306 /* NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM "ip_conntrack_udp_timeout_stream" no longer used */
307 /* NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT "ip_conntrack_icmp_timeout" no longer used */
308 /* NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT "ip_conntrack_generic_timeout" no longer used */
309
310 { CTL_INT, NET_IPV4_NF_CONNTRACK_BUCKETS, "ip_conntrack_buckets" },
311 { CTL_INT, NET_IPV4_NF_CONNTRACK_LOG_INVALID, "ip_conntrack_log_invalid" },
312 /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS "ip_conntrack_tcp_timeout_max_retrans" no longer used */
313 { CTL_INT, NET_IPV4_NF_CONNTRACK_TCP_LOOSE, "ip_conntrack_tcp_loose" },
314 { CTL_INT, NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, "ip_conntrack_tcp_be_liberal" },
315 { CTL_INT, NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, "ip_conntrack_tcp_max_retrans" },
316
317 /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED "ip_conntrack_sctp_timeout_closed" no longer used */
318 /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT "ip_conntrack_sctp_timeout_cookie_wait" no longer used */
319 /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED "ip_conntrack_sctp_timeout_cookie_echoed" no longer used */
320 /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED "ip_conntrack_sctp_timeout_established" no longer used */
321 /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT "ip_conntrack_sctp_timeout_shutdown_sent" no longer used */
322 /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD "ip_conntrack_sctp_timeout_shutdown_recd" no longer used */
323 /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT "ip_conntrack_sctp_timeout_shutdown_ack_sent" no longer used */
324
325 { CTL_INT, NET_IPV4_NF_CONNTRACK_COUNT, "ip_conntrack_count" },
326 { CTL_INT, NET_IPV4_NF_CONNTRACK_CHECKSUM, "ip_conntrack_checksum" },
327 {}
328};
329
330static const struct bin_table bin_net_ipv4_table[] = {
331 {CTL_INT, NET_IPV4_FORWARD, "ip_forward" },
332
333 { CTL_DIR, NET_IPV4_CONF, "conf", bin_net_ipv4_conf_table },
334 { CTL_DIR, NET_IPV4_NEIGH, "neigh", bin_net_neigh_table },
335 { CTL_DIR, NET_IPV4_ROUTE, "route", bin_net_ipv4_route_table },
336 /* NET_IPV4_FIB_HASH unused */
337 { CTL_DIR, NET_IPV4_NETFILTER, "netfilter", bin_net_ipv4_netfilter_table },
338
339 { CTL_INT, NET_IPV4_TCP_TIMESTAMPS, "tcp_timestamps" },
340 { CTL_INT, NET_IPV4_TCP_WINDOW_SCALING, "tcp_window_scaling" },
341 { CTL_INT, NET_IPV4_TCP_SACK, "tcp_sack" },
342 { CTL_INT, NET_IPV4_TCP_RETRANS_COLLAPSE, "tcp_retrans_collapse" },
343 { CTL_INT, NET_IPV4_DEFAULT_TTL, "ip_default_ttl" },
344 /* NET_IPV4_AUTOCONFIG unused */
345 { CTL_INT, NET_IPV4_NO_PMTU_DISC, "ip_no_pmtu_disc" },
346 { CTL_INT, NET_IPV4_NONLOCAL_BIND, "ip_nonlocal_bind" },
347 { CTL_INT, NET_IPV4_TCP_SYN_RETRIES, "tcp_syn_retries" },
348 { CTL_INT, NET_TCP_SYNACK_RETRIES, "tcp_synack_retries" },
349 { CTL_INT, NET_TCP_MAX_ORPHANS, "tcp_max_orphans" },
350 { CTL_INT, NET_TCP_MAX_TW_BUCKETS, "tcp_max_tw_buckets" },
351 { CTL_INT, NET_IPV4_DYNADDR, "ip_dynaddr" },
352 { CTL_INT, NET_IPV4_TCP_KEEPALIVE_TIME, "tcp_keepalive_time" },
353 { CTL_INT, NET_IPV4_TCP_KEEPALIVE_PROBES, "tcp_keepalive_probes" },
354 { CTL_INT, NET_IPV4_TCP_KEEPALIVE_INTVL, "tcp_keepalive_intvl" },
355 { CTL_INT, NET_IPV4_TCP_RETRIES1, "tcp_retries1" },
356 { CTL_INT, NET_IPV4_TCP_RETRIES2, "tcp_retries2" },
357 { CTL_INT, NET_IPV4_TCP_FIN_TIMEOUT, "tcp_fin_timeout" },
358 { CTL_INT, NET_TCP_SYNCOOKIES, "tcp_syncookies" },
359 { CTL_INT, NET_TCP_TW_RECYCLE, "tcp_tw_recycle" },
360 { CTL_INT, NET_TCP_ABORT_ON_OVERFLOW, "tcp_abort_on_overflow" },
361 { CTL_INT, NET_TCP_STDURG, "tcp_stdurg" },
362 { CTL_INT, NET_TCP_RFC1337, "tcp_rfc1337" },
363 { CTL_INT, NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog" },
364 { CTL_INT, NET_IPV4_LOCAL_PORT_RANGE, "ip_local_port_range" },
365 { CTL_INT, NET_IPV4_IGMP_MAX_MEMBERSHIPS, "igmp_max_memberships" },
366 { CTL_INT, NET_IPV4_IGMP_MAX_MSF, "igmp_max_msf" },
367 { CTL_INT, NET_IPV4_INET_PEER_THRESHOLD, "inet_peer_threshold" },
368 { CTL_INT, NET_IPV4_INET_PEER_MINTTL, "inet_peer_minttl" },
369 { CTL_INT, NET_IPV4_INET_PEER_MAXTTL, "inet_peer_maxttl" },
370 { CTL_INT, NET_IPV4_INET_PEER_GC_MINTIME, "inet_peer_gc_mintime" },
371 { CTL_INT, NET_IPV4_INET_PEER_GC_MAXTIME, "inet_peer_gc_maxtime" },
372 { CTL_INT, NET_TCP_ORPHAN_RETRIES, "tcp_orphan_retries" },
373 { CTL_INT, NET_TCP_FACK, "tcp_fack" },
374 { CTL_INT, NET_TCP_REORDERING, "tcp_reordering" },
375 { CTL_INT, NET_TCP_ECN, "tcp_ecn" },
376 { CTL_INT, NET_TCP_DSACK, "tcp_dsack" },
377 { CTL_INT, NET_TCP_MEM, "tcp_mem" },
378 { CTL_INT, NET_TCP_WMEM, "tcp_wmem" },
379 { CTL_INT, NET_TCP_RMEM, "tcp_rmem" },
380 { CTL_INT, NET_TCP_APP_WIN, "tcp_app_win" },
381 { CTL_INT, NET_TCP_ADV_WIN_SCALE, "tcp_adv_win_scale" },
382 { CTL_INT, NET_TCP_TW_REUSE, "tcp_tw_reuse" },
383 { CTL_INT, NET_TCP_FRTO, "tcp_frto" },
384 { CTL_INT, NET_TCP_FRTO_RESPONSE, "tcp_frto_response" },
385 { CTL_INT, NET_TCP_LOW_LATENCY, "tcp_low_latency" },
386 { CTL_INT, NET_TCP_NO_METRICS_SAVE, "tcp_no_metrics_save" },
387 { CTL_INT, NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" },
388 { CTL_INT, NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" },
389 { CTL_STR, NET_TCP_CONG_CONTROL, "tcp_congestion_control" },
390 { CTL_INT, NET_TCP_ABC, "tcp_abc" },
391 { CTL_INT, NET_TCP_MTU_PROBING, "tcp_mtu_probing" },
392 { CTL_INT, NET_TCP_BASE_MSS, "tcp_base_mss" },
393 { CTL_INT, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" },
394 { CTL_INT, NET_TCP_DMA_COPYBREAK, "tcp_dma_copybreak" },
395 { CTL_INT, NET_TCP_SLOW_START_AFTER_IDLE, "tcp_slow_start_after_idle" },
396 { CTL_INT, NET_CIPSOV4_CACHE_ENABLE, "cipso_cache_enable" },
397 { CTL_INT, NET_CIPSOV4_CACHE_BUCKET_SIZE, "cipso_cache_bucket_size" },
398 { CTL_INT, NET_CIPSOV4_RBM_OPTFMT, "cipso_rbm_optfmt" },
399 { CTL_INT, NET_CIPSOV4_RBM_STRICTVALID, "cipso_rbm_strictvalid" },
400 /* NET_TCP_AVAIL_CONG_CONTROL "tcp_available_congestion_control" no longer used */
401 { CTL_STR, NET_TCP_ALLOWED_CONG_CONTROL, "tcp_allowed_congestion_control" },
402 { CTL_INT, NET_TCP_MAX_SSTHRESH, "tcp_max_ssthresh" },
403
404 { CTL_INT, NET_IPV4_ICMP_ECHO_IGNORE_ALL, "icmp_echo_ignore_all" },
405 { CTL_INT, NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, "icmp_echo_ignore_broadcasts" },
406 { CTL_INT, NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, "icmp_ignore_bogus_error_responses" },
407 { CTL_INT, NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, "icmp_errors_use_inbound_ifaddr" },
408 { CTL_INT, NET_IPV4_ICMP_RATELIMIT, "icmp_ratelimit" },
409 { CTL_INT, NET_IPV4_ICMP_RATEMASK, "icmp_ratemask" },
410
411 { CTL_INT, NET_IPV4_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh" },
412 { CTL_INT, NET_IPV4_IPFRAG_LOW_THRESH, "ipfrag_low_thresh" },
413 { CTL_INT, NET_IPV4_IPFRAG_TIME, "ipfrag_time" },
414
415 { CTL_INT, NET_IPV4_IPFRAG_SECRET_INTERVAL, "ipfrag_secret_interval" },
416 /* NET_IPV4_IPFRAG_MAX_DIST "ipfrag_max_dist" no longer used */
417
418 { CTL_INT, 2088 /* NET_IPQ_QMAX */, "ip_queue_maxlen" },
419
420 /* NET_TCP_DEFAULT_WIN_SCALE unused */
421 /* NET_TCP_BIC_BETA unused */
422 /* NET_IPV4_TCP_MAX_KA_PROBES unused */
423 /* NET_IPV4_IP_MASQ_DEBUG unused */
424 /* NET_TCP_SYN_TAILDROP unused */
425 /* NET_IPV4_ICMP_SOURCEQUENCH_RATE unused */
426 /* NET_IPV4_ICMP_DESTUNREACH_RATE unused */
427 /* NET_IPV4_ICMP_TIMEEXCEED_RATE unused */
428 /* NET_IPV4_ICMP_PARAMPROB_RATE unused */
429 /* NET_IPV4_ICMP_ECHOREPLY_RATE unused */
430 /* NET_IPV4_ALWAYS_DEFRAG unused */
431 {}
432};
433
434static const struct bin_table bin_net_ipx_table[] = {
435 { CTL_INT, NET_IPX_PPROP_BROADCASTING, "ipx_pprop_broadcasting" },
436 /* NET_IPX_FORWARDING unused */
437 {}
438};
439
440static const struct bin_table bin_net_atalk_table[] = {
441 { CTL_INT, NET_ATALK_AARP_EXPIRY_TIME, "aarp-expiry-time" },
442 { CTL_INT, NET_ATALK_AARP_TICK_TIME, "aarp-tick-time" },
443 { CTL_INT, NET_ATALK_AARP_RETRANSMIT_LIMIT, "aarp-retransmit-limit" },
444 { CTL_INT, NET_ATALK_AARP_RESOLVE_TIME, "aarp-resolve-time" },
445 {},
446};
447
448static const struct bin_table bin_net_netrom_table[] = {
449 { CTL_INT, NET_NETROM_DEFAULT_PATH_QUALITY, "default_path_quality" },
450 { CTL_INT, NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER, "obsolescence_count_initialiser" },
451 { CTL_INT, NET_NETROM_NETWORK_TTL_INITIALISER, "network_ttl_initialiser" },
452 { CTL_INT, NET_NETROM_TRANSPORT_TIMEOUT, "transport_timeout" },
453 { CTL_INT, NET_NETROM_TRANSPORT_MAXIMUM_TRIES, "transport_maximum_tries" },
454 { CTL_INT, NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY, "transport_acknowledge_delay" },
455 { CTL_INT, NET_NETROM_TRANSPORT_BUSY_DELAY, "transport_busy_delay" },
456 { CTL_INT, NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE, "transport_requested_window_size" },
457 { CTL_INT, NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT, "transport_no_activity_timeout" },
458 { CTL_INT, NET_NETROM_ROUTING_CONTROL, "routing_control" },
459 { CTL_INT, NET_NETROM_LINK_FAILS_COUNT, "link_fails_count" },
460 { CTL_INT, NET_NETROM_RESET, "reset" },
461 {}
462};
463
464static const struct bin_table bin_net_ax25_param_table[] = {
465 { CTL_INT, NET_AX25_IP_DEFAULT_MODE, "ip_default_mode" },
466 { CTL_INT, NET_AX25_DEFAULT_MODE, "ax25_default_mode" },
467 { CTL_INT, NET_AX25_BACKOFF_TYPE, "backoff_type" },
468 { CTL_INT, NET_AX25_CONNECT_MODE, "connect_mode" },
469 { CTL_INT, NET_AX25_STANDARD_WINDOW, "standard_window_size" },
470 { CTL_INT, NET_AX25_EXTENDED_WINDOW, "extended_window_size" },
471 { CTL_INT, NET_AX25_T1_TIMEOUT, "t1_timeout" },
472 { CTL_INT, NET_AX25_T2_TIMEOUT, "t2_timeout" },
473 { CTL_INT, NET_AX25_T3_TIMEOUT, "t3_timeout" },
474 { CTL_INT, NET_AX25_IDLE_TIMEOUT, "idle_timeout" },
475 { CTL_INT, NET_AX25_N2, "maximum_retry_count" },
476 { CTL_INT, NET_AX25_PACLEN, "maximum_packet_length" },
477 { CTL_INT, NET_AX25_PROTOCOL, "protocol" },
478 { CTL_INT, NET_AX25_DAMA_SLAVE_TIMEOUT, "dama_slave_timeout" },
479 {}
480};
481
482static const struct bin_table bin_net_ax25_table[] = {
483 { CTL_DIR, 0, NULL, bin_net_ax25_param_table },
484 {}
485};
486
487static const struct bin_table bin_net_rose_table[] = {
488 { CTL_INT, NET_ROSE_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" },
489 { CTL_INT, NET_ROSE_CALL_REQUEST_TIMEOUT, "call_request_timeout" },
490 { CTL_INT, NET_ROSE_RESET_REQUEST_TIMEOUT, "reset_request_timeout" },
491 { CTL_INT, NET_ROSE_CLEAR_REQUEST_TIMEOUT, "clear_request_timeout" },
492 { CTL_INT, NET_ROSE_ACK_HOLD_BACK_TIMEOUT, "acknowledge_hold_back_timeout" },
493 { CTL_INT, NET_ROSE_ROUTING_CONTROL, "routing_control" },
494 { CTL_INT, NET_ROSE_LINK_FAIL_TIMEOUT, "link_fail_timeout" },
495 { CTL_INT, NET_ROSE_MAX_VCS, "maximum_virtual_circuits" },
496 { CTL_INT, NET_ROSE_WINDOW_SIZE, "window_size" },
497 { CTL_INT, NET_ROSE_NO_ACTIVITY_TIMEOUT, "no_activity_timeout" },
498 {}
499};
500
501static const struct bin_table bin_net_ipv6_conf_var_table[] = {
502 { CTL_INT, NET_IPV6_FORWARDING, "forwarding" },
503 { CTL_INT, NET_IPV6_HOP_LIMIT, "hop_limit" },
504 { CTL_INT, NET_IPV6_MTU, "mtu" },
505 { CTL_INT, NET_IPV6_ACCEPT_RA, "accept_ra" },
506 { CTL_INT, NET_IPV6_ACCEPT_REDIRECTS, "accept_redirects" },
507 { CTL_INT, NET_IPV6_AUTOCONF, "autoconf" },
508 { CTL_INT, NET_IPV6_DAD_TRANSMITS, "dad_transmits" },
509 { CTL_INT, NET_IPV6_RTR_SOLICITS, "router_solicitations" },
510 { CTL_INT, NET_IPV6_RTR_SOLICIT_INTERVAL, "router_solicitation_interval" },
511 { CTL_INT, NET_IPV6_RTR_SOLICIT_DELAY, "router_solicitation_delay" },
512 { CTL_INT, NET_IPV6_USE_TEMPADDR, "use_tempaddr" },
513 { CTL_INT, NET_IPV6_TEMP_VALID_LFT, "temp_valid_lft" },
514 { CTL_INT, NET_IPV6_TEMP_PREFERED_LFT, "temp_prefered_lft" },
515 { CTL_INT, NET_IPV6_REGEN_MAX_RETRY, "regen_max_retry" },
516 { CTL_INT, NET_IPV6_MAX_DESYNC_FACTOR, "max_desync_factor" },
517 { CTL_INT, NET_IPV6_MAX_ADDRESSES, "max_addresses" },
518 { CTL_INT, NET_IPV6_FORCE_MLD_VERSION, "force_mld_version" },
519 { CTL_INT, NET_IPV6_ACCEPT_RA_DEFRTR, "accept_ra_defrtr" },
520 { CTL_INT, NET_IPV6_ACCEPT_RA_PINFO, "accept_ra_pinfo" },
521 { CTL_INT, NET_IPV6_ACCEPT_RA_RTR_PREF, "accept_ra_rtr_pref" },
522 { CTL_INT, NET_IPV6_RTR_PROBE_INTERVAL, "router_probe_interval" },
523 { CTL_INT, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, "accept_ra_rt_info_max_plen" },
524 { CTL_INT, NET_IPV6_PROXY_NDP, "proxy_ndp" },
525 { CTL_INT, NET_IPV6_ACCEPT_SOURCE_ROUTE, "accept_source_route" },
526 {}
527};
528
529static const struct bin_table bin_net_ipv6_conf_table[] = {
530 { CTL_DIR, NET_PROTO_CONF_ALL, "all", bin_net_ipv6_conf_var_table },
531 { CTL_DIR, NET_PROTO_CONF_DEFAULT, "default", bin_net_ipv6_conf_var_table },
532 { CTL_DIR, 0, NULL, bin_net_ipv6_conf_var_table },
533 {}
534};
535
536static const struct bin_table bin_net_ipv6_route_table[] = {
537 /* NET_IPV6_ROUTE_FLUSH "flush" no longer used */
538 { CTL_INT, NET_IPV6_ROUTE_GC_THRESH, "gc_thresh" },
539 { CTL_INT, NET_IPV6_ROUTE_MAX_SIZE, "max_size" },
540 { CTL_INT, NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" },
541 { CTL_INT, NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout" },
542 { CTL_INT, NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval" },
543 { CTL_INT, NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity" },
544 { CTL_INT, NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires" },
545 { CTL_INT, NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss" },
546 { CTL_INT, NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" },
547 {}
548};
549
550static const struct bin_table bin_net_ipv6_icmp_table[] = {
551 { CTL_INT, NET_IPV6_ICMP_RATELIMIT, "ratelimit" },
552 {}
553};
554
555static const struct bin_table bin_net_ipv6_table[] = {
556 { CTL_DIR, NET_IPV6_CONF, "conf", bin_net_ipv6_conf_table },
557 { CTL_DIR, NET_IPV6_NEIGH, "neigh", bin_net_neigh_table },
558 { CTL_DIR, NET_IPV6_ROUTE, "route", bin_net_ipv6_route_table },
559 { CTL_DIR, NET_IPV6_ICMP, "icmp", bin_net_ipv6_icmp_table },
560 { CTL_INT, NET_IPV6_BINDV6ONLY, "bindv6only" },
561 { CTL_INT, NET_IPV6_IP6FRAG_HIGH_THRESH, "ip6frag_high_thresh" },
562 { CTL_INT, NET_IPV6_IP6FRAG_LOW_THRESH, "ip6frag_low_thresh" },
563 { CTL_INT, NET_IPV6_IP6FRAG_TIME, "ip6frag_time" },
564 { CTL_INT, NET_IPV6_IP6FRAG_SECRET_INTERVAL, "ip6frag_secret_interval" },
565 { CTL_INT, NET_IPV6_MLD_MAX_MSF, "mld_max_msf" },
566 { CTL_INT, 2088 /* IPQ_QMAX */, "ip6_queue_maxlen" },
567 {}
568};
569
570static const struct bin_table bin_net_x25_table[] = {
571 { CTL_INT, NET_X25_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" },
572 { CTL_INT, NET_X25_CALL_REQUEST_TIMEOUT, "call_request_timeout" },
573 { CTL_INT, NET_X25_RESET_REQUEST_TIMEOUT, "reset_request_timeout" },
574 { CTL_INT, NET_X25_CLEAR_REQUEST_TIMEOUT, "clear_request_timeout" },
575 { CTL_INT, NET_X25_ACK_HOLD_BACK_TIMEOUT, "acknowledgement_hold_back_timeout" },
576 { CTL_INT, NET_X25_FORWARD, "x25_forward" },
577 {}
578};
579
580static const struct bin_table bin_net_tr_table[] = {
581 { CTL_INT, NET_TR_RIF_TIMEOUT, "rif_timeout" },
582 {}
583};
584
585
586static const struct bin_table bin_net_decnet_conf_vars[] = {
587 { CTL_INT, NET_DECNET_CONF_DEV_FORWARDING, "forwarding" },
588 { CTL_INT, NET_DECNET_CONF_DEV_PRIORITY, "priority" },
589 { CTL_INT, NET_DECNET_CONF_DEV_T2, "t2" },
590 { CTL_INT, NET_DECNET_CONF_DEV_T3, "t3" },
591 {}
592};
593
594static const struct bin_table bin_net_decnet_conf[] = {
595 { CTL_DIR, NET_DECNET_CONF_ETHER, "ethernet", bin_net_decnet_conf_vars },
596 { CTL_DIR, NET_DECNET_CONF_GRE, "ipgre", bin_net_decnet_conf_vars },
597 { CTL_DIR, NET_DECNET_CONF_X25, "x25", bin_net_decnet_conf_vars },
598 { CTL_DIR, NET_DECNET_CONF_PPP, "ppp", bin_net_decnet_conf_vars },
599 { CTL_DIR, NET_DECNET_CONF_DDCMP, "ddcmp", bin_net_decnet_conf_vars },
600 { CTL_DIR, NET_DECNET_CONF_LOOPBACK, "loopback", bin_net_decnet_conf_vars },
601 { CTL_DIR, 0, NULL, bin_net_decnet_conf_vars },
602 {}
603};
604
605static const struct bin_table bin_net_decnet_table[] = {
606 { CTL_DIR, NET_DECNET_CONF, "conf", bin_net_decnet_conf },
607 { CTL_DNADR, NET_DECNET_NODE_ADDRESS, "node_address" },
608 { CTL_STR, NET_DECNET_NODE_NAME, "node_name" },
609 { CTL_STR, NET_DECNET_DEFAULT_DEVICE, "default_device" },
610 { CTL_INT, NET_DECNET_TIME_WAIT, "time_wait" },
611 { CTL_INT, NET_DECNET_DN_COUNT, "dn_count" },
612 { CTL_INT, NET_DECNET_DI_COUNT, "di_count" },
613 { CTL_INT, NET_DECNET_DR_COUNT, "dr_count" },
614 { CTL_INT, NET_DECNET_DST_GC_INTERVAL, "dst_gc_interval" },
615 { CTL_INT, NET_DECNET_NO_FC_MAX_CWND, "no_fc_max_cwnd" },
616 { CTL_INT, NET_DECNET_MEM, "decnet_mem" },
617 { CTL_INT, NET_DECNET_RMEM, "decnet_rmem" },
618 { CTL_INT, NET_DECNET_WMEM, "decnet_wmem" },
619 { CTL_INT, NET_DECNET_DEBUG_LEVEL, "debug" },
620 {}
621};
622
623static const struct bin_table bin_net_sctp_table[] = {
624 { CTL_INT, NET_SCTP_RTO_INITIAL, "rto_initial" },
625 { CTL_INT, NET_SCTP_RTO_MIN, "rto_min" },
626 { CTL_INT, NET_SCTP_RTO_MAX, "rto_max" },
627 { CTL_INT, NET_SCTP_RTO_ALPHA, "rto_alpha_exp_divisor" },
628 { CTL_INT, NET_SCTP_RTO_BETA, "rto_beta_exp_divisor" },
629 { CTL_INT, NET_SCTP_VALID_COOKIE_LIFE, "valid_cookie_life" },
630 { CTL_INT, NET_SCTP_ASSOCIATION_MAX_RETRANS, "association_max_retrans" },
631 { CTL_INT, NET_SCTP_PATH_MAX_RETRANS, "path_max_retrans" },
632 { CTL_INT, NET_SCTP_MAX_INIT_RETRANSMITS, "max_init_retransmits" },
633 { CTL_INT, NET_SCTP_HB_INTERVAL, "hb_interval" },
634 { CTL_INT, NET_SCTP_PRESERVE_ENABLE, "cookie_preserve_enable" },
635 { CTL_INT, NET_SCTP_MAX_BURST, "max_burst" },
636 { CTL_INT, NET_SCTP_ADDIP_ENABLE, "addip_enable" },
637 { CTL_INT, NET_SCTP_PRSCTP_ENABLE, "prsctp_enable" },
638 { CTL_INT, NET_SCTP_SNDBUF_POLICY, "sndbuf_policy" },
639 { CTL_INT, NET_SCTP_SACK_TIMEOUT, "sack_timeout" },
640 { CTL_INT, NET_SCTP_RCVBUF_POLICY, "rcvbuf_policy" },
641 {}
642};
643
644static const struct bin_table bin_net_llc_llc2_timeout_table[] = {
645 { CTL_INT, NET_LLC2_ACK_TIMEOUT, "ack" },
646 { CTL_INT, NET_LLC2_P_TIMEOUT, "p" },
647 { CTL_INT, NET_LLC2_REJ_TIMEOUT, "rej" },
648 { CTL_INT, NET_LLC2_BUSY_TIMEOUT, "busy" },
649 {}
650};
651
652static const struct bin_table bin_net_llc_station_table[] = {
653 { CTL_INT, NET_LLC_STATION_ACK_TIMEOUT, "ack_timeout" },
654 {}
655};
656
657static const struct bin_table bin_net_llc_llc2_table[] = {
658 { CTL_DIR, NET_LLC2, "timeout", bin_net_llc_llc2_timeout_table },
659 {}
660};
661
662static const struct bin_table bin_net_llc_table[] = {
663 { CTL_DIR, NET_LLC2, "llc2", bin_net_llc_llc2_table },
664 { CTL_DIR, NET_LLC_STATION, "station", bin_net_llc_station_table },
665 {}
666};
667
668static const struct bin_table bin_net_netfilter_table[] = {
669 { CTL_INT, NET_NF_CONNTRACK_MAX, "nf_conntrack_max" },
670 /* NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT "nf_conntrack_tcp_timeout_syn_sent" no longer used */
671 /* NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV "nf_conntrack_tcp_timeout_syn_recv" no longer used */
672 /* NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED "nf_conntrack_tcp_timeout_established" no longer used */
673 /* NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT "nf_conntrack_tcp_timeout_fin_wait" no longer used */
674 /* NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT "nf_conntrack_tcp_timeout_close_wait" no longer used */
675 /* NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK "nf_conntrack_tcp_timeout_last_ack" no longer used */
676 /* NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT "nf_conntrack_tcp_timeout_time_wait" no longer used */
677 /* NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE "nf_conntrack_tcp_timeout_close" no longer used */
678 /* NET_NF_CONNTRACK_UDP_TIMEOUT "nf_conntrack_udp_timeout" no longer used */
679 /* NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM "nf_conntrack_udp_timeout_stream" no longer used */
680 /* NET_NF_CONNTRACK_ICMP_TIMEOUT "nf_conntrack_icmp_timeout" no longer used */
681 /* NET_NF_CONNTRACK_GENERIC_TIMEOUT "nf_conntrack_generic_timeout" no longer used */
682 { CTL_INT, NET_NF_CONNTRACK_BUCKETS, "nf_conntrack_buckets" },
683 { CTL_INT, NET_NF_CONNTRACK_LOG_INVALID, "nf_conntrack_log_invalid" },
684 /* NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS "nf_conntrack_tcp_timeout_max_retrans" no longer used */
685 { CTL_INT, NET_NF_CONNTRACK_TCP_LOOSE, "nf_conntrack_tcp_loose" },
686 { CTL_INT, NET_NF_CONNTRACK_TCP_BE_LIBERAL, "nf_conntrack_tcp_be_liberal" },
687 { CTL_INT, NET_NF_CONNTRACK_TCP_MAX_RETRANS, "nf_conntrack_tcp_max_retrans" },
688 /* NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED "nf_conntrack_sctp_timeout_closed" no longer used */
689 /* NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT "nf_conntrack_sctp_timeout_cookie_wait" no longer used */
690 /* NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED "nf_conntrack_sctp_timeout_cookie_echoed" no longer used */
691 /* NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED "nf_conntrack_sctp_timeout_established" no longer used */
692 /* NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT "nf_conntrack_sctp_timeout_shutdown_sent" no longer used */
693 /* NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD "nf_conntrack_sctp_timeout_shutdown_recd" no longer used */
694 /* NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT "nf_conntrack_sctp_timeout_shutdown_ack_sent" no longer used */
695 { CTL_INT, NET_NF_CONNTRACK_COUNT, "nf_conntrack_count" },
696 /* NET_NF_CONNTRACK_ICMPV6_TIMEOUT "nf_conntrack_icmpv6_timeout" no longer used */
697 /* NET_NF_CONNTRACK_FRAG6_TIMEOUT "nf_conntrack_frag6_timeout" no longer used */
698 { CTL_INT, NET_NF_CONNTRACK_FRAG6_LOW_THRESH, "nf_conntrack_frag6_low_thresh" },
699 { CTL_INT, NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, "nf_conntrack_frag6_high_thresh" },
700 { CTL_INT, NET_NF_CONNTRACK_CHECKSUM, "nf_conntrack_checksum" },
701
702 {}
703};
704
705static const struct bin_table bin_net_irda_table[] = {
706 { CTL_INT, NET_IRDA_DISCOVERY, "discovery" },
707 { CTL_STR, NET_IRDA_DEVNAME, "devname" },
708 { CTL_INT, NET_IRDA_DEBUG, "debug" },
709 { CTL_INT, NET_IRDA_FAST_POLL, "fast_poll_increase" },
710 { CTL_INT, NET_IRDA_DISCOVERY_SLOTS, "discovery_slots" },
711 { CTL_INT, NET_IRDA_DISCOVERY_TIMEOUT, "discovery_timeout" },
712 { CTL_INT, NET_IRDA_SLOT_TIMEOUT, "slot_timeout" },
713 { CTL_INT, NET_IRDA_MAX_BAUD_RATE, "max_baud_rate" },
714 { CTL_INT, NET_IRDA_MIN_TX_TURN_TIME, "min_tx_turn_time" },
715 { CTL_INT, NET_IRDA_MAX_TX_DATA_SIZE, "max_tx_data_size" },
716 { CTL_INT, NET_IRDA_MAX_TX_WINDOW, "max_tx_window" },
717 { CTL_INT, NET_IRDA_MAX_NOREPLY_TIME, "max_noreply_time" },
718 { CTL_INT, NET_IRDA_WARN_NOREPLY_TIME, "warn_noreply_time" },
719 { CTL_INT, NET_IRDA_LAP_KEEPALIVE_TIME, "lap_keepalive_time" },
720 {}
721};
722
723static const struct bin_table bin_net_table[] = {
724 { CTL_DIR, NET_CORE, "core", bin_net_core_table },
725 /* NET_ETHER not used */
726 /* NET_802 not used */
727 { CTL_DIR, NET_UNIX, "unix", bin_net_unix_table },
728 { CTL_DIR, NET_IPV4, "ipv4", bin_net_ipv4_table },
729 { CTL_DIR, NET_IPX, "ipx", bin_net_ipx_table },
730 { CTL_DIR, NET_ATALK, "appletalk", bin_net_atalk_table },
731 { CTL_DIR, NET_NETROM, "netrom", bin_net_netrom_table },
732 { CTL_DIR, NET_AX25, "ax25", bin_net_ax25_table },
733 /* NET_BRIDGE "bridge" no longer used */
734 { CTL_DIR, NET_ROSE, "rose", bin_net_rose_table },
735 { CTL_DIR, NET_IPV6, "ipv6", bin_net_ipv6_table },
736 { CTL_DIR, NET_X25, "x25", bin_net_x25_table },
737 { CTL_DIR, NET_TR, "token-ring", bin_net_tr_table },
738 { CTL_DIR, NET_DECNET, "decnet", bin_net_decnet_table },
739 /* NET_ECONET not used */
740 { CTL_DIR, NET_SCTP, "sctp", bin_net_sctp_table },
741 { CTL_DIR, NET_LLC, "llc", bin_net_llc_table },
742 { CTL_DIR, NET_NETFILTER, "netfilter", bin_net_netfilter_table },
743 /* NET_DCCP "dccp" no longer used */
744 { CTL_DIR, NET_IRDA, "irda", bin_net_irda_table },
745 { CTL_INT, 2089, "nf_conntrack_max" },
746 {}
747};
748
749static const struct bin_table bin_fs_quota_table[] = {
750 { CTL_INT, FS_DQ_LOOKUPS, "lookups" },
751 { CTL_INT, FS_DQ_DROPS, "drops" },
752 { CTL_INT, FS_DQ_READS, "reads" },
753 { CTL_INT, FS_DQ_WRITES, "writes" },
754 { CTL_INT, FS_DQ_CACHE_HITS, "cache_hits" },
755 { CTL_INT, FS_DQ_ALLOCATED, "allocated_dquots" },
756 { CTL_INT, FS_DQ_FREE, "free_dquots" },
757 { CTL_INT, FS_DQ_SYNCS, "syncs" },
758 { CTL_INT, FS_DQ_WARNINGS, "warnings" },
759 {}
760};
761
762static const struct bin_table bin_fs_xfs_table[] = {
763 { CTL_INT, XFS_SGID_INHERIT, "irix_sgid_inherit" },
764 { CTL_INT, XFS_SYMLINK_MODE, "irix_symlink_mode" },
765 { CTL_INT, XFS_PANIC_MASK, "panic_mask" },
766
767 { CTL_INT, XFS_ERRLEVEL, "error_level" },
768 { CTL_INT, XFS_SYNCD_TIMER, "xfssyncd_centisecs" },
769 { CTL_INT, XFS_INHERIT_SYNC, "inherit_sync" },
770 { CTL_INT, XFS_INHERIT_NODUMP, "inherit_nodump" },
771 { CTL_INT, XFS_INHERIT_NOATIME, "inherit_noatime" },
772 { CTL_INT, XFS_BUF_TIMER, "xfsbufd_centisecs" },
773 { CTL_INT, XFS_BUF_AGE, "age_buffer_centisecs" },
774 { CTL_INT, XFS_INHERIT_NOSYM, "inherit_nosymlinks" },
775 { CTL_INT, XFS_ROTORSTEP, "rotorstep" },
776 { CTL_INT, XFS_INHERIT_NODFRG, "inherit_nodefrag" },
777 { CTL_INT, XFS_FILESTREAM_TIMER, "filestream_centisecs" },
778 { CTL_INT, XFS_STATS_CLEAR, "stats_clear" },
779 {}
780};
781
782static const struct bin_table bin_fs_ocfs2_nm_table[] = {
783 { CTL_STR, 1, "hb_ctl_path" },
784 {}
785};
786
787static const struct bin_table bin_fs_ocfs2_table[] = {
788 { CTL_DIR, 1, "nm", bin_fs_ocfs2_nm_table },
789 {}
790};
791
792static const struct bin_table bin_inotify_table[] = {
793 { CTL_INT, INOTIFY_MAX_USER_INSTANCES, "max_user_instances" },
794 { CTL_INT, INOTIFY_MAX_USER_WATCHES, "max_user_watches" },
795 { CTL_INT, INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" },
796 {}
797};
798
799static const struct bin_table bin_fs_table[] = {
800 { CTL_INT, FS_NRINODE, "inode-nr" },
801 { CTL_INT, FS_STATINODE, "inode-state" },
802 /* FS_MAXINODE unused */
803 /* FS_NRDQUOT unused */
804 /* FS_MAXDQUOT unused */
805 /* FS_NRFILE "file-nr" no longer used */
806 { CTL_INT, FS_MAXFILE, "file-max" },
807 { CTL_INT, FS_DENTRY, "dentry-state" },
808 /* FS_NRSUPER unused */
809 /* FS_MAXUPSER unused */
810 { CTL_INT, FS_OVERFLOWUID, "overflowuid" },
811 { CTL_INT, FS_OVERFLOWGID, "overflowgid" },
812 { CTL_INT, FS_LEASES, "leases-enable" },
813 { CTL_INT, FS_DIR_NOTIFY, "dir-notify-enable" },
814 { CTL_INT, FS_LEASE_TIME, "lease-break-time" },
815 { CTL_DIR, FS_DQSTATS, "quota", bin_fs_quota_table },
816 { CTL_DIR, FS_XFS, "xfs", bin_fs_xfs_table },
817 { CTL_ULONG, FS_AIO_NR, "aio-nr" },
818 { CTL_ULONG, FS_AIO_MAX_NR, "aio-max-nr" },
819 { CTL_DIR, FS_INOTIFY, "inotify", bin_inotify_table },
820 { CTL_DIR, FS_OCFS2, "ocfs2", bin_fs_ocfs2_table },
821 { CTL_INT, KERN_SETUID_DUMPABLE, "suid_dumpable" },
822 {}
823};
824
825static const struct bin_table bin_ipmi_table[] = {
826 { CTL_INT, DEV_IPMI_POWEROFF_POWERCYCLE, "poweroff_powercycle" },
827 {}
828};
829
830static const struct bin_table bin_mac_hid_files[] = {
831 /* DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES unused */
832 /* DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES unused */
833 { CTL_INT, DEV_MAC_HID_MOUSE_BUTTON_EMULATION, "mouse_button_emulation" },
834 { CTL_INT, DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE, "mouse_button2_keycode" },
835 { CTL_INT, DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE, "mouse_button3_keycode" },
836 /* DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES unused */
837 {}
838};
839
840static const struct bin_table bin_raid_table[] = {
841 { CTL_INT, DEV_RAID_SPEED_LIMIT_MIN, "speed_limit_min" },
842 { CTL_INT, DEV_RAID_SPEED_LIMIT_MAX, "speed_limit_max" },
843 {}
844};
845
846static const struct bin_table bin_scsi_table[] = {
847 { CTL_INT, DEV_SCSI_LOGGING_LEVEL, "logging_level" },
848 {}
849};
850
851static const struct bin_table bin_dev_table[] = {
852 /* DEV_CDROM "cdrom" no longer used */
853 /* DEV_HWMON unused */
854 /* DEV_PARPORT "parport" no longer used */
855 { CTL_DIR, DEV_RAID, "raid", bin_raid_table },
856 { CTL_DIR, DEV_MAC_HID, "mac_hid", bin_mac_hid_files },
857 { CTL_DIR, DEV_SCSI, "scsi", bin_scsi_table },
858 { CTL_DIR, DEV_IPMI, "ipmi", bin_ipmi_table },
859 {}
860};
861
862static const struct bin_table bin_bus_isa_table[] = {
863 { CTL_INT, BUS_ISA_MEM_BASE, "membase" },
864 { CTL_INT, BUS_ISA_PORT_BASE, "portbase" },
865 { CTL_INT, BUS_ISA_PORT_SHIFT, "portshift" },
866 {}
867};
868
869static const struct bin_table bin_bus_table[] = {
870 { CTL_DIR, CTL_BUS_ISA, "isa", bin_bus_isa_table },
871 {}
872};
873
874
875static const struct bin_table bin_s390dbf_table[] = {
876 { CTL_INT, 5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" },
877 { CTL_INT, 5679 /* CTL_S390DBF_ACTIVE */, "debug_active" },
878 {}
879};
880
881static const struct bin_table bin_sunrpc_table[] = {
882 /* CTL_RPCDEBUG "rpc_debug" no longer used */
883 /* CTL_NFSDEBUG "nfs_debug" no longer used */
884 /* CTL_NFSDDEBUG "nfsd_debug" no longer used */
885 /* CTL_NLMDEBUG "nlm_debug" no longer used */
886
887 { CTL_INT, CTL_SLOTTABLE_UDP, "udp_slot_table_entries" },
888 { CTL_INT, CTL_SLOTTABLE_TCP, "tcp_slot_table_entries" },
889 { CTL_INT, CTL_MIN_RESVPORT, "min_resvport" },
890 { CTL_INT, CTL_MAX_RESVPORT, "max_resvport" },
891 {}
892};
893
894static const struct bin_table bin_pm_table[] = {
895 /* frv specific */
 896 /* 1 == CTL_PM_SUSPEND "suspend" no longer used */
897 { CTL_INT, 2 /* CTL_PM_CMODE */, "cmode" },
898 { CTL_INT, 3 /* CTL_PM_P0 */, "p0" },
899 { CTL_INT, 4 /* CTL_PM_CM */, "cm" },
900 {}
901};
902
903static const struct bin_table bin_root_table[] = {
904 { CTL_DIR, CTL_KERN, "kernel", bin_kern_table },
905 { CTL_DIR, CTL_VM, "vm", bin_vm_table },
906 { CTL_DIR, CTL_NET, "net", bin_net_table },
907 /* CTL_PROC not used */
908 { CTL_DIR, CTL_FS, "fs", bin_fs_table },
909 /* CTL_DEBUG "debug" no longer used */
910 { CTL_DIR, CTL_DEV, "dev", bin_dev_table },
911 { CTL_DIR, CTL_BUS, "bus", bin_bus_table },
912 { CTL_DIR, CTL_ABI, "abi" },
913 /* CTL_CPU not used */
914 /* CTL_ARLAN "arlan" no longer used */
915 { CTL_DIR, CTL_S390DBF, "s390dbf", bin_s390dbf_table },
916 { CTL_DIR, CTL_SUNRPC, "sunrpc", bin_sunrpc_table },
917 { CTL_DIR, CTL_PM, "pm", bin_pm_table },
918 {}
919};
920
921static ssize_t bin_dir(struct file *file,
922 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
923{
924 return -ENOTDIR;
925}
926
927
928static ssize_t bin_string(struct file *file,
929 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
930{
931 ssize_t result, copied = 0;
932
933 if (oldval && oldlen) {
934 char __user *lastp;
935 loff_t pos = 0;
936 int ch;
937
938 result = vfs_read(file, oldval, oldlen, &pos);
939 if (result < 0)
940 goto out;
941
942 copied = result;
943 lastp = oldval + copied - 1;
944
945 result = -EFAULT;
946 if (get_user(ch, lastp))
947 goto out;
948
949 /* Trim off the trailing newline */
950 if (ch == '\n') {
951 result = -EFAULT;
952 if (put_user('\0', lastp))
953 goto out;
954 copied -= 1;
955 }
956 }
957
958 if (newval && newlen) {
959 loff_t pos = 0;
960
961 result = vfs_write(file, newval, newlen, &pos);
962 if (result < 0)
963 goto out;
964 }
965
966 result = copied;
967out:
968 return result;
969}
970
971static ssize_t bin_intvec(struct file *file,
972 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
973{
974 mm_segment_t old_fs = get_fs();
975 ssize_t copied = 0;
976 char *buffer;
977 ssize_t result;
978
979 result = -ENOMEM;
980 buffer = kmalloc(BUFSZ, GFP_KERNEL);
981 if (!buffer)
982 goto out;
983
984 if (oldval && oldlen) {
985 unsigned __user *vec = oldval;
986 size_t length = oldlen / sizeof(*vec);
987 loff_t pos = 0;
988 char *str, *end;
989 int i;
990
991 set_fs(KERNEL_DS);
992 result = vfs_read(file, buffer, BUFSZ - 1, &pos);
993 set_fs(old_fs);
994 if (result < 0)
995 goto out_kfree;
996
997 str = buffer;
998 end = str + result;
999 *end++ = '\0';
1000 for (i = 0; i < length; i++) {
1001 unsigned long value;
1002
1003 value = simple_strtoul(str, &str, 10);
1004 while (isspace(*str))
1005 str++;
1006
1007 result = -EFAULT;
1008 if (put_user(value, vec + i))
1009 goto out_kfree;
1010
1011 copied += sizeof(*vec);
1012 if (!isdigit(*str))
1013 break;
1014 }
1015 }
1016
1017 if (newval && newlen) {
1018 unsigned __user *vec = newval;
1019 size_t length = newlen / sizeof(*vec);
1020 loff_t pos = 0;
1021 char *str, *end;
1022 int i;
1023
1024 str = buffer;
1025 end = str + BUFSZ;
1026 for (i = 0; i < length; i++) {
1027 unsigned long value;
1028
1029 result = -EFAULT;
1030 if (get_user(value, vec + i))
1031 goto out_kfree;
1032
1033 str += snprintf(str, end - str, "%lu\t", value);
1034 }
1035
1036 set_fs(KERNEL_DS);
1037 result = vfs_write(file, buffer, str - buffer, &pos);
1038 set_fs(old_fs);
1039 if (result < 0)
1040 goto out_kfree;
1041 }
1042 result = copied;
1043out_kfree:
1044 kfree(buffer);
1045out:
1046 return result;
1047}
1048
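The parse loop above (mirrored in bin_ulongvec() below) simply splits the text read from the proc file on whitespace with simple_strtoul() and copies one integer per element of the caller's vector. A minimal userspace sketch of the same parse, using strtoul() and a made-up sample resembling what ip_local_port_range reads as:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	char buf[] = "32768\t61000\n";	/* sample procfs text (hypothetical values) */
	char *str = buf;
	unsigned long vec[8];
	int n = 0;

	/* Same shape as the loop in bin_intvec(): parse a value, skip
	 * whitespace, stop when the next character is not a digit. */
	while (n < 8) {
		vec[n++] = strtoul(str, &str, 10);
		while (isspace((unsigned char)*str))
			str++;
		if (!isdigit((unsigned char)*str))
			break;
	}
	printf("parsed %d values: %lu %lu\n", n, vec[0], vec[1]);
	return 0;
}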
1049static ssize_t bin_ulongvec(struct file *file,
1050 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1051{
1052 mm_segment_t old_fs = get_fs();
1053 ssize_t copied = 0;
1054 char *buffer;
1055 ssize_t result;
1056
1057 result = -ENOMEM;
1058 buffer = kmalloc(BUFSZ, GFP_KERNEL);
1059 if (!buffer)
1060 goto out;
1061
1062 if (oldval && oldlen) {
1063 unsigned long __user *vec = oldval;
1064 size_t length = oldlen / sizeof(*vec);
1065 loff_t pos = 0;
1066 char *str, *end;
1067 int i;
1068
1069 set_fs(KERNEL_DS);
1070 result = vfs_read(file, buffer, BUFSZ - 1, &pos);
1071 set_fs(old_fs);
1072 if (result < 0)
1073 goto out_kfree;
1074
1075 str = buffer;
1076 end = str + result;
1077 *end++ = '\0';
1078 for (i = 0; i < length; i++) {
1079 unsigned long value;
1080
1081 value = simple_strtoul(str, &str, 10);
1082 while (isspace(*str))
1083 str++;
1084
1085 result = -EFAULT;
1086 if (put_user(value, vec + i))
1087 goto out_kfree;
1088
1089 copied += sizeof(*vec);
1090 if (!isdigit(*str))
1091 break;
1092 }
1093 }
1094
1095 if (newval && newlen) {
1096 unsigned long __user *vec = newval;
1097 size_t length = newlen / sizeof(*vec);
1098 loff_t pos = 0;
1099 char *str, *end;
1100 int i;
1101
1102 str = buffer;
1103 end = str + BUFSZ;
1104 for (i = 0; i < length; i++) {
1105 unsigned long value;
1106
1107 result = -EFAULT;
1108 if (get_user(value, vec + i))
1109 goto out_kfree;
1110
1111 str += snprintf(str, end - str, "%lu\t", value);
1112 }
1113
1114 set_fs(KERNEL_DS);
1115 result = vfs_write(file, buffer, str - buffer, &pos);
1116 set_fs(old_fs);
1117 if (result < 0)
1118 goto out_kfree;
1119 }
1120 result = copied;
1121out_kfree:
1122 kfree(buffer);
1123out:
1124 return result;
1125}
1126
1127static unsigned hex_value(int ch)
1128{
1129 return isdigit(ch) ? ch - '0' : ((ch | 0x20) - 'a') + 10;
1130}
1131
1132static ssize_t bin_uuid(struct file *file,
1133 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1134{
1135 mm_segment_t old_fs = get_fs();
1136 ssize_t result, copied = 0;
1137
1138 /* Only supports reads */
1139 if (oldval && oldlen) {
1140 loff_t pos = 0;
1141 char buf[40], *str = buf;
1142 unsigned char uuid[16];
1143 int i;
1144
1145 set_fs(KERNEL_DS);
1146 result = vfs_read(file, buf, sizeof(buf) - 1, &pos);
1147 set_fs(old_fs);
1148 if (result < 0)
1149 goto out;
1150
1151 buf[result] = '\0';
1152
 1153 /* Convert the uuid from a string to binary */
1154 for (i = 0; i < 16; i++) {
1155 result = -EIO;
1156 if (!isxdigit(str[0]) || !isxdigit(str[1]))
1157 goto out;
1158
1159 uuid[i] = (hex_value(str[0]) << 4) | hex_value(str[1]);
1160 str += 2;
1161 if (*str == '-')
1162 str++;
1163 }
1164
1165 if (oldlen > 16)
1166 oldlen = 16;
1167
1168 result = -EFAULT;
1169 if (copy_to_user(oldval, uuid, oldlen))
1170 goto out;
1171
1172 copied = oldlen;
1173 }
1174 result = copied;
1175out:
1176 return result;
1177}
1178
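hex_value() and bin_uuid() together turn the textual UUID exposed under /proc/sys/kernel/random/ into the 16 raw bytes the binary interface returns, consuming two hex digits per byte and skipping dashes. A self-contained sketch of that per-byte step, with an arbitrary sample string:

#include <ctype.h>
#include <stdio.h>

/* Same digit-to-nibble mapping as hex_value() above. */
static unsigned hexval(int ch)
{
	return isdigit(ch) ? ch - '0' : ((ch | 0x20) - 'a') + 10;
}

int main(void)
{
	const char *str = "6ba7b810";	/* arbitrary sample, not a real boot_id */
	unsigned char out[4];
	int i;

	for (i = 0; i < 4; i++) {
		out[i] = (hexval(str[0]) << 4) | hexval(str[1]);
		str += 2;
	}
	printf("%02x %02x %02x %02x\n", out[0], out[1], out[2], out[3]);
	return 0;
}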
1179static ssize_t bin_dn_node_address(struct file *file,
1180 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1181{
1182 mm_segment_t old_fs = get_fs();
1183 ssize_t result, copied = 0;
1184
1185 if (oldval && oldlen) {
1186 loff_t pos = 0;
1187 char buf[15], *nodep;
1188 unsigned long area, node;
1189 __le16 dnaddr;
1190
1191 set_fs(KERNEL_DS);
1192 result = vfs_read(file, buf, sizeof(buf) - 1, &pos);
1193 set_fs(old_fs);
1194 if (result < 0)
1195 goto out;
1196
1197 buf[result] = '\0';
1198
 1199 /* Convert the decnet address to binary */
1200 result = -EIO;
 1201 nodep = strchr(buf, '.');
 1202 if (!nodep)
 1203 goto out;
 1204 nodep++;
1205 area = simple_strtoul(buf, NULL, 10);
1206 node = simple_strtoul(nodep, NULL, 10);
1207
1208 result = -EIO;
1209 if ((area > 63)||(node > 1023))
1210 goto out;
1211
1212 dnaddr = cpu_to_le16((area << 10) | node);
1213
1214 result = -EFAULT;
1215 if (put_user(dnaddr, (__le16 __user *)oldval))
1216 goto out;
1217
1218 copied = sizeof(dnaddr);
1219 }
1220
1221 if (newval && newlen) {
1222 loff_t pos = 0;
1223 __le16 dnaddr;
1224 char buf[15];
1225 int len;
1226
1227 result = -EINVAL;
1228 if (newlen != sizeof(dnaddr))
1229 goto out;
1230
1231 result = -EFAULT;
1232 if (get_user(dnaddr, (__le16 __user *)newval))
1233 goto out;
1234
1235 len = snprintf(buf, sizeof(buf), "%hu.%hu",
1236 le16_to_cpu(dnaddr) >> 10,
1237 le16_to_cpu(dnaddr) & 0x3ff);
1238
1239 set_fs(KERNEL_DS);
1240 result = vfs_write(file, buf, len, &pos);
1241 set_fs(old_fs);
1242 if (result < 0)
1243 goto out;
1244 }
1245
1246 result = copied;
1247out:
1248 return result;
1249}
1250
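bin_dn_node_address() converts between the textual DECnet "area.node" form and a little-endian 16-bit address: the area lives in the top 6 bits and the node in the low 10, which is why the bounds checks above are 63 and 1023. A small userspace illustration of the packing (the kernel's endianness helper is left out):

#include <stdint.h>
#include <stdio.h>

/* Pack "area.node" the way bin_dn_node_address() does: area in the
 * top 6 bits, node in the low 10 bits of the 16-bit address. */
static uint16_t dn_pack(unsigned int area, unsigned int node)
{
	return (uint16_t)((area << 10) | node);
}

int main(void)
{
	printf("1.2 -> 0x%04x\n", dn_pack(1, 2));	/* prints 0x0402 */
	return 0;
}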
1251static const struct bin_table *get_sysctl(const int *name, int nlen, char *path)
1252{
1253 const struct bin_table *table = &bin_root_table[0];
1254 int ctl_name;
1255
1256 /* The binary sysctl tables have a small maximum depth so
1257 * there is no danger of overflowing our path as it PATH_MAX
1258 * bytes long.
1259 */
1260 memcpy(path, "sys/", 4);
1261 path += 4;
1262
1263repeat:
1264 if (!nlen)
1265 return ERR_PTR(-ENOTDIR);
1266 ctl_name = *name;
1267 name++;
1268 nlen--;
1269 for ( ; table->convert; table++) {
1270 int len = 0;
1271
1272 /*
1273 * For a wild card entry map from ifindex to network
1274 * device name.
1275 */
1276 if (!table->ctl_name) {
1277#ifdef CONFIG_NET
1278 struct net *net = current->nsproxy->net_ns;
1279 struct net_device *dev;
1280 dev = dev_get_by_index(net, ctl_name);
1281 if (dev) {
1282 len = strlen(dev->name);
1283 memcpy(path, dev->name, len);
1284 dev_put(dev);
1285 }
1286#endif
1287 /* Use the well known sysctl number to proc name mapping */
1288 } else if (ctl_name == table->ctl_name) {
1289 len = strlen(table->procname);
1290 memcpy(path, table->procname, len);
1291 }
1292 if (len) {
1293 path += len;
1294 if (table->child) {
1295 *path++ = '/';
1296 table = table->child;
1297 goto repeat;
1298 }
1299 *path = '\0';
1300 return table;
1301 }
1302 }
1303 return ERR_PTR(-ENOTDIR);
1304}
1305
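get_sysctl() walks these tables one name component at a time, appending the matching procname to the path and descending into CTL_DIR children; a wildcard entry (ctl_name == 0) instead maps an interface index to its device name. A stripped-down userspace model of that walk is sketched below; the numbers are stand-ins for CTL_NET, NET_IPV4 and NET_IPV4_FORWARD rather than the real constants:

#include <stdio.h>
#include <string.h>

struct tbl {
	int ctl_name;
	const char *procname;
	const struct tbl *child;
};

/* Toy tables shaped like bin_root_table -> bin_net_table -> ... */
static const struct tbl leaf[] = { { 8, "ip_forward" }, { 0 } };
static const struct tbl ipv4[] = { { 5, "ipv4", leaf }, { 0 } };
static const struct tbl root[] = { { 3, "net", ipv4 }, { 0 } };

int main(void)
{
	int name[] = { 3, 5, 8 };	/* stand-ins for CTL_NET, NET_IPV4, NET_IPV4_FORWARD */
	const struct tbl *t = root;
	char path[64] = "sys/";
	int i;

	for (i = 0; i < 3; i++) {
		for (; t->procname; t++)
			if (t->ctl_name == name[i])
				break;
		if (!t->procname)
			return 1;
		strcat(path, t->procname);
		if (t->child) {
			strcat(path, "/");
			t = t->child;
		}
	}
	puts(path);	/* "sys/net/ipv4/ip_forward" */
	return 0;
}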
1306static char *sysctl_getname(const int *name, int nlen, const struct bin_table **tablep)
1307{
1308 char *tmp, *result;
1309
1310 result = ERR_PTR(-ENOMEM);
1311 tmp = __getname();
1312 if (tmp) {
1313 const struct bin_table *table = get_sysctl(name, nlen, tmp);
1314 result = tmp;
1315 *tablep = table;
1316 if (IS_ERR(table)) {
1317 __putname(tmp);
1318 result = ERR_CAST(table);
1319 }
1320 }
1321 return result;
1322}
1323
1324static ssize_t binary_sysctl(const int *name, int nlen,
1325 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1326{
1327 const struct bin_table *table = NULL;
1328 struct nameidata nd;
1329 struct vfsmount *mnt;
1330 struct file *file;
1331 ssize_t result;
1332 char *pathname;
1333 int flags;
1334 int acc_mode, fmode;
1335
1336 pathname = sysctl_getname(name, nlen, &table);
1337 result = PTR_ERR(pathname);
1338 if (IS_ERR(pathname))
1339 goto out;
1340
1341 /* How should the sysctl be accessed? */
1342 if (oldval && oldlen && newval && newlen) {
1343 flags = O_RDWR;
1344 acc_mode = MAY_READ | MAY_WRITE;
1345 fmode = FMODE_READ | FMODE_WRITE;
1346 } else if (newval && newlen) {
1347 flags = O_WRONLY;
1348 acc_mode = MAY_WRITE;
1349 fmode = FMODE_WRITE;
1350 } else if (oldval && oldlen) {
1351 flags = O_RDONLY;
1352 acc_mode = MAY_READ;
1353 fmode = FMODE_READ;
1354 } else {
1355 result = 0;
1356 goto out_putname;
1357 }
1358
1359 mnt = current->nsproxy->pid_ns->proc_mnt;
1360 result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd);
1361 if (result)
1362 goto out_putname;
1363
1364 result = may_open(&nd.path, acc_mode, fmode);
1365 if (result)
1366 goto out_putpath;
1367
1368 file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred());
1369 result = PTR_ERR(file);
1370 if (IS_ERR(file))
1371 goto out_putname;
1372
1373 result = table->convert(file, oldval, oldlen, newval, newlen);
1374
1375 fput(file);
1376out_putname:
1377 putname(pathname);
1378out:
1379 return result;
1380
1381out_putpath:
1382 path_put(&nd.path);
1383 goto out_putname;
1384}
1385
1386
1387#else /* CONFIG_SYSCTL_SYSCALL */
1388
1389static ssize_t binary_sysctl(const int *name, int nlen,
1390 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1391{
1392 return -ENOSYS;
1393}
1394
1395#endif /* CONFIG_SYSCTL_SYSCALL */
1396
1397
1398static void deprecated_sysctl_warning(const int *name, int nlen)
1399{
1400 int i;
1401
1402 if (printk_ratelimit()) {
1403 printk(KERN_INFO
1404 "warning: process `%s' used the deprecated sysctl "
1405 "system call with ", current->comm);
1406 for (i = 0; i < nlen; i++)
1407 printk("%d.", name[i]);
1408 printk("\n");
1409 }
1410 return;
1411}
1412
1413static ssize_t do_sysctl(int __user *args_name, int nlen,
1414 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1415{
1416 int name[CTL_MAXNAME];
1417 int i;
1418
1419 /* Check args->nlen. */
1420 if (nlen < 0 || nlen > CTL_MAXNAME)
1421 return -ENOTDIR;
1422 /* Read in the sysctl name for simplicity */
1423 for (i = 0; i < nlen; i++)
1424 if (get_user(name[i], args_name + i))
1425 return -EFAULT;
1426
1427 deprecated_sysctl_warning(name, nlen);
1428
1429 return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen);
1430}
1431
1432SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
1433{
1434 struct __sysctl_args tmp;
1435 size_t oldlen = 0;
1436 ssize_t result;
1437
1438 if (copy_from_user(&tmp, args, sizeof(tmp)))
1439 return -EFAULT;
1440
1441 if (tmp.oldval && !tmp.oldlenp)
1442 return -EFAULT;
1443
1444 if (tmp.oldlenp && get_user(oldlen, tmp.oldlenp))
1445 return -EFAULT;
1446
1447 result = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, oldlen,
1448 tmp.newval, tmp.newlen);
1449
1450 if (result >= 0) {
1451 oldlen = result;
1452 result = 0;
1453 }
1454
1455 if (tmp.oldlenp && put_user(oldlen, tmp.oldlenp))
1456 return -EFAULT;
1457
1458 return result;
1459}
1460
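For completeness, this is roughly what a legacy userspace caller of this interface looks like. The sketch assumes the libc headers still provide SYS__sysctl and struct __sysctl_args, and that the kernel was built with CONFIG_SYSCTL_SYSCALL; otherwise the call fails with ENOSYS. A successful call also triggers the deprecated_sysctl_warning() message above.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/sysctl.h>

int main(void)
{
	int name[] = { CTL_KERN, KERN_OSTYPE };	/* maps to /proc/sys/kernel/ostype */
	char buf[64] = "";
	size_t len = sizeof(buf);
	struct __sysctl_args args = {
		.name = name, .nlen = 2,
		.oldval = buf, .oldlenp = &len,
	};

	if (syscall(SYS__sysctl, &args) < 0) {
		perror("_sysctl");
		return 1;
	}
	printf("%.*s\n", (int)len, buf);
	return 0;
}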
1461
1462#ifdef CONFIG_COMPAT
1463#include <asm/compat.h>
1464
1465struct compat_sysctl_args {
1466 compat_uptr_t name;
1467 int nlen;
1468 compat_uptr_t oldval;
1469 compat_uptr_t oldlenp;
1470 compat_uptr_t newval;
1471 compat_size_t newlen;
1472 compat_ulong_t __unused[4];
1473};
1474
1475asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args)
1476{
1477 struct compat_sysctl_args tmp;
1478 compat_size_t __user *compat_oldlenp;
1479 size_t oldlen = 0;
1480 ssize_t result;
1481
1482 if (copy_from_user(&tmp, args, sizeof(tmp)))
1483 return -EFAULT;
1484
1485 if (tmp.oldval && !tmp.oldlenp)
1486 return -EFAULT;
1487
1488 compat_oldlenp = compat_ptr(tmp.oldlenp);
1489 if (compat_oldlenp && get_user(oldlen, compat_oldlenp))
1490 return -EFAULT;
1491
1492 result = do_sysctl(compat_ptr(tmp.name), tmp.nlen,
1493 compat_ptr(tmp.oldval), oldlen,
1494 compat_ptr(tmp.newval), tmp.newlen);
1495
1496 if (result >= 0) {
1497 oldlen = result;
1498 result = 0;
1499 }
1500
1501 if (compat_oldlenp && put_user(oldlen, compat_oldlenp))
1502 return -EFAULT;
1503
1504 return result;
1505}
1506
1507#endif /* CONFIG_COMPAT */
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index f1d676e4b368..04cdcf72c827 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -5,1240 +5,6 @@
5#include <linux/string.h> 5#include <linux/string.h>
6#include <net/ip_vs.h> 6#include <net/ip_vs.h>
7 7
8struct trans_ctl_table {
9 int ctl_name;
10 const char *procname;
11 const struct trans_ctl_table *child;
12};
13
14static const struct trans_ctl_table trans_random_table[] = {
15 { RANDOM_POOLSIZE, "poolsize" },
16 { RANDOM_ENTROPY_COUNT, "entropy_avail" },
17 { RANDOM_READ_THRESH, "read_wakeup_threshold" },
18 { RANDOM_WRITE_THRESH, "write_wakeup_threshold" },
19 { RANDOM_BOOT_ID, "boot_id" },
20 { RANDOM_UUID, "uuid" },
21 {}
22};
23
24static const struct trans_ctl_table trans_pty_table[] = {
25 { PTY_MAX, "max" },
26 { PTY_NR, "nr" },
27 {}
28};
29
30static const struct trans_ctl_table trans_kern_table[] = {
31 { KERN_OSTYPE, "ostype" },
32 { KERN_OSRELEASE, "osrelease" },
33 /* KERN_OSREV not used */
34 { KERN_VERSION, "version" },
35 /* KERN_SECUREMASK not used */
36 /* KERN_PROF not used */
37 { KERN_NODENAME, "hostname" },
38 { KERN_DOMAINNAME, "domainname" },
39
40 { KERN_PANIC, "panic" },
41 { KERN_REALROOTDEV, "real-root-dev" },
42
43 { KERN_SPARC_REBOOT, "reboot-cmd" },
44 { KERN_CTLALTDEL, "ctrl-alt-del" },
45 { KERN_PRINTK, "printk" },
46
47 /* KERN_NAMETRANS not used */
48 /* KERN_PPC_HTABRECLAIM not used */
49 /* KERN_PPC_ZEROPAGED not used */
50 { KERN_PPC_POWERSAVE_NAP, "powersave-nap" },
51
52 { KERN_MODPROBE, "modprobe" },
53 { KERN_SG_BIG_BUFF, "sg-big-buff" },
54 { KERN_ACCT, "acct" },
55 { KERN_PPC_L2CR, "l2cr" },
56
57 /* KERN_RTSIGNR not used */
58 /* KERN_RTSIGMAX not used */
59
60 { KERN_SHMMAX, "shmmax" },
61 { KERN_MSGMAX, "msgmax" },
62 { KERN_MSGMNB, "msgmnb" },
63 /* KERN_MSGPOOL not used*/
64 { KERN_SYSRQ, "sysrq" },
65 { KERN_MAX_THREADS, "threads-max" },
66 { KERN_RANDOM, "random", trans_random_table },
67 { KERN_SHMALL, "shmall" },
68 { KERN_MSGMNI, "msgmni" },
69 { KERN_SEM, "sem" },
70 { KERN_SPARC_STOP_A, "stop-a" },
71 { KERN_SHMMNI, "shmmni" },
72
73 { KERN_OVERFLOWUID, "overflowuid" },
74 { KERN_OVERFLOWGID, "overflowgid" },
75
76 { KERN_HOTPLUG, "hotplug", },
77 { KERN_IEEE_EMULATION_WARNINGS, "ieee_emulation_warnings" },
78
79 { KERN_S390_USER_DEBUG_LOGGING, "userprocess_debug" },
80 { KERN_CORE_USES_PID, "core_uses_pid" },
81 { KERN_TAINTED, "tainted" },
82 { KERN_CADPID, "cad_pid" },
83 { KERN_PIDMAX, "pid_max" },
84 { KERN_CORE_PATTERN, "core_pattern" },
85 { KERN_PANIC_ON_OOPS, "panic_on_oops" },
86 { KERN_HPPA_PWRSW, "soft-power" },
87 { KERN_HPPA_UNALIGNED, "unaligned-trap" },
88
89 { KERN_PRINTK_RATELIMIT, "printk_ratelimit" },
90 { KERN_PRINTK_RATELIMIT_BURST, "printk_ratelimit_burst" },
91
92 { KERN_PTY, "pty", trans_pty_table },
93 { KERN_NGROUPS_MAX, "ngroups_max" },
94 { KERN_SPARC_SCONS_PWROFF, "scons-poweroff" },
95 { KERN_HZ_TIMER, "hz_timer" },
96 { KERN_UNKNOWN_NMI_PANIC, "unknown_nmi_panic" },
97 { KERN_BOOTLOADER_TYPE, "bootloader_type" },
98 { KERN_RANDOMIZE, "randomize_va_space" },
99
100 { KERN_SPIN_RETRY, "spin_retry" },
101 { KERN_ACPI_VIDEO_FLAGS, "acpi_video_flags" },
102 { KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" },
103 { KERN_COMPAT_LOG, "compat-log" },
104 { KERN_MAX_LOCK_DEPTH, "max_lock_depth" },
105 { KERN_NMI_WATCHDOG, "nmi_watchdog" },
106 { KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" },
107 {}
108};
109
110static const struct trans_ctl_table trans_vm_table[] = {
111 { VM_OVERCOMMIT_MEMORY, "overcommit_memory" },
112 { VM_PAGE_CLUSTER, "page-cluster" },
113 { VM_DIRTY_BACKGROUND, "dirty_background_ratio" },
114 { VM_DIRTY_RATIO, "dirty_ratio" },
115 { VM_DIRTY_WB_CS, "dirty_writeback_centisecs" },
116 { VM_DIRTY_EXPIRE_CS, "dirty_expire_centisecs" },
117 { VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads" },
118 { VM_OVERCOMMIT_RATIO, "overcommit_ratio" },
119 /* VM_PAGEBUF unused */
120 { VM_HUGETLB_PAGES, "nr_hugepages" },
121 { VM_SWAPPINESS, "swappiness" },
122 { VM_LOWMEM_RESERVE_RATIO, "lowmem_reserve_ratio" },
123 { VM_MIN_FREE_KBYTES, "min_free_kbytes" },
124 { VM_MAX_MAP_COUNT, "max_map_count" },
125 { VM_LAPTOP_MODE, "laptop_mode" },
126 { VM_BLOCK_DUMP, "block_dump" },
127 { VM_HUGETLB_GROUP, "hugetlb_shm_group" },
128 { VM_VFS_CACHE_PRESSURE, "vfs_cache_pressure" },
129 { VM_LEGACY_VA_LAYOUT, "legacy_va_layout" },
130 /* VM_SWAP_TOKEN_TIMEOUT unused */
131 { VM_DROP_PAGECACHE, "drop_caches" },
132 { VM_PERCPU_PAGELIST_FRACTION, "percpu_pagelist_fraction" },
133 { VM_ZONE_RECLAIM_MODE, "zone_reclaim_mode" },
134 { VM_MIN_UNMAPPED, "min_unmapped_ratio" },
135 { VM_PANIC_ON_OOM, "panic_on_oom" },
136 { VM_VDSO_ENABLED, "vdso_enabled" },
137 { VM_MIN_SLAB, "min_slab_ratio" },
138
139 {}
140};
141
142static const struct trans_ctl_table trans_net_core_table[] = {
143 { NET_CORE_WMEM_MAX, "wmem_max" },
144 { NET_CORE_RMEM_MAX, "rmem_max" },
145 { NET_CORE_WMEM_DEFAULT, "wmem_default" },
146 { NET_CORE_RMEM_DEFAULT, "rmem_default" },
147 /* NET_CORE_DESTROY_DELAY unused */
148 { NET_CORE_MAX_BACKLOG, "netdev_max_backlog" },
149 /* NET_CORE_FASTROUTE unused */
150 { NET_CORE_MSG_COST, "message_cost" },
151 { NET_CORE_MSG_BURST, "message_burst" },
152 { NET_CORE_OPTMEM_MAX, "optmem_max" },
153 /* NET_CORE_HOT_LIST_LENGTH unused */
154 /* NET_CORE_DIVERT_VERSION unused */
155 /* NET_CORE_NO_CONG_THRESH unused */
156 /* NET_CORE_NO_CONG unused */
157 /* NET_CORE_LO_CONG unused */
158 /* NET_CORE_MOD_CONG unused */
159 { NET_CORE_DEV_WEIGHT, "dev_weight" },
160 { NET_CORE_SOMAXCONN, "somaxconn" },
161 { NET_CORE_BUDGET, "netdev_budget" },
162 { NET_CORE_AEVENT_ETIME, "xfrm_aevent_etime" },
163 { NET_CORE_AEVENT_RSEQTH, "xfrm_aevent_rseqth" },
164 { NET_CORE_WARNINGS, "warnings" },
165 {},
166};
167
168static const struct trans_ctl_table trans_net_unix_table[] = {
169 /* NET_UNIX_DESTROY_DELAY unused */
170 /* NET_UNIX_DELETE_DELAY unused */
171 { NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen" },
172 {}
173};
174
175static const struct trans_ctl_table trans_net_ipv4_route_table[] = {
176 { NET_IPV4_ROUTE_FLUSH, "flush" },
177 { NET_IPV4_ROUTE_MIN_DELAY, "min_delay" },
178 { NET_IPV4_ROUTE_MAX_DELAY, "max_delay" },
179 { NET_IPV4_ROUTE_GC_THRESH, "gc_thresh" },
180 { NET_IPV4_ROUTE_MAX_SIZE, "max_size" },
181 { NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" },
182 { NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout" },
183 { NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval" },
184 { NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load" },
185 { NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number" },
186 { NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence" },
187 { NET_IPV4_ROUTE_ERROR_COST, "error_cost" },
188 { NET_IPV4_ROUTE_ERROR_BURST, "error_burst" },
189 { NET_IPV4_ROUTE_GC_ELASTICITY, "gc_elasticity" },
190 { NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" },
191 { NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" },
192 { NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" },
193 { NET_IPV4_ROUTE_SECRET_INTERVAL, "secret_interval" },
194 { NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" },
195 {}
196};
197
198static const struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = {
199 { NET_IPV4_CONF_FORWARDING, "forwarding" },
200 { NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding" },
201
202 { NET_IPV4_CONF_PROXY_ARP, "proxy_arp" },
203 { NET_IPV4_CONF_ACCEPT_REDIRECTS, "accept_redirects" },
204 { NET_IPV4_CONF_SECURE_REDIRECTS, "secure_redirects" },
205 { NET_IPV4_CONF_SEND_REDIRECTS, "send_redirects" },
206 { NET_IPV4_CONF_SHARED_MEDIA, "shared_media" },
207 { NET_IPV4_CONF_RP_FILTER, "rp_filter" },
208 { NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, "accept_source_route" },
209 { NET_IPV4_CONF_BOOTP_RELAY, "bootp_relay" },
210 { NET_IPV4_CONF_LOG_MARTIANS, "log_martians" },
211 { NET_IPV4_CONF_TAG, "tag" },
212 { NET_IPV4_CONF_ARPFILTER, "arp_filter" },
213 { NET_IPV4_CONF_MEDIUM_ID, "medium_id" },
214 { NET_IPV4_CONF_NOXFRM, "disable_xfrm" },
215 { NET_IPV4_CONF_NOPOLICY, "disable_policy" },
216 { NET_IPV4_CONF_FORCE_IGMP_VERSION, "force_igmp_version" },
217
218 { NET_IPV4_CONF_ARP_ANNOUNCE, "arp_announce" },
219 { NET_IPV4_CONF_ARP_IGNORE, "arp_ignore" },
220 { NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" },
221 { NET_IPV4_CONF_ARP_ACCEPT, "arp_accept" },
222 { NET_IPV4_CONF_ARP_NOTIFY, "arp_notify" },
223 { NET_IPV4_CONF_ACCEPT_LOCAL, "accept_local" },
224 {}
225};
226
227static const struct trans_ctl_table trans_net_ipv4_conf_table[] = {
228 { NET_PROTO_CONF_ALL, "all", trans_net_ipv4_conf_vars_table },
229 { NET_PROTO_CONF_DEFAULT, "default", trans_net_ipv4_conf_vars_table },
230 { 0, NULL, trans_net_ipv4_conf_vars_table },
231 {}
232};
233
234static const struct trans_ctl_table trans_net_neigh_vars_table[] = {
235 { NET_NEIGH_MCAST_SOLICIT, "mcast_solicit" },
236 { NET_NEIGH_UCAST_SOLICIT, "ucast_solicit" },
237 { NET_NEIGH_APP_SOLICIT, "app_solicit" },
238 { NET_NEIGH_RETRANS_TIME, "retrans_time" },
239 { NET_NEIGH_REACHABLE_TIME, "base_reachable_time" },
240 { NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time" },
241 { NET_NEIGH_GC_STALE_TIME, "gc_stale_time" },
242 { NET_NEIGH_UNRES_QLEN, "unres_qlen" },
243 { NET_NEIGH_PROXY_QLEN, "proxy_qlen" },
244 { NET_NEIGH_ANYCAST_DELAY, "anycast_delay" },
245 { NET_NEIGH_PROXY_DELAY, "proxy_delay" },
246 { NET_NEIGH_LOCKTIME, "locktime" },
247 { NET_NEIGH_GC_INTERVAL, "gc_interval" },
248 { NET_NEIGH_GC_THRESH1, "gc_thresh1" },
249 { NET_NEIGH_GC_THRESH2, "gc_thresh2" },
250 { NET_NEIGH_GC_THRESH3, "gc_thresh3" },
251 { NET_NEIGH_RETRANS_TIME_MS, "retrans_time_ms" },
252 { NET_NEIGH_REACHABLE_TIME_MS, "base_reachable_time_ms" },
253 {}
254};
255
256static const struct trans_ctl_table trans_net_neigh_table[] = {
257 { NET_PROTO_CONF_DEFAULT, "default", trans_net_neigh_vars_table },
258 { 0, NULL, trans_net_neigh_vars_table },
259 {}
260};
261
262static const struct trans_ctl_table trans_net_ipv4_netfilter_table[] = {
263 { NET_IPV4_NF_CONNTRACK_MAX, "ip_conntrack_max" },
264
265 { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "ip_conntrack_tcp_timeout_syn_sent" },
266 { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, "ip_conntrack_tcp_timeout_syn_recv" },
267 { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, "ip_conntrack_tcp_timeout_established" },
268 { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, "ip_conntrack_tcp_timeout_fin_wait" },
269 { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, "ip_conntrack_tcp_timeout_close_wait" },
270 { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, "ip_conntrack_tcp_timeout_last_ack" },
271 { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, "ip_conntrack_tcp_timeout_time_wait" },
272 { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, "ip_conntrack_tcp_timeout_close" },
273
274 { NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT, "ip_conntrack_udp_timeout" },
275 { NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM, "ip_conntrack_udp_timeout_stream" },
276 { NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT, "ip_conntrack_icmp_timeout" },
277 { NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT, "ip_conntrack_generic_timeout" },
278
279 { NET_IPV4_NF_CONNTRACK_BUCKETS, "ip_conntrack_buckets" },
280 { NET_IPV4_NF_CONNTRACK_LOG_INVALID, "ip_conntrack_log_invalid" },
281 { NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, "ip_conntrack_tcp_timeout_max_retrans" },
282 { NET_IPV4_NF_CONNTRACK_TCP_LOOSE, "ip_conntrack_tcp_loose" },
283 { NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, "ip_conntrack_tcp_be_liberal" },
284 { NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, "ip_conntrack_tcp_max_retrans" },
285
286 { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, "ip_conntrack_sctp_timeout_closed" },
287 { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, "ip_conntrack_sctp_timeout_cookie_wait" },
288 { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, "ip_conntrack_sctp_timeout_cookie_echoed" },
289 { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, "ip_conntrack_sctp_timeout_established" },
290 { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, "ip_conntrack_sctp_timeout_shutdown_sent" },
291 { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, "ip_conntrack_sctp_timeout_shutdown_recd" },
292 { NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, "ip_conntrack_sctp_timeout_shutdown_ack_sent" },
293
294 { NET_IPV4_NF_CONNTRACK_COUNT, "ip_conntrack_count" },
295 { NET_IPV4_NF_CONNTRACK_CHECKSUM, "ip_conntrack_checksum" },
296 {}
297};
298
299static const struct trans_ctl_table trans_net_ipv4_table[] = {
300 { NET_IPV4_FORWARD, "ip_forward" },
301 { NET_IPV4_DYNADDR, "ip_dynaddr" },
302
303 { NET_IPV4_CONF, "conf", trans_net_ipv4_conf_table },
304 { NET_IPV4_NEIGH, "neigh", trans_net_neigh_table },
305 { NET_IPV4_ROUTE, "route", trans_net_ipv4_route_table },
306 /* NET_IPV4_FIB_HASH unused */
307 { NET_IPV4_NETFILTER, "netfilter", trans_net_ipv4_netfilter_table },
308
309 { NET_IPV4_TCP_TIMESTAMPS, "tcp_timestamps" },
310 { NET_IPV4_TCP_WINDOW_SCALING, "tcp_window_scaling" },
311 { NET_IPV4_TCP_SACK, "tcp_sack" },
312 { NET_IPV4_TCP_RETRANS_COLLAPSE, "tcp_retrans_collapse" },
313 { NET_IPV4_DEFAULT_TTL, "ip_default_ttl" },
314 /* NET_IPV4_AUTOCONFIG unused */
315 { NET_IPV4_NO_PMTU_DISC, "ip_no_pmtu_disc" },
316 { NET_IPV4_TCP_SYN_RETRIES, "tcp_syn_retries" },
317 { NET_IPV4_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh" },
318 { NET_IPV4_IPFRAG_LOW_THRESH, "ipfrag_low_thresh" },
319 { NET_IPV4_IPFRAG_TIME, "ipfrag_time" },
320 /* NET_IPV4_TCP_MAX_KA_PROBES unused */
321 { NET_IPV4_TCP_KEEPALIVE_TIME, "tcp_keepalive_time" },
322 { NET_IPV4_TCP_KEEPALIVE_PROBES, "tcp_keepalive_probes" },
323 { NET_IPV4_TCP_RETRIES1, "tcp_retries1" },
324 { NET_IPV4_TCP_RETRIES2, "tcp_retries2" },
325 { NET_IPV4_TCP_FIN_TIMEOUT, "tcp_fin_timeout" },
326 /* NET_IPV4_IP_MASQ_DEBUG unused */
327 { NET_TCP_SYNCOOKIES, "tcp_syncookies" },
328 { NET_TCP_STDURG, "tcp_stdurg" },
329 { NET_TCP_RFC1337, "tcp_rfc1337" },
330 /* NET_TCP_SYN_TAILDROP unused */
331 { NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog" },
332 { NET_IPV4_LOCAL_PORT_RANGE, "ip_local_port_range" },
333 { NET_IPV4_ICMP_ECHO_IGNORE_ALL, "icmp_echo_ignore_all" },
334 { NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, "icmp_echo_ignore_broadcasts" },
335 /* NET_IPV4_ICMP_SOURCEQUENCH_RATE unused */
336 /* NET_IPV4_ICMP_DESTUNREACH_RATE unused */
337 /* NET_IPV4_ICMP_TIMEEXCEED_RATE unused */
338 /* NET_IPV4_ICMP_PARAMPROB_RATE unused */
339 /* NET_IPV4_ICMP_ECHOREPLY_RATE unused */
340 { NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, "icmp_ignore_bogus_error_responses" },
341 { NET_IPV4_IGMP_MAX_MEMBERSHIPS, "igmp_max_memberships" },
342 { NET_TCP_TW_RECYCLE, "tcp_tw_recycle" },
343 /* NET_IPV4_ALWAYS_DEFRAG unused */
344 { NET_IPV4_TCP_KEEPALIVE_INTVL, "tcp_keepalive_intvl" },
345 { NET_IPV4_INET_PEER_THRESHOLD, "inet_peer_threshold" },
346 { NET_IPV4_INET_PEER_MINTTL, "inet_peer_minttl" },
347 { NET_IPV4_INET_PEER_MAXTTL, "inet_peer_maxttl" },
348 { NET_IPV4_INET_PEER_GC_MINTIME, "inet_peer_gc_mintime" },
349 { NET_IPV4_INET_PEER_GC_MAXTIME, "inet_peer_gc_maxtime" },
350 { NET_TCP_ORPHAN_RETRIES, "tcp_orphan_retries" },
351 { NET_TCP_ABORT_ON_OVERFLOW, "tcp_abort_on_overflow" },
352 { NET_TCP_SYNACK_RETRIES, "tcp_synack_retries" },
353 { NET_TCP_MAX_ORPHANS, "tcp_max_orphans" },
354 { NET_TCP_MAX_TW_BUCKETS, "tcp_max_tw_buckets" },
355 { NET_TCP_FACK, "tcp_fack" },
356 { NET_TCP_REORDERING, "tcp_reordering" },
357 { NET_TCP_ECN, "tcp_ecn" },
358 { NET_TCP_DSACK, "tcp_dsack" },
359 { NET_TCP_MEM, "tcp_mem" },
360 { NET_TCP_WMEM, "tcp_wmem" },
361 { NET_TCP_RMEM, "tcp_rmem" },
362 { NET_TCP_APP_WIN, "tcp_app_win" },
363 { NET_TCP_ADV_WIN_SCALE, "tcp_adv_win_scale" },
364 { NET_IPV4_NONLOCAL_BIND, "ip_nonlocal_bind" },
365 { NET_IPV4_ICMP_RATELIMIT, "icmp_ratelimit" },
366 { NET_IPV4_ICMP_RATEMASK, "icmp_ratemask" },
367 { NET_TCP_TW_REUSE, "tcp_tw_reuse" },
368 { NET_TCP_FRTO, "tcp_frto" },
369 { NET_TCP_LOW_LATENCY, "tcp_low_latency" },
370 { NET_IPV4_IPFRAG_SECRET_INTERVAL, "ipfrag_secret_interval" },
371 { NET_IPV4_IGMP_MAX_MSF, "igmp_max_msf" },
372 { NET_TCP_NO_METRICS_SAVE, "tcp_no_metrics_save" },
373 /* NET_TCP_DEFAULT_WIN_SCALE unused */
374 { NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" },
375 { NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" },
376 /* NET_TCP_BIC_BETA unused */
377 { NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, "icmp_errors_use_inbound_ifaddr" },
378 { NET_TCP_CONG_CONTROL, "tcp_congestion_control" },
379 { NET_TCP_ABC, "tcp_abc" },
380 { NET_IPV4_IPFRAG_MAX_DIST, "ipfrag_max_dist" },
381 { NET_TCP_MTU_PROBING, "tcp_mtu_probing" },
382 { NET_TCP_BASE_MSS, "tcp_base_mss" },
383 { NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" },
384 { NET_TCP_DMA_COPYBREAK, "tcp_dma_copybreak" },
385 { NET_TCP_SLOW_START_AFTER_IDLE, "tcp_slow_start_after_idle" },
386 { NET_CIPSOV4_CACHE_ENABLE, "cipso_cache_enable" },
387 { NET_CIPSOV4_CACHE_BUCKET_SIZE, "cipso_cache_bucket_size" },
388 { NET_CIPSOV4_RBM_OPTFMT, "cipso_rbm_optfmt" },
389 { NET_CIPSOV4_RBM_STRICTVALID, "cipso_rbm_strictvalid" },
390 { NET_TCP_AVAIL_CONG_CONTROL, "tcp_available_congestion_control" },
391 { NET_TCP_ALLOWED_CONG_CONTROL, "tcp_allowed_congestion_control" },
392 { NET_TCP_MAX_SSTHRESH, "tcp_max_ssthresh" },
393 { NET_TCP_FRTO_RESPONSE, "tcp_frto_response" },
394 { 2088 /* NET_IPQ_QMAX */, "ip_queue_maxlen" },
395 {}
396};
397
398static const struct trans_ctl_table trans_net_ipx_table[] = {
399 { NET_IPX_PPROP_BROADCASTING, "ipx_pprop_broadcasting" },
400 /* NET_IPX_FORWARDING unused */
401 {}
402};
403
404static const struct trans_ctl_table trans_net_atalk_table[] = {
405 { NET_ATALK_AARP_EXPIRY_TIME, "aarp-expiry-time" },
406 { NET_ATALK_AARP_TICK_TIME, "aarp-tick-time" },
407 { NET_ATALK_AARP_RETRANSMIT_LIMIT, "aarp-retransmit-limit" },
408 { NET_ATALK_AARP_RESOLVE_TIME, "aarp-resolve-time" },
409 {},
410};
411
412static const struct trans_ctl_table trans_net_netrom_table[] = {
413 { NET_NETROM_DEFAULT_PATH_QUALITY, "default_path_quality" },
414 { NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER, "obsolescence_count_initialiser" },
415 { NET_NETROM_NETWORK_TTL_INITIALISER, "network_ttl_initialiser" },
416 { NET_NETROM_TRANSPORT_TIMEOUT, "transport_timeout" },
417 { NET_NETROM_TRANSPORT_MAXIMUM_TRIES, "transport_maximum_tries" },
418 { NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY, "transport_acknowledge_delay" },
419 { NET_NETROM_TRANSPORT_BUSY_DELAY, "transport_busy_delay" },
420 { NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE, "transport_requested_window_size" },
421 { NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT, "transport_no_activity_timeout" },
422 { NET_NETROM_ROUTING_CONTROL, "routing_control" },
423 { NET_NETROM_LINK_FAILS_COUNT, "link_fails_count" },
424 { NET_NETROM_RESET, "reset" },
425 {}
426};
427
428static const struct trans_ctl_table trans_net_ax25_param_table[] = {
429 { NET_AX25_IP_DEFAULT_MODE, "ip_default_mode" },
430 { NET_AX25_DEFAULT_MODE, "ax25_default_mode" },
431 { NET_AX25_BACKOFF_TYPE, "backoff_type" },
432 { NET_AX25_CONNECT_MODE, "connect_mode" },
433 { NET_AX25_STANDARD_WINDOW, "standard_window_size" },
434 { NET_AX25_EXTENDED_WINDOW, "extended_window_size" },
435 { NET_AX25_T1_TIMEOUT, "t1_timeout" },
436 { NET_AX25_T2_TIMEOUT, "t2_timeout" },
437 { NET_AX25_T3_TIMEOUT, "t3_timeout" },
438 { NET_AX25_IDLE_TIMEOUT, "idle_timeout" },
439 { NET_AX25_N2, "maximum_retry_count" },
440 { NET_AX25_PACLEN, "maximum_packet_length" },
441 { NET_AX25_PROTOCOL, "protocol" },
442 { NET_AX25_DAMA_SLAVE_TIMEOUT, "dama_slave_timeout" },
443 {}
444};
445
446static const struct trans_ctl_table trans_net_ax25_table[] = {
447 { 0, NULL, trans_net_ax25_param_table },
448 {}
449};
450
451static const struct trans_ctl_table trans_net_bridge_table[] = {
452 { NET_BRIDGE_NF_CALL_ARPTABLES, "bridge-nf-call-arptables" },
453 { NET_BRIDGE_NF_CALL_IPTABLES, "bridge-nf-call-iptables" },
454 { NET_BRIDGE_NF_CALL_IP6TABLES, "bridge-nf-call-ip6tables" },
455 { NET_BRIDGE_NF_FILTER_VLAN_TAGGED, "bridge-nf-filter-vlan-tagged" },
456 { NET_BRIDGE_NF_FILTER_PPPOE_TAGGED, "bridge-nf-filter-pppoe-tagged" },
457 {}
458};
459
460static const struct trans_ctl_table trans_net_rose_table[] = {
461 { NET_ROSE_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" },
462 { NET_ROSE_CALL_REQUEST_TIMEOUT, "call_request_timeout" },
463 { NET_ROSE_RESET_REQUEST_TIMEOUT, "reset_request_timeout" },
464 { NET_ROSE_CLEAR_REQUEST_TIMEOUT, "clear_request_timeout" },
465 { NET_ROSE_ACK_HOLD_BACK_TIMEOUT, "acknowledge_hold_back_timeout" },
466 { NET_ROSE_ROUTING_CONTROL, "routing_control" },
467 { NET_ROSE_LINK_FAIL_TIMEOUT, "link_fail_timeout" },
468 { NET_ROSE_MAX_VCS, "maximum_virtual_circuits" },
469 { NET_ROSE_WINDOW_SIZE, "window_size" },
470 { NET_ROSE_NO_ACTIVITY_TIMEOUT, "no_activity_timeout" },
471 {}
472};
473
474static const struct trans_ctl_table trans_net_ipv6_conf_var_table[] = {
475 { NET_IPV6_FORWARDING, "forwarding" },
476 { NET_IPV6_HOP_LIMIT, "hop_limit" },
477 { NET_IPV6_MTU, "mtu" },
478 { NET_IPV6_ACCEPT_RA, "accept_ra" },
479 { NET_IPV6_ACCEPT_REDIRECTS, "accept_redirects" },
480 { NET_IPV6_AUTOCONF, "autoconf" },
481 { NET_IPV6_DAD_TRANSMITS, "dad_transmits" },
482 { NET_IPV6_RTR_SOLICITS, "router_solicitations" },
483 { NET_IPV6_RTR_SOLICIT_INTERVAL, "router_solicitation_interval" },
484 { NET_IPV6_RTR_SOLICIT_DELAY, "router_solicitation_delay" },
485 { NET_IPV6_USE_TEMPADDR, "use_tempaddr" },
486 { NET_IPV6_TEMP_VALID_LFT, "temp_valid_lft" },
487 { NET_IPV6_TEMP_PREFERED_LFT, "temp_prefered_lft" },
488 { NET_IPV6_REGEN_MAX_RETRY, "regen_max_retry" },
489 { NET_IPV6_MAX_DESYNC_FACTOR, "max_desync_factor" },
490 { NET_IPV6_MAX_ADDRESSES, "max_addresses" },
491 { NET_IPV6_FORCE_MLD_VERSION, "force_mld_version" },
492 { NET_IPV6_ACCEPT_RA_DEFRTR, "accept_ra_defrtr" },
493 { NET_IPV6_ACCEPT_RA_PINFO, "accept_ra_pinfo" },
494 { NET_IPV6_ACCEPT_RA_RTR_PREF, "accept_ra_rtr_pref" },
495 { NET_IPV6_RTR_PROBE_INTERVAL, "router_probe_interval" },
496 { NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, "accept_ra_rt_info_max_plen" },
497 { NET_IPV6_PROXY_NDP, "proxy_ndp" },
498 { NET_IPV6_ACCEPT_SOURCE_ROUTE, "accept_source_route" },
499 {}
500};
501
502static const struct trans_ctl_table trans_net_ipv6_conf_table[] = {
503 { NET_PROTO_CONF_ALL, "all", trans_net_ipv6_conf_var_table },
504 { NET_PROTO_CONF_DEFAULT, "default", trans_net_ipv6_conf_var_table },
505 { 0, NULL, trans_net_ipv6_conf_var_table },
506 {}
507};
508
509static const struct trans_ctl_table trans_net_ipv6_route_table[] = {
510 { NET_IPV6_ROUTE_FLUSH, "flush" },
511 { NET_IPV6_ROUTE_GC_THRESH, "gc_thresh" },
512 { NET_IPV6_ROUTE_MAX_SIZE, "max_size" },
513 { NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" },
514 { NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout" },
515 { NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval" },
516 { NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity" },
517 { NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires" },
518 { NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss" },
519 { NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" },
520 {}
521};
522
523static const struct trans_ctl_table trans_net_ipv6_icmp_table[] = {
524 { NET_IPV6_ICMP_RATELIMIT, "ratelimit" },
525 {}
526};
527
528static const struct trans_ctl_table trans_net_ipv6_table[] = {
529 { NET_IPV6_CONF, "conf", trans_net_ipv6_conf_table },
530 { NET_IPV6_NEIGH, "neigh", trans_net_neigh_table },
531 { NET_IPV6_ROUTE, "route", trans_net_ipv6_route_table },
532 { NET_IPV6_ICMP, "icmp", trans_net_ipv6_icmp_table },
533 { NET_IPV6_BINDV6ONLY, "bindv6only" },
534 { NET_IPV6_IP6FRAG_HIGH_THRESH, "ip6frag_high_thresh" },
535 { NET_IPV6_IP6FRAG_LOW_THRESH, "ip6frag_low_thresh" },
536 { NET_IPV6_IP6FRAG_TIME, "ip6frag_time" },
537 { NET_IPV6_IP6FRAG_SECRET_INTERVAL, "ip6frag_secret_interval" },
538 { NET_IPV6_MLD_MAX_MSF, "mld_max_msf" },
539 { 2088 /* IPQ_QMAX */, "ip6_queue_maxlen" },
540 {}
541};
542
543static const struct trans_ctl_table trans_net_x25_table[] = {
544 { NET_X25_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" },
545 { NET_X25_CALL_REQUEST_TIMEOUT, "call_request_timeout" },
546 { NET_X25_RESET_REQUEST_TIMEOUT, "reset_request_timeout" },
547 { NET_X25_CLEAR_REQUEST_TIMEOUT, "clear_request_timeout" },
548 { NET_X25_ACK_HOLD_BACK_TIMEOUT, "acknowledgement_hold_back_timeout" },
549 { NET_X25_FORWARD, "x25_forward" },
550 {}
551};
552
553static const struct trans_ctl_table trans_net_tr_table[] = {
554 { NET_TR_RIF_TIMEOUT, "rif_timeout" },
555 {}
556};
557
558
559static const struct trans_ctl_table trans_net_decnet_conf_vars[] = {
560 { NET_DECNET_CONF_DEV_FORWARDING, "forwarding" },
561 { NET_DECNET_CONF_DEV_PRIORITY, "priority" },
562 { NET_DECNET_CONF_DEV_T2, "t2" },
563 { NET_DECNET_CONF_DEV_T3, "t3" },
564 {}
565};
566
567static const struct trans_ctl_table trans_net_decnet_conf[] = {
568 { 0, NULL, trans_net_decnet_conf_vars },
569 {}
570};
571
572static const struct trans_ctl_table trans_net_decnet_table[] = {
573 { NET_DECNET_CONF, "conf", trans_net_decnet_conf },
574 { NET_DECNET_NODE_ADDRESS, "node_address" },
575 { NET_DECNET_NODE_NAME, "node_name" },
576 { NET_DECNET_DEFAULT_DEVICE, "default_device" },
577 { NET_DECNET_TIME_WAIT, "time_wait" },
578 { NET_DECNET_DN_COUNT, "dn_count" },
579 { NET_DECNET_DI_COUNT, "di_count" },
580 { NET_DECNET_DR_COUNT, "dr_count" },
581 { NET_DECNET_DST_GC_INTERVAL, "dst_gc_interval" },
582 { NET_DECNET_NO_FC_MAX_CWND, "no_fc_max_cwnd" },
583 { NET_DECNET_MEM, "decnet_mem" },
584 { NET_DECNET_RMEM, "decnet_rmem" },
585 { NET_DECNET_WMEM, "decnet_wmem" },
586 { NET_DECNET_DEBUG_LEVEL, "debug" },
587 {}
588};
589
590static const struct trans_ctl_table trans_net_sctp_table[] = {
591 { NET_SCTP_RTO_INITIAL, "rto_initial" },
592 { NET_SCTP_RTO_MIN, "rto_min" },
593 { NET_SCTP_RTO_MAX, "rto_max" },
594 { NET_SCTP_RTO_ALPHA, "rto_alpha_exp_divisor" },
595 { NET_SCTP_RTO_BETA, "rto_beta_exp_divisor" },
596 { NET_SCTP_VALID_COOKIE_LIFE, "valid_cookie_life" },
597 { NET_SCTP_ASSOCIATION_MAX_RETRANS, "association_max_retrans" },
598 { NET_SCTP_PATH_MAX_RETRANS, "path_max_retrans" },
599 { NET_SCTP_MAX_INIT_RETRANSMITS, "max_init_retransmits" },
600 { NET_SCTP_HB_INTERVAL, "hb_interval" },
601 { NET_SCTP_PRESERVE_ENABLE, "cookie_preserve_enable" },
602 { NET_SCTP_MAX_BURST, "max_burst" },
603 { NET_SCTP_ADDIP_ENABLE, "addip_enable" },
604 { NET_SCTP_PRSCTP_ENABLE, "prsctp_enable" },
605 { NET_SCTP_SNDBUF_POLICY, "sndbuf_policy" },
606 { NET_SCTP_SACK_TIMEOUT, "sack_timeout" },
607 { NET_SCTP_RCVBUF_POLICY, "rcvbuf_policy" },
608 {}
609};
610
611static const struct trans_ctl_table trans_net_llc_llc2_timeout_table[] = {
612 { NET_LLC2_ACK_TIMEOUT, "ack" },
613 { NET_LLC2_P_TIMEOUT, "p" },
614 { NET_LLC2_REJ_TIMEOUT, "rej" },
615 { NET_LLC2_BUSY_TIMEOUT, "busy" },
616 {}
617};
618
619static const struct trans_ctl_table trans_net_llc_station_table[] = {
620 { NET_LLC_STATION_ACK_TIMEOUT, "ack_timeout" },
621 {}
622};
623
624static const struct trans_ctl_table trans_net_llc_llc2_table[] = {
625 { NET_LLC2, "timeout", trans_net_llc_llc2_timeout_table },
626 {}
627};
628
629static const struct trans_ctl_table trans_net_llc_table[] = {
630 { NET_LLC2, "llc2", trans_net_llc_llc2_table },
631 { NET_LLC_STATION, "station", trans_net_llc_station_table },
632 {}
633};
634
635static const struct trans_ctl_table trans_net_netfilter_table[] = {
636 { NET_NF_CONNTRACK_MAX, "nf_conntrack_max" },
637 { NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "nf_conntrack_tcp_timeout_syn_sent" },
638 { NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, "nf_conntrack_tcp_timeout_syn_recv" },
639 { NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, "nf_conntrack_tcp_timeout_established" },
640 { NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, "nf_conntrack_tcp_timeout_fin_wait" },
641 { NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, "nf_conntrack_tcp_timeout_close_wait" },
642 { NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, "nf_conntrack_tcp_timeout_last_ack" },
643 { NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, "nf_conntrack_tcp_timeout_time_wait" },
644 { NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, "nf_conntrack_tcp_timeout_close" },
645 { NET_NF_CONNTRACK_UDP_TIMEOUT, "nf_conntrack_udp_timeout" },
646 { NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM, "nf_conntrack_udp_timeout_stream" },
647 { NET_NF_CONNTRACK_ICMP_TIMEOUT, "nf_conntrack_icmp_timeout" },
648 { NET_NF_CONNTRACK_GENERIC_TIMEOUT, "nf_conntrack_generic_timeout" },
649 { NET_NF_CONNTRACK_BUCKETS, "nf_conntrack_buckets" },
650 { NET_NF_CONNTRACK_LOG_INVALID, "nf_conntrack_log_invalid" },
651 { NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, "nf_conntrack_tcp_timeout_max_retrans" },
652 { NET_NF_CONNTRACK_TCP_LOOSE, "nf_conntrack_tcp_loose" },
653 { NET_NF_CONNTRACK_TCP_BE_LIBERAL, "nf_conntrack_tcp_be_liberal" },
654 { NET_NF_CONNTRACK_TCP_MAX_RETRANS, "nf_conntrack_tcp_max_retrans" },
655 { NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, "nf_conntrack_sctp_timeout_closed" },
656 { NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, "nf_conntrack_sctp_timeout_cookie_wait" },
657 { NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, "nf_conntrack_sctp_timeout_cookie_echoed" },
658 { NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, "nf_conntrack_sctp_timeout_established" },
659 { NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, "nf_conntrack_sctp_timeout_shutdown_sent" },
660 { NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, "nf_conntrack_sctp_timeout_shutdown_recd" },
661 { NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, "nf_conntrack_sctp_timeout_shutdown_ack_sent" },
662 { NET_NF_CONNTRACK_COUNT, "nf_conntrack_count" },
663 { NET_NF_CONNTRACK_ICMPV6_TIMEOUT, "nf_conntrack_icmpv6_timeout" },
664 { NET_NF_CONNTRACK_FRAG6_TIMEOUT, "nf_conntrack_frag6_timeout" },
665 { NET_NF_CONNTRACK_FRAG6_LOW_THRESH, "nf_conntrack_frag6_low_thresh" },
666 { NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, "nf_conntrack_frag6_high_thresh" },
667 { NET_NF_CONNTRACK_CHECKSUM, "nf_conntrack_checksum" },
668
669 {}
670};
671
672static const struct trans_ctl_table trans_net_dccp_table[] = {
673 { NET_DCCP_DEFAULT, "default" },
674 {}
675};
676
677static const struct trans_ctl_table trans_net_irda_table[] = {
678 { NET_IRDA_DISCOVERY, "discovery" },
679 { NET_IRDA_DEVNAME, "devname" },
680 { NET_IRDA_DEBUG, "debug" },
681 { NET_IRDA_FAST_POLL, "fast_poll_increase" },
682 { NET_IRDA_DISCOVERY_SLOTS, "discovery_slots" },
683 { NET_IRDA_DISCOVERY_TIMEOUT, "discovery_timeout" },
684 { NET_IRDA_SLOT_TIMEOUT, "slot_timeout" },
685 { NET_IRDA_MAX_BAUD_RATE, "max_baud_rate" },
686 { NET_IRDA_MIN_TX_TURN_TIME, "min_tx_turn_time" },
687 { NET_IRDA_MAX_TX_DATA_SIZE, "max_tx_data_size" },
688 { NET_IRDA_MAX_TX_WINDOW, "max_tx_window" },
689 { NET_IRDA_MAX_NOREPLY_TIME, "max_noreply_time" },
690 { NET_IRDA_WARN_NOREPLY_TIME, "warn_noreply_time" },
691 { NET_IRDA_LAP_KEEPALIVE_TIME, "lap_keepalive_time" },
692 {}
693};
694
695static const struct trans_ctl_table trans_net_table[] = {
696 { NET_CORE, "core", trans_net_core_table },
697 /* NET_ETHER not used */
698 /* NET_802 not used */
699 { NET_UNIX, "unix", trans_net_unix_table },
700 { NET_IPV4, "ipv4", trans_net_ipv4_table },
701 { NET_IPX, "ipx", trans_net_ipx_table },
702 { NET_ATALK, "appletalk", trans_net_atalk_table },
703 { NET_NETROM, "netrom", trans_net_netrom_table },
704 { NET_AX25, "ax25", trans_net_ax25_table },
705 { NET_BRIDGE, "bridge", trans_net_bridge_table },
706 { NET_ROSE, "rose", trans_net_rose_table },
707 { NET_IPV6, "ipv6", trans_net_ipv6_table },
708 { NET_X25, "x25", trans_net_x25_table },
709 { NET_TR, "token-ring", trans_net_tr_table },
710 { NET_DECNET, "decnet", trans_net_decnet_table },
711 /* NET_ECONET not used */
712 { NET_SCTP, "sctp", trans_net_sctp_table },
713 { NET_LLC, "llc", trans_net_llc_table },
714 { NET_NETFILTER, "netfilter", trans_net_netfilter_table },
715 { NET_DCCP, "dccp", trans_net_dccp_table },
716 { NET_IRDA, "irda", trans_net_irda_table },
717 { 2089, "nf_conntrack_max" },
718 {}
719};
720
721static const struct trans_ctl_table trans_fs_quota_table[] = {
722 { FS_DQ_LOOKUPS, "lookups" },
723 { FS_DQ_DROPS, "drops" },
724 { FS_DQ_READS, "reads" },
725 { FS_DQ_WRITES, "writes" },
726 { FS_DQ_CACHE_HITS, "cache_hits" },
727 { FS_DQ_ALLOCATED, "allocated_dquots" },
728 { FS_DQ_FREE, "free_dquots" },
729 { FS_DQ_SYNCS, "syncs" },
730 { FS_DQ_WARNINGS, "warnings" },
731 {}
732};
733
734static const struct trans_ctl_table trans_fs_xfs_table[] = {
735 { XFS_SGID_INHERIT, "irix_sgid_inherit" },
736 { XFS_SYMLINK_MODE, "irix_symlink_mode" },
737 { XFS_PANIC_MASK, "panic_mask" },
738
739 { XFS_ERRLEVEL, "error_level" },
740 { XFS_SYNCD_TIMER, "xfssyncd_centisecs" },
741 { XFS_INHERIT_SYNC, "inherit_sync" },
742 { XFS_INHERIT_NODUMP, "inherit_nodump" },
743 { XFS_INHERIT_NOATIME, "inherit_noatime" },
744 { XFS_BUF_TIMER, "xfsbufd_centisecs" },
745 { XFS_BUF_AGE, "age_buffer_centisecs" },
746 { XFS_INHERIT_NOSYM, "inherit_nosymlinks" },
747 { XFS_ROTORSTEP, "rotorstep" },
748 { XFS_INHERIT_NODFRG, "inherit_nodefrag" },
749 { XFS_FILESTREAM_TIMER, "filestream_centisecs" },
750 { XFS_STATS_CLEAR, "stats_clear" },
751 {}
752};
753
754static const struct trans_ctl_table trans_fs_ocfs2_nm_table[] = {
755 { 1, "hb_ctl_path" },
756 {}
757};
758
759static const struct trans_ctl_table trans_fs_ocfs2_table[] = {
760 { 1, "nm", trans_fs_ocfs2_nm_table },
761 {}
762};
763
764static const struct trans_ctl_table trans_inotify_table[] = {
765 { INOTIFY_MAX_USER_INSTANCES, "max_user_instances" },
766 { INOTIFY_MAX_USER_WATCHES, "max_user_watches" },
767 { INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" },
768 {}
769};
770
771static const struct trans_ctl_table trans_fs_table[] = {
772 { FS_NRINODE, "inode-nr" },
773 { FS_STATINODE, "inode-state" },
774 /* FS_MAXINODE unused */
775 /* FS_NRDQUOT unused */
776 /* FS_MAXDQUOT unused */
777 { FS_NRFILE, "file-nr" },
778 { FS_MAXFILE, "file-max" },
779 { FS_DENTRY, "dentry-state" },
780 /* FS_NRSUPER unused */
781 /* FS_MAXUPSER unused */
782 { FS_OVERFLOWUID, "overflowuid" },
783 { FS_OVERFLOWGID, "overflowgid" },
784 { FS_LEASES, "leases-enable" },
785 { FS_DIR_NOTIFY, "dir-notify-enable" },
786 { FS_LEASE_TIME, "lease-break-time" },
787 { FS_DQSTATS, "quota", trans_fs_quota_table },
788 { FS_XFS, "xfs", trans_fs_xfs_table },
789 { FS_AIO_NR, "aio-nr" },
790 { FS_AIO_MAX_NR, "aio-max-nr" },
791 { FS_INOTIFY, "inotify", trans_inotify_table },
792 { FS_OCFS2, "ocfs2", trans_fs_ocfs2_table },
793 { KERN_SETUID_DUMPABLE, "suid_dumpable" },
794 {}
795};
796
797static const struct trans_ctl_table trans_debug_table[] = {
798 {}
799};
800
801static const struct trans_ctl_table trans_cdrom_table[] = {
802 { DEV_CDROM_INFO, "info" },
803 { DEV_CDROM_AUTOCLOSE, "autoclose" },
804 { DEV_CDROM_AUTOEJECT, "autoeject" },
805 { DEV_CDROM_DEBUG, "debug" },
806 { DEV_CDROM_LOCK, "lock" },
807 { DEV_CDROM_CHECK_MEDIA, "check_media" },
808 {}
809};
810
811static const struct trans_ctl_table trans_ipmi_table[] = {
812 { DEV_IPMI_POWEROFF_POWERCYCLE, "poweroff_powercycle" },
813 {}
814};
815
816static const struct trans_ctl_table trans_mac_hid_files[] = {
817 /* DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES unused */
818 /* DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES unused */
819 { DEV_MAC_HID_MOUSE_BUTTON_EMULATION, "mouse_button_emulation" },
820 { DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE, "mouse_button2_keycode" },
821 { DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE, "mouse_button3_keycode" },
822 /* DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES unused */
823 {}
824};
825
826static const struct trans_ctl_table trans_raid_table[] = {
827 { DEV_RAID_SPEED_LIMIT_MIN, "speed_limit_min" },
828 { DEV_RAID_SPEED_LIMIT_MAX, "speed_limit_max" },
829 {}
830};
831
832static const struct trans_ctl_table trans_scsi_table[] = {
833 { DEV_SCSI_LOGGING_LEVEL, "logging_level" },
834 {}
835};
836
837static const struct trans_ctl_table trans_parport_default_table[] = {
838 { DEV_PARPORT_DEFAULT_TIMESLICE, "timeslice" },
839 { DEV_PARPORT_DEFAULT_SPINTIME, "spintime" },
840 {}
841};
842
843static const struct trans_ctl_table trans_parport_device_table[] = {
844 { DEV_PARPORT_DEVICE_TIMESLICE, "timeslice" },
845 {}
846};
847
848static const struct trans_ctl_table trans_parport_devices_table[] = {
849 { DEV_PARPORT_DEVICES_ACTIVE, "active" },
850 { 0, NULL, trans_parport_device_table },
851 {}
852};
853
854static const struct trans_ctl_table trans_parport_parport_table[] = {
855 { DEV_PARPORT_SPINTIME, "spintime" },
856 { DEV_PARPORT_BASE_ADDR, "base-addr" },
857 { DEV_PARPORT_IRQ, "irq" },
858 { DEV_PARPORT_DMA, "dma" },
859 { DEV_PARPORT_MODES, "modes" },
860 { DEV_PARPORT_DEVICES, "devices", trans_parport_devices_table },
861 { DEV_PARPORT_AUTOPROBE, "autoprobe" },
862 { DEV_PARPORT_AUTOPROBE + 1, "autoprobe0" },
863 { DEV_PARPORT_AUTOPROBE + 2, "autoprobe1" },
864 { DEV_PARPORT_AUTOPROBE + 3, "autoprobe2" },
865 { DEV_PARPORT_AUTOPROBE + 4, "autoprobe3" },
866 {}
867};
868static const struct trans_ctl_table trans_parport_table[] = {
869 { DEV_PARPORT_DEFAULT, "default", trans_parport_default_table },
870 { 0, NULL, trans_parport_parport_table },
871 {}
872};
873
874static const struct trans_ctl_table trans_dev_table[] = {
875 { DEV_CDROM, "cdrom", trans_cdrom_table },
876 /* DEV_HWMON unused */
877 { DEV_PARPORT, "parport", trans_parport_table },
878 { DEV_RAID, "raid", trans_raid_table },
879 { DEV_MAC_HID, "mac_hid", trans_mac_hid_files },
880 { DEV_SCSI, "scsi", trans_scsi_table },
881 { DEV_IPMI, "ipmi", trans_ipmi_table },
882 {}
883};
884
885static const struct trans_ctl_table trans_bus_isa_table[] = {
886 { BUS_ISA_MEM_BASE, "membase" },
887 { BUS_ISA_PORT_BASE, "portbase" },
888 { BUS_ISA_PORT_SHIFT, "portshift" },
889 {}
890};
891
892static const struct trans_ctl_table trans_bus_table[] = {
893 { CTL_BUS_ISA, "isa", trans_bus_isa_table },
894 {}
895};
896
897static const struct trans_ctl_table trans_arlan_conf_table0[] = {
898 { 1, "spreadingCode" },
899 { 2, "channelNumber" },
900 { 3, "scramblingDisable" },
901 { 4, "txAttenuation" },
902 { 5, "systemId" },
903 { 6, "maxDatagramSize" },
904 { 7, "maxFrameSize" },
905 { 8, "maxRetries" },
906 { 9, "receiveMode" },
907 { 10, "priority" },
908 { 11, "rootOrRepeater" },
909 { 12, "SID" },
910 { 13, "registrationMode" },
911 { 14, "registrationFill" },
912 { 15, "localTalkAddress" },
913 { 16, "codeFormat" },
914 { 17, "numChannels" },
915 { 18, "channel1" },
916 { 19, "channel2" },
917 { 20, "channel3" },
918 { 21, "channel4" },
919 { 22, "txClear" },
920 { 23, "txRetries" },
921 { 24, "txRouting" },
922 { 25, "txScrambled" },
923 { 26, "rxParameter" },
924 { 27, "txTimeoutMs" },
925 { 28, "waitCardTimeout" },
926 { 29, "channelSet" },
927 { 30, "name" },
928 { 31, "waitTime" },
929 { 32, "lParameter" },
930 { 33, "_15" },
931 { 34, "headerSize" },
932 { 36, "tx_delay_ms" },
933 { 37, "retries" },
934 { 38, "ReTransmitPacketMaxSize" },
935 { 39, "waitReTransmitPacketMaxSize" },
936 { 40, "fastReTransCount" },
937 { 41, "driverRetransmissions" },
938 { 42, "txAckTimeoutMs" },
939 { 43, "registrationInterrupts" },
940 { 44, "hardwareType" },
941 { 45, "radioType" },
942 { 46, "writeEEPROM" },
943 { 47, "writeRadioType" },
944 { 48, "entry_exit_debug" },
945 { 49, "debug" },
946 { 50, "in_speed" },
947 { 51, "out_speed" },
948 { 52, "in_speed10" },
949 { 53, "out_speed10" },
950 { 54, "in_speed_max" },
951 { 55, "out_speed_max" },
952 { 56, "measure_rate" },
953 { 57, "pre_Command_Wait" },
954 { 58, "rx_tweak1" },
955 { 59, "rx_tweak2" },
956 { 60, "tx_queue_len" },
957
958 { 150, "arlan0-txRing" },
959 { 151, "arlan0-rxRing" },
960 { 152, "arlan0-18" },
961 { 153, "arlan0-ring" },
962 { 154, "arlan0-shm-cpy" },
963 { 155, "config0" },
964 { 156, "reset0" },
965 {}
966};
967
968static const struct trans_ctl_table trans_arlan_conf_table1[] = {
969 { 1, "spreadingCode" },
970 { 2, "channelNumber" },
971 { 3, "scramblingDisable" },
972 { 4, "txAttenuation" },
973 { 5, "systemId" },
974 { 6, "maxDatagramSize" },
975 { 7, "maxFrameSize" },
976 { 8, "maxRetries" },
977 { 9, "receiveMode" },
978 { 10, "priority" },
979 { 11, "rootOrRepeater" },
980 { 12, "SID" },
981 { 13, "registrationMode" },
982 { 14, "registrationFill" },
983 { 15, "localTalkAddress" },
984 { 16, "codeFormat" },
985 { 17, "numChannels" },
986 { 18, "channel1" },
987 { 19, "channel2" },
988 { 20, "channel3" },
989 { 21, "channel4" },
990 { 22, "txClear" },
991 { 23, "txRetries" },
992 { 24, "txRouting" },
993 { 25, "txScrambled" },
994 { 26, "rxParameter" },
995 { 27, "txTimeoutMs" },
996 { 28, "waitCardTimeout" },
997 { 29, "channelSet" },
998 { 30, "name" },
999 { 31, "waitTime" },
1000 { 32, "lParameter" },
1001 { 33, "_15" },
1002 { 34, "headerSize" },
1003 { 36, "tx_delay_ms" },
1004 { 37, "retries" },
1005 { 38, "ReTransmitPacketMaxSize" },
1006 { 39, "waitReTransmitPacketMaxSize" },
1007 { 40, "fastReTransCount" },
1008 { 41, "driverRetransmissions" },
1009 { 42, "txAckTimeoutMs" },
1010 { 43, "registrationInterrupts" },
1011 { 44, "hardwareType" },
1012 { 45, "radioType" },
1013 { 46, "writeEEPROM" },
1014 { 47, "writeRadioType" },
1015 { 48, "entry_exit_debug" },
1016 { 49, "debug" },
1017 { 50, "in_speed" },
1018 { 51, "out_speed" },
1019 { 52, "in_speed10" },
1020 { 53, "out_speed10" },
1021 { 54, "in_speed_max" },
1022 { 55, "out_speed_max" },
1023 { 56, "measure_rate" },
1024 { 57, "pre_Command_Wait" },
1025 { 58, "rx_tweak1" },
1026 { 59, "rx_tweak2" },
1027 { 60, "tx_queue_len" },
1028
1029 { 150, "arlan1-txRing" },
1030 { 151, "arlan1-rxRing" },
1031 { 152, "arlan1-18" },
1032 { 153, "arlan1-ring" },
1033 { 154, "arlan1-shm-cpy" },
1034 { 155, "config1" },
1035 { 156, "reset1" },
1036 {}
1037};
1038
1039static const struct trans_ctl_table trans_arlan_conf_table2[] = {
1040 { 1, "spreadingCode" },
1041 { 2, "channelNumber" },
1042 { 3, "scramblingDisable" },
1043 { 4, "txAttenuation" },
1044 { 5, "systemId" },
1045 { 6, "maxDatagramSize" },
1046 { 7, "maxFrameSize" },
1047 { 8, "maxRetries" },
1048 { 9, "receiveMode" },
1049 { 10, "priority" },
1050 { 11, "rootOrRepeater" },
1051 { 12, "SID" },
1052 { 13, "registrationMode" },
1053 { 14, "registrationFill" },
1054 { 15, "localTalkAddress" },
1055 { 16, "codeFormat" },
1056 { 17, "numChannels" },
1057 { 18, "channel1" },
1058 { 19, "channel2" },
1059 { 20, "channel3" },
1060 { 21, "channel4" },
1061 { 22, "txClear" },
1062 { 23, "txRetries" },
1063 { 24, "txRouting" },
1064 { 25, "txScrambled" },
1065 { 26, "rxParameter" },
1066 { 27, "txTimeoutMs" },
1067 { 28, "waitCardTimeout" },
1068 { 29, "channelSet" },
1069 { 30, "name" },
1070 { 31, "waitTime" },
1071 { 32, "lParameter" },
1072 { 33, "_15" },
1073 { 34, "headerSize" },
1074 { 36, "tx_delay_ms" },
1075 { 37, "retries" },
1076 { 38, "ReTransmitPacketMaxSize" },
1077 { 39, "waitReTransmitPacketMaxSize" },
1078 { 40, "fastReTransCount" },
1079 { 41, "driverRetransmissions" },
1080 { 42, "txAckTimeoutMs" },
1081 { 43, "registrationInterrupts" },
1082 { 44, "hardwareType" },
1083 { 45, "radioType" },
1084 { 46, "writeEEPROM" },
1085 { 47, "writeRadioType" },
1086 { 48, "entry_exit_debug" },
1087 { 49, "debug" },
1088 { 50, "in_speed" },
1089 { 51, "out_speed" },
1090 { 52, "in_speed10" },
1091 { 53, "out_speed10" },
1092 { 54, "in_speed_max" },
1093 { 55, "out_speed_max" },
1094 { 56, "measure_rate" },
1095 { 57, "pre_Command_Wait" },
1096 { 58, "rx_tweak1" },
1097 { 59, "rx_tweak2" },
1098 { 60, "tx_queue_len" },
1099
1100 { 150, "arlan2-txRing" },
1101 { 151, "arlan2-rxRing" },
1102 { 152, "arlan2-18" },
1103 { 153, "arlan2-ring" },
1104 { 154, "arlan2-shm-cpy" },
1105 { 155, "config2" },
1106 { 156, "reset2" },
1107 {}
1108};
1109
1110static const struct trans_ctl_table trans_arlan_conf_table3[] = {
1111 { 1, "spreadingCode" },
1112 { 2, "channelNumber" },
1113 { 3, "scramblingDisable" },
1114 { 4, "txAttenuation" },
1115 { 5, "systemId" },
1116 { 6, "maxDatagramSize" },
1117 { 7, "maxFrameSize" },
1118 { 8, "maxRetries" },
1119 { 9, "receiveMode" },
1120 { 10, "priority" },
1121 { 11, "rootOrRepeater" },
1122 { 12, "SID" },
1123 { 13, "registrationMode" },
1124 { 14, "registrationFill" },
1125 { 15, "localTalkAddress" },
1126 { 16, "codeFormat" },
1127 { 17, "numChannels" },
1128 { 18, "channel1" },
1129 { 19, "channel2" },
1130 { 20, "channel3" },
1131 { 21, "channel4" },
1132 { 22, "txClear" },
1133 { 23, "txRetries" },
1134 { 24, "txRouting" },
1135 { 25, "txScrambled" },
1136 { 26, "rxParameter" },
1137 { 27, "txTimeoutMs" },
1138 { 28, "waitCardTimeout" },
1139 { 29, "channelSet" },
1140 { 30, "name" },
1141 { 31, "waitTime" },
1142 { 32, "lParameter" },
1143 { 33, "_15" },
1144 { 34, "headerSize" },
1145 { 36, "tx_delay_ms" },
1146 { 37, "retries" },
1147 { 38, "ReTransmitPacketMaxSize" },
1148 { 39, "waitReTransmitPacketMaxSize" },
1149 { 40, "fastReTransCount" },
1150 { 41, "driverRetransmissions" },
1151 { 42, "txAckTimeoutMs" },
1152 { 43, "registrationInterrupts" },
1153 { 44, "hardwareType" },
1154 { 45, "radioType" },
1155 { 46, "writeEEPROM" },
1156 { 47, "writeRadioType" },
1157 { 48, "entry_exit_debug" },
1158 { 49, "debug" },
1159 { 50, "in_speed" },
1160 { 51, "out_speed" },
1161 { 52, "in_speed10" },
1162 { 53, "out_speed10" },
1163 { 54, "in_speed_max" },
1164 { 55, "out_speed_max" },
1165 { 56, "measure_rate" },
1166 { 57, "pre_Command_Wait" },
1167 { 58, "rx_tweak1" },
1168 { 59, "rx_tweak2" },
1169 { 60, "tx_queue_len" },
1170
1171 { 150, "arlan3-txRing" },
1172 { 151, "arlan3-rxRing" },
1173 { 152, "arlan3-18" },
1174 { 153, "arlan3-ring" },
1175 { 154, "arlan3-shm-cpy" },
1176 { 155, "config3" },
1177 { 156, "reset3" },
1178 {}
1179};
1180
1181static const struct trans_ctl_table trans_arlan_table[] = {
1182 { 1, "arlan0", trans_arlan_conf_table0 },
1183 { 2, "arlan1", trans_arlan_conf_table1 },
1184 { 3, "arlan2", trans_arlan_conf_table2 },
1185 { 4, "arlan3", trans_arlan_conf_table3 },
1186 {}
1187};
1188
1189static const struct trans_ctl_table trans_s390dbf_table[] = {
1190 { 5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" },
1191 { 5679 /* CTL_S390DBF_ACTIVE */, "debug_active" },
1192 {}
1193};
1194
1195static const struct trans_ctl_table trans_sunrpc_table[] = {
1196 { CTL_RPCDEBUG, "rpc_debug" },
1197 { CTL_NFSDEBUG, "nfs_debug" },
1198 { CTL_NFSDDEBUG, "nfsd_debug" },
1199 { CTL_NLMDEBUG, "nlm_debug" },
1200 { CTL_SLOTTABLE_UDP, "udp_slot_table_entries" },
1201 { CTL_SLOTTABLE_TCP, "tcp_slot_table_entries" },
1202 { CTL_MIN_RESVPORT, "min_resvport" },
1203 { CTL_MAX_RESVPORT, "max_resvport" },
1204 {}
1205};
1206
1207static const struct trans_ctl_table trans_pm_table[] = {
1208 { 1 /* CTL_PM_SUSPEND */, "suspend" },
1209 { 2 /* CTL_PM_CMODE */, "cmode" },
1210 { 3 /* CTL_PM_P0 */, "p0" },
1211 { 4 /* CTL_PM_CM */, "cm" },
1212 {}
1213};
1214
1215static const struct trans_ctl_table trans_frv_table[] = {
1216 { 1, "cache-mode" },
1217 { 2, "pin-cxnr" },
1218 {}
1219};
1220
1221static const struct trans_ctl_table trans_root_table[] = {
1222 { CTL_KERN, "kernel", trans_kern_table },
1223 { CTL_VM, "vm", trans_vm_table },
1224 { CTL_NET, "net", trans_net_table },
1225 /* CTL_PROC not used */
1226 { CTL_FS, "fs", trans_fs_table },
1227 { CTL_DEBUG, "debug", trans_debug_table },
1228 { CTL_DEV, "dev", trans_dev_table },
1229 { CTL_BUS, "bus", trans_bus_table },
1230 { CTL_ABI, "abi" },
1231 /* CTL_CPU not used */
1232 { CTL_ARLAN, "arlan", trans_arlan_table },
1233 { CTL_S390DBF, "s390dbf", trans_s390dbf_table },
1234 { CTL_SUNRPC, "sunrpc", trans_sunrpc_table },
1235 { CTL_PM, "pm", trans_pm_table },
1236 { CTL_FRV, "frv", trans_frv_table },
1237 {}
1238};
1239
1240
1241
1242 8
1243static int sysctl_depth(struct ctl_table *table) 9static int sysctl_depth(struct ctl_table *table)
1244{ 10{
@@ -1262,47 +28,6 @@ static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
1262 return table; 28 return table;
1263} 29}
1264 30
1265static const struct trans_ctl_table *sysctl_binary_lookup(struct ctl_table *table)
1266{
1267 struct ctl_table *test;
1268 const struct trans_ctl_table *ref;
1269 int cur_depth;
1270
1271 cur_depth = sysctl_depth(table);
1272
1273 ref = trans_root_table;
1274repeat:
1275 test = sysctl_parent(table, cur_depth);
1276 for (; ref->ctl_name || ref->procname || ref->child; ref++) {
1277 int match = 0;
1278
1279 if (cur_depth && !ref->child)
1280 continue;
1281
1282 if (test->procname && ref->procname &&
1283 (strcmp(test->procname, ref->procname) == 0))
1284 match++;
1285
1286 if (test->ctl_name && ref->ctl_name &&
1287 (test->ctl_name == ref->ctl_name))
1288 match++;
1289
1290 if (!ref->ctl_name && !ref->procname)
1291 match++;
1292
1293 if (match) {
1294 if (cur_depth != 0) {
1295 cur_depth--;
1296 ref = ref->child;
1297 goto repeat;
1298 }
1299 goto out;
1300 }
1301 }
1302 ref = NULL;
1303out:
1304 return ref;
1305}
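
For orientation on what is being deleted above: each trans_ctl_table entry pairs a binary ctl_name with its /proc/sys procname and, for directories, a child table, and sysctl_binary_lookup() walks those tables one depth level at a time. Below is a minimal standalone sketch of that walk; the struct shape mirrors trans_ctl_table, but the type name, the numeric ids and the sample entries are made up for illustration and are not the kernel's constants.

#include <stdio.h>

struct xlat {				/* same shape as trans_ctl_table */
	int ctl_name;
	const char *procname;
	const struct xlat *child;
};

static const struct xlat xlat_ipv4[] = { { 9, "ip_forward" }, {} };
static const struct xlat xlat_net[]  = { { 5, "ipv4", xlat_ipv4 }, {} };
static const struct xlat xlat_root[] = { { 3, "net", xlat_net }, {} };

/* Resolve a numeric path such as {3, 5, 9} to the name path net/ipv4/ip_forward. */
static void resolve(const int *path, int depth)
{
	const struct xlat *t = xlat_root;
	int i;

	for (i = 0; i < depth; i++) {
		for (; t->ctl_name || t->procname; t++)
			if (t->ctl_name == path[i])
				break;
		if (!t->ctl_name && !t->procname) {
			printf(" <no match>\n");	/* fell off the table at this level */
			return;
		}
		printf("%s%s", i ? "/" : "", t->procname);
		t = t->child;
	}
	printf("\n");
}

int main(void)
{
	const int path[] = { 3, 5, 9 };

	resolve(path, 3);		/* prints net/ipv4/ip_forward */
	return 0;
}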
1306 31
1307static void sysctl_print_path(struct ctl_table *table) 32static void sysctl_print_path(struct ctl_table *table)
1308{ 33{
@@ -1316,26 +41,6 @@ static void sysctl_print_path(struct ctl_table *table)
1316 } 41 }
1317 } 42 }
1318 printk(" "); 43 printk(" ");
1319 if (table->ctl_name) {
1320 for (i = depth; i >= 0; i--) {
1321 tmp = sysctl_parent(table, i);
1322 printk(".%d", tmp->ctl_name);
1323 }
1324 }
1325}
1326
1327static void sysctl_repair_table(struct ctl_table *table)
1328{
1329 /* Don't complain about the classic default
1330 * sysctl strategy routine. Maybe later we
1331 * can get the tables fixed and complain about
1332 * this.
1333 */
1334 if (table->ctl_name && table->procname &&
1335 (table->proc_handler == proc_dointvec) &&
1336 (!table->strategy)) {
1337 table->strategy = sysctl_data;
1338 }
1339} 44}
1340 45
1341static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, 46static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
@@ -1353,7 +58,7 @@ static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
1353 ref = head->ctl_table; 58 ref = head->ctl_table;
1354repeat: 59repeat:
1355 test = sysctl_parent(table, cur_depth); 60 test = sysctl_parent(table, cur_depth);
1356 for (; ref->ctl_name || ref->procname; ref++) { 61 for (; ref->procname; ref++) {
1357 int match = 0; 62 int match = 0;
1358 if (cur_depth && !ref->child) 63 if (cur_depth && !ref->child)
1359 continue; 64 continue;
@@ -1362,10 +67,6 @@ repeat:
1362 (strcmp(test->procname, ref->procname) == 0)) 67 (strcmp(test->procname, ref->procname) == 0))
1363 match++; 68 match++;
1364 69
1365 if (test->ctl_name && ref->ctl_name &&
1366 (test->ctl_name == ref->ctl_name))
1367 match++;
1368
1369 if (match) { 70 if (match) {
1370 if (cur_depth != 0) { 71 if (cur_depth != 0) {
1371 cur_depth--; 72 cur_depth--;
@@ -1393,38 +94,6 @@ static void set_fail(const char **fail, struct ctl_table *table, const char *str
1393 *fail = str; 94 *fail = str;
1394} 95}
1395 96
1396static int sysctl_check_dir(struct nsproxy *namespaces,
1397 struct ctl_table *table)
1398{
1399 struct ctl_table *ref;
1400 int error;
1401
1402 error = 0;
1403 ref = sysctl_check_lookup(namespaces, table);
1404 if (ref) {
1405 int match = 0;
1406 if ((!table->procname && !ref->procname) ||
1407 (table->procname && ref->procname &&
1408 (strcmp(table->procname, ref->procname) == 0)))
1409 match++;
1410
1411 if ((!table->ctl_name && !ref->ctl_name) ||
1412 (table->ctl_name && ref->ctl_name &&
1413 (table->ctl_name == ref->ctl_name)))
1414 match++;
1415
1416 if (match != 2) {
1417 printk(KERN_ERR "%s: failed: ", __func__);
1418 sysctl_print_path(table);
1419 printk(" ref: ");
1420 sysctl_print_path(ref);
1421 printk("\n");
1422 error = -EINVAL;
1423 }
1424 }
1425 return error;
1426}
1427
1428static void sysctl_check_leaf(struct nsproxy *namespaces, 97static void sysctl_check_leaf(struct nsproxy *namespaces,
1429 struct ctl_table *table, const char **fail) 98 struct ctl_table *table, const char **fail)
1430{ 99{
@@ -1435,37 +104,15 @@ static void sysctl_check_leaf(struct nsproxy *namespaces,
1435 set_fail(fail, table, "Sysctl already exists"); 104 set_fail(fail, table, "Sysctl already exists");
1436} 105}
1437 106
1438static void sysctl_check_bin_path(struct ctl_table *table, const char **fail)
1439{
1440 const struct trans_ctl_table *ref;
1441
1442 ref = sysctl_binary_lookup(table);
1443 if (table->ctl_name && !ref)
1444 set_fail(fail, table, "Unknown sysctl binary path");
1445 if (ref) {
1446 if (ref->procname &&
1447 (!table->procname ||
1448 (strcmp(table->procname, ref->procname) != 0)))
1449 set_fail(fail, table, "procname does not match binary path procname");
1450
1451 if (ref->ctl_name && table->ctl_name &&
1452 (table->ctl_name != ref->ctl_name))
1453 set_fail(fail, table, "ctl_name does not match binary path ctl_name");
1454 }
1455}
1456
1457int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) 107int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
1458{ 108{
1459 int error = 0; 109 int error = 0;
1460 for (; table->ctl_name || table->procname; table++) { 110 for (; table->procname; table++) {
1461 const char *fail = NULL; 111 const char *fail = NULL;
1462 112
1463 sysctl_repair_table(table);
1464 if (table->parent) { 113 if (table->parent) {
1465 if (table->procname && !table->parent->procname) 114 if (table->procname && !table->parent->procname)
1466 set_fail(&fail, table, "Parent without procname"); 115 set_fail(&fail, table, "Parent without procname");
1467 if (table->ctl_name && !table->parent->ctl_name)
1468 set_fail(&fail, table, "Parent without ctl_name");
1469 } 116 }
1470 if (!table->procname) 117 if (!table->procname)
1471 set_fail(&fail, table, "No procname"); 118 set_fail(&fail, table, "No procname");
@@ -1478,21 +125,12 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
1478 set_fail(&fail, table, "Writable sysctl directory"); 125 set_fail(&fail, table, "Writable sysctl directory");
1479 if (table->proc_handler) 126 if (table->proc_handler)
1480 set_fail(&fail, table, "Directory with proc_handler"); 127 set_fail(&fail, table, "Directory with proc_handler");
1481 if (table->strategy)
1482 set_fail(&fail, table, "Directory with strategy");
1483 if (table->extra1) 128 if (table->extra1)
1484 set_fail(&fail, table, "Directory with extra1"); 129 set_fail(&fail, table, "Directory with extra1");
1485 if (table->extra2) 130 if (table->extra2)
1486 set_fail(&fail, table, "Directory with extra2"); 131 set_fail(&fail, table, "Directory with extra2");
1487 if (sysctl_check_dir(namespaces, table))
1488 set_fail(&fail, table, "Inconsistent directory names");
1489 } else { 132 } else {
1490 if ((table->strategy == sysctl_data) || 133 if ((table->proc_handler == proc_dostring) ||
1491 (table->strategy == sysctl_string) ||
1492 (table->strategy == sysctl_intvec) ||
1493 (table->strategy == sysctl_jiffies) ||
1494 (table->strategy == sysctl_ms_jiffies) ||
1495 (table->proc_handler == proc_dostring) ||
1496 (table->proc_handler == proc_dointvec) || 134 (table->proc_handler == proc_dointvec) ||
1497 (table->proc_handler == proc_dointvec_minmax) || 135 (table->proc_handler == proc_dointvec_minmax) ||
1498 (table->proc_handler == proc_dointvec_jiffies) || 136 (table->proc_handler == proc_dointvec_jiffies) ||
@@ -1514,14 +152,6 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
1514 set_fail(&fail, table, "No max"); 152 set_fail(&fail, table, "No max");
1515 } 153 }
1516 } 154 }
1517#ifdef CONFIG_SYSCTL_SYSCALL
1518 if (table->ctl_name && !table->strategy)
1519 set_fail(&fail, table, "Missing strategy");
1520#endif
1521#if 0
1522 if (!table->ctl_name && table->strategy)
1523 set_fail(&fail, table, "Strategy without ctl_name");
1524#endif
1525#ifdef CONFIG_PROC_SYSCTL 155#ifdef CONFIG_PROC_SYSCTL
1526 if (table->procname && !table->proc_handler) 156 if (table->procname && !table->proc_handler)
1527 set_fail(&fail, table, "No proc_handler"); 157 set_fail(&fail, table, "No proc_handler");
@@ -1532,7 +162,6 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
1532#endif 162#endif
1533 sysctl_check_leaf(namespaces, table, &fail); 163 sysctl_check_leaf(namespaces, table, &fail);
1534 } 164 }
1535 sysctl_check_bin_path(table, &fail);
1536 if (table->mode > 0777) 165 if (table->mode > 0777)
1537 set_fail(&fail, table, "bogus .mode"); 166 set_fail(&fail, table, "bogus .mode");
1538 if (fail) { 167 if (fail) {
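
With the binary-path and strategy checks gone, what sysctl_check_table() still enforces for a leaf boils down to: a procname, one of the recognised proc_handler routines, and extra1/extra2 where the handler needs bounds. The sketch below is a table that satisfies those checks; the variable names, values and the directory entry are illustrative and are not part of this patch.

#include <linux/sysctl.h>

static int example_limit = 10;
static int example_min;			/* 0 */
static int example_max = 100;

static struct ctl_table example_vars[] = {
	{
		.procname	= "example_limit",	/* leaf: procname only, no ctl_name */
		.data		= &example_limit,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &example_min,		/* required: the check wants a min */
		.extra2		= &example_max,		/* required: the check wants a max */
	},
	{ }
};

static struct ctl_table example_dir[] = {
	{
		.procname	= "example",		/* directory: 0555, no handler or extras */
		.mode		= 0555,
		.child		= example_vars,
	},
	{ }					/* would be registered with register_sysctl_table(example_dir) */
};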
diff --git a/kernel/time.c b/kernel/time.c
index 2e2e469a7fec..804798005d19 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -662,6 +662,36 @@ u64 nsec_to_clock_t(u64 x)
662#endif 662#endif
663} 663}
664 664
665/**
666 * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
667 *
668 * @n: nsecs in u64
669 *
670 * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
671 * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
672 * for scheduler, not for use in device drivers to calculate timeout value.
673 *
674 * note:
675 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
676 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
677 */
678unsigned long nsecs_to_jiffies(u64 n)
679{
680#if (NSEC_PER_SEC % HZ) == 0
681 /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
682 return div_u64(n, NSEC_PER_SEC / HZ);
683#elif (HZ % 512) == 0
684 /* overflow after 292 years if HZ = 1024 */
685 return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
686#else
687 /*
688 * Generic case - optimized for cases where HZ is a multiple of 3.
689 * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
690 */
691 return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
692#endif
693}
694
665#if (BITS_PER_LONG < 64) 695#if (BITS_PER_LONG < 64)
666u64 get_jiffies_64(void) 696u64 get_jiffies_64(void)
667{ 697{
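
A quick worked example of the common case above: when NSEC_PER_SEC is divisible by HZ the conversion is a single 64-bit divide. The HZ value and the sample delays below are illustrative, and div_u64() is replaced by a plain division so the sketch runs in userspace.

#include <stdio.h>
#include <stdint.h>

#define HZ		1000u		/* illustrative; any divisor of 10^9 hits the fast path */
#define NSEC_PER_SEC	1000000000ull

static unsigned long nsecs_to_jiffies_fast(uint64_t n)
{
	return (unsigned long)(n / (NSEC_PER_SEC / HZ));	/* div_u64(n, NSEC_PER_SEC / HZ) in the kernel */
}

int main(void)
{
	/* 2.5 ms truncates to 2 jiffies at HZ=1000; one second is exactly HZ jiffies. */
	printf("%lu %lu\n",
	       nsecs_to_jiffies_fast(2500000ull),
	       nsecs_to_jiffies_fast(NSEC_PER_SEC));
	return 0;
}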
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b416512ad17f..d006554888dc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -339,6 +339,27 @@ config POWER_TRACER
339 power management decisions, specifically the C-state and P-state 339 power management decisions, specifically the C-state and P-state
340 behavior. 340 behavior.
341 341
342config KSYM_TRACER
343 bool "Trace read and write access on kernel memory locations"
344 depends on HAVE_HW_BREAKPOINT
345 select TRACING
346 help
347 This tracer helps find read and write operations on any given kernel
348 symbol i.e. /proc/kallsyms.
349
350config PROFILE_KSYM_TRACER
351 bool "Profile all kernel memory accesses on 'watched' variables"
352 depends on KSYM_TRACER
353 help
354 This tracer profiles kernel accesses on variables watched through the
355 ksym tracer ftrace plugin. Depending upon the hardware, all read
356 and write operations on kernel variables can be monitored for
357 accesses.
358
359 The results will be displayed in:
360 /debugfs/tracing/profile_ksym
361
362 Say N if unsure.
342 363
343config STACK_TRACER 364config STACK_TRACER
344 bool "Trace max stack" 365 bool "Trace max stack"
@@ -428,6 +449,23 @@ config BLK_DEV_IO_TRACE
428 449
429 If unsure, say N. 450 If unsure, say N.
430 451
452config KPROBE_EVENT
453 depends on KPROBES
454 depends on X86
455 bool "Enable kprobes-based dynamic events"
456 select TRACING
457 default y
458 help
459 This allows the user to add tracing events (similar to tracepoints) on the fly
460 via the ftrace interface. See Documentation/trace/kprobetrace.txt
461 for more details.
462
463 Those events can be inserted wherever kprobes can probe, and record
464 various register and memory values.
465
466 This option is also required by perf-probe subcommand of perf tools. If
467 you want to use perf tools, this option is strongly recommended.
468
431config DYNAMIC_FTRACE 469config DYNAMIC_FTRACE
432 bool "enable/disable ftrace tracepoints dynamically" 470 bool "enable/disable ftrace tracepoints dynamically"
433 depends on FUNCTION_TRACER 471 depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 26f03ac07c2b..cd9ecd89ec77 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,8 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
56obj-$(CONFIG_EVENT_TRACING) += power-traces.o 58obj-$(CONFIG_EVENT_TRACING) += power-traces.o
57 59
58libftrace-y := ftrace.o 60libftrace-y := ftrace.o
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a72c6e03deec..a1ca4956ab5e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s)
397 int ret; 397 int ret;
398 398
399 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" 399 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
400 "offset:0;\tsize:%u;\n", 400 "offset:0;\tsize:%u;\tsigned:%u;\n",
401 (unsigned int)sizeof(field.time_stamp)); 401 (unsigned int)sizeof(field.time_stamp),
402 (unsigned int)is_signed_type(u64));
402 403
403 ret = trace_seq_printf(s, "\tfield: local_t commit;\t" 404 ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
404 "offset:%u;\tsize:%u;\n", 405 "offset:%u;\tsize:%u;\tsigned:%u;\n",
405 (unsigned int)offsetof(typeof(field), commit), 406 (unsigned int)offsetof(typeof(field), commit),
406 (unsigned int)sizeof(field.commit)); 407 (unsigned int)sizeof(field.commit),
408 (unsigned int)is_signed_type(long));
407 409
408 ret = trace_seq_printf(s, "\tfield: char data;\t" 410 ret = trace_seq_printf(s, "\tfield: char data;\t"
409 "offset:%u;\tsize:%u;\n", 411 "offset:%u;\tsize:%u;\tsigned:%u;\n",
410 (unsigned int)offsetof(typeof(field), data), 412 (unsigned int)offsetof(typeof(field), data),
411 (unsigned int)BUF_PAGE_SIZE); 413 (unsigned int)BUF_PAGE_SIZE,
414 (unsigned int)is_signed_type(char));
412 415
413 return ret; 416 return ret;
414} 417}
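
The new signed:%u column comes from is_signed_type(), which amounts to casting -1 to the type and checking whether the result stays negative. The macro below is a standalone stand-in for illustration, not the kernel's exact definition, and shows the values that end up in the page header.

#include <stdio.h>

/* Stand-in: -1 cast to a signed type stays below 1, while an unsigned type
 * wraps around to its maximum value. */
#define is_signed_type(type)	(((type)(-1)) < (type)1)

int main(void)
{
	printf("u64:%u local_t(long):%u char:%u\n",
	       (unsigned int)is_signed_type(unsigned long long),
	       (unsigned int)is_signed_type(long),
	       (unsigned int)is_signed_type(char));
	/* Typically prints 0, 1, 1 - char's signedness is implementation-defined. */
	return 0;
}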
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index acef8b4636f0..1d7f4830a80d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h> 12#include <trace/boot.h>
13#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h>
14 15
15#include <linux/trace_seq.h> 16#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h> 17#include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
37 TRACE_KMEM_ALLOC, 38 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE, 39 TRACE_KMEM_FREE,
39 TRACE_BLK, 40 TRACE_BLK,
41 TRACE_KSYM,
40 42
41 __TRACE_LAST_TYPE, 43 __TRACE_LAST_TYPE,
42}; 44};
@@ -98,9 +100,32 @@ struct syscall_trace_enter {
98struct syscall_trace_exit { 100struct syscall_trace_exit {
99 struct trace_entry ent; 101 struct trace_entry ent;
100 int nr; 102 int nr;
101 unsigned long ret; 103 long ret;
102}; 104};
103 105
106struct kprobe_trace_entry {
107 struct trace_entry ent;
108 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111};
112
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent;
119 unsigned long func;
120 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123};
124
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
104/* 129/*
105 * trace_flag_type is an enumeration that holds different 130 * trace_flag_type is an enumeration that holds different
106 * states when a trace occurs. These are: 131 * states when a trace occurs. These are:
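
The SIZEOF_*_TRACE_ENTRY macros size a record as the fixed header up to the flexible args[] array plus one word per probe argument, which is exactly what gets reserved in the ring buffer. Below is a userspace sketch of the same sizing trick; the struct is a cut-down stand-in rather than kprobe_trace_entry itself (the leading struct trace_entry is omitted).

#include <stdio.h>
#include <stddef.h>

struct probe_rec {			/* cut-down stand-in for kprobe_trace_entry */
	unsigned long	ip;
	int		nargs;
	unsigned long	args[];		/* flexible array: one slot per argument */
};

#define SIZEOF_PROBE_REC(n) \
	(offsetof(struct probe_rec, args) + (sizeof(unsigned long) * (n)))

int main(void)
{
	/* The reservation grows by one word per argument. */
	printf("0 args: %zu bytes, 4 args: %zu bytes\n",
	       SIZEOF_PROBE_REC(0), SIZEOF_PROBE_REC(4));
	return 0;
}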
@@ -209,6 +234,7 @@ extern void __ftrace_bad_type(void);
209 TRACE_KMEM_ALLOC); \ 234 TRACE_KMEM_ALLOC); \
210 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
211 TRACE_KMEM_FREE); \ 236 TRACE_KMEM_FREE); \
237 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
212 __ftrace_bad_type(); \ 238 __ftrace_bad_type(); \
213 } while (0) 239 } while (0)
214 240
@@ -364,6 +390,8 @@ int register_tracer(struct tracer *type);
364void unregister_tracer(struct tracer *type); 390void unregister_tracer(struct tracer *type);
365int is_tracing_stopped(void); 391int is_tracing_stopped(void);
366 392
393extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
394
367extern unsigned long nsecs_to_usecs(unsigned long nsecs); 395extern unsigned long nsecs_to_usecs(unsigned long nsecs);
368 396
369#ifdef CONFIG_TRACER_MAX_TRACE 397#ifdef CONFIG_TRACER_MAX_TRACE
@@ -438,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
438 struct trace_array *tr); 466 struct trace_array *tr);
439extern int trace_selftest_startup_hw_branches(struct tracer *trace, 467extern int trace_selftest_startup_hw_branches(struct tracer *trace,
440 struct trace_array *tr); 468 struct trace_array *tr);
469extern int trace_selftest_startup_ksym(struct tracer *trace,
470 struct trace_array *tr);
441#endif /* CONFIG_FTRACE_STARTUP_TEST */ 471#endif /* CONFIG_FTRACE_STARTUP_TEST */
442 472
443extern void *head_page(struct trace_array_cpu *data); 473extern void *head_page(struct trace_array_cpu *data);
@@ -683,7 +713,6 @@ struct event_filter {
683 int n_preds; 713 int n_preds;
684 struct filter_pred **preds; 714 struct filter_pred **preds;
685 char *filter_string; 715 char *filter_string;
686 bool no_reset;
687}; 716};
688 717
689struct event_subsystem { 718struct event_subsystem {
@@ -703,7 +732,7 @@ typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
703typedef int (*regex_match_func)(char *str, struct regex *r, int len); 732typedef int (*regex_match_func)(char *str, struct regex *r, int len);
704 733
705enum regex_type { 734enum regex_type {
706 MATCH_FULL, 735 MATCH_FULL = 0,
707 MATCH_FRONT_ONLY, 736 MATCH_FRONT_ONLY,
708 MATCH_MIDDLE_ONLY, 737 MATCH_MIDDLE_ONLY,
709 MATCH_END_ONLY, 738 MATCH_END_ONLY,
@@ -744,7 +773,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
744 struct ring_buffer *buffer, 773 struct ring_buffer *buffer,
745 struct ring_buffer_event *event) 774 struct ring_buffer_event *event)
746{ 775{
747 if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { 776 if (unlikely(call->filter_active) &&
777 !filter_match_preds(call->filter, rec)) {
748 ring_buffer_discard_commit(buffer, event); 778 ring_buffer_discard_commit(buffer, event);
749 return 1; 779 return 1;
750 } 780 }
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
364 F_printk("type:%u call_site:%lx ptr:%p", 364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr) 365 __entry->type_id, __entry->call_site, __entry->ptr)
366); 366);
367
368FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
369
370 TRACE_KSYM,
371
372 F_STRUCT(
373 __field( unsigned long, ip )
374 __field( unsigned char, type )
375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ),
378
379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type,
381 (void *)__entry->addr, __entry->cmd)
382);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 8d5c171cc998..d9c60f80aa0d 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,17 +8,14 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include "trace.h" 9#include "trace.h"
10 10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16 11
17char *trace_profile_buf; 12char *perf_trace_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf); 13EXPORT_SYMBOL_GPL(perf_trace_buf);
14
15char *perf_trace_buf_nmi;
16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
19 17
20char *trace_profile_buf_nmi; 18typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
22 19
23/* Count the events in use (per event id, not per instance) */ 20/* Count the events in use (per event id, not per instance) */
24static int total_profile_count; 21static int total_profile_count;
@@ -32,20 +29,20 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
32 return 0; 29 return 0;
33 30
34 if (!total_profile_count) { 31 if (!total_profile_count) {
35 buf = (char *)alloc_percpu(profile_buf_t); 32 buf = (char *)alloc_percpu(perf_trace_t);
36 if (!buf) 33 if (!buf)
37 goto fail_buf; 34 goto fail_buf;
38 35
39 rcu_assign_pointer(trace_profile_buf, buf); 36 rcu_assign_pointer(perf_trace_buf, buf);
40 37
41 buf = (char *)alloc_percpu(profile_buf_t); 38 buf = (char *)alloc_percpu(perf_trace_t);
42 if (!buf) 39 if (!buf)
43 goto fail_buf_nmi; 40 goto fail_buf_nmi;
44 41
45 rcu_assign_pointer(trace_profile_buf_nmi, buf); 42 rcu_assign_pointer(perf_trace_buf_nmi, buf);
46 } 43 }
47 44
48 ret = event->profile_enable(); 45 ret = event->profile_enable(event);
49 if (!ret) { 46 if (!ret) {
50 total_profile_count++; 47 total_profile_count++;
51 return 0; 48 return 0;
@@ -53,10 +50,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
53 50
54fail_buf_nmi: 51fail_buf_nmi:
55 if (!total_profile_count) { 52 if (!total_profile_count) {
56 free_percpu(trace_profile_buf_nmi); 53 free_percpu(perf_trace_buf_nmi);
57 free_percpu(trace_profile_buf); 54 free_percpu(perf_trace_buf);
58 trace_profile_buf_nmi = NULL; 55 perf_trace_buf_nmi = NULL;
59 trace_profile_buf = NULL; 56 perf_trace_buf = NULL;
60 } 57 }
61fail_buf: 58fail_buf:
62 atomic_dec(&event->profile_count); 59 atomic_dec(&event->profile_count);
@@ -89,14 +86,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
89 if (!atomic_add_negative(-1, &event->profile_count)) 86 if (!atomic_add_negative(-1, &event->profile_count))
90 return; 87 return;
91 88
92 event->profile_disable(); 89 event->profile_disable(event);
93 90
94 if (!--total_profile_count) { 91 if (!--total_profile_count) {
95 buf = trace_profile_buf; 92 buf = perf_trace_buf;
96 rcu_assign_pointer(trace_profile_buf, NULL); 93 rcu_assign_pointer(perf_trace_buf, NULL);
97 94
98 nmi_buf = trace_profile_buf_nmi; 95 nmi_buf = perf_trace_buf_nmi;
99 rcu_assign_pointer(trace_profile_buf_nmi, NULL); 96 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
100 97
101 /* 98 /*
102 * Ensure every events in profiling have finished before 99 * Ensure every events in profiling have finished before
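
The rename keeps the trick that the deleted comment described: alloc_percpu() takes a type rather than a byte count, so a fixed-size scratch buffer is expressed as an array type (now via typeof) and published with rcu_assign_pointer(). A minimal kernel-style sketch of the pattern follows; the buffer name and size are invented for illustration.

#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/errno.h>

#define EXAMPLE_BUF_SIZE	1024				/* illustrative size */
typedef typeof(char [EXAMPLE_BUF_SIZE]) example_buf_t;		/* an array type, not a byte count */

static char *example_buf;

static int example_buf_alloc(void)
{
	char *buf = (char *)alloc_percpu(example_buf_t);	/* one buffer per CPU */

	if (!buf)
		return -ENOMEM;
	rcu_assign_pointer(example_buf, buf);			/* readers pick it up under RCU */
	return 0;
}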
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5e9ffc33f6db..1d18315dc836 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -93,9 +93,7 @@ int trace_define_common_fields(struct ftrace_event_call *call)
93} 93}
94EXPORT_SYMBOL_GPL(trace_define_common_fields); 94EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 95
96#ifdef CONFIG_MODULES 96void trace_destroy_fields(struct ftrace_event_call *call)
97
98static void trace_destroy_fields(struct ftrace_event_call *call)
99{ 97{
100 struct ftrace_event_field *field, *next; 98 struct ftrace_event_field *field, *next;
101 99
@@ -107,8 +105,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
107 } 105 }
108} 106}
109 107
110#endif /* CONFIG_MODULES */
111
112static void ftrace_event_enable_disable(struct ftrace_event_call *call, 108static void ftrace_event_enable_disable(struct ftrace_event_call *call,
113 int enable) 109 int enable)
114{ 110{
@@ -117,14 +113,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
117 if (call->enabled) { 113 if (call->enabled) {
118 call->enabled = 0; 114 call->enabled = 0;
119 tracing_stop_cmdline_record(); 115 tracing_stop_cmdline_record();
120 call->unregfunc(call->data); 116 call->unregfunc(call);
121 } 117 }
122 break; 118 break;
123 case 1: 119 case 1:
124 if (!call->enabled) { 120 if (!call->enabled) {
125 call->enabled = 1; 121 call->enabled = 1;
126 tracing_start_cmdline_record(); 122 tracing_start_cmdline_record();
127 call->regfunc(call->data); 123 call->regfunc(call);
128 } 124 }
129 break; 125 break;
130 } 126 }
@@ -507,7 +503,7 @@ extern char *__bad_type_size(void);
507#define FIELD(type, name) \ 503#define FIELD(type, name) \
508 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ 504 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
509 #type, "common_" #name, offsetof(typeof(field), name), \ 505 #type, "common_" #name, offsetof(typeof(field), name), \
510 sizeof(field.name) 506 sizeof(field.name), is_signed_type(type)
511 507
512static int trace_write_header(struct trace_seq *s) 508static int trace_write_header(struct trace_seq *s)
513{ 509{
@@ -515,17 +511,17 @@ static int trace_write_header(struct trace_seq *s)
515 511
516 /* struct trace_entry */ 512 /* struct trace_entry */
517 return trace_seq_printf(s, 513 return trace_seq_printf(s,
518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 514 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
519 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 515 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
520 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 516 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
521 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 517 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
522 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 518 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
523 "\n", 519 "\n",
524 FIELD(unsigned short, type), 520 FIELD(unsigned short, type),
525 FIELD(unsigned char, flags), 521 FIELD(unsigned char, flags),
526 FIELD(unsigned char, preempt_count), 522 FIELD(unsigned char, preempt_count),
527 FIELD(int, pid), 523 FIELD(int, pid),
528 FIELD(int, lock_depth)); 524 FIELD(int, lock_depth));
529} 525}
530 526
531static ssize_t 527static ssize_t
@@ -937,27 +933,46 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
937 return 0; 933 return 0;
938} 934}
939 935
940#define for_each_event(event, start, end) \ 936static int __trace_add_event_call(struct ftrace_event_call *call)
941 for (event = start; \ 937{
942 (unsigned long)event < (unsigned long)end; \ 938 struct dentry *d_events;
943 event++) 939 int ret;
944 940
945#ifdef CONFIG_MODULES 941 if (!call->name)
942 return -EINVAL;
946 943
947static LIST_HEAD(ftrace_module_file_list); 944 if (call->raw_init) {
945 ret = call->raw_init(call);
946 if (ret < 0) {
947 if (ret != -ENOSYS)
948 pr_warning("Could not initialize trace "
949 "events/%s\n", call->name);
950 return ret;
951 }
952 }
948 953
949/* 954 d_events = event_trace_events_dir();
950 * Modules must own their file_operations to keep up with 955 if (!d_events)
951 * reference counting. 956 return -ENOENT;
952 */ 957
953struct ftrace_module_file_ops { 958 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
954 struct list_head list; 959 &ftrace_enable_fops, &ftrace_event_filter_fops,
955 struct module *mod; 960 &ftrace_event_format_fops);
956 struct file_operations id; 961 if (!ret)
957 struct file_operations enable; 962 list_add(&call->list, &ftrace_events);
958 struct file_operations format; 963
959 struct file_operations filter; 964 return ret;
960}; 965}
966
967/* Add an additional event_call dynamically */
968int trace_add_event_call(struct ftrace_event_call *call)
969{
970 int ret;
971 mutex_lock(&event_mutex);
972 ret = __trace_add_event_call(call);
973 mutex_unlock(&event_mutex);
974 return ret;
975}
961 976
962static void remove_subsystem_dir(const char *name) 977static void remove_subsystem_dir(const char *name)
963{ 978{
@@ -985,6 +1000,53 @@ static void remove_subsystem_dir(const char *name)
985 } 1000 }
986} 1001}
987 1002
1003/*
1004 * Must be called under locking both of event_mutex and trace_event_mutex.
1005 */
1006static void __trace_remove_event_call(struct ftrace_event_call *call)
1007{
1008 ftrace_event_enable_disable(call, 0);
1009 if (call->event)
1010 __unregister_ftrace_event(call->event);
1011 debugfs_remove_recursive(call->dir);
1012 list_del(&call->list);
1013 trace_destroy_fields(call);
1014 destroy_preds(call);
1015 remove_subsystem_dir(call->system);
1016}
1017
1018/* Remove an event_call */
1019void trace_remove_event_call(struct ftrace_event_call *call)
1020{
1021 mutex_lock(&event_mutex);
1022 down_write(&trace_event_mutex);
1023 __trace_remove_event_call(call);
1024 up_write(&trace_event_mutex);
1025 mutex_unlock(&event_mutex);
1026}
1027
1028#define for_each_event(event, start, end) \
1029 for (event = start; \
1030 (unsigned long)event < (unsigned long)end; \
1031 event++)
1032
1033#ifdef CONFIG_MODULES
1034
1035static LIST_HEAD(ftrace_module_file_list);
1036
1037/*
1038 * Modules must own their file_operations to keep up with
1039 * reference counting.
1040 */
1041struct ftrace_module_file_ops {
1042 struct list_head list;
1043 struct module *mod;
1044 struct file_operations id;
1045 struct file_operations enable;
1046 struct file_operations format;
1047 struct file_operations filter;
1048};
1049
988static struct ftrace_module_file_ops * 1050static struct ftrace_module_file_ops *
989trace_create_file_ops(struct module *mod) 1051trace_create_file_ops(struct module *mod)
990{ 1052{
@@ -1042,7 +1104,7 @@ static void trace_module_add_events(struct module *mod)
1042 if (!call->name) 1104 if (!call->name)
1043 continue; 1105 continue;
1044 if (call->raw_init) { 1106 if (call->raw_init) {
1045 ret = call->raw_init(); 1107 ret = call->raw_init(call);
1046 if (ret < 0) { 1108 if (ret < 0) {
1047 if (ret != -ENOSYS) 1109 if (ret != -ENOSYS)
1048 pr_warning("Could not initialize trace " 1110 pr_warning("Could not initialize trace "
@@ -1060,10 +1122,11 @@ static void trace_module_add_events(struct module *mod)
1060 return; 1122 return;
1061 } 1123 }
1062 call->mod = mod; 1124 call->mod = mod;
1063 list_add(&call->list, &ftrace_events); 1125 ret = event_create_dir(call, d_events,
1064 event_create_dir(call, d_events, 1126 &file_ops->id, &file_ops->enable,
1065 &file_ops->id, &file_ops->enable, 1127 &file_ops->filter, &file_ops->format);
1066 &file_ops->filter, &file_ops->format); 1128 if (!ret)
1129 list_add(&call->list, &ftrace_events);
1067 } 1130 }
1068} 1131}
1069 1132
@@ -1077,14 +1140,7 @@ static void trace_module_remove_events(struct module *mod)
1077 list_for_each_entry_safe(call, p, &ftrace_events, list) { 1140 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1078 if (call->mod == mod) { 1141 if (call->mod == mod) {
1079 found = true; 1142 found = true;
1080 ftrace_event_enable_disable(call, 0); 1143 __trace_remove_event_call(call);
1081 if (call->event)
1082 __unregister_ftrace_event(call->event);
1083 debugfs_remove_recursive(call->dir);
1084 list_del(&call->list);
1085 trace_destroy_fields(call);
1086 destroy_preds(call);
1087 remove_subsystem_dir(call->system);
1088 } 1144 }
1089 } 1145 }
1090 1146
@@ -1202,7 +1258,7 @@ static __init int event_trace_init(void)
1202 if (!call->name) 1258 if (!call->name)
1203 continue; 1259 continue;
1204 if (call->raw_init) { 1260 if (call->raw_init) {
1205 ret = call->raw_init(); 1261 ret = call->raw_init(call);
1206 if (ret < 0) { 1262 if (ret < 0) {
1207 if (ret != -ENOSYS) 1263 if (ret != -ENOSYS)
1208 pr_warning("Could not initialize trace " 1264 pr_warning("Could not initialize trace "
@@ -1210,10 +1266,12 @@ static __init int event_trace_init(void)
1210 continue; 1266 continue;
1211 } 1267 }
1212 } 1268 }
1213 list_add(&call->list, &ftrace_events); 1269 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1214 event_create_dir(call, d_events, &ftrace_event_id_fops, 1270 &ftrace_enable_fops,
1215 &ftrace_enable_fops, &ftrace_event_filter_fops, 1271 &ftrace_event_filter_fops,
1216 &ftrace_event_format_fops); 1272 &ftrace_event_format_fops);
1273 if (!ret)
1274 list_add(&call->list, &ftrace_events);
1217 } 1275 }
1218 1276
1219 while (true) { 1277 while (true) {
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 92672016da28..50504cb228de 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -21,6 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/ctype.h> 22#include <linux/ctype.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h>
24 25
25#include "trace.h" 26#include "trace.h"
26#include "trace_output.h" 27#include "trace_output.h"
@@ -29,6 +30,7 @@ enum filter_op_ids
29{ 30{
30 OP_OR, 31 OP_OR,
31 OP_AND, 32 OP_AND,
33 OP_GLOB,
32 OP_NE, 34 OP_NE,
33 OP_EQ, 35 OP_EQ,
34 OP_LT, 36 OP_LT,
@@ -46,16 +48,17 @@ struct filter_op {
46}; 48};
47 49
48static struct filter_op filter_ops[] = { 50static struct filter_op filter_ops[] = {
49 { OP_OR, "||", 1 }, 51 { OP_OR, "||", 1 },
50 { OP_AND, "&&", 2 }, 52 { OP_AND, "&&", 2 },
51 { OP_NE, "!=", 4 }, 53 { OP_GLOB, "~", 4 },
52 { OP_EQ, "==", 4 }, 54 { OP_NE, "!=", 4 },
53 { OP_LT, "<", 5 }, 55 { OP_EQ, "==", 4 },
54 { OP_LE, "<=", 5 }, 56 { OP_LT, "<", 5 },
55 { OP_GT, ">", 5 }, 57 { OP_LE, "<=", 5 },
56 { OP_GE, ">=", 5 }, 58 { OP_GT, ">", 5 },
57 { OP_NONE, "OP_NONE", 0 }, 59 { OP_GE, ">=", 5 },
58 { OP_OPEN_PAREN, "(", 0 }, 60 { OP_NONE, "OP_NONE", 0 },
61 { OP_OPEN_PAREN, "(", 0 },
59}; 62};
60 63
61enum { 64enum {
@@ -329,22 +332,18 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
329 return type; 332 return type;
330} 333}
331 334
332static int filter_build_regex(struct filter_pred *pred) 335static void filter_build_regex(struct filter_pred *pred)
333{ 336{
334 struct regex *r = &pred->regex; 337 struct regex *r = &pred->regex;
335 char *search, *dup; 338 char *search;
336 enum regex_type type; 339 enum regex_type type = MATCH_FULL;
337 int not; 340 int not = 0;
338 341
339 type = filter_parse_regex(r->pattern, r->len, &search, &not); 342 if (pred->op == OP_GLOB) {
340 dup = kstrdup(search, GFP_KERNEL); 343 type = filter_parse_regex(r->pattern, r->len, &search, &not);
341 if (!dup) 344 r->len = strlen(search);
342 return -ENOMEM; 345 memmove(r->pattern, search, r->len+1);
343 346 }
344 strcpy(r->pattern, dup);
345 kfree(dup);
346
347 r->len = strlen(r->pattern);
348 347
349 switch (type) { 348 switch (type) {
350 case MATCH_FULL: 349 case MATCH_FULL:
@@ -362,14 +361,11 @@ static int filter_build_regex(struct filter_pred *pred)
362 } 361 }
363 362
364 pred->not ^= not; 363 pred->not ^= not;
365
366 return 0;
367} 364}
368 365
369/* return 1 if event matches, 0 otherwise (discard) */ 366/* return 1 if event matches, 0 otherwise (discard) */
370int filter_match_preds(struct ftrace_event_call *call, void *rec) 367int filter_match_preds(struct event_filter *filter, void *rec)
371{ 368{
372 struct event_filter *filter = call->filter;
373 int match, top = 0, val1 = 0, val2 = 0; 369 int match, top = 0, val1 = 0, val2 = 0;
374 int stack[MAX_FILTER_PRED]; 370 int stack[MAX_FILTER_PRED];
375 struct filter_pred *pred; 371 struct filter_pred *pred;
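
The new OP_GLOB operator feeds filter_build_regex(), which classifies a pattern by where its '*' sits and then strips it, so matching reduces to one of the four MATCH_* strategies from trace.h. The classifier below is a standalone sketch of that rule for illustration, not the kernel's filter_parse_regex().

#include <stdio.h>
#include <string.h>

enum regex_type { MATCH_FULL = 0, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY };

static enum regex_type classify(const char *pat)
{
	size_t len = strlen(pat);
	int star_front = len && pat[0] == '*';
	int star_back  = len > 1 && pat[len - 1] == '*';

	if (star_front && star_back)
		return MATCH_MIDDLE_ONLY;	/* "*foo*": substring match */
	if (star_front)
		return MATCH_END_ONLY;		/* "*foo":  suffix match */
	if (star_back)
		return MATCH_FRONT_ONLY;	/* "foo*":  prefix match */
	return MATCH_FULL;			/* "foo":   exact match */
}

int main(void)
{
	printf("%d %d %d %d\n", classify("sched_switch"), classify("sched_*"),
	       classify("*_switch"), classify("*sched*"));	/* 0 1 3 2 */
	return 0;
}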
@@ -542,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
542 filter->preds[i]->fn = filter_pred_none; 538 filter->preds[i]->fn = filter_pred_none;
543} 539}
544 540
545void destroy_preds(struct ftrace_event_call *call) 541static void __free_preds(struct event_filter *filter)
546{ 542{
547 struct event_filter *filter = call->filter;
548 int i; 543 int i;
549 544
550 if (!filter) 545 if (!filter)
@@ -557,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call)
557 kfree(filter->preds); 552 kfree(filter->preds);
558 kfree(filter->filter_string); 553 kfree(filter->filter_string);
559 kfree(filter); 554 kfree(filter);
555}
556
557void destroy_preds(struct ftrace_event_call *call)
558{
559 __free_preds(call->filter);
560 call->filter = NULL; 560 call->filter = NULL;
561 call->filter_active = 0;
561} 562}
562 563
563static int init_preds(struct ftrace_event_call *call) 564static struct event_filter *__alloc_preds(void)
564{ 565{
565 struct event_filter *filter; 566 struct event_filter *filter;
566 struct filter_pred *pred; 567 struct filter_pred *pred;
567 int i; 568 int i;
568 569
569 if (call->filter) 570 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
570 return 0; 571 if (!filter)
571 572 return ERR_PTR(-ENOMEM);
572 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
573 if (!call->filter)
574 return -ENOMEM;
575 573
576 filter->n_preds = 0; 574 filter->n_preds = 0;
577 575
@@ -587,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call)
587 filter->preds[i] = pred; 585 filter->preds[i] = pred;
588 } 586 }
589 587
590 return 0; 588 return filter;
591 589
592oom: 590oom:
593 destroy_preds(call); 591 __free_preds(filter);
592 return ERR_PTR(-ENOMEM);
593}
594
595static int init_preds(struct ftrace_event_call *call)
596{
597 if (call->filter)
598 return 0;
594 599
595 return -ENOMEM; 600 call->filter_active = 0;
601 call->filter = __alloc_preds();
602 if (IS_ERR(call->filter))
603 return PTR_ERR(call->filter);
604
605 return 0;
596} 606}
597 607
598static int init_subsystem_preds(struct event_subsystem *system) 608static int init_subsystem_preds(struct event_subsystem *system)
@@ -615,14 +625,7 @@ static int init_subsystem_preds(struct event_subsystem *system)
615 return 0; 625 return 0;
616} 626}
617 627
618enum { 628static void filter_free_subsystem_preds(struct event_subsystem *system)
619 FILTER_DISABLE_ALL,
620 FILTER_INIT_NO_RESET,
621 FILTER_SKIP_NO_RESET,
622};
623
624static void filter_free_subsystem_preds(struct event_subsystem *system,
625 int flag)
626{ 629{
627 struct ftrace_event_call *call; 630 struct ftrace_event_call *call;
628 631
@@ -633,14 +636,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
633 if (strcmp(call->system, system->name) != 0) 636 if (strcmp(call->system, system->name) != 0)
634 continue; 637 continue;
635 638
636 if (flag == FILTER_INIT_NO_RESET) {
637 call->filter->no_reset = false;
638 continue;
639 }
640
641 if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
642 continue;
643
644 filter_disable_preds(call); 639 filter_disable_preds(call);
645 remove_filter_string(call->filter); 640 remove_filter_string(call->filter);
646 } 641 }
@@ -648,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
648 643
649static int filter_add_pred_fn(struct filter_parse_state *ps, 644static int filter_add_pred_fn(struct filter_parse_state *ps,
650 struct ftrace_event_call *call, 645 struct ftrace_event_call *call,
646 struct event_filter *filter,
651 struct filter_pred *pred, 647 struct filter_pred *pred,
652 filter_pred_fn_t fn) 648 filter_pred_fn_t fn)
653{ 649{
654 struct event_filter *filter = call->filter;
655 int idx, err; 650 int idx, err;
656 651
657 if (filter->n_preds == MAX_FILTER_PRED) { 652 if (filter->n_preds == MAX_FILTER_PRED) {
@@ -666,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
666 return err; 661 return err;
667 662
668 filter->n_preds++; 663 filter->n_preds++;
669 call->filter_active = 1;
670 664
671 return 0; 665 return 0;
672} 666}
@@ -691,7 +685,10 @@ static bool is_string_field(struct ftrace_event_field *field)
691 685
692static int is_legal_op(struct ftrace_event_field *field, int op) 686static int is_legal_op(struct ftrace_event_field *field, int op)
693{ 687{
694 if (is_string_field(field) && (op != OP_EQ && op != OP_NE)) 688 if (is_string_field(field) &&
689 (op != OP_EQ && op != OP_NE && op != OP_GLOB))
690 return 0;
691 if (!is_string_field(field) && op == OP_GLOB)
695 return 0; 692 return 0;
696 693
697 return 1; 694 return 1;
@@ -742,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
742 739
743static int filter_add_pred(struct filter_parse_state *ps, 740static int filter_add_pred(struct filter_parse_state *ps,
744 struct ftrace_event_call *call, 741 struct ftrace_event_call *call,
742 struct event_filter *filter,
745 struct filter_pred *pred, 743 struct filter_pred *pred,
746 bool dry_run) 744 bool dry_run)
747{ 745{
@@ -776,15 +774,13 @@ static int filter_add_pred(struct filter_parse_state *ps,
776 } 774 }
777 775
778 if (is_string_field(field)) { 776 if (is_string_field(field)) {
779 ret = filter_build_regex(pred); 777 filter_build_regex(pred);
780 if (ret)
781 return ret;
782 778
783 if (field->filter_type == FILTER_STATIC_STRING) { 779 if (field->filter_type == FILTER_STATIC_STRING) {
784 fn = filter_pred_string; 780 fn = filter_pred_string;
785 pred->regex.field_len = field->size; 781 pred->regex.field_len = field->size;
786 } else if (field->filter_type == FILTER_DYN_STRING) 782 } else if (field->filter_type == FILTER_DYN_STRING)
787 fn = filter_pred_strloc; 783 fn = filter_pred_strloc;
788 else { 784 else {
789 fn = filter_pred_pchar; 785 fn = filter_pred_pchar;
790 pred->regex.field_len = strlen(pred->regex.pattern); 786 pred->regex.field_len = strlen(pred->regex.pattern);
@@ -813,45 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
813 809
814add_pred_fn: 810add_pred_fn:
815 if (!dry_run) 811 if (!dry_run)
816 return filter_add_pred_fn(ps, call, pred, fn); 812 return filter_add_pred_fn(ps, call, filter, pred, fn);
817 return 0;
818}
819
820static int filter_add_subsystem_pred(struct filter_parse_state *ps,
821 struct event_subsystem *system,
822 struct filter_pred *pred,
823 char *filter_string,
824 bool dry_run)
825{
826 struct ftrace_event_call *call;
827 int err = 0;
828 bool fail = true;
829
830 list_for_each_entry(call, &ftrace_events, list) {
831
832 if (!call->define_fields)
833 continue;
834
835 if (strcmp(call->system, system->name))
836 continue;
837
838 if (call->filter->no_reset)
839 continue;
840
841 err = filter_add_pred(ps, call, pred, dry_run);
842 if (err)
843 call->filter->no_reset = true;
844 else
845 fail = false;
846
847 if (!dry_run)
848 replace_filter_string(call->filter, filter_string);
849 }
850
851 if (fail) {
852 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
853 return err;
854 }
855 return 0; 813 return 0;
856} 814}
857 815
@@ -1209,8 +1167,8 @@ static int check_preds(struct filter_parse_state *ps)
1209 return 0; 1167 return 0;
1210} 1168}
1211 1169
1212static int replace_preds(struct event_subsystem *system, 1170static int replace_preds(struct ftrace_event_call *call,
1213 struct ftrace_event_call *call, 1171 struct event_filter *filter,
1214 struct filter_parse_state *ps, 1172 struct filter_parse_state *ps,
1215 char *filter_string, 1173 char *filter_string,
1216 bool dry_run) 1174 bool dry_run)
@@ -1257,11 +1215,7 @@ static int replace_preds(struct event_subsystem *system,
1257add_pred: 1215add_pred:
1258 if (!pred) 1216 if (!pred)
1259 return -ENOMEM; 1217 return -ENOMEM;
1260 if (call) 1218 err = filter_add_pred(ps, call, filter, pred, dry_run);
1261 err = filter_add_pred(ps, call, pred, false);
1262 else
1263 err = filter_add_subsystem_pred(ps, system, pred,
1264 filter_string, dry_run);
1265 filter_free_pred(pred); 1219 filter_free_pred(pred);
1266 if (err) 1220 if (err)
1267 return err; 1221 return err;
@@ -1272,10 +1226,50 @@ add_pred:
1272 return 0; 1226 return 0;
1273} 1227}
1274 1228
1275int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1229static int replace_system_preds(struct event_subsystem *system,
1230 struct filter_parse_state *ps,
1231 char *filter_string)
1276{ 1232{
1233 struct ftrace_event_call *call;
1234 bool fail = true;
1277 int err; 1235 int err;
1278 1236
1237 list_for_each_entry(call, &ftrace_events, list) {
1238 struct event_filter *filter = call->filter;
1239
1240 if (!call->define_fields)
1241 continue;
1242
1243 if (strcmp(call->system, system->name) != 0)
1244 continue;
1245
1246 /* try to see if the filter can be applied */
1247 err = replace_preds(call, filter, ps, filter_string, true);
1248 if (err)
1249 continue;
1250
1251 /* really apply the filter */
1252 filter_disable_preds(call);
1253 err = replace_preds(call, filter, ps, filter_string, false);
1254 if (err)
1255 filter_disable_preds(call);
1256 else {
1257 call->filter_active = 1;
1258 replace_filter_string(filter, filter_string);
1259 }
1260 fail = false;
1261 }
1262
1263 if (fail) {
1264 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1265 return -EINVAL;
1266 }
1267 return 0;
1268}
1269
1270int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1271{
1272 int err;
1279 struct filter_parse_state *ps; 1273 struct filter_parse_state *ps;
1280 1274
1281 mutex_lock(&event_mutex); 1275 mutex_lock(&event_mutex);
@@ -1287,8 +1281,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1287 if (!strcmp(strstrip(filter_string), "0")) { 1281 if (!strcmp(strstrip(filter_string), "0")) {
1288 filter_disable_preds(call); 1282 filter_disable_preds(call);
1289 remove_filter_string(call->filter); 1283 remove_filter_string(call->filter);
1290 mutex_unlock(&event_mutex); 1284 goto out_unlock;
1291 return 0;
1292 } 1285 }
1293 1286
1294 err = -ENOMEM; 1287 err = -ENOMEM;
@@ -1306,10 +1299,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1306 goto out; 1299 goto out;
1307 } 1300 }
1308 1301
1309 err = replace_preds(NULL, call, ps, filter_string, false); 1302 err = replace_preds(call, call->filter, ps, filter_string, false);
1310 if (err) 1303 if (err)
1311 append_filter_err(ps, call->filter); 1304 append_filter_err(ps, call->filter);
1312 1305 else
1306 call->filter_active = 1;
1313out: 1307out:
1314 filter_opstack_clear(ps); 1308 filter_opstack_clear(ps);
1315 postfix_clear(ps); 1309 postfix_clear(ps);
@@ -1324,7 +1318,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1324 char *filter_string) 1318 char *filter_string)
1325{ 1319{
1326 int err; 1320 int err;
1327
1328 struct filter_parse_state *ps; 1321 struct filter_parse_state *ps;
1329 1322
1330 mutex_lock(&event_mutex); 1323 mutex_lock(&event_mutex);
@@ -1334,10 +1327,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1334 goto out_unlock; 1327 goto out_unlock;
1335 1328
1336 if (!strcmp(strstrip(filter_string), "0")) { 1329 if (!strcmp(strstrip(filter_string), "0")) {
1337 filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); 1330 filter_free_subsystem_preds(system);
1338 remove_filter_string(system->filter); 1331 remove_filter_string(system->filter);
1339 mutex_unlock(&event_mutex); 1332 goto out_unlock;
1340 return 0;
1341 } 1333 }
1342 1334
1343 err = -ENOMEM; 1335 err = -ENOMEM;
@@ -1354,31 +1346,87 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1354 goto out; 1346 goto out;
1355 } 1347 }
1356 1348
1357 filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); 1349 err = replace_system_preds(system, ps, filter_string);
1358 1350 if (err)
1359 /* try to see the filter can be applied to which events */
1360 err = replace_preds(system, NULL, ps, filter_string, true);
1361 if (err) {
1362 append_filter_err(ps, system->filter); 1351 append_filter_err(ps, system->filter);
1363 goto out; 1352
1353out:
1354 filter_opstack_clear(ps);
1355 postfix_clear(ps);
1356 kfree(ps);
1357out_unlock:
1358 mutex_unlock(&event_mutex);
1359
1360 return err;
1361}
1362
1363#ifdef CONFIG_EVENT_PROFILE
1364
1365void ftrace_profile_free_filter(struct perf_event *event)
1366{
1367 struct event_filter *filter = event->filter;
1368
1369 event->filter = NULL;
1370 __free_preds(filter);
1371}
1372
1373int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1374 char *filter_str)
1375{
1376 int err;
1377 struct event_filter *filter;
1378 struct filter_parse_state *ps;
1379 struct ftrace_event_call *call = NULL;
1380
1381 mutex_lock(&event_mutex);
1382
1383 list_for_each_entry(call, &ftrace_events, list) {
1384 if (call->id == event_id)
1385 break;
1364 } 1386 }
1365 1387
1366 filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); 1388 err = -EINVAL;
1389 if (!call)
1390 goto out_unlock;
1367 1391
1368 /* really apply the filter to the events */ 1392 err = -EEXIST;
1369 err = replace_preds(system, NULL, ps, filter_string, false); 1393 if (event->filter)
1370 if (err) { 1394 goto out_unlock;
1371 append_filter_err(ps, system->filter); 1395
1372 filter_free_subsystem_preds(system, 2); 1396 filter = __alloc_preds();
1397 if (IS_ERR(filter)) {
1398 err = PTR_ERR(filter);
1399 goto out_unlock;
1373 } 1400 }
1374 1401
1375out: 1402 err = -ENOMEM;
1403 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1404 if (!ps)
1405 goto free_preds;
1406
1407 parse_init(ps, filter_ops, filter_str);
1408 err = filter_parse(ps);
1409 if (err)
1410 goto free_ps;
1411
1412 err = replace_preds(call, filter, ps, filter_str, false);
1413 if (!err)
1414 event->filter = filter;
1415
1416free_ps:
1376 filter_opstack_clear(ps); 1417 filter_opstack_clear(ps);
1377 postfix_clear(ps); 1418 postfix_clear(ps);
1378 kfree(ps); 1419 kfree(ps);
1420
1421free_preds:
1422 if (err)
1423 __free_preds(filter);
1424
1379out_unlock: 1425out_unlock:
1380 mutex_unlock(&event_mutex); 1426 mutex_unlock(&event_mutex);
1381 1427
1382 return err; 1428 return err;
1383} 1429}
1384 1430
1431#endif /* CONFIG_EVENT_PROFILE */
1432
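The filter changes above add a glob operator for string fields (OP_GLOB, written "~" in a filter expression and resolved through filter_parse_regex()) and export a per-event filter interface for perf: ftrace_profile_set_filter() and ftrace_profile_free_filter(). The sketch below is illustrative only; the calling function, the origin of the event_id argument, and the "filename" string field are assumptions, not code from this patch.

    /*
     * Illustrative sketch: attach a filter that uses the new "~" glob
     * operator to a perf tracepoint event, and drop it again on teardown.
     * Only ftrace_profile_set_filter() and ftrace_profile_free_filter()
     * come from the patch above; everything else here is hypothetical.
     */
    static int example_attach_glob_filter(struct perf_event *event, int event_id)
    {
    	char filter_str[] = "filename ~ \"/etc/*\"";
    	int err;

    	err = ftrace_profile_set_filter(event, event_id, filter_str);
    	if (err)
    		return err;

    	/* ... later, when the event is released ... */
    	ftrace_profile_free_filter(event);
    	return 0;
    }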
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index c74848ddb85a..dff8c84ddf17 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -66,44 +66,47 @@ static void __always_unused ____ftrace_check_##name(void) \
66#undef __field 66#undef __field
67#define __field(type, item) \ 67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\n", \ 69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \ 70 offsetof(typeof(field), item), \
71 sizeof(field.item)); \ 71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \ 72 if (!ret) \
73 return 0; 73 return 0;
74 74
75#undef __field_desc 75#undef __field_desc
76#define __field_desc(type, container, item) \ 76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\n", \ 78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \ 79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item)); \ 80 sizeof(field.container.item), \
81 is_signed_type(type)); \
81 if (!ret) \ 82 if (!ret) \
82 return 0; 83 return 0;
83 84
84#undef __array 85#undef __array
85#define __array(type, item, len) \ 86#define __array(type, item, len) \
86 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
87 "offset:%zu;\tsize:%zu;\n", \ 88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
88 offsetof(typeof(field), item), \ 89 offsetof(typeof(field), item), \
89 sizeof(field.item)); \ 90 sizeof(field.item), is_signed_type(type)); \
90 if (!ret) \ 91 if (!ret) \
91 return 0; 92 return 0;
92 93
93#undef __array_desc 94#undef __array_desc
94#define __array_desc(type, container, item, len) \ 95#define __array_desc(type, container, item, len) \
95 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ 96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
96 "offset:%zu;\tsize:%zu;\n", \ 97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
97 offsetof(typeof(field), container.item), \ 98 offsetof(typeof(field), container.item), \
98 sizeof(field.container.item)); \ 99 sizeof(field.container.item), \
100 is_signed_type(type)); \
99 if (!ret) \ 101 if (!ret) \
100 return 0; 102 return 0;
101 103
102#undef __dynamic_array 104#undef __dynamic_array
103#define __dynamic_array(type, item) \ 105#define __dynamic_array(type, item) \
104 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
105 "offset:%zu;\tsize:0;\n", \ 107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
106 offsetof(typeof(field), item)); \ 108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
107 if (!ret) \ 110 if (!ret) \
108 return 0; 111 return 0;
109 112
@@ -131,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
131 134
132#include "trace_entries.h" 135#include "trace_entries.h"
133 136
134
135#undef __field 137#undef __field
136#define __field(type, item) \ 138#define __field(type, item) \
137 ret = trace_define_field(event_call, #type, #item, \ 139 ret = trace_define_field(event_call, #type, #item, \
@@ -193,6 +195,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
193 195
194#include "trace_entries.h" 196#include "trace_entries.h"
195 197
198static int ftrace_raw_init_event(struct ftrace_event_call *call)
199{
200 INIT_LIST_HEAD(&call->fields);
201 return 0;
202}
196 203
197#undef __field 204#undef __field
198#define __field(type, item) 205#define __field(type, item)
@@ -211,7 +218,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
211 218
212#undef FTRACE_ENTRY 219#undef FTRACE_ENTRY
213#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 220#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
214static int ftrace_raw_init_event_##call(void); \
215 \ 221 \
216struct ftrace_event_call __used \ 222struct ftrace_event_call __used \
217__attribute__((__aligned__(4))) \ 223__attribute__((__aligned__(4))) \
@@ -219,14 +225,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
219 .name = #call, \ 225 .name = #call, \
220 .id = type, \ 226 .id = type, \
221 .system = __stringify(TRACE_SYSTEM), \ 227 .system = __stringify(TRACE_SYSTEM), \
222 .raw_init = ftrace_raw_init_event_##call, \ 228 .raw_init = ftrace_raw_init_event, \
223 .show_format = ftrace_format_##call, \ 229 .show_format = ftrace_format_##call, \
224 .define_fields = ftrace_define_fields_##call, \ 230 .define_fields = ftrace_define_fields_##call, \
225}; \ 231}; \
226static int ftrace_raw_init_event_##call(void) \
227{ \
228 INIT_LIST_HEAD(&event_##call.fields); \
229 return 0; \
230} \
231 232
232#include "trace_entries.h" 233#include "trace_entries.h"
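With the change above, every field line emitted by the ftrace_format_##name() helpers now carries a "signed:%u" attribute computed by is_signed_type(type). The standalone snippet below only illustrates the idea behind such a macro; the #define shown is an assumed, commonly used form of the check and is not copied from this patch.

    #include <stdio.h>

    /* Assumed form of the check: cast -1 to the type and compare against 0.
     * Signed types yield 1, unsigned types yield 0 -- the value that ends up
     * after "signed:" in an event's format file. */
    #define is_signed_type(type)	(((type)(-1)) < (type)0)

    int main(void)
    {
    	printf("int           -> signed:%u\n", (unsigned int)is_signed_type(int));
    	printf("unsigned long -> signed:%u\n", (unsigned int)is_signed_type(unsigned long));
    	return 0;
    }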
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
new file mode 100644
index 000000000000..aff5f80b59b8
--- /dev/null
+++ b/kernel/trace/trace_kprobe.c
@@ -0,0 +1,1523 @@
1/*
2 * Kprobes-based tracing events
3 *
4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/uaccess.h>
22#include <linux/kprobes.h>
23#include <linux/seq_file.h>
24#include <linux/slab.h>
25#include <linux/smp.h>
26#include <linux/debugfs.h>
27#include <linux/types.h>
28#include <linux/string.h>
29#include <linux/ctype.h>
30#include <linux/ptrace.h>
31#include <linux/perf_event.h>
32
33#include "trace.h"
34#include "trace_output.h"
35
36#define MAX_TRACE_ARGS 128
37#define MAX_ARGSTR_LEN 63
38#define MAX_EVENT_NAME_LEN 64
39#define KPROBE_EVENT_SYSTEM "kprobes"
40
41/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func"
46
47const char *reserved_field_names[] = {
48 "common_type",
49 "common_flags",
50 "common_preempt_count",
51 "common_pid",
52 "common_tgid",
53 "common_lock_depth",
54 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC,
58};
59
60struct fetch_func {
61 unsigned long (*func)(struct pt_regs *, void *);
62 void *data;
63};
64
65static __kprobes unsigned long call_fetch(struct fetch_func *f,
66 struct pt_regs *regs)
67{
68 return f->func(regs, f->data);
69}
70
71/* fetch handlers */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs,
73 void *offset)
74{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset));
76}
77
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
79 void *num)
80{
81 return regs_get_kernel_stack_nth(regs,
82 (unsigned int)((unsigned long)num));
83}
84
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
86{
87 unsigned long retval;
88
89 if (probe_kernel_address(addr, retval))
90 return 0;
91 return retval;
92}
93
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
95{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
97}
98
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
100 void *dummy)
101{
102 return regs_return_value(regs);
103}
104
105static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
106 void *dummy)
107{
108 return kernel_stack_pointer(regs);
109}
110
111/* Memory fetching by symbol */
112struct symbol_cache {
113 char *symbol;
114 long offset;
115 unsigned long addr;
116};
117
118static unsigned long update_symbol_cache(struct symbol_cache *sc)
119{
120 sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
121 if (sc->addr)
122 sc->addr += sc->offset;
123 return sc->addr;
124}
125
126static void free_symbol_cache(struct symbol_cache *sc)
127{
128 kfree(sc->symbol);
129 kfree(sc);
130}
131
132static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
133{
134 struct symbol_cache *sc;
135
136 if (!sym || strlen(sym) == 0)
137 return NULL;
138 sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
139 if (!sc)
140 return NULL;
141
142 sc->symbol = kstrdup(sym, GFP_KERNEL);
143 if (!sc->symbol) {
144 kfree(sc);
145 return NULL;
146 }
147 sc->offset = offset;
148
149 update_symbol_cache(sc);
150 return sc;
151}
152
153static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data)
154{
155 struct symbol_cache *sc = data;
156
157 if (sc->addr)
158 return fetch_memory(regs, (void *)sc->addr);
159 else
160 return 0;
161}
162
163/* Special indirect memory access interface */
164struct indirect_fetch_data {
165 struct fetch_func orig;
166 long offset;
167};
168
169static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data)
170{
171 struct indirect_fetch_data *ind = data;
172 unsigned long addr;
173
174 addr = call_fetch(&ind->orig, regs);
175 if (addr) {
176 addr += ind->offset;
177 return fetch_memory(regs, (void *)addr);
178 } else
179 return 0;
180}
181
182static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
183{
184 if (data->orig.func == fetch_indirect)
185 free_indirect_fetch_data(data->orig.data);
186 else if (data->orig.func == fetch_symbol)
187 free_symbol_cache(data->orig.data);
188 kfree(data);
189}
190
191/**
192 * Kprobe event core functions
193 */
194
195struct probe_arg {
196 struct fetch_func fetch;
197 const char *name;
198};
199
200/* Flags for trace_probe */
201#define TP_FLAG_TRACE 1
202#define TP_FLAG_PROFILE 2
203
204struct trace_probe {
205 struct list_head list;
206 struct kretprobe rp; /* Use rp.kp for kprobe use */
207 unsigned long nhit;
208 unsigned int flags; /* For TP_FLAG_* */
209 const char *symbol; /* symbol name */
210 struct ftrace_event_call call;
211 struct trace_event event;
212 unsigned int nr_args;
213 struct probe_arg args[];
214};
215
216#define SIZEOF_TRACE_PROBE(n) \
217 (offsetof(struct trace_probe, args) + \
218 (sizeof(struct probe_arg) * (n)))
219
220static __kprobes int probe_is_return(struct trace_probe *tp)
221{
222 return tp->rp.handler != NULL;
223}
224
225static __kprobes const char *probe_symbol(struct trace_probe *tp)
226{
227 return tp->symbol ? tp->symbol : "unknown";
228}
229
230static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{
232 int ret = -EINVAL;
233
234 if (ff->func == fetch_argument)
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name);
240 } else if (ff->func == fetch_stack)
241 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
242 else if (ff->func == fetch_memory)
243 ret = snprintf(buf, n, "@0x%p", ff->data);
244 else if (ff->func == fetch_symbol) {
245 struct symbol_cache *sc = ff->data;
246 if (sc->offset)
247 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
248 sc->offset);
249 else
250 ret = snprintf(buf, n, "@%s", sc->symbol);
251 } else if (ff->func == fetch_retvalue)
252 ret = snprintf(buf, n, "$retval");
253 else if (ff->func == fetch_stack_address)
254 ret = snprintf(buf, n, "$stack");
255 else if (ff->func == fetch_indirect) {
256 struct indirect_fetch_data *id = ff->data;
257 size_t l = 0;
258 ret = snprintf(buf, n, "%+ld(", id->offset);
259 if (ret >= n)
260 goto end;
261 l += ret;
262 ret = probe_arg_string(buf + l, n - l, &id->orig);
263 if (ret < 0)
264 goto end;
265 l += ret;
266 ret = snprintf(buf + l, n - l, ")");
267 ret += l;
268 }
269end:
270 if (ret >= n)
271 return -ENOSPC;
272 return ret;
273}
274
275static int register_probe_event(struct trace_probe *tp);
276static void unregister_probe_event(struct trace_probe *tp);
277
278static DEFINE_MUTEX(probe_lock);
279static LIST_HEAD(probe_list);
280
281static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
282static int kretprobe_dispatcher(struct kretprobe_instance *ri,
283 struct pt_regs *regs);
284
285/*
286 * Allocate new trace_probe and initialize it (including kprobes).
287 */
288static struct trace_probe *alloc_trace_probe(const char *group,
289 const char *event,
290 void *addr,
291 const char *symbol,
292 unsigned long offs,
293 int nargs, int is_return)
294{
295 struct trace_probe *tp;
296
297 tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
298 if (!tp)
299 return ERR_PTR(-ENOMEM);
300
301 if (symbol) {
302 tp->symbol = kstrdup(symbol, GFP_KERNEL);
303 if (!tp->symbol)
304 goto error;
305 tp->rp.kp.symbol_name = tp->symbol;
306 tp->rp.kp.offset = offs;
307 } else
308 tp->rp.kp.addr = addr;
309
310 if (is_return)
311 tp->rp.handler = kretprobe_dispatcher;
312 else
313 tp->rp.kp.pre_handler = kprobe_dispatcher;
314
315 if (!event)
316 goto error;
317 tp->call.name = kstrdup(event, GFP_KERNEL);
318 if (!tp->call.name)
319 goto error;
320
321 if (!group)
322 goto error;
323 tp->call.system = kstrdup(group, GFP_KERNEL);
324 if (!tp->call.system)
325 goto error;
326
327 INIT_LIST_HEAD(&tp->list);
328 return tp;
329error:
330 kfree(tp->call.name);
331 kfree(tp->symbol);
332 kfree(tp);
333 return ERR_PTR(-ENOMEM);
334}
335
336static void free_probe_arg(struct probe_arg *arg)
337{
338 if (arg->fetch.func == fetch_symbol)
339 free_symbol_cache(arg->fetch.data);
340 else if (arg->fetch.func == fetch_indirect)
341 free_indirect_fetch_data(arg->fetch.data);
342 kfree(arg->name);
343}
344
345static void free_trace_probe(struct trace_probe *tp)
346{
347 int i;
348
349 for (i = 0; i < tp->nr_args; i++)
350 free_probe_arg(&tp->args[i]);
351
352 kfree(tp->call.system);
353 kfree(tp->call.name);
354 kfree(tp->symbol);
355 kfree(tp);
356}
357
358static struct trace_probe *find_probe_event(const char *event,
359 const char *group)
360{
361 struct trace_probe *tp;
362
363 list_for_each_entry(tp, &probe_list, list)
364 if (strcmp(tp->call.name, event) == 0 &&
365 strcmp(tp->call.system, group) == 0)
366 return tp;
367 return NULL;
368}
369
 370/* Unregister a trace_probe and probe_event; must be called with probe_lock held */
371static void unregister_trace_probe(struct trace_probe *tp)
372{
373 if (probe_is_return(tp))
374 unregister_kretprobe(&tp->rp);
375 else
376 unregister_kprobe(&tp->rp.kp);
377 list_del(&tp->list);
378 unregister_probe_event(tp);
379}
380
381/* Register a trace_probe and probe_event */
382static int register_trace_probe(struct trace_probe *tp)
383{
384 struct trace_probe *old_tp;
385 int ret;
386
387 mutex_lock(&probe_lock);
388
389 /* register as an event */
390 old_tp = find_probe_event(tp->call.name, tp->call.system);
391 if (old_tp) {
392 /* delete old event */
393 unregister_trace_probe(old_tp);
394 free_trace_probe(old_tp);
395 }
396 ret = register_probe_event(tp);
397 if (ret) {
 398		pr_warning("Failed to register probe event(%d)\n", ret);
399 goto end;
400 }
401
402 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
403 if (probe_is_return(tp))
404 ret = register_kretprobe(&tp->rp);
405 else
406 ret = register_kprobe(&tp->rp.kp);
407
408 if (ret) {
409 pr_warning("Could not insert probe(%d)\n", ret);
410 if (ret == -EILSEQ) {
411 pr_warning("Probing address(0x%p) is not an "
412 "instruction boundary.\n",
413 tp->rp.kp.addr);
414 ret = -EINVAL;
415 }
416 unregister_probe_event(tp);
417 } else
418 list_add_tail(&tp->list, &probe_list);
419end:
420 mutex_unlock(&probe_lock);
421 return ret;
422}
423
424/* Split symbol and offset. */
425static int split_symbol_offset(char *symbol, unsigned long *offset)
426{
427 char *tmp;
428 int ret;
429
430 if (!offset)
431 return -EINVAL;
432
433 tmp = strchr(symbol, '+');
434 if (tmp) {
435 /* skip sign because strict_strtol doesn't accept '+' */
436 ret = strict_strtoul(tmp + 1, 0, offset);
437 if (ret)
438 return ret;
439 *tmp = '\0';
440 } else
441 *offset = 0;
442 return 0;
443}
444
445#define PARAM_MAX_ARGS 16
446#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
447
448static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
449{
450 int ret = 0;
451 unsigned long param;
452
453 if (strcmp(arg, "retval") == 0) {
454 if (is_return) {
455 ff->func = fetch_retvalue;
456 ff->data = NULL;
457 } else
458 ret = -EINVAL;
459 } else if (strncmp(arg, "stack", 5) == 0) {
460 if (arg[5] == '\0') {
461 ff->func = fetch_stack_address;
462 ff->data = NULL;
463 } else if (isdigit(arg[5])) {
464 ret = strict_strtoul(arg + 5, 10, &param);
465 if (ret || param > PARAM_MAX_STACK)
466 ret = -EINVAL;
467 else {
468 ff->func = fetch_stack;
469 ff->data = (void *)param;
470 }
471 } else
472 ret = -EINVAL;
473 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
474 ret = strict_strtoul(arg + 3, 10, &param);
475 if (ret || param > PARAM_MAX_ARGS)
476 ret = -EINVAL;
477 else {
478 ff->func = fetch_argument;
479 ff->data = (void *)param;
480 }
481 } else
482 ret = -EINVAL;
483 return ret;
484}
485
486/* Recursive argument parser */
487static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
488{
489 int ret = 0;
490 unsigned long param;
491 long offset;
492 char *tmp;
493
494 switch (arg[0]) {
495 case '$':
496 ret = parse_probe_vars(arg + 1, ff, is_return);
497 break;
498 case '%': /* named register */
499 ret = regs_query_register_offset(arg + 1);
500 if (ret >= 0) {
501 ff->func = fetch_register;
502 ff->data = (void *)(unsigned long)ret;
503 ret = 0;
504 }
505 break;
506 case '@': /* memory or symbol */
507 if (isdigit(arg[1])) {
508 ret = strict_strtoul(arg + 1, 0, &param);
509 if (ret)
510 break;
511 ff->func = fetch_memory;
512 ff->data = (void *)param;
513 } else {
514 ret = split_symbol_offset(arg + 1, &offset);
515 if (ret)
516 break;
517 ff->data = alloc_symbol_cache(arg + 1, offset);
518 if (ff->data)
519 ff->func = fetch_symbol;
520 else
521 ret = -EINVAL;
522 }
523 break;
524 case '+': /* indirect memory */
525 case '-':
526 tmp = strchr(arg, '(');
527 if (!tmp) {
528 ret = -EINVAL;
529 break;
530 }
531 *tmp = '\0';
532 ret = strict_strtol(arg + 1, 0, &offset);
533 if (ret)
534 break;
535 if (arg[0] == '-')
536 offset = -offset;
537 arg = tmp + 1;
538 tmp = strrchr(arg, ')');
539 if (tmp) {
540 struct indirect_fetch_data *id;
541 *tmp = '\0';
542 id = kzalloc(sizeof(struct indirect_fetch_data),
543 GFP_KERNEL);
544 if (!id)
545 return -ENOMEM;
546 id->offset = offset;
547 ret = __parse_probe_arg(arg, &id->orig, is_return);
548 if (ret)
549 kfree(id);
550 else {
551 ff->func = fetch_indirect;
552 ff->data = (void *)id;
553 }
554 } else
555 ret = -EINVAL;
556 break;
557 default:
558 /* TODO: support custom handler */
559 ret = -EINVAL;
560 }
561 return ret;
562}
563
564/* String length checking wrapper */
565static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
566{
567 if (strlen(arg) > MAX_ARGSTR_LEN) {
 568		pr_info("Argument is too long: %s\n", arg);
569 return -ENOSPC;
570 }
571 return __parse_probe_arg(arg, ff, is_return);
572}
573
574/* Return 1 if name is reserved or already used by another argument */
575static int conflict_field_name(const char *name,
576 struct probe_arg *args, int narg)
577{
578 int i;
579 for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
580 if (strcmp(reserved_field_names[i], name) == 0)
581 return 1;
582 for (i = 0; i < narg; i++)
583 if (strcmp(args[i].name, name) == 0)
584 return 1;
585 return 0;
586}
587
588static int create_trace_probe(int argc, char **argv)
589{
590 /*
591 * Argument syntax:
592 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
593 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
594 * Fetch args:
 595 * $argN	: fetch the Nth function argument. (N:0-)
596 * $retval : fetch return value
597 * $stack : fetch stack address
 598 * $stackN	: fetch the Nth entry of the stack (N:0-)
599 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
600 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
601 * %REG : fetch register REG
602 * Indirect memory fetch:
603 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
604 * Alias name of args:
605 * NAME=FETCHARG : set NAME as alias of FETCHARG.
606 */
607 struct trace_probe *tp;
608 int i, ret = 0;
609 int is_return = 0;
610 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
611 unsigned long offset = 0;
612 void *addr = NULL;
613 char buf[MAX_EVENT_NAME_LEN];
614
615 if (argc < 2) {
616 pr_info("Probe point is not specified.\n");
617 return -EINVAL;
618 }
619
620 if (argv[0][0] == 'p')
621 is_return = 0;
622 else if (argv[0][0] == 'r')
623 is_return = 1;
624 else {
625 pr_info("Probe definition must be started with 'p' or 'r'.\n");
626 return -EINVAL;
627 }
628
629 if (argv[0][1] == ':') {
630 event = &argv[0][2];
631 if (strchr(event, '/')) {
632 group = event;
633 event = strchr(group, '/') + 1;
634 event[-1] = '\0';
635 if (strlen(group) == 0) {
 636				pr_info("Group name is not specified\n");
637 return -EINVAL;
638 }
639 }
640 if (strlen(event) == 0) {
 641			pr_info("Event name is not specified\n");
642 return -EINVAL;
643 }
644 }
645
646 if (isdigit(argv[1][0])) {
647 if (is_return) {
648 pr_info("Return probe point must be a symbol.\n");
649 return -EINVAL;
650 }
651 /* an address specified */
 652		ret = strict_strtoul(argv[1], 0, (unsigned long *)&addr);
653 if (ret) {
654 pr_info("Failed to parse address.\n");
655 return ret;
656 }
657 } else {
658 /* a symbol specified */
659 symbol = argv[1];
660 /* TODO: support .init module functions */
661 ret = split_symbol_offset(symbol, &offset);
662 if (ret) {
663 pr_info("Failed to parse symbol.\n");
664 return ret;
665 }
666 if (offset && is_return) {
667 pr_info("Return probe must be used without offset.\n");
668 return -EINVAL;
669 }
670 }
671 argc -= 2; argv += 2;
672
673 /* setup a probe */
674 if (!group)
675 group = KPROBE_EVENT_SYSTEM;
676 if (!event) {
677 /* Make a new event name */
678 if (symbol)
679 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
680 is_return ? 'r' : 'p', symbol, offset);
681 else
682 snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
683 is_return ? 'r' : 'p', addr);
684 event = buf;
685 }
686 tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
687 is_return);
688 if (IS_ERR(tp)) {
689 pr_info("Failed to allocate trace_probe.(%d)\n",
690 (int)PTR_ERR(tp));
691 return PTR_ERR(tp);
692 }
693
694 /* parse arguments */
695 ret = 0;
696 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
697 /* Parse argument name */
698 arg = strchr(argv[i], '=');
699 if (arg)
700 *arg++ = '\0';
701 else
702 arg = argv[i];
703
704 if (conflict_field_name(argv[i], tp->args, i)) {
705 pr_info("Argument%d name '%s' conflicts with "
706 "another field.\n", i, argv[i]);
707 ret = -EINVAL;
708 goto error;
709 }
710
711 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
712 if (!tp->args[i].name) {
713 pr_info("Failed to allocate argument%d name '%s'.\n",
714 i, argv[i]);
715 ret = -ENOMEM;
716 goto error;
717 }
718
719 /* Parse fetch argument */
720 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
721 if (ret) {
722 pr_info("Parse error at argument%d. (%d)\n", i, ret);
723 kfree(tp->args[i].name);
724 goto error;
725 }
726
727 tp->nr_args++;
728 }
729
730 ret = register_trace_probe(tp);
731 if (ret)
732 goto error;
733 return 0;
734
735error:
736 free_trace_probe(tp);
737 return ret;
738}
739
740static void cleanup_all_probes(void)
741{
742 struct trace_probe *tp;
743
744 mutex_lock(&probe_lock);
745 /* TODO: Use batch unregistration */
746 while (!list_empty(&probe_list)) {
747 tp = list_entry(probe_list.next, struct trace_probe, list);
748 unregister_trace_probe(tp);
749 free_trace_probe(tp);
750 }
751 mutex_unlock(&probe_lock);
752}
753
754
755/* Probes listing interfaces */
756static void *probes_seq_start(struct seq_file *m, loff_t *pos)
757{
758 mutex_lock(&probe_lock);
759 return seq_list_start(&probe_list, *pos);
760}
761
762static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
763{
764 return seq_list_next(v, &probe_list, pos);
765}
766
767static void probes_seq_stop(struct seq_file *m, void *v)
768{
769 mutex_unlock(&probe_lock);
770}
771
772static int probes_seq_show(struct seq_file *m, void *v)
773{
774 struct trace_probe *tp = v;
775 int i, ret;
776 char buf[MAX_ARGSTR_LEN + 1];
777
778 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
779 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name);
780
781 if (!tp->symbol)
782 seq_printf(m, " 0x%p", tp->rp.kp.addr);
783 else if (tp->rp.kp.offset)
784 seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
785 else
786 seq_printf(m, " %s", probe_symbol(tp));
787
788 for (i = 0; i < tp->nr_args; i++) {
789 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
790 if (ret < 0) {
791 pr_warning("Argument%d decoding error(%d).\n", i, ret);
792 return ret;
793 }
794 seq_printf(m, " %s=%s", tp->args[i].name, buf);
795 }
796 seq_printf(m, "\n");
797 return 0;
798}
799
800static const struct seq_operations probes_seq_op = {
801 .start = probes_seq_start,
802 .next = probes_seq_next,
803 .stop = probes_seq_stop,
804 .show = probes_seq_show
805};
806
807static int probes_open(struct inode *inode, struct file *file)
808{
809 if ((file->f_mode & FMODE_WRITE) &&
810 (file->f_flags & O_TRUNC))
811 cleanup_all_probes();
812
813 return seq_open(file, &probes_seq_op);
814}
815
816static int command_trace_probe(const char *buf)
817{
818 char **argv;
819 int argc = 0, ret = 0;
820
821 argv = argv_split(GFP_KERNEL, buf, &argc);
822 if (!argv)
823 return -ENOMEM;
824
825 if (argc)
826 ret = create_trace_probe(argc, argv);
827
828 argv_free(argv);
829 return ret;
830}
831
832#define WRITE_BUFSIZE 128
833
834static ssize_t probes_write(struct file *file, const char __user *buffer,
835 size_t count, loff_t *ppos)
836{
837 char *kbuf, *tmp;
838 int ret;
839 size_t done;
840 size_t size;
841
842 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
843 if (!kbuf)
844 return -ENOMEM;
845
846 ret = done = 0;
847 while (done < count) {
848 size = count - done;
849 if (size >= WRITE_BUFSIZE)
850 size = WRITE_BUFSIZE - 1;
851 if (copy_from_user(kbuf, buffer + done, size)) {
852 ret = -EFAULT;
853 goto out;
854 }
855 kbuf[size] = '\0';
856 tmp = strchr(kbuf, '\n');
857 if (tmp) {
858 *tmp = '\0';
859 size = tmp - kbuf + 1;
860 } else if (done + size < count) {
 861			pr_warning("Line is too long: "
 862				"should be less than %d characters.", WRITE_BUFSIZE);
863 ret = -EINVAL;
864 goto out;
865 }
866 done += size;
867 /* Remove comments */
868 tmp = strchr(kbuf, '#');
869 if (tmp)
870 *tmp = '\0';
871
872 ret = command_trace_probe(kbuf);
873 if (ret)
874 goto out;
875 }
876 ret = done;
877out:
878 kfree(kbuf);
879 return ret;
880}
881
882static const struct file_operations kprobe_events_ops = {
883 .owner = THIS_MODULE,
884 .open = probes_open,
885 .read = seq_read,
886 .llseek = seq_lseek,
887 .release = seq_release,
888 .write = probes_write,
889};
890
891/* Probes profiling interfaces */
892static int probes_profile_seq_show(struct seq_file *m, void *v)
893{
894 struct trace_probe *tp = v;
895
896 seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
897 tp->rp.kp.nmissed);
898
899 return 0;
900}
901
902static const struct seq_operations profile_seq_op = {
903 .start = probes_seq_start,
904 .next = probes_seq_next,
905 .stop = probes_seq_stop,
906 .show = probes_profile_seq_show
907};
908
909static int profile_open(struct inode *inode, struct file *file)
910{
911 return seq_open(file, &profile_seq_op);
912}
913
914static const struct file_operations kprobe_profile_ops = {
915 .owner = THIS_MODULE,
916 .open = profile_open,
917 .read = seq_read,
918 .llseek = seq_lseek,
919 .release = seq_release,
920};
921
922/* Kprobe handler */
923static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
924{
925 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
926 struct kprobe_trace_entry *entry;
927 struct ring_buffer_event *event;
928 struct ring_buffer *buffer;
929 int size, i, pc;
930 unsigned long irq_flags;
931 struct ftrace_event_call *call = &tp->call;
932
933 tp->nhit++;
934
935 local_save_flags(irq_flags);
936 pc = preempt_count();
937
938 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
939
940 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
941 irq_flags, pc);
942 if (!event)
943 return 0;
944
945 entry = ring_buffer_event_data(event);
946 entry->nargs = tp->nr_args;
947 entry->ip = (unsigned long)kp->addr;
948 for (i = 0; i < tp->nr_args; i++)
949 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
950
951 if (!filter_current_check_discard(buffer, call, entry, event))
952 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
953 return 0;
954}
955
956/* Kretprobe handler */
957static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
958 struct pt_regs *regs)
959{
960 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
961 struct kretprobe_trace_entry *entry;
962 struct ring_buffer_event *event;
963 struct ring_buffer *buffer;
964 int size, i, pc;
965 unsigned long irq_flags;
966 struct ftrace_event_call *call = &tp->call;
967
968 local_save_flags(irq_flags);
969 pc = preempt_count();
970
971 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
972
973 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
974 irq_flags, pc);
975 if (!event)
976 return 0;
977
978 entry = ring_buffer_event_data(event);
979 entry->nargs = tp->nr_args;
980 entry->func = (unsigned long)tp->rp.kp.addr;
981 entry->ret_ip = (unsigned long)ri->ret_addr;
982 for (i = 0; i < tp->nr_args; i++)
983 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
984
985 if (!filter_current_check_discard(buffer, call, entry, event))
986 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
987
988 return 0;
989}
990
991/* Event entry printers */
992enum print_line_t
993print_kprobe_event(struct trace_iterator *iter, int flags)
994{
995 struct kprobe_trace_entry *field;
996 struct trace_seq *s = &iter->seq;
997 struct trace_event *event;
998 struct trace_probe *tp;
999 int i;
1000
1001 field = (struct kprobe_trace_entry *)iter->ent;
1002 event = ftrace_find_event(field->ent.type);
1003 tp = container_of(event, struct trace_probe, event);
1004
1005 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1006 goto partial;
1007
1008 if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1009 goto partial;
1010
1011 if (!trace_seq_puts(s, ")"))
1012 goto partial;
1013
1014 for (i = 0; i < field->nargs; i++)
1015 if (!trace_seq_printf(s, " %s=%lx",
1016 tp->args[i].name, field->args[i]))
1017 goto partial;
1018
1019 if (!trace_seq_puts(s, "\n"))
1020 goto partial;
1021
1022 return TRACE_TYPE_HANDLED;
1023partial:
1024 return TRACE_TYPE_PARTIAL_LINE;
1025}
1026
1027enum print_line_t
1028print_kretprobe_event(struct trace_iterator *iter, int flags)
1029{
1030 struct kretprobe_trace_entry *field;
1031 struct trace_seq *s = &iter->seq;
1032 struct trace_event *event;
1033 struct trace_probe *tp;
1034 int i;
1035
1036 field = (struct kretprobe_trace_entry *)iter->ent;
1037 event = ftrace_find_event(field->ent.type);
1038 tp = container_of(event, struct trace_probe, event);
1039
1040 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1041 goto partial;
1042
1043 if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1044 goto partial;
1045
1046 if (!trace_seq_puts(s, " <- "))
1047 goto partial;
1048
1049 if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1050 goto partial;
1051
1052 if (!trace_seq_puts(s, ")"))
1053 goto partial;
1054
1055 for (i = 0; i < field->nargs; i++)
1056 if (!trace_seq_printf(s, " %s=%lx",
1057 tp->args[i].name, field->args[i]))
1058 goto partial;
1059
1060 if (!trace_seq_puts(s, "\n"))
1061 goto partial;
1062
1063 return TRACE_TYPE_HANDLED;
1064partial:
1065 return TRACE_TYPE_PARTIAL_LINE;
1066}
1067
1068static int probe_event_enable(struct ftrace_event_call *call)
1069{
1070 struct trace_probe *tp = (struct trace_probe *)call->data;
1071
1072 tp->flags |= TP_FLAG_TRACE;
1073 if (probe_is_return(tp))
1074 return enable_kretprobe(&tp->rp);
1075 else
1076 return enable_kprobe(&tp->rp.kp);
1077}
1078
1079static void probe_event_disable(struct ftrace_event_call *call)
1080{
1081 struct trace_probe *tp = (struct trace_probe *)call->data;
1082
1083 tp->flags &= ~TP_FLAG_TRACE;
1084 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1085 if (probe_is_return(tp))
1086 disable_kretprobe(&tp->rp);
1087 else
1088 disable_kprobe(&tp->rp.kp);
1089 }
1090}
1091
1092static int probe_event_raw_init(struct ftrace_event_call *event_call)
1093{
1094 INIT_LIST_HEAD(&event_call->fields);
1095
1096 return 0;
1097}
1098
1099#undef DEFINE_FIELD
1100#define DEFINE_FIELD(type, item, name, is_signed) \
1101 do { \
1102 ret = trace_define_field(event_call, #type, name, \
1103 offsetof(typeof(field), item), \
1104 sizeof(field.item), is_signed, \
1105 FILTER_OTHER); \
1106 if (ret) \
1107 return ret; \
1108 } while (0)
1109
1110static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1111{
1112 int ret, i;
1113 struct kprobe_trace_entry field;
1114 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1115
1116 ret = trace_define_common_fields(event_call);
 1117	if (ret)
1118 return ret;
1119
1120 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1121 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1122 /* Set argument names as fields */
1123 for (i = 0; i < tp->nr_args; i++)
1124 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1125 return 0;
1126}
1127
1128static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1129{
1130 int ret, i;
1131 struct kretprobe_trace_entry field;
1132 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1133
1134 ret = trace_define_common_fields(event_call);
 1135	if (ret)
1136 return ret;
1137
1138 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1139 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1140 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1141 /* Set argument names as fields */
1142 for (i = 0; i < tp->nr_args; i++)
1143 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
1144 return 0;
1145}
1146
1147static int __probe_event_show_format(struct trace_seq *s,
1148 struct trace_probe *tp, const char *fmt,
1149 const char *arg)
1150{
1151 int i;
1152
1153 /* Show format */
1154 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1155 return 0;
1156
1157 for (i = 0; i < tp->nr_args; i++)
1158 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1159 return 0;
1160
1161 if (!trace_seq_printf(s, "\", %s", arg))
1162 return 0;
1163
1164 for (i = 0; i < tp->nr_args; i++)
1165 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1166 return 0;
1167
1168 return trace_seq_puts(s, "\n");
1169}
1170
1171#undef SHOW_FIELD
1172#define SHOW_FIELD(type, item, name) \
1173 do { \
1174 ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \
1175 "offset:%u;\tsize:%u;\n", name, \
1176 (unsigned int)offsetof(typeof(field), item),\
1177 (unsigned int)sizeof(type)); \
1178 if (!ret) \
1179 return 0; \
1180 } while (0)
1181
1182static int kprobe_event_show_format(struct ftrace_event_call *call,
1183 struct trace_seq *s)
1184{
1185 struct kprobe_trace_entry field __attribute__((unused));
1186 int ret, i;
1187 struct trace_probe *tp = (struct trace_probe *)call->data;
1188
1189 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
1190 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1191
1192 /* Show fields */
1193 for (i = 0; i < tp->nr_args; i++)
1194 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1195 trace_seq_puts(s, "\n");
1196
1197 return __probe_event_show_format(s, tp, "(%lx)",
1198 "REC->" FIELD_STRING_IP);
1199}
1200
1201static int kretprobe_event_show_format(struct ftrace_event_call *call,
1202 struct trace_seq *s)
1203{
1204 struct kretprobe_trace_entry field __attribute__((unused));
1205 int ret, i;
1206 struct trace_probe *tp = (struct trace_probe *)call->data;
1207
1208 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
1209 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
1210 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1211
1212 /* Show fields */
1213 for (i = 0; i < tp->nr_args; i++)
1214 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1215 trace_seq_puts(s, "\n");
1216
1217 return __probe_event_show_format(s, tp, "(%lx <- %lx)",
1218 "REC->" FIELD_STRING_FUNC
1219 ", REC->" FIELD_STRING_RETIP);
1220}
1221
1222#ifdef CONFIG_EVENT_PROFILE
1223
1224/* Kprobe profile handler */
1225static __kprobes int kprobe_profile_func(struct kprobe *kp,
1226 struct pt_regs *regs)
1227{
1228 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1229 struct ftrace_event_call *call = &tp->call;
1230 struct kprobe_trace_entry *entry;
1231 struct trace_entry *ent;
1232 int size, __size, i, pc, __cpu;
1233 unsigned long irq_flags;
1234 char *trace_buf;
1235 char *raw_data;
1236 int rctx;
1237
1238 pc = preempt_count();
1239 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1240 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1241 size -= sizeof(u32);
1242 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1243 "profile buffer not large enough"))
1244 return 0;
1245
1246 /*
1247 * Protect the non nmi buffer
1248 * This also protects the rcu read side
1249 */
1250 local_irq_save(irq_flags);
1251
1252 rctx = perf_swevent_get_recursion_context();
1253 if (rctx < 0)
1254 goto end_recursion;
1255
1256 __cpu = smp_processor_id();
1257
1258 if (in_nmi())
1259 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1260 else
1261 trace_buf = rcu_dereference(perf_trace_buf);
1262
1263 if (!trace_buf)
1264 goto end;
1265
1266 raw_data = per_cpu_ptr(trace_buf, __cpu);
1267
1268 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1269 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1270 entry = (struct kprobe_trace_entry *)raw_data;
1271 ent = &entry->ent;
1272
1273 tracing_generic_entry_update(ent, irq_flags, pc);
1274 ent->type = call->id;
1275 entry->nargs = tp->nr_args;
1276 entry->ip = (unsigned long)kp->addr;
1277 for (i = 0; i < tp->nr_args; i++)
1278 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1279 perf_tp_event(call->id, entry->ip, 1, entry, size);
1280
1281end:
1282 perf_swevent_put_recursion_context(rctx);
1283end_recursion:
1284 local_irq_restore(irq_flags);
1285
1286 return 0;
1287}
1288
1289/* Kretprobe profile handler */
1290static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
1291 struct pt_regs *regs)
1292{
1293 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1294 struct ftrace_event_call *call = &tp->call;
1295 struct kretprobe_trace_entry *entry;
1296 struct trace_entry *ent;
1297 int size, __size, i, pc, __cpu;
1298 unsigned long irq_flags;
1299 char *trace_buf;
1300 char *raw_data;
1301 int rctx;
1302
1303 pc = preempt_count();
1304 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1305 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1306 size -= sizeof(u32);
1307 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
1308 "profile buffer not large enough"))
1309 return 0;
1310
1311 /*
1312 * Protect the non nmi buffer
1313 * This also protects the rcu read side
1314 */
1315 local_irq_save(irq_flags);
1316
1317 rctx = perf_swevent_get_recursion_context();
1318 if (rctx < 0)
1319 goto end_recursion;
1320
1321 __cpu = smp_processor_id();
1322
1323 if (in_nmi())
1324 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1325 else
1326 trace_buf = rcu_dereference(perf_trace_buf);
1327
1328 if (!trace_buf)
1329 goto end;
1330
1331 raw_data = per_cpu_ptr(trace_buf, __cpu);
1332
1333 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1334 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1335 entry = (struct kretprobe_trace_entry *)raw_data;
1336 ent = &entry->ent;
1337
1338 tracing_generic_entry_update(ent, irq_flags, pc);
1339 ent->type = call->id;
1340 entry->nargs = tp->nr_args;
1341 entry->func = (unsigned long)tp->rp.kp.addr;
1342 entry->ret_ip = (unsigned long)ri->ret_addr;
1343 for (i = 0; i < tp->nr_args; i++)
1344 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1345 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1346
1347end:
1348 perf_swevent_put_recursion_context(rctx);
1349end_recursion:
1350 local_irq_restore(irq_flags);
1351
1352 return 0;
1353}
1354
1355static int probe_profile_enable(struct ftrace_event_call *call)
1356{
1357 struct trace_probe *tp = (struct trace_probe *)call->data;
1358
1359 tp->flags |= TP_FLAG_PROFILE;
1360
1361 if (probe_is_return(tp))
1362 return enable_kretprobe(&tp->rp);
1363 else
1364 return enable_kprobe(&tp->rp.kp);
1365}
1366
1367static void probe_profile_disable(struct ftrace_event_call *call)
1368{
1369 struct trace_probe *tp = (struct trace_probe *)call->data;
1370
1371 tp->flags &= ~TP_FLAG_PROFILE;
1372
1373 if (!(tp->flags & TP_FLAG_TRACE)) {
1374 if (probe_is_return(tp))
1375 disable_kretprobe(&tp->rp);
1376 else
1377 disable_kprobe(&tp->rp.kp);
1378 }
1379}
1380#endif /* CONFIG_EVENT_PROFILE */
1381
1382
1383static __kprobes
1384int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1385{
1386 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1387
1388 if (tp->flags & TP_FLAG_TRACE)
1389 kprobe_trace_func(kp, regs);
1390#ifdef CONFIG_EVENT_PROFILE
1391 if (tp->flags & TP_FLAG_PROFILE)
1392 kprobe_profile_func(kp, regs);
1393#endif /* CONFIG_EVENT_PROFILE */
 1394	return 0;	/* We don't tweak the kernel, so just return 0 */
1395}
1396
1397static __kprobes
1398int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1399{
1400 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1401
1402 if (tp->flags & TP_FLAG_TRACE)
1403 kretprobe_trace_func(ri, regs);
1404#ifdef CONFIG_EVENT_PROFILE
1405 if (tp->flags & TP_FLAG_PROFILE)
1406 kretprobe_profile_func(ri, regs);
1407#endif /* CONFIG_EVENT_PROFILE */
 1408	return 0;	/* We don't tweak the kernel, so just return 0 */
1409}
1410
1411static int register_probe_event(struct trace_probe *tp)
1412{
1413 struct ftrace_event_call *call = &tp->call;
1414 int ret;
1415
1416 /* Initialize ftrace_event_call */
1417 if (probe_is_return(tp)) {
1418 tp->event.trace = print_kretprobe_event;
1419 call->raw_init = probe_event_raw_init;
1420 call->show_format = kretprobe_event_show_format;
1421 call->define_fields = kretprobe_event_define_fields;
1422 } else {
1423 tp->event.trace = print_kprobe_event;
1424 call->raw_init = probe_event_raw_init;
1425 call->show_format = kprobe_event_show_format;
1426 call->define_fields = kprobe_event_define_fields;
1427 }
1428 call->event = &tp->event;
1429 call->id = register_ftrace_event(&tp->event);
1430 if (!call->id)
1431 return -ENODEV;
1432 call->enabled = 0;
1433 call->regfunc = probe_event_enable;
1434 call->unregfunc = probe_event_disable;
1435
1436#ifdef CONFIG_EVENT_PROFILE
1437 atomic_set(&call->profile_count, -1);
1438 call->profile_enable = probe_profile_enable;
1439 call->profile_disable = probe_profile_disable;
1440#endif
1441 call->data = tp;
1442 ret = trace_add_event_call(call);
1443 if (ret) {
1444 pr_info("Failed to register kprobe event: %s\n", call->name);
1445 unregister_ftrace_event(&tp->event);
1446 }
1447 return ret;
1448}
1449
1450static void unregister_probe_event(struct trace_probe *tp)
1451{
1452 /* tp->event is unregistered in trace_remove_event_call() */
1453 trace_remove_event_call(&tp->call);
1454}
1455
1456/* Make a debugfs interface for controlling probe points */
1457static __init int init_kprobe_trace(void)
1458{
1459 struct dentry *d_tracer;
1460 struct dentry *entry;
1461
1462 d_tracer = tracing_init_dentry();
1463 if (!d_tracer)
1464 return 0;
1465
1466 entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1467 NULL, &kprobe_events_ops);
1468
1469 /* Event list interface */
1470 if (!entry)
1471 pr_warning("Could not create debugfs "
1472 "'kprobe_events' entry\n");
1473
1474 /* Profile interface */
1475 entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1476 NULL, &kprobe_profile_ops);
1477
1478 if (!entry)
1479 pr_warning("Could not create debugfs "
1480 "'kprobe_profile' entry\n");
1481 return 0;
1482}
1483fs_initcall(init_kprobe_trace);
1484
1485
1486#ifdef CONFIG_FTRACE_STARTUP_TEST
1487
1488static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1489 int a4, int a5, int a6)
1490{
1491 return a1 + a2 + a3 + a4 + a5 + a6;
1492}
1493
1494static __init int kprobe_trace_self_tests_init(void)
1495{
1496 int ret;
1497 int (*target)(int, int, int, int, int, int);
1498
1499 target = kprobe_trace_selftest_target;
1500
1501 pr_info("Testing kprobe tracing: ");
1502
1503 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1504 "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
1505 if (WARN_ON_ONCE(ret))
1506 pr_warning("error enabling function entry\n");
1507
1508 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1509 "$retval");
1510 if (WARN_ON_ONCE(ret))
1511 pr_warning("error enabling function return\n");
1512
1513 ret = target(1, 2, 3, 4, 5, 6);
1514
1515 cleanup_all_probes();
1516
1517 pr_cont("OK\n");
1518 return 0;
1519}
1520
1521late_initcall(kprobe_trace_self_tests_init);
1522
1523#endif
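
The kprobe/kretprobe profile handlers above all share the same guard sequence before they touch the shared per-cpu perf buffer: disable interrupts, take the perf software-event recursion context, and release both on every exit path. A minimal sketch of that pattern, with a hypothetical handler name (an illustration, not code from the patch):

static void example_profile_handler(struct pt_regs *regs)
{
	unsigned long irq_flags;
	int rctx;

	/* irqs off: protects the per-cpu buffer and the RCU read side */
	local_irq_save(irq_flags);

	/* bail out if we are already nested inside a perf software event */
	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	/*
	 * ... pick perf_trace_buf or perf_trace_buf_nmi, build the record
	 * in the per-cpu slot, then hand it to perf_tp_event() ...
	 */

	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(irq_flags);
}
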
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..ddfa0fd43bc0
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,550 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h"
31
32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35/*
36 * For now, restrict the number of symbols traced simultaneously to the number
37 * of available hardware breakpoint registers.
38 */
39#define KSYM_TRACER_MAX HBP_NUM
40
41#define KSYM_TRACER_OP_LEN 3 /* rw- */
42
43struct trace_ksym {
44 struct perf_event **ksym_hbp;
45 struct perf_event_attr attr;
46#ifdef CONFIG_PROFILE_KSYM_TRACER
47 unsigned long counter;
48#endif
49 struct hlist_node ksym_hlist;
50};
51
52static struct trace_array *ksym_trace_array;
53
54static unsigned int ksym_filter_entry_count;
55static unsigned int ksym_tracing_enabled;
56
57static HLIST_HEAD(ksym_filter_head);
58
59static DEFINE_MUTEX(ksym_tracer_mutex);
60
61#ifdef CONFIG_PROFILE_KSYM_TRACER
62
63#define MAX_UL_INT 0xffffffff
64
65void ksym_collect_stats(unsigned long hbp_hit_addr)
66{
67 struct hlist_node *node;
68 struct trace_ksym *entry;
69
70 rcu_read_lock();
71 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
72 if ((entry->attr.bp_addr == hbp_hit_addr) &&
73 (entry->counter <= MAX_UL_INT)) {
74 entry->counter++;
75 break;
76 }
77 }
78 rcu_read_unlock();
79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81
82void ksym_hbp_handler(struct perf_event *hbp, void *data)
83{
84 struct ring_buffer_event *event;
85 struct ksym_trace_entry *entry;
86 struct pt_regs *regs = data;
87 struct ring_buffer *buffer;
88 int pc;
89
90 if (!ksym_tracing_enabled)
91 return;
92
93 buffer = ksym_trace_array->buffer;
94
95 pc = preempt_count();
96
97 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
98 sizeof(*entry), 0, pc);
99 if (!event)
100 return;
101
102 entry = ring_buffer_event_data(event);
103 entry->ip = instruction_pointer(regs);
104 entry->type = hw_breakpoint_type(hbp);
105 entry->addr = hw_breakpoint_addr(hbp);
106 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
107
108#ifdef CONFIG_PROFILE_KSYM_TRACER
109 ksym_collect_stats(hw_breakpoint_addr(hbp));
110#endif /* CONFIG_PROFILE_KSYM_TRACER */
111
112 trace_buffer_unlock_commit(buffer, event, 0, pc);
113}
114
115/* Valid access types are represented as
116 *
117 * rw- : Set Read/Write Access Breakpoint
118 * -w- : Set Write Access Breakpoint
119 * --- : Clear Breakpoints
120 * --x : Set Execution Break points (Not available yet)
121 *
122 */
123static int ksym_trace_get_access_type(char *str)
124{
125 int access = 0;
126
127 if (str[0] == 'r')
128 access |= HW_BREAKPOINT_R;
129
130 if (str[1] == 'w')
131 access |= HW_BREAKPOINT_W;
132
133 if (str[2] == 'x')
134 access |= HW_BREAKPOINT_X;
135
136 switch (access) {
137 case HW_BREAKPOINT_R:
138 case HW_BREAKPOINT_W:
139 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
140 return access;
141 default:
142 return -EINVAL;
143 }
144}
145
146/*
147 * A request can be malformed in several ways, and we attempt to catch them
148 * all. The rules are:
149 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
150 * i.e. multiple ':' symbols disallowed. Possible uses are of the form
151 * <module>:<ksym_name>:<op>.
152 * 2. No delimiter symbol ':' in the input string
153 * 3. Spurious operator symbols or symbols not in their respective positions
154 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
155 * 5. Kernel symbol not a part of /proc/kallsyms
156 * 6. Duplicate requests
157 */
158static int parse_ksym_trace_str(char *input_string, char **ksymname,
159 unsigned long *addr)
160{
161 int ret;
162
163 *ksymname = strsep(&input_string, ":");
164 *addr = kallsyms_lookup_name(*ksymname);
165
166 /* Check for malformed request: (2), (1) and (5) */
167 if ((!input_string) ||
168 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
169 (*addr == 0))
170		return -EINVAL;
171
172 ret = ksym_trace_get_access_type(input_string);
173
174 return ret;
175}
176
177int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
178{
179 struct trace_ksym *entry;
180 int ret = -ENOMEM;
181
182 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
183 printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
184 " new requests for tracing can be accepted now.\n",
185 KSYM_TRACER_MAX);
186 return -ENOSPC;
187 }
188
189 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
190 if (!entry)
191 return -ENOMEM;
192
193 hw_breakpoint_init(&entry->attr);
194
195 entry->attr.bp_type = op;
196 entry->attr.bp_addr = addr;
197 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
198
199 ret = -EAGAIN;
200 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
201 ksym_hbp_handler);
202
203 if (IS_ERR(entry->ksym_hbp)) {
204 ret = PTR_ERR(entry->ksym_hbp);
205 printk(KERN_INFO "ksym_tracer request failed. Try again"
206 " later!!\n");
207 goto err;
208 }
209
210 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
211 ksym_filter_entry_count++;
212
213 return 0;
214
215err:
216 kfree(entry);
217
218 return ret;
219}
220
221static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
222 size_t count, loff_t *ppos)
223{
224 struct trace_ksym *entry;
225 struct hlist_node *node;
226 struct trace_seq *s;
227 ssize_t cnt = 0;
228 int ret;
229
230 s = kmalloc(sizeof(*s), GFP_KERNEL);
231 if (!s)
232 return -ENOMEM;
233 trace_seq_init(s);
234
235 mutex_lock(&ksym_tracer_mutex);
236
237 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
238 ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr);
239 if (entry->attr.bp_type == HW_BREAKPOINT_R)
240 ret = trace_seq_puts(s, "r--\n");
241 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
242 ret = trace_seq_puts(s, "-w-\n");
243 else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
244 ret = trace_seq_puts(s, "rw-\n");
245 WARN_ON_ONCE(!ret);
246 }
247
248 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
249
250 mutex_unlock(&ksym_tracer_mutex);
251
252 kfree(s);
253
254 return cnt;
255}
256
257static void __ksym_trace_reset(void)
258{
259 struct trace_ksym *entry;
260 struct hlist_node *node, *node1;
261
262 mutex_lock(&ksym_tracer_mutex);
263 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
264 ksym_hlist) {
265 unregister_wide_hw_breakpoint(entry->ksym_hbp);
266 ksym_filter_entry_count--;
267 hlist_del_rcu(&(entry->ksym_hlist));
268 synchronize_rcu();
269 kfree(entry);
270 }
271 mutex_unlock(&ksym_tracer_mutex);
272}
273
274static ssize_t ksym_trace_filter_write(struct file *file,
275 const char __user *buffer,
276 size_t count, loff_t *ppos)
277{
278 struct trace_ksym *entry;
279 struct hlist_node *node;
280 char *input_string, *ksymname = NULL;
281 unsigned long ksym_addr = 0;
282 int ret, op, changed = 0;
283
284 input_string = kzalloc(count + 1, GFP_KERNEL);
285 if (!input_string)
286 return -ENOMEM;
287
288 if (copy_from_user(input_string, buffer, count)) {
289 kfree(input_string);
290 return -EFAULT;
291 }
292 input_string[count] = '\0';
293
294 strstrip(input_string);
295
296 /*
297 * Clear all breakpoints if:
298 * 1: echo > ksym_trace_filter
299 * 2: echo 0 > ksym_trace_filter
300 * 3: echo "*:---" > ksym_trace_filter
301 */
302 if (!input_string[0] || !strcmp(input_string, "0") ||
303 !strcmp(input_string, "*:---")) {
304 __ksym_trace_reset();
305 kfree(input_string);
306 return count;
307 }
308
309 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
310 if (ret < 0) {
311 kfree(input_string);
312 return ret;
313 }
314
315 mutex_lock(&ksym_tracer_mutex);
316
317 ret = -EINVAL;
318 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
319 if (entry->attr.bp_addr == ksym_addr) {
320 /* Check for malformed request: (6) */
321 if (entry->attr.bp_type != op)
322 changed = 1;
323 else
324 goto out;
325 break;
326 }
327 }
328 if (changed) {
329 unregister_wide_hw_breakpoint(entry->ksym_hbp);
330 entry->attr.bp_type = op;
331 ret = 0;
332 if (op > 0) {
333 entry->ksym_hbp =
334 register_wide_hw_breakpoint(&entry->attr,
335 ksym_hbp_handler);
336 if (IS_ERR(entry->ksym_hbp))
337 ret = PTR_ERR(entry->ksym_hbp);
338 else
339 goto out;
340 }
341 /* Error or "symbol:---" case: drop it */
342 ksym_filter_entry_count--;
343 hlist_del_rcu(&(entry->ksym_hlist));
344 synchronize_rcu();
345 kfree(entry);
346 goto out;
347 } else {
348 /* Check for malformed request: (4) */
349 if (op == 0)
350 goto out;
351 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
352 }
353out:
354 mutex_unlock(&ksym_tracer_mutex);
355
356 kfree(input_string);
357
358 if (!ret)
359 ret = count;
360 return ret;
361}
362
363static const struct file_operations ksym_tracing_fops = {
364 .open = tracing_open_generic,
365 .read = ksym_trace_filter_read,
366 .write = ksym_trace_filter_write,
367};
368
369static void ksym_trace_reset(struct trace_array *tr)
370{
371 ksym_tracing_enabled = 0;
372 __ksym_trace_reset();
373}
374
375static int ksym_trace_init(struct trace_array *tr)
376{
377 int cpu, ret = 0;
378
379 for_each_online_cpu(cpu)
380 tracing_reset(tr, cpu);
381 ksym_tracing_enabled = 1;
382 ksym_trace_array = tr;
383
384 return ret;
385}
386
387static void ksym_trace_print_header(struct seq_file *m)
388{
389 seq_puts(m,
390 "# TASK-PID CPU# Symbol "
391 "Type Function\n");
392 seq_puts(m,
393 "# | | | "
394 " | |\n");
395}
396
397static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
398{
399 struct trace_entry *entry = iter->ent;
400 struct trace_seq *s = &iter->seq;
401 struct ksym_trace_entry *field;
402 char str[KSYM_SYMBOL_LEN];
403 int ret;
404
405 if (entry->type != TRACE_KSYM)
406 return TRACE_TYPE_UNHANDLED;
407
408 trace_assign_type(field, entry);
409
410 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
411 entry->pid, iter->cpu, (char *)field->addr);
412 if (!ret)
413 return TRACE_TYPE_PARTIAL_LINE;
414
415 switch (field->type) {
416 case HW_BREAKPOINT_R:
417 ret = trace_seq_printf(s, " R ");
418 break;
419 case HW_BREAKPOINT_W:
420 ret = trace_seq_printf(s, " W ");
421 break;
422 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
423 ret = trace_seq_printf(s, " RW ");
424 break;
425 default:
426 return TRACE_TYPE_PARTIAL_LINE;
427 }
428
429 if (!ret)
430 return TRACE_TYPE_PARTIAL_LINE;
431
432 sprint_symbol(str, field->ip);
433 ret = trace_seq_printf(s, "%s\n", str);
434 if (!ret)
435 return TRACE_TYPE_PARTIAL_LINE;
436
437 return TRACE_TYPE_HANDLED;
438}
439
440struct tracer ksym_tracer __read_mostly =
441{
442 .name = "ksym_tracer",
443 .init = ksym_trace_init,
444 .reset = ksym_trace_reset,
445#ifdef CONFIG_FTRACE_SELFTEST
446 .selftest = trace_selftest_startup_ksym,
447#endif
448 .print_header = ksym_trace_print_header,
449 .print_line = ksym_trace_output
450};
451
452__init static int init_ksym_trace(void)
453{
454 struct dentry *d_tracer;
455 struct dentry *entry;
456
457 d_tracer = tracing_init_dentry();
458 ksym_filter_entry_count = 0;
459
460 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
461 NULL, &ksym_tracing_fops);
462 if (!entry)
463 pr_warning("Could not create debugfs "
464 "'ksym_trace_filter' file\n");
465
466 return register_tracer(&ksym_tracer);
467}
468device_initcall(init_ksym_trace);
469
470
471#ifdef CONFIG_PROFILE_KSYM_TRACER
472static int ksym_tracer_stat_headers(struct seq_file *m)
473{
474 seq_puts(m, " Access Type ");
475 seq_puts(m, " Symbol Counter\n");
476 seq_puts(m, " ----------- ");
477 seq_puts(m, " ------ -------\n");
478 return 0;
479}
480
481static int ksym_tracer_stat_show(struct seq_file *m, void *v)
482{
483 struct hlist_node *stat = v;
484 struct trace_ksym *entry;
485 int access_type = 0;
486 char fn_name[KSYM_NAME_LEN];
487
488 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
489
490 access_type = entry->attr.bp_type;
491
492 switch (access_type) {
493 case HW_BREAKPOINT_R:
494 seq_puts(m, " R ");
495 break;
496 case HW_BREAKPOINT_W:
497 seq_puts(m, " W ");
498 break;
499 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
500 seq_puts(m, " RW ");
501 break;
502 default:
503 seq_puts(m, " NA ");
504 }
505
506 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
507 seq_printf(m, " %-36s", fn_name);
508 else
509 seq_printf(m, " %-36s", "<NA>");
510 seq_printf(m, " %15lu\n", entry->counter);
511
512 return 0;
513}
514
515static void *ksym_tracer_stat_start(struct tracer_stat *trace)
516{
517 return ksym_filter_head.first;
518}
519
520static void *
521ksym_tracer_stat_next(void *v, int idx)
522{
523 struct hlist_node *stat = v;
524
525 return stat->next;
526}
527
528static struct tracer_stat ksym_tracer_stats = {
529 .name = "ksym_tracer",
530 .stat_start = ksym_tracer_stat_start,
531 .stat_next = ksym_tracer_stat_next,
532 .stat_headers = ksym_tracer_stat_headers,
533 .stat_show = ksym_tracer_stat_show
534};
535
536__init static int ksym_tracer_stat_init(void)
537{
538 int ret;
539
540 ret = register_stat_tracer(&ksym_tracer_stats);
541 if (ret) {
542 printk(KERN_WARNING "Warning: could not register "
543 "ksym tracer stats\n");
544 return 1;
545 }
546
547 return 0;
548}
549fs_initcall(ksym_tracer_stat_init);
550#endif /* CONFIG_PROFILE_KSYM_TRACER */
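
For reference, a self-contained sketch of the registration step that process_new_ksym_entry() performs above, reduced to its essentials; the init function and the watched symbol are hypothetical, and the register_wide_hw_breakpoint() signature is the one this patch uses:

static struct perf_event **sample_hbp;	/* hypothetical module-local handle */

static int __init sample_ksym_watch_init(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = kallsyms_lookup_name("pid_max");	/* symbol to watch */
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;

	/* consumes one breakpoint slot on every CPU */
	sample_hbp = register_wide_hw_breakpoint(&attr, ksym_hbp_handler);
	if (IS_ERR(sample_hbp))
		return PTR_ERR(sample_hbp);

	return 0;
}

Tearing it down is a single unregister_wide_hw_breakpoint(sample_hbp), which is what __ksym_trace_reset() does for each entry. From userspace the equivalent request is a write such as "pid_max:rw-" to the ksym_trace_filter file created by init_ksym_trace(), and "---" (or an empty write) clears it, per the rules in the comments above.
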
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..dc98309e839a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES: 19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
20 return 1; 21 return 1;
21 } 22 }
22 return 0; 23 return 0;
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
808 return ret; 809 return ret;
809} 810}
810#endif /* CONFIG_HW_BRANCH_TRACER */ 811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy;
815
816int
817trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
818{
819 unsigned long count;
820 int ret;
821
822 /* start the tracing */
823 ret = tracer_init(trace, tr);
824 if (ret) {
825 warn_failed_init_tracer(trace, ret);
826 return ret;
827 }
828
829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */
831
832 ret = process_new_ksym_entry("ksym_selftest_dummy",
833 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
834 (unsigned long)(&ksym_selftest_dummy));
835
836 if (ret < 0) {
837 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
838 goto ret_path;
839 }
840 /* Perform a read and a write operation over the dummy variable to
841 * trigger the tracer
842 */
843 if (ksym_selftest_dummy == 0)
844 ksym_selftest_dummy++;
845
846 /* stop the tracing. */
847 tracing_stop();
848 /* check the trace buffer */
849 ret = trace_test_buffer(tr, &count);
850 trace->reset(tr);
851 tracing_start();
852
853 /* read & write operations - one each is performed on the dummy variable
854 * triggering two entries in the trace buffer
855 */
856 if (!ret && count != 2) {
857 printk(KERN_CONT "Ksym tracer startup test failed");
858 ret = -1;
859 }
860
861ret_path:
862 return ret;
863}
864#endif /* CONFIG_KSYM_TRACER */
865
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index ddee9c593732..57501d90096a 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -51,32 +51,6 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
51 return syscalls_metadata[nr]; 51 return syscalls_metadata[nr];
52} 52}
53 53
54int syscall_name_to_nr(char *name)
55{
56 int i;
57
58 if (!syscalls_metadata)
59 return -1;
60
61 for (i = 0; i < NR_syscalls; i++) {
62 if (syscalls_metadata[i]) {
63 if (!strcmp(syscalls_metadata[i]->name, name))
64 return i;
65 }
66 }
67 return -1;
68}
69
70void set_syscall_enter_id(int num, int id)
71{
72 syscalls_metadata[num]->enter_id = id;
73}
74
75void set_syscall_exit_id(int num, int id)
76{
77 syscalls_metadata[num]->exit_id = id;
78}
79
80enum print_line_t 54enum print_line_t
81print_syscall_enter(struct trace_iterator *iter, int flags) 55print_syscall_enter(struct trace_iterator *iter, int flags)
82{ 56{
@@ -93,7 +67,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
93 if (!entry) 67 if (!entry)
94 goto end; 68 goto end;
95 69
96 if (entry->enter_id != ent->type) { 70 if (entry->enter_event->id != ent->type) {
97 WARN_ON_ONCE(1); 71 WARN_ON_ONCE(1);
98 goto end; 72 goto end;
99 } 73 }
@@ -148,7 +122,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
148 return TRACE_TYPE_HANDLED; 122 return TRACE_TYPE_HANDLED;
149 } 123 }
150 124
151 if (entry->exit_id != ent->type) { 125 if (entry->exit_event->id != ent->type) {
152 WARN_ON_ONCE(1); 126 WARN_ON_ONCE(1);
153 return TRACE_TYPE_UNHANDLED; 127 return TRACE_TYPE_UNHANDLED;
154 } 128 }
@@ -166,24 +140,19 @@ extern char *__bad_type_size(void);
166#define SYSCALL_FIELD(type, name) \ 140#define SYSCALL_FIELD(type, name) \
167 sizeof(type) != sizeof(trace.name) ? \ 141 sizeof(type) != sizeof(trace.name) ? \
168 __bad_type_size() : \ 142 __bad_type_size() : \
169 #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) 143 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type)
170 145
171int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
172{ 147{
173 int i; 148 int i;
174 int nr;
175 int ret; 149 int ret;
176 struct syscall_metadata *entry; 150 struct syscall_metadata *entry = call->data;
177 struct syscall_trace_enter trace; 151 struct syscall_trace_enter trace;
178 int offset = offsetof(struct syscall_trace_enter, args); 152 int offset = offsetof(struct syscall_trace_enter, args);
179 153
180 nr = syscall_name_to_nr(call->data); 154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
181 entry = syscall_nr_to_meta(nr); 155 "\tsigned:%u;\n",
182
183 if (!entry)
184 return 0;
185
186 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
187 SYSCALL_FIELD(int, nr)); 156 SYSCALL_FIELD(int, nr));
188 if (!ret) 157 if (!ret)
189 return 0; 158 return 0;
@@ -193,8 +162,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
193 entry->args[i]); 162 entry->args[i]);
194 if (!ret) 163 if (!ret)
195 return 0; 164 return 0;
196 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, 165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
197 sizeof(unsigned long)); 166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
198 if (!ret) 169 if (!ret)
199 return 0; 170 return 0;
200 offset += sizeof(unsigned long); 171 offset += sizeof(unsigned long);
@@ -226,8 +197,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
226 struct syscall_trace_exit trace; 197 struct syscall_trace_exit trace;
227 198
228 ret = trace_seq_printf(s, 199 ret = trace_seq_printf(s,
229 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
230 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", 201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
231 SYSCALL_FIELD(int, nr), 204 SYSCALL_FIELD(int, nr),
232 SYSCALL_FIELD(long, ret)); 205 SYSCALL_FIELD(long, ret));
233 if (!ret) 206 if (!ret)
@@ -239,22 +212,19 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
239int syscall_enter_define_fields(struct ftrace_event_call *call) 212int syscall_enter_define_fields(struct ftrace_event_call *call)
240{ 213{
241 struct syscall_trace_enter trace; 214 struct syscall_trace_enter trace;
242 struct syscall_metadata *meta; 215 struct syscall_metadata *meta = call->data;
243 int ret; 216 int ret;
244 int nr;
245 int i; 217 int i;
246 int offset = offsetof(typeof(trace), args); 218 int offset = offsetof(typeof(trace), args);
247 219
248 nr = syscall_name_to_nr(call->data);
249 meta = syscall_nr_to_meta(nr);
250
251 if (!meta)
252 return 0;
253
254 ret = trace_define_common_fields(call); 220 ret = trace_define_common_fields(call);
255 if (ret) 221 if (ret)
256 return ret; 222 return ret;
257 223
224 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
225 if (ret)
226 return ret;
227
258 for (i = 0; i < meta->nb_args; i++) { 228 for (i = 0; i < meta->nb_args; i++) {
259 ret = trace_define_field(call, meta->types[i], 229 ret = trace_define_field(call, meta->types[i],
260 meta->args[i], offset, 230 meta->args[i], offset,
@@ -275,7 +245,11 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
275 if (ret) 245 if (ret)
276 return ret; 246 return ret;
277 247
278 ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0, 248 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
249 if (ret)
250 return ret;
251
252 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
279 FILTER_OTHER); 253 FILTER_OTHER);
280 254
281 return ret; 255 return ret;
@@ -302,8 +276,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
302 276
303 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 277 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
304 278
305 event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, 279 event = trace_current_buffer_lock_reserve(&buffer,
306 size, 0, 0); 280 sys_data->enter_event->id, size, 0, 0);
307 if (!event) 281 if (!event)
308 return; 282 return;
309 283
@@ -334,8 +308,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
334 if (!sys_data) 308 if (!sys_data)
335 return; 309 return;
336 310
337 event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, 311 event = trace_current_buffer_lock_reserve(&buffer,
338 sizeof(*entry), 0, 0); 312 sys_data->exit_event->id, sizeof(*entry), 0, 0);
339 if (!event) 313 if (!event)
340 return; 314 return;
341 315
@@ -348,14 +322,12 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
348 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 322 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
349} 323}
350 324
351int reg_event_syscall_enter(void *ptr) 325int reg_event_syscall_enter(struct ftrace_event_call *call)
352{ 326{
353 int ret = 0; 327 int ret = 0;
354 int num; 328 int num;
355 char *name;
356 329
357 name = (char *)ptr; 330 num = ((struct syscall_metadata *)call->data)->syscall_nr;
358 num = syscall_name_to_nr(name);
359 if (num < 0 || num >= NR_syscalls) 331 if (num < 0 || num >= NR_syscalls)
360 return -ENOSYS; 332 return -ENOSYS;
361 mutex_lock(&syscall_trace_lock); 333 mutex_lock(&syscall_trace_lock);
@@ -372,13 +344,11 @@ int reg_event_syscall_enter(void *ptr)
372 return ret; 344 return ret;
373} 345}
374 346
375void unreg_event_syscall_enter(void *ptr) 347void unreg_event_syscall_enter(struct ftrace_event_call *call)
376{ 348{
377 int num; 349 int num;
378 char *name;
379 350
380 name = (char *)ptr; 351 num = ((struct syscall_metadata *)call->data)->syscall_nr;
381 num = syscall_name_to_nr(name);
382 if (num < 0 || num >= NR_syscalls) 352 if (num < 0 || num >= NR_syscalls)
383 return; 353 return;
384 mutex_lock(&syscall_trace_lock); 354 mutex_lock(&syscall_trace_lock);
@@ -389,14 +359,12 @@ void unreg_event_syscall_enter(void *ptr)
389 mutex_unlock(&syscall_trace_lock); 359 mutex_unlock(&syscall_trace_lock);
390} 360}
391 361
392int reg_event_syscall_exit(void *ptr) 362int reg_event_syscall_exit(struct ftrace_event_call *call)
393{ 363{
394 int ret = 0; 364 int ret = 0;
395 int num; 365 int num;
396 char *name;
397 366
398 name = (char *)ptr; 367 num = ((struct syscall_metadata *)call->data)->syscall_nr;
399 num = syscall_name_to_nr(name);
400 if (num < 0 || num >= NR_syscalls) 368 if (num < 0 || num >= NR_syscalls)
401 return -ENOSYS; 369 return -ENOSYS;
402 mutex_lock(&syscall_trace_lock); 370 mutex_lock(&syscall_trace_lock);
@@ -413,13 +381,11 @@ int reg_event_syscall_exit(void *ptr)
413 return ret; 381 return ret;
414} 382}
415 383
416void unreg_event_syscall_exit(void *ptr) 384void unreg_event_syscall_exit(struct ftrace_event_call *call)
417{ 385{
418 int num; 386 int num;
419 char *name;
420 387
421 name = (char *)ptr; 388 num = ((struct syscall_metadata *)call->data)->syscall_nr;
422 num = syscall_name_to_nr(name);
423 if (num < 0 || num >= NR_syscalls) 389 if (num < 0 || num >= NR_syscalls)
424 return; 390 return;
425 mutex_lock(&syscall_trace_lock); 391 mutex_lock(&syscall_trace_lock);
@@ -430,13 +396,17 @@ void unreg_event_syscall_exit(void *ptr)
430 mutex_unlock(&syscall_trace_lock); 396 mutex_unlock(&syscall_trace_lock);
431} 397}
432 398
433struct trace_event event_syscall_enter = { 399int init_syscall_trace(struct ftrace_event_call *call)
434 .trace = print_syscall_enter, 400{
435}; 401 int id;
436 402
437struct trace_event event_syscall_exit = { 403 id = register_ftrace_event(call->event);
438 .trace = print_syscall_exit, 404 if (!id)
439}; 405 return -ENODEV;
406 call->id = id;
407 INIT_LIST_HEAD(&call->fields);
408 return 0;
409}
440 410
441int __init init_ftrace_syscalls(void) 411int __init init_ftrace_syscalls(void)
442{ 412{
@@ -454,6 +424,10 @@ int __init init_ftrace_syscalls(void)
454 for (i = 0; i < NR_syscalls; i++) { 424 for (i = 0; i < NR_syscalls; i++) {
455 addr = arch_syscall_addr(i); 425 addr = arch_syscall_addr(i);
456 meta = find_syscall_meta(addr); 426 meta = find_syscall_meta(addr);
427 if (!meta)
428 continue;
429
430 meta->syscall_nr = i;
457 syscalls_metadata[i] = meta; 431 syscalls_metadata[i] = meta;
458 } 432 }
459 433
@@ -473,8 +447,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
473 struct syscall_metadata *sys_data; 447 struct syscall_metadata *sys_data;
474 struct syscall_trace_enter *rec; 448 struct syscall_trace_enter *rec;
475 unsigned long flags; 449 unsigned long flags;
450 char *trace_buf;
476 char *raw_data; 451 char *raw_data;
477 int syscall_nr; 452 int syscall_nr;
453 int rctx;
478 int size; 454 int size;
479 int cpu; 455 int cpu;
480 456
@@ -498,41 +474,42 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
498 /* Protect the per cpu buffer, begin the rcu read side */ 474 /* Protect the per cpu buffer, begin the rcu read side */
499 local_irq_save(flags); 475 local_irq_save(flags);
500 476
477 rctx = perf_swevent_get_recursion_context();
478 if (rctx < 0)
479 goto end_recursion;
480
501 cpu = smp_processor_id(); 481 cpu = smp_processor_id();
502 482
503 if (in_nmi()) 483 trace_buf = rcu_dereference(perf_trace_buf);
504 raw_data = rcu_dereference(trace_profile_buf_nmi);
505 else
506 raw_data = rcu_dereference(trace_profile_buf);
507 484
508 if (!raw_data) 485 if (!trace_buf)
509 goto end; 486 goto end;
510 487
511 raw_data = per_cpu_ptr(raw_data, cpu); 488 raw_data = per_cpu_ptr(trace_buf, cpu);
512 489
513 /* zero the dead bytes from align to not leak stack to user */ 490 /* zero the dead bytes from align to not leak stack to user */
514 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 491 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
515 492
516 rec = (struct syscall_trace_enter *) raw_data; 493 rec = (struct syscall_trace_enter *) raw_data;
517 tracing_generic_entry_update(&rec->ent, 0, 0); 494 tracing_generic_entry_update(&rec->ent, 0, 0);
518 rec->ent.type = sys_data->enter_id; 495 rec->ent.type = sys_data->enter_event->id;
519 rec->nr = syscall_nr; 496 rec->nr = syscall_nr;
520 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 497 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
521 (unsigned long *)&rec->args); 498 (unsigned long *)&rec->args);
522 perf_tp_event(sys_data->enter_id, 0, 1, rec, size); 499 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
523 500
524end: 501end:
502 perf_swevent_put_recursion_context(rctx);
503end_recursion:
525 local_irq_restore(flags); 504 local_irq_restore(flags);
526} 505}
527 506
528int reg_prof_syscall_enter(char *name) 507int prof_sysenter_enable(struct ftrace_event_call *call)
529{ 508{
530 int ret = 0; 509 int ret = 0;
531 int num; 510 int num;
532 511
533 num = syscall_name_to_nr(name); 512 num = ((struct syscall_metadata *)call->data)->syscall_nr;
534 if (num < 0 || num >= NR_syscalls)
535 return -ENOSYS;
536 513
537 mutex_lock(&syscall_trace_lock); 514 mutex_lock(&syscall_trace_lock);
538 if (!sys_prof_refcount_enter) 515 if (!sys_prof_refcount_enter)
@@ -548,13 +525,11 @@ int reg_prof_syscall_enter(char *name)
548 return ret; 525 return ret;
549} 526}
550 527
551void unreg_prof_syscall_enter(char *name) 528void prof_sysenter_disable(struct ftrace_event_call *call)
552{ 529{
553 int num; 530 int num;
554 531
555 num = syscall_name_to_nr(name); 532 num = ((struct syscall_metadata *)call->data)->syscall_nr;
556 if (num < 0 || num >= NR_syscalls)
557 return;
558 533
559 mutex_lock(&syscall_trace_lock); 534 mutex_lock(&syscall_trace_lock);
560 sys_prof_refcount_enter--; 535 sys_prof_refcount_enter--;
@@ -570,7 +545,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
570 struct syscall_trace_exit *rec; 545 struct syscall_trace_exit *rec;
571 unsigned long flags; 546 unsigned long flags;
572 int syscall_nr; 547 int syscall_nr;
548 char *trace_buf;
573 char *raw_data; 549 char *raw_data;
550 int rctx;
574 int size; 551 int size;
575 int cpu; 552 int cpu;
576 553
@@ -596,17 +573,19 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
596 573
597 /* Protect the per cpu buffer, begin the rcu read side */ 574 /* Protect the per cpu buffer, begin the rcu read side */
598 local_irq_save(flags); 575 local_irq_save(flags);
576
577 rctx = perf_swevent_get_recursion_context();
578 if (rctx < 0)
579 goto end_recursion;
580
599 cpu = smp_processor_id(); 581 cpu = smp_processor_id();
600 582
601 if (in_nmi()) 583 trace_buf = rcu_dereference(perf_trace_buf);
602 raw_data = rcu_dereference(trace_profile_buf_nmi);
603 else
604 raw_data = rcu_dereference(trace_profile_buf);
605 584
606 if (!raw_data) 585 if (!trace_buf)
607 goto end; 586 goto end;
608 587
609 raw_data = per_cpu_ptr(raw_data, cpu); 588 raw_data = per_cpu_ptr(trace_buf, cpu);
610 589
611 /* zero the dead bytes from align to not leak stack to user */ 590 /* zero the dead bytes from align to not leak stack to user */
612 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 591 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -614,24 +593,24 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
614 rec = (struct syscall_trace_exit *)raw_data; 593 rec = (struct syscall_trace_exit *)raw_data;
615 594
616 tracing_generic_entry_update(&rec->ent, 0, 0); 595 tracing_generic_entry_update(&rec->ent, 0, 0);
617 rec->ent.type = sys_data->exit_id; 596 rec->ent.type = sys_data->exit_event->id;
618 rec->nr = syscall_nr; 597 rec->nr = syscall_nr;
619 rec->ret = syscall_get_return_value(current, regs); 598 rec->ret = syscall_get_return_value(current, regs);
620 599
621 perf_tp_event(sys_data->exit_id, 0, 1, rec, size); 600 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
622 601
623end: 602end:
603 perf_swevent_put_recursion_context(rctx);
604end_recursion:
624 local_irq_restore(flags); 605 local_irq_restore(flags);
625} 606}
626 607
627int reg_prof_syscall_exit(char *name) 608int prof_sysexit_enable(struct ftrace_event_call *call)
628{ 609{
629 int ret = 0; 610 int ret = 0;
630 int num; 611 int num;
631 612
632 num = syscall_name_to_nr(name); 613 num = ((struct syscall_metadata *)call->data)->syscall_nr;
633 if (num < 0 || num >= NR_syscalls)
634 return -ENOSYS;
635 614
636 mutex_lock(&syscall_trace_lock); 615 mutex_lock(&syscall_trace_lock);
637 if (!sys_prof_refcount_exit) 616 if (!sys_prof_refcount_exit)
@@ -647,13 +626,11 @@ int reg_prof_syscall_exit(char *name)
647 return ret; 626 return ret;
648} 627}
649 628
650void unreg_prof_syscall_exit(char *name) 629void prof_sysexit_disable(struct ftrace_event_call *call)
651{ 630{
652 int num; 631 int num;
653 632
654 num = syscall_name_to_nr(name); 633 num = ((struct syscall_metadata *)call->data)->syscall_nr;
655 if (num < 0 || num >= NR_syscalls)
656 return;
657 634
658 mutex_lock(&syscall_trace_lock); 635 mutex_lock(&syscall_trace_lock);
659 sys_prof_refcount_exit--; 636 sys_prof_refcount_exit--;
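
The recurring change in trace_syscalls.c is that the syscall number is now taken from the event's own metadata (hung off call->data and filled in by init_ftrace_syscalls()) instead of being resolved from the event name on every call. A minimal sketch of the new lookup, with a hypothetical helper name:

/*
 * Hypothetical helper illustrating the pattern now used by the
 * reg/unreg and prof_sysenter/prof_sysexit routines above.
 */
static int example_syscall_nr(struct ftrace_event_call *call)
{
	struct syscall_metadata *meta = call->data;

	return meta->syscall_nr;	/* set in init_ftrace_syscalls() */
}
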
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 69eae358a726..a2cd77e70d4d 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -57,78 +57,47 @@ static int proc_do_uts_string(ctl_table *table, int write,
57#define proc_do_uts_string NULL 57#define proc_do_uts_string NULL
58#endif 58#endif
59 59
60
61#ifdef CONFIG_SYSCTL_SYSCALL
62/* The generic string strategy routine: */
63static int sysctl_uts_string(ctl_table *table,
64 void __user *oldval, size_t __user *oldlenp,
65 void __user *newval, size_t newlen)
66{
67 struct ctl_table uts_table;
68 int r, write;
69 write = newval && newlen;
70 memcpy(&uts_table, table, sizeof(uts_table));
71 uts_table.data = get_uts(table, write);
72 r = sysctl_string(&uts_table, oldval, oldlenp, newval, newlen);
73 put_uts(table, write, uts_table.data);
74 return r;
75}
76#else
77#define sysctl_uts_string NULL
78#endif
79
80static struct ctl_table uts_kern_table[] = { 60static struct ctl_table uts_kern_table[] = {
81 { 61 {
82 .ctl_name = KERN_OSTYPE,
83 .procname = "ostype", 62 .procname = "ostype",
84 .data = init_uts_ns.name.sysname, 63 .data = init_uts_ns.name.sysname,
85 .maxlen = sizeof(init_uts_ns.name.sysname), 64 .maxlen = sizeof(init_uts_ns.name.sysname),
86 .mode = 0444, 65 .mode = 0444,
87 .proc_handler = proc_do_uts_string, 66 .proc_handler = proc_do_uts_string,
88 .strategy = sysctl_uts_string,
89 }, 67 },
90 { 68 {
91 .ctl_name = KERN_OSRELEASE,
92 .procname = "osrelease", 69 .procname = "osrelease",
93 .data = init_uts_ns.name.release, 70 .data = init_uts_ns.name.release,
94 .maxlen = sizeof(init_uts_ns.name.release), 71 .maxlen = sizeof(init_uts_ns.name.release),
95 .mode = 0444, 72 .mode = 0444,
96 .proc_handler = proc_do_uts_string, 73 .proc_handler = proc_do_uts_string,
97 .strategy = sysctl_uts_string,
98 }, 74 },
99 { 75 {
100 .ctl_name = KERN_VERSION,
101 .procname = "version", 76 .procname = "version",
102 .data = init_uts_ns.name.version, 77 .data = init_uts_ns.name.version,
103 .maxlen = sizeof(init_uts_ns.name.version), 78 .maxlen = sizeof(init_uts_ns.name.version),
104 .mode = 0444, 79 .mode = 0444,
105 .proc_handler = proc_do_uts_string, 80 .proc_handler = proc_do_uts_string,
106 .strategy = sysctl_uts_string,
107 }, 81 },
108 { 82 {
109 .ctl_name = KERN_NODENAME,
110 .procname = "hostname", 83 .procname = "hostname",
111 .data = init_uts_ns.name.nodename, 84 .data = init_uts_ns.name.nodename,
112 .maxlen = sizeof(init_uts_ns.name.nodename), 85 .maxlen = sizeof(init_uts_ns.name.nodename),
113 .mode = 0644, 86 .mode = 0644,
114 .proc_handler = proc_do_uts_string, 87 .proc_handler = proc_do_uts_string,
115 .strategy = sysctl_uts_string,
116 }, 88 },
117 { 89 {
118 .ctl_name = KERN_DOMAINNAME,
119 .procname = "domainname", 90 .procname = "domainname",
120 .data = init_uts_ns.name.domainname, 91 .data = init_uts_ns.name.domainname,
121 .maxlen = sizeof(init_uts_ns.name.domainname), 92 .maxlen = sizeof(init_uts_ns.name.domainname),
122 .mode = 0644, 93 .mode = 0644,
123 .proc_handler = proc_do_uts_string, 94 .proc_handler = proc_do_uts_string,
124 .strategy = sysctl_uts_string,
125 }, 95 },
126 {} 96 {}
127}; 97};
128 98
129static struct ctl_table uts_root_table[] = { 99static struct ctl_table uts_root_table[] = {
130 { 100 {
131 .ctl_name = CTL_KERN,
132 .procname = "kernel", 101 .procname = "kernel",
133 .mode = 0555, 102 .mode = 0555,
134 .child = uts_kern_table, 103 .child = uts_kern_table,