Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                  |    2
-rw-r--r--  kernel/acct.c                    |   20
-rw-r--r--  kernel/cgroup.c                  |    2
-rw-r--r--  kernel/cgroup_freezer.c          |   21
-rw-r--r--  kernel/compat.c                  |   25
-rw-r--r--  kernel/cred.c                    |    2
-rw-r--r--  kernel/debug/Makefile            |    6
-rw-r--r--  kernel/debug/debug_core.c        |  983
-rw-r--r--  kernel/debug/debug_core.h        |   81
-rw-r--r--  kernel/debug/gdbstub.c           | 1017
-rw-r--r--  kernel/debug/kdb/.gitignore      |    1
-rw-r--r--  kernel/debug/kdb/Makefile        |   25
-rw-r--r--  kernel/debug/kdb/kdb_bp.c        |  564
-rw-r--r--  kernel/debug/kdb/kdb_bt.c        |  210
-rw-r--r--  kernel/debug/kdb/kdb_cmds        |   35
-rw-r--r--  kernel/debug/kdb/kdb_debugger.c  |  169
-rw-r--r--  kernel/debug/kdb/kdb_io.c        |  826
-rw-r--r--  kernel/debug/kdb/kdb_keyboard.c  |  212
-rw-r--r--  kernel/debug/kdb/kdb_main.c      | 2849
-rw-r--r--  kernel/debug/kdb/kdb_private.h   |  300
-rw-r--r--  kernel/debug/kdb/kdb_support.c   |  927
-rw-r--r--  kernel/groups.c                  |    6
-rw-r--r--  kernel/hrtimer.c                 |   67
-rw-r--r--  kernel/irq/handle.c              |    3
-rw-r--r--  kernel/irq/manage.c              |   89
-rw-r--r--  kernel/irq/proc.c                |   60
-rw-r--r--  kernel/kallsyms.c                |   21
-rw-r--r--  kernel/kgdb.c                    | 1764
-rw-r--r--  kernel/ksysfs.c                  |    3
-rw-r--r--  kernel/lockdep.c                 |    5
-rw-r--r--  kernel/module.c                  |    6
-rw-r--r--  kernel/padata.c                  |  185
-rw-r--r--  kernel/pm_qos_params.c           |  218
-rw-r--r--  kernel/posix-cpu-timers.c        |  298
-rw-r--r--  kernel/power/Makefile            |    3
-rw-r--r--  kernel/power/block_io.c          |  103
-rw-r--r--  kernel/power/power.h             |   27
-rw-r--r--  kernel/power/snapshot.c          |  145
-rw-r--r--  kernel/power/swap.c              |  333
-rw-r--r--  kernel/power/user.c              |   37
-rw-r--r--  kernel/printk.c                  |   25
-rw-r--r--  kernel/relay.c                   |   15
-rw-r--r--  kernel/sched.c                   |    8
-rw-r--r--  kernel/sched_clock.c             |    1
-rw-r--r--  kernel/signal.c                  |   40
-rw-r--r--  kernel/sys.c                     |   31
-rw-r--r--  kernel/sysctl.c                  |  588
-rw-r--r--  kernel/sysctl_binary.c           |    1
-rw-r--r--  kernel/time.c                    |   11
-rw-r--r--  kernel/time/clocksource.c        |   48
-rw-r--r--  kernel/time/ntp.c                |    2
-rw-r--r--  kernel/time/timekeeping.c        |   35
-rw-r--r--  kernel/timer.c                   |  137
-rw-r--r--  kernel/trace/trace.c             |   60
-rw-r--r--  kernel/trace/trace_output.c      |   16
-rw-r--r--  kernel/user_namespace.c          |    4
-rw-r--r--  kernel/workqueue.c               |   36
57 files changed, 9869 insertions(+), 2839 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 149e18ef1ab1..057472fbc272 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -75,7 +75,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
 obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_KGDB) += kgdb.o
+obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
diff --git a/kernel/acct.c b/kernel/acct.c
index e4c0e1fee9b0..385b88461c29 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -216,7 +216,6 @@ static int acct_on(char *name)
 {
 	struct file *file;
 	struct vfsmount *mnt;
-	int error;
 	struct pid_namespace *ns;
 	struct bsd_acct_struct *acct = NULL;
 
@@ -244,13 +243,6 @@ static int acct_on(char *name)
 		}
 	}
 
-	error = security_acct(file);
-	if (error) {
-		kfree(acct);
-		filp_close(file, NULL);
-		return error;
-	}
-
 	spin_lock(&acct_lock);
 	if (ns->bacct == NULL) {
 		ns->bacct = acct;
@@ -281,7 +273,7 @@ static int acct_on(char *name)
  */
 SYSCALL_DEFINE1(acct, const char __user *, name)
 {
-	int error;
+	int error = 0;
 
 	if (!capable(CAP_SYS_PACCT))
 		return -EPERM;
@@ -299,13 +291,11 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
 		if (acct == NULL)
 			return 0;
 
-		error = security_acct(NULL);
-		if (!error) {
-			spin_lock(&acct_lock);
-			acct_file_reopen(acct, NULL, NULL);
-			spin_unlock(&acct_lock);
-		}
+		spin_lock(&acct_lock);
+		acct_file_reopen(acct, NULL, NULL);
+		spin_unlock(&acct_lock);
 	}
+
 	return error;
 }
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e9ec642932ee..291775021b2e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3615,7 +3615,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
  * @ss: the subsystem to load
  *
  * This function should be called in a modular subsystem's initcall. If the
- * subsytem is built as a module, it will be assigned a new subsys_id and set
+ * subsystem is built as a module, it will be assigned a new subsys_id and set
  * up for use. If the subsystem is built-in anyway, work is delegated to the
  * simpler cgroup_init_subsys.
  */
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e5c0244962b0..ce71ed53e88f 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -89,10 +89,10 @@ struct cgroup_subsys freezer_subsys;
 
 /* Locks taken and their ordering
  * ------------------------------
- * css_set_lock
  * cgroup_mutex (AKA cgroup_lock)
- * task->alloc_lock (AKA task_lock)
  * freezer->lock
+ * css_set_lock
+ * task->alloc_lock (AKA task_lock)
  * task->sighand->siglock
  *
  * cgroup code forces css_set_lock to be taken before task->alloc_lock
@@ -100,33 +100,38 @@ struct cgroup_subsys freezer_subsys;
  * freezer_create(), freezer_destroy():
  * cgroup_mutex [ by cgroup core ]
  *
- * can_attach():
- * cgroup_mutex
+ * freezer_can_attach():
+ * cgroup_mutex (held by caller of can_attach)
  *
- * cgroup_frozen():
+ * cgroup_freezing_or_frozen():
  * task->alloc_lock (to get task's cgroup)
  *
  * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
- * task->alloc_lock (to get task's cgroup)
  * freezer->lock
  * sighand->siglock (if the cgroup is freezing)
  *
  * freezer_read():
  * cgroup_mutex
  * freezer->lock
+ * write_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock
  * read_lock css_set_lock (cgroup iterator start)
 *
  * freezer_write() (freeze):
  * cgroup_mutex
  * freezer->lock
+ * write_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock
  * read_lock css_set_lock (cgroup iterator start)
- * sighand->siglock
+ * sighand->siglock (fake signal delivery inside freeze_task())
  *
  * freezer_write() (unfreeze):
  * cgroup_mutex
  * freezer->lock
+ * write_lock css_set_lock (cgroup iterator start)
+ * task->alloc_lock
  * read_lock css_set_lock (cgroup iterator start)
- * task->alloc_lock (to prevent races with freeze_task())
+ * task->alloc_lock (inside thaw_process(), prevents race with refrigerator())
  * sighand->siglock
  */
 static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
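
The lock-ordering comment above is the substance of this hunk. As a quick illustration of what the documented hierarchy means in practice, a hypothetical path that needed several of these locks at once would have to nest them strictly in the listed order. This is an editor's sketch only, not code from the patch; the exact lock primitives and irq handling are elided assumptions:

	/* cgroup_mutex is assumed already held by the cgroup-core caller */
	spin_lock_irq(&freezer->lock);			/* freezer->lock */
	read_lock(&css_set_lock);			/* css_set_lock */
	task_lock(task);				/* task->alloc_lock */
	spin_lock(&task->sighand->siglock);		/* siglock, innermost */
	/* ... work on the task ... */
	spin_unlock(&task->sighand->siglock);
	task_unlock(task);
	read_unlock(&css_set_lock);
	spin_unlock_irq(&freezer->lock);

Releases happen in reverse; taking any of these locks while already holding one that appears below it in the list risks an AB-BA deadlock.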
diff --git a/kernel/compat.c b/kernel/compat.c
index 7f40e9275fd9..5adab05a3172 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -495,29 +495,26 @@ asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
 {
 	int ret;
 	cpumask_var_t mask;
-	unsigned long *k;
-	unsigned int min_length = cpumask_size();
-
-	if (nr_cpu_ids <= BITS_PER_COMPAT_LONG)
-		min_length = sizeof(compat_ulong_t);
 
-	if (len < min_length)
+	if ((len * BITS_PER_BYTE) < nr_cpu_ids)
+		return -EINVAL;
+	if (len & (sizeof(compat_ulong_t)-1))
 		return -EINVAL;
 
 	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
 		return -ENOMEM;
 
 	ret = sched_getaffinity(pid, mask);
-	if (ret < 0)
-		goto out;
+	if (ret == 0) {
+		size_t retlen = min_t(size_t, len, cpumask_size());
 
-	k = cpumask_bits(mask);
-	ret = compat_put_bitmap(user_mask_ptr, k, min_length * 8);
-	if (ret == 0)
-		ret = min_length;
-
-out:
+		if (compat_put_bitmap(user_mask_ptr, cpumask_bits(mask), retlen * 8))
+			ret = -EFAULT;
+		else
+			ret = retlen;
+	}
 	free_cpumask_var(mask);
+
 	return ret;
 }
 
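
In plain terms, the rewritten compat path now requires the user buffer to be large enough to hold a bit for every possible cpu and to be a whole number of compat words, then returns the number of bytes actually copied. A standalone sketch of just the size checks (editor's illustration, assuming BITS_PER_BYTE is 8 and compat_ulong_t is a 32-bit type):

	/* Sketch: validate a compat cpumask buffer of 'len' bytes. */
	static int demo_check_mask_len(unsigned int len, unsigned int nr_cpu_ids)
	{
		if (len * 8 < nr_cpu_ids)	/* too small to hold the mask */
			return -EINVAL;
		if (len & (sizeof(u32) - 1))	/* not whole compat words */
			return -EINVAL;
		return 0;
	}

For example, with nr_cpu_ids = 8 a 4-byte buffer passes both checks (32 bits >= 8, and 4 is one whole word), while a 6-byte buffer fails the second check.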
diff --git a/kernel/cred.c b/kernel/cred.c
index 8f3672a58a1e..2c24870c55d1 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -522,8 +522,6 @@ int commit_creds(struct cred *new)
 #endif
 	BUG_ON(atomic_read(&new->usage) < 1);
 
-	security_commit_creds(new, old);
-
 	get_cred(new); /* we will require a ref for the subj creds too */
 
 	/* dumpability changes */
diff --git a/kernel/debug/Makefile b/kernel/debug/Makefile
new file mode 100644
index 000000000000..a85edc339985
--- /dev/null
+++ b/kernel/debug/Makefile
@@ -0,0 +1,6 @@
#
# Makefile for the linux kernel debugger
#

obj-$(CONFIG_KGDB) += debug_core.o gdbstub.o
obj-$(CONFIG_KGDB_KDB) += kdb/
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
new file mode 100644
index 000000000000..5cb7cd1de10c
--- /dev/null
+++ b/kernel/debug/debug_core.c
@@ -0,0 +1,983 @@
/*
 * Kernel Debug Core
 *
 * Maintainer: Jason Wessel <jason.wessel@windriver.com>
 *
 * Copyright (C) 2000-2001 VERITAS Software Corporation.
 * Copyright (C) 2002-2004 Timesys Corporation
 * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
 * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
 * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
 * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
 * Copyright (C) 2005-2009 Wind River Systems, Inc.
 * Copyright (C) 2007 MontaVista Software, Inc.
 * Copyright (C) 2008 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *
 * Contributors at various stages not listed above:
 *  Jason Wessel ( jason.wessel@windriver.com )
 *  George Anzinger <george@mvista.com>
 *  Anurekh Saxena (anurekh.saxena@timesys.com)
 *  Lake Stevens Instrument Division (Glenn Engel)
 *  Jim Kingdon, Cygnus Support.
 *
 * Original KGDB stub: David Grothe <dave@gcom.com>,
 * Tigran Aivazian <tigran@sco.com>
 *
 * This file is licensed under the terms of the GNU General Public License
 * version 2. This program is licensed "as is" without any warranty of any
 * kind, whether express or implied.
 */
#include <linux/pid_namespace.h>
#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/console.h>
#include <linux/threads.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/sysrq.h>
#include <linux/init.h>
#include <linux/kgdb.h>
#include <linux/kdb.h>
#include <linux/pid.h>
#include <linux/smp.h>
#include <linux/mm.h>

#include <asm/cacheflush.h>
#include <asm/byteorder.h>
#include <asm/atomic.h>
#include <asm/system.h>

#include "debug_core.h"

static int kgdb_break_asap;

struct debuggerinfo_struct kgdb_info[NR_CPUS];

/**
 * kgdb_connected - Is a host GDB connected to us?
 */
int kgdb_connected;
EXPORT_SYMBOL_GPL(kgdb_connected);

/* All the KGDB handlers are installed */
int kgdb_io_module_registered;

/* Guard for recursive entry */
static int exception_level;

struct kgdb_io *dbg_io_ops;
static DEFINE_SPINLOCK(kgdb_registration_lock);

/* kgdb console driver is loaded */
static int kgdb_con_registered;
/* determine if kgdb console output should be used */
static int kgdb_use_con;
/* Flag for alternate operations for early debugging */
bool dbg_is_early = true;
/* Next cpu to become the master debug core */
int dbg_switch_cpu;

/* Use kdb or gdbserver mode */
int dbg_kdb_mode = 1;

static int __init opt_kgdb_con(char *str)
{
	kgdb_use_con = 1;
	return 0;
}

early_param("kgdbcon", opt_kgdb_con);

module_param(kgdb_use_con, int, 0644);

/*
 * Holds information about breakpoints in a kernel. These breakpoints are
 * added and removed by gdb.
 */
static struct kgdb_bkpt kgdb_break[KGDB_MAX_BREAKPOINTS] = {
	[0 ... KGDB_MAX_BREAKPOINTS-1] = { .state = BP_UNDEFINED }
};

/*
 * The CPU# of the active CPU, or -1 if none:
 */
atomic_t kgdb_active = ATOMIC_INIT(-1);
EXPORT_SYMBOL_GPL(kgdb_active);

/*
 * We use NR_CPUS not PERCPU, in case kgdb is used to debug early
 * bootup code (which might not have percpu set up yet):
 */
static atomic_t passive_cpu_wait[NR_CPUS];
static atomic_t cpu_in_kgdb[NR_CPUS];
static atomic_t kgdb_break_tasklet_var;
atomic_t kgdb_setting_breakpoint;

struct task_struct *kgdb_usethread;
struct task_struct *kgdb_contthread;

int kgdb_single_step;
static pid_t kgdb_sstep_pid;

/* to keep track of the CPU which is doing the single stepping */
atomic_t kgdb_cpu_doing_single_step = ATOMIC_INIT(-1);

/*
 * If you are debugging a problem where roundup (the collection of
 * all other CPUs) is a problem [this should be extremely rare],
 * then use the nokgdbroundup option to avoid roundup. In that case
 * the other CPUs might interfere with your debugging context, so
 * use this with care:
 */
static int kgdb_do_roundup = 1;

static int __init opt_nokgdbroundup(char *str)
{
	kgdb_do_roundup = 0;

	return 0;
}

early_param("nokgdbroundup", opt_nokgdbroundup);

/*
 * Finally, some KGDB code :-)
 */

/*
 * Weak aliases for breakpoint management,
 * can be overridden by architectures when needed:
 */
int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
{
	int err;

	err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE);
	if (err)
		return err;

	return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr,
				  BREAK_INSTR_SIZE);
}

int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
{
	return probe_kernel_write((char *)addr,
				  (char *)bundle, BREAK_INSTR_SIZE);
}

int __weak kgdb_validate_break_address(unsigned long addr)
{
	char tmp_variable[BREAK_INSTR_SIZE];
	int err;
	/* Validate setting the breakpoint and then removing it.  If the
	 * remove fails, the kernel needs to emit a bad message because we
	 * are in deep trouble not being able to put things back the way we
	 * found them.
	 */
	err = kgdb_arch_set_breakpoint(addr, tmp_variable);
	if (err)
		return err;
	err = kgdb_arch_remove_breakpoint(addr, tmp_variable);
	if (err)
		printk(KERN_ERR "KGDB: Critical breakpoint error, kernel "
		       "memory destroyed at: %lx", addr);
	return err;
}

unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs)
{
	return instruction_pointer(regs);
}

int __weak kgdb_arch_init(void)
{
	return 0;
}

int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
{
	return 0;
}

/**
 *	kgdb_disable_hw_debug - Disable hardware debugging while we are in kgdb.
 *	@regs: Current &struct pt_regs.
 *
 *	This function will be called if the particular architecture must
 *	disable hardware debugging while it is processing gdb packets or
 *	handling an exception.
 */
void __weak kgdb_disable_hw_debug(struct pt_regs *regs)
{
}

/*
 * Some architectures need cache flushes when we set/clear a
 * breakpoint:
 */
static void kgdb_flush_swbreak_addr(unsigned long addr)
{
	if (!CACHE_FLUSH_IS_SAFE)
		return;

	if (current->mm && current->mm->mmap_cache) {
		flush_cache_range(current->mm->mmap_cache,
				  addr, addr + BREAK_INSTR_SIZE);
	}
	/* Force flush instruction cache if it was outside the mm */
	flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
}

/*
 * SW breakpoint management:
 */
int dbg_activate_sw_breakpoints(void)
{
	unsigned long addr;
	int error;
	int ret = 0;
	int i;

	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
		if (kgdb_break[i].state != BP_SET)
			continue;

		addr = kgdb_break[i].bpt_addr;
		error = kgdb_arch_set_breakpoint(addr,
						 kgdb_break[i].saved_instr);
		if (error) {
			ret = error;
			printk(KERN_INFO "KGDB: BP install failed: %lx", addr);
			continue;
		}

		kgdb_flush_swbreak_addr(addr);
		kgdb_break[i].state = BP_ACTIVE;
	}
	return ret;
}

int dbg_set_sw_break(unsigned long addr)
{
	int err = kgdb_validate_break_address(addr);
	int breakno = -1;
	int i;

	if (err)
		return err;

	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
		if ((kgdb_break[i].state == BP_SET) &&
		    (kgdb_break[i].bpt_addr == addr))
			return -EEXIST;
	}
	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
		if (kgdb_break[i].state == BP_REMOVED &&
		    kgdb_break[i].bpt_addr == addr) {
			breakno = i;
			break;
		}
	}

	if (breakno == -1) {
		for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
			if (kgdb_break[i].state == BP_UNDEFINED) {
				breakno = i;
				break;
			}
		}
	}

	if (breakno == -1)
		return -E2BIG;

	kgdb_break[breakno].state = BP_SET;
	kgdb_break[breakno].type = BP_BREAKPOINT;
	kgdb_break[breakno].bpt_addr = addr;

	return 0;
}

int dbg_deactivate_sw_breakpoints(void)
{
	unsigned long addr;
	int error;
	int ret = 0;
	int i;

	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
		if (kgdb_break[i].state != BP_ACTIVE)
			continue;
		addr = kgdb_break[i].bpt_addr;
		error = kgdb_arch_remove_breakpoint(addr,
						    kgdb_break[i].saved_instr);
		if (error) {
			printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
			ret = error;
		}

		kgdb_flush_swbreak_addr(addr);
		kgdb_break[i].state = BP_SET;
	}
	return ret;
}

int dbg_remove_sw_break(unsigned long addr)
{
	int i;

	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
		if ((kgdb_break[i].state == BP_SET) &&
		    (kgdb_break[i].bpt_addr == addr)) {
			kgdb_break[i].state = BP_REMOVED;
			return 0;
		}
	}
	return -ENOENT;
}

int kgdb_isremovedbreak(unsigned long addr)
{
	int i;

	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
		if ((kgdb_break[i].state == BP_REMOVED) &&
		    (kgdb_break[i].bpt_addr == addr))
			return 1;
	}
	return 0;
}

int dbg_remove_all_break(void)
{
	unsigned long addr;
	int error;
	int i;

	/* Clear memory breakpoints. */
	for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
		if (kgdb_break[i].state != BP_ACTIVE)
			goto setundefined;
		addr = kgdb_break[i].bpt_addr;
		error = kgdb_arch_remove_breakpoint(addr,
						    kgdb_break[i].saved_instr);
		if (error)
			printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n",
			       addr);
setundefined:
		kgdb_break[i].state = BP_UNDEFINED;
	}

	/* Clear hardware breakpoints. */
	if (arch_kgdb_ops.remove_all_hw_break)
		arch_kgdb_ops.remove_all_hw_break();

	return 0;
}

/*
 * Return true if there is a valid kgdb I/O module.  Also, if no
 * debugger is attached, a message can be printed to the console about
 * waiting for the debugger to attach.
 *
 * The print_wait argument is only to be true when called from inside
 * the core kgdb_handle_exception, because it will wait for the
 * debugger to attach.
 */
static int kgdb_io_ready(int print_wait)
{
	if (!dbg_io_ops)
		return 0;
	if (kgdb_connected)
		return 1;
	if (atomic_read(&kgdb_setting_breakpoint))
		return 1;
	if (print_wait) {
#ifdef CONFIG_KGDB_KDB
		if (!dbg_kdb_mode)
			printk(KERN_CRIT "KGDB: waiting... or $3#33 for KDB\n");
#else
		printk(KERN_CRIT "KGDB: Waiting for remote debugger\n");
#endif
	}
	return 1;
}

static int kgdb_reenter_check(struct kgdb_state *ks)
{
	unsigned long addr;

	if (atomic_read(&kgdb_active) != raw_smp_processor_id())
		return 0;

	/* Panic on recursive debugger calls: */
	exception_level++;
	addr = kgdb_arch_pc(ks->ex_vector, ks->linux_regs);
	dbg_deactivate_sw_breakpoints();

	/*
	 * If the breakpoint was removed ok at the place the exception
	 * occurred, try to recover and print a warning to the end
	 * user because the user planted a breakpoint in a place that
	 * KGDB needs in order to function.
	 */
	if (dbg_remove_sw_break(addr) == 0) {
		exception_level = 0;
		kgdb_skipexception(ks->ex_vector, ks->linux_regs);
		dbg_activate_sw_breakpoints();
		printk(KERN_CRIT "KGDB: re-enter error: breakpoint removed %lx\n",
			addr);
		WARN_ON_ONCE(1);

		return 1;
	}
	dbg_remove_all_break();
	kgdb_skipexception(ks->ex_vector, ks->linux_regs);

	if (exception_level > 1) {
		dump_stack();
		panic("Recursive entry to debugger");
	}

	printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints killed\n");
#ifdef CONFIG_KGDB_KDB
	/* Allow kdb to debug itself one level */
	return 0;
#endif
	dump_stack();
	panic("Recursive entry to debugger");

	return 1;
}

static void dbg_cpu_switch(int cpu, int next_cpu)
{
	/* Mark the cpu we are switching away from as a slave when it
	 * holds the kgdb_active token.  This must be done so that all
	 * the cpus waiting in the debug core will not enter again as
	 * the master. */
	if (cpu == atomic_read(&kgdb_active)) {
		kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
		kgdb_info[cpu].exception_state &= ~DCPU_WANT_MASTER;
	}
	kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER;
}

static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
{
	unsigned long flags;
	int sstep_tries = 100;
	int error;
	int i, cpu;
	int trace_on = 0;
acquirelock:
	/*
	 * Interrupts will be restored by the 'trap return' code, except when
	 * single stepping.
	 */
	local_irq_save(flags);

	cpu = ks->cpu;
	kgdb_info[cpu].debuggerinfo = regs;
	kgdb_info[cpu].task = current;
	kgdb_info[cpu].ret_state = 0;
	kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT;
	/*
	 * Make sure the above info reaches the primary CPU before
	 * our cpu_in_kgdb[] flag setting does:
	 */
	atomic_inc(&cpu_in_kgdb[cpu]);

	if (exception_level == 1)
		goto cpu_master_loop;

	/*
	 * CPU will loop if it is a slave or request to become a kgdb
	 * master cpu and acquire the kgdb_active lock:
	 */
	while (1) {
cpu_loop:
		if (kgdb_info[cpu].exception_state & DCPU_NEXT_MASTER) {
			kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER;
			goto cpu_master_loop;
		} else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
			if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
				break;
		} else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) {
			if (!atomic_read(&passive_cpu_wait[cpu]))
				goto return_normal;
		} else {
return_normal:
			/* Return to normal operation by executing any
			 * hw breakpoint fixup.
			 */
			if (arch_kgdb_ops.correct_hw_break)
				arch_kgdb_ops.correct_hw_break();
			if (trace_on)
				tracing_on();
			atomic_dec(&cpu_in_kgdb[cpu]);
			touch_softlockup_watchdog_sync();
			clocksource_touch_watchdog();
			local_irq_restore(flags);
			return 0;
		}
		cpu_relax();
	}

	/*
	 * For single stepping, try to only enter on the processor
	 * that was single stepping.  To guard against a deadlock, the
	 * kernel will only try for the value of sstep_tries before
	 * giving up and continuing on.
	 */
	if (atomic_read(&kgdb_cpu_doing_single_step) != -1 &&
	    (kgdb_info[cpu].task &&
	     kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
		atomic_set(&kgdb_active, -1);
		touch_softlockup_watchdog_sync();
		clocksource_touch_watchdog();
		local_irq_restore(flags);

		goto acquirelock;
	}

	if (!kgdb_io_ready(1)) {
		kgdb_info[cpu].ret_state = 1;
		goto kgdb_restore; /* No I/O connection, resume the system */
	}

	/*
	 * Don't enter if we have hit a removed breakpoint.
	 */
	if (kgdb_skipexception(ks->ex_vector, ks->linux_regs))
		goto kgdb_restore;

	/* Call the I/O driver's pre_exception routine */
	if (dbg_io_ops->pre_exception)
		dbg_io_ops->pre_exception();

	kgdb_disable_hw_debug(ks->linux_regs);

	/*
	 * Get the passive CPU lock which will hold all the non-primary
	 * CPUs in a spin state while the debugger is active
	 */
	if (!kgdb_single_step) {
		for (i = 0; i < NR_CPUS; i++)
			atomic_inc(&passive_cpu_wait[i]);
	}

#ifdef CONFIG_SMP
	/* Signal the other CPUs to enter kgdb_wait() */
	if ((!kgdb_single_step) && kgdb_do_roundup)
		kgdb_roundup_cpus(flags);
#endif

	/*
	 * Wait for the other CPUs to be notified and be waiting for us:
	 */
	for_each_online_cpu(i) {
		while (kgdb_do_roundup && !atomic_read(&cpu_in_kgdb[i]))
			cpu_relax();
	}

	/*
	 * At this point the primary processor is completely
	 * in the debugger and all secondary CPUs are quiescent
	 */
	dbg_deactivate_sw_breakpoints();
	kgdb_single_step = 0;
	kgdb_contthread = current;
	exception_level = 0;
	trace_on = tracing_is_on();
	if (trace_on)
		tracing_off();

	while (1) {
cpu_master_loop:
		if (dbg_kdb_mode) {
			kgdb_connected = 1;
			error = kdb_stub(ks);
		} else {
			error = gdb_serial_stub(ks);
		}

		if (error == DBG_PASS_EVENT) {
			dbg_kdb_mode = !dbg_kdb_mode;
			kgdb_connected = 0;
		} else if (error == DBG_SWITCH_CPU_EVENT) {
			dbg_cpu_switch(cpu, dbg_switch_cpu);
			goto cpu_loop;
		} else {
			kgdb_info[cpu].ret_state = error;
			break;
		}
	}

	/* Call the I/O driver's post_exception routine */
	if (dbg_io_ops->post_exception)
		dbg_io_ops->post_exception();

	atomic_dec(&cpu_in_kgdb[ks->cpu]);

	if (!kgdb_single_step) {
		for (i = NR_CPUS-1; i >= 0; i--)
			atomic_dec(&passive_cpu_wait[i]);
		/*
		 * Wait till all the CPUs have quit from the debugger,
		 * but allow a CPU that hit an exception and is
		 * waiting to become the master to remain in the debug
		 * core.
		 */
		for_each_online_cpu(i) {
			while (kgdb_do_roundup &&
			       atomic_read(&cpu_in_kgdb[i]) &&
			       !(kgdb_info[i].exception_state &
				 DCPU_WANT_MASTER))
				cpu_relax();
		}
	}

kgdb_restore:
	if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
		int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step);
		if (kgdb_info[sstep_cpu].task)
			kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid;
		else
			kgdb_sstep_pid = 0;
	}
	if (trace_on)
		tracing_on();
	/* Free kgdb_active */
	atomic_set(&kgdb_active, -1);
	touch_softlockup_watchdog_sync();
	clocksource_touch_watchdog();
	local_irq_restore(flags);

	return kgdb_info[cpu].ret_state;
}

/*
 * kgdb_handle_exception() - main entry point from a kernel exception
 *
 * Locking hierarchy:
 *	interface locks, if any (begin_session)
 *	kgdb lock (kgdb_active)
 */
int
kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
{
	struct kgdb_state kgdb_var;
	struct kgdb_state *ks = &kgdb_var;
	int ret;

	ks->cpu			= raw_smp_processor_id();
	ks->ex_vector		= evector;
	ks->signo		= signo;
	ks->err_code		= ecode;
	ks->kgdb_usethreadid	= 0;
	ks->linux_regs		= regs;

	if (kgdb_reenter_check(ks))
		return 0; /* Ouch, double exception ! */
	kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
	ret = kgdb_cpu_enter(ks, regs);
	kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER |
						DCPU_IS_SLAVE);
	return ret;
}

int kgdb_nmicallback(int cpu, void *regs)
{
#ifdef CONFIG_SMP
	struct kgdb_state kgdb_var;
	struct kgdb_state *ks = &kgdb_var;

	memset(ks, 0, sizeof(struct kgdb_state));
	ks->cpu			= cpu;
	ks->linux_regs		= regs;

	if (!atomic_read(&cpu_in_kgdb[cpu]) &&
	    atomic_read(&kgdb_active) != -1 &&
	    atomic_read(&kgdb_active) != cpu) {
		kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
		kgdb_cpu_enter(ks, regs);
		kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE;
		return 0;
	}
#endif
	return 1;
}

static void kgdb_console_write(struct console *co, const char *s,
			       unsigned count)
{
	unsigned long flags;

	/* If we're debugging, or KGDB has not connected, don't try
	 * to print. */
	if (!kgdb_connected || atomic_read(&kgdb_active) != -1 || dbg_kdb_mode)
		return;

	local_irq_save(flags);
	gdbstub_msg_write(s, count);
	local_irq_restore(flags);
}

static struct console kgdbcons = {
	.name		= "kgdb",
	.write		= kgdb_console_write,
	.flags		= CON_PRINTBUFFER | CON_ENABLED,
	.index		= -1,
};

#ifdef CONFIG_MAGIC_SYSRQ
static void sysrq_handle_dbg(int key, struct tty_struct *tty)
{
	if (!dbg_io_ops) {
		printk(KERN_CRIT "ERROR: No KGDB I/O module available\n");
		return;
	}
	if (!kgdb_connected) {
#ifdef CONFIG_KGDB_KDB
		if (!dbg_kdb_mode)
			printk(KERN_CRIT "KGDB or $3#33 for KDB\n");
#else
		printk(KERN_CRIT "Entering KGDB\n");
#endif
	}

	kgdb_breakpoint();
}

static struct sysrq_key_op sysrq_dbg_op = {
	.handler	= sysrq_handle_dbg,
	.help_msg	= "debug(G)",
	.action_msg	= "DEBUG",
};
#endif

static int kgdb_panic_event(struct notifier_block *self,
			    unsigned long val,
			    void *data)
{
	if (dbg_kdb_mode)
		kdb_printf("PANIC: %s\n", (char *)data);
	kgdb_breakpoint();
	return NOTIFY_DONE;
}

static struct notifier_block kgdb_panic_event_nb = {
	.notifier_call	= kgdb_panic_event,
	.priority	= INT_MAX,
};

void __weak kgdb_arch_late(void)
{
}

void __init dbg_late_init(void)
{
	dbg_is_early = false;
	if (kgdb_io_module_registered)
		kgdb_arch_late();
	kdb_init(KDB_INIT_FULL);
}

static void kgdb_register_callbacks(void)
{
	if (!kgdb_io_module_registered) {
		kgdb_io_module_registered = 1;
		kgdb_arch_init();
		if (!dbg_is_early)
			kgdb_arch_late();
		atomic_notifier_chain_register(&panic_notifier_list,
					       &kgdb_panic_event_nb);
#ifdef CONFIG_MAGIC_SYSRQ
		register_sysrq_key('g', &sysrq_dbg_op);
#endif
		if (kgdb_use_con && !kgdb_con_registered) {
			register_console(&kgdbcons);
			kgdb_con_registered = 1;
		}
	}
}

static void kgdb_unregister_callbacks(void)
{
	/*
	 * When this routine is called KGDB should unregister from the
	 * panic handler and clean up, making sure it is not handling any
	 * break exceptions at the time.
	 */
	if (kgdb_io_module_registered) {
		kgdb_io_module_registered = 0;
		atomic_notifier_chain_unregister(&panic_notifier_list,
						 &kgdb_panic_event_nb);
		kgdb_arch_exit();
#ifdef CONFIG_MAGIC_SYSRQ
		unregister_sysrq_key('g', &sysrq_dbg_op);
#endif
		if (kgdb_con_registered) {
			unregister_console(&kgdbcons);
			kgdb_con_registered = 0;
		}
	}
}

/*
 * There are times a tasklet needs to be used instead of a compiled-in
 * breakpoint so as to cause an exception outside a kgdb I/O module,
 * such as is the case with kgdboe, where calling a breakpoint in the
 * I/O driver itself would be fatal.
 */
static void kgdb_tasklet_bpt(unsigned long ing)
{
	kgdb_breakpoint();
	atomic_set(&kgdb_break_tasklet_var, 0);
}

static DECLARE_TASKLET(kgdb_tasklet_breakpoint, kgdb_tasklet_bpt, 0);

void kgdb_schedule_breakpoint(void)
{
	if (atomic_read(&kgdb_break_tasklet_var) ||
		atomic_read(&kgdb_active) != -1 ||
		atomic_read(&kgdb_setting_breakpoint))
		return;
	atomic_inc(&kgdb_break_tasklet_var);
	tasklet_schedule(&kgdb_tasklet_breakpoint);
}
EXPORT_SYMBOL_GPL(kgdb_schedule_breakpoint);

static void kgdb_initial_breakpoint(void)
{
	kgdb_break_asap = 0;

	printk(KERN_CRIT "kgdb: Waiting for connection from remote gdb...\n");
	kgdb_breakpoint();
}

/**
 *	kgdb_register_io_module - register KGDB IO module
 *	@new_dbg_io_ops: the io ops vector
 *
 *	Register it with the KGDB core.
 */
int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops)
{
	int err;

	spin_lock(&kgdb_registration_lock);

	if (dbg_io_ops) {
		spin_unlock(&kgdb_registration_lock);

		printk(KERN_ERR "kgdb: Another I/O driver is already "
		       "registered with KGDB.\n");
		return -EBUSY;
	}

	if (new_dbg_io_ops->init) {
		err = new_dbg_io_ops->init();
		if (err) {
			spin_unlock(&kgdb_registration_lock);
			return err;
		}
	}

	dbg_io_ops = new_dbg_io_ops;

	spin_unlock(&kgdb_registration_lock);

	printk(KERN_INFO "kgdb: Registered I/O driver %s.\n",
	       new_dbg_io_ops->name);

	/* Arm KGDB now. */
	kgdb_register_callbacks();

	if (kgdb_break_asap)
		kgdb_initial_breakpoint();

	return 0;
}
EXPORT_SYMBOL_GPL(kgdb_register_io_module);
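
/*
 * Editor's aside, not part of debug_core.c: judging from the fields the
 * core dereferences above (name, read_char, write_char, plus the
 * optional init/flush/pre_exception/post_exception hooks), a minimal
 * polled I/O driver sketch would look like this.  The my_uart_*()
 * helpers are hypothetical placeholders for real hardware accessors.
 */
#if 0	/* illustration only */
static int demo_read_char(void)
{
	int c = my_uart_poll_rx();		/* hypothetical polled RX */

	return c < 0 ? NO_POLL_CHAR : c;
}

static void demo_write_char(u8 chr)
{
	my_uart_poll_tx(chr);			/* hypothetical polled TX */
}

static struct kgdb_io demo_io_ops = {
	.name		= "demo_io",
	.read_char	= demo_read_char,
	.write_char	= demo_write_char,
};

/* a driver init path would then call kgdb_register_io_module(&demo_io_ops) */
#endif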

/**
 *	kgdb_unregister_io_module - unregister KGDB IO module
 *	@old_dbg_io_ops: the io ops vector
 *
 *	Unregister it with the KGDB core.
 */
void kgdb_unregister_io_module(struct kgdb_io *old_dbg_io_ops)
{
	BUG_ON(kgdb_connected);

	/*
	 * KGDB is no longer able to communicate out, so
	 * unregister our callbacks and reset state.
	 */
	kgdb_unregister_callbacks();

	spin_lock(&kgdb_registration_lock);

	WARN_ON_ONCE(dbg_io_ops != old_dbg_io_ops);
	dbg_io_ops = NULL;

	spin_unlock(&kgdb_registration_lock);

	printk(KERN_INFO
		"kgdb: Unregistered I/O driver %s, debugger disabled.\n",
		old_dbg_io_ops->name);
}
EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);

int dbg_io_get_char(void)
{
	int ret = dbg_io_ops->read_char();
	if (ret == NO_POLL_CHAR)
		return -1;
	if (!dbg_kdb_mode)
		return ret;
	if (ret == 127)
		return 8;
	return ret;
}

/**
 * kgdb_breakpoint - generate breakpoint exception
 *
 * This function will generate a breakpoint exception.  It is used at the
 * beginning of a program to sync up with a debugger and can be used
 * otherwise as a quick means to stop program execution and "break" into
 * the debugger.
 */
void kgdb_breakpoint(void)
{
	atomic_inc(&kgdb_setting_breakpoint);
	wmb(); /* Sync point before breakpoint */
	arch_kgdb_breakpoint();
	wmb(); /* Sync point after breakpoint */
	atomic_dec(&kgdb_setting_breakpoint);
}
EXPORT_SYMBOL_GPL(kgdb_breakpoint);
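
/*
 * Editor's aside, not part of debug_core.c: typical use of the export
 * above is a direct call from driver or test code when it reaches a
 * state worth inspecting, e.g.:
 */
#if 0	/* illustration only */
static void demo_check_state(int status)
{
	if (status < 0)			/* hypothetical "can't happen" case */
		kgdb_breakpoint();	/* trap into the attached gdb/kdb */
}
#endif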

static int __init opt_kgdb_wait(char *str)
{
	kgdb_break_asap = 1;

	kdb_init(KDB_INIT_EARLY);
	if (kgdb_io_module_registered)
		kgdb_initial_breakpoint();

	return 0;
}

early_param("kgdbwait", opt_kgdb_wait);
diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h
new file mode 100644
index 000000000000..c5d753d80f67
--- /dev/null
+++ b/kernel/debug/debug_core.h
@@ -0,0 +1,81 @@
/*
 * Created by: Jason Wessel <jason.wessel@windriver.com>
 *
 * Copyright (c) 2009 Wind River Systems, Inc.  All Rights Reserved.
 *
 * This file is licensed under the terms of the GNU General Public
 * License version 2. This program is licensed "as is" without any
 * warranty of any kind, whether express or implied.
 */

#ifndef _DEBUG_CORE_H_
#define _DEBUG_CORE_H_
/*
 * These are the private implementation headers between the kernel
 * debugger core and the debugger front end code.
 */

/* kernel debug core data structures */
struct kgdb_state {
	int			ex_vector;
	int			signo;
	int			err_code;
	int			cpu;
	int			pass_exception;
	unsigned long		thr_query;
	unsigned long		threadid;
	long			kgdb_usethreadid;
	struct pt_regs		*linux_regs;
};

/* Exception state values */
#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */
#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */
#define DCPU_IS_SLAVE    0x4 /* Slave cpu enter exception */
#define DCPU_SSTEP       0x8 /* CPU is single stepping */

struct debuggerinfo_struct {
	void			*debuggerinfo;
	struct task_struct	*task;
	int			exception_state;
	int			ret_state;
	int			irq_depth;
};

extern struct debuggerinfo_struct kgdb_info[];

/* kernel debug core break point routines */
extern int dbg_remove_all_break(void);
extern int dbg_set_sw_break(unsigned long addr);
extern int dbg_remove_sw_break(unsigned long addr);
extern int dbg_activate_sw_breakpoints(void);
extern int dbg_deactivate_sw_breakpoints(void);

/* polled character access to i/o module */
extern int dbg_io_get_char(void);

/* stub return value for switching between the gdbstub and kdb */
#define DBG_PASS_EVENT -12345
/* Switch from one cpu to another */
#define DBG_SWITCH_CPU_EVENT -123456
extern int dbg_switch_cpu;

/* gdbstub interface functions */
extern int gdb_serial_stub(struct kgdb_state *ks);
extern void gdbstub_msg_write(const char *s, int len);

/* gdbstub functions used for kdb <-> gdbstub transition */
extern int gdbstub_state(struct kgdb_state *ks, char *cmd);
extern int dbg_kdb_mode;

#ifdef CONFIG_KGDB_KDB
extern int kdb_stub(struct kgdb_state *ks);
extern int kdb_parse(const char *cmdstr);
#else /* ! CONFIG_KGDB_KDB */
static inline int kdb_stub(struct kgdb_state *ks)
{
	return DBG_PASS_EVENT;
}
#endif /* CONFIG_KGDB_KDB */

#endif /* _DEBUG_CORE_H_ */
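
The DCPU_* bits above are the handshake between exception entry points and the core's cpu round-up loop. As debug_core.c earlier in this patch shows, kgdb_handle_exception() uses them roughly like this (editor's paraphrase of that code, not a new API):

	/* Sketch: an entry point marks itself a would-be master cpu. */
	kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
	ret = kgdb_cpu_enter(ks, regs);		/* spins until done */
	kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);

Slave cpus rounded up via NMI set DCPU_IS_SLAVE instead, as kgdb_nmicallback() in debug_core.c shows.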
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
new file mode 100644
index 000000000000..4b17b3269525
--- /dev/null
+++ b/kernel/debug/gdbstub.c
@@ -0,0 +1,1017 @@
/*
 * Kernel Debug Core
 *
 * Maintainer: Jason Wessel <jason.wessel@windriver.com>
 *
 * Copyright (C) 2000-2001 VERITAS Software Corporation.
 * Copyright (C) 2002-2004 Timesys Corporation
 * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
 * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
 * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
 * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
 * Copyright (C) 2005-2009 Wind River Systems, Inc.
 * Copyright (C) 2007 MontaVista Software, Inc.
 * Copyright (C) 2008 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *
 * Contributors at various stages not listed above:
 *  Jason Wessel ( jason.wessel@windriver.com )
 *  George Anzinger <george@mvista.com>
 *  Anurekh Saxena (anurekh.saxena@timesys.com)
 *  Lake Stevens Instrument Division (Glenn Engel)
 *  Jim Kingdon, Cygnus Support.
 *
 * Original KGDB stub: David Grothe <dave@gcom.com>,
 * Tigran Aivazian <tigran@sco.com>
 *
 * This file is licensed under the terms of the GNU General Public License
 * version 2. This program is licensed "as is" without any warranty of any
 * kind, whether express or implied.
 */

#include <linux/kernel.h>
#include <linux/kgdb.h>
#include <linux/kdb.h>
#include <linux/reboot.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/unaligned.h>
#include "debug_core.h"

#define KGDB_MAX_THREAD_QUERY 17

/* Our I/O buffers. */
static char remcom_in_buffer[BUFMAX];
static char remcom_out_buffer[BUFMAX];

/* Storage for the registers, in GDB format. */
static unsigned long gdb_regs[(NUMREGBYTES +
			       sizeof(unsigned long) - 1) /
			       sizeof(unsigned long)];

/*
 * GDB remote protocol parser:
 */

static int hex(char ch)
{
	if ((ch >= 'a') && (ch <= 'f'))
		return ch - 'a' + 10;
	if ((ch >= '0') && (ch <= '9'))
		return ch - '0';
	if ((ch >= 'A') && (ch <= 'F'))
		return ch - 'A' + 10;
	return -1;
}

#ifdef CONFIG_KGDB_KDB
static int gdbstub_read_wait(void)
{
	int ret = -1;
	int i;

	/* poll any additional I/O interfaces that are defined */
	while (ret < 0)
		for (i = 0; kdb_poll_funcs[i] != NULL; i++) {
			ret = kdb_poll_funcs[i]();
			if (ret > 0)
				break;
		}
	return ret;
}
#else
static int gdbstub_read_wait(void)
{
	int ret = dbg_io_ops->read_char();
	while (ret == NO_POLL_CHAR)
		ret = dbg_io_ops->read_char();
	return ret;
}
#endif
/* scan for the sequence $<data>#<checksum> */
static void get_packet(char *buffer)
{
	unsigned char checksum;
	unsigned char xmitcsum;
	int count;
	char ch;

	do {
		/*
		 * Spin and wait around for the start character, ignore all
		 * other characters:
		 */
		while ((ch = (gdbstub_read_wait())) != '$')
			/* nothing */;

		kgdb_connected = 1;
		checksum = 0;
		xmitcsum = -1;

		count = 0;

		/*
		 * now, read until a # or end of buffer is found:
		 */
		while (count < (BUFMAX - 1)) {
			ch = gdbstub_read_wait();
			if (ch == '#')
				break;
			checksum = checksum + ch;
			buffer[count] = ch;
			count = count + 1;
		}
		buffer[count] = 0;

		if (ch == '#') {
			xmitcsum = hex(gdbstub_read_wait()) << 4;
			xmitcsum += hex(gdbstub_read_wait());

			if (checksum != xmitcsum)
				/* failed checksum */
				dbg_io_ops->write_char('-');
			else
				/* successful transfer */
				dbg_io_ops->write_char('+');
			if (dbg_io_ops->flush)
				dbg_io_ops->flush();
		}
	} while (checksum != xmitcsum);
}

/*
 * Send the packet in buffer.
 * Check for gdb connection if asked for.
 */
static void put_packet(char *buffer)
{
	unsigned char checksum;
	int count;
	char ch;

	/*
	 * $<packet info>#<checksum>.
	 */
	while (1) {
		dbg_io_ops->write_char('$');
		checksum = 0;
		count = 0;

		while ((ch = buffer[count])) {
			dbg_io_ops->write_char(ch);
			checksum += ch;
			count++;
		}

		dbg_io_ops->write_char('#');
		dbg_io_ops->write_char(hex_asc_hi(checksum));
		dbg_io_ops->write_char(hex_asc_lo(checksum));
		if (dbg_io_ops->flush)
			dbg_io_ops->flush();

		/* Now see what we get in reply. */
		ch = gdbstub_read_wait();

		if (ch == 3)
			ch = gdbstub_read_wait();

		/* If we get an ACK, we are done. */
		if (ch == '+')
			return;

		/*
		 * If we get the start of another packet, this means
		 * that GDB is attempting to reconnect.  We will NAK
		 * the packet being sent, and stop trying to send this
		 * packet.
		 */
		if (ch == '$') {
			dbg_io_ops->write_char('-');
			if (dbg_io_ops->flush)
				dbg_io_ops->flush();
			return;
		}
	}
}
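
/*
 * Editor's aside, not part of gdbstub.c: the wire format handled by
 * get_packet()/put_packet() above is "$<payload>#<checksum>", where the
 * checksum is the low eight bits of the byte-wise sum of the payload.
 * For example "OK" sums to 'O' + 'K' = 79 + 75 = 154 = 0x9a, so the
 * stub emits "$OK#9a" and then waits for '+' (ACK) or '-' (NAK).
 */
#if 0	/* illustration only */
static unsigned char demo_packet_csum(const char *payload)
{
	unsigned char sum = 0;

	while (*payload)
		sum += *payload++;
	return sum;			/* "OK" -> 0x9a */
}
#endif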

static char gdbmsgbuf[BUFMAX + 1];

void gdbstub_msg_write(const char *s, int len)
{
	char *bufptr;
	int wcount;
	int i;

	if (len == 0)
		len = strlen(s);

	/* 'O'utput */
	gdbmsgbuf[0] = 'O';

	/* Fill and send buffers... */
	while (len > 0) {
		bufptr = gdbmsgbuf + 1;

		/* Calculate how many this time */
		if ((len << 1) > (BUFMAX - 2))
			wcount = (BUFMAX - 2) >> 1;
		else
			wcount = len;

		/* Pack in hex chars */
		for (i = 0; i < wcount; i++)
			bufptr = pack_hex_byte(bufptr, s[i]);
		*bufptr = '\0';

		/* Move up */
		s += wcount;
		len -= wcount;

		/* Write packet */
		put_packet(gdbmsgbuf);
	}
}

/*
 * Convert the memory pointed to by mem into hex, placing result in
 * buf.  Return a pointer to the last char put in buf (null).  May
 * return an error.
 */
int kgdb_mem2hex(char *mem, char *buf, int count)
{
	char *tmp;
	int err;

	/*
	 * We use the upper half of buf as an intermediate buffer for the
	 * raw memory copy.  Hex conversion will work against this one.
	 */
	tmp = buf + count;

	err = probe_kernel_read(tmp, mem, count);
	if (!err) {
		while (count > 0) {
			buf = pack_hex_byte(buf, *tmp);
			tmp++;
			count--;
		}

		*buf = 0;
	}

	return err;
}

/*
 * Convert the hex array pointed to by buf into binary to be placed in
 * mem.  Return a pointer to the character AFTER the last byte
 * written.  May return an error.
 */
int kgdb_hex2mem(char *buf, char *mem, int count)
{
	char *tmp_raw;
	char *tmp_hex;

	/*
	 * We use the upper half of buf as an intermediate buffer for the
	 * raw memory that is converted from hex.
	 */
	tmp_raw = buf + count * 2;

	tmp_hex = tmp_raw - 1;
	while (tmp_hex >= buf) {
		tmp_raw--;
		*tmp_raw = hex(*tmp_hex--);
		*tmp_raw |= hex(*tmp_hex--) << 4;
	}

	return probe_kernel_write(mem, tmp_raw, count);
}

/*
 * While we find nice hex chars, build a long_val.
 * Return number of chars processed.
 */
int kgdb_hex2long(char **ptr, unsigned long *long_val)
{
	int hex_val;
	int num = 0;
	int negate = 0;

	*long_val = 0;

	if (**ptr == '-') {
		negate = 1;
		(*ptr)++;
	}
	while (**ptr) {
		hex_val = hex(**ptr);
		if (hex_val < 0)
			break;

		*long_val = (*long_val << 4) | hex_val;
		num++;
		(*ptr)++;
	}

	if (negate)
		*long_val = -*long_val;

	return num;
}
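
/*
 * Editor's aside, not part of gdbstub.c: kgdb_hex2long() advances the
 * caller's pointer past the digits it consumes, which is how the
 * command handlers below split comma-separated fields.  Parsing the
 * body of a memory-read packet such as "m c001234,4" goes like this:
 */
#if 0	/* illustration only */
static void demo_parse_memread(char *packet)
{
	char *ptr = &packet[1];			/* skip the 'm' */
	unsigned long addr, length;

	if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
	    kgdb_hex2long(&ptr, &length) > 0) {
		/* here addr == 0xc001234 and length == 4 */
	}
}
#endif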

/*
 * Copy the binary array pointed to by buf into mem.  Fix $, #, and
 * 0x7d escaped with 0x7d.  Return -EFAULT on failure or 0 on success.
 * The input buf is overwritten with the result to write to mem.
 */
static int kgdb_ebin2mem(char *buf, char *mem, int count)
{
	int size = 0;
	char *c = buf;

	while (count-- > 0) {
		c[size] = *buf++;
		if (c[size] == 0x7d)
			c[size] = *buf++ ^ 0x20;
		size++;
	}

	return probe_kernel_write(mem, c, size);
}

/* Write memory due to an 'M' or 'X' packet. */
static int write_mem_msg(int binary)
{
	char *ptr = &remcom_in_buffer[1];
	unsigned long addr;
	unsigned long length;
	int err;

	if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' &&
	    kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') {
		if (binary)
			err = kgdb_ebin2mem(ptr, (char *)addr, length);
		else
			err = kgdb_hex2mem(ptr, (char *)addr, length);
		if (err)
			return err;
		if (CACHE_FLUSH_IS_SAFE)
			flush_icache_range(addr, addr + length);
		return 0;
	}

	return -EINVAL;
}

static void error_packet(char *pkt, int error)
{
	error = -error;
	pkt[0] = 'E';
	pkt[1] = hex_asc[(error / 10)];
	pkt[2] = hex_asc[(error % 10)];
	pkt[3] = '\0';
}

/*
 * Thread ID accessors.  We represent a flat TID space to GDB, where
 * the per CPU idle threads (which under Linux all have PID 0) are
 * remapped to negative TIDs.
 */

#define BUF_THREAD_ID_SIZE	16

static char *pack_threadid(char *pkt, unsigned char *id)
{
	char *limit;

	limit = pkt + BUF_THREAD_ID_SIZE;
	while (pkt < limit)
		pkt = pack_hex_byte(pkt, *id++);

	return pkt;
}

static void int_to_threadref(unsigned char *id, int value)
{
	unsigned char *scan;
	int i = 4;

	scan = (unsigned char *)id;
	while (i--)
		*scan++ = 0;
	put_unaligned_be32(value, scan);
}

static struct task_struct *getthread(struct pt_regs *regs, int tid)
{
	/*
	 * Non-positive TIDs are remapped to the cpu shadow information
	 */
	if (tid == 0 || tid == -1)
		tid = -atomic_read(&kgdb_active) - 2;
	if (tid < -1 && tid > -NR_CPUS - 2) {
		if (kgdb_info[-tid - 2].task)
			return kgdb_info[-tid - 2].task;
		else
			return idle_task(-tid - 2);
	}
	if (tid <= 0) {
		printk(KERN_ERR "KGDB: Internal thread select error\n");
		dump_stack();
		return NULL;
	}

	/*
	 * find_task_by_pid_ns() does not take the tasklist lock anymore
	 * but is nicely RCU locked - hence is a pretty resilient
	 * thing to use:
	 */
	return find_task_by_pid_ns(tid, &init_pid_ns);
}


/*
 * Remap normal tasks to their real PID,
 * CPU shadow threads are mapped to -CPU - 2
 */
static inline int shadow_pid(int realpid)
{
	if (realpid)
		return realpid;

	return -raw_smp_processor_id() - 2;
}
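
/*
 * Editor's aside, not part of gdbstub.c: worked examples of the TID
 * remapping implemented by getthread()/shadow_pid() above:
 *
 *   real task, PID 1234    ->  TID 1234
 *   idle thread of cpu 0   ->  TID -2
 *   idle thread of cpu 1   ->  TID -3
 *
 * TID 0 and TID -1 ("current"/"any" in the gdb remote protocol) are
 * remapped to the shadow thread of whichever cpu owns kgdb_active.
 */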
444
445/*
446 * All the functions that start with gdb_cmd are the various
447 * operations to implement the handlers for the gdbserial protocol
448 * where KGDB is communicating with an external debugger
449 */
450
451/* Handle the '?' status packets */
452static void gdb_cmd_status(struct kgdb_state *ks)
453{
454 /*
455 * We know that this packet is only sent
456 * during initial connect. So to be safe,
457 * we clear out our breakpoints now in case
458 * GDB is reconnecting.
459 */
460 dbg_remove_all_break();
461
462 remcom_out_buffer[0] = 'S';
463 pack_hex_byte(&remcom_out_buffer[1], ks->signo);
464}
465
466/* Handle the 'g' get registers request */
467static void gdb_cmd_getregs(struct kgdb_state *ks)
468{
469 struct task_struct *thread;
470 void *local_debuggerinfo;
471 int i;
472
473 thread = kgdb_usethread;
474 if (!thread) {
475 thread = kgdb_info[ks->cpu].task;
476 local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo;
477 } else {
478 local_debuggerinfo = NULL;
479 for_each_online_cpu(i) {
480 /*
481 * Try to find the task on some other
482 * or possibly this node if we do not
483 * find the matching task then we try
484 * to approximate the results.
485 */
486 if (thread == kgdb_info[i].task)
487 local_debuggerinfo = kgdb_info[i].debuggerinfo;
488 }
489 }
490
491 /*
492 * All threads that don't have debuggerinfo should be
493 * in schedule() sleeping, since all other CPUs
494 * are in kgdb_wait, and thus have debuggerinfo.
495 */
496 if (local_debuggerinfo) {
497 pt_regs_to_gdb_regs(gdb_regs, local_debuggerinfo);
498 } else {
499 /*
500 * Pull stuff saved during switch_to; nothing
501 * else is accessible (or even particularly
502 * relevant).
503 *
504 * This should be enough for a stack trace.
505 */
506 sleeping_thread_to_gdb_regs(gdb_regs, thread);
507 }
508 kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, NUMREGBYTES);
509}
510
511/* Handle the 'G' set registers request */
512static void gdb_cmd_setregs(struct kgdb_state *ks)
513{
514 kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs, NUMREGBYTES);
515
516 if (kgdb_usethread && kgdb_usethread != current) {
517 error_packet(remcom_out_buffer, -EINVAL);
518 } else {
519 gdb_regs_to_pt_regs(gdb_regs, ks->linux_regs);
520 strcpy(remcom_out_buffer, "OK");
521 }
522}
523
524/* Handle the 'm' memory read bytes */
525static void gdb_cmd_memread(struct kgdb_state *ks)
526{
527 char *ptr = &remcom_in_buffer[1];
528 unsigned long length;
529 unsigned long addr;
530 int err;
531
532 if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
533 kgdb_hex2long(&ptr, &length) > 0) {
534 err = kgdb_mem2hex((char *)addr, remcom_out_buffer, length);
535 if (err)
536 error_packet(remcom_out_buffer, err);
537 } else {
538 error_packet(remcom_out_buffer, -EINVAL);
539 }
540}
541
542/* Handle the 'M' memory write bytes */
543static void gdb_cmd_memwrite(struct kgdb_state *ks)
544{
545 int err = write_mem_msg(0);
546
547 if (err)
548 error_packet(remcom_out_buffer, err);
549 else
550 strcpy(remcom_out_buffer, "OK");
551}
552
553/* Handle the 'X' memory binary write bytes */
554static void gdb_cmd_binwrite(struct kgdb_state *ks)
555{
556 int err = write_mem_msg(1);
557
558 if (err)
559 error_packet(remcom_out_buffer, err);
560 else
561 strcpy(remcom_out_buffer, "OK");
562}
563
564/* Handle the 'D' or 'k', detach or kill packets */
565static void gdb_cmd_detachkill(struct kgdb_state *ks)
566{
567 int error;
568
569 /* The detach case */
570 if (remcom_in_buffer[0] == 'D') {
571 error = dbg_remove_all_break();
572 if (error < 0) {
573 error_packet(remcom_out_buffer, error);
574 } else {
575 strcpy(remcom_out_buffer, "OK");
576 kgdb_connected = 0;
577 }
578 put_packet(remcom_out_buffer);
579 } else {
580 /*
581 * Assume the kill case, with no exit code checking,
582 * trying to force detach the debugger:
583 */
584 dbg_remove_all_break();
585 kgdb_connected = 0;
586 }
587}
588
589/* Handle the 'R' reboot packets */
590static int gdb_cmd_reboot(struct kgdb_state *ks)
591{
592 /* For now, only honor R0 */
593 if (strcmp(remcom_in_buffer, "R0") == 0) {
594 printk(KERN_CRIT "Executing emergency reboot\n");
595 strcpy(remcom_out_buffer, "OK");
596 put_packet(remcom_out_buffer);
597
598 /*
599 * Execution should not return from
600 * machine_emergency_restart()
601 */
602 machine_emergency_restart();
603 kgdb_connected = 0;
604
605 return 1;
606 }
607 return 0;
608}
609
610/* Handle the 'q' query packets */
611static void gdb_cmd_query(struct kgdb_state *ks)
612{
613 struct task_struct *g;
614 struct task_struct *p;
615 unsigned char thref[8];
616 char *ptr;
617 int i;
618 int cpu;
619 int finished = 0;
620
621 switch (remcom_in_buffer[1]) {
622 case 's':
623 case 'f':
624 if (memcmp(remcom_in_buffer + 2, "ThreadInfo", 10)) {
625 error_packet(remcom_out_buffer, -EINVAL);
626 break;
627 }
628
629 i = 0;
630 remcom_out_buffer[0] = 'm';
631 ptr = remcom_out_buffer + 1;
632 if (remcom_in_buffer[1] == 'f') {
633 /* Each cpu is a shadow thread */
634 for_each_online_cpu(cpu) {
635 ks->thr_query = 0;
636 int_to_threadref(thref, -cpu - 2);
637 pack_threadid(ptr, thref);
638 ptr += BUF_THREAD_ID_SIZE;
639 *(ptr++) = ',';
640 i++;
641 }
642 }
643
644 do_each_thread(g, p) {
645 if (i >= ks->thr_query && !finished) {
646 int_to_threadref(thref, p->pid);
647 pack_threadid(ptr, thref);
648 ptr += BUF_THREAD_ID_SIZE;
649 *(ptr++) = ',';
650 ks->thr_query++;
651 if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0)
652 finished = 1;
653 }
654 i++;
655 } while_each_thread(g, p);
656
657 *(--ptr) = '\0';
658 break;
659
660 case 'C':
661 /* Current thread id */
662 strcpy(remcom_out_buffer, "QC");
663 ks->threadid = shadow_pid(current->pid);
664 int_to_threadref(thref, ks->threadid);
665 pack_threadid(remcom_out_buffer + 2, thref);
666 break;
667 case 'T':
668 if (memcmp(remcom_in_buffer + 1, "ThreadExtraInfo,", 16)) {
669 error_packet(remcom_out_buffer, -EINVAL);
670 break;
671 }
672 ks->threadid = 0;
673 ptr = remcom_in_buffer + 17;
674 kgdb_hex2long(&ptr, &ks->threadid);
675 if (!getthread(ks->linux_regs, ks->threadid)) {
676 error_packet(remcom_out_buffer, -EINVAL);
677 break;
678 }
679 if ((int)ks->threadid > 0) {
680 kgdb_mem2hex(getthread(ks->linux_regs,
681 ks->threadid)->comm,
682 remcom_out_buffer, 16);
683 } else {
684 static char tmpstr[23 + BUF_THREAD_ID_SIZE];
685
686 sprintf(tmpstr, "shadowCPU%d",
687 (int)(-ks->threadid - 2));
688 kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr));
689 }
690 break;
691#ifdef CONFIG_KGDB_KDB
692 case 'R':
693 if (strncmp(remcom_in_buffer, "qRcmd,", 6) == 0) {
694 int len = strlen(remcom_in_buffer + 6);
695
696 if ((len % 2) != 0) {
697 strcpy(remcom_out_buffer, "E01");
698 break;
699 }
700 kgdb_hex2mem(remcom_in_buffer + 6,
701 remcom_out_buffer, len);
702 len = len / 2;
703 remcom_out_buffer[len++] = 0;
704
705 kdb_parse(remcom_out_buffer);
706 strcpy(remcom_out_buffer, "OK");
707 }
708 break;
709#endif
710 }
711}
712
713/* Handle the 'H' task query packets */
714static void gdb_cmd_task(struct kgdb_state *ks)
715{
716 struct task_struct *thread;
717 char *ptr;
718
719 switch (remcom_in_buffer[1]) {
720 case 'g':
721 ptr = &remcom_in_buffer[2];
722 kgdb_hex2long(&ptr, &ks->threadid);
723 thread = getthread(ks->linux_regs, ks->threadid);
724 if (!thread && ks->threadid > 0) {
725 error_packet(remcom_out_buffer, -EINVAL);
726 break;
727 }
728 kgdb_usethread = thread;
729 ks->kgdb_usethreadid = ks->threadid;
730 strcpy(remcom_out_buffer, "OK");
731 break;
732 case 'c':
733 ptr = &remcom_in_buffer[2];
734 kgdb_hex2long(&ptr, &ks->threadid);
735 if (!ks->threadid) {
736 kgdb_contthread = NULL;
737 } else {
738 thread = getthread(ks->linux_regs, ks->threadid);
739 if (!thread && ks->threadid > 0) {
740 error_packet(remcom_out_buffer, -EINVAL);
741 break;
742 }
743 kgdb_contthread = thread;
744 }
745 strcpy(remcom_out_buffer, "OK");
746 break;
747 }
748}
749
750/* Handle the 'T' thread query packets */
751static void gdb_cmd_thread(struct kgdb_state *ks)
752{
753 char *ptr = &remcom_in_buffer[1];
754 struct task_struct *thread;
755
756 kgdb_hex2long(&ptr, &ks->threadid);
757 thread = getthread(ks->linux_regs, ks->threadid);
758 if (thread)
759 strcpy(remcom_out_buffer, "OK");
760 else
761 error_packet(remcom_out_buffer, -EINVAL);
762}
763
764/* Handle the 'z' or 'Z' breakpoint remove or set packets */
765static void gdb_cmd_break(struct kgdb_state *ks)
766{
767 /*
768 * Since GDB-5.3, it's been drafted that '0' is a software
769 * breakpoint, '1' is a hardware breakpoint, so let's do that.
770 */
771 char *bpt_type = &remcom_in_buffer[1];
772 char *ptr = &remcom_in_buffer[2];
773 unsigned long addr;
774 unsigned long length;
775 int error = 0;
776
777 if (arch_kgdb_ops.set_hw_breakpoint && *bpt_type >= '1') {
778 /* Unsupported */
779 if (*bpt_type > '4')
780 return;
781 } else {
782 if (*bpt_type != '0' && *bpt_type != '1')
783 /* Unsupported. */
784 return;
785 }
786
787 /*
788 * Test if this is a hardware breakpoint, and
789 * if we support it:
790 */
791 if (*bpt_type == '1' && !(arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT))
792 /* Unsupported. */
793 return;
794
795 if (*(ptr++) != ',') {
796 error_packet(remcom_out_buffer, -EINVAL);
797 return;
798 }
799 if (!kgdb_hex2long(&ptr, &addr)) {
800 error_packet(remcom_out_buffer, -EINVAL);
801 return;
802 }
803 if (*(ptr++) != ',' ||
804 !kgdb_hex2long(&ptr, &length)) {
805 error_packet(remcom_out_buffer, -EINVAL);
806 return;
807 }
808
809 if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0')
810 error = dbg_set_sw_break(addr);
811 else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0')
812 error = dbg_remove_sw_break(addr);
813 else if (remcom_in_buffer[0] == 'Z')
814 error = arch_kgdb_ops.set_hw_breakpoint(addr,
815 (int)length, *bpt_type - '0');
816 else if (remcom_in_buffer[0] == 'z')
817 error = arch_kgdb_ops.remove_hw_breakpoint(addr,
818 (int) length, *bpt_type - '0');
819
820 if (error == 0)
821 strcpy(remcom_out_buffer, "OK");
822 else
823 error_packet(remcom_out_buffer, error);
824}
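/*
 * Example exchange, as a rough sketch (the address is made up; the
 * request checksums are elided as "cs", but "9a" really is the
 * modulo-256 checksum of "OK"):
 *
 *	host -> target:  $Z0,c01234a0,1#cs	set sw breakpoint at 0xc01234a0
 *	target -> host:  $OK#9a
 *	host -> target:  $z0,c01234a0,1#cs	remove the same breakpoint
 *	target -> host:  $OK#9a
 *
 * A 'Z1' variant would request a hardware breakpoint instead, which is
 * only honored when arch_kgdb_ops advertises KGDB_HW_BREAKPOINT.
 */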
825
826/* Handle the 'C' signal / exception passing packets */
827static int gdb_cmd_exception_pass(struct kgdb_state *ks)
828{
829 /* C09 == pass exception
830 * C15 == detach kgdb, pass exception
831 */
832 if (remcom_in_buffer[1] == '0' && remcom_in_buffer[2] == '9') {
833
834 ks->pass_exception = 1;
835 remcom_in_buffer[0] = 'c';
836
837 } else if (remcom_in_buffer[1] == '1' && remcom_in_buffer[2] == '5') {
838
839 ks->pass_exception = 1;
840 remcom_in_buffer[0] = 'D';
841 dbg_remove_all_break();
842 kgdb_connected = 0;
843 return 1;
844
845 } else {
846 gdbstub_msg_write("KGDB only knows signal 9 (pass)"
847 " and 15 (pass and disconnect)\n"
848 "Executing a continue without signal passing\n", 0);
849 remcom_in_buffer[0] = 'c';
850 }
851
852 /* Indicate fall through */
853 return -1;
854}
855
856/*
 857 * This function performs all gdbserial command processing
858 */
859int gdb_serial_stub(struct kgdb_state *ks)
860{
861 int error = 0;
862 int tmp;
863
864 /* Clear the out buffer. */
865 memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
866
867 if (kgdb_connected) {
868 unsigned char thref[8];
869 char *ptr;
870
871 /* Reply to host that an exception has occurred */
872 ptr = remcom_out_buffer;
873 *ptr++ = 'T';
874 ptr = pack_hex_byte(ptr, ks->signo);
875 ptr += strlen(strcpy(ptr, "thread:"));
876 int_to_threadref(thref, shadow_pid(current->pid));
877 ptr = pack_threadid(ptr, thref);
878 *ptr++ = ';';
879 put_packet(remcom_out_buffer);
880 }
881
882 kgdb_usethread = kgdb_info[ks->cpu].task;
883 ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid);
884 ks->pass_exception = 0;
885
886 while (1) {
887 error = 0;
888
889 /* Clear the out buffer. */
890 memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
891
892 get_packet(remcom_in_buffer);
893
894 switch (remcom_in_buffer[0]) {
895 case '?': /* gdbserial status */
896 gdb_cmd_status(ks);
897 break;
898 case 'g': /* return the value of the CPU registers */
899 gdb_cmd_getregs(ks);
900 break;
901 case 'G': /* set the value of the CPU registers - return OK */
902 gdb_cmd_setregs(ks);
903 break;
904 case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */
905 gdb_cmd_memread(ks);
906 break;
907 case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */
908 gdb_cmd_memwrite(ks);
909 break;
910 case 'X': /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */
911 gdb_cmd_binwrite(ks);
912 break;
913 /* kill or detach. KGDB should treat this like a
914 * continue.
915 */
916 case 'D': /* Debugger detach */
917 case 'k': /* Debugger detach via kill */
918 gdb_cmd_detachkill(ks);
919 goto default_handle;
920 case 'R': /* Reboot */
921 if (gdb_cmd_reboot(ks))
922 goto default_handle;
923 break;
924 case 'q': /* query command */
925 gdb_cmd_query(ks);
926 break;
927 case 'H': /* task related */
928 gdb_cmd_task(ks);
929 break;
930 case 'T': /* Query thread status */
931 gdb_cmd_thread(ks);
932 break;
933 case 'z': /* Break point remove */
934 case 'Z': /* Break point set */
935 gdb_cmd_break(ks);
936 break;
937#ifdef CONFIG_KGDB_KDB
 938 case '3': /* Escape back into kdb */
939 if (remcom_in_buffer[1] == '\0') {
940 gdb_cmd_detachkill(ks);
941 return DBG_PASS_EVENT;
942 }
943#endif
944 case 'C': /* Exception passing */
945 tmp = gdb_cmd_exception_pass(ks);
946 if (tmp > 0)
947 goto default_handle;
948 if (tmp == 0)
949 break;
950 /* Fall through on tmp < 0 */
951 case 'c': /* Continue packet */
952 case 's': /* Single step packet */
953 if (kgdb_contthread && kgdb_contthread != current) {
954 /* Can't switch threads in kgdb */
955 error_packet(remcom_out_buffer, -EINVAL);
956 break;
957 }
958 dbg_activate_sw_breakpoints();
959 /* Fall through to default processing */
960 default:
961default_handle:
962 error = kgdb_arch_handle_exception(ks->ex_vector,
963 ks->signo,
964 ks->err_code,
965 remcom_in_buffer,
966 remcom_out_buffer,
967 ks->linux_regs);
968 /*
969 * Leave cmd processing on error, detach,
970 * kill, continue, or single step.
971 */
972 if (error >= 0 || remcom_in_buffer[0] == 'D' ||
973 remcom_in_buffer[0] == 'k') {
974 error = 0;
975 goto kgdb_exit;
976 }
977
978 }
979
980 /* reply to the request */
981 put_packet(remcom_out_buffer);
982 }
983
984kgdb_exit:
985 if (ks->pass_exception)
986 error = 1;
987 return error;
988}
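/*
 * For reference, every packet that get_packet()/put_packet() move across
 * the wire uses the gdb remote serial framing "$<payload>#<checksum>",
 * where the checksum is the payload bytes summed modulo 256, sent as two
 * lowercase hex digits.  A minimal sketch of that calculation (the real
 * helpers live earlier in this file):
 *
 *	unsigned char sum = 0;
 *	while (*payload)
 *		sum += (unsigned char)*payload++;
 *
 * So the bare query "$?#3f" seen in the kdb transition check is simply
 * '?' (0x3f) framed with its own checksum.
 */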
989
990int gdbstub_state(struct kgdb_state *ks, char *cmd)
991{
992 int error;
993
994 switch (cmd[0]) {
995 case 'e':
996 error = kgdb_arch_handle_exception(ks->ex_vector,
997 ks->signo,
998 ks->err_code,
999 remcom_in_buffer,
1000 remcom_out_buffer,
1001 ks->linux_regs);
1002 return error;
1003 case 's':
1004 case 'c':
1005 strcpy(remcom_in_buffer, cmd);
1006 return 0;
1007 case '?':
1008 gdb_cmd_status(ks);
1009 break;
1010 case '\0':
1011 strcpy(remcom_out_buffer, "");
1012 break;
1013 }
1014 dbg_io_ops->write_char('+');
1015 put_packet(remcom_out_buffer);
1016 return 0;
1017}
diff --git a/kernel/debug/kdb/.gitignore b/kernel/debug/kdb/.gitignore
new file mode 100644
index 000000000000..396d12eda9e8
--- /dev/null
+++ b/kernel/debug/kdb/.gitignore
@@ -0,0 +1 @@
gen-kdb_cmds.c
diff --git a/kernel/debug/kdb/Makefile b/kernel/debug/kdb/Makefile
new file mode 100644
index 000000000000..d4fc58f4b88d
--- /dev/null
+++ b/kernel/debug/kdb/Makefile
@@ -0,0 +1,25 @@
1# This file is subject to the terms and conditions of the GNU General Public
2# License. See the file "COPYING" in the main directory of this archive
3# for more details.
4#
5# Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
6# Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
7#
8
9CCVERSION := $(shell $(CC) -v 2>&1 | sed -ne '$$p')
10obj-y := kdb_io.o kdb_main.o kdb_support.o kdb_bt.o gen-kdb_cmds.o kdb_bp.o kdb_debugger.o
11obj-$(CONFIG_KDB_KEYBOARD) += kdb_keyboard.o
12
13clean-files := gen-kdb_cmds.c
14
15quiet_cmd_gen-kdb = GENKDB $@
16 cmd_gen-kdb = $(AWK) 'BEGIN {print "\#include <linux/stddef.h>"; print "\#include <linux/init.h>"} \
17 /^\#/{next} \
18 /^[ \t]*$$/{next} \
19 {gsub(/"/, "\\\"", $$0); \
20 print "static __initdata char kdb_cmd" cmds++ "[] = \"" $$0 "\\n\";"} \
21 END {print "extern char *kdb_cmds[]; char __initdata *kdb_cmds[] = {"; for (i = 0; i < cmds; ++i) {print " kdb_cmd" i ","}; print(" NULL\n};");}' \
22 $(filter-out %/Makefile,$^) > $@#
23
24$(obj)/gen-kdb_cmds.c: $(src)/kdb_cmds $(src)/Makefile
25 $(call cmd,gen-kdb)
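# For illustration: a kdb_cmds line such as
#	defcmd dumpcommon "" "Common kdb debugging"
# emerges from the awk script above roughly as
#	static __initdata char kdb_cmd0[] = "defcmd dumpcommon \"\" \"Common kdb debugging\"\n";
# and all such strings are collected into the NULL terminated kdb_cmds[]
# array that the debugger replays in kdb_init() context at boot.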
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
new file mode 100644
index 000000000000..75bd9b3ebbb7
--- /dev/null
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -0,0 +1,564 @@
1/*
2 * Kernel Debugger Architecture Independent Breakpoint Handler
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 *
8 * Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
9 * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
10 */
11
12#include <linux/string.h>
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/kdb.h>
16#include <linux/kgdb.h>
17#include <linux/smp.h>
18#include <linux/sched.h>
19#include <linux/interrupt.h>
20#include "kdb_private.h"
21
22/*
23 * Table of kdb_breakpoints
24 */
25kdb_bp_t kdb_breakpoints[KDB_MAXBPT];
26
27static void kdb_setsinglestep(struct pt_regs *regs)
28{
29 KDB_STATE_SET(DOING_SS);
30}
31
32static char *kdb_rwtypes[] = {
33 "Instruction(i)",
34 "Instruction(Register)",
35 "Data Write",
36 "I/O",
37 "Data Access"
38};
39
40static char *kdb_bptype(kdb_bp_t *bp)
41{
42 if (bp->bp_type < 0 || bp->bp_type > 4)
43 return "";
44
45 return kdb_rwtypes[bp->bp_type];
46}
47
48static int kdb_parsebp(int argc, const char **argv, int *nextargp, kdb_bp_t *bp)
49{
50 int nextarg = *nextargp;
51 int diag;
52
53 bp->bph_length = 1;
54 if ((argc + 1) != nextarg) {
55 if (strnicmp(argv[nextarg], "datar", sizeof("datar")) == 0)
56 bp->bp_type = BP_ACCESS_WATCHPOINT;
57 else if (strnicmp(argv[nextarg], "dataw", sizeof("dataw")) == 0)
58 bp->bp_type = BP_WRITE_WATCHPOINT;
59 else if (strnicmp(argv[nextarg], "inst", sizeof("inst")) == 0)
60 bp->bp_type = BP_HARDWARE_BREAKPOINT;
61 else
62 return KDB_ARGCOUNT;
63
64 bp->bph_length = 1;
65
66 nextarg++;
67
68 if ((argc + 1) != nextarg) {
69 unsigned long len;
70
71 diag = kdbgetularg((char *)argv[nextarg],
72 &len);
73 if (diag)
74 return diag;
75
76
77 if (len > 8)
78 return KDB_BADLENGTH;
79
80 bp->bph_length = len;
81 nextarg++;
82 }
83
84 if ((argc + 1) != nextarg)
85 return KDB_ARGCOUNT;
86 }
87
88 *nextargp = nextarg;
89 return 0;
90}
91
92static int _kdb_bp_remove(kdb_bp_t *bp)
93{
94 int ret = 1;
95 if (!bp->bp_installed)
96 return ret;
97 if (!bp->bp_type)
98 ret = dbg_remove_sw_break(bp->bp_addr);
99 else
100 ret = arch_kgdb_ops.remove_hw_breakpoint(bp->bp_addr,
101 bp->bph_length,
102 bp->bp_type);
103 if (ret == 0)
104 bp->bp_installed = 0;
105 return ret;
106}
107
108static void kdb_handle_bp(struct pt_regs *regs, kdb_bp_t *bp)
109{
110 if (KDB_DEBUG(BP))
111 kdb_printf("regs->ip = 0x%lx\n", instruction_pointer(regs));
112
113 /*
114 * Setup single step
115 */
116 kdb_setsinglestep(regs);
117
118 /*
119 * Reset delay attribute
120 */
121 bp->bp_delay = 0;
122 bp->bp_delayed = 1;
123}
124
125static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp)
126{
127 int ret;
128 /*
129 * Install the breakpoint, if it is not already installed.
130 */
131
132 if (KDB_DEBUG(BP))
133 kdb_printf("%s: bp_installed %d\n",
134 __func__, bp->bp_installed);
135 if (!KDB_STATE(SSBPT))
136 bp->bp_delay = 0;
137 if (bp->bp_installed)
138 return 1;
139 if (bp->bp_delay || (bp->bp_delayed && KDB_STATE(DOING_SS))) {
140 if (KDB_DEBUG(BP))
141 kdb_printf("%s: delayed bp\n", __func__);
142 kdb_handle_bp(regs, bp);
143 return 0;
144 }
145 if (!bp->bp_type)
146 ret = dbg_set_sw_break(bp->bp_addr);
147 else
148 ret = arch_kgdb_ops.set_hw_breakpoint(bp->bp_addr,
149 bp->bph_length,
150 bp->bp_type);
151 if (ret == 0) {
152 bp->bp_installed = 1;
153 } else {
154 kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
155 __func__, bp->bp_addr);
156 return 1;
157 }
158 return 0;
159}
160
161/*
162 * kdb_bp_install
163 *
164 * Install kdb_breakpoints prior to returning from the
165 * kernel debugger. This allows the kdb_breakpoints to be set
 166 * on functions that are used internally by kdb, such as
167 * printk(). This function is only called once per kdb session.
168 */
169void kdb_bp_install(struct pt_regs *regs)
170{
171 int i;
172
173 for (i = 0; i < KDB_MAXBPT; i++) {
174 kdb_bp_t *bp = &kdb_breakpoints[i];
175
176 if (KDB_DEBUG(BP)) {
177 kdb_printf("%s: bp %d bp_enabled %d\n",
178 __func__, i, bp->bp_enabled);
179 }
180 if (bp->bp_enabled)
181 _kdb_bp_install(regs, bp);
182 }
183}
184
185/*
186 * kdb_bp_remove
187 *
188 * Remove kdb_breakpoints upon entry to the kernel debugger.
189 *
190 * Parameters:
191 * None.
192 * Outputs:
193 * None.
194 * Returns:
195 * None.
196 * Locking:
197 * None.
198 * Remarks:
199 */
200void kdb_bp_remove(void)
201{
202 int i;
203
204 for (i = KDB_MAXBPT - 1; i >= 0; i--) {
205 kdb_bp_t *bp = &kdb_breakpoints[i];
206
207 if (KDB_DEBUG(BP)) {
208 kdb_printf("%s: bp %d bp_enabled %d\n",
209 __func__, i, bp->bp_enabled);
210 }
211 if (bp->bp_enabled)
212 _kdb_bp_remove(bp);
213 }
214}
215
216
217/*
218 * kdb_printbp
219 *
220 * Internal function to format and print a breakpoint entry.
221 *
222 * Parameters:
223 * None.
224 * Outputs:
225 * None.
226 * Returns:
227 * None.
228 * Locking:
229 * None.
230 * Remarks:
231 */
232
233static void kdb_printbp(kdb_bp_t *bp, int i)
234{
235 kdb_printf("%s ", kdb_bptype(bp));
236 kdb_printf("BP #%d at ", i);
237 kdb_symbol_print(bp->bp_addr, NULL, KDB_SP_DEFAULT);
238
239 if (bp->bp_enabled)
240 kdb_printf("\n is enabled");
241 else
242 kdb_printf("\n is disabled");
243
244 kdb_printf("\taddr at %016lx, hardtype=%d installed=%d\n",
245 bp->bp_addr, bp->bp_type, bp->bp_installed);
246
247 kdb_printf("\n");
248}
249
250/*
251 * kdb_bp
252 *
253 * Handle the bp commands.
254 *
255 * [bp|bph] <addr-expression> [DATAR|DATAW]
256 *
257 * Parameters:
258 * argc Count of arguments in argv
259 * argv Space delimited command line arguments
260 * Outputs:
261 * None.
262 * Returns:
263 * Zero for success, a kdb diagnostic if failure.
264 * Locking:
265 * None.
266 * Remarks:
267 *
 268 * bp Set breakpoint on all cpus. Only use hardware assist if needed.
 269 * bph Set breakpoint on all cpus. Force use of a hardware register.
270 */
271
272static int kdb_bp(int argc, const char **argv)
273{
274 int i, bpno;
275 kdb_bp_t *bp, *bp_check;
276 int diag;
277 int free;
278 char *symname = NULL;
279 long offset = 0ul;
280 int nextarg;
281 kdb_bp_t template = {0};
282
283 if (argc == 0) {
284 /*
285 * Display breakpoint table
286 */
287 for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT;
288 bpno++, bp++) {
289 if (bp->bp_free)
290 continue;
291 kdb_printbp(bp, bpno);
292 }
293
294 return 0;
295 }
296
297 nextarg = 1;
298 diag = kdbgetaddrarg(argc, argv, &nextarg, &template.bp_addr,
299 &offset, &symname);
300 if (diag)
301 return diag;
302 if (!template.bp_addr)
303 return KDB_BADINT;
304
305 /*
306 * Find an empty bp structure to allocate
307 */
308 free = KDB_MAXBPT;
309 for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT; bpno++, bp++) {
310 if (bp->bp_free)
311 break;
312 }
313
314 if (bpno == KDB_MAXBPT)
315 return KDB_TOOMANYBPT;
316
317 if (strcmp(argv[0], "bph") == 0) {
318 template.bp_type = BP_HARDWARE_BREAKPOINT;
319 diag = kdb_parsebp(argc, argv, &nextarg, &template);
320 if (diag)
321 return diag;
322 } else {
323 template.bp_type = BP_BREAKPOINT;
324 }
325
326 /*
327 * Check for clashing breakpoints.
328 *
329 * Note, in this design we can't have hardware breakpoints
330 * enabled for both read and write on the same address.
331 */
332 for (i = 0, bp_check = kdb_breakpoints; i < KDB_MAXBPT;
333 i++, bp_check++) {
334 if (!bp_check->bp_free &&
335 bp_check->bp_addr == template.bp_addr) {
336 kdb_printf("You already have a breakpoint at "
337 kdb_bfd_vma_fmt0 "\n", template.bp_addr);
338 return KDB_DUPBPT;
339 }
340 }
341
342 template.bp_enabled = 1;
343
344 /*
345 * Actually allocate the breakpoint found earlier
346 */
347 *bp = template;
348 bp->bp_free = 0;
349
350 kdb_printbp(bp, bpno);
351
352 return 0;
353}
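/*
 * Example session (addresses hypothetical, output abbreviated):
 *
 *	kdb> bp schedule
 *	Instruction(i) BP #0 at 0xc0105234 (schedule) is enabled
 *	kdb> bph 0xc0234560 dataw 4
 *
 * The first form takes a software breakpoint unless hardware assist is
 * needed; the second forces a hardware watchpoint covering a 4 byte
 * write.
 */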
354
355/*
356 * kdb_bc
357 *
358 * Handles the 'bc', 'be', and 'bd' commands
359 *
360 * [bd|bc|be] <breakpoint-number>
361 * [bd|bc|be] *
362 *
363 * Parameters:
364 * argc Count of arguments in argv
365 * argv Space delimited command line arguments
366 * Outputs:
367 * None.
368 * Returns:
369 * Zero for success, a kdb diagnostic for failure
370 * Locking:
371 * None.
372 * Remarks:
373 */
374static int kdb_bc(int argc, const char **argv)
375{
376 unsigned long addr;
377 kdb_bp_t *bp = NULL;
378 int lowbp = KDB_MAXBPT;
379 int highbp = 0;
380 int done = 0;
381 int i;
382 int diag = 0;
383
384 int cmd; /* KDBCMD_B? */
385#define KDBCMD_BC 0
386#define KDBCMD_BE 1
387#define KDBCMD_BD 2
388
389 if (strcmp(argv[0], "be") == 0)
390 cmd = KDBCMD_BE;
391 else if (strcmp(argv[0], "bd") == 0)
392 cmd = KDBCMD_BD;
393 else
394 cmd = KDBCMD_BC;
395
396 if (argc != 1)
397 return KDB_ARGCOUNT;
398
399 if (strcmp(argv[1], "*") == 0) {
400 lowbp = 0;
401 highbp = KDB_MAXBPT;
402 } else {
403 diag = kdbgetularg(argv[1], &addr);
404 if (diag)
405 return diag;
406
407 /*
408 * For addresses less than the maximum breakpoint number,
409 * assume that the breakpoint number is desired.
410 */
411 if (addr < KDB_MAXBPT) {
412 bp = &kdb_breakpoints[addr];
413 lowbp = highbp = addr;
414 highbp++;
415 } else {
416 for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT;
417 i++, bp++) {
418 if (bp->bp_addr == addr) {
419 lowbp = highbp = i;
420 highbp++;
421 break;
422 }
423 }
424 }
425 }
426
427 /*
428 * Now operate on the set of breakpoints matching the input
429 * criteria (either '*' for all, or an individual breakpoint).
430 */
431 for (bp = &kdb_breakpoints[lowbp], i = lowbp;
432 i < highbp;
433 i++, bp++) {
434 if (bp->bp_free)
435 continue;
436
437 done++;
438
439 switch (cmd) {
440 case KDBCMD_BC:
441 bp->bp_enabled = 0;
442
443 kdb_printf("Breakpoint %d at "
444 kdb_bfd_vma_fmt " cleared\n",
445 i, bp->bp_addr);
446
447 bp->bp_addr = 0;
448 bp->bp_free = 1;
449
450 break;
451 case KDBCMD_BE:
452 bp->bp_enabled = 1;
453
454 kdb_printf("Breakpoint %d at "
455 kdb_bfd_vma_fmt " enabled",
456 i, bp->bp_addr);
457
458 kdb_printf("\n");
459 break;
460 case KDBCMD_BD:
461 if (!bp->bp_enabled)
462 break;
463
464 bp->bp_enabled = 0;
465
466 kdb_printf("Breakpoint %d at "
467 kdb_bfd_vma_fmt " disabled\n",
468 i, bp->bp_addr);
469
470 break;
471 }
472 if (bp->bp_delay && (cmd == KDBCMD_BC || cmd == KDBCMD_BD)) {
473 bp->bp_delay = 0;
474 KDB_STATE_CLEAR(SSBPT);
475 }
476 }
477
478 return (!done) ? KDB_BPTNOTFOUND : 0;
479}
480
481/*
482 * kdb_ss
483 *
484 * Process the 'ss' (Single Step) and 'ssb' (Single Step to Branch)
485 * commands.
486 *
487 * ss
488 * ssb
489 *
490 * Parameters:
491 * argc Argument count
492 * argv Argument vector
493 * Outputs:
494 * None.
495 * Returns:
496 * KDB_CMD_SS[B] for success, a kdb error if failure.
497 * Locking:
498 * None.
499 * Remarks:
500 *
501 * Set the arch specific option to trigger a debug trap after the next
502 * instruction.
503 *
504 * For 'ssb', set the trace flag in the debug trap handler
505 * after printing the current insn and return directly without
506 * invoking the kdb command processor, until a branch instruction
507 * is encountered.
508 */
509
510static int kdb_ss(int argc, const char **argv)
511{
512 int ssb = 0;
513
514 ssb = (strcmp(argv[0], "ssb") == 0);
515 if (argc != 0)
516 return KDB_ARGCOUNT;
517 /*
518 * Set trace flag and go.
519 */
520 KDB_STATE_SET(DOING_SS);
521 if (ssb) {
522 KDB_STATE_SET(DOING_SSB);
523 return KDB_CMD_SSB;
524 }
525 return KDB_CMD_SS;
526}
527
528/* Initialize the breakpoint table and register breakpoint commands. */
529
530void __init kdb_initbptab(void)
531{
532 int i;
533 kdb_bp_t *bp;
534
535 /*
536 * First time initialization.
537 */
538 memset(&kdb_breakpoints, '\0', sizeof(kdb_breakpoints));
539
540 for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++)
541 bp->bp_free = 1;
542
543 kdb_register_repeat("bp", kdb_bp, "[<vaddr>]",
544 "Set/Display breakpoints", 0, KDB_REPEAT_NO_ARGS);
545 kdb_register_repeat("bl", kdb_bp, "[<vaddr>]",
546 "Display breakpoints", 0, KDB_REPEAT_NO_ARGS);
547 if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT)
548 kdb_register_repeat("bph", kdb_bp, "[<vaddr>]",
549 "[datar [length]|dataw [length]] Set hw brk", 0, KDB_REPEAT_NO_ARGS);
550 kdb_register_repeat("bc", kdb_bc, "<bpnum>",
551 "Clear Breakpoint", 0, KDB_REPEAT_NONE);
552 kdb_register_repeat("be", kdb_bc, "<bpnum>",
553 "Enable Breakpoint", 0, KDB_REPEAT_NONE);
554 kdb_register_repeat("bd", kdb_bc, "<bpnum>",
555 "Disable Breakpoint", 0, KDB_REPEAT_NONE);
556
557 kdb_register_repeat("ss", kdb_ss, "",
558 "Single Step", 1, KDB_REPEAT_NO_ARGS);
559 kdb_register_repeat("ssb", kdb_ss, "",
560 "Single step to branch/call", 0, KDB_REPEAT_NO_ARGS);
561 /*
562 * Architecture dependent initialization.
563 */
564}
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
new file mode 100644
index 000000000000..2f62fe85f16a
--- /dev/null
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -0,0 +1,210 @@
1/*
2 * Kernel Debugger Architecture Independent Stack Traceback
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 *
8 * Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
9 * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
10 */
11
12#include <linux/ctype.h>
13#include <linux/string.h>
14#include <linux/kernel.h>
15#include <linux/sched.h>
16#include <linux/kdb.h>
17#include <linux/nmi.h>
18#include <asm/system.h>
19#include "kdb_private.h"
20
21
22static void kdb_show_stack(struct task_struct *p, void *addr)
23{
24 int old_lvl = console_loglevel;
25 console_loglevel = 15;
26 kdb_trap_printk++;
27 kdb_set_current_task(p);
28 if (addr) {
29 show_stack((struct task_struct *)p, addr);
30 } else if (kdb_current_regs) {
31#ifdef CONFIG_X86
32 show_stack(p, &kdb_current_regs->sp);
33#else
34 show_stack(p, NULL);
35#endif
36 } else {
37 show_stack(p, NULL);
38 }
39 console_loglevel = old_lvl;
40 kdb_trap_printk--;
41}
42
43/*
44 * kdb_bt
45 *
46 * This function implements the 'bt' command. Print a stack
47 * traceback.
48 *
49 * bt [<address-expression>] (addr-exp is for alternate stacks)
50 * btp <pid> Kernel stack for <pid>
51 * btt <address-expression> Kernel stack for task structure at
52 * <address-expression>
53 * bta [DRSTCZEUIMA] All useful processes, optionally
54 * filtered by state
55 * btc [<cpu>] The current process on one cpu,
56 * default is all cpus
57 *
 58 * bt <address-expression> refers to an address on the stack; that
 59 * location is assumed to contain a return address.
60 *
61 * btt <address-expression> refers to the address of a struct task.
62 *
63 * Inputs:
64 * argc argument count
65 * argv argument vector
66 * Outputs:
67 * None.
68 * Returns:
69 * zero for success, a kdb diagnostic if error
70 * Locking:
71 * none.
72 * Remarks:
 73 * Backtracing works best when the code uses frame pointers. But even
 74 * without frame pointers we should get a reasonable trace.
75 *
76 * mds comes in handy when examining the stack to do a manual traceback or
77 * to get a starting point for bt <address-expression>.
78 */
79
80static int
81kdb_bt1(struct task_struct *p, unsigned long mask,
82 int argcount, int btaprompt)
83{
84 char buffer[2];
85 if (kdb_getarea(buffer[0], (unsigned long)p) ||
86 kdb_getarea(buffer[0], (unsigned long)(p+1)-1))
87 return KDB_BADADDR;
88 if (!kdb_task_state(p, mask))
89 return 0;
90 kdb_printf("Stack traceback for pid %d\n", p->pid);
91 kdb_ps1(p);
92 kdb_show_stack(p, NULL);
93 if (btaprompt) {
94 kdb_getstr(buffer, sizeof(buffer),
95 "Enter <q> to end, <cr> to continue:");
96 if (buffer[0] == 'q') {
97 kdb_printf("\n");
98 return 1;
99 }
100 }
101 touch_nmi_watchdog();
102 return 0;
103}
104
105int
106kdb_bt(int argc, const char **argv)
107{
108 int diag;
109 int argcount = 5;
110 int btaprompt = 1;
111 int nextarg;
112 unsigned long addr;
113 long offset;
114
115 kdbgetintenv("BTARGS", &argcount); /* Arguments to print */
116 kdbgetintenv("BTAPROMPT", &btaprompt); /* Prompt after each
117 * proc in bta */
118
119 if (strcmp(argv[0], "bta") == 0) {
120 struct task_struct *g, *p;
121 unsigned long cpu;
122 unsigned long mask = kdb_task_state_string(argc ? argv[1] :
123 NULL);
124 if (argc == 0)
125 kdb_ps_suppressed();
126 /* Run the active tasks first */
127 for_each_online_cpu(cpu) {
128 p = kdb_curr_task(cpu);
129 if (kdb_bt1(p, mask, argcount, btaprompt))
130 return 0;
131 }
132 /* Now the inactive tasks */
133 kdb_do_each_thread(g, p) {
134 if (task_curr(p))
135 continue;
136 if (kdb_bt1(p, mask, argcount, btaprompt))
137 return 0;
138 } kdb_while_each_thread(g, p);
139 } else if (strcmp(argv[0], "btp") == 0) {
140 struct task_struct *p;
141 unsigned long pid;
142 if (argc != 1)
143 return KDB_ARGCOUNT;
144 diag = kdbgetularg((char *)argv[1], &pid);
145 if (diag)
146 return diag;
147 p = find_task_by_pid_ns(pid, &init_pid_ns);
148 if (p) {
149 kdb_set_current_task(p);
150 return kdb_bt1(p, ~0UL, argcount, 0);
151 }
152 kdb_printf("No process with pid == %ld found\n", pid);
153 return 0;
154 } else if (strcmp(argv[0], "btt") == 0) {
155 if (argc != 1)
156 return KDB_ARGCOUNT;
157 diag = kdbgetularg((char *)argv[1], &addr);
158 if (diag)
159 return diag;
160 kdb_set_current_task((struct task_struct *)addr);
161 return kdb_bt1((struct task_struct *)addr, ~0UL, argcount, 0);
162 } else if (strcmp(argv[0], "btc") == 0) {
163 unsigned long cpu = ~0;
164 struct task_struct *save_current_task = kdb_current_task;
165 char buf[80];
166 if (argc > 1)
167 return KDB_ARGCOUNT;
168 if (argc == 1) {
169 diag = kdbgetularg((char *)argv[1], &cpu);
170 if (diag)
171 return diag;
172 }
173 /* Recursive use of kdb_parse, do not use argv after
174 * this point */
175 argv = NULL;
176 if (cpu != ~0) {
177 if (cpu >= num_possible_cpus() || !cpu_online(cpu)) {
178 kdb_printf("no process for cpu %ld\n", cpu);
179 return 0;
180 }
181 sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu));
182 kdb_parse(buf);
183 return 0;
184 }
185 kdb_printf("btc: cpu status: ");
186 kdb_parse("cpu\n");
187 for_each_online_cpu(cpu) {
188 sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu));
189 kdb_parse(buf);
190 touch_nmi_watchdog();
191 }
192 kdb_set_current_task(save_current_task);
193 return 0;
194 } else {
195 if (argc) {
196 nextarg = 1;
197 diag = kdbgetaddrarg(argc, argv, &nextarg, &addr,
198 &offset, NULL);
199 if (diag)
200 return diag;
201 kdb_show_stack(kdb_current_task, (void *)addr);
202 return 0;
203 } else {
204 return kdb_bt1(kdb_current_task, ~0UL, argcount, 0);
205 }
206 }
207
208 /* NOTREACHED */
209 return 0;
210}
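/*
 * Example invocations (pid and address are hypothetical):
 *
 *	kdb> bt			traceback of the current task
 *	kdb> btp 1		traceback of pid 1
 *	kdb> btt 0xc7654320	traceback of the task struct at that address
 *	kdb> btc		traceback of the running task on every cpu
 *	kdb> bta R		all tasks in state R, prompting between
 *				tasks unless BTAPROMPT is 0
 */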
diff --git a/kernel/debug/kdb/kdb_cmds b/kernel/debug/kdb/kdb_cmds
new file mode 100644
index 000000000000..56c88e4db309
--- /dev/null
+++ b/kernel/debug/kdb/kdb_cmds
@@ -0,0 +1,35 @@
1# Initial commands for kdb, alter to suit your needs.
2# These commands are executed in kdb_init() context, no SMP, no
3# processes. Commands that require process data (including stack or
4# registers) are not reliable this early. set and bp commands should
5# be safe. Global breakpoint commands affect each cpu as it is booted.
6
7# Standard debugging information for first level support, just type archkdb
8# or archkdbcpu or archkdbshort at the kdb prompt.
9
10defcmd dumpcommon "" "Common kdb debugging"
11 set BTAPROMPT 0
12 set LINES 10000
13 -summary
14 -cpu
15 -ps
16 -dmesg 600
17 -bt
18endefcmd
19
20defcmd dumpall "" "First line debugging"
21 set BTSYMARG 1
22 set BTARGS 9
23 pid R
24 -dumpcommon
25 -bta
26endefcmd
27
28defcmd dumpcpu "" "Same as dumpall but only tasks on cpus"
29 set BTSYMARG 1
30 set BTARGS 9
31 pid R
32 -dumpcommon
33 -btc
34endefcmd
35
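# A further command set can be added in the same style, e.g. (sketch
# only, commented out so it is not installed by default):
#
# defcmd dumpbt "" "dumpcommon plus backtraces of all tasks"
#  set BTARGS 9
#  -dumpcommon
#  -bta
# endefcmd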
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
new file mode 100644
index 000000000000..bf6e8270e957
--- /dev/null
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -0,0 +1,169 @@
1/*
2 * Created by: Jason Wessel <jason.wessel@windriver.com>
3 *
4 * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
5 *
6 * This file is licensed under the terms of the GNU General Public
7 * License version 2. This program is licensed "as is" without any
8 * warranty of any kind, whether express or implied.
9 */
10
11#include <linux/kgdb.h>
12#include <linux/kdb.h>
13#include <linux/kdebug.h>
14#include "kdb_private.h"
15#include "../debug_core.h"
16
17/*
18 * KDB interface to KGDB internals
19 */
20get_char_func kdb_poll_funcs[] = {
21 dbg_io_get_char,
22 NULL,
23 NULL,
24 NULL,
25 NULL,
26 NULL,
27};
28EXPORT_SYMBOL_GPL(kdb_poll_funcs);
29
30int kdb_poll_idx = 1;
31EXPORT_SYMBOL_GPL(kdb_poll_idx);
32
33int kdb_stub(struct kgdb_state *ks)
34{
35 int error = 0;
36 kdb_bp_t *bp;
37 unsigned long addr = kgdb_arch_pc(ks->ex_vector, ks->linux_regs);
38 kdb_reason_t reason = KDB_REASON_OOPS;
39 kdb_dbtrap_t db_result = KDB_DB_NOBPT;
40 int i;
41
42 if (KDB_STATE(REENTRY)) {
43 reason = KDB_REASON_SWITCH;
44 KDB_STATE_CLEAR(REENTRY);
45 addr = instruction_pointer(ks->linux_regs);
46 }
47 ks->pass_exception = 0;
48 if (atomic_read(&kgdb_setting_breakpoint))
49 reason = KDB_REASON_KEYBOARD;
50
51 for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
52 if ((bp->bp_enabled) && (bp->bp_addr == addr)) {
53 reason = KDB_REASON_BREAK;
54 db_result = KDB_DB_BPT;
55 if (addr != instruction_pointer(ks->linux_regs))
56 kgdb_arch_set_pc(ks->linux_regs, addr);
57 break;
58 }
59 }
60 if (reason == KDB_REASON_BREAK || reason == KDB_REASON_SWITCH) {
61 for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
62 if (bp->bp_free)
63 continue;
64 if (bp->bp_addr == addr) {
65 bp->bp_delay = 1;
66 bp->bp_delayed = 1;
67 /*
68 * SSBPT is set when the kernel debugger must single step a
69 * task in order to re-establish an instruction breakpoint
70 * which uses the instruction replacement mechanism. It is
71 * cleared by any action that removes the need to single-step
72 * the breakpoint.
73 */
74 reason = KDB_REASON_BREAK;
75 db_result = KDB_DB_BPT;
76 KDB_STATE_SET(SSBPT);
77 break;
78 }
79 }
80 }
81
82 if (reason != KDB_REASON_BREAK && ks->ex_vector == 0 &&
83 ks->signo == SIGTRAP) {
84 reason = KDB_REASON_SSTEP;
85 db_result = KDB_DB_BPT;
86 }
87 /* Set initial kdb state variables */
88 KDB_STATE_CLEAR(KGDB_TRANS);
89 kdb_initial_cpu = ks->cpu;
90 kdb_current_task = kgdb_info[ks->cpu].task;
91 kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo;
92 /* Remove any breakpoints as needed by kdb and clear single step */
93 kdb_bp_remove();
94 KDB_STATE_CLEAR(DOING_SS);
95 KDB_STATE_CLEAR(DOING_SSB);
96 KDB_STATE_SET(PAGER);
97 /* zero out any offline cpu data */
98 for_each_present_cpu(i) {
99 if (!cpu_online(i)) {
100 kgdb_info[i].debuggerinfo = NULL;
101 kgdb_info[i].task = NULL;
102 }
103 }
104 if (ks->err_code == DIE_OOPS || reason == KDB_REASON_OOPS) {
105 ks->pass_exception = 1;
106 KDB_FLAG_SET(CATASTROPHIC);
107 }
108 kdb_initial_cpu = ks->cpu;
109 if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) {
110 KDB_STATE_CLEAR(SSBPT);
111 KDB_STATE_CLEAR(DOING_SS);
112 } else {
113 /* Start kdb main loop */
114 error = kdb_main_loop(KDB_REASON_ENTER, reason,
115 ks->err_code, db_result, ks->linux_regs);
116 }
117 /*
118 * Upon exit from the kdb main loop setup break points and restart
119 * the system based on the requested continue state
120 */
121 kdb_initial_cpu = -1;
122 kdb_current_task = NULL;
123 kdb_current_regs = NULL;
124 KDB_STATE_CLEAR(PAGER);
125 kdbnearsym_cleanup();
126 if (error == KDB_CMD_KGDB) {
127 if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) {
128 /*
 129 * This is the interface glue that allows kdb to transition
 130 * into the gdb stub. To do this the '?' or '' gdb serial
 131 * packet response is processed here, and then control is
 132 * passed to the gdbstub.
133 */
134 if (KDB_STATE(DOING_KGDB))
135 gdbstub_state(ks, "?");
136 else
137 gdbstub_state(ks, "");
138 KDB_STATE_CLEAR(DOING_KGDB);
139 KDB_STATE_CLEAR(DOING_KGDB2);
140 }
141 return DBG_PASS_EVENT;
142 }
143 kdb_bp_install(ks->linux_regs);
144 dbg_activate_sw_breakpoints();
145 /* Set the exit state to a single step or a continue */
146 if (KDB_STATE(DOING_SS))
147 gdbstub_state(ks, "s");
148 else
149 gdbstub_state(ks, "c");
150
151 KDB_FLAG_CLEAR(CATASTROPHIC);
152
153 /* Invoke arch specific exception handling prior to system resume */
154 kgdb_info[ks->cpu].ret_state = gdbstub_state(ks, "e");
155 if (ks->pass_exception)
156 kgdb_info[ks->cpu].ret_state = 1;
157 if (error == KDB_CMD_CPU) {
158 KDB_STATE_SET(REENTRY);
159 /*
160 * Force clear the single step bit because kdb emulates this
161 * differently vs the gdbstub
162 */
163 kgdb_single_step = 0;
164 dbg_deactivate_sw_breakpoints();
165 return DBG_SWITCH_CPU_EVENT;
166 }
167 return kgdb_info[ks->cpu].ret_state;
168}
169
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
new file mode 100644
index 000000000000..c9b7f4f90bba
--- /dev/null
+++ b/kernel/debug/kdb/kdb_io.c
@@ -0,0 +1,826 @@
1/*
2 * Kernel Debugger Architecture Independent Console I/O handler
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 *
8 * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
9 * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
10 */
11
12#include <linux/module.h>
13#include <linux/types.h>
14#include <linux/ctype.h>
15#include <linux/kernel.h>
16#include <linux/init.h>
17#include <linux/kdev_t.h>
18#include <linux/console.h>
19#include <linux/string.h>
20#include <linux/sched.h>
21#include <linux/smp.h>
22#include <linux/nmi.h>
23#include <linux/delay.h>
24#include <linux/kgdb.h>
25#include <linux/kdb.h>
26#include <linux/kallsyms.h>
27#include "kdb_private.h"
28
29#define CMD_BUFLEN 256
30char kdb_prompt_str[CMD_BUFLEN];
31
32int kdb_trap_printk;
33
34static void kgdb_transition_check(char *buffer)
35{
36 int slen = strlen(buffer);
37 if (strncmp(buffer, "$?#3f", slen) != 0 &&
38 strncmp(buffer, "$qSupported#37", slen) != 0 &&
39 strncmp(buffer, "+$qSupported#37", slen) != 0) {
40 KDB_STATE_SET(KGDB_TRANS);
41 kdb_printf("%s", buffer);
42 }
43}
44
45static int kdb_read_get_key(char *buffer, size_t bufsize)
46{
47#define ESCAPE_UDELAY 1000
48#define ESCAPE_DELAY (2*1000000/ESCAPE_UDELAY) /* 2 seconds worth of udelays */
49 char escape_data[5]; /* longest vt100 escape sequence is 4 bytes */
50 char *ped = escape_data;
51 int escape_delay = 0;
52 get_char_func *f, *f_escape = NULL;
53 int key;
54
55 for (f = &kdb_poll_funcs[0]; ; ++f) {
56 if (*f == NULL) {
57 /* Reset NMI watchdog once per poll loop */
58 touch_nmi_watchdog();
59 f = &kdb_poll_funcs[0];
60 }
61 if (escape_delay == 2) {
62 *ped = '\0';
63 ped = escape_data;
64 --escape_delay;
65 }
66 if (escape_delay == 1) {
67 key = *ped++;
68 if (!*ped)
69 --escape_delay;
70 break;
71 }
72 key = (*f)();
73 if (key == -1) {
74 if (escape_delay) {
75 udelay(ESCAPE_UDELAY);
76 --escape_delay;
77 }
78 continue;
79 }
80 if (bufsize <= 2) {
81 if (key == '\r')
82 key = '\n';
83 *buffer++ = key;
84 *buffer = '\0';
85 return -1;
86 }
87 if (escape_delay == 0 && key == '\e') {
88 escape_delay = ESCAPE_DELAY;
89 ped = escape_data;
90 f_escape = f;
91 }
92 if (escape_delay) {
93 *ped++ = key;
94 if (f_escape != f) {
95 escape_delay = 2;
96 continue;
97 }
98 if (ped - escape_data == 1) {
99 /* \e */
100 continue;
101 } else if (ped - escape_data == 2) {
102 /* \e<something> */
103 if (key != '[')
104 escape_delay = 2;
105 continue;
106 } else if (ped - escape_data == 3) {
107 /* \e[<something> */
108 int mapkey = 0;
109 switch (key) {
110 case 'A': /* \e[A, up arrow */
111 mapkey = 16;
112 break;
113 case 'B': /* \e[B, down arrow */
114 mapkey = 14;
115 break;
116 case 'C': /* \e[C, right arrow */
117 mapkey = 6;
118 break;
119 case 'D': /* \e[D, left arrow */
120 mapkey = 2;
121 break;
122 case '1': /* dropthrough */
123 case '3': /* dropthrough */
124 /* \e[<1,3,4>], may be home, del, end */
125 case '4':
126 mapkey = -1;
127 break;
128 }
129 if (mapkey != -1) {
130 if (mapkey > 0) {
131 escape_data[0] = mapkey;
132 escape_data[1] = '\0';
133 }
134 escape_delay = 2;
135 }
136 continue;
137 } else if (ped - escape_data == 4) {
138 /* \e[<1,3,4><something> */
139 int mapkey = 0;
140 if (key == '~') {
141 switch (escape_data[2]) {
142 case '1': /* \e[1~, home */
143 mapkey = 1;
144 break;
145 case '3': /* \e[3~, del */
146 mapkey = 4;
147 break;
148 case '4': /* \e[4~, end */
149 mapkey = 5;
150 break;
151 }
152 }
153 if (mapkey > 0) {
154 escape_data[0] = mapkey;
155 escape_data[1] = '\0';
156 }
157 escape_delay = 2;
158 continue;
159 }
160 }
161 break; /* A key to process */
162 }
163 return key;
164}
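/*
 * Example: pressing the up arrow on a vt100 style console arrives as the
 * three bytes '\e', '[', 'A'.  The state machine above collects them in
 * escape_data[] and collapses the sequence to the single control code 16,
 * the same value the keyboard driver returns for "Up".  A lone ESC that
 * is followed by roughly two seconds of silence falls out of the delay
 * countdown and is returned as a plain '\e'.
 */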
165
166/*
167 * kdb_read
168 *
169 * This function reads a string of characters, terminated by
170 * a newline, or by reaching the end of the supplied buffer,
171 * from the current kernel debugger console device.
172 * Parameters:
173 * buffer - Address of character buffer to receive input characters.
174 * bufsize - size, in bytes, of the character buffer
175 * Returns:
176 * Returns a pointer to the buffer containing the received
177 * character string. This string will be terminated by a
178 * newline character.
179 * Locking:
180 * No locks are required to be held upon entry to this
181 * function. It is not reentrant - it relies on the fact
 182 * that kdb runs on only one "master debug" cpu at a time.
183 * Remarks:
184 *
185 * The buffer size must be >= 2. A buffer size of 2 means that the caller only
186 * wants a single key.
187 *
188 * An escape key could be the start of a vt100 control sequence such as \e[D
189 * (left arrow) or it could be a character in its own right. The standard
190 * method for detecting the difference is to wait for 2 seconds to see if there
191 * are any other characters. kdb is complicated by the lack of a timer service
192 * (interrupts are off), by multiple input sources and by the need to sometimes
193 * return after just one key. Escape sequence processing has to be done as
194 * states in the polling loop.
195 */
196
197static char *kdb_read(char *buffer, size_t bufsize)
198{
199 char *cp = buffer;
200 char *bufend = buffer+bufsize-2; /* Reserve space for newline
201 * and null byte */
202 char *lastchar;
203 char *p_tmp;
204 char tmp;
205 static char tmpbuffer[CMD_BUFLEN];
206 int len = strlen(buffer);
207 int len_tmp;
208 int tab = 0;
209 int count;
210 int i;
211 int diag, dtab_count;
212 int key;
213
214
215 diag = kdbgetintenv("DTABCOUNT", &dtab_count);
216 if (diag)
217 dtab_count = 30;
218
219 if (len > 0) {
220 cp += len;
221 if (*(buffer+len-1) == '\n')
222 cp--;
223 }
224
225 lastchar = cp;
226 *cp = '\0';
227 kdb_printf("%s", buffer);
228poll_again:
229 key = kdb_read_get_key(buffer, bufsize);
230 if (key == -1)
231 return buffer;
232 if (key != 9)
233 tab = 0;
234 switch (key) {
235 case 8: /* backspace */
236 if (cp > buffer) {
237 if (cp < lastchar) {
238 memcpy(tmpbuffer, cp, lastchar - cp);
239 memcpy(cp-1, tmpbuffer, lastchar - cp);
240 }
241 *(--lastchar) = '\0';
242 --cp;
243 kdb_printf("\b%s \r", cp);
244 tmp = *cp;
245 *cp = '\0';
246 kdb_printf(kdb_prompt_str);
247 kdb_printf("%s", buffer);
248 *cp = tmp;
249 }
250 break;
251 case 13: /* enter */
252 *lastchar++ = '\n';
253 *lastchar++ = '\0';
254 kdb_printf("\n");
255 return buffer;
256 case 4: /* Del */
257 if (cp < lastchar) {
258 memcpy(tmpbuffer, cp+1, lastchar - cp - 1);
259 memcpy(cp, tmpbuffer, lastchar - cp - 1);
260 *(--lastchar) = '\0';
261 kdb_printf("%s \r", cp);
262 tmp = *cp;
263 *cp = '\0';
264 kdb_printf(kdb_prompt_str);
265 kdb_printf("%s", buffer);
266 *cp = tmp;
267 }
268 break;
269 case 1: /* Home */
270 if (cp > buffer) {
271 kdb_printf("\r");
272 kdb_printf(kdb_prompt_str);
273 cp = buffer;
274 }
275 break;
276 case 5: /* End */
277 if (cp < lastchar) {
278 kdb_printf("%s", cp);
279 cp = lastchar;
280 }
281 break;
282 case 2: /* Left */
283 if (cp > buffer) {
284 kdb_printf("\b");
285 --cp;
286 }
287 break;
288 case 14: /* Down */
289 memset(tmpbuffer, ' ',
290 strlen(kdb_prompt_str) + (lastchar-buffer));
291 *(tmpbuffer+strlen(kdb_prompt_str) +
292 (lastchar-buffer)) = '\0';
293 kdb_printf("\r%s\r", tmpbuffer);
294 *lastchar = (char)key;
295 *(lastchar+1) = '\0';
296 return lastchar;
297 case 6: /* Right */
298 if (cp < lastchar) {
299 kdb_printf("%c", *cp);
300 ++cp;
301 }
302 break;
303 case 16: /* Up */
304 memset(tmpbuffer, ' ',
305 strlen(kdb_prompt_str) + (lastchar-buffer));
306 *(tmpbuffer+strlen(kdb_prompt_str) +
307 (lastchar-buffer)) = '\0';
308 kdb_printf("\r%s\r", tmpbuffer);
309 *lastchar = (char)key;
310 *(lastchar+1) = '\0';
311 return lastchar;
312 case 9: /* Tab */
313 if (tab < 2)
314 ++tab;
315 p_tmp = buffer;
316 while (*p_tmp == ' ')
317 p_tmp++;
318 if (p_tmp > cp)
319 break;
320 memcpy(tmpbuffer, p_tmp, cp-p_tmp);
321 *(tmpbuffer + (cp-p_tmp)) = '\0';
322 p_tmp = strrchr(tmpbuffer, ' ');
323 if (p_tmp)
324 ++p_tmp;
325 else
326 p_tmp = tmpbuffer;
327 len = strlen(p_tmp);
328 count = kallsyms_symbol_complete(p_tmp,
329 sizeof(tmpbuffer) -
330 (p_tmp - tmpbuffer));
331 if (tab == 2 && count > 0) {
332 kdb_printf("\n%d symbols are found.", count);
333 if (count > dtab_count) {
334 count = dtab_count;
335 kdb_printf(" But only first %d symbols will"
336 " be printed.\nYou can change the"
337 " environment variable DTABCOUNT.",
338 count);
339 }
340 kdb_printf("\n");
341 for (i = 0; i < count; i++) {
342 if (kallsyms_symbol_next(p_tmp, i) < 0)
343 break;
344 kdb_printf("%s ", p_tmp);
345 *(p_tmp + len) = '\0';
346 }
347 if (i >= dtab_count)
348 kdb_printf("...");
349 kdb_printf("\n");
350 kdb_printf(kdb_prompt_str);
351 kdb_printf("%s", buffer);
352 } else if (tab != 2 && count > 0) {
353 len_tmp = strlen(p_tmp);
354 strncpy(p_tmp+len_tmp, cp, lastchar-cp+1);
355 len_tmp = strlen(p_tmp);
356 strncpy(cp, p_tmp+len, len_tmp-len + 1);
357 len = len_tmp - len;
358 kdb_printf("%s", cp);
359 cp += len;
360 lastchar += len;
361 }
362 kdb_nextline = 1; /* reset output line number */
363 break;
364 default:
365 if (key >= 32 && lastchar < bufend) {
366 if (cp < lastchar) {
367 memcpy(tmpbuffer, cp, lastchar - cp);
368 memcpy(cp+1, tmpbuffer, lastchar - cp);
369 *++lastchar = '\0';
370 *cp = key;
371 kdb_printf("%s\r", cp);
372 ++cp;
373 tmp = *cp;
374 *cp = '\0';
375 kdb_printf(kdb_prompt_str);
376 kdb_printf("%s", buffer);
377 *cp = tmp;
378 } else {
379 *++lastchar = '\0';
380 *cp++ = key;
381 /* The kgdb transition check will hide
382 * printed characters if we think that
383 * kgdb is connecting, until the check
384 * fails */
385 if (!KDB_STATE(KGDB_TRANS))
386 kgdb_transition_check(buffer);
387 else
388 kdb_printf("%c", key);
389 }
390 /* Special escape to kgdb */
391 if (lastchar - buffer >= 5 &&
392 strcmp(lastchar - 5, "$?#3f") == 0) {
393 strcpy(buffer, "kgdb");
394 KDB_STATE_SET(DOING_KGDB);
395 return buffer;
396 }
397 if (lastchar - buffer >= 14 &&
398 strcmp(lastchar - 14, "$qSupported#37") == 0) {
399 strcpy(buffer, "kgdb");
400 KDB_STATE_SET(DOING_KGDB2);
401 return buffer;
402 }
403 }
404 break;
405 }
406 goto poll_again;
407}
408
409/*
410 * kdb_getstr
411 *
412 * Print the prompt string and read a command from the
413 * input device.
414 *
415 * Parameters:
416 * buffer Address of buffer to receive command
417 * bufsize Size of buffer in bytes
418 * prompt Pointer to string to use as prompt string
419 * Returns:
420 * Pointer to command buffer.
421 * Locking:
422 * None.
423 * Remarks:
424 * For SMP kernels, the processor number will be
425 * substituted for %d, %x or %o in the prompt.
426 */
427
428char *kdb_getstr(char *buffer, size_t bufsize, char *prompt)
429{
430 if (prompt && kdb_prompt_str != prompt)
431 strncpy(kdb_prompt_str, prompt, CMD_BUFLEN);
432 kdb_printf(kdb_prompt_str);
433 kdb_nextline = 1; /* Prompt and input resets line number */
434 return kdb_read(buffer, bufsize);
435}
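/*
 * Example (illustrative value): with the prompt string "[%d]kdb> ", the
 * prompt printed on cpu 2 of an SMP kernel would appear as "[2]kdb> ",
 * per the processor-number substitution described above.
 */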
436
437/*
438 * kdb_input_flush
439 *
440 * Get rid of any buffered console input.
441 *
442 * Parameters:
443 * none
444 * Returns:
445 * nothing
446 * Locking:
447 * none
448 * Remarks:
449 * Call this function whenever you want to flush input. If there is any
450 * outstanding input, it ignores all characters until there has been no
451 * data for approximately 1ms.
452 */
453
454static void kdb_input_flush(void)
455{
456 get_char_func *f;
457 int res;
458 int flush_delay = 1;
459 while (flush_delay) {
460 flush_delay--;
461empty:
462 touch_nmi_watchdog();
463 for (f = &kdb_poll_funcs[0]; *f; ++f) {
464 res = (*f)();
465 if (res != -1) {
466 flush_delay = 1;
467 goto empty;
468 }
469 }
470 if (flush_delay)
471 mdelay(1);
472 }
473}
474
475/*
476 * kdb_printf
477 *
478 * Print a string to the output device(s).
479 *
480 * Parameters:
481 * printf-like format and optional args.
482 * Returns:
483 * 0
484 * Locking:
485 * None.
486 * Remarks:
487 * use 'kdbcons->write()' to avoid polluting 'log_buf' with
488 * kdb output.
489 *
490 * If the user is doing a cmd args | grep srch
491 * then kdb_grepping_flag is set.
492 * In that case we need to accumulate full lines (ending in \n) before
493 * searching for the pattern.
494 */
495
496static char kdb_buffer[256]; /* A bit too big to go on stack */
497static char *next_avail = kdb_buffer;
498static int size_avail;
499static int suspend_grep;
500
501/*
502 * search arg1 to see if it contains arg2
 503 * (kdb_main.c provides flags for ^pat and pat$)
504 *
505 * return 1 for found, 0 for not found
506 */
507static int kdb_search_string(char *searched, char *searchfor)
508{
509 char firstchar, *cp;
510 int len1, len2;
511
512 /* not counting the newline at the end of "searched" */
513 len1 = strlen(searched)-1;
514 len2 = strlen(searchfor);
515 if (len1 < len2)
516 return 0;
517 if (kdb_grep_leading && kdb_grep_trailing && len1 != len2)
518 return 0;
519 if (kdb_grep_leading) {
520 if (!strncmp(searched, searchfor, len2))
521 return 1;
522 } else if (kdb_grep_trailing) {
523 if (!strncmp(searched+len1-len2, searchfor, len2))
524 return 1;
525 } else {
526 firstchar = *searchfor;
527 cp = searched;
528 while ((cp = strchr(cp, firstchar))) {
529 if (!strncmp(cp, searchfor, len2))
530 return 1;
531 cp++;
532 }
533 }
534 return 0;
535}
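/*
 * Examples, assuming the user typed "cmd args | grep pattern":
 *
 *	grep foo	matches any line containing "foo"
 *	grep ^foo	kdb_grep_leading: line must start with "foo"
 *	grep foo$	kdb_grep_trailing: line must end with "foo"
 *	grep ^foo$	both flags set: the line must be exactly "foo"
 */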
536
537int vkdb_printf(const char *fmt, va_list ap)
538{
539 int diag;
540 int linecount;
541 int logging, saved_loglevel = 0;
542 int saved_trap_printk;
543 int got_printf_lock = 0;
544 int retlen = 0;
545 int fnd, len;
546 char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
547 char *moreprompt = "more> ";
548 struct console *c = console_drivers;
549 static DEFINE_SPINLOCK(kdb_printf_lock);
550 unsigned long uninitialized_var(flags);
551
552 preempt_disable();
553 saved_trap_printk = kdb_trap_printk;
554 kdb_trap_printk = 0;
555
556 /* Serialize kdb_printf if multiple cpus try to write at once.
557 * But if any cpu goes recursive in kdb, just print the output,
558 * even if it is interleaved with any other text.
559 */
560 if (!KDB_STATE(PRINTF_LOCK)) {
561 KDB_STATE_SET(PRINTF_LOCK);
562 spin_lock_irqsave(&kdb_printf_lock, flags);
563 got_printf_lock = 1;
564 atomic_inc(&kdb_event);
565 } else {
566 __acquire(kdb_printf_lock);
567 }
568
569 diag = kdbgetintenv("LINES", &linecount);
570 if (diag || linecount <= 1)
571 linecount = 24;
572
573 diag = kdbgetintenv("LOGGING", &logging);
574 if (diag)
575 logging = 0;
576
577 if (!kdb_grepping_flag || suspend_grep) {
578 /* normally, every vsnprintf starts a new buffer */
579 next_avail = kdb_buffer;
580 size_avail = sizeof(kdb_buffer);
581 }
582 vsnprintf(next_avail, size_avail, fmt, ap);
583
584 /*
585 * If kdb_parse() found that the command was cmd xxx | grep yyy
586 * then kdb_grepping_flag is set, and kdb_grep_string contains yyy
587 *
588 * Accumulate the print data up to a newline before searching it.
589 * (vsnprintf does null-terminate the string that it generates)
590 */
591
592 /* skip the search if prints are temporarily unconditional */
593 if (!suspend_grep && kdb_grepping_flag) {
594 cp = strchr(kdb_buffer, '\n');
595 if (!cp) {
596 /*
597 * Special cases that don't end with newlines
598 * but should be written without one:
599 * The "[nn]kdb> " prompt should
600 * appear at the front of the buffer.
601 *
 602 * The "[nn]more " prompt (MOREPROMPT ->
 603 * moreprompt) should also be written, but
 604 * we print that ourselves; we set the
 605 * suspend_grep flag to make it
 606 * unconditional.
607 *
608 */
609 if (next_avail == kdb_buffer) {
610 /*
611 * these should occur after a newline,
612 * so they will be at the front of the
613 * buffer
614 */
615 cp2 = kdb_buffer;
616 len = strlen(kdb_prompt_str);
617 if (!strncmp(cp2, kdb_prompt_str, len)) {
618 /*
619 * We're about to start a new
620 * command, so we can go back
621 * to normal mode.
622 */
623 kdb_grepping_flag = 0;
624 goto kdb_printit;
625 }
626 }
627 /* no newline; don't search/write the buffer
628 until one is there */
629 len = strlen(kdb_buffer);
630 next_avail = kdb_buffer + len;
631 size_avail = sizeof(kdb_buffer) - len;
632 goto kdb_print_out;
633 }
634
635 /*
636 * The newline is present; print through it or discard
637 * it, depending on the results of the search.
638 */
639 cp++; /* to byte after the newline */
640 replaced_byte = *cp; /* remember what/where it was */
641 cphold = cp;
642 *cp = '\0'; /* end the string for our search */
643
644 /*
645 * We now have a newline at the end of the string
646 * Only continue with this output if it contains the
647 * search string.
648 */
649 fnd = kdb_search_string(kdb_buffer, kdb_grep_string);
650 if (!fnd) {
651 /*
652 * At this point the complete line at the start
653 * of kdb_buffer can be discarded, as it does
654 * not contain what the user is looking for.
655 * Shift the buffer left.
656 */
657 *cphold = replaced_byte;
658 strcpy(kdb_buffer, cphold);
659 len = strlen(kdb_buffer);
660 next_avail = kdb_buffer + len;
661 size_avail = sizeof(kdb_buffer) - len;
662 goto kdb_print_out;
663 }
664 /*
665 * at this point the string is a full line and
666 * should be printed, up to the null.
667 */
668 }
669kdb_printit:
670
671 /*
672 * Write to all consoles.
673 */
674 retlen = strlen(kdb_buffer);
675 if (!dbg_kdb_mode && kgdb_connected) {
676 gdbstub_msg_write(kdb_buffer, retlen);
677 } else {
678 if (!dbg_io_ops->is_console) {
679 len = strlen(kdb_buffer);
680 cp = kdb_buffer;
681 while (len--) {
682 dbg_io_ops->write_char(*cp);
683 cp++;
684 }
685 }
686 while (c) {
687 c->write(c, kdb_buffer, retlen);
688 touch_nmi_watchdog();
689 c = c->next;
690 }
691 }
692 if (logging) {
693 saved_loglevel = console_loglevel;
694 console_loglevel = 0;
695 printk(KERN_INFO "%s", kdb_buffer);
696 }
697
698 if (KDB_STATE(PAGER) && strchr(kdb_buffer, '\n'))
699 kdb_nextline++;
700
701 /* check for having reached the LINES number of printed lines */
702 if (kdb_nextline == linecount) {
703 char buf1[16] = "";
704#if defined(CONFIG_SMP)
705 char buf2[32];
706#endif
707
708 /* Watch out for recursion here. Any routine that calls
709 * kdb_printf will come back through here. And kdb_read
710 * uses kdb_printf to echo on serial consoles ...
711 */
712 kdb_nextline = 1; /* In case of recursion */
713
714 /*
715 * Pause until cr.
716 */
717 moreprompt = kdbgetenv("MOREPROMPT");
718 if (moreprompt == NULL)
719 moreprompt = "more> ";
720
721#if defined(CONFIG_SMP)
722 if (strchr(moreprompt, '%')) {
723 sprintf(buf2, moreprompt, get_cpu());
724 put_cpu();
725 moreprompt = buf2;
726 }
727#endif
728
729 kdb_input_flush();
730 c = console_drivers;
731
732 if (!dbg_io_ops->is_console) {
733 len = strlen(moreprompt);
734 cp = moreprompt;
735 while (len--) {
736 dbg_io_ops->write_char(*cp);
737 cp++;
738 }
739 }
740 while (c) {
741 c->write(c, moreprompt, strlen(moreprompt));
742 touch_nmi_watchdog();
743 c = c->next;
744 }
745
746 if (logging)
747 printk("%s", moreprompt);
748
749 kdb_read(buf1, 2); /* '2' indicates to return
750 * immediately after getting one key. */
751 kdb_nextline = 1; /* Really set output line 1 */
752
753 /* empty and reset the buffer: */
754 kdb_buffer[0] = '\0';
755 next_avail = kdb_buffer;
756 size_avail = sizeof(kdb_buffer);
757 if ((buf1[0] == 'q') || (buf1[0] == 'Q')) {
758 /* user hit q or Q */
759 KDB_FLAG_SET(CMD_INTERRUPT); /* command interrupted */
760 KDB_STATE_CLEAR(PAGER);
761 /* end of command output; back to normal mode */
762 kdb_grepping_flag = 0;
763 kdb_printf("\n");
764 } else if (buf1[0] == ' ') {
765 kdb_printf("\n");
766 suspend_grep = 1; /* for this recursion */
767 } else if (buf1[0] == '\n') {
768 kdb_nextline = linecount - 1;
769 kdb_printf("\r");
770 suspend_grep = 1; /* for this recursion */
771 } else if (buf1[0] && buf1[0] != '\n') {
772 /* user hit something other than enter */
773 suspend_grep = 1; /* for this recursion */
774 kdb_printf("\nOnly 'q' or 'Q' are processed at more "
775 "prompt, input ignored\n");
776 } else if (kdb_grepping_flag) {
777 /* user hit enter */
778 suspend_grep = 1; /* for this recursion */
779 kdb_printf("\n");
780 }
781 kdb_input_flush();
782 }
783
784 /*
785 * For grep searches, shift the printed string left.
786 * replaced_byte contains the character that was overwritten with
787 * the terminating null, and cphold points to the null.
788 * Then adjust the notion of available space in the buffer.
789 */
790 if (kdb_grepping_flag && !suspend_grep) {
791 *cphold = replaced_byte;
792 strcpy(kdb_buffer, cphold);
793 len = strlen(kdb_buffer);
794 next_avail = kdb_buffer + len;
795 size_avail = sizeof(kdb_buffer) - len;
796 }
797
798kdb_print_out:
799 suspend_grep = 0; /* end of what may have been a recursive call */
800 if (logging)
801 console_loglevel = saved_loglevel;
802 if (KDB_STATE(PRINTF_LOCK) && got_printf_lock) {
803 got_printf_lock = 0;
804 spin_unlock_irqrestore(&kdb_printf_lock, flags);
805 KDB_STATE_CLEAR(PRINTF_LOCK);
806 atomic_dec(&kdb_event);
807 } else {
808 __release(kdb_printf_lock);
809 }
810 kdb_trap_printk = saved_trap_printk;
811 preempt_enable();
812 return retlen;
813}
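/*
 * The pager behaviour above is driven by kdb environment variables, e.g.
 * (values illustrative):
 *
 *	kdb> set LINES 50	pause after 50 lines of output
 *	kdb> set MOREPROMPT more>	change the continuation prompt
 *
 * At the "more> " prompt: q/Q interrupts the command, space prints the
 * next page, and enter advances a single line.
 */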
814
815int kdb_printf(const char *fmt, ...)
816{
817 va_list ap;
818 int r;
819
820 va_start(ap, fmt);
821 r = vkdb_printf(fmt, ap);
822 va_end(ap);
823
824 return r;
825}
826
diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c
new file mode 100644
index 000000000000..4bca634975c0
--- /dev/null
+++ b/kernel/debug/kdb/kdb_keyboard.c
@@ -0,0 +1,212 @@
1/*
2 * Kernel Debugger Architecture Dependent Console I/O handler
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License.
6 *
7 * Copyright (c) 1999-2006 Silicon Graphics, Inc. All Rights Reserved.
8 * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
9 */
10
11#include <linux/kdb.h>
12#include <linux/keyboard.h>
13#include <linux/ctype.h>
14#include <linux/module.h>
15#include <linux/io.h>
16
17/* Keyboard Controller Registers on normal PCs. */
18
19#define KBD_STATUS_REG 0x64 /* Status register (R) */
20#define KBD_DATA_REG 0x60 /* Keyboard data register (R/W) */
21
22/* Status Register Bits */
23
24#define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */
25#define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */
26
27static int kbd_exists;
28
29/*
30 * Check if the keyboard controller has a keypress for us.
 31 * Some parts (Enter Release, LED change) are still polled here in a
 32 * blocking fashion, but hopefully they are all short.
33 */
34int kdb_get_kbd_char(void)
35{
36 int scancode, scanstatus;
37 static int shift_lock; /* CAPS LOCK state (0-off, 1-on) */
38 static int shift_key; /* Shift next keypress */
39 static int ctrl_key;
40 u_short keychar;
41
42 if (KDB_FLAG(NO_I8042) || KDB_FLAG(NO_VT_CONSOLE) ||
43 (inb(KBD_STATUS_REG) == 0xff && inb(KBD_DATA_REG) == 0xff)) {
44 kbd_exists = 0;
45 return -1;
46 }
47 kbd_exists = 1;
48
49 if ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0)
50 return -1;
51
52 /*
53 * Fetch the scancode
54 */
55 scancode = inb(KBD_DATA_REG);
56 scanstatus = inb(KBD_STATUS_REG);
57
58 /*
59 * Ignore mouse events.
60 */
61 if (scanstatus & KBD_STAT_MOUSE_OBF)
62 return -1;
63
64 /*
65 * Ignore release, trigger on make
66 * (except for shift keys, where we want to
67 * keep the shift state so long as the key is
68 * held down).
69 */
70
71 if (((scancode&0x7f) == 0x2a) || ((scancode&0x7f) == 0x36)) {
72 /*
73 * Next key may use shift table
74 */
75 if ((scancode & 0x80) == 0)
76 shift_key = 1;
77 else
78 shift_key = 0;
79 return -1;
80 }
81
82 if ((scancode&0x7f) == 0x1d) {
83 /*
84 * Left ctrl key
85 */
86 if ((scancode & 0x80) == 0)
87 ctrl_key = 1;
88 else
89 ctrl_key = 0;
90 return -1;
91 }
92
93 if ((scancode & 0x80) != 0)
94 return -1;
95
96 scancode &= 0x7f;
97
98 /*
99 * Translate scancode
100 */
101
102 if (scancode == 0x3a) {
103 /*
104 * Toggle caps lock
105 */
106 shift_lock ^= 1;
107
108#ifdef KDB_BLINK_LED
109 kdb_toggleled(0x4);
110#endif
111 return -1;
112 }
113
114 if (scancode == 0x0e) {
115 /*
116 * Backspace
117 */
118 return 8;
119 }
120
121 /* Special Key */
122 switch (scancode) {
123 case 0xF: /* Tab */
124 return 9;
125 case 0x53: /* Del */
126 return 4;
127 case 0x47: /* Home */
128 return 1;
129 case 0x4F: /* End */
130 return 5;
131 case 0x4B: /* Left */
132 return 2;
133 case 0x48: /* Up */
134 return 16;
135 case 0x50: /* Down */
136 return 14;
137 case 0x4D: /* Right */
138 return 6;
139 }
140
141 if (scancode == 0xe0)
142 return -1;
143
144 /*
145 * For Japanese 86/106 keyboards
146 * See comment in drivers/char/pc_keyb.c.
147 * - Masahiro Adegawa
148 */
149 if (scancode == 0x73)
150 scancode = 0x59;
151 else if (scancode == 0x7d)
152 scancode = 0x7c;
153
154 if (!shift_lock && !shift_key && !ctrl_key) {
155 keychar = plain_map[scancode];
156 } else if ((shift_lock || shift_key) && key_maps[1]) {
157 keychar = key_maps[1][scancode];
158 } else if (ctrl_key && key_maps[4]) {
159 keychar = key_maps[4][scancode];
160 } else {
161 keychar = 0x0020;
162 kdb_printf("Unknown state/scancode (%d)\n", scancode);
163 }
164 keychar &= 0x0fff;
165 if (keychar == '\t')
166 keychar = ' ';
167 switch (KTYP(keychar)) {
168 case KT_LETTER:
169 case KT_LATIN:
170 if (isprint(keychar))
171 break; /* printable characters */
172 /* drop through */
173 case KT_SPEC:
174 if (keychar == K_ENTER)
175 break;
176 /* drop through */
177 default:
178 return -1; /* ignore unprintables */
179 }
180
181 if ((scancode & 0x7f) == 0x1c) {
182 /*
183 * enter key. All done. Absorb the release scancode.
184 */
185 while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0)
186 ;
187
188 /*
189 * Fetch the scancode
190 */
191 scancode = inb(KBD_DATA_REG);
192 scanstatus = inb(KBD_STATUS_REG);
193
194 while (scanstatus & KBD_STAT_MOUSE_OBF) {
195 scancode = inb(KBD_DATA_REG);
196 scanstatus = inb(KBD_STATUS_REG);
197 }
198
199 if (scancode != 0x9c) {
200 /*
201 * Wasn't an enter-release, why not?
202 */
203 kdb_printf("kdb: expected enter got 0x%x status 0x%x\n",
204 scancode, scanstatus);
205 }
206
207 return 13;
208 }
209
210 return keychar & 0xff;
211}
212EXPORT_SYMBOL_GPL(kdb_get_kbd_char);
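The make/break handling in kdb_get_kbd_char() is a small state machine: bit 7 of the scancode marks a release, and the shift/ctrl make-codes only update state instead of producing a character. A self-contained user-space sketch of that logic over a canned scancode stream (the tables and names are illustrative, not the kernel keymaps):

#include <stdio.h>

/* Tiny illustrative plain/shift maps; only a few make-codes filled in. */
static const char plain[128] = {
        [0x1e] = 'a', [0x30] = 'b', [0x2e] = 'c', [0x1c] = '\n',
};
static const char shifted[128] = {
        [0x1e] = 'A', [0x30] = 'B', [0x2e] = 'C', [0x1c] = '\n',
};

/* Returns a character, or -1 when the scancode only changed state. */
static int translate(int scancode, int *shift)
{
        int code = scancode & 0x7f;
        int release = scancode & 0x80;

        if (code == 0x2a || code == 0x36) {     /* left/right shift */
                *shift = !release;              /* track while held */
                return -1;
        }
        if (release)                            /* ignore other releases */
                return -1;
        return (*shift ? shifted : plain)[code];
}

int main(void)
{
        /* press a, press shift, press b, release shift, press c */
        int stream[] = { 0x1e, 0x2a, 0x30, 0xaa, 0x2e };
        int shift = 0;
        unsigned i;

        for (i = 0; i < sizeof(stream) / sizeof(stream[0]); i++) {
                int c = translate(stream[i], &shift);
                if (c > 0)
                        putchar(c);
        }
        putchar('\n');                          /* prints "aBc" */
        return 0;
}
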
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
new file mode 100644
index 000000000000..b724c791b6d4
--- /dev/null
+++ b/kernel/debug/kdb/kdb_main.c
@@ -0,0 +1,2849 @@
1/*
2 * Kernel Debugger Architecture Independent Main Code
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 *
8 * Copyright (C) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
9 * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
10 * Xscale (R) modifications copyright (C) 2003 Intel Corporation.
11 * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
12 */
13
14#include <linux/ctype.h>
15#include <linux/string.h>
16#include <linux/kernel.h>
17#include <linux/reboot.h>
18#include <linux/sched.h>
19#include <linux/sysrq.h>
20#include <linux/smp.h>
21#include <linux/utsname.h>
22#include <linux/vmalloc.h>
23#include <linux/module.h>
24#include <linux/mm.h>
25#include <linux/init.h>
26#include <linux/kallsyms.h>
27#include <linux/kgdb.h>
28#include <linux/kdb.h>
29#include <linux/notifier.h>
30#include <linux/interrupt.h>
31#include <linux/delay.h>
32#include <linux/nmi.h>
33#include <linux/time.h>
34#include <linux/ptrace.h>
35#include <linux/sysctl.h>
36#include <linux/cpu.h>
37#include <linux/kdebug.h>
38#include <linux/proc_fs.h>
39#include <linux/uaccess.h>
40#include <linux/slab.h>
41#include "kdb_private.h"
42
43#define GREP_LEN 256
44char kdb_grep_string[GREP_LEN];
45int kdb_grepping_flag;
46EXPORT_SYMBOL(kdb_grepping_flag);
47int kdb_grep_leading;
48int kdb_grep_trailing;
49
50/*
51 * Kernel debugger state flags
52 */
53int kdb_flags;
54atomic_t kdb_event;
55
56/*
57 * kdb_lock protects updates to kdb_initial_cpu. Used to
58 * single thread processors through the kernel debugger.
59 */
60int kdb_initial_cpu = -1; /* cpu number that owns kdb */
61int kdb_nextline = 1;
62int kdb_state; /* General KDB state */
63
64struct task_struct *kdb_current_task;
65EXPORT_SYMBOL(kdb_current_task);
66struct pt_regs *kdb_current_regs;
67
68const char *kdb_diemsg;
69static int kdb_go_count;
70#ifdef CONFIG_KDB_CONTINUE_CATASTROPHIC
71static unsigned int kdb_continue_catastrophic =
72 CONFIG_KDB_CONTINUE_CATASTROPHIC;
73#else
74static unsigned int kdb_continue_catastrophic;
75#endif
76
77/* kdb_commands describes the available commands. */
78static kdbtab_t *kdb_commands;
79#define KDB_BASE_CMD_MAX 50
80static int kdb_max_commands = KDB_BASE_CMD_MAX;
81static kdbtab_t kdb_base_commands[50];
82#define for_each_kdbcmd(cmd, num) \
83 for ((cmd) = kdb_base_commands, (num) = 0; \
84 num < kdb_max_commands; \
85 num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++)
86
87typedef struct _kdbmsg {
88 int km_diag; /* kdb diagnostic */
89 char *km_msg; /* Corresponding message text */
90} kdbmsg_t;
91
92#define KDBMSG(msgnum, text) \
93 { KDB_##msgnum, text }
94
95static kdbmsg_t kdbmsgs[] = {
96 KDBMSG(NOTFOUND, "Command Not Found"),
97 KDBMSG(ARGCOUNT, "Improper argument count, see usage."),
 98 KDBMSG(BADWIDTH, "Illegal value for BYTESPERWORD; use 1, 2, 4 or 8, "
 99 "8 is only allowed on 64 bit systems"),
 100 KDBMSG(BADRADIX, "Illegal value for RADIX; use 8, 10 or 16"),
101 KDBMSG(NOTENV, "Cannot find environment variable"),
102 KDBMSG(NOENVVALUE, "Environment variable should have value"),
103 KDBMSG(NOTIMP, "Command not implemented"),
104 KDBMSG(ENVFULL, "Environment full"),
105 KDBMSG(ENVBUFFULL, "Environment buffer full"),
106 KDBMSG(TOOMANYBPT, "Too many breakpoints defined"),
107#ifdef CONFIG_CPU_XSCALE
108 KDBMSG(TOOMANYDBREGS, "More breakpoints than ibcr registers defined"),
109#else
110 KDBMSG(TOOMANYDBREGS, "More breakpoints than db registers defined"),
111#endif
112 KDBMSG(DUPBPT, "Duplicate breakpoint address"),
113 KDBMSG(BPTNOTFOUND, "Breakpoint not found"),
114 KDBMSG(BADMODE, "Invalid IDMODE"),
115 KDBMSG(BADINT, "Illegal numeric value"),
116 KDBMSG(INVADDRFMT, "Invalid symbolic address format"),
117 KDBMSG(BADREG, "Invalid register name"),
118 KDBMSG(BADCPUNUM, "Invalid cpu number"),
119 KDBMSG(BADLENGTH, "Invalid length field"),
120 KDBMSG(NOBP, "No Breakpoint exists"),
121 KDBMSG(BADADDR, "Invalid address"),
122};
123#undef KDBMSG
124
125static const int __nkdb_err = ARRAY_SIZE(kdbmsgs);
126
127
128/*
129 * Initial environment. This is all kept static and local to
130 * this file. We don't want to rely on the memory allocation
131 * mechanisms in the kernel, so we use a very limited allocate-only
132 * heap for new and altered environment variables. The entire
133 * environment is limited to a fixed number of entries (add more
134 * to __env[] if required) and a fixed amount of heap (add more to
135 * KDB_ENVBUFSIZE if required).
136 */
137
138static char *__env[] = {
139#if defined(CONFIG_SMP)
140 "PROMPT=[%d]kdb> ",
141 "MOREPROMPT=[%d]more> ",
142#else
143 "PROMPT=kdb> ",
144 "MOREPROMPT=more> ",
145#endif
146 "RADIX=16",
147 "MDCOUNT=8", /* lines of md output */
148 "BTARGS=9", /* 9 possible args in bt */
149 KDB_PLATFORM_ENV,
150 "DTABCOUNT=30",
151 "NOSECT=1",
152 (char *)0,
153 (char *)0,
154 (char *)0,
155 (char *)0,
156 (char *)0,
157 (char *)0,
158 (char *)0,
159 (char *)0,
160 (char *)0,
161 (char *)0,
162 (char *)0,
163 (char *)0,
164 (char *)0,
165 (char *)0,
166 (char *)0,
167 (char *)0,
168 (char *)0,
169 (char *)0,
170 (char *)0,
171 (char *)0,
172 (char *)0,
173 (char *)0,
174 (char *)0,
175};
176
177static const int __nenv = ARRAY_SIZE(__env);
178
179struct task_struct *kdb_curr_task(int cpu)
180{
181 struct task_struct *p = curr_task(cpu);
182#ifdef _TIF_MCA_INIT
183 if ((task_thread_info(p)->flags & _TIF_MCA_INIT) && KDB_TSK(cpu))
 184 p = KDB_TSK(cpu);
185#endif
186 return p;
187}
188
189/*
190 * kdbgetenv - This function will return the character string value of
191 * an environment variable.
192 * Parameters:
193 * match A character string representing an environment variable.
194 * Returns:
195 * NULL No environment variable matches 'match'
196 * char* Pointer to string value of environment variable.
197 */
198char *kdbgetenv(const char *match)
199{
200 char **ep = __env;
201 int matchlen = strlen(match);
202 int i;
203
204 for (i = 0; i < __nenv; i++) {
205 char *e = *ep++;
206
207 if (!e)
208 continue;
209
210 if ((strncmp(match, e, matchlen) == 0)
211 && ((e[matchlen] == '\0')
212 || (e[matchlen] == '='))) {
213 char *cp = strchr(e, '=');
214 return cp ? ++cp : "";
215 }
216 }
217 return NULL;
218}
219
220/*
221 * kdballocenv - This function is used to allocate bytes for
222 * environment entries.
223 * Parameters:
224 * match A character string representing a numeric value
225 * Outputs:
226 * *value the unsigned long representation of the env variable 'match'
227 * Returns:
228 * Zero on success, a kdb diagnostic on failure.
229 * Remarks:
230 * We use a static environment buffer (envbuffer) to hold the values
231 * of dynamically generated environment variables (see kdb_set). Buffer
232 * space once allocated is never free'd, so over time, the amount of space
233 * (currently 512 bytes) will be exhausted if env variables are changed
234 * frequently.
235 */
236static char *kdballocenv(size_t bytes)
237{
238#define KDB_ENVBUFSIZE 512
239 static char envbuffer[KDB_ENVBUFSIZE];
240 static int envbufsize;
241 char *ep = NULL;
242
243 if ((KDB_ENVBUFSIZE - envbufsize) >= bytes) {
244 ep = &envbuffer[envbufsize];
245 envbufsize += bytes;
246 }
247 return ep;
248}
249
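kdballocenv() is an allocate-only "bump" allocator: it hands out pieces of a fixed static buffer and never frees them, so kdb does not have to trust the kernel allocator while the machine is stopped. The same idea as a self-contained sketch (sizes and names are illustrative):

#include <stddef.h>
#include <stdio.h>

#define ARENA_SIZE 512

static char arena[ARENA_SIZE];
static size_t arena_used;

/* Allocate-only: bump the high-water mark, never free. */
static void *bump_alloc(size_t bytes)
{
        void *p;

        if (ARENA_SIZE - arena_used < bytes)
                return NULL;            /* arena exhausted */
        p = &arena[arena_used];
        arena_used += bytes;
        return p;
}

int main(void)
{
        char *a = bump_alloc(16);
        char *b = bump_alloc(500);      /* fails: only 496 bytes left */

        printf("a=%p b=%p used=%zu\n", (void *)a, (void *)b, arena_used);
        return 0;
}
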
250/*
251 * kdbgetulenv - This function will return the value of an unsigned
252 * long-valued environment variable.
253 * Parameters:
 254 * match The name of an environment variable with a numeric value
 255 * Outputs:
 256 * *value the unsigned long representation of the env variable 'match'
257 * Returns:
258 * Zero on success, a kdb diagnostic on failure.
259 */
260static int kdbgetulenv(const char *match, unsigned long *value)
261{
262 char *ep;
263
264 ep = kdbgetenv(match);
265 if (!ep)
266 return KDB_NOTENV;
267 if (strlen(ep) == 0)
268 return KDB_NOENVVALUE;
269
270 *value = simple_strtoul(ep, NULL, 0);
271
272 return 0;
273}
274
275/*
276 * kdbgetintenv - This function will return the value of an
277 * integer-valued environment variable.
278 * Parameters:
279 * match A character string representing an integer-valued env variable
280 * Outputs:
281 * *value the integer representation of the environment variable 'match'
282 * Returns:
283 * Zero on success, a kdb diagnostic on failure.
284 */
285int kdbgetintenv(const char *match, int *value)
286{
287 unsigned long val;
288 int diag;
289
290 diag = kdbgetulenv(match, &val);
291 if (!diag)
292 *value = (int) val;
293 return diag;
294}
295
296/*
297 * kdbgetularg - This function will convert a numeric string into an
298 * unsigned long value.
299 * Parameters:
300 * arg A character string representing a numeric value
301 * Outputs:
 302 * *value the unsigned long representation of arg.
303 * Returns:
304 * Zero on success, a kdb diagnostic on failure.
305 */
306int kdbgetularg(const char *arg, unsigned long *value)
307{
308 char *endp;
309 unsigned long val;
310
311 val = simple_strtoul(arg, &endp, 0);
312
313 if (endp == arg) {
314 /*
315 * Try base 16, for us folks too lazy to type the
316 * leading 0x...
317 */
318 val = simple_strtoul(arg, &endp, 16);
319 if (endp == arg)
320 return KDB_BADINT;
321 }
322
323 *value = val;
324
325 return 0;
326}
327
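kdbgetularg() accepts bare hex by retrying in base 16 whenever the base-0 conversion consumes no characters. A user-space equivalent built on strtoul(3):

#include <stdio.h>
#include <stdlib.h>

/* Returns 0 on success, -1 if the string is not numeric. */
static int parse_ulong(const char *arg, unsigned long *value)
{
        char *endp;
        unsigned long val;

        val = strtoul(arg, &endp, 0);   /* 0x.., 0.., or decimal */
        if (endp == arg) {
                /* retry as bare hex, e.g. "c0ffee" */
                val = strtoul(arg, &endp, 16);
                if (endp == arg)
                        return -1;
        }
        *value = val;
        return 0;
}

int main(void)
{
        unsigned long v;

        if (parse_ulong("c0ffee", &v) == 0)
                printf("0x%lx\n", v);   /* prints 0xc0ffee */
        return 0;
}
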
328/*
329 * kdb_set - This function implements the 'set' command. Alter an
330 * existing environment variable or create a new one.
331 */
332int kdb_set(int argc, const char **argv)
333{
334 int i;
335 char *ep;
336 size_t varlen, vallen;
337
338 /*
339 * we can be invoked two ways:
340 * set var=value argv[1]="var", argv[2]="value"
341 * set var = value argv[1]="var", argv[2]="=", argv[3]="value"
342 * - if the latter, shift 'em down.
343 */
344 if (argc == 3) {
345 argv[2] = argv[3];
346 argc--;
347 }
348
349 if (argc != 2)
350 return KDB_ARGCOUNT;
351
352 /*
353 * Check for internal variables
354 */
355 if (strcmp(argv[1], "KDBDEBUG") == 0) {
356 unsigned int debugflags;
357 char *cp;
358
359 debugflags = simple_strtoul(argv[2], &cp, 0);
360 if (cp == argv[2] || debugflags & ~KDB_DEBUG_FLAG_MASK) {
361 kdb_printf("kdb: illegal debug flags '%s'\n",
362 argv[2]);
363 return 0;
364 }
365 kdb_flags = (kdb_flags &
366 ~(KDB_DEBUG_FLAG_MASK << KDB_DEBUG_FLAG_SHIFT))
367 | (debugflags << KDB_DEBUG_FLAG_SHIFT);
368
369 return 0;
370 }
371
372 /*
373 * Tokenizer squashed the '=' sign. argv[1] is variable
374 * name, argv[2] = value.
375 */
376 varlen = strlen(argv[1]);
377 vallen = strlen(argv[2]);
378 ep = kdballocenv(varlen + vallen + 2);
379 if (ep == (char *)0)
380 return KDB_ENVBUFFULL;
381
382 sprintf(ep, "%s=%s", argv[1], argv[2]);
383
384 ep[varlen+vallen+1] = '\0';
385
386 for (i = 0; i < __nenv; i++) {
387 if (__env[i]
388 && ((strncmp(__env[i], argv[1], varlen) == 0)
389 && ((__env[i][varlen] == '\0')
390 || (__env[i][varlen] == '=')))) {
391 __env[i] = ep;
392 return 0;
393 }
394 }
395
396 /*
397 * Wasn't existing variable. Fit into slot.
398 */
399 for (i = 0; i < __nenv-1; i++) {
400 if (__env[i] == (char *)0) {
401 __env[i] = ep;
402 return 0;
403 }
404 }
405
406 return KDB_ENVFULL;
407}
408
409static int kdb_check_regs(void)
410{
411 if (!kdb_current_regs) {
412 kdb_printf("No current kdb registers."
413 " You may need to select another task\n");
414 return KDB_BADREG;
415 }
416 return 0;
417}
418
419/*
420 * kdbgetaddrarg - This function is responsible for parsing an
421 * address-expression and returning the value of the expression,
422 * symbol name, and offset to the caller.
423 *
424 * The argument may consist of a numeric value (decimal or
 425 * hexadecimal), a symbol name, a register name (preceded by the
 426 * percent sign), an environment variable with a numeric value
 427 * (preceded by a dollar sign) or a simple arithmetic expression
428 * consisting of a symbol name, +/-, and a numeric constant value
429 * (offset).
430 * Parameters:
431 * argc - count of arguments in argv
432 * argv - argument vector
433 * *nextarg - index to next unparsed argument in argv[]
434 * regs - Register state at time of KDB entry
435 * Outputs:
436 * *value - receives the value of the address-expression
437 * *offset - receives the offset specified, if any
438 * *name - receives the symbol name, if any
439 * *nextarg - index to next unparsed argument in argv[]
440 * Returns:
441 * zero is returned on success, a kdb diagnostic code is
442 * returned on error.
443 */
444int kdbgetaddrarg(int argc, const char **argv, int *nextarg,
445 unsigned long *value, long *offset,
446 char **name)
447{
448 unsigned long addr;
449 unsigned long off = 0;
450 int positive;
451 int diag;
452 int found = 0;
453 char *symname;
454 char symbol = '\0';
455 char *cp;
456 kdb_symtab_t symtab;
457
458 /*
459 * Process arguments which follow the following syntax:
460 *
461 * symbol | numeric-address [+/- numeric-offset]
462 * %register
463 * $environment-variable
464 */
465
466 if (*nextarg > argc)
467 return KDB_ARGCOUNT;
468
469 symname = (char *)argv[*nextarg];
470
471 /*
472 * If there is no whitespace between the symbol
473 * or address and the '+' or '-' symbols, we
474 * remember the character and replace it with a
475 * null so the symbol/value can be properly parsed
476 */
477 cp = strpbrk(symname, "+-");
478 if (cp != NULL) {
479 symbol = *cp;
480 *cp++ = '\0';
481 }
482
483 if (symname[0] == '$') {
484 diag = kdbgetulenv(&symname[1], &addr);
485 if (diag)
486 return diag;
487 } else if (symname[0] == '%') {
488 diag = kdb_check_regs();
489 if (diag)
490 return diag;
491 /* Implement register values with % at a later time as it is
492 * arch optional.
493 */
494 return KDB_NOTIMP;
495 } else {
496 found = kdbgetsymval(symname, &symtab);
497 if (found) {
498 addr = symtab.sym_start;
499 } else {
500 diag = kdbgetularg(argv[*nextarg], &addr);
501 if (diag)
502 return diag;
503 }
504 }
505
506 if (!found)
507 found = kdbnearsym(addr, &symtab);
508
509 (*nextarg)++;
510
511 if (name)
512 *name = symname;
513 if (value)
514 *value = addr;
515 if (offset && name && *name)
516 *offset = addr - symtab.sym_start;
517
518 if ((*nextarg > argc)
519 && (symbol == '\0'))
520 return 0;
521
522 /*
523 * check for +/- and offset
524 */
525
526 if (symbol == '\0') {
527 if ((argv[*nextarg][0] != '+')
528 && (argv[*nextarg][0] != '-')) {
529 /*
530 * Not our argument. Return.
531 */
532 return 0;
533 } else {
534 positive = (argv[*nextarg][0] == '+');
535 (*nextarg)++;
536 }
537 } else
538 positive = (symbol == '+');
539
540 /*
541 * Now there must be an offset!
542 */
543 if ((*nextarg > argc)
544 && (symbol == '\0')) {
545 return KDB_INVADDRFMT;
546 }
547
548 if (!symbol) {
549 cp = (char *)argv[*nextarg];
550 (*nextarg)++;
551 }
552
553 diag = kdbgetularg(cp, &off);
554 if (diag)
555 return diag;
556
557 if (!positive)
558 off = -off;
559
560 if (offset)
561 *offset += off;
562
563 if (value)
564 *value += off;
565
566 return 0;
567}
568
569static void kdb_cmderror(int diag)
570{
571 int i;
572
573 if (diag >= 0) {
574 kdb_printf("no error detected (diagnostic is %d)\n", diag);
575 return;
576 }
577
578 for (i = 0; i < __nkdb_err; i++) {
579 if (kdbmsgs[i].km_diag == diag) {
580 kdb_printf("diag: %d: %s\n", diag, kdbmsgs[i].km_msg);
581 return;
582 }
583 }
584
585 kdb_printf("Unknown diag %d\n", -diag);
586}
587
588/*
589 * kdb_defcmd, kdb_defcmd2 - This function implements the 'defcmd'
590 * command which defines one command as a set of other commands,
591 * terminated by endefcmd. kdb_defcmd processes the initial
592 * 'defcmd' command, kdb_defcmd2 is invoked from kdb_parse for
593 * the following commands until 'endefcmd'.
594 * Inputs:
595 * argc argument count
596 * argv argument vector
597 * Returns:
598 * zero for success, a kdb diagnostic if error
599 */
600struct defcmd_set {
601 int count;
602 int usable;
603 char *name;
604 char *usage;
605 char *help;
606 char **command;
607};
608static struct defcmd_set *defcmd_set;
609static int defcmd_set_count;
610static int defcmd_in_progress;
611
612/* Forward references */
613static int kdb_exec_defcmd(int argc, const char **argv);
614
615static int kdb_defcmd2(const char *cmdstr, const char *argv0)
616{
617 struct defcmd_set *s = defcmd_set + defcmd_set_count - 1;
618 char **save_command = s->command;
619 if (strcmp(argv0, "endefcmd") == 0) {
620 defcmd_in_progress = 0;
621 if (!s->count)
622 s->usable = 0;
623 if (s->usable)
624 kdb_register(s->name, kdb_exec_defcmd,
625 s->usage, s->help, 0);
626 return 0;
627 }
628 if (!s->usable)
629 return KDB_NOTIMP;
630 s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
631 if (!s->command) {
632 kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
633 cmdstr);
634 s->usable = 0;
635 return KDB_NOTIMP;
636 }
637 memcpy(s->command, save_command, s->count * sizeof(*(s->command)));
638 s->command[s->count++] = kdb_strdup(cmdstr, GFP_KDB);
639 kfree(save_command);
640 return 0;
641}
642
643static int kdb_defcmd(int argc, const char **argv)
644{
645 struct defcmd_set *save_defcmd_set = defcmd_set, *s;
646 if (defcmd_in_progress) {
647 kdb_printf("kdb: nested defcmd detected, assuming missing "
648 "endefcmd\n");
649 kdb_defcmd2("endefcmd", "endefcmd");
650 }
651 if (argc == 0) {
652 int i;
653 for (s = defcmd_set; s < defcmd_set + defcmd_set_count; ++s) {
654 kdb_printf("defcmd %s \"%s\" \"%s\"\n", s->name,
655 s->usage, s->help);
656 for (i = 0; i < s->count; ++i)
657 kdb_printf("%s", s->command[i]);
658 kdb_printf("endefcmd\n");
659 }
660 return 0;
661 }
662 if (argc != 3)
663 return KDB_ARGCOUNT;
664 defcmd_set = kmalloc((defcmd_set_count + 1) * sizeof(*defcmd_set),
665 GFP_KDB);
666 if (!defcmd_set) {
667 kdb_printf("Could not allocate new defcmd_set entry for %s\n",
668 argv[1]);
669 defcmd_set = save_defcmd_set;
670 return KDB_NOTIMP;
671 }
672 memcpy(defcmd_set, save_defcmd_set,
673 defcmd_set_count * sizeof(*defcmd_set));
674 kfree(save_defcmd_set);
675 s = defcmd_set + defcmd_set_count;
676 memset(s, 0, sizeof(*s));
677 s->usable = 1;
678 s->name = kdb_strdup(argv[1], GFP_KDB);
679 s->usage = kdb_strdup(argv[2], GFP_KDB);
680 s->help = kdb_strdup(argv[3], GFP_KDB);
681 if (s->usage[0] == '"') {
 682 memmove(s->usage, s->usage+1, strlen(s->usage));
683 s->usage[strlen(s->usage)-1] = '\0';
684 }
685 if (s->help[0] == '"') {
 686 memmove(s->help, s->help+1, strlen(s->help));
687 s->help[strlen(s->help)-1] = '\0';
688 }
689 ++defcmd_set_count;
690 defcmd_in_progress = 1;
691 return 0;
692}
693
694/*
695 * kdb_exec_defcmd - Execute the set of commands associated with this
696 * defcmd name.
697 * Inputs:
698 * argc argument count
699 * argv argument vector
700 * Returns:
701 * zero for success, a kdb diagnostic if error
702 */
703static int kdb_exec_defcmd(int argc, const char **argv)
704{
705 int i, ret;
706 struct defcmd_set *s;
707 if (argc != 0)
708 return KDB_ARGCOUNT;
709 for (s = defcmd_set, i = 0; i < defcmd_set_count; ++i, ++s) {
710 if (strcmp(s->name, argv[0]) == 0)
711 break;
712 }
713 if (i == defcmd_set_count) {
714 kdb_printf("kdb_exec_defcmd: could not find commands for %s\n",
715 argv[0]);
716 return KDB_NOTIMP;
717 }
718 for (i = 0; i < s->count; ++i) {
719 /* Recursive use of kdb_parse, do not use argv after
720 * this point */
721 argv = NULL;
722 kdb_printf("[%s]kdb> %s\n", s->name, s->command[i]);
723 ret = kdb_parse(s->command[i]);
724 if (ret)
725 return ret;
726 }
727 return 0;
728}
729
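kdb_defcmd2() above grows s->command one slot at a time with an allocate/copy/free sequence rather than krealloc(). A user-space sketch of the same append idiom (names are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Append one entry by allocate/copy/free, as kdb_defcmd2() does.
 * Returns 0 on success, -1 on allocation failure. */
static int append(char ***list, int *count, char *item)
{
        char **grown = malloc((*count + 1) * sizeof(*grown));

        if (!grown)
                return -1;
        if (*list)
                memcpy(grown, *list, *count * sizeof(*grown));
        grown[(*count)++] = item;
        free(*list);
        *list = grown;
        return 0;
}

int main(void)
{
        char **cmds = NULL;
        int n = 0, i;

        append(&cmds, &n, "md 0xc0000000");
        append(&cmds, &n, "bt");
        for (i = 0; i < n; i++)
                printf("%s\n", cmds[i]);
        free(cmds);
        return 0;
}
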
730/* Command history */
731#define KDB_CMD_HISTORY_COUNT 32
732#define CMD_BUFLEN 200 /* kdb_printf: max printline
733 * size == 256 */
734static unsigned int cmd_head, cmd_tail;
735static unsigned int cmdptr;
736static char cmd_hist[KDB_CMD_HISTORY_COUNT][CMD_BUFLEN];
737static char cmd_cur[CMD_BUFLEN];
738
739/*
740 * The "str" argument may point to something like | grep xyz
741 */
742static void parse_grep(const char *str)
743{
744 int len;
745 char *cp = (char *)str, *cp2;
746
 747 /* sanity check: we should have been called with the '|' first */
748 if (*cp != '|')
749 return;
750 cp++;
751 while (isspace(*cp))
752 cp++;
753 if (strncmp(cp, "grep ", 5)) {
754 kdb_printf("invalid 'pipe', see grephelp\n");
755 return;
756 }
757 cp += 5;
758 while (isspace(*cp))
759 cp++;
760 cp2 = strchr(cp, '\n');
761 if (cp2)
762 *cp2 = '\0'; /* remove the trailing newline */
763 len = strlen(cp);
764 if (len == 0) {
765 kdb_printf("invalid 'pipe', see grephelp\n");
766 return;
767 }
768 /* now cp points to a nonzero length search string */
769 if (*cp == '"') {
 770 /* allow it to be "x y z" by removing the "'s - there must
 771 be two of them */
772 cp++;
773 cp2 = strchr(cp, '"');
774 if (!cp2) {
775 kdb_printf("invalid quoted string, see grephelp\n");
776 return;
777 }
778 *cp2 = '\0'; /* end the string where the 2nd " was */
779 }
780 kdb_grep_leading = 0;
781 if (*cp == '^') {
782 kdb_grep_leading = 1;
783 cp++;
784 }
785 len = strlen(cp);
786 kdb_grep_trailing = 0;
787 if (*(cp+len-1) == '$') {
788 kdb_grep_trailing = 1;
789 *(cp+len-1) = '\0';
790 }
791 len = strlen(cp);
792 if (!len)
793 return;
794 if (len >= GREP_LEN) {
795 kdb_printf("search string too long\n");
796 return;
797 }
798 strcpy(kdb_grep_string, cp);
799 kdb_grepping_flag++;
800 return;
801}
802
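parse_grep() above splits "| grep pattern" in place, then peels off a leading '^' and trailing '$' as anchor flags. A stripped-down sketch of that parse, without the quoting support the kernel version has:

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Strip "| grep PATTERN" and note ^/$ anchors; returns 0 on success. */
static int parse_pipe(char *s, char **pat, int *lead, int *trail)
{
        size_t len;

        if (*s != '|')
                return -1;
        s++;
        while (isspace((unsigned char)*s))
                s++;
        if (strncmp(s, "grep ", 5))
                return -1;
        s += 5;
        while (isspace((unsigned char)*s))
                s++;
        *lead = (*s == '^');            /* anchored at line start? */
        if (*lead)
                s++;
        len = strlen(s);
        *trail = len && s[len - 1] == '$';
        if (*trail)                     /* anchored at line end? */
                s[len - 1] = '\0';
        *pat = s;
        return 0;
}

int main(void)
{
        char cmd[] = "| grep ^sched$";
        char *pat;
        int lead, trail;

        if (parse_pipe(cmd, &pat, &lead, &trail) == 0)
                printf("pattern='%s' leading=%d trailing=%d\n",
                       pat, lead, trail);
        return 0;
}
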
803/*
804 * kdb_parse - Parse the command line, search the command table for a
805 * matching command and invoke the command function. This
806 * function may be called recursively, if it is, the second call
807 * will overwrite argv and cbuf. It is the caller's
808 * responsibility to save their argv if they recursively call
809 * kdb_parse().
810 * Parameters:
811 * cmdstr The input command line to be parsed.
812 * regs The registers at the time kdb was entered.
813 * Returns:
814 * Zero for success, a kdb diagnostic if failure.
815 * Remarks:
816 * Limited to 20 tokens.
817 *
818 * Real rudimentary tokenization. Basically only whitespace
 819 * is considered a token delimiter (but special consideration
820 * is taken of the '=' sign as used by the 'set' command).
821 *
822 * The algorithm used to tokenize the input string relies on
823 * there being at least one whitespace (or otherwise useless)
824 * character between tokens as the character immediately following
825 * the token is altered in-place to a null-byte to terminate the
826 * token string.
827 */
828
829#define MAXARGC 20
830
831int kdb_parse(const char *cmdstr)
832{
833 static char *argv[MAXARGC];
834 static int argc;
835 static char cbuf[CMD_BUFLEN+2];
836 char *cp;
837 char *cpp, quoted;
838 kdbtab_t *tp;
839 int i, escaped, ignore_errors = 0, check_grep;
840
841 /*
842 * First tokenize the command string.
843 */
844 cp = (char *)cmdstr;
845 kdb_grepping_flag = check_grep = 0;
846
847 if (KDB_FLAG(CMD_INTERRUPT)) {
848 /* Previous command was interrupted, newline must not
849 * repeat the command */
850 KDB_FLAG_CLEAR(CMD_INTERRUPT);
851 KDB_STATE_SET(PAGER);
852 argc = 0; /* no repeat */
853 }
854
855 if (*cp != '\n' && *cp != '\0') {
856 argc = 0;
857 cpp = cbuf;
858 while (*cp) {
859 /* skip whitespace */
860 while (isspace(*cp))
861 cp++;
862 if ((*cp == '\0') || (*cp == '\n') ||
863 (*cp == '#' && !defcmd_in_progress))
864 break;
865 /* special case: check for | grep pattern */
866 if (*cp == '|') {
867 check_grep++;
868 break;
869 }
870 if (cpp >= cbuf + CMD_BUFLEN) {
871 kdb_printf("kdb_parse: command buffer "
872 "overflow, command ignored\n%s\n",
873 cmdstr);
874 return KDB_NOTFOUND;
875 }
876 if (argc >= MAXARGC - 1) {
877 kdb_printf("kdb_parse: too many arguments, "
878 "command ignored\n%s\n", cmdstr);
879 return KDB_NOTFOUND;
880 }
881 argv[argc++] = cpp;
882 escaped = 0;
883 quoted = '\0';
884 /* Copy to next unquoted and unescaped
885 * whitespace or '=' */
886 while (*cp && *cp != '\n' &&
887 (escaped || quoted || !isspace(*cp))) {
888 if (cpp >= cbuf + CMD_BUFLEN)
889 break;
890 if (escaped) {
891 escaped = 0;
892 *cpp++ = *cp++;
893 continue;
894 }
895 if (*cp == '\\') {
896 escaped = 1;
897 ++cp;
898 continue;
899 }
900 if (*cp == quoted)
901 quoted = '\0';
902 else if (*cp == '\'' || *cp == '"')
903 quoted = *cp;
904 *cpp = *cp++;
905 if (*cpp == '=' && !quoted)
906 break;
907 ++cpp;
908 }
909 *cpp++ = '\0'; /* Squash a ws or '=' character */
910 }
911 }
912 if (!argc)
913 return 0;
914 if (check_grep)
915 parse_grep(cp);
916 if (defcmd_in_progress) {
917 int result = kdb_defcmd2(cmdstr, argv[0]);
918 if (!defcmd_in_progress) {
919 argc = 0; /* avoid repeat on endefcmd */
920 *(argv[0]) = '\0';
921 }
922 return result;
923 }
924 if (argv[0][0] == '-' && argv[0][1] &&
925 (argv[0][1] < '0' || argv[0][1] > '9')) {
926 ignore_errors = 1;
927 ++argv[0];
928 }
929
930 for_each_kdbcmd(tp, i) {
931 if (tp->cmd_name) {
932 /*
933 * If this command is allowed to be abbreviated,
934 * check to see if this is it.
935 */
936
937 if (tp->cmd_minlen
938 && (strlen(argv[0]) <= tp->cmd_minlen)) {
939 if (strncmp(argv[0],
940 tp->cmd_name,
941 tp->cmd_minlen) == 0) {
942 break;
943 }
944 }
945
946 if (strcmp(argv[0], tp->cmd_name) == 0)
947 break;
948 }
949 }
950
951 /*
952 * If we don't find a command by this name, see if the first
953 * few characters of this match any of the known commands.
954 * e.g., md1c20 should match md.
955 */
956 if (i == kdb_max_commands) {
957 for_each_kdbcmd(tp, i) {
958 if (tp->cmd_name) {
959 if (strncmp(argv[0],
960 tp->cmd_name,
961 strlen(tp->cmd_name)) == 0) {
962 break;
963 }
964 }
965 }
966 }
967
968 if (i < kdb_max_commands) {
969 int result;
970 KDB_STATE_SET(CMD);
971 result = (*tp->cmd_func)(argc-1, (const char **)argv);
972 if (result && ignore_errors && result > KDB_CMD_GO)
973 result = 0;
974 KDB_STATE_CLEAR(CMD);
975 switch (tp->cmd_repeat) {
976 case KDB_REPEAT_NONE:
977 argc = 0;
978 if (argv[0])
979 *(argv[0]) = '\0';
980 break;
981 case KDB_REPEAT_NO_ARGS:
982 argc = 1;
983 if (argv[1])
984 *(argv[1]) = '\0';
985 break;
986 case KDB_REPEAT_WITH_ARGS:
987 break;
988 }
989 return result;
990 }
991
992 /*
993 * If the input with which we were presented does not
994 * map to an existing command, attempt to parse it as an
995 * address argument and display the result. Useful for
996 * obtaining the address of a variable, or the nearest symbol
997 * to an address contained in a register.
998 */
999 {
1000 unsigned long value;
1001 char *name = NULL;
1002 long offset;
1003 int nextarg = 0;
1004
1005 if (kdbgetaddrarg(0, (const char **)argv, &nextarg,
1006 &value, &offset, &name)) {
1007 return KDB_NOTFOUND;
1008 }
1009
1010 kdb_printf("%s = ", argv[0]);
1011 kdb_symbol_print(value, NULL, KDB_SP_DEFAULT);
1012 kdb_printf("\n");
1013 return 0;
1014 }
1015}
1016
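The tokenizer in kdb_parse() never copies arguments out of the buffer; it records a pointer to each token and overwrites the delimiter that follows it with a NUL, which is why tokens must be separated by at least one expendable character. A minimal sketch of in-place tokenization (no quoting or escape handling here):

#include <ctype.h>
#include <stdio.h>

#define MAXTOK 20

/* Split s in place on whitespace; returns the token count. */
static int tokenize(char *s, char *argv[], int max)
{
        int argc = 0;

        while (*s && argc < max) {
                while (isspace((unsigned char)*s))
                        s++;
                if (!*s)
                        break;
                argv[argc++] = s;               /* token starts here */
                while (*s && !isspace((unsigned char)*s))
                        s++;
                if (*s)
                        *s++ = '\0';            /* squash the delimiter */
        }
        return argc;
}

int main(void)
{
        char line[] = "md1c20 c0000000 16";
        char *argv[MAXTOK];
        int argc = tokenize(line, argv, MAXTOK), i;

        for (i = 0; i < argc; i++)
                printf("argv[%d]=%s\n", i, argv[i]);
        return 0;
}
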
1017
1018static int handle_ctrl_cmd(char *cmd)
1019{
1020#define CTRL_P 16
1021#define CTRL_N 14
1022
1023 /* initial situation */
1024 if (cmd_head == cmd_tail)
1025 return 0;
1026 switch (*cmd) {
1027 case CTRL_P:
1028 if (cmdptr != cmd_tail)
1029 cmdptr = (cmdptr-1) % KDB_CMD_HISTORY_COUNT;
1030 strncpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
1031 return 1;
1032 case CTRL_N:
1033 if (cmdptr != cmd_head)
1034 cmdptr = (cmdptr+1) % KDB_CMD_HISTORY_COUNT;
1035 strncpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
1036 return 1;
1037 }
1038 return 0;
1039}
1040
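handle_ctrl_cmd() above walks a fixed ring of saved command lines: Ctrl-P steps toward the tail and Ctrl-N toward the head, both with modulo wrap-around. A standalone sketch of the ring arithmetic (sizes are illustrative; a power-of-two size keeps the unsigned wrap at index 0 well defined):

#include <stdio.h>

#define HIST 8          /* power of two, so unsigned wrap is safe */

static const char *hist[HIST] = { "md", "bt", "ps", "go" };
static unsigned head = 4, tail;         /* 4 entries saved, none dropped */

/* One step back (Ctrl-P) or forward (Ctrl-N) through the ring. */
static unsigned step(unsigned ptr, int back)
{
        if (back && ptr != tail)
                ptr = (ptr - 1) % HIST; /* wraps because HIST divides 2^32 */
        else if (!back && ptr != head)
                ptr = (ptr + 1) % HIST;
        return ptr;
}

int main(void)
{
        unsigned ptr = head;
        int i;

        for (i = 0; i < 3; i++) {       /* three Ctrl-Ps */
                ptr = step(ptr, 1);
                printf("%s\n", hist[ptr]);      /* go, ps, bt */
        }
        return 0;
}
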
1041/*
1042 * kdb_reboot - This function implements the 'reboot' command. Reboot
 1043 * the system immediately, or loop forever on failure.
1044 */
1045static int kdb_reboot(int argc, const char **argv)
1046{
1047 emergency_restart();
1048 kdb_printf("Hmm, kdb_reboot did not reboot, spinning here\n");
1049 while (1)
1050 cpu_relax();
1051 /* NOTREACHED */
1052 return 0;
1053}
1054
1055static void kdb_dumpregs(struct pt_regs *regs)
1056{
1057 int old_lvl = console_loglevel;
1058 console_loglevel = 15;
1059 kdb_trap_printk++;
1060 show_regs(regs);
1061 kdb_trap_printk--;
1062 kdb_printf("\n");
1063 console_loglevel = old_lvl;
1064}
1065
1066void kdb_set_current_task(struct task_struct *p)
1067{
1068 kdb_current_task = p;
1069
1070 if (kdb_task_has_cpu(p)) {
1071 kdb_current_regs = KDB_TSKREGS(kdb_process_cpu(p));
1072 return;
1073 }
1074 kdb_current_regs = NULL;
1075}
1076
1077/*
1078 * kdb_local - The main code for kdb. This routine is invoked on a
1079 * specific processor, it is not global. The main kdb() routine
1080 * ensures that only one processor at a time is in this routine.
1081 * This code is called with the real reason code on the first
1082 * entry to a kdb session, thereafter it is called with reason
1083 * SWITCH, even if the user goes back to the original cpu.
1084 * Inputs:
1085 * reason The reason KDB was invoked
1086 * error The hardware-defined error code
1087 * regs The exception frame at time of fault/breakpoint.
1088 * db_result Result code from the break or debug point.
1089 * Returns:
 1090 * 0 KDB was invoked for an event for which it wasn't responsible
1091 * 1 KDB handled the event for which it was invoked.
1092 * KDB_CMD_GO User typed 'go'.
1093 * KDB_CMD_CPU User switched to another cpu.
1094 * KDB_CMD_SS Single step.
1095 * KDB_CMD_SSB Single step until branch.
1096 */
1097static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
1098 kdb_dbtrap_t db_result)
1099{
1100 char *cmdbuf;
1101 int diag;
1102 struct task_struct *kdb_current =
1103 kdb_curr_task(raw_smp_processor_id());
1104
1105 KDB_DEBUG_STATE("kdb_local 1", reason);
1106 kdb_go_count = 0;
1107 if (reason == KDB_REASON_DEBUG) {
1108 /* special case below */
1109 } else {
1110 kdb_printf("\nEntering kdb (current=0x%p, pid %d) ",
1111 kdb_current, kdb_current->pid);
1112#if defined(CONFIG_SMP)
1113 kdb_printf("on processor %d ", raw_smp_processor_id());
1114#endif
1115 }
1116
1117 switch (reason) {
1118 case KDB_REASON_DEBUG:
1119 {
1120 /*
1121 * If re-entering kdb after a single step
1122 * command, don't print the message.
1123 */
1124 switch (db_result) {
1125 case KDB_DB_BPT:
1126 kdb_printf("\nEntering kdb (0x%p, pid %d) ",
1127 kdb_current, kdb_current->pid);
1128#if defined(CONFIG_SMP)
1129 kdb_printf("on processor %d ", raw_smp_processor_id());
1130#endif
1131 kdb_printf("due to Debug @ " kdb_machreg_fmt "\n",
1132 instruction_pointer(regs));
1133 break;
1134 case KDB_DB_SSB:
1135 /*
1136 * In the midst of ssb command. Just return.
1137 */
1138 KDB_DEBUG_STATE("kdb_local 3", reason);
1139 return KDB_CMD_SSB; /* Continue with SSB command */
1140
1141 break;
1142 case KDB_DB_SS:
1143 break;
1144 case KDB_DB_SSBPT:
1145 KDB_DEBUG_STATE("kdb_local 4", reason);
1146 return 1; /* kdba_db_trap did the work */
1147 default:
1148 kdb_printf("kdb: Bad result from kdba_db_trap: %d\n",
1149 db_result);
1150 break;
1151 }
1152
1153 }
1154 break;
1155 case KDB_REASON_ENTER:
1156 if (KDB_STATE(KEYBOARD))
1157 kdb_printf("due to Keyboard Entry\n");
1158 else
1159 kdb_printf("due to KDB_ENTER()\n");
1160 break;
1161 case KDB_REASON_KEYBOARD:
1162 KDB_STATE_SET(KEYBOARD);
1163 kdb_printf("due to Keyboard Entry\n");
1164 break;
1165 case KDB_REASON_ENTER_SLAVE:
1166 /* drop through, slaves only get released via cpu switch */
1167 case KDB_REASON_SWITCH:
1168 kdb_printf("due to cpu switch\n");
1169 break;
1170 case KDB_REASON_OOPS:
1171 kdb_printf("Oops: %s\n", kdb_diemsg);
1172 kdb_printf("due to oops @ " kdb_machreg_fmt "\n",
1173 instruction_pointer(regs));
1174 kdb_dumpregs(regs);
1175 break;
1176 case KDB_REASON_NMI:
1177 kdb_printf("due to NonMaskable Interrupt @ "
1178 kdb_machreg_fmt "\n",
1179 instruction_pointer(regs));
1180 kdb_dumpregs(regs);
1181 break;
1182 case KDB_REASON_SSTEP:
1183 case KDB_REASON_BREAK:
1184 kdb_printf("due to %s @ " kdb_machreg_fmt "\n",
1185 reason == KDB_REASON_BREAK ?
1186 "Breakpoint" : "SS trap", instruction_pointer(regs));
1187 /*
1188 * Determine if this breakpoint is one that we
1189 * are interested in.
1190 */
1191 if (db_result != KDB_DB_BPT) {
1192 kdb_printf("kdb: error return from kdba_bp_trap: %d\n",
1193 db_result);
1194 KDB_DEBUG_STATE("kdb_local 6", reason);
1195 return 0; /* Not for us, dismiss it */
1196 }
1197 break;
1198 case KDB_REASON_RECURSE:
1199 kdb_printf("due to Recursion @ " kdb_machreg_fmt "\n",
1200 instruction_pointer(regs));
1201 break;
1202 default:
1203 kdb_printf("kdb: unexpected reason code: %d\n", reason);
1204 KDB_DEBUG_STATE("kdb_local 8", reason);
1205 return 0; /* Not for us, dismiss it */
1206 }
1207
1208 while (1) {
1209 /*
1210 * Initialize pager context.
1211 */
1212 kdb_nextline = 1;
1213 KDB_STATE_CLEAR(SUPPRESS);
1214
1215 cmdbuf = cmd_cur;
1216 *cmdbuf = '\0';
1217 *(cmd_hist[cmd_head]) = '\0';
1218
1219 if (KDB_FLAG(ONLY_DO_DUMP)) {
1220 /* kdb is off but a catastrophic error requires a dump.
1221 * Take the dump and reboot.
1222 * Turn on logging so the kdb output appears in the log
1223 * buffer in the dump.
1224 */
1225 const char *setargs[] = { "set", "LOGGING", "1" };
1226 kdb_set(2, setargs);
1227 kdb_reboot(0, NULL);
1228 /*NOTREACHED*/
1229 }
1230
1231do_full_getstr:
1232#if defined(CONFIG_SMP)
1233 snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"),
1234 raw_smp_processor_id());
1235#else
1236 snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"));
1237#endif
1238 if (defcmd_in_progress)
1239 strncat(kdb_prompt_str, "[defcmd]", CMD_BUFLEN);
1240
1241 /*
1242 * Fetch command from keyboard
1243 */
1244 cmdbuf = kdb_getstr(cmdbuf, CMD_BUFLEN, kdb_prompt_str);
1245 if (*cmdbuf != '\n') {
1246 if (*cmdbuf < 32) {
1247 if (cmdptr == cmd_head) {
1248 strncpy(cmd_hist[cmd_head], cmd_cur,
1249 CMD_BUFLEN);
1250 *(cmd_hist[cmd_head] +
1251 strlen(cmd_hist[cmd_head])-1) = '\0';
1252 }
1253 if (!handle_ctrl_cmd(cmdbuf))
1254 *(cmd_cur+strlen(cmd_cur)-1) = '\0';
1255 cmdbuf = cmd_cur;
1256 goto do_full_getstr;
1257 } else {
1258 strncpy(cmd_hist[cmd_head], cmd_cur,
1259 CMD_BUFLEN);
1260 }
1261
1262 cmd_head = (cmd_head+1) % KDB_CMD_HISTORY_COUNT;
1263 if (cmd_head == cmd_tail)
1264 cmd_tail = (cmd_tail+1) % KDB_CMD_HISTORY_COUNT;
1265 }
1266
1267 cmdptr = cmd_head;
1268 diag = kdb_parse(cmdbuf);
1269 if (diag == KDB_NOTFOUND) {
1270 kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);
1271 diag = 0;
1272 }
1273 if (diag == KDB_CMD_GO
1274 || diag == KDB_CMD_CPU
1275 || diag == KDB_CMD_SS
1276 || diag == KDB_CMD_SSB
1277 || diag == KDB_CMD_KGDB)
1278 break;
1279
1280 if (diag)
1281 kdb_cmderror(diag);
1282 }
1283 KDB_DEBUG_STATE("kdb_local 9", diag);
1284 return diag;
1285}
1286
1287
1288/*
1289 * kdb_print_state - Print the state data for the current processor
1290 * for debugging.
1291 * Inputs:
1292 * text Identifies the debug point
1293 * value Any integer value to be printed, e.g. reason code.
1294 */
1295void kdb_print_state(const char *text, int value)
1296{
1297 kdb_printf("state: %s cpu %d value %d initial %d state %x\n",
1298 text, raw_smp_processor_id(), value, kdb_initial_cpu,
1299 kdb_state);
1300}
1301
1302/*
1303 * kdb_main_loop - After initial setup and assignment of the
1304 * controlling cpu, all cpus are in this loop. One cpu is in
1305 * control and will issue the kdb prompt, the others will spin
1306 * until 'go' or cpu switch.
1307 *
1308 * To get a consistent view of the kernel stacks for all
1309 * processes, this routine is invoked from the main kdb code via
1310 * an architecture specific routine. kdba_main_loop is
1311 * responsible for making the kernel stacks consistent for all
1312 * processes, there should be no difference between a blocked
1313 * process and a running process as far as kdb is concerned.
1314 * Inputs:
1315 * reason The reason KDB was invoked
1316 * error The hardware-defined error code
1317 * reason2 kdb's current reason code.
1318 * Initially error but can change
 1319 * according to kdb state.
1320 * db_result Result code from break or debug point.
1321 * regs The exception frame at time of fault/breakpoint.
1322 * should always be valid.
1323 * Returns:
 1324 * 0 KDB was invoked for an event for which it wasn't responsible
1325 * 1 KDB handled the event for which it was invoked.
1326 */
1327int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
1328 kdb_dbtrap_t db_result, struct pt_regs *regs)
1329{
1330 int result = 1;
1331 /* Stay in kdb() until 'go', 'ss[b]' or an error */
1332 while (1) {
1333 /*
1334 * All processors except the one that is in control
1335 * will spin here.
1336 */
1337 KDB_DEBUG_STATE("kdb_main_loop 1", reason);
1338 while (KDB_STATE(HOLD_CPU)) {
1339 /* state KDB is turned off by kdb_cpu to see if the
1340 * other cpus are still live, each cpu in this loop
1341 * turns it back on.
1342 */
1343 if (!KDB_STATE(KDB))
1344 KDB_STATE_SET(KDB);
1345 }
1346
1347 KDB_STATE_CLEAR(SUPPRESS);
1348 KDB_DEBUG_STATE("kdb_main_loop 2", reason);
1349 if (KDB_STATE(LEAVING))
1350 break; /* Another cpu said 'go' */
1351 /* Still using kdb, this processor is in control */
1352 result = kdb_local(reason2, error, regs, db_result);
1353 KDB_DEBUG_STATE("kdb_main_loop 3", result);
1354
1355 if (result == KDB_CMD_CPU)
1356 break;
1357
1358 if (result == KDB_CMD_SS) {
1359 KDB_STATE_SET(DOING_SS);
1360 break;
1361 }
1362
1363 if (result == KDB_CMD_SSB) {
1364 KDB_STATE_SET(DOING_SS);
1365 KDB_STATE_SET(DOING_SSB);
1366 break;
1367 }
1368
1369 if (result == KDB_CMD_KGDB) {
1370 if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)))
1371 kdb_printf("Entering please attach debugger "
1372 "or use $D#44+ or $3#33\n");
1373 break;
1374 }
1375 if (result && result != 1 && result != KDB_CMD_GO)
1376 kdb_printf("\nUnexpected kdb_local return code %d\n",
1377 result);
1378 KDB_DEBUG_STATE("kdb_main_loop 4", reason);
1379 break;
1380 }
1381 if (KDB_STATE(DOING_SS))
1382 KDB_STATE_CLEAR(SSBPT);
1383
1384 return result;
1385}
1386
1387/*
1388 * kdb_mdr - This function implements the guts of the 'mdr', memory
1389 * read command.
1390 * mdr <addr arg>,<byte count>
1391 * Inputs:
1392 * addr Start address
1393 * count Number of bytes
1394 * Returns:
1395 * Always 0. Any errors are detected and printed by kdb_getarea.
1396 */
1397static int kdb_mdr(unsigned long addr, unsigned int count)
1398{
1399 unsigned char c;
1400 while (count--) {
1401 if (kdb_getarea(c, addr))
1402 return 0;
1403 kdb_printf("%02x", c);
1404 addr++;
1405 }
1406 kdb_printf("\n");
1407 return 0;
1408}
1409
1410/*
1411 * kdb_md - This function implements the 'md', 'md1', 'md2', 'md4',
1412 * 'md8' 'mdr' and 'mds' commands.
1413 *
1414 * md|mds [<addr arg> [<line count> [<radix>]]]
1415 * mdWcN [<addr arg> [<line count> [<radix>]]]
1416 * where W = is the width (1, 2, 4 or 8) and N is the count.
1417 * for eg., md1c20 reads 20 bytes, 1 at a time.
1418 * mdr <addr arg>,<byte count>
1419 */
1420static void kdb_md_line(const char *fmtstr, unsigned long addr,
1421 int symbolic, int nosect, int bytesperword,
1422 int num, int repeat, int phys)
1423{
1424 /* print just one line of data */
1425 kdb_symtab_t symtab;
1426 char cbuf[32];
1427 char *c = cbuf;
1428 int i;
1429 unsigned long word;
1430
1431 memset(cbuf, '\0', sizeof(cbuf));
1432 if (phys)
1433 kdb_printf("phys " kdb_machreg_fmt0 " ", addr);
1434 else
1435 kdb_printf(kdb_machreg_fmt0 " ", addr);
1436
1437 for (i = 0; i < num && repeat--; i++) {
1438 if (phys) {
1439 if (kdb_getphysword(&word, addr, bytesperword))
1440 break;
1441 } else if (kdb_getword(&word, addr, bytesperword))
1442 break;
1443 kdb_printf(fmtstr, word);
1444 if (symbolic)
1445 kdbnearsym(word, &symtab);
1446 else
1447 memset(&symtab, 0, sizeof(symtab));
1448 if (symtab.sym_name) {
1449 kdb_symbol_print(word, &symtab, 0);
1450 if (!nosect) {
1451 kdb_printf("\n");
1452 kdb_printf(" %s %s "
1453 kdb_machreg_fmt " "
1454 kdb_machreg_fmt " "
1455 kdb_machreg_fmt, symtab.mod_name,
1456 symtab.sec_name, symtab.sec_start,
1457 symtab.sym_start, symtab.sym_end);
1458 }
1459 addr += bytesperword;
1460 } else {
1461 union {
1462 u64 word;
1463 unsigned char c[8];
1464 } wc;
1465 unsigned char *cp;
1466#ifdef __BIG_ENDIAN
1467 cp = wc.c + 8 - bytesperword;
1468#else
1469 cp = wc.c;
1470#endif
1471 wc.word = word;
1472#define printable_char(c) \
1473 ({unsigned char __c = c; isascii(__c) && isprint(__c) ? __c : '.'; })
1474 switch (bytesperword) {
1475 case 8:
1476 *c++ = printable_char(*cp++);
1477 *c++ = printable_char(*cp++);
1478 *c++ = printable_char(*cp++);
1479 *c++ = printable_char(*cp++);
 1480 addr += 4; /* fall through */
1481 case 4:
1482 *c++ = printable_char(*cp++);
1483 *c++ = printable_char(*cp++);
 1484 addr += 2; /* fall through */
1485 case 2:
1486 *c++ = printable_char(*cp++);
 1487 addr++; /* fall through */
1488 case 1:
1489 *c++ = printable_char(*cp++);
1490 addr++;
1491 break;
1492 }
1493#undef printable_char
1494 }
1495 }
1496 kdb_printf("%*s %s\n", (int)((num-i)*(2*bytesperword + 1)+1),
1497 " ", cbuf);
1498}
1499
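kdb_md_line() renders one line as address, hex words and an ASCII gutter, substituting '.' for unprintable bytes, much like hexdump -C. A compact user-space rendering of one such line (layout approximated, not byte-identical to kdb's):

#include <ctype.h>
#include <stdio.h>

/* Print one md-style line: address, hex bytes, printable gutter.
 * n must be <= 16. */
static void dump_line(unsigned long addr, const unsigned char *p, int n)
{
        char gutter[17];
        int i;

        printf("0x%016lx ", addr);
        for (i = 0; i < n; i++) {
                printf("%02x%s", p[i], (i & 3) == 3 ? " " : "");
                gutter[i] = isprint(p[i]) ? p[i] : '.';
        }
        gutter[n] = '\0';
        printf(" %s\n", gutter);
}

int main(void)
{
        const char *msg = "kdb> md demo\x01\x02\x03";

        dump_line(0xc0000000UL, (const unsigned char *)msg, 16);
        return 0;
}
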
1500static int kdb_md(int argc, const char **argv)
1501{
1502 static unsigned long last_addr;
1503 static int last_radix, last_bytesperword, last_repeat;
1504 int radix = 16, mdcount = 8, bytesperword = KDB_WORD_SIZE, repeat;
1505 int nosect = 0;
1506 char fmtchar, fmtstr[64];
1507 unsigned long addr;
1508 unsigned long word;
1509 long offset = 0;
1510 int symbolic = 0;
1511 int valid = 0;
1512 int phys = 0;
1513
1514 kdbgetintenv("MDCOUNT", &mdcount);
1515 kdbgetintenv("RADIX", &radix);
1516 kdbgetintenv("BYTESPERWORD", &bytesperword);
1517
1518 /* Assume 'md <addr>' and start with environment values */
1519 repeat = mdcount * 16 / bytesperword;
1520
1521 if (strcmp(argv[0], "mdr") == 0) {
1522 if (argc != 2)
1523 return KDB_ARGCOUNT;
1524 valid = 1;
1525 } else if (isdigit(argv[0][2])) {
1526 bytesperword = (int)(argv[0][2] - '0');
1527 if (bytesperword == 0) {
1528 bytesperword = last_bytesperword;
1529 if (bytesperword == 0)
1530 bytesperword = 4;
1531 }
1532 last_bytesperword = bytesperword;
1533 repeat = mdcount * 16 / bytesperword;
1534 if (!argv[0][3])
1535 valid = 1;
1536 else if (argv[0][3] == 'c' && argv[0][4]) {
1537 char *p;
1538 repeat = simple_strtoul(argv[0] + 4, &p, 10);
1539 mdcount = ((repeat * bytesperword) + 15) / 16;
1540 valid = !*p;
1541 }
1542 last_repeat = repeat;
1543 } else if (strcmp(argv[0], "md") == 0)
1544 valid = 1;
1545 else if (strcmp(argv[0], "mds") == 0)
1546 valid = 1;
1547 else if (strcmp(argv[0], "mdp") == 0) {
1548 phys = valid = 1;
1549 }
1550 if (!valid)
1551 return KDB_NOTFOUND;
1552
1553 if (argc == 0) {
1554 if (last_addr == 0)
1555 return KDB_ARGCOUNT;
1556 addr = last_addr;
1557 radix = last_radix;
1558 bytesperword = last_bytesperword;
1559 repeat = last_repeat;
1560 mdcount = ((repeat * bytesperword) + 15) / 16;
1561 }
1562
1563 if (argc) {
1564 unsigned long val;
1565 int diag, nextarg = 1;
1566 diag = kdbgetaddrarg(argc, argv, &nextarg, &addr,
1567 &offset, NULL);
1568 if (diag)
1569 return diag;
1570 if (argc > nextarg+2)
1571 return KDB_ARGCOUNT;
1572
1573 if (argc >= nextarg) {
1574 diag = kdbgetularg(argv[nextarg], &val);
1575 if (!diag) {
1576 mdcount = (int) val;
1577 repeat = mdcount * 16 / bytesperword;
1578 }
1579 }
1580 if (argc >= nextarg+1) {
1581 diag = kdbgetularg(argv[nextarg+1], &val);
1582 if (!diag)
1583 radix = (int) val;
1584 }
1585 }
1586
1587 if (strcmp(argv[0], "mdr") == 0)
1588 return kdb_mdr(addr, mdcount);
1589
1590 switch (radix) {
1591 case 10:
1592 fmtchar = 'd';
1593 break;
1594 case 16:
1595 fmtchar = 'x';
1596 break;
1597 case 8:
1598 fmtchar = 'o';
1599 break;
1600 default:
1601 return KDB_BADRADIX;
1602 }
1603
1604 last_radix = radix;
1605
1606 if (bytesperword > KDB_WORD_SIZE)
1607 return KDB_BADWIDTH;
1608
1609 switch (bytesperword) {
1610 case 8:
1611 sprintf(fmtstr, "%%16.16l%c ", fmtchar);
1612 break;
1613 case 4:
1614 sprintf(fmtstr, "%%8.8l%c ", fmtchar);
1615 break;
1616 case 2:
1617 sprintf(fmtstr, "%%4.4l%c ", fmtchar);
1618 break;
1619 case 1:
1620 sprintf(fmtstr, "%%2.2l%c ", fmtchar);
1621 break;
1622 default:
1623 return KDB_BADWIDTH;
1624 }
1625
1626 last_repeat = repeat;
1627 last_bytesperword = bytesperword;
1628
1629 if (strcmp(argv[0], "mds") == 0) {
1630 symbolic = 1;
1631 /* Do not save these changes as last_*, they are temporary mds
1632 * overrides.
1633 */
1634 bytesperword = KDB_WORD_SIZE;
1635 repeat = mdcount;
1636 kdbgetintenv("NOSECT", &nosect);
1637 }
1638
1639 /* Round address down modulo BYTESPERWORD */
1640
1641 addr &= ~(bytesperword-1);
1642
1643 while (repeat > 0) {
1644 unsigned long a;
1645 int n, z, num = (symbolic ? 1 : (16 / bytesperword));
1646
1647 if (KDB_FLAG(CMD_INTERRUPT))
1648 return 0;
1649 for (a = addr, z = 0; z < repeat; a += bytesperword, ++z) {
1650 if (phys) {
1651 if (kdb_getphysword(&word, a, bytesperword)
1652 || word)
1653 break;
1654 } else if (kdb_getword(&word, a, bytesperword) || word)
1655 break;
1656 }
1657 n = min(num, repeat);
1658 kdb_md_line(fmtstr, addr, symbolic, nosect, bytesperword,
1659 num, repeat, phys);
1660 addr += bytesperword * n;
1661 repeat -= n;
1662 z = (z + num - 1) / num;
1663 if (z > 2) {
1664 int s = num * (z-2);
1665 kdb_printf(kdb_machreg_fmt0 "-" kdb_machreg_fmt0
1666 " zero suppressed\n",
1667 addr, addr + bytesperword * s - 1);
1668 addr += bytesperword * s;
1669 repeat -= s;
1670 }
1671 }
1672 last_addr = addr;
1673
1674 return 0;
1675}
1676
1677/*
1678 * kdb_mm - This function implements the 'mm' command.
1679 * mm address-expression new-value
1680 * Remarks:
1681 * mm works on machine words, mmW works on bytes.
1682 */
1683static int kdb_mm(int argc, const char **argv)
1684{
1685 int diag;
1686 unsigned long addr;
1687 long offset = 0;
1688 unsigned long contents;
1689 int nextarg;
1690 int width;
1691
1692 if (argv[0][2] && !isdigit(argv[0][2]))
1693 return KDB_NOTFOUND;
1694
1695 if (argc < 2)
1696 return KDB_ARGCOUNT;
1697
1698 nextarg = 1;
1699 diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
1700 if (diag)
1701 return diag;
1702
1703 if (nextarg > argc)
1704 return KDB_ARGCOUNT;
1705 diag = kdbgetaddrarg(argc, argv, &nextarg, &contents, NULL, NULL);
1706 if (diag)
1707 return diag;
1708
1709 if (nextarg != argc + 1)
1710 return KDB_ARGCOUNT;
1711
1712 width = argv[0][2] ? (argv[0][2] - '0') : (KDB_WORD_SIZE);
1713 diag = kdb_putword(addr, contents, width);
1714 if (diag)
1715 return diag;
1716
1717 kdb_printf(kdb_machreg_fmt " = " kdb_machreg_fmt "\n", addr, contents);
1718
1719 return 0;
1720}
1721
1722/*
1723 * kdb_go - This function implements the 'go' command.
1724 * go [address-expression]
1725 */
1726static int kdb_go(int argc, const char **argv)
1727{
1728 unsigned long addr;
1729 int diag;
1730 int nextarg;
1731 long offset;
1732
1733 if (argc == 1) {
1734 if (raw_smp_processor_id() != kdb_initial_cpu) {
1735 kdb_printf("go <address> must be issued from the "
1736 "initial cpu, do cpu %d first\n",
1737 kdb_initial_cpu);
1738 return KDB_ARGCOUNT;
1739 }
1740 nextarg = 1;
1741 diag = kdbgetaddrarg(argc, argv, &nextarg,
1742 &addr, &offset, NULL);
1743 if (diag)
1744 return diag;
1745 } else if (argc) {
1746 return KDB_ARGCOUNT;
1747 }
1748
1749 diag = KDB_CMD_GO;
1750 if (KDB_FLAG(CATASTROPHIC)) {
1751 kdb_printf("Catastrophic error detected\n");
1752 kdb_printf("kdb_continue_catastrophic=%d, ",
1753 kdb_continue_catastrophic);
1754 if (kdb_continue_catastrophic == 0 && kdb_go_count++ == 0) {
1755 kdb_printf("type go a second time if you really want "
1756 "to continue\n");
1757 return 0;
1758 }
1759 if (kdb_continue_catastrophic == 2) {
1760 kdb_printf("forcing reboot\n");
1761 kdb_reboot(0, NULL);
1762 }
1763 kdb_printf("attempting to continue\n");
1764 }
1765 return diag;
1766}
1767
1768/*
1769 * kdb_rd - This function implements the 'rd' command.
1770 */
1771static int kdb_rd(int argc, const char **argv)
1772{
1773 int diag = kdb_check_regs();
1774 if (diag)
1775 return diag;
1776
1777 kdb_dumpregs(kdb_current_regs);
1778 return 0;
1779}
1780
1781/*
1782 * kdb_rm - This function implements the 'rm' (register modify) command.
1783 * rm register-name new-contents
1784 * Remarks:
1785 * Currently doesn't allow modification of control or
1786 * debug registers.
1787 */
1788static int kdb_rm(int argc, const char **argv)
1789{
1790 int diag;
1791 int ind = 0;
1792 unsigned long contents;
1793
1794 if (argc != 2)
1795 return KDB_ARGCOUNT;
1796 /*
1797 * Allow presence or absence of leading '%' symbol.
1798 */
1799 if (argv[1][0] == '%')
1800 ind = 1;
1801
1802 diag = kdbgetularg(argv[2], &contents);
1803 if (diag)
1804 return diag;
1805
1806 diag = kdb_check_regs();
1807 if (diag)
1808 return diag;
1809 kdb_printf("ERROR: Register set currently not implemented\n");
1810 return 0;
1811}
1812
1813#if defined(CONFIG_MAGIC_SYSRQ)
1814/*
1815 * kdb_sr - This function implements the 'sr' (SYSRQ key) command
1816 * which interfaces to the soi-disant MAGIC SYSRQ functionality.
1817 * sr <magic-sysrq-code>
1818 */
1819static int kdb_sr(int argc, const char **argv)
1820{
1821 if (argc != 1)
1822 return KDB_ARGCOUNT;
1823 sysrq_toggle_support(1);
1824 kdb_trap_printk++;
1825 handle_sysrq(*argv[1], NULL);
1826 kdb_trap_printk--;
1827
1828 return 0;
1829}
1830#endif /* CONFIG_MAGIC_SYSRQ */
1831
1832/*
1833 * kdb_ef - This function implements the 'regs' (display exception
1834 * frame) command. This command takes an address and expects to
1835 * find an exception frame at that address, formats and prints
1836 * it.
1837 * regs address-expression
1838 * Remarks:
1839 * Not done yet.
1840 */
1841static int kdb_ef(int argc, const char **argv)
1842{
1843 int diag;
1844 unsigned long addr;
1845 long offset;
1846 int nextarg;
1847
1848 if (argc != 1)
1849 return KDB_ARGCOUNT;
1850
1851 nextarg = 1;
1852 diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
1853 if (diag)
1854 return diag;
1855 show_regs((struct pt_regs *)addr);
1856 return 0;
1857}
1858
1859#if defined(CONFIG_MODULES)
1860/* modules using other modules */
1861struct module_use {
1862 struct list_head list;
1863 struct module *module_which_uses;
1864};
1865
1866/*
1867 * kdb_lsmod - This function implements the 'lsmod' command. Lists
1868 * currently loaded kernel modules.
1869 * Mostly taken from userland lsmod.
1870 */
1871static int kdb_lsmod(int argc, const char **argv)
1872{
1873 struct module *mod;
1874
1875 if (argc != 0)
1876 return KDB_ARGCOUNT;
1877
1878 kdb_printf("Module Size modstruct Used by\n");
1879 list_for_each_entry(mod, kdb_modules, list) {
1880
1881 kdb_printf("%-20s%8u 0x%p ", mod->name,
1882 mod->core_size, (void *)mod);
1883#ifdef CONFIG_MODULE_UNLOAD
1884 kdb_printf("%4d ", module_refcount(mod));
1885#endif
1886 if (mod->state == MODULE_STATE_GOING)
1887 kdb_printf(" (Unloading)");
1888 else if (mod->state == MODULE_STATE_COMING)
1889 kdb_printf(" (Loading)");
1890 else
1891 kdb_printf(" (Live)");
1892
1893#ifdef CONFIG_MODULE_UNLOAD
1894 {
1895 struct module_use *use;
1896 kdb_printf(" [ ");
1897 list_for_each_entry(use, &mod->modules_which_use_me,
1898 list)
1899 kdb_printf("%s ", use->module_which_uses->name);
1900 kdb_printf("]\n");
1901 }
1902#endif
1903 }
1904
1905 return 0;
1906}
1907
1908#endif /* CONFIG_MODULES */
1909
1910/*
1911 * kdb_env - This function implements the 'env' command. Display the
1912 * current environment variables.
1913 */
1914
1915static int kdb_env(int argc, const char **argv)
1916{
1917 int i;
1918
1919 for (i = 0; i < __nenv; i++) {
1920 if (__env[i])
1921 kdb_printf("%s\n", __env[i]);
1922 }
1923
1924 if (KDB_DEBUG(MASK))
1925 kdb_printf("KDBFLAGS=0x%x\n", kdb_flags);
1926
1927 return 0;
1928}
1929
1930#ifdef CONFIG_PRINTK
1931/*
1932 * kdb_dmesg - This function implements the 'dmesg' command to display
1933 * the contents of the syslog buffer.
1934 * dmesg [lines] [adjust]
1935 */
1936static int kdb_dmesg(int argc, const char **argv)
1937{
1938 char *syslog_data[4], *start, *end, c = '\0', *p;
1939 int diag, logging, logsize, lines = 0, adjust = 0, n;
1940
1941 if (argc > 2)
1942 return KDB_ARGCOUNT;
1943 if (argc) {
1944 char *cp;
1945 lines = simple_strtol(argv[1], &cp, 0);
1946 if (*cp)
1947 lines = 0;
1948 if (argc > 1) {
1949 adjust = simple_strtoul(argv[2], &cp, 0);
1950 if (*cp || adjust < 0)
1951 adjust = 0;
1952 }
1953 }
1954
1955 /* disable LOGGING if set */
1956 diag = kdbgetintenv("LOGGING", &logging);
1957 if (!diag && logging) {
1958 const char *setargs[] = { "set", "LOGGING", "0" };
1959 kdb_set(2, setargs);
1960 }
1961
1962 /* syslog_data[0,1] physical start, end+1. syslog_data[2,3]
1963 * logical start, end+1. */
1964 kdb_syslog_data(syslog_data);
1965 if (syslog_data[2] == syslog_data[3])
1966 return 0;
1967 logsize = syslog_data[1] - syslog_data[0];
1968 start = syslog_data[2];
1969 end = syslog_data[3];
1970#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0])
1971 for (n = 0, p = start; p < end; ++p) {
1972 c = *KDB_WRAP(p);
1973 if (c == '\n')
1974 ++n;
1975 }
1976 if (c != '\n')
1977 ++n;
1978 if (lines < 0) {
1979 if (adjust >= n)
1980 kdb_printf("buffer only contains %d lines, nothing "
1981 "printed\n", n);
1982 else if (adjust - lines >= n)
1983 kdb_printf("buffer only contains %d lines, last %d "
1984 "lines printed\n", n, n - adjust);
1985 if (adjust) {
1986 for (; start < end && adjust; ++start) {
1987 if (*KDB_WRAP(start) == '\n')
1988 --adjust;
1989 }
1990 if (start < end)
1991 ++start;
1992 }
1993 for (p = start; p < end && lines; ++p) {
1994 if (*KDB_WRAP(p) == '\n')
1995 ++lines;
1996 }
1997 end = p;
1998 } else if (lines > 0) {
1999 int skip = n - (adjust + lines);
2000 if (adjust >= n) {
2001 kdb_printf("buffer only contains %d lines, "
2002 "nothing printed\n", n);
2003 skip = n;
2004 } else if (skip < 0) {
2005 lines += skip;
2006 skip = 0;
2007 kdb_printf("buffer only contains %d lines, first "
2008 "%d lines printed\n", n, lines);
2009 }
2010 for (; start < end && skip; ++start) {
2011 if (*KDB_WRAP(start) == '\n')
2012 --skip;
2013 }
2014 for (p = start; p < end && lines; ++p) {
2015 if (*KDB_WRAP(p) == '\n')
2016 --lines;
2017 }
2018 end = p;
2019 }
2020 /* Do a line at a time (max 200 chars) to reduce protocol overhead */
2021 c = '\n';
2022 while (start != end) {
2023 char buf[201];
2024 p = buf;
2025 if (KDB_FLAG(CMD_INTERRUPT))
2026 return 0;
2027 while (start < end && (c = *KDB_WRAP(start)) &&
2028 (p - buf) < sizeof(buf)-1) {
2029 ++start;
2030 *p++ = c;
2031 if (c == '\n')
2032 break;
2033 }
2034 *p = '\0';
2035 kdb_printf("%s", buf);
2036 }
2037 if (c != '\n')
2038 kdb_printf("\n");
2039
2040 return 0;
2041}
2042#endif /* CONFIG_PRINTK */
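The KDB_WRAP() macro above is what lets the logical start/end pointers run past the physical end of the syslog buffer: subtract the base, fold with modulo, add the base back. A minimal userspace sketch of the same ring arithmetic (buffer contents and size invented for illustration, not the kernel's):

#include <stdio.h>

/* Fold a logical index back into a ring of 'size' bytes; the same
 * arithmetic as KDB_WRAP(), with plain indices instead of pointers
 * into syslog_data[]. */
static int ring_wrap(int logical, int size)
{
	return logical % size;
}

int main(void)
{
	char ring[8] = "abcdefgh";	/* stand-in for the log buffer */
	int p;

	/* logical positions 6..11 wrap past the physical end */
	for (p = 6; p < 12; p++)
		putchar(ring[ring_wrap(p, sizeof(ring))]);
	putchar('\n');			/* prints "ghabcd" */
	return 0;
}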
2043/*
2044 * kdb_cpu - This function implements the 'cpu' command.
2045 * cpu [<cpunum>]
2046 * Returns:
2047 * KDB_CMD_CPU for success, a kdb diagnostic if error
2048 */
2049static void kdb_cpu_status(void)
2050{
2051 int i, start_cpu, first_print = 1;
2052 char state, prev_state = '?';
2053
2054 kdb_printf("Currently on cpu %d\n", raw_smp_processor_id());
2055 kdb_printf("Available cpus: ");
2056 for (start_cpu = -1, i = 0; i < NR_CPUS; i++) {
2057 if (!cpu_online(i)) {
2058 state = 'F'; /* cpu is offline */
2059 } else {
2060 state = ' '; /* cpu is responding to kdb */
2061 if (kdb_task_state_char(KDB_TSK(i)) == 'I')
2062 state = 'I'; /* idle task */
2063 }
2064 if (state != prev_state) {
2065 if (prev_state != '?') {
2066 if (!first_print)
2067 kdb_printf(", ");
2068 first_print = 0;
2069 kdb_printf("%d", start_cpu);
2070 if (start_cpu < i-1)
2071 kdb_printf("-%d", i-1);
2072 if (prev_state != ' ')
2073 kdb_printf("(%c)", prev_state);
2074 }
2075 prev_state = state;
2076 start_cpu = i;
2077 }
2078 }
2079 /* print the trailing cpus, ignoring them if they are all offline */
2080 if (prev_state != 'F') {
2081 if (!first_print)
2082 kdb_printf(", ");
2083 kdb_printf("%d", start_cpu);
2084 if (start_cpu < i-1)
2085 kdb_printf("-%d", i-1);
2086 if (prev_state != ' ')
2087 kdb_printf("(%c)", prev_state);
2088 }
2089 kdb_printf("\n");
2090}
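The run-length compression above keeps the cpu list short on large machines. On a hypothetical 6-cpu box where cpus 2-3 are idle and cpu 4 is offline, kdb_cpu_status() would print:

Currently on cpu 0
Available cpus: 0-1, 2-3(I), 4(F), 5

Note that only a trailing run of offline cpus is suppressed entirely; an offline run in the middle, like cpu 4 here, is still reported.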
2091
2092static int kdb_cpu(int argc, const char **argv)
2093{
2094 unsigned long cpunum;
2095 int diag;
2096
2097 if (argc == 0) {
2098 kdb_cpu_status();
2099 return 0;
2100 }
2101
2102 if (argc != 1)
2103 return KDB_ARGCOUNT;
2104
2105 diag = kdbgetularg(argv[1], &cpunum);
2106 if (diag)
2107 return diag;
2108
2109 /*
2110 * Validate cpunum
2111 */
 2112 if ((cpunum >= NR_CPUS) || !cpu_online(cpunum))
2113 return KDB_BADCPUNUM;
2114
2115 dbg_switch_cpu = cpunum;
2116
2117 /*
2118 * Switch to other cpu
2119 */
2120 return KDB_CMD_CPU;
2121}
2122
2123/* The user may not realize that ps/bta with no parameters does not print idle
2124 * or sleeping system daemon processes, so tell them how many were suppressed.
2125 */
2126void kdb_ps_suppressed(void)
2127{
2128 int idle = 0, daemon = 0;
2129 unsigned long mask_I = kdb_task_state_string("I"),
2130 mask_M = kdb_task_state_string("M");
2131 unsigned long cpu;
2132 const struct task_struct *p, *g;
2133 for_each_online_cpu(cpu) {
2134 p = kdb_curr_task(cpu);
2135 if (kdb_task_state(p, mask_I))
2136 ++idle;
2137 }
2138 kdb_do_each_thread(g, p) {
2139 if (kdb_task_state(p, mask_M))
2140 ++daemon;
2141 } kdb_while_each_thread(g, p);
2142 if (idle || daemon) {
2143 if (idle)
2144 kdb_printf("%d idle process%s (state I)%s\n",
2145 idle, idle == 1 ? "" : "es",
2146 daemon ? " and " : "");
2147 if (daemon)
2148 kdb_printf("%d sleeping system daemon (state M) "
2149 "process%s", daemon,
2150 daemon == 1 ? "" : "es");
2151 kdb_printf(" suppressed,\nuse 'ps A' to see all.\n");
2152 }
2153}
2154
2155/*
2156 * kdb_ps - This function implements the 'ps' command which shows a
2157 * list of the active processes.
2158 * ps [DRSTCZEUIMA] All processes, optionally filtered by state
2159 */
2160void kdb_ps1(const struct task_struct *p)
2161{
2162 int cpu;
2163 unsigned long tmp;
2164
2165 if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
2166 return;
2167
2168 cpu = kdb_process_cpu(p);
2169 kdb_printf("0x%p %8d %8d %d %4d %c 0x%p %c%s\n",
2170 (void *)p, p->pid, p->parent->pid,
2171 kdb_task_has_cpu(p), kdb_process_cpu(p),
2172 kdb_task_state_char(p),
2173 (void *)(&p->thread),
2174 p == kdb_curr_task(raw_smp_processor_id()) ? '*' : ' ',
2175 p->comm);
2176 if (kdb_task_has_cpu(p)) {
2177 if (!KDB_TSK(cpu)) {
2178 kdb_printf(" Error: no saved data for this cpu\n");
2179 } else {
2180 if (KDB_TSK(cpu) != p)
2181 kdb_printf(" Error: does not match running "
2182 "process table (0x%p)\n", KDB_TSK(cpu));
2183 }
2184 }
2185}
2186
2187static int kdb_ps(int argc, const char **argv)
2188{
2189 struct task_struct *g, *p;
2190 unsigned long mask, cpu;
2191
2192 if (argc == 0)
2193 kdb_ps_suppressed();
2194 kdb_printf("%-*s Pid Parent [*] cpu State %-*s Command\n",
2195 (int)(2*sizeof(void *))+2, "Task Addr",
2196 (int)(2*sizeof(void *))+2, "Thread");
2197 mask = kdb_task_state_string(argc ? argv[1] : NULL);
2198 /* Run the active tasks first */
2199 for_each_online_cpu(cpu) {
2200 if (KDB_FLAG(CMD_INTERRUPT))
2201 return 0;
2202 p = kdb_curr_task(cpu);
2203 if (kdb_task_state(p, mask))
2204 kdb_ps1(p);
2205 }
2206 kdb_printf("\n");
2207 /* Now the real tasks */
2208 kdb_do_each_thread(g, p) {
2209 if (KDB_FLAG(CMD_INTERRUPT))
2210 return 0;
2211 if (kdb_task_state(p, mask))
2212 kdb_ps1(p);
2213 } kdb_while_each_thread(g, p);
2214
2215 return 0;
2216}
2217
2218/*
2219 * kdb_pid - This function implements the 'pid' command which switches
2220 * the currently active process.
2221 * pid [<pid> | R]
2222 */
2223static int kdb_pid(int argc, const char **argv)
2224{
2225 struct task_struct *p;
2226 unsigned long val;
2227 int diag;
2228
2229 if (argc > 1)
2230 return KDB_ARGCOUNT;
2231
2232 if (argc) {
2233 if (strcmp(argv[1], "R") == 0) {
2234 p = KDB_TSK(kdb_initial_cpu);
2235 } else {
2236 diag = kdbgetularg(argv[1], &val);
2237 if (diag)
2238 return KDB_BADINT;
2239
2240 p = find_task_by_pid_ns((pid_t)val, &init_pid_ns);
2241 if (!p) {
2242 kdb_printf("No task with pid=%d\n", (pid_t)val);
2243 return 0;
2244 }
2245 }
2246 kdb_set_current_task(p);
2247 }
2248 kdb_printf("KDB current process is %s(pid=%d)\n",
2249 kdb_current_task->comm,
2250 kdb_current_task->pid);
2251
2252 return 0;
2253}
2254
2255/*
2256 * kdb_ll - This function implements the 'll' command which follows a
2257 * linked list and executes an arbitrary command for each
2258 * element.
2259 */
2260static int kdb_ll(int argc, const char **argv)
2261{
2262 int diag;
2263 unsigned long addr;
2264 long offset = 0;
2265 unsigned long va;
2266 unsigned long linkoffset;
2267 int nextarg;
2268 const char *command;
2269
2270 if (argc != 3)
2271 return KDB_ARGCOUNT;
2272
2273 nextarg = 1;
2274 diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
2275 if (diag)
2276 return diag;
2277
2278 diag = kdbgetularg(argv[2], &linkoffset);
2279 if (diag)
2280 return diag;
2281
2282 /*
 2283 * Use the starting address as
 2284 * the first element in the list, and assume that
 2285 * the list ends with a null pointer.
2286 */
2287
2288 va = addr;
2289 command = kdb_strdup(argv[3], GFP_KDB);
2290 if (!command) {
2291 kdb_printf("%s: cannot duplicate command\n", __func__);
2292 return 0;
2293 }
2294 /* Recursive use of kdb_parse, do not use argv after this point */
2295 argv = NULL;
2296
2297 while (va) {
2298 char buf[80];
2299
2300 sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
2301 diag = kdb_parse(buf);
 2302 if (diag)
 2303 break;
 2304 
 2305 addr = va + linkoffset;
 2306 if (kdb_getword(&va, addr, sizeof(va)))
 2307 break;
 2308 }
 2309 kfree(command); /* free on every exit path, not only on success */
 2310 
 2311 return diag;
2312}
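As a usage sketch (the address and offset are made up): for list nodes whose link pointer lives 0x8 bytes into the structure,

[0]kdb> ll 0xffff880012345678 0x8 md4c4

runs "md4c4 <node>" for the head node and for every node reached through the 0x8-offset link, stopping when the link is NULL.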
2313
2314static int kdb_kgdb(int argc, const char **argv)
2315{
2316 return KDB_CMD_KGDB;
2317}
2318
2319/*
2320 * kdb_help - This function implements the 'help' and '?' commands.
2321 */
2322static int kdb_help(int argc, const char **argv)
2323{
2324 kdbtab_t *kt;
2325 int i;
2326
2327 kdb_printf("%-15.15s %-20.20s %s\n", "Command", "Usage", "Description");
2328 kdb_printf("-----------------------------"
2329 "-----------------------------\n");
2330 for_each_kdbcmd(kt, i) {
2331 if (kt->cmd_name)
2332 kdb_printf("%-15.15s %-20.20s %s\n", kt->cmd_name,
2333 kt->cmd_usage, kt->cmd_help);
2334 if (KDB_FLAG(CMD_INTERRUPT))
2335 return 0;
2336 }
2337 return 0;
2338}
2339
2340/*
 2341 * kdb_kill - This function implements the 'kill' command.
2342 */
2343static int kdb_kill(int argc, const char **argv)
2344{
2345 long sig, pid;
2346 char *endp;
2347 struct task_struct *p;
2348 struct siginfo info;
2349
2350 if (argc != 2)
2351 return KDB_ARGCOUNT;
2352
2353 sig = simple_strtol(argv[1], &endp, 0);
2354 if (*endp)
2355 return KDB_BADINT;
2356 if (sig >= 0) {
2357 kdb_printf("Invalid signal parameter.<-signal>\n");
2358 return 0;
2359 }
2360 sig = -sig;
2361
2362 pid = simple_strtol(argv[2], &endp, 0);
2363 if (*endp)
2364 return KDB_BADINT;
2365 if (pid <= 0) {
2366 kdb_printf("Process ID must be large than 0.\n");
2367 return 0;
2368 }
2369
2370 /* Find the process. */
2371 p = find_task_by_pid_ns(pid, &init_pid_ns);
2372 if (!p) {
2373 kdb_printf("The specified process isn't found.\n");
2374 return 0;
2375 }
2376 p = p->group_leader;
2377 info.si_signo = sig;
2378 info.si_errno = 0;
2379 info.si_code = SI_USER;
2380 info.si_pid = pid; /* same capabilities as process being signalled */
2381 info.si_uid = 0; /* kdb has root authority */
2382 kdb_send_sig_info(p, &info);
2383 return 0;
2384}
2385
2386struct kdb_tm {
2387 int tm_sec; /* seconds */
2388 int tm_min; /* minutes */
2389 int tm_hour; /* hours */
2390 int tm_mday; /* day of the month */
2391 int tm_mon; /* month */
2392 int tm_year; /* year */
2393};
2394
2395static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
2396{
2397 /* This will work from 1970-2099, 2100 is not a leap year */
2398 static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31,
2399 31, 30, 31, 30, 31 };
2400 memset(tm, 0, sizeof(*tm));
2401 tm->tm_sec = tv->tv_sec % (24 * 60 * 60);
2402 tm->tm_mday = tv->tv_sec / (24 * 60 * 60) +
2403 (2 * 365 + 1); /* shift base from 1970 to 1968 */
2404 tm->tm_min = tm->tm_sec / 60 % 60;
2405 tm->tm_hour = tm->tm_sec / 60 / 60;
2406 tm->tm_sec = tm->tm_sec % 60;
2407 tm->tm_year = 68 + 4*(tm->tm_mday / (4*365+1));
2408 tm->tm_mday %= (4*365+1);
2409 mon_day[1] = 29;
2410 while (tm->tm_mday >= mon_day[tm->tm_mon]) {
2411 tm->tm_mday -= mon_day[tm->tm_mon];
2412 if (++tm->tm_mon == 12) {
2413 tm->tm_mon = 0;
2414 ++tm->tm_year;
2415 mon_day[1] = 28;
2416 }
2417 }
2418 ++tm->tm_mday;
2419}
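The (2*365+1)-day shift rebases the day count from 1970 to 1968, so the first year the month loop consumes is a leap year; that is why mon_day[1] starts at 29 and drops to 28 after the first December. A userspace harness of the same arithmetic (hypothetical test code, not part of kdb) shows the epoch round-trips correctly:

#include <stdio.h>

struct tm_simple { int sec, min, hour, mday, mon, year; };

/* Same steps as kdb_gmtime(): rebase days to 1968-01-01, peel off
 * whole 1461-day (4-year) cycles, then walk months with February
 * = 29 only in the first, leap, year of each cycle. */
static void gmtime_kdb(long tv_sec, struct tm_simple *tm)
{
	static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31,
				 31, 30, 31, 30, 31 };
	tm->sec  = tv_sec % (24 * 60 * 60);
	tm->mday = tv_sec / (24 * 60 * 60) + (2 * 365 + 1);
	tm->min  = tm->sec / 60 % 60;
	tm->hour = tm->sec / 60 / 60;
	tm->sec  = tm->sec % 60;
	tm->year = 68 + 4 * (tm->mday / (4 * 365 + 1));
	tm->mday %= (4 * 365 + 1);
	mon_day[1] = 29;
	tm->mon = 0;
	while (tm->mday >= mon_day[tm->mon]) {
		tm->mday -= mon_day[tm->mon];
		if (++tm->mon == 12) {
			tm->mon = 0;
			++tm->year;
			mon_day[1] = 28;
		}
	}
	++tm->mday;
}

int main(void)
{
	struct tm_simple tm;

	gmtime_kdb(0, &tm);	/* the Unix epoch */
	/* prints 1970-01-01 00:00:00 */
	printf("%04d-%02d-%02d %02d:%02d:%02d\n", 1900 + tm.year,
	       tm.mon + 1, tm.mday, tm.hour, tm.min, tm.sec);
	return 0;
}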
2420
2421/*
2422 * Most of this code has been lifted from kernel/timer.c::sys_sysinfo().
2423 * I cannot call that code directly from kdb, it has an unconditional
2424 * cli()/sti() and calls routines that take locks which can stop the debugger.
2425 */
2426static void kdb_sysinfo(struct sysinfo *val)
2427{
2428 struct timespec uptime;
2429 do_posix_clock_monotonic_gettime(&uptime);
2430 memset(val, 0, sizeof(*val));
2431 val->uptime = uptime.tv_sec;
2432 val->loads[0] = avenrun[0];
2433 val->loads[1] = avenrun[1];
2434 val->loads[2] = avenrun[2];
2435 val->procs = nr_threads-1;
2436 si_meminfo(val);
2437
2438 return;
2439}
2440
2441/*
2442 * kdb_summary - This function implements the 'summary' command.
2443 */
2444static int kdb_summary(int argc, const char **argv)
2445{
2446 struct kdb_tm tm;
2447 struct sysinfo val;
2448
2449 if (argc)
2450 return KDB_ARGCOUNT;
2451
2452 kdb_printf("sysname %s\n", init_uts_ns.name.sysname);
2453 kdb_printf("release %s\n", init_uts_ns.name.release);
2454 kdb_printf("version %s\n", init_uts_ns.name.version);
2455 kdb_printf("machine %s\n", init_uts_ns.name.machine);
2456 kdb_printf("nodename %s\n", init_uts_ns.name.nodename);
2457 kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
2458 kdb_printf("ccversion %s\n", __stringify(CCVERSION));
2459
2460 kdb_gmtime(&xtime, &tm);
2461 kdb_printf("date %04d-%02d-%02d %02d:%02d:%02d "
2462 "tz_minuteswest %d\n",
2463 1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
2464 tm.tm_hour, tm.tm_min, tm.tm_sec,
2465 sys_tz.tz_minuteswest);
2466
2467 kdb_sysinfo(&val);
2468 kdb_printf("uptime ");
2469 if (val.uptime > (24*60*60)) {
2470 int days = val.uptime / (24*60*60);
2471 val.uptime %= (24*60*60);
2472 kdb_printf("%d day%s ", days, days == 1 ? "" : "s");
2473 }
2474 kdb_printf("%02ld:%02ld\n", val.uptime/(60*60), (val.uptime/60)%60);
2475
2476 /* lifted from fs/proc/proc_misc.c::loadavg_read_proc() */
2477
2478#define LOAD_INT(x) ((x) >> FSHIFT)
2479#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
2480 kdb_printf("load avg %ld.%02ld %ld.%02ld %ld.%02ld\n",
2481 LOAD_INT(val.loads[0]), LOAD_FRAC(val.loads[0]),
2482 LOAD_INT(val.loads[1]), LOAD_FRAC(val.loads[1]),
2483 LOAD_INT(val.loads[2]), LOAD_FRAC(val.loads[2]));
2484#undef LOAD_INT
2485#undef LOAD_FRAC
2486 /* Display in kilobytes */
2487#define K(x) ((x) << (PAGE_SHIFT - 10))
2488 kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n"
2489 "Buffers: %8lu kB\n",
 2490 K(val.totalram), K(val.freeram), K(val.bufferram));
2491 return 0;
2492}
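The avenrun samples are fixed-point numbers with FSHIFT fractional bits, so LOAD_INT() takes the integer part and LOAD_FRAC() scales the low-order bits into a two-digit fraction. A standalone check, assuming the kernel's usual FSHIFT of 11:

#include <stdio.h>

#define FSHIFT	11			/* bits of fraction, as in the kernel */
#define FIXED_1	(1 << FSHIFT)		/* 1.0 in fixed point */
#define LOAD_INT(x)  ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

int main(void)
{
	unsigned long raw = 3 * FIXED_1 + FIXED_1 / 4;	/* 3.25 */

	/* prints "load 3.25" */
	printf("load %lu.%02lu\n", LOAD_INT(raw), LOAD_FRAC(raw));
	return 0;
}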
2493
2494/*
2495 * kdb_per_cpu - This function implements the 'per_cpu' command.
2496 */
2497static int kdb_per_cpu(int argc, const char **argv)
2498{
2499 char buf[256], fmtstr[64];
2500 kdb_symtab_t symtab;
2501 cpumask_t suppress = CPU_MASK_NONE;
2502 int cpu, diag;
2503 unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL;
2504
2505 if (argc < 1 || argc > 3)
2506 return KDB_ARGCOUNT;
2507
2508 snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]);
2509 if (!kdbgetsymval(buf, &symtab)) {
2510 kdb_printf("%s is not a per_cpu variable\n", argv[1]);
2511 return KDB_BADADDR;
2512 }
2513 if (argc >= 2) {
2514 diag = kdbgetularg(argv[2], &bytesperword);
2515 if (diag)
2516 return diag;
2517 }
2518 if (!bytesperword)
2519 bytesperword = KDB_WORD_SIZE;
2520 else if (bytesperword > KDB_WORD_SIZE)
2521 return KDB_BADWIDTH;
2522 sprintf(fmtstr, "%%0%dlx ", (int)(2*bytesperword));
2523 if (argc >= 3) {
2524 diag = kdbgetularg(argv[3], &whichcpu);
2525 if (diag)
2526 return diag;
2527 if (!cpu_online(whichcpu)) {
2528 kdb_printf("cpu %ld is not online\n", whichcpu);
2529 return KDB_BADCPUNUM;
2530 }
2531 }
2532
2533 /* Most architectures use __per_cpu_offset[cpu], some use
2534 * __per_cpu_offset(cpu), smp has no __per_cpu_offset.
2535 */
2536#ifdef __per_cpu_offset
2537#define KDB_PCU(cpu) __per_cpu_offset(cpu)
2538#else
2539#ifdef CONFIG_SMP
2540#define KDB_PCU(cpu) __per_cpu_offset[cpu]
2541#else
2542#define KDB_PCU(cpu) 0
2543#endif
2544#endif
2545
2546 for_each_online_cpu(cpu) {
2547 if (whichcpu != ~0UL && whichcpu != cpu)
2548 continue;
2549 addr = symtab.sym_start + KDB_PCU(cpu);
2550 diag = kdb_getword(&val, addr, bytesperword);
2551 if (diag) {
2552 kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to "
2553 "read, diag=%d\n", cpu, addr, diag);
2554 continue;
2555 }
2556#ifdef CONFIG_SMP
2557 if (!val) {
2558 cpu_set(cpu, suppress);
2559 continue;
2560 }
2561#endif /* CONFIG_SMP */
2562 kdb_printf("%5d ", cpu);
2563 kdb_md_line(fmtstr, addr,
2564 bytesperword == KDB_WORD_SIZE,
2565 1, bytesperword, 1, 1, 0);
2566 }
2567 if (cpus_weight(suppress) == 0)
2568 return 0;
2569 kdb_printf("Zero suppressed cpu(s):");
2570 for (cpu = first_cpu(suppress); cpu < num_possible_cpus();
2571 cpu = next_cpu(cpu, suppress)) {
2572 kdb_printf(" %d", cpu);
2573 if (cpu == num_possible_cpus() - 1 ||
2574 next_cpu(cpu, suppress) != cpu + 1)
2575 continue;
2576 while (cpu < num_possible_cpus() &&
2577 next_cpu(cpu, suppress) == cpu + 1)
2578 ++cpu;
2579 kdb_printf("-%d", cpu);
2580 }
2581 kdb_printf("\n");
2582
2583#undef KDB_PCU
2584
2585 return 0;
2586}
2587
2588/*
2589 * display help for the use of cmd | grep pattern
2590 */
2591static int kdb_grep_help(int argc, const char **argv)
2592{
2593 kdb_printf("Usage of cmd args | grep pattern:\n");
2594 kdb_printf(" Any command's output may be filtered through an ");
2595 kdb_printf("emulated 'pipe'.\n");
2596 kdb_printf(" 'grep' is just a key word.\n");
2597 kdb_printf(" The pattern may include a very limited set of "
2598 "metacharacters:\n");
2599 kdb_printf(" pattern or ^pattern or pattern$ or ^pattern$\n");
2600 kdb_printf(" And if there are spaces in the pattern, you may "
2601 "quote it:\n");
2602 kdb_printf(" \"pat tern\" or \"^pat tern\" or \"pat tern$\""
2603 " or \"^pat tern$\"\n");
2604 return 0;
2605}
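A hypothetical session using the emulated pipe:

[0]kdb> ps | grep bash
[0]kdb> dmesg | grep "^Linux"

The first keeps only the ps lines that contain "bash" anywhere; the second, thanks to the ^ anchor, keeps only dmesg lines that begin with "Linux".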
2606
2607/*
2608 * kdb_register_repeat - This function is used to register a kernel
2609 * debugger command.
2610 * Inputs:
2611 * cmd Command name
2612 * func Function to execute the command
2613 * usage A simple usage string showing arguments
2614 * help A simple help string describing command
2615 * repeat Does the command auto repeat on enter?
2616 * Returns:
2617 * zero for success, one if a duplicate command.
2618 */
2619#define kdb_command_extend 50 /* arbitrary */
2620int kdb_register_repeat(char *cmd,
2621 kdb_func_t func,
2622 char *usage,
2623 char *help,
2624 short minlen,
2625 kdb_repeat_t repeat)
2626{
2627 int i;
2628 kdbtab_t *kp;
2629
2630 /*
2631 * Brute force method to determine duplicates
2632 */
2633 for_each_kdbcmd(kp, i) {
2634 if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) {
2635 kdb_printf("Duplicate kdb command registered: "
2636 "%s, func %p help %s\n", cmd, func, help);
2637 return 1;
2638 }
2639 }
2640
2641 /*
2642 * Insert command into first available location in table
2643 */
2644 for_each_kdbcmd(kp, i) {
2645 if (kp->cmd_name == NULL)
2646 break;
2647 }
2648
2649 if (i >= kdb_max_commands) {
2650 kdbtab_t *new = kmalloc((kdb_max_commands - KDB_BASE_CMD_MAX +
2651 kdb_command_extend) * sizeof(*new), GFP_KDB);
2652 if (!new) {
2653 kdb_printf("Could not allocate new kdb_command "
2654 "table\n");
2655 return 1;
2656 }
2657 if (kdb_commands) {
 2658 memcpy(new, kdb_commands,
 2659 (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new));
 2660 kfree(kdb_commands);
 2661 }
 2662 memset(new + kdb_max_commands - KDB_BASE_CMD_MAX, 0,
 2663 kdb_command_extend * sizeof(*new));
2664 kdb_commands = new;
2665 kp = kdb_commands + kdb_max_commands;
2666 kdb_max_commands += kdb_command_extend;
2667 }
2668
2669 kp->cmd_name = cmd;
2670 kp->cmd_func = func;
2671 kp->cmd_usage = usage;
2672 kp->cmd_help = help;
2673 kp->cmd_flags = 0;
2674 kp->cmd_minlen = minlen;
2675 kp->cmd_repeat = repeat;
2676
2677 return 0;
2678}
2679
2680/*
2681 * kdb_register - Compatibility register function for commands that do
2682 * not need to specify a repeat state. Equivalent to
2683 * kdb_register_repeat with KDB_REPEAT_NONE.
2684 * Inputs:
2685 * cmd Command name
2686 * func Function to execute the command
2687 * usage A simple usage string showing arguments
2688 * help A simple help string describing command
2689 * Returns:
2690 * zero for success, one if a duplicate command.
2691 */
2692int kdb_register(char *cmd,
2693 kdb_func_t func,
2694 char *usage,
2695 char *help,
2696 short minlen)
2697{
2698 return kdb_register_repeat(cmd, func, usage, help, minlen,
2699 KDB_REPEAT_NONE);
2700}
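A loadable module can extend the kdb shell through this interface. A minimal sketch, with the command name and callback invented for illustration:

#include <linux/module.h>
#include <linux/kdb.h>

/* Hypothetical command: echo the arguments back to the console. */
static int kdb_hello(int argc, const char **argv)
{
	int i;

	kdb_printf("hello from a module, argc=%d\n", argc);
	for (i = 1; i <= argc; i++)	/* argv[0] is the command itself */
		kdb_printf("  argv[%d] = %s\n", i, argv[i]);
	return 0;
}

static int __init hello_init(void)
{
	/* kdb_register() returns 1 if "hello" is already taken */
	return kdb_register("hello", kdb_hello, "[args]",
			    "Print a greeting", 0) ? -EBUSY : 0;
}

static void __exit hello_exit(void)
{
	kdb_unregister("hello");
}

module_init(hello_init);
module_exit(hello_exit);
MODULE_LICENSE("GPL");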
2701
2702/*
2703 * kdb_unregister - This function is used to unregister a kernel
2704 * debugger command. It is generally called when a module which
2705 * implements kdb commands is unloaded.
2706 * Inputs:
2707 * cmd Command name
2708 * Returns:
 2709 * zero for success, one if the command was not registered.
2710 */
2711int kdb_unregister(char *cmd)
2712{
2713 int i;
2714 kdbtab_t *kp;
2715
2716 /*
2717 * find the command.
2718 */
2719 for (i = 0, kp = kdb_commands; i < kdb_max_commands; i++, kp++) {
2720 if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) {
2721 kp->cmd_name = NULL;
2722 return 0;
2723 }
2724 }
2725
2726 /* Couldn't find it. */
2727 return 1;
2728}
2729
2730/* Initialize the kdb command table. */
2731static void __init kdb_inittab(void)
2732{
2733 int i;
2734 kdbtab_t *kp;
2735
2736 for_each_kdbcmd(kp, i)
2737 kp->cmd_name = NULL;
2738
2739 kdb_register_repeat("md", kdb_md, "<vaddr>",
2740 "Display Memory Contents, also mdWcN, e.g. md8c1", 1,
2741 KDB_REPEAT_NO_ARGS);
2742 kdb_register_repeat("mdr", kdb_md, "<vaddr> <bytes>",
2743 "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS);
2744 kdb_register_repeat("mdp", kdb_md, "<paddr> <bytes>",
2745 "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS);
2746 kdb_register_repeat("mds", kdb_md, "<vaddr>",
2747 "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS);
2748 kdb_register_repeat("mm", kdb_mm, "<vaddr> <contents>",
2749 "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS);
2750 kdb_register_repeat("go", kdb_go, "[<vaddr>]",
2751 "Continue Execution", 1, KDB_REPEAT_NONE);
2752 kdb_register_repeat("rd", kdb_rd, "",
2753 "Display Registers", 0, KDB_REPEAT_NONE);
2754 kdb_register_repeat("rm", kdb_rm, "<reg> <contents>",
2755 "Modify Registers", 0, KDB_REPEAT_NONE);
2756 kdb_register_repeat("ef", kdb_ef, "<vaddr>",
2757 "Display exception frame", 0, KDB_REPEAT_NONE);
2758 kdb_register_repeat("bt", kdb_bt, "[<vaddr>]",
2759 "Stack traceback", 1, KDB_REPEAT_NONE);
2760 kdb_register_repeat("btp", kdb_bt, "<pid>",
2761 "Display stack for process <pid>", 0, KDB_REPEAT_NONE);
2762 kdb_register_repeat("bta", kdb_bt, "[DRSTCZEUIMA]",
2763 "Display stack all processes", 0, KDB_REPEAT_NONE);
2764 kdb_register_repeat("btc", kdb_bt, "",
2765 "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE);
2766 kdb_register_repeat("btt", kdb_bt, "<vaddr>",
2767 "Backtrace process given its struct task address", 0,
2768 KDB_REPEAT_NONE);
2769 kdb_register_repeat("ll", kdb_ll, "<first-element> <linkoffset> <cmd>",
2770 "Execute cmd for each element in linked list", 0, KDB_REPEAT_NONE);
2771 kdb_register_repeat("env", kdb_env, "",
2772 "Show environment variables", 0, KDB_REPEAT_NONE);
2773 kdb_register_repeat("set", kdb_set, "",
2774 "Set environment variables", 0, KDB_REPEAT_NONE);
2775 kdb_register_repeat("help", kdb_help, "",
2776 "Display Help Message", 1, KDB_REPEAT_NONE);
2777 kdb_register_repeat("?", kdb_help, "",
2778 "Display Help Message", 0, KDB_REPEAT_NONE);
2779 kdb_register_repeat("cpu", kdb_cpu, "<cpunum>",
2780 "Switch to new cpu", 0, KDB_REPEAT_NONE);
2781 kdb_register_repeat("kgdb", kdb_kgdb, "",
2782 "Enter kgdb mode", 0, KDB_REPEAT_NONE);
2783 kdb_register_repeat("ps", kdb_ps, "[<flags>|A]",
2784 "Display active task list", 0, KDB_REPEAT_NONE);
2785 kdb_register_repeat("pid", kdb_pid, "<pidnum>",
2786 "Switch to another task", 0, KDB_REPEAT_NONE);
2787 kdb_register_repeat("reboot", kdb_reboot, "",
2788 "Reboot the machine immediately", 0, KDB_REPEAT_NONE);
2789#if defined(CONFIG_MODULES)
2790 kdb_register_repeat("lsmod", kdb_lsmod, "",
2791 "List loaded kernel modules", 0, KDB_REPEAT_NONE);
2792#endif
2793#if defined(CONFIG_MAGIC_SYSRQ)
2794 kdb_register_repeat("sr", kdb_sr, "<key>",
2795 "Magic SysRq key", 0, KDB_REPEAT_NONE);
2796#endif
2797#if defined(CONFIG_PRINTK)
2798 kdb_register_repeat("dmesg", kdb_dmesg, "[lines]",
2799 "Display syslog buffer", 0, KDB_REPEAT_NONE);
2800#endif
2801 kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"",
2802 "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE);
2803 kdb_register_repeat("kill", kdb_kill, "<-signal> <pid>",
2804 "Send a signal to a process", 0, KDB_REPEAT_NONE);
2805 kdb_register_repeat("summary", kdb_summary, "",
2806 "Summarize the system", 4, KDB_REPEAT_NONE);
2807 kdb_register_repeat("per_cpu", kdb_per_cpu, "",
2808 "Display per_cpu variables", 3, KDB_REPEAT_NONE);
2809 kdb_register_repeat("grephelp", kdb_grep_help, "",
2810 "Display help on | grep", 0, KDB_REPEAT_NONE);
2811}
2812
2813/* Execute any commands defined in kdb_cmds. */
2814static void __init kdb_cmd_init(void)
2815{
2816 int i, diag;
2817 for (i = 0; kdb_cmds[i]; ++i) {
2818 diag = kdb_parse(kdb_cmds[i]);
2819 if (diag)
2820 kdb_printf("kdb command %s failed, kdb diag %d\n",
2821 kdb_cmds[i], diag);
2822 }
2823 if (defcmd_in_progress) {
2824 kdb_printf("Incomplete 'defcmd' set, forcing endefcmd\n");
2825 kdb_parse("endefcmd");
2826 }
2827}
2828
 2829/* Initialize kdb_printf, breakpoint tables and kdb state */
2830void __init kdb_init(int lvl)
2831{
2832 static int kdb_init_lvl = KDB_NOT_INITIALIZED;
2833 int i;
2834
2835 if (kdb_init_lvl == KDB_INIT_FULL || lvl <= kdb_init_lvl)
2836 return;
2837 for (i = kdb_init_lvl; i < lvl; i++) {
2838 switch (i) {
2839 case KDB_NOT_INITIALIZED:
2840 kdb_inittab(); /* Initialize Command Table */
2841 kdb_initbptab(); /* Initialize Breakpoints */
2842 break;
2843 case KDB_INIT_EARLY:
2844 kdb_cmd_init(); /* Build kdb_cmds tables */
2845 break;
2846 }
2847 }
2848 kdb_init_lvl = lvl;
2849}
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
new file mode 100644
index 000000000000..97d3ba69775d
--- /dev/null
+++ b/kernel/debug/kdb/kdb_private.h
@@ -0,0 +1,300 @@
1#ifndef _KDBPRIVATE_H
2#define _KDBPRIVATE_H
3
4/*
5 * Kernel Debugger Architecture Independent Private Headers
6 *
7 * This file is subject to the terms and conditions of the GNU General Public
8 * License. See the file "COPYING" in the main directory of this archive
9 * for more details.
10 *
11 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
12 * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
13 */
14
15#include <linux/kgdb.h>
16#include "../debug_core.h"
17
18/* Kernel Debugger Error codes. Must not overlap with command codes. */
19#define KDB_NOTFOUND (-1)
20#define KDB_ARGCOUNT (-2)
21#define KDB_BADWIDTH (-3)
22#define KDB_BADRADIX (-4)
23#define KDB_NOTENV (-5)
24#define KDB_NOENVVALUE (-6)
25#define KDB_NOTIMP (-7)
26#define KDB_ENVFULL (-8)
27#define KDB_ENVBUFFULL (-9)
28#define KDB_TOOMANYBPT (-10)
29#define KDB_TOOMANYDBREGS (-11)
30#define KDB_DUPBPT (-12)
31#define KDB_BPTNOTFOUND (-13)
32#define KDB_BADMODE (-14)
33#define KDB_BADINT (-15)
34#define KDB_INVADDRFMT (-16)
35#define KDB_BADREG (-17)
36#define KDB_BADCPUNUM (-18)
37#define KDB_BADLENGTH (-19)
38#define KDB_NOBP (-20)
39#define KDB_BADADDR (-21)
40
41/* Kernel Debugger Command codes. Must not overlap with error codes. */
42#define KDB_CMD_GO (-1001)
43#define KDB_CMD_CPU (-1002)
44#define KDB_CMD_SS (-1003)
45#define KDB_CMD_SSB (-1004)
46#define KDB_CMD_KGDB (-1005)
47#define KDB_CMD_KGDB2 (-1006)
48
49/* Internal debug flags */
50#define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */
51#define KDB_DEBUG_FLAG_BB_SUMM 0x0004 /* Basic block analysis, summary only */
52#define KDB_DEBUG_FLAG_AR 0x0008 /* Activation record, generic */
53#define KDB_DEBUG_FLAG_ARA 0x0010 /* Activation record, arch specific */
54#define KDB_DEBUG_FLAG_BB 0x0020 /* All basic block analysis */
55#define KDB_DEBUG_FLAG_STATE 0x0040 /* State flags */
56#define KDB_DEBUG_FLAG_MASK 0xffff /* All debug flags */
57#define KDB_DEBUG_FLAG_SHIFT 16 /* Shift factor for dbflags */
58
59#define KDB_DEBUG(flag) (kdb_flags & \
60 (KDB_DEBUG_FLAG_##flag << KDB_DEBUG_FLAG_SHIFT))
61#define KDB_DEBUG_STATE(text, value) if (KDB_DEBUG(STATE)) \
62 kdb_print_state(text, value)
63
64#if BITS_PER_LONG == 32
65
66#define KDB_PLATFORM_ENV "BYTESPERWORD=4"
67
68#define kdb_machreg_fmt "0x%lx"
69#define kdb_machreg_fmt0 "0x%08lx"
70#define kdb_bfd_vma_fmt "0x%lx"
71#define kdb_bfd_vma_fmt0 "0x%08lx"
72#define kdb_elfw_addr_fmt "0x%x"
73#define kdb_elfw_addr_fmt0 "0x%08x"
74#define kdb_f_count_fmt "%d"
75
76#elif BITS_PER_LONG == 64
77
78#define KDB_PLATFORM_ENV "BYTESPERWORD=8"
79
80#define kdb_machreg_fmt "0x%lx"
81#define kdb_machreg_fmt0 "0x%016lx"
82#define kdb_bfd_vma_fmt "0x%lx"
83#define kdb_bfd_vma_fmt0 "0x%016lx"
84#define kdb_elfw_addr_fmt "0x%x"
85#define kdb_elfw_addr_fmt0 "0x%016x"
86#define kdb_f_count_fmt "%ld"
87
88#endif
89
90/*
91 * KDB_MAXBPT describes the total number of breakpoints
 92 * supported by this architecture.
93 */
94#define KDB_MAXBPT 16
95
96/* Maximum number of arguments to a function */
97#define KDB_MAXARGS 16
98
99typedef enum {
100 KDB_REPEAT_NONE = 0, /* Do not repeat this command */
101 KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */
102 KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */
103} kdb_repeat_t;
104
105typedef int (*kdb_func_t)(int, const char **);
106
107/* Symbol table format returned by kallsyms. */
108typedef struct __ksymtab {
109 unsigned long value; /* Address of symbol */
110 const char *mod_name; /* Module containing symbol or
111 * "kernel" */
112 unsigned long mod_start;
113 unsigned long mod_end;
114 const char *sec_name; /* Section containing symbol */
115 unsigned long sec_start;
116 unsigned long sec_end;
117 const char *sym_name; /* Full symbol name, including
118 * any version */
119 unsigned long sym_start;
120 unsigned long sym_end;
121 } kdb_symtab_t;
122extern int kallsyms_symbol_next(char *prefix_name, int flag);
123extern int kallsyms_symbol_complete(char *prefix_name, int max_len);
124
125/* Exported Symbols for kernel loadable modules to use. */
126extern int kdb_register(char *, kdb_func_t, char *, char *, short);
127extern int kdb_register_repeat(char *, kdb_func_t, char *, char *,
128 short, kdb_repeat_t);
129extern int kdb_unregister(char *);
130
131extern int kdb_getarea_size(void *, unsigned long, size_t);
132extern int kdb_putarea_size(unsigned long, void *, size_t);
133
134/*
135 * Like get_user and put_user, kdb_getarea and kdb_putarea take variable
136 * names, not pointers. The underlying *_size functions take pointers.
137 */
138#define kdb_getarea(x, addr) kdb_getarea_size(&(x), addr, sizeof((x)))
139#define kdb_putarea(addr, x) kdb_putarea_size(addr, &(x), sizeof((x)))
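So kdb_getarea(x, addr) copies sizeof(x) bytes from the kernel address addr into the local variable x and returns 0 or a KDB_* diagnostic. A hedged sketch of typical use inside a kdb command (the helper name is invented, and a real command would avoid so large a stack object):

/* Illustrative only: copy a task_struct out of debuggee memory at
 * 'addr' and print its pid, bailing out on a bad address. */
static int example_show_pid(unsigned long addr)
{
	struct task_struct ts;

	if (kdb_getarea(ts, addr))	/* kdb_getarea_size(&ts, addr, sizeof(ts)) */
		return KDB_BADADDR;
	kdb_printf("pid=%d\n", ts.pid);
	return 0;
}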
140
141extern int kdb_getphysword(unsigned long *word,
142 unsigned long addr, size_t size);
143extern int kdb_getword(unsigned long *, unsigned long, size_t);
144extern int kdb_putword(unsigned long, unsigned long, size_t);
145
146extern int kdbgetularg(const char *, unsigned long *);
147extern int kdb_set(int, const char **);
148extern char *kdbgetenv(const char *);
149extern int kdbgetintenv(const char *, int *);
150extern int kdbgetaddrarg(int, const char **, int*, unsigned long *,
151 long *, char **);
152extern int kdbgetsymval(const char *, kdb_symtab_t *);
153extern int kdbnearsym(unsigned long, kdb_symtab_t *);
154extern void kdbnearsym_cleanup(void);
155extern char *kdb_strdup(const char *str, gfp_t type);
156extern void kdb_symbol_print(unsigned long, const kdb_symtab_t *, unsigned int);
157
158/* Routine for debugging the debugger state. */
159extern void kdb_print_state(const char *, int);
160
161extern int kdb_state;
162#define KDB_STATE_KDB 0x00000001 /* Cpu is inside kdb */
163#define KDB_STATE_LEAVING 0x00000002 /* Cpu is leaving kdb */
164#define KDB_STATE_CMD 0x00000004 /* Running a kdb command */
165#define KDB_STATE_KDB_CONTROL 0x00000008 /* This cpu is under
166 * kdb control */
167#define KDB_STATE_HOLD_CPU 0x00000010 /* Hold this cpu inside kdb */
168#define KDB_STATE_DOING_SS 0x00000020 /* Doing ss command */
169#define KDB_STATE_DOING_SSB 0x00000040 /* Doing ssb command,
170 * DOING_SS is also set */
171#define KDB_STATE_SSBPT 0x00000080 /* Install breakpoint
172 * after one ss, independent of
173 * DOING_SS */
174#define KDB_STATE_REENTRY 0x00000100 /* Valid re-entry into kdb */
175#define KDB_STATE_SUPPRESS 0x00000200 /* Suppress error messages */
176#define KDB_STATE_PAGER 0x00000400 /* pager is available */
177#define KDB_STATE_GO_SWITCH 0x00000800 /* go is switching
178 * back to initial cpu */
179#define KDB_STATE_PRINTF_LOCK 0x00001000 /* Holds kdb_printf lock */
180#define KDB_STATE_WAIT_IPI 0x00002000 /* Waiting for kdb_ipi() NMI */
181#define KDB_STATE_RECURSE 0x00004000 /* Recursive entry to kdb */
182#define KDB_STATE_IP_ADJUSTED 0x00008000 /* Restart IP has been
183 * adjusted */
184#define KDB_STATE_GO1 0x00010000 /* go only releases one cpu */
185#define KDB_STATE_KEYBOARD 0x00020000 /* kdb entered via
186 * keyboard on this cpu */
187#define KDB_STATE_KEXEC 0x00040000 /* kexec issued */
188#define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */
189#define KDB_STATE_DOING_KGDB2 0x00100000 /* kgdb enter now issued */
190#define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */
191#define KDB_STATE_ARCH 0xff000000 /* Reserved for arch
192 * specific use */
193
194#define KDB_STATE(flag) (kdb_state & KDB_STATE_##flag)
195#define KDB_STATE_SET(flag) ((void)(kdb_state |= KDB_STATE_##flag))
196#define KDB_STATE_CLEAR(flag) ((void)(kdb_state &= ~KDB_STATE_##flag))
197
198extern int kdb_nextline; /* Current number of lines displayed */
199
200typedef struct _kdb_bp {
201 unsigned long bp_addr; /* Address breakpoint is present at */
202 unsigned int bp_free:1; /* This entry is available */
203 unsigned int bp_enabled:1; /* Breakpoint is active in register */
204 unsigned int bp_type:4; /* Uses hardware register */
205 unsigned int bp_installed:1; /* Breakpoint is installed */
206 unsigned int bp_delay:1; /* Do delayed bp handling */
207 unsigned int bp_delayed:1; /* Delayed breakpoint */
208 unsigned int bph_length; /* HW break length */
209} kdb_bp_t;
210
211#ifdef CONFIG_KGDB_KDB
212extern kdb_bp_t kdb_breakpoints[/* KDB_MAXBPT */];
213
214/* The KDB shell command table */
215typedef struct _kdbtab {
216 char *cmd_name; /* Command name */
217 kdb_func_t cmd_func; /* Function to execute command */
218 char *cmd_usage; /* Usage String for this command */
219 char *cmd_help; /* Help message for this command */
220 short cmd_flags; /* Parsing flags */
221 short cmd_minlen; /* Minimum legal # command
222 * chars required */
223 kdb_repeat_t cmd_repeat; /* Does command auto repeat on enter? */
224} kdbtab_t;
225
226extern int kdb_bt(int, const char **); /* KDB display back trace */
227
228/* KDB breakpoint management functions */
229extern void kdb_initbptab(void);
230extern void kdb_bp_install(struct pt_regs *);
231extern void kdb_bp_remove(void);
232
233typedef enum {
234 KDB_DB_BPT, /* Breakpoint */
235 KDB_DB_SS, /* Single-step trap */
236 KDB_DB_SSB, /* Single step to branch */
237 KDB_DB_SSBPT, /* Single step over breakpoint */
238 KDB_DB_NOBPT /* Spurious breakpoint */
239} kdb_dbtrap_t;
240
241extern int kdb_main_loop(kdb_reason_t, kdb_reason_t,
242 int, kdb_dbtrap_t, struct pt_regs *);
243
244/* Miscellaneous functions and data areas */
245extern int kdb_grepping_flag;
246extern char kdb_grep_string[];
247extern int kdb_grep_leading;
248extern int kdb_grep_trailing;
249extern char *kdb_cmds[];
250extern void kdb_syslog_data(char *syslog_data[]);
251extern unsigned long kdb_task_state_string(const char *);
252extern char kdb_task_state_char (const struct task_struct *);
253extern unsigned long kdb_task_state(const struct task_struct *p,
254 unsigned long mask);
255extern void kdb_ps_suppressed(void);
256extern void kdb_ps1(const struct task_struct *p);
257extern void kdb_print_nameval(const char *name, unsigned long val);
258extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
259extern void kdb_meminfo_proc_show(void);
260extern const char *kdb_walk_kallsyms(loff_t *pos);
261extern char *kdb_getstr(char *, size_t, char *);
262
263/* Defines for kdb_symbol_print */
264#define KDB_SP_SPACEB 0x0001 /* Space before string */
265#define KDB_SP_SPACEA 0x0002 /* Space after string */
266#define KDB_SP_PAREN 0x0004 /* Parenthesis around string */
267#define KDB_SP_VALUE 0x0008 /* Print the value of the address */
268#define KDB_SP_SYMSIZE 0x0010 /* Print the size of the symbol */
269#define KDB_SP_NEWLINE 0x0020 /* Newline after string */
270#define KDB_SP_DEFAULT (KDB_SP_VALUE|KDB_SP_PAREN)
271
272#define KDB_TSK(cpu) kgdb_info[cpu].task
273#define KDB_TSKREGS(cpu) kgdb_info[cpu].debuggerinfo
274
275extern struct task_struct *kdb_curr_task(int);
276
277#define kdb_task_has_cpu(p) (task_curr(p))
278
279/* Simplify coexistence with NPTL */
280#define kdb_do_each_thread(g, p) do_each_thread(g, p)
281#define kdb_while_each_thread(g, p) while_each_thread(g, p)
282
283#define GFP_KDB (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)
284
285extern void *debug_kmalloc(size_t size, gfp_t flags);
286extern void debug_kfree(void *);
287extern void debug_kusage(void);
288
289extern void kdb_set_current_task(struct task_struct *);
290extern struct task_struct *kdb_current_task;
291#ifdef CONFIG_MODULES
292extern struct list_head *kdb_modules;
293#endif /* CONFIG_MODULES */
294
295extern char kdb_prompt_str[];
296
297#define KDB_WORD_SIZE ((int)sizeof(unsigned long))
298
299#endif /* CONFIG_KGDB_KDB */
300#endif /* !_KDBPRIVATE_H */
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
new file mode 100644
index 000000000000..45344d5c53dd
--- /dev/null
+++ b/kernel/debug/kdb/kdb_support.c
@@ -0,0 +1,927 @@
1/*
2 * Kernel Debugger Architecture Independent Support Functions
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 *
8 * Copyright (c) 1999-2004 Silicon Graphics, Inc. All Rights Reserved.
9 * Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
10 * 03/02/13 added new 2.5 kallsyms <xavier.bru@bull.net>
11 */
12
13#include <stdarg.h>
14#include <linux/types.h>
15#include <linux/sched.h>
16#include <linux/mm.h>
17#include <linux/kallsyms.h>
18#include <linux/stddef.h>
19#include <linux/vmalloc.h>
20#include <linux/ptrace.h>
21#include <linux/module.h>
22#include <linux/highmem.h>
23#include <linux/hardirq.h>
24#include <linux/delay.h>
25#include <linux/uaccess.h>
26#include <linux/kdb.h>
27#include <linux/slab.h>
28#include "kdb_private.h"
29
30/*
31 * kdbgetsymval - Return the address of the given symbol.
32 *
33 * Parameters:
34 * symname Character string containing symbol name
35 * symtab Structure to receive results
36 * Returns:
37 * 0 Symbol not found, symtab zero filled
38 * 1 Symbol mapped to module/symbol/section, data in symtab
39 */
40int kdbgetsymval(const char *symname, kdb_symtab_t *symtab)
41{
42 if (KDB_DEBUG(AR))
43 kdb_printf("kdbgetsymval: symname=%s, symtab=%p\n", symname,
44 symtab);
45 memset(symtab, 0, sizeof(*symtab));
46 symtab->sym_start = kallsyms_lookup_name(symname);
47 if (symtab->sym_start) {
48 if (KDB_DEBUG(AR))
49 kdb_printf("kdbgetsymval: returns 1, "
50 "symtab->sym_start=0x%lx\n",
51 symtab->sym_start);
52 return 1;
53 }
54 if (KDB_DEBUG(AR))
55 kdb_printf("kdbgetsymval: returns 0\n");
56 return 0;
57}
58EXPORT_SYMBOL(kdbgetsymval);
59
60static char *kdb_name_table[100]; /* arbitrary size */
61
62/*
63 * kdbnearsym - Return the name of the symbol with the nearest address
64 * less than 'addr'.
65 *
66 * Parameters:
67 * addr Address to check for symbol near
68 * symtab Structure to receive results
69 * Returns:
70 * 0 No sections contain this address, symtab zero filled
71 * 1 Address mapped to module/symbol/section, data in symtab
72 * Remarks:
73 * 2.6 kallsyms has a "feature" where it unpacks the name into a
74 * string. If that string is reused before the caller expects it
75 * then the caller sees its string change without warning. To
76 * avoid cluttering up the main kdb code with lots of kdb_strdup,
77 * tests and kfree calls, kdbnearsym maintains an LRU list of the
78 * last few unique strings. The list is sized large enough to
 79 * hold all active strings; no kdb caller of kdbnearsym makes more
80 * than ~20 later calls before using a saved value.
81 */
82int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
83{
84 int ret = 0;
85 unsigned long symbolsize;
86 unsigned long offset;
87#define knt1_size 128 /* must be >= kallsyms table size */
88 char *knt1 = NULL;
89
90 if (KDB_DEBUG(AR))
91 kdb_printf("kdbnearsym: addr=0x%lx, symtab=%p\n", addr, symtab);
92 memset(symtab, 0, sizeof(*symtab));
93
94 if (addr < 4096)
95 goto out;
96 knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC);
97 if (!knt1) {
98 kdb_printf("kdbnearsym: addr=0x%lx cannot kmalloc knt1\n",
99 addr);
100 goto out;
101 }
 102 symtab->sym_name = kallsyms_lookup(addr, &symbolsize, &offset,
103 (char **)(&symtab->mod_name), knt1);
104 if (offset > 8*1024*1024) {
105 symtab->sym_name = NULL;
106 addr = offset = symbolsize = 0;
107 }
108 symtab->sym_start = addr - offset;
109 symtab->sym_end = symtab->sym_start + symbolsize;
110 ret = symtab->sym_name != NULL && *(symtab->sym_name) != '\0';
111
112 if (ret) {
113 int i;
114 /* Another 2.6 kallsyms "feature". Sometimes the sym_name is
115 * set but the buffer passed into kallsyms_lookup is not used,
116 * so it contains garbage. The caller has to work out which
117 * buffer needs to be saved.
118 *
119 * What was Rusty smoking when he wrote that code?
120 */
121 if (symtab->sym_name != knt1) {
122 strncpy(knt1, symtab->sym_name, knt1_size);
123 knt1[knt1_size-1] = '\0';
124 }
125 for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
126 if (kdb_name_table[i] &&
127 strcmp(kdb_name_table[i], knt1) == 0)
128 break;
129 }
130 if (i >= ARRAY_SIZE(kdb_name_table)) {
131 debug_kfree(kdb_name_table[0]);
132 memcpy(kdb_name_table, kdb_name_table+1,
133 sizeof(kdb_name_table[0]) *
134 (ARRAY_SIZE(kdb_name_table)-1));
135 } else {
136 debug_kfree(knt1);
137 knt1 = kdb_name_table[i];
138 memcpy(kdb_name_table+i, kdb_name_table+i+1,
139 sizeof(kdb_name_table[0]) *
140 (ARRAY_SIZE(kdb_name_table)-i-1));
141 }
142 i = ARRAY_SIZE(kdb_name_table) - 1;
143 kdb_name_table[i] = knt1;
144 symtab->sym_name = kdb_name_table[i];
145 knt1 = NULL;
146 }
147
148 if (symtab->mod_name == NULL)
149 symtab->mod_name = "kernel";
150 if (KDB_DEBUG(AR))
151 kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, "
152 "symtab->mod_name=%p, symtab->sym_name=%p (%s)\n", ret,
153 symtab->sym_start, symtab->mod_name, symtab->sym_name,
154 symtab->sym_name);
155
156out:
157 debug_kfree(knt1);
158 return ret;
159}
160
161void kdbnearsym_cleanup(void)
162{
163 int i;
164 for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
165 if (kdb_name_table[i]) {
166 debug_kfree(kdb_name_table[i]);
167 kdb_name_table[i] = NULL;
168 }
169 }
170}
171
172static char ks_namebuf[KSYM_NAME_LEN+1], ks_namebuf_prev[KSYM_NAME_LEN+1];
173
174/*
175 * kallsyms_symbol_complete
176 *
177 * Parameters:
178 * prefix_name prefix of a symbol name to lookup
179 * max_len maximum length that can be returned
180 * Returns:
181 * Number of symbols which match the given prefix.
182 * Notes:
183 * prefix_name is changed to contain the longest unique prefix that
184 * starts with this prefix (tab completion).
185 */
186int kallsyms_symbol_complete(char *prefix_name, int max_len)
187{
188 loff_t pos = 0;
189 int prefix_len = strlen(prefix_name), prev_len = 0;
190 int i, number = 0;
191 const char *name;
192
193 while ((name = kdb_walk_kallsyms(&pos))) {
194 if (strncmp(name, prefix_name, prefix_len) == 0) {
195 strcpy(ks_namebuf, name);
196 /* Work out the longest name that matches the prefix */
197 if (++number == 1) {
198 prev_len = min_t(int, max_len-1,
199 strlen(ks_namebuf));
200 memcpy(ks_namebuf_prev, ks_namebuf, prev_len);
201 ks_namebuf_prev[prev_len] = '\0';
202 continue;
203 }
204 for (i = 0; i < prev_len; i++) {
205 if (ks_namebuf[i] != ks_namebuf_prev[i]) {
206 prev_len = i;
207 ks_namebuf_prev[i] = '\0';
208 break;
209 }
210 }
211 }
212 }
213 if (prev_len > prefix_len)
214 memcpy(prefix_name, ks_namebuf_prev, prev_len+1);
215 return number;
216}
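A worked example (symbol names chosen purely to illustrate the mechanics): suppose the table holds kmalloc, kmap and kmap_atomic. Completing the prefix "km" matches all three, so the function returns 3 and rewrites the buffer to "kma", the longest prefix every match shares. Completing "kmap" matches two symbols, and since their common prefix is no longer than the input, the buffer is left as "kmap".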
217
218/*
219 * kallsyms_symbol_next
220 *
221 * Parameters:
222 * prefix_name prefix of a symbol name to lookup
223 * flag 0 means search from the head, 1 means continue search.
224 * Returns:
225 * 1 if a symbol matches the given prefix.
226 * 0 if no string found
227 */
228int kallsyms_symbol_next(char *prefix_name, int flag)
229{
230 int prefix_len = strlen(prefix_name);
231 static loff_t pos;
232 const char *name;
233
234 if (!flag)
235 pos = 0;
236
237 while ((name = kdb_walk_kallsyms(&pos))) {
238 if (strncmp(name, prefix_name, prefix_len) == 0) {
239 strncpy(prefix_name, name, strlen(name)+1);
240 return 1;
241 }
242 }
243 return 0;
244}
245
246/*
247 * kdb_symbol_print - Standard method for printing a symbol name and offset.
248 * Inputs:
249 * addr Address to be printed.
250 * symtab Address of symbol data, if NULL this routine does its
251 * own lookup.
252 * punc Punctuation for string, bit field.
253 * Remarks:
254 * The string and its punctuation is only printed if the address
255 * is inside the kernel, except that the value is always printed
256 * when requested.
257 */
258void kdb_symbol_print(unsigned long addr, const kdb_symtab_t *symtab_p,
259 unsigned int punc)
260{
261 kdb_symtab_t symtab, *symtab_p2;
262 if (symtab_p) {
263 symtab_p2 = (kdb_symtab_t *)symtab_p;
264 } else {
265 symtab_p2 = &symtab;
266 kdbnearsym(addr, symtab_p2);
267 }
268 if (!(symtab_p2->sym_name || (punc & KDB_SP_VALUE)))
269 return;
270 if (punc & KDB_SP_SPACEB)
271 kdb_printf(" ");
272 if (punc & KDB_SP_VALUE)
273 kdb_printf(kdb_machreg_fmt0, addr);
274 if (symtab_p2->sym_name) {
275 if (punc & KDB_SP_VALUE)
276 kdb_printf(" ");
277 if (punc & KDB_SP_PAREN)
278 kdb_printf("(");
279 if (strcmp(symtab_p2->mod_name, "kernel"))
280 kdb_printf("[%s]", symtab_p2->mod_name);
281 kdb_printf("%s", symtab_p2->sym_name);
282 if (addr != symtab_p2->sym_start)
283 kdb_printf("+0x%lx", addr - symtab_p2->sym_start);
284 if (punc & KDB_SP_SYMSIZE)
285 kdb_printf("/0x%lx",
286 symtab_p2->sym_end - symtab_p2->sym_start);
287 if (punc & KDB_SP_PAREN)
288 kdb_printf(")");
289 }
290 if (punc & KDB_SP_SPACEA)
291 kdb_printf(" ");
292 if (punc & KDB_SP_NEWLINE)
293 kdb_printf("\n");
294}
295
296/*
297 * kdb_strdup - kdb equivalent of strdup, for disasm code.
298 * Inputs:
299 * str The string to duplicate.
300 * type Flags to kmalloc for the new string.
301 * Returns:
302 * Address of the new string, NULL if storage could not be allocated.
303 * Remarks:
304 * This is not in lib/string.c because it uses kmalloc which is not
305 * available when string.o is used in boot loaders.
306 */
307char *kdb_strdup(const char *str, gfp_t type)
308{
309 int n = strlen(str)+1;
310 char *s = kmalloc(n, type);
311 if (!s)
312 return NULL;
313 return strcpy(s, str);
314}
315
316/*
317 * kdb_getarea_size - Read an area of data. The kdb equivalent of
318 * copy_from_user, with kdb messages for invalid addresses.
319 * Inputs:
320 * res Pointer to the area to receive the result.
321 * addr Address of the area to copy.
322 * size Size of the area.
323 * Returns:
324 * 0 for success, < 0 for error.
325 */
326int kdb_getarea_size(void *res, unsigned long addr, size_t size)
327{
328 int ret = probe_kernel_read((char *)res, (char *)addr, size);
329 if (ret) {
330 if (!KDB_STATE(SUPPRESS)) {
331 kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr);
332 KDB_STATE_SET(SUPPRESS);
333 }
334 ret = KDB_BADADDR;
335 } else {
336 KDB_STATE_CLEAR(SUPPRESS);
337 }
338 return ret;
339}
340
341/*
342 * kdb_putarea_size - Write an area of data. The kdb equivalent of
343 * copy_to_user, with kdb messages for invalid addresses.
344 * Inputs:
345 * addr Address of the area to write to.
346 * res Pointer to the area holding the data.
347 * size Size of the area.
348 * Returns:
349 * 0 for success, < 0 for error.
350 */
351int kdb_putarea_size(unsigned long addr, void *res, size_t size)
352{
 353 int ret = probe_kernel_write((char *)addr, (char *)res, size);
354 if (ret) {
355 if (!KDB_STATE(SUPPRESS)) {
356 kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr);
357 KDB_STATE_SET(SUPPRESS);
358 }
359 ret = KDB_BADADDR;
360 } else {
361 KDB_STATE_CLEAR(SUPPRESS);
362 }
363 return ret;
364}
365
366/*
367 * kdb_getphys - Read data from a physical address. Validate the
368 * address is in range, use kmap_atomic() to get data
369 * similar to kdb_getarea() - but for phys addresses
370 * Inputs:
371 * res Pointer to the word to receive the result
372 * addr Physical address of the area to copy
373 * size Size of the area
374 * Returns:
375 * 0 for success, < 0 for error.
376 */
377static int kdb_getphys(void *res, unsigned long addr, size_t size)
378{
379 unsigned long pfn;
380 void *vaddr;
381 struct page *page;
382
383 pfn = (addr >> PAGE_SHIFT);
384 if (!pfn_valid(pfn))
385 return 1;
386 page = pfn_to_page(pfn);
387 vaddr = kmap_atomic(page, KM_KDB);
388 memcpy(res, vaddr + (addr & (PAGE_SIZE - 1)), size);
389 kunmap_atomic(vaddr, KM_KDB);
390
391 return 0;
392}
393
394/*
395 * kdb_getphysword
396 * Inputs:
397 * word Pointer to the word to receive the result.
398 * addr Address of the area to copy.
399 * size Size of the area.
400 * Returns:
401 * 0 for success, < 0 for error.
402 */
403int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size)
404{
405 int diag;
406 __u8 w1;
407 __u16 w2;
408 __u32 w4;
409 __u64 w8;
410 *word = 0; /* Default value if addr or size is invalid */
411
412 switch (size) {
413 case 1:
414 diag = kdb_getphys(&w1, addr, sizeof(w1));
415 if (!diag)
416 *word = w1;
417 break;
418 case 2:
419 diag = kdb_getphys(&w2, addr, sizeof(w2));
420 if (!diag)
421 *word = w2;
422 break;
423 case 4:
424 diag = kdb_getphys(&w4, addr, sizeof(w4));
425 if (!diag)
426 *word = w4;
427 break;
428 case 8:
429 if (size <= sizeof(*word)) {
430 diag = kdb_getphys(&w8, addr, sizeof(w8));
431 if (!diag)
432 *word = w8;
433 break;
434 }
435 /* drop through */
436 default:
437 diag = KDB_BADWIDTH;
438 kdb_printf("kdb_getphysword: bad width %ld\n", (long) size);
439 }
440 return diag;
441}
442
443/*
444 * kdb_getword - Read a binary value. Unlike kdb_getarea, this treats
445 * data as numbers.
446 * Inputs:
447 * word Pointer to the word to receive the result.
448 * addr Address of the area to copy.
449 * size Size of the area.
450 * Returns:
451 * 0 for success, < 0 for error.
452 */
453int kdb_getword(unsigned long *word, unsigned long addr, size_t size)
454{
455 int diag;
456 __u8 w1;
457 __u16 w2;
458 __u32 w4;
459 __u64 w8;
460 *word = 0; /* Default value if addr or size is invalid */
461 switch (size) {
462 case 1:
463 diag = kdb_getarea(w1, addr);
464 if (!diag)
465 *word = w1;
466 break;
467 case 2:
468 diag = kdb_getarea(w2, addr);
469 if (!diag)
470 *word = w2;
471 break;
472 case 4:
473 diag = kdb_getarea(w4, addr);
474 if (!diag)
475 *word = w4;
476 break;
477 case 8:
478 if (size <= sizeof(*word)) {
479 diag = kdb_getarea(w8, addr);
480 if (!diag)
481 *word = w8;
482 break;
483 }
484 /* drop through */
485 default:
486 diag = KDB_BADWIDTH;
487 kdb_printf("kdb_getword: bad width %ld\n", (long) size);
488 }
489 return diag;
490}
491
492/*
493 * kdb_putword - Write a binary value. Unlike kdb_putarea, this
494 * treats data as numbers.
495 * Inputs:
 496 * addr Address of the area to write to.
497 * word The value to set.
498 * size Size of the area.
499 * Returns:
500 * 0 for success, < 0 for error.
501 */
502int kdb_putword(unsigned long addr, unsigned long word, size_t size)
503{
504 int diag;
505 __u8 w1;
506 __u16 w2;
507 __u32 w4;
508 __u64 w8;
509 switch (size) {
510 case 1:
511 w1 = word;
512 diag = kdb_putarea(addr, w1);
513 break;
514 case 2:
515 w2 = word;
516 diag = kdb_putarea(addr, w2);
517 break;
518 case 4:
519 w4 = word;
520 diag = kdb_putarea(addr, w4);
521 break;
522 case 8:
523 if (size <= sizeof(word)) {
524 w8 = word;
525 diag = kdb_putarea(addr, w8);
526 break;
527 }
528 /* drop through */
529 default:
530 diag = KDB_BADWIDTH;
531 kdb_printf("kdb_putword: bad width %ld\n", (long) size);
532 }
533 return diag;
534}
535
536/*
537 * kdb_task_state_string - Convert a string containing any of the
538 * letters DRSTCZEUIMA to a mask for the process state field and
539 * return the value. If no argument is supplied, return the mask
540 * that corresponds to environment variable PS, DRSTCZEU by
541 * default.
542 * Inputs:
543 * s String to convert
544 * Returns:
545 * Mask for process state.
546 * Notes:
547 * The mask folds data from several sources into a single long value, so
 548 * be careful not to overlap the bits. TASK_* bits are in the LSB,
549 * special cases like UNRUNNABLE are in the MSB. As of 2.6.10-rc1 there
550 * is no overlap between TASK_* and EXIT_* but that may not always be
551 * true, so EXIT_* bits are shifted left 16 bits before being stored in
552 * the mask.
553 */
554
555/* unrunnable is < 0 */
556#define UNRUNNABLE (1UL << (8*sizeof(unsigned long) - 1))
557#define RUNNING (1UL << (8*sizeof(unsigned long) - 2))
558#define IDLE (1UL << (8*sizeof(unsigned long) - 3))
559#define DAEMON (1UL << (8*sizeof(unsigned long) - 4))
560
561unsigned long kdb_task_state_string(const char *s)
562{
563 long res = 0;
564 if (!s) {
565 s = kdbgetenv("PS");
566 if (!s)
567 s = "DRSTCZEU"; /* default value for ps */
568 }
569 while (*s) {
570 switch (*s) {
571 case 'D':
572 res |= TASK_UNINTERRUPTIBLE;
573 break;
574 case 'R':
575 res |= RUNNING;
576 break;
577 case 'S':
578 res |= TASK_INTERRUPTIBLE;
579 break;
580 case 'T':
581 res |= TASK_STOPPED;
582 break;
583 case 'C':
584 res |= TASK_TRACED;
585 break;
586 case 'Z':
587 res |= EXIT_ZOMBIE << 16;
588 break;
589 case 'E':
590 res |= EXIT_DEAD << 16;
591 break;
592 case 'U':
593 res |= UNRUNNABLE;
594 break;
595 case 'I':
596 res |= IDLE;
597 break;
598 case 'M':
599 res |= DAEMON;
600 break;
601 case 'A':
602 res = ~0UL;
603 break;
604 default:
605 kdb_printf("%s: unknown flag '%c' ignored\n",
606 __func__, *s);
607 break;
608 }
609 ++s;
610 }
611 return res;
612}
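A caller typically builds a mask once and reuses it as a filter. A small sketch mirroring what the ps command does (the helper is hypothetical):

/* Print only running ('R') and uninterruptible ('D') tasks. */
static void example_ps_rd(const struct task_struct *p)
{
	/* mask == RUNNING | TASK_UNINTERRUPTIBLE */
	unsigned long mask = kdb_task_state_string("RD");

	if (kdb_task_state(p, mask))
		kdb_ps1(p);
}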
613
614/*
615 * kdb_task_state_char - Return the character that represents the task state.
616 * Inputs:
617 * p struct task for the process
618 * Returns:
619 * One character to represent the task state.
620 */
621char kdb_task_state_char (const struct task_struct *p)
622{
623 int cpu;
624 char state;
625 unsigned long tmp;
626
627 if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
628 return 'E';
629
630 cpu = kdb_process_cpu(p);
631 state = (p->state == 0) ? 'R' :
632 (p->state < 0) ? 'U' :
633 (p->state & TASK_UNINTERRUPTIBLE) ? 'D' :
634 (p->state & TASK_STOPPED) ? 'T' :
635 (p->state & TASK_TRACED) ? 'C' :
636 (p->exit_state & EXIT_ZOMBIE) ? 'Z' :
637 (p->exit_state & EXIT_DEAD) ? 'E' :
638 (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
639 if (p->pid == 0) {
640 /* Idle task. Is it really idle, apart from the kdb
641 * interrupt? */
642 if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) {
643 if (cpu != kdb_initial_cpu)
644 state = 'I'; /* idle task */
645 }
646 } else if (!p->mm && state == 'S') {
647 state = 'M'; /* sleeping system daemon */
648 }
649 return state;
650}
651
652/*
653 * kdb_task_state - Return true if a process has the desired state
654 * given by the mask.
655 * Inputs:
656 * p struct task for the process
657 * mask mask from kdb_task_state_string to select processes
658 * Returns:
659 * True if the process matches at least one criteria defined by the mask.
660 */
661unsigned long kdb_task_state(const struct task_struct *p, unsigned long mask)
662{
663 char state[] = { kdb_task_state_char(p), '\0' };
664 return (mask & kdb_task_state_string(state)) != 0;
665}
666
667/*
668 * kdb_print_nameval - Print a name and its value, converting the
669 * value to a symbol lookup if possible.
670 * Inputs:
671 * name field name to print
672 * val value of field
673 */
674void kdb_print_nameval(const char *name, unsigned long val)
675{
676 kdb_symtab_t symtab;
677 kdb_printf(" %-11.11s ", name);
678 if (kdbnearsym(val, &symtab))
679 kdb_symbol_print(val, &symtab,
680 KDB_SP_VALUE|KDB_SP_SYMSIZE|KDB_SP_NEWLINE);
681 else
682 kdb_printf("0x%lx\n", val);
683}
684
685/* Last ditch allocator for debugging, so we can still debug even when
686 * the GFP_ATOMIC pool has been exhausted. The algorithms are tuned
687 * for space usage, not for speed. One smallish memory pool, the free
688 * chain is always in ascending address order to allow coalescing,
689 * allocations are done in brute force best fit.
690 */
691
692struct debug_alloc_header {
693 u32 next; /* offset of next header from start of pool */
694 u32 size;
695 void *caller;
696};
697
698/* The memory returned by this allocator must be aligned, which means
699 * so must the header size. Do not assume that sizeof(struct
700 * debug_alloc_header) is a multiple of the alignment, explicitly
701 * calculate the overhead of this header, including the alignment.
702 * The rest of this code must not use sizeof() on any header or
703 * pointer to a header.
704 */
705#define dah_align 8
706#define dah_overhead ALIGN(sizeof(struct debug_alloc_header), dah_align)
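Concretely, assuming typical ABIs: on 64-bit the header is 4 + 4 + 8 = 16 bytes and already a multiple of dah_align, so dah_overhead stays 16; on 32-bit it is 4 + 4 + 4 = 12 bytes and ALIGN(12, 8) rounds the overhead up to 16. That rounding is exactly why the code below must never apply sizeof() to a header directly.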
707
708static u64 debug_alloc_pool_aligned[256*1024/dah_align]; /* 256K pool */
709static char *debug_alloc_pool = (char *)debug_alloc_pool_aligned;
710static u32 dah_first, dah_first_call = 1, dah_used, dah_used_max;
711
712/* Locking is awkward. The debug code is called from all contexts,
713 * including non maskable interrupts. A normal spinlock is not safe
714 * in NMI context. Try to get the debug allocator lock, if it cannot
715 * be obtained after a second then give up. If the lock could not be
716 * previously obtained on this cpu then only try once.
717 *
718 * sparse has no annotation for "this function _sometimes_ acquires a
719 * lock", so fudge the acquire/release notation.
720 */
721static DEFINE_SPINLOCK(dap_lock);
722static int get_dap_lock(void)
723 __acquires(dap_lock)
724{
725 static int dap_locked = -1;
726 int count;
727 if (dap_locked == smp_processor_id())
728 count = 1;
729 else
730 count = 1000;
731 while (1) {
732 if (spin_trylock(&dap_lock)) {
733 dap_locked = -1;
734 return 1;
735 }
736 if (!count--)
737 break;
738 udelay(1000);
739 }
740 dap_locked = smp_processor_id();
741 __acquire(dap_lock);
742 return 0;
743}
744
745void *debug_kmalloc(size_t size, gfp_t flags)
746{
747 unsigned int rem, h_offset;
748 struct debug_alloc_header *best, *bestprev, *prev, *h;
749 void *p = NULL;
750 if (!get_dap_lock()) {
751 __release(dap_lock); /* we never actually got it */
752 return NULL;
753 }
754 h = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
755 if (dah_first_call) {
756 h->size = sizeof(debug_alloc_pool_aligned) - dah_overhead;
757 dah_first_call = 0;
758 }
759 size = ALIGN(size, dah_align);
760 prev = best = bestprev = NULL;
761 while (1) {
762 if (h->size >= size && (!best || h->size < best->size)) {
763 best = h;
764 bestprev = prev;
765 if (h->size == size)
766 break;
767 }
768 if (!h->next)
769 break;
770 prev = h;
771 h = (struct debug_alloc_header *)(debug_alloc_pool + h->next);
772 }
773 if (!best)
774 goto out;
775 rem = best->size - size;
776 /* The pool must always contain at least one header */
777 if (best->next == 0 && bestprev == NULL && rem < dah_overhead)
778 goto out;
779 if (rem >= dah_overhead) {
780 best->size = size;
781 h_offset = ((char *)best - debug_alloc_pool) +
782 dah_overhead + best->size;
783 h = (struct debug_alloc_header *)(debug_alloc_pool + h_offset);
784 h->size = rem - dah_overhead;
785 h->next = best->next;
786 } else
787 h_offset = best->next;
788 best->caller = __builtin_return_address(0);
789 dah_used += best->size;
790 dah_used_max = max(dah_used, dah_used_max);
791 if (bestprev)
792 bestprev->next = h_offset;
793 else
794 dah_first = h_offset;
795 p = (char *)best + dah_overhead;
796 memset(p, POISON_INUSE, best->size - 1);
797 *((char *)p + best->size - 1) = POISON_END;
798out:
799 spin_unlock(&dap_lock);
800 return p;
801}
802
803void debug_kfree(void *p)
804{
805 struct debug_alloc_header *h;
806 unsigned int h_offset;
807 if (!p)
808 return;
809 if ((char *)p < debug_alloc_pool ||
810 (char *)p >= debug_alloc_pool + sizeof(debug_alloc_pool_aligned)) {
811 kfree(p);
812 return;
813 }
814 if (!get_dap_lock()) {
815 __release(dap_lock); /* we never actually got it */
816 return; /* memory leak, cannot be helped */
817 }
818 h = (struct debug_alloc_header *)((char *)p - dah_overhead);
819 memset(p, POISON_FREE, h->size - 1);
820 *((char *)p + h->size - 1) = POISON_END;
821 h->caller = NULL;
822 dah_used -= h->size;
823 h_offset = (char *)h - debug_alloc_pool;
824 if (h_offset < dah_first) {
825 h->next = dah_first;
826 dah_first = h_offset;
827 } else {
828 struct debug_alloc_header *prev;
829 unsigned int prev_offset;
830 prev = (struct debug_alloc_header *)(debug_alloc_pool +
831 dah_first);
832 while (1) {
833 if (!prev->next || prev->next > h_offset)
834 break;
835 prev = (struct debug_alloc_header *)
836 (debug_alloc_pool + prev->next);
837 }
838 prev_offset = (char *)prev - debug_alloc_pool;
839 if (prev_offset + dah_overhead + prev->size == h_offset) {
840 prev->size += dah_overhead + h->size;
841 memset(h, POISON_FREE, dah_overhead - 1);
842 *((char *)h + dah_overhead - 1) = POISON_END;
843 h = prev;
844 h_offset = prev_offset;
845 } else {
846 h->next = prev->next;
847 prev->next = h_offset;
848 }
849 }
850 if (h_offset + dah_overhead + h->size == h->next) {
851 struct debug_alloc_header *next;
852 next = (struct debug_alloc_header *)
853 (debug_alloc_pool + h->next);
854 h->size += dah_overhead + next->size;
855 h->next = next->next;
856 memset(next, POISON_FREE, dah_overhead - 1);
857 *((char *)next + dah_overhead - 1) = POISON_END;
858 }
859 spin_unlock(&dap_lock);
860}
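
A minimal usage sketch of the pair above (hypothetical caller, not part of the patch). The allocation can fail either because the pool is exhausted or because the lock could not be taken, and pointers that did not come from the pool fall through to kfree():

static void debug_alloc_example(void)
{
	char *buf = debug_kmalloc(128, GFP_ATOMIC);

	if (!buf)		/* pool exhausted or lock unavailable */
		return;
	snprintf(buf, 128, "cpu %d", smp_processor_id());
	kdb_printf("%s\n", buf);
	debug_kfree(buf);	/* coalesced back into the free chain */
}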
861
862void debug_kusage(void)
863{
864 struct debug_alloc_header *h_free, *h_used;
865#ifdef CONFIG_IA64
866 /* FIXME: using dah for ia64 unwind always results in a memory leak.
867 * Fix that memory leak first, then set debug_kusage_one_time = 1 for
868 * all architectures.
869 */
870 static int debug_kusage_one_time;
871#else
872 static int debug_kusage_one_time = 1;
873#endif
874 if (!get_dap_lock()) {
875 __release(dap_lock); /* we never actually got it */
876 return;
877 }
878 h_free = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
879 if (dah_first == 0 &&
880 (h_free->size == sizeof(debug_alloc_pool_aligned) - dah_overhead ||
881 dah_first_call))
882 goto out;
883 if (!debug_kusage_one_time)
884 goto out;
885 debug_kusage_one_time = 0;
886 kdb_printf("%s: debug_kmalloc memory leak dah_first %d\n",
887 __func__, dah_first);
888 if (dah_first) {
889 h_used = (struct debug_alloc_header *)debug_alloc_pool;
890 kdb_printf("%s: h_used %p size %d\n", __func__, h_used,
891 h_used->size);
892 }
893 do {
894 h_used = (struct debug_alloc_header *)
895 ((char *)h_free + dah_overhead + h_free->size);
896 kdb_printf("%s: h_used %p size %d caller %p\n",
897 __func__, h_used, h_used->size, h_used->caller);
898 h_free = (struct debug_alloc_header *)
899 (debug_alloc_pool + h_free->next);
900 } while (h_free->next);
901 h_used = (struct debug_alloc_header *)
902 ((char *)h_free + dah_overhead + h_free->size);
903 if ((char *)h_used - debug_alloc_pool !=
904 sizeof(debug_alloc_pool_aligned))
905 kdb_printf("%s: h_used %p size %d caller %p\n",
906 __func__, h_used, h_used->size, h_used->caller);
907out:
908 spin_unlock(&dap_lock);
909}
910
911/* Maintain a small stack of kdb_flags to allow recursion without disturbing
912 * the global kdb state.
913 */
914
915static int kdb_flags_stack[4], kdb_flags_index;
916
917void kdb_save_flags(void)
918{
919 BUG_ON(kdb_flags_index >= ARRAY_SIZE(kdb_flags_stack));
920 kdb_flags_stack[kdb_flags_index++] = kdb_flags;
921}
922
923void kdb_restore_flags(void)
924{
925 BUG_ON(kdb_flags_index <= 0);
926 kdb_flags = kdb_flags_stack[--kdb_flags_index];
927}
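
A nesting sketch (illustrative; the specific flag is assumed, not taken from this patch): the small stack lets a recursive kdb command run with modified flags and restore the outer state afterwards.

	kdb_save_flags();
	kdb_flags |= KDB_FLAG_CMD_INTERRUPT;	/* flag name assumed */
	/* ... re-enter command processing ... */
	kdb_restore_flags();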
diff --git a/kernel/groups.c b/kernel/groups.c
index 2b45b2ee3964..53b1916c9492 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -164,12 +164,6 @@ int groups_search(const struct group_info *group_info, gid_t grp)
164 */ 164 */
165int set_groups(struct cred *new, struct group_info *group_info) 165int set_groups(struct cred *new, struct group_info *group_info)
166{ 166{
167 int retval;
168
169 retval = security_task_setgroups(group_info);
170 if (retval)
171 return retval;
172
173 put_group_info(new->group_info); 167 put_group_info(new->group_info);
174 groups_sort(group_info); 168 groups_sort(group_info);
175 get_group_info(group_info); 169 get_group_info(group_info);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0086628b6e97..b9b134b35088 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)
1749} 1749}
1750 1750
1751/** 1751/**
1752 * schedule_hrtimeout_range - sleep until timeout 1752 * schedule_hrtimeout_range_clock - sleep until timeout
1753 * @expires: timeout value (ktime_t) 1753 * @expires: timeout value (ktime_t)
1754 * @delta: slack in expires timeout (ktime_t) 1754 * @delta: slack in expires timeout (ktime_t)
1755 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL 1755 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1756 * 1756 * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
1757 * Make the current task sleep until the given expiry time has
1758 * elapsed. The routine will return immediately unless
1759 * the current task state has been set (see set_current_state()).
1760 *
1761 * The @delta argument gives the kernel the freedom to schedule the
1762 * actual wakeup to a time that is both power and performance friendly.
1763 * The kernel gives the normal best effort behavior for "@expires+@delta",
1764 * but may decide to fire the timer earlier, but no earlier than @expires.
1765 *
1766 * You can set the task state as follows -
1767 *
1768 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1769 * pass before the routine returns.
1770 *
1771 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1772 * delivered to the current task.
1773 *
1774 * The current task state is guaranteed to be TASK_RUNNING when this
1775 * routine returns.
1776 *
1777 * Returns 0 when the timer has expired otherwise -EINTR
1778 */ 1757 */
1779int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, 1758int __sched
1780 const enum hrtimer_mode mode) 1759schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
1760 const enum hrtimer_mode mode, int clock)
1781{ 1761{
1782 struct hrtimer_sleeper t; 1762 struct hrtimer_sleeper t;
1783 1763
@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1799 return -EINTR; 1779 return -EINTR;
1800 } 1780 }
1801 1781
1802 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); 1782 hrtimer_init_on_stack(&t.timer, clock, mode);
1803 hrtimer_set_expires_range_ns(&t.timer, *expires, delta); 1783 hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
1804 1784
1805 hrtimer_init_sleeper(&t, current); 1785 hrtimer_init_sleeper(&t, current);
@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1818 1798
1819 return !t.task ? 0 : -EINTR; 1799 return !t.task ? 0 : -EINTR;
1820} 1800}
1801
1802/**
1803 * schedule_hrtimeout_range - sleep until timeout
1804 * @expires: timeout value (ktime_t)
1805 * @delta: slack in expires timeout (ktime_t)
1806 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1807 *
1808 * Make the current task sleep until the given expiry time has
1809 * elapsed. The routine will return immediately unless
1810 * the current task state has been set (see set_current_state()).
1811 *
1812 * The @delta argument gives the kernel the freedom to schedule the
1813 * actual wakeup to a time that is both power and performance friendly.
1814 * The kernel gives the normal best effort behavior for "@expires+@delta",
1815 * but may decide to fire the timer earlier, but no earlier than @expires.
1816 *
1817 * You can set the task state as follows -
1818 *
1819 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1820 * pass before the routine returns.
1821 *
1822 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1823 * delivered to the current task.
1824 *
1825 * The current task state is guaranteed to be TASK_RUNNING when this
1826 * routine returns.
1827 *
1828 * Returns 0 when the timer has expired otherwise -EINTR
1829 */
1830int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1831 const enum hrtimer_mode mode)
1832{
1833 return schedule_hrtimeout_range_clock(expires, delta, mode,
1834 CLOCK_MONOTONIC);
1835}
1821EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); 1836EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
1822 1837
1823/** 1838/**
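
A caller-side sketch of the refactored helper (illustrative only; the CLOCK_REALTIME case is presumably the point of the split):

static int __sched sleep_realtime_ns(u64 ns)
{
	ktime_t t = ns_to_ktime(ns);

	set_current_state(TASK_INTERRUPTIBLE);
	/* 0 on expiry, -EINTR if a signal ended the sleep early */
	return schedule_hrtimeout_range_clock(&t, 0, HRTIMER_MODE_REL,
					      CLOCK_REALTIME);
}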
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 76d5a671bfe1..27e5c6911223 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -370,9 +370,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
370 irqreturn_t ret, retval = IRQ_NONE; 370 irqreturn_t ret, retval = IRQ_NONE;
371 unsigned int status = 0; 371 unsigned int status = 0;
372 372
373 if (!(action->flags & IRQF_DISABLED))
374 local_irq_enable_in_hardirq();
375
376 do { 373 do {
377 trace_irq_handler_entry(irq, action); 374 trace_irq_handler_entry(irq, action);
378 ret = action->handler(irq, action->dev_id); 375 ret = action->handler(irq, action->dev_id);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 704e488730a5..3164ba7ce151 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -138,6 +138,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
138 return 0; 138 return 0;
139} 139}
140 140
141int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
142{
143 struct irq_desc *desc = irq_to_desc(irq);
144 unsigned long flags;
145
146 if (!desc)
147 return -EINVAL;
148
149 raw_spin_lock_irqsave(&desc->lock, flags);
150 desc->affinity_hint = m;
151 raw_spin_unlock_irqrestore(&desc->lock, flags);
152
153 return 0;
154}
155EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
156
141#ifndef CONFIG_AUTO_IRQ_AFFINITY 157#ifndef CONFIG_AUTO_IRQ_AFFINITY
142/* 158/*
143 * Generic version of the affinity autoselector. 159 * Generic version of the affinity autoselector.
@@ -757,16 +773,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
757 if (new->flags & IRQF_ONESHOT) 773 if (new->flags & IRQF_ONESHOT)
758 desc->status |= IRQ_ONESHOT; 774 desc->status |= IRQ_ONESHOT;
759 775
760 /*
761 * Force MSI interrupts to run with interrupts
762 * disabled. The multi vector cards can cause stack
763 * overflows due to nested interrupts when enough of
764 * them are directed to a core and fire at the same
765 * time.
766 */
767 if (desc->msi_desc)
768 new->flags |= IRQF_DISABLED;
769
770 if (!(desc->status & IRQ_NOAUTOEN)) { 776 if (!(desc->status & IRQ_NOAUTOEN)) {
771 desc->depth = 0; 777 desc->depth = 0;
772 desc->status &= ~IRQ_DISABLED; 778 desc->status &= ~IRQ_DISABLED;
@@ -916,6 +922,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
916 desc->chip->disable(irq); 922 desc->chip->disable(irq);
917 } 923 }
918 924
925#ifdef CONFIG_SMP
926 /* make sure affinity_hint is cleaned up */
927 if (WARN_ON_ONCE(desc->affinity_hint))
928 desc->affinity_hint = NULL;
929#endif
930
919 raw_spin_unlock_irqrestore(&desc->lock, flags); 931 raw_spin_unlock_irqrestore(&desc->lock, flags);
920 932
921 unregister_handler_proc(irq, action); 933 unregister_handler_proc(irq, action);
@@ -1027,7 +1039,6 @@ EXPORT_SYMBOL(free_irq);
1027 * Flags: 1039 * Flags:
1028 * 1040 *
1029 * IRQF_SHARED Interrupt is shared 1041 * IRQF_SHARED Interrupt is shared
1030 * IRQF_DISABLED Disable local interrupts while processing
1031 * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy 1042 * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
1032 * IRQF_TRIGGER_* Specify active edge(s) or level 1043 * IRQF_TRIGGER_* Specify active edge(s) or level
1033 * 1044 *
@@ -1041,25 +1052,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
1041 int retval; 1052 int retval;
1042 1053
1043 /* 1054 /*
1044 * handle_IRQ_event() always ignores IRQF_DISABLED except for
1045 * the _first_ irqaction (sigh). That can cause oopsing, but
1046 * the behavior is classified as "will not fix" so we need to
1047 * start nudging drivers away from using that idiom.
1048 */
1049 if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) ==
1050 (IRQF_SHARED|IRQF_DISABLED)) {
1051 pr_warning(
1052 "IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n",
1053 irq, devname);
1054 }
1055
1056#ifdef CONFIG_LOCKDEP
1057 /*
1058 * Lockdep wants atomic interrupt handlers:
1059 */
1060 irqflags |= IRQF_DISABLED;
1061#endif
1062 /*
1063 * Sanity-check: shared interrupts must pass in a real dev-ID, 1055 * Sanity-check: shared interrupts must pass in a real dev-ID,
1064 * otherwise we'll have trouble later trying to figure out 1056 * otherwise we'll have trouble later trying to figure out
1065 * which interrupt is which (messes up the interrupt freeing 1057 * which interrupt is which (messes up the interrupt freeing
@@ -1120,3 +1112,40 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
1120 return retval; 1112 return retval;
1121} 1113}
1122EXPORT_SYMBOL(request_threaded_irq); 1114EXPORT_SYMBOL(request_threaded_irq);
1115
1116/**
1117 * request_any_context_irq - allocate an interrupt line
1118 * @irq: Interrupt line to allocate
1119 * @handler: Function to be called when the IRQ occurs.
1120 * Threaded handler for threaded interrupts.
1121 * @flags: Interrupt type flags
1122 * @name: An ascii name for the claiming device
1123 * @dev_id: A cookie passed back to the handler function
1124 *
1125 * This call allocates interrupt resources and enables the
1126 * interrupt line and IRQ handling. It selects either a
1127 * hardirq or threaded handling method depending on the
1128 * context.
1129 *
1130 * On failure, it returns a negative value. On success,
1131 * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED.
1132 */
1133int request_any_context_irq(unsigned int irq, irq_handler_t handler,
1134 unsigned long flags, const char *name, void *dev_id)
1135{
1136 struct irq_desc *desc = irq_to_desc(irq);
1137 int ret;
1138
1139 if (!desc)
1140 return -EINVAL;
1141
1142 if (desc->status & IRQ_NESTED_THREAD) {
1143 ret = request_threaded_irq(irq, NULL, handler,
1144 flags, name, dev_id);
1145 return !ret ? IRQC_IS_NESTED : ret;
1146 }
1147
1148 ret = request_irq(irq, handler, flags, name, dev_id);
1149 return !ret ? IRQC_IS_HARDIRQ : ret;
1150}
1151EXPORT_SYMBOL_GPL(request_any_context_irq);
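
A driver-side sketch of the new entry point (hypothetical names, not part of the patch); a caller treats any negative return as failure and otherwise need not care which context was chosen:

static irqreturn_t foo_handler(int irq, void *dev_id)
{
	/* Runs as a hardirq or threaded handler, whichever was selected */
	return IRQ_HANDLED;
}

static int foo_setup(unsigned int irq, void *dev)
{
	int ret = request_any_context_irq(irq, foo_handler, 0, "foo", dev);

	if (ret < 0)
		return ret;
	/* ret is IRQC_IS_HARDIRQ or IRQC_IS_NESTED on success */
	return 0;
}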
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 7a6eb04ef6b5..09a2ee540bd2 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -32,6 +32,27 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
32 return 0; 32 return 0;
33} 33}
34 34
35static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
36{
37 struct irq_desc *desc = irq_to_desc((long)m->private);
38 unsigned long flags;
39 cpumask_var_t mask;
40
41 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
42 return -ENOMEM;
43
44 raw_spin_lock_irqsave(&desc->lock, flags);
45 if (desc->affinity_hint)
46 cpumask_copy(mask, desc->affinity_hint);
47 raw_spin_unlock_irqrestore(&desc->lock, flags);
48
49 seq_cpumask(m, mask);
50 seq_putc(m, '\n');
51 free_cpumask_var(mask);
52
53 return 0;
54}
55
35#ifndef is_affinity_mask_valid 56#ifndef is_affinity_mask_valid
36#define is_affinity_mask_valid(val) 1 57#define is_affinity_mask_valid(val) 1
37#endif 58#endif
@@ -84,6 +105,11 @@ static int irq_affinity_proc_open(struct inode *inode, struct file *file)
84 return single_open(file, irq_affinity_proc_show, PDE(inode)->data); 105 return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
85} 106}
86 107
108static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
109{
110 return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
111}
112
87static const struct file_operations irq_affinity_proc_fops = { 113static const struct file_operations irq_affinity_proc_fops = {
88 .open = irq_affinity_proc_open, 114 .open = irq_affinity_proc_open,
89 .read = seq_read, 115 .read = seq_read,
@@ -92,6 +118,13 @@ static const struct file_operations irq_affinity_proc_fops = {
92 .write = irq_affinity_proc_write, 118 .write = irq_affinity_proc_write,
93}; 119};
94 120
121static const struct file_operations irq_affinity_hint_proc_fops = {
122 .open = irq_affinity_hint_proc_open,
123 .read = seq_read,
124 .llseek = seq_lseek,
125 .release = single_release,
126};
127
95static int default_affinity_show(struct seq_file *m, void *v) 128static int default_affinity_show(struct seq_file *m, void *v)
96{ 129{
97 seq_cpumask(m, irq_default_affinity); 130 seq_cpumask(m, irq_default_affinity);
@@ -147,6 +180,26 @@ static const struct file_operations default_affinity_proc_fops = {
147 .release = single_release, 180 .release = single_release,
148 .write = default_affinity_write, 181 .write = default_affinity_write,
149}; 182};
183
184static int irq_node_proc_show(struct seq_file *m, void *v)
185{
186 struct irq_desc *desc = irq_to_desc((long) m->private);
187
188 seq_printf(m, "%d\n", desc->node);
189 return 0;
190}
191
192static int irq_node_proc_open(struct inode *inode, struct file *file)
193{
194 return single_open(file, irq_node_proc_show, PDE(inode)->data);
195}
196
197static const struct file_operations irq_node_proc_fops = {
198 .open = irq_node_proc_open,
199 .read = seq_read,
200 .llseek = seq_lseek,
201 .release = single_release,
202};
150#endif 203#endif
151 204
152static int irq_spurious_proc_show(struct seq_file *m, void *v) 205static int irq_spurious_proc_show(struct seq_file *m, void *v)
@@ -231,6 +284,13 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
231 /* create /proc/irq/<irq>/smp_affinity */ 284 /* create /proc/irq/<irq>/smp_affinity */
232 proc_create_data("smp_affinity", 0600, desc->dir, 285 proc_create_data("smp_affinity", 0600, desc->dir,
233 &irq_affinity_proc_fops, (void *)(long)irq); 286 &irq_affinity_proc_fops, (void *)(long)irq);
287
288 /* create /proc/irq/<irq>/affinity_hint */
289 proc_create_data("affinity_hint", 0400, desc->dir,
290 &irq_affinity_hint_proc_fops, (void *)(long)irq);
291
292 proc_create_data("node", 0444, desc->dir,
293 &irq_node_proc_fops, (void *)(long)irq);
234#endif 294#endif
235 295
236 proc_create_data("spurious", 0444, desc->dir, 296 proc_create_data("spurious", 0444, desc->dir,
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 13aff293f4de..6f6d091b5757 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -16,6 +16,7 @@
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/seq_file.h> 17#include <linux/seq_file.h>
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/kdb.h>
19#include <linux/err.h> 20#include <linux/err.h>
20#include <linux/proc_fs.h> 21#include <linux/proc_fs.h>
21#include <linux/sched.h> /* for cond_resched */ 22#include <linux/sched.h> /* for cond_resched */
@@ -516,6 +517,26 @@ static int kallsyms_open(struct inode *inode, struct file *file)
516 return ret; 517 return ret;
517} 518}
518 519
520#ifdef CONFIG_KGDB_KDB
521const char *kdb_walk_kallsyms(loff_t *pos)
522{
523 static struct kallsym_iter kdb_walk_kallsyms_iter;
524 if (*pos == 0) {
525 memset(&kdb_walk_kallsyms_iter, 0,
526 sizeof(kdb_walk_kallsyms_iter));
527 reset_iter(&kdb_walk_kallsyms_iter, 0);
528 }
529 while (1) {
530 if (!update_iter(&kdb_walk_kallsyms_iter, *pos))
531 return NULL;
532 ++*pos;
533 /* Some debugging symbols have no name. Ignore them. */
534 if (kdb_walk_kallsyms_iter.name[0])
535 return kdb_walk_kallsyms_iter.name;
536 }
537}
538#endif /* CONFIG_KGDB_KDB */
539
519static const struct file_operations kallsyms_operations = { 540static const struct file_operations kallsyms_operations = {
520 .open = kallsyms_open, 541 .open = kallsyms_open,
521 .read = seq_read, 542 .read = seq_read,
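
A consumer-side sketch of the iterator contract added above (hypothetical kdb caller): start from a zeroed position and keep calling until NULL.

static void kdb_dump_symbol_names(void)
{
	loff_t pos = 0;
	const char *name;

	while ((name = kdb_walk_kallsyms(&pos)))
		kdb_printf("%s\n", name);
}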
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
deleted file mode 100644
index 11f3515ca83f..000000000000
--- a/kernel/kgdb.c
+++ /dev/null
@@ -1,1764 +0,0 @@
1/*
2 * KGDB stub.
3 *
4 * Maintainer: Jason Wessel <jason.wessel@windriver.com>
5 *
6 * Copyright (C) 2000-2001 VERITAS Software Corporation.
7 * Copyright (C) 2002-2004 Timesys Corporation
8 * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
9 * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
10 * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
11 * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
12 * Copyright (C) 2005-2008 Wind River Systems, Inc.
13 * Copyright (C) 2007 MontaVista Software, Inc.
14 * Copyright (C) 2008 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
15 *
16 * Contributors at various stages not listed above:
17 * Jason Wessel ( jason.wessel@windriver.com )
18 * George Anzinger <george@mvista.com>
19 * Anurekh Saxena (anurekh.saxena@timesys.com)
20 * Lake Stevens Instrument Division (Glenn Engel)
21 * Jim Kingdon, Cygnus Support.
22 *
23 * Original KGDB stub: David Grothe <dave@gcom.com>,
24 * Tigran Aivazian <tigran@sco.com>
25 *
26 * This file is licensed under the terms of the GNU General Public License
27 * version 2. This program is licensed "as is" without any warranty of any
28 * kind, whether express or implied.
29 */
30#include <linux/pid_namespace.h>
31#include <linux/clocksource.h>
32#include <linux/interrupt.h>
33#include <linux/spinlock.h>
34#include <linux/console.h>
35#include <linux/threads.h>
36#include <linux/uaccess.h>
37#include <linux/kernel.h>
38#include <linux/module.h>
39#include <linux/ptrace.h>
40#include <linux/reboot.h>
41#include <linux/string.h>
42#include <linux/delay.h>
43#include <linux/sched.h>
44#include <linux/sysrq.h>
45#include <linux/init.h>
46#include <linux/kgdb.h>
47#include <linux/pid.h>
48#include <linux/smp.h>
49#include <linux/mm.h>
50
51#include <asm/cacheflush.h>
52#include <asm/byteorder.h>
53#include <asm/atomic.h>
54#include <asm/system.h>
55#include <asm/unaligned.h>
56
57static int kgdb_break_asap;
58
59#define KGDB_MAX_THREAD_QUERY 17
60struct kgdb_state {
61 int ex_vector;
62 int signo;
63 int err_code;
64 int cpu;
65 int pass_exception;
66 unsigned long thr_query;
67 unsigned long threadid;
68 long kgdb_usethreadid;
69 struct pt_regs *linux_regs;
70};
71
72/* Exception state values */
73#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */
74#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */
75#define DCPU_IS_SLAVE 0x4 /* Slave cpu enter exception */
76#define DCPU_SSTEP 0x8 /* CPU is single stepping */
77
78static struct debuggerinfo_struct {
79 void *debuggerinfo;
80 struct task_struct *task;
81 int exception_state;
82} kgdb_info[NR_CPUS];
83
84/**
85 * kgdb_connected - Is a host GDB connected to us?
86 */
87int kgdb_connected;
88EXPORT_SYMBOL_GPL(kgdb_connected);
89
90/* All the KGDB handlers are installed */
91static int kgdb_io_module_registered;
92
93/* Guard for recursive entry */
94static int exception_level;
95
96static struct kgdb_io *kgdb_io_ops;
97static DEFINE_SPINLOCK(kgdb_registration_lock);
98
99/* kgdb console driver is loaded */
100static int kgdb_con_registered;
101/* determine if kgdb console output should be used */
102static int kgdb_use_con;
103
104static int __init opt_kgdb_con(char *str)
105{
106 kgdb_use_con = 1;
107 return 0;
108}
109
110early_param("kgdbcon", opt_kgdb_con);
111
112module_param(kgdb_use_con, int, 0644);
113
114/*
115 * Holds information about breakpoints in a kernel. These breakpoints are
116 * added and removed by gdb.
117 */
118static struct kgdb_bkpt kgdb_break[KGDB_MAX_BREAKPOINTS] = {
119 [0 ... KGDB_MAX_BREAKPOINTS-1] = { .state = BP_UNDEFINED }
120};
121
122/*
123 * The CPU# of the active CPU, or -1 if none:
124 */
125atomic_t kgdb_active = ATOMIC_INIT(-1);
126
127/*
128 * We use NR_CPUS, not PERCPU, in case kgdb is used to debug early
129 * bootup code (which might not have percpu set up yet):
130 */
131static atomic_t passive_cpu_wait[NR_CPUS];
132static atomic_t cpu_in_kgdb[NR_CPUS];
133atomic_t kgdb_setting_breakpoint;
134
135struct task_struct *kgdb_usethread;
136struct task_struct *kgdb_contthread;
137
138int kgdb_single_step;
139pid_t kgdb_sstep_pid;
140
141/* Our I/O buffers. */
142static char remcom_in_buffer[BUFMAX];
143static char remcom_out_buffer[BUFMAX];
144
145/* Storage for the registers, in GDB format. */
146static unsigned long gdb_regs[(NUMREGBYTES +
147 sizeof(unsigned long) - 1) /
148 sizeof(unsigned long)];
149
150/* to keep track of the CPU which is doing the single stepping*/
151atomic_t kgdb_cpu_doing_single_step = ATOMIC_INIT(-1);
152
153/*
154 * If you are debugging a problem where roundup (the collection of
155 * all other CPUs) is a problem [this should be extremely rare],
156 * then use the nokgdbroundup option to avoid roundup. In that case
157 * the other CPUs might interfere with your debugging context, so
158 * use this with care:
159 */
160static int kgdb_do_roundup = 1;
161
162static int __init opt_nokgdbroundup(char *str)
163{
164 kgdb_do_roundup = 0;
165
166 return 0;
167}
168
169early_param("nokgdbroundup", opt_nokgdbroundup);
170
171/*
172 * Finally, some KGDB code :-)
173 */
174
175/*
176 * Weak aliases for breakpoint management,
177 * can be overriden by architectures when needed:
178 */
179int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
180{
181 int err;
182
183 err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE);
184 if (err)
185 return err;
186
187 return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr,
188 BREAK_INSTR_SIZE);
189}
190
191int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
192{
193 return probe_kernel_write((char *)addr,
194 (char *)bundle, BREAK_INSTR_SIZE);
195}
196
197int __weak kgdb_validate_break_address(unsigned long addr)
198{
199 char tmp_variable[BREAK_INSTR_SIZE];
200 int err;
201 /* Validate setting the breakpoint and then removing it. If the
202 * remove fails, the kernel needs to emit a bad message because we
203 * are in deep trouble, not being able to put things back the way we
204 * found them.
205 */
206 err = kgdb_arch_set_breakpoint(addr, tmp_variable);
207 if (err)
208 return err;
209 err = kgdb_arch_remove_breakpoint(addr, tmp_variable);
210 if (err)
211 printk(KERN_ERR "KGDB: Critical breakpoint error, kernel "
212 "memory destroyed at: %lx\n", addr);
213 return err;
214}
215
216unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs)
217{
218 return instruction_pointer(regs);
219}
220
221int __weak kgdb_arch_init(void)
222{
223 return 0;
224}
225
226int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
227{
228 return 0;
229}
230
231void __weak
232kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
233{
234 return;
235}
236
237/**
238 * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb.
239 * @regs: Current &struct pt_regs.
240 *
241 * This function will be called if the particular architecture must
242 * disable hardware debugging while it is processing gdb packets or
243 * handling exception.
244 */
245void __weak kgdb_disable_hw_debug(struct pt_regs *regs)
246{
247}
248
249/*
250 * GDB remote protocol parser:
251 */
252
253static int hex(char ch)
254{
255 if ((ch >= 'a') && (ch <= 'f'))
256 return ch - 'a' + 10;
257 if ((ch >= '0') && (ch <= '9'))
258 return ch - '0';
259 if ((ch >= 'A') && (ch <= 'F'))
260 return ch - 'A' + 10;
261 return -1;
262}
263
264/* scan for the sequence $<data>#<checksum> */
265static void get_packet(char *buffer)
266{
267 unsigned char checksum;
268 unsigned char xmitcsum;
269 int count;
270 char ch;
271
272 do {
273 /*
274 * Spin and wait around for the start character, ignore all
275 * other characters:
276 */
277 while ((ch = (kgdb_io_ops->read_char())) != '$')
278 /* nothing */;
279
280 kgdb_connected = 1;
281 checksum = 0;
282 xmitcsum = -1;
283
284 count = 0;
285
286 /*
287 * now, read until a # or end of buffer is found:
288 */
289 while (count < (BUFMAX - 1)) {
290 ch = kgdb_io_ops->read_char();
291 if (ch == '#')
292 break;
293 checksum = checksum + ch;
294 buffer[count] = ch;
295 count = count + 1;
296 }
297 buffer[count] = 0;
298
299 if (ch == '#') {
300 xmitcsum = hex(kgdb_io_ops->read_char()) << 4;
301 xmitcsum += hex(kgdb_io_ops->read_char());
302
303 if (checksum != xmitcsum)
304 /* failed checksum */
305 kgdb_io_ops->write_char('-');
306 else
307 /* successful transfer */
308 kgdb_io_ops->write_char('+');
309 if (kgdb_io_ops->flush)
310 kgdb_io_ops->flush();
311 }
312 } while (checksum != xmitcsum);
313}
314
315/*
316 * Send the packet in buffer.
317 * Check for gdb connection if asked for.
318 */
319static void put_packet(char *buffer)
320{
321 unsigned char checksum;
322 int count;
323 char ch;
324
325 /*
326 * $<packet info>#<checksum>.
327 */
328 while (1) {
329 kgdb_io_ops->write_char('$');
330 checksum = 0;
331 count = 0;
332
333 while ((ch = buffer[count])) {
334 kgdb_io_ops->write_char(ch);
335 checksum += ch;
336 count++;
337 }
338
339 kgdb_io_ops->write_char('#');
340 kgdb_io_ops->write_char(hex_asc_hi(checksum));
341 kgdb_io_ops->write_char(hex_asc_lo(checksum));
342 if (kgdb_io_ops->flush)
343 kgdb_io_ops->flush();
344
345 /* Now see what we get in reply. */
346 ch = kgdb_io_ops->read_char();
347
348 if (ch == 3)
349 ch = kgdb_io_ops->read_char();
350
351 /* If we get an ACK, we are done. */
352 if (ch == '+')
353 return;
354
355 /*
356 * If we get the start of another packet, this means
357 * that GDB is attempting to reconnect. We will NAK
358 * the packet being sent, and stop trying to send this
359 * packet.
360 */
361 if (ch == '$') {
362 kgdb_io_ops->write_char('-');
363 if (kgdb_io_ops->flush)
364 kgdb_io_ops->flush();
365 return;
366 }
367 }
368}
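
The framing is easy to verify by hand. A minimal host-side sketch of the same checksum rule (illustrative only, not part of the stub):

static int frame_packet(char *out, size_t outlen, const char *payload)
{
	unsigned char sum = 0;
	const char *p;

	for (p = payload; *p; p++)
		sum += *p;			/* modulo-256 sum */
	return snprintf(out, outlen, "$%s#%02x", payload, sum);
}

frame_packet(buf, sizeof(buf), "g") yields "$g#67".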
369
370/*
371 * Convert the memory pointed to by mem into hex, placing result in buf.
372 * Return a pointer to the last char put in buf (null). May return an error.
373 */
374int kgdb_mem2hex(char *mem, char *buf, int count)
375{
376 char *tmp;
377 int err;
378
379 /*
380 * We use the upper half of buf as an intermediate buffer for the
381 * raw memory copy. Hex conversion will work against this one.
382 */
383 tmp = buf + count;
384
385 err = probe_kernel_read(tmp, mem, count);
386 if (!err) {
387 while (count > 0) {
388 buf = pack_hex_byte(buf, *tmp);
389 tmp++;
390 count--;
391 }
392
393 *buf = 0;
394 }
395
396 return err;
397}
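
A worked example of the staging trick, added for clarity (not in the original):

/* With count == 2 and bytes {0xde, 0xad} at mem, buf ends up holding
 * the string "dead". The raw copy is staged at buf + count and
 * converted in place, so buf must provide at least 2 * count + 1
 * bytes. */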
398
399/*
400 * Copy the binary array pointed to by buf into mem, undoing the 0x7d
401 * escaping of $, #, and 0x7d itself. Return -EFAULT on failure or 0 on success.
402 * The input buf is overwritten with the result to write to mem.
403 */
404static int kgdb_ebin2mem(char *buf, char *mem, int count)
405{
406 int size = 0;
407 char *c = buf;
408
409 while (count-- > 0) {
410 c[size] = *buf++;
411 if (c[size] == 0x7d)
412 c[size] = *buf++ ^ 0x20;
413 size++;
414 }
415
416 return probe_kernel_write(mem, c, size);
417}
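
The escape rule deserves one worked example (illustrative): 0x7d flips bit 5 of the byte that follows.

/* The pair {0x7d, 0x5d} decodes to 0x7d itself, and {0x7d, 0x03}
 * decodes to 0x23 ('#'), so '#' never appears raw inside a binary
 * 'X' packet. */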
418
419/*
420 * Convert the hex array pointed to by buf into binary to be placed in mem.
421 * Return a pointer to the character AFTER the last byte written.
422 * May return an error.
423 */
424int kgdb_hex2mem(char *buf, char *mem, int count)
425{
426 char *tmp_raw;
427 char *tmp_hex;
428
429 /*
430 * We use the upper half of buf as an intermediate buffer for the
431 * raw memory that is converted from hex.
432 */
433 tmp_raw = buf + count * 2;
434
435 tmp_hex = tmp_raw - 1;
436 while (tmp_hex >= buf) {
437 tmp_raw--;
438 *tmp_raw = hex(*tmp_hex--);
439 *tmp_raw |= hex(*tmp_hex--) << 4;
440 }
441
442 return probe_kernel_write(mem, tmp_raw, count);
443}
444
445/*
446 * While we find nice hex chars, build a long_val.
447 * Return number of chars processed.
448 */
449int kgdb_hex2long(char **ptr, unsigned long *long_val)
450{
451 int hex_val;
452 int num = 0;
453 int negate = 0;
454
455 *long_val = 0;
456
457 if (**ptr == '-') {
458 negate = 1;
459 (*ptr)++;
460 }
461 while (**ptr) {
462 hex_val = hex(**ptr);
463 if (hex_val < 0)
464 break;
465
466 *long_val = (*long_val << 4) | hex_val;
467 num++;
468 (*ptr)++;
469 }
470
471 if (negate)
472 *long_val = -*long_val;
473
474 return num;
475}
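
A parsing sketch (hypothetical helper, mirroring how the 'm' and 'M' handlers below consume their arguments):

static int parse_addr_len(char *buf, unsigned long *addr,
			  unsigned long *len)
{
	char *ptr = buf;

	/* e.g. "c01234a0,10" -> *addr = 0xc01234a0, *len = 0x10 */
	if (kgdb_hex2long(&ptr, addr) > 0 && *ptr++ == ',' &&
	    kgdb_hex2long(&ptr, len) > 0)
		return 0;
	return -EINVAL;
}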
476
477/* Write memory due to an 'M' or 'X' packet. */
478static int write_mem_msg(int binary)
479{
480 char *ptr = &remcom_in_buffer[1];
481 unsigned long addr;
482 unsigned long length;
483 int err;
484
485 if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' &&
486 kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') {
487 if (binary)
488 err = kgdb_ebin2mem(ptr, (char *)addr, length);
489 else
490 err = kgdb_hex2mem(ptr, (char *)addr, length);
491 if (err)
492 return err;
493 if (CACHE_FLUSH_IS_SAFE)
494 flush_icache_range(addr, addr + length);
495 return 0;
496 }
497
498 return -EINVAL;
499}
500
501static void error_packet(char *pkt, int error)
502{
503 error = -error;
504 pkt[0] = 'E';
505 pkt[1] = hex_asc[(error / 10)];
506 pkt[2] = hex_asc[(error % 10)];
507 pkt[3] = '\0';
508}
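
A worked example, added for clarity:

/* error_packet(pkt, -EINVAL) with EINVAL == 22 yields "E22"; both
 * decimal digits are below 10, so indexing hex_asc[] produces plain
 * decimal characters. */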
509
510/*
511 * Thread ID accessors. We represent a flat TID space to GDB, where
512 * the per CPU idle threads (which under Linux all have PID 0) are
513 * remapped to negative TIDs.
514 */
515
516#define BUF_THREAD_ID_SIZE 16
517
518static char *pack_threadid(char *pkt, unsigned char *id)
519{
520 char *limit;
521
522 limit = pkt + BUF_THREAD_ID_SIZE;
523 while (pkt < limit)
524 pkt = pack_hex_byte(pkt, *id++);
525
526 return pkt;
527}
528
529static void int_to_threadref(unsigned char *id, int value)
530{
531 unsigned char *scan;
532 int i = 4;
533
534 scan = (unsigned char *)id;
535 while (i--)
536 *scan++ = 0;
537 put_unaligned_be32(value, scan);
538}
539
540static struct task_struct *getthread(struct pt_regs *regs, int tid)
541{
542 /*
543 * Non-positive TIDs are remapped to the cpu shadow information
544 */
545 if (tid == 0 || tid == -1)
546 tid = -atomic_read(&kgdb_active) - 2;
547 if (tid < -1 && tid > -NR_CPUS - 2) {
548 if (kgdb_info[-tid - 2].task)
549 return kgdb_info[-tid - 2].task;
550 else
551 return idle_task(-tid - 2);
552 }
553 if (tid <= 0) {
554 printk(KERN_ERR "KGDB: Internal thread select error\n");
555 dump_stack();
556 return NULL;
557 }
558
559 /*
560 * find_task_by_pid_ns() does not take the tasklist lock anymore
561 * but is nicely RCU locked - hence is a pretty resilient
562 * thing to use:
563 */
564 return find_task_by_pid_ns(tid, &init_pid_ns);
565}
566
567/*
568 * Some architectures need cache flushes when we set/clear a
569 * breakpoint:
570 */
571static void kgdb_flush_swbreak_addr(unsigned long addr)
572{
573 if (!CACHE_FLUSH_IS_SAFE)
574 return;
575
576 if (current->mm && current->mm->mmap_cache) {
577 flush_cache_range(current->mm->mmap_cache,
578 addr, addr + BREAK_INSTR_SIZE);
579 }
580 /* Force flush instruction cache if it was outside the mm */
581 flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
582}
583
584/*
585 * SW breakpoint management:
586 */
587static int kgdb_activate_sw_breakpoints(void)
588{
589 unsigned long addr;
590 int error;
591 int ret = 0;
592 int i;
593
594 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
595 if (kgdb_break[i].state != BP_SET)
596 continue;
597
598 addr = kgdb_break[i].bpt_addr;
599 error = kgdb_arch_set_breakpoint(addr,
600 kgdb_break[i].saved_instr);
601 if (error) {
602 ret = error;
603 printk(KERN_INFO "KGDB: BP install failed: %lx\n", addr);
604 continue;
605 }
606
607 kgdb_flush_swbreak_addr(addr);
608 kgdb_break[i].state = BP_ACTIVE;
609 }
610 return ret;
611}
612
613static int kgdb_set_sw_break(unsigned long addr)
614{
615 int err = kgdb_validate_break_address(addr);
616 int breakno = -1;
617 int i;
618
619 if (err)
620 return err;
621
622 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
623 if ((kgdb_break[i].state == BP_SET) &&
624 (kgdb_break[i].bpt_addr == addr))
625 return -EEXIST;
626 }
627 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
628 if (kgdb_break[i].state == BP_REMOVED &&
629 kgdb_break[i].bpt_addr == addr) {
630 breakno = i;
631 break;
632 }
633 }
634
635 if (breakno == -1) {
636 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
637 if (kgdb_break[i].state == BP_UNDEFINED) {
638 breakno = i;
639 break;
640 }
641 }
642 }
643
644 if (breakno == -1)
645 return -E2BIG;
646
647 kgdb_break[breakno].state = BP_SET;
648 kgdb_break[breakno].type = BP_BREAKPOINT;
649 kgdb_break[breakno].bpt_addr = addr;
650
651 return 0;
652}
653
654static int kgdb_deactivate_sw_breakpoints(void)
655{
656 unsigned long addr;
657 int error;
658 int ret = 0;
659 int i;
660
661 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
662 if (kgdb_break[i].state != BP_ACTIVE)
663 continue;
664 addr = kgdb_break[i].bpt_addr;
665 error = kgdb_arch_remove_breakpoint(addr,
666 kgdb_break[i].saved_instr);
667 if (error) {
668 printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
669 ret = error;
670 }
671
672 kgdb_flush_swbreak_addr(addr);
673 kgdb_break[i].state = BP_SET;
674 }
675 return ret;
676}
677
678static int kgdb_remove_sw_break(unsigned long addr)
679{
680 int i;
681
682 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
683 if ((kgdb_break[i].state == BP_SET) &&
684 (kgdb_break[i].bpt_addr == addr)) {
685 kgdb_break[i].state = BP_REMOVED;
686 return 0;
687 }
688 }
689 return -ENOENT;
690}
691
692int kgdb_isremovedbreak(unsigned long addr)
693{
694 int i;
695
696 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
697 if ((kgdb_break[i].state == BP_REMOVED) &&
698 (kgdb_break[i].bpt_addr == addr))
699 return 1;
700 }
701 return 0;
702}
703
704static int remove_all_break(void)
705{
706 unsigned long addr;
707 int error;
708 int i;
709
710 /* Clear memory breakpoints. */
711 for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
712 if (kgdb_break[i].state != BP_ACTIVE)
713 goto setundefined;
714 addr = kgdb_break[i].bpt_addr;
715 error = kgdb_arch_remove_breakpoint(addr,
716 kgdb_break[i].saved_instr);
717 if (error)
718 printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n",
719 addr);
720setundefined:
721 kgdb_break[i].state = BP_UNDEFINED;
722 }
723
724 /* Clear hardware breakpoints. */
725 if (arch_kgdb_ops.remove_all_hw_break)
726 arch_kgdb_ops.remove_all_hw_break();
727
728 return 0;
729}
730
731/*
732 * Remap normal tasks to their real PID,
733 * CPU shadow threads are mapped to -CPU - 2
734 */
735static inline int shadow_pid(int realpid)
736{
737 if (realpid)
738 return realpid;
739
740 return -raw_smp_processor_id() - 2;
741}
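
The inverse mapping used by getthread() above can be written down explicitly (hypothetical helper, for illustration only):

static int shadow_tid_to_cpu(int tid)
{
	/* Inverse of shadow_pid() for idle threads: -2 -> CPU 0,
	 * -3 -> CPU 1, and so on; positive TIDs are real PIDs. */
	return -tid - 2;
}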
742
743static char gdbmsgbuf[BUFMAX + 1];
744
745static void kgdb_msg_write(const char *s, int len)
746{
747 char *bufptr;
748 int wcount;
749 int i;
750
751 /* 'O'utput */
752 gdbmsgbuf[0] = 'O';
753
754 /* Fill and send buffers... */
755 while (len > 0) {
756 bufptr = gdbmsgbuf + 1;
757
758 /* Calculate how many this time */
759 if ((len << 1) > (BUFMAX - 2))
760 wcount = (BUFMAX - 2) >> 1;
761 else
762 wcount = len;
763
764 /* Pack in hex chars */
765 for (i = 0; i < wcount; i++)
766 bufptr = pack_hex_byte(bufptr, s[i]);
767 *bufptr = '\0';
768
769 /* Move up */
770 s += wcount;
771 len -= wcount;
772
773 /* Write packet */
774 put_packet(gdbmsgbuf);
775 }
776}
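
A worked example of the 'O' framing, computed by hand (not from the patch):

/* kgdb_msg_write("Hello", 5) hex-expands the text into the payload
 * "O48656c6c6f", which put_packet() frames as "$O48656c6c6f#f4". */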
777
778/*
779 * Return true if there is a valid kgdb I/O module. Also if no
780 * debugger is attached a message can be printed to the console about
781 * waiting for the debugger to attach.
782 *
783 * The print_wait argument is only to be true when called from inside
784 * the core kgdb_handle_exception, because it will wait for the
785 * debugger to attach.
786 */
787static int kgdb_io_ready(int print_wait)
788{
789 if (!kgdb_io_ops)
790 return 0;
791 if (kgdb_connected)
792 return 1;
793 if (atomic_read(&kgdb_setting_breakpoint))
794 return 1;
795 if (print_wait)
796 printk(KERN_CRIT "KGDB: Waiting for remote debugger\n");
797 return 1;
798}
799
800/*
801 * All the functions that start with gdb_cmd are the various
802 * operations to implement the handlers for the gdbserial protocol
803 * where KGDB is communicating with an external debugger
804 */
805
806/* Handle the '?' status packets */
807static void gdb_cmd_status(struct kgdb_state *ks)
808{
809 /*
810 * We know that this packet is only sent
811 * during initial connect. So to be safe,
812 * we clear out our breakpoints now in case
813 * GDB is reconnecting.
814 */
815 remove_all_break();
816
817 remcom_out_buffer[0] = 'S';
818 pack_hex_byte(&remcom_out_buffer[1], ks->signo);
819}
820
821/* Handle the 'g' get registers request */
822static void gdb_cmd_getregs(struct kgdb_state *ks)
823{
824 struct task_struct *thread;
825 void *local_debuggerinfo;
826 int i;
827
828 thread = kgdb_usethread;
829 if (!thread) {
830 thread = kgdb_info[ks->cpu].task;
831 local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo;
832 } else {
833 local_debuggerinfo = NULL;
834 for_each_online_cpu(i) {
835 /*
836 * Try to find the task on some other
837 * or possibly this node; if we do not
838 * find the matching task, then we try
839 * to approximate the results.
840 */
841 if (thread == kgdb_info[i].task)
842 local_debuggerinfo = kgdb_info[i].debuggerinfo;
843 }
844 }
845
846 /*
847 * All threads that don't have debuggerinfo should be
848 * in schedule() sleeping, since all other CPUs
849 * are in kgdb_wait, and thus have debuggerinfo.
850 */
851 if (local_debuggerinfo) {
852 pt_regs_to_gdb_regs(gdb_regs, local_debuggerinfo);
853 } else {
854 /*
855 * Pull stuff saved during switch_to; nothing
856 * else is accessible (or even particularly
857 * relevant).
858 *
859 * This should be enough for a stack trace.
860 */
861 sleeping_thread_to_gdb_regs(gdb_regs, thread);
862 }
863 kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, NUMREGBYTES);
864}
865
866/* Handle the 'G' set registers request */
867static void gdb_cmd_setregs(struct kgdb_state *ks)
868{
869 kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs, NUMREGBYTES);
870
871 if (kgdb_usethread && kgdb_usethread != current) {
872 error_packet(remcom_out_buffer, -EINVAL);
873 } else {
874 gdb_regs_to_pt_regs(gdb_regs, ks->linux_regs);
875 strcpy(remcom_out_buffer, "OK");
876 }
877}
878
879/* Handle the 'm' memory read bytes */
880static void gdb_cmd_memread(struct kgdb_state *ks)
881{
882 char *ptr = &remcom_in_buffer[1];
883 unsigned long length;
884 unsigned long addr;
885 int err;
886
887 if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
888 kgdb_hex2long(&ptr, &length) > 0) {
889 err = kgdb_mem2hex((char *)addr, remcom_out_buffer, length);
890 if (err)
891 error_packet(remcom_out_buffer, err);
892 } else {
893 error_packet(remcom_out_buffer, -EINVAL);
894 }
895}
896
897/* Handle the 'M' memory write bytes */
898static void gdb_cmd_memwrite(struct kgdb_state *ks)
899{
900 int err = write_mem_msg(0);
901
902 if (err)
903 error_packet(remcom_out_buffer, err);
904 else
905 strcpy(remcom_out_buffer, "OK");
906}
907
908/* Handle the 'X' memory binary write bytes */
909static void gdb_cmd_binwrite(struct kgdb_state *ks)
910{
911 int err = write_mem_msg(1);
912
913 if (err)
914 error_packet(remcom_out_buffer, err);
915 else
916 strcpy(remcom_out_buffer, "OK");
917}
918
919/* Handle the 'D' or 'k', detach or kill packets */
920static void gdb_cmd_detachkill(struct kgdb_state *ks)
921{
922 int error;
923
924 /* The detach case */
925 if (remcom_in_buffer[0] == 'D') {
926 error = remove_all_break();
927 if (error < 0) {
928 error_packet(remcom_out_buffer, error);
929 } else {
930 strcpy(remcom_out_buffer, "OK");
931 kgdb_connected = 0;
932 }
933 put_packet(remcom_out_buffer);
934 } else {
935 /*
936 * Assume the kill case, with no exit code checking,
937 * trying to force detach the debugger:
938 */
939 remove_all_break();
940 kgdb_connected = 0;
941 }
942}
943
944/* Handle the 'R' reboot packets */
945static int gdb_cmd_reboot(struct kgdb_state *ks)
946{
947 /* For now, only honor R0 */
948 if (strcmp(remcom_in_buffer, "R0") == 0) {
949 printk(KERN_CRIT "Executing emergency reboot\n");
950 strcpy(remcom_out_buffer, "OK");
951 put_packet(remcom_out_buffer);
952
953 /*
954 * Execution should not return from
955 * machine_emergency_restart()
956 */
957 machine_emergency_restart();
958 kgdb_connected = 0;
959
960 return 1;
961 }
962 return 0;
963}
964
965/* Handle the 'q' query packets */
966static void gdb_cmd_query(struct kgdb_state *ks)
967{
968 struct task_struct *g;
969 struct task_struct *p;
970 unsigned char thref[8];
971 char *ptr;
972 int i;
973 int cpu;
974 int finished = 0;
975
976 switch (remcom_in_buffer[1]) {
977 case 's':
978 case 'f':
979 if (memcmp(remcom_in_buffer + 2, "ThreadInfo", 10)) {
980 error_packet(remcom_out_buffer, -EINVAL);
981 break;
982 }
983
984 i = 0;
985 remcom_out_buffer[0] = 'm';
986 ptr = remcom_out_buffer + 1;
987 if (remcom_in_buffer[1] == 'f') {
988 /* Each cpu is a shadow thread */
989 for_each_online_cpu(cpu) {
990 ks->thr_query = 0;
991 int_to_threadref(thref, -cpu - 2);
992 pack_threadid(ptr, thref);
993 ptr += BUF_THREAD_ID_SIZE;
994 *(ptr++) = ',';
995 i++;
996 }
997 }
998
999 do_each_thread(g, p) {
1000 if (i >= ks->thr_query && !finished) {
1001 int_to_threadref(thref, p->pid);
1002 pack_threadid(ptr, thref);
1003 ptr += BUF_THREAD_ID_SIZE;
1004 *(ptr++) = ',';
1005 ks->thr_query++;
1006 if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0)
1007 finished = 1;
1008 }
1009 i++;
1010 } while_each_thread(g, p);
1011
1012 *(--ptr) = '\0';
1013 break;
1014
1015 case 'C':
1016 /* Current thread id */
1017 strcpy(remcom_out_buffer, "QC");
1018 ks->threadid = shadow_pid(current->pid);
1019 int_to_threadref(thref, ks->threadid);
1020 pack_threadid(remcom_out_buffer + 2, thref);
1021 break;
1022 case 'T':
1023 if (memcmp(remcom_in_buffer + 1, "ThreadExtraInfo,", 16)) {
1024 error_packet(remcom_out_buffer, -EINVAL);
1025 break;
1026 }
1027 ks->threadid = 0;
1028 ptr = remcom_in_buffer + 17;
1029 kgdb_hex2long(&ptr, &ks->threadid);
1030 if (!getthread(ks->linux_regs, ks->threadid)) {
1031 error_packet(remcom_out_buffer, -EINVAL);
1032 break;
1033 }
1034 if ((int)ks->threadid > 0) {
1035 kgdb_mem2hex(getthread(ks->linux_regs,
1036 ks->threadid)->comm,
1037 remcom_out_buffer, 16);
1038 } else {
1039 static char tmpstr[23 + BUF_THREAD_ID_SIZE];
1040
1041 sprintf(tmpstr, "shadowCPU%d",
1042 (int)(-ks->threadid - 2));
1043 kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr));
1044 }
1045 break;
1046 }
1047}
1048
1049/* Handle the 'H' task query packets */
1050static void gdb_cmd_task(struct kgdb_state *ks)
1051{
1052 struct task_struct *thread;
1053 char *ptr;
1054
1055 switch (remcom_in_buffer[1]) {
1056 case 'g':
1057 ptr = &remcom_in_buffer[2];
1058 kgdb_hex2long(&ptr, &ks->threadid);
1059 thread = getthread(ks->linux_regs, ks->threadid);
1060 if (!thread && ks->threadid > 0) {
1061 error_packet(remcom_out_buffer, -EINVAL);
1062 break;
1063 }
1064 kgdb_usethread = thread;
1065 ks->kgdb_usethreadid = ks->threadid;
1066 strcpy(remcom_out_buffer, "OK");
1067 break;
1068 case 'c':
1069 ptr = &remcom_in_buffer[2];
1070 kgdb_hex2long(&ptr, &ks->threadid);
1071 if (!ks->threadid) {
1072 kgdb_contthread = NULL;
1073 } else {
1074 thread = getthread(ks->linux_regs, ks->threadid);
1075 if (!thread && ks->threadid > 0) {
1076 error_packet(remcom_out_buffer, -EINVAL);
1077 break;
1078 }
1079 kgdb_contthread = thread;
1080 }
1081 strcpy(remcom_out_buffer, "OK");
1082 break;
1083 }
1084}
1085
1086/* Handle the 'T' thread query packets */
1087static void gdb_cmd_thread(struct kgdb_state *ks)
1088{
1089 char *ptr = &remcom_in_buffer[1];
1090 struct task_struct *thread;
1091
1092 kgdb_hex2long(&ptr, &ks->threadid);
1093 thread = getthread(ks->linux_regs, ks->threadid);
1094 if (thread)
1095 strcpy(remcom_out_buffer, "OK");
1096 else
1097 error_packet(remcom_out_buffer, -EINVAL);
1098}
1099
1100/* Handle the 'z' or 'Z' breakpoint remove or set packets */
1101static void gdb_cmd_break(struct kgdb_state *ks)
1102{
1103 /*
1104 * Since GDB-5.3, it's been drafted that '0' is a software
1105 * breakpoint, '1' is a hardware breakpoint, so let's do that.
1106 */
1107 char *bpt_type = &remcom_in_buffer[1];
1108 char *ptr = &remcom_in_buffer[2];
1109 unsigned long addr;
1110 unsigned long length;
1111 int error = 0;
1112
1113 if (arch_kgdb_ops.set_hw_breakpoint && *bpt_type >= '1') {
1114 /* Unsupported */
1115 if (*bpt_type > '4')
1116 return;
1117 } else {
1118 if (*bpt_type != '0' && *bpt_type != '1')
1119 /* Unsupported. */
1120 return;
1121 }
1122
1123 /*
1124 * Test if this is a hardware breakpoint, and
1125 * if we support it:
1126 */
1127 if (*bpt_type == '1' && !(arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT))
1128 /* Unsupported. */
1129 return;
1130
1131 if (*(ptr++) != ',') {
1132 error_packet(remcom_out_buffer, -EINVAL);
1133 return;
1134 }
1135 if (!kgdb_hex2long(&ptr, &addr)) {
1136 error_packet(remcom_out_buffer, -EINVAL);
1137 return;
1138 }
1139 if (*(ptr++) != ',' ||
1140 !kgdb_hex2long(&ptr, &length)) {
1141 error_packet(remcom_out_buffer, -EINVAL);
1142 return;
1143 }
1144
1145 if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0')
1146 error = kgdb_set_sw_break(addr);
1147 else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0')
1148 error = kgdb_remove_sw_break(addr);
1149 else if (remcom_in_buffer[0] == 'Z')
1150 error = arch_kgdb_ops.set_hw_breakpoint(addr,
1151 (int)length, *bpt_type - '0');
1152 else if (remcom_in_buffer[0] == 'z')
1153 error = arch_kgdb_ops.remove_hw_breakpoint(addr,
1154 (int) length, *bpt_type - '0');
1155
1156 if (error == 0)
1157 strcpy(remcom_out_buffer, "OK");
1158 else
1159 error_packet(remcom_out_buffer, error);
1160}
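
For reference, the packet shapes this handler accepts (standard gdbserial syntax; the address is invented):

/* "Z0,c01234a0,1" sets a software breakpoint at 0xc01234a0 and
 * "z0,c01234a0,1" removes it; "Z1,..."/"z1,..." route to the arch
 * hw-breakpoint hooks when KGDB_HW_BREAKPOINT is advertised. */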
1161
1162/* Handle the 'C' signal / exception passing packets */
1163static int gdb_cmd_exception_pass(struct kgdb_state *ks)
1164{
1165 /* C09 == pass exception
1166 * C15 == detach kgdb, pass exception
1167 */
1168 if (remcom_in_buffer[1] == '0' && remcom_in_buffer[2] == '9') {
1169
1170 ks->pass_exception = 1;
1171 remcom_in_buffer[0] = 'c';
1172
1173 } else if (remcom_in_buffer[1] == '1' && remcom_in_buffer[2] == '5') {
1174
1175 ks->pass_exception = 1;
1176 remcom_in_buffer[0] = 'D';
1177 remove_all_break();
1178 kgdb_connected = 0;
1179 return 1;
1180
1181 } else {
1182 kgdb_msg_write("KGDB only knows signal 9 (pass)"
1183 " and 15 (pass and disconnect)\n"
1184 "Executing a continue without signal passing\n", 0);
1185 remcom_in_buffer[0] = 'c';
1186 }
1187
1188 /* Indicate fall through */
1189 return -1;
1190}
1191
1192/*
1193 * This function performs all gdbserial command processing
1194 */
1195static int gdb_serial_stub(struct kgdb_state *ks)
1196{
1197 int error = 0;
1198 int tmp;
1199
1200 /* Clear the out buffer. */
1201 memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
1202
1203 if (kgdb_connected) {
1204 unsigned char thref[8];
1205 char *ptr;
1206
1207 /* Reply to host that an exception has occurred */
1208 ptr = remcom_out_buffer;
1209 *ptr++ = 'T';
1210 ptr = pack_hex_byte(ptr, ks->signo);
1211 ptr += strlen(strcpy(ptr, "thread:"));
1212 int_to_threadref(thref, shadow_pid(current->pid));
1213 ptr = pack_threadid(ptr, thref);
1214 *ptr++ = ';';
1215 put_packet(remcom_out_buffer);
1216 }
1217
1218 kgdb_usethread = kgdb_info[ks->cpu].task;
1219 ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid);
1220 ks->pass_exception = 0;
1221
1222 while (1) {
1223 error = 0;
1224
1225 /* Clear the out buffer. */
1226 memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
1227
1228 get_packet(remcom_in_buffer);
1229
1230 switch (remcom_in_buffer[0]) {
1231 case '?': /* gdbserial status */
1232 gdb_cmd_status(ks);
1233 break;
1234 case 'g': /* return the value of the CPU registers */
1235 gdb_cmd_getregs(ks);
1236 break;
1237 case 'G': /* set the value of the CPU registers - return OK */
1238 gdb_cmd_setregs(ks);
1239 break;
1240 case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */
1241 gdb_cmd_memread(ks);
1242 break;
1243 case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */
1244 gdb_cmd_memwrite(ks);
1245 break;
1246 case 'X': /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */
1247 gdb_cmd_binwrite(ks);
1248 break;
1249 /* kill or detach. KGDB should treat this like a
1250 * continue.
1251 */
1252 case 'D': /* Debugger detach */
1253 case 'k': /* Debugger detach via kill */
1254 gdb_cmd_detachkill(ks);
1255 goto default_handle;
1256 case 'R': /* Reboot */
1257 if (gdb_cmd_reboot(ks))
1258 goto default_handle;
1259 break;
1260 case 'q': /* query command */
1261 gdb_cmd_query(ks);
1262 break;
1263 case 'H': /* task related */
1264 gdb_cmd_task(ks);
1265 break;
1266 case 'T': /* Query thread status */
1267 gdb_cmd_thread(ks);
1268 break;
1269 case 'z': /* Break point remove */
1270 case 'Z': /* Break point set */
1271 gdb_cmd_break(ks);
1272 break;
1273 case 'C': /* Exception passing */
1274 tmp = gdb_cmd_exception_pass(ks);
1275 if (tmp > 0)
1276 goto default_handle;
1277 if (tmp == 0)
1278 break;
1279 /* Fall through on tmp < 0 */
1280 case 'c': /* Continue packet */
1281 case 's': /* Single step packet */
1282 if (kgdb_contthread && kgdb_contthread != current) {
1283 /* Can't switch threads in kgdb */
1284 error_packet(remcom_out_buffer, -EINVAL);
1285 break;
1286 }
1287 kgdb_activate_sw_breakpoints();
1288 /* Fall through to default processing */
1289 default:
1290default_handle:
1291 error = kgdb_arch_handle_exception(ks->ex_vector,
1292 ks->signo,
1293 ks->err_code,
1294 remcom_in_buffer,
1295 remcom_out_buffer,
1296 ks->linux_regs);
1297 /*
1298 * Leave cmd processing on error, detach,
1299 * kill, continue, or single step.
1300 */
1301 if (error >= 0 || remcom_in_buffer[0] == 'D' ||
1302 remcom_in_buffer[0] == 'k') {
1303 error = 0;
1304 goto kgdb_exit;
1305 }
1306
1307 }
1308
1309 /* reply to the request */
1310 put_packet(remcom_out_buffer);
1311 }
1312
1313kgdb_exit:
1314 if (ks->pass_exception)
1315 error = 1;
1316 return error;
1317}
1318
1319static int kgdb_reenter_check(struct kgdb_state *ks)
1320{
1321 unsigned long addr;
1322
1323 if (atomic_read(&kgdb_active) != raw_smp_processor_id())
1324 return 0;
1325
1326 /* Panic on recursive debugger calls: */
1327 exception_level++;
1328 addr = kgdb_arch_pc(ks->ex_vector, ks->linux_regs);
1329 kgdb_deactivate_sw_breakpoints();
1330
1331 /*
1332 * If the breakpoint was removed OK at the place the exception
1333 * occurred, try to recover and print a warning to the end
1334 * user because the user planted a breakpoint in a place that
1335 * KGDB needs in order to function.
1336 */
1337 if (kgdb_remove_sw_break(addr) == 0) {
1338 exception_level = 0;
1339 kgdb_skipexception(ks->ex_vector, ks->linux_regs);
1340 kgdb_activate_sw_breakpoints();
1341 printk(KERN_CRIT "KGDB: re-enter error: breakpoint removed %lx\n",
1342 addr);
1343 WARN_ON_ONCE(1);
1344
1345 return 1;
1346 }
1347 remove_all_break();
1348 kgdb_skipexception(ks->ex_vector, ks->linux_regs);
1349
1350 if (exception_level > 1) {
1351 dump_stack();
1352 panic("Recursive entry to debugger");
1353 }
1354
1355 printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints killed\n");
1356 dump_stack();
1357 panic("Recursive entry to debugger");
1358
1359 return 1;
1360}
1361
1362static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
1363{
1364 unsigned long flags;
1365 int sstep_tries = 100;
1366 int error = 0;
1367 int i, cpu;
1368 int trace_on = 0;
1369acquirelock:
1370 /*
1371 * Interrupts will be restored by the 'trap return' code, except when
1372 * single stepping.
1373 */
1374 local_irq_save(flags);
1375
1376 cpu = ks->cpu;
1377 kgdb_info[cpu].debuggerinfo = regs;
1378 kgdb_info[cpu].task = current;
1379 /*
1380 * Make sure the above info reaches the primary CPU before
1381 * our cpu_in_kgdb[] flag setting does:
1382 */
1383 atomic_inc(&cpu_in_kgdb[cpu]);
1384
1385 /*
 1386	 * The CPU will loop if it is a slave, or will request to become
 1387	 * the kgdb master cpu and acquire the kgdb_active lock:
1388 */
1389 while (1) {
1390 if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
1391 if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
1392 break;
1393 } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) {
1394 if (!atomic_read(&passive_cpu_wait[cpu]))
1395 goto return_normal;
1396 } else {
1397return_normal:
1398 /* Return to normal operation by executing any
1399 * hw breakpoint fixup.
1400 */
1401 if (arch_kgdb_ops.correct_hw_break)
1402 arch_kgdb_ops.correct_hw_break();
1403 if (trace_on)
1404 tracing_on();
1405 atomic_dec(&cpu_in_kgdb[cpu]);
1406 touch_softlockup_watchdog_sync();
1407 clocksource_touch_watchdog();
1408 local_irq_restore(flags);
1409 return 0;
1410 }
1411 cpu_relax();
1412 }
1413
1414 /*
1415 * For single stepping, try to only enter on the processor
 1416	 * that was single stepping. To guard against a deadlock, the
 1417	 * kernel will retry only sstep_tries times before
1418 * giving up and continuing on.
1419 */
1420 if (atomic_read(&kgdb_cpu_doing_single_step) != -1 &&
1421 (kgdb_info[cpu].task &&
1422 kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
1423 atomic_set(&kgdb_active, -1);
1424 touch_softlockup_watchdog_sync();
1425 clocksource_touch_watchdog();
1426 local_irq_restore(flags);
1427
1428 goto acquirelock;
1429 }
1430
1431 if (!kgdb_io_ready(1)) {
1432 error = 1;
1433 goto kgdb_restore; /* No I/O connection, so resume the system */
1434 }
1435
1436 /*
1437 * Don't enter if we have hit a removed breakpoint.
1438 */
1439 if (kgdb_skipexception(ks->ex_vector, ks->linux_regs))
1440 goto kgdb_restore;
1441
1442 /* Call the I/O driver's pre_exception routine */
1443 if (kgdb_io_ops->pre_exception)
1444 kgdb_io_ops->pre_exception();
1445
1446 kgdb_disable_hw_debug(ks->linux_regs);
1447
1448 /*
 1449	 * Get the passive CPU lock, which will hold all the non-primary
 1450	 * CPUs in a spin state while the debugger is active
1451 */
1452 if (!kgdb_single_step) {
1453 for (i = 0; i < NR_CPUS; i++)
1454 atomic_inc(&passive_cpu_wait[i]);
1455 }
1456
1457#ifdef CONFIG_SMP
1458 /* Signal the other CPUs to enter kgdb_wait() */
1459 if ((!kgdb_single_step) && kgdb_do_roundup)
1460 kgdb_roundup_cpus(flags);
1461#endif
1462
1463 /*
1464 * Wait for the other CPUs to be notified and be waiting for us:
1465 */
1466 for_each_online_cpu(i) {
1467 while (!atomic_read(&cpu_in_kgdb[i]))
1468 cpu_relax();
1469 }
1470
1471 /*
1472 * At this point the primary processor is completely
1473 * in the debugger and all secondary CPUs are quiescent
1474 */
1475 kgdb_post_primary_code(ks->linux_regs, ks->ex_vector, ks->err_code);
1476 kgdb_deactivate_sw_breakpoints();
1477 kgdb_single_step = 0;
1478 kgdb_contthread = current;
1479 exception_level = 0;
1480 trace_on = tracing_is_on();
1481 if (trace_on)
1482 tracing_off();
1483
1484 /* Talk to debugger with gdbserial protocol */
1485 error = gdb_serial_stub(ks);
1486
1487 /* Call the I/O driver's post_exception routine */
1488 if (kgdb_io_ops->post_exception)
1489 kgdb_io_ops->post_exception();
1490
1491 atomic_dec(&cpu_in_kgdb[ks->cpu]);
1492
1493 if (!kgdb_single_step) {
1494 for (i = NR_CPUS-1; i >= 0; i--)
1495 atomic_dec(&passive_cpu_wait[i]);
1496 /*
 1497		 * Wait until all the CPUs have quit
1498 * from the debugger.
1499 */
1500 for_each_online_cpu(i) {
1501 while (atomic_read(&cpu_in_kgdb[i]))
1502 cpu_relax();
1503 }
1504 }
1505
1506kgdb_restore:
1507 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
1508 int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step);
1509 if (kgdb_info[sstep_cpu].task)
1510 kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid;
1511 else
1512 kgdb_sstep_pid = 0;
1513 }
1514 if (trace_on)
1515 tracing_on();
1516 /* Free kgdb_active */
1517 atomic_set(&kgdb_active, -1);
1518 touch_softlockup_watchdog_sync();
1519 clocksource_touch_watchdog();
1520 local_irq_restore(flags);
1521
1522 return error;
1523}
1524
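kgdb_cpu_enter() elects a single master by racing atomic_cmpxchg() on kgdb_active: each CPU that wants the debugger tries to swap -1 for its own id, exactly one swap succeeds, and the rest spin as slaves until released. A userspace analogue using C11 atomics (a sketch under that assumption, not the kernel implementation):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int active_cpu = -1;	/* -1 means no master holds the lock */

/* Returns nonzero iff this cpu won the election and is now the master. */
static int try_become_master(int cpu)
{
	int expected = -1;

	return atomic_compare_exchange_strong(&active_cpu, &expected, cpu);
}

int main(void)
{
	printf("cpu0 wins: %d\n", try_become_master(0));	/* prints 1 */
	printf("cpu1 wins: %d\n", try_become_master(1));	/* prints 0 */
	atomic_store(&active_cpu, -1);		/* master releases the lock */
	return 0;
}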
1525/*
1526 * kgdb_handle_exception() - main entry point from a kernel exception
1527 *
1528 * Locking hierarchy:
1529 * interface locks, if any (begin_session)
1530 * kgdb lock (kgdb_active)
1531 */
1532int
1533kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
1534{
1535 struct kgdb_state kgdb_var;
1536 struct kgdb_state *ks = &kgdb_var;
1537 int ret;
1538
1539 ks->cpu = raw_smp_processor_id();
1540 ks->ex_vector = evector;
1541 ks->signo = signo;
1543 ks->err_code = ecode;
1544 ks->kgdb_usethreadid = 0;
1545 ks->linux_regs = regs;
1546
1547 if (kgdb_reenter_check(ks))
1548 return 0; /* Ouch, double exception ! */
1549 kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
1550 ret = kgdb_cpu_enter(ks, regs);
1551 kgdb_info[ks->cpu].exception_state &= ~DCPU_WANT_MASTER;
1552 return ret;
1553}
1554
1555int kgdb_nmicallback(int cpu, void *regs)
1556{
1557#ifdef CONFIG_SMP
1558 struct kgdb_state kgdb_var;
1559 struct kgdb_state *ks = &kgdb_var;
1560
1561 memset(ks, 0, sizeof(struct kgdb_state));
1562 ks->cpu = cpu;
1563 ks->linux_regs = regs;
1564
1565 if (!atomic_read(&cpu_in_kgdb[cpu]) &&
1566 atomic_read(&kgdb_active) != -1 &&
1567 atomic_read(&kgdb_active) != cpu) {
1568 kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
1569 kgdb_cpu_enter(ks, regs);
1570 kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE;
1571 return 0;
1572 }
1573#endif
1574 return 1;
1575}
1576
1577static void kgdb_console_write(struct console *co, const char *s,
1578 unsigned count)
1579{
1580 unsigned long flags;
1581
1582 /* If we're debugging, or KGDB has not connected, don't try
 1583	 * to print. */
1584 if (!kgdb_connected || atomic_read(&kgdb_active) != -1)
1585 return;
1586
1587 local_irq_save(flags);
1588 kgdb_msg_write(s, count);
1589 local_irq_restore(flags);
1590}
1591
1592static struct console kgdbcons = {
1593 .name = "kgdb",
1594 .write = kgdb_console_write,
1595 .flags = CON_PRINTBUFFER | CON_ENABLED,
1596 .index = -1,
1597};
1598
1599#ifdef CONFIG_MAGIC_SYSRQ
1600static void sysrq_handle_gdb(int key, struct tty_struct *tty)
1601{
1602 if (!kgdb_io_ops) {
1603 printk(KERN_CRIT "ERROR: No KGDB I/O module available\n");
1604 return;
1605 }
1606 if (!kgdb_connected)
1607 printk(KERN_CRIT "Entering KGDB\n");
1608
1609 kgdb_breakpoint();
1610}
1611
1612static struct sysrq_key_op sysrq_gdb_op = {
1613 .handler = sysrq_handle_gdb,
1614 .help_msg = "debug(G)",
1615 .action_msg = "DEBUG",
1616};
1617#endif
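With CONFIG_MAGIC_SYSRQ enabled, this handler can be exercised from a running system with "echo g > /proc/sysrq-trigger" (or the Alt-SysRq-g chord on a console); it then calls kgdb_breakpoint() and enters the debugger, provided an I/O module has been registered.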
1618
1619static void kgdb_register_callbacks(void)
1620{
1621 if (!kgdb_io_module_registered) {
1622 kgdb_io_module_registered = 1;
1623 kgdb_arch_init();
1624#ifdef CONFIG_MAGIC_SYSRQ
1625 register_sysrq_key('g', &sysrq_gdb_op);
1626#endif
1627 if (kgdb_use_con && !kgdb_con_registered) {
1628 register_console(&kgdbcons);
1629 kgdb_con_registered = 1;
1630 }
1631 }
1632}
1633
1634static void kgdb_unregister_callbacks(void)
1635{
1636 /*
 1637	 * When this routine is called, KGDB should unregister from the
1638 * panic handler and clean up, making sure it is not handling any
1639 * break exceptions at the time.
1640 */
1641 if (kgdb_io_module_registered) {
1642 kgdb_io_module_registered = 0;
1643 kgdb_arch_exit();
1644#ifdef CONFIG_MAGIC_SYSRQ
1645 unregister_sysrq_key('g', &sysrq_gdb_op);
1646#endif
1647 if (kgdb_con_registered) {
1648 unregister_console(&kgdbcons);
1649 kgdb_con_registered = 0;
1650 }
1651 }
1652}
1653
1654static void kgdb_initial_breakpoint(void)
1655{
1656 kgdb_break_asap = 0;
1657
1658 printk(KERN_CRIT "kgdb: Waiting for connection from remote gdb...\n");
1659 kgdb_breakpoint();
1660}
1661
1662/**
1663 * kgdb_register_io_module - register KGDB IO module
1664 * @new_kgdb_io_ops: the io ops vector
1665 *
1666 * Register it with the KGDB core.
1667 */
1668int kgdb_register_io_module(struct kgdb_io *new_kgdb_io_ops)
1669{
1670 int err;
1671
1672 spin_lock(&kgdb_registration_lock);
1673
1674 if (kgdb_io_ops) {
1675 spin_unlock(&kgdb_registration_lock);
1676
1677 printk(KERN_ERR "kgdb: Another I/O driver is already "
1678 "registered with KGDB.\n");
1679 return -EBUSY;
1680 }
1681
1682 if (new_kgdb_io_ops->init) {
1683 err = new_kgdb_io_ops->init();
1684 if (err) {
1685 spin_unlock(&kgdb_registration_lock);
1686 return err;
1687 }
1688 }
1689
1690 kgdb_io_ops = new_kgdb_io_ops;
1691
1692 spin_unlock(&kgdb_registration_lock);
1693
1694 printk(KERN_INFO "kgdb: Registered I/O driver %s.\n",
1695 new_kgdb_io_ops->name);
1696
1697 /* Arm KGDB now. */
1698 kgdb_register_callbacks();
1699
1700 if (kgdb_break_asap)
1701 kgdb_initial_breakpoint();
1702
1703 return 0;
1704}
1705EXPORT_SYMBOL_GPL(kgdb_register_io_module);
1706
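A hypothetical I/O driver registering against this interface could look as follows (a sketch only: the demo_* names and callback bodies are stand-ins, not a real driver, and only the struct kgdb_io fields used here are assumed):

#include <linux/kgdb.h>
#include <linux/module.h>

static int demo_read_char(void)
{
	return -1;			/* no byte available */
}

static void demo_write_char(u8 chr)
{
	/* a real driver would push chr out over its transport */
}

static struct kgdb_io demo_io_ops = {
	.name		= "demo_io",
	.read_char	= demo_read_char,
	.write_char	= demo_write_char,
};

static int __init demo_io_init(void)
{
	return kgdb_register_io_module(&demo_io_ops);
}

static void __exit demo_io_exit(void)
{
	kgdb_unregister_io_module(&demo_io_ops);
}

module_init(demo_io_init);
module_exit(demo_io_exit);
MODULE_LICENSE("GPL");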
1707/**
 1708 * kgdb_unregister_io_module - unregister KGDB IO module
1709 * @old_kgdb_io_ops: the io ops vector
1710 *
 1711 * Unregister it from the KGDB core.
1712 */
1713void kgdb_unregister_io_module(struct kgdb_io *old_kgdb_io_ops)
1714{
1715 BUG_ON(kgdb_connected);
1716
1717 /*
1718 * KGDB is no longer able to communicate out, so
1719 * unregister our callbacks and reset state.
1720 */
1721 kgdb_unregister_callbacks();
1722
1723 spin_lock(&kgdb_registration_lock);
1724
1725 WARN_ON_ONCE(kgdb_io_ops != old_kgdb_io_ops);
1726 kgdb_io_ops = NULL;
1727
1728 spin_unlock(&kgdb_registration_lock);
1729
1730 printk(KERN_INFO
1731 "kgdb: Unregistered I/O driver %s, debugger disabled.\n",
1732 old_kgdb_io_ops->name);
1733}
1734EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
1735
1736/**
1737 * kgdb_breakpoint - generate breakpoint exception
1738 *
1739 * This function will generate a breakpoint exception. It is used at the
1740 * beginning of a program to sync up with a debugger and can be used
1741 * otherwise as a quick means to stop program execution and "break" into
1742 * the debugger.
1743 */
1744void kgdb_breakpoint(void)
1745{
1746 atomic_inc(&kgdb_setting_breakpoint);
1747 wmb(); /* Sync point before breakpoint */
1748 arch_kgdb_breakpoint();
1749 wmb(); /* Sync point after breakpoint */
1750 atomic_dec(&kgdb_setting_breakpoint);
1751}
1752EXPORT_SYMBOL_GPL(kgdb_breakpoint);
1753
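A hypothetical consumer of this export is a test module that syncs up with gdb at load time (illustrative only; kgdb_sync is not part of this patch):

#include <linux/kgdb.h>
#include <linux/module.h>

static int __init kgdb_sync_init(void)
{
	pr_info("kgdb_sync: breaking into the debugger\n");
	kgdb_breakpoint();	/* stops here until gdb says continue */
	return 0;
}

static void __exit kgdb_sync_exit(void)
{
}

module_init(kgdb_sync_init);
module_exit(kgdb_sync_exit);
MODULE_LICENSE("GPL");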
1754static int __init opt_kgdb_wait(char *str)
1755{
1756 kgdb_break_asap = 1;
1757
1758 if (kgdb_io_module_registered)
1759 kgdb_initial_breakpoint();
1760
1761 return 0;
1762}
1763
1764early_param("kgdbwait", opt_kgdb_wait);
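In practice "kgdbwait" is combined with an I/O driver selection on the kernel command line, for example "kgdboc=ttyS0,115200 kgdbwait", so that the initial breakpoint fires as soon as the serial I/O module registers.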
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 21fe3c426948..0b624e791805 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -138,7 +138,8 @@ extern const void __start_notes __attribute__((weak));
138extern const void __stop_notes __attribute__((weak)); 138extern const void __stop_notes __attribute__((weak));
139#define notes_size (&__stop_notes - &__start_notes) 139#define notes_size (&__stop_notes - &__start_notes)
140 140
141static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr, 141static ssize_t notes_read(struct file *filp, struct kobject *kobj,
142 struct bin_attribute *bin_attr,
142 char *buf, loff_t off, size_t count) 143 char *buf, loff_t off, size_t count)
143{ 144{
144 memcpy(buf, &__start_notes + off, count); 145 memcpy(buf, &__start_notes + off, count);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index ec21304856d1..54286798c37b 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2711,6 +2711,8 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
2711} 2711}
2712EXPORT_SYMBOL_GPL(lockdep_init_map); 2712EXPORT_SYMBOL_GPL(lockdep_init_map);
2713 2713
2714struct lock_class_key __lockdep_no_validate__;
2715
2714/* 2716/*
2715 * This gets called for every mutex_lock*()/spin_lock*() operation. 2717 * This gets called for every mutex_lock*()/spin_lock*() operation.
2716 * We maintain the dependency maps and validate the locking attempt: 2718 * We maintain the dependency maps and validate the locking attempt:
@@ -2745,6 +2747,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2745 return 0; 2747 return 0;
2746 } 2748 }
2747 2749
2750 if (lock->key == &__lockdep_no_validate__)
2751 check = 1;
2752
2748 if (!subclass) 2753 if (!subclass)
2749 class = lock->class_cache; 2754 class = lock->class_cache;
2750 /* 2755 /*
diff --git a/kernel/module.c b/kernel/module.c
index 970d773aec62..a8014bfb5a4e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -77,6 +77,10 @@
77DEFINE_MUTEX(module_mutex); 77DEFINE_MUTEX(module_mutex);
78EXPORT_SYMBOL_GPL(module_mutex); 78EXPORT_SYMBOL_GPL(module_mutex);
79static LIST_HEAD(modules); 79static LIST_HEAD(modules);
80#ifdef CONFIG_KGDB_KDB
81struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
82#endif /* CONFIG_KGDB_KDB */
83
80 84
81/* Block module loading/unloading? */ 85/* Block module loading/unloading? */
82int modules_disabled = 0; 86int modules_disabled = 0;
@@ -1197,7 +1201,7 @@ struct module_notes_attrs {
1197 struct bin_attribute attrs[0]; 1201 struct bin_attribute attrs[0];
1198}; 1202};
1199 1203
1200static ssize_t module_notes_read(struct kobject *kobj, 1204static ssize_t module_notes_read(struct file *filp, struct kobject *kobj,
1201 struct bin_attribute *bin_attr, 1205 struct bin_attribute *bin_attr,
1202 char *buf, loff_t pos, size_t count) 1206 char *buf, loff_t pos, size_t count)
1203{ 1207{
diff --git a/kernel/padata.c b/kernel/padata.c
index fd03513c7327..b1c9857f8402 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -29,7 +29,7 @@
29#include <linux/rcupdate.h> 29#include <linux/rcupdate.h>
30 30
31#define MAX_SEQ_NR INT_MAX - NR_CPUS 31#define MAX_SEQ_NR INT_MAX - NR_CPUS
32#define MAX_OBJ_NUM 10000 * NR_CPUS 32#define MAX_OBJ_NUM 1000
33 33
34static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) 34static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
35{ 35{
@@ -88,7 +88,7 @@ static void padata_parallel_worker(struct work_struct *work)
88 local_bh_enable(); 88 local_bh_enable();
89} 89}
90 90
91/* 91/**
92 * padata_do_parallel - padata parallelization function 92 * padata_do_parallel - padata parallelization function
93 * 93 *
94 * @pinst: padata instance 94 * @pinst: padata instance
@@ -152,6 +152,23 @@ out:
152} 152}
153EXPORT_SYMBOL(padata_do_parallel); 153EXPORT_SYMBOL(padata_do_parallel);
154 154
155/*
156 * padata_get_next - Get the next object that needs serialization.
157 *
158 * Return values are:
159 *
160 * A pointer to the control struct of the next object that needs
161 * serialization, if present in one of the percpu reorder queues.
162 *
163 * NULL, if all percpu reorder queues are empty.
164 *
165 * -EINPROGRESS, if the next object that needs serialization will
166 * be parallel processed by another cpu and is not yet present in
167 * the cpu's reorder queue.
168 *
169 * -ENODATA, if this cpu has to do the parallel processing for
170 * the next object.
171 */
155static struct padata_priv *padata_get_next(struct parallel_data *pd) 172static struct padata_priv *padata_get_next(struct parallel_data *pd)
156{ 173{
157 int cpu, num_cpus, empty, calc_seq_nr; 174 int cpu, num_cpus, empty, calc_seq_nr;
@@ -173,7 +190,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
173 190
174 /* 191 /*
175 * Calculate the seq_nr of the object that should be 192 * Calculate the seq_nr of the object that should be
176 * next in this queue. 193 * next in this reorder queue.
177 */ 194 */
178 overrun = 0; 195 overrun = 0;
179 calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus) 196 calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
@@ -231,7 +248,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
231 goto out; 248 goto out;
232 } 249 }
233 250
234 if (next_nr % num_cpus == next_queue->cpu_index) { 251 queue = per_cpu_ptr(pd->queue, smp_processor_id());
252 if (queue->cpu_index == next_queue->cpu_index) {
235 padata = ERR_PTR(-ENODATA); 253 padata = ERR_PTR(-ENODATA);
236 goto out; 254 goto out;
237 } 255 }
@@ -247,19 +265,40 @@ static void padata_reorder(struct parallel_data *pd)
247 struct padata_queue *queue; 265 struct padata_queue *queue;
248 struct padata_instance *pinst = pd->pinst; 266 struct padata_instance *pinst = pd->pinst;
249 267
250try_again: 268 /*
269 * We need to ensure that only one cpu can work on dequeueing of
 270	 * the reorder queue at a time. Calculating in which percpu reorder
 271	 * queue the next object will arrive takes some time. A spinlock
 272	 * would be highly contended. Also it is not clear in which order
 273	 * the objects arrive at the reorder queues. So a cpu could wait to
 274	 * get the lock just to notice that there is nothing to do at the
 275	 * moment. Therefore we use a trylock and let the holder of the lock
 276	 * care for all the objects enqueued during the hold time of the lock.
277 */
251 if (!spin_trylock_bh(&pd->lock)) 278 if (!spin_trylock_bh(&pd->lock))
252 goto out; 279 return;
253 280
254 while (1) { 281 while (1) {
255 padata = padata_get_next(pd); 282 padata = padata_get_next(pd);
256 283
284 /*
285 * All reorder queues are empty, or the next object that needs
286 * serialization is parallel processed by another cpu and is
 287		 * still on its way to the cpu's reorder queue, nothing to
288 * do for now.
289 */
257 if (!padata || PTR_ERR(padata) == -EINPROGRESS) 290 if (!padata || PTR_ERR(padata) == -EINPROGRESS)
258 break; 291 break;
259 292
293 /*
294 * This cpu has to do the parallel processing of the next
295 * object. It's waiting in the cpu's parallelization queue,
 296		 * so exit immediately.
297 */
260 if (PTR_ERR(padata) == -ENODATA) { 298 if (PTR_ERR(padata) == -ENODATA) {
299 del_timer(&pd->timer);
261 spin_unlock_bh(&pd->lock); 300 spin_unlock_bh(&pd->lock);
262 goto out; 301 return;
263 } 302 }
264 303
265 queue = per_cpu_ptr(pd->queue, padata->cb_cpu); 304 queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
@@ -273,13 +312,27 @@ try_again:
273 312
274 spin_unlock_bh(&pd->lock); 313 spin_unlock_bh(&pd->lock);
275 314
276 if (atomic_read(&pd->reorder_objects)) 315 /*
277 goto try_again; 316 * The next object that needs serialization might have arrived to
 317	 * the reorder queues in the meantime; we will be called again
 318	 * from the timer function if no one else cares for it.
319 */
320 if (atomic_read(&pd->reorder_objects)
321 && !(pinst->flags & PADATA_RESET))
322 mod_timer(&pd->timer, jiffies + HZ);
323 else
324 del_timer(&pd->timer);
278 325
279out:
280 return; 326 return;
281} 327}
282 328
329static void padata_reorder_timer(unsigned long arg)
330{
331 struct parallel_data *pd = (struct parallel_data *)arg;
332
333 padata_reorder(pd);
334}
335
283static void padata_serial_worker(struct work_struct *work) 336static void padata_serial_worker(struct work_struct *work)
284{ 337{
285 struct padata_queue *queue; 338 struct padata_queue *queue;
@@ -308,7 +361,7 @@ static void padata_serial_worker(struct work_struct *work)
308 local_bh_enable(); 361 local_bh_enable();
309} 362}
310 363
311/* 364/**
312 * padata_do_serial - padata serialization function 365 * padata_do_serial - padata serialization function
313 * 366 *
314 * @padata: object to be serialized. 367 * @padata: object to be serialized.
@@ -338,6 +391,7 @@ void padata_do_serial(struct padata_priv *padata)
338} 391}
339EXPORT_SYMBOL(padata_do_serial); 392EXPORT_SYMBOL(padata_do_serial);
340 393
 394/* Allocate and initialize the internal cpumask-dependent resources. */
341static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, 395static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
342 const struct cpumask *cpumask) 396 const struct cpumask *cpumask)
343{ 397{
@@ -358,17 +412,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
358 if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL)) 412 if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
359 goto err_free_queue; 413 goto err_free_queue;
360 414
361 for_each_possible_cpu(cpu) { 415 cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
416
417 for_each_cpu(cpu, pd->cpumask) {
362 queue = per_cpu_ptr(pd->queue, cpu); 418 queue = per_cpu_ptr(pd->queue, cpu);
363 419
364 queue->pd = pd; 420 queue->pd = pd;
365 421
366 if (cpumask_test_cpu(cpu, cpumask) 422 queue->cpu_index = cpu_index;
367 && cpumask_test_cpu(cpu, cpu_active_mask)) { 423 cpu_index++;
368 queue->cpu_index = cpu_index;
369 cpu_index++;
370 } else
371 queue->cpu_index = -1;
372 424
373 INIT_LIST_HEAD(&queue->reorder.list); 425 INIT_LIST_HEAD(&queue->reorder.list);
374 INIT_LIST_HEAD(&queue->parallel.list); 426 INIT_LIST_HEAD(&queue->parallel.list);
@@ -382,11 +434,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
382 atomic_set(&queue->num_obj, 0); 434 atomic_set(&queue->num_obj, 0);
383 } 435 }
384 436
385 cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
386
387 num_cpus = cpumask_weight(pd->cpumask); 437 num_cpus = cpumask_weight(pd->cpumask);
388 pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; 438 pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
389 439
440 setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
390 atomic_set(&pd->seq_nr, -1); 441 atomic_set(&pd->seq_nr, -1);
391 atomic_set(&pd->reorder_objects, 0); 442 atomic_set(&pd->reorder_objects, 0);
392 atomic_set(&pd->refcnt, 0); 443 atomic_set(&pd->refcnt, 0);
@@ -410,6 +461,31 @@ static void padata_free_pd(struct parallel_data *pd)
410 kfree(pd); 461 kfree(pd);
411} 462}
412 463
464/* Flush all objects out of the padata queues. */
465static void padata_flush_queues(struct parallel_data *pd)
466{
467 int cpu;
468 struct padata_queue *queue;
469
470 for_each_cpu(cpu, pd->cpumask) {
471 queue = per_cpu_ptr(pd->queue, cpu);
472 flush_work(&queue->pwork);
473 }
474
475 del_timer_sync(&pd->timer);
476
477 if (atomic_read(&pd->reorder_objects))
478 padata_reorder(pd);
479
480 for_each_cpu(cpu, pd->cpumask) {
481 queue = per_cpu_ptr(pd->queue, cpu);
482 flush_work(&queue->swork);
483 }
484
485 BUG_ON(atomic_read(&pd->refcnt) != 0);
486}
487
 488/* Replace the internal control structure with a new one. */
413static void padata_replace(struct padata_instance *pinst, 489static void padata_replace(struct padata_instance *pinst,
414 struct parallel_data *pd_new) 490 struct parallel_data *pd_new)
415{ 491{
@@ -421,17 +497,13 @@ static void padata_replace(struct padata_instance *pinst,
421 497
422 synchronize_rcu(); 498 synchronize_rcu();
423 499
424 while (atomic_read(&pd_old->refcnt) != 0) 500 padata_flush_queues(pd_old);
425 yield();
426
427 flush_workqueue(pinst->wq);
428
429 padata_free_pd(pd_old); 501 padata_free_pd(pd_old);
430 502
431 pinst->flags &= ~PADATA_RESET; 503 pinst->flags &= ~PADATA_RESET;
432} 504}
433 505
434/* 506/**
435 * padata_set_cpumask - set the cpumask that padata should use 507 * padata_set_cpumask - set the cpumask that padata should use
436 * 508 *
437 * @pinst: padata instance 509 * @pinst: padata instance
@@ -443,10 +515,10 @@ int padata_set_cpumask(struct padata_instance *pinst,
443 struct parallel_data *pd; 515 struct parallel_data *pd;
444 int err = 0; 516 int err = 0;
445 517
446 might_sleep();
447
448 mutex_lock(&pinst->lock); 518 mutex_lock(&pinst->lock);
449 519
520 get_online_cpus();
521
450 pd = padata_alloc_pd(pinst, cpumask); 522 pd = padata_alloc_pd(pinst, cpumask);
451 if (!pd) { 523 if (!pd) {
452 err = -ENOMEM; 524 err = -ENOMEM;
@@ -458,6 +530,8 @@ int padata_set_cpumask(struct padata_instance *pinst,
458 padata_replace(pinst, pd); 530 padata_replace(pinst, pd);
459 531
460out: 532out:
533 put_online_cpus();
534
461 mutex_unlock(&pinst->lock); 535 mutex_unlock(&pinst->lock);
462 536
463 return err; 537 return err;
@@ -479,7 +553,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
479 return 0; 553 return 0;
480} 554}
481 555
482/* 556/**
483 * padata_add_cpu - add a cpu to the padata cpumask 557 * padata_add_cpu - add a cpu to the padata cpumask
484 * 558 *
485 * @pinst: padata instance 559 * @pinst: padata instance
@@ -489,12 +563,12 @@ int padata_add_cpu(struct padata_instance *pinst, int cpu)
489{ 563{
490 int err; 564 int err;
491 565
492 might_sleep();
493
494 mutex_lock(&pinst->lock); 566 mutex_lock(&pinst->lock);
495 567
568 get_online_cpus();
496 cpumask_set_cpu(cpu, pinst->cpumask); 569 cpumask_set_cpu(cpu, pinst->cpumask);
497 err = __padata_add_cpu(pinst, cpu); 570 err = __padata_add_cpu(pinst, cpu);
571 put_online_cpus();
498 572
499 mutex_unlock(&pinst->lock); 573 mutex_unlock(&pinst->lock);
500 574
@@ -517,7 +591,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
517 return 0; 591 return 0;
518} 592}
519 593
520/* 594/**
521 * padata_remove_cpu - remove a cpu from the padata cpumask 595 * padata_remove_cpu - remove a cpu from the padata cpumask
522 * 596 *
523 * @pinst: padata instance 597 * @pinst: padata instance
@@ -527,12 +601,12 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu)
527{ 601{
528 int err; 602 int err;
529 603
530 might_sleep();
531
532 mutex_lock(&pinst->lock); 604 mutex_lock(&pinst->lock);
533 605
606 get_online_cpus();
534 cpumask_clear_cpu(cpu, pinst->cpumask); 607 cpumask_clear_cpu(cpu, pinst->cpumask);
535 err = __padata_remove_cpu(pinst, cpu); 608 err = __padata_remove_cpu(pinst, cpu);
609 put_online_cpus();
536 610
537 mutex_unlock(&pinst->lock); 611 mutex_unlock(&pinst->lock);
538 612
@@ -540,38 +614,35 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu)
540} 614}
541EXPORT_SYMBOL(padata_remove_cpu); 615EXPORT_SYMBOL(padata_remove_cpu);
542 616
543/* 617/**
544 * padata_start - start the parallel processing 618 * padata_start - start the parallel processing
545 * 619 *
546 * @pinst: padata instance to start 620 * @pinst: padata instance to start
547 */ 621 */
548void padata_start(struct padata_instance *pinst) 622void padata_start(struct padata_instance *pinst)
549{ 623{
550 might_sleep();
551
552 mutex_lock(&pinst->lock); 624 mutex_lock(&pinst->lock);
553 pinst->flags |= PADATA_INIT; 625 pinst->flags |= PADATA_INIT;
554 mutex_unlock(&pinst->lock); 626 mutex_unlock(&pinst->lock);
555} 627}
556EXPORT_SYMBOL(padata_start); 628EXPORT_SYMBOL(padata_start);
557 629
558/* 630/**
559 * padata_stop - stop the parallel processing 631 * padata_stop - stop the parallel processing
560 * 632 *
561 * @pinst: padata instance to stop 633 * @pinst: padata instance to stop
562 */ 634 */
563void padata_stop(struct padata_instance *pinst) 635void padata_stop(struct padata_instance *pinst)
564{ 636{
565 might_sleep();
566
567 mutex_lock(&pinst->lock); 637 mutex_lock(&pinst->lock);
568 pinst->flags &= ~PADATA_INIT; 638 pinst->flags &= ~PADATA_INIT;
569 mutex_unlock(&pinst->lock); 639 mutex_unlock(&pinst->lock);
570} 640}
571EXPORT_SYMBOL(padata_stop); 641EXPORT_SYMBOL(padata_stop);
572 642
573static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, 643#ifdef CONFIG_HOTPLUG_CPU
574 unsigned long action, void *hcpu) 644static int padata_cpu_callback(struct notifier_block *nfb,
645 unsigned long action, void *hcpu)
575{ 646{
576 int err; 647 int err;
577 struct padata_instance *pinst; 648 struct padata_instance *pinst;
@@ -621,8 +692,9 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
621 692
622 return NOTIFY_OK; 693 return NOTIFY_OK;
623} 694}
695#endif
624 696
625/* 697/**
626 * padata_alloc - allocate and initialize a padata instance 698 * padata_alloc - allocate and initialize a padata instance
627 * 699 *
628 * @cpumask: cpumask that padata uses for parallelization 700 * @cpumask: cpumask that padata uses for parallelization
@@ -631,7 +703,6 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
631struct padata_instance *padata_alloc(const struct cpumask *cpumask, 703struct padata_instance *padata_alloc(const struct cpumask *cpumask,
632 struct workqueue_struct *wq) 704 struct workqueue_struct *wq)
633{ 705{
634 int err;
635 struct padata_instance *pinst; 706 struct padata_instance *pinst;
636 struct parallel_data *pd; 707 struct parallel_data *pd;
637 708
@@ -639,6 +710,8 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
639 if (!pinst) 710 if (!pinst)
640 goto err; 711 goto err;
641 712
713 get_online_cpus();
714
642 pd = padata_alloc_pd(pinst, cpumask); 715 pd = padata_alloc_pd(pinst, cpumask);
643 if (!pd) 716 if (!pd)
644 goto err_free_inst; 717 goto err_free_inst;
@@ -654,31 +727,32 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
654 727
655 pinst->flags = 0; 728 pinst->flags = 0;
656 729
730#ifdef CONFIG_HOTPLUG_CPU
657 pinst->cpu_notifier.notifier_call = padata_cpu_callback; 731 pinst->cpu_notifier.notifier_call = padata_cpu_callback;
658 pinst->cpu_notifier.priority = 0; 732 pinst->cpu_notifier.priority = 0;
659 err = register_hotcpu_notifier(&pinst->cpu_notifier); 733 register_hotcpu_notifier(&pinst->cpu_notifier);
660 if (err) 734#endif
661 goto err_free_cpumask; 735
736 put_online_cpus();
662 737
663 mutex_init(&pinst->lock); 738 mutex_init(&pinst->lock);
664 739
665 return pinst; 740 return pinst;
666 741
667err_free_cpumask:
668 free_cpumask_var(pinst->cpumask);
669err_free_pd: 742err_free_pd:
670 padata_free_pd(pd); 743 padata_free_pd(pd);
671err_free_inst: 744err_free_inst:
672 kfree(pinst); 745 kfree(pinst);
746 put_online_cpus();
673err: 747err:
674 return NULL; 748 return NULL;
675} 749}
676EXPORT_SYMBOL(padata_alloc); 750EXPORT_SYMBOL(padata_alloc);
677 751
678/* 752/**
679 * padata_free - free a padata instance 753 * padata_free - free a padata instance
680 * 754 *
681 * @ padata_inst: padata instance to free 755 * @padata_inst: padata instance to free
682 */ 756 */
683void padata_free(struct padata_instance *pinst) 757void padata_free(struct padata_instance *pinst)
684{ 758{
@@ -686,10 +760,13 @@ void padata_free(struct padata_instance *pinst)
686 760
687 synchronize_rcu(); 761 synchronize_rcu();
688 762
689 while (atomic_read(&pinst->pd->refcnt) != 0) 763#ifdef CONFIG_HOTPLUG_CPU
690 yield();
691
692 unregister_hotcpu_notifier(&pinst->cpu_notifier); 764 unregister_hotcpu_notifier(&pinst->cpu_notifier);
765#endif
766 get_online_cpus();
767 padata_flush_queues(pinst->pd);
768 put_online_cpus();
769
693 padata_free_pd(pinst->pd); 770 padata_free_pd(pinst->pd);
694 free_cpumask_var(pinst->cpumask); 771 free_cpumask_var(pinst->cpumask);
695 kfree(pinst); 772 kfree(pinst);
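The reorder path above relies on spin_trylock_bh() so that only one cpu drains the reorder queues at a time, with the timer as a safety net for objects that arrive after the lock is dropped. A userspace analogue of the trylock idea (a sketch under that assumption; the timer-driven retry is only noted in a comment):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t reorder_lock = PTHREAD_MUTEX_INITIALIZER;
static int reorder_objects = 3;

static void reorder(void)
{
	if (pthread_mutex_trylock(&reorder_lock) != 0)
		return;		/* another thread is already draining */

	while (reorder_objects > 0) {
		printf("serializing object %d\n", reorder_objects);
		reorder_objects--;
	}
	pthread_mutex_unlock(&reorder_lock);
	/* The kernel re-arms a timer here so stragglers that were
	 * enqueued after the unlock still get serialized. */
}

int main(void)
{
	reorder();
	return 0;
}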
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 3db49b9ca374..f42d3f737a33 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -2,7 +2,7 @@
2 * This module exposes the interface to kernel space for specifying 2 * This module exposes the interface to kernel space for specifying
3 * QoS dependencies. It provides infrastructure for registration of: 3 * QoS dependencies. It provides infrastructure for registration of:
4 * 4 *
5 * Dependents on a QoS value : register requirements 5 * Dependents on a QoS value : register requests
6 * Watchers of QoS value : get notified when target QoS value changes 6 * Watchers of QoS value : get notified when target QoS value changes
7 * 7 *
8 * This QoS design is best effort based. Dependents register their QoS needs. 8 * This QoS design is best effort based. Dependents register their QoS needs.
@@ -14,19 +14,21 @@
14 * timeout: usec <-- currently not used. 14 * timeout: usec <-- currently not used.
15 * throughput: kbs (kilo byte / sec) 15 * throughput: kbs (kilo byte / sec)
16 * 16 *
17 * There are lists of pm_qos_objects each one wrapping requirements, notifiers 17 * There are lists of pm_qos_objects each one wrapping requests, notifiers
18 * 18 *
19 * User mode requirements on a QOS parameter register themselves to the 19 * User mode requests on a QOS parameter register themselves to the
20 * subsystem by opening the device node /dev/... and writing there request to 20 * subsystem by opening the device node /dev/... and writing there request to
21 * the node. As long as the process holds a file handle open to the node the 21 * the node. As long as the process holds a file handle open to the node the
22 * client continues to be accounted for. Upon file release the usermode 22 * client continues to be accounted for. Upon file release the usermode
23 * requirement is removed and a new qos target is computed. This way when the 23 * request is removed and a new qos target is computed. This way when the
24 * requirement that the application has is cleaned up when closes the file 24 * request that the application has is cleaned up when closes the file
25 * pointer or exits the pm_qos_object will get an opportunity to clean up. 25 * pointer or exits the pm_qos_object will get an opportunity to clean up.
26 * 26 *
27 * Mark Gross <mgross@linux.intel.com> 27 * Mark Gross <mgross@linux.intel.com>
28 */ 28 */
29 29
30/*#define DEBUG*/
31
30#include <linux/pm_qos_params.h> 32#include <linux/pm_qos_params.h>
31#include <linux/sched.h> 33#include <linux/sched.h>
32#include <linux/spinlock.h> 34#include <linux/spinlock.h>
@@ -42,25 +44,25 @@
42#include <linux/uaccess.h> 44#include <linux/uaccess.h>
43 45
44/* 46/*
45 * locking rule: all changes to requirements or notifiers lists 47 * locking rule: all changes to requests or notifiers lists
46 * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock 48 * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
47 * held, taken with _irqsave. One lock to rule them all 49 * held, taken with _irqsave. One lock to rule them all
48 */ 50 */
49struct requirement_list { 51struct pm_qos_request_list {
50 struct list_head list; 52 struct list_head list;
51 union { 53 union {
52 s32 value; 54 s32 value;
53 s32 usec; 55 s32 usec;
54 s32 kbps; 56 s32 kbps;
55 }; 57 };
56 char *name; 58 int pm_qos_class;
57}; 59};
58 60
59static s32 max_compare(s32 v1, s32 v2); 61static s32 max_compare(s32 v1, s32 v2);
60static s32 min_compare(s32 v1, s32 v2); 62static s32 min_compare(s32 v1, s32 v2);
61 63
62struct pm_qos_object { 64struct pm_qos_object {
63 struct requirement_list requirements; 65 struct pm_qos_request_list requests;
64 struct blocking_notifier_head *notifiers; 66 struct blocking_notifier_head *notifiers;
65 struct miscdevice pm_qos_power_miscdev; 67 struct miscdevice pm_qos_power_miscdev;
66 char *name; 68 char *name;
@@ -72,7 +74,7 @@ struct pm_qos_object {
72static struct pm_qos_object null_pm_qos; 74static struct pm_qos_object null_pm_qos;
73static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); 75static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
74static struct pm_qos_object cpu_dma_pm_qos = { 76static struct pm_qos_object cpu_dma_pm_qos = {
75 .requirements = {LIST_HEAD_INIT(cpu_dma_pm_qos.requirements.list)}, 77 .requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)},
76 .notifiers = &cpu_dma_lat_notifier, 78 .notifiers = &cpu_dma_lat_notifier,
77 .name = "cpu_dma_latency", 79 .name = "cpu_dma_latency",
78 .default_value = 2000 * USEC_PER_SEC, 80 .default_value = 2000 * USEC_PER_SEC,
@@ -82,7 +84,7 @@ static struct pm_qos_object cpu_dma_pm_qos = {
82 84
83static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); 85static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
84static struct pm_qos_object network_lat_pm_qos = { 86static struct pm_qos_object network_lat_pm_qos = {
85 .requirements = {LIST_HEAD_INIT(network_lat_pm_qos.requirements.list)}, 87 .requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)},
86 .notifiers = &network_lat_notifier, 88 .notifiers = &network_lat_notifier,
87 .name = "network_latency", 89 .name = "network_latency",
88 .default_value = 2000 * USEC_PER_SEC, 90 .default_value = 2000 * USEC_PER_SEC,
@@ -93,8 +95,7 @@ static struct pm_qos_object network_lat_pm_qos = {
93 95
94static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); 96static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
95static struct pm_qos_object network_throughput_pm_qos = { 97static struct pm_qos_object network_throughput_pm_qos = {
96 .requirements = 98 .requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)},
97 {LIST_HEAD_INIT(network_throughput_pm_qos.requirements.list)},
98 .notifiers = &network_throughput_notifier, 99 .notifiers = &network_throughput_notifier,
99 .name = "network_throughput", 100 .name = "network_throughput",
100 .default_value = 0, 101 .default_value = 0,
@@ -135,31 +136,34 @@ static s32 min_compare(s32 v1, s32 v2)
135} 136}
136 137
137 138
138static void update_target(int target) 139static void update_target(int pm_qos_class)
139{ 140{
140 s32 extreme_value; 141 s32 extreme_value;
141 struct requirement_list *node; 142 struct pm_qos_request_list *node;
142 unsigned long flags; 143 unsigned long flags;
143 int call_notifier = 0; 144 int call_notifier = 0;
144 145
145 spin_lock_irqsave(&pm_qos_lock, flags); 146 spin_lock_irqsave(&pm_qos_lock, flags);
146 extreme_value = pm_qos_array[target]->default_value; 147 extreme_value = pm_qos_array[pm_qos_class]->default_value;
147 list_for_each_entry(node, 148 list_for_each_entry(node,
148 &pm_qos_array[target]->requirements.list, list) { 149 &pm_qos_array[pm_qos_class]->requests.list, list) {
149 extreme_value = pm_qos_array[target]->comparitor( 150 extreme_value = pm_qos_array[pm_qos_class]->comparitor(
150 extreme_value, node->value); 151 extreme_value, node->value);
151 } 152 }
152 if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) { 153 if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) !=
154 extreme_value) {
153 call_notifier = 1; 155 call_notifier = 1;
154 atomic_set(&pm_qos_array[target]->target_value, extreme_value); 156 atomic_set(&pm_qos_array[pm_qos_class]->target_value,
155 pr_debug(KERN_ERR "new target for qos %d is %d\n", target, 157 extreme_value);
156 atomic_read(&pm_qos_array[target]->target_value)); 158 pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class,
159 atomic_read(&pm_qos_array[pm_qos_class]->target_value));
157 } 160 }
158 spin_unlock_irqrestore(&pm_qos_lock, flags); 161 spin_unlock_irqrestore(&pm_qos_lock, flags);
159 162
160 if (call_notifier) 163 if (call_notifier)
161 blocking_notifier_call_chain(pm_qos_array[target]->notifiers, 164 blocking_notifier_call_chain(
162 (unsigned long) extreme_value, NULL); 165 pm_qos_array[pm_qos_class]->notifiers,
166 (unsigned long) extreme_value, NULL);
163} 167}
164 168
165static int register_pm_qos_misc(struct pm_qos_object *qos) 169static int register_pm_qos_misc(struct pm_qos_object *qos)
@@ -185,125 +189,112 @@ static int find_pm_qos_object_by_minor(int minor)
185} 189}
186 190
187/** 191/**
188 * pm_qos_requirement - returns current system wide qos expectation 192 * pm_qos_request - returns current system wide qos expectation
189 * @pm_qos_class: identification of which qos value is requested 193 * @pm_qos_class: identification of which qos value is requested
190 * 194 *
191 * This function returns the current target value in an atomic manner. 195 * This function returns the current target value in an atomic manner.
192 */ 196 */
193int pm_qos_requirement(int pm_qos_class) 197int pm_qos_request(int pm_qos_class)
194{ 198{
195 return atomic_read(&pm_qos_array[pm_qos_class]->target_value); 199 return atomic_read(&pm_qos_array[pm_qos_class]->target_value);
196} 200}
197EXPORT_SYMBOL_GPL(pm_qos_requirement); 201EXPORT_SYMBOL_GPL(pm_qos_request);
198 202
199/** 203/**
200 * pm_qos_add_requirement - inserts new qos request into the list 204 * pm_qos_add_request - inserts new qos request into the list
201 * @pm_qos_class: identifies which list of qos request to us 205 * @pm_qos_class: identifies which list of qos request to us
202 * @name: identifies the request
203 * @value: defines the qos request 206 * @value: defines the qos request
204 * 207 *
205 * This function inserts a new entry in the pm_qos_class list of requested qos 208 * This function inserts a new entry in the pm_qos_class list of requested qos
206 * performance characteristics. It recomputes the aggregate QoS expectations 209 * performance characteristics. It recomputes the aggregate QoS expectations
207 * for the pm_qos_class of parameters. 210 * for the pm_qos_class of parameters, and returns the pm_qos_request list
 211 * element as a handle for use in updating and removal. The caller needs to
 211 * save this handle for later use.
212 * this handle for later use.
208 */ 213 */
209int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value) 214struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value)
210{ 215{
211 struct requirement_list *dep; 216 struct pm_qos_request_list *dep;
212 unsigned long flags; 217 unsigned long flags;
213 218
214 dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL); 219 dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL);
215 if (dep) { 220 if (dep) {
216 if (value == PM_QOS_DEFAULT_VALUE) 221 if (value == PM_QOS_DEFAULT_VALUE)
217 dep->value = pm_qos_array[pm_qos_class]->default_value; 222 dep->value = pm_qos_array[pm_qos_class]->default_value;
218 else 223 else
219 dep->value = value; 224 dep->value = value;
220 dep->name = kstrdup(name, GFP_KERNEL); 225 dep->pm_qos_class = pm_qos_class;
221 if (!dep->name)
222 goto cleanup;
223 226
224 spin_lock_irqsave(&pm_qos_lock, flags); 227 spin_lock_irqsave(&pm_qos_lock, flags);
225 list_add(&dep->list, 228 list_add(&dep->list,
226 &pm_qos_array[pm_qos_class]->requirements.list); 229 &pm_qos_array[pm_qos_class]->requests.list);
227 spin_unlock_irqrestore(&pm_qos_lock, flags); 230 spin_unlock_irqrestore(&pm_qos_lock, flags);
228 update_target(pm_qos_class); 231 update_target(pm_qos_class);
229
230 return 0;
231 } 232 }
232 233
233cleanup: 234 return dep;
234 kfree(dep);
235 return -ENOMEM;
236} 235}
237EXPORT_SYMBOL_GPL(pm_qos_add_requirement); 236EXPORT_SYMBOL_GPL(pm_qos_add_request);
238 237
239/** 238/**
240 * pm_qos_update_requirement - modifies an existing qos request 239 * pm_qos_update_request - modifies an existing qos request
241 * @pm_qos_class: identifies which list of qos request to us 240 * @pm_qos_req : handle to list element holding a pm_qos request to use
242 * @name: identifies the request
243 * @value: defines the qos request 241 * @value: defines the qos request
244 * 242 *
245 * Updates an existing qos requirement for the pm_qos_class of parameters along 243 * Updates an existing qos request for the pm_qos_class of parameters along
246 * with updating the target pm_qos_class value. 244 * with updating the target pm_qos_class value.
247 * 245 *
248 * If the named request isn't in the list then no change is made. 246 * Attempts are made to make this code callable on hot code paths.
249 */ 247 */
250int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value) 248void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
249 s32 new_value)
251{ 250{
252 unsigned long flags; 251 unsigned long flags;
253 struct requirement_list *node;
254 int pending_update = 0; 252 int pending_update = 0;
253 s32 temp;
255 254
256 spin_lock_irqsave(&pm_qos_lock, flags); 255 if (pm_qos_req) { /*guard against callers passing in null */
257 list_for_each_entry(node, 256 spin_lock_irqsave(&pm_qos_lock, flags);
258 &pm_qos_array[pm_qos_class]->requirements.list, list) { 257 if (new_value == PM_QOS_DEFAULT_VALUE)
259 if (strcmp(node->name, name) == 0) { 258 temp = pm_qos_array[pm_qos_req->pm_qos_class]->default_value;
260 if (new_value == PM_QOS_DEFAULT_VALUE) 259 else
261 node->value = 260 temp = new_value;
262 pm_qos_array[pm_qos_class]->default_value; 261
263 else 262 if (temp != pm_qos_req->value) {
264 node->value = new_value;
265 pending_update = 1; 263 pending_update = 1;
266 break; 264 pm_qos_req->value = temp;
267 } 265 }
266 spin_unlock_irqrestore(&pm_qos_lock, flags);
267 if (pending_update)
268 update_target(pm_qos_req->pm_qos_class);
268 } 269 }
269 spin_unlock_irqrestore(&pm_qos_lock, flags);
270 if (pending_update)
271 update_target(pm_qos_class);
272
273 return 0;
274} 270}
275EXPORT_SYMBOL_GPL(pm_qos_update_requirement); 271EXPORT_SYMBOL_GPL(pm_qos_update_request);
276 272
277/** 273/**
278 * pm_qos_remove_requirement - modifies an existing qos request 274 * pm_qos_remove_request - modifies an existing qos request
279 * @pm_qos_class: identifies which list of qos request to us 275 * @pm_qos_req: handle to request list element
280 * @name: identifies the request
281 * 276 *
282 * Will remove named qos request from pm_qos_class list of parameters and 277 * Will remove pm qos request from the list of requests and
283 * recompute the current target value for the pm_qos_class. 278 * recompute the current target value for the pm_qos_class. Call this
279 * on slow code paths.
284 */ 280 */
285void pm_qos_remove_requirement(int pm_qos_class, char *name) 281void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req)
286{ 282{
287 unsigned long flags; 283 unsigned long flags;
288 struct requirement_list *node; 284 int qos_class;
289 int pending_update = 0;
290 285
286 if (pm_qos_req == NULL)
287 return;
288 /* silent return to keep pcm code cleaner */
289
290 qos_class = pm_qos_req->pm_qos_class;
291 spin_lock_irqsave(&pm_qos_lock, flags); 291 spin_lock_irqsave(&pm_qos_lock, flags);
292 list_for_each_entry(node, 292 list_del(&pm_qos_req->list);
293 &pm_qos_array[pm_qos_class]->requirements.list, list) { 293 kfree(pm_qos_req);
294 if (strcmp(node->name, name) == 0) {
295 kfree(node->name);
296 list_del(&node->list);
297 kfree(node);
298 pending_update = 1;
299 break;
300 }
301 }
302 spin_unlock_irqrestore(&pm_qos_lock, flags); 294 spin_unlock_irqrestore(&pm_qos_lock, flags);
303 if (pending_update) 295 update_target(qos_class);
304 update_target(pm_qos_class);
305} 296}
306EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); 297EXPORT_SYMBOL_GPL(pm_qos_remove_request);
307 298
308/** 299/**
309 * pm_qos_add_notifier - sets notification entry for changes to target value 300 * pm_qos_add_notifier - sets notification entry for changes to target value
@@ -313,7 +304,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
313 * will register the notifier into a notification chain that gets called 304 * will register the notifier into a notification chain that gets called
314 * upon changes to the pm_qos_class target value. 305 * upon changes to the pm_qos_class target value.
315 */ 306 */
316 int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) 307int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
317{ 308{
318 int retval; 309 int retval;
319 310
@@ -343,21 +334,16 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
343} 334}
344EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); 335EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
345 336
346#define PID_NAME_LEN 32
347
348static int pm_qos_power_open(struct inode *inode, struct file *filp) 337static int pm_qos_power_open(struct inode *inode, struct file *filp)
349{ 338{
350 int ret;
351 long pm_qos_class; 339 long pm_qos_class;
352 char name[PID_NAME_LEN];
353 340
354 pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); 341 pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
355 if (pm_qos_class >= 0) { 342 if (pm_qos_class >= 0) {
356 filp->private_data = (void *)pm_qos_class; 343 filp->private_data = (void *) pm_qos_add_request(pm_qos_class,
357 snprintf(name, PID_NAME_LEN, "process_%d", current->pid); 344 PM_QOS_DEFAULT_VALUE);
358 ret = pm_qos_add_requirement(pm_qos_class, name, 345
359 PM_QOS_DEFAULT_VALUE); 346 if (filp->private_data)
360 if (ret >= 0)
361 return 0; 347 return 0;
362 } 348 }
363 return -EPERM; 349 return -EPERM;
@@ -365,32 +351,40 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
365 351
366static int pm_qos_power_release(struct inode *inode, struct file *filp) 352static int pm_qos_power_release(struct inode *inode, struct file *filp)
367{ 353{
368 int pm_qos_class; 354 struct pm_qos_request_list *req;
369 char name[PID_NAME_LEN];
370 355
371 pm_qos_class = (long)filp->private_data; 356 req = (struct pm_qos_request_list *)filp->private_data;
372 snprintf(name, PID_NAME_LEN, "process_%d", current->pid); 357 pm_qos_remove_request(req);
373 pm_qos_remove_requirement(pm_qos_class, name);
374 358
375 return 0; 359 return 0;
376} 360}
377 361
362
378static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, 363static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
379 size_t count, loff_t *f_pos) 364 size_t count, loff_t *f_pos)
380{ 365{
381 s32 value; 366 s32 value;
382 int pm_qos_class; 367 int x;
383 char name[PID_NAME_LEN]; 368 char ascii_value[11];
384 369 struct pm_qos_request_list *pm_qos_req;
385 pm_qos_class = (long)filp->private_data; 370
386 if (count != sizeof(s32)) 371 if (count == sizeof(s32)) {
372 if (copy_from_user(&value, buf, sizeof(s32)))
373 return -EFAULT;
 374	} else if (count == 11) { /* len('0x12345678\0') */
375 if (copy_from_user(ascii_value, buf, 11))
376 return -EFAULT;
377 x = sscanf(ascii_value, "%x", &value);
378 if (x != 1)
379 return -EINVAL;
 380		pr_debug("%s, %d, 0x%x\n", ascii_value, x, value);
381 } else
387 return -EINVAL; 382 return -EINVAL;
388 if (copy_from_user(&value, buf, sizeof(s32)))
389 return -EFAULT;
390 snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
391 pm_qos_update_requirement(pm_qos_class, name, value);
392 383
393 return sizeof(s32); 384 pm_qos_req = (struct pm_qos_request_list *)filp->private_data;
385 pm_qos_update_request(pm_qos_req, value);
386
387 return count;
394} 388}
395 389
396 390
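After this rework a kernel user holds the handle returned by pm_qos_add_request() and passes it to the update and remove calls. A sketch of a hypothetical driver using the new interface (the demo_* names are illustrative; the class constant and default value come from pm_qos_params.h):

#include <linux/pm_qos_params.h>

static struct pm_qos_request_list *demo_qos;

static void demo_start_streaming(void)
{
	/* NULL is possible on allocation failure; a sketch omits handling. */
	demo_qos = pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
				      PM_QOS_DEFAULT_VALUE);
	pm_qos_update_request(demo_qos, 50);	/* ask for <= 50 usec */
}

static void demo_stop_streaming(void)
{
	pm_qos_remove_request(demo_qos);	/* drop the constraint */
	demo_qos = NULL;
}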
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bc7704b3a443..00bb252f29a2 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -11,19 +11,18 @@
11#include <trace/events/timer.h> 11#include <trace/events/timer.h>
12 12
13/* 13/*
14 * Called after updating RLIMIT_CPU to set timer expiration if necessary. 14 * Called after updating RLIMIT_CPU to run cpu timer and update
15 * tsk->signal->cputime_expires expiration cache if necessary. Needs
16 * siglock protection since other code may update expiration cache as
17 * well.
15 */ 18 */
16void update_rlimit_cpu(unsigned long rlim_new) 19void update_rlimit_cpu(unsigned long rlim_new)
17{ 20{
18 cputime_t cputime = secs_to_cputime(rlim_new); 21 cputime_t cputime = secs_to_cputime(rlim_new);
19 struct signal_struct *const sig = current->signal;
20 22
21 if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || 23 spin_lock_irq(&current->sighand->siglock);
22 cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { 24 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
23 spin_lock_irq(&current->sighand->siglock); 25 spin_unlock_irq(&current->sighand->siglock);
24 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
25 spin_unlock_irq(&current->sighand->siglock);
26 }
27} 26}
28 27
29static int check_clock(const clockid_t which_clock) 28static int check_clock(const clockid_t which_clock)
@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
548 cputime_gt(expires, new_exp); 547 cputime_gt(expires, new_exp);
549} 548}
550 549
551static inline int expires_le(cputime_t expires, cputime_t new_exp)
552{
553 return !cputime_eq(expires, cputime_zero) &&
554 cputime_le(expires, new_exp);
555}
556/* 550/*
557 * Insert the timer on the appropriate list before any timers that 551 * Insert the timer on the appropriate list before any timers that
558 * expire later. This must be called with the tasklist_lock held 552 * expire later. This must be called with the tasklist_lock held
559 * for reading, and interrupts disabled. 553 * for reading, interrupts disabled and p->sighand->siglock taken.
560 */ 554 */
561static void arm_timer(struct k_itimer *timer, union cpu_time_count now) 555static void arm_timer(struct k_itimer *timer)
562{ 556{
563 struct task_struct *p = timer->it.cpu.task; 557 struct task_struct *p = timer->it.cpu.task;
564 struct list_head *head, *listpos; 558 struct list_head *head, *listpos;
559 struct task_cputime *cputime_expires;
565 struct cpu_timer_list *const nt = &timer->it.cpu; 560 struct cpu_timer_list *const nt = &timer->it.cpu;
566 struct cpu_timer_list *next; 561 struct cpu_timer_list *next;
567 unsigned long i;
568 562
569 head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? 563 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
570 p->cpu_timers : p->signal->cpu_timers); 564 head = p->cpu_timers;
565 cputime_expires = &p->cputime_expires;
566 } else {
567 head = p->signal->cpu_timers;
568 cputime_expires = &p->signal->cputime_expires;
569 }
571 head += CPUCLOCK_WHICH(timer->it_clock); 570 head += CPUCLOCK_WHICH(timer->it_clock);
572 571
573 BUG_ON(!irqs_disabled());
574 spin_lock(&p->sighand->siglock);
575
576 listpos = head; 572 listpos = head;
577 if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { 573 list_for_each_entry(next, head, entry) {
578 list_for_each_entry(next, head, entry) { 574 if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
579 if (next->expires.sched > nt->expires.sched) 575 break;
580 break; 576 listpos = &next->entry;
581 listpos = &next->entry;
582 }
583 } else {
584 list_for_each_entry(next, head, entry) {
585 if (cputime_gt(next->expires.cpu, nt->expires.cpu))
586 break;
587 listpos = &next->entry;
588 }
589 } 577 }
590 list_add(&nt->entry, listpos); 578 list_add(&nt->entry, listpos);
591 579
592 if (listpos == head) { 580 if (listpos == head) {
581 union cpu_time_count *exp = &nt->expires;
582
593 /* 583 /*
594 * We are the new earliest-expiring timer. 584 * We are the new earliest-expiring POSIX 1.b timer, hence
595 * If we are a thread timer, there can always 585 * need to update expiration cache. Take into account that
596 * be a process timer telling us to stop earlier. 586 * for process timers we share expiration cache with itimers
587 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
597 */ 588 */
598 589
599 if (CPUCLOCK_PERTHREAD(timer->it_clock)) { 590 switch (CPUCLOCK_WHICH(timer->it_clock)) {
600 union cpu_time_count *exp = &nt->expires; 591 case CPUCLOCK_PROF:
601 592 if (expires_gt(cputime_expires->prof_exp, exp->cpu))
602 switch (CPUCLOCK_WHICH(timer->it_clock)) { 593 cputime_expires->prof_exp = exp->cpu;
603 default: 594 break;
604 BUG(); 595 case CPUCLOCK_VIRT:
605 case CPUCLOCK_PROF: 596 if (expires_gt(cputime_expires->virt_exp, exp->cpu))
606 if (expires_gt(p->cputime_expires.prof_exp, 597 cputime_expires->virt_exp = exp->cpu;
607 exp->cpu)) 598 break;
608 p->cputime_expires.prof_exp = exp->cpu; 599 case CPUCLOCK_SCHED:
609 break; 600 if (cputime_expires->sched_exp == 0 ||
610 case CPUCLOCK_VIRT: 601 cputime_expires->sched_exp > exp->sched)
611 if (expires_gt(p->cputime_expires.virt_exp, 602 cputime_expires->sched_exp = exp->sched;
612 exp->cpu)) 603 break;
613 p->cputime_expires.virt_exp = exp->cpu;
614 break;
615 case CPUCLOCK_SCHED:
616 if (p->cputime_expires.sched_exp == 0 ||
617 p->cputime_expires.sched_exp > exp->sched)
618 p->cputime_expires.sched_exp =
619 exp->sched;
620 break;
621 }
622 } else {
623 struct signal_struct *const sig = p->signal;
624 union cpu_time_count *exp = &timer->it.cpu.expires;
625
626 /*
627 * For a process timer, set the cached expiration time.
628 */
629 switch (CPUCLOCK_WHICH(timer->it_clock)) {
630 default:
631 BUG();
632 case CPUCLOCK_VIRT:
633 if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
634 exp->cpu))
635 break;
636 sig->cputime_expires.virt_exp = exp->cpu;
637 break;
638 case CPUCLOCK_PROF:
639 if (expires_le(sig->it[CPUCLOCK_PROF].expires,
640 exp->cpu))
641 break;
642 i = sig->rlim[RLIMIT_CPU].rlim_cur;
643 if (i != RLIM_INFINITY &&
644 i <= cputime_to_secs(exp->cpu))
645 break;
646 sig->cputime_expires.prof_exp = exp->cpu;
647 break;
648 case CPUCLOCK_SCHED:
649 sig->cputime_expires.sched_exp = exp->sched;
650 break;
651 }
652 } 604 }
653 } 605 }
654
655 spin_unlock(&p->sighand->siglock);
656} 606}
657 607
658/* 608/*
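The rewritten arm_timer() keeps each cpu-timer list sorted by expiry and inserts the new timer before the first entry that expires later, so the list head is always the soonest timer. A userspace analogue of that insertion (illustrative, with a plain singly linked list instead of list_head):

#include <stdio.h>

struct timer {
	unsigned long expires;
	struct timer *next;
};

/* Insert nt before the first entry that expires later than it. */
static void insert_sorted(struct timer **head, struct timer *nt)
{
	struct timer **pos = head;

	while (*pos && (*pos)->expires <= nt->expires)
		pos = &(*pos)->next;
	nt->next = *pos;
	*pos = nt;
}

int main(void)
{
	struct timer a = { 10, NULL }, b = { 5, NULL };
	struct timer *head = NULL;

	insert_sorted(&head, &a);
	insert_sorted(&head, &b);
	printf("earliest expiry: %lu\n", head->expires);	/* prints 5 */
	return 0;
}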
@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
660 */ 610 */
661static void cpu_timer_fire(struct k_itimer *timer) 611static void cpu_timer_fire(struct k_itimer *timer)
662{ 612{
663 if (unlikely(timer->sigq == NULL)) { 613 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
614 /*
615 * User doesn't want any signal.
616 */
617 timer->it.cpu.expires.sched = 0;
618 } else if (unlikely(timer->sigq == NULL)) {
664 /* 619 /*
665 * This is a special case for clock_nanosleep, 620 * This is a special case for clock_nanosleep,
666 * not a normal timer from sys_timer_create. 621 * not a normal timer from sys_timer_create.
@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
721 struct itimerspec *new, struct itimerspec *old) 676 struct itimerspec *new, struct itimerspec *old)
722{ 677{
723 struct task_struct *p = timer->it.cpu.task; 678 struct task_struct *p = timer->it.cpu.task;
724 union cpu_time_count old_expires, new_expires, val; 679 union cpu_time_count old_expires, new_expires, old_incr, val;
725 int ret; 680 int ret;
726 681
727 if (unlikely(p == NULL)) { 682 if (unlikely(p == NULL)) {
@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
752 BUG_ON(!irqs_disabled()); 707 BUG_ON(!irqs_disabled());
753 708
754 ret = 0; 709 ret = 0;
710 old_incr = timer->it.cpu.incr;
755 spin_lock(&p->sighand->siglock); 711 spin_lock(&p->sighand->siglock);
756 old_expires = timer->it.cpu.expires; 712 old_expires = timer->it.cpu.expires;
757 if (unlikely(timer->it.cpu.firing)) { 713 if (unlikely(timer->it.cpu.firing)) {
@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
759 ret = TIMER_RETRY; 715 ret = TIMER_RETRY;
760 } else 716 } else
761 list_del_init(&timer->it.cpu.entry); 717 list_del_init(&timer->it.cpu.entry);
762 spin_unlock(&p->sighand->siglock);
763 718
764 /* 719 /*
765 * We need to sample the current value to convert the new 720 * We need to sample the current value to convert the new
@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
813 * disable this firing since we are already reporting 768 * disable this firing since we are already reporting
814 * it as an overrun (thanks to bump_cpu_timer above). 769 * it as an overrun (thanks to bump_cpu_timer above).
815 */ 770 */
771 spin_unlock(&p->sighand->siglock);
816 read_unlock(&tasklist_lock); 772 read_unlock(&tasklist_lock);
817 goto out; 773 goto out;
818 } 774 }
@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
828 */ 784 */
829 timer->it.cpu.expires = new_expires; 785 timer->it.cpu.expires = new_expires;
830 if (new_expires.sched != 0 && 786 if (new_expires.sched != 0 &&
831 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
832 cpu_time_before(timer->it_clock, val, new_expires)) { 787 cpu_time_before(timer->it_clock, val, new_expires)) {
833 arm_timer(timer, val); 788 arm_timer(timer);
834 } 789 }
835 790
791 spin_unlock(&p->sighand->siglock);
836 read_unlock(&tasklist_lock); 792 read_unlock(&tasklist_lock);
837 793
838 /* 794 /*
@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
853 timer->it_overrun = -1; 809 timer->it_overrun = -1;
854 810
855 if (new_expires.sched != 0 && 811 if (new_expires.sched != 0 &&
856 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
857 !cpu_time_before(timer->it_clock, val, new_expires)) { 812 !cpu_time_before(timer->it_clock, val, new_expires)) {
858 /* 813 /*
859 * The designated time already passed, so we notify 814 * The designated time already passed, so we notify
@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
867 out: 822 out:
868 if (old) { 823 if (old) {
869 sample_to_timespec(timer->it_clock, 824 sample_to_timespec(timer->it_clock,
870 timer->it.cpu.incr, &old->it_interval); 825 old_incr, &old->it_interval);
871 } 826 }
872 return ret; 827 return ret;
873} 828}
@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
927 read_unlock(&tasklist_lock); 882 read_unlock(&tasklist_lock);
928 } 883 }
929 884
930 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
931 if (timer->it.cpu.incr.sched == 0 &&
932 cpu_time_before(timer->it_clock,
933 timer->it.cpu.expires, now)) {
934 /*
935 * Do-nothing timer expired and has no reload,
936 * so it's as if it was never set.
937 */
938 timer->it.cpu.expires.sched = 0;
939 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
940 return;
941 }
942 /*
943 * Account for any expirations and reloads that should
944 * have happened.
945 */
946 bump_cpu_timer(timer, now);
947 }
948
949 if (unlikely(clear_dead)) { 885 if (unlikely(clear_dead)) {
950 /* 886 /*
951 * We've noticed that the thread is dead, but 887 * We've noticed that the thread is dead, but
@@ -1066,16 +1002,9 @@ static void stop_process_timers(struct signal_struct *sig)
1066 struct thread_group_cputimer *cputimer = &sig->cputimer; 1002 struct thread_group_cputimer *cputimer = &sig->cputimer;
1067 unsigned long flags; 1003 unsigned long flags;
1068 1004
1069 if (!cputimer->running)
1070 return;
1071
1072 spin_lock_irqsave(&cputimer->lock, flags); 1005 spin_lock_irqsave(&cputimer->lock, flags);
1073 cputimer->running = 0; 1006 cputimer->running = 0;
1074 spin_unlock_irqrestore(&cputimer->lock, flags); 1007 spin_unlock_irqrestore(&cputimer->lock, flags);
1075
1076 sig->cputime_expires.prof_exp = cputime_zero;
1077 sig->cputime_expires.virt_exp = cputime_zero;
1078 sig->cputime_expires.sched_exp = 0;
1079} 1008}
1080 1009
1081static u32 onecputick; 1010static u32 onecputick;
@@ -1112,6 +1041,23 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
1112 } 1041 }
1113} 1042}
1114 1043
1044/**
1045 * task_cputime_zero - Check a task_cputime struct for all zero fields.
1046 *
1047 * @cputime: The struct to compare.
1048 *
1049 * Checks @cputime to see if all fields are zero. Returns true if all fields
1050 * are zero, false if any field is nonzero.
1051 */
1052static inline int task_cputime_zero(const struct task_cputime *cputime)
1053{
1054 if (cputime_eq(cputime->utime, cputime_zero) &&
1055 cputime_eq(cputime->stime, cputime_zero) &&
1056 cputime->sum_exec_runtime == 0)
1057 return 1;
1058 return 0;
1059}
1060
1115/* 1061/*
1116 * Check for any per-thread CPU timers that have fired and move them 1062 * Check for any per-thread CPU timers that have fired and move them
1117 * off the tsk->*_timers list onto the firing list. Per-thread timers 1063 * off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -1129,19 +1075,6 @@ static void check_process_timers(struct task_struct *tsk,
1129 unsigned long soft; 1075 unsigned long soft;
1130 1076
1131 /* 1077 /*
1132 * Don't sample the current process CPU clocks if there are no timers.
1133 */
1134 if (list_empty(&timers[CPUCLOCK_PROF]) &&
1135 cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) &&
1136 sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
1137 list_empty(&timers[CPUCLOCK_VIRT]) &&
1138 cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
1139 list_empty(&timers[CPUCLOCK_SCHED])) {
1140 stop_process_timers(sig);
1141 return;
1142 }
1143
1144 /*
1145 * Collect the current process totals. 1078 * Collect the current process totals.
1146 */ 1079 */
1147 thread_group_cputimer(tsk, &cputime); 1080 thread_group_cputimer(tsk, &cputime);
@@ -1230,18 +1163,11 @@ static void check_process_timers(struct task_struct *tsk,
1230 } 1163 }
1231 } 1164 }
1232 1165
1233 if (!cputime_eq(prof_expires, cputime_zero) &&
1234 (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
1235 cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
1236 sig->cputime_expires.prof_exp = prof_expires;
1237 if (!cputime_eq(virt_expires, cputime_zero) &&
1238 (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
1239 cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
1240 sig->cputime_expires.virt_exp = virt_expires;
1241 if (sched_expires != 0 &&
1242 (sig->cputime_expires.sched_exp == 0 ||
1243 sig->cputime_expires.sched_exp > sched_expires))
1244 sig->cputime_expires.sched_exp = sched_expires;
1166 sig->cputime_expires.prof_exp = prof_expires;
1167 sig->cputime_expires.virt_exp = virt_expires;
1168 sig->cputime_expires.sched_exp = sched_expires;
1169 if (task_cputime_zero(&sig->cputime_expires))
1170 stop_process_timers(sig);
1245} 1171}
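
The rewritten tail of check_process_timers() stores the recomputed expiries unconditionally and disarms the accounting only once every cached field is zero. A small stand-alone sketch of that store-then-check pattern (the struct mirrors task_cputime, but names here are invented, not kernel API):

#include <stdbool.h>
#include <stdio.h>

struct cputime_cache {
    unsigned long utime, stime;
    unsigned long long sum_exec_runtime;
};

static bool cache_zero(const struct cputime_cache *c)
{
    return c->utime == 0 && c->stime == 0 && c->sum_exec_runtime == 0;
}

static void update_cache(struct cputime_cache *c, unsigned long prof,
                         unsigned long virt, unsigned long long sched)
{
    /* store the recomputed expiries unconditionally ... */
    c->utime = prof;
    c->stime = virt;
    c->sum_exec_runtime = sched;
    /* ... and shut the timer machinery down only when empty */
    if (cache_zero(c))
        printf("stop_process_timers()\n");
}

int main(void)
{
    struct cputime_cache c = { 1, 0, 0 };

    update_cache(&c, 5, 0, 0);  /* still armed: prints nothing */
    update_cache(&c, 0, 0, 0);  /* all clear: disarms */
    return 0;
}
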
1246 1172
1247/* 1173/*
@@ -1270,6 +1196,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1270 goto out; 1196 goto out;
1271 } 1197 }
1272 read_lock(&tasklist_lock); /* arm_timer needs it. */ 1198 read_lock(&tasklist_lock); /* arm_timer needs it. */
1199 spin_lock(&p->sighand->siglock);
1273 } else { 1200 } else {
1274 read_lock(&tasklist_lock); 1201 read_lock(&tasklist_lock);
1275 if (unlikely(p->signal == NULL)) { 1202 if (unlikely(p->signal == NULL)) {
@@ -1290,6 +1217,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1290 clear_dead_task(timer, now); 1217 clear_dead_task(timer, now);
1291 goto out_unlock; 1218 goto out_unlock;
1292 } 1219 }
1220 spin_lock(&p->sighand->siglock);
1293 cpu_timer_sample_group(timer->it_clock, p, &now); 1221 cpu_timer_sample_group(timer->it_clock, p, &now);
1294 bump_cpu_timer(timer, now); 1222 bump_cpu_timer(timer, now);
1295 /* Leave the tasklist_lock locked for the call below. */ 1223 /* Leave the tasklist_lock locked for the call below. */
@@ -1298,7 +1226,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1298 /* 1226 /*
1299 * Now re-arm for the new expiry time. 1227 * Now re-arm for the new expiry time.
1300 */ 1228 */
1301 arm_timer(timer, now); 1229 BUG_ON(!irqs_disabled());
1230 arm_timer(timer);
1231 spin_unlock(&p->sighand->siglock);
1302 1232
1303out_unlock: 1233out_unlock:
1304 read_unlock(&tasklist_lock); 1234 read_unlock(&tasklist_lock);
@@ -1310,23 +1240,6 @@ out:
1310} 1240}
1311 1241
1312/** 1242/**
1313 * task_cputime_zero - Check a task_cputime struct for all zero fields.
1314 *
1315 * @cputime: The struct to compare.
1316 *
1317 * Checks @cputime to see if all fields are zero. Returns true if all fields
1318 * are zero, false if any field is nonzero.
1319 */
1320static inline int task_cputime_zero(const struct task_cputime *cputime)
1321{
1322 if (cputime_eq(cputime->utime, cputime_zero) &&
1323 cputime_eq(cputime->stime, cputime_zero) &&
1324 cputime->sum_exec_runtime == 0)
1325 return 1;
1326 return 0;
1327}
1328
1329/**
1330 * task_cputime_expired - Compare two task_cputime entities. 1243 * task_cputime_expired - Compare two task_cputime entities.
1331 * 1244 *
1332 * @sample: The task_cputime structure to be checked for expiration. 1245 * @sample: The task_cputime structure to be checked for expiration.
@@ -1382,7 +1295,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
1382 } 1295 }
1383 1296
1384 sig = tsk->signal; 1297 sig = tsk->signal;
1385 if (!task_cputime_zero(&sig->cputime_expires)) { 1298 if (sig->cputimer.running) {
1386 struct task_cputime group_sample; 1299 struct task_cputime group_sample;
1387 1300
1388 thread_group_cputimer(tsk, &group_sample); 1301 thread_group_cputimer(tsk, &group_sample);
@@ -1390,7 +1303,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
1390 return 1; 1303 return 1;
1391 } 1304 }
1392 1305
1393 return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; 1306 return 0;
1394} 1307}
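
fastpath_timer_check() now trusts cputimer.running instead of re-deriving state from RLIMIT_CPU. The comparison it guards is simple: a nonzero cached limit that a fresh sample has reached means a timer may have fired. A toy version of just that check (field names are invented for illustration):

#include <stdio.h>

struct sample {
    unsigned long prof, virt;
};

/* A zero limit means "not armed"; only nonzero limits can expire. */
static int maybe_expired(const struct sample *limit, const struct sample *now)
{
    if (limit->prof && now->prof >= limit->prof)
        return 1;
    if (limit->virt && now->virt >= limit->virt)
        return 1;
    return 0;
}

int main(void)
{
    struct sample limit = { 100, 0 };
    struct sample now = { 120, 50 };

    printf("%d\n", maybe_expired(&limit, &now));    /* prints 1 */
    return 0;
}
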
1395 1308
1396/* 1309/*
@@ -1419,7 +1332,12 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1419 * put them on the firing list. 1332 * put them on the firing list.
1420 */ 1333 */
1421 check_thread_timers(tsk, &firing); 1334 check_thread_timers(tsk, &firing);
1422 check_process_timers(tsk, &firing); 1335 /*
1336 * If there are any active process wide timers (POSIX 1.b, itimers,
1337 * RLIMIT_CPU) cputimer must be running.
1338 */
1339 if (tsk->signal->cputimer.running)
1340 check_process_timers(tsk, &firing);
1423 1341
1424 /* 1342 /*
1425 * We must release these locks before taking any timer's lock. 1343 * We must release these locks before taking any timer's lock.
@@ -1456,21 +1374,23 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1456} 1374}
1457 1375
1458/* 1376/*
1459 * Set one of the process-wide special case CPU timers. 1377 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
1460 * The tsk->sighand->siglock must be held by the caller. 1378 * The tsk->sighand->siglock must be held by the caller.
1461 * The *newval argument is relative and we update it to be absolute, *oldval
1462 * is absolute and we update it to be relative.
1463 */ 1379 */
1464void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, 1380void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1465 cputime_t *newval, cputime_t *oldval) 1381 cputime_t *newval, cputime_t *oldval)
1466{ 1382{
1467 union cpu_time_count now; 1383 union cpu_time_count now;
1468 struct list_head *head;
1469 1384
1470 BUG_ON(clock_idx == CPUCLOCK_SCHED); 1385 BUG_ON(clock_idx == CPUCLOCK_SCHED);
1471 cpu_timer_sample_group(clock_idx, tsk, &now); 1386 cpu_timer_sample_group(clock_idx, tsk, &now);
1472 1387
1473 if (oldval) { 1388 if (oldval) {
1389 /*
1390 * We are setting itimer. The *oldval is absolute and we update
1391 * it to be relative, *newval argument is relative and we update
1392 * it to be absolute.
1393 */
1474 if (!cputime_eq(*oldval, cputime_zero)) { 1394 if (!cputime_eq(*oldval, cputime_zero)) {
1475 if (cputime_le(*oldval, now.cpu)) { 1395 if (cputime_le(*oldval, now.cpu)) {
1476 /* Just about to fire. */ 1396 /* Just about to fire. */
@@ -1483,33 +1403,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1483 if (cputime_eq(*newval, cputime_zero)) 1403 if (cputime_eq(*newval, cputime_zero))
1484 return; 1404 return;
1485 *newval = cputime_add(*newval, now.cpu); 1405 *newval = cputime_add(*newval, now.cpu);
1486
1487 /*
1488 * If the RLIMIT_CPU timer will expire before the
1489 * ITIMER_PROF timer, we have nothing else to do.
1490 */
1491 if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
1492 < cputime_to_secs(*newval))
1493 return;
1494 } 1406 }
1495 1407
1496 /* 1408 /*
1497 * Check whether there are any process timers already set to fire
1498 * before this one. If so, we don't have anything more to do.
1409 * Update expiration cache if we are the earliest timer, or eventually
1410 * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
1499 */ 1411 */
1500 head = &tsk->signal->cpu_timers[clock_idx];
1501 if (list_empty(head) ||
1502 cputime_ge(list_first_entry(head,
1503 struct cpu_timer_list, entry)->expires.cpu,
1504 *newval)) {
1505 switch (clock_idx) {
1506 case CPUCLOCK_PROF:
1412 switch (clock_idx) {
1413 case CPUCLOCK_PROF:
1414 if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
1507 tsk->signal->cputime_expires.prof_exp = *newval; 1415 tsk->signal->cputime_expires.prof_exp = *newval;
1508 break; 1416 break;
1509 case CPUCLOCK_VIRT: 1417 case CPUCLOCK_VIRT:
1418 if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
1510 tsk->signal->cputime_expires.virt_exp = *newval; 1419 tsk->signal->cputime_expires.virt_exp = *newval;
1511 break; 1420 break;
1512 }
1513 } 1421 }
1514} 1422}
1515 1423
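
Throughout set_process_cpu_timer() the cached expiry is absolute while the value exchanged with the caller is relative to the current sample, as the new comment spells out. A toy sketch of that relative/absolute bookkeeping (plain integers standing in for cputime_t; the values are made up):

#include <stdio.h>

int main(void)
{
    unsigned long now = 50;     /* current accumulated CPU time */
    unsigned long oldval = 70;  /* cached expiry: absolute */
    unsigned long newval = 30;  /* caller's setting: relative */

    /* report the old timer relative to now (0 if about to fire) */
    unsigned long old_rel = oldval > now ? oldval - now : 0;
    /* arm the new timer as an absolute expiry (0 stays disarmed) */
    unsigned long new_abs = newval ? newval + now : 0;

    printf("old=%lu new=%lu\n", old_rel, new_abs);  /* old=20 new=80 */
    return 0;
}
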
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 43191815f874..524e058dcf06 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -8,7 +8,8 @@ obj-$(CONFIG_PM_SLEEP) += console.o
8obj-$(CONFIG_FREEZER) += process.o 8obj-$(CONFIG_FREEZER) += process.o
9obj-$(CONFIG_SUSPEND) += suspend.o 9obj-$(CONFIG_SUSPEND) += suspend.o
10obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o 10obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
11obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o 11obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
12 block_io.o
12obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o 13obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o
13 14
14obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o 15obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
new file mode 100644
index 000000000000..97024fd40cd5
--- /dev/null
+++ b/kernel/power/block_io.c
@@ -0,0 +1,103 @@
1/*
2 * This file provides functions for block I/O operations on swap/file.
3 *
4 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
5 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
6 *
7 * This file is released under the GPLv2.
8 */
9
10#include <linux/bio.h>
11#include <linux/kernel.h>
12#include <linux/pagemap.h>
13#include <linux/swap.h>
14
15#include "power.h"
16
17/**
18 * submit - submit BIO request.
19 * @rw: READ or WRITE.
20 * @off: physical offset of page.
21 * @page: page we're reading or writing.
22 * @bio_chain: list of pending bios (for async reading)
23 *
24 * Straight from the textbook - allocate and initialize the bio.
25 * If we're reading, make sure the page is marked as dirty.
26 * Then submit it and, if @bio_chain == NULL, wait.
27 */
28static int submit(int rw, struct block_device *bdev, sector_t sector,
29 struct page *page, struct bio **bio_chain)
30{
31 const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
32 struct bio *bio;
33
34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
35 bio->bi_sector = sector;
36 bio->bi_bdev = bdev;
37 bio->bi_end_io = end_swap_bio_read;
38
39 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
40 printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
41 (unsigned long long)sector);
42 bio_put(bio);
43 return -EFAULT;
44 }
45
46 lock_page(page);
47 bio_get(bio);
48
49 if (bio_chain == NULL) {
50 submit_bio(bio_rw, bio);
51 wait_on_page_locked(page);
52 if (rw == READ)
53 bio_set_pages_dirty(bio);
54 bio_put(bio);
55 } else {
56 if (rw == READ)
57 get_page(page); /* These pages are freed later */
58 bio->bi_private = *bio_chain;
59 *bio_chain = bio;
60 submit_bio(bio_rw, bio);
61 }
62 return 0;
63}
64
65int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
66{
67 return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
68 virt_to_page(addr), bio_chain);
69}
70
71int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
72{
73 return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
74 virt_to_page(addr), bio_chain);
75}
76
77int hib_wait_on_bio_chain(struct bio **bio_chain)
78{
79 struct bio *bio;
80 struct bio *next_bio;
81 int ret = 0;
82
83 if (bio_chain == NULL)
84 return 0;
85
86 bio = *bio_chain;
87 if (bio == NULL)
88 return 0;
89 while (bio) {
90 struct page *page;
91
92 next_bio = bio->bi_private;
93 page = bio->bi_io_vec[0].bv_page;
94 wait_on_page_locked(page);
95 if (!PageUptodate(page) || PageError(page))
96 ret = -EIO;
97 put_page(page);
98 bio_put(bio);
99 bio = next_bio;
100 }
101 *bio_chain = NULL;
102 return ret;
103}
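
The bio_chain parameter above threads pending bios through bi_private so that one hib_wait_on_bio_chain() pass can reap them all. A userspace sketch of that submit-many/wait-once shape (a malloc'd list stands in for real bios, -5 for -EIO; everything here is illustrative):

#include <stdio.h>
#include <stdlib.h>

struct req {
    int ok;            /* stands in for PageUptodate/!PageError */
    struct req *next;  /* stands in for bio->bi_private */
};

/* "submit": push the new request on the front of the chain */
static int submit(struct req **chain, int ok)
{
    struct req *r = malloc(sizeof(*r));

    if (!r)
        return -1;
    r->ok = ok;
    r->next = *chain;
    *chain = r;
    return 0;
}

/* one pass reaps every pending request, like hib_wait_on_bio_chain() */
static int wait_on_chain(struct req **chain)
{
    struct req *r = *chain, *next;
    int ret = 0;

    while (r) {
        next = r->next;
        if (!r->ok)
            ret = -5;   /* -EIO */
        free(r);
        r = next;
    }
    *chain = NULL;
    return ret;
}

int main(void)
{
    struct req *chain = NULL;

    submit(&chain, 1);
    submit(&chain, 1);
    printf("%d\n", wait_on_chain(&chain));  /* prints 0 */
    return 0;
}
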
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 46c5a26630a3..006270fe382d 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -97,24 +97,12 @@ extern int hibernate_preallocate_memory(void);
97 */ 97 */
98 98
99struct snapshot_handle { 99struct snapshot_handle {
100 loff_t offset; /* number of the last byte ready for reading
101 * or writing in the sequence
102 */
103 unsigned int cur; /* number of the block of PAGE_SIZE bytes the 100 unsigned int cur; /* number of the block of PAGE_SIZE bytes the
104 * next operation will refer to (ie. current) 101 * next operation will refer to (ie. current)
105 */ 102 */
106 unsigned int cur_offset; /* offset with respect to the current
107 * block (for the next operation)
108 */
109 unsigned int prev; /* number of the block of PAGE_SIZE bytes that
110 * was the current one previously
111 */
112 void *buffer; /* address of the block to read from 103 void *buffer; /* address of the block to read from
113 * or write to 104 * or write to
114 */ 105 */
115 unsigned int buf_offset; /* location to read from or write to,
116 * given as a displacement from 'buffer'
117 */
118 int sync_read; /* Set to one to notify the caller of 106 int sync_read; /* Set to one to notify the caller of
119 * snapshot_write_next() that it may 107 * snapshot_write_next() that it may
120 * need to call wait_on_bio_chain() 108 * need to call wait_on_bio_chain()
@@ -125,12 +113,12 @@ struct snapshot_handle {
125 * snapshot_read_next()/snapshot_write_next() is allowed to 113 * snapshot_read_next()/snapshot_write_next() is allowed to
126 * read/write data after the function returns 114 * read/write data after the function returns
127 */ 115 */
128#define data_of(handle) ((handle).buffer + (handle).buf_offset) 116#define data_of(handle) ((handle).buffer)
129 117
130extern unsigned int snapshot_additional_pages(struct zone *zone); 118extern unsigned int snapshot_additional_pages(struct zone *zone);
131extern unsigned long snapshot_get_image_size(void); 119extern unsigned long snapshot_get_image_size(void);
132extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); 120extern int snapshot_read_next(struct snapshot_handle *handle);
133extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); 121extern int snapshot_write_next(struct snapshot_handle *handle);
134extern void snapshot_write_finalize(struct snapshot_handle *handle); 122extern void snapshot_write_finalize(struct snapshot_handle *handle);
135extern int snapshot_image_loaded(struct snapshot_handle *handle); 123extern int snapshot_image_loaded(struct snapshot_handle *handle);
136 124
@@ -154,6 +142,15 @@ extern int swsusp_read(unsigned int *flags_p);
154extern int swsusp_write(unsigned int flags); 142extern int swsusp_write(unsigned int flags);
155extern void swsusp_close(fmode_t); 143extern void swsusp_close(fmode_t);
156 144
145/* kernel/power/block_io.c */
146extern struct block_device *hib_resume_bdev;
147
148extern int hib_bio_read_page(pgoff_t page_off, void *addr,
149 struct bio **bio_chain);
150extern int hib_bio_write_page(pgoff_t page_off, void *addr,
151 struct bio **bio_chain);
152extern int hib_wait_on_bio_chain(struct bio **bio_chain);
153
157struct timeval; 154struct timeval;
158/* kernel/power/swsusp.c */ 155/* kernel/power/swsusp.c */
159extern void swsusp_show_speed(struct timeval *, struct timeval *, 156extern void swsusp_show_speed(struct timeval *, struct timeval *,
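
With the offset fields gone from snapshot_handle, the contract is strictly page-at-a-time: a positive return means exactly one page is ready at data_of(), 0 means end of stream, negative means error. A sketch of the caller side under that contract (the producer here is fake and the names are illustrative):

#include <stdio.h>

#define PAGE_SIZE 4096
#define NPAGES 3

/* fake producer: one page per call, 0 at end of stream */
static int fake_read_next(unsigned int *cur)
{
    if (*cur >= NPAGES)
        return 0;
    (*cur)++;
    return PAGE_SIZE;
}

int main(void)
{
    unsigned int cur = 0;
    long total = 0;
    int n;

    while ((n = fake_read_next(&cur)) > 0)
        total += n;                 /* consume one page at data_of() */
    printf("%ld bytes\n", total);   /* prints 12288 bytes */
    return 0;
}
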
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index be861c26dda7..25ce010e9f8b 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1604,14 +1604,9 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1604 * snapshot_handle structure. The structure gets updated and a pointer 1604 * snapshot_handle structure. The structure gets updated and a pointer
1605 * to it should be passed to this function every next time. 1605 * to it should be passed to this function every next time.
1606 * 1606 *
1607 * The @count parameter should contain the number of bytes the caller
1608 * wants to read from the snapshot. It must not be zero.
1609 *
1610 * On success the function returns a positive number. Then, the caller 1607 * On success the function returns a positive number. Then, the caller
1611 * is allowed to read up to the returned number of bytes from the memory 1608 * is allowed to read up to the returned number of bytes from the memory
1612 * location computed by the data_of() macro. The number returned 1609 * location computed by the data_of() macro.
1613 * may be smaller than @count, but this only happens if the read would
1614 * cross a page boundary otherwise.
1615 * 1610 *
1616 * The function returns 0 to indicate the end of data stream condition, 1611 * The function returns 0 to indicate the end of data stream condition,
1617 * and a negative number is returned on error. In such cases the 1612 * and a negative number is returned on error. In such cases the
@@ -1619,7 +1614,7 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1619 * any more. 1614 * any more.
1620 */ 1615 */
1621 1616
1622int snapshot_read_next(struct snapshot_handle *handle, size_t count) 1617int snapshot_read_next(struct snapshot_handle *handle)
1623{ 1618{
1624 if (handle->cur > nr_meta_pages + nr_copy_pages) 1619 if (handle->cur > nr_meta_pages + nr_copy_pages)
1625 return 0; 1620 return 0;
@@ -1630,7 +1625,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
1630 if (!buffer) 1625 if (!buffer)
1631 return -ENOMEM; 1626 return -ENOMEM;
1632 } 1627 }
1633 if (!handle->offset) { 1628 if (!handle->cur) {
1634 int error; 1629 int error;
1635 1630
1636 error = init_header((struct swsusp_info *)buffer); 1631 error = init_header((struct swsusp_info *)buffer);
@@ -1639,42 +1634,30 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
1639 handle->buffer = buffer; 1634 handle->buffer = buffer;
1640 memory_bm_position_reset(&orig_bm); 1635 memory_bm_position_reset(&orig_bm);
1641 memory_bm_position_reset(&copy_bm); 1636 memory_bm_position_reset(&copy_bm);
1642 }
1643 if (handle->prev < handle->cur) {
1644 if (handle->cur <= nr_meta_pages) {
1645 memset(buffer, 0, PAGE_SIZE);
1646 pack_pfns(buffer, &orig_bm);
1647 } else {
1648 struct page *page;
1637 } else if (handle->cur <= nr_meta_pages) {
1638 memset(buffer, 0, PAGE_SIZE);
1639 pack_pfns(buffer, &orig_bm);
1640 } else {
1641 struct page *page;
1649 1642
1650 page = pfn_to_page(memory_bm_next_pfn(&copy_bm)); 1643 page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1651 if (PageHighMem(page)) { 1644 if (PageHighMem(page)) {
1652 /* Highmem pages are copied to the buffer, 1645 /* Highmem pages are copied to the buffer,
1653 * because we can't return with a kmapped 1646 * because we can't return with a kmapped
1654 * highmem page (we may not be called again). 1647 * highmem page (we may not be called again).
1655 */ 1648 */
1656 void *kaddr; 1649 void *kaddr;
1657 1650
1658 kaddr = kmap_atomic(page, KM_USER0); 1651 kaddr = kmap_atomic(page, KM_USER0);
1659 memcpy(buffer, kaddr, PAGE_SIZE); 1652 memcpy(buffer, kaddr, PAGE_SIZE);
1660 kunmap_atomic(kaddr, KM_USER0); 1653 kunmap_atomic(kaddr, KM_USER0);
1661 handle->buffer = buffer; 1654 handle->buffer = buffer;
1662 } else { 1655 } else {
1663 handle->buffer = page_address(page); 1656 handle->buffer = page_address(page);
1664 }
1665 } 1657 }
1666 handle->prev = handle->cur;
1667 }
1668 handle->buf_offset = handle->cur_offset;
1669 if (handle->cur_offset + count >= PAGE_SIZE) {
1670 count = PAGE_SIZE - handle->cur_offset;
1671 handle->cur_offset = 0;
1672 handle->cur++;
1673 } else {
1674 handle->cur_offset += count;
1675 } 1658 }
1676 handle->offset += count; 1659 handle->cur++;
1677 return count; 1660 return PAGE_SIZE;
1678} 1661}
1679 1662
1680/** 1663/**
@@ -2133,14 +2116,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2133 * snapshot_handle structure. The structure gets updated and a pointer 2116 * snapshot_handle structure. The structure gets updated and a pointer
2134 * to it should be passed to this function every next time. 2117 * to it should be passed to this function every next time.
2135 * 2118 *
2136 * The @count parameter should contain the number of bytes the caller
2137 * wants to write to the image. It must not be zero.
2138 *
2139 * On success the function returns a positive number. Then, the caller 2119 * On success the function returns a positive number. Then, the caller
2140 * is allowed to write up to the returned number of bytes to the memory 2120 * is allowed to write up to the returned number of bytes to the memory
2141 * location computed by the data_of() macro. The number returned 2121 * location computed by the data_of() macro.
2142 * may be smaller than @count, but this only happens if the write would
2143 * cross a page boundary otherwise.
2144 * 2122 *
2145 * The function returns 0 to indicate the "end of file" condition, 2123 * The function returns 0 to indicate the "end of file" condition,
2146 * and a negative number is returned on error. In such cases the 2124 * and a negative number is returned on error. In such cases the
@@ -2148,16 +2126,18 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2148 * any more. 2126 * any more.
2149 */ 2127 */
2150 2128
2151int snapshot_write_next(struct snapshot_handle *handle, size_t count) 2129int snapshot_write_next(struct snapshot_handle *handle)
2152{ 2130{
2153 static struct chain_allocator ca; 2131 static struct chain_allocator ca;
2154 int error = 0; 2132 int error = 0;
2155 2133
2156 /* Check if we have already loaded the entire image */ 2134 /* Check if we have already loaded the entire image */
2157 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) 2135 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
2158 return 0; 2136 return 0;
2159 2137
2160 if (handle->offset == 0) {
2138 handle->sync_read = 1;
2139
2140 if (!handle->cur) {
2161 if (!buffer) 2141 if (!buffer)
2162 /* This makes the buffer be freed by swsusp_free() */ 2142 /* This makes the buffer be freed by swsusp_free() */
2163 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2143 buffer = get_image_page(GFP_ATOMIC, PG_ANY);
@@ -2166,56 +2146,43 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
2166 return -ENOMEM; 2146 return -ENOMEM;
2167 2147
2168 handle->buffer = buffer; 2148 handle->buffer = buffer;
2169 }
2170 handle->sync_read = 1;
2171 if (handle->prev < handle->cur) {
2172 if (handle->prev == 0) {
2173 error = load_header(buffer);
2174 if (error)
2175 return error;
2149 } else if (handle->cur == 1) {
2150 error = load_header(buffer);
2151 if (error)
2152 return error;
2176 2153
2177 error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY); 2154 error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2178 if (error) 2155 if (error)
2179 return error; 2156 return error;
2157
2158 } else if (handle->cur <= nr_meta_pages + 1) {
2159 error = unpack_orig_pfns(buffer, &copy_bm);
2160 if (error)
2161 return error;
2180 2162
2181 } else if (handle->prev <= nr_meta_pages) {
2182 error = unpack_orig_pfns(buffer, &copy_bm);
2163 if (handle->cur == nr_meta_pages + 1) {
2164 error = prepare_image(&orig_bm, &copy_bm);
2183 if (error) 2165 if (error)
2184 return error; 2166 return error;
2185 2167
2186 if (handle->prev == nr_meta_pages) {
2187 error = prepare_image(&orig_bm, &copy_bm);
2188 if (error)
2189 return error;
2190
2191 chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2192 memory_bm_position_reset(&orig_bm);
2193 restore_pblist = NULL;
2194 handle->buffer = get_buffer(&orig_bm, &ca);
2195 handle->sync_read = 0;
2196 if (IS_ERR(handle->buffer))
2197 return PTR_ERR(handle->buffer);
2198 }
2199 } else {
2200 copy_last_highmem_page();
2168 chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2169 memory_bm_position_reset(&orig_bm);
2170 restore_pblist = NULL;
2201 handle->buffer = get_buffer(&orig_bm, &ca); 2171 handle->buffer = get_buffer(&orig_bm, &ca);
2172 handle->sync_read = 0;
2202 if (IS_ERR(handle->buffer)) 2173 if (IS_ERR(handle->buffer))
2203 return PTR_ERR(handle->buffer); 2174 return PTR_ERR(handle->buffer);
2204 if (handle->buffer != buffer)
2205 handle->sync_read = 0;
2206 } 2175 }
2207 handle->prev = handle->cur;
2208 }
2209 handle->buf_offset = handle->cur_offset;
2210 if (handle->cur_offset + count >= PAGE_SIZE) {
2211 count = PAGE_SIZE - handle->cur_offset;
2212 handle->cur_offset = 0;
2213 handle->cur++;
2214 } else { 2176 } else {
2215 handle->cur_offset += count; 2177 copy_last_highmem_page();
2178 handle->buffer = get_buffer(&orig_bm, &ca);
2179 if (IS_ERR(handle->buffer))
2180 return PTR_ERR(handle->buffer);
2181 if (handle->buffer != buffer)
2182 handle->sync_read = 0;
2216 } 2183 }
2217 handle->offset += count; 2184 handle->cur++;
2218 return count; 2185 return PAGE_SIZE;
2219} 2186}
2220 2187
2221/** 2188/**
@@ -2230,7 +2197,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
2230{ 2197{
2231 copy_last_highmem_page(); 2198 copy_last_highmem_page();
2232 /* Free only if we have loaded the image entirely */ 2199 /* Free only if we have loaded the image entirely */
2233 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) { 2200 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2234 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); 2201 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
2235 free_highmem_data(); 2202 free_highmem_data();
2236 } 2203 }
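
After the rewrite, handle->cur alone encodes the snapshot state machine: page 0 is the header, the next nr_meta_pages pages carry packed pfns, everything after is page data. The classification, sketched stand-alone (the constants are invented for the example):

#include <stdio.h>

#define NR_META 2
#define NR_DATA 3

static const char *page_kind(unsigned int cur)
{
    if (cur == 0)
        return "header";
    if (cur <= NR_META)
        return "meta";
    if (cur <= NR_META + NR_DATA)
        return "data";
    return "eof";
}

int main(void)
{
    unsigned int cur;

    for (cur = 0; cur <= NR_META + NR_DATA + 1; cur++)
        printf("%u: %s\n", cur, page_kind(cur));
    return 0;
}
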
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 66824d71983a..b0bb21778391 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -29,6 +29,40 @@
29 29
30#define SWSUSP_SIG "S1SUSPEND" 30#define SWSUSP_SIG "S1SUSPEND"
31 31
32/*
33 * The swap map is a data structure used for keeping track of each page
34 * written to a swap partition. It consists of many swap_map_page
35 * structures that contain each an array of MAP_PAGE_SIZE swap entries.
36 * These structures are stored on the swap and linked together with the
37 * help of the .next_swap member.
38 *
39 * The swap map is created during suspend. The swap map pages are
40 * allocated and populated one at a time, so we only need one memory
41 * page to set up the entire structure.
42 *
43 * During resume we also only need to use one swap_map_page structure
44 * at a time.
45 */
46
47#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
48
49struct swap_map_page {
50 sector_t entries[MAP_PAGE_ENTRIES];
51 sector_t next_swap;
52};
53
54/**
55 * The swap_map_handle structure is used for handling swap in
56 * a file-alike way
57 */
58
59struct swap_map_handle {
60 struct swap_map_page *cur;
61 sector_t cur_swap;
62 sector_t first_sector;
63 unsigned int k;
64};
65
32struct swsusp_header { 66struct swsusp_header {
33 char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; 67 char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
34 sector_t image; 68 sector_t image;
@@ -145,110 +179,24 @@ int swsusp_swap_in_use(void)
145 */ 179 */
146 180
147static unsigned short root_swap = 0xffff; 181static unsigned short root_swap = 0xffff;
148static struct block_device *resume_bdev; 182struct block_device *hib_resume_bdev;
149
150/**
151 * submit - submit BIO request.
152 * @rw: READ or WRITE.
153 * @off physical offset of page.
154 * @page: page we're reading or writing.
155 * @bio_chain: list of pending biod (for async reading)
156 *
157 * Straight from the textbook - allocate and initialize the bio.
158 * If we're reading, make sure the page is marked as dirty.
159 * Then submit it and, if @bio_chain == NULL, wait.
160 */
161static int submit(int rw, pgoff_t page_off, struct page *page,
162 struct bio **bio_chain)
163{
164 const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
165 struct bio *bio;
166
167 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
168 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
169 bio->bi_bdev = resume_bdev;
170 bio->bi_end_io = end_swap_bio_read;
171
172 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
173 printk(KERN_ERR "PM: Adding page to bio failed at %ld\n",
174 page_off);
175 bio_put(bio);
176 return -EFAULT;
177 }
178
179 lock_page(page);
180 bio_get(bio);
181
182 if (bio_chain == NULL) {
183 submit_bio(bio_rw, bio);
184 wait_on_page_locked(page);
185 if (rw == READ)
186 bio_set_pages_dirty(bio);
187 bio_put(bio);
188 } else {
189 if (rw == READ)
190 get_page(page); /* These pages are freed later */
191 bio->bi_private = *bio_chain;
192 *bio_chain = bio;
193 submit_bio(bio_rw, bio);
194 }
195 return 0;
196}
197
198static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
199{
200 return submit(READ, page_off, virt_to_page(addr), bio_chain);
201}
202
203static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
204{
205 return submit(WRITE, page_off, virt_to_page(addr), bio_chain);
206}
207
208static int wait_on_bio_chain(struct bio **bio_chain)
209{
210 struct bio *bio;
211 struct bio *next_bio;
212 int ret = 0;
213
214 if (bio_chain == NULL)
215 return 0;
216
217 bio = *bio_chain;
218 if (bio == NULL)
219 return 0;
220 while (bio) {
221 struct page *page;
222
223 next_bio = bio->bi_private;
224 page = bio->bi_io_vec[0].bv_page;
225 wait_on_page_locked(page);
226 if (!PageUptodate(page) || PageError(page))
227 ret = -EIO;
228 put_page(page);
229 bio_put(bio);
230 bio = next_bio;
231 }
232 *bio_chain = NULL;
233 return ret;
234}
235 183
236/* 184/*
237 * Saving part 185 * Saving part
238 */ 186 */
239 187
240static int mark_swapfiles(sector_t start, unsigned int flags) 188static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
241{ 189{
242 int error; 190 int error;
243 191
244 bio_read_page(swsusp_resume_block, swsusp_header, NULL); 192 hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
245 if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || 193 if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
246 !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { 194 !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
247 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); 195 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
248 memcpy(swsusp_header->sig,SWSUSP_SIG, 10); 196 memcpy(swsusp_header->sig,SWSUSP_SIG, 10);
249 swsusp_header->image = start; 197 swsusp_header->image = handle->first_sector;
250 swsusp_header->flags = flags; 198 swsusp_header->flags = flags;
251 error = bio_write_page(swsusp_resume_block, 199 error = hib_bio_write_page(swsusp_resume_block,
252 swsusp_header, NULL); 200 swsusp_header, NULL);
253 } else { 201 } else {
254 printk(KERN_ERR "PM: Swap header not found!\n"); 202 printk(KERN_ERR "PM: Swap header not found!\n");
@@ -260,25 +208,26 @@ static int mark_swapfiles(sector_t start, unsigned int flags)
260/** 208/**
261 * swsusp_swap_check - check if the resume device is a swap device 209 * swsusp_swap_check - check if the resume device is a swap device
262 * and get its index (if so) 210 * and get its index (if so)
211 *
212 * This is called before saving image
263 */ 213 */
264 214static int swsusp_swap_check(void)
265static int swsusp_swap_check(void) /* This is called before saving image */
266{ 215{
267 int res; 216 int res;
268 217
269 res = swap_type_of(swsusp_resume_device, swsusp_resume_block, 218 res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
270 &resume_bdev); 219 &hib_resume_bdev);
271 if (res < 0) 220 if (res < 0)
272 return res; 221 return res;
273 222
274 root_swap = res; 223 root_swap = res;
275 res = blkdev_get(resume_bdev, FMODE_WRITE); 224 res = blkdev_get(hib_resume_bdev, FMODE_WRITE);
276 if (res) 225 if (res)
277 return res; 226 return res;
278 227
279 res = set_blocksize(resume_bdev, PAGE_SIZE); 228 res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
280 if (res < 0) 229 if (res < 0)
281 blkdev_put(resume_bdev, FMODE_WRITE); 230 blkdev_put(hib_resume_bdev, FMODE_WRITE);
282 231
283 return res; 232 return res;
284} 233}
@@ -309,42 +258,9 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
309 } else { 258 } else {
310 src = buf; 259 src = buf;
311 } 260 }
312 return bio_write_page(offset, src, bio_chain); 261 return hib_bio_write_page(offset, src, bio_chain);
313} 262}
314 263
315/*
316 * The swap map is a data structure used for keeping track of each page
317 * written to a swap partition. It consists of many swap_map_page
318 * structures that contain each an array of MAP_PAGE_SIZE swap entries.
319 * These structures are stored on the swap and linked together with the
320 * help of the .next_swap member.
321 *
322 * The swap map is created during suspend. The swap map pages are
323 * allocated and populated one at a time, so we only need one memory
324 * page to set up the entire structure.
325 *
326 * During resume we also only need to use one swap_map_page structure
327 * at a time.
328 */
329
330#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
331
332struct swap_map_page {
333 sector_t entries[MAP_PAGE_ENTRIES];
334 sector_t next_swap;
335};
336
337/**
338 * The swap_map_handle structure is used for handling swap in
339 * a file-alike way
340 */
341
342struct swap_map_handle {
343 struct swap_map_page *cur;
344 sector_t cur_swap;
345 unsigned int k;
346};
347
348static void release_swap_writer(struct swap_map_handle *handle) 264static void release_swap_writer(struct swap_map_handle *handle)
349{ 265{
350 if (handle->cur) 266 if (handle->cur)
@@ -354,16 +270,33 @@ static void release_swap_writer(struct swap_map_handle *handle)
354 270
355static int get_swap_writer(struct swap_map_handle *handle) 271static int get_swap_writer(struct swap_map_handle *handle)
356{ 272{
273 int ret;
274
275 ret = swsusp_swap_check();
276 if (ret) {
277 if (ret != -ENOSPC)
278 printk(KERN_ERR "PM: Cannot find swap device, try "
279 "swapon -a.\n");
280 return ret;
281 }
357 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); 282 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
358 if (!handle->cur)
359 return -ENOMEM;
283 if (!handle->cur) {
284 ret = -ENOMEM;
285 goto err_close;
286 }
360 handle->cur_swap = alloc_swapdev_block(root_swap); 287 handle->cur_swap = alloc_swapdev_block(root_swap);
361 if (!handle->cur_swap) { 288 if (!handle->cur_swap) {
362 release_swap_writer(handle);
363 return -ENOSPC;
289 ret = -ENOSPC;
290 goto err_rel;
364 } 291 }
365 handle->k = 0; 292 handle->k = 0;
293 handle->first_sector = handle->cur_swap;
366 return 0; 294 return 0;
295err_rel:
296 release_swap_writer(handle);
297err_close:
298 swsusp_close(FMODE_WRITE);
299 return ret;
367} 300}
368 301
369static int swap_write_page(struct swap_map_handle *handle, void *buf, 302static int swap_write_page(struct swap_map_handle *handle, void *buf,
@@ -380,7 +313,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
380 return error; 313 return error;
381 handle->cur->entries[handle->k++] = offset; 314 handle->cur->entries[handle->k++] = offset;
382 if (handle->k >= MAP_PAGE_ENTRIES) { 315 if (handle->k >= MAP_PAGE_ENTRIES) {
383 error = wait_on_bio_chain(bio_chain); 316 error = hib_wait_on_bio_chain(bio_chain);
384 if (error) 317 if (error)
385 goto out; 318 goto out;
386 offset = alloc_swapdev_block(root_swap); 319 offset = alloc_swapdev_block(root_swap);
@@ -406,6 +339,24 @@ static int flush_swap_writer(struct swap_map_handle *handle)
406 return -EINVAL; 339 return -EINVAL;
407} 340}
408 341
342static int swap_writer_finish(struct swap_map_handle *handle,
343 unsigned int flags, int error)
344{
345 if (!error) {
346 flush_swap_writer(handle);
347 printk(KERN_INFO "PM: S");
348 error = mark_swapfiles(handle, flags);
349 printk("|\n");
350 }
351
352 if (error)
353 free_all_swap_pages(root_swap);
354 release_swap_writer(handle);
355 swsusp_close(FMODE_WRITE);
356
357 return error;
358}
359
409/** 360/**
410 * save_image - save the suspend image data 361 * save_image - save the suspend image data
411 */ 362 */
@@ -431,7 +382,7 @@ static int save_image(struct swap_map_handle *handle,
431 bio = NULL; 382 bio = NULL;
432 do_gettimeofday(&start); 383 do_gettimeofday(&start);
433 while (1) { 384 while (1) {
434 ret = snapshot_read_next(snapshot, PAGE_SIZE); 385 ret = snapshot_read_next(snapshot);
435 if (ret <= 0) 386 if (ret <= 0)
436 break; 387 break;
437 ret = swap_write_page(handle, data_of(*snapshot), &bio); 388 ret = swap_write_page(handle, data_of(*snapshot), &bio);
@@ -441,7 +392,7 @@ static int save_image(struct swap_map_handle *handle,
441 printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); 392 printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
442 nr_pages++; 393 nr_pages++;
443 } 394 }
444 err2 = wait_on_bio_chain(&bio); 395 err2 = hib_wait_on_bio_chain(&bio);
445 do_gettimeofday(&stop); 396 do_gettimeofday(&stop);
446 if (!ret) 397 if (!ret)
447 ret = err2; 398 ret = err2;
@@ -483,50 +434,34 @@ int swsusp_write(unsigned int flags)
483 struct swap_map_handle handle; 434 struct swap_map_handle handle;
484 struct snapshot_handle snapshot; 435 struct snapshot_handle snapshot;
485 struct swsusp_info *header; 436 struct swsusp_info *header;
437 unsigned long pages;
486 int error; 438 int error;
487 439
488 error = swsusp_swap_check();
440 pages = snapshot_get_image_size();
441 error = get_swap_writer(&handle);
489 if (error) { 442 if (error) {
490 printk(KERN_ERR "PM: Cannot find swap device, try "
491 "swapon -a.\n");
443 printk(KERN_ERR "PM: Cannot get swap writer\n");
492 return error; 444 return error;
493 } 445 }
446 if (!enough_swap(pages)) {
447 printk(KERN_ERR "PM: Not enough free swap\n");
448 error = -ENOSPC;
449 goto out_finish;
450 }
494 memset(&snapshot, 0, sizeof(struct snapshot_handle)); 451 memset(&snapshot, 0, sizeof(struct snapshot_handle));
495 error = snapshot_read_next(&snapshot, PAGE_SIZE); 452 error = snapshot_read_next(&snapshot);
496 if (error < PAGE_SIZE) { 453 if (error < PAGE_SIZE) {
497 if (error >= 0) 454 if (error >= 0)
498 error = -EFAULT; 455 error = -EFAULT;
499 456
500 goto out; 457 goto out_finish;
501 } 458 }
502 header = (struct swsusp_info *)data_of(snapshot); 459 header = (struct swsusp_info *)data_of(snapshot);
503 if (!enough_swap(header->pages)) {
504 printk(KERN_ERR "PM: Not enough free swap\n");
505 error = -ENOSPC;
506 goto out;
507 }
508 error = get_swap_writer(&handle);
509 if (!error) {
510 sector_t start = handle.cur_swap;
511
512 error = swap_write_page(&handle, header, NULL);
513 if (!error)
514 error = save_image(&handle, &snapshot,
515 header->pages - 1);
516
517 if (!error) {
518 flush_swap_writer(&handle);
519 printk(KERN_INFO "PM: S");
520 error = mark_swapfiles(start, flags);
521 printk("|\n");
522 }
523 }
524 if (error)
525 free_all_swap_pages(root_swap);
526
527 release_swap_writer(&handle);
528 out:
529 swsusp_close(FMODE_WRITE);
460 error = swap_write_page(&handle, header, NULL);
461 if (!error)
462 error = save_image(&handle, &snapshot, pages - 1);
463out_finish:
464 error = swap_writer_finish(&handle, flags, error);
530 return error; 465 return error;
531} 466}
532 467
@@ -542,18 +477,21 @@ static void release_swap_reader(struct swap_map_handle *handle)
542 handle->cur = NULL; 477 handle->cur = NULL;
543} 478}
544 479
545static int get_swap_reader(struct swap_map_handle *handle, sector_t start) 480static int get_swap_reader(struct swap_map_handle *handle,
481 unsigned int *flags_p)
546{ 482{
547 int error; 483 int error;
548 484
549 if (!start)
485 *flags_p = swsusp_header->flags;
486
487 if (!swsusp_header->image) /* how can this happen? */
550 return -EINVAL; 488 return -EINVAL;
551 489
552 handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH); 490 handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
553 if (!handle->cur) 491 if (!handle->cur)
554 return -ENOMEM; 492 return -ENOMEM;
555 493
556 error = bio_read_page(start, handle->cur, NULL); 494 error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL);
557 if (error) { 495 if (error) {
558 release_swap_reader(handle); 496 release_swap_reader(handle);
559 return error; 497 return error;
@@ -573,21 +511,28 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
573 offset = handle->cur->entries[handle->k]; 511 offset = handle->cur->entries[handle->k];
574 if (!offset) 512 if (!offset)
575 return -EFAULT; 513 return -EFAULT;
576 error = bio_read_page(offset, buf, bio_chain); 514 error = hib_bio_read_page(offset, buf, bio_chain);
577 if (error) 515 if (error)
578 return error; 516 return error;
579 if (++handle->k >= MAP_PAGE_ENTRIES) { 517 if (++handle->k >= MAP_PAGE_ENTRIES) {
580 error = wait_on_bio_chain(bio_chain); 518 error = hib_wait_on_bio_chain(bio_chain);
581 handle->k = 0; 519 handle->k = 0;
582 offset = handle->cur->next_swap; 520 offset = handle->cur->next_swap;
583 if (!offset) 521 if (!offset)
584 release_swap_reader(handle); 522 release_swap_reader(handle);
585 else if (!error) 523 else if (!error)
586 error = bio_read_page(offset, handle->cur, NULL); 524 error = hib_bio_read_page(offset, handle->cur, NULL);
587 } 525 }
588 return error; 526 return error;
589} 527}
590 528
529static int swap_reader_finish(struct swap_map_handle *handle)
530{
531 release_swap_reader(handle);
532
533 return 0;
534}
535
591/** 536/**
592 * load_image - load the image using the swap map handle 537 * load_image - load the image using the swap map handle
593 * @handle and the snapshot handle @snapshot 538 * @handle and the snapshot handle @snapshot
@@ -615,21 +560,21 @@ static int load_image(struct swap_map_handle *handle,
615 bio = NULL; 560 bio = NULL;
616 do_gettimeofday(&start); 561 do_gettimeofday(&start);
617 for ( ; ; ) { 562 for ( ; ; ) {
618 error = snapshot_write_next(snapshot, PAGE_SIZE); 563 error = snapshot_write_next(snapshot);
619 if (error <= 0) 564 if (error <= 0)
620 break; 565 break;
621 error = swap_read_page(handle, data_of(*snapshot), &bio); 566 error = swap_read_page(handle, data_of(*snapshot), &bio);
622 if (error) 567 if (error)
623 break; 568 break;
624 if (snapshot->sync_read) 569 if (snapshot->sync_read)
625 error = wait_on_bio_chain(&bio); 570 error = hib_wait_on_bio_chain(&bio);
626 if (error) 571 if (error)
627 break; 572 break;
628 if (!(nr_pages % m)) 573 if (!(nr_pages % m))
629 printk("\b\b\b\b%3d%%", nr_pages / m); 574 printk("\b\b\b\b%3d%%", nr_pages / m);
630 nr_pages++; 575 nr_pages++;
631 } 576 }
632 err2 = wait_on_bio_chain(&bio); 577 err2 = hib_wait_on_bio_chain(&bio);
633 do_gettimeofday(&stop); 578 do_gettimeofday(&stop);
634 if (!error) 579 if (!error)
635 error = err2; 580 error = err2;
@@ -657,20 +602,20 @@ int swsusp_read(unsigned int *flags_p)
657 struct snapshot_handle snapshot; 602 struct snapshot_handle snapshot;
658 struct swsusp_info *header; 603 struct swsusp_info *header;
659 604
660 *flags_p = swsusp_header->flags;
661
662 memset(&snapshot, 0, sizeof(struct snapshot_handle)); 605 memset(&snapshot, 0, sizeof(struct snapshot_handle));
663 error = snapshot_write_next(&snapshot, PAGE_SIZE); 606 error = snapshot_write_next(&snapshot);
664 if (error < PAGE_SIZE) 607 if (error < PAGE_SIZE)
665 return error < 0 ? error : -EFAULT; 608 return error < 0 ? error : -EFAULT;
666 header = (struct swsusp_info *)data_of(snapshot); 609 header = (struct swsusp_info *)data_of(snapshot);
667 error = get_swap_reader(&handle, swsusp_header->image); 610 error = get_swap_reader(&handle, flags_p);
611 if (error)
612 goto end;
668 if (!error) 613 if (!error)
669 error = swap_read_page(&handle, header, NULL); 614 error = swap_read_page(&handle, header, NULL);
670 if (!error) 615 if (!error)
671 error = load_image(&handle, &snapshot, header->pages - 1); 616 error = load_image(&handle, &snapshot, header->pages - 1);
672 release_swap_reader(&handle); 617 swap_reader_finish(&handle);
673 618end:
674 if (!error) 619 if (!error)
675 pr_debug("PM: Image successfully loaded\n"); 620 pr_debug("PM: Image successfully loaded\n");
676 else 621 else
@@ -686,11 +631,11 @@ int swsusp_check(void)
686{ 631{
687 int error; 632 int error;
688 633
689 resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); 634 hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
690 if (!IS_ERR(resume_bdev)) { 635 if (!IS_ERR(hib_resume_bdev)) {
691 set_blocksize(resume_bdev, PAGE_SIZE); 636 set_blocksize(hib_resume_bdev, PAGE_SIZE);
692 memset(swsusp_header, 0, PAGE_SIZE); 637 memset(swsusp_header, 0, PAGE_SIZE);
693 error = bio_read_page(swsusp_resume_block, 638 error = hib_bio_read_page(swsusp_resume_block,
694 swsusp_header, NULL); 639 swsusp_header, NULL);
695 if (error) 640 if (error)
696 goto put; 641 goto put;
@@ -698,7 +643,7 @@ int swsusp_check(void)
698 if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { 643 if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) {
699 memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); 644 memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
700 /* Reset swap signature now */ 645 /* Reset swap signature now */
701 error = bio_write_page(swsusp_resume_block, 646 error = hib_bio_write_page(swsusp_resume_block,
702 swsusp_header, NULL); 647 swsusp_header, NULL);
703 } else { 648 } else {
704 error = -EINVAL; 649 error = -EINVAL;
@@ -706,11 +651,11 @@ int swsusp_check(void)
706 651
707put: 652put:
708 if (error) 653 if (error)
709 blkdev_put(resume_bdev, FMODE_READ); 654 blkdev_put(hib_resume_bdev, FMODE_READ);
710 else 655 else
711 pr_debug("PM: Signature found, resuming\n"); 656 pr_debug("PM: Signature found, resuming\n");
712 } else { 657 } else {
713 error = PTR_ERR(resume_bdev); 658 error = PTR_ERR(hib_resume_bdev);
714 } 659 }
715 660
716 if (error) 661 if (error)
@@ -725,12 +670,12 @@ put:
725 670
726void swsusp_close(fmode_t mode) 671void swsusp_close(fmode_t mode)
727{ 672{
728 if (IS_ERR(resume_bdev)) { 673 if (IS_ERR(hib_resume_bdev)) {
729 pr_debug("PM: Image device not initialised\n"); 674 pr_debug("PM: Image device not initialised\n");
730 return; 675 return;
731 } 676 }
732 677
733 blkdev_put(resume_bdev, mode); 678 blkdev_put(hib_resume_bdev, mode);
734} 679}
735 680
736static int swsusp_header_init(void) 681static int swsusp_header_init(void)
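
The relocated swap-map comment describes an on-disk linked list: each map page holds MAP_PAGE_ENTRIES data sectors plus the sector of the next map page, with first_sector remembering the head. A self-contained sketch, with an array standing in for the disk and index 0 terminating the chain (sizes shrunk for the example):

#include <stdio.h>

#define MAP_ENTRIES 3   /* MAP_PAGE_ENTRIES, shrunk for the example */

struct map_page {
    unsigned long entries[MAP_ENTRIES];
    unsigned long next;     /* next map page, 0 terminates */
};

static const struct map_page disk[] = {
    { { 0, 0, 0 }, 0 },     /* index 0 never holds a map page */
    { { 11, 12, 13 }, 2 },  /* first map page (the "first_sector") */
    { { 14, 15, 0 }, 0 },   /* last map page, partially filled */
};

int main(void)
{
    unsigned long map = 1;  /* like swsusp_header->image */

    while (map) {
        int k;

        for (k = 0; k < MAP_ENTRIES && disk[map].entries[k]; k++)
            printf("data at sector %lu\n", disk[map].entries[k]);
        map = disk[map].next;
    }
    return 0;
}
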
diff --git a/kernel/power/user.c b/kernel/power/user.c
index a8c96212bc1b..e819e17877ca 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -151,6 +151,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
151{ 151{
152 struct snapshot_data *data; 152 struct snapshot_data *data;
153 ssize_t res; 153 ssize_t res;
154 loff_t pg_offp = *offp & ~PAGE_MASK;
154 155
155 mutex_lock(&pm_mutex); 156 mutex_lock(&pm_mutex);
156 157
@@ -159,14 +160,19 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
159 res = -ENODATA; 160 res = -ENODATA;
160 goto Unlock; 161 goto Unlock;
161 } 162 }
162 res = snapshot_read_next(&data->handle, count);
163 if (res > 0) {
164 if (copy_to_user(buf, data_of(data->handle), res))
165 res = -EFAULT;
166 else
167 *offp = data->handle.offset;
163 if (!pg_offp) { /* on page boundary? */
164 res = snapshot_read_next(&data->handle);
165 if (res <= 0)
166 goto Unlock;
167 } else {
168 res = PAGE_SIZE - pg_offp;
168 } 169 }
169 170
171 res = simple_read_from_buffer(buf, count, &pg_offp,
172 data_of(data->handle), res);
173 if (res > 0)
174 *offp += res;
175
170 Unlock: 176 Unlock:
171 mutex_unlock(&pm_mutex); 177 mutex_unlock(&pm_mutex);
172 178
@@ -178,18 +184,25 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
178{ 184{
179 struct snapshot_data *data; 185 struct snapshot_data *data;
180 ssize_t res; 186 ssize_t res;
187 loff_t pg_offp = *offp & ~PAGE_MASK;
181 188
182 mutex_lock(&pm_mutex); 189 mutex_lock(&pm_mutex);
183 190
184 data = filp->private_data; 191 data = filp->private_data;
185 res = snapshot_write_next(&data->handle, count);
186 if (res > 0) {
187 if (copy_from_user(data_of(data->handle), buf, res))
188 res = -EFAULT;
189 else
190 *offp = data->handle.offset;
192
193 if (!pg_offp) {
194 res = snapshot_write_next(&data->handle);
195 if (res <= 0)
196 goto unlock;
197 } else {
198 res = PAGE_SIZE - pg_offp;
191 } 199 }
192 200
201 res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp,
202 buf, count);
203 if (res > 0)
204 *offp += res;
205unlock:
193 mutex_unlock(&pm_mutex); 206 mutex_unlock(&pm_mutex);
194 207
195 return res; 208 return res;
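
The pg_offp arithmetic above lets a byte-granular read()/write() front a page-granular producer: advance the producer only on page boundaries, otherwise serve the remainder of the current page. Just that calculation, sketched on its own (assumes PAGE_SIZE is a power of two; names are illustrative):

#include <stdio.h>

#define PAGE_SIZE 4096

/* how many bytes the caller may move before the producer must step */
static long bytes_available(long long off)
{
    long pg_offp = off & (PAGE_SIZE - 1);

    if (!pg_offp)                   /* on a page boundary: new page */
        return PAGE_SIZE;
    return PAGE_SIZE - pg_offp;     /* finish the current page first */
}

int main(void)
{
    printf("%ld\n", bytes_available(0));    /* 4096: fresh page */
    printf("%ld\n", bytes_available(100));  /* 3996: rest of the page */
    return 0;
}
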
diff --git a/kernel/printk.c b/kernel/printk.c
index 75077ad0b537..444b770c9595 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -33,6 +33,7 @@
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/kexec.h> 35#include <linux/kexec.h>
36#include <linux/kdb.h>
36#include <linux/ratelimit.h> 37#include <linux/ratelimit.h>
37#include <linux/kmsg_dump.h> 38#include <linux/kmsg_dump.h>
38#include <linux/syslog.h> 39#include <linux/syslog.h>
@@ -413,6 +414,22 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
413 return do_syslog(type, buf, len, SYSLOG_FROM_CALL); 414 return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
414} 415}
415 416
417#ifdef CONFIG_KGDB_KDB
418/* kdb dmesg command needs access to the syslog buffer. do_syslog()
419 * uses locks so it cannot be used during debugging. Just tell kdb
420 * where the start and end of the physical and logical logs are. This
421 * is equivalent to do_syslog(3).
422 */
423void kdb_syslog_data(char *syslog_data[4])
424{
425 syslog_data[0] = log_buf;
426 syslog_data[1] = log_buf + log_buf_len;
427 syslog_data[2] = log_buf + log_end -
428 (logged_chars < log_buf_len ? logged_chars : log_buf_len);
429 syslog_data[3] = log_buf + log_end;
430}
431#endif /* CONFIG_KGDB_KDB */
432
416/* 433/*
417 * Call the console drivers on a range of log_buf 434 * Call the console drivers on a range of log_buf
418 */ 435 */
@@ -586,6 +603,14 @@ asmlinkage int printk(const char *fmt, ...)
586 va_list args; 603 va_list args;
587 int r; 604 int r;
588 605
606#ifdef CONFIG_KGDB_KDB
607 if (unlikely(kdb_trap_printk)) {
608 va_start(args, fmt);
609 r = vkdb_printf(fmt, args);
610 va_end(args);
611 return r;
612 }
613#endif
589 va_start(args, fmt); 614 va_start(args, fmt);
590 r = vprintk(fmt, args); 615 r = vprintk(fmt, args);
591 va_end(args); 616 va_end(args);
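
The kdb hook is a redirection flag: while the debugger owns the console, printk() routes through vkdb_printf() rather than vprintk(). A userspace analogue of the pattern (my_printk() and the alternate sink are invented names, not kernel API):

#include <stdarg.h>
#include <stdio.h>

static int trap_printk;     /* set while the debugger is active */

static int debugger_vprintf(const char *fmt, va_list ap)
{
    fputs("[kdb] ", stdout);    /* alternate sink */
    return vprintf(fmt, ap);
}

static int my_printk(const char *fmt, ...)
{
    va_list ap;
    int r;

    va_start(ap, fmt);
    r = trap_printk ? debugger_vprintf(fmt, ap) : vprintf(fmt, ap);
    va_end(ap);
    return r;
}

int main(void)
{
    my_printk("normal path\n");
    trap_printk = 1;
    my_printk("redirected path\n");
    return 0;
}
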
diff --git a/kernel/relay.c b/kernel/relay.c
index 3d97f2821611..4268287148c1 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1231,8 +1231,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
1231 size_t read_subbuf = read_start / subbuf_size; 1231 size_t read_subbuf = read_start / subbuf_size;
1232 size_t padding = rbuf->padding[read_subbuf]; 1232 size_t padding = rbuf->padding[read_subbuf];
1233 size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding; 1233 size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
1234 struct page *pages[PIPE_BUFFERS]; 1234 struct page *pages[PIPE_DEF_BUFFERS];
1235 struct partial_page partial[PIPE_BUFFERS]; 1235 struct partial_page partial[PIPE_DEF_BUFFERS];
1236 struct splice_pipe_desc spd = { 1236 struct splice_pipe_desc spd = {
1237 .pages = pages, 1237 .pages = pages,
1238 .nr_pages = 0, 1238 .nr_pages = 0,
@@ -1245,6 +1245,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
1245 1245
1246 if (rbuf->subbufs_produced == rbuf->subbufs_consumed) 1246 if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
1247 return 0; 1247 return 0;
1248 if (splice_grow_spd(pipe, &spd))
1249 return -ENOMEM;
1248 1250
1249 /* 1251 /*
1250 * Adjust read len, if longer than what is available 1252 * Adjust read len, if longer than what is available
@@ -1255,7 +1257,7 @@ static ssize_t subbuf_splice_actor(struct file *in,
1255 subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT; 1257 subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
1256 pidx = (read_start / PAGE_SIZE) % subbuf_pages; 1258 pidx = (read_start / PAGE_SIZE) % subbuf_pages;
1257 poff = read_start & ~PAGE_MASK; 1259 poff = read_start & ~PAGE_MASK;
1258 nr_pages = min_t(unsigned int, subbuf_pages, PIPE_BUFFERS); 1260 nr_pages = min_t(unsigned int, subbuf_pages, pipe->buffers);
1259 1261
1260 for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) { 1262 for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
1261 unsigned int this_len, this_end, private; 1263 unsigned int this_len, this_end, private;
@@ -1289,16 +1291,19 @@ static ssize_t subbuf_splice_actor(struct file *in,
1289 } 1291 }
1290 } 1292 }
1291 1293
1294 ret = 0;
1292 if (!spd.nr_pages) 1295 if (!spd.nr_pages)
1293 return 0; 1296 goto out;
1294 1297
1295 ret = *nonpad_ret = splice_to_pipe(pipe, &spd); 1298 ret = *nonpad_ret = splice_to_pipe(pipe, &spd);
1296 if (ret < 0 || ret < total_len) 1299 if (ret < 0 || ret < total_len)
1297 return ret; 1300 goto out;
1298 1301
1299 if (read_start + ret == nonpad_end) 1302 if (read_start + ret == nonpad_end)
1300 ret += padding; 1303 ret += padding;
1301 1304
1305out:
1306 splice_shrink_spd(pipe, &spd);
1302 return ret; 1307 return ret;
1303} 1308}
1304 1309
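
The relay conversion above follows the new splice contract: splice_grow_spd() may replace the on-stack PIPE_DEF_BUFFERS arrays with larger allocations sized to pipe->buffers, so every exit path, including the early !spd.nr_pages return, must now reach splice_shrink_spd(). A condensed sketch of that acquire/release control flow (illustrative names, not the splice API):

#include <stdlib.h>

struct desc { void *pages; };

static int grow(struct desc *d)
{
	d->pages = malloc(64);		/* stands in for splice_grow_spd() */
	return d->pages ? 0 : -1;
}

static void shrink(struct desc *d)
{
	free(d->pages);			/* stands in for splice_shrink_spd() */
}

static int actor(int have_data)
{
	struct desc d;
	int ret;

	if (grow(&d))
		return -1;		/* nothing acquired yet: plain return is fine */

	ret = 0;
	if (!have_data)
		goto out;		/* early exit must still release */

	ret = 42;			/* the real work would happen here */
out:
	shrink(&d);
	return ret;
}

int main(void)
{
	return actor(1) == 42 ? 0 : 1;
}
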
diff --git a/kernel/sched.c b/kernel/sched.c
index 1d93cd0ae4d3..054a6012de99 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3851,6 +3851,7 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
3851{ 3851{
3852 __wake_up_common(q, mode, 1, 0, NULL); 3852 __wake_up_common(q, mode, 1, 0, NULL);
3853} 3853}
3854EXPORT_SYMBOL_GPL(__wake_up_locked);
3854 3855
3855void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) 3856void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
3856{ 3857{
@@ -7758,9 +7759,9 @@ void normalize_rt_tasks(void)
7758 7759
7759#endif /* CONFIG_MAGIC_SYSRQ */ 7760#endif /* CONFIG_MAGIC_SYSRQ */
7760 7761
7761#ifdef CONFIG_IA64 7762#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB)
7762/* 7763/*
7763 * These functions are only useful for the IA64 MCA handling. 7764 * These functions are only useful for the IA64 MCA handling, or kdb.
7764 * 7765 *
7765 * They can only be called when the whole system has been 7766 * They can only be called when the whole system has been
7766 * stopped - every CPU needs to be quiescent, and no scheduling 7767 * stopped - every CPU needs to be quiescent, and no scheduling
@@ -7780,6 +7781,9 @@ struct task_struct *curr_task(int cpu)
7780 return cpu_curr(cpu); 7781 return cpu_curr(cpu);
7781} 7782}
7782 7783
7784#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */
7785
7786#ifdef CONFIG_IA64
7783/** 7787/**
7784 * set_curr_task - set the current task for a given cpu. 7788 * set_curr_task - set the current task for a given cpu.
7785 * @cpu: the processor in question. 7789 * @cpu: the processor in question.
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 5b496132c28a..906a0f718cb3 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -41,6 +41,7 @@ unsigned long long __attribute__((weak)) sched_clock(void)
41 return (unsigned long long)(jiffies - INITIAL_JIFFIES) 41 return (unsigned long long)(jiffies - INITIAL_JIFFIES)
42 * (NSEC_PER_SEC / HZ); 42 * (NSEC_PER_SEC / HZ);
43} 43}
44EXPORT_SYMBOL_GPL(sched_clock);
44 45
45static __read_mostly int sched_clock_running; 46static __read_mostly int sched_clock_running;
46 47
diff --git a/kernel/signal.c b/kernel/signal.c
index dbd7fe073c55..825a3f24ad76 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2735,3 +2735,43 @@ void __init signals_init(void)
2735{ 2735{
2736 sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC); 2736 sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
2737} 2737}
2738
2739#ifdef CONFIG_KGDB_KDB
2740#include <linux/kdb.h>
2741/*
2742 * kdb_send_sig_info - Allows kdb to send signals without exposing
2743 * signal internals. This function checks if the required locks are
2744 * available before calling the main signal code, to avoid kdb
2745 * deadlocks.
2746 */
2747void
2748kdb_send_sig_info(struct task_struct *t, struct siginfo *info)
2749{
2750 static struct task_struct *kdb_prev_t;
2751 int sig, new_t;
2752 if (!spin_trylock(&t->sighand->siglock)) {
2753 kdb_printf("Can't do kill command now.\n"
2754 "The sigmask lock is held somewhere else in "
2755 "kernel, try again later\n");
2756 return;
2757 }
2758 spin_unlock(&t->sighand->siglock);
2759 new_t = kdb_prev_t != t;
2760 kdb_prev_t = t;
2761 if (t->state != TASK_RUNNING && new_t) {
2762 kdb_printf("Process is not RUNNING, sending a signal from "
2763 "kdb risks deadlock\n"
2764 "on the run queue locks. "
2765 "The signal has _not_ been sent.\n"
2766 "Reissue the kill command if you want to risk "
2767 "the deadlock.\n");
2768 return;
2769 }
2770 sig = info->si_signo;
2771 if (send_sig_info(sig, info, t))
2772 kdb_printf("Fail to deliver Signal %d to process %d.\n",
2773 sig, t->pid);
2774 else
2775 kdb_printf("Signal %d is sent to process %d.\n", sig, t->pid);
2776}
2777#endif /* CONFIG_KGDB_KDB */
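
The trylock probe in kdb_send_sig_info() is a debugger-context idiom: kdb must not sleep on siglock, so it only tests that the lock was free and releases it again at once, accepting the inherent race. A userspace sketch of the probe, with a pthread mutex standing in for the spinlock:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t siglock = PTHREAD_MUTEX_INITIALIZER;

/* Probe only: report whether the lock was free; never keep holding it. */
static int lock_available(pthread_mutex_t *m)
{
	if (pthread_mutex_trylock(m))
		return 0;		/* held elsewhere: bail out */
	pthread_mutex_unlock(m);
	return 1;			/* free a moment ago (racy by design) */
}

int main(void)
{
	if (!lock_available(&siglock)) {
		puts("can't send the signal now, try again later");
		return 1;
	}
	puts("ok to proceed");
	return 0;
}
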
diff --git a/kernel/sys.c b/kernel/sys.c
index 7cb426a58965..0d36d889c74d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -492,10 +492,6 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
492 return -ENOMEM; 492 return -ENOMEM;
493 old = current_cred(); 493 old = current_cred();
494 494
495 retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
496 if (retval)
497 goto error;
498
499 retval = -EPERM; 495 retval = -EPERM;
500 if (rgid != (gid_t) -1) { 496 if (rgid != (gid_t) -1) {
501 if (old->gid == rgid || 497 if (old->gid == rgid ||
@@ -543,10 +539,6 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
543 return -ENOMEM; 539 return -ENOMEM;
544 old = current_cred(); 540 old = current_cred();
545 541
546 retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
547 if (retval)
548 goto error;
549
550 retval = -EPERM; 542 retval = -EPERM;
551 if (capable(CAP_SETGID)) 543 if (capable(CAP_SETGID))
552 new->gid = new->egid = new->sgid = new->fsgid = gid; 544 new->gid = new->egid = new->sgid = new->fsgid = gid;
@@ -610,10 +602,6 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
610 return -ENOMEM; 602 return -ENOMEM;
611 old = current_cred(); 603 old = current_cred();
612 604
613 retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
614 if (retval)
615 goto error;
616
617 retval = -EPERM; 605 retval = -EPERM;
618 if (ruid != (uid_t) -1) { 606 if (ruid != (uid_t) -1) {
619 new->uid = ruid; 607 new->uid = ruid;
@@ -675,10 +663,6 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
675 return -ENOMEM; 663 return -ENOMEM;
676 old = current_cred(); 664 old = current_cred();
677 665
678 retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
679 if (retval)
680 goto error;
681
682 retval = -EPERM; 666 retval = -EPERM;
683 if (capable(CAP_SETUID)) { 667 if (capable(CAP_SETUID)) {
684 new->suid = new->uid = uid; 668 new->suid = new->uid = uid;
@@ -719,9 +703,6 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
719 if (!new) 703 if (!new)
720 return -ENOMEM; 704 return -ENOMEM;
721 705
722 retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
723 if (retval)
724 goto error;
725 old = current_cred(); 706 old = current_cred();
726 707
727 retval = -EPERM; 708 retval = -EPERM;
@@ -788,10 +769,6 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
788 return -ENOMEM; 769 return -ENOMEM;
789 old = current_cred(); 770 old = current_cred();
790 771
791 retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
792 if (retval)
793 goto error;
794
795 retval = -EPERM; 772 retval = -EPERM;
796 if (!capable(CAP_SETGID)) { 773 if (!capable(CAP_SETGID)) {
797 if (rgid != (gid_t) -1 && rgid != old->gid && 774 if (rgid != (gid_t) -1 && rgid != old->gid &&
@@ -851,9 +828,6 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
851 old = current_cred(); 828 old = current_cred();
852 old_fsuid = old->fsuid; 829 old_fsuid = old->fsuid;
853 830
854 if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
855 goto error;
856
857 if (uid == old->uid || uid == old->euid || 831 if (uid == old->uid || uid == old->euid ||
858 uid == old->suid || uid == old->fsuid || 832 uid == old->suid || uid == old->fsuid ||
859 capable(CAP_SETUID)) { 833 capable(CAP_SETUID)) {
@@ -864,7 +838,6 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
864 } 838 }
865 } 839 }
866 840
867error:
868 abort_creds(new); 841 abort_creds(new);
869 return old_fsuid; 842 return old_fsuid;
870 843
@@ -888,9 +861,6 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
888 old = current_cred(); 861 old = current_cred();
889 old_fsgid = old->fsgid; 862 old_fsgid = old->fsgid;
890 863
891 if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
892 goto error;
893
894 if (gid == old->gid || gid == old->egid || 864 if (gid == old->gid || gid == old->egid ||
895 gid == old->sgid || gid == old->fsgid || 865 gid == old->sgid || gid == old->fsgid ||
896 capable(CAP_SETGID)) { 866 capable(CAP_SETGID)) {
@@ -900,7 +870,6 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
900 } 870 }
901 } 871 }
902 872
903error:
904 abort_creds(new); 873 abort_creds(new);
905 return old_fsgid; 874 return old_fsgid;
906 875
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8686b0f5fc12..4c93486b45d1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -52,6 +52,7 @@
52#include <linux/slow-work.h> 52#include <linux/slow-work.h>
53#include <linux/perf_event.h> 53#include <linux/perf_event.h>
54#include <linux/kprobes.h> 54#include <linux/kprobes.h>
55#include <linux/pipe_fs_i.h>
55 56
56#include <asm/uaccess.h> 57#include <asm/uaccess.h>
57#include <asm/processor.h> 58#include <asm/processor.h>
@@ -163,6 +164,27 @@ static int proc_taint(struct ctl_table *table, int write,
163 void __user *buffer, size_t *lenp, loff_t *ppos); 164 void __user *buffer, size_t *lenp, loff_t *ppos);
164#endif 165#endif
165 166
167#ifdef CONFIG_MAGIC_SYSRQ
168static int __sysrq_enabled; /* Note: sysrq code uses its own private copy */
169
170static int sysrq_sysctl_handler(ctl_table *table, int write,
171 void __user *buffer, size_t *lenp,
172 loff_t *ppos)
173{
174 int error;
175
176 error = proc_dointvec(table, write, buffer, lenp, ppos);
177 if (error)
178 return error;
179
180 if (write)
181 sysrq_toggle_support(__sysrq_enabled);
182
183 return 0;
184}
185
186#endif
187
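
With sysrq_sysctl_handler() installed (the kern_table entry is updated later in this patch), a write to the sysctl is pushed into the sysrq core via sysrq_toggle_support() instead of only updating the cached integer. A sketch of exercising it from userspace (requires privilege; error handling kept minimal):

#include <fcntl.h>
#include <unistd.h>

/* Toggle kernel.sysrq; with the new handler this takes effect in the
 * sysrq core immediately, not just in the sysctl's cached integer. */
static int set_sysrq(int on)
{
	int fd = open("/proc/sys/kernel/sysrq", O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, on ? "1\n" : "0\n", 2);
	close(fd);
	return n == 2 ? 0 : -1;
}

int main(void)
{
	return set_sysrq(1) ? 1 : 0;
}
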
166static struct ctl_table root_table[]; 188static struct ctl_table root_table[];
167static struct ctl_table_root sysctl_table_root; 189static struct ctl_table_root sysctl_table_root;
168static struct ctl_table_header root_table_header = { 190static struct ctl_table_header root_table_header = {
@@ -567,7 +589,7 @@ static struct ctl_table kern_table[] = {
567 .data = &__sysrq_enabled, 589 .data = &__sysrq_enabled,
568 .maxlen = sizeof (int), 590 .maxlen = sizeof (int),
569 .mode = 0644, 591 .mode = 0644,
570 .proc_handler = proc_dointvec, 592 .proc_handler = sysrq_sysctl_handler,
571 }, 593 },
572#endif 594#endif
573#ifdef CONFIG_PROC_SYSCTL 595#ifdef CONFIG_PROC_SYSCTL
@@ -621,7 +643,7 @@ static struct ctl_table kern_table[] = {
621#endif 643#endif
622 { 644 {
623 .procname = "userprocess_debug", 645 .procname = "userprocess_debug",
624 .data = &sysctl_userprocess_debug, 646 .data = &show_unhandled_signals,
625 .maxlen = sizeof(int), 647 .maxlen = sizeof(int),
626 .mode = 0644, 648 .mode = 0644,
627 .proc_handler = proc_dointvec, 649 .proc_handler = proc_dointvec,
@@ -1423,6 +1445,14 @@ static struct ctl_table fs_table[] = {
1423 .child = binfmt_misc_table, 1445 .child = binfmt_misc_table,
1424 }, 1446 },
1425#endif 1447#endif
1448 {
1449 .procname = "pipe-max-pages",
1450 .data = &pipe_max_pages,
1451 .maxlen = sizeof(int),
1452 .mode = 0644,
1453 .proc_handler = &proc_dointvec_minmax,
1454 .extra1 = &two,
1455 },
1426/* 1456/*
1427 * NOTE: do not add new entries to this table unless you have read 1457 * NOTE: do not add new entries to this table unless you have read
1428 * Documentation/sysctl/ctl_unnumbered.txt 1458 * Documentation/sysctl/ctl_unnumbered.txt
@@ -1431,7 +1461,8 @@ static struct ctl_table fs_table[] = {
1431}; 1461};
1432 1462
1433static struct ctl_table debug_table[] = { 1463static struct ctl_table debug_table[] = {
1434#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) 1464#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
1465 defined(CONFIG_S390)
1435 { 1466 {
1436 .procname = "exception-trace", 1467 .procname = "exception-trace",
1437 .data = &show_unhandled_signals, 1468 .data = &show_unhandled_signals,
@@ -2040,8 +2071,132 @@ int proc_dostring(struct ctl_table *table, int write,
2040 buffer, lenp, ppos); 2071 buffer, lenp, ppos);
2041} 2072}
2042 2073
2074static size_t proc_skip_spaces(char **buf)
2075{
2076 size_t ret;
2077 char *tmp = skip_spaces(*buf);
2078 ret = tmp - *buf;
2079 *buf = tmp;
2080 return ret;
2081}
2082
2083static void proc_skip_char(char **buf, size_t *size, const char v)
2084{
2085 while (*size) {
2086 if (**buf != v)
2087 break;
2088 (*size)--;
2089 (*buf)++;
2090 }
2091}
2092
2093#define TMPBUFLEN 22
2094/**
2095 * proc_get_long - reads an ASCII formatted integer from a user buffer
2096 *
2097 * @buf: a kernel buffer
2098 * @size: size of the kernel buffer
2099 * @val: this is where the number will be stored
2100 * @neg: set to %TRUE if number is negative
2101 * @perm_tr: a vector which contains the allowed trailers
2102 * @perm_tr_len: size of the perm_tr vector
2103 * @tr: pointer to store the trailer character
2104 *
2105 * In case of success %0 is returned and @buf and @size are updated with
2106 * the amount of bytes read. If @tr is non-NULL and a trailing
2107 * character exists (size is non-zero after returning from this
2108 * function), @tr is updated with the trailing character.
2109 */
2110static int proc_get_long(char **buf, size_t *size,
2111 unsigned long *val, bool *neg,
2112 const char *perm_tr, unsigned perm_tr_len, char *tr)
2113{
2114 int len;
2115 char *p, tmp[TMPBUFLEN];
2116
2117 if (!*size)
2118 return -EINVAL;
2119
2120 len = *size;
2121 if (len > TMPBUFLEN - 1)
2122 len = TMPBUFLEN - 1;
2123
2124 memcpy(tmp, *buf, len);
2125
2126 tmp[len] = 0;
2127 p = tmp;
2128 if (*p == '-' && *size > 1) {
2129 *neg = true;
2130 p++;
2131 } else
2132 *neg = false;
2133 if (!isdigit(*p))
2134 return -EINVAL;
2135
2136 *val = simple_strtoul(p, &p, 0);
2137
2138 len = p - tmp;
2139
2140 /* We don't know if the next char is whitespace, thus we may accept
2141 * invalid integers (e.g. 1234...a) or two integers instead of one
2142 * (e.g. 123...1). So let's not allow such large numbers. */
2143 if (len == TMPBUFLEN - 1)
2144 return -EINVAL;
2145
2146 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2147 return -EINVAL;
2148
2149 if (tr && (len < *size))
2150 *tr = *p;
2151
2152 *buf += len;
2153 *size -= len;
2154
2155 return 0;
2156}
2157
2158/**
2159 * proc_put_long - converts an integer to a decimal ASCII formatted string
2160 *
2161 * @buf: the user buffer
2162 * @size: the size of the user buffer
2163 * @val: the integer to be converted
2164 * @neg: sign of the number, %TRUE for negative
2165 *
2166 * In case of success %0 is returned and @buf and @size are updated with
2167 * the amount of bytes written.
2168 */
2169static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2170 bool neg)
2171{
2172 int len;
2173 char tmp[TMPBUFLEN], *p = tmp;
2174
2175 sprintf(p, "%s%lu", neg ? "-" : "", val);
2176 len = strlen(tmp);
2177 if (len > *size)
2178 len = *size;
2179 if (copy_to_user(*buf, tmp, len))
2180 return -EFAULT;
2181 *size -= len;
2182 *buf += len;
2183 return 0;
2184}
2185#undef TMPBUFLEN
2186
2187static int proc_put_char(void __user **buf, size_t *size, char c)
2188{
2189 if (*size) {
2190 char __user **buffer = (char __user **)buf;
2191 if (put_user(c, *buffer))
2192 return -EFAULT;
2193 (*size)--, (*buffer)++;
2194 *buf = *buffer;
2195 }
2196 return 0;
2197}
2043 2198
2044static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, 2199static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
2045 int *valp, 2200 int *valp,
2046 int write, void *data) 2201 int write, void *data)
2047{ 2202{
@@ -2050,33 +2205,31 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
2050 } else { 2205 } else {
2051 int val = *valp; 2206 int val = *valp;
2052 if (val < 0) { 2207 if (val < 0) {
2053 *negp = -1; 2208 *negp = true;
2054 *lvalp = (unsigned long)-val; 2209 *lvalp = (unsigned long)-val;
2055 } else { 2210 } else {
2056 *negp = 0; 2211 *negp = false;
2057 *lvalp = (unsigned long)val; 2212 *lvalp = (unsigned long)val;
2058 } 2213 }
2059 } 2214 }
2060 return 0; 2215 return 0;
2061} 2216}
2062 2217
2218static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2219
2063static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, 2220static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2064 int write, void __user *buffer, 2221 int write, void __user *buffer,
2065 size_t *lenp, loff_t *ppos, 2222 size_t *lenp, loff_t *ppos,
2066 int (*conv)(int *negp, unsigned long *lvalp, int *valp, 2223 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2067 int write, void *data), 2224 int write, void *data),
2068 void *data) 2225 void *data)
2069{ 2226{
2070#define TMPBUFLEN 21 2227 int *i, vleft, first = 1, err = 0;
2071 int *i, vleft, first = 1, neg; 2228 unsigned long page = 0;
2072 unsigned long lval; 2229 size_t left;
2073 size_t left, len; 2230 char *kbuf;
2074 2231
2075 char buf[TMPBUFLEN], *p; 2232 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2076 char __user *s = buffer;
2077
2078 if (!tbl_data || !table->maxlen || !*lenp ||
2079 (*ppos && !write)) {
2080 *lenp = 0; 2233 *lenp = 0;
2081 return 0; 2234 return 0;
2082 } 2235 }
@@ -2088,89 +2241,69 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2088 if (!conv) 2241 if (!conv)
2089 conv = do_proc_dointvec_conv; 2242 conv = do_proc_dointvec_conv;
2090 2243
2244 if (write) {
2245 if (left > PAGE_SIZE - 1)
2246 left = PAGE_SIZE - 1;
2247 page = __get_free_page(GFP_TEMPORARY);
2248 kbuf = (char *) page;
2249 if (!kbuf)
2250 return -ENOMEM;
2251 if (copy_from_user(kbuf, buffer, left)) {
2252 err = -EFAULT;
2253 goto free;
2254 }
2255 kbuf[left] = 0;
2256 }
2257
2091 for (; left && vleft--; i++, first=0) { 2258 for (; left && vleft--; i++, first=0) {
2092 if (write) { 2259 unsigned long lval;
2093 while (left) { 2260 bool neg;
2094 char c;
2095 if (get_user(c, s))
2096 return -EFAULT;
2097 if (!isspace(c))
2098 break;
2099 left--;
2100 s++;
2101 }
2102 if (!left)
2103 break;
2104 neg = 0;
2105 len = left;
2106 if (len > sizeof(buf) - 1)
2107 len = sizeof(buf) - 1;
2108 if (copy_from_user(buf, s, len))
2109 return -EFAULT;
2110 buf[len] = 0;
2111 p = buf;
2112 if (*p == '-' && left > 1) {
2113 neg = 1;
2114 p++;
2115 }
2116 if (*p < '0' || *p > '9')
2117 break;
2118 2261
2119 lval = simple_strtoul(p, &p, 0); 2262 if (write) {
2263 left -= proc_skip_spaces(&kbuf);
2120 2264
2121 len = p-buf; 2265 err = proc_get_long(&kbuf, &left, &lval, &neg,
2122 if ((len < left) && *p && !isspace(*p)) 2266 proc_wspace_sep,
2267 sizeof(proc_wspace_sep), NULL);
2268 if (err)
2123 break; 2269 break;
2124 s += len; 2270 if (conv(&neg, &lval, i, 1, data)) {
2125 left -= len; 2271 err = -EINVAL;
2126
2127 if (conv(&neg, &lval, i, 1, data))
2128 break; 2272 break;
2273 }
2129 } else { 2274 } else {
2130 p = buf; 2275 if (conv(&neg, &lval, i, 0, data)) {
2276 err = -EINVAL;
2277 break;
2278 }
2131 if (!first) 2279 if (!first)
2132 *p++ = '\t'; 2280 err = proc_put_char(&buffer, &left, '\t');
2133 2281 if (err)
2134 if (conv(&neg, &lval, i, 0, data)) 2282 break;
2283 err = proc_put_long(&buffer, &left, lval, neg);
2284 if (err)
2135 break; 2285 break;
2136
2137 sprintf(p, "%s%lu", neg ? "-" : "", lval);
2138 len = strlen(buf);
2139 if (len > left)
2140 len = left;
2141 if(copy_to_user(s, buf, len))
2142 return -EFAULT;
2143 left -= len;
2144 s += len;
2145 } 2286 }
2146 } 2287 }
2147 2288
2148 if (!write && !first && left) { 2289 if (!write && !first && left && !err)
2149 if(put_user('\n', s)) 2290 err = proc_put_char(&buffer, &left, '\n');
2150 return -EFAULT; 2291 if (write && !err)
2151 left--, s++; 2292 left -= proc_skip_spaces(&kbuf);
2152 } 2293free:
2153 if (write) { 2294 if (write) {
2154 while (left) { 2295 free_page(page);
2155 char c; 2296 if (first)
2156 if (get_user(c, s++)) 2297 return err ? : -EINVAL;
2157 return -EFAULT;
2158 if (!isspace(c))
2159 break;
2160 left--;
2161 }
2162 } 2298 }
2163 if (write && first)
2164 return -EINVAL;
2165 *lenp -= left; 2299 *lenp -= left;
2166 *ppos += *lenp; 2300 *ppos += *lenp;
2167 return 0; 2301 return err;
2168#undef TMPBUFLEN
2169} 2302}
2170 2303
2171static int do_proc_dointvec(struct ctl_table *table, int write, 2304static int do_proc_dointvec(struct ctl_table *table, int write,
2172 void __user *buffer, size_t *lenp, loff_t *ppos, 2305 void __user *buffer, size_t *lenp, loff_t *ppos,
2173 int (*conv)(int *negp, unsigned long *lvalp, int *valp, 2306 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2174 int write, void *data), 2307 int write, void *data),
2175 void *data) 2308 void *data)
2176{ 2309{
@@ -2238,8 +2371,8 @@ struct do_proc_dointvec_minmax_conv_param {
2238 int *max; 2371 int *max;
2239}; 2372};
2240 2373
2241static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, 2374static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
2242 int *valp, 2375 int *valp,
2243 int write, void *data) 2376 int write, void *data)
2244{ 2377{
2245 struct do_proc_dointvec_minmax_conv_param *param = data; 2378 struct do_proc_dointvec_minmax_conv_param *param = data;
@@ -2252,10 +2385,10 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
2252 } else { 2385 } else {
2253 int val = *valp; 2386 int val = *valp;
2254 if (val < 0) { 2387 if (val < 0) {
2255 *negp = -1; 2388 *negp = true;
2256 *lvalp = (unsigned long)-val; 2389 *lvalp = (unsigned long)-val;
2257 } else { 2390 } else {
2258 *negp = 0; 2391 *negp = false;
2259 *lvalp = (unsigned long)val; 2392 *lvalp = (unsigned long)val;
2260 } 2393 }
2261 } 2394 }
@@ -2295,102 +2428,78 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
2295 unsigned long convmul, 2428 unsigned long convmul,
2296 unsigned long convdiv) 2429 unsigned long convdiv)
2297{ 2430{
2298#define TMPBUFLEN 21 2431 unsigned long *i, *min, *max;
2299 unsigned long *i, *min, *max, val; 2432 int vleft, first = 1, err = 0;
2300 int vleft, first=1, neg; 2433 unsigned long page = 0;
2301 size_t len, left; 2434 size_t left;
2302 char buf[TMPBUFLEN], *p; 2435 char *kbuf;
2303 char __user *s = buffer; 2436
2304 2437 if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2305 if (!data || !table->maxlen || !*lenp ||
2306 (*ppos && !write)) {
2307 *lenp = 0; 2438 *lenp = 0;
2308 return 0; 2439 return 0;
2309 } 2440 }
2310 2441
2311 i = (unsigned long *) data; 2442 i = (unsigned long *) data;
2312 min = (unsigned long *) table->extra1; 2443 min = (unsigned long *) table->extra1;
2313 max = (unsigned long *) table->extra2; 2444 max = (unsigned long *) table->extra2;
2314 vleft = table->maxlen / sizeof(unsigned long); 2445 vleft = table->maxlen / sizeof(unsigned long);
2315 left = *lenp; 2446 left = *lenp;
2316 2447
2448 if (write) {
2449 if (left > PAGE_SIZE - 1)
2450 left = PAGE_SIZE - 1;
2451 page = __get_free_page(GFP_TEMPORARY);
2452 kbuf = (char *) page;
2453 if (!kbuf)
2454 return -ENOMEM;
2455 if (copy_from_user(kbuf, buffer, left)) {
2456 err = -EFAULT;
2457 goto free;
2458 }
2459 kbuf[left] = 0;
2460 }
2461
2317 for (; left && vleft--; i++, min++, max++, first=0) { 2462 for (; left && vleft--; i++, min++, max++, first=0) {
2463 unsigned long val;
2464
2318 if (write) { 2465 if (write) {
2319 while (left) { 2466 bool neg;
2320 char c; 2467
2321 if (get_user(c, s)) 2468 left -= proc_skip_spaces(&kbuf);
2322 return -EFAULT; 2469
2323 if (!isspace(c)) 2470 err = proc_get_long(&kbuf, &left, &val, &neg,
2324 break; 2471 proc_wspace_sep,
2325 left--; 2472 sizeof(proc_wspace_sep), NULL);
2326 s++; 2473 if (err)
2327 }
2328 if (!left)
2329 break;
2330 neg = 0;
2331 len = left;
2332 if (len > TMPBUFLEN-1)
2333 len = TMPBUFLEN-1;
2334 if (copy_from_user(buf, s, len))
2335 return -EFAULT;
2336 buf[len] = 0;
2337 p = buf;
2338 if (*p == '-' && left > 1) {
2339 neg = 1;
2340 p++;
2341 }
2342 if (*p < '0' || *p > '9')
2343 break;
2344 val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2345 len = p-buf;
2346 if ((len < left) && *p && !isspace(*p))
2347 break; 2474 break;
2348 if (neg) 2475 if (neg)
2349 val = -val;
2350 s += len;
2351 left -= len;
2352
2353 if(neg)
2354 continue; 2476 continue;
2355 if ((min && val < *min) || (max && val > *max)) 2477 if ((min && val < *min) || (max && val > *max))
2356 continue; 2478 continue;
2357 *i = val; 2479 *i = val;
2358 } else { 2480 } else {
2359 p = buf; 2481 val = convdiv * (*i) / convmul;
2360 if (!first) 2482 if (!first)
2361 *p++ = '\t'; 2483 err = proc_put_char(&buffer, &left, '\t');
2362 sprintf(p, "%lu", convdiv * (*i) / convmul); 2484 err = proc_put_long(&buffer, &left, val, false);
2363 len = strlen(buf); 2485 if (err)
2364 if (len > left) 2486 break;
2365 len = left;
2366 if(copy_to_user(s, buf, len))
2367 return -EFAULT;
2368 left -= len;
2369 s += len;
2370 } 2487 }
2371 } 2488 }
2372 2489
2373 if (!write && !first && left) { 2490 if (!write && !first && left && !err)
2374 if(put_user('\n', s)) 2491 err = proc_put_char(&buffer, &left, '\n');
2375 return -EFAULT; 2492 if (write && !err)
2376 left--, s++; 2493 left -= proc_skip_spaces(&kbuf);
2377 } 2494free:
2378 if (write) { 2495 if (write) {
2379 while (left) { 2496 free_page(page);
2380 char c; 2497 if (first)
2381 if (get_user(c, s++)) 2498 return err ? : -EINVAL;
2382 return -EFAULT;
2383 if (!isspace(c))
2384 break;
2385 left--;
2386 }
2387 } 2499 }
2388 if (write && first)
2389 return -EINVAL;
2390 *lenp -= left; 2500 *lenp -= left;
2391 *ppos += *lenp; 2501 *ppos += *lenp;
2392 return 0; 2502 return err;
2393#undef TMPBUFLEN
2394} 2503}
2395 2504
2396static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, 2505static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
@@ -2451,7 +2560,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2451} 2560}
2452 2561
2453 2562
2454static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, 2563static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2455 int *valp, 2564 int *valp,
2456 int write, void *data) 2565 int write, void *data)
2457{ 2566{
@@ -2463,10 +2572,10 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2463 int val = *valp; 2572 int val = *valp;
2464 unsigned long lval; 2573 unsigned long lval;
2465 if (val < 0) { 2574 if (val < 0) {
2466 *negp = -1; 2575 *negp = true;
2467 lval = (unsigned long)-val; 2576 lval = (unsigned long)-val;
2468 } else { 2577 } else {
2469 *negp = 0; 2578 *negp = false;
2470 lval = (unsigned long)val; 2579 lval = (unsigned long)val;
2471 } 2580 }
2472 *lvalp = lval / HZ; 2581 *lvalp = lval / HZ;
@@ -2474,7 +2583,7 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2474 return 0; 2583 return 0;
2475} 2584}
2476 2585
2477static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, 2586static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2478 int *valp, 2587 int *valp,
2479 int write, void *data) 2588 int write, void *data)
2480{ 2589{
@@ -2486,10 +2595,10 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2486 int val = *valp; 2595 int val = *valp;
2487 unsigned long lval; 2596 unsigned long lval;
2488 if (val < 0) { 2597 if (val < 0) {
2489 *negp = -1; 2598 *negp = true;
2490 lval = (unsigned long)-val; 2599 lval = (unsigned long)-val;
2491 } else { 2600 } else {
2492 *negp = 0; 2601 *negp = false;
2493 lval = (unsigned long)val; 2602 lval = (unsigned long)val;
2494 } 2603 }
2495 *lvalp = jiffies_to_clock_t(lval); 2604 *lvalp = jiffies_to_clock_t(lval);
@@ -2497,7 +2606,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2497 return 0; 2606 return 0;
2498} 2607}
2499 2608
2500static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, 2609static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2501 int *valp, 2610 int *valp,
2502 int write, void *data) 2611 int write, void *data)
2503{ 2612{
@@ -2507,10 +2616,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
2507 int val = *valp; 2616 int val = *valp;
2508 unsigned long lval; 2617 unsigned long lval;
2509 if (val < 0) { 2618 if (val < 0) {
2510 *negp = -1; 2619 *negp = true;
2511 lval = (unsigned long)-val; 2620 lval = (unsigned long)-val;
2512 } else { 2621 } else {
2513 *negp = 0; 2622 *negp = false;
2514 lval = (unsigned long)val; 2623 lval = (unsigned long)val;
2515 } 2624 }
2516 *lvalp = jiffies_to_msecs(lval); 2625 *lvalp = jiffies_to_msecs(lval);
@@ -2607,6 +2716,157 @@ static int proc_do_cad_pid(struct ctl_table *table, int write,
2607 return 0; 2716 return 0;
2608} 2717}
2609 2718
2719/**
2720 * proc_do_large_bitmap - read/write from/to a large bitmap
2721 * @table: the sysctl table
2722 * @write: %TRUE if this is a write to the sysctl file
2723 * @buffer: the user buffer
2724 * @lenp: the size of the user buffer
2725 * @ppos: file position
2726 *
2727 * The bitmap is stored at table->data and the bitmap length (in bits)
2728 * in table->maxlen.
2729 *
2730 * We use a comma-separated range format (e.g. 1,3-4,10-10) so that
2731 * large bitmaps may be represented in a compact manner. Writing into
2732 * the file will clear the bitmap then update it with the given input.
2733 *
2734 * Returns 0 on success.
2735 */
2736int proc_do_large_bitmap(struct ctl_table *table, int write,
2737 void __user *buffer, size_t *lenp, loff_t *ppos)
2738{
2739 int err = 0;
2740 bool first = true;
2741 size_t left = *lenp;
2742 unsigned long bitmap_len = table->maxlen;
2743 unsigned long *bitmap = (unsigned long *) table->data;
2744 unsigned long *tmp_bitmap = NULL;
2745 char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2746
2747 if (!bitmap_len || !left || (*ppos && !write)) {
2748 *lenp = 0;
2749 return 0;
2750 }
2751
2752 if (write) {
2753 unsigned long page = 0;
2754 char *kbuf;
2755
2756 if (left > PAGE_SIZE - 1)
2757 left = PAGE_SIZE - 1;
2758
2759 page = __get_free_page(GFP_TEMPORARY);
2760 kbuf = (char *) page;
2761 if (!kbuf)
2762 return -ENOMEM;
2763 if (copy_from_user(kbuf, buffer, left)) {
2764 free_page(page);
2765 return -EFAULT;
2766 }
2767 kbuf[left] = 0;
2768
2769 tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2770 GFP_KERNEL);
2771 if (!tmp_bitmap) {
2772 free_page(page);
2773 return -ENOMEM;
2774 }
2775 proc_skip_char(&kbuf, &left, '\n');
2776 while (!err && left) {
2777 unsigned long val_a, val_b;
2778 bool neg;
2779
2780 err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2781 sizeof(tr_a), &c);
2782 if (err)
2783 break;
2784 if (val_a >= bitmap_len || neg) {
2785 err = -EINVAL;
2786 break;
2787 }
2788
2789 val_b = val_a;
2790 if (left) {
2791 kbuf++;
2792 left--;
2793 }
2794
2795 if (c == '-') {
2796 err = proc_get_long(&kbuf, &left, &val_b,
2797 &neg, tr_b, sizeof(tr_b),
2798 &c);
2799 if (err)
2800 break;
2801 if (val_b >= bitmap_len || neg ||
2802 val_a > val_b) {
2803 err = -EINVAL;
2804 break;
2805 }
2806 if (left) {
2807 kbuf++;
2808 left--;
2809 }
2810 }
2811
2812 while (val_a <= val_b)
2813 set_bit(val_a++, tmp_bitmap);
2814
2815 first = 0;
2816 proc_skip_char(&kbuf, &left, '\n');
2817 }
2818 free_page(page);
2819 } else {
2820 unsigned long bit_a, bit_b = 0;
2821
2822 while (left) {
2823 bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2824 if (bit_a >= bitmap_len)
2825 break;
2826 bit_b = find_next_zero_bit(bitmap, bitmap_len,
2827 bit_a + 1) - 1;
2828
2829 if (!first) {
2830 err = proc_put_char(&buffer, &left, ',');
2831 if (err)
2832 break;
2833 }
2834 err = proc_put_long(&buffer, &left, bit_a, false);
2835 if (err)
2836 break;
2837 if (bit_a != bit_b) {
2838 err = proc_put_char(&buffer, &left, '-');
2839 if (err)
2840 break;
2841 err = proc_put_long(&buffer, &left, bit_b, false);
2842 if (err)
2843 break;
2844 }
2845
2846 first = 0; bit_b++;
2847 }
2848 if (!err)
2849 err = proc_put_char(&buffer, &left, '\n');
2850 }
2851
2852 if (!err) {
2853 if (write) {
2854 if (*ppos)
2855 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2856 else
2857 memcpy(bitmap, tmp_bitmap,
2858 BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2859 }
2860 kfree(tmp_bitmap);
2861 *lenp -= left;
2862 *ppos += *lenp;
2863 return 0;
2864 } else {
2865 kfree(tmp_bitmap);
2866 return err;
2867 }
2868}
2869
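
The range format parsed and emitted above can be exercised directly once a sysctl is backed by proc_do_large_bitmap(); note that a write at a non-zero offset ORs into the existing bitmap (the bitmap_or() branch) while a fresh write replaces it. A hypothetical round trip (the file path below is made up for illustration):

#include <stdio.h>

int main(void)
{
	/* Hypothetical path: any sysctl whose handler is proc_do_large_bitmap(). */
	FILE *f = fopen("/proc/sys/example/bitmap", "r+");
	char line[128];

	if (!f)
		return 1;
	fputs("1,3-4,10\n", f);		/* sets bits 1, 3, 4 and 10 */
	rewind(f);			/* flushes the write, resets the offset */
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* reads back "1,3-4,10" */
	fclose(f);
	return 0;
}
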
2610#else /* CONFIG_PROC_FS */ 2870#else /* CONFIG_PROC_FS */
2611 2871
2612int proc_dostring(struct ctl_table *table, int write, 2872int proc_dostring(struct ctl_table *table, int write,
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 59030570f5ca..937d31dc8566 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -224,7 +224,6 @@ static const struct bin_table bin_net_ipv4_route_table[] = {
224 { CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" }, 224 { CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" },
225 { CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" }, 225 { CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" },
226 { CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" }, 226 { CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" },
227 { CTL_INT, NET_IPV4_ROUTE_SECRET_INTERVAL, "secret_interval" },
228 {} 227 {}
229}; 228};
230 229
diff --git a/kernel/time.c b/kernel/time.c
index 656dccfe1cbb..50612faa9baf 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
132 */ 132 */
133static inline void warp_clock(void) 133static inline void warp_clock(void)
134{ 134{
135 write_seqlock_irq(&xtime_lock); 135 struct timespec delta, adjust;
136 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; 136 delta.tv_sec = sys_tz.tz_minuteswest * 60;
137 xtime.tv_sec += sys_tz.tz_minuteswest * 60; 137 delta.tv_nsec = 0;
138 update_xtime_cache(0); 138 adjust = timespec_add_safe(current_kernel_time(), delta);
139 write_sequnlock_irq(&xtime_lock); 139 do_settimeofday(&adjust);
140 clock_was_set();
141} 140}
142 141
143/* 142/*
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1f5dde637457..f08e99c1d561 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs)
625 list_add(&cs->list, entry); 625 list_add(&cs->list, entry);
626} 626}
627 627
628
629/*
630 * Maximum time we expect to go between ticks. This includes idle
631 * tickless time. It provides the trade off between selecting a
632 * mult/shift pair that is very precise but can only handle a short
633 * period of time, vs. a mult/shift pair that can handle long periods
634 * of time but isn't as precise.
635 *
636 * This is a subsystem constant, and actual hardware limitations
637 * may override it (i.e. clocksources that wrap every 3 seconds).
638 */
639#define MAX_UPDATE_LENGTH 5 /* Seconds */
640
641/**
642 * __clocksource_register_scale - Used to install new clocksources
643 * @cs: clocksource to be registered
644 * @scale: Scale factor multiplied against freq to get clocksource hz
645 * @freq: clocksource frequency (cycles per second) divided by scale
646 *
647 * Returns -EBUSY if registration fails, zero otherwise.
648 *
649 * This *SHOULD NOT* be called directly! Please use the
650 * clocksource_register_hz() or clocksource_register_khz() helper functions.
651 */
652int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
653{
654
655 /*
656 * Ideally we want to use some of the limits used in
657 * clocksource_max_deferment, to provide a more informed
658 * MAX_UPDATE_LENGTH. But for now this just gets the
659 * register interface working properly.
660 */
661 clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
662 NSEC_PER_SEC/scale,
663 MAX_UPDATE_LENGTH*scale);
664 cs->max_idle_ns = clocksource_max_deferment(cs);
665
666 mutex_lock(&clocksource_mutex);
667 clocksource_enqueue(cs);
668 clocksource_select();
669 clocksource_enqueue_watchdog(cs);
670 mutex_unlock(&clocksource_mutex);
671 return 0;
672}
673EXPORT_SYMBOL_GPL(__clocksource_register_scale);
674
675
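
The (mult, shift) pair chosen above must keep the cycles-to-nanoseconds conversion accurate over at least MAX_UPDATE_LENGTH seconds of cycles without overflowing 64 bits; that is the trade-off clocks_calc_mult_shift() resolves. The conversion itself is just a multiply and a shift, sketched here with an illustrative pair for a 1 MHz clocksource:

#include <stdint.h>
#include <stdio.h>

/* The relation the chosen pair must keep accurate:
 *   ns = (cycles * mult) >> shift
 */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

int main(void)
{
	/* Illustrative pair for a 1 MHz source: 1000 ns per cycle exactly. */
	uint32_t mult = 1000u << 10, shift = 10;

	printf("%llu\n", (unsigned long long)cyc2ns(5, mult, shift)); /* 5000 */
	return 0;
}
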
628/** 676/**
629 * clocksource_register - Used to install new clocksources 677 * clocksource_register - Used to install new clocksources
630 * @t: clocksource to be registered 678 * @t: clocksource to be registered
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7c0f180d6e9d..c63116863a80 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -69,7 +69,7 @@ static s64 time_freq;
69/* time at last adjustment (secs): */ 69/* time at last adjustment (secs): */
70static long time_reftime; 70static long time_reftime;
71 71
72long time_adjust; 72static long time_adjust;
73 73
74/* constant (boot-param configurable) NTP tick adjustment (upscaled) */ 74/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
75static s64 ntp_tick_adj; 75static s64 ntp_tick_adj;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 39f6177fafac..caf8d4d4f5c8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,13 +165,6 @@ struct timespec raw_time;
165/* flag for if timekeeping is suspended */ 165/* flag for if timekeeping is suspended */
166int __read_mostly timekeeping_suspended; 166int __read_mostly timekeeping_suspended;
167 167
168static struct timespec xtime_cache __attribute__ ((aligned (16)));
169void update_xtime_cache(u64 nsec)
170{
171 xtime_cache = xtime;
172 timespec_add_ns(&xtime_cache, nsec);
173}
174
175/* must hold xtime_lock */ 168/* must hold xtime_lock */
176void timekeeping_leap_insert(int leapsecond) 169void timekeeping_leap_insert(int leapsecond)
177{ 170{
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)
332 325
333 xtime = *tv; 326 xtime = *tv;
334 327
335 update_xtime_cache(0);
336
337 timekeeper.ntp_error = 0; 328 timekeeper.ntp_error = 0;
338 ntp_clear(); 329 ntp_clear();
339 330
@@ -559,7 +550,6 @@ void __init timekeeping_init(void)
559 } 550 }
560 set_normalized_timespec(&wall_to_monotonic, 551 set_normalized_timespec(&wall_to_monotonic,
561 -boot.tv_sec, -boot.tv_nsec); 552 -boot.tv_sec, -boot.tv_nsec);
562 update_xtime_cache(0);
563 total_sleep_time.tv_sec = 0; 553 total_sleep_time.tv_sec = 0;
564 total_sleep_time.tv_nsec = 0; 554 total_sleep_time.tv_nsec = 0;
565 write_sequnlock_irqrestore(&xtime_lock, flags); 555 write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
593 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); 583 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
594 total_sleep_time = timespec_add_safe(total_sleep_time, ts); 584 total_sleep_time = timespec_add_safe(total_sleep_time, ts);
595 } 585 }
596 update_xtime_cache(0);
597 /* re-base the last cycle value */ 586 /* re-base the last cycle value */
598 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); 587 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
599 timekeeper.ntp_error = 0; 588 timekeeper.ntp_error = 0;
@@ -788,7 +777,6 @@ void update_wall_time(void)
788{ 777{
789 struct clocksource *clock; 778 struct clocksource *clock;
790 cycle_t offset; 779 cycle_t offset;
791 u64 nsecs;
792 int shift = 0, maxshift; 780 int shift = 0, maxshift;
793 781
794 /* Make sure we're fully resumed: */ 782 /* Make sure we're fully resumed: */
@@ -847,7 +835,9 @@ void update_wall_time(void)
847 timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; 835 timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
848 } 836 }
849 837
850 /* store full nanoseconds into xtime after rounding it up and 838
839 /*
840 * Store full nanoseconds into xtime after rounding it up and
851 * add the remainder to the error difference. 841 * add the remainder to the error difference.
852 */ 842 */
853 xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; 843 xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
@@ -855,8 +845,15 @@ void update_wall_time(void)
855 timekeeper.ntp_error += timekeeper.xtime_nsec << 845 timekeeper.ntp_error += timekeeper.xtime_nsec <<
856 timekeeper.ntp_error_shift; 846 timekeeper.ntp_error_shift;
857 847
858 nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); 848 /*
859 update_xtime_cache(nsecs); 849 * Finally, make sure that after the rounding
850 * xtime.tv_nsec isn't larger than NSEC_PER_SEC
851 */
852 if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
853 xtime.tv_nsec -= NSEC_PER_SEC;
854 xtime.tv_sec++;
855 second_overflow();
856 }
860 857
861 /* check to see if there is a new clocksource to use */ 858 /* check to see if there is a new clocksource to use */
862 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); 859 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
@@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
896 893
897unsigned long get_seconds(void) 894unsigned long get_seconds(void)
898{ 895{
899 return xtime_cache.tv_sec; 896 return xtime.tv_sec;
900} 897}
901EXPORT_SYMBOL(get_seconds); 898EXPORT_SYMBOL(get_seconds);
902 899
903struct timespec __current_kernel_time(void) 900struct timespec __current_kernel_time(void)
904{ 901{
905 return xtime_cache; 902 return xtime;
906} 903}
907 904
908struct timespec current_kernel_time(void) 905struct timespec current_kernel_time(void)
@@ -913,7 +910,7 @@ struct timespec current_kernel_time(void)
913 do { 910 do {
914 seq = read_seqbegin(&xtime_lock); 911 seq = read_seqbegin(&xtime_lock);
915 912
916 now = xtime_cache; 913 now = xtime;
917 } while (read_seqretry(&xtime_lock, seq)); 914 } while (read_seqretry(&xtime_lock, seq));
918 915
919 return now; 916 return now;
@@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void)
928 do { 925 do {
929 seq = read_seqbegin(&xtime_lock); 926 seq = read_seqbegin(&xtime_lock);
930 927
931 now = xtime_cache; 928 now = xtime;
932 mono = wall_to_monotonic; 929 mono = wall_to_monotonic;
933 } while (read_seqretry(&xtime_lock, seq)); 930 } while (read_seqretry(&xtime_lock, seq));
934 931
diff --git a/kernel/timer.c b/kernel/timer.c
index aeb6a54f2771..9199f3c52215 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)
319} 319}
320EXPORT_SYMBOL_GPL(round_jiffies_up_relative); 320EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
321 321
322/**
323 * set_timer_slack - set the allowed slack for a timer
324 * @slack_hz: the amount of time (in jiffies) allowed for rounding
325 *
326 * Set the amount of time, in jiffies, that a certain timer has
327 * in terms of slack. By setting this value, the timer subsystem
328 * will schedule the actual timer somewhere between
329 * the time mod_timer() asks for, and that time plus the slack.
330 *
331 * By setting the slack to -1, a percentage of the delay is used
332 * instead.
333 */
334void set_timer_slack(struct timer_list *timer, int slack_hz)
335{
336 timer->slack = slack_hz;
337}
338EXPORT_SYMBOL_GPL(set_timer_slack);
339
322 340
323static inline void set_running_timer(struct tvec_base *base, 341static inline void set_running_timer(struct tvec_base *base,
324 struct timer_list *timer) 342 struct timer_list *timer)
@@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer,
550{ 568{
551 timer->entry.next = NULL; 569 timer->entry.next = NULL;
552 timer->base = __raw_get_cpu_var(tvec_bases); 570 timer->base = __raw_get_cpu_var(tvec_bases);
571 timer->slack = -1;
553#ifdef CONFIG_TIMER_STATS 572#ifdef CONFIG_TIMER_STATS
554 timer->start_site = NULL; 573 timer->start_site = NULL;
555 timer->start_pid = -1; 574 timer->start_pid = -1;
@@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
715} 734}
716EXPORT_SYMBOL(mod_timer_pending); 735EXPORT_SYMBOL(mod_timer_pending);
717 736
737/*
738 * Decide where to put the timer while taking the slack into account
739 *
740 * Algorithm:
741 * 1) calculate the maximum (absolute) time
742 * 2) calculate the highest bit where the expires and new max are different
743 * 3) use this bit to make a mask
744 * 4) use the bitmask to round down the maximum time, so that the
745 * low-order bits are all zero
746 */
747static inline
748unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
749{
750 unsigned long expires_limit, mask;
751 int bit;
752
753 expires_limit = expires + timer->slack;
754
755 if (timer->slack < 0) /* auto slack: use 0.4% */
756 expires_limit = expires + (expires - jiffies)/256;
757
758 mask = expires ^ expires_limit;
759
760 if (mask == 0)
761 return expires;
762
763 bit = find_last_bit(&mask, BITS_PER_LONG);
764
765 mask = (1 << bit) - 1;
766
767 expires_limit = expires_limit & ~(mask);
768
769 return expires_limit;
770}
771
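
apply_slack() above rounds the expiry so that timers whose slack windows overlap tend to land in the same wheel slot: every bit below the highest bit in which expires and expires+slack differ is cleared. A worked userspace example of the same arithmetic (find_last_bit() replaced by a portable loop, and the slack limit passed in directly):

#include <stdio.h>

/* Userspace sketch of apply_slack(): clear every bit below the highest
 * bit in which expires and its slack limit differ. */
static unsigned long round_with_slack(unsigned long expires,
				      unsigned long expires_limit)
{
	unsigned long mask = expires ^ expires_limit;
	int bit = -1;

	if (!mask)
		return expires;
	while (mask) {			/* highest set bit, as find_last_bit() */
		mask >>= 1;
		bit++;
	}
	mask = (1UL << bit) - 1;
	return expires_limit & ~mask;
}

int main(void)
{
	/* 1000 ^ 1006 = 6 (0b110): the highest differing bit is bit 2, so
	 * the limit 1006 is rounded down to 1004. */
	printf("%lu\n", round_with_slack(1000, 1006));
	return 0;
}
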
718/** 772/**
719 * mod_timer - modify a timer's timeout 773 * mod_timer - modify a timer's timeout
720 * @timer: the timer to be modified 774 * @timer: the timer to be modified
@@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
745 if (timer_pending(timer) && timer->expires == expires) 799 if (timer_pending(timer) && timer->expires == expires)
746 return 1; 800 return 1;
747 801
802 expires = apply_slack(timer, expires);
803
748 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); 804 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
749} 805}
750EXPORT_SYMBOL(mod_timer); 806EXPORT_SYMBOL(mod_timer);
@@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
955 return index; 1011 return index;
956} 1012}
957 1013
1014static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
1015 unsigned long data)
1016{
1017 int preempt_count = preempt_count();
1018
1019#ifdef CONFIG_LOCKDEP
1020 /*
1021 * It is permissible to free the timer from inside the
1022 * function that is called from it, this we need to take into
1023 * account for lockdep too. To avoid bogus "held lock freed"
1024 * warnings as well as problems when looking into
1025 * timer->lockdep_map, make a copy and use that here.
1026 */
1027 struct lockdep_map lockdep_map = timer->lockdep_map;
1028#endif
1029 /*
1030 * Couple the lock chain with the lock chain at
1031 * del_timer_sync() by acquiring the lock_map around the fn()
1032 * call here and in del_timer_sync().
1033 */
1034 lock_map_acquire(&lockdep_map);
1035
1036 trace_timer_expire_entry(timer);
1037 fn(data);
1038 trace_timer_expire_exit(timer);
1039
1040 lock_map_release(&lockdep_map);
1041
1042 if (preempt_count != preempt_count()) {
1043 WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
1044 fn, preempt_count, preempt_count());
1045 /*
1046 * Restore the preempt count. That gives us a decent
1047 * chance to survive and extract information. If the
1048 * callback kept a lock held, bad luck, but not worse
1049 * than the BUG() we had.
1050 */
1051 preempt_count() = preempt_count;
1052 }
1053}
1054
958#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) 1055#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
959 1056
960/** 1057/**
@@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base)
998 detach_timer(timer, 1); 1095 detach_timer(timer, 1);
999 1096
1000 spin_unlock_irq(&base->lock); 1097 spin_unlock_irq(&base->lock);
1001 { 1098 call_timer_fn(timer, fn, data);
1002 int preempt_count = preempt_count();
1003
1004#ifdef CONFIG_LOCKDEP
1005 /*
1006 * It is permissible to free the timer from
1007 * inside the function that is called from
1008 * it, this we need to take into account for
1009 * lockdep too. To avoid bogus "held lock
1010 * freed" warnings as well as problems when
1011 * looking into timer->lockdep_map, make a
1012 * copy and use that here.
1013 */
1014 struct lockdep_map lockdep_map =
1015 timer->lockdep_map;
1016#endif
1017 /*
1018 * Couple the lock chain with the lock chain at
1019 * del_timer_sync() by acquiring the lock_map
1020 * around the fn() call here and in
1021 * del_timer_sync().
1022 */
1023 lock_map_acquire(&lockdep_map);
1024
1025 trace_timer_expire_entry(timer);
1026 fn(data);
1027 trace_timer_expire_exit(timer);
1028
1029 lock_map_release(&lockdep_map);
1030
1031 if (preempt_count != preempt_count()) {
1032 printk(KERN_ERR "huh, entered %p "
1033 "with preempt_count %08x, exited"
1034 " with %08x?\n",
1035 fn, preempt_count,
1036 preempt_count());
1037 BUG();
1038 }
1039 }
1040 spin_lock_irq(&base->lock); 1099 spin_lock_irq(&base->lock);
1041 } 1100 }
1042 } 1101 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 756d7283318b..8a76339a9e65 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3309,12 +3309,12 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3309 size_t len, 3309 size_t len,
3310 unsigned int flags) 3310 unsigned int flags)
3311{ 3311{
3312 struct page *pages[PIPE_BUFFERS]; 3312 struct page *pages_def[PIPE_DEF_BUFFERS];
3313 struct partial_page partial[PIPE_BUFFERS]; 3313 struct partial_page partial_def[PIPE_DEF_BUFFERS];
3314 struct trace_iterator *iter = filp->private_data; 3314 struct trace_iterator *iter = filp->private_data;
3315 struct splice_pipe_desc spd = { 3315 struct splice_pipe_desc spd = {
3316 .pages = pages, 3316 .pages = pages_def,
3317 .partial = partial, 3317 .partial = partial_def,
3318 .nr_pages = 0, /* This gets updated below. */ 3318 .nr_pages = 0, /* This gets updated below. */
3319 .flags = flags, 3319 .flags = flags,
3320 .ops = &tracing_pipe_buf_ops, 3320 .ops = &tracing_pipe_buf_ops,
@@ -3325,6 +3325,9 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3325 size_t rem; 3325 size_t rem;
3326 unsigned int i; 3326 unsigned int i;
3327 3327
3328 if (splice_grow_spd(pipe, &spd))
3329 return -ENOMEM;
3330
3328 /* copy the tracer to avoid using a global lock all around */ 3331 /* copy the tracer to avoid using a global lock all around */
3329 mutex_lock(&trace_types_lock); 3332 mutex_lock(&trace_types_lock);
3330 if (unlikely(old_tracer != current_trace && current_trace)) { 3333 if (unlikely(old_tracer != current_trace && current_trace)) {
@@ -3355,23 +3358,23 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3355 trace_access_lock(iter->cpu_file); 3358 trace_access_lock(iter->cpu_file);
3356 3359
3357 /* Fill as many pages as possible. */ 3360 /* Fill as many pages as possible. */
3358 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { 3361 for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
3359 pages[i] = alloc_page(GFP_KERNEL); 3362 spd.pages[i] = alloc_page(GFP_KERNEL);
3360 if (!pages[i]) 3363 if (!spd.pages[i])
3361 break; 3364 break;
3362 3365
3363 rem = tracing_fill_pipe_page(rem, iter); 3366 rem = tracing_fill_pipe_page(rem, iter);
3364 3367
3365 /* Copy the data into the page, so we can start over. */ 3368 /* Copy the data into the page, so we can start over. */
3366 ret = trace_seq_to_buffer(&iter->seq, 3369 ret = trace_seq_to_buffer(&iter->seq,
3367 page_address(pages[i]), 3370 page_address(spd.pages[i]),
3368 iter->seq.len); 3371 iter->seq.len);
3369 if (ret < 0) { 3372 if (ret < 0) {
3370 __free_page(pages[i]); 3373 __free_page(spd.pages[i]);
3371 break; 3374 break;
3372 } 3375 }
3373 partial[i].offset = 0; 3376 spd.partial[i].offset = 0;
3374 partial[i].len = iter->seq.len; 3377 spd.partial[i].len = iter->seq.len;
3375 3378
3376 trace_seq_init(&iter->seq); 3379 trace_seq_init(&iter->seq);
3377 } 3380 }
@@ -3382,12 +3385,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3382 3385
3383 spd.nr_pages = i; 3386 spd.nr_pages = i;
3384 3387
3385 return splice_to_pipe(pipe, &spd); 3388 ret = splice_to_pipe(pipe, &spd);
3389out:
3390 splice_shrink_spd(pipe, &spd);
3391 return ret;
3386 3392
3387out_err: 3393out_err:
3388 mutex_unlock(&iter->mutex); 3394 mutex_unlock(&iter->mutex);
3389 3395 goto out;
3390 return ret;
3391} 3396}
3392 3397
3393static ssize_t 3398static ssize_t
@@ -3786,11 +3791,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3786 unsigned int flags) 3791 unsigned int flags)
3787{ 3792{
3788 struct ftrace_buffer_info *info = file->private_data; 3793 struct ftrace_buffer_info *info = file->private_data;
3789 struct partial_page partial[PIPE_BUFFERS]; 3794 struct partial_page partial_def[PIPE_DEF_BUFFERS];
3790 struct page *pages[PIPE_BUFFERS]; 3795 struct page *pages_def[PIPE_DEF_BUFFERS];
3791 struct splice_pipe_desc spd = { 3796 struct splice_pipe_desc spd = {
3792 .pages = pages, 3797 .pages = pages_def,
3793 .partial = partial, 3798 .partial = partial_def,
3794 .flags = flags, 3799 .flags = flags,
3795 .ops = &buffer_pipe_buf_ops, 3800 .ops = &buffer_pipe_buf_ops,
3796 .spd_release = buffer_spd_release, 3801 .spd_release = buffer_spd_release,
@@ -3799,22 +3804,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3799 int entries, size, i; 3804 int entries, size, i;
3800 size_t ret; 3805 size_t ret;
3801 3806
3807 if (splice_grow_spd(pipe, &spd))
3808 return -ENOMEM;
3809
3802 if (*ppos & (PAGE_SIZE - 1)) { 3810 if (*ppos & (PAGE_SIZE - 1)) {
3803 WARN_ONCE(1, "Ftrace: previous read must page-align\n"); 3811 WARN_ONCE(1, "Ftrace: previous read must page-align\n");
3804 return -EINVAL; 3812 ret = -EINVAL;
3813 goto out;
3805 } 3814 }
3806 3815
3807 if (len & (PAGE_SIZE - 1)) { 3816 if (len & (PAGE_SIZE - 1)) {
3808 WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); 3817 WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
3809 if (len < PAGE_SIZE) 3818 if (len < PAGE_SIZE) {
3810 return -EINVAL; 3819 ret = -EINVAL;
3820 goto out;
3821 }
3811 len &= PAGE_MASK; 3822 len &= PAGE_MASK;
3812 } 3823 }
3813 3824
3814 trace_access_lock(info->cpu); 3825 trace_access_lock(info->cpu);
3815 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3826 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3816 3827
3817 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { 3828 for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
3818 struct page *page; 3829 struct page *page;
3819 int r; 3830 int r;
3820 3831
@@ -3869,11 +3880,12 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3869 else 3880 else
3870 ret = 0; 3881 ret = 0;
3871 /* TODO: block */ 3882 /* TODO: block */
3872 return ret; 3883 goto out;
3873 } 3884 }
3874 3885
3875 ret = splice_to_pipe(pipe, &spd); 3886 ret = splice_to_pipe(pipe, &spd);
3876 3887 splice_shrink_spd(pipe, &spd);
3888out:
3877 return ret; 3889 return ret;
3878} 3890}
3879 3891
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 2404c129a8c9..ab13d7008061 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -209,6 +209,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
 
 	return 1;
 }
+EXPORT_SYMBOL(trace_seq_putc);
 
 int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
 {
@@ -355,6 +356,21 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 }
 EXPORT_SYMBOL(ftrace_print_symbols_seq);
 
+const char *
+ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
+{
+	int i;
+	const char *ret = p->buffer + p->len;
+
+	for (i = 0; i < buf_len; i++)
+		trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]);
+
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL(ftrace_print_hex_seq);
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
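The new ftrace_print_hex_seq() renders buf as space-separated hex byte pairs into the trace_seq and returns a pointer to the start of that rendering; the closing trace_seq_putc(p, 0) NUL-terminates it, so the result can be handled as an ordinary C string. A minimal, hedged usage sketch (buffer contents invented; the seq is static because struct trace_seq embeds a page-sized buffer):

static void demo_hex_dump(void)
{
        static struct trace_seq seq;    /* too big for the stack */
        const unsigned char mac[6] = { 0x00, 0x1a, 0x2b, 0x3c, 0x4d, 0x5e };
        const char *hex;

        trace_seq_init(&seq);
        hex = ftrace_print_hex_seq(&seq, mac, sizeof(mac));

        /* hex points into seq.buffer: "00 1a 2b 3c 4d 5e" */
        printk(KERN_DEBUG "mac=%s\n", hex);
}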
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 076c7c8215b0..b2d70d38dff4 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -54,8 +54,8 @@ int create_user_ns(struct cred *new)
 #endif
 	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
 
-	/* alloc_uid() incremented the userns refcount. Just set it to 1 */
-	kref_set(&ns->kref, 1);
+	/* root_user holds a reference to ns, our reference can be dropped */
+	put_user_ns(ns);
 
 	return 0;
 }
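The user_namespace.c hunk replaces a forced refcount reset with an ordinary put: alloc_uid() already took a reference on the namespace for root_user, so the creator simply drops its own reference instead of overwriting the counter with kref_set(). A hedged illustration of that hand-off pattern, with an invented object type:

struct box {
        struct kref kref;
};

static void box_release(struct kref *kref)
{
        kfree(container_of(kref, struct box, kref));
}

static struct box *box_create(void)
{
        struct box *b = kzalloc(sizeof(*b), GFP_KERNEL);

        if (!b)
                return NULL;
        kref_init(&b->kref);              /* creator's reference (count 1) */
        kref_get(&b->kref);               /* long-lived holder's reference */
        kref_put(&b->kref, box_release);  /* drop the creator's; count is 1 */
        return b;
}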
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5bfb213984b2..77dabbf64b8f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -229,6 +229,16 @@ static inline void set_wq_data(struct work_struct *work,
 	atomic_long_set(&work->data, new);
 }
 
+/*
+ * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
+ */
+static inline void clear_wq_data(struct work_struct *work)
+{
+	unsigned long flags = *work_data_bits(work) &
+				(1UL << WORK_STRUCT_STATIC);
+	atomic_long_set(&work->data, flags);
+}
+
 static inline
 struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
 {
@@ -671,7 +681,7 @@ static int __cancel_work_timer(struct work_struct *work,
 		wait_on_work(work);
 	} while (unlikely(ret < 0));
 
-	work_clear_pending(work);
+	clear_wq_data(work);
 	return ret;
 }
 
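The switch from work_clear_pending() to clear_wq_data() in __cancel_work_timer() matters because work->data packs more than the pending bit: it also encodes the workqueue the item was last queued on, which get_wq_data() (visible in the context above) decodes by masking. Clearing only WORK_STRUCT_PENDING would leave that stale pointer behind; clear_wq_data() wipes everything except WORK_STRUCT_STATIC, which the debugobjects code needs in order to keep recognizing statically allocated work items. A toy illustration of the masking, with invented bit positions:

#define EX_PENDING      (1UL << 0)      /* stands in for WORK_STRUCT_PENDING */
#define EX_STATIC       (1UL << 1)      /* stands in for WORK_STRUCT_STATIC  */

unsigned long data = EX_PENDING | EX_STATIC | 0xffff0000UL; /* fake cwq bits */
unsigned long kept = data & EX_STATIC;
/* kept == EX_STATIC: pending bit and pointer bits are both gone */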
@@ -845,6 +855,30 @@ int schedule_on_each_cpu(work_func_t func)
 	return 0;
 }
 
+/**
+ * flush_scheduled_work - ensure that any scheduled work has run to completion.
+ *
+ * Forces execution of the kernel-global workqueue and blocks until its
+ * completion.
+ *
+ * Think twice before calling this function! It's very easy to get into
+ * trouble if you don't take great care. Either of the following situations
+ * will lead to deadlock:
+ *
+ *	One of the work items currently on the workqueue needs to acquire
+ *	a lock held by your code or its caller.
+ *
+ *	Your code is running in the context of a work routine.
+ *
+ * They will be detected by lockdep when they occur, but the first might not
+ * occur very often. It depends on what work items are on the workqueue and
+ * what locks they need, which you have no control over.
+ *
+ * In most situations flushing the entire workqueue is overkill; you merely
+ * need to know that a particular work item isn't queued and isn't running.
+ * In such cases you should use cancel_delayed_work_sync() or
+ * cancel_work_sync() instead.
+ */
 void flush_scheduled_work(void)
 {
 	flush_workqueue(keventd_wq);
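In driver teardown code, the advice in the new kernel-doc looks like the following hedged sketch ('struct mydev' and its work items are invented):

struct mydev {
        struct work_struct  irq_work;
        struct delayed_work poll_work;
};

static void mydev_teardown(struct mydev *dev)
{
        /* Wait for a running callback and kill a pending one, touching
         * only our own items rather than the whole kernel-global queue. */
        cancel_work_sync(&dev->irq_work);
        cancel_delayed_work_sync(&dev->poll_work);
}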