about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/audit.c 2
-rw-r--r-- kernel/cgroup.c 23
-rw-r--r-- kernel/cpuset.c 2
-rw-r--r-- kernel/debug/debug_core.c 4
-rw-r--r-- kernel/debug/gdbstub.c 191
-rw-r--r-- kernel/debug/kdb/kdb_main.c 132
-rw-r--r-- kernel/debug/kdb/kdb_private.h 2
-rw-r--r-- kernel/hw_breakpoint.c 12
-rw-r--r-- kernel/module.c 1088
-rw-r--r-- kernel/padata.c 755
-rw-r--r-- kernel/pm_qos_params.c 215
-rw-r--r-- kernel/power/hibernate.c 26
-rw-r--r-- kernel/power/main.c 55
-rw-r--r-- kernel/power/snapshot.c 2
-rw-r--r-- kernel/power/suspend.c 13
-rw-r--r-- kernel/power/swap.c 6
-rw-r--r-- kernel/printk.c 33
-rw-r--r-- kernel/rcupdate.c 160
-rw-r--r-- kernel/rcutiny.c 2
-rw-r--r-- kernel/rcutree.c 2
-rw-r--r-- kernel/signal.c 9
-rw-r--r-- kernel/time/tick-broadcast.c 2
-rw-r--r-- kernel/timer.c 13
-rw-r--r-- kernel/trace/Makefile 3
-rw-r--r-- kernel/trace/trace.c 43
-rw-r--r-- kernel/trace/trace.h 19
-rw-r--r-- kernel/trace/trace_kdb.c 136
-rw-r--r-- kernel/user_namespace.c 44
-rw-r--r-- kernel/workqueue.c 15
29 files changed, 2086 insertions, 923 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index c71bd26631a2..8296aa516c5a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -407,7 +407,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
407 audit_hold_skb(skb); 407 audit_hold_skb(skb);
408 } else 408 } else
409 /* drop the extra reference if sent ok */ 409 /* drop the extra reference if sent ok */
410 kfree_skb(skb); 410 consume_skb(skb);
411} 411}
412 412
413static int kauditd_thread(void *dummy) 413static int kauditd_thread(void *dummy)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3ac6f5b0a64b..a8ce09954404 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1788,6 +1788,29 @@ out:
1788 return retval; 1788 return retval;
1789} 1789}
1790 1790
1791/**
1792 * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup
1793 * @tsk: the task to be attached
1794 */
1795int cgroup_attach_task_current_cg(struct task_struct *tsk)
1796{
1797 struct cgroupfs_root *root;
1798 struct cgroup *cur_cg;
1799 int retval = 0;
1800
1801 cgroup_lock();
1802 for_each_active_root(root) {
1803 cur_cg = task_cgroup_from_root(current, root);
1804 retval = cgroup_attach_task(cur_cg, tsk);
1805 if (retval)
1806 break;
1807 }
1808 cgroup_unlock();
1809
1810 return retval;
1811}
1812EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg);
1813
1791/* 1814/*
1792 * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex 1815 * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
1793 * held. May take task_lock of task 1816 * held. May take task_lock of task
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 02b9611eadde..7cb37d86a005 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -105,7 +105,7 @@ struct cpuset {
105 /* for custom sched domain */ 105 /* for custom sched domain */
106 int relax_domain_level; 106 int relax_domain_level;
107 107
108 /* used for walking a cpuset heirarchy */ 108 /* used for walking a cpuset hierarchy */
109 struct list_head stack_list; 109 struct list_head stack_list;
110}; 110};
111 111
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 8bc5eeffec8a..3c2d4972d235 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -6,7 +6,7 @@
6 * Copyright (C) 2000-2001 VERITAS Software Corporation. 6 * Copyright (C) 2000-2001 VERITAS Software Corporation.
7 * Copyright (C) 2002-2004 Timesys Corporation 7 * Copyright (C) 2002-2004 Timesys Corporation
8 * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com> 8 * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
9 * Copyright (C) 2004 Pavel Machek <pavel@suse.cz> 9 * Copyright (C) 2004 Pavel Machek <pavel@ucw.cz>
10 * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org> 10 * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
11 * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd. 11 * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
12 * Copyright (C) 2005-2009 Wind River Systems, Inc. 12 * Copyright (C) 2005-2009 Wind River Systems, Inc.
@@ -605,6 +605,8 @@ cpu_master_loop:
605 if (dbg_kdb_mode) { 605 if (dbg_kdb_mode) {
606 kgdb_connected = 1; 606 kgdb_connected = 1;
607 error = kdb_stub(ks); 607 error = kdb_stub(ks);
608 if (error == -1)
609 continue;
608 kgdb_connected = 0; 610 kgdb_connected = 0;
609 } else { 611 } else {
610 error = gdb_serial_stub(ks); 612 error = gdb_serial_stub(ks);
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index e8fd6868682d..481a7bd2dfe7 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -6,7 +6,7 @@
6 * Copyright (C) 2000-2001 VERITAS Software Corporation. 6 * Copyright (C) 2000-2001 VERITAS Software Corporation.
7 * Copyright (C) 2002-2004 Timesys Corporation 7 * Copyright (C) 2002-2004 Timesys Corporation
8 * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com> 8 * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
9 * Copyright (C) 2004 Pavel Machek <pavel@suse.cz> 9 * Copyright (C) 2004 Pavel Machek <pavel@ucw.cz>
10 * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org> 10 * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
11 * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd. 11 * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
12 * Copyright (C) 2005-2009 Wind River Systems, Inc. 12 * Copyright (C) 2005-2009 Wind River Systems, Inc.
@@ -52,17 +52,6 @@ static unsigned long gdb_regs[(NUMREGBYTES +
52 * GDB remote protocol parser: 52 * GDB remote protocol parser:
53 */ 53 */
54 54
55static int hex(char ch)
56{
57 if ((ch >= 'a') && (ch <= 'f'))
58 return ch - 'a' + 10;
59 if ((ch >= '0') && (ch <= '9'))
60 return ch - '0';
61 if ((ch >= 'A') && (ch <= 'F'))
62 return ch - 'A' + 10;
63 return -1;
64}
65
66#ifdef CONFIG_KGDB_KDB 55#ifdef CONFIG_KGDB_KDB
67static int gdbstub_read_wait(void) 56static int gdbstub_read_wait(void)
68{ 57{
@@ -123,8 +112,8 @@ static void get_packet(char *buffer)
123 buffer[count] = 0; 112 buffer[count] = 0;
124 113
125 if (ch == '#') { 114 if (ch == '#') {
126 xmitcsum = hex(gdbstub_read_wait()) << 4; 115 xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4;
127 xmitcsum += hex(gdbstub_read_wait()); 116 xmitcsum += hex_to_bin(gdbstub_read_wait());
128 117
129 if (checksum != xmitcsum) 118 if (checksum != xmitcsum)
130 /* failed checksum */ 119 /* failed checksum */
@@ -236,7 +225,7 @@ void gdbstub_msg_write(const char *s, int len)
236 * buf. Return a pointer to the last char put in buf (null). May 225 * buf. Return a pointer to the last char put in buf (null). May
237 * return an error. 226 * return an error.
238 */ 227 */
239int kgdb_mem2hex(char *mem, char *buf, int count) 228char *kgdb_mem2hex(char *mem, char *buf, int count)
240{ 229{
241 char *tmp; 230 char *tmp;
242 int err; 231 int err;
@@ -248,17 +237,16 @@ int kgdb_mem2hex(char *mem, char *buf, int count)
248 tmp = buf + count; 237 tmp = buf + count;
249 238
250 err = probe_kernel_read(tmp, mem, count); 239 err = probe_kernel_read(tmp, mem, count);
251 if (!err) { 240 if (err)
252 while (count > 0) { 241 return NULL;
253 buf = pack_hex_byte(buf, *tmp); 242 while (count > 0) {
254 tmp++; 243 buf = pack_hex_byte(buf, *tmp);
255 count--; 244 tmp++;
256 } 245 count--;
257
258 *buf = 0;
259 } 246 }
247 *buf = 0;
260 248
261 return err; 249 return buf;
262} 250}
263 251
264/* 252/*
@@ -280,8 +268,8 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
280 tmp_hex = tmp_raw - 1; 268 tmp_hex = tmp_raw - 1;
281 while (tmp_hex >= buf) { 269 while (tmp_hex >= buf) {
282 tmp_raw--; 270 tmp_raw--;
283 *tmp_raw = hex(*tmp_hex--); 271 *tmp_raw = hex_to_bin(*tmp_hex--);
284 *tmp_raw |= hex(*tmp_hex--) << 4; 272 *tmp_raw |= hex_to_bin(*tmp_hex--) << 4;
285 } 273 }
286 274
287 return probe_kernel_write(mem, tmp_raw, count); 275 return probe_kernel_write(mem, tmp_raw, count);
@@ -304,7 +292,7 @@ int kgdb_hex2long(char **ptr, unsigned long *long_val)
304 (*ptr)++; 292 (*ptr)++;
305 } 293 }
306 while (**ptr) { 294 while (**ptr) {
307 hex_val = hex(**ptr); 295 hex_val = hex_to_bin(**ptr);
308 if (hex_val < 0) 296 if (hex_val < 0)
309 break; 297 break;
310 298
@@ -339,6 +327,32 @@ static int kgdb_ebin2mem(char *buf, char *mem, int count)
339 return probe_kernel_write(mem, c, size); 327 return probe_kernel_write(mem, c, size);
340} 328}
341 329
330#if DBG_MAX_REG_NUM > 0
331void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
332{
333 int i;
334 int idx = 0;
335 char *ptr = (char *)gdb_regs;
336
337 for (i = 0; i < DBG_MAX_REG_NUM; i++) {
338 dbg_get_reg(i, ptr + idx, regs);
339 idx += dbg_reg_def[i].size;
340 }
341}
342
343void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
344{
345 int i;
346 int idx = 0;
347 char *ptr = (char *)gdb_regs;
348
349 for (i = 0; i < DBG_MAX_REG_NUM; i++) {
350 dbg_set_reg(i, ptr + idx, regs);
351 idx += dbg_reg_def[i].size;
352 }
353}
354#endif /* DBG_MAX_REG_NUM > 0 */
355
342/* Write memory due to an 'M' or 'X' packet. */ 356/* Write memory due to an 'M' or 'X' packet. */
343static int write_mem_msg(int binary) 357static int write_mem_msg(int binary)
344{ 358{
@@ -378,28 +392,31 @@ static void error_packet(char *pkt, int error)
378 * remapped to negative TIDs. 392 * remapped to negative TIDs.
379 */ 393 */
380 394
381#define BUF_THREAD_ID_SIZE 16 395#define BUF_THREAD_ID_SIZE 8
382 396
383static char *pack_threadid(char *pkt, unsigned char *id) 397static char *pack_threadid(char *pkt, unsigned char *id)
384{ 398{
385 char *limit; 399 unsigned char *limit;
400 int lzero = 1;
401
402 limit = id + (BUF_THREAD_ID_SIZE / 2);
403 while (id < limit) {
404 if (!lzero || *id != 0) {
405 pkt = pack_hex_byte(pkt, *id);
406 lzero = 0;
407 }
408 id++;
409 }
386 410
387 limit = pkt + BUF_THREAD_ID_SIZE; 411 if (lzero)
388 while (pkt < limit) 412 pkt = pack_hex_byte(pkt, 0);
389 pkt = pack_hex_byte(pkt, *id++);
390 413
391 return pkt; 414 return pkt;
392} 415}
393 416
394static void int_to_threadref(unsigned char *id, int value) 417static void int_to_threadref(unsigned char *id, int value)
395{ 418{
396 unsigned char *scan; 419 put_unaligned_be32(value, id);
397 int i = 4;
398
399 scan = (unsigned char *)id;
400 while (i--)
401 *scan++ = 0;
402 put_unaligned_be32(value, scan);
403} 420}
404 421
405static struct task_struct *getthread(struct pt_regs *regs, int tid) 422static struct task_struct *getthread(struct pt_regs *regs, int tid)
@@ -463,8 +480,7 @@ static void gdb_cmd_status(struct kgdb_state *ks)
463 pack_hex_byte(&remcom_out_buffer[1], ks->signo); 480 pack_hex_byte(&remcom_out_buffer[1], ks->signo);
464} 481}
465 482
466/* Handle the 'g' get registers request */ 483static void gdb_get_regs_helper(struct kgdb_state *ks)
467static void gdb_cmd_getregs(struct kgdb_state *ks)
468{ 484{
469 struct task_struct *thread; 485 struct task_struct *thread;
470 void *local_debuggerinfo; 486 void *local_debuggerinfo;
@@ -505,6 +521,12 @@ static void gdb_cmd_getregs(struct kgdb_state *ks)
505 */ 521 */
506 sleeping_thread_to_gdb_regs(gdb_regs, thread); 522 sleeping_thread_to_gdb_regs(gdb_regs, thread);
507 } 523 }
524}
525
526/* Handle the 'g' get registers request */
527static void gdb_cmd_getregs(struct kgdb_state *ks)
528{
529 gdb_get_regs_helper(ks);
508 kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, NUMREGBYTES); 530 kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, NUMREGBYTES);
509} 531}
510 532
@@ -527,13 +549,13 @@ static void gdb_cmd_memread(struct kgdb_state *ks)
527 char *ptr = &remcom_in_buffer[1]; 549 char *ptr = &remcom_in_buffer[1];
528 unsigned long length; 550 unsigned long length;
529 unsigned long addr; 551 unsigned long addr;
530 int err; 552 char *err;
531 553
532 if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' && 554 if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
533 kgdb_hex2long(&ptr, &length) > 0) { 555 kgdb_hex2long(&ptr, &length) > 0) {
534 err = kgdb_mem2hex((char *)addr, remcom_out_buffer, length); 556 err = kgdb_mem2hex((char *)addr, remcom_out_buffer, length);
535 if (err) 557 if (!err)
536 error_packet(remcom_out_buffer, err); 558 error_packet(remcom_out_buffer, -EINVAL);
537 } else { 559 } else {
538 error_packet(remcom_out_buffer, -EINVAL); 560 error_packet(remcom_out_buffer, -EINVAL);
539 } 561 }
@@ -550,6 +572,60 @@ static void gdb_cmd_memwrite(struct kgdb_state *ks)
550 strcpy(remcom_out_buffer, "OK"); 572 strcpy(remcom_out_buffer, "OK");
551} 573}
552 574
575#if DBG_MAX_REG_NUM > 0
576static char *gdb_hex_reg_helper(int regnum, char *out)
577{
578 int i;
579 int offset = 0;
580
581 for (i = 0; i < regnum; i++)
582 offset += dbg_reg_def[i].size;
583 return kgdb_mem2hex((char *)gdb_regs + offset, out,
584 dbg_reg_def[i].size);
585}
586
587/* Handle the 'p' individual register get */
588static void gdb_cmd_reg_get(struct kgdb_state *ks)
589{
590 unsigned long regnum;
591 char *ptr = &remcom_in_buffer[1];
592
593 kgdb_hex2long(&ptr, &regnum);
594 if (regnum >= DBG_MAX_REG_NUM) {
595 error_packet(remcom_out_buffer, -EINVAL);
596 return;
597 }
598 gdb_get_regs_helper(ks);
599 gdb_hex_reg_helper(regnum, remcom_out_buffer);
600}
601
602/* Handle the 'P' individual register set */
603static void gdb_cmd_reg_set(struct kgdb_state *ks)
604{
605 unsigned long regnum;
606 char *ptr = &remcom_in_buffer[1];
607 int i = 0;
608
609 kgdb_hex2long(&ptr, &regnum);
610 if (*ptr++ != '=' ||
611 !(!kgdb_usethread || kgdb_usethread == current) ||
612 !dbg_get_reg(regnum, gdb_regs, ks->linux_regs)) {
613 error_packet(remcom_out_buffer, -EINVAL);
614 return;
615 }
616 memset(gdb_regs, 0, sizeof(gdb_regs));
617 while (i < sizeof(gdb_regs) * 2)
618 if (hex_to_bin(ptr[i]) >= 0)
619 i++;
620 else
621 break;
622 i = i / 2;
623 kgdb_hex2mem(ptr, (char *)gdb_regs, i);
624 dbg_set_reg(regnum, gdb_regs, ks->linux_regs);
625 strcpy(remcom_out_buffer, "OK");
626}
627#endif /* DBG_MAX_REG_NUM > 0 */
628
553/* Handle the 'X' memory binary write bytes */ 629/* Handle the 'X' memory binary write bytes */
554static void gdb_cmd_binwrite(struct kgdb_state *ks) 630static void gdb_cmd_binwrite(struct kgdb_state *ks)
555{ 631{
@@ -612,7 +688,7 @@ static void gdb_cmd_query(struct kgdb_state *ks)
612{ 688{
613 struct task_struct *g; 689 struct task_struct *g;
614 struct task_struct *p; 690 struct task_struct *p;
615 unsigned char thref[8]; 691 unsigned char thref[BUF_THREAD_ID_SIZE];
616 char *ptr; 692 char *ptr;
617 int i; 693 int i;
618 int cpu; 694 int cpu;
@@ -632,8 +708,7 @@ static void gdb_cmd_query(struct kgdb_state *ks)
632 for_each_online_cpu(cpu) { 708 for_each_online_cpu(cpu) {
633 ks->thr_query = 0; 709 ks->thr_query = 0;
634 int_to_threadref(thref, -cpu - 2); 710 int_to_threadref(thref, -cpu - 2);
635 pack_threadid(ptr, thref); 711 ptr = pack_threadid(ptr, thref);
636 ptr += BUF_THREAD_ID_SIZE;
637 *(ptr++) = ','; 712 *(ptr++) = ',';
638 i++; 713 i++;
639 } 714 }
@@ -642,8 +717,7 @@ static void gdb_cmd_query(struct kgdb_state *ks)
642 do_each_thread(g, p) { 717 do_each_thread(g, p) {
643 if (i >= ks->thr_query && !finished) { 718 if (i >= ks->thr_query && !finished) {
644 int_to_threadref(thref, p->pid); 719 int_to_threadref(thref, p->pid);
645 pack_threadid(ptr, thref); 720 ptr = pack_threadid(ptr, thref);
646 ptr += BUF_THREAD_ID_SIZE;
647 *(ptr++) = ','; 721 *(ptr++) = ',';
648 ks->thr_query++; 722 ks->thr_query++;
649 if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0) 723 if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0)
@@ -858,11 +932,14 @@ int gdb_serial_stub(struct kgdb_state *ks)
858 int error = 0; 932 int error = 0;
859 int tmp; 933 int tmp;
860 934
861 /* Clear the out buffer. */ 935 /* Initialize comm buffer and globals. */
862 memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer)); 936 memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
937 kgdb_usethread = kgdb_info[ks->cpu].task;
938 ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid);
939 ks->pass_exception = 0;
863 940
864 if (kgdb_connected) { 941 if (kgdb_connected) {
865 unsigned char thref[8]; 942 unsigned char thref[BUF_THREAD_ID_SIZE];
866 char *ptr; 943 char *ptr;
867 944
868 /* Reply to host that an exception has occurred */ 945 /* Reply to host that an exception has occurred */
@@ -876,10 +953,6 @@ int gdb_serial_stub(struct kgdb_state *ks)
876 put_packet(remcom_out_buffer); 953 put_packet(remcom_out_buffer);
877 } 954 }
878 955
879 kgdb_usethread = kgdb_info[ks->cpu].task;
880 ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid);
881 ks->pass_exception = 0;
882
883 while (1) { 956 while (1) {
884 error = 0; 957 error = 0;
885 958
@@ -904,6 +977,14 @@ int gdb_serial_stub(struct kgdb_state *ks)
904 case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */ 977 case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */
905 gdb_cmd_memwrite(ks); 978 gdb_cmd_memwrite(ks);
906 break; 979 break;
980#if DBG_MAX_REG_NUM > 0
981 case 'p': /* pXX Return gdb register XX (in hex) */
982 gdb_cmd_reg_get(ks);
983 break;
984 case 'P': /* PXX=aaaa Set gdb register XX to aaaa (in hex) */
985 gdb_cmd_reg_set(ks);
986 break;
987#endif /* DBG_MAX_REG_NUM > 0 */
907 case 'X': /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */ 988 case 'X': /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */
908 gdb_cmd_binwrite(ks); 989 gdb_cmd_binwrite(ks);
909 break; 990 break;
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index ebe4a287419e..8577e45a9a58 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -312,7 +312,7 @@ int kdbgetularg(const char *arg, unsigned long *value)
312 312
313 if (endp == arg) { 313 if (endp == arg) {
314 /* 314 /*
315 * Try base 16, for us folks too lazy to type the 315 * Also try base 16, for us folks too lazy to type the
316 * leading 0x... 316 * leading 0x...
317 */ 317 */
318 val = simple_strtoul(arg, &endp, 16); 318 val = simple_strtoul(arg, &endp, 16);
@@ -325,6 +325,25 @@ int kdbgetularg(const char *arg, unsigned long *value)
325 return 0; 325 return 0;
326} 326}
327 327
328int kdbgetu64arg(const char *arg, u64 *value)
329{
330 char *endp;
331 u64 val;
332
333 val = simple_strtoull(arg, &endp, 0);
334
335 if (endp == arg) {
336
337 val = simple_strtoull(arg, &endp, 16);
338 if (endp == arg)
339 return KDB_BADINT;
340 }
341
342 *value = val;
343
344 return 0;
345}
346
328/* 347/*
329 * kdb_set - This function implements the 'set' command. Alter an 348 * kdb_set - This function implements the 'set' command. Alter an
330 * existing environment variable or create a new one. 349 * existing environment variable or create a new one.
@@ -1770,11 +1789,65 @@ static int kdb_go(int argc, const char **argv)
1770 */ 1789 */
1771static int kdb_rd(int argc, const char **argv) 1790static int kdb_rd(int argc, const char **argv)
1772{ 1791{
1773 int diag = kdb_check_regs(); 1792 int len = kdb_check_regs();
1774 if (diag) 1793#if DBG_MAX_REG_NUM > 0
1775 return diag; 1794 int i;
1795 char *rname;
1796 int rsize;
1797 u64 reg64;
1798 u32 reg32;
1799 u16 reg16;
1800 u8 reg8;
1801
1802 if (len)
1803 return len;
1804
1805 for (i = 0; i < DBG_MAX_REG_NUM; i++) {
1806 rsize = dbg_reg_def[i].size * 2;
1807 if (rsize > 16)
1808 rsize = 2;
1809 if (len + strlen(dbg_reg_def[i].name) + 4 + rsize > 80) {
1810 len = 0;
1811 kdb_printf("\n");
1812 }
1813 if (len)
1814 len += kdb_printf(" ");
1815 switch(dbg_reg_def[i].size * 8) {
1816 case 8:
1817 rname = dbg_get_reg(i, &reg8, kdb_current_regs);
1818 if (!rname)
1819 break;
1820 len += kdb_printf("%s: %02x", rname, reg8);
1821 break;
1822 case 16:
1823 rname = dbg_get_reg(i, &reg16, kdb_current_regs);
1824 if (!rname)
1825 break;
1826 len += kdb_printf("%s: %04x", rname, reg16);
1827 break;
1828 case 32:
1829 rname = dbg_get_reg(i, &reg32, kdb_current_regs);
1830 if (!rname)
1831 break;
1832 len += kdb_printf("%s: %08x", rname, reg32);
1833 break;
1834 case 64:
1835 rname = dbg_get_reg(i, &reg64, kdb_current_regs);
1836 if (!rname)
1837 break;
1838 len += kdb_printf("%s: %016llx", rname, reg64);
1839 break;
1840 default:
1841 len += kdb_printf("%s: ??", dbg_reg_def[i].name);
1842 }
1843 }
1844 kdb_printf("\n");
1845#else
1846 if (len)
1847 return len;
1776 1848
1777 kdb_dumpregs(kdb_current_regs); 1849 kdb_dumpregs(kdb_current_regs);
1850#endif
1778 return 0; 1851 return 0;
1779} 1852}
1780 1853
@@ -1782,32 +1855,67 @@ static int kdb_rd(int argc, const char **argv)
1782 * kdb_rm - This function implements the 'rm' (register modify) command. 1855 * kdb_rm - This function implements the 'rm' (register modify) command.
1783 * rm register-name new-contents 1856 * rm register-name new-contents
1784 * Remarks: 1857 * Remarks:
1785 * Currently doesn't allow modification of control or 1858 * Allows register modification with the same restrictions as gdb
1786 * debug registers.
1787 */ 1859 */
1788static int kdb_rm(int argc, const char **argv) 1860static int kdb_rm(int argc, const char **argv)
1789{ 1861{
1862#if DBG_MAX_REG_NUM > 0
1790 int diag; 1863 int diag;
1791 int ind = 0; 1864 const char *rname;
1792 unsigned long contents; 1865 int i;
1866 u64 reg64;
1867 u32 reg32;
1868 u16 reg16;
1869 u8 reg8;
1793 1870
1794 if (argc != 2) 1871 if (argc != 2)
1795 return KDB_ARGCOUNT; 1872 return KDB_ARGCOUNT;
1796 /* 1873 /*
1797 * Allow presence or absence of leading '%' symbol. 1874 * Allow presence or absence of leading '%' symbol.
1798 */ 1875 */
1799 if (argv[1][0] == '%') 1876 rname = argv[1];
1800 ind = 1; 1877 if (*rname == '%')
1878 rname++;
1801 1879
1802 diag = kdbgetularg(argv[2], &contents); 1880 diag = kdbgetu64arg(argv[2], &reg64);
1803 if (diag) 1881 if (diag)
1804 return diag; 1882 return diag;
1805 1883
1806 diag = kdb_check_regs(); 1884 diag = kdb_check_regs();
1807 if (diag) 1885 if (diag)
1808 return diag; 1886 return diag;
1887
1888 diag = KDB_BADREG;
1889 for (i = 0; i < DBG_MAX_REG_NUM; i++) {
1890 if (strcmp(rname, dbg_reg_def[i].name) == 0) {
1891 diag = 0;
1892 break;
1893 }
1894 }
1895 if (!diag) {
1896 switch(dbg_reg_def[i].size * 8) {
1897 case 8:
1898 reg8 = reg64;
1899 dbg_set_reg(i, &reg8, kdb_current_regs);
1900 break;
1901 case 16:
1902 reg16 = reg64;
1903 dbg_set_reg(i, &reg16, kdb_current_regs);
1904 break;
1905 case 32:
1906 reg32 = reg64;
1907 dbg_set_reg(i, &reg32, kdb_current_regs);
1908 break;
1909 case 64:
1910 dbg_set_reg(i, &reg64, kdb_current_regs);
1911 break;
1912 }
1913 }
1914 return diag;
1915#else
1809 kdb_printf("ERROR: Register set currently not implemented\n"); 1916 kdb_printf("ERROR: Register set currently not implemented\n");
1810 return 0; 1917 return 0;
1918#endif
1811} 1919}
1812 1920
1813#if defined(CONFIG_MAGIC_SYSRQ) 1921#if defined(CONFIG_MAGIC_SYSRQ)
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 97d3ba69775d..c438f545a321 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -144,9 +144,7 @@ extern int kdb_getword(unsigned long *, unsigned long, size_t);
144extern int kdb_putword(unsigned long, unsigned long, size_t); 144extern int kdb_putword(unsigned long, unsigned long, size_t);
145 145
146extern int kdbgetularg(const char *, unsigned long *); 146extern int kdbgetularg(const char *, unsigned long *);
147extern int kdb_set(int, const char **);
148extern char *kdbgetenv(const char *); 147extern char *kdbgetenv(const char *);
149extern int kdbgetintenv(const char *, int *);
150extern int kdbgetaddrarg(int, const char **, int*, unsigned long *, 148extern int kdbgetaddrarg(int, const char **, int*, unsigned long *,
151 long *, char **); 149 long *, char **);
152extern int kdbgetsymval(const char *, kdb_symtab_t *); 150extern int kdbgetsymval(const char *, kdb_symtab_t *);
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index e34d94d50924..d71a987fd2bf 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -242,6 +242,17 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
242} 242}
243 243
244/* 244/*
245 * Function to perform processor-specific cleanup during unregistration
246 */
247__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
248{
249 /*
250 * A weak stub function here for those archs that don't define
251 * it inside arch/.../kernel/hw_breakpoint.c
252 */
253}
254
255/*
245 * Constraints to check before allowing this new breakpoint counter: 256 * Constraints to check before allowing this new breakpoint counter:
246 * 257 *
247 * == Non-pinned counter == (Considered as pinned for now) 258 * == Non-pinned counter == (Considered as pinned for now)
@@ -343,6 +354,7 @@ void release_bp_slot(struct perf_event *bp)
343{ 354{
344 mutex_lock(&nr_bp_mutex); 355 mutex_lock(&nr_bp_mutex);
345 356
357 arch_unregister_hw_breakpoint(bp);
346 __release_bp_slot(bp); 358 __release_bp_slot(bp);
347 359
348 mutex_unlock(&nr_bp_mutex); 360 mutex_unlock(&nr_bp_mutex);
diff --git a/kernel/module.c b/kernel/module.c
index 6c562828c85c..d0b5f8db11b4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1,6 +1,6 @@
1/* 1/*
2 Copyright (C) 2002 Richard Henderson 2 Copyright (C) 2002 Richard Henderson
3 Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. 3 Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM.
4 4
5 This program is free software; you can redistribute it and/or modify 5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by 6 it under the terms of the GNU General Public License as published by
@@ -110,6 +110,20 @@ int unregister_module_notifier(struct notifier_block * nb)
110} 110}
111EXPORT_SYMBOL(unregister_module_notifier); 111EXPORT_SYMBOL(unregister_module_notifier);
112 112
113struct load_info {
114 Elf_Ehdr *hdr;
115 unsigned long len;
116 Elf_Shdr *sechdrs;
117 char *secstrings, *strtab;
118 unsigned long *strmap;
119 unsigned long symoffs, stroffs;
120 struct _ddebug *debug;
121 unsigned int num_debug;
122 struct {
123 unsigned int sym, str, mod, vers, info, pcpu;
124 } index;
125};
126
113/* We require a truly strong try_module_get(): 0 means failure due to 127/* We require a truly strong try_module_get(): 0 means failure due to
114 ongoing or failed initialization etc. */ 128 ongoing or failed initialization etc. */
115static inline int strong_try_module_get(struct module *mod) 129static inline int strong_try_module_get(struct module *mod)
@@ -140,42 +154,38 @@ void __module_put_and_exit(struct module *mod, long code)
140EXPORT_SYMBOL(__module_put_and_exit); 154EXPORT_SYMBOL(__module_put_and_exit);
141 155
142/* Find a module section: 0 means not found. */ 156/* Find a module section: 0 means not found. */
143static unsigned int find_sec(Elf_Ehdr *hdr, 157static unsigned int find_sec(const struct load_info *info, const char *name)
144 Elf_Shdr *sechdrs,
145 const char *secstrings,
146 const char *name)
147{ 158{
148 unsigned int i; 159 unsigned int i;
149 160
150 for (i = 1; i < hdr->e_shnum; i++) 161 for (i = 1; i < info->hdr->e_shnum; i++) {
162 Elf_Shdr *shdr = &info->sechdrs[i];
151 /* Alloc bit cleared means "ignore it." */ 163 /* Alloc bit cleared means "ignore it." */
152 if ((sechdrs[i].sh_flags & SHF_ALLOC) 164 if ((shdr->sh_flags & SHF_ALLOC)
153 && strcmp(secstrings+sechdrs[i].sh_name, name) == 0) 165 && strcmp(info->secstrings + shdr->sh_name, name) == 0)
154 return i; 166 return i;
167 }
155 return 0; 168 return 0;
156} 169}
157 170
158/* Find a module section, or NULL. */ 171/* Find a module section, or NULL. */
159static void *section_addr(Elf_Ehdr *hdr, Elf_Shdr *shdrs, 172static void *section_addr(const struct load_info *info, const char *name)
160 const char *secstrings, const char *name)
161{ 173{
162 /* Section 0 has sh_addr 0. */ 174 /* Section 0 has sh_addr 0. */
163 return (void *)shdrs[find_sec(hdr, shdrs, secstrings, name)].sh_addr; 175 return (void *)info->sechdrs[find_sec(info, name)].sh_addr;
164} 176}
165 177
166/* Find a module section, or NULL. Fill in number of "objects" in section. */ 178/* Find a module section, or NULL. Fill in number of "objects" in section. */
167static void *section_objs(Elf_Ehdr *hdr, 179static void *section_objs(const struct load_info *info,
168 Elf_Shdr *sechdrs,
169 const char *secstrings,
170 const char *name, 180 const char *name,
171 size_t object_size, 181 size_t object_size,
172 unsigned int *num) 182 unsigned int *num)
173{ 183{
174 unsigned int sec = find_sec(hdr, sechdrs, secstrings, name); 184 unsigned int sec = find_sec(info, name);
175 185
176 /* Section 0 has sh_addr 0 and sh_size 0. */ 186 /* Section 0 has sh_addr 0 and sh_size 0. */
177 *num = sechdrs[sec].sh_size / object_size; 187 *num = info->sechdrs[sec].sh_size / object_size;
178 return (void *)sechdrs[sec].sh_addr; 188 return (void *)info->sechdrs[sec].sh_addr;
179} 189}
180 190
181/* Provided by the linker */ 191/* Provided by the linker */
@@ -227,7 +237,7 @@ bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner,
227 unsigned int symnum, void *data), void *data) 237 unsigned int symnum, void *data), void *data)
228{ 238{
229 struct module *mod; 239 struct module *mod;
230 const struct symsearch arr[] = { 240 static const struct symsearch arr[] = {
231 { __start___ksymtab, __stop___ksymtab, __start___kcrctab, 241 { __start___ksymtab, __stop___ksymtab, __start___kcrctab,
232 NOT_GPL_ONLY, false }, 242 NOT_GPL_ONLY, false },
233 { __start___ksymtab_gpl, __stop___ksymtab_gpl, 243 { __start___ksymtab_gpl, __stop___ksymtab_gpl,
@@ -392,7 +402,8 @@ static int percpu_modalloc(struct module *mod,
392 mod->percpu = __alloc_reserved_percpu(size, align); 402 mod->percpu = __alloc_reserved_percpu(size, align);
393 if (!mod->percpu) { 403 if (!mod->percpu) {
394 printk(KERN_WARNING 404 printk(KERN_WARNING
395 "Could not allocate %lu bytes percpu data\n", size); 405 "%s: Could not allocate %lu bytes percpu data\n",
406 mod->name, size);
396 return -ENOMEM; 407 return -ENOMEM;
397 } 408 }
398 mod->percpu_size = size; 409 mod->percpu_size = size;
@@ -404,11 +415,9 @@ static void percpu_modfree(struct module *mod)
404 free_percpu(mod->percpu); 415 free_percpu(mod->percpu);
405} 416}
406 417
407static unsigned int find_pcpusec(Elf_Ehdr *hdr, 418static unsigned int find_pcpusec(struct load_info *info)
408 Elf_Shdr *sechdrs,
409 const char *secstrings)
410{ 419{
411 return find_sec(hdr, sechdrs, secstrings, ".data..percpu"); 420 return find_sec(info, ".data..percpu");
412} 421}
413 422
414static void percpu_modcopy(struct module *mod, 423static void percpu_modcopy(struct module *mod,
@@ -468,9 +477,7 @@ static inline int percpu_modalloc(struct module *mod,
468static inline void percpu_modfree(struct module *mod) 477static inline void percpu_modfree(struct module *mod)
469{ 478{
470} 479}
471static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, 480static unsigned int find_pcpusec(struct load_info *info)
472 Elf_Shdr *sechdrs,
473 const char *secstrings)
474{ 481{
475 return 0; 482 return 0;
476} 483}
@@ -524,21 +531,21 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
524EXPORT_TRACEPOINT_SYMBOL(module_get); 531EXPORT_TRACEPOINT_SYMBOL(module_get);
525 532
526/* Init the unload section of the module. */ 533/* Init the unload section of the module. */
527static void module_unload_init(struct module *mod) 534static int module_unload_init(struct module *mod)
528{ 535{
529 int cpu; 536 mod->refptr = alloc_percpu(struct module_ref);
537 if (!mod->refptr)
538 return -ENOMEM;
530 539
531 INIT_LIST_HEAD(&mod->source_list); 540 INIT_LIST_HEAD(&mod->source_list);
532 INIT_LIST_HEAD(&mod->target_list); 541 INIT_LIST_HEAD(&mod->target_list);
533 for_each_possible_cpu(cpu) {
534 per_cpu_ptr(mod->refptr, cpu)->incs = 0;
535 per_cpu_ptr(mod->refptr, cpu)->decs = 0;
536 }
537 542
538 /* Hold reference count during initialization. */ 543 /* Hold reference count during initialization. */
539 __this_cpu_write(mod->refptr->incs, 1); 544 __this_cpu_write(mod->refptr->incs, 1);
540 /* Backwards compatibility macros put refcount during init. */ 545 /* Backwards compatibility macros put refcount during init. */
541 mod->waiter = current; 546 mod->waiter = current;
547
548 return 0;
542} 549}
543 550
544/* Does a already use b? */ 551/* Does a already use b? */
@@ -618,6 +625,8 @@ static void module_unload_free(struct module *mod)
618 kfree(use); 625 kfree(use);
619 } 626 }
620 mutex_unlock(&module_mutex); 627 mutex_unlock(&module_mutex);
628
629 free_percpu(mod->refptr);
621} 630}
622 631
623#ifdef CONFIG_MODULE_FORCE_UNLOAD 632#ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -891,8 +900,9 @@ int ref_module(struct module *a, struct module *b)
891} 900}
892EXPORT_SYMBOL_GPL(ref_module); 901EXPORT_SYMBOL_GPL(ref_module);
893 902
894static inline void module_unload_init(struct module *mod) 903static inline int module_unload_init(struct module *mod)
895{ 904{
905 return 0;
896} 906}
897#endif /* CONFIG_MODULE_UNLOAD */ 907#endif /* CONFIG_MODULE_UNLOAD */
898 908
@@ -1051,10 +1061,9 @@ static inline int same_magic(const char *amagic, const char *bmagic,
1051#endif /* CONFIG_MODVERSIONS */ 1061#endif /* CONFIG_MODVERSIONS */
1052 1062
1053/* Resolve a symbol for this module. I.e. if we find one, record usage. */ 1063/* Resolve a symbol for this module. I.e. if we find one, record usage. */
1054static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, 1064static const struct kernel_symbol *resolve_symbol(struct module *mod,
1055 unsigned int versindex, 1065 const struct load_info *info,
1056 const char *name, 1066 const char *name,
1057 struct module *mod,
1058 char ownername[]) 1067 char ownername[])
1059{ 1068{
1060 struct module *owner; 1069 struct module *owner;
@@ -1068,7 +1077,8 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
1068 if (!sym) 1077 if (!sym)
1069 goto unlock; 1078 goto unlock;
1070 1079
1071 if (!check_version(sechdrs, versindex, name, mod, crc, owner)) { 1080 if (!check_version(info->sechdrs, info->index.vers, name, mod, crc,
1081 owner)) {
1072 sym = ERR_PTR(-EINVAL); 1082 sym = ERR_PTR(-EINVAL);
1073 goto getname; 1083 goto getname;
1074 } 1084 }
@@ -1087,21 +1097,20 @@ unlock:
1087 return sym; 1097 return sym;
1088} 1098}
1089 1099
1090static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs, 1100static const struct kernel_symbol *
1091 unsigned int versindex, 1101resolve_symbol_wait(struct module *mod,
1092 const char *name, 1102 const struct load_info *info,
1093 struct module *mod) 1103 const char *name)
1094{ 1104{
1095 const struct kernel_symbol *ksym; 1105 const struct kernel_symbol *ksym;
1096 char ownername[MODULE_NAME_LEN]; 1106 char owner[MODULE_NAME_LEN];
1097 1107
1098 if (wait_event_interruptible_timeout(module_wq, 1108 if (wait_event_interruptible_timeout(module_wq,
1099 !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name, 1109 !IS_ERR(ksym = resolve_symbol(mod, info, name, owner))
1100 mod, ownername)) || 1110 || PTR_ERR(ksym) != -EBUSY,
1101 PTR_ERR(ksym) != -EBUSY,
1102 30 * HZ) <= 0) { 1111 30 * HZ) <= 0) {
1103 printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n", 1112 printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
1104 mod->name, ownername); 1113 mod->name, owner);
1105 } 1114 }
1106 return ksym; 1115 return ksym;
1107} 1116}
@@ -1110,8 +1119,9 @@ static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs,
1110 * /sys/module/foo/sections stuff 1119 * /sys/module/foo/sections stuff
1111 * J. Corbet <corbet@lwn.net> 1120 * J. Corbet <corbet@lwn.net>
1112 */ 1121 */
1113#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) 1122#ifdef CONFIG_SYSFS
1114 1123
1124#ifdef CONFIG_KALLSYMS
1115static inline bool sect_empty(const Elf_Shdr *sect) 1125static inline bool sect_empty(const Elf_Shdr *sect)
1116{ 1126{
1117 return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; 1127 return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
@@ -1148,8 +1158,7 @@ static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
1148 kfree(sect_attrs); 1158 kfree(sect_attrs);
1149} 1159}
1150 1160
1151static void add_sect_attrs(struct module *mod, unsigned int nsect, 1161static void add_sect_attrs(struct module *mod, const struct load_info *info)
1152 char *secstrings, Elf_Shdr *sechdrs)
1153{ 1162{
1154 unsigned int nloaded = 0, i, size[2]; 1163 unsigned int nloaded = 0, i, size[2];
1155 struct module_sect_attrs *sect_attrs; 1164 struct module_sect_attrs *sect_attrs;
@@ -1157,8 +1166,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
1157 struct attribute **gattr; 1166 struct attribute **gattr;
1158 1167
1159 /* Count loaded sections and allocate structures */ 1168 /* Count loaded sections and allocate structures */
1160 for (i = 0; i < nsect; i++) 1169 for (i = 0; i < info->hdr->e_shnum; i++)
1161 if (!sect_empty(&sechdrs[i])) 1170 if (!sect_empty(&info->sechdrs[i]))
1162 nloaded++; 1171 nloaded++;
1163 size[0] = ALIGN(sizeof(*sect_attrs) 1172 size[0] = ALIGN(sizeof(*sect_attrs)
1164 + nloaded * sizeof(sect_attrs->attrs[0]), 1173 + nloaded * sizeof(sect_attrs->attrs[0]),
@@ -1175,11 +1184,12 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
1175 sect_attrs->nsections = 0; 1184 sect_attrs->nsections = 0;
1176 sattr = &sect_attrs->attrs[0]; 1185 sattr = &sect_attrs->attrs[0];
1177 gattr = &sect_attrs->grp.attrs[0]; 1186 gattr = &sect_attrs->grp.attrs[0];
1178 for (i = 0; i < nsect; i++) { 1187 for (i = 0; i < info->hdr->e_shnum; i++) {
1179 if (sect_empty(&sechdrs[i])) 1188 Elf_Shdr *sec = &info->sechdrs[i];
1189 if (sect_empty(sec))
1180 continue; 1190 continue;
1181 sattr->address = sechdrs[i].sh_addr; 1191 sattr->address = sec->sh_addr;
1182 sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, 1192 sattr->name = kstrdup(info->secstrings + sec->sh_name,
1183 GFP_KERNEL); 1193 GFP_KERNEL);
1184 if (sattr->name == NULL) 1194 if (sattr->name == NULL)
1185 goto out; 1195 goto out;
@@ -1247,8 +1257,7 @@ static void free_notes_attrs(struct module_notes_attrs *notes_attrs,
1247 kfree(notes_attrs); 1257 kfree(notes_attrs);
1248} 1258}
1249 1259
1250static void add_notes_attrs(struct module *mod, unsigned int nsect, 1260static void add_notes_attrs(struct module *mod, const struct load_info *info)
1251 char *secstrings, Elf_Shdr *sechdrs)
1252{ 1261{
1253 unsigned int notes, loaded, i; 1262 unsigned int notes, loaded, i;
1254 struct module_notes_attrs *notes_attrs; 1263 struct module_notes_attrs *notes_attrs;
@@ -1260,9 +1269,9 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
1260 1269
1261 /* Count notes sections and allocate structures. */ 1270 /* Count notes sections and allocate structures. */
1262 notes = 0; 1271 notes = 0;
1263 for (i = 0; i < nsect; i++) 1272 for (i = 0; i < info->hdr->e_shnum; i++)
1264 if (!sect_empty(&sechdrs[i]) && 1273 if (!sect_empty(&info->sechdrs[i]) &&
1265 (sechdrs[i].sh_type == SHT_NOTE)) 1274 (info->sechdrs[i].sh_type == SHT_NOTE))
1266 ++notes; 1275 ++notes;
1267 1276
1268 if (notes == 0) 1277 if (notes == 0)
@@ -1276,15 +1285,15 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
1276 1285
1277 notes_attrs->notes = notes; 1286 notes_attrs->notes = notes;
1278 nattr = &notes_attrs->attrs[0]; 1287 nattr = &notes_attrs->attrs[0];
1279 for (loaded = i = 0; i < nsect; ++i) { 1288 for (loaded = i = 0; i < info->hdr->e_shnum; ++i) {
1280 if (sect_empty(&sechdrs[i])) 1289 if (sect_empty(&info->sechdrs[i]))
1281 continue; 1290 continue;
1282 if (sechdrs[i].sh_type == SHT_NOTE) { 1291 if (info->sechdrs[i].sh_type == SHT_NOTE) {
1283 sysfs_bin_attr_init(nattr); 1292 sysfs_bin_attr_init(nattr);
1284 nattr->attr.name = mod->sect_attrs->attrs[loaded].name; 1293 nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
1285 nattr->attr.mode = S_IRUGO; 1294 nattr->attr.mode = S_IRUGO;
1286 nattr->size = sechdrs[i].sh_size; 1295 nattr->size = info->sechdrs[i].sh_size;
1287 nattr->private = (void *) sechdrs[i].sh_addr; 1296 nattr->private = (void *) info->sechdrs[i].sh_addr;
1288 nattr->read = module_notes_read; 1297 nattr->read = module_notes_read;
1289 ++nattr; 1298 ++nattr;
1290 } 1299 }
@@ -1315,8 +1324,8 @@ static void remove_notes_attrs(struct module *mod)
1315 1324
1316#else 1325#else
1317 1326
1318static inline void add_sect_attrs(struct module *mod, unsigned int nsect, 1327static inline void add_sect_attrs(struct module *mod,
1319 char *sectstrings, Elf_Shdr *sechdrs) 1328 const struct load_info *info)
1320{ 1329{
1321} 1330}
1322 1331
@@ -1324,17 +1333,16 @@ static inline void remove_sect_attrs(struct module *mod)
1324{ 1333{
1325} 1334}
1326 1335
1327static inline void add_notes_attrs(struct module *mod, unsigned int nsect, 1336static inline void add_notes_attrs(struct module *mod,
1328 char *sectstrings, Elf_Shdr *sechdrs) 1337 const struct load_info *info)
1329{ 1338{
1330} 1339}
1331 1340
1332static inline void remove_notes_attrs(struct module *mod) 1341static inline void remove_notes_attrs(struct module *mod)
1333{ 1342{
1334} 1343}
1335#endif 1344#endif /* CONFIG_KALLSYMS */
1336 1345
1337#ifdef CONFIG_SYSFS
1338static void add_usage_links(struct module *mod) 1346static void add_usage_links(struct module *mod)
1339{ 1347{
1340#ifdef CONFIG_MODULE_UNLOAD 1348#ifdef CONFIG_MODULE_UNLOAD
@@ -1439,6 +1447,7 @@ out:
1439} 1447}
1440 1448
1441static int mod_sysfs_setup(struct module *mod, 1449static int mod_sysfs_setup(struct module *mod,
1450 const struct load_info *info,
1442 struct kernel_param *kparam, 1451 struct kernel_param *kparam,
1443 unsigned int num_params) 1452 unsigned int num_params)
1444{ 1453{
@@ -1463,6 +1472,8 @@ static int mod_sysfs_setup(struct module *mod,
1463 goto out_unreg_param; 1472 goto out_unreg_param;
1464 1473
1465 add_usage_links(mod); 1474 add_usage_links(mod);
1475 add_sect_attrs(mod, info);
1476 add_notes_attrs(mod, info);
1466 1477
1467 kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); 1478 kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
1468 return 0; 1479 return 0;
@@ -1479,33 +1490,26 @@ out:
1479 1490
1480static void mod_sysfs_fini(struct module *mod) 1491static void mod_sysfs_fini(struct module *mod)
1481{ 1492{
1493 remove_notes_attrs(mod);
1494 remove_sect_attrs(mod);
1482 kobject_put(&mod->mkobj.kobj); 1495 kobject_put(&mod->mkobj.kobj);
1483} 1496}
1484 1497
1485#else /* CONFIG_SYSFS */ 1498#else /* !CONFIG_SYSFS */
1486
1487static inline int mod_sysfs_init(struct module *mod)
1488{
1489 return 0;
1490}
1491 1499
1492static inline int mod_sysfs_setup(struct module *mod, 1500static int mod_sysfs_setup(struct module *mod,
1501 const struct load_info *info,
1493 struct kernel_param *kparam, 1502 struct kernel_param *kparam,
1494 unsigned int num_params) 1503 unsigned int num_params)
1495{ 1504{
1496 return 0; 1505 return 0;
1497} 1506}
1498 1507
1499static inline int module_add_modinfo_attrs(struct module *mod) 1508static void mod_sysfs_fini(struct module *mod)
1500{
1501 return 0;
1502}
1503
1504static inline void module_remove_modinfo_attrs(struct module *mod)
1505{ 1509{
1506} 1510}
1507 1511
1508static void mod_sysfs_fini(struct module *mod) 1512static void module_remove_modinfo_attrs(struct module *mod)
1509{ 1513{
1510} 1514}
1511 1515
@@ -1515,7 +1519,7 @@ static void del_usage_links(struct module *mod)
1515 1519
1516#endif /* CONFIG_SYSFS */ 1520#endif /* CONFIG_SYSFS */
1517 1521
1518static void mod_kobject_remove(struct module *mod) 1522static void mod_sysfs_teardown(struct module *mod)
1519{ 1523{
1520 del_usage_links(mod); 1524 del_usage_links(mod);
1521 module_remove_modinfo_attrs(mod); 1525 module_remove_modinfo_attrs(mod);
@@ -1545,9 +1549,7 @@ static void free_module(struct module *mod)
1545 mutex_lock(&module_mutex); 1549 mutex_lock(&module_mutex);
1546 stop_machine(__unlink_module, mod, NULL); 1550 stop_machine(__unlink_module, mod, NULL);
1547 mutex_unlock(&module_mutex); 1551 mutex_unlock(&module_mutex);
1548 remove_notes_attrs(mod); 1552 mod_sysfs_teardown(mod);
1549 remove_sect_attrs(mod);
1550 mod_kobject_remove(mod);
1551 1553
1552 /* Remove dynamic debug info */ 1554 /* Remove dynamic debug info */
1553 ddebug_remove_module(mod->name); 1555 ddebug_remove_module(mod->name);
@@ -1565,10 +1567,7 @@ static void free_module(struct module *mod)
1565 module_free(mod, mod->module_init); 1567 module_free(mod, mod->module_init);
1566 kfree(mod->args); 1568 kfree(mod->args);
1567 percpu_modfree(mod); 1569 percpu_modfree(mod);
1568#if defined(CONFIG_MODULE_UNLOAD) 1570
1569 if (mod->refptr)
1570 free_percpu(mod->refptr);
1571#endif
1572 /* Free lock-classes: */ 1571 /* Free lock-classes: */
1573 lockdep_free_key_range(mod->module_core, mod->core_size); 1572 lockdep_free_key_range(mod->module_core, mod->core_size);
1574 1573
@@ -1634,25 +1633,23 @@ static int verify_export_symbols(struct module *mod)
1634} 1633}
1635 1634
1636/* Change all symbols so that st_value encodes the pointer directly. */ 1635/* Change all symbols so that st_value encodes the pointer directly. */
1637static int simplify_symbols(Elf_Shdr *sechdrs, 1636static int simplify_symbols(struct module *mod, const struct load_info *info)
1638 unsigned int symindex, 1637{
1639 const char *strtab, 1638 Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
1640 unsigned int versindex, 1639 Elf_Sym *sym = (void *)symsec->sh_addr;
1641 unsigned int pcpuindex,
1642 struct module *mod)
1643{
1644 Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr;
1645 unsigned long secbase; 1640 unsigned long secbase;
1646 unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); 1641 unsigned int i;
1647 int ret = 0; 1642 int ret = 0;
1648 const struct kernel_symbol *ksym; 1643 const struct kernel_symbol *ksym;
1649 1644
1650 for (i = 1; i < n; i++) { 1645 for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
1646 const char *name = info->strtab + sym[i].st_name;
1647
1651 switch (sym[i].st_shndx) { 1648 switch (sym[i].st_shndx) {
1652 case SHN_COMMON: 1649 case SHN_COMMON:
1653 /* We compiled with -fno-common. These are not 1650 /* We compiled with -fno-common. These are not
1654 supposed to happen. */ 1651 supposed to happen. */
1655 DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name); 1652 DEBUGP("Common symbol: %s\n", name);
1656 printk("%s: please compile with -fno-common\n", 1653 printk("%s: please compile with -fno-common\n",
1657 mod->name); 1654 mod->name);
1658 ret = -ENOEXEC; 1655 ret = -ENOEXEC;
@@ -1665,9 +1662,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
1665 break; 1662 break;
1666 1663
1667 case SHN_UNDEF: 1664 case SHN_UNDEF:
1668 ksym = resolve_symbol_wait(sechdrs, versindex, 1665 ksym = resolve_symbol_wait(mod, info, name);
1669 strtab + sym[i].st_name,
1670 mod);
1671 /* Ok if resolved. */ 1666 /* Ok if resolved. */
1672 if (ksym && !IS_ERR(ksym)) { 1667 if (ksym && !IS_ERR(ksym)) {
1673 sym[i].st_value = ksym->value; 1668 sym[i].st_value = ksym->value;
@@ -1679,17 +1674,16 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
1679 break; 1674 break;
1680 1675
1681 printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n", 1676 printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n",
1682 mod->name, strtab + sym[i].st_name, 1677 mod->name, name, PTR_ERR(ksym));
1683 PTR_ERR(ksym));
1684 ret = PTR_ERR(ksym) ?: -ENOENT; 1678 ret = PTR_ERR(ksym) ?: -ENOENT;
1685 break; 1679 break;
1686 1680
1687 default: 1681 default:
1688 /* Divert to percpu allocation if a percpu var. */ 1682 /* Divert to percpu allocation if a percpu var. */
1689 if (sym[i].st_shndx == pcpuindex) 1683 if (sym[i].st_shndx == info->index.pcpu)
1690 secbase = (unsigned long)mod_percpu(mod); 1684 secbase = (unsigned long)mod_percpu(mod);
1691 else 1685 else
1692 secbase = sechdrs[sym[i].st_shndx].sh_addr; 1686 secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
1693 sym[i].st_value += secbase; 1687 sym[i].st_value += secbase;
1694 break; 1688 break;
1695 } 1689 }
@@ -1698,6 +1692,35 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
1698 return ret; 1692 return ret;
1699} 1693}
1700 1694
1695static int apply_relocations(struct module *mod, const struct load_info *info)
1696{
1697 unsigned int i;
1698 int err = 0;
1699
1700 /* Now do relocations. */
1701 for (i = 1; i < info->hdr->e_shnum; i++) {
1702 unsigned int infosec = info->sechdrs[i].sh_info;
1703
1704 /* Not a valid relocation section? */
1705 if (infosec >= info->hdr->e_shnum)
1706 continue;
1707
1708 /* Don't bother with non-allocated sections */
1709 if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC))
1710 continue;
1711
1712 if (info->sechdrs[i].sh_type == SHT_REL)
1713 err = apply_relocate(info->sechdrs, info->strtab,
1714 info->index.sym, i, mod);
1715 else if (info->sechdrs[i].sh_type == SHT_RELA)
1716 err = apply_relocate_add(info->sechdrs, info->strtab,
1717 info->index.sym, i, mod);
1718 if (err < 0)
1719 break;
1720 }
1721 return err;
1722}
1723
1701/* Additional bytes needed by arch in front of individual sections */ 1724/* Additional bytes needed by arch in front of individual sections */
1702unsigned int __weak arch_mod_section_prepend(struct module *mod, 1725unsigned int __weak arch_mod_section_prepend(struct module *mod,
1703 unsigned int section) 1726 unsigned int section)
@@ -1722,10 +1745,7 @@ static long get_offset(struct module *mod, unsigned int *size,
1722 might -- code, read-only data, read-write data, small data. Tally 1745 might -- code, read-only data, read-write data, small data. Tally
1723 sizes, and place the offsets into sh_entsize fields: high bit means it 1746 sizes, and place the offsets into sh_entsize fields: high bit means it
1724 belongs in init. */ 1747 belongs in init. */
1725static void layout_sections(struct module *mod, 1748static void layout_sections(struct module *mod, struct load_info *info)
1726 const Elf_Ehdr *hdr,
1727 Elf_Shdr *sechdrs,
1728 const char *secstrings)
1729{ 1749{
1730 static unsigned long const masks[][2] = { 1750 static unsigned long const masks[][2] = {
1731 /* NOTE: all executable code must be the first section 1751 /* NOTE: all executable code must be the first section
@@ -1738,21 +1758,22 @@ static void layout_sections(struct module *mod,
1738 }; 1758 };
1739 unsigned int m, i; 1759 unsigned int m, i;
1740 1760
1741 for (i = 0; i < hdr->e_shnum; i++) 1761 for (i = 0; i < info->hdr->e_shnum; i++)
1742 sechdrs[i].sh_entsize = ~0UL; 1762 info->sechdrs[i].sh_entsize = ~0UL;
1743 1763
1744 DEBUGP("Core section allocation order:\n"); 1764 DEBUGP("Core section allocation order:\n");
1745 for (m = 0; m < ARRAY_SIZE(masks); ++m) { 1765 for (m = 0; m < ARRAY_SIZE(masks); ++m) {
1746 for (i = 0; i < hdr->e_shnum; ++i) { 1766 for (i = 0; i < info->hdr->e_shnum; ++i) {
1747 Elf_Shdr *s = &sechdrs[i]; 1767 Elf_Shdr *s = &info->sechdrs[i];
1768 const char *sname = info->secstrings + s->sh_name;
1748 1769
1749 if ((s->sh_flags & masks[m][0]) != masks[m][0] 1770 if ((s->sh_flags & masks[m][0]) != masks[m][0]
1750 || (s->sh_flags & masks[m][1]) 1771 || (s->sh_flags & masks[m][1])
1751 || s->sh_entsize != ~0UL 1772 || s->sh_entsize != ~0UL
1752 || strstarts(secstrings + s->sh_name, ".init")) 1773 || strstarts(sname, ".init"))
1753 continue; 1774 continue;
1754 s->sh_entsize = get_offset(mod, &mod->core_size, s, i); 1775 s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
1755 DEBUGP("\t%s\n", secstrings + s->sh_name); 1776 DEBUGP("\t%s\n", name);
1756 } 1777 }
1757 if (m == 0) 1778 if (m == 0)
1758 mod->core_text_size = mod->core_size; 1779 mod->core_text_size = mod->core_size;
@@ -1760,17 +1781,18 @@ static void layout_sections(struct module *mod,
1760 1781
1761 DEBUGP("Init section allocation order:\n"); 1782 DEBUGP("Init section allocation order:\n");
1762 for (m = 0; m < ARRAY_SIZE(masks); ++m) { 1783 for (m = 0; m < ARRAY_SIZE(masks); ++m) {
1763 for (i = 0; i < hdr->e_shnum; ++i) { 1784 for (i = 0; i < info->hdr->e_shnum; ++i) {
1764 Elf_Shdr *s = &sechdrs[i]; 1785 Elf_Shdr *s = &info->sechdrs[i];
1786 const char *sname = info->secstrings + s->sh_name;
1765 1787
1766 if ((s->sh_flags & masks[m][0]) != masks[m][0] 1788 if ((s->sh_flags & masks[m][0]) != masks[m][0]
1767 || (s->sh_flags & masks[m][1]) 1789 || (s->sh_flags & masks[m][1])
1768 || s->sh_entsize != ~0UL 1790 || s->sh_entsize != ~0UL
1769 || !strstarts(secstrings + s->sh_name, ".init")) 1791 || !strstarts(sname, ".init"))
1770 continue; 1792 continue;
1771 s->sh_entsize = (get_offset(mod, &mod->init_size, s, i) 1793 s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
1772 | INIT_OFFSET_MASK); 1794 | INIT_OFFSET_MASK);
1773 DEBUGP("\t%s\n", secstrings + s->sh_name); 1795 DEBUGP("\t%s\n", sname);
1774 } 1796 }
1775 if (m == 0) 1797 if (m == 0)
1776 mod->init_text_size = mod->init_size; 1798 mod->init_text_size = mod->init_size;
@@ -1809,33 +1831,28 @@ static char *next_string(char *string, unsigned long *secsize)
1809 return string; 1831 return string;
1810} 1832}
1811 1833
1812static char *get_modinfo(Elf_Shdr *sechdrs, 1834static char *get_modinfo(struct load_info *info, const char *tag)
1813 unsigned int info,
1814 const char *tag)
1815{ 1835{
1816 char *p; 1836 char *p;
1817 unsigned int taglen = strlen(tag); 1837 unsigned int taglen = strlen(tag);
1818 unsigned long size = sechdrs[info].sh_size; 1838 Elf_Shdr *infosec = &info->sechdrs[info->index.info];
1839 unsigned long size = infosec->sh_size;
1819 1840
1820 for (p = (char *)sechdrs[info].sh_addr; p; p = next_string(p, &size)) { 1841 for (p = (char *)infosec->sh_addr; p; p = next_string(p, &size)) {
1821 if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=') 1842 if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=')
1822 return p + taglen + 1; 1843 return p + taglen + 1;
1823 } 1844 }
1824 return NULL; 1845 return NULL;
1825} 1846}
1826 1847
1827static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, 1848static void setup_modinfo(struct module *mod, struct load_info *info)
1828 unsigned int infoindex)
1829{ 1849{
1830 struct module_attribute *attr; 1850 struct module_attribute *attr;
1831 int i; 1851 int i;
1832 1852
1833 for (i = 0; (attr = modinfo_attrs[i]); i++) { 1853 for (i = 0; (attr = modinfo_attrs[i]); i++) {
1834 if (attr->setup) 1854 if (attr->setup)
1835 attr->setup(mod, 1855 attr->setup(mod, get_modinfo(info, attr->attr.name));
1836 get_modinfo(sechdrs,
1837 infoindex,
1838 attr->attr.name));
1839 } 1856 }
1840} 1857}
1841 1858
@@ -1876,11 +1893,10 @@ static int is_exported(const char *name, unsigned long value,
1876} 1893}
1877 1894
1878/* As per nm */ 1895/* As per nm */
1879static char elf_type(const Elf_Sym *sym, 1896static char elf_type(const Elf_Sym *sym, const struct load_info *info)
1880 Elf_Shdr *sechdrs,
1881 const char *secstrings,
1882 struct module *mod)
1883{ 1897{
1898 const Elf_Shdr *sechdrs = info->sechdrs;
1899
1884 if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { 1900 if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
1885 if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT) 1901 if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT)
1886 return 'v'; 1902 return 'v';
@@ -1910,8 +1926,10 @@ static char elf_type(const Elf_Sym *sym,
1910 else 1926 else
1911 return 'b'; 1927 return 'b';
1912 } 1928 }
1913 if (strstarts(secstrings + sechdrs[sym->st_shndx].sh_name, ".debug")) 1929 if (strstarts(info->secstrings + sechdrs[sym->st_shndx].sh_name,
1930 ".debug")) {
1914 return 'n'; 1931 return 'n';
1932 }
1915 return '?'; 1933 return '?';
1916} 1934}
1917 1935
@@ -1936,127 +1954,96 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
1936 return true; 1954 return true;
1937} 1955}
1938 1956
1939static unsigned long layout_symtab(struct module *mod, 1957static void layout_symtab(struct module *mod, struct load_info *info)
1940 Elf_Shdr *sechdrs,
1941 unsigned int symindex,
1942 unsigned int strindex,
1943 const Elf_Ehdr *hdr,
1944 const char *secstrings,
1945 unsigned long *pstroffs,
1946 unsigned long *strmap)
1947{ 1958{
1948 unsigned long symoffs; 1959 Elf_Shdr *symsect = info->sechdrs + info->index.sym;
1949 Elf_Shdr *symsect = sechdrs + symindex; 1960 Elf_Shdr *strsect = info->sechdrs + info->index.str;
1950 Elf_Shdr *strsect = sechdrs + strindex;
1951 const Elf_Sym *src; 1961 const Elf_Sym *src;
1952 const char *strtab;
1953 unsigned int i, nsrc, ndst; 1962 unsigned int i, nsrc, ndst;
1954 1963
1955 /* Put symbol section at end of init part of module. */ 1964 /* Put symbol section at end of init part of module. */
1956 symsect->sh_flags |= SHF_ALLOC; 1965 symsect->sh_flags |= SHF_ALLOC;
1957 symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, 1966 symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
1958 symindex) | INIT_OFFSET_MASK; 1967 info->index.sym) | INIT_OFFSET_MASK;
1959 DEBUGP("\t%s\n", secstrings + symsect->sh_name); 1968 DEBUGP("\t%s\n", info->secstrings + symsect->sh_name);
1960 1969
1961 src = (void *)hdr + symsect->sh_offset; 1970 src = (void *)info->hdr + symsect->sh_offset;
1962 nsrc = symsect->sh_size / sizeof(*src); 1971 nsrc = symsect->sh_size / sizeof(*src);
1963 strtab = (void *)hdr + strsect->sh_offset;
1964 for (ndst = i = 1; i < nsrc; ++i, ++src) 1972 for (ndst = i = 1; i < nsrc; ++i, ++src)
1965 if (is_core_symbol(src, sechdrs, hdr->e_shnum)) { 1973 if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
1966 unsigned int j = src->st_name; 1974 unsigned int j = src->st_name;
1967 1975
1968 while(!__test_and_set_bit(j, strmap) && strtab[j]) 1976 while (!__test_and_set_bit(j, info->strmap)
1977 && info->strtab[j])
1969 ++j; 1978 ++j;
1970 ++ndst; 1979 ++ndst;
1971 } 1980 }
1972 1981
1973 /* Append room for core symbols at end of core part. */ 1982 /* Append room for core symbols at end of core part. */
1974 symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); 1983 info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
1975 mod->core_size = symoffs + ndst * sizeof(Elf_Sym); 1984 mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
1976 1985
1977 /* Put string table section at end of init part of module. */ 1986 /* Put string table section at end of init part of module. */
1978 strsect->sh_flags |= SHF_ALLOC; 1987 strsect->sh_flags |= SHF_ALLOC;
1979 strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, 1988 strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
1980 strindex) | INIT_OFFSET_MASK; 1989 info->index.str) | INIT_OFFSET_MASK;
1981 DEBUGP("\t%s\n", secstrings + strsect->sh_name); 1990 DEBUGP("\t%s\n", info->secstrings + strsect->sh_name);
1982 1991
1983 /* Append room for core symbols' strings at end of core part. */ 1992 /* Append room for core symbols' strings at end of core part. */
1984 *pstroffs = mod->core_size; 1993 info->stroffs = mod->core_size;
1985 __set_bit(0, strmap); 1994 __set_bit(0, info->strmap);
1986 mod->core_size += bitmap_weight(strmap, strsect->sh_size); 1995 mod->core_size += bitmap_weight(info->strmap, strsect->sh_size);
1987
1988 return symoffs;
1989} 1996}
1990 1997
1991static void add_kallsyms(struct module *mod, 1998static void add_kallsyms(struct module *mod, const struct load_info *info)
1992 Elf_Shdr *sechdrs,
1993 unsigned int shnum,
1994 unsigned int symindex,
1995 unsigned int strindex,
1996 unsigned long symoffs,
1997 unsigned long stroffs,
1998 const char *secstrings,
1999 unsigned long *strmap)
2000{ 1999{
2001 unsigned int i, ndst; 2000 unsigned int i, ndst;
2002 const Elf_Sym *src; 2001 const Elf_Sym *src;
2003 Elf_Sym *dst; 2002 Elf_Sym *dst;
2004 char *s; 2003 char *s;
2004 Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
2005 2005
2006 mod->symtab = (void *)sechdrs[symindex].sh_addr; 2006 mod->symtab = (void *)symsec->sh_addr;
2007 mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); 2007 mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
2008 mod->strtab = (void *)sechdrs[strindex].sh_addr; 2008 /* Make sure we get permanent strtab: don't use info->strtab. */
2009 mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
2009 2010
2010 /* Set types up while we still have access to sections. */ 2011 /* Set types up while we still have access to sections. */
2011 for (i = 0; i < mod->num_symtab; i++) 2012 for (i = 0; i < mod->num_symtab; i++)
2012 mod->symtab[i].st_info 2013 mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
2013 = elf_type(&mod->symtab[i], sechdrs, secstrings, mod);
2014 2014
2015 mod->core_symtab = dst = mod->module_core + symoffs; 2015 mod->core_symtab = dst = mod->module_core + info->symoffs;
2016 src = mod->symtab; 2016 src = mod->symtab;
2017 *dst = *src; 2017 *dst = *src;
2018 for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { 2018 for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
2019 if (!is_core_symbol(src, sechdrs, shnum)) 2019 if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
2020 continue; 2020 continue;
2021 dst[ndst] = *src; 2021 dst[ndst] = *src;
2022 dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name); 2022 dst[ndst].st_name = bitmap_weight(info->strmap,
2023 dst[ndst].st_name);
2023 ++ndst; 2024 ++ndst;
2024 } 2025 }
2025 mod->core_num_syms = ndst; 2026 mod->core_num_syms = ndst;
2026 2027
2027 mod->core_strtab = s = mod->module_core + stroffs; 2028 mod->core_strtab = s = mod->module_core + info->stroffs;
2028 for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i) 2029 for (*s = 0, i = 1; i < info->sechdrs[info->index.str].sh_size; ++i)
2029 if (test_bit(i, strmap)) 2030 if (test_bit(i, info->strmap))
2030 *++s = mod->strtab[i]; 2031 *++s = mod->strtab[i];
2031} 2032}
2032#else 2033#else
2033static inline unsigned long layout_symtab(struct module *mod, 2034static inline void layout_symtab(struct module *mod, struct load_info *info)
2034 Elf_Shdr *sechdrs,
2035 unsigned int symindex,
2036 unsigned int strindex,
2037 const Elf_Ehdr *hdr,
2038 const char *secstrings,
2039 unsigned long *pstroffs,
2040 unsigned long *strmap)
2041{ 2035{
2042 return 0;
2043} 2036}
2044 2037
2045static inline void add_kallsyms(struct module *mod, 2038static void add_kallsyms(struct module *mod, struct load_info *info)
2046 Elf_Shdr *sechdrs,
2047 unsigned int shnum,
2048 unsigned int symindex,
2049 unsigned int strindex,
2050 unsigned long symoffs,
2051 unsigned long stroffs,
2052 const char *secstrings,
2053 const unsigned long *strmap)
2054{ 2039{
2055} 2040}
2056#endif /* CONFIG_KALLSYMS */ 2041#endif /* CONFIG_KALLSYMS */
2057 2042
2058static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num) 2043static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num)
2059{ 2044{
2045 if (!debug)
2046 return;
2060#ifdef CONFIG_DYNAMIC_DEBUG 2047#ifdef CONFIG_DYNAMIC_DEBUG
2061 if (ddebug_add_module(debug, num, debug->modname)) 2048 if (ddebug_add_module(debug, num, debug->modname))
2062 printk(KERN_ERR "dynamic debug error adding module: %s\n", 2049 printk(KERN_ERR "dynamic debug error adding module: %s\n",
@@ -2087,65 +2074,47 @@ static void *module_alloc_update_bounds(unsigned long size)
2087} 2074}
2088 2075
2089#ifdef CONFIG_DEBUG_KMEMLEAK 2076#ifdef CONFIG_DEBUG_KMEMLEAK
2090static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, 2077static void kmemleak_load_module(const struct module *mod,
2091 Elf_Shdr *sechdrs, char *secstrings) 2078 const struct load_info *info)
2092{ 2079{
2093 unsigned int i; 2080 unsigned int i;
2094 2081
2095 /* only scan the sections containing data */ 2082 /* only scan the sections containing data */
2096 kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); 2083 kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
2097 2084
2098 for (i = 1; i < hdr->e_shnum; i++) { 2085 for (i = 1; i < info->hdr->e_shnum; i++) {
2099 if (!(sechdrs[i].sh_flags & SHF_ALLOC)) 2086 const char *name = info->secstrings + info->sechdrs[i].sh_name;
2087 if (!(info->sechdrs[i].sh_flags & SHF_ALLOC))
2100 continue; 2088 continue;
2101 if (strncmp(secstrings + sechdrs[i].sh_name, ".data", 5) != 0 2089 if (!strstarts(name, ".data") && !strstarts(name, ".bss"))
2102 && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
2103 continue; 2090 continue;
2104 2091
2105 kmemleak_scan_area((void *)sechdrs[i].sh_addr, 2092 kmemleak_scan_area((void *)info->sechdrs[i].sh_addr,
2106 sechdrs[i].sh_size, GFP_KERNEL); 2093 info->sechdrs[i].sh_size, GFP_KERNEL);
2107 } 2094 }
2108} 2095}
2109#else 2096#else
2110static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, 2097static inline void kmemleak_load_module(const struct module *mod,
2111 Elf_Shdr *sechdrs, char *secstrings) 2098 const struct load_info *info)
2112{ 2099{
2113} 2100}
2114#endif 2101#endif
2115 2102
2116/* Allocate and load the module: note that size of section 0 is always 2103/* Sets info->hdr and info->len. */
2117 zero, and we rely on this for optional sections. */ 2104static int copy_and_check(struct load_info *info,
2118static noinline struct module *load_module(void __user *umod, 2105 const void __user *umod, unsigned long len,
2119 unsigned long len, 2106 const char __user *uargs)
2120 const char __user *uargs)
2121{ 2107{
2108 int err;
2122 Elf_Ehdr *hdr; 2109 Elf_Ehdr *hdr;
2123 Elf_Shdr *sechdrs;
2124 char *secstrings, *args, *modmagic, *strtab = NULL;
2125 char *staging;
2126 unsigned int i;
2127 unsigned int symindex = 0;
2128 unsigned int strindex = 0;
2129 unsigned int modindex, versindex, infoindex, pcpuindex;
2130 struct module *mod;
2131 long err = 0;
2132 void *ptr = NULL; /* Stops spurious gcc warning */
2133 unsigned long symoffs, stroffs, *strmap;
2134 void __percpu *percpu;
2135 struct _ddebug *debug = NULL;
2136 unsigned int num_debug = 0;
2137 2110
2138 mm_segment_t old_fs;
2139
2140 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
2141 umod, len, uargs);
2142 if (len < sizeof(*hdr)) 2111 if (len < sizeof(*hdr))
2143 return ERR_PTR(-ENOEXEC); 2112 return -ENOEXEC;
2144 2113
2145 /* Suck in entire file: we'll want most of it. */ 2114 /* Suck in entire file: we'll want most of it. */
2146 /* vmalloc barfs on "unusual" numbers. Check here */ 2115 /* vmalloc barfs on "unusual" numbers. Check here */
2147 if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) 2116 if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
2148 return ERR_PTR(-ENOMEM); 2117 return -ENOMEM;
2149 2118
2150 if (copy_from_user(hdr, umod, len) != 0) { 2119 if (copy_from_user(hdr, umod, len) != 0) {
2151 err = -EFAULT; 2120 err = -EFAULT;
@@ -2153,135 +2122,225 @@ static noinline struct module *load_module(void __user *umod,
2153 } 2122 }
2154 2123
2155 /* Sanity checks against insmoding binaries or wrong arch, 2124 /* Sanity checks against insmoding binaries or wrong arch,
2156 weird elf version */ 2125 weird elf version */
2157 if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 2126 if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
2158 || hdr->e_type != ET_REL 2127 || hdr->e_type != ET_REL
2159 || !elf_check_arch(hdr) 2128 || !elf_check_arch(hdr)
2160 || hdr->e_shentsize != sizeof(*sechdrs)) { 2129 || hdr->e_shentsize != sizeof(Elf_Shdr)) {
2161 err = -ENOEXEC; 2130 err = -ENOEXEC;
2162 goto free_hdr; 2131 goto free_hdr;
2163 } 2132 }
2164 2133
2165 if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) 2134 if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) {
2166 goto truncated; 2135 err = -ENOEXEC;
2136 goto free_hdr;
2137 }
2167 2138
2168 /* Convenience variables */ 2139 info->hdr = hdr;
2169 sechdrs = (void *)hdr + hdr->e_shoff; 2140 info->len = len;
2170 secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 2141 return 0;
2171 sechdrs[0].sh_addr = 0;
2172 2142
2173 for (i = 1; i < hdr->e_shnum; i++) { 2143free_hdr:
2174 if (sechdrs[i].sh_type != SHT_NOBITS 2144 vfree(hdr);
2175 && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) 2145 return err;
2176 goto truncated; 2146}
2147
2148static void free_copy(struct load_info *info)
2149{
2150 vfree(info->hdr);
2151}
2152
2153static int rewrite_section_headers(struct load_info *info)
2154{
2155 unsigned int i;
2156
2157 /* This should always be true, but let's be sure. */
2158 info->sechdrs[0].sh_addr = 0;
2159
2160 for (i = 1; i < info->hdr->e_shnum; i++) {
2161 Elf_Shdr *shdr = &info->sechdrs[i];
2162 if (shdr->sh_type != SHT_NOBITS
2163 && info->len < shdr->sh_offset + shdr->sh_size) {
2164 printk(KERN_ERR "Module len %lu truncated\n",
2165 info->len);
2166 return -ENOEXEC;
2167 }
2177 2168
2178 /* Mark all sections sh_addr with their address in the 2169 /* Mark all sections sh_addr with their address in the
2179 temporary image. */ 2170 temporary image. */
2180 sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset; 2171 shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset;
2181 2172
2182 /* Internal symbols and strings. */
2183 if (sechdrs[i].sh_type == SHT_SYMTAB) {
2184 symindex = i;
2185 strindex = sechdrs[i].sh_link;
2186 strtab = (char *)hdr + sechdrs[strindex].sh_offset;
2187 }
2188#ifndef CONFIG_MODULE_UNLOAD 2173#ifndef CONFIG_MODULE_UNLOAD
2189 /* Don't load .exit sections */ 2174 /* Don't load .exit sections */
2190 if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) 2175 if (strstarts(info->secstrings+shdr->sh_name, ".exit"))
2191 sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; 2176 shdr->sh_flags &= ~(unsigned long)SHF_ALLOC;
2192#endif 2177#endif
2193 } 2178 }
2194 2179
2195 modindex = find_sec(hdr, sechdrs, secstrings, 2180 /* Track but don't keep modinfo and version sections. */
2196 ".gnu.linkonce.this_module"); 2181 info->index.vers = find_sec(info, "__versions");
2197 if (!modindex) { 2182 info->index.info = find_sec(info, ".modinfo");
2183 info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
2184 info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
2185 return 0;
2186}
2187
2188/*
2189 * Set up our basic convenience variables (pointers to section headers,
2190 * search for module section index etc), and do some basic section
2191 * verification.
2192 *
2193 * Return the temporary module pointer (we'll replace it with the final
2194 * one when we move the module sections around).
2195 */
2196static struct module *setup_load_info(struct load_info *info)
2197{
2198 unsigned int i;
2199 int err;
2200 struct module *mod;
2201
2202 /* Set up the convenience variables */
2203 info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
2204 info->secstrings = (void *)info->hdr
2205 + info->sechdrs[info->hdr->e_shstrndx].sh_offset;
2206
2207 err = rewrite_section_headers(info);
2208 if (err)
2209 return ERR_PTR(err);
2210
2211 /* Find internal symbols and strings. */
2212 for (i = 1; i < info->hdr->e_shnum; i++) {
2213 if (info->sechdrs[i].sh_type == SHT_SYMTAB) {
2214 info->index.sym = i;
2215 info->index.str = info->sechdrs[i].sh_link;
2216 info->strtab = (char *)info->hdr
2217 + info->sechdrs[info->index.str].sh_offset;
2218 break;
2219 }
2220 }
2221
2222 info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
2223 if (!info->index.mod) {
2198 printk(KERN_WARNING "No module found in object\n"); 2224 printk(KERN_WARNING "No module found in object\n");
2199 err = -ENOEXEC; 2225 return ERR_PTR(-ENOEXEC);
2200 goto free_hdr;
2201 } 2226 }
2202 /* This is temporary: point mod into copy of data. */ 2227 /* This is temporary: point mod into copy of data. */
2203 mod = (void *)sechdrs[modindex].sh_addr; 2228 mod = (void *)info->sechdrs[info->index.mod].sh_addr;
2204 2229
2205 if (symindex == 0) { 2230 if (info->index.sym == 0) {
2206 printk(KERN_WARNING "%s: module has no symbols (stripped?)\n", 2231 printk(KERN_WARNING "%s: module has no symbols (stripped?)\n",
2207 mod->name); 2232 mod->name);
2208 err = -ENOEXEC; 2233 return ERR_PTR(-ENOEXEC);
2209 goto free_hdr;
2210 } 2234 }
2211 2235
2212 versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); 2236 info->index.pcpu = find_pcpusec(info);
2213 infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
2214 pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
2215
2216 /* Don't keep modinfo and version sections. */
2217 sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
2218 sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
2219 2237
2220 /* Check module struct version now, before we try to use module. */ 2238 /* Check module struct version now, before we try to use module. */
2221 if (!check_modstruct_version(sechdrs, versindex, mod)) { 2239 if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
2222 err = -ENOEXEC; 2240 return ERR_PTR(-ENOEXEC);
2223 goto free_hdr; 2241
2224 } 2242 return mod;
2243}
2244
2245static int check_modinfo(struct module *mod, struct load_info *info)
2246{
2247 const char *modmagic = get_modinfo(info, "vermagic");
2248 int err;
2225 2249
2226 modmagic = get_modinfo(sechdrs, infoindex, "vermagic");
2227 /* This is allowed: modprobe --force will invalidate it. */ 2250 /* This is allowed: modprobe --force will invalidate it. */
2228 if (!modmagic) { 2251 if (!modmagic) {
2229 err = try_to_force_load(mod, "bad vermagic"); 2252 err = try_to_force_load(mod, "bad vermagic");
2230 if (err) 2253 if (err)
2231 goto free_hdr; 2254 return err;
2232 } else if (!same_magic(modmagic, vermagic, versindex)) { 2255 } else if (!same_magic(modmagic, vermagic, info->index.vers)) {
2233 printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", 2256 printk(KERN_ERR "%s: version magic '%s' should be '%s'\n",
2234 mod->name, modmagic, vermagic); 2257 mod->name, modmagic, vermagic);
2235 err = -ENOEXEC; 2258 return -ENOEXEC;
2236 goto free_hdr;
2237 } 2259 }
2238 2260
2239 staging = get_modinfo(sechdrs, infoindex, "staging"); 2261 if (get_modinfo(info, "staging")) {
2240 if (staging) {
2241 add_taint_module(mod, TAINT_CRAP); 2262 add_taint_module(mod, TAINT_CRAP);
2242 printk(KERN_WARNING "%s: module is from the staging directory," 2263 printk(KERN_WARNING "%s: module is from the staging directory,"
2243 " the quality is unknown, you have been warned.\n", 2264 " the quality is unknown, you have been warned.\n",
2244 mod->name); 2265 mod->name);
2245 } 2266 }
2246 2267
2247 /* Now copy in args */ 2268 /* Set up license info based on the info section */
2248 args = strndup_user(uargs, ~0UL >> 1); 2269 set_license(mod, get_modinfo(info, "license"));
2249 if (IS_ERR(args)) {
2250 err = PTR_ERR(args);
2251 goto free_hdr;
2252 }
2253 2270
2254 strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size) 2271 return 0;
2255 * sizeof(long), GFP_KERNEL); 2272}
2256 if (!strmap) {
2257 err = -ENOMEM;
2258 goto free_mod;
2259 }
2260 2273
2261 mod->state = MODULE_STATE_COMING; 2274static void find_module_sections(struct module *mod, struct load_info *info)
2275{
2276 mod->kp = section_objs(info, "__param",
2277 sizeof(*mod->kp), &mod->num_kp);
2278 mod->syms = section_objs(info, "__ksymtab",
2279 sizeof(*mod->syms), &mod->num_syms);
2280 mod->crcs = section_addr(info, "__kcrctab");
2281 mod->gpl_syms = section_objs(info, "__ksymtab_gpl",
2282 sizeof(*mod->gpl_syms),
2283 &mod->num_gpl_syms);
2284 mod->gpl_crcs = section_addr(info, "__kcrctab_gpl");
2285 mod->gpl_future_syms = section_objs(info,
2286 "__ksymtab_gpl_future",
2287 sizeof(*mod->gpl_future_syms),
2288 &mod->num_gpl_future_syms);
2289 mod->gpl_future_crcs = section_addr(info, "__kcrctab_gpl_future");
2262 2290
2263 /* Allow arches to frob section contents and sizes. */ 2291#ifdef CONFIG_UNUSED_SYMBOLS
2264 err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod); 2292 mod->unused_syms = section_objs(info, "__ksymtab_unused",
2265 if (err < 0) 2293 sizeof(*mod->unused_syms),
2266 goto free_mod; 2294 &mod->num_unused_syms);
2295 mod->unused_crcs = section_addr(info, "__kcrctab_unused");
2296 mod->unused_gpl_syms = section_objs(info, "__ksymtab_unused_gpl",
2297 sizeof(*mod->unused_gpl_syms),
2298 &mod->num_unused_gpl_syms);
2299 mod->unused_gpl_crcs = section_addr(info, "__kcrctab_unused_gpl");
2300#endif
2301#ifdef CONFIG_CONSTRUCTORS
2302 mod->ctors = section_objs(info, ".ctors",
2303 sizeof(*mod->ctors), &mod->num_ctors);
2304#endif
2267 2305
2268 if (pcpuindex) { 2306#ifdef CONFIG_TRACEPOINTS
2269 /* We have a special allocation for this section. */ 2307 mod->tracepoints = section_objs(info, "__tracepoints",
2270 err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, 2308 sizeof(*mod->tracepoints),
2271 sechdrs[pcpuindex].sh_addralign); 2309 &mod->num_tracepoints);
2272 if (err) 2310#endif
2273 goto free_mod; 2311#ifdef CONFIG_EVENT_TRACING
2274 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 2312 mod->trace_events = section_objs(info, "_ftrace_events",
2275 } 2313 sizeof(*mod->trace_events),
2276 /* Keep this around for failure path. */ 2314 &mod->num_trace_events);
2277 percpu = mod_percpu(mod); 2315 /*
2316 * This section contains pointers to allocated objects in the trace
2317 * code and not scanning it leads to false positives.
2318 */
2319 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
2320 mod->num_trace_events, GFP_KERNEL);
2321#endif
2322#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2323 /* sechdrs[0].sh_size is always zero */
2324 mod->ftrace_callsites = section_objs(info, "__mcount_loc",
2325 sizeof(*mod->ftrace_callsites),
2326 &mod->num_ftrace_callsites);
2327#endif
2278 2328
2279 /* Determine total sizes, and put offsets in sh_entsize. For now 2329 mod->extable = section_objs(info, "__ex_table",
2280 this is done generically; there doesn't appear to be any 2330 sizeof(*mod->extable), &mod->num_exentries);
2281 special cases for the architectures. */ 2331
2282 layout_sections(mod, hdr, sechdrs, secstrings); 2332 if (section_addr(info, "__obsparm"))
2283 symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr, 2333 printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
2284 secstrings, &stroffs, strmap); 2334 mod->name);
2335
2336 info->debug = section_objs(info, "__verbose",
2337 sizeof(*info->debug), &info->num_debug);
2338}
2339
2340static int move_module(struct module *mod, struct load_info *info)
2341{
2342 int i;
2343 void *ptr;
2285 2344
2286 /* Do the allocs. */ 2345 /* Do the allocs. */
2287 ptr = module_alloc_update_bounds(mod->core_size); 2346 ptr = module_alloc_update_bounds(mod->core_size);
@@ -2291,10 +2350,9 @@ static noinline struct module *load_module(void __user *umod,
2291 * leak. 2350 * leak.
2292 */ 2351 */
2293 kmemleak_not_leak(ptr); 2352 kmemleak_not_leak(ptr);
2294 if (!ptr) { 2353 if (!ptr)
2295 err = -ENOMEM; 2354 return -ENOMEM;
2296 goto free_percpu; 2355
2297 }
2298 memset(ptr, 0, mod->core_size); 2356 memset(ptr, 0, mod->core_size);
2299 mod->module_core = ptr; 2357 mod->module_core = ptr;
2300 2358
@@ -2307,50 +2365,40 @@ static noinline struct module *load_module(void __user *umod,
2307 */ 2365 */
2308 kmemleak_ignore(ptr); 2366 kmemleak_ignore(ptr);
2309 if (!ptr && mod->init_size) { 2367 if (!ptr && mod->init_size) {
2310 err = -ENOMEM; 2368 module_free(mod, mod->module_core);
2311 goto free_core; 2369 return -ENOMEM;
2312 } 2370 }
2313 memset(ptr, 0, mod->init_size); 2371 memset(ptr, 0, mod->init_size);
2314 mod->module_init = ptr; 2372 mod->module_init = ptr;
2315 2373
2316 /* Transfer each section which specifies SHF_ALLOC */ 2374 /* Transfer each section which specifies SHF_ALLOC */
2317 DEBUGP("final section addresses:\n"); 2375 DEBUGP("final section addresses:\n");
2318 for (i = 0; i < hdr->e_shnum; i++) { 2376 for (i = 0; i < info->hdr->e_shnum; i++) {
2319 void *dest; 2377 void *dest;
2378 Elf_Shdr *shdr = &info->sechdrs[i];
2320 2379
2321 if (!(sechdrs[i].sh_flags & SHF_ALLOC)) 2380 if (!(shdr->sh_flags & SHF_ALLOC))
2322 continue; 2381 continue;
2323 2382
2324 if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) 2383 if (shdr->sh_entsize & INIT_OFFSET_MASK)
2325 dest = mod->module_init 2384 dest = mod->module_init
2326 + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); 2385 + (shdr->sh_entsize & ~INIT_OFFSET_MASK);
2327 else 2386 else
2328 dest = mod->module_core + sechdrs[i].sh_entsize; 2387 dest = mod->module_core + shdr->sh_entsize;
2329 2388
2330 if (sechdrs[i].sh_type != SHT_NOBITS) 2389 if (shdr->sh_type != SHT_NOBITS)
2331 memcpy(dest, (void *)sechdrs[i].sh_addr, 2390 memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
2332 sechdrs[i].sh_size);
2333 /* Update sh_addr to point to copy in image. */ 2391 /* Update sh_addr to point to copy in image. */
2334 sechdrs[i].sh_addr = (unsigned long)dest; 2392 shdr->sh_addr = (unsigned long)dest;
2335 DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); 2393 DEBUGP("\t0x%lx %s\n",
2336 } 2394 shdr->sh_addr, info->secstrings + shdr->sh_name);
2337 /* Module has been moved. */
2338 mod = (void *)sechdrs[modindex].sh_addr;
2339 kmemleak_load_module(mod, hdr, sechdrs, secstrings);
2340
2341#if defined(CONFIG_MODULE_UNLOAD)
2342 mod->refptr = alloc_percpu(struct module_ref);
2343 if (!mod->refptr) {
2344 err = -ENOMEM;
2345 goto free_init;
2346 } 2395 }
2347#endif
2348 /* Now we've moved module, initialize linked lists, etc. */
2349 module_unload_init(mod);
2350 2396
2351 /* Set up license info based on the info section */ 2397 return 0;
2352 set_license(mod, get_modinfo(sechdrs, infoindex, "license")); 2398}
2353 2399
2400static int check_module_license_and_versions(struct module *mod)
2401{
2354 /* 2402 /*
2355 * ndiswrapper is under GPL by itself, but loads proprietary modules. 2403 * ndiswrapper is under GPL by itself, but loads proprietary modules.
2356 * Don't use add_taint_module(), as it would prevent ndiswrapper from 2404 * Don't use add_taint_module(), as it would prevent ndiswrapper from
@@ -2363,77 +2411,6 @@ static noinline struct module *load_module(void __user *umod,
2363 if (strcmp(mod->name, "driverloader") == 0) 2411 if (strcmp(mod->name, "driverloader") == 0)
2364 add_taint_module(mod, TAINT_PROPRIETARY_MODULE); 2412 add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
2365 2413
2366 /* Set up MODINFO_ATTR fields */
2367 setup_modinfo(mod, sechdrs, infoindex);
2368
2369 /* Fix up syms, so that st_value is a pointer to location. */
2370 err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex,
2371 mod);
2372 if (err < 0)
2373 goto cleanup;
2374
2375 /* Now we've got everything in the final locations, we can
2376 * find optional sections. */
2377 mod->kp = section_objs(hdr, sechdrs, secstrings, "__param",
2378 sizeof(*mod->kp), &mod->num_kp);
2379 mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab",
2380 sizeof(*mod->syms), &mod->num_syms);
2381 mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab");
2382 mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl",
2383 sizeof(*mod->gpl_syms),
2384 &mod->num_gpl_syms);
2385 mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl");
2386 mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings,
2387 "__ksymtab_gpl_future",
2388 sizeof(*mod->gpl_future_syms),
2389 &mod->num_gpl_future_syms);
2390 mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings,
2391 "__kcrctab_gpl_future");
2392
2393#ifdef CONFIG_UNUSED_SYMBOLS
2394 mod->unused_syms = section_objs(hdr, sechdrs, secstrings,
2395 "__ksymtab_unused",
2396 sizeof(*mod->unused_syms),
2397 &mod->num_unused_syms);
2398 mod->unused_crcs = section_addr(hdr, sechdrs, secstrings,
2399 "__kcrctab_unused");
2400 mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings,
2401 "__ksymtab_unused_gpl",
2402 sizeof(*mod->unused_gpl_syms),
2403 &mod->num_unused_gpl_syms);
2404 mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
2405 "__kcrctab_unused_gpl");
2406#endif
2407#ifdef CONFIG_CONSTRUCTORS
2408 mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors",
2409 sizeof(*mod->ctors), &mod->num_ctors);
2410#endif
2411
2412#ifdef CONFIG_TRACEPOINTS
2413 mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
2414 "__tracepoints",
2415 sizeof(*mod->tracepoints),
2416 &mod->num_tracepoints);
2417#endif
2418#ifdef CONFIG_EVENT_TRACING
2419 mod->trace_events = section_objs(hdr, sechdrs, secstrings,
2420 "_ftrace_events",
2421 sizeof(*mod->trace_events),
2422 &mod->num_trace_events);
2423 /*
2424 * This section contains pointers to allocated objects in the trace
2425 * code and not scanning it leads to false positives.
2426 */
2427 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
2428 mod->num_trace_events, GFP_KERNEL);
2429#endif
2430#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2431 /* sechdrs[0].sh_size is always zero */
2432 mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
2433 "__mcount_loc",
2434 sizeof(*mod->ftrace_callsites),
2435 &mod->num_ftrace_callsites);
2436#endif
2437#ifdef CONFIG_MODVERSIONS 2414#ifdef CONFIG_MODVERSIONS
2438 if ((mod->num_syms && !mod->crcs) 2415 if ((mod->num_syms && !mod->crcs)
2439 || (mod->num_gpl_syms && !mod->gpl_crcs) 2416 || (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2443,56 +2420,16 @@ static noinline struct module *load_module(void __user *umod,
2443 || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs) 2420 || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
2444#endif 2421#endif
2445 ) { 2422 ) {
2446 err = try_to_force_load(mod, 2423 return try_to_force_load(mod,
2447 "no versions for exported symbols"); 2424 "no versions for exported symbols");
2448 if (err)
2449 goto cleanup;
2450 } 2425 }
2451#endif 2426#endif
2427 return 0;
2428}
2452 2429
2453 /* Now do relocations. */ 2430static void flush_module_icache(const struct module *mod)
2454 for (i = 1; i < hdr->e_shnum; i++) { 2431{
2455 const char *strtab = (char *)sechdrs[strindex].sh_addr; 2432 mm_segment_t old_fs;
2456 unsigned int info = sechdrs[i].sh_info;
2457
2458 /* Not a valid relocation section? */
2459 if (info >= hdr->e_shnum)
2460 continue;
2461
2462 /* Don't bother with non-allocated sections */
2463 if (!(sechdrs[info].sh_flags & SHF_ALLOC))
2464 continue;
2465
2466 if (sechdrs[i].sh_type == SHT_REL)
2467 err = apply_relocate(sechdrs, strtab, symindex, i,mod);
2468 else if (sechdrs[i].sh_type == SHT_RELA)
2469 err = apply_relocate_add(sechdrs, strtab, symindex, i,
2470 mod);
2471 if (err < 0)
2472 goto cleanup;
2473 }
2474
2475 /* Set up and sort exception table */
2476 mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
2477 sizeof(*mod->extable), &mod->num_exentries);
2478 sort_extable(mod->extable, mod->extable + mod->num_exentries);
2479
2480 /* Finally, copy percpu area over. */
2481 percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr,
2482 sechdrs[pcpuindex].sh_size);
2483
2484 add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
2485 symoffs, stroffs, secstrings, strmap);
2486 kfree(strmap);
2487 strmap = NULL;
2488
2489 if (!mod->taints)
2490 debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
2491 sizeof(*debug), &num_debug);
2492
2493 err = module_finalize(hdr, sechdrs, mod);
2494 if (err < 0)
2495 goto cleanup;
2496 2433
2497 /* flush the icache in correct context */ 2434 /* flush the icache in correct context */
2498 old_fs = get_fs(); 2435 old_fs = get_fs();
@@ -2511,11 +2448,160 @@ static noinline struct module *load_module(void __user *umod,
2511 (unsigned long)mod->module_core + mod->core_size); 2448 (unsigned long)mod->module_core + mod->core_size);
2512 2449
2513 set_fs(old_fs); 2450 set_fs(old_fs);
2451}
2514 2452
2515 mod->args = args; 2453static struct module *layout_and_allocate(struct load_info *info)
2516 if (section_addr(hdr, sechdrs, secstrings, "__obsparm")) 2454{
2517 printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", 2455 /* Module within temporary copy. */
2518 mod->name); 2456 struct module *mod;
2457 Elf_Shdr *pcpusec;
2458 int err;
2459
2460 mod = setup_load_info(info);
2461 if (IS_ERR(mod))
2462 return mod;
2463
2464 err = check_modinfo(mod, info);
2465 if (err)
2466 return ERR_PTR(err);
2467
2468 /* Allow arches to frob section contents and sizes. */
2469 err = module_frob_arch_sections(info->hdr, info->sechdrs,
2470 info->secstrings, mod);
2471 if (err < 0)
2472 goto out;
2473
2474 pcpusec = &info->sechdrs[info->index.pcpu];
2475 if (pcpusec->sh_size) {
2476 /* We have a special allocation for this section. */
2477 err = percpu_modalloc(mod,
2478 pcpusec->sh_size, pcpusec->sh_addralign);
2479 if (err)
2480 goto out;
2481 pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC;
2482 }
2483
2484 /* Determine total sizes, and put offsets in sh_entsize. For now
2485 this is done generically; there doesn't appear to be any
2486 special cases for the architectures. */
2487 layout_sections(mod, info);
2488
2489 info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size)
2490 * sizeof(long), GFP_KERNEL);
2491 if (!info->strmap) {
2492 err = -ENOMEM;
2493 goto free_percpu;
2494 }
2495 layout_symtab(mod, info);
2496
2497 /* Allocate and move to the final place */
2498 err = move_module(mod, info);
2499 if (err)
2500 goto free_strmap;
2501
2502 /* Module has been copied to its final place now: return it. */
2503 mod = (void *)info->sechdrs[info->index.mod].sh_addr;
2504 kmemleak_load_module(mod, info);
2505 return mod;
2506
2507free_strmap:
2508 kfree(info->strmap);
2509free_percpu:
2510 percpu_modfree(mod);
2511out:
2512 return ERR_PTR(err);
2513}
2514
2515/* mod is no longer valid after this! */
2516static void module_deallocate(struct module *mod, struct load_info *info)
2517{
2518 kfree(info->strmap);
2519 percpu_modfree(mod);
2520 module_free(mod, mod->module_init);
2521 module_free(mod, mod->module_core);
2522}
2523
2524static int post_relocation(struct module *mod, const struct load_info *info)
2525{
2526 /* Sort exception table now relocations are done. */
2527 sort_extable(mod->extable, mod->extable + mod->num_exentries);
2528
2529 /* Copy relocated percpu area over. */
2530 percpu_modcopy(mod, (void *)info->sechdrs[info->index.pcpu].sh_addr,
2531 info->sechdrs[info->index.pcpu].sh_size);
2532
2533 /* Setup kallsyms-specific fields. */
2534 add_kallsyms(mod, info);
2535
2536 /* Arch-specific module finalizing. */
2537 return module_finalize(info->hdr, info->sechdrs, mod);
2538}
2539
2540/* Allocate and load the module: note that size of section 0 is always
2541 zero, and we rely on this for optional sections. */
2542static struct module *load_module(void __user *umod,
2543 unsigned long len,
2544 const char __user *uargs)
2545{
2546 struct load_info info = { NULL, };
2547 struct module *mod;
2548 long err;
2549
2550 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
2551 umod, len, uargs);
2552
2553 /* Copy in the blobs from userspace, check they are vaguely sane. */
2554 err = copy_and_check(&info, umod, len, uargs);
2555 if (err)
2556 return ERR_PTR(err);
2557
2558 /* Figure out module layout, and allocate all the memory. */
2559 mod = layout_and_allocate(&info);
2560 if (IS_ERR(mod)) {
2561 err = PTR_ERR(mod);
2562 goto free_copy;
2563 }
2564
2565 /* Now module is in final location, initialize linked lists, etc. */
2566 err = module_unload_init(mod);
2567 if (err)
2568 goto free_module;
2569
2570 /* Now we've got everything in the final locations, we can
2571 * find optional sections. */
2572 find_module_sections(mod, &info);
2573
2574 err = check_module_license_and_versions(mod);
2575 if (err)
2576 goto free_unload;
2577
2578 /* Set up MODINFO_ATTR fields */
2579 setup_modinfo(mod, &info);
2580
2581 /* Fix up syms, so that st_value is a pointer to location. */
2582 err = simplify_symbols(mod, &info);
2583 if (err < 0)
2584 goto free_modinfo;
2585
2586 err = apply_relocations(mod, &info);
2587 if (err < 0)
2588 goto free_modinfo;
2589
2590 err = post_relocation(mod, &info);
2591 if (err < 0)
2592 goto free_modinfo;
2593
2594 flush_module_icache(mod);
2595
2596 /* Now copy in args */
2597 mod->args = strndup_user(uargs, ~0UL >> 1);
2598 if (IS_ERR(mod->args)) {
2599 err = PTR_ERR(mod->args);
2600 goto free_arch_cleanup;
2601 }
2602
2603 /* Mark state as coming so strong_try_module_get() ignores us. */
2604 mod->state = MODULE_STATE_COMING;
2519 2605
2520 /* Now sew it into the lists so we can get lockdep and oops 2606 /* Now sew it into the lists so we can get lockdep and oops
2521 * info during argument parsing. Noone should access us, since 2607 * info during argument parsing. Noone should access us, since
@@ -2530,8 +2616,9 @@ static noinline struct module *load_module(void __user *umod,
2530 goto unlock; 2616 goto unlock;
2531 } 2617 }
2532 2618
2533 if (debug) 2619 /* This has to be done once we're sure module name is unique. */
2534 dynamic_debug_setup(debug, num_debug); 2620 if (!mod->taints)
2621 dynamic_debug_setup(info.debug, info.num_debug);
2535 2622
2536 /* Find duplicate symbols */ 2623 /* Find duplicate symbols */
2537 err = verify_export_symbols(mod); 2624 err = verify_export_symbols(mod);
@@ -2541,23 +2628,22 @@ static noinline struct module *load_module(void __user *umod,
2541 list_add_rcu(&mod->list, &modules); 2628 list_add_rcu(&mod->list, &modules);
2542 mutex_unlock(&module_mutex); 2629 mutex_unlock(&module_mutex);
2543 2630
2631 /* Module is ready to execute: parsing args may do that. */
2544 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); 2632 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
2545 if (err < 0) 2633 if (err < 0)
2546 goto unlink; 2634 goto unlink;
2547 2635
2548 err = mod_sysfs_setup(mod, mod->kp, mod->num_kp); 2636 /* Link in to syfs. */
2637 err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
2549 if (err < 0) 2638 if (err < 0)
2550 goto unlink; 2639 goto unlink;
2551 2640
2552 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2641 /* Get rid of temporary copy and strmap. */
2553 add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2642 kfree(info.strmap);
2554 2643 free_copy(&info);
2555 /* Get rid of temporary copy */
2556 vfree(hdr);
2557
2558 trace_module_load(mod);
2559 2644
2560 /* Done! */ 2645 /* Done! */
2646 trace_module_load(mod);
2561 return mod; 2647 return mod;
2562 2648
2563 unlink: 2649 unlink:
@@ -2565,35 +2651,23 @@ static noinline struct module *load_module(void __user *umod,
2565 /* Unlink carefully: kallsyms could be walking list. */ 2651 /* Unlink carefully: kallsyms could be walking list. */
2566 list_del_rcu(&mod->list); 2652 list_del_rcu(&mod->list);
2567 ddebug: 2653 ddebug:
2568 dynamic_debug_remove(debug); 2654 if (!mod->taints)
2655 dynamic_debug_remove(info.debug);
2569 unlock: 2656 unlock:
2570 mutex_unlock(&module_mutex); 2657 mutex_unlock(&module_mutex);
2571 synchronize_sched(); 2658 synchronize_sched();
2659 kfree(mod->args);
2660 free_arch_cleanup:
2572 module_arch_cleanup(mod); 2661 module_arch_cleanup(mod);
2573 cleanup: 2662 free_modinfo:
2574 free_modinfo(mod); 2663 free_modinfo(mod);
2664 free_unload:
2575 module_unload_free(mod); 2665 module_unload_free(mod);
2576#if defined(CONFIG_MODULE_UNLOAD) 2666 free_module:
2577 free_percpu(mod->refptr); 2667 module_deallocate(mod, &info);
2578 free_init: 2668 free_copy:
2579#endif 2669 free_copy(&info);
2580 module_free(mod, mod->module_init);
2581 free_core:
2582 module_free(mod, mod->module_core);
2583 /* mod will be freed with core. Don't access it beyond this line! */
2584 free_percpu:
2585 free_percpu(percpu);
2586 free_mod:
2587 kfree(args);
2588 kfree(strmap);
2589 free_hdr:
2590 vfree(hdr);
2591 return ERR_PTR(err); 2670 return ERR_PTR(err);
2592
2593 truncated:
2594 printk(KERN_ERR "Module len %lu truncated\n", len);
2595 err = -ENOEXEC;
2596 goto free_hdr;
2597} 2671}
2598 2672
2599/* Call module constructors. */ 2673/* Call module constructors. */
diff --git a/kernel/padata.c b/kernel/padata.c
index fdd8ae609ce3..751019415d23 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -26,18 +26,19 @@
26#include <linux/mutex.h> 26#include <linux/mutex.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/sysfs.h>
29#include <linux/rcupdate.h> 30#include <linux/rcupdate.h>
30 31
31#define MAX_SEQ_NR INT_MAX - NR_CPUS 32#define MAX_SEQ_NR (INT_MAX - NR_CPUS)
32#define MAX_OBJ_NUM 1000 33#define MAX_OBJ_NUM 1000
33 34
34static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) 35static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
35{ 36{
36 int cpu, target_cpu; 37 int cpu, target_cpu;
37 38
38 target_cpu = cpumask_first(pd->cpumask); 39 target_cpu = cpumask_first(pd->cpumask.pcpu);
39 for (cpu = 0; cpu < cpu_index; cpu++) 40 for (cpu = 0; cpu < cpu_index; cpu++)
40 target_cpu = cpumask_next(target_cpu, pd->cpumask); 41 target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);
41 42
42 return target_cpu; 43 return target_cpu;
43} 44}
@@ -53,26 +54,27 @@ static int padata_cpu_hash(struct padata_priv *padata)
53 * Hash the sequence numbers to the cpus by taking 54 * Hash the sequence numbers to the cpus by taking
54 * seq_nr mod. number of cpus in use. 55 * seq_nr mod. number of cpus in use.
55 */ 56 */
56 cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask); 57 cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask.pcpu);
57 58
58 return padata_index_to_cpu(pd, cpu_index); 59 return padata_index_to_cpu(pd, cpu_index);
59} 60}
60 61
61static void padata_parallel_worker(struct work_struct *work) 62static void padata_parallel_worker(struct work_struct *parallel_work)
62{ 63{
63 struct padata_queue *queue; 64 struct padata_parallel_queue *pqueue;
64 struct parallel_data *pd; 65 struct parallel_data *pd;
65 struct padata_instance *pinst; 66 struct padata_instance *pinst;
66 LIST_HEAD(local_list); 67 LIST_HEAD(local_list);
67 68
68 local_bh_disable(); 69 local_bh_disable();
69 queue = container_of(work, struct padata_queue, pwork); 70 pqueue = container_of(parallel_work,
70 pd = queue->pd; 71 struct padata_parallel_queue, work);
72 pd = pqueue->pd;
71 pinst = pd->pinst; 73 pinst = pd->pinst;
72 74
73 spin_lock(&queue->parallel.lock); 75 spin_lock(&pqueue->parallel.lock);
74 list_replace_init(&queue->parallel.list, &local_list); 76 list_replace_init(&pqueue->parallel.list, &local_list);
75 spin_unlock(&queue->parallel.lock); 77 spin_unlock(&pqueue->parallel.lock);
76 78
77 while (!list_empty(&local_list)) { 79 while (!list_empty(&local_list)) {
78 struct padata_priv *padata; 80 struct padata_priv *padata;
@@ -94,7 +96,7 @@ static void padata_parallel_worker(struct work_struct *work)
94 * @pinst: padata instance 96 * @pinst: padata instance
95 * @padata: object to be parallelized 97 * @padata: object to be parallelized
96 * @cb_cpu: cpu the serialization callback function will run on, 98 * @cb_cpu: cpu the serialization callback function will run on,
97 * must be in the cpumask of padata. 99 * must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
98 * 100 *
99 * The parallelization callback function will run with BHs off. 101 * The parallelization callback function will run with BHs off.
100 * Note: Every object which is parallelized by padata_do_parallel 102 * Note: Every object which is parallelized by padata_do_parallel
@@ -104,15 +106,18 @@ int padata_do_parallel(struct padata_instance *pinst,
104 struct padata_priv *padata, int cb_cpu) 106 struct padata_priv *padata, int cb_cpu)
105{ 107{
106 int target_cpu, err; 108 int target_cpu, err;
107 struct padata_queue *queue; 109 struct padata_parallel_queue *queue;
108 struct parallel_data *pd; 110 struct parallel_data *pd;
109 111
110 rcu_read_lock_bh(); 112 rcu_read_lock_bh();
111 113
112 pd = rcu_dereference(pinst->pd); 114 pd = rcu_dereference(pinst->pd);
113 115
114 err = 0; 116 err = -EINVAL;
115 if (!(pinst->flags & PADATA_INIT)) 117 if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
118 goto out;
119
120 if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
116 goto out; 121 goto out;
117 122
118 err = -EBUSY; 123 err = -EBUSY;
@@ -122,11 +127,7 @@ int padata_do_parallel(struct padata_instance *pinst,
122 if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM) 127 if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
123 goto out; 128 goto out;
124 129
125 err = -EINVAL; 130 err = 0;
126 if (!cpumask_test_cpu(cb_cpu, pd->cpumask))
127 goto out;
128
129 err = -EINPROGRESS;
130 atomic_inc(&pd->refcnt); 131 atomic_inc(&pd->refcnt);
131 padata->pd = pd; 132 padata->pd = pd;
132 padata->cb_cpu = cb_cpu; 133 padata->cb_cpu = cb_cpu;
@@ -137,13 +138,13 @@ int padata_do_parallel(struct padata_instance *pinst,
137 padata->seq_nr = atomic_inc_return(&pd->seq_nr); 138 padata->seq_nr = atomic_inc_return(&pd->seq_nr);
138 139
139 target_cpu = padata_cpu_hash(padata); 140 target_cpu = padata_cpu_hash(padata);
140 queue = per_cpu_ptr(pd->queue, target_cpu); 141 queue = per_cpu_ptr(pd->pqueue, target_cpu);
141 142
142 spin_lock(&queue->parallel.lock); 143 spin_lock(&queue->parallel.lock);
143 list_add_tail(&padata->list, &queue->parallel.list); 144 list_add_tail(&padata->list, &queue->parallel.list);
144 spin_unlock(&queue->parallel.lock); 145 spin_unlock(&queue->parallel.lock);
145 146
146 queue_work_on(target_cpu, pinst->wq, &queue->pwork); 147 queue_work_on(target_cpu, pinst->wq, &queue->work);
147 148
148out: 149out:
149 rcu_read_unlock_bh(); 150 rcu_read_unlock_bh();
@@ -171,84 +172,52 @@ EXPORT_SYMBOL(padata_do_parallel);
171 */ 172 */
172static struct padata_priv *padata_get_next(struct parallel_data *pd) 173static struct padata_priv *padata_get_next(struct parallel_data *pd)
173{ 174{
174 int cpu, num_cpus, empty, calc_seq_nr; 175 int cpu, num_cpus;
175 int seq_nr, next_nr, overrun, next_overrun; 176 int next_nr, next_index;
176 struct padata_queue *queue, *next_queue; 177 struct padata_parallel_queue *queue, *next_queue;
177 struct padata_priv *padata; 178 struct padata_priv *padata;
178 struct padata_list *reorder; 179 struct padata_list *reorder;
179 180
180 empty = 0; 181 num_cpus = cpumask_weight(pd->cpumask.pcpu);
181 next_nr = -1;
182 next_overrun = 0;
183 next_queue = NULL;
184
185 num_cpus = cpumask_weight(pd->cpumask);
186
187 for_each_cpu(cpu, pd->cpumask) {
188 queue = per_cpu_ptr(pd->queue, cpu);
189 reorder = &queue->reorder;
190
191 /*
192 * Calculate the seq_nr of the object that should be
193 * next in this reorder queue.
194 */
195 overrun = 0;
196 calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
197 + queue->cpu_index;
198 182
199 if (unlikely(calc_seq_nr > pd->max_seq_nr)) { 183 /*
200 calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1; 184 * Calculate the percpu reorder queue and the sequence
201 overrun = 1; 185 * number of the next object.
202 } 186 */
203 187 next_nr = pd->processed;
204 if (!list_empty(&reorder->list)) { 188 next_index = next_nr % num_cpus;
205 padata = list_entry(reorder->list.next, 189 cpu = padata_index_to_cpu(pd, next_index);
206 struct padata_priv, list); 190 next_queue = per_cpu_ptr(pd->pqueue, cpu);
207 191
208 seq_nr = padata->seq_nr; 192 if (unlikely(next_nr > pd->max_seq_nr)) {
209 BUG_ON(calc_seq_nr != seq_nr); 193 next_nr = next_nr - pd->max_seq_nr - 1;
210 } else { 194 next_index = next_nr % num_cpus;
211 seq_nr = calc_seq_nr; 195 cpu = padata_index_to_cpu(pd, next_index);
212 empty++; 196 next_queue = per_cpu_ptr(pd->pqueue, cpu);
213 } 197 pd->processed = 0;
214
215 if (next_nr < 0 || seq_nr < next_nr
216 || (next_overrun && !overrun)) {
217 next_nr = seq_nr;
218 next_overrun = overrun;
219 next_queue = queue;
220 }
221 } 198 }
222 199
223 padata = NULL; 200 padata = NULL;
224 201
225 if (empty == num_cpus)
226 goto out;
227
228 reorder = &next_queue->reorder; 202 reorder = &next_queue->reorder;
229 203
230 if (!list_empty(&reorder->list)) { 204 if (!list_empty(&reorder->list)) {
231 padata = list_entry(reorder->list.next, 205 padata = list_entry(reorder->list.next,
232 struct padata_priv, list); 206 struct padata_priv, list);
233 207
234 if (unlikely(next_overrun)) { 208 BUG_ON(next_nr != padata->seq_nr);
235 for_each_cpu(cpu, pd->cpumask) {
236 queue = per_cpu_ptr(pd->queue, cpu);
237 atomic_set(&queue->num_obj, 0);
238 }
239 }
240 209
241 spin_lock(&reorder->lock); 210 spin_lock(&reorder->lock);
242 list_del_init(&padata->list); 211 list_del_init(&padata->list);
243 atomic_dec(&pd->reorder_objects); 212 atomic_dec(&pd->reorder_objects);
244 spin_unlock(&reorder->lock); 213 spin_unlock(&reorder->lock);
245 214
246 atomic_inc(&next_queue->num_obj); 215 pd->processed++;
247 216
248 goto out; 217 goto out;
249 } 218 }
250 219
251 queue = per_cpu_ptr(pd->queue, smp_processor_id()); 220 queue = per_cpu_ptr(pd->pqueue, smp_processor_id());
252 if (queue->cpu_index == next_queue->cpu_index) { 221 if (queue->cpu_index == next_queue->cpu_index) {
253 padata = ERR_PTR(-ENODATA); 222 padata = ERR_PTR(-ENODATA);
254 goto out; 223 goto out;
@@ -262,7 +231,7 @@ out:
262static void padata_reorder(struct parallel_data *pd) 231static void padata_reorder(struct parallel_data *pd)
263{ 232{
264 struct padata_priv *padata; 233 struct padata_priv *padata;
265 struct padata_queue *queue; 234 struct padata_serial_queue *squeue;
266 struct padata_instance *pinst = pd->pinst; 235 struct padata_instance *pinst = pd->pinst;
267 236
268 /* 237 /*
@@ -301,13 +270,13 @@ static void padata_reorder(struct parallel_data *pd)
301 return; 270 return;
302 } 271 }
303 272
304 queue = per_cpu_ptr(pd->queue, padata->cb_cpu); 273 squeue = per_cpu_ptr(pd->squeue, padata->cb_cpu);
305 274
306 spin_lock(&queue->serial.lock); 275 spin_lock(&squeue->serial.lock);
307 list_add_tail(&padata->list, &queue->serial.list); 276 list_add_tail(&padata->list, &squeue->serial.list);
308 spin_unlock(&queue->serial.lock); 277 spin_unlock(&squeue->serial.lock);
309 278
310 queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork); 279 queue_work_on(padata->cb_cpu, pinst->wq, &squeue->work);
311 } 280 }
312 281
313 spin_unlock_bh(&pd->lock); 282 spin_unlock_bh(&pd->lock);
@@ -333,19 +302,19 @@ static void padata_reorder_timer(unsigned long arg)
333 padata_reorder(pd); 302 padata_reorder(pd);
334} 303}
335 304
336static void padata_serial_worker(struct work_struct *work) 305static void padata_serial_worker(struct work_struct *serial_work)
337{ 306{
338 struct padata_queue *queue; 307 struct padata_serial_queue *squeue;
339 struct parallel_data *pd; 308 struct parallel_data *pd;
340 LIST_HEAD(local_list); 309 LIST_HEAD(local_list);
341 310
342 local_bh_disable(); 311 local_bh_disable();
343 queue = container_of(work, struct padata_queue, swork); 312 squeue = container_of(serial_work, struct padata_serial_queue, work);
344 pd = queue->pd; 313 pd = squeue->pd;
345 314
346 spin_lock(&queue->serial.lock); 315 spin_lock(&squeue->serial.lock);
347 list_replace_init(&queue->serial.list, &local_list); 316 list_replace_init(&squeue->serial.list, &local_list);
348 spin_unlock(&queue->serial.lock); 317 spin_unlock(&squeue->serial.lock);
349 318
350 while (!list_empty(&local_list)) { 319 while (!list_empty(&local_list)) {
351 struct padata_priv *padata; 320 struct padata_priv *padata;
@@ -372,18 +341,18 @@ static void padata_serial_worker(struct work_struct *work)
372void padata_do_serial(struct padata_priv *padata) 341void padata_do_serial(struct padata_priv *padata)
373{ 342{
374 int cpu; 343 int cpu;
375 struct padata_queue *queue; 344 struct padata_parallel_queue *pqueue;
376 struct parallel_data *pd; 345 struct parallel_data *pd;
377 346
378 pd = padata->pd; 347 pd = padata->pd;
379 348
380 cpu = get_cpu(); 349 cpu = get_cpu();
381 queue = per_cpu_ptr(pd->queue, cpu); 350 pqueue = per_cpu_ptr(pd->pqueue, cpu);
382 351
383 spin_lock(&queue->reorder.lock); 352 spin_lock(&pqueue->reorder.lock);
384 atomic_inc(&pd->reorder_objects); 353 atomic_inc(&pd->reorder_objects);
385 list_add_tail(&padata->list, &queue->reorder.list); 354 list_add_tail(&padata->list, &pqueue->reorder.list);
386 spin_unlock(&queue->reorder.lock); 355 spin_unlock(&pqueue->reorder.lock);
387 356
388 put_cpu(); 357 put_cpu();
389 358
@@ -391,52 +360,89 @@ void padata_do_serial(struct padata_priv *padata)
391} 360}
392EXPORT_SYMBOL(padata_do_serial); 361EXPORT_SYMBOL(padata_do_serial);
393 362
394/* Allocate and initialize the internal cpumask dependend resources. */ 363static int padata_setup_cpumasks(struct parallel_data *pd,
395static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, 364 const struct cpumask *pcpumask,
396 const struct cpumask *cpumask) 365 const struct cpumask *cbcpumask)
397{ 366{
398 int cpu, cpu_index, num_cpus; 367 if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
399 struct padata_queue *queue; 368 return -ENOMEM;
400 struct parallel_data *pd;
401
402 cpu_index = 0;
403 369
404 pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL); 370 cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask);
405 if (!pd) 371 if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
406 goto err; 372 free_cpumask_var(pd->cpumask.cbcpu);
373 return -ENOMEM;
374 }
407 375
408 pd->queue = alloc_percpu(struct padata_queue); 376 cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask);
409 if (!pd->queue) 377 return 0;
410 goto err_free_pd; 378}
411 379
412 if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL)) 380static void __padata_list_init(struct padata_list *pd_list)
413 goto err_free_queue; 381{
382 INIT_LIST_HEAD(&pd_list->list);
383 spin_lock_init(&pd_list->lock);
384}
414 385
415 cpumask_and(pd->cpumask, cpumask, cpu_active_mask); 386/* Initialize all percpu queues used by serial workers */
387static void padata_init_squeues(struct parallel_data *pd)
388{
389 int cpu;
390 struct padata_serial_queue *squeue;
416 391
417 for_each_cpu(cpu, pd->cpumask) { 392 for_each_cpu(cpu, pd->cpumask.cbcpu) {
418 queue = per_cpu_ptr(pd->queue, cpu); 393 squeue = per_cpu_ptr(pd->squeue, cpu);
394 squeue->pd = pd;
395 __padata_list_init(&squeue->serial);
396 INIT_WORK(&squeue->work, padata_serial_worker);
397 }
398}
419 399
420 queue->pd = pd; 400/* Initialize all percpu queues used by parallel workers */
401static void padata_init_pqueues(struct parallel_data *pd)
402{
403 int cpu_index, num_cpus, cpu;
404 struct padata_parallel_queue *pqueue;
421 405
422 queue->cpu_index = cpu_index; 406 cpu_index = 0;
407 for_each_cpu(cpu, pd->cpumask.pcpu) {
408 pqueue = per_cpu_ptr(pd->pqueue, cpu);
409 pqueue->pd = pd;
410 pqueue->cpu_index = cpu_index;
423 cpu_index++; 411 cpu_index++;
424 412
425 INIT_LIST_HEAD(&queue->reorder.list); 413 __padata_list_init(&pqueue->reorder);
426 INIT_LIST_HEAD(&queue->parallel.list); 414 __padata_list_init(&pqueue->parallel);
427 INIT_LIST_HEAD(&queue->serial.list); 415 INIT_WORK(&pqueue->work, padata_parallel_worker);
428 spin_lock_init(&queue->reorder.lock); 416 atomic_set(&pqueue->num_obj, 0);
429 spin_lock_init(&queue->parallel.lock);
430 spin_lock_init(&queue->serial.lock);
431
432 INIT_WORK(&queue->pwork, padata_parallel_worker);
433 INIT_WORK(&queue->swork, padata_serial_worker);
434 atomic_set(&queue->num_obj, 0);
435 } 417 }
436 418
437 num_cpus = cpumask_weight(pd->cpumask); 419 num_cpus = cpumask_weight(pd->cpumask.pcpu);
438 pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; 420 pd->max_seq_nr = num_cpus ? (MAX_SEQ_NR / num_cpus) * num_cpus - 1 : 0;
421}
422
423/* Allocate and initialize the internal cpumask dependend resources. */
424static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
425 const struct cpumask *pcpumask,
426 const struct cpumask *cbcpumask)
427{
428 struct parallel_data *pd;
439 429
430 pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
431 if (!pd)
432 goto err;
433
434 pd->pqueue = alloc_percpu(struct padata_parallel_queue);
435 if (!pd->pqueue)
436 goto err_free_pd;
437
438 pd->squeue = alloc_percpu(struct padata_serial_queue);
439 if (!pd->squeue)
440 goto err_free_pqueue;
441 if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
442 goto err_free_squeue;
443
444 padata_init_pqueues(pd);
445 padata_init_squeues(pd);
440 setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); 446 setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
441 atomic_set(&pd->seq_nr, -1); 447 atomic_set(&pd->seq_nr, -1);
442 atomic_set(&pd->reorder_objects, 0); 448 atomic_set(&pd->reorder_objects, 0);
@@ -446,8 +452,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
446 452
447 return pd; 453 return pd;
448 454
449err_free_queue: 455err_free_squeue:
450 free_percpu(pd->queue); 456 free_percpu(pd->squeue);
457err_free_pqueue:
458 free_percpu(pd->pqueue);
451err_free_pd: 459err_free_pd:
452 kfree(pd); 460 kfree(pd);
453err: 461err:
@@ -456,8 +464,10 @@ err:
456 464
457static void padata_free_pd(struct parallel_data *pd) 465static void padata_free_pd(struct parallel_data *pd)
458{ 466{
459 free_cpumask_var(pd->cpumask); 467 free_cpumask_var(pd->cpumask.pcpu);
460 free_percpu(pd->queue); 468 free_cpumask_var(pd->cpumask.cbcpu);
469 free_percpu(pd->pqueue);
470 free_percpu(pd->squeue);
461 kfree(pd); 471 kfree(pd);
462} 472}
463 473
@@ -465,11 +475,12 @@ static void padata_free_pd(struct parallel_data *pd)
465static void padata_flush_queues(struct parallel_data *pd) 475static void padata_flush_queues(struct parallel_data *pd)
466{ 476{
467 int cpu; 477 int cpu;
468 struct padata_queue *queue; 478 struct padata_parallel_queue *pqueue;
479 struct padata_serial_queue *squeue;
469 480
470 for_each_cpu(cpu, pd->cpumask) { 481 for_each_cpu(cpu, pd->cpumask.pcpu) {
471 queue = per_cpu_ptr(pd->queue, cpu); 482 pqueue = per_cpu_ptr(pd->pqueue, cpu);
472 flush_work(&queue->pwork); 483 flush_work(&pqueue->work);
473 } 484 }
474 485
475 del_timer_sync(&pd->timer); 486 del_timer_sync(&pd->timer);
@@ -477,19 +488,39 @@ static void padata_flush_queues(struct parallel_data *pd)
477 if (atomic_read(&pd->reorder_objects)) 488 if (atomic_read(&pd->reorder_objects))
478 padata_reorder(pd); 489 padata_reorder(pd);
479 490
480 for_each_cpu(cpu, pd->cpumask) { 491 for_each_cpu(cpu, pd->cpumask.cbcpu) {
481 queue = per_cpu_ptr(pd->queue, cpu); 492 squeue = per_cpu_ptr(pd->squeue, cpu);
482 flush_work(&queue->swork); 493 flush_work(&squeue->work);
483 } 494 }
484 495
485 BUG_ON(atomic_read(&pd->refcnt) != 0); 496 BUG_ON(atomic_read(&pd->refcnt) != 0);
486} 497}
487 498
499static void __padata_start(struct padata_instance *pinst)
500{
501 pinst->flags |= PADATA_INIT;
502}
503
504static void __padata_stop(struct padata_instance *pinst)
505{
506 if (!(pinst->flags & PADATA_INIT))
507 return;
508
509 pinst->flags &= ~PADATA_INIT;
510
511 synchronize_rcu();
512
513 get_online_cpus();
514 padata_flush_queues(pinst->pd);
515 put_online_cpus();
516}
517
488/* Replace the internal control stucture with a new one. */ 518/* Replace the internal control stucture with a new one. */
489static void padata_replace(struct padata_instance *pinst, 519static void padata_replace(struct padata_instance *pinst,
490 struct parallel_data *pd_new) 520 struct parallel_data *pd_new)
491{ 521{
492 struct parallel_data *pd_old = pinst->pd; 522 struct parallel_data *pd_old = pinst->pd;
523 int notification_mask = 0;
493 524
494 pinst->flags |= PADATA_RESET; 525 pinst->flags |= PADATA_RESET;
495 526
@@ -497,41 +528,162 @@ static void padata_replace(struct padata_instance *pinst,
497 528
498 synchronize_rcu(); 529 synchronize_rcu();
499 530
531 if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
532 notification_mask |= PADATA_CPU_PARALLEL;
533 if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
534 notification_mask |= PADATA_CPU_SERIAL;
535
500 padata_flush_queues(pd_old); 536 padata_flush_queues(pd_old);
501 padata_free_pd(pd_old); 537 padata_free_pd(pd_old);
502 538
539 if (notification_mask)
540 blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
541 notification_mask,
542 &pd_new->cpumask);
543
503 pinst->flags &= ~PADATA_RESET; 544 pinst->flags &= ~PADATA_RESET;
504} 545}
505 546
506/** 547/**
507 * padata_set_cpumask - set the cpumask that padata should use 548 * padata_register_cpumask_notifier - Registers a notifier that will be called
549 * if either pcpu or cbcpu or both cpumasks change.
508 * 550 *
509 * @pinst: padata instance 551 * @pinst: A pointer to padata instance
510 * @cpumask: the cpumask to use 552 * @nblock: A pointer to notifier block.
511 */ 553 */
512int padata_set_cpumask(struct padata_instance *pinst, 554int padata_register_cpumask_notifier(struct padata_instance *pinst,
513 cpumask_var_t cpumask) 555 struct notifier_block *nblock)
514{ 556{
557 return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
558 nblock);
559}
560EXPORT_SYMBOL(padata_register_cpumask_notifier);
561
562/**
563 * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
564 * registered earlier using padata_register_cpumask_notifier
565 *
566 * @pinst: A pointer to data instance.
567 * @nblock: A pointer to notifier block.
568 */
569int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
570 struct notifier_block *nblock)
571{
572 return blocking_notifier_chain_unregister(
573 &pinst->cpumask_change_notifier,
574 nblock);
575}
576EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
577
578
579/* If cpumask contains no active cpu, we mark the instance as invalid. */
580static bool padata_validate_cpumask(struct padata_instance *pinst,
581 const struct cpumask *cpumask)
582{
583 if (!cpumask_intersects(cpumask, cpu_active_mask)) {
584 pinst->flags |= PADATA_INVALID;
585 return false;
586 }
587
588 pinst->flags &= ~PADATA_INVALID;
589 return true;
590}
591
592static int __padata_set_cpumasks(struct padata_instance *pinst,
593 cpumask_var_t pcpumask,
594 cpumask_var_t cbcpumask)
595{
596 int valid;
515 struct parallel_data *pd; 597 struct parallel_data *pd;
516 int err = 0; 598
599 valid = padata_validate_cpumask(pinst, pcpumask);
600 if (!valid) {
601 __padata_stop(pinst);
602 goto out_replace;
603 }
604
605 valid = padata_validate_cpumask(pinst, cbcpumask);
606 if (!valid)
607 __padata_stop(pinst);
608
609out_replace:
610 pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
611 if (!pd)
612 return -ENOMEM;
613
614 cpumask_copy(pinst->cpumask.pcpu, pcpumask);
615 cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
616
617 padata_replace(pinst, pd);
618
619 if (valid)
620 __padata_start(pinst);
621
622 return 0;
623}
624
625/**
626 * padata_set_cpumasks - Set both parallel and serial cpumasks. The first
627 * one is used by parallel workers and the second one
628 * by the workers doing serialization.
629 *
630 * @pinst: padata instance
631 * @pcpumask: the cpumask to use for parallel workers
632 * @cbcpumask: the cpumask to use for serial workers
633 */
634int padata_set_cpumasks(struct padata_instance *pinst, cpumask_var_t pcpumask,
635 cpumask_var_t cbcpumask)
636{
637 int err;
517 638
518 mutex_lock(&pinst->lock); 639 mutex_lock(&pinst->lock);
640 get_online_cpus();
519 641
642 err = __padata_set_cpumasks(pinst, pcpumask, cbcpumask);
643
644 put_online_cpus();
645 mutex_unlock(&pinst->lock);
646
647 return err;
648
649}
650EXPORT_SYMBOL(padata_set_cpumasks);
651
652/**
653 * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
654 * equivalent to @cpumask.
655 *
656 * @pinst: padata instance
657 * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
658 * to serial and parallel cpumasks respectively.
659 * @cpumask: the cpumask to use
660 */
661int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
662 cpumask_var_t cpumask)
663{
664 struct cpumask *serial_mask, *parallel_mask;
665 int err = -EINVAL;
666
667 mutex_lock(&pinst->lock);
520 get_online_cpus(); 668 get_online_cpus();
521 669
522 pd = padata_alloc_pd(pinst, cpumask); 670 switch (cpumask_type) {
523 if (!pd) { 671 case PADATA_CPU_PARALLEL:
524 err = -ENOMEM; 672 serial_mask = pinst->cpumask.cbcpu;
525 goto out; 673 parallel_mask = cpumask;
674 break;
675 case PADATA_CPU_SERIAL:
676 parallel_mask = pinst->cpumask.pcpu;
677 serial_mask = cpumask;
678 break;
679 default:
680 goto out;
526 } 681 }
527 682
528 cpumask_copy(pinst->cpumask, cpumask); 683 err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
529
530 padata_replace(pinst, pd);
531 684
532out: 685out:
533 put_online_cpus(); 686 put_online_cpus();
534
535 mutex_unlock(&pinst->lock); 687 mutex_unlock(&pinst->lock);
536 688
537 return err; 689 return err;
@@ -543,30 +695,48 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
543 struct parallel_data *pd; 695 struct parallel_data *pd;
544 696
545 if (cpumask_test_cpu(cpu, cpu_active_mask)) { 697 if (cpumask_test_cpu(cpu, cpu_active_mask)) {
546 pd = padata_alloc_pd(pinst, pinst->cpumask); 698 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
699 pinst->cpumask.cbcpu);
547 if (!pd) 700 if (!pd)
548 return -ENOMEM; 701 return -ENOMEM;
549 702
550 padata_replace(pinst, pd); 703 padata_replace(pinst, pd);
704
705 if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
706 padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
707 __padata_start(pinst);
551 } 708 }
552 709
553 return 0; 710 return 0;
554} 711}
555 712
556/** 713 /**
557 * padata_add_cpu - add a cpu to the padata cpumask 714 * padata_add_cpu - add a cpu to one or both(parallel and serial)
715 * padata cpumasks.
558 * 716 *
559 * @pinst: padata instance 717 * @pinst: padata instance
560 * @cpu: cpu to add 718 * @cpu: cpu to add
719 * @mask: bitmask of flags specifying to which cpumask @cpu should be added.
720 * The @mask may be any combination of the following flags:
721 * PADATA_CPU_SERIAL - serial cpumask
722 * PADATA_CPU_PARALLEL - parallel cpumask
561 */ 723 */
562int padata_add_cpu(struct padata_instance *pinst, int cpu) 724
725int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask)
563{ 726{
564 int err; 727 int err;
565 728
729 if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
730 return -EINVAL;
731
566 mutex_lock(&pinst->lock); 732 mutex_lock(&pinst->lock);
567 733
568 get_online_cpus(); 734 get_online_cpus();
569 cpumask_set_cpu(cpu, pinst->cpumask); 735 if (mask & PADATA_CPU_SERIAL)
736 cpumask_set_cpu(cpu, pinst->cpumask.cbcpu);
737 if (mask & PADATA_CPU_PARALLEL)
738 cpumask_set_cpu(cpu, pinst->cpumask.pcpu);
739
570 err = __padata_add_cpu(pinst, cpu); 740 err = __padata_add_cpu(pinst, cpu);
571 put_online_cpus(); 741 put_online_cpus();
572 742
@@ -578,10 +748,16 @@ EXPORT_SYMBOL(padata_add_cpu);
578 748
579static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) 749static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
580{ 750{
581 struct parallel_data *pd; 751 struct parallel_data *pd = NULL;
582 752
583 if (cpumask_test_cpu(cpu, cpu_online_mask)) { 753 if (cpumask_test_cpu(cpu, cpu_online_mask)) {
584 pd = padata_alloc_pd(pinst, pinst->cpumask); 754
755 if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
756 !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
757 __padata_stop(pinst);
758
759 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
760 pinst->cpumask.cbcpu);
585 if (!pd) 761 if (!pd)
586 return -ENOMEM; 762 return -ENOMEM;
587 763
@@ -591,20 +767,32 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
591 return 0; 767 return 0;
592} 768}
593 769
594/** 770 /**
595 * padata_remove_cpu - remove a cpu from the padata cpumask 771 * padata_remove_cpu - remove a cpu from the one or both(serial and parallel)
772 * padata cpumasks.
596 * 773 *
597 * @pinst: padata instance 774 * @pinst: padata instance
598 * @cpu: cpu to remove 775 * @cpu: cpu to remove
776 * @mask: bitmask specifying from which cpumask @cpu should be removed
777 * The @mask may be any combination of the following flags:
778 * PADATA_CPU_SERIAL - serial cpumask
779 * PADATA_CPU_PARALLEL - parallel cpumask
599 */ 780 */
600int padata_remove_cpu(struct padata_instance *pinst, int cpu) 781int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
601{ 782{
602 int err; 783 int err;
603 784
785 if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
786 return -EINVAL;
787
604 mutex_lock(&pinst->lock); 788 mutex_lock(&pinst->lock);
605 789
606 get_online_cpus(); 790 get_online_cpus();
607 cpumask_clear_cpu(cpu, pinst->cpumask); 791 if (mask & PADATA_CPU_SERIAL)
792 cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
793 if (mask & PADATA_CPU_PARALLEL)
794 cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
795
608 err = __padata_remove_cpu(pinst, cpu); 796 err = __padata_remove_cpu(pinst, cpu);
609 put_online_cpus(); 797 put_online_cpus();
610 798
@@ -619,11 +807,20 @@ EXPORT_SYMBOL(padata_remove_cpu);
619 * 807 *
620 * @pinst: padata instance to start 808 * @pinst: padata instance to start
621 */ 809 */
622void padata_start(struct padata_instance *pinst) 810int padata_start(struct padata_instance *pinst)
623{ 811{
812 int err = 0;
813
624 mutex_lock(&pinst->lock); 814 mutex_lock(&pinst->lock);
625 pinst->flags |= PADATA_INIT; 815
816 if (pinst->flags & PADATA_INVALID)
817 err =-EINVAL;
818
819 __padata_start(pinst);
820
626 mutex_unlock(&pinst->lock); 821 mutex_unlock(&pinst->lock);
822
823 return err;
627} 824}
628EXPORT_SYMBOL(padata_start); 825EXPORT_SYMBOL(padata_start);
629 826
@@ -635,12 +832,20 @@ EXPORT_SYMBOL(padata_start);
635void padata_stop(struct padata_instance *pinst) 832void padata_stop(struct padata_instance *pinst)
636{ 833{
637 mutex_lock(&pinst->lock); 834 mutex_lock(&pinst->lock);
638 pinst->flags &= ~PADATA_INIT; 835 __padata_stop(pinst);
639 mutex_unlock(&pinst->lock); 836 mutex_unlock(&pinst->lock);
640} 837}
641EXPORT_SYMBOL(padata_stop); 838EXPORT_SYMBOL(padata_stop);
642 839
643#ifdef CONFIG_HOTPLUG_CPU 840#ifdef CONFIG_HOTPLUG_CPU
841
842static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
843{
844 return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
845 cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
846}
847
848
644static int padata_cpu_callback(struct notifier_block *nfb, 849static int padata_cpu_callback(struct notifier_block *nfb,
645 unsigned long action, void *hcpu) 850 unsigned long action, void *hcpu)
646{ 851{
@@ -653,7 +858,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
653 switch (action) { 858 switch (action) {
654 case CPU_ONLINE: 859 case CPU_ONLINE:
655 case CPU_ONLINE_FROZEN: 860 case CPU_ONLINE_FROZEN:
656 if (!cpumask_test_cpu(cpu, pinst->cpumask)) 861 if (!pinst_has_cpu(pinst, cpu))
657 break; 862 break;
658 mutex_lock(&pinst->lock); 863 mutex_lock(&pinst->lock);
659 err = __padata_add_cpu(pinst, cpu); 864 err = __padata_add_cpu(pinst, cpu);
@@ -664,7 +869,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
664 869
665 case CPU_DOWN_PREPARE: 870 case CPU_DOWN_PREPARE:
666 case CPU_DOWN_PREPARE_FROZEN: 871 case CPU_DOWN_PREPARE_FROZEN:
667 if (!cpumask_test_cpu(cpu, pinst->cpumask)) 872 if (!pinst_has_cpu(pinst, cpu))
668 break; 873 break;
669 mutex_lock(&pinst->lock); 874 mutex_lock(&pinst->lock);
670 err = __padata_remove_cpu(pinst, cpu); 875 err = __padata_remove_cpu(pinst, cpu);
@@ -675,7 +880,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
675 880
676 case CPU_UP_CANCELED: 881 case CPU_UP_CANCELED:
677 case CPU_UP_CANCELED_FROZEN: 882 case CPU_UP_CANCELED_FROZEN:
678 if (!cpumask_test_cpu(cpu, pinst->cpumask)) 883 if (!pinst_has_cpu(pinst, cpu))
679 break; 884 break;
680 mutex_lock(&pinst->lock); 885 mutex_lock(&pinst->lock);
681 __padata_remove_cpu(pinst, cpu); 886 __padata_remove_cpu(pinst, cpu);
@@ -683,7 +888,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
683 888
684 case CPU_DOWN_FAILED: 889 case CPU_DOWN_FAILED:
685 case CPU_DOWN_FAILED_FROZEN: 890 case CPU_DOWN_FAILED_FROZEN:
686 if (!cpumask_test_cpu(cpu, pinst->cpumask)) 891 if (!pinst_has_cpu(pinst, cpu))
687 break; 892 break;
688 mutex_lock(&pinst->lock); 893 mutex_lock(&pinst->lock);
689 __padata_add_cpu(pinst, cpu); 894 __padata_add_cpu(pinst, cpu);
@@ -694,36 +899,202 @@ static int padata_cpu_callback(struct notifier_block *nfb,
694} 899}
695#endif 900#endif
696 901
902static void __padata_free(struct padata_instance *pinst)
903{
904#ifdef CONFIG_HOTPLUG_CPU
905 unregister_hotcpu_notifier(&pinst->cpu_notifier);
906#endif
907
908 padata_stop(pinst);
909 padata_free_pd(pinst->pd);
910 free_cpumask_var(pinst->cpumask.pcpu);
911 free_cpumask_var(pinst->cpumask.cbcpu);
912 kfree(pinst);
913}
914
915#define kobj2pinst(_kobj) \
916 container_of(_kobj, struct padata_instance, kobj)
917#define attr2pentry(_attr) \
918 container_of(_attr, struct padata_sysfs_entry, attr)
919
920static void padata_sysfs_release(struct kobject *kobj)
921{
922 struct padata_instance *pinst = kobj2pinst(kobj);
923 __padata_free(pinst);
924}
925
926struct padata_sysfs_entry {
927 struct attribute attr;
928 ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
929 ssize_t (*store)(struct padata_instance *, struct attribute *,
930 const char *, size_t);
931};
932
933static ssize_t show_cpumask(struct padata_instance *pinst,
934 struct attribute *attr, char *buf)
935{
936 struct cpumask *cpumask;
937 ssize_t len;
938
939 mutex_lock(&pinst->lock);
940 if (!strcmp(attr->name, "serial_cpumask"))
941 cpumask = pinst->cpumask.cbcpu;
942 else
943 cpumask = pinst->cpumask.pcpu;
944
945 len = bitmap_scnprintf(buf, PAGE_SIZE, cpumask_bits(cpumask),
946 nr_cpu_ids);
947 if (PAGE_SIZE - len < 2)
948 len = -EINVAL;
949 else
950 len += sprintf(buf + len, "\n");
951
952 mutex_unlock(&pinst->lock);
953 return len;
954}
955
956static ssize_t store_cpumask(struct padata_instance *pinst,
957 struct attribute *attr,
958 const char *buf, size_t count)
959{
960 cpumask_var_t new_cpumask;
961 ssize_t ret;
962 int mask_type;
963
964 if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
965 return -ENOMEM;
966
967 ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
968 nr_cpumask_bits);
969 if (ret < 0)
970 goto out;
971
972 mask_type = !strcmp(attr->name, "serial_cpumask") ?
973 PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
974 ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
975 if (!ret)
976 ret = count;
977
978out:
979 free_cpumask_var(new_cpumask);
980 return ret;
981}
982
983#define PADATA_ATTR_RW(_name, _show_name, _store_name) \
984 static struct padata_sysfs_entry _name##_attr = \
985 __ATTR(_name, 0644, _show_name, _store_name)
986#define PADATA_ATTR_RO(_name, _show_name) \
987 static struct padata_sysfs_entry _name##_attr = \
988 __ATTR(_name, 0400, _show_name, NULL)
989
990PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
991PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);
992
993/*
994 * Padata sysfs provides the following objects:
995 * serial_cpumask [RW] - cpumask for serial workers
996 * parallel_cpumask [RW] - cpumask for parallel workers
997 */
998static struct attribute *padata_default_attrs[] = {
999 &serial_cpumask_attr.attr,
1000 &parallel_cpumask_attr.attr,
1001 NULL,
1002};
1003
1004static ssize_t padata_sysfs_show(struct kobject *kobj,
1005 struct attribute *attr, char *buf)
1006{
1007 struct padata_instance *pinst;
1008 struct padata_sysfs_entry *pentry;
1009 ssize_t ret = -EIO;
1010
1011 pinst = kobj2pinst(kobj);
1012 pentry = attr2pentry(attr);
1013 if (pentry->show)
1014 ret = pentry->show(pinst, attr, buf);
1015
1016 return ret;
1017}
1018
1019static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
1020 const char *buf, size_t count)
1021{
1022 struct padata_instance *pinst;
1023 struct padata_sysfs_entry *pentry;
1024 ssize_t ret = -EIO;
1025
1026 pinst = kobj2pinst(kobj);
1027 pentry = attr2pentry(attr);
1028 if (pentry->show)
1029 ret = pentry->store(pinst, attr, buf, count);
1030
1031 return ret;
1032}
1033
1034static const struct sysfs_ops padata_sysfs_ops = {
1035 .show = padata_sysfs_show,
1036 .store = padata_sysfs_store,
1037};
1038
1039static struct kobj_type padata_attr_type = {
1040 .sysfs_ops = &padata_sysfs_ops,
1041 .default_attrs = padata_default_attrs,
1042 .release = padata_sysfs_release,
1043};
1044
697/** 1045/**
698 * padata_alloc - allocate and initialize a padata instance 1046 * padata_alloc_possible - Allocate and initialize padata instance.
1047 * Use the cpu_possible_mask for serial and
1048 * parallel workers.
699 * 1049 *
700 * @cpumask: cpumask that padata uses for parallelization
701 * @wq: workqueue to use for the allocated padata instance 1050 * @wq: workqueue to use for the allocated padata instance
702 */ 1051 */
703struct padata_instance *padata_alloc(const struct cpumask *cpumask, 1052struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
704 struct workqueue_struct *wq) 1053{
1054 return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
1055}
1056EXPORT_SYMBOL(padata_alloc_possible);
1057
1058/**
1059 * padata_alloc - allocate and initialize a padata instance and specify
1060 * cpumasks for serial and parallel workers.
1061 *
1062 * @wq: workqueue to use for the allocated padata instance
1063 * @pcpumask: cpumask that will be used for padata parallelization
1064 * @cbcpumask: cpumask that will be used for padata serialization
1065 */
1066struct padata_instance *padata_alloc(struct workqueue_struct *wq,
1067 const struct cpumask *pcpumask,
1068 const struct cpumask *cbcpumask)
705{ 1069{
706 struct padata_instance *pinst; 1070 struct padata_instance *pinst;
707 struct parallel_data *pd; 1071 struct parallel_data *pd = NULL;
708 1072
709 pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL); 1073 pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
710 if (!pinst) 1074 if (!pinst)
711 goto err; 1075 goto err;
712 1076
713 get_online_cpus(); 1077 get_online_cpus();
714 1078 if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
715 pd = padata_alloc_pd(pinst, cpumask);
716 if (!pd)
717 goto err_free_inst; 1079 goto err_free_inst;
1080 if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
1081 free_cpumask_var(pinst->cpumask.pcpu);
1082 goto err_free_inst;
1083 }
1084 if (!padata_validate_cpumask(pinst, pcpumask) ||
1085 !padata_validate_cpumask(pinst, cbcpumask))
1086 goto err_free_masks;
718 1087
719 if (!alloc_cpumask_var(&pinst->cpumask, GFP_KERNEL)) 1088 pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
720 goto err_free_pd; 1089 if (!pd)
1090 goto err_free_masks;
721 1091
722 rcu_assign_pointer(pinst->pd, pd); 1092 rcu_assign_pointer(pinst->pd, pd);
723 1093
724 pinst->wq = wq; 1094 pinst->wq = wq;
725 1095
726 cpumask_copy(pinst->cpumask, cpumask); 1096 cpumask_copy(pinst->cpumask.pcpu, pcpumask);
1097 cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
727 1098
728 pinst->flags = 0; 1099 pinst->flags = 0;
729 1100
@@ -735,12 +1106,15 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
735 1106
736 put_online_cpus(); 1107 put_online_cpus();
737 1108
1109 BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
1110 kobject_init(&pinst->kobj, &padata_attr_type);
738 mutex_init(&pinst->lock); 1111 mutex_init(&pinst->lock);
739 1112
740 return pinst; 1113 return pinst;
741 1114
742err_free_pd: 1115err_free_masks:
743 padata_free_pd(pd); 1116 free_cpumask_var(pinst->cpumask.pcpu);
1117 free_cpumask_var(pinst->cpumask.cbcpu);
744err_free_inst: 1118err_free_inst:
745 kfree(pinst); 1119 kfree(pinst);
746 put_online_cpus(); 1120 put_online_cpus();
@@ -756,19 +1130,6 @@ EXPORT_SYMBOL(padata_alloc);
756 */ 1130 */
757void padata_free(struct padata_instance *pinst) 1131void padata_free(struct padata_instance *pinst)
758{ 1132{
759 padata_stop(pinst); 1133 kobject_put(&pinst->kobj);
760
761 synchronize_rcu();
762
763#ifdef CONFIG_HOTPLUG_CPU
764 unregister_hotcpu_notifier(&pinst->cpu_notifier);
765#endif
766 get_online_cpus();
767 padata_flush_queues(pinst->pd);
768 put_online_cpus();
769
770 padata_free_pd(pinst->pd);
771 free_cpumask_var(pinst->cpumask);
772 kfree(pinst);
773} 1134}
774EXPORT_SYMBOL(padata_free); 1135EXPORT_SYMBOL(padata_free);
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index f42d3f737a33..996a4dec5f96 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -48,59 +48,49 @@
48 * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock 48 * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
49 * held, taken with _irqsave. One lock to rule them all 49 * held, taken with _irqsave. One lock to rule them all
50 */ 50 */
51struct pm_qos_request_list { 51enum pm_qos_type {
52 struct list_head list; 52 PM_QOS_MAX, /* return the largest value */
53 union { 53 PM_QOS_MIN /* return the smallest value */
54 s32 value;
55 s32 usec;
56 s32 kbps;
57 };
58 int pm_qos_class;
59}; 54};
60 55
61static s32 max_compare(s32 v1, s32 v2);
62static s32 min_compare(s32 v1, s32 v2);
63
64struct pm_qos_object { 56struct pm_qos_object {
65 struct pm_qos_request_list requests; 57 struct plist_head requests;
66 struct blocking_notifier_head *notifiers; 58 struct blocking_notifier_head *notifiers;
67 struct miscdevice pm_qos_power_miscdev; 59 struct miscdevice pm_qos_power_miscdev;
68 char *name; 60 char *name;
69 s32 default_value; 61 s32 default_value;
70 atomic_t target_value; 62 enum pm_qos_type type;
71 s32 (*comparitor)(s32, s32);
72}; 63};
73 64
65static DEFINE_SPINLOCK(pm_qos_lock);
66
74static struct pm_qos_object null_pm_qos; 67static struct pm_qos_object null_pm_qos;
75static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); 68static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
76static struct pm_qos_object cpu_dma_pm_qos = { 69static struct pm_qos_object cpu_dma_pm_qos = {
77 .requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)}, 70 .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock),
78 .notifiers = &cpu_dma_lat_notifier, 71 .notifiers = &cpu_dma_lat_notifier,
79 .name = "cpu_dma_latency", 72 .name = "cpu_dma_latency",
80 .default_value = 2000 * USEC_PER_SEC, 73 .default_value = 2000 * USEC_PER_SEC,
81 .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC), 74 .type = PM_QOS_MIN,
82 .comparitor = min_compare
83}; 75};
84 76
85static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); 77static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
86static struct pm_qos_object network_lat_pm_qos = { 78static struct pm_qos_object network_lat_pm_qos = {
87 .requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)}, 79 .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock),
88 .notifiers = &network_lat_notifier, 80 .notifiers = &network_lat_notifier,
89 .name = "network_latency", 81 .name = "network_latency",
90 .default_value = 2000 * USEC_PER_SEC, 82 .default_value = 2000 * USEC_PER_SEC,
91 .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC), 83 .type = PM_QOS_MIN
92 .comparitor = min_compare
93}; 84};
94 85
95 86
96static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); 87static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
97static struct pm_qos_object network_throughput_pm_qos = { 88static struct pm_qos_object network_throughput_pm_qos = {
98 .requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)}, 89 .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock),
99 .notifiers = &network_throughput_notifier, 90 .notifiers = &network_throughput_notifier,
100 .name = "network_throughput", 91 .name = "network_throughput",
101 .default_value = 0, 92 .default_value = 0,
102 .target_value = ATOMIC_INIT(0), 93 .type = PM_QOS_MAX,
103 .comparitor = max_compare
104}; 94};
105 95
106 96
@@ -111,8 +101,6 @@ static struct pm_qos_object *pm_qos_array[] = {
111 &network_throughput_pm_qos 101 &network_throughput_pm_qos
112}; 102};
113 103
114static DEFINE_SPINLOCK(pm_qos_lock);
115
116static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, 104static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
117 size_t count, loff_t *f_pos); 105 size_t count, loff_t *f_pos);
118static int pm_qos_power_open(struct inode *inode, struct file *filp); 106static int pm_qos_power_open(struct inode *inode, struct file *filp);
@@ -124,46 +112,55 @@ static const struct file_operations pm_qos_power_fops = {
124 .release = pm_qos_power_release, 112 .release = pm_qos_power_release,
125}; 113};
126 114
127/* static helper functions */ 115/* unlocked internal variant */
128static s32 max_compare(s32 v1, s32 v2) 116static inline int pm_qos_get_value(struct pm_qos_object *o)
129{ 117{
130 return max(v1, v2); 118 if (plist_head_empty(&o->requests))
131} 119 return o->default_value;
132 120
133static s32 min_compare(s32 v1, s32 v2) 121 switch (o->type) {
134{ 122 case PM_QOS_MIN:
135 return min(v1, v2); 123 return plist_last(&o->requests)->prio;
136}
137 124
125 case PM_QOS_MAX:
126 return plist_first(&o->requests)->prio;
138 127
139static void update_target(int pm_qos_class) 128 default:
129 /* runtime check for not using enum */
130 BUG();
131 }
132}
133
134static void update_target(struct pm_qos_object *o, struct plist_node *node,
135 int del, int value)
140{ 136{
141 s32 extreme_value;
142 struct pm_qos_request_list *node;
143 unsigned long flags; 137 unsigned long flags;
144 int call_notifier = 0; 138 int prev_value, curr_value;
145 139
146 spin_lock_irqsave(&pm_qos_lock, flags); 140 spin_lock_irqsave(&pm_qos_lock, flags);
147 extreme_value = pm_qos_array[pm_qos_class]->default_value; 141 prev_value = pm_qos_get_value(o);
148 list_for_each_entry(node, 142 /* PM_QOS_DEFAULT_VALUE is a signal that the value is unchanged */
149 &pm_qos_array[pm_qos_class]->requests.list, list) { 143 if (value != PM_QOS_DEFAULT_VALUE) {
150 extreme_value = pm_qos_array[pm_qos_class]->comparitor( 144 /*
151 extreme_value, node->value); 145 * to change the list, we atomically remove, reinit
152 } 146 * with new value and add, then see if the extremal
153 if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) != 147 * changed
154 extreme_value) { 148 */
155 call_notifier = 1; 149 plist_del(node, &o->requests);
156 atomic_set(&pm_qos_array[pm_qos_class]->target_value, 150 plist_node_init(node, value);
157 extreme_value); 151 plist_add(node, &o->requests);
158 pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class, 152 } else if (del) {
159 atomic_read(&pm_qos_array[pm_qos_class]->target_value)); 153 plist_del(node, &o->requests);
154 } else {
155 plist_add(node, &o->requests);
160 } 156 }
157 curr_value = pm_qos_get_value(o);
161 spin_unlock_irqrestore(&pm_qos_lock, flags); 158 spin_unlock_irqrestore(&pm_qos_lock, flags);
162 159
163 if (call_notifier) 160 if (prev_value != curr_value)
164 blocking_notifier_call_chain( 161 blocking_notifier_call_chain(o->notifiers,
165 pm_qos_array[pm_qos_class]->notifiers, 162 (unsigned long)curr_value,
166 (unsigned long) extreme_value, NULL); 163 NULL);
167} 164}
168 165
169static int register_pm_qos_misc(struct pm_qos_object *qos) 166static int register_pm_qos_misc(struct pm_qos_object *qos)
@@ -196,10 +193,23 @@ static int find_pm_qos_object_by_minor(int minor)
196 */ 193 */
197int pm_qos_request(int pm_qos_class) 194int pm_qos_request(int pm_qos_class)
198{ 195{
199 return atomic_read(&pm_qos_array[pm_qos_class]->target_value); 196 unsigned long flags;
197 int value;
198
199 spin_lock_irqsave(&pm_qos_lock, flags);
200 value = pm_qos_get_value(pm_qos_array[pm_qos_class]);
201 spin_unlock_irqrestore(&pm_qos_lock, flags);
202
203 return value;
200} 204}
201EXPORT_SYMBOL_GPL(pm_qos_request); 205EXPORT_SYMBOL_GPL(pm_qos_request);
202 206
207int pm_qos_request_active(struct pm_qos_request_list *req)
208{
209 return req->pm_qos_class != 0;
210}
211EXPORT_SYMBOL_GPL(pm_qos_request_active);
212
203/** 213/**
204 * pm_qos_add_request - inserts new qos request into the list 214 * pm_qos_add_request - inserts new qos request into the list
205 * @pm_qos_class: identifies which list of qos request to use 215 * @pm_qos_class: identifies which list of qos request to use
@@ -211,27 +221,23 @@ EXPORT_SYMBOL_GPL(pm_qos_request);
211 * element as a handle for use in updating and removal. Call needs to save 221 * element as a handle for use in updating and removal. Call needs to save
212 * this handle for later use. 222 * this handle for later use.
213 */ 223 */
214struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value) 224void pm_qos_add_request(struct pm_qos_request_list *dep,
225 int pm_qos_class, s32 value)
215{ 226{
216 struct pm_qos_request_list *dep; 227 struct pm_qos_object *o = pm_qos_array[pm_qos_class];
217 unsigned long flags; 228 int new_value;
218 229
219 dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL); 230 if (pm_qos_request_active(dep)) {
220 if (dep) { 231 WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n");
221 if (value == PM_QOS_DEFAULT_VALUE) 232 return;
222 dep->value = pm_qos_array[pm_qos_class]->default_value;
223 else
224 dep->value = value;
225 dep->pm_qos_class = pm_qos_class;
226
227 spin_lock_irqsave(&pm_qos_lock, flags);
228 list_add(&dep->list,
229 &pm_qos_array[pm_qos_class]->requests.list);
230 spin_unlock_irqrestore(&pm_qos_lock, flags);
231 update_target(pm_qos_class);
232 } 233 }
233 234 if (value == PM_QOS_DEFAULT_VALUE)
234 return dep; 235 new_value = o->default_value;
236 else
237 new_value = value;
238 plist_node_init(&dep->list, new_value);
239 dep->pm_qos_class = pm_qos_class;
240 update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE);
235} 241}
236EXPORT_SYMBOL_GPL(pm_qos_add_request); 242EXPORT_SYMBOL_GPL(pm_qos_add_request);
237 243
@@ -246,27 +252,28 @@ EXPORT_SYMBOL_GPL(pm_qos_add_request);
246 * Attempts are made to make this code callable on hot code paths. 252 * Attempts are made to make this code callable on hot code paths.
247 */ 253 */
248void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, 254void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
249 s32 new_value) 255 s32 new_value)
250{ 256{
251 unsigned long flags;
252 int pending_update = 0;
253 s32 temp; 257 s32 temp;
258 struct pm_qos_object *o;
259
260 if (!pm_qos_req) /*guard against callers passing in null */
261 return;
254 262
255 if (pm_qos_req) { /*guard against callers passing in null */ 263 if (!pm_qos_request_active(pm_qos_req)) {
256 spin_lock_irqsave(&pm_qos_lock, flags); 264 WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n");
257 if (new_value == PM_QOS_DEFAULT_VALUE) 265 return;
258 temp = pm_qos_array[pm_qos_req->pm_qos_class]->default_value;
259 else
260 temp = new_value;
261
262 if (temp != pm_qos_req->value) {
263 pending_update = 1;
264 pm_qos_req->value = temp;
265 }
266 spin_unlock_irqrestore(&pm_qos_lock, flags);
267 if (pending_update)
268 update_target(pm_qos_req->pm_qos_class);
269 } 266 }
267
268 o = pm_qos_array[pm_qos_req->pm_qos_class];
269
270 if (new_value == PM_QOS_DEFAULT_VALUE)
271 temp = o->default_value;
272 else
273 temp = new_value;
274
275 if (temp != pm_qos_req->list.prio)
276 update_target(o, &pm_qos_req->list, 0, temp);
270} 277}
271EXPORT_SYMBOL_GPL(pm_qos_update_request); 278EXPORT_SYMBOL_GPL(pm_qos_update_request);
272 279
@@ -280,19 +287,20 @@ EXPORT_SYMBOL_GPL(pm_qos_update_request);
280 */ 287 */
281void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) 288void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req)
282{ 289{
283 unsigned long flags; 290 struct pm_qos_object *o;
284 int qos_class;
285 291
286 if (pm_qos_req == NULL) 292 if (pm_qos_req == NULL)
287 return; 293 return;
288 /* silent return to keep pcm code cleaner */ 294 /* silent return to keep pcm code cleaner */
289 295
290 qos_class = pm_qos_req->pm_qos_class; 296 if (!pm_qos_request_active(pm_qos_req)) {
291 spin_lock_irqsave(&pm_qos_lock, flags); 297 WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n");
292 list_del(&pm_qos_req->list); 298 return;
293 kfree(pm_qos_req); 299 }
294 spin_unlock_irqrestore(&pm_qos_lock, flags); 300
295 update_target(qos_class); 301 o = pm_qos_array[pm_qos_req->pm_qos_class];
302 update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE);
303 memset(pm_qos_req, 0, sizeof(*pm_qos_req));
296} 304}
297EXPORT_SYMBOL_GPL(pm_qos_remove_request); 305EXPORT_SYMBOL_GPL(pm_qos_remove_request);
298 306
@@ -340,8 +348,12 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
340 348
341 pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); 349 pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
342 if (pm_qos_class >= 0) { 350 if (pm_qos_class >= 0) {
343 filp->private_data = (void *) pm_qos_add_request(pm_qos_class, 351 struct pm_qos_request_list *req = kzalloc(GFP_KERNEL, sizeof(*req));
344 PM_QOS_DEFAULT_VALUE); 352 if (!req)
353 return -ENOMEM;
354
355 pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE);
356 filp->private_data = req;
345 357
346 if (filp->private_data) 358 if (filp->private_data)
347 return 0; 359 return 0;
@@ -353,8 +365,9 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp)
353{ 365{
354 struct pm_qos_request_list *req; 366 struct pm_qos_request_list *req;
355 367
356 req = (struct pm_qos_request_list *)filp->private_data; 368 req = filp->private_data;
357 pm_qos_remove_request(req); 369 pm_qos_remove_request(req);
370 kfree(req);
358 371
359 return 0; 372 return 0;
360} 373}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index aa9e916da4d5..8dc31e02ae12 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright (c) 2003 Patrick Mochel 4 * Copyright (c) 2003 Patrick Mochel
5 * Copyright (c) 2003 Open Source Development Lab 5 * Copyright (c) 2003 Open Source Development Lab
6 * Copyright (c) 2004 Pavel Machek <pavel@suse.cz> 6 * Copyright (c) 2004 Pavel Machek <pavel@ucw.cz>
7 * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc. 7 * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc.
8 * 8 *
9 * This file is released under the GPLv2. 9 * This file is released under the GPLv2.
@@ -277,7 +277,7 @@ static int create_image(int platform_mode)
277 goto Enable_irqs; 277 goto Enable_irqs;
278 } 278 }
279 279
280 if (hibernation_test(TEST_CORE)) 280 if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events())
281 goto Power_up; 281 goto Power_up;
282 282
283 in_suspend = 1; 283 in_suspend = 1;
@@ -288,8 +288,10 @@ static int create_image(int platform_mode)
288 error); 288 error);
289 /* Restore control flow magically appears here */ 289 /* Restore control flow magically appears here */
290 restore_processor_state(); 290 restore_processor_state();
291 if (!in_suspend) 291 if (!in_suspend) {
292 events_check_enabled = false;
292 platform_leave(platform_mode); 293 platform_leave(platform_mode);
294 }
293 295
294 Power_up: 296 Power_up:
295 sysdev_resume(); 297 sysdev_resume();
@@ -328,7 +330,7 @@ int hibernation_snapshot(int platform_mode)
328 330
329 error = platform_begin(platform_mode); 331 error = platform_begin(platform_mode);
330 if (error) 332 if (error)
331 return error; 333 goto Close;
332 334
333 /* Preallocate image memory before shutting down devices. */ 335 /* Preallocate image memory before shutting down devices. */
334 error = hibernate_preallocate_memory(); 336 error = hibernate_preallocate_memory();
@@ -511,18 +513,24 @@ int hibernation_platform_enter(void)
511 513
512 local_irq_disable(); 514 local_irq_disable();
513 sysdev_suspend(PMSG_HIBERNATE); 515 sysdev_suspend(PMSG_HIBERNATE);
516 if (!pm_check_wakeup_events()) {
517 error = -EAGAIN;
518 goto Power_up;
519 }
520
514 hibernation_ops->enter(); 521 hibernation_ops->enter();
515 /* We should never get here */ 522 /* We should never get here */
516 while (1); 523 while (1);
517 524
518 /* 525 Power_up:
519 * We don't need to reenable the nonboot CPUs or resume consoles, since 526 sysdev_resume();
520 * the system is going to be halted anyway. 527 local_irq_enable();
521 */ 528 enable_nonboot_cpus();
529
522 Platform_finish: 530 Platform_finish:
523 hibernation_ops->finish(); 531 hibernation_ops->finish();
524 532
525 dpm_suspend_noirq(PMSG_RESTORE); 533 dpm_resume_noirq(PMSG_RESTORE);
526 534
527 Resume_devices: 535 Resume_devices:
528 entering_platform_hibernation = false; 536 entering_platform_hibernation = false;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b58800b21fc0..62b0bc6e4983 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -204,6 +204,60 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
204 204
205power_attr(state); 205power_attr(state);
206 206
207#ifdef CONFIG_PM_SLEEP
208/*
209 * The 'wakeup_count' attribute, along with the functions defined in
210 * drivers/base/power/wakeup.c, provides a means by which wakeup events can be
211 * handled in a non-racy way.
212 *
213 * If a wakeup event occurs when the system is in a sleep state, it simply is
214 * woken up. In turn, if an event that would wake the system up from a sleep
215 * state occurs when it is undergoing a transition to that sleep state, the
216 * transition should be aborted. Moreover, if such an event occurs when the
217 * system is in the working state, an attempt to start a transition to the
218 * given sleep state should fail during certain period after the detection of
219 * the event. Using the 'state' attribute alone is not sufficient to satisfy
220 * these requirements, because a wakeup event may occur exactly when 'state'
221 * is being written to and may be delivered to user space right before it is
222 * frozen, so the event will remain only partially processed until the system is
223 * woken up by another event. In particular, it won't cause the transition to
224 * a sleep state to be aborted.
225 *
226 * This difficulty may be overcome if user space uses 'wakeup_count' before
227 * writing to 'state'. It first should read from 'wakeup_count' and store
228 * the read value. Then, after carrying out its own preparations for the system
229 * transition to a sleep state, it should write the stored value to
230 * 'wakeup_count'. If that fails, at least one wakeup event has occurred since
231 * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it
232 * is allowed to write to 'state', but the transition will be aborted if there
233 * are any wakeup events detected after 'wakeup_count' was written to.
234 */
235
236static ssize_t wakeup_count_show(struct kobject *kobj,
237 struct kobj_attribute *attr,
238 char *buf)
239{
240 unsigned long val;
241
242 return pm_get_wakeup_count(&val) ? sprintf(buf, "%lu\n", val) : -EINTR;
243}
244
245static ssize_t wakeup_count_store(struct kobject *kobj,
246 struct kobj_attribute *attr,
247 const char *buf, size_t n)
248{
249 unsigned long val;
250
251 if (sscanf(buf, "%lu", &val) == 1) {
252 if (pm_save_wakeup_count(val))
253 return n;
254 }
255 return -EINVAL;
256}
257
258power_attr(wakeup_count);
259#endif /* CONFIG_PM_SLEEP */
260
207#ifdef CONFIG_PM_TRACE 261#ifdef CONFIG_PM_TRACE
208int pm_trace_enabled; 262int pm_trace_enabled;
209 263
@@ -236,6 +290,7 @@ static struct attribute * g[] = {
236#endif 290#endif
237#ifdef CONFIG_PM_SLEEP 291#ifdef CONFIG_PM_SLEEP
238 &pm_async_attr.attr, 292 &pm_async_attr.attr,
293 &wakeup_count_attr.attr,
239#ifdef CONFIG_PM_DEBUG 294#ifdef CONFIG_PM_DEBUG
240 &pm_test_attr.attr, 295 &pm_test_attr.attr,
241#endif 296#endif
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 25ce010e9f8b..f6cd6faf84fd 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * This file provides system snapshot/restore functionality for swsusp. 4 * This file provides system snapshot/restore functionality for swsusp.
5 * 5 *
6 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz> 6 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
8 * 8 *
9 * This file is released under the GPLv2. 9 * This file is released under the GPLv2.
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index f37cb7dd4402..7335952ee473 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -136,19 +136,19 @@ static int suspend_enter(suspend_state_t state)
136 if (suspend_ops->prepare) { 136 if (suspend_ops->prepare) {
137 error = suspend_ops->prepare(); 137 error = suspend_ops->prepare();
138 if (error) 138 if (error)
139 return error; 139 goto Platform_finish;
140 } 140 }
141 141
142 error = dpm_suspend_noirq(PMSG_SUSPEND); 142 error = dpm_suspend_noirq(PMSG_SUSPEND);
143 if (error) { 143 if (error) {
144 printk(KERN_ERR "PM: Some devices failed to power down\n"); 144 printk(KERN_ERR "PM: Some devices failed to power down\n");
145 goto Platfrom_finish; 145 goto Platform_finish;
146 } 146 }
147 147
148 if (suspend_ops->prepare_late) { 148 if (suspend_ops->prepare_late) {
149 error = suspend_ops->prepare_late(); 149 error = suspend_ops->prepare_late();
150 if (error) 150 if (error)
151 goto Power_up_devices; 151 goto Platform_wake;
152 } 152 }
153 153
154 if (suspend_test(TEST_PLATFORM)) 154 if (suspend_test(TEST_PLATFORM))
@@ -163,8 +163,10 @@ static int suspend_enter(suspend_state_t state)
163 163
164 error = sysdev_suspend(PMSG_SUSPEND); 164 error = sysdev_suspend(PMSG_SUSPEND);
165 if (!error) { 165 if (!error) {
166 if (!suspend_test(TEST_CORE)) 166 if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) {
167 error = suspend_ops->enter(state); 167 error = suspend_ops->enter(state);
168 events_check_enabled = false;
169 }
168 sysdev_resume(); 170 sysdev_resume();
169 } 171 }
170 172
@@ -178,10 +180,9 @@ static int suspend_enter(suspend_state_t state)
178 if (suspend_ops->wake) 180 if (suspend_ops->wake)
179 suspend_ops->wake(); 181 suspend_ops->wake();
180 182
181 Power_up_devices:
182 dpm_resume_noirq(PMSG_RESUME); 183 dpm_resume_noirq(PMSG_RESUME);
183 184
184 Platfrom_finish: 185 Platform_finish:
185 if (suspend_ops->finish) 186 if (suspend_ops->finish)
186 suspend_ops->finish(); 187 suspend_ops->finish();
187 188
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b0bb21778391..e6a5bdf61a37 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -4,7 +4,7 @@
4 * This file provides functions for reading the suspend image from 4 * This file provides functions for reading the suspend image from
5 * and writing it to a swap partition. 5 * and writing it to a swap partition.
6 * 6 *
7 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz> 7 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
9 * 9 *
10 * This file is released under the GPLv2. 10 * This file is released under the GPLv2.
@@ -32,7 +32,7 @@
32/* 32/*
33 * The swap map is a data structure used for keeping track of each page 33 * The swap map is a data structure used for keeping track of each page
34 * written to a swap partition. It consists of many swap_map_page 34 * written to a swap partition. It consists of many swap_map_page
35 * structures that contain each an array of MAP_PAGE_SIZE swap entries. 35 * structures that contain each an array of MAP_PAGE_ENTRIES swap entries.
36 * These structures are stored on the swap and linked together with the 36 * These structures are stored on the swap and linked together with the
37 * help of the .next_swap member. 37 * help of the .next_swap member.
38 * 38 *
@@ -148,7 +148,7 @@ sector_t alloc_swapdev_block(int swap)
148 148
149/** 149/**
150 * free_all_swap_pages - free swap pages allocated for saving image data. 150 * free_all_swap_pages - free swap pages allocated for saving image data.
151 * It also frees the extents used to register which swap entres had been 151 * It also frees the extents used to register which swap entries had been
152 * allocated. 152 * allocated.
153 */ 153 */
154 154
diff --git a/kernel/printk.c b/kernel/printk.c
index 444b770c9595..4ab0164bcf84 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -37,6 +37,8 @@
37#include <linux/ratelimit.h> 37#include <linux/ratelimit.h>
38#include <linux/kmsg_dump.h> 38#include <linux/kmsg_dump.h>
39#include <linux/syslog.h> 39#include <linux/syslog.h>
40#include <linux/cpu.h>
41#include <linux/notifier.h>
40 42
41#include <asm/uaccess.h> 43#include <asm/uaccess.h>
42 44
@@ -985,6 +987,32 @@ void resume_console(void)
985} 987}
986 988
987/** 989/**
990 * console_cpu_notify - print deferred console messages after CPU hotplug
991 * @self: notifier struct
992 * @action: CPU hotplug event
993 * @hcpu: unused
994 *
995 * If printk() is called from a CPU that is not online yet, the messages
996 * will be spooled but will not show up on the console. This function is
997 * called when a new CPU comes online (or fails to come up), and ensures
998 * that any such output gets printed.
999 */
1000static int __cpuinit console_cpu_notify(struct notifier_block *self,
1001 unsigned long action, void *hcpu)
1002{
1003 switch (action) {
1004 case CPU_ONLINE:
1005 case CPU_DEAD:
1006 case CPU_DYING:
1007 case CPU_DOWN_FAILED:
1008 case CPU_UP_CANCELED:
1009 acquire_console_sem();
1010 release_console_sem();
1011 }
1012 return NOTIFY_OK;
1013}
1014
1015/**
988 * acquire_console_sem - lock the console system for exclusive use. 1016 * acquire_console_sem - lock the console system for exclusive use.
989 * 1017 *
990 * Acquires a semaphore which guarantees that the caller has 1018 * Acquires a semaphore which guarantees that the caller has
@@ -1371,7 +1399,7 @@ int unregister_console(struct console *console)
1371} 1399}
1372EXPORT_SYMBOL(unregister_console); 1400EXPORT_SYMBOL(unregister_console);
1373 1401
1374static int __init disable_boot_consoles(void) 1402static int __init printk_late_init(void)
1375{ 1403{
1376 struct console *con; 1404 struct console *con;
1377 1405
@@ -1382,9 +1410,10 @@ static int __init disable_boot_consoles(void)
1382 unregister_console(con); 1410 unregister_console(con);
1383 } 1411 }
1384 } 1412 }
1413 hotcpu_notifier(console_cpu_notify, 0);
1385 return 0; 1414 return 0;
1386} 1415}
1387late_initcall(disable_boot_consoles); 1416late_initcall(printk_late_init);
1388 1417
1389#if defined CONFIG_PRINTK 1418#if defined CONFIG_PRINTK
1390 1419
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 72a8dc9567f5..4d169835fb36 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -114,3 +114,163 @@ int rcu_my_thread_group_empty(void)
114} 114}
115EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty); 115EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty);
116#endif /* #ifdef CONFIG_PROVE_RCU */ 116#endif /* #ifdef CONFIG_PROVE_RCU */
117
118#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
119static inline void debug_init_rcu_head(struct rcu_head *head)
120{
121 debug_object_init(head, &rcuhead_debug_descr);
122}
123
124static inline void debug_rcu_head_free(struct rcu_head *head)
125{
126 debug_object_free(head, &rcuhead_debug_descr);
127}
128
129/*
130 * fixup_init is called when:
131 * - an active object is initialized
132 */
133static int rcuhead_fixup_init(void *addr, enum debug_obj_state state)
134{
135 struct rcu_head *head = addr;
136
137 switch (state) {
138 case ODEBUG_STATE_ACTIVE:
139 /*
140 * Ensure that queued callbacks are all executed.
141 * If we detect that we are nested in an RCU read-side critical
142 * section, we should simply fail, otherwise we would deadlock.
143 */
144 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
145 irqs_disabled()) {
146 WARN_ON(1);
147 return 0;
148 }
149 rcu_barrier();
150 rcu_barrier_sched();
151 rcu_barrier_bh();
152 debug_object_init(head, &rcuhead_debug_descr);
153 return 1;
154 default:
155 return 0;
156 }
157}
158
159/*
160 * fixup_activate is called when:
161 * - an active object is activated
162 * - an unknown object is activated (might be a statically initialized object)
163 * Activation is performed internally by call_rcu().
164 */
165static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
166{
167 struct rcu_head *head = addr;
168
169 switch (state) {
170
171 case ODEBUG_STATE_NOTAVAILABLE:
172 /*
173 * This is not really a fixup. We just make sure that it is
174 * tracked in the object tracker.
175 */
176 debug_object_init(head, &rcuhead_debug_descr);
177 debug_object_activate(head, &rcuhead_debug_descr);
178 return 0;
179
180 case ODEBUG_STATE_ACTIVE:
181 /*
182 * Ensure that queued callbacks are all executed.
183 * If we detect that we are nested in an RCU read-side critical
184 * section, we should simply fail, otherwise we would deadlock.
185 */
186 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
187 irqs_disabled()) {
188 WARN_ON(1);
189 return 0;
190 }
191 rcu_barrier();
192 rcu_barrier_sched();
193 rcu_barrier_bh();
194 debug_object_activate(head, &rcuhead_debug_descr);
195 return 1;
196 default:
197 return 0;
198 }
199}
200
201/*
202 * fixup_free is called when:
203 * - an active object is freed
204 */
205static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
206{
207 struct rcu_head *head = addr;
208
209 switch (state) {
210 case ODEBUG_STATE_ACTIVE:
211 /*
212 * Ensure that queued callbacks are all executed.
213 * If we detect that we are nested in an RCU read-side critical
214 * section, we should simply fail, otherwise we would deadlock.
215 */
216#ifndef CONFIG_PREEMPT
217 WARN_ON(1);
218 return 0;
219#else
220 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
221 irqs_disabled()) {
222 WARN_ON(1);
223 return 0;
224 }
225 rcu_barrier();
226 rcu_barrier_sched();
227 rcu_barrier_bh();
228 debug_object_free(head, &rcuhead_debug_descr);
229 return 1;
230#endif
231 default:
232 return 0;
233 }
234}
235
236/**
237 * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
238 * @head: pointer to rcu_head structure to be initialized
239 *
240 * This function informs debugobjects of a new rcu_head structure that
241 * has been allocated as an auto variable on the stack. This function
242 * is not required for rcu_head structures that are statically defined or
243 * that are dynamically allocated on the heap. This function has no
244 * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
245 */
246void init_rcu_head_on_stack(struct rcu_head *head)
247{
248 debug_object_init_on_stack(head, &rcuhead_debug_descr);
249}
250EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);
251
252/**
253 * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
254 * @head: pointer to rcu_head structure to be destroyed
255 *
256 * This function informs debugobjects that an on-stack rcu_head structure
257 * is about to go out of scope. As with init_rcu_head_on_stack(), this
258 * function is not required for rcu_head structures that are statically
259 * defined or that are dynamically allocated on the heap. Also as with
260 * init_rcu_head_on_stack(), this function has no effect for
261 * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
262 */
263void destroy_rcu_head_on_stack(struct rcu_head *head)
264{
265 debug_object_free(head, &rcuhead_debug_descr);
266}
267EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);
268
269struct debug_obj_descr rcuhead_debug_descr = {
270 .name = "rcu_head",
271 .fixup_init = rcuhead_fixup_init,
272 .fixup_activate = rcuhead_fixup_activate,
273 .fixup_free = rcuhead_fixup_free,
274};
275EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
276#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 38729d3cd236..196ec02f8be0 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -169,6 +169,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
169 while (list) { 169 while (list) {
170 next = list->next; 170 next = list->next;
171 prefetch(next); 171 prefetch(next);
172 debug_rcu_head_unqueue(list);
172 list->func(list); 173 list->func(list);
173 list = next; 174 list = next;
174 } 175 }
@@ -211,6 +212,7 @@ static void __call_rcu(struct rcu_head *head,
211{ 212{
212 unsigned long flags; 213 unsigned long flags;
213 214
215 debug_rcu_head_queue(head);
214 head->func = func; 216 head->func = func;
215 head->next = NULL; 217 head->next = NULL;
216 218
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d4437345706f..d5bc43976c5a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1112,6 +1112,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1112 while (list) { 1112 while (list) {
1113 next = list->next; 1113 next = list->next;
1114 prefetch(next); 1114 prefetch(next);
1115 debug_rcu_head_unqueue(list);
1115 list->func(list); 1116 list->func(list);
1116 list = next; 1117 list = next;
1117 if (++count >= rdp->blimit) 1118 if (++count >= rdp->blimit)
@@ -1388,6 +1389,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1388 unsigned long flags; 1389 unsigned long flags;
1389 struct rcu_data *rdp; 1390 struct rcu_data *rdp;
1390 1391
1392 debug_rcu_head_queue(head);
1391 head->func = func; 1393 head->func = func;
1392 head->next = NULL; 1394 head->next = NULL;
1393 1395
diff --git a/kernel/signal.c b/kernel/signal.c
index 906ae5a1779c..bded65187780 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -637,7 +637,7 @@ static inline bool si_fromuser(const struct siginfo *info)
637 637
638/* 638/*
639 * Bad permissions for sending the signal 639 * Bad permissions for sending the signal
640 * - the caller must hold at least the RCU read lock 640 * - the caller must hold the RCU read lock
641 */ 641 */
642static int check_kill_permission(int sig, struct siginfo *info, 642static int check_kill_permission(int sig, struct siginfo *info,
643 struct task_struct *t) 643 struct task_struct *t)
@@ -1127,11 +1127,14 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long
1127 1127
1128/* 1128/*
1129 * send signal info to all the members of a group 1129 * send signal info to all the members of a group
1130 * - the caller must hold the RCU read lock at least
1131 */ 1130 */
1132int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) 1131int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1133{ 1132{
1134 int ret = check_kill_permission(sig, info, p); 1133 int ret;
1134
1135 rcu_read_lock();
1136 ret = check_kill_permission(sig, info, p);
1137 rcu_read_unlock();
1135 1138
1136 if (!ret && sig) 1139 if (!ret && sig)
1137 ret = do_send_sig_info(sig, info, p, true); 1140 ret = do_send_sig_info(sig, info, p, true);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index b3bafd5fc66d..48b2761b5668 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -188,7 +188,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
188 /* 188 /*
189 * Setup the next period for devices, which do not have 189 * Setup the next period for devices, which do not have
190 * periodic mode. We read dev->next_event first and add to it 190 * periodic mode. We read dev->next_event first and add to it
191 * when the event alrady expired. clockevents_program_event() 191 * when the event already expired. clockevents_program_event()
192 * sets dev->next_event only when the event is really 192 * sets dev->next_event only when the event is really
193 * programmed to the device. 193 * programmed to the device.
194 */ 194 */
diff --git a/kernel/timer.c b/kernel/timer.c
index c29e2d4d2a66..6aa6f7e69ad5 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -577,6 +577,19 @@ static void __init_timer(struct timer_list *timer,
577 lockdep_init_map(&timer->lockdep_map, name, key, 0); 577 lockdep_init_map(&timer->lockdep_map, name, key, 0);
578} 578}
579 579
580void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
581 const char *name,
582 struct lock_class_key *key,
583 void (*function)(unsigned long),
584 unsigned long data)
585{
586 timer->function = function;
587 timer->data = data;
588 init_timer_on_stack_key(timer, name, key);
589 timer_set_deferrable(timer);
590}
591EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
592
580/** 593/**
581 * init_timer_key - initialize a timer 594 * init_timer_key - initialize a timer
582 * @timer: the timer to be initialized 595 * @timer: the timer to be initialized
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 438e84a56ab3..53f338190b26 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,5 +53,8 @@ endif
53obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 53obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
54obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 54obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
55obj-$(CONFIG_EVENT_TRACING) += power-traces.o 55obj-$(CONFIG_EVENT_TRACING) += power-traces.o
56ifeq ($(CONFIG_TRACING),y)
57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
58endif
56 59
57libftrace-y := ftrace.o 60libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4b1122d0df37..ed1032d6f81d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -101,10 +101,7 @@ static inline void ftrace_enable_cpu(void)
101 preempt_enable(); 101 preempt_enable();
102} 102}
103 103
104static cpumask_var_t __read_mostly tracing_buffer_mask; 104cpumask_var_t __read_mostly tracing_buffer_mask;
105
106#define for_each_tracing_cpu(cpu) \
107 for_each_cpu(cpu, tracing_buffer_mask)
108 105
109/* 106/*
110 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops 107 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
@@ -1493,11 +1490,6 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1493} 1490}
1494EXPORT_SYMBOL_GPL(trace_vprintk); 1491EXPORT_SYMBOL_GPL(trace_vprintk);
1495 1492
1496enum trace_file_type {
1497 TRACE_FILE_LAT_FMT = 1,
1498 TRACE_FILE_ANNOTATE = 2,
1499};
1500
1501static void trace_iterator_increment(struct trace_iterator *iter) 1493static void trace_iterator_increment(struct trace_iterator *iter)
1502{ 1494{
1503 /* Don't allow ftrace to trace into the ring buffers */ 1495 /* Don't allow ftrace to trace into the ring buffers */
@@ -1595,7 +1587,7 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1595} 1587}
1596 1588
1597/* Find the next real entry, and increment the iterator to the next entry */ 1589/* Find the next real entry, and increment the iterator to the next entry */
1598static void *find_next_entry_inc(struct trace_iterator *iter) 1590void *trace_find_next_entry_inc(struct trace_iterator *iter)
1599{ 1591{
1600 iter->ent = __find_next_entry(iter, &iter->cpu, 1592 iter->ent = __find_next_entry(iter, &iter->cpu,
1601 &iter->lost_events, &iter->ts); 1593 &iter->lost_events, &iter->ts);
@@ -1630,19 +1622,19 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1630 return NULL; 1622 return NULL;
1631 1623
1632 if (iter->idx < 0) 1624 if (iter->idx < 0)
1633 ent = find_next_entry_inc(iter); 1625 ent = trace_find_next_entry_inc(iter);
1634 else 1626 else
1635 ent = iter; 1627 ent = iter;
1636 1628
1637 while (ent && iter->idx < i) 1629 while (ent && iter->idx < i)
1638 ent = find_next_entry_inc(iter); 1630 ent = trace_find_next_entry_inc(iter);
1639 1631
1640 iter->pos = *pos; 1632 iter->pos = *pos;
1641 1633
1642 return ent; 1634 return ent;
1643} 1635}
1644 1636
1645static void tracing_iter_reset(struct trace_iterator *iter, int cpu) 1637void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1646{ 1638{
1647 struct trace_array *tr = iter->tr; 1639 struct trace_array *tr = iter->tr;
1648 struct ring_buffer_event *event; 1640 struct ring_buffer_event *event;
@@ -2003,7 +1995,7 @@ int trace_empty(struct trace_iterator *iter)
2003} 1995}
2004 1996
2005/* Called with trace_event_read_lock() held. */ 1997/* Called with trace_event_read_lock() held. */
2006static enum print_line_t print_trace_line(struct trace_iterator *iter) 1998enum print_line_t print_trace_line(struct trace_iterator *iter)
2007{ 1999{
2008 enum print_line_t ret; 2000 enum print_line_t ret;
2009 2001
@@ -3193,7 +3185,7 @@ waitagain:
3193 3185
3194 trace_event_read_lock(); 3186 trace_event_read_lock();
3195 trace_access_lock(iter->cpu_file); 3187 trace_access_lock(iter->cpu_file);
3196 while (find_next_entry_inc(iter) != NULL) { 3188 while (trace_find_next_entry_inc(iter) != NULL) {
3197 enum print_line_t ret; 3189 enum print_line_t ret;
3198 int len = iter->seq.len; 3190 int len = iter->seq.len;
3199 3191
@@ -3276,7 +3268,7 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3276 if (ret != TRACE_TYPE_NO_CONSUME) 3268 if (ret != TRACE_TYPE_NO_CONSUME)
3277 trace_consume(iter); 3269 trace_consume(iter);
3278 rem -= count; 3270 rem -= count;
3279 if (!find_next_entry_inc(iter)) { 3271 if (!trace_find_next_entry_inc(iter)) {
3280 rem = 0; 3272 rem = 0;
3281 iter->ent = NULL; 3273 iter->ent = NULL;
3282 break; 3274 break;
@@ -3332,7 +3324,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3332 if (ret <= 0) 3324 if (ret <= 0)
3333 goto out_err; 3325 goto out_err;
3334 3326
3335 if (!iter->ent && !find_next_entry_inc(iter)) { 3327 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
3336 ret = -EFAULT; 3328 ret = -EFAULT;
3337 goto out_err; 3329 goto out_err;
3338 } 3330 }
@@ -4402,7 +4394,7 @@ static struct notifier_block trace_die_notifier = {
4402 */ 4394 */
4403#define KERN_TRACE KERN_EMERG 4395#define KERN_TRACE KERN_EMERG
4404 4396
4405static void 4397void
4406trace_printk_seq(struct trace_seq *s) 4398trace_printk_seq(struct trace_seq *s)
4407{ 4399{
4408 /* Probably should print a warning here. */ 4400 /* Probably should print a warning here. */
@@ -4417,6 +4409,13 @@ trace_printk_seq(struct trace_seq *s)
4417 trace_seq_init(s); 4409 trace_seq_init(s);
4418} 4410}
4419 4411
4412void trace_init_global_iter(struct trace_iterator *iter)
4413{
4414 iter->tr = &global_trace;
4415 iter->trace = current_trace;
4416 iter->cpu_file = TRACE_PIPE_ALL_CPU;
4417}
4418
4420static void 4419static void
4421__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) 4420__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
4422{ 4421{
@@ -4442,8 +4441,10 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
4442 if (disable_tracing) 4441 if (disable_tracing)
4443 ftrace_kill(); 4442 ftrace_kill();
4444 4443
4444 trace_init_global_iter(&iter);
4445
4445 for_each_tracing_cpu(cpu) { 4446 for_each_tracing_cpu(cpu) {
4446 atomic_inc(&global_trace.data[cpu]->disabled); 4447 atomic_inc(&iter.tr->data[cpu]->disabled);
4447 } 4448 }
4448 4449
4449 old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; 4450 old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
@@ -4492,7 +4493,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
4492 iter.iter_flags |= TRACE_FILE_LAT_FMT; 4493 iter.iter_flags |= TRACE_FILE_LAT_FMT;
4493 iter.pos = -1; 4494 iter.pos = -1;
4494 4495
4495 if (find_next_entry_inc(&iter) != NULL) { 4496 if (trace_find_next_entry_inc(&iter) != NULL) {
4496 int ret; 4497 int ret;
4497 4498
4498 ret = print_trace_line(&iter); 4499 ret = print_trace_line(&iter);
@@ -4514,7 +4515,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
4514 trace_flags |= old_userobj; 4515 trace_flags |= old_userobj;
4515 4516
4516 for_each_tracing_cpu(cpu) { 4517 for_each_tracing_cpu(cpu) {
4517 atomic_dec(&global_trace.data[cpu]->disabled); 4518 atomic_dec(&iter.tr->data[cpu]->disabled);
4518 } 4519 }
4519 tracing_on(); 4520 tracing_on();
4520 } 4521 }
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d05c873dd4b2..d39b3c5454a5 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -314,6 +314,14 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
314struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 314struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
315 int *ent_cpu, u64 *ent_ts); 315 int *ent_cpu, u64 *ent_ts);
316 316
317int trace_empty(struct trace_iterator *iter);
318
319void *trace_find_next_entry_inc(struct trace_iterator *iter);
320
321void trace_init_global_iter(struct trace_iterator *iter);
322
323void tracing_iter_reset(struct trace_iterator *iter, int cpu);
324
317void default_wait_pipe(struct trace_iterator *iter); 325void default_wait_pipe(struct trace_iterator *iter);
318void poll_wait_pipe(struct trace_iterator *iter); 326void poll_wait_pipe(struct trace_iterator *iter);
319 327
@@ -351,6 +359,15 @@ void tracing_start_sched_switch_record(void);
351int register_tracer(struct tracer *type); 359int register_tracer(struct tracer *type);
352void unregister_tracer(struct tracer *type); 360void unregister_tracer(struct tracer *type);
353int is_tracing_stopped(void); 361int is_tracing_stopped(void);
362enum trace_file_type {
363 TRACE_FILE_LAT_FMT = 1,
364 TRACE_FILE_ANNOTATE = 2,
365};
366
367extern cpumask_var_t __read_mostly tracing_buffer_mask;
368
369#define for_each_tracing_cpu(cpu) \
370 for_each_cpu(cpu, tracing_buffer_mask)
354 371
355extern unsigned long nsecs_to_usecs(unsigned long nsecs); 372extern unsigned long nsecs_to_usecs(unsigned long nsecs);
356 373
@@ -436,6 +453,8 @@ trace_array_vprintk(struct trace_array *tr,
436 unsigned long ip, const char *fmt, va_list args); 453 unsigned long ip, const char *fmt, va_list args);
437int trace_array_printk(struct trace_array *tr, 454int trace_array_printk(struct trace_array *tr,
438 unsigned long ip, const char *fmt, ...); 455 unsigned long ip, const char *fmt, ...);
456void trace_printk_seq(struct trace_seq *s);
457enum print_line_t print_trace_line(struct trace_iterator *iter);
439 458
440extern unsigned long trace_flags; 459extern unsigned long trace_flags;
441 460
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
new file mode 100644
index 000000000000..7b8ecd751d93
--- /dev/null
+++ b/kernel/trace/trace_kdb.c
@@ -0,0 +1,136 @@
1/*
2 * kdb helper for dumping the ftrace buffer
3 *
4 * Copyright (C) 2010 Jason Wessel <jason.wessel@windriver.com>
5 *
6 * ftrace_dump_buf based on ftrace_dump:
7 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
8 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
9 *
10 */
11#include <linux/init.h>
12#include <linux/kgdb.h>
13#include <linux/kdb.h>
14#include <linux/ftrace.h>
15
16#include "../debug/kdb/kdb_private.h"
17#include "trace.h"
18#include "trace_output.h"
19
20static void ftrace_dump_buf(int skip_lines, long cpu_file)
21{
22 /* use static because iter can be a bit big for the stack */
23 static struct trace_iterator iter;
24 unsigned int old_userobj;
25 int cnt = 0, cpu;
26
27 trace_init_global_iter(&iter);
28
29 for_each_tracing_cpu(cpu) {
30 atomic_inc(&iter.tr->data[cpu]->disabled);
31 }
32
33 old_userobj = trace_flags;
34
35 /* don't look at user memory in panic mode */
36 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
37
38 kdb_printf("Dumping ftrace buffer:\n");
39
40 /* reset all but tr, trace, and overruns */
41 memset(&iter.seq, 0,
42 sizeof(struct trace_iterator) -
43 offsetof(struct trace_iterator, seq));
44 iter.iter_flags |= TRACE_FILE_LAT_FMT;
45 iter.pos = -1;
46
47 if (cpu_file == TRACE_PIPE_ALL_CPU) {
48 for_each_tracing_cpu(cpu) {
49 iter.buffer_iter[cpu] =
50 ring_buffer_read_prepare(iter.tr->buffer, cpu);
51 ring_buffer_read_start(iter.buffer_iter[cpu]);
52 tracing_iter_reset(&iter, cpu);
53 }
54 } else {
55 iter.cpu_file = cpu_file;
56 iter.buffer_iter[cpu_file] =
57 ring_buffer_read_prepare(iter.tr->buffer, cpu_file);
58 ring_buffer_read_start(iter.buffer_iter[cpu_file]);
59 tracing_iter_reset(&iter, cpu_file);
60 }
61 if (!trace_empty(&iter))
62 trace_find_next_entry_inc(&iter);
63 while (!trace_empty(&iter)) {
64 if (!cnt)
65 kdb_printf("---------------------------------\n");
66 cnt++;
67
68 if (trace_find_next_entry_inc(&iter) != NULL && !skip_lines)
69 print_trace_line(&iter);
70 if (!skip_lines)
71 trace_printk_seq(&iter.seq);
72 else
73 skip_lines--;
74 if (KDB_FLAG(CMD_INTERRUPT))
75 goto out;
76 }
77
78 if (!cnt)
79 kdb_printf(" (ftrace buffer empty)\n");
80 else
81 kdb_printf("---------------------------------\n");
82
83out:
84 trace_flags = old_userobj;
85
86 for_each_tracing_cpu(cpu) {
87 atomic_dec(&iter.tr->data[cpu]->disabled);
88 }
89
90 for_each_tracing_cpu(cpu)
91 if (iter.buffer_iter[cpu])
92 ring_buffer_read_finish(iter.buffer_iter[cpu]);
93}
94
95/*
96 * kdb_ftdump - Dump the ftrace log buffer
97 */
98static int kdb_ftdump(int argc, const char **argv)
99{
100 int skip_lines = 0;
101 long cpu_file;
102 char *cp;
103
104 if (argc > 2)
105 return KDB_ARGCOUNT;
106
107 if (argc) {
108 skip_lines = simple_strtol(argv[1], &cp, 0);
109 if (*cp)
110 skip_lines = 0;
111 }
112
113 if (argc == 2) {
114 cpu_file = simple_strtol(argv[2], &cp, 0);
115 if (*cp || cpu_file >= NR_CPUS || cpu_file < 0 ||
116 !cpu_online(cpu_file))
117 return KDB_BADINT;
118 } else {
119 cpu_file = TRACE_PIPE_ALL_CPU;
120 }
121
122 kdb_trap_printk++;
123 ftrace_dump_buf(skip_lines, cpu_file);
124 kdb_trap_printk--;
125
126 return 0;
127}
128
129static __init int kdb_ftrace_register(void)
130{
131 kdb_register_repeat("ftdump", kdb_ftdump, "[skip_#lines] [cpu]",
132 "Dump ftrace log", 0, KDB_REPEAT_NONE);
133 return 0;
134}
135
136late_initcall(kdb_ftrace_register);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index b2d70d38dff4..25915832291a 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
9#include <linux/nsproxy.h> 9#include <linux/nsproxy.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/user_namespace.h> 11#include <linux/user_namespace.h>
12#include <linux/highuid.h>
12#include <linux/cred.h> 13#include <linux/cred.h>
13 14
14/* 15/*
@@ -82,3 +83,46 @@ void free_user_ns(struct kref *kref)
82 schedule_work(&ns->destroyer); 83 schedule_work(&ns->destroyer);
83} 84}
84EXPORT_SYMBOL(free_user_ns); 85EXPORT_SYMBOL(free_user_ns);
86
87uid_t user_ns_map_uid(struct user_namespace *to, const struct cred *cred, uid_t uid)
88{
89 struct user_namespace *tmp;
90
91 if (likely(to == cred->user->user_ns))
92 return uid;
93
94
95 /* Is cred->user the creator of the target user_ns
96 * or the creator of one of its parents?
97 */
98 for ( tmp = to; tmp != &init_user_ns;
99 tmp = tmp->creator->user_ns ) {
100 if (cred->user == tmp->creator) {
101 return (uid_t)0;
102 }
103 }
104
105 /* No useful relationship so no mapping */
106 return overflowuid;
107}
108
109gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t gid)
110{
111 struct user_namespace *tmp;
112
113 if (likely(to == cred->user->user_ns))
114 return gid;
115
116 /* Is cred->user the creator of the target user_ns
117 * or the creator of one of its parents?
118 */
119 for ( tmp = to; tmp != &init_user_ns;
120 tmp = tmp->creator->user_ns ) {
121 if (cred->user == tmp->creator) {
122 return (gid_t)0;
123 }
124 }
125
126 /* No useful relationship so no mapping */
127 return overflowgid;
128}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 327d2deb4451..59fef1531dd2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,6 +68,21 @@ struct workqueue_struct {
68#endif 68#endif
69}; 69};
70 70
71#ifdef CONFIG_LOCKDEP
72/**
73 * in_workqueue_context() - in context of specified workqueue?
74 * @wq: the workqueue of interest
75 *
76 * Checks lockdep state to see if the current task is executing from
77 * within a workqueue item. This function exists only if lockdep is
78 * enabled.
79 */
80int in_workqueue_context(struct workqueue_struct *wq)
81{
82 return lock_is_held(&wq->lockdep_map);
83}
84#endif
85
71#ifdef CONFIG_DEBUG_OBJECTS_WORK 86#ifdef CONFIG_DEBUG_OBJECTS_WORK
72 87
73static struct debug_obj_descr work_debug_descr; 88static struct debug_obj_descr work_debug_descr;