Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile         |   1
-rw-r--r--  kernel/acct.c           |   2
-rw-r--r--  kernel/cpuset.c         | 125
-rw-r--r--  kernel/futex.c          | 137
-rw-r--r--  kernel/intermodule.c    |   3
-rw-r--r--  kernel/irq/handle.c     |   2
-rw-r--r--  kernel/irq/manage.c     |   4
-rw-r--r--  kernel/irq/proc.c       |  14
-rw-r--r--  kernel/kprobes.c        |  94
-rw-r--r--  kernel/module.c         |  33
-rw-r--r--  kernel/params.c         |   4
-rw-r--r--  kernel/posix-timers.c   |  28
-rw-r--r--  kernel/power/Kconfig    |   2
-rw-r--r--  kernel/power/pm.c       |   3
-rw-r--r--  kernel/power/swsusp.c   |   1
-rw-r--r--  kernel/printk.c         |  13
-rw-r--r--  kernel/ptrace.c         |  41
-rw-r--r--  kernel/resource.c       |   3
-rw-r--r--  kernel/sched.c          | 339
-rw-r--r--  kernel/signal.c         |  83
-rw-r--r--  kernel/softlockup.c     | 151
-rw-r--r--  kernel/sys.c            |   6
-rw-r--r--  kernel/timer.c          |  18
-rw-r--r--  kernel/workqueue.c      |   5
24 files changed, 887 insertions(+), 225 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index cb05cd05d237..8d57a2f1226b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_AUDIT) += audit.o
27obj-$(CONFIG_AUDITSYSCALL) += auditsc.o 27obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
28obj-$(CONFIG_KPROBES) += kprobes.o 28obj-$(CONFIG_KPROBES) += kprobes.o
29obj-$(CONFIG_SYSFS) += ksysfs.o 29obj-$(CONFIG_SYSFS) += ksysfs.o
30obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
30obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 31obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
31obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 32obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
32obj-$(CONFIG_SECCOMP) += seccomp.o 33obj-$(CONFIG_SECCOMP) += seccomp.o
diff --git a/kernel/acct.c b/kernel/acct.c
index 4168f631868e..f70e6027cca9 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -220,7 +220,7 @@ asmlinkage long sys_acct(const char __user *name)
220 return (PTR_ERR(tmp)); 220 return (PTR_ERR(tmp));
221 } 221 }
222 /* Difference from BSD - they don't do O_APPEND */ 222 /* Difference from BSD - they don't do O_APPEND */
223 file = filp_open(tmp, O_WRONLY|O_APPEND, 0); 223 file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
224 putname(tmp); 224 putname(tmp);
225 if (IS_ERR(file)) { 225 if (IS_ERR(file)) {
226 return (PTR_ERR(file)); 226 return (PTR_ERR(file));
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8ab1b4e518b8..1f06e7690106 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -628,13 +628,6 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
628 * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. 628 * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
629 */ 629 */
630 630
631/*
632 * Hack to avoid 2.6.13 partial node dynamic sched domain bug.
633 * Disable letting 'cpu_exclusive' cpusets define dynamic sched
634 * domains, until the sched domain can handle partial nodes.
635 * Remove this #if hackery when sched domains fixed.
636 */
637#if 0
638static void update_cpu_domains(struct cpuset *cur) 631static void update_cpu_domains(struct cpuset *cur)
639{ 632{
640 struct cpuset *c, *par = cur->parent; 633 struct cpuset *c, *par = cur->parent;
@@ -675,11 +668,6 @@ static void update_cpu_domains(struct cpuset *cur)
675 partition_sched_domains(&pspan, &cspan); 668 partition_sched_domains(&pspan, &cspan);
676 unlock_cpu_hotplug(); 669 unlock_cpu_hotplug();
677} 670}
678#else
679static void update_cpu_domains(struct cpuset *cur)
680{
681}
682#endif
683 671
684static int update_cpumask(struct cpuset *cs, char *buf) 672static int update_cpumask(struct cpuset *cs, char *buf)
685{ 673{
@@ -1611,17 +1599,114 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
1611 return 0; 1599 return 0;
1612} 1600}
1613 1601
1602/*
1603 * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
1604 * ancestor to the specified cpuset. Call while holding cpuset_sem.
1605 * If no ancestor is mem_exclusive (an unusual configuration), then
1606 * returns the root cpuset.
1607 */
1608static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
1609{
1610 while (!is_mem_exclusive(cs) && cs->parent)
1611 cs = cs->parent;
1612 return cs;
1613}
1614
1614/** 1615/**
1615 * cpuset_zone_allowed - is zone z allowed in current->mems_allowed 1616 * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
1616 * @z: zone in question 1617 * @z: is this zone on an allowed node?
1618 * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
1617 * 1619 *
1618 * Is zone z allowed in current->mems_allowed, or is 1620 * If we're in interrupt, yes, we can always allocate. If zone
1619 * the CPU in interrupt context? (zone is always allowed in this case) 1621 * z's node is in our tasks mems_allowed, yes. If it's not a
1620 */ 1622 * __GFP_HARDWALL request and this zone's nodes is in the nearest
1621int cpuset_zone_allowed(struct zone *z) 1623 * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
1624 * Otherwise, no.
1625 *
1626 * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
1627 * and do not allow allocations outside the current tasks cpuset.
1628 * GFP_KERNEL allocations are not so marked, so can escape to the
1629 * nearest mem_exclusive ancestor cpuset.
1630 *
1631 * Scanning up parent cpusets requires cpuset_sem. The __alloc_pages()
1632 * routine only calls here with __GFP_HARDWALL bit _not_ set if
1633 * it's a GFP_KERNEL allocation, and all nodes in the current tasks
1634 * mems_allowed came up empty on the first pass over the zonelist.
1635 * So only GFP_KERNEL allocations, if all nodes in the cpuset are
1636 * short of memory, might require taking the cpuset_sem semaphore.
1637 *
1638 * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
1639 * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
1640 * hardwall cpusets - no allocation on a node outside the cpuset is
1641 * allowed (unless in interrupt, of course).
1642 *
1643 * The second loop doesn't even call here for GFP_ATOMIC requests
1644 * (if the __alloc_pages() local variable 'wait' is set). That check
1645 * and the checks below have the combined affect in the second loop of
1646 * the __alloc_pages() routine that:
1647 * in_interrupt - any node ok (current task context irrelevant)
1648 * GFP_ATOMIC - any node ok
1649 * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok
1650 * GFP_USER - only nodes in current tasks mems allowed ok.
1651 **/
1652
1653int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
1622{ 1654{
1623 return in_interrupt() || 1655 int node; /* node that zone z is on */
1624 node_isset(z->zone_pgdat->node_id, current->mems_allowed); 1656 const struct cpuset *cs; /* current cpuset ancestors */
1657 int allowed = 1; /* is allocation in zone z allowed? */
1658
1659 if (in_interrupt())
1660 return 1;
1661 node = z->zone_pgdat->node_id;
1662 if (node_isset(node, current->mems_allowed))
1663 return 1;
1664 if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
1665 return 0;
1666
1667 /* Not hardwall and node outside mems_allowed: scan up cpusets */
1668 down(&cpuset_sem);
1669 cs = current->cpuset;
1670 if (!cs)
1671 goto done; /* current task exiting */
1672 cs = nearest_exclusive_ancestor(cs);
1673 allowed = node_isset(node, cs->mems_allowed);
1674done:
1675 up(&cpuset_sem);
1676 return allowed;
1677}
1678
1679/**
1680 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
1681 * @p: pointer to task_struct of some other task.
1682 *
1683 * Description: Return true if the nearest mem_exclusive ancestor
1684 * cpusets of tasks @p and current overlap. Used by oom killer to
1685 * determine if task @p's memory usage might impact the memory
1686 * available to the current task.
1687 *
1688 * Acquires cpuset_sem - not suitable for calling from a fast path.
1689 **/
1690
1691int cpuset_excl_nodes_overlap(const struct task_struct *p)
1692{
1693 const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
1694 int overlap = 0; /* do cpusets overlap? */
1695
1696 down(&cpuset_sem);
1697 cs1 = current->cpuset;
1698 if (!cs1)
1699 goto done; /* current task exiting */
1700 cs2 = p->cpuset;
1701 if (!cs2)
1702 goto done; /* task p is exiting */
1703 cs1 = nearest_exclusive_ancestor(cs1);
1704 cs2 = nearest_exclusive_ancestor(cs2);
1705 overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
1706done:
1707 up(&cpuset_sem);
1708
1709 return overlap;
1625} 1710}
1626 1711
1627/* 1712/*
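The reworked cpuset_zone_allowed() replaces the old single node_isset() test with the decision ladder documented in its comment: interrupt context always passes, a node in current->mems_allowed passes, a __GFP_HARDWALL request stops there, and other requests may fall back to the nearest mem_exclusive ancestor cpuset. A minimal sketch of the two-pass zonelist scan that comment describes (this is not the actual mm/page_alloc.c code, and try_alloc_from_zone() is a hypothetical placeholder):

/* Sketch only: illustrates the __GFP_HARDWALL distinction documented in
 * the cpuset_zone_allowed() comment above.  try_alloc_from_zone() is a
 * hypothetical stand-in for the real allocator fast path. */
static struct page *sketch_alloc_pages(unsigned int gfp_mask, struct zonelist *zl)
{
	struct zone **z;
	struct page *page;

	/* Pass 1: hardwall - only nodes in current->mems_allowed. */
	for (z = zl->zones; *z != NULL; z++) {
		if (!cpuset_zone_allowed(*z, gfp_mask | __GFP_HARDWALL))
			continue;
		if ((page = try_alloc_from_zone(*z, gfp_mask)) != NULL)
			return page;
	}

	/* Pass 2: requests without __GFP_HARDWALL (GFP_KERNEL style) may
	 * spill into the nearest mem_exclusive ancestor cpuset's nodes. */
	for (z = zl->zones; *z != NULL; z++) {
		if (!cpuset_zone_allowed(*z, gfp_mask))
			continue;
		if ((page = try_alloc_from_zone(*z, gfp_mask)) != NULL)
			return page;
	}
	return NULL;
}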
diff --git a/kernel/futex.c b/kernel/futex.c
index c7130f86106c..ca05fe6a70b2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -40,6 +40,7 @@
40#include <linux/pagemap.h> 40#include <linux/pagemap.h>
41#include <linux/syscalls.h> 41#include <linux/syscalls.h>
42#include <linux/signal.h> 42#include <linux/signal.h>
43#include <asm/futex.h>
43 44
44#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) 45#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
45 46
@@ -327,6 +328,118 @@ out:
327} 328}
328 329
329/* 330/*
331 * Wake up all waiters hashed on the physical page that is mapped
332 * to this virtual address:
333 */
334static int futex_wake_op(unsigned long uaddr1, unsigned long uaddr2, int nr_wake, int nr_wake2, int op)
335{
336 union futex_key key1, key2;
337 struct futex_hash_bucket *bh1, *bh2;
338 struct list_head *head;
339 struct futex_q *this, *next;
340 int ret, op_ret, attempt = 0;
341
342retryfull:
343 down_read(&current->mm->mmap_sem);
344
345 ret = get_futex_key(uaddr1, &key1);
346 if (unlikely(ret != 0))
347 goto out;
348 ret = get_futex_key(uaddr2, &key2);
349 if (unlikely(ret != 0))
350 goto out;
351
352 bh1 = hash_futex(&key1);
353 bh2 = hash_futex(&key2);
354
355retry:
356 if (bh1 < bh2)
357 spin_lock(&bh1->lock);
358 spin_lock(&bh2->lock);
359 if (bh1 > bh2)
360 spin_lock(&bh1->lock);
361
362 op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
363 if (unlikely(op_ret < 0)) {
364 int dummy;
365
366 spin_unlock(&bh1->lock);
367 if (bh1 != bh2)
368 spin_unlock(&bh2->lock);
369
370 /* futex_atomic_op_inuser needs to both read and write
371 * *(int __user *)uaddr2, but we can't modify it
372 * non-atomically. Therefore, if get_user below is not
373 * enough, we need to handle the fault ourselves, while
374 * still holding the mmap_sem. */
375 if (attempt++) {
376 struct vm_area_struct * vma;
377 struct mm_struct *mm = current->mm;
378
379 ret = -EFAULT;
380 if (attempt >= 2 ||
381 !(vma = find_vma(mm, uaddr2)) ||
382 vma->vm_start > uaddr2 ||
383 !(vma->vm_flags & VM_WRITE))
384 goto out;
385
386 switch (handle_mm_fault(mm, vma, uaddr2, 1)) {
387 case VM_FAULT_MINOR:
388 current->min_flt++;
389 break;
390 case VM_FAULT_MAJOR:
391 current->maj_flt++;
392 break;
393 default:
394 goto out;
395 }
396 goto retry;
397 }
398
399 /* If we would have faulted, release mmap_sem,
400 * fault it in and start all over again. */
401 up_read(&current->mm->mmap_sem);
402
403 ret = get_user(dummy, (int __user *)uaddr2);
404 if (ret)
405 return ret;
406
407 goto retryfull;
408 }
409
410 head = &bh1->chain;
411
412 list_for_each_entry_safe(this, next, head, list) {
413 if (match_futex (&this->key, &key1)) {
414 wake_futex(this);
415 if (++ret >= nr_wake)
416 break;
417 }
418 }
419
420 if (op_ret > 0) {
421 head = &bh2->chain;
422
423 op_ret = 0;
424 list_for_each_entry_safe(this, next, head, list) {
425 if (match_futex (&this->key, &key2)) {
426 wake_futex(this);
427 if (++op_ret >= nr_wake2)
428 break;
429 }
430 }
431 ret += op_ret;
432 }
433
434 spin_unlock(&bh1->lock);
435 if (bh1 != bh2)
436 spin_unlock(&bh2->lock);
437out:
438 up_read(&current->mm->mmap_sem);
439 return ret;
440}
441
442/*
330 * Requeue all waiters hashed on one physical page to another 443 * Requeue all waiters hashed on one physical page to another
331 * physical page. 444 * physical page.
332 */ 445 */
@@ -673,23 +786,17 @@ static int futex_fd(unsigned long uaddr, int signal)
673 filp->f_mapping = filp->f_dentry->d_inode->i_mapping; 786 filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
674 787
675 if (signal) { 788 if (signal) {
676 int err;
677 err = f_setown(filp, current->pid, 1); 789 err = f_setown(filp, current->pid, 1);
678 if (err < 0) { 790 if (err < 0) {
679 put_unused_fd(ret); 791 goto error;
680 put_filp(filp);
681 ret = err;
682 goto out;
683 } 792 }
684 filp->f_owner.signum = signal; 793 filp->f_owner.signum = signal;
685 } 794 }
686 795
687 q = kmalloc(sizeof(*q), GFP_KERNEL); 796 q = kmalloc(sizeof(*q), GFP_KERNEL);
688 if (!q) { 797 if (!q) {
689 put_unused_fd(ret); 798 err = -ENOMEM;
690 put_filp(filp); 799 goto error;
691 ret = -ENOMEM;
692 goto out;
693 } 800 }
694 801
695 down_read(&current->mm->mmap_sem); 802 down_read(&current->mm->mmap_sem);
@@ -697,10 +804,8 @@ static int futex_fd(unsigned long uaddr, int signal)
697 804
698 if (unlikely(err != 0)) { 805 if (unlikely(err != 0)) {
699 up_read(&current->mm->mmap_sem); 806 up_read(&current->mm->mmap_sem);
700 put_unused_fd(ret);
701 put_filp(filp);
702 kfree(q); 807 kfree(q);
703 return err; 808 goto error;
704 } 809 }
705 810
706 /* 811 /*
@@ -716,6 +821,11 @@ static int futex_fd(unsigned long uaddr, int signal)
716 fd_install(ret, filp); 821 fd_install(ret, filp);
717out: 822out:
718 return ret; 823 return ret;
824error:
825 put_unused_fd(ret);
826 put_filp(filp);
827 ret = err;
828 goto out;
719} 829}
720 830
721long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, 831long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
@@ -740,6 +850,9 @@ long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
740 case FUTEX_CMP_REQUEUE: 850 case FUTEX_CMP_REQUEUE:
741 ret = futex_requeue(uaddr, uaddr2, val, val2, &val3); 851 ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
742 break; 852 break;
853 case FUTEX_WAKE_OP:
854 ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
855 break;
743 default: 856 default:
744 ret = -ENOSYS; 857 ret = -ENOSYS;
745 } 858 }
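The new FUTEX_WAKE_OP operation lets a single system call wake waiters on one futex word, atomically apply an encoded operation to a second word, and wake waiters on that second word as well if the encoded comparison on its old value succeeds. A user-space sketch, assuming a kernel and <linux/futex.h> that carry this patch (FUTEX_WAKE_OP, FUTEX_OP() and the FUTEX_OP_* constants come from that header; glibc provides no wrapper, so the raw syscall is used):

#include <limits.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Wake up to one waiter on word1; set *word2 to 0 and, if its old value
 * was greater than 0, wake up to INT_MAX waiters on word2 - all in one
 * call.  nr_wake2 travels in the slot normally used for the timeout. */
static long futex_wake_op_call(int *word1, int *word2)
{
	return syscall(SYS_futex, word1, FUTEX_WAKE_OP,
		       1,                        /* nr_wake on word1 */
		       (void *)(long)INT_MAX,    /* val2: nr_wake2 on word2 */
		       word2,
		       FUTEX_OP(FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 0));
}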
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 388977f3e9b7..0cbe633420fb 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -39,7 +39,7 @@ void inter_module_register(const char *im_name, struct module *owner, const void
39 struct list_head *tmp; 39 struct list_head *tmp;
40 struct inter_module_entry *ime, *ime_new; 40 struct inter_module_entry *ime, *ime_new;
41 41
42 if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) { 42 if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) {
43 /* Overloaded kernel, not fatal */ 43 /* Overloaded kernel, not fatal */
44 printk(KERN_ERR 44 printk(KERN_ERR
45 "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n", 45 "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n",
@@ -47,7 +47,6 @@ void inter_module_register(const char *im_name, struct module *owner, const void
47 kmalloc_failed = 1; 47 kmalloc_failed = 1;
48 return; 48 return;
49 } 49 }
50 memset(ime_new, 0, sizeof(*ime_new));
51 ime_new->im_name = im_name; 50 ime_new->im_name = im_name;
52 ime_new->owner = owner; 51 ime_new->owner = owner;
53 ime_new->userdata = userdata; 52 ime_new->userdata = userdata;
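Several files in this series (intermodule.c here, plus params.c, power/pm.c and resource.c below) convert an open-coded kmalloc()+memset() pair into kzalloc(). The conversion is behaviour-preserving; as a sketch (paraphrased, not copied from the slab headers), kzalloc() amounts to:

/* Sketch of the equivalence these conversions rely on. */
static inline void *kzalloc_sketch(size_t size, unsigned int flags)
{
	void *p = kmalloc(size, flags);

	if (p)
		memset(p, 0, size);
	return p;
}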
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c29f83c16497..3ff7b925c387 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -111,7 +111,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
111 unsigned int status; 111 unsigned int status;
112 112
113 kstat_this_cpu.irqs[irq]++; 113 kstat_this_cpu.irqs[irq]++;
114 if (desc->status & IRQ_PER_CPU) { 114 if (CHECK_IRQ_PER_CPU(desc->status)) {
115 irqreturn_t action_ret; 115 irqreturn_t action_ret;
116 116
117 /* 117 /*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ac6700985705..1cfdb08ddf20 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -18,6 +18,10 @@
18 18
19cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL }; 19cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
20 20
21#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
22cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
23#endif
24
21/** 25/**
22 * synchronize_irq - wait for pending IRQ handlers (on other CPUs) 26 * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
23 * 27 *
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 85d08daa6600..f26e534c6585 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,12 +19,22 @@ static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
19 */ 19 */
20static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; 20static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
21 21
22void __attribute__((weak)) 22#ifdef CONFIG_GENERIC_PENDING_IRQ
23proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) 23void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
24{
25 /*
26 * Save these away for later use. Re-progam when the
27 * interrupt is pending
28 */
29 set_pending_irq(irq, mask_val);
30}
31#else
32void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
24{ 33{
25 irq_affinity[irq] = mask_val; 34 irq_affinity[irq] = mask_val;
26 irq_desc[irq].handler->set_affinity(irq, mask_val); 35 irq_desc[irq].handler->set_affinity(irq, mask_val);
27} 36}
37#endif
28 38
29static int irq_affinity_read_proc(char *page, char **start, off_t off, 39static int irq_affinity_read_proc(char *page, char **start, off_t off,
30 int count, int *eof, void *data) 40 int count, int *eof, void *data)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b0237122b24e..f3ea492ab44d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -37,6 +37,7 @@
37#include <linux/init.h> 37#include <linux/init.h>
38#include <linux/module.h> 38#include <linux/module.h>
39#include <linux/moduleloader.h> 39#include <linux/moduleloader.h>
40#include <asm-generic/sections.h>
40#include <asm/cacheflush.h> 41#include <asm/cacheflush.h>
41#include <asm/errno.h> 42#include <asm/errno.h>
42#include <asm/kdebug.h> 43#include <asm/kdebug.h>
@@ -72,7 +73,7 @@ static struct hlist_head kprobe_insn_pages;
72 * get_insn_slot() - Find a slot on an executable page for an instruction. 73 * get_insn_slot() - Find a slot on an executable page for an instruction.
73 * We allocate an executable page if there's no room on existing ones. 74 * We allocate an executable page if there's no room on existing ones.
74 */ 75 */
75kprobe_opcode_t *get_insn_slot(void) 76kprobe_opcode_t __kprobes *get_insn_slot(void)
76{ 77{
77 struct kprobe_insn_page *kip; 78 struct kprobe_insn_page *kip;
78 struct hlist_node *pos; 79 struct hlist_node *pos;
@@ -117,7 +118,7 @@ kprobe_opcode_t *get_insn_slot(void)
117 return kip->insns; 118 return kip->insns;
118} 119}
119 120
120void free_insn_slot(kprobe_opcode_t *slot) 121void __kprobes free_insn_slot(kprobe_opcode_t *slot)
121{ 122{
122 struct kprobe_insn_page *kip; 123 struct kprobe_insn_page *kip;
123 struct hlist_node *pos; 124 struct hlist_node *pos;
@@ -152,20 +153,42 @@ void free_insn_slot(kprobe_opcode_t *slot)
152} 153}
153 154
154/* Locks kprobe: irqs must be disabled */ 155/* Locks kprobe: irqs must be disabled */
155void lock_kprobes(void) 156void __kprobes lock_kprobes(void)
156{ 157{
158 unsigned long flags = 0;
159
160 /* Avoiding local interrupts to happen right after we take the kprobe_lock
161 * and before we get a chance to update kprobe_cpu, this to prevent
162 * deadlock when we have a kprobe on ISR routine and a kprobe on task
163 * routine
164 */
165 local_irq_save(flags);
166
157 spin_lock(&kprobe_lock); 167 spin_lock(&kprobe_lock);
158 kprobe_cpu = smp_processor_id(); 168 kprobe_cpu = smp_processor_id();
169
170 local_irq_restore(flags);
159} 171}
160 172
161void unlock_kprobes(void) 173void __kprobes unlock_kprobes(void)
162{ 174{
175 unsigned long flags = 0;
176
177 /* Avoiding local interrupts to happen right after we update
178 * kprobe_cpu and before we get a a chance to release kprobe_lock,
179 * this to prevent deadlock when we have a kprobe on ISR routine and
180 * a kprobe on task routine
181 */
182 local_irq_save(flags);
183
163 kprobe_cpu = NR_CPUS; 184 kprobe_cpu = NR_CPUS;
164 spin_unlock(&kprobe_lock); 185 spin_unlock(&kprobe_lock);
186
187 local_irq_restore(flags);
165} 188}
166 189
167/* You have to be holding the kprobe_lock */ 190/* You have to be holding the kprobe_lock */
168struct kprobe *get_kprobe(void *addr) 191struct kprobe __kprobes *get_kprobe(void *addr)
169{ 192{
170 struct hlist_head *head; 193 struct hlist_head *head;
171 struct hlist_node *node; 194 struct hlist_node *node;
@@ -183,7 +206,7 @@ struct kprobe *get_kprobe(void *addr)
183 * Aggregate handlers for multiple kprobes support - these handlers 206 * Aggregate handlers for multiple kprobes support - these handlers
184 * take care of invoking the individual kprobe handlers on p->list 207 * take care of invoking the individual kprobe handlers on p->list
185 */ 208 */
186static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) 209static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
187{ 210{
188 struct kprobe *kp; 211 struct kprobe *kp;
189 212
@@ -198,8 +221,8 @@ static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
198 return 0; 221 return 0;
199} 222}
200 223
201static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, 224static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
202 unsigned long flags) 225 unsigned long flags)
203{ 226{
204 struct kprobe *kp; 227 struct kprobe *kp;
205 228
@@ -213,8 +236,8 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
213 return; 236 return;
214} 237}
215 238
216static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, 239static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
217 int trapnr) 240 int trapnr)
218{ 241{
219 /* 242 /*
220 * if we faulted "during" the execution of a user specified 243 * if we faulted "during" the execution of a user specified
@@ -227,7 +250,7 @@ static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
227 return 0; 250 return 0;
228} 251}
229 252
230static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) 253static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
231{ 254{
232 struct kprobe *kp = curr_kprobe; 255 struct kprobe *kp = curr_kprobe;
233 if (curr_kprobe && kp->break_handler) { 256 if (curr_kprobe && kp->break_handler) {
@@ -240,7 +263,7 @@ static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
240 return 0; 263 return 0;
241} 264}
242 265
243struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp) 266struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp)
244{ 267{
245 struct hlist_node *node; 268 struct hlist_node *node;
246 struct kretprobe_instance *ri; 269 struct kretprobe_instance *ri;
@@ -249,7 +272,8 @@ struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
249 return NULL; 272 return NULL;
250} 273}
251 274
252static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp) 275static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe
276 *rp)
253{ 277{
254 struct hlist_node *node; 278 struct hlist_node *node;
255 struct kretprobe_instance *ri; 279 struct kretprobe_instance *ri;
@@ -258,7 +282,7 @@ static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
258 return NULL; 282 return NULL;
259} 283}
260 284
261void add_rp_inst(struct kretprobe_instance *ri) 285void __kprobes add_rp_inst(struct kretprobe_instance *ri)
262{ 286{
263 /* 287 /*
264 * Remove rp inst off the free list - 288 * Remove rp inst off the free list -
@@ -276,7 +300,7 @@ void add_rp_inst(struct kretprobe_instance *ri)
276 hlist_add_head(&ri->uflist, &ri->rp->used_instances); 300 hlist_add_head(&ri->uflist, &ri->rp->used_instances);
277} 301}
278 302
279void recycle_rp_inst(struct kretprobe_instance *ri) 303void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
280{ 304{
281 /* remove rp inst off the rprobe_inst_table */ 305 /* remove rp inst off the rprobe_inst_table */
282 hlist_del(&ri->hlist); 306 hlist_del(&ri->hlist);
@@ -291,7 +315,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri)
291 kfree(ri); 315 kfree(ri);
292} 316}
293 317
294struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk) 318struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
295{ 319{
296 return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; 320 return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
297} 321}
@@ -302,7 +326,7 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
302 * instances associated with this task. These left over instances represent 326 * instances associated with this task. These left over instances represent
303 * probed functions that have been called but will never return. 327 * probed functions that have been called but will never return.
304 */ 328 */
305void kprobe_flush_task(struct task_struct *tk) 329void __kprobes kprobe_flush_task(struct task_struct *tk)
306{ 330{
307 struct kretprobe_instance *ri; 331 struct kretprobe_instance *ri;
308 struct hlist_head *head; 332 struct hlist_head *head;
@@ -322,7 +346,8 @@ void kprobe_flush_task(struct task_struct *tk)
322 * This kprobe pre_handler is registered with every kretprobe. When probe 346 * This kprobe pre_handler is registered with every kretprobe. When probe
323 * hits it will set up the return probe. 347 * hits it will set up the return probe.
324 */ 348 */
325static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) 349static int __kprobes pre_handler_kretprobe(struct kprobe *p,
350 struct pt_regs *regs)
326{ 351{
327 struct kretprobe *rp = container_of(p, struct kretprobe, kp); 352 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
328 353
@@ -353,7 +378,7 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
353* Add the new probe to old_p->list. Fail if this is the 378* Add the new probe to old_p->list. Fail if this is the
354* second jprobe at the address - two jprobes can't coexist 379* second jprobe at the address - two jprobes can't coexist
355*/ 380*/
356static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p) 381static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
357{ 382{
358 struct kprobe *kp; 383 struct kprobe *kp;
359 384
@@ -395,7 +420,8 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
395 * the intricacies 420 * the intricacies
396 * TODO: Move kcalloc outside the spinlock 421 * TODO: Move kcalloc outside the spinlock
397 */ 422 */
398static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) 423static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
424 struct kprobe *p)
399{ 425{
400 int ret = 0; 426 int ret = 0;
401 struct kprobe *ap; 427 struct kprobe *ap;
@@ -434,15 +460,25 @@ static inline void cleanup_aggr_kprobe(struct kprobe *old_p,
434 spin_unlock_irqrestore(&kprobe_lock, flags); 460 spin_unlock_irqrestore(&kprobe_lock, flags);
435} 461}
436 462
437int register_kprobe(struct kprobe *p) 463static int __kprobes in_kprobes_functions(unsigned long addr)
464{
465 if (addr >= (unsigned long)__kprobes_text_start
466 && addr < (unsigned long)__kprobes_text_end)
467 return -EINVAL;
468 return 0;
469}
470
471int __kprobes register_kprobe(struct kprobe *p)
438{ 472{
439 int ret = 0; 473 int ret = 0;
440 unsigned long flags = 0; 474 unsigned long flags = 0;
441 struct kprobe *old_p; 475 struct kprobe *old_p;
442 476
443 if ((ret = arch_prepare_kprobe(p)) != 0) { 477 if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0)
478 return ret;
479 if ((ret = arch_prepare_kprobe(p)) != 0)
444 goto rm_kprobe; 480 goto rm_kprobe;
445 } 481
446 spin_lock_irqsave(&kprobe_lock, flags); 482 spin_lock_irqsave(&kprobe_lock, flags);
447 old_p = get_kprobe(p->addr); 483 old_p = get_kprobe(p->addr);
448 p->nmissed = 0; 484 p->nmissed = 0;
@@ -466,7 +502,7 @@ rm_kprobe:
466 return ret; 502 return ret;
467} 503}
468 504
469void unregister_kprobe(struct kprobe *p) 505void __kprobes unregister_kprobe(struct kprobe *p)
470{ 506{
471 unsigned long flags; 507 unsigned long flags;
472 struct kprobe *old_p; 508 struct kprobe *old_p;
@@ -487,7 +523,7 @@ static struct notifier_block kprobe_exceptions_nb = {
487 .priority = 0x7fffffff /* we need to notified first */ 523 .priority = 0x7fffffff /* we need to notified first */
488}; 524};
489 525
490int register_jprobe(struct jprobe *jp) 526int __kprobes register_jprobe(struct jprobe *jp)
491{ 527{
492 /* Todo: Verify probepoint is a function entry point */ 528 /* Todo: Verify probepoint is a function entry point */
493 jp->kp.pre_handler = setjmp_pre_handler; 529 jp->kp.pre_handler = setjmp_pre_handler;
@@ -496,14 +532,14 @@ int register_jprobe(struct jprobe *jp)
496 return register_kprobe(&jp->kp); 532 return register_kprobe(&jp->kp);
497} 533}
498 534
499void unregister_jprobe(struct jprobe *jp) 535void __kprobes unregister_jprobe(struct jprobe *jp)
500{ 536{
501 unregister_kprobe(&jp->kp); 537 unregister_kprobe(&jp->kp);
502} 538}
503 539
504#ifdef ARCH_SUPPORTS_KRETPROBES 540#ifdef ARCH_SUPPORTS_KRETPROBES
505 541
506int register_kretprobe(struct kretprobe *rp) 542int __kprobes register_kretprobe(struct kretprobe *rp)
507{ 543{
508 int ret = 0; 544 int ret = 0;
509 struct kretprobe_instance *inst; 545 struct kretprobe_instance *inst;
@@ -540,14 +576,14 @@ int register_kretprobe(struct kretprobe *rp)
540 576
541#else /* ARCH_SUPPORTS_KRETPROBES */ 577#else /* ARCH_SUPPORTS_KRETPROBES */
542 578
543int register_kretprobe(struct kretprobe *rp) 579int __kprobes register_kretprobe(struct kretprobe *rp)
544{ 580{
545 return -ENOSYS; 581 return -ENOSYS;
546} 582}
547 583
548#endif /* ARCH_SUPPORTS_KRETPROBES */ 584#endif /* ARCH_SUPPORTS_KRETPROBES */
549 585
550void unregister_kretprobe(struct kretprobe *rp) 586void __kprobes unregister_kretprobe(struct kretprobe *rp)
551{ 587{
552 unsigned long flags; 588 unsigned long flags;
553 struct kretprobe_instance *ri; 589 struct kretprobe_instance *ri;
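Beyond the locking changes, every kprobes helper that can run while a probe is being serviced is now tagged __kprobes, and register_kprobe() rejects addresses inside that region via in_kprobes_functions(). A sketch of the mechanism, with the attribute definition paraphrased from the kprobes headers of this era and the section bounds supplied by the linker script through <asm-generic/sections.h>:

/* Sketch: __kprobes moves a function into .kprobes.text so the code that
 * services a probe can never itself be probed (which would recurse). */
#define __kprobes __attribute__((__section__(".kprobes.text")))

extern char __kprobes_text_start[], __kprobes_text_end[];

static int probe_address_is_forbidden(unsigned long addr)
{
	return addr >= (unsigned long)__kprobes_text_start &&
	       addr <  (unsigned long)__kprobes_text_end;
}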
diff --git a/kernel/module.c b/kernel/module.c
index c32995fbd8fd..4b39d3793c72 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1509,6 +1509,7 @@ static struct module *load_module(void __user *umod,
1509 long err = 0; 1509 long err = 0;
1510 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ 1510 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
1511 struct exception_table_entry *extable; 1511 struct exception_table_entry *extable;
1512 mm_segment_t old_fs;
1512 1513
1513 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", 1514 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
1514 umod, len, uargs); 1515 umod, len, uargs);
@@ -1779,6 +1780,24 @@ static struct module *load_module(void __user *umod,
1779 if (err < 0) 1780 if (err < 0)
1780 goto cleanup; 1781 goto cleanup;
1781 1782
1783 /* flush the icache in correct context */
1784 old_fs = get_fs();
1785 set_fs(KERNEL_DS);
1786
1787 /*
1788 * Flush the instruction cache, since we've played with text.
1789 * Do it before processing of module parameters, so the module
1790 * can provide parameter accessor functions of its own.
1791 */
1792 if (mod->module_init)
1793 flush_icache_range((unsigned long)mod->module_init,
1794 (unsigned long)mod->module_init
1795 + mod->init_size);
1796 flush_icache_range((unsigned long)mod->module_core,
1797 (unsigned long)mod->module_core + mod->core_size);
1798
1799 set_fs(old_fs);
1800
1782 mod->args = args; 1801 mod->args = args;
1783 if (obsparmindex) { 1802 if (obsparmindex) {
1784 err = obsolete_params(mod->name, mod->args, 1803 err = obsolete_params(mod->name, mod->args,
@@ -1860,7 +1879,6 @@ sys_init_module(void __user *umod,
1860 const char __user *uargs) 1879 const char __user *uargs)
1861{ 1880{
1862 struct module *mod; 1881 struct module *mod;
1863 mm_segment_t old_fs = get_fs();
1864 int ret = 0; 1882 int ret = 0;
1865 1883
1866 /* Must have permission */ 1884 /* Must have permission */
@@ -1878,19 +1896,6 @@ sys_init_module(void __user *umod,
1878 return PTR_ERR(mod); 1896 return PTR_ERR(mod);
1879 } 1897 }
1880 1898
1881 /* flush the icache in correct context */
1882 set_fs(KERNEL_DS);
1883
1884 /* Flush the instruction cache, since we've played with text */
1885 if (mod->module_init)
1886 flush_icache_range((unsigned long)mod->module_init,
1887 (unsigned long)mod->module_init
1888 + mod->init_size);
1889 flush_icache_range((unsigned long)mod->module_core,
1890 (unsigned long)mod->module_core + mod->core_size);
1891
1892 set_fs(old_fs);
1893
1894 /* Now sew it into the lists. They won't access us, since 1899 /* Now sew it into the lists. They won't access us, since
1895 strong_try_module_get() will fail. */ 1900 strong_try_module_get() will fail. */
1896 stop_machine_run(__link_module, mod, NR_CPUS); 1901 stop_machine_run(__link_module, mod, NR_CPUS);
diff --git a/kernel/params.c b/kernel/params.c
index d586c35ef8fc..fbf173215fd2 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -542,8 +542,8 @@ static void __init kernel_param_sysfs_setup(const char *name,
542{ 542{
543 struct module_kobject *mk; 543 struct module_kobject *mk;
544 544
545 mk = kmalloc(sizeof(struct module_kobject), GFP_KERNEL); 545 mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
546 memset(mk, 0, sizeof(struct module_kobject)); 546 BUG_ON(!mk);
547 547
548 mk->mod = THIS_MODULE; 548 mk->mod = THIS_MODULE;
549 kobj_set_kset_s(mk, module_subsys); 549 kobj_set_kset_s(mk, module_subsys);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 38798a2ff994..b7b532acd9fc 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -427,21 +427,23 @@ int posix_timer_event(struct k_itimer *timr,int si_private)
427 timr->sigq->info.si_code = SI_TIMER; 427 timr->sigq->info.si_code = SI_TIMER;
428 timr->sigq->info.si_tid = timr->it_id; 428 timr->sigq->info.si_tid = timr->it_id;
429 timr->sigq->info.si_value = timr->it_sigev_value; 429 timr->sigq->info.si_value = timr->it_sigev_value;
430
430 if (timr->it_sigev_notify & SIGEV_THREAD_ID) { 431 if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
431 if (unlikely(timr->it_process->flags & PF_EXITING)) { 432 struct task_struct *leader;
432 timr->it_sigev_notify = SIGEV_SIGNAL; 433 int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
433 put_task_struct(timr->it_process); 434 timr->it_process);
434 timr->it_process = timr->it_process->group_leader; 435
435 goto group; 436 if (likely(ret >= 0))
436 } 437 return ret;
437 return send_sigqueue(timr->it_sigev_signo, timr->sigq, 438
438 timr->it_process); 439 timr->it_sigev_notify = SIGEV_SIGNAL;
439 } 440 leader = timr->it_process->group_leader;
440 else { 441 put_task_struct(timr->it_process);
441 group: 442 timr->it_process = leader;
442 return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
443 timr->it_process);
444 } 443 }
444
445 return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
446 timr->it_process);
445} 447}
446EXPORT_SYMBOL_GPL(posix_timer_event); 448EXPORT_SYMBOL_GPL(posix_timer_event);
447 449
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 917066a5767c..c14cd9991181 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -28,7 +28,7 @@ config PM_DEBUG
28 28
29config SOFTWARE_SUSPEND 29config SOFTWARE_SUSPEND
30 bool "Software Suspend" 30 bool "Software Suspend"
31 depends on EXPERIMENTAL && PM && SWAP && ((X86 && SMP) || ((FVR || PPC32 || X86) && !SMP)) 31 depends on PM && SWAP && (X86 || ((FVR || PPC32) && !SMP))
32 ---help--- 32 ---help---
33 Enable the possibility of suspending the machine. 33 Enable the possibility of suspending the machine.
34 It doesn't need APM. 34 It doesn't need APM.
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index 61deda04e39e..159149321b3c 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -60,9 +60,8 @@ struct pm_dev *pm_register(pm_dev_t type,
60 unsigned long id, 60 unsigned long id,
61 pm_callback callback) 61 pm_callback callback)
62{ 62{
63 struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL); 63 struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL);
64 if (dev) { 64 if (dev) {
65 memset(dev, 0, sizeof(*dev));
66 dev->type = type; 65 dev->type = type;
67 dev->id = id; 66 dev->id = id;
68 dev->callback = callback; 67 dev->callback = callback;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index eaacd5cb5889..d967e875ee82 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -1059,6 +1059,7 @@ int swsusp_resume(void)
1059 BUG_ON(!error); 1059 BUG_ON(!error);
1060 restore_processor_state(); 1060 restore_processor_state();
1061 restore_highmem(); 1061 restore_highmem();
1062 touch_softlockup_watchdog();
1062 device_power_up(); 1063 device_power_up();
1063 local_irq_enable(); 1064 local_irq_enable();
1064 return error; 1065 return error;
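The single line added to swsusp_resume() ties into the new soft-lockup detector (kernel/softlockup.c, added elsewhere in this series): touch_softlockup_watchdog() tells the detector that the long gap in timer ticks across suspend/resume was expected rather than a lockup. A hypothetical driver-style loop would use it the same way (device_ready() is invented for the example):

/* Hypothetical example: keep the soft-lockup detector quiet across a
 * legitimately long, non-preemptible polling loop.  device_ready() is
 * not a real kernel function. */
while (!device_ready()) {
	cpu_relax();
	touch_softlockup_watchdog();
}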
diff --git a/kernel/printk.c b/kernel/printk.c
index 5092397fac29..a967605bc2e3 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -514,6 +514,9 @@ asmlinkage int printk(const char *fmt, ...)
514 return r; 514 return r;
515} 515}
516 516
517/* cpu currently holding logbuf_lock */
518static volatile unsigned int printk_cpu = UINT_MAX;
519
517asmlinkage int vprintk(const char *fmt, va_list args) 520asmlinkage int vprintk(const char *fmt, va_list args)
518{ 521{
519 unsigned long flags; 522 unsigned long flags;
@@ -522,11 +525,15 @@ asmlinkage int vprintk(const char *fmt, va_list args)
522 static char printk_buf[1024]; 525 static char printk_buf[1024];
523 static int log_level_unknown = 1; 526 static int log_level_unknown = 1;
524 527
525 if (unlikely(oops_in_progress)) 528 preempt_disable();
529 if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
530 /* If a crash is occurring during printk() on this CPU,
531 * make sure we can't deadlock */
526 zap_locks(); 532 zap_locks();
527 533
528 /* This stops the holder of console_sem just where we want him */ 534 /* This stops the holder of console_sem just where we want him */
529 spin_lock_irqsave(&logbuf_lock, flags); 535 spin_lock_irqsave(&logbuf_lock, flags);
536 printk_cpu = smp_processor_id();
530 537
531 /* Emit the output into the temporary buffer */ 538 /* Emit the output into the temporary buffer */
532 printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args); 539 printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
@@ -595,6 +602,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
595 * CPU until it is officially up. We shouldn't be calling into 602 * CPU until it is officially up. We shouldn't be calling into
596 * random console drivers on a CPU which doesn't exist yet.. 603 * random console drivers on a CPU which doesn't exist yet..
597 */ 604 */
605 printk_cpu = UINT_MAX;
598 spin_unlock_irqrestore(&logbuf_lock, flags); 606 spin_unlock_irqrestore(&logbuf_lock, flags);
599 goto out; 607 goto out;
600 } 608 }
@@ -604,6 +612,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
604 * We own the drivers. We can drop the spinlock and let 612 * We own the drivers. We can drop the spinlock and let
605 * release_console_sem() print the text 613 * release_console_sem() print the text
606 */ 614 */
615 printk_cpu = UINT_MAX;
607 spin_unlock_irqrestore(&logbuf_lock, flags); 616 spin_unlock_irqrestore(&logbuf_lock, flags);
608 console_may_schedule = 0; 617 console_may_schedule = 0;
609 release_console_sem(); 618 release_console_sem();
@@ -613,9 +622,11 @@ asmlinkage int vprintk(const char *fmt, va_list args)
613 * allows the semaphore holder to proceed and to call the 622 * allows the semaphore holder to proceed and to call the
614 * console drivers with the output which we just produced. 623 * console drivers with the output which we just produced.
615 */ 624 */
625 printk_cpu = UINT_MAX;
616 spin_unlock_irqrestore(&logbuf_lock, flags); 626 spin_unlock_irqrestore(&logbuf_lock, flags);
617 } 627 }
618out: 628out:
629 preempt_enable();
619 return printed_len; 630 return printed_len;
620} 631}
621EXPORT_SYMBOL(printk); 632EXPORT_SYMBOL(printk);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 8dcb8f6288bc..019e04ec065a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -118,6 +118,33 @@ int ptrace_check_attach(struct task_struct *child, int kill)
118 return ret; 118 return ret;
119} 119}
120 120
121static int may_attach(struct task_struct *task)
122{
123 if (!task->mm)
124 return -EPERM;
125 if (((current->uid != task->euid) ||
126 (current->uid != task->suid) ||
127 (current->uid != task->uid) ||
128 (current->gid != task->egid) ||
129 (current->gid != task->sgid) ||
130 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
131 return -EPERM;
132 smp_rmb();
133 if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
134 return -EPERM;
135
136 return security_ptrace(current, task);
137}
138
139int ptrace_may_attach(struct task_struct *task)
140{
141 int err;
142 task_lock(task);
143 err = may_attach(task);
144 task_unlock(task);
145 return !err;
146}
147
121int ptrace_attach(struct task_struct *task) 148int ptrace_attach(struct task_struct *task)
122{ 149{
123 int retval; 150 int retval;
@@ -127,22 +154,10 @@ int ptrace_attach(struct task_struct *task)
127 goto bad; 154 goto bad;
128 if (task == current) 155 if (task == current)
129 goto bad; 156 goto bad;
130 if (!task->mm)
131 goto bad;
132 if(((current->uid != task->euid) ||
133 (current->uid != task->suid) ||
134 (current->uid != task->uid) ||
135 (current->gid != task->egid) ||
136 (current->gid != task->sgid) ||
137 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
138 goto bad;
139 smp_rmb();
140 if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
141 goto bad;
142 /* the same process cannot be attached many times */ 157 /* the same process cannot be attached many times */
143 if (task->ptrace & PT_PTRACED) 158 if (task->ptrace & PT_PTRACED)
144 goto bad; 159 goto bad;
145 retval = security_ptrace(current, task); 160 retval = may_attach(task);
146 if (retval) 161 if (retval)
147 goto bad; 162 goto bad;
148 163
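The permission checks formerly open-coded in ptrace_attach() now live in may_attach(), and ptrace_may_attach() exposes them to other callers; note the inversion: it returns non-zero when attaching would be allowed. A hypothetical caller outside ptrace could gate access to another task the same way:

/* Hypothetical caller: reuse the ptrace permission model before touching
 * another task's state. */
static int sketch_access_task(struct task_struct *task)
{
	if (!ptrace_may_attach(task))
		return -EPERM;
	/* ... safe to inspect the task here ... */
	return 0;
}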
diff --git a/kernel/resource.c b/kernel/resource.c
index 26967e042201..92285d822de6 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -430,10 +430,9 @@ EXPORT_SYMBOL(adjust_resource);
430 */ 430 */
431struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name) 431struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
432{ 432{
433 struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL); 433 struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
434 434
435 if (res) { 435 if (res) {
436 memset(res, 0, sizeof(*res));
437 res->name = name; 436 res->name = name;
438 res->start = start; 437 res->start = start;
439 res->end = start + n - 1; 438 res->end = start + n - 1;
diff --git a/kernel/sched.c b/kernel/sched.c
index f41fa94d2070..18b95520a2e2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4780,7 +4780,7 @@ static int sd_parent_degenerate(struct sched_domain *sd,
4780 * Attach the domain 'sd' to 'cpu' as its base domain. Callers must 4780 * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
4781 * hold the hotplug lock. 4781 * hold the hotplug lock.
4782 */ 4782 */
4783void cpu_attach_domain(struct sched_domain *sd, int cpu) 4783static void cpu_attach_domain(struct sched_domain *sd, int cpu)
4784{ 4784{
4785 runqueue_t *rq = cpu_rq(cpu); 4785 runqueue_t *rq = cpu_rq(cpu);
4786 struct sched_domain *tmp; 4786 struct sched_domain *tmp;
@@ -4803,7 +4803,7 @@ void cpu_attach_domain(struct sched_domain *sd, int cpu)
4803} 4803}
4804 4804
4805/* cpus with isolated domains */ 4805/* cpus with isolated domains */
4806cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; 4806static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE;
4807 4807
4808/* Setup the mask of cpus configured for isolated domains */ 4808/* Setup the mask of cpus configured for isolated domains */
4809static int __init isolated_cpu_setup(char *str) 4809static int __init isolated_cpu_setup(char *str)
@@ -4831,8 +4831,8 @@ __setup ("isolcpus=", isolated_cpu_setup);
4831 * covered by the given span, and will set each group's ->cpumask correctly, 4831 * covered by the given span, and will set each group's ->cpumask correctly,
4832 * and ->cpu_power to 0. 4832 * and ->cpu_power to 0.
4833 */ 4833 */
4834void init_sched_build_groups(struct sched_group groups[], 4834static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
4835 cpumask_t span, int (*group_fn)(int cpu)) 4835 int (*group_fn)(int cpu))
4836{ 4836{
4837 struct sched_group *first = NULL, *last = NULL; 4837 struct sched_group *first = NULL, *last = NULL;
4838 cpumask_t covered = CPU_MASK_NONE; 4838 cpumask_t covered = CPU_MASK_NONE;
@@ -4865,12 +4865,85 @@ void init_sched_build_groups(struct sched_group groups[],
4865 last->next = first; 4865 last->next = first;
4866} 4866}
4867 4867
4868#define SD_NODES_PER_DOMAIN 16
4868 4869
4869#ifdef ARCH_HAS_SCHED_DOMAIN 4870#ifdef CONFIG_NUMA
4870extern void build_sched_domains(const cpumask_t *cpu_map); 4871/**
4871extern void arch_init_sched_domains(const cpumask_t *cpu_map); 4872 * find_next_best_node - find the next node to include in a sched_domain
4872extern void arch_destroy_sched_domains(const cpumask_t *cpu_map); 4873 * @node: node whose sched_domain we're building
4873#else 4874 * @used_nodes: nodes already in the sched_domain
4875 *
4876 * Find the next node to include in a given scheduling domain. Simply
4877 * finds the closest node not already in the @used_nodes map.
4878 *
4879 * Should use nodemask_t.
4880 */
4881static int find_next_best_node(int node, unsigned long *used_nodes)
4882{
4883 int i, n, val, min_val, best_node = 0;
4884
4885 min_val = INT_MAX;
4886
4887 for (i = 0; i < MAX_NUMNODES; i++) {
4888 /* Start at @node */
4889 n = (node + i) % MAX_NUMNODES;
4890
4891 if (!nr_cpus_node(n))
4892 continue;
4893
4894 /* Skip already used nodes */
4895 if (test_bit(n, used_nodes))
4896 continue;
4897
4898 /* Simple min distance search */
4899 val = node_distance(node, n);
4900
4901 if (val < min_val) {
4902 min_val = val;
4903 best_node = n;
4904 }
4905 }
4906
4907 set_bit(best_node, used_nodes);
4908 return best_node;
4909}
4910
4911/**
4912 * sched_domain_node_span - get a cpumask for a node's sched_domain
4913 * @node: node whose cpumask we're constructing
4914 * @size: number of nodes to include in this span
4915 *
4916 * Given a node, construct a good cpumask for its sched_domain to span. It
4917 * should be one that prevents unnecessary balancing, but also spreads tasks
4918 * out optimally.
4919 */
4920static cpumask_t sched_domain_node_span(int node)
4921{
4922 int i;
4923 cpumask_t span, nodemask;
4924 DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
4925
4926 cpus_clear(span);
4927 bitmap_zero(used_nodes, MAX_NUMNODES);
4928
4929 nodemask = node_to_cpumask(node);
4930 cpus_or(span, span, nodemask);
4931 set_bit(node, used_nodes);
4932
4933 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
4934 int next_node = find_next_best_node(node, used_nodes);
4935 nodemask = node_to_cpumask(next_node);
4936 cpus_or(span, span, nodemask);
4937 }
4938
4939 return span;
4940}
4941#endif
4942
4943/*
4944 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
4945 * can switch it on easily if needed.
4946 */
4874#ifdef CONFIG_SCHED_SMT 4947#ifdef CONFIG_SCHED_SMT
4875static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 4948static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
4876static struct sched_group sched_group_cpus[NR_CPUS]; 4949static struct sched_group sched_group_cpus[NR_CPUS];
@@ -4892,36 +4965,20 @@ static int cpu_to_phys_group(int cpu)
4892} 4965}
4893 4966
4894#ifdef CONFIG_NUMA 4967#ifdef CONFIG_NUMA
4895
4896static DEFINE_PER_CPU(struct sched_domain, node_domains);
4897static struct sched_group sched_group_nodes[MAX_NUMNODES];
4898static int cpu_to_node_group(int cpu)
4899{
4900 return cpu_to_node(cpu);
4901}
4902#endif
4903
4904#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA)
4905/* 4968/*
4906 * The domains setup code relies on siblings not spanning 4969 * The init_sched_build_groups can't handle what we want to do with node
4907 * multiple nodes. Make sure the architecture has a proper 4970 * groups, so roll our own. Now each node has its own list of groups which
4908 * siblings map: 4971 * gets dynamically allocated.
4909 */ 4972 */
4910static void check_sibling_maps(void) 4973static DEFINE_PER_CPU(struct sched_domain, node_domains);
4911{ 4974static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
4912 int i, j;
4913 4975
4914 for_each_online_cpu(i) { 4976static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
4915 for_each_cpu_mask(j, cpu_sibling_map[i]) { 4977static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
4916 if (cpu_to_node(i) != cpu_to_node(j)) { 4978
4917 printk(KERN_INFO "warning: CPU %d siblings map " 4979static int cpu_to_allnodes_group(int cpu)
4918 "to different node - isolating " 4980{
4919 "them.\n", i); 4981 return cpu_to_node(cpu);
4920 cpu_sibling_map[i] = cpumask_of_cpu(i);
4921 break;
4922 }
4923 }
4924 }
4925} 4982}
4926#endif 4983#endif
4927 4984
@@ -4929,9 +4986,24 @@ static void check_sibling_maps(void)
4929 * Build sched domains for a given set of cpus and attach the sched domains 4986 * Build sched domains for a given set of cpus and attach the sched domains
4930 * to the individual cpus 4987 * to the individual cpus
4931 */ 4988 */
4932static void build_sched_domains(const cpumask_t *cpu_map) 4989void build_sched_domains(const cpumask_t *cpu_map)
4933{ 4990{
4934 int i; 4991 int i;
4992#ifdef CONFIG_NUMA
4993 struct sched_group **sched_group_nodes = NULL;
4994 struct sched_group *sched_group_allnodes = NULL;
4995
4996 /*
4997 * Allocate the per-node list of sched groups
4998 */
4999 sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES,
5000 GFP_ATOMIC);
5001 if (!sched_group_nodes) {
5002 printk(KERN_WARNING "Can not alloc sched group node list\n");
5003 return;
5004 }
5005 sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
5006#endif
4935 5007
4936 /* 5008 /*
4937 * Set up domains for cpus specified by the cpu_map. 5009 * Set up domains for cpus specified by the cpu_map.
@@ -4944,11 +5016,35 @@ static void build_sched_domains(const cpumask_t *cpu_map)
4944 cpus_and(nodemask, nodemask, *cpu_map); 5016 cpus_and(nodemask, nodemask, *cpu_map);
4945 5017
4946#ifdef CONFIG_NUMA 5018#ifdef CONFIG_NUMA
5019 if (cpus_weight(*cpu_map)
5020 > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
5021 if (!sched_group_allnodes) {
5022 sched_group_allnodes
5023 = kmalloc(sizeof(struct sched_group)
5024 * MAX_NUMNODES,
5025 GFP_KERNEL);
5026 if (!sched_group_allnodes) {
5027 printk(KERN_WARNING
5028 "Can not alloc allnodes sched group\n");
5029 break;
5030 }
5031 sched_group_allnodes_bycpu[i]
5032 = sched_group_allnodes;
5033 }
5034 sd = &per_cpu(allnodes_domains, i);
5035 *sd = SD_ALLNODES_INIT;
5036 sd->span = *cpu_map;
5037 group = cpu_to_allnodes_group(i);
5038 sd->groups = &sched_group_allnodes[group];
5039 p = sd;
5040 } else
5041 p = NULL;
5042
4947 sd = &per_cpu(node_domains, i); 5043 sd = &per_cpu(node_domains, i);
4948 group = cpu_to_node_group(i);
4949 *sd = SD_NODE_INIT; 5044 *sd = SD_NODE_INIT;
4950 sd->span = *cpu_map; 5045 sd->span = sched_domain_node_span(cpu_to_node(i));
4951 sd->groups = &sched_group_nodes[group]; 5046 sd->parent = p;
5047 cpus_and(sd->span, sd->span, *cpu_map);
4952#endif 5048#endif
4953 5049
4954 p = sd; 5050 p = sd;
@@ -4973,7 +5069,7 @@ static void build_sched_domains(const cpumask_t *cpu_map)
4973 5069
4974#ifdef CONFIG_SCHED_SMT 5070#ifdef CONFIG_SCHED_SMT
4975 /* Set up CPU (sibling) groups */ 5071 /* Set up CPU (sibling) groups */
4976 for_each_online_cpu(i) { 5072 for_each_cpu_mask(i, *cpu_map) {
4977 cpumask_t this_sibling_map = cpu_sibling_map[i]; 5073 cpumask_t this_sibling_map = cpu_sibling_map[i];
4978 cpus_and(this_sibling_map, this_sibling_map, *cpu_map); 5074 cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
4979 if (i != first_cpu(this_sibling_map)) 5075 if (i != first_cpu(this_sibling_map))
@@ -4998,8 +5094,77 @@ static void build_sched_domains(const cpumask_t *cpu_map)
4998 5094
4999#ifdef CONFIG_NUMA 5095#ifdef CONFIG_NUMA
5000 /* Set up node groups */ 5096 /* Set up node groups */
5001 init_sched_build_groups(sched_group_nodes, *cpu_map, 5097 if (sched_group_allnodes)
5002 &cpu_to_node_group); 5098 init_sched_build_groups(sched_group_allnodes, *cpu_map,
5099 &cpu_to_allnodes_group);
5100
5101 for (i = 0; i < MAX_NUMNODES; i++) {
5102 /* Set up node groups */
5103 struct sched_group *sg, *prev;
5104 cpumask_t nodemask = node_to_cpumask(i);
5105 cpumask_t domainspan;
5106 cpumask_t covered = CPU_MASK_NONE;
5107 int j;
5108
5109 cpus_and(nodemask, nodemask, *cpu_map);
5110 if (cpus_empty(nodemask)) {
5111 sched_group_nodes[i] = NULL;
5112 continue;
5113 }
5114
5115 domainspan = sched_domain_node_span(i);
5116 cpus_and(domainspan, domainspan, *cpu_map);
5117
5118 sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
5119 sched_group_nodes[i] = sg;
5120 for_each_cpu_mask(j, nodemask) {
5121 struct sched_domain *sd;
5122 sd = &per_cpu(node_domains, j);
5123 sd->groups = sg;
5124 if (sd->groups == NULL) {
5125 /* Turn off balancing if we have no groups */
5126 sd->flags = 0;
5127 }
5128 }
5129 if (!sg) {
5130 printk(KERN_WARNING
5131 "Can not alloc domain group for node %d\n", i);
5132 continue;
5133 }
5134 sg->cpu_power = 0;
5135 sg->cpumask = nodemask;
5136 cpus_or(covered, covered, nodemask);
5137 prev = sg;
5138
5139 for (j = 0; j < MAX_NUMNODES; j++) {
5140 cpumask_t tmp, notcovered;
5141 int n = (i + j) % MAX_NUMNODES;
5142
5143 cpus_complement(notcovered, covered);
5144 cpus_and(tmp, notcovered, *cpu_map);
5145 cpus_and(tmp, tmp, domainspan);
5146 if (cpus_empty(tmp))
5147 break;
5148
5149 nodemask = node_to_cpumask(n);
5150 cpus_and(tmp, tmp, nodemask);
5151 if (cpus_empty(tmp))
5152 continue;
5153
5154 sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
5155 if (!sg) {
5156 printk(KERN_WARNING
5157 "Can not alloc domain group for node %d\n", j);
5158 break;
5159 }
5160 sg->cpu_power = 0;
5161 sg->cpumask = tmp;
5162 cpus_or(covered, covered, tmp);
5163 prev->next = sg;
5164 prev = sg;
5165 }
5166 prev->next = sched_group_nodes[i];
5167 }
5003#endif 5168#endif
5004 5169
5005 /* Calculate CPU power for physical packages and nodes */ 5170 /* Calculate CPU power for physical packages and nodes */
@@ -5018,14 +5183,46 @@ static void build_sched_domains(const cpumask_t *cpu_map)
5018 sd->groups->cpu_power = power; 5183 sd->groups->cpu_power = power;
5019 5184
5020#ifdef CONFIG_NUMA 5185#ifdef CONFIG_NUMA
5021 if (i == first_cpu(sd->groups->cpumask)) { 5186 sd = &per_cpu(allnodes_domains, i);
5022 /* Only add "power" once for each physical package. */ 5187 if (sd->groups) {
5023 sd = &per_cpu(node_domains, i); 5188 power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
5024 sd->groups->cpu_power += power; 5189 (cpus_weight(sd->groups->cpumask)-1) / 10;
5190 sd->groups->cpu_power = power;
5025 } 5191 }
5026#endif 5192#endif
5027 } 5193 }
5028 5194
5195#ifdef CONFIG_NUMA
5196 for (i = 0; i < MAX_NUMNODES; i++) {
5197 struct sched_group *sg = sched_group_nodes[i];
5198 int j;
5199
5200 if (sg == NULL)
5201 continue;
5202next_sg:
5203 for_each_cpu_mask(j, sg->cpumask) {
5204 struct sched_domain *sd;
5205 int power;
5206
5207 sd = &per_cpu(phys_domains, j);
5208 if (j != first_cpu(sd->groups->cpumask)) {
5209 /*
5210 * Only add "power" once for each
5211 * physical package.
5212 */
5213 continue;
5214 }
5215 power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
5216 (cpus_weight(sd->groups->cpumask)-1) / 10;
5217
5218 sg->cpu_power += power;
5219 }
5220 sg = sg->next;
5221 if (sg != sched_group_nodes[i])
5222 goto next_sg;
5223 }
5224#endif
5225
5029 /* Attach the domains */ 5226 /* Attach the domains */
5030 for_each_cpu_mask(i, *cpu_map) { 5227 for_each_cpu_mask(i, *cpu_map) {
5031 struct sched_domain *sd; 5228 struct sched_domain *sd;
@@ -5040,13 +5237,10 @@ static void build_sched_domains(const cpumask_t *cpu_map)
5040/* 5237/*
5041 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 5238 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
5042 */ 5239 */
5043static void arch_init_sched_domains(cpumask_t *cpu_map) 5240static void arch_init_sched_domains(const cpumask_t *cpu_map)
5044{ 5241{
5045 cpumask_t cpu_default_map; 5242 cpumask_t cpu_default_map;
5046 5243
5047#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA)
5048 check_sibling_maps();
5049#endif
5050 /* 5244 /*
5051 * Setup mask for cpus without special case scheduling requirements. 5245 * Setup mask for cpus without special case scheduling requirements.
5052 * For now this just excludes isolated cpus, but could be used to 5246 * For now this just excludes isolated cpus, but could be used to
@@ -5059,10 +5253,47 @@ static void arch_init_sched_domains(cpumask_t *cpu_map)
5059 5253
5060static void arch_destroy_sched_domains(const cpumask_t *cpu_map) 5254static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
5061{ 5255{
5062 /* Do nothing: everything is statically allocated. */ 5256#ifdef CONFIG_NUMA
5063} 5257 int i;
5258 int cpu;
5259
5260 for_each_cpu_mask(cpu, *cpu_map) {
5261 struct sched_group *sched_group_allnodes
5262 = sched_group_allnodes_bycpu[cpu];
5263 struct sched_group **sched_group_nodes
5264 = sched_group_nodes_bycpu[cpu];
5265
5266 if (sched_group_allnodes) {
5267 kfree(sched_group_allnodes);
5268 sched_group_allnodes_bycpu[cpu] = NULL;
5269 }
5270
5271 if (!sched_group_nodes)
5272 continue;
5273
5274 for (i = 0; i < MAX_NUMNODES; i++) {
5275 cpumask_t nodemask = node_to_cpumask(i);
5276 struct sched_group *oldsg, *sg = sched_group_nodes[i];
5064 5277
5065#endif /* ARCH_HAS_SCHED_DOMAIN */ 5278 cpus_and(nodemask, nodemask, *cpu_map);
5279 if (cpus_empty(nodemask))
5280 continue;
5281
5282 if (sg == NULL)
5283 continue;
5284 sg = sg->next;
5285next_sg:
5286 oldsg = sg;
5287 sg = sg->next;
5288 kfree(oldsg);
5289 if (oldsg != sched_group_nodes[i])
5290 goto next_sg;
5291 }
5292 kfree(sched_group_nodes);
5293 sched_group_nodes_bycpu[cpu] = NULL;
5294 }
5295#endif
5296}
5066 5297
5067/* 5298/*
5068 * Detach sched domains from a group of cpus specified in cpu_map 5299 * Detach sched domains from a group of cpus specified in cpu_map
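The per-node groups built in the hunks above are linked into a circular singly linked list (the final prev->next assignment points back at sched_group_nodes[i]), which is why both the cpu_power accumulation and the teardown in arch_destroy_sched_domains() walk the list with a next_sg label rather than a bounded for loop. A minimal sketch of that traversal, where visit_group() is a hypothetical callback standing in for either the power update or the kfree():

static void walk_node_groups(struct sched_group *head,
			     void (*visit_group)(struct sched_group *sg))
{
	struct sched_group *sg = head;

	if (!head)
		return;
	do {
		/* fetch the link before a visit that might free sg */
		struct sched_group *next = sg->next;

		visit_group(sg);
		sg = next;
	} while (sg != head);	/* circular list: stop once we wrap around */
}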
diff --git a/kernel/signal.c b/kernel/signal.c
index d282fea81138..4980a073237f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -678,7 +678,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
678 678
679/* forward decl */ 679/* forward decl */
680static void do_notify_parent_cldstop(struct task_struct *tsk, 680static void do_notify_parent_cldstop(struct task_struct *tsk,
681 struct task_struct *parent, 681 int to_self,
682 int why); 682 int why);
683 683
684/* 684/*
@@ -729,14 +729,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
729 p->signal->group_stop_count = 0; 729 p->signal->group_stop_count = 0;
730 p->signal->flags = SIGNAL_STOP_CONTINUED; 730 p->signal->flags = SIGNAL_STOP_CONTINUED;
731 spin_unlock(&p->sighand->siglock); 731 spin_unlock(&p->sighand->siglock);
732 if (p->ptrace & PT_PTRACED) 732 do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_STOPPED);
733 do_notify_parent_cldstop(p, p->parent,
734 CLD_STOPPED);
735 else
736 do_notify_parent_cldstop(
737 p->group_leader,
738 p->group_leader->real_parent,
739 CLD_STOPPED);
740 spin_lock(&p->sighand->siglock); 733 spin_lock(&p->sighand->siglock);
741 } 734 }
742 rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); 735 rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
@@ -777,14 +770,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
777 p->signal->flags = SIGNAL_STOP_CONTINUED; 770 p->signal->flags = SIGNAL_STOP_CONTINUED;
778 p->signal->group_exit_code = 0; 771 p->signal->group_exit_code = 0;
779 spin_unlock(&p->sighand->siglock); 772 spin_unlock(&p->sighand->siglock);
780 if (p->ptrace & PT_PTRACED) 773 do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_CONTINUED);
781 do_notify_parent_cldstop(p, p->parent,
782 CLD_CONTINUED);
783 else
784 do_notify_parent_cldstop(
785 p->group_leader,
786 p->group_leader->real_parent,
787 CLD_CONTINUED);
788 spin_lock(&p->sighand->siglock); 774 spin_lock(&p->sighand->siglock);
789 } else { 775 } else {
790 /* 776 /*
@@ -1380,16 +1366,16 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1380 unsigned long flags; 1366 unsigned long flags;
1381 int ret = 0; 1367 int ret = 0;
1382 1368
1383 /*
1384 * We need the tasklist lock even for the specific
1385 * thread case (when we don't need to follow the group
1386 * lists) in order to avoid races with "p->sighand"
1387 * going away or changing from under us.
1388 */
1389 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); 1369 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1390 read_lock(&tasklist_lock); 1370 read_lock(&tasklist_lock);
1371
1372 if (unlikely(p->flags & PF_EXITING)) {
1373 ret = -1;
1374 goto out_err;
1375 }
1376
1391 spin_lock_irqsave(&p->sighand->siglock, flags); 1377 spin_lock_irqsave(&p->sighand->siglock, flags);
1392 1378
1393 if (unlikely(!list_empty(&q->list))) { 1379 if (unlikely(!list_empty(&q->list))) {
1394 /* 1380 /*
 1395 * If an SI_TIMER entry is already queued, just increment 1381
@@ -1399,7 +1385,7 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1399 BUG(); 1385 BUG();
1400 q->info.si_overrun++; 1386 q->info.si_overrun++;
1401 goto out; 1387 goto out;
1402 } 1388 }
1403 /* Short-circuit ignored signals. */ 1389 /* Short-circuit ignored signals. */
1404 if (sig_ignored(p, sig)) { 1390 if (sig_ignored(p, sig)) {
1405 ret = 1; 1391 ret = 1;
@@ -1414,8 +1400,10 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
1414 1400
1415out: 1401out:
1416 spin_unlock_irqrestore(&p->sighand->siglock, flags); 1402 spin_unlock_irqrestore(&p->sighand->siglock, flags);
1403out_err:
1417 read_unlock(&tasklist_lock); 1404 read_unlock(&tasklist_lock);
1418 return(ret); 1405
1406 return ret;
1419} 1407}
1420 1408
1421int 1409int
@@ -1542,14 +1530,20 @@ void do_notify_parent(struct task_struct *tsk, int sig)
1542 spin_unlock_irqrestore(&psig->siglock, flags); 1530 spin_unlock_irqrestore(&psig->siglock, flags);
1543} 1531}
1544 1532
1545static void 1533static void do_notify_parent_cldstop(struct task_struct *tsk, int to_self, int why)
1546do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent,
1547 int why)
1548{ 1534{
1549 struct siginfo info; 1535 struct siginfo info;
1550 unsigned long flags; 1536 unsigned long flags;
1537 struct task_struct *parent;
1551 struct sighand_struct *sighand; 1538 struct sighand_struct *sighand;
1552 1539
1540 if (to_self)
1541 parent = tsk->parent;
1542 else {
1543 tsk = tsk->group_leader;
1544 parent = tsk->real_parent;
1545 }
1546
1553 info.si_signo = SIGCHLD; 1547 info.si_signo = SIGCHLD;
1554 info.si_errno = 0; 1548 info.si_errno = 0;
1555 info.si_pid = tsk->pid; 1549 info.si_pid = tsk->pid;
@@ -1618,8 +1612,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
1618 !(current->ptrace & PT_ATTACHED)) && 1612 !(current->ptrace & PT_ATTACHED)) &&
1619 (likely(current->parent->signal != current->signal) || 1613 (likely(current->parent->signal != current->signal) ||
1620 !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { 1614 !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) {
1621 do_notify_parent_cldstop(current, current->parent, 1615 do_notify_parent_cldstop(current, 1, CLD_TRAPPED);
1622 CLD_TRAPPED);
1623 read_unlock(&tasklist_lock); 1616 read_unlock(&tasklist_lock);
1624 schedule(); 1617 schedule();
1625 } else { 1618 } else {
@@ -1668,25 +1661,25 @@ void ptrace_notify(int exit_code)
1668static void 1661static void
1669finish_stop(int stop_count) 1662finish_stop(int stop_count)
1670{ 1663{
1664 int to_self;
1665
1671 /* 1666 /*
1672 * If there are no other threads in the group, or if there is 1667 * If there are no other threads in the group, or if there is
1673 * a group stop in progress and we are the last to stop, 1668 * a group stop in progress and we are the last to stop,
1674 * report to the parent. When ptraced, every thread reports itself. 1669 * report to the parent. When ptraced, every thread reports itself.
1675 */ 1670 */
1676 if (stop_count < 0 || (current->ptrace & PT_PTRACED)) { 1671 if (stop_count < 0 || (current->ptrace & PT_PTRACED))
1677 read_lock(&tasklist_lock); 1672 to_self = 1;
1678 do_notify_parent_cldstop(current, current->parent, 1673 else if (stop_count == 0)
1679 CLD_STOPPED); 1674 to_self = 0;
1680 read_unlock(&tasklist_lock); 1675 else
1681 } 1676 goto out;
1682 else if (stop_count == 0) {
1683 read_lock(&tasklist_lock);
1684 do_notify_parent_cldstop(current->group_leader,
1685 current->group_leader->real_parent,
1686 CLD_STOPPED);
1687 read_unlock(&tasklist_lock);
1688 }
1689 1677
1678 read_lock(&tasklist_lock);
1679 do_notify_parent_cldstop(current, to_self, CLD_STOPPED);
1680 read_unlock(&tasklist_lock);
1681
1682out:
1690 schedule(); 1683 schedule();
1691 /* 1684 /*
1692 * Now we don't run again until continued. 1685 * Now we don't run again until continued.
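The rewritten do_notify_parent_cldstop() takes a to_self flag instead of an explicit parent pointer: when the task reports for itself (the ptraced case) the notification goes to tsk->parent, otherwise the group leader reports to its real_parent. A hedged sketch of how a call site collapses under the new signature, mirroring the handle_stop_signal() hunks above (report_group_stop() itself is a made-up wrapper, not part of the patch):

/* Sketch only: drop the siglock around the notification, as the call
 * sites above do, and let the callee pick the right parent. */
static void report_group_stop(struct task_struct *p, int why)
{
	spin_unlock(&p->sighand->siglock);
	do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), why);
	spin_lock(&p->sighand->siglock);
}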
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
new file mode 100644
index 000000000000..75976209cea7
--- /dev/null
+++ b/kernel/softlockup.c
@@ -0,0 +1,151 @@
1/*
2 * Detect Soft Lockups
3 *
4 * started by Ingo Molnar, (C) 2005, Red Hat
5 *
 6 * this code detects soft lockups: incidents where the kernel
 7 * does not reschedule on a CPU for 10 seconds or more.
8 */
9
10#include <linux/mm.h>
11#include <linux/cpu.h>
12#include <linux/init.h>
13#include <linux/delay.h>
14#include <linux/kthread.h>
15#include <linux/notifier.h>
16#include <linux/module.h>
17
18static DEFINE_SPINLOCK(print_lock);
19
20static DEFINE_PER_CPU(unsigned long, timestamp) = 0;
21static DEFINE_PER_CPU(unsigned long, print_timestamp) = 0;
22static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
23
24static int did_panic = 0;
25static int softlock_panic(struct notifier_block *this, unsigned long event,
26 void *ptr)
27{
28 did_panic = 1;
29
30 return NOTIFY_DONE;
31}
32
33static struct notifier_block panic_block = {
34 .notifier_call = softlock_panic,
35};
36
37void touch_softlockup_watchdog(void)
38{
39 per_cpu(timestamp, raw_smp_processor_id()) = jiffies;
40}
41EXPORT_SYMBOL(touch_softlockup_watchdog);
42
43/*
44 * This callback runs from the timer interrupt, and checks
45 * whether the watchdog thread has hung or not:
46 */
47void softlockup_tick(struct pt_regs *regs)
48{
49 int this_cpu = smp_processor_id();
50 unsigned long timestamp = per_cpu(timestamp, this_cpu);
51
52 if (per_cpu(print_timestamp, this_cpu) == timestamp)
53 return;
54
55 /* Do not cause a second panic when there already was one */
56 if (did_panic)
57 return;
58
59 if (time_after(jiffies, timestamp + 10*HZ)) {
60 per_cpu(print_timestamp, this_cpu) = timestamp;
61
62 spin_lock(&print_lock);
63 printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n",
64 this_cpu);
65 show_regs(regs);
66 spin_unlock(&print_lock);
67 }
68}
69
70/*
71 * The watchdog thread - runs every second and touches the timestamp.
72 */
73static int watchdog(void * __bind_cpu)
74{
75 struct sched_param param = { .sched_priority = 99 };
76 int this_cpu = (long) __bind_cpu;
77
78 printk("softlockup thread %d started up.\n", this_cpu);
79
80 sched_setscheduler(current, SCHED_FIFO, &param);
81 current->flags |= PF_NOFREEZE;
82
83 set_current_state(TASK_INTERRUPTIBLE);
84
85 /*
86 * Run briefly once per second - if this gets delayed for
87 * more than 10 seconds then the debug-printout triggers
88 * in softlockup_tick():
89 */
90 while (!kthread_should_stop()) {
91 msleep_interruptible(1000);
92 touch_softlockup_watchdog();
93 }
94 __set_current_state(TASK_RUNNING);
95
96 return 0;
97}
98
99/*
100 * Create/destroy watchdog threads as CPUs come and go:
101 */
102static int __devinit
103cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
104{
105 int hotcpu = (unsigned long)hcpu;
106 struct task_struct *p;
107
108 switch (action) {
109 case CPU_UP_PREPARE:
110 BUG_ON(per_cpu(watchdog_task, hotcpu));
111 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
112 if (IS_ERR(p)) {
113 printk("watchdog for %i failed\n", hotcpu);
114 return NOTIFY_BAD;
115 }
116 per_cpu(watchdog_task, hotcpu) = p;
117 kthread_bind(p, hotcpu);
118 break;
119 case CPU_ONLINE:
120
121 wake_up_process(per_cpu(watchdog_task, hotcpu));
122 break;
123#ifdef CONFIG_HOTPLUG_CPU
124 case CPU_UP_CANCELED:
125 /* Unbind so it can run. Fall thru. */
126 kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id());
127 case CPU_DEAD:
128 p = per_cpu(watchdog_task, hotcpu);
129 per_cpu(watchdog_task, hotcpu) = NULL;
130 kthread_stop(p);
131 break;
132#endif /* CONFIG_HOTPLUG_CPU */
133 }
134 return NOTIFY_OK;
135}
136
137static struct notifier_block __devinitdata cpu_nfb = {
138 .notifier_call = cpu_callback
139};
140
141__init void spawn_softlockup_task(void)
142{
143 void *cpu = (void *)(long)smp_processor_id();
144
145 cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
146 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
147 register_cpu_notifier(&cpu_nfb);
148
149 notifier_chain_register(&panic_notifier_list, &panic_block);
150}
151
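The exported touch_softlockup_watchdog() is the escape hatch for code that legitimately monopolises a CPU (slow console writes, long resume paths and the like): calling it resets the per-CPU timestamp so softlockup_tick() does not fire. A hedged sketch of such a caller, where do_one_slow_step() is a hypothetical placeholder and the declaration is assumed to live in <linux/sched.h> as in the rest of this series:

#include <linux/sched.h>	/* assumed home of the touch_softlockup_watchdog() declaration */

static void long_noninterruptible_work(int nr_steps)
{
	int i;

	for (i = 0; i < nr_steps; i++) {
		do_one_slow_step(i);		/* hypothetical, CPU-bound step */
		touch_softlockup_watchdog();	/* we are busy, not hung */
	}
}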
diff --git a/kernel/sys.c b/kernel/sys.c
index 0bcaed6560ac..c80412be2302 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1711,7 +1711,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1711 unsigned long arg4, unsigned long arg5) 1711 unsigned long arg4, unsigned long arg5)
1712{ 1712{
1713 long error; 1713 long error;
1714 int sig;
1715 1714
1716 error = security_task_prctl(option, arg2, arg3, arg4, arg5); 1715 error = security_task_prctl(option, arg2, arg3, arg4, arg5);
1717 if (error) 1716 if (error)
@@ -1719,12 +1718,11 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1719 1718
1720 switch (option) { 1719 switch (option) {
1721 case PR_SET_PDEATHSIG: 1720 case PR_SET_PDEATHSIG:
1722 sig = arg2; 1721 if (!valid_signal(arg2)) {
1723 if (!valid_signal(sig)) {
1724 error = -EINVAL; 1722 error = -EINVAL;
1725 break; 1723 break;
1726 } 1724 }
1727 current->pdeath_signal = sig; 1725 current->pdeath_signal = arg2;
1728 break; 1726 break;
1729 case PR_GET_PDEATHSIG: 1727 case PR_GET_PDEATHSIG:
1730 error = put_user(current->pdeath_signal, (int __user *)arg2); 1728 error = put_user(current->pdeath_signal, (int __user *)arg2);
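The PR_SET_PDEATHSIG value still passes through valid_signal() before being stored in current->pdeath_signal; only the redundant local variable goes away. From userspace the same path is exercised with prctl(2); a hedged example of a child asking to be sent SIGTERM when its parent exits:

#include <signal.h>
#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* An out-of-range signal number here is what the valid_signal()
	 * check above rejects with -EINVAL. */
	if (prctl(PR_SET_PDEATHSIG, SIGTERM) == -1) {
		perror("prctl(PR_SET_PDEATHSIG)");
		return 1;
	}
	return 0;
}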
diff --git a/kernel/timer.c b/kernel/timer.c
index 5377f40723ff..13e2b513be01 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -950,6 +950,7 @@ void do_timer(struct pt_regs *regs)
950{ 950{
951 jiffies_64++; 951 jiffies_64++;
952 update_times(); 952 update_times();
953 softlockup_tick(regs);
953} 954}
954 955
955#ifdef __ARCH_WANT_SYS_ALARM 956#ifdef __ARCH_WANT_SYS_ALARM
@@ -1428,7 +1429,7 @@ static inline u64 time_interpolator_get_cycles(unsigned int src)
1428 } 1429 }
1429} 1430}
1430 1431
1431static inline u64 time_interpolator_get_counter(void) 1432static inline u64 time_interpolator_get_counter(int writelock)
1432{ 1433{
1433 unsigned int src = time_interpolator->source; 1434 unsigned int src = time_interpolator->source;
1434 1435
@@ -1442,6 +1443,15 @@ static inline u64 time_interpolator_get_counter(void)
1442 now = time_interpolator_get_cycles(src); 1443 now = time_interpolator_get_cycles(src);
1443 if (lcycle && time_after(lcycle, now)) 1444 if (lcycle && time_after(lcycle, now))
1444 return lcycle; 1445 return lcycle;
1446
1447 /* When holding the xtime write lock, there's no need
1448 * to add the overhead of the cmpxchg. Readers are
1449 * force to retry until the write lock is released.
1450 */
1451 if (writelock) {
1452 time_interpolator->last_cycle = now;
1453 return now;
1454 }
1445 /* Keep track of the last timer value returned. The use of cmpxchg here 1455 /* Keep track of the last timer value returned. The use of cmpxchg here
1446 * will cause contention in an SMP environment. 1456 * will cause contention in an SMP environment.
1447 */ 1457 */
@@ -1455,7 +1465,7 @@ static inline u64 time_interpolator_get_counter(void)
1455void time_interpolator_reset(void) 1465void time_interpolator_reset(void)
1456{ 1466{
1457 time_interpolator->offset = 0; 1467 time_interpolator->offset = 0;
1458 time_interpolator->last_counter = time_interpolator_get_counter(); 1468 time_interpolator->last_counter = time_interpolator_get_counter(1);
1459} 1469}
1460 1470
1461#define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift) 1471#define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift)
@@ -1467,7 +1477,7 @@ unsigned long time_interpolator_get_offset(void)
1467 return 0; 1477 return 0;
1468 1478
1469 return time_interpolator->offset + 1479 return time_interpolator->offset +
1470 GET_TI_NSECS(time_interpolator_get_counter(), time_interpolator); 1480 GET_TI_NSECS(time_interpolator_get_counter(0), time_interpolator);
1471} 1481}
1472 1482
1473#define INTERPOLATOR_ADJUST 65536 1483#define INTERPOLATOR_ADJUST 65536
@@ -1490,7 +1500,7 @@ static void time_interpolator_update(long delta_nsec)
1490 * and the tuning logic insures that. 1500 * and the tuning logic insures that.
1491 */ 1501 */
1492 1502
1493 counter = time_interpolator_get_counter(); 1503 counter = time_interpolator_get_counter(1);
1494 offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator); 1504 offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator);
1495 1505
1496 if (delta_nsec < 0 || (unsigned long) delta_nsec < offset) 1506 if (delta_nsec < 0 || (unsigned long) delta_nsec < offset)
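The new writelock argument encodes the policy spelled out in the comment: while the xtime write lock is held, readers will retry anyway, so last_cycle can be updated with a plain store, whereas the lock-free read path keeps the cmpxchg so the value only ever moves forward. A reduced sketch of that update policy (record_last_cycle() is hypothetical, and 64-bit cmpxchg support depends on the architecture):

static u64 record_last_cycle(u64 *last, u64 now, int writelock)
{
	u64 prev = *last;

	if (writelock) {
		/* xtime write lock held: readers retry, a plain store is enough */
		*last = now;
		return now;
	}
	/* lock-free path: advance 'last' atomically; if we lose the race,
	 * someone else stored a newer value, so return what is there now */
	if (cmpxchg(last, prev, now) == prev)
		return now;
	return *last;
}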
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c7e36d4a70ca..91bacb13a7e2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -308,10 +308,9 @@ struct workqueue_struct *__create_workqueue(const char *name,
308 struct workqueue_struct *wq; 308 struct workqueue_struct *wq;
309 struct task_struct *p; 309 struct task_struct *p;
310 310
311 wq = kmalloc(sizeof(*wq), GFP_KERNEL); 311 wq = kzalloc(sizeof(*wq), GFP_KERNEL);
312 if (!wq) 312 if (!wq)
313 return NULL; 313 return NULL;
314 memset(wq, 0, sizeof(*wq));
315 314
316 wq->name = name; 315 wq->name = name;
317 /* We don't need the distraction of CPUs appearing and vanishing. */ 316 /* We don't need the distraction of CPUs appearing and vanishing. */
@@ -499,7 +498,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
499 case CPU_UP_PREPARE: 498 case CPU_UP_PREPARE:
500 /* Create a new workqueue thread for it. */ 499 /* Create a new workqueue thread for it. */
501 list_for_each_entry(wq, &workqueues, list) { 500 list_for_each_entry(wq, &workqueues, list) {
502 if (create_workqueue_thread(wq, hotcpu) < 0) { 501 if (!create_workqueue_thread(wq, hotcpu)) {
503 printk("workqueue for %i failed\n", hotcpu); 502 printk("workqueue for %i failed\n", hotcpu);
504 return NOTIFY_BAD; 503 return NOTIFY_BAD;
505 } 504 }
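The kzalloc() hunk above replaces an allocate-then-clear pair with a single zero-initialising allocation. As a hedged, generic illustration of the same pattern (my_struct is a placeholder type, not part of the patch):

#include <linux/slab.h>

struct my_struct {
	int a;
	void *b;
};

static struct my_struct *alloc_cleared(void)
{
	/* equivalent to kmalloc(sizeof(struct my_struct), GFP_KERNEL)
	 * followed by memset(ptr, 0, sizeof(struct my_struct)) */
	return kzalloc(sizeof(struct my_struct), GFP_KERNEL);
}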