Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile       |   1
-rw-r--r--  kernel/acct.c         |   2
-rw-r--r--  kernel/cpuset.c       | 125
-rw-r--r--  kernel/futex.c        | 137
-rw-r--r--  kernel/intermodule.c  |   3
-rw-r--r--  kernel/irq/handle.c   |   2
-rw-r--r--  kernel/irq/manage.c   |   4
-rw-r--r--  kernel/irq/proc.c     |  14
-rw-r--r--  kernel/kprobes.c      |  94
-rw-r--r--  kernel/module.c       |  33
-rw-r--r--  kernel/params.c       |   4
-rw-r--r--  kernel/posix-timers.c |  28
-rw-r--r--  kernel/power/Kconfig  |   2
-rw-r--r--  kernel/power/pm.c     |   3
-rw-r--r--  kernel/power/swsusp.c |   1
-rw-r--r--  kernel/printk.c       |  13
-rw-r--r--  kernel/ptrace.c       |  41
-rw-r--r--  kernel/resource.c     |   3
-rw-r--r--  kernel/sched.c        | 339
-rw-r--r--  kernel/signal.c       |  83
-rw-r--r--  kernel/softlockup.c   | 151
-rw-r--r--  kernel/sys.c          |   6
-rw-r--r--  kernel/timer.c        |  18
-rw-r--r--  kernel/workqueue.c    |   5
24 files changed, 887 insertions, 225 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index cb05cd05d237..8d57a2f1226b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_SYSFS) += ksysfs.o
+obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
diff --git a/kernel/acct.c b/kernel/acct.c
index 4168f631868e..f70e6027cca9 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -220,7 +220,7 @@ asmlinkage long sys_acct(const char __user *name)
 		return (PTR_ERR(tmp));
 	}
 	/* Difference from BSD - they don't do O_APPEND */
-	file = filp_open(tmp, O_WRONLY|O_APPEND, 0);
+	file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
 	putname(tmp);
 	if (IS_ERR(file)) {
 		return (PTR_ERR(file));
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8ab1b4e518b8..1f06e7690106 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -628,13 +628,6 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
  * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
  */
 
-/*
- * Hack to avoid 2.6.13 partial node dynamic sched domain bug.
- * Disable letting 'cpu_exclusive' cpusets define dynamic sched
- * domains, until the sched domain can handle partial nodes.
- * Remove this #if hackery when sched domains fixed.
- */
-#if 0
 static void update_cpu_domains(struct cpuset *cur)
 {
 	struct cpuset *c, *par = cur->parent;
@@ -675,11 +668,6 @@ static void update_cpu_domains(struct cpuset *cur)
 	partition_sched_domains(&pspan, &cspan);
 	unlock_cpu_hotplug();
 }
-#else
-static void update_cpu_domains(struct cpuset *cur)
-{
-}
-#endif
 
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
@@ -1611,17 +1599,114 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 	return 0;
 }
 
+/*
+ * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
+ * ancestor to the specified cpuset.  Call while holding cpuset_sem.
+ * If no ancestor is mem_exclusive (an unusual configuration), then
+ * returns the root cpuset.
+ */
+static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
+{
+	while (!is_mem_exclusive(cs) && cs->parent)
+		cs = cs->parent;
+	return cs;
+}
+
 /**
- * cpuset_zone_allowed - is zone z allowed in current->mems_allowed
- * @z: zone in question
+ * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
  *
- * Is zone z allowed in current->mems_allowed, or is
- * the CPU in interrupt context? (zone is always allowed in this case)
- */
-int cpuset_zone_allowed(struct zone *z)
+ * If we're in interrupt, yes, we can always allocate.  If zone
+ * z's node is in our tasks mems_allowed, yes.  If it's not a
+ * __GFP_HARDWALL request and this zone's nodes is in the nearest
+ * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * Otherwise, no.
+ *
+ * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
+ * and do not allow allocations outside the current tasks cpuset.
+ * GFP_KERNEL allocations are not so marked, so can escape to the
+ * nearest mem_exclusive ancestor cpuset.
+ *
+ * Scanning up parent cpusets requires cpuset_sem.  The __alloc_pages()
+ * routine only calls here with __GFP_HARDWALL bit _not_ set if
+ * it's a GFP_KERNEL allocation, and all nodes in the current tasks
+ * mems_allowed came up empty on the first pass over the zonelist.
+ * So only GFP_KERNEL allocations, if all nodes in the cpuset are
+ * short of memory, might require taking the cpuset_sem semaphore.
+ *
+ * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
+ * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
+ * hardwall cpusets - no allocation on a node outside the cpuset is
+ * allowed (unless in interrupt, of course).
+ *
+ * The second loop doesn't even call here for GFP_ATOMIC requests
+ * (if the __alloc_pages() local variable 'wait' is set).  That check
+ * and the checks below have the combined affect in the second loop of
+ * the __alloc_pages() routine that:
+ *	in_interrupt - any node ok (current task context irrelevant)
+ *	GFP_ATOMIC   - any node ok
+ *	GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
+ *	GFP_USER     - only nodes in current tasks mems allowed ok.
+ **/
+
+int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
 {
-	return in_interrupt() ||
-		node_isset(z->zone_pgdat->node_id, current->mems_allowed);
+	int node;			/* node that zone z is on */
+	const struct cpuset *cs;	/* current cpuset ancestors */
+	int allowed = 1;		/* is allocation in zone z allowed? */
+
+	if (in_interrupt())
+		return 1;
+	node = z->zone_pgdat->node_id;
+	if (node_isset(node, current->mems_allowed))
+		return 1;
+	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
+		return 0;
+
+	/* Not hardwall and node outside mems_allowed: scan up cpusets */
+	down(&cpuset_sem);
+	cs = current->cpuset;
+	if (!cs)
+		goto done;		/* current task exiting */
+	cs = nearest_exclusive_ancestor(cs);
+	allowed = node_isset(node, cs->mems_allowed);
+done:
+	up(&cpuset_sem);
+	return allowed;
+}
+
+/**
+ * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
+ * @p: pointer to task_struct of some other task.
+ *
+ * Description: Return true if the nearest mem_exclusive ancestor
+ * cpusets of tasks @p and current overlap.  Used by oom killer to
+ * determine if task @p's memory usage might impact the memory
+ * available to the current task.
+ *
+ * Acquires cpuset_sem - not suitable for calling from a fast path.
+ **/
+
+int cpuset_excl_nodes_overlap(const struct task_struct *p)
+{
+	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
+	int overlap = 0;		/* do cpusets overlap? */
+
+	down(&cpuset_sem);
+	cs1 = current->cpuset;
+	if (!cs1)
+		goto done;		/* current task exiting */
+	cs2 = p->cpuset;
+	if (!cs2)
+		goto done;		/* task p is exiting */
+	cs1 = nearest_exclusive_ancestor(cs1);
+	cs2 = nearest_exclusive_ancestor(cs2);
+	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
+done:
+	up(&cpuset_sem);
+
+	return overlap;
 }
 
 /*
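To illustrate the two-pass behaviour that the cpuset_zone_allowed() comment above describes, here is a hedged, caller-side sketch; example_alloc() and its flat zone array are hypothetical stand-ins, not the real mm/page_alloc.c loop.

/* Hypothetical sketch: the first pass is hardwalled to the task's own
 * cpuset; the second pass lets a GFP_KERNEL request (no __GFP_HARDWALL)
 * spill into the nearest mem_exclusive ancestor cpuset. */
static struct page *example_alloc(unsigned int gfp_mask, struct zone **zones)
{
	int i;

	/* Pass 1: with __GFP_HARDWALL, only nodes in current->mems_allowed. */
	for (i = 0; zones[i] != NULL; i++)
		if (cpuset_zone_allowed(zones[i], gfp_mask | __GFP_HARDWALL)) {
			/* ... try to allocate from zones[i] ... */
		}

	/* Pass 2: without __GFP_HARDWALL, a GFP_KERNEL request may escape
	 * to the nearest mem_exclusive ancestor; GFP_USER keeps the bit
	 * set and never relaxes the check. */
	for (i = 0; zones[i] != NULL; i++)
		if (cpuset_zone_allowed(zones[i], gfp_mask)) {
			/* ... try harder on zones[i] ... */
		}

	return NULL;
}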
diff --git a/kernel/futex.c b/kernel/futex.c
index c7130f86106c..ca05fe6a70b2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -40,6 +40,7 @@
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
+#include <asm/futex.h>
 
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
@@ -327,6 +328,118 @@ out:
 }
 
 /*
+ * Wake up all waiters hashed on the physical page that is mapped
+ * to this virtual address:
+ */
+static int futex_wake_op(unsigned long uaddr1, unsigned long uaddr2, int nr_wake, int nr_wake2, int op)
+{
+	union futex_key key1, key2;
+	struct futex_hash_bucket *bh1, *bh2;
+	struct list_head *head;
+	struct futex_q *this, *next;
+	int ret, op_ret, attempt = 0;
+
+retryfull:
+	down_read(&current->mm->mmap_sem);
+
+	ret = get_futex_key(uaddr1, &key1);
+	if (unlikely(ret != 0))
+		goto out;
+	ret = get_futex_key(uaddr2, &key2);
+	if (unlikely(ret != 0))
+		goto out;
+
+	bh1 = hash_futex(&key1);
+	bh2 = hash_futex(&key2);
+
+retry:
+	if (bh1 < bh2)
+		spin_lock(&bh1->lock);
+	spin_lock(&bh2->lock);
+	if (bh1 > bh2)
+		spin_lock(&bh1->lock);
+
+	op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
+	if (unlikely(op_ret < 0)) {
+		int dummy;
+
+		spin_unlock(&bh1->lock);
+		if (bh1 != bh2)
+			spin_unlock(&bh2->lock);
+
+		/* futex_atomic_op_inuser needs to both read and write
+		 * *(int __user *)uaddr2, but we can't modify it
+		 * non-atomically.  Therefore, if get_user below is not
+		 * enough, we need to handle the fault ourselves, while
+		 * still holding the mmap_sem.  */
+		if (attempt++) {
+			struct vm_area_struct * vma;
+			struct mm_struct *mm = current->mm;
+
+			ret = -EFAULT;
+			if (attempt >= 2 ||
+			    !(vma = find_vma(mm, uaddr2)) ||
+			    vma->vm_start > uaddr2 ||
+			    !(vma->vm_flags & VM_WRITE))
+				goto out;
+
+			switch (handle_mm_fault(mm, vma, uaddr2, 1)) {
+			case VM_FAULT_MINOR:
+				current->min_flt++;
+				break;
+			case VM_FAULT_MAJOR:
+				current->maj_flt++;
+				break;
+			default:
+				goto out;
+			}
+			goto retry;
+		}
+
+		/* If we would have faulted, release mmap_sem,
+		 * fault it in and start all over again.  */
+		up_read(&current->mm->mmap_sem);
+
+		ret = get_user(dummy, (int __user *)uaddr2);
+		if (ret)
+			return ret;
+
+		goto retryfull;
+	}
+
+	head = &bh1->chain;
+
+	list_for_each_entry_safe(this, next, head, list) {
+		if (match_futex (&this->key, &key1)) {
+			wake_futex(this);
+			if (++ret >= nr_wake)
+				break;
+		}
+	}
+
+	if (op_ret > 0) {
+		head = &bh2->chain;
+
+		op_ret = 0;
+		list_for_each_entry_safe(this, next, head, list) {
+			if (match_futex (&this->key, &key2)) {
+				wake_futex(this);
+				if (++op_ret >= nr_wake2)
+					break;
+			}
+		}
+		ret += op_ret;
+	}
+
+	spin_unlock(&bh1->lock);
+	if (bh1 != bh2)
+		spin_unlock(&bh2->lock);
+out:
+	up_read(&current->mm->mmap_sem);
+	return ret;
+}
+
+/*
  * Requeue all waiters hashed on one physical page to another
  * physical page.
  */
@@ -673,23 +786,17 @@ static int futex_fd(unsigned long uaddr, int signal)
 	filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
 
 	if (signal) {
-		int err;
 		err = f_setown(filp, current->pid, 1);
 		if (err < 0) {
-			put_unused_fd(ret);
-			put_filp(filp);
-			ret = err;
-			goto out;
+			goto error;
 		}
 		filp->f_owner.signum = signal;
 	}
 
 	q = kmalloc(sizeof(*q), GFP_KERNEL);
 	if (!q) {
-		put_unused_fd(ret);
-		put_filp(filp);
-		ret = -ENOMEM;
-		goto out;
+		err = -ENOMEM;
+		goto error;
 	}
 
 	down_read(&current->mm->mmap_sem);
@@ -697,10 +804,8 @@ static int futex_fd(unsigned long uaddr, int signal)
 
 	if (unlikely(err != 0)) {
 		up_read(&current->mm->mmap_sem);
-		put_unused_fd(ret);
-		put_filp(filp);
 		kfree(q);
-		return err;
+		goto error;
 	}
 
 	/*
@@ -716,6 +821,11 @@ static int futex_fd(unsigned long uaddr, int signal)
 	fd_install(ret, filp);
 out:
 	return ret;
+error:
+	put_unused_fd(ret);
+	put_filp(filp);
+	ret = err;
+	goto out;
 }
 
 long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
@@ -740,6 +850,9 @@ long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
 	case FUTEX_CMP_REQUEUE:
 		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
 		break;
+	case FUTEX_WAKE_OP:
+		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
+		break;
 	default:
 		ret = -ENOSYS;
 	}
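As a usage illustration of the new FUTEX_WAKE_OP entry point (not part of the patch), a minimal userspace sketch, assuming the FUTEX_WAKE_OP constant and the FUTEX_OP() encoding helpers exported through <linux/futex.h>; glibc has no wrapper, so the raw syscall is used.

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical helper: atomically perform "*uaddr2 += 1", wake up to
 * nr_wake waiters on uaddr1 and, if the old value of *uaddr2 was
 * non-zero, also wake up to nr_wake2 waiters on uaddr2 - all while the
 * kernel holds both futex hash-bucket locks. */
static long wake_op(int *uaddr1, int *uaddr2, int nr_wake, int nr_wake2)
{
	int op = FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_NE, 0);

	return syscall(SYS_futex, uaddr1, FUTEX_WAKE_OP, nr_wake,
		       (void *)(long)nr_wake2, uaddr2, op);
}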
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 388977f3e9b7..0cbe633420fb 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -39,7 +39,7 @@ void inter_module_register(const char *im_name, struct module *owner, const void
 	struct list_head *tmp;
 	struct inter_module_entry *ime, *ime_new;
 
-	if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) {
+	if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) {
 		/* Overloaded kernel, not fatal */
 		printk(KERN_ERR
 			"Aiee, inter_module_register: cannot kmalloc entry for '%s'\n",
@@ -47,7 +47,6 @@ void inter_module_register(const char *im_name, struct module *owner, const void
 		kmalloc_failed = 1;
 		return;
 	}
-	memset(ime_new, 0, sizeof(*ime_new));
 	ime_new->im_name = im_name;
 	ime_new->owner = owner;
 	ime_new->userdata = userdata;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c29f83c16497..3ff7b925c387 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -111,7 +111,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
 	unsigned int status;
 
 	kstat_this_cpu.irqs[irq]++;
-	if (desc->status & IRQ_PER_CPU) {
+	if (CHECK_IRQ_PER_CPU(desc->status)) {
 		irqreturn_t action_ret;
 
 		/*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ac6700985705..1cfdb08ddf20 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -18,6 +18,10 @@
 
 cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
 
+#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
+cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
+#endif
+
 /**
  *	synchronize_irq - wait for pending IRQ handlers (on other CPUs)
  *
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 85d08daa6600..f26e534c6585 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,12 +19,22 @@ static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
  */
 static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
 
-void __attribute__((weak))
-proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
+{
+	/*
+	 * Save these away for later use. Re-progam when the
+	 * interrupt is pending
+	 */
+	set_pending_irq(irq, mask_val);
+}
+#else
+void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
 {
 	irq_affinity[irq] = mask_val;
 	irq_desc[irq].handler->set_affinity(irq, mask_val);
 }
+#endif
 
 static int irq_affinity_read_proc(char *page, char **start, off_t off,
 				  int count, int *eof, void *data)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b0237122b24e..f3ea492ab44d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -37,6 +37,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleloader.h>
+#include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
 #include <asm/kdebug.h>
@@ -72,7 +73,7 @@ static struct hlist_head kprobe_insn_pages;
  * get_insn_slot() - Find a slot on an executable page for an instruction.
  * We allocate an executable page if there's no room on existing ones.
  */
-kprobe_opcode_t *get_insn_slot(void)
+kprobe_opcode_t __kprobes *get_insn_slot(void)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
@@ -117,7 +118,7 @@ kprobe_opcode_t *get_insn_slot(void)
 	return kip->insns;
 }
 
-void free_insn_slot(kprobe_opcode_t *slot)
+void __kprobes free_insn_slot(kprobe_opcode_t *slot)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
@@ -152,20 +153,42 @@ void free_insn_slot(kprobe_opcode_t *slot)
 }
 
 /* Locks kprobe: irqs must be disabled */
-void lock_kprobes(void)
+void __kprobes lock_kprobes(void)
 {
+	unsigned long flags = 0;
+
+	/* Avoiding local interrupts to happen right after we take the kprobe_lock
+	 * and before we get a chance to update kprobe_cpu, this to prevent
+	 * deadlock when we have a kprobe on ISR routine and a kprobe on task
+	 * routine
+	 */
+	local_irq_save(flags);
+
 	spin_lock(&kprobe_lock);
 	kprobe_cpu = smp_processor_id();
+
+	local_irq_restore(flags);
 }
 
-void unlock_kprobes(void)
+void __kprobes unlock_kprobes(void)
 {
+	unsigned long flags = 0;
+
+	/* Avoiding local interrupts to happen right after we update
+	 * kprobe_cpu and before we get a a chance to release kprobe_lock,
+	 * this to prevent deadlock when we have a kprobe on ISR routine and
+	 * a kprobe on task routine
+	 */
+	local_irq_save(flags);
+
 	kprobe_cpu = NR_CPUS;
 	spin_unlock(&kprobe_lock);
+
+	local_irq_restore(flags);
 }
 
 /* You have to be holding the kprobe_lock */
-struct kprobe *get_kprobe(void *addr)
+struct kprobe __kprobes *get_kprobe(void *addr)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
@@ -183,7 +206,7 @@ struct kprobe *get_kprobe(void *addr)
  * Aggregate handlers for multiple kprobes support - these handlers
  * take care of invoking the individual kprobe handlers on p->list
  */
-static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
+static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe *kp;
 
@@ -198,8 +221,8 @@ static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
+static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
 					unsigned long flags)
 {
 	struct kprobe *kp;
 
@@ -213,8 +236,8 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
 	return;
 }
 
-static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
+static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
 					int trapnr)
 {
 	/*
 	 * if we faulted "during" the execution of a user specified
@@ -227,7 +250,7 @@ static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
 	return 0;
 }
 
-static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
+static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe *kp = curr_kprobe;
 	if (curr_kprobe && kp->break_handler) {
@@ -240,7 +263,7 @@ static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
+struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp)
 {
 	struct hlist_node *node;
 	struct kretprobe_instance *ri;
@@ -249,7 +272,8 @@ struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
 	return NULL;
 }
 
-static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
+static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe
+							      *rp)
 {
 	struct hlist_node *node;
 	struct kretprobe_instance *ri;
@@ -258,7 +282,7 @@ static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
 	return NULL;
 }
 
-void add_rp_inst(struct kretprobe_instance *ri)
+void __kprobes add_rp_inst(struct kretprobe_instance *ri)
 {
 	/*
 	 * Remove rp inst off the free list -
@@ -276,7 +300,7 @@ void add_rp_inst(struct kretprobe_instance *ri)
 	hlist_add_head(&ri->uflist, &ri->rp->used_instances);
 }
 
-void recycle_rp_inst(struct kretprobe_instance *ri)
+void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
 {
 	/* remove rp inst off the rprobe_inst_table */
 	hlist_del(&ri->hlist);
@@ -291,7 +315,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri)
 	kfree(ri);
 }
 
-struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
+struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
 {
 	return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
 }
@@ -302,7 +326,7 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
  * instances associated with this task. These left over instances represent
  * probed functions that have been called but will never return.
  */
-void kprobe_flush_task(struct task_struct *tk)
+void __kprobes kprobe_flush_task(struct task_struct *tk)
 {
 	struct kretprobe_instance *ri;
 	struct hlist_head *head;
@@ -322,7 +346,8 @@ void kprobe_flush_task(struct task_struct *tk)
  * This kprobe pre_handler is registered with every kretprobe. When probe
  * hits it will set up the return probe.
  */
-static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
+static int __kprobes pre_handler_kretprobe(struct kprobe *p,
+					   struct pt_regs *regs)
 {
 	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
 
@@ -353,7 +378,7 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
  * Add the new probe to old_p->list. Fail if this is the
  * second jprobe at the address - two jprobes can't coexist
  */
-static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
+static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
 {
 	struct kprobe *kp;
 
@@ -395,7 +420,8 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 * the intricacies
 * TODO: Move kcalloc outside the spinlock
 */
-static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
+static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
+					  struct kprobe *p)
 {
 	int ret = 0;
 	struct kprobe *ap;
@@ -434,15 +460,25 @@ static inline void cleanup_aggr_kprobe(struct kprobe *old_p,
 	spin_unlock_irqrestore(&kprobe_lock, flags);
 }
 
-int register_kprobe(struct kprobe *p)
+static int __kprobes in_kprobes_functions(unsigned long addr)
+{
+	if (addr >= (unsigned long)__kprobes_text_start
+		&& addr < (unsigned long)__kprobes_text_end)
+		return -EINVAL;
+	return 0;
+}
+
+int __kprobes register_kprobe(struct kprobe *p)
 {
 	int ret = 0;
 	unsigned long flags = 0;
 	struct kprobe *old_p;
 
-	if ((ret = arch_prepare_kprobe(p)) != 0) {
+	if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0)
+		return ret;
+	if ((ret = arch_prepare_kprobe(p)) != 0)
 		goto rm_kprobe;
-	}
+
 	spin_lock_irqsave(&kprobe_lock, flags);
 	old_p = get_kprobe(p->addr);
 	p->nmissed = 0;
@@ -466,7 +502,7 @@ rm_kprobe:
 	return ret;
 }
 
-void unregister_kprobe(struct kprobe *p)
+void __kprobes unregister_kprobe(struct kprobe *p)
 {
 	unsigned long flags;
 	struct kprobe *old_p;
@@ -487,7 +523,7 @@ static struct notifier_block kprobe_exceptions_nb = {
 	.priority = 0x7fffffff /* we need to notified first */
 };
 
-int register_jprobe(struct jprobe *jp)
+int __kprobes register_jprobe(struct jprobe *jp)
 {
 	/* Todo: Verify probepoint is a function entry point */
 	jp->kp.pre_handler = setjmp_pre_handler;
@@ -496,14 +532,14 @@ int register_jprobe(struct jprobe *jp)
 	return register_kprobe(&jp->kp);
 }
 
-void unregister_jprobe(struct jprobe *jp)
+void __kprobes unregister_jprobe(struct jprobe *jp)
 {
 	unregister_kprobe(&jp->kp);
 }
 
 #ifdef ARCH_SUPPORTS_KRETPROBES
 
-int register_kretprobe(struct kretprobe *rp)
+int __kprobes register_kretprobe(struct kretprobe *rp)
 {
 	int ret = 0;
 	struct kretprobe_instance *inst;
@@ -540,14 +576,14 @@ int register_kretprobe(struct kretprobe *rp)
 
 #else /* ARCH_SUPPORTS_KRETPROBES */
 
-int register_kretprobe(struct kretprobe *rp)
+int __kprobes register_kretprobe(struct kretprobe *rp)
 {
 	return -ENOSYS;
 }
 
 #endif /* ARCH_SUPPORTS_KRETPROBES */
 
-void unregister_kretprobe(struct kretprobe *rp)
+void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
 	unsigned long flags;
 	struct kretprobe_instance *ri;
diff --git a/kernel/module.c b/kernel/module.c
index c32995fbd8fd..4b39d3793c72 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1509,6 +1509,7 @@ static struct module *load_module(void __user *umod,
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
 	struct exception_table_entry *extable;
+	mm_segment_t old_fs;
 
 	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
 	       umod, len, uargs);
@@ -1779,6 +1780,24 @@ static struct module *load_module(void __user *umod,
 	if (err < 0)
 		goto cleanup;
 
+	/* flush the icache in correct context */
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	/*
+	 * Flush the instruction cache, since we've played with text.
+	 * Do it before processing of module parameters, so the module
+	 * can provide parameter accessor functions of its own.
+	 */
+	if (mod->module_init)
+		flush_icache_range((unsigned long)mod->module_init,
+				   (unsigned long)mod->module_init
+				   + mod->init_size);
+	flush_icache_range((unsigned long)mod->module_core,
+			   (unsigned long)mod->module_core + mod->core_size);
+
+	set_fs(old_fs);
+
 	mod->args = args;
 	if (obsparmindex) {
 		err = obsolete_params(mod->name, mod->args,
@@ -1860,7 +1879,6 @@ sys_init_module(void __user *umod,
 		const char __user *uargs)
 {
 	struct module *mod;
-	mm_segment_t old_fs = get_fs();
 	int ret = 0;
 
 	/* Must have permission */
@@ -1878,19 +1896,6 @@ sys_init_module(void __user *umod,
 		return PTR_ERR(mod);
 	}
 
-	/* flush the icache in correct context */
-	set_fs(KERNEL_DS);
-
-	/* Flush the instruction cache, since we've played with text */
-	if (mod->module_init)
-		flush_icache_range((unsigned long)mod->module_init,
-				   (unsigned long)mod->module_init
-				   + mod->init_size);
-	flush_icache_range((unsigned long)mod->module_core,
-			   (unsigned long)mod->module_core + mod->core_size);
-
-	set_fs(old_fs);
-
 	/* Now sew it into the lists. They won't access us, since
 	   strong_try_module_get() will fail. */
 	stop_machine_run(__link_module, mod, NR_CPUS);
diff --git a/kernel/params.c b/kernel/params.c
index d586c35ef8fc..fbf173215fd2 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -542,8 +542,8 @@ static void __init kernel_param_sysfs_setup(const char *name,
 {
 	struct module_kobject *mk;
 
-	mk = kmalloc(sizeof(struct module_kobject), GFP_KERNEL);
-	memset(mk, 0, sizeof(struct module_kobject));
+	mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
+	BUG_ON(!mk);
 
 	mk->mod = THIS_MODULE;
 	kobj_set_kset_s(mk, module_subsys);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 38798a2ff994..b7b532acd9fc 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -427,21 +427,23 @@ int posix_timer_event(struct k_itimer *timr,int si_private)
 	timr->sigq->info.si_code = SI_TIMER;
 	timr->sigq->info.si_tid = timr->it_id;
 	timr->sigq->info.si_value = timr->it_sigev_value;
+
 	if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
-		if (unlikely(timr->it_process->flags & PF_EXITING)) {
-			timr->it_sigev_notify = SIGEV_SIGNAL;
-			put_task_struct(timr->it_process);
-			timr->it_process = timr->it_process->group_leader;
-			goto group;
-		}
-		return send_sigqueue(timr->it_sigev_signo, timr->sigq,
-			timr->it_process);
-	}
-	else {
-group:
-		return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
-			timr->it_process);
+		struct task_struct *leader;
+		int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
+					timr->it_process);
+
+		if (likely(ret >= 0))
+			return ret;
+
+		timr->it_sigev_notify = SIGEV_SIGNAL;
+		leader = timr->it_process->group_leader;
+		put_task_struct(timr->it_process);
+		timr->it_process = leader;
 	}
+
+	return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
+				   timr->it_process);
 }
 EXPORT_SYMBOL_GPL(posix_timer_event);
 
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 917066a5767c..c14cd9991181 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -28,7 +28,7 @@ config PM_DEBUG
 
 config SOFTWARE_SUSPEND
 	bool "Software Suspend"
-	depends on EXPERIMENTAL && PM && SWAP && ((X86 && SMP) || ((FVR || PPC32 || X86) && !SMP))
+	depends on PM && SWAP && (X86 || ((FVR || PPC32) && !SMP))
 	---help---
 	  Enable the possibility of suspending the machine.
 	  It doesn't need APM.
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index 61deda04e39e..159149321b3c 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -60,9 +60,8 @@ struct pm_dev *pm_register(pm_dev_t type,
 			   unsigned long id,
 			   pm_callback callback)
 {
-	struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL);
+	struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL);
 	if (dev) {
-		memset(dev, 0, sizeof(*dev));
 		dev->type = type;
 		dev->id = id;
 		dev->callback = callback;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index eaacd5cb5889..d967e875ee82 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -1059,6 +1059,7 @@ int swsusp_resume(void)
 	BUG_ON(!error);
 	restore_processor_state();
 	restore_highmem();
+	touch_softlockup_watchdog();
 	device_power_up();
 	local_irq_enable();
 	return error;
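touch_softlockup_watchdog() above resets the new soft-lockup detector (kernel/softlockup.c, added by this patch set in the Makefile hunk) so the long, interrupt-less resume path is not reported as a lockup. A hedged sketch of the same pattern for other code that legitimately monopolizes a CPU; example_slow_operation() is hypothetical.

/* Hypothetical sketch: code that knowingly keeps a CPU busy without
 * scheduling for a long stretch can tell the soft-lockup watchdog that
 * this is expected, so it does not print a false warning. */
static void example_slow_operation(void)
{
	/* ... long, non-preemptible work ... */
	touch_softlockup_watchdog();
}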
diff --git a/kernel/printk.c b/kernel/printk.c
index 5092397fac29..a967605bc2e3 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -514,6 +514,9 @@ asmlinkage int printk(const char *fmt, ...)
 	return r;
 }
 
+/* cpu currently holding logbuf_lock */
+static volatile unsigned int printk_cpu = UINT_MAX;
+
 asmlinkage int vprintk(const char *fmt, va_list args)
 {
 	unsigned long flags;
@@ -522,11 +525,15 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 	static char printk_buf[1024];
 	static int log_level_unknown = 1;
 
-	if (unlikely(oops_in_progress))
+	preempt_disable();
+	if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
+		/* If a crash is occurring during printk() on this CPU,
+		 * make sure we can't deadlock */
 		zap_locks();
 
 	/* This stops the holder of console_sem just where we want him */
 	spin_lock_irqsave(&logbuf_lock, flags);
+	printk_cpu = smp_processor_id();
 
 	/* Emit the output into the temporary buffer */
 	printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
@@ -595,6 +602,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 		 * CPU until it is officially up.  We shouldn't be calling into
 		 * random console drivers on a CPU which doesn't exist yet..
 		 */
+		printk_cpu = UINT_MAX;
 		spin_unlock_irqrestore(&logbuf_lock, flags);
 		goto out;
 	}
@@ -604,6 +612,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 		 * We own the drivers.  We can drop the spinlock and let
 		 * release_console_sem() print the text
 		 */
+		printk_cpu = UINT_MAX;
 		spin_unlock_irqrestore(&logbuf_lock, flags);
 		console_may_schedule = 0;
 		release_console_sem();
@@ -613,9 +622,11 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 		 * allows the semaphore holder to proceed and to call the
 		 * console drivers with the output which we just produced.
 		 */
+		printk_cpu = UINT_MAX;
 		spin_unlock_irqrestore(&logbuf_lock, flags);
 	}
 out:
+	preempt_enable();
 	return printed_len;
 }
 EXPORT_SYMBOL(printk);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 8dcb8f6288bc..019e04ec065a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -118,6 +118,33 @@ int ptrace_check_attach(struct task_struct *child, int kill)
 	return ret;
 }
 
+static int may_attach(struct task_struct *task)
+{
+	if (!task->mm)
+		return -EPERM;
+	if (((current->uid != task->euid) ||
+	    (current->uid != task->suid) ||
+	    (current->uid != task->uid) ||
+	    (current->gid != task->egid) ||
+	    (current->gid != task->sgid) ||
+	    (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
+		return -EPERM;
+	smp_rmb();
+	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
+		return -EPERM;
+
+	return security_ptrace(current, task);
+}
+
+int ptrace_may_attach(struct task_struct *task)
+{
+	int err;
+	task_lock(task);
+	err = may_attach(task);
+	task_unlock(task);
+	return !err;
+}
+
 int ptrace_attach(struct task_struct *task)
 {
 	int retval;
@@ -127,22 +154,10 @@ int ptrace_attach(struct task_struct *task)
 		goto bad;
 	if (task == current)
 		goto bad;
-	if (!task->mm)
-		goto bad;
-	if(((current->uid != task->euid) ||
-	    (current->uid != task->suid) ||
-	    (current->uid != task->uid) ||
-	    (current->gid != task->egid) ||
-	    (current->gid != task->sgid) ||
-	    (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
-		goto bad;
-	smp_rmb();
-	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
-		goto bad;
 	/* the same process cannot be attached many times */
 	if (task->ptrace & PT_PTRACED)
 		goto bad;
-	retval = security_ptrace(current, task);
+	retval = may_attach(task);
 	if (retval)
 		goto bad;
 
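The refactored check is exported as ptrace_may_attach() so that other subsystems can apply the same policy before exposing a task's memory. A hedged caller sketch; example_may_read_mem() is a hypothetical name, not code from this patch.

/* Hypothetical caller: allow reading another task's address space only
 * when the attach policy above would allow ptracing it.
 * ptrace_may_attach() returns non-zero when access is permitted. */
static int example_may_read_mem(struct task_struct *task)
{
	if (!ptrace_may_attach(task))
		return -EPERM;
	return 0;
}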
diff --git a/kernel/resource.c b/kernel/resource.c
index 26967e042201..92285d822de6 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -430,10 +430,9 @@ EXPORT_SYMBOL(adjust_resource);
  */
 struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
 {
-	struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
+	struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
 
 	if (res) {
-		memset(res, 0, sizeof(*res));
 		res->name = name;
 		res->start = start;
 		res->end = start + n - 1;
diff --git a/kernel/sched.c b/kernel/sched.c
index f41fa94d2070..18b95520a2e2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4780,7 +4780,7 @@ static int sd_parent_degenerate(struct sched_domain *sd,
  * Attach the domain 'sd' to 'cpu' as its base domain.  Callers must
  * hold the hotplug lock.
  */
-void cpu_attach_domain(struct sched_domain *sd, int cpu)
+static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 {
 	runqueue_t *rq = cpu_rq(cpu);
 	struct sched_domain *tmp;
@@ -4803,7 +4803,7 @@ void cpu_attach_domain(struct sched_domain *sd, int cpu)
 }
 
 /* cpus with isolated domains */
-cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE;
+static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE;
 
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
@@ -4831,8 +4831,8 @@ __setup ("isolcpus=", isolated_cpu_setup);
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
  */
-void init_sched_build_groups(struct sched_group groups[],
-			cpumask_t span, int (*group_fn)(int cpu))
+static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
+				    int (*group_fn)(int cpu))
 {
 	struct sched_group *first = NULL, *last = NULL;
 	cpumask_t covered = CPU_MASK_NONE;
@@ -4865,12 +4865,85 @@ void init_sched_build_groups(struct sched_group groups[],
 	last->next = first;
 }
 
+#define SD_NODES_PER_DOMAIN 16
 
-#ifdef ARCH_HAS_SCHED_DOMAIN
-extern void build_sched_domains(const cpumask_t *cpu_map);
-extern void arch_init_sched_domains(const cpumask_t *cpu_map);
-extern void arch_destroy_sched_domains(const cpumask_t *cpu_map);
-#else
+#ifdef CONFIG_NUMA
+/**
+ * find_next_best_node - find the next node to include in a sched_domain
+ * @node: node whose sched_domain we're building
+ * @used_nodes: nodes already in the sched_domain
+ *
+ * Find the next node to include in a given scheduling domain.  Simply
+ * finds the closest node not already in the @used_nodes map.
+ *
+ * Should use nodemask_t.
+ */
+static int find_next_best_node(int node, unsigned long *used_nodes)
+{
+	int i, n, val, min_val, best_node = 0;
+
+	min_val = INT_MAX;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		/* Start at @node */
+		n = (node + i) % MAX_NUMNODES;
+
+		if (!nr_cpus_node(n))
+			continue;
+
+		/* Skip already used nodes */
+		if (test_bit(n, used_nodes))
+			continue;
+
+		/* Simple min distance search */
+		val = node_distance(node, n);
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	set_bit(best_node, used_nodes);
+	return best_node;
+}
+
+/**
+ * sched_domain_node_span - get a cpumask for a node's sched_domain
+ * @node: node whose cpumask we're constructing
+ * @size: number of nodes to include in this span
+ *
+ * Given a node, construct a good cpumask for its sched_domain to span.  It
+ * should be one that prevents unnecessary balancing, but also spreads tasks
+ * out optimally.
+ */
+static cpumask_t sched_domain_node_span(int node)
+{
+	int i;
+	cpumask_t span, nodemask;
+	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
+
+	cpus_clear(span);
+	bitmap_zero(used_nodes, MAX_NUMNODES);
+
+	nodemask = node_to_cpumask(node);
+	cpus_or(span, span, nodemask);
+	set_bit(node, used_nodes);
+
+	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
+		int next_node = find_next_best_node(node, used_nodes);
+		nodemask = node_to_cpumask(next_node);
+		cpus_or(span, span, nodemask);
+	}
+
+	return span;
+}
+#endif
+
+/*
+ * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
+ * can switch it on easily if needed.
+ */
 #ifdef CONFIG_SCHED_SMT
 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
 static struct sched_group sched_group_cpus[NR_CPUS];
@@ -4892,36 +4965,20 @@ static int cpu_to_phys_group(int cpu)
 }
 
 #ifdef CONFIG_NUMA
-
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
-static struct sched_group sched_group_nodes[MAX_NUMNODES];
-static int cpu_to_node_group(int cpu)
-{
-	return cpu_to_node(cpu);
-}
-#endif
-
-#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA)
 /*
- * The domains setup code relies on siblings not spanning
- * multiple nodes. Make sure the architecture has a proper
- * siblings map:
+ * The init_sched_build_groups can't handle what we want to do with node
+ * groups, so roll our own. Now each node has its own list of groups which
+ * gets dynamically allocated.
  */
-static void check_sibling_maps(void)
-{
-	int i, j;
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
 
-	for_each_online_cpu(i) {
-		for_each_cpu_mask(j, cpu_sibling_map[i]) {
-			if (cpu_to_node(i) != cpu_to_node(j)) {
-				printk(KERN_INFO "warning: CPU %d siblings map "
-					"to different node - isolating "
-					"them.\n", i);
-				cpu_sibling_map[i] = cpumask_of_cpu(i);
-				break;
-			}
-		}
-	}
+static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
+
+static int cpu_to_allnodes_group(int cpu)
+{
+	return cpu_to_node(cpu);
 }
 #endif
 
@@ -4929,9 +4986,24 @@ static void check_sibling_maps(void)
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
-static void build_sched_domains(const cpumask_t *cpu_map)
+void build_sched_domains(const cpumask_t *cpu_map)
 {
 	int i;
+#ifdef CONFIG_NUMA
+	struct sched_group **sched_group_nodes = NULL;
+	struct sched_group *sched_group_allnodes = NULL;
+
+	/*
+	 * Allocate the per-node list of sched groups
+	 */
+	sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES,
+					   GFP_ATOMIC);
+	if (!sched_group_nodes) {
+		printk(KERN_WARNING "Can not alloc sched group node list\n");
+		return;
+	}
+	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
+#endif
 
 	/*
 	 * Set up domains for cpus specified by the cpu_map.
@@ -4944,11 +5016,35 @@ static void build_sched_domains(const cpumask_t *cpu_map)
 		cpus_and(nodemask, nodemask, *cpu_map);
 
 #ifdef CONFIG_NUMA
+		if (cpus_weight(*cpu_map)
+				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
+			if (!sched_group_allnodes) {
+				sched_group_allnodes
+					= kmalloc(sizeof(struct sched_group)
+							* MAX_NUMNODES,
+						  GFP_KERNEL);
+				if (!sched_group_allnodes) {
+					printk(KERN_WARNING
+					"Can not alloc allnodes sched group\n");
+					break;
+				}
+				sched_group_allnodes_bycpu[i]
+						= sched_group_allnodes;
+			}
+			sd = &per_cpu(allnodes_domains, i);
+			*sd = SD_ALLNODES_INIT;
+			sd->span = *cpu_map;
+			group = cpu_to_allnodes_group(i);
+			sd->groups = &sched_group_allnodes[group];
+			p = sd;
+		} else
+			p = NULL;
+
 		sd = &per_cpu(node_domains, i);
-		group = cpu_to_node_group(i);
 		*sd = SD_NODE_INIT;
-		sd->span = *cpu_map;
-		sd->groups = &sched_group_nodes[group];
+		sd->span = sched_domain_node_span(cpu_to_node(i));
+		sd->parent = p;
+		cpus_and(sd->span, sd->span, *cpu_map);
 #endif
 
 		p = sd;
@@ -4973,7 +5069,7 @@ static void build_sched_domains(const cpumask_t *cpu_map)
 
 #ifdef CONFIG_SCHED_SMT
4975 | /* Set up CPU (sibling) groups */ | 5071 | /* Set up CPU (sibling) groups */ |
4976 | for_each_online_cpu(i) { | 5072 | for_each_cpu_mask(i, *cpu_map) { |
4977 | cpumask_t this_sibling_map = cpu_sibling_map[i]; | 5073 | cpumask_t this_sibling_map = cpu_sibling_map[i]; |
4978 | cpus_and(this_sibling_map, this_sibling_map, *cpu_map); | 5074 | cpus_and(this_sibling_map, this_sibling_map, *cpu_map); |
4979 | if (i != first_cpu(this_sibling_map)) | 5075 | if (i != first_cpu(this_sibling_map)) |
@@ -4998,8 +5094,77 @@ static void build_sched_domains(const cpumask_t *cpu_map) | |||
4998 | 5094 | ||
4999 | #ifdef CONFIG_NUMA | 5095 | #ifdef CONFIG_NUMA |
5000 | /* Set up node groups */ | 5096 | /* Set up node groups */ |
5001 | init_sched_build_groups(sched_group_nodes, *cpu_map, | 5097 | if (sched_group_allnodes) |
5002 | &cpu_to_node_group); | 5098 | init_sched_build_groups(sched_group_allnodes, *cpu_map, |
5099 | &cpu_to_allnodes_group); | ||
5100 | |||
5101 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
5102 | /* Set up node groups */ | ||
5103 | struct sched_group *sg, *prev; | ||
5104 | cpumask_t nodemask = node_to_cpumask(i); | ||
5105 | cpumask_t domainspan; | ||
5106 | cpumask_t covered = CPU_MASK_NONE; | ||
5107 | int j; | ||
5108 | |||
5109 | cpus_and(nodemask, nodemask, *cpu_map); | ||
5110 | if (cpus_empty(nodemask)) { | ||
5111 | sched_group_nodes[i] = NULL; | ||
5112 | continue; | ||
5113 | } | ||
5114 | |||
5115 | domainspan = sched_domain_node_span(i); | ||
5116 | cpus_and(domainspan, domainspan, *cpu_map); | ||
5117 | |||
5118 | sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); | ||
5119 | sched_group_nodes[i] = sg; | ||
5120 | for_each_cpu_mask(j, nodemask) { | ||
5121 | struct sched_domain *sd; | ||
5122 | sd = &per_cpu(node_domains, j); | ||
5123 | sd->groups = sg; | ||
5124 | if (sd->groups == NULL) { | ||
5125 | /* Turn off balancing if we have no groups */ | ||
5126 | sd->flags = 0; | ||
5127 | } | ||
5128 | } | ||
5129 | if (!sg) { | ||
5130 | printk(KERN_WARNING | ||
5131 | "Can not alloc domain group for node %d\n", i); | ||
5132 | continue; | ||
5133 | } | ||
5134 | sg->cpu_power = 0; | ||
5135 | sg->cpumask = nodemask; | ||
5136 | cpus_or(covered, covered, nodemask); | ||
5137 | prev = sg; | ||
5138 | |||
5139 | for (j = 0; j < MAX_NUMNODES; j++) { | ||
5140 | cpumask_t tmp, notcovered; | ||
5141 | int n = (i + j) % MAX_NUMNODES; | ||
5142 | |||
5143 | cpus_complement(notcovered, covered); | ||
5144 | cpus_and(tmp, notcovered, *cpu_map); | ||
5145 | cpus_and(tmp, tmp, domainspan); | ||
5146 | if (cpus_empty(tmp)) | ||
5147 | break; | ||
5148 | |||
5149 | nodemask = node_to_cpumask(n); | ||
5150 | cpus_and(tmp, tmp, nodemask); | ||
5151 | if (cpus_empty(tmp)) | ||
5152 | continue; | ||
5153 | |||
5154 | sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); | ||
5155 | if (!sg) { | ||
5156 | printk(KERN_WARNING | ||
5157 | "Can not alloc domain group for node %d\n", j); | ||
5158 | break; | ||
5159 | } | ||
5160 | sg->cpu_power = 0; | ||
5161 | sg->cpumask = tmp; | ||
5162 | cpus_or(covered, covered, tmp); | ||
5163 | prev->next = sg; | ||
5164 | prev = sg; | ||
5165 | } | ||
5166 | prev->next = sched_group_nodes[i]; | ||
5167 | } | ||
5003 | #endif | 5168 | #endif |
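The loop above strings the per-node groups together into a ring: the node's own group is the head, one group is appended for each nearby node that contributes CPUs, and the final prev->next assignment closes the circle. A toy userspace model of that shape (struct group and the node numbers are illustrative, not the kernel's types):

	#include <stdio.h>
	#include <stdlib.h>

	struct group {
		int node;
		struct group *next;
	};

	int main(void)
	{
		int nearby[] = { 2, 1, 3 };	/* nodes pulled into node 0's span */
		struct group *head, *prev, *sg;
		unsigned int i;

		head = calloc(1, sizeof(*head));	/* group for the node itself */
		head->node = 0;
		prev = head;

		for (i = 0; i < sizeof(nearby) / sizeof(nearby[0]); i++) {
			sg = calloc(1, sizeof(*sg));
			sg->node = nearby[i];
			prev->next = sg;		/* append, as the hunk does */
			prev = sg;
		}
		prev->next = head;			/* close the ring */

		sg = head;
		do {					/* walk exactly one full lap */
			printf("group for node %d\n", sg->node);
			sg = sg->next;
		} while (sg != head);
		return 0;
	}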
5004 | 5169 | ||
5005 | /* Calculate CPU power for physical packages and nodes */ | 5170 | /* Calculate CPU power for physical packages and nodes */ |
@@ -5018,14 +5183,46 @@ static void build_sched_domains(const cpumask_t *cpu_map) | |||
5018 | sd->groups->cpu_power = power; | 5183 | sd->groups->cpu_power = power; |
5019 | 5184 | ||
5020 | #ifdef CONFIG_NUMA | 5185 | #ifdef CONFIG_NUMA |
5021 | if (i == first_cpu(sd->groups->cpumask)) { | 5186 | sd = &per_cpu(allnodes_domains, i); |
5022 | /* Only add "power" once for each physical package. */ | 5187 | if (sd->groups) { |
5023 | sd = &per_cpu(node_domains, i); | 5188 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * |
5024 | sd->groups->cpu_power += power; | 5189 | (cpus_weight(sd->groups->cpumask)-1) / 10; |
5190 | sd->groups->cpu_power = power; | ||
5025 | } | 5191 | } |
5026 | #endif | 5192 | #endif |
5027 | } | 5193 | } |
5028 | 5194 | ||
5195 | #ifdef CONFIG_NUMA | ||
5196 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
5197 | struct sched_group *sg = sched_group_nodes[i]; | ||
5198 | int j; | ||
5199 | |||
5200 | if (sg == NULL) | ||
5201 | continue; | ||
5202 | next_sg: | ||
5203 | for_each_cpu_mask(j, sg->cpumask) { | ||
5204 | struct sched_domain *sd; | ||
5205 | int power; | ||
5206 | |||
5207 | sd = &per_cpu(phys_domains, j); | ||
5208 | if (j != first_cpu(sd->groups->cpumask)) { | ||
5209 | /* | ||
5210 | * Only add "power" once for each | ||
5211 | * physical package. | ||
5212 | */ | ||
5213 | continue; | ||
5214 | } | ||
5215 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | ||
5216 | (cpus_weight(sd->groups->cpumask)-1) / 10; | ||
5217 | |||
5218 | sg->cpu_power += power; | ||
5219 | } | ||
5220 | sg = sg->next; | ||
5221 | if (sg != sched_group_nodes[i]) | ||
5222 | goto next_sg; | ||
5223 | } | ||
5224 | #endif | ||
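As a worked example of the cpu_power arithmetic in the hunk above (assuming SCHED_LOAD_SCALE is 128, its usual value in this kernel generation): a physical-package group spanning 4 CPUs gets

	power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * (cpus_weight - 1) / 10
	      = 128 + 128 * 3 / 10
	      = 128 + 38 = 166

that is, slightly more than one CPU's worth, so the balancer still prefers spreading across packages before stacking work on siblings. The per-node group power is then the sum of those per-package values, added once per package via the first_cpu() check.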
5225 | |||
5029 | /* Attach the domains */ | 5226 | /* Attach the domains */ |
5030 | for_each_cpu_mask(i, *cpu_map) { | 5227 | for_each_cpu_mask(i, *cpu_map) { |
5031 | struct sched_domain *sd; | 5228 | struct sched_domain *sd; |
@@ -5040,13 +5237,10 @@ static void build_sched_domains(const cpumask_t *cpu_map) | |||
5040 | /* | 5237 | /* |
5041 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 5238 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
5042 | */ | 5239 | */ |
5043 | static void arch_init_sched_domains(cpumask_t *cpu_map) | 5240 | static void arch_init_sched_domains(const cpumask_t *cpu_map) |
5044 | { | 5241 | { |
5045 | cpumask_t cpu_default_map; | 5242 | cpumask_t cpu_default_map; |
5046 | 5243 | ||
5047 | #if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) | ||
5048 | check_sibling_maps(); | ||
5049 | #endif | ||
5050 | /* | 5244 | /* |
5051 | * Setup mask for cpus without special case scheduling requirements. | 5245 | * Setup mask for cpus without special case scheduling requirements. |
5052 | * For now this just excludes isolated cpus, but could be used to | 5246 | * For now this just excludes isolated cpus, but could be used to |
@@ -5059,10 +5253,47 @@ static void arch_init_sched_domains(cpumask_t *cpu_map) | |||
5059 | 5253 | ||
5060 | static void arch_destroy_sched_domains(const cpumask_t *cpu_map) | 5254 | static void arch_destroy_sched_domains(const cpumask_t *cpu_map) |
5061 | { | 5255 | { |
5062 | /* Do nothing: everything is statically allocated. */ | 5256 | #ifdef CONFIG_NUMA |
5063 | } | 5257 | int i; |
5258 | int cpu; | ||
5259 | |||
5260 | for_each_cpu_mask(cpu, *cpu_map) { | ||
5261 | struct sched_group *sched_group_allnodes | ||
5262 | = sched_group_allnodes_bycpu[cpu]; | ||
5263 | struct sched_group **sched_group_nodes | ||
5264 | = sched_group_nodes_bycpu[cpu]; | ||
5265 | |||
5266 | if (sched_group_allnodes) { | ||
5267 | kfree(sched_group_allnodes); | ||
5268 | sched_group_allnodes_bycpu[cpu] = NULL; | ||
5269 | } | ||
5270 | |||
5271 | if (!sched_group_nodes) | ||
5272 | continue; | ||
5273 | |||
5274 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
5275 | cpumask_t nodemask = node_to_cpumask(i); | ||
5276 | struct sched_group *oldsg, *sg = sched_group_nodes[i]; | ||
5064 | 5277 | ||
5065 | #endif /* ARCH_HAS_SCHED_DOMAIN */ | 5278 | cpus_and(nodemask, nodemask, *cpu_map); |
5279 | if (cpus_empty(nodemask)) | ||
5280 | continue; | ||
5281 | |||
5282 | if (sg == NULL) | ||
5283 | continue; | ||
5284 | sg = sg->next; | ||
5285 | next_sg: | ||
5286 | oldsg = sg; | ||
5287 | sg = sg->next; | ||
5288 | kfree(oldsg); | ||
5289 | if (oldsg != sched_group_nodes[i]) | ||
5290 | goto next_sg; | ||
5291 | } | ||
5292 | kfree(sched_group_nodes); | ||
5293 | sched_group_nodes_bycpu[cpu] = NULL; | ||
5294 | } | ||
5295 | #endif | ||
5296 | } | ||
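The teardown above walks each ring starting one element past the head, always reads ->next before freeing, and stops once the head itself has been freed. A standalone userspace sketch of the same pattern (the three-element ring and struct group are illustrative only):

	#include <stdlib.h>

	struct group {
		struct group *next;
	};

	/* Free a circular, singly linked ring, head included, mirroring the
	 * oldsg/next_sg loop in arch_destroy_sched_domains() above. */
	static void free_group_ring(struct group *head)
	{
		struct group *sg, *oldsg;

		if (!head)
			return;
		sg = head->next;		/* start one past the head */
		do {
			oldsg = sg;
			sg = sg->next;		/* fetch next before freeing */
			free(oldsg);
		} while (oldsg != head);	/* the head is freed last */
	}

	int main(void)
	{
		struct group *a = malloc(sizeof(*a));
		struct group *b = malloc(sizeof(*b));
		struct group *c = malloc(sizeof(*c));

		a->next = b;			/* build a -> b -> c -> a */
		b->next = c;
		c->next = a;
		free_group_ring(a);
		return 0;
	}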
5066 | 5297 | ||
5067 | /* | 5298 | /* |
5068 | * Detach sched domains from a group of cpus specified in cpu_map | 5299 | * Detach sched domains from a group of cpus specified in cpu_map |
diff --git a/kernel/signal.c b/kernel/signal.c index d282fea81138..4980a073237f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -678,7 +678,7 @@ static int check_kill_permission(int sig, struct siginfo *info, | |||
678 | 678 | ||
679 | /* forward decl */ | 679 | /* forward decl */ |
680 | static void do_notify_parent_cldstop(struct task_struct *tsk, | 680 | static void do_notify_parent_cldstop(struct task_struct *tsk, |
681 | struct task_struct *parent, | 681 | int to_self, |
682 | int why); | 682 | int why); |
683 | 683 | ||
684 | /* | 684 | /* |
@@ -729,14 +729,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) | |||
729 | p->signal->group_stop_count = 0; | 729 | p->signal->group_stop_count = 0; |
730 | p->signal->flags = SIGNAL_STOP_CONTINUED; | 730 | p->signal->flags = SIGNAL_STOP_CONTINUED; |
731 | spin_unlock(&p->sighand->siglock); | 731 | spin_unlock(&p->sighand->siglock); |
732 | if (p->ptrace & PT_PTRACED) | 732 | do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_STOPPED); |
733 | do_notify_parent_cldstop(p, p->parent, | ||
734 | CLD_STOPPED); | ||
735 | else | ||
736 | do_notify_parent_cldstop( | ||
737 | p->group_leader, | ||
738 | p->group_leader->real_parent, | ||
739 | CLD_STOPPED); | ||
740 | spin_lock(&p->sighand->siglock); | 733 | spin_lock(&p->sighand->siglock); |
741 | } | 734 | } |
742 | rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); | 735 | rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); |
@@ -777,14 +770,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) | |||
777 | p->signal->flags = SIGNAL_STOP_CONTINUED; | 770 | p->signal->flags = SIGNAL_STOP_CONTINUED; |
778 | p->signal->group_exit_code = 0; | 771 | p->signal->group_exit_code = 0; |
779 | spin_unlock(&p->sighand->siglock); | 772 | spin_unlock(&p->sighand->siglock); |
780 | if (p->ptrace & PT_PTRACED) | 773 | do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_CONTINUED); |
781 | do_notify_parent_cldstop(p, p->parent, | ||
782 | CLD_CONTINUED); | ||
783 | else | ||
784 | do_notify_parent_cldstop( | ||
785 | p->group_leader, | ||
786 | p->group_leader->real_parent, | ||
787 | CLD_CONTINUED); | ||
788 | spin_lock(&p->sighand->siglock); | 774 | spin_lock(&p->sighand->siglock); |
789 | } else { | 775 | } else { |
790 | /* | 776 | /* |
@@ -1380,16 +1366,16 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1380 | unsigned long flags; | 1366 | unsigned long flags; |
1381 | int ret = 0; | 1367 | int ret = 0; |
1382 | 1368 | ||
1383 | /* | ||
1384 | * We need the tasklist lock even for the specific | ||
1385 | * thread case (when we don't need to follow the group | ||
1386 | * lists) in order to avoid races with "p->sighand" | ||
1387 | * going away or changing from under us. | ||
1388 | */ | ||
1389 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1369 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
1390 | read_lock(&tasklist_lock); | 1370 | read_lock(&tasklist_lock); |
1371 | |||
1372 | if (unlikely(p->flags & PF_EXITING)) { | ||
1373 | ret = -1; | ||
1374 | goto out_err; | ||
1375 | } | ||
1376 | |||
1391 | spin_lock_irqsave(&p->sighand->siglock, flags); | 1377 | spin_lock_irqsave(&p->sighand->siglock, flags); |
1392 | 1378 | ||
1393 | if (unlikely(!list_empty(&q->list))) { | 1379 | if (unlikely(!list_empty(&q->list))) { |
1394 | /* | 1380 | /* |
1395 | * If an SI_TIMER entry is already queue just increment | 1381 | * If an SI_TIMER entry is already queue just increment |
@@ -1399,7 +1385,7 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1399 | BUG(); | 1385 | BUG(); |
1400 | q->info.si_overrun++; | 1386 | q->info.si_overrun++; |
1401 | goto out; | 1387 | goto out; |
1402 | } | 1388 | } |
1403 | /* Short-circuit ignored signals. */ | 1389 | /* Short-circuit ignored signals. */ |
1404 | if (sig_ignored(p, sig)) { | 1390 | if (sig_ignored(p, sig)) { |
1405 | ret = 1; | 1391 | ret = 1; |
@@ -1414,8 +1400,10 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1414 | 1400 | ||
1415 | out: | 1401 | out: |
1416 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 1402 | spin_unlock_irqrestore(&p->sighand->siglock, flags); |
1403 | out_err: | ||
1417 | read_unlock(&tasklist_lock); | 1404 | read_unlock(&tasklist_lock); |
1418 | return(ret); | 1405 | |
1406 | return ret; | ||
1419 | } | 1407 | } |
1420 | 1408 | ||
1421 | int | 1409 | int |
@@ -1542,14 +1530,20 @@ void do_notify_parent(struct task_struct *tsk, int sig) | |||
1542 | spin_unlock_irqrestore(&psig->siglock, flags); | 1530 | spin_unlock_irqrestore(&psig->siglock, flags); |
1543 | } | 1531 | } |
1544 | 1532 | ||
1545 | static void | 1533 | static void do_notify_parent_cldstop(struct task_struct *tsk, int to_self, int why) |
1546 | do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent, | ||
1547 | int why) | ||
1548 | { | 1534 | { |
1549 | struct siginfo info; | 1535 | struct siginfo info; |
1550 | unsigned long flags; | 1536 | unsigned long flags; |
1537 | struct task_struct *parent; | ||
1551 | struct sighand_struct *sighand; | 1538 | struct sighand_struct *sighand; |
1552 | 1539 | ||
1540 | if (to_self) | ||
1541 | parent = tsk->parent; | ||
1542 | else { | ||
1543 | tsk = tsk->group_leader; | ||
1544 | parent = tsk->real_parent; | ||
1545 | } | ||
1546 | |||
1553 | info.si_signo = SIGCHLD; | 1547 | info.si_signo = SIGCHLD; |
1554 | info.si_errno = 0; | 1548 | info.si_errno = 0; |
1555 | info.si_pid = tsk->pid; | 1549 | info.si_pid = tsk->pid; |
@@ -1618,8 +1612,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) | |||
1618 | !(current->ptrace & PT_ATTACHED)) && | 1612 | !(current->ptrace & PT_ATTACHED)) && |
1619 | (likely(current->parent->signal != current->signal) || | 1613 | (likely(current->parent->signal != current->signal) || |
1620 | !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { | 1614 | !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { |
1621 | do_notify_parent_cldstop(current, current->parent, | 1615 | do_notify_parent_cldstop(current, 1, CLD_TRAPPED); |
1622 | CLD_TRAPPED); | ||
1623 | read_unlock(&tasklist_lock); | 1616 | read_unlock(&tasklist_lock); |
1624 | schedule(); | 1617 | schedule(); |
1625 | } else { | 1618 | } else { |
@@ -1668,25 +1661,25 @@ void ptrace_notify(int exit_code) | |||
1668 | static void | 1661 | static void |
1669 | finish_stop(int stop_count) | 1662 | finish_stop(int stop_count) |
1670 | { | 1663 | { |
1664 | int to_self; | ||
1665 | |||
1671 | /* | 1666 | /* |
1672 | * If there are no other threads in the group, or if there is | 1667 | * If there are no other threads in the group, or if there is |
1673 | * a group stop in progress and we are the last to stop, | 1668 | * a group stop in progress and we are the last to stop, |
1674 | * report to the parent. When ptraced, every thread reports itself. | 1669 | * report to the parent. When ptraced, every thread reports itself. |
1675 | */ | 1670 | */ |
1676 | if (stop_count < 0 || (current->ptrace & PT_PTRACED)) { | 1671 | if (stop_count < 0 || (current->ptrace & PT_PTRACED)) |
1677 | read_lock(&tasklist_lock); | 1672 | to_self = 1; |
1678 | do_notify_parent_cldstop(current, current->parent, | 1673 | else if (stop_count == 0) |
1679 | CLD_STOPPED); | 1674 | to_self = 0; |
1680 | read_unlock(&tasklist_lock); | 1675 | else |
1681 | } | 1676 | goto out; |
1682 | else if (stop_count == 0) { | ||
1683 | read_lock(&tasklist_lock); | ||
1684 | do_notify_parent_cldstop(current->group_leader, | ||
1685 | current->group_leader->real_parent, | ||
1686 | CLD_STOPPED); | ||
1687 | read_unlock(&tasklist_lock); | ||
1688 | } | ||
1689 | 1677 | ||
1678 | read_lock(&tasklist_lock); | ||
1679 | do_notify_parent_cldstop(current, to_self, CLD_STOPPED); | ||
1680 | read_unlock(&tasklist_lock); | ||
1681 | |||
1682 | out: | ||
1690 | schedule(); | 1683 | schedule(); |
1691 | /* | 1684 | /* |
1692 | * Now we don't run again until continued. | 1685 | * Now we don't run again until continued. |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c new file mode 100644 index 000000000000..75976209cea7 --- /dev/null +++ b/kernel/softlockup.c | |||
@@ -0,0 +1,151 @@ | |||
1 | /* | ||
2 | * Detect Soft Lockups | ||
3 | * | ||
4 | * started by Ingo Molnar, (C) 2005, Red Hat | ||
5 | * | ||
6 | * this code detects soft lockups: incidents where the kernel | ||
7 | * does not reschedule on a CPU for 10 seconds or more. | ||
8 | */ | ||
9 | |||
10 | #include <linux/mm.h> | ||
11 | #include <linux/cpu.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/delay.h> | ||
14 | #include <linux/kthread.h> | ||
15 | #include <linux/notifier.h> | ||
16 | #include <linux/module.h> | ||
17 | |||
18 | static DEFINE_SPINLOCK(print_lock); | ||
19 | |||
20 | static DEFINE_PER_CPU(unsigned long, timestamp) = 0; | ||
21 | static DEFINE_PER_CPU(unsigned long, print_timestamp) = 0; | ||
22 | static DEFINE_PER_CPU(struct task_struct *, watchdog_task); | ||
23 | |||
24 | static int did_panic = 0; | ||
25 | static int softlock_panic(struct notifier_block *this, unsigned long event, | ||
26 | void *ptr) | ||
27 | { | ||
28 | did_panic = 1; | ||
29 | |||
30 | return NOTIFY_DONE; | ||
31 | } | ||
32 | |||
33 | static struct notifier_block panic_block = { | ||
34 | .notifier_call = softlock_panic, | ||
35 | }; | ||
36 | |||
37 | void touch_softlockup_watchdog(void) | ||
38 | { | ||
39 | per_cpu(timestamp, raw_smp_processor_id()) = jiffies; | ||
40 | } | ||
41 | EXPORT_SYMBOL(touch_softlockup_watchdog); | ||
42 | |||
43 | /* | ||
44 | * This callback runs from the timer interrupt, and checks | ||
45 | * whether the watchdog thread has hung or not: | ||
46 | */ | ||
47 | void softlockup_tick(struct pt_regs *regs) | ||
48 | { | ||
49 | int this_cpu = smp_processor_id(); | ||
50 | unsigned long timestamp = per_cpu(timestamp, this_cpu); | ||
51 | |||
52 | if (per_cpu(print_timestamp, this_cpu) == timestamp) | ||
53 | return; | ||
54 | |||
55 | /* Do not cause a second panic when there already was one */ | ||
56 | if (did_panic) | ||
57 | return; | ||
58 | |||
59 | if (time_after(jiffies, timestamp + 10*HZ)) { | ||
60 | per_cpu(print_timestamp, this_cpu) = timestamp; | ||
61 | |||
62 | spin_lock(&print_lock); | ||
63 | printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n", | ||
64 | this_cpu); | ||
65 | show_regs(regs); | ||
66 | spin_unlock(&print_lock); | ||
67 | } | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * The watchdog thread - runs every second and touches the timestamp. | ||
72 | */ | ||
73 | static int watchdog(void * __bind_cpu) | ||
74 | { | ||
75 | struct sched_param param = { .sched_priority = 99 }; | ||
76 | int this_cpu = (long) __bind_cpu; | ||
77 | |||
78 | printk("softlockup thread %d started up.\n", this_cpu); | ||
79 | |||
80 | sched_setscheduler(current, SCHED_FIFO, ¶m); | ||
81 | current->flags |= PF_NOFREEZE; | ||
82 | |||
83 | set_current_state(TASK_INTERRUPTIBLE); | ||
84 | |||
85 | /* | ||
86 | * Run briefly once per second - if this gets delayed for | ||
87 | * more than 10 seconds then the debug-printout triggers | ||
88 | * in softlockup_tick(): | ||
89 | */ | ||
90 | while (!kthread_should_stop()) { | ||
91 | msleep_interruptible(1000); | ||
92 | touch_softlockup_watchdog(); | ||
93 | } | ||
94 | __set_current_state(TASK_RUNNING); | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Create/destroy watchdog threads as CPUs come and go: | ||
101 | */ | ||
102 | static int __devinit | ||
103 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
104 | { | ||
105 | int hotcpu = (unsigned long)hcpu; | ||
106 | struct task_struct *p; | ||
107 | |||
108 | switch (action) { | ||
109 | case CPU_UP_PREPARE: | ||
110 | BUG_ON(per_cpu(watchdog_task, hotcpu)); | ||
111 | p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); | ||
112 | if (IS_ERR(p)) { | ||
113 | printk("watchdog for %i failed\n", hotcpu); | ||
114 | return NOTIFY_BAD; | ||
115 | } | ||
116 | per_cpu(watchdog_task, hotcpu) = p; | ||
117 | kthread_bind(p, hotcpu); | ||
118 | break; | ||
119 | case CPU_ONLINE: | ||
120 | |||
121 | wake_up_process(per_cpu(watchdog_task, hotcpu)); | ||
122 | break; | ||
123 | #ifdef CONFIG_HOTPLUG_CPU | ||
124 | case CPU_UP_CANCELED: | ||
125 | /* Unbind so it can run. Fall thru. */ | ||
126 | kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id()); | ||
127 | case CPU_DEAD: | ||
128 | p = per_cpu(watchdog_task, hotcpu); | ||
129 | per_cpu(watchdog_task, hotcpu) = NULL; | ||
130 | kthread_stop(p); | ||
131 | break; | ||
132 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
133 | } | ||
134 | return NOTIFY_OK; | ||
135 | } | ||
136 | |||
137 | static struct notifier_block __devinitdata cpu_nfb = { | ||
138 | .notifier_call = cpu_callback | ||
139 | }; | ||
140 | |||
141 | __init void spawn_softlockup_task(void) | ||
142 | { | ||
143 | void *cpu = (void *)(long)smp_processor_id(); | ||
144 | |||
145 | cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); | ||
146 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); | ||
147 | register_cpu_notifier(&cpu_nfb); | ||
148 | |||
149 | notifier_chain_register(&panic_notifier_list, &panic_block); | ||
150 | } | ||
151 | |||
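The export above exists for code that legitimately keeps a CPU busy for a long stretch without scheduling. A hedged sketch of how such a caller might use it: do_long_calibration() is a made-up function, and the header assumed to declare touch_softlockup_watchdog() in this tree is linux/sched.h.

	#include <linux/sched.h>	/* assumed home of touch_softlockup_watchdog() */
	#include <linux/delay.h>

	/* Hypothetical long-running, non-scheduling loop: touching the
	 * watchdog each pass keeps softlockup_tick() from printing a false
	 * "BUG: soft lockup" for this CPU. */
	static void do_long_calibration(void)
	{
		int i;

		for (i = 0; i < 60; i++) {
			mdelay(500);			/* half a second of busy-waiting */
			touch_softlockup_watchdog();	/* refresh this CPU's timestamp */
		}
	}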
diff --git a/kernel/sys.c b/kernel/sys.c index 0bcaed6560ac..c80412be2302 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1711,7 +1711,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
1711 | unsigned long arg4, unsigned long arg5) | 1711 | unsigned long arg4, unsigned long arg5) |
1712 | { | 1712 | { |
1713 | long error; | 1713 | long error; |
1714 | int sig; | ||
1715 | 1714 | ||
1716 | error = security_task_prctl(option, arg2, arg3, arg4, arg5); | 1715 | error = security_task_prctl(option, arg2, arg3, arg4, arg5); |
1717 | if (error) | 1716 | if (error) |
@@ -1719,12 +1718,11 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
1719 | 1718 | ||
1720 | switch (option) { | 1719 | switch (option) { |
1721 | case PR_SET_PDEATHSIG: | 1720 | case PR_SET_PDEATHSIG: |
1722 | sig = arg2; | 1721 | if (!valid_signal(arg2)) { |
1723 | if (!valid_signal(sig)) { | ||
1724 | error = -EINVAL; | 1722 | error = -EINVAL; |
1725 | break; | 1723 | break; |
1726 | } | 1724 | } |
1727 | current->pdeath_signal = sig; | 1725 | current->pdeath_signal = arg2; |
1728 | break; | 1726 | break; |
1729 | case PR_GET_PDEATHSIG: | 1727 | case PR_GET_PDEATHSIG: |
1730 | error = put_user(current->pdeath_signal, (int __user *)arg2); | 1728 | error = put_user(current->pdeath_signal, (int __user *)arg2); |
diff --git a/kernel/timer.c b/kernel/timer.c index 5377f40723ff..13e2b513be01 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -950,6 +950,7 @@ void do_timer(struct pt_regs *regs) | |||
950 | { | 950 | { |
951 | jiffies_64++; | 951 | jiffies_64++; |
952 | update_times(); | 952 | update_times(); |
953 | softlockup_tick(regs); | ||
953 | } | 954 | } |
954 | 955 | ||
955 | #ifdef __ARCH_WANT_SYS_ALARM | 956 | #ifdef __ARCH_WANT_SYS_ALARM |
@@ -1428,7 +1429,7 @@ static inline u64 time_interpolator_get_cycles(unsigned int src) | |||
1428 | } | 1429 | } |
1429 | } | 1430 | } |
1430 | 1431 | ||
1431 | static inline u64 time_interpolator_get_counter(void) | 1432 | static inline u64 time_interpolator_get_counter(int writelock) |
1432 | { | 1433 | { |
1433 | unsigned int src = time_interpolator->source; | 1434 | unsigned int src = time_interpolator->source; |
1434 | 1435 | ||
@@ -1442,6 +1443,15 @@ static inline u64 time_interpolator_get_counter(void) | |||
1442 | now = time_interpolator_get_cycles(src); | 1443 | now = time_interpolator_get_cycles(src); |
1443 | if (lcycle && time_after(lcycle, now)) | 1444 | if (lcycle && time_after(lcycle, now)) |
1444 | return lcycle; | 1445 | return lcycle; |
1446 | |||
1447 | /* When holding the xtime write lock, there's no need | ||
1448 | * to add the overhead of the cmpxchg. Readers are | ||
1449 | * forced to retry until the write lock is released. | ||
1450 | */ | ||
1451 | if (writelock) { | ||
1452 | time_interpolator->last_cycle = now; | ||
1453 | return now; | ||
1454 | } | ||
1445 | /* Keep track of the last timer value returned. The use of cmpxchg here | 1455 | /* Keep track of the last timer value returned. The use of cmpxchg here |
1446 | * will cause contention in an SMP environment. | 1456 | * will cause contention in an SMP environment. |
1447 | */ | 1457 | */ |
@@ -1455,7 +1465,7 @@ static inline u64 time_interpolator_get_counter(void) | |||
1455 | void time_interpolator_reset(void) | 1465 | void time_interpolator_reset(void) |
1456 | { | 1466 | { |
1457 | time_interpolator->offset = 0; | 1467 | time_interpolator->offset = 0; |
1458 | time_interpolator->last_counter = time_interpolator_get_counter(); | 1468 | time_interpolator->last_counter = time_interpolator_get_counter(1); |
1459 | } | 1469 | } |
1460 | 1470 | ||
1461 | #define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift) | 1471 | #define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift) |
@@ -1467,7 +1477,7 @@ unsigned long time_interpolator_get_offset(void) | |||
1467 | return 0; | 1477 | return 0; |
1468 | 1478 | ||
1469 | return time_interpolator->offset + | 1479 | return time_interpolator->offset + |
1470 | GET_TI_NSECS(time_interpolator_get_counter(), time_interpolator); | 1480 | GET_TI_NSECS(time_interpolator_get_counter(0), time_interpolator); |
1471 | } | 1481 | } |
1472 | 1482 | ||
1473 | #define INTERPOLATOR_ADJUST 65536 | 1483 | #define INTERPOLATOR_ADJUST 65536 |
@@ -1490,7 +1500,7 @@ static void time_interpolator_update(long delta_nsec) | |||
1490 | * and the tuning logic insures that. | 1500 | * and the tuning logic insures that. |
1491 | */ | 1501 | */ |
1492 | 1502 | ||
1493 | counter = time_interpolator_get_counter(); | 1503 | counter = time_interpolator_get_counter(1); |
1494 | offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator); | 1504 | offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator); |
1495 | 1505 | ||
1496 | if (delta_nsec < 0 || (unsigned long) delta_nsec < offset) | 1506 | if (delta_nsec < 0 || (unsigned long) delta_nsec < offset) |
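The writelock argument added above skips the cmpxchg when the caller already holds the xtime write lock. Below is a userspace model of the underlying pattern: readers publish the highest counter value seen so time never appears to run backwards, while a write-lock holder stores directly. It uses a fake counter and GCC's __sync builtin; the kernel code uses cmpxchg() on last_cycle and time_after() for wraparound.

	#include <stdio.h>

	static unsigned long last_cycle;
	static unsigned long fake_hw_counter = 100;	/* stands in for the hardware timer */

	static unsigned long get_counter(int writelock)
	{
		unsigned long now = fake_hw_counter;
		unsigned long prev;

		if (writelock) {
			last_cycle = now;	/* writer is exclusive: plain store */
			return now;
		}
		do {
			prev = last_cycle;
			if (prev >= now)	/* a later value was already published */
				return prev;
		} while (__sync_val_compare_and_swap(&last_cycle, prev, now) != prev);
		return now;
	}

	int main(void)
	{
		printf("%lu\n", get_counter(0));	/* 100, via the cmpxchg path */
		fake_hw_counter = 90;			/* simulate a counter stepping back */
		printf("%lu\n", get_counter(0));	/* still 100: stays monotonic */
		fake_hw_counter = 120;
		printf("%lu\n", get_counter(1));	/* 120, write-lock fast path */
		return 0;
	}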
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c7e36d4a70ca..91bacb13a7e2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -308,10 +308,9 @@ struct workqueue_struct *__create_workqueue(const char *name, | |||
308 | struct workqueue_struct *wq; | 308 | struct workqueue_struct *wq; |
309 | struct task_struct *p; | 309 | struct task_struct *p; |
310 | 310 | ||
311 | wq = kmalloc(sizeof(*wq), GFP_KERNEL); | 311 | wq = kzalloc(sizeof(*wq), GFP_KERNEL); |
312 | if (!wq) | 312 | if (!wq) |
313 | return NULL; | 313 | return NULL; |
314 | memset(wq, 0, sizeof(*wq)); | ||
315 | 314 | ||
316 | wq->name = name; | 315 | wq->name = name; |
317 | /* We don't need the distraction of CPUs appearing and vanishing. */ | 316 | /* We don't need the distraction of CPUs appearing and vanishing. */ |
@@ -499,7 +498,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
499 | case CPU_UP_PREPARE: | 498 | case CPU_UP_PREPARE: |
500 | /* Create a new workqueue thread for it. */ | 499 | /* Create a new workqueue thread for it. */ |
501 | list_for_each_entry(wq, &workqueues, list) { | 500 | list_for_each_entry(wq, &workqueues, list) { |
502 | if (create_workqueue_thread(wq, hotcpu) < 0) { | 501 | if (!create_workqueue_thread(wq, hotcpu)) { |
503 | printk("workqueue for %i failed\n", hotcpu); | 502 | printk("workqueue for %i failed\n", hotcpu); |
504 | return NOTIFY_BAD; | 503 | return NOTIFY_BAD; |
505 | } | 504 | } |