author     Len Brown <len.brown@intel.com>   2005-09-08 01:45:47 -0400
committer  Len Brown <len.brown@intel.com>   2005-09-08 01:45:47 -0400
commit     64e47488c913ac704d465a6af86a26786d1412a5 (patch)
tree       d3b0148592963dcde26e4bb35ddfec8b1eaf8e23 /kernel
parent     4a35a46bf1cda4737c428380d1db5d15e2590d18 (diff)
parent     caf39e87cc1182f7dae84eefc43ca14d54c78ef9 (diff)
Merge linux-2.6 with linux-acpi-2.6
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile        |   1
-rw-r--r--  kernel/acct.c          |   2
-rw-r--r--  kernel/cpuset.c        | 125
-rw-r--r--  kernel/fork.c          |   3
-rw-r--r--  kernel/futex.c         | 137
-rw-r--r--  kernel/intermodule.c   |   3
-rw-r--r--  kernel/irq/handle.c    |   2
-rw-r--r--  kernel/irq/manage.c    |   4
-rw-r--r--  kernel/irq/proc.c      |  14
-rw-r--r--  kernel/kprobes.c       |  94
-rw-r--r--  kernel/module.c        |  33
-rw-r--r--  kernel/params.c        |   4
-rw-r--r--  kernel/posix-timers.c  |  28
-rw-r--r--  kernel/power/Kconfig   |  14
-rw-r--r--  kernel/power/disk.c    |  55
-rw-r--r--  kernel/power/main.c    |   5
-rw-r--r--  kernel/power/pm.c      |   3
-rw-r--r--  kernel/power/process.c |  29
-rw-r--r--  kernel/power/swsusp.c  | 202
-rw-r--r--  kernel/printk.c        |  13
-rw-r--r--  kernel/ptrace.c        |  41
-rw-r--r--  kernel/resource.c      |   3
-rw-r--r--  kernel/sched.c         | 340
-rw-r--r--  kernel/signal.c        |  83
-rw-r--r--  kernel/softlockup.c    | 151
-rw-r--r--  kernel/sys.c           |   6
-rw-r--r--  kernel/timer.c         |  18
-rw-r--r--  kernel/workqueue.c     |   5
28 files changed, 1128 insertions, 290 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index cb05cd05d237..8d57a2f1226b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_AUDIT) += audit.o | |||
27 | obj-$(CONFIG_AUDITSYSCALL) += auditsc.o | 27 | obj-$(CONFIG_AUDITSYSCALL) += auditsc.o |
28 | obj-$(CONFIG_KPROBES) += kprobes.o | 28 | obj-$(CONFIG_KPROBES) += kprobes.o |
29 | obj-$(CONFIG_SYSFS) += ksysfs.o | 29 | obj-$(CONFIG_SYSFS) += ksysfs.o |
30 | obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o | ||
30 | obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ | 31 | obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ |
31 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | 32 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o |
32 | obj-$(CONFIG_SECCOMP) += seccomp.o | 33 | obj-$(CONFIG_SECCOMP) += seccomp.o |
diff --git a/kernel/acct.c b/kernel/acct.c
index 4168f631868e..f70e6027cca9 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -220,7 +220,7 @@ asmlinkage long sys_acct(const char __user *name) | |||
220 | return (PTR_ERR(tmp)); | 220 | return (PTR_ERR(tmp)); |
221 | } | 221 | } |
222 | /* Difference from BSD - they don't do O_APPEND */ | 222 | /* Difference from BSD - they don't do O_APPEND */ |
223 | file = filp_open(tmp, O_WRONLY|O_APPEND, 0); | 223 | file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0); |
224 | putname(tmp); | 224 | putname(tmp); |
225 | if (IS_ERR(file)) { | 225 | if (IS_ERR(file)) { |
226 | return (PTR_ERR(file)); | 226 | return (PTR_ERR(file)); |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8ab1b4e518b8..1f06e7690106 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -628,13 +628,6 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
628 | * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. | 628 | * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. |
629 | */ | 629 | */ |
630 | 630 | ||
631 | /* | ||
632 | * Hack to avoid 2.6.13 partial node dynamic sched domain bug. | ||
633 | * Disable letting 'cpu_exclusive' cpusets define dynamic sched | ||
634 | * domains, until the sched domain can handle partial nodes. | ||
635 | * Remove this #if hackery when sched domains fixed. | ||
636 | */ | ||
637 | #if 0 | ||
638 | static void update_cpu_domains(struct cpuset *cur) | 631 | static void update_cpu_domains(struct cpuset *cur) |
639 | { | 632 | { |
640 | struct cpuset *c, *par = cur->parent; | 633 | struct cpuset *c, *par = cur->parent; |
@@ -675,11 +668,6 @@ static void update_cpu_domains(struct cpuset *cur) | |||
675 | partition_sched_domains(&pspan, &cspan); | 668 | partition_sched_domains(&pspan, &cspan); |
676 | unlock_cpu_hotplug(); | 669 | unlock_cpu_hotplug(); |
677 | } | 670 | } |
678 | #else | ||
679 | static void update_cpu_domains(struct cpuset *cur) | ||
680 | { | ||
681 | } | ||
682 | #endif | ||
683 | 671 | ||
684 | static int update_cpumask(struct cpuset *cs, char *buf) | 672 | static int update_cpumask(struct cpuset *cs, char *buf) |
685 | { | 673 | { |
@@ -1611,17 +1599,114 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) | |||
1611 | return 0; | 1599 | return 0; |
1612 | } | 1600 | } |
1613 | 1601 | ||
1602 | /* | ||
1603 | * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive | ||
1604 | * ancestor to the specified cpuset. Call while holding cpuset_sem. | ||
1605 | * If no ancestor is mem_exclusive (an unusual configuration), then | ||
1606 | * returns the root cpuset. | ||
1607 | */ | ||
1608 | static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | ||
1609 | { | ||
1610 | while (!is_mem_exclusive(cs) && cs->parent) | ||
1611 | cs = cs->parent; | ||
1612 | return cs; | ||
1613 | } | ||
1614 | |||
1614 | /** | 1615 | /** |
1615 | * cpuset_zone_allowed - is zone z allowed in current->mems_allowed | 1616 | * cpuset_zone_allowed - Can we allocate memory on zone z's memory node? |
1616 | * @z: zone in question | 1617 | * @z: is this zone on an allowed node? |
1618 | * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL) | ||
1617 | * | 1619 | * |
1618 | * Is zone z allowed in current->mems_allowed, or is | 1620 | * If we're in interrupt, yes, we can always allocate. If zone |
1619 | * the CPU in interrupt context? (zone is always allowed in this case) | 1621 | * z's node is in our tasks mems_allowed, yes. If it's not a |
1620 | */ | 1622 | * __GFP_HARDWALL request and this zone's nodes is in the nearest |
1621 | int cpuset_zone_allowed(struct zone *z) | 1623 | * mem_exclusive cpuset ancestor to this tasks cpuset, yes. |
1624 | * Otherwise, no. | ||
1625 | * | ||
1626 | * GFP_USER allocations are marked with the __GFP_HARDWALL bit, | ||
1627 | * and do not allow allocations outside the current tasks cpuset. | ||
1628 | * GFP_KERNEL allocations are not so marked, so can escape to the | ||
1629 | * nearest mem_exclusive ancestor cpuset. | ||
1630 | * | ||
1631 | * Scanning up parent cpusets requires cpuset_sem. The __alloc_pages() | ||
1632 | * routine only calls here with __GFP_HARDWALL bit _not_ set if | ||
1633 | * it's a GFP_KERNEL allocation, and all nodes in the current tasks | ||
1634 | * mems_allowed came up empty on the first pass over the zonelist. | ||
1635 | * So only GFP_KERNEL allocations, if all nodes in the cpuset are | ||
1636 | * short of memory, might require taking the cpuset_sem semaphore. | ||
1637 | * | ||
1638 | * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages() | ||
1639 | * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing | ||
1640 | * hardwall cpusets - no allocation on a node outside the cpuset is | ||
1641 | * allowed (unless in interrupt, of course). | ||
1642 | * | ||
1643 | * The second loop doesn't even call here for GFP_ATOMIC requests | ||
1644 | * (if the __alloc_pages() local variable 'wait' is set). That check | ||
1645 | * and the checks below have the combined affect in the second loop of | ||
1646 | * the __alloc_pages() routine that: | ||
1647 | * in_interrupt - any node ok (current task context irrelevant) | ||
1648 | * GFP_ATOMIC - any node ok | ||
1649 | * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok | ||
1650 | * GFP_USER - only nodes in current tasks mems allowed ok. | ||
1651 | **/ | ||
1652 | |||
1653 | int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask) | ||
1622 | { | 1654 | { |
1623 | return in_interrupt() || | 1655 | int node; /* node that zone z is on */ |
1624 | node_isset(z->zone_pgdat->node_id, current->mems_allowed); | 1656 | const struct cpuset *cs; /* current cpuset ancestors */ |
1657 | int allowed = 1; /* is allocation in zone z allowed? */ | ||
1658 | |||
1659 | if (in_interrupt()) | ||
1660 | return 1; | ||
1661 | node = z->zone_pgdat->node_id; | ||
1662 | if (node_isset(node, current->mems_allowed)) | ||
1663 | return 1; | ||
1664 | if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ | ||
1665 | return 0; | ||
1666 | |||
1667 | /* Not hardwall and node outside mems_allowed: scan up cpusets */ | ||
1668 | down(&cpuset_sem); | ||
1669 | cs = current->cpuset; | ||
1670 | if (!cs) | ||
1671 | goto done; /* current task exiting */ | ||
1672 | cs = nearest_exclusive_ancestor(cs); | ||
1673 | allowed = node_isset(node, cs->mems_allowed); | ||
1674 | done: | ||
1675 | up(&cpuset_sem); | ||
1676 | return allowed; | ||
1677 | } | ||
1678 | |||
1679 | /** | ||
1680 | * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? | ||
1681 | * @p: pointer to task_struct of some other task. | ||
1682 | * | ||
1683 | * Description: Return true if the nearest mem_exclusive ancestor | ||
1684 | * cpusets of tasks @p and current overlap. Used by oom killer to | ||
1685 | * determine if task @p's memory usage might impact the memory | ||
1686 | * available to the current task. | ||
1687 | * | ||
1688 | * Acquires cpuset_sem - not suitable for calling from a fast path. | ||
1689 | **/ | ||
1690 | |||
1691 | int cpuset_excl_nodes_overlap(const struct task_struct *p) | ||
1692 | { | ||
1693 | const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ | ||
1694 | int overlap = 0; /* do cpusets overlap? */ | ||
1695 | |||
1696 | down(&cpuset_sem); | ||
1697 | cs1 = current->cpuset; | ||
1698 | if (!cs1) | ||
1699 | goto done; /* current task exiting */ | ||
1700 | cs2 = p->cpuset; | ||
1701 | if (!cs2) | ||
1702 | goto done; /* task p is exiting */ | ||
1703 | cs1 = nearest_exclusive_ancestor(cs1); | ||
1704 | cs2 = nearest_exclusive_ancestor(cs2); | ||
1705 | overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); | ||
1706 | done: | ||
1707 | up(&cpuset_sem); | ||
1708 | |||
1709 | return overlap; | ||
1625 | } | 1710 | } |
1626 | 1711 | ||
1627 | /* | 1712 | /* |
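The comment block above spells out the new policy: __GFP_HARDWALL (set for GFP_USER) confines an allocation to the task's own mems_allowed, while GFP_KERNEL may fall back to the nearest mem_exclusive ancestor. As a quick illustration — a hedged sketch, not the actual mm/page_alloc.c code — a zonelist walker that wants the strict first-pass behaviour would call the two-argument interface like this (example_first_allowed() is a hypothetical helper, not part of this patch):

    #include <linux/cpuset.h>
    #include <linux/mmzone.h>
    #include <linux/gfp.h>

    /*
     * Hedged sketch only: find the first zone the hardwall pass permits.
     * A GFP_KERNEL retry without __GFP_HARDWALL may widen the search to
     * the nearest mem_exclusive ancestor cpuset.
     */
    static struct zone *example_first_allowed(struct zonelist *zl,
                                              unsigned int gfp_mask)
    {
            struct zone **z;

            for (z = zl->zones; *z; z++)
                    if (cpuset_zone_allowed(*z, gfp_mask | __GFP_HARDWALL))
                            return *z;      /* first zone inside the hardwall */
            return NULL;
    }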
diff --git a/kernel/fork.c b/kernel/fork.c
index b65187f0c74e..7e1ead9a6ba4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -994,6 +994,9 @@ static task_t *copy_process(unsigned long clone_flags, | |||
994 | * of CLONE_PTRACE. | 994 | * of CLONE_PTRACE. |
995 | */ | 995 | */ |
996 | clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); | 996 | clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); |
997 | #ifdef TIF_SYSCALL_EMU | ||
998 | clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); | ||
999 | #endif | ||
997 | 1000 | ||
998 | /* Our parent execution domain becomes current domain | 1001 | /* Our parent execution domain becomes current domain |
999 | These must match for thread signalling to apply */ | 1002 | These must match for thread signalling to apply */ |
diff --git a/kernel/futex.c b/kernel/futex.c
index c7130f86106c..ca05fe6a70b2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/pagemap.h> | 40 | #include <linux/pagemap.h> |
41 | #include <linux/syscalls.h> | 41 | #include <linux/syscalls.h> |
42 | #include <linux/signal.h> | 42 | #include <linux/signal.h> |
43 | #include <asm/futex.h> | ||
43 | 44 | ||
44 | #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) | 45 | #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) |
45 | 46 | ||
@@ -327,6 +328,118 @@ out: | |||
327 | } | 328 | } |
328 | 329 | ||
329 | /* | 330 | /* |
331 | * Wake up all waiters hashed on the physical page that is mapped | ||
332 | * to this virtual address: | ||
333 | */ | ||
334 | static int futex_wake_op(unsigned long uaddr1, unsigned long uaddr2, int nr_wake, int nr_wake2, int op) | ||
335 | { | ||
336 | union futex_key key1, key2; | ||
337 | struct futex_hash_bucket *bh1, *bh2; | ||
338 | struct list_head *head; | ||
339 | struct futex_q *this, *next; | ||
340 | int ret, op_ret, attempt = 0; | ||
341 | |||
342 | retryfull: | ||
343 | down_read(¤t->mm->mmap_sem); | ||
344 | |||
345 | ret = get_futex_key(uaddr1, &key1); | ||
346 | if (unlikely(ret != 0)) | ||
347 | goto out; | ||
348 | ret = get_futex_key(uaddr2, &key2); | ||
349 | if (unlikely(ret != 0)) | ||
350 | goto out; | ||
351 | |||
352 | bh1 = hash_futex(&key1); | ||
353 | bh2 = hash_futex(&key2); | ||
354 | |||
355 | retry: | ||
356 | if (bh1 < bh2) | ||
357 | spin_lock(&bh1->lock); | ||
358 | spin_lock(&bh2->lock); | ||
359 | if (bh1 > bh2) | ||
360 | spin_lock(&bh1->lock); | ||
361 | |||
362 | op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2); | ||
363 | if (unlikely(op_ret < 0)) { | ||
364 | int dummy; | ||
365 | |||
366 | spin_unlock(&bh1->lock); | ||
367 | if (bh1 != bh2) | ||
368 | spin_unlock(&bh2->lock); | ||
369 | |||
370 | /* futex_atomic_op_inuser needs to both read and write | ||
371 | * *(int __user *)uaddr2, but we can't modify it | ||
372 | * non-atomically. Therefore, if get_user below is not | ||
373 | * enough, we need to handle the fault ourselves, while | ||
374 | * still holding the mmap_sem. */ | ||
375 | if (attempt++) { | ||
376 | struct vm_area_struct * vma; | ||
377 | struct mm_struct *mm = current->mm; | ||
378 | |||
379 | ret = -EFAULT; | ||
380 | if (attempt >= 2 || | ||
381 | !(vma = find_vma(mm, uaddr2)) || | ||
382 | vma->vm_start > uaddr2 || | ||
383 | !(vma->vm_flags & VM_WRITE)) | ||
384 | goto out; | ||
385 | |||
386 | switch (handle_mm_fault(mm, vma, uaddr2, 1)) { | ||
387 | case VM_FAULT_MINOR: | ||
388 | current->min_flt++; | ||
389 | break; | ||
390 | case VM_FAULT_MAJOR: | ||
391 | current->maj_flt++; | ||
392 | break; | ||
393 | default: | ||
394 | goto out; | ||
395 | } | ||
396 | goto retry; | ||
397 | } | ||
398 | |||
399 | /* If we would have faulted, release mmap_sem, | ||
400 | * fault it in and start all over again. */ | ||
401 | up_read(¤t->mm->mmap_sem); | ||
402 | |||
403 | ret = get_user(dummy, (int __user *)uaddr2); | ||
404 | if (ret) | ||
405 | return ret; | ||
406 | |||
407 | goto retryfull; | ||
408 | } | ||
409 | |||
410 | head = &bh1->chain; | ||
411 | |||
412 | list_for_each_entry_safe(this, next, head, list) { | ||
413 | if (match_futex (&this->key, &key1)) { | ||
414 | wake_futex(this); | ||
415 | if (++ret >= nr_wake) | ||
416 | break; | ||
417 | } | ||
418 | } | ||
419 | |||
420 | if (op_ret > 0) { | ||
421 | head = &bh2->chain; | ||
422 | |||
423 | op_ret = 0; | ||
424 | list_for_each_entry_safe(this, next, head, list) { | ||
425 | if (match_futex (&this->key, &key2)) { | ||
426 | wake_futex(this); | ||
427 | if (++op_ret >= nr_wake2) | ||
428 | break; | ||
429 | } | ||
430 | } | ||
431 | ret += op_ret; | ||
432 | } | ||
433 | |||
434 | spin_unlock(&bh1->lock); | ||
435 | if (bh1 != bh2) | ||
436 | spin_unlock(&bh2->lock); | ||
437 | out: | ||
438 | up_read(¤t->mm->mmap_sem); | ||
439 | return ret; | ||
440 | } | ||
441 | |||
442 | /* | ||
330 | * Requeue all waiters hashed on one physical page to another | 443 | * Requeue all waiters hashed on one physical page to another |
331 | * physical page. | 444 | * physical page. |
332 | */ | 445 | */ |
@@ -673,23 +786,17 @@ static int futex_fd(unsigned long uaddr, int signal) | |||
673 | filp->f_mapping = filp->f_dentry->d_inode->i_mapping; | 786 | filp->f_mapping = filp->f_dentry->d_inode->i_mapping; |
674 | 787 | ||
675 | if (signal) { | 788 | if (signal) { |
676 | int err; | ||
677 | err = f_setown(filp, current->pid, 1); | 789 | err = f_setown(filp, current->pid, 1); |
678 | if (err < 0) { | 790 | if (err < 0) { |
679 | put_unused_fd(ret); | 791 | goto error; |
680 | put_filp(filp); | ||
681 | ret = err; | ||
682 | goto out; | ||
683 | } | 792 | } |
684 | filp->f_owner.signum = signal; | 793 | filp->f_owner.signum = signal; |
685 | } | 794 | } |
686 | 795 | ||
687 | q = kmalloc(sizeof(*q), GFP_KERNEL); | 796 | q = kmalloc(sizeof(*q), GFP_KERNEL); |
688 | if (!q) { | 797 | if (!q) { |
689 | put_unused_fd(ret); | 798 | err = -ENOMEM; |
690 | put_filp(filp); | 799 | goto error; |
691 | ret = -ENOMEM; | ||
692 | goto out; | ||
693 | } | 800 | } |
694 | 801 | ||
695 | down_read(¤t->mm->mmap_sem); | 802 | down_read(¤t->mm->mmap_sem); |
@@ -697,10 +804,8 @@ static int futex_fd(unsigned long uaddr, int signal) | |||
697 | 804 | ||
698 | if (unlikely(err != 0)) { | 805 | if (unlikely(err != 0)) { |
699 | up_read(¤t->mm->mmap_sem); | 806 | up_read(¤t->mm->mmap_sem); |
700 | put_unused_fd(ret); | ||
701 | put_filp(filp); | ||
702 | kfree(q); | 807 | kfree(q); |
703 | return err; | 808 | goto error; |
704 | } | 809 | } |
705 | 810 | ||
706 | /* | 811 | /* |
@@ -716,6 +821,11 @@ static int futex_fd(unsigned long uaddr, int signal) | |||
716 | fd_install(ret, filp); | 821 | fd_install(ret, filp); |
717 | out: | 822 | out: |
718 | return ret; | 823 | return ret; |
824 | error: | ||
825 | put_unused_fd(ret); | ||
826 | put_filp(filp); | ||
827 | ret = err; | ||
828 | goto out; | ||
719 | } | 829 | } |
720 | 830 | ||
721 | long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, | 831 | long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, |
@@ -740,6 +850,9 @@ long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, | |||
740 | case FUTEX_CMP_REQUEUE: | 850 | case FUTEX_CMP_REQUEUE: |
741 | ret = futex_requeue(uaddr, uaddr2, val, val2, &val3); | 851 | ret = futex_requeue(uaddr, uaddr2, val, val2, &val3); |
742 | break; | 852 | break; |
853 | case FUTEX_WAKE_OP: | ||
854 | ret = futex_wake_op(uaddr, uaddr2, val, val2, val3); | ||
855 | break; | ||
743 | default: | 856 | default: |
744 | ret = -ENOSYS; | 857 | ret = -ENOSYS; |
745 | } | 858 | } |
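Two details of futex_wake_op() above are worth calling out: the two hash buckets are always locked in address order to avoid ABBA deadlock, and op_ret decides whether the second wake list is processed at all. The ordering idiom, reduced to a hedged stand-alone sketch (lock_two_buckets() and unlock_two_buckets() are illustrative names, not part of the patch):

    #include <linux/spinlock.h>

    /*
     * Hedged sketch of the lock-ordering idiom used by futex_wake_op():
     * take the lower-addressed lock first, and take a lock only once
     * when both arguments refer to the same bucket.
     */
    static void lock_two_buckets(spinlock_t *a, spinlock_t *b)
    {
            if (a < b)
                    spin_lock(a);
            spin_lock(b);
            if (a > b)
                    spin_lock(a);
    }

    static void unlock_two_buckets(spinlock_t *a, spinlock_t *b)
    {
            spin_unlock(b);
            if (a != b)
                    spin_unlock(a);
    }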
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 388977f3e9b7..0cbe633420fb 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -39,7 +39,7 @@ void inter_module_register(const char *im_name, struct module *owner, const void | |||
39 | struct list_head *tmp; | 39 | struct list_head *tmp; |
40 | struct inter_module_entry *ime, *ime_new; | 40 | struct inter_module_entry *ime, *ime_new; |
41 | 41 | ||
42 | if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) { | 42 | if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) { |
43 | /* Overloaded kernel, not fatal */ | 43 | /* Overloaded kernel, not fatal */ |
44 | printk(KERN_ERR | 44 | printk(KERN_ERR |
45 | "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n", | 45 | "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n", |
@@ -47,7 +47,6 @@ void inter_module_register(const char *im_name, struct module *owner, const void | |||
47 | kmalloc_failed = 1; | 47 | kmalloc_failed = 1; |
48 | return; | 48 | return; |
49 | } | 49 | } |
50 | memset(ime_new, 0, sizeof(*ime_new)); | ||
51 | ime_new->im_name = im_name; | 50 | ime_new->im_name = im_name; |
52 | ime_new->owner = owner; | 51 | ime_new->owner = owner; |
53 | ime_new->userdata = userdata; | 52 | ime_new->userdata = userdata; |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c29f83c16497..3ff7b925c387 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -111,7 +111,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs) | |||
111 | unsigned int status; | 111 | unsigned int status; |
112 | 112 | ||
113 | kstat_this_cpu.irqs[irq]++; | 113 | kstat_this_cpu.irqs[irq]++; |
114 | if (desc->status & IRQ_PER_CPU) { | 114 | if (CHECK_IRQ_PER_CPU(desc->status)) { |
115 | irqreturn_t action_ret; | 115 | irqreturn_t action_ret; |
116 | 116 | ||
117 | /* | 117 | /* |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ac6700985705..1cfdb08ddf20 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -18,6 +18,10 @@ | |||
18 | 18 | ||
19 | cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL }; | 19 | cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL }; |
20 | 20 | ||
21 | #if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) | ||
22 | cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; | ||
23 | #endif | ||
24 | |||
21 | /** | 25 | /** |
22 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) | 26 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) |
23 | * | 27 | * |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 85d08daa6600..f26e534c6585 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,12 +19,22 @@ static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS]; | |||
19 | */ | 19 | */ |
20 | static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; | 20 | static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; |
21 | 21 | ||
22 | void __attribute__((weak)) | 22 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
23 | proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | 23 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) |
24 | { | ||
25 | /* | ||
26 | * Save these away for later use. Re-progam when the | ||
27 | * interrupt is pending | ||
28 | */ | ||
29 | set_pending_irq(irq, mask_val); | ||
30 | } | ||
31 | #else | ||
32 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | ||
24 | { | 33 | { |
25 | irq_affinity[irq] = mask_val; | 34 | irq_affinity[irq] = mask_val; |
26 | irq_desc[irq].handler->set_affinity(irq, mask_val); | 35 | irq_desc[irq].handler->set_affinity(irq, mask_val); |
27 | } | 36 | } |
37 | #endif | ||
28 | 38 | ||
29 | static int irq_affinity_read_proc(char *page, char **start, off_t off, | 39 | static int irq_affinity_read_proc(char *page, char **start, off_t off, |
30 | int count, int *eof, void *data) | 40 | int count, int *eof, void *data) |
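With CONFIG_GENERIC_PENDING_IRQ the /proc write above no longer pokes the interrupt controller directly; set_pending_irq() just records the mask in pending_irq_cpumask[] (defined in the manage.c hunk earlier) and the move happens the next time the IRQ is serviced. A hedged sketch of the consuming side — apply_pending_affinity() is an illustrative name, and the real consumers are architecture specific:

    #include <linux/irq.h>
    #include <linux/cpumask.h>

    /*
     * Hedged sketch only: apply a deferred affinity change recorded by
     * set_pending_irq() from the interrupt path.  This mirrors the idea,
     * not any particular architecture's implementation.
     */
    static void apply_pending_affinity(unsigned int irq)
    {
            irq_desc_t *desc = irq_desc + irq;
            cpumask_t mask = pending_irq_cpumask[irq];

            if (cpus_empty(mask))
                    return;                 /* nothing deferred for this IRQ */

            cpus_clear(pending_irq_cpumask[irq]);
            irq_affinity[irq] = mask;
            desc->handler->set_affinity(irq, mask);
    }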
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b0237122b24e..f3ea492ab44d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/init.h> | 37 | #include <linux/init.h> |
38 | #include <linux/module.h> | 38 | #include <linux/module.h> |
39 | #include <linux/moduleloader.h> | 39 | #include <linux/moduleloader.h> |
40 | #include <asm-generic/sections.h> | ||
40 | #include <asm/cacheflush.h> | 41 | #include <asm/cacheflush.h> |
41 | #include <asm/errno.h> | 42 | #include <asm/errno.h> |
42 | #include <asm/kdebug.h> | 43 | #include <asm/kdebug.h> |
@@ -72,7 +73,7 @@ static struct hlist_head kprobe_insn_pages; | |||
72 | * get_insn_slot() - Find a slot on an executable page for an instruction. | 73 | * get_insn_slot() - Find a slot on an executable page for an instruction. |
73 | * We allocate an executable page if there's no room on existing ones. | 74 | * We allocate an executable page if there's no room on existing ones. |
74 | */ | 75 | */ |
75 | kprobe_opcode_t *get_insn_slot(void) | 76 | kprobe_opcode_t __kprobes *get_insn_slot(void) |
76 | { | 77 | { |
77 | struct kprobe_insn_page *kip; | 78 | struct kprobe_insn_page *kip; |
78 | struct hlist_node *pos; | 79 | struct hlist_node *pos; |
@@ -117,7 +118,7 @@ kprobe_opcode_t *get_insn_slot(void) | |||
117 | return kip->insns; | 118 | return kip->insns; |
118 | } | 119 | } |
119 | 120 | ||
120 | void free_insn_slot(kprobe_opcode_t *slot) | 121 | void __kprobes free_insn_slot(kprobe_opcode_t *slot) |
121 | { | 122 | { |
122 | struct kprobe_insn_page *kip; | 123 | struct kprobe_insn_page *kip; |
123 | struct hlist_node *pos; | 124 | struct hlist_node *pos; |
@@ -152,20 +153,42 @@ void free_insn_slot(kprobe_opcode_t *slot) | |||
152 | } | 153 | } |
153 | 154 | ||
154 | /* Locks kprobe: irqs must be disabled */ | 155 | /* Locks kprobe: irqs must be disabled */ |
155 | void lock_kprobes(void) | 156 | void __kprobes lock_kprobes(void) |
156 | { | 157 | { |
158 | unsigned long flags = 0; | ||
159 | |||
160 | /* Avoiding local interrupts to happen right after we take the kprobe_lock | ||
161 | * and before we get a chance to update kprobe_cpu, this to prevent | ||
162 | * deadlock when we have a kprobe on ISR routine and a kprobe on task | ||
163 | * routine | ||
164 | */ | ||
165 | local_irq_save(flags); | ||
166 | |||
157 | spin_lock(&kprobe_lock); | 167 | spin_lock(&kprobe_lock); |
158 | kprobe_cpu = smp_processor_id(); | 168 | kprobe_cpu = smp_processor_id(); |
169 | |||
170 | local_irq_restore(flags); | ||
159 | } | 171 | } |
160 | 172 | ||
161 | void unlock_kprobes(void) | 173 | void __kprobes unlock_kprobes(void) |
162 | { | 174 | { |
175 | unsigned long flags = 0; | ||
176 | |||
177 | /* Avoiding local interrupts to happen right after we update | ||
178 | * kprobe_cpu and before we get a a chance to release kprobe_lock, | ||
179 | * this to prevent deadlock when we have a kprobe on ISR routine and | ||
180 | * a kprobe on task routine | ||
181 | */ | ||
182 | local_irq_save(flags); | ||
183 | |||
163 | kprobe_cpu = NR_CPUS; | 184 | kprobe_cpu = NR_CPUS; |
164 | spin_unlock(&kprobe_lock); | 185 | spin_unlock(&kprobe_lock); |
186 | |||
187 | local_irq_restore(flags); | ||
165 | } | 188 | } |
166 | 189 | ||
167 | /* You have to be holding the kprobe_lock */ | 190 | /* You have to be holding the kprobe_lock */ |
168 | struct kprobe *get_kprobe(void *addr) | 191 | struct kprobe __kprobes *get_kprobe(void *addr) |
169 | { | 192 | { |
170 | struct hlist_head *head; | 193 | struct hlist_head *head; |
171 | struct hlist_node *node; | 194 | struct hlist_node *node; |
@@ -183,7 +206,7 @@ struct kprobe *get_kprobe(void *addr) | |||
183 | * Aggregate handlers for multiple kprobes support - these handlers | 206 | * Aggregate handlers for multiple kprobes support - these handlers |
184 | * take care of invoking the individual kprobe handlers on p->list | 207 | * take care of invoking the individual kprobe handlers on p->list |
185 | */ | 208 | */ |
186 | static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) | 209 | static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) |
187 | { | 210 | { |
188 | struct kprobe *kp; | 211 | struct kprobe *kp; |
189 | 212 | ||
@@ -198,8 +221,8 @@ static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) | |||
198 | return 0; | 221 | return 0; |
199 | } | 222 | } |
200 | 223 | ||
201 | static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | 224 | static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, |
202 | unsigned long flags) | 225 | unsigned long flags) |
203 | { | 226 | { |
204 | struct kprobe *kp; | 227 | struct kprobe *kp; |
205 | 228 | ||
@@ -213,8 +236,8 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | |||
213 | return; | 236 | return; |
214 | } | 237 | } |
215 | 238 | ||
216 | static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, | 239 | static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, |
217 | int trapnr) | 240 | int trapnr) |
218 | { | 241 | { |
219 | /* | 242 | /* |
220 | * if we faulted "during" the execution of a user specified | 243 | * if we faulted "during" the execution of a user specified |
@@ -227,7 +250,7 @@ static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, | |||
227 | return 0; | 250 | return 0; |
228 | } | 251 | } |
229 | 252 | ||
230 | static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) | 253 | static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) |
231 | { | 254 | { |
232 | struct kprobe *kp = curr_kprobe; | 255 | struct kprobe *kp = curr_kprobe; |
233 | if (curr_kprobe && kp->break_handler) { | 256 | if (curr_kprobe && kp->break_handler) { |
@@ -240,7 +263,7 @@ static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
240 | return 0; | 263 | return 0; |
241 | } | 264 | } |
242 | 265 | ||
243 | struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp) | 266 | struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) |
244 | { | 267 | { |
245 | struct hlist_node *node; | 268 | struct hlist_node *node; |
246 | struct kretprobe_instance *ri; | 269 | struct kretprobe_instance *ri; |
@@ -249,7 +272,8 @@ struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp) | |||
249 | return NULL; | 272 | return NULL; |
250 | } | 273 | } |
251 | 274 | ||
252 | static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp) | 275 | static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe |
276 | *rp) | ||
253 | { | 277 | { |
254 | struct hlist_node *node; | 278 | struct hlist_node *node; |
255 | struct kretprobe_instance *ri; | 279 | struct kretprobe_instance *ri; |
@@ -258,7 +282,7 @@ static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp) | |||
258 | return NULL; | 282 | return NULL; |
259 | } | 283 | } |
260 | 284 | ||
261 | void add_rp_inst(struct kretprobe_instance *ri) | 285 | void __kprobes add_rp_inst(struct kretprobe_instance *ri) |
262 | { | 286 | { |
263 | /* | 287 | /* |
264 | * Remove rp inst off the free list - | 288 | * Remove rp inst off the free list - |
@@ -276,7 +300,7 @@ void add_rp_inst(struct kretprobe_instance *ri) | |||
276 | hlist_add_head(&ri->uflist, &ri->rp->used_instances); | 300 | hlist_add_head(&ri->uflist, &ri->rp->used_instances); |
277 | } | 301 | } |
278 | 302 | ||
279 | void recycle_rp_inst(struct kretprobe_instance *ri) | 303 | void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) |
280 | { | 304 | { |
281 | /* remove rp inst off the rprobe_inst_table */ | 305 | /* remove rp inst off the rprobe_inst_table */ |
282 | hlist_del(&ri->hlist); | 306 | hlist_del(&ri->hlist); |
@@ -291,7 +315,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri) | |||
291 | kfree(ri); | 315 | kfree(ri); |
292 | } | 316 | } |
293 | 317 | ||
294 | struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk) | 318 | struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk) |
295 | { | 319 | { |
296 | return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; | 320 | return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; |
297 | } | 321 | } |
@@ -302,7 +326,7 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk) | |||
302 | * instances associated with this task. These left over instances represent | 326 | * instances associated with this task. These left over instances represent |
303 | * probed functions that have been called but will never return. | 327 | * probed functions that have been called but will never return. |
304 | */ | 328 | */ |
305 | void kprobe_flush_task(struct task_struct *tk) | 329 | void __kprobes kprobe_flush_task(struct task_struct *tk) |
306 | { | 330 | { |
307 | struct kretprobe_instance *ri; | 331 | struct kretprobe_instance *ri; |
308 | struct hlist_head *head; | 332 | struct hlist_head *head; |
@@ -322,7 +346,8 @@ void kprobe_flush_task(struct task_struct *tk) | |||
322 | * This kprobe pre_handler is registered with every kretprobe. When probe | 346 | * This kprobe pre_handler is registered with every kretprobe. When probe |
323 | * hits it will set up the return probe. | 347 | * hits it will set up the return probe. |
324 | */ | 348 | */ |
325 | static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) | 349 | static int __kprobes pre_handler_kretprobe(struct kprobe *p, |
350 | struct pt_regs *regs) | ||
326 | { | 351 | { |
327 | struct kretprobe *rp = container_of(p, struct kretprobe, kp); | 352 | struct kretprobe *rp = container_of(p, struct kretprobe, kp); |
328 | 353 | ||
@@ -353,7 +378,7 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) | |||
353 | * Add the new probe to old_p->list. Fail if this is the | 378 | * Add the new probe to old_p->list. Fail if this is the |
354 | * second jprobe at the address - two jprobes can't coexist | 379 | * second jprobe at the address - two jprobes can't coexist |
355 | */ | 380 | */ |
356 | static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p) | 381 | static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) |
357 | { | 382 | { |
358 | struct kprobe *kp; | 383 | struct kprobe *kp; |
359 | 384 | ||
@@ -395,7 +420,8 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | |||
395 | * the intricacies | 420 | * the intricacies |
396 | * TODO: Move kcalloc outside the spinlock | 421 | * TODO: Move kcalloc outside the spinlock |
397 | */ | 422 | */ |
398 | static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) | 423 | static int __kprobes register_aggr_kprobe(struct kprobe *old_p, |
424 | struct kprobe *p) | ||
399 | { | 425 | { |
400 | int ret = 0; | 426 | int ret = 0; |
401 | struct kprobe *ap; | 427 | struct kprobe *ap; |
@@ -434,15 +460,25 @@ static inline void cleanup_aggr_kprobe(struct kprobe *old_p, | |||
434 | spin_unlock_irqrestore(&kprobe_lock, flags); | 460 | spin_unlock_irqrestore(&kprobe_lock, flags); |
435 | } | 461 | } |
436 | 462 | ||
437 | int register_kprobe(struct kprobe *p) | 463 | static int __kprobes in_kprobes_functions(unsigned long addr) |
464 | { | ||
465 | if (addr >= (unsigned long)__kprobes_text_start | ||
466 | && addr < (unsigned long)__kprobes_text_end) | ||
467 | return -EINVAL; | ||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | int __kprobes register_kprobe(struct kprobe *p) | ||
438 | { | 472 | { |
439 | int ret = 0; | 473 | int ret = 0; |
440 | unsigned long flags = 0; | 474 | unsigned long flags = 0; |
441 | struct kprobe *old_p; | 475 | struct kprobe *old_p; |
442 | 476 | ||
443 | if ((ret = arch_prepare_kprobe(p)) != 0) { | 477 | if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0) |
478 | return ret; | ||
479 | if ((ret = arch_prepare_kprobe(p)) != 0) | ||
444 | goto rm_kprobe; | 480 | goto rm_kprobe; |
445 | } | 481 | |
446 | spin_lock_irqsave(&kprobe_lock, flags); | 482 | spin_lock_irqsave(&kprobe_lock, flags); |
447 | old_p = get_kprobe(p->addr); | 483 | old_p = get_kprobe(p->addr); |
448 | p->nmissed = 0; | 484 | p->nmissed = 0; |
@@ -466,7 +502,7 @@ rm_kprobe: | |||
466 | return ret; | 502 | return ret; |
467 | } | 503 | } |
468 | 504 | ||
469 | void unregister_kprobe(struct kprobe *p) | 505 | void __kprobes unregister_kprobe(struct kprobe *p) |
470 | { | 506 | { |
471 | unsigned long flags; | 507 | unsigned long flags; |
472 | struct kprobe *old_p; | 508 | struct kprobe *old_p; |
@@ -487,7 +523,7 @@ static struct notifier_block kprobe_exceptions_nb = { | |||
487 | .priority = 0x7fffffff /* we need to notified first */ | 523 | .priority = 0x7fffffff /* we need to notified first */ |
488 | }; | 524 | }; |
489 | 525 | ||
490 | int register_jprobe(struct jprobe *jp) | 526 | int __kprobes register_jprobe(struct jprobe *jp) |
491 | { | 527 | { |
492 | /* Todo: Verify probepoint is a function entry point */ | 528 | /* Todo: Verify probepoint is a function entry point */ |
493 | jp->kp.pre_handler = setjmp_pre_handler; | 529 | jp->kp.pre_handler = setjmp_pre_handler; |
@@ -496,14 +532,14 @@ int register_jprobe(struct jprobe *jp) | |||
496 | return register_kprobe(&jp->kp); | 532 | return register_kprobe(&jp->kp); |
497 | } | 533 | } |
498 | 534 | ||
499 | void unregister_jprobe(struct jprobe *jp) | 535 | void __kprobes unregister_jprobe(struct jprobe *jp) |
500 | { | 536 | { |
501 | unregister_kprobe(&jp->kp); | 537 | unregister_kprobe(&jp->kp); |
502 | } | 538 | } |
503 | 539 | ||
504 | #ifdef ARCH_SUPPORTS_KRETPROBES | 540 | #ifdef ARCH_SUPPORTS_KRETPROBES |
505 | 541 | ||
506 | int register_kretprobe(struct kretprobe *rp) | 542 | int __kprobes register_kretprobe(struct kretprobe *rp) |
507 | { | 543 | { |
508 | int ret = 0; | 544 | int ret = 0; |
509 | struct kretprobe_instance *inst; | 545 | struct kretprobe_instance *inst; |
@@ -540,14 +576,14 @@ int register_kretprobe(struct kretprobe *rp) | |||
540 | 576 | ||
541 | #else /* ARCH_SUPPORTS_KRETPROBES */ | 577 | #else /* ARCH_SUPPORTS_KRETPROBES */ |
542 | 578 | ||
543 | int register_kretprobe(struct kretprobe *rp) | 579 | int __kprobes register_kretprobe(struct kretprobe *rp) |
544 | { | 580 | { |
545 | return -ENOSYS; | 581 | return -ENOSYS; |
546 | } | 582 | } |
547 | 583 | ||
548 | #endif /* ARCH_SUPPORTS_KRETPROBES */ | 584 | #endif /* ARCH_SUPPORTS_KRETPROBES */ |
549 | 585 | ||
550 | void unregister_kretprobe(struct kretprobe *rp) | 586 | void __kprobes unregister_kretprobe(struct kretprobe *rp) |
551 | { | 587 | { |
552 | unsigned long flags; | 588 | unsigned long flags; |
553 | struct kretprobe_instance *ri; | 589 | struct kretprobe_instance *ri; |
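The blanket __kprobes annotations and the new in_kprobes_functions() check work together: everything that can run while a probe fires is linked into its own text section, and register_kprobe() now refuses addresses inside that section, so the kprobe machinery can never probe itself. A hedged sketch of the usual idiom behind the annotation (the exact macro lives in the kprobes and linker-script headers, not in this file):

    /*
     * Hedged sketch of the convention in_kprobes_functions() relies on:
     * __kprobes drops a function into .kprobes.text, and the linker
     * script exports the section bounds declared via
     * <asm-generic/sections.h>.
     */
    #define __kprobes       __attribute__((__section__(".kprobes.text")))

    extern char __kprobes_text_start[], __kprobes_text_end[];

    /* vmlinux.lds (conceptually):
     *      __kprobes_text_start = .;
     *      *(.kprobes.text)
     *      __kprobes_text_end = .;
     */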
diff --git a/kernel/module.c b/kernel/module.c
index c32995fbd8fd..4b39d3793c72 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1509,6 +1509,7 @@ static struct module *load_module(void __user *umod, | |||
1509 | long err = 0; | 1509 | long err = 0; |
1510 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ | 1510 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ |
1511 | struct exception_table_entry *extable; | 1511 | struct exception_table_entry *extable; |
1512 | mm_segment_t old_fs; | ||
1512 | 1513 | ||
1513 | DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", | 1514 | DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", |
1514 | umod, len, uargs); | 1515 | umod, len, uargs); |
@@ -1779,6 +1780,24 @@ static struct module *load_module(void __user *umod, | |||
1779 | if (err < 0) | 1780 | if (err < 0) |
1780 | goto cleanup; | 1781 | goto cleanup; |
1781 | 1782 | ||
1783 | /* flush the icache in correct context */ | ||
1784 | old_fs = get_fs(); | ||
1785 | set_fs(KERNEL_DS); | ||
1786 | |||
1787 | /* | ||
1788 | * Flush the instruction cache, since we've played with text. | ||
1789 | * Do it before processing of module parameters, so the module | ||
1790 | * can provide parameter accessor functions of its own. | ||
1791 | */ | ||
1792 | if (mod->module_init) | ||
1793 | flush_icache_range((unsigned long)mod->module_init, | ||
1794 | (unsigned long)mod->module_init | ||
1795 | + mod->init_size); | ||
1796 | flush_icache_range((unsigned long)mod->module_core, | ||
1797 | (unsigned long)mod->module_core + mod->core_size); | ||
1798 | |||
1799 | set_fs(old_fs); | ||
1800 | |||
1782 | mod->args = args; | 1801 | mod->args = args; |
1783 | if (obsparmindex) { | 1802 | if (obsparmindex) { |
1784 | err = obsolete_params(mod->name, mod->args, | 1803 | err = obsolete_params(mod->name, mod->args, |
@@ -1860,7 +1879,6 @@ sys_init_module(void __user *umod, | |||
1860 | const char __user *uargs) | 1879 | const char __user *uargs) |
1861 | { | 1880 | { |
1862 | struct module *mod; | 1881 | struct module *mod; |
1863 | mm_segment_t old_fs = get_fs(); | ||
1864 | int ret = 0; | 1882 | int ret = 0; |
1865 | 1883 | ||
1866 | /* Must have permission */ | 1884 | /* Must have permission */ |
@@ -1878,19 +1896,6 @@ sys_init_module(void __user *umod, | |||
1878 | return PTR_ERR(mod); | 1896 | return PTR_ERR(mod); |
1879 | } | 1897 | } |
1880 | 1898 | ||
1881 | /* flush the icache in correct context */ | ||
1882 | set_fs(KERNEL_DS); | ||
1883 | |||
1884 | /* Flush the instruction cache, since we've played with text */ | ||
1885 | if (mod->module_init) | ||
1886 | flush_icache_range((unsigned long)mod->module_init, | ||
1887 | (unsigned long)mod->module_init | ||
1888 | + mod->init_size); | ||
1889 | flush_icache_range((unsigned long)mod->module_core, | ||
1890 | (unsigned long)mod->module_core + mod->core_size); | ||
1891 | |||
1892 | set_fs(old_fs); | ||
1893 | |||
1894 | /* Now sew it into the lists. They won't access us, since | 1899 | /* Now sew it into the lists. They won't access us, since |
1895 | strong_try_module_get() will fail. */ | 1900 | strong_try_module_get() will fail. */ |
1896 | stop_machine_run(__link_module, mod, NR_CPUS); | 1901 | stop_machine_run(__link_module, mod, NR_CPUS); |
diff --git a/kernel/params.c b/kernel/params.c
index d586c35ef8fc..fbf173215fd2 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -542,8 +542,8 @@ static void __init kernel_param_sysfs_setup(const char *name, | |||
542 | { | 542 | { |
543 | struct module_kobject *mk; | 543 | struct module_kobject *mk; |
544 | 544 | ||
545 | mk = kmalloc(sizeof(struct module_kobject), GFP_KERNEL); | 545 | mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); |
546 | memset(mk, 0, sizeof(struct module_kobject)); | 546 | BUG_ON(!mk); |
547 | 547 | ||
548 | mk->mod = THIS_MODULE; | 548 | mk->mod = THIS_MODULE; |
549 | kobj_set_kset_s(mk, module_subsys); | 549 | kobj_set_kset_s(mk, module_subsys); |
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 38798a2ff994..b7b532acd9fc 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -427,21 +427,23 @@ int posix_timer_event(struct k_itimer *timr,int si_private) | |||
427 | timr->sigq->info.si_code = SI_TIMER; | 427 | timr->sigq->info.si_code = SI_TIMER; |
428 | timr->sigq->info.si_tid = timr->it_id; | 428 | timr->sigq->info.si_tid = timr->it_id; |
429 | timr->sigq->info.si_value = timr->it_sigev_value; | 429 | timr->sigq->info.si_value = timr->it_sigev_value; |
430 | |||
430 | if (timr->it_sigev_notify & SIGEV_THREAD_ID) { | 431 | if (timr->it_sigev_notify & SIGEV_THREAD_ID) { |
431 | if (unlikely(timr->it_process->flags & PF_EXITING)) { | 432 | struct task_struct *leader; |
432 | timr->it_sigev_notify = SIGEV_SIGNAL; | 433 | int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq, |
433 | put_task_struct(timr->it_process); | 434 | timr->it_process); |
434 | timr->it_process = timr->it_process->group_leader; | 435 | |
435 | goto group; | 436 | if (likely(ret >= 0)) |
436 | } | 437 | return ret; |
437 | return send_sigqueue(timr->it_sigev_signo, timr->sigq, | 438 | |
438 | timr->it_process); | 439 | timr->it_sigev_notify = SIGEV_SIGNAL; |
439 | } | 440 | leader = timr->it_process->group_leader; |
440 | else { | 441 | put_task_struct(timr->it_process); |
441 | group: | 442 | timr->it_process = leader; |
442 | return send_group_sigqueue(timr->it_sigev_signo, timr->sigq, | ||
443 | timr->it_process); | ||
444 | } | 443 | } |
444 | |||
445 | return send_group_sigqueue(timr->it_sigev_signo, timr->sigq, | ||
446 | timr->it_process); | ||
445 | } | 447 | } |
446 | EXPORT_SYMBOL_GPL(posix_timer_event); | 448 | EXPORT_SYMBOL_GPL(posix_timer_event); |
447 | 449 | ||
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index b99f61b82685..396c7873e804 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -29,7 +29,7 @@ config PM_DEBUG | |||
29 | 29 | ||
30 | config SOFTWARE_SUSPEND | 30 | config SOFTWARE_SUSPEND |
31 | bool "Software Suspend" | 31 | bool "Software Suspend" |
32 | depends on EXPERIMENTAL && PM && SWAP && ((X86 && SMP) || ((FVR || PPC32 || X86) && !SMP)) | 32 | depends on PM && SWAP && (X86 || ((FVR || PPC32) && !SMP)) |
33 | ---help--- | 33 | ---help--- |
34 | Enable the possibility of suspending the machine. | 34 | Enable the possibility of suspending the machine. |
35 | It doesn't need APM. | 35 | It doesn't need APM. |
@@ -73,6 +73,18 @@ config PM_STD_PARTITION | |||
73 | suspended image to. It will simply pick the first available swap | 73 | suspended image to. It will simply pick the first available swap |
74 | device. | 74 | device. |
75 | 75 | ||
76 | config SWSUSP_ENCRYPT | ||
77 | bool "Encrypt suspend image" | ||
78 | depends on SOFTWARE_SUSPEND && CRYPTO=y && (CRYPTO_AES=y || CRYPTO_AES_586=y || CRYPTO_AES_X86_64=y) | ||
79 | default "" | ||
80 | ---help--- | ||
81 | To prevent data gathering from swap after resume you can encrypt | ||
82 | the suspend image with a temporary key that is deleted on | ||
83 | resume. | ||
84 | |||
85 | Note that the temporary key is stored unencrypted on disk while the | ||
86 | system is suspended. | ||
87 | |||
76 | config SUSPEND_SMP | 88 | config SUSPEND_SMP |
77 | bool | 89 | bool |
78 | depends on HOTPLUG_CPU && X86 && PM | 90 | depends on HOTPLUG_CPU && X86 && PM |
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 664eb0469b6e..2d8bf054d036 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -112,24 +112,12 @@ static inline void platform_finish(void) | |||
112 | } | 112 | } |
113 | } | 113 | } |
114 | 114 | ||
115 | static void finish(void) | ||
116 | { | ||
117 | device_resume(); | ||
118 | platform_finish(); | ||
119 | thaw_processes(); | ||
120 | enable_nonboot_cpus(); | ||
121 | pm_restore_console(); | ||
122 | } | ||
123 | |||
124 | |||
125 | static int prepare_processes(void) | 115 | static int prepare_processes(void) |
126 | { | 116 | { |
127 | int error; | 117 | int error; |
128 | 118 | ||
129 | pm_prepare_console(); | 119 | pm_prepare_console(); |
130 | |||
131 | sys_sync(); | 120 | sys_sync(); |
132 | |||
133 | disable_nonboot_cpus(); | 121 | disable_nonboot_cpus(); |
134 | 122 | ||
135 | if (freeze_processes()) { | 123 | if (freeze_processes()) { |
@@ -162,15 +150,6 @@ static void unprepare_processes(void) | |||
162 | pm_restore_console(); | 150 | pm_restore_console(); |
163 | } | 151 | } |
164 | 152 | ||
165 | static int prepare_devices(void) | ||
166 | { | ||
167 | int error; | ||
168 | |||
169 | if ((error = device_suspend(PMSG_FREEZE))) | ||
170 | printk("Some devices failed to suspend\n"); | ||
171 | return error; | ||
172 | } | ||
173 | |||
174 | /** | 153 | /** |
175 | * pm_suspend_disk - The granpappy of power management. | 154 | * pm_suspend_disk - The granpappy of power management. |
176 | * | 155 | * |
@@ -187,17 +166,14 @@ int pm_suspend_disk(void) | |||
187 | error = prepare_processes(); | 166 | error = prepare_processes(); |
188 | if (error) | 167 | if (error) |
189 | return error; | 168 | return error; |
190 | error = prepare_devices(); | ||
191 | 169 | ||
170 | error = device_suspend(PMSG_FREEZE); | ||
192 | if (error) { | 171 | if (error) { |
172 | printk("Some devices failed to suspend\n"); | ||
193 | unprepare_processes(); | 173 | unprepare_processes(); |
194 | return error; | 174 | return error; |
195 | } | 175 | } |
196 | 176 | ||
197 | pr_debug("PM: Attempting to suspend to disk.\n"); | ||
198 | if (pm_disk_mode == PM_DISK_FIRMWARE) | ||
199 | return pm_ops->enter(PM_SUSPEND_DISK); | ||
200 | |||
201 | pr_debug("PM: snapshotting memory.\n"); | 177 | pr_debug("PM: snapshotting memory.\n"); |
202 | in_suspend = 1; | 178 | in_suspend = 1; |
203 | if ((error = swsusp_suspend())) | 179 | if ((error = swsusp_suspend())) |
@@ -208,11 +184,20 @@ int pm_suspend_disk(void) | |||
208 | error = swsusp_write(); | 184 | error = swsusp_write(); |
209 | if (!error) | 185 | if (!error) |
210 | power_down(pm_disk_mode); | 186 | power_down(pm_disk_mode); |
187 | else { | ||
188 | /* swsusp_write can not fail in device_resume, | ||
189 | no need to do second device_resume */ | ||
190 | swsusp_free(); | ||
191 | unprepare_processes(); | ||
192 | return error; | ||
193 | } | ||
211 | } else | 194 | } else |
212 | pr_debug("PM: Image restored successfully.\n"); | 195 | pr_debug("PM: Image restored successfully.\n"); |
196 | |||
213 | swsusp_free(); | 197 | swsusp_free(); |
214 | Done: | 198 | Done: |
215 | finish(); | 199 | device_resume(); |
200 | unprepare_processes(); | ||
216 | return error; | 201 | return error; |
217 | } | 202 | } |
218 | 203 | ||
@@ -233,9 +218,12 @@ static int software_resume(void) | |||
233 | { | 218 | { |
234 | int error; | 219 | int error; |
235 | 220 | ||
221 | down(&pm_sem); | ||
236 | if (!swsusp_resume_device) { | 222 | if (!swsusp_resume_device) { |
237 | if (!strlen(resume_file)) | 223 | if (!strlen(resume_file)) { |
224 | up(&pm_sem); | ||
238 | return -ENOENT; | 225 | return -ENOENT; |
226 | } | ||
239 | swsusp_resume_device = name_to_dev_t(resume_file); | 227 | swsusp_resume_device = name_to_dev_t(resume_file); |
240 | pr_debug("swsusp: Resume From Partition %s\n", resume_file); | 228 | pr_debug("swsusp: Resume From Partition %s\n", resume_file); |
241 | } else { | 229 | } else { |
@@ -248,6 +236,7 @@ static int software_resume(void) | |||
248 | * FIXME: If noresume is specified, we need to find the partition | 236 | * FIXME: If noresume is specified, we need to find the partition |
249 | * and reset it back to normal swap space. | 237 | * and reset it back to normal swap space. |
250 | */ | 238 | */ |
239 | up(&pm_sem); | ||
251 | return 0; | 240 | return 0; |
252 | } | 241 | } |
253 | 242 | ||
@@ -270,20 +259,24 @@ static int software_resume(void) | |||
270 | 259 | ||
271 | pr_debug("PM: Preparing devices for restore.\n"); | 260 | pr_debug("PM: Preparing devices for restore.\n"); |
272 | 261 | ||
273 | if ((error = prepare_devices())) | 262 | if ((error = device_suspend(PMSG_FREEZE))) { |
263 | printk("Some devices failed to suspend\n"); | ||
274 | goto Free; | 264 | goto Free; |
265 | } | ||
275 | 266 | ||
276 | mb(); | 267 | mb(); |
277 | 268 | ||
278 | pr_debug("PM: Restoring saved image.\n"); | 269 | pr_debug("PM: Restoring saved image.\n"); |
279 | swsusp_resume(); | 270 | swsusp_resume(); |
280 | pr_debug("PM: Restore failed, recovering.n"); | 271 | pr_debug("PM: Restore failed, recovering.n"); |
281 | finish(); | 272 | device_resume(); |
282 | Free: | 273 | Free: |
283 | swsusp_free(); | 274 | swsusp_free(); |
284 | Cleanup: | 275 | Cleanup: |
285 | unprepare_processes(); | 276 | unprepare_processes(); |
286 | Done: | 277 | Done: |
278 | /* For success case, the suspend path will release the lock */ | ||
279 | up(&pm_sem); | ||
287 | pr_debug("PM: Resume from disk failed.\n"); | 280 | pr_debug("PM: Resume from disk failed.\n"); |
288 | return 0; | 281 | return 0; |
289 | } | 282 | } |
@@ -390,7 +383,9 @@ static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t | |||
390 | if (sscanf(buf, "%u:%u", &maj, &min) == 2) { | 383 | if (sscanf(buf, "%u:%u", &maj, &min) == 2) { |
391 | res = MKDEV(maj,min); | 384 | res = MKDEV(maj,min); |
392 | if (maj == MAJOR(res) && min == MINOR(res)) { | 385 | if (maj == MAJOR(res) && min == MINOR(res)) { |
386 | down(&pm_sem); | ||
393 | swsusp_resume_device = res; | 387 | swsusp_resume_device = res; |
388 | up(&pm_sem); | ||
394 | printk("Attempting manual resume\n"); | 389 | printk("Attempting manual resume\n"); |
395 | noresume = 0; | 390 | noresume = 0; |
396 | software_resume(); | 391 | software_resume(); |
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 71aa0fd22007..22bdc93cc038 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -143,11 +143,12 @@ static void suspend_finish(suspend_state_t state) | |||
143 | 143 | ||
144 | 144 | ||
145 | 145 | ||
146 | static char * pm_states[] = { | 146 | static char *pm_states[PM_SUSPEND_MAX] = { |
147 | [PM_SUSPEND_STANDBY] = "standby", | 147 | [PM_SUSPEND_STANDBY] = "standby", |
148 | [PM_SUSPEND_MEM] = "mem", | 148 | [PM_SUSPEND_MEM] = "mem", |
149 | #ifdef CONFIG_SOFTWARE_SUSPEND | ||
149 | [PM_SUSPEND_DISK] = "disk", | 150 | [PM_SUSPEND_DISK] = "disk", |
150 | NULL, | 151 | #endif |
151 | }; | 152 | }; |
152 | 153 | ||
153 | 154 | ||
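Sizing pm_states[] by PM_SUSPEND_MAX and compiling the "disk" entry out leaves NULL holes rather than a short NULL-terminated list, so any lookup has to skip empty slots; the sysfs store path does a scan of this kind. A hedged sketch of such a lookup (name_to_state() is an illustrative helper, not part of the patch):

    #include <linux/pm.h>
    #include <linux/string.h>

    /*
     * Hedged sketch only: map a state name to its suspend_state_t index,
     * skipping the NULL holes left by states that are compiled out.
     */
    static suspend_state_t name_to_state(const char *name, size_t len)
    {
            suspend_state_t state;

            for (state = 0; state < PM_SUSPEND_MAX; state++)
                    if (pm_states[state] &&
                        !strncmp(name, pm_states[state], len))
                            return state;
            return PM_SUSPEND_MAX;          /* no such state */
    }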
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index 61deda04e39e..159149321b3c 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -60,9 +60,8 @@ struct pm_dev *pm_register(pm_dev_t type, | |||
60 | unsigned long id, | 60 | unsigned long id, |
61 | pm_callback callback) | 61 | pm_callback callback) |
62 | { | 62 | { |
63 | struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL); | 63 | struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL); |
64 | if (dev) { | 64 | if (dev) { |
65 | memset(dev, 0, sizeof(*dev)); | ||
66 | dev->type = type; | 65 | dev->type = type; |
67 | dev->id = id; | 66 | dev->id = id; |
68 | dev->callback = callback; | 67 | dev->callback = callback; |
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 3bd0d261818f..28de118f7a0b 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -38,7 +38,6 @@ void refrigerator(void) | |||
38 | processes around? */ | 38 | processes around? */ |
39 | long save; | 39 | long save; |
40 | save = current->state; | 40 | save = current->state; |
41 | current->state = TASK_UNINTERRUPTIBLE; | ||
42 | pr_debug("%s entered refrigerator\n", current->comm); | 41 | pr_debug("%s entered refrigerator\n", current->comm); |
43 | printk("="); | 42 | printk("="); |
44 | 43 | ||
@@ -47,8 +46,10 @@ void refrigerator(void) | |||
47 | recalc_sigpending(); /* We sent fake signal, clean it up */ | 46 | recalc_sigpending(); /* We sent fake signal, clean it up */ |
48 | spin_unlock_irq(¤t->sighand->siglock); | 47 | spin_unlock_irq(¤t->sighand->siglock); |
49 | 48 | ||
50 | while (frozen(current)) | 49 | while (frozen(current)) { |
50 | current->state = TASK_UNINTERRUPTIBLE; | ||
51 | schedule(); | 51 | schedule(); |
52 | } | ||
52 | pr_debug("%s left refrigerator\n", current->comm); | 53 | pr_debug("%s left refrigerator\n", current->comm); |
53 | current->state = save; | 54 | current->state = save; |
54 | } | 55 | } |
@@ -80,13 +81,33 @@ int freeze_processes(void) | |||
80 | } while_each_thread(g, p); | 81 | } while_each_thread(g, p); |
81 | read_unlock(&tasklist_lock); | 82 | read_unlock(&tasklist_lock); |
82 | yield(); /* Yield is okay here */ | 83 | yield(); /* Yield is okay here */ |
83 | if (time_after(jiffies, start_time + TIMEOUT)) { | 84 | if (todo && time_after(jiffies, start_time + TIMEOUT)) { |
84 | printk( "\n" ); | 85 | printk( "\n" ); |
85 | printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo ); | 86 | printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo ); |
86 | return todo; | 87 | break; |
87 | } | 88 | } |
88 | } while(todo); | 89 | } while(todo); |
89 | 90 | ||
91 | /* This does not unfreeze processes that are already frozen | ||
92 | * (we have slightly ugly calling convention in that respect, | ||
93 | * and caller must call thaw_processes() if something fails), | ||
94 | * but it cleans up leftover PF_FREEZE requests. | ||
95 | */ | ||
96 | if (todo) { | ||
97 | read_lock(&tasklist_lock); | ||
98 | do_each_thread(g, p) | ||
99 | if (freezing(p)) { | ||
100 | pr_debug(" clean up: %s\n", p->comm); | ||
101 | p->flags &= ~PF_FREEZE; | ||
102 | spin_lock_irqsave(&p->sighand->siglock, flags); | ||
103 | recalc_sigpending_tsk(p); | ||
104 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
105 | } | ||
106 | while_each_thread(g, p); | ||
107 | read_unlock(&tasklist_lock); | ||
108 | return todo; | ||
109 | } | ||
110 | |||
90 | printk( "|\n" ); | 111 | printk( "|\n" ); |
91 | BUG_ON(in_atomic()); | 112 | BUG_ON(in_atomic()); |
92 | return 0; | 113 | return 0; |
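The refrigerator() change above is the standard fix for a lost-wakeup race: the task state has to be set back to TASK_UNINTERRUPTIBLE on every loop iteration, before the condition is re-tested, so that a wakeup arriving between the test and schedule() leaves the task runnable. The same idiom as a hedged generic sketch (wait_until_thawed() is an illustrative name):

    #include <linux/sched.h>

    /*
     * Hedged sketch of the sleep-loop idiom refrigerator() now follows:
     * set the state, re-check the condition, then schedule().  A wakeup
     * racing with the check resets the state to TASK_RUNNING, so
     * schedule() returns promptly instead of sleeping forever.
     */
    static void wait_until_thawed(void)
    {
            while (frozen(current)) {
                    set_current_state(TASK_UNINTERRUPTIBLE);
                    if (!frozen(current))
                            break;
                    schedule();
            }
            __set_current_state(TASK_RUNNING);
    }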
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index f2bc71b9fe8b..d967e875ee82 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -31,6 +31,9 @@ | |||
31 | * Alex Badea <vampire@go.ro>: | 31 | * Alex Badea <vampire@go.ro>: |
32 | * Fixed runaway init | 32 | * Fixed runaway init |
33 | * | 33 | * |
34 | * Andreas Steinmetz <ast@domdv.de>: | ||
35 | * Added encrypted suspend option | ||
36 | * | ||
34 | * More state savers are welcome. Especially for the scsi layer... | 37 | * More state savers are welcome. Especially for the scsi layer... |
35 | * | 38 | * |
36 | * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt | 39 | * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt |
@@ -71,8 +74,16 @@ | |||
71 | #include <asm/tlbflush.h> | 74 | #include <asm/tlbflush.h> |
72 | #include <asm/io.h> | 75 | #include <asm/io.h> |
73 | 76 | ||
77 | #include <linux/random.h> | ||
78 | #include <linux/crypto.h> | ||
79 | #include <asm/scatterlist.h> | ||
80 | |||
74 | #include "power.h" | 81 | #include "power.h" |
75 | 82 | ||
83 | #define CIPHER "aes" | ||
84 | #define MAXKEY 32 | ||
85 | #define MAXIV 32 | ||
86 | |||
76 | /* References to section boundaries */ | 87 | /* References to section boundaries */ |
77 | extern const void __nosave_begin, __nosave_end; | 88 | extern const void __nosave_begin, __nosave_end; |
78 | 89 | ||
@@ -103,7 +114,8 @@ static suspend_pagedir_t *pagedir_save; | |||
103 | #define SWSUSP_SIG "S1SUSPEND" | 114 | #define SWSUSP_SIG "S1SUSPEND" |
104 | 115 | ||
105 | static struct swsusp_header { | 116 | static struct swsusp_header { |
106 | char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; | 117 | char reserved[PAGE_SIZE - 20 - MAXKEY - MAXIV - sizeof(swp_entry_t)]; |
118 | u8 key_iv[MAXKEY+MAXIV]; | ||
107 | swp_entry_t swsusp_info; | 119 | swp_entry_t swsusp_info; |
108 | char orig_sig[10]; | 120 | char orig_sig[10]; |
109 | char sig[10]; | 121 | char sig[10]; |
@@ -129,6 +141,131 @@ static struct swsusp_info swsusp_info; | |||
129 | static unsigned short swapfile_used[MAX_SWAPFILES]; | 141 | static unsigned short swapfile_used[MAX_SWAPFILES]; |
130 | static unsigned short root_swap; | 142 | static unsigned short root_swap; |
131 | 143 | ||
144 | static int write_page(unsigned long addr, swp_entry_t * loc); | ||
145 | static int bio_read_page(pgoff_t page_off, void * page); | ||
146 | |||
147 | static u8 key_iv[MAXKEY+MAXIV]; | ||
148 | |||
149 | #ifdef CONFIG_SWSUSP_ENCRYPT | ||
150 | |||
151 | static int crypto_init(int mode, void **mem) | ||
152 | { | ||
153 | int error = 0; | ||
154 | int len; | ||
155 | char *modemsg; | ||
156 | struct crypto_tfm *tfm; | ||
157 | |||
158 | modemsg = mode ? "suspend not possible" : "resume not possible"; | ||
159 | |||
160 | tfm = crypto_alloc_tfm(CIPHER, CRYPTO_TFM_MODE_CBC); | ||
161 | if(!tfm) { | ||
162 | printk(KERN_ERR "swsusp: no tfm, %s\n", modemsg); | ||
163 | error = -EINVAL; | ||
164 | goto out; | ||
165 | } | ||
166 | |||
167 | if(MAXKEY < crypto_tfm_alg_min_keysize(tfm)) { | ||
168 | printk(KERN_ERR "swsusp: key buffer too small, %s\n", modemsg); | ||
169 | error = -ENOKEY; | ||
170 | goto fail; | ||
171 | } | ||
172 | |||
173 | if (mode) | ||
174 | get_random_bytes(key_iv, MAXKEY+MAXIV); | ||
175 | |||
176 | len = crypto_tfm_alg_max_keysize(tfm); | ||
177 | if (len > MAXKEY) | ||
178 | len = MAXKEY; | ||
179 | |||
180 | if (crypto_cipher_setkey(tfm, key_iv, len)) { | ||
181 | printk(KERN_ERR "swsusp: key setup failure, %s\n", modemsg); | ||
182 | error = -EKEYREJECTED; | ||
183 | goto fail; | ||
184 | } | ||
185 | |||
186 | len = crypto_tfm_alg_ivsize(tfm); | ||
187 | |||
188 | if (MAXIV < len) { | ||
189 | printk(KERN_ERR "swsusp: iv buffer too small, %s\n", modemsg); | ||
190 | error = -EOVERFLOW; | ||
191 | goto fail; | ||
192 | } | ||
193 | |||
194 | crypto_cipher_set_iv(tfm, key_iv+MAXKEY, len); | ||
195 | |||
196 | *mem=(void *)tfm; | ||
197 | |||
198 | goto out; | ||
199 | |||
200 | fail: crypto_free_tfm(tfm); | ||
201 | out: return error; | ||
202 | } | ||
203 | |||
204 | static __inline__ void crypto_exit(void *mem) | ||
205 | { | ||
206 | crypto_free_tfm((struct crypto_tfm *)mem); | ||
207 | } | ||
208 | |||
209 | static __inline__ int crypto_write(struct pbe *p, void *mem) | ||
210 | { | ||
211 | int error = 0; | ||
212 | struct scatterlist src, dst; | ||
213 | |||
214 | src.page = virt_to_page(p->address); | ||
215 | src.offset = 0; | ||
216 | src.length = PAGE_SIZE; | ||
217 | dst.page = virt_to_page((void *)&swsusp_header); | ||
218 | dst.offset = 0; | ||
219 | dst.length = PAGE_SIZE; | ||
220 | |||
221 | error = crypto_cipher_encrypt((struct crypto_tfm *)mem, &dst, &src, | ||
222 | PAGE_SIZE); | ||
223 | |||
224 | if (!error) | ||
225 | error = write_page((unsigned long)&swsusp_header, | ||
226 | &(p->swap_address)); | ||
227 | return error; | ||
228 | } | ||
229 | |||
230 | static __inline__ int crypto_read(struct pbe *p, void *mem) | ||
231 | { | ||
232 | int error = 0; | ||
233 | struct scatterlist src, dst; | ||
234 | |||
235 | error = bio_read_page(swp_offset(p->swap_address), (void *)p->address); | ||
236 | if (!error) { | ||
237 | src.offset = 0; | ||
238 | src.length = PAGE_SIZE; | ||
239 | dst.offset = 0; | ||
240 | dst.length = PAGE_SIZE; | ||
241 | src.page = dst.page = virt_to_page((void *)p->address); | ||
242 | |||
243 | error = crypto_cipher_decrypt((struct crypto_tfm *)mem, &dst, | ||
244 | &src, PAGE_SIZE); | ||
245 | } | ||
246 | return error; | ||
247 | } | ||
248 | #else | ||
249 | static __inline__ int crypto_init(int mode, void *mem) | ||
250 | { | ||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | static __inline__ void crypto_exit(void *mem) | ||
255 | { | ||
256 | } | ||
257 | |||
258 | static __inline__ int crypto_write(struct pbe *p, void *mem) | ||
259 | { | ||
260 | return write_page(p->address, &(p->swap_address)); | ||
261 | } | ||
262 | |||
263 | static __inline__ int crypto_read(struct pbe *p, void *mem) | ||
264 | { | ||
265 | return bio_read_page(swp_offset(p->swap_address), (void *)p->address); | ||
266 | } | ||
267 | #endif | ||
268 | |||
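Taken together, the suspend-side write path uses these helpers in the pattern below (a condensed sketch of the data_write() hunk later in this file, with error handling trimmed); when CONFIG_SWSUSP_ENCRYPT is off, the stubs reduce it to plain write_page() calls:

	struct pbe *p;
	void *tfm;
	int error;

	if ((error = crypto_init(1, &tfm)))	/* mode 1: generate a fresh key + IV */
		return error;
	for_each_pbe (p, pagedir_nosave) {
		if ((error = crypto_write(p, tfm)))	/* encrypt the page, then write_page() */
			break;
	}
	crypto_exit(tfm);

On resume, data_read() mirrors this with crypto_init(0, &tfm), which reuses the key_iv[] that check_sig() recovers from the swap header (see the hunks further down).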
132 | static int mark_swapfiles(swp_entry_t prev) | 269 | static int mark_swapfiles(swp_entry_t prev) |
133 | { | 270 | { |
134 | int error; | 271 | int error; |
@@ -140,6 +277,7 @@ static int mark_swapfiles(swp_entry_t prev) | |||
140 | !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { | 277 | !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { |
141 | memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); | 278 | memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); |
142 | memcpy(swsusp_header.sig,SWSUSP_SIG, 10); | 279 | memcpy(swsusp_header.sig,SWSUSP_SIG, 10); |
280 | memcpy(swsusp_header.key_iv, key_iv, MAXKEY+MAXIV); | ||
143 | swsusp_header.swsusp_info = prev; | 281 | swsusp_header.swsusp_info = prev; |
144 | error = rw_swap_page_sync(WRITE, | 282 | error = rw_swap_page_sync(WRITE, |
145 | swp_entry(root_swap, 0), | 283 | swp_entry(root_swap, 0), |
@@ -179,9 +317,9 @@ static int swsusp_swap_check(void) /* This is called before saving image */ | |||
179 | len=strlen(resume_file); | 317 | len=strlen(resume_file); |
180 | root_swap = 0xFFFF; | 318 | root_swap = 0xFFFF; |
181 | 319 | ||
182 | swap_list_lock(); | 320 | spin_lock(&swap_lock); |
183 | for (i=0; i<MAX_SWAPFILES; i++) { | 321 | for (i=0; i<MAX_SWAPFILES; i++) { |
184 | if (swap_info[i].flags == 0) { | 322 | if (!(swap_info[i].flags & SWP_WRITEOK)) { |
185 | swapfile_used[i]=SWAPFILE_UNUSED; | 323 | swapfile_used[i]=SWAPFILE_UNUSED; |
186 | } else { | 324 | } else { |
187 | if (!len) { | 325 | if (!len) { |
@@ -202,7 +340,7 @@ static int swsusp_swap_check(void) /* This is called before saving image */ | |||
202 | } | 340 | } |
203 | } | 341 | } |
204 | } | 342 | } |
205 | swap_list_unlock(); | 343 | spin_unlock(&swap_lock); |
206 | return (root_swap != 0xffff) ? 0 : -ENODEV; | 344 | return (root_swap != 0xffff) ? 0 : -ENODEV; |
207 | } | 345 | } |
208 | 346 | ||
@@ -216,12 +354,12 @@ static void lock_swapdevices(void) | |||
216 | { | 354 | { |
217 | int i; | 355 | int i; |
218 | 356 | ||
219 | swap_list_lock(); | 357 | spin_lock(&swap_lock); |
220 | for (i = 0; i< MAX_SWAPFILES; i++) | 358 | for (i = 0; i< MAX_SWAPFILES; i++) |
221 | if (swapfile_used[i] == SWAPFILE_IGNORED) { | 359 | if (swapfile_used[i] == SWAPFILE_IGNORED) { |
222 | swap_info[i].flags ^= 0xFF; | 360 | swap_info[i].flags ^= SWP_WRITEOK; |
223 | } | 361 | } |
224 | swap_list_unlock(); | 362 | spin_unlock(&swap_lock); |
225 | } | 363 | } |
226 | 364 | ||
227 | /** | 365 | /** |
@@ -286,6 +424,10 @@ static int data_write(void) | |||
286 | int error = 0, i = 0; | 424 | int error = 0, i = 0; |
287 | unsigned int mod = nr_copy_pages / 100; | 425 | unsigned int mod = nr_copy_pages / 100; |
288 | struct pbe *p; | 426 | struct pbe *p; |
427 | void *tfm; | ||
428 | |||
429 | if ((error = crypto_init(1, &tfm))) | ||
430 | return error; | ||
289 | 431 | ||
290 | if (!mod) | 432 | if (!mod) |
291 | mod = 1; | 433 | mod = 1; |
@@ -294,11 +436,14 @@ static int data_write(void) | |||
294 | for_each_pbe (p, pagedir_nosave) { | 436 | for_each_pbe (p, pagedir_nosave) { |
295 | if (!(i%mod)) | 437 | if (!(i%mod)) |
296 | printk( "\b\b\b\b%3d%%", i / mod ); | 438 | printk( "\b\b\b\b%3d%%", i / mod ); |
297 | if ((error = write_page(p->address, &(p->swap_address)))) | 439 | if ((error = crypto_write(p, tfm))) { |
440 | crypto_exit(tfm); | ||
298 | return error; | 441 | return error; |
442 | } | ||
299 | i++; | 443 | i++; |
300 | } | 444 | } |
301 | printk("\b\b\b\bdone\n"); | 445 | printk("\b\b\b\bdone\n"); |
446 | crypto_exit(tfm); | ||
302 | return error; | 447 | return error; |
303 | } | 448 | } |
304 | 449 | ||
@@ -385,7 +530,6 @@ static int write_pagedir(void) | |||
385 | * write_suspend_image - Write entire image and metadata. | 530 | * write_suspend_image - Write entire image and metadata. |
386 | * | 531 | * |
387 | */ | 532 | */ |
388 | |||
389 | static int write_suspend_image(void) | 533 | static int write_suspend_image(void) |
390 | { | 534 | { |
391 | int error; | 535 | int error; |
@@ -400,6 +544,7 @@ static int write_suspend_image(void) | |||
400 | if ((error = close_swap())) | 544 | if ((error = close_swap())) |
401 | goto FreePagedir; | 545 | goto FreePagedir; |
402 | Done: | 546 | Done: |
547 | memset(key_iv, 0, MAXKEY+MAXIV); | ||
403 | return error; | 548 | return error; |
404 | FreePagedir: | 549 | FreePagedir: |
405 | free_pagedir_entries(); | 550 | free_pagedir_entries(); |
@@ -591,18 +736,7 @@ static void copy_data_pages(void) | |||
591 | 736 | ||
592 | static int calc_nr(int nr_copy) | 737 | static int calc_nr(int nr_copy) |
593 | { | 738 | { |
594 | int extra = 0; | 739 | return nr_copy + (nr_copy+PBES_PER_PAGE-2)/(PBES_PER_PAGE-1); |
595 | int mod = !!(nr_copy % PBES_PER_PAGE); | ||
596 | int diff = (nr_copy / PBES_PER_PAGE) + mod; | ||
597 | |||
598 | do { | ||
599 | extra += diff; | ||
600 | nr_copy += diff; | ||
601 | mod = !!(nr_copy % PBES_PER_PAGE); | ||
602 | diff = (nr_copy / PBES_PER_PAGE) + mod - extra; | ||
603 | } while (diff > 0); | ||
604 | |||
605 | return nr_copy; | ||
606 | } | 740 | } |
607 | 741 | ||
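The rewritten calc_nr() is the closed form of the old fixed-point loop: both compute the smallest number of extra pagedir pages E satisfying E * PBES_PER_PAGE >= nr_copy + E, i.e. E = (nr_copy + PBES_PER_PAGE - 2) / (PBES_PER_PAGE - 1) in integer arithmetic. For example, with an illustrative PBES_PER_PAGE of 128 and nr_copy = 1000: E = (1000 + 126) / 127 = 8 and calc_nr() returns 1008, since 8 * 128 = 1024 >= 1008 while 7 pages would only cover 896.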
608 | /** | 742 | /** |
@@ -886,20 +1020,21 @@ int swsusp_suspend(void) | |||
886 | * at resume time, and evil weirdness ensues. | 1020 | * at resume time, and evil weirdness ensues. |
887 | */ | 1021 | */ |
888 | if ((error = device_power_down(PMSG_FREEZE))) { | 1022 | if ((error = device_power_down(PMSG_FREEZE))) { |
1023 | printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); | ||
889 | local_irq_enable(); | 1024 | local_irq_enable(); |
890 | return error; | 1025 | return error; |
891 | } | 1026 | } |
892 | 1027 | ||
893 | if ((error = swsusp_swap_check())) { | 1028 | if ((error = swsusp_swap_check())) { |
894 | printk(KERN_ERR "swsusp: FATAL: cannot find swap device, try " | 1029 | printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); |
895 | "swapon -a!\n"); | 1030 | device_power_up(); |
896 | local_irq_enable(); | 1031 | local_irq_enable(); |
897 | return error; | 1032 | return error; |
898 | } | 1033 | } |
899 | 1034 | ||
900 | save_processor_state(); | 1035 | save_processor_state(); |
901 | if ((error = swsusp_arch_suspend())) | 1036 | if ((error = swsusp_arch_suspend())) |
902 | printk("Error %d suspending\n", error); | 1037 | printk(KERN_ERR "Error %d suspending\n", error); |
903 | /* Restore control flow magically appears here */ | 1038 | /* Restore control flow magically appears here */ |
904 | restore_processor_state(); | 1039 | restore_processor_state(); |
905 | BUG_ON (nr_copy_pages_check != nr_copy_pages); | 1040 | BUG_ON (nr_copy_pages_check != nr_copy_pages); |
@@ -924,6 +1059,7 @@ int swsusp_resume(void) | |||
924 | BUG_ON(!error); | 1059 | BUG_ON(!error); |
925 | restore_processor_state(); | 1060 | restore_processor_state(); |
926 | restore_highmem(); | 1061 | restore_highmem(); |
1062 | touch_softlockup_watchdog(); | ||
927 | device_power_up(); | 1063 | device_power_up(); |
928 | local_irq_enable(); | 1064 | local_irq_enable(); |
929 | return error; | 1065 | return error; |
@@ -1179,7 +1315,8 @@ static const char * sanity_check(void) | |||
1179 | if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) | 1315 | if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) |
1180 | return "machine"; | 1316 | return "machine"; |
1181 | #if 0 | 1317 | #if 0 |
1182 | if(swsusp_info.cpus != num_online_cpus()) | 1318 | /* We can't use number of online CPUs when we use hotplug to remove them ;-))) */ |
1319 | if (swsusp_info.cpus != num_possible_cpus()) | ||
1183 | return "number of cpus"; | 1320 | return "number of cpus"; |
1184 | #endif | 1321 | #endif |
1185 | return NULL; | 1322 | return NULL; |
@@ -1212,13 +1349,14 @@ static int check_sig(void) | |||
1212 | return error; | 1349 | return error; |
1213 | if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { | 1350 | if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { |
1214 | memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); | 1351 | memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); |
1352 | memcpy(key_iv, swsusp_header.key_iv, MAXKEY+MAXIV); | ||
1353 | memset(swsusp_header.key_iv, 0, MAXKEY+MAXIV); | ||
1215 | 1354 | ||
1216 | /* | 1355 | /* |
1217 | * Reset swap signature now. | 1356 | * Reset swap signature now. |
1218 | */ | 1357 | */ |
1219 | error = bio_write_page(0, &swsusp_header); | 1358 | error = bio_write_page(0, &swsusp_header); |
1220 | } else { | 1359 | } else { |
1221 | printk(KERN_ERR "swsusp: Suspend partition has wrong signature?\n"); | ||
1222 | return -EINVAL; | 1360 | return -EINVAL; |
1223 | } | 1361 | } |
1224 | if (!error) | 1362 | if (!error) |
@@ -1239,6 +1377,10 @@ static int data_read(struct pbe *pblist) | |||
1239 | int error = 0; | 1377 | int error = 0; |
1240 | int i = 0; | 1378 | int i = 0; |
1241 | int mod = swsusp_info.image_pages / 100; | 1379 | int mod = swsusp_info.image_pages / 100; |
1380 | void *tfm; | ||
1381 | |||
1382 | if ((error = crypto_init(0, &tfm))) | ||
1383 | return error; | ||
1242 | 1384 | ||
1243 | if (!mod) | 1385 | if (!mod) |
1244 | mod = 1; | 1386 | mod = 1; |
@@ -1250,14 +1392,15 @@ static int data_read(struct pbe *pblist) | |||
1250 | if (!(i % mod)) | 1392 | if (!(i % mod)) |
1251 | printk("\b\b\b\b%3d%%", i / mod); | 1393 | printk("\b\b\b\b%3d%%", i / mod); |
1252 | 1394 | ||
1253 | error = bio_read_page(swp_offset(p->swap_address), | 1395 | if ((error = crypto_read(p, tfm))) { |
1254 | (void *)p->address); | 1396 | crypto_exit(tfm); |
1255 | if (error) | ||
1256 | return error; | 1397 | return error; |
1398 | } | ||
1257 | 1399 | ||
1258 | i++; | 1400 | i++; |
1259 | } | 1401 | } |
1260 | printk("\b\b\b\bdone\n"); | 1402 | printk("\b\b\b\bdone\n"); |
1403 | crypto_exit(tfm); | ||
1261 | return error; | 1404 | return error; |
1262 | } | 1405 | } |
1263 | 1406 | ||
@@ -1385,6 +1528,7 @@ int swsusp_read(void) | |||
1385 | 1528 | ||
1386 | error = read_suspend_image(); | 1529 | error = read_suspend_image(); |
1387 | blkdev_put(resume_bdev); | 1530 | blkdev_put(resume_bdev); |
1531 | memset(key_iv, 0, MAXKEY+MAXIV); | ||
1388 | 1532 | ||
1389 | if (!error) | 1533 | if (!error) |
1390 | pr_debug("swsusp: Reading resume file was successful\n"); | 1534 | pr_debug("swsusp: Reading resume file was successful\n"); |
diff --git a/kernel/printk.c b/kernel/printk.c index 5092397fac29..a967605bc2e3 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -514,6 +514,9 @@ asmlinkage int printk(const char *fmt, ...) | |||
514 | return r; | 514 | return r; |
515 | } | 515 | } |
516 | 516 | ||
517 | /* cpu currently holding logbuf_lock */ | ||
518 | static volatile unsigned int printk_cpu = UINT_MAX; | ||
519 | |||
517 | asmlinkage int vprintk(const char *fmt, va_list args) | 520 | asmlinkage int vprintk(const char *fmt, va_list args) |
518 | { | 521 | { |
519 | unsigned long flags; | 522 | unsigned long flags; |
@@ -522,11 +525,15 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
522 | static char printk_buf[1024]; | 525 | static char printk_buf[1024]; |
523 | static int log_level_unknown = 1; | 526 | static int log_level_unknown = 1; |
524 | 527 | ||
525 | if (unlikely(oops_in_progress)) | 528 | preempt_disable(); |
529 | if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id()) | ||
530 | /* If a crash is occurring during printk() on this CPU, | ||
531 | * make sure we can't deadlock */ | ||
526 | zap_locks(); | 532 | zap_locks(); |
527 | 533 | ||
528 | /* This stops the holder of console_sem just where we want him */ | 534 | /* This stops the holder of console_sem just where we want him */ |
529 | spin_lock_irqsave(&logbuf_lock, flags); | 535 | spin_lock_irqsave(&logbuf_lock, flags); |
536 | printk_cpu = smp_processor_id(); | ||
530 | 537 | ||
531 | /* Emit the output into the temporary buffer */ | 538 | /* Emit the output into the temporary buffer */ |
532 | printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args); | 539 | printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args); |
@@ -595,6 +602,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
595 | * CPU until it is officially up. We shouldn't be calling into | 602 | * CPU until it is officially up. We shouldn't be calling into |
596 | * random console drivers on a CPU which doesn't exist yet.. | 603 | * random console drivers on a CPU which doesn't exist yet.. |
597 | */ | 604 | */ |
605 | printk_cpu = UINT_MAX; | ||
598 | spin_unlock_irqrestore(&logbuf_lock, flags); | 606 | spin_unlock_irqrestore(&logbuf_lock, flags); |
599 | goto out; | 607 | goto out; |
600 | } | 608 | } |
@@ -604,6 +612,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
604 | * We own the drivers. We can drop the spinlock and let | 612 | * We own the drivers. We can drop the spinlock and let |
605 | * release_console_sem() print the text | 613 | * release_console_sem() print the text |
606 | */ | 614 | */ |
615 | printk_cpu = UINT_MAX; | ||
607 | spin_unlock_irqrestore(&logbuf_lock, flags); | 616 | spin_unlock_irqrestore(&logbuf_lock, flags); |
608 | console_may_schedule = 0; | 617 | console_may_schedule = 0; |
609 | release_console_sem(); | 618 | release_console_sem(); |
@@ -613,9 +622,11 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
613 | * allows the semaphore holder to proceed and to call the | 622 | * allows the semaphore holder to proceed and to call the |
614 | * console drivers with the output which we just produced. | 623 | * console drivers with the output which we just produced. |
615 | */ | 624 | */ |
625 | printk_cpu = UINT_MAX; | ||
616 | spin_unlock_irqrestore(&logbuf_lock, flags); | 626 | spin_unlock_irqrestore(&logbuf_lock, flags); |
617 | } | 627 | } |
618 | out: | 628 | out: |
629 | preempt_enable(); | ||
619 | return printed_len; | 630 | return printed_len; |
620 | } | 631 | } |
621 | EXPORT_SYMBOL(printk); | 632 | EXPORT_SYMBOL(printk); |
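The printk.c hunks above amount to a simple recursion guard: remember which CPU currently holds logbuf_lock and only zap the locks during an oops when that same CPU re-enters printk(). Condensed from the hunks (declarations omitted, not a drop-in):

	preempt_disable();
	if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
		zap_locks();			/* this CPU may already hold logbuf_lock */

	spin_lock_irqsave(&logbuf_lock, flags);
	printk_cpu = smp_processor_id();
	/* ... format the message and copy it into the log buffer ... */
	printk_cpu = UINT_MAX;			/* reset on every unlock path */
	spin_unlock_irqrestore(&logbuf_lock, flags);
	preempt_enable();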
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8dcb8f6288bc..019e04ec065a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -118,6 +118,33 @@ int ptrace_check_attach(struct task_struct *child, int kill) | |||
118 | return ret; | 118 | return ret; |
119 | } | 119 | } |
120 | 120 | ||
121 | static int may_attach(struct task_struct *task) | ||
122 | { | ||
123 | if (!task->mm) | ||
124 | return -EPERM; | ||
125 | if (((current->uid != task->euid) || | ||
126 | (current->uid != task->suid) || | ||
127 | (current->uid != task->uid) || | ||
128 | (current->gid != task->egid) || | ||
129 | (current->gid != task->sgid) || | ||
130 | (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) | ||
131 | return -EPERM; | ||
132 | smp_rmb(); | ||
133 | if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) | ||
134 | return -EPERM; | ||
135 | |||
136 | return security_ptrace(current, task); | ||
137 | } | ||
138 | |||
139 | int ptrace_may_attach(struct task_struct *task) | ||
140 | { | ||
141 | int err; | ||
142 | task_lock(task); | ||
143 | err = may_attach(task); | ||
144 | task_unlock(task); | ||
145 | return !err; | ||
146 | } | ||
147 | |||
121 | int ptrace_attach(struct task_struct *task) | 148 | int ptrace_attach(struct task_struct *task) |
122 | { | 149 | { |
123 | int retval; | 150 | int retval; |
@@ -127,22 +154,10 @@ int ptrace_attach(struct task_struct *task) | |||
127 | goto bad; | 154 | goto bad; |
128 | if (task == current) | 155 | if (task == current) |
129 | goto bad; | 156 | goto bad; |
130 | if (!task->mm) | ||
131 | goto bad; | ||
132 | if(((current->uid != task->euid) || | ||
133 | (current->uid != task->suid) || | ||
134 | (current->uid != task->uid) || | ||
135 | (current->gid != task->egid) || | ||
136 | (current->gid != task->sgid) || | ||
137 | (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) | ||
138 | goto bad; | ||
139 | smp_rmb(); | ||
140 | if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) | ||
141 | goto bad; | ||
142 | /* the same process cannot be attached many times */ | 157 | /* the same process cannot be attached many times */ |
143 | if (task->ptrace & PT_PTRACED) | 158 | if (task->ptrace & PT_PTRACED) |
144 | goto bad; | 159 | goto bad; |
145 | retval = security_ptrace(current, task); | 160 | retval = may_attach(task); |
146 | if (retval) | 161 | if (retval) |
147 | goto bad; | 162 | goto bad; |
148 | 163 | ||
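Note the return conventions of the refactored helpers: may_attach() keeps the 0/-errno style used inside ptrace_attach(), while the exported ptrace_may_attach() returns !err, i.e. nonzero when attaching would be allowed. A hypothetical caller outside this file would therefore check it as follows (illustrative only, not part of this patch):

	if (!ptrace_may_attach(task))
		return -EPERM;		/* error value chosen for illustration */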
diff --git a/kernel/resource.c b/kernel/resource.c index 26967e042201..92285d822de6 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -430,10 +430,9 @@ EXPORT_SYMBOL(adjust_resource); | |||
430 | */ | 430 | */ |
431 | struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name) | 431 | struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name) |
432 | { | 432 | { |
433 | struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL); | 433 | struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); |
434 | 434 | ||
435 | if (res) { | 435 | if (res) { |
436 | memset(res, 0, sizeof(*res)); | ||
437 | res->name = name; | 436 | res->name = name; |
438 | res->start = start; | 437 | res->start = start; |
439 | res->end = start + n - 1; | 438 | res->end = start + n - 1; |
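This is the plain kmalloc-plus-memset to kzalloc conversion; the same pattern appears again in the workqueue.c hunk at the end of this patch. In general:

	/* before */
	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (res)
		memset(res, 0, sizeof(*res));

	/* after: the allocation arrives already zeroed */
	res = kzalloc(sizeof(*res), GFP_KERNEL);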
diff --git a/kernel/sched.c b/kernel/sched.c index 5f889d0cbfcc..18b95520a2e2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1478,6 +1478,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next) | |||
1478 | 1478 | ||
1479 | /** | 1479 | /** |
1480 | * finish_task_switch - clean up after a task-switch | 1480 | * finish_task_switch - clean up after a task-switch |
1481 | * @rq: runqueue associated with task-switch | ||
1481 | * @prev: the thread we just switched away from. | 1482 | * @prev: the thread we just switched away from. |
1482 | * | 1483 | * |
1483 | * finish_task_switch must be called after the context switch, paired | 1484 | * finish_task_switch must be called after the context switch, paired |
@@ -4779,7 +4780,7 @@ static int sd_parent_degenerate(struct sched_domain *sd, | |||
4779 | * Attach the domain 'sd' to 'cpu' as its base domain. Callers must | 4780 | * Attach the domain 'sd' to 'cpu' as its base domain. Callers must |
4780 | * hold the hotplug lock. | 4781 | * hold the hotplug lock. |
4781 | */ | 4782 | */ |
4782 | void cpu_attach_domain(struct sched_domain *sd, int cpu) | 4783 | static void cpu_attach_domain(struct sched_domain *sd, int cpu) |
4783 | { | 4784 | { |
4784 | runqueue_t *rq = cpu_rq(cpu); | 4785 | runqueue_t *rq = cpu_rq(cpu); |
4785 | struct sched_domain *tmp; | 4786 | struct sched_domain *tmp; |
@@ -4802,7 +4803,7 @@ void cpu_attach_domain(struct sched_domain *sd, int cpu) | |||
4802 | } | 4803 | } |
4803 | 4804 | ||
4804 | /* cpus with isolated domains */ | 4805 | /* cpus with isolated domains */ |
4805 | cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; | 4806 | static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; |
4806 | 4807 | ||
4807 | /* Setup the mask of cpus configured for isolated domains */ | 4808 | /* Setup the mask of cpus configured for isolated domains */ |
4808 | static int __init isolated_cpu_setup(char *str) | 4809 | static int __init isolated_cpu_setup(char *str) |
@@ -4830,8 +4831,8 @@ __setup ("isolcpus=", isolated_cpu_setup); | |||
4830 | * covered by the given span, and will set each group's ->cpumask correctly, | 4831 | * covered by the given span, and will set each group's ->cpumask correctly, |
4831 | * and ->cpu_power to 0. | 4832 | * and ->cpu_power to 0. |
4832 | */ | 4833 | */ |
4833 | void init_sched_build_groups(struct sched_group groups[], | 4834 | static void init_sched_build_groups(struct sched_group groups[], cpumask_t span, |
4834 | cpumask_t span, int (*group_fn)(int cpu)) | 4835 | int (*group_fn)(int cpu)) |
4835 | { | 4836 | { |
4836 | struct sched_group *first = NULL, *last = NULL; | 4837 | struct sched_group *first = NULL, *last = NULL; |
4837 | cpumask_t covered = CPU_MASK_NONE; | 4838 | cpumask_t covered = CPU_MASK_NONE; |
@@ -4864,12 +4865,85 @@ void init_sched_build_groups(struct sched_group groups[], | |||
4864 | last->next = first; | 4865 | last->next = first; |
4865 | } | 4866 | } |
4866 | 4867 | ||
4868 | #define SD_NODES_PER_DOMAIN 16 | ||
4867 | 4869 | ||
4868 | #ifdef ARCH_HAS_SCHED_DOMAIN | 4870 | #ifdef CONFIG_NUMA |
4869 | extern void build_sched_domains(const cpumask_t *cpu_map); | 4871 | /** |
4870 | extern void arch_init_sched_domains(const cpumask_t *cpu_map); | 4872 | * find_next_best_node - find the next node to include in a sched_domain |
4871 | extern void arch_destroy_sched_domains(const cpumask_t *cpu_map); | 4873 | * @node: node whose sched_domain we're building |
4872 | #else | 4874 | * @used_nodes: nodes already in the sched_domain |
4875 | * | ||
4876 | * Find the next node to include in a given scheduling domain. Simply | ||
4877 | * finds the closest node not already in the @used_nodes map. | ||
4878 | * | ||
4879 | * Should use nodemask_t. | ||
4880 | */ | ||
4881 | static int find_next_best_node(int node, unsigned long *used_nodes) | ||
4882 | { | ||
4883 | int i, n, val, min_val, best_node = 0; | ||
4884 | |||
4885 | min_val = INT_MAX; | ||
4886 | |||
4887 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
4888 | /* Start at @node */ | ||
4889 | n = (node + i) % MAX_NUMNODES; | ||
4890 | |||
4891 | if (!nr_cpus_node(n)) | ||
4892 | continue; | ||
4893 | |||
4894 | /* Skip already used nodes */ | ||
4895 | if (test_bit(n, used_nodes)) | ||
4896 | continue; | ||
4897 | |||
4898 | /* Simple min distance search */ | ||
4899 | val = node_distance(node, n); | ||
4900 | |||
4901 | if (val < min_val) { | ||
4902 | min_val = val; | ||
4903 | best_node = n; | ||
4904 | } | ||
4905 | } | ||
4906 | |||
4907 | set_bit(best_node, used_nodes); | ||
4908 | return best_node; | ||
4909 | } | ||
4910 | |||
4911 | /** | ||
4912 | * sched_domain_node_span - get a cpumask for a node's sched_domain | ||
4913 | * @node: node whose cpumask we're constructing | ||
4914 | * @size: number of nodes to include in this span | ||
4915 | * | ||
4916 | * Given a node, construct a good cpumask for its sched_domain to span. It | ||
4917 | * should be one that prevents unnecessary balancing, but also spreads tasks | ||
4918 | * out optimally. | ||
4919 | */ | ||
4920 | static cpumask_t sched_domain_node_span(int node) | ||
4921 | { | ||
4922 | int i; | ||
4923 | cpumask_t span, nodemask; | ||
4924 | DECLARE_BITMAP(used_nodes, MAX_NUMNODES); | ||
4925 | |||
4926 | cpus_clear(span); | ||
4927 | bitmap_zero(used_nodes, MAX_NUMNODES); | ||
4928 | |||
4929 | nodemask = node_to_cpumask(node); | ||
4930 | cpus_or(span, span, nodemask); | ||
4931 | set_bit(node, used_nodes); | ||
4932 | |||
4933 | for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { | ||
4934 | int next_node = find_next_best_node(node, used_nodes); | ||
4935 | nodemask = node_to_cpumask(next_node); | ||
4936 | cpus_or(span, span, nodemask); | ||
4937 | } | ||
4938 | |||
4939 | return span; | ||
4940 | } | ||
4941 | #endif | ||
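To make the span construction concrete, here is an illustrative trace on a hypothetical 4-node machine (distances are made up for the example; node_distance() is 10 locally, 20 to neighbours, 30 to the remote node):

	/*
	 * sched_domain_node_span(0), SD_NODES_PER_DOMAIN = 16:
	 *   span  = cpus of node 0                        used = {0}
	 *   find_next_best_node() -> 1 (distance 20)      used = {0,1}
	 *   find_next_best_node() -> 2 (distance 20)      used = {0,1,2}
	 *   find_next_best_node() -> 3 (distance 30)      used = {0,1,2,3}
	 *   remaining iterations re-pick node 0 and add nothing new,
	 *   so on a box with at most 16 nodes the span covers them all.
	 */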
4942 | |||
4943 | /* | ||
4944 | * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we | ||
4945 | * can switch it on easily if needed. | ||
4946 | */ | ||
4873 | #ifdef CONFIG_SCHED_SMT | 4947 | #ifdef CONFIG_SCHED_SMT |
4874 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); | 4948 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); |
4875 | static struct sched_group sched_group_cpus[NR_CPUS]; | 4949 | static struct sched_group sched_group_cpus[NR_CPUS]; |
@@ -4891,36 +4965,20 @@ static int cpu_to_phys_group(int cpu) | |||
4891 | } | 4965 | } |
4892 | 4966 | ||
4893 | #ifdef CONFIG_NUMA | 4967 | #ifdef CONFIG_NUMA |
4894 | |||
4895 | static DEFINE_PER_CPU(struct sched_domain, node_domains); | ||
4896 | static struct sched_group sched_group_nodes[MAX_NUMNODES]; | ||
4897 | static int cpu_to_node_group(int cpu) | ||
4898 | { | ||
4899 | return cpu_to_node(cpu); | ||
4900 | } | ||
4901 | #endif | ||
4902 | |||
4903 | #if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) | ||
4904 | /* | 4968 | /* |
4905 | * The domains setup code relies on siblings not spanning | 4969 | * The init_sched_build_groups can't handle what we want to do with node |
4906 | * multiple nodes. Make sure the architecture has a proper | 4970 | * groups, so roll our own. Now each node has its own list of groups which |
4907 | * siblings map: | 4971 | * gets dynamically allocated. |
4908 | */ | 4972 | */ |
4909 | static void check_sibling_maps(void) | 4973 | static DEFINE_PER_CPU(struct sched_domain, node_domains); |
4910 | { | 4974 | static struct sched_group **sched_group_nodes_bycpu[NR_CPUS]; |
4911 | int i, j; | ||
4912 | 4975 | ||
4913 | for_each_online_cpu(i) { | 4976 | static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); |
4914 | for_each_cpu_mask(j, cpu_sibling_map[i]) { | 4977 | static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS]; |
4915 | if (cpu_to_node(i) != cpu_to_node(j)) { | 4978 | |
4916 | printk(KERN_INFO "warning: CPU %d siblings map " | 4979 | static int cpu_to_allnodes_group(int cpu) |
4917 | "to different node - isolating " | 4980 | { |
4918 | "them.\n", i); | 4981 | return cpu_to_node(cpu); |
4919 | cpu_sibling_map[i] = cpumask_of_cpu(i); | ||
4920 | break; | ||
4921 | } | ||
4922 | } | ||
4923 | } | ||
4924 | } | 4982 | } |
4925 | #endif | 4983 | #endif |
4926 | 4984 | ||
@@ -4928,9 +4986,24 @@ static void check_sibling_maps(void) | |||
4928 | * Build sched domains for a given set of cpus and attach the sched domains | 4986 | * Build sched domains for a given set of cpus and attach the sched domains |
4929 | * to the individual cpus | 4987 | * to the individual cpus |
4930 | */ | 4988 | */ |
4931 | static void build_sched_domains(const cpumask_t *cpu_map) | 4989 | void build_sched_domains(const cpumask_t *cpu_map) |
4932 | { | 4990 | { |
4933 | int i; | 4991 | int i; |
4992 | #ifdef CONFIG_NUMA | ||
4993 | struct sched_group **sched_group_nodes = NULL; | ||
4994 | struct sched_group *sched_group_allnodes = NULL; | ||
4995 | |||
4996 | /* | ||
4997 | * Allocate the per-node list of sched groups | ||
4998 | */ | ||
4999 | sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES, | ||
5000 | GFP_ATOMIC); | ||
5001 | if (!sched_group_nodes) { | ||
5002 | printk(KERN_WARNING "Can not alloc sched group node list\n"); | ||
5003 | return; | ||
5004 | } | ||
5005 | sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; | ||
5006 | #endif | ||
4934 | 5007 | ||
4935 | /* | 5008 | /* |
4936 | * Set up domains for cpus specified by the cpu_map. | 5009 | * Set up domains for cpus specified by the cpu_map. |
@@ -4943,11 +5016,35 @@ static void build_sched_domains(const cpumask_t *cpu_map) | |||
4943 | cpus_and(nodemask, nodemask, *cpu_map); | 5016 | cpus_and(nodemask, nodemask, *cpu_map); |
4944 | 5017 | ||
4945 | #ifdef CONFIG_NUMA | 5018 | #ifdef CONFIG_NUMA |
5019 | if (cpus_weight(*cpu_map) | ||
5020 | > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { | ||
5021 | if (!sched_group_allnodes) { | ||
5022 | sched_group_allnodes | ||
5023 | = kmalloc(sizeof(struct sched_group) | ||
5024 | * MAX_NUMNODES, | ||
5025 | GFP_KERNEL); | ||
5026 | if (!sched_group_allnodes) { | ||
5027 | printk(KERN_WARNING | ||
5028 | "Can not alloc allnodes sched group\n"); | ||
5029 | break; | ||
5030 | } | ||
5031 | sched_group_allnodes_bycpu[i] | ||
5032 | = sched_group_allnodes; | ||
5033 | } | ||
5034 | sd = &per_cpu(allnodes_domains, i); | ||
5035 | *sd = SD_ALLNODES_INIT; | ||
5036 | sd->span = *cpu_map; | ||
5037 | group = cpu_to_allnodes_group(i); | ||
5038 | sd->groups = &sched_group_allnodes[group]; | ||
5039 | p = sd; | ||
5040 | } else | ||
5041 | p = NULL; | ||
5042 | |||
4946 | sd = &per_cpu(node_domains, i); | 5043 | sd = &per_cpu(node_domains, i); |
4947 | group = cpu_to_node_group(i); | ||
4948 | *sd = SD_NODE_INIT; | 5044 | *sd = SD_NODE_INIT; |
4949 | sd->span = *cpu_map; | 5045 | sd->span = sched_domain_node_span(cpu_to_node(i)); |
4950 | sd->groups = &sched_group_nodes[group]; | 5046 | sd->parent = p; |
5047 | cpus_and(sd->span, sd->span, *cpu_map); | ||
4951 | #endif | 5048 | #endif |
4952 | 5049 | ||
4953 | p = sd; | 5050 | p = sd; |
@@ -4972,7 +5069,7 @@ static void build_sched_domains(const cpumask_t *cpu_map) | |||
4972 | 5069 | ||
4973 | #ifdef CONFIG_SCHED_SMT | 5070 | #ifdef CONFIG_SCHED_SMT |
4974 | /* Set up CPU (sibling) groups */ | 5071 | /* Set up CPU (sibling) groups */ |
4975 | for_each_online_cpu(i) { | 5072 | for_each_cpu_mask(i, *cpu_map) { |
4976 | cpumask_t this_sibling_map = cpu_sibling_map[i]; | 5073 | cpumask_t this_sibling_map = cpu_sibling_map[i]; |
4977 | cpus_and(this_sibling_map, this_sibling_map, *cpu_map); | 5074 | cpus_and(this_sibling_map, this_sibling_map, *cpu_map); |
4978 | if (i != first_cpu(this_sibling_map)) | 5075 | if (i != first_cpu(this_sibling_map)) |
@@ -4997,8 +5094,77 @@ static void build_sched_domains(const cpumask_t *cpu_map) | |||
4997 | 5094 | ||
4998 | #ifdef CONFIG_NUMA | 5095 | #ifdef CONFIG_NUMA |
4999 | /* Set up node groups */ | 5096 | /* Set up node groups */ |
5000 | init_sched_build_groups(sched_group_nodes, *cpu_map, | 5097 | if (sched_group_allnodes) |
5001 | &cpu_to_node_group); | 5098 | init_sched_build_groups(sched_group_allnodes, *cpu_map, |
5099 | &cpu_to_allnodes_group); | ||
5100 | |||
5101 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
5102 | /* Set up node groups */ | ||
5103 | struct sched_group *sg, *prev; | ||
5104 | cpumask_t nodemask = node_to_cpumask(i); | ||
5105 | cpumask_t domainspan; | ||
5106 | cpumask_t covered = CPU_MASK_NONE; | ||
5107 | int j; | ||
5108 | |||
5109 | cpus_and(nodemask, nodemask, *cpu_map); | ||
5110 | if (cpus_empty(nodemask)) { | ||
5111 | sched_group_nodes[i] = NULL; | ||
5112 | continue; | ||
5113 | } | ||
5114 | |||
5115 | domainspan = sched_domain_node_span(i); | ||
5116 | cpus_and(domainspan, domainspan, *cpu_map); | ||
5117 | |||
5118 | sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); | ||
5119 | sched_group_nodes[i] = sg; | ||
5120 | for_each_cpu_mask(j, nodemask) { | ||
5121 | struct sched_domain *sd; | ||
5122 | sd = &per_cpu(node_domains, j); | ||
5123 | sd->groups = sg; | ||
5124 | if (sd->groups == NULL) { | ||
5125 | /* Turn off balancing if we have no groups */ | ||
5126 | sd->flags = 0; | ||
5127 | } | ||
5128 | } | ||
5129 | if (!sg) { | ||
5130 | printk(KERN_WARNING | ||
5131 | "Can not alloc domain group for node %d\n", i); | ||
5132 | continue; | ||
5133 | } | ||
5134 | sg->cpu_power = 0; | ||
5135 | sg->cpumask = nodemask; | ||
5136 | cpus_or(covered, covered, nodemask); | ||
5137 | prev = sg; | ||
5138 | |||
5139 | for (j = 0; j < MAX_NUMNODES; j++) { | ||
5140 | cpumask_t tmp, notcovered; | ||
5141 | int n = (i + j) % MAX_NUMNODES; | ||
5142 | |||
5143 | cpus_complement(notcovered, covered); | ||
5144 | cpus_and(tmp, notcovered, *cpu_map); | ||
5145 | cpus_and(tmp, tmp, domainspan); | ||
5146 | if (cpus_empty(tmp)) | ||
5147 | break; | ||
5148 | |||
5149 | nodemask = node_to_cpumask(n); | ||
5150 | cpus_and(tmp, tmp, nodemask); | ||
5151 | if (cpus_empty(tmp)) | ||
5152 | continue; | ||
5153 | |||
5154 | sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); | ||
5155 | if (!sg) { | ||
5156 | printk(KERN_WARNING | ||
5157 | "Can not alloc domain group for node %d\n", j); | ||
5158 | break; | ||
5159 | } | ||
5160 | sg->cpu_power = 0; | ||
5161 | sg->cpumask = tmp; | ||
5162 | cpus_or(covered, covered, tmp); | ||
5163 | prev->next = sg; | ||
5164 | prev = sg; | ||
5165 | } | ||
5166 | prev->next = sched_group_nodes[i]; | ||
5167 | } | ||
5002 | #endif | 5168 | #endif |
5003 | 5169 | ||
5004 | /* Calculate CPU power for physical packages and nodes */ | 5170 | /* Calculate CPU power for physical packages and nodes */ |
@@ -5017,14 +5183,46 @@ static void build_sched_domains(const cpumask_t *cpu_map) | |||
5017 | sd->groups->cpu_power = power; | 5183 | sd->groups->cpu_power = power; |
5018 | 5184 | ||
5019 | #ifdef CONFIG_NUMA | 5185 | #ifdef CONFIG_NUMA |
5020 | if (i == first_cpu(sd->groups->cpumask)) { | 5186 | sd = &per_cpu(allnodes_domains, i); |
5021 | /* Only add "power" once for each physical package. */ | 5187 | if (sd->groups) { |
5022 | sd = &per_cpu(node_domains, i); | 5188 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * |
5023 | sd->groups->cpu_power += power; | 5189 | (cpus_weight(sd->groups->cpumask)-1) / 10; |
5190 | sd->groups->cpu_power = power; | ||
5024 | } | 5191 | } |
5025 | #endif | 5192 | #endif |
5026 | } | 5193 | } |
5027 | 5194 | ||
5195 | #ifdef CONFIG_NUMA | ||
5196 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
5197 | struct sched_group *sg = sched_group_nodes[i]; | ||
5198 | int j; | ||
5199 | |||
5200 | if (sg == NULL) | ||
5201 | continue; | ||
5202 | next_sg: | ||
5203 | for_each_cpu_mask(j, sg->cpumask) { | ||
5204 | struct sched_domain *sd; | ||
5205 | int power; | ||
5206 | |||
5207 | sd = &per_cpu(phys_domains, j); | ||
5208 | if (j != first_cpu(sd->groups->cpumask)) { | ||
5209 | /* | ||
5210 | * Only add "power" once for each | ||
5211 | * physical package. | ||
5212 | */ | ||
5213 | continue; | ||
5214 | } | ||
5215 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | ||
5216 | (cpus_weight(sd->groups->cpumask)-1) / 10; | ||
5217 | |||
5218 | sg->cpu_power += power; | ||
5219 | } | ||
5220 | sg = sg->next; | ||
5221 | if (sg != sched_group_nodes[i]) | ||
5222 | goto next_sg; | ||
5223 | } | ||
5224 | #endif | ||
5225 | |||
5028 | /* Attach the domains */ | 5226 | /* Attach the domains */ |
5029 | for_each_cpu_mask(i, *cpu_map) { | 5227 | for_each_cpu_mask(i, *cpu_map) { |
5030 | struct sched_domain *sd; | 5228 | struct sched_domain *sd; |
@@ -5039,13 +5237,10 @@ static void build_sched_domains(const cpumask_t *cpu_map) | |||
5039 | /* | 5237 | /* |
5040 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 5238 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
5041 | */ | 5239 | */ |
5042 | static void arch_init_sched_domains(cpumask_t *cpu_map) | 5240 | static void arch_init_sched_domains(const cpumask_t *cpu_map) |
5043 | { | 5241 | { |
5044 | cpumask_t cpu_default_map; | 5242 | cpumask_t cpu_default_map; |
5045 | 5243 | ||
5046 | #if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) | ||
5047 | check_sibling_maps(); | ||
5048 | #endif | ||
5049 | /* | 5244 | /* |
5050 | * Setup mask for cpus without special case scheduling requirements. | 5245 | * Setup mask for cpus without special case scheduling requirements. |
5051 | * For now this just excludes isolated cpus, but could be used to | 5246 | * For now this just excludes isolated cpus, but could be used to |
@@ -5058,10 +5253,47 @@ static void arch_init_sched_domains(cpumask_t *cpu_map) | |||
5058 | 5253 | ||
5059 | static void arch_destroy_sched_domains(const cpumask_t *cpu_map) | 5254 | static void arch_destroy_sched_domains(const cpumask_t *cpu_map) |
5060 | { | 5255 | { |
5061 | /* Do nothing: everything is statically allocated. */ | 5256 | #ifdef CONFIG_NUMA |
5062 | } | 5257 | int i; |
5258 | int cpu; | ||
5259 | |||
5260 | for_each_cpu_mask(cpu, *cpu_map) { | ||
5261 | struct sched_group *sched_group_allnodes | ||
5262 | = sched_group_allnodes_bycpu[cpu]; | ||
5263 | struct sched_group **sched_group_nodes | ||
5264 | = sched_group_nodes_bycpu[cpu]; | ||
5265 | |||
5266 | if (sched_group_allnodes) { | ||
5267 | kfree(sched_group_allnodes); | ||
5268 | sched_group_allnodes_bycpu[cpu] = NULL; | ||
5269 | } | ||
5270 | |||
5271 | if (!sched_group_nodes) | ||
5272 | continue; | ||
5273 | |||
5274 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
5275 | cpumask_t nodemask = node_to_cpumask(i); | ||
5276 | struct sched_group *oldsg, *sg = sched_group_nodes[i]; | ||
5063 | 5277 | ||
5064 | #endif /* ARCH_HAS_SCHED_DOMAIN */ | 5278 | cpus_and(nodemask, nodemask, *cpu_map); |
5279 | if (cpus_empty(nodemask)) | ||
5280 | continue; | ||
5281 | |||
5282 | if (sg == NULL) | ||
5283 | continue; | ||
5284 | sg = sg->next; | ||
5285 | next_sg: | ||
5286 | oldsg = sg; | ||
5287 | sg = sg->next; | ||
5288 | kfree(oldsg); | ||
5289 | if (oldsg != sched_group_nodes[i]) | ||
5290 | goto next_sg; | ||
5291 | } | ||
5292 | kfree(sched_group_nodes); | ||
5293 | sched_group_nodes_bycpu[cpu] = NULL; | ||
5294 | } | ||
5295 | #endif | ||
5296 | } | ||
5065 | 5297 | ||
5066 | /* | 5298 | /* |
5067 | * Detach sched domains from a group of cpus specified in cpu_map | 5299 | * Detach sched domains from a group of cpus specified in cpu_map |
diff --git a/kernel/signal.c b/kernel/signal.c index d282fea81138..4980a073237f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -678,7 +678,7 @@ static int check_kill_permission(int sig, struct siginfo *info, | |||
678 | 678 | ||
679 | /* forward decl */ | 679 | /* forward decl */ |
680 | static void do_notify_parent_cldstop(struct task_struct *tsk, | 680 | static void do_notify_parent_cldstop(struct task_struct *tsk, |
681 | struct task_struct *parent, | 681 | int to_self, |
682 | int why); | 682 | int why); |
683 | 683 | ||
684 | /* | 684 | /* |
@@ -729,14 +729,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) | |||
729 | p->signal->group_stop_count = 0; | 729 | p->signal->group_stop_count = 0; |
730 | p->signal->flags = SIGNAL_STOP_CONTINUED; | 730 | p->signal->flags = SIGNAL_STOP_CONTINUED; |
731 | spin_unlock(&p->sighand->siglock); | 731 | spin_unlock(&p->sighand->siglock); |
732 | if (p->ptrace & PT_PTRACED) | 732 | do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_STOPPED); |
733 | do_notify_parent_cldstop(p, p->parent, | ||
734 | CLD_STOPPED); | ||
735 | else | ||
736 | do_notify_parent_cldstop( | ||
737 | p->group_leader, | ||
738 | p->group_leader->real_parent, | ||
739 | CLD_STOPPED); | ||
740 | spin_lock(&p->sighand->siglock); | 733 | spin_lock(&p->sighand->siglock); |
741 | } | 734 | } |
742 | rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); | 735 | rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); |
@@ -777,14 +770,7 @@ static void handle_stop_signal(int sig, struct task_struct *p) | |||
777 | p->signal->flags = SIGNAL_STOP_CONTINUED; | 770 | p->signal->flags = SIGNAL_STOP_CONTINUED; |
778 | p->signal->group_exit_code = 0; | 771 | p->signal->group_exit_code = 0; |
779 | spin_unlock(&p->sighand->siglock); | 772 | spin_unlock(&p->sighand->siglock); |
780 | if (p->ptrace & PT_PTRACED) | 773 | do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_CONTINUED); |
781 | do_notify_parent_cldstop(p, p->parent, | ||
782 | CLD_CONTINUED); | ||
783 | else | ||
784 | do_notify_parent_cldstop( | ||
785 | p->group_leader, | ||
786 | p->group_leader->real_parent, | ||
787 | CLD_CONTINUED); | ||
788 | spin_lock(&p->sighand->siglock); | 774 | spin_lock(&p->sighand->siglock); |
789 | } else { | 775 | } else { |
790 | /* | 776 | /* |
@@ -1380,16 +1366,16 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1380 | unsigned long flags; | 1366 | unsigned long flags; |
1381 | int ret = 0; | 1367 | int ret = 0; |
1382 | 1368 | ||
1383 | /* | ||
1384 | * We need the tasklist lock even for the specific | ||
1385 | * thread case (when we don't need to follow the group | ||
1386 | * lists) in order to avoid races with "p->sighand" | ||
1387 | * going away or changing from under us. | ||
1388 | */ | ||
1389 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1369 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
1390 | read_lock(&tasklist_lock); | 1370 | read_lock(&tasklist_lock); |
1371 | |||
1372 | if (unlikely(p->flags & PF_EXITING)) { | ||
1373 | ret = -1; | ||
1374 | goto out_err; | ||
1375 | } | ||
1376 | |||
1391 | spin_lock_irqsave(&p->sighand->siglock, flags); | 1377 | spin_lock_irqsave(&p->sighand->siglock, flags); |
1392 | 1378 | ||
1393 | if (unlikely(!list_empty(&q->list))) { | 1379 | if (unlikely(!list_empty(&q->list))) { |
1394 | /* | 1380 | /* |
1395 | * If an SI_TIMER entry is already queue just increment | 1381 | * If an SI_TIMER entry is already queue just increment |
@@ -1399,7 +1385,7 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1399 | BUG(); | 1385 | BUG(); |
1400 | q->info.si_overrun++; | 1386 | q->info.si_overrun++; |
1401 | goto out; | 1387 | goto out; |
1402 | } | 1388 | } |
1403 | /* Short-circuit ignored signals. */ | 1389 | /* Short-circuit ignored signals. */ |
1404 | if (sig_ignored(p, sig)) { | 1390 | if (sig_ignored(p, sig)) { |
1405 | ret = 1; | 1391 | ret = 1; |
@@ -1414,8 +1400,10 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) | |||
1414 | 1400 | ||
1415 | out: | 1401 | out: |
1416 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 1402 | spin_unlock_irqrestore(&p->sighand->siglock, flags); |
1403 | out_err: | ||
1417 | read_unlock(&tasklist_lock); | 1404 | read_unlock(&tasklist_lock); |
1418 | return(ret); | 1405 | |
1406 | return ret; | ||
1419 | } | 1407 | } |
1420 | 1408 | ||
1421 | int | 1409 | int |
@@ -1542,14 +1530,20 @@ void do_notify_parent(struct task_struct *tsk, int sig) | |||
1542 | spin_unlock_irqrestore(&psig->siglock, flags); | 1530 | spin_unlock_irqrestore(&psig->siglock, flags); |
1543 | } | 1531 | } |
1544 | 1532 | ||
1545 | static void | 1533 | static void do_notify_parent_cldstop(struct task_struct *tsk, int to_self, int why) |
1546 | do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent, | ||
1547 | int why) | ||
1548 | { | 1534 | { |
1549 | struct siginfo info; | 1535 | struct siginfo info; |
1550 | unsigned long flags; | 1536 | unsigned long flags; |
1537 | struct task_struct *parent; | ||
1551 | struct sighand_struct *sighand; | 1538 | struct sighand_struct *sighand; |
1552 | 1539 | ||
1540 | if (to_self) | ||
1541 | parent = tsk->parent; | ||
1542 | else { | ||
1543 | tsk = tsk->group_leader; | ||
1544 | parent = tsk->real_parent; | ||
1545 | } | ||
1546 | |||
1553 | info.si_signo = SIGCHLD; | 1547 | info.si_signo = SIGCHLD; |
1554 | info.si_errno = 0; | 1548 | info.si_errno = 0; |
1555 | info.si_pid = tsk->pid; | 1549 | info.si_pid = tsk->pid; |
@@ -1618,8 +1612,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info) | |||
1618 | !(current->ptrace & PT_ATTACHED)) && | 1612 | !(current->ptrace & PT_ATTACHED)) && |
1619 | (likely(current->parent->signal != current->signal) || | 1613 | (likely(current->parent->signal != current->signal) || |
1620 | !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { | 1614 | !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) { |
1621 | do_notify_parent_cldstop(current, current->parent, | 1615 | do_notify_parent_cldstop(current, 1, CLD_TRAPPED); |
1622 | CLD_TRAPPED); | ||
1623 | read_unlock(&tasklist_lock); | 1616 | read_unlock(&tasklist_lock); |
1624 | schedule(); | 1617 | schedule(); |
1625 | } else { | 1618 | } else { |
@@ -1668,25 +1661,25 @@ void ptrace_notify(int exit_code) | |||
1668 | static void | 1661 | static void |
1669 | finish_stop(int stop_count) | 1662 | finish_stop(int stop_count) |
1670 | { | 1663 | { |
1664 | int to_self; | ||
1665 | |||
1671 | /* | 1666 | /* |
1672 | * If there are no other threads in the group, or if there is | 1667 | * If there are no other threads in the group, or if there is |
1673 | * a group stop in progress and we are the last to stop, | 1668 | * a group stop in progress and we are the last to stop, |
1674 | * report to the parent. When ptraced, every thread reports itself. | 1669 | * report to the parent. When ptraced, every thread reports itself. |
1675 | */ | 1670 | */ |
1676 | if (stop_count < 0 || (current->ptrace & PT_PTRACED)) { | 1671 | if (stop_count < 0 || (current->ptrace & PT_PTRACED)) |
1677 | read_lock(&tasklist_lock); | 1672 | to_self = 1; |
1678 | do_notify_parent_cldstop(current, current->parent, | 1673 | else if (stop_count == 0) |
1679 | CLD_STOPPED); | 1674 | to_self = 0; |
1680 | read_unlock(&tasklist_lock); | 1675 | else |
1681 | } | 1676 | goto out; |
1682 | else if (stop_count == 0) { | ||
1683 | read_lock(&tasklist_lock); | ||
1684 | do_notify_parent_cldstop(current->group_leader, | ||
1685 | current->group_leader->real_parent, | ||
1686 | CLD_STOPPED); | ||
1687 | read_unlock(&tasklist_lock); | ||
1688 | } | ||
1689 | 1677 | ||
1678 | read_lock(&tasklist_lock); | ||
1679 | do_notify_parent_cldstop(current, to_self, CLD_STOPPED); | ||
1680 | read_unlock(&tasklist_lock); | ||
1681 | |||
1682 | out: | ||
1690 | schedule(); | 1683 | schedule(); |
1691 | /* | 1684 | /* |
1692 | * Now we don't run again until continued. | 1685 | * Now we don't run again until continued. |
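The signal.c changes fold the choice of which parent to notify into do_notify_parent_cldstop() itself via the new to_self argument, so every call site collapses to a single line. The convention, condensed from the hunks above:

	/*
	 * to_self != 0: report tsk itself to tsk->parent (the ptraced case)
	 * to_self == 0: report the group leader to its real_parent
	 */
	do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_STOPPED);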
diff --git a/kernel/softlockup.c b/kernel/softlockup.c new file mode 100644 index 000000000000..75976209cea7 --- /dev/null +++ b/kernel/softlockup.c | |||
@@ -0,0 +1,151 @@ | |||
1 | /* | ||
2 | * Detect Soft Lockups | ||
3 | * | ||
4 | * started by Ingo Molnar, (C) 2005, Red Hat | ||
5 | * | ||
6 | * this code detects soft lockups: incidents where, on a CPU, | ||
7 | * the kernel does not reschedule for 10 seconds or more. | ||
8 | */ | ||
9 | |||
10 | #include <linux/mm.h> | ||
11 | #include <linux/cpu.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/delay.h> | ||
14 | #include <linux/kthread.h> | ||
15 | #include <linux/notifier.h> | ||
16 | #include <linux/module.h> | ||
17 | |||
18 | static DEFINE_SPINLOCK(print_lock); | ||
19 | |||
20 | static DEFINE_PER_CPU(unsigned long, timestamp) = 0; | ||
21 | static DEFINE_PER_CPU(unsigned long, print_timestamp) = 0; | ||
22 | static DEFINE_PER_CPU(struct task_struct *, watchdog_task); | ||
23 | |||
24 | static int did_panic = 0; | ||
25 | static int softlock_panic(struct notifier_block *this, unsigned long event, | ||
26 | void *ptr) | ||
27 | { | ||
28 | did_panic = 1; | ||
29 | |||
30 | return NOTIFY_DONE; | ||
31 | } | ||
32 | |||
33 | static struct notifier_block panic_block = { | ||
34 | .notifier_call = softlock_panic, | ||
35 | }; | ||
36 | |||
37 | void touch_softlockup_watchdog(void) | ||
38 | { | ||
39 | per_cpu(timestamp, raw_smp_processor_id()) = jiffies; | ||
40 | } | ||
41 | EXPORT_SYMBOL(touch_softlockup_watchdog); | ||
42 | |||
43 | /* | ||
44 | * This callback runs from the timer interrupt, and checks | ||
45 | * whether the watchdog thread has hung or not: | ||
46 | */ | ||
47 | void softlockup_tick(struct pt_regs *regs) | ||
48 | { | ||
49 | int this_cpu = smp_processor_id(); | ||
50 | unsigned long timestamp = per_cpu(timestamp, this_cpu); | ||
51 | |||
52 | if (per_cpu(print_timestamp, this_cpu) == timestamp) | ||
53 | return; | ||
54 | |||
55 | /* Do not cause a second panic when there already was one */ | ||
56 | if (did_panic) | ||
57 | return; | ||
58 | |||
59 | if (time_after(jiffies, timestamp + 10*HZ)) { | ||
60 | per_cpu(print_timestamp, this_cpu) = timestamp; | ||
61 | |||
62 | spin_lock(&print_lock); | ||
63 | printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n", | ||
64 | this_cpu); | ||
65 | show_regs(regs); | ||
66 | spin_unlock(&print_lock); | ||
67 | } | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * The watchdog thread - runs every second and touches the timestamp. | ||
72 | */ | ||
73 | static int watchdog(void * __bind_cpu) | ||
74 | { | ||
75 | struct sched_param param = { .sched_priority = 99 }; | ||
76 | int this_cpu = (long) __bind_cpu; | ||
77 | |||
78 | printk("softlockup thread %d started up.\n", this_cpu); | ||
79 | |||
80 | sched_setscheduler(current, SCHED_FIFO, ¶m); | ||
81 | current->flags |= PF_NOFREEZE; | ||
82 | |||
83 | set_current_state(TASK_INTERRUPTIBLE); | ||
84 | |||
85 | /* | ||
86 | * Run briefly once per second - if this gets delayed for | ||
87 | * more than 10 seconds then the debug-printout triggers | ||
88 | * in softlockup_tick(): | ||
89 | */ | ||
90 | while (!kthread_should_stop()) { | ||
91 | msleep_interruptible(1000); | ||
92 | touch_softlockup_watchdog(); | ||
93 | } | ||
94 | __set_current_state(TASK_RUNNING); | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Create/destroy watchdog threads as CPUs come and go: | ||
101 | */ | ||
102 | static int __devinit | ||
103 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
104 | { | ||
105 | int hotcpu = (unsigned long)hcpu; | ||
106 | struct task_struct *p; | ||
107 | |||
108 | switch (action) { | ||
109 | case CPU_UP_PREPARE: | ||
110 | BUG_ON(per_cpu(watchdog_task, hotcpu)); | ||
111 | p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); | ||
112 | if (IS_ERR(p)) { | ||
113 | printk("watchdog for %i failed\n", hotcpu); | ||
114 | return NOTIFY_BAD; | ||
115 | } | ||
116 | per_cpu(watchdog_task, hotcpu) = p; | ||
117 | kthread_bind(p, hotcpu); | ||
118 | break; | ||
119 | case CPU_ONLINE: | ||
120 | |||
121 | wake_up_process(per_cpu(watchdog_task, hotcpu)); | ||
122 | break; | ||
123 | #ifdef CONFIG_HOTPLUG_CPU | ||
124 | case CPU_UP_CANCELED: | ||
125 | /* Unbind so it can run. Fall thru. */ | ||
126 | kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id()); | ||
127 | case CPU_DEAD: | ||
128 | p = per_cpu(watchdog_task, hotcpu); | ||
129 | per_cpu(watchdog_task, hotcpu) = NULL; | ||
130 | kthread_stop(p); | ||
131 | break; | ||
132 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
133 | } | ||
134 | return NOTIFY_OK; | ||
135 | } | ||
136 | |||
137 | static struct notifier_block __devinitdata cpu_nfb = { | ||
138 | .notifier_call = cpu_callback | ||
139 | }; | ||
140 | |||
141 | __init void spawn_softlockup_task(void) | ||
142 | { | ||
143 | void *cpu = (void *)(long)smp_processor_id(); | ||
144 | |||
145 | cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); | ||
146 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); | ||
147 | register_cpu_notifier(&cpu_nfb); | ||
148 | |||
149 | notifier_chain_register(&panic_notifier_list, &panic_block); | ||
150 | } | ||
151 | |||
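The detector has three moving parts, and the rest of this patch wires them up: a per-CPU SCHED_FIFO watchdog thread refreshes a per-CPU timestamp once a second, softlockup_tick() (called from do_timer() in the timer.c hunk below) complains when that timestamp is more than 10*HZ old, and code that legitimately keeps a CPU busy for a long time calls touch_softlockup_watchdog() — as swsusp_resume() now does in the hunk earlier in this patch. In sketch form:

	/* timer interrupt path (see the timer.c hunk below) */
	softlockup_tick(regs);			/* warn if no update for 10 seconds */

	/* per-CPU watchdog kthread, from the file above */
	while (!kthread_should_stop()) {
		msleep_interruptible(1000);
		touch_softlockup_watchdog();	/* per-CPU timestamp = jiffies */
	}

	/* after a legitimate long stall, e.g. resume from disk */
	touch_softlockup_watchdog();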
diff --git a/kernel/sys.c b/kernel/sys.c index 0bcaed6560ac..c80412be2302 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1711,7 +1711,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
1711 | unsigned long arg4, unsigned long arg5) | 1711 | unsigned long arg4, unsigned long arg5) |
1712 | { | 1712 | { |
1713 | long error; | 1713 | long error; |
1714 | int sig; | ||
1715 | 1714 | ||
1716 | error = security_task_prctl(option, arg2, arg3, arg4, arg5); | 1715 | error = security_task_prctl(option, arg2, arg3, arg4, arg5); |
1717 | if (error) | 1716 | if (error) |
@@ -1719,12 +1718,11 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
1719 | 1718 | ||
1720 | switch (option) { | 1719 | switch (option) { |
1721 | case PR_SET_PDEATHSIG: | 1720 | case PR_SET_PDEATHSIG: |
1722 | sig = arg2; | 1721 | if (!valid_signal(arg2)) { |
1723 | if (!valid_signal(sig)) { | ||
1724 | error = -EINVAL; | 1722 | error = -EINVAL; |
1725 | break; | 1723 | break; |
1726 | } | 1724 | } |
1727 | current->pdeath_signal = sig; | 1725 | current->pdeath_signal = arg2; |
1728 | break; | 1726 | break; |
1729 | case PR_GET_PDEATHSIG: | 1727 | case PR_GET_PDEATHSIG: |
1730 | error = put_user(current->pdeath_signal, (int __user *)arg2); | 1728 | error = put_user(current->pdeath_signal, (int __user *)arg2); |
diff --git a/kernel/timer.c b/kernel/timer.c index 5377f40723ff..13e2b513be01 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -950,6 +950,7 @@ void do_timer(struct pt_regs *regs) | |||
950 | { | 950 | { |
951 | jiffies_64++; | 951 | jiffies_64++; |
952 | update_times(); | 952 | update_times(); |
953 | softlockup_tick(regs); | ||
953 | } | 954 | } |
954 | 955 | ||
955 | #ifdef __ARCH_WANT_SYS_ALARM | 956 | #ifdef __ARCH_WANT_SYS_ALARM |
@@ -1428,7 +1429,7 @@ static inline u64 time_interpolator_get_cycles(unsigned int src) | |||
1428 | } | 1429 | } |
1429 | } | 1430 | } |
1430 | 1431 | ||
1431 | static inline u64 time_interpolator_get_counter(void) | 1432 | static inline u64 time_interpolator_get_counter(int writelock) |
1432 | { | 1433 | { |
1433 | unsigned int src = time_interpolator->source; | 1434 | unsigned int src = time_interpolator->source; |
1434 | 1435 | ||
@@ -1442,6 +1443,15 @@ static inline u64 time_interpolator_get_counter(void) | |||
1442 | now = time_interpolator_get_cycles(src); | 1443 | now = time_interpolator_get_cycles(src); |
1443 | if (lcycle && time_after(lcycle, now)) | 1444 | if (lcycle && time_after(lcycle, now)) |
1444 | return lcycle; | 1445 | return lcycle; |
1446 | |||
1447 | /* When holding the xtime write lock, there's no need | ||
1448 | * to add the overhead of the cmpxchg. Readers are | ||
1449 | * forced to retry until the write lock is released. | ||
1450 | */ | ||
1451 | if (writelock) { | ||
1452 | time_interpolator->last_cycle = now; | ||
1453 | return now; | ||
1454 | } | ||
1445 | /* Keep track of the last timer value returned. The use of cmpxchg here | 1455 | /* Keep track of the last timer value returned. The use of cmpxchg here |
1446 | * will cause contention in an SMP environment. | 1456 | * will cause contention in an SMP environment. |
1447 | */ | 1457 | */ |
@@ -1455,7 +1465,7 @@ static inline u64 time_interpolator_get_counter(void) | |||
1455 | void time_interpolator_reset(void) | 1465 | void time_interpolator_reset(void) |
1456 | { | 1466 | { |
1457 | time_interpolator->offset = 0; | 1467 | time_interpolator->offset = 0; |
1458 | time_interpolator->last_counter = time_interpolator_get_counter(); | 1468 | time_interpolator->last_counter = time_interpolator_get_counter(1); |
1459 | } | 1469 | } |
1460 | 1470 | ||
1461 | #define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift) | 1471 | #define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift) |
@@ -1467,7 +1477,7 @@ unsigned long time_interpolator_get_offset(void) | |||
1467 | return 0; | 1477 | return 0; |
1468 | 1478 | ||
1469 | return time_interpolator->offset + | 1479 | return time_interpolator->offset + |
1470 | GET_TI_NSECS(time_interpolator_get_counter(), time_interpolator); | 1480 | GET_TI_NSECS(time_interpolator_get_counter(0), time_interpolator); |
1471 | } | 1481 | } |
1472 | 1482 | ||
1473 | #define INTERPOLATOR_ADJUST 65536 | 1483 | #define INTERPOLATOR_ADJUST 65536 |
@@ -1490,7 +1500,7 @@ static void time_interpolator_update(long delta_nsec) | |||
1490 | * and the tuning logic insures that. | 1500 | * and the tuning logic insures that. |
1491 | */ | 1501 | */ |
1492 | 1502 | ||
1493 | counter = time_interpolator_get_counter(); | 1503 | counter = time_interpolator_get_counter(1); |
1494 | offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator); | 1504 | offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator); |
1495 | 1505 | ||
1496 | if (delta_nsec < 0 || (unsigned long) delta_nsec < offset) | 1506 | if (delta_nsec < 0 || (unsigned long) delta_nsec < offset) |
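The new writelock argument is a call-site convention taken from the hunks above: paths that already hold the xtime write lock (time_interpolator_reset() and time_interpolator_update()) pass 1 and may store last_cycle directly, while the lock-free reader time_interpolator_get_offset() passes 0 and keeps the cmpxchg-based update so concurrent readers stay consistent:

	/* write-lock holders */
	counter = time_interpolator_get_counter(1);

	/* lock-free reader */
	return time_interpolator->offset +
		GET_TI_NSECS(time_interpolator_get_counter(0), time_interpolator);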
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c7e36d4a70ca..91bacb13a7e2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -308,10 +308,9 @@ struct workqueue_struct *__create_workqueue(const char *name, | |||
308 | struct workqueue_struct *wq; | 308 | struct workqueue_struct *wq; |
309 | struct task_struct *p; | 309 | struct task_struct *p; |
310 | 310 | ||
311 | wq = kmalloc(sizeof(*wq), GFP_KERNEL); | 311 | wq = kzalloc(sizeof(*wq), GFP_KERNEL); |
312 | if (!wq) | 312 | if (!wq) |
313 | return NULL; | 313 | return NULL; |
314 | memset(wq, 0, sizeof(*wq)); | ||
315 | 314 | ||
316 | wq->name = name; | 315 | wq->name = name; |
317 | /* We don't need the distraction of CPUs appearing and vanishing. */ | 316 | /* We don't need the distraction of CPUs appearing and vanishing. */ |
@@ -499,7 +498,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
499 | case CPU_UP_PREPARE: | 498 | case CPU_UP_PREPARE: |
500 | /* Create a new workqueue thread for it. */ | 499 | /* Create a new workqueue thread for it. */ |
501 | list_for_each_entry(wq, &workqueues, list) { | 500 | list_for_each_entry(wq, &workqueues, list) { |
502 | if (create_workqueue_thread(wq, hotcpu) < 0) { | 501 | if (!create_workqueue_thread(wq, hotcpu)) { |
503 | printk("workqueue for %i failed\n", hotcpu); | 502 | printk("workqueue for %i failed\n", hotcpu); |
504 | return NOTIFY_BAD; | 503 | return NOTIFY_BAD; |
505 | } | 504 | } |