-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.c  | 250
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.h  |   6
-rw-r--r--  include/linux/kthread.h                |  11
-rw-r--r--  include/linux/smpboot.h                |  43
-rw-r--r--  kernel/Makefile                        |   3
-rw-r--r--  kernel/cpu.c                           |  10
-rw-r--r--  kernel/kthread.c                       | 185
-rw-r--r--  kernel/rcutree.c                       |  12
-rw-r--r--  kernel/rcutree.h                       |  15
-rw-r--r--  kernel/rcutree_plugin.h                | 403
-rw-r--r--  kernel/rcutree_trace.c                 |   3
-rw-r--r--  kernel/smpboot.c                       | 233
-rw-r--r--  kernel/smpboot.h                       |   4
-rw-r--r--  kernel/softirq.c                       | 111
-rw-r--r--  kernel/watchdog.c                      | 263
15 files changed, 751 insertions(+), 801 deletions(-)
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 53589000fd07..8615d7cf7e01 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -42,6 +42,7 @@
42 */ 42 */
43 43
44#include <linux/slab.h> 44#include <linux/slab.h>
45#include <linux/smpboot.h>
45 46
46#include "ehca_classes.h" 47#include "ehca_classes.h"
47#include "ehca_irq.h" 48#include "ehca_irq.h"
@@ -652,7 +653,7 @@ void ehca_tasklet_eq(unsigned long data)
652 ehca_process_eq((struct ehca_shca*)data, 1); 653 ehca_process_eq((struct ehca_shca*)data, 1);
653} 654}
654 655
655static inline int find_next_online_cpu(struct ehca_comp_pool *pool) 656static int find_next_online_cpu(struct ehca_comp_pool *pool)
656{ 657{
657 int cpu; 658 int cpu;
658 unsigned long flags; 659 unsigned long flags;
@@ -662,17 +663,20 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
662 ehca_dmp(cpu_online_mask, cpumask_size(), ""); 663 ehca_dmp(cpu_online_mask, cpumask_size(), "");
663 664
664 spin_lock_irqsave(&pool->last_cpu_lock, flags); 665 spin_lock_irqsave(&pool->last_cpu_lock, flags);
665 cpu = cpumask_next(pool->last_cpu, cpu_online_mask); 666 do {
666 if (cpu >= nr_cpu_ids) 667 cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
667 cpu = cpumask_first(cpu_online_mask); 668 if (cpu >= nr_cpu_ids)
668 pool->last_cpu = cpu; 669 cpu = cpumask_first(cpu_online_mask);
670 pool->last_cpu = cpu;
671 } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
669 spin_unlock_irqrestore(&pool->last_cpu_lock, flags); 672 spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
670 673
671 return cpu; 674 return cpu;
672} 675}
673 676
674static void __queue_comp_task(struct ehca_cq *__cq, 677static void __queue_comp_task(struct ehca_cq *__cq,
675 struct ehca_cpu_comp_task *cct) 678 struct ehca_cpu_comp_task *cct,
679 struct task_struct *thread)
676{ 680{
677 unsigned long flags; 681 unsigned long flags;
678 682
@@ -683,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq,
683 __cq->nr_callbacks++; 687 __cq->nr_callbacks++;
684 list_add_tail(&__cq->entry, &cct->cq_list); 688 list_add_tail(&__cq->entry, &cct->cq_list);
685 cct->cq_jobs++; 689 cct->cq_jobs++;
686 wake_up(&cct->wait_queue); 690 wake_up_process(thread);
687 } else 691 } else
688 __cq->nr_callbacks++; 692 __cq->nr_callbacks++;
689 693
@@ -695,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq)
695{ 699{
696 int cpu_id; 700 int cpu_id;
697 struct ehca_cpu_comp_task *cct; 701 struct ehca_cpu_comp_task *cct;
702 struct task_struct *thread;
698 int cq_jobs; 703 int cq_jobs;
699 unsigned long flags; 704 unsigned long flags;
700 705
@@ -702,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq)
702 BUG_ON(!cpu_online(cpu_id)); 707 BUG_ON(!cpu_online(cpu_id));
703 708
704 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 709 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
705 BUG_ON(!cct); 710 thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
711 BUG_ON(!cct || !thread);
706 712
707 spin_lock_irqsave(&cct->task_lock, flags); 713 spin_lock_irqsave(&cct->task_lock, flags);
708 cq_jobs = cct->cq_jobs; 714 cq_jobs = cct->cq_jobs;
@@ -710,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq)
710 if (cq_jobs > 0) { 716 if (cq_jobs > 0) {
711 cpu_id = find_next_online_cpu(pool); 717 cpu_id = find_next_online_cpu(pool);
712 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 718 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
713 BUG_ON(!cct); 719 thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
720 BUG_ON(!cct || !thread);
714 } 721 }
715 722 __queue_comp_task(__cq, cct, thread);
716 __queue_comp_task(__cq, cct);
717} 723}
718 724
719static void run_comp_task(struct ehca_cpu_comp_task *cct) 725static void run_comp_task(struct ehca_cpu_comp_task *cct)
720{ 726{
721 struct ehca_cq *cq; 727 struct ehca_cq *cq;
722 unsigned long flags;
723
724 spin_lock_irqsave(&cct->task_lock, flags);
725 728
726 while (!list_empty(&cct->cq_list)) { 729 while (!list_empty(&cct->cq_list)) {
727 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 730 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
728 spin_unlock_irqrestore(&cct->task_lock, flags); 731 spin_unlock_irq(&cct->task_lock);
729 732
730 comp_event_callback(cq); 733 comp_event_callback(cq);
731 if (atomic_dec_and_test(&cq->nr_events)) 734 if (atomic_dec_and_test(&cq->nr_events))
732 wake_up(&cq->wait_completion); 735 wake_up(&cq->wait_completion);
733 736
734 spin_lock_irqsave(&cct->task_lock, flags); 737 spin_lock_irq(&cct->task_lock);
735 spin_lock(&cq->task_lock); 738 spin_lock(&cq->task_lock);
736 cq->nr_callbacks--; 739 cq->nr_callbacks--;
737 if (!cq->nr_callbacks) { 740 if (!cq->nr_callbacks) {
@@ -740,159 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct)
740 } 743 }
741 spin_unlock(&cq->task_lock); 744 spin_unlock(&cq->task_lock);
742 } 745 }
743
744 spin_unlock_irqrestore(&cct->task_lock, flags);
745} 746}
746 747
747static int comp_task(void *__cct) 748static void comp_task_park(unsigned int cpu)
748{ 749{
749 struct ehca_cpu_comp_task *cct = __cct; 750 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
750 int cql_empty; 751 struct ehca_cpu_comp_task *target;
751 DECLARE_WAITQUEUE(wait, current); 752 struct task_struct *thread;
752 753 struct ehca_cq *cq, *tmp;
753 set_current_state(TASK_INTERRUPTIBLE); 754 LIST_HEAD(list);
754 while (!kthread_should_stop()) {
755 add_wait_queue(&cct->wait_queue, &wait);
756
757 spin_lock_irq(&cct->task_lock);
758 cql_empty = list_empty(&cct->cq_list);
759 spin_unlock_irq(&cct->task_lock);
760 if (cql_empty)
761 schedule();
762 else
763 __set_current_state(TASK_RUNNING);
764
765 remove_wait_queue(&cct->wait_queue, &wait);
766 755
767 spin_lock_irq(&cct->task_lock); 756 spin_lock_irq(&cct->task_lock);
768 cql_empty = list_empty(&cct->cq_list); 757 cct->cq_jobs = 0;
769 spin_unlock_irq(&cct->task_lock); 758 cct->active = 0;
770 if (!cql_empty) 759 list_splice_init(&cct->cq_list, &list);
771 run_comp_task(__cct); 760 spin_unlock_irq(&cct->task_lock);
772 761
773 set_current_state(TASK_INTERRUPTIBLE); 762 cpu = find_next_online_cpu(pool);
763 target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
764 thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
765 spin_lock_irq(&target->task_lock);
766 list_for_each_entry_safe(cq, tmp, &list, entry) {
767 list_del(&cq->entry);
768 __queue_comp_task(cq, target, thread);
774 } 769 }
775 __set_current_state(TASK_RUNNING); 770 spin_unlock_irq(&target->task_lock);
776
777 return 0;
778}
779
780static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
781 int cpu)
782{
783 struct ehca_cpu_comp_task *cct;
784
785 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
786 spin_lock_init(&cct->task_lock);
787 INIT_LIST_HEAD(&cct->cq_list);
788 init_waitqueue_head(&cct->wait_queue);
789 cct->task = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu),
790 "ehca_comp/%d", cpu);
791
792 return cct->task;
793} 771}
794 772
795static void destroy_comp_task(struct ehca_comp_pool *pool, 773static void comp_task_stop(unsigned int cpu, bool online)
796 int cpu)
797{ 774{
798 struct ehca_cpu_comp_task *cct; 775 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
799 struct task_struct *task;
800 unsigned long flags_cct;
801
802 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
803
804 spin_lock_irqsave(&cct->task_lock, flags_cct);
805 776
806 task = cct->task; 777 spin_lock_irq(&cct->task_lock);
807 cct->task = NULL;
808 cct->cq_jobs = 0; 778 cct->cq_jobs = 0;
809 779 cct->active = 0;
810 spin_unlock_irqrestore(&cct->task_lock, flags_cct); 780 WARN_ON(!list_empty(&cct->cq_list));
811 781 spin_unlock_irq(&cct->task_lock);
812 if (task)
813 kthread_stop(task);
814} 782}
815 783
816static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu) 784static int comp_task_should_run(unsigned int cpu)
817{ 785{
818 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 786 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
819 LIST_HEAD(list);
820 struct ehca_cq *cq;
821 unsigned long flags_cct;
822
823 spin_lock_irqsave(&cct->task_lock, flags_cct);
824
825 list_splice_init(&cct->cq_list, &list);
826
827 while (!list_empty(&list)) {
828 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
829
830 list_del(&cq->entry);
831 __queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks));
832 }
833
834 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
835 787
788 return cct->cq_jobs;
836} 789}
837 790
838static int __cpuinit comp_pool_callback(struct notifier_block *nfb, 791static void comp_task(unsigned int cpu)
839 unsigned long action,
840 void *hcpu)
841{ 792{
842 unsigned int cpu = (unsigned long)hcpu; 793 struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
843 struct ehca_cpu_comp_task *cct; 794 int cql_empty;
844 795
845 switch (action) { 796 spin_lock_irq(&cct->task_lock);
846 case CPU_UP_PREPARE: 797 cql_empty = list_empty(&cct->cq_list);
847 case CPU_UP_PREPARE_FROZEN: 798 if (!cql_empty) {
848 ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); 799 __set_current_state(TASK_RUNNING);
849 if (!create_comp_task(pool, cpu)) { 800 run_comp_task(cct);
850 ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
851 return notifier_from_errno(-ENOMEM);
852 }
853 break;
854 case CPU_UP_CANCELED:
855 case CPU_UP_CANCELED_FROZEN:
856 ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
857 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
858 kthread_bind(cct->task, cpumask_any(cpu_online_mask));
859 destroy_comp_task(pool, cpu);
860 break;
861 case CPU_ONLINE:
862 case CPU_ONLINE_FROZEN:
863 ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
864 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
865 kthread_bind(cct->task, cpu);
866 wake_up_process(cct->task);
867 break;
868 case CPU_DOWN_PREPARE:
869 case CPU_DOWN_PREPARE_FROZEN:
870 ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
871 break;
872 case CPU_DOWN_FAILED:
873 case CPU_DOWN_FAILED_FROZEN:
874 ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
875 break;
876 case CPU_DEAD:
877 case CPU_DEAD_FROZEN:
878 ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
879 destroy_comp_task(pool, cpu);
880 take_over_work(pool, cpu);
881 break;
882 } 801 }
883 802 spin_unlock_irq(&cct->task_lock);
884 return NOTIFY_OK;
885} 803}
886 804
887static struct notifier_block comp_pool_callback_nb __cpuinitdata = { 805static struct smp_hotplug_thread comp_pool_threads = {
888 .notifier_call = comp_pool_callback, 806 .thread_should_run = comp_task_should_run,
889 .priority = 0, 807 .thread_fn = comp_task,
808 .thread_comm = "ehca_comp/%u",
809 .cleanup = comp_task_stop,
810 .park = comp_task_park,
890}; 811};
891 812
892int ehca_create_comp_pool(void) 813int ehca_create_comp_pool(void)
893{ 814{
894 int cpu; 815 int cpu, ret = -ENOMEM;
895 struct task_struct *task;
896 816
897 if (!ehca_scaling_code) 817 if (!ehca_scaling_code)
898 return 0; 818 return 0;
@@ -905,38 +825,46 @@ int ehca_create_comp_pool(void)
905 pool->last_cpu = cpumask_any(cpu_online_mask); 825 pool->last_cpu = cpumask_any(cpu_online_mask);
906 826
907 pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); 827 pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
908 if (pool->cpu_comp_tasks == NULL) { 828 if (!pool->cpu_comp_tasks)
909 kfree(pool); 829 goto out_pool;
910 return -EINVAL;
911 }
912 830
913 for_each_online_cpu(cpu) { 831 pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
914 task = create_comp_task(pool, cpu); 832 if (!pool->cpu_comp_threads)
915 if (task) { 833 goto out_tasks;
916 kthread_bind(task, cpu); 834
917 wake_up_process(task); 835 for_each_present_cpu(cpu) {
918 } 836 struct ehca_cpu_comp_task *cct;
837
838 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
839 spin_lock_init(&cct->task_lock);
840 INIT_LIST_HEAD(&cct->cq_list);
919 } 841 }
920 842
921 register_hotcpu_notifier(&comp_pool_callback_nb); 843 comp_pool_threads.store = pool->cpu_comp_threads;
844 ret = smpboot_register_percpu_thread(&comp_pool_threads);
845 if (ret)
846 goto out_threads;
922 847
923 printk(KERN_INFO "eHCA scaling code enabled\n"); 848 pr_info("eHCA scaling code enabled\n");
849 return ret;
924 850
925 return 0; 851out_threads:
852 free_percpu(pool->cpu_comp_threads);
853out_tasks:
854 free_percpu(pool->cpu_comp_tasks);
855out_pool:
856 kfree(pool);
857 return ret;
926} 858}
927 859
928void ehca_destroy_comp_pool(void) 860void ehca_destroy_comp_pool(void)
929{ 861{
930 int i;
931
932 if (!ehca_scaling_code) 862 if (!ehca_scaling_code)
933 return; 863 return;
934 864
935 unregister_hotcpu_notifier(&comp_pool_callback_nb); 865 smpboot_unregister_percpu_thread(&comp_pool_threads);
936
937 for_each_online_cpu(i)
938 destroy_comp_task(pool, i);
939 866
867 free_percpu(pool->cpu_comp_threads);
940 free_percpu(pool->cpu_comp_tasks); 868 free_percpu(pool->cpu_comp_tasks);
941 kfree(pool); 869 kfree(pool);
942} 870}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index 3346cb06cea6..5370199f08c7 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data);
58void ehca_process_eq(struct ehca_shca *shca, int is_irq); 58void ehca_process_eq(struct ehca_shca *shca, int is_irq);
59 59
60struct ehca_cpu_comp_task { 60struct ehca_cpu_comp_task {
61 wait_queue_head_t wait_queue;
62 struct list_head cq_list; 61 struct list_head cq_list;
63 struct task_struct *task;
64 spinlock_t task_lock; 62 spinlock_t task_lock;
65 int cq_jobs; 63 int cq_jobs;
64 int active;
66}; 65};
67 66
68struct ehca_comp_pool { 67struct ehca_comp_pool {
69 struct ehca_cpu_comp_task *cpu_comp_tasks; 68 struct ehca_cpu_comp_task __percpu *cpu_comp_tasks;
69 struct task_struct * __percpu *cpu_comp_threads;
70 int last_cpu; 70 int last_cpu;
71 spinlock_t last_cpu_lock; 71 spinlock_t last_cpu_lock;
72}; 72};
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 22ccf9dee177..8d816646f766 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -14,6 +14,11 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
14 kthread_create_on_node(threadfn, data, -1, namefmt, ##arg) 14 kthread_create_on_node(threadfn, data, -1, namefmt, ##arg)
15 15
16 16
17struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
18 void *data,
19 unsigned int cpu,
20 const char *namefmt);
21
17/** 22/**
18 * kthread_run - create and wake a thread. 23 * kthread_run - create and wake a thread.
19 * @threadfn: the function to run until signal_pending(current). 24 * @threadfn: the function to run until signal_pending(current).
@@ -34,9 +39,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
34 39
35void kthread_bind(struct task_struct *k, unsigned int cpu); 40void kthread_bind(struct task_struct *k, unsigned int cpu);
36int kthread_stop(struct task_struct *k); 41int kthread_stop(struct task_struct *k);
37int kthread_should_stop(void); 42bool kthread_should_stop(void);
43bool kthread_should_park(void);
38bool kthread_freezable_should_stop(bool *was_frozen); 44bool kthread_freezable_should_stop(bool *was_frozen);
39void *kthread_data(struct task_struct *k); 45void *kthread_data(struct task_struct *k);
46int kthread_park(struct task_struct *k);
47void kthread_unpark(struct task_struct *k);
48void kthread_parkme(void);
40 49
41int kthreadd(void *unused); 50int kthreadd(void *unused);
42extern struct task_struct *kthreadd_task; 51extern struct task_struct *kthreadd_task;
diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
new file mode 100644
index 000000000000..e0106d8581d3
--- /dev/null
+++ b/include/linux/smpboot.h
@@ -0,0 +1,43 @@
1#ifndef _LINUX_SMPBOOT_H
2#define _LINUX_SMPBOOT_H
3
4#include <linux/types.h>
5
6struct task_struct;
7/* Cookie handed to the thread_fn*/
8struct smpboot_thread_data;
9
10/**
11 * struct smp_hotplug_thread - CPU hotplug related thread descriptor
12 * @store: Pointer to per cpu storage for the task pointers
13 * @list: List head for core management
14 * @thread_should_run: Check whether the thread should run or not. Called with
15 * preemption disabled.
16 * @thread_fn: The associated thread function
17 * @setup: Optional setup function, called when the thread gets
18 * operational the first time
19 * @cleanup: Optional cleanup function, called when the thread
20 * should stop (module exit)
21 * @park: Optional park function, called when the thread is
22 * parked (cpu offline)
23 * @unpark: Optional unpark function, called when the thread is
24 * unparked (cpu online)
25 * @thread_comm: The base name of the thread
26 */
27struct smp_hotplug_thread {
28 struct task_struct __percpu **store;
29 struct list_head list;
30 int (*thread_should_run)(unsigned int cpu);
31 void (*thread_fn)(unsigned int cpu);
32 void (*setup)(unsigned int cpu);
33 void (*cleanup)(unsigned int cpu, bool online);
34 void (*park)(unsigned int cpu);
35 void (*unpark)(unsigned int cpu);
36 const char *thread_comm;
37};
38
39int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread);
40void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
41int smpboot_thread_schedule(void);
42
43#endif
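
The kernel-doc above is the whole consumer-visible interface: a subsystem fills in one struct smp_hotplug_thread, registers it once, and the smpboot core then owns thread creation, CPU binding, parking across hotplug and teardown. A minimal, hypothetical consumer could look like the sketch below; the demo_* names are invented for illustration and are not part of this patch (the ehca and RCU conversions elsewhere in this diff are the real users).

#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smpboot.h>

static DEFINE_PER_CPU(struct task_struct *, demo_tasks);
static DEFINE_PER_CPU(unsigned int, demo_work_pending);

/* Called with preemption disabled; non-zero means "run demo_thread_fn now". */
static int demo_should_run(unsigned int cpu)
{
	return per_cpu(demo_work_pending, cpu);
}

/* Runs in the per-CPU thread, bound to @cpu, whenever there is work. */
static void demo_thread_fn(unsigned int cpu)
{
	per_cpu(demo_work_pending, cpu) = 0;
	/* ... process this CPU's pending work ... */
}

static struct smp_hotplug_thread demo_threads = {
	.store			= &demo_tasks,
	.thread_should_run	= demo_should_run,
	.thread_fn		= demo_thread_fn,
	.thread_comm		= "demo/%u",
	/* .setup, .park, .unpark and .cleanup are optional hooks */
};

static int __init demo_init(void)
{
	/*
	 * One bound thread per CPU; the smpboot core parks and unparks
	 * them across CPU hotplug, so the consumer needs no CPU notifier.
	 */
	return smpboot_register_percpu_thread(&demo_threads);
}

Unregistering with smpboot_unregister_percpu_thread(&demo_threads) stops and frees the threads again; ehca_create_comp_pool()/ehca_destroy_comp_pool() and rcu_spawn_kthreads() in this diff show the same pattern on real data.
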
diff --git a/kernel/Makefile b/kernel/Makefile
index c0cc67ad764c..e5602d32acb3 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o cred.o \ 12 notifier.o ksysfs.o cred.o \
13 async.o range.o groups.o lglock.o 13 async.o range.o groups.o lglock.o smpboot.o
14 14
15ifdef CONFIG_FUNCTION_TRACER 15ifdef CONFIG_FUNCTION_TRACER
16# Do not trace debug files and internal ftrace files 16# Do not trace debug files and internal ftrace files
@@ -46,7 +46,6 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
46obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o 46obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
47obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o 47obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
48obj-$(CONFIG_SMP) += smp.o 48obj-$(CONFIG_SMP) += smp.o
49obj-$(CONFIG_SMP) += smpboot.o
50ifneq ($(CONFIG_SMP),y) 49ifneq ($(CONFIG_SMP),y)
51obj-y += up.o 50obj-y += up.o
52endif 51endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 14d32588cccd..e615dfbcf794 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -280,12 +280,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
280 __func__, cpu); 280 __func__, cpu);
281 goto out_release; 281 goto out_release;
282 } 282 }
283 smpboot_park_threads(cpu);
283 284
284 err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); 285 err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
285 if (err) { 286 if (err) {
286 /* CPU didn't die: tell everyone. Can't complain. */ 287 /* CPU didn't die: tell everyone. Can't complain. */
288 smpboot_unpark_threads(cpu);
287 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); 289 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
288
289 goto out_release; 290 goto out_release;
290 } 291 }
291 BUG_ON(cpu_online(cpu)); 292 BUG_ON(cpu_online(cpu));
@@ -354,6 +355,10 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
354 goto out; 355 goto out;
355 } 356 }
356 357
358 ret = smpboot_create_threads(cpu);
359 if (ret)
360 goto out;
361
357 ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); 362 ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
358 if (ret) { 363 if (ret) {
359 nr_calls--; 364 nr_calls--;
@@ -368,6 +373,9 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
368 goto out_notify; 373 goto out_notify;
369 BUG_ON(!cpu_online(cpu)); 374 BUG_ON(!cpu_online(cpu));
370 375
376 /* Wake the per cpu threads */
377 smpboot_unpark_threads(cpu);
378
371 /* Now call notifier in preparation. */ 379 /* Now call notifier in preparation. */
372 cpu_notify(CPU_ONLINE | mod, hcpu); 380 cpu_notify(CPU_ONLINE | mod, hcpu);
373 381
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b579af57ea10..146a6fa96825 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -37,11 +37,20 @@ struct kthread_create_info
37}; 37};
38 38
39struct kthread { 39struct kthread {
40 int should_stop; 40 unsigned long flags;
41 unsigned int cpu;
41 void *data; 42 void *data;
43 struct completion parked;
42 struct completion exited; 44 struct completion exited;
43}; 45};
44 46
47enum KTHREAD_BITS {
48 KTHREAD_IS_PER_CPU = 0,
49 KTHREAD_SHOULD_STOP,
50 KTHREAD_SHOULD_PARK,
51 KTHREAD_IS_PARKED,
52};
53
45#define to_kthread(tsk) \ 54#define to_kthread(tsk) \
46 container_of((tsk)->vfork_done, struct kthread, exited) 55 container_of((tsk)->vfork_done, struct kthread, exited)
47 56
@@ -52,13 +61,29 @@ struct kthread {
52 * and this will return true. You should then return, and your return 61 * and this will return true. You should then return, and your return
53 * value will be passed through to kthread_stop(). 62 * value will be passed through to kthread_stop().
54 */ 63 */
55int kthread_should_stop(void) 64bool kthread_should_stop(void)
56{ 65{
57 return to_kthread(current)->should_stop; 66 return test_bit(KTHREAD_SHOULD_STOP, &to_kthread(current)->flags);
58} 67}
59EXPORT_SYMBOL(kthread_should_stop); 68EXPORT_SYMBOL(kthread_should_stop);
60 69
61/** 70/**
71 * kthread_should_park - should this kthread park now?
72 *
73 * When someone calls kthread_park() on your kthread, it will be woken
74 * and this will return true. You should then do the necessary
75 * cleanup and call kthread_parkme()
76 *
77 * Similar to kthread_should_stop(), but this keeps the thread alive
78 * and in a park position. kthread_unpark() "restarts" the thread and
79 * calls the thread function again.
80 */
81bool kthread_should_park(void)
82{
83 return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags);
84}
85
86/**
62 * kthread_freezable_should_stop - should this freezable kthread return now? 87 * kthread_freezable_should_stop - should this freezable kthread return now?
63 * @was_frozen: optional out parameter, indicates whether %current was frozen 88 * @was_frozen: optional out parameter, indicates whether %current was frozen
64 * 89 *
@@ -96,6 +121,24 @@ void *kthread_data(struct task_struct *task)
96 return to_kthread(task)->data; 121 return to_kthread(task)->data;
97} 122}
98 123
124static void __kthread_parkme(struct kthread *self)
125{
126 __set_current_state(TASK_INTERRUPTIBLE);
127 while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
128 if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
129 complete(&self->parked);
130 schedule();
131 __set_current_state(TASK_INTERRUPTIBLE);
132 }
133 clear_bit(KTHREAD_IS_PARKED, &self->flags);
134 __set_current_state(TASK_RUNNING);
135}
136
137void kthread_parkme(void)
138{
139 __kthread_parkme(to_kthread(current));
140}
141
99static int kthread(void *_create) 142static int kthread(void *_create)
100{ 143{
101 /* Copy data: it's on kthread's stack */ 144 /* Copy data: it's on kthread's stack */
@@ -105,9 +148,10 @@ static int kthread(void *_create)
105 struct kthread self; 148 struct kthread self;
106 int ret; 149 int ret;
107 150
108 self.should_stop = 0; 151 self.flags = 0;
109 self.data = data; 152 self.data = data;
110 init_completion(&self.exited); 153 init_completion(&self.exited);
154 init_completion(&self.parked);
111 current->vfork_done = &self.exited; 155 current->vfork_done = &self.exited;
112 156
113 /* OK, tell user we're spawned, wait for stop or wakeup */ 157 /* OK, tell user we're spawned, wait for stop or wakeup */
@@ -117,9 +161,11 @@ static int kthread(void *_create)
117 schedule(); 161 schedule();
118 162
119 ret = -EINTR; 163 ret = -EINTR;
120 if (!self.should_stop)
121 ret = threadfn(data);
122 164
165 if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
166 __kthread_parkme(&self);
167 ret = threadfn(data);
168 }
123 /* we can't just return, we must preserve "self" on stack */ 169 /* we can't just return, we must preserve "self" on stack */
124 do_exit(ret); 170 do_exit(ret);
125} 171}
@@ -172,8 +218,7 @@ static void create_kthread(struct kthread_create_info *create)
172 * Returns a task_struct or ERR_PTR(-ENOMEM). 218 * Returns a task_struct or ERR_PTR(-ENOMEM).
173 */ 219 */
174struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), 220struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
175 void *data, 221 void *data, int node,
176 int node,
177 const char namefmt[], 222 const char namefmt[],
178 ...) 223 ...)
179{ 224{
@@ -210,6 +255,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
210} 255}
211EXPORT_SYMBOL(kthread_create_on_node); 256EXPORT_SYMBOL(kthread_create_on_node);
212 257
258static void __kthread_bind(struct task_struct *p, unsigned int cpu)
259{
260 /* It's safe because the task is inactive. */
261 do_set_cpus_allowed(p, cpumask_of(cpu));
262 p->flags |= PF_THREAD_BOUND;
263}
264
213/** 265/**
214 * kthread_bind - bind a just-created kthread to a cpu. 266 * kthread_bind - bind a just-created kthread to a cpu.
215 * @p: thread created by kthread_create(). 267 * @p: thread created by kthread_create().
@@ -226,14 +278,112 @@ void kthread_bind(struct task_struct *p, unsigned int cpu)
226 WARN_ON(1); 278 WARN_ON(1);
227 return; 279 return;
228 } 280 }
229 281 __kthread_bind(p, cpu);
230 /* It's safe because the task is inactive. */
231 do_set_cpus_allowed(p, cpumask_of(cpu));
232 p->flags |= PF_THREAD_BOUND;
233} 282}
234EXPORT_SYMBOL(kthread_bind); 283EXPORT_SYMBOL(kthread_bind);
235 284
236/** 285/**
286 * kthread_create_on_cpu - Create a cpu bound kthread
287 * @threadfn: the function to run until signal_pending(current).
288 * @data: data ptr for @threadfn.
289 * @cpu: The cpu on which the thread should be bound,
290 * @namefmt: printf-style name for the thread. Format is restricted
291 * to "name.*%u". Code fills in cpu number.
292 *
293 * Description: This helper function creates and names a kernel thread
294 * The thread will be woken and put into park mode.
295 */
296struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
297 void *data, unsigned int cpu,
298 const char *namefmt)
299{
300 struct task_struct *p;
301
302 p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt,
303 cpu);
304 if (IS_ERR(p))
305 return p;
306 set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags);
307 to_kthread(p)->cpu = cpu;
308 /* Park the thread to get it out of TASK_UNINTERRUPTIBLE state */
309 kthread_park(p);
310 return p;
311}
312
313static struct kthread *task_get_live_kthread(struct task_struct *k)
314{
315 struct kthread *kthread;
316
317 get_task_struct(k);
318 kthread = to_kthread(k);
319 /* It might have exited */
320 barrier();
321 if (k->vfork_done != NULL)
322 return kthread;
323 return NULL;
324}
325
326/**
327 * kthread_unpark - unpark a thread created by kthread_create().
328 * @k: thread created by kthread_create().
329 *
330 * Sets kthread_should_park() for @k to return false, wakes it, and
331 * waits for it to return. If the thread is marked percpu then its
332 * bound to the cpu again.
333 */
334void kthread_unpark(struct task_struct *k)
335{
336 struct kthread *kthread = task_get_live_kthread(k);
337
338 if (kthread) {
339 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
340 /*
341 * We clear the IS_PARKED bit here as we don't wait
342 * until the task has left the park code. So if we'd
343 * park before that happens we'd see the IS_PARKED bit
344 * which might be about to be cleared.
345 */
346 if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
347 if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
348 __kthread_bind(k, kthread->cpu);
349 wake_up_process(k);
350 }
351 }
352 put_task_struct(k);
353}
354
355/**
356 * kthread_park - park a thread created by kthread_create().
357 * @k: thread created by kthread_create().
358 *
359 * Sets kthread_should_park() for @k to return true, wakes it, and
360 * waits for it to return. This can also be called after kthread_create()
361 * instead of calling wake_up_process(): the thread will park without
362 * calling threadfn().
363 *
364 * Returns 0 if the thread is parked, -ENOSYS if the thread exited.
365 * If called by the kthread itself just the park bit is set.
366 */
367int kthread_park(struct task_struct *k)
368{
369 struct kthread *kthread = task_get_live_kthread(k);
370 int ret = -ENOSYS;
371
372 if (kthread) {
373 if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
374 set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
375 if (k != current) {
376 wake_up_process(k);
377 wait_for_completion(&kthread->parked);
378 }
379 }
380 ret = 0;
381 }
382 put_task_struct(k);
383 return ret;
384}
385
386/**
237 * kthread_stop - stop a thread created by kthread_create(). 387 * kthread_stop - stop a thread created by kthread_create().
238 * @k: thread created by kthread_create(). 388 * @k: thread created by kthread_create().
239 * 389 *
@@ -250,16 +400,13 @@ EXPORT_SYMBOL(kthread_bind);
250 */ 400 */
251int kthread_stop(struct task_struct *k) 401int kthread_stop(struct task_struct *k)
252{ 402{
253 struct kthread *kthread; 403 struct kthread *kthread = task_get_live_kthread(k);
254 int ret; 404 int ret;
255 405
256 trace_sched_kthread_stop(k); 406 trace_sched_kthread_stop(k);
257 get_task_struct(k); 407 if (kthread) {
258 408 set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
259 kthread = to_kthread(k); 409 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
260 barrier(); /* it might have exited */
261 if (k->vfork_done != NULL) {
262 kthread->should_stop = 1;
263 wake_up_process(k); 410 wake_up_process(k);
264 wait_for_completion(&kthread->exited); 411 wait_for_completion(&kthread->exited);
265 } 412 }
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 80fd02e5d115..7387e46009d9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -134,13 +134,12 @@ static int rcu_scheduler_fully_active __read_mostly;
134 */ 134 */
135static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); 135static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
136DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 136DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
137DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
138DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); 137DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
139DEFINE_PER_CPU(char, rcu_cpu_has_work); 138DEFINE_PER_CPU(char, rcu_cpu_has_work);
140 139
141#endif /* #ifdef CONFIG_RCU_BOOST */ 140#endif /* #ifdef CONFIG_RCU_BOOST */
142 141
143static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); 142static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
144static void invoke_rcu_core(void); 143static void invoke_rcu_core(void);
145static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); 144static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
146 145
@@ -1543,8 +1542,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1543 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 1542 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
1544 1543
1545 /* Adjust any no-longer-needed kthreads. */ 1544 /* Adjust any no-longer-needed kthreads. */
1546 rcu_stop_cpu_kthread(cpu); 1545 rcu_boost_kthread_setaffinity(rnp, -1);
1547 rcu_node_kthread_setaffinity(rnp, -1);
1548 1546
1549 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ 1547 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
1550 1548
@@ -2572,12 +2570,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
2572 break; 2570 break;
2573 case CPU_ONLINE: 2571 case CPU_ONLINE:
2574 case CPU_DOWN_FAILED: 2572 case CPU_DOWN_FAILED:
2575 rcu_node_kthread_setaffinity(rnp, -1); 2573 rcu_boost_kthread_setaffinity(rnp, -1);
2576 rcu_cpu_kthread_setrt(cpu, 1);
2577 break; 2574 break;
2578 case CPU_DOWN_PREPARE: 2575 case CPU_DOWN_PREPARE:
2579 rcu_node_kthread_setaffinity(rnp, cpu); 2576 rcu_boost_kthread_setaffinity(rnp, cpu);
2580 rcu_cpu_kthread_setrt(cpu, 0);
2581 break; 2577 break;
2582 case CPU_DYING: 2578 case CPU_DYING:
2583 case CPU_DYING_FROZEN: 2579 case CPU_DYING_FROZEN:
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 3cf71ce206e0..7576fd4d8ce6 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -196,12 +196,6 @@ struct rcu_node {
196 /* Refused to boost: not sure why, though. */ 196 /* Refused to boost: not sure why, though. */
197 /* This can happen due to race conditions. */ 197 /* This can happen due to race conditions. */
198#endif /* #ifdef CONFIG_RCU_BOOST */ 198#endif /* #ifdef CONFIG_RCU_BOOST */
199 struct task_struct *node_kthread_task;
200 /* kthread that takes care of this rcu_node */
201 /* structure, for example, awakening the */
202 /* per-CPU kthreads as needed. */
203 unsigned int node_kthread_status;
204 /* State of node_kthread_task for tracing. */
205 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; 199 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
206} ____cacheline_internodealigned_in_smp; 200} ____cacheline_internodealigned_in_smp;
207 201
@@ -465,7 +459,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
465#ifdef CONFIG_HOTPLUG_CPU 459#ifdef CONFIG_HOTPLUG_CPU
466static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 460static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
467 unsigned long flags); 461 unsigned long flags);
468static void rcu_stop_cpu_kthread(int cpu);
469#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 462#endif /* #ifdef CONFIG_HOTPLUG_CPU */
470static void rcu_print_detail_task_stall(struct rcu_state *rsp); 463static void rcu_print_detail_task_stall(struct rcu_state *rsp);
471static int rcu_print_task_stall(struct rcu_node *rnp); 464static int rcu_print_task_stall(struct rcu_node *rnp);
@@ -488,15 +481,9 @@ static void invoke_rcu_callbacks_kthread(void);
488static bool rcu_is_callbacks_kthread(void); 481static bool rcu_is_callbacks_kthread(void);
489#ifdef CONFIG_RCU_BOOST 482#ifdef CONFIG_RCU_BOOST
490static void rcu_preempt_do_callbacks(void); 483static void rcu_preempt_do_callbacks(void);
491static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
492 cpumask_var_t cm);
493static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 484static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
494 struct rcu_node *rnp, 485 struct rcu_node *rnp);
495 int rnp_index);
496static void invoke_rcu_node_kthread(struct rcu_node *rnp);
497static void rcu_yield(void (*f)(unsigned long), unsigned long arg);
498#endif /* #ifdef CONFIG_RCU_BOOST */ 486#endif /* #ifdef CONFIG_RCU_BOOST */
499static void rcu_cpu_kthread_setrt(int cpu, int to_rt);
500static void __cpuinit rcu_prepare_kthreads(int cpu); 487static void __cpuinit rcu_prepare_kthreads(int cpu);
501static void rcu_prepare_for_idle_init(int cpu); 488static void rcu_prepare_for_idle_init(int cpu);
502static void rcu_cleanup_after_idle(int cpu); 489static void rcu_cleanup_after_idle(int cpu);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 46d7d6cf16db..9c71c1b18e03 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -26,6 +26,7 @@
26 26
27#include <linux/delay.h> 27#include <linux/delay.h>
28#include <linux/oom.h> 28#include <linux/oom.h>
29#include <linux/smpboot.h>
29 30
30#define RCU_KTHREAD_PRIO 1 31#define RCU_KTHREAD_PRIO 1
31 32
@@ -1090,6 +1091,16 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1090 1091
1091#endif /* #else #ifdef CONFIG_RCU_TRACE */ 1092#endif /* #else #ifdef CONFIG_RCU_TRACE */
1092 1093
1094static void rcu_wake_cond(struct task_struct *t, int status)
1095{
1096 /*
1097 * If the thread is yielding, only wake it when this
1098 * is invoked from idle
1099 */
1100 if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
1101 wake_up_process(t);
1102}
1103
1093/* 1104/*
1094 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 1105 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1095 * or ->boost_tasks, advancing the pointer to the next task in the 1106 * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1162,17 +1173,6 @@ static int rcu_boost(struct rcu_node *rnp)
1162} 1173}
1163 1174
1164/* 1175/*
1165 * Timer handler to initiate waking up of boost kthreads that
1166 * have yielded the CPU due to excessive numbers of tasks to
1167 * boost. We wake up the per-rcu_node kthread, which in turn
1168 * will wake up the booster kthread.
1169 */
1170static void rcu_boost_kthread_timer(unsigned long arg)
1171{
1172 invoke_rcu_node_kthread((struct rcu_node *)arg);
1173}
1174
1175/*
1176 * Priority-boosting kthread. One per leaf rcu_node and one for the 1176 * Priority-boosting kthread. One per leaf rcu_node and one for the
1177 * root rcu_node. 1177 * root rcu_node.
1178 */ 1178 */
@@ -1195,8 +1195,9 @@ static int rcu_boost_kthread(void *arg)
1195 else 1195 else
1196 spincnt = 0; 1196 spincnt = 0;
1197 if (spincnt > 10) { 1197 if (spincnt > 10) {
1198 rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
1198 trace_rcu_utilization("End boost kthread@rcu_yield"); 1199 trace_rcu_utilization("End boost kthread@rcu_yield");
1199 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); 1200 schedule_timeout_interruptible(2);
1200 trace_rcu_utilization("Start boost kthread@rcu_yield"); 1201 trace_rcu_utilization("Start boost kthread@rcu_yield");
1201 spincnt = 0; 1202 spincnt = 0;
1202 } 1203 }
@@ -1234,8 +1235,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1234 rnp->boost_tasks = rnp->gp_tasks; 1235 rnp->boost_tasks = rnp->gp_tasks;
1235 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1236 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1236 t = rnp->boost_kthread_task; 1237 t = rnp->boost_kthread_task;
1237 if (t != NULL) 1238 if (t)
1238 wake_up_process(t); 1239 rcu_wake_cond(t, rnp->boost_kthread_status);
1239 } else { 1240 } else {
1240 rcu_initiate_boost_trace(rnp); 1241 rcu_initiate_boost_trace(rnp);
1241 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1242 raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1252,8 +1253,10 @@ static void invoke_rcu_callbacks_kthread(void)
1252 local_irq_save(flags); 1253 local_irq_save(flags);
1253 __this_cpu_write(rcu_cpu_has_work, 1); 1254 __this_cpu_write(rcu_cpu_has_work, 1);
1254 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && 1255 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
1255 current != __this_cpu_read(rcu_cpu_kthread_task)) 1256 current != __this_cpu_read(rcu_cpu_kthread_task)) {
1256 wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); 1257 rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
1258 __this_cpu_read(rcu_cpu_kthread_status));
1259 }
1257 local_irq_restore(flags); 1260 local_irq_restore(flags);
1258} 1261}
1259 1262
@@ -1266,21 +1269,6 @@ static bool rcu_is_callbacks_kthread(void)
1266 return __get_cpu_var(rcu_cpu_kthread_task) == current; 1269 return __get_cpu_var(rcu_cpu_kthread_task) == current;
1267} 1270}
1268 1271
1269/*
1270 * Set the affinity of the boost kthread. The CPU-hotplug locks are
1271 * held, so no one should be messing with the existence of the boost
1272 * kthread.
1273 */
1274static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
1275 cpumask_var_t cm)
1276{
1277 struct task_struct *t;
1278
1279 t = rnp->boost_kthread_task;
1280 if (t != NULL)
1281 set_cpus_allowed_ptr(rnp->boost_kthread_task, cm);
1282}
1283
1284#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) 1272#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
1285 1273
1286/* 1274/*
@@ -1297,15 +1285,19 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1297 * Returns zero if all is well, a negated errno otherwise. 1285 * Returns zero if all is well, a negated errno otherwise.
1298 */ 1286 */
1299static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 1287static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1300 struct rcu_node *rnp, 1288 struct rcu_node *rnp)
1301 int rnp_index)
1302{ 1289{
1290 int rnp_index = rnp - &rsp->node[0];
1303 unsigned long flags; 1291 unsigned long flags;
1304 struct sched_param sp; 1292 struct sched_param sp;
1305 struct task_struct *t; 1293 struct task_struct *t;
1306 1294
1307 if (&rcu_preempt_state != rsp) 1295 if (&rcu_preempt_state != rsp)
1308 return 0; 1296 return 0;
1297
1298 if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
1299 return 0;
1300
1309 rsp->boost = 1; 1301 rsp->boost = 1;
1310 if (rnp->boost_kthread_task != NULL) 1302 if (rnp->boost_kthread_task != NULL)
1311 return 0; 1303 return 0;
@@ -1322,25 +1314,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1322 return 0; 1314 return 0;
1323} 1315}
1324 1316
1325#ifdef CONFIG_HOTPLUG_CPU
1326
1327/*
1328 * Stop the RCU's per-CPU kthread when its CPU goes offline,.
1329 */
1330static void rcu_stop_cpu_kthread(int cpu)
1331{
1332 struct task_struct *t;
1333
1334 /* Stop the CPU's kthread. */
1335 t = per_cpu(rcu_cpu_kthread_task, cpu);
1336 if (t != NULL) {
1337 per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
1338 kthread_stop(t);
1339 }
1340}
1341
1342#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1343
1344static void rcu_kthread_do_work(void) 1317static void rcu_kthread_do_work(void)
1345{ 1318{
1346 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); 1319 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
@@ -1348,112 +1321,22 @@ static void rcu_kthread_do_work(void)
1348 rcu_preempt_do_callbacks(); 1321 rcu_preempt_do_callbacks();
1349} 1322}
1350 1323
1351/* 1324static void rcu_cpu_kthread_setup(unsigned int cpu)
1352 * Wake up the specified per-rcu_node-structure kthread.
1353 * Because the per-rcu_node kthreads are immortal, we don't need
1354 * to do anything to keep them alive.
1355 */
1356static void invoke_rcu_node_kthread(struct rcu_node *rnp)
1357{
1358 struct task_struct *t;
1359
1360 t = rnp->node_kthread_task;
1361 if (t != NULL)
1362 wake_up_process(t);
1363}
1364
1365/*
1366 * Set the specified CPU's kthread to run RT or not, as specified by
1367 * the to_rt argument. The CPU-hotplug locks are held, so the task
1368 * is not going away.
1369 */
1370static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
1371{ 1325{
1372 int policy;
1373 struct sched_param sp; 1326 struct sched_param sp;
1374 struct task_struct *t;
1375
1376 t = per_cpu(rcu_cpu_kthread_task, cpu);
1377 if (t == NULL)
1378 return;
1379 if (to_rt) {
1380 policy = SCHED_FIFO;
1381 sp.sched_priority = RCU_KTHREAD_PRIO;
1382 } else {
1383 policy = SCHED_NORMAL;
1384 sp.sched_priority = 0;
1385 }
1386 sched_setscheduler_nocheck(t, policy, &sp);
1387}
1388
1389/*
1390 * Timer handler to initiate the waking up of per-CPU kthreads that
1391 * have yielded the CPU due to excess numbers of RCU callbacks.
1392 * We wake up the per-rcu_node kthread, which in turn will wake up
1393 * the booster kthread.
1394 */
1395static void rcu_cpu_kthread_timer(unsigned long arg)
1396{
1397 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
1398 struct rcu_node *rnp = rdp->mynode;
1399 1327
1400 atomic_or(rdp->grpmask, &rnp->wakemask); 1328 sp.sched_priority = RCU_KTHREAD_PRIO;
1401 invoke_rcu_node_kthread(rnp); 1329 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1402} 1330}
1403 1331
1404/* 1332static void rcu_cpu_kthread_park(unsigned int cpu)
1405 * Drop to non-real-time priority and yield, but only after posting a
1406 * timer that will cause us to regain our real-time priority if we
1407 * remain preempted. Either way, we restore our real-time priority
1408 * before returning.
1409 */
1410static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
1411{ 1333{
1412 struct sched_param sp; 1334 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1413 struct timer_list yield_timer;
1414 int prio = current->rt_priority;
1415
1416 setup_timer_on_stack(&yield_timer, f, arg);
1417 mod_timer(&yield_timer, jiffies + 2);
1418 sp.sched_priority = 0;
1419 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
1420 set_user_nice(current, 19);
1421 schedule();
1422 set_user_nice(current, 0);
1423 sp.sched_priority = prio;
1424 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1425 del_timer(&yield_timer);
1426} 1335}
1427 1336
1428/* 1337static int rcu_cpu_kthread_should_run(unsigned int cpu)
1429 * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
1430 * This can happen while the corresponding CPU is either coming online
1431 * or going offline. We cannot wait until the CPU is fully online
1432 * before starting the kthread, because the various notifier functions
1433 * can wait for RCU grace periods. So we park rcu_cpu_kthread() until
1434 * the corresponding CPU is online.
1435 *
1436 * Return 1 if the kthread needs to stop, 0 otherwise.
1437 *
1438 * Caller must disable bh. This function can momentarily enable it.
1439 */
1440static int rcu_cpu_kthread_should_stop(int cpu)
1441{ 1338{
1442 while (cpu_is_offline(cpu) || 1339 return __get_cpu_var(rcu_cpu_has_work);
1443 !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) ||
1444 smp_processor_id() != cpu) {
1445 if (kthread_should_stop())
1446 return 1;
1447 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1448 per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id();
1449 local_bh_enable();
1450 schedule_timeout_uninterruptible(1);
1451 if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
1452 set_cpus_allowed_ptr(current, cpumask_of(cpu));
1453 local_bh_disable();
1454 }
1455 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1456 return 0;
1457} 1340}
1458 1341
1459/* 1342/*
@@ -1461,138 +1344,35 @@ static int rcu_cpu_kthread_should_stop(int cpu)
1461 * RCU softirq used in flavors and configurations of RCU that do not 1344 * RCU softirq used in flavors and configurations of RCU that do not
1462 * support RCU priority boosting. 1345 * support RCU priority boosting.
1463 */ 1346 */
1464static int rcu_cpu_kthread(void *arg) 1347static void rcu_cpu_kthread(unsigned int cpu)
1465{ 1348{
1466 int cpu = (int)(long)arg; 1349 unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
1467 unsigned long flags; 1350 char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
1468 int spincnt = 0; 1351 int spincnt;
1469 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
1470 char work;
1471 char *workp = &per_cpu(rcu_cpu_has_work, cpu);
1472 1352
1473 trace_rcu_utilization("Start CPU kthread@init"); 1353 for (spincnt = 0; spincnt < 10; spincnt++) {
1474 for (;;) {
1475 *statusp = RCU_KTHREAD_WAITING;
1476 trace_rcu_utilization("End CPU kthread@rcu_wait");
1477 rcu_wait(*workp != 0 || kthread_should_stop());
1478 trace_rcu_utilization("Start CPU kthread@rcu_wait"); 1354 trace_rcu_utilization("Start CPU kthread@rcu_wait");
1479 local_bh_disable(); 1355 local_bh_disable();
1480 if (rcu_cpu_kthread_should_stop(cpu)) {
1481 local_bh_enable();
1482 break;
1483 }
1484 *statusp = RCU_KTHREAD_RUNNING; 1356 *statusp = RCU_KTHREAD_RUNNING;
1485 per_cpu(rcu_cpu_kthread_loops, cpu)++; 1357 this_cpu_inc(rcu_cpu_kthread_loops);
1486 local_irq_save(flags); 1358 local_irq_disable();
1487 work = *workp; 1359 work = *workp;
1488 *workp = 0; 1360 *workp = 0;
1489 local_irq_restore(flags); 1361 local_irq_enable();
1490 if (work) 1362 if (work)
1491 rcu_kthread_do_work(); 1363 rcu_kthread_do_work();
1492 local_bh_enable(); 1364 local_bh_enable();
1493 if (*workp != 0) 1365 if (*workp == 0) {
1494 spincnt++; 1366 trace_rcu_utilization("End CPU kthread@rcu_wait");
1495 else 1367 *statusp = RCU_KTHREAD_WAITING;
1496 spincnt = 0; 1368 return;
1497 if (spincnt > 10) {
1498 *statusp = RCU_KTHREAD_YIELDING;
1499 trace_rcu_utilization("End CPU kthread@rcu_yield");
1500 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
1501 trace_rcu_utilization("Start CPU kthread@rcu_yield");
1502 spincnt = 0;
1503 }
1504 }
1505 *statusp = RCU_KTHREAD_STOPPED;
1506 trace_rcu_utilization("End CPU kthread@term");
1507 return 0;
1508}
1509
1510/*
1511 * Spawn a per-CPU kthread, setting up affinity and priority.
1512 * Because the CPU hotplug lock is held, no other CPU will be attempting
1513 * to manipulate rcu_cpu_kthread_task. There might be another CPU
1514 * attempting to access it during boot, but the locking in kthread_bind()
1515 * will enforce sufficient ordering.
1516 *
1517 * Please note that we cannot simply refuse to wake up the per-CPU
1518 * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state,
1519 * which can result in softlockup complaints if the task ends up being
1520 * idle for more than a couple of minutes.
1521 *
1522 * However, please note also that we cannot bind the per-CPU kthread to its
1523 * CPU until that CPU is fully online. We also cannot wait until the
1524 * CPU is fully online before we create its per-CPU kthread, as this would
1525 * deadlock the system when CPU notifiers tried waiting for grace
1526 * periods. So we bind the per-CPU kthread to its CPU only if the CPU
1527 * is online. If its CPU is not yet fully online, then the code in
1528 * rcu_cpu_kthread() will wait until it is fully online, and then do
1529 * the binding.
1530 */
1531static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
1532{
1533 struct sched_param sp;
1534 struct task_struct *t;
1535
1536 if (!rcu_scheduler_fully_active ||
1537 per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
1538 return 0;
1539 t = kthread_create_on_node(rcu_cpu_kthread,
1540 (void *)(long)cpu,
1541 cpu_to_node(cpu),
1542 "rcuc/%d", cpu);
1543 if (IS_ERR(t))
1544 return PTR_ERR(t);
1545 if (cpu_online(cpu))
1546 kthread_bind(t, cpu);
1547 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1548 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
1549 sp.sched_priority = RCU_KTHREAD_PRIO;
1550 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1551 per_cpu(rcu_cpu_kthread_task, cpu) = t;
1552 wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */
1553 return 0;
1554}
1555
1556/*
1557 * Per-rcu_node kthread, which is in charge of waking up the per-CPU
1558 * kthreads when needed. We ignore requests to wake up kthreads
1559 * for offline CPUs, which is OK because force_quiescent_state()
1560 * takes care of this case.
1561 */
1562static int rcu_node_kthread(void *arg)
1563{
1564 int cpu;
1565 unsigned long flags;
1566 unsigned long mask;
1567 struct rcu_node *rnp = (struct rcu_node *)arg;
1568 struct sched_param sp;
1569 struct task_struct *t;
1570
1571 for (;;) {
1572 rnp->node_kthread_status = RCU_KTHREAD_WAITING;
1573 rcu_wait(atomic_read(&rnp->wakemask) != 0);
1574 rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
1575 raw_spin_lock_irqsave(&rnp->lock, flags);
1576 mask = atomic_xchg(&rnp->wakemask, 0);
1577 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1578 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
1579 if ((mask & 0x1) == 0)
1580 continue;
1581 preempt_disable();
1582 t = per_cpu(rcu_cpu_kthread_task, cpu);
1583 if (!cpu_online(cpu) || t == NULL) {
1584 preempt_enable();
1585 continue;
1586 }
1587 per_cpu(rcu_cpu_has_work, cpu) = 1;
1588 sp.sched_priority = RCU_KTHREAD_PRIO;
1589 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1590 preempt_enable();
1591 } 1369 }
1592 } 1370 }
1593 /* NOTREACHED */ 1371 *statusp = RCU_KTHREAD_YIELDING;
1594 rnp->node_kthread_status = RCU_KTHREAD_STOPPED; 1372 trace_rcu_utilization("Start CPU kthread@rcu_yield");
1595 return 0; 1373 schedule_timeout_interruptible(2);
1374 trace_rcu_utilization("End CPU kthread@rcu_yield");
1375 *statusp = RCU_KTHREAD_WAITING;
1596} 1376}
1597 1377
1598/* 1378/*
@@ -1604,17 +1384,17 @@ static int rcu_node_kthread(void *arg)
1604 * no outgoing CPU. If there are no CPUs left in the affinity set, 1384 * no outgoing CPU. If there are no CPUs left in the affinity set,
1605 * this function allows the kthread to execute on any CPU. 1385 * this function allows the kthread to execute on any CPU.
1606 */ 1386 */
1607static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1387static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1608{ 1388{
1389 struct task_struct *t = rnp->boost_kthread_task;
1390 unsigned long mask = rnp->qsmaskinit;
1609 cpumask_var_t cm; 1391 cpumask_var_t cm;
1610 int cpu; 1392 int cpu;
1611 unsigned long mask = rnp->qsmaskinit;
1612 1393
1613 if (rnp->node_kthread_task == NULL) 1394 if (!t)
1614 return; 1395 return;
1615 if (!alloc_cpumask_var(&cm, GFP_KERNEL)) 1396 if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
1616 return; 1397 return;
1617 cpumask_clear(cm);
1618 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) 1398 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
1619 if ((mask & 0x1) && cpu != outgoingcpu) 1399 if ((mask & 0x1) && cpu != outgoingcpu)
1620 cpumask_set_cpu(cpu, cm); 1400 cpumask_set_cpu(cpu, cm);
@@ -1624,62 +1404,36 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1624 cpumask_clear_cpu(cpu, cm); 1404 cpumask_clear_cpu(cpu, cm);
1625 WARN_ON_ONCE(cpumask_weight(cm) == 0); 1405 WARN_ON_ONCE(cpumask_weight(cm) == 0);
1626 } 1406 }
1627 set_cpus_allowed_ptr(rnp->node_kthread_task, cm); 1407 set_cpus_allowed_ptr(t, cm);
1628 rcu_boost_kthread_setaffinity(rnp, cm);
1629 free_cpumask_var(cm); 1408 free_cpumask_var(cm);
1630} 1409}
1631 1410
1632/* 1411static struct smp_hotplug_thread rcu_cpu_thread_spec = {
1633 * Spawn a per-rcu_node kthread, setting priority and affinity. 1412 .store = &rcu_cpu_kthread_task,
1634 * Called during boot before online/offline can happen, or, if 1413 .thread_should_run = rcu_cpu_kthread_should_run,
1635 * during runtime, with the main CPU-hotplug locks held. So only 1414 .thread_fn = rcu_cpu_kthread,
1636 * one of these can be executing at a time. 1415 .thread_comm = "rcuc/%u",
1637 */ 1416 .setup = rcu_cpu_kthread_setup,
1638static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, 1417 .park = rcu_cpu_kthread_park,
1639 struct rcu_node *rnp) 1418};
1640{
1641 unsigned long flags;
1642 int rnp_index = rnp - &rsp->node[0];
1643 struct sched_param sp;
1644 struct task_struct *t;
1645
1646 if (!rcu_scheduler_fully_active ||
1647 rnp->qsmaskinit == 0)
1648 return 0;
1649 if (rnp->node_kthread_task == NULL) {
1650 t = kthread_create(rcu_node_kthread, (void *)rnp,
1651 "rcun/%d", rnp_index);
1652 if (IS_ERR(t))
1653 return PTR_ERR(t);
1654 raw_spin_lock_irqsave(&rnp->lock, flags);
1655 rnp->node_kthread_task = t;
1656 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1657 sp.sched_priority = 99;
1658 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1659 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
1660 }
1661 return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
1662}
1663 1419
1664/* 1420/*
1665 * Spawn all kthreads -- called as soon as the scheduler is running. 1421 * Spawn all kthreads -- called as soon as the scheduler is running.
1666 */ 1422 */
1667static int __init rcu_spawn_kthreads(void) 1423static int __init rcu_spawn_kthreads(void)
1668{ 1424{
1669 int cpu;
1670 struct rcu_node *rnp; 1425 struct rcu_node *rnp;
1426 int cpu;
1671 1427
1672 rcu_scheduler_fully_active = 1; 1428 rcu_scheduler_fully_active = 1;
1673 for_each_possible_cpu(cpu) { 1429 for_each_possible_cpu(cpu)
1674 per_cpu(rcu_cpu_has_work, cpu) = 0; 1430 per_cpu(rcu_cpu_has_work, cpu) = 0;
1675 if (cpu_online(cpu)) 1431 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
1676 (void)rcu_spawn_one_cpu_kthread(cpu);
1677 }
1678 rnp = rcu_get_root(rcu_state); 1432 rnp = rcu_get_root(rcu_state);
1679 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1433 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1680 if (NUM_RCU_NODES > 1) { 1434 if (NUM_RCU_NODES > 1) {
1681 rcu_for_each_leaf_node(rcu_state, rnp) 1435 rcu_for_each_leaf_node(rcu_state, rnp)
1682 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1436 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1683 } 1437 }
1684 return 0; 1438 return 0;
1685} 1439}
@@ -1691,11 +1445,8 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
1691 struct rcu_node *rnp = rdp->mynode; 1445 struct rcu_node *rnp = rdp->mynode;
1692 1446
1693 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ 1447 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
1694 if (rcu_scheduler_fully_active) { 1448 if (rcu_scheduler_fully_active)
1695 (void)rcu_spawn_one_cpu_kthread(cpu); 1449 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1696 if (rnp->node_kthread_task == NULL)
1697 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1698 }
1699} 1450}
1700 1451
1701#else /* #ifdef CONFIG_RCU_BOOST */ 1452#else /* #ifdef CONFIG_RCU_BOOST */
@@ -1719,19 +1470,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1719{ 1470{
1720} 1471}
1721 1472
1722#ifdef CONFIG_HOTPLUG_CPU 1473static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1723
1724static void rcu_stop_cpu_kthread(int cpu)
1725{
1726}
1727
1728#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1729
1730static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1731{
1732}
1733
1734static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
1735{ 1474{
1736} 1475}
1737 1476
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 7340efdcd324..693513bc50e6 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -107,11 +107,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
107 rdp->nxttail[RCU_WAIT_TAIL]], 107 rdp->nxttail[RCU_WAIT_TAIL]],
108 ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); 108 ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]);
109#ifdef CONFIG_RCU_BOOST 109#ifdef CONFIG_RCU_BOOST
110 seq_printf(m, " kt=%d/%c/%d ktl=%x", 110 seq_printf(m, " kt=%d/%c ktl=%x",
111 per_cpu(rcu_cpu_has_work, rdp->cpu), 111 per_cpu(rcu_cpu_has_work, rdp->cpu),
112 convert_kthread_status(per_cpu(rcu_cpu_kthread_status, 112 convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
113 rdp->cpu)), 113 rdp->cpu)),
114 per_cpu(rcu_cpu_kthread_cpu, rdp->cpu),
115 per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff); 114 per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff);
116#endif /* #ifdef CONFIG_RCU_BOOST */ 115#endif /* #ifdef CONFIG_RCU_BOOST */
117 seq_printf(m, " b=%ld", rdp->blimit); 116 seq_printf(m, " b=%ld", rdp->blimit);
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 98f60c5caa1b..d6c5fc054242 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -1,14 +1,22 @@
1/* 1/*
2 * Common SMP CPU bringup/teardown functions 2 * Common SMP CPU bringup/teardown functions
3 */ 3 */
4#include <linux/cpu.h>
4#include <linux/err.h> 5#include <linux/err.h>
5#include <linux/smp.h> 6#include <linux/smp.h>
6#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/list.h>
9#include <linux/slab.h>
7#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/export.h>
8#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/kthread.h>
14#include <linux/smpboot.h>
9 15
10#include "smpboot.h" 16#include "smpboot.h"
11 17
18#ifdef CONFIG_SMP
19
12#ifdef CONFIG_GENERIC_SMP_IDLE_THREAD 20#ifdef CONFIG_GENERIC_SMP_IDLE_THREAD
13/* 21/*
14 * For the hotplug case we keep the task structs around and reuse 22 * For the hotplug case we keep the task structs around and reuse
@@ -65,3 +73,228 @@ void __init idle_threads_init(void)
65 } 73 }
66} 74}
67#endif 75#endif
76
77#endif /* #ifdef CONFIG_SMP */
78
79static LIST_HEAD(hotplug_threads);
80static DEFINE_MUTEX(smpboot_threads_lock);
81
82struct smpboot_thread_data {
83 unsigned int cpu;
84 unsigned int status;
85 struct smp_hotplug_thread *ht;
86};
87
88enum {
89 HP_THREAD_NONE = 0,
90 HP_THREAD_ACTIVE,
91 HP_THREAD_PARKED,
92};
93
94/**
95 * smpboot_thread_fn - percpu hotplug thread loop function
96 * @data: thread data pointer
97 *
98 * Checks for thread stop and park conditions. Calls the necessary
99 * setup, cleanup, park and unpark functions for the registered
100 * thread.
101 *
102 * Returns 0 once the thread has been told to stop; until then it does not return.
103 */
104static int smpboot_thread_fn(void *data)
105{
106 struct smpboot_thread_data *td = data;
107 struct smp_hotplug_thread *ht = td->ht;
108
109 while (1) {
110 set_current_state(TASK_INTERRUPTIBLE);
111 preempt_disable();
112 if (kthread_should_stop()) {
113 set_current_state(TASK_RUNNING);
114 preempt_enable();
115 if (ht->cleanup)
116 ht->cleanup(td->cpu, cpu_online(td->cpu));
117 kfree(td);
118 return 0;
119 }
120
121 if (kthread_should_park()) {
122 __set_current_state(TASK_RUNNING);
123 preempt_enable();
124 if (ht->park && td->status == HP_THREAD_ACTIVE) {
125 BUG_ON(td->cpu != smp_processor_id());
126 ht->park(td->cpu);
127 td->status = HP_THREAD_PARKED;
128 }
129 kthread_parkme();
130 /* We might have been woken for stop */
131 continue;
132 }
133
134 BUG_ON(td->cpu != smp_processor_id());
135
136 /* Check for state change setup */
137 switch (td->status) {
138 case HP_THREAD_NONE:
139 preempt_enable();
140 if (ht->setup)
141 ht->setup(td->cpu);
142 td->status = HP_THREAD_ACTIVE;
143 preempt_disable();
144 break;
145 case HP_THREAD_PARKED:
146 preempt_enable();
147 if (ht->unpark)
148 ht->unpark(td->cpu);
149 td->status = HP_THREAD_ACTIVE;
150 preempt_disable();
151 break;
152 }
153
154 if (!ht->thread_should_run(td->cpu)) {
155 preempt_enable();
156 schedule();
157 } else {
158 set_current_state(TASK_RUNNING);
159 preempt_enable();
160 ht->thread_fn(td->cpu);
161 }
162 }
163}
164
165static int
166__smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
167{
168 struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
169 struct smpboot_thread_data *td;
170
171 if (tsk)
172 return 0;
173
174 td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu));
175 if (!td)
176 return -ENOMEM;
177 td->cpu = cpu;
178 td->ht = ht;
179
180 tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu,
181 ht->thread_comm);
182 if (IS_ERR(tsk)) {
183 kfree(td);
184 return PTR_ERR(tsk);
185 }
186
187 get_task_struct(tsk);
188 *per_cpu_ptr(ht->store, cpu) = tsk;
189 return 0;
190}
191
192int smpboot_create_threads(unsigned int cpu)
193{
194 struct smp_hotplug_thread *cur;
195 int ret = 0;
196
197 mutex_lock(&smpboot_threads_lock);
198 list_for_each_entry(cur, &hotplug_threads, list) {
199 ret = __smpboot_create_thread(cur, cpu);
200 if (ret)
201 break;
202 }
203 mutex_unlock(&smpboot_threads_lock);
204 return ret;
205}
206
207static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
208{
209 struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
210
211 kthread_unpark(tsk);
212}
213
214void smpboot_unpark_threads(unsigned int cpu)
215{
216 struct smp_hotplug_thread *cur;
217
218 mutex_lock(&smpboot_threads_lock);
219 list_for_each_entry(cur, &hotplug_threads, list)
220 smpboot_unpark_thread(cur, cpu);
221 mutex_unlock(&smpboot_threads_lock);
222}
223
224static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
225{
226 struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
227
228 if (tsk)
229 kthread_park(tsk);
230}
231
232void smpboot_park_threads(unsigned int cpu)
233{
234 struct smp_hotplug_thread *cur;
235
236 mutex_lock(&smpboot_threads_lock);
237 list_for_each_entry_reverse(cur, &hotplug_threads, list)
238 smpboot_park_thread(cur, cpu);
239 mutex_unlock(&smpboot_threads_lock);
240}
241
242static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
243{
244 unsigned int cpu;
245
246 /* We also need to destroy the parked threads of offline CPUs */
247 for_each_possible_cpu(cpu) {
248 struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
249
250 if (tsk) {
251 kthread_stop(tsk);
252 put_task_struct(tsk);
253 *per_cpu_ptr(ht->store, cpu) = NULL;
254 }
255 }
256}
257
258/**
259 * smpboot_register_percpu_thread - Register a per_cpu thread related to hotplug
260 * @plug_thread: Hotplug thread descriptor
261 *
262 * Creates and starts the threads on all online cpus.
263 */
264int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
265{
266 unsigned int cpu;
267 int ret = 0;
268
269 mutex_lock(&smpboot_threads_lock);
270 for_each_online_cpu(cpu) {
271 ret = __smpboot_create_thread(plug_thread, cpu);
272 if (ret) {
273 smpboot_destroy_threads(plug_thread);
274 goto out;
275 }
276 smpboot_unpark_thread(plug_thread, cpu);
277 }
278 list_add(&plug_thread->list, &hotplug_threads);
279out:
280 mutex_unlock(&smpboot_threads_lock);
281 return ret;
282}
283EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
284
285/**
286 * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug
287 * @plug_thread: Hotplug thread descriptor
288 *
289 * Stops all threads on all possible cpus.
290 */
291void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
292{
293 get_online_cpus();
294 mutex_lock(&smpboot_threads_lock);
295 list_del(&plug_thread->list);
296 smpboot_destroy_threads(plug_thread);
297 mutex_unlock(&smpboot_threads_lock);
298 put_online_cpus();
299}
300EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
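
Taken together, kernel/smpboot.c now provides a single registration point for per-CPU hotplug threads. Below is a minimal, hypothetical consumer of that interface; all example_* names are invented for illustration, only the struct smp_hotplug_thread fields and smpboot_register_percpu_thread() come from the patch. Per smpboot_thread_fn() above, thread_should_run() is called with preemption disabled and must not sleep, while thread_fn() runs with preemption enabled.

	#include <linux/init.h>
	#include <linux/percpu.h>
	#include <linux/sched.h>
	#include <linux/smpboot.h>

	static DEFINE_PER_CPU(struct task_struct *, example_task);
	static DEFINE_PER_CPU(unsigned int, example_pending);

	/* Called with preemption disabled; must not sleep. */
	static int example_should_run(unsigned int cpu)
	{
		return __this_cpu_read(example_pending);
	}

	/* Called with preemption enabled once should_run() returned true. */
	static void example_fn(unsigned int cpu)
	{
		__this_cpu_write(example_pending, 0);
		/* ... per-CPU work goes here ... */
	}

	static struct smp_hotplug_thread example_threads = {
		.store			= &example_task,
		.thread_should_run	= example_should_run,
		.thread_fn		= example_fn,
		.thread_comm		= "example/%u",
	};

	static int __init example_threads_init(void)
	{
		return smpboot_register_percpu_thread(&example_threads);
	}
	early_initcall(example_threads_init);
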
diff --git a/kernel/smpboot.h b/kernel/smpboot.h
index 6ef9433e1c70..72415a0eb955 100644
--- a/kernel/smpboot.h
+++ b/kernel/smpboot.h
@@ -13,4 +13,8 @@ static inline void idle_thread_set_boot_cpu(void) { }
13static inline void idle_threads_init(void) { } 13static inline void idle_threads_init(void) { }
14#endif 14#endif
15 15
16int smpboot_create_threads(unsigned int cpu);
17void smpboot_park_threads(unsigned int cpu);
18void smpboot_unpark_threads(unsigned int cpu);
19
16#endif 20#endif
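
These three declarations are consumed by the CPU hotplug core, which is not shown in this excerpt. A hedged sketch of the intended call order, with invented example_* wrappers standing in for the real hotplug paths: create the per-CPU threads before a CPU comes up, unpark them once it is online, and park them again on the way down.

	#include "smpboot.h"

	/* Hypothetical wrappers; the real callers live in the CPU hotplug core. */
	static int example_cpu_up(unsigned int cpu)
	{
		int ret = smpboot_create_threads(cpu);

		if (ret)
			return ret;
		/* ... architecture-specific bring-up of @cpu happens here ... */
		smpboot_unpark_threads(cpu);
		return 0;
	}

	static void example_cpu_down(unsigned int cpu)
	{
		smpboot_park_threads(cpu);
		/* ... @cpu is taken offline here ... */
	}
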
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b73e681df09e..5c6a5bd8462f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -23,6 +23,7 @@
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/ftrace.h> 24#include <linux/ftrace.h>
25#include <linux/smp.h> 25#include <linux/smp.h>
26#include <linux/smpboot.h>
26#include <linux/tick.h> 27#include <linux/tick.h>
27 28
28#define CREATE_TRACE_POINTS 29#define CREATE_TRACE_POINTS
@@ -742,49 +743,22 @@ void __init softirq_init(void)
742 open_softirq(HI_SOFTIRQ, tasklet_hi_action); 743 open_softirq(HI_SOFTIRQ, tasklet_hi_action);
743} 744}
744 745
745static int run_ksoftirqd(void * __bind_cpu) 746static int ksoftirqd_should_run(unsigned int cpu)
746{ 747{
747 set_current_state(TASK_INTERRUPTIBLE); 748 return local_softirq_pending();
748 749}
749 while (!kthread_should_stop()) {
750 preempt_disable();
751 if (!local_softirq_pending()) {
752 schedule_preempt_disabled();
753 }
754
755 __set_current_state(TASK_RUNNING);
756
757 while (local_softirq_pending()) {
758 /* Preempt disable stops cpu going offline.
759 If already offline, we'll be on wrong CPU:
760 don't process */
761 if (cpu_is_offline((long)__bind_cpu))
762 goto wait_to_die;
763 local_irq_disable();
764 if (local_softirq_pending())
765 __do_softirq();
766 local_irq_enable();
767 sched_preempt_enable_no_resched();
768 cond_resched();
769 preempt_disable();
770 rcu_note_context_switch((long)__bind_cpu);
771 }
772 preempt_enable();
773 set_current_state(TASK_INTERRUPTIBLE);
774 }
775 __set_current_state(TASK_RUNNING);
776 return 0;
777 750
778wait_to_die: 751static void run_ksoftirqd(unsigned int cpu)
779 preempt_enable(); 752{
780 /* Wait for kthread_stop */ 753 local_irq_disable();
781 set_current_state(TASK_INTERRUPTIBLE); 754 if (local_softirq_pending()) {
782 while (!kthread_should_stop()) { 755 __do_softirq();
783 schedule(); 756 rcu_note_context_switch(cpu);
784 set_current_state(TASK_INTERRUPTIBLE); 757 local_irq_enable();
758 cond_resched();
759 return;
785 } 760 }
786 __set_current_state(TASK_RUNNING); 761 local_irq_enable();
787 return 0;
788} 762}
789 763
790#ifdef CONFIG_HOTPLUG_CPU 764#ifdef CONFIG_HOTPLUG_CPU
@@ -850,50 +824,14 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
850 unsigned long action, 824 unsigned long action,
851 void *hcpu) 825 void *hcpu)
852{ 826{
853 int hotcpu = (unsigned long)hcpu;
854 struct task_struct *p;
855
856 switch (action) { 827 switch (action) {
857 case CPU_UP_PREPARE:
858 case CPU_UP_PREPARE_FROZEN:
859 p = kthread_create_on_node(run_ksoftirqd,
860 hcpu,
861 cpu_to_node(hotcpu),
862 "ksoftirqd/%d", hotcpu);
863 if (IS_ERR(p)) {
864 printk("ksoftirqd for %i failed\n", hotcpu);
865 return notifier_from_errno(PTR_ERR(p));
866 }
867 kthread_bind(p, hotcpu);
868 per_cpu(ksoftirqd, hotcpu) = p;
869 break;
870 case CPU_ONLINE:
871 case CPU_ONLINE_FROZEN:
872 wake_up_process(per_cpu(ksoftirqd, hotcpu));
873 break;
874#ifdef CONFIG_HOTPLUG_CPU 828#ifdef CONFIG_HOTPLUG_CPU
875 case CPU_UP_CANCELED:
876 case CPU_UP_CANCELED_FROZEN:
877 if (!per_cpu(ksoftirqd, hotcpu))
878 break;
879 /* Unbind so it can run. Fall thru. */
880 kthread_bind(per_cpu(ksoftirqd, hotcpu),
881 cpumask_any(cpu_online_mask));
882 case CPU_DEAD: 829 case CPU_DEAD:
883 case CPU_DEAD_FROZEN: { 830 case CPU_DEAD_FROZEN:
884 static const struct sched_param param = { 831 takeover_tasklets((unsigned long)hcpu);
885 .sched_priority = MAX_RT_PRIO-1
886 };
887
888 p = per_cpu(ksoftirqd, hotcpu);
889 per_cpu(ksoftirqd, hotcpu) = NULL;
890 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
891 kthread_stop(p);
892 takeover_tasklets(hotcpu);
893 break; 832 break;
894 }
895#endif /* CONFIG_HOTPLUG_CPU */ 833#endif /* CONFIG_HOTPLUG_CPU */
896 } 834 }
897 return NOTIFY_OK; 835 return NOTIFY_OK;
898} 836}
899 837
@@ -901,14 +839,19 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
901 .notifier_call = cpu_callback 839 .notifier_call = cpu_callback
902}; 840};
903 841
842static struct smp_hotplug_thread softirq_threads = {
843 .store = &ksoftirqd,
844 .thread_should_run = ksoftirqd_should_run,
845 .thread_fn = run_ksoftirqd,
846 .thread_comm = "ksoftirqd/%u",
847};
848
904static __init int spawn_ksoftirqd(void) 849static __init int spawn_ksoftirqd(void)
905{ 850{
906 void *cpu = (void *)(long)smp_processor_id();
907 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
908
909 BUG_ON(err != NOTIFY_OK);
910 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
911 register_cpu_notifier(&cpu_nfb); 851 register_cpu_notifier(&cpu_nfb);
852
853 BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
854
912 return 0; 855 return 0;
913} 856}
914early_initcall(spawn_ksoftirqd); 857early_initcall(spawn_ksoftirqd);
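
With this conversion, raising a softirq from a context that cannot process it immediately still just wakes the task stored in the ksoftirqd per-CPU variable; the smpboot loop then drives ksoftirqd_should_run()/run_ksoftirqd(). The sketch below restates that wakeup step; wakeup_softirqd() itself is untouched by the patch and not part of this hunk, and the example_ name is illustrative.

	/* Relies on the file-local DEFINE_PER_CPU(struct task_struct *, ksoftirqd). */
	static void example_wakeup_ksoftirqd(void)
	{
		/* Caller has interrupts disabled, so preemption need not be disabled. */
		struct task_struct *tsk = __this_cpu_read(ksoftirqd);

		if (tsk && tsk->state != TASK_RUNNING)
			wake_up_process(tsk);
	}
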
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 4b1dfba70f7c..9d4c8d5a1f53 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -22,6 +22,7 @@
22#include <linux/notifier.h> 22#include <linux/notifier.h>
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/sysctl.h> 24#include <linux/sysctl.h>
25#include <linux/smpboot.h>
25 26
26#include <asm/irq_regs.h> 27#include <asm/irq_regs.h>
27#include <linux/kvm_para.h> 28#include <linux/kvm_para.h>
@@ -29,16 +30,18 @@
29 30
30int watchdog_enabled = 1; 31int watchdog_enabled = 1;
31int __read_mostly watchdog_thresh = 10; 32int __read_mostly watchdog_thresh = 10;
33static int __read_mostly watchdog_disabled;
32 34
33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 35static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
34static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); 36static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
35static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); 37static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
36static DEFINE_PER_CPU(bool, softlockup_touch_sync); 38static DEFINE_PER_CPU(bool, softlockup_touch_sync);
37static DEFINE_PER_CPU(bool, soft_watchdog_warn); 39static DEFINE_PER_CPU(bool, soft_watchdog_warn);
40static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
41static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
38#ifdef CONFIG_HARDLOCKUP_DETECTOR 42#ifdef CONFIG_HARDLOCKUP_DETECTOR
39static DEFINE_PER_CPU(bool, hard_watchdog_warn); 43static DEFINE_PER_CPU(bool, hard_watchdog_warn);
40static DEFINE_PER_CPU(bool, watchdog_nmi_touch); 44static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
41static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
42static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); 45static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 46static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif 47#endif
@@ -248,13 +251,15 @@ static void watchdog_overflow_callback(struct perf_event *event,
248 __this_cpu_write(hard_watchdog_warn, false); 251 __this_cpu_write(hard_watchdog_warn, false);
249 return; 252 return;
250} 253}
254#endif /* CONFIG_HARDLOCKUP_DETECTOR */
255
251static void watchdog_interrupt_count(void) 256static void watchdog_interrupt_count(void)
252{ 257{
253 __this_cpu_inc(hrtimer_interrupts); 258 __this_cpu_inc(hrtimer_interrupts);
254} 259}
255#else 260
256static inline void watchdog_interrupt_count(void) { return; } 261static int watchdog_nmi_enable(unsigned int cpu);
257#endif /* CONFIG_HARDLOCKUP_DETECTOR */ 262static void watchdog_nmi_disable(unsigned int cpu);
258 263
259/* watchdog kicker functions */ 264/* watchdog kicker functions */
260static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) 265static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
@@ -327,49 +332,68 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
327 return HRTIMER_RESTART; 332 return HRTIMER_RESTART;
328} 333}
329 334
335static void watchdog_set_prio(unsigned int policy, unsigned int prio)
336{
337 struct sched_param param = { .sched_priority = prio };
330 338
331/* 339 sched_setscheduler(current, policy, &param);
332 * The watchdog thread - touches the timestamp. 340}
333 */ 341
334static int watchdog(void *unused) 342static void watchdog_enable(unsigned int cpu)
335{ 343{
336 struct sched_param param = { .sched_priority = 0 };
337 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); 344 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
338 345
339 /* initialize timestamp */ 346 if (!watchdog_enabled) {
340 __touch_watchdog(); 347 kthread_park(current);
348 return;
349 }
350
351 /* Enable the perf event */
352 watchdog_nmi_enable(cpu);
341 353
342 /* kick off the timer for the hardlockup detector */ 354 /* kick off the timer for the hardlockup detector */
355 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
356 hrtimer->function = watchdog_timer_fn;
357
343 /* done here because hrtimer_start can only pin to smp_processor_id() */ 358 /* done here because hrtimer_start can only pin to smp_processor_id() */
344 hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()), 359 hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
345 HRTIMER_MODE_REL_PINNED); 360 HRTIMER_MODE_REL_PINNED);
346 361
347 set_current_state(TASK_INTERRUPTIBLE); 362 /* initialize timestamp */
348 /* 363 watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
349 * Run briefly (kicked by the hrtimer callback function) once every 364 __touch_watchdog();
350 * get_sample_period() seconds (4 seconds by default) to reset the 365}
351 * softlockup timestamp. If this gets delayed for more than
352 * 2*watchdog_thresh seconds then the debug-printout triggers in
353 * watchdog_timer_fn().
354 */
355 while (!kthread_should_stop()) {
356 __touch_watchdog();
357 schedule();
358 366
359 if (kthread_should_stop()) 367static void watchdog_disable(unsigned int cpu)
360 break; 368{
369 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
361 370
362 set_current_state(TASK_INTERRUPTIBLE); 371 watchdog_set_prio(SCHED_NORMAL, 0);
363 } 372 hrtimer_cancel(hrtimer);
364 /* 373 /* disable the perf event */
365 * Drop the policy/priority elevation during thread exit to avoid a 374 watchdog_nmi_disable(cpu);
366 * scheduling latency spike.
367 */
368 __set_current_state(TASK_RUNNING);
369 sched_setscheduler(current, SCHED_NORMAL, &param);
370 return 0;
371} 375}
372 376
377static int watchdog_should_run(unsigned int cpu)
378{
379 return __this_cpu_read(hrtimer_interrupts) !=
380 __this_cpu_read(soft_lockup_hrtimer_cnt);
381}
382
383/*
384 * The watchdog thread function - touches the timestamp.
385 *
386 * It only runs once every get_sample_period() seconds (4 seconds by
387 * default) to reset the softlockup timestamp. If this gets delayed
388 * for more than 2*watchdog_thresh seconds then the debug-printout
389 * triggers in watchdog_timer_fn().
390 */
391static void watchdog(unsigned int cpu)
392{
393 __this_cpu_write(soft_lockup_hrtimer_cnt,
394 __this_cpu_read(hrtimer_interrupts));
395 __touch_watchdog();
396}
373 397
374#ifdef CONFIG_HARDLOCKUP_DETECTOR 398#ifdef CONFIG_HARDLOCKUP_DETECTOR
375/* 399/*
@@ -379,7 +403,7 @@ static int watchdog(void *unused)
379 */ 403 */
380static unsigned long cpu0_err; 404static unsigned long cpu0_err;
381 405
382static int watchdog_nmi_enable(int cpu) 406static int watchdog_nmi_enable(unsigned int cpu)
383{ 407{
384 struct perf_event_attr *wd_attr; 408 struct perf_event_attr *wd_attr;
385 struct perf_event *event = per_cpu(watchdog_ev, cpu); 409 struct perf_event *event = per_cpu(watchdog_ev, cpu);
@@ -433,7 +457,7 @@ out:
433 return 0; 457 return 0;
434} 458}
435 459
436static void watchdog_nmi_disable(int cpu) 460static void watchdog_nmi_disable(unsigned int cpu)
437{ 461{
438 struct perf_event *event = per_cpu(watchdog_ev, cpu); 462 struct perf_event *event = per_cpu(watchdog_ev, cpu);
439 463
@@ -447,107 +471,35 @@ static void watchdog_nmi_disable(int cpu)
447 return; 471 return;
448} 472}
449#else 473#else
450static int watchdog_nmi_enable(int cpu) { return 0; } 474static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
451static void watchdog_nmi_disable(int cpu) { return; } 475static void watchdog_nmi_disable(unsigned int cpu) { return; }
452#endif /* CONFIG_HARDLOCKUP_DETECTOR */ 476#endif /* CONFIG_HARDLOCKUP_DETECTOR */
453 477
454/* prepare/enable/disable routines */ 478/* prepare/enable/disable routines */
455static void watchdog_prepare_cpu(int cpu)
456{
457 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
458
459 WARN_ON(per_cpu(softlockup_watchdog, cpu));
460 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
461 hrtimer->function = watchdog_timer_fn;
462}
463
464static int watchdog_enable(int cpu)
465{
466 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
467 int err = 0;
468
469 /* enable the perf event */
470 err = watchdog_nmi_enable(cpu);
471
472 /* Regardless of err above, fall through and start softlockup */
473
474 /* create the watchdog thread */
475 if (!p) {
476 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
477 p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
478 if (IS_ERR(p)) {
479 pr_err("softlockup watchdog for %i failed\n", cpu);
480 if (!err) {
481 /* if hardlockup hasn't already set this */
482 err = PTR_ERR(p);
483 /* and disable the perf event */
484 watchdog_nmi_disable(cpu);
485 }
486 goto out;
487 }
488 sched_setscheduler(p, SCHED_FIFO, &param);
489 kthread_bind(p, cpu);
490 per_cpu(watchdog_touch_ts, cpu) = 0;
491 per_cpu(softlockup_watchdog, cpu) = p;
492 wake_up_process(p);
493 }
494
495out:
496 return err;
497}
498
499static void watchdog_disable(int cpu)
500{
501 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
502 struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
503
504 /*
505 * cancel the timer first to stop incrementing the stats
506 * and waking up the kthread
507 */
508 hrtimer_cancel(hrtimer);
509
510 /* disable the perf event */
511 watchdog_nmi_disable(cpu);
512
513 /* stop the watchdog thread */
514 if (p) {
515 per_cpu(softlockup_watchdog, cpu) = NULL;
516 kthread_stop(p);
517 }
518}
519
520/* sysctl functions */ 479/* sysctl functions */
521#ifdef CONFIG_SYSCTL 480#ifdef CONFIG_SYSCTL
522static void watchdog_enable_all_cpus(void) 481static void watchdog_enable_all_cpus(void)
523{ 482{
524 int cpu; 483 unsigned int cpu;
525
526 watchdog_enabled = 0;
527
528 for_each_online_cpu(cpu)
529 if (!watchdog_enable(cpu))
530 /* if any cpu succeeds, watchdog is considered
531 enabled for the system */
532 watchdog_enabled = 1;
533
534 if (!watchdog_enabled)
535 pr_err("failed to be enabled on some cpus\n");
536 484
485 if (watchdog_disabled) {
486 watchdog_disabled = 0;
487 for_each_online_cpu(cpu)
488 kthread_unpark(per_cpu(softlockup_watchdog, cpu));
489 }
537} 490}
538 491
539static void watchdog_disable_all_cpus(void) 492static void watchdog_disable_all_cpus(void)
540{ 493{
541 int cpu; 494 unsigned int cpu;
542
543 for_each_online_cpu(cpu)
544 watchdog_disable(cpu);
545 495
546 /* if all watchdogs are disabled, then they are disabled for the system */ 496 if (!watchdog_disabled) {
547 watchdog_enabled = 0; 497 watchdog_disabled = 1;
498 for_each_online_cpu(cpu)
499 kthread_park(per_cpu(softlockup_watchdog, cpu));
500 }
548} 501}
549 502
550
551/* 503/*
552 * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh 504 * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
553 */ 505 */
@@ -557,73 +509,36 @@ int proc_dowatchdog(struct ctl_table *table, int write,
557{ 509{
558 int ret; 510 int ret;
559 511
512 if (watchdog_disabled < 0)
513 return -ENODEV;
514
560 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 515 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
561 if (ret || !write) 516 if (ret || !write)
562 goto out; 517 return ret;
563 518
564 if (watchdog_enabled && watchdog_thresh) 519 if (watchdog_enabled && watchdog_thresh)
565 watchdog_enable_all_cpus(); 520 watchdog_enable_all_cpus();
566 else 521 else
567 watchdog_disable_all_cpus(); 522 watchdog_disable_all_cpus();
568 523
569out:
570 return ret; 524 return ret;
571} 525}
572#endif /* CONFIG_SYSCTL */ 526#endif /* CONFIG_SYSCTL */
573 527
574 528static struct smp_hotplug_thread watchdog_threads = {
575/* 529 .store = &softlockup_watchdog,
576 * Create/destroy watchdog threads as CPUs come and go: 530 .thread_should_run = watchdog_should_run,
577 */ 531 .thread_fn = watchdog,
578static int __cpuinit 532 .thread_comm = "watchdog/%u",
579cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 533 .setup = watchdog_enable,
580{ 534 .park = watchdog_disable,
581 int hotcpu = (unsigned long)hcpu; 535 .unpark = watchdog_enable,
582
583 switch (action) {
584 case CPU_UP_PREPARE:
585 case CPU_UP_PREPARE_FROZEN:
586 watchdog_prepare_cpu(hotcpu);
587 break;
588 case CPU_ONLINE:
589 case CPU_ONLINE_FROZEN:
590 if (watchdog_enabled)
591 watchdog_enable(hotcpu);
592 break;
593#ifdef CONFIG_HOTPLUG_CPU
594 case CPU_UP_CANCELED:
595 case CPU_UP_CANCELED_FROZEN:
596 watchdog_disable(hotcpu);
597 break;
598 case CPU_DEAD:
599 case CPU_DEAD_FROZEN:
600 watchdog_disable(hotcpu);
601 break;
602#endif /* CONFIG_HOTPLUG_CPU */
603 }
604
605 /*
606 * hardlockup and softlockup are not important enough
607 * to block cpu bring up. Just always succeed and
608 * rely on printk output to flag problems.
609 */
610 return NOTIFY_OK;
611}
612
613static struct notifier_block __cpuinitdata cpu_nfb = {
614 .notifier_call = cpu_callback
615}; 536};
616 537
617void __init lockup_detector_init(void) 538void __init lockup_detector_init(void)
618{ 539{
619 void *cpu = (void *)(long)smp_processor_id(); 540 if (smpboot_register_percpu_thread(&watchdog_threads)) {
620 int err; 541 pr_err("Failed to create watchdog threads, disabled\n");
621 542 watchdog_disabled = -ENODEV;
622 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 543 }
623 WARN_ON(notifier_to_errno(err));
624
625 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
626 register_cpu_notifier(&cpu_nfb);
627
628 return;
629} 544}
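
After this conversion, the sysctl paths above no longer create or destroy watchdog threads at all; they only park and unpark the tasks held in the softlockup_watchdog per-CPU store, and the .park/.unpark callbacks (watchdog_disable()/watchdog_enable()) do the actual hrtimer and perf-event work. A hedged single-CPU variant of the same idea, with an invented helper name:

	/* Relies on the file-local DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog). */
	static void example_set_watchdog(unsigned int cpu, bool enable)
	{
		struct task_struct *tsk = per_cpu(softlockup_watchdog, cpu);

		if (!tsk)
			return;
		if (enable)
			kthread_unpark(tsk);	/* smpboot loop then runs watchdog_enable() via .unpark */
		else
			kthread_park(tsk);	/* smpboot loop runs watchdog_disable() via .park */
	}
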