diff options
author | Lai Jiangshan <laijs@cn.fujitsu.com> | 2010-10-20 02:13:06 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2010-11-30 01:01:58 -0500 |
commit | 29494be71afe2a16ad04e344306a620d7cc22d06 (patch) | |
tree | d16a2acd1566be326483d5217d39b78d2ef798c5 /kernel | |
parent | 7b27d5475f86186914e54e4a6bb994e9a985337b (diff) |
rcu,cleanup: simplify the code when cpu is dying
When we handle the CPU_DYING notifier, the whole system is stopped except
for the current CPU. We therefore need no synchronization with the other
CPUs. This allows us to move any orphaned RCU callbacks directly to the
list of any online CPU without needing to run them through the global
orphan lists. These global orphan lists can therefore be dispensed with.
This commit makes these changes, though it currently victimizes CPU 0 @@@.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/rcutree.c | 81 | ||||
-rw-r--r-- | kernel/rcutree.h | 16 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 8 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 4 |
4 files changed, 31 insertions, 78 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ccdc04c47981..669d7fe049d1 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -67,9 +67,6 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
67 | .gpnum = -300, \ | 67 | .gpnum = -300, \ |
68 | .completed = -300, \ | 68 | .completed = -300, \ |
69 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ | 69 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ |
70 | .orphan_cbs_list = NULL, \ | ||
71 | .orphan_cbs_tail = &structname.orphan_cbs_list, \ | ||
72 | .orphan_qlen = 0, \ | ||
73 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ | 70 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ |
74 | .n_force_qs = 0, \ | 71 | .n_force_qs = 0, \ |
75 | .n_force_qs_ngp = 0, \ | 72 | .n_force_qs_ngp = 0, \ |
@@ -984,53 +981,31 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
984 | #ifdef CONFIG_HOTPLUG_CPU | 981 | #ifdef CONFIG_HOTPLUG_CPU |
985 | 982 | ||
986 | /* | 983 | /* |
987 | * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the | 984 | * Move a dying CPU's RCU callbacks to an online CPU's callback list. |
988 | * specified flavor of RCU. The callbacks will be adopted by the next | 985 | * Synchronization is not required because this function executes |
989 | * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever | 986 | * in stop_machine() context. |
990 | * comes first. Because this is invoked from the CPU_DYING notifier, | ||
991 | * irqs are already disabled. | ||
992 | */ | 987 | */ |
993 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | 988 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) |
994 | { | 989 | { |
995 | int i; | 990 | int i; |
991 | /* current DYING CPU is cleared in the cpu_online_mask */ | ||
992 | int receive_cpu = cpumask_any(cpu_online_mask); | ||
996 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 993 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
994 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); | ||
997 | 995 | ||
998 | if (rdp->nxtlist == NULL) | 996 | if (rdp->nxtlist == NULL) |
999 | return; /* irqs disabled, so comparison is stable. */ | 997 | return; /* irqs disabled, so comparison is stable. */ |
1000 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ | 998 | |
1001 | *rsp->orphan_cbs_tail = rdp->nxtlist; | 999 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; |
1002 | rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL]; | 1000 | receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; |
1001 | receive_rdp->qlen += rdp->qlen; | ||
1002 | receive_rdp->n_cbs_adopted += rdp->qlen; | ||
1003 | rdp->n_cbs_orphaned += rdp->qlen; | ||
1004 | |||
1003 | rdp->nxtlist = NULL; | 1005 | rdp->nxtlist = NULL; |
1004 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1006 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
1005 | rdp->nxttail[i] = &rdp->nxtlist; | 1007 | rdp->nxttail[i] = &rdp->nxtlist; |
1006 | rsp->orphan_qlen += rdp->qlen; | ||
1007 | rdp->n_cbs_orphaned += rdp->qlen; | ||
1008 | rdp->qlen = 0; | 1008 | rdp->qlen = 0; |
1009 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ | ||
1010 | } | ||
1011 | |||
1012 | /* | ||
1013 | * Adopt previously orphaned RCU callbacks. | ||
1014 | */ | ||
1015 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
1016 | { | ||
1017 | unsigned long flags; | ||
1018 | struct rcu_data *rdp; | ||
1019 | |||
1020 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | ||
1021 | rdp = this_cpu_ptr(rsp->rda); | ||
1022 | if (rsp->orphan_cbs_list == NULL) { | ||
1023 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
1024 | return; | ||
1025 | } | ||
1026 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; | ||
1027 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; | ||
1028 | rdp->qlen += rsp->orphan_qlen; | ||
1029 | rdp->n_cbs_adopted += rsp->orphan_qlen; | ||
1030 | rsp->orphan_cbs_list = NULL; | ||
1031 | rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; | ||
1032 | rsp->orphan_qlen = 0; | ||
1033 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
1034 | } | 1009 | } |
1035 | 1010 | ||
1036 | /* | 1011 | /* |
@@ -1081,8 +1056,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
1081 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1056 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1082 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1057 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
1083 | rcu_report_exp_rnp(rsp, rnp); | 1058 | rcu_report_exp_rnp(rsp, rnp); |
1084 | |||
1085 | rcu_adopt_orphan_cbs(rsp); | ||
1086 | } | 1059 | } |
1087 | 1060 | ||
1088 | /* | 1061 | /* |
@@ -1100,11 +1073,7 @@ static void rcu_offline_cpu(int cpu) | |||
1100 | 1073 | ||
1101 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1074 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
1102 | 1075 | ||
1103 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | 1076 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) |
1104 | { | ||
1105 | } | ||
1106 | |||
1107 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
1108 | { | 1077 | { |
1109 | } | 1078 | } |
1110 | 1079 | ||
@@ -1702,10 +1671,7 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
1702 | * early. | 1671 | * early. |
1703 | */ | 1672 | */ |
1704 | atomic_set(&rcu_barrier_cpu_count, 1); | 1673 | atomic_set(&rcu_barrier_cpu_count, 1); |
1705 | preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */ | ||
1706 | rcu_adopt_orphan_cbs(rsp); | ||
1707 | on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); | 1674 | on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); |
1708 | preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */ | ||
1709 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 1675 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
1710 | complete(&rcu_barrier_completion); | 1676 | complete(&rcu_barrier_completion); |
1711 | wait_for_completion(&rcu_barrier_completion); | 1677 | wait_for_completion(&rcu_barrier_completion); |
@@ -1831,18 +1797,13 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
1831 | case CPU_DYING: | 1797 | case CPU_DYING: |
1832 | case CPU_DYING_FROZEN: | 1798 | case CPU_DYING_FROZEN: |
1833 | /* | 1799 | /* |
1834 | * preempt_disable() in _rcu_barrier() prevents stop_machine(), | 1800 | * The whole machine is "stopped" except this cpu, so we can |
1835 | * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);" | 1801 | * touch any data without introducing corruption. And we send |
1836 | * returns, all online cpus have queued rcu_barrier_func(). | 1802 | * the callbacks to an arbitrarily chosen online cpu. |
1837 | * The dying CPU clears its cpu_online_mask bit and | ||
1838 | * moves all of its RCU callbacks to ->orphan_cbs_list | ||
1839 | * in the context of stop_machine(), so subsequent calls | ||
1840 | * to _rcu_barrier() will adopt these callbacks and only | ||
1841 | * then queue rcu_barrier_func() on all remaining CPUs. | ||
1842 | */ | 1803 | */ |
1843 | rcu_send_cbs_to_orphanage(&rcu_bh_state); | 1804 | rcu_send_cbs_to_online(&rcu_bh_state); |
1844 | rcu_send_cbs_to_orphanage(&rcu_sched_state); | 1805 | rcu_send_cbs_to_online(&rcu_sched_state); |
1845 | rcu_preempt_send_cbs_to_orphanage(); | 1806 | rcu_preempt_send_cbs_to_online(); |
1846 | break; | 1807 | break; |
1847 | case CPU_DEAD: | 1808 | case CPU_DEAD: |
1848 | case CPU_DEAD_FROZEN: | 1809 | case CPU_DEAD_FROZEN: |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 91d4170c5c13..1a54be2a902f 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -203,8 +203,8 @@ struct rcu_data { | |||
203 | long qlen_last_fqs_check; | 203 | long qlen_last_fqs_check; |
204 | /* qlen at last check for QS forcing */ | 204 | /* qlen at last check for QS forcing */ |
205 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ | 205 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ |
206 | unsigned long n_cbs_orphaned; /* RCU cbs sent to orphanage. */ | 206 | unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */ |
207 | unsigned long n_cbs_adopted; /* RCU cbs adopted from orphanage. */ | 207 | unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */ |
208 | unsigned long n_force_qs_snap; | 208 | unsigned long n_force_qs_snap; |
209 | /* did other CPU force QS recently? */ | 209 | /* did other CPU force QS recently? */ |
210 | long blimit; /* Upper limit on a processed batch */ | 210 | long blimit; /* Upper limit on a processed batch */ |
@@ -309,15 +309,7 @@ struct rcu_state { | |||
309 | /* End of fields guarded by root rcu_node's lock. */ | 309 | /* End of fields guarded by root rcu_node's lock. */ |
310 | 310 | ||
311 | raw_spinlock_t onofflock; /* exclude on/offline and */ | 311 | raw_spinlock_t onofflock; /* exclude on/offline and */ |
312 | /* starting new GP. Also */ | 312 | /* starting new GP. */ |
313 | /* protects the following */ | ||
314 | /* orphan_cbs fields. */ | ||
315 | struct rcu_head *orphan_cbs_list; /* list of rcu_head structs */ | ||
316 | /* orphaned by all CPUs in */ | ||
317 | /* a given leaf rcu_node */ | ||
318 | /* going offline. */ | ||
319 | struct rcu_head **orphan_cbs_tail; /* And tail pointer. */ | ||
320 | long orphan_qlen; /* Number of orphaned cbs. */ | ||
321 | raw_spinlock_t fqslock; /* Only one task forcing */ | 313 | raw_spinlock_t fqslock; /* Only one task forcing */ |
322 | /* quiescent states. */ | 314 | /* quiescent states. */ |
323 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | 315 | unsigned long jiffies_force_qs; /* Time at which to invoke */ |
@@ -390,7 +382,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); | |||
390 | static int rcu_preempt_pending(int cpu); | 382 | static int rcu_preempt_pending(int cpu); |
391 | static int rcu_preempt_needs_cpu(int cpu); | 383 | static int rcu_preempt_needs_cpu(int cpu); |
392 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | 384 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); |
393 | static void rcu_preempt_send_cbs_to_orphanage(void); | 385 | static void rcu_preempt_send_cbs_to_online(void); |
394 | static void __init __rcu_init_preempt(void); | 386 | static void __init __rcu_init_preempt(void); |
395 | static void rcu_needs_cpu_flush(void); | 387 | static void rcu_needs_cpu_flush(void); |
396 | 388 | ||
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 21df7f3e7273..0de359be5b41 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -774,11 +774,11 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
774 | } | 774 | } |
775 | 775 | ||
776 | /* | 776 | /* |
777 | * Move preemptable RCU's callbacks to ->orphan_cbs_list. | 777 | * Move the dying CPU's preemptable-RCU callbacks to another online CPU. |
778 | */ | 778 | */ |
779 | static void rcu_preempt_send_cbs_to_orphanage(void) | 779 | static void rcu_preempt_send_cbs_to_online(void) |
780 | { | 780 | { |
781 | rcu_send_cbs_to_orphanage(&rcu_preempt_state); | 781 | rcu_send_cbs_to_online(&rcu_preempt_state); |
782 | } | 782 | } |
783 | 783 | ||
784 | /* | 784 | /* |
@@ -1002,7 +1002,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
1002 | /* | 1002 | /* |
1003 | * Because there is no preemptable RCU, there are no callbacks to move. | 1003 | * Because there is no preemptable RCU, there are no callbacks to move. |
1004 | */ | 1004 | */ |
1005 | static void rcu_preempt_send_cbs_to_orphanage(void) | 1005 | static void rcu_preempt_send_cbs_to_online(void) |
1006 | { | 1006 | { |
1007 | } | 1007 | } |
1008 | 1008 | ||
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 78ad3c35b683..c8e97853b970 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -166,13 +166,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
166 | 166 | ||
167 | gpnum = rsp->gpnum; | 167 | gpnum = rsp->gpnum; |
168 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " | 168 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " |
169 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n", | 169 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", |
170 | rsp->completed, gpnum, rsp->signaled, | 170 | rsp->completed, gpnum, rsp->signaled, |
171 | (long)(rsp->jiffies_force_qs - jiffies), | 171 | (long)(rsp->jiffies_force_qs - jiffies), |
172 | (int)(jiffies & 0xffff), | 172 | (int)(jiffies & 0xffff), |
173 | rsp->n_force_qs, rsp->n_force_qs_ngp, | 173 | rsp->n_force_qs, rsp->n_force_qs_ngp, |
174 | rsp->n_force_qs - rsp->n_force_qs_ngp, | 174 | rsp->n_force_qs - rsp->n_force_qs_ngp, |
175 | rsp->n_force_qs_lh, rsp->orphan_qlen); | 175 | rsp->n_force_qs_lh); |
176 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { | 176 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { |
177 | if (rnp->level != level) { | 177 | if (rnp->level != level) { |
178 | seq_puts(m, "\n"); | 178 | seq_puts(m, "\n"); |