Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/rcutree.c | 190
 -rw-r--r--  kernel/rcutree.h |   3
 2 files changed, 129 insertions, 64 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f7bcd9e6c054..4792f1642bf2 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1042,6 +1042,102 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
 }
 
 /*
+ * Body of kthread that handles grace periods.
+ */
+static int rcu_gp_kthread(void *arg)
+{
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp = arg;
+
+	for (;;) {
+
+		/* Handle grace-period start. */
+		rnp = rcu_get_root(rsp);
+		for (;;) {
+			wait_event_interruptible(rsp->gp_wq, rsp->gp_flags);
+			if (rsp->gp_flags)
+				break;
+			flush_signals(current);
+		}
+		raw_spin_lock_irq(&rnp->lock);
+		rsp->gp_flags = 0;
+		rdp = this_cpu_ptr(rsp->rda);
+
+		if (rcu_gp_in_progress(rsp)) {
+			/*
+			 * A grace period is already in progress, so
+			 * don't start another one.
+			 */
+			raw_spin_unlock_irq(&rnp->lock);
+			continue;
+		}
+
+		if (rsp->fqs_active) {
+			/*
+			 * We need a grace period, but force_quiescent_state()
+			 * is running.  Tell it to start one on our behalf.
+			 */
+			rsp->fqs_need_gp = 1;
+			raw_spin_unlock_irq(&rnp->lock);
+			continue;
+		}
+
+		/* Advance to a new grace period and initialize state. */
+		rsp->gpnum++;
+		trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
+		WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT);
+		rsp->fqs_state = RCU_GP_INIT; /* Stop force_quiescent_state. */
+		rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
+		record_gp_stall_check_time(rsp);
+		raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
+
+		/* Exclude any concurrent CPU-hotplug operations. */
+		raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
+
+		/*
+		 * Set the quiescent-state-needed bits in all the rcu_node
+		 * structures for all currently online CPUs in breadth-first
+		 * order, starting from the root rcu_node structure.
+		 * This operation relies on the layout of the hierarchy
+		 * within the rsp->node[] array.  Note that other CPUs will
+		 * access only the leaves of the hierarchy, which still
+		 * indicate that no grace period is in progress, at least
+		 * until the corresponding leaf node has been initialized.
+		 * In addition, we have excluded CPU-hotplug operations.
+		 *
+		 * Note that the grace period cannot complete until
+		 * we finish the initialization process, as there will
+		 * be at least one qsmask bit set in the root node until
+		 * that time, namely the one corresponding to this CPU,
+		 * due to the fact that we have irqs disabled.
+		 */
+		rcu_for_each_node_breadth_first(rsp, rnp) {
+			raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+			rcu_preempt_check_blocked_tasks(rnp);
+			rnp->qsmask = rnp->qsmaskinit;
+			rnp->gpnum = rsp->gpnum;
+			rnp->completed = rsp->completed;
+			if (rnp == rdp->mynode)
+				rcu_start_gp_per_cpu(rsp, rnp, rdp);
+			rcu_preempt_boost_start_gp(rnp);
+			trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
+						    rnp->level, rnp->grplo,
+						    rnp->grphi, rnp->qsmask);
+			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+		}
+
+		rnp = rcu_get_root(rsp);
+		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+		/* force_quiescent_state() now OK. */
+		rsp->fqs_state = RCU_SIGNAL_INIT;
+		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+		raw_spin_unlock_irq(&rsp->onofflock);
+	}
+	return 0;
+}
+
+/*
  * Start a new RCU grace period if warranted, re-initializing the hierarchy
  * in preparation for detecting the next grace period.  The caller must hold
  * the root node's ->lock, which is released before return.  Hard irqs must
@@ -1058,77 +1154,20 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
-	if (!rcu_scheduler_fully_active ||
+	if (!rsp->gp_kthread ||
 	    !cpu_needs_another_gp(rsp, rdp)) {
 		/*
-		 * Either the scheduler hasn't yet spawned the first
-		 * non-idle task or this CPU does not need another
-		 * grace period.  Either way, don't start a new grace
-		 * period.
+		 * Either we have not yet spawned the grace-period
+		 * task or this CPU does not need another grace period.
+		 * Either way, don't start a new grace period.
 		 */
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
 
-	if (rsp->fqs_active) {
-		/*
-		 * This CPU needs a grace period, but force_quiescent_state()
-		 * is running.  Tell it to start one on this CPU's behalf.
-		 */
-		rsp->fqs_need_gp = 1;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		return;
-	}
-
-	/* Advance to a new grace period and initialize state. */
-	rsp->gpnum++;
-	trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
-	WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT);
-	rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
-	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
-	record_gp_stall_check_time(rsp);
-	raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
-
-	/* Exclude any concurrent CPU-hotplug operations. */
-	raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
-
-	/*
-	 * Set the quiescent-state-needed bits in all the rcu_node
-	 * structures for all currently online CPUs in breadth-first
-	 * order, starting from the root rcu_node structure.  This
-	 * operation relies on the layout of the hierarchy within the
-	 * rsp->node[] array.  Note that other CPUs will access only
-	 * the leaves of the hierarchy, which still indicate that no
-	 * grace period is in progress, at least until the corresponding
-	 * leaf node has been initialized.  In addition, we have excluded
-	 * CPU-hotplug operations.
-	 *
-	 * Note that the grace period cannot complete until we finish
-	 * the initialization process, as there will be at least one
-	 * qsmask bit set in the root node until that time, namely the
-	 * one corresponding to this CPU, due to the fact that we have
-	 * irqs disabled.
-	 */
-	rcu_for_each_node_breadth_first(rsp, rnp) {
-		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-		rcu_preempt_check_blocked_tasks(rnp);
-		rnp->qsmask = rnp->qsmaskinit;
-		rnp->gpnum = rsp->gpnum;
-		rnp->completed = rsp->completed;
-		if (rnp == rdp->mynode)
-			rcu_start_gp_per_cpu(rsp, rnp, rdp);
-		rcu_preempt_boost_start_gp(rnp);
-		trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
-					    rnp->level, rnp->grplo,
-					    rnp->grphi, rnp->qsmask);
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-	}
-
-	rnp = rcu_get_root(rsp);
-	raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-	rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
-	raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
+	rsp->gp_flags = 1;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	wake_up(&rsp->gp_wq);
 }
 
 /*
@@ -2629,6 +2668,28 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 }
 
 /*
+ * Spawn the kthread that handles this RCU flavor's grace periods.
+ */
+static int __init rcu_spawn_gp_kthread(void)
+{
+	unsigned long flags;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp;
+	struct task_struct *t;
+
+	for_each_rcu_flavor(rsp) {
+		t = kthread_run(rcu_gp_kthread, rsp, rsp->name);
+		BUG_ON(IS_ERR(t));
+		rnp = rcu_get_root(rsp);
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		rsp->gp_kthread = t;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+	return 0;
+}
+early_initcall(rcu_spawn_gp_kthread);
+
+/*
  * This function is invoked towards the end of the scheduler's initialization
  * process.  Before this is called, the idle task might contain
  * RCU read-side critical sections (during which time, this idle
@@ -2729,6 +2790,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 	}
 
 	rsp->rda = rda;
+	init_waitqueue_head(&rsp->gp_wq);
 	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)
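The rcutree.c side of the patch boils down to a producer/consumer handshake: rcu_start_gp() now just records a request in rsp->gp_flags and wakes rsp->gp_wq, while rcu_gp_kthread() sleeps until the flag is set, clears it, and performs the grace-period initialization that previously ran inside rcu_start_gp() itself. Below is a minimal userspace sketch of that handshake, not kernel code: a pthread mutex and condition variable stand in for the root rcu_node lock and the kernel wait queue, and every name in it is illustrative.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for rnp->lock */
static pthread_cond_t gp_wq = PTHREAD_COND_INITIALIZER;  /* stands in for rsp->gp_wq */
static int gp_flags;                                     /* stands in for rsp->gp_flags */

/* Consumer: plays the role of rcu_gp_kthread(). */
static void *gp_kthread(void *arg)
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (!gp_flags)                 /* like wait_event_interruptible() */
			pthread_cond_wait(&gp_wq, &lock);
		gp_flags = 0;                     /* consume the request */
		pthread_mutex_unlock(&lock);
		printf("initializing a new grace period\n");
	}
	return NULL;
}

/* Producer: plays the role of the new, short rcu_start_gp(). */
static void start_gp(void)
{
	pthread_mutex_lock(&lock);
	gp_flags = 1;                             /* record the request... */
	pthread_mutex_unlock(&lock);
	pthread_cond_signal(&gp_wq);              /* ...then wake the kthread */
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, gp_kthread, NULL);
	start_gp();
	sleep(1);                                 /* give the kthread time to run once */
	return 0;
}

Note the ordering in start_gp(): the flag is set under the lock before the wakeup, mirroring how rcu_start_gp() sets rsp->gp_flags under the root rcu_node lock before calling wake_up(), so the kthread can never miss a request.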
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4d29169f2124..117a15019e99 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -385,6 +385,9 @@ struct rcu_state {
 	u8	boost;				/* Subject to priority boost. */
 	unsigned long gpnum;			/* Current gp number. */
 	unsigned long completed;		/* # of last completed gp. */
+	struct task_struct *gp_kthread;		/* Task for grace periods. */
+	wait_queue_head_t gp_wq;		/* Where GP task waits. */
+	int gp_flags;				/* Commands for GP task. */
 
 	/* End of fields guarded by root rcu_node's lock. */
 
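The other half of the moved code, the rcu_for_each_node_breadth_first() pass, depends on the rcu_node hierarchy being stored level by level in the contiguous rsp->node[] array, so a plain front-to-back walk reaches every parent before any of its children. A small standalone sketch of that layout trick follows; the node values are made up for illustration, and only the array-ordering idea comes from the patch.

#include <stdio.h>

struct node {
	int level;	/* depth in the tree, 0 == root */
	int grplo;	/* lowest-numbered CPU covered by this node */
	int grphi;	/* highest-numbered CPU covered by this node */
};

/* One root covering CPUs 0-15, then its four children, stored level by level. */
static struct node nodes[] = {
	{ 0,  0, 15 },
	{ 1,  0,  3 }, { 1,  4,  7 }, { 1,  8, 11 }, { 1, 12, 15 },
};

int main(void)
{
	/*
	 * Walking the array front to back visits each level in full
	 * before the next one, i.e. breadth-first: no leaf is touched
	 * before the root has already been updated.
	 */
	for (size_t i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++)
		printf("level %d: CPUs %d-%d\n",
		       nodes[i].level, nodes[i].grplo, nodes[i].grphi);
	return 0;
}

Because the root is initialized first, a CPU that reads its own leaf before the walk reaches it still sees the old grace-period number, which is exactly the property the comment in rcu_gp_kthread() relies on.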
