aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcutree_plugin.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--kernel/rcutree_plugin.h314
1 files changed, 303 insertions, 11 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index b9bd69a5a4fe..5964f82e2d96 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -66,6 +66,7 @@ static void __init rcu_bootup_announce_oddness(void)
66 66
67struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); 67struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
68DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 68DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
69static struct rcu_state *rcu_state = &rcu_preempt_state;
69 70
70static int rcu_preempted_readers_exp(struct rcu_node *rnp); 71static int rcu_preempted_readers_exp(struct rcu_node *rnp);
71 72
@@ -179,6 +180,10 @@ static void rcu_preempt_note_context_switch(int cpu)
179 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { 180 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
180 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); 181 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
181 rnp->gp_tasks = &t->rcu_node_entry; 182 rnp->gp_tasks = &t->rcu_node_entry;
183#ifdef CONFIG_RCU_BOOST
184 if (rnp->boost_tasks != NULL)
185 rnp->boost_tasks = rnp->gp_tasks;
186#endif /* #ifdef CONFIG_RCU_BOOST */
182 } else { 187 } else {
183 list_add(&t->rcu_node_entry, &rnp->blkd_tasks); 188 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
184 if (rnp->qsmask & rdp->grpmask) 189 if (rnp->qsmask & rdp->grpmask)
@@ -218,7 +223,7 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
218 * for the specified rcu_node structure. If the caller needs a reliable 223 * for the specified rcu_node structure. If the caller needs a reliable
219 * answer, it must hold the rcu_node's ->lock. 224 * answer, it must hold the rcu_node's ->lock.
220 */ 225 */
221static int rcu_preempted_readers(struct rcu_node *rnp) 226static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
222{ 227{
223 return rnp->gp_tasks != NULL; 228 return rnp->gp_tasks != NULL;
224} 229}
@@ -236,7 +241,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
236 unsigned long mask; 241 unsigned long mask;
237 struct rcu_node *rnp_p; 242 struct rcu_node *rnp_p;
238 243
239 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { 244 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
240 raw_spin_unlock_irqrestore(&rnp->lock, flags); 245 raw_spin_unlock_irqrestore(&rnp->lock, flags);
241 return; /* Still need more quiescent states! */ 246 return; /* Still need more quiescent states! */
242 } 247 }
@@ -325,7 +330,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
325 break; 330 break;
326 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 331 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
327 } 332 }
328 empty = !rcu_preempted_readers(rnp); 333 empty = !rcu_preempt_blocked_readers_cgp(rnp);
329 empty_exp = !rcu_preempted_readers_exp(rnp); 334 empty_exp = !rcu_preempted_readers_exp(rnp);
330 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 335 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
331 np = rcu_next_node_entry(t, rnp); 336 np = rcu_next_node_entry(t, rnp);
@@ -334,6 +339,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
334 rnp->gp_tasks = np; 339 rnp->gp_tasks = np;
335 if (&t->rcu_node_entry == rnp->exp_tasks) 340 if (&t->rcu_node_entry == rnp->exp_tasks)
336 rnp->exp_tasks = np; 341 rnp->exp_tasks = np;
342#ifdef CONFIG_RCU_BOOST
343 if (&t->rcu_node_entry == rnp->boost_tasks)
344 rnp->boost_tasks = np;
345#endif /* #ifdef CONFIG_RCU_BOOST */
337 t->rcu_blocked_node = NULL; 346 t->rcu_blocked_node = NULL;
338 347
339 /* 348 /*
@@ -346,6 +355,15 @@ static void rcu_read_unlock_special(struct task_struct *t)
346 else 355 else
347 rcu_report_unblock_qs_rnp(rnp, flags); 356 rcu_report_unblock_qs_rnp(rnp, flags);
348 357
358#ifdef CONFIG_RCU_BOOST
359 /* Unboost if we were boosted. */
360 if (special & RCU_READ_UNLOCK_BOOSTED) {
361 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
362 rt_mutex_unlock(t->rcu_boost_mutex);
363 t->rcu_boost_mutex = NULL;
364 }
365#endif /* #ifdef CONFIG_RCU_BOOST */
366
349 /* 367 /*
350 * If this was the last task on the expedited lists, 368 * If this was the last task on the expedited lists,
351 * then we need to report up the rcu_node hierarchy. 369 * then we need to report up the rcu_node hierarchy.
@@ -391,7 +409,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
391 unsigned long flags; 409 unsigned long flags;
392 struct task_struct *t; 410 struct task_struct *t;
393 411
394 if (!rcu_preempted_readers(rnp)) 412 if (!rcu_preempt_blocked_readers_cgp(rnp))
395 return; 413 return;
396 raw_spin_lock_irqsave(&rnp->lock, flags); 414 raw_spin_lock_irqsave(&rnp->lock, flags);
397 t = list_entry(rnp->gp_tasks, 415 t = list_entry(rnp->gp_tasks,
@@ -430,7 +448,7 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
430{ 448{
431 struct task_struct *t; 449 struct task_struct *t;
432 450
433 if (!rcu_preempted_readers(rnp)) 451 if (!rcu_preempt_blocked_readers_cgp(rnp))
434 return; 452 return;
435 t = list_entry(rnp->gp_tasks, 453 t = list_entry(rnp->gp_tasks,
436 struct task_struct, rcu_node_entry); 454 struct task_struct, rcu_node_entry);
@@ -460,7 +478,7 @@ static void rcu_preempt_stall_reset(void)
460 */ 478 */
461static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 479static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
462{ 480{
463 WARN_ON_ONCE(rcu_preempted_readers(rnp)); 481 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
464 if (!list_empty(&rnp->blkd_tasks)) 482 if (!list_empty(&rnp->blkd_tasks))
465 rnp->gp_tasks = rnp->blkd_tasks.next; 483 rnp->gp_tasks = rnp->blkd_tasks.next;
466 WARN_ON_ONCE(rnp->qsmask); 484 WARN_ON_ONCE(rnp->qsmask);
@@ -509,7 +527,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
509 * absolutely necessary, but this is a good performance/complexity 527 * absolutely necessary, but this is a good performance/complexity
510 * tradeoff. 528 * tradeoff.
511 */ 529 */
512 if (rcu_preempted_readers(rnp)) 530 if (rcu_preempt_blocked_readers_cgp(rnp))
513 retval |= RCU_OFL_TASKS_NORM_GP; 531 retval |= RCU_OFL_TASKS_NORM_GP;
514 if (rcu_preempted_readers_exp(rnp)) 532 if (rcu_preempted_readers_exp(rnp))
515 retval |= RCU_OFL_TASKS_EXP_GP; 533 retval |= RCU_OFL_TASKS_EXP_GP;
@@ -525,8 +543,22 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
525 rnp_root->gp_tasks = rnp->gp_tasks; 543 rnp_root->gp_tasks = rnp->gp_tasks;
526 if (&t->rcu_node_entry == rnp->exp_tasks) 544 if (&t->rcu_node_entry == rnp->exp_tasks)
527 rnp_root->exp_tasks = rnp->exp_tasks; 545 rnp_root->exp_tasks = rnp->exp_tasks;
546#ifdef CONFIG_RCU_BOOST
547 if (&t->rcu_node_entry == rnp->boost_tasks)
548 rnp_root->boost_tasks = rnp->boost_tasks;
549#endif /* #ifdef CONFIG_RCU_BOOST */
528 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 550 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
529 } 551 }
552
553#ifdef CONFIG_RCU_BOOST
554 /* In case root is being boosted and leaf is not. */
555 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
556 if (rnp_root->boost_tasks != NULL &&
557 rnp_root->boost_tasks != rnp_root->gp_tasks)
558 rnp_root->boost_tasks = rnp_root->gp_tasks;
559 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
560#endif /* #ifdef CONFIG_RCU_BOOST */
561
530 rnp->gp_tasks = NULL; 562 rnp->gp_tasks = NULL;
531 rnp->exp_tasks = NULL; 563 rnp->exp_tasks = NULL;
532 return retval; 564 return retval;
@@ -684,6 +716,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
684 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 716 raw_spin_lock(&rnp->lock); /* irqs already disabled */
685 if (!list_empty(&rnp->blkd_tasks)) { 717 if (!list_empty(&rnp->blkd_tasks)) {
686 rnp->exp_tasks = rnp->blkd_tasks.next; 718 rnp->exp_tasks = rnp->blkd_tasks.next;
719 rcu_initiate_boost(rnp);
687 must_wait = 1; 720 must_wait = 1;
688 } 721 }
689 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 722 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
@@ -830,6 +863,8 @@ void exit_rcu(void)
830 863
831#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 864#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
832 865
866static struct rcu_state *rcu_state = &rcu_sched_state;
867
833/* 868/*
834 * Tell them what RCU they are running. 869 * Tell them what RCU they are running.
835 */ 870 */
@@ -870,7 +905,7 @@ static void rcu_preempt_note_context_switch(int cpu)
870 * Because preemptable RCU does not exist, there are never any preempted 905 * Because preemptable RCU does not exist, there are never any preempted
871 * RCU readers. 906 * RCU readers.
872 */ 907 */
873static int rcu_preempted_readers(struct rcu_node *rnp) 908static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
874{ 909{
875 return 0; 910 return 0;
876} 911}
@@ -1034,6 +1069,263 @@ static void __init __rcu_init_preempt(void)
1034 1069
1035#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1070#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1036 1071
1072#ifdef CONFIG_RCU_BOOST
1073
1074#include "rtmutex_common.h"
1075
1076/*
1077 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1078 * or ->boost_tasks, advancing the pointer to the next task in the
1079 * ->blkd_tasks list.
1080 *
1081 * Note that irqs must be enabled: boosting the task can block.
1082 * Returns 1 if there are more tasks needing to be boosted.
1083 */
1084static int rcu_boost(struct rcu_node *rnp)
1085{
1086 unsigned long flags;
1087 struct rt_mutex mtx;
1088 struct task_struct *t;
1089 struct list_head *tb;
1090
1091 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
1092 return 0; /* Nothing left to boost. */
1093
1094 raw_spin_lock_irqsave(&rnp->lock, flags);
1095
1096 /*
1097 * Recheck under the lock: all tasks in need of boosting
1098 * might exit their RCU read-side critical sections on their own.
1099 */
1100 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
1101 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1102 return 0;
1103 }
1104
1105 /*
1106 * Preferentially boost tasks blocking expedited grace periods.
1107 * This cannot starve the normal grace periods because a second
1108 * expedited grace period must boost all blocked tasks, including
1109 * those blocking the pre-existing normal grace period.
1110 */
1111 if (rnp->exp_tasks != NULL)
1112 tb = rnp->exp_tasks;
1113 else
1114 tb = rnp->boost_tasks;
1115
1116 /*
1117 * We boost task t by manufacturing an rt_mutex that appears to
1118 * be held by task t. We leave a pointer to that rt_mutex where
1119 * task t can find it, and task t will release the mutex when it
1120 * exits its outermost RCU read-side critical section. Then
1121 * simply acquiring this artificial rt_mutex will boost task
1122 * t's priority. (Thanks to tglx for suggesting this approach!)
1123 *
1124 * Note that task t must acquire rnp->lock to remove itself from
1125 * the ->blkd_tasks list, which it will do from exit() if from
1126 * nowhere else. We therefore are guaranteed that task t will
1127 * stay around at least until we drop rnp->lock. Note that
1128 * rnp->lock also resolves races between our priority boosting
1129 * and task t's exiting its outermost RCU read-side critical
1130 * section.
1131 */
1132 t = container_of(tb, struct task_struct, rcu_node_entry);
1133 rt_mutex_init_proxy_locked(&mtx, t);
1134 t->rcu_boost_mutex = &mtx;
1135 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
1136 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1137 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1138 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
1139
1140 return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL;
1141}
1142
1143/*
1144 * Timer handler to initiate waking up of boost kthreads that
1145 * have yielded the CPU due to excessive numbers of tasks to
1146 * boost. We wake up the per-rcu_node kthread, which in turn
1147 * will wake up the booster kthread.
1148 */
1149static void rcu_boost_kthread_timer(unsigned long arg)
1150{
1151 unsigned long flags;
1152 struct rcu_node *rnp = (struct rcu_node *)arg;
1153
1154 raw_spin_lock_irqsave(&rnp->lock, flags);
1155 invoke_rcu_node_kthread(rnp);
1156 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1157}
1158
1159/*
1160 * Priority-boosting kthread. One per leaf rcu_node and one for the
1161 * root rcu_node.
1162 */
1163static int rcu_boost_kthread(void *arg)
1164{
1165 struct rcu_node *rnp = (struct rcu_node *)arg;
1166 int spincnt = 0;
1167 int more2boost;
1168
1169 for (;;) {
1170 wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
1171 rnp->exp_tasks ||
1172 kthread_should_stop());
1173 if (kthread_should_stop())
1174 break;
1175 more2boost = rcu_boost(rnp);
1176 if (more2boost)
1177 spincnt++;
1178 else
1179 spincnt = 0;
1180 if (spincnt > 10) {
1181 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
1182 spincnt = 0;
1183 }
1184 }
1185 return 0;
1186}
1187
1188/*
1189 * Check to see if it is time to start boosting RCU readers that are
1190 * blocking the current grace period, and, if so, tell the per-rcu_node
1191 * kthread to start boosting them. If there is an expedited grace
1192 * period in progress, it is always time to boost.
1193 *
1194 * The caller must hold rnp->lock.
1195 */
1196static void rcu_initiate_boost(struct rcu_node *rnp)
1197{
1198 struct task_struct *t;
1199
1200 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL)
1201 return;
1202 if (rnp->exp_tasks != NULL ||
1203 (rnp->gp_tasks != NULL &&
1204 rnp->boost_tasks == NULL &&
1205 rnp->qsmask == 0 &&
1206 ULONG_CMP_GE(jiffies, rnp->boost_time))) {
1207 if (rnp->exp_tasks == NULL)
1208 rnp->boost_tasks = rnp->gp_tasks;
1209 t = rnp->boost_kthread_task;
1210 if (t != NULL)
1211 wake_up_process(t);
1212 }
1213}
1214
1215static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
1216 cpumask_var_t cm)
1217{
1218 unsigned long flags;
1219 struct task_struct *t;
1220
1221 raw_spin_lock_irqsave(&rnp->lock, flags);
1222 t = rnp->boost_kthread_task;
1223 if (t != NULL)
1224 set_cpus_allowed_ptr(rnp->boost_kthread_task, cm);
1225 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1226}
1227
1228#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
1229
1230/*
1231 * Do priority-boost accounting for the start of a new grace period.
1232 */
1233static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1234{
1235 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
1236}
1237
1238/*
1239 * Initialize the RCU-boost waitqueue.
1240 */
1241static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
1242{
1243 init_waitqueue_head(&rnp->boost_wq);
1244}
1245
1246/*
1247 * Create an RCU-boost kthread for the specified node if one does not
1248 * already exist. We only create this kthread for preemptible RCU.
1249 * Returns zero if all is well, a negated errno otherwise.
1250 */
1251static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1252 struct rcu_node *rnp,
1253 int rnp_index)
1254{
1255 unsigned long flags;
1256 struct sched_param sp;
1257 struct task_struct *t;
1258
1259 if (&rcu_preempt_state != rsp)
1260 return 0;
1261 if (rnp->boost_kthread_task != NULL)
1262 return 0;
1263 t = kthread_create(rcu_boost_kthread, (void *)rnp,
1264 "rcub%d", rnp_index);
1265 if (IS_ERR(t))
1266 return PTR_ERR(t);
1267 raw_spin_lock_irqsave(&rnp->lock, flags);
1268 rnp->boost_kthread_task = t;
1269 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1270 wake_up_process(t);
1271 sp.sched_priority = RCU_KTHREAD_PRIO;
1272 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1273 return 0;
1274}
1275
1276#ifdef CONFIG_HOTPLUG_CPU
1277
1278static void rcu_stop_boost_kthread(struct rcu_node *rnp)
1279{
1280 unsigned long flags;
1281 struct task_struct *t;
1282
1283 raw_spin_lock_irqsave(&rnp->lock, flags);
1284 t = rnp->boost_kthread_task;
1285 rnp->boost_kthread_task = NULL;
1286 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1287 if (t != NULL)
1288 kthread_stop(t);
1289}
1290
1291#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1292
1293#else /* #ifdef CONFIG_RCU_BOOST */
1294
/* Without CONFIG_RCU_BOOST there is nothing to boost: no-op. */
static void rcu_initiate_boost(struct rcu_node *rnp)
{
}
1298
1299static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
1300 cpumask_var_t cm)
1301{
1302}
1303
/* Without CONFIG_RCU_BOOST no boost accounting is kept: no-op. */
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}
1307
1308static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
1309{
1310}
1311
1312static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1313 struct rcu_node *rnp,
1314 int rnp_index)
1315{
1316 return 0;
1317}
1318
1319#ifdef CONFIG_HOTPLUG_CPU
1320
/* Without CONFIG_RCU_BOOST there is no boost kthread to stop: no-op. */
static void rcu_stop_boost_kthread(struct rcu_node *rnp)
{
}
1324
1325#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1326
1327#endif /* #else #ifdef CONFIG_RCU_BOOST */
1328
1037#ifndef CONFIG_SMP 1329#ifndef CONFIG_SMP
1038 1330
1039void synchronize_sched_expedited(void) 1331void synchronize_sched_expedited(void)
@@ -1206,8 +1498,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1206 * 1498 *
1207 * Because it is not legal to invoke rcu_process_callbacks() with irqs 1499 * Because it is not legal to invoke rcu_process_callbacks() with irqs
1208 * disabled, we do one pass of force_quiescent_state(), then do a 1500 * disabled, we do one pass of force_quiescent_state(), then do a
1209 * invoke_rcu_kthread() to cause rcu_process_callbacks() to be invoked later. 1501 * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked
1210 * The per-cpu rcu_dyntick_drain variable controls the sequencing. 1502 * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
1211 */ 1503 */
1212int rcu_needs_cpu(int cpu) 1504int rcu_needs_cpu(int cpu)
1213{ 1505{
@@ -1257,7 +1549,7 @@ int rcu_needs_cpu(int cpu)
1257 1549
1258 /* If RCU callbacks are still pending, RCU still needs this CPU. */ 1550 /* If RCU callbacks are still pending, RCU still needs this CPU. */
1259 if (c) 1551 if (c)
1260 invoke_rcu_kthread(); 1552 invoke_rcu_cpu_kthread();
1261 return c; 1553 return c;
1262} 1554}
1263 1555