aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcutree.c
diff options
context:
space:
mode:
author: Paul E. McKenney <paul.mckenney@linaro.org> 2011-09-30 15:10:22 -0400
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2011-12-11 13:31:24 -0500
commit: 9b2e4f1880b789be1f24f9684f7a54b90310b5c0 (patch)
tree: 1fa922e0616e298837a7079cb49118188a58186c /kernel/rcutree.c
parent: b804cb9e91c6c304959c69d4f9daeef4ffdba71c (diff)
rcu: Track idleness independent of idle tasks
Earlier versions of RCU used the scheduling-clock tick to detect idleness by checking for the idle task, but handled idleness differently for CONFIG_NO_HZ=y. But there are now a number of uses of RCU read-side critical sections in the idle task, for example, for tracing. A more fine-grained detection of idleness is therefore required.

This commit presses the old dyntick-idle code into full-time service, so that rcu_idle_enter(), previously known as rcu_enter_nohz(), is always invoked at the beginning of an idle loop iteration. Similarly, rcu_idle_exit(), previously known as rcu_exit_nohz(), is always invoked at the end of an idle-loop iteration. This allows the idle task to use RCU everywhere except between consecutive rcu_idle_enter() and rcu_idle_exit() calls, in turn allowing architecture maintainers to specify exactly where in the idle loop RCU may be used.

Because some of the userspace upcall uses can result in what looks to RCU like half of an interrupt, it is not possible to expect that the irq_enter() and irq_exit() hooks will give exact counts. This patch therefore expands the ->dynticks_nesting counter to 64 bits and uses two separate bitfields to count process/idle transitions and interrupt entry/exit transitions. It is presumed that userspace upcalls do not happen in the idle loop or from usermode execution (though usermode might do a system call that results in an upcall). The counter is hard-reset on each process/idle transition, which prevents the interrupt entry/exit error from accumulating. Overflow is avoided by the 64-bitness of the ->dynticks_nesting counter.

This commit also adds warnings if a non-idle task asks RCU to enter idle state (and these checks will need some adjustment before applying Frederic's OS-jitter patches (http://lkml.org/lkml/2011/10/7/246)). In addition, validation of ->dynticks and ->dynticks_nesting is added.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r-- kernel/rcutree.c 229
1 files changed, 162 insertions, 67 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5d0b55a3a8c0..1c40326724f6 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -195,12 +195,10 @@ void rcu_note_context_switch(int cpu)
195} 195}
196EXPORT_SYMBOL_GPL(rcu_note_context_switch); 196EXPORT_SYMBOL_GPL(rcu_note_context_switch);
197 197
198#ifdef CONFIG_NO_HZ
199DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 198DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
200 .dynticks_nesting = 1, 199 .dynticks_nesting = LLONG_MAX / 2,
201 .dynticks = ATOMIC_INIT(1), 200 .dynticks = ATOMIC_INIT(1),
202}; 201};
203#endif /* #ifdef CONFIG_NO_HZ */
204 202
205static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ 203static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
206static int qhimark = 10000; /* If this many pending, ignore blimit. */ 204static int qhimark = 10000; /* If this many pending, ignore blimit. */
@@ -328,11 +326,11 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
328 return 1; 326 return 1;
329 } 327 }
330 328
331 /* If preemptible RCU, no point in sending reschedule IPI. */ 329 /*
332 if (rdp->preemptible) 330 * The CPU is online, so send it a reschedule IPI. This forces
333 return 0; 331 * it through the scheduler, and (inefficiently) also handles cases
334 332 * where idle loops fail to inform RCU about the CPU being idle.
335 /* The CPU is online, so send it a reschedule IPI. */ 333 */
336 if (rdp->cpu != smp_processor_id()) 334 if (rdp->cpu != smp_processor_id())
337 smp_send_reschedule(rdp->cpu); 335 smp_send_reschedule(rdp->cpu);
338 else 336 else
@@ -343,51 +341,97 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
343 341
344#endif /* #ifdef CONFIG_SMP */ 342#endif /* #ifdef CONFIG_SMP */
345 343
346#ifdef CONFIG_NO_HZ 344/*
345 * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
346 *
347 * If the new value of the ->dynticks_nesting counter now is zero,
348 * we really have entered idle, and must do the appropriate accounting.
349 * The caller must have disabled interrupts.
350 */
351static void rcu_idle_enter_common(struct rcu_dynticks *rdtp)
352{
353 if (rdtp->dynticks_nesting) {
354 trace_rcu_dyntick("--=", rdtp->dynticks_nesting);
355 return;
356 }
357 trace_rcu_dyntick("Start", rdtp->dynticks_nesting);
358 if (!idle_cpu(smp_processor_id())) {
359 WARN_ON_ONCE(1); /* must be idle task! */
360 trace_rcu_dyntick("Error on entry: not idle task",
361 rdtp->dynticks_nesting);
362 ftrace_dump(DUMP_ALL);
363 }
364 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
365 smp_mb__before_atomic_inc(); /* See above. */
366 atomic_inc(&rdtp->dynticks);
367 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
368 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
369}
347 370
348/** 371/**
349 * rcu_enter_nohz - inform RCU that current CPU is entering nohz 372 * rcu_idle_enter - inform RCU that current CPU is entering idle
350 * 373 *
351 * Enter nohz mode, in other words, -leave- the mode in which RCU 374 * Enter idle mode, in other words, -leave- the mode in which RCU
352 * read-side critical sections can occur. (Though RCU read-side 375 * read-side critical sections can occur. (Though RCU read-side
353 * critical sections can occur in irq handlers in nohz mode, a possibility 376 * critical sections can occur in irq handlers in idle, a possibility
354 * handled by rcu_irq_enter() and rcu_irq_exit()). 377 * handled by irq_enter() and irq_exit().)
378 *
379 * We crowbar the ->dynticks_nesting field to zero to allow for
380 * the possibility of usermode upcalls having messed up our count
381 * of interrupt nesting level during the prior busy period.
355 */ 382 */
356void rcu_enter_nohz(void) 383void rcu_idle_enter(void)
357{ 384{
358 unsigned long flags; 385 unsigned long flags;
359 struct rcu_dynticks *rdtp; 386 struct rcu_dynticks *rdtp;
360 387
361 local_irq_save(flags); 388 local_irq_save(flags);
362 rdtp = &__get_cpu_var(rcu_dynticks); 389 rdtp = &__get_cpu_var(rcu_dynticks);
363 if (--rdtp->dynticks_nesting) { 390 rdtp->dynticks_nesting = 0;
364 local_irq_restore(flags); 391 rcu_idle_enter_common(rdtp);
365 return;
366 }
367 trace_rcu_dyntick("Start");
368 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
369 smp_mb__before_atomic_inc(); /* See above. */
370 atomic_inc(&rdtp->dynticks);
371 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
372 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
373 local_irq_restore(flags); 392 local_irq_restore(flags);
374} 393}
375 394
376/* 395/**
377 * rcu_exit_nohz - inform RCU that current CPU is leaving nohz 396 * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
397 *
398 * Exit from an interrupt handler, which might possibly result in entering
399 * idle mode, in other words, leaving the mode in which read-side critical
400 * sections can occur.
378 * 401 *
379 * Exit nohz mode, in other words, -enter- the mode in which RCU 402 * This code assumes that the idle loop never does anything that might
380 * read-side critical sections normally occur. 403 * result in unbalanced calls to irq_enter() and irq_exit(). If your
404 * architecture violates this assumption, RCU will give you what you
405 * deserve, good and hard. But very infrequently and irreproducibly.
406 *
407 * Use things like work queues to work around this limitation.
408 *
409 * You have been warned.
381 */ 410 */
382void rcu_exit_nohz(void) 411void rcu_irq_exit(void)
383{ 412{
384 unsigned long flags; 413 unsigned long flags;
385 struct rcu_dynticks *rdtp; 414 struct rcu_dynticks *rdtp;
386 415
387 local_irq_save(flags); 416 local_irq_save(flags);
388 rdtp = &__get_cpu_var(rcu_dynticks); 417 rdtp = &__get_cpu_var(rcu_dynticks);
389 if (rdtp->dynticks_nesting++) { 418 rdtp->dynticks_nesting--;
390 local_irq_restore(flags); 419 WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
420 rcu_idle_enter_common(rdtp);
421 local_irq_restore(flags);
422}
423
424/*
425 * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
426 *
427 * If the old value of the ->dynticks_nesting counter was zero,
428 * we really have exited idle, and must do the appropriate accounting.
429 * The caller must have disabled interrupts.
430 */
431static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
432{
433 if (oldval) {
434 trace_rcu_dyntick("++=", rdtp->dynticks_nesting);
391 return; 435 return;
392 } 436 }
393 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ 437 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
@@ -395,7 +439,71 @@ void rcu_exit_nohz(void)
395 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 439 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
396 smp_mb__after_atomic_inc(); /* See above. */ 440 smp_mb__after_atomic_inc(); /* See above. */
397 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 441 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
398 trace_rcu_dyntick("End"); 442 trace_rcu_dyntick("End", oldval);
443 if (!idle_cpu(smp_processor_id())) {
444 WARN_ON_ONCE(1); /* must be idle task! */
445 trace_rcu_dyntick("Error on exit: not idle task", oldval);
446 ftrace_dump(DUMP_ALL);
447 }
448}
449
450/**
451 * rcu_idle_exit - inform RCU that current CPU is leaving idle
452 *
453 * Exit idle mode, in other words, -enter- the mode in which RCU
454 * read-side critical sections can occur.
455 *
456 * We crowbar the ->dynticks_nesting field to LLONG_MAX/2 to allow for
457 * the possibility of usermode upcalls messing up our count
458 * of interrupt nesting level during the busy period that is just
459 * now starting.
460 */
461void rcu_idle_exit(void)
462{
463 unsigned long flags;
464 struct rcu_dynticks *rdtp;
465 long long oldval;
466
467 local_irq_save(flags);
468 rdtp = &__get_cpu_var(rcu_dynticks);
469 oldval = rdtp->dynticks_nesting;
470 WARN_ON_ONCE(oldval != 0);
471 rdtp->dynticks_nesting = LLONG_MAX / 2;
472 rcu_idle_exit_common(rdtp, oldval);
473 local_irq_restore(flags);
474}
475
476/**
477 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
478 *
479 * Enter an interrupt handler, which might possibly result in exiting
480 * idle mode, in other words, entering the mode in which read-side critical
481 * sections can occur.
482 *
483 * Note that the Linux kernel is fully capable of entering an interrupt
484 * handler that it never exits, for example when doing upcalls to
485 * user mode! This code assumes that the idle loop never does upcalls to
486 * user mode. If your architecture does do upcalls from the idle loop (or
487 * does anything else that results in unbalanced calls to the irq_enter()
488 * and irq_exit() functions), RCU will give you what you deserve, good
489 * and hard. But very infrequently and irreproducibly.
490 *
491 * Use things like work queues to work around this limitation.
492 *
493 * You have been warned.
494 */
495void rcu_irq_enter(void)
496{
497 unsigned long flags;
498 struct rcu_dynticks *rdtp;
499 long long oldval;
500
501 local_irq_save(flags);
502 rdtp = &__get_cpu_var(rcu_dynticks);
503 oldval = rdtp->dynticks_nesting;
504 rdtp->dynticks_nesting++;
505 WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
506 rcu_idle_exit_common(rdtp, oldval);
399 local_irq_restore(flags); 507 local_irq_restore(flags);
400} 508}
401 509
@@ -442,27 +550,32 @@ void rcu_nmi_exit(void)
442 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 550 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
443} 551}
444 552
553#ifdef CONFIG_PROVE_RCU
554
445/** 555/**
446 * rcu_irq_enter - inform RCU of entry to hard irq context 556 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
447 * 557 *
448 * If the CPU was idle with dynamic ticks active, this updates the 558 * If the current CPU is in its idle loop and is neither in an interrupt
449 * rdtp->dynticks to let the RCU handling know that the CPU is active. 559 * nor NMI handler, return true. The caller must have at least disabled
560 * preemption.
450 */ 561 */
451void rcu_irq_enter(void) 562int rcu_is_cpu_idle(void)
452{ 563{
453 rcu_exit_nohz(); 564 return (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
454} 565}
455 566
567#endif /* #ifdef CONFIG_PROVE_RCU */
568
456/** 569/**
457 * rcu_irq_exit - inform RCU of exit from hard irq context 570 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
458 * 571 *
459 * If the CPU was idle with dynamic ticks active, update the rdp->dynticks 572 * If the current CPU is idle or running at a first-level (not nested)
460 * to put let the RCU handling be aware that the CPU is going back to idle 573 * interrupt from idle, return true. The caller must have at least
461 * with no ticks. 574 * disabled preemption.
462 */ 575 */
463void rcu_irq_exit(void) 576int rcu_is_cpu_rrupt_from_idle(void)
464{ 577{
465 rcu_enter_nohz(); 578 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
466} 579}
467 580
468#ifdef CONFIG_SMP 581#ifdef CONFIG_SMP
@@ -512,24 +625,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
512 625
513#endif /* #ifdef CONFIG_SMP */ 626#endif /* #ifdef CONFIG_SMP */
514 627
515#else /* #ifdef CONFIG_NO_HZ */
516
517#ifdef CONFIG_SMP
518
519static int dyntick_save_progress_counter(struct rcu_data *rdp)
520{
521 return 0;
522}
523
524static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
525{
526 return rcu_implicit_offline_qs(rdp);
527}
528
529#endif /* #ifdef CONFIG_SMP */
530
531#endif /* #else #ifdef CONFIG_NO_HZ */
532
533int rcu_cpu_stall_suppress __read_mostly; 628int rcu_cpu_stall_suppress __read_mostly;
534 629
535static void record_gp_stall_check_time(struct rcu_state *rsp) 630static void record_gp_stall_check_time(struct rcu_state *rsp)
@@ -1334,16 +1429,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1334 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). 1429 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
1335 * Also schedule RCU core processing. 1430 * Also schedule RCU core processing.
1336 * 1431 *
1337 * This function must be called with hardirqs disabled. It is normally 1432 * This function must be called from hardirq context. It is normally
1338 * invoked from the scheduling-clock interrupt. If rcu_pending returns 1433 * invoked from the scheduling-clock interrupt. If rcu_pending returns
1339 * false, there is no point in invoking rcu_check_callbacks(). 1434 * false, there is no point in invoking rcu_check_callbacks().
1340 */ 1435 */
1341void rcu_check_callbacks(int cpu, int user) 1436void rcu_check_callbacks(int cpu, int user)
1342{ 1437{
1343 trace_rcu_utilization("Start scheduler-tick"); 1438 trace_rcu_utilization("Start scheduler-tick");
1344 if (user || 1439 if (user || rcu_is_cpu_rrupt_from_idle()) {
1345 (idle_cpu(cpu) && rcu_scheduler_active &&
1346 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
1347 1440
1348 /* 1441 /*
1349 * Get here if this CPU took its interrupt from user 1442 * Get here if this CPU took its interrupt from user
@@ -1913,9 +2006,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1913 for (i = 0; i < RCU_NEXT_SIZE; i++) 2006 for (i = 0; i < RCU_NEXT_SIZE; i++)
1914 rdp->nxttail[i] = &rdp->nxtlist; 2007 rdp->nxttail[i] = &rdp->nxtlist;
1915 rdp->qlen = 0; 2008 rdp->qlen = 0;
1916#ifdef CONFIG_NO_HZ
1917 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2009 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
1918#endif /* #ifdef CONFIG_NO_HZ */ 2010 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2);
2011 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
1919 rdp->cpu = cpu; 2012 rdp->cpu = cpu;
1920 rdp->rsp = rsp; 2013 rdp->rsp = rsp;
1921 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2014 raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1942,6 +2035,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
1942 rdp->qlen_last_fqs_check = 0; 2035 rdp->qlen_last_fqs_check = 0;
1943 rdp->n_force_qs_snap = rsp->n_force_qs; 2036 rdp->n_force_qs_snap = rsp->n_force_qs;
1944 rdp->blimit = blimit; 2037 rdp->blimit = blimit;
2038 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2);
2039 WARN_ON_ONCE((atomic_read(&rdp->dynticks->dynticks) & 0x1) != 1);
1945 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2040 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1946 2041
1947 /* 2042 /*