diff options
author | Shaohua Li <shaohua.li@intel.com> | 2011-06-14 01:26:25 -0400 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2011-06-14 18:25:39 -0400 |
commit | 09223371deac67d08ca0b70bd18787920284c967 (patch) | |
tree | 0ad0354a93ac209c8cfd2b79d03d0462aa3c5c58 | |
parent | 9a432736904d386cda28b987b38ba14dae960ecc (diff) |
rcu: Use softirq to address performance regression
Commit a26ac2455ffcf3(rcu: move TREE_RCU from softirq to kthread)
introduced performance regression. In an AIM7 test, this commit degraded
performance by about 40%.
The commit runs rcu callbacks in a kthread instead of softirq. We observed
a high rate of context switches which is caused by this. Our test system has
64 CPUs and HZ is 1000, so we saw more than 64k context switches per second
which is caused by RCU's per-CPU kthread. A trace showed that most of
the time the RCU per-CPU kthread doesn't actually handle any callbacks,
but instead just does a very small amount of work handling grace periods.
This means that RCU's per-CPU kthreads are making the scheduler do quite
a bit of work in order to allow a very small amount of RCU-related
processing to be done.
Alex Shi's analysis determined that this slowdown is due to lock
contention within the scheduler. Unfortunately, as Peter Zijlstra points
out, the scheduler's real-time semantics require global action, which
means that this contention is inherent in real-time scheduling. (Yes,
perhaps someone will come up with a workaround -- otherwise, -rt is not
going to do well on large SMP systems -- but this patch will work around
this issue in the meantime. And "the meantime" might well be forever.)
This patch therefore re-introduces softirq processing to RCU, but only
for core RCU work. RCU callbacks are still executed in kthread context,
so that only a small amount of RCU work runs in softirq context in the
common case. This should minimize ksoftirqd execution, allowing us to
skip boosting of ksoftirqd for CONFIG_RCU_BOOST=y kernels.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Tested-by: "Alex,Shi" <alex.shi@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-rw-r--r-- | Documentation/filesystems/proc.txt | 1 | ||||
-rw-r--r-- | include/linux/interrupt.h | 1 | ||||
-rw-r--r-- | include/trace/events/irq.h | 3 | ||||
-rw-r--r-- | kernel/rcutree.c | 23 | ||||
-rw-r--r-- | kernel/rcutree.h | 1 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 9 | ||||
-rw-r--r-- | kernel/softirq.c | 2 | ||||
-rw-r--r-- | tools/perf/util/trace-event-parse.c | 1 |
8 files changed, 35 insertions, 6 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index f48178024067..db3b1aba32a3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -843,6 +843,7 @@ Provides counts of softirq handlers serviced since boot time, for each cpu. | |||
843 | TASKLET: 0 0 0 290 | 843 | TASKLET: 0 0 0 290 |
844 | SCHED: 27035 26983 26971 26746 | 844 | SCHED: 27035 26983 26971 26746 |
845 | HRTIMER: 0 0 0 0 | 845 | HRTIMER: 0 0 0 0 |
846 | RCU: 1678 1769 2178 2250 | ||
846 | 847 | ||
847 | 848 | ||
848 | 1.3 IDE devices in /proc/ide | 849 | 1.3 IDE devices in /proc/ide |
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 6c12989839d9..f6efed0039ed 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -414,6 +414,7 @@ enum | |||
414 | TASKLET_SOFTIRQ, | 414 | TASKLET_SOFTIRQ, |
415 | SCHED_SOFTIRQ, | 415 | SCHED_SOFTIRQ, |
416 | HRTIMER_SOFTIRQ, | 416 | HRTIMER_SOFTIRQ, |
417 | RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ | ||
417 | 418 | ||
418 | NR_SOFTIRQS | 419 | NR_SOFTIRQS |
419 | }; | 420 | }; |
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index ae045ca7d356..1c09820df585 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h | |||
@@ -20,7 +20,8 @@ struct softirq_action; | |||
20 | softirq_name(BLOCK_IOPOLL), \ | 20 | softirq_name(BLOCK_IOPOLL), \ |
21 | softirq_name(TASKLET), \ | 21 | softirq_name(TASKLET), \ |
22 | softirq_name(SCHED), \ | 22 | softirq_name(SCHED), \ |
23 | softirq_name(HRTIMER)) | 23 | softirq_name(HRTIMER), \ |
24 | softirq_name(RCU)) | ||
24 | 25 | ||
25 | /** | 26 | /** |
26 | * irq_handler_entry - called immediately before the irq action handler | 27 | * irq_handler_entry - called immediately before the irq action handler |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0a8ec5b2e208..ae5c9ea68662 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -100,6 +100,7 @@ static char rcu_kthreads_spawnable; | |||
100 | 100 | ||
101 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | 101 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); |
102 | static void invoke_rcu_cpu_kthread(void); | 102 | static void invoke_rcu_cpu_kthread(void); |
103 | static void __invoke_rcu_cpu_kthread(void); | ||
103 | 104 | ||
104 | #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ | 105 | #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ |
105 | 106 | ||
@@ -1442,13 +1443,21 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1442 | } | 1443 | } |
1443 | 1444 | ||
1444 | /* If there are callbacks ready, invoke them. */ | 1445 | /* If there are callbacks ready, invoke them. */ |
1445 | rcu_do_batch(rsp, rdp); | 1446 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1447 | __invoke_rcu_cpu_kthread(); | ||
1448 | } | ||
1449 | |||
1450 | static void rcu_kthread_do_work(void) | ||
1451 | { | ||
1452 | rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); | ||
1453 | rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | ||
1454 | rcu_preempt_do_callbacks(); | ||
1446 | } | 1455 | } |
1447 | 1456 | ||
1448 | /* | 1457 | /* |
1449 | * Do softirq processing for the current CPU. | 1458 | * Do softirq processing for the current CPU. |
1450 | */ | 1459 | */ |
1451 | static void rcu_process_callbacks(void) | 1460 | static void rcu_process_callbacks(struct softirq_action *unused) |
1452 | { | 1461 | { |
1453 | __rcu_process_callbacks(&rcu_sched_state, | 1462 | __rcu_process_callbacks(&rcu_sched_state, |
1454 | &__get_cpu_var(rcu_sched_data)); | 1463 | &__get_cpu_var(rcu_sched_data)); |
@@ -1465,7 +1474,7 @@ static void rcu_process_callbacks(void) | |||
1465 | * the current CPU with interrupts disabled, the rcu_cpu_kthread_task | 1474 | * the current CPU with interrupts disabled, the rcu_cpu_kthread_task |
1466 | * cannot disappear out from under us. | 1475 | * cannot disappear out from under us. |
1467 | */ | 1476 | */ |
1468 | static void invoke_rcu_cpu_kthread(void) | 1477 | static void __invoke_rcu_cpu_kthread(void) |
1469 | { | 1478 | { |
1470 | unsigned long flags; | 1479 | unsigned long flags; |
1471 | 1480 | ||
@@ -1479,6 +1488,11 @@ static void invoke_rcu_cpu_kthread(void) | |||
1479 | local_irq_restore(flags); | 1488 | local_irq_restore(flags); |
1480 | } | 1489 | } |
1481 | 1490 | ||
1491 | static void invoke_rcu_cpu_kthread(void) | ||
1492 | { | ||
1493 | raise_softirq(RCU_SOFTIRQ); | ||
1494 | } | ||
1495 | |||
1482 | /* | 1496 | /* |
1483 | * Wake up the specified per-rcu_node-structure kthread. | 1497 | * Wake up the specified per-rcu_node-structure kthread. |
1484 | * Because the per-rcu_node kthreads are immortal, we don't need | 1498 | * Because the per-rcu_node kthreads are immortal, we don't need |
@@ -1613,7 +1627,7 @@ static int rcu_cpu_kthread(void *arg) | |||
1613 | *workp = 0; | 1627 | *workp = 0; |
1614 | local_irq_restore(flags); | 1628 | local_irq_restore(flags); |
1615 | if (work) | 1629 | if (work) |
1616 | rcu_process_callbacks(); | 1630 | rcu_kthread_do_work(); |
1617 | local_bh_enable(); | 1631 | local_bh_enable(); |
1618 | if (*workp != 0) | 1632 | if (*workp != 0) |
1619 | spincnt++; | 1633 | spincnt++; |
@@ -2387,6 +2401,7 @@ void __init rcu_init(void) | |||
2387 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); | 2401 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
2388 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | 2402 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
2389 | __rcu_init_preempt(); | 2403 | __rcu_init_preempt(); |
2404 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | ||
2390 | 2405 | ||
2391 | /* | 2406 | /* |
2392 | * We don't need protection against CPU-hotplug here because | 2407 | * We don't need protection against CPU-hotplug here because |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7b9a08b4aaea..0fed6b934d2a 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -439,6 +439,7 @@ static void rcu_preempt_offline_cpu(int cpu); | |||
439 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 439 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
440 | static void rcu_preempt_check_callbacks(int cpu); | 440 | static void rcu_preempt_check_callbacks(int cpu); |
441 | static void rcu_preempt_process_callbacks(void); | 441 | static void rcu_preempt_process_callbacks(void); |
442 | static void rcu_preempt_do_callbacks(void); | ||
442 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); | 443 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); |
443 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) | 444 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) |
444 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); | 445 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ea2e2fb79e81..38d09c5f2b41 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -602,6 +602,11 @@ static void rcu_preempt_process_callbacks(void) | |||
602 | &__get_cpu_var(rcu_preempt_data)); | 602 | &__get_cpu_var(rcu_preempt_data)); |
603 | } | 603 | } |
604 | 604 | ||
605 | static void rcu_preempt_do_callbacks(void) | ||
606 | { | ||
607 | rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); | ||
608 | } | ||
609 | |||
605 | /* | 610 | /* |
606 | * Queue a preemptible-RCU callback for invocation after a grace period. | 611 | * Queue a preemptible-RCU callback for invocation after a grace period. |
607 | */ | 612 | */ |
@@ -997,6 +1002,10 @@ static void rcu_preempt_process_callbacks(void) | |||
997 | { | 1002 | { |
998 | } | 1003 | } |
999 | 1004 | ||
1005 | static void rcu_preempt_do_callbacks(void) | ||
1006 | { | ||
1007 | } | ||
1008 | |||
1000 | /* | 1009 | /* |
1001 | * Wait for an rcu-preempt grace period, but make it happen quickly. | 1010 | * Wait for an rcu-preempt grace period, but make it happen quickly. |
1002 | * But because preemptible RCU does not exist, map to rcu-sched. | 1011 | * But because preemptible RCU does not exist, map to rcu-sched. |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 13960170cad4..40cf63ddd4b3 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | |||
58 | 58 | ||
59 | char *softirq_to_name[NR_SOFTIRQS] = { | 59 | char *softirq_to_name[NR_SOFTIRQS] = { |
60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", | 60 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", |
61 | "TASKLET", "SCHED", "HRTIMER" | 61 | "TASKLET", "SCHED", "HRTIMER", "RCU" |
62 | }; | 62 | }; |
63 | 63 | ||
64 | /* | 64 | /* |
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 1e88485c16a0..0a7ed5b5e281 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c | |||
@@ -2187,6 +2187,7 @@ static const struct flag flags[] = { | |||
2187 | { "TASKLET_SOFTIRQ", 6 }, | 2187 | { "TASKLET_SOFTIRQ", 6 }, |
2188 | { "SCHED_SOFTIRQ", 7 }, | 2188 | { "SCHED_SOFTIRQ", 7 }, |
2189 | { "HRTIMER_SOFTIRQ", 8 }, | 2189 | { "HRTIMER_SOFTIRQ", 8 }, |
2190 | { "RCU_SOFTIRQ", 9 }, | ||
2190 | 2191 | ||
2191 | { "HRTIMER_NORESTART", 0 }, | 2192 | { "HRTIMER_NORESTART", 0 }, |
2192 | { "HRTIMER_RESTART", 1 }, | 2193 | { "HRTIMER_RESTART", 1 }, |