aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Shaohua Li <shaohua.li@intel.com> 2011-06-14 01:26:25 -0400
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2011-06-14 18:25:39 -0400
commit 09223371deac67d08ca0b70bd18787920284c967 (patch)
tree 0ad0354a93ac209c8cfd2b79d03d0462aa3c5c58
parent 9a432736904d386cda28b987b38ba14dae960ecc (diff)
rcu: Use softirq to address performance regression
Commit a26ac2455ffcf3 (rcu: move TREE_RCU from softirq to kthread) introduced a performance regression. In an AIM7 test, this commit degraded performance by about 40%. The commit runs rcu callbacks in a kthread instead of softirq. We observed a high rate of context switches, which is caused by this. Our test system has 64 CPUs and HZ is 1000, so we saw more than 64k context switches per second, which is caused by RCU's per-CPU kthread. A trace showed that most of the time the RCU per-CPU kthread doesn't actually handle any callbacks, but instead just does a very small amount of work handling grace periods. This means that RCU's per-CPU kthreads are making the scheduler do quite a bit of work in order to allow a very small amount of RCU-related processing to be done. Alex Shi's analysis determined that this slowdown is due to lock contention within the scheduler. Unfortunately, as Peter Zijlstra points out, the scheduler's real-time semantics require global action, which means that this contention is inherent in real-time scheduling. (Yes, perhaps someone will come up with a workaround -- otherwise, -rt is not going to do well on large SMP systems -- but this patch will work around this issue in the meantime. And "the meantime" might well be forever.) This patch therefore re-introduces softirq processing to RCU, but only for core RCU work. RCU callbacks are still executed in kthread context, so that only a small amount of RCU work runs in softirq context in the common case. This should minimize ksoftirqd execution, allowing us to skip boosting of ksoftirqd for CONFIG_RCU_BOOST=y kernels. Signed-off-by: Shaohua Li <shaohua.li@intel.com> Tested-by: "Alex,Shi" <alex.shi@intel.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-rw-r--r--Documentation/filesystems/proc.txt1
-rw-r--r--include/linux/interrupt.h1
-rw-r--r--include/trace/events/irq.h3
-rw-r--r--kernel/rcutree.c23
-rw-r--r--kernel/rcutree.h1
-rw-r--r--kernel/rcutree_plugin.h9
-rw-r--r--kernel/softirq.c2
-rw-r--r--tools/perf/util/trace-event-parse.c1
8 files changed, 35 insertions, 6 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index f48178024067..db3b1aba32a3 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -843,6 +843,7 @@ Provides counts of softirq handlers serviced since boot time, for each cpu.
843 TASKLET: 0 0 0 290 843 TASKLET: 0 0 0 290
844 SCHED: 27035 26983 26971 26746 844 SCHED: 27035 26983 26971 26746
845 HRTIMER: 0 0 0 0 845 HRTIMER: 0 0 0 0
846 RCU: 1678 1769 2178 2250
846 847
847 848
8481.3 IDE devices in /proc/ide 8491.3 IDE devices in /proc/ide
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 6c12989839d9..f6efed0039ed 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -414,6 +414,7 @@ enum
414 TASKLET_SOFTIRQ, 414 TASKLET_SOFTIRQ,
415 SCHED_SOFTIRQ, 415 SCHED_SOFTIRQ,
416 HRTIMER_SOFTIRQ, 416 HRTIMER_SOFTIRQ,
417 RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
417 418
418 NR_SOFTIRQS 419 NR_SOFTIRQS
419}; 420};
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index ae045ca7d356..1c09820df585 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -20,7 +20,8 @@ struct softirq_action;
20 softirq_name(BLOCK_IOPOLL), \ 20 softirq_name(BLOCK_IOPOLL), \
21 softirq_name(TASKLET), \ 21 softirq_name(TASKLET), \
22 softirq_name(SCHED), \ 22 softirq_name(SCHED), \
23 softirq_name(HRTIMER)) 23 softirq_name(HRTIMER), \
24 softirq_name(RCU))
24 25
25/** 26/**
26 * irq_handler_entry - called immediately before the irq action handler 27 * irq_handler_entry - called immediately before the irq action handler
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0a8ec5b2e208..ae5c9ea68662 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -100,6 +100,7 @@ static char rcu_kthreads_spawnable;
100 100
101static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); 101static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
102static void invoke_rcu_cpu_kthread(void); 102static void invoke_rcu_cpu_kthread(void);
103static void __invoke_rcu_cpu_kthread(void);
103 104
104#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ 105#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */
105 106
@@ -1442,13 +1443,21 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1442 } 1443 }
1443 1444
1444 /* If there are callbacks ready, invoke them. */ 1445 /* If there are callbacks ready, invoke them. */
1445 rcu_do_batch(rsp, rdp); 1446 if (cpu_has_callbacks_ready_to_invoke(rdp))
1447 __invoke_rcu_cpu_kthread();
1448}
1449
1450static void rcu_kthread_do_work(void)
1451{
1452 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
1453 rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1454 rcu_preempt_do_callbacks();
1446} 1455}
1447 1456
1448/* 1457/*
1449 * Do softirq processing for the current CPU. 1458 * Do softirq processing for the current CPU.
1450 */ 1459 */
1451static void rcu_process_callbacks(void) 1460static void rcu_process_callbacks(struct softirq_action *unused)
1452{ 1461{
1453 __rcu_process_callbacks(&rcu_sched_state, 1462 __rcu_process_callbacks(&rcu_sched_state,
1454 &__get_cpu_var(rcu_sched_data)); 1463 &__get_cpu_var(rcu_sched_data));
@@ -1465,7 +1474,7 @@ static void rcu_process_callbacks(void)
1465 * the current CPU with interrupts disabled, the rcu_cpu_kthread_task 1474 * the current CPU with interrupts disabled, the rcu_cpu_kthread_task
1466 * cannot disappear out from under us. 1475 * cannot disappear out from under us.
1467 */ 1476 */
1468static void invoke_rcu_cpu_kthread(void) 1477static void __invoke_rcu_cpu_kthread(void)
1469{ 1478{
1470 unsigned long flags; 1479 unsigned long flags;
1471 1480
@@ -1479,6 +1488,11 @@ static void invoke_rcu_cpu_kthread(void)
1479 local_irq_restore(flags); 1488 local_irq_restore(flags);
1480} 1489}
1481 1490
1491static void invoke_rcu_cpu_kthread(void)
1492{
1493 raise_softirq(RCU_SOFTIRQ);
1494}
1495
1482/* 1496/*
1483 * Wake up the specified per-rcu_node-structure kthread. 1497 * Wake up the specified per-rcu_node-structure kthread.
1484 * Because the per-rcu_node kthreads are immortal, we don't need 1498 * Because the per-rcu_node kthreads are immortal, we don't need
@@ -1613,7 +1627,7 @@ static int rcu_cpu_kthread(void *arg)
1613 *workp = 0; 1627 *workp = 0;
1614 local_irq_restore(flags); 1628 local_irq_restore(flags);
1615 if (work) 1629 if (work)
1616 rcu_process_callbacks(); 1630 rcu_kthread_do_work();
1617 local_bh_enable(); 1631 local_bh_enable();
1618 if (*workp != 0) 1632 if (*workp != 0)
1619 spincnt++; 1633 spincnt++;
@@ -2387,6 +2401,7 @@ void __init rcu_init(void)
2387 rcu_init_one(&rcu_sched_state, &rcu_sched_data); 2401 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
2388 rcu_init_one(&rcu_bh_state, &rcu_bh_data); 2402 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
2389 __rcu_init_preempt(); 2403 __rcu_init_preempt();
2404 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
2390 2405
2391 /* 2406 /*
2392 * We don't need protection against CPU-hotplug here because 2407 * We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7b9a08b4aaea..0fed6b934d2a 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -439,6 +439,7 @@ static void rcu_preempt_offline_cpu(int cpu);
439#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 439#endif /* #ifdef CONFIG_HOTPLUG_CPU */
440static void rcu_preempt_check_callbacks(int cpu); 440static void rcu_preempt_check_callbacks(int cpu);
441static void rcu_preempt_process_callbacks(void); 441static void rcu_preempt_process_callbacks(void);
442static void rcu_preempt_do_callbacks(void);
442void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 443void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
443#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) 444#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
444static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); 445static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index ea2e2fb79e81..38d09c5f2b41 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -602,6 +602,11 @@ static void rcu_preempt_process_callbacks(void)
602 &__get_cpu_var(rcu_preempt_data)); 602 &__get_cpu_var(rcu_preempt_data));
603} 603}
604 604
605static void rcu_preempt_do_callbacks(void)
606{
607 rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
608}
609
605/* 610/*
606 * Queue a preemptible-RCU callback for invocation after a grace period. 611 * Queue a preemptible-RCU callback for invocation after a grace period.
607 */ 612 */
@@ -997,6 +1002,10 @@ static void rcu_preempt_process_callbacks(void)
997{ 1002{
998} 1003}
999 1004
1005static void rcu_preempt_do_callbacks(void)
1006{
1007}
1008
1000/* 1009/*
1001 * Wait for an rcu-preempt grace period, but make it happen quickly. 1010 * Wait for an rcu-preempt grace period, but make it happen quickly.
1002 * But because preemptible RCU does not exist, map to rcu-sched. 1011 * But because preemptible RCU does not exist, map to rcu-sched.
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 13960170cad4..40cf63ddd4b3 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58 58
59char *softirq_to_name[NR_SOFTIRQS] = { 59char *softirq_to_name[NR_SOFTIRQS] = {
60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", 60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
61 "TASKLET", "SCHED", "HRTIMER" 61 "TASKLET", "SCHED", "HRTIMER", "RCU"
62}; 62};
63 63
64/* 64/*
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 1e88485c16a0..0a7ed5b5e281 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -2187,6 +2187,7 @@ static const struct flag flags[] = {
2187 { "TASKLET_SOFTIRQ", 6 }, 2187 { "TASKLET_SOFTIRQ", 6 },
2188 { "SCHED_SOFTIRQ", 7 }, 2188 { "SCHED_SOFTIRQ", 7 },
2189 { "HRTIMER_SOFTIRQ", 8 }, 2189 { "HRTIMER_SOFTIRQ", 8 },
2190 { "RCU_SOFTIRQ", 9 },
2190 2191
2191 { "HRTIMER_NORESTART", 0 }, 2192 { "HRTIMER_NORESTART", 0 },
2192 { "HRTIMER_RESTART", 1 }, 2193 { "HRTIMER_RESTART", 1 },