author	Ingo Molnar <mingo@kernel.org>	2014-06-25 01:37:49 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-06-25 01:37:49 -0400
commit	5cfec3422adcc1987a1b5fc5ff59ad42a1bc910e (patch)
tree	efb9591a225f1c760ab26a951f7c8623368225b2
parent	a497c3ba1d97fc69c1e78e7b96435ba8c2cb42ee (diff)
parent	4a81e8328d3791a4f99bf5b436d050f6dc5ffea3 (diff)
Merge branch 'urgent.2014.06.23a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/urgent
Pull RCU fixes from Paul E. McKenney:

" This series includes the following:

  1.	Export a pair of debug-object interfaces for RCU that will allow
	the slab allocators to avoid a recursion bug located by Sasha
	Levin.  Strictly speaking, this is not a regression, but it
	would be good to enable the fix.

  2.	Address a serious performance regression on an open/close
	micro-benchmark located by Dave Hansen.  The offending commit
	is ac1bea85781e (Make cond_resched() report RCU quiescent
	states). "

Signed-off-by: Ingo Molnar <mingo@kernel.org>
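To make item 1 concrete: the series promotes the previously file-local debug_init_rcu_head()/debug_rcu_head_free() helpers in kernel/rcu/update.c to the exported init_rcu_head()/destroy_rcu_head(), so code that embeds an rcu_head in a heap-allocated object can run the debug-objects setup and teardown at a point of its own choosing. The sketch below is illustrative only and is not taken from this series; struct my_obj and its helpers are invented, but init_rcu_head()/destroy_rcu_head() have the signatures added to include/linux/rcupdate.h in the diff below (and become empty inlines when CONFIG_DEBUG_OBJECTS_RCU_HEAD is not set):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {				/* invented structure, for illustration only */
	struct rcu_head rcu;
	int payload;
};

static struct my_obj *my_obj_alloc(void)
{
	struct my_obj *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (p)
		init_rcu_head(&p->rcu);	/* set up debug-objects tracking at allocation time */
	return p;
}

static void my_obj_rcu_free(struct rcu_head *rhp)
{
	struct my_obj *p = container_of(rhp, struct my_obj, rcu);

	destroy_rcu_head(&p->rcu);	/* retire debug-objects tracking before freeing */
	kfree(p);
}

Such an object would then be retired with call_rcu(&p->rcu, my_obj_rcu_free), with the debug-objects bookkeeping already in place.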
-rw-r--r--	Documentation/kernel-parameters.txt	  6
-rw-r--r--	include/linux/rcupdate.h	 46
-rw-r--r--	kernel/rcu/tree.c	140
-rw-r--r--	kernel/rcu/tree.h	  6
-rw-r--r--	kernel/rcu/tree_plugin.h	  2
-rw-r--r--	kernel/rcu/update.c	 22
-rw-r--r--	kernel/sched/core.c	  7
7 files changed, 137 insertions(+), 92 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 884904975d0b..7ffecb5fd004 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2790,6 +2790,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			leaf rcu_node structure.  Useful for very large
 			systems.
 
+	rcutree.jiffies_till_sched_qs= [KNL]
+			Set required age in jiffies for a
+			given grace period before RCU starts
+			soliciting quiescent-state help from
+			rcu_note_context_switch().
+
 	rcutree.jiffies_till_first_fqs= [KNL]
 			Set delay from grace-period initialization to
 			first attempt to force quiescent states.
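For reference, the knob documented above is also a writable (0644) module parameter of the built-in tree-RCU code (see the module_param() call added in kernel/rcu/tree.c further down), with an in-kernel default of HZ / 20 jiffies. The value 100 here is only an example, and the sysfs path is the usual module-parameter location rather than something spelled out in this series:

	rcutree.jiffies_till_sched_qs=100	(on the kernel command line)

	echo 100 > /sys/module/rcutree/parameters/jiffies_till_sched_qs	(at run time)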
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5a75d19aa661..6a94cc8b1ca0 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -44,7 +44,6 @@
 #include <linux/debugobjects.h>
 #include <linux/bug.h>
 #include <linux/compiler.h>
-#include <linux/percpu.h>
 #include <asm/barrier.h>
 
 extern int rcu_expedited; /* for sysctl */
@@ -300,41 +299,6 @@ bool __rcu_is_watching(void);
 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
 
 /*
- * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings.
- */
-
-#define RCU_COND_RESCHED_LIM 256	/* ms vs. 100s of ms. */
-DECLARE_PER_CPU(int, rcu_cond_resched_count);
-void rcu_resched(void);
-
-/*
- * Is it time to report RCU quiescent states?
- *
- * Note unsynchronized access to rcu_cond_resched_count.  Yes, we might
- * increment some random CPU's count, and possibly also load the result from
- * yet another CPU's count.  We might even clobber some other CPU's attempt
- * to zero its counter.  This is all OK because the goal is not precision,
- * but rather reasonable amortization of rcu_note_context_switch() overhead
- * and extremely high probability of avoiding RCU CPU stall warnings.
- * Note that this function has to be preempted in just the wrong place,
- * many thousands of times in a row, for anything bad to happen.
- */
-static inline bool rcu_should_resched(void)
-{
-	return raw_cpu_inc_return(rcu_cond_resched_count) >=
-	       RCU_COND_RESCHED_LIM;
-}
-
-/*
- * Report quiscent states to RCU if it is time to do so.
- */
-static inline void rcu_cond_resched(void)
-{
-	if (unlikely(rcu_should_resched()))
-		rcu_resched();
-}
-
-/*
  * Infrastructure to implement the synchronize_() primitives in
  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
  */
@@ -358,9 +322,19 @@ void wait_rcu_gp(call_rcu_func_t crf);
  * initialization.
  */
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+void init_rcu_head(struct rcu_head *head);
+void destroy_rcu_head(struct rcu_head *head);
 void init_rcu_head_on_stack(struct rcu_head *head);
 void destroy_rcu_head_on_stack(struct rcu_head *head);
 #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+static inline void init_rcu_head(struct rcu_head *head)
+{
+}
+
+static inline void destroy_rcu_head(struct rcu_head *head)
+{
+}
+
 static inline void init_rcu_head_on_stack(struct rcu_head *head)
 {
 }
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f1ba77363fbb..625d0b0cd75a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -206,6 +206,70 @@ void rcu_bh_qs(int cpu)
 	rdp->passed_quiesce = 1;
 }
 
+static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
+
+static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
+	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
+	.dynticks = ATOMIC_INIT(1),
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
+	.dynticks_idle = ATOMIC_INIT(1),
+#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+};
+
+/*
+ * Let the RCU core know that this CPU has gone through the scheduler,
+ * which is a quiescent state.  This is called when the need for a
+ * quiescent state is urgent, so we burn an atomic operation and full
+ * memory barriers to let the RCU core know about it, regardless of what
+ * this CPU might (or might not) do in the near future.
+ *
+ * We inform the RCU core by emulating a zero-duration dyntick-idle
+ * period, which we in turn do by incrementing the ->dynticks counter
+ * by two.
+ */
+static void rcu_momentary_dyntick_idle(void)
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+	struct rcu_dynticks *rdtp;
+	int resched_mask;
+	struct rcu_state *rsp;
+
+	local_irq_save(flags);
+
+	/*
+	 * Yes, we can lose flag-setting operations.  This is OK, because
+	 * the flag will be set again after some delay.
+	 */
+	resched_mask = raw_cpu_read(rcu_sched_qs_mask);
+	raw_cpu_write(rcu_sched_qs_mask, 0);
+
+	/* Find the flavor that needs a quiescent state. */
+	for_each_rcu_flavor(rsp) {
+		rdp = raw_cpu_ptr(rsp->rda);
+		if (!(resched_mask & rsp->flavor_mask))
+			continue;
+		smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */
+		if (ACCESS_ONCE(rdp->mynode->completed) !=
+		    ACCESS_ONCE(rdp->cond_resched_completed))
+			continue;
+
+		/*
+		 * Pretend to be momentarily idle for the quiescent state.
+		 * This allows the grace-period kthread to record the
+		 * quiescent state, with no need for this CPU to do anything
+		 * further.
+		 */
+		rdtp = this_cpu_ptr(&rcu_dynticks);
+		smp_mb__before_atomic(); /* Earlier stuff before QS. */
+		atomic_add(2, &rdtp->dynticks);  /* QS. */
+		smp_mb__after_atomic(); /* Later stuff after QS. */
+		break;
+	}
+	local_irq_restore(flags);
+}
+
 /*
  * Note a context switch.  This is a quiescent state for RCU-sched,
  * and requires special handling for preemptible RCU.
@@ -216,19 +280,12 @@ void rcu_note_context_switch(int cpu)
 	trace_rcu_utilization(TPS("Start context switch"));
 	rcu_sched_qs(cpu);
 	rcu_preempt_note_context_switch(cpu);
+	if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
+		rcu_momentary_dyntick_idle();
 	trace_rcu_utilization(TPS("End context switch"));
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
-static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
-	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
-	.dynticks = ATOMIC_INIT(1),
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
-	.dynticks_idle = ATOMIC_INIT(1),
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-};
-
 static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */
 static long qhimark = 10000;	/* If this many pending, ignore blimit. */
 static long qlowmark = 100;	/* Once only this many pending, use blimit. */
@@ -243,6 +300,13 @@ static ulong jiffies_till_next_fqs = ULONG_MAX;
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
 
+/*
+ * How long the grace period must be before we start recruiting
+ * quiescent-state help from rcu_note_context_switch().
+ */
+static ulong jiffies_till_sched_qs = HZ / 20;
+module_param(jiffies_till_sched_qs, ulong, 0644);
+
 static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 				  struct rcu_data *rdp);
 static void force_qs_rnp(struct rcu_state *rsp,
@@ -853,6 +917,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 				    bool *isidle, unsigned long *maxj)
 {
 	unsigned int curr;
+	int *rcrmp;
 	unsigned int snap;
 
 	curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
@@ -893,27 +958,43 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	}
 
 	/*
-	 * There is a possibility that a CPU in adaptive-ticks state
-	 * might run in the kernel with the scheduling-clock tick disabled
-	 * for an extended time period.  Invoke rcu_kick_nohz_cpu() to
-	 * force the CPU to restart the scheduling-clock tick in this
-	 * CPU is in this state.
-	 */
-	rcu_kick_nohz_cpu(rdp->cpu);
-
-	/*
-	 * Alternatively, the CPU might be running in the kernel
-	 * for an extended period of time without a quiescent state.
-	 * Attempt to force the CPU through the scheduler to gain the
-	 * needed quiescent state, but only if the grace period has gone
-	 * on for an uncommonly long time.  If there are many stuck CPUs,
-	 * we will beat on the first one until it gets unstuck, then move
-	 * to the next.  Only do this for the primary flavor of RCU.
+	 * A CPU running for an extended time within the kernel can
+	 * delay RCU grace periods.  When the CPU is in NO_HZ_FULL mode,
+	 * even context-switching back and forth between a pair of
+	 * in-kernel CPU-bound tasks cannot advance grace periods.
+	 * So if the grace period is old enough, make the CPU pay attention.
+	 * Note that the unsynchronized assignments to the per-CPU
+	 * rcu_sched_qs_mask variable are safe.  Yes, setting of
+	 * bits can be lost, but they will be set again on the next
+	 * force-quiescent-state pass.  So lost bit sets do not result
+	 * in incorrect behavior, merely in a grace period lasting
+	 * a few jiffies longer than it might otherwise.  Because
+	 * there are at most four threads involved, and because the
+	 * updates are only once every few jiffies, the probability of
+	 * lossage (and thus of slight grace-period extension) is
+	 * quite low.
+	 *
+	 * Note that if the jiffies_till_sched_qs boot/sysfs parameter
+	 * is set too high, we override with half of the RCU CPU stall
+	 * warning delay.
 	 */
-	if (rdp->rsp == rcu_state_p &&
+	rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu);
+	if (ULONG_CMP_GE(jiffies,
+			 rdp->rsp->gp_start + jiffies_till_sched_qs) ||
 	    ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
-		rdp->rsp->jiffies_resched += 5;
-		resched_cpu(rdp->cpu);
+		if (!(ACCESS_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
+			ACCESS_ONCE(rdp->cond_resched_completed) =
+				ACCESS_ONCE(rdp->mynode->completed);
+			smp_mb(); /* ->cond_resched_completed before *rcrmp. */
+			ACCESS_ONCE(*rcrmp) =
+				ACCESS_ONCE(*rcrmp) + rdp->rsp->flavor_mask;
+			resched_cpu(rdp->cpu);  /* Force CPU into scheduler. */
+			rdp->rsp->jiffies_resched += 5; /* Enable beating. */
+		} else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
+			/* Time to beat on that CPU again! */
+			resched_cpu(rdp->cpu);  /* Force CPU into scheduler. */
+			rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
+		}
 	}
 
 	return 0;
@@ -3491,6 +3572,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
3491 "rcu_node_fqs_1", 3572 "rcu_node_fqs_1",
3492 "rcu_node_fqs_2", 3573 "rcu_node_fqs_2",
3493 "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */ 3574 "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
3575 static u8 fl_mask = 0x1;
3494 int cpustride = 1; 3576 int cpustride = 1;
3495 int i; 3577 int i;
3496 int j; 3578 int j;
@@ -3509,6 +3591,8 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 	for (i = 1; i < rcu_num_lvls; i++)
 		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
 	rcu_init_levelspread(rsp);
+	rsp->flavor_mask = fl_mask;
+	fl_mask <<= 1;
 
 	/* Initialize the elements themselves, starting from the leaves. */
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index bf2c1e669691..0f69a79c5b7d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -307,6 +307,9 @@ struct rcu_data {
 	/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
 	unsigned long dynticks_fqs;	/* Kicked due to dynticks idle. */
 	unsigned long offline_fqs;	/* Kicked due to being offline. */
+	unsigned long cond_resched_completed;
+					/* Grace period that needs help */
+					/*  from cond_resched(). */
 
 	/* 5) __rcu_pending() statistics. */
 	unsigned long n_rcu_pending;	/* rcu_pending() calls since boot. */
@@ -392,6 +395,7 @@ struct rcu_state {
 	struct rcu_node *level[RCU_NUM_LVLS];	/* Hierarchy levels. */
 	u32 levelcnt[MAX_RCU_LVLS + 1];		/* # nodes in each level. */
 	u8 levelspread[RCU_NUM_LVLS];		/* kids/node in each level. */
+	u8 flavor_mask;				/* bit in flavor mask. */
 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */
 	void (*call)(struct rcu_head *head,	/* call_rcu() flavor. */
 		     void (*func)(struct rcu_head *head));
@@ -563,7 +567,7 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
 static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
-static void rcu_kick_nohz_cpu(int cpu);
+static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
 static bool init_nocb_callback_list(struct rcu_data *rdp);
 static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
 static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index cbc2c45265e2..02ac0fb186b8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2404,7 +2404,7 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
  * if an adaptive-ticks CPU is failing to respond to the current grace
  * period and has not be idle from an RCU perspective, kick it.
  */
-static void rcu_kick_nohz_cpu(int cpu)
+static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
 {
 #ifdef CONFIG_NO_HZ_FULL
 	if (tick_nohz_full_cpu(cpu))
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index a2aeb4df0f60..bc7883570530 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -200,12 +200,12 @@ void wait_rcu_gp(call_rcu_func_t crf)
 EXPORT_SYMBOL_GPL(wait_rcu_gp);
 
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-static inline void debug_init_rcu_head(struct rcu_head *head)
+void init_rcu_head(struct rcu_head *head)
 {
 	debug_object_init(head, &rcuhead_debug_descr);
 }
 
-static inline void debug_rcu_head_free(struct rcu_head *head)
+void destroy_rcu_head(struct rcu_head *head)
 {
 	debug_object_free(head, &rcuhead_debug_descr);
 }
@@ -350,21 +350,3 @@ static int __init check_cpu_stall_init(void)
 early_initcall(check_cpu_stall_init);
 
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
-
-/*
- * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings.
- */
-
-DEFINE_PER_CPU(int, rcu_cond_resched_count);
-
-/*
- * Report a set of RCU quiescent states, for use by cond_resched()
- * and friends.  Out of line due to being called infrequently.
- */
-void rcu_resched(void)
-{
-	preempt_disable();
-	__this_cpu_write(rcu_cond_resched_count, 0);
-	rcu_note_context_switch(smp_processor_id());
-	preempt_enable();
-}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3bdf01b494fe..bc1638b33449 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4147,7 +4147,6 @@ static void __cond_resched(void)
 
 int __sched _cond_resched(void)
 {
-	rcu_cond_resched();
 	if (should_resched()) {
 		__cond_resched();
 		return 1;
@@ -4166,18 +4165,15 @@ EXPORT_SYMBOL(_cond_resched);
  */
 int __cond_resched_lock(spinlock_t *lock)
 {
-	bool need_rcu_resched = rcu_should_resched();
 	int resched = should_resched();
 	int ret = 0;
 
 	lockdep_assert_held(lock);
 
-	if (spin_needbreak(lock) || resched || need_rcu_resched) {
+	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
 		if (resched)
 			__cond_resched();
-		else if (unlikely(need_rcu_resched))
-			rcu_resched();
 		else
 			cpu_relax();
 		ret = 1;
@@ -4191,7 +4187,6 @@ int __sched __cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	rcu_cond_resched();  /* BH disabled OK, just recording QSes. */
 	if (should_resched()) {
 		local_bh_enable();
 		__cond_resched();