author     Eric Dumazet <edumazet@google.com>          2019-07-24 21:07:52 -0400
committer  Paul E. McKenney <paulmck@linux.ibm.com>    2019-08-13 17:38:24 -0400
commit     cfcdef5e30469f3f2d6786ad35fc3fdef2a3833f (patch)
tree       5135240b47f19f4096a6a6ab2344289517234a21
parent     f48fe4c586604c3a09938c6a6e9fd3356dfe8f3c (diff)
rcu: Allow rcu_do_batch() to dynamically adjust batch sizes
Bimodal behavior of rcu_do_batch() is not really suited to Google applications like gfe servers.

When a process with millions of sockets exits, closing all files queues two RCU callbacks per socket. This eventually reaches the point where RCU enters an emergency mode, in which rcu_do_batch() does not return until the whole queue is flushed. Each RCU callback lasts at least 70 nsec, so with millions of elements we easily spend more than 100 msec without rescheduling.

The goal of this patch is to avoid infamous messages like the following:

"need_resched set for > 51999388 ns (52 ticks) without schedule"

We dynamically adjust the number of elements we process: instead of the 10/INFINITE choice, we use a floor of ~1 % of the current entries. If the number is above 1000, we switch to a time-based limit of 3 msec per batch, adjustable with /sys/module/rcutree/parameters/rcu_resched_ns.

Signed-off-by: Eric Dumazet <edumazet@google.com>
[ paulmck: Forward-port and remove debug statements. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
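In outline, the patch sizes each batch as max(blimit, pending >> rcu_divisor) and arms a wall-clock limit whenever the resulting batch is large (the patch's threshold in code is bl > 100). The following is a hypothetical userspace sketch of that logic, not the kernel source: batch_limit() and local_clock_ns() are illustrative names, with local_clock_ns() standing in for the kernel's local_clock().

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NSEC_PER_MSEC 1000000L

static int rcu_divisor = 7;                     /* floor = pending/128, ~1% */
static long rcu_resched_ns = 3 * NSEC_PER_MSEC; /* 3 msec time budget */

/* Stand-in for the kernel's local_clock(): monotonic nanoseconds. */
static uint64_t local_clock_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

/* Compute the batch size and, for big batches, arm a time limit. */
static long batch_limit(long blimit, long pending, uint64_t *tlimit)
{
	/* Take at least blimit callbacks, but never less than ~1% of
	 * the pending queue, so huge queues drain in bounded rounds. */
	long bl = pending >> rcu_divisor;

	if (bl < blimit)
		bl = blimit;
	/* Large batch: stop roughly rcu_resched_ns after starting. */
	*tlimit = bl > 100 ? local_clock_ns() + rcu_resched_ns : 0;
	return bl;
}

int main(void)
{
	uint64_t tlimit;
	/* One million pending callbacks with the default blimit of 10. */
	long bl = batch_limit(10, 1000000, &tlimit);

	printf("bl=%ld time-limited=%d\n", bl, tlimit != 0); /* bl=7812 */
	return 0;
}

With rcu_divisor at its default of 7, the floor is pending/128 (~0.8 %), so a million-entry queue is drained in batches of 7812 callbacks rather than 10.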
-rw-r--r--   kernel/rcu/tree.c   20
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3e89b5b83ea0..71395e91b876 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -56,6 +56,7 @@
 #include <linux/smpboot.h>
 #include <linux/jiffies.h>
 #include <linux/sched/isolation.h>
+#include <linux/sched/clock.h>
 #include "../time/tick-internal.h"
 
 #include "tree.h"
@@ -416,6 +417,12 @@ module_param(qlowmark, long, 0444);
 static ulong jiffies_till_first_fqs = ULONG_MAX;
 static ulong jiffies_till_next_fqs = ULONG_MAX;
 static bool rcu_kick_kthreads;
+static int rcu_divisor = 7;
+module_param(rcu_divisor, int, 0644);
+
+/* Force an exit from rcu_do_batch() after 3 milliseconds. */
+static long rcu_resched_ns = 3 * NSEC_PER_MSEC;
+module_param(rcu_resched_ns, long, 0644);
 
 /*
  * How long the grace period must be before we start recruiting
@@ -2109,6 +2116,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	struct rcu_head *rhp;
 	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
 	long bl, count;
+	long pending, tlimit = 0;
 
 	/* If no callbacks are ready, just return. */
 	if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
@@ -2130,7 +2138,10 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	local_irq_save(flags);
 	rcu_nocb_lock(rdp);
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
-	bl = rdp->blimit;
+	pending = rcu_segcblist_n_cbs(&rdp->cblist);
+	bl = max(rdp->blimit, pending >> rcu_divisor);
+	if (unlikely(bl > 100))
+		tlimit = local_clock() + rcu_resched_ns;
 	trace_rcu_batch_start(rcu_state.name,
 			      rcu_segcblist_n_lazy_cbs(&rdp->cblist),
 			      rcu_segcblist_n_cbs(&rdp->cblist), bl);
@@ -2153,6 +2164,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		    (need_resched() ||
 		     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
 			break;
+		if (unlikely(tlimit)) {
+			/* only call local_clock() every 32 callbacks */
+			if (likely((-rcl.len & 31) || local_clock() < tlimit))
+				continue;
+			/* Exceeded the time limit, so leave. */
+			break;
+		}
 		if (offloaded) {
 			WARN_ON_ONCE(in_serving_softirq());
 			local_bh_enable();
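A note on the (-rcl.len & 31) test in the last hunk: rcl.len is decremented once per dequeued callback, so -rcl.len counts the callbacks invoked so far, and the expression is nonzero on 31 of every 32 iterations. The comparatively costly local_clock() read is therefore skipped except once per 32 callbacks. Below is a hypothetical userspace sketch of the same idiom using an up-counting variable; time_budget_exceeded() and local_clock_ns() are illustrative names, not kernel APIs.

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

/* Stand-in for the kernel's local_clock(): monotonic nanoseconds. */
static uint64_t local_clock_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

/*
 * Return true when the batch should stop.  (invoked & 31) is nonzero on
 * 31 of every 32 calls, so the clock is read at most once per 32
 * callbacks; this mirrors the patch's (-rcl.len & 31), where rcl.len
 * counts down from zero as callbacks are dequeued.
 */
static bool time_budget_exceeded(long invoked, uint64_t tlimit)
{
	if (!tlimit)
		return false;	/* no time limit armed for small batches */
	if ((invoked & 31) || local_clock_ns() < tlimit)
		return false;	/* keep processing */
	return true;		/* time budget exceeded: bail out */
}

int main(void)
{
	uint64_t tlimit = local_clock_ns() + 3 * 1000000ULL; /* 3 msec */
	long invoked = 0;

	while (!time_budget_exceeded(++invoked, tlimit))
		;	/* stand-in for invoking one RCU callback */
	return 0;
}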