author	Florian Westphal <fw@strlen.de>	2017-01-17 20:01:22 -0500
committer	Pablo Neira Ayuso <pablo@netfilter.org>	2017-01-19 08:28:01 -0500
commit	e5072053b09642b8ff417d47da05b84720aea3ee (patch)
tree	27c6a3089d66f995cd8542a913209492a5d01609
parent	524b698db06b9b6da7192e749f637904e2f62d7b (diff)
netfilter: conntrack: refine gc worker heuristics, redux
This further refines the changes made to conntrack gc_worker in commit
e0df8cae6c16 ("netfilter: conntrack: refine gc worker heuristics").

The main idea of that change was to reduce the scan interval when
evictions take place.

However, on the reporters' setup, there are 1-2 million conntrack
entries in total and roughly 8k new (and closing) connections per
second.

In this case we'll always evict at least one entry per gc cycle and the
scan interval is always at 1 jiffy because of this test:

    } else if (expired_count) {
            gc_work->next_gc_run /= 2U;
            next_run = msecs_to_jiffies(1);

being true almost all of the time.

Given we scan ~10k entries per run, it's clearly wrong to reduce the
interval based on a nonzero eviction count; it only wastes cpu cycles
since the vast majority of conntracks are not timed out.

Thus, only look at the ratio (scanned entries vs. evicted entries) to
decide whether to reduce the interval or not.

Because the evictor is supposed to kick in only when the system turns
idle after a busy period, pick a high ratio -- this makes it 50%.  We
thus keep the idea of increasing the scan rate when it's likely that the
table contains many expired entries.

In order to not let timed-out entries hang around for too long
(important when using event logging, in which case we want to destroy
events in a timely fashion), we now scan the full table within at most
GC_MAX_SCAN_JIFFIES (16 seconds), even in the worst-case scenario where
all timed-out entries sit in the same slot.

I tested this with a vm under synflood (with
sysctl net.netfilter.nf_conntrack_tcp_timeout_syn_recv=3).

While the flood is ongoing, the interval now stays at its max rate
(GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV -> 125ms).

With feedback from Nicolas Dichtel.

Reported-by: Denys Fedoryshchenko <nuclearcat@nuclearcat.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Fixes: b87a2f9199ea82eaadc ("netfilter: conntrack: add gc worker to remove timed-out entries")
Signed-off-by: Florian Westphal <fw@strlen.de>
Tested-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Tested-by: Denys Fedoryshchenko <nuclearcat@nuclearcat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
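To make the new heuristic concrete, here is a minimal user-space sketch of the back-off decision, assuming HZ=1000; gc_next_run(), next_gc_run and main() below are illustrative stand-ins that mirror the tail of gc_worker(), not kernel code:

    #include <stdio.h>

    #define HZ                  1000u
    #define GC_MAX_BUCKETS_DIV  128u
    #define GC_MAX_SCAN_JIFFIES (16u * HZ)
    #define GC_EVICT_RATIO      50u

    /* initial value, as in conntrack_gc_work_init() */
    static unsigned int next_gc_run = HZ;

    static unsigned int gc_next_run(unsigned int scanned,
                                    unsigned int expired_count)
    {
            /* max(HZ / GC_MAX_BUCKETS_DIV, 1u), as in the patch */
            unsigned int min_interval =
                    HZ / GC_MAX_BUCKETS_DIV ? HZ / GC_MAX_BUCKETS_DIV : 1u;
            unsigned int ratio = scanned ? expired_count * 100 / scanned : 0;

            if (ratio > GC_EVICT_RATIO) {
                    /* table looks mostly expired: rescan at the minimum */
                    next_gc_run = min_interval;
            } else {
                    unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;

                    /* mostly-live table: back off linearly, capped at 125ms */
                    next_gc_run += min_interval;
                    if (next_gc_run > max)
                            next_gc_run = max;
            }
            return next_gc_run;
    }

    int main(void)
    {
            /* synflood: 8000 of 10000 scanned expired -> ratio 80 > 50 */
            printf("flood: next run in %u jiffies\n", gc_next_run(10000, 8000));
            /* quiet: 100 of 10000 expired -> ratio 1, back off toward cap */
            printf("quiet: next run in %u jiffies\n", gc_next_run(10000, 100));
            return 0;
    }

With HZ=1000 this prints 7 jiffies during the flood and then grows by 7 jiffies per quiet cycle up to the 125-jiffy cap.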
-rw-r--r--	net/netfilter/nf_conntrack_core.c	39
1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6feb5d370319..4e8083c5e01d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -85,9 +85,11 @@ static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;
 
 /* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
-#define GC_MAX_BUCKETS_DIV	64u
-/* upper bound of scan intervals */
-#define GC_INTERVAL_MAX	(2 * HZ)
+#define GC_MAX_BUCKETS_DIV	128u
+/* upper bound of full table scan */
+#define GC_MAX_SCAN_JIFFIES	(16u * HZ)
+/* desired ratio of entries found to be expired */
+#define GC_EVICT_RATIO	50u
 
 static struct conntrack_gc_work conntrack_gc_work;
 
@@ -936,6 +938,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
 
 static void gc_worker(struct work_struct *work)
 {
+	unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
 	unsigned int i, goal, buckets = 0, expired_count = 0;
 	struct conntrack_gc_work *gc_work;
 	unsigned int ratio, scanned = 0;
@@ -994,27 +997,25 @@ static void gc_worker(struct work_struct *work)
 	 * 1. Minimize time until we notice a stale entry
 	 * 2. Maximize scan intervals to not waste cycles
 	 *
-	 * Normally, expired_count will be 0, this increases the next_run time
-	 * to priorize 2) above.
+	 * Normally, expire ratio will be close to 0.
 	 *
-	 * As soon as a timed-out entry is found, move towards 1) and increase
-	 * the scan frequency.
-	 * In case we have lots of evictions next scan is done immediately.
+	 * As soon as a sizeable fraction of the entries have expired
+	 * increase scan frequency.
 	 */
 	ratio = scanned ? expired_count * 100 / scanned : 0;
-	if (ratio >= 90) {
-		gc_work->next_gc_run = 0;
-		next_run = 0;
-	} else if (expired_count) {
-		gc_work->next_gc_run /= 2U;
-		next_run = msecs_to_jiffies(1);
+	if (ratio > GC_EVICT_RATIO) {
+		gc_work->next_gc_run = min_interval;
 	} else {
-		if (gc_work->next_gc_run < GC_INTERVAL_MAX)
-			gc_work->next_gc_run += msecs_to_jiffies(1);
+		unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
 
-		next_run = gc_work->next_gc_run;
+		BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
+
+		gc_work->next_gc_run += min_interval;
+		if (gc_work->next_gc_run > max)
+			gc_work->next_gc_run = max;
 	}
 
+	next_run = gc_work->next_gc_run;
 	gc_work->last_bucket = i;
 	queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
 }
@@ -1022,7 +1023,7 @@ static void gc_worker(struct work_struct *work)
 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
 {
 	INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
-	gc_work->next_gc_run = GC_INTERVAL_MAX;
+	gc_work->next_gc_run = HZ;
 	gc_work->exiting = false;
 }
 
@@ -1914,7 +1915,7 @@ int nf_conntrack_init_start(void)
 	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 
 	conntrack_gc_work_init(&conntrack_gc_work);
-	queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
+	queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
 
 	return 0;
 
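Spelling out the arithmetic behind the new bound (my reading of the constants above): each gc cycle scans at most 1/GC_MAX_BUCKETS_DIV of the table, so a full pass needs GC_MAX_BUCKETS_DIV = 128 cycles. With the per-cycle interval capped at GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV = 16*HZ/128 = HZ/8 jiffies (125ms at any HZ), a complete sweep takes at most 128 * HZ/8 = 16*HZ jiffies, i.e. the 16-second GC_MAX_SCAN_JIFFIES ceiling the commit message promises.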