diff options
-rw-r--r-- | net/core/flow.c | 223 |
1 files changed, 119 insertions, 104 deletions
diff --git a/net/core/flow.c b/net/core/flow.c index 96015871ecea..1d27ca6b421d 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
@@ -35,104 +35,105 @@ struct flow_cache_entry { | |||
35 | atomic_t *object_ref; | 35 | atomic_t *object_ref; |
36 | }; | 36 | }; |
37 | 37 | ||
38 | atomic_t flow_cache_genid = ATOMIC_INIT(0); | 38 | struct flow_cache_percpu { |
39 | 39 | struct flow_cache_entry ** hash_table; | |
40 | static u32 flow_hash_shift; | 40 | int hash_count; |
41 | #define flow_hash_size (1 << flow_hash_shift) | 41 | u32 hash_rnd; |
42 | static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; | 42 | int hash_rnd_recalc; |
43 | 43 | struct tasklet_struct flush_tasklet; | |
44 | #define flow_table(cpu) (per_cpu(flow_tables, cpu)) | ||
45 | |||
46 | static struct kmem_cache *flow_cachep __read_mostly; | ||
47 | |||
48 | static int flow_lwm, flow_hwm; | ||
49 | |||
50 | struct flow_percpu_info { | ||
51 | int hash_rnd_recalc; | ||
52 | u32 hash_rnd; | ||
53 | int count; | ||
54 | }; | 44 | }; |
55 | static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 }; | ||
56 | |||
57 | #define flow_hash_rnd_recalc(cpu) \ | ||
58 | (per_cpu(flow_hash_info, cpu).hash_rnd_recalc) | ||
59 | #define flow_hash_rnd(cpu) \ | ||
60 | (per_cpu(flow_hash_info, cpu).hash_rnd) | ||
61 | #define flow_count(cpu) \ | ||
62 | (per_cpu(flow_hash_info, cpu).count) | ||
63 | |||
64 | static struct timer_list flow_hash_rnd_timer; | ||
65 | |||
66 | #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) | ||
67 | 45 | ||
68 | struct flow_flush_info { | 46 | struct flow_flush_info { |
69 | atomic_t cpuleft; | 47 | struct flow_cache * cache; |
70 | struct completion completion; | 48 | atomic_t cpuleft; |
49 | struct completion completion; | ||
71 | }; | 50 | }; |
72 | static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL }; | ||
73 | 51 | ||
74 | #define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu)) | 52 | struct flow_cache { |
53 | u32 hash_shift; | ||
54 | unsigned long order; | ||
55 | struct flow_cache_percpu * percpu; | ||
56 | struct notifier_block hotcpu_notifier; | ||
57 | int low_watermark; | ||
58 | int high_watermark; | ||
59 | struct timer_list rnd_timer; | ||
60 | }; | ||
61 | |||
62 | atomic_t flow_cache_genid = ATOMIC_INIT(0); | ||
63 | static struct flow_cache flow_cache_global; | ||
64 | static struct kmem_cache *flow_cachep; | ||
65 | |||
66 | #define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) | ||
67 | #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) | ||
75 | 68 | ||
76 | static void flow_cache_new_hashrnd(unsigned long arg) | 69 | static void flow_cache_new_hashrnd(unsigned long arg) |
77 | { | 70 | { |
71 | struct flow_cache *fc = (void *) arg; | ||
78 | int i; | 72 | int i; |
79 | 73 | ||
80 | for_each_possible_cpu(i) | 74 | for_each_possible_cpu(i) |
81 | flow_hash_rnd_recalc(i) = 1; | 75 | per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; |
82 | 76 | ||
83 | flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | 77 | fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; |
84 | add_timer(&flow_hash_rnd_timer); | 78 | add_timer(&fc->rnd_timer); |
85 | } | 79 | } |
86 | 80 | ||
87 | static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) | 81 | static void flow_entry_kill(struct flow_cache *fc, |
82 | struct flow_cache_percpu *fcp, | ||
83 | struct flow_cache_entry *fle) | ||
88 | { | 84 | { |
89 | if (fle->object) | 85 | if (fle->object) |
90 | atomic_dec(fle->object_ref); | 86 | atomic_dec(fle->object_ref); |
91 | kmem_cache_free(flow_cachep, fle); | 87 | kmem_cache_free(flow_cachep, fle); |
92 | flow_count(cpu)--; | 88 | fcp->hash_count--; |
93 | } | 89 | } |
94 | 90 | ||
95 | static void __flow_cache_shrink(int cpu, int shrink_to) | 91 | static void __flow_cache_shrink(struct flow_cache *fc, |
92 | struct flow_cache_percpu *fcp, | ||
93 | int shrink_to) | ||
96 | { | 94 | { |
97 | struct flow_cache_entry *fle, **flp; | 95 | struct flow_cache_entry *fle, **flp; |
98 | int i; | 96 | int i; |
99 | 97 | ||
100 | for (i = 0; i < flow_hash_size; i++) { | 98 | for (i = 0; i < flow_cache_hash_size(fc); i++) { |
101 | int k = 0; | 99 | int k = 0; |
102 | 100 | ||
103 | flp = &flow_table(cpu)[i]; | 101 | flp = &fcp->hash_table[i]; |
104 | while ((fle = *flp) != NULL && k < shrink_to) { | 102 | while ((fle = *flp) != NULL && k < shrink_to) { |
105 | k++; | 103 | k++; |
106 | flp = &fle->next; | 104 | flp = &fle->next; |
107 | } | 105 | } |
108 | while ((fle = *flp) != NULL) { | 106 | while ((fle = *flp) != NULL) { |
109 | *flp = fle->next; | 107 | *flp = fle->next; |
110 | flow_entry_kill(cpu, fle); | 108 | flow_entry_kill(fc, fcp, fle); |
111 | } | 109 | } |
112 | } | 110 | } |
113 | } | 111 | } |
114 | 112 | ||
115 | static void flow_cache_shrink(int cpu) | 113 | static void flow_cache_shrink(struct flow_cache *fc, |
114 | struct flow_cache_percpu *fcp) | ||
116 | { | 115 | { |
117 | int shrink_to = flow_lwm / flow_hash_size; | 116 | int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); |
118 | 117 | ||
119 | __flow_cache_shrink(cpu, shrink_to); | 118 | __flow_cache_shrink(fc, fcp, shrink_to); |
120 | } | 119 | } |
121 | 120 | ||
122 | static void flow_new_hash_rnd(int cpu) | 121 | static void flow_new_hash_rnd(struct flow_cache *fc, |
122 | struct flow_cache_percpu *fcp) | ||
123 | { | 123 | { |
124 | get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32)); | 124 | get_random_bytes(&fcp->hash_rnd, sizeof(u32)); |
125 | flow_hash_rnd_recalc(cpu) = 0; | 125 | fcp->hash_rnd_recalc = 0; |
126 | 126 | __flow_cache_shrink(fc, fcp, 0); | |
127 | __flow_cache_shrink(cpu, 0); | ||
128 | } | 127 | } |
129 | 128 | ||
130 | static u32 flow_hash_code(struct flowi *key, int cpu) | 129 | static u32 flow_hash_code(struct flow_cache *fc, |
130 | struct flow_cache_percpu *fcp, | ||
131 | struct flowi *key) | ||
131 | { | 132 | { |
132 | u32 *k = (u32 *) key; | 133 | u32 *k = (u32 *) key; |
133 | 134 | ||
134 | return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) & | 135 | return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) |
135 | (flow_hash_size - 1)); | 136 | & (flow_cache_hash_size(fc) - 1)); |
136 | } | 137 | } |
137 | 138 | ||
138 | #if (BITS_PER_LONG == 64) | 139 | #if (BITS_PER_LONG == 64) |
@@ -168,24 +169,25 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) | |||
168 | void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, | 169 | void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, |
169 | flow_resolve_t resolver) | 170 | flow_resolve_t resolver) |
170 | { | 171 | { |
172 | struct flow_cache *fc = &flow_cache_global; | ||
173 | struct flow_cache_percpu *fcp; | ||
171 | struct flow_cache_entry *fle, **head; | 174 | struct flow_cache_entry *fle, **head; |
172 | unsigned int hash; | 175 | unsigned int hash; |
173 | int cpu; | ||
174 | 176 | ||
175 | local_bh_disable(); | 177 | local_bh_disable(); |
176 | cpu = smp_processor_id(); | 178 | fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); |
177 | 179 | ||
178 | fle = NULL; | 180 | fle = NULL; |
179 | /* Packet really early in init? Making flow_cache_init a | 181 | /* Packet really early in init? Making flow_cache_init a |
180 | * pre-smp initcall would solve this. --RR */ | 182 | * pre-smp initcall would solve this. --RR */ |
181 | if (!flow_table(cpu)) | 183 | if (!fcp->hash_table) |
182 | goto nocache; | 184 | goto nocache; |
183 | 185 | ||
184 | if (flow_hash_rnd_recalc(cpu)) | 186 | if (fcp->hash_rnd_recalc) |
185 | flow_new_hash_rnd(cpu); | 187 | flow_new_hash_rnd(fc, fcp); |
186 | hash = flow_hash_code(key, cpu); | 188 | hash = flow_hash_code(fc, fcp, key); |
187 | 189 | ||
188 | head = &flow_table(cpu)[hash]; | 190 | head = &fcp->hash_table[hash]; |
189 | for (fle = *head; fle; fle = fle->next) { | 191 | for (fle = *head; fle; fle = fle->next) { |
190 | if (fle->family == family && | 192 | if (fle->family == family && |
191 | fle->dir == dir && | 193 | fle->dir == dir && |
@@ -204,8 +206,8 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, | |||
204 | } | 206 | } |
205 | 207 | ||
206 | if (!fle) { | 208 | if (!fle) { |
207 | if (flow_count(cpu) > flow_hwm) | 209 | if (fcp->hash_count > fc->high_watermark) |
208 | flow_cache_shrink(cpu); | 210 | flow_cache_shrink(fc, fcp); |
209 | 211 | ||
210 | fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); | 212 | fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); |
211 | if (fle) { | 213 | if (fle) { |
@@ -215,7 +217,7 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, | |||
215 | fle->dir = dir; | 217 | fle->dir = dir; |
216 | memcpy(&fle->key, key, sizeof(*key)); | 218 | memcpy(&fle->key, key, sizeof(*key)); |
217 | fle->object = NULL; | 219 | fle->object = NULL; |
218 | flow_count(cpu)++; | 220 | fcp->hash_count++; |
219 | } | 221 | } |
220 | } | 222 | } |
221 | 223 | ||
@@ -249,14 +251,15 @@ nocache: | |||
249 | static void flow_cache_flush_tasklet(unsigned long data) | 251 | static void flow_cache_flush_tasklet(unsigned long data) |
250 | { | 252 | { |
251 | struct flow_flush_info *info = (void *)data; | 253 | struct flow_flush_info *info = (void *)data; |
254 | struct flow_cache *fc = info->cache; | ||
255 | struct flow_cache_percpu *fcp; | ||
252 | int i; | 256 | int i; |
253 | int cpu; | ||
254 | 257 | ||
255 | cpu = smp_processor_id(); | 258 | fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); |
256 | for (i = 0; i < flow_hash_size; i++) { | 259 | for (i = 0; i < flow_cache_hash_size(fc); i++) { |
257 | struct flow_cache_entry *fle; | 260 | struct flow_cache_entry *fle; |
258 | 261 | ||
259 | fle = flow_table(cpu)[i]; | 262 | fle = fcp->hash_table[i]; |
260 | for (; fle; fle = fle->next) { | 263 | for (; fle; fle = fle->next) { |
261 | unsigned genid = atomic_read(&flow_cache_genid); | 264 | unsigned genid = atomic_read(&flow_cache_genid); |
262 | 265 | ||
@@ -272,7 +275,6 @@ static void flow_cache_flush_tasklet(unsigned long data) | |||
272 | complete(&info->completion); | 275 | complete(&info->completion); |
273 | } | 276 | } |
274 | 277 | ||
275 | static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__)); | ||
276 | static void flow_cache_flush_per_cpu(void *data) | 278 | static void flow_cache_flush_per_cpu(void *data) |
277 | { | 279 | { |
278 | struct flow_flush_info *info = data; | 280 | struct flow_flush_info *info = data; |
@@ -280,8 +282,7 @@ static void flow_cache_flush_per_cpu(void *data) | |||
280 | struct tasklet_struct *tasklet; | 282 | struct tasklet_struct *tasklet; |
281 | 283 | ||
282 | cpu = smp_processor_id(); | 284 | cpu = smp_processor_id(); |
283 | 285 | tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; | |
284 | tasklet = flow_flush_tasklet(cpu); | ||
285 | tasklet->data = (unsigned long)info; | 286 | tasklet->data = (unsigned long)info; |
286 | tasklet_schedule(tasklet); | 287 | tasklet_schedule(tasklet); |
287 | } | 288 | } |
@@ -294,6 +295,7 @@ void flow_cache_flush(void) | |||
294 | /* Don't want cpus going down or up during this. */ | 295 | /* Don't want cpus going down or up during this. */ |
295 | get_online_cpus(); | 296 | get_online_cpus(); |
296 | mutex_lock(&flow_flush_sem); | 297 | mutex_lock(&flow_flush_sem); |
298 | info.cache = &flow_cache_global; | ||
297 | atomic_set(&info.cpuleft, num_online_cpus()); | 299 | atomic_set(&info.cpuleft, num_online_cpus()); |
298 | init_completion(&info.completion); | 300 | init_completion(&info.completion); |
299 | 301 | ||
@@ -307,62 +309,75 @@ void flow_cache_flush(void) | |||
307 | put_online_cpus(); | 309 | put_online_cpus(); |
308 | } | 310 | } |
309 | 311 | ||
310 | static void __init flow_cache_cpu_prepare(int cpu) | 312 | static void __init flow_cache_cpu_prepare(struct flow_cache *fc, |
313 | struct flow_cache_percpu *fcp) | ||
311 | { | 314 | { |
312 | struct tasklet_struct *tasklet; | 315 | fcp->hash_table = (struct flow_cache_entry **) |
313 | unsigned long order; | 316 | __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order); |
314 | 317 | if (!fcp->hash_table) | |
315 | for (order = 0; | 318 | panic("NET: failed to allocate flow cache order %lu\n", fc->order); |
316 | (PAGE_SIZE << order) < | 319 | |
317 | (sizeof(struct flow_cache_entry *)*flow_hash_size); | 320 | fcp->hash_rnd_recalc = 1; |
318 | order++) | 321 | fcp->hash_count = 0; |
319 | /* NOTHING */; | 322 | tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); |
320 | |||
321 | flow_table(cpu) = (struct flow_cache_entry **) | ||
322 | __get_free_pages(GFP_KERNEL|__GFP_ZERO, order); | ||
323 | if (!flow_table(cpu)) | ||
324 | panic("NET: failed to allocate flow cache order %lu\n", order); | ||
325 | |||
326 | flow_hash_rnd_recalc(cpu) = 1; | ||
327 | flow_count(cpu) = 0; | ||
328 | |||
329 | tasklet = flow_flush_tasklet(cpu); | ||
330 | tasklet_init(tasklet, flow_cache_flush_tasklet, 0); | ||
331 | } | 323 | } |
332 | 324 | ||
333 | static int flow_cache_cpu(struct notifier_block *nfb, | 325 | static int flow_cache_cpu(struct notifier_block *nfb, |
334 | unsigned long action, | 326 | unsigned long action, |
335 | void *hcpu) | 327 | void *hcpu) |
336 | { | 328 | { |
329 | struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); | ||
330 | int cpu = (unsigned long) hcpu; | ||
331 | struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); | ||
332 | |||
337 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) | 333 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) |
338 | __flow_cache_shrink((unsigned long)hcpu, 0); | 334 | __flow_cache_shrink(fc, fcp, 0); |
339 | return NOTIFY_OK; | 335 | return NOTIFY_OK; |
340 | } | 336 | } |
341 | 337 | ||
342 | static int __init flow_cache_init(void) | 338 | static int flow_cache_init(struct flow_cache *fc) |
343 | { | 339 | { |
340 | unsigned long order; | ||
344 | int i; | 341 | int i; |
345 | 342 | ||
346 | flow_cachep = kmem_cache_create("flow_cache", | 343 | fc->hash_shift = 10; |
347 | sizeof(struct flow_cache_entry), | 344 | fc->low_watermark = 2 * flow_cache_hash_size(fc); |
348 | 0, SLAB_PANIC, | 345 | fc->high_watermark = 4 * flow_cache_hash_size(fc); |
349 | NULL); | 346 | |
350 | flow_hash_shift = 10; | 347 | for (order = 0; |
351 | flow_lwm = 2 * flow_hash_size; | 348 | (PAGE_SIZE << order) < |
352 | flow_hwm = 4 * flow_hash_size; | 349 | (sizeof(struct flow_cache_entry *)*flow_cache_hash_size(fc)); |
350 | order++) | ||
351 | /* NOTHING */; | ||
352 | fc->order = order; | ||
353 | fc->percpu = alloc_percpu(struct flow_cache_percpu); | ||
353 | 354 | ||
354 | setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0); | 355 | setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, |
355 | flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | 356 | (unsigned long) fc); |
356 | add_timer(&flow_hash_rnd_timer); | 357 | fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; |
358 | add_timer(&fc->rnd_timer); | ||
357 | 359 | ||
358 | for_each_possible_cpu(i) | 360 | for_each_possible_cpu(i) |
359 | flow_cache_cpu_prepare(i); | 361 | flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i)); |
362 | |||
363 | fc->hotcpu_notifier = (struct notifier_block){ | ||
364 | .notifier_call = flow_cache_cpu, | ||
365 | }; | ||
366 | register_hotcpu_notifier(&fc->hotcpu_notifier); | ||
360 | 367 | ||
361 | hotcpu_notifier(flow_cache_cpu, 0); | ||
362 | return 0; | 368 | return 0; |
363 | } | 369 | } |
364 | 370 | ||
365 | module_init(flow_cache_init); | 371 | static int __init flow_cache_init_global(void) |
372 | { | ||
373 | flow_cachep = kmem_cache_create("flow_cache", | ||
374 | sizeof(struct flow_cache_entry), | ||
375 | 0, SLAB_PANIC, NULL); | ||
376 | |||
377 | return flow_cache_init(&flow_cache_global); | ||
378 | } | ||
379 | |||
380 | module_init(flow_cache_init_global); | ||
366 | 381 | ||
367 | EXPORT_SYMBOL(flow_cache_genid); | 382 | EXPORT_SYMBOL(flow_cache_genid); |
368 | EXPORT_SYMBOL(flow_cache_lookup); | 383 | EXPORT_SYMBOL(flow_cache_lookup); |