Diffstat (limited to 'net/core/dst.c')
-rw-r--r--	net/core/dst.c	134
1 file changed, 102 insertions(+), 32 deletions(-)
diff --git a/net/core/dst.c b/net/core/dst.c
index 6c41b1fac3db..6135f3671692 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -19,6 +19,7 @@
 #include <linux/types.h>
 #include <net/net_namespace.h>
 #include <linux/sched.h>
+#include <linux/prefetch.h>
 
 #include <net/dst.h>
 
@@ -33,9 +34,6 @@
  * 3) This list is guarded by a mutex,
  * so that the gc_task and dst_dev_event() can be synchronized.
  */
-#if RT_CACHE_DEBUG >= 2
-static atomic_t dst_total = ATOMIC_INIT(0);
-#endif
 
 /*
  * We want to keep lock & list close together
@@ -69,10 +67,6 @@ static void dst_gc_task(struct work_struct *work)
 	unsigned long expires = ~0L;
 	struct dst_entry *dst, *next, head;
 	struct dst_entry *last = &head;
-#if RT_CACHE_DEBUG >= 2
-	ktime_t time_start = ktime_get();
-	struct timespec elapsed;
-#endif
 
 	mutex_lock(&dst_gc_mutex);
 	next = dst_busy_list;
@@ -146,15 +140,6 @@ loop:
 
 	spin_unlock_bh(&dst_garbage.lock);
 	mutex_unlock(&dst_gc_mutex);
-#if RT_CACHE_DEBUG >= 2
-	elapsed = ktime_to_timespec(ktime_sub(ktime_get(), time_start));
-	printk(KERN_DEBUG "dst_total: %d delayed: %d work_perf: %d"
-	       " expires: %lu elapsed: %lu us\n",
-	       atomic_read(&dst_total), delayed, work_performed,
-	       expires,
-	       elapsed.tv_sec * USEC_PER_SEC +
-	       elapsed.tv_nsec / NSEC_PER_USEC);
-#endif
 }
 
 int dst_discard(struct sk_buff *skb)
@@ -164,26 +149,49 @@ int dst_discard(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dst_discard);
 
-void *dst_alloc(struct dst_ops *ops)
+const u32 dst_default_metrics[RTAX_MAX];
+
+void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
+		int initial_ref, int initial_obsolete, int flags)
 {
 	struct dst_entry *dst;
 
-	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+	if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
 		if (ops->gc(ops))
 			return NULL;
 	}
-	dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
+	dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
 	if (!dst)
 		return NULL;
-	atomic_set(&dst->__refcnt, 0);
+	dst->child = NULL;
+	dst->dev = dev;
+	if (dev)
+		dev_hold(dev);
 	dst->ops = ops;
-	dst->lastuse = jiffies;
+	dst_init_metrics(dst, dst_default_metrics, true);
+	dst->expires = 0UL;
 	dst->path = dst;
-	dst->input = dst->output = dst_discard;
-#if RT_CACHE_DEBUG >= 2
-	atomic_inc(&dst_total);
+	dst->neighbour = NULL;
+	dst->hh = NULL;
+#ifdef CONFIG_XFRM
+	dst->xfrm = NULL;
 #endif
-	atomic_inc(&ops->entries);
+	dst->input = dst_discard;
+	dst->output = dst_discard;
+	dst->error = 0;
+	dst->obsolete = initial_obsolete;
+	dst->header_len = 0;
+	dst->trailer_len = 0;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	dst->tclassid = 0;
+#endif
+	atomic_set(&dst->__refcnt, initial_ref);
+	dst->__use = 0;
+	dst->lastuse = jiffies;
+	dst->flags = flags;
+	dst->next = NULL;
+	if (!(flags & DST_NOCOUNT))
+		dst_entries_add(ops, 1);
 	return dst;
 }
 EXPORT_SYMBOL(dst_alloc);
@@ -228,23 +236,21 @@ again:
 	child = dst->child;
 
 	dst->hh = NULL;
-	if (hh && atomic_dec_and_test(&hh->hh_refcnt))
-		kfree(hh);
+	if (hh)
+		hh_cache_put(hh);
 
 	if (neigh) {
 		dst->neighbour = NULL;
 		neigh_release(neigh);
 	}
 
-	atomic_dec(&dst->ops->entries);
+	if (!(dst->flags & DST_NOCOUNT))
+		dst_entries_add(dst->ops, -1);
 
 	if (dst->ops->destroy)
 		dst->ops->destroy(dst);
 	if (dst->dev)
 		dev_put(dst->dev);
-#if RT_CACHE_DEBUG >= 2
-	atomic_dec(&dst_total);
-#endif
 	kmem_cache_free(dst->ops->kmem_cachep, dst);
 
 	dst = child;
@@ -271,13 +277,76 @@ void dst_release(struct dst_entry *dst)
 	if (dst) {
 		int newrefcnt;
 
-		smp_mb__before_atomic_dec();
 		newrefcnt = atomic_dec_return(&dst->__refcnt);
 		WARN_ON(newrefcnt < 0);
+		if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
+			dst = dst_destroy(dst);
+			if (dst)
+				__dst_free(dst);
+		}
 	}
 }
 EXPORT_SYMBOL(dst_release);
 
+u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+	u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+
+	if (p) {
+		u32 *old_p = __DST_METRICS_PTR(old);
+		unsigned long prev, new;
+
+		memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+		new = (unsigned long) p;
+		prev = cmpxchg(&dst->_metrics, old, new);
+
+		if (prev != old) {
+			kfree(p);
+			p = __DST_METRICS_PTR(prev);
+			if (prev & DST_METRICS_READ_ONLY)
+				p = NULL;
+		}
+	}
+	return p;
+}
+EXPORT_SYMBOL(dst_cow_metrics_generic);
+
+/* Caller asserts that dst_metrics_read_only(dst) is false. */
+void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+	unsigned long prev, new;
+
+	new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
+	prev = cmpxchg(&dst->_metrics, old, new);
+	if (prev == old)
+		kfree(__DST_METRICS_PTR(old));
+}
+EXPORT_SYMBOL(__dst_destroy_metrics_generic);
+
+/**
+ * skb_dst_set_noref - sets skb dst, without a reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst
+ * skb_dst_drop() should not dst_release() this dst
+ */
+void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+	/* If dst not in cache, we must take a reference, because
+	 * dst_release() will destroy dst as soon as its refcount becomes zero
+	 */
+	if (unlikely(dst->flags & DST_NOCACHE)) {
+		dst_hold(dst);
+		skb_dst_set(skb, dst);
+	} else {
+		skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+	}
+}
+EXPORT_SYMBOL(skb_dst_set_noref);
+
 /* Dirty hack. We did it in 2.2 (in __dst_free),
  * we have _very_ good reasons not to repeat
  * this mistake in 2.3, but we have no choice
@@ -343,6 +412,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
 
 static struct notifier_block dst_dev_notifier = {
 	.notifier_call = dst_dev_event,
+	.priority = -10, /* must be called after other network notifiers */
 };
 
 void __init dst_init(void)
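
Note: the following is a minimal usage sketch, not part of the change above, showing how callers interact with the reworked dst_alloc() signature and the new skb_dst_set_noref() helper. The function names and the example_dst_ops parameter are illustrative assumptions, not code from this diff.

#include <net/dst.h>
#include <linux/skbuff.h>

/* Hypothetical protocol glue; real callers live in the per-protocol
 * route code (e.g. ipv4/ipv6).  This is only a sketch.
 */
static struct dst_entry *example_route_create(struct dst_ops *example_dst_ops,
					      struct net_device *dev)
{
	struct dst_entry *dst;

	/* New-style dst_alloc(): one reference handed to the caller,
	 * entry not yet obsolete, no flags.  Because DST_NOCOUNT is not
	 * set, the entry is counted via dst_entries_add(ops, 1) inside
	 * dst_alloc(), matching the dst_entries_add(ops, -1) in
	 * dst_destroy().
	 */
	dst = dst_alloc(example_dst_ops, dev, 1, 0, 0);
	if (!dst)
		return NULL;

	/* Protocol-specific setup (input/output handlers, private
	 * fields) would follow here.
	 */
	return dst;
}

/* Receive-path code that already runs under rcu_read_lock() can attach
 * a cached dst without taking a reference; skb_dst_drop() will then
 * skip dst_release() for it.
 */
static void example_attach_cached_dst(struct sk_buff *skb, struct dst_entry *dst)
{
	skb_dst_set_noref(skb, dst);
}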