aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/dst.c
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2011-01-26 23:51:05 -0500
committer David S. Miller <davem@davemloft.net> 2011-01-26 23:51:05 -0500
commit 62fa8a846d7de4b299232e330c74b7783539df76 (patch)
tree e401dbdbf4b11cbd27bdc3a47d9dc8b512173c9f /net/core/dst.c
parent b4e69ac670d71b5748dc81e536b2cb103489badd (diff)
net: Implement read-only protection and COW'ing of metrics.
Routing metrics are now copy-on-write. Initially a route entry points its metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer; we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/dst.c')
-rw-r--r--net/core/dst.c39
1 files changed, 39 insertions, 0 deletions
diff --git a/net/core/dst.c b/net/core/dst.c
index b99c7c7ffce2..578893505702 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -164,6 +164,8 @@ int dst_discard(struct sk_buff *skb)
164} 164}
165EXPORT_SYMBOL(dst_discard); 165EXPORT_SYMBOL(dst_discard);
166 166
/*
 * Metrics table with static storage and no initializer, so every one of its
 * RTAX_MAX entries is zero.  dst_alloc() hands it to dst_init_metrics().
 */
167static const u32 dst_default_metrics[RTAX_MAX];
168
167void *dst_alloc(struct dst_ops *ops) 169void *dst_alloc(struct dst_ops *ops)
168{ 170{
169 struct dst_entry *dst; 171 struct dst_entry *dst;
@@ -180,6 +182,7 @@ void *dst_alloc(struct dst_ops *ops)
180 dst->lastuse = jiffies; 182 dst->lastuse = jiffies;
181 dst->path = dst; 183 dst->path = dst;
182 dst->input = dst->output = dst_discard; 184 dst->input = dst->output = dst_discard;
185 dst_init_metrics(dst, dst_default_metrics, true);
183#if RT_CACHE_DEBUG >= 2 186#if RT_CACHE_DEBUG >= 2
184 atomic_inc(&dst_total); 187 atomic_inc(&dst_total);
185#endif 188#endif
@@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst)
282} 285}
283EXPORT_SYMBOL(dst_release); 286EXPORT_SYMBOL(dst_release);
284 287
/*
 * dst_cow_metrics_generic - try to install a freshly allocated metrics array.
 * @dst: entry whose _metrics word is to be replaced
 * @old: the _metrics value the caller observed
 *
 * Allocates an RTAX_MAX-entry u32 array with GFP_ATOMIC, fills it from the
 * array obtained via __DST_METRICS_PTR(@old), and attempts to swap it into
 * dst->_metrics with cmpxchg().
 *
 * Returns the new array when the swap succeeds.  When dst->_metrics no longer
 * equals @old, the new array is freed and the result is the array derived
 * from the value found there, or NULL if that value has
 * DST_METRICS_READ_ONLY set.  Also returns NULL when the allocation fails.
 */
288u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
289{
290 u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
291
292 if (p) {
293 u32 *old_p = __DST_METRICS_PTR(old);
294 unsigned long prev, new;
295
/* Seed the new array with the contents of the array @old refers to. */
296 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
297
/* Publish the new array only if _metrics still holds @old. */
298 new = (unsigned long) p;
299 prev = cmpxchg(&dst->_metrics, old, new);
300
301 if (prev != old) {
/* _metrics changed underneath us: discard our copy and report what is there. */
302 kfree(p);
303 p = __DST_METRICS_PTR(prev);
/* A value flagged read-only is not handed back to the caller. */
304 if (prev & DST_METRICS_READ_ONLY)
305 p = NULL;
306 }
307 }
308 return p;
309}
310EXPORT_SYMBOL(dst_cow_metrics_generic);
311
/*
 * __dst_destroy_metrics_generic - point @dst back at dst_default_metrics.
 * @dst: entry whose _metrics word is to be reset
 * @old: the _metrics value the caller observed
 *
 * Uses cmpxchg() to replace @old in dst->_metrics with the address of
 * dst_default_metrics.  The array obtained via __DST_METRICS_PTR(@old) is
 * passed to kfree() only when that swap succeeds; otherwise nothing is freed.
 *
 * NOTE(review): the value stored here is the bare address of the const
 * dst_default_metrics table, whereas dst_alloc() installs the same table
 * through dst_init_metrics(..., true).  If that 'true' marks the pointer
 * read-only (presumably via DST_METRICS_READ_ONLY), the flag likely needs to
 * be OR'ed into 'new' here as well -- confirm against dst_init_metrics().
 */
312/* Caller asserts that dst_metrics_read_only(dst) is false. */
313void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
314{
315 unsigned long prev, new;
316
317 new = (unsigned long) dst_default_metrics;
318 prev = cmpxchg(&dst->_metrics, old, new);
/* Free only if this call performed the replacement. */
319 if (prev == old)
320 kfree(__DST_METRICS_PTR(old));
321}
322EXPORT_SYMBOL(__dst_destroy_metrics_generic);
323
285/** 324/**
286 * skb_dst_set_noref - sets skb dst, without a reference 325 * skb_dst_set_noref - sets skb dst, without a reference
287 * @skb: buffer 326 * @skb: buffer