aboutsummaryrefslogtreecommitdiffstats
path: root/net/decnet/dn_route.c
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2011-01-26 23:51:05 -0500
committer David S. Miller <davem@davemloft.net> 2011-01-26 23:51:05 -0500
commit 62fa8a846d7de4b299232e330c74b7783539df76 (patch)
tree e401dbdbf4b11cbd27bdc3a47d9dc8b512173c9f /net/decnet/dn_route.c
parent b4e69ac670d71b5748dc81e536b2cb103489badd (diff)
net: Implement read-only protection and COW'ing of metrics.
Routing metrics are now copy-on-write. Initially a route entry points its metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all-zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer; we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However, future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we make a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/decnet/dn_route.c')
-rw-r--r-- net/decnet/dn_route.c 18
1 files changed, 13 insertions, 5 deletions
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5e636365d33c..42c9c62d3417 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -112,6 +112,7 @@ static int dn_dst_gc(struct dst_ops *ops);
112static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); 112static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
113static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); 113static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
114static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); 114static unsigned int dn_dst_default_mtu(const struct dst_entry *dst);
115static void dn_dst_destroy(struct dst_entry *);
115static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); 116static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
116static void dn_dst_link_failure(struct sk_buff *); 117static void dn_dst_link_failure(struct sk_buff *);
117static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); 118static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
@@ -133,11 +134,18 @@ static struct dst_ops dn_dst_ops = {
133 .check = dn_dst_check, 134 .check = dn_dst_check,
134 .default_advmss = dn_dst_default_advmss, 135 .default_advmss = dn_dst_default_advmss,
135 .default_mtu = dn_dst_default_mtu, 136 .default_mtu = dn_dst_default_mtu,
137 .cow_metrics = dst_cow_metrics_generic,
138 .destroy = dn_dst_destroy,
136 .negative_advice = dn_dst_negative_advice, 139 .negative_advice = dn_dst_negative_advice,
137 .link_failure = dn_dst_link_failure, 140 .link_failure = dn_dst_link_failure,
138 .update_pmtu = dn_dst_update_pmtu, 141 .update_pmtu = dn_dst_update_pmtu,
139}; 142};
140 143
144static void dn_dst_destroy(struct dst_entry *dst)
145{
146 dst_destroy_metrics_generic(dst);
147}
148
141static __inline__ unsigned dn_hash(__le16 src, __le16 dst) 149static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
142{ 150{
143 __u16 tmp = (__u16 __force)(src ^ dst); 151 __u16 tmp = (__u16 __force)(src ^ dst);
@@ -814,14 +822,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
814{ 822{
815 struct dn_fib_info *fi = res->fi; 823 struct dn_fib_info *fi = res->fi;
816 struct net_device *dev = rt->dst.dev; 824 struct net_device *dev = rt->dst.dev;
825 unsigned int mss_metric;
817 struct neighbour *n; 826 struct neighbour *n;
818 unsigned int metric;
819 827
820 if (fi) { 828 if (fi) {
821 if (DN_FIB_RES_GW(*res) && 829 if (DN_FIB_RES_GW(*res) &&
822 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 830 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
823 rt->rt_gateway = DN_FIB_RES_GW(*res); 831 rt->rt_gateway = DN_FIB_RES_GW(*res);
824 dst_import_metrics(&rt->dst, fi->fib_metrics); 832 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
825 } 833 }
826 rt->rt_type = res->type; 834 rt->rt_type = res->type;
827 835
@@ -834,10 +842,10 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
834 842
835 if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) 843 if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
836 dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); 844 dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
837 metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); 845 mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
838 if (metric) { 846 if (mss_metric) {
839 unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); 847 unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
840 if (metric > mss) 848 if (mss_metric > mss)
841 dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); 849 dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
842 } 850 }
843 return 0; 851 return 0;