aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2011-01-26 23:51:05 -0500
committer David S. Miller <davem@davemloft.net> 2011-01-26 23:51:05 -0500
commit 62fa8a846d7de4b299232e330c74b7783539df76 (patch)
tree e401dbdbf4b11cbd27bdc3a47d9dc8b512173c9f
parent b4e69ac670d71b5748dc81e536b2cb103489badd (diff)
net: Implement read-only protection and COW'ing of metrics.
Routing metrics are now copy-on-write. Initially a route entry points its metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all-zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer; we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/dst.h 114
-rw-r--r-- include/net/dst_ops.h 1
-rw-r--r-- include/net/route.h 2
-rw-r--r-- net/core/dst.c 39
-rw-r--r-- net/decnet/dn_route.c 18
-rw-r--r-- net/ipv4/route.c 45
-rw-r--r-- net/ipv4/xfrm4_policy.c 4
-rw-r--r-- net/ipv6/route.c 15
-rw-r--r-- net/ipv6/xfrm6_policy.c 2
9 files changed, 194 insertions, 46 deletions
diff --git a/include/net/dst.h b/include/net/dst.h
index be5a0d4c491d..94a8c234ea2a 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -40,24 +40,10 @@ struct dst_entry {
40 struct rcu_head rcu_head; 40 struct rcu_head rcu_head;
41 struct dst_entry *child; 41 struct dst_entry *child;
42 struct net_device *dev; 42 struct net_device *dev;
43 short error; 43 struct dst_ops *ops;
44 short obsolete; 44 unsigned long _metrics;
45 int flags;
46#define DST_HOST 0x0001
47#define DST_NOXFRM 0x0002
48#define DST_NOPOLICY 0x0004
49#define DST_NOHASH 0x0008
50#define DST_NOCACHE 0x0010
51 unsigned long expires; 45 unsigned long expires;
52
53 unsigned short header_len; /* more space at head required */
54 unsigned short trailer_len; /* space to reserve at tail */
55
56 unsigned int rate_tokens;
57 unsigned long rate_last; /* rate limiting for ICMP */
58
59 struct dst_entry *path; 46 struct dst_entry *path;
60
61 struct neighbour *neighbour; 47 struct neighbour *neighbour;
62 struct hh_cache *hh; 48 struct hh_cache *hh;
63#ifdef CONFIG_XFRM 49#ifdef CONFIG_XFRM
@@ -68,17 +54,16 @@ struct dst_entry {
68 int (*input)(struct sk_buff*); 54 int (*input)(struct sk_buff*);
69 int (*output)(struct sk_buff*); 55 int (*output)(struct sk_buff*);
70 56
71 struct dst_ops *ops; 57 short error;
72 58 short obsolete;
73 u32 _metrics[RTAX_MAX]; 59 unsigned short header_len; /* more space at head required */
74 60 unsigned short trailer_len; /* space to reserve at tail */
75#ifdef CONFIG_IP_ROUTE_CLASSID 61#ifdef CONFIG_IP_ROUTE_CLASSID
76 __u32 tclassid; 62 __u32 tclassid;
77#else 63#else
78 __u32 __pad2; 64 __u32 __pad2;
79#endif 65#endif
80 66
81
82 /* 67 /*
83 * Align __refcnt to a 64 bytes alignment 68 * Align __refcnt to a 64 bytes alignment
84 * (L1_CACHE_SIZE would be too much) 69 * (L1_CACHE_SIZE would be too much)
@@ -93,6 +78,14 @@ struct dst_entry {
93 atomic_t __refcnt; /* client references */ 78 atomic_t __refcnt; /* client references */
94 int __use; 79 int __use;
95 unsigned long lastuse; 80 unsigned long lastuse;
81 unsigned long rate_last; /* rate limiting for ICMP */
82 unsigned int rate_tokens;
83 int flags;
84#define DST_HOST 0x0001
85#define DST_NOXFRM 0x0002
86#define DST_NOPOLICY 0x0004
87#define DST_NOHASH 0x0008
88#define DST_NOCACHE 0x0010
96 union { 89 union {
97 struct dst_entry *next; 90 struct dst_entry *next;
98 struct rtable __rcu *rt_next; 91 struct rtable __rcu *rt_next;
@@ -103,10 +96,69 @@ struct dst_entry {
103 96
104#ifdef __KERNEL__ 97#ifdef __KERNEL__
105 98
99extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
100
101#define DST_METRICS_READ_ONLY 0x1UL
102#define __DST_METRICS_PTR(Y) \
103 ((u32 *)((Y) & ~DST_METRICS_READ_ONLY))
104#define DST_METRICS_PTR(X) __DST_METRICS_PTR((X)->_metrics)
105
106static inline bool dst_metrics_read_only(const struct dst_entry *dst)
107{
108 return dst->_metrics & DST_METRICS_READ_ONLY;
109}
110
111extern void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old);
112
113static inline void dst_destroy_metrics_generic(struct dst_entry *dst)
114{
115 unsigned long val = dst->_metrics;
116 if (!(val & DST_METRICS_READ_ONLY))
117 __dst_destroy_metrics_generic(dst, val);
118}
119
120static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst)
121{
122 unsigned long p = dst->_metrics;
123
124 if (p & DST_METRICS_READ_ONLY)
125 return dst->ops->cow_metrics(dst, p);
126 return __DST_METRICS_PTR(p);
127}
128
129/* This may only be invoked before the entry has reached global
130 * visibility.
131 */
132static inline void dst_init_metrics(struct dst_entry *dst,
133 const u32 *src_metrics,
134 bool read_only)
135{
136 dst->_metrics = ((unsigned long) src_metrics) |
137 (read_only ? DST_METRICS_READ_ONLY : 0);
138}
139
140static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src)
141{
142 u32 *dst_metrics = dst_metrics_write_ptr(dest);
143
144 if (dst_metrics) {
145 u32 *src_metrics = DST_METRICS_PTR(src);
146
147 memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32));
148 }
149}
150
151static inline u32 *dst_metrics_ptr(struct dst_entry *dst)
152{
153 return DST_METRICS_PTR(dst);
154}
155
106static inline u32 156static inline u32
107dst_metric_raw(const struct dst_entry *dst, const int metric) 157dst_metric_raw(const struct dst_entry *dst, const int metric)
108{ 158{
109 return dst->_metrics[metric-1]; 159 u32 *p = DST_METRICS_PTR(dst);
160
161 return p[metric-1];
110} 162}
111 163
112static inline u32 164static inline u32
@@ -131,22 +183,10 @@ dst_metric_advmss(const struct dst_entry *dst)
131 183
132static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) 184static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
133{ 185{
134 dst->_metrics[metric-1] = val; 186 u32 *p = dst_metrics_write_ptr(dst);
135}
136
137static inline void dst_import_metrics(struct dst_entry *dst, const u32 *src_metrics)
138{
139 memcpy(dst->_metrics, src_metrics, RTAX_MAX * sizeof(u32));
140}
141 187
142static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src) 188 if (p)
143{ 189 p[metric-1] = val;
144 dst_import_metrics(dest, src->_metrics);
145}
146
147static inline u32 *dst_metrics_ptr(struct dst_entry *dst)
148{
149 return dst->_metrics;
150} 190}
151 191
152static inline u32 192static inline u32
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 21a320b8708e..dc0746328947 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -18,6 +18,7 @@ struct dst_ops {
18 struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); 18 struct dst_entry * (*check)(struct dst_entry *, __u32 cookie);
19 unsigned int (*default_advmss)(const struct dst_entry *); 19 unsigned int (*default_advmss)(const struct dst_entry *);
20 unsigned int (*default_mtu)(const struct dst_entry *); 20 unsigned int (*default_mtu)(const struct dst_entry *);
21 u32 * (*cow_metrics)(struct dst_entry *, unsigned long);
21 void (*destroy)(struct dst_entry *); 22 void (*destroy)(struct dst_entry *);
22 void (*ifdown)(struct dst_entry *, 23 void (*ifdown)(struct dst_entry *,
23 struct net_device *dev, int how); 24 struct net_device *dev, int how);
diff --git a/include/net/route.h b/include/net/route.h
index 93e10c453f6b..5677cbf0c6e6 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -49,6 +49,7 @@
49 49
50struct fib_nh; 50struct fib_nh;
51struct inet_peer; 51struct inet_peer;
52struct fib_info;
52struct rtable { 53struct rtable {
53 struct dst_entry dst; 54 struct dst_entry dst;
54 55
@@ -69,6 +70,7 @@ struct rtable {
69 /* Miscellaneous cached information */ 70 /* Miscellaneous cached information */
70 __be32 rt_spec_dst; /* RFC1122 specific destination */ 71 __be32 rt_spec_dst; /* RFC1122 specific destination */
71 struct inet_peer *peer; /* long-living peer info */ 72 struct inet_peer *peer; /* long-living peer info */
73 struct fib_info *fi; /* for client ref to shared metrics */
72}; 74};
73 75
74static inline bool rt_is_input_route(struct rtable *rt) 76static inline bool rt_is_input_route(struct rtable *rt)
diff --git a/net/core/dst.c b/net/core/dst.c
index b99c7c7ffce2..578893505702 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -164,6 +164,8 @@ int dst_discard(struct sk_buff *skb)
164} 164}
165EXPORT_SYMBOL(dst_discard); 165EXPORT_SYMBOL(dst_discard);
166 166
167static const u32 dst_default_metrics[RTAX_MAX];
168
167void *dst_alloc(struct dst_ops *ops) 169void *dst_alloc(struct dst_ops *ops)
168{ 170{
169 struct dst_entry *dst; 171 struct dst_entry *dst;
@@ -180,6 +182,7 @@ void *dst_alloc(struct dst_ops *ops)
180 dst->lastuse = jiffies; 182 dst->lastuse = jiffies;
181 dst->path = dst; 183 dst->path = dst;
182 dst->input = dst->output = dst_discard; 184 dst->input = dst->output = dst_discard;
185 dst_init_metrics(dst, dst_default_metrics, true);
183#if RT_CACHE_DEBUG >= 2 186#if RT_CACHE_DEBUG >= 2
184 atomic_inc(&dst_total); 187 atomic_inc(&dst_total);
185#endif 188#endif
@@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst)
282} 285}
283EXPORT_SYMBOL(dst_release); 286EXPORT_SYMBOL(dst_release);
284 287
288u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
289{
290 u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
291
292 if (p) {
293 u32 *old_p = __DST_METRICS_PTR(old);
294 unsigned long prev, new;
295
296 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
297
298 new = (unsigned long) p;
299 prev = cmpxchg(&dst->_metrics, old, new);
300
301 if (prev != old) {
302 kfree(p);
303 p = __DST_METRICS_PTR(prev);
304 if (prev & DST_METRICS_READ_ONLY)
305 p = NULL;
306 }
307 }
308 return p;
309}
310EXPORT_SYMBOL(dst_cow_metrics_generic);
311
312/* Caller asserts that dst_metrics_read_only(dst) is false. */
313void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
314{
315 unsigned long prev, new;
316
317 new = (unsigned long) dst_default_metrics;
318 prev = cmpxchg(&dst->_metrics, old, new);
319 if (prev == old)
320 kfree(__DST_METRICS_PTR(old));
321}
322EXPORT_SYMBOL(__dst_destroy_metrics_generic);
323
285/** 324/**
286 * skb_dst_set_noref - sets skb dst, without a reference 325 * skb_dst_set_noref - sets skb dst, without a reference
287 * @skb: buffer 326 * @skb: buffer
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5e636365d33c..42c9c62d3417 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -112,6 +112,7 @@ static int dn_dst_gc(struct dst_ops *ops);
112static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); 112static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
113static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); 113static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
114static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); 114static unsigned int dn_dst_default_mtu(const struct dst_entry *dst);
115static void dn_dst_destroy(struct dst_entry *);
115static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); 116static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
116static void dn_dst_link_failure(struct sk_buff *); 117static void dn_dst_link_failure(struct sk_buff *);
117static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); 118static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
@@ -133,11 +134,18 @@ static struct dst_ops dn_dst_ops = {
133 .check = dn_dst_check, 134 .check = dn_dst_check,
134 .default_advmss = dn_dst_default_advmss, 135 .default_advmss = dn_dst_default_advmss,
135 .default_mtu = dn_dst_default_mtu, 136 .default_mtu = dn_dst_default_mtu,
137 .cow_metrics = dst_cow_metrics_generic,
138 .destroy = dn_dst_destroy,
136 .negative_advice = dn_dst_negative_advice, 139 .negative_advice = dn_dst_negative_advice,
137 .link_failure = dn_dst_link_failure, 140 .link_failure = dn_dst_link_failure,
138 .update_pmtu = dn_dst_update_pmtu, 141 .update_pmtu = dn_dst_update_pmtu,
139}; 142};
140 143
144static void dn_dst_destroy(struct dst_entry *dst)
145{
146 dst_destroy_metrics_generic(dst);
147}
148
141static __inline__ unsigned dn_hash(__le16 src, __le16 dst) 149static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
142{ 150{
143 __u16 tmp = (__u16 __force)(src ^ dst); 151 __u16 tmp = (__u16 __force)(src ^ dst);
@@ -814,14 +822,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
814{ 822{
815 struct dn_fib_info *fi = res->fi; 823 struct dn_fib_info *fi = res->fi;
816 struct net_device *dev = rt->dst.dev; 824 struct net_device *dev = rt->dst.dev;
825 unsigned int mss_metric;
817 struct neighbour *n; 826 struct neighbour *n;
818 unsigned int metric;
819 827
820 if (fi) { 828 if (fi) {
821 if (DN_FIB_RES_GW(*res) && 829 if (DN_FIB_RES_GW(*res) &&
822 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 830 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
823 rt->rt_gateway = DN_FIB_RES_GW(*res); 831 rt->rt_gateway = DN_FIB_RES_GW(*res);
824 dst_import_metrics(&rt->dst, fi->fib_metrics); 832 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
825 } 833 }
826 rt->rt_type = res->type; 834 rt->rt_type = res->type;
827 835
@@ -834,10 +842,10 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
834 842
835 if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) 843 if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
836 dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); 844 dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
837 metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); 845 mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
838 if (metric) { 846 if (mss_metric) {
839 unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); 847 unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
840 if (metric > mss) 848 if (mss_metric > mss)
841 dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); 849 dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
842 } 850 }
843 return 0; 851 return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3e5b7cc2db4f..980030d4e4ae 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -152,6 +152,36 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
152{ 152{
153} 153}
154 154
155static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
156{
157 u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
158
159 if (p) {
160 u32 *old_p = __DST_METRICS_PTR(old);
161 unsigned long prev, new;
162
163 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
164
165 new = (unsigned long) p;
166 prev = cmpxchg(&dst->_metrics, old, new);
167
168 if (prev != old) {
169 kfree(p);
170 p = __DST_METRICS_PTR(prev);
171 if (prev & DST_METRICS_READ_ONLY)
172 p = NULL;
173 } else {
174 struct rtable *rt = (struct rtable *) dst;
175
176 if (rt->fi) {
177 fib_info_put(rt->fi);
178 rt->fi = NULL;
179 }
180 }
181 }
182 return p;
183}
184
155static struct dst_ops ipv4_dst_ops = { 185static struct dst_ops ipv4_dst_ops = {
156 .family = AF_INET, 186 .family = AF_INET,
157 .protocol = cpu_to_be16(ETH_P_IP), 187 .protocol = cpu_to_be16(ETH_P_IP),
@@ -159,6 +189,7 @@ static struct dst_ops ipv4_dst_ops = {
159 .check = ipv4_dst_check, 189 .check = ipv4_dst_check,
160 .default_advmss = ipv4_default_advmss, 190 .default_advmss = ipv4_default_advmss,
161 .default_mtu = ipv4_default_mtu, 191 .default_mtu = ipv4_default_mtu,
192 .cow_metrics = ipv4_cow_metrics,
162 .destroy = ipv4_dst_destroy, 193 .destroy = ipv4_dst_destroy,
163 .ifdown = ipv4_dst_ifdown, 194 .ifdown = ipv4_dst_ifdown,
164 .negative_advice = ipv4_negative_advice, 195 .negative_advice = ipv4_negative_advice,
@@ -1441,6 +1472,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1441 1472
1442 if (rt->peer) 1473 if (rt->peer)
1443 atomic_inc(&rt->peer->refcnt); 1474 atomic_inc(&rt->peer->refcnt);
1475 if (rt->fi)
1476 atomic_inc(&rt->fi->fib_clntref);
1444 1477
1445 if (arp_bind_neighbour(&rt->dst) || 1478 if (arp_bind_neighbour(&rt->dst) ||
1446 !(rt->dst.neighbour->nud_state & 1479 !(rt->dst.neighbour->nud_state &
@@ -1720,6 +1753,11 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
1720 struct rtable *rt = (struct rtable *) dst; 1753 struct rtable *rt = (struct rtable *) dst;
1721 struct inet_peer *peer = rt->peer; 1754 struct inet_peer *peer = rt->peer;
1722 1755
1756 dst_destroy_metrics_generic(dst);
1757 if (rt->fi) {
1758 fib_info_put(rt->fi);
1759 rt->fi = NULL;
1760 }
1723 if (peer) { 1761 if (peer) {
1724 rt->peer = NULL; 1762 rt->peer = NULL;
1725 inet_putpeer(peer); 1763 inet_putpeer(peer);
@@ -1824,7 +1862,9 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1824 if (FIB_RES_GW(*res) && 1862 if (FIB_RES_GW(*res) &&
1825 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1863 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1826 rt->rt_gateway = FIB_RES_GW(*res); 1864 rt->rt_gateway = FIB_RES_GW(*res);
1827 dst_import_metrics(dst, fi->fib_metrics); 1865 rt->fi = fi;
1866 atomic_inc(&fi->fib_clntref);
1867 dst_init_metrics(dst, fi->fib_metrics, true);
1828#ifdef CONFIG_IP_ROUTE_CLASSID 1868#ifdef CONFIG_IP_ROUTE_CLASSID
1829 dst->tclassid = FIB_RES_NH(*res).nh_tclassid; 1869 dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
1830#endif 1870#endif
@@ -2752,6 +2792,9 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2752 rt->peer = ort->peer; 2792 rt->peer = ort->peer;
2753 if (rt->peer) 2793 if (rt->peer)
2754 atomic_inc(&rt->peer->refcnt); 2794 atomic_inc(&rt->peer->refcnt);
2795 rt->fi = ort->fi;
2796 if (rt->fi)
2797 atomic_inc(&rt->fi->fib_clntref);
2755 2798
2756 dst_free(new); 2799 dst_free(new);
2757 } 2800 }
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b057d40addec..19fbdec6baaa 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -196,8 +196,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
196{ 196{
197 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 197 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
198 198
199 dst_destroy_metrics_generic(dst);
200
199 if (likely(xdst->u.rt.peer)) 201 if (likely(xdst->u.rt.peer))
200 inet_putpeer(xdst->u.rt.peer); 202 inet_putpeer(xdst->u.rt.peer);
203
201 xfrm_dst_destroy(xdst); 204 xfrm_dst_destroy(xdst);
202} 205}
203 206
@@ -215,6 +218,7 @@ static struct dst_ops xfrm4_dst_ops = {
215 .protocol = cpu_to_be16(ETH_P_IP), 218 .protocol = cpu_to_be16(ETH_P_IP),
216 .gc = xfrm4_garbage_collect, 219 .gc = xfrm4_garbage_collect,
217 .update_pmtu = xfrm4_update_pmtu, 220 .update_pmtu = xfrm4_update_pmtu,
221 .cow_metrics = dst_cow_metrics_generic,
218 .destroy = xfrm4_dst_destroy, 222 .destroy = xfrm4_dst_destroy,
219 .ifdown = xfrm4_dst_ifdown, 223 .ifdown = xfrm4_dst_ifdown,
220 .local_out = __ip_local_out, 224 .local_out = __ip_local_out,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1534508f6c68..45fafa018f12 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -105,6 +105,7 @@ static struct dst_ops ip6_dst_ops_template = {
105 .check = ip6_dst_check, 105 .check = ip6_dst_check,
106 .default_advmss = ip6_default_advmss, 106 .default_advmss = ip6_default_advmss,
107 .default_mtu = ip6_default_mtu, 107 .default_mtu = ip6_default_mtu,
108 .cow_metrics = dst_cow_metrics_generic,
108 .destroy = ip6_dst_destroy, 109 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown, 110 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice, 111 .negative_advice = ip6_negative_advice,
@@ -125,6 +126,10 @@ static struct dst_ops ip6_dst_blackhole_ops = {
125 .update_pmtu = ip6_rt_blackhole_update_pmtu, 126 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126}; 127};
127 128
129static const u32 ip6_template_metrics[RTAX_MAX] = {
130 [RTAX_HOPLIMIT - 1] = 255,
131};
132
128static struct rt6_info ip6_null_entry_template = { 133static struct rt6_info ip6_null_entry_template = {
129 .dst = { 134 .dst = {
130 .__refcnt = ATOMIC_INIT(1), 135 .__refcnt = ATOMIC_INIT(1),
@@ -193,6 +198,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
193 rt->rt6i_idev = NULL; 198 rt->rt6i_idev = NULL;
194 in6_dev_put(idev); 199 in6_dev_put(idev);
195 } 200 }
201 dst_destroy_metrics_generic(dst);
196 if (peer) { 202 if (peer) {
197 BUG_ON(!(rt->rt6i_flags & RTF_CACHE)); 203 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
198 rt->rt6i_peer = NULL; 204 rt->rt6i_peer = NULL;
@@ -2681,7 +2687,8 @@ static int __net_init ip6_route_net_init(struct net *net)
2681 net->ipv6.ip6_null_entry->dst.path = 2687 net->ipv6.ip6_null_entry->dst.path =
2682 (struct dst_entry *)net->ipv6.ip6_null_entry; 2688 (struct dst_entry *)net->ipv6.ip6_null_entry;
2683 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2689 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2684 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); 2690 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2691 ip6_template_metrics, true);
2685 2692
2686#ifdef CONFIG_IPV6_MULTIPLE_TABLES 2693#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2687 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2694 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2692,7 +2699,8 @@ static int __net_init ip6_route_net_init(struct net *net)
2692 net->ipv6.ip6_prohibit_entry->dst.path = 2699 net->ipv6.ip6_prohibit_entry->dst.path =
2693 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2700 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2694 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2701 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2695 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); 2702 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2703 ip6_template_metrics, true);
2696 2704
2697 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2705 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2698 sizeof(*net->ipv6.ip6_blk_hole_entry), 2706 sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2702,7 +2710,8 @@ static int __net_init ip6_route_net_init(struct net *net)
2702 net->ipv6.ip6_blk_hole_entry->dst.path = 2710 net->ipv6.ip6_blk_hole_entry->dst.path =
2703 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2711 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2704 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2712 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2705 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); 2713 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2714 ip6_template_metrics, true);
2706#endif 2715#endif
2707 2716
2708 net->ipv6.sysctl.flush_delay = 0; 2717 net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index da87428681cc..834dc02f1d4f 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -220,6 +220,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
220 220
221 if (likely(xdst->u.rt6.rt6i_idev)) 221 if (likely(xdst->u.rt6.rt6i_idev))
222 in6_dev_put(xdst->u.rt6.rt6i_idev); 222 in6_dev_put(xdst->u.rt6.rt6i_idev);
223 dst_destroy_metrics_generic(dst);
223 if (likely(xdst->u.rt6.rt6i_peer)) 224 if (likely(xdst->u.rt6.rt6i_peer))
224 inet_putpeer(xdst->u.rt6.rt6i_peer); 225 inet_putpeer(xdst->u.rt6.rt6i_peer);
225 xfrm_dst_destroy(xdst); 226 xfrm_dst_destroy(xdst);
@@ -257,6 +258,7 @@ static struct dst_ops xfrm6_dst_ops = {
257 .protocol = cpu_to_be16(ETH_P_IPV6), 258 .protocol = cpu_to_be16(ETH_P_IPV6),
258 .gc = xfrm6_garbage_collect, 259 .gc = xfrm6_garbage_collect,
259 .update_pmtu = xfrm6_update_pmtu, 260 .update_pmtu = xfrm6_update_pmtu,
261 .cow_metrics = dst_cow_metrics_generic,
260 .destroy = xfrm6_dst_destroy, 262 .destroy = xfrm6_dst_destroy,
261 .ifdown = xfrm6_dst_ifdown, 263 .ifdown = xfrm6_dst_ifdown,
262 .local_out = __ip6_local_out, 264 .local_out = __ip6_local_out,