aboutsummaryrefslogtreecommitdiffstats
path: root/include/net/dst.h
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2011-01-26 23:51:05 -0500
committer: David S. Miller <davem@davemloft.net> 2011-01-26 23:51:05 -0500
commit: 62fa8a846d7de4b299232e330c74b7783539df76 (patch)
tree: e401dbdbf4b11cbd27bdc3a47d9dc8b512173c9f /include/net/dst.h
parent: b4e69ac670d71b5748dc81e536b2cb103489badd (diff)
net: Implement read-only protection and COW'ing of metrics.
Routing metrics are now copy-on-write. Initially a route entry points its metrics at a read-only location. If a routing table entry exists, it will point there. Otherwise it will point at the all-zero metric placeholder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer; we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However, future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality, especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we make a modification to the reference count. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/dst.h')
-rw-r--r-- include/net/dst.h | 114
1 files changed, 77 insertions, 37 deletions
diff --git a/include/net/dst.h b/include/net/dst.h
index be5a0d4c491d..94a8c234ea2a 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -40,24 +40,10 @@ struct dst_entry {
40 struct rcu_head rcu_head; 40 struct rcu_head rcu_head;
41 struct dst_entry *child; 41 struct dst_entry *child;
42 struct net_device *dev; 42 struct net_device *dev;
43 short error; 43 struct dst_ops *ops;
44 short obsolete; 44 unsigned long _metrics;
45 int flags;
46#define DST_HOST 0x0001
47#define DST_NOXFRM 0x0002
48#define DST_NOPOLICY 0x0004
49#define DST_NOHASH 0x0008
50#define DST_NOCACHE 0x0010
51 unsigned long expires; 45 unsigned long expires;
52
53 unsigned short header_len; /* more space at head required */
54 unsigned short trailer_len; /* space to reserve at tail */
55
56 unsigned int rate_tokens;
57 unsigned long rate_last; /* rate limiting for ICMP */
58
59 struct dst_entry *path; 46 struct dst_entry *path;
60
61 struct neighbour *neighbour; 47 struct neighbour *neighbour;
62 struct hh_cache *hh; 48 struct hh_cache *hh;
63#ifdef CONFIG_XFRM 49#ifdef CONFIG_XFRM
@@ -68,17 +54,16 @@ struct dst_entry {
68 int (*input)(struct sk_buff*); 54 int (*input)(struct sk_buff*);
69 int (*output)(struct sk_buff*); 55 int (*output)(struct sk_buff*);
70 56
71 struct dst_ops *ops; 57 short error;
72 58 short obsolete;
73 u32 _metrics[RTAX_MAX]; 59 unsigned short header_len; /* more space at head required */
74 60 unsigned short trailer_len; /* space to reserve at tail */
75#ifdef CONFIG_IP_ROUTE_CLASSID 61#ifdef CONFIG_IP_ROUTE_CLASSID
76 __u32 tclassid; 62 __u32 tclassid;
77#else 63#else
78 __u32 __pad2; 64 __u32 __pad2;
79#endif 65#endif
80 66
81
82 /* 67 /*
83 * Align __refcnt to a 64 bytes alignment 68 * Align __refcnt to a 64 bytes alignment
84 * (L1_CACHE_SIZE would be too much) 69 * (L1_CACHE_SIZE would be too much)
@@ -93,6 +78,14 @@ struct dst_entry {
93 atomic_t __refcnt; /* client references */ 78 atomic_t __refcnt; /* client references */
94 int __use; 79 int __use;
95 unsigned long lastuse; 80 unsigned long lastuse;
81 unsigned long rate_last; /* rate limiting for ICMP */
82 unsigned int rate_tokens;
83 int flags;
84#define DST_HOST 0x0001
85#define DST_NOXFRM 0x0002
86#define DST_NOPOLICY 0x0004
87#define DST_NOHASH 0x0008
88#define DST_NOCACHE 0x0010
96 union { 89 union {
97 struct dst_entry *next; 90 struct dst_entry *next;
98 struct rtable __rcu *rt_next; 91 struct rtable __rcu *rt_next;
@@ -103,10 +96,69 @@ struct dst_entry {
103 96
104#ifdef __KERNEL__ 97#ifdef __KERNEL__
105 98
99extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
100
101#define DST_METRICS_READ_ONLY 0x1UL
102#define __DST_METRICS_PTR(Y) \
103 ((u32 *)((Y) & ~DST_METRICS_READ_ONLY))
104#define DST_METRICS_PTR(X) __DST_METRICS_PTR((X)->_metrics)
105
106static inline bool dst_metrics_read_only(const struct dst_entry *dst)
107{
108 return dst->_metrics & DST_METRICS_READ_ONLY;
109}
110
111extern void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old);
112
113static inline void dst_destroy_metrics_generic(struct dst_entry *dst)
114{
115 unsigned long val = dst->_metrics;
116 if (!(val & DST_METRICS_READ_ONLY))
117 __dst_destroy_metrics_generic(dst, val);
118}
119
120static inline u32 *dst_metrics_write_ptr(struct dst_entry *dst)
121{
122 unsigned long p = dst->_metrics;
123
124 if (p & DST_METRICS_READ_ONLY)
125 return dst->ops->cow_metrics(dst, p);
126 return __DST_METRICS_PTR(p);
127}
128
129/* This may only be invoked before the entry has reached global
130 * visibility.
131 */
132static inline void dst_init_metrics(struct dst_entry *dst,
133 const u32 *src_metrics,
134 bool read_only)
135{
136 dst->_metrics = ((unsigned long) src_metrics) |
137 (read_only ? DST_METRICS_READ_ONLY : 0);
138}
139
140static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src)
141{
142 u32 *dst_metrics = dst_metrics_write_ptr(dest);
143
144 if (dst_metrics) {
145 u32 *src_metrics = DST_METRICS_PTR(src);
146
147 memcpy(dst_metrics, src_metrics, RTAX_MAX * sizeof(u32));
148 }
149}
150
151static inline u32 *dst_metrics_ptr(struct dst_entry *dst)
152{
153 return DST_METRICS_PTR(dst);
154}
155
106static inline u32 156static inline u32
107dst_metric_raw(const struct dst_entry *dst, const int metric) 157dst_metric_raw(const struct dst_entry *dst, const int metric)
108{ 158{
109 return dst->_metrics[metric-1]; 159 u32 *p = DST_METRICS_PTR(dst);
160
161 return p[metric-1];
110} 162}
111 163
112static inline u32 164static inline u32
@@ -131,22 +183,10 @@ dst_metric_advmss(const struct dst_entry *dst)
131 183
132static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) 184static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
133{ 185{
134 dst->_metrics[metric-1] = val; 186 u32 *p = dst_metrics_write_ptr(dst);
135}
136
137static inline void dst_import_metrics(struct dst_entry *dst, const u32 *src_metrics)
138{
139 memcpy(dst->_metrics, src_metrics, RTAX_MAX * sizeof(u32));
140}
141 187
142static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src) 188 if (p)
143{ 189 p[metric-1] = val;
144 dst_import_metrics(dest, src->_metrics);
145}
146
147static inline u32 *dst_metrics_ptr(struct dst_entry *dst)
148{
149 return dst->_metrics;
150} 190}
151 191
152static inline u32 192static inline u32