diff options
Diffstat (limited to 'net/ipv4/tcp_metrics.c')
-rw-r--r-- | net/ipv4/tcp_metrics.c | 208 |
1 files changed, 108 insertions, 100 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index e5f41bd5ec1b..a51d63a43e33 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c | |||
@@ -28,7 +28,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s | |||
28 | 28 | ||
29 | struct tcp_fastopen_metrics { | 29 | struct tcp_fastopen_metrics { |
30 | u16 mss; | 30 | u16 mss; |
31 | u16 syn_loss:10; /* Recurring Fast Open SYN losses */ | 31 | u16 syn_loss:10, /* Recurring Fast Open SYN losses */ |
32 | try_exp:2; /* Request w/ exp. option (once) */ | ||
32 | unsigned long last_syn_loss; /* Last Fast Open SYN loss */ | 33 | unsigned long last_syn_loss; /* Last Fast Open SYN loss */ |
33 | struct tcp_fastopen_cookie cookie; | 34 | struct tcp_fastopen_cookie cookie; |
34 | }; | 35 | }; |
@@ -40,6 +41,7 @@ struct tcp_fastopen_metrics { | |||
40 | 41 | ||
41 | struct tcp_metrics_block { | 42 | struct tcp_metrics_block { |
42 | struct tcp_metrics_block __rcu *tcpm_next; | 43 | struct tcp_metrics_block __rcu *tcpm_next; |
44 | possible_net_t tcpm_net; | ||
43 | struct inetpeer_addr tcpm_saddr; | 45 | struct inetpeer_addr tcpm_saddr; |
44 | struct inetpeer_addr tcpm_daddr; | 46 | struct inetpeer_addr tcpm_daddr; |
45 | unsigned long tcpm_stamp; | 47 | unsigned long tcpm_stamp; |
@@ -52,6 +54,11 @@ struct tcp_metrics_block { | |||
52 | struct rcu_head rcu_head; | 54 | struct rcu_head rcu_head; |
53 | }; | 55 | }; |
54 | 56 | ||
57 | static inline struct net *tm_net(struct tcp_metrics_block *tm) | ||
58 | { | ||
59 | return read_pnet(&tm->tcpm_net); | ||
60 | } | ||
61 | |||
55 | static bool tcp_metric_locked(struct tcp_metrics_block *tm, | 62 | static bool tcp_metric_locked(struct tcp_metrics_block *tm, |
56 | enum tcp_metric_index idx) | 63 | enum tcp_metric_index idx) |
57 | { | 64 | { |
@@ -74,23 +81,20 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, | |||
74 | static bool addr_same(const struct inetpeer_addr *a, | 81 | static bool addr_same(const struct inetpeer_addr *a, |
75 | const struct inetpeer_addr *b) | 82 | const struct inetpeer_addr *b) |
76 | { | 83 | { |
77 | const struct in6_addr *a6, *b6; | ||
78 | |||
79 | if (a->family != b->family) | 84 | if (a->family != b->family) |
80 | return false; | 85 | return false; |
81 | if (a->family == AF_INET) | 86 | if (a->family == AF_INET) |
82 | return a->addr.a4 == b->addr.a4; | 87 | return a->addr.a4 == b->addr.a4; |
83 | 88 | return ipv6_addr_equal(&a->addr.in6, &b->addr.in6); | |
84 | a6 = (const struct in6_addr *) &a->addr.a6[0]; | ||
85 | b6 = (const struct in6_addr *) &b->addr.a6[0]; | ||
86 | |||
87 | return ipv6_addr_equal(a6, b6); | ||
88 | } | 89 | } |
89 | 90 | ||
90 | struct tcpm_hash_bucket { | 91 | struct tcpm_hash_bucket { |
91 | struct tcp_metrics_block __rcu *chain; | 92 | struct tcp_metrics_block __rcu *chain; |
92 | }; | 93 | }; |
93 | 94 | ||
95 | static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly; | ||
96 | static unsigned int tcp_metrics_hash_log __read_mostly; | ||
97 | |||
94 | static DEFINE_SPINLOCK(tcp_metrics_lock); | 98 | static DEFINE_SPINLOCK(tcp_metrics_lock); |
95 | 99 | ||
96 | static void tcpm_suck_dst(struct tcp_metrics_block *tm, | 100 | static void tcpm_suck_dst(struct tcp_metrics_block *tm, |
@@ -128,6 +132,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, | |||
128 | if (fastopen_clear) { | 132 | if (fastopen_clear) { |
129 | tm->tcpm_fastopen.mss = 0; | 133 | tm->tcpm_fastopen.mss = 0; |
130 | tm->tcpm_fastopen.syn_loss = 0; | 134 | tm->tcpm_fastopen.syn_loss = 0; |
135 | tm->tcpm_fastopen.try_exp = 0; | ||
136 | tm->tcpm_fastopen.cookie.exp = false; | ||
131 | tm->tcpm_fastopen.cookie.len = 0; | 137 | tm->tcpm_fastopen.cookie.len = 0; |
132 | } | 138 | } |
133 | } | 139 | } |
@@ -143,6 +149,9 @@ static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst | |||
143 | #define TCP_METRICS_RECLAIM_DEPTH 5 | 149 | #define TCP_METRICS_RECLAIM_DEPTH 5 |
144 | #define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL | 150 | #define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL |
145 | 151 | ||
152 | #define deref_locked(p) \ | ||
153 | rcu_dereference_protected(p, lockdep_is_held(&tcp_metrics_lock)) | ||
154 | |||
146 | static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, | 155 | static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, |
147 | struct inetpeer_addr *saddr, | 156 | struct inetpeer_addr *saddr, |
148 | struct inetpeer_addr *daddr, | 157 | struct inetpeer_addr *daddr, |
@@ -171,9 +180,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, | |||
171 | if (unlikely(reclaim)) { | 180 | if (unlikely(reclaim)) { |
172 | struct tcp_metrics_block *oldest; | 181 | struct tcp_metrics_block *oldest; |
173 | 182 | ||
174 | oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); | 183 | oldest = deref_locked(tcp_metrics_hash[hash].chain); |
175 | for (tm = rcu_dereference(oldest->tcpm_next); tm; | 184 | for (tm = deref_locked(oldest->tcpm_next); tm; |
176 | tm = rcu_dereference(tm->tcpm_next)) { | 185 | tm = deref_locked(tm->tcpm_next)) { |
177 | if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) | 186 | if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) |
178 | oldest = tm; | 187 | oldest = tm; |
179 | } | 188 | } |
@@ -183,14 +192,15 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, | |||
183 | if (!tm) | 192 | if (!tm) |
184 | goto out_unlock; | 193 | goto out_unlock; |
185 | } | 194 | } |
195 | write_pnet(&tm->tcpm_net, net); | ||
186 | tm->tcpm_saddr = *saddr; | 196 | tm->tcpm_saddr = *saddr; |
187 | tm->tcpm_daddr = *daddr; | 197 | tm->tcpm_daddr = *daddr; |
188 | 198 | ||
189 | tcpm_suck_dst(tm, dst, true); | 199 | tcpm_suck_dst(tm, dst, true); |
190 | 200 | ||
191 | if (likely(!reclaim)) { | 201 | if (likely(!reclaim)) { |
192 | tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; | 202 | tm->tcpm_next = tcp_metrics_hash[hash].chain; |
193 | rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm); | 203 | rcu_assign_pointer(tcp_metrics_hash[hash].chain, tm); |
194 | } | 204 | } |
195 | 205 | ||
196 | out_unlock: | 206 | out_unlock: |
@@ -214,10 +224,11 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s | |||
214 | struct tcp_metrics_block *tm; | 224 | struct tcp_metrics_block *tm; |
215 | int depth = 0; | 225 | int depth = 0; |
216 | 226 | ||
217 | for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; | 227 | for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; |
218 | tm = rcu_dereference(tm->tcpm_next)) { | 228 | tm = rcu_dereference(tm->tcpm_next)) { |
219 | if (addr_same(&tm->tcpm_saddr, saddr) && | 229 | if (addr_same(&tm->tcpm_saddr, saddr) && |
220 | addr_same(&tm->tcpm_daddr, daddr)) | 230 | addr_same(&tm->tcpm_daddr, daddr) && |
231 | net_eq(tm_net(tm), net)) | ||
221 | break; | 232 | break; |
222 | depth++; | 233 | depth++; |
223 | } | 234 | } |
@@ -242,8 +253,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, | |||
242 | break; | 253 | break; |
243 | #if IS_ENABLED(CONFIG_IPV6) | 254 | #if IS_ENABLED(CONFIG_IPV6) |
244 | case AF_INET6: | 255 | case AF_INET6: |
245 | *(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr; | 256 | saddr.addr.in6 = inet_rsk(req)->ir_v6_loc_addr; |
246 | *(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; | 257 | daddr.addr.in6 = inet_rsk(req)->ir_v6_rmt_addr; |
247 | hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); | 258 | hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); |
248 | break; | 259 | break; |
249 | #endif | 260 | #endif |
@@ -252,12 +263,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, | |||
252 | } | 263 | } |
253 | 264 | ||
254 | net = dev_net(dst->dev); | 265 | net = dev_net(dst->dev); |
255 | hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); | 266 | hash ^= net_hash_mix(net); |
267 | hash = hash_32(hash, tcp_metrics_hash_log); | ||
256 | 268 | ||
257 | for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; | 269 | for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; |
258 | tm = rcu_dereference(tm->tcpm_next)) { | 270 | tm = rcu_dereference(tm->tcpm_next)) { |
259 | if (addr_same(&tm->tcpm_saddr, &saddr) && | 271 | if (addr_same(&tm->tcpm_saddr, &saddr) && |
260 | addr_same(&tm->tcpm_daddr, &daddr)) | 272 | addr_same(&tm->tcpm_daddr, &daddr) && |
273 | net_eq(tm_net(tm), net)) | ||
261 | break; | 274 | break; |
262 | } | 275 | } |
263 | tcpm_check_stamp(tm, dst); | 276 | tcpm_check_stamp(tm, dst); |
@@ -288,9 +301,9 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock | |||
288 | hash = (__force unsigned int) daddr.addr.a4; | 301 | hash = (__force unsigned int) daddr.addr.a4; |
289 | } else { | 302 | } else { |
290 | saddr.family = AF_INET6; | 303 | saddr.family = AF_INET6; |
291 | *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr; | 304 | saddr.addr.in6 = tw->tw_v6_rcv_saddr; |
292 | daddr.family = AF_INET6; | 305 | daddr.family = AF_INET6; |
293 | *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; | 306 | daddr.addr.in6 = tw->tw_v6_daddr; |
294 | hash = ipv6_addr_hash(&tw->tw_v6_daddr); | 307 | hash = ipv6_addr_hash(&tw->tw_v6_daddr); |
295 | } | 308 | } |
296 | } | 309 | } |
@@ -299,12 +312,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock | |||
299 | return NULL; | 312 | return NULL; |
300 | 313 | ||
301 | net = twsk_net(tw); | 314 | net = twsk_net(tw); |
302 | hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); | 315 | hash ^= net_hash_mix(net); |
316 | hash = hash_32(hash, tcp_metrics_hash_log); | ||
303 | 317 | ||
304 | for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; | 318 | for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; |
305 | tm = rcu_dereference(tm->tcpm_next)) { | 319 | tm = rcu_dereference(tm->tcpm_next)) { |
306 | if (addr_same(&tm->tcpm_saddr, &saddr) && | 320 | if (addr_same(&tm->tcpm_saddr, &saddr) && |
307 | addr_same(&tm->tcpm_daddr, &daddr)) | 321 | addr_same(&tm->tcpm_daddr, &daddr) && |
322 | net_eq(tm_net(tm), net)) | ||
308 | break; | 323 | break; |
309 | } | 324 | } |
310 | return tm; | 325 | return tm; |
@@ -336,9 +351,9 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, | |||
336 | hash = (__force unsigned int) daddr.addr.a4; | 351 | hash = (__force unsigned int) daddr.addr.a4; |
337 | } else { | 352 | } else { |
338 | saddr.family = AF_INET6; | 353 | saddr.family = AF_INET6; |
339 | *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr; | 354 | saddr.addr.in6 = sk->sk_v6_rcv_saddr; |
340 | daddr.family = AF_INET6; | 355 | daddr.family = AF_INET6; |
341 | *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; | 356 | daddr.addr.in6 = sk->sk_v6_daddr; |
342 | hash = ipv6_addr_hash(&sk->sk_v6_daddr); | 357 | hash = ipv6_addr_hash(&sk->sk_v6_daddr); |
343 | } | 358 | } |
344 | } | 359 | } |
@@ -347,7 +362,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, | |||
347 | return NULL; | 362 | return NULL; |
348 | 363 | ||
349 | net = dev_net(dst->dev); | 364 | net = dev_net(dst->dev); |
350 | hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); | 365 | hash ^= net_hash_mix(net); |
366 | hash = hash_32(hash, tcp_metrics_hash_log); | ||
351 | 367 | ||
352 | tm = __tcp_get_metrics(&saddr, &daddr, net, hash); | 368 | tm = __tcp_get_metrics(&saddr, &daddr, net, hash); |
353 | if (tm == TCP_METRICS_RECLAIM_PTR) | 369 | if (tm == TCP_METRICS_RECLAIM_PTR) |
@@ -492,7 +508,7 @@ void tcp_init_metrics(struct sock *sk) | |||
492 | struct tcp_metrics_block *tm; | 508 | struct tcp_metrics_block *tm; |
493 | u32 val, crtt = 0; /* cached RTT scaled by 8 */ | 509 | u32 val, crtt = 0; /* cached RTT scaled by 8 */ |
494 | 510 | ||
495 | if (dst == NULL) | 511 | if (!dst) |
496 | goto reset; | 512 | goto reset; |
497 | 513 | ||
498 | dst_confirm(dst); | 514 | dst_confirm(dst); |
@@ -700,6 +716,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, | |||
700 | if (tfom->mss) | 716 | if (tfom->mss) |
701 | *mss = tfom->mss; | 717 | *mss = tfom->mss; |
702 | *cookie = tfom->cookie; | 718 | *cookie = tfom->cookie; |
719 | if (cookie->len <= 0 && tfom->try_exp == 1) | ||
720 | cookie->exp = true; | ||
703 | *syn_loss = tfom->syn_loss; | 721 | *syn_loss = tfom->syn_loss; |
704 | *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0; | 722 | *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0; |
705 | } while (read_seqretry(&fastopen_seqlock, seq)); | 723 | } while (read_seqretry(&fastopen_seqlock, seq)); |
@@ -708,7 +726,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, | |||
708 | } | 726 | } |
709 | 727 | ||
710 | void tcp_fastopen_cache_set(struct sock *sk, u16 mss, | 728 | void tcp_fastopen_cache_set(struct sock *sk, u16 mss, |
711 | struct tcp_fastopen_cookie *cookie, bool syn_lost) | 729 | struct tcp_fastopen_cookie *cookie, bool syn_lost, |
730 | u16 try_exp) | ||
712 | { | 731 | { |
713 | struct dst_entry *dst = __sk_dst_get(sk); | 732 | struct dst_entry *dst = __sk_dst_get(sk); |
714 | struct tcp_metrics_block *tm; | 733 | struct tcp_metrics_block *tm; |
@@ -725,6 +744,9 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, | |||
725 | tfom->mss = mss; | 744 | tfom->mss = mss; |
726 | if (cookie && cookie->len > 0) | 745 | if (cookie && cookie->len > 0) |
727 | tfom->cookie = *cookie; | 746 | tfom->cookie = *cookie; |
747 | else if (try_exp > tfom->try_exp && | ||
748 | tfom->cookie.len <= 0 && !tfom->cookie.exp) | ||
749 | tfom->try_exp = try_exp; | ||
728 | if (syn_lost) { | 750 | if (syn_lost) { |
729 | ++tfom->syn_loss; | 751 | ++tfom->syn_loss; |
730 | tfom->last_syn_loss = jiffies; | 752 | tfom->last_syn_loss = jiffies; |
@@ -773,19 +795,19 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, | |||
773 | 795 | ||
774 | switch (tm->tcpm_daddr.family) { | 796 | switch (tm->tcpm_daddr.family) { |
775 | case AF_INET: | 797 | case AF_INET: |
776 | if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, | 798 | if (nla_put_in_addr(msg, TCP_METRICS_ATTR_ADDR_IPV4, |
777 | tm->tcpm_daddr.addr.a4) < 0) | 799 | tm->tcpm_daddr.addr.a4) < 0) |
778 | goto nla_put_failure; | 800 | goto nla_put_failure; |
779 | if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4, | 801 | if (nla_put_in_addr(msg, TCP_METRICS_ATTR_SADDR_IPV4, |
780 | tm->tcpm_saddr.addr.a4) < 0) | 802 | tm->tcpm_saddr.addr.a4) < 0) |
781 | goto nla_put_failure; | 803 | goto nla_put_failure; |
782 | break; | 804 | break; |
783 | case AF_INET6: | 805 | case AF_INET6: |
784 | if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, | 806 | if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_ADDR_IPV6, |
785 | tm->tcpm_daddr.addr.a6) < 0) | 807 | &tm->tcpm_daddr.addr.in6) < 0) |
786 | goto nla_put_failure; | 808 | goto nla_put_failure; |
787 | if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16, | 809 | if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_SADDR_IPV6, |
788 | tm->tcpm_saddr.addr.a6) < 0) | 810 | &tm->tcpm_saddr.addr.in6) < 0) |
789 | goto nla_put_failure; | 811 | goto nla_put_failure; |
790 | break; | 812 | break; |
791 | default: | 813 | default: |
@@ -898,17 +920,19 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb, | |||
898 | struct netlink_callback *cb) | 920 | struct netlink_callback *cb) |
899 | { | 921 | { |
900 | struct net *net = sock_net(skb->sk); | 922 | struct net *net = sock_net(skb->sk); |
901 | unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; | 923 | unsigned int max_rows = 1U << tcp_metrics_hash_log; |
902 | unsigned int row, s_row = cb->args[0]; | 924 | unsigned int row, s_row = cb->args[0]; |
903 | int s_col = cb->args[1], col = s_col; | 925 | int s_col = cb->args[1], col = s_col; |
904 | 926 | ||
905 | for (row = s_row; row < max_rows; row++, s_col = 0) { | 927 | for (row = s_row; row < max_rows; row++, s_col = 0) { |
906 | struct tcp_metrics_block *tm; | 928 | struct tcp_metrics_block *tm; |
907 | struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row; | 929 | struct tcpm_hash_bucket *hb = tcp_metrics_hash + row; |
908 | 930 | ||
909 | rcu_read_lock(); | 931 | rcu_read_lock(); |
910 | for (col = 0, tm = rcu_dereference(hb->chain); tm; | 932 | for (col = 0, tm = rcu_dereference(hb->chain); tm; |
911 | tm = rcu_dereference(tm->tcpm_next), col++) { | 933 | tm = rcu_dereference(tm->tcpm_next), col++) { |
934 | if (!net_eq(tm_net(tm), net)) | ||
935 | continue; | ||
912 | if (col < s_col) | 936 | if (col < s_col) |
913 | continue; | 937 | continue; |
914 | if (tcp_metrics_dump_info(skb, cb, tm) < 0) { | 938 | if (tcp_metrics_dump_info(skb, cb, tm) < 0) { |
@@ -933,7 +957,7 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, | |||
933 | a = info->attrs[v4]; | 957 | a = info->attrs[v4]; |
934 | if (a) { | 958 | if (a) { |
935 | addr->family = AF_INET; | 959 | addr->family = AF_INET; |
936 | addr->addr.a4 = nla_get_be32(a); | 960 | addr->addr.a4 = nla_get_in_addr(a); |
937 | if (hash) | 961 | if (hash) |
938 | *hash = (__force unsigned int) addr->addr.a4; | 962 | *hash = (__force unsigned int) addr->addr.a4; |
939 | return 0; | 963 | return 0; |
@@ -943,9 +967,9 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr, | |||
943 | if (nla_len(a) != sizeof(struct in6_addr)) | 967 | if (nla_len(a) != sizeof(struct in6_addr)) |
944 | return -EINVAL; | 968 | return -EINVAL; |
945 | addr->family = AF_INET6; | 969 | addr->family = AF_INET6; |
946 | memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); | 970 | addr->addr.in6 = nla_get_in6_addr(a); |
947 | if (hash) | 971 | if (hash) |
948 | *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); | 972 | *hash = ipv6_addr_hash(&addr->addr.in6); |
949 | return 0; | 973 | return 0; |
950 | } | 974 | } |
951 | return optional ? 1 : -EAFNOSUPPORT; | 975 | return optional ? 1 : -EAFNOSUPPORT; |
@@ -994,13 +1018,15 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info) | |||
994 | if (!reply) | 1018 | if (!reply) |
995 | goto nla_put_failure; | 1019 | goto nla_put_failure; |
996 | 1020 | ||
997 | hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); | 1021 | hash ^= net_hash_mix(net); |
1022 | hash = hash_32(hash, tcp_metrics_hash_log); | ||
998 | ret = -ESRCH; | 1023 | ret = -ESRCH; |
999 | rcu_read_lock(); | 1024 | rcu_read_lock(); |
1000 | for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; | 1025 | for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; |
1001 | tm = rcu_dereference(tm->tcpm_next)) { | 1026 | tm = rcu_dereference(tm->tcpm_next)) { |
1002 | if (addr_same(&tm->tcpm_daddr, &daddr) && | 1027 | if (addr_same(&tm->tcpm_daddr, &daddr) && |
1003 | (!src || addr_same(&tm->tcpm_saddr, &saddr))) { | 1028 | (!src || addr_same(&tm->tcpm_saddr, &saddr)) && |
1029 | net_eq(tm_net(tm), net)) { | ||
1004 | ret = tcp_metrics_fill_info(msg, tm); | 1030 | ret = tcp_metrics_fill_info(msg, tm); |
1005 | break; | 1031 | break; |
1006 | } | 1032 | } |
@@ -1020,34 +1046,27 @@ out_free: | |||
1020 | return ret; | 1046 | return ret; |
1021 | } | 1047 | } |
1022 | 1048 | ||
1023 | #define deref_locked_genl(p) \ | 1049 | static void tcp_metrics_flush_all(struct net *net) |
1024 | rcu_dereference_protected(p, lockdep_genl_is_held() && \ | ||
1025 | lockdep_is_held(&tcp_metrics_lock)) | ||
1026 | |||
1027 | #define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held()) | ||
1028 | |||
1029 | static int tcp_metrics_flush_all(struct net *net) | ||
1030 | { | 1050 | { |
1031 | unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; | 1051 | unsigned int max_rows = 1U << tcp_metrics_hash_log; |
1032 | struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash; | 1052 | struct tcpm_hash_bucket *hb = tcp_metrics_hash; |
1033 | struct tcp_metrics_block *tm; | 1053 | struct tcp_metrics_block *tm; |
1034 | unsigned int row; | 1054 | unsigned int row; |
1035 | 1055 | ||
1036 | for (row = 0; row < max_rows; row++, hb++) { | 1056 | for (row = 0; row < max_rows; row++, hb++) { |
1057 | struct tcp_metrics_block __rcu **pp; | ||
1037 | spin_lock_bh(&tcp_metrics_lock); | 1058 | spin_lock_bh(&tcp_metrics_lock); |
1038 | tm = deref_locked_genl(hb->chain); | 1059 | pp = &hb->chain; |
1039 | if (tm) | 1060 | for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { |
1040 | hb->chain = NULL; | 1061 | if (net_eq(tm_net(tm), net)) { |
1041 | spin_unlock_bh(&tcp_metrics_lock); | 1062 | *pp = tm->tcpm_next; |
1042 | while (tm) { | 1063 | kfree_rcu(tm, rcu_head); |
1043 | struct tcp_metrics_block *next; | 1064 | } else { |
1044 | 1065 | pp = &tm->tcpm_next; | |
1045 | next = deref_genl(tm->tcpm_next); | 1066 | } |
1046 | kfree_rcu(tm, rcu_head); | ||
1047 | tm = next; | ||
1048 | } | 1067 | } |
1068 | spin_unlock_bh(&tcp_metrics_lock); | ||
1049 | } | 1069 | } |
1050 | return 0; | ||
1051 | } | 1070 | } |
1052 | 1071 | ||
1053 | static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) | 1072 | static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) |
@@ -1064,19 +1083,23 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) | |||
1064 | ret = parse_nl_addr(info, &daddr, &hash, 1); | 1083 | ret = parse_nl_addr(info, &daddr, &hash, 1); |
1065 | if (ret < 0) | 1084 | if (ret < 0) |
1066 | return ret; | 1085 | return ret; |
1067 | if (ret > 0) | 1086 | if (ret > 0) { |
1068 | return tcp_metrics_flush_all(net); | 1087 | tcp_metrics_flush_all(net); |
1088 | return 0; | ||
1089 | } | ||
1069 | ret = parse_nl_saddr(info, &saddr); | 1090 | ret = parse_nl_saddr(info, &saddr); |
1070 | if (ret < 0) | 1091 | if (ret < 0) |
1071 | src = false; | 1092 | src = false; |
1072 | 1093 | ||
1073 | hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); | 1094 | hash ^= net_hash_mix(net); |
1074 | hb = net->ipv4.tcp_metrics_hash + hash; | 1095 | hash = hash_32(hash, tcp_metrics_hash_log); |
1096 | hb = tcp_metrics_hash + hash; | ||
1075 | pp = &hb->chain; | 1097 | pp = &hb->chain; |
1076 | spin_lock_bh(&tcp_metrics_lock); | 1098 | spin_lock_bh(&tcp_metrics_lock); |
1077 | for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { | 1099 | for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { |
1078 | if (addr_same(&tm->tcpm_daddr, &daddr) && | 1100 | if (addr_same(&tm->tcpm_daddr, &daddr) && |
1079 | (!src || addr_same(&tm->tcpm_saddr, &saddr))) { | 1101 | (!src || addr_same(&tm->tcpm_saddr, &saddr)) && |
1102 | net_eq(tm_net(tm), net)) { | ||
1080 | *pp = tm->tcpm_next; | 1103 | *pp = tm->tcpm_next; |
1081 | kfree_rcu(tm, rcu_head); | 1104 | kfree_rcu(tm, rcu_head); |
1082 | found = true; | 1105 | found = true; |
@@ -1126,6 +1149,9 @@ static int __net_init tcp_net_metrics_init(struct net *net) | |||
1126 | size_t size; | 1149 | size_t size; |
1127 | unsigned int slots; | 1150 | unsigned int slots; |
1128 | 1151 | ||
1152 | if (!net_eq(net, &init_net)) | ||
1153 | return 0; | ||
1154 | |||
1129 | slots = tcpmhash_entries; | 1155 | slots = tcpmhash_entries; |
1130 | if (!slots) { | 1156 | if (!slots) { |
1131 | if (totalram_pages >= 128 * 1024) | 1157 | if (totalram_pages >= 128 * 1024) |
@@ -1134,14 +1160,14 @@ static int __net_init tcp_net_metrics_init(struct net *net) | |||
1134 | slots = 8 * 1024; | 1160 | slots = 8 * 1024; |
1135 | } | 1161 | } |
1136 | 1162 | ||
1137 | net->ipv4.tcp_metrics_hash_log = order_base_2(slots); | 1163 | tcp_metrics_hash_log = order_base_2(slots); |
1138 | size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log; | 1164 | size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log; |
1139 | 1165 | ||
1140 | net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); | 1166 | tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); |
1141 | if (!net->ipv4.tcp_metrics_hash) | 1167 | if (!tcp_metrics_hash) |
1142 | net->ipv4.tcp_metrics_hash = vzalloc(size); | 1168 | tcp_metrics_hash = vzalloc(size); |
1143 | 1169 | ||
1144 | if (!net->ipv4.tcp_metrics_hash) | 1170 | if (!tcp_metrics_hash) |
1145 | return -ENOMEM; | 1171 | return -ENOMEM; |
1146 | 1172 | ||
1147 | return 0; | 1173 | return 0; |
@@ -1149,19 +1175,7 @@ static int __net_init tcp_net_metrics_init(struct net *net) | |||
1149 | 1175 | ||
1150 | static void __net_exit tcp_net_metrics_exit(struct net *net) | 1176 | static void __net_exit tcp_net_metrics_exit(struct net *net) |
1151 | { | 1177 | { |
1152 | unsigned int i; | 1178 | tcp_metrics_flush_all(net); |
1153 | |||
1154 | for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) { | ||
1155 | struct tcp_metrics_block *tm, *next; | ||
1156 | |||
1157 | tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1); | ||
1158 | while (tm) { | ||
1159 | next = rcu_dereference_protected(tm->tcpm_next, 1); | ||
1160 | kfree(tm); | ||
1161 | tm = next; | ||
1162 | } | ||
1163 | } | ||
1164 | kvfree(net->ipv4.tcp_metrics_hash); | ||
1165 | } | 1179 | } |
1166 | 1180 | ||
1167 | static __net_initdata struct pernet_operations tcp_net_metrics_ops = { | 1181 | static __net_initdata struct pernet_operations tcp_net_metrics_ops = { |
@@ -1175,16 +1189,10 @@ void __init tcp_metrics_init(void) | |||
1175 | 1189 | ||
1176 | ret = register_pernet_subsys(&tcp_net_metrics_ops); | 1190 | ret = register_pernet_subsys(&tcp_net_metrics_ops); |
1177 | if (ret < 0) | 1191 | if (ret < 0) |
1178 | goto cleanup; | 1192 | panic("Could not allocate the tcp_metrics hash table\n"); |
1193 | |||
1179 | ret = genl_register_family_with_ops(&tcp_metrics_nl_family, | 1194 | ret = genl_register_family_with_ops(&tcp_metrics_nl_family, |
1180 | tcp_metrics_nl_ops); | 1195 | tcp_metrics_nl_ops); |
1181 | if (ret < 0) | 1196 | if (ret < 0) |
1182 | goto cleanup_subsys; | 1197 | panic("Could not register tcp_metrics generic netlink\n"); |
1183 | return; | ||
1184 | |||
1185 | cleanup_subsys: | ||
1186 | unregister_pernet_subsys(&tcp_net_metrics_ops); | ||
1187 | |||
1188 | cleanup: | ||
1189 | return; | ||
1190 | } | 1198 | } |