about summary refs log tree commit diff stats
path: root/net/ipv4/tcp_metrics.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_metrics.c')
-rw-r--r-- net/ipv4/tcp_metrics.c 208
1 files changed, 108 insertions, 100 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index e5f41bd5ec1b..a51d63a43e33 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -28,7 +28,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s
28 28
29struct tcp_fastopen_metrics { 29struct tcp_fastopen_metrics {
30 u16 mss; 30 u16 mss;
31 u16 syn_loss:10; /* Recurring Fast Open SYN losses */ 31 u16 syn_loss:10, /* Recurring Fast Open SYN losses */
32 try_exp:2; /* Request w/ exp. option (once) */
32 unsigned long last_syn_loss; /* Last Fast Open SYN loss */ 33 unsigned long last_syn_loss; /* Last Fast Open SYN loss */
33 struct tcp_fastopen_cookie cookie; 34 struct tcp_fastopen_cookie cookie;
34}; 35};
@@ -40,6 +41,7 @@ struct tcp_fastopen_metrics {
40 41
41struct tcp_metrics_block { 42struct tcp_metrics_block {
42 struct tcp_metrics_block __rcu *tcpm_next; 43 struct tcp_metrics_block __rcu *tcpm_next;
44 possible_net_t tcpm_net;
43 struct inetpeer_addr tcpm_saddr; 45 struct inetpeer_addr tcpm_saddr;
44 struct inetpeer_addr tcpm_daddr; 46 struct inetpeer_addr tcpm_daddr;
45 unsigned long tcpm_stamp; 47 unsigned long tcpm_stamp;
@@ -52,6 +54,11 @@ struct tcp_metrics_block {
52 struct rcu_head rcu_head; 54 struct rcu_head rcu_head;
53}; 55};
54 56
57static inline struct net *tm_net(struct tcp_metrics_block *tm)
58{
59 return read_pnet(&tm->tcpm_net);
60}
61
55static bool tcp_metric_locked(struct tcp_metrics_block *tm, 62static bool tcp_metric_locked(struct tcp_metrics_block *tm,
56 enum tcp_metric_index idx) 63 enum tcp_metric_index idx)
57{ 64{
@@ -74,23 +81,20 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
74static bool addr_same(const struct inetpeer_addr *a, 81static bool addr_same(const struct inetpeer_addr *a,
75 const struct inetpeer_addr *b) 82 const struct inetpeer_addr *b)
76{ 83{
77 const struct in6_addr *a6, *b6;
78
79 if (a->family != b->family) 84 if (a->family != b->family)
80 return false; 85 return false;
81 if (a->family == AF_INET) 86 if (a->family == AF_INET)
82 return a->addr.a4 == b->addr.a4; 87 return a->addr.a4 == b->addr.a4;
83 88 return ipv6_addr_equal(&a->addr.in6, &b->addr.in6);
84 a6 = (const struct in6_addr *) &a->addr.a6[0];
85 b6 = (const struct in6_addr *) &b->addr.a6[0];
86
87 return ipv6_addr_equal(a6, b6);
88} 89}
89 90
90struct tcpm_hash_bucket { 91struct tcpm_hash_bucket {
91 struct tcp_metrics_block __rcu *chain; 92 struct tcp_metrics_block __rcu *chain;
92}; 93};
93 94
95static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
96static unsigned int tcp_metrics_hash_log __read_mostly;
97
94static DEFINE_SPINLOCK(tcp_metrics_lock); 98static DEFINE_SPINLOCK(tcp_metrics_lock);
95 99
96static void tcpm_suck_dst(struct tcp_metrics_block *tm, 100static void tcpm_suck_dst(struct tcp_metrics_block *tm,
@@ -128,6 +132,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
128 if (fastopen_clear) { 132 if (fastopen_clear) {
129 tm->tcpm_fastopen.mss = 0; 133 tm->tcpm_fastopen.mss = 0;
130 tm->tcpm_fastopen.syn_loss = 0; 134 tm->tcpm_fastopen.syn_loss = 0;
135 tm->tcpm_fastopen.try_exp = 0;
136 tm->tcpm_fastopen.cookie.exp = false;
131 tm->tcpm_fastopen.cookie.len = 0; 137 tm->tcpm_fastopen.cookie.len = 0;
132 } 138 }
133} 139}
@@ -143,6 +149,9 @@ static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst
143#define TCP_METRICS_RECLAIM_DEPTH 5 149#define TCP_METRICS_RECLAIM_DEPTH 5
144#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL 150#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL
145 151
152#define deref_locked(p) \
153 rcu_dereference_protected(p, lockdep_is_held(&tcp_metrics_lock))
154
146static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, 155static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
147 struct inetpeer_addr *saddr, 156 struct inetpeer_addr *saddr,
148 struct inetpeer_addr *daddr, 157 struct inetpeer_addr *daddr,
@@ -171,9 +180,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
171 if (unlikely(reclaim)) { 180 if (unlikely(reclaim)) {
172 struct tcp_metrics_block *oldest; 181 struct tcp_metrics_block *oldest;
173 182
174 oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); 183 oldest = deref_locked(tcp_metrics_hash[hash].chain);
175 for (tm = rcu_dereference(oldest->tcpm_next); tm; 184 for (tm = deref_locked(oldest->tcpm_next); tm;
176 tm = rcu_dereference(tm->tcpm_next)) { 185 tm = deref_locked(tm->tcpm_next)) {
177 if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) 186 if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
178 oldest = tm; 187 oldest = tm;
179 } 188 }
@@ -183,14 +192,15 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
183 if (!tm) 192 if (!tm)
184 goto out_unlock; 193 goto out_unlock;
185 } 194 }
195 write_pnet(&tm->tcpm_net, net);
186 tm->tcpm_saddr = *saddr; 196 tm->tcpm_saddr = *saddr;
187 tm->tcpm_daddr = *daddr; 197 tm->tcpm_daddr = *daddr;
188 198
189 tcpm_suck_dst(tm, dst, true); 199 tcpm_suck_dst(tm, dst, true);
190 200
191 if (likely(!reclaim)) { 201 if (likely(!reclaim)) {
192 tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; 202 tm->tcpm_next = tcp_metrics_hash[hash].chain;
193 rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm); 203 rcu_assign_pointer(tcp_metrics_hash[hash].chain, tm);
194 } 204 }
195 205
196out_unlock: 206out_unlock:
@@ -214,10 +224,11 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s
214 struct tcp_metrics_block *tm; 224 struct tcp_metrics_block *tm;
215 int depth = 0; 225 int depth = 0;
216 226
217 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; 227 for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
218 tm = rcu_dereference(tm->tcpm_next)) { 228 tm = rcu_dereference(tm->tcpm_next)) {
219 if (addr_same(&tm->tcpm_saddr, saddr) && 229 if (addr_same(&tm->tcpm_saddr, saddr) &&
220 addr_same(&tm->tcpm_daddr, daddr)) 230 addr_same(&tm->tcpm_daddr, daddr) &&
231 net_eq(tm_net(tm), net))
221 break; 232 break;
222 depth++; 233 depth++;
223 } 234 }
@@ -242,8 +253,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
242 break; 253 break;
243#if IS_ENABLED(CONFIG_IPV6) 254#if IS_ENABLED(CONFIG_IPV6)
244 case AF_INET6: 255 case AF_INET6:
245 *(struct in6_addr *)saddr.addr.a6 = inet_rsk(req)->ir_v6_loc_addr; 256 saddr.addr.in6 = inet_rsk(req)->ir_v6_loc_addr;
246 *(struct in6_addr *)daddr.addr.a6 = inet_rsk(req)->ir_v6_rmt_addr; 257 daddr.addr.in6 = inet_rsk(req)->ir_v6_rmt_addr;
247 hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr); 258 hash = ipv6_addr_hash(&inet_rsk(req)->ir_v6_rmt_addr);
248 break; 259 break;
249#endif 260#endif
@@ -252,12 +263,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
252 } 263 }
253 264
254 net = dev_net(dst->dev); 265 net = dev_net(dst->dev);
255 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); 266 hash ^= net_hash_mix(net);
267 hash = hash_32(hash, tcp_metrics_hash_log);
256 268
257 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; 269 for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
258 tm = rcu_dereference(tm->tcpm_next)) { 270 tm = rcu_dereference(tm->tcpm_next)) {
259 if (addr_same(&tm->tcpm_saddr, &saddr) && 271 if (addr_same(&tm->tcpm_saddr, &saddr) &&
260 addr_same(&tm->tcpm_daddr, &daddr)) 272 addr_same(&tm->tcpm_daddr, &daddr) &&
273 net_eq(tm_net(tm), net))
261 break; 274 break;
262 } 275 }
263 tcpm_check_stamp(tm, dst); 276 tcpm_check_stamp(tm, dst);
@@ -288,9 +301,9 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock
288 hash = (__force unsigned int) daddr.addr.a4; 301 hash = (__force unsigned int) daddr.addr.a4;
289 } else { 302 } else {
290 saddr.family = AF_INET6; 303 saddr.family = AF_INET6;
291 *(struct in6_addr *)saddr.addr.a6 = tw->tw_v6_rcv_saddr; 304 saddr.addr.in6 = tw->tw_v6_rcv_saddr;
292 daddr.family = AF_INET6; 305 daddr.family = AF_INET6;
293 *(struct in6_addr *)daddr.addr.a6 = tw->tw_v6_daddr; 306 daddr.addr.in6 = tw->tw_v6_daddr;
294 hash = ipv6_addr_hash(&tw->tw_v6_daddr); 307 hash = ipv6_addr_hash(&tw->tw_v6_daddr);
295 } 308 }
296 } 309 }
@@ -299,12 +312,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock
299 return NULL; 312 return NULL;
300 313
301 net = twsk_net(tw); 314 net = twsk_net(tw);
302 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); 315 hash ^= net_hash_mix(net);
316 hash = hash_32(hash, tcp_metrics_hash_log);
303 317
304 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; 318 for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
305 tm = rcu_dereference(tm->tcpm_next)) { 319 tm = rcu_dereference(tm->tcpm_next)) {
306 if (addr_same(&tm->tcpm_saddr, &saddr) && 320 if (addr_same(&tm->tcpm_saddr, &saddr) &&
307 addr_same(&tm->tcpm_daddr, &daddr)) 321 addr_same(&tm->tcpm_daddr, &daddr) &&
322 net_eq(tm_net(tm), net))
308 break; 323 break;
309 } 324 }
310 return tm; 325 return tm;
@@ -336,9 +351,9 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
336 hash = (__force unsigned int) daddr.addr.a4; 351 hash = (__force unsigned int) daddr.addr.a4;
337 } else { 352 } else {
338 saddr.family = AF_INET6; 353 saddr.family = AF_INET6;
339 *(struct in6_addr *)saddr.addr.a6 = sk->sk_v6_rcv_saddr; 354 saddr.addr.in6 = sk->sk_v6_rcv_saddr;
340 daddr.family = AF_INET6; 355 daddr.family = AF_INET6;
341 *(struct in6_addr *)daddr.addr.a6 = sk->sk_v6_daddr; 356 daddr.addr.in6 = sk->sk_v6_daddr;
342 hash = ipv6_addr_hash(&sk->sk_v6_daddr); 357 hash = ipv6_addr_hash(&sk->sk_v6_daddr);
343 } 358 }
344 } 359 }
@@ -347,7 +362,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
347 return NULL; 362 return NULL;
348 363
349 net = dev_net(dst->dev); 364 net = dev_net(dst->dev);
350 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); 365 hash ^= net_hash_mix(net);
366 hash = hash_32(hash, tcp_metrics_hash_log);
351 367
352 tm = __tcp_get_metrics(&saddr, &daddr, net, hash); 368 tm = __tcp_get_metrics(&saddr, &daddr, net, hash);
353 if (tm == TCP_METRICS_RECLAIM_PTR) 369 if (tm == TCP_METRICS_RECLAIM_PTR)
@@ -492,7 +508,7 @@ void tcp_init_metrics(struct sock *sk)
492 struct tcp_metrics_block *tm; 508 struct tcp_metrics_block *tm;
493 u32 val, crtt = 0; /* cached RTT scaled by 8 */ 509 u32 val, crtt = 0; /* cached RTT scaled by 8 */
494 510
495 if (dst == NULL) 511 if (!dst)
496 goto reset; 512 goto reset;
497 513
498 dst_confirm(dst); 514 dst_confirm(dst);
@@ -700,6 +716,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
700 if (tfom->mss) 716 if (tfom->mss)
701 *mss = tfom->mss; 717 *mss = tfom->mss;
702 *cookie = tfom->cookie; 718 *cookie = tfom->cookie;
719 if (cookie->len <= 0 && tfom->try_exp == 1)
720 cookie->exp = true;
703 *syn_loss = tfom->syn_loss; 721 *syn_loss = tfom->syn_loss;
704 *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0; 722 *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0;
705 } while (read_seqretry(&fastopen_seqlock, seq)); 723 } while (read_seqretry(&fastopen_seqlock, seq));
@@ -708,7 +726,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
708} 726}
709 727
710void tcp_fastopen_cache_set(struct sock *sk, u16 mss, 728void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
711 struct tcp_fastopen_cookie *cookie, bool syn_lost) 729 struct tcp_fastopen_cookie *cookie, bool syn_lost,
730 u16 try_exp)
712{ 731{
713 struct dst_entry *dst = __sk_dst_get(sk); 732 struct dst_entry *dst = __sk_dst_get(sk);
714 struct tcp_metrics_block *tm; 733 struct tcp_metrics_block *tm;
@@ -725,6 +744,9 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
725 tfom->mss = mss; 744 tfom->mss = mss;
726 if (cookie && cookie->len > 0) 745 if (cookie && cookie->len > 0)
727 tfom->cookie = *cookie; 746 tfom->cookie = *cookie;
747 else if (try_exp > tfom->try_exp &&
748 tfom->cookie.len <= 0 && !tfom->cookie.exp)
749 tfom->try_exp = try_exp;
728 if (syn_lost) { 750 if (syn_lost) {
729 ++tfom->syn_loss; 751 ++tfom->syn_loss;
730 tfom->last_syn_loss = jiffies; 752 tfom->last_syn_loss = jiffies;
@@ -773,19 +795,19 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
773 795
774 switch (tm->tcpm_daddr.family) { 796 switch (tm->tcpm_daddr.family) {
775 case AF_INET: 797 case AF_INET:
776 if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4, 798 if (nla_put_in_addr(msg, TCP_METRICS_ATTR_ADDR_IPV4,
777 tm->tcpm_daddr.addr.a4) < 0) 799 tm->tcpm_daddr.addr.a4) < 0)
778 goto nla_put_failure; 800 goto nla_put_failure;
779 if (nla_put_be32(msg, TCP_METRICS_ATTR_SADDR_IPV4, 801 if (nla_put_in_addr(msg, TCP_METRICS_ATTR_SADDR_IPV4,
780 tm->tcpm_saddr.addr.a4) < 0) 802 tm->tcpm_saddr.addr.a4) < 0)
781 goto nla_put_failure; 803 goto nla_put_failure;
782 break; 804 break;
783 case AF_INET6: 805 case AF_INET6:
784 if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16, 806 if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_ADDR_IPV6,
785 tm->tcpm_daddr.addr.a6) < 0) 807 &tm->tcpm_daddr.addr.in6) < 0)
786 goto nla_put_failure; 808 goto nla_put_failure;
787 if (nla_put(msg, TCP_METRICS_ATTR_SADDR_IPV6, 16, 809 if (nla_put_in6_addr(msg, TCP_METRICS_ATTR_SADDR_IPV6,
788 tm->tcpm_saddr.addr.a6) < 0) 810 &tm->tcpm_saddr.addr.in6) < 0)
789 goto nla_put_failure; 811 goto nla_put_failure;
790 break; 812 break;
791 default: 813 default:
@@ -898,17 +920,19 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb,
898 struct netlink_callback *cb) 920 struct netlink_callback *cb)
899{ 921{
900 struct net *net = sock_net(skb->sk); 922 struct net *net = sock_net(skb->sk);
901 unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; 923 unsigned int max_rows = 1U << tcp_metrics_hash_log;
902 unsigned int row, s_row = cb->args[0]; 924 unsigned int row, s_row = cb->args[0];
903 int s_col = cb->args[1], col = s_col; 925 int s_col = cb->args[1], col = s_col;
904 926
905 for (row = s_row; row < max_rows; row++, s_col = 0) { 927 for (row = s_row; row < max_rows; row++, s_col = 0) {
906 struct tcp_metrics_block *tm; 928 struct tcp_metrics_block *tm;
907 struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row; 929 struct tcpm_hash_bucket *hb = tcp_metrics_hash + row;
908 930
909 rcu_read_lock(); 931 rcu_read_lock();
910 for (col = 0, tm = rcu_dereference(hb->chain); tm; 932 for (col = 0, tm = rcu_dereference(hb->chain); tm;
911 tm = rcu_dereference(tm->tcpm_next), col++) { 933 tm = rcu_dereference(tm->tcpm_next), col++) {
934 if (!net_eq(tm_net(tm), net))
935 continue;
912 if (col < s_col) 936 if (col < s_col)
913 continue; 937 continue;
914 if (tcp_metrics_dump_info(skb, cb, tm) < 0) { 938 if (tcp_metrics_dump_info(skb, cb, tm) < 0) {
@@ -933,7 +957,7 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
933 a = info->attrs[v4]; 957 a = info->attrs[v4];
934 if (a) { 958 if (a) {
935 addr->family = AF_INET; 959 addr->family = AF_INET;
936 addr->addr.a4 = nla_get_be32(a); 960 addr->addr.a4 = nla_get_in_addr(a);
937 if (hash) 961 if (hash)
938 *hash = (__force unsigned int) addr->addr.a4; 962 *hash = (__force unsigned int) addr->addr.a4;
939 return 0; 963 return 0;
@@ -943,9 +967,9 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
943 if (nla_len(a) != sizeof(struct in6_addr)) 967 if (nla_len(a) != sizeof(struct in6_addr))
944 return -EINVAL; 968 return -EINVAL;
945 addr->family = AF_INET6; 969 addr->family = AF_INET6;
946 memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); 970 addr->addr.in6 = nla_get_in6_addr(a);
947 if (hash) 971 if (hash)
948 *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6); 972 *hash = ipv6_addr_hash(&addr->addr.in6);
949 return 0; 973 return 0;
950 } 974 }
951 return optional ? 1 : -EAFNOSUPPORT; 975 return optional ? 1 : -EAFNOSUPPORT;
@@ -994,13 +1018,15 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
994 if (!reply) 1018 if (!reply)
995 goto nla_put_failure; 1019 goto nla_put_failure;
996 1020
997 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); 1021 hash ^= net_hash_mix(net);
1022 hash = hash_32(hash, tcp_metrics_hash_log);
998 ret = -ESRCH; 1023 ret = -ESRCH;
999 rcu_read_lock(); 1024 rcu_read_lock();
1000 for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; 1025 for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
1001 tm = rcu_dereference(tm->tcpm_next)) { 1026 tm = rcu_dereference(tm->tcpm_next)) {
1002 if (addr_same(&tm->tcpm_daddr, &daddr) && 1027 if (addr_same(&tm->tcpm_daddr, &daddr) &&
1003 (!src || addr_same(&tm->tcpm_saddr, &saddr))) { 1028 (!src || addr_same(&tm->tcpm_saddr, &saddr)) &&
1029 net_eq(tm_net(tm), net)) {
1004 ret = tcp_metrics_fill_info(msg, tm); 1030 ret = tcp_metrics_fill_info(msg, tm);
1005 break; 1031 break;
1006 } 1032 }
@@ -1020,34 +1046,27 @@ out_free:
1020 return ret; 1046 return ret;
1021} 1047}
1022 1048
1023#define deref_locked_genl(p) \ 1049static void tcp_metrics_flush_all(struct net *net)
1024 rcu_dereference_protected(p, lockdep_genl_is_held() && \
1025 lockdep_is_held(&tcp_metrics_lock))
1026
1027#define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held())
1028
1029static int tcp_metrics_flush_all(struct net *net)
1030{ 1050{
1031 unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log; 1051 unsigned int max_rows = 1U << tcp_metrics_hash_log;
1032 struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash; 1052 struct tcpm_hash_bucket *hb = tcp_metrics_hash;
1033 struct tcp_metrics_block *tm; 1053 struct tcp_metrics_block *tm;
1034 unsigned int row; 1054 unsigned int row;
1035 1055
1036 for (row = 0; row < max_rows; row++, hb++) { 1056 for (row = 0; row < max_rows; row++, hb++) {
1057 struct tcp_metrics_block __rcu **pp;
1037 spin_lock_bh(&tcp_metrics_lock); 1058 spin_lock_bh(&tcp_metrics_lock);
1038 tm = deref_locked_genl(hb->chain); 1059 pp = &hb->chain;
1039 if (tm) 1060 for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
1040 hb->chain = NULL; 1061 if (net_eq(tm_net(tm), net)) {
1041 spin_unlock_bh(&tcp_metrics_lock); 1062 *pp = tm->tcpm_next;
1042 while (tm) { 1063 kfree_rcu(tm, rcu_head);
1043 struct tcp_metrics_block *next; 1064 } else {
1044 1065 pp = &tm->tcpm_next;
1045 next = deref_genl(tm->tcpm_next); 1066 }
1046 kfree_rcu(tm, rcu_head);
1047 tm = next;
1048 } 1067 }
1068 spin_unlock_bh(&tcp_metrics_lock);
1049 } 1069 }
1050 return 0;
1051} 1070}
1052 1071
1053static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) 1072static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
@@ -1064,19 +1083,23 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
1064 ret = parse_nl_addr(info, &daddr, &hash, 1); 1083 ret = parse_nl_addr(info, &daddr, &hash, 1);
1065 if (ret < 0) 1084 if (ret < 0)
1066 return ret; 1085 return ret;
1067 if (ret > 0) 1086 if (ret > 0) {
1068 return tcp_metrics_flush_all(net); 1087 tcp_metrics_flush_all(net);
1088 return 0;
1089 }
1069 ret = parse_nl_saddr(info, &saddr); 1090 ret = parse_nl_saddr(info, &saddr);
1070 if (ret < 0) 1091 if (ret < 0)
1071 src = false; 1092 src = false;
1072 1093
1073 hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); 1094 hash ^= net_hash_mix(net);
1074 hb = net->ipv4.tcp_metrics_hash + hash; 1095 hash = hash_32(hash, tcp_metrics_hash_log);
1096 hb = tcp_metrics_hash + hash;
1075 pp = &hb->chain; 1097 pp = &hb->chain;
1076 spin_lock_bh(&tcp_metrics_lock); 1098 spin_lock_bh(&tcp_metrics_lock);
1077 for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) { 1099 for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
1078 if (addr_same(&tm->tcpm_daddr, &daddr) && 1100 if (addr_same(&tm->tcpm_daddr, &daddr) &&
1079 (!src || addr_same(&tm->tcpm_saddr, &saddr))) { 1101 (!src || addr_same(&tm->tcpm_saddr, &saddr)) &&
1102 net_eq(tm_net(tm), net)) {
1080 *pp = tm->tcpm_next; 1103 *pp = tm->tcpm_next;
1081 kfree_rcu(tm, rcu_head); 1104 kfree_rcu(tm, rcu_head);
1082 found = true; 1105 found = true;
@@ -1126,6 +1149,9 @@ static int __net_init tcp_net_metrics_init(struct net *net)
1126 size_t size; 1149 size_t size;
1127 unsigned int slots; 1150 unsigned int slots;
1128 1151
1152 if (!net_eq(net, &init_net))
1153 return 0;
1154
1129 slots = tcpmhash_entries; 1155 slots = tcpmhash_entries;
1130 if (!slots) { 1156 if (!slots) {
1131 if (totalram_pages >= 128 * 1024) 1157 if (totalram_pages >= 128 * 1024)
@@ -1134,14 +1160,14 @@ static int __net_init tcp_net_metrics_init(struct net *net)
1134 slots = 8 * 1024; 1160 slots = 8 * 1024;
1135 } 1161 }
1136 1162
1137 net->ipv4.tcp_metrics_hash_log = order_base_2(slots); 1163 tcp_metrics_hash_log = order_base_2(slots);
1138 size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log; 1164 size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log;
1139 1165
1140 net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); 1166 tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1141 if (!net->ipv4.tcp_metrics_hash) 1167 if (!tcp_metrics_hash)
1142 net->ipv4.tcp_metrics_hash = vzalloc(size); 1168 tcp_metrics_hash = vzalloc(size);
1143 1169
1144 if (!net->ipv4.tcp_metrics_hash) 1170 if (!tcp_metrics_hash)
1145 return -ENOMEM; 1171 return -ENOMEM;
1146 1172
1147 return 0; 1173 return 0;
@@ -1149,19 +1175,7 @@ static int __net_init tcp_net_metrics_init(struct net *net)
1149 1175
1150static void __net_exit tcp_net_metrics_exit(struct net *net) 1176static void __net_exit tcp_net_metrics_exit(struct net *net)
1151{ 1177{
1152 unsigned int i; 1178 tcp_metrics_flush_all(net);
1153
1154 for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) {
1155 struct tcp_metrics_block *tm, *next;
1156
1157 tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
1158 while (tm) {
1159 next = rcu_dereference_protected(tm->tcpm_next, 1);
1160 kfree(tm);
1161 tm = next;
1162 }
1163 }
1164 kvfree(net->ipv4.tcp_metrics_hash);
1165} 1179}
1166 1180
1167static __net_initdata struct pernet_operations tcp_net_metrics_ops = { 1181static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
@@ -1175,16 +1189,10 @@ void __init tcp_metrics_init(void)
1175 1189
1176 ret = register_pernet_subsys(&tcp_net_metrics_ops); 1190 ret = register_pernet_subsys(&tcp_net_metrics_ops);
1177 if (ret < 0) 1191 if (ret < 0)
1178 goto cleanup; 1192 panic("Could not allocate the tcp_metrics hash table\n");
1193
1179 ret = genl_register_family_with_ops(&tcp_metrics_nl_family, 1194 ret = genl_register_family_with_ops(&tcp_metrics_nl_family,
1180 tcp_metrics_nl_ops); 1195 tcp_metrics_nl_ops);
1181 if (ret < 0) 1196 if (ret < 0)
1182 goto cleanup_subsys; 1197 panic("Could not register tcp_metrics generic netlink\n");
1183 return;
1184
1185cleanup_subsys:
1186 unregister_pernet_subsys(&tcp_net_metrics_ops);
1187
1188cleanup:
1189 return;
1190} 1198}