aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShaohua Li <shli@fb.com>2017-12-20 15:10:21 -0500
committerDavid S. Miller <davem@davemloft.net>2017-12-21 13:07:20 -0500
commit513674b5a2c9c7a67501506419da5c3c77ac6f08 (patch)
treee5edafc79071a2181ba5dd6b3d6b6ff7f484ec8a
parentc48e74736fccf25fb32bb015426359e1c2016e3b (diff)
net: reevalulate autoflowlabel setting after sysctl setting
sysctl.ip6.auto_flowlabels is default 1. In our hosts, we set it to 2. If sockopt doesn't set autoflowlabel, outcome packets from the hosts are supposed to not include flowlabel. This is true for normal packet, but not for reset packet. The reason is ipv6_pinfo.autoflowlabel is set in sock creation. Later if we change sysctl.ip6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't changed, so the sock will keep the old behavior in terms of auto flowlabel. Reset packet is suffering from this problem, because reset packet is sent from a special control socket, which is created at boot time. Since sysctl.ipv6.auto_flowlabels is 1 by default, the control socket will always have its ipv6_pinfo.autoflowlabel set, even after user set sysctl.ipv6.auto_flowlabels to 1, so reset packset will always have flowlabel. Normal sock created before sysctl setting suffers from the same issue. We can't even turn off autoflowlabel unless we kill all socks in the hosts. To fix this, if IPV6_AUTOFLOWLABEL sockopt is used, we use the autoflowlabel setting from user, otherwise we always call ip6_default_np_autolabel() which has the new settings of sysctl. Note, this changes behavior a little bit. Before commit 42240901f7c4 (ipv6: Implement different admin modes for automatic flow labels), the autoflowlabel behavior of a sock isn't sticky, eg, if sysctl changes, existing connection will change autoflowlabel behavior. After that commit, autoflowlabel behavior is sticky in the whole life of the sock. With this patch, the behavior isn't sticky again. Cc: Martin KaFai Lau <kafai@fb.com> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Tom Herbert <tom@quantonium.net> Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/ipv6.h3
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/ip6_output.c12
-rw-r--r--net/ipv6/ipv6_sockglue.c1
4 files changed, 13 insertions, 4 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index cb18c6290ca8..8415bf1a9776 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
273 * 100: prefer care-of address 273 * 100: prefer care-of address
274 */ 274 */
275 dontfrag:1, 275 dontfrag:1,
276 autoflowlabel:1; 276 autoflowlabel:1,
277 autoflowlabel_set:1;
277 __u8 min_hopcount; 278 __u8 min_hopcount;
278 __u8 tclass; 279 __u8 tclass;
279 __be32 rcv_flowinfo; 280 __be32 rcv_flowinfo;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c26f71234b9c..c9441ca45399 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,6 @@ lookup_protocol:
210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; 210 np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
211 np->mc_loop = 1; 211 np->mc_loop = 1;
212 np->pmtudisc = IPV6_PMTUDISC_WANT; 212 np->pmtudisc = IPV6_PMTUDISC_WANT;
213 np->autoflowlabel = ip6_default_np_autolabel(net);
214 np->repflow = net->ipv6.sysctl.flowlabel_reflect; 213 np->repflow = net->ipv6.sysctl.flowlabel_reflect;
215 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; 214 sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
216 215
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5110a418cc4d..f7dd51c42314 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
166 !(IP6CB(skb)->flags & IP6SKB_REROUTED)); 166 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
167} 167}
168 168
169static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
170{
171 if (!np->autoflowlabel_set)
172 return ip6_default_np_autolabel(net);
173 else
174 return np->autoflowlabel;
175}
176
169/* 177/*
170 * xmit an sk_buff (used by TCP, SCTP and DCCP) 178 * xmit an sk_buff (used by TCP, SCTP and DCCP)
171 * Note : socket lock is not held for SYNACK packets, but might be modified 179 * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
230 hlimit = ip6_dst_hoplimit(dst); 238 hlimit = ip6_dst_hoplimit(dst);
231 239
232 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, 240 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
233 np->autoflowlabel, fl6)); 241 ip6_autoflowlabel(net, np), fl6));
234 242
235 hdr->payload_len = htons(seg_len); 243 hdr->payload_len = htons(seg_len);
236 hdr->nexthdr = proto; 244 hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
1626 1634
1627 ip6_flow_hdr(hdr, v6_cork->tclass, 1635 ip6_flow_hdr(hdr, v6_cork->tclass,
1628 ip6_make_flowlabel(net, skb, fl6->flowlabel, 1636 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1629 np->autoflowlabel, fl6)); 1637 ip6_autoflowlabel(net, np), fl6));
1630 hdr->hop_limit = v6_cork->hop_limit; 1638 hdr->hop_limit = v6_cork->hop_limit;
1631 hdr->nexthdr = proto; 1639 hdr->nexthdr = proto;
1632 hdr->saddr = fl6->saddr; 1640 hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b9404feabd78..2d4680e0376f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -886,6 +886,7 @@ pref_skip_coa:
886 break; 886 break;
887 case IPV6_AUTOFLOWLABEL: 887 case IPV6_AUTOFLOWLABEL:
888 np->autoflowlabel = valbool; 888 np->autoflowlabel = valbool;
889 np->autoflowlabel_set = 1;
889 retv = 0; 890 retv = 0;
890 break; 891 break;
891 case IPV6_RECVFRAGSIZE: 892 case IPV6_RECVFRAGSIZE: