Diffstat (limited to 'net/ipv4')
39 files changed, 875 insertions, 685 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f581f77d1097..f2b61107df6c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1148,21 +1148,13 @@ int inet_sk_rebuild_header(struct sock *sk)
     struct flowi fl = {
         .oif = sk->sk_bound_dev_if,
         .mark = sk->sk_mark,
-        .nl_u = {
-            .ip4_u = {
-                .daddr = daddr,
-                .saddr = inet->inet_saddr,
-                .tos = RT_CONN_FLAGS(sk),
-            },
-        },
+        .fl4_dst = daddr,
+        .fl4_src = inet->inet_saddr,
+        .fl4_tos = RT_CONN_FLAGS(sk),
         .proto = sk->sk_protocol,
         .flags = inet_sk_flowi_flags(sk),
-        .uli_u = {
-            .ports = {
-                .sport = inet->inet_sport,
-                .dport = inet->inet_dport,
-            },
-        },
+        .fl_ip_sport = inet->inet_sport,
+        .fl_ip_dport = inet->inet_dport,
     };

     security_sk_classify_flow(sk, &fl);
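Note: the pattern in this first hunk repeats across the whole patch. The new fl4_*/fl_ip_* names are flat aliases for the old nested flowi members (an icmp.c hunk below still spells one of them as fl.nl_u.ip4_u.saddr, which is the same storage), so an IPv4 flow key collapses from a doubly nested initializer to a flat one. A minimal sketch, assuming the flow.h aliases of this era (roughly #define fl4_dst nl_u.ip4_u.daddr):

    /* both initializers name the same member; only the spelling changes */
    struct flowi fl_old = { .nl_u = { .ip4_u = { .daddr = dst } } };
    struct flowi fl_new = { .fl4_dst = dst };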
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index d8e540c5b071..a2fc7b961dbc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -433,8 +433,8 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)

 static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 {
-    struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip,
-                                             .saddr = tip } } };
+    struct flowi fl = { .fl4_dst = sip,
+                        .fl4_src = tip };
     struct rtable *rt;
     int flag = 0;
     /*unsigned long now; */
@@ -883,7 +883,7 @@ static int arp_process(struct sk_buff *skb)

     dont_send = arp_ignore(in_dev, sip, tip);
     if (!dont_send && IN_DEV_ARPFILTER(in_dev))
-        dont_send |= arp_filter(sip, tip, dev);
+        dont_send = arp_filter(sip, tip, dev);
     if (!dont_send) {
         n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
         if (n) {
@@ -1017,13 +1017,14 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
         IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
         return 0;
     }
-    if (__in_dev_get_rtnl(dev)) {
-        IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
+    if (__in_dev_get_rcu(dev)) {
+        IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on);
         return 0;
     }
     return -ENXIO;
 }

+/* must be called with rcu_read_lock() */
 static int arp_req_set_public(struct net *net, struct arpreq *r,
         struct net_device *dev)
 {
@@ -1033,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
     if (mask && mask != htonl(0xFFFFFFFF))
         return -EINVAL;
     if (!dev && (r->arp_flags & ATF_COM)) {
-        dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
+        dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
                               r->arp_ha.sa_data);
         if (!dev)
             return -ENODEV;
@@ -1061,8 +1062,8 @@ static int arp_req_set(struct net *net, struct arpreq *r,
     if (r->arp_flags & ATF_PERM)
         r->arp_flags |= ATF_COM;
     if (dev == NULL) {
-        struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
-                                            .tos = RTO_ONLINK } };
+        struct flowi fl = { .fl4_dst = ip,
+                            .fl4_tos = RTO_ONLINK };
         struct rtable *rt;
         err = ip_route_output_key(net, &rt, &fl);
         if (err != 0)
@@ -1169,8 +1170,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r,

     ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
     if (dev == NULL) {
-        struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
-                                            .tos = RTO_ONLINK } };
+        struct flowi fl = { .fl4_dst = ip,
+                            .fl4_tos = RTO_ONLINK };
         struct rtable *rt;
         err = ip_route_output_key(net, &rt, &fl);
         if (err != 0)
@@ -1225,10 +1226,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
     if (!(r.arp_flags & ATF_NETMASK))
         ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
                                                htonl(0xFFFFFFFFUL);
-    rtnl_lock();
+    rcu_read_lock();
     if (r.arp_dev[0]) {
         err = -ENODEV;
-        dev = __dev_get_by_name(net, r.arp_dev);
+        dev = dev_get_by_name_rcu(net, r.arp_dev);
         if (dev == NULL)
             goto out;

@@ -1252,12 +1253,12 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
         break;
     case SIOCGARP:
         err = arp_req_get(&r, dev);
-        if (!err && copy_to_user(arg, &r, sizeof(r)))
-            err = -EFAULT;
         break;
     }
 out:
-    rtnl_unlock();
+    rcu_read_unlock();
+    if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
+        err = -EFAULT;
     return err;
 }

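Note: a plausible reading of the SIOCGARP hunk above is that copy_to_user() may fault and sleep, which is not allowed inside the RCU read-side critical section that now replaces RTNL in arp_ioctl(), so the copy-out is deferred until after rcu_read_unlock(). The general pattern, as a sketch with hypothetical names:

    rcu_read_lock();
    err = lookup_and_fill(&r);          /* no sleeping in here */
    rcu_read_unlock();
    if (!err && copy_to_user(uarg, &r, sizeof(r)))  /* may fault and sleep */
        err = -EFAULT;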
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index dc94b0316b78..748cb5b337bd 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1256,6 +1256,87 @@ errout:
         rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
 }

+static size_t inet_get_link_af_size(const struct net_device *dev)
+{
+    struct in_device *in_dev = __in_dev_get_rtnl(dev);
+
+    if (!in_dev)
+        return 0;
+
+    return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
+}
+
+static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+{
+    struct in_device *in_dev = __in_dev_get_rtnl(dev);
+    struct nlattr *nla;
+    int i;
+
+    if (!in_dev)
+        return -ENODATA;
+
+    nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
+    if (nla == NULL)
+        return -EMSGSIZE;
+
+    for (i = 0; i < IPV4_DEVCONF_MAX; i++)
+        ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
+
+    return 0;
+}
+
+static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
+    [IFLA_INET_CONF] = { .type = NLA_NESTED },
+};
+
+static int inet_validate_link_af(const struct net_device *dev,
+                                 const struct nlattr *nla)
+{
+    struct nlattr *a, *tb[IFLA_INET_MAX+1];
+    int err, rem;
+
+    if (dev && !__in_dev_get_rtnl(dev))
+        return -EAFNOSUPPORT;
+
+    err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
+    if (err < 0)
+        return err;
+
+    if (tb[IFLA_INET_CONF]) {
+        nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
+            int cfgid = nla_type(a);
+
+            if (nla_len(a) < 4)
+                return -EINVAL;
+
+            if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
+                return -EINVAL;
+        }
+    }
+
+    return 0;
+}
+
+static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+    struct in_device *in_dev = __in_dev_get_rtnl(dev);
+    struct nlattr *a, *tb[IFLA_INET_MAX+1];
+    int rem;
+
+    if (!in_dev)
+        return -EAFNOSUPPORT;
+
+    if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
+        BUG();
+
+    if (tb[IFLA_INET_CONF]) {
+        nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
+            ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
+    }
+
+    return 0;
+}
+
 #ifdef CONFIG_SYSCTL

 static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -1349,9 +1430,9 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
     return ret;
 }

-int ipv4_doint_and_flush(ctl_table *ctl, int write,
-                         void __user *buffer,
-                         size_t *lenp, loff_t *ppos)
+static int ipv4_doint_and_flush(ctl_table *ctl, int write,
+                                void __user *buffer,
+                                size_t *lenp, loff_t *ppos)
 {
     int *valp = ctl->data;
     int val = *valp;
@@ -1619,6 +1700,14 @@ static __net_initdata struct pernet_operations devinet_ops = {
     .exit = devinet_exit_net,
 };

+static struct rtnl_af_ops inet_af_ops = {
+    .family           = AF_INET,
+    .fill_link_af     = inet_fill_link_af,
+    .get_link_af_size = inet_get_link_af_size,
+    .validate_link_af = inet_validate_link_af,
+    .set_link_af      = inet_set_link_af,
+};
+
 void __init devinet_init(void)
 {
     register_pernet_subsys(&devinet_ops);
@@ -1626,6 +1715,8 @@ void __init devinet_init(void)
     register_gifconf(PF_INET, inet_gifconf);
     register_netdevice_notifier(&ip_netdev_notifier);

+    rtnl_af_register(&inet_af_ops);
+
     rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
     rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
     rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
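Note: the new rtnl_af_ops hooks publish per-device IPv4 devconf through rtnetlink as a single IFLA_INET_CONF attribute holding IPV4_DEVCONF_MAX u32 values. Assuming the usual rtnl_af_ops plumbing (not shown in this diff), the nesting on the wire would look roughly like:

    /*
     * IFLA_AF_SPEC
     *   `- AF_INET
     *        `- IFLA_INET_CONF: u32[IPV4_DEVCONF_MAX]
     *           (filled by array index; set/validated via nla_type(a)
     *            as the 1-based devconf id, hence the cfgid bounds check)
     */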
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 14ca1f1c3fb0..e42a905180f0 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -23,6 +23,8 @@ struct esp_skb_cb {

 #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))

+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
+
 /*
  * Allocate an AEAD request structure with extra space for SG and IV.
  *
@@ -117,25 +119,35 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
     int blksize;
     int clen;
     int alen;
+    int plen;
+    int tfclen;
     int nfrags;

     /* skb is pure payload to encrypt */

     err = -ENOMEM;

-    /* Round to block size */
-    clen = skb->len;
-
     esp = x->data;
     aead = esp->aead;
     alen = crypto_aead_authsize(aead);

+    tfclen = 0;
+    if (x->tfcpad) {
+        struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+        u32 padto;
+
+        padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached));
+        if (skb->len < padto)
+            tfclen = padto - skb->len;
+    }
     blksize = ALIGN(crypto_aead_blocksize(aead), 4);
-    clen = ALIGN(clen + 2, blksize);
+    clen = ALIGN(skb->len + 2 + tfclen, blksize);
     if (esp->padlen)
         clen = ALIGN(clen, esp->padlen);
+    plen = clen - skb->len - tfclen;

-    if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0)
+    err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
+    if (err < 0)
         goto error;
     nfrags = err;

@@ -150,13 +162,17 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)

     /* Fill padding... */
     tail = skb_tail_pointer(trailer);
+    if (tfclen) {
+        memset(tail, 0, tfclen);
+        tail += tfclen;
+    }
     do {
         int i;
-        for (i=0; i<clen-skb->len - 2; i++)
+        for (i = 0; i < plen - 2; i++)
             tail[i] = i + 1;
     } while (0);
-    tail[clen - skb->len - 2] = (clen - skb->len) - 2;
-    tail[clen - skb->len - 1] = *skb_mac_header(skb);
+    tail[plen - 2] = plen - 2;
+    tail[plen - 1] = *skb_mac_header(skb);
     pskb_put(skb, trailer, clen - skb->len + alen);

     skb_push(skb, -skb_network_offset(skb));
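Note: a worked example of the new TFC padding arithmetic in esp_output(), under assumed values blksize = 16, x->tfcpad = 512, skb->len = 100, esp->padlen = 0, and an ESP MTU of at least 512:

    padto  = min(512, esp4_get_mtu(x, ...))  /* = 512 */
    tfclen = padto - skb->len                /* = 412 */
    clen   = ALIGN(100 + 2 + 412, 16)        /* = 528 */
    plen   = clen - skb->len - tfclen        /* = 16  */

so the trailer appended after the payload is 412 zero bytes of TFC fill, plen - 2 = 14 pad bytes (1..14), the pad-length byte (14), the next-header byte, then alen bytes of ICV.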
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index eb6f69a8f27a..1d2cdd43a878 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -158,18 +158,20 @@ static void fib_flush(struct net *net)
 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 {
     struct flowi fl = {
-        .nl_u = {
-            .ip4_u = {
-                .daddr = addr
-            }
-        },
-        .flags = FLOWI_FLAG_MATCH_ANY_IIF
+        .fl4_dst = addr,
     };
     struct fib_result res = { 0 };
     struct net_device *dev = NULL;
+    struct fib_table *local_table;
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+    res.r = NULL;
+#endif

     rcu_read_lock();
-    if (fib_lookup(net, &fl, &res)) {
+    local_table = fib_get_table(net, RT_TABLE_LOCAL);
+    if (!local_table ||
+        fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
         rcu_read_unlock();
         return NULL;
     }
@@ -193,7 +195,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
                                             const struct net_device *dev,
                                             __be32 addr)
 {
-    struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
+    struct flowi fl = { .fl4_dst = addr };
     struct fib_result res;
     unsigned ret = RTN_BROADCAST;
     struct fib_table *local_table;
@@ -247,13 +249,9 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 {
     struct in_device *in_dev;
     struct flowi fl = {
-        .nl_u = {
-            .ip4_u = {
-                .daddr = src,
-                .saddr = dst,
-                .tos = tos
-            }
-        },
+        .fl4_dst = src,
+        .fl4_src = dst,
+        .fl4_tos = tos,
         .mark = mark,
         .iif = oif
     };
@@ -853,13 +851,9 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
     struct fib_result res;
     struct flowi fl = {
         .mark = frn->fl_mark,
-        .nl_u = {
-            .ip4_u = {
-                .daddr = frn->fl_addr,
-                .tos = frn->fl_tos,
-                .scope = frn->fl_scope
-            }
-        }
+        .fl4_dst = frn->fl_addr,
+        .fl4_tos = frn->fl_tos,
+        .fl4_scope = frn->fl_scope,
     };

 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -999,7 +993,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
         rt_cache_flush(dev_net(dev), 0);
         break;
     case NETDEV_UNREGISTER_BATCH:
-        rt_cache_flush_batch();
+        /* The batch unregister is only called on the first
+         * device in the list of devices being unregistered.
+         * Therefore we should not pass dev_net(dev) in here.
+         */
+        rt_cache_flush_batch(NULL);
         break;
     }
     return NOTIFY_DONE;
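Note: rt_cache_flush_batch() gains a struct net * argument in this series; as the new comment says, NETDEV_UNREGISTER_BATCH fires only for the first device of a batch, so dev_net(dev) would be arbitrary. Assuming the counterpart change in route.c (not shown here):

    void rt_cache_flush_batch(struct net *net);  /* net == NULL: flush every namespace */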
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a72c62d03106..9aff11d7278f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -563,12 +563,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
         rcu_read_lock();
         {
             struct flowi fl = {
-                .nl_u = {
-                    .ip4_u = {
-                        .daddr = nh->nh_gw,
-                        .scope = cfg->fc_scope + 1,
-                    },
-                },
+                .fl4_dst = nh->nh_gw,
+                .fl4_scope = cfg->fc_scope + 1,
                 .oif = nh->nh_oif,
             };

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 200eb538fbb3..0f280348e0fd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -365,7 +365,7 @@ static struct tnode *tnode_alloc(size_t size)
     if (size <= PAGE_SIZE)
         return kzalloc(size, GFP_KERNEL);
     else
-        return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+        return vzalloc(size);
 }

 static void __tnode_vfree(struct work_struct *arg)
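Note: vzalloc() is a helper introduced around this time; to my understanding it is equivalent to the open-coded call it replaces, roughly:

    void *vzalloc(unsigned long size)
    {
        return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
    }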
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c6e2affafbd3..4aa1b7f01ea0 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -386,10 +386,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
         daddr = icmp_param->replyopts.faddr;
     }
     {
-        struct flowi fl = { .nl_u = { .ip4_u =
-                                      { .daddr = daddr,
-                                        .saddr = rt->rt_spec_dst,
-                                        .tos = RT_TOS(ip_hdr(skb)->tos) } },
+        struct flowi fl = { .fl4_dst= daddr,
+                            .fl4_src = rt->rt_spec_dst,
+                            .fl4_tos = RT_TOS(ip_hdr(skb)->tos),
                             .proto = IPPROTO_ICMP };
         security_skb_classify_flow(skb, &fl);
         if (ip_route_output_key(net, &rt, &fl))
@@ -542,22 +541,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)

     {
         struct flowi fl = {
-            .nl_u = {
-                .ip4_u = {
-                    .daddr = icmp_param.replyopts.srr ?
-                             icmp_param.replyopts.faddr :
-                             iph->saddr,
-                    .saddr = saddr,
-                    .tos = RT_TOS(tos)
-                }
-            },
+            .fl4_dst = icmp_param.replyopts.srr ?
+                       icmp_param.replyopts.faddr : iph->saddr,
+            .fl4_src = saddr,
+            .fl4_tos = RT_TOS(tos),
             .proto = IPPROTO_ICMP,
-            .uli_u = {
-                .icmpt = {
-                    .type = type,
-                    .code = code
-                }
-            }
+            .fl_icmp_type = type,
+            .fl_icmp_code = code,
         };
         int err;
         struct rtable *rt2;
@@ -569,6 +559,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
         /* No need to clone since we're just using its address. */
         rt2 = rt;

+        if (!fl.nl_u.ip4_u.saddr)
+            fl.nl_u.ip4_u.saddr = rt->rt_src;
+
         err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
         switch (err) {
         case 0:
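Note: an assumed rationale for defaulting the flow's source address before xfrm_lookup(): IPsec policy selectors match on source as well as destination (plus protocol and the ICMP type/code set just above), so leaving saddr as zero could miss or mismatch policies; rt->rt_src is the address the reply will actually use. The added lines are the nested spelling of the alias the rest of this patch writes as fl4_src:

    if (!fl.fl4_src)           /* equivalent spelling of the added check */
        fl.fl4_src = rt->rt_src;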
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 08d0d81ffc15..e0e77e297de3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -149,21 +149,37 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc);
 static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
                          int sfcount, __be32 *psfsrc, int delta);

+
+static void ip_mc_list_reclaim(struct rcu_head *head)
+{
+    kfree(container_of(head, struct ip_mc_list, rcu));
+}
+
 static void ip_ma_put(struct ip_mc_list *im)
 {
     if (atomic_dec_and_test(&im->refcnt)) {
         in_dev_put(im->interface);
-        kfree(im);
+        call_rcu(&im->rcu, ip_mc_list_reclaim);
     }
 }

+#define for_each_pmc_rcu(in_dev, pmc)                   \
+    for (pmc = rcu_dereference(in_dev->mc_list);        \
+         pmc != NULL;                                   \
+         pmc = rcu_dereference(pmc->next_rcu))
+
+#define for_each_pmc_rtnl(in_dev, pmc)                  \
+    for (pmc = rtnl_dereference(in_dev->mc_list);       \
+         pmc != NULL;                                   \
+         pmc = rtnl_dereference(pmc->next_rcu))
+
 #ifdef CONFIG_IP_MULTICAST

 /*
  * Timer management
  */

-static __inline__ void igmp_stop_timer(struct ip_mc_list *im)
+static void igmp_stop_timer(struct ip_mc_list *im)
 {
     spin_lock_bh(&im->lock);
     if (del_timer(&im->timer))
@@ -284,6 +300,8 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
     return scount;
 }

+#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb))
+
 static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 {
     struct sk_buff *skb;
@@ -292,14 +310,20 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
     struct igmpv3_report *pig;
     struct net *net = dev_net(dev);

-    skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
-    if (skb == NULL)
-        return NULL;
+    while (1) {
+        skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
+                        GFP_ATOMIC | __GFP_NOWARN);
+        if (skb)
+            break;
+        size >>= 1;
+        if (size < 256)
+            return NULL;
+    }
+    igmp_skb_size(skb) = size;

     {
         struct flowi fl = { .oif = dev->ifindex,
-                            .nl_u = { .ip4_u = {
-                            .daddr = IGMPV3_ALL_MCR } },
+                            .fl4_dst = IGMPV3_ALL_MCR,
                             .proto = IPPROTO_IGMP };
         if (ip_route_output_key(net, &rt, &fl)) {
             kfree_skb(skb);
@@ -384,7 +408,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
     return skb;
 }

-#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \
+#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \
     skb_tailroom(skb)) : 0)

 static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
@@ -502,8 +526,8 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
     int type;

     if (!pmc) {
-        read_lock(&in_dev->mc_list_lock);
-        for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+        rcu_read_lock();
+        for_each_pmc_rcu(in_dev, pmc) {
             if (pmc->multiaddr == IGMP_ALL_HOSTS)
                 continue;
             spin_lock_bh(&pmc->lock);
@@ -514,7 +538,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
             skb = add_grec(skb, pmc, type, 0, 0);
             spin_unlock_bh(&pmc->lock);
         }
-        read_unlock(&in_dev->mc_list_lock);
+        rcu_read_unlock();
     } else {
         spin_lock_bh(&pmc->lock);
         if (pmc->sfcount[MCAST_EXCLUDE])
@@ -556,7 +580,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
     struct sk_buff *skb = NULL;
     int type, dtype;

-    read_lock(&in_dev->mc_list_lock);
+    rcu_read_lock();
     spin_lock_bh(&in_dev->mc_tomb_lock);

     /* deleted MCA's */
@@ -593,7 +617,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
     spin_unlock_bh(&in_dev->mc_tomb_lock);

     /* change recs */
-    for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+    for_each_pmc_rcu(in_dev, pmc) {
         spin_lock_bh(&pmc->lock);
         if (pmc->sfcount[MCAST_EXCLUDE]) {
             type = IGMPV3_BLOCK_OLD_SOURCES;
@@ -616,7 +640,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
         }
         spin_unlock_bh(&pmc->lock);
     }
-    read_unlock(&in_dev->mc_list_lock);
+    rcu_read_unlock();

     if (!skb)
         return;
@@ -644,7 +668,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,

     {
         struct flowi fl = { .oif = dev->ifindex,
-                            .nl_u = { .ip4_u = { .daddr = dst } },
+                            .fl4_dst = dst,
                             .proto = IPPROTO_IGMP };
         if (ip_route_output_key(net, &rt, &fl))
             return -1;
@@ -813,14 +837,14 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
     if (group == IGMP_ALL_HOSTS)
         return;

-    read_lock(&in_dev->mc_list_lock);
-    for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+    rcu_read_lock();
+    for_each_pmc_rcu(in_dev, im) {
         if (im->multiaddr == group) {
             igmp_stop_timer(im);
             break;
         }
     }
-    read_unlock(&in_dev->mc_list_lock);
+    rcu_read_unlock();
 }

 static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
@@ -906,8 +930,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
      * - Use the igmp->igmp_code field as the maximum
      *   delay possible
      */
-    read_lock(&in_dev->mc_list_lock);
-    for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+    rcu_read_lock();
+    for_each_pmc_rcu(in_dev, im) {
         int changed;

         if (group && group != im->multiaddr)
@@ -925,7 +949,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
         if (changed)
             igmp_mod_timer(im, max_delay);
     }
-    read_unlock(&in_dev->mc_list_lock);
+    rcu_read_unlock();
 }

 /* called in rcu_read_lock() section */
@@ -1110,8 +1134,8 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
         kfree(pmc);
     }
     /* clear dead sources, too */
-    read_lock(&in_dev->mc_list_lock);
-    for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+    rcu_read_lock();
+    for_each_pmc_rcu(in_dev, pmc) {
         struct ip_sf_list *psf, *psf_next;

         spin_lock_bh(&pmc->lock);
@@ -1123,7 +1147,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
             kfree(psf);
         }
     }
-    read_unlock(&in_dev->mc_list_lock);
+    rcu_read_unlock();
 }
 #endif

@@ -1209,7 +1233,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)

     ASSERT_RTNL();

-    for (im=in_dev->mc_list; im; im=im->next) {
+    for_each_pmc_rtnl(in_dev, im) {
         if (im->multiaddr == addr) {
             im->users++;
             ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
@@ -1217,7 +1241,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
         }
     }

-    im = kmalloc(sizeof(*im), GFP_KERNEL);
+    im = kzalloc(sizeof(*im), GFP_KERNEL);
     if (!im)
         goto out;

@@ -1227,26 +1251,18 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
     im->multiaddr = addr;
     /* initial mode is (EX, empty) */
     im->sfmode = MCAST_EXCLUDE;
-    im->sfcount[MCAST_INCLUDE] = 0;
     im->sfcount[MCAST_EXCLUDE] = 1;
-    im->sources = NULL;
-    im->tomb = NULL;
-    im->crcount = 0;
     atomic_set(&im->refcnt, 1);
     spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
-    im->tm_running = 0;
     setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
     im->unsolicit_count = IGMP_Unsolicited_Report_Count;
-    im->reporter = 0;
-    im->gsquery = 0;
 #endif
-    im->loaded = 0;
-    write_lock_bh(&in_dev->mc_list_lock);
-    im->next = in_dev->mc_list;
-    in_dev->mc_list = im;
+
+    im->next_rcu = in_dev->mc_list;
     in_dev->mc_count++;
-    write_unlock_bh(&in_dev->mc_list_lock);
+    rcu_assign_pointer(in_dev->mc_list, im);
+
 #ifdef CONFIG_IP_MULTICAST
     igmpv3_del_delrec(in_dev, im->multiaddr);
 #endif
@@ -1260,26 +1276,32 @@ EXPORT_SYMBOL(ip_mc_inc_group);

 /*
  * Resend IGMP JOIN report; used for bonding.
+ * Called with rcu_read_lock()
  */
-void ip_mc_rejoin_group(struct ip_mc_list *im)
+void ip_mc_rejoin_groups(struct in_device *in_dev)
 {
 #ifdef CONFIG_IP_MULTICAST
-    struct in_device *in_dev = im->interface;
+    struct ip_mc_list *im;
+    int type;

-    if (im->multiaddr == IGMP_ALL_HOSTS)
-        return;
+    for_each_pmc_rcu(in_dev, im) {
+        if (im->multiaddr == IGMP_ALL_HOSTS)
+            continue;

-    /* a failover is happening and switches
-     * must be notified immediately */
-    if (IGMP_V1_SEEN(in_dev))
-        igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
-    else if (IGMP_V2_SEEN(in_dev))
-        igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
-    else
-        igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
+        /* a failover is happening and switches
+         * must be notified immediately
+         */
+        if (IGMP_V1_SEEN(in_dev))
+            type = IGMP_HOST_MEMBERSHIP_REPORT;
+        else if (IGMP_V2_SEEN(in_dev))
+            type = IGMPV2_HOST_MEMBERSHIP_REPORT;
+        else
+            type = IGMPV3_HOST_MEMBERSHIP_REPORT;
+        igmp_send_report(in_dev, im, type);
+    }
 #endif
 }
-EXPORT_SYMBOL(ip_mc_rejoin_group);
+EXPORT_SYMBOL(ip_mc_rejoin_groups);

 /*
  * A socket has left a multicast group on device dev
@@ -1287,17 +1309,18 @@ EXPORT_SYMBOL(ip_mc_rejoin_group);

 void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 {
-    struct ip_mc_list *i, **ip;
+    struct ip_mc_list *i;
+    struct ip_mc_list __rcu **ip;

     ASSERT_RTNL();

-    for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
+    for (ip = &in_dev->mc_list;
+         (i = rtnl_dereference(*ip)) != NULL;
+         ip = &i->next_rcu) {
         if (i->multiaddr == addr) {
             if (--i->users == 0) {
-                write_lock_bh(&in_dev->mc_list_lock);
-                *ip = i->next;
+                *ip = i->next_rcu;
                 in_dev->mc_count--;
-                write_unlock_bh(&in_dev->mc_list_lock);
                 igmp_group_dropped(i);

                 if (!in_dev->dead)
@@ -1316,34 +1339,34 @@ EXPORT_SYMBOL(ip_mc_dec_group);

 void ip_mc_unmap(struct in_device *in_dev)
 {
-    struct ip_mc_list *i;
+    struct ip_mc_list *pmc;

     ASSERT_RTNL();

-    for (i = in_dev->mc_list; i; i = i->next)
-        igmp_group_dropped(i);
+    for_each_pmc_rtnl(in_dev, pmc)
+        igmp_group_dropped(pmc);
 }

 void ip_mc_remap(struct in_device *in_dev)
 {
-    struct ip_mc_list *i;
+    struct ip_mc_list *pmc;

     ASSERT_RTNL();

-    for (i = in_dev->mc_list; i; i = i->next)
-        igmp_group_added(i);
+    for_each_pmc_rtnl(in_dev, pmc)
+        igmp_group_added(pmc);
 }

 /* Device going down */

 void ip_mc_down(struct in_device *in_dev)
 {
-    struct ip_mc_list *i;
+    struct ip_mc_list *pmc;

     ASSERT_RTNL();

-    for (i=in_dev->mc_list; i; i=i->next)
-        igmp_group_dropped(i);
+    for_each_pmc_rtnl(in_dev, pmc)
+        igmp_group_dropped(pmc);

 #ifdef CONFIG_IP_MULTICAST
     in_dev->mr_ifc_count = 0;
@@ -1374,7 +1397,6 @@ void ip_mc_init_dev(struct in_device *in_dev)
     in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
 #endif

-    rwlock_init(&in_dev->mc_list_lock);
     spin_lock_init(&in_dev->mc_tomb_lock);
 }

@@ -1382,14 +1404,14 @@ void ip_mc_init_dev(struct in_device *in_dev)

 void ip_mc_up(struct in_device *in_dev)
 {
-    struct ip_mc_list *i;
+    struct ip_mc_list *pmc;

     ASSERT_RTNL();

     ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);

-    for (i=in_dev->mc_list; i; i=i->next)
-        igmp_group_added(i);
+    for_each_pmc_rtnl(in_dev, pmc)
+        igmp_group_added(pmc);
 }

 /*
@@ -1405,24 +1427,19 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
     /* Deactivate timers */
     ip_mc_down(in_dev);

-    write_lock_bh(&in_dev->mc_list_lock);
-    while ((i = in_dev->mc_list) != NULL) {
-        in_dev->mc_list = i->next;
+    while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
+        in_dev->mc_list = i->next_rcu;
         in_dev->mc_count--;
-        write_unlock_bh(&in_dev->mc_list_lock);
+
         igmp_group_dropped(i);
         ip_ma_put(i);
-
-        write_lock_bh(&in_dev->mc_list_lock);
     }
-    write_unlock_bh(&in_dev->mc_list_lock);
 }

 /* RTNL is locked */
 static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
 {
-    struct flowi fl = { .nl_u = { .ip4_u =
-                                  { .daddr = imr->imr_multiaddr.s_addr } } };
+    struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr };
     struct rtable *rt;
     struct net_device *dev = NULL;
     struct in_device *idev = NULL;
@@ -1513,18 +1530,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,

     if (!in_dev)
         return -ENODEV;
-    read_lock(&in_dev->mc_list_lock);
-    for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+    rcu_read_lock();
+    for_each_pmc_rcu(in_dev, pmc) {
         if (*pmca == pmc->multiaddr)
             break;
     }
     if (!pmc) {
         /* MCA not found?? bug */
-        read_unlock(&in_dev->mc_list_lock);
+        rcu_read_unlock();
         return -ESRCH;
     }
     spin_lock_bh(&pmc->lock);
-    read_unlock(&in_dev->mc_list_lock);
+    rcu_read_unlock();
 #ifdef CONFIG_IP_MULTICAST
     sf_markstate(pmc);
 #endif
@@ -1685,18 +1702,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,

     if (!in_dev)
         return -ENODEV;
-    read_lock(&in_dev->mc_list_lock);
-    for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+    rcu_read_lock();
+    for_each_pmc_rcu(in_dev, pmc) {
         if (*pmca == pmc->multiaddr)
             break;
     }
     if (!pmc) {
         /* MCA not found?? bug */
-        read_unlock(&in_dev->mc_list_lock);
+        rcu_read_unlock();
         return -ESRCH;
     }
     spin_lock_bh(&pmc->lock);
-    read_unlock(&in_dev->mc_list_lock);
+    rcu_read_unlock();

 #ifdef CONFIG_IP_MULTICAST
     sf_markstate(pmc);
@@ -1793,7 +1810,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)

     err = -EADDRINUSE;
     ifindex = imr->imr_ifindex;
-    for (i = inet->mc_list; i; i = i->next) {
+    for_each_pmc_rtnl(inet, i) {
         if (i->multi.imr_multiaddr.s_addr == addr &&
             i->multi.imr_ifindex == ifindex)
             goto done;
@@ -1807,7 +1824,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
         goto done;

     memcpy(&iml->multi, imr, sizeof(*imr));
-    iml->next = inet->mc_list;
+    iml->next_rcu = inet->mc_list;
     iml->sflist = NULL;
     iml->sfmode = MCAST_EXCLUDE;
     rcu_assign_pointer(inet->mc_list, iml);
@@ -1821,17 +1838,14 @@ EXPORT_SYMBOL(ip_mc_join_group);

 static void ip_sf_socklist_reclaim(struct rcu_head *rp)
 {
-    struct ip_sf_socklist *psf;
-
-    psf = container_of(rp, struct ip_sf_socklist, rcu);
+    kfree(container_of(rp, struct ip_sf_socklist, rcu));
     /* sk_omem_alloc should have been decreased by the caller*/
-    kfree(psf);
 }

 static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
                            struct in_device *in_dev)
 {
-    struct ip_sf_socklist *psf = iml->sflist;
+    struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist);
     int err;

     if (psf == NULL) {
@@ -1851,11 +1865,8 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,

 static void ip_mc_socklist_reclaim(struct rcu_head *rp)
 {
-    struct ip_mc_socklist *iml;
-
-    iml = container_of(rp, struct ip_mc_socklist, rcu);
+    kfree(container_of(rp, struct ip_mc_socklist, rcu));
     /* sk_omem_alloc should have been decreased by the caller*/
-    kfree(iml);
 }


@@ -1866,7 +1877,8 @@ static void ip_mc_socklist_reclaim(struct rcu_head *rp)
 int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 {
     struct inet_sock *inet = inet_sk(sk);
-    struct ip_mc_socklist *iml, **imlp;
+    struct ip_mc_socklist *iml;
+    struct ip_mc_socklist __rcu **imlp;
     struct in_device *in_dev;
     struct net *net = sock_net(sk);
     __be32 group = imr->imr_multiaddr.s_addr;
@@ -1876,7 +1888,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
     rtnl_lock();
     in_dev = ip_mc_find_dev(net, imr);
     ifindex = imr->imr_ifindex;
-    for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
+    for (imlp = &inet->mc_list;
+         (iml = rtnl_dereference(*imlp)) != NULL;
+         imlp = &iml->next_rcu) {
         if (iml->multi.imr_multiaddr.s_addr != group)
             continue;
         if (ifindex) {
@@ -1888,7 +1902,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)

         (void) ip_mc_leave_src(sk, iml, in_dev);

-        rcu_assign_pointer(*imlp, iml->next);
+        *imlp = iml->next_rcu;

         if (in_dev)
             ip_mc_dec_group(in_dev, group);
@@ -1934,7 +1948,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
     }
     err = -EADDRNOTAVAIL;

-    for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+    for_each_pmc_rtnl(inet, pmc) {
         if ((pmc->multi.imr_multiaddr.s_addr ==
              imr.imr_multiaddr.s_addr) &&
             (pmc->multi.imr_ifindex == imr.imr_ifindex))
@@ -1958,7 +1972,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
         pmc->sfmode = omode;
     }

-    psl = pmc->sflist;
+    psl = rtnl_dereference(pmc->sflist);
     if (!add) {
         if (!psl)
             goto done;      /* err = -EADDRNOTAVAIL */
@@ -2077,7 +2091,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
         goto done;
     }

-    for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+    for_each_pmc_rtnl(inet, pmc) {
         if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
             pmc->multi.imr_ifindex == imr.imr_ifindex)
             break;
@@ -2107,7 +2121,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
         (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
                              msf->imsf_fmode, 0, NULL, 0);
     }
-    psl = pmc->sflist;
+    psl = rtnl_dereference(pmc->sflist);
     if (psl) {
         (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
                              psl->sl_count, psl->sl_addr, 0);
@@ -2155,7 +2169,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
     }
     err = -EADDRNOTAVAIL;

-    for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+    for_each_pmc_rtnl(inet, pmc) {
         if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
             pmc->multi.imr_ifindex == imr.imr_ifindex)
             break;
@@ -2163,7 +2177,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
     if (!pmc)               /* must have a prior join */
         goto done;
     msf->imsf_fmode = pmc->sfmode;
-    psl = pmc->sflist;
+    psl = rtnl_dereference(pmc->sflist);
     rtnl_unlock();
     if (!psl) {
         len = 0;
@@ -2208,7 +2222,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,

     err = -EADDRNOTAVAIL;

-    for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+    for_each_pmc_rtnl(inet, pmc) {
         if (pmc->multi.imr_multiaddr.s_addr == addr &&
             pmc->multi.imr_ifindex == gsf->gf_interface)
             break;
@@ -2216,7 +2230,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
     if (!pmc)               /* must have a prior join */
         goto done;
     gsf->gf_fmode = pmc->sfmode;
-    psl = pmc->sflist;
+    psl = rtnl_dereference(pmc->sflist);
     rtnl_unlock();
     count = psl ? psl->sl_count : 0;
     copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
@@ -2257,7 +2271,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
         goto out;

     rcu_read_lock();
-    for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) {
+    for_each_pmc_rcu(inet, pmc) {
         if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
             pmc->multi.imr_ifindex == dif)
             break;
@@ -2265,7 +2279,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
     ret = inet->mc_all;
     if (!pmc)
         goto unlock;
-    psl = pmc->sflist;
+    psl = rcu_dereference(pmc->sflist);
     ret = (pmc->sfmode == MCAST_EXCLUDE);
     if (!psl)
         goto unlock;
@@ -2300,16 +2314,14 @@ void ip_mc_drop_socket(struct sock *sk)
         return;

     rtnl_lock();
-    while ((iml = inet->mc_list) != NULL) {
+    while ((iml = rtnl_dereference(inet->mc_list)) != NULL) {
         struct in_device *in_dev;
-        rcu_assign_pointer(inet->mc_list, iml->next);

+        inet->mc_list = iml->next_rcu;
         in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
         (void) ip_mc_leave_src(sk, iml, in_dev);
-        if (in_dev != NULL) {
+        if (in_dev != NULL)
             ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
-            in_dev_put(in_dev);
-        }
         /* decrease mem now to avoid the memleak warning */
         atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
         call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
@@ -2323,8 +2335,8 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
     struct ip_sf_list *psf;
     int rv = 0;

-    read_lock(&in_dev->mc_list_lock);
-    for (im=in_dev->mc_list; im; im=im->next) {
+    rcu_read_lock();
+    for_each_pmc_rcu(in_dev, im) {
         if (im->multiaddr == mc_addr)
             break;
     }
@@ -2345,7 +2357,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
         } else
             rv = 1; /* unspecified source; tentatively allow */
     }
-    read_unlock(&in_dev->mc_list_lock);
+    rcu_read_unlock();
     return rv;
 }

@@ -2371,13 +2383,11 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
         in_dev = __in_dev_get_rcu(state->dev);
         if (!in_dev)
             continue;
-        read_lock(&in_dev->mc_list_lock);
-        im = in_dev->mc_list;
+        im = rcu_dereference(in_dev->mc_list);
         if (im) {
             state->in_dev = in_dev;
             break;
         }
-        read_unlock(&in_dev->mc_list_lock);
     }
     return im;
 }
@@ -2385,11 +2395,9 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
 static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im)
 {
     struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
-    im = im->next;
-    while (!im) {
-        if (likely(state->in_dev != NULL))
-            read_unlock(&state->in_dev->mc_list_lock);

+    im = rcu_dereference(im->next_rcu);
+    while (!im) {
         state->dev = next_net_device_rcu(state->dev);
         if (!state->dev) {
             state->in_dev = NULL;
@@ -2398,8 +2406,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
         state->in_dev = __in_dev_get_rcu(state->dev);
         if (!state->in_dev)
             continue;
-        read_lock(&state->in_dev->mc_list_lock);
-        im = state->in_dev->mc_list;
+        im = rcu_dereference(state->in_dev->mc_list);
     }
     return im;
 }
@@ -2435,10 +2442,8 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
     __releases(rcu)
 {
     struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
-    if (likely(state->in_dev != NULL)) {
-        read_unlock(&state->in_dev->mc_list_lock);
-        state->in_dev = NULL;
-    }
+
+    state->in_dev = NULL;
     state->dev = NULL;
     rcu_read_unlock();
 }
@@ -2460,7 +2465,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
         querier = "NONE";
 #endif

-    if (state->in_dev->mc_list == im) {
+    if (rcu_dereference(state->in_dev->mc_list) == im) {
         seq_printf(seq, "%d\t%-10s: %5d %7s\n",
2465 | state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); | 2470 | state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); |
2466 | } | 2471 | } |
@@ -2519,8 +2524,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) | |||
2519 | idev = __in_dev_get_rcu(state->dev); | 2524 | idev = __in_dev_get_rcu(state->dev); |
2520 | if (unlikely(idev == NULL)) | 2525 | if (unlikely(idev == NULL)) |
2521 | continue; | 2526 | continue; |
2522 | read_lock(&idev->mc_list_lock); | 2527 | im = rcu_dereference(idev->mc_list); |
2523 | im = idev->mc_list; | ||
2524 | if (likely(im != NULL)) { | 2528 | if (likely(im != NULL)) { |
2525 | spin_lock_bh(&im->lock); | 2529 | spin_lock_bh(&im->lock); |
2526 | psf = im->sources; | 2530 | psf = im->sources; |
@@ -2531,7 +2535,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) | |||
2531 | } | 2535 | } |
2532 | spin_unlock_bh(&im->lock); | 2536 | spin_unlock_bh(&im->lock); |
2533 | } | 2537 | } |
2534 | read_unlock(&idev->mc_list_lock); | ||
2535 | } | 2538 | } |
2536 | return psf; | 2539 | return psf; |
2537 | } | 2540 | } |
@@ -2545,9 +2548,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l | |||
2545 | spin_unlock_bh(&state->im->lock); | 2548 | spin_unlock_bh(&state->im->lock); |
2546 | state->im = state->im->next; | 2549 | state->im = state->im->next; |
2547 | while (!state->im) { | 2550 | while (!state->im) { |
2548 | if (likely(state->idev != NULL)) | ||
2549 | read_unlock(&state->idev->mc_list_lock); | ||
2550 | |||
2551 | state->dev = next_net_device_rcu(state->dev); | 2551 | state->dev = next_net_device_rcu(state->dev); |
2552 | if (!state->dev) { | 2552 | if (!state->dev) { |
2553 | state->idev = NULL; | 2553 | state->idev = NULL; |
@@ -2556,8 +2556,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l | |||
2556 | state->idev = __in_dev_get_rcu(state->dev); | 2556 | state->idev = __in_dev_get_rcu(state->dev); |
2557 | if (!state->idev) | 2557 | if (!state->idev) |
2558 | continue; | 2558 | continue; |
2559 | read_lock(&state->idev->mc_list_lock); | 2559 | state->im = rcu_dereference(state->idev->mc_list); |
2560 | state->im = state->idev->mc_list; | ||
2561 | } | 2560 | } |
2562 | if (!state->im) | 2561 | if (!state->im) |
2563 | break; | 2562 | break; |
@@ -2603,10 +2602,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) | |||
2603 | spin_unlock_bh(&state->im->lock); | 2602 | spin_unlock_bh(&state->im->lock); |
2604 | state->im = NULL; | 2603 | state->im = NULL; |
2605 | } | 2604 | } |
2606 | if (likely(state->idev != NULL)) { | 2605 | state->idev = NULL; |
2607 | read_unlock(&state->idev->mc_list_lock); | ||
2608 | state->idev = NULL; | ||
2609 | } | ||
2610 | state->dev = NULL; | 2606 | state->dev = NULL; |
2611 | rcu_read_unlock(); | 2607 | rcu_read_unlock(); |
2612 | } | 2608 | } |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7174370b1195..25e318153f14 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -55,7 +55,6 @@ EXPORT_SYMBOL(inet_get_local_port_range); | |||
55 | int inet_csk_bind_conflict(const struct sock *sk, | 55 | int inet_csk_bind_conflict(const struct sock *sk, |
56 | const struct inet_bind_bucket *tb) | 56 | const struct inet_bind_bucket *tb) |
57 | { | 57 | { |
58 | const __be32 sk_rcv_saddr = inet_rcv_saddr(sk); | ||
59 | struct sock *sk2; | 58 | struct sock *sk2; |
60 | struct hlist_node *node; | 59 | struct hlist_node *node; |
61 | int reuse = sk->sk_reuse; | 60 | int reuse = sk->sk_reuse; |
@@ -75,9 +74,9 @@ int inet_csk_bind_conflict(const struct sock *sk, | |||
75 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { | 74 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { |
76 | if (!reuse || !sk2->sk_reuse || | 75 | if (!reuse || !sk2->sk_reuse || |
77 | sk2->sk_state == TCP_LISTEN) { | 76 | sk2->sk_state == TCP_LISTEN) { |
78 | const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); | 77 | const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); |
79 | if (!sk2_rcv_saddr || !sk_rcv_saddr || | 78 | if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || |
80 | sk2_rcv_saddr == sk_rcv_saddr) | 79 | sk2_rcv_saddr == sk_rcv_saddr(sk)) |
81 | break; | 80 | break; |
82 | } | 81 | } |
83 | } | 82 | } |
@@ -358,17 +357,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, | |||
358 | struct ip_options *opt = inet_rsk(req)->opt; | 357 | struct ip_options *opt = inet_rsk(req)->opt; |
359 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | 358 | struct flowi fl = { .oif = sk->sk_bound_dev_if, |
360 | .mark = sk->sk_mark, | 359 | .mark = sk->sk_mark, |
361 | .nl_u = { .ip4_u = | 360 | .fl4_dst = ((opt && opt->srr) ? |
362 | { .daddr = ((opt && opt->srr) ? | 361 | opt->faddr : ireq->rmt_addr), |
363 | opt->faddr : | 362 | .fl4_src = ireq->loc_addr, |
364 | ireq->rmt_addr), | 363 | .fl4_tos = RT_CONN_FLAGS(sk), |
365 | .saddr = ireq->loc_addr, | ||
366 | .tos = RT_CONN_FLAGS(sk) } }, | ||
367 | .proto = sk->sk_protocol, | 364 | .proto = sk->sk_protocol, |
368 | .flags = inet_sk_flowi_flags(sk), | 365 | .flags = inet_sk_flowi_flags(sk), |
369 | .uli_u = { .ports = | 366 | .fl_ip_sport = inet_sk(sk)->inet_sport, |
370 | { .sport = inet_sk(sk)->inet_sport, | 367 | .fl_ip_dport = ireq->rmt_port }; |
371 | .dport = ireq->rmt_port } } }; | ||
372 | struct net *net = sock_net(sk); | 368 | struct net *net = sock_net(sk); |
373 | 369 | ||
374 | security_req_classify_flow(req, &fl); | 370 | security_req_classify_flow(req, &fl); |
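The recurring initializer change in this patch flattens the nested nl_u/uli_u unions behind short field names. Presumably these are aliases defined in <net/flow.h> roughly as sketched below (the field names are taken from the new code; the #define form is an assumption):

    /* presumed aliases into struct flowi's existing unions */
    #define fl4_dst      nl_u.ip4_u.daddr
    #define fl4_src      nl_u.ip4_u.saddr
    #define fl4_tos      nl_u.ip4_u.tos
    #define fl_ip_sport  uli_u.ports.sport
    #define fl_ip_dport  uli_u.ports.dport

With the aliases in place, a designated initializer needs no nested braces:

    struct flowi fl = {
        .oif     = sk->sk_bound_dev_if,
        .fl4_dst = daddr,          /* was .nl_u.ip4_u.daddr */
        .fl4_src = saddr,          /* was .nl_u.ip4_u.saddr */
    };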
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 1b344f30b463..3c0369a3a663 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) | |||
133 | } | 133 | } |
134 | } | 134 | } |
135 | } | 135 | } |
136 | sk_add_bind_node(child, &tb->owners); | 136 | inet_bind_hash(child, tb, port); |
137 | inet_csk(child)->icsk_bind_hash = tb; | ||
138 | spin_unlock(&head->lock); | 137 | spin_unlock(&head->lock); |
139 | 138 | ||
140 | return 0; | 139 | return 0; |
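__inet_inherit_port() now reuses a single helper instead of open-coding the two bind-hash steps. inet_bind_hash() is not shown in this diff, but given the call site it presumably bundles the same operations plus recording the port, roughly:

    /* presumed shape of the consolidated helper */
    void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
                        const unsigned short snum)
    {
        inet_sk(sk)->inet_num = snum;       /* remember the bound port */
        sk_add_bind_node(sk, &tb->owners);  /* join the bucket's owner list */
        inet_csk(sk)->icsk_bind_hash = tb;  /* back-pointer for unbind */
    }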
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 9e94d7cf4f8a..d9bc85751c74 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -63,7 +63,7 @@ | |||
63 | * refcnt: atomically against modifications on other CPU; | 63 | * refcnt: atomically against modifications on other CPU; |
64 | * usually under some other lock to prevent node disappearing | 64 | * usually under some other lock to prevent node disappearing |
65 | * dtime: unused node list lock | 65 | * dtime: unused node list lock |
66 | * v4daddr: unchangeable | 66 | * daddr: unchangeable |
67 | * ip_id_count: atomic value (no lock needed) | 67 | * ip_id_count: atomic value (no lock needed) |
68 | */ | 68 | */ |
69 | 69 | ||
@@ -79,15 +79,24 @@ static const struct inet_peer peer_fake_node = { | |||
79 | .avl_height = 0 | 79 | .avl_height = 0 |
80 | }; | 80 | }; |
81 | 81 | ||
82 | static struct { | 82 | struct inet_peer_base { |
83 | struct inet_peer __rcu *root; | 83 | struct inet_peer __rcu *root; |
84 | spinlock_t lock; | 84 | spinlock_t lock; |
85 | int total; | 85 | int total; |
86 | } peers = { | 86 | }; |
87 | |||
88 | static struct inet_peer_base v4_peers = { | ||
89 | .root = peer_avl_empty_rcu, | ||
90 | .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock), | ||
91 | .total = 0, | ||
92 | }; | ||
93 | |||
94 | static struct inet_peer_base v6_peers = { | ||
87 | .root = peer_avl_empty_rcu, | 95 | .root = peer_avl_empty_rcu, |
88 | .lock = __SPIN_LOCK_UNLOCKED(peers.lock), | 96 | .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock), |
89 | .total = 0, | 97 | .total = 0, |
90 | }; | 98 | }; |
99 | |||
91 | #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ | 100 | #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ |
92 | 101 | ||
93 | /* Exported for sysctl_net_ipv4. */ | 102 | /* Exported for sysctl_net_ipv4. */ |
@@ -152,28 +161,45 @@ static void unlink_from_unused(struct inet_peer *p) | |||
152 | } | 161 | } |
153 | } | 162 | } |
154 | 163 | ||
164 | static int addr_compare(const struct inetpeer_addr *a, | ||
165 | const struct inetpeer_addr *b) | ||
166 | { | ||
167 | int i, n = (a->family == AF_INET ? 1 : 4); | ||
168 | |||
169 | for (i = 0; i < n; i++) { | ||
170 | if (a->a6[i] == b->a6[i]) | ||
171 | continue; | ||
172 | if (a->a6[i] < b->a6[i]) | ||
173 | return -1; | ||
174 | return 1; | ||
175 | } | ||
176 | |||
177 | return 0; | ||
178 | } | ||
179 | |||
155 | /* | 180 | /* |
156 | * Called with local BH disabled and the pool lock held. | 181 | * Called with local BH disabled and the pool lock held. |
157 | */ | 182 | */ |
158 | #define lookup(_daddr, _stack) \ | 183 | #define lookup(_daddr, _stack, _base) \ |
159 | ({ \ | 184 | ({ \ |
160 | struct inet_peer *u; \ | 185 | struct inet_peer *u; \ |
161 | struct inet_peer __rcu **v; \ | 186 | struct inet_peer __rcu **v; \ |
162 | \ | 187 | \ |
163 | stackptr = _stack; \ | 188 | stackptr = _stack; \ |
164 | *stackptr++ = &peers.root; \ | 189 | *stackptr++ = &_base->root; \ |
165 | for (u = rcu_dereference_protected(peers.root, \ | 190 | for (u = rcu_dereference_protected(_base->root, \ |
166 | lockdep_is_held(&peers.lock)); \ | 191 | lockdep_is_held(&_base->lock)); \ |
167 | u != peer_avl_empty; ) { \ | 192 | u != peer_avl_empty; ) { \ |
168 | if (_daddr == u->v4daddr) \ | 193 | int cmp = addr_compare(_daddr, &u->daddr); \ |
194 | if (cmp == 0) \ | ||
169 | break; \ | 195 | break; \ |
170 | if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ | 196 | if (cmp == -1) \ |
171 | v = &u->avl_left; \ | 197 | v = &u->avl_left; \ |
172 | else \ | 198 | else \ |
173 | v = &u->avl_right; \ | 199 | v = &u->avl_right; \ |
174 | *stackptr++ = v; \ | 200 | *stackptr++ = v; \ |
175 | u = rcu_dereference_protected(*v, \ | 201 | u = rcu_dereference_protected(*v, \ |
176 | lockdep_is_held(&peers.lock)); \ | 202 | lockdep_is_held(&_base->lock)); \ |
177 | } \ | 203 | } \ |
178 | u; \ | 204 | u; \ |
179 | }) | 205 | }) |
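addr_compare() gives the AVL tree a total order over the new family-agnostic key: one 32-bit word for IPv4, four for IPv6. The words are in network byte order, so the ordering is arbitrary but consistent, which is all a search tree needs. A standalone restatement of the logic, with the kernel types simplified for illustration:

    #include <assert.h>
    #include <stdint.h>

    struct addr {
        uint32_t a6[4];   /* a6[0] doubles as the IPv4 address */
        int      family;  /* 2 stands in for AF_INET here */
    };

    static int addr_cmp(const struct addr *a, const struct addr *b)
    {
        int i, n = (a->family == 2 ? 1 : 4);  /* 1 word v4, 4 words v6 */

        for (i = 0; i < n; i++) {
            if (a->a6[i] == b->a6[i])
                continue;
            return a->a6[i] < b->a6[i] ? -1 : 1;
        }
        return 0;  /* all compared words equal */
    }

    int main(void)
    {
        struct addr x = { { 1, 0, 0, 0 }, 2 };
        struct addr y = { { 2, 0, 0, 0 }, 2 };

        assert(addr_cmp(&x, &y) == -1);  /* smaller word sorts left */
        assert(addr_cmp(&x, &x) == 0);
        return 0;
    }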
@@ -185,13 +211,15 @@ static void unlink_from_unused(struct inet_peer *p) | |||
185 | * But every pointer we follow is guaranteed to be valid thanks to RCU. | 211 | * But every pointer we follow is guaranteed to be valid thanks to RCU. |
186 | * We exit from this function if the number of links exceeds PEER_MAXDEPTH | 212 | * We exit from this function if the number of links exceeds PEER_MAXDEPTH |
187 | */ | 213 | */ |
188 | static struct inet_peer *lookup_rcu_bh(__be32 daddr) | 214 | static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, |
215 | struct inet_peer_base *base) | ||
189 | { | 216 | { |
190 | struct inet_peer *u = rcu_dereference_bh(peers.root); | 217 | struct inet_peer *u = rcu_dereference_bh(base->root); |
191 | int count = 0; | 218 | int count = 0; |
192 | 219 | ||
193 | while (u != peer_avl_empty) { | 220 | while (u != peer_avl_empty) { |
194 | if (daddr == u->v4daddr) { | 221 | int cmp = addr_compare(daddr, &u->daddr); |
222 | if (cmp == 0) { | ||
195 | /* Before taking a reference, check if this entry was | 223 | /* Before taking a reference, check if this entry was |
196 | * deleted, unlink_from_pool() sets refcnt=-1 to make | 224 | * deleted, unlink_from_pool() sets refcnt=-1 to make |
197 | * distinction between an unused entry (refcnt=0) and | 225 | * distinction between an unused entry (refcnt=0) and |
@@ -201,7 +229,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) | |||
201 | u = NULL; | 229 | u = NULL; |
202 | return u; | 230 | return u; |
203 | } | 231 | } |
204 | if ((__force __u32)daddr < (__force __u32)u->v4daddr) | 232 | if (cmp == -1) |
205 | u = rcu_dereference_bh(u->avl_left); | 233 | u = rcu_dereference_bh(u->avl_left); |
206 | else | 234 | else |
207 | u = rcu_dereference_bh(u->avl_right); | 235 | u = rcu_dereference_bh(u->avl_right); |
@@ -212,19 +240,19 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) | |||
212 | } | 240 | } |
213 | 241 | ||
214 | /* Called with local BH disabled and the pool lock held. */ | 242 | /* Called with local BH disabled and the pool lock held. */ |
215 | #define lookup_rightempty(start) \ | 243 | #define lookup_rightempty(start, base) \ |
216 | ({ \ | 244 | ({ \ |
217 | struct inet_peer *u; \ | 245 | struct inet_peer *u; \ |
218 | struct inet_peer __rcu **v; \ | 246 | struct inet_peer __rcu **v; \ |
219 | *stackptr++ = &start->avl_left; \ | 247 | *stackptr++ = &start->avl_left; \ |
220 | v = &start->avl_left; \ | 248 | v = &start->avl_left; \ |
221 | for (u = rcu_dereference_protected(*v, \ | 249 | for (u = rcu_dereference_protected(*v, \ |
222 | lockdep_is_held(&peers.lock)); \ | 250 | lockdep_is_held(&base->lock)); \ |
223 | u->avl_right != peer_avl_empty_rcu; ) { \ | 251 | u->avl_right != peer_avl_empty_rcu; ) { \ |
224 | v = &u->avl_right; \ | 252 | v = &u->avl_right; \ |
225 | *stackptr++ = v; \ | 253 | *stackptr++ = v; \ |
226 | u = rcu_dereference_protected(*v, \ | 254 | u = rcu_dereference_protected(*v, \ |
227 | lockdep_is_held(&peers.lock)); \ | 255 | lockdep_is_held(&base->lock)); \ |
228 | } \ | 256 | } \ |
229 | u; \ | 257 | u; \ |
230 | }) | 258 | }) |
@@ -234,7 +262,8 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr) | |||
234 | * Look into mm/map_avl.c for a more detailed description of the ideas. | 262 | * Look into mm/map_avl.c for a more detailed description of the ideas. |
235 | */ | 263 | */ |
236 | static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | 264 | static void peer_avl_rebalance(struct inet_peer __rcu **stack[], |
237 | struct inet_peer __rcu ***stackend) | 265 | struct inet_peer __rcu ***stackend, |
266 | struct inet_peer_base *base) | ||
238 | { | 267 | { |
239 | struct inet_peer __rcu **nodep; | 268 | struct inet_peer __rcu **nodep; |
240 | struct inet_peer *node, *l, *r; | 269 | struct inet_peer *node, *l, *r; |
@@ -243,20 +272,20 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
243 | while (stackend > stack) { | 272 | while (stackend > stack) { |
244 | nodep = *--stackend; | 273 | nodep = *--stackend; |
245 | node = rcu_dereference_protected(*nodep, | 274 | node = rcu_dereference_protected(*nodep, |
246 | lockdep_is_held(&peers.lock)); | 275 | lockdep_is_held(&base->lock)); |
247 | l = rcu_dereference_protected(node->avl_left, | 276 | l = rcu_dereference_protected(node->avl_left, |
248 | lockdep_is_held(&peers.lock)); | 277 | lockdep_is_held(&base->lock)); |
249 | r = rcu_dereference_protected(node->avl_right, | 278 | r = rcu_dereference_protected(node->avl_right, |
250 | lockdep_is_held(&peers.lock)); | 279 | lockdep_is_held(&base->lock)); |
251 | lh = node_height(l); | 280 | lh = node_height(l); |
252 | rh = node_height(r); | 281 | rh = node_height(r); |
253 | if (lh > rh + 1) { /* l: RH+2 */ | 282 | if (lh > rh + 1) { /* l: RH+2 */ |
254 | struct inet_peer *ll, *lr, *lrl, *lrr; | 283 | struct inet_peer *ll, *lr, *lrl, *lrr; |
255 | int lrh; | 284 | int lrh; |
256 | ll = rcu_dereference_protected(l->avl_left, | 285 | ll = rcu_dereference_protected(l->avl_left, |
257 | lockdep_is_held(&peers.lock)); | 286 | lockdep_is_held(&base->lock)); |
258 | lr = rcu_dereference_protected(l->avl_right, | 287 | lr = rcu_dereference_protected(l->avl_right, |
259 | lockdep_is_held(&peers.lock)); | 288 | lockdep_is_held(&base->lock)); |
260 | lrh = node_height(lr); | 289 | lrh = node_height(lr); |
261 | if (lrh <= node_height(ll)) { /* ll: RH+1 */ | 290 | if (lrh <= node_height(ll)) { /* ll: RH+1 */ |
262 | RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ | 291 | RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ |
@@ -268,9 +297,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
268 | RCU_INIT_POINTER(*nodep, l); | 297 | RCU_INIT_POINTER(*nodep, l); |
269 | } else { /* ll: RH, lr: RH+1 */ | 298 | } else { /* ll: RH, lr: RH+1 */ |
270 | lrl = rcu_dereference_protected(lr->avl_left, | 299 | lrl = rcu_dereference_protected(lr->avl_left, |
271 | lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */ | 300 | lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */ |
272 | lrr = rcu_dereference_protected(lr->avl_right, | 301 | lrr = rcu_dereference_protected(lr->avl_right, |
273 | lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */ | 302 | lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */ |
274 | RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ | 303 | RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ |
275 | RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ | 304 | RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ |
276 | node->avl_height = rh + 1; /* node: RH+1 */ | 305 | node->avl_height = rh + 1; /* node: RH+1 */ |
@@ -286,9 +315,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
286 | struct inet_peer *rr, *rl, *rlr, *rll; | 315 | struct inet_peer *rr, *rl, *rlr, *rll; |
287 | int rlh; | 316 | int rlh; |
288 | rr = rcu_dereference_protected(r->avl_right, | 317 | rr = rcu_dereference_protected(r->avl_right, |
289 | lockdep_is_held(&peers.lock)); | 318 | lockdep_is_held(&base->lock)); |
290 | rl = rcu_dereference_protected(r->avl_left, | 319 | rl = rcu_dereference_protected(r->avl_left, |
291 | lockdep_is_held(&peers.lock)); | 320 | lockdep_is_held(&base->lock)); |
292 | rlh = node_height(rl); | 321 | rlh = node_height(rl); |
293 | if (rlh <= node_height(rr)) { /* rr: LH+1 */ | 322 | if (rlh <= node_height(rr)) { /* rr: LH+1 */ |
294 | RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ | 323 | RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ |
@@ -300,9 +329,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
300 | RCU_INIT_POINTER(*nodep, r); | 329 | RCU_INIT_POINTER(*nodep, r); |
301 | } else { /* rr: RH, rl: RH+1 */ | 330 | } else { /* rr: RH, rl: RH+1 */ |
302 | rlr = rcu_dereference_protected(rl->avl_right, | 331 | rlr = rcu_dereference_protected(rl->avl_right, |
303 | lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */ | 332 | lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */ |
304 | rll = rcu_dereference_protected(rl->avl_left, | 333 | rll = rcu_dereference_protected(rl->avl_left, |
305 | lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */ | 334 | lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */ |
306 | RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ | 335 | RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ |
307 | RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ | 336 | RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ |
308 | node->avl_height = lh + 1; /* node: LH+1 */ | 337 | node->avl_height = lh + 1; /* node: LH+1 */ |
@@ -321,14 +350,14 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
321 | } | 350 | } |
322 | 351 | ||
323 | /* Called with local BH disabled and the pool lock held. */ | 352 | /* Called with local BH disabled and the pool lock held. */ |
324 | #define link_to_pool(n) \ | 353 | #define link_to_pool(n, base) \ |
325 | do { \ | 354 | do { \ |
326 | n->avl_height = 1; \ | 355 | n->avl_height = 1; \ |
327 | n->avl_left = peer_avl_empty_rcu; \ | 356 | n->avl_left = peer_avl_empty_rcu; \ |
328 | n->avl_right = peer_avl_empty_rcu; \ | 357 | n->avl_right = peer_avl_empty_rcu; \ |
329 | /* lockless readers can catch us now */ \ | 358 | /* lockless readers can catch us now */ \ |
330 | rcu_assign_pointer(**--stackptr, n); \ | 359 | rcu_assign_pointer(**--stackptr, n); \ |
331 | peer_avl_rebalance(stack, stackptr); \ | 360 | peer_avl_rebalance(stack, stackptr, base); \ |
332 | } while (0) | 361 | } while (0) |
333 | 362 | ||
334 | static void inetpeer_free_rcu(struct rcu_head *head) | 363 | static void inetpeer_free_rcu(struct rcu_head *head) |
@@ -337,13 +366,13 @@ static void inetpeer_free_rcu(struct rcu_head *head) | |||
337 | } | 366 | } |
338 | 367 | ||
339 | /* May be called with local BH enabled. */ | 368 | /* May be called with local BH enabled. */ |
340 | static void unlink_from_pool(struct inet_peer *p) | 369 | static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) |
341 | { | 370 | { |
342 | int do_free; | 371 | int do_free; |
343 | 372 | ||
344 | do_free = 0; | 373 | do_free = 0; |
345 | 374 | ||
346 | spin_lock_bh(&peers.lock); | 375 | spin_lock_bh(&base->lock); |
347 | /* Check the reference counter. It was artificially incremented by 1 | 376 | /* Check the reference counter. It was artificially incremented by 1 |
348 | * in the cleanup() function to prevent it from suddenly disappearing. If we can | 377 | * in the cleanup() function to prevent it from suddenly disappearing. If we can |
349 | * atomically (because of lockless readers) take this last reference, | 378 | * atomically (because of lockless readers) take this last reference, |
@@ -353,7 +382,7 @@ static void unlink_from_pool(struct inet_peer *p) | |||
353 | if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { | 382 | if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { |
354 | struct inet_peer __rcu **stack[PEER_MAXDEPTH]; | 383 | struct inet_peer __rcu **stack[PEER_MAXDEPTH]; |
355 | struct inet_peer __rcu ***stackptr, ***delp; | 384 | struct inet_peer __rcu ***stackptr, ***delp; |
356 | if (lookup(p->v4daddr, stack) != p) | 385 | if (lookup(&p->daddr, stack, base) != p) |
357 | BUG(); | 386 | BUG(); |
358 | delp = stackptr - 1; /* *delp[0] == p */ | 387 | delp = stackptr - 1; /* *delp[0] == p */ |
359 | if (p->avl_left == peer_avl_empty_rcu) { | 388 | if (p->avl_left == peer_avl_empty_rcu) { |
@@ -362,11 +391,11 @@ static void unlink_from_pool(struct inet_peer *p) | |||
362 | } else { | 391 | } else { |
363 | /* look for a node to insert instead of p */ | 392 | /* look for a node to insert instead of p */ |
364 | struct inet_peer *t; | 393 | struct inet_peer *t; |
365 | t = lookup_rightempty(p); | 394 | t = lookup_rightempty(p, base); |
366 | BUG_ON(rcu_dereference_protected(*stackptr[-1], | 395 | BUG_ON(rcu_dereference_protected(*stackptr[-1], |
367 | lockdep_is_held(&peers.lock)) != t); | 396 | lockdep_is_held(&base->lock)) != t); |
368 | **--stackptr = t->avl_left; | 397 | **--stackptr = t->avl_left; |
369 | /* t is removed, t->v4daddr > x->v4daddr for any | 398 | /* t is removed, t->daddr > x->daddr for any |
370 | * x in p->avl_left subtree. | 399 | * x in p->avl_left subtree. |
371 | * Put t in the old place of p. */ | 400 | * Put t in the old place of p. */ |
372 | RCU_INIT_POINTER(*delp[0], t); | 401 | RCU_INIT_POINTER(*delp[0], t); |
@@ -376,11 +405,11 @@ static void unlink_from_pool(struct inet_peer *p) | |||
376 | BUG_ON(delp[1] != &p->avl_left); | 405 | BUG_ON(delp[1] != &p->avl_left); |
377 | delp[1] = &t->avl_left; /* was &p->avl_left */ | 406 | delp[1] = &t->avl_left; /* was &p->avl_left */ |
378 | } | 407 | } |
379 | peer_avl_rebalance(stack, stackptr); | 408 | peer_avl_rebalance(stack, stackptr, base); |
380 | peers.total--; | 409 | base->total--; |
381 | do_free = 1; | 410 | do_free = 1; |
382 | } | 411 | } |
383 | spin_unlock_bh(&peers.lock); | 412 | spin_unlock_bh(&base->lock); |
384 | 413 | ||
385 | if (do_free) | 414 | if (do_free) |
386 | call_rcu_bh(&p->rcu, inetpeer_free_rcu); | 415 | call_rcu_bh(&p->rcu, inetpeer_free_rcu); |
@@ -395,6 +424,16 @@ static void unlink_from_pool(struct inet_peer *p) | |||
395 | inet_putpeer(p); | 424 | inet_putpeer(p); |
396 | } | 425 | } |
397 | 426 | ||
427 | static struct inet_peer_base *family_to_base(int family) | ||
428 | { | ||
429 | return (family == AF_INET ? &v4_peers : &v6_peers); | ||
430 | } | ||
431 | |||
432 | static struct inet_peer_base *peer_to_base(struct inet_peer *p) | ||
433 | { | ||
434 | return family_to_base(p->daddr.family); | ||
435 | } | ||
436 | |||
398 | /* May be called with local BH enabled. */ | 437 | /* May be called with local BH enabled. */ |
399 | static int cleanup_once(unsigned long ttl) | 438 | static int cleanup_once(unsigned long ttl) |
400 | { | 439 | { |
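family_to_base() and peer_to_base() concentrate the choice between the two trees; every tree operation now receives its base explicitly, so the v4 and v6 pools carry separate locks and are balanced independently. A trimmed, compilable illustration of the dispatch, with the bases reduced to counters:

    #include <assert.h>

    #define AF_INET  2   /* Linux values, for illustration */
    #define AF_INET6 10

    struct peer_base { int total; };

    static struct peer_base v4_base, v6_base;

    static struct peer_base *family_to_base(int family)
    {
        return family == AF_INET ? &v4_base : &v6_base;
    }

    int main(void)
    {
        family_to_base(AF_INET)->total++;   /* a v4 insertion */
        family_to_base(AF_INET6)->total++;  /* a v6 insertion */
        assert(v4_base.total == 1 && v6_base.total == 1);
        return 0;
    }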
@@ -428,21 +467,22 @@ static int cleanup_once(unsigned long ttl) | |||
428 | * happen because of entry limits in route cache. */ | 467 | * happen because of entry limits in route cache. */ |
429 | return -1; | 468 | return -1; |
430 | 469 | ||
431 | unlink_from_pool(p); | 470 | unlink_from_pool(p, peer_to_base(p)); |
432 | return 0; | 471 | return 0; |
433 | } | 472 | } |
434 | 473 | ||
435 | /* Called with or without local BH being disabled. */ | 474 | /* Called with or without local BH being disabled. */ |
436 | struct inet_peer *inet_getpeer(__be32 daddr, int create) | 475 | struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) |
437 | { | 476 | { |
438 | struct inet_peer *p; | ||
439 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; | 477 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; |
478 | struct inet_peer_base *base = family_to_base(AF_INET); | ||
479 | struct inet_peer *p; | ||
440 | 480 | ||
441 | /* Look up the address quickly, lockless. | 481 | /* Look up the address quickly, lockless. |
442 | * Because of a concurrent writer, we might not find an existing entry. | 482 | * Because of a concurrent writer, we might not find an existing entry. |
443 | */ | 483 | */ |
444 | rcu_read_lock_bh(); | 484 | rcu_read_lock_bh(); |
445 | p = lookup_rcu_bh(daddr); | 485 | p = lookup_rcu_bh(daddr, base); |
446 | rcu_read_unlock_bh(); | 486 | rcu_read_unlock_bh(); |
447 | 487 | ||
448 | if (p) { | 488 | if (p) { |
@@ -456,50 +496,57 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) | |||
456 | /* retry an exact lookup, taking the lock before. | 496 | /* retry an exact lookup, taking the lock before. |
457 | * At least, nodes should be hot in our cache. | 497 | * At least, nodes should be hot in our cache. |
458 | */ | 498 | */ |
459 | spin_lock_bh(&peers.lock); | 499 | spin_lock_bh(&base->lock); |
460 | p = lookup(daddr, stack); | 500 | p = lookup(daddr, stack, base); |
461 | if (p != peer_avl_empty) { | 501 | if (p != peer_avl_empty) { |
462 | atomic_inc(&p->refcnt); | 502 | atomic_inc(&p->refcnt); |
463 | spin_unlock_bh(&peers.lock); | 503 | spin_unlock_bh(&base->lock); |
464 | /* Remove the entry from unused list if it was there. */ | 504 | /* Remove the entry from unused list if it was there. */ |
465 | unlink_from_unused(p); | 505 | unlink_from_unused(p); |
466 | return p; | 506 | return p; |
467 | } | 507 | } |
468 | p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; | 508 | p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; |
469 | if (p) { | 509 | if (p) { |
470 | p->v4daddr = daddr; | 510 | p->daddr = *daddr; |
471 | atomic_set(&p->refcnt, 1); | 511 | atomic_set(&p->refcnt, 1); |
472 | atomic_set(&p->rid, 0); | 512 | atomic_set(&p->rid, 0); |
473 | atomic_set(&p->ip_id_count, secure_ip_id(daddr)); | 513 | atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); |
474 | p->tcp_ts_stamp = 0; | 514 | p->tcp_ts_stamp = 0; |
475 | INIT_LIST_HEAD(&p->unused); | 515 | INIT_LIST_HEAD(&p->unused); |
476 | 516 | ||
477 | 517 | ||
478 | /* Link the node. */ | 518 | /* Link the node. */ |
479 | link_to_pool(p); | 519 | link_to_pool(p, base); |
480 | peers.total++; | 520 | base->total++; |
481 | } | 521 | } |
482 | spin_unlock_bh(&peers.lock); | 522 | spin_unlock_bh(&base->lock); |
483 | 523 | ||
484 | if (peers.total >= inet_peer_threshold) | 524 | if (base->total >= inet_peer_threshold) |
485 | /* Remove one less-recently-used entry. */ | 525 | /* Remove one less-recently-used entry. */ |
486 | cleanup_once(0); | 526 | cleanup_once(0); |
487 | 527 | ||
488 | return p; | 528 | return p; |
489 | } | 529 | } |
490 | 530 | ||
531 | static int compute_total(void) | ||
532 | { | ||
533 | return v4_peers.total + v6_peers.total; | ||
534 | } | ||
535 | EXPORT_SYMBOL_GPL(inet_getpeer); | ||
536 | |||
491 | /* Called with local BH disabled. */ | 537 | /* Called with local BH disabled. */ |
492 | static void peer_check_expire(unsigned long dummy) | 538 | static void peer_check_expire(unsigned long dummy) |
493 | { | 539 | { |
494 | unsigned long now = jiffies; | 540 | unsigned long now = jiffies; |
495 | int ttl; | 541 | int ttl, total; |
496 | 542 | ||
497 | if (peers.total >= inet_peer_threshold) | 543 | total = compute_total(); |
544 | if (total >= inet_peer_threshold) | ||
498 | ttl = inet_peer_minttl; | 545 | ttl = inet_peer_minttl; |
499 | else | 546 | else |
500 | ttl = inet_peer_maxttl | 547 | ttl = inet_peer_maxttl |
501 | - (inet_peer_maxttl - inet_peer_minttl) / HZ * | 548 | - (inet_peer_maxttl - inet_peer_minttl) / HZ * |
502 | peers.total / inet_peer_threshold * HZ; | 549 | total / inet_peer_threshold * HZ; |
503 | while (!cleanup_once(ttl)) { | 550 | while (!cleanup_once(ttl)) { |
504 | if (jiffies != now) | 551 | if (jiffies != now) |
505 | break; | 552 | break; |
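peer_check_expire() now interpolates the TTL from the combined v4+v6 total. The expression divides by HZ before multiplying by the entry count and only scales back up at the end, keeping the intermediate product within int range. A standalone check of the arithmetic with illustrative values (HZ, the TTL bounds and the threshold here are stand-ins, not the stock sysctl defaults):

    #include <assert.h>

    #define HZ 100
    static const int minttl    = 120 * HZ;  /* illustrative bounds */
    static const int maxttl    = 600 * HZ;
    static const int threshold = 65536;

    static int peer_ttl(int total)
    {
        /* same expression and left-to-right evaluation as the kernel code */
        return maxttl - (maxttl - minttl) / HZ * total / threshold * HZ;
    }

    int main(void)
    {
        assert(peer_ttl(0)     == 600 * HZ);  /* empty pool: max TTL */
        assert(peer_ttl(32768) == 360 * HZ);  /* half full: halfway down */
        assert(peer_ttl(65536) == 120 * HZ);  /* meets the >= branch's floor */
        return 0;
    }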
@@ -508,13 +555,14 @@ static void peer_check_expire(unsigned long dummy) | |||
508 | /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime | 555 | /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime |
509 | * interval depending on the total number of entries (more entries, | 556 | * interval depending on the total number of entries (more entries, |
510 | * less interval). */ | 557 | * less interval). */ |
511 | if (peers.total >= inet_peer_threshold) | 558 | total = compute_total(); |
559 | if (total >= inet_peer_threshold) | ||
512 | peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; | 560 | peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; |
513 | else | 561 | else |
514 | peer_periodic_timer.expires = jiffies | 562 | peer_periodic_timer.expires = jiffies |
515 | + inet_peer_gc_maxtime | 563 | + inet_peer_gc_maxtime |
516 | - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * | 564 | - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * |
517 | peers.total / inet_peer_threshold * HZ; | 565 | total / inet_peer_threshold * HZ; |
518 | add_timer(&peer_periodic_timer); | 566 | add_timer(&peer_periodic_timer); |
519 | } | 567 | } |
520 | 568 | ||
@@ -530,3 +578,4 @@ void inet_putpeer(struct inet_peer *p) | |||
530 | 578 | ||
531 | local_bh_enable(); | 579 | local_bh_enable(); |
532 | } | 580 | } |
581 | EXPORT_SYMBOL_GPL(inet_putpeer); | ||
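ip_fragment.c below calls inet_getpeer_v4(), which does not appear in this diff; given the new inetpeer_addr key it is presumably a thin wrapper of this shape:

    /* presumed convenience wrapper over the generalized lookup */
    static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
    {
        struct inetpeer_addr daddr;

        daddr.a4 = v4daddr;       /* a4 assumed to alias a6[0] in a union */
        daddr.family = AF_INET;
        return inet_getpeer(&daddr, create);
    }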
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 168440834ade..a1151b8adf3c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/udp.h> | 45 | #include <linux/udp.h> |
46 | #include <linux/inet.h> | 46 | #include <linux/inet.h> |
47 | #include <linux/netfilter_ipv4.h> | 47 | #include <linux/netfilter_ipv4.h> |
48 | #include <net/inet_ecn.h> | ||
48 | 49 | ||
49 | /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 | 50 | /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 |
50 | * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c | 51 | * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c |
@@ -70,11 +71,28 @@ struct ipq { | |||
70 | __be32 daddr; | 71 | __be32 daddr; |
71 | __be16 id; | 72 | __be16 id; |
72 | u8 protocol; | 73 | u8 protocol; |
74 | u8 ecn; /* RFC3168 support */ | ||
73 | int iif; | 75 | int iif; |
74 | unsigned int rid; | 76 | unsigned int rid; |
75 | struct inet_peer *peer; | 77 | struct inet_peer *peer; |
76 | }; | 78 | }; |
77 | 79 | ||
80 | #define IPFRAG_ECN_CLEAR 0x01 /* one frag had INET_ECN_NOT_ECT */ | ||
81 | #define IPFRAG_ECN_SET_CE 0x04 /* one frag had INET_ECN_CE */ | ||
82 | |||
83 | static inline u8 ip4_frag_ecn(u8 tos) | ||
84 | { | ||
85 | tos = (tos & INET_ECN_MASK) + 1; | ||
86 | /* | ||
87 | * After the last operation we have (in binary): | ||
88 | * INET_ECN_NOT_ECT => 001 | ||
89 | * INET_ECN_ECT_1 => 010 | ||
90 | * INET_ECN_ECT_0 => 011 | ||
91 | * INET_ECN_CE => 100 | ||
92 | */ | ||
93 | return (tos & 2) ? 0 : tos; | ||
94 | } | ||
95 | |||
78 | static struct inet_frags ip4_frags; | 96 | static struct inet_frags ip4_frags; |
79 | 97 | ||
80 | int ip_frag_nqueues(struct net *net) | 98 | int ip_frag_nqueues(struct net *net) |
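The +1 / &2 trick in ip4_frag_ecn() collapses the four RFC 3168 codepoints to either zero (an ECT fragment, nothing worth recording) or one of the two flag bits that ip_frag_reasm() later tests. A standalone verification of the mapping:

    #include <assert.h>
    #include <stdint.h>

    #define INET_ECN_NOT_ECT 0  /* codepoints per RFC 3168 */
    #define INET_ECN_ECT_1   1
    #define INET_ECN_ECT_0   2
    #define INET_ECN_CE      3
    #define INET_ECN_MASK    3

    static uint8_t ip4_frag_ecn(uint8_t tos)
    {
        tos = (tos & INET_ECN_MASK) + 1;  /* 0..3 -> 1..4 */
        return (tos & 2) ? 0 : tos;       /* 2 and 3 (ECT_1/ECT_0) -> 0 */
    }

    int main(void)
    {
        assert(ip4_frag_ecn(INET_ECN_NOT_ECT) == 0x01);  /* IPFRAG_ECN_CLEAR */
        assert(ip4_frag_ecn(INET_ECN_ECT_1)   == 0);
        assert(ip4_frag_ecn(INET_ECN_ECT_0)   == 0);
        assert(ip4_frag_ecn(INET_ECN_CE)      == 0x04);  /* IPFRAG_ECN_SET_CE */
        return 0;
    }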
@@ -137,11 +155,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) | |||
137 | 155 | ||
138 | qp->protocol = arg->iph->protocol; | 156 | qp->protocol = arg->iph->protocol; |
139 | qp->id = arg->iph->id; | 157 | qp->id = arg->iph->id; |
158 | qp->ecn = ip4_frag_ecn(arg->iph->tos); | ||
140 | qp->saddr = arg->iph->saddr; | 159 | qp->saddr = arg->iph->saddr; |
141 | qp->daddr = arg->iph->daddr; | 160 | qp->daddr = arg->iph->daddr; |
142 | qp->user = arg->user; | 161 | qp->user = arg->user; |
143 | qp->peer = sysctl_ipfrag_max_dist ? | 162 | qp->peer = sysctl_ipfrag_max_dist ? |
144 | inet_getpeer(arg->iph->saddr, 1) : NULL; | 163 | inet_getpeer_v4(arg->iph->saddr, 1) : NULL; |
145 | } | 164 | } |
146 | 165 | ||
147 | static __inline__ void ip4_frag_free(struct inet_frag_queue *q) | 166 | static __inline__ void ip4_frag_free(struct inet_frag_queue *q) |
@@ -316,6 +335,7 @@ static int ip_frag_reinit(struct ipq *qp) | |||
316 | qp->q.fragments = NULL; | 335 | qp->q.fragments = NULL; |
317 | qp->q.fragments_tail = NULL; | 336 | qp->q.fragments_tail = NULL; |
318 | qp->iif = 0; | 337 | qp->iif = 0; |
338 | qp->ecn = 0; | ||
319 | 339 | ||
320 | return 0; | 340 | return 0; |
321 | } | 341 | } |
@@ -328,6 +348,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
328 | int flags, offset; | 348 | int flags, offset; |
329 | int ihl, end; | 349 | int ihl, end; |
330 | int err = -ENOENT; | 350 | int err = -ENOENT; |
351 | u8 ecn; | ||
331 | 352 | ||
332 | if (qp->q.last_in & INET_FRAG_COMPLETE) | 353 | if (qp->q.last_in & INET_FRAG_COMPLETE) |
333 | goto err; | 354 | goto err; |
@@ -339,6 +360,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
339 | goto err; | 360 | goto err; |
340 | } | 361 | } |
341 | 362 | ||
363 | ecn = ip4_frag_ecn(ip_hdr(skb)->tos); | ||
342 | offset = ntohs(ip_hdr(skb)->frag_off); | 364 | offset = ntohs(ip_hdr(skb)->frag_off); |
343 | flags = offset & ~IP_OFFSET; | 365 | flags = offset & ~IP_OFFSET; |
344 | offset &= IP_OFFSET; | 366 | offset &= IP_OFFSET; |
@@ -472,6 +494,7 @@ found: | |||
472 | } | 494 | } |
473 | qp->q.stamp = skb->tstamp; | 495 | qp->q.stamp = skb->tstamp; |
474 | qp->q.meat += skb->len; | 496 | qp->q.meat += skb->len; |
497 | qp->ecn |= ecn; | ||
475 | atomic_add(skb->truesize, &qp->q.net->mem); | 498 | atomic_add(skb->truesize, &qp->q.net->mem); |
476 | if (offset == 0) | 499 | if (offset == 0) |
477 | qp->q.last_in |= INET_FRAG_FIRST_IN; | 500 | qp->q.last_in |= INET_FRAG_FIRST_IN; |
@@ -583,6 +606,17 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
583 | iph = ip_hdr(head); | 606 | iph = ip_hdr(head); |
584 | iph->frag_off = 0; | 607 | iph->frag_off = 0; |
585 | iph->tot_len = htons(len); | 608 | iph->tot_len = htons(len); |
609 | /* RFC3168 5.3 Fragmentation support | ||
610 | * If one fragment had INET_ECN_NOT_ECT, | ||
611 | * reassembled frame also has INET_ECN_NOT_ECT | ||
612 | * Elif one fragment had INET_ECN_CE | ||
613 | * reassembled frame also has INET_ECN_CE | ||
614 | */ | ||
615 | if (qp->ecn & IPFRAG_ECN_CLEAR) | ||
616 | iph->tos &= ~INET_ECN_MASK; | ||
617 | else if (qp->ecn & IPFRAG_ECN_SET_CE) | ||
618 | iph->tos |= INET_ECN_CE; | ||
619 | |||
586 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); | 620 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); |
587 | qp->q.fragments = NULL; | 621 | qp->q.fragments = NULL; |
588 | qp->q.fragments_tail = NULL; | 622 | qp->q.fragments_tail = NULL; |
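Because each fragment's value is OR-ed into qp->ecn, a single Not-ECT fragment sets IPFRAG_ECN_CLEAR and, via the if/else ordering above, outranks IPFRAG_ECN_SET_CE when the datagram is rebuilt. A compact check of that precedence:

    #include <assert.h>
    #include <stdint.h>

    #define IPFRAG_ECN_CLEAR  0x01
    #define IPFRAG_ECN_SET_CE 0x04
    #define INET_ECN_MASK     3
    #define INET_ECN_CE       3

    static uint8_t reasm_tos(uint8_t tos, uint8_t ecn_flags)
    {
        if (ecn_flags & IPFRAG_ECN_CLEAR)
            tos &= ~INET_ECN_MASK;  /* any Not-ECT fragment clears ECN */
        else if (ecn_flags & IPFRAG_ECN_SET_CE)
            tos |= INET_ECN_CE;     /* otherwise a CE fragment marks CE */
        return tos;
    }

    int main(void)
    {
        /* Not-ECT dominates even when another fragment carried CE */
        assert(reasm_tos(0x02, IPFRAG_ECN_CLEAR | IPFRAG_ECN_SET_CE) == 0x00);
        /* CE propagates when every fragment was ECN-capable */
        assert(reasm_tos(0x02, IPFRAG_ECN_SET_CE) == 0x03);
        return 0;
    }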
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index cab2057d5430..eb68a0e34e49 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -405,11 +405,11 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, | |||
405 | if (parms->name[0]) | 405 | if (parms->name[0]) |
406 | strlcpy(name, parms->name, IFNAMSIZ); | 406 | strlcpy(name, parms->name, IFNAMSIZ); |
407 | else | 407 | else |
408 | sprintf(name, "gre%%d"); | 408 | strcpy(name, "gre%d"); |
409 | 409 | ||
410 | dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); | 410 | dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); |
411 | if (!dev) | 411 | if (!dev) |
412 | return NULL; | 412 | return NULL; |
413 | 413 | ||
414 | dev_net_set(dev, net); | 414 | dev_net_set(dev, net); |
415 | 415 | ||
@@ -772,16 +772,11 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
772 | { | 772 | { |
773 | struct flowi fl = { | 773 | struct flowi fl = { |
774 | .oif = tunnel->parms.link, | 774 | .oif = tunnel->parms.link, |
775 | .nl_u = { | 775 | .fl4_dst = dst, |
776 | .ip4_u = { | 776 | .fl4_src = tiph->saddr, |
777 | .daddr = dst, | 777 | .fl4_tos = RT_TOS(tos), |
778 | .saddr = tiph->saddr, | 778 | .fl_gre_key = tunnel->parms.o_key |
779 | .tos = RT_TOS(tos) | 779 | }; |
780 | } | ||
781 | }, | ||
782 | .proto = IPPROTO_GRE | ||
783 | } | ||
784 | ; | ||
785 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { | 780 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { |
786 | dev->stats.tx_carrier_errors++; | 781 | dev->stats.tx_carrier_errors++; |
787 | goto tx_error; | 782 | goto tx_error; |
@@ -823,7 +818,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
823 | !ipv4_is_multicast(tunnel->parms.iph.daddr)) || | 818 | !ipv4_is_multicast(tunnel->parms.iph.daddr)) || |
824 | rt6->rt6i_dst.plen == 128) { | 819 | rt6->rt6i_dst.plen == 128) { |
825 | rt6->rt6i_flags |= RTF_MODIFIED; | 820 | rt6->rt6i_flags |= RTF_MODIFIED; |
826 | skb_dst(skb)->metrics[RTAX_MTU-1] = mtu; | 821 | dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); |
827 | } | 822 | } |
828 | } | 823 | } |
829 | 824 | ||
@@ -895,7 +890,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
895 | iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; | 890 | iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; |
896 | #endif | 891 | #endif |
897 | else | 892 | else |
898 | iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); | 893 | iph->ttl = ip4_dst_hoplimit(&rt->dst); |
899 | } | 894 | } |
900 | 895 | ||
901 | ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; | 896 | ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; |
@@ -951,14 +946,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) | |||
951 | if (iph->daddr) { | 946 | if (iph->daddr) { |
952 | struct flowi fl = { | 947 | struct flowi fl = { |
953 | .oif = tunnel->parms.link, | 948 | .oif = tunnel->parms.link, |
954 | .nl_u = { | 949 | .fl4_dst = iph->daddr, |
955 | .ip4_u = { | 950 | .fl4_src = iph->saddr, |
956 | .daddr = iph->daddr, | 951 | .fl4_tos = RT_TOS(iph->tos), |
957 | .saddr = iph->saddr, | 952 | .proto = IPPROTO_GRE, |
958 | .tos = RT_TOS(iph->tos) | 953 | .fl_gre_key = tunnel->parms.o_key |
959 | } | ||
960 | }, | ||
961 | .proto = IPPROTO_GRE | ||
962 | }; | 954 | }; |
963 | struct rtable *rt; | 955 | struct rtable *rt; |
964 | 956 | ||
@@ -1216,14 +1208,11 @@ static int ipgre_open(struct net_device *dev) | |||
1216 | if (ipv4_is_multicast(t->parms.iph.daddr)) { | 1208 | if (ipv4_is_multicast(t->parms.iph.daddr)) { |
1217 | struct flowi fl = { | 1209 | struct flowi fl = { |
1218 | .oif = t->parms.link, | 1210 | .oif = t->parms.link, |
1219 | .nl_u = { | 1211 | .fl4_dst = t->parms.iph.daddr, |
1220 | .ip4_u = { | 1212 | .fl4_src = t->parms.iph.saddr, |
1221 | .daddr = t->parms.iph.daddr, | 1213 | .fl4_tos = RT_TOS(t->parms.iph.tos), |
1222 | .saddr = t->parms.iph.saddr, | 1214 | .proto = IPPROTO_GRE, |
1223 | .tos = RT_TOS(t->parms.iph.tos) | 1215 | .fl_gre_key = t->parms.o_key |
1224 | } | ||
1225 | }, | ||
1226 | .proto = IPPROTO_GRE | ||
1227 | }; | 1216 | }; |
1228 | struct rtable *rt; | 1217 | struct rtable *rt; |
1229 | 1218 | ||
@@ -1775,3 +1764,4 @@ module_exit(ipgre_fini); | |||
1775 | MODULE_LICENSE("GPL"); | 1764 | MODULE_LICENSE("GPL"); |
1776 | MODULE_ALIAS_RTNL_LINK("gre"); | 1765 | MODULE_ALIAS_RTNL_LINK("gre"); |
1777 | MODULE_ALIAS_RTNL_LINK("gretap"); | 1766 | MODULE_ALIAS_RTNL_LINK("gretap"); |
1767 | MODULE_ALIAS("gre0"); | ||
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 439d2a34ee44..04c7b3ba6b39 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -82,6 +82,7 @@ | |||
82 | #include <linux/tcp.h> | 82 | #include <linux/tcp.h> |
83 | 83 | ||
84 | int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; | 84 | int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; |
85 | EXPORT_SYMBOL(sysctl_ip_default_ttl); | ||
85 | 86 | ||
86 | /* Generate a checksum for an outgoing IP datagram. */ | 87 | /* Generate a checksum for an outgoing IP datagram. */ |
87 | __inline__ void ip_send_check(struct iphdr *iph) | 88 | __inline__ void ip_send_check(struct iphdr *iph) |
@@ -130,7 +131,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) | |||
130 | int ttl = inet->uc_ttl; | 131 | int ttl = inet->uc_ttl; |
131 | 132 | ||
132 | if (ttl < 0) | 133 | if (ttl < 0) |
133 | ttl = dst_metric(dst, RTAX_HOPLIMIT); | 134 | ttl = ip4_dst_hoplimit(dst); |
134 | return ttl; | 135 | return ttl; |
135 | } | 136 | } |
136 | 137 | ||
@@ -341,15 +342,13 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
341 | { | 342 | { |
342 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | 343 | struct flowi fl = { .oif = sk->sk_bound_dev_if, |
343 | .mark = sk->sk_mark, | 344 | .mark = sk->sk_mark, |
344 | .nl_u = { .ip4_u = | 345 | .fl4_dst = daddr, |
345 | { .daddr = daddr, | 346 | .fl4_src = inet->inet_saddr, |
346 | .saddr = inet->inet_saddr, | 347 | .fl4_tos = RT_CONN_FLAGS(sk), |
347 | .tos = RT_CONN_FLAGS(sk) } }, | ||
348 | .proto = sk->sk_protocol, | 348 | .proto = sk->sk_protocol, |
349 | .flags = inet_sk_flowi_flags(sk), | 349 | .flags = inet_sk_flowi_flags(sk), |
350 | .uli_u = { .ports = | 350 | .fl_ip_sport = inet->inet_sport, |
351 | { .sport = inet->inet_sport, | 351 | .fl_ip_dport = inet->inet_dport }; |
352 | .dport = inet->inet_dport } } }; | ||
353 | 352 | ||
354 | /* If this fails, retransmit mechanism of transport layer will | 353 | /* If this fails, retransmit mechanism of transport layer will |
355 | * keep trying until route appears or the connection times | 354 | * keep trying until route appears or the connection times |
@@ -1404,14 +1403,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1404 | 1403 | ||
1405 | { | 1404 | { |
1406 | struct flowi fl = { .oif = arg->bound_dev_if, | 1405 | struct flowi fl = { .oif = arg->bound_dev_if, |
1407 | .nl_u = { .ip4_u = | 1406 | .fl4_dst = daddr, |
1408 | { .daddr = daddr, | 1407 | .fl4_src = rt->rt_spec_dst, |
1409 | .saddr = rt->rt_spec_dst, | 1408 | .fl4_tos = RT_TOS(ip_hdr(skb)->tos), |
1410 | .tos = RT_TOS(ip_hdr(skb)->tos) } }, | 1409 | .fl_ip_sport = tcp_hdr(skb)->dest, |
1411 | /* Not quite clean, but right. */ | 1410 | .fl_ip_dport = tcp_hdr(skb)->source, |
1412 | .uli_u = { .ports = | ||
1413 | { .sport = tcp_hdr(skb)->dest, | ||
1414 | .dport = tcp_hdr(skb)->source } }, | ||
1415 | .proto = sk->sk_protocol, | 1411 | .proto = sk->sk_protocol, |
1416 | .flags = ip_reply_arg_flowi_flags(arg) }; | 1412 | .flags = ip_reply_arg_flowi_flags(arg) }; |
1417 | security_skb_classify_flow(skb, &fl); | 1413 | security_skb_classify_flow(skb, &fl); |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 3a6e1ec5e9ae..2b097752426b 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -1191,13 +1191,13 @@ static int __init ic_dynamic(void) | |||
1191 | (ic_proto_enabled & IC_USE_DHCP) && | 1191 | (ic_proto_enabled & IC_USE_DHCP) && |
1192 | ic_dhcp_msgtype != DHCPACK) { | 1192 | ic_dhcp_msgtype != DHCPACK) { |
1193 | ic_got_reply = 0; | 1193 | ic_got_reply = 0; |
1194 | printk(","); | 1194 | printk(KERN_CONT ","); |
1195 | continue; | 1195 | continue; |
1196 | } | 1196 | } |
1197 | #endif /* IPCONFIG_DHCP */ | 1197 | #endif /* IPCONFIG_DHCP */ |
1198 | 1198 | ||
1199 | if (ic_got_reply) { | 1199 | if (ic_got_reply) { |
1200 | printk(" OK\n"); | 1200 | printk(KERN_CONT " OK\n"); |
1201 | break; | 1201 | break; |
1202 | } | 1202 | } |
1203 | 1203 | ||
@@ -1205,7 +1205,7 @@ static int __init ic_dynamic(void) | |||
1205 | continue; | 1205 | continue; |
1206 | 1206 | ||
1207 | if (! --retries) { | 1207 | if (! --retries) { |
1208 | printk(" timed out!\n"); | 1208 | printk(KERN_CONT " timed out!\n"); |
1209 | break; | 1209 | break; |
1210 | } | 1210 | } |
1211 | 1211 | ||
@@ -1215,7 +1215,7 @@ static int __init ic_dynamic(void) | |||
1215 | if (timeout > CONF_TIMEOUT_MAX) | 1215 | if (timeout > CONF_TIMEOUT_MAX) |
1216 | timeout = CONF_TIMEOUT_MAX; | 1216 | timeout = CONF_TIMEOUT_MAX; |
1217 | 1217 | ||
1218 | printk("."); | 1218 | printk(KERN_CONT "."); |
1219 | } | 1219 | } |
1220 | 1220 | ||
1221 | #ifdef IPCONFIG_BOOTP | 1221 | #ifdef IPCONFIG_BOOTP |
@@ -1236,7 +1236,7 @@ static int __init ic_dynamic(void) | |||
1236 | ((ic_got_reply & IC_RARP) ? "RARP" | 1236 | ((ic_got_reply & IC_RARP) ? "RARP" |
1237 | : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), | 1237 | : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), |
1238 | &ic_servaddr); | 1238 | &ic_servaddr); |
1239 | printk("my address is %pI4\n", &ic_myaddr); | 1239 | printk(KERN_CONT "my address is %pI4\n", &ic_myaddr); |
1240 | 1240 | ||
1241 | return 0; | 1241 | return 0; |
1242 | } | 1242 | } |
@@ -1468,19 +1468,19 @@ static int __init ip_auto_config(void) | |||
1468 | /* | 1468 | /* |
1469 | * Clue in the operator. | 1469 | * Clue in the operator. |
1470 | */ | 1470 | */ |
1471 | printk("IP-Config: Complete:"); | 1471 | printk("IP-Config: Complete:\n"); |
1472 | printk("\n device=%s", ic_dev->name); | 1472 | printk(" device=%s", ic_dev->name); |
1473 | printk(", addr=%pI4", &ic_myaddr); | 1473 | printk(KERN_CONT ", addr=%pI4", &ic_myaddr); |
1474 | printk(", mask=%pI4", &ic_netmask); | 1474 | printk(KERN_CONT ", mask=%pI4", &ic_netmask); |
1475 | printk(", gw=%pI4", &ic_gateway); | 1475 | printk(KERN_CONT ", gw=%pI4", &ic_gateway); |
1476 | printk(",\n host=%s, domain=%s, nis-domain=%s", | 1476 | printk(KERN_CONT ",\n host=%s, domain=%s, nis-domain=%s", |
1477 | utsname()->nodename, ic_domain, utsname()->domainname); | 1477 | utsname()->nodename, ic_domain, utsname()->domainname); |
1478 | printk(",\n bootserver=%pI4", &ic_servaddr); | 1478 | printk(KERN_CONT ",\n bootserver=%pI4", &ic_servaddr); |
1479 | printk(", rootserver=%pI4", &root_server_addr); | 1479 | printk(KERN_CONT ", rootserver=%pI4", &root_server_addr); |
1480 | printk(", rootpath=%s", root_server_path); | 1480 | printk(KERN_CONT ", rootpath=%s", root_server_path); |
1481 | if (ic_dev_mtu) | 1481 | if (ic_dev_mtu) |
1482 | printk(", mtu=%d", ic_dev_mtu); | 1482 | printk(KERN_CONT ", mtu=%d", ic_dev_mtu); |
1483 | printk("\n"); | 1483 | printk(KERN_CONT "\n"); |
1484 | #endif /* !SILENT */ | 1484 | #endif /* !SILENT */ |
1485 | 1485 | ||
1486 | return 0; | 1486 | return 0; |
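The ipconfig changes add KERN_CONT to every printk() that continues a partially printed line, so the log-level machinery does not treat each fragment as a fresh message. The general pattern, as a kernel-style sketch (identifiers illustrative):

    printk(KERN_INFO "IP-Config: Complete:\n");
    printk("     device=%s", name);          /* opens a new line */
    printk(KERN_CONT ", addr=%pI4", &addr);  /* appends to it */
    printk(KERN_CONT "\n");                  /* and terminates it */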
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index cd300aaee78f..988f52fba54a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -463,13 +463,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
463 | { | 463 | { |
464 | struct flowi fl = { | 464 | struct flowi fl = { |
465 | .oif = tunnel->parms.link, | 465 | .oif = tunnel->parms.link, |
466 | .nl_u = { | 466 | .fl4_dst = dst, |
467 | .ip4_u = { | 467 | .fl4_src = tiph->saddr, |
468 | .daddr = dst, | 468 | .fl4_tos = RT_TOS(tos), |
469 | .saddr = tiph->saddr, | ||
470 | .tos = RT_TOS(tos) | ||
471 | } | ||
472 | }, | ||
473 | .proto = IPPROTO_IPIP | 469 | .proto = IPPROTO_IPIP |
474 | }; | 470 | }; |
475 | 471 | ||
@@ -589,13 +585,9 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) | |||
589 | if (iph->daddr) { | 585 | if (iph->daddr) { |
590 | struct flowi fl = { | 586 | struct flowi fl = { |
591 | .oif = tunnel->parms.link, | 587 | .oif = tunnel->parms.link, |
592 | .nl_u = { | 588 | .fl4_dst = iph->daddr, |
593 | .ip4_u = { | 589 | .fl4_src = iph->saddr, |
594 | .daddr = iph->daddr, | 590 | .fl4_tos = RT_TOS(iph->tos), |
595 | .saddr = iph->saddr, | ||
596 | .tos = RT_TOS(iph->tos) | ||
597 | } | ||
598 | }, | ||
599 | .proto = IPPROTO_IPIP | 591 | .proto = IPPROTO_IPIP |
600 | }; | 592 | }; |
601 | struct rtable *rt; | 593 | struct rtable *rt; |
@@ -921,3 +913,4 @@ static void __exit ipip_fini(void) | |||
921 | module_init(ipip_init); | 913 | module_init(ipip_init); |
922 | module_exit(ipip_fini); | 914 | module_exit(ipip_fini); |
923 | MODULE_LICENSE("GPL"); | 915 | MODULE_LICENSE("GPL"); |
916 | MODULE_ALIAS("tunl0"); | ||
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ef2b0089e0ea..3f3a9afd73e0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -1537,13 +1537,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | |||
1537 | if (vif->flags & VIFF_TUNNEL) { | 1537 | if (vif->flags & VIFF_TUNNEL) { |
1538 | struct flowi fl = { | 1538 | struct flowi fl = { |
1539 | .oif = vif->link, | 1539 | .oif = vif->link, |
1540 | .nl_u = { | 1540 | .fl4_dst = vif->remote, |
1541 | .ip4_u = { | 1541 | .fl4_src = vif->local, |
1542 | .daddr = vif->remote, | 1542 | .fl4_tos = RT_TOS(iph->tos), |
1543 | .saddr = vif->local, | ||
1544 | .tos = RT_TOS(iph->tos) | ||
1545 | } | ||
1546 | }, | ||
1547 | .proto = IPPROTO_IPIP | 1543 | .proto = IPPROTO_IPIP |
1548 | }; | 1544 | }; |
1549 | 1545 | ||
@@ -1553,12 +1549,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | |||
1553 | } else { | 1549 | } else { |
1554 | struct flowi fl = { | 1550 | struct flowi fl = { |
1555 | .oif = vif->link, | 1551 | .oif = vif->link, |
1556 | .nl_u = { | 1552 | .fl4_dst = iph->daddr, |
1557 | .ip4_u = { | 1553 | .fl4_tos = RT_TOS(iph->tos), |
1558 | .daddr = iph->daddr, | ||
1559 | .tos = RT_TOS(iph->tos) | ||
1560 | } | ||
1561 | }, | ||
1562 | .proto = IPPROTO_IPIP | 1554 | .proto = IPPROTO_IPIP |
1563 | }; | 1555 | }; |
1564 | 1556 | ||
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index d88a46c54fd1..994a1f29ebbc 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -31,10 +31,10 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
31 | * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. | 31 | * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. |
32 | */ | 32 | */ |
33 | if (addr_type == RTN_LOCAL) { | 33 | if (addr_type == RTN_LOCAL) { |
34 | fl.nl_u.ip4_u.daddr = iph->daddr; | 34 | fl.fl4_dst = iph->daddr; |
35 | if (type == RTN_LOCAL) | 35 | if (type == RTN_LOCAL) |
36 | fl.nl_u.ip4_u.saddr = iph->saddr; | 36 | fl.fl4_src = iph->saddr; |
37 | fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); | 37 | fl.fl4_tos = RT_TOS(iph->tos); |
38 | fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; | 38 | fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; |
39 | fl.mark = skb->mark; | 39 | fl.mark = skb->mark; |
40 | fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; | 40 | fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; |
@@ -47,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
47 | } else { | 47 | } else { |
48 | /* non-local src, find valid iif to satisfy | 48 | /* non-local src, find valid iif to satisfy |
49 | * rp-filter when calling ip_route_input. */ | 49 | * rp-filter when calling ip_route_input. */ |
50 | fl.nl_u.ip4_u.daddr = iph->saddr; | 50 | fl.fl4_dst = iph->saddr; |
51 | if (ip_route_output_key(net, &rt, &fl) != 0) | 51 | if (ip_route_output_key(net, &rt, &fl) != 0) |
52 | return -1; | 52 | return -1; |
53 | 53 | ||
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 48111594ee9b..19eb59d01037 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -3,15 +3,15 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | # objects for l3 independent conntrack | 5 | # objects for l3 independent conntrack |
6 | nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o | 6 | nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o |
7 | ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) | 7 | ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) |
8 | ifeq ($(CONFIG_PROC_FS),y) | 8 | ifeq ($(CONFIG_PROC_FS),y) |
9 | nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o | 9 | nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o |
10 | endif | 10 | endif |
11 | endif | 11 | endif |
12 | 12 | ||
13 | nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o | 13 | nf_nat-y := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o |
14 | iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o | 14 | iptable_nat-y := nf_nat_rule.o nf_nat_standalone.o |
15 | 15 | ||
16 | # connection tracking | 16 | # connection tracking |
17 | obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o | 17 | obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3fac340a28d5..47e5178b998b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -883,6 +883,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
883 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 883 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
884 | newinfo->initial_entries = 0; | 884 | newinfo->initial_entries = 0; |
885 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 885 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
886 | xt_compat_init_offsets(NFPROTO_ARP, info->number); | ||
886 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 887 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
887 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 888 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
888 | if (ret != 0) | 889 | if (ret != 0) |
@@ -1350,6 +1351,7 @@ static int translate_compat_table(const char *name, | |||
1350 | duprintf("translate_compat_table: size %u\n", info->size); | 1351 | duprintf("translate_compat_table: size %u\n", info->size); |
1351 | j = 0; | 1352 | j = 0; |
1352 | xt_compat_lock(NFPROTO_ARP); | 1353 | xt_compat_lock(NFPROTO_ARP); |
1354 | xt_compat_init_offsets(NFPROTO_ARP, number); | ||
1353 | /* Walk through entries, checking offsets. */ | 1355 | /* Walk through entries, checking offsets. */ |
1354 | xt_entry_foreach(iter0, entry0, total_size) { | 1356 | xt_entry_foreach(iter0, entry0, total_size) { |
1355 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1357 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a846d633b3b6..c5a75d70970f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -1080,6 +1080,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1080 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1080 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
1081 | newinfo->initial_entries = 0; | 1081 | newinfo->initial_entries = 0; |
1082 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1082 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
1083 | xt_compat_init_offsets(AF_INET, info->number); | ||
1083 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 1084 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
1084 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 1085 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
1085 | if (ret != 0) | 1086 | if (ret != 0) |
@@ -1681,6 +1682,7 @@ translate_compat_table(struct net *net, | |||
1681 | duprintf("translate_compat_table: size %u\n", info->size); | 1682 | duprintf("translate_compat_table: size %u\n", info->size); |
1682 | j = 0; | 1683 | j = 0; |
1683 | xt_compat_lock(AF_INET); | 1684 | xt_compat_lock(AF_INET); |
1685 | xt_compat_init_offsets(AF_INET, number); | ||
1684 | /* Walk through entries, checking offsets. */ | 1686 | /* Walk through entries, checking offsets. */ |
1685 | xt_entry_foreach(iter0, entry0, total_size) { | 1687 | xt_entry_foreach(iter0, entry0, total_size) { |
1686 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1688 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
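
Both tables gain an xt_compat_init_offsets() call before the entry walk, so the compat layer knows how many per-entry offsets it may have to track. A generic, runnable sketch of that bookkeeping (invented entry sizes, not the xtables implementation): each entry records the cumulative size delta between the native and 32-bit-compat layouts, letting a native offset be translated with one lookup later.

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        /* native vs. compat sizes of four hypothetical rule entries */
        const int native[] = { 112, 96, 144, 80 };
        const int compat[] = { 104, 88, 132, 76 };
        const int n = 4;
        int *delta = calloc(n, sizeof(*delta)); /* the "init offsets" step */
        int running = 0;

        for (int i = 0; i < n; i++) {
            running += native[i] - compat[i];
            delta[i] = running;   /* cumulative shrink after entry i */
            printf("entry %d: compat offset shrinks by %d bytes\n",
                   i, delta[i]);
        }
        free(delta);
        return 0;
    }
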
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 43eec80c0e7c..1ff79e557f96 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
@@ -116,7 +116,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
116 | if (ip_route_me_harder(nskb, addr_type)) | 116 | if (ip_route_me_harder(nskb, addr_type)) |
117 | goto free_nskb; | 117 | goto free_nskb; |
118 | 118 | ||
119 | niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); | 119 | niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); |
120 | 120 | ||
121 | /* "Never happens" */ | 121 | /* "Never happens" */ |
122 | if (nskb->len > dst_mtu(skb_dst(nskb))) | 122 | if (nskb->len > dst_mtu(skb_dst(nskb))) |
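
The REJECT target stops reading the raw RTAX_HOPLIMIT metric, which is zero unless a route sets it explicitly, and asks a helper instead. A sketch of what an ip4_dst_hoplimit()-style helper plausibly does; the fallback to the ip_default_ttl sysctl is an assumption here, though it is consistent with the ttl sysctl hardening further down in this series.

    #include <stdio.h>

    static int sysctl_ip_default_ttl = 64;  /* stand-in for the sysctl */

    /* 0 means "no explicit hoplimit metric on this route" */
    static int dst_hoplimit_sketch(unsigned int metric)
    {
        return metric ? (int)metric : sysctl_ip_default_ttl;
    }

    int main(void)
    {
        printf("unset metric -> ttl %d\n", dst_hoplimit_sketch(0)); /* 64 */
        printf("metric 5     -> ttl %d\n", dst_hoplimit_sketch(5)); /*  5 */
        return 0;
    }
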
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index ab9c05c9734e..5585980fce2e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -100,7 +100,7 @@ static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) | |||
100 | 100 | ||
101 | ret = security_secid_to_secctx(ct->secmark, &secctx, &len); | 101 | ret = security_secid_to_secctx(ct->secmark, &secctx, &len); |
102 | if (ret) | 102 | if (ret) |
103 | return ret; | 103 | return 0; |
104 | 104 | ||
105 | ret = seq_printf(s, "secctx=%s ", secctx); | 105 | ret = seq_printf(s, "secctx=%s ", secctx); |
106 | 106 | ||
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4ae1f203f7cb..b14ec7d03b6e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
59 | local_bh_enable(); | 59 | local_bh_enable(); |
60 | 60 | ||
61 | socket_seq_show(seq); | 61 | socket_seq_show(seq); |
62 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", | 62 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", |
63 | sock_prot_inuse_get(net, &tcp_prot), orphans, | 63 | sock_prot_inuse_get(net, &tcp_prot), orphans, |
64 | tcp_death_row.tw_count, sockets, | 64 | tcp_death_row.tw_count, sockets, |
65 | atomic_read(&tcp_memory_allocated)); | 65 | atomic_long_read(&tcp_memory_allocated)); |
66 | seq_printf(seq, "UDP: inuse %d mem %d\n", | 66 | seq_printf(seq, "UDP: inuse %d mem %ld\n", |
67 | sock_prot_inuse_get(net, &udp_prot), | 67 | sock_prot_inuse_get(net, &udp_prot), |
68 | atomic_read(&udp_memory_allocated)); | 68 | atomic_long_read(&udp_memory_allocated)); |
69 | seq_printf(seq, "UDPLITE: inuse %d\n", | 69 | seq_printf(seq, "UDPLITE: inuse %d\n", |
70 | sock_prot_inuse_get(net, &udplite_prot)); | 70 | sock_prot_inuse_get(net, &udplite_prot)); |
71 | seq_printf(seq, "RAW: inuse %d\n", | 71 | seq_printf(seq, "RAW: inuse %d\n", |
@@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
253 | SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), | 253 | SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), |
254 | SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), | 254 | SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), |
255 | SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), | 255 | SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), |
256 | SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), | ||
256 | SNMP_MIB_SENTINEL | 257 | SNMP_MIB_SENTINEL |
257 | }; | 258 | }; |
258 | 259 | ||
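
The format strings widen from %d to %ld because, as the tcp.c hunk below shows, tcp_memory_allocated (and likewise udp_memory_allocated) become atomic_long_t: a 64-bit machine can account more than 2^31 units, which silently wraps a 32-bit counter. A self-contained C11 illustration, with userspace atomics standing in for the kernel types:

    #include <stdatomic.h>
    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
        atomic_int  narrow = INT_MAX;  /* old atomic_t-sized counter */
        atomic_long wide   = INT_MAX;  /* new atomic_long_t counter */

        atomic_fetch_add(&narrow, 1);  /* wraps negative; C11 atomic
                                        * arithmetic wraps silently */
        atomic_fetch_add(&wide, 1);    /* still positive on LP64 */

        printf("narrow=%d wide=%ld\n",
               atomic_load(&narrow), atomic_load(&wide));
        return 0;
    }

The matching %ld conversions here keep the printf arguments and formats in step with the widened type.
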
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1f85ef289895..a3d5ab786e81 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -549,10 +549,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
549 | { | 549 | { |
550 | struct flowi fl = { .oif = ipc.oif, | 550 | struct flowi fl = { .oif = ipc.oif, |
551 | .mark = sk->sk_mark, | 551 | .mark = sk->sk_mark, |
552 | .nl_u = { .ip4_u = | 552 | .fl4_dst = daddr, |
553 | { .daddr = daddr, | 553 | .fl4_src = saddr, |
554 | .saddr = saddr, | 554 | .fl4_tos = tos, |
555 | .tos = tos } }, | ||
556 | .proto = inet->hdrincl ? IPPROTO_RAW : | 555 | .proto = inet->hdrincl ? IPPROTO_RAW : |
557 | sk->sk_protocol, | 556 | sk->sk_protocol, |
558 | }; | 557 | }; |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f70ae1bccb8a..3e5b7cc2db4f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -139,6 +139,8 @@ static unsigned long expires_ljiffies; | |||
139 | */ | 139 | */ |
140 | 140 | ||
141 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); | 141 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); |
142 | static unsigned int ipv4_default_advmss(const struct dst_entry *dst); | ||
143 | static unsigned int ipv4_default_mtu(const struct dst_entry *dst); | ||
142 | static void ipv4_dst_destroy(struct dst_entry *dst); | 144 | static void ipv4_dst_destroy(struct dst_entry *dst); |
143 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); | 145 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); |
144 | static void ipv4_link_failure(struct sk_buff *skb); | 146 | static void ipv4_link_failure(struct sk_buff *skb); |
@@ -155,6 +157,8 @@ static struct dst_ops ipv4_dst_ops = { | |||
155 | .protocol = cpu_to_be16(ETH_P_IP), | 157 | .protocol = cpu_to_be16(ETH_P_IP), |
156 | .gc = rt_garbage_collect, | 158 | .gc = rt_garbage_collect, |
157 | .check = ipv4_dst_check, | 159 | .check = ipv4_dst_check, |
160 | .default_advmss = ipv4_default_advmss, | ||
161 | .default_mtu = ipv4_default_mtu, | ||
158 | .destroy = ipv4_dst_destroy, | 162 | .destroy = ipv4_dst_destroy, |
159 | .ifdown = ipv4_dst_ifdown, | 163 | .ifdown = ipv4_dst_ifdown, |
160 | .negative_advice = ipv4_negative_advice, | 164 | .negative_advice = ipv4_negative_advice, |
@@ -383,8 +387,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
383 | (__force u32)r->rt_gateway, | 387 | (__force u32)r->rt_gateway, |
384 | r->rt_flags, atomic_read(&r->dst.__refcnt), | 388 | r->rt_flags, atomic_read(&r->dst.__refcnt), |
385 | r->dst.__use, 0, (__force u32)r->rt_src, | 389 | r->dst.__use, 0, (__force u32)r->rt_src, |
386 | (dst_metric(&r->dst, RTAX_ADVMSS) ? | 390 | dst_metric_advmss(&r->dst) + 40, |
387 | (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), | ||
388 | dst_metric(&r->dst, RTAX_WINDOW), | 391 | dst_metric(&r->dst, RTAX_WINDOW), |
389 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + | 392 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + |
390 | dst_metric(&r->dst, RTAX_RTTVAR)), | 393 | dst_metric(&r->dst, RTAX_RTTVAR)), |
@@ -684,17 +687,17 @@ static inline bool rt_caching(const struct net *net) | |||
684 | static inline bool compare_hash_inputs(const struct flowi *fl1, | 687 | static inline bool compare_hash_inputs(const struct flowi *fl1, |
685 | const struct flowi *fl2) | 688 | const struct flowi *fl2) |
686 | { | 689 | { |
687 | return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | | 690 | return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | |
688 | ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | | 691 | ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | |
689 | (fl1->iif ^ fl2->iif)) == 0); | 692 | (fl1->iif ^ fl2->iif)) == 0); |
690 | } | 693 | } |
691 | 694 | ||
692 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | 695 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) |
693 | { | 696 | { |
694 | return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | | 697 | return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | |
695 | ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | | 698 | ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | |
696 | (fl1->mark ^ fl2->mark) | | 699 | (fl1->mark ^ fl2->mark) | |
697 | (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | | 700 | (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) | |
698 | (fl1->oif ^ fl2->oif) | | 701 | (fl1->oif ^ fl2->oif) | |
699 | (fl1->iif ^ fl2->iif)) == 0; | 702 | (fl1->iif ^ fl2->iif)) == 0; |
700 | } | 703 | } |
@@ -714,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth) | |||
714 | * Can be called by a softirq or a process. | 717 | * Can be called by a softirq or a process. |
715 | * In the latter case, we want to reschedule if necessary | 718 | * In the latter case, we want to reschedule if necessary |
716 | */ | 719 | */ |
717 | static void rt_do_flush(int process_context) | 720 | static void rt_do_flush(struct net *net, int process_context) |
718 | { | 721 | { |
719 | unsigned int i; | 722 | unsigned int i; |
720 | struct rtable *rth, *next; | 723 | struct rtable *rth, *next; |
721 | struct rtable * tail; | ||
722 | 724 | ||
723 | for (i = 0; i <= rt_hash_mask; i++) { | 725 | for (i = 0; i <= rt_hash_mask; i++) { |
726 | struct rtable __rcu **pprev; | ||
727 | struct rtable *list; | ||
728 | |||
724 | if (process_context && need_resched()) | 729 | if (process_context && need_resched()) |
725 | cond_resched(); | 730 | cond_resched(); |
726 | rth = rcu_dereference_raw(rt_hash_table[i].chain); | 731 | rth = rcu_dereference_raw(rt_hash_table[i].chain); |
@@ -728,50 +733,32 @@ static void rt_do_flush(int process_context) | |||
728 | continue; | 733 | continue; |
729 | 734 | ||
730 | spin_lock_bh(rt_hash_lock_addr(i)); | 735 | spin_lock_bh(rt_hash_lock_addr(i)); |
731 | #ifdef CONFIG_NET_NS | ||
732 | { | ||
733 | struct rtable __rcu **prev; | ||
734 | struct rtable *p; | ||
735 | 736 | ||
736 | rth = rcu_dereference_protected(rt_hash_table[i].chain, | 737 | list = NULL; |
738 | pprev = &rt_hash_table[i].chain; | ||
739 | rth = rcu_dereference_protected(*pprev, | ||
737 | lockdep_is_held(rt_hash_lock_addr(i))); | 740 | lockdep_is_held(rt_hash_lock_addr(i))); |
738 | 741 | ||
739 | /* defer releasing the head of the list after spin_unlock */ | 742 | while (rth) { |
740 | for (tail = rth; tail; | 743 | next = rcu_dereference_protected(rth->dst.rt_next, |
741 | tail = rcu_dereference_protected(tail->dst.rt_next, | ||
742 | lockdep_is_held(rt_hash_lock_addr(i)))) | ||
743 | if (!rt_is_expired(tail)) | ||
744 | break; | ||
745 | if (rth != tail) | ||
746 | rt_hash_table[i].chain = tail; | ||
747 | |||
748 | /* call rt_free on entries after the tail requiring flush */ | ||
749 | prev = &rt_hash_table[i].chain; | ||
750 | for (p = rcu_dereference_protected(*prev, | ||
751 | lockdep_is_held(rt_hash_lock_addr(i))); | 744 | lockdep_is_held(rt_hash_lock_addr(i))); |
752 | p != NULL; | 745 | |
753 | p = next) { | 746 | if (!net || |
754 | next = rcu_dereference_protected(p->dst.rt_next, | 747 | net_eq(dev_net(rth->dst.dev), net)) { |
755 | lockdep_is_held(rt_hash_lock_addr(i))); | 748 | rcu_assign_pointer(*pprev, next); |
756 | if (!rt_is_expired(p)) { | 749 | rcu_assign_pointer(rth->dst.rt_next, list); |
757 | prev = &p->dst.rt_next; | 750 | list = rth; |
758 | } else { | 751 | } else { |
759 | *prev = next; | 752 | pprev = &rth->dst.rt_next; |
760 | rt_free(p); | ||
761 | } | 753 | } |
754 | rth = next; | ||
762 | } | 755 | } |
763 | } | 756 | |
764 | #else | ||
765 | rth = rcu_dereference_protected(rt_hash_table[i].chain, | ||
766 | lockdep_is_held(rt_hash_lock_addr(i))); | ||
767 | rcu_assign_pointer(rt_hash_table[i].chain, NULL); | ||
768 | tail = NULL; | ||
769 | #endif | ||
770 | spin_unlock_bh(rt_hash_lock_addr(i)); | 757 | spin_unlock_bh(rt_hash_lock_addr(i)); |
771 | 758 | ||
772 | for (; rth != tail; rth = next) { | 759 | for (; list; list = next) { |
773 | next = rcu_dereference_protected(rth->dst.rt_next, 1); | 760 | next = rcu_dereference_protected(list->dst.rt_next, 1); |
774 | rt_free(rth); | 761 | rt_free(list); |
775 | } | 762 | } |
776 | } | 763 | } |
777 | } | 764 | } |
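
The rewritten rt_do_flush() drops the CONFIG_NET_NS special case in favour of one loop: entries belonging to the dying namespace are unlinked onto a private list while the per-chain lock is held, and rt_free() runs only after the unlock. The same collect-under-lock, free-after-unlock pattern in plain runnable C, with a pthread mutex and an int field standing in for the hash lock and the namespace check:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct entry { int ns; struct entry *next; };

    static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;

    static void flush_ns(struct entry **chain, int ns)
    {
        struct entry **pprev = chain, *e, *next, *reap = NULL;

        pthread_mutex_lock(&chain_lock);
        for (e = *pprev; e; e = next) {
            next = e->next;
            if (e->ns == ns) {        /* unlink onto the private list */
                *pprev = next;
                e->next = reap;
                reap = e;
            } else {
                pprev = &e->next;
            }
        }
        pthread_mutex_unlock(&chain_lock);

        for (e = reap; e; e = next) { /* free with the lock dropped */
            next = e->next;
            free(e);
        }
    }

    int main(void)
    {
        struct entry *chain = NULL;
        for (int i = 0; i < 6; i++) {
            struct entry *e = malloc(sizeof(*e));
            e->ns = i % 2;
            e->next = chain;
            chain = e;
        }
        flush_ns(&chain, 0);
        for (struct entry *e = chain; e; e = e->next)
            printf("kept ns=%d\n", e->ns);  /* only ns=1 survives */
        return 0;
    }
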
@@ -919,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay) | |||
919 | { | 906 | { |
920 | rt_cache_invalidate(net); | 907 | rt_cache_invalidate(net); |
921 | if (delay >= 0) | 908 | if (delay >= 0) |
922 | rt_do_flush(!in_softirq()); | 909 | rt_do_flush(net, !in_softirq()); |
923 | } | 910 | } |
924 | 911 | ||
925 | /* Flush previous cache invalidated entries from the cache */ | 912 | /* Flush previous cache invalidated entries from the cache */ |
926 | void rt_cache_flush_batch(void) | 913 | void rt_cache_flush_batch(struct net *net) |
927 | { | 914 | { |
928 | rt_do_flush(!in_softirq()); | 915 | rt_do_flush(net, !in_softirq()); |
929 | } | 916 | } |
930 | 917 | ||
931 | static void rt_emergency_hash_rebuild(struct net *net) | 918 | static void rt_emergency_hash_rebuild(struct net *net) |
@@ -1289,7 +1276,7 @@ void rt_bind_peer(struct rtable *rt, int create) | |||
1289 | { | 1276 | { |
1290 | struct inet_peer *peer; | 1277 | struct inet_peer *peer; |
1291 | 1278 | ||
1292 | peer = inet_getpeer(rt->rt_dst, create); | 1279 | peer = inet_getpeer_v4(rt->rt_dst, create); |
1293 | 1280 | ||
1294 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) | 1281 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) |
1295 | inet_putpeer(peer); | 1282 | inet_putpeer(peer); |
@@ -1686,11 +1673,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
1686 | if (mtu < dst_mtu(&rth->dst)) { | 1673 | if (mtu < dst_mtu(&rth->dst)) { |
1687 | dst_confirm(&rth->dst); | 1674 | dst_confirm(&rth->dst); |
1688 | if (mtu < ip_rt_min_pmtu) { | 1675 | if (mtu < ip_rt_min_pmtu) { |
1676 | u32 lock = dst_metric(&rth->dst, | ||
1677 | RTAX_LOCK); | ||
1689 | mtu = ip_rt_min_pmtu; | 1678 | mtu = ip_rt_min_pmtu; |
1690 | rth->dst.metrics[RTAX_LOCK-1] |= | 1679 | lock |= (1 << RTAX_MTU); |
1691 | (1 << RTAX_MTU); | 1680 | dst_metric_set(&rth->dst, RTAX_LOCK, |
1681 | lock); | ||
1692 | } | 1682 | } |
1693 | rth->dst.metrics[RTAX_MTU-1] = mtu; | 1683 | dst_metric_set(&rth->dst, RTAX_MTU, mtu); |
1694 | dst_set_expires(&rth->dst, | 1684 | dst_set_expires(&rth->dst, |
1695 | ip_rt_mtu_expires); | 1685 | ip_rt_mtu_expires); |
1696 | } | 1686 | } |
@@ -1708,10 +1698,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
1708 | if (dst_mtu(dst) > mtu && mtu >= 68 && | 1698 | if (dst_mtu(dst) > mtu && mtu >= 68 && |
1709 | !(dst_metric_locked(dst, RTAX_MTU))) { | 1699 | !(dst_metric_locked(dst, RTAX_MTU))) { |
1710 | if (mtu < ip_rt_min_pmtu) { | 1700 | if (mtu < ip_rt_min_pmtu) { |
1701 | u32 lock = dst_metric(dst, RTAX_LOCK); | ||
1711 | mtu = ip_rt_min_pmtu; | 1702 | mtu = ip_rt_min_pmtu; |
1712 | dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU); | 1703 | dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); |
1713 | } | 1704 | } |
1714 | dst->metrics[RTAX_MTU-1] = mtu; | 1705 | dst_metric_set(dst, RTAX_MTU, mtu); |
1715 | dst_set_expires(dst, ip_rt_mtu_expires); | 1706 | dst_set_expires(dst, ip_rt_mtu_expires); |
1716 | call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); | 1707 | call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); |
1717 | } | 1708 | } |
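
Both PMTU paths now read the RTAX_LOCK bitmask and write it back through dst_metric_set() instead of poking dst->metrics[] directly; the behaviour is unchanged. A learned MTU below ip_rt_min_pmtu is raised to that floor and the MTU metric is locked against further shrinking. A runnable sketch of that clamp (the 552-byte floor mirrors the kernel default, an assumption here; the lock-bit layout is illustrative):

    #include <stdio.h>

    #define RTAX_MTU 1
    #define MIN_PMTU 552                      /* 512 + 20 + 20 */

    struct metrics { unsigned int mtu; unsigned int lock; };

    static void update_pmtu(struct metrics *m, unsigned int mtu)
    {
        if (mtu >= m->mtu || mtu < 68)        /* only sane shrinks */
            return;
        if (m->lock & (1 << RTAX_MTU))        /* locked: ignore hint */
            return;
        if (mtu < MIN_PMTU) {                 /* clamp and lock floor */
            mtu = MIN_PMTU;
            m->lock |= 1 << RTAX_MTU;
        }
        m->mtu = mtu;
    }

    int main(void)
    {
        struct metrics m = { .mtu = 1500, .lock = 0 };

        update_pmtu(&m, 296);   /* bogus ICMP hint: floored and locked */
        printf("mtu=%u locked=%u\n", m.mtu, !!(m.lock & (1 << RTAX_MTU)));
        update_pmtu(&m, 500);   /* now ignored */
        printf("mtu=%u\n", m.mtu);            /* still 552 */
        return 0;
    }
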
@@ -1794,38 +1785,55 @@ static void set_class_tag(struct rtable *rt, u32 tag) | |||
1794 | } | 1785 | } |
1795 | #endif | 1786 | #endif |
1796 | 1787 | ||
1788 | static unsigned int ipv4_default_advmss(const struct dst_entry *dst) | ||
1789 | { | ||
1790 | unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS); | ||
1791 | |||
1792 | if (advmss == 0) { | ||
1793 | advmss = max_t(unsigned int, dst->dev->mtu - 40, | ||
1794 | ip_rt_min_advmss); | ||
1795 | if (advmss > 65535 - 40) | ||
1796 | advmss = 65535 - 40; | ||
1797 | } | ||
1798 | return advmss; | ||
1799 | } | ||
1800 | |||
1801 | static unsigned int ipv4_default_mtu(const struct dst_entry *dst) | ||
1802 | { | ||
1803 | unsigned int mtu = dst->dev->mtu; | ||
1804 | |||
1805 | if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { | ||
1806 | const struct rtable *rt = (const struct rtable *) dst; | ||
1807 | |||
1808 | if (rt->rt_gateway != rt->rt_dst && mtu > 576) | ||
1809 | mtu = 576; | ||
1810 | } | ||
1811 | |||
1812 | if (mtu > IP_MAX_MTU) | ||
1813 | mtu = IP_MAX_MTU; | ||
1814 | |||
1815 | return mtu; | ||
1816 | } | ||
1817 | |||
1797 | static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | 1818 | static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) |
1798 | { | 1819 | { |
1820 | struct dst_entry *dst = &rt->dst; | ||
1799 | struct fib_info *fi = res->fi; | 1821 | struct fib_info *fi = res->fi; |
1800 | 1822 | ||
1801 | if (fi) { | 1823 | if (fi) { |
1802 | if (FIB_RES_GW(*res) && | 1824 | if (FIB_RES_GW(*res) && |
1803 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1825 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
1804 | rt->rt_gateway = FIB_RES_GW(*res); | 1826 | rt->rt_gateway = FIB_RES_GW(*res); |
1805 | memcpy(rt->dst.metrics, fi->fib_metrics, | 1827 | dst_import_metrics(dst, fi->fib_metrics); |
1806 | sizeof(rt->dst.metrics)); | ||
1807 | if (fi->fib_mtu == 0) { | ||
1808 | rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; | ||
1809 | if (dst_metric_locked(&rt->dst, RTAX_MTU) && | ||
1810 | rt->rt_gateway != rt->rt_dst && | ||
1811 | rt->dst.dev->mtu > 576) | ||
1812 | rt->dst.metrics[RTAX_MTU-1] = 576; | ||
1813 | } | ||
1814 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1828 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1815 | rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; | 1829 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; |
1816 | #endif | 1830 | #endif |
1817 | } else | 1831 | } |
1818 | rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; | 1832 | |
1819 | 1833 | if (dst_mtu(dst) > IP_MAX_MTU) | |
1820 | if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) | 1834 | dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); |
1821 | rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; | 1835 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) |
1822 | if (dst_mtu(&rt->dst) > IP_MAX_MTU) | 1836 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); |
1823 | rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; | ||
1824 | if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) | ||
1825 | rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, | ||
1826 | ip_rt_min_advmss); | ||
1827 | if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) | ||
1828 | rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; | ||
1829 | 1837 | ||
1830 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1838 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1831 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1839 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
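
rt_set_nexthop() no longer pre-fills default MTU, advmss and hoplimit values into every route's metrics array; the new ipv4_default_advmss() and ipv4_default_mtu() callbacks compute them on demand when a metric is unset. The advmss arithmetic from the hunk above, extracted into runnable form (the ip_rt_min_advmss default of 256 is an assumption for illustration):

    #include <stdio.h>

    static unsigned int default_advmss(unsigned int mtu)
    {
        const unsigned int ip_rt_min_advmss = 256;
        unsigned int advmss = mtu - 40;       /* IP + TCP header room */

        if (advmss < ip_rt_min_advmss)
            advmss = ip_rt_min_advmss;
        if (advmss > 65535 - 40)              /* must fit 16-bit field */
            advmss = 65535 - 40;
        return advmss;
    }

    int main(void)
    {
        const unsigned int mtus[] = { 576, 1500, 9000, 65535 };

        for (int i = 0; i < 4; i++)
            printf("mtu %5u -> advmss %5u\n",
                   mtus[i], default_advmss(mtus[i]));
        return 0;
    }
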
@@ -2089,12 +2097,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2089 | { | 2097 | { |
2090 | struct fib_result res; | 2098 | struct fib_result res; |
2091 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2099 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2092 | struct flowi fl = { .nl_u = { .ip4_u = | 2100 | struct flowi fl = { .fl4_dst = daddr, |
2093 | { .daddr = daddr, | 2101 | .fl4_src = saddr, |
2094 | .saddr = saddr, | 2102 | .fl4_tos = tos, |
2095 | .tos = tos, | 2103 | .fl4_scope = RT_SCOPE_UNIVERSE, |
2096 | .scope = RT_SCOPE_UNIVERSE, | ||
2097 | } }, | ||
2098 | .mark = skb->mark, | 2104 | .mark = skb->mark, |
2099 | .iif = dev->ifindex }; | 2105 | .iif = dev->ifindex }; |
2100 | unsigned flags = 0; | 2106 | unsigned flags = 0; |
@@ -2480,14 +2486,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2480 | const struct flowi *oldflp) | 2486 | const struct flowi *oldflp) |
2481 | { | 2487 | { |
2482 | u32 tos = RT_FL_TOS(oldflp); | 2488 | u32 tos = RT_FL_TOS(oldflp); |
2483 | struct flowi fl = { .nl_u = { .ip4_u = | 2489 | struct flowi fl = { .fl4_dst = oldflp->fl4_dst, |
2484 | { .daddr = oldflp->fl4_dst, | 2490 | .fl4_src = oldflp->fl4_src, |
2485 | .saddr = oldflp->fl4_src, | 2491 | .fl4_tos = tos & IPTOS_RT_MASK, |
2486 | .tos = tos & IPTOS_RT_MASK, | 2492 | .fl4_scope = ((tos & RTO_ONLINK) ? |
2487 | .scope = ((tos & RTO_ONLINK) ? | 2493 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), |
2488 | RT_SCOPE_LINK : | ||
2489 | RT_SCOPE_UNIVERSE), | ||
2490 | } }, | ||
2491 | .mark = oldflp->mark, | 2494 | .mark = oldflp->mark, |
2492 | .iif = net->loopback_dev->ifindex, | 2495 | .iif = net->loopback_dev->ifindex, |
2493 | .oif = oldflp->oif }; | 2496 | .oif = oldflp->oif }; |
@@ -2559,9 +2562,10 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2559 | goto out; | 2562 | goto out; |
2560 | 2563 | ||
2561 | /* RACE: Check return value of inet_select_addr instead. */ | 2564 | /* RACE: Check return value of inet_select_addr instead. */ |
2562 | if (rcu_dereference(dev_out->ip_ptr) == NULL) | 2565 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { |
2563 | goto out; /* Wrong error code */ | 2566 | err = -ENETUNREACH; |
2564 | 2567 | goto out; | |
2568 | } | ||
2565 | if (ipv4_is_local_multicast(oldflp->fl4_dst) || | 2569 | if (ipv4_is_local_multicast(oldflp->fl4_dst) || |
2566 | ipv4_is_lbcast(oldflp->fl4_dst)) { | 2570 | ipv4_is_lbcast(oldflp->fl4_dst)) { |
2567 | if (!fl.fl4_src) | 2571 | if (!fl.fl4_src) |
@@ -2622,8 +2626,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
2622 | } | 2626 | } |
2623 | 2627 | ||
2624 | if (res.type == RTN_LOCAL) { | 2628 | if (res.type == RTN_LOCAL) { |
2625 | if (!fl.fl4_src) | 2629 | if (!fl.fl4_src) { |
2626 | fl.fl4_src = fl.fl4_dst; | 2630 | if (res.fi->fib_prefsrc) |
2631 | fl.fl4_src = res.fi->fib_prefsrc; | ||
2632 | else | ||
2633 | fl.fl4_src = fl.fl4_dst; | ||
2634 | } | ||
2627 | dev_out = net->loopback_dev; | 2635 | dev_out = net->loopback_dev; |
2628 | fl.oif = dev_out->ifindex; | 2636 | fl.oif = dev_out->ifindex; |
2629 | res.fi = NULL; | 2637 | res.fi = NULL; |
@@ -2725,7 +2733,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
2725 | new->__use = 1; | 2733 | new->__use = 1; |
2726 | new->input = dst_discard; | 2734 | new->input = dst_discard; |
2727 | new->output = dst_discard; | 2735 | new->output = dst_discard; |
2728 | memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); | 2736 | dst_copy_metrics(new, &ort->dst); |
2729 | 2737 | ||
2730 | new->dev = ort->dst.dev; | 2738 | new->dev = ort->dst.dev; |
2731 | if (new->dev) | 2739 | if (new->dev) |
@@ -2832,7 +2840,7 @@ static int rt_fill_info(struct net *net, | |||
2832 | if (rt->rt_dst != rt->rt_gateway) | 2840 | if (rt->rt_dst != rt->rt_gateway) |
2833 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); | 2841 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); |
2834 | 2842 | ||
2835 | if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) | 2843 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) |
2836 | goto nla_put_failure; | 2844 | goto nla_put_failure; |
2837 | 2845 | ||
2838 | if (rt->fl.mark) | 2846 | if (rt->fl.mark) |
@@ -2944,13 +2952,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2944 | err = -rt->dst.error; | 2952 | err = -rt->dst.error; |
2945 | } else { | 2953 | } else { |
2946 | struct flowi fl = { | 2954 | struct flowi fl = { |
2947 | .nl_u = { | 2955 | .fl4_dst = dst, |
2948 | .ip4_u = { | 2956 | .fl4_src = src, |
2949 | .daddr = dst, | 2957 | .fl4_tos = rtm->rtm_tos, |
2950 | .saddr = src, | ||
2951 | .tos = rtm->rtm_tos, | ||
2952 | }, | ||
2953 | }, | ||
2954 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | 2958 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, |
2955 | .mark = mark, | 2959 | .mark = mark, |
2956 | }; | 2960 | }; |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 650cace2180d..47519205a014 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -346,17 +346,14 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
346 | */ | 346 | */ |
347 | { | 347 | { |
348 | struct flowi fl = { .mark = sk->sk_mark, | 348 | struct flowi fl = { .mark = sk->sk_mark, |
349 | .nl_u = { .ip4_u = | 349 | .fl4_dst = ((opt && opt->srr) ? |
350 | { .daddr = ((opt && opt->srr) ? | 350 | opt->faddr : ireq->rmt_addr), |
351 | opt->faddr : | 351 | .fl4_src = ireq->loc_addr, |
352 | ireq->rmt_addr), | 352 | .fl4_tos = RT_CONN_FLAGS(sk), |
353 | .saddr = ireq->loc_addr, | ||
354 | .tos = RT_CONN_FLAGS(sk) } }, | ||
355 | .proto = IPPROTO_TCP, | 353 | .proto = IPPROTO_TCP, |
356 | .flags = inet_sk_flowi_flags(sk), | 354 | .flags = inet_sk_flowi_flags(sk), |
357 | .uli_u = { .ports = | 355 | .fl_ip_sport = th->dest, |
358 | { .sport = th->dest, | 356 | .fl_ip_dport = th->source }; |
359 | .dport = th->source } } }; | ||
360 | security_req_classify_flow(req, &fl); | 357 | security_req_classify_flow(req, &fl); |
361 | if (ip_route_output_key(sock_net(sk), &rt, &fl)) { | 358 | if (ip_route_output_key(sock_net(sk), &rt, &fl)) { |
362 | reqsk_free(req); | 359 | reqsk_free(req); |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d96c1da4b17c..1a456652086b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -26,6 +26,10 @@ static int zero; | |||
26 | static int tcp_retr1_max = 255; | 26 | static int tcp_retr1_max = 255; |
27 | static int ip_local_port_range_min[] = { 1, 1 }; | 27 | static int ip_local_port_range_min[] = { 1, 1 }; |
28 | static int ip_local_port_range_max[] = { 65535, 65535 }; | 28 | static int ip_local_port_range_max[] = { 65535, 65535 }; |
29 | static int tcp_adv_win_scale_min = -31; | ||
30 | static int tcp_adv_win_scale_max = 31; | ||
31 | static int ip_ttl_min = 1; | ||
32 | static int ip_ttl_max = 255; | ||
29 | 33 | ||
30 | /* Update system visible IP port range */ | 34 | /* Update system visible IP port range */ |
31 | static void set_local_port_range(int range[2]) | 35 | static void set_local_port_range(int range[2]) |
@@ -153,8 +157,9 @@ static struct ctl_table ipv4_table[] = { | |||
153 | .data = &sysctl_ip_default_ttl, | 157 | .data = &sysctl_ip_default_ttl, |
154 | .maxlen = sizeof(int), | 158 | .maxlen = sizeof(int), |
155 | .mode = 0644, | 159 | .mode = 0644, |
156 | .proc_handler = ipv4_doint_and_flush, | 160 | .proc_handler = proc_dointvec_minmax, |
157 | .extra2 = &init_net, | 161 | .extra1 = &ip_ttl_min, |
162 | .extra2 = &ip_ttl_max, | ||
158 | }, | 163 | }, |
159 | { | 164 | { |
160 | .procname = "ip_no_pmtu_disc", | 165 | .procname = "ip_no_pmtu_disc", |
@@ -398,7 +403,7 @@ static struct ctl_table ipv4_table[] = { | |||
398 | .data = &sysctl_tcp_mem, | 403 | .data = &sysctl_tcp_mem, |
399 | .maxlen = sizeof(sysctl_tcp_mem), | 404 | .maxlen = sizeof(sysctl_tcp_mem), |
400 | .mode = 0644, | 405 | .mode = 0644, |
401 | .proc_handler = proc_dointvec | 406 | .proc_handler = proc_doulongvec_minmax |
402 | }, | 407 | }, |
403 | { | 408 | { |
404 | .procname = "tcp_wmem", | 409 | .procname = "tcp_wmem", |
@@ -426,7 +431,9 @@ static struct ctl_table ipv4_table[] = { | |||
426 | .data = &sysctl_tcp_adv_win_scale, | 431 | .data = &sysctl_tcp_adv_win_scale, |
427 | .maxlen = sizeof(int), | 432 | .maxlen = sizeof(int), |
428 | .mode = 0644, | 433 | .mode = 0644, |
429 | .proc_handler = proc_dointvec | 434 | .proc_handler = proc_dointvec_minmax, |
435 | .extra1 = &tcp_adv_win_scale_min, | ||
436 | .extra2 = &tcp_adv_win_scale_max, | ||
430 | }, | 437 | }, |
431 | { | 438 | { |
432 | .procname = "tcp_tw_reuse", | 439 | .procname = "tcp_tw_reuse", |
@@ -602,8 +609,7 @@ static struct ctl_table ipv4_table[] = { | |||
602 | .data = &sysctl_udp_mem, | 609 | .data = &sysctl_udp_mem, |
603 | .maxlen = sizeof(sysctl_udp_mem), | 610 | .maxlen = sizeof(sysctl_udp_mem), |
604 | .mode = 0644, | 611 | .mode = 0644, |
605 | .proc_handler = proc_dointvec_minmax, | 612 | .proc_handler = proc_doulongvec_minmax, |
606 | .extra1 = &zero | ||
607 | }, | 613 | }, |
608 | { | 614 | { |
609 | .procname = "udp_rmem_min", | 615 | .procname = "udp_rmem_min", |
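
Three sysctls tighten up here: ip_default_ttl moves from a flush-everything handler to a plain bounded integer (1..255, the only TTLs valid on the wire), tcp_adv_win_scale gains the -31..31 range its shift arithmetic assumes, and the now-long tcp_mem/udp_mem arrays switch to proc_doulongvec_minmax. A userspace sketch of the contract proc_dointvec_minmax enforces through extra1/extra2:

    #include <stdio.h>

    /* reject the write unless min <= val <= max, like the kernel
     * handler does with its extra1/extra2 bounds */
    static int dointvec_minmax(int *data, int val, int min, int max)
    {
        if (val < min || val > max)
            return -22;                       /* -EINVAL */
        *data = val;
        return 0;
    }

    int main(void)
    {
        int ip_default_ttl = 64;

        printf("write 0:   %d\n", dointvec_minmax(&ip_default_ttl, 0, 1, 255));
        printf("write 255: %d\n", dointvec_minmax(&ip_default_ttl, 255, 1, 255));
        printf("ttl now:   %d\n", ip_default_ttl);
        return 0;
    }
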
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5f738c5c0dc4..6c11eece262c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; | |||
282 | struct percpu_counter tcp_orphan_count; | 282 | struct percpu_counter tcp_orphan_count; |
283 | EXPORT_SYMBOL_GPL(tcp_orphan_count); | 283 | EXPORT_SYMBOL_GPL(tcp_orphan_count); |
284 | 284 | ||
285 | int sysctl_tcp_mem[3] __read_mostly; | 285 | long sysctl_tcp_mem[3] __read_mostly; |
286 | int sysctl_tcp_wmem[3] __read_mostly; | 286 | int sysctl_tcp_wmem[3] __read_mostly; |
287 | int sysctl_tcp_rmem[3] __read_mostly; | 287 | int sysctl_tcp_rmem[3] __read_mostly; |
288 | 288 | ||
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem); | |||
290 | EXPORT_SYMBOL(sysctl_tcp_rmem); | 290 | EXPORT_SYMBOL(sysctl_tcp_rmem); |
291 | EXPORT_SYMBOL(sysctl_tcp_wmem); | 291 | EXPORT_SYMBOL(sysctl_tcp_wmem); |
292 | 292 | ||
293 | atomic_t tcp_memory_allocated; /* Current allocated memory. */ | 293 | atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ |
294 | EXPORT_SYMBOL(tcp_memory_allocated); | 294 | EXPORT_SYMBOL(tcp_memory_allocated); |
295 | 295 | ||
296 | /* | 296 | /* |
@@ -2244,7 +2244,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2244 | /* Values greater than interface MTU won't take effect. However | 2244 | /* Values greater than interface MTU won't take effect. However |
2245 | * at the point when this call is done we typically don't yet | 2245 | * at the point when this call is done we typically don't yet |
2246 | * know which interface is going to be used */ | 2246 | * know which interface is going to be used */ |
2247 | if (val < 8 || val > MAX_TCP_WINDOW) { | 2247 | if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { |
2248 | err = -EINVAL; | 2248 | err = -EINVAL; |
2249 | break; | 2249 | break; |
2250 | } | 2250 | } |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3357f69e353d..2549b29b062d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk) | |||
259 | int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + | 259 | int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + |
260 | sizeof(struct sk_buff); | 260 | sizeof(struct sk_buff); |
261 | 261 | ||
262 | if (sk->sk_sndbuf < 3 * sndmem) | 262 | if (sk->sk_sndbuf < 3 * sndmem) { |
263 | sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]); | 263 | sk->sk_sndbuf = 3 * sndmem; |
264 | if (sk->sk_sndbuf > sysctl_tcp_wmem[2]) | ||
265 | sk->sk_sndbuf = sysctl_tcp_wmem[2]; | ||
266 | } | ||
264 | } | 267 | } |
265 | 268 | ||
266 | /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) | 269 | /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) |
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk) | |||
396 | if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && | 399 | if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && |
397 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && | 400 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && |
398 | !tcp_memory_pressure && | 401 | !tcp_memory_pressure && |
399 | atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { | 402 | atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { |
400 | sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), | 403 | sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), |
401 | sysctl_tcp_rmem[2]); | 404 | sysctl_tcp_rmem[2]); |
402 | } | 405 | } |
@@ -731,7 +734,7 @@ void tcp_update_metrics(struct sock *sk) | |||
731 | * Reset our results. | 734 | * Reset our results. |
732 | */ | 735 | */ |
733 | if (!(dst_metric_locked(dst, RTAX_RTT))) | 736 | if (!(dst_metric_locked(dst, RTAX_RTT))) |
734 | dst->metrics[RTAX_RTT - 1] = 0; | 737 | dst_metric_set(dst, RTAX_RTT, 0); |
735 | return; | 738 | return; |
736 | } | 739 | } |
737 | 740 | ||
@@ -773,34 +776,38 @@ void tcp_update_metrics(struct sock *sk) | |||
773 | if (dst_metric(dst, RTAX_SSTHRESH) && | 776 | if (dst_metric(dst, RTAX_SSTHRESH) && |
774 | !dst_metric_locked(dst, RTAX_SSTHRESH) && | 777 | !dst_metric_locked(dst, RTAX_SSTHRESH) && |
775 | (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) | 778 | (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) |
776 | dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1; | 779 | dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); |
777 | if (!dst_metric_locked(dst, RTAX_CWND) && | 780 | if (!dst_metric_locked(dst, RTAX_CWND) && |
778 | tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) | 781 | tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) |
779 | dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd; | 782 | dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); |
780 | } else if (tp->snd_cwnd > tp->snd_ssthresh && | 783 | } else if (tp->snd_cwnd > tp->snd_ssthresh && |
781 | icsk->icsk_ca_state == TCP_CA_Open) { | 784 | icsk->icsk_ca_state == TCP_CA_Open) { |
782 | /* Cong. avoidance phase, cwnd is reliable. */ | 785 | /* Cong. avoidance phase, cwnd is reliable. */ |
783 | if (!dst_metric_locked(dst, RTAX_SSTHRESH)) | 786 | if (!dst_metric_locked(dst, RTAX_SSTHRESH)) |
784 | dst->metrics[RTAX_SSTHRESH-1] = | 787 | dst_metric_set(dst, RTAX_SSTHRESH, |
785 | max(tp->snd_cwnd >> 1, tp->snd_ssthresh); | 788 | max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); |
786 | if (!dst_metric_locked(dst, RTAX_CWND)) | 789 | if (!dst_metric_locked(dst, RTAX_CWND)) |
787 | dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1; | 790 | dst_metric_set(dst, RTAX_CWND, |
791 | (dst_metric(dst, RTAX_CWND) + | ||
792 | tp->snd_cwnd) >> 1); | ||
788 | } else { | 793 | } else { |
789 | /* Else slow start did not finish, cwnd is nonsense, | 794 | /* Else slow start did not finish, cwnd is nonsense,
790 | ssthresh may also be invalid. | 795 | ssthresh may also be invalid.
791 | */ | 796 | */ |
792 | if (!dst_metric_locked(dst, RTAX_CWND)) | 797 | if (!dst_metric_locked(dst, RTAX_CWND)) |
793 | dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1; | 798 | dst_metric_set(dst, RTAX_CWND, |
799 | (dst_metric(dst, RTAX_CWND) + | ||
800 | tp->snd_ssthresh) >> 1); | ||
794 | if (dst_metric(dst, RTAX_SSTHRESH) && | 801 | if (dst_metric(dst, RTAX_SSTHRESH) && |
795 | !dst_metric_locked(dst, RTAX_SSTHRESH) && | 802 | !dst_metric_locked(dst, RTAX_SSTHRESH) && |
796 | tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) | 803 | tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) |
797 | dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; | 804 | dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); |
798 | } | 805 | } |
799 | 806 | ||
800 | if (!dst_metric_locked(dst, RTAX_REORDERING)) { | 807 | if (!dst_metric_locked(dst, RTAX_REORDERING)) { |
801 | if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && | 808 | if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && |
802 | tp->reordering != sysctl_tcp_reordering) | 809 | tp->reordering != sysctl_tcp_reordering) |
803 | dst->metrics[RTAX_REORDERING-1] = tp->reordering; | 810 | dst_metric_set(dst, RTAX_REORDERING, tp->reordering); |
804 | } | 811 | } |
805 | } | 812 | } |
806 | } | 813 | } |
@@ -909,25 +916,20 @@ static void tcp_init_metrics(struct sock *sk) | |||
909 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); | 916 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); |
910 | } | 917 | } |
911 | tcp_set_rto(sk); | 918 | tcp_set_rto(sk); |
912 | if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) | 919 | if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) { |
913 | goto reset; | ||
914 | |||
915 | cwnd: | ||
916 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | ||
917 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
918 | return; | ||
919 | |||
920 | reset: | 920 | reset: |
921 | /* Play conservative. If timestamps are not | 921 | /* Play conservative. If timestamps are not |
922 | * supported, TCP will fail to recalculate correct | 922 | * supported, TCP will fail to recalculate correct |
923 | * rtt, if initial rto is too small. FORGET ALL AND RESET! | 923 | * rtt, if initial rto is too small. FORGET ALL AND RESET! |
924 | */ | 924 | */ |
925 | if (!tp->rx_opt.saw_tstamp && tp->srtt) { | 925 | if (!tp->rx_opt.saw_tstamp && tp->srtt) { |
926 | tp->srtt = 0; | 926 | tp->srtt = 0; |
927 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; | 927 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; |
928 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; | 928 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; |
929 | } | ||
929 | } | 930 | } |
930 | goto cwnd; | 931 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); |
932 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
931 | } | 933 | } |
932 | 934 | ||
933 | static void tcp_update_reordering(struct sock *sk, const int metric, | 935 | static void tcp_update_reordering(struct sock *sk, const int metric, |
@@ -4861,7 +4863,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk) | |||
4861 | return 0; | 4863 | return 0; |
4862 | 4864 | ||
4863 | /* If we are under soft global TCP memory pressure, do not expand. */ | 4865 | /* If we are under soft global TCP memory pressure, do not expand. */ |
4864 | if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) | 4866 | if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0]) |
4865 | return 0; | 4867 | return 0; |
4866 | 4868 | ||
4867 | /* If we filled the congestion window, do not expand. */ | 4869 | /* If we filled the congestion window, do not expand. */ |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8f8527d41682..856f68466d49 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
415 | !icsk->icsk_backoff) | 415 | !icsk->icsk_backoff) |
416 | break; | 416 | break; |
417 | 417 | ||
418 | if (sock_owned_by_user(sk)) | ||
419 | break; | ||
420 | |||
418 | icsk->icsk_backoff--; | 421 | icsk->icsk_backoff--; |
419 | inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << | 422 | inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << |
420 | icsk->icsk_backoff; | 423 | icsk->icsk_backoff; |
@@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
429 | if (remaining) { | 432 | if (remaining) { |
430 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 433 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, |
431 | remaining, TCP_RTO_MAX); | 434 | remaining, TCP_RTO_MAX); |
432 | } else if (sock_owned_by_user(sk)) { | ||
433 | /* RTO revert clocked out retransmission, | ||
434 | * but socket is locked. Will defer. */ | ||
435 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
436 | HZ/20, TCP_RTO_MAX); | ||
437 | } else { | 435 | } else { |
438 | /* RTO revert clocked out retransmission. | 436 | /* RTO revert clocked out retransmission. |
439 | * Will retransmit now */ | 437 | * Will retransmit now */ |
@@ -1212,12 +1210,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { | |||
1212 | }; | 1210 | }; |
1213 | #endif | 1211 | #endif |
1214 | 1212 | ||
1215 | static struct timewait_sock_ops tcp_timewait_sock_ops = { | ||
1216 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), | ||
1217 | .twsk_unique = tcp_twsk_unique, | ||
1218 | .twsk_destructor= tcp_twsk_destructor, | ||
1219 | }; | ||
1220 | |||
1221 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | 1213 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
1222 | { | 1214 | { |
1223 | struct tcp_extend_values tmp_ext; | 1215 | struct tcp_extend_values tmp_ext; |
@@ -1349,7 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1349 | tcp_death_row.sysctl_tw_recycle && | 1341 | tcp_death_row.sysctl_tw_recycle && |
1350 | (dst = inet_csk_route_req(sk, req)) != NULL && | 1342 | (dst = inet_csk_route_req(sk, req)) != NULL && |
1351 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 1343 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
1352 | peer->v4daddr == saddr) { | 1344 | peer->daddr.a4 == saddr) { |
1353 | inet_peer_refcheck(peer); | 1345 | inet_peer_refcheck(peer); |
1354 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | 1346 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && |
1355 | (s32)(peer->tcp_ts - req->ts_recent) > | 1347 | (s32)(peer->tcp_ts - req->ts_recent) > |
@@ -1444,7 +1436,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1444 | 1436 | ||
1445 | tcp_mtup_init(newsk); | 1437 | tcp_mtup_init(newsk); |
1446 | tcp_sync_mss(newsk, dst_mtu(dst)); | 1438 | tcp_sync_mss(newsk, dst_mtu(dst)); |
1447 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); | 1439 | newtp->advmss = dst_metric_advmss(dst); |
1448 | if (tcp_sk(sk)->rx_opt.user_mss && | 1440 | if (tcp_sk(sk)->rx_opt.user_mss && |
1449 | tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) | 1441 | tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) |
1450 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; | 1442 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; |
@@ -1765,64 +1757,40 @@ do_time_wait: | |||
1765 | goto discard_it; | 1757 | goto discard_it; |
1766 | } | 1758 | } |
1767 | 1759 | ||
1768 | /* VJ's idea. Save last timestamp seen from this destination | 1760 | struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) |
1769 | * and hold it at least for normal timewait interval to use for duplicate | ||
1770 | * segment detection in subsequent connections, before they enter synchronized | ||
1771 | * state. | ||
1772 | */ | ||
1773 | |||
1774 | int tcp_v4_remember_stamp(struct sock *sk) | ||
1775 | { | 1761 | { |
1762 | struct rtable *rt = (struct rtable *) __sk_dst_get(sk); | ||
1776 | struct inet_sock *inet = inet_sk(sk); | 1763 | struct inet_sock *inet = inet_sk(sk); |
1777 | struct tcp_sock *tp = tcp_sk(sk); | 1764 | struct inet_peer *peer; |
1778 | struct rtable *rt = (struct rtable *)__sk_dst_get(sk); | ||
1779 | struct inet_peer *peer = NULL; | ||
1780 | int release_it = 0; | ||
1781 | 1765 | ||
1782 | if (!rt || rt->rt_dst != inet->inet_daddr) { | 1766 | if (!rt || rt->rt_dst != inet->inet_daddr) { |
1783 | peer = inet_getpeer(inet->inet_daddr, 1); | 1767 | peer = inet_getpeer_v4(inet->inet_daddr, 1); |
1784 | release_it = 1; | 1768 | *release_it = true; |
1785 | } else { | 1769 | } else { |
1786 | if (!rt->peer) | 1770 | if (!rt->peer) |
1787 | rt_bind_peer(rt, 1); | 1771 | rt_bind_peer(rt, 1); |
1788 | peer = rt->peer; | 1772 | peer = rt->peer; |
1773 | *release_it = false; | ||
1789 | } | 1774 | } |
1790 | 1775 | ||
1791 | if (peer) { | 1776 | return peer; |
1792 | if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || | ||
1793 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && | ||
1794 | peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { | ||
1795 | peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; | ||
1796 | peer->tcp_ts = tp->rx_opt.ts_recent; | ||
1797 | } | ||
1798 | if (release_it) | ||
1799 | inet_putpeer(peer); | ||
1800 | return 1; | ||
1801 | } | ||
1802 | |||
1803 | return 0; | ||
1804 | } | 1777 | } |
1805 | EXPORT_SYMBOL(tcp_v4_remember_stamp); | 1778 | EXPORT_SYMBOL(tcp_v4_get_peer); |
1806 | 1779 | ||
1807 | int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) | 1780 | void *tcp_v4_tw_get_peer(struct sock *sk) |
1808 | { | 1781 | { |
1809 | struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); | 1782 | struct inet_timewait_sock *tw = inet_twsk(sk); |
1810 | |||
1811 | if (peer) { | ||
1812 | const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | ||
1813 | |||
1814 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || | ||
1815 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && | ||
1816 | peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { | ||
1817 | peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; | ||
1818 | peer->tcp_ts = tcptw->tw_ts_recent; | ||
1819 | } | ||
1820 | inet_putpeer(peer); | ||
1821 | return 1; | ||
1822 | } | ||
1823 | 1783 | ||
1824 | return 0; | 1784 | return inet_getpeer_v4(tw->tw_daddr, 1); |
1825 | } | 1785 | } |
1786 | EXPORT_SYMBOL(tcp_v4_tw_get_peer); | ||
1787 | |||
1788 | static struct timewait_sock_ops tcp_timewait_sock_ops = { | ||
1789 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), | ||
1790 | .twsk_unique = tcp_twsk_unique, | ||
1791 | .twsk_destructor= tcp_twsk_destructor, | ||
1792 | .twsk_getpeer = tcp_v4_tw_get_peer, | ||
1793 | }; | ||
1826 | 1794 | ||
1827 | const struct inet_connection_sock_af_ops ipv4_specific = { | 1795 | const struct inet_connection_sock_af_ops ipv4_specific = { |
1828 | .queue_xmit = ip_queue_xmit, | 1796 | .queue_xmit = ip_queue_xmit, |
@@ -1830,7 +1798,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { | |||
1830 | .rebuild_header = inet_sk_rebuild_header, | 1798 | .rebuild_header = inet_sk_rebuild_header, |
1831 | .conn_request = tcp_v4_conn_request, | 1799 | .conn_request = tcp_v4_conn_request, |
1832 | .syn_recv_sock = tcp_v4_syn_recv_sock, | 1800 | .syn_recv_sock = tcp_v4_syn_recv_sock, |
1833 | .remember_stamp = tcp_v4_remember_stamp, | 1801 | .get_peer = tcp_v4_get_peer, |
1834 | .net_header_len = sizeof(struct iphdr), | 1802 | .net_header_len = sizeof(struct iphdr), |
1835 | .setsockopt = ip_setsockopt, | 1803 | .setsockopt = ip_setsockopt, |
1836 | .getsockopt = ip_getsockopt, | 1804 | .getsockopt = ip_getsockopt, |
@@ -2032,7 +2000,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
2032 | get_req: | 2000 | get_req: |
2033 | req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; | 2001 | req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; |
2034 | } | 2002 | } |
2035 | sk = sk_next(st->syn_wait_sk); | 2003 | sk = sk_nulls_next(st->syn_wait_sk); |
2036 | st->state = TCP_SEQ_STATE_LISTENING; | 2004 | st->state = TCP_SEQ_STATE_LISTENING; |
2037 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); | 2005 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2038 | } else { | 2006 | } else { |
@@ -2041,11 +2009,13 @@ get_req: | |||
2041 | if (reqsk_queue_len(&icsk->icsk_accept_queue)) | 2009 | if (reqsk_queue_len(&icsk->icsk_accept_queue)) |
2042 | goto start_req; | 2010 | goto start_req; |
2043 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); | 2011 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2044 | sk = sk_next(sk); | 2012 | sk = sk_nulls_next(sk); |
2045 | } | 2013 | } |
2046 | get_sk: | 2014 | get_sk: |
2047 | sk_nulls_for_each_from(sk, node) { | 2015 | sk_nulls_for_each_from(sk, node) { |
2048 | if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { | 2016 | if (!net_eq(sock_net(sk), net)) |
2017 | continue; | ||
2018 | if (sk->sk_family == st->family) { | ||
2049 | cur = sk; | 2019 | cur = sk; |
2050 | goto out; | 2020 | goto out; |
2051 | } | 2021 | } |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 43cf901d7659..80b1f80759ab 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -49,6 +49,56 @@ struct inet_timewait_death_row tcp_death_row = { | |||
49 | }; | 49 | }; |
50 | EXPORT_SYMBOL_GPL(tcp_death_row); | 50 | EXPORT_SYMBOL_GPL(tcp_death_row); |
51 | 51 | ||
52 | /* VJ's idea. Save last timestamp seen from this destination | ||
53 | * and hold it at least for normal timewait interval to use for duplicate | ||
54 | * segment detection in subsequent connections, before they enter synchronized | ||
55 | * state. | ||
56 | */ | ||
57 | |||
58 | static int tcp_remember_stamp(struct sock *sk) | ||
59 | { | ||
60 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
61 | struct tcp_sock *tp = tcp_sk(sk); | ||
62 | struct inet_peer *peer; | ||
63 | bool release_it; | ||
64 | |||
65 | peer = icsk->icsk_af_ops->get_peer(sk, &release_it); | ||
66 | if (peer) { | ||
67 | if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || | ||
68 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && | ||
69 | peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { | ||
70 | peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; | ||
71 | peer->tcp_ts = tp->rx_opt.ts_recent; | ||
72 | } | ||
73 | if (release_it) | ||
74 | inet_putpeer(peer); | ||
75 | return 1; | ||
76 | } | ||
77 | |||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static int tcp_tw_remember_stamp(struct inet_timewait_sock *tw) | ||
82 | { | ||
83 | struct sock *sk = (struct sock *) tw; | ||
84 | struct inet_peer *peer; | ||
85 | |||
86 | peer = twsk_getpeer(sk); | ||
87 | if (peer) { | ||
88 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); | ||
89 | |||
90 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || | ||
91 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && | ||
92 | peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { | ||
93 | peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; | ||
94 | peer->tcp_ts = tcptw->tw_ts_recent; | ||
95 | } | ||
96 | inet_putpeer(peer); | ||
97 | return 1; | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
52 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | 102 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) |
53 | { | 103 | { |
54 | if (seq == s_win) | 104 | if (seq == s_win) |
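
The timestamp-remembering logic moves verbatim out of the IPv4-only tcp_v4_remember_stamp()/tcp_v4_tw_remember_stamp() pair (deleted from tcp_ipv4.c above) into family-independent code, reached through the new get_peer/twsk_getpeer operations. The update rule itself is worth seeing in isolation: keep the newest timestamp seen from a peer, but allow regression once the cached value is older than TCP_PAWS_MSL. A runnable extraction of that rule, with the kernel's signed-wraparound comparison kept intact:

    #include <stdint.h>
    #include <stdio.h>

    #define TCP_PAWS_MSL 60   /* seconds */

    struct peer_sketch { uint32_t tcp_ts; uint32_t tcp_ts_stamp; };

    static void remember_stamp(struct peer_sketch *p, uint32_t ts_recent,
                               uint32_t ts_recent_stamp, uint32_t now)
    {
        if ((int32_t)(p->tcp_ts - ts_recent) <= 0 ||
            (now - p->tcp_ts_stamp > TCP_PAWS_MSL &&
             p->tcp_ts_stamp <= ts_recent_stamp)) {
            p->tcp_ts_stamp = ts_recent_stamp;
            p->tcp_ts = ts_recent;
        }
    }

    int main(void)
    {
        struct peer_sketch p = { .tcp_ts = 1000, .tcp_ts_stamp = 100 };

        remember_stamp(&p, 900, 150, 120);  /* older ts, fresh cache */
        printf("ts=%u\n", p.tcp_ts);        /* kept: 1000 */
        remember_stamp(&p, 900, 200, 300);  /* cache stale: accept */
        printf("ts=%u\n", p.tcp_ts);        /* now 900 */
        return 0;
    }
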
@@ -149,14 +199,9 @@ kill_with_rst: | |||
149 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; | 199 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; |
150 | } | 200 | } |
151 | 201 | ||
152 | /* I am shamed, but failed to make it more elegant. | 202 | if (tcp_death_row.sysctl_tw_recycle && |
153 | * Yes, it is direct reference to IP, which is impossible | 203 | tcptw->tw_ts_recent_stamp && |
154 | * to generalize to IPv6. Taking into account that IPv6 | 204 | tcp_tw_remember_stamp(tw)) |
155 | * do not understand recycling in any case, it not | ||
156 | * a big problem in practice. --ANK */ | ||
157 | if (tw->tw_family == AF_INET && | ||
158 | tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && | ||
159 | tcp_v4_tw_remember_stamp(tw)) | ||
160 | inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, | 205 | inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, |
161 | TCP_TIMEWAIT_LEN); | 206 | TCP_TIMEWAIT_LEN); |
162 | else | 207 | else |
@@ -274,7 +319,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
274 | int recycle_ok = 0; | 319 | int recycle_ok = 0; |
275 | 320 | ||
276 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) | 321 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) |
277 | recycle_ok = icsk->icsk_af_ops->remember_stamp(sk); | 322 | recycle_ok = tcp_remember_stamp(sk); |
278 | 323 | ||
279 | if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) | 324 | if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) |
280 | tw = inet_twsk_alloc(sk, state); | 325 | tw = inet_twsk_alloc(sk, state); |
@@ -347,7 +392,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
347 | * socket up. We've got bigger problems than | 392 | * socket up. We've got bigger problems than |
348 | * non-graceful socket closings. | 393 | * non-graceful socket closings. |
349 | */ | 394 | */ |
350 | LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n"); | 395 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); |
351 | } | 396 | } |
352 | 397 | ||
353 | tcp_update_metrics(sk); | 398 | tcp_update_metrics(sk); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 05b1ecf36763..dc7c096ddfef 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -55,7 +55,7 @@ int sysctl_tcp_workaround_signed_windows __read_mostly = 0; | |||
55 | int sysctl_tcp_tso_win_divisor __read_mostly = 3; | 55 | int sysctl_tcp_tso_win_divisor __read_mostly = 3; |
56 | 56 | ||
57 | int sysctl_tcp_mtu_probing __read_mostly = 0; | 57 | int sysctl_tcp_mtu_probing __read_mostly = 0; |
58 | int sysctl_tcp_base_mss __read_mostly = 512; | 58 | int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; |
59 | 59 | ||
60 | /* By default, RFC2861 behavior. */ | 60 | /* By default, RFC2861 behavior. */ |
61 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; | 61 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; |
@@ -119,9 +119,13 @@ static __u16 tcp_advertise_mss(struct sock *sk) | |||
119 | struct dst_entry *dst = __sk_dst_get(sk); | 119 | struct dst_entry *dst = __sk_dst_get(sk); |
120 | int mss = tp->advmss; | 120 | int mss = tp->advmss; |
121 | 121 | ||
122 | if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) { | 122 | if (dst) { |
123 | mss = dst_metric(dst, RTAX_ADVMSS); | 123 | unsigned int metric = dst_metric_advmss(dst); |
124 | tp->advmss = mss; | 124 | |
125 | if (metric < mss) { | ||
126 | mss = metric; | ||
127 | tp->advmss = mss; | ||
128 | } | ||
125 | } | 129 | } |
126 | 130 | ||
127 | return (__u16)mss; | 131 | return (__u16)mss; |
@@ -224,18 +228,22 @@ void tcp_select_initial_window(int __space, __u32 mss, | |||
224 | } | 228 | } |
225 | } | 229 | } |
226 | 230 | ||
227 | /* Set initial window to value enough for senders, following RFC5681. */ | 231 | /* Set initial window to a value large enough for senders starting with
232 | * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place | ||
233 | * a limit on the initial window when mss is larger than 1460. | ||
234 | */ | ||
228 | if (mss > (1 << *rcv_wscale)) { | 235 | if (mss > (1 << *rcv_wscale)) { |
229 | int init_cwnd = rfc3390_bytes_to_packets(mss); | 236 | int init_cwnd = TCP_DEFAULT_INIT_RCVWND; |
230 | 237 | if (mss > 1460) | |
238 | init_cwnd = | ||
239 | max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); | ||
231 | /* when initializing use the value from init_rcv_wnd | 240 | /* when initializing use the value from init_rcv_wnd |
232 | * rather than the default from above | 241 | * rather than the default from above |
233 | */ | 242 | */ |
234 | if (init_rcv_wnd && | 243 | if (init_rcv_wnd) |
235 | (*rcv_wnd > init_rcv_wnd * mss)) | 244 | *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); |
236 | *rcv_wnd = init_rcv_wnd * mss; | 245 | else |
237 | else if (*rcv_wnd > init_cwnd * mss) | 246 | *rcv_wnd = min(*rcv_wnd, init_cwnd * mss); |
238 | *rcv_wnd = init_cwnd * mss; | ||
239 | } | 247 | } |
240 | 248 | ||
241 | /* Set the clamp no higher than max representable value */ | 249 | /* Set the clamp no higher than max representable value */ |
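
Replacing rfc3390_bytes_to_packets() with a fixed TCP_DEFAULT_INIT_RCVWND changes what the initial receive window can cover. A self-contained model of the new clamp, assuming TCP_DEFAULT_INIT_RCVWND is 10 segments:

    #include <stdio.h>

    #define TCP_DEFAULT_INIT_RCVWND 10      /* assumed value of the constant */

    /* Model of the hunk above: hold the byte budget near 10 * 1460 when
     * the MSS is large, but never advertise fewer than 2 segments. */
    static unsigned int init_cwnd_for_mss(unsigned int mss)
    {
            unsigned int init_cwnd = TCP_DEFAULT_INIT_RCVWND;

            if (mss > 1460) {
                    init_cwnd = (1460 * TCP_DEFAULT_INIT_RCVWND) / mss;
                    if (init_cwnd < 2)
                            init_cwnd = 2;  /* the max_t(u32, ..., 2) clamp */
            }
            return init_cwnd;
    }

    int main(void)
    {
            unsigned int mss[] = { 536, 1460, 4096, 9000 };

            for (int i = 0; i < 4; i++)
                    printf("mss=%u -> init_cwnd=%u (%u bytes)\n", mss[i],
                           init_cwnd_for_mss(mss[i]),
                           mss[i] * init_cwnd_for_mss(mss[i]));
            return 0;
    }

So a 9000-byte MSS still gets at least 2 segments, while 1460 and below keep the full 10-segment budget.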
@@ -386,27 +394,30 @@ struct tcp_out_options { | |||
386 | */ | 394 | */ |
387 | static u8 tcp_cookie_size_check(u8 desired) | 395 | static u8 tcp_cookie_size_check(u8 desired) |
388 | { | 396 | { |
389 | if (desired > 0) { | 397 | int cookie_size; |
398 | |||
399 | if (desired > 0) | ||
390 | /* previously specified */ | 400 | /* previously specified */ |
391 | return desired; | 401 | return desired; |
392 | } | 402 | |
393 | if (sysctl_tcp_cookie_size <= 0) { | 403 | cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); |
404 | if (cookie_size <= 0) | ||
394 | /* no default specified */ | 405 | /* no default specified */ |
395 | return 0; | 406 | return 0; |
396 | } | 407 | |
397 | if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) { | 408 | if (cookie_size <= TCP_COOKIE_MIN) |
398 | /* value too small, specify minimum */ | 409 | /* value too small, specify minimum */ |
399 | return TCP_COOKIE_MIN; | 410 | return TCP_COOKIE_MIN; |
400 | } | 411 | |
401 | if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) { | 412 | if (cookie_size >= TCP_COOKIE_MAX) |
402 | /* value too large, specify maximum */ | 413 | /* value too large, specify maximum */ |
403 | return TCP_COOKIE_MAX; | 414 | return TCP_COOKIE_MAX; |
404 | } | 415 | |
405 | if (0x1 & sysctl_tcp_cookie_size) { | 416 | if (cookie_size & 1) |
406 | /* 8-bit multiple, illegal, fix it */ | 417 | /* 8-bit multiple, illegal, fix it */ |
407 | return (u8)(sysctl_tcp_cookie_size + 0x1); | 418 | cookie_size++; |
408 | } | 419 | |
409 | return (u8)sysctl_tcp_cookie_size; | 420 | return (u8)cookie_size; |
410 | } | 421 | } |
411 | 422 | ||
412 | /* Write previously computed TCP options to the packet. | 423 | /* Write previously computed TCP options to the packet. |
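
The tcp_cookie_size_check() rewrite is less about the branch layout than about reading sysctl_tcp_cookie_size exactly once: the old code re-read the sysctl in every comparison, so a concurrent write could satisfy contradictory branches, and the final return could see yet another value. A minimal userspace model of the pattern, with ACCESS_ONCE defined the way the kernel defines it:

    #define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

    int tunable;                    /* imagine another CPU writing this */

    /* One forced load; every test below sees the same snapshot. */
    static int snapshot_check(void)
    {
            int v = ACCESS_ONCE(tunable);

            if (v <= 0)
                    return 0;
            if (v & 1)
                    v++;            /* consistent with the value tested above */
            return v;
    }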
@@ -822,8 +833,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
822 | &md5); | 833 | &md5); |
823 | tcp_header_size = tcp_options_size + sizeof(struct tcphdr); | 834 | tcp_header_size = tcp_options_size + sizeof(struct tcphdr); |
824 | 835 | ||
825 | if (tcp_packets_in_flight(tp) == 0) | 836 | if (tcp_packets_in_flight(tp) == 0) { |
826 | tcp_ca_event(sk, CA_EVENT_TX_START); | 837 | tcp_ca_event(sk, CA_EVENT_TX_START); |
838 | skb->ooo_okay = 1; | ||
839 | } else | ||
840 | skb->ooo_okay = 0; | ||
827 | 841 | ||
828 | skb_push(skb, tcp_header_size); | 842 | skb_push(skb, tcp_header_size); |
829 | skb_reset_transport_header(skb); | 843 | skb_reset_transport_header(skb); |
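
skb->ooo_okay is set exactly when nothing is in flight: at that moment the flow can be moved to another transmit queue without the peer ever observing reordering. The field is real per this hunk; the selector below is a hypothetical reduced model of how a queue picker can honor it:

    #include <stdbool.h>

    struct flow_map {
            int queue;              /* -1 = flow not mapped yet */
    };

    /* Re-map the flow to the currently preferred queue only when no
     * mapping exists or the sender says reordering is impossible. */
    static int pick_tx_queue(struct flow_map *f, bool ooo_okay, int preferred)
    {
            if (f->queue < 0 || ooo_okay)
                    f->queue = preferred;
            return f->queue;
    }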
@@ -1513,6 +1527,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1513 | struct tcp_sock *tp = tcp_sk(sk); | 1527 | struct tcp_sock *tp = tcp_sk(sk); |
1514 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1528 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1515 | u32 send_win, cong_win, limit, in_flight; | 1529 | u32 send_win, cong_win, limit, in_flight; |
1530 | int win_divisor; | ||
1516 | 1531 | ||
1517 | if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) | 1532 | if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) |
1518 | goto send_now; | 1533 | goto send_now; |
@@ -1544,13 +1559,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1544 | if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) | 1559 | if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) |
1545 | goto send_now; | 1560 | goto send_now; |
1546 | 1561 | ||
1547 | if (sysctl_tcp_tso_win_divisor) { | 1562 | win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); |
1563 | if (win_divisor) { | ||
1548 | u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); | 1564 | u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); |
1549 | 1565 | ||
1550 | /* If at least some fraction of a window is available, | 1566 | /* If at least some fraction of a window is available, |
1551 | * just use it. | 1567 | * just use it. |
1552 | */ | 1568 | */ |
1553 | chunk /= sysctl_tcp_tso_win_divisor; | 1569 | chunk /= win_divisor; |
1554 | if (limit >= chunk) | 1570 | if (limit >= chunk) |
1555 | goto send_now; | 1571 | goto send_now; |
1556 | } else { | 1572 | } else { |
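
Same ACCESS_ONCE idea as in tcp_cookie_size_check() above, with one extra payoff: the old code tested sysctl_tcp_tso_win_divisor and then divided by a second read of it, so a write of 0 landing between the two reads meant a divide by zero. Snapshotting closes that window:

    #define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

    int tso_win_divisor;            /* concurrently writable tunable */

    /* Test and divide by the same observed value. */
    static unsigned int chunk_limit(unsigned int chunk)
    {
            int d = ACCESS_ONCE(tso_win_divisor);

            return d ? chunk / d : chunk;
    }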
@@ -2415,7 +2431,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2415 | 2431 | ||
2416 | skb_dst_set(skb, dst_clone(dst)); | 2432 | skb_dst_set(skb, dst_clone(dst)); |
2417 | 2433 | ||
2418 | mss = dst_metric(dst, RTAX_ADVMSS); | 2434 | mss = dst_metric_advmss(dst); |
2419 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) | 2435 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) |
2420 | mss = tp->rx_opt.user_mss; | 2436 | mss = tp->rx_opt.user_mss; |
2421 | 2437 | ||
@@ -2549,7 +2565,7 @@ static void tcp_connect_init(struct sock *sk) | |||
2549 | 2565 | ||
2550 | if (!tp->window_clamp) | 2566 | if (!tp->window_clamp) |
2551 | tp->window_clamp = dst_metric(dst, RTAX_WINDOW); | 2567 | tp->window_clamp = dst_metric(dst, RTAX_WINDOW); |
2552 | tp->advmss = dst_metric(dst, RTAX_ADVMSS); | 2568 | tp->advmss = dst_metric_advmss(dst); |
2553 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss) | 2569 | if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss) |
2554 | tp->advmss = tp->rx_opt.user_mss; | 2570 | tp->advmss = tp->rx_opt.user_mss; |
2555 | 2571 | ||
@@ -2592,6 +2608,7 @@ int tcp_connect(struct sock *sk) | |||
2592 | { | 2608 | { |
2593 | struct tcp_sock *tp = tcp_sk(sk); | 2609 | struct tcp_sock *tp = tcp_sk(sk); |
2594 | struct sk_buff *buff; | 2610 | struct sk_buff *buff; |
2611 | int err; | ||
2595 | 2612 | ||
2596 | tcp_connect_init(sk); | 2613 | tcp_connect_init(sk); |
2597 | 2614 | ||
@@ -2614,7 +2631,9 @@ int tcp_connect(struct sock *sk) | |||
2614 | sk->sk_wmem_queued += buff->truesize; | 2631 | sk->sk_wmem_queued += buff->truesize; |
2615 | sk_mem_charge(sk, buff->truesize); | 2632 | sk_mem_charge(sk, buff->truesize); |
2616 | tp->packets_out += tcp_skb_pcount(buff); | 2633 | tp->packets_out += tcp_skb_pcount(buff); |
2617 | tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); | 2634 | err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); |
2635 | if (err == -ECONNREFUSED) | ||
2636 | return err; | ||
2618 | 2637 | ||
2619 | /* We change tp->snd_nxt after the tcp_transmit_skb() call | 2638 | /* We change tp->snd_nxt after the tcp_transmit_skb() call |
2620 | * in order to make this packet get counted in tcpOutSegs. | 2639 | * in order to make this packet get counted in tcpOutSegs. |
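
Capturing tcp_transmit_skb()'s status lets connect() fail immediately on a hard local error instead of waiting out SYN retransmissions. Note the policy is deliberately narrow: only -ECONNREFUSED is surfaced, and everything else still falls through to the retransmit timer. Simplified model:

    #include <errno.h>

    /* Only a hard refusal aborts the connect attempt; soft transmit
     * errors are left to the SYN retransmit timer. */
    static int connect_xmit_policy(int xmit_status)
    {
            if (xmit_status == -ECONNREFUSED)
                    return xmit_status;     /* propagate immediately */
            return 0;                       /* timer will retry the SYN */
    }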
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 6211e2114173..85ee7eb7e38e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -154,7 +154,7 @@ static int tcpprobe_sprint(char *tbuf, int n) | |||
154 | struct timespec tv | 154 | struct timespec tv |
155 | = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); | 155 | = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); |
156 | 156 | ||
157 | return snprintf(tbuf, n, | 157 | return scnprintf(tbuf, n, |
158 | "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n", | 158 | "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n", |
159 | (unsigned long) tv.tv_sec, | 159 | (unsigned long) tv.tv_sec, |
160 | (unsigned long) tv.tv_nsec, | 160 | (unsigned long) tv.tv_nsec, |
@@ -174,7 +174,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, | |||
174 | return -EINVAL; | 174 | return -EINVAL; |
175 | 175 | ||
176 | while (cnt < len) { | 176 | while (cnt < len) { |
177 | char tbuf[128]; | 177 | char tbuf[164]; |
178 | int width; | 178 | int width; |
179 | 179 | ||
180 | /* Wait for data in buffer */ | 180 | /* Wait for data in buffer */ |
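
The two tcp_probe.c changes belong together. snprintf() returns the length the output would have had, so a caller that advances a cursor by the return value can step past the real end of the buffer on truncation; scnprintf() returns what was actually stored. Growing tbuf from 128 to 164 bytes then covers the worst-case formatted line. A userspace model of the difference (my_scnprintf is a stand-in; the real scnprintf is kernel-internal):

    #include <stdarg.h>
    #include <stdio.h>

    /* Clamp the vsnprintf() result to what actually fits, mirroring
     * the kernel's scnprintf() contract. */
    static int my_scnprintf(char *buf, size_t size, const char *fmt, ...)
    {
            va_list args;
            int i;

            if (size == 0)
                    return 0;
            va_start(args, fmt);
            i = vsnprintf(buf, size, fmt, args);
            va_end(args);
            return (size_t)i >= size ? (int)(size - 1) : i;
    }

    int main(void)
    {
            char tbuf[8];

            printf("snprintf:  %d\n", snprintf(tbuf, sizeof(tbuf), "0123456789"));
            printf("scnprintf: %d\n", my_scnprintf(tbuf, sizeof(tbuf), "0123456789"));
            return 0;                       /* prints 10, then 7 */
    }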
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 28cb2d733a3c..8157b17959ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -110,7 +110,7 @@ | |||
110 | struct udp_table udp_table __read_mostly; | 110 | struct udp_table udp_table __read_mostly; |
111 | EXPORT_SYMBOL(udp_table); | 111 | EXPORT_SYMBOL(udp_table); |
112 | 112 | ||
113 | int sysctl_udp_mem[3] __read_mostly; | 113 | long sysctl_udp_mem[3] __read_mostly; |
114 | EXPORT_SYMBOL(sysctl_udp_mem); | 114 | EXPORT_SYMBOL(sysctl_udp_mem); |
115 | 115 | ||
116 | int sysctl_udp_rmem_min __read_mostly; | 116 | int sysctl_udp_rmem_min __read_mostly; |
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min); | |||
119 | int sysctl_udp_wmem_min __read_mostly; | 119 | int sysctl_udp_wmem_min __read_mostly; |
120 | EXPORT_SYMBOL(sysctl_udp_wmem_min); | 120 | EXPORT_SYMBOL(sysctl_udp_wmem_min); |
121 | 121 | ||
122 | atomic_t udp_memory_allocated; | 122 | atomic_long_t udp_memory_allocated; |
123 | EXPORT_SYMBOL(udp_memory_allocated); | 123 | EXPORT_SYMBOL(udp_memory_allocated); |
124 | 124 | ||
125 | #define MAX_UDP_PORTS 65536 | 125 | #define MAX_UDP_PORTS 65536 |
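
The int-to-long (and atomic_t-to-atomic_long_t) widening matters because these counters are denominated in pages: with 4 KiB pages a signed 32-bit count saturates at 8 TiB, which large machines can reach. Quick arithmetic check:

    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
            /* 2^31 pages * 4 KiB/page = 2^43 bytes = 8 TiB */
            long long bytes = (long long)INT_MAX * 4096;

            printf("32-bit page counter caps at ~%lld TiB\n",
                   bytes / (1LL << 40));    /* prints 7, i.e. just under 8 */
            return 0;
    }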
@@ -430,7 +430,7 @@ begin: | |||
430 | 430 | ||
431 | if (result) { | 431 | if (result) { |
432 | exact_match: | 432 | exact_match: |
433 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | 433 | if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) |
434 | result = NULL; | 434 | result = NULL; |
435 | else if (unlikely(compute_score2(result, net, saddr, sport, | 435 | else if (unlikely(compute_score2(result, net, saddr, sport, |
436 | daddr, hnum, dif) < badness)) { | 436 | daddr, hnum, dif) < badness)) { |
@@ -500,7 +500,7 @@ begin: | |||
500 | goto begin; | 500 | goto begin; |
501 | 501 | ||
502 | if (result) { | 502 | if (result) { |
503 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | 503 | if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) |
504 | result = NULL; | 504 | result = NULL; |
505 | else if (unlikely(compute_score(result, net, saddr, hnum, sport, | 505 | else if (unlikely(compute_score(result, net, saddr, hnum, sport, |
506 | daddr, dport, dif) < badness)) { | 506 | daddr, dport, dif) < badness)) { |
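
atomic_inc_not_zero_hint() seeds the compare-and-swap loop with an expected value rather than doing a separate initial load; the hint of 2 encodes the refcount a hashed UDP socket usually holds at lookup time (an assumption about the common case, which is all a hint is). Userspace model with C11 atomics:

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Take a reference unless the count already hit zero; start the CAS
     * loop from "hint" so the common case needs no prior load. */
    static bool inc_not_zero_hint(atomic_int *v, int hint)
    {
            int c = hint;

            while (c != 0) {
                    /* on failure, c is refreshed with the observed value */
                    if (atomic_compare_exchange_weak(v, &c, c + 1))
                            return true;
            }
            return false;   /* object is being freed; caller must back off */
    }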
@@ -890,15 +890,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
890 | if (rt == NULL) { | 890 | if (rt == NULL) { |
891 | struct flowi fl = { .oif = ipc.oif, | 891 | struct flowi fl = { .oif = ipc.oif, |
892 | .mark = sk->sk_mark, | 892 | .mark = sk->sk_mark, |
893 | .nl_u = { .ip4_u = | 893 | .fl4_dst = faddr, |
894 | { .daddr = faddr, | 894 | .fl4_src = saddr, |
895 | .saddr = saddr, | 895 | .fl4_tos = tos, |
896 | .tos = tos } }, | ||
897 | .proto = sk->sk_protocol, | 896 | .proto = sk->sk_protocol, |
898 | .flags = inet_sk_flowi_flags(sk), | 897 | .flags = inet_sk_flowi_flags(sk), |
899 | .uli_u = { .ports = | 898 | .fl_ip_sport = inet->inet_sport, |
900 | { .sport = inet->inet_sport, | 899 | .fl_ip_dport = dport }; |
901 | .dport = dport } } }; | ||
902 | struct net *net = sock_net(sk); | 900 | struct net *net = sock_net(sk); |
903 | 901 | ||
904 | security_sk_classify_flow(sk, &fl); | 902 | security_sk_classify_flow(sk, &fl); |
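
The flowi initializer rewrites here (and in af_inet.c, arp.c and xfrm4_policy.c) are mechanical: flat fl4_*/fl_ip_* names stand in for the old nested union members. Assuming the transitional scheme where the flat names are macros over the union, a reduced hypothetical version shows why the initializers shrink with no layout change:

    /* Hypothetical cut-down flowi: flat names alias the union members. */
    struct flowi_min {
            union {
                    struct {
                            unsigned int daddr, saddr;
                            unsigned char tos;
                    } ip4_u;
            } nl_u;
    };
    #define fl4_dst nl_u.ip4_u.daddr
    #define fl4_src nl_u.ip4_u.saddr
    #define fl4_tos nl_u.ip4_u.tos

    struct flowi_min fl = {
            .fl4_dst = 0x0a000001,  /* 10.0.0.1, illustrative only */
            .fl4_tos = 0,
    };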
@@ -1899,6 +1897,7 @@ struct proto udp_prot = { | |||
1899 | .compat_setsockopt = compat_udp_setsockopt, | 1897 | .compat_setsockopt = compat_udp_setsockopt, |
1900 | .compat_getsockopt = compat_udp_getsockopt, | 1898 | .compat_getsockopt = compat_udp_getsockopt, |
1901 | #endif | 1899 | #endif |
1900 | .clear_sk = sk_prot_clear_portaddr_nulls, | ||
1902 | }; | 1901 | }; |
1903 | EXPORT_SYMBOL(udp_prot); | 1902 | EXPORT_SYMBOL(udp_prot); |
1904 | 1903 | ||
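
The new .clear_sk hook (udplite_prot gets the same one below) exists because these sockets live in RCU-reused slabs: a concurrent lookup may still walk an object while it is being re-initialized, so wiping it must not touch the list linkage a walker could dereference. A reduced, hypothetical model of the idea (the real helper works on field offsets inside struct sock):

    #include <string.h>

    struct node { struct node *next; };

    struct sock_min {
            struct node portaddr_node;      /* may be traversed under RCU */
            char rest[64];                  /* safe to wipe */
    };

    /* Clear everything except the pointer an RCU walker may follow. */
    static void clear_sk_preserving_node(struct sock_min *sk)
    {
            memset(sk->rest, 0, sizeof(sk->rest));
            /* sk->portaddr_node deliberately left intact */
    }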
@@ -2228,7 +2227,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) | |||
2228 | /* Do software UFO. Complete and fill in the UDP checksum as HW cannot | 2227 | /* Do software UFO. Complete and fill in the UDP checksum as HW cannot |
2229 | * do checksum of UDP packets sent as multiple IP fragments. | 2228 | * do checksum of UDP packets sent as multiple IP fragments. |
2230 | */ | 2229 | */ |
2231 | offset = skb->csum_start - skb_headroom(skb); | 2230 | offset = skb_checksum_start_offset(skb); |
2232 | csum = skb_checksum(skb, offset, skb->len - offset, 0); | 2231 | csum = skb_checksum(skb, offset, skb->len - offset, 0); |
2233 | offset += skb->csum_offset; | 2232 | offset += skb->csum_offset; |
2234 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); | 2233 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); |
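
skb_checksum_start_offset() wraps exactly the open-coded expression on the left of this hunk: csum_start is measured from the buffer head, while skb_checksum() wants an offset from the start of packet data, so the headroom between the two is subtracted out. The conversion in isolation:

    /* csum_start counts from skb->head; packet offsets count from
     * skb->data; the difference between the two is the headroom. */
    static inline int checksum_start_offset(unsigned int csum_start,
                                            unsigned int headroom)
    {
            return (int)(csum_start - headroom);
    }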
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index ab76aa928fa9..aee9963f7f5a 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c | |||
@@ -57,6 +57,7 @@ struct proto udplite_prot = { | |||
57 | .compat_setsockopt = compat_udp_setsockopt, | 57 | .compat_setsockopt = compat_udp_setsockopt, |
58 | .compat_getsockopt = compat_udp_getsockopt, | 58 | .compat_getsockopt = compat_udp_getsockopt, |
59 | #endif | 59 | #endif |
60 | .clear_sk = sk_prot_clear_portaddr_nulls, | ||
60 | }; | 61 | }; |
61 | EXPORT_SYMBOL(udplite_prot); | 62 | EXPORT_SYMBOL(udplite_prot); |
62 | 63 | ||
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 6f368413eb0e..534972e114ac 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c | |||
@@ -56,7 +56,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) | |||
56 | 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); | 56 | 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); |
57 | ip_select_ident(top_iph, dst->child, NULL); | 57 | ip_select_ident(top_iph, dst->child, NULL); |
58 | 58 | ||
59 | top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); | 59 | top_iph->ttl = ip4_dst_hoplimit(dst->child); |
60 | 60 | ||
61 | top_iph->saddr = x->props.saddr.a4; | 61 | top_iph->saddr = x->props.saddr.a4; |
62 | top_iph->daddr = x->id.daddr.a4; | 62 | top_iph->daddr = x->id.daddr.a4; |
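
ip4_dst_hoplimit() looks like another accessor in the dst_metric_advmss() mold. Assuming the same raw-metric-plus-fallback convention, it would yield the route's explicit hoplimit when one is set and the system default TTL otherwise (a sketch, not the kernel's exact code):

    /* An unset metric reads as 0, never a valid TTL, so 0 selects the
     * configured default. */
    static int dst_hoplimit(int raw_metric, int sysctl_default_ttl)
    {
            return raw_metric ? raw_metric : sysctl_default_ttl;
    }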
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index dd1fd8c473fc..b057d40addec 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/err.h> | 11 | #include <linux/err.h> |
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/inetdevice.h> | 13 | #include <linux/inetdevice.h> |
14 | #include <linux/if_tunnel.h> | ||
14 | #include <net/dst.h> | 15 | #include <net/dst.h> |
15 | #include <net/xfrm.h> | 16 | #include <net/xfrm.h> |
16 | #include <net/ip.h> | 17 | #include <net/ip.h> |
@@ -22,12 +23,8 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, | |||
22 | xfrm_address_t *daddr) | 23 | xfrm_address_t *daddr) |
23 | { | 24 | { |
24 | struct flowi fl = { | 25 | struct flowi fl = { |
25 | .nl_u = { | 26 | .fl4_dst = daddr->a4, |
26 | .ip4_u = { | 27 | .fl4_tos = tos, |
27 | .tos = tos, | ||
28 | .daddr = daddr->a4, | ||
29 | }, | ||
30 | }, | ||
31 | }; | 28 | }; |
32 | struct dst_entry *dst; | 29 | struct dst_entry *dst; |
33 | struct rtable *rt; | 30 | struct rtable *rt; |
@@ -154,6 +151,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
154 | fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); | 151 | fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); |
155 | } | 152 | } |
156 | break; | 153 | break; |
154 | |||
155 | case IPPROTO_GRE: | ||
156 | if (pskb_may_pull(skb, xprth + 12 - skb->data)) { | ||
157 | __be16 *greflags = (__be16 *)xprth; | ||
158 | __be32 *gre_hdr = (__be32 *)xprth; | ||
159 | |||
160 | if (greflags[0] & GRE_KEY) { | ||
161 | if (greflags[0] & GRE_CSUM) | ||
162 | gre_hdr++; | ||
163 | fl->fl_gre_key = gre_hdr[1]; | ||
164 | } | ||
165 | } | ||
166 | break; | ||
167 | |||
157 | default: | 168 | default: |
158 | fl->fl_ipsec_spi = 0; | 169 | fl->fl_ipsec_spi = 0; |
159 | break; | 170 | break; |
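
The new IPPROTO_GRE case makes the GRE key part of the decoded flow so IPsec policy lookups can match on it. The pskb_may_pull() of 12 bytes covers the 4-byte base header plus the optional checksum word plus the key, and the key slides one 32-bit word later when GRE_CSUM is present. A self-contained model of the offset logic, with the flag values as defined in linux/if_tunnel.h (network byte order):

    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>

    /* Flag bits occupy the first 16 bits of the GRE header (RFC 2890). */
    #define GRE_CSUM htons(0x8000)
    #define GRE_KEY  htons(0x2000)

    /* Return the key in network byte order, or 0 when none is present.
     * hdr must hold at least 12 bytes, matching the pskb_may_pull(). */
    static uint32_t gre_key_of(const uint8_t *hdr)
    {
            uint16_t flags;
            uint32_t key;
            size_t off = 4;                 /* base header: flags + proto */

            memcpy(&flags, hdr, sizeof(flags));
            if (!(flags & GRE_KEY))
                    return 0;
            if (flags & GRE_CSUM)
                    off += 4;               /* skip checksum + reserved */
            memcpy(&key, hdr + off, sizeof(key));
            return key;
    }

The memcpy reads avoid alignment assumptions that the kernel version can safely make with its __be16/__be32 casts.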