Diffstat (limited to 'net/ipv4')
64 files changed, 2271 insertions, 3120 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index a5a1050595d1..cbb505ba9324 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -55,45 +55,9 @@ config IP_ADVANCED_ROUTER
 
 	  If unsure, say N here.
 
-choice
-	prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
-	depends on IP_ADVANCED_ROUTER
-	default ASK_IP_FIB_HASH
-
-config ASK_IP_FIB_HASH
-	bool "FIB_HASH"
-	---help---
-	  Current FIB is very proven and good enough for most users.
-
-config IP_FIB_TRIE
-	bool "FIB_TRIE"
-	---help---
-	  Use new experimental LC-trie as FIB lookup algorithm.
-	  This improves lookup performance if you have a large
-	  number of routes.
-
-	  LC-trie is a longest matching prefix lookup algorithm which
-	  performs better than FIB_HASH for large routing tables.
-	  But, it consumes more memory and is more complex.
-
-	  LC-trie is described in:
-
-	  IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
-	  IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
-	  June 1999
-
-	  An experimental study of compression methods for dynamic tries
-	  Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
-	  <http://www.csc.kth.se/~snilsson/software/dyntrie2/>
-
-endchoice
-
-config IP_FIB_HASH
-	def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
-
 config IP_FIB_TRIE_STATS
 	bool "FIB TRIE statistics"
-	depends on IP_FIB_TRIE
+	depends on IP_ADVANCED_ROUTER
 	---help---
 	  Keep track of statistics on structure of FIB TRIE table.
 	  Useful for testing and measuring TRIE performance.
@@ -140,6 +104,9 @@ config IP_ROUTE_VERBOSE
 	  handled by the klogd daemon which is responsible for kernel messages
 	  ("man klogd").
 
+config IP_ROUTE_CLASSID
+	bool
+
 config IP_PNP
 	bool "IP: kernel level autoconfiguration"
 	help
@@ -657,4 +624,3 @@ config TCP_MD5SIG
 	  on the Internet.
 
 	  If unsure, say N.
-
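The help text removed above describes LC-trie as a longest-matching-prefix algorithm. As a rough illustration of what longest-prefix matching means, here is a toy linear scan in userspace C — not the kernel's level-compressed trie, and all names and sample routes are invented for the example:

#include <stdint.h>
#include <stdio.h>

/* Toy longest-prefix match: scan a small table and keep the entry
 * whose prefix matches the destination with the most leading bits.
 * The real fib_trie walks a level-compressed trie instead of a list. */
struct toy_route {
	uint32_t prefix;	/* network prefix as a host-order u32 */
	int plen;		/* prefix length in bits */
	const char *nexthop;
};

static const char *toy_lpm(const struct toy_route *tbl, int n, uint32_t dst)
{
	const char *best = NULL;
	int best_len = -1;

	for (int i = 0; i < n; i++) {
		uint32_t mask = tbl[i].plen ? ~0u << (32 - tbl[i].plen) : 0;

		if ((dst & mask) == tbl[i].prefix && tbl[i].plen > best_len) {
			best = tbl[i].nexthop;
			best_len = tbl[i].plen;
		}
	}
	return best;
}

int main(void)
{
	const struct toy_route tbl[] = {
		{ 0x0a000000,  8, "via eth0" },	/* 10.0.0.0/8 */
		{ 0x0a010000, 16, "via eth1" },	/* 10.1.0.0/16 */
	};

	/* 10.1.2.3 matches both entries; the /16 wins as the longer prefix */
	printf("%s\n", toy_lpm(tbl, 2, 0x0a010203));
	return 0;
}

The trie buys the same answer without scanning every route, which is why the help text claimed better performance for large tables at the cost of memory and complexity.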
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4978d22f9a75..0dc772d0d125 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,12 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o igmp.o \
-	     fib_frontend.o fib_semantics.o \
+	     fib_frontend.o fib_semantics.o fib_trie.o \
 	     inet_fragment.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
-obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
-obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f2b61107df6c..807d83c02ef6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -880,6 +880,19 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 }
 EXPORT_SYMBOL(inet_ioctl);
 
+#ifdef CONFIG_COMPAT
+int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	int err = -ENOIOCTLCMD;
+
+	if (sk->sk_prot->compat_ioctl)
+		err = sk->sk_prot->compat_ioctl(sk, cmd, arg);
+
+	return err;
+}
+#endif
+
 const struct proto_ops inet_stream_ops = {
 	.family		   = PF_INET,
 	.owner		   = THIS_MODULE,
@@ -903,6 +916,7 @@ const struct proto_ops inet_stream_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
+	.compat_ioctl	   = inet_compat_ioctl,
 #endif
 };
 EXPORT_SYMBOL(inet_stream_ops);
@@ -929,6 +943,7 @@ const struct proto_ops inet_dgram_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
+	.compat_ioctl	   = inet_compat_ioctl,
 #endif
 };
 EXPORT_SYMBOL(inet_dgram_ops);
@@ -959,6 +974,7 @@ static const struct proto_ops inet_sockraw_ops = {
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
+	.compat_ioctl	   = inet_compat_ioctl,
 #endif
 };
 
@@ -1085,23 +1101,20 @@ int sysctl_ip_dynaddr __read_mostly;
 static int inet_sk_reselect_saddr(struct sock *sk)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	int err;
-	struct rtable *rt;
 	__be32 old_saddr = inet->inet_saddr;
-	__be32 new_saddr;
 	__be32 daddr = inet->inet_daddr;
+	struct rtable *rt;
+	__be32 new_saddr;
 
 	if (inet->opt && inet->opt->srr)
 		daddr = inet->opt->faddr;
 
 	/* Query new route. */
-	err = ip_route_connect(&rt, daddr, 0,
-			       RT_CONN_FLAGS(sk),
-			       sk->sk_bound_dev_if,
-			       sk->sk_protocol,
-			       inet->inet_sport, inet->inet_dport, sk, 0);
-	if (err)
-		return err;
+	rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk),
+			      sk->sk_bound_dev_if, sk->sk_protocol,
+			      inet->inet_sport, inet->inet_dport, sk, false);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
 
 	sk_setup_caps(sk, &rt->dst);
 
@@ -1144,25 +1157,16 @@ int inet_sk_rebuild_header(struct sock *sk)
 	daddr = inet->inet_daddr;
 	if (inet->opt && inet->opt->srr)
 		daddr = inet->opt->faddr;
-	{
-		struct flowi fl = {
-			.oif = sk->sk_bound_dev_if,
-			.mark = sk->sk_mark,
-			.fl4_dst = daddr,
-			.fl4_src = inet->inet_saddr,
-			.fl4_tos = RT_CONN_FLAGS(sk),
-			.proto = sk->sk_protocol,
-			.flags = inet_sk_flowi_flags(sk),
-			.fl_ip_sport = inet->inet_sport,
-			.fl_ip_dport = inet->inet_dport,
-		};
-
-		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
-	}
-	if (!err)
+	rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr,
+				   inet->inet_dport, inet->inet_sport,
+				   sk->sk_protocol, RT_CONN_FLAGS(sk),
+				   sk->sk_bound_dev_if);
+	if (!IS_ERR(rt)) {
+		err = 0;
 		sk_setup_caps(sk, &rt->dst);
-	else {
+	} else {
+		err = PTR_ERR(rt);
+
 		/* Routing failed... */
 		sk->sk_route_caps = 0;
 		/*
@@ -1215,7 +1219,7 @@ out:
 	return err;
 }
 
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct iphdr *iph;
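A recurring change in this series replaces routing functions that filled a struct rtable ** out-parameter and returned an int error with functions that return the route pointer directly, encoding failure via ERR_PTR(). A minimal userspace-style sketch of the convention follows; the ERR_PTR helpers are simplified re-definitions and the lookup() function and its failure mode are invented for illustration:

#include <stdio.h>
#include <errno.h>

/* Simplified versions of the kernel's ERR_PTR helpers: a small negative
 * errno is folded into an otherwise-invalid pointer value. */
#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

struct rtable_stub { int dummy; };

/* Hypothetical lookup in the new style: the returned pointer carries
 * either a valid object or an encoded errno - no out-parameter. */
static struct rtable_stub *lookup(int fail)
{
	static struct rtable_stub rt;

	if (fail)
		return ERR_PTR(-ENETUNREACH);
	return &rt;
}

int main(void)
{
	struct rtable_stub *rt = lookup(1);

	if (IS_ERR(rt))		/* one test replaces the old err != 0 check */
		printf("error %ld\n", PTR_ERR(rt));
	return 0;
}

The design choice is that a single return value carries both the result and the error, so callers cannot forget to check the error while still using the pointer.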
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 86961bec70ab..4286fd3cc0e2 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -201,11 +201,14 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->ttl = 0;
 	top_iph->check = 0;
 
-	ah->hdrlen  = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
+	if (x->props.flags & XFRM_STATE_ALIGN4)
+		ah->hdrlen  = (XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
+	else
+		ah->hdrlen  = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
 
 	ah->reserved = 0;
 	ah->spi = x->id.spi;
-	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
+	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg, 0, skb->len);
@@ -299,9 +302,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	nexthdr = ah->nexthdr;
 	ah_hlen = (ah->hdrlen + 2) << 2;
 
-	if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
-	    ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
-		goto out;
+	if (x->props.flags & XFRM_STATE_ALIGN4) {
+		if (ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_full_len) &&
+		    ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len))
+			goto out;
+	} else {
+		if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
+		    ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
+			goto out;
+	}
 
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
@@ -450,8 +459,12 @@ static int ah_init_state(struct xfrm_state *x)
 
 	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
 
-	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
-					  ahp->icv_trunc_len);
+	if (x->props.flags & XFRM_STATE_ALIGN4)
+		x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) +
+						  ahp->icv_trunc_len);
+	else
+		x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
+						  ahp->icv_trunc_len);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 	x->data = ahp;
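The hdrlen arithmetic above encodes the AH length in 32-bit words minus two, with the ICV padded to an 8-byte boundary, or to 4 bytes when the XFRM_STATE_ALIGN4 flag selects the older RFC 2402-style alignment. A standalone sketch of the same arithmetic, assuming a 12-byte fixed header and a 16-byte truncated ICV purely as a worked example:

#include <stdio.h>

/* Standalone version of the AH length math shown above. hdrlen is
 * expressed in 32-bit words minus 2, and the header is padded to
 * 8 bytes normally, or 4 bytes under the ALIGN4 state flag. */
#define ALIGN_TO(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int fixed = 12;	/* sizeof(struct ip_auth_hdr) */
	unsigned int icv = 16;		/* e.g. an HMAC-SHA256-128 ICV */

	unsigned int h8 = (ALIGN_TO(fixed + icv, 8) >> 2) - 2;
	unsigned int h4 = (ALIGN_TO(fixed + icv, 4) >> 2) - 2;

	/* Recover the on-the-wire byte length the way ah_input() does. */
	printf("align8: hdrlen=%u -> %u bytes\n", h8, (h8 + 2) << 2); /* 6 -> 32 */
	printf("align4: hdrlen=%u -> %u bytes\n", h4, (h4 + 2) << 2); /* 5 -> 28 */
	return 0;
}

For this example the two alignments genuinely differ (32 vs 28 bytes on the wire), which is why ah_input() must validate the received ah_hlen against whichever alignment the state was configured with.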
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 04c8b69fd426..090d273d7865 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -433,14 +433,13 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 
 static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 {
-	struct flowi fl = { .fl4_dst = sip,
-			    .fl4_src = tip };
 	struct rtable *rt;
 	int flag = 0;
 	/*unsigned long now; */
 	struct net *net = dev_net(dev);
 
-	if (ip_route_output_key(net, &rt, &fl) < 0)
+	rt = ip_route_output(net, sip, tip, 0, 0);
+	if (IS_ERR(rt))
 		return 1;
 	if (rt->dst.dev != dev) {
 		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
@@ -1017,14 +1016,13 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
 		IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
 		return 0;
 	}
-	if (__in_dev_get_rcu(dev)) {
-		IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on);
+	if (__in_dev_get_rtnl(dev)) {
+		IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
 		return 0;
 	}
 	return -ENXIO;
 }
 
-/* must be called with rcu_read_lock() */
 static int arp_req_set_public(struct net *net, struct arpreq *r,
 		struct net_device *dev)
 {
@@ -1062,12 +1060,10 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 	if (r->arp_flags & ATF_PERM)
 		r->arp_flags |= ATF_COM;
 	if (dev == NULL) {
-		struct flowi fl = { .fl4_dst = ip,
-				    .fl4_tos = RTO_ONLINK };
-		struct rtable *rt;
-		err = ip_route_output_key(net, &rt, &fl);
-		if (err != 0)
-			return err;
+		struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
+
+		if (IS_ERR(rt))
+			return PTR_ERR(rt);
 		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
@@ -1178,7 +1174,6 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
 static int arp_req_delete(struct net *net, struct arpreq *r,
 			  struct net_device *dev)
 {
-	int err;
 	__be32 ip;
 
 	if (r->arp_flags & ATF_PUBL)
@@ -1186,12 +1181,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 
 	ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
 	if (dev == NULL) {
-		struct flowi fl = { .fl4_dst = ip,
-				    .fl4_tos = RTO_ONLINK };
-		struct rtable *rt;
-		err = ip_route_output_key(net, &rt, &fl);
-		if (err != 0)
-			return err;
+		struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
+		if (IS_ERR(rt))
+			return PTR_ERR(rt);
 		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
@@ -1233,10 +1225,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	if (!(r.arp_flags & ATF_NETMASK))
 		((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
 							   htonl(0xFFFFFFFFUL);
-	rcu_read_lock();
+	rtnl_lock();
 	if (r.arp_dev[0]) {
 		err = -ENODEV;
-		dev = dev_get_by_name_rcu(net, r.arp_dev);
+		dev = __dev_get_by_name(net, r.arp_dev);
 		if (dev == NULL)
 			goto out;
 
@@ -1263,7 +1255,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		break;
 	}
 out:
-	rcu_read_unlock();
+	rtnl_unlock();
 	if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
 		err = -EFAULT;
 	return err;
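Besides the ERR_PTR conversion, arp_ioctl() here switches from an RCU read section to holding RTNL across the whole ioctl, so the device looked up with __dev_get_by_name() stays valid for the duration and the handlers may sleep. A userspace sketch of that lookup-and-use-under-one-lock pattern, where the mutex stands in for RTNL and all names are invented:

#include <pthread.h>
#include <stdio.h>

/* The mutex plays the role of rtnl_lock()/rtnl_unlock(): it is held
 * across both the lookup and the use of the looked-up object, so the
 * object cannot disappear mid-operation. */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

struct device { const char *name; };
static struct device eth0 = { "eth0" };

static struct device *lookup_locked(const char *name)
{
	/* caller must hold table_lock, mirroring __dev_get_by_name() */
	return name && name[0] ? &eth0 : NULL;
}

static int do_ioctl(const char *name)
{
	int err = 0;

	pthread_mutex_lock(&table_lock);	/* rtnl_lock() */
	struct device *dev = lookup_locked(name);
	if (!dev)
		err = -1;
	else
		printf("using %s safely under the lock\n", dev->name);
	pthread_mutex_unlock(&table_lock);	/* rtnl_unlock() */
	return err;
}

int main(void) { return do_ioctl("eth0"); }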
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 174be6caa5c8..85bd24ca4f6d 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -46,11 +46,12 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		if (!saddr)
 			saddr = inet->mc_addr;
 	}
-	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
+	rt = ip_route_connect(usin->sin_addr.s_addr, saddr,
 			      RT_CONN_FLAGS(sk), oif,
 			      sk->sk_protocol,
-			       inet->inet_sport, usin->sin_port, sk, 1);
-	if (err) {
+			      inet->inet_sport, usin->sin_port, sk, true);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
 		if (err == -ENETUNREACH)
 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 		return err;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 748cb5b337bd..5345b0bee6df 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -51,6 +51,7 @@
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -63,6 +64,8 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 
+#include "fib_lookup.h"
+
 static struct ipv4_devconf ipv4_devconf = {
 	.data = {
 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
@@ -92,6 +95,85 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LABEL]		= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 };
 
+/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
+ * value.  So if you change this define, make appropriate changes to
+ * inet_addr_hash as well.
+ */
+#define IN4_ADDR_HSIZE	256
+static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
+static DEFINE_SPINLOCK(inet_addr_hash_lock);
+
+static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
+{
+	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
+
+	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
+		(IN4_ADDR_HSIZE - 1));
+}
+
+static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
+{
+	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
+
+	spin_lock(&inet_addr_hash_lock);
+	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
+	spin_unlock(&inet_addr_hash_lock);
+}
+
+static void inet_hash_remove(struct in_ifaddr *ifa)
+{
+	spin_lock(&inet_addr_hash_lock);
+	hlist_del_init_rcu(&ifa->hash);
+	spin_unlock(&inet_addr_hash_lock);
+}
+
+/**
+ * __ip_dev_find - find the first device with a given source address.
+ * @net: the net namespace
+ * @addr: the source address
+ * @devref: if true, take a reference on the found device
+ *
+ * If a caller uses devref=false, it should be protected by RCU, or RTNL
+ */
+struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
+{
+	unsigned int hash = inet_addr_hash(net, addr);
+	struct net_device *result = NULL;
+	struct in_ifaddr *ifa;
+	struct hlist_node *node;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
+		struct net_device *dev = ifa->ifa_dev->dev;
+
+		if (!net_eq(dev_net(dev), net))
+			continue;
+		if (ifa->ifa_local == addr) {
+			result = dev;
+			break;
+		}
+	}
+	if (!result) {
+		struct flowi4 fl4 = { .daddr = addr };
+		struct fib_result res = { 0 };
+		struct fib_table *local;
+
+		/* Fallback to FIB local table so that communication
+		 * over loopback subnets work.
+		 */
+		local = fib_get_table(net, RT_TABLE_LOCAL);
+		if (local &&
+		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
+		    res.type == RTN_LOCAL)
+			result = FIB_RES_DEV(res);
+	}
+	if (result && devref)
+		dev_hold(result);
+	rcu_read_unlock();
+	return result;
+}
+EXPORT_SYMBOL(__ip_dev_find);
+
 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -265,6 +347,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			}
 
 			if (!do_promote) {
+				inet_hash_remove(ifa);
 				*ifap1 = ifa->ifa_next;
 
 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
@@ -278,9 +361,21 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 		}
 	}
 
+	/* On promotion all secondaries from subnet are changing
+	 * the primary IP, we must remove all their routes silently
+	 * and later to add them back with new prefsrc. Do this
+	 * while all addresses are on the device list.
+	 */
+	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
+		if (ifa1->ifa_mask == ifa->ifa_mask &&
+		    inet_ifa_match(ifa1->ifa_address, ifa))
+			fib_del_ifaddr(ifa, ifa1);
+	}
+
 	/* 2. Unlink it */
 
 	*ifap = ifa1->ifa_next;
+	inet_hash_remove(ifa1);
 
 	/* 3. Announce address deletion */
 
@@ -296,6 +391,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 
 	if (promote) {
+		struct in_ifaddr *next_sec = promote->ifa_next;
 
 		if (prev_prom) {
 			prev_prom->ifa_next = promote->ifa_next;
@@ -307,7 +403,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
 		blocking_notifier_call_chain(&inetaddr_chain,
 				NETDEV_UP, promote);
-		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
+		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
 			if (ifa1->ifa_mask != ifa->ifa_mask ||
 			    !inet_ifa_match(ifa1->ifa_address, ifa))
 				continue;
@@ -368,6 +464,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 	ifa->ifa_next = *ifap;
 	*ifap = ifa;
 
+	inet_hash_insert(dev_net(in_dev->dev), ifa);
+
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
@@ -521,6 +619,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 	if (tb[IFA_ADDRESS] == NULL)
 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 
+	INIT_HLIST_NODE(&ifa->hash);
 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 	ifa->ifa_flags = ifm->ifa_flags;
@@ -670,7 +769,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		     ifap = &ifa->ifa_next) {
 			if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
 			    sin_orig.sin_addr.s_addr ==
-							ifa->ifa_address) {
+							ifa->ifa_local) {
 				break; /* found */
 			}
 		}
@@ -728,6 +827,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		if (!ifa) {
 			ret = -ENOBUFS;
 			ifa = inet_alloc_ifa();
+			INIT_HLIST_NODE(&ifa->hash);
 			if (!ifa)
 				break;
 			if (colon)
@@ -1030,6 +1130,21 @@ static inline bool inetdev_valid_mtu(unsigned mtu)
 	return mtu >= 68;
 }
 
+static void inetdev_send_gratuitous_arp(struct net_device *dev,
+					struct in_device *in_dev)
+
+{
+	struct in_ifaddr *ifa = in_dev->ifa_list;
+
+	if (!ifa)
+		return;
+
+	arp_send(ARPOP_REQUEST, ETH_P_ARP,
+		 ifa->ifa_local, dev,
+		 ifa->ifa_local, NULL,
+		 dev->dev_addr, NULL);
+}
+
 /* Called only under RTNL semaphore */
 
 static int inetdev_event(struct notifier_block *this, unsigned long event,
@@ -1069,6 +1184,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 			struct in_ifaddr *ifa = inet_alloc_ifa();
 
 			if (ifa) {
+				INIT_HLIST_NODE(&ifa->hash);
 				ifa->ifa_local =
 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
 				ifa->ifa_prefixlen = 8;
@@ -1082,18 +1198,13 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		}
 		ip_mc_up(in_dev);
 		/* fall through */
-	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_CHANGEADDR:
+		if (!IN_DEV_ARP_NOTIFY(in_dev))
+			break;
+		/* fall through */
+	case NETDEV_NOTIFY_PEERS:
 		/* Send gratuitous ARP to notify of link change */
-		if (IN_DEV_ARP_NOTIFY(in_dev)) {
-			struct in_ifaddr *ifa = in_dev->ifa_list;
-
-			if (ifa)
-				arp_send(ARPOP_REQUEST, ETH_P_ARP,
-					 ifa->ifa_address, dev,
-					 ifa->ifa_address, NULL,
-					 dev->dev_addr, NULL);
-		}
+		inetdev_send_gratuitous_arp(dev, in_dev);
 		break;
 	case NETDEV_DOWN:
 		ip_mc_down(in_dev);
@@ -1710,6 +1821,11 @@ static struct rtnl_af_ops inet_af_ops = {
 
 void __init devinet_init(void)
 {
+	int i;
+
+	for (i = 0; i < IN4_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet_addr_lst[i]);
+
 	register_pernet_subsys(&devinet_ops);
 
 	register_gifconf(PF_INET, inet_gifconf);
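The new inet_addr_hash() above folds all four bytes of the IPv4 address, XORed with a per-namespace value, into one of IN4_ADDR_HSIZE = 256 buckets, so __ip_dev_find() can avoid a FIB lookup in the common case. A userspace sketch of the same byte-folding step; the sample address and the stand-in for hash_ptr() are illustrative only:

#include <stdint.h>
#include <stdio.h>

#define IN4_ADDR_HSIZE 256

/* Fold a 32-bit IPv4 address into an 8-bit bucket index by XORing
 * its four bytes together, as inet_addr_hash() does above.  The
 * net_salt parameter stands in for the kernel's hash_ptr(net, 8). */
static unsigned int toy_addr_hash(uint32_t addr, uint32_t net_salt)
{
	uint32_t val = addr ^ net_salt;

	return (val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
	       (IN4_ADDR_HSIZE - 1);
}

int main(void)
{
	/* 192.168.1.10 as a host-order constant; salt chosen arbitrarily */
	printf("bucket=%u\n", toy_addr_hash(0xc0a8010a, 0x5e));
	return 0;
}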
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index e42a905180f0..03f994bcf7de 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -33,11 +33,14 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
  *
  * TODO: Use spare space in skb for this where possible.
  */
-static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
+static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
 {
 	unsigned int len;
 
-	len = crypto_aead_ivsize(aead);
+	len = seqhilen;
+
+	len += crypto_aead_ivsize(aead);
+
 	if (len) {
 		len += crypto_aead_alignmask(aead) &
 		       ~(crypto_tfm_ctx_alignment() - 1);
@@ -52,10 +55,15 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
 	return kmalloc(len, GFP_ATOMIC);
 }
 
-static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp)
+static inline __be32 *esp_tmp_seqhi(void *tmp)
+{
+	return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
+}
+static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
 {
 	return crypto_aead_ivsize(aead) ?
-	       PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp;
+	       PTR_ALIGN((u8 *)tmp + seqhilen,
+			 crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
 }
 
 static inline struct aead_givcrypt_request *esp_tmp_givreq(
@@ -122,6 +130,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	int plen;
 	int tfclen;
 	int nfrags;
+	int assoclen;
+	int sglists;
+	int seqhilen;
+	__be32 *seqhi;
 
 	/* skb is pure payload to encrypt */
 
@@ -151,14 +163,25 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 		goto error;
 	nfrags = err;
 
-	tmp = esp_alloc_tmp(aead, nfrags + 1);
+	assoclen = sizeof(*esph);
+	sglists = 1;
+	seqhilen = 0;
+
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists += 2;
+		seqhilen += sizeof(__be32);
+		assoclen += seqhilen;
+	}
+
+	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
 	if (!tmp)
 		goto error;
 
-	iv = esp_tmp_iv(aead, tmp);
+	seqhi = esp_tmp_seqhi(tmp);
+	iv = esp_tmp_iv(aead, tmp, seqhilen);
 	req = esp_tmp_givreq(aead, iv);
 	asg = esp_givreq_sg(aead, req);
-	sg = asg + 1;
+	sg = asg + sglists;
 
 	/* Fill padding... */
 	tail = skb_tail_pointer(trailer);
@@ -215,19 +238,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	esph->spi = x->id.spi;
-	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
+	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
 
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg,
 		     esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
 		     clen + alen);
-	sg_init_one(asg, esph, sizeof(*esph));
+
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		sg_init_table(asg, 3);
+		sg_set_buf(asg, &esph->spi, sizeof(__be32));
+		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+		sg_set_buf(asg + 1, seqhi, seqhilen);
+		sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
+	} else
+		sg_init_one(asg, esph, sizeof(*esph));
 
 	aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
 	aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
-	aead_givcrypt_set_assoc(req, asg, sizeof(*esph));
+	aead_givcrypt_set_assoc(req, asg, assoclen);
 	aead_givcrypt_set_giv(req, esph->enc_data,
-			      XFRM_SKB_CB(skb)->seq.output);
+			      XFRM_SKB_CB(skb)->seq.output.low);
 
 	ESP_SKB_CB(skb)->tmp = tmp;
 	err = crypto_aead_givencrypt(req);
@@ -346,6 +377,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	struct sk_buff *trailer;
 	int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
 	int nfrags;
+	int assoclen;
+	int sglists;
+	int seqhilen;
+	__be32 *seqhi;
 	void *tmp;
 	u8 *iv;
 	struct scatterlist *sg;
@@ -362,16 +397,27 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 	nfrags = err;
 
+	assoclen = sizeof(*esph);
+	sglists = 1;
+	seqhilen = 0;
+
+	if (x->props.flags & XFRM_STATE_ESN) {
+		sglists += 2;
+		seqhilen += sizeof(__be32);
+		assoclen += seqhilen;
+	}
+
 	err = -ENOMEM;
-	tmp = esp_alloc_tmp(aead, nfrags + 1);
+	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
 	if (!tmp)
 		goto out;
 
 	ESP_SKB_CB(skb)->tmp = tmp;
-	iv = esp_tmp_iv(aead, tmp);
+	seqhi = esp_tmp_seqhi(tmp);
+	iv = esp_tmp_iv(aead, tmp, seqhilen);
 	req = esp_tmp_req(aead, iv);
 	asg = esp_req_sg(aead, req);
-	sg = asg + 1;
+	sg = asg + sglists;
 
 	skb->ip_summed = CHECKSUM_NONE;
 
@@ -382,11 +428,19 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	sg_init_table(sg, nfrags);
 	skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
-	sg_init_one(asg, esph, sizeof(*esph));
+
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		sg_init_table(asg, 3);
+		sg_set_buf(asg, &esph->spi, sizeof(__be32));
+		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
+		sg_set_buf(asg + 1, seqhi, seqhilen);
+		sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32));
+	} else
+		sg_init_one(asg, esph, sizeof(*esph));
 
 	aead_request_set_callback(req, 0, esp_input_done, skb);
 	aead_request_set_crypt(req, sg, sg, elen, iv);
-	aead_request_set_assoc(req, asg, sizeof(*esph));
+	aead_request_set_assoc(req, asg, assoclen);
 
 	err = crypto_aead_decrypt(req);
 	if (err == -EINPROGRESS)
@@ -500,10 +554,20 @@ static int esp_init_authenc(struct xfrm_state *x)
 		goto error;
 
 	err = -ENAMETOOLONG;
-	if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)",
-		     x->aalg ? x->aalg->alg_name : "digest_null",
-		     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
-		goto error;
+
+	if ((x->props.flags & XFRM_STATE_ESN)) {
+		if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
+			     "authencesn(%s,%s)",
+			     x->aalg ? x->aalg->alg_name : "digest_null",
+			     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+			goto error;
+	} else {
+		if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME,
+			     "authenc(%s,%s)",
+			     x->aalg ? x->aalg->alg_name : "digest_null",
+			     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+			goto error;
+	}
 
 	aead = crypto_alloc_aead(authenc_name, 0, 0);
 	err = PTR_ERR(aead);
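With ESN, the authenticated associated data is no longer just the 8 contiguous bytes of SPI plus low sequence number: it becomes SPI, the high 32 bits of the 64-bit sequence number, and the low 32 bits, gathered from three separate buffers — hence the three-entry scatterlist built above. A userspace sketch of the resulting byte layout; the SPI and sequence values are made-up samples:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Sketch of the associated-data layout for ESN, mirroring the
 * three-entry scatterlist above: SPI, then seq-hi, then seq-lo. */
int main(void)
{
	uint32_t spi = htonl(0x00001234);
	uint64_t seq = 0x0000000100000005ULL;	/* hi = 1, lo = 5 */
	uint32_t hi = htonl((uint32_t)(seq >> 32));
	uint32_t lo = htonl((uint32_t)seq);
	unsigned char assoc[12];

	memcpy(assoc, &spi, 4);		/* sg entry 0: esph->spi    */
	memcpy(assoc + 4, &hi, 4);	/* sg entry 1: *seqhi       */
	memcpy(assoc + 8, &lo, 4);	/* sg entry 2: esph->seq_no */

	/* assoclen = sizeof(*esph) + seqhilen = 8 + 4 = 12 bytes */
	for (int i = 0; i < 12; i++)
		printf("%02x%s", assoc[i], (i % 4 == 3) ? "\n" : " ");
	return 0;
}

This is also why the algorithm name switches from "authenc(...)" to "authencesn(...)": the ESN-aware template knows how to stitch the split sequence number around the ciphertext when computing the ICV.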
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1d2cdd43a878..f116ce8f1b46 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
| @@ -51,11 +51,11 @@ static int __net_init fib4_rules_init(struct net *net) | |||
| 51 | { | 51 | { |
| 52 | struct fib_table *local_table, *main_table; | 52 | struct fib_table *local_table, *main_table; |
| 53 | 53 | ||
| 54 | local_table = fib_hash_table(RT_TABLE_LOCAL); | 54 | local_table = fib_trie_table(RT_TABLE_LOCAL); |
| 55 | if (local_table == NULL) | 55 | if (local_table == NULL) |
| 56 | return -ENOMEM; | 56 | return -ENOMEM; |
| 57 | 57 | ||
| 58 | main_table = fib_hash_table(RT_TABLE_MAIN); | 58 | main_table = fib_trie_table(RT_TABLE_MAIN); |
| 59 | if (main_table == NULL) | 59 | if (main_table == NULL) |
| 60 | goto fail; | 60 | goto fail; |
| 61 | 61 | ||
| @@ -82,7 +82,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) | |||
| 82 | if (tb) | 82 | if (tb) |
| 83 | return tb; | 83 | return tb; |
| 84 | 84 | ||
| 85 | tb = fib_hash_table(id); | 85 | tb = fib_trie_table(id); |
| 86 | if (!tb) | 86 | if (!tb) |
| 87 | return NULL; | 87 | return NULL; |
| 88 | h = id & (FIB_TABLE_HASHSZ - 1); | 88 | h = id & (FIB_TABLE_HASHSZ - 1); |
| @@ -114,21 +114,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id) | |||
| 114 | } | 114 | } |
| 115 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ | 115 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ |
| 116 | 116 | ||
| 117 | void fib_select_default(struct net *net, | ||
| 118 | const struct flowi *flp, struct fib_result *res) | ||
| 119 | { | ||
| 120 | struct fib_table *tb; | ||
| 121 | int table = RT_TABLE_MAIN; | ||
| 122 | #ifdef CONFIG_IP_MULTIPLE_TABLES | ||
| 123 | if (res->r == NULL || res->r->action != FR_ACT_TO_TBL) | ||
| 124 | return; | ||
| 125 | table = res->r->table; | ||
| 126 | #endif | ||
| 127 | tb = fib_get_table(net, table); | ||
| 128 | if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | ||
| 129 | fib_table_select_default(tb, flp, res); | ||
| 130 | } | ||
| 131 | |||
| 132 | static void fib_flush(struct net *net) | 117 | static void fib_flush(struct net *net) |
| 133 | { | 118 | { |
| 134 | int flushed = 0; | 119 | int flushed = 0; |
| @@ -147,46 +132,6 @@ static void fib_flush(struct net *net) | |||
| 147 | rt_cache_flush(net, -1); | 132 | rt_cache_flush(net, -1); |
| 148 | } | 133 | } |
| 149 | 134 | ||
| 150 | /** | ||
| 151 | * __ip_dev_find - find the first device with a given source address. | ||
| 152 | * @net: the net namespace | ||
| 153 | * @addr: the source address | ||
| 154 | * @devref: if true, take a reference on the found device | ||
| 155 | * | ||
| 156 | * If a caller uses devref=false, it should be protected by RCU, or RTNL | ||
| 157 | */ | ||
| 158 | struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) | ||
| 159 | { | ||
| 160 | struct flowi fl = { | ||
| 161 | .fl4_dst = addr, | ||
| 162 | }; | ||
| 163 | struct fib_result res = { 0 }; | ||
| 164 | struct net_device *dev = NULL; | ||
| 165 | struct fib_table *local_table; | ||
| 166 | |||
| 167 | #ifdef CONFIG_IP_MULTIPLE_TABLES | ||
| 168 | res.r = NULL; | ||
| 169 | #endif | ||
| 170 | |||
| 171 | rcu_read_lock(); | ||
| 172 | local_table = fib_get_table(net, RT_TABLE_LOCAL); | ||
| 173 | if (!local_table || | ||
| 174 | fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { | ||
| 175 | rcu_read_unlock(); | ||
| 176 | return NULL; | ||
| 177 | } | ||
| 178 | if (res.type != RTN_LOCAL) | ||
| 179 | goto out; | ||
| 180 | dev = FIB_RES_DEV(res); | ||
| 181 | |||
| 182 | if (dev && devref) | ||
| 183 | dev_hold(dev); | ||
| 184 | out: | ||
| 185 | rcu_read_unlock(); | ||
| 186 | return dev; | ||
| 187 | } | ||
| 188 | EXPORT_SYMBOL(__ip_dev_find); | ||
| 189 | |||
| 190 | /* | 135 | /* |
| 191 | * Find address type as if only "dev" was present in the system. If | 136 | * Find address type as if only "dev" was present in the system. If |
| 192 | * on_dev is NULL then all interfaces are taken into consideration. | 137 | * on_dev is NULL then all interfaces are taken into consideration. |
| @@ -195,7 +140,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, | |||
| 195 | const struct net_device *dev, | 140 | const struct net_device *dev, |
| 196 | __be32 addr) | 141 | __be32 addr) |
| 197 | { | 142 | { |
| 198 | struct flowi fl = { .fl4_dst = addr }; | 143 | struct flowi4 fl4 = { .daddr = addr }; |
| 199 | struct fib_result res; | 144 | struct fib_result res; |
| 200 | unsigned ret = RTN_BROADCAST; | 145 | unsigned ret = RTN_BROADCAST; |
| 201 | struct fib_table *local_table; | 146 | struct fib_table *local_table; |
| @@ -213,7 +158,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, | |||
| 213 | if (local_table) { | 158 | if (local_table) { |
| 214 | ret = RTN_UNICAST; | 159 | ret = RTN_UNICAST; |
| 215 | rcu_read_lock(); | 160 | rcu_read_lock(); |
| 216 | if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { | 161 | if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { |
| 217 | if (!dev || dev == res.fi->fib_dev) | 162 | if (!dev || dev == res.fi->fib_dev) |
| 218 | ret = res.type; | 163 | ret = res.type; |
| 219 | } | 164 | } |
| @@ -248,19 +193,21 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
| 248 | u32 *itag, u32 mark) | 193 | u32 *itag, u32 mark) |
| 249 | { | 194 | { |
| 250 | struct in_device *in_dev; | 195 | struct in_device *in_dev; |
| 251 | struct flowi fl = { | 196 | struct flowi4 fl4; |
| 252 | .fl4_dst = src, | ||
| 253 | .fl4_src = dst, | ||
| 254 | .fl4_tos = tos, | ||
| 255 | .mark = mark, | ||
| 256 | .iif = oif | ||
| 257 | }; | ||
| 258 | struct fib_result res; | 197 | struct fib_result res; |
| 259 | int no_addr, rpf, accept_local; | 198 | int no_addr, rpf, accept_local; |
| 260 | bool dev_match; | 199 | bool dev_match; |
| 261 | int ret; | 200 | int ret; |
| 262 | struct net *net; | 201 | struct net *net; |
| 263 | 202 | ||
| 203 | fl4.flowi4_oif = 0; | ||
| 204 | fl4.flowi4_iif = oif; | ||
| 205 | fl4.flowi4_mark = mark; | ||
| 206 | fl4.daddr = src; | ||
| 207 | fl4.saddr = dst; | ||
| 208 | fl4.flowi4_tos = tos; | ||
| 209 | fl4.flowi4_scope = RT_SCOPE_UNIVERSE; | ||
| 210 | |||
| 264 | no_addr = rpf = accept_local = 0; | 211 | no_addr = rpf = accept_local = 0; |
| 265 | in_dev = __in_dev_get_rcu(dev); | 212 | in_dev = __in_dev_get_rcu(dev); |
| 266 | if (in_dev) { | 213 | if (in_dev) { |
| @@ -268,20 +215,20 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
| 268 | rpf = IN_DEV_RPFILTER(in_dev); | 215 | rpf = IN_DEV_RPFILTER(in_dev); |
| 269 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); | 216 | accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); |
| 270 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) | 217 | if (mark && !IN_DEV_SRC_VMARK(in_dev)) |
| 271 | fl.mark = 0; | 218 | fl4.flowi4_mark = 0; |
| 272 | } | 219 | } |
| 273 | 220 | ||
| 274 | if (in_dev == NULL) | 221 | if (in_dev == NULL) |
| 275 | goto e_inval; | 222 | goto e_inval; |
| 276 | 223 | ||
| 277 | net = dev_net(dev); | 224 | net = dev_net(dev); |
| 278 | if (fib_lookup(net, &fl, &res)) | 225 | if (fib_lookup(net, &fl4, &res)) |
| 279 | goto last_resort; | 226 | goto last_resort; |
| 280 | if (res.type != RTN_UNICAST) { | 227 | if (res.type != RTN_UNICAST) { |
| 281 | if (res.type != RTN_LOCAL || !accept_local) | 228 | if (res.type != RTN_LOCAL || !accept_local) |
| 282 | goto e_inval; | 229 | goto e_inval; |
| 283 | } | 230 | } |
| 284 | *spec_dst = FIB_RES_PREFSRC(res); | 231 | *spec_dst = FIB_RES_PREFSRC(net, res); |
| 285 | fib_combine_itag(itag, &res); | 232 | fib_combine_itag(itag, &res); |
| 286 | dev_match = false; | 233 | dev_match = false; |
| 287 | 234 | ||
| @@ -306,12 +253,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
| 306 | goto last_resort; | 253 | goto last_resort; |
| 307 | if (rpf == 1) | 254 | if (rpf == 1) |
| 308 | goto e_rpf; | 255 | goto e_rpf; |
| 309 | fl.oif = dev->ifindex; | 256 | fl4.flowi4_oif = dev->ifindex; |
| 310 | 257 | ||
| 311 | ret = 0; | 258 | ret = 0; |
| 312 | if (fib_lookup(net, &fl, &res) == 0) { | 259 | if (fib_lookup(net, &fl4, &res) == 0) { |
| 313 | if (res.type == RTN_UNICAST) { | 260 | if (res.type == RTN_UNICAST) { |
| 314 | *spec_dst = FIB_RES_PREFSRC(res); | 261 | *spec_dst = FIB_RES_PREFSRC(net, res); |
| 315 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | 262 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; |
| 316 | } | 263 | } |
| 317 | } | 264 | } |
| @@ -775,12 +722,17 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) | |||
| 775 | } | 722 | } |
| 776 | } | 723 | } |
| 777 | 724 | ||
| 778 | static void fib_del_ifaddr(struct in_ifaddr *ifa) | 725 | /* Delete primary or secondary address. |
| 726 | * Optionally, on secondary address promotion consider the addresses | ||
| 727 | * from subnet iprim as deleted, even if they are in device list. | ||
| 728 | * In this case the secondary ifa can be in device list. | ||
| 729 | */ | ||
| 730 | void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) | ||
| 779 | { | 731 | { |
| 780 | struct in_device *in_dev = ifa->ifa_dev; | 732 | struct in_device *in_dev = ifa->ifa_dev; |
| 781 | struct net_device *dev = in_dev->dev; | 733 | struct net_device *dev = in_dev->dev; |
| 782 | struct in_ifaddr *ifa1; | 734 | struct in_ifaddr *ifa1; |
| 783 | struct in_ifaddr *prim = ifa; | 735 | struct in_ifaddr *prim = ifa, *prim1 = NULL; |
| 784 | __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; | 736 | __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; |
| 785 | __be32 any = ifa->ifa_address & ifa->ifa_mask; | 737 | __be32 any = ifa->ifa_address & ifa->ifa_mask; |
| 786 | #define LOCAL_OK 1 | 738 | #define LOCAL_OK 1 |
| @@ -788,17 +740,26 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) | |||
| 788 | #define BRD0_OK 4 | 740 | #define BRD0_OK 4 |
| 789 | #define BRD1_OK 8 | 741 | #define BRD1_OK 8 |
| 790 | unsigned ok = 0; | 742 | unsigned ok = 0; |
| 743 | int subnet = 0; /* Primary network */ | ||
| 744 | int gone = 1; /* Address is missing */ | ||
| 745 | int same_prefsrc = 0; /* Another primary with same IP */ | ||
| 791 | 746 | ||
| 792 | if (!(ifa->ifa_flags & IFA_F_SECONDARY)) | 747 | if (ifa->ifa_flags & IFA_F_SECONDARY) { |
| 793 | fib_magic(RTM_DELROUTE, | ||
| 794 | dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, | ||
| 795 | any, ifa->ifa_prefixlen, prim); | ||
| 796 | else { | ||
| 797 | prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); | 748 | prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); |
| 798 | if (prim == NULL) { | 749 | if (prim == NULL) { |
| 799 | printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); | 750 | printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); |
| 800 | return; | 751 | return; |
| 801 | } | 752 | } |
| 753 | if (iprim && iprim != prim) { | ||
| 754 | printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n"); | ||
| 755 | return; | ||
| 756 | } | ||
| 757 | } else if (!ipv4_is_zeronet(any) && | ||
| 758 | (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) { | ||
| 759 | fib_magic(RTM_DELROUTE, | ||
| 760 | dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, | ||
| 761 | any, ifa->ifa_prefixlen, prim); | ||
| 762 | subnet = 1; | ||
| 802 | } | 763 | } |
| 803 | 764 | ||
| 804 | /* Deletion is more complicated than add. | 765 | /* Deletion is more complicated than add. |
| @@ -808,6 +769,49 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) | |||
| 808 | */ | 769 | */ |
| 809 | 770 | ||
| 810 | for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { | 771 | for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { |
| 772 | if (ifa1 == ifa) { | ||
| 773 | /* promotion, keep the IP */ | ||
| 774 | gone = 0; | ||
| 775 | continue; | ||
| 776 | } | ||
| 777 | /* Ignore IFAs from our subnet */ | ||
| 778 | if (iprim && ifa1->ifa_mask == iprim->ifa_mask && | ||
| 779 | inet_ifa_match(ifa1->ifa_address, iprim)) | ||
| 780 | continue; | ||
| 781 | |||
| 782 | /* Ignore ifa1 if it uses different primary IP (prefsrc) */ | ||
| 783 | if (ifa1->ifa_flags & IFA_F_SECONDARY) { | ||
| 784 | /* Another address from our subnet? */ | ||
| 785 | if (ifa1->ifa_mask == prim->ifa_mask && | ||
| 786 | inet_ifa_match(ifa1->ifa_address, prim)) | ||
| 787 | prim1 = prim; | ||
| 788 | else { | ||
| 789 | /* We reached the secondaries, so | ||
| 790 | * same_prefsrc should be determined. | ||
| 791 | */ | ||
| 792 | if (!same_prefsrc) | ||
| 793 | continue; | ||
| 794 | /* Search new prim1 if ifa1 is not | ||
| 795 | * using the current prim1 | ||
| 796 | */ | ||
| 797 | if (!prim1 || | ||
| 798 | ifa1->ifa_mask != prim1->ifa_mask || | ||
| 799 | !inet_ifa_match(ifa1->ifa_address, prim1)) | ||
| 800 | prim1 = inet_ifa_byprefix(in_dev, | ||
| 801 | ifa1->ifa_address, | ||
| 802 | ifa1->ifa_mask); | ||
| 803 | if (!prim1) | ||
| 804 | continue; | ||
| 805 | if (prim1->ifa_local != prim->ifa_local) | ||
| 806 | continue; | ||
| 807 | } | ||
| 808 | } else { | ||
| 809 | if (prim->ifa_local != ifa1->ifa_local) | ||
| 810 | continue; | ||
| 811 | prim1 = ifa1; | ||
| 812 | if (prim != prim1) | ||
| 813 | same_prefsrc = 1; | ||
| 814 | } | ||
| 811 | if (ifa->ifa_local == ifa1->ifa_local) | 815 | if (ifa->ifa_local == ifa1->ifa_local) |
| 812 | ok |= LOCAL_OK; | 816 | ok |= LOCAL_OK; |
| 813 | if (ifa->ifa_broadcast == ifa1->ifa_broadcast) | 817 | if (ifa->ifa_broadcast == ifa1->ifa_broadcast) |
| @@ -816,19 +820,37 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) | |||
| 816 | ok |= BRD1_OK; | 820 | ok |= BRD1_OK; |
| 817 | if (any == ifa1->ifa_broadcast) | 821 | if (any == ifa1->ifa_broadcast) |
| 818 | ok |= BRD0_OK; | 822 | ok |= BRD0_OK; |
| 823 | /* primary has network specific broadcasts */ | ||
| 824 | if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) { | ||
| 825 | __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask; | ||
| 826 | __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask; | ||
| 827 | |||
| 828 | if (!ipv4_is_zeronet(any1)) { | ||
| 829 | if (ifa->ifa_broadcast == brd1 || | ||
| 830 | ifa->ifa_broadcast == any1) | ||
| 831 | ok |= BRD_OK; | ||
| 832 | if (brd == brd1 || brd == any1) | ||
| 833 | ok |= BRD1_OK; | ||
| 834 | if (any == brd1 || any == any1) | ||
| 835 | ok |= BRD0_OK; | ||
| 836 | } | ||
| 837 | } | ||
| 819 | } | 838 | } |
| 820 | 839 | ||
| 821 | if (!(ok & BRD_OK)) | 840 | if (!(ok & BRD_OK)) |
| 822 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); | 841 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); |
| 823 | if (!(ok & BRD1_OK)) | 842 | if (subnet && ifa->ifa_prefixlen < 31) { |
| 824 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); | 843 | if (!(ok & BRD1_OK)) |
| 825 | if (!(ok & BRD0_OK)) | 844 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); |
| 826 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); | 845 | if (!(ok & BRD0_OK)) |
| 846 | fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); | ||
| 847 | } | ||
| 827 | if (!(ok & LOCAL_OK)) { | 848 | if (!(ok & LOCAL_OK)) { |
| 828 | fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); | 849 | fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); |
| 829 | 850 | ||
| 830 | /* Check that this local address has finally disappeared. */ | 851 | /* Check that this local address has finally disappeared. */ |
| 831 | if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { | 852 | if (gone && |
| 853 | inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { | ||
| 832 | /* And the last, but not the least thing. | 854 | /* And the last, but not the least thing. |
| 833 | * We must flush stray FIB entries. | 855 | * We must flush stray FIB entries. |
| 834 | * | 856 | * |
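The rework above handles address promotion during deletion: when the address being removed is the one about to be promoted (ifa1 == ifa), `gone' is cleared so the local route survives, and broadcast/network routes are torn down only when no other primary on the subnet still needs them. The ifa_prefixlen < 31 guards exist because /31 and /32 networks carry no subnet broadcast. A minimal sketch of the address arithmetic the loop relies on, assuming kernel types and not taken verbatim from this patch:

    /* Sketch: derive the two addresses the loop compares against, from a
     * primary address and mask in network byte order. */
    static void subnet_addrs(__be32 addr, __be32 mask, __be32 *brd, __be32 *any)
    {
            *brd = addr | ~mask;    /* network-specific broadcast address */
            *any = addr & mask;     /* network (all-zeroes-host) address  */
    }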
| @@ -849,11 +871,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) | |||
| 849 | { | 871 | { |
| 850 | 872 | ||
| 851 | struct fib_result res; | 873 | struct fib_result res; |
| 852 | struct flowi fl = { | 874 | struct flowi4 fl4 = { |
| 853 | .mark = frn->fl_mark, | 875 | .flowi4_mark = frn->fl_mark, |
| 854 | .fl4_dst = frn->fl_addr, | 876 | .daddr = frn->fl_addr, |
| 855 | .fl4_tos = frn->fl_tos, | 877 | .flowi4_tos = frn->fl_tos, |
| 856 | .fl4_scope = frn->fl_scope, | 878 | .flowi4_scope = frn->fl_scope, |
| 857 | }; | 879 | }; |
| 858 | 880 | ||
| 859 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 881 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
| @@ -866,7 +888,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) | |||
| 866 | 888 | ||
| 867 | frn->tb_id = tb->tb_id; | 889 | frn->tb_id = tb->tb_id; |
| 868 | rcu_read_lock(); | 890 | rcu_read_lock(); |
| 869 | frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); | 891 | frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); |
| 870 | 892 | ||
| 871 | if (!frn->err) { | 893 | if (!frn->err) { |
| 872 | frn->prefixlen = res.prefixlen; | 894 | frn->prefixlen = res.prefixlen; |
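nl_fib_lookup() is converted as part of a wider struct flowi rework: IPv4 lookups now fill a struct flowi4 whose common members carry a flowi4_ prefix while the address fields lose theirs. A sketch of the new convention, using only the fields visible in this hunk (the plain variable names are placeholders):

    struct flowi4 fl4 = {
            .flowi4_mark  = mark,     /* was fl.mark      */
            .daddr        = daddr,    /* was fl.fl4_dst   */
            .flowi4_tos   = tos,      /* was fl.fl4_tos   */
            .flowi4_scope = scope,    /* was fl.fl4_scope */
    };
    err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);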
| @@ -938,6 +960,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, | |||
| 938 | { | 960 | { |
| 939 | struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; | 961 | struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; |
| 940 | struct net_device *dev = ifa->ifa_dev->dev; | 962 | struct net_device *dev = ifa->ifa_dev->dev; |
| 963 | struct net *net = dev_net(dev); | ||
| 941 | 964 | ||
| 942 | switch (event) { | 965 | switch (event) { |
| 943 | case NETDEV_UP: | 966 | case NETDEV_UP: |
| @@ -945,10 +968,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, | |||
| 945 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 968 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| 946 | fib_sync_up(dev); | 969 | fib_sync_up(dev); |
| 947 | #endif | 970 | #endif |
| 971 | atomic_inc(&net->ipv4.dev_addr_genid); | ||
| 948 | rt_cache_flush(dev_net(dev), -1); | 972 | rt_cache_flush(dev_net(dev), -1); |
| 949 | break; | 973 | break; |
| 950 | case NETDEV_DOWN: | 974 | case NETDEV_DOWN: |
| 951 | fib_del_ifaddr(ifa); | 975 | fib_del_ifaddr(ifa, NULL); |
| 976 | atomic_inc(&net->ipv4.dev_addr_genid); | ||
| 952 | if (ifa->ifa_dev->ifa_list == NULL) { | 977 | if (ifa->ifa_dev->ifa_list == NULL) { |
| 953 | /* Last address was deleted from this interface. | 978 | /* Last address was deleted from this interface. |
| 954 | * Disable IP. | 979 | * Disable IP. |
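Both arms of the notifier now bump net->ipv4.dev_addr_genid, a per-namespace generation counter that lets users of cached address decisions notice that the set of local addresses has changed. A sketch of the consumer pattern this enables (an assumption for illustration, not code from this patch):

    int genid = atomic_read(&net->ipv4.dev_addr_genid);
    /* ... compute something that depends on the local address set ... */
    if (genid != atomic_read(&net->ipv4.dev_addr_genid))
            recompute();    /* hypothetical: an address came or went */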
| @@ -966,6 +991,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
| 966 | { | 991 | { |
| 967 | struct net_device *dev = ptr; | 992 | struct net_device *dev = ptr; |
| 968 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 993 | struct in_device *in_dev = __in_dev_get_rtnl(dev); |
| 994 | struct net *net = dev_net(dev); | ||
| 969 | 995 | ||
| 970 | if (event == NETDEV_UNREGISTER) { | 996 | if (event == NETDEV_UNREGISTER) { |
| 971 | fib_disable_ip(dev, 2, -1); | 997 | fib_disable_ip(dev, 2, -1); |
| @@ -983,6 +1009,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
| 983 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1009 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| 984 | fib_sync_up(dev); | 1010 | fib_sync_up(dev); |
| 985 | #endif | 1011 | #endif |
| 1012 | atomic_inc(&net->ipv4.dev_addr_genid); | ||
| 986 | rt_cache_flush(dev_net(dev), -1); | 1013 | rt_cache_flush(dev_net(dev), -1); |
| 987 | break; | 1014 | break; |
| 988 | case NETDEV_DOWN: | 1015 | case NETDEV_DOWN: |
| @@ -1101,5 +1128,5 @@ void __init ip_fib_init(void) | |||
| 1101 | register_netdevice_notifier(&fib_netdev_notifier); | 1128 | register_netdevice_notifier(&fib_netdev_notifier); |
| 1102 | register_inetaddr_notifier(&fib_inetaddr_notifier); | 1129 | register_inetaddr_notifier(&fib_inetaddr_notifier); |
| 1103 | 1130 | ||
| 1104 | fib_hash_init(); | 1131 | fib_trie_init(); |
| 1105 | } | 1132 | } |
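ip_fib_init() now calls the trie initializer unconditionally, and the entire hash backend is deleted below; the replacements live in fib_trie.c. Their assumed shape (declarations only, inferred from the call sites in this patch):

    void __init fib_trie_init(void);             /* replaces fib_hash_init()  */
    struct fib_table *fib_trie_table(u32 id);    /* replaces fib_hash_table() */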
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c deleted file mode 100644 index b3acb0417b21..000000000000 --- a/net/ipv4/fib_hash.c +++ /dev/null | |||
| @@ -1,1133 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
| 3 | * operating system. INET is implemented using the BSD Socket | ||
| 4 | * interface as the means of communication with the user level. | ||
| 5 | * | ||
| 6 | * IPv4 FIB: lookup engine and maintenance routines. | ||
| 7 | * | ||
| 8 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public License | ||
| 12 | * as published by the Free Software Foundation; either version | ||
| 13 | * 2 of the License, or (at your option) any later version. | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include <asm/uaccess.h> | ||
| 17 | #include <asm/system.h> | ||
| 18 | #include <linux/bitops.h> | ||
| 19 | #include <linux/types.h> | ||
| 20 | #include <linux/kernel.h> | ||
| 21 | #include <linux/mm.h> | ||
| 22 | #include <linux/string.h> | ||
| 23 | #include <linux/socket.h> | ||
| 24 | #include <linux/sockios.h> | ||
| 25 | #include <linux/errno.h> | ||
| 26 | #include <linux/in.h> | ||
| 27 | #include <linux/inet.h> | ||
| 28 | #include <linux/inetdevice.h> | ||
| 29 | #include <linux/netdevice.h> | ||
| 30 | #include <linux/if_arp.h> | ||
| 31 | #include <linux/proc_fs.h> | ||
| 32 | #include <linux/skbuff.h> | ||
| 33 | #include <linux/netlink.h> | ||
| 34 | #include <linux/init.h> | ||
| 35 | #include <linux/slab.h> | ||
| 36 | |||
| 37 | #include <net/net_namespace.h> | ||
| 38 | #include <net/ip.h> | ||
| 39 | #include <net/protocol.h> | ||
| 40 | #include <net/route.h> | ||
| 41 | #include <net/tcp.h> | ||
| 42 | #include <net/sock.h> | ||
| 43 | #include <net/ip_fib.h> | ||
| 44 | |||
| 45 | #include "fib_lookup.h" | ||
| 46 | |||
| 47 | static struct kmem_cache *fn_hash_kmem __read_mostly; | ||
| 48 | static struct kmem_cache *fn_alias_kmem __read_mostly; | ||
| 49 | |||
| 50 | struct fib_node { | ||
| 51 | struct hlist_node fn_hash; | ||
| 52 | struct list_head fn_alias; | ||
| 53 | __be32 fn_key; | ||
| 54 | struct fib_alias fn_embedded_alias; | ||
| 55 | }; | ||
| 56 | |||
| 57 | #define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head)) | ||
| 58 | |||
| 59 | struct fn_zone { | ||
| 60 | struct fn_zone __rcu *fz_next; /* Next non-empty zone */ | ||
| 61 | struct hlist_head __rcu *fz_hash; /* Hash table pointer */ | ||
| 62 | seqlock_t fz_lock; | ||
| 63 | u32 fz_hashmask; /* (fz_divisor - 1) */ | ||
| 64 | |||
| 65 | u8 fz_order; /* Zone order (0..32) */ | ||
| 66 | u8 fz_revorder; /* 32 - fz_order */ | ||
| 67 | __be32 fz_mask; /* inet_make_mask(order) */ | ||
| 68 | #define FZ_MASK(fz) ((fz)->fz_mask) | ||
| 69 | |||
| 70 | struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE]; | ||
| 71 | |||
| 72 | int fz_nent; /* Number of entries */ | ||
| 73 | int fz_divisor; /* Hash size (mask+1) */ | ||
| 74 | }; | ||
| 75 | |||
| 76 | struct fn_hash { | ||
| 77 | struct fn_zone *fn_zones[33]; | ||
| 78 | struct fn_zone __rcu *fn_zone_list; | ||
| 79 | }; | ||
| 80 | |||
| 81 | static inline u32 fn_hash(__be32 key, struct fn_zone *fz) | ||
| 82 | { | ||
| 83 | u32 h = ntohl(key) >> fz->fz_revorder; | ||
| 84 | h ^= (h>>20); | ||
| 85 | h ^= (h>>10); | ||
| 86 | h ^= (h>>5); | ||
| 87 | h &= fz->fz_hashmask; | ||
| 88 | return h; | ||
| 89 | } | ||
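fn_hash() was the bucket function of the deleted backend: it keeps only the top fz_order bits of the host-order key and xor-folds the high bits downward so that neighbouring prefixes spread across chains. A standalone sketch, assuming plain C types instead of kernel ones:

    static unsigned int demo_fn_hash(unsigned int key_host,
                                     unsigned int revorder,  /* 32 - order */
                                     unsigned int hashmask)  /* divisor - 1 */
    {
            unsigned int h = key_host >> revorder;  /* keep the prefix bits */
            h ^= h >> 20;
            h ^= h >> 10;
            h ^= h >> 5;                            /* fold high bits down */
            return h & hashmask;
    }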
| 90 | |||
| 91 | static inline __be32 fz_key(__be32 dst, struct fn_zone *fz) | ||
| 92 | { | ||
| 93 | return dst & FZ_MASK(fz); | ||
| 94 | } | ||
| 95 | |||
| 96 | static unsigned int fib_hash_genid; | ||
| 97 | |||
| 98 | #define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head)) | ||
| 99 | |||
| 100 | static struct hlist_head *fz_hash_alloc(int divisor) | ||
| 101 | { | ||
| 102 | unsigned long size = divisor * sizeof(struct hlist_head); | ||
| 103 | |||
| 104 | if (size <= PAGE_SIZE) | ||
| 105 | return kzalloc(size, GFP_KERNEL); | ||
| 106 | |||
| 107 | return (struct hlist_head *) | ||
| 108 | __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size)); | ||
| 109 | } | ||
| 110 | |||
| 111 | /* The fib hash lock must be held when this is called. */ | ||
| 112 | static inline void fn_rebuild_zone(struct fn_zone *fz, | ||
| 113 | struct hlist_head *old_ht, | ||
| 114 | int old_divisor) | ||
| 115 | { | ||
| 116 | int i; | ||
| 117 | |||
| 118 | for (i = 0; i < old_divisor; i++) { | ||
| 119 | struct hlist_node *node, *n; | ||
| 120 | struct fib_node *f; | ||
| 121 | |||
| 122 | hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) { | ||
| 123 | struct hlist_head *new_head; | ||
| 124 | |||
| 125 | hlist_del_rcu(&f->fn_hash); | ||
| 126 | |||
| 127 | new_head = rcu_dereference_protected(fz->fz_hash, 1) + | ||
| 128 | fn_hash(f->fn_key, fz); | ||
| 129 | hlist_add_head_rcu(&f->fn_hash, new_head); | ||
| 130 | } | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | static void fz_hash_free(struct hlist_head *hash, int divisor) | ||
| 135 | { | ||
| 136 | unsigned long size = divisor * sizeof(struct hlist_head); | ||
| 137 | |||
| 138 | if (size <= PAGE_SIZE) | ||
| 139 | kfree(hash); | ||
| 140 | else | ||
| 141 | free_pages((unsigned long)hash, get_order(size)); | ||
| 142 | } | ||
| 143 | |||
| 144 | static void fn_rehash_zone(struct fn_zone *fz) | ||
| 145 | { | ||
| 146 | struct hlist_head *ht, *old_ht; | ||
| 147 | int old_divisor, new_divisor; | ||
| 148 | u32 new_hashmask; | ||
| 149 | |||
| 150 | new_divisor = old_divisor = fz->fz_divisor; | ||
| 151 | |||
| 152 | switch (old_divisor) { | ||
| 153 | case EMBEDDED_HASH_SIZE: | ||
| 154 | new_divisor *= EMBEDDED_HASH_SIZE; | ||
| 155 | break; | ||
| 156 | case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE: | ||
| 157 | new_divisor *= (EMBEDDED_HASH_SIZE/2); | ||
| 158 | break; | ||
| 159 | default: | ||
| 160 | if ((old_divisor << 1) > FZ_MAX_DIVISOR) { | ||
| 161 | printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); | ||
| 162 | return; | ||
| 163 | } | ||
| 164 | new_divisor = (old_divisor << 1); | ||
| 165 | break; | ||
| 166 | } | ||
| 167 | |||
| 168 | new_hashmask = (new_divisor - 1); | ||
| 169 | |||
| 170 | #if RT_CACHE_DEBUG >= 2 | ||
| 171 | printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n", | ||
| 172 | fz->fz_order, old_divisor); | ||
| 173 | #endif | ||
| 174 | |||
| 175 | ht = fz_hash_alloc(new_divisor); | ||
| 176 | |||
| 177 | if (ht) { | ||
| 178 | struct fn_zone nfz; | ||
| 179 | |||
| 180 | memcpy(&nfz, fz, sizeof(nfz)); | ||
| 181 | |||
| 182 | write_seqlock_bh(&fz->fz_lock); | ||
| 183 | old_ht = rcu_dereference_protected(fz->fz_hash, 1); | ||
| 184 | RCU_INIT_POINTER(nfz.fz_hash, ht); | ||
| 185 | nfz.fz_hashmask = new_hashmask; | ||
| 186 | nfz.fz_divisor = new_divisor; | ||
| 187 | fn_rebuild_zone(&nfz, old_ht, old_divisor); | ||
| 188 | fib_hash_genid++; | ||
| 189 | rcu_assign_pointer(fz->fz_hash, ht); | ||
| 190 | fz->fz_hashmask = new_hashmask; | ||
| 191 | fz->fz_divisor = new_divisor; | ||
| 192 | write_sequnlock_bh(&fz->fz_lock); | ||
| 193 | |||
| 194 | if (old_ht != fz->fz_embedded_hash) { | ||
| 195 | synchronize_rcu(); | ||
| 196 | fz_hash_free(old_ht, old_divisor); | ||
| 197 | } | ||
| 198 | } | ||
| 199 | } | ||
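fn_rehash_zone() grows a zone's table in three regimes: off the embedded table, then once more by EMBEDDED_HASH_SIZE/2, then by doubling until FZ_MAX_DIVISOR; the swap itself happens under the write seqlock, with the old table freed after synchronize_rcu(). Assuming 64-byte cache lines and 8-byte pointers (so EMBEDDED_HASH_SIZE == 8), a sketch of the progression:

    /* 8 -> 64 -> 256 -> 512 -> 1024 -> ... (arch-dependent assumption) */
    static int next_divisor(int old)
    {
            if (old == 8)           /* EMBEDDED_HASH_SIZE           */
                    return old * 8;
            if (old == 64)          /* EMBEDDED_HASH_SIZE squared   */
                    return old * 4; /* * (EMBEDDED_HASH_SIZE / 2)   */
            return old << 1;        /* caller caps at FZ_MAX_DIVISOR */
    }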
| 200 | |||
| 201 | static void fn_free_node_rcu(struct rcu_head *head) | ||
| 202 | { | ||
| 203 | struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu); | ||
| 204 | |||
| 205 | kmem_cache_free(fn_hash_kmem, f); | ||
| 206 | } | ||
| 207 | |||
| 208 | static inline void fn_free_node(struct fib_node *f) | ||
| 209 | { | ||
| 210 | call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu); | ||
| 211 | } | ||
| 212 | |||
| 213 | static void fn_free_alias_rcu(struct rcu_head *head) | ||
| 214 | { | ||
| 215 | struct fib_alias *fa = container_of(head, struct fib_alias, rcu); | ||
| 216 | |||
| 217 | kmem_cache_free(fn_alias_kmem, fa); | ||
| 218 | } | ||
| 219 | |||
| 220 | static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f) | ||
| 221 | { | ||
| 222 | fib_release_info(fa->fa_info); | ||
| 223 | if (fa == &f->fn_embedded_alias) | ||
| 224 | fa->fa_info = NULL; | ||
| 225 | else | ||
| 226 | call_rcu(&fa->rcu, fn_free_alias_rcu); | ||
| 227 | } | ||
| 228 | |||
| 229 | static struct fn_zone * | ||
| 230 | fn_new_zone(struct fn_hash *table, int z) | ||
| 231 | { | ||
| 232 | int i; | ||
| 233 | struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL); | ||
| 234 | if (!fz) | ||
| 235 | return NULL; | ||
| 236 | |||
| 237 | seqlock_init(&fz->fz_lock); | ||
| 238 | fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1; | ||
| 239 | fz->fz_hashmask = fz->fz_divisor - 1; | ||
| 240 | RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash); | ||
| 241 | fz->fz_order = z; | ||
| 242 | fz->fz_revorder = 32 - z; | ||
| 243 | fz->fz_mask = inet_make_mask(z); | ||
| 244 | |||
| 245 | /* Find the first non-empty zone with a more specific mask */ | ||
| 246 | for (i = z + 1; i <= 32; i++) | ||
| 247 | if (table->fn_zones[i]) | ||
| 248 | break; | ||
| 249 | if (i > 32) { | ||
| 250 | /* No more specific masks, we are the first. */ | ||
| 251 | rcu_assign_pointer(fz->fz_next, | ||
| 252 | rtnl_dereference(table->fn_zone_list)); | ||
| 253 | rcu_assign_pointer(table->fn_zone_list, fz); | ||
| 254 | } else { | ||
| 255 | rcu_assign_pointer(fz->fz_next, | ||
| 256 | rtnl_dereference(table->fn_zones[i]->fz_next)); | ||
| 257 | rcu_assign_pointer(table->fn_zones[i]->fz_next, fz); | ||
| 258 | } | ||
| 259 | table->fn_zones[z] = fz; | ||
| 260 | fib_hash_genid++; | ||
| 261 | return fz; | ||
| 262 | } | ||
| 263 | |||
| 264 | int fib_table_lookup(struct fib_table *tb, | ||
| 265 | const struct flowi *flp, struct fib_result *res, | ||
| 266 | int fib_flags) | ||
| 267 | { | ||
| 268 | int err; | ||
| 269 | struct fn_zone *fz; | ||
| 270 | struct fn_hash *t = (struct fn_hash *)tb->tb_data; | ||
| 271 | |||
| 272 | rcu_read_lock(); | ||
| 273 | for (fz = rcu_dereference(t->fn_zone_list); | ||
| 274 | fz != NULL; | ||
| 275 | fz = rcu_dereference(fz->fz_next)) { | ||
| 276 | struct hlist_head *head; | ||
| 277 | struct hlist_node *node; | ||
| 278 | struct fib_node *f; | ||
| 279 | __be32 k; | ||
| 280 | unsigned int seq; | ||
| 281 | |||
| 282 | do { | ||
| 283 | seq = read_seqbegin(&fz->fz_lock); | ||
| 284 | k = fz_key(flp->fl4_dst, fz); | ||
| 285 | |||
| 286 | head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz); | ||
| 287 | hlist_for_each_entry_rcu(f, node, head, fn_hash) { | ||
| 288 | if (f->fn_key != k) | ||
| 289 | continue; | ||
| 290 | |||
| 291 | err = fib_semantic_match(&f->fn_alias, | ||
| 292 | flp, res, | ||
| 293 | fz->fz_order, fib_flags); | ||
| 294 | if (err <= 0) | ||
| 295 | goto out; | ||
| 296 | } | ||
| 297 | } while (read_seqretry(&fz->fz_lock, seq)); | ||
| 298 | } | ||
| 299 | err = 1; | ||
| 300 | out: | ||
| 301 | rcu_read_unlock(); | ||
| 302 | return err; | ||
| 303 | } | ||
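fib_table_lookup() realizes longest-prefix match by walking fn_zone_list, which fn_new_zone() keeps ordered from the most specific mask to the least, and returning on the first zone whose bucket yields a semantic match. Each zone is read under a seqlock, so a concurrent rehash forces a retry rather than taking a lock. The read-side pattern, reduced to its skeleton:

    unsigned int seq;
    do {
            seq = read_seqbegin(&fz->fz_lock);
            /* mask the key, hash it, walk the RCU hlist, try to match */
    } while (read_seqretry(&fz->fz_lock, seq));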
| 304 | |||
| 305 | void fib_table_select_default(struct fib_table *tb, | ||
| 306 | const struct flowi *flp, struct fib_result *res) | ||
| 307 | { | ||
| 308 | int order, last_idx; | ||
| 309 | struct hlist_node *node; | ||
| 310 | struct fib_node *f; | ||
| 311 | struct fib_info *fi = NULL; | ||
| 312 | struct fib_info *last_resort; | ||
| 313 | struct fn_hash *t = (struct fn_hash *)tb->tb_data; | ||
| 314 | struct fn_zone *fz = t->fn_zones[0]; | ||
| 315 | struct hlist_head *head; | ||
| 316 | |||
| 317 | if (fz == NULL) | ||
| 318 | return; | ||
| 319 | |||
| 320 | last_idx = -1; | ||
| 321 | last_resort = NULL; | ||
| 322 | order = -1; | ||
| 323 | |||
| 324 | rcu_read_lock(); | ||
| 325 | head = rcu_dereference(fz->fz_hash); | ||
| 326 | hlist_for_each_entry_rcu(f, node, head, fn_hash) { | ||
| 327 | struct fib_alias *fa; | ||
| 328 | |||
| 329 | list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { | ||
| 330 | struct fib_info *next_fi = fa->fa_info; | ||
| 331 | |||
| 332 | if (fa->fa_scope != res->scope || | ||
| 333 | fa->fa_type != RTN_UNICAST) | ||
| 334 | continue; | ||
| 335 | |||
| 336 | if (next_fi->fib_priority > res->fi->fib_priority) | ||
| 337 | break; | ||
| 338 | if (!next_fi->fib_nh[0].nh_gw || | ||
| 339 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | ||
| 340 | continue; | ||
| 341 | |||
| 342 | fib_alias_accessed(fa); | ||
| 343 | |||
| 344 | if (fi == NULL) { | ||
| 345 | if (next_fi != res->fi) | ||
| 346 | break; | ||
| 347 | } else if (!fib_detect_death(fi, order, &last_resort, | ||
| 348 | &last_idx, tb->tb_default)) { | ||
| 349 | fib_result_assign(res, fi); | ||
| 350 | tb->tb_default = order; | ||
| 351 | goto out; | ||
| 352 | } | ||
| 353 | fi = next_fi; | ||
| 354 | order++; | ||
| 355 | } | ||
| 356 | } | ||
| 357 | |||
| 358 | if (order <= 0 || fi == NULL) { | ||
| 359 | tb->tb_default = -1; | ||
| 360 | goto out; | ||
| 361 | } | ||
| 362 | |||
| 363 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | ||
| 364 | tb->tb_default)) { | ||
| 365 | fib_result_assign(res, fi); | ||
| 366 | tb->tb_default = order; | ||
| 367 | goto out; | ||
| 368 | } | ||
| 369 | |||
| 370 | if (last_idx >= 0) | ||
| 371 | fib_result_assign(res, last_resort); | ||
| 372 | tb->tb_default = last_idx; | ||
| 373 | out: | ||
| 374 | rcu_read_unlock(); | ||
| 375 | } | ||
| 376 | |||
| 377 | /* Insert node F to FZ. */ | ||
| 378 | static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) | ||
| 379 | { | ||
| 380 | struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz); | ||
| 381 | |||
| 382 | hlist_add_head_rcu(&f->fn_hash, head); | ||
| 383 | } | ||
| 384 | |||
| 385 | /* Return the node in FZ matching KEY. */ | ||
| 386 | static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) | ||
| 387 | { | ||
| 388 | struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz); | ||
| 389 | struct hlist_node *node; | ||
| 390 | struct fib_node *f; | ||
| 391 | |||
| 392 | hlist_for_each_entry_rcu(f, node, head, fn_hash) { | ||
| 393 | if (f->fn_key == key) | ||
| 394 | return f; | ||
| 395 | } | ||
| 396 | |||
| 397 | return NULL; | ||
| 398 | } | ||
| 399 | |||
| 400 | |||
| 401 | static struct fib_alias *fib_fast_alloc(struct fib_node *f) | ||
| 402 | { | ||
| 403 | struct fib_alias *fa = &f->fn_embedded_alias; | ||
| 404 | |||
| 405 | if (fa->fa_info != NULL) | ||
| 406 | fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); | ||
| 407 | return fa; | ||
| 408 | } | ||
| 409 | |||
| 410 | /* Caller must hold RTNL. */ | ||
| 411 | int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | ||
| 412 | { | ||
| 413 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; | ||
| 414 | struct fib_node *new_f = NULL; | ||
| 415 | struct fib_node *f; | ||
| 416 | struct fib_alias *fa, *new_fa; | ||
| 417 | struct fn_zone *fz; | ||
| 418 | struct fib_info *fi; | ||
| 419 | u8 tos = cfg->fc_tos; | ||
| 420 | __be32 key; | ||
| 421 | int err; | ||
| 422 | |||
| 423 | if (cfg->fc_dst_len > 32) | ||
| 424 | return -EINVAL; | ||
| 425 | |||
| 426 | fz = table->fn_zones[cfg->fc_dst_len]; | ||
| 427 | if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) | ||
| 428 | return -ENOBUFS; | ||
| 429 | |||
| 430 | key = 0; | ||
| 431 | if (cfg->fc_dst) { | ||
| 432 | if (cfg->fc_dst & ~FZ_MASK(fz)) | ||
| 433 | return -EINVAL; | ||
| 434 | key = fz_key(cfg->fc_dst, fz); | ||
| 435 | } | ||
| 436 | |||
| 437 | fi = fib_create_info(cfg); | ||
| 438 | if (IS_ERR(fi)) | ||
| 439 | return PTR_ERR(fi); | ||
| 440 | |||
| 441 | if (fz->fz_nent > (fz->fz_divisor<<1) && | ||
| 442 | fz->fz_divisor < FZ_MAX_DIVISOR && | ||
| 443 | (cfg->fc_dst_len == 32 || | ||
| 444 | (1 << cfg->fc_dst_len) > fz->fz_divisor)) | ||
| 445 | fn_rehash_zone(fz); | ||
| 446 | |||
| 447 | f = fib_find_node(fz, key); | ||
| 448 | |||
| 449 | if (!f) | ||
| 450 | fa = NULL; | ||
| 451 | else | ||
| 452 | fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority); | ||
| 453 | |||
| 454 | /* Now fa, if non-NULL, points to the first fib alias | ||
| 455 | * with the same keys [prefix,tos,priority], if such a key already | ||
| 456 | * exists, or to the node before which we will insert the new one. | ||
| 457 | * | ||
| 458 | * If fa is NULL, we will need to allocate a new one and | ||
| 459 | * insert to the head of f. | ||
| 460 | * | ||
| 461 | * If f is NULL, no fib node matched the destination key | ||
| 462 | * and we need to allocate a new one of those as well. | ||
| 463 | */ | ||
| 464 | |||
| 465 | if (fa && fa->fa_tos == tos && | ||
| 466 | fa->fa_info->fib_priority == fi->fib_priority) { | ||
| 467 | struct fib_alias *fa_first, *fa_match; | ||
| 468 | |||
| 469 | err = -EEXIST; | ||
| 470 | if (cfg->fc_nlflags & NLM_F_EXCL) | ||
| 471 | goto out; | ||
| 472 | |||
| 473 | /* We have 2 goals: | ||
| 474 | * 1. Find exact match for type, scope, fib_info to avoid | ||
| 475 | * duplicate routes | ||
| 476 | * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it | ||
| 477 | */ | ||
| 478 | fa_match = NULL; | ||
| 479 | fa_first = fa; | ||
| 480 | fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); | ||
| 481 | list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { | ||
| 482 | if (fa->fa_tos != tos) | ||
| 483 | break; | ||
| 484 | if (fa->fa_info->fib_priority != fi->fib_priority) | ||
| 485 | break; | ||
| 486 | if (fa->fa_type == cfg->fc_type && | ||
| 487 | fa->fa_scope == cfg->fc_scope && | ||
| 488 | fa->fa_info == fi) { | ||
| 489 | fa_match = fa; | ||
| 490 | break; | ||
| 491 | } | ||
| 492 | } | ||
| 493 | |||
| 494 | if (cfg->fc_nlflags & NLM_F_REPLACE) { | ||
| 495 | u8 state; | ||
| 496 | |||
| 497 | fa = fa_first; | ||
| 498 | if (fa_match) { | ||
| 499 | if (fa == fa_match) | ||
| 500 | err = 0; | ||
| 501 | goto out; | ||
| 502 | } | ||
| 503 | err = -ENOBUFS; | ||
| 504 | new_fa = fib_fast_alloc(f); | ||
| 505 | if (new_fa == NULL) | ||
| 506 | goto out; | ||
| 507 | |||
| 508 | new_fa->fa_tos = fa->fa_tos; | ||
| 509 | new_fa->fa_info = fi; | ||
| 510 | new_fa->fa_type = cfg->fc_type; | ||
| 511 | new_fa->fa_scope = cfg->fc_scope; | ||
| 512 | state = fa->fa_state; | ||
| 513 | new_fa->fa_state = state & ~FA_S_ACCESSED; | ||
| 514 | fib_hash_genid++; | ||
| 515 | list_replace_rcu(&fa->fa_list, &new_fa->fa_list); | ||
| 516 | |||
| 517 | fn_free_alias(fa, f); | ||
| 518 | if (state & FA_S_ACCESSED) | ||
| 519 | rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); | ||
| 520 | rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, | ||
| 521 | tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); | ||
| 522 | return 0; | ||
| 523 | } | ||
| 524 | |||
| 525 | /* Error if we find a perfect match which | ||
| 526 | * uses the same scope, type, and nexthop | ||
| 527 | * information. | ||
| 528 | */ | ||
| 529 | if (fa_match) | ||
| 530 | goto out; | ||
| 531 | |||
| 532 | if (!(cfg->fc_nlflags & NLM_F_APPEND)) | ||
| 533 | fa = fa_first; | ||
| 534 | } | ||
| 535 | |||
| 536 | err = -ENOENT; | ||
| 537 | if (!(cfg->fc_nlflags & NLM_F_CREATE)) | ||
| 538 | goto out; | ||
| 539 | |||
| 540 | err = -ENOBUFS; | ||
| 541 | |||
| 542 | if (!f) { | ||
| 543 | new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL); | ||
| 544 | if (new_f == NULL) | ||
| 545 | goto out; | ||
| 546 | |||
| 547 | INIT_HLIST_NODE(&new_f->fn_hash); | ||
| 548 | INIT_LIST_HEAD(&new_f->fn_alias); | ||
| 549 | new_f->fn_key = key; | ||
| 550 | f = new_f; | ||
| 551 | } | ||
| 552 | |||
| 553 | new_fa = fib_fast_alloc(f); | ||
| 554 | if (new_fa == NULL) | ||
| 555 | goto out; | ||
| 556 | |||
| 557 | new_fa->fa_info = fi; | ||
| 558 | new_fa->fa_tos = tos; | ||
| 559 | new_fa->fa_type = cfg->fc_type; | ||
| 560 | new_fa->fa_scope = cfg->fc_scope; | ||
| 561 | new_fa->fa_state = 0; | ||
| 562 | |||
| 563 | /* | ||
| 564 | * Insert new entry to the list. | ||
| 565 | */ | ||
| 566 | |||
| 567 | if (new_f) | ||
| 568 | fib_insert_node(fz, new_f); | ||
| 569 | list_add_tail_rcu(&new_fa->fa_list, | ||
| 570 | (fa ? &fa->fa_list : &f->fn_alias)); | ||
| 571 | fib_hash_genid++; | ||
| 572 | |||
| 573 | if (new_f) | ||
| 574 | fz->fz_nent++; | ||
| 575 | rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); | ||
| 576 | |||
| 577 | rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, | ||
| 578 | &cfg->fc_nlinfo, 0); | ||
| 579 | return 0; | ||
| 580 | |||
| 581 | out: | ||
| 582 | if (new_f) | ||
| 583 | kmem_cache_free(fn_hash_kmem, new_f); | ||
| 584 | fib_release_info(fi); | ||
| 585 | return err; | ||
| 586 | } | ||
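The insert path above encodes the usual netlink route semantics for an existing {prefix, tos, priority} alias. A condensed sketch of the flag handling (helper names are hypothetical; RCU details and error unwinding omitted):

    if (alias_exists) {
            if (nlflags & NLM_F_EXCL)
                    return -EEXIST;
            if (nlflags & NLM_F_REPLACE)
                    return replace_alias();  /* list_replace_rcu() above   */
            if (exact_duplicate)             /* same type, scope, fib_info */
                    return -EEXIST;
    }
    if (!(nlflags & NLM_F_CREATE))
            return -ENOENT;
    return append_alias();                   /* list_add_tail_rcu() above  */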
| 587 | |||
| 588 | int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | ||
| 589 | { | ||
| 590 | struct fn_hash *table = (struct fn_hash *)tb->tb_data; | ||
| 591 | struct fib_node *f; | ||
| 592 | struct fib_alias *fa, *fa_to_delete; | ||
| 593 | struct fn_zone *fz; | ||
| 594 | __be32 key; | ||
| 595 | |||
| 596 | if (cfg->fc_dst_len > 32) | ||
| 597 | return -EINVAL; | ||
| 598 | |||
| 599 | if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) | ||
| 600 | return -ESRCH; | ||
| 601 | |||
| 602 | key = 0; | ||
| 603 | if (cfg->fc_dst) { | ||
| 604 | if (cfg->fc_dst & ~FZ_MASK(fz)) | ||
| 605 | return -EINVAL; | ||
| 606 | key = fz_key(cfg->fc_dst, fz); | ||
| 607 | } | ||
| 608 | |||
| 609 | f = fib_find_node(fz, key); | ||
| 610 | |||
| 611 | if (!f) | ||
| 612 | fa = NULL; | ||
| 613 | else | ||
| 614 | fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); | ||
| 615 | if (!fa) | ||
| 616 | return -ESRCH; | ||
| 617 | |||
| 618 | fa_to_delete = NULL; | ||
| 619 | fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); | ||
| 620 | list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { | ||
| 621 | struct fib_info *fi = fa->fa_info; | ||
| 622 | |||
| 623 | if (fa->fa_tos != cfg->fc_tos) | ||
| 624 | break; | ||
| 625 | |||
| 626 | if ((!cfg->fc_type || | ||
| 627 | fa->fa_type == cfg->fc_type) && | ||
| 628 | (cfg->fc_scope == RT_SCOPE_NOWHERE || | ||
| 629 | fa->fa_scope == cfg->fc_scope) && | ||
| 630 | (!cfg->fc_protocol || | ||
| 631 | fi->fib_protocol == cfg->fc_protocol) && | ||
| 632 | fib_nh_match(cfg, fi) == 0) { | ||
| 633 | fa_to_delete = fa; | ||
| 634 | break; | ||
| 635 | } | ||
| 636 | } | ||
| 637 | |||
| 638 | if (fa_to_delete) { | ||
| 639 | int kill_fn; | ||
| 640 | |||
| 641 | fa = fa_to_delete; | ||
| 642 | rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, | ||
| 643 | tb->tb_id, &cfg->fc_nlinfo, 0); | ||
| 644 | |||
| 645 | kill_fn = 0; | ||
| 646 | list_del_rcu(&fa->fa_list); | ||
| 647 | if (list_empty(&f->fn_alias)) { | ||
| 648 | hlist_del_rcu(&f->fn_hash); | ||
| 649 | kill_fn = 1; | ||
| 650 | } | ||
| 651 | fib_hash_genid++; | ||
| 652 | |||
| 653 | if (fa->fa_state & FA_S_ACCESSED) | ||
| 654 | rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); | ||
| 655 | fn_free_alias(fa, f); | ||
| 656 | if (kill_fn) { | ||
| 657 | fn_free_node(f); | ||
| 658 | fz->fz_nent--; | ||
| 659 | } | ||
| 660 | |||
| 661 | return 0; | ||
| 662 | } | ||
| 663 | return -ESRCH; | ||
| 664 | } | ||
| 665 | |||
| 666 | static int fn_flush_list(struct fn_zone *fz, int idx) | ||
| 667 | { | ||
| 668 | struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx; | ||
| 669 | struct hlist_node *node, *n; | ||
| 670 | struct fib_node *f; | ||
| 671 | int found = 0; | ||
| 672 | |||
| 673 | hlist_for_each_entry_safe(f, node, n, head, fn_hash) { | ||
| 674 | struct fib_alias *fa, *fa_node; | ||
| 675 | int kill_f; | ||
| 676 | |||
| 677 | kill_f = 0; | ||
| 678 | list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) { | ||
| 679 | struct fib_info *fi = fa->fa_info; | ||
| 680 | |||
| 681 | if (fi && (fi->fib_flags&RTNH_F_DEAD)) { | ||
| 682 | list_del_rcu(&fa->fa_list); | ||
| 683 | if (list_empty(&f->fn_alias)) { | ||
| 684 | hlist_del_rcu(&f->fn_hash); | ||
| 685 | kill_f = 1; | ||
| 686 | } | ||
| 687 | fib_hash_genid++; | ||
| 688 | |||
| 689 | fn_free_alias(fa, f); | ||
| 690 | found++; | ||
| 691 | } | ||
| 692 | } | ||
| 693 | if (kill_f) { | ||
| 694 | fn_free_node(f); | ||
| 695 | fz->fz_nent--; | ||
| 696 | } | ||
| 697 | } | ||
| 698 | return found; | ||
| 699 | } | ||
| 700 | |||
| 701 | /* Caller must hold RTNL. */ | ||
| 702 | int fib_table_flush(struct fib_table *tb) | ||
| 703 | { | ||
| 704 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; | ||
| 705 | struct fn_zone *fz; | ||
| 706 | int found = 0; | ||
| 707 | |||
| 708 | for (fz = rtnl_dereference(table->fn_zone_list); | ||
| 709 | fz != NULL; | ||
| 710 | fz = rtnl_dereference(fz->fz_next)) { | ||
| 711 | int i; | ||
| 712 | |||
| 713 | for (i = fz->fz_divisor - 1; i >= 0; i--) | ||
| 714 | found += fn_flush_list(fz, i); | ||
| 715 | } | ||
| 716 | return found; | ||
| 717 | } | ||
| 718 | |||
| 719 | void fib_free_table(struct fib_table *tb) | ||
| 720 | { | ||
| 721 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; | ||
| 722 | struct fn_zone *fz, *next; | ||
| 723 | |||
| 724 | next = table->fn_zone_list; | ||
| 725 | while (next != NULL) { | ||
| 726 | fz = next; | ||
| 727 | next = fz->fz_next; | ||
| 728 | |||
| 729 | if (fz->fz_hash != fz->fz_embedded_hash) | ||
| 730 | fz_hash_free(fz->fz_hash, fz->fz_divisor); | ||
| 731 | |||
| 732 | kfree(fz); | ||
| 733 | } | ||
| 734 | |||
| 735 | kfree(tb); | ||
| 736 | } | ||
| 737 | |||
| 738 | static inline int | ||
| 739 | fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, | ||
| 740 | struct fib_table *tb, | ||
| 741 | struct fn_zone *fz, | ||
| 742 | struct hlist_head *head) | ||
| 743 | { | ||
| 744 | struct hlist_node *node; | ||
| 745 | struct fib_node *f; | ||
| 746 | int i, s_i; | ||
| 747 | |||
| 748 | s_i = cb->args[4]; | ||
| 749 | i = 0; | ||
| 750 | hlist_for_each_entry_rcu(f, node, head, fn_hash) { | ||
| 751 | struct fib_alias *fa; | ||
| 752 | |||
| 753 | list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { | ||
| 754 | if (i < s_i) | ||
| 755 | goto next; | ||
| 756 | |||
| 757 | if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, | ||
| 758 | cb->nlh->nlmsg_seq, | ||
| 759 | RTM_NEWROUTE, | ||
| 760 | tb->tb_id, | ||
| 761 | fa->fa_type, | ||
| 762 | fa->fa_scope, | ||
| 763 | f->fn_key, | ||
| 764 | fz->fz_order, | ||
| 765 | fa->fa_tos, | ||
| 766 | fa->fa_info, | ||
| 767 | NLM_F_MULTI) < 0) { | ||
| 768 | cb->args[4] = i; | ||
| 769 | return -1; | ||
| 770 | } | ||
| 771 | next: | ||
| 772 | i++; | ||
| 773 | } | ||
| 774 | } | ||
| 775 | cb->args[4] = i; | ||
| 776 | return skb->len; | ||
| 777 | } | ||
| 778 | |||
| 779 | static inline int | ||
| 780 | fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, | ||
| 781 | struct fib_table *tb, | ||
| 782 | struct fn_zone *fz) | ||
| 783 | { | ||
| 784 | int h, s_h; | ||
| 785 | struct hlist_head *head = rcu_dereference(fz->fz_hash); | ||
| 786 | |||
| 787 | if (head == NULL) | ||
| 788 | return skb->len; | ||
| 789 | s_h = cb->args[3]; | ||
| 790 | for (h = s_h; h < fz->fz_divisor; h++) { | ||
| 791 | if (hlist_empty(head + h)) | ||
| 792 | continue; | ||
| 793 | if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) { | ||
| 794 | cb->args[3] = h; | ||
| 795 | return -1; | ||
| 796 | } | ||
| 797 | memset(&cb->args[4], 0, | ||
| 798 | sizeof(cb->args) - 4*sizeof(cb->args[0])); | ||
| 799 | } | ||
| 800 | cb->args[3] = h; | ||
| 801 | return skb->len; | ||
| 802 | } | ||
| 803 | |||
| 804 | int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, | ||
| 805 | struct netlink_callback *cb) | ||
| 806 | { | ||
| 807 | int m = 0, s_m; | ||
| 808 | struct fn_zone *fz; | ||
| 809 | struct fn_hash *table = (struct fn_hash *)tb->tb_data; | ||
| 810 | |||
| 811 | s_m = cb->args[2]; | ||
| 812 | rcu_read_lock(); | ||
| 813 | for (fz = rcu_dereference(table->fn_zone_list); | ||
| 814 | fz != NULL; | ||
| 815 | fz = rcu_dereference(fz->fz_next), m++) { | ||
| 816 | if (m < s_m) | ||
| 817 | continue; | ||
| 818 | if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { | ||
| 819 | cb->args[2] = m; | ||
| 820 | rcu_read_unlock(); | ||
| 821 | return -1; | ||
| 822 | } | ||
| 823 | memset(&cb->args[3], 0, | ||
| 824 | sizeof(cb->args) - 3*sizeof(cb->args[0])); | ||
| 825 | } | ||
| 826 | rcu_read_unlock(); | ||
| 827 | cb->args[2] = m; | ||
| 828 | return skb->len; | ||
| 829 | } | ||
| 830 | |||
| 831 | void __init fib_hash_init(void) | ||
| 832 | { | ||
| 833 | fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node), | ||
| 834 | 0, SLAB_PANIC, NULL); | ||
| 835 | |||
| 836 | fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), | ||
| 837 | 0, SLAB_PANIC, NULL); | ||
| 838 | |||
| 839 | } | ||
| 840 | |||
| 841 | struct fib_table *fib_hash_table(u32 id) | ||
| 842 | { | ||
| 843 | struct fib_table *tb; | ||
| 844 | |||
| 845 | tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), | ||
| 846 | GFP_KERNEL); | ||
| 847 | if (tb == NULL) | ||
| 848 | return NULL; | ||
| 849 | |||
| 850 | tb->tb_id = id; | ||
| 851 | tb->tb_default = -1; | ||
| 852 | |||
| 853 | memset(tb->tb_data, 0, sizeof(struct fn_hash)); | ||
| 854 | return tb; | ||
| 855 | } | ||
| 856 | |||
| 857 | /* ------------------------------------------------------------------------ */ | ||
| 858 | #ifdef CONFIG_PROC_FS | ||
| 859 | |||
| 860 | struct fib_iter_state { | ||
| 861 | struct seq_net_private p; | ||
| 862 | struct fn_zone *zone; | ||
| 863 | int bucket; | ||
| 864 | struct hlist_head *hash_head; | ||
| 865 | struct fib_node *fn; | ||
| 866 | struct fib_alias *fa; | ||
| 867 | loff_t pos; | ||
| 868 | unsigned int genid; | ||
| 869 | int valid; | ||
| 870 | }; | ||
| 871 | |||
| 872 | static struct fib_alias *fib_get_first(struct seq_file *seq) | ||
| 873 | { | ||
| 874 | struct fib_iter_state *iter = seq->private; | ||
| 875 | struct fib_table *main_table; | ||
| 876 | struct fn_hash *table; | ||
| 877 | |||
| 878 | main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); | ||
| 879 | table = (struct fn_hash *)main_table->tb_data; | ||
| 880 | |||
| 881 | iter->bucket = 0; | ||
| 882 | iter->hash_head = NULL; | ||
| 883 | iter->fn = NULL; | ||
| 884 | iter->fa = NULL; | ||
| 885 | iter->pos = 0; | ||
| 886 | iter->genid = fib_hash_genid; | ||
| 887 | iter->valid = 1; | ||
| 888 | |||
| 889 | for (iter->zone = rcu_dereference(table->fn_zone_list); | ||
| 890 | iter->zone != NULL; | ||
| 891 | iter->zone = rcu_dereference(iter->zone->fz_next)) { | ||
| 892 | int maxslot; | ||
| 893 | |||
| 894 | if (!iter->zone->fz_nent) | ||
| 895 | continue; | ||
| 896 | |||
| 897 | iter->hash_head = rcu_dereference(iter->zone->fz_hash); | ||
| 898 | maxslot = iter->zone->fz_divisor; | ||
| 899 | |||
| 900 | for (iter->bucket = 0; iter->bucket < maxslot; | ||
| 901 | ++iter->bucket, ++iter->hash_head) { | ||
| 902 | struct hlist_node *node; | ||
| 903 | struct fib_node *fn; | ||
| 904 | |||
| 905 | hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { | ||
| 906 | struct fib_alias *fa; | ||
| 907 | |||
| 908 | list_for_each_entry(fa, &fn->fn_alias, fa_list) { | ||
| 909 | iter->fn = fn; | ||
| 910 | iter->fa = fa; | ||
| 911 | goto out; | ||
| 912 | } | ||
| 913 | } | ||
| 914 | } | ||
| 915 | } | ||
| 916 | out: | ||
| 917 | return iter->fa; | ||
| 918 | } | ||
| 919 | |||
| 920 | static struct fib_alias *fib_get_next(struct seq_file *seq) | ||
| 921 | { | ||
| 922 | struct fib_iter_state *iter = seq->private; | ||
| 923 | struct fib_node *fn; | ||
| 924 | struct fib_alias *fa; | ||
| 925 | |||
| 926 | /* Advance FA, if any. */ | ||
| 927 | fn = iter->fn; | ||
| 928 | fa = iter->fa; | ||
| 929 | if (fa) { | ||
| 930 | BUG_ON(!fn); | ||
| 931 | list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) { | ||
| 932 | iter->fa = fa; | ||
| 933 | goto out; | ||
| 934 | } | ||
| 935 | } | ||
| 936 | |||
| 937 | fa = iter->fa = NULL; | ||
| 938 | |||
| 939 | /* Advance FN. */ | ||
| 940 | if (fn) { | ||
| 941 | struct hlist_node *node = &fn->fn_hash; | ||
| 942 | hlist_for_each_entry_continue(fn, node, fn_hash) { | ||
| 943 | iter->fn = fn; | ||
| 944 | |||
| 945 | list_for_each_entry(fa, &fn->fn_alias, fa_list) { | ||
| 946 | iter->fa = fa; | ||
| 947 | goto out; | ||
| 948 | } | ||
| 949 | } | ||
| 950 | } | ||
| 951 | |||
| 952 | fn = iter->fn = NULL; | ||
| 953 | |||
| 954 | /* Advance hash chain. */ | ||
| 955 | if (!iter->zone) | ||
| 956 | goto out; | ||
| 957 | |||
| 958 | for (;;) { | ||
| 959 | struct hlist_node *node; | ||
| 960 | int maxslot; | ||
| 961 | |||
| 962 | maxslot = iter->zone->fz_divisor; | ||
| 963 | |||
| 964 | while (++iter->bucket < maxslot) { | ||
| 965 | iter->hash_head++; | ||
| 966 | |||
| 967 | hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { | ||
| 968 | list_for_each_entry(fa, &fn->fn_alias, fa_list) { | ||
| 969 | iter->fn = fn; | ||
| 970 | iter->fa = fa; | ||
| 971 | goto out; | ||
| 972 | } | ||
| 973 | } | ||
| 974 | } | ||
| 975 | |||
| 976 | iter->zone = rcu_dereference(iter->zone->fz_next); | ||
| 977 | |||
| 978 | if (!iter->zone) | ||
| 979 | goto out; | ||
| 980 | |||
| 981 | iter->bucket = 0; | ||
| 982 | iter->hash_head = rcu_dereference(iter->zone->fz_hash); | ||
| 983 | |||
| 984 | hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { | ||
| 985 | list_for_each_entry(fa, &fn->fn_alias, fa_list) { | ||
| 986 | iter->fn = fn; | ||
| 987 | iter->fa = fa; | ||
| 988 | goto out; | ||
| 989 | } | ||
| 990 | } | ||
| 991 | } | ||
| 992 | out: | ||
| 993 | iter->pos++; | ||
| 994 | return fa; | ||
| 995 | } | ||
| 996 | |||
| 997 | static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos) | ||
| 998 | { | ||
| 999 | struct fib_iter_state *iter = seq->private; | ||
| 1000 | struct fib_alias *fa; | ||
| 1001 | |||
| 1002 | if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) { | ||
| 1003 | fa = iter->fa; | ||
| 1004 | pos -= iter->pos; | ||
| 1005 | } else | ||
| 1006 | fa = fib_get_first(seq); | ||
| 1007 | |||
| 1008 | if (fa) | ||
| 1009 | while (pos && (fa = fib_get_next(seq))) | ||
| 1010 | --pos; | ||
| 1011 | return pos ? NULL : fa; | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | static void *fib_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 1015 | __acquires(RCU) | ||
| 1016 | { | ||
| 1017 | void *v = NULL; | ||
| 1018 | |||
| 1019 | rcu_read_lock(); | ||
| 1020 | if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN)) | ||
| 1021 | v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | ||
| 1022 | return v; | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
| 1026 | { | ||
| 1027 | ++*pos; | ||
| 1028 | return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq); | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | static void fib_seq_stop(struct seq_file *seq, void *v) | ||
| 1032 | __releases(RCU) | ||
| 1033 | { | ||
| 1034 | rcu_read_unlock(); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi) | ||
| 1038 | { | ||
| 1039 | static const unsigned type2flags[RTN_MAX + 1] = { | ||
| 1040 | [7] = RTF_REJECT, | ||
| 1041 | [8] = RTF_REJECT, | ||
| 1042 | }; | ||
| 1043 | unsigned flags = type2flags[type]; | ||
| 1044 | |||
| 1045 | if (fi && fi->fib_nh->nh_gw) | ||
| 1046 | flags |= RTF_GATEWAY; | ||
| 1047 | if (mask == htonl(0xFFFFFFFF)) | ||
| 1048 | flags |= RTF_HOST; | ||
| 1049 | flags |= RTF_UP; | ||
| 1050 | return flags; | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | /* | ||
| 1054 | * This outputs /proc/net/route. | ||
| 1055 | * | ||
| 1056 | * It always works in backward compatibility mode. | ||
| 1057 | * The format of the file is not supposed to be changed. | ||
| 1058 | */ | ||
| 1059 | static int fib_seq_show(struct seq_file *seq, void *v) | ||
| 1060 | { | ||
| 1061 | struct fib_iter_state *iter; | ||
| 1062 | int len; | ||
| 1063 | __be32 prefix, mask; | ||
| 1064 | unsigned flags; | ||
| 1065 | struct fib_node *f; | ||
| 1066 | struct fib_alias *fa; | ||
| 1067 | struct fib_info *fi; | ||
| 1068 | |||
| 1069 | if (v == SEQ_START_TOKEN) { | ||
| 1070 | seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " | ||
| 1071 | "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" | ||
| 1072 | "\tWindow\tIRTT"); | ||
| 1073 | goto out; | ||
| 1074 | } | ||
| 1075 | |||
| 1076 | iter = seq->private; | ||
| 1077 | f = iter->fn; | ||
| 1078 | fa = iter->fa; | ||
| 1079 | fi = fa->fa_info; | ||
| 1080 | prefix = f->fn_key; | ||
| 1081 | mask = FZ_MASK(iter->zone); | ||
| 1082 | flags = fib_flag_trans(fa->fa_type, mask, fi); | ||
| 1083 | if (fi) | ||
| 1084 | seq_printf(seq, | ||
| 1085 | "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n", | ||
| 1086 | fi->fib_dev ? fi->fib_dev->name : "*", prefix, | ||
| 1087 | fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority, | ||
| 1088 | mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0), | ||
| 1089 | fi->fib_window, | ||
| 1090 | fi->fib_rtt >> 3, &len); | ||
| 1091 | else | ||
| 1092 | seq_printf(seq, | ||
| 1093 | "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n", | ||
| 1094 | prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len); | ||
| 1095 | |||
| 1096 | seq_printf(seq, "%*s\n", 127 - len, ""); | ||
| 1097 | out: | ||
| 1098 | return 0; | ||
| 1099 | } | ||
| 1100 | |||
| 1101 | static const struct seq_operations fib_seq_ops = { | ||
| 1102 | .start = fib_seq_start, | ||
| 1103 | .next = fib_seq_next, | ||
| 1104 | .stop = fib_seq_stop, | ||
| 1105 | .show = fib_seq_show, | ||
| 1106 | }; | ||
| 1107 | |||
| 1108 | static int fib_seq_open(struct inode *inode, struct file *file) | ||
| 1109 | { | ||
| 1110 | return seq_open_net(inode, file, &fib_seq_ops, | ||
| 1111 | sizeof(struct fib_iter_state)); | ||
| 1112 | } | ||
| 1113 | |||
| 1114 | static const struct file_operations fib_seq_fops = { | ||
| 1115 | .owner = THIS_MODULE, | ||
| 1116 | .open = fib_seq_open, | ||
| 1117 | .read = seq_read, | ||
| 1118 | .llseek = seq_lseek, | ||
| 1119 | .release = seq_release_net, | ||
| 1120 | }; | ||
| 1121 | |||
| 1122 | int __net_init fib_proc_init(struct net *net) | ||
| 1123 | { | ||
| 1124 | if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops)) | ||
| 1125 | return -ENOMEM; | ||
| 1126 | return 0; | ||
| 1127 | } | ||
| 1128 | |||
| 1129 | void __net_exit fib_proc_exit(struct net *net) | ||
| 1130 | { | ||
| 1131 | proc_net_remove(net, "route"); | ||
| 1132 | } | ||
| 1133 | #endif /* CONFIG_PROC_FS */ | ||
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c079cc0ec651..af0f14aba169 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h | |||
| @@ -10,7 +10,6 @@ struct fib_alias { | |||
| 10 | struct fib_info *fa_info; | 10 | struct fib_info *fa_info; |
| 11 | u8 fa_tos; | 11 | u8 fa_tos; |
| 12 | u8 fa_type; | 12 | u8 fa_type; |
| 13 | u8 fa_scope; | ||
| 14 | u8 fa_state; | 13 | u8 fa_state; |
| 15 | struct rcu_head rcu; | 14 | struct rcu_head rcu; |
| 16 | }; | 15 | }; |
| @@ -25,14 +24,11 @@ static inline void fib_alias_accessed(struct fib_alias *fa) | |||
| 25 | } | 24 | } |
| 26 | 25 | ||
| 27 | /* Exported by fib_semantics.c */ | 26 | /* Exported by fib_semantics.c */ |
| 28 | extern int fib_semantic_match(struct list_head *head, | ||
| 29 | const struct flowi *flp, | ||
| 30 | struct fib_result *res, int prefixlen, int fib_flags); | ||
| 31 | extern void fib_release_info(struct fib_info *); | 27 | extern void fib_release_info(struct fib_info *); |
| 32 | extern struct fib_info *fib_create_info(struct fib_config *cfg); | 28 | extern struct fib_info *fib_create_info(struct fib_config *cfg); |
| 33 | extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); | 29 | extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); |
| 34 | extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | 30 | extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, |
| 35 | u32 tb_id, u8 type, u8 scope, __be32 dst, | 31 | u32 tb_id, u8 type, __be32 dst, |
| 36 | int dst_len, u8 tos, struct fib_info *fi, | 32 | int dst_len, u8 tos, struct fib_info *fi, |
| 37 | unsigned int); | 33 | unsigned int); |
| 38 | extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, | 34 | extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, |
| @@ -51,4 +47,11 @@ static inline void fib_result_assign(struct fib_result *res, | |||
| 51 | res->fi = fi; | 47 | res->fi = fi; |
| 52 | } | 48 | } |
| 53 | 49 | ||
| 50 | struct fib_prop { | ||
| 51 | int error; | ||
| 52 | u8 scope; | ||
| 53 | }; | ||
| 54 | |||
| 55 | extern const struct fib_prop fib_props[RTN_MAX + 1]; | ||
| 56 | |||
| 54 | #endif /* _FIB_LOOKUP_H */ | 57 | #endif /* _FIB_LOOKUP_H */ |
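struct fib_alias loses fa_scope, and the previously file-local fib_props[] table in fib_semantics.c is given a named type and exported through this header. As the fib_semantics.c hunks below show, scope now travels in struct fib_info (fib_scope, folded into the hash and compared in fib_find_info), while fib_props[] keeps the per-route-type defaults. Illustrative use, assuming a route type in hand (not a hunk from this patch):

    int err   = fib_props[type].error;   /* e.g. nonzero for RTN_UNREACHABLE */
    u8  scope = fib_props[type].scope;   /* default scope for that type      */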
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 7981a24f5c7b..a53bb1b5b118 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
| @@ -41,19 +41,19 @@ struct fib4_rule { | |||
| 41 | __be32 srcmask; | 41 | __be32 srcmask; |
| 42 | __be32 dst; | 42 | __be32 dst; |
| 43 | __be32 dstmask; | 43 | __be32 dstmask; |
| 44 | #ifdef CONFIG_NET_CLS_ROUTE | 44 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 45 | u32 tclassid; | 45 | u32 tclassid; |
| 46 | #endif | 46 | #endif |
| 47 | }; | 47 | }; |
| 48 | 48 | ||
| 49 | #ifdef CONFIG_NET_CLS_ROUTE | 49 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 50 | u32 fib_rules_tclass(struct fib_result *res) | 50 | u32 fib_rules_tclass(const struct fib_result *res) |
| 51 | { | 51 | { |
| 52 | return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; | 52 | return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; |
| 53 | } | 53 | } |
| 54 | #endif | 54 | #endif |
| 55 | 55 | ||
| 56 | int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) | 56 | int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) |
| 57 | { | 57 | { |
| 58 | struct fib_lookup_arg arg = { | 58 | struct fib_lookup_arg arg = { |
| 59 | .result = res, | 59 | .result = res, |
| @@ -61,7 +61,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) | |||
| 61 | }; | 61 | }; |
| 62 | int err; | 62 | int err; |
| 63 | 63 | ||
| 64 | err = fib_rules_lookup(net->ipv4.rules_ops, flp, 0, &arg); | 64 | err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); |
| 65 | res->r = arg.rule; | 65 | res->r = arg.rule; |
| 66 | 66 | ||
| 67 | return err; | 67 | return err; |
| @@ -95,7 +95,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, | |||
| 95 | if (!tbl) | 95 | if (!tbl) |
| 96 | goto errout; | 96 | goto errout; |
| 97 | 97 | ||
| 98 | err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags); | 98 | err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); |
| 99 | if (err > 0) | 99 | if (err > 0) |
| 100 | err = -EAGAIN; | 100 | err = -EAGAIN; |
| 101 | errout: | 101 | errout: |
| @@ -106,14 +106,15 @@ errout: | |||
| 106 | static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) | 106 | static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) |
| 107 | { | 107 | { |
| 108 | struct fib4_rule *r = (struct fib4_rule *) rule; | 108 | struct fib4_rule *r = (struct fib4_rule *) rule; |
| 109 | __be32 daddr = fl->fl4_dst; | 109 | struct flowi4 *fl4 = &fl->u.ip4; |
| 110 | __be32 saddr = fl->fl4_src; | 110 | __be32 daddr = fl4->daddr; |
| 111 | __be32 saddr = fl4->saddr; | ||
| 111 | 112 | ||
| 112 | if (((saddr ^ r->src) & r->srcmask) || | 113 | if (((saddr ^ r->src) & r->srcmask) || |
| 113 | ((daddr ^ r->dst) & r->dstmask)) | 114 | ((daddr ^ r->dst) & r->dstmask)) |
| 114 | return 0; | 115 | return 0; |
| 115 | 116 | ||
| 116 | if (r->tos && (r->tos != fl->fl4_tos)) | 117 | if (r->tos && (r->tos != fl4->flowi4_tos)) |
| 117 | return 0; | 118 | return 0; |
| 118 | 119 | ||
| 119 | return 1; | 120 | return 1; |
| @@ -165,7 +166,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, | |||
| 165 | if (frh->dst_len) | 166 | if (frh->dst_len) |
| 166 | rule4->dst = nla_get_be32(tb[FRA_DST]); | 167 | rule4->dst = nla_get_be32(tb[FRA_DST]); |
| 167 | 168 | ||
| 168 | #ifdef CONFIG_NET_CLS_ROUTE | 169 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 169 | if (tb[FRA_FLOW]) | 170 | if (tb[FRA_FLOW]) |
| 170 | rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); | 171 | rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); |
| 171 | #endif | 172 | #endif |
| @@ -195,7 +196,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, | |||
| 195 | if (frh->tos && (rule4->tos != frh->tos)) | 196 | if (frh->tos && (rule4->tos != frh->tos)) |
| 196 | return 0; | 197 | return 0; |
| 197 | 198 | ||
| 198 | #ifdef CONFIG_NET_CLS_ROUTE | 199 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 199 | if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) | 200 | if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) |
| 200 | return 0; | 201 | return 0; |
| 201 | #endif | 202 | #endif |
| @@ -224,7 +225,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | |||
| 224 | if (rule4->src_len) | 225 | if (rule4->src_len) |
| 225 | NLA_PUT_BE32(skb, FRA_SRC, rule4->src); | 226 | NLA_PUT_BE32(skb, FRA_SRC, rule4->src); |
| 226 | 227 | ||
| 227 | #ifdef CONFIG_NET_CLS_ROUTE | 228 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 228 | if (rule4->tclassid) | 229 | if (rule4->tclassid) |
| 229 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); | 230 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); |
| 230 | #endif | 231 | #endif |
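fib_rules.c shows both halves of the flowi conversion: the family-independent rules core still passes struct flowi, so IPv4 code reaches its keys through flp->u.ip4 and converts the other way with flowi4_to_flowi(). A reduced sketch of the container this implies, as an assumption; the real definition lives in include/net/flow.h:

    struct flowi {
            union {
                    struct flowi_common __fl_common;
                    struct flowi4       ip4;
                    struct flowi6       ip6;
            } u;
    };

    static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
    {
            return container_of(fl4, struct flowi, u.ip4);
    }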
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 12d3dc3df1b7..641a5a2a9f9c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
| @@ -49,7 +49,7 @@ | |||
| 49 | static DEFINE_SPINLOCK(fib_info_lock); | 49 | static DEFINE_SPINLOCK(fib_info_lock); |
| 50 | static struct hlist_head *fib_info_hash; | 50 | static struct hlist_head *fib_info_hash; |
| 51 | static struct hlist_head *fib_info_laddrhash; | 51 | static struct hlist_head *fib_info_laddrhash; |
| 52 | static unsigned int fib_hash_size; | 52 | static unsigned int fib_info_hash_size; |
| 53 | static unsigned int fib_info_cnt; | 53 | static unsigned int fib_info_cnt; |
| 54 | 54 | ||
| 55 | #define DEVINDEX_HASHBITS 8 | 55 | #define DEVINDEX_HASHBITS 8 |
| @@ -90,11 +90,7 @@ static DEFINE_SPINLOCK(fib_multipath_lock); | |||
| 90 | #define endfor_nexthops(fi) } | 90 | #define endfor_nexthops(fi) } |
| 91 | 91 | ||
| 92 | 92 | ||
| 93 | static const struct | 93 | const struct fib_prop fib_props[RTN_MAX + 1] = { |
| 94 | { | ||
| 95 | int error; | ||
| 96 | u8 scope; | ||
| 97 | } fib_props[RTN_MAX + 1] = { | ||
| 98 | [RTN_UNSPEC] = { | 94 | [RTN_UNSPEC] = { |
| 99 | .error = 0, | 95 | .error = 0, |
| 100 | .scope = RT_SCOPE_NOWHERE, | 96 | .scope = RT_SCOPE_NOWHERE, |
| @@ -152,6 +148,8 @@ static void free_fib_info_rcu(struct rcu_head *head) | |||
| 152 | { | 148 | { |
| 153 | struct fib_info *fi = container_of(head, struct fib_info, rcu); | 149 | struct fib_info *fi = container_of(head, struct fib_info, rcu); |
| 154 | 150 | ||
| 151 | if (fi->fib_metrics != (u32 *) dst_default_metrics) | ||
| 152 | kfree(fi->fib_metrics); | ||
| 155 | kfree(fi); | 153 | kfree(fi); |
| 156 | } | 154 | } |
| 157 | 155 | ||
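free_fib_info_rcu() gains a conditional kfree because metrics no longer live inside struct fib_info: routes configured without explicit metrics share the read-only dst_default_metrics block, and a private RTAX_MAX-sized array exists only when metrics were supplied (hence the memcmp over sizeof(u32) * RTAX_MAX further down). The allocation-side counterpart, as an assumption inferred from this hunk rather than a quote from the patch:

    fi->fib_metrics = (u32 *) dst_default_metrics;   /* shared default    */
    if (cfg->fc_mx) {                                /* explicit metrics  */
            fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
            if (!fi->fib_metrics)
                    goto failure;                    /* copy-on-configure */
    }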
| @@ -200,7 +198,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) | |||
| 200 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 198 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| 201 | nh->nh_weight != onh->nh_weight || | 199 | nh->nh_weight != onh->nh_weight || |
| 202 | #endif | 200 | #endif |
| 203 | #ifdef CONFIG_NET_CLS_ROUTE | 201 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 204 | nh->nh_tclassid != onh->nh_tclassid || | 202 | nh->nh_tclassid != onh->nh_tclassid || |
| 205 | #endif | 203 | #endif |
| 206 | ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) | 204 | ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) |
| @@ -221,10 +219,10 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val) | |||
| 221 | 219 | ||
| 222 | static inline unsigned int fib_info_hashfn(const struct fib_info *fi) | 220 | static inline unsigned int fib_info_hashfn(const struct fib_info *fi) |
| 223 | { | 221 | { |
| 224 | unsigned int mask = (fib_hash_size - 1); | 222 | unsigned int mask = (fib_info_hash_size - 1); |
| 225 | unsigned int val = fi->fib_nhs; | 223 | unsigned int val = fi->fib_nhs; |
| 226 | 224 | ||
| 227 | val ^= fi->fib_protocol; | 225 | val ^= (fi->fib_protocol << 8) | fi->fib_scope; |
| 228 | val ^= (__force u32)fi->fib_prefsrc; | 226 | val ^= (__force u32)fi->fib_prefsrc; |
| 229 | val ^= fi->fib_priority; | 227 | val ^= fi->fib_priority; |
| 230 | for_nexthops(fi) { | 228 | for_nexthops(fi) { |
| @@ -250,10 +248,11 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) | |||
| 250 | if (fi->fib_nhs != nfi->fib_nhs) | 248 | if (fi->fib_nhs != nfi->fib_nhs) |
| 251 | continue; | 249 | continue; |
| 252 | if (nfi->fib_protocol == fi->fib_protocol && | 250 | if (nfi->fib_protocol == fi->fib_protocol && |
| 251 | nfi->fib_scope == fi->fib_scope && | ||
| 253 | nfi->fib_prefsrc == fi->fib_prefsrc && | 252 | nfi->fib_prefsrc == fi->fib_prefsrc && |
| 254 | nfi->fib_priority == fi->fib_priority && | 253 | nfi->fib_priority == fi->fib_priority && |
| 255 | memcmp(nfi->fib_metrics, fi->fib_metrics, | 254 | memcmp(nfi->fib_metrics, fi->fib_metrics, |
| 256 | sizeof(fi->fib_metrics)) == 0 && | 255 | sizeof(u32) * RTAX_MAX) == 0 && |
| 257 | ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && | 256 | ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && |
| 258 | (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) | 257 | (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) |
| 259 | return fi; | 258 | return fi; |
| @@ -330,7 +329,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, | |||
| 330 | goto errout; | 329 | goto errout; |
| 331 | 330 | ||
| 332 | err = fib_dump_info(skb, info->pid, seq, event, tb_id, | 331 | err = fib_dump_info(skb, info->pid, seq, event, tb_id, |
| 333 | fa->fa_type, fa->fa_scope, key, dst_len, | 332 | fa->fa_type, key, dst_len, |
| 334 | fa->fa_tos, fa->fa_info, nlm_flags); | 333 | fa->fa_tos, fa->fa_info, nlm_flags); |
| 335 | if (err < 0) { | 334 | if (err < 0) { |
| 336 | /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ | 335 | /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ |
| @@ -422,7 +421,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, | |||
| 422 | 421 | ||
| 423 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 422 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
| 424 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; | 423 | nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; |
| 425 | #ifdef CONFIG_NET_CLS_ROUTE | 424 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 426 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 425 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
| 427 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; | 426 | nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; |
| 428 | #endif | 427 | #endif |
| @@ -476,7 +475,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) | |||
| 476 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | 475 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); |
| 477 | if (nla && nla_get_be32(nla) != nh->nh_gw) | 476 | if (nla && nla_get_be32(nla) != nh->nh_gw) |
| 478 | return 1; | 477 | return 1; |
| 479 | #ifdef CONFIG_NET_CLS_ROUTE | 478 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 480 | nla = nla_find(attrs, attrlen, RTA_FLOW); | 479 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
| 481 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) | 480 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) |
| 482 | return 1; | 481 | return 1; |
| @@ -562,16 +561,16 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, | |||
| 562 | } | 561 | } |
| 563 | rcu_read_lock(); | 562 | rcu_read_lock(); |
| 564 | { | 563 | { |
| 565 | struct flowi fl = { | 564 | struct flowi4 fl4 = { |
| 566 | .fl4_dst = nh->nh_gw, | 565 | .daddr = nh->nh_gw, |
| 567 | .fl4_scope = cfg->fc_scope + 1, | 566 | .flowi4_scope = cfg->fc_scope + 1, |
| 568 | .oif = nh->nh_oif, | 567 | .flowi4_oif = nh->nh_oif, |
| 569 | }; | 568 | }; |
| 570 | 569 | ||
| 571 | /* It is not necessary, but requires a bit of thinking */ | 570 | /* It is not necessary, but requires a bit of thinking */ |
| 572 | if (fl.fl4_scope < RT_SCOPE_LINK) | 571 | if (fl4.flowi4_scope < RT_SCOPE_LINK) |
| 573 | fl.fl4_scope = RT_SCOPE_LINK; | 572 | fl4.flowi4_scope = RT_SCOPE_LINK; |
| 574 | err = fib_lookup(net, &fl, &res); | 573 | err = fib_lookup(net, &fl4, &res); |
| 575 | if (err) { | 574 | if (err) { |
| 576 | rcu_read_unlock(); | 575 | rcu_read_unlock(); |
| 577 | return err; | 576 | return err; |
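This hunk is part of the tree-wide flowi → flowi4 split: IPv4 lookups now key off a dedicated struct whose fields are daddr, flowi4_scope, flowi4_oif and so on, while the gateway-validation logic itself is unchanged, still widening the scope by one and clamping it to at least link scope before the recursive lookup. A stand-alone sketch of that key construction (the struct is a simplified stand-in, not the kernel definition):

    #include <stdint.h>

    enum { RT_SCOPE_UNIVERSE = 0, RT_SCOPE_LINK = 253 };  /* uapi rtnetlink values */

    struct flowi4_sketch {          /* stand-in for struct flowi4 */
            uint32_t daddr;
            int      flowi4_scope;
            int      flowi4_oif;
    };

    /* Validating a gateway: the route that reaches the gateway must be
     * strictly "wider" than the route being added, hence fc_scope + 1,
     * but never narrower than link scope. */
    static void build_nh_key(struct flowi4_sketch *fl4,
                             uint32_t gw, int fc_scope, int oif)
    {
            fl4->daddr        = gw;
            fl4->flowi4_scope = fc_scope + 1;
            fl4->flowi4_oif   = oif;
            if (fl4->flowi4_scope < RT_SCOPE_LINK)
                    fl4->flowi4_scope = RT_SCOPE_LINK;
    }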
| @@ -613,14 +612,14 @@ out: | |||
| 613 | 612 | ||
| 614 | static inline unsigned int fib_laddr_hashfn(__be32 val) | 613 | static inline unsigned int fib_laddr_hashfn(__be32 val) |
| 615 | { | 614 | { |
| 616 | unsigned int mask = (fib_hash_size - 1); | 615 | unsigned int mask = (fib_info_hash_size - 1); |
| 617 | 616 | ||
| 618 | return ((__force u32)val ^ | 617 | return ((__force u32)val ^ |
| 619 | ((__force u32)val >> 7) ^ | 618 | ((__force u32)val >> 7) ^ |
| 620 | ((__force u32)val >> 14)) & mask; | 619 | ((__force u32)val >> 14)) & mask; |
| 621 | } | 620 | } |
| 622 | 621 | ||
| 623 | static struct hlist_head *fib_hash_alloc(int bytes) | 622 | static struct hlist_head *fib_info_hash_alloc(int bytes) |
| 624 | { | 623 | { |
| 625 | if (bytes <= PAGE_SIZE) | 624 | if (bytes <= PAGE_SIZE) |
| 626 | return kzalloc(bytes, GFP_KERNEL); | 625 | return kzalloc(bytes, GFP_KERNEL); |
| @@ -630,7 +629,7 @@ static struct hlist_head *fib_hash_alloc(int bytes) | |||
| 630 | get_order(bytes)); | 629 | get_order(bytes)); |
| 631 | } | 630 | } |
| 632 | 631 | ||
| 633 | static void fib_hash_free(struct hlist_head *hash, int bytes) | 632 | static void fib_info_hash_free(struct hlist_head *hash, int bytes) |
| 634 | { | 633 | { |
| 635 | if (!hash) | 634 | if (!hash) |
| 636 | return; | 635 | return; |
| @@ -641,18 +640,18 @@ static void fib_hash_free(struct hlist_head *hash, int bytes) | |||
| 641 | free_pages((unsigned long) hash, get_order(bytes)); | 640 | free_pages((unsigned long) hash, get_order(bytes)); |
| 642 | } | 641 | } |
| 643 | 642 | ||
| 644 | static void fib_hash_move(struct hlist_head *new_info_hash, | 643 | static void fib_info_hash_move(struct hlist_head *new_info_hash, |
| 645 | struct hlist_head *new_laddrhash, | 644 | struct hlist_head *new_laddrhash, |
| 646 | unsigned int new_size) | 645 | unsigned int new_size) |
| 647 | { | 646 | { |
| 648 | struct hlist_head *old_info_hash, *old_laddrhash; | 647 | struct hlist_head *old_info_hash, *old_laddrhash; |
| 649 | unsigned int old_size = fib_hash_size; | 648 | unsigned int old_size = fib_info_hash_size; |
| 650 | unsigned int i, bytes; | 649 | unsigned int i, bytes; |
| 651 | 650 | ||
| 652 | spin_lock_bh(&fib_info_lock); | 651 | spin_lock_bh(&fib_info_lock); |
| 653 | old_info_hash = fib_info_hash; | 652 | old_info_hash = fib_info_hash; |
| 654 | old_laddrhash = fib_info_laddrhash; | 653 | old_laddrhash = fib_info_laddrhash; |
| 655 | fib_hash_size = new_size; | 654 | fib_info_hash_size = new_size; |
| 656 | 655 | ||
| 657 | for (i = 0; i < old_size; i++) { | 656 | for (i = 0; i < old_size; i++) { |
| 658 | struct hlist_head *head = &fib_info_hash[i]; | 657 | struct hlist_head *head = &fib_info_hash[i]; |
| @@ -693,8 +692,18 @@ static void fib_hash_move(struct hlist_head *new_info_hash, | |||
| 693 | spin_unlock_bh(&fib_info_lock); | 692 | spin_unlock_bh(&fib_info_lock); |
| 694 | 693 | ||
| 695 | bytes = old_size * sizeof(struct hlist_head *); | 694 | bytes = old_size * sizeof(struct hlist_head *); |
| 696 | fib_hash_free(old_info_hash, bytes); | 695 | fib_info_hash_free(old_info_hash, bytes); |
| 697 | fib_hash_free(old_laddrhash, bytes); | 696 | fib_info_hash_free(old_laddrhash, bytes); |
| 697 | } | ||
| 698 | |||
| 699 | __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) | ||
| 700 | { | ||
| 701 | nh->nh_saddr = inet_select_addr(nh->nh_dev, | ||
| 702 | nh->nh_gw, | ||
| 703 | nh->nh_parent->fib_scope); | ||
| 704 | nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); | ||
| 705 | |||
| 706 | return nh->nh_saddr; | ||
| 698 | } | 707 | } |
| 699 | 708 | ||
| 700 | struct fib_info *fib_create_info(struct fib_config *cfg) | 709 | struct fib_info *fib_create_info(struct fib_config *cfg) |
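The new fib_info_update_nh_saddr() caches each nexthop's preferred source address in nh_saddr and stamps it with the namespace-wide dev_addr_genid, so the inet_select_addr() walk is redone only after an address change bumps the counter. The same generation-checked cache pattern in stand-alone form (names and the recompute stub are illustrative):

    #include <stdint.h>

    static uint32_t current_genid;          /* bumped on every address change */

    struct gen_cache {
            uint32_t value;                 /* cached source address */
            uint32_t genid;                 /* generation it was computed under */
    };

    static uint32_t recompute(void)         /* stand-in for inet_select_addr() */
    {
            return 0x0a000001;              /* 10.0.0.1, placeholder */
    }

    /* Valid while the generation is unchanged; otherwise recompute and
     * restamp, mirroring nh_saddr/nh_saddr_genid. */
    static uint32_t gen_cache_get(struct gen_cache *c)
    {
            if (c->genid != current_genid) {
                    c->value = recompute();
                    c->genid = current_genid;
            }
            return c->value;
    }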
| @@ -705,6 +714,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 705 | int nhs = 1; | 714 | int nhs = 1; |
| 706 | struct net *net = cfg->fc_nlinfo.nl_net; | 715 | struct net *net = cfg->fc_nlinfo.nl_net; |
| 707 | 716 | ||
| 717 | if (cfg->fc_type > RTN_MAX) | ||
| 718 | goto err_inval; | ||
| 719 | |||
| 708 | /* Fast check to catch the most weird cases */ | 720 | /* Fast check to catch the most weird cases */ |
| 709 | if (fib_props[cfg->fc_type].scope > cfg->fc_scope) | 721 | if (fib_props[cfg->fc_type].scope > cfg->fc_scope) |
| 710 | goto err_inval; | 722 | goto err_inval; |
| @@ -718,8 +730,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 718 | #endif | 730 | #endif |
| 719 | 731 | ||
| 720 | err = -ENOBUFS; | 732 | err = -ENOBUFS; |
| 721 | if (fib_info_cnt >= fib_hash_size) { | 733 | if (fib_info_cnt >= fib_info_hash_size) { |
| 722 | unsigned int new_size = fib_hash_size << 1; | 734 | unsigned int new_size = fib_info_hash_size << 1; |
| 723 | struct hlist_head *new_info_hash; | 735 | struct hlist_head *new_info_hash; |
| 724 | struct hlist_head *new_laddrhash; | 736 | struct hlist_head *new_laddrhash; |
| 725 | unsigned int bytes; | 737 | unsigned int bytes; |
| @@ -727,25 +739,32 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 727 | if (!new_size) | 739 | if (!new_size) |
| 728 | new_size = 1; | 740 | new_size = 1; |
| 729 | bytes = new_size * sizeof(struct hlist_head *); | 741 | bytes = new_size * sizeof(struct hlist_head *); |
| 730 | new_info_hash = fib_hash_alloc(bytes); | 742 | new_info_hash = fib_info_hash_alloc(bytes); |
| 731 | new_laddrhash = fib_hash_alloc(bytes); | 743 | new_laddrhash = fib_info_hash_alloc(bytes); |
| 732 | if (!new_info_hash || !new_laddrhash) { | 744 | if (!new_info_hash || !new_laddrhash) { |
| 733 | fib_hash_free(new_info_hash, bytes); | 745 | fib_info_hash_free(new_info_hash, bytes); |
| 734 | fib_hash_free(new_laddrhash, bytes); | 746 | fib_info_hash_free(new_laddrhash, bytes); |
| 735 | } else | 747 | } else |
| 736 | fib_hash_move(new_info_hash, new_laddrhash, new_size); | 748 | fib_info_hash_move(new_info_hash, new_laddrhash, new_size); |
| 737 | 749 | ||
| 738 | if (!fib_hash_size) | 750 | if (!fib_info_hash_size) |
| 739 | goto failure; | 751 | goto failure; |
| 740 | } | 752 | } |
| 741 | 753 | ||
| 742 | fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); | 754 | fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); |
| 743 | if (fi == NULL) | 755 | if (fi == NULL) |
| 744 | goto failure; | 756 | goto failure; |
| 757 | if (cfg->fc_mx) { | ||
| 758 | fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); | ||
| 759 | if (!fi->fib_metrics) | ||
| 760 | goto failure; | ||
| 761 | } else | ||
| 762 | fi->fib_metrics = (u32 *) dst_default_metrics; | ||
| 745 | fib_info_cnt++; | 763 | fib_info_cnt++; |
| 746 | 764 | ||
| 747 | fi->fib_net = hold_net(net); | 765 | fi->fib_net = hold_net(net); |
| 748 | fi->fib_protocol = cfg->fc_protocol; | 766 | fi->fib_protocol = cfg->fc_protocol; |
| 767 | fi->fib_scope = cfg->fc_scope; | ||
| 749 | fi->fib_flags = cfg->fc_flags; | 768 | fi->fib_flags = cfg->fc_flags; |
| 750 | fi->fib_priority = cfg->fc_priority; | 769 | fi->fib_priority = cfg->fc_priority; |
| 751 | fi->fib_prefsrc = cfg->fc_prefsrc; | 770 | fi->fib_prefsrc = cfg->fc_prefsrc; |
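fib_metrics also changes representation here: a route configured without explicit metrics (no fc_mx) now points at the shared, all-zero dst_default_metrics block instead of carrying its own inline array, and only routes with RTAX attributes pay for a kzalloc. A sketch of that share-by-default scheme (names illustrative, error handling trimmed):

    #include <stdint.h>
    #include <stdlib.h>

    #define RTAX_MAX 15                     /* assumption: era-correct value */

    static const uint32_t default_metrics[RTAX_MAX]; /* zeroed, shared, never freed */

    struct route {
            uint32_t *metrics;
    };

    static int route_init_metrics(struct route *r, int has_explicit_metrics)
    {
            if (has_explicit_metrics) {
                    r->metrics = calloc(RTAX_MAX, sizeof(uint32_t));
                    if (!r->metrics)
                            return -1;
            } else {
                    /* Cast away const: every reader treats the block as
                     * read-only, and the free path must recognise it. */
                    r->metrics = (uint32_t *)default_metrics;
            }
            return 0;
    }

    static void route_free_metrics(struct route *r)
    {
            if (r->metrics != (uint32_t *)default_metrics)
                    free(r->metrics);
    }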
| @@ -779,7 +798,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 779 | goto err_inval; | 798 | goto err_inval; |
| 780 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) | 799 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) |
| 781 | goto err_inval; | 800 | goto err_inval; |
| 782 | #ifdef CONFIG_NET_CLS_ROUTE | 801 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 783 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) | 802 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) |
| 784 | goto err_inval; | 803 | goto err_inval; |
| 785 | #endif | 804 | #endif |
| @@ -792,7 +811,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 792 | nh->nh_oif = cfg->fc_oif; | 811 | nh->nh_oif = cfg->fc_oif; |
| 793 | nh->nh_gw = cfg->fc_gw; | 812 | nh->nh_gw = cfg->fc_gw; |
| 794 | nh->nh_flags = cfg->fc_flags; | 813 | nh->nh_flags = cfg->fc_flags; |
| 795 | #ifdef CONFIG_NET_CLS_ROUTE | 814 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 796 | nh->nh_tclassid = cfg->fc_flow; | 815 | nh->nh_tclassid = cfg->fc_flow; |
| 797 | #endif | 816 | #endif |
| 798 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 817 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| @@ -804,6 +823,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 804 | if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) | 823 | if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) |
| 805 | goto err_inval; | 824 | goto err_inval; |
| 806 | goto link_it; | 825 | goto link_it; |
| 826 | } else { | ||
| 827 | switch (cfg->fc_type) { | ||
| 828 | case RTN_UNICAST: | ||
| 829 | case RTN_LOCAL: | ||
| 830 | case RTN_BROADCAST: | ||
| 831 | case RTN_ANYCAST: | ||
| 832 | case RTN_MULTICAST: | ||
| 833 | break; | ||
| 834 | default: | ||
| 835 | goto err_inval; | ||
| 836 | } | ||
| 807 | } | 837 | } |
| 808 | 838 | ||
| 809 | if (cfg->fc_scope > RT_SCOPE_HOST) | 839 | if (cfg->fc_scope > RT_SCOPE_HOST) |
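Paired with the new fc_type > RTN_MAX bound at the top of fib_create_info(), this switch refuses gatewayless, non-multipath routes whose type cannot carry a device nexthop. The same whitelist as a stand-alone predicate (enum values mirror the uapi rtnetlink definitions):

    #include <stdbool.h>

    enum rtn {                              /* subset of the uapi enum */
            RTN_UNICAST   = 1,
            RTN_LOCAL     = 2,
            RTN_BROADCAST = 3,
            RTN_ANYCAST   = 4,
            RTN_MULTICAST = 5,
    };

    /* Only these types make sense for a route defined purely by an
     * output device (no gateway, no nexthop list). */
    static bool device_route_type_ok(int type)
    {
            switch (type) {
            case RTN_UNICAST:
            case RTN_LOCAL:
            case RTN_BROADCAST:
            case RTN_ANYCAST:
            case RTN_MULTICAST:
                    return true;
            default:
                    return false;
            }
    }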
| @@ -835,6 +865,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 835 | goto err_inval; | 865 | goto err_inval; |
| 836 | } | 866 | } |
| 837 | 867 | ||
| 868 | change_nexthops(fi) { | ||
| 869 | fib_info_update_nh_saddr(net, nexthop_nh); | ||
| 870 | } endfor_nexthops(fi) | ||
| 871 | |||
| 838 | link_it: | 872 | link_it: |
| 839 | ofi = fib_find_info(fi); | 873 | ofi = fib_find_info(fi); |
| 840 | if (ofi) { | 874 | if (ofi) { |
| @@ -880,86 +914,8 @@ failure: | |||
| 880 | return ERR_PTR(err); | 914 | return ERR_PTR(err); |
| 881 | } | 915 | } |
| 882 | 916 | ||
| 883 | /* Note! fib_semantic_match intentionally uses RCU list functions. */ | ||
| 884 | int fib_semantic_match(struct list_head *head, const struct flowi *flp, | ||
| 885 | struct fib_result *res, int prefixlen, int fib_flags) | ||
| 886 | { | ||
| 887 | struct fib_alias *fa; | ||
| 888 | int nh_sel = 0; | ||
| 889 | |||
| 890 | list_for_each_entry_rcu(fa, head, fa_list) { | ||
| 891 | int err; | ||
| 892 | |||
| 893 | if (fa->fa_tos && | ||
| 894 | fa->fa_tos != flp->fl4_tos) | ||
| 895 | continue; | ||
| 896 | |||
| 897 | if (fa->fa_scope < flp->fl4_scope) | ||
| 898 | continue; | ||
| 899 | |||
| 900 | fib_alias_accessed(fa); | ||
| 901 | |||
| 902 | err = fib_props[fa->fa_type].error; | ||
| 903 | if (err == 0) { | ||
| 904 | struct fib_info *fi = fa->fa_info; | ||
| 905 | |||
| 906 | if (fi->fib_flags & RTNH_F_DEAD) | ||
| 907 | continue; | ||
| 908 | |||
| 909 | switch (fa->fa_type) { | ||
| 910 | case RTN_UNICAST: | ||
| 911 | case RTN_LOCAL: | ||
| 912 | case RTN_BROADCAST: | ||
| 913 | case RTN_ANYCAST: | ||
| 914 | case RTN_MULTICAST: | ||
| 915 | for_nexthops(fi) { | ||
| 916 | if (nh->nh_flags & RTNH_F_DEAD) | ||
| 917 | continue; | ||
| 918 | if (!flp->oif || flp->oif == nh->nh_oif) | ||
| 919 | break; | ||
| 920 | } | ||
| 921 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
| 922 | if (nhsel < fi->fib_nhs) { | ||
| 923 | nh_sel = nhsel; | ||
| 924 | goto out_fill_res; | ||
| 925 | } | ||
| 926 | #else | ||
| 927 | if (nhsel < 1) | ||
| 928 | goto out_fill_res; | ||
| 929 | #endif | ||
| 930 | endfor_nexthops(fi); | ||
| 931 | continue; | ||
| 932 | |||
| 933 | default: | ||
| 934 | pr_warning("fib_semantic_match bad type %#x\n", | ||
| 935 | fa->fa_type); | ||
| 936 | return -EINVAL; | ||
| 937 | } | ||
| 938 | } | ||
| 939 | return err; | ||
| 940 | } | ||
| 941 | return 1; | ||
| 942 | |||
| 943 | out_fill_res: | ||
| 944 | res->prefixlen = prefixlen; | ||
| 945 | res->nh_sel = nh_sel; | ||
| 946 | res->type = fa->fa_type; | ||
| 947 | res->scope = fa->fa_scope; | ||
| 948 | res->fi = fa->fa_info; | ||
| 949 | if (!(fib_flags & FIB_LOOKUP_NOREF)) | ||
| 950 | atomic_inc(&res->fi->fib_clntref); | ||
| 951 | return 0; | ||
| 952 | } | ||
| 953 | |||
| 954 | /* Find appropriate source address to this destination */ | ||
| 955 | |||
| 956 | __be32 __fib_res_prefsrc(struct fib_result *res) | ||
| 957 | { | ||
| 958 | return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); | ||
| 959 | } | ||
| 960 | |||
| 961 | int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | 917 | int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, |
| 962 | u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, | 918 | u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, |
| 963 | struct fib_info *fi, unsigned int flags) | 919 | struct fib_info *fi, unsigned int flags) |
| 964 | { | 920 | { |
| 965 | struct nlmsghdr *nlh; | 921 | struct nlmsghdr *nlh; |
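fib_semantic_match() is deleted outright rather than renamed: its per-alias filtering moves inline into the trie's check_leaf() (see the fib_trie.c hunks below), and __fib_res_prefsrc() goes with it, its job taken over by the nh_saddr cache added above. What the removed function did per fib_alias reduces to roughly this sketch (flattened structs, illustrative names):

    #include <stdbool.h>

    #define F_DEAD 0x1

    struct nh  { unsigned flags; int oif; };
    struct rt  { unsigned char tos, scope; unsigned flags;
                 int nhs; struct nh nh[2]; };
    struct key { unsigned char tos, scope; int oif; };

    /* Return the index of the first usable nexthop, or -1.  This is the
     * essence of the loop the removed function ran for each alias. */
    static int match_route(const struct rt *r, const struct key *k)
    {
            if (r->tos && r->tos != k->tos)
                    return -1;              /* alias TOS must match exactly */
            if (r->scope < k->scope)
                    return -1;              /* route may not be narrower    */
            if (r->flags & F_DEAD)
                    return -1;
            for (int i = 0; i < r->nhs; i++) {
                    if (r->nh[i].flags & F_DEAD)
                            continue;
                    if (k->oif && k->oif != r->nh[i].oif)
                            continue;
                    return i;
            }
            return -1;
    }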
| @@ -981,7 +937,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
| 981 | NLA_PUT_U32(skb, RTA_TABLE, tb_id); | 937 | NLA_PUT_U32(skb, RTA_TABLE, tb_id); |
| 982 | rtm->rtm_type = type; | 938 | rtm->rtm_type = type; |
| 983 | rtm->rtm_flags = fi->fib_flags; | 939 | rtm->rtm_flags = fi->fib_flags; |
| 984 | rtm->rtm_scope = scope; | 940 | rtm->rtm_scope = fi->fib_scope; |
| 985 | rtm->rtm_protocol = fi->fib_protocol; | 941 | rtm->rtm_protocol = fi->fib_protocol; |
| 986 | 942 | ||
| 987 | if (rtm->rtm_dst_len) | 943 | if (rtm->rtm_dst_len) |
| @@ -1002,7 +958,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
| 1002 | 958 | ||
| 1003 | if (fi->fib_nh->nh_oif) | 959 | if (fi->fib_nh->nh_oif) |
| 1004 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); | 960 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); |
| 1005 | #ifdef CONFIG_NET_CLS_ROUTE | 961 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 1006 | if (fi->fib_nh[0].nh_tclassid) | 962 | if (fi->fib_nh[0].nh_tclassid) |
| 1007 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); | 963 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); |
| 1008 | #endif | 964 | #endif |
| @@ -1027,7 +983,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
| 1027 | 983 | ||
| 1028 | if (nh->nh_gw) | 984 | if (nh->nh_gw) |
| 1029 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); | 985 | NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); |
| 1030 | #ifdef CONFIG_NET_CLS_ROUTE | 986 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 1031 | if (nh->nh_tclassid) | 987 | if (nh->nh_tclassid) |
| 1032 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); | 988 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); |
| 1033 | #endif | 989 | #endif |
| @@ -1125,6 +1081,62 @@ int fib_sync_down_dev(struct net_device *dev, int force) | |||
| 1125 | return ret; | 1081 | return ret; |
| 1126 | } | 1082 | } |
| 1127 | 1083 | ||
| 1084 | /* Must be invoked inside of an RCU protected region. */ | ||
| 1085 | void fib_select_default(struct fib_result *res) | ||
| 1086 | { | ||
| 1087 | struct fib_info *fi = NULL, *last_resort = NULL; | ||
| 1088 | struct list_head *fa_head = res->fa_head; | ||
| 1089 | struct fib_table *tb = res->table; | ||
| 1090 | int order = -1, last_idx = -1; | ||
| 1091 | struct fib_alias *fa; | ||
| 1092 | |||
| 1093 | list_for_each_entry_rcu(fa, fa_head, fa_list) { | ||
| 1094 | struct fib_info *next_fi = fa->fa_info; | ||
| 1095 | |||
| 1096 | if (next_fi->fib_scope != res->scope || | ||
| 1097 | fa->fa_type != RTN_UNICAST) | ||
| 1098 | continue; | ||
| 1099 | |||
| 1100 | if (next_fi->fib_priority > res->fi->fib_priority) | ||
| 1101 | break; | ||
| 1102 | if (!next_fi->fib_nh[0].nh_gw || | ||
| 1103 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | ||
| 1104 | continue; | ||
| 1105 | |||
| 1106 | fib_alias_accessed(fa); | ||
| 1107 | |||
| 1108 | if (fi == NULL) { | ||
| 1109 | if (next_fi != res->fi) | ||
| 1110 | break; | ||
| 1111 | } else if (!fib_detect_death(fi, order, &last_resort, | ||
| 1112 | &last_idx, tb->tb_default)) { | ||
| 1113 | fib_result_assign(res, fi); | ||
| 1114 | tb->tb_default = order; | ||
| 1115 | goto out; | ||
| 1116 | } | ||
| 1117 | fi = next_fi; | ||
| 1118 | order++; | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | if (order <= 0 || fi == NULL) { | ||
| 1122 | tb->tb_default = -1; | ||
| 1123 | goto out; | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | ||
| 1127 | tb->tb_default)) { | ||
| 1128 | fib_result_assign(res, fi); | ||
| 1129 | tb->tb_default = order; | ||
| 1130 | goto out; | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | if (last_idx >= 0) | ||
| 1134 | fib_result_assign(res, last_resort); | ||
| 1135 | tb->tb_default = last_idx; | ||
| 1136 | out: | ||
| 1137 | return; | ||
| 1138 | } | ||
| 1139 | |||
| 1128 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1140 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| 1129 | 1141 | ||
| 1130 | /* | 1142 | /* |
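fib_select_default() lands here from fib_trie.c (the old per-table copy is deleted later in this diff), now scope-matched against fib_info rather than fib_alias. The walk numbers the RTN_UNICAST candidates in order, and when the cached default looks dead it falls back to the last live candidate seen. A heavily condensed sketch of that first-live-else-last-resort shape, leaving out the fib_detect_death() plumbing:

    #include <stdbool.h>

    struct cand { bool alive; };

    /* Pick the first live candidate at or after the cached index;
     * remember the last live one before it as a fallback, the role
     * last_resort/last_idx play above.  May return -1. */
    static int pick_default(const struct cand *c, int n, int cached)
    {
            int last_idx = -1;

            for (int i = 0; i < n; i++) {
                    if (!c[i].alive)
                            continue;
                    if (i >= cached)
                            return i;       /* first live candidate in order */
                    last_idx = i;           /* live, but before the cached slot */
            }
            return last_idx;
    }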
| @@ -1189,7 +1201,7 @@ int fib_sync_up(struct net_device *dev) | |||
| 1189 | * The algorithm is suboptimal, but it provides really | 1201 | * The algorithm is suboptimal, but it provides really |
| 1190 | * fair weighted route distribution. | 1202 | * fair weighted route distribution. |
| 1191 | */ | 1203 | */ |
| 1192 | void fib_select_multipath(const struct flowi *flp, struct fib_result *res) | 1204 | void fib_select_multipath(struct fib_result *res) |
| 1193 | { | 1205 | { |
| 1194 | struct fib_info *fi = res->fi; | 1206 | struct fib_info *fi = res->fi; |
| 1195 | int w; | 1207 | int w; |
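fib_select_multipath() merely loses its unused flowi argument here; the selection itself still spreads flows using per-nexthop "power" counters that are periodically refilled from the configured weights. A simpler weighted draw with the same long-run distribution, as a sketch (not the kernel's counter scheme):

    #include <stdlib.h>

    struct mp_nh { int weight; };

    /* Draw a point in [0, total_weight) and walk the nexthop list until
     * the running sum passes it.  Each nexthop's long-run share of
     * traffic is weight/total, the property the kernel's power-counter
     * scheme also provides.  Weights are assumed positive. */
    static int select_nexthop(const struct mp_nh *nh, int n)
    {
            int total = 0;
            for (int i = 0; i < n; i++)
                    total += nh[i].weight;

            int w = rand() % total;
            for (int i = 0; i < n; i++) {
                    if (w < nh[i].weight)
                            return i;
                    w -= nh[i].weight;
            }
            return n - 1;                   /* not reached for positive weights */
    }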
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0f280348e0fd..b92c86f6e9b3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
| @@ -95,7 +95,7 @@ typedef unsigned int t_key; | |||
| 95 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) | 95 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) |
| 96 | #define IS_LEAF(n) (n->parent & T_LEAF) | 96 | #define IS_LEAF(n) (n->parent & T_LEAF) |
| 97 | 97 | ||
| 98 | struct node { | 98 | struct rt_trie_node { |
| 99 | unsigned long parent; | 99 | unsigned long parent; |
| 100 | t_key key; | 100 | t_key key; |
| 101 | }; | 101 | }; |
| @@ -126,7 +126,7 @@ struct tnode { | |||
| 126 | struct work_struct work; | 126 | struct work_struct work; |
| 127 | struct tnode *tnode_free; | 127 | struct tnode *tnode_free; |
| 128 | }; | 128 | }; |
| 129 | struct node *child[0]; | 129 | struct rt_trie_node *child[0]; |
| 130 | }; | 130 | }; |
| 131 | 131 | ||
| 132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 132 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
| @@ -151,16 +151,16 @@ struct trie_stat { | |||
| 151 | }; | 151 | }; |
| 152 | 152 | ||
| 153 | struct trie { | 153 | struct trie { |
| 154 | struct node *trie; | 154 | struct rt_trie_node *trie; |
| 155 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 155 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
| 156 | struct trie_use_stats stats; | 156 | struct trie_use_stats stats; |
| 157 | #endif | 157 | #endif |
| 158 | }; | 158 | }; |
| 159 | 159 | ||
| 160 | static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); | 160 | static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); |
| 161 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | 161 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
| 162 | int wasfull); | 162 | int wasfull); |
| 163 | static struct node *resize(struct trie *t, struct tnode *tn); | 163 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); |
| 164 | static struct tnode *inflate(struct trie *t, struct tnode *tn); | 164 | static struct tnode *inflate(struct trie *t, struct tnode *tn); |
| 165 | static struct tnode *halve(struct trie *t, struct tnode *tn); | 165 | static struct tnode *halve(struct trie *t, struct tnode *tn); |
| 166 | /* tnodes to free after resize(); protected by RTNL */ | 166 | /* tnodes to free after resize(); protected by RTNL */ |
| @@ -177,12 +177,12 @@ static const int sync_pages = 128; | |||
| 177 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 177 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
| 178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
| 179 | 179 | ||
| 180 | static inline struct tnode *node_parent(struct node *node) | 180 | static inline struct tnode *node_parent(struct rt_trie_node *node) |
| 181 | { | 181 | { |
| 182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); | 182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); |
| 183 | } | 183 | } |
| 184 | 184 | ||
| 185 | static inline struct tnode *node_parent_rcu(struct node *node) | 185 | static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) |
| 186 | { | 186 | { |
| 187 | struct tnode *ret = node_parent(node); | 187 | struct tnode *ret = node_parent(node); |
| 188 | 188 | ||
| @@ -192,22 +192,22 @@ static inline struct tnode *node_parent_rcu(struct node *node) | |||
| 192 | /* Same as rcu_assign_pointer | 192 | /* Same as rcu_assign_pointer |
| 193 | * but that macro() assumes that value is a pointer. | 193 | * but that macro() assumes that value is a pointer. |
| 194 | */ | 194 | */ |
| 195 | static inline void node_set_parent(struct node *node, struct tnode *ptr) | 195 | static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) |
| 196 | { | 196 | { |
| 197 | smp_wmb(); | 197 | smp_wmb(); |
| 198 | node->parent = (unsigned long)ptr | NODE_TYPE(node); | 198 | node->parent = (unsigned long)ptr | NODE_TYPE(node); |
| 199 | } | 199 | } |
| 200 | 200 | ||
| 201 | static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) | 201 | static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) |
| 202 | { | 202 | { |
| 203 | BUG_ON(i >= 1U << tn->bits); | 203 | BUG_ON(i >= 1U << tn->bits); |
| 204 | 204 | ||
| 205 | return tn->child[i]; | 205 | return tn->child[i]; |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) | 208 | static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) |
| 209 | { | 209 | { |
| 210 | struct node *ret = tnode_get_child(tn, i); | 210 | struct rt_trie_node *ret = tnode_get_child(tn, i); |
| 211 | 211 | ||
| 212 | return rcu_dereference_rtnl(ret); | 212 | return rcu_dereference_rtnl(ret); |
| 213 | } | 213 | } |
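The node → rt_trie_node rename is mechanical (the bare name clashed with other kernel structs), but the helpers being touched show how the trie links upward: a node's own parent word is an unsigned long whose low bit holds T_LEAF, so node_parent() masks the tag off and node_set_parent() writes the new pointer back with the node's type preserved. That low-bit tagging in stand-alone form, assuming pointer alignment leaves bit 0 free:

    #include <assert.h>
    #include <stdint.h>

    #define T_LEAF 1UL                      /* stored in bit 0 of ->parent */

    struct tnode_ref {
            uintptr_t parent;               /* tagged: parent pointer | node type */
    };

    /* Write a new parent while preserving this node's own type bit,
     * as node_set_parent() does with NODE_TYPE(node). */
    static void set_parent(struct tnode_ref *n, void *parent)
    {
            assert(((uintptr_t)parent & T_LEAF) == 0);  /* alignment frees bit 0 */
            n->parent = (uintptr_t)parent | (n->parent & T_LEAF);
    }

    static void *get_parent(const struct tnode_ref *n)
    {
            return (void *)(n->parent & ~T_LEAF);
    }

    static int node_is_leaf(const struct tnode_ref *n)
    {
            return n->parent & T_LEAF;
    }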
| @@ -217,12 +217,12 @@ static inline int tnode_child_length(const struct tnode *tn) | |||
| 217 | return 1 << tn->bits; | 217 | return 1 << tn->bits; |
| 218 | } | 218 | } |
| 219 | 219 | ||
| 220 | static inline t_key mask_pfx(t_key k, unsigned short l) | 220 | static inline t_key mask_pfx(t_key k, unsigned int l) |
| 221 | { | 221 | { |
| 222 | return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); | 222 | return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); |
| 223 | } | 223 | } |
| 224 | 224 | ||
| 225 | static inline t_key tkey_extract_bits(t_key a, int offset, int bits) | 225 | static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) |
| 226 | { | 226 | { |
| 227 | if (offset < KEYLENGTH) | 227 | if (offset < KEYLENGTH) |
| 228 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); | 228 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); |
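The int → unsigned int widening of tkey_extract_bits() is cosmetic; the operation itself reads a window of bits out of the 32-bit key by shifting the window to the top of the word and then down to the bottom. A runnable worked example on an IPv4 address (bits must be >= 1, as in the kernel's callers):

    #include <stdint.h>
    #include <stdio.h>

    #define KEYLENGTH 32
    typedef uint32_t t_key;

    /* Take `bits` bits out of key `a`, starting `offset` bits from the MSB. */
    static t_key extract_bits(t_key a, unsigned offset, unsigned bits)
    {
            if (offset < KEYLENGTH)
                    return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
            return 0;
    }

    int main(void)
    {
            t_key key = 0xc0a80105;         /* 192.168.1.5 in host order */

            /* First octet: offset 0, 8 bits -> 192. */
            printf("%u\n", extract_bits(key, 0, 8));
            /* Two bits after a /24 match: offset 24, 2 bits -> 0. */
            printf("%u\n", extract_bits(key, 24, 2));
            return 0;
    }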
| @@ -378,7 +378,7 @@ static void __tnode_free_rcu(struct rcu_head *head) | |||
| 378 | { | 378 | { |
| 379 | struct tnode *tn = container_of(head, struct tnode, rcu); | 379 | struct tnode *tn = container_of(head, struct tnode, rcu); |
| 380 | size_t size = sizeof(struct tnode) + | 380 | size_t size = sizeof(struct tnode) + |
| 381 | (sizeof(struct node *) << tn->bits); | 381 | (sizeof(struct rt_trie_node *) << tn->bits); |
| 382 | 382 | ||
| 383 | if (size <= PAGE_SIZE) | 383 | if (size <= PAGE_SIZE) |
| 384 | kfree(tn); | 384 | kfree(tn); |
| @@ -402,7 +402,7 @@ static void tnode_free_safe(struct tnode *tn) | |||
| 402 | tn->tnode_free = tnode_free_head; | 402 | tn->tnode_free = tnode_free_head; |
| 403 | tnode_free_head = tn; | 403 | tnode_free_head = tn; |
| 404 | tnode_free_size += sizeof(struct tnode) + | 404 | tnode_free_size += sizeof(struct tnode) + |
| 405 | (sizeof(struct node *) << tn->bits); | 405 | (sizeof(struct rt_trie_node *) << tn->bits); |
| 406 | } | 406 | } |
| 407 | 407 | ||
| 408 | static void tnode_free_flush(void) | 408 | static void tnode_free_flush(void) |
| @@ -443,7 +443,7 @@ static struct leaf_info *leaf_info_new(int plen) | |||
| 443 | 443 | ||
| 444 | static struct tnode *tnode_new(t_key key, int pos, int bits) | 444 | static struct tnode *tnode_new(t_key key, int pos, int bits) |
| 445 | { | 445 | { |
| 446 | size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); | 446 | size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); |
| 447 | struct tnode *tn = tnode_alloc(sz); | 447 | struct tnode *tn = tnode_alloc(sz); |
| 448 | 448 | ||
| 449 | if (tn) { | 449 | if (tn) { |
| @@ -456,7 +456,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
| 456 | } | 456 | } |
| 457 | 457 | ||
| 458 | pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), | 458 | pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), |
| 459 | sizeof(struct node) << bits); | 459 | sizeof(struct rt_trie_node) << bits); |
| 460 | return tn; | 460 | return tn; |
| 461 | } | 461 | } |
| 462 | 462 | ||
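All the size arithmetic being renamed in these hunks follows one formula: a tnode occupies its header plus one child pointer per slot, sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits), and tnode_alloc(), tnode_free_safe() and __tnode_free_rcu() all branch on whether that total fits in a page. A quick sketch of the crossover (the header layout is a stand-in):

    #include <stddef.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096

    struct hdr { unsigned long parent; unsigned key; unsigned bits; };

    int main(void)
    {
            for (unsigned bits = 1; bits <= 12; bits++) {
                    size_t sz = sizeof(struct hdr) + (sizeof(void *) << bits);

                    /* Small tnodes come from kzalloc; big ones from
                     * whole pages, and the free path must match. */
                    printf("bits=%2u size=%6zu -> %s\n", bits, sz,
                           sz <= PAGE_SIZE ? "kzalloc/kfree"
                                           : "alloc_pages/free_pages");
            }
            return 0;
    }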
| @@ -465,7 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
| 465 | * and no bits are skipped. See discussion in dyntree paper p. 6 | 465 | * and no bits are skipped. See discussion in dyntree paper p. 6 |
| 466 | */ | 466 | */ |
| 467 | 467 | ||
| 468 | static inline int tnode_full(const struct tnode *tn, const struct node *n) | 468 | static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) |
| 469 | { | 469 | { |
| 470 | if (n == NULL || IS_LEAF(n)) | 470 | if (n == NULL || IS_LEAF(n)) |
| 471 | return 0; | 471 | return 0; |
| @@ -474,7 +474,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n) | |||
| 474 | } | 474 | } |
| 475 | 475 | ||
| 476 | static inline void put_child(struct trie *t, struct tnode *tn, int i, | 476 | static inline void put_child(struct trie *t, struct tnode *tn, int i, |
| 477 | struct node *n) | 477 | struct rt_trie_node *n) |
| 478 | { | 478 | { |
| 479 | tnode_put_child_reorg(tn, i, n, -1); | 479 | tnode_put_child_reorg(tn, i, n, -1); |
| 480 | } | 480 | } |
| @@ -484,10 +484,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, | |||
| 484 | * Update the value of full_children and empty_children. | 484 | * Update the value of full_children and empty_children. |
| 485 | */ | 485 | */ |
| 486 | 486 | ||
| 487 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | 487 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
| 488 | int wasfull) | 488 | int wasfull) |
| 489 | { | 489 | { |
| 490 | struct node *chi = tn->child[i]; | 490 | struct rt_trie_node *chi = tn->child[i]; |
| 491 | int isfull; | 491 | int isfull; |
| 492 | 492 | ||
| 493 | BUG_ON(i >= 1<<tn->bits); | 493 | BUG_ON(i >= 1<<tn->bits); |
| @@ -515,7 +515,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | |||
| 515 | } | 515 | } |
| 516 | 516 | ||
| 517 | #define MAX_WORK 10 | 517 | #define MAX_WORK 10 |
| 518 | static struct node *resize(struct trie *t, struct tnode *tn) | 518 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) |
| 519 | { | 519 | { |
| 520 | int i; | 520 | int i; |
| 521 | struct tnode *old_tn; | 521 | struct tnode *old_tn; |
| @@ -605,7 +605,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
| 605 | 605 | ||
| 606 | /* Keep root node larger */ | 606 | /* Keep root node larger */ |
| 607 | 607 | ||
| 608 | if (!node_parent((struct node *)tn)) { | 608 | if (!node_parent((struct rt_trie_node *)tn)) { |
| 609 | inflate_threshold_use = inflate_threshold_root; | 609 | inflate_threshold_use = inflate_threshold_root; |
| 610 | halve_threshold_use = halve_threshold_root; | 610 | halve_threshold_use = halve_threshold_root; |
| 611 | } else { | 611 | } else { |
| @@ -635,7 +635,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
| 635 | 635 | ||
| 636 | /* Return if at least one inflate is run */ | 636 | /* Return if at least one inflate is run */ |
| 637 | if (max_work != MAX_WORK) | 637 | if (max_work != MAX_WORK) |
| 638 | return (struct node *) tn; | 638 | return (struct rt_trie_node *) tn; |
| 639 | 639 | ||
| 640 | /* | 640 | /* |
| 641 | * Halve as long as the number of empty children in this | 641 | * Halve as long as the number of empty children in this |
| @@ -663,7 +663,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
| 663 | if (tn->empty_children == tnode_child_length(tn) - 1) { | 663 | if (tn->empty_children == tnode_child_length(tn) - 1) { |
| 664 | one_child: | 664 | one_child: |
| 665 | for (i = 0; i < tnode_child_length(tn); i++) { | 665 | for (i = 0; i < tnode_child_length(tn); i++) { |
| 666 | struct node *n; | 666 | struct rt_trie_node *n; |
| 667 | 667 | ||
| 668 | n = tn->child[i]; | 668 | n = tn->child[i]; |
| 669 | if (!n) | 669 | if (!n) |
| @@ -676,7 +676,7 @@ one_child: | |||
| 676 | return n; | 676 | return n; |
| 677 | } | 677 | } |
| 678 | } | 678 | } |
| 679 | return (struct node *) tn; | 679 | return (struct rt_trie_node *) tn; |
| 680 | } | 680 | } |
| 681 | 681 | ||
| 682 | static struct tnode *inflate(struct trie *t, struct tnode *tn) | 682 | static struct tnode *inflate(struct trie *t, struct tnode *tn) |
| @@ -723,14 +723,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
| 723 | goto nomem; | 723 | goto nomem; |
| 724 | } | 724 | } |
| 725 | 725 | ||
| 726 | put_child(t, tn, 2*i, (struct node *) left); | 726 | put_child(t, tn, 2*i, (struct rt_trie_node *) left); |
| 727 | put_child(t, tn, 2*i+1, (struct node *) right); | 727 | put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); |
| 728 | } | 728 | } |
| 729 | } | 729 | } |
| 730 | 730 | ||
| 731 | for (i = 0; i < olen; i++) { | 731 | for (i = 0; i < olen; i++) { |
| 732 | struct tnode *inode; | 732 | struct tnode *inode; |
| 733 | struct node *node = tnode_get_child(oldtnode, i); | 733 | struct rt_trie_node *node = tnode_get_child(oldtnode, i); |
| 734 | struct tnode *left, *right; | 734 | struct tnode *left, *right; |
| 735 | int size, j; | 735 | int size, j; |
| 736 | 736 | ||
| @@ -825,7 +825,7 @@ nomem: | |||
| 825 | static struct tnode *halve(struct trie *t, struct tnode *tn) | 825 | static struct tnode *halve(struct trie *t, struct tnode *tn) |
| 826 | { | 826 | { |
| 827 | struct tnode *oldtnode = tn; | 827 | struct tnode *oldtnode = tn; |
| 828 | struct node *left, *right; | 828 | struct rt_trie_node *left, *right; |
| 829 | int i; | 829 | int i; |
| 830 | int olen = tnode_child_length(tn); | 830 | int olen = tnode_child_length(tn); |
| 831 | 831 | ||
| @@ -856,7 +856,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
| 856 | if (!newn) | 856 | if (!newn) |
| 857 | goto nomem; | 857 | goto nomem; |
| 858 | 858 | ||
| 859 | put_child(t, tn, i/2, (struct node *)newn); | 859 | put_child(t, tn, i/2, (struct rt_trie_node *)newn); |
| 860 | } | 860 | } |
| 861 | 861 | ||
| 862 | } | 862 | } |
| @@ -958,7 +958,7 @@ fib_find_node(struct trie *t, u32 key) | |||
| 958 | { | 958 | { |
| 959 | int pos; | 959 | int pos; |
| 960 | struct tnode *tn; | 960 | struct tnode *tn; |
| 961 | struct node *n; | 961 | struct rt_trie_node *n; |
| 962 | 962 | ||
| 963 | pos = 0; | 963 | pos = 0; |
| 964 | n = rcu_dereference_rtnl(t->trie); | 964 | n = rcu_dereference_rtnl(t->trie); |
| @@ -993,17 +993,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
| 993 | 993 | ||
| 994 | key = tn->key; | 994 | key = tn->key; |
| 995 | 995 | ||
| 996 | while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { | 996 | while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { |
| 997 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 997 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
| 998 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); | 998 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); |
| 999 | tn = (struct tnode *) resize(t, (struct tnode *)tn); | 999 | tn = (struct tnode *) resize(t, (struct tnode *)tn); |
| 1000 | 1000 | ||
| 1001 | tnode_put_child_reorg((struct tnode *)tp, cindex, | 1001 | tnode_put_child_reorg((struct tnode *)tp, cindex, |
| 1002 | (struct node *)tn, wasfull); | 1002 | (struct rt_trie_node *)tn, wasfull); |
| 1003 | 1003 | ||
| 1004 | tp = node_parent((struct node *) tn); | 1004 | tp = node_parent((struct rt_trie_node *) tn); |
| 1005 | if (!tp) | 1005 | if (!tp) |
| 1006 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1006 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
| 1007 | 1007 | ||
| 1008 | tnode_free_flush(); | 1008 | tnode_free_flush(); |
| 1009 | if (!tp) | 1009 | if (!tp) |
| @@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
| 1015 | if (IS_TNODE(tn)) | 1015 | if (IS_TNODE(tn)) |
| 1016 | tn = (struct tnode *)resize(t, (struct tnode *)tn); | 1016 | tn = (struct tnode *)resize(t, (struct tnode *)tn); |
| 1017 | 1017 | ||
| 1018 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1018 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
| 1019 | tnode_free_flush(); | 1019 | tnode_free_flush(); |
| 1020 | } | 1020 | } |
| 1021 | 1021 | ||
| @@ -1025,7 +1025,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
| 1025 | { | 1025 | { |
| 1026 | int pos, newpos; | 1026 | int pos, newpos; |
| 1027 | struct tnode *tp = NULL, *tn = NULL; | 1027 | struct tnode *tp = NULL, *tn = NULL; |
| 1028 | struct node *n; | 1028 | struct rt_trie_node *n; |
| 1029 | struct leaf *l; | 1029 | struct leaf *l; |
| 1030 | int missbit; | 1030 | int missbit; |
| 1031 | struct list_head *fa_head = NULL; | 1031 | struct list_head *fa_head = NULL; |
| @@ -1111,10 +1111,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
| 1111 | if (t->trie && n == NULL) { | 1111 | if (t->trie && n == NULL) { |
| 1112 | /* Case 2: n is NULL, and will just insert a new leaf */ | 1112 | /* Case 2: n is NULL, and will just insert a new leaf */ |
| 1113 | 1113 | ||
| 1114 | node_set_parent((struct node *)l, tp); | 1114 | node_set_parent((struct rt_trie_node *)l, tp); |
| 1115 | 1115 | ||
| 1116 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1116 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
| 1117 | put_child(t, (struct tnode *)tp, cindex, (struct node *)l); | 1117 | put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); |
| 1118 | } else { | 1118 | } else { |
| 1119 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ | 1119 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ |
| 1120 | /* | 1120 | /* |
| @@ -1141,18 +1141,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
| 1141 | return NULL; | 1141 | return NULL; |
| 1142 | } | 1142 | } |
| 1143 | 1143 | ||
| 1144 | node_set_parent((struct node *)tn, tp); | 1144 | node_set_parent((struct rt_trie_node *)tn, tp); |
| 1145 | 1145 | ||
| 1146 | missbit = tkey_extract_bits(key, newpos, 1); | 1146 | missbit = tkey_extract_bits(key, newpos, 1); |
| 1147 | put_child(t, tn, missbit, (struct node *)l); | 1147 | put_child(t, tn, missbit, (struct rt_trie_node *)l); |
| 1148 | put_child(t, tn, 1-missbit, n); | 1148 | put_child(t, tn, 1-missbit, n); |
| 1149 | 1149 | ||
| 1150 | if (tp) { | 1150 | if (tp) { |
| 1151 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1151 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
| 1152 | put_child(t, (struct tnode *)tp, cindex, | 1152 | put_child(t, (struct tnode *)tp, cindex, |
| 1153 | (struct node *)tn); | 1153 | (struct rt_trie_node *)tn); |
| 1154 | } else { | 1154 | } else { |
| 1155 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1155 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
| 1156 | tp = tn; | 1156 | tp = tn; |
| 1157 | } | 1157 | } |
| 1158 | } | 1158 | } |
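Case 3 above splits the trie where the new key first diverges from the existing node: a fresh 1-bit tnode is created at the mismatch position, the new leaf goes in the missbit slot and the old subtree in the other. The kernel derives that position with its own bit helpers; equivalently it is the count of leading zeros of the XOR of the two keys:

    #include <stdint.h>
    #include <stdio.h>

    /* Index, counted from the MSB, of the first bit where a and b
     * differ -- the position a new 1-bit tnode is created at.
     * __builtin_clz is a gcc/clang builtin; undefined if a == b. */
    static unsigned first_mismatch(uint32_t a, uint32_t b)
    {
            return __builtin_clz(a ^ b);
    }

    int main(void)
    {
            uint32_t k_old = 0xc0a80100;    /* 192.168.1.0 */
            uint32_t k_new = 0xc0a80200;    /* 192.168.2.0 */

            printf("split at bit %u\n", first_mismatch(k_old, k_new)); /* 22 */
            return 0;
    }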
| @@ -1245,7 +1245,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |||
| 1245 | if (fa->fa_info->fib_priority != fi->fib_priority) | 1245 | if (fa->fa_info->fib_priority != fi->fib_priority) |
| 1246 | break; | 1246 | break; |
| 1247 | if (fa->fa_type == cfg->fc_type && | 1247 | if (fa->fa_type == cfg->fc_type && |
| 1248 | fa->fa_scope == cfg->fc_scope && | ||
| 1249 | fa->fa_info == fi) { | 1248 | fa->fa_info == fi) { |
| 1250 | fa_match = fa; | 1249 | fa_match = fa; |
| 1251 | break; | 1250 | break; |
| @@ -1271,7 +1270,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |||
| 1271 | new_fa->fa_tos = fa->fa_tos; | 1270 | new_fa->fa_tos = fa->fa_tos; |
| 1272 | new_fa->fa_info = fi; | 1271 | new_fa->fa_info = fi; |
| 1273 | new_fa->fa_type = cfg->fc_type; | 1272 | new_fa->fa_type = cfg->fc_type; |
| 1274 | new_fa->fa_scope = cfg->fc_scope; | ||
| 1275 | state = fa->fa_state; | 1273 | state = fa->fa_state; |
| 1276 | new_fa->fa_state = state & ~FA_S_ACCESSED; | 1274 | new_fa->fa_state = state & ~FA_S_ACCESSED; |
| 1277 | 1275 | ||
| @@ -1308,7 +1306,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |||
| 1308 | new_fa->fa_info = fi; | 1306 | new_fa->fa_info = fi; |
| 1309 | new_fa->fa_tos = tos; | 1307 | new_fa->fa_tos = tos; |
| 1310 | new_fa->fa_type = cfg->fc_type; | 1308 | new_fa->fa_type = cfg->fc_type; |
| 1311 | new_fa->fa_scope = cfg->fc_scope; | ||
| 1312 | new_fa->fa_state = 0; | 1309 | new_fa->fa_state = 0; |
| 1313 | /* | 1310 | /* |
| 1314 | * Insert new entry to the list. | 1311 | * Insert new entry to the list. |
| @@ -1340,8 +1337,8 @@ err: | |||
| 1340 | } | 1337 | } |
| 1341 | 1338 | ||
| 1342 | /* should be called with rcu_read_lock */ | 1339 | /* should be called with rcu_read_lock */ |
| 1343 | static int check_leaf(struct trie *t, struct leaf *l, | 1340 | static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, |
| 1344 | t_key key, const struct flowi *flp, | 1341 | t_key key, const struct flowi4 *flp, |
| 1345 | struct fib_result *res, int fib_flags) | 1342 | struct fib_result *res, int fib_flags) |
| 1346 | { | 1343 | { |
| 1347 | struct leaf_info *li; | 1344 | struct leaf_info *li; |
| @@ -1349,40 +1346,75 @@ static int check_leaf(struct trie *t, struct leaf *l, | |||
| 1349 | struct hlist_node *node; | 1346 | struct hlist_node *node; |
| 1350 | 1347 | ||
| 1351 | hlist_for_each_entry_rcu(li, node, hhead, hlist) { | 1348 | hlist_for_each_entry_rcu(li, node, hhead, hlist) { |
| 1352 | int err; | 1349 | struct fib_alias *fa; |
| 1353 | int plen = li->plen; | 1350 | int plen = li->plen; |
| 1354 | __be32 mask = inet_make_mask(plen); | 1351 | __be32 mask = inet_make_mask(plen); |
| 1355 | 1352 | ||
| 1356 | if (l->key != (key & ntohl(mask))) | 1353 | if (l->key != (key & ntohl(mask))) |
| 1357 | continue; | 1354 | continue; |
| 1358 | 1355 | ||
| 1359 | err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); | 1356 | list_for_each_entry_rcu(fa, &li->falh, fa_list) { |
| 1357 | struct fib_info *fi = fa->fa_info; | ||
| 1358 | int nhsel, err; | ||
| 1360 | 1359 | ||
| 1360 | if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) | ||
| 1361 | continue; | ||
| 1362 | if (fa->fa_info->fib_scope < flp->flowi4_scope) | ||
| 1363 | continue; | ||
| 1364 | fib_alias_accessed(fa); | ||
| 1365 | err = fib_props[fa->fa_type].error; | ||
| 1366 | if (err) { | ||
| 1361 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 1367 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
| 1362 | if (err <= 0) | 1368 | t->stats.semantic_match_passed++; |
| 1363 | t->stats.semantic_match_passed++; | 1369 | #endif |
| 1364 | else | 1370 | return err; |
| 1365 | t->stats.semantic_match_miss++; | 1371 | } |
| 1372 | if (fi->fib_flags & RTNH_F_DEAD) | ||
| 1373 | continue; | ||
| 1374 | for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { | ||
| 1375 | const struct fib_nh *nh = &fi->fib_nh[nhsel]; | ||
| 1376 | |||
| 1377 | if (nh->nh_flags & RTNH_F_DEAD) | ||
| 1378 | continue; | ||
| 1379 | if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) | ||
| 1380 | continue; | ||
| 1381 | |||
| 1382 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
| 1383 | t->stats.semantic_match_passed++; | ||
| 1384 | #endif | ||
| 1385 | res->prefixlen = plen; | ||
| 1386 | res->nh_sel = nhsel; | ||
| 1387 | res->type = fa->fa_type; | ||
| 1388 | res->scope = fa->fa_info->fib_scope; | ||
| 1389 | res->fi = fi; | ||
| 1390 | res->table = tb; | ||
| 1391 | res->fa_head = &li->falh; | ||
| 1392 | if (!(fib_flags & FIB_LOOKUP_NOREF)) | ||
| 1393 | atomic_inc(&res->fi->fib_clntref); | ||
| 1394 | return 0; | ||
| 1395 | } | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
| 1399 | t->stats.semantic_match_miss++; | ||
| 1366 | #endif | 1400 | #endif |
| 1367 | if (err <= 0) | ||
| 1368 | return err; | ||
| 1369 | } | 1401 | } |
| 1370 | 1402 | ||
| 1371 | return 1; | 1403 | return 1; |
| 1372 | } | 1404 | } |
| 1373 | 1405 | ||
| 1374 | int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | 1406 | int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, |
| 1375 | struct fib_result *res, int fib_flags) | 1407 | struct fib_result *res, int fib_flags) |
| 1376 | { | 1408 | { |
| 1377 | struct trie *t = (struct trie *) tb->tb_data; | 1409 | struct trie *t = (struct trie *) tb->tb_data; |
| 1378 | int ret; | 1410 | int ret; |
| 1379 | struct node *n; | 1411 | struct rt_trie_node *n; |
| 1380 | struct tnode *pn; | 1412 | struct tnode *pn; |
| 1381 | int pos, bits; | 1413 | unsigned int pos, bits; |
| 1382 | t_key key = ntohl(flp->fl4_dst); | 1414 | t_key key = ntohl(flp->daddr); |
| 1383 | int chopped_off; | 1415 | unsigned int chopped_off; |
| 1384 | t_key cindex = 0; | 1416 | t_key cindex = 0; |
| 1385 | int current_prefix_length = KEYLENGTH; | 1417 | unsigned int current_prefix_length = KEYLENGTH; |
| 1386 | struct tnode *cn; | 1418 | struct tnode *cn; |
| 1387 | t_key pref_mismatch; | 1419 | t_key pref_mismatch; |
| 1388 | 1420 | ||
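check_leaf() now carries the whole semantic match itself: for every prefix length stored at the leaf it first verifies that the leaf key equals the lookup key under that prefix's mask (the l->key != (key & ntohl(mask)) test), then scans the alias list with the TOS, scope, error and dead-nexthop filters inherited from fib_semantic_match(). The masked compare in isolation, on host-order keys:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Host-order netmask for a prefix length; plen == 0 must not
     * shift by 32, which would be undefined. */
    static uint32_t make_mask(int plen)
    {
            return plen ? ~0u << (32 - plen) : 0;
    }

    static bool leaf_covers(uint32_t leaf_key, uint32_t lookup_key, int plen)
    {
            return leaf_key == (lookup_key & make_mask(plen));
    }

    int main(void)
    {
            uint32_t dst = 0xc0a80105;                         /* 192.168.1.5 */

            printf("%d\n", leaf_covers(0xc0a80100, dst, 24));  /* 1: /24 hit   */
            printf("%d\n", leaf_covers(0xc0a80000, dst, 24));  /* 0: wrong /24 */
            return 0;
    }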
| @@ -1398,7 +1430,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
| 1398 | 1430 | ||
| 1399 | /* Just a leaf? */ | 1431 | /* Just a leaf? */ |
| 1400 | if (IS_LEAF(n)) { | 1432 | if (IS_LEAF(n)) { |
| 1401 | ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); | 1433 | ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); |
| 1402 | goto found; | 1434 | goto found; |
| 1403 | } | 1435 | } |
| 1404 | 1436 | ||
| @@ -1423,7 +1455,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
| 1423 | } | 1455 | } |
| 1424 | 1456 | ||
| 1425 | if (IS_LEAF(n)) { | 1457 | if (IS_LEAF(n)) { |
| 1426 | ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); | 1458 | ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); |
| 1427 | if (ret > 0) | 1459 | if (ret > 0) |
| 1428 | goto backtrace; | 1460 | goto backtrace; |
| 1429 | goto found; | 1461 | goto found; |
| @@ -1541,7 +1573,7 @@ backtrace: | |||
| 1541 | if (chopped_off <= pn->bits) { | 1573 | if (chopped_off <= pn->bits) { |
| 1542 | cindex &= ~(1 << (chopped_off-1)); | 1574 | cindex &= ~(1 << (chopped_off-1)); |
| 1543 | } else { | 1575 | } else { |
| 1544 | struct tnode *parent = node_parent_rcu((struct node *) pn); | 1576 | struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); |
| 1545 | if (!parent) | 1577 | if (!parent) |
| 1546 | goto failed; | 1578 | goto failed; |
| 1547 | 1579 | ||
| @@ -1568,7 +1600,7 @@ found: | |||
| 1568 | */ | 1600 | */ |
| 1569 | static void trie_leaf_remove(struct trie *t, struct leaf *l) | 1601 | static void trie_leaf_remove(struct trie *t, struct leaf *l) |
| 1570 | { | 1602 | { |
| 1571 | struct tnode *tp = node_parent((struct node *) l); | 1603 | struct tnode *tp = node_parent((struct rt_trie_node *) l); |
| 1572 | 1604 | ||
| 1573 | pr_debug("entering trie_leaf_remove(%p)\n", l); | 1605 | pr_debug("entering trie_leaf_remove(%p)\n", l); |
| 1574 | 1606 | ||
| @@ -1629,7 +1661,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | |||
| 1629 | 1661 | ||
| 1630 | if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && | 1662 | if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && |
| 1631 | (cfg->fc_scope == RT_SCOPE_NOWHERE || | 1663 | (cfg->fc_scope == RT_SCOPE_NOWHERE || |
| 1632 | fa->fa_scope == cfg->fc_scope) && | 1664 | fa->fa_info->fib_scope == cfg->fc_scope) && |
| 1665 | (!cfg->fc_prefsrc || | ||
| 1666 | fi->fib_prefsrc == cfg->fc_prefsrc) && | ||
| 1633 | (!cfg->fc_protocol || | 1667 | (!cfg->fc_protocol || |
| 1634 | fi->fib_protocol == cfg->fc_protocol) && | 1668 | fi->fib_protocol == cfg->fc_protocol) && |
| 1635 | fib_nh_match(cfg, fi) == 0) { | 1669 | fib_nh_match(cfg, fi) == 0) { |
| @@ -1706,7 +1740,7 @@ static int trie_flush_leaf(struct leaf *l) | |||
| 1706 | * Scan for the next right leaf starting at node p->child[idx] | 1740 | * Scan for the next right leaf starting at node p->child[idx] |
| 1707 | * Since we have back pointer, no recursion necessary. | 1741 | * Since we have back pointer, no recursion necessary. |
| 1708 | */ | 1742 | */ |
| 1709 | static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) | 1743 | static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) |
| 1710 | { | 1744 | { |
| 1711 | do { | 1745 | do { |
| 1712 | t_key idx; | 1746 | t_key idx; |
| @@ -1732,7 +1766,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) | |||
| 1732 | } | 1766 | } |
| 1733 | 1767 | ||
| 1734 | /* Node empty, walk back up to parent */ | 1768 | /* Node empty, walk back up to parent */ |
| 1735 | c = (struct node *) p; | 1769 | c = (struct rt_trie_node *) p; |
| 1736 | } while ((p = node_parent_rcu(c)) != NULL); | 1770 | } while ((p = node_parent_rcu(c)) != NULL); |
| 1737 | 1771 | ||
| 1738 | return NULL; /* Root of trie */ | 1772 | return NULL; /* Root of trie */ |
| @@ -1753,7 +1787,7 @@ static struct leaf *trie_firstleaf(struct trie *t) | |||
| 1753 | 1787 | ||
| 1754 | static struct leaf *trie_nextleaf(struct leaf *l) | 1788 | static struct leaf *trie_nextleaf(struct leaf *l) |
| 1755 | { | 1789 | { |
| 1756 | struct node *c = (struct node *) l; | 1790 | struct rt_trie_node *c = (struct rt_trie_node *) l; |
| 1757 | struct tnode *p = node_parent_rcu(c); | 1791 | struct tnode *p = node_parent_rcu(c); |
| 1758 | 1792 | ||
| 1759 | if (!p) | 1793 | if (!p) |
| @@ -1802,80 +1836,6 @@ void fib_free_table(struct fib_table *tb) | |||
| 1802 | kfree(tb); | 1836 | kfree(tb); |
| 1803 | } | 1837 | } |
| 1804 | 1838 | ||
| 1805 | void fib_table_select_default(struct fib_table *tb, | ||
| 1806 | const struct flowi *flp, | ||
| 1807 | struct fib_result *res) | ||
| 1808 | { | ||
| 1809 | struct trie *t = (struct trie *) tb->tb_data; | ||
| 1810 | int order, last_idx; | ||
| 1811 | struct fib_info *fi = NULL; | ||
| 1812 | struct fib_info *last_resort; | ||
| 1813 | struct fib_alias *fa = NULL; | ||
| 1814 | struct list_head *fa_head; | ||
| 1815 | struct leaf *l; | ||
| 1816 | |||
| 1817 | last_idx = -1; | ||
| 1818 | last_resort = NULL; | ||
| 1819 | order = -1; | ||
| 1820 | |||
| 1821 | rcu_read_lock(); | ||
| 1822 | |||
| 1823 | l = fib_find_node(t, 0); | ||
| 1824 | if (!l) | ||
| 1825 | goto out; | ||
| 1826 | |||
| 1827 | fa_head = get_fa_head(l, 0); | ||
| 1828 | if (!fa_head) | ||
| 1829 | goto out; | ||
| 1830 | |||
| 1831 | if (list_empty(fa_head)) | ||
| 1832 | goto out; | ||
| 1833 | |||
| 1834 | list_for_each_entry_rcu(fa, fa_head, fa_list) { | ||
| 1835 | struct fib_info *next_fi = fa->fa_info; | ||
| 1836 | |||
| 1837 | if (fa->fa_scope != res->scope || | ||
| 1838 | fa->fa_type != RTN_UNICAST) | ||
| 1839 | continue; | ||
| 1840 | |||
| 1841 | if (next_fi->fib_priority > res->fi->fib_priority) | ||
| 1842 | break; | ||
| 1843 | if (!next_fi->fib_nh[0].nh_gw || | ||
| 1844 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | ||
| 1845 | continue; | ||
| 1846 | |||
| 1847 | fib_alias_accessed(fa); | ||
| 1848 | |||
| 1849 | if (fi == NULL) { | ||
| 1850 | if (next_fi != res->fi) | ||
| 1851 | break; | ||
| 1852 | } else if (!fib_detect_death(fi, order, &last_resort, | ||
| 1853 | &last_idx, tb->tb_default)) { | ||
| 1854 | fib_result_assign(res, fi); | ||
| 1855 | tb->tb_default = order; | ||
| 1856 | goto out; | ||
| 1857 | } | ||
| 1858 | fi = next_fi; | ||
| 1859 | order++; | ||
| 1860 | } | ||
| 1861 | if (order <= 0 || fi == NULL) { | ||
| 1862 | tb->tb_default = -1; | ||
| 1863 | goto out; | ||
| 1864 | } | ||
| 1865 | |||
| 1866 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | ||
| 1867 | tb->tb_default)) { | ||
| 1868 | fib_result_assign(res, fi); | ||
| 1869 | tb->tb_default = order; | ||
| 1870 | goto out; | ||
| 1871 | } | ||
| 1872 | if (last_idx >= 0) | ||
| 1873 | fib_result_assign(res, last_resort); | ||
| 1874 | tb->tb_default = last_idx; | ||
| 1875 | out: | ||
| 1876 | rcu_read_unlock(); | ||
| 1877 | } | ||
| 1878 | |||
| 1879 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, | 1839 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, |
| 1880 | struct fib_table *tb, | 1840 | struct fib_table *tb, |
| 1881 | struct sk_buff *skb, struct netlink_callback *cb) | 1841 | struct sk_buff *skb, struct netlink_callback *cb) |
| @@ -1900,7 +1860,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, | |||
| 1900 | RTM_NEWROUTE, | 1860 | RTM_NEWROUTE, |
| 1901 | tb->tb_id, | 1861 | tb->tb_id, |
| 1902 | fa->fa_type, | 1862 | fa->fa_type, |
| 1903 | fa->fa_scope, | ||
| 1904 | xkey, | 1863 | xkey, |
| 1905 | plen, | 1864 | plen, |
| 1906 | fa->fa_tos, | 1865 | fa->fa_tos, |
| @@ -1990,7 +1949,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, | |||
| 1990 | return skb->len; | 1949 | return skb->len; |
| 1991 | } | 1950 | } |
| 1992 | 1951 | ||
| 1993 | void __init fib_hash_init(void) | 1952 | void __init fib_trie_init(void) |
| 1994 | { | 1953 | { |
| 1995 | fn_alias_kmem = kmem_cache_create("ip_fib_alias", | 1954 | fn_alias_kmem = kmem_cache_create("ip_fib_alias", |
| 1996 | sizeof(struct fib_alias), | 1955 | sizeof(struct fib_alias), |
| @@ -2003,8 +1962,7 @@ void __init fib_hash_init(void) | |||
| 2003 | } | 1962 | } |
| 2004 | 1963 | ||
| 2005 | 1964 | ||
| 2006 | /* Fix more generic FIB names for init later */ | 1965 | struct fib_table *fib_trie_table(u32 id) |
| 2007 | struct fib_table *fib_hash_table(u32 id) | ||
| 2008 | { | 1966 | { |
| 2009 | struct fib_table *tb; | 1967 | struct fib_table *tb; |
| 2010 | struct trie *t; | 1968 | struct trie *t; |
| @@ -2036,7 +1994,7 @@ struct fib_trie_iter { | |||
| 2036 | unsigned int depth; | 1994 | unsigned int depth; |
| 2037 | }; | 1995 | }; |
| 2038 | 1996 | ||
| 2039 | static struct node *fib_trie_get_next(struct fib_trie_iter *iter) | 1997 | static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) |
| 2040 | { | 1998 | { |
| 2041 | struct tnode *tn = iter->tnode; | 1999 | struct tnode *tn = iter->tnode; |
| 2042 | unsigned int cindex = iter->index; | 2000 | unsigned int cindex = iter->index; |
| @@ -2050,7 +2008,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter) | |||
| 2050 | iter->tnode, iter->index, iter->depth); | 2008 | iter->tnode, iter->index, iter->depth); |
| 2051 | rescan: | 2009 | rescan: |
| 2052 | while (cindex < (1<<tn->bits)) { | 2010 | while (cindex < (1<<tn->bits)) { |
| 2053 | struct node *n = tnode_get_child_rcu(tn, cindex); | 2011 | struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); |
| 2054 | 2012 | ||
| 2055 | if (n) { | 2013 | if (n) { |
| 2056 | if (IS_LEAF(n)) { | 2014 | if (IS_LEAF(n)) { |
| @@ -2069,7 +2027,7 @@ rescan: | |||
| 2069 | } | 2027 | } |
| 2070 | 2028 | ||
| 2071 | /* Current node exhausted, pop back up */ | 2029 | /* Current node exhausted, pop back up */ |
| 2072 | p = node_parent_rcu((struct node *)tn); | 2030 | p = node_parent_rcu((struct rt_trie_node *)tn); |
| 2073 | if (p) { | 2031 | if (p) { |
| 2074 | cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; | 2032 | cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; |
| 2075 | tn = p; | 2033 | tn = p; |
| @@ -2081,10 +2039,10 @@ rescan: | |||
| 2081 | return NULL; | 2039 | return NULL; |
| 2082 | } | 2040 | } |
| 2083 | 2041 | ||
| 2084 | static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | 2042 | static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, |
| 2085 | struct trie *t) | 2043 | struct trie *t) |
| 2086 | { | 2044 | { |
| 2087 | struct node *n; | 2045 | struct rt_trie_node *n; |
| 2088 | 2046 | ||
| 2089 | if (!t) | 2047 | if (!t) |
| 2090 | return NULL; | 2048 | return NULL; |
| @@ -2108,7 +2066,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | |||
| 2108 | 2066 | ||
| 2109 | static void trie_collect_stats(struct trie *t, struct trie_stat *s) | 2067 | static void trie_collect_stats(struct trie *t, struct trie_stat *s) |
| 2110 | { | 2068 | { |
| 2111 | struct node *n; | 2069 | struct rt_trie_node *n; |
| 2112 | struct fib_trie_iter iter; | 2070 | struct fib_trie_iter iter; |
| 2113 | 2071 | ||
| 2114 | memset(s, 0, sizeof(*s)); | 2072 | memset(s, 0, sizeof(*s)); |
| @@ -2181,7 +2139,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) | |||
| 2181 | seq_putc(seq, '\n'); | 2139 | seq_putc(seq, '\n'); |
| 2182 | seq_printf(seq, "\tPointers: %u\n", pointers); | 2140 | seq_printf(seq, "\tPointers: %u\n", pointers); |
| 2183 | 2141 | ||
| 2184 | bytes += sizeof(struct node *) * pointers; | 2142 | bytes += sizeof(struct rt_trie_node *) * pointers; |
| 2185 | seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); | 2143 | seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); |
| 2186 | seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); | 2144 | seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); |
| 2187 | } | 2145 | } |
| @@ -2262,7 +2220,7 @@ static const struct file_operations fib_triestat_fops = { | |||
| 2262 | .release = single_release_net, | 2220 | .release = single_release_net, |
| 2263 | }; | 2221 | }; |
| 2264 | 2222 | ||
| 2265 | static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) | 2223 | static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) |
| 2266 | { | 2224 | { |
| 2267 | struct fib_trie_iter *iter = seq->private; | 2225 | struct fib_trie_iter *iter = seq->private; |
| 2268 | struct net *net = seq_file_net(seq); | 2226 | struct net *net = seq_file_net(seq); |
| @@ -2275,7 +2233,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) | |||
| 2275 | struct fib_table *tb; | 2233 | struct fib_table *tb; |
| 2276 | 2234 | ||
| 2277 | hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { | 2235 | hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { |
| 2278 | struct node *n; | 2236 | struct rt_trie_node *n; |
| 2279 | 2237 | ||
| 2280 | for (n = fib_trie_get_first(iter, | 2238 | for (n = fib_trie_get_first(iter, |
| 2281 | (struct trie *) tb->tb_data); | 2239 | (struct trie *) tb->tb_data); |
| @@ -2304,7 +2262,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
| 2304 | struct fib_table *tb = iter->tb; | 2262 | struct fib_table *tb = iter->tb; |
| 2305 | struct hlist_node *tb_node; | 2263 | struct hlist_node *tb_node; |
| 2306 | unsigned int h; | 2264 | unsigned int h; |
| 2307 | struct node *n; | 2265 | struct rt_trie_node *n; |
| 2308 | 2266 | ||
| 2309 | ++*pos; | 2267 | ++*pos; |
| 2310 | /* next node in same table */ | 2268 | /* next node in same table */ |
| @@ -2390,7 +2348,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) | |||
| 2390 | static int fib_trie_seq_show(struct seq_file *seq, void *v) | 2348 | static int fib_trie_seq_show(struct seq_file *seq, void *v) |
| 2391 | { | 2349 | { |
| 2392 | const struct fib_trie_iter *iter = seq->private; | 2350 | const struct fib_trie_iter *iter = seq->private; |
| 2393 | struct node *n = v; | 2351 | struct rt_trie_node *n = v; |
| 2394 | 2352 | ||
| 2395 | if (!node_parent_rcu(n)) | 2353 | if (!node_parent_rcu(n)) |
| 2396 | fib_table_print(seq, iter->tb); | 2354 | fib_table_print(seq, iter->tb); |
| @@ -2422,7 +2380,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
| 2422 | seq_indent(seq, iter->depth+1); | 2380 | seq_indent(seq, iter->depth+1); |
| 2423 | seq_printf(seq, " /%d %s %s", li->plen, | 2381 | seq_printf(seq, " /%d %s %s", li->plen, |
| 2424 | rtn_scope(buf1, sizeof(buf1), | 2382 | rtn_scope(buf1, sizeof(buf1), |
| 2425 | fa->fa_scope), | 2383 | fa->fa_info->fib_scope), |
| 2426 | rtn_type(buf2, sizeof(buf2), | 2384 | rtn_type(buf2, sizeof(buf2), |
| 2427 | fa->fa_type)); | 2385 | fa->fa_type)); |
| 2428 | if (fa->fa_tos) | 2386 | if (fa->fa_tos) |
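The fib_trie.c hunks above are mechanical renames: the trie's node type goes from the overly generic struct node to rt_trie_node, and the per-alias scope moves into the shared fib_info (fa->fa_info->fib_scope). For orientation, a sketch of the renamed type as its uses above imply it; the authoritative definition lives in net/ipv4/fib_trie.c and may differ in detail.

	/* Sketch of the renamed node type, inferred from its uses above.
	 * The low bits of 'parent' encode whether the node is an internal
	 * tnode or a leaf. */
	typedef unsigned int t_key;

	struct rt_trie_node {
		unsigned long parent;	/* parent pointer plus node-type bit */
		t_key key;		/* prefix bits for this subtree */
	};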
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4aa1b7f01ea0..a91dc1611081 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
| @@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk) | |||
| 233 | * Send an ICMP frame. | 233 | * Send an ICMP frame. |
| 234 | */ | 234 | */ |
| 235 | 235 | ||
| 236 | /* | 236 | static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, |
| 237 | * Check transmit rate limitation for given message. | ||
| 238 | * The rate information is held in the destination cache now. | ||
| 239 | * This function is generic and could be used for other purposes | ||
| 240 | * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. | ||
| 241 | * | ||
| 242 | * Note that the same dst_entry fields are modified by functions in | ||
| 243 | * route.c too, but these work for packet destinations while xrlim_allow | ||
| 244 | * works for icmp destinations. This means the rate limiting information | ||
| 245 | * for one "ip object" is shared - and these ICMPs are twice limited: | ||
| 246 | * by source and by destination. | ||
| 247 | * | ||
| 248 | * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate | ||
| 249 | * SHOULD allow setting of rate limits | ||
| 250 | * | ||
| 251 | * Shared between ICMPv4 and ICMPv6. | ||
| 252 | */ | ||
| 253 | #define XRLIM_BURST_FACTOR 6 | ||
| 254 | int xrlim_allow(struct dst_entry *dst, int timeout) | ||
| 255 | { | ||
| 256 | unsigned long now, token = dst->rate_tokens; | ||
| 257 | int rc = 0; | ||
| 258 | |||
| 259 | now = jiffies; | ||
| 260 | token += now - dst->rate_last; | ||
| 261 | dst->rate_last = now; | ||
| 262 | if (token > XRLIM_BURST_FACTOR * timeout) | ||
| 263 | token = XRLIM_BURST_FACTOR * timeout; | ||
| 264 | if (token >= timeout) { | ||
| 265 | token -= timeout; | ||
| 266 | rc = 1; | ||
| 267 | } | ||
| 268 | dst->rate_tokens = token; | ||
| 269 | return rc; | ||
| 270 | } | ||
| 271 | EXPORT_SYMBOL(xrlim_allow); | ||
| 272 | |||
| 273 | static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | ||
| 274 | int type, int code) | 237 | int type, int code) |
| 275 | { | 238 | { |
| 276 | struct dst_entry *dst = &rt->dst; | 239 | struct dst_entry *dst = &rt->dst; |
| 277 | int rc = 1; | 240 | bool rc = true; |
| 278 | 241 | ||
| 279 | if (type > NR_ICMP_TYPES) | 242 | if (type > NR_ICMP_TYPES) |
| 280 | goto out; | 243 | goto out; |
| @@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, | |||
| 288 | goto out; | 251 | goto out; |
| 289 | 252 | ||
| 290 | /* Limit if icmp type is enabled in ratemask. */ | 253 | /* Limit if icmp type is enabled in ratemask. */ |
| 291 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) | 254 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { |
| 292 | rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); | 255 | if (!rt->peer) |
| 256 | rt_bind_peer(rt, 1); | ||
| 257 | rc = inet_peer_xrlim_allow(rt->peer, | ||
| 258 | net->ipv4.sysctl_icmp_ratelimit); | ||
| 259 | } | ||
| 293 | out: | 260 | out: |
| 294 | return rc; | 261 | return rc; |
| 295 | } | 262 | } |
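The hunk above deletes xrlim_allow() from icmp.c; the same token-bucket logic reappears later in this diff as inet_peer_xrlim_allow(), with the token state kept per inet_peer rather than per dst_entry. A minimal user-space simulation of that bucket (a sketch; 'now' stands in for jiffies) shows what the burst factor buys: after an idle spell a peer may emit up to six messages back to back, then one per 'timeout' ticks.

	#include <stdbool.h>

	#define XRLIM_BURST_FACTOR 6	/* same constant as the kernel code */

	struct bucket {
		unsigned long rate_tokens;	/* accumulated credit, in ticks */
		unsigned long rate_last;	/* last time we were consulted */
	};

	/* Mirrors inet_peer_xrlim_allow(): earn one token per elapsed tick,
	 * cap the stockpile at six timeouts, spend one timeout per message. */
	static bool xrlim_allow_sim(struct bucket *b, unsigned long now, int timeout)
	{
		unsigned long token = b->rate_tokens + (now - b->rate_last);
		bool ok = false;

		b->rate_last = now;
		if (token > XRLIM_BURST_FACTOR * timeout)
			token = XRLIM_BURST_FACTOR * timeout;
		if (token >= timeout) {
			token -= timeout;
			ok = true;
		}
		b->rate_tokens = token;
		return ok;
	}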
| @@ -386,12 +353,15 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
| 386 | daddr = icmp_param->replyopts.faddr; | 353 | daddr = icmp_param->replyopts.faddr; |
| 387 | } | 354 | } |
| 388 | { | 355 | { |
| 389 | struct flowi fl = { .fl4_dst= daddr, | 356 | struct flowi4 fl4 = { |
| 390 | .fl4_src = rt->rt_spec_dst, | 357 | .daddr = daddr, |
| 391 | .fl4_tos = RT_TOS(ip_hdr(skb)->tos), | 358 | .saddr = rt->rt_spec_dst, |
| 392 | .proto = IPPROTO_ICMP }; | 359 | .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), |
| 393 | security_skb_classify_flow(skb, &fl); | 360 | .flowi4_proto = IPPROTO_ICMP, |
| 394 | if (ip_route_output_key(net, &rt, &fl)) | 361 | }; |
| 362 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | ||
| 363 | rt = ip_route_output_key(net, &fl4); | ||
| 364 | if (IS_ERR(rt)) | ||
| 395 | goto out_unlock; | 365 | goto out_unlock; |
| 396 | } | 366 | } |
| 397 | if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, | 367 | if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, |
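Note the calling-convention change that recurs throughout this diff: ip_route_output_key() and friends now hand back the route itself, with failures encoded in the pointer via the kernel's ERR_PTR machinery, instead of returning an int and filling a struct rtable ** output argument. The caller-side idiom, as a kernel-context sketch:

	/* Pointer-encoded errors, as used by the converted routing calls. */
	static int example_lookup(struct net *net, struct flowi4 *fl4)
	{
		struct rtable *rt = ip_route_output_key(net, fl4);

		if (IS_ERR(rt))
			return PTR_ERR(rt);	/* recover the errno, e.g. -ENETUNREACH */
		/* ... use the route ... */
		ip_rt_put(rt);			/* drop the reference when done */
		return 0;
	}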
| @@ -402,6 +372,97 @@ out_unlock: | |||
| 402 | icmp_xmit_unlock(sk); | 372 | icmp_xmit_unlock(sk); |
| 403 | } | 373 | } |
| 404 | 374 | ||
| 375 | static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, | ||
| 376 | struct iphdr *iph, | ||
| 377 | __be32 saddr, u8 tos, | ||
| 378 | int type, int code, | ||
| 379 | struct icmp_bxm *param) | ||
| 380 | { | ||
| 381 | struct flowi4 fl4 = { | ||
| 382 | .daddr = (param->replyopts.srr ? | ||
| 383 | param->replyopts.faddr : iph->saddr), | ||
| 384 | .saddr = saddr, | ||
| 385 | .flowi4_tos = RT_TOS(tos), | ||
| 386 | .flowi4_proto = IPPROTO_ICMP, | ||
| 387 | .fl4_icmp_type = type, | ||
| 388 | .fl4_icmp_code = code, | ||
| 389 | }; | ||
| 390 | struct rtable *rt, *rt2; | ||
| 391 | int err; | ||
| 392 | |||
| 393 | security_skb_classify_flow(skb_in, flowi4_to_flowi(&fl4)); | ||
| 394 | rt = __ip_route_output_key(net, &fl4); | ||
| 395 | if (IS_ERR(rt)) | ||
| 396 | return rt; | ||
| 397 | |||
| 398 | /* No need to clone since we're just using its address. */ | ||
| 399 | rt2 = rt; | ||
| 400 | |||
| 401 | if (!fl4.saddr) | ||
| 402 | fl4.saddr = rt->rt_src; | ||
| 403 | |||
| 404 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, | ||
| 405 | flowi4_to_flowi(&fl4), NULL, 0); | ||
| 406 | if (!IS_ERR(rt)) { | ||
| 407 | if (rt != rt2) | ||
| 408 | return rt; | ||
| 409 | } else if (PTR_ERR(rt) == -EPERM) { | ||
| 410 | rt = NULL; | ||
| 411 | } else | ||
| 412 | return rt; | ||
| 413 | |||
| 414 | err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4), AF_INET); | ||
| 415 | if (err) | ||
| 416 | goto relookup_failed; | ||
| 417 | |||
| 418 | if (inet_addr_type(net, fl4.saddr) == RTN_LOCAL) { | ||
| 419 | rt2 = __ip_route_output_key(net, &fl4); | ||
| 420 | if (IS_ERR(rt2)) | ||
| 421 | err = PTR_ERR(rt2); | ||
| 422 | } else { | ||
| 423 | struct flowi4 fl4_2 = {}; | ||
| 424 | unsigned long orefdst; | ||
| 425 | |||
| 426 | fl4_2.daddr = fl4.saddr; | ||
| 427 | rt2 = ip_route_output_key(net, &fl4_2); | ||
| 428 | if (IS_ERR(rt2)) { | ||
| 429 | err = PTR_ERR(rt2); | ||
| 430 | goto relookup_failed; | ||
| 431 | } | ||
| 432 | /* Ugh! */ | ||
| 433 | orefdst = skb_in->_skb_refdst; /* save old refdst */ | ||
| 434 | err = ip_route_input(skb_in, fl4.daddr, fl4.saddr, | ||
| 435 | RT_TOS(tos), rt2->dst.dev); | ||
| 436 | |||
| 437 | dst_release(&rt2->dst); | ||
| 438 | rt2 = skb_rtable(skb_in); | ||
| 439 | skb_in->_skb_refdst = orefdst; /* restore old refdst */ | ||
| 440 | } | ||
| 441 | |||
| 442 | if (err) | ||
| 443 | goto relookup_failed; | ||
| 444 | |||
| 445 | rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, | ||
| 446 | flowi4_to_flowi(&fl4), NULL, | ||
| 447 | XFRM_LOOKUP_ICMP); | ||
| 448 | if (!IS_ERR(rt2)) { | ||
| 449 | dst_release(&rt->dst); | ||
| 450 | rt = rt2; | ||
| 451 | } else if (PTR_ERR(rt2) == -EPERM) { | ||
| 452 | if (rt) | ||
| 453 | dst_release(&rt->dst); | ||
| 454 | return rt2; | ||
| 455 | } else { | ||
| 456 | err = PTR_ERR(rt2); | ||
| 457 | goto relookup_failed; | ||
| 458 | } | ||
| 459 | return rt; | ||
| 460 | |||
| 461 | relookup_failed: | ||
| 462 | if (rt) | ||
| 463 | return rt; | ||
| 464 | return ERR_PTR(err); | ||
| 465 | } | ||
| 405 | 466 | ||
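The new icmp_route_lookup() above folds the whole dance into one helper: the plain route lookup, the forward xfrm_lookup(), and, on -EPERM, the reverse-session decode plus relookup needed for IPsec. Its callers shrink to the pattern used in the icmp_send() hunk below:

	/* Caller's view of the helper (excerpt-style sketch; see icmp_send()). */
	rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, type, code,
			       &icmp_param);
	if (IS_ERR(rt))
		goto out_unlock;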
| 406 | /* | 467 | /* |
| 407 | * Send an ICMP message in response to a situation | 468 | * Send an ICMP message in response to a situation |
| @@ -507,7 +568,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 507 | rcu_read_lock(); | 568 | rcu_read_lock(); |
| 508 | if (rt_is_input_route(rt) && | 569 | if (rt_is_input_route(rt) && |
| 509 | net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) | 570 | net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) |
| 510 | dev = dev_get_by_index_rcu(net, rt->fl.iif); | 571 | dev = dev_get_by_index_rcu(net, rt->rt_iif); |
| 511 | 572 | ||
| 512 | if (dev) | 573 | if (dev) |
| 513 | saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 574 | saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
| @@ -539,86 +600,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
| 539 | ipc.opt = &icmp_param.replyopts; | 600 | ipc.opt = &icmp_param.replyopts; |
| 540 | ipc.tx_flags = 0; | 601 | ipc.tx_flags = 0; |
| 541 | 602 | ||
| 542 | { | 603 | rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, |
| 543 | struct flowi fl = { | 604 | type, code, &icmp_param); |
| 544 | .fl4_dst = icmp_param.replyopts.srr ? | 605 | if (IS_ERR(rt)) |
| 545 | icmp_param.replyopts.faddr : iph->saddr, | 606 | goto out_unlock; |
| 546 | .fl4_src = saddr, | ||
| 547 | .fl4_tos = RT_TOS(tos), | ||
| 548 | .proto = IPPROTO_ICMP, | ||
| 549 | .fl_icmp_type = type, | ||
| 550 | .fl_icmp_code = code, | ||
| 551 | }; | ||
| 552 | int err; | ||
| 553 | struct rtable *rt2; | ||
| 554 | |||
| 555 | security_skb_classify_flow(skb_in, &fl); | ||
| 556 | if (__ip_route_output_key(net, &rt, &fl)) | ||
| 557 | goto out_unlock; | ||
| 558 | |||
| 559 | /* No need to clone since we're just using its address. */ | ||
| 560 | rt2 = rt; | ||
| 561 | |||
| 562 | if (!fl.nl_u.ip4_u.saddr) | ||
| 563 | fl.nl_u.ip4_u.saddr = rt->rt_src; | ||
| 564 | |||
| 565 | err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); | ||
| 566 | switch (err) { | ||
| 567 | case 0: | ||
| 568 | if (rt != rt2) | ||
| 569 | goto route_done; | ||
| 570 | break; | ||
| 571 | case -EPERM: | ||
| 572 | rt = NULL; | ||
| 573 | break; | ||
| 574 | default: | ||
| 575 | goto out_unlock; | ||
| 576 | } | ||
| 577 | |||
| 578 | if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET)) | ||
| 579 | goto relookup_failed; | ||
| 580 | |||
| 581 | if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) | ||
| 582 | err = __ip_route_output_key(net, &rt2, &fl); | ||
| 583 | else { | ||
| 584 | struct flowi fl2 = {}; | ||
| 585 | unsigned long orefdst; | ||
| 586 | |||
| 587 | fl2.fl4_dst = fl.fl4_src; | ||
| 588 | if (ip_route_output_key(net, &rt2, &fl2)) | ||
| 589 | goto relookup_failed; | ||
| 590 | |||
| 591 | /* Ugh! */ | ||
| 592 | orefdst = skb_in->_skb_refdst; /* save old refdst */ | ||
| 593 | err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, | ||
| 594 | RT_TOS(tos), rt2->dst.dev); | ||
| 595 | |||
| 596 | dst_release(&rt2->dst); | ||
| 597 | rt2 = skb_rtable(skb_in); | ||
| 598 | skb_in->_skb_refdst = orefdst; /* restore old refdst */ | ||
| 599 | } | ||
| 600 | |||
| 601 | if (err) | ||
| 602 | goto relookup_failed; | ||
| 603 | |||
| 604 | err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, | ||
| 605 | XFRM_LOOKUP_ICMP); | ||
| 606 | switch (err) { | ||
| 607 | case 0: | ||
| 608 | dst_release(&rt->dst); | ||
| 609 | rt = rt2; | ||
| 610 | break; | ||
| 611 | case -EPERM: | ||
| 612 | goto ende; | ||
| 613 | default: | ||
| 614 | relookup_failed: | ||
| 615 | if (!rt) | ||
| 616 | goto out_unlock; | ||
| 617 | break; | ||
| 618 | } | ||
| 619 | } | ||
| 620 | 607 | ||
| 621 | route_done: | ||
| 622 | if (!icmpv4_xrlim_allow(net, rt, type, code)) | 608 | if (!icmpv4_xrlim_allow(net, rt, type, code)) |
| 623 | goto ende; | 609 | goto ende; |
| 624 | 610 | ||
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index e0e77e297de3..1fd3d9ce8398 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
| @@ -321,14 +321,12 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
| 321 | } | 321 | } |
| 322 | igmp_skb_size(skb) = size; | 322 | igmp_skb_size(skb) = size; |
| 323 | 323 | ||
| 324 | { | 324 | rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0, |
| 325 | struct flowi fl = { .oif = dev->ifindex, | 325 | 0, 0, |
| 326 | .fl4_dst = IGMPV3_ALL_MCR, | 326 | IPPROTO_IGMP, 0, dev->ifindex); |
| 327 | .proto = IPPROTO_IGMP }; | 327 | if (IS_ERR(rt)) { |
| 328 | if (ip_route_output_key(net, &rt, &fl)) { | 328 | kfree_skb(skb); |
| 329 | kfree_skb(skb); | 329 | return NULL; |
| 330 | return NULL; | ||
| 331 | } | ||
| 332 | } | 330 | } |
| 333 | if (rt->rt_src == 0) { | 331 | if (rt->rt_src == 0) { |
| 334 | kfree_skb(skb); | 332 | kfree_skb(skb); |
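ip_route_output_ports() is one of the typed lookup wrappers this series introduces. Its likely shape, inferred from the argument order at the call sites and the flowi4 fields used elsewhere in this diff (a sketch, not the header's exact definition; the real helper may also fold in the socket's mark and flags):

	static inline struct rtable *ip_route_output_ports(struct net *net,
							   struct sock *sk,
							   __be32 daddr, __be32 saddr,
							   __be16 dport, __be16 sport,
							   __u8 proto, __u8 tos, int oif)
	{
		struct flowi4 fl4 = {
			.flowi4_oif = oif,
			.daddr = daddr,
			.saddr = saddr,
			.fl4_dport = dport,
			.fl4_sport = sport,
			.flowi4_tos = tos,
			.flowi4_proto = proto,
		};
		return ip_route_output_flow(net, &fl4, sk);
	}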
| @@ -666,13 +664,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
| 666 | else | 664 | else |
| 667 | dst = group; | 665 | dst = group; |
| 668 | 666 | ||
| 669 | { | 667 | rt = ip_route_output_ports(net, NULL, dst, 0, |
| 670 | struct flowi fl = { .oif = dev->ifindex, | 668 | 0, 0, |
| 671 | .fl4_dst = dst, | 669 | IPPROTO_IGMP, 0, dev->ifindex); |
| 672 | .proto = IPPROTO_IGMP }; | 670 | if (IS_ERR(rt)) |
| 673 | if (ip_route_output_key(net, &rt, &fl)) | 671 | return -1; |
| 674 | return -1; | 672 | |
| 675 | } | ||
| 676 | if (rt->rt_src == 0) { | 673 | if (rt->rt_src == 0) { |
| 677 | ip_rt_put(rt); | 674 | ip_rt_put(rt); |
| 678 | return -1; | 675 | return -1; |
| @@ -1439,8 +1436,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev) | |||
| 1439 | /* RTNL is locked */ | 1436 | /* RTNL is locked */ |
| 1440 | static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) | 1437 | static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) |
| 1441 | { | 1438 | { |
| 1442 | struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr }; | ||
| 1443 | struct rtable *rt; | ||
| 1444 | struct net_device *dev = NULL; | 1439 | struct net_device *dev = NULL; |
| 1445 | struct in_device *idev = NULL; | 1440 | struct in_device *idev = NULL; |
| 1446 | 1441 | ||
| @@ -1454,9 +1449,14 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) | |||
| 1454 | return NULL; | 1449 | return NULL; |
| 1455 | } | 1450 | } |
| 1456 | 1451 | ||
| 1457 | if (!dev && !ip_route_output_key(net, &rt, &fl)) { | 1452 | if (!dev) { |
| 1458 | dev = rt->dst.dev; | 1453 | struct rtable *rt = ip_route_output(net, |
| 1459 | ip_rt_put(rt); | 1454 | imr->imr_multiaddr.s_addr, |
| 1455 | 0, 0, 0); | ||
| 1456 | if (!IS_ERR(rt)) { | ||
| 1457 | dev = rt->dst.dev; | ||
| 1458 | ip_rt_put(rt); | ||
| 1459 | } | ||
| 1460 | } | 1460 | } |
| 1461 | if (dev) { | 1461 | if (dev) { |
| 1462 | imr->imr_ifindex = dev->ifindex; | 1462 | imr->imr_ifindex = dev->ifindex; |
| @@ -2329,13 +2329,13 @@ void ip_mc_drop_socket(struct sock *sk) | |||
| 2329 | rtnl_unlock(); | 2329 | rtnl_unlock(); |
| 2330 | } | 2330 | } |
| 2331 | 2331 | ||
| 2332 | int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) | 2332 | /* called with rcu_read_lock() */ |
| 2333 | int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) | ||
| 2333 | { | 2334 | { |
| 2334 | struct ip_mc_list *im; | 2335 | struct ip_mc_list *im; |
| 2335 | struct ip_sf_list *psf; | 2336 | struct ip_sf_list *psf; |
| 2336 | int rv = 0; | 2337 | int rv = 0; |
| 2337 | 2338 | ||
| 2338 | rcu_read_lock(); | ||
| 2339 | for_each_pmc_rcu(in_dev, im) { | 2339 | for_each_pmc_rcu(in_dev, im) { |
| 2340 | if (im->multiaddr == mc_addr) | 2340 | if (im->multiaddr == mc_addr) |
| 2341 | break; | 2341 | break; |
| @@ -2357,7 +2357,6 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p | |||
| 2357 | } else | 2357 | } else |
| 2358 | rv = 1; /* unspecified source; tentatively allow */ | 2358 | rv = 1; /* unspecified source; tentatively allow */ |
| 2359 | } | 2359 | } |
| 2360 | rcu_read_unlock(); | ||
| 2361 | return rv; | 2360 | return rv; |
| 2362 | } | 2361 | } |
| 2363 | 2362 | ||
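Renaming ip_check_mc() to ip_check_mc_rcu() and dropping the internal rcu_read_lock() moves the locking burden to the caller, which in the fast path already runs under RCU; a caller that does not must now bracket the call itself. A hypothetical caller sketch:

	/* The RCU read side is now the caller's job. */
	static int example_check_mc(struct net_device *dev, __be32 mc_addr,
				    __be32 src_addr, u16 proto)
	{
		struct in_device *in_dev;
		int ours = 0;

		rcu_read_lock();
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev)
			ours = ip_check_mc_rcu(in_dev, mc_addr, src_addr, proto);
		rcu_read_unlock();
		return ours;
	}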
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 97e5fb765265..6c0b7f4a3d7d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
| @@ -356,20 +356,23 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, | |||
| 356 | struct rtable *rt; | 356 | struct rtable *rt; |
| 357 | const struct inet_request_sock *ireq = inet_rsk(req); | 357 | const struct inet_request_sock *ireq = inet_rsk(req); |
| 358 | struct ip_options *opt = inet_rsk(req)->opt; | 358 | struct ip_options *opt = inet_rsk(req)->opt; |
| 359 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | 359 | struct flowi4 fl4 = { |
| 360 | .mark = sk->sk_mark, | 360 | .flowi4_oif = sk->sk_bound_dev_if, |
| 361 | .fl4_dst = ((opt && opt->srr) ? | 361 | .flowi4_mark = sk->sk_mark, |
| 362 | opt->faddr : ireq->rmt_addr), | 362 | .daddr = ((opt && opt->srr) ? |
| 363 | .fl4_src = ireq->loc_addr, | 363 | opt->faddr : ireq->rmt_addr), |
| 364 | .fl4_tos = RT_CONN_FLAGS(sk), | 364 | .saddr = ireq->loc_addr, |
| 365 | .proto = sk->sk_protocol, | 365 | .flowi4_tos = RT_CONN_FLAGS(sk), |
| 366 | .flags = inet_sk_flowi_flags(sk), | 366 | .flowi4_proto = sk->sk_protocol, |
| 367 | .fl_ip_sport = inet_sk(sk)->inet_sport, | 367 | .flowi4_flags = inet_sk_flowi_flags(sk), |
| 368 | .fl_ip_dport = ireq->rmt_port }; | 368 | .fl4_sport = inet_sk(sk)->inet_sport, |
| 369 | .fl4_dport = ireq->rmt_port, | ||
| 370 | }; | ||
| 369 | struct net *net = sock_net(sk); | 371 | struct net *net = sock_net(sk); |
| 370 | 372 | ||
| 371 | security_req_classify_flow(req, &fl); | 373 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); |
| 372 | if (ip_route_output_flow(net, &rt, &fl, sk, 0)) | 374 | rt = ip_route_output_flow(net, &fl4, sk); |
| 375 | if (IS_ERR(rt)) | ||
| 373 | goto no_route; | 376 | goto no_route; |
| 374 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 377 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) |
| 375 | goto route_err; | 378 | goto route_err; |
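The hunk above is a representative flowi-to-flowi4 conversion. The old struct flowi mixed IPv4, IPv6 and key fields behind a union; struct flowi4 is the typed IPv4-only view, with the generic members prefixed flowi4_. The mapping used here and in the other converted files:

	/* Old struct flowi field      New struct flowi4 field
	 * .oif                   ->   .flowi4_oif
	 * .mark                  ->   .flowi4_mark
	 * .fl4_dst               ->   .daddr
	 * .fl4_src               ->   .saddr
	 * .fl4_tos               ->   .flowi4_tos
	 * .proto                 ->   .flowi4_proto
	 * .flags                 ->   .flowi4_flags
	 * .fl_ip_sport           ->   .fl4_sport
	 * .fl_ip_dport           ->   .fl4_dport
	 *
	 * flowi4_to_flowi(&fl4) adapts a flowi4 for interfaces (security
	 * hooks, xfrm_lookup) that still take the generic struct flowi. */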
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2746c1fa6417..2ada17129fce 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
| @@ -858,7 +858,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 858 | nlmsg_len(nlh) < hdrlen) | 858 | nlmsg_len(nlh) < hdrlen) |
| 859 | return -EINVAL; | 859 | return -EINVAL; |
| 860 | 860 | ||
| 861 | if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { | 861 | if (nlh->nlmsg_flags & NLM_F_DUMP) { |
| 862 | if (nlmsg_attrlen(nlh, hdrlen)) { | 862 | if (nlmsg_attrlen(nlh, hdrlen)) { |
| 863 | struct nlattr *attr; | 863 | struct nlattr *attr; |
| 864 | 864 | ||
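The inet_diag change is subtler than it looks: NLM_F_DUMP is a composite of two flag bits, so the two tests differ exactly when only one of the bits is set. Worked through with the values from include/linux/netlink.h:

	#include <stdio.h>

	#define NLM_F_ROOT	0x100	/* values from include/linux/netlink.h */
	#define NLM_F_MATCH	0x200
	#define NLM_F_DUMP	(NLM_F_ROOT | NLM_F_MATCH)

	int main(void)
	{
		unsigned int flags = NLM_F_ROOT;	/* request with only ROOT set */

		/* old test: demands both bits */
		printf("%d\n", (flags & NLM_F_DUMP) == NLM_F_DUMP);	/* 0 */
		/* new test: either bit marks the request as a dump */
		printf("%d\n", (flags & NLM_F_DUMP) != 0);		/* 1 */
		return 0;
	}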
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c5af909cf701..3c8dfa16614d 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
| @@ -505,7 +505,9 @@ restart: | |||
| 505 | } | 505 | } |
| 506 | 506 | ||
| 507 | rcu_read_unlock(); | 507 | rcu_read_unlock(); |
| 508 | local_bh_disable(); | ||
| 508 | inet_twsk_deschedule(tw, twdr); | 509 | inet_twsk_deschedule(tw, twdr); |
| 510 | local_bh_enable(); | ||
| 509 | inet_twsk_put(tw); | 511 | inet_twsk_put(tw); |
| 510 | goto restart_rcu; | 512 | goto restart_rcu; |
| 511 | } | 513 | } |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d9bc85751c74..dd1b20eca1a2 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
| @@ -81,19 +81,19 @@ static const struct inet_peer peer_fake_node = { | |||
| 81 | 81 | ||
| 82 | struct inet_peer_base { | 82 | struct inet_peer_base { |
| 83 | struct inet_peer __rcu *root; | 83 | struct inet_peer __rcu *root; |
| 84 | spinlock_t lock; | 84 | seqlock_t lock; |
| 85 | int total; | 85 | int total; |
| 86 | }; | 86 | }; |
| 87 | 87 | ||
| 88 | static struct inet_peer_base v4_peers = { | 88 | static struct inet_peer_base v4_peers = { |
| 89 | .root = peer_avl_empty_rcu, | 89 | .root = peer_avl_empty_rcu, |
| 90 | .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock), | 90 | .lock = __SEQLOCK_UNLOCKED(v4_peers.lock), |
| 91 | .total = 0, | 91 | .total = 0, |
| 92 | }; | 92 | }; |
| 93 | 93 | ||
| 94 | static struct inet_peer_base v6_peers = { | 94 | static struct inet_peer_base v6_peers = { |
| 95 | .root = peer_avl_empty_rcu, | 95 | .root = peer_avl_empty_rcu, |
| 96 | .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock), | 96 | .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), |
| 97 | .total = 0, | 97 | .total = 0, |
| 98 | }; | 98 | }; |
| 99 | 99 | ||
| @@ -167,9 +167,9 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
| 167 | int i, n = (a->family == AF_INET ? 1 : 4); | 167 | int i, n = (a->family == AF_INET ? 1 : 4); |
| 168 | 168 | ||
| 169 | for (i = 0; i < n; i++) { | 169 | for (i = 0; i < n; i++) { |
| 170 | if (a->a6[i] == b->a6[i]) | 170 | if (a->addr.a6[i] == b->addr.a6[i]) |
| 171 | continue; | 171 | continue; |
| 172 | if (a->a6[i] < b->a6[i]) | 172 | if (a->addr.a6[i] < b->addr.a6[i]) |
| 173 | return -1; | 173 | return -1; |
| 174 | return 1; | 174 | return 1; |
| 175 | } | 175 | } |
| @@ -177,6 +177,9 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
| 177 | return 0; | 177 | return 0; |
| 178 | } | 178 | } |
| 179 | 179 | ||
| 180 | #define rcu_deref_locked(X, BASE) \ | ||
| 181 | rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock)) | ||
| 182 | |||
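The new rcu_deref_locked() macro shortens the repeated rcu_dereference_protected(..., lockdep_is_held(...)) pattern used throughout the rebalancing code below. The doubled '.lock.lock' is there because base->lock is now a seqlock_t, whose write side is serialized by an embedded spinlock. One expansion, written out as a sketch:

	static struct inet_peer *root_locked(struct inet_peer_base *base)
	{
		return rcu_dereference_protected(base->root,
						 lockdep_is_held(&base->lock.lock));
	}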
| 180 | /* | 183 | /* |
| 181 | * Called with local BH disabled and the pool lock held. | 184 | * Called with local BH disabled and the pool lock held. |
| 182 | */ | 185 | */ |
| @@ -187,8 +190,7 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
| 187 | \ | 190 | \ |
| 188 | stackptr = _stack; \ | 191 | stackptr = _stack; \ |
| 189 | *stackptr++ = &_base->root; \ | 192 | *stackptr++ = &_base->root; \ |
| 190 | for (u = rcu_dereference_protected(_base->root, \ | 193 | for (u = rcu_deref_locked(_base->root, _base); \ |
| 191 | lockdep_is_held(&_base->lock)); \ | ||
| 192 | u != peer_avl_empty; ) { \ | 194 | u != peer_avl_empty; ) { \ |
| 193 | int cmp = addr_compare(_daddr, &u->daddr); \ | 195 | int cmp = addr_compare(_daddr, &u->daddr); \ |
| 194 | if (cmp == 0) \ | 196 | if (cmp == 0) \ |
| @@ -198,23 +200,22 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
| 198 | else \ | 200 | else \ |
| 199 | v = &u->avl_right; \ | 201 | v = &u->avl_right; \ |
| 200 | *stackptr++ = v; \ | 202 | *stackptr++ = v; \ |
| 201 | u = rcu_dereference_protected(*v, \ | 203 | u = rcu_deref_locked(*v, _base); \ |
| 202 | lockdep_is_held(&_base->lock)); \ | ||
| 203 | } \ | 204 | } \ |
| 204 | u; \ | 205 | u; \ |
| 205 | }) | 206 | }) |
| 206 | 207 | ||
| 207 | /* | 208 | /* |
| 208 | * Called with rcu_read_lock_bh() | 209 | * Called with rcu_read_lock() |
| 209 | * Because we hold no lock against a writer, it's quite possible we fall | 210 | * Because we hold no lock against a writer, it's quite possible we fall
| 210 | * into an endless loop. | 211 | * into an endless loop.
| 211 | * But every pointer we follow is guaranteed to be valid thanks to RCU. | 212 | * But every pointer we follow is guaranteed to be valid thanks to RCU.
| 212 | * We exit from this function if the number of links exceeds PEER_MAXDEPTH | 213 | * We exit from this function if the number of links exceeds PEER_MAXDEPTH
| 213 | */ | 214 | */ |
| 214 | static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, | 215 | static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, |
| 215 | struct inet_peer_base *base) | 216 | struct inet_peer_base *base) |
| 216 | { | 217 | { |
| 217 | struct inet_peer *u = rcu_dereference_bh(base->root); | 218 | struct inet_peer *u = rcu_dereference(base->root); |
| 218 | int count = 0; | 219 | int count = 0; |
| 219 | 220 | ||
| 220 | while (u != peer_avl_empty) { | 221 | while (u != peer_avl_empty) { |
| @@ -230,9 +231,9 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, | |||
| 230 | return u; | 231 | return u; |
| 231 | } | 232 | } |
| 232 | if (cmp == -1) | 233 | if (cmp == -1) |
| 233 | u = rcu_dereference_bh(u->avl_left); | 234 | u = rcu_dereference(u->avl_left); |
| 234 | else | 235 | else |
| 235 | u = rcu_dereference_bh(u->avl_right); | 236 | u = rcu_dereference(u->avl_right); |
| 236 | if (unlikely(++count == PEER_MAXDEPTH)) | 237 | if (unlikely(++count == PEER_MAXDEPTH)) |
| 237 | break; | 238 | break; |
| 238 | } | 239 | } |
| @@ -246,13 +247,11 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, | |||
| 246 | struct inet_peer __rcu **v; \ | 247 | struct inet_peer __rcu **v; \ |
| 247 | *stackptr++ = &start->avl_left; \ | 248 | *stackptr++ = &start->avl_left; \ |
| 248 | v = &start->avl_left; \ | 249 | v = &start->avl_left; \ |
| 249 | for (u = rcu_dereference_protected(*v, \ | 250 | for (u = rcu_deref_locked(*v, base); \ |
| 250 | lockdep_is_held(&base->lock)); \ | ||
| 251 | u->avl_right != peer_avl_empty_rcu; ) { \ | 251 | u->avl_right != peer_avl_empty_rcu; ) { \ |
| 252 | v = &u->avl_right; \ | 252 | v = &u->avl_right; \ |
| 253 | *stackptr++ = v; \ | 253 | *stackptr++ = v; \ |
| 254 | u = rcu_dereference_protected(*v, \ | 254 | u = rcu_deref_locked(*v, base); \ |
| 255 | lockdep_is_held(&base->lock)); \ | ||
| 256 | } \ | 255 | } \ |
| 257 | u; \ | 256 | u; \ |
| 258 | }) | 257 | }) |
| @@ -271,21 +270,16 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
| 271 | 270 | ||
| 272 | while (stackend > stack) { | 271 | while (stackend > stack) { |
| 273 | nodep = *--stackend; | 272 | nodep = *--stackend; |
| 274 | node = rcu_dereference_protected(*nodep, | 273 | node = rcu_deref_locked(*nodep, base); |
| 275 | lockdep_is_held(&base->lock)); | 274 | l = rcu_deref_locked(node->avl_left, base); |
| 276 | l = rcu_dereference_protected(node->avl_left, | 275 | r = rcu_deref_locked(node->avl_right, base); |
| 277 | lockdep_is_held(&base->lock)); | ||
| 278 | r = rcu_dereference_protected(node->avl_right, | ||
| 279 | lockdep_is_held(&base->lock)); | ||
| 280 | lh = node_height(l); | 276 | lh = node_height(l); |
| 281 | rh = node_height(r); | 277 | rh = node_height(r); |
| 282 | if (lh > rh + 1) { /* l: RH+2 */ | 278 | if (lh > rh + 1) { /* l: RH+2 */ |
| 283 | struct inet_peer *ll, *lr, *lrl, *lrr; | 279 | struct inet_peer *ll, *lr, *lrl, *lrr; |
| 284 | int lrh; | 280 | int lrh; |
| 285 | ll = rcu_dereference_protected(l->avl_left, | 281 | ll = rcu_deref_locked(l->avl_left, base); |
| 286 | lockdep_is_held(&base->lock)); | 282 | lr = rcu_deref_locked(l->avl_right, base); |
| 287 | lr = rcu_dereference_protected(l->avl_right, | ||
| 288 | lockdep_is_held(&base->lock)); | ||
| 289 | lrh = node_height(lr); | 283 | lrh = node_height(lr); |
| 290 | if (lrh <= node_height(ll)) { /* ll: RH+1 */ | 284 | if (lrh <= node_height(ll)) { /* ll: RH+1 */ |
| 291 | RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ | 285 | RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ |
| @@ -296,10 +290,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
| 296 | l->avl_height = node->avl_height + 1; | 290 | l->avl_height = node->avl_height + 1; |
| 297 | RCU_INIT_POINTER(*nodep, l); | 291 | RCU_INIT_POINTER(*nodep, l); |
| 298 | } else { /* ll: RH, lr: RH+1 */ | 292 | } else { /* ll: RH, lr: RH+1 */ |
| 299 | lrl = rcu_dereference_protected(lr->avl_left, | 293 | lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */ |
| 300 | lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */ | 294 | lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */ |
| 301 | lrr = rcu_dereference_protected(lr->avl_right, | ||
| 302 | lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */ | ||
| 303 | RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ | 295 | RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ |
| 304 | RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ | 296 | RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ |
| 305 | node->avl_height = rh + 1; /* node: RH+1 */ | 297 | node->avl_height = rh + 1; /* node: RH+1 */ |
| @@ -314,10 +306,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
| 314 | } else if (rh > lh + 1) { /* r: LH+2 */ | 306 | } else if (rh > lh + 1) { /* r: LH+2 */ |
| 315 | struct inet_peer *rr, *rl, *rlr, *rll; | 307 | struct inet_peer *rr, *rl, *rlr, *rll; |
| 316 | int rlh; | 308 | int rlh; |
| 317 | rr = rcu_dereference_protected(r->avl_right, | 309 | rr = rcu_deref_locked(r->avl_right, base); |
| 318 | lockdep_is_held(&base->lock)); | 310 | rl = rcu_deref_locked(r->avl_left, base); |
| 319 | rl = rcu_dereference_protected(r->avl_left, | ||
| 320 | lockdep_is_held(&base->lock)); | ||
| 321 | rlh = node_height(rl); | 311 | rlh = node_height(rl); |
| 322 | if (rlh <= node_height(rr)) { /* rr: LH+1 */ | 312 | if (rlh <= node_height(rr)) { /* rr: LH+1 */ |
| 323 | RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ | 313 | RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ |
| @@ -328,10 +318,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |||
| 328 | r->avl_height = node->avl_height + 1; | 318 | r->avl_height = node->avl_height + 1; |
| 329 | RCU_INIT_POINTER(*nodep, r); | 319 | RCU_INIT_POINTER(*nodep, r); |
| 330 | } else { /* rr: RH, rl: RH+1 */ | 320 | } else { /* rr: RH, rl: RH+1 */ |
| 331 | rlr = rcu_dereference_protected(rl->avl_right, | 321 | rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */ |
| 332 | lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */ | 322 | rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */ |
| 333 | rll = rcu_dereference_protected(rl->avl_left, | ||
| 334 | lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */ | ||
| 335 | RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ | 323 | RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ |
| 336 | RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ | 324 | RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ |
| 337 | node->avl_height = lh + 1; /* node: LH+1 */ | 325 | node->avl_height = lh + 1; /* node: LH+1 */ |
| @@ -372,7 +360,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) | |||
| 372 | 360 | ||
| 373 | do_free = 0; | 361 | do_free = 0; |
| 374 | 362 | ||
| 375 | spin_lock_bh(&base->lock); | 363 | write_seqlock_bh(&base->lock); |
| 376 | /* Check the reference counter. It was artificially incremented by 1 | 364 | /* Check the reference counter. It was artificially incremented by 1 |
| 377 | * in cleanup() function to prevent sudden disappearing. If we can | 365 | * in cleanup() function to prevent sudden disappearing. If we can |
| 378 | * atomically (because of lockless readers) take this last reference, | 366 | * atomically (because of lockless readers) take this last reference, |
| @@ -392,8 +380,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) | |||
| 392 | /* look for a node to insert instead of p */ | 380 | /* look for a node to insert instead of p */ |
| 393 | struct inet_peer *t; | 381 | struct inet_peer *t; |
| 394 | t = lookup_rightempty(p, base); | 382 | t = lookup_rightempty(p, base); |
| 395 | BUG_ON(rcu_dereference_protected(*stackptr[-1], | 383 | BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); |
| 396 | lockdep_is_held(&base->lock)) != t); | ||
| 397 | **--stackptr = t->avl_left; | 384 | **--stackptr = t->avl_left; |
| 398 | /* t is removed, t->daddr > x->daddr for any | 385 | /* t is removed, t->daddr > x->daddr for any |
| 399 | * x in p->avl_left subtree. | 386 | * x in p->avl_left subtree. |
| @@ -409,10 +396,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) | |||
| 409 | base->total--; | 396 | base->total--; |
| 410 | do_free = 1; | 397 | do_free = 1; |
| 411 | } | 398 | } |
| 412 | spin_unlock_bh(&base->lock); | 399 | write_sequnlock_bh(&base->lock); |
| 413 | 400 | ||
| 414 | if (do_free) | 401 | if (do_free) |
| 415 | call_rcu_bh(&p->rcu, inetpeer_free_rcu); | 402 | call_rcu(&p->rcu, inetpeer_free_rcu); |
| 416 | else | 403 | else |
| 417 | /* The node is used again. Decrease the reference counter | 404 | /* The node is used again. Decrease the reference counter |
| 418 | * back. The loop "cleanup -> unlink_from_unused | 405 | * back. The loop "cleanup -> unlink_from_unused |
| @@ -475,15 +462,19 @@ static int cleanup_once(unsigned long ttl) | |||
| 475 | struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | 462 | struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) |
| 476 | { | 463 | { |
| 477 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; | 464 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; |
| 478 | struct inet_peer_base *base = family_to_base(AF_INET); | 465 | struct inet_peer_base *base = family_to_base(daddr->family); |
| 479 | struct inet_peer *p; | 466 | struct inet_peer *p; |
| 467 | unsigned int sequence; | ||
| 468 | int invalidated; | ||
| 480 | 469 | ||
| 481 | /* Look up for the address quickly, lockless. | 470 | /* Look up for the address quickly, lockless. |
| 482 | * Because of a concurrent writer, we might not find an existing entry. | 471 | * Because of a concurrent writer, we might not find an existing entry. |
| 483 | */ | 472 | */ |
| 484 | rcu_read_lock_bh(); | 473 | rcu_read_lock(); |
| 485 | p = lookup_rcu_bh(daddr, base); | 474 | sequence = read_seqbegin(&base->lock); |
| 486 | rcu_read_unlock_bh(); | 475 | p = lookup_rcu(daddr, base); |
| 476 | invalidated = read_seqretry(&base->lock, sequence); | ||
| 477 | rcu_read_unlock(); | ||
| 487 | 478 | ||
| 488 | if (p) { | 479 | if (p) { |
| 489 | /* The existing node has been found. | 480 | /* The existing node has been found. |
| @@ -493,14 +484,18 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | |||
| 493 | return p; | 484 | return p; |
| 494 | } | 485 | } |
| 495 | 486 | ||
| 487 | /* If no writer did a change during our lookup, we can return early. */ | ||
| 488 | if (!create && !invalidated) | ||
| 489 | return NULL; | ||
| 490 | |||
| 496 | /* retry an exact lookup, taking the lock before. | 491 | /* retry an exact lookup, taking the lock before. |
| 497 | * At least, nodes should be hot in our cache. | 492 | * At least, nodes should be hot in our cache. |
| 498 | */ | 493 | */ |
| 499 | spin_lock_bh(&base->lock); | 494 | write_seqlock_bh(&base->lock); |
| 500 | p = lookup(daddr, stack, base); | 495 | p = lookup(daddr, stack, base); |
| 501 | if (p != peer_avl_empty) { | 496 | if (p != peer_avl_empty) { |
| 502 | atomic_inc(&p->refcnt); | 497 | atomic_inc(&p->refcnt); |
| 503 | spin_unlock_bh(&base->lock); | 498 | write_sequnlock_bh(&base->lock); |
| 504 | /* Remove the entry from unused list if it was there. */ | 499 | /* Remove the entry from unused list if it was there. */ |
| 505 | unlink_from_unused(p); | 500 | unlink_from_unused(p); |
| 506 | return p; | 501 | return p; |
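The lockless path above uses the seqlock in an unusual way. The standard reader pattern retries until no writer interfered; inet_getpeer() instead records whether a writer ran ('invalidated') and uses that only to decide if a failed lookup can be trusted: with !create, a miss with no concurrent writer is definitive and returns NULL without ever taking the lock. The conventional pattern, for contrast (sketch reading a field that exists in this file):

	static int read_total(struct inet_peer_base *base)
	{
		unsigned int seq;
		int total;

		do {
			seq = read_seqbegin(&base->lock);
			total = base->total;	/* read the shared state */
		} while (read_seqretry(&base->lock, seq));
		return total;
	}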
| @@ -510,8 +505,14 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | |||
| 510 | p->daddr = *daddr; | 505 | p->daddr = *daddr; |
| 511 | atomic_set(&p->refcnt, 1); | 506 | atomic_set(&p->refcnt, 1); |
| 512 | atomic_set(&p->rid, 0); | 507 | atomic_set(&p->rid, 0); |
| 513 | atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); | 508 | atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4)); |
| 514 | p->tcp_ts_stamp = 0; | 509 | p->tcp_ts_stamp = 0; |
| 510 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; | ||
| 511 | p->rate_tokens = 0; | ||
| 512 | p->rate_last = 0; | ||
| 513 | p->pmtu_expires = 0; | ||
| 514 | p->pmtu_orig = 0; | ||
| 515 | memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); | ||
| 515 | INIT_LIST_HEAD(&p->unused); | 516 | INIT_LIST_HEAD(&p->unused); |
| 516 | 517 | ||
| 517 | 518 | ||
| @@ -519,7 +520,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | |||
| 519 | link_to_pool(p, base); | 520 | link_to_pool(p, base); |
| 520 | base->total++; | 521 | base->total++; |
| 521 | } | 522 | } |
| 522 | spin_unlock_bh(&base->lock); | 523 | write_sequnlock_bh(&base->lock); |
| 523 | 524 | ||
| 524 | if (base->total >= inet_peer_threshold) | 525 | if (base->total >= inet_peer_threshold) |
| 525 | /* Remove one less-recently-used entry. */ | 526 | /* Remove one less-recently-used entry. */ |
| @@ -579,3 +580,44 @@ void inet_putpeer(struct inet_peer *p) | |||
| 579 | local_bh_enable(); | 580 | local_bh_enable(); |
| 580 | } | 581 | } |
| 581 | EXPORT_SYMBOL_GPL(inet_putpeer); | 582 | EXPORT_SYMBOL_GPL(inet_putpeer); |
| 583 | |||
| 584 | /* | ||
| 585 | * Check transmit rate limitation for given message. | ||
| 586 | * The rate information is held in the inet_peer entries now. | ||
| 587 | * This function is generic and could be used for other purposes | ||
| 588 | * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. | ||
| 589 | * | ||
| 590 | * Note that the same inet_peer fields are modified by functions in | ||
| 591 | * route.c too, but these work for packet destinations while xrlim_allow | ||
| 592 | * works for icmp destinations. This means the rate limiting information | ||
| 593 | * for one "ip object" is shared - and these ICMPs are twice limited: | ||
| 594 | * by source and by destination. | ||
| 595 | * | ||
| 596 | * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate | ||
| 597 | * SHOULD allow setting of rate limits | ||
| 598 | * | ||
| 599 | * Shared between ICMPv4 and ICMPv6. | ||
| 600 | */ | ||
| 601 | #define XRLIM_BURST_FACTOR 6 | ||
| 602 | bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) | ||
| 603 | { | ||
| 604 | unsigned long now, token; | ||
| 605 | bool rc = false; | ||
| 606 | |||
| 607 | if (!peer) | ||
| 608 | return true; | ||
| 609 | |||
| 610 | token = peer->rate_tokens; | ||
| 611 | now = jiffies; | ||
| 612 | token += now - peer->rate_last; | ||
| 613 | peer->rate_last = now; | ||
| 614 | if (token > XRLIM_BURST_FACTOR * timeout) | ||
| 615 | token = XRLIM_BURST_FACTOR * timeout; | ||
| 616 | if (token >= timeout) { | ||
| 617 | token -= timeout; | ||
| 618 | rc = true; | ||
| 619 | } | ||
| 620 | peer->rate_tokens = token; | ||
| 621 | return rc; | ||
| 622 | } | ||
| 623 | EXPORT_SYMBOL(inet_peer_xrlim_allow); | ||
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index eb68a0e34e49..da5941f18c3c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
| @@ -769,18 +769,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
| 769 | tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); | 769 | tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); |
| 770 | } | 770 | } |
| 771 | 771 | ||
| 772 | { | 772 | rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr, |
| 773 | struct flowi fl = { | 773 | tunnel->parms.o_key, RT_TOS(tos), |
| 774 | .oif = tunnel->parms.link, | 774 | tunnel->parms.link); |
| 775 | .fl4_dst = dst, | 775 | if (IS_ERR(rt)) { |
| 776 | .fl4_src = tiph->saddr, | 776 | dev->stats.tx_carrier_errors++; |
| 777 | .fl4_tos = RT_TOS(tos), | 777 | goto tx_error; |
| 778 | .fl_gre_key = tunnel->parms.o_key | ||
| 779 | }; | ||
| 780 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { | ||
| 781 | dev->stats.tx_carrier_errors++; | ||
| 782 | goto tx_error; | ||
| 783 | } | ||
| 784 | } | 778 | } |
| 785 | tdev = rt->dst.dev; | 779 | tdev = rt->dst.dev; |
| 786 | 780 | ||
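As with ip_route_output_ports() in igmp.c, ip_route_output_gre() is a thin typed wrapper. A plausible shape, inferred from the flowi fields the deleted open-coded version filled in (a sketch, not the header's exact definition):

	static inline struct rtable *ip_route_output_gre(struct net *net,
							 __be32 daddr, __be32 saddr,
							 __be32 gre_key, __u8 tos,
							 int oif)
	{
		struct flowi4 fl4 = {
			.flowi4_oif = oif,
			.daddr = daddr,
			.saddr = saddr,
			.flowi4_tos = tos,
			.flowi4_proto = IPPROTO_GRE,
			.fl4_gre_key = gre_key,	/* matches tunnel->parms.o_key above */
		};
		return ip_route_output_key(net, &fl4);
	}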
| @@ -944,17 +938,13 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) | |||
| 944 | /* Guess output device to choose reasonable mtu and needed_headroom */ | 938 | /* Guess output device to choose reasonable mtu and needed_headroom */ |
| 945 | 939 | ||
| 946 | if (iph->daddr) { | 940 | if (iph->daddr) { |
| 947 | struct flowi fl = { | 941 | struct rtable *rt = ip_route_output_gre(dev_net(dev), |
| 948 | .oif = tunnel->parms.link, | 942 | iph->daddr, iph->saddr, |
| 949 | .fl4_dst = iph->daddr, | 943 | tunnel->parms.o_key, |
| 950 | .fl4_src = iph->saddr, | 944 | RT_TOS(iph->tos), |
| 951 | .fl4_tos = RT_TOS(iph->tos), | 945 | tunnel->parms.link); |
| 952 | .proto = IPPROTO_GRE, | 946 | |
| 953 | .fl_gre_key = tunnel->parms.o_key | 947 | if (!IS_ERR(rt)) { |
| 954 | }; | ||
| 955 | struct rtable *rt; | ||
| 956 | |||
| 957 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | ||
| 958 | tdev = rt->dst.dev; | 948 | tdev = rt->dst.dev; |
| 959 | ip_rt_put(rt); | 949 | ip_rt_put(rt); |
| 960 | } | 950 | } |
| @@ -1206,17 +1196,14 @@ static int ipgre_open(struct net_device *dev) | |||
| 1206 | struct ip_tunnel *t = netdev_priv(dev); | 1196 | struct ip_tunnel *t = netdev_priv(dev); |
| 1207 | 1197 | ||
| 1208 | if (ipv4_is_multicast(t->parms.iph.daddr)) { | 1198 | if (ipv4_is_multicast(t->parms.iph.daddr)) { |
| 1209 | struct flowi fl = { | 1199 | struct rtable *rt = ip_route_output_gre(dev_net(dev), |
| 1210 | .oif = t->parms.link, | 1200 | t->parms.iph.daddr, |
| 1211 | .fl4_dst = t->parms.iph.daddr, | 1201 | t->parms.iph.saddr, |
| 1212 | .fl4_src = t->parms.iph.saddr, | 1202 | t->parms.o_key, |
| 1213 | .fl4_tos = RT_TOS(t->parms.iph.tos), | 1203 | RT_TOS(t->parms.iph.tos), |
| 1214 | .proto = IPPROTO_GRE, | 1204 | t->parms.link); |
| 1215 | .fl_gre_key = t->parms.o_key | 1205 | |
| 1216 | }; | 1206 | if (IS_ERR(rt)) |
| 1217 | struct rtable *rt; | ||
| 1218 | |||
| 1219 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) | ||
| 1220 | return -EADDRNOTAVAIL; | 1207 | return -EADDRNOTAVAIL; |
| 1221 | dev = rt->dst.dev; | 1208 | dev = rt->dst.dev; |
| 1222 | ip_rt_put(rt); | 1209 | ip_rt_put(rt); |
| @@ -1764,4 +1751,4 @@ module_exit(ipgre_fini); | |||
| 1764 | MODULE_LICENSE("GPL"); | 1751 | MODULE_LICENSE("GPL"); |
| 1765 | MODULE_ALIAS_RTNL_LINK("gre"); | 1752 | MODULE_ALIAS_RTNL_LINK("gre"); |
| 1766 | MODULE_ALIAS_RTNL_LINK("gretap"); | 1753 | MODULE_ALIAS_RTNL_LINK("gretap"); |
| 1767 | MODULE_ALIAS("gre0"); | 1754 | MODULE_ALIAS_NETDEV("gre0"); |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d859bcc26cb7..d7b2b0987a3b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
| @@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
| 340 | } | 340 | } |
| 341 | } | 341 | } |
| 342 | 342 | ||
| 343 | #ifdef CONFIG_NET_CLS_ROUTE | 343 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 344 | if (unlikely(skb_dst(skb)->tclassid)) { | 344 | if (unlikely(skb_dst(skb)->tclassid)) { |
| 345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); | 345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); |
| 346 | u32 idx = skb_dst(skb)->tclassid; | 346 | u32 idx = skb_dst(skb)->tclassid; |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 1906fa35860c..28a736f3442f 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
| @@ -140,11 +140,11 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
| 140 | } else { | 140 | } else { |
| 141 | dopt->ts_needtime = 0; | 141 | dopt->ts_needtime = 0; |
| 142 | 142 | ||
| 143 | if (soffset + 8 <= optlen) { | 143 | if (soffset + 7 <= optlen) { |
| 144 | __be32 addr; | 144 | __be32 addr; |
| 145 | 145 | ||
| 146 | memcpy(&addr, sptr+soffset-1, 4); | 146 | memcpy(&addr, dptr+soffset-1, 4); |
| 147 | if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) { | 147 | if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) { |
| 148 | dopt->ts_needtime = 1; | 148 | dopt->ts_needtime = 1; |
| 149 | soffset += 8; | 149 | soffset += 8; |
| 150 | } | 150 | } |
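Three things change in the timestamp-option echo above: the bound check, the buffer the prespecified address is read from (the reply options being built at dptr, not the source options at sptr), and the address-type test. The arithmetic behind the new bound, assuming the usual RFC 791 layout of one 4-byte address plus one 4-byte timestamp per slot and a 1-based soffset:

	#include <stdbool.h>

	/* A prespecified-timestamp slot is 8 bytes: 4 address + 4 timestamp.
	 * With a 1-based soffset the slot spans bytes soffset..soffset+7, so
	 * it fits in the option iff soffset + 7 <= optlen; the old "+ 8" test
	 * rejected a slot ending exactly at the option boundary. */
	static bool ts_prespec_slot_fits(int soffset, int optlen)
	{
		return soffset + 7 <= optlen;
	}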
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04c7b3ba6b39..67f241b97649 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
| @@ -339,25 +339,19 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
| 339 | if(opt && opt->srr) | 339 | if(opt && opt->srr) |
| 340 | daddr = opt->faddr; | 340 | daddr = opt->faddr; |
| 341 | 341 | ||
| 342 | { | 342 | /* If this fails, retransmit mechanism of transport layer will |
| 343 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | 343 | * keep trying until route appears or the connection times |
| 344 | .mark = sk->sk_mark, | 344 | * itself out. |
| 345 | .fl4_dst = daddr, | 345 | */ |
| 346 | .fl4_src = inet->inet_saddr, | 346 | rt = ip_route_output_ports(sock_net(sk), sk, |
| 347 | .fl4_tos = RT_CONN_FLAGS(sk), | 347 | daddr, inet->inet_saddr, |
| 348 | .proto = sk->sk_protocol, | 348 | inet->inet_dport, |
| 349 | .flags = inet_sk_flowi_flags(sk), | 349 | inet->inet_sport, |
| 350 | .fl_ip_sport = inet->inet_sport, | 350 | sk->sk_protocol, |
| 351 | .fl_ip_dport = inet->inet_dport }; | 351 | RT_CONN_FLAGS(sk), |
| 352 | 352 | sk->sk_bound_dev_if); | |
| 353 | /* If this fails, retransmit mechanism of transport layer will | 353 | if (IS_ERR(rt)) |
| 354 | * keep trying until route appears or the connection times | 354 | goto no_route; |
| 355 | * itself out. | ||
| 356 | */ | ||
| 357 | security_sk_classify_flow(sk, &fl); | ||
| 358 | if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) | ||
| 359 | goto no_route; | ||
| 360 | } | ||
| 361 | sk_setup_caps(sk, &rt->dst); | 355 | sk_setup_caps(sk, &rt->dst); |
| 362 | } | 356 | } |
| 363 | skb_dst_set_noref(skb, &rt->dst); | 357 | skb_dst_set_noref(skb, &rt->dst); |
| @@ -733,6 +727,7 @@ csum_page(struct page *page, int offset, int copy) | |||
| 733 | } | 727 | } |
| 734 | 728 | ||
| 735 | static inline int ip_ufo_append_data(struct sock *sk, | 729 | static inline int ip_ufo_append_data(struct sock *sk, |
| 730 | struct sk_buff_head *queue, | ||
| 736 | int getfrag(void *from, char *to, int offset, int len, | 731 | int getfrag(void *from, char *to, int offset, int len, |
| 737 | int odd, struct sk_buff *skb), | 732 | int odd, struct sk_buff *skb), |
| 738 | void *from, int length, int hh_len, int fragheaderlen, | 733 | void *from, int length, int hh_len, int fragheaderlen, |
| @@ -745,7 +740,7 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
| 745 | * device, so create one single skb packet containing complete | 740 | * device, so create one single skb packet containing complete |
| 746 | * udp datagram | 741 | * udp datagram |
| 747 | */ | 742 | */ |
| 748 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { | 743 | if ((skb = skb_peek_tail(queue)) == NULL) { |
| 749 | skb = sock_alloc_send_skb(sk, | 744 | skb = sock_alloc_send_skb(sk, |
| 750 | hh_len + fragheaderlen + transhdrlen + 20, | 745 | hh_len + fragheaderlen + transhdrlen + 20, |
| 751 | (flags & MSG_DONTWAIT), &err); | 746 | (flags & MSG_DONTWAIT), &err); |
| @@ -767,40 +762,28 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
| 767 | 762 | ||
| 768 | skb->ip_summed = CHECKSUM_PARTIAL; | 763 | skb->ip_summed = CHECKSUM_PARTIAL; |
| 769 | skb->csum = 0; | 764 | skb->csum = 0; |
| 770 | sk->sk_sndmsg_off = 0; | ||
| 771 | 765 | ||
| 772 | /* specify the length of each IP datagram fragment */ | 766 | /* specify the length of each IP datagram fragment */ |
| 773 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; | 767 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; |
| 774 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; | 768 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; |
| 775 | __skb_queue_tail(&sk->sk_write_queue, skb); | 769 | __skb_queue_tail(queue, skb); |
| 776 | } | 770 | } |
| 777 | 771 | ||
| 778 | return skb_append_datato_frags(sk, skb, getfrag, from, | 772 | return skb_append_datato_frags(sk, skb, getfrag, from, |
| 779 | (length - transhdrlen)); | 773 | (length - transhdrlen)); |
| 780 | } | 774 | } |
| 781 | 775 | ||
| 782 | /* | 776 | static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, |
| 783 | * ip_append_data() and ip_append_page() can make one large IP datagram | 777 | struct inet_cork *cork, |
| 784 | * from many pieces of data. Each piece will be held on the socket | 778 | int getfrag(void *from, char *to, int offset,
| 785 | * until ip_push_pending_frames() is called. Each piece can be a page | 779 | int len, int odd, struct sk_buff *skb), |
| 786 | * or non-page data. | 780 | void *from, int length, int transhdrlen, |
| 787 | * | 781 | unsigned int flags) |
| 788 | * Not only UDP, other transport protocols - e.g. raw sockets - can use | ||
| 789 | * this interface potentially. | ||
| 790 | * | ||
| 791 | * LATER: length must be adjusted by pad at tail, when it is required. | ||
| 792 | */ | ||
| 793 | int ip_append_data(struct sock *sk, | ||
| 794 | int getfrag(void *from, char *to, int offset, int len, | ||
| 795 | int odd, struct sk_buff *skb), | ||
| 796 | void *from, int length, int transhdrlen, | ||
| 797 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
| 798 | unsigned int flags) | ||
| 799 | { | 782 | { |
| 800 | struct inet_sock *inet = inet_sk(sk); | 783 | struct inet_sock *inet = inet_sk(sk); |
| 801 | struct sk_buff *skb; | 784 | struct sk_buff *skb; |
| 802 | 785 | ||
| 803 | struct ip_options *opt = NULL; | 786 | struct ip_options *opt = cork->opt; |
| 804 | int hh_len; | 787 | int hh_len; |
| 805 | int exthdrlen; | 788 | int exthdrlen; |
| 806 | int mtu; | 789 | int mtu; |
| @@ -809,58 +792,19 @@ int ip_append_data(struct sock *sk, | |||
| 809 | int offset = 0; | 792 | int offset = 0; |
| 810 | unsigned int maxfraglen, fragheaderlen; | 793 | unsigned int maxfraglen, fragheaderlen; |
| 811 | int csummode = CHECKSUM_NONE; | 794 | int csummode = CHECKSUM_NONE; |
| 812 | struct rtable *rt; | 795 | struct rtable *rt = (struct rtable *)cork->dst; |
| 813 | 796 | ||
| 814 | if (flags&MSG_PROBE) | 797 | exthdrlen = transhdrlen ? rt->dst.header_len : 0; |
| 815 | return 0; | 798 | length += exthdrlen; |
| 816 | 799 | transhdrlen += exthdrlen; | |
| 817 | if (skb_queue_empty(&sk->sk_write_queue)) { | 800 | mtu = cork->fragsize; |
| 818 | /* | ||
| 819 | * setup for corking. | ||
| 820 | */ | ||
| 821 | opt = ipc->opt; | ||
| 822 | if (opt) { | ||
| 823 | if (inet->cork.opt == NULL) { | ||
| 824 | inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); | ||
| 825 | if (unlikely(inet->cork.opt == NULL)) | ||
| 826 | return -ENOBUFS; | ||
| 827 | } | ||
| 828 | memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen); | ||
| 829 | inet->cork.flags |= IPCORK_OPT; | ||
| 830 | inet->cork.addr = ipc->addr; | ||
| 831 | } | ||
| 832 | rt = *rtp; | ||
| 833 | if (unlikely(!rt)) | ||
| 834 | return -EFAULT; | ||
| 835 | /* | ||
| 836 | * We steal a reference to this route; the caller should not release it | ||
| 837 | */ | ||
| 838 | *rtp = NULL; | ||
| 839 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? | ||
| 840 | rt->dst.dev->mtu : | ||
| 841 | dst_mtu(rt->dst.path); | ||
| 842 | inet->cork.dst = &rt->dst; | ||
| 843 | inet->cork.length = 0; | ||
| 844 | sk->sk_sndmsg_page = NULL; | ||
| 845 | sk->sk_sndmsg_off = 0; | ||
| 846 | exthdrlen = rt->dst.header_len; | ||
| 847 | length += exthdrlen; | ||
| 848 | transhdrlen += exthdrlen; | ||
| 849 | } else { | ||
| 850 | rt = (struct rtable *)inet->cork.dst; | ||
| 851 | if (inet->cork.flags & IPCORK_OPT) | ||
| 852 | opt = inet->cork.opt; | ||
| 853 | 801 | ||
| 854 | transhdrlen = 0; | ||
| 855 | exthdrlen = 0; | ||
| 856 | mtu = inet->cork.fragsize; | ||
| 857 | } | ||
| 858 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); | 802 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
| 859 | 803 | ||
| 860 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 804 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
| 861 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 805 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
| 862 | 806 | ||
| 863 | if (inet->cork.length + length > 0xFFFF - fragheaderlen) { | 807 | if (cork->length + length > 0xFFFF - fragheaderlen) { |
| 864 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, | 808 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, |
| 865 | mtu-exthdrlen); | 809 | mtu-exthdrlen); |
| 866 | return -EMSGSIZE; | 810 | return -EMSGSIZE; |
| @@ -876,15 +820,15 @@ int ip_append_data(struct sock *sk, | |||
| 876 | !exthdrlen) | 820 | !exthdrlen) |
| 877 | csummode = CHECKSUM_PARTIAL; | 821 | csummode = CHECKSUM_PARTIAL; |
| 878 | 822 | ||
| 879 | skb = skb_peek_tail(&sk->sk_write_queue); | 823 | skb = skb_peek_tail(queue); |
| 880 | 824 | ||
| 881 | inet->cork.length += length; | 825 | cork->length += length; |
| 882 | if (((length > mtu) || (skb && skb_is_gso(skb))) && | 826 | if (((length > mtu) || (skb && skb_is_gso(skb))) && |
| 883 | (sk->sk_protocol == IPPROTO_UDP) && | 827 | (sk->sk_protocol == IPPROTO_UDP) && |
| 884 | (rt->dst.dev->features & NETIF_F_UFO)) { | 828 | (rt->dst.dev->features & NETIF_F_UFO)) { |
| 885 | err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, | 829 | err = ip_ufo_append_data(sk, queue, getfrag, from, length, |
| 886 | fragheaderlen, transhdrlen, mtu, | 830 | hh_len, fragheaderlen, transhdrlen, |
| 887 | flags); | 831 | mtu, flags); |
| 888 | if (err) | 832 | if (err) |
| 889 | goto error; | 833 | goto error; |
| 890 | return 0; | 834 | return 0; |
| @@ -961,7 +905,7 @@ alloc_new_skb: | |||
| 961 | else | 905 | else |
| 962 | /* only the initial fragment is | 906 | /* only the initial fragment is |
| 963 | time stamped */ | 907 | time stamped */ |
| 964 | ipc->tx_flags = 0; | 908 | cork->tx_flags = 0; |
| 965 | } | 909 | } |
| 966 | if (skb == NULL) | 910 | if (skb == NULL) |
| 967 | goto error; | 911 | goto error; |
| @@ -972,7 +916,7 @@ alloc_new_skb: | |||
| 972 | skb->ip_summed = csummode; | 916 | skb->ip_summed = csummode; |
| 973 | skb->csum = 0; | 917 | skb->csum = 0; |
| 974 | skb_reserve(skb, hh_len); | 918 | skb_reserve(skb, hh_len); |
| 975 | skb_shinfo(skb)->tx_flags = ipc->tx_flags; | 919 | skb_shinfo(skb)->tx_flags = cork->tx_flags; |
| 976 | 920 | ||
| 977 | /* | 921 | /* |
| 978 | * Find where to start putting bytes. | 922 | * Find where to start putting bytes. |
| @@ -1009,7 +953,7 @@ alloc_new_skb: | |||
| 1009 | /* | 953 | /* |
| 1010 | * Put the packet on the pending queue. | 954 | * Put the packet on the pending queue. |
| 1011 | */ | 955 | */ |
| 1012 | __skb_queue_tail(&sk->sk_write_queue, skb); | 956 | __skb_queue_tail(queue, skb); |
| 1013 | continue; | 957 | continue; |
| 1014 | } | 958 | } |
| 1015 | 959 | ||
| @@ -1029,8 +973,8 @@ alloc_new_skb: | |||
| 1029 | } else { | 973 | } else { |
| 1030 | int i = skb_shinfo(skb)->nr_frags; | 974 | int i = skb_shinfo(skb)->nr_frags; |
| 1031 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; | 975 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; |
| 1032 | struct page *page = sk->sk_sndmsg_page; | 976 | struct page *page = cork->page; |
| 1033 | int off = sk->sk_sndmsg_off; | 977 | int off = cork->off; |
| 1034 | unsigned int left; | 978 | unsigned int left; |
| 1035 | 979 | ||
| 1036 | if (page && (left = PAGE_SIZE - off) > 0) { | 980 | if (page && (left = PAGE_SIZE - off) > 0) { |
| @@ -1042,7 +986,7 @@ alloc_new_skb: | |||
| 1042 | goto error; | 986 | goto error; |
| 1043 | } | 987 | } |
| 1044 | get_page(page); | 988 | get_page(page); |
| 1045 | skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); | 989 | skb_fill_page_desc(skb, i, page, off, 0); |
| 1046 | frag = &skb_shinfo(skb)->frags[i]; | 990 | frag = &skb_shinfo(skb)->frags[i]; |
| 1047 | } | 991 | } |
| 1048 | } else if (i < MAX_SKB_FRAGS) { | 992 | } else if (i < MAX_SKB_FRAGS) { |
| @@ -1053,8 +997,8 @@ alloc_new_skb: | |||
| 1053 | err = -ENOMEM; | 997 | err = -ENOMEM; |
| 1054 | goto error; | 998 | goto error; |
| 1055 | } | 999 | } |
| 1056 | sk->sk_sndmsg_page = page; | 1000 | cork->page = page; |
| 1057 | sk->sk_sndmsg_off = 0; | 1001 | cork->off = 0; |
| 1058 | 1002 | ||
| 1059 | skb_fill_page_desc(skb, i, page, 0, 0); | 1003 | skb_fill_page_desc(skb, i, page, 0, 0); |
| 1060 | frag = &skb_shinfo(skb)->frags[i]; | 1004 | frag = &skb_shinfo(skb)->frags[i]; |
| @@ -1066,7 +1010,7 @@ alloc_new_skb: | |||
| 1066 | err = -EFAULT; | 1010 | err = -EFAULT; |
| 1067 | goto error; | 1011 | goto error; |
| 1068 | } | 1012 | } |
| 1069 | sk->sk_sndmsg_off += copy; | 1013 | cork->off += copy; |
| 1070 | frag->size += copy; | 1014 | frag->size += copy; |
| 1071 | skb->len += copy; | 1015 | skb->len += copy; |
| 1072 | skb->data_len += copy; | 1016 | skb->data_len += copy; |
| @@ -1080,11 +1024,87 @@ alloc_new_skb: | |||
| 1080 | return 0; | 1024 | return 0; |
| 1081 | 1025 | ||
| 1082 | error: | 1026 | error: |
| 1083 | inet->cork.length -= length; | 1027 | cork->length -= length; |
| 1084 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); | 1028 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); |
| 1085 | return err; | 1029 | return err; |
| 1086 | } | 1030 | } |
| 1087 | 1031 | ||
| 1032 | static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, | ||
| 1033 | struct ipcm_cookie *ipc, struct rtable **rtp) | ||
| 1034 | { | ||
| 1035 | struct inet_sock *inet = inet_sk(sk); | ||
| 1036 | struct ip_options *opt; | ||
| 1037 | struct rtable *rt; | ||
| 1038 | |||
| 1039 | /* | ||
| 1040 | * setup for corking. | ||
| 1041 | */ | ||
| 1042 | opt = ipc->opt; | ||
| 1043 | if (opt) { | ||
| 1044 | if (cork->opt == NULL) { | ||
| 1045 | cork->opt = kmalloc(sizeof(struct ip_options) + 40, | ||
| 1046 | sk->sk_allocation); | ||
| 1047 | if (unlikely(cork->opt == NULL)) | ||
| 1048 | return -ENOBUFS; | ||
| 1049 | } | ||
| 1050 | memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); | ||
| 1051 | cork->flags |= IPCORK_OPT; | ||
| 1052 | cork->addr = ipc->addr; | ||
| 1053 | } | ||
| 1054 | rt = *rtp; | ||
| 1055 | if (unlikely(!rt)) | ||
| 1056 | return -EFAULT; | ||
| 1057 | /* | ||
| 1058 | * We steal a reference to this route; the caller should not release it | ||
| 1059 | */ | ||
| 1060 | *rtp = NULL; | ||
| 1061 | cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? | ||
| 1062 | rt->dst.dev->mtu : dst_mtu(rt->dst.path); | ||
| 1063 | cork->dst = &rt->dst; | ||
| 1064 | cork->length = 0; | ||
| 1065 | cork->tx_flags = ipc->tx_flags; | ||
| 1066 | cork->page = NULL; | ||
| 1067 | cork->off = 0; | ||
| 1068 | |||
| 1069 | return 0; | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | /* | ||
| 1073 | * ip_append_data() and ip_append_page() can make one large IP datagram | ||
| 1074 | * from many pieces of data. Each piece will be held on the socket | ||
| 1075 | * until ip_push_pending_frames() is called. Each piece can be a page | ||
| 1076 | * or non-page data. | ||
| 1077 | * | ||
| 1078 | * Besides UDP, other transport protocols - e.g. raw sockets - can | ||
| 1079 | * potentially use this interface. | ||
| 1080 | * | ||
| 1081 | * LATER: length must be adjusted for tail padding, when required. | ||
| 1082 | */ | ||
| 1083 | int ip_append_data(struct sock *sk, | ||
| 1084 | int getfrag(void *from, char *to, int offset, int len, | ||
| 1085 | int odd, struct sk_buff *skb), | ||
| 1086 | void *from, int length, int transhdrlen, | ||
| 1087 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
| 1088 | unsigned int flags) | ||
| 1089 | { | ||
| 1090 | struct inet_sock *inet = inet_sk(sk); | ||
| 1091 | int err; | ||
| 1092 | |||
| 1093 | if (flags&MSG_PROBE) | ||
| 1094 | return 0; | ||
| 1095 | |||
| 1096 | if (skb_queue_empty(&sk->sk_write_queue)) { | ||
| 1097 | err = ip_setup_cork(sk, &inet->cork, ipc, rtp); | ||
| 1098 | if (err) | ||
| 1099 | return err; | ||
| 1100 | } else { | ||
| 1101 | transhdrlen = 0; | ||
| 1102 | } | ||
| 1103 | |||
| 1104 | return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag, | ||
| 1105 | from, length, transhdrlen, flags); | ||
| 1106 | } | ||
| 1107 | |||
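The refactoring above splits the old monolithic appender: __ip_append_data() now operates on an explicit queue and cork, while ip_append_data() keeps the historical behaviour of corking on sk->sk_write_queue. A minimal sketch of the classic locked caller pattern follows; the function name is hypothetical, and it assumes ip_generic_getfrag()'s iovec convention for the opaque from pointer.

	static int example_corked_send(struct sock *sk, struct ipcm_cookie *ipc,
				       struct rtable **rtp, struct iovec *iov,
				       int len)
	{
		int err;

		lock_sock(sk);
		/* The first append on an empty write queue also runs
		 * ip_setup_cork(); later appends just add data. */
		err = ip_append_data(sk, ip_generic_getfrag, iov, len,
				     0 /* transhdrlen */, ipc, rtp,
				     0 /* flags */);
		if (err)
			ip_flush_pending_frames(sk);	/* drop the partial datagram */
		else
			err = ip_push_pending_frames(sk); /* build it and transmit */
		release_sock(sk);
		return err;
	}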
| 1088 | ssize_t ip_append_page(struct sock *sk, struct page *page, | 1108 | ssize_t ip_append_page(struct sock *sk, struct page *page, |
| 1089 | int offset, size_t size, int flags) | 1109 | int offset, size_t size, int flags) |
| 1090 | { | 1110 | { |
| @@ -1228,40 +1248,41 @@ error: | |||
| 1228 | return err; | 1248 | return err; |
| 1229 | } | 1249 | } |
| 1230 | 1250 | ||
| 1231 | static void ip_cork_release(struct inet_sock *inet) | 1251 | static void ip_cork_release(struct inet_cork *cork) |
| 1232 | { | 1252 | { |
| 1233 | inet->cork.flags &= ~IPCORK_OPT; | 1253 | cork->flags &= ~IPCORK_OPT; |
| 1234 | kfree(inet->cork.opt); | 1254 | kfree(cork->opt); |
| 1235 | inet->cork.opt = NULL; | 1255 | cork->opt = NULL; |
| 1236 | dst_release(inet->cork.dst); | 1256 | dst_release(cork->dst); |
| 1237 | inet->cork.dst = NULL; | 1257 | cork->dst = NULL; |
| 1238 | } | 1258 | } |
| 1239 | 1259 | ||
| 1240 | /* | 1260 | /* |
| 1241 | * Combine all pending IP fragments on the socket into one IP datagram | 1261 | * Combine all pending IP fragments on the socket into one IP datagram |
| 1242 | * and push them out. | 1262 | * and push them out. |
| 1243 | */ | 1263 | */ |
| 1244 | int ip_push_pending_frames(struct sock *sk) | 1264 | struct sk_buff *__ip_make_skb(struct sock *sk, |
| 1265 | struct sk_buff_head *queue, | ||
| 1266 | struct inet_cork *cork) | ||
| 1245 | { | 1267 | { |
| 1246 | struct sk_buff *skb, *tmp_skb; | 1268 | struct sk_buff *skb, *tmp_skb; |
| 1247 | struct sk_buff **tail_skb; | 1269 | struct sk_buff **tail_skb; |
| 1248 | struct inet_sock *inet = inet_sk(sk); | 1270 | struct inet_sock *inet = inet_sk(sk); |
| 1249 | struct net *net = sock_net(sk); | 1271 | struct net *net = sock_net(sk); |
| 1250 | struct ip_options *opt = NULL; | 1272 | struct ip_options *opt = NULL; |
| 1251 | struct rtable *rt = (struct rtable *)inet->cork.dst; | 1273 | struct rtable *rt = (struct rtable *)cork->dst; |
| 1252 | struct iphdr *iph; | 1274 | struct iphdr *iph; |
| 1253 | __be16 df = 0; | 1275 | __be16 df = 0; |
| 1254 | __u8 ttl; | 1276 | __u8 ttl; |
| 1255 | int err = 0; | ||
| 1256 | 1277 | ||
| 1257 | if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) | 1278 | if ((skb = __skb_dequeue(queue)) == NULL) |
| 1258 | goto out; | 1279 | goto out; |
| 1259 | tail_skb = &(skb_shinfo(skb)->frag_list); | 1280 | tail_skb = &(skb_shinfo(skb)->frag_list); |
| 1260 | 1281 | ||
| 1261 | /* move skb->data to ip header from ext header */ | 1282 | /* move skb->data to ip header from ext header */ |
| 1262 | if (skb->data < skb_network_header(skb)) | 1283 | if (skb->data < skb_network_header(skb)) |
| 1263 | __skb_pull(skb, skb_network_offset(skb)); | 1284 | __skb_pull(skb, skb_network_offset(skb)); |
| 1264 | while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { | 1285 | while ((tmp_skb = __skb_dequeue(queue)) != NULL) { |
| 1265 | __skb_pull(tmp_skb, skb_network_header_len(skb)); | 1286 | __skb_pull(tmp_skb, skb_network_header_len(skb)); |
| 1266 | *tail_skb = tmp_skb; | 1287 | *tail_skb = tmp_skb; |
| 1267 | tail_skb = &(tmp_skb->next); | 1288 | tail_skb = &(tmp_skb->next); |
| @@ -1287,8 +1308,8 @@ int ip_push_pending_frames(struct sock *sk) | |||
| 1287 | ip_dont_fragment(sk, &rt->dst))) | 1308 | ip_dont_fragment(sk, &rt->dst))) |
| 1288 | df = htons(IP_DF); | 1309 | df = htons(IP_DF); |
| 1289 | 1310 | ||
| 1290 | if (inet->cork.flags & IPCORK_OPT) | 1311 | if (cork->flags & IPCORK_OPT) |
| 1291 | opt = inet->cork.opt; | 1312 | opt = cork->opt; |
| 1292 | 1313 | ||
| 1293 | if (rt->rt_type == RTN_MULTICAST) | 1314 | if (rt->rt_type == RTN_MULTICAST) |
| 1294 | ttl = inet->mc_ttl; | 1315 | ttl = inet->mc_ttl; |
| @@ -1300,7 +1321,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
| 1300 | iph->ihl = 5; | 1321 | iph->ihl = 5; |
| 1301 | if (opt) { | 1322 | if (opt) { |
| 1302 | iph->ihl += opt->optlen>>2; | 1323 | iph->ihl += opt->optlen>>2; |
| 1303 | ip_options_build(skb, opt, inet->cork.addr, rt, 0); | 1324 | ip_options_build(skb, opt, cork->addr, rt, 0); |
| 1304 | } | 1325 | } |
| 1305 | iph->tos = inet->tos; | 1326 | iph->tos = inet->tos; |
| 1306 | iph->frag_off = df; | 1327 | iph->frag_off = df; |
| @@ -1316,44 +1337,95 @@ int ip_push_pending_frames(struct sock *sk) | |||
| 1316 | * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec | 1337 | * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec |
| 1317 | * on dst refcount | 1338 | * on dst refcount |
| 1318 | */ | 1339 | */ |
| 1319 | inet->cork.dst = NULL; | 1340 | cork->dst = NULL; |
| 1320 | skb_dst_set(skb, &rt->dst); | 1341 | skb_dst_set(skb, &rt->dst); |
| 1321 | 1342 | ||
| 1322 | if (iph->protocol == IPPROTO_ICMP) | 1343 | if (iph->protocol == IPPROTO_ICMP) |
| 1323 | icmp_out_count(net, ((struct icmphdr *) | 1344 | icmp_out_count(net, ((struct icmphdr *) |
| 1324 | skb_transport_header(skb))->type); | 1345 | skb_transport_header(skb))->type); |
| 1325 | 1346 | ||
| 1326 | /* Netfilter gets the whole, unfragmented skb. */ | 1347 | ip_cork_release(cork); |
| 1348 | out: | ||
| 1349 | return skb; | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | int ip_send_skb(struct sk_buff *skb) | ||
| 1353 | { | ||
| 1354 | struct net *net = sock_net(skb->sk); | ||
| 1355 | int err; | ||
| 1356 | |||
| 1327 | err = ip_local_out(skb); | 1357 | err = ip_local_out(skb); |
| 1328 | if (err) { | 1358 | if (err) { |
| 1329 | if (err > 0) | 1359 | if (err > 0) |
| 1330 | err = net_xmit_errno(err); | 1360 | err = net_xmit_errno(err); |
| 1331 | if (err) | 1361 | if (err) |
| 1332 | goto error; | 1362 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); |
| 1333 | } | 1363 | } |
| 1334 | 1364 | ||
| 1335 | out: | ||
| 1336 | ip_cork_release(inet); | ||
| 1337 | return err; | 1365 | return err; |
| 1366 | } | ||
| 1338 | 1367 | ||
| 1339 | error: | 1368 | int ip_push_pending_frames(struct sock *sk) |
| 1340 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); | 1369 | { |
| 1341 | goto out; | 1370 | struct sk_buff *skb; |
| 1371 | |||
| 1372 | skb = ip_finish_skb(sk); | ||
| 1373 | if (!skb) | ||
| 1374 | return 0; | ||
| 1375 | |||
| 1376 | /* Netfilter gets the whole, unfragmented skb. */ | ||
| 1377 | return ip_send_skb(skb); | ||
| 1342 | } | 1378 | } |
| 1343 | 1379 | ||
| 1344 | /* | 1380 | /* |
| 1345 | * Throw away all pending data on the socket. | 1381 | * Throw away all pending data on the socket. |
| 1346 | */ | 1382 | */ |
| 1347 | void ip_flush_pending_frames(struct sock *sk) | 1383 | static void __ip_flush_pending_frames(struct sock *sk, |
| 1384 | struct sk_buff_head *queue, | ||
| 1385 | struct inet_cork *cork) | ||
| 1348 | { | 1386 | { |
| 1349 | struct sk_buff *skb; | 1387 | struct sk_buff *skb; |
| 1350 | 1388 | ||
| 1351 | while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) | 1389 | while ((skb = __skb_dequeue_tail(queue)) != NULL) |
| 1352 | kfree_skb(skb); | 1390 | kfree_skb(skb); |
| 1353 | 1391 | ||
| 1354 | ip_cork_release(inet_sk(sk)); | 1392 | ip_cork_release(cork); |
| 1393 | } | ||
| 1394 | |||
| 1395 | void ip_flush_pending_frames(struct sock *sk) | ||
| 1396 | { | ||
| 1397 | __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); | ||
| 1355 | } | 1398 | } |
| 1356 | 1399 | ||
| 1400 | struct sk_buff *ip_make_skb(struct sock *sk, | ||
| 1401 | int getfrag(void *from, char *to, int offset, | ||
| 1402 | int len, int odd, struct sk_buff *skb), | ||
| 1403 | void *from, int length, int transhdrlen, | ||
| 1404 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
| 1405 | unsigned int flags) | ||
| 1406 | { | ||
| 1407 | struct inet_cork cork = {}; | ||
| 1408 | struct sk_buff_head queue; | ||
| 1409 | int err; | ||
| 1410 | |||
| 1411 | if (flags & MSG_PROBE) | ||
| 1412 | return NULL; | ||
| 1413 | |||
| 1414 | __skb_queue_head_init(&queue); | ||
| 1415 | |||
| 1416 | err = ip_setup_cork(sk, &cork, ipc, rtp); | ||
| 1417 | if (err) | ||
| 1418 | return ERR_PTR(err); | ||
| 1419 | |||
| 1420 | err = __ip_append_data(sk, &queue, &cork, getfrag, | ||
| 1421 | from, length, transhdrlen, flags); | ||
| 1422 | if (err) { | ||
| 1423 | __ip_flush_pending_frames(sk, &queue, &cork); | ||
| 1424 | return ERR_PTR(err); | ||
| 1425 | } | ||
| 1426 | |||
| 1427 | return __ip_make_skb(sk, &queue, &cork); | ||
| 1428 | } | ||
| 1357 | 1429 | ||
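ip_make_skb() is the lock-free counterpart: the datagram is assembled on a private on-stack queue with an on-stack cork, so nothing touches the socket's cork state and no locking is imposed on the caller. A sketch of the assumed caller side (hypothetical function; any transport header must be filled into the transhdrlen headroom by the caller):

	static int example_lockless_xmit(struct sock *sk, struct ipcm_cookie *ipc,
					 struct rtable **rtp, struct iovec *iov,
					 int len, unsigned int flags)
	{
		struct sk_buff *skb;

		skb = ip_make_skb(sk, ip_generic_getfrag, iov, len,
				  0 /* transhdrlen */, ipc, rtp, flags);
		if (IS_ERR(skb))
			return PTR_ERR(skb);
		if (!skb)		/* MSG_PROBE: nothing was built */
			return 0;
		return ip_send_skb(skb); /* hands the finished skb to ip_local_out() */
	}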
| 1358 | /* | 1430 | /* |
| 1359 | * Fetch data from kernel space and fill in checksum if needed. | 1431 | * Fetch data from kernel space and fill in checksum if needed. |
| @@ -1402,16 +1474,19 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
| 1402 | } | 1474 | } |
| 1403 | 1475 | ||
| 1404 | { | 1476 | { |
| 1405 | struct flowi fl = { .oif = arg->bound_dev_if, | 1477 | struct flowi4 fl4 = { |
| 1406 | .fl4_dst = daddr, | 1478 | .flowi4_oif = arg->bound_dev_if, |
| 1407 | .fl4_src = rt->rt_spec_dst, | 1479 | .daddr = daddr, |
| 1408 | .fl4_tos = RT_TOS(ip_hdr(skb)->tos), | 1480 | .saddr = rt->rt_spec_dst, |
| 1409 | .fl_ip_sport = tcp_hdr(skb)->dest, | 1481 | .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), |
| 1410 | .fl_ip_dport = tcp_hdr(skb)->source, | 1482 | .fl4_sport = tcp_hdr(skb)->dest, |
| 1411 | .proto = sk->sk_protocol, | 1483 | .fl4_dport = tcp_hdr(skb)->source, |
| 1412 | .flags = ip_reply_arg_flowi_flags(arg) }; | 1484 | .flowi4_proto = sk->sk_protocol, |
| 1413 | security_skb_classify_flow(skb, &fl); | 1485 | .flowi4_flags = ip_reply_arg_flowi_flags(arg), |
| 1414 | if (ip_route_output_key(sock_net(sk), &rt, &fl)) | 1486 | }; |
| 1487 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | ||
| 1488 | rt = ip_route_output_key(sock_net(sk), &fl4); | ||
| 1489 | if (IS_ERR(rt)) | ||
| 1415 | return; | 1490 | return; |
| 1416 | } | 1491 | } |
| 1417 | 1492 | ||
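The ip_send_reply() hunk above shows the conversion applied throughout this series: the untyped struct flowi is replaced by a typed struct flowi4, and ip_route_output_key() now returns the rtable - or an ERR_PTR-encoded errno - instead of an int plus a **rt out-parameter. The same shape reduced to a sketch:

	static struct rtable *example_output_route(struct net *net, __be32 daddr,
						   __be32 saddr, u8 tos, int oif)
	{
		struct flowi4 fl4 = {
			.flowi4_oif = oif,
			.daddr = daddr,
			.saddr = saddr,
			.flowi4_tos = RT_TOS(tos),
		};

		/* Callers now test the result with IS_ERR() rather than
		 * checking a nonzero return code. */
		return ip_route_output_key(net, &fl4);
	}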
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 988f52fba54a..bfc17c5914e7 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
| @@ -460,19 +460,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
| 460 | goto tx_error_icmp; | 460 | goto tx_error_icmp; |
| 461 | } | 461 | } |
| 462 | 462 | ||
| 463 | { | 463 | rt = ip_route_output_ports(dev_net(dev), NULL, |
| 464 | struct flowi fl = { | 464 | dst, tiph->saddr, |
| 465 | .oif = tunnel->parms.link, | 465 | 0, 0, |
| 466 | .fl4_dst = dst, | 466 | IPPROTO_IPIP, RT_TOS(tos), |
| 467 | .fl4_src= tiph->saddr, | 467 | tunnel->parms.link); |
| 468 | .fl4_tos = RT_TOS(tos), | 468 | if (IS_ERR(rt)) { |
| 469 | .proto = IPPROTO_IPIP | 469 | dev->stats.tx_carrier_errors++; |
| 470 | }; | 470 | goto tx_error_icmp; |
| 471 | |||
| 472 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { | ||
| 473 | dev->stats.tx_carrier_errors++; | ||
| 474 | goto tx_error_icmp; | ||
| 475 | } | ||
| 476 | } | 471 | } |
| 477 | tdev = rt->dst.dev; | 472 | tdev = rt->dst.dev; |
| 478 | 473 | ||
| @@ -583,16 +578,14 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) | |||
| 583 | iph = &tunnel->parms.iph; | 578 | iph = &tunnel->parms.iph; |
| 584 | 579 | ||
| 585 | if (iph->daddr) { | 580 | if (iph->daddr) { |
| 586 | struct flowi fl = { | 581 | struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, |
| 587 | .oif = tunnel->parms.link, | 582 | iph->daddr, iph->saddr, |
| 588 | .fl4_dst = iph->daddr, | 583 | 0, 0, |
| 589 | .fl4_src = iph->saddr, | 584 | IPPROTO_IPIP, |
| 590 | .fl4_tos = RT_TOS(iph->tos), | 585 | RT_TOS(iph->tos), |
| 591 | .proto = IPPROTO_IPIP | 586 | tunnel->parms.link); |
| 592 | }; | 587 | |
| 593 | struct rtable *rt; | 588 | if (!IS_ERR(rt)) { |
| 594 | |||
| 595 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | ||
| 596 | tdev = rt->dst.dev; | 589 | tdev = rt->dst.dev; |
| 597 | ip_rt_put(rt); | 590 | ip_rt_put(rt); |
| 598 | } | 591 | } |
| @@ -913,4 +906,4 @@ static void __exit ipip_fini(void) | |||
| 913 | module_init(ipip_init); | 906 | module_init(ipip_init); |
| 914 | module_exit(ipip_fini); | 907 | module_exit(ipip_fini); |
| 915 | MODULE_LICENSE("GPL"); | 908 | MODULE_LICENSE("GPL"); |
| 916 | MODULE_ALIAS("tunl0"); | 909 | MODULE_ALIAS_NETDEV("tunl0"); |
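For call sites that previously open-coded a flowi just to route a tunnel packet, ip_route_output_ports() bundles the routing keys into the flowi4 internally. The argument order below is read off the converted call sites above; the wrapper itself is assumed to be a thin inline around the flowi4 lookup.

	static struct rtable *example_tunnel_route(struct net *net, __be32 daddr,
						   __be32 saddr, u8 tos, int link)
	{
		return ip_route_output_ports(net, NULL /* sk */, daddr, saddr,
					     0, 0 /* dport, sport: unused */,
					     IPPROTO_IPIP, RT_TOS(tos),
					     link /* oif */);
	}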
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3f3a9afd73e0..1f62eaeb6de4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
| @@ -60,6 +60,7 @@ | |||
| 60 | #include <linux/notifier.h> | 60 | #include <linux/notifier.h> |
| 61 | #include <linux/if_arp.h> | 61 | #include <linux/if_arp.h> |
| 62 | #include <linux/netfilter_ipv4.h> | 62 | #include <linux/netfilter_ipv4.h> |
| 63 | #include <linux/compat.h> | ||
| 63 | #include <net/ipip.h> | 64 | #include <net/ipip.h> |
| 64 | #include <net/checksum.h> | 65 | #include <net/checksum.h> |
| 65 | #include <net/netlink.h> | 66 | #include <net/netlink.h> |
| @@ -147,14 +148,15 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) | |||
| 147 | return NULL; | 148 | return NULL; |
| 148 | } | 149 | } |
| 149 | 150 | ||
| 150 | static int ipmr_fib_lookup(struct net *net, struct flowi *flp, | 151 | static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, |
| 151 | struct mr_table **mrt) | 152 | struct mr_table **mrt) |
| 152 | { | 153 | { |
| 153 | struct ipmr_result res; | 154 | struct ipmr_result res; |
| 154 | struct fib_lookup_arg arg = { .result = &res, }; | 155 | struct fib_lookup_arg arg = { .result = &res, }; |
| 155 | int err; | 156 | int err; |
| 156 | 157 | ||
| 157 | err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); | 158 | err = fib_rules_lookup(net->ipv4.mr_rules_ops, |
| 159 | flowi4_to_flowi(flp4), 0, &arg); | ||
| 158 | if (err < 0) | 160 | if (err < 0) |
| 159 | return err; | 161 | return err; |
| 160 | *mrt = res.mrt; | 162 | *mrt = res.mrt; |
| @@ -282,7 +284,7 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) | |||
| 282 | return net->ipv4.mrt; | 284 | return net->ipv4.mrt; |
| 283 | } | 285 | } |
| 284 | 286 | ||
| 285 | static int ipmr_fib_lookup(struct net *net, struct flowi *flp, | 287 | static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, |
| 286 | struct mr_table **mrt) | 288 | struct mr_table **mrt) |
| 287 | { | 289 | { |
| 288 | *mrt = net->ipv4.mrt; | 290 | *mrt = net->ipv4.mrt; |
| @@ -434,14 +436,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) | |||
| 434 | { | 436 | { |
| 435 | struct net *net = dev_net(dev); | 437 | struct net *net = dev_net(dev); |
| 436 | struct mr_table *mrt; | 438 | struct mr_table *mrt; |
| 437 | struct flowi fl = { | 439 | struct flowi4 fl4 = { |
| 438 | .oif = dev->ifindex, | 440 | .flowi4_oif = dev->ifindex, |
| 439 | .iif = skb->skb_iif, | 441 | .flowi4_iif = skb->skb_iif, |
| 440 | .mark = skb->mark, | 442 | .flowi4_mark = skb->mark, |
| 441 | }; | 443 | }; |
| 442 | int err; | 444 | int err; |
| 443 | 445 | ||
| 444 | err = ipmr_fib_lookup(net, &fl, &mrt); | 446 | err = ipmr_fib_lookup(net, &fl4, &mrt); |
| 445 | if (err < 0) { | 447 | if (err < 0) { |
| 446 | kfree_skb(skb); | 448 | kfree_skb(skb); |
| 447 | return err; | 449 | return err; |
| @@ -1434,6 +1436,81 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | |||
| 1434 | } | 1436 | } |
| 1435 | } | 1437 | } |
| 1436 | 1438 | ||
| 1439 | #ifdef CONFIG_COMPAT | ||
| 1440 | struct compat_sioc_sg_req { | ||
| 1441 | struct in_addr src; | ||
| 1442 | struct in_addr grp; | ||
| 1443 | compat_ulong_t pktcnt; | ||
| 1444 | compat_ulong_t bytecnt; | ||
| 1445 | compat_ulong_t wrong_if; | ||
| 1446 | }; | ||
| 1447 | |||
| 1448 | struct compat_sioc_vif_req { | ||
| 1449 | vifi_t vifi; /* Which iface */ | ||
| 1450 | compat_ulong_t icount; | ||
| 1451 | compat_ulong_t ocount; | ||
| 1452 | compat_ulong_t ibytes; | ||
| 1453 | compat_ulong_t obytes; | ||
| 1454 | }; | ||
| 1455 | |||
| 1456 | int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) | ||
| 1457 | { | ||
| 1458 | struct compat_sioc_sg_req sr; | ||
| 1459 | struct compat_sioc_vif_req vr; | ||
| 1460 | struct vif_device *vif; | ||
| 1461 | struct mfc_cache *c; | ||
| 1462 | struct net *net = sock_net(sk); | ||
| 1463 | struct mr_table *mrt; | ||
| 1464 | |||
| 1465 | mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); | ||
| 1466 | if (mrt == NULL) | ||
| 1467 | return -ENOENT; | ||
| 1468 | |||
| 1469 | switch (cmd) { | ||
| 1470 | case SIOCGETVIFCNT: | ||
| 1471 | if (copy_from_user(&vr, arg, sizeof(vr))) | ||
| 1472 | return -EFAULT; | ||
| 1473 | if (vr.vifi >= mrt->maxvif) | ||
| 1474 | return -EINVAL; | ||
| 1475 | read_lock(&mrt_lock); | ||
| 1476 | vif = &mrt->vif_table[vr.vifi]; | ||
| 1477 | if (VIF_EXISTS(mrt, vr.vifi)) { | ||
| 1478 | vr.icount = vif->pkt_in; | ||
| 1479 | vr.ocount = vif->pkt_out; | ||
| 1480 | vr.ibytes = vif->bytes_in; | ||
| 1481 | vr.obytes = vif->bytes_out; | ||
| 1482 | read_unlock(&mrt_lock); | ||
| 1483 | |||
| 1484 | if (copy_to_user(arg, &vr, sizeof(vr))) | ||
| 1485 | return -EFAULT; | ||
| 1486 | return 0; | ||
| 1487 | } | ||
| 1488 | read_unlock(&mrt_lock); | ||
| 1489 | return -EADDRNOTAVAIL; | ||
| 1490 | case SIOCGETSGCNT: | ||
| 1491 | if (copy_from_user(&sr, arg, sizeof(sr))) | ||
| 1492 | return -EFAULT; | ||
| 1493 | |||
| 1494 | rcu_read_lock(); | ||
| 1495 | c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); | ||
| 1496 | if (c) { | ||
| 1497 | sr.pktcnt = c->mfc_un.res.pkt; | ||
| 1498 | sr.bytecnt = c->mfc_un.res.bytes; | ||
| 1499 | sr.wrong_if = c->mfc_un.res.wrong_if; | ||
| 1500 | rcu_read_unlock(); | ||
| 1501 | |||
| 1502 | if (copy_to_user(arg, &sr, sizeof(sr))) | ||
| 1503 | return -EFAULT; | ||
| 1504 | return 0; | ||
| 1505 | } | ||
| 1506 | rcu_read_unlock(); | ||
| 1507 | return -EADDRNOTAVAIL; | ||
| 1508 | default: | ||
| 1509 | return -ENOIOCTLCMD; | ||
| 1510 | } | ||
| 1511 | } | ||
| 1512 | #endif | ||
| 1513 | |||
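The new compat handler exists because the native SIOCGETVIFCNT/SIOCGETSGCNT structures carry unsigned long counters: a 32-bit process lays those fields out as 4 bytes each, while a 64-bit kernel uses 8, so the native struct cannot be copied to or from such a caller directly. Illustrative comparison only (field list shortened; struct names are hypothetical):

	struct native_vif_req {		/* layout as the 64-bit kernel sees it */
		vifi_t vifi;
		unsigned long icount;	/* 8 bytes on a 64-bit kernel */
	};

	struct compat_vif_req {		/* layout the 32-bit caller built */
		vifi_t vifi;
		compat_ulong_t icount;	/* fixed at 4 bytes */
	};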
| 1437 | 1514 | ||
| 1438 | static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) | 1515 | static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) |
| 1439 | { | 1516 | { |
| @@ -1535,26 +1612,20 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | |||
| 1535 | #endif | 1612 | #endif |
| 1536 | 1613 | ||
| 1537 | if (vif->flags & VIFF_TUNNEL) { | 1614 | if (vif->flags & VIFF_TUNNEL) { |
| 1538 | struct flowi fl = { | 1615 | rt = ip_route_output_ports(net, NULL, |
| 1539 | .oif = vif->link, | 1616 | vif->remote, vif->local, |
| 1540 | .fl4_dst = vif->remote, | 1617 | 0, 0, |
| 1541 | .fl4_src = vif->local, | 1618 | IPPROTO_IPIP, |
| 1542 | .fl4_tos = RT_TOS(iph->tos), | 1619 | RT_TOS(iph->tos), vif->link); |
| 1543 | .proto = IPPROTO_IPIP | 1620 | if (IS_ERR(rt)) |
| 1544 | }; | ||
| 1545 | |||
| 1546 | if (ip_route_output_key(net, &rt, &fl)) | ||
| 1547 | goto out_free; | 1621 | goto out_free; |
| 1548 | encap = sizeof(struct iphdr); | 1622 | encap = sizeof(struct iphdr); |
| 1549 | } else { | 1623 | } else { |
| 1550 | struct flowi fl = { | 1624 | rt = ip_route_output_ports(net, NULL, iph->daddr, 0, |
| 1551 | .oif = vif->link, | 1625 | 0, 0, |
| 1552 | .fl4_dst = iph->daddr, | 1626 | IPPROTO_IPIP, |
| 1553 | .fl4_tos = RT_TOS(iph->tos), | 1627 | RT_TOS(iph->tos), vif->link); |
| 1554 | .proto = IPPROTO_IPIP | 1628 | if (IS_ERR(rt)) |
| 1555 | }; | ||
| 1556 | |||
| 1557 | if (ip_route_output_key(net, &rt, &fl)) | ||
| 1558 | goto out_free; | 1629 | goto out_free; |
| 1559 | } | 1630 | } |
| 1560 | 1631 | ||
| @@ -1717,6 +1788,24 @@ dont_forward: | |||
| 1717 | return 0; | 1788 | return 0; |
| 1718 | } | 1789 | } |
| 1719 | 1790 | ||
| 1791 | static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) | ||
| 1792 | { | ||
| 1793 | struct flowi4 fl4 = { | ||
| 1794 | .daddr = rt->rt_key_dst, | ||
| 1795 | .saddr = rt->rt_key_src, | ||
| 1796 | .flowi4_tos = rt->rt_tos, | ||
| 1797 | .flowi4_oif = rt->rt_oif, | ||
| 1798 | .flowi4_iif = rt->rt_iif, | ||
| 1799 | .flowi4_mark = rt->rt_mark, | ||
| 1800 | }; | ||
| 1801 | struct mr_table *mrt; | ||
| 1802 | int err; | ||
| 1803 | |||
| 1804 | err = ipmr_fib_lookup(net, &fl4, &mrt); | ||
| 1805 | if (err) | ||
| 1806 | return ERR_PTR(err); | ||
| 1807 | return mrt; | ||
| 1808 | } | ||
| 1720 | 1809 | ||
| 1721 | /* | 1810 | /* |
| 1722 | * Multicast packets for forwarding arrive here | 1811 | * Multicast packets for forwarding arrive here |
| @@ -1729,7 +1818,6 @@ int ip_mr_input(struct sk_buff *skb) | |||
| 1729 | struct net *net = dev_net(skb->dev); | 1818 | struct net *net = dev_net(skb->dev); |
| 1730 | int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; | 1819 | int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; |
| 1731 | struct mr_table *mrt; | 1820 | struct mr_table *mrt; |
| 1732 | int err; | ||
| 1733 | 1821 | ||
| 1734 | /* Packet is looped back after forwarding; it should not be | 1822 | /* Packet is looped back after forwarding; it should not be |
| 1735 | * forwarded a second time, but can still be delivered locally. | 1823 | * forwarded a second time, but can still be delivered locally. |
| @@ -1737,12 +1825,11 @@ int ip_mr_input(struct sk_buff *skb) | |||
| 1737 | if (IPCB(skb)->flags & IPSKB_FORWARDED) | 1825 | if (IPCB(skb)->flags & IPSKB_FORWARDED) |
| 1738 | goto dont_forward; | 1826 | goto dont_forward; |
| 1739 | 1827 | ||
| 1740 | err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); | 1828 | mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); |
| 1741 | if (err < 0) { | 1829 | if (IS_ERR(mrt)) { |
| 1742 | kfree_skb(skb); | 1830 | kfree_skb(skb); |
| 1743 | return err; | 1831 | return PTR_ERR(mrt); |
| 1744 | } | 1832 | } |
| 1745 | |||
| 1746 | if (!local) { | 1833 | if (!local) { |
| 1747 | if (IPCB(skb)->opt.router_alert) { | 1834 | if (IPCB(skb)->opt.router_alert) { |
| 1748 | if (ip_call_ra_chain(skb)) | 1835 | if (ip_call_ra_chain(skb)) |
| @@ -1870,9 +1957,9 @@ int pim_rcv_v1(struct sk_buff *skb) | |||
| 1870 | 1957 | ||
| 1871 | pim = igmp_hdr(skb); | 1958 | pim = igmp_hdr(skb); |
| 1872 | 1959 | ||
| 1873 | if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) | 1960 | mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); |
| 1961 | if (IS_ERR(mrt)) | ||
| 1874 | goto drop; | 1962 | goto drop; |
| 1875 | |||
| 1876 | if (!mrt->mroute_do_pim || | 1963 | if (!mrt->mroute_do_pim || |
| 1877 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) | 1964 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) |
| 1878 | goto drop; | 1965 | goto drop; |
| @@ -1902,9 +1989,9 @@ static int pim_rcv(struct sk_buff *skb) | |||
| 1902 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) | 1989 | csum_fold(skb_checksum(skb, 0, skb->len, 0)))) |
| 1903 | goto drop; | 1990 | goto drop; |
| 1904 | 1991 | ||
| 1905 | if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) | 1992 | mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); |
| 1993 | if (IS_ERR(mrt)) | ||
| 1906 | goto drop; | 1994 | goto drop; |
| 1907 | |||
| 1908 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { | 1995 | if (__pim_rcv(mrt, skb, sizeof(*pim))) { |
| 1909 | drop: | 1996 | drop: |
| 1910 | kfree_skb(skb); | 1997 | kfree_skb(skb); |
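ipmr_rt_fib_lookup() also moves the multicast input paths from the int-plus-out-parameter convention to the ERR_PTR idiom, where an errno travels inside the returned pointer. Sketch of the consumer side (hypothetical wrapper):

	static int example_consume(struct net *net, struct sk_buff *skb)
	{
		struct mr_table *mrt;

		mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb));
		if (IS_ERR(mrt)) {
			kfree_skb(skb);
			return PTR_ERR(mrt);	/* decode the errno back out */
		}
		/* ... forward via mrt ... */
		return 0;
	}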
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 994a1f29ebbc..f3c0b549b8e1 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
| @@ -16,7 +16,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
| 16 | struct net *net = dev_net(skb_dst(skb)->dev); | 16 | struct net *net = dev_net(skb_dst(skb)->dev); |
| 17 | const struct iphdr *iph = ip_hdr(skb); | 17 | const struct iphdr *iph = ip_hdr(skb); |
| 18 | struct rtable *rt; | 18 | struct rtable *rt; |
| 19 | struct flowi fl = {}; | 19 | struct flowi4 fl4 = {}; |
| 20 | unsigned long orefdst; | 20 | unsigned long orefdst; |
| 21 | unsigned int hh_len; | 21 | unsigned int hh_len; |
| 22 | unsigned int type; | 22 | unsigned int type; |
| @@ -31,14 +31,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
| 31 | * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. | 31 | * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. |
| 32 | */ | 32 | */ |
| 33 | if (addr_type == RTN_LOCAL) { | 33 | if (addr_type == RTN_LOCAL) { |
| 34 | fl.fl4_dst = iph->daddr; | 34 | fl4.daddr = iph->daddr; |
| 35 | if (type == RTN_LOCAL) | 35 | if (type == RTN_LOCAL) |
| 36 | fl.fl4_src = iph->saddr; | 36 | fl4.saddr = iph->saddr; |
| 37 | fl.fl4_tos = RT_TOS(iph->tos); | 37 | fl4.flowi4_tos = RT_TOS(iph->tos); |
| 38 | fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; | 38 | fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; |
| 39 | fl.mark = skb->mark; | 39 | fl4.flowi4_mark = skb->mark; |
| 40 | fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; | 40 | fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; |
| 41 | if (ip_route_output_key(net, &rt, &fl) != 0) | 41 | rt = ip_route_output_key(net, &fl4); |
| 42 | if (IS_ERR(rt)) | ||
| 42 | return -1; | 43 | return -1; |
| 43 | 44 | ||
| 44 | /* Drop old route. */ | 45 | /* Drop old route. */ |
| @@ -47,8 +48,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
| 47 | } else { | 48 | } else { |
| 48 | /* non-local src, find valid iif to satisfy | 49 | /* non-local src, find valid iif to satisfy |
| 49 | * rp-filter when calling ip_route_input. */ | 50 | * rp-filter when calling ip_route_input. */ |
| 50 | fl.fl4_dst = iph->saddr; | 51 | fl4.daddr = iph->saddr; |
| 51 | if (ip_route_output_key(net, &rt, &fl) != 0) | 52 | rt = ip_route_output_key(net, &fl4); |
| 53 | if (IS_ERR(rt)) | ||
| 52 | return -1; | 54 | return -1; |
| 53 | 55 | ||
| 54 | orefdst = skb->_skb_refdst; | 56 | orefdst = skb->_skb_refdst; |
| @@ -66,10 +68,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
| 66 | 68 | ||
| 67 | #ifdef CONFIG_XFRM | 69 | #ifdef CONFIG_XFRM |
| 68 | if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && | 70 | if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && |
| 69 | xfrm_decode_session(skb, &fl, AF_INET) == 0) { | 71 | xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { |
| 70 | struct dst_entry *dst = skb_dst(skb); | 72 | struct dst_entry *dst = skb_dst(skb); |
| 71 | skb_dst_set(skb, NULL); | 73 | skb_dst_set(skb, NULL); |
| 72 | if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) | 74 | dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); |
| 75 | if (IS_ERR(dst)) | ||
| 73 | return -1; | 76 | return -1; |
| 74 | skb_dst_set(skb, dst); | 77 | skb_dst_set(skb, dst); |
| 75 | } | 78 | } |
| @@ -102,7 +105,8 @@ int ip_xfrm_me_harder(struct sk_buff *skb) | |||
| 102 | dst = ((struct xfrm_dst *)dst)->route; | 105 | dst = ((struct xfrm_dst *)dst)->route; |
| 103 | dst_hold(dst); | 106 | dst_hold(dst); |
| 104 | 107 | ||
| 105 | if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) | 108 | dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); |
| 109 | if (IS_ERR(dst)) | ||
| 106 | return -1; | 110 | return -1; |
| 107 | 111 | ||
| 108 | skb_dst_drop(skb); | 112 | skb_dst_drop(skb); |
| @@ -219,7 +223,11 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, | |||
| 219 | 223 | ||
| 220 | static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) | 224 | static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) |
| 221 | { | 225 | { |
| 222 | return ip_route_output_key(&init_net, (struct rtable **)dst, fl); | 226 | struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4); |
| 227 | if (IS_ERR(rt)) | ||
| 228 | return PTR_ERR(rt); | ||
| 229 | *dst = &rt->dst; | ||
| 230 | return 0; | ||
| 223 | } | 231 | } |
| 224 | 232 | ||
| 225 | static const struct nf_afinfo nf_ip_afinfo = { | 233 | static const struct nf_afinfo nf_ip_afinfo = { |
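nf_ip_route() shows the other half of the flow rework: struct flowi is now a union of per-family keys, so IPv4-only code takes the &fl->u.ip4 view and flowi4_to_flowi() converts back for family-agnostic helpers such as xfrm_decode_session(). A sketch of the two views, assuming the <net/flow.h> layout:

	#include <linux/kernel.h>
	#include <net/flow.h>

	static void example_flow_views(struct flowi *fl)
	{
		struct flowi4 *fl4 = &fl->u.ip4;		/* typed IPv4 view */
		struct flowi *generic = flowi4_to_flowi(fl4);	/* and back */

		/* Both views alias the same storage; nothing is copied. */
		WARN_ON(generic != fl);
	}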
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index babd1a2bae5f..1dfc18a03fd4 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
| @@ -64,16 +64,6 @@ config IP_NF_IPTABLES | |||
| 64 | if IP_NF_IPTABLES | 64 | if IP_NF_IPTABLES |
| 65 | 65 | ||
| 66 | # The matches. | 66 | # The matches. |
| 67 | config IP_NF_MATCH_ADDRTYPE | ||
| 68 | tristate '"addrtype" address type match support' | ||
| 69 | depends on NETFILTER_ADVANCED | ||
| 70 | help | ||
| 71 | This option allows you to match what routing thinks of an address, | ||
| 72 | eg. UNICAST, LOCAL, BROADCAST, ... | ||
| 73 | |||
| 74 | If you want to compile it as a module, say M here and read | ||
| 75 | <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. | ||
| 76 | |||
| 77 | config IP_NF_MATCH_AH | 67 | config IP_NF_MATCH_AH |
| 78 | tristate '"ah" match support' | 68 | tristate '"ah" match support' |
| 79 | depends on NETFILTER_ADVANCED | 69 | depends on NETFILTER_ADVANCED |
| @@ -206,8 +196,9 @@ config IP_NF_TARGET_REDIRECT | |||
| 206 | 196 | ||
| 207 | config NF_NAT_SNMP_BASIC | 197 | config NF_NAT_SNMP_BASIC |
| 208 | tristate "Basic SNMP-ALG support" | 198 | tristate "Basic SNMP-ALG support" |
| 209 | depends on NF_NAT | 199 | depends on NF_CONNTRACK_SNMP && NF_NAT |
| 210 | depends on NETFILTER_ADVANCED | 200 | depends on NETFILTER_ADVANCED |
| 201 | default NF_NAT && NF_CONNTRACK_SNMP | ||
| 211 | ---help--- | 202 | ---help--- |
| 212 | 203 | ||
| 213 | This module implements an Application Layer Gateway (ALG) for | 204 | This module implements an Application Layer Gateway (ALG) for |
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 19eb59d01037..dca2082ec683 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
| @@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o | |||
| 48 | obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o | 48 | obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o |
| 49 | 49 | ||
| 50 | # matches | 50 | # matches |
| 51 | obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o | ||
| 52 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o | 51 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o |
| 53 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o | 52 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o |
| 54 | 53 | ||
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e855fffaed95..4b5d457c2d76 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
| @@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
| 866 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 866 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
| 867 | newinfo->initial_entries = 0; | 867 | newinfo->initial_entries = 0; |
| 868 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 868 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
| 869 | xt_compat_init_offsets(NFPROTO_ARP, info->number); | ||
| 869 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 870 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
| 870 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 871 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
| 871 | if (ret != 0) | 872 | if (ret != 0) |
| @@ -1065,6 +1066,7 @@ static int do_replace(struct net *net, const void __user *user, | |||
| 1065 | /* overflow check */ | 1066 | /* overflow check */ |
| 1066 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) | 1067 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) |
| 1067 | return -ENOMEM; | 1068 | return -ENOMEM; |
| 1069 | tmp.name[sizeof(tmp.name)-1] = 0; | ||
| 1068 | 1070 | ||
| 1069 | newinfo = xt_alloc_table_info(tmp.size); | 1071 | newinfo = xt_alloc_table_info(tmp.size); |
| 1070 | if (!newinfo) | 1072 | if (!newinfo) |
| @@ -1333,6 +1335,7 @@ static int translate_compat_table(const char *name, | |||
| 1333 | duprintf("translate_compat_table: size %u\n", info->size); | 1335 | duprintf("translate_compat_table: size %u\n", info->size); |
| 1334 | j = 0; | 1336 | j = 0; |
| 1335 | xt_compat_lock(NFPROTO_ARP); | 1337 | xt_compat_lock(NFPROTO_ARP); |
| 1338 | xt_compat_init_offsets(NFPROTO_ARP, number); | ||
| 1336 | /* Walk through entries, checking offsets. */ | 1339 | /* Walk through entries, checking offsets. */ |
| 1337 | xt_entry_foreach(iter0, entry0, total_size) { | 1340 | xt_entry_foreach(iter0, entry0, total_size) { |
| 1338 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1341 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
| @@ -1486,6 +1489,7 @@ static int compat_do_replace(struct net *net, void __user *user, | |||
| 1486 | return -ENOMEM; | 1489 | return -ENOMEM; |
| 1487 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) | 1490 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) |
| 1488 | return -ENOMEM; | 1491 | return -ENOMEM; |
| 1492 | tmp.name[sizeof(tmp.name)-1] = 0; | ||
| 1489 | 1493 | ||
| 1490 | newinfo = xt_alloc_table_info(tmp.size); | 1494 | newinfo = xt_alloc_table_info(tmp.size); |
| 1491 | if (!newinfo) | 1495 | if (!newinfo) |
| @@ -1738,6 +1742,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len | |||
| 1738 | ret = -EFAULT; | 1742 | ret = -EFAULT; |
| 1739 | break; | 1743 | break; |
| 1740 | } | 1744 | } |
| 1745 | rev.name[sizeof(rev.name)-1] = 0; | ||
| 1741 | 1746 | ||
| 1742 | try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, | 1747 | try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, |
| 1743 | rev.revision, 1, &ret), | 1748 | rev.revision, 1, &ret), |
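The repeated tmp.name[sizeof(tmp.name)-1] = 0 additions in this file (and in ip_tables.c below) harden structures copied in from userspace: nothing guarantees the fixed-size name fields arrive NUL-terminated, and later string users would read past the buffer. The defensive shape, sketched with a hypothetical helper:

	static int example_get_revision(const void __user *user)
	{
		struct xt_get_revision rev;

		if (copy_from_user(&rev, user, sizeof(rev)))
			return -EFAULT;
		rev.name[sizeof(rev.name) - 1] = '\0';	/* force termination */
		/* rev.name is now safe for strcmp()/strlen() consumers */
		return 0;
	}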
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index b8ddcc480ed9..a5e52a9f0a12 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c | |||
| @@ -60,12 +60,12 @@ static int checkentry(const struct xt_tgchk_param *par) | |||
| 60 | 60 | ||
| 61 | if (mangle->flags & ~ARPT_MANGLE_MASK || | 61 | if (mangle->flags & ~ARPT_MANGLE_MASK || |
| 62 | !(mangle->flags & ARPT_MANGLE_MASK)) | 62 | !(mangle->flags & ARPT_MANGLE_MASK)) |
| 63 | return false; | 63 | return -EINVAL; |
| 64 | 64 | ||
| 65 | if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && | 65 | if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && |
| 66 | mangle->target != XT_CONTINUE) | 66 | mangle->target != XT_CONTINUE) |
| 67 | return false; | 67 | return -EINVAL; |
| 68 | return true; | 68 | return 0; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | static struct xt_target arpt_mangle_reg __read_mostly = { | 71 | static struct xt_target arpt_mangle_reg __read_mostly = { |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 652efea013dc..ffcea0d1678e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
| @@ -387,7 +387,7 @@ ipt_do_table(struct sk_buff *skb, | |||
| 387 | verdict = (unsigned)(-v) - 1; | 387 | verdict = (unsigned)(-v) - 1; |
| 388 | break; | 388 | break; |
| 389 | } | 389 | } |
| 390 | if (*stackptr == 0) { | 390 | if (*stackptr <= origptr) { |
| 391 | e = get_entry(table_base, | 391 | e = get_entry(table_base, |
| 392 | private->underflow[hook]); | 392 | private->underflow[hook]); |
| 393 | pr_debug("Underflow (this is normal) " | 393 | pr_debug("Underflow (this is normal) " |
| @@ -427,10 +427,10 @@ ipt_do_table(struct sk_buff *skb, | |||
| 427 | /* Verdict */ | 427 | /* Verdict */ |
| 428 | break; | 428 | break; |
| 429 | } while (!acpar.hotdrop); | 429 | } while (!acpar.hotdrop); |
| 430 | xt_info_rdunlock_bh(); | ||
| 431 | pr_debug("Exiting %s; resetting sp from %u to %u\n", | 430 | pr_debug("Exiting %s; resetting sp from %u to %u\n", |
| 432 | __func__, *stackptr, origptr); | 431 | __func__, *stackptr, origptr); |
| 433 | *stackptr = origptr; | 432 | *stackptr = origptr; |
| 433 | xt_info_rdunlock_bh(); | ||
| 434 | #ifdef DEBUG_ALLOW_ALL | 434 | #ifdef DEBUG_ALLOW_ALL |
| 435 | return NF_ACCEPT; | 435 | return NF_ACCEPT; |
| 436 | #else | 436 | #else |
| @@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
| 1063 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | 1063 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); |
| 1064 | newinfo->initial_entries = 0; | 1064 | newinfo->initial_entries = 0; |
| 1065 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; | 1065 | loc_cpu_entry = info->entries[raw_smp_processor_id()]; |
| 1066 | xt_compat_init_offsets(AF_INET, info->number); | ||
| 1066 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { | 1067 | xt_entry_foreach(iter, loc_cpu_entry, info->size) { |
| 1067 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); | 1068 | ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); |
| 1068 | if (ret != 0) | 1069 | if (ret != 0) |
| @@ -1261,6 +1262,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) | |||
| 1261 | /* overflow check */ | 1262 | /* overflow check */ |
| 1262 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) | 1263 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) |
| 1263 | return -ENOMEM; | 1264 | return -ENOMEM; |
| 1265 | tmp.name[sizeof(tmp.name)-1] = 0; | ||
| 1264 | 1266 | ||
| 1265 | newinfo = xt_alloc_table_info(tmp.size); | 1267 | newinfo = xt_alloc_table_info(tmp.size); |
| 1266 | if (!newinfo) | 1268 | if (!newinfo) |
| @@ -1664,6 +1666,7 @@ translate_compat_table(struct net *net, | |||
| 1664 | duprintf("translate_compat_table: size %u\n", info->size); | 1666 | duprintf("translate_compat_table: size %u\n", info->size); |
| 1665 | j = 0; | 1667 | j = 0; |
| 1666 | xt_compat_lock(AF_INET); | 1668 | xt_compat_lock(AF_INET); |
| 1669 | xt_compat_init_offsets(AF_INET, number); | ||
| 1667 | /* Walk through entries, checking offsets. */ | 1670 | /* Walk through entries, checking offsets. */ |
| 1668 | xt_entry_foreach(iter0, entry0, total_size) { | 1671 | xt_entry_foreach(iter0, entry0, total_size) { |
| 1669 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, | 1672 | ret = check_compat_entry_size_and_hooks(iter0, info, &size, |
| @@ -1805,6 +1808,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) | |||
| 1805 | return -ENOMEM; | 1808 | return -ENOMEM; |
| 1806 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) | 1809 | if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) |
| 1807 | return -ENOMEM; | 1810 | return -ENOMEM; |
| 1811 | tmp.name[sizeof(tmp.name)-1] = 0; | ||
| 1808 | 1812 | ||
| 1809 | newinfo = xt_alloc_table_info(tmp.size); | 1813 | newinfo = xt_alloc_table_info(tmp.size); |
| 1810 | if (!newinfo) | 1814 | if (!newinfo) |
| @@ -2034,6 +2038,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
| 2034 | ret = -EFAULT; | 2038 | ret = -EFAULT; |
| 2035 | break; | 2039 | break; |
| 2036 | } | 2040 | } |
| 2041 | rev.name[sizeof(rev.name)-1] = 0; | ||
| 2037 | 2042 | ||
| 2038 | if (cmd == IPT_SO_GET_REVISION_TARGET) | 2043 | if (cmd == IPT_SO_GET_REVISION_TARGET) |
| 2039 | target = 1; | 2044 | target = 1; |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1e26a4897655..d609ac3cb9a4 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
| @@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
| 300 | * that the ->target() function isn't called after ->destroy() */ | 300 | * that the ->target() function isn't called after ->destroy() */ |
| 301 | 301 | ||
| 302 | ct = nf_ct_get(skb, &ctinfo); | 302 | ct = nf_ct_get(skb, &ctinfo); |
| 303 | if (ct == NULL) { | 303 | if (ct == NULL) |
| 304 | pr_info("no conntrack!\n"); | ||
| 305 | /* FIXME: need to drop invalid ones, since replies | ||
| 306 | * to outgoing connections of other nodes will be | ||
| 307 | * marked as INVALID */ | ||
| 308 | return NF_DROP; | 304 | return NF_DROP; |
| 309 | } | ||
| 310 | 305 | ||
| 311 | /* special case: ICMP error handling. conntrack distinguishes between | 306 | /* special case: ICMP error handling. conntrack distinguishes between |
| 312 | * error messages (RELATED) and information requests (see below) */ | 307 | * error messages (RELATED) and information requests (see below) */ |
| @@ -669,8 +664,11 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, | |||
| 669 | char buffer[PROC_WRITELEN+1]; | 664 | char buffer[PROC_WRITELEN+1]; |
| 670 | unsigned long nodenum; | 665 | unsigned long nodenum; |
| 671 | 666 | ||
| 672 | if (copy_from_user(buffer, input, PROC_WRITELEN)) | 667 | if (size > PROC_WRITELEN) |
| 668 | return -EIO; | ||
| 669 | if (copy_from_user(buffer, input, size)) | ||
| 673 | return -EFAULT; | 670 | return -EFAULT; |
| 671 | buffer[size] = 0; | ||
| 674 | 672 | ||
| 675 | if (*buffer == '+') { | 673 | if (*buffer == '+') { |
| 676 | nodenum = simple_strtoul(buffer+1, NULL, 10); | 674 | nodenum = simple_strtoul(buffer+1, NULL, 10); |
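The clusterip_proc_write() change closes two holes at once: the old code copied a fixed PROC_WRITELEN bytes even for shorter writes, reading past the user buffer, and never terminated the on-stack buffer before simple_strtoul(). The corrected bounded-read shape, sketched with the file-local PROC_WRITELEN macro assumed:

	static ssize_t example_proc_write(const char __user *input, size_t size)
	{
		char buffer[PROC_WRITELEN + 1];

		if (size > PROC_WRITELEN)
			return -EIO;			/* reject oversized writes */
		if (copy_from_user(buffer, input, size))
			return -EFAULT;			/* copy only what was written */
		buffer[size] = '\0';	/* terminate before any string parsing */
		return size;
	}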
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 72ffc8fda2e9..d76d6c9ed946 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
| @@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf, | |||
| 442 | } | 442 | } |
| 443 | #endif | 443 | #endif |
| 444 | 444 | ||
| 445 | /* MAC logging for input path only. */ | 445 | if (in != NULL) |
| 446 | if (in && !out) | ||
| 447 | dump_mac_header(m, loginfo, skb); | 446 | dump_mac_header(m, loginfo, skb); |
| 448 | 447 | ||
| 449 | dump_packet(m, loginfo, skb, 0); | 448 | dump_packet(m, loginfo, skb, 0); |
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c deleted file mode 100644 index db8bff0fb86d..000000000000 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ /dev/null | |||
| @@ -1,134 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * iptables module to match inet_addr_type() of an ip. | ||
| 3 | * | ||
| 4 | * Copyright (c) 2004 Patrick McHardy <kaber@trash.net> | ||
| 5 | * (C) 2007 Laszlo Attila Toth <panther@balabit.hu> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License version 2 as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | */ | ||
| 11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 12 | #include <linux/kernel.h> | ||
| 13 | #include <linux/module.h> | ||
| 14 | #include <linux/skbuff.h> | ||
| 15 | #include <linux/netdevice.h> | ||
| 16 | #include <linux/ip.h> | ||
| 17 | #include <net/route.h> | ||
| 18 | |||
| 19 | #include <linux/netfilter_ipv4/ipt_addrtype.h> | ||
| 20 | #include <linux/netfilter/x_tables.h> | ||
| 21 | |||
| 22 | MODULE_LICENSE("GPL"); | ||
| 23 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | ||
| 24 | MODULE_DESCRIPTION("Xtables: address type match for IPv4"); | ||
| 25 | |||
| 26 | static inline bool match_type(struct net *net, const struct net_device *dev, | ||
| 27 | __be32 addr, u_int16_t mask) | ||
| 28 | { | ||
| 29 | return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); | ||
| 30 | } | ||
| 31 | |||
| 32 | static bool | ||
| 33 | addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) | ||
| 34 | { | ||
| 35 | struct net *net = dev_net(par->in ? par->in : par->out); | ||
| 36 | const struct ipt_addrtype_info *info = par->matchinfo; | ||
| 37 | const struct iphdr *iph = ip_hdr(skb); | ||
| 38 | bool ret = true; | ||
| 39 | |||
| 40 | if (info->source) | ||
| 41 | ret &= match_type(net, NULL, iph->saddr, info->source) ^ | ||
| 42 | info->invert_source; | ||
| 43 | if (info->dest) | ||
| 44 | ret &= match_type(net, NULL, iph->daddr, info->dest) ^ | ||
| 45 | info->invert_dest; | ||
| 46 | |||
| 47 | return ret; | ||
| 48 | } | ||
| 49 | |||
| 50 | static bool | ||
| 51 | addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) | ||
| 52 | { | ||
| 53 | struct net *net = dev_net(par->in ? par->in : par->out); | ||
| 54 | const struct ipt_addrtype_info_v1 *info = par->matchinfo; | ||
| 55 | const struct iphdr *iph = ip_hdr(skb); | ||
| 56 | const struct net_device *dev = NULL; | ||
| 57 | bool ret = true; | ||
| 58 | |||
| 59 | if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) | ||
| 60 | dev = par->in; | ||
| 61 | else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) | ||
| 62 | dev = par->out; | ||
| 63 | |||
| 64 | if (info->source) | ||
| 65 | ret &= match_type(net, dev, iph->saddr, info->source) ^ | ||
| 66 | (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); | ||
| 67 | if (ret && info->dest) | ||
| 68 | ret &= match_type(net, dev, iph->daddr, info->dest) ^ | ||
| 69 | !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); | ||
| 70 | return ret; | ||
| 71 | } | ||
| 72 | |||
| 73 | static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) | ||
| 74 | { | ||
| 75 | struct ipt_addrtype_info_v1 *info = par->matchinfo; | ||
| 76 | |||
| 77 | if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && | ||
| 78 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { | ||
| 79 | pr_info("both incoming and outgoing " | ||
| 80 | "interface limitation cannot be selected\n"); | ||
| 81 | return -EINVAL; | ||
| 82 | } | ||
| 83 | |||
| 84 | if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | | ||
| 85 | (1 << NF_INET_LOCAL_IN)) && | ||
| 86 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { | ||
| 87 | pr_info("output interface limitation " | ||
| 88 | "not valid in PREROUTING and INPUT\n"); | ||
| 89 | return -EINVAL; | ||
| 90 | } | ||
| 91 | |||
| 92 | if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | | ||
| 93 | (1 << NF_INET_LOCAL_OUT)) && | ||
| 94 | info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { | ||
| 95 | pr_info("input interface limitation " | ||
| 96 | "not valid in POSTROUTING and OUTPUT\n"); | ||
| 97 | return -EINVAL; | ||
| 98 | } | ||
| 99 | |||
| 100 | return 0; | ||
| 101 | } | ||
| 102 | |||
| 103 | static struct xt_match addrtype_mt_reg[] __read_mostly = { | ||
| 104 | { | ||
| 105 | .name = "addrtype", | ||
| 106 | .family = NFPROTO_IPV4, | ||
| 107 | .match = addrtype_mt_v0, | ||
| 108 | .matchsize = sizeof(struct ipt_addrtype_info), | ||
| 109 | .me = THIS_MODULE | ||
| 110 | }, | ||
| 111 | { | ||
| 112 | .name = "addrtype", | ||
| 113 | .family = NFPROTO_IPV4, | ||
| 114 | .revision = 1, | ||
| 115 | .match = addrtype_mt_v1, | ||
| 116 | .checkentry = addrtype_mt_checkentry_v1, | ||
| 117 | .matchsize = sizeof(struct ipt_addrtype_info_v1), | ||
| 118 | .me = THIS_MODULE | ||
| 119 | } | ||
| 120 | }; | ||
| 121 | |||
| 122 | static int __init addrtype_mt_init(void) | ||
| 123 | { | ||
| 124 | return xt_register_matches(addrtype_mt_reg, | ||
| 125 | ARRAY_SIZE(addrtype_mt_reg)); | ||
| 126 | } | ||
| 127 | |||
| 128 | static void __exit addrtype_mt_exit(void) | ||
| 129 | { | ||
| 130 | xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg)); | ||
| 131 | } | ||
| 132 | |||
| 133 | module_init(addrtype_mt_init); | ||
| 134 | module_exit(addrtype_mt_exit); | ||
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 294a2a32f293..aef5d1fbe77d 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
| @@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) | |||
| 60 | ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, | 60 | ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, |
| 61 | dev_net(out)->ipv4.iptable_mangle); | 61 | dev_net(out)->ipv4.iptable_mangle); |
| 62 | /* Reroute for ANY change. */ | 62 | /* Reroute for ANY change. */ |
| 63 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { | 63 | if (ret != NF_DROP && ret != NF_STOLEN) { |
| 64 | iph = ip_hdr(skb); | 64 | iph = ip_hdr(skb); |
| 65 | 65 | ||
| 66 | if (iph->saddr != saddr || | 66 | if (iph->saddr != saddr || |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 63f60fc5d26a..5585980fce2e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <net/netfilter/nf_conntrack_l4proto.h> | 20 | #include <net/netfilter/nf_conntrack_l4proto.h> |
| 21 | #include <net/netfilter/nf_conntrack_expect.h> | 21 | #include <net/netfilter/nf_conntrack_expect.h> |
| 22 | #include <net/netfilter/nf_conntrack_acct.h> | 22 | #include <net/netfilter/nf_conntrack_acct.h> |
| 23 | #include <linux/rculist_nulls.h> | ||
| 23 | 24 | ||
| 24 | struct ct_iter_state { | 25 | struct ct_iter_state { |
| 25 | struct seq_net_private p; | 26 | struct seq_net_private p; |
| @@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) | |||
| 35 | for (st->bucket = 0; | 36 | for (st->bucket = 0; |
| 36 | st->bucket < net->ct.htable_size; | 37 | st->bucket < net->ct.htable_size; |
| 37 | st->bucket++) { | 38 | st->bucket++) { |
| 38 | n = rcu_dereference(net->ct.hash[st->bucket].first); | 39 | n = rcu_dereference( |
| 40 | hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); | ||
| 39 | if (!is_a_nulls(n)) | 41 | if (!is_a_nulls(n)) |
| 40 | return n; | 42 | return n; |
| 41 | } | 43 | } |
| @@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, | |||
| 48 | struct net *net = seq_file_net(seq); | 50 | struct net *net = seq_file_net(seq); |
| 49 | struct ct_iter_state *st = seq->private; | 51 | struct ct_iter_state *st = seq->private; |
| 50 | 52 | ||
| 51 | head = rcu_dereference(head->next); | 53 | head = rcu_dereference(hlist_nulls_next_rcu(head)); |
| 52 | while (is_a_nulls(head)) { | 54 | while (is_a_nulls(head)) { |
| 53 | if (likely(get_nulls_value(head) == st->bucket)) { | 55 | if (likely(get_nulls_value(head) == st->bucket)) { |
| 54 | if (++st->bucket >= net->ct.htable_size) | 56 | if (++st->bucket >= net->ct.htable_size) |
| 55 | return NULL; | 57 | return NULL; |
| 56 | } | 58 | } |
| 57 | head = rcu_dereference(net->ct.hash[st->bucket].first); | 59 | head = rcu_dereference( |
| 60 | hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); | ||
| 58 | } | 61 | } |
| 59 | return head; | 62 | return head; |
| 60 | } | 63 | } |
| @@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) | |||
| 217 | struct hlist_node *n; | 220 | struct hlist_node *n; |
| 218 | 221 | ||
| 219 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { | 222 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { |
| 220 | n = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 223 | n = rcu_dereference( |
| 224 | hlist_first_rcu(&net->ct.expect_hash[st->bucket])); | ||
| 221 | if (n) | 225 | if (n) |
| 222 | return n; | 226 | return n; |
| 223 | } | 227 | } |
| @@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, | |||
| 230 | struct net *net = seq_file_net(seq); | 234 | struct net *net = seq_file_net(seq); |
| 231 | struct ct_expect_iter_state *st = seq->private; | 235 | struct ct_expect_iter_state *st = seq->private; |
| 232 | 236 | ||
| 233 | head = rcu_dereference(head->next); | 237 | head = rcu_dereference(hlist_next_rcu(head)); |
| 234 | while (head == NULL) { | 238 | while (head == NULL) { |
| 235 | if (++st->bucket >= nf_ct_expect_hsize) | 239 | if (++st->bucket >= nf_ct_expect_hsize) |
| 236 | return NULL; | 240 | return NULL; |
| 237 | head = rcu_dereference(net->ct.expect_hash[st->bucket].first); | 241 | head = rcu_dereference( |
| 242 | hlist_first_rcu(&net->ct.expect_hash[st->bucket])); | ||
| 238 | } | 243 | } |
| 239 | return head; | 244 | return head; |
| 240 | } | 245 | } |
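The hlist_nulls_first_rcu()/hlist_next_rcu() conversions are not functional changes: they wrap the raw ->first/->next loads so the __rcu address-space annotation survives and sparse plus lockdep-RCU can check the rcu_dereference(). Roughly what the nulls helper expands to (per <linux/rculist_nulls.h>, quoted from memory):

	#define hlist_nulls_first_rcu(head) \
		(*((struct hlist_nulls_node __rcu __force **)&(head)->first))

	/* ...so the bucket walk becomes a properly annotated dereference: */
	n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));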
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 0f23b3f06df0..703f366fd235 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c | |||
| @@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb, | |||
| 44 | 44 | ||
| 45 | /* Try to get same port: if not, try to change it. */ | 45 | /* Try to get same port: if not, try to change it. */ |
| 46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { | 46 | for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { |
| 47 | int ret; | 47 | int res; |
| 48 | 48 | ||
| 49 | exp->tuple.dst.u.tcp.port = htons(port); | 49 | exp->tuple.dst.u.tcp.port = htons(port); |
| 50 | ret = nf_ct_expect_related(exp); | 50 | res = nf_ct_expect_related(exp); |
| 51 | if (ret == 0) | 51 | if (res == 0) |
| 52 | break; | 52 | break; |
| 53 | else if (ret != -EBUSY) { | 53 | else if (res != -EBUSY) { |
| 54 | port = 0; | 54 | port = 0; |
| 55 | break; | 55 | break; |
| 56 | } | 56 | } |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index c04787ce1a71..21bcf471b25a 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
| @@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
| 221 | manips not an issue. */ | 221 | manips not an issue. */ |
| 222 | if (maniptype == IP_NAT_MANIP_SRC && | 222 | if (maniptype == IP_NAT_MANIP_SRC && |
| 223 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { | 223 | !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { |
| 224 | if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { | 224 | /* try the original tuple first */ |
| 225 | if (in_range(orig_tuple, range)) { | ||
| 226 | if (!nf_nat_used_tuple(orig_tuple, ct)) { | ||
| 227 | *tuple = *orig_tuple; | ||
| 228 | return; | ||
| 229 | } | ||
| 230 | } else if (find_appropriate_src(net, zone, orig_tuple, tuple, | ||
| 231 | range)) { | ||
| 225 | pr_debug("get_unique_tuple: Found current src map\n"); | 232 | pr_debug("get_unique_tuple: Found current src map\n"); |
| 226 | if (!nf_nat_used_tuple(tuple, ct)) | 233 | if (!nf_nat_used_tuple(tuple, ct)) |
| 227 | return; | 234 | return; |
| @@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
| 266 | struct net *net = nf_ct_net(ct); | 273 | struct net *net = nf_ct_net(ct); |
| 267 | struct nf_conntrack_tuple curr_tuple, new_tuple; | 274 | struct nf_conntrack_tuple curr_tuple, new_tuple; |
| 268 | struct nf_conn_nat *nat; | 275 | struct nf_conn_nat *nat; |
| 269 | int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); | ||
| 270 | 276 | ||
| 271 | /* nat helper or nfctnetlink also setup binding */ | 277 | /* nat helper or nfctnetlink also setup binding */ |
| 272 | nat = nfct_nat(ct); | 278 | nat = nfct_nat(ct); |
| @@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
| 306 | ct->status |= IPS_DST_NAT; | 312 | ct->status |= IPS_DST_NAT; |
| 307 | } | 313 | } |
| 308 | 314 | ||
| 309 | /* Place in source hash if this is the first time. */ | 315 | if (maniptype == IP_NAT_MANIP_SRC) { |
| 310 | if (have_to_hash) { | ||
| 311 | unsigned int srchash; | 316 | unsigned int srchash; |
| 312 | 317 | ||
| 313 | srchash = hash_by_src(net, nf_ct_zone(ct), | 318 | srchash = hash_by_src(net, nf_ct_zone(ct), |
| @@ -323,9 +328,9 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
| 323 | 328 | ||
| 324 | /* It's done. */ | 329 | /* It's done. */ |
| 325 | if (maniptype == IP_NAT_MANIP_DST) | 330 | if (maniptype == IP_NAT_MANIP_DST) |
| 326 | set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); | 331 | ct->status |= IPS_DST_NAT_DONE; |
| 327 | else | 332 | else |
| 328 | set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); | 333 | ct->status |= IPS_SRC_NAT_DONE; |
| 329 | 334 | ||
| 330 | return NF_ACCEPT; | 335 | return NF_ACCEPT; |
| 331 | } | 336 | } |
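Two related simplifications here: the bysource hash insertion is now keyed on the manip type rather than a remembered have_to_hash flag, and the DONE bits are set with a plain OR instead of atomic set_bit(). The latter is presumably safe because nf_nat_setup_info() runs before the conntrack is confirmed, while no other CPU can yet see ct->status; that assumption is the whole difference between the two forms, as this userspace model shows:

#include <stdatomic.h>

#define DONE_BIT (1u << 5)      /* illustrative bit position */

/* Safe only while 'status' has a single owner (object not yet
 * published), which is what the plain |= in the hunk relies on. */
void mark_done_unpublished(unsigned int *status)
{
        *status |= DONE_BIT;
}

/* Required once other threads may read or write 'status'
 * concurrently; this is what set_bit() provides in the kernel. */
void mark_done_published(_Atomic unsigned int *status)
{
        atomic_fetch_or(status, DONE_BIT);
}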
| @@ -502,7 +507,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) | |||
| 502 | int ret = 0; | 507 | int ret = 0; |
| 503 | 508 | ||
| 504 | spin_lock_bh(&nf_nat_lock); | 509 | spin_lock_bh(&nf_nat_lock); |
| 505 | if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { | 510 | if (rcu_dereference_protected( |
| 511 | nf_nat_protos[proto->protonum], | ||
| 512 | lockdep_is_held(&nf_nat_lock) | ||
| 513 | ) != &nf_nat_unknown_protocol) { | ||
| 506 | ret = -EBUSY; | 514 | ret = -EBUSY; |
| 507 | goto out; | 515 | goto out; |
| 508 | } | 516 | } |
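Reading nf_nat_protos[] while holding nf_nat_lock is legal without rcu_read_lock(), and rcu_dereference_protected(..., lockdep_is_held(&nf_nat_lock)) both documents that and lets lockdep verify it at runtime. A userspace analogue of the registration pattern (a mutex in place of the spinlock, plain loads in place of the RCU accessors; names illustrative):

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static int unknown;                      /* plays nf_nat_unknown_protocol */
static const void *slot = &unknown;      /* RCU-published in the kernel */

int register_proto(const void *proto)
{
        int ret = 0;

        pthread_mutex_lock(&slot_lock);  /* update-side lock held... */
        if (slot != &unknown)            /* ...so a plain read is fine */
                ret = -EBUSY;
        else
                slot = proto;            /* rcu_assign_pointer() in the kernel */
        pthread_mutex_unlock(&slot_lock);
        return ret;
}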
| @@ -532,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) | |||
| 532 | if (nat == NULL || nat->ct == NULL) | 540 | if (nat == NULL || nat->ct == NULL) |
| 533 | return; | 541 | return; |
| 534 | 542 | ||
| 535 | NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); | 543 | NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); |
| 536 | 544 | ||
| 537 | spin_lock_bh(&nf_nat_lock); | 545 | spin_lock_bh(&nf_nat_lock); |
| 538 | hlist_del_rcu(&nat->bysource); | 546 | hlist_del_rcu(&nat->bysource); |
| @@ -545,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old) | |||
| 545 | struct nf_conn_nat *old_nat = old; | 553 | struct nf_conn_nat *old_nat = old; |
| 546 | struct nf_conn *ct = old_nat->ct; | 554 | struct nf_conn *ct = old_nat->ct; |
| 547 | 555 | ||
| 548 | if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) | 556 | if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) |
| 549 | return; | 557 | return; |
| 550 | 558 | ||
| 551 | spin_lock_bh(&nf_nat_lock); | 559 | spin_lock_bh(&nf_nat_lock); |
| 552 | new_nat->ct = ct; | ||
| 553 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); | 560 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); |
| 554 | spin_unlock_bh(&nf_nat_lock); | 561 | spin_unlock_bh(&nf_nat_lock); |
| 555 | } | 562 | } |
| @@ -679,8 +686,7 @@ static int __net_init nf_nat_net_init(struct net *net) | |||
| 679 | { | 686 | { |
| 680 | /* Leave them the same for the moment. */ | 687 | /* Leave them the same for the moment. */ |
| 681 | net->ipv4.nat_htable_size = net->ct.htable_size; | 688 | net->ipv4.nat_htable_size = net->ct.htable_size; |
| 682 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, | 689 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); |
| 683 | &net->ipv4.nat_vmalloced, 0); | ||
| 684 | if (!net->ipv4.nat_bysource) | 690 | if (!net->ipv4.nat_bysource) |
| 685 | return -ENOMEM; | 691 | return -ENOMEM; |
| 686 | return 0; | 692 | return 0; |
| @@ -702,8 +708,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) | |||
| 702 | { | 708 | { |
| 703 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); | 709 | nf_ct_iterate_cleanup(net, &clean_nat, NULL); |
| 704 | synchronize_rcu(); | 710 | synchronize_rcu(); |
| 705 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, | 711 | nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); |
| 706 | net->ipv4.nat_htable_size); | ||
| 707 | } | 712 | } |
| 708 | 713 | ||
| 709 | static struct pernet_operations nf_nat_net_ops = { | 714 | static struct pernet_operations nf_nat_net_ops = { |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ee5f419d0a56..8812a02078ab 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
| @@ -54,6 +54,7 @@ | |||
| 54 | #include <net/netfilter/nf_conntrack_expect.h> | 54 | #include <net/netfilter/nf_conntrack_expect.h> |
| 55 | #include <net/netfilter/nf_conntrack_helper.h> | 55 | #include <net/netfilter/nf_conntrack_helper.h> |
| 56 | #include <net/netfilter/nf_nat_helper.h> | 56 | #include <net/netfilter/nf_nat_helper.h> |
| 57 | #include <linux/netfilter/nf_conntrack_snmp.h> | ||
| 57 | 58 | ||
| 58 | MODULE_LICENSE("GPL"); | 59 | MODULE_LICENSE("GPL"); |
| 59 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | 60 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); |
| @@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void) | |||
| 1310 | { | 1311 | { |
| 1311 | int ret = 0; | 1312 | int ret = 0; |
| 1312 | 1313 | ||
| 1313 | ret = nf_conntrack_helper_register(&snmp_helper); | 1314 | BUG_ON(nf_nat_snmp_hook != NULL); |
| 1314 | if (ret < 0) | 1315 | rcu_assign_pointer(nf_nat_snmp_hook, help); |
| 1315 | return ret; | 1316 | |
| 1316 | ret = nf_conntrack_helper_register(&snmp_trap_helper); | 1317 | ret = nf_conntrack_helper_register(&snmp_trap_helper); |
| 1317 | if (ret < 0) { | 1318 | if (ret < 0) { |
| 1318 | nf_conntrack_helper_unregister(&snmp_helper); | 1319 | nf_conntrack_helper_unregister(&snmp_helper); |
| @@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void) | |||
| 1323 | 1324 | ||
| 1324 | static void __exit nf_nat_snmp_basic_fini(void) | 1325 | static void __exit nf_nat_snmp_basic_fini(void) |
| 1325 | { | 1326 | { |
| 1326 | nf_conntrack_helper_unregister(&snmp_helper); | 1327 | rcu_assign_pointer(nf_nat_snmp_hook, NULL); |
| 1327 | nf_conntrack_helper_unregister(&snmp_trap_helper); | 1328 | nf_conntrack_helper_unregister(&snmp_trap_helper); |
| 1328 | } | 1329 | } |
| 1329 | 1330 | ||
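Instead of registering a full conntrack helper for plain SNMP, the module now publishes its help() routine through the nf_nat_snmp_hook pointer (the trap helper is still registered as before). The publish/consume pattern, modeled in userspace with C11 atomics in place of rcu_assign_pointer()/rcu_dereference():

#include <stdatomic.h>
#include <stddef.h>

typedef int (*help_fn)(void *pkt);

static _Atomic(help_fn) snmp_hook;      /* plays nf_nat_snmp_hook */

void hook_publish(help_fn fn)           /* module init */
{
        atomic_store_explicit(&snmp_hook, fn, memory_order_release);
}

void hook_withdraw(void)                /* module exit */
{
        atomic_store_explicit(&snmp_hook, NULL, memory_order_release);
}

int hook_call(void *pkt)                /* consumer samples once per use */
{
        help_fn fn = atomic_load_explicit(&snmp_hook, memory_order_acquire);

        return fn ? fn(pkt) : 0;        /* helper absent: pass through */
}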
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 95481fee8bdb..7317bdf1d457 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #ifdef CONFIG_XFRM | 31 | #ifdef CONFIG_XFRM |
| 32 | static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) | 32 | static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) |
| 33 | { | 33 | { |
| 34 | struct flowi4 *fl4 = &fl->u.ip4; | ||
| 34 | const struct nf_conn *ct; | 35 | const struct nf_conn *ct; |
| 35 | const struct nf_conntrack_tuple *t; | 36 | const struct nf_conntrack_tuple *t; |
| 36 | enum ip_conntrack_info ctinfo; | 37 | enum ip_conntrack_info ctinfo; |
| @@ -49,25 +50,25 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) | |||
| 49 | statusbit = IPS_SRC_NAT; | 50 | statusbit = IPS_SRC_NAT; |
| 50 | 51 | ||
| 51 | if (ct->status & statusbit) { | 52 | if (ct->status & statusbit) { |
| 52 | fl->fl4_dst = t->dst.u3.ip; | 53 | fl4->daddr = t->dst.u3.ip; |
| 53 | if (t->dst.protonum == IPPROTO_TCP || | 54 | if (t->dst.protonum == IPPROTO_TCP || |
| 54 | t->dst.protonum == IPPROTO_UDP || | 55 | t->dst.protonum == IPPROTO_UDP || |
| 55 | t->dst.protonum == IPPROTO_UDPLITE || | 56 | t->dst.protonum == IPPROTO_UDPLITE || |
| 56 | t->dst.protonum == IPPROTO_DCCP || | 57 | t->dst.protonum == IPPROTO_DCCP || |
| 57 | t->dst.protonum == IPPROTO_SCTP) | 58 | t->dst.protonum == IPPROTO_SCTP) |
| 58 | fl->fl_ip_dport = t->dst.u.tcp.port; | 59 | fl4->fl4_dport = t->dst.u.tcp.port; |
| 59 | } | 60 | } |
| 60 | 61 | ||
| 61 | statusbit ^= IPS_NAT_MASK; | 62 | statusbit ^= IPS_NAT_MASK; |
| 62 | 63 | ||
| 63 | if (ct->status & statusbit) { | 64 | if (ct->status & statusbit) { |
| 64 | fl->fl4_src = t->src.u3.ip; | 65 | fl4->saddr = t->src.u3.ip; |
| 65 | if (t->dst.protonum == IPPROTO_TCP || | 66 | if (t->dst.protonum == IPPROTO_TCP || |
| 66 | t->dst.protonum == IPPROTO_UDP || | 67 | t->dst.protonum == IPPROTO_UDP || |
| 67 | t->dst.protonum == IPPROTO_UDPLITE || | 68 | t->dst.protonum == IPPROTO_UDPLITE || |
| 68 | t->dst.protonum == IPPROTO_DCCP || | 69 | t->dst.protonum == IPPROTO_DCCP || |
| 69 | t->dst.protonum == IPPROTO_SCTP) | 70 | t->dst.protonum == IPPROTO_SCTP) |
| 70 | fl->fl_ip_sport = t->src.u.tcp.port; | 71 | fl4->fl4_sport = t->src.u.tcp.port; |
| 71 | } | 72 | } |
| 72 | } | 73 | } |
| 73 | #endif | 74 | #endif |
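This file is mechanical fallout from the struct flowi rework: the IPv4 key now lives in a flowi4 embedded in a union, so nat_decode_session() takes &fl->u.ip4 once and writes daddr/saddr and the transport ports through it. For orientation, a rough sketch of the relevant field subset (the authoritative layout is in include/net/flow.h, where the port and ICMP fields share a union):

/* Field subset only, for orientation; not the real kernel layout. */
struct flowi4_sketch {
        int            flowi4_oif;
        int            flowi4_iif;
        unsigned int   flowi4_mark;
        unsigned char  flowi4_tos;
        unsigned char  flowi4_proto;
        unsigned int   daddr, saddr;           /* __be32 in the kernel */
        unsigned short fl4_sport, fl4_dport;   /* __be16; union with
                                                  fl4_icmp_type/code */
};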
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a3d5ab786e81..2d3c72e5bbbf 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
| @@ -76,6 +76,7 @@ | |||
| 76 | #include <linux/seq_file.h> | 76 | #include <linux/seq_file.h> |
| 77 | #include <linux/netfilter.h> | 77 | #include <linux/netfilter.h> |
| 78 | #include <linux/netfilter_ipv4.h> | 78 | #include <linux/netfilter_ipv4.h> |
| 79 | #include <linux/compat.h> | ||
| 79 | 80 | ||
| 80 | static struct raw_hashinfo raw_v4_hashinfo = { | 81 | static struct raw_hashinfo raw_v4_hashinfo = { |
| 81 | .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), | 82 | .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), |
| @@ -401,7 +402,7 @@ error: | |||
| 401 | return err; | 402 | return err; |
| 402 | } | 403 | } |
| 403 | 404 | ||
| 404 | static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | 405 | static int raw_probe_proto_opt(struct flowi4 *fl4, struct msghdr *msg) |
| 405 | { | 406 | { |
| 406 | struct iovec *iov; | 407 | struct iovec *iov; |
| 407 | u8 __user *type = NULL; | 408 | u8 __user *type = NULL; |
| @@ -417,7 +418,7 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | |||
| 417 | if (!iov) | 418 | if (!iov) |
| 418 | continue; | 419 | continue; |
| 419 | 420 | ||
| 420 | switch (fl->proto) { | 421 | switch (fl4->flowi4_proto) { |
| 421 | case IPPROTO_ICMP: | 422 | case IPPROTO_ICMP: |
| 422 | /* check if one-byte field is readable or not. */ | 423 | /* check if one-byte field is readable or not. */ |
| 423 | if (iov->iov_base && iov->iov_len < 1) | 424 | if (iov->iov_base && iov->iov_len < 1) |
| @@ -432,8 +433,8 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | |||
| 432 | code = iov->iov_base; | 433 | code = iov->iov_base; |
| 433 | 434 | ||
| 434 | if (type && code) { | 435 | if (type && code) { |
| 435 | if (get_user(fl->fl_icmp_type, type) || | 436 | if (get_user(fl4->fl4_icmp_type, type) || |
| 436 | get_user(fl->fl_icmp_code, code)) | 437 | get_user(fl4->fl4_icmp_code, code)) |
| 437 | return -EFAULT; | 438 | return -EFAULT; |
| 438 | probed = 1; | 439 | probed = 1; |
| 439 | } | 440 | } |
| @@ -547,25 +548,31 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 547 | } | 548 | } |
| 548 | 549 | ||
| 549 | { | 550 | { |
| 550 | struct flowi fl = { .oif = ipc.oif, | 551 | struct flowi4 fl4 = { |
| 551 | .mark = sk->sk_mark, | 552 | .flowi4_oif = ipc.oif, |
| 552 | .fl4_dst = daddr, | 553 | .flowi4_mark = sk->sk_mark, |
| 553 | .fl4_src = saddr, | 554 | .daddr = daddr, |
| 554 | .fl4_tos = tos, | 555 | .saddr = saddr, |
| 555 | .proto = inet->hdrincl ? IPPROTO_RAW : | 556 | .flowi4_tos = tos, |
| 556 | sk->sk_protocol, | 557 | .flowi4_proto = (inet->hdrincl ? |
| 557 | }; | 558 | IPPROTO_RAW : |
| 559 | sk->sk_protocol), | ||
| 560 | .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, | ||
| 561 | }; | ||
| 558 | if (!inet->hdrincl) { | 562 | if (!inet->hdrincl) { |
| 559 | err = raw_probe_proto_opt(&fl, msg); | 563 | err = raw_probe_proto_opt(&fl4, msg); |
| 560 | if (err) | 564 | if (err) |
| 561 | goto done; | 565 | goto done; |
| 562 | } | 566 | } |
| 563 | 567 | ||
| 564 | security_sk_classify_flow(sk, &fl); | 568 | security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); |
| 565 | err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); | 569 | rt = ip_route_output_flow(sock_net(sk), &fl4, sk); |
| 570 | if (IS_ERR(rt)) { | ||
| 571 | err = PTR_ERR(rt); | ||
| 572 | rt = NULL; | ||
| 573 | goto done; | ||
| 574 | } | ||
| 566 | } | 575 | } |
| 567 | if (err) | ||
| 568 | goto done; | ||
| 569 | 576 | ||
| 570 | err = -EACCES; | 577 | err = -EACCES; |
| 571 | if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) | 578 | if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) |
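raw_sendmsg() illustrates the new route-lookup calling convention: ip_route_output_flow() no longer fills an out-parameter and returns an int, it returns either a valid rtable or a negated errno encoded in the pointer value, so callers test IS_ERR(), extract PTR_ERR(), and NULL their local before jumping to the error path. The idiom in isolation, with the helpers reimplemented for userspace and route_lookup() as a stand-in:

#include <stdint.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err)   { return (void *)err; }
static inline long  PTR_ERR(const void *p) { return (long)p; }
static inline int   IS_ERR(const void *p)
{
        return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

extern void *route_lookup(const void *key);  /* plays ip_route_output_flow() */

int send_one(const void *key)
{
        void *rt = route_lookup(key);

        if (IS_ERR(rt))
                return (int)PTR_ERR(rt);  /* no route: propagate the errno */
        /* ... transmit using rt ... */
        return 0;
}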
| @@ -838,6 +845,23 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
| 838 | } | 845 | } |
| 839 | } | 846 | } |
| 840 | 847 | ||
| 848 | #ifdef CONFIG_COMPAT | ||
| 849 | static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) | ||
| 850 | { | ||
| 851 | switch (cmd) { | ||
| 852 | case SIOCOUTQ: | ||
| 853 | case SIOCINQ: | ||
| 854 | return -ENOIOCTLCMD; | ||
| 855 | default: | ||
| 856 | #ifdef CONFIG_IP_MROUTE | ||
| 857 | return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg)); | ||
| 858 | #else | ||
| 859 | return -ENOIOCTLCMD; | ||
| 860 | #endif | ||
| 861 | } | ||
| 862 | } | ||
| 863 | #endif | ||
| 864 | |||
| 841 | struct proto raw_prot = { | 865 | struct proto raw_prot = { |
| 842 | .name = "RAW", | 866 | .name = "RAW", |
| 843 | .owner = THIS_MODULE, | 867 | .owner = THIS_MODULE, |
| @@ -860,6 +884,7 @@ struct proto raw_prot = { | |||
| 860 | #ifdef CONFIG_COMPAT | 884 | #ifdef CONFIG_COMPAT |
| 861 | .compat_setsockopt = compat_raw_setsockopt, | 885 | .compat_setsockopt = compat_raw_setsockopt, |
| 862 | .compat_getsockopt = compat_raw_getsockopt, | 886 | .compat_getsockopt = compat_raw_getsockopt, |
| 887 | .compat_ioctl = compat_raw_ioctl, | ||
| 863 | #endif | 888 | #endif |
| 864 | }; | 889 | }; |
| 865 | 890 | ||
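The new compat_raw_ioctl() gives 32-bit tasks on a 64-bit kernel a path into the mroute ioctls: compat_ptr() widens the 32-bit user pointer, while SIOCOUTQ/SIOCINQ return -ENOIOCTLCMD, which (as I read it) defers them to the generic compat socket path, since their plain-int results need no translation. A skeletal model with invented command values:

#include <stdint.h>

#define ENOIOCTLCMD 515   /* kernel-internal "not handled here"; illustrative */
#define CMD_OUTQ    1     /* stands in for SIOCOUTQ */
#define CMD_INQ     2     /* stands in for SIOCINQ */

/* Widen a 32-bit user pointer; plays the kernel's compat_ptr(). */
static void *compat_ptr_model(uint32_t uptr)
{
        return (void *)(uintptr_t)uptr;
}

extern int mroute_ioctl(unsigned int cmd, void *arg);  /* plays ipmr_compat_ioctl() */

int compat_dispatch(unsigned int cmd, uint32_t arg)
{
        switch (cmd) {
        case CMD_OUTQ:
        case CMD_INQ:
                return -ENOIOCTLCMD;    /* let the generic compat path run */
        default:
                return mroute_ioctl(cmd, compat_ptr_model(arg));
        }
}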
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 351dc4e85242..4b0c81180804 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
| @@ -109,8 +109,8 @@ | |||
| 109 | #include <linux/sysctl.h> | 109 | #include <linux/sysctl.h> |
| 110 | #endif | 110 | #endif |
| 111 | 111 | ||
| 112 | #define RT_FL_TOS(oldflp) \ | 112 | #define RT_FL_TOS(oldflp4) \ |
| 113 | ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) | 113 | ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) |
| 114 | 114 | ||
| 115 | #define IP_MAX_MTU 0xFFF0 | 115 | #define IP_MAX_MTU 0xFFF0 |
| 116 | 116 | ||
| @@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; | |||
| 131 | static int ip_rt_min_advmss __read_mostly = 256; | 131 | static int ip_rt_min_advmss __read_mostly = 256; |
| 132 | static int rt_chain_length_max __read_mostly = 20; | 132 | static int rt_chain_length_max __read_mostly = 20; |
| 133 | 133 | ||
| 134 | static struct delayed_work expires_work; | ||
| 135 | static unsigned long expires_ljiffies; | ||
| 136 | |||
| 137 | /* | 134 | /* |
| 138 | * Interface to generic destination cache. | 135 | * Interface to generic destination cache. |
| 139 | */ | 136 | */ |
| @@ -152,6 +149,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |||
| 152 | { | 149 | { |
| 153 | } | 150 | } |
| 154 | 151 | ||
| 152 | static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) | ||
| 153 | { | ||
| 154 | struct rtable *rt = (struct rtable *) dst; | ||
| 155 | struct inet_peer *peer; | ||
| 156 | u32 *p = NULL; | ||
| 157 | |||
| 158 | if (!rt->peer) | ||
| 159 | rt_bind_peer(rt, 1); | ||
| 160 | |||
| 161 | peer = rt->peer; | ||
| 162 | if (peer) { | ||
| 163 | u32 *old_p = __DST_METRICS_PTR(old); | ||
| 164 | unsigned long prev, new; | ||
| 165 | |||
| 166 | p = peer->metrics; | ||
| 167 | if (inet_metrics_new(peer)) | ||
| 168 | memcpy(p, old_p, sizeof(u32) * RTAX_MAX); | ||
| 169 | |||
| 170 | new = (unsigned long) p; | ||
| 171 | prev = cmpxchg(&dst->_metrics, old, new); | ||
| 172 | |||
| 173 | if (prev != old) { | ||
| 174 | p = __DST_METRICS_PTR(prev); | ||
| 175 | if (prev & DST_METRICS_READ_ONLY) | ||
| 176 | p = NULL; | ||
| 177 | } else { | ||
| 178 | if (rt->fi) { | ||
| 179 | fib_info_put(rt->fi); | ||
| 180 | rt->fi = NULL; | ||
| 181 | } | ||
| 182 | } | ||
| 183 | } | ||
| 184 | return p; | ||
| 185 | } | ||
| 186 | |||
| 155 | static struct dst_ops ipv4_dst_ops = { | 187 | static struct dst_ops ipv4_dst_ops = { |
| 156 | .family = AF_INET, | 188 | .family = AF_INET, |
| 157 | .protocol = cpu_to_be16(ETH_P_IP), | 189 | .protocol = cpu_to_be16(ETH_P_IP), |
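The new ipv4_cow_metrics() is the interesting part of this hunk: metrics start out pointing at shared read-only storage, and the first write clones them into the inet_peer and swings dst->_metrics over with a single cmpxchg(), adopting the winner's copy on a lost race. A userspace model using C11 atomics (the fib_info release on success and all lifetime management are elided):

#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

#define NMETRICS 16
#define RDONLY   1UL    /* low tag bit, like DST_METRICS_READ_ONLY */

static unsigned *cow_metrics(_Atomic(unsigned long) *slot)
{
        unsigned long old = atomic_load(slot);
        unsigned *old_p = (unsigned *)(old & ~RDONLY);
        unsigned *p = malloc(sizeof(unsigned) * NMETRICS);

        if (!p)
                return NULL;
        memcpy(p, old_p, sizeof(unsigned) * NMETRICS);   /* clone shared copy */

        if (!atomic_compare_exchange_strong(slot, &old, (unsigned long)p)) {
                free(p);        /* lost the race: adopt the winner's value */
                p = (old & RDONLY) ? NULL : (unsigned *)(old & ~RDONLY);
        }
        return p;               /* writable metrics, or NULL on failure */
}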
| @@ -159,6 +191,7 @@ static struct dst_ops ipv4_dst_ops = { | |||
| 159 | .check = ipv4_dst_check, | 191 | .check = ipv4_dst_check, |
| 160 | .default_advmss = ipv4_default_advmss, | 192 | .default_advmss = ipv4_default_advmss, |
| 161 | .default_mtu = ipv4_default_mtu, | 193 | .default_mtu = ipv4_default_mtu, |
| 194 | .cow_metrics = ipv4_cow_metrics, | ||
| 162 | .destroy = ipv4_dst_destroy, | 195 | .destroy = ipv4_dst_destroy, |
| 163 | .ifdown = ipv4_dst_ifdown, | 196 | .ifdown = ipv4_dst_ifdown, |
| 164 | .negative_advice = ipv4_negative_advice, | 197 | .negative_advice = ipv4_negative_advice, |
| @@ -171,7 +204,7 @@ static struct dst_ops ipv4_dst_ops = { | |||
| 171 | 204 | ||
| 172 | const __u8 ip_tos2prio[16] = { | 205 | const __u8 ip_tos2prio[16] = { |
| 173 | TC_PRIO_BESTEFFORT, | 206 | TC_PRIO_BESTEFFORT, |
| 174 | ECN_OR_COST(FILLER), | 207 | ECN_OR_COST(BESTEFFORT), |
| 175 | TC_PRIO_BESTEFFORT, | 208 | TC_PRIO_BESTEFFORT, |
| 176 | ECN_OR_COST(BESTEFFORT), | 209 | ECN_OR_COST(BESTEFFORT), |
| 177 | TC_PRIO_BULK, | 210 | TC_PRIO_BULK, |
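A one-entry bug fix: the row for TOS 0x02 mapped through ECN_OR_COST(FILLER) and now matches its 0x00 neighbour as best-effort. For context, the table is indexed with the 4-bit TOS field shifted down by one, so each row covers one TOS value; a model of the lookup (the priority values and truncated table are illustrative):

enum { PRIO_BESTEFFORT, PRIO_FILLER, PRIO_BULK };   /* illustrative */

static const int tos2prio[16] = {
        PRIO_BESTEFFORT,    /* 0x00 */
        PRIO_BESTEFFORT,    /* 0x02: previously PRIO_FILLER */
        PRIO_BESTEFFORT,    /* 0x04 */
        PRIO_BESTEFFORT,    /* 0x06 */
        PRIO_BULK,          /* 0x08 */
        /* ... remaining rows elided ... */
};

static int tos_to_prio(unsigned char tos)
{
        return tos2prio[(tos & 0x1e) >> 1];   /* IPTOS_TOS(tos) >> 1 */
}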
| @@ -391,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
| 391 | dst_metric(&r->dst, RTAX_WINDOW), | 424 | dst_metric(&r->dst, RTAX_WINDOW), |
| 392 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + | 425 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + |
| 393 | dst_metric(&r->dst, RTAX_RTTVAR)), | 426 | dst_metric(&r->dst, RTAX_RTTVAR)), |
| 394 | r->fl.fl4_tos, | 427 | r->rt_tos, |
| 395 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, | 428 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, |
| 396 | r->dst.hh ? (r->dst.hh->hh_output == | 429 | r->dst.hh ? (r->dst.hh->hh_output == |
| 397 | dev_queue_xmit) : 0, | 430 | dev_queue_xmit) : 0, |
| @@ -514,7 +547,7 @@ static const struct file_operations rt_cpu_seq_fops = { | |||
| 514 | .release = seq_release, | 547 | .release = seq_release, |
| 515 | }; | 548 | }; |
| 516 | 549 | ||
| 517 | #ifdef CONFIG_NET_CLS_ROUTE | 550 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 518 | static int rt_acct_proc_show(struct seq_file *m, void *v) | 551 | static int rt_acct_proc_show(struct seq_file *m, void *v) |
| 519 | { | 552 | { |
| 520 | struct ip_rt_acct *dst, *src; | 553 | struct ip_rt_acct *dst, *src; |
| @@ -567,14 +600,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net) | |||
| 567 | if (!pde) | 600 | if (!pde) |
| 568 | goto err2; | 601 | goto err2; |
| 569 | 602 | ||
| 570 | #ifdef CONFIG_NET_CLS_ROUTE | 603 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 571 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); | 604 | pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); |
| 572 | if (!pde) | 605 | if (!pde) |
| 573 | goto err3; | 606 | goto err3; |
| 574 | #endif | 607 | #endif |
| 575 | return 0; | 608 | return 0; |
| 576 | 609 | ||
| 577 | #ifdef CONFIG_NET_CLS_ROUTE | 610 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 578 | err3: | 611 | err3: |
| 579 | remove_proc_entry("rt_cache", net->proc_net_stat); | 612 | remove_proc_entry("rt_cache", net->proc_net_stat); |
| 580 | #endif | 613 | #endif |
| @@ -588,7 +621,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) | |||
| 588 | { | 621 | { |
| 589 | remove_proc_entry("rt_cache", net->proc_net_stat); | 622 | remove_proc_entry("rt_cache", net->proc_net_stat); |
| 590 | remove_proc_entry("rt_cache", net->proc_net); | 623 | remove_proc_entry("rt_cache", net->proc_net); |
| 591 | #ifdef CONFIG_NET_CLS_ROUTE | 624 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 592 | remove_proc_entry("rt_acct", net->proc_net); | 625 | remove_proc_entry("rt_acct", net->proc_net); |
| 593 | #endif | 626 | #endif |
| 594 | } | 627 | } |
| @@ -632,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth) | |||
| 632 | static inline int rt_valuable(struct rtable *rth) | 665 | static inline int rt_valuable(struct rtable *rth) |
| 633 | { | 666 | { |
| 634 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || | 667 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || |
| 635 | rth->dst.expires; | 668 | (rth->peer && rth->peer->pmtu_expires); |
| 636 | } | 669 | } |
| 637 | 670 | ||
| 638 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) | 671 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) |
| @@ -643,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t | |||
| 643 | if (atomic_read(&rth->dst.__refcnt)) | 676 | if (atomic_read(&rth->dst.__refcnt)) |
| 644 | goto out; | 677 | goto out; |
| 645 | 678 | ||
| 646 | ret = 1; | ||
| 647 | if (rth->dst.expires && | ||
| 648 | time_after_eq(jiffies, rth->dst.expires)) | ||
| 649 | goto out; | ||
| 650 | |||
| 651 | age = jiffies - rth->dst.lastuse; | 679 | age = jiffies - rth->dst.lastuse; |
| 652 | ret = 0; | ||
| 653 | if ((age <= tmo1 && !rt_fast_clean(rth)) || | 680 | if ((age <= tmo1 && !rt_fast_clean(rth)) || |
| 654 | (age <= tmo2 && rt_valuable(rth))) | 681 | (age <= tmo2 && rt_valuable(rth))) |
| 655 | goto out; | 682 | goto out; |
| @@ -684,22 +711,22 @@ static inline bool rt_caching(const struct net *net) | |||
| 684 | net->ipv4.sysctl_rt_cache_rebuild_count; | 711 | net->ipv4.sysctl_rt_cache_rebuild_count; |
| 685 | } | 712 | } |
| 686 | 713 | ||
| 687 | static inline bool compare_hash_inputs(const struct flowi *fl1, | 714 | static inline bool compare_hash_inputs(const struct rtable *rt1, |
| 688 | const struct flowi *fl2) | 715 | const struct rtable *rt2) |
| 689 | { | 716 | { |
| 690 | return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | | 717 | return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | |
| 691 | ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | | 718 | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | |
| 692 | (fl1->iif ^ fl2->iif)) == 0); | 719 | (rt1->rt_iif ^ rt2->rt_iif)) == 0); |
| 693 | } | 720 | } |
| 694 | 721 | ||
| 695 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | 722 | static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) |
| 696 | { | 723 | { |
| 697 | return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | | 724 | return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | |
| 698 | ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | | 725 | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | |
| 699 | (fl1->mark ^ fl2->mark) | | 726 | (rt1->rt_mark ^ rt2->rt_mark) | |
| 700 | (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) | | 727 | (rt1->rt_tos ^ rt2->rt_tos) | |
| 701 | (fl1->oif ^ fl2->oif) | | 728 | (rt1->rt_oif ^ rt2->rt_oif) | |
| 702 | (fl1->iif ^ fl2->iif)) == 0; | 729 | (rt1->rt_iif ^ rt2->rt_iif)) == 0; |
| 703 | } | 730 | } |
| 704 | 731 | ||
| 705 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) | 732 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) |
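With struct flowi no longer carried inside struct rtable, the hash and lookup keys move to dedicated rt_key_* fields, but the comparison idiom is unchanged: XOR each field pair and OR the results together, giving a branch-free equality test that is zero exactly when every field matches. Distilled into standalone C:

#include <stdbool.h>

struct key { unsigned dst, src, mark; unsigned char tos; int oif, iif; };

static bool keys_equal(const struct key *a, const struct key *b)
{
        /* Each XOR is 0 iff the fields match; OR-ing accumulates any
         * difference, so one compare at the end decides equality. */
        return ((a->dst ^ b->dst) |
                (a->src ^ b->src) |
                (a->mark ^ b->mark) |
                (unsigned)(a->tos ^ b->tos) |
                (unsigned)(a->oif ^ b->oif) |
                (unsigned)(a->iif ^ b->iif)) == 0;
}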
| @@ -786,104 +813,13 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) | |||
| 786 | const struct rtable *aux = head; | 813 | const struct rtable *aux = head; |
| 787 | 814 | ||
| 788 | while (aux != rth) { | 815 | while (aux != rth) { |
| 789 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | 816 | if (compare_hash_inputs(aux, rth)) |
| 790 | return 0; | 817 | return 0; |
| 791 | aux = rcu_dereference_protected(aux->dst.rt_next, 1); | 818 | aux = rcu_dereference_protected(aux->dst.rt_next, 1); |
| 792 | } | 819 | } |
| 793 | return ONE; | 820 | return ONE; |
| 794 | } | 821 | } |
| 795 | 822 | ||
| 796 | static void rt_check_expire(void) | ||
| 797 | { | ||
| 798 | static unsigned int rover; | ||
| 799 | unsigned int i = rover, goal; | ||
| 800 | struct rtable *rth; | ||
| 801 | struct rtable __rcu **rthp; | ||
| 802 | unsigned long samples = 0; | ||
| 803 | unsigned long sum = 0, sum2 = 0; | ||
| 804 | unsigned long delta; | ||
| 805 | u64 mult; | ||
| 806 | |||
| 807 | delta = jiffies - expires_ljiffies; | ||
| 808 | expires_ljiffies = jiffies; | ||
| 809 | mult = ((u64)delta) << rt_hash_log; | ||
| 810 | if (ip_rt_gc_timeout > 1) | ||
| 811 | do_div(mult, ip_rt_gc_timeout); | ||
| 812 | goal = (unsigned int)mult; | ||
| 813 | if (goal > rt_hash_mask) | ||
| 814 | goal = rt_hash_mask + 1; | ||
| 815 | for (; goal > 0; goal--) { | ||
| 816 | unsigned long tmo = ip_rt_gc_timeout; | ||
| 817 | unsigned long length; | ||
| 818 | |||
| 819 | i = (i + 1) & rt_hash_mask; | ||
| 820 | rthp = &rt_hash_table[i].chain; | ||
| 821 | |||
| 822 | if (need_resched()) | ||
| 823 | cond_resched(); | ||
| 824 | |||
| 825 | samples++; | ||
| 826 | |||
| 827 | if (rcu_dereference_raw(*rthp) == NULL) | ||
| 828 | continue; | ||
| 829 | length = 0; | ||
| 830 | spin_lock_bh(rt_hash_lock_addr(i)); | ||
| 831 | while ((rth = rcu_dereference_protected(*rthp, | ||
| 832 | lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { | ||
| 833 | prefetch(rth->dst.rt_next); | ||
| 834 | if (rt_is_expired(rth)) { | ||
| 835 | *rthp = rth->dst.rt_next; | ||
| 836 | rt_free(rth); | ||
| 837 | continue; | ||
| 838 | } | ||
| 839 | if (rth->dst.expires) { | ||
| 840 | /* Entry is expired even if it is in use */ | ||
| 841 | if (time_before_eq(jiffies, rth->dst.expires)) { | ||
| 842 | nofree: | ||
| 843 | tmo >>= 1; | ||
| 844 | rthp = &rth->dst.rt_next; | ||
| 845 | /* | ||
| 846 | * We only count entries on | ||
| 847 | * a chain with equal hash inputs once | ||
| 848 | * so that entries for different QOS | ||
| 849 | * levels, and other non-hash input | ||
| 850 | * attributes don't unfairly skew | ||
| 851 | * the length computation | ||
| 852 | */ | ||
| 853 | length += has_noalias(rt_hash_table[i].chain, rth); | ||
| 854 | continue; | ||
| 855 | } | ||
| 856 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) | ||
| 857 | goto nofree; | ||
| 858 | |||
| 859 | /* Cleanup aged off entries. */ | ||
| 860 | *rthp = rth->dst.rt_next; | ||
| 861 | rt_free(rth); | ||
| 862 | } | ||
| 863 | spin_unlock_bh(rt_hash_lock_addr(i)); | ||
| 864 | sum += length; | ||
| 865 | sum2 += length*length; | ||
| 866 | } | ||
| 867 | if (samples) { | ||
| 868 | unsigned long avg = sum / samples; | ||
| 869 | unsigned long sd = int_sqrt(sum2 / samples - avg*avg); | ||
| 870 | rt_chain_length_max = max_t(unsigned long, | ||
| 871 | ip_rt_gc_elasticity, | ||
| 872 | (avg + 4*sd) >> FRACT_BITS); | ||
| 873 | } | ||
| 874 | rover = i; | ||
| 875 | } | ||
| 876 | |||
| 877 | /* | ||
| 878 | * rt_worker_func() is run in process context. | ||
| 879 | * we call rt_check_expire() to scan part of the hash table | ||
| 880 | */ | ||
| 881 | static void rt_worker_func(struct work_struct *work) | ||
| 882 | { | ||
| 883 | rt_check_expire(); | ||
| 884 | schedule_delayed_work(&expires_work, ip_rt_gc_interval); | ||
| 885 | } | ||
| 886 | |||
| 887 | /* | 823 | /* |
| 888 | * Perturbation of rt_genid by a small quantity [1..256] | 824 | * Perturbation of rt_genid by a small quantity [1..256]
| 889 | * Using 8 bits of shuffling ensures we can call rt_cache_invalidate() | 825 | * Using 8 bits of shuffling ensures we can call rt_cache_invalidate()
| @@ -1078,8 +1014,8 @@ static int slow_chain_length(const struct rtable *head) | |||
| 1078 | return length >> FRACT_BITS; | 1014 | return length >> FRACT_BITS; |
| 1079 | } | 1015 | } |
| 1080 | 1016 | ||
| 1081 | static int rt_intern_hash(unsigned hash, struct rtable *rt, | 1017 | static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, |
| 1082 | struct rtable **rp, struct sk_buff *skb, int ifindex) | 1018 | struct sk_buff *skb, int ifindex) |
| 1083 | { | 1019 | { |
| 1084 | struct rtable *rth, *cand; | 1020 | struct rtable *rth, *cand; |
| 1085 | struct rtable __rcu **rthp, **candp; | 1021 | struct rtable __rcu **rthp, **candp; |
| @@ -1120,7 +1056,7 @@ restart: | |||
| 1120 | printk(KERN_WARNING | 1056 | printk(KERN_WARNING |
| 1121 | "Neighbour table failure & not caching routes.\n"); | 1057 | "Neighbour table failure & not caching routes.\n"); |
| 1122 | ip_rt_put(rt); | 1058 | ip_rt_put(rt); |
| 1123 | return err; | 1059 | return ERR_PTR(err); |
| 1124 | } | 1060 | } |
| 1125 | } | 1061 | } |
| 1126 | 1062 | ||
| @@ -1137,7 +1073,7 @@ restart: | |||
| 1137 | rt_free(rth); | 1073 | rt_free(rth); |
| 1138 | continue; | 1074 | continue; |
| 1139 | } | 1075 | } |
| 1140 | if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { | 1076 | if (compare_keys(rth, rt) && compare_netns(rth, rt)) { |
| 1141 | /* Put it first */ | 1077 | /* Put it first */ |
| 1142 | *rthp = rth->dst.rt_next; | 1078 | *rthp = rth->dst.rt_next; |
| 1143 | /* | 1079 | /* |
| @@ -1157,11 +1093,9 @@ restart: | |||
| 1157 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1093 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
| 1158 | 1094 | ||
| 1159 | rt_drop(rt); | 1095 | rt_drop(rt); |
| 1160 | if (rp) | 1096 | if (skb) |
| 1161 | *rp = rth; | ||
| 1162 | else | ||
| 1163 | skb_dst_set(skb, &rth->dst); | 1097 | skb_dst_set(skb, &rth->dst); |
| 1164 | return 0; | 1098 | return rth; |
| 1165 | } | 1099 | } |
| 1166 | 1100 | ||
| 1167 | if (!atomic_read(&rth->dst.__refcnt)) { | 1101 | if (!atomic_read(&rth->dst.__refcnt)) { |
| @@ -1202,7 +1136,7 @@ restart: | |||
| 1202 | rt_emergency_hash_rebuild(net); | 1136 | rt_emergency_hash_rebuild(net); |
| 1203 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1137 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
| 1204 | 1138 | ||
| 1205 | hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | 1139 | hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, |
| 1206 | ifindex, rt_genid(net)); | 1140 | ifindex, rt_genid(net)); |
| 1207 | goto restart; | 1141 | goto restart; |
| 1208 | } | 1142 | } |
| @@ -1218,7 +1152,7 @@ restart: | |||
| 1218 | 1152 | ||
| 1219 | if (err != -ENOBUFS) { | 1153 | if (err != -ENOBUFS) { |
| 1220 | rt_drop(rt); | 1154 | rt_drop(rt); |
| 1221 | return err; | 1155 | return ERR_PTR(err); |
| 1222 | } | 1156 | } |
| 1223 | 1157 | ||
| 1224 | /* Neighbour tables are full and nothing | 1158 | /* Neighbour tables are full and nothing |
| @@ -1239,7 +1173,7 @@ restart: | |||
| 1239 | if (net_ratelimit()) | 1173 | if (net_ratelimit()) |
| 1240 | printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); | 1174 | printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); |
| 1241 | rt_drop(rt); | 1175 | rt_drop(rt); |
| 1242 | return -ENOBUFS; | 1176 | return ERR_PTR(-ENOBUFS); |
| 1243 | } | 1177 | } |
| 1244 | } | 1178 | } |
| 1245 | 1179 | ||
| @@ -1265,11 +1199,16 @@ restart: | |||
| 1265 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1199 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
| 1266 | 1200 | ||
| 1267 | skip_hashing: | 1201 | skip_hashing: |
| 1268 | if (rp) | 1202 | if (skb) |
| 1269 | *rp = rt; | ||
| 1270 | else | ||
| 1271 | skb_dst_set(skb, &rt->dst); | 1203 | skb_dst_set(skb, &rt->dst); |
| 1272 | return 0; | 1204 | return rt; |
| 1205 | } | ||
| 1206 | |||
| 1207 | static atomic_t __rt_peer_genid = ATOMIC_INIT(0); | ||
| 1208 | |||
| 1209 | static u32 rt_peer_genid(void) | ||
| 1210 | { | ||
| 1211 | return atomic_read(&__rt_peer_genid); | ||
| 1273 | } | 1212 | } |
| 1274 | 1213 | ||
| 1275 | void rt_bind_peer(struct rtable *rt, int create) | 1214 | void rt_bind_peer(struct rtable *rt, int create) |
| @@ -1280,6 +1219,8 @@ void rt_bind_peer(struct rtable *rt, int create) | |||
| 1280 | 1219 | ||
| 1281 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) | 1220 | if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) |
| 1282 | inet_putpeer(peer); | 1221 | inet_putpeer(peer); |
| 1222 | else | ||
| 1223 | rt->rt_peer_genid = rt_peer_genid(); | ||
| 1283 | } | 1224 | } |
| 1284 | 1225 | ||
| 1285 | /* | 1226 | /* |
| @@ -1349,13 +1290,8 @@ static void rt_del(unsigned hash, struct rtable *rt) | |||
| 1349 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | 1290 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, |
| 1350 | __be32 saddr, struct net_device *dev) | 1291 | __be32 saddr, struct net_device *dev) |
| 1351 | { | 1292 | { |
| 1352 | int i, k; | ||
| 1353 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 1293 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 1354 | struct rtable *rth; | 1294 | struct inet_peer *peer; |
| 1355 | struct rtable __rcu **rthp; | ||
| 1356 | __be32 skeys[2] = { saddr, 0 }; | ||
| 1357 | int ikeys[2] = { dev->ifindex, 0 }; | ||
| 1358 | struct netevent_redirect netevent; | ||
| 1359 | struct net *net; | 1295 | struct net *net; |
| 1360 | 1296 | ||
| 1361 | if (!in_dev) | 1297 | if (!in_dev) |
| @@ -1367,9 +1303,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
| 1367 | ipv4_is_zeronet(new_gw)) | 1303 | ipv4_is_zeronet(new_gw)) |
| 1368 | goto reject_redirect; | 1304 | goto reject_redirect; |
| 1369 | 1305 | ||
| 1370 | if (!rt_caching(net)) | ||
| 1371 | goto reject_redirect; | ||
| 1372 | |||
| 1373 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { | 1306 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { |
| 1374 | if (!inet_addr_onlink(in_dev, new_gw, old_gw)) | 1307 | if (!inet_addr_onlink(in_dev, new_gw, old_gw)) |
| 1375 | goto reject_redirect; | 1308 | goto reject_redirect; |
| @@ -1380,91 +1313,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
| 1380 | goto reject_redirect; | 1313 | goto reject_redirect; |
| 1381 | } | 1314 | } |
| 1382 | 1315 | ||
| 1383 | for (i = 0; i < 2; i++) { | 1316 | peer = inet_getpeer_v4(daddr, 1); |
| 1384 | for (k = 0; k < 2; k++) { | 1317 | if (peer) { |
| 1385 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], | 1318 | peer->redirect_learned.a4 = new_gw; |
| 1386 | rt_genid(net)); | ||
| 1387 | |||
| 1388 | rthp = &rt_hash_table[hash].chain; | ||
| 1389 | |||
| 1390 | while ((rth = rcu_dereference(*rthp)) != NULL) { | ||
| 1391 | struct rtable *rt; | ||
| 1392 | |||
| 1393 | if (rth->fl.fl4_dst != daddr || | ||
| 1394 | rth->fl.fl4_src != skeys[i] || | ||
| 1395 | rth->fl.oif != ikeys[k] || | ||
| 1396 | rt_is_input_route(rth) || | ||
| 1397 | rt_is_expired(rth) || | ||
| 1398 | !net_eq(dev_net(rth->dst.dev), net)) { | ||
| 1399 | rthp = &rth->dst.rt_next; | ||
| 1400 | continue; | ||
| 1401 | } | ||
| 1402 | |||
| 1403 | if (rth->rt_dst != daddr || | ||
| 1404 | rth->rt_src != saddr || | ||
| 1405 | rth->dst.error || | ||
| 1406 | rth->rt_gateway != old_gw || | ||
| 1407 | rth->dst.dev != dev) | ||
| 1408 | break; | ||
| 1409 | |||
| 1410 | dst_hold(&rth->dst); | ||
| 1411 | |||
| 1412 | rt = dst_alloc(&ipv4_dst_ops); | ||
| 1413 | if (rt == NULL) { | ||
| 1414 | ip_rt_put(rth); | ||
| 1415 | return; | ||
| 1416 | } | ||
| 1417 | |||
| 1418 | /* Copy all the information. */ | ||
| 1419 | *rt = *rth; | ||
| 1420 | rt->dst.__use = 1; | ||
| 1421 | atomic_set(&rt->dst.__refcnt, 1); | ||
| 1422 | rt->dst.child = NULL; | ||
| 1423 | if (rt->dst.dev) | ||
| 1424 | dev_hold(rt->dst.dev); | ||
| 1425 | rt->dst.obsolete = -1; | ||
| 1426 | rt->dst.lastuse = jiffies; | ||
| 1427 | rt->dst.path = &rt->dst; | ||
| 1428 | rt->dst.neighbour = NULL; | ||
| 1429 | rt->dst.hh = NULL; | ||
| 1430 | #ifdef CONFIG_XFRM | ||
| 1431 | rt->dst.xfrm = NULL; | ||
| 1432 | #endif | ||
| 1433 | rt->rt_genid = rt_genid(net); | ||
| 1434 | rt->rt_flags |= RTCF_REDIRECTED; | ||
| 1435 | |||
| 1436 | /* Gateway is different ... */ | ||
| 1437 | rt->rt_gateway = new_gw; | ||
| 1438 | |||
| 1439 | /* Redirect received -> path was valid */ | ||
| 1440 | dst_confirm(&rth->dst); | ||
| 1441 | |||
| 1442 | if (rt->peer) | ||
| 1443 | atomic_inc(&rt->peer->refcnt); | ||
| 1444 | |||
| 1445 | if (arp_bind_neighbour(&rt->dst) || | ||
| 1446 | !(rt->dst.neighbour->nud_state & | ||
| 1447 | NUD_VALID)) { | ||
| 1448 | if (rt->dst.neighbour) | ||
| 1449 | neigh_event_send(rt->dst.neighbour, NULL); | ||
| 1450 | ip_rt_put(rth); | ||
| 1451 | rt_drop(rt); | ||
| 1452 | goto do_next; | ||
| 1453 | } | ||
| 1454 | 1319 | ||
| 1455 | netevent.old = &rth->dst; | 1320 | inet_putpeer(peer); |
| 1456 | netevent.new = &rt->dst; | ||
| 1457 | call_netevent_notifiers(NETEVENT_REDIRECT, | ||
| 1458 | &netevent); | ||
| 1459 | 1321 | ||
| 1460 | rt_del(hash, rth); | 1322 | atomic_inc(&__rt_peer_genid); |
| 1461 | if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) | ||
| 1462 | ip_rt_put(rt); | ||
| 1463 | goto do_next; | ||
| 1464 | } | ||
| 1465 | do_next: | ||
| 1466 | ; | ||
| 1467 | } | ||
| 1468 | } | 1323 | } |
| 1469 | return; | 1324 | return; |
| 1470 | 1325 | ||
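This is the big conceptual change in the file: an ICMP redirect no longer clones and rehashes cache entries for each (source, ifindex) combination; it simply records the new gateway on the destination's inet_peer and bumps a global generation counter, leaving cached routes to notice the change lazily. The publishing half, modeled minimally:

#include <stdatomic.h>

struct peer { unsigned learned_gw; };

static _Atomic unsigned peer_genid;

void learn_redirect(struct peer *p, unsigned new_gw)
{
        p->learned_gw = new_gw;             /* peer->redirect_learned.a4 */
        atomic_fetch_add(&peer_genid, 1);   /* atomic_inc(&__rt_peer_genid) */
}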
| @@ -1488,18 +1343,24 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
| 1488 | if (dst->obsolete > 0) { | 1343 | if (dst->obsolete > 0) { |
| 1489 | ip_rt_put(rt); | 1344 | ip_rt_put(rt); |
| 1490 | ret = NULL; | 1345 | ret = NULL; |
| 1491 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || | 1346 | } else if (rt->rt_flags & RTCF_REDIRECTED) { |
| 1492 | (rt->dst.expires && | 1347 | unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, |
| 1493 | time_after_eq(jiffies, rt->dst.expires))) { | 1348 | rt->rt_oif, |
| 1494 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | ||
| 1495 | rt->fl.oif, | ||
| 1496 | rt_genid(dev_net(dst->dev))); | 1349 | rt_genid(dev_net(dst->dev))); |
| 1497 | #if RT_CACHE_DEBUG >= 1 | 1350 | #if RT_CACHE_DEBUG >= 1 |
| 1498 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", | 1351 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", |
| 1499 | &rt->rt_dst, rt->fl.fl4_tos); | 1352 | &rt->rt_dst, rt->rt_tos); |
| 1500 | #endif | 1353 | #endif |
| 1501 | rt_del(hash, rt); | 1354 | rt_del(hash, rt); |
| 1502 | ret = NULL; | 1355 | ret = NULL; |
| 1356 | } else if (rt->peer && | ||
| 1357 | rt->peer->pmtu_expires && | ||
| 1358 | time_after_eq(jiffies, rt->peer->pmtu_expires)) { | ||
| 1359 | unsigned long orig = rt->peer->pmtu_expires; | ||
| 1360 | |||
| 1361 | if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) | ||
| 1362 | dst_metric_set(dst, RTAX_MTU, | ||
| 1363 | rt->peer->pmtu_orig); | ||
| 1503 | } | 1364 | } |
| 1504 | } | 1365 | } |
| 1505 | return ret; | 1366 | return ret; |
| @@ -1525,6 +1386,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
| 1525 | { | 1386 | { |
| 1526 | struct rtable *rt = skb_rtable(skb); | 1387 | struct rtable *rt = skb_rtable(skb); |
| 1527 | struct in_device *in_dev; | 1388 | struct in_device *in_dev; |
| 1389 | struct inet_peer *peer; | ||
| 1528 | int log_martians; | 1390 | int log_martians; |
| 1529 | 1391 | ||
| 1530 | rcu_read_lock(); | 1392 | rcu_read_lock(); |
| @@ -1536,33 +1398,41 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
| 1536 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); | 1398 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); |
| 1537 | rcu_read_unlock(); | 1399 | rcu_read_unlock(); |
| 1538 | 1400 | ||
| 1401 | if (!rt->peer) | ||
| 1402 | rt_bind_peer(rt, 1); | ||
| 1403 | peer = rt->peer; | ||
| 1404 | if (!peer) { | ||
| 1405 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | ||
| 1406 | return; | ||
| 1407 | } | ||
| 1408 | |||
| 1539 | /* No redirected packets during ip_rt_redirect_silence; | 1409 | /* No redirected packets during ip_rt_redirect_silence; |
| 1540 | * reset the algorithm. | 1410 | * reset the algorithm. |
| 1541 | */ | 1411 | */ |
| 1542 | if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) | 1412 | if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) |
| 1543 | rt->dst.rate_tokens = 0; | 1413 | peer->rate_tokens = 0; |
| 1544 | 1414 | ||
| 1545 | /* Too many ignored redirects; do not send anything | 1415 | /* Too many ignored redirects; do not send anything |
| 1546 | * set dst.rate_last to the last seen redirected packet. | 1416 | * set dst.rate_last to the last seen redirected packet. |
| 1547 | */ | 1417 | */ |
| 1548 | if (rt->dst.rate_tokens >= ip_rt_redirect_number) { | 1418 | if (peer->rate_tokens >= ip_rt_redirect_number) { |
| 1549 | rt->dst.rate_last = jiffies; | 1419 | peer->rate_last = jiffies; |
| 1550 | return; | 1420 | return; |
| 1551 | } | 1421 | } |
| 1552 | 1422 | ||
| 1553 | /* Check for load limit; set rate_last to the latest sent | 1423 | /* Check for load limit; set rate_last to the latest sent |
| 1554 | * redirect. | 1424 | * redirect. |
| 1555 | */ | 1425 | */ |
| 1556 | if (rt->dst.rate_tokens == 0 || | 1426 | if (peer->rate_tokens == 0 || |
| 1557 | time_after(jiffies, | 1427 | time_after(jiffies, |
| 1558 | (rt->dst.rate_last + | 1428 | (peer->rate_last + |
| 1559 | (ip_rt_redirect_load << rt->dst.rate_tokens)))) { | 1429 | (ip_rt_redirect_load << peer->rate_tokens)))) { |
| 1560 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | 1430 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); |
| 1561 | rt->dst.rate_last = jiffies; | 1431 | peer->rate_last = jiffies; |
| 1562 | ++rt->dst.rate_tokens; | 1432 | ++peer->rate_tokens; |
| 1563 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1433 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
| 1564 | if (log_martians && | 1434 | if (log_martians && |
| 1565 | rt->dst.rate_tokens == ip_rt_redirect_number && | 1435 | peer->rate_tokens == ip_rt_redirect_number && |
| 1566 | net_ratelimit()) | 1436 | net_ratelimit()) |
| 1567 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", | 1437 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
| 1568 | &rt->rt_src, rt->rt_iif, | 1438 | &rt->rt_src, rt->rt_iif, |
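The redirect rate limiter keeps its old shape; only its state (rate_tokens/rate_last) moves from the dst_entry to the shared inet_peer, so it survives cache churn and applies per destination host. The algorithm itself: reset after a silent period, stop entirely past a cap, and double the allowed interval with each redirect already sent. A userspace model:

#include <stdbool.h>

struct limiter { unsigned long rate_last; unsigned rate_tokens; };

bool may_send_redirect(struct limiter *l, unsigned long now,
                       unsigned long silence, unsigned max_tokens,
                       unsigned long load)
{
        if (now - l->rate_last > silence)
                l->rate_tokens = 0;             /* peer went quiet: reset */
        if (l->rate_tokens >= max_tokens) {
                l->rate_last = now;             /* too many ignored: stop */
                return false;
        }
        if (l->rate_tokens == 0 ||
            now > l->rate_last + (load << l->rate_tokens)) {
                l->rate_last = now;
                ++l->rate_tokens;               /* back off exponentially */
                return true;
        }
        return false;
}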
| @@ -1574,7 +1444,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
| 1574 | static int ip_error(struct sk_buff *skb) | 1444 | static int ip_error(struct sk_buff *skb) |
| 1575 | { | 1445 | { |
| 1576 | struct rtable *rt = skb_rtable(skb); | 1446 | struct rtable *rt = skb_rtable(skb); |
| 1447 | struct inet_peer *peer; | ||
| 1577 | unsigned long now; | 1448 | unsigned long now; |
| 1449 | bool send; | ||
| 1578 | int code; | 1450 | int code; |
| 1579 | 1451 | ||
| 1580 | switch (rt->dst.error) { | 1452 | switch (rt->dst.error) { |
| @@ -1594,15 +1466,24 @@ static int ip_error(struct sk_buff *skb) | |||
| 1594 | break; | 1466 | break; |
| 1595 | } | 1467 | } |
| 1596 | 1468 | ||
| 1597 | now = jiffies; | 1469 | if (!rt->peer) |
| 1598 | rt->dst.rate_tokens += now - rt->dst.rate_last; | 1470 | rt_bind_peer(rt, 1); |
| 1599 | if (rt->dst.rate_tokens > ip_rt_error_burst) | 1471 | peer = rt->peer; |
| 1600 | rt->dst.rate_tokens = ip_rt_error_burst; | 1472 | |
| 1601 | rt->dst.rate_last = now; | 1473 | send = true; |
| 1602 | if (rt->dst.rate_tokens >= ip_rt_error_cost) { | 1474 | if (peer) { |
| 1603 | rt->dst.rate_tokens -= ip_rt_error_cost; | 1475 | now = jiffies; |
| 1604 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | 1476 | peer->rate_tokens += now - peer->rate_last; |
| 1477 | if (peer->rate_tokens > ip_rt_error_burst) | ||
| 1478 | peer->rate_tokens = ip_rt_error_burst; | ||
| 1479 | peer->rate_last = now; | ||
| 1480 | if (peer->rate_tokens >= ip_rt_error_cost) | ||
| 1481 | peer->rate_tokens -= ip_rt_error_cost; | ||
| 1482 | else | ||
| 1483 | send = false; | ||
| 1605 | } | 1484 | } |
| 1485 | if (send) | ||
| 1486 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | ||
| 1606 | 1487 | ||
| 1607 | out: kfree_skb(skb); | 1488 | out: kfree_skb(skb); |
| 1608 | return 0; | 1489 | return 0; |
| @@ -1630,88 +1511,140 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
| 1630 | unsigned short new_mtu, | 1511 | unsigned short new_mtu, |
| 1631 | struct net_device *dev) | 1512 | struct net_device *dev) |
| 1632 | { | 1513 | { |
| 1633 | int i, k; | ||
| 1634 | unsigned short old_mtu = ntohs(iph->tot_len); | 1514 | unsigned short old_mtu = ntohs(iph->tot_len); |
| 1635 | struct rtable *rth; | ||
| 1636 | int ikeys[2] = { dev->ifindex, 0 }; | ||
| 1637 | __be32 skeys[2] = { iph->saddr, 0, }; | ||
| 1638 | __be32 daddr = iph->daddr; | ||
| 1639 | unsigned short est_mtu = 0; | 1515 | unsigned short est_mtu = 0; |
| 1516 | struct inet_peer *peer; | ||
| 1640 | 1517 | ||
| 1641 | for (k = 0; k < 2; k++) { | 1518 | peer = inet_getpeer_v4(iph->daddr, 1); |
| 1642 | for (i = 0; i < 2; i++) { | 1519 | if (peer) { |
| 1643 | unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], | 1520 | unsigned short mtu = new_mtu; |
| 1644 | rt_genid(net)); | ||
| 1645 | |||
| 1646 | rcu_read_lock(); | ||
| 1647 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | ||
| 1648 | rth = rcu_dereference(rth->dst.rt_next)) { | ||
| 1649 | unsigned short mtu = new_mtu; | ||
| 1650 | |||
| 1651 | if (rth->fl.fl4_dst != daddr || | ||
| 1652 | rth->fl.fl4_src != skeys[i] || | ||
| 1653 | rth->rt_dst != daddr || | ||
| 1654 | rth->rt_src != iph->saddr || | ||
| 1655 | rth->fl.oif != ikeys[k] || | ||
| 1656 | rt_is_input_route(rth) || | ||
| 1657 | dst_metric_locked(&rth->dst, RTAX_MTU) || | ||
| 1658 | !net_eq(dev_net(rth->dst.dev), net) || | ||
| 1659 | rt_is_expired(rth)) | ||
| 1660 | continue; | ||
| 1661 | 1521 | ||
| 1662 | if (new_mtu < 68 || new_mtu >= old_mtu) { | 1522 | if (new_mtu < 68 || new_mtu >= old_mtu) { |
| 1523 | /* BSD 4.2 derived systems incorrectly adjust | ||
| 1524 | * tot_len by the IP header length, and report | ||
| 1525 | * a zero MTU in the ICMP message. | ||
| 1526 | */ | ||
| 1527 | if (mtu == 0 && | ||
| 1528 | old_mtu >= 68 + (iph->ihl << 2)) | ||
| 1529 | old_mtu -= iph->ihl << 2; | ||
| 1530 | mtu = guess_mtu(old_mtu); | ||
| 1531 | } | ||
| 1663 | 1532 | ||
| 1664 | /* BSD 4.2 compatibility hack :-( */ | 1533 | if (mtu < ip_rt_min_pmtu) |
| 1665 | if (mtu == 0 && | 1534 | mtu = ip_rt_min_pmtu; |
| 1666 | old_mtu >= dst_mtu(&rth->dst) && | 1535 | if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { |
| 1667 | old_mtu >= 68 + (iph->ihl << 2)) | 1536 | unsigned long pmtu_expires; |
| 1668 | old_mtu -= iph->ihl << 2; | ||
| 1669 | 1537 | ||
| 1670 | mtu = guess_mtu(old_mtu); | 1538 | pmtu_expires = jiffies + ip_rt_mtu_expires; |
| 1671 | } | 1539 | if (!pmtu_expires) |
| 1672 | if (mtu <= dst_mtu(&rth->dst)) { | 1540 | pmtu_expires = 1UL; |
| 1673 | if (mtu < dst_mtu(&rth->dst)) { | 1541 | |
| 1674 | dst_confirm(&rth->dst); | 1542 | est_mtu = mtu; |
| 1675 | if (mtu < ip_rt_min_pmtu) { | 1543 | peer->pmtu_learned = mtu; |
| 1676 | u32 lock = dst_metric(&rth->dst, | 1544 | peer->pmtu_expires = pmtu_expires; |
| 1677 | RTAX_LOCK); | ||
| 1678 | mtu = ip_rt_min_pmtu; | ||
| 1679 | lock |= (1 << RTAX_MTU); | ||
| 1680 | dst_metric_set(&rth->dst, RTAX_LOCK, | ||
| 1681 | lock); | ||
| 1682 | } | ||
| 1683 | dst_metric_set(&rth->dst, RTAX_MTU, mtu); | ||
| 1684 | dst_set_expires(&rth->dst, | ||
| 1685 | ip_rt_mtu_expires); | ||
| 1686 | } | ||
| 1687 | est_mtu = mtu; | ||
| 1688 | } | ||
| 1689 | } | ||
| 1690 | rcu_read_unlock(); | ||
| 1691 | } | 1545 | } |
| 1546 | |||
| 1547 | inet_putpeer(peer); | ||
| 1548 | |||
| 1549 | atomic_inc(&__rt_peer_genid); | ||
| 1692 | } | 1550 | } |
| 1693 | return est_mtu ? : new_mtu; | 1551 | return est_mtu ? : new_mtu; |
| 1694 | } | 1552 | } |
| 1695 | 1553 | ||
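Both this function and ip_rt_update_pmtu() below store the learned PMTU on the peer with the same encoding trick: pmtu_expires == 0 means "nothing learned", so a computed deadline that happens to equal 0 is nudged to 1 jiffy; that reservation is what lets cmpxchg(&peer->pmtu_expires, orig, 0) elsewhere atomically test-and-clear the state. The trick in isolation:

/* Reserve 0 as the "no learned PMTU" sentinel. */
static unsigned long make_expiry(unsigned long now, unsigned long timeout)
{
        unsigned long e = now + timeout;

        return e ? e : 1UL;     /* a real deadline is never 0 */
}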
| 1554 | static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) | ||
| 1555 | { | ||
| 1556 | unsigned long expires = peer->pmtu_expires; | ||
| 1557 | |||
| 1558 | if (time_before(jiffies, expires)) { | ||
| 1559 | u32 orig_dst_mtu = dst_mtu(dst); | ||
| 1560 | if (peer->pmtu_learned < orig_dst_mtu) { | ||
| 1561 | if (!peer->pmtu_orig) | ||
| 1562 | peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); | ||
| 1563 | dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); | ||
| 1564 | } | ||
| 1565 | } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) | ||
| 1566 | dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); | ||
| 1567 | } | ||
| 1568 | |||
| 1696 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | 1569 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) |
| 1697 | { | 1570 | { |
| 1698 | if (dst_mtu(dst) > mtu && mtu >= 68 && | 1571 | struct rtable *rt = (struct rtable *) dst; |
| 1699 | !(dst_metric_locked(dst, RTAX_MTU))) { | 1572 | struct inet_peer *peer; |
| 1700 | if (mtu < ip_rt_min_pmtu) { | 1573 | |
| 1701 | u32 lock = dst_metric(dst, RTAX_LOCK); | 1574 | dst_confirm(dst); |
| 1575 | |||
| 1576 | if (!rt->peer) | ||
| 1577 | rt_bind_peer(rt, 1); | ||
| 1578 | peer = rt->peer; | ||
| 1579 | if (peer) { | ||
| 1580 | if (mtu < ip_rt_min_pmtu) | ||
| 1702 | mtu = ip_rt_min_pmtu; | 1581 | mtu = ip_rt_min_pmtu; |
| 1703 | dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); | 1582 | if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { |
| 1583 | unsigned long pmtu_expires; | ||
| 1584 | |||
| 1585 | pmtu_expires = jiffies + ip_rt_mtu_expires; | ||
| 1586 | if (!pmtu_expires) | ||
| 1587 | pmtu_expires = 1UL; | ||
| 1588 | |||
| 1589 | peer->pmtu_learned = mtu; | ||
| 1590 | peer->pmtu_expires = pmtu_expires; | ||
| 1591 | |||
| 1592 | atomic_inc(&__rt_peer_genid); | ||
| 1593 | rt->rt_peer_genid = rt_peer_genid(); | ||
| 1704 | } | 1594 | } |
| 1705 | dst_metric_set(dst, RTAX_MTU, mtu); | 1595 | check_peer_pmtu(dst, peer); |
| 1706 | dst_set_expires(dst, ip_rt_mtu_expires); | 1596 | } |
| 1707 | call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); | 1597 | } |
| 1598 | |||
| 1599 | static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) | ||
| 1600 | { | ||
| 1601 | struct rtable *rt = (struct rtable *) dst; | ||
| 1602 | __be32 orig_gw = rt->rt_gateway; | ||
| 1603 | |||
| 1604 | dst_confirm(&rt->dst); | ||
| 1605 | |||
| 1606 | neigh_release(rt->dst.neighbour); | ||
| 1607 | rt->dst.neighbour = NULL; | ||
| 1608 | |||
| 1609 | rt->rt_gateway = peer->redirect_learned.a4; | ||
| 1610 | if (arp_bind_neighbour(&rt->dst) || | ||
| 1611 | !(rt->dst.neighbour->nud_state & NUD_VALID)) { | ||
| 1612 | if (rt->dst.neighbour) | ||
| 1613 | neigh_event_send(rt->dst.neighbour, NULL); | ||
| 1614 | rt->rt_gateway = orig_gw; | ||
| 1615 | return -EAGAIN; | ||
| 1616 | } else { | ||
| 1617 | rt->rt_flags |= RTCF_REDIRECTED; | ||
| 1618 | call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, | ||
| 1619 | rt->dst.neighbour); | ||
| 1708 | } | 1620 | } |
| 1621 | return 0; | ||
| 1709 | } | 1622 | } |
| 1710 | 1623 | ||
| 1711 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | 1624 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) |
| 1712 | { | 1625 | { |
| 1713 | if (rt_is_expired((struct rtable *)dst)) | 1626 | struct rtable *rt = (struct rtable *) dst; |
| 1627 | |||
| 1628 | if (rt_is_expired(rt)) | ||
| 1714 | return NULL; | 1629 | return NULL; |
| 1630 | if (rt->rt_peer_genid != rt_peer_genid()) { | ||
| 1631 | struct inet_peer *peer; | ||
| 1632 | |||
| 1633 | if (!rt->peer) | ||
| 1634 | rt_bind_peer(rt, 0); | ||
| 1635 | |||
| 1636 | peer = rt->peer; | ||
| 1637 | if (peer && peer->pmtu_expires) | ||
| 1638 | check_peer_pmtu(dst, peer); | ||
| 1639 | |||
| 1640 | if (peer && peer->redirect_learned.a4 && | ||
| 1641 | peer->redirect_learned.a4 != rt->rt_gateway) { | ||
| 1642 | if (check_peer_redir(dst, peer)) | ||
| 1643 | return NULL; | ||
| 1644 | } | ||
| 1645 | |||
| 1646 | rt->rt_peer_genid = rt_peer_genid(); | ||
| 1647 | } | ||
| 1715 | return dst; | 1648 | return dst; |
| 1716 | } | 1649 | } |
| 1717 | 1650 | ||
| @@ -1720,6 +1653,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst) | |||
| 1720 | struct rtable *rt = (struct rtable *) dst; | 1653 | struct rtable *rt = (struct rtable *) dst; |
| 1721 | struct inet_peer *peer = rt->peer; | 1654 | struct inet_peer *peer = rt->peer; |
| 1722 | 1655 | ||
| 1656 | if (rt->fi) { | ||
| 1657 | fib_info_put(rt->fi); | ||
| 1658 | rt->fi = NULL; | ||
| 1659 | } | ||
| 1723 | if (peer) { | 1660 | if (peer) { |
| 1724 | rt->peer = NULL; | 1661 | rt->peer = NULL; |
| 1725 | inet_putpeer(peer); | 1662 | inet_putpeer(peer); |
| @@ -1734,8 +1671,14 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
| 1734 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); | 1671 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
| 1735 | 1672 | ||
| 1736 | rt = skb_rtable(skb); | 1673 | rt = skb_rtable(skb); |
| 1737 | if (rt) | 1674 | if (rt && |
| 1738 | dst_set_expires(&rt->dst, 0); | 1675 | rt->peer && |
| 1676 | rt->peer->pmtu_expires) { | ||
| 1677 | unsigned long orig = rt->peer->pmtu_expires; | ||
| 1678 | |||
| 1679 | if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) | ||
| 1680 | dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); | ||
| 1681 | } | ||
| 1739 | } | 1682 | } |
| 1740 | 1683 | ||
| 1741 | static int ip_rt_bug(struct sk_buff *skb) | 1684 | static int ip_rt_bug(struct sk_buff *skb) |
| @@ -1764,9 +1707,18 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
| 1764 | if (rt_is_output_route(rt)) | 1707 | if (rt_is_output_route(rt)) |
| 1765 | src = rt->rt_src; | 1708 | src = rt->rt_src; |
| 1766 | else { | 1709 | else { |
| 1710 | struct flowi4 fl4 = { | ||
| 1711 | .daddr = rt->rt_key_dst, | ||
| 1712 | .saddr = rt->rt_key_src, | ||
| 1713 | .flowi4_tos = rt->rt_tos, | ||
| 1714 | .flowi4_oif = rt->rt_oif, | ||
| 1715 | .flowi4_iif = rt->rt_iif, | ||
| 1716 | .flowi4_mark = rt->rt_mark, | ||
| 1717 | }; | ||
| 1718 | |||
| 1767 | rcu_read_lock(); | 1719 | rcu_read_lock(); |
| 1768 | if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) | 1720 | if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) |
| 1769 | src = FIB_RES_PREFSRC(res); | 1721 | src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); |
| 1770 | else | 1722 | else |
| 1771 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, | 1723 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, |
| 1772 | RT_SCOPE_UNIVERSE); | 1724 | RT_SCOPE_UNIVERSE); |
| @@ -1775,7 +1727,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
| 1775 | memcpy(addr, &src, 4); | 1727 | memcpy(addr, &src, 4); |
| 1776 | } | 1728 | } |
| 1777 | 1729 | ||
| 1778 | #ifdef CONFIG_NET_CLS_ROUTE | 1730 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 1779 | static void set_class_tag(struct rtable *rt, u32 tag) | 1731 | static void set_class_tag(struct rtable *rt, u32 tag) |
| 1780 | { | 1732 | { |
| 1781 | if (!(rt->dst.tclassid & 0xFFFF)) | 1733 | if (!(rt->dst.tclassid & 0xFFFF)) |
| @@ -1815,17 +1767,54 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) | |||
| 1815 | return mtu; | 1767 | return mtu; |
| 1816 | } | 1768 | } |
| 1817 | 1769 | ||
| 1818 | static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | 1770 | static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, |
| 1771 | struct fib_info *fi) | ||
| 1772 | { | ||
| 1773 | struct inet_peer *peer; | ||
| 1774 | int create = 0; | ||
| 1775 | |||
| 1776 | /* If a peer entry exists for this destination, we must hook | ||
| 1777 | * it up in order to get at cached metrics. | ||
| 1778 | */ | ||
| 1779 | if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) | ||
| 1780 | create = 1; | ||
| 1781 | |||
| 1782 | rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); | ||
| 1783 | if (peer) { | ||
| 1784 | rt->rt_peer_genid = rt_peer_genid(); | ||
| 1785 | if (inet_metrics_new(peer)) | ||
| 1786 | memcpy(peer->metrics, fi->fib_metrics, | ||
| 1787 | sizeof(u32) * RTAX_MAX); | ||
| 1788 | dst_init_metrics(&rt->dst, peer->metrics, false); | ||
| 1789 | |||
| 1790 | if (peer->pmtu_expires) | ||
| 1791 | check_peer_pmtu(&rt->dst, peer); | ||
| 1792 | if (peer->redirect_learned.a4 && | ||
| 1793 | peer->redirect_learned.a4 != rt->rt_gateway) { | ||
| 1794 | rt->rt_gateway = peer->redirect_learned.a4; | ||
| 1795 | rt->rt_flags |= RTCF_REDIRECTED; | ||
| 1796 | } | ||
| 1797 | } else { | ||
| 1798 | if (fi->fib_metrics != (u32 *) dst_default_metrics) { | ||
| 1799 | rt->fi = fi; | ||
| 1800 | atomic_inc(&fi->fib_clntref); | ||
| 1801 | } | ||
| 1802 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | ||
| 1803 | } | ||
| 1804 | } | ||
| 1805 | |||
| 1806 | static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, | ||
| 1807 | const struct fib_result *res, | ||
| 1808 | struct fib_info *fi, u16 type, u32 itag) | ||
| 1819 | { | 1809 | { |
| 1820 | struct dst_entry *dst = &rt->dst; | 1810 | struct dst_entry *dst = &rt->dst; |
| 1821 | struct fib_info *fi = res->fi; | ||
| 1822 | 1811 | ||
| 1823 | if (fi) { | 1812 | if (fi) { |
| 1824 | if (FIB_RES_GW(*res) && | 1813 | if (FIB_RES_GW(*res) && |
| 1825 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1814 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
| 1826 | rt->rt_gateway = FIB_RES_GW(*res); | 1815 | rt->rt_gateway = FIB_RES_GW(*res); |
| 1827 | dst_import_metrics(dst, fi->fib_metrics); | 1816 | rt_init_metrics(rt, oldflp4, fi); |
| 1828 | #ifdef CONFIG_NET_CLS_ROUTE | 1817 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 1829 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; | 1818 | dst->tclassid = FIB_RES_NH(*res).nh_tclassid; |
| 1830 | #endif | 1819 | #endif |
| 1831 | } | 1820 | } |
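rt_init_metrics() above decides where a new route's metrics array lives: when the flow asked for copy-on-write metrics (FLOWI_FLAG_PRECOW_METRICS) an inet_peer entry is created eagerly, and any peer supplies a shared writable array seeded once from the fib_info; without a peer, the route borrows fib_info's array read-only and pins the fib_info. A condensed sketch of that rule (comments are mine; helpers and flags are the ones visible in the hunk):

        int create = oldflp4 &&
                     (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS);
        struct inet_peer *peer = inet_getpeer_v4(rt->rt_dst, create);

        if (peer) {
                if (inet_metrics_new(peer))     /* first user seeds the array */
                        memcpy(peer->metrics, fi->fib_metrics,
                               sizeof(u32) * RTAX_MAX);
                dst_init_metrics(&rt->dst, peer->metrics, false); /* writable */
        } else {
                rt->fi = fi;                    /* keep the fib_info alive */
                atomic_inc(&fi->fib_clntref);
                dst_init_metrics(&rt->dst, fi->fib_metrics, true); /* read-only */
        }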
| @@ -1835,13 +1824,26 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
| 1835 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) | 1824 | if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) |
| 1836 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); | 1825 | dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); |
| 1837 | 1826 | ||
| 1838 | #ifdef CONFIG_NET_CLS_ROUTE | 1827 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 1839 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1828 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
| 1840 | set_class_tag(rt, fib_rules_tclass(res)); | 1829 | set_class_tag(rt, fib_rules_tclass(res)); |
| 1841 | #endif | 1830 | #endif |
| 1842 | set_class_tag(rt, itag); | 1831 | set_class_tag(rt, itag); |
| 1843 | #endif | 1832 | #endif |
| 1844 | rt->rt_type = res->type; | 1833 | rt->rt_type = type; |
| 1834 | } | ||
| 1835 | |||
| 1836 | static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) | ||
| 1837 | { | ||
| 1838 | struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); | ||
| 1839 | if (rt) { | ||
| 1840 | rt->dst.obsolete = -1; | ||
| 1841 | |||
| 1842 | rt->dst.flags = DST_HOST | | ||
| 1843 | (nopolicy ? DST_NOPOLICY : 0) | | ||
| 1844 | (noxfrm ? DST_NOXFRM : 0); | ||
| 1845 | } | ||
| 1846 | return rt; | ||
| 1845 | } | 1847 | } |
| 1846 | 1848 | ||
| 1847 | /* called in rcu_read_lock() section */ | 1849 | /* called in rcu_read_lock() section */ |
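rt_dst_alloc() above folds up the allocation boilerplate that the call sites below used to open-code. Note that dst_alloc() is now passed an initial reference count of 1, which is why the explicit atomic_set(&rth->dst.__refcnt, 1) lines disappear from those sites. A converted call site reduces to:

        rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY),
                           IN_DEV_CONF_GET(out_dev, NOXFRM));
        if (!rth)
                goto e_nobufs;          /* or return -ENOBUFS, per call site */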
| @@ -1874,31 +1876,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 1874 | if (err < 0) | 1876 | if (err < 0) |
| 1875 | goto e_err; | 1877 | goto e_err; |
| 1876 | } | 1878 | } |
| 1877 | rth = dst_alloc(&ipv4_dst_ops); | 1879 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); |
| 1878 | if (!rth) | 1880 | if (!rth) |
| 1879 | goto e_nobufs; | 1881 | goto e_nobufs; |
| 1880 | 1882 | ||
| 1881 | rth->dst.output = ip_rt_bug; | 1883 | rth->dst.output = ip_rt_bug; |
| 1882 | rth->dst.obsolete = -1; | ||
| 1883 | 1884 | ||
| 1884 | atomic_set(&rth->dst.__refcnt, 1); | 1885 | rth->rt_key_dst = daddr; |
| 1885 | rth->dst.flags= DST_HOST; | ||
| 1886 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
| 1887 | rth->dst.flags |= DST_NOPOLICY; | ||
| 1888 | rth->fl.fl4_dst = daddr; | ||
| 1889 | rth->rt_dst = daddr; | 1886 | rth->rt_dst = daddr; |
| 1890 | rth->fl.fl4_tos = tos; | 1887 | rth->rt_tos = tos; |
| 1891 | rth->fl.mark = skb->mark; | 1888 | rth->rt_mark = skb->mark; |
| 1892 | rth->fl.fl4_src = saddr; | 1889 | rth->rt_key_src = saddr; |
| 1893 | rth->rt_src = saddr; | 1890 | rth->rt_src = saddr; |
| 1894 | #ifdef CONFIG_NET_CLS_ROUTE | 1891 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 1895 | rth->dst.tclassid = itag; | 1892 | rth->dst.tclassid = itag; |
| 1896 | #endif | 1893 | #endif |
| 1897 | rth->rt_iif = | 1894 | rth->rt_iif = dev->ifindex; |
| 1898 | rth->fl.iif = dev->ifindex; | ||
| 1899 | rth->dst.dev = init_net.loopback_dev; | 1895 | rth->dst.dev = init_net.loopback_dev; |
| 1900 | dev_hold(rth->dst.dev); | 1896 | dev_hold(rth->dst.dev); |
| 1901 | rth->fl.oif = 0; | 1897 | rth->rt_oif = 0; |
| 1902 | rth->rt_gateway = daddr; | 1898 | rth->rt_gateway = daddr; |
| 1903 | rth->rt_spec_dst= spec_dst; | 1899 | rth->rt_spec_dst= spec_dst; |
| 1904 | rth->rt_genid = rt_genid(dev_net(dev)); | 1900 | rth->rt_genid = rt_genid(dev_net(dev)); |
| @@ -1916,7 +1912,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 1916 | RT_CACHE_STAT_INC(in_slow_mc); | 1912 | RT_CACHE_STAT_INC(in_slow_mc); |
| 1917 | 1913 | ||
| 1918 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); | 1914 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); |
| 1919 | return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); | 1915 | rth = rt_intern_hash(hash, rth, skb, dev->ifindex); |
| 1916 | err = 0; | ||
| 1917 | if (IS_ERR(rth)) | ||
| 1918 | err = PTR_ERR(rth); | ||
| 1920 | 1919 | ||
| 1921 | e_nobufs: | 1920 | e_nobufs: |
| 1922 | return -ENOBUFS; | 1921 | return -ENOBUFS; |
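rt_intern_hash() now hands back the interned route, which may be a different, pre-existing cache entry, or an ERR_PTR-encoded error, replacing the old int return plus struct rtable ** out-parameter. Call sites that must still produce an errno use the standard translation, as this hunk and the ones below do:

        rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
        if (IS_ERR(rth))
                return PTR_ERR(rth);    /* negative errno carried in the pointer */
        return 0;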
| @@ -1959,7 +1958,7 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
| 1959 | 1958 | ||
| 1960 | /* called in rcu_read_lock() section */ | 1959 | /* called in rcu_read_lock() section */ |
| 1961 | static int __mkroute_input(struct sk_buff *skb, | 1960 | static int __mkroute_input(struct sk_buff *skb, |
| 1962 | struct fib_result *res, | 1961 | const struct fib_result *res, |
| 1963 | struct in_device *in_dev, | 1962 | struct in_device *in_dev, |
| 1964 | __be32 daddr, __be32 saddr, u32 tos, | 1963 | __be32 daddr, __be32 saddr, u32 tos, |
| 1965 | struct rtable **result) | 1964 | struct rtable **result) |
| @@ -2013,39 +2012,31 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 2013 | } | 2012 | } |
| 2014 | } | 2013 | } |
| 2015 | 2014 | ||
| 2016 | 2015 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), | |
| 2017 | rth = dst_alloc(&ipv4_dst_ops); | 2016 | IN_DEV_CONF_GET(out_dev, NOXFRM)); |
| 2018 | if (!rth) { | 2017 | if (!rth) { |
| 2019 | err = -ENOBUFS; | 2018 | err = -ENOBUFS; |
| 2020 | goto cleanup; | 2019 | goto cleanup; |
| 2021 | } | 2020 | } |
| 2022 | 2021 | ||
| 2023 | atomic_set(&rth->dst.__refcnt, 1); | 2022 | rth->rt_key_dst = daddr; |
| 2024 | rth->dst.flags= DST_HOST; | ||
| 2025 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
| 2026 | rth->dst.flags |= DST_NOPOLICY; | ||
| 2027 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) | ||
| 2028 | rth->dst.flags |= DST_NOXFRM; | ||
| 2029 | rth->fl.fl4_dst = daddr; | ||
| 2030 | rth->rt_dst = daddr; | 2023 | rth->rt_dst = daddr; |
| 2031 | rth->fl.fl4_tos = tos; | 2024 | rth->rt_tos = tos; |
| 2032 | rth->fl.mark = skb->mark; | 2025 | rth->rt_mark = skb->mark; |
| 2033 | rth->fl.fl4_src = saddr; | 2026 | rth->rt_key_src = saddr; |
| 2034 | rth->rt_src = saddr; | 2027 | rth->rt_src = saddr; |
| 2035 | rth->rt_gateway = daddr; | 2028 | rth->rt_gateway = daddr; |
| 2036 | rth->rt_iif = | 2029 | rth->rt_iif = in_dev->dev->ifindex; |
| 2037 | rth->fl.iif = in_dev->dev->ifindex; | ||
| 2038 | rth->dst.dev = (out_dev)->dev; | 2030 | rth->dst.dev = (out_dev)->dev; |
| 2039 | dev_hold(rth->dst.dev); | 2031 | dev_hold(rth->dst.dev); |
| 2040 | rth->fl.oif = 0; | 2032 | rth->rt_oif = 0; |
| 2041 | rth->rt_spec_dst= spec_dst; | 2033 | rth->rt_spec_dst= spec_dst; |
| 2042 | 2034 | ||
| 2043 | rth->dst.obsolete = -1; | ||
| 2044 | rth->dst.input = ip_forward; | 2035 | rth->dst.input = ip_forward; |
| 2045 | rth->dst.output = ip_output; | 2036 | rth->dst.output = ip_output; |
| 2046 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); | 2037 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); |
| 2047 | 2038 | ||
| 2048 | rt_set_nexthop(rth, res, itag); | 2039 | rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); |
| 2049 | 2040 | ||
| 2050 | rth->rt_flags = flags; | 2041 | rth->rt_flags = flags; |
| 2051 | 2042 | ||
| @@ -2057,7 +2048,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 2057 | 2048 | ||
| 2058 | static int ip_mkroute_input(struct sk_buff *skb, | 2049 | static int ip_mkroute_input(struct sk_buff *skb, |
| 2059 | struct fib_result *res, | 2050 | struct fib_result *res, |
| 2060 | const struct flowi *fl, | 2051 | const struct flowi4 *fl4, |
| 2061 | struct in_device *in_dev, | 2052 | struct in_device *in_dev, |
| 2062 | __be32 daddr, __be32 saddr, u32 tos) | 2053 | __be32 daddr, __be32 saddr, u32 tos) |
| 2063 | { | 2054 | { |
| @@ -2066,8 +2057,8 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
| 2066 | unsigned hash; | 2057 | unsigned hash; |
| 2067 | 2058 | ||
| 2068 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2059 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| 2069 | if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) | 2060 | if (res->fi && res->fi->fib_nhs > 1) |
| 2070 | fib_select_multipath(fl, res); | 2061 | fib_select_multipath(res); |
| 2071 | #endif | 2062 | #endif |
| 2072 | 2063 | ||
| 2073 | /* create a routing cache entry */ | 2064 | /* create a routing cache entry */ |
| @@ -2076,9 +2067,12 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
| 2076 | return err; | 2067 | return err; |
| 2077 | 2068 | ||
| 2078 | /* put it into the cache */ | 2069 | /* put it into the cache */ |
| 2079 | hash = rt_hash(daddr, saddr, fl->iif, | 2070 | hash = rt_hash(daddr, saddr, fl4->flowi4_iif, |
| 2080 | rt_genid(dev_net(rth->dst.dev))); | 2071 | rt_genid(dev_net(rth->dst.dev))); |
| 2081 | return rt_intern_hash(hash, rth, NULL, skb, fl->iif); | 2072 | rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); |
| 2073 | if (IS_ERR(rth)) | ||
| 2074 | return PTR_ERR(rth); | ||
| 2075 | return 0; | ||
| 2082 | } | 2076 | } |
| 2083 | 2077 | ||
| 2084 | /* | 2078 | /* |
| @@ -2097,12 +2091,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 2097 | { | 2091 | { |
| 2098 | struct fib_result res; | 2092 | struct fib_result res; |
| 2099 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2093 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 2100 | struct flowi fl = { .fl4_dst = daddr, | 2094 | struct flowi4 fl4; |
| 2101 | .fl4_src = saddr, | ||
| 2102 | .fl4_tos = tos, | ||
| 2103 | .fl4_scope = RT_SCOPE_UNIVERSE, | ||
| 2104 | .mark = skb->mark, | ||
| 2105 | .iif = dev->ifindex }; | ||
| 2106 | unsigned flags = 0; | 2095 | unsigned flags = 0; |
| 2107 | u32 itag = 0; | 2096 | u32 itag = 0; |
| 2108 | struct rtable * rth; | 2097 | struct rtable * rth; |
| @@ -2139,7 +2128,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 2139 | /* | 2128 | /* |
| 2140 | * Now we are ready to route packet. | 2129 | * Now we are ready to route packet. |
| 2141 | */ | 2130 | */ |
| 2142 | err = fib_lookup(net, &fl, &res); | 2131 | fl4.flowi4_oif = 0; |
| 2132 | fl4.flowi4_iif = dev->ifindex; | ||
| 2133 | fl4.flowi4_mark = skb->mark; | ||
| 2134 | fl4.flowi4_tos = tos; | ||
| 2135 | fl4.flowi4_scope = RT_SCOPE_UNIVERSE; | ||
| 2136 | fl4.daddr = daddr; | ||
| 2137 | fl4.saddr = saddr; | ||
| 2138 | err = fib_lookup(net, &fl4, &res); | ||
| 2143 | if (err != 0) { | 2139 | if (err != 0) { |
| 2144 | if (!IN_DEV_FORWARD(in_dev)) | 2140 | if (!IN_DEV_FORWARD(in_dev)) |
| 2145 | goto e_hostunreach; | 2141 | goto e_hostunreach; |
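ip_route_input_slow() now fills an on-stack flowi4 field by field instead of using the removed struct flowi initializer. For reference, the assignments above are equivalent to the designated-initializer form below, except that an initializer would also zero-fill the flowi4 members the input path does not use (protocol, ports, flags):

        struct flowi4 fl4 = {
                .flowi4_iif   = dev->ifindex,
                .flowi4_mark  = skb->mark,
                .flowi4_tos   = tos,
                .flowi4_scope = RT_SCOPE_UNIVERSE,
                .daddr        = daddr,
                .saddr        = saddr,
        };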
| @@ -2168,7 +2164,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 2168 | if (res.type != RTN_UNICAST) | 2164 | if (res.type != RTN_UNICAST) |
| 2169 | goto martian_destination; | 2165 | goto martian_destination; |
| 2170 | 2166 | ||
| 2171 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | 2167 | err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos); |
| 2172 | out: return err; | 2168 | out: return err; |
| 2173 | 2169 | ||
| 2174 | brd_input: | 2170 | brd_input: |
| @@ -2190,29 +2186,23 @@ brd_input: | |||
| 2190 | RT_CACHE_STAT_INC(in_brd); | 2186 | RT_CACHE_STAT_INC(in_brd); |
| 2191 | 2187 | ||
| 2192 | local_input: | 2188 | local_input: |
| 2193 | rth = dst_alloc(&ipv4_dst_ops); | 2189 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); |
| 2194 | if (!rth) | 2190 | if (!rth) |
| 2195 | goto e_nobufs; | 2191 | goto e_nobufs; |
| 2196 | 2192 | ||
| 2197 | rth->dst.output= ip_rt_bug; | 2193 | rth->dst.output= ip_rt_bug; |
| 2198 | rth->dst.obsolete = -1; | ||
| 2199 | rth->rt_genid = rt_genid(net); | 2194 | rth->rt_genid = rt_genid(net); |
| 2200 | 2195 | ||
| 2201 | atomic_set(&rth->dst.__refcnt, 1); | 2196 | rth->rt_key_dst = daddr; |
| 2202 | rth->dst.flags= DST_HOST; | ||
| 2203 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | ||
| 2204 | rth->dst.flags |= DST_NOPOLICY; | ||
| 2205 | rth->fl.fl4_dst = daddr; | ||
| 2206 | rth->rt_dst = daddr; | 2197 | rth->rt_dst = daddr; |
| 2207 | rth->fl.fl4_tos = tos; | 2198 | rth->rt_tos = tos; |
| 2208 | rth->fl.mark = skb->mark; | 2199 | rth->rt_mark = skb->mark; |
| 2209 | rth->fl.fl4_src = saddr; | 2200 | rth->rt_key_src = saddr; |
| 2210 | rth->rt_src = saddr; | 2201 | rth->rt_src = saddr; |
| 2211 | #ifdef CONFIG_NET_CLS_ROUTE | 2202 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 2212 | rth->dst.tclassid = itag; | 2203 | rth->dst.tclassid = itag; |
| 2213 | #endif | 2204 | #endif |
| 2214 | rth->rt_iif = | 2205 | rth->rt_iif = dev->ifindex; |
| 2215 | rth->fl.iif = dev->ifindex; | ||
| 2216 | rth->dst.dev = net->loopback_dev; | 2206 | rth->dst.dev = net->loopback_dev; |
| 2217 | dev_hold(rth->dst.dev); | 2207 | dev_hold(rth->dst.dev); |
| 2218 | rth->rt_gateway = daddr; | 2208 | rth->rt_gateway = daddr; |
| @@ -2225,8 +2215,11 @@ local_input: | |||
| 2225 | rth->rt_flags &= ~RTCF_LOCAL; | 2215 | rth->rt_flags &= ~RTCF_LOCAL; |
| 2226 | } | 2216 | } |
| 2227 | rth->rt_type = res.type; | 2217 | rth->rt_type = res.type; |
| 2228 | hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); | 2218 | hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); |
| 2229 | err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); | 2219 | rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); |
| 2220 | err = 0; | ||
| 2221 | if (IS_ERR(rth)) | ||
| 2222 | err = PTR_ERR(rth); | ||
| 2230 | goto out; | 2223 | goto out; |
| 2231 | 2224 | ||
| 2232 | no_route: | 2225 | no_route: |
| @@ -2288,12 +2281,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 2288 | 2281 | ||
| 2289 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2282 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
| 2290 | rth = rcu_dereference(rth->dst.rt_next)) { | 2283 | rth = rcu_dereference(rth->dst.rt_next)) { |
| 2291 | if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | | 2284 | if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | |
| 2292 | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | | 2285 | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | |
| 2293 | (rth->fl.iif ^ iif) | | 2286 | (rth->rt_iif ^ iif) | |
| 2294 | rth->fl.oif | | 2287 | rth->rt_oif | |
| 2295 | (rth->fl.fl4_tos ^ tos)) == 0 && | 2288 | (rth->rt_tos ^ tos)) == 0 && |
| 2296 | rth->fl.mark == skb->mark && | 2289 | rth->rt_mark == skb->mark && |
| 2297 | net_eq(dev_net(rth->dst.dev), net) && | 2290 | net_eq(dev_net(rth->dst.dev), net) && |
| 2298 | !rt_is_expired(rth)) { | 2291 | !rt_is_expired(rth)) { |
| 2299 | if (noref) { | 2292 | if (noref) { |
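The input-side cache lookup above keeps the long-standing branch-minimizing comparison; only the key fields are renamed from rth->fl.* to the rtable-embedded rt_key_* members. XOR of two equal words is zero, so OR-ing the per-field XORs lets a single test cover the whole key (rt_oif joins without an XOR because an input route must have oif == 0). Annotated:

        /* zero iff dst, src, iif and tos all match and oif == 0 */
        if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
             ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
             (rth->rt_iif ^ iif) |
             rth->rt_oif |
             (rth->rt_tos ^ tos)) == 0 &&
            rth->rt_mark == skb->mark &&        /* plain equality checks last */
            net_eq(dev_net(rth->dst.dev), net) &&
            !rt_is_expired(rth))
                /* cache hit */;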
| @@ -2326,8 +2319,8 @@ skip_cache: | |||
| 2326 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 2319 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
| 2327 | 2320 | ||
| 2328 | if (in_dev) { | 2321 | if (in_dev) { |
| 2329 | int our = ip_check_mc(in_dev, daddr, saddr, | 2322 | int our = ip_check_mc_rcu(in_dev, daddr, saddr, |
| 2330 | ip_hdr(skb)->protocol); | 2323 | ip_hdr(skb)->protocol); |
| 2331 | if (our | 2324 | if (our |
| 2332 | #ifdef CONFIG_IP_MROUTE | 2325 | #ifdef CONFIG_IP_MROUTE |
| 2333 | || | 2326 | || |
| @@ -2351,98 +2344,91 @@ skip_cache: | |||
| 2351 | EXPORT_SYMBOL(ip_route_input_common); | 2344 | EXPORT_SYMBOL(ip_route_input_common); |
| 2352 | 2345 | ||
| 2353 | /* called with rcu_read_lock() */ | 2346 | /* called with rcu_read_lock() */ |
| 2354 | static int __mkroute_output(struct rtable **result, | 2347 | static struct rtable *__mkroute_output(const struct fib_result *res, |
| 2355 | struct fib_result *res, | 2348 | const struct flowi4 *fl4, |
| 2356 | const struct flowi *fl, | 2349 | const struct flowi4 *oldflp4, |
| 2357 | const struct flowi *oldflp, | 2350 | struct net_device *dev_out, |
| 2358 | struct net_device *dev_out, | 2351 | unsigned int flags) |
| 2359 | unsigned flags) | ||
| 2360 | { | 2352 | { |
| 2361 | struct rtable *rth; | 2353 | struct fib_info *fi = res->fi; |
| 2354 | u32 tos = RT_FL_TOS(oldflp4); | ||
| 2362 | struct in_device *in_dev; | 2355 | struct in_device *in_dev; |
| 2363 | u32 tos = RT_FL_TOS(oldflp); | 2356 | u16 type = res->type; |
| 2357 | struct rtable *rth; | ||
| 2364 | 2358 | ||
| 2365 | if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) | 2359 | if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) |
| 2366 | return -EINVAL; | 2360 | return ERR_PTR(-EINVAL); |
| 2367 | 2361 | ||
| 2368 | if (ipv4_is_lbcast(fl->fl4_dst)) | 2362 | if (ipv4_is_lbcast(fl4->daddr)) |
| 2369 | res->type = RTN_BROADCAST; | 2363 | type = RTN_BROADCAST; |
| 2370 | else if (ipv4_is_multicast(fl->fl4_dst)) | 2364 | else if (ipv4_is_multicast(fl4->daddr)) |
| 2371 | res->type = RTN_MULTICAST; | 2365 | type = RTN_MULTICAST; |
| 2372 | else if (ipv4_is_zeronet(fl->fl4_dst)) | 2366 | else if (ipv4_is_zeronet(fl4->daddr)) |
| 2373 | return -EINVAL; | 2367 | return ERR_PTR(-EINVAL); |
| 2374 | 2368 | ||
| 2375 | if (dev_out->flags & IFF_LOOPBACK) | 2369 | if (dev_out->flags & IFF_LOOPBACK) |
| 2376 | flags |= RTCF_LOCAL; | 2370 | flags |= RTCF_LOCAL; |
| 2377 | 2371 | ||
| 2378 | in_dev = __in_dev_get_rcu(dev_out); | 2372 | in_dev = __in_dev_get_rcu(dev_out); |
| 2379 | if (!in_dev) | 2373 | if (!in_dev) |
| 2380 | return -EINVAL; | 2374 | return ERR_PTR(-EINVAL); |
| 2381 | 2375 | ||
| 2382 | if (res->type == RTN_BROADCAST) { | 2376 | if (type == RTN_BROADCAST) { |
| 2383 | flags |= RTCF_BROADCAST | RTCF_LOCAL; | 2377 | flags |= RTCF_BROADCAST | RTCF_LOCAL; |
| 2384 | res->fi = NULL; | 2378 | fi = NULL; |
| 2385 | } else if (res->type == RTN_MULTICAST) { | 2379 | } else if (type == RTN_MULTICAST) { |
| 2386 | flags |= RTCF_MULTICAST | RTCF_LOCAL; | 2380 | flags |= RTCF_MULTICAST | RTCF_LOCAL; |
| 2387 | if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, | 2381 | if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr, |
| 2388 | oldflp->proto)) | 2382 | oldflp4->flowi4_proto)) |
| 2389 | flags &= ~RTCF_LOCAL; | 2383 | flags &= ~RTCF_LOCAL; |
| 2390 | /* If multicast route do not exist use | 2384 | /* If multicast route do not exist use |
| 2391 | * default one, but do not gateway in this case. | 2385 | * default one, but do not gateway in this case. |
| 2392 | * Yes, it is hack. | 2386 | * Yes, it is hack. |
| 2393 | */ | 2387 | */ |
| 2394 | if (res->fi && res->prefixlen < 4) | 2388 | if (fi && res->prefixlen < 4) |
| 2395 | res->fi = NULL; | 2389 | fi = NULL; |
| 2396 | } | 2390 | } |
| 2397 | 2391 | ||
| 2398 | 2392 | rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), | |
| 2399 | rth = dst_alloc(&ipv4_dst_ops); | 2393 | IN_DEV_CONF_GET(in_dev, NOXFRM)); |
| 2400 | if (!rth) | 2394 | if (!rth) |
| 2401 | return -ENOBUFS; | 2395 | return ERR_PTR(-ENOBUFS); |
| 2402 | 2396 | ||
| 2403 | atomic_set(&rth->dst.__refcnt, 1); | 2397 | rth->rt_key_dst = oldflp4->daddr; |
| 2404 | rth->dst.flags= DST_HOST; | 2398 | rth->rt_tos = tos; |
| 2405 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) | 2399 | rth->rt_key_src = oldflp4->saddr; |
| 2406 | rth->dst.flags |= DST_NOXFRM; | 2400 | rth->rt_oif = oldflp4->flowi4_oif; |
| 2407 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2401 | rth->rt_mark = oldflp4->flowi4_mark; |
| 2408 | rth->dst.flags |= DST_NOPOLICY; | 2402 | rth->rt_dst = fl4->daddr; |
| 2409 | 2403 | rth->rt_src = fl4->saddr; | |
| 2410 | rth->fl.fl4_dst = oldflp->fl4_dst; | 2404 | rth->rt_iif = 0; |
| 2411 | rth->fl.fl4_tos = tos; | ||
| 2412 | rth->fl.fl4_src = oldflp->fl4_src; | ||
| 2413 | rth->fl.oif = oldflp->oif; | ||
| 2414 | rth->fl.mark = oldflp->mark; | ||
| 2415 | rth->rt_dst = fl->fl4_dst; | ||
| 2416 | rth->rt_src = fl->fl4_src; | ||
| 2417 | rth->rt_iif = oldflp->oif ? : dev_out->ifindex; | ||
| 2418 | /* get references to the devices that are to be hold by the routing | 2405 | /* get references to the devices that are to be hold by the routing |
| 2419 | cache entry */ | 2406 | cache entry */ |
| 2420 | rth->dst.dev = dev_out; | 2407 | rth->dst.dev = dev_out; |
| 2421 | dev_hold(dev_out); | 2408 | dev_hold(dev_out); |
| 2422 | rth->rt_gateway = fl->fl4_dst; | 2409 | rth->rt_gateway = fl4->daddr; |
| 2423 | rth->rt_spec_dst= fl->fl4_src; | 2410 | rth->rt_spec_dst= fl4->saddr; |
| 2424 | 2411 | ||
| 2425 | rth->dst.output=ip_output; | 2412 | rth->dst.output=ip_output; |
| 2426 | rth->dst.obsolete = -1; | ||
| 2427 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2413 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
| 2428 | 2414 | ||
| 2429 | RT_CACHE_STAT_INC(out_slow_tot); | 2415 | RT_CACHE_STAT_INC(out_slow_tot); |
| 2430 | 2416 | ||
| 2431 | if (flags & RTCF_LOCAL) { | 2417 | if (flags & RTCF_LOCAL) { |
| 2432 | rth->dst.input = ip_local_deliver; | 2418 | rth->dst.input = ip_local_deliver; |
| 2433 | rth->rt_spec_dst = fl->fl4_dst; | 2419 | rth->rt_spec_dst = fl4->daddr; |
| 2434 | } | 2420 | } |
| 2435 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { | 2421 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { |
| 2436 | rth->rt_spec_dst = fl->fl4_src; | 2422 | rth->rt_spec_dst = fl4->saddr; |
| 2437 | if (flags & RTCF_LOCAL && | 2423 | if (flags & RTCF_LOCAL && |
| 2438 | !(dev_out->flags & IFF_LOOPBACK)) { | 2424 | !(dev_out->flags & IFF_LOOPBACK)) { |
| 2439 | rth->dst.output = ip_mc_output; | 2425 | rth->dst.output = ip_mc_output; |
| 2440 | RT_CACHE_STAT_INC(out_slow_mc); | 2426 | RT_CACHE_STAT_INC(out_slow_mc); |
| 2441 | } | 2427 | } |
| 2442 | #ifdef CONFIG_IP_MROUTE | 2428 | #ifdef CONFIG_IP_MROUTE |
| 2443 | if (res->type == RTN_MULTICAST) { | 2429 | if (type == RTN_MULTICAST) { |
| 2444 | if (IN_DEV_MFORWARD(in_dev) && | 2430 | if (IN_DEV_MFORWARD(in_dev) && |
| 2445 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { | 2431 | !ipv4_is_local_multicast(oldflp4->daddr)) { |
| 2446 | rth->dst.input = ip_mr_input; | 2432 | rth->dst.input = ip_mr_input; |
| 2447 | rth->dst.output = ip_mc_output; | 2433 | rth->dst.output = ip_mc_output; |
| 2448 | } | 2434 | } |
| @@ -2450,31 +2436,10 @@ static int __mkroute_output(struct rtable **result, | |||
| 2450 | #endif | 2436 | #endif |
| 2451 | } | 2437 | } |
| 2452 | 2438 | ||
| 2453 | rt_set_nexthop(rth, res, 0); | 2439 | rt_set_nexthop(rth, oldflp4, res, fi, type, 0); |
| 2454 | 2440 | ||
| 2455 | rth->rt_flags = flags; | 2441 | rth->rt_flags = flags; |
| 2456 | *result = rth; | 2442 | return rth; |
| 2457 | return 0; | ||
| 2458 | } | ||
| 2459 | |||
| 2460 | /* called with rcu_read_lock() */ | ||
| 2461 | static int ip_mkroute_output(struct rtable **rp, | ||
| 2462 | struct fib_result *res, | ||
| 2463 | const struct flowi *fl, | ||
| 2464 | const struct flowi *oldflp, | ||
| 2465 | struct net_device *dev_out, | ||
| 2466 | unsigned flags) | ||
| 2467 | { | ||
| 2468 | struct rtable *rth = NULL; | ||
| 2469 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); | ||
| 2470 | unsigned hash; | ||
| 2471 | if (err == 0) { | ||
| 2472 | hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, | ||
| 2473 | rt_genid(dev_net(dev_out))); | ||
| 2474 | err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); | ||
| 2475 | } | ||
| 2476 | |||
| 2477 | return err; | ||
| 2478 | } | 2443 | } |
| 2479 | 2444 | ||
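With the change above, __mkroute_output() becomes a pure constructor: it takes a const struct fib_result * and works on local copies (fi, type) instead of writing RTN_BROADCAST/RTN_MULTICAST back into *res, and it returns the new rtable (or an ERR_PTR) directly, so the ip_mkroute_output() wrapper can be deleted. Hash insertion moves into ip_route_output_slow(), visible in the make_route hunk further down; schematically the caller now does:

        rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags);
        if (!IS_ERR(rth)) {
                hash = rt_hash(oldflp4->daddr, oldflp4->saddr,
                               oldflp4->flowi4_oif, rt_genid(dev_net(dev_out)));
                rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif);
        }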
| 2480 | /* | 2445 | /* |
| @@ -2482,34 +2447,36 @@ static int ip_mkroute_output(struct rtable **rp, | |||
| 2482 | * called with rcu_read_lock(); | 2447 | * called with rcu_read_lock(); |
| 2483 | */ | 2448 | */ |
| 2484 | 2449 | ||
| 2485 | static int ip_route_output_slow(struct net *net, struct rtable **rp, | 2450 | static struct rtable *ip_route_output_slow(struct net *net, |
| 2486 | const struct flowi *oldflp) | 2451 | const struct flowi4 *oldflp4) |
| 2487 | { | 2452 | { |
| 2488 | u32 tos = RT_FL_TOS(oldflp); | 2453 | u32 tos = RT_FL_TOS(oldflp4); |
| 2489 | struct flowi fl = { .fl4_dst = oldflp->fl4_dst, | 2454 | struct flowi4 fl4; |
| 2490 | .fl4_src = oldflp->fl4_src, | ||
| 2491 | .fl4_tos = tos & IPTOS_RT_MASK, | ||
| 2492 | .fl4_scope = ((tos & RTO_ONLINK) ? | ||
| 2493 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), | ||
| 2494 | .mark = oldflp->mark, | ||
| 2495 | .iif = net->loopback_dev->ifindex, | ||
| 2496 | .oif = oldflp->oif }; | ||
| 2497 | struct fib_result res; | 2455 | struct fib_result res; |
| 2498 | unsigned int flags = 0; | 2456 | unsigned int flags = 0; |
| 2499 | struct net_device *dev_out = NULL; | 2457 | struct net_device *dev_out = NULL; |
| 2500 | int err; | 2458 | struct rtable *rth; |
| 2501 | |||
| 2502 | 2459 | ||
| 2503 | res.fi = NULL; | 2460 | res.fi = NULL; |
| 2504 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 2461 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
| 2505 | res.r = NULL; | 2462 | res.r = NULL; |
| 2506 | #endif | 2463 | #endif |
| 2507 | 2464 | ||
| 2508 | if (oldflp->fl4_src) { | 2465 | fl4.flowi4_oif = oldflp4->flowi4_oif; |
| 2509 | err = -EINVAL; | 2466 | fl4.flowi4_iif = net->loopback_dev->ifindex; |
| 2510 | if (ipv4_is_multicast(oldflp->fl4_src) || | 2467 | fl4.flowi4_mark = oldflp4->flowi4_mark; |
| 2511 | ipv4_is_lbcast(oldflp->fl4_src) || | 2468 | fl4.daddr = oldflp4->daddr; |
| 2512 | ipv4_is_zeronet(oldflp->fl4_src)) | 2469 | fl4.saddr = oldflp4->saddr; |
| 2470 | fl4.flowi4_tos = tos & IPTOS_RT_MASK; | ||
| 2471 | fl4.flowi4_scope = ((tos & RTO_ONLINK) ? | ||
| 2472 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); | ||
| 2473 | |||
| 2474 | rcu_read_lock(); | ||
| 2475 | if (oldflp4->saddr) { | ||
| 2476 | rth = ERR_PTR(-EINVAL); | ||
| 2477 | if (ipv4_is_multicast(oldflp4->saddr) || | ||
| 2478 | ipv4_is_lbcast(oldflp4->saddr) || | ||
| 2479 | ipv4_is_zeronet(oldflp4->saddr)) | ||
| 2513 | goto out; | 2480 | goto out; |
| 2514 | 2481 | ||
| 2515 | /* I removed check for oif == dev_out->oif here. | 2482 | /* I removed check for oif == dev_out->oif here. |
| @@ -2520,11 +2487,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
| 2520 | of another iface. --ANK | 2487 | of another iface. --ANK |
| 2521 | */ | 2488 | */ |
| 2522 | 2489 | ||
| 2523 | if (oldflp->oif == 0 && | 2490 | if (oldflp4->flowi4_oif == 0 && |
| 2524 | (ipv4_is_multicast(oldflp->fl4_dst) || | 2491 | (ipv4_is_multicast(oldflp4->daddr) || |
| 2525 | ipv4_is_lbcast(oldflp->fl4_dst))) { | 2492 | ipv4_is_lbcast(oldflp4->daddr))) { |
| 2526 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2493 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
| 2527 | dev_out = __ip_dev_find(net, oldflp->fl4_src, false); | 2494 | dev_out = __ip_dev_find(net, oldflp4->saddr, false); |
| 2528 | if (dev_out == NULL) | 2495 | if (dev_out == NULL) |
| 2529 | goto out; | 2496 | goto out; |
| 2530 | 2497 | ||
| @@ -2543,60 +2510,60 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
| 2543 | Luckily, this hack is good workaround. | 2510 | Luckily, this hack is good workaround. |
| 2544 | */ | 2511 | */ |
| 2545 | 2512 | ||
| 2546 | fl.oif = dev_out->ifindex; | 2513 | fl4.flowi4_oif = dev_out->ifindex; |
| 2547 | goto make_route; | 2514 | goto make_route; |
| 2548 | } | 2515 | } |
| 2549 | 2516 | ||
| 2550 | if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { | 2517 | if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { |
| 2551 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 2518 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ |
| 2552 | if (!__ip_dev_find(net, oldflp->fl4_src, false)) | 2519 | if (!__ip_dev_find(net, oldflp4->saddr, false)) |
| 2553 | goto out; | 2520 | goto out; |
| 2554 | } | 2521 | } |
| 2555 | } | 2522 | } |
| 2556 | 2523 | ||
| 2557 | 2524 | ||
| 2558 | if (oldflp->oif) { | 2525 | if (oldflp4->flowi4_oif) { |
| 2559 | dev_out = dev_get_by_index_rcu(net, oldflp->oif); | 2526 | dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif); |
| 2560 | err = -ENODEV; | 2527 | rth = ERR_PTR(-ENODEV); |
| 2561 | if (dev_out == NULL) | 2528 | if (dev_out == NULL) |
| 2562 | goto out; | 2529 | goto out; |
| 2563 | 2530 | ||
| 2564 | /* RACE: Check return value of inet_select_addr instead. */ | 2531 | /* RACE: Check return value of inet_select_addr instead. */ |
| 2565 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { | 2532 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { |
| 2566 | err = -ENETUNREACH; | 2533 | rth = ERR_PTR(-ENETUNREACH); |
| 2567 | goto out; | 2534 | goto out; |
| 2568 | } | 2535 | } |
| 2569 | if (ipv4_is_local_multicast(oldflp->fl4_dst) || | 2536 | if (ipv4_is_local_multicast(oldflp4->daddr) || |
| 2570 | ipv4_is_lbcast(oldflp->fl4_dst)) { | 2537 | ipv4_is_lbcast(oldflp4->daddr)) { |
| 2571 | if (!fl.fl4_src) | 2538 | if (!fl4.saddr) |
| 2572 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2539 | fl4.saddr = inet_select_addr(dev_out, 0, |
| 2573 | RT_SCOPE_LINK); | 2540 | RT_SCOPE_LINK); |
| 2574 | goto make_route; | 2541 | goto make_route; |
| 2575 | } | 2542 | } |
| 2576 | if (!fl.fl4_src) { | 2543 | if (!fl4.saddr) { |
| 2577 | if (ipv4_is_multicast(oldflp->fl4_dst)) | 2544 | if (ipv4_is_multicast(oldflp4->daddr)) |
| 2578 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2545 | fl4.saddr = inet_select_addr(dev_out, 0, |
| 2579 | fl.fl4_scope); | 2546 | fl4.flowi4_scope); |
| 2580 | else if (!oldflp->fl4_dst) | 2547 | else if (!oldflp4->daddr) |
| 2581 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2548 | fl4.saddr = inet_select_addr(dev_out, 0, |
| 2582 | RT_SCOPE_HOST); | 2549 | RT_SCOPE_HOST); |
| 2583 | } | 2550 | } |
| 2584 | } | 2551 | } |
| 2585 | 2552 | ||
| 2586 | if (!fl.fl4_dst) { | 2553 | if (!fl4.daddr) { |
| 2587 | fl.fl4_dst = fl.fl4_src; | 2554 | fl4.daddr = fl4.saddr; |
| 2588 | if (!fl.fl4_dst) | 2555 | if (!fl4.daddr) |
| 2589 | fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); | 2556 | fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK); |
| 2590 | dev_out = net->loopback_dev; | 2557 | dev_out = net->loopback_dev; |
| 2591 | fl.oif = net->loopback_dev->ifindex; | 2558 | fl4.flowi4_oif = net->loopback_dev->ifindex; |
| 2592 | res.type = RTN_LOCAL; | 2559 | res.type = RTN_LOCAL; |
| 2593 | flags |= RTCF_LOCAL; | 2560 | flags |= RTCF_LOCAL; |
| 2594 | goto make_route; | 2561 | goto make_route; |
| 2595 | } | 2562 | } |
| 2596 | 2563 | ||
| 2597 | if (fib_lookup(net, &fl, &res)) { | 2564 | if (fib_lookup(net, &fl4, &res)) { |
| 2598 | res.fi = NULL; | 2565 | res.fi = NULL; |
| 2599 | if (oldflp->oif) { | 2566 | if (oldflp4->flowi4_oif) { |
| 2600 | /* Apparently, routing tables are wrong. Assume, | 2567 | /* Apparently, routing tables are wrong. Assume, |
| 2601 | that the destination is on link. | 2568 | that the destination is on link. |
| 2602 | 2569 | ||
| @@ -2615,90 +2582,93 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, | |||
| 2615 | likely IPv6, but we do not. | 2582 | likely IPv6, but we do not. |
| 2616 | */ | 2583 | */ |
| 2617 | 2584 | ||
| 2618 | if (fl.fl4_src == 0) | 2585 | if (fl4.saddr == 0) |
| 2619 | fl.fl4_src = inet_select_addr(dev_out, 0, | 2586 | fl4.saddr = inet_select_addr(dev_out, 0, |
| 2620 | RT_SCOPE_LINK); | 2587 | RT_SCOPE_LINK); |
| 2621 | res.type = RTN_UNICAST; | 2588 | res.type = RTN_UNICAST; |
| 2622 | goto make_route; | 2589 | goto make_route; |
| 2623 | } | 2590 | } |
| 2624 | err = -ENETUNREACH; | 2591 | rth = ERR_PTR(-ENETUNREACH); |
| 2625 | goto out; | 2592 | goto out; |
| 2626 | } | 2593 | } |
| 2627 | 2594 | ||
| 2628 | if (res.type == RTN_LOCAL) { | 2595 | if (res.type == RTN_LOCAL) { |
| 2629 | if (!fl.fl4_src) { | 2596 | if (!fl4.saddr) { |
| 2630 | if (res.fi->fib_prefsrc) | 2597 | if (res.fi->fib_prefsrc) |
| 2631 | fl.fl4_src = res.fi->fib_prefsrc; | 2598 | fl4.saddr = res.fi->fib_prefsrc; |
| 2632 | else | 2599 | else |
| 2633 | fl.fl4_src = fl.fl4_dst; | 2600 | fl4.saddr = fl4.daddr; |
| 2634 | } | 2601 | } |
| 2635 | dev_out = net->loopback_dev; | 2602 | dev_out = net->loopback_dev; |
| 2636 | fl.oif = dev_out->ifindex; | 2603 | fl4.flowi4_oif = dev_out->ifindex; |
| 2637 | res.fi = NULL; | 2604 | res.fi = NULL; |
| 2638 | flags |= RTCF_LOCAL; | 2605 | flags |= RTCF_LOCAL; |
| 2639 | goto make_route; | 2606 | goto make_route; |
| 2640 | } | 2607 | } |
| 2641 | 2608 | ||
| 2642 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 2609 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| 2643 | if (res.fi->fib_nhs > 1 && fl.oif == 0) | 2610 | if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0) |
| 2644 | fib_select_multipath(&fl, &res); | 2611 | fib_select_multipath(&res); |
| 2645 | else | 2612 | else |
| 2646 | #endif | 2613 | #endif |
| 2647 | if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) | 2614 | if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) |
| 2648 | fib_select_default(net, &fl, &res); | 2615 | fib_select_default(&res); |
| 2649 | 2616 | ||
| 2650 | if (!fl.fl4_src) | 2617 | if (!fl4.saddr) |
| 2651 | fl.fl4_src = FIB_RES_PREFSRC(res); | 2618 | fl4.saddr = FIB_RES_PREFSRC(net, res); |
| 2652 | 2619 | ||
| 2653 | dev_out = FIB_RES_DEV(res); | 2620 | dev_out = FIB_RES_DEV(res); |
| 2654 | fl.oif = dev_out->ifindex; | 2621 | fl4.flowi4_oif = dev_out->ifindex; |
| 2655 | 2622 | ||
| 2656 | 2623 | ||
| 2657 | make_route: | 2624 | make_route: |
| 2658 | err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); | 2625 | rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags); |
| 2626 | if (!IS_ERR(rth)) { | ||
| 2627 | unsigned int hash; | ||
| 2659 | 2628 | ||
| 2660 | out: return err; | 2629 | hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif, |
| 2630 | rt_genid(dev_net(dev_out))); | ||
| 2631 | rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif); | ||
| 2632 | } | ||
| 2633 | |||
| 2634 | out: | ||
| 2635 | rcu_read_unlock(); | ||
| 2636 | return rth; | ||
| 2661 | } | 2637 | } |
| 2662 | 2638 | ||
| 2663 | int __ip_route_output_key(struct net *net, struct rtable **rp, | 2639 | struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) |
| 2664 | const struct flowi *flp) | ||
| 2665 | { | 2640 | { |
| 2666 | unsigned int hash; | ||
| 2667 | int res; | ||
| 2668 | struct rtable *rth; | 2641 | struct rtable *rth; |
| 2642 | unsigned int hash; | ||
| 2669 | 2643 | ||
| 2670 | if (!rt_caching(net)) | 2644 | if (!rt_caching(net)) |
| 2671 | goto slow_output; | 2645 | goto slow_output; |
| 2672 | 2646 | ||
| 2673 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); | 2647 | hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); |
| 2674 | 2648 | ||
| 2675 | rcu_read_lock_bh(); | 2649 | rcu_read_lock_bh(); |
| 2676 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; | 2650 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; |
| 2677 | rth = rcu_dereference_bh(rth->dst.rt_next)) { | 2651 | rth = rcu_dereference_bh(rth->dst.rt_next)) { |
| 2678 | if (rth->fl.fl4_dst == flp->fl4_dst && | 2652 | if (rth->rt_key_dst == flp4->daddr && |
| 2679 | rth->fl.fl4_src == flp->fl4_src && | 2653 | rth->rt_key_src == flp4->saddr && |
| 2680 | rt_is_output_route(rth) && | 2654 | rt_is_output_route(rth) && |
| 2681 | rth->fl.oif == flp->oif && | 2655 | rth->rt_oif == flp4->flowi4_oif && |
| 2682 | rth->fl.mark == flp->mark && | 2656 | rth->rt_mark == flp4->flowi4_mark && |
| 2683 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | 2657 | !((rth->rt_tos ^ flp4->flowi4_tos) & |
| 2684 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2658 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
| 2685 | net_eq(dev_net(rth->dst.dev), net) && | 2659 | net_eq(dev_net(rth->dst.dev), net) && |
| 2686 | !rt_is_expired(rth)) { | 2660 | !rt_is_expired(rth)) { |
| 2687 | dst_use(&rth->dst, jiffies); | 2661 | dst_use(&rth->dst, jiffies); |
| 2688 | RT_CACHE_STAT_INC(out_hit); | 2662 | RT_CACHE_STAT_INC(out_hit); |
| 2689 | rcu_read_unlock_bh(); | 2663 | rcu_read_unlock_bh(); |
| 2690 | *rp = rth; | 2664 | return rth; |
| 2691 | return 0; | ||
| 2692 | } | 2665 | } |
| 2693 | RT_CACHE_STAT_INC(out_hlist_search); | 2666 | RT_CACHE_STAT_INC(out_hlist_search); |
| 2694 | } | 2667 | } |
| 2695 | rcu_read_unlock_bh(); | 2668 | rcu_read_unlock_bh(); |
| 2696 | 2669 | ||
| 2697 | slow_output: | 2670 | slow_output: |
| 2698 | rcu_read_lock(); | 2671 | return ip_route_output_slow(net, flp4); |
| 2699 | res = ip_route_output_slow(net, rp, flp); | ||
| 2700 | rcu_read_unlock(); | ||
| 2701 | return res; | ||
| 2702 | } | 2672 | } |
| 2703 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 2673 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
| 2704 | 2674 | ||
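The fast path in __ip_route_output_key() is structurally unchanged; the key fields move from rth->fl to the rtable itself and a hit is returned directly instead of through *rp. Note the TOS test: two keys match when they agree on the routing-relevant TOS bits and the ONLINK pseudo-flag, i.e. the bits under (IPTOS_RT_MASK | RTO_ONLINK); other TOS bits are ignored. Pulled out as a predicate (hypothetical helper name, same logic as the hunk):

        static bool rt_out_key_match(const struct rtable *rth,
                                     const struct flowi4 *flp4, struct net *net)
        {
                return rth->rt_key_dst == flp4->daddr &&
                       rth->rt_key_src == flp4->saddr &&
                       rt_is_output_route(rth) &&
                       rth->rt_oif == flp4->flowi4_oif &&
                       rth->rt_mark == flp4->flowi4_mark &&
                       /* compare only routing-relevant TOS bits + ONLINK */
                       !((rth->rt_tos ^ flp4->flowi4_tos) &
                         (IPTOS_RT_MASK | RTO_ONLINK)) &&
                       net_eq(dev_net(rth->dst.dev), net) &&
                       !rt_is_expired(rth);
        }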
| @@ -2707,6 +2677,11 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo | |||
| 2707 | return NULL; | 2677 | return NULL; |
| 2708 | } | 2678 | } |
| 2709 | 2679 | ||
| 2680 | static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst) | ||
| 2681 | { | ||
| 2682 | return 0; | ||
| 2683 | } | ||
| 2684 | |||
| 2710 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | 2685 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) |
| 2711 | { | 2686 | { |
| 2712 | } | 2687 | } |
| @@ -2716,20 +2691,19 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
| 2716 | .protocol = cpu_to_be16(ETH_P_IP), | 2691 | .protocol = cpu_to_be16(ETH_P_IP), |
| 2717 | .destroy = ipv4_dst_destroy, | 2692 | .destroy = ipv4_dst_destroy, |
| 2718 | .check = ipv4_blackhole_dst_check, | 2693 | .check = ipv4_blackhole_dst_check, |
| 2694 | .default_mtu = ipv4_blackhole_default_mtu, | ||
| 2695 | .default_advmss = ipv4_default_advmss, | ||
| 2719 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2696 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
| 2720 | }; | 2697 | }; |
| 2721 | 2698 | ||
| 2722 | 2699 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) | |
| 2723 | static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp) | ||
| 2724 | { | 2700 | { |
| 2725 | struct rtable *ort = *rp; | 2701 | struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1); |
| 2726 | struct rtable *rt = (struct rtable *) | 2702 | struct rtable *ort = (struct rtable *) dst_orig; |
| 2727 | dst_alloc(&ipv4_dst_blackhole_ops); | ||
| 2728 | 2703 | ||
| 2729 | if (rt) { | 2704 | if (rt) { |
| 2730 | struct dst_entry *new = &rt->dst; | 2705 | struct dst_entry *new = &rt->dst; |
| 2731 | 2706 | ||
| 2732 | atomic_set(&new->__refcnt, 1); | ||
| 2733 | new->__use = 1; | 2707 | new->__use = 1; |
| 2734 | new->input = dst_discard; | 2708 | new->input = dst_discard; |
| 2735 | new->output = dst_discard; | 2709 | new->output = dst_discard; |
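A blackhole route is a clone of an existing dst whose handlers silently drop traffic in both directions; the new default_mtu op above reports no MTU and update_pmtu remains a no-op, so PMTU machinery cannot touch it. dst_discard(), from net/core/dst.c in this era, is roughly:

        static int dst_discard(struct sk_buff *skb)
        {
                kfree_skb(skb);         /* drop silently, report success */
                return 0;
        }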
| @@ -2739,7 +2713,12 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
| 2739 | if (new->dev) | 2713 | if (new->dev) |
| 2740 | dev_hold(new->dev); | 2714 | dev_hold(new->dev); |
| 2741 | 2715 | ||
| 2742 | rt->fl = ort->fl; | 2716 | rt->rt_key_dst = ort->rt_key_dst; |
| 2717 | rt->rt_key_src = ort->rt_key_src; | ||
| 2718 | rt->rt_tos = ort->rt_tos; | ||
| 2719 | rt->rt_iif = ort->rt_iif; | ||
| 2720 | rt->rt_oif = ort->rt_oif; | ||
| 2721 | rt->rt_mark = ort->rt_mark; | ||
| 2743 | 2722 | ||
| 2744 | rt->rt_genid = rt_genid(net); | 2723 | rt->rt_genid = rt_genid(net); |
| 2745 | rt->rt_flags = ort->rt_flags; | 2724 | rt->rt_flags = ort->rt_flags; |
| @@ -2752,46 +2731,40 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
| 2752 | rt->peer = ort->peer; | 2731 | rt->peer = ort->peer; |
| 2753 | if (rt->peer) | 2732 | if (rt->peer) |
| 2754 | atomic_inc(&rt->peer->refcnt); | 2733 | atomic_inc(&rt->peer->refcnt); |
| 2734 | rt->fi = ort->fi; | ||
| 2735 | if (rt->fi) | ||
| 2736 | atomic_inc(&rt->fi->fib_clntref); | ||
| 2755 | 2737 | ||
| 2756 | dst_free(new); | 2738 | dst_free(new); |
| 2757 | } | 2739 | } |
| 2758 | 2740 | ||
| 2759 | dst_release(&(*rp)->dst); | 2741 | dst_release(dst_orig); |
| 2760 | *rp = rt; | 2742 | |
| 2761 | return rt ? 0 : -ENOMEM; | 2743 | return rt ? &rt->dst : ERR_PTR(-ENOMEM); |
| 2762 | } | 2744 | } |
| 2763 | 2745 | ||
| 2764 | int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, | 2746 | struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, |
| 2765 | struct sock *sk, int flags) | 2747 | struct sock *sk) |
| 2766 | { | 2748 | { |
| 2767 | int err; | 2749 | struct rtable *rt = __ip_route_output_key(net, flp4); |
| 2768 | 2750 | ||
| 2769 | if ((err = __ip_route_output_key(net, rp, flp)) != 0) | 2751 | if (IS_ERR(rt)) |
| 2770 | return err; | 2752 | return rt; |
| 2771 | 2753 | ||
| 2772 | if (flp->proto) { | 2754 | if (flp4->flowi4_proto) { |
| 2773 | if (!flp->fl4_src) | 2755 | if (!flp4->saddr) |
| 2774 | flp->fl4_src = (*rp)->rt_src; | 2756 | flp4->saddr = rt->rt_src; |
| 2775 | if (!flp->fl4_dst) | 2757 | if (!flp4->daddr) |
| 2776 | flp->fl4_dst = (*rp)->rt_dst; | 2758 | flp4->daddr = rt->rt_dst; |
| 2777 | err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, | 2759 | rt = (struct rtable *) xfrm_lookup(net, &rt->dst, |
| 2778 | flags ? XFRM_LOOKUP_WAIT : 0); | 2760 | flowi4_to_flowi(flp4), |
| 2779 | if (err == -EREMOTE) | 2761 | sk, 0); |
| 2780 | err = ipv4_dst_blackhole(net, rp, flp); | ||
| 2781 | |||
| 2782 | return err; | ||
| 2783 | } | 2762 | } |
| 2784 | 2763 | ||
| 2785 | return 0; | 2764 | return rt; |
| 2786 | } | 2765 | } |
| 2787 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 2766 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
| 2788 | 2767 | ||
| 2789 | int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) | ||
| 2790 | { | ||
| 2791 | return ip_route_output_flow(net, rp, flp, NULL, 0); | ||
| 2792 | } | ||
| 2793 | EXPORT_SYMBOL(ip_route_output_key); | ||
| 2794 | |||
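The int-returning ip_route_output_key() wrapper is removed from route.c; judging by the inet_rtm_getroute() hunk below, which calls it with the new signature, it presumably survives as a header inline that returns the rtable directly. Converted callers follow the pattern:

        struct rtable *rt = ip_route_output_key(net, &fl4);
        if (IS_ERR(rt))
                return PTR_ERR(rt);     /* no struct rtable ** out-parameter */
        /* ... use rt, then release it with ip_rt_put(rt) ... */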
| 2795 | static int rt_fill_info(struct net *net, | 2768 | static int rt_fill_info(struct net *net, |
| 2796 | struct sk_buff *skb, u32 pid, u32 seq, int event, | 2769 | struct sk_buff *skb, u32 pid, u32 seq, int event, |
| 2797 | int nowait, unsigned int flags) | 2770 | int nowait, unsigned int flags) |
| @@ -2810,7 +2783,7 @@ static int rt_fill_info(struct net *net, | |||
| 2810 | r->rtm_family = AF_INET; | 2783 | r->rtm_family = AF_INET; |
| 2811 | r->rtm_dst_len = 32; | 2784 | r->rtm_dst_len = 32; |
| 2812 | r->rtm_src_len = 0; | 2785 | r->rtm_src_len = 0; |
| 2813 | r->rtm_tos = rt->fl.fl4_tos; | 2786 | r->rtm_tos = rt->rt_tos; |
| 2814 | r->rtm_table = RT_TABLE_MAIN; | 2787 | r->rtm_table = RT_TABLE_MAIN; |
| 2815 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | 2788 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); |
| 2816 | r->rtm_type = rt->rt_type; | 2789 | r->rtm_type = rt->rt_type; |
| @@ -2822,19 +2795,19 @@ static int rt_fill_info(struct net *net, | |||
| 2822 | 2795 | ||
| 2823 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); | 2796 | NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); |
| 2824 | 2797 | ||
| 2825 | if (rt->fl.fl4_src) { | 2798 | if (rt->rt_key_src) { |
| 2826 | r->rtm_src_len = 32; | 2799 | r->rtm_src_len = 32; |
| 2827 | NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); | 2800 | NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); |
| 2828 | } | 2801 | } |
| 2829 | if (rt->dst.dev) | 2802 | if (rt->dst.dev) |
| 2830 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); | 2803 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
| 2831 | #ifdef CONFIG_NET_CLS_ROUTE | 2804 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 2832 | if (rt->dst.tclassid) | 2805 | if (rt->dst.tclassid) |
| 2833 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); | 2806 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
| 2834 | #endif | 2807 | #endif |
| 2835 | if (rt_is_input_route(rt)) | 2808 | if (rt_is_input_route(rt)) |
| 2836 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2809 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); |
| 2837 | else if (rt->rt_src != rt->fl.fl4_src) | 2810 | else if (rt->rt_src != rt->rt_key_src) |
| 2838 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); | 2811 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); |
| 2839 | 2812 | ||
| 2840 | if (rt->rt_dst != rt->rt_gateway) | 2813 | if (rt->rt_dst != rt->rt_gateway) |
| @@ -2843,11 +2816,12 @@ static int rt_fill_info(struct net *net, | |||
| 2843 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) | 2816 | if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) |
| 2844 | goto nla_put_failure; | 2817 | goto nla_put_failure; |
| 2845 | 2818 | ||
| 2846 | if (rt->fl.mark) | 2819 | if (rt->rt_mark) |
| 2847 | NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); | 2820 | NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); |
| 2848 | 2821 | ||
| 2849 | error = rt->dst.error; | 2822 | error = rt->dst.error; |
| 2850 | expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; | 2823 | expires = (rt->peer && rt->peer->pmtu_expires) ? |
| 2824 | rt->peer->pmtu_expires - jiffies : 0; | ||
| 2851 | if (rt->peer) { | 2825 | if (rt->peer) { |
| 2852 | inet_peer_refcheck(rt->peer); | 2826 | inet_peer_refcheck(rt->peer); |
| 2853 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; | 2827 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; |
| @@ -2877,7 +2851,7 @@ static int rt_fill_info(struct net *net, | |||
| 2877 | } | 2851 | } |
| 2878 | } else | 2852 | } else |
| 2879 | #endif | 2853 | #endif |
| 2880 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); | 2854 | NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); |
| 2881 | } | 2855 | } |
| 2882 | 2856 | ||
| 2883 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, | 2857 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
| @@ -2951,14 +2925,18 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
| 2951 | if (err == 0 && rt->dst.error) | 2925 | if (err == 0 && rt->dst.error) |
| 2952 | err = -rt->dst.error; | 2926 | err = -rt->dst.error; |
| 2953 | } else { | 2927 | } else { |
| 2954 | struct flowi fl = { | 2928 | struct flowi4 fl4 = { |
| 2955 | .fl4_dst = dst, | 2929 | .daddr = dst, |
| 2956 | .fl4_src = src, | 2930 | .saddr = src, |
| 2957 | .fl4_tos = rtm->rtm_tos, | 2931 | .flowi4_tos = rtm->rtm_tos, |
| 2958 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | 2932 | .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, |
| 2959 | .mark = mark, | 2933 | .flowi4_mark = mark, |
| 2960 | }; | 2934 | }; |
| 2961 | err = ip_route_output_key(net, &rt, &fl); | 2935 | rt = ip_route_output_key(net, &fl4); |
| 2936 | |||
| 2937 | err = 0; | ||
| 2938 | if (IS_ERR(rt)) | ||
| 2939 | err = PTR_ERR(rt); | ||
| 2962 | } | 2940 | } |
| 2963 | 2941 | ||
| 2964 | if (err) | 2942 | if (err) |
| @@ -3241,6 +3219,8 @@ static __net_init int rt_genid_init(struct net *net) | |||
| 3241 | { | 3219 | { |
| 3242 | get_random_bytes(&net->ipv4.rt_genid, | 3220 | get_random_bytes(&net->ipv4.rt_genid, |
| 3243 | sizeof(net->ipv4.rt_genid)); | 3221 | sizeof(net->ipv4.rt_genid)); |
| 3222 | get_random_bytes(&net->ipv4.dev_addr_genid, | ||
| 3223 | sizeof(net->ipv4.dev_addr_genid)); | ||
| 3244 | return 0; | 3224 | return 0; |
| 3245 | } | 3225 | } |
| 3246 | 3226 | ||
| @@ -3249,9 +3229,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = { | |||
| 3249 | }; | 3229 | }; |
| 3250 | 3230 | ||
| 3251 | 3231 | ||
| 3252 | #ifdef CONFIG_NET_CLS_ROUTE | 3232 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 3253 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | 3233 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
| 3254 | #endif /* CONFIG_NET_CLS_ROUTE */ | 3234 | #endif /* CONFIG_IP_ROUTE_CLASSID */ |
| 3255 | 3235 | ||
| 3256 | static __initdata unsigned long rhash_entries; | 3236 | static __initdata unsigned long rhash_entries; |
| 3257 | static int __init set_rhash_entries(char *str) | 3237 | static int __init set_rhash_entries(char *str) |
| @@ -3267,7 +3247,7 @@ int __init ip_rt_init(void) | |||
| 3267 | { | 3247 | { |
| 3268 | int rc = 0; | 3248 | int rc = 0; |
| 3269 | 3249 | ||
| 3270 | #ifdef CONFIG_NET_CLS_ROUTE | 3250 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 3271 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 3251 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); |
| 3272 | if (!ip_rt_acct) | 3252 | if (!ip_rt_acct) |
| 3273 | panic("IP: failed to allocate ip_rt_acct\n"); | 3253 | panic("IP: failed to allocate ip_rt_acct\n"); |
| @@ -3304,14 +3284,6 @@ int __init ip_rt_init(void) | |||
| 3304 | devinet_init(); | 3284 | devinet_init(); |
| 3305 | ip_fib_init(); | 3285 | ip_fib_init(); |
| 3306 | 3286 | ||
| 3307 | /* All the timers, started at system startup tend | ||
| 3308 | to synchronize. Perturb it a bit. | ||
| 3309 | */ | ||
| 3310 | INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); | ||
| 3311 | expires_ljiffies = jiffies; | ||
| 3312 | schedule_delayed_work(&expires_work, | ||
| 3313 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | ||
| 3314 | |||
| 3315 | if (ip_rt_proc_init()) | 3287 | if (ip_rt_proc_init()) |
| 3316 | printk(KERN_ERR "Unable to create route proc files\n"); | 3288 | printk(KERN_ERR "Unable to create route proc files\n"); |
| 3317 | #ifdef CONFIG_XFRM | 3289 | #ifdef CONFIG_XFRM |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 47519205a014..8b44c6d2a79b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
| @@ -345,17 +345,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
| 345 | * no easy way to do this. | 345 | * no easy way to do this. |
| 346 | */ | 346 | */ |
| 347 | { | 347 | { |
| 348 | struct flowi fl = { .mark = sk->sk_mark, | 348 | struct flowi4 fl4 = { |
| 349 | .fl4_dst = ((opt && opt->srr) ? | 349 | .flowi4_mark = sk->sk_mark, |
| 350 | opt->faddr : ireq->rmt_addr), | 350 | .daddr = ((opt && opt->srr) ? |
| 351 | .fl4_src = ireq->loc_addr, | 351 | opt->faddr : ireq->rmt_addr), |
| 352 | .fl4_tos = RT_CONN_FLAGS(sk), | 352 | .saddr = ireq->loc_addr, |
| 353 | .proto = IPPROTO_TCP, | 353 | .flowi4_tos = RT_CONN_FLAGS(sk), |
| 354 | .flags = inet_sk_flowi_flags(sk), | 354 | .flowi4_proto = IPPROTO_TCP, |
| 355 | .fl_ip_sport = th->dest, | 355 | .flowi4_flags = inet_sk_flowi_flags(sk), |
| 356 | .fl_ip_dport = th->source }; | 356 | .fl4_sport = th->dest, |
| 357 | security_req_classify_flow(req, &fl); | 357 | .fl4_dport = th->source, |
| 358 | if (ip_route_output_key(sock_net(sk), &rt, &fl)) { | 358 | }; |
| 359 | security_req_classify_flow(req, flowi4_to_flowi(&fl4)); | ||
| 360 | rt = ip_route_output_key(sock_net(sk), &fl4); | ||
| 361 | if (IS_ERR(rt)) { | ||
| 359 | reqsk_free(req); | 362 | reqsk_free(req); |
| 360 | goto out; | 363 | goto out; |
| 361 | } | 364 | } |
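security_req_classify_flow() still takes the generic struct flowi, so the typed key is bridged with flowi4_to_flowi(). In this series struct flowi becomes, roughly, a union over the per-family keys, which makes the conversion a container_of() with no copying (simplified sketch, not the verbatim header):

        struct flowi {
                union {
                        struct flowi_common     __fl_common;
                        struct flowi4           ip4;
                        struct flowi6           ip6;
                } u;
        };

        static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
        {
                return container_of(fl4, struct flowi, u.ip4);
        }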
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6c11eece262c..b22d45010545 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
| @@ -505,6 +505,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
| 505 | else | 505 | else |
| 506 | answ = tp->write_seq - tp->snd_una; | 506 | answ = tp->write_seq - tp->snd_una; |
| 507 | break; | 507 | break; |
| 508 | case SIOCOUTQNSD: | ||
| 509 | if (sk->sk_state == TCP_LISTEN) | ||
| 510 | return -EINVAL; | ||
| 511 | |||
| 512 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) | ||
| 513 | answ = 0; | ||
| 514 | else | ||
| 515 | answ = tp->write_seq - tp->snd_nxt; | ||
| 516 | break; | ||
| 508 | default: | 517 | default: |
| 509 | return -ENOIOCTLCMD; | 518 | return -ENOIOCTLCMD; |
| 510 | } | 519 | } |
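The new SIOCOUTQNSD ioctl reports bytes sitting in the send queue that have not yet been sent (write_seq - snd_nxt), complementing SIOCOUTQ, which counts everything not yet acknowledged (write_seq - snd_una). A hypothetical userspace probe, not part of the patch:

        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <linux/sockios.h>      /* SIOCOUTQ, SIOCOUTQNSD */

        static void print_tcp_sendq(int fd)
        {
                int unacked, unsent;

                if (ioctl(fd, SIOCOUTQ, &unacked) == 0 &&
                    ioctl(fd, SIOCOUTQNSD, &unsent) == 0)
                        printf("in flight: %d, queued but unsent: %d\n",
                               unacked - unsent, unsent);
        }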
| @@ -873,9 +882,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, | |||
| 873 | flags); | 882 | flags); |
| 874 | 883 | ||
| 875 | lock_sock(sk); | 884 | lock_sock(sk); |
| 876 | TCP_CHECK_TIMER(sk); | ||
| 877 | res = do_tcp_sendpages(sk, &page, offset, size, flags); | 885 | res = do_tcp_sendpages(sk, &page, offset, size, flags); |
| 878 | TCP_CHECK_TIMER(sk); | ||
| 879 | release_sock(sk); | 886 | release_sock(sk); |
| 880 | return res; | 887 | return res; |
| 881 | } | 888 | } |
| @@ -916,7 +923,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 916 | long timeo; | 923 | long timeo; |
| 917 | 924 | ||
| 918 | lock_sock(sk); | 925 | lock_sock(sk); |
| 919 | TCP_CHECK_TIMER(sk); | ||
| 920 | 926 | ||
| 921 | flags = msg->msg_flags; | 927 | flags = msg->msg_flags; |
| 922 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | 928 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
| @@ -1104,7 +1110,6 @@ wait_for_memory: | |||
| 1104 | out: | 1110 | out: |
| 1105 | if (copied) | 1111 | if (copied) |
| 1106 | tcp_push(sk, flags, mss_now, tp->nonagle); | 1112 | tcp_push(sk, flags, mss_now, tp->nonagle); |
| 1107 | TCP_CHECK_TIMER(sk); | ||
| 1108 | release_sock(sk); | 1113 | release_sock(sk); |
| 1109 | return copied; | 1114 | return copied; |
| 1110 | 1115 | ||
| @@ -1123,7 +1128,6 @@ do_error: | |||
| 1123 | goto out; | 1128 | goto out; |
| 1124 | out_err: | 1129 | out_err: |
| 1125 | err = sk_stream_error(sk, flags, err); | 1130 | err = sk_stream_error(sk, flags, err); |
| 1126 | TCP_CHECK_TIMER(sk); | ||
| 1127 | release_sock(sk); | 1131 | release_sock(sk); |
| 1128 | return err; | 1132 | return err; |
| 1129 | } | 1133 | } |
| @@ -1415,8 +1419,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 1415 | 1419 | ||
| 1416 | lock_sock(sk); | 1420 | lock_sock(sk); |
| 1417 | 1421 | ||
| 1418 | TCP_CHECK_TIMER(sk); | ||
| 1419 | |||
| 1420 | err = -ENOTCONN; | 1422 | err = -ENOTCONN; |
| 1421 | if (sk->sk_state == TCP_LISTEN) | 1423 | if (sk->sk_state == TCP_LISTEN) |
| 1422 | goto out; | 1424 | goto out; |
| @@ -1767,12 +1769,10 @@ skip_copy: | |||
| 1767 | /* Clean up data we have read: This will do ACK frames. */ | 1769 | /* Clean up data we have read: This will do ACK frames. */ |
| 1768 | tcp_cleanup_rbuf(sk, copied); | 1770 | tcp_cleanup_rbuf(sk, copied); |
| 1769 | 1771 | ||
| 1770 | TCP_CHECK_TIMER(sk); | ||
| 1771 | release_sock(sk); | 1772 | release_sock(sk); |
| 1772 | return copied; | 1773 | return copied; |
| 1773 | 1774 | ||
| 1774 | out: | 1775 | out: |
| 1775 | TCP_CHECK_TIMER(sk); | ||
| 1776 | release_sock(sk); | 1776 | release_sock(sk); |
| 1777 | return err; | 1777 | return err; |
| 1778 | 1778 | ||
| @@ -2653,7 +2653,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | |||
| 2653 | EXPORT_SYMBOL(compat_tcp_getsockopt); | 2653 | EXPORT_SYMBOL(compat_tcp_getsockopt); |
| 2654 | #endif | 2654 | #endif |
| 2655 | 2655 | ||
| 2656 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) | 2656 | struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) |
| 2657 | { | 2657 | { |
| 2658 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2658 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
| 2659 | struct tcphdr *th; | 2659 | struct tcphdr *th; |
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 3b53fd1af23f..6187eb4d1dcf 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
| @@ -209,7 +209,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) | |||
| 209 | } | 209 | } |
| 210 | 210 | ||
| 211 | 211 | ||
| 212 | static struct tcp_congestion_ops bictcp = { | 212 | static struct tcp_congestion_ops bictcp __read_mostly = { |
| 213 | .init = bictcp_init, | 213 | .init = bictcp_init, |
| 214 | .ssthresh = bictcp_recalc_ssthresh, | 214 | .ssthresh = bictcp_recalc_ssthresh, |
| 215 | .cong_avoid = bictcp_cong_avoid, | 215 | .cong_avoid = bictcp_cong_avoid, |
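__read_mostly places the ops structure in the kernel's read-mostly data section so it does not share cache lines with frequently written data; the same annotation fits any mostly-immutable module-level object, e.g. (hypothetical variable):

        static int sample_interval __read_mostly = 10;  /* set once at init */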
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 71d5f2f29fa6..34340c9c95fa 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
| @@ -39,7 +39,7 @@ | |||
| 39 | 39 | ||
| 40 | /* Number of delay samples for detecting the increase of delay */ | 40 | /* Number of delay samples for detecting the increase of delay */ |
| 41 | #define HYSTART_MIN_SAMPLES 8 | 41 | #define HYSTART_MIN_SAMPLES 8 |
| 42 | #define HYSTART_DELAY_MIN (2U<<3) | 42 | #define HYSTART_DELAY_MIN (4U<<3) |
| 43 | #define HYSTART_DELAY_MAX (16U<<3) | 43 | #define HYSTART_DELAY_MAX (16U<<3) |
| 44 | #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) | 44 | #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) |
| 45 | 45 | ||
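HyStart delay values are kept in 1/8-millisecond fixed point, matching the "(msec << 3)" comment added to delay_min below, so this hunk raises the minimum delay-increase threshold from (2U<<3) = 2 ms to (4U<<3) = 4 ms. The encoding in one place:

        /* value = milliseconds << 3, i.e. 3 fractional bits */
        #define HYSTART_DELAY_MIN       (4U << 3)       /* 32  -> 4 ms  */
        #define HYSTART_DELAY_MAX       (16U << 3)      /* 128 -> 16 ms */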
| @@ -52,6 +52,7 @@ static int tcp_friendliness __read_mostly = 1; | |||
| 52 | static int hystart __read_mostly = 1; | 52 | static int hystart __read_mostly = 1; |
| 53 | static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; | 53 | static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; |
| 54 | static int hystart_low_window __read_mostly = 16; | 54 | static int hystart_low_window __read_mostly = 16; |
| 55 | static int hystart_ack_delta __read_mostly = 2; | ||
| 55 | 56 | ||
| 56 | static u32 cube_rtt_scale __read_mostly; | 57 | static u32 cube_rtt_scale __read_mostly; |
| 57 | static u32 beta_scale __read_mostly; | 58 | static u32 beta_scale __read_mostly; |
| @@ -75,6 +76,8 @@ MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms" | |||
| 75 | " 1: packet-train 2: delay 3: both packet-train and delay"); | 76 | " 1: packet-train 2: delay 3: both packet-train and delay"); |
| 76 | module_param(hystart_low_window, int, 0644); | 77 | module_param(hystart_low_window, int, 0644); |
| 77 | MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); | 78 | MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); |
| 79 | module_param(hystart_ack_delta, int, 0644); | ||
| 80 | MODULE_PARM_DESC(hystart_ack_delta, "spacing between ACKs indicating train (msecs)"); | ||
| 78 | 81 | ||
| 79 | /* BIC TCP Parameters */ | 82 | /* BIC TCP Parameters */ |
| 80 | struct bictcp { | 83 | struct bictcp { |
| @@ -85,7 +88,7 @@ struct bictcp { | |||
| 85 | u32 last_time; /* time when updated last_cwnd */ | 88 | u32 last_time; /* time when updated last_cwnd */ |
| 86 | u32 bic_origin_point;/* origin point of bic function */ | 89 | u32 bic_origin_point;/* origin point of bic function */ |
| 87 | u32 bic_K; /* time to origin point from the beginning of the current epoch */ | 90 | u32 bic_K; /* time to origin point from the beginning of the current epoch */ |
| 88 | u32 delay_min; /* min delay */ | 91 | u32 delay_min; /* min delay (msec << 3) */ |
| 89 | u32 epoch_start; /* beginning of an epoch */ | 92 | u32 epoch_start; /* beginning of an epoch */ |
| 90 | u32 ack_cnt; /* number of acks */ | 93 | u32 ack_cnt; /* number of acks */ |
| 91 | u32 tcp_cwnd; /* estimated tcp cwnd */ | 94 | u32 tcp_cwnd; /* estimated tcp cwnd */ |
| @@ -95,7 +98,7 @@ struct bictcp { | |||
| 95 | u8 found; /* the exit point is found? */ | 98 | u8 found; /* the exit point is found? */ |
| 96 | u32 round_start; /* beginning of each round */ | 99 | u32 round_start; /* beginning of each round */ |
| 97 | u32 end_seq; /* end_seq of the round */ | 100 | u32 end_seq; /* end_seq of the round */ |
| 98 | u32 last_jiffies; /* last time when the ACK spacing is close */ | 101 | u32 last_ack; /* last time when the ACK spacing is close */ |
| 99 | u32 curr_rtt; /* the minimum rtt of current round */ | 102 | u32 curr_rtt; /* the minimum rtt of current round */ |
| 100 | }; | 103 | }; |
| 101 | 104 | ||
| @@ -116,12 +119,21 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
| 116 | ca->found = 0; | 119 | ca->found = 0; |
| 117 | } | 120 | } |
| 118 | 121 | ||
| 122 | static inline u32 bictcp_clock(void) | ||
| 123 | { | ||
| 124 | #if HZ < 1000 | ||
| 125 | return ktime_to_ms(ktime_get_real()); | ||
| 126 | #else | ||
| 127 | return jiffies_to_msecs(jiffies); | ||
| 128 | #endif | ||
| 129 | } | ||
| 130 | |||
| 119 | static inline void bictcp_hystart_reset(struct sock *sk) | 131 | static inline void bictcp_hystart_reset(struct sock *sk) |
| 120 | { | 132 | { |
| 121 | struct tcp_sock *tp = tcp_sk(sk); | 133 | struct tcp_sock *tp = tcp_sk(sk); |
| 122 | struct bictcp *ca = inet_csk_ca(sk); | 134 | struct bictcp *ca = inet_csk_ca(sk); |
| 123 | 135 | ||
| 124 | ca->round_start = ca->last_jiffies = jiffies; | 136 | ca->round_start = ca->last_ack = bictcp_clock(); |
| 125 | ca->end_seq = tp->snd_nxt; | 137 | ca->end_seq = tp->snd_nxt; |
| 126 | ca->curr_rtt = 0; | 138 | ca->curr_rtt = 0; |
| 127 | ca->sample_cnt = 0; | 139 | ca->sample_cnt = 0; |
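bictcp_clock() exists because on HZ < 1000 kernels a jiffy is coarser than one millisecond, which is too coarse for the small ACK-train spacing hystart looks for; reading ktime_get_real() instead yields true millisecond resolution. A userspace analogue of the helper, assuming POSIX clock_gettime():

/* Return a millisecond timestamp from a real clock when the tick
 * would be too coarse (sketch only; the kernel uses ktime/jiffies). */
#include <stdio.h>
#include <stdint.h>
#include <time.h>

static uint32_t ms_clock(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint32_t)(ts.tv_sec * 1000 + ts.tv_nsec / 1000000);
}

int main(void)
{
    uint32_t round_start = ms_clock();
    /* ... each arriving ACK re-reads the clock and compares ... */
    printf("round started at %u ms\n", round_start);
    return 0;
}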
| @@ -236,8 +248,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
| 236 | */ | 248 | */ |
| 237 | 249 | ||
| 238 | /* change the unit from HZ to bictcp_HZ */ | 250 | /* change the unit from HZ to bictcp_HZ */ |
| 239 | t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start) | 251 | t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) |
| 240 | << BICTCP_HZ) / HZ; | 252 | - ca->epoch_start) << BICTCP_HZ) / HZ; |
| 241 | 253 | ||
| 242 | if (t < ca->bic_K) /* t - K */ | 254 | if (t < ca->bic_K) /* t - K */ |
| 243 | offs = ca->bic_K - t; | 255 | offs = ca->bic_K - t; |
| @@ -258,6 +270,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
| 258 | ca->cnt = 100 * cwnd; /* very small increment*/ | 270 | ca->cnt = 100 * cwnd; /* very small increment*/ |
| 259 | } | 271 | } |
| 260 | 272 | ||
| 273 | /* | ||
| 274 | * The initial growth of cubic function may be too conservative | ||
| 275 | * when the available bandwidth is still unknown. | ||
| 276 | */ | ||
| 277 | if (ca->loss_cwnd == 0 && ca->cnt > 20) | ||
| 278 | ca->cnt = 20; /* increase cwnd 5% per RTT */ | ||
| 279 | |||
| 261 | /* TCP Friendly */ | 280 | /* TCP Friendly */ |
| 262 | if (tcp_friendliness) { | 281 | if (tcp_friendliness) { |
| 263 | u32 scale = beta_scale; | 282 | u32 scale = beta_scale; |
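The new clamp addresses slow ramp-up on a fresh connection: ca->cnt is the number of ACKs required per one-segment cwnd increase, and since roughly one cwnd's worth of ACKs arrives per RTT, capping cnt at 20 before the first loss (loss_cwnd == 0) guarantees growth of at least cwnd/20 segments, about 5% per RTT. The arithmetic as a sketch:

/* ca->cnt is "ACKs per cwnd increment": with about cwnd ACKs per
 * RTT, cnt = 20 means cwnd grows by cwnd/20 per RTT, i.e. ~5%. */
#include <stdio.h>

int main(void)
{
    unsigned cwnd = 100, cnt = 20;
    unsigned growth_per_rtt = cwnd / cnt;   /* segments added per RTT */
    printf("cwnd %u -> %u (+%u%%)\n",
           cwnd, cwnd + growth_per_rtt, 100 * growth_per_rtt / cwnd);
    return 0;
}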
| @@ -339,12 +358,12 @@ static void hystart_update(struct sock *sk, u32 delay) | |||
| 339 | struct bictcp *ca = inet_csk_ca(sk); | 358 | struct bictcp *ca = inet_csk_ca(sk); |
| 340 | 359 | ||
| 341 | if (!(ca->found & hystart_detect)) { | 360 | if (!(ca->found & hystart_detect)) { |
| 342 | u32 curr_jiffies = jiffies; | 361 | u32 now = bictcp_clock(); |
| 343 | 362 | ||
| 344 | /* first detection parameter - ack-train detection */ | 363 | /* first detection parameter - ack-train detection */ |
| 345 | if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { | 364 | if ((s32)(now - ca->last_ack) <= hystart_ack_delta) { |
| 346 | ca->last_jiffies = curr_jiffies; | 365 | ca->last_ack = now; |
| 347 | if (curr_jiffies - ca->round_start >= ca->delay_min>>4) | 366 | if ((s32)(now - ca->round_start) > ca->delay_min >> 4) |
| 348 | ca->found |= HYSTART_ACK_TRAIN; | 367 | ca->found |= HYSTART_ACK_TRAIN; |
| 349 | } | 368 | } |
| 350 | 369 | ||
| @@ -391,7 +410,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | |||
| 391 | if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) | 410 | if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) |
| 392 | return; | 411 | return; |
| 393 | 412 | ||
| 394 | delay = usecs_to_jiffies(rtt_us) << 3; | 413 | delay = (rtt_us << 3) / USEC_PER_MSEC; |
| 395 | if (delay == 0) | 414 | if (delay == 0) |
| 396 | delay = 1; | 415 | delay = 1; |
| 397 | 416 | ||
| @@ -405,7 +424,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | |||
| 405 | hystart_update(sk, delay); | 424 | hystart_update(sk, delay); |
| 406 | } | 425 | } |
| 407 | 426 | ||
| 408 | static struct tcp_congestion_ops cubictcp = { | 427 | static struct tcp_congestion_ops cubictcp __read_mostly = { |
| 409 | .init = bictcp_init, | 428 | .init = bictcp_init, |
| 410 | .ssthresh = bictcp_recalc_ssthresh, | 429 | .ssthresh = bictcp_recalc_ssthresh, |
| 411 | .cong_avoid = bictcp_cong_avoid, | 430 | .cong_avoid = bictcp_cong_avoid, |
| @@ -447,6 +466,10 @@ static int __init cubictcp_register(void) | |||
| 447 | /* divide by bic_scale and by constant Srtt (100ms) */ | 466 | /* divide by bic_scale and by constant Srtt (100ms) */ |
| 448 | do_div(cube_factor, bic_scale * 10); | 467 | do_div(cube_factor, bic_scale * 10); |
| 449 | 468 | ||
| 469 | /* hystart needs ms clock resolution */ | ||
| 470 | if (hystart && HZ < 1000) | ||
| 471 | cubictcp.flags |= TCP_CONG_RTT_STAMP; | ||
| 472 | |||
| 450 | return tcp_register_congestion_control(&cubictcp); | 473 | return tcp_register_congestion_control(&cubictcp); |
| 451 | } | 474 | } |
| 452 | 475 | ||
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 8b6caaf75bb9..30f27f6b3655 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c | |||
| @@ -158,7 +158,7 @@ static u32 hstcp_ssthresh(struct sock *sk) | |||
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | 160 | ||
| 161 | static struct tcp_congestion_ops tcp_highspeed = { | 161 | static struct tcp_congestion_ops tcp_highspeed __read_mostly = { |
| 162 | .init = hstcp_init, | 162 | .init = hstcp_init, |
| 163 | .ssthresh = hstcp_ssthresh, | 163 | .ssthresh = hstcp_ssthresh, |
| 164 | .cong_avoid = hstcp_cong_avoid, | 164 | .cong_avoid = hstcp_cong_avoid, |
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 7c94a4955416..c1a8175361e8 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c | |||
| @@ -284,7 +284,7 @@ static void htcp_state(struct sock *sk, u8 new_state) | |||
| 284 | } | 284 | } |
| 285 | } | 285 | } |
| 286 | 286 | ||
| 287 | static struct tcp_congestion_ops htcp = { | 287 | static struct tcp_congestion_ops htcp __read_mostly = { |
| 288 | .init = htcp_init, | 288 | .init = htcp_init, |
| 289 | .ssthresh = htcp_recalc_ssthresh, | 289 | .ssthresh = htcp_recalc_ssthresh, |
| 290 | .cong_avoid = htcp_cong_avoid, | 290 | .cong_avoid = htcp_cong_avoid, |
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 377bc9349371..fe3ecf484b44 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c | |||
| @@ -162,7 +162,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
| 162 | tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); | 162 | tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); |
| 163 | } | 163 | } |
| 164 | 164 | ||
| 165 | static struct tcp_congestion_ops tcp_hybla = { | 165 | static struct tcp_congestion_ops tcp_hybla __read_mostly = { |
| 166 | .init = hybla_init, | 166 | .init = hybla_init, |
| 167 | .ssthresh = tcp_reno_ssthresh, | 167 | .ssthresh = tcp_reno_ssthresh, |
| 168 | .min_cwnd = tcp_reno_min_cwnd, | 168 | .min_cwnd = tcp_reno_min_cwnd, |
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 00ca688d8964..813b43a76fec 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c | |||
| @@ -322,7 +322,7 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, | |||
| 322 | } | 322 | } |
| 323 | } | 323 | } |
| 324 | 324 | ||
| 325 | static struct tcp_congestion_ops tcp_illinois = { | 325 | static struct tcp_congestion_ops tcp_illinois __read_mostly = { |
| 326 | .flags = TCP_CONG_RTT_STAMP, | 326 | .flags = TCP_CONG_RTT_STAMP, |
| 327 | .init = tcp_illinois_init, | 327 | .init = tcp_illinois_init, |
| 328 | .ssthresh = tcp_illinois_ssthresh, | 328 | .ssthresh = tcp_illinois_ssthresh, |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2549b29b062d..bef9f04c22ba 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -817,7 +817,7 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) | |||
| 817 | __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); | 817 | __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); |
| 818 | 818 | ||
| 819 | if (!cwnd) | 819 | if (!cwnd) |
| 820 | cwnd = rfc3390_bytes_to_packets(tp->mss_cache); | 820 | cwnd = TCP_INIT_CWND; |
| 821 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); | 821 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); |
| 822 | } | 822 | } |
| 823 | 823 | ||
| @@ -1222,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, | |||
| 1222 | } | 1222 | } |
| 1223 | 1223 | ||
| 1224 | /* D-SACK for already forgotten data... Do dumb counting. */ | 1224 | /* D-SACK for already forgotten data... Do dumb counting. */ |
| 1225 | if (dup_sack && | 1225 | if (dup_sack && tp->undo_marker && tp->undo_retrans && |
| 1226 | !after(end_seq_0, prior_snd_una) && | 1226 | !after(end_seq_0, prior_snd_una) && |
| 1227 | after(end_seq_0, tp->undo_marker)) | 1227 | after(end_seq_0, tp->undo_marker)) |
| 1228 | tp->undo_retrans--; | 1228 | tp->undo_retrans--; |
| @@ -1299,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
| 1299 | 1299 | ||
| 1300 | /* Account D-SACK for retransmitted packet. */ | 1300 | /* Account D-SACK for retransmitted packet. */ |
| 1301 | if (dup_sack && (sacked & TCPCB_RETRANS)) { | 1301 | if (dup_sack && (sacked & TCPCB_RETRANS)) { |
| 1302 | if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) | 1302 | if (tp->undo_marker && tp->undo_retrans && |
| 1303 | after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) | ||
| 1303 | tp->undo_retrans--; | 1304 | tp->undo_retrans--; |
| 1304 | if (sacked & TCPCB_SACKED_ACKED) | 1305 | if (sacked & TCPCB_SACKED_ACKED) |
| 1305 | state->reord = min(fack_count, state->reord); | 1306 | state->reord = min(fack_count, state->reord); |
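Both hunks add the same guard: tp->undo_retrans may only be decremented while an undo is actually armed (undo_marker set and the counter non-zero); otherwise a stray D-SACK could drive the counter past zero and make a later "all retransmissions were spurious" check fire incorrectly. A minimal illustration:

/* undo_retrans is a plain counter: a D-SACK arriving when no undo
 * is armed must not decrement it, or it underflows. */
#include <stdio.h>

struct state { unsigned undo_marker; int undo_retrans; };

static void on_dsack(struct state *tp)
{
    if (tp->undo_marker && tp->undo_retrans)  /* the added guard */
        tp->undo_retrans--;
}

int main(void)
{
    struct state tp = { .undo_marker = 0, .undo_retrans = 0 };
    on_dsack(&tp);   /* without the guard this would reach -1 */
    printf("undo_retrans = %d\n", tp.undo_retrans);
    return 0;
}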
| @@ -2658,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
| 2658 | #define DBGUNDO(x...) do { } while (0) | 2659 | #define DBGUNDO(x...) do { } while (0) |
| 2659 | #endif | 2660 | #endif |
| 2660 | 2661 | ||
| 2661 | static void tcp_undo_cwr(struct sock *sk, const int undo) | 2662 | static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) |
| 2662 | { | 2663 | { |
| 2663 | struct tcp_sock *tp = tcp_sk(sk); | 2664 | struct tcp_sock *tp = tcp_sk(sk); |
| 2664 | 2665 | ||
| @@ -2670,14 +2671,13 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) | |||
| 2670 | else | 2671 | else |
| 2671 | tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); | 2672 | tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); |
| 2672 | 2673 | ||
| 2673 | if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { | 2674 | if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) { |
| 2674 | tp->snd_ssthresh = tp->prior_ssthresh; | 2675 | tp->snd_ssthresh = tp->prior_ssthresh; |
| 2675 | TCP_ECN_withdraw_cwr(tp); | 2676 | TCP_ECN_withdraw_cwr(tp); |
| 2676 | } | 2677 | } |
| 2677 | } else { | 2678 | } else { |
| 2678 | tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); | 2679 | tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); |
| 2679 | } | 2680 | } |
| 2680 | tcp_moderate_cwnd(tp); | ||
| 2681 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2681 | tp->snd_cwnd_stamp = tcp_time_stamp; |
| 2682 | } | 2682 | } |
| 2683 | 2683 | ||
| @@ -2698,7 +2698,7 @@ static int tcp_try_undo_recovery(struct sock *sk) | |||
| 2698 | * or our original transmission succeeded. | 2698 | * or our original transmission succeeded. |
| 2699 | */ | 2699 | */ |
| 2700 | DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); | 2700 | DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); |
| 2701 | tcp_undo_cwr(sk, 1); | 2701 | tcp_undo_cwr(sk, true); |
| 2702 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) | 2702 | if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) |
| 2703 | mib_idx = LINUX_MIB_TCPLOSSUNDO; | 2703 | mib_idx = LINUX_MIB_TCPLOSSUNDO; |
| 2704 | else | 2704 | else |
| @@ -2725,7 +2725,7 @@ static void tcp_try_undo_dsack(struct sock *sk) | |||
| 2725 | 2725 | ||
| 2726 | if (tp->undo_marker && !tp->undo_retrans) { | 2726 | if (tp->undo_marker && !tp->undo_retrans) { |
| 2727 | DBGUNDO(sk, "D-SACK"); | 2727 | DBGUNDO(sk, "D-SACK"); |
| 2728 | tcp_undo_cwr(sk, 1); | 2728 | tcp_undo_cwr(sk, true); |
| 2729 | tp->undo_marker = 0; | 2729 | tp->undo_marker = 0; |
| 2730 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); | 2730 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); |
| 2731 | } | 2731 | } |
| @@ -2778,7 +2778,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) | |||
| 2778 | tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); | 2778 | tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); |
| 2779 | 2779 | ||
| 2780 | DBGUNDO(sk, "Hoe"); | 2780 | DBGUNDO(sk, "Hoe"); |
| 2781 | tcp_undo_cwr(sk, 0); | 2781 | tcp_undo_cwr(sk, false); |
| 2782 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); | 2782 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); |
| 2783 | 2783 | ||
| 2784 | /* So... Do not make Hoe's retransmit yet. | 2784 | /* So... Do not make Hoe's retransmit yet. |
| @@ -2807,7 +2807,7 @@ static int tcp_try_undo_loss(struct sock *sk) | |||
| 2807 | 2807 | ||
| 2808 | DBGUNDO(sk, "partial loss"); | 2808 | DBGUNDO(sk, "partial loss"); |
| 2809 | tp->lost_out = 0; | 2809 | tp->lost_out = 0; |
| 2810 | tcp_undo_cwr(sk, 1); | 2810 | tcp_undo_cwr(sk, true); |
| 2811 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); | 2811 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); |
| 2812 | inet_csk(sk)->icsk_retransmits = 0; | 2812 | inet_csk(sk)->icsk_retransmits = 0; |
| 2813 | tp->undo_marker = 0; | 2813 | tp->undo_marker = 0; |
| @@ -2821,8 +2821,11 @@ static int tcp_try_undo_loss(struct sock *sk) | |||
| 2821 | static inline void tcp_complete_cwr(struct sock *sk) | 2821 | static inline void tcp_complete_cwr(struct sock *sk) |
| 2822 | { | 2822 | { |
| 2823 | struct tcp_sock *tp = tcp_sk(sk); | 2823 | struct tcp_sock *tp = tcp_sk(sk); |
| 2824 | tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); | 2824 | /* Do not moderate cwnd if it's already undone in cwr or recovery */ |
| 2825 | tp->snd_cwnd_stamp = tcp_time_stamp; | 2825 | if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { |
| 2826 | tp->snd_cwnd = tp->snd_ssthresh; | ||
| 2827 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
| 2828 | } | ||
| 2826 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); | 2829 | tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); |
| 2827 | } | 2830 | } |
| 2828 | 2831 | ||
| @@ -3349,7 +3352,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
| 3349 | net_invalid_timestamp())) | 3352 | net_invalid_timestamp())) |
| 3350 | rtt_us = ktime_us_delta(ktime_get_real(), | 3353 | rtt_us = ktime_us_delta(ktime_get_real(), |
| 3351 | last_ackt); | 3354 | last_ackt); |
| 3352 | else if (ca_seq_rtt > 0) | 3355 | else if (ca_seq_rtt >= 0) |
| 3353 | rtt_us = jiffies_to_usecs(ca_seq_rtt); | 3356 | rtt_us = jiffies_to_usecs(ca_seq_rtt); |
| 3354 | } | 3357 | } |
| 3355 | 3358 | ||
| @@ -3493,7 +3496,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag) | |||
| 3493 | if (flag & FLAG_ECE) | 3496 | if (flag & FLAG_ECE) |
| 3494 | tcp_ratehalving_spur_to_response(sk); | 3497 | tcp_ratehalving_spur_to_response(sk); |
| 3495 | else | 3498 | else |
| 3496 | tcp_undo_cwr(sk, 1); | 3499 | tcp_undo_cwr(sk, true); |
| 3497 | } | 3500 | } |
| 3498 | 3501 | ||
| 3499 | /* F-RTO spurious RTO detection algorithm (RFC4138) | 3502 | /* F-RTO spurious RTO detection algorithm (RFC4138) |
| @@ -4399,7 +4402,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
| 4399 | if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) { | 4402 | if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) { |
| 4400 | tp->ucopy.len -= chunk; | 4403 | tp->ucopy.len -= chunk; |
| 4401 | tp->copied_seq += chunk; | 4404 | tp->copied_seq += chunk; |
| 4402 | eaten = (chunk == skb->len && !th->fin); | 4405 | eaten = (chunk == skb->len); |
| 4403 | tcp_rcv_space_adjust(sk); | 4406 | tcp_rcv_space_adjust(sk); |
| 4404 | } | 4407 | } |
| 4405 | local_bh_disable(); | 4408 | local_bh_disable(); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 856f68466d49..f7e6c2c2d2bb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
| @@ -149,9 +149,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
| 149 | struct inet_sock *inet = inet_sk(sk); | 149 | struct inet_sock *inet = inet_sk(sk); |
| 150 | struct tcp_sock *tp = tcp_sk(sk); | 150 | struct tcp_sock *tp = tcp_sk(sk); |
| 151 | struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; | 151 | struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; |
| 152 | __be16 orig_sport, orig_dport; | ||
| 152 | struct rtable *rt; | 153 | struct rtable *rt; |
| 153 | __be32 daddr, nexthop; | 154 | __be32 daddr, nexthop; |
| 154 | int tmp; | ||
| 155 | int err; | 155 | int err; |
| 156 | 156 | ||
| 157 | if (addr_len < sizeof(struct sockaddr_in)) | 157 | if (addr_len < sizeof(struct sockaddr_in)) |
| @@ -167,14 +167,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
| 167 | nexthop = inet->opt->faddr; | 167 | nexthop = inet->opt->faddr; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, | 170 | orig_sport = inet->inet_sport; |
| 171 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, | 171 | orig_dport = usin->sin_port; |
| 172 | IPPROTO_TCP, | 172 | rt = ip_route_connect(nexthop, inet->inet_saddr, |
| 173 | inet->inet_sport, usin->sin_port, sk, 1); | 173 | RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, |
| 174 | if (tmp < 0) { | 174 | IPPROTO_TCP, |
| 175 | if (tmp == -ENETUNREACH) | 175 | orig_sport, orig_dport, sk, true); |
| 176 | if (IS_ERR(rt)) { | ||
| 177 | err = PTR_ERR(rt); | ||
| 178 | if (err == -ENETUNREACH) | ||
| 176 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); | 179 | IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); |
| 177 | return tmp; | 180 | return err; |
| 178 | } | 181 | } |
| 179 | 182 | ||
| 180 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { | 183 | if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { |
| @@ -233,11 +236,14 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
| 233 | if (err) | 236 | if (err) |
| 234 | goto failure; | 237 | goto failure; |
| 235 | 238 | ||
| 236 | err = ip_route_newports(&rt, IPPROTO_TCP, | 239 | rt = ip_route_newports(rt, IPPROTO_TCP, |
| 237 | inet->inet_sport, inet->inet_dport, sk); | 240 | orig_sport, orig_dport, |
| 238 | if (err) | 241 | inet->inet_sport, inet->inet_dport, sk); |
| 242 | if (IS_ERR(rt)) { | ||
| 243 | err = PTR_ERR(rt); | ||
| 244 | rt = NULL; | ||
| 239 | goto failure; | 245 | goto failure; |
| 240 | 246 | } | |
| 241 | /* OK, now commit destination to socket. */ | 247 | /* OK, now commit destination to socket. */ |
| 242 | sk->sk_gso_type = SKB_GSO_TCPV4; | 248 | sk->sk_gso_type = SKB_GSO_TCPV4; |
| 243 | sk_setup_caps(sk, &rt->dst); | 249 | sk_setup_caps(sk, &rt->dst); |
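ip_route_connect() and ip_route_newports() now hand back the rtable directly, with failures encoded in the pointer itself, replacing the old int-status-plus-output-parameter style; the caller tests IS_ERR() and recovers the errno with PTR_ERR(). A standalone sketch of that encoding (simplified, assuming a Linux-style negative-errno convention):

/* Errors ride in the pointer as small negative values near the top
 * of the address space; valid pointers never land there. */
#include <stdio.h>
#include <errno.h>
#include <stdint.h>

#define MAX_ERRNO 4095

static void *err_ptr(long err)       { return (void *)err; }
static long  ptr_err(const void *p)  { return (long)p; }
static int   is_err(const void *p)
{
    return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

static void *route_connect(int reachable)
{
    static int table;                /* stands in for a real rtable */
    return reachable ? (void *)&table : err_ptr(-ENETUNREACH);
}

int main(void)
{
    void *rt = route_connect(0);
    if (is_err(rt))
        printf("connect failed: %ld\n", ptr_err(rt));  /* -101 */
    return 0;
}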
| @@ -1341,7 +1347,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
| 1341 | tcp_death_row.sysctl_tw_recycle && | 1347 | tcp_death_row.sysctl_tw_recycle && |
| 1342 | (dst = inet_csk_route_req(sk, req)) != NULL && | 1348 | (dst = inet_csk_route_req(sk, req)) != NULL && |
| 1343 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 1349 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
| 1344 | peer->daddr.a4 == saddr) { | 1350 | peer->daddr.addr.a4 == saddr) { |
| 1345 | inet_peer_refcheck(peer); | 1351 | inet_peer_refcheck(peer); |
| 1346 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | 1352 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && |
| 1347 | (s32)(peer->tcp_ts - req->ts_recent) > | 1353 | (s32)(peer->tcp_ts - req->ts_recent) > |
| @@ -1556,12 +1562,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
| 1556 | 1562 | ||
| 1557 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ | 1563 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ |
| 1558 | sock_rps_save_rxhash(sk, skb->rxhash); | 1564 | sock_rps_save_rxhash(sk, skb->rxhash); |
| 1559 | TCP_CHECK_TIMER(sk); | ||
| 1560 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { | 1565 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { |
| 1561 | rsk = sk; | 1566 | rsk = sk; |
| 1562 | goto reset; | 1567 | goto reset; |
| 1563 | } | 1568 | } |
| 1564 | TCP_CHECK_TIMER(sk); | ||
| 1565 | return 0; | 1569 | return 0; |
| 1566 | } | 1570 | } |
| 1567 | 1571 | ||
| @@ -1583,13 +1587,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
| 1583 | } else | 1587 | } else |
| 1584 | sock_rps_save_rxhash(sk, skb->rxhash); | 1588 | sock_rps_save_rxhash(sk, skb->rxhash); |
| 1585 | 1589 | ||
| 1586 | |||
| 1587 | TCP_CHECK_TIMER(sk); | ||
| 1588 | if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { | 1590 | if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { |
| 1589 | rsk = sk; | 1591 | rsk = sk; |
| 1590 | goto reset; | 1592 | goto reset; |
| 1591 | } | 1593 | } |
| 1592 | TCP_CHECK_TIMER(sk); | ||
| 1593 | return 0; | 1594 | return 0; |
| 1594 | 1595 | ||
| 1595 | reset: | 1596 | reset: |
| @@ -1994,7 +1995,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
| 1994 | } | 1995 | } |
| 1995 | req = req->dl_next; | 1996 | req = req->dl_next; |
| 1996 | } | 1997 | } |
| 1997 | st->offset = 0; | ||
| 1998 | if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) | 1998 | if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) |
| 1999 | break; | 1999 | break; |
| 2000 | get_req: | 2000 | get_req: |
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index de870377fbba..656d431c99ad 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c | |||
| @@ -313,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) | |||
| 313 | lp->last_drop = tcp_time_stamp; | 313 | lp->last_drop = tcp_time_stamp; |
| 314 | } | 314 | } |
| 315 | 315 | ||
| 316 | static struct tcp_congestion_ops tcp_lp = { | 316 | static struct tcp_congestion_ops tcp_lp __read_mostly = { |
| 317 | .flags = TCP_CONG_RTT_STAMP, | 317 | .flags = TCP_CONG_RTT_STAMP, |
| 318 | .init = tcp_lp_init, | 318 | .init = tcp_lp_init, |
| 319 | .ssthresh = tcp_reno_ssthresh, | 319 | .ssthresh = tcp_reno_ssthresh, |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 406f320336e6..dfa5beb0c1c8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
| @@ -2162,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
| 2162 | if (!tp->retrans_stamp) | 2162 | if (!tp->retrans_stamp) |
| 2163 | tp->retrans_stamp = TCP_SKB_CB(skb)->when; | 2163 | tp->retrans_stamp = TCP_SKB_CB(skb)->when; |
| 2164 | 2164 | ||
| 2165 | tp->undo_retrans++; | 2165 | tp->undo_retrans += tcp_skb_pcount(skb); |
| 2166 | 2166 | ||
| 2167 | /* snd_nxt is stored to detect loss of retransmitted segment, | 2167 | /* snd_nxt is stored to detect loss of retransmitted segment, |
| 2168 | * see tcp_input.c tcp_sacktag_write_queue(). | 2168 | * see tcp_input.c tcp_sacktag_write_queue(). |
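With TSO, a single skb on the write queue can carry several MSS-sized segments, so retransmissions must be counted in segments (tcp_skb_pcount()) to stay consistent with the D-SACK accounting in tcp_input.c, which subtracts one segment at a time. A sketch of the bookkeeping:

/* Count in segments, not skbs: a TSO skb covering N segments adds
 * N to undo_retrans, since each later D-SACK subtracts one. */
#include <stdio.h>

static unsigned skb_pcount(unsigned len, unsigned mss)
{
    return (len + mss - 1) / mss;   /* segments carried by this skb */
}

int main(void)
{
    unsigned undo_retrans = 0;
    undo_retrans += skb_pcount(4380, 1460);       /* one TSO skb */
    printf("undo_retrans = %u\n", undo_retrans);  /* 3 */
    return 0;
}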
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index a76513779e2b..8ce55b8aaec8 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c | |||
| @@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk) | |||
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | 37 | ||
| 38 | static struct tcp_congestion_ops tcp_scalable = { | 38 | static struct tcp_congestion_ops tcp_scalable __read_mostly = { |
| 39 | .ssthresh = tcp_scalable_ssthresh, | 39 | .ssthresh = tcp_scalable_ssthresh, |
| 40 | .cong_avoid = tcp_scalable_cong_avoid, | 40 | .cong_avoid = tcp_scalable_cong_avoid, |
| 41 | .min_cwnd = tcp_reno_min_cwnd, | 41 | .min_cwnd = tcp_reno_min_cwnd, |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 74a6aa003657..ecd44b0c45f1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
| @@ -259,7 +259,6 @@ static void tcp_delack_timer(unsigned long data) | |||
| 259 | tcp_send_ack(sk); | 259 | tcp_send_ack(sk); |
| 260 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); | 260 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); |
| 261 | } | 261 | } |
| 262 | TCP_CHECK_TIMER(sk); | ||
| 263 | 262 | ||
| 264 | out: | 263 | out: |
| 265 | if (tcp_memory_pressure) | 264 | if (tcp_memory_pressure) |
| @@ -481,7 +480,6 @@ static void tcp_write_timer(unsigned long data) | |||
| 481 | tcp_probe_timer(sk); | 480 | tcp_probe_timer(sk); |
| 482 | break; | 481 | break; |
| 483 | } | 482 | } |
| 484 | TCP_CHECK_TIMER(sk); | ||
| 485 | 483 | ||
| 486 | out: | 484 | out: |
| 487 | sk_mem_reclaim(sk); | 485 | sk_mem_reclaim(sk); |
| @@ -589,7 +587,6 @@ static void tcp_keepalive_timer (unsigned long data) | |||
| 589 | elapsed = keepalive_time_when(tp) - elapsed; | 587 | elapsed = keepalive_time_when(tp) - elapsed; |
| 590 | } | 588 | } |
| 591 | 589 | ||
| 592 | TCP_CHECK_TIMER(sk); | ||
| 593 | sk_mem_reclaim(sk); | 590 | sk_mem_reclaim(sk); |
| 594 | 591 | ||
| 595 | resched: | 592 | resched: |
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index c6743eec9b7d..80fa2bfd7ede 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
| @@ -304,7 +304,7 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) | |||
| 304 | } | 304 | } |
| 305 | EXPORT_SYMBOL_GPL(tcp_vegas_get_info); | 305 | EXPORT_SYMBOL_GPL(tcp_vegas_get_info); |
| 306 | 306 | ||
| 307 | static struct tcp_congestion_ops tcp_vegas = { | 307 | static struct tcp_congestion_ops tcp_vegas __read_mostly = { |
| 308 | .flags = TCP_CONG_RTT_STAMP, | 308 | .flags = TCP_CONG_RTT_STAMP, |
| 309 | .init = tcp_vegas_init, | 309 | .init = tcp_vegas_init, |
| 310 | .ssthresh = tcp_reno_ssthresh, | 310 | .ssthresh = tcp_reno_ssthresh, |
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 38bc0b52d745..ac43cd747bce 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c | |||
| @@ -201,7 +201,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk) | |||
| 201 | return max(tp->snd_cwnd >> 1U, 2U); | 201 | return max(tp->snd_cwnd >> 1U, 2U); |
| 202 | } | 202 | } |
| 203 | 203 | ||
| 204 | static struct tcp_congestion_ops tcp_veno = { | 204 | static struct tcp_congestion_ops tcp_veno __read_mostly = { |
| 205 | .flags = TCP_CONG_RTT_STAMP, | 205 | .flags = TCP_CONG_RTT_STAMP, |
| 206 | .init = tcp_veno_init, | 206 | .init = tcp_veno_init, |
| 207 | .ssthresh = tcp_veno_ssthresh, | 207 | .ssthresh = tcp_veno_ssthresh, |
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index a534dda5456e..1b91bf48e277 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c | |||
| @@ -272,7 +272,7 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, | |||
| 272 | } | 272 | } |
| 273 | 273 | ||
| 274 | 274 | ||
| 275 | static struct tcp_congestion_ops tcp_westwood = { | 275 | static struct tcp_congestion_ops tcp_westwood __read_mostly = { |
| 276 | .init = tcp_westwood_init, | 276 | .init = tcp_westwood_init, |
| 277 | .ssthresh = tcp_reno_ssthresh, | 277 | .ssthresh = tcp_reno_ssthresh, |
| 278 | .cong_avoid = tcp_reno_cong_avoid, | 278 | .cong_avoid = tcp_reno_cong_avoid, |
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index a0f240358892..dc7f43179c9a 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c | |||
| @@ -225,7 +225,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { | |||
| 225 | return tp->snd_cwnd - reduction; | 225 | return tp->snd_cwnd - reduction; |
| 226 | } | 226 | } |
| 227 | 227 | ||
| 228 | static struct tcp_congestion_ops tcp_yeah = { | 228 | static struct tcp_congestion_ops tcp_yeah __read_mostly = { |
| 229 | .flags = TCP_CONG_RTT_STAMP, | 229 | .flags = TCP_CONG_RTT_STAMP, |
| 230 | .init = tcp_yeah_init, | 230 | .init = tcp_yeah_init, |
| 231 | .ssthresh = tcp_yeah_ssthresh, | 231 | .ssthresh = tcp_yeah_ssthresh, |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8157b17959ee..588f47af5faf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
| @@ -663,75 +663,72 @@ void udp_flush_pending_frames(struct sock *sk) | |||
| 663 | EXPORT_SYMBOL(udp_flush_pending_frames); | 663 | EXPORT_SYMBOL(udp_flush_pending_frames); |
| 664 | 664 | ||
| 665 | /** | 665 | /** |
| 666 | * udp4_hwcsum_outgoing - handle outgoing HW checksumming | 666 | * udp4_hwcsum - handle outgoing HW checksumming |
| 667 | * @sk: socket we are sending on | ||
| 668 | * @skb: sk_buff containing the filled-in UDP header | 667 | * @skb: sk_buff containing the filled-in UDP header |
| 669 | * (checksum field must be zeroed out) | 668 | * (checksum field must be zeroed out) |
| 669 | * @src: source IP address | ||
| 670 | * @dst: destination IP address | ||
| 670 | */ | 671 | */ |
| 671 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | 672 | static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) |
| 672 | __be32 src, __be32 dst, int len) | ||
| 673 | { | 673 | { |
| 674 | unsigned int offset; | ||
| 675 | struct udphdr *uh = udp_hdr(skb); | 674 | struct udphdr *uh = udp_hdr(skb); |
| 675 | struct sk_buff *frags = skb_shinfo(skb)->frag_list; | ||
| 676 | int offset = skb_transport_offset(skb); | ||
| 677 | int len = skb->len - offset; | ||
| 678 | int hlen = len; | ||
| 676 | __wsum csum = 0; | 679 | __wsum csum = 0; |
| 677 | 680 | ||
| 678 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | 681 | if (!frags) { |
| 679 | /* | 682 | /* |
| 680 | * Only one fragment on the socket. | 683 | * Only one fragment on the socket. |
| 681 | */ | 684 | */ |
| 682 | skb->csum_start = skb_transport_header(skb) - skb->head; | 685 | skb->csum_start = skb_transport_header(skb) - skb->head; |
| 683 | skb->csum_offset = offsetof(struct udphdr, check); | 686 | skb->csum_offset = offsetof(struct udphdr, check); |
| 684 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); | 687 | uh->check = ~csum_tcpudp_magic(src, dst, len, |
| 688 | IPPROTO_UDP, 0); | ||
| 685 | } else { | 689 | } else { |
| 686 | /* | 690 | /* |
| 687 | * HW-checksum won't work as there are two or more | 691 | * HW-checksum won't work as there are two or more |
| 688 | * fragments on the socket so that all csums of sk_buffs | 692 | * fragments on the socket so that all csums of sk_buffs |
| 689 | * should be together | 693 | * should be together |
| 690 | */ | 694 | */ |
| 691 | offset = skb_transport_offset(skb); | 695 | do { |
| 692 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | 696 | csum = csum_add(csum, frags->csum); |
| 697 | hlen -= frags->len; | ||
| 698 | } while ((frags = frags->next)); | ||
| 693 | 699 | ||
| 700 | csum = skb_checksum(skb, offset, hlen, csum); | ||
| 694 | skb->ip_summed = CHECKSUM_NONE; | 701 | skb->ip_summed = CHECKSUM_NONE; |
| 695 | 702 | ||
| 696 | skb_queue_walk(&sk->sk_write_queue, skb) { | ||
| 697 | csum = csum_add(csum, skb->csum); | ||
| 698 | } | ||
| 699 | |||
| 700 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); | 703 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); |
| 701 | if (uh->check == 0) | 704 | if (uh->check == 0) |
| 702 | uh->check = CSUM_MANGLED_0; | 705 | uh->check = CSUM_MANGLED_0; |
| 703 | } | 706 | } |
| 704 | } | 707 | } |
| 705 | 708 | ||
| 706 | /* | 709 | static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport) |
| 707 | * Push out all pending data as one UDP datagram. Socket is locked. | ||
| 708 | */ | ||
| 709 | static int udp_push_pending_frames(struct sock *sk) | ||
| 710 | { | 710 | { |
| 711 | struct udp_sock *up = udp_sk(sk); | 711 | struct sock *sk = skb->sk; |
| 712 | struct inet_sock *inet = inet_sk(sk); | 712 | struct inet_sock *inet = inet_sk(sk); |
| 713 | struct flowi *fl = &inet->cork.fl; | ||
| 714 | struct sk_buff *skb; | ||
| 715 | struct udphdr *uh; | 713 | struct udphdr *uh; |
| 714 | struct rtable *rt = (struct rtable *)skb_dst(skb); | ||
| 716 | int err = 0; | 715 | int err = 0; |
| 717 | int is_udplite = IS_UDPLITE(sk); | 716 | int is_udplite = IS_UDPLITE(sk); |
| 717 | int offset = skb_transport_offset(skb); | ||
| 718 | int len = skb->len - offset; | ||
| 718 | __wsum csum = 0; | 719 | __wsum csum = 0; |
| 719 | 720 | ||
| 720 | /* Grab the skbuff where UDP header space exists. */ | ||
| 721 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | ||
| 722 | goto out; | ||
| 723 | |||
| 724 | /* | 721 | /* |
| 725 | * Create a UDP header | 722 | * Create a UDP header |
| 726 | */ | 723 | */ |
| 727 | uh = udp_hdr(skb); | 724 | uh = udp_hdr(skb); |
| 728 | uh->source = fl->fl_ip_sport; | 725 | uh->source = inet->inet_sport; |
| 729 | uh->dest = fl->fl_ip_dport; | 726 | uh->dest = dport; |
| 730 | uh->len = htons(up->len); | 727 | uh->len = htons(len); |
| 731 | uh->check = 0; | 728 | uh->check = 0; |
| 732 | 729 | ||
| 733 | if (is_udplite) /* UDP-Lite */ | 730 | if (is_udplite) /* UDP-Lite */ |
| 734 | csum = udplite_csum_outgoing(sk, skb); | 731 | csum = udplite_csum(skb); |
| 735 | 732 | ||
| 736 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ | 733 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ |
| 737 | 734 | ||
| @@ -740,20 +737,20 @@ static int udp_push_pending_frames(struct sock *sk) | |||
| 740 | 737 | ||
| 741 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | 738 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ |
| 742 | 739 | ||
| 743 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); | 740 | udp4_hwcsum(skb, rt->rt_src, daddr); |
| 744 | goto send; | 741 | goto send; |
| 745 | 742 | ||
| 746 | } else /* `normal' UDP */ | 743 | } else |
| 747 | csum = udp_csum_outgoing(sk, skb); | 744 | csum = udp_csum(skb); |
| 748 | 745 | ||
| 749 | /* add protocol-dependent pseudo-header */ | 746 | /* add protocol-dependent pseudo-header */ |
| 750 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | 747 | uh->check = csum_tcpudp_magic(rt->rt_src, daddr, len, |
| 751 | sk->sk_protocol, csum); | 748 | sk->sk_protocol, csum); |
| 752 | if (uh->check == 0) | 749 | if (uh->check == 0) |
| 753 | uh->check = CSUM_MANGLED_0; | 750 | uh->check = CSUM_MANGLED_0; |
| 754 | 751 | ||
| 755 | send: | 752 | send: |
| 756 | err = ip_push_pending_frames(sk); | 753 | err = ip_send_skb(skb); |
| 757 | if (err) { | 754 | if (err) { |
| 758 | if (err == -ENOBUFS && !inet->recverr) { | 755 | if (err == -ENOBUFS && !inet->recverr) { |
| 759 | UDP_INC_STATS_USER(sock_net(sk), | 756 | UDP_INC_STATS_USER(sock_net(sk), |
| @@ -763,6 +760,26 @@ send: | |||
| 763 | } else | 760 | } else |
| 764 | UDP_INC_STATS_USER(sock_net(sk), | 761 | UDP_INC_STATS_USER(sock_net(sk), |
| 765 | UDP_MIB_OUTDATAGRAMS, is_udplite); | 762 | UDP_MIB_OUTDATAGRAMS, is_udplite); |
| 763 | return err; | ||
| 764 | } | ||
| 765 | |||
| 766 | /* | ||
| 767 | * Push out all pending data as one UDP datagram. Socket is locked. | ||
| 768 | */ | ||
| 769 | static int udp_push_pending_frames(struct sock *sk) | ||
| 770 | { | ||
| 771 | struct udp_sock *up = udp_sk(sk); | ||
| 772 | struct inet_sock *inet = inet_sk(sk); | ||
| 773 | struct flowi4 *fl4 = &inet->cork.fl.u.ip4; | ||
| 774 | struct sk_buff *skb; | ||
| 775 | int err = 0; | ||
| 776 | |||
| 777 | skb = ip_finish_skb(sk); | ||
| 778 | if (!skb) | ||
| 779 | goto out; | ||
| 780 | |||
| 781 | err = udp_send_skb(skb, fl4->daddr, fl4->fl4_dport); | ||
| 782 | |||
| 766 | out: | 783 | out: |
| 767 | up->len = 0; | 784 | up->len = 0; |
| 768 | up->pending = 0; | 785 | up->pending = 0; |
| @@ -774,6 +791,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 774 | { | 791 | { |
| 775 | struct inet_sock *inet = inet_sk(sk); | 792 | struct inet_sock *inet = inet_sk(sk); |
| 776 | struct udp_sock *up = udp_sk(sk); | 793 | struct udp_sock *up = udp_sk(sk); |
| 794 | struct flowi4 *fl4; | ||
| 777 | int ulen = len; | 795 | int ulen = len; |
| 778 | struct ipcm_cookie ipc; | 796 | struct ipcm_cookie ipc; |
| 779 | struct rtable *rt = NULL; | 797 | struct rtable *rt = NULL; |
| @@ -785,6 +803,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 785 | int err, is_udplite = IS_UDPLITE(sk); | 803 | int err, is_udplite = IS_UDPLITE(sk); |
| 786 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | 804 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; |
| 787 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | 805 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); |
| 806 | struct sk_buff *skb; | ||
| 788 | 807 | ||
| 789 | if (len > 0xFFFF) | 808 | if (len > 0xFFFF) |
| 790 | return -EMSGSIZE; | 809 | return -EMSGSIZE; |
| @@ -799,6 +818,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 799 | ipc.opt = NULL; | 818 | ipc.opt = NULL; |
| 800 | ipc.tx_flags = 0; | 819 | ipc.tx_flags = 0; |
| 801 | 820 | ||
| 821 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | ||
| 822 | |||
| 802 | if (up->pending) { | 823 | if (up->pending) { |
| 803 | /* | 824 | /* |
| 804 | * There are pending frames. | 825 | * There are pending frames. |
| @@ -888,20 +909,25 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
| 888 | rt = (struct rtable *)sk_dst_check(sk, 0); | 909 | rt = (struct rtable *)sk_dst_check(sk, 0); |
| 889 | 910 | ||
| 890 | if (rt == NULL) { | 911 | if (rt == NULL) { |
| 891 | struct flowi fl = { .oif = ipc.oif, | 912 | struct flowi4 fl4 = { |
| 892 | .mark = sk->sk_mark, | 913 | .flowi4_oif = ipc.oif, |
| 893 | .fl4_dst = faddr, | 914 | .flowi4_mark = sk->sk_mark, |
| 894 | .fl4_src = saddr, | 915 | .daddr = faddr, |
| 895 | .fl4_tos = tos, | 916 | .saddr = saddr, |
| 896 | .proto = sk->sk_protocol, | 917 | .flowi4_tos = tos, |
| 897 | .flags = inet_sk_flowi_flags(sk), | 918 | .flowi4_proto = sk->sk_protocol, |
| 898 | .fl_ip_sport = inet->inet_sport, | 919 | .flowi4_flags = (inet_sk_flowi_flags(sk) | |
| 899 | .fl_ip_dport = dport }; | 920 | FLOWI_FLAG_CAN_SLEEP), |
| 921 | .fl4_sport = inet->inet_sport, | ||
| 922 | .fl4_dport = dport, | ||
| 923 | }; | ||
| 900 | struct net *net = sock_net(sk); | 924 | struct net *net = sock_net(sk); |
| 901 | 925 | ||
| 902 | security_sk_classify_flow(sk, &fl); | 926 | security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); |
| 903 | err = ip_route_output_flow(net, &rt, &fl, sk, 1); | 927 | rt = ip_route_output_flow(net, &fl4, sk); |
| 904 | if (err) { | 928 | if (IS_ERR(rt)) { |
| 929 | err = PTR_ERR(rt); | ||
| 930 | rt = NULL; | ||
| 905 | if (err == -ENETUNREACH) | 931 | if (err == -ENETUNREACH) |
| 906 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); | 932 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); |
| 907 | goto out; | 933 | goto out; |
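The routing key moves from the old union-style flowi to a typed flowi4 filled with a designated initializer, and ip_route_output_flow() now returns the rtable or an error pointer. A reduced mock of the key's shape (field names follow this series; the struct below is illustrative, not the kernel's):

#include <stdio.h>
#include <stdint.h>

struct flowi4_mock {
    int      oif;
    uint32_t mark;
    uint32_t daddr, saddr;
    uint8_t  tos, proto;
    uint16_t sport, dport;
};

int main(void)
{
    struct flowi4_mock fl4 = {
        .oif   = 2,
        .daddr = 0x0a000001,   /* 10.0.0.1 */
        .proto = 17,           /* IPPROTO_UDP */
        .sport = 5000,
        .dport = 53,
    };
    printf("route lookup key: oif %d proto %u dport %u\n",
           fl4.oif, fl4.proto, fl4.dport);
    return 0;
}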
| @@ -923,6 +949,17 @@ back_from_confirm: | |||
| 923 | if (!ipc.addr) | 949 | if (!ipc.addr) |
| 924 | daddr = ipc.addr = rt->rt_dst; | 950 | daddr = ipc.addr = rt->rt_dst; |
| 925 | 951 | ||
| 952 | /* Lockless fast path for the non-corking case. */ | ||
| 953 | if (!corkreq) { | ||
| 954 | skb = ip_make_skb(sk, getfrag, msg->msg_iov, ulen, | ||
| 955 | sizeof(struct udphdr), &ipc, &rt, | ||
| 956 | msg->msg_flags); | ||
| 957 | err = PTR_ERR(skb); | ||
| 958 | if (skb && !IS_ERR(skb)) | ||
| 959 | err = udp_send_skb(skb, daddr, dport); | ||
| 960 | goto out; | ||
| 961 | } | ||
| 962 | |||
| 926 | lock_sock(sk); | 963 | lock_sock(sk); |
| 927 | if (unlikely(up->pending)) { | 964 | if (unlikely(up->pending)) { |
| 928 | /* The socket is already corked while preparing it. */ | 965 | /* The socket is already corked while preparing it. */ |
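This is the point of the conversion in this file: for uncorked sends, ip_make_skb() assembles the whole datagram without taking the socket lock or touching cork state, and udp_send_skb() pushes it out; only corked/MSG_MORE traffic falls through to the locked append path below. A control-flow sketch with hypothetical helpers standing in for the kernel functions:

#include <stdio.h>
#include <stdbool.h>

static int build_and_send(int len)          /* no socket lock taken */
{
    printf("fast path: %d bytes, one skb, lockless\n", len);
    return len;
}

static int locked_append(int len)           /* cork/MSG_MORE path */
{
    printf("slow path: lock socket, append %d bytes\n", len);
    return len;
}

static int udp_send(int len, bool corked)
{
    return corked ? locked_append(len) : build_and_send(len);
}

int main(void)
{
    udp_send(5, false);
    udp_send(5, true);
    return 0;
}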
| @@ -936,15 +973,15 @@ back_from_confirm: | |||
| 936 | /* | 973 | /* |
| 937 | * Now cork the socket to pend data. | 974 | * Now cork the socket to pend data. |
| 938 | */ | 975 | */ |
| 939 | inet->cork.fl.fl4_dst = daddr; | 976 | fl4 = &inet->cork.fl.u.ip4; |
| 940 | inet->cork.fl.fl_ip_dport = dport; | 977 | fl4->daddr = daddr; |
| 941 | inet->cork.fl.fl4_src = saddr; | 978 | fl4->saddr = saddr; |
| 942 | inet->cork.fl.fl_ip_sport = inet->inet_sport; | 979 | fl4->fl4_dport = dport; |
| 980 | fl4->fl4_sport = inet->inet_sport; | ||
| 943 | up->pending = AF_INET; | 981 | up->pending = AF_INET; |
| 944 | 982 | ||
| 945 | do_append_data: | 983 | do_append_data: |
| 946 | up->len += ulen; | 984 | up->len += ulen; |
| 947 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | ||
| 948 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, | 985 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, |
| 949 | sizeof(struct udphdr), &ipc, &rt, | 986 | sizeof(struct udphdr), &ipc, &rt, |
| 950 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | 987 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); |
| @@ -2199,7 +2236,7 @@ int udp4_ufo_send_check(struct sk_buff *skb) | |||
| 2199 | return 0; | 2236 | return 0; |
| 2200 | } | 2237 | } |
| 2201 | 2238 | ||
| 2202 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) | 2239 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) |
| 2203 | { | 2240 | { |
| 2204 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 2241 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
| 2205 | unsigned int mss; | 2242 | unsigned int mss; |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b057d40addec..13e0e7f659ff 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
| @@ -19,25 +19,23 @@ | |||
| 19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; | 19 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo; |
| 20 | 20 | ||
| 21 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, | 21 | static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, |
| 22 | xfrm_address_t *saddr, | 22 | const xfrm_address_t *saddr, |
| 23 | xfrm_address_t *daddr) | 23 | const xfrm_address_t *daddr) |
| 24 | { | 24 | { |
| 25 | struct flowi fl = { | 25 | struct flowi4 fl4 = { |
| 26 | .fl4_dst = daddr->a4, | 26 | .daddr = daddr->a4, |
| 27 | .fl4_tos = tos, | 27 | .flowi4_tos = tos, |
| 28 | }; | 28 | }; |
| 29 | struct dst_entry *dst; | ||
| 30 | struct rtable *rt; | 29 | struct rtable *rt; |
| 31 | int err; | ||
| 32 | 30 | ||
| 33 | if (saddr) | 31 | if (saddr) |
| 34 | fl.fl4_src = saddr->a4; | 32 | fl4.saddr = saddr->a4; |
| 33 | |||
| 34 | rt = __ip_route_output_key(net, &fl4); | ||
| 35 | if (!IS_ERR(rt)) | ||
| 36 | return &rt->dst; | ||
| 35 | 37 | ||
| 36 | err = __ip_route_output_key(net, &rt, &fl); | 38 | return ERR_CAST(rt); |
| 37 | dst = &rt->dst; | ||
| 38 | if (err) | ||
| 39 | dst = ERR_PTR(err); | ||
| 40 | return dst; | ||
| 41 | } | 39 | } |
| 42 | 40 | ||
| 43 | static int xfrm4_get_saddr(struct net *net, | 41 | static int xfrm4_get_saddr(struct net *net, |
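ERR_CAST() is the companion to this pattern: it re-types an error-carrying rtable pointer as a dst_entry pointer while preserving the encoded errno, so __ip_route_output_key()'s failure propagates unchanged. A minimal sketch:

/* ERR_CAST just re-types an error pointer: the negative-errno
 * payload survives the cast unchanged (sketch, mock structs). */
#include <stdio.h>

struct rtable_mock { int dummy; };
struct dst_mock    { int dummy; };

static struct dst_mock *err_cast(struct rtable_mock *rt)
{
    return (struct dst_mock *)rt;    /* value (the errno) unchanged */
}

int main(void)
{
    struct rtable_mock *rt = (struct rtable_mock *)(long)-101;
    struct dst_mock *dst = err_cast(rt);
    printf("still an error: %ld\n", (long)dst);   /* -101 */
    return 0;
}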
| @@ -56,9 +54,9 @@ static int xfrm4_get_saddr(struct net *net, | |||
| 56 | return 0; | 54 | return 0; |
| 57 | } | 55 | } |
| 58 | 56 | ||
| 59 | static int xfrm4_get_tos(struct flowi *fl) | 57 | static int xfrm4_get_tos(const struct flowi *fl) |
| 60 | { | 58 | { |
| 61 | return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ | 59 | return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */ |
| 62 | } | 60 | } |
| 63 | 61 | ||
| 64 | static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, | 62 | static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, |
| @@ -68,11 +66,17 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, | |||
| 68 | } | 66 | } |
| 69 | 67 | ||
| 70 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | 68 | static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, |
| 71 | struct flowi *fl) | 69 | const struct flowi *fl) |
| 72 | { | 70 | { |
| 73 | struct rtable *rt = (struct rtable *)xdst->route; | 71 | struct rtable *rt = (struct rtable *)xdst->route; |
| 72 | const struct flowi4 *fl4 = &fl->u.ip4; | ||
| 74 | 73 | ||
| 75 | xdst->u.rt.fl = *fl; | 74 | rt->rt_key_dst = fl4->daddr; |
| 75 | rt->rt_key_src = fl4->saddr; | ||
| 76 | rt->rt_tos = fl4->flowi4_tos; | ||
| 77 | rt->rt_iif = fl4->flowi4_iif; | ||
| 78 | rt->rt_oif = fl4->flowi4_oif; | ||
| 79 | rt->rt_mark = fl4->flowi4_mark; | ||
| 76 | 80 | ||
| 77 | xdst->u.dst.dev = dev; | 81 | xdst->u.dst.dev = dev; |
| 78 | dev_hold(dev); | 82 | dev_hold(dev); |
| @@ -99,9 +103,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
| 99 | { | 103 | { |
| 100 | struct iphdr *iph = ip_hdr(skb); | 104 | struct iphdr *iph = ip_hdr(skb); |
| 101 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; | 105 | u8 *xprth = skb_network_header(skb) + iph->ihl * 4; |
| 106 | struct flowi4 *fl4 = &fl->u.ip4; | ||
| 102 | 107 | ||
| 103 | memset(fl, 0, sizeof(struct flowi)); | 108 | memset(fl4, 0, sizeof(struct flowi4)); |
| 104 | fl->mark = skb->mark; | 109 | fl4->flowi4_mark = skb->mark; |
| 105 | 110 | ||
| 106 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { | 111 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { |
| 107 | switch (iph->protocol) { | 112 | switch (iph->protocol) { |
| @@ -114,8 +119,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
| 114 | pskb_may_pull(skb, xprth + 4 - skb->data)) { | 119 | pskb_may_pull(skb, xprth + 4 - skb->data)) { |
| 115 | __be16 *ports = (__be16 *)xprth; | 120 | __be16 *ports = (__be16 *)xprth; |
| 116 | 121 | ||
| 117 | fl->fl_ip_sport = ports[!!reverse]; | 122 | fl4->fl4_sport = ports[!!reverse]; |
| 118 | fl->fl_ip_dport = ports[!reverse]; | 123 | fl4->fl4_dport = ports[!reverse]; |
| 119 | } | 124 | } |
| 120 | break; | 125 | break; |
| 121 | 126 | ||
| @@ -123,8 +128,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
| 123 | if (pskb_may_pull(skb, xprth + 2 - skb->data)) { | 128 | if (pskb_may_pull(skb, xprth + 2 - skb->data)) { |
| 124 | u8 *icmp = xprth; | 129 | u8 *icmp = xprth; |
| 125 | 130 | ||
| 126 | fl->fl_icmp_type = icmp[0]; | 131 | fl4->fl4_icmp_type = icmp[0]; |
| 127 | fl->fl_icmp_code = icmp[1]; | 132 | fl4->fl4_icmp_code = icmp[1]; |
| 128 | } | 133 | } |
| 129 | break; | 134 | break; |
| 130 | 135 | ||
| @@ -132,7 +137,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
| 132 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | 137 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { |
| 133 | __be32 *ehdr = (__be32 *)xprth; | 138 | __be32 *ehdr = (__be32 *)xprth; |
| 134 | 139 | ||
| 135 | fl->fl_ipsec_spi = ehdr[0]; | 140 | fl4->fl4_ipsec_spi = ehdr[0]; |
| 136 | } | 141 | } |
| 137 | break; | 142 | break; |
| 138 | 143 | ||
| @@ -140,7 +145,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
| 140 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { | 145 | if (pskb_may_pull(skb, xprth + 8 - skb->data)) { |
| 141 | __be32 *ah_hdr = (__be32*)xprth; | 146 | __be32 *ah_hdr = (__be32*)xprth; |
| 142 | 147 | ||
| 143 | fl->fl_ipsec_spi = ah_hdr[1]; | 148 | fl4->fl4_ipsec_spi = ah_hdr[1]; |
| 144 | } | 149 | } |
| 145 | break; | 150 | break; |
| 146 | 151 | ||
| @@ -148,7 +153,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
| 148 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | 153 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { |
| 149 | __be16 *ipcomp_hdr = (__be16 *)xprth; | 154 | __be16 *ipcomp_hdr = (__be16 *)xprth; |
| 150 | 155 | ||
| 151 | fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); | 156 | fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); |
| 152 | } | 157 | } |
| 153 | break; | 158 | break; |
| 154 | 159 | ||
| @@ -160,20 +165,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
| 160 | if (greflags[0] & GRE_KEY) { | 165 | if (greflags[0] & GRE_KEY) { |
| 161 | if (greflags[0] & GRE_CSUM) | 166 | if (greflags[0] & GRE_CSUM) |
| 162 | gre_hdr++; | 167 | gre_hdr++; |
| 163 | fl->fl_gre_key = gre_hdr[1]; | 168 | fl4->fl4_gre_key = gre_hdr[1]; |
| 164 | } | 169 | } |
| 165 | } | 170 | } |
| 166 | break; | 171 | break; |
| 167 | 172 | ||
| 168 | default: | 173 | default: |
| 169 | fl->fl_ipsec_spi = 0; | 174 | fl4->fl4_ipsec_spi = 0; |
| 170 | break; | 175 | break; |
| 171 | } | 176 | } |
| 172 | } | 177 | } |
| 173 | fl->proto = iph->protocol; | 178 | fl4->flowi4_proto = iph->protocol; |
| 174 | fl->fl4_dst = reverse ? iph->saddr : iph->daddr; | 179 | fl4->daddr = reverse ? iph->saddr : iph->daddr; |
| 175 | fl->fl4_src = reverse ? iph->daddr : iph->saddr; | 180 | fl4->saddr = reverse ? iph->daddr : iph->saddr; |
| 176 | fl->fl4_tos = iph->tos; | 181 | fl4->flowi4_tos = iph->tos; |
| 177 | } | 182 | } |
| 178 | 183 | ||
| 179 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) | 184 | static inline int xfrm4_garbage_collect(struct dst_ops *ops) |
| @@ -196,8 +201,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) | |||
| 196 | { | 201 | { |
| 197 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; | 202 | struct xfrm_dst *xdst = (struct xfrm_dst *)dst; |
| 198 | 203 | ||
| 204 | dst_destroy_metrics_generic(dst); | ||
| 205 | |||
| 199 | if (likely(xdst->u.rt.peer)) | 206 | if (likely(xdst->u.rt.peer)) |
| 200 | inet_putpeer(xdst->u.rt.peer); | 207 | inet_putpeer(xdst->u.rt.peer); |
| 208 | |||
| 201 | xfrm_dst_destroy(xdst); | 209 | xfrm_dst_destroy(xdst); |
| 202 | } | 210 | } |
| 203 | 211 | ||
| @@ -215,6 +223,7 @@ static struct dst_ops xfrm4_dst_ops = { | |||
| 215 | .protocol = cpu_to_be16(ETH_P_IP), | 223 | .protocol = cpu_to_be16(ETH_P_IP), |
| 216 | .gc = xfrm4_garbage_collect, | 224 | .gc = xfrm4_garbage_collect, |
| 217 | .update_pmtu = xfrm4_update_pmtu, | 225 | .update_pmtu = xfrm4_update_pmtu, |
| 226 | .cow_metrics = dst_cow_metrics_generic, | ||
| 218 | .destroy = xfrm4_dst_destroy, | 227 | .destroy = xfrm4_dst_destroy, |
| 219 | .ifdown = xfrm4_dst_ifdown, | 228 | .ifdown = xfrm4_dst_ifdown, |
| 220 | .local_out = __ip_local_out, | 229 | .local_out = __ip_local_out, |
| @@ -230,6 +239,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | |||
| 230 | .get_tos = xfrm4_get_tos, | 239 | .get_tos = xfrm4_get_tos, |
| 231 | .init_path = xfrm4_init_path, | 240 | .init_path = xfrm4_init_path, |
| 232 | .fill_dst = xfrm4_fill_dst, | 241 | .fill_dst = xfrm4_fill_dst, |
| 242 | .blackhole_route = ipv4_blackhole_route, | ||
| 233 | }; | 243 | }; |
| 234 | 244 | ||
| 235 | #ifdef CONFIG_SYSCTL | 245 | #ifdef CONFIG_SYSCTL |
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 47947624eccc..1717c64628d1 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c | |||
| @@ -21,24 +21,26 @@ static int xfrm4_init_flags(struct xfrm_state *x) | |||
| 21 | } | 21 | } |
| 22 | 22 | ||
| 23 | static void | 23 | static void |
| 24 | __xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) | 24 | __xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) |
| 25 | { | 25 | { |
| 26 | sel->daddr.a4 = fl->fl4_dst; | 26 | const struct flowi4 *fl4 = &fl->u.ip4; |
| 27 | sel->saddr.a4 = fl->fl4_src; | 27 | |
| 28 | sel->dport = xfrm_flowi_dport(fl); | 28 | sel->daddr.a4 = fl4->daddr; |
| 29 | sel->saddr.a4 = fl4->saddr; | ||
| 30 | sel->dport = xfrm_flowi_dport(fl, &fl4->uli); | ||
| 29 | sel->dport_mask = htons(0xffff); | 31 | sel->dport_mask = htons(0xffff); |
| 30 | sel->sport = xfrm_flowi_sport(fl); | 32 | sel->sport = xfrm_flowi_sport(fl, &fl4->uli); |
| 31 | sel->sport_mask = htons(0xffff); | 33 | sel->sport_mask = htons(0xffff); |
| 32 | sel->family = AF_INET; | 34 | sel->family = AF_INET; |
| 33 | sel->prefixlen_d = 32; | 35 | sel->prefixlen_d = 32; |
| 34 | sel->prefixlen_s = 32; | 36 | sel->prefixlen_s = 32; |
| 35 | sel->proto = fl->proto; | 37 | sel->proto = fl4->flowi4_proto; |
| 36 | sel->ifindex = fl->oif; | 38 | sel->ifindex = fl4->flowi4_oif; |
| 37 | } | 39 | } |
| 38 | 40 | ||
| 39 | static void | 41 | static void |
| 40 | xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, | 42 | xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, |
| 41 | xfrm_address_t *daddr, xfrm_address_t *saddr) | 43 | const xfrm_address_t *daddr, const xfrm_address_t *saddr) |
| 42 | { | 44 | { |
| 43 | x->id = tmpl->id; | 45 | x->id = tmpl->id; |
| 44 | if (x->id.daddr.a4 == 0) | 46 | if (x->id.daddr.a4 == 0) |
