about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2011-04-26 16:28:44 -0400
committer: David S. Miller <davem@davemloft.net> 2011-04-27 16:59:04 -0400
commit: 2d7192d6cbab20e153c47fa1559ffd41ceef0e79 (patch)
tree: aac4c4132f5b4a173ad8f8d0bf24427e039bbc89
parent: 15ecd039b7182d725f4294e01f2fb12c3a88db17 (diff)
ipv4: Sanitize and simplify ip_route_{connect,newports}()
These functions are used together as a unit for route resolution during connect(). They address the chicken-and-egg problem that exists when ports need to be allocated during connect() processing, yet such port allocations require addressing information from the routing code.

The current implementation is more heavy-handed than it needs to be, and in particular we allocate and initialize a flow object twice.

Let the callers provide the on-stack flow object. That way we only need to initialize it once in the ip_route_connect() call.

Later, if ip_route_newports() needs to do anything, it re-uses that flow object as-is except for the ports, which it updates before the route re-lookup.

Also, describe why this set of facilities is needed and how it works in a big comment.

Signed-off-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com>
-rw-r--r-- include/net/route.h 88
-rw-r--r-- net/dccp/ipv4.c 10
-rw-r--r-- net/ipv4/af_inet.c 3
-rw-r--r-- net/ipv4/datagram.c 3
-rw-r--r-- net/ipv4/tcp_ipv4.c 10
-rw-r--r-- net/l2tp/l2tp_ip.c 8
6 files changed, 74 insertions, 48 deletions
diff --git a/include/net/route.h b/include/net/route.h
index 3684c3edbae4..79530da31b34 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -217,17 +217,37 @@ static inline char rt_tos2priority(u8 tos)
217 return ip_tos2prio[IPTOS_TOS(tos)>>1]; 217 return ip_tos2prio[IPTOS_TOS(tos)>>1];
218} 218}
219 219
220static inline struct rtable *ip_route_connect(__be32 dst, __be32 src, u32 tos, 220/* ip_route_connect() and ip_route_newports() work in tandem whilst
221 int oif, u8 protocol, 221 * binding a socket for a new outgoing connection.
222 __be16 sport, __be16 dport, 222 *
223 struct sock *sk, bool can_sleep) 223 * In order to use IPSEC properly, we must, in the end, have a
224 * route that was looked up using all available keys including source
225 * and destination ports.
226 *
227 * However, if a source port needs to be allocated (the user specified
228 * a wildcard source port) we need to obtain addressing information
229 * in order to perform that allocation.
230 *
231 * So ip_route_connect() looks up a route using wildcarded source and
232 * destination ports in the key, simply so that we can get a pair of
233 * addresses to use for port allocation.
234 *
235 * Later, once the ports are allocated, ip_route_newports() will make
236 * another route lookup if needed to make sure we catch any IPSEC
237 * rules keyed on the port information.
238 *
239 * The callers allocate the flow key on their stack, and must pass in
240 * the same flowi4 object to both the ip_route_connect() and the
241 * ip_route_newports() calls.
242 */
243
244static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src,
245 u32 tos, int oif, u8 protocol,
246 __be16 sport, __be16 dport,
247 struct sock *sk, bool can_sleep)
224{ 248{
225 struct net *net = sock_net(sk); 249 __u8 flow_flags = 0;
226 struct rtable *rt;
227 struct flowi4 fl4;
228 __u8 flow_flags;
229 250
230 flow_flags = 0;
231 if (inet_sk(sk)->transparent) 251 if (inet_sk(sk)->transparent)
232 flow_flags |= FLOWI_FLAG_ANYSRC; 252 flow_flags |= FLOWI_FLAG_ANYSRC;
233 if (protocol == IPPROTO_TCP) 253 if (protocol == IPPROTO_TCP)
@@ -235,41 +255,45 @@ static inline struct rtable *ip_route_connect(__be32 dst, __be32 src, u32 tos,
235 if (can_sleep) 255 if (can_sleep)
236 flow_flags |= FLOWI_FLAG_CAN_SLEEP; 256 flow_flags |= FLOWI_FLAG_CAN_SLEEP;
237 257
238 flowi4_init_output(&fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, 258 flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
239 protocol, flow_flags, dst, src, dport, sport); 259 protocol, flow_flags, dst, src, dport, sport);
260}
261
262static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
263 __be32 dst, __be32 src, u32 tos,
264 int oif, u8 protocol,
265 __be16 sport, __be16 dport,
266 struct sock *sk, bool can_sleep)
267{
268 struct net *net = sock_net(sk);
269 struct rtable *rt;
270
271 ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
272 sport, dport, sk, can_sleep);
240 273
241 if (!dst || !src) { 274 if (!dst || !src) {
242 rt = __ip_route_output_key(net, &fl4); 275 rt = __ip_route_output_key(net, fl4);
243 if (IS_ERR(rt)) 276 if (IS_ERR(rt))
244 return rt; 277 return rt;
245 fl4.daddr = rt->rt_dst; 278 fl4->daddr = rt->rt_dst;
246 fl4.saddr = rt->rt_src; 279 fl4->saddr = rt->rt_src;
247 ip_rt_put(rt); 280 ip_rt_put(rt);
248 } 281 }
249 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); 282 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
250 return ip_route_output_flow(net, &fl4, sk); 283 return ip_route_output_flow(net, fl4, sk);
251} 284}
252 285
253static inline struct rtable *ip_route_newports(struct rtable *rt, 286static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt,
254 u8 protocol, __be16 orig_sport, 287 __be16 orig_sport, __be16 orig_dport,
255 __be16 orig_dport, __be16 sport, 288 __be16 sport, __be16 dport,
256 __be16 dport, struct sock *sk) 289 struct sock *sk)
257{ 290{
258 if (sport != orig_sport || dport != orig_dport) { 291 if (sport != orig_sport || dport != orig_dport) {
259 struct flowi4 fl4; 292 fl4->fl4_dport = dport;
260 __u8 flow_flags; 293 fl4->fl4_sport = sport;
261
262 flow_flags = 0;
263 if (inet_sk(sk)->transparent)
264 flow_flags |= FLOWI_FLAG_ANYSRC;
265 if (protocol == IPPROTO_TCP)
266 flow_flags |= FLOWI_FLAG_PRECOW_METRICS;
267 flowi4_init_output(&fl4, rt->rt_oif, rt->rt_mark, rt->rt_tos,
268 RT_SCOPE_UNIVERSE, protocol, flow_flags,
269 rt->rt_dst, rt->rt_src, dport, sport);
270 ip_rt_put(rt); 294 ip_rt_put(rt);
271 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); 295 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
272 return ip_route_output_flow(sock_net(sk), &fl4, sk); 296 return ip_route_output_flow(sock_net(sk), fl4, sk);
273 } 297 }
274 return rt; 298 return rt;
275} 299}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ae451c6d83ba..b92ab655d44e 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -40,12 +40,13 @@
40 40
41int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 41int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
42{ 42{
43 const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
43 struct inet_sock *inet = inet_sk(sk); 44 struct inet_sock *inet = inet_sk(sk);
44 struct dccp_sock *dp = dccp_sk(sk); 45 struct dccp_sock *dp = dccp_sk(sk);
45 const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
46 __be16 orig_sport, orig_dport; 46 __be16 orig_sport, orig_dport;
47 struct rtable *rt;
48 __be32 daddr, nexthop; 47 __be32 daddr, nexthop;
48 struct flowi4 fl4;
49 struct rtable *rt;
49 int err; 50 int err;
50 51
51 dp->dccps_role = DCCP_ROLE_CLIENT; 52 dp->dccps_role = DCCP_ROLE_CLIENT;
@@ -65,7 +66,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
65 66
66 orig_sport = inet->inet_sport; 67 orig_sport = inet->inet_sport;
67 orig_dport = usin->sin_port; 68 orig_dport = usin->sin_port;
68 rt = ip_route_connect(nexthop, inet->inet_saddr, 69 rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr,
69 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 70 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
70 IPPROTO_DCCP, 71 IPPROTO_DCCP,
71 orig_sport, orig_dport, sk, true); 72 orig_sport, orig_dport, sk, true);
@@ -101,8 +102,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
101 if (err != 0) 102 if (err != 0)
102 goto failure; 103 goto failure;
103 104
104 rt = ip_route_newports(rt, IPPROTO_DCCP, 105 rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport,
105 orig_sport, orig_dport,
106 inet->inet_sport, inet->inet_dport, sk); 106 inet->inet_sport, inet->inet_dport, sk);
107 if (IS_ERR(rt)) { 107 if (IS_ERR(rt)) {
108 rt = NULL; 108 rt = NULL;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cae75ef21fea..0413af3e2285 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1103,6 +1103,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1103 struct inet_sock *inet = inet_sk(sk); 1103 struct inet_sock *inet = inet_sk(sk);
1104 __be32 old_saddr = inet->inet_saddr; 1104 __be32 old_saddr = inet->inet_saddr;
1105 __be32 daddr = inet->inet_daddr; 1105 __be32 daddr = inet->inet_daddr;
1106 struct flowi4 fl4;
1106 struct rtable *rt; 1107 struct rtable *rt;
1107 __be32 new_saddr; 1108 __be32 new_saddr;
1108 1109
@@ -1110,7 +1111,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1110 daddr = inet->opt->faddr; 1111 daddr = inet->opt->faddr;
1111 1112
1112 /* Query new route. */ 1113 /* Query new route. */
1113 rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk), 1114 rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk),
1114 sk->sk_bound_dev_if, sk->sk_protocol, 1115 sk->sk_bound_dev_if, sk->sk_protocol,
1115 inet->inet_sport, inet->inet_dport, sk, false); 1116 inet->inet_sport, inet->inet_dport, sk, false);
1116 if (IS_ERR(rt)) 1117 if (IS_ERR(rt))
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 85bd24ca4f6d..216ba2338b64 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -24,6 +24,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
24{ 24{
25 struct inet_sock *inet = inet_sk(sk); 25 struct inet_sock *inet = inet_sk(sk);
26 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; 26 struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
27 struct flowi4 fl4;
27 struct rtable *rt; 28 struct rtable *rt;
28 __be32 saddr; 29 __be32 saddr;
29 int oif; 30 int oif;
@@ -46,7 +47,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
46 if (!saddr) 47 if (!saddr)
47 saddr = inet->mc_addr; 48 saddr = inet->mc_addr;
48 } 49 }
49 rt = ip_route_connect(usin->sin_addr.s_addr, saddr, 50 rt = ip_route_connect(&fl4, usin->sin_addr.s_addr, saddr,
50 RT_CONN_FLAGS(sk), oif, 51 RT_CONN_FLAGS(sk), oif,
51 sk->sk_protocol, 52 sk->sk_protocol,
52 inet->inet_sport, usin->sin_port, sk, true); 53 inet->inet_sport, usin->sin_port, sk, true);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index edf18bd74b87..310454c2f4d1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -146,12 +146,13 @@ EXPORT_SYMBOL_GPL(tcp_twsk_unique);
146/* This will initiate an outgoing connection. */ 146/* This will initiate an outgoing connection. */
147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
148{ 148{
149 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
149 struct inet_sock *inet = inet_sk(sk); 150 struct inet_sock *inet = inet_sk(sk);
150 struct tcp_sock *tp = tcp_sk(sk); 151 struct tcp_sock *tp = tcp_sk(sk);
151 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
152 __be16 orig_sport, orig_dport; 152 __be16 orig_sport, orig_dport;
153 struct rtable *rt;
154 __be32 daddr, nexthop; 153 __be32 daddr, nexthop;
154 struct flowi4 fl4;
155 struct rtable *rt;
155 int err; 156 int err;
156 157
157 if (addr_len < sizeof(struct sockaddr_in)) 158 if (addr_len < sizeof(struct sockaddr_in))
@@ -169,7 +170,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
169 170
170 orig_sport = inet->inet_sport; 171 orig_sport = inet->inet_sport;
171 orig_dport = usin->sin_port; 172 orig_dport = usin->sin_port;
172 rt = ip_route_connect(nexthop, inet->inet_saddr, 173 rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr,
173 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 174 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
174 IPPROTO_TCP, 175 IPPROTO_TCP,
175 orig_sport, orig_dport, sk, true); 176 orig_sport, orig_dport, sk, true);
@@ -236,8 +237,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
236 if (err) 237 if (err)
237 goto failure; 238 goto failure;
238 239
239 rt = ip_route_newports(rt, IPPROTO_TCP, 240 rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport,
240 orig_sport, orig_dport,
241 inet->inet_sport, inet->inet_dport, sk); 241 inet->inet_sport, inet->inet_dport, sk);
242 if (IS_ERR(rt)) { 242 if (IS_ERR(rt)) {
243 err = PTR_ERR(rt); 243 err = PTR_ERR(rt);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index fce9bd3bd3fe..cc673677c5de 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -296,12 +296,12 @@ out_in_use:
296 296
297static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 297static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
298{ 298{
299 int rc;
300 struct inet_sock *inet = inet_sk(sk);
301 struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr; 299 struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
300 struct inet_sock *inet = inet_sk(sk);
301 struct flowi4 fl4;
302 struct rtable *rt; 302 struct rtable *rt;
303 __be32 saddr; 303 __be32 saddr;
304 int oif; 304 int oif, rc;
305 305
306 rc = -EINVAL; 306 rc = -EINVAL;
307 if (addr_len < sizeof(*lsa)) 307 if (addr_len < sizeof(*lsa))
@@ -320,7 +320,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
320 if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) 320 if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
321 goto out; 321 goto out;
322 322
323 rt = ip_route_connect(lsa->l2tp_addr.s_addr, saddr, 323 rt = ip_route_connect(&fl4, lsa->l2tp_addr.s_addr, saddr,
324 RT_CONN_FLAGS(sk), oif, 324 RT_CONN_FLAGS(sk), oif,
325 IPPROTO_L2TP, 325 IPPROTO_L2TP,
326 0, 0, sk, true); 326 0, 0, sk, true);