aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ip_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r--net/ipv4/ip_output.c93
1 files changed, 50 insertions, 43 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 451f97c42eb4..ba39a52d18c1 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -113,19 +113,6 @@ int ip_local_out(struct sk_buff *skb)
113} 113}
114EXPORT_SYMBOL_GPL(ip_local_out); 114EXPORT_SYMBOL_GPL(ip_local_out);
115 115
116/* dev_loopback_xmit for use with netfilter. */
117static int ip_dev_loopback_xmit(struct sk_buff *newskb)
118{
119 skb_reset_mac_header(newskb);
120 __skb_pull(newskb, skb_network_offset(newskb));
121 newskb->pkt_type = PACKET_LOOPBACK;
122 newskb->ip_summed = CHECKSUM_UNNECESSARY;
123 WARN_ON(!skb_dst(newskb));
124 skb_dst_force(newskb);
125 netif_rx_ni(newskb);
126 return 0;
127}
128
129static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) 116static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
130{ 117{
131 int ttl = inet->uc_ttl; 118 int ttl = inet->uc_ttl;
@@ -183,6 +170,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
183 struct net_device *dev = dst->dev; 170 struct net_device *dev = dst->dev;
184 unsigned int hh_len = LL_RESERVED_SPACE(dev); 171 unsigned int hh_len = LL_RESERVED_SPACE(dev);
185 struct neighbour *neigh; 172 struct neighbour *neigh;
173 u32 nexthop;
186 174
187 if (rt->rt_type == RTN_MULTICAST) { 175 if (rt->rt_type == RTN_MULTICAST) {
188 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); 176 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
@@ -200,19 +188,22 @@ static inline int ip_finish_output2(struct sk_buff *skb)
200 } 188 }
201 if (skb->sk) 189 if (skb->sk)
202 skb_set_owner_w(skb2, skb->sk); 190 skb_set_owner_w(skb2, skb->sk);
203 kfree_skb(skb); 191 consume_skb(skb);
204 skb = skb2; 192 skb = skb2;
205 } 193 }
206 194
207 rcu_read_lock(); 195 rcu_read_lock_bh();
208 neigh = dst_get_neighbour_noref(dst); 196 nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr;
197 neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
198 if (unlikely(!neigh))
199 neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
209 if (neigh) { 200 if (neigh) {
210 int res = neigh_output(neigh, skb); 201 int res = dst_neigh_output(dst, neigh, skb);
211 202
212 rcu_read_unlock(); 203 rcu_read_unlock_bh();
213 return res; 204 return res;
214 } 205 }
215 rcu_read_unlock(); 206 rcu_read_unlock_bh();
216 207
217 net_dbg_ratelimited("%s: No header cache and no neighbour!\n", 208 net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
218 __func__); 209 __func__);
@@ -281,7 +272,7 @@ int ip_mc_output(struct sk_buff *skb)
281 if (newskb) 272 if (newskb)
282 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, 273 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
283 newskb, NULL, newskb->dev, 274 newskb, NULL, newskb->dev,
284 ip_dev_loopback_xmit); 275 dev_loopback_xmit);
285 } 276 }
286 277
287 /* Multicasts with ttl 0 must not go beyond the host */ 278 /* Multicasts with ttl 0 must not go beyond the host */
@@ -296,7 +287,7 @@ int ip_mc_output(struct sk_buff *skb)
296 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); 287 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
297 if (newskb) 288 if (newskb)
298 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, 289 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
299 NULL, newskb->dev, ip_dev_loopback_xmit); 290 NULL, newskb->dev, dev_loopback_xmit);
300 } 291 }
301 292
302 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, 293 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
@@ -380,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
380 skb_dst_set_noref(skb, &rt->dst); 371 skb_dst_set_noref(skb, &rt->dst);
381 372
382packet_routed: 373packet_routed:
383 if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) 374 if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway)
384 goto no_route; 375 goto no_route;
385 376
386 /* OK, we know where to send it, allocate and build IP header. */ 377 /* OK, we know where to send it, allocate and build IP header. */
@@ -709,7 +700,7 @@ slow_path:
709 700
710 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); 701 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
711 } 702 }
712 kfree_skb(skb); 703 consume_skb(skb);
713 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); 704 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
714 return err; 705 return err;
715 706
@@ -1472,19 +1463,34 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1472 1463
1473/* 1464/*
1474 * Generic function to send a packet as reply to another packet. 1465 * Generic function to send a packet as reply to another packet.
1475 * Used to send TCP resets so far. ICMP should use this function too. 1466 * Used to send some TCP resets/acks so far.
1476 * 1467 *
1477 * Should run single threaded per socket because it uses the sock 1468 * Use a fake percpu inet socket to avoid false sharing and contention.
1478 * structure to pass arguments.
1479 */ 1469 */
1480void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, 1470static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = {
1481 const struct ip_reply_arg *arg, unsigned int len) 1471 .sk = {
1472 .__sk_common = {
1473 .skc_refcnt = ATOMIC_INIT(1),
1474 },
1475 .sk_wmem_alloc = ATOMIC_INIT(1),
1476 .sk_allocation = GFP_ATOMIC,
1477 .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE),
1478 },
1479 .pmtudisc = IP_PMTUDISC_WANT,
1480 .uc_ttl = -1,
1481};
1482
1483void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
1484 __be32 saddr, const struct ip_reply_arg *arg,
1485 unsigned int len)
1482{ 1486{
1483 struct inet_sock *inet = inet_sk(sk);
1484 struct ip_options_data replyopts; 1487 struct ip_options_data replyopts;
1485 struct ipcm_cookie ipc; 1488 struct ipcm_cookie ipc;
1486 struct flowi4 fl4; 1489 struct flowi4 fl4;
1487 struct rtable *rt = skb_rtable(skb); 1490 struct rtable *rt = skb_rtable(skb);
1491 struct sk_buff *nskb;
1492 struct sock *sk;
1493 struct inet_sock *inet;
1488 1494
1489 if (ip_options_echo(&replyopts.opt.opt, skb)) 1495 if (ip_options_echo(&replyopts.opt.opt, skb))
1490 return; 1496 return;
@@ -1502,38 +1508,39 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1502 1508
1503 flowi4_init_output(&fl4, arg->bound_dev_if, 0, 1509 flowi4_init_output(&fl4, arg->bound_dev_if, 0,
1504 RT_TOS(arg->tos), 1510 RT_TOS(arg->tos),
1505 RT_SCOPE_UNIVERSE, sk->sk_protocol, 1511 RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
1506 ip_reply_arg_flowi_flags(arg), 1512 ip_reply_arg_flowi_flags(arg),
1507 daddr, rt->rt_spec_dst, 1513 daddr, saddr,
1508 tcp_hdr(skb)->source, tcp_hdr(skb)->dest); 1514 tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
1509 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 1515 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1510 rt = ip_route_output_key(sock_net(sk), &fl4); 1516 rt = ip_route_output_key(net, &fl4);
1511 if (IS_ERR(rt)) 1517 if (IS_ERR(rt))
1512 return; 1518 return;
1513 1519
1514 /* And let IP do all the hard work. 1520 inet = &get_cpu_var(unicast_sock);
1515 1521
1516 This chunk is not reenterable, hence spinlock.
1517 Note that it uses the fact, that this function is called
1518 with locally disabled BH and that sk cannot be already spinlocked.
1519 */
1520 bh_lock_sock(sk);
1521 inet->tos = arg->tos; 1522 inet->tos = arg->tos;
1523 sk = &inet->sk;
1522 sk->sk_priority = skb->priority; 1524 sk->sk_priority = skb->priority;
1523 sk->sk_protocol = ip_hdr(skb)->protocol; 1525 sk->sk_protocol = ip_hdr(skb)->protocol;
1524 sk->sk_bound_dev_if = arg->bound_dev_if; 1526 sk->sk_bound_dev_if = arg->bound_dev_if;
1527 sock_net_set(sk, net);
1528 __skb_queue_head_init(&sk->sk_write_queue);
1529 sk->sk_sndbuf = sysctl_wmem_default;
1525 ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, 1530 ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1526 &ipc, &rt, MSG_DONTWAIT); 1531 &ipc, &rt, MSG_DONTWAIT);
1527 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 1532 nskb = skb_peek(&sk->sk_write_queue);
1533 if (nskb) {
1528 if (arg->csumoffset >= 0) 1534 if (arg->csumoffset >= 0)
1529 *((__sum16 *)skb_transport_header(skb) + 1535 *((__sum16 *)skb_transport_header(nskb) +
1530 arg->csumoffset) = csum_fold(csum_add(skb->csum, 1536 arg->csumoffset) = csum_fold(csum_add(nskb->csum,
1531 arg->csum)); 1537 arg->csum));
1532 skb->ip_summed = CHECKSUM_NONE; 1538 nskb->ip_summed = CHECKSUM_NONE;
1539 skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
1533 ip_push_pending_frames(sk, &fl4); 1540 ip_push_pending_frames(sk, &fl4);
1534 } 1541 }
1535 1542
1536 bh_unlock_sock(sk); 1543 put_cpu_var(unicast_sock);
1537 1544
1538 ip_rt_put(rt); 1545 ip_rt_put(rt);
1539} 1546}