diff options
Diffstat (limited to 'net/ipv4/ip_output.c')
| -rw-r--r-- | net/ipv4/ip_output.c | 93 |
1 files changed, 50 insertions, 43 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 451f97c42eb4..ba39a52d18c1 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
| @@ -113,19 +113,6 @@ int ip_local_out(struct sk_buff *skb) | |||
| 113 | } | 113 | } |
| 114 | EXPORT_SYMBOL_GPL(ip_local_out); | 114 | EXPORT_SYMBOL_GPL(ip_local_out); |
| 115 | 115 | ||
| 116 | /* dev_loopback_xmit for use with netfilter. */ | ||
| 117 | static int ip_dev_loopback_xmit(struct sk_buff *newskb) | ||
| 118 | { | ||
| 119 | skb_reset_mac_header(newskb); | ||
| 120 | __skb_pull(newskb, skb_network_offset(newskb)); | ||
| 121 | newskb->pkt_type = PACKET_LOOPBACK; | ||
| 122 | newskb->ip_summed = CHECKSUM_UNNECESSARY; | ||
| 123 | WARN_ON(!skb_dst(newskb)); | ||
| 124 | skb_dst_force(newskb); | ||
| 125 | netif_rx_ni(newskb); | ||
| 126 | return 0; | ||
| 127 | } | ||
| 128 | |||
| 129 | static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) | 116 | static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) |
| 130 | { | 117 | { |
| 131 | int ttl = inet->uc_ttl; | 118 | int ttl = inet->uc_ttl; |
| @@ -183,6 +170,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
| 183 | struct net_device *dev = dst->dev; | 170 | struct net_device *dev = dst->dev; |
| 184 | unsigned int hh_len = LL_RESERVED_SPACE(dev); | 171 | unsigned int hh_len = LL_RESERVED_SPACE(dev); |
| 185 | struct neighbour *neigh; | 172 | struct neighbour *neigh; |
| 173 | u32 nexthop; | ||
| 186 | 174 | ||
| 187 | if (rt->rt_type == RTN_MULTICAST) { | 175 | if (rt->rt_type == RTN_MULTICAST) { |
| 188 | IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); | 176 | IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); |
| @@ -200,19 +188,22 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
| 200 | } | 188 | } |
| 201 | if (skb->sk) | 189 | if (skb->sk) |
| 202 | skb_set_owner_w(skb2, skb->sk); | 190 | skb_set_owner_w(skb2, skb->sk); |
| 203 | kfree_skb(skb); | 191 | consume_skb(skb); |
| 204 | skb = skb2; | 192 | skb = skb2; |
| 205 | } | 193 | } |
| 206 | 194 | ||
| 207 | rcu_read_lock(); | 195 | rcu_read_lock_bh(); |
| 208 | neigh = dst_get_neighbour_noref(dst); | 196 | nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr; |
| 197 | neigh = __ipv4_neigh_lookup_noref(dev, nexthop); | ||
| 198 | if (unlikely(!neigh)) | ||
| 199 | neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); | ||
| 209 | if (neigh) { | 200 | if (neigh) { |
| 210 | int res = neigh_output(neigh, skb); | 201 | int res = dst_neigh_output(dst, neigh, skb); |
| 211 | 202 | ||
| 212 | rcu_read_unlock(); | 203 | rcu_read_unlock_bh(); |
| 213 | return res; | 204 | return res; |
| 214 | } | 205 | } |
| 215 | rcu_read_unlock(); | 206 | rcu_read_unlock_bh(); |
| 216 | 207 | ||
| 217 | net_dbg_ratelimited("%s: No header cache and no neighbour!\n", | 208 | net_dbg_ratelimited("%s: No header cache and no neighbour!\n", |
| 218 | __func__); | 209 | __func__); |
| @@ -281,7 +272,7 @@ int ip_mc_output(struct sk_buff *skb) | |||
| 281 | if (newskb) | 272 | if (newskb) |
| 282 | NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, | 273 | NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, |
| 283 | newskb, NULL, newskb->dev, | 274 | newskb, NULL, newskb->dev, |
| 284 | ip_dev_loopback_xmit); | 275 | dev_loopback_xmit); |
| 285 | } | 276 | } |
| 286 | 277 | ||
| 287 | /* Multicasts with ttl 0 must not go beyond the host */ | 278 | /* Multicasts with ttl 0 must not go beyond the host */ |
| @@ -296,7 +287,7 @@ int ip_mc_output(struct sk_buff *skb) | |||
| 296 | struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); | 287 | struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); |
| 297 | if (newskb) | 288 | if (newskb) |
| 298 | NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, | 289 | NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, |
| 299 | NULL, newskb->dev, ip_dev_loopback_xmit); | 290 | NULL, newskb->dev, dev_loopback_xmit); |
| 300 | } | 291 | } |
| 301 | 292 | ||
| 302 | return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, | 293 | return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, |
| @@ -380,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) | |||
| 380 | skb_dst_set_noref(skb, &rt->dst); | 371 | skb_dst_set_noref(skb, &rt->dst); |
| 381 | 372 | ||
| 382 | packet_routed: | 373 | packet_routed: |
| 383 | if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) | 374 | if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway) |
| 384 | goto no_route; | 375 | goto no_route; |
| 385 | 376 | ||
| 386 | /* OK, we know where to send it, allocate and build IP header. */ | 377 | /* OK, we know where to send it, allocate and build IP header. */ |
| @@ -709,7 +700,7 @@ slow_path: | |||
| 709 | 700 | ||
| 710 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); | 701 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); |
| 711 | } | 702 | } |
| 712 | kfree_skb(skb); | 703 | consume_skb(skb); |
| 713 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); | 704 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); |
| 714 | return err; | 705 | return err; |
| 715 | 706 | ||
| @@ -1472,19 +1463,34 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, | |||
| 1472 | 1463 | ||
| 1473 | /* | 1464 | /* |
| 1474 | * Generic function to send a packet as reply to another packet. | 1465 | * Generic function to send a packet as reply to another packet. |
| 1475 | * Used to send TCP resets so far. ICMP should use this function too. | 1466 | * Used to send some TCP resets/acks so far. |
| 1476 | * | 1467 | * |
| 1477 | * Should run single threaded per socket because it uses the sock | 1468 | * Use a fake percpu inet socket to avoid false sharing and contention. |
| 1478 | * structure to pass arguments. | ||
| 1479 | */ | 1469 | */ |
| 1480 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, | 1470 | static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { |
| 1481 | const struct ip_reply_arg *arg, unsigned int len) | 1471 | .sk = { |
| 1472 | .__sk_common = { | ||
| 1473 | .skc_refcnt = ATOMIC_INIT(1), | ||
| 1474 | }, | ||
| 1475 | .sk_wmem_alloc = ATOMIC_INIT(1), | ||
| 1476 | .sk_allocation = GFP_ATOMIC, | ||
| 1477 | .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), | ||
| 1478 | }, | ||
| 1479 | .pmtudisc = IP_PMTUDISC_WANT, | ||
| 1480 | .uc_ttl = -1, | ||
| 1481 | }; | ||
| 1482 | |||
| 1483 | void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, | ||
| 1484 | __be32 saddr, const struct ip_reply_arg *arg, | ||
| 1485 | unsigned int len) | ||
| 1482 | { | 1486 | { |
| 1483 | struct inet_sock *inet = inet_sk(sk); | ||
| 1484 | struct ip_options_data replyopts; | 1487 | struct ip_options_data replyopts; |
| 1485 | struct ipcm_cookie ipc; | 1488 | struct ipcm_cookie ipc; |
| 1486 | struct flowi4 fl4; | 1489 | struct flowi4 fl4; |
| 1487 | struct rtable *rt = skb_rtable(skb); | 1490 | struct rtable *rt = skb_rtable(skb); |
| 1491 | struct sk_buff *nskb; | ||
| 1492 | struct sock *sk; | ||
| 1493 | struct inet_sock *inet; | ||
| 1488 | 1494 | ||
| 1489 | if (ip_options_echo(&replyopts.opt.opt, skb)) | 1495 | if (ip_options_echo(&replyopts.opt.opt, skb)) |
| 1490 | return; | 1496 | return; |
| @@ -1502,38 +1508,39 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, | |||
| 1502 | 1508 | ||
| 1503 | flowi4_init_output(&fl4, arg->bound_dev_if, 0, | 1509 | flowi4_init_output(&fl4, arg->bound_dev_if, 0, |
| 1504 | RT_TOS(arg->tos), | 1510 | RT_TOS(arg->tos), |
| 1505 | RT_SCOPE_UNIVERSE, sk->sk_protocol, | 1511 | RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, |
| 1506 | ip_reply_arg_flowi_flags(arg), | 1512 | ip_reply_arg_flowi_flags(arg), |
| 1507 | daddr, rt->rt_spec_dst, | 1513 | daddr, saddr, |
| 1508 | tcp_hdr(skb)->source, tcp_hdr(skb)->dest); | 1514 | tcp_hdr(skb)->source, tcp_hdr(skb)->dest); |
| 1509 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | 1515 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); |
| 1510 | rt = ip_route_output_key(sock_net(sk), &fl4); | 1516 | rt = ip_route_output_key(net, &fl4); |
| 1511 | if (IS_ERR(rt)) | 1517 | if (IS_ERR(rt)) |
| 1512 | return; | 1518 | return; |
| 1513 | 1519 | ||
| 1514 | /* And let IP do all the hard work. | 1520 | inet = &get_cpu_var(unicast_sock); |
| 1515 | 1521 | ||
| 1516 | This chunk is not reenterable, hence spinlock. | ||
| 1517 | Note that it uses the fact, that this function is called | ||
| 1518 | with locally disabled BH and that sk cannot be already spinlocked. | ||
| 1519 | */ | ||
| 1520 | bh_lock_sock(sk); | ||
| 1521 | inet->tos = arg->tos; | 1522 | inet->tos = arg->tos; |
| 1523 | sk = &inet->sk; | ||
| 1522 | sk->sk_priority = skb->priority; | 1524 | sk->sk_priority = skb->priority; |
| 1523 | sk->sk_protocol = ip_hdr(skb)->protocol; | 1525 | sk->sk_protocol = ip_hdr(skb)->protocol; |
| 1524 | sk->sk_bound_dev_if = arg->bound_dev_if; | 1526 | sk->sk_bound_dev_if = arg->bound_dev_if; |
| 1527 | sock_net_set(sk, net); | ||
| 1528 | __skb_queue_head_init(&sk->sk_write_queue); | ||
| 1529 | sk->sk_sndbuf = sysctl_wmem_default; | ||
| 1525 | ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, | 1530 | ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, |
| 1526 | &ipc, &rt, MSG_DONTWAIT); | 1531 | &ipc, &rt, MSG_DONTWAIT); |
| 1527 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { | 1532 | nskb = skb_peek(&sk->sk_write_queue); |
| 1533 | if (nskb) { | ||
| 1528 | if (arg->csumoffset >= 0) | 1534 | if (arg->csumoffset >= 0) |
| 1529 | *((__sum16 *)skb_transport_header(skb) + | 1535 | *((__sum16 *)skb_transport_header(nskb) + |
| 1530 | arg->csumoffset) = csum_fold(csum_add(skb->csum, | 1536 | arg->csumoffset) = csum_fold(csum_add(nskb->csum, |
| 1531 | arg->csum)); | 1537 | arg->csum)); |
| 1532 | skb->ip_summed = CHECKSUM_NONE; | 1538 | nskb->ip_summed = CHECKSUM_NONE; |
| 1539 | skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); | ||
| 1533 | ip_push_pending_frames(sk, &fl4); | 1540 | ip_push_pending_frames(sk, &fl4); |
| 1534 | } | 1541 | } |
| 1535 | 1542 | ||
| 1536 | bh_unlock_sock(sk); | 1543 | put_cpu_var(unicast_sock); |
| 1537 | 1544 | ||
| 1538 | ip_rt_put(rt); | 1545 | ip_rt_put(rt); |
| 1539 | } | 1546 | } |
