Diffstat (limited to 'net/ipv4')

-rw-r--r--  net/ipv4/af_inet.c                       7
-rw-r--r--  net/ipv4/devinet.c                       3
-rw-r--r--  net/ipv4/inet_fragment.c                 5
-rw-r--r--  net/ipv4/ip_forward.c                   71
-rw-r--r--  net/ipv4/ip_output.c                     3
-rw-r--r--  net/ipv4/ip_tunnel.c                    82
-rw-r--r--  net/ipv4/ip_tunnel_core.c               47
-rw-r--r--  net/ipv4/ipconfig.c                      2
-rw-r--r--  net/ipv4/ipmr.c                         13
-rw-r--r--  net/ipv4/netfilter/Kconfig               5
-rw-r--r--  net/ipv4/netfilter/Makefile              1
-rw-r--r--  net/ipv4/netfilter/nf_nat_h323.c         5
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c   4
-rw-r--r--  net/ipv4/netfilter/nft_reject_ipv4.c    75
-rw-r--r--  net/ipv4/route.c                        13
-rw-r--r--  net/ipv4/tcp.c                          10
-rw-r--r--  net/ipv4/tcp_cong.c                      3
-rw-r--r--  net/ipv4/tcp_input.c                    21
-rw-r--r--  net/ipv4/tcp_output.c                   48
-rw-r--r--  net/ipv4/udp_offload.c                  17

20 files changed, 312 insertions(+), 123 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ecd2c3f245ce..19ab78aca547 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1296,8 +1296,11 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 
 	segs = ERR_PTR(-EPROTONOSUPPORT);
 
-	/* Note : following gso_segment() might change skb->encapsulation */
-	udpfrag = !skb->encapsulation && proto == IPPROTO_UDP;
+	if (skb->encapsulation &&
+	    skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+		udpfrag = proto == IPPROTO_UDP && encap;
+	else
+		udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
 
 	ops = rcu_dereference(inet_offloads[proto]);
 	if (likely(ops && ops->callbacks.gso_segment))
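The new inet_gso_segment() logic above keys UDP fragmentation off the saved 'encap' flag when the GSO type says the packet is SIT/IPIP-encapsulated, because a nested gso_segment() pass can change skb->encapsulation under us. A minimal, runnable userspace sketch of the resulting decision (the struct and field names are illustrative stand-ins, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative stand-ins for the fields the hunk consults. */
    struct pkt {
        bool encapsulation;     /* skb->encapsulation */
        bool gso_sit_or_ipip;   /* gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP) */
        bool proto_is_udp;      /* proto == IPPROTO_UDP */
        bool encap;             /* the 'encap' flag saved on entry */
    };

    static bool udpfrag(const struct pkt *p)
    {
        /* SIT/IPIP case: trust the flag saved before segmentation,
         * since skb->encapsulation may have changed meanwhile. */
        if (p->encapsulation && p->gso_sit_or_ipip)
            return p->proto_is_udp && p->encap;
        return p->proto_is_udp && !p->encapsulation;
    }

    int main(void)
    {
        struct pkt plain_udp = { false, false, true, true };
        struct pkt udp_in_sit = { true, true, true, true };

        printf("plain UDP: %d, UDP over SIT: %d\n",
               udpfrag(&plain_udp), udpfrag(&udp_in_sit));
        return 0;
    }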
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ac2dff3c2c1c..bdbf68bb2e2d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1443,7 +1443,8 @@ static size_t inet_nlmsg_size(void)
 	       + nla_total_size(4) /* IFA_LOCAL */
 	       + nla_total_size(4) /* IFA_BROADCAST */
 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
-	       + nla_total_size(4);  /* IFA_FLAGS */
+	       + nla_total_size(4)  /* IFA_FLAGS */
+	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
 }
 
 static inline u32 cstamp_delta(unsigned long cstamp)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index bb075fc9a14f..3b01959bf4bb 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -208,7 +208,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
 	}
 
 	work = frag_mem_limit(nf) - nf->low_thresh;
-	while (work > 0) {
+	while (work > 0 || force) {
 		spin_lock(&nf->lru_lock);
 
 		if (list_empty(&nf->lru_list)) {
@@ -278,9 +278,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 
 	atomic_inc(&qp->refcnt);
 	hlist_add_head(&qp->list, &hb->chain);
+	inet_frag_lru_add(nf, qp);
 	spin_unlock(&hb->chain_lock);
 	read_unlock(&f->lock);
-	inet_frag_lru_add(nf, qp);
+
 	return qp;
 }
 
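Two things change above: the evictor now honors 'force', so it drains the LRU even when usage is already below low_thresh, and inet_frag_intern() publishes the new queue on the LRU list before dropping the chain lock, so the evictor can never see a queue that is hashed but not yet on the LRU. A toy, single-threaded model of the fixed loop condition (illustrative, not the kernel's data structures):

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy model: each queued fragment queue costs one unit of memory. */
    static int mem_used = 10;
    static const int low_thresh = 4;

    static bool evict_one(void)
    {
        if (mem_used == 0)
            return false;   /* LRU empty */
        mem_used--;         /* drop the oldest queue */
        return true;
    }

    /* Shape of the fixed evictor loop: with force set, keep evicting
     * until the LRU is drained, not merely below low_thresh. */
    static int evict(bool force)
    {
        int evicted = 0;

        while (mem_used - low_thresh > 0 || force) {
            if (!evict_one())
                break;
            evicted++;
        }
        return evicted;
    }

    int main(void)
    {
        printf("normal: evicted %d (left %d)\n", evict(false), mem_used);
        printf("forced: evicted %d (left %d)\n", evict(true), mem_used);
        return 0;
    }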
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e9f1217a8afd..f3869c186d97 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,6 +39,71 @@
 #include <net/route.h>
 #include <net/xfrm.h>
 
+static bool ip_may_fragment(const struct sk_buff *skb)
+{
+	return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
+	       !skb->local_df;
+}
+
+static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+	if (skb->len <= mtu || skb->local_df)
+		return false;
+
+	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+		return false;
+
+	return true;
+}
+
+static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
+{
+	unsigned int mtu;
+
+	if (skb->local_df || !skb_is_gso(skb))
+		return false;
+
+	mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
+
+	/* if seglen > mtu, do software segmentation for IP fragmentation on
+	 * output. DF bit cannot be set since ip_forward would have sent
+	 * icmp error.
+	 */
+	return skb_gso_network_seglen(skb) > mtu;
+}
+
+/* called if GSO skb needs to be fragmented on forward */
+static int ip_forward_finish_gso(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	netdev_features_t features;
+	struct sk_buff *segs;
+	int ret = 0;
+
+	features = netif_skb_dev_features(skb, dst->dev);
+	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+	if (IS_ERR(segs)) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	consume_skb(skb);
+
+	do {
+		struct sk_buff *nskb = segs->next;
+		int err;
+
+		segs->next = NULL;
+		err = dst_output(segs);
+
+		if (err && ret == 0)
+			ret = err;
+		segs = nskb;
+	} while (segs);
+
+	return ret;
+}
+
 static int ip_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options *opt = &(IPCB(skb)->opt);
@@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_buff *skb)
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
+	if (ip_gso_exceeds_dst_mtu(skb))
+		return ip_forward_finish_gso(skb);
+
 	return dst_output(skb);
 }
 
@@ -91,8 +159,7 @@ int ip_forward(struct sk_buff *skb)
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
 	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
-	if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
-		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+	if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) {
 		IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(mtu));
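ip_forward_finish_gso() above software-segments an oversized GSO skb and hands each segment to output on its own, remembering the first error but still pushing the remaining segments. A self-contained sketch of that detach-and-send walk over a singly linked segment list (userspace stand-ins for skb and dst_output(), not the kernel API):

    #include <stdio.h>
    #include <stdlib.h>

    struct seg {
        int id;
        struct seg *next;
    };

    /* Stand-in for dst_output(): pretend segment 2 fails. */
    static int output_one(struct seg *s)
    {
        return s->id == 2 ? -1 : 0;
    }

    /* Mirrors the walk in ip_forward_finish_gso(): detach each segment,
     * send it, keep the first error, continue with the rest. */
    static int output_segs(struct seg *segs)
    {
        int ret = 0;

        do {
            struct seg *nskb = segs->next;
            int err;

            segs->next = NULL;
            err = output_one(segs);
            if (err && ret == 0)
                ret = err;
            free(segs);
            segs = nskb;
        } while (segs);

        return ret;
    }

    int main(void)
    {
        struct seg *head = NULL;

        for (int i = 3; i >= 1; i--) {
            struct seg *s = malloc(sizeof(*s));
            s->id = i;
            s->next = head;
            head = s;
        }
        printf("first error: %d\n", output_segs(head));
        return 0;
    }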
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8971780aec7c..73c6b63bba74 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -422,9 +422,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->tc_index = from->tc_index;
 #endif
 	nf_copy(to, from);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
-	to->nf_trace = from->nf_trace;
-#endif
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	to->ipvs_property = from->ipvs_property;
 #endif
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index bd28f386bd02..78a89e61925d 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -93,83 +93,32 @@ static void tunnel_dst_reset(struct ip_tunnel *t)
 	tunnel_dst_set(t, NULL);
 }
 
-static void tunnel_dst_reset_all(struct ip_tunnel *t)
+void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
 {
 	int i;
 
 	for_each_possible_cpu(i)
 		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
 }
+EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
 
-static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
+static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
 {
 	struct dst_entry *dst;
 
 	rcu_read_lock();
 	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
-	if (dst)
+	if (dst) {
+		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+			rcu_read_unlock();
+			tunnel_dst_reset(t);
+			return NULL;
+		}
 		dst_hold(dst);
-	rcu_read_unlock();
-	return dst;
-}
-
-static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
-{
-	struct dst_entry *dst = tunnel_dst_get(t);
-
-	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
-		tunnel_dst_reset(t);
-		return NULL;
-	}
-
-	return dst;
-}
-
-/* Often modified stats are per cpu, other are shared (netdev->stats) */
-struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
-						struct rtnl_link_stats64 *tot)
-{
-	int i;
-
-	for_each_possible_cpu(i) {
-		const struct pcpu_sw_netstats *tstats =
-						   per_cpu_ptr(dev->tstats, i);
-		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
-		unsigned int start;
-
-		do {
-			start = u64_stats_fetch_begin_bh(&tstats->syncp);
-			rx_packets = tstats->rx_packets;
-			tx_packets = tstats->tx_packets;
-			rx_bytes = tstats->rx_bytes;
-			tx_bytes = tstats->tx_bytes;
-		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
-		tot->rx_packets += rx_packets;
-		tot->tx_packets += tx_packets;
-		tot->rx_bytes += rx_bytes;
-		tot->tx_bytes += tx_bytes;
 	}
-
-	tot->multicast = dev->stats.multicast;
-
-	tot->rx_crc_errors = dev->stats.rx_crc_errors;
-	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
-	tot->rx_length_errors = dev->stats.rx_length_errors;
-	tot->rx_frame_errors = dev->stats.rx_frame_errors;
-	tot->rx_errors = dev->stats.rx_errors;
-
-	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
-	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
-	tot->tx_dropped = dev->stats.tx_dropped;
-	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
-	tot->tx_errors = dev->stats.tx_errors;
-
-	tot->collisions = dev->stats.collisions;
-
-	return tot;
+	rcu_read_unlock();
+	return (struct rtable *)dst;
 }
-EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
 
 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
 				__be16 flags, __be32 key)
@@ -584,7 +533,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	struct flowi4 fl4;
 	u8 tos, ttl;
 	__be16 df;
-	struct rtable *rt = NULL;	/* Route to the other host */
+	struct rtable *rt;		/* Route to the other host */
 	unsigned int max_headroom;	/* The extra header space needed */
 	__be32 dst;
 	int err;
@@ -657,8 +606,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
 			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
 
-	if (connected)
-		rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
+	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
 
 	if (!rt) {
 		rt = ip_route_output_key(tunnel->net, &fl4);
@@ -766,7 +714,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
 		if (set_mtu)
 			dev->mtu = mtu;
 	}
-	tunnel_dst_reset_all(t);
+	ip_tunnel_dst_reset_all(t);
 	netdev_state_change(dev);
 }
 
@@ -1095,7 +1043,7 @@ void ip_tunnel_uninit(struct net_device *dev)
 	if (itn->fb_tunnel_dev != dev)
 		ip_tunnel_del(netdev_priv(dev));
 
-	tunnel_dst_reset_all(tunnel);
+	ip_tunnel_dst_reset_all(tunnel);
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
 
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 6156f4ef5e91..6f847dd56dbc 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -108,7 +108,6 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
 	nf_reset(skb);
 	secpath_reset(skb);
 	skb_clear_hash_if_not_l4(skb);
-	skb_dst_drop(skb);
 	skb->vlan_tci = 0;
 	skb_set_queue_mapping(skb, 0);
 	skb->pkt_type = PACKET_HOST;
@@ -148,3 +147,49 @@ error:
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
+
+/* Often modified stats are per cpu, other are shared (netdev->stats) */
+struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
+						struct rtnl_link_stats64 *tot)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		const struct pcpu_sw_netstats *tstats =
+						   per_cpu_ptr(dev->tstats, i);
+		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_bh(&tstats->syncp);
+			rx_packets = tstats->rx_packets;
+			tx_packets = tstats->tx_packets;
+			rx_bytes = tstats->rx_bytes;
+			tx_bytes = tstats->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+		tot->rx_packets += rx_packets;
+		tot->tx_packets += tx_packets;
+		tot->rx_bytes += rx_bytes;
+		tot->tx_bytes += tx_bytes;
+	}
+
+	tot->multicast = dev->stats.multicast;
+
+	tot->rx_crc_errors = dev->stats.rx_crc_errors;
+	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+	tot->rx_length_errors = dev->stats.rx_length_errors;
+	tot->rx_frame_errors = dev->stats.rx_frame_errors;
+	tot->rx_errors = dev->stats.rx_errors;
+
+	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+	tot->tx_dropped = dev->stats.tx_dropped;
+	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+	tot->tx_errors = dev->stats.tx_errors;
+
+	tot->collisions = dev->stats.collisions;
+
+	return tot;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
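The relocated ip_tunnel_get_stats64() folds per-cpu counters into the totals, re-reading any snapshot that raced with a writer via the u64_stats seqcount. A simplified userspace model of that retry protocol, assuming a single writer per counter block (C11 atomics stand in for the kernel's u64_stats helpers):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    struct pcpu_stats {
        atomic_uint seq;    /* even = stable, odd = write in progress */
        uint64_t rx_packets;
        uint64_t rx_bytes;
    };

    /* Writer: bump seq to odd, update, bump back to even. */
    static void stats_add(struct pcpu_stats *s, uint64_t pkts, uint64_t bytes)
    {
        atomic_fetch_add_explicit(&s->seq, 1, memory_order_release);
        s->rx_packets += pkts;
        s->rx_bytes += bytes;
        atomic_fetch_add_explicit(&s->seq, 1, memory_order_release);
    }

    /* Reader: retry until an even, unchanged sequence brackets the read. */
    static void stats_read(struct pcpu_stats *s, uint64_t *pkts, uint64_t *bytes)
    {
        unsigned int start;

        do {
            start = atomic_load_explicit(&s->seq, memory_order_acquire);
            *pkts = s->rx_packets;
            *bytes = s->rx_bytes;
        } while ((start & 1) ||
                 start != atomic_load_explicit(&s->seq, memory_order_acquire));
    }

    int main(void)
    {
        struct pcpu_stats s = { 0 };
        uint64_t p, b;

        stats_add(&s, 3, 1500);
        stats_read(&s, &p, &b);
        printf("%llu packets, %llu bytes\n",
               (unsigned long long)p, (unsigned long long)b);
        return 0;
    }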
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index efa1138fa523..b3e86ea7b71b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -273,7 +273,7 @@ static int __init ic_open_devs(void)
 
 		msleep(1);
 
-		if time_before(jiffies, next_msg)
+		if (time_before(jiffies, next_msg))
 			continue;
 
 		elapsed = jiffies_to_msecs(jiffies - start);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b9b3472975ba..28863570dd60 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2255,13 +2255,14 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
 }
 
 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-			    u32 portid, u32 seq, struct mfc_cache *c, int cmd)
+			    u32 portid, u32 seq, struct mfc_cache *c, int cmd,
+			    int flags)
 {
 	struct nlmsghdr *nlh;
 	struct rtmsg *rtm;
 	int err;
 
-	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
+	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -2329,7 +2330,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 	if (skb == NULL)
 		goto errout;
 
-	err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
+	err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
 	if (err < 0)
 		goto errout;
 
@@ -2368,7 +2369,8 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 				if (ipmr_fill_mroute(mrt, skb,
 						     NETLINK_CB(cb->skb).portid,
 						     cb->nlh->nlmsg_seq,
-						     mfc, RTM_NEWROUTE) < 0)
+						     mfc, RTM_NEWROUTE,
+						     NLM_F_MULTI) < 0)
 					goto done;
 next_entry:
 				e++;
@@ -2382,7 +2384,8 @@ next_entry:
 			if (ipmr_fill_mroute(mrt, skb,
 					     NETLINK_CB(cb->skb).portid,
 					     cb->nlh->nlmsg_seq,
-					     mfc, RTM_NEWROUTE) < 0) {
+					     mfc, RTM_NEWROUTE,
+					     NLM_F_MULTI) < 0) {
 				spin_unlock_bh(&mfc_unres_lock);
 				goto done;
 			}
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 81c6910cfa92..a26ce035e3fa 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -61,6 +61,11 @@ config NFT_CHAIN_NAT_IPV4
 	  packet transformations such as the source, destination address and
 	  source and destination ports.
 
+config NFT_REJECT_IPV4
+	depends on NF_TABLES_IPV4
+	default NFT_REJECT
+	tristate
+
 config NF_TABLES_ARP
 	depends on NF_TABLES
 	tristate "ARP nf_tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c16be9d58420..90b82405331e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
 obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
 
 # generic IP tables
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9eea059dd621..574f7ebba0b6 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 		ret = nf_ct_expect_related(rtcp_exp);
 		if (ret == 0)
 			break;
-		else if (ret != -EBUSY) {
+		else if (ret == -EBUSY) {
+			nf_ct_unexpect_related(rtp_exp);
+			continue;
+		} else if (ret < 0) {
 			nf_ct_unexpect_related(rtp_exp);
 			nated_port = 0;
 			break;
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index d551e31b416e..7c676671329d 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1198,8 +1198,8 @@ static int snmp_translate(struct nf_conn *ct,
 		map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
 	} else {
 		/* DNAT replies */
-		map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
-		map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+		map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
+		map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
 	}
 
 	if (map.from == map.to)
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..e79718a382f2
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
+
+void nft_reject_ipv4_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		nf_send_unreach(pkt->skb, priv->icmp_code);
+		break;
+	case NFT_REJECT_TCP_RST:
+		nf_send_reset(pkt->skb, pkt->ops->hooknum);
+		break;
+	}
+
+	data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval);
+
+static struct nft_expr_type nft_reject_ipv4_type;
+static const struct nft_expr_ops nft_reject_ipv4_ops = {
+	.type		= &nft_reject_ipv4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+	.eval		= nft_reject_ipv4_eval,
+	.init		= nft_reject_init,
+	.dump		= nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
+	.family		= NFPROTO_IPV4,
+	.name		= "reject",
+	.ops		= &nft_reject_ipv4_ops,
+	.policy		= nft_reject_policy,
+	.maxattr	= NFTA_REJECT_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_reject_ipv4_module_init(void)
+{
+	return nft_register_expr(&nft_reject_ipv4_type);
+}
+
+static void __exit nft_reject_ipv4_module_exit(void)
+{
+	nft_unregister_expr(&nft_reject_ipv4_type);
+}
+
+module_init(nft_reject_ipv4_module_init);
+module_exit(nft_reject_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 25071b48921c..4c011ec69ed4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1597,6 +1597,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_gateway = 0;
 	rth->rt_uses_gateway = 0;
 	INIT_LIST_HEAD(&rth->rt_uncached);
+	RT_CACHE_STAT_INC(in_slow_tot);
 
 	rth->dst.input = ip_forward;
 	rth->dst.output = ip_output;
@@ -1695,10 +1696,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
 	err = fib_lookup(net, &fl4, &res);
-	if (err != 0)
+	if (err != 0) {
+		if (!IN_DEV_FORWARD(in_dev))
+			err = -EHOSTUNREACH;
 		goto no_route;
-
-	RT_CACHE_STAT_INC(in_slow_tot);
+	}
 
 	if (res.type == RTN_BROADCAST)
 		goto brd_input;
@@ -1712,8 +1714,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto local_input;
 	}
 
-	if (!IN_DEV_FORWARD(in_dev))
+	if (!IN_DEV_FORWARD(in_dev)) {
+		err = -EHOSTUNREACH;
 		goto no_route;
+	}
 	if (res.type != RTN_UNICAST)
 		goto martian_destination;
 
@@ -1768,6 +1772,7 @@ local_input:
 	rth->rt_gateway = 0;
 	rth->rt_uses_gateway = 0;
 	INIT_LIST_HEAD(&rth->rt_uncached);
+	RT_CACHE_STAT_INC(in_slow_tot);
 	if (res.type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
 		rth->dst.error= -err;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4475b3bb494d..97c8f5620c43 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1044,7 +1044,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
 	}
 }
 
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+				int *copied, size_t size)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int err, flags;
@@ -1059,11 +1060,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
 	if (unlikely(tp->fastopen_req == NULL))
 		return -ENOBUFS;
 	tp->fastopen_req->data = msg;
+	tp->fastopen_req->size = size;
 
 	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
 	err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
 				    msg->msg_namelen, flags);
-	*size = tp->fastopen_req->copied;
+	*copied = tp->fastopen_req->copied;
 	tcp_free_fastopen_req(tp);
 	return err;
 }
@@ -1083,7 +1085,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	flags = msg->msg_flags;
 	if (flags & MSG_FASTOPEN) {
-		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
 		if (err == -EINPROGRESS && copied_syn > 0)
 			goto out;
 		else if (err)
@@ -2229,7 +2231,7 @@ adjudge_to_death:
 	/* This is a (useful) BSD violating of the RFC. There is a
 	 * problem with TCP as specified in that the other end could
 	 * keep a socket open forever with no application left this end.
-	 * We use a 3 minute timeout (about the same as BSD) then kill
+	 * We use a 1 minute timeout (about the same as BSD) then kill
 	 * our end. If they send after that then tough - BUT: long enough
 	 * that we won't make the old 4*rto = almost no time - whoops
 	 * reset mistake.
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index ad37bf18ae4b..2388275adb9b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -290,8 +290,7 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
 	left = tp->snd_cwnd - in_flight;
 	if (sk_can_gso(sk) &&
 	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
-	    left * tp->mss_cache < sk->sk_gso_max_size &&
-	    left < sk->sk_gso_max_segs)
+	    left < tp->xmit_size_goal_segs)
 		return true;
 	return left <= tcp_max_tso_deferred_mss(tp);
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 65cf90e063d5..eeaac399420d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -671,6 +671,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	long m = mrtt; /* RTT */
+	u32 srtt = tp->srtt;
 
 	/*	The following amusing code comes from Jacobson's
 	 *	article in SIGCOMM '88.  Note that rtt and mdev
@@ -688,11 +689,9 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
 	 */
-	if (m == 0)
-		m = 1;
-	if (tp->srtt != 0) {
-		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
-		tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
+	if (srtt != 0) {
+		m -= (srtt >> 3);	/* m is now error in rtt est */
+		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
 		if (m < 0) {
 			m = -m;		/* m is now abs(error) */
 			m -= (tp->mdev >> 2);   /* similar update on mdev */
@@ -723,11 +722,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 		}
 	} else {
 		/* no previous measure. */
-		tp->srtt = m << 3;	/* take the measured time to be rtt */
+		srtt = m << 3;		/* take the measured time to be rtt */
 		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 		tp->rtt_seq = tp->snd_nxt;
 	}
+	tp->srtt = max(1U, srtt);
 }
 
 /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
@@ -746,8 +746,10 @@ static void tcp_update_pacing_rate(struct sock *sk)
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
-	/* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
-	 * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+	/* Correction for small srtt and scheduling constraints.
+	 * For small rtt, consider noise is too high, and use
+	 * the minimal value (srtt = 1 -> 125 us for HZ=1000)
+	 *
 	 * We probably need usec resolution in the future.
 	 * Note: This also takes care of possible srtt=0 case,
 	 * when tcp_rtt_estimator() was not yet called.
@@ -1943,8 +1945,9 @@ void tcp_enter_loss(struct sock *sk, int how)
 		if (skb == tcp_send_head(sk))
 			break;
 
-		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
 			tp->undo_marker = 0;
+
 		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
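The tcp_rtt_estimator() rework above keeps Jacobson's EWMA, srtt = 7/8*srtt + 1/8*m with srtt stored left-shifted by 3, but accumulates into a local variable and clamps the stored value to at least 1 instead of fudging m == 0 samples. A runnable sketch of just that smoothing step (mdev/rttvar handling omitted):

    #include <stdint.h>
    #include <stdio.h>

    /* srtt is kept left-shifted by 3 (units of 1/8 of a tick), as in TCP. */
    static uint32_t srtt_scaled;

    static void rtt_estimator(uint32_t mrtt)
    {
        long m = mrtt;
        uint32_t srtt = srtt_scaled;

        if (srtt != 0) {
            m -= (srtt >> 3);   /* m is now the error in the estimate */
            srtt += m;          /* srtt = 7/8 srtt + 1/8 m */
        } else {
            srtt = m << 3;      /* first sample seeds the estimate */
        }
        srtt_scaled = srtt ? srtt : 1;  /* the max(1U, srtt) clamp */
    }

    int main(void)
    {
        uint32_t samples[] = { 100, 120, 80, 110 };

        for (int i = 0; i < 4; i++) {
            rtt_estimator(samples[i]);
            printf("sample %u -> srtt %u\n", samples[i], srtt_scaled >> 3);
        }
        return 0;
    }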
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 03d26b85eab8..17a11e65e57f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -698,7 +698,8 @@ static void tcp_tsq_handler(struct sock *sk)
 	if ((1 << sk->sk_state) &
 	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
 	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK))
-		tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+		tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+			       0, GFP_ATOMIC);
 }
 /*
  * One tasklet per cpu tries to send more skbs.
@@ -766,6 +767,17 @@ void tcp_release_cb(struct sock *sk)
 	if (flags & (1UL << TCP_TSQ_DEFERRED))
 		tcp_tsq_handler(sk);
 
+	/* Here begins the tricky part :
+	 * We are called from release_sock() with :
+	 * 1) BH disabled
+	 * 2) sk_lock.slock spinlock held
+	 * 3) socket owned by us (sk->sk_lock.owned == 1)
+	 *
+	 * But following code is meant to be called from BH handlers,
+	 * so we should keep BH disabled, but early release socket ownership
+	 */
+	sock_release_ownership(sk);
+
 	if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
 		tcp_write_timer_handler(sk);
 		__sock_put(sk);
@@ -863,8 +875,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
 			     fclone->fclone == SKB_FCLONE_CLONE))
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
 
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
@@ -1904,7 +1916,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
-			break;
+			/* It is possible TX completion already happened
+			 * before we set TSQ_THROTTLED, so we must
+			 * test again the condition.
+			 * We abuse smp_mb__after_clear_bit() because
+			 * there is no smp_mb__after_set_bit() yet
+			 */
+			smp_mb__after_clear_bit();
+			if (atomic_read(&sk->sk_wmem_alloc) > limit)
+				break;
 		}
 
 		limit = mss_now;
@@ -1977,7 +1997,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+	if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
 	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 		return false;
 
@@ -2328,6 +2348,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned int cur_mss;
+	int err;
 
 	/* Inconslusive MTU probe */
 	if (icsk->icsk_mtup.probe_size) {
@@ -2391,11 +2412,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		     skb_headroom(skb) >= 0xFFFF)) {
 		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
 						   GFP_ATOMIC);
-		return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-			      -ENOBUFS;
+		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+			     -ENOBUFS;
 	} else {
-		return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
+
+	if (likely(!err))
+		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+	return err;
 }
 
 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -2899,7 +2924,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
 		MAX_TCP_OPTION_SPACE;
 
-	syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
+	space = min_t(size_t, space, fo->size);
+
+	/* limit to order-0 allocations */
+	space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
+
+	syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
 				   sk->sk_allocation);
 	if (syn_data == NULL)
 		goto fallback;
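The tcp_write_xmit() hunk above closes a lost-wakeup race: a TX completion can run between the sk_wmem_alloc test and set_bit(TSQ_THROTTLED), leaving the flag set with no completion left to clear it. Re-testing after a full barrier fixes that. The same set-flag/barrier/re-test shape, modeled with C11 atomics (illustrative, not the kernel primitives):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_long wmem_alloc;
    static atomic_bool throttled;

    /* Writer: the second read after the flag is published is what
     * prevents "flag set, but completion already ran" from stalling. */
    static bool should_throttle(long limit)
    {
        if (atomic_load(&wmem_alloc) <= limit)
            return false;

        atomic_store(&throttled, true);
        /* full barrier, playing the role of smp_mb__after_clear_bit() */
        atomic_thread_fence(memory_order_seq_cst);
        return atomic_load(&wmem_alloc) > limit;
    }

    /* Completion: frees memory first, then clears the flag and would
     * reschedule the sender (elided here). */
    static void tx_completion(long freed)
    {
        atomic_fetch_sub(&wmem_alloc, freed);
        atomic_exchange(&throttled, false);
    }

    int main(void)
    {
        atomic_store(&wmem_alloc, 100);
        if (!should_throttle(50))
            return 1;               /* over limit: must throttle */
        tx_completion(80);
        return should_throttle(50); /* under limit again: 0 */
    }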
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 25f5cee3a08a..88b4023ecfcf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -17,6 +17,8 @@
 static DEFINE_SPINLOCK(udp_offload_lock);
 static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;
 
+#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))
+
 struct udp_offload_priv {
 	struct udp_offload	*offload;
 	struct rcu_head		rcu;
@@ -100,8 +102,7 @@ out:
 
 int udp_add_offload(struct udp_offload *uo)
 {
-	struct udp_offload_priv __rcu **head = &udp_offload_base;
-	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
+	struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);
 
 	if (!new_offload)
 		return -ENOMEM;
@@ -109,8 +110,8 @@ int udp_add_offload(struct udp_offload *uo)
 	new_offload->offload = uo;
 
 	spin_lock(&udp_offload_lock);
-	rcu_assign_pointer(new_offload->next, rcu_dereference(*head));
-	rcu_assign_pointer(*head, new_offload);
+	new_offload->next = udp_offload_base;
+	rcu_assign_pointer(udp_offload_base, new_offload);
 	spin_unlock(&udp_offload_lock);
 
 	return 0;
@@ -130,12 +131,12 @@ void udp_del_offload(struct udp_offload *uo)
 
 	spin_lock(&udp_offload_lock);
 
-	uo_priv = rcu_dereference(*head);
+	uo_priv = udp_deref_protected(*head);
 	for (; uo_priv != NULL;
-	     uo_priv = rcu_dereference(*head)) {
-
+	     uo_priv = udp_deref_protected(*head)) {
 		if (uo_priv->offload == uo) {
-			rcu_assign_pointer(*head, rcu_dereference(uo_priv->next));
+			rcu_assign_pointer(*head,
+					   udp_deref_protected(uo_priv->next));
 			goto unlock;
 		}
 		head = &uo_priv->next;
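udp_del_offload() walks a writer-side list under udp_offload_lock, so the reader primitive rcu_dereference() was the wrong accessor; the new udp_deref_protected() wrapper documents and lockdep-checks lock ownership instead. The add path likewise drops the needless rcu_assign_pointer() on ->next of a node nobody can see yet: only the store that publishes the node needs release semantics. A userspace model of that publish rule (C11 release/acquire standing in for rcu_assign_pointer()/rcu_dereference(); grace periods are not modeled):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct node {
        int val;
        struct node *next;
    };

    static _Atomic(struct node *) head;

    /* Writer under an external lock: ->next of an unpublished node needs
     * no barrier; only the store that makes it reachable must be a
     * release (the rcu_assign_pointer() analogue). */
    static void add(int val)
    {
        struct node *n = malloc(sizeof(*n));

        n->val = val;
        n->next = atomic_load_explicit(&head, memory_order_relaxed);
        atomic_store_explicit(&head, n, memory_order_release);
    }

    /* Reader: the acquire load pairs with the writer's release store. */
    static void dump(void)
    {
        struct node *n = atomic_load_explicit(&head, memory_order_acquire);

        for (; n; n = n->next)
            printf("%d ", n->val);
        printf("\n");
    }

    int main(void)
    {
        add(1);
        add(2);
        dump();
        return 0;
    }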