12 files changed, 70 insertions, 50 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8b7fe5b03906..e67da4e6c324 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1386,6 +1386,17 @@ out:
        return pp;
 }
+int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
+{
+        if (sk->sk_family == AF_INET)
+                return ip_recv_error(sk, msg, len, addr_len);
+#if IS_ENABLED(CONFIG_IPV6)
+        if (sk->sk_family == AF_INET6)
+                return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
+#endif
+        return -EINVAL;
+}
 static int inet_gro_complete(struct sk_buff *skb, int nhoff)
 {
        __be16 newlen = htons(skb->len - nhoff);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f2e15738534d..8f7bd56955b0 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -62,6 +62,10 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
        else
                res->tclassid = 0;
 #endif
+        if (err == -ESRCH)
+                err = -ENETUNREACH;
        return err;
 }
 EXPORT_SYMBOL_GPL(__fib_lookup);
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 32e78924e246..606c520ffd5a 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -133,6 +133,8 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff)
        int err = -ENOSYS;
        const struct net_offload **offloads;
+        udp_tunnel_gro_complete(skb, nhoff);
        rcu_read_lock();
        offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
        ops = rcu_dereference(offloads[proto]);
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index 065cd94c640c..dedb21e99914 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -144,6 +144,8 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
        gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
        geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
+        skb_set_inner_protocol(skb, htons(ETH_P_TEB));
        return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst,
                                   tos, ttl, df, src_port, dst_port, xnet);
 }
@@ -364,6 +366,7 @@ late_initcall(geneve_init_module);
 static void __exit geneve_cleanup_module(void)
 {
        destroy_workqueue(geneve_wq);
+        unregister_pernet_subsys(&geneve_net_ops);
 }
 module_exit(geneve_cleanup_module);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index fb70e3ecc3e4..bb15d0e03d4f 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -318,9 +318,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
        return scount;
 }
-#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb))
+static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
-static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 {
        struct sk_buff *skb;
        struct rtable *rt;
@@ -330,6 +328,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
        struct flowi4 fl4;
        int hlen = LL_RESERVED_SPACE(dev);
        int tlen = dev->needed_tailroom;
+        unsigned int size = mtu;
        while (1) {
                skb = alloc_skb(size + hlen + tlen,
@@ -341,7 +340,6 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
                        return NULL;
        }
        skb->priority = TC_PRIO_CONTROL;
-        igmp_skb_size(skb) = size;
        rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
                                   0, 0,
@@ -354,6 +352,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
        skb_dst_set(skb, &rt->dst);
        skb->dev = dev;
+        skb->reserved_tailroom = skb_end_offset(skb) -
+                                 min(mtu, skb_end_offset(skb));
        skb_reserve(skb, hlen);
        skb_reset_network_header(skb);
@@ -423,8 +423,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
        return skb;
 }
-#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \
+#define AVAILABLE(skb)  ((skb) ? skb_availroom(skb) : 0)
-        skb_tailroom(skb)) : 0)
 static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
        int type, int gdeleted, int sdeleted)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c373a9ad4555..9daf2177dc00 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -195,7 +195,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
        for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
                if (!CMSG_OK(msg, cmsg))
                        return -EINVAL;
-#if defined(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
                if (allow_ipv6 &&
                    cmsg->cmsg_level == SOL_IPV6 &&
                    cmsg->cmsg_type == IPV6_PKTINFO) {
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 3e861011e4a3..1a7e979e80ba 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -528,6 +528,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
        .validate       = vti_tunnel_validate,
        .newlink        = vti_newlink,
        .changelink     = vti_changelink,
+        .dellink        = ip_tunnel_dellink,
        .get_size       = vti_get_size,
        .fill_info      = vti_fill_info,
 };
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index c1023c445920..665de06561cd 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -24,6 +24,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
        struct nf_nat_range range;
        unsigned int verdict;
+        memset(&range, 0, sizeof(range));
        range.flags = priv->flags;
        verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum,
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 57f7c9804139..5d740cccf69e 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -217,6 +217,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
                                             &ipv6_hdr(skb)->daddr))
                                continue;
 #endif
+                } else {
+                        continue;
                }
                if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
@@ -853,16 +855,8 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        if (flags & MSG_OOB)
                goto out;
-        if (flags & MSG_ERRQUEUE) {
+        if (flags & MSG_ERRQUEUE)
-                if (family == AF_INET) {
+                return inet_recv_error(sk, msg, len, addr_len);
-                        return ip_recv_error(sk, msg, len, addr_len);
-#if IS_ENABLED(CONFIG_IPV6)
-                } else if (family == AF_INET6) {
-                        return pingv6_ops.ipv6_recv_error(sk, msg, len,
-                                                          addr_len);
-#endif
-                }
-        }
        skb = skb_recv_datagram(sk, flags, noblock, &err);
        if (!skb)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39ec0c379545..38c2bcb8dd5d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1598,7 +1598,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        u32 urg_hole = 0;
        if (unlikely(flags & MSG_ERRQUEUE))
-                return ip_recv_error(sk, msg, len, addr_len);
+                return inet_recv_error(sk, msg, len, addr_len);
        if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
            (sk->sk_state == TCP_ESTABLISHED))
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a12b455928e5..d107ee246a1d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2315,6 +2315,35 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
 /* Undo procedures. */
+/* We can clear retrans_stamp when there are no retransmissions in the
+ * window. It would seem that it is trivially available for us in
+ * tp->retrans_out, however, that kind of assumptions doesn't consider
+ * what will happen if errors occur when sending retransmission for the
+ * second time. ...It could the that such segment has only
+ * TCPCB_EVER_RETRANS set at the present time. It seems that checking
+ * the head skb is enough except for some reneging corner cases that
+ * are not worth the effort.
+ *
+ * Main reason for all this complexity is the fact that connection dying
+ * time now depends on the validity of the retrans_stamp, in particular,
+ * that successive retransmissions of a segment must not advance
+ * retrans_stamp under any conditions.
+ */
+static bool tcp_any_retrans_done(const struct sock *sk)
+{
+        const struct tcp_sock *tp = tcp_sk(sk);
+        struct sk_buff *skb;
+        if (tp->retrans_out)
+                return true;
+        skb = tcp_write_queue_head(sk);
+        if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
+                return true;
+        return false;
+}
 #if FASTRETRANS_DEBUG > 1
 static void DBGUNDO(struct sock *sk, const char *msg)
 {
@@ -2410,6 +2439,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
                 * is ACKed. For Reno it is MUST to prevent false
                 * fast retransmits (RFC2582). SACK TCP is safe. */
                tcp_moderate_cwnd(tp);
+                if (!tcp_any_retrans_done(sk))
+                        tp->retrans_stamp = 0;
                return true;
        }
        tcp_set_ca_state(sk, TCP_CA_Open);
@@ -2430,35 +2461,6 @@ static bool tcp_try_undo_dsack(struct sock *sk)
        return false;
 }
-/* We can clear retrans_stamp when there are no retransmissions in the
- * window. It would seem that it is trivially available for us in
- * tp->retrans_out, however, that kind of assumptions doesn't consider
- * what will happen if errors occur when sending retransmission for the
- * second time. ...It could the that such segment has only
- * TCPCB_EVER_RETRANS set at the present time. It seems that checking
- * the head skb is enough except for some reneging corner cases that
- * are not worth the effort.
- *
- * Main reason for all this complexity is the fact that connection dying
- * time now depends on the validity of the retrans_stamp, in particular,
- * that successive retransmissions of a segment must not advance
- * retrans_stamp under any conditions.
- */
-static bool tcp_any_retrans_done(const struct sock *sk)
-{
-        const struct tcp_sock *tp = tcp_sk(sk);
-        struct sk_buff *skb;
-        if (tp->retrans_out)
-                return true;
-        skb = tcp_write_queue_head(sk);
-        if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
-                return true;
-        return false;
-}
 /* Undo during loss recovery after partial ACK or using F-RTO. */
 static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 {
@@ -5229,7 +5231,7 @@ slow_path:
        if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
                goto csum_error;
-        if (!th->ack && !th->rst)
+        if (!th->ack && !th->rst && !th->syn)
                goto discard;
        /*
@@ -5648,7 +5650,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                        goto discard;
        }
-        if (!th->ack && !th->rst)
+        if (!th->ack && !th->rst && !th->syn)
                goto discard;
        if (!tcp_validate_incoming(sk, skb, th, 0))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9c7d7621466b..147be2024290 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -598,7 +598,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
        if (th->rst)
                return;
-        if (skb_rtable(skb)->rt_type != RTN_LOCAL)
+        /* If sk not NULL, it means we did a successful lookup and incoming
+         * route had to be correct. prequeue might have dropped our dst.
+         */
+        if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
                return;
        /* Swap the send and the receive. */