Merge branch 'linus' into x86/timers

author: Ingo Molnar <mingo@elte.hu> 2008-06-16 05:20:57 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-06-16 05:20:57 -0400
commit: fd2c17e1777d46cff14c25ea774a4d17459d188a (patch)
tree: e8299216de8e99418195ba64bcf8f679df3a66b3 /net/ipv4
parent: 74e411cb6443d8bcb55fbe89fcc7a9ee574df91b (diff)
parent: 066519068ad2fbe98c7f45552b1f592903a9c8c8 (diff)
22 files changed, 131 insertions, 420 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 68b72a7a180..9b539fa9fe1 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -570,7 +570,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
         *      Allocate a buffer
         */
-        skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+        skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
        if (skb == NULL)
                return NULL;
@@ -1288,7 +1288,6 @@ static void arp_format_neigh_entry(struct seq_file *seq,
                                   struct neighbour *n)
 {
        char hbuffer[HBUFFERLEN];
-        const char hexbuf[] = "0123456789ABCDEF";
        int k, j;
        char tbuf[16];
        struct net_device *dev = n->dev;
@@ -1302,8 +1301,8 @@ static void arp_format_neigh_entry(struct seq_file *seq,
        else {
 #endif
        for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) {
-                hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15];
+                hbuffer[k++] = hex_asc_hi(n->ha[j]);
-                hbuffer[k++] = hexbuf[n->ha[j] & 15];
+                hbuffer[k++] = hex_asc_lo(n->ha[j]);
                hbuffer[k++] = ':';
        }
        hbuffer[--k] = 0;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 05afb576d93..2c0e4572cc9 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -338,7 +338,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
                return -ENOENT;
        hash = cipso_v4_map_cache_hash(key, key_len);
-        bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+        bkt = hash & (CIPSO_V4_CACHE_BUCKETS - 1);
        spin_lock_bh(&cipso_v4_cache[bkt].lock);
        list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
                if (entry->hash == hash &&
@@ -417,7 +417,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
        atomic_inc(&secattr->cache->refcount);
        entry->lsm_data = secattr->cache;
-        bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1);
+        bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
        spin_lock_bh(&cipso_v4_cache[bkt].lock);
        if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
                list_add(&entry->list, &cipso_v4_cache[bkt].list);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6848e4760f3..79a7ef6209f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
        [IFA_LOCAL]             = { .type = NLA_U32 },
        [IFA_ADDRESS]           = { .type = NLA_U32 },
        [IFA_BROADCAST]         = { .type = NLA_U32 },
-        [IFA_ANYCAST]           = { .type = NLA_U32 },
        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 };
@@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
        if (tb[IFA_BROADCAST])
                ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
-        if (tb[IFA_ANYCAST])
-                ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
        if (tb[IFA_LABEL])
                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
        else
@@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
                                break;
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = 0;
-                        ifa->ifa_anycast = 0;
                        ifa->ifa_scope = 0;
                }
@@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void)
               + nla_total_size(4) /* IFA_ADDRESS */
               + nla_total_size(4) /* IFA_LOCAL */
               + nla_total_size(4) /* IFA_BROADCAST */
-               + nla_total_size(4) /* IFA_ANYCAST */
               + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
 }
@@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
        if (ifa->ifa_broadcast)
                NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
-        if (ifa->ifa_anycast)
-                NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
        if (ifa->ifa_label[0])
                NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0f1557a4ac7..0b2ac6a3d90 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
        [RTA_PREFSRC]           = { .type = NLA_U32 },
        [RTA_METRICS]           = { .type = NLA_NESTED },
        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
-        [RTA_PROTOINFO]         = { .type = NLA_U32 },
        [RTA_FLOW]              = { .type = NLA_U32 },
 };
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3b83c34019f..0d4d72827e4 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -960,7 +960,10 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
        rtm->rtm_dst_len = dst_len;
        rtm->rtm_src_len = 0;
        rtm->rtm_tos = tos;
-        rtm->rtm_table = tb_id;
+        if (tb_id < 256)
+                rtm->rtm_table = tb_id;
+        else
+                rtm->rtm_table = RT_TABLE_COMPAT;
        NLA_PUT_U32(skb, RTA_TABLE, tb_id);
        rtm->rtm_type = type;
        rtm->rtm_flags = fi->fib_flags;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6250f4239b6..2769dc4a4c8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -292,7 +292,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
        struct iphdr *pip;
        struct igmpv3_report *pig;
-        skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+        skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
        if (skb == NULL)
                return NULL;
@@ -653,7 +653,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
                return -1;
        }
-        skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+        skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
        if (skb == NULL) {
                ip_rt_put(rt);
                return -1;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 828ea211ff2..045e799d3e1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
        struct inet_connection_sock *icsk = inet_csk(parent);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
        struct listen_sock *lopt = queue->listen_opt;
-        int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+        int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+        int thresh = max_retries;
        unsigned long now = jiffies;
        struct request_sock **reqp, *req;
        int i, budget;
@@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
                }
        }
+        if (queue->rskq_defer_accept)
+                max_retries = queue->rskq_defer_accept;
        budget = 2 * (lopt->nr_table_entries / (timeout / interval));
        i = lopt->clock_hand;
@@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
                reqp=&lopt->syn_table[i];
                while ((req = *reqp) != NULL) {
                        if (time_after_eq(now, req->expires)) {
-                                if (req->retrans < thresh &&
+                                if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) &&
-                                    !req->rsk_ops->rtx_syn_ack(parent, req)) {
+                                    (inet_rsk(req)->acked ||
+                                     !req->rsk_ops->rtx_syn_ack(parent, req))) {
                                        unsigned long timeo;
                                        if (req->retrans++ == 0)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2ada033406d..4342cba4ff8 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -313,9 +313,8 @@ static void ipgre_tunnel_uninit(struct net_device *dev)
 static void ipgre_err(struct sk_buff *skb, u32 info)
 {
-#ifndef I_WISH_WORLD_WERE_PERFECT
-/* It is not :-( All the routers (except for Linux) return only
+/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
@@ -398,149 +397,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 out:
        read_unlock(&ipgre_lock);
        return;
-#else
-        struct iphdr *iph = (struct iphdr*)dp;
-        struct iphdr *eiph;
-        __be16       *p = (__be16*)(dp+(iph->ihl<<2));
-        const int type = icmp_hdr(skb)->type;
-        const int code = icmp_hdr(skb)->code;
-        int rel_type = 0;
-        int rel_code = 0;
-        __be32 rel_info = 0;
-        __u32 n = 0;
-        __be16 flags;
-        int grehlen = (iph->ihl<<2) + 4;
-        struct sk_buff *skb2;
-        struct flowi fl;
-        struct rtable *rt;
-        if (p[1] != htons(ETH_P_IP))
-                return;
-        flags = p[0];
-        if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
-                if (flags&(GRE_VERSION|GRE_ROUTING))
-                        return;
-                if (flags&GRE_CSUM)
-                        grehlen += 4;
-                if (flags&GRE_KEY)
-                        grehlen += 4;
-                if (flags&GRE_SEQ)
-                        grehlen += 4;
-        }
-        if (len < grehlen + sizeof(struct iphdr))
-                return;
-        eiph = (struct iphdr*)(dp + grehlen);
-        switch (type) {
-        default:
-                return;
-        case ICMP_PARAMETERPROB:
-                n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
-                if (n < (iph->ihl<<2))
-                        return;
-                /* So... This guy found something strange INSIDE encapsulated
-                   packet. Well, he is fool, but what can we do ?
-                 */
-                rel_type = ICMP_PARAMETERPROB;
-                n -= grehlen;
-                rel_info = htonl(n << 24);
-                break;
-        case ICMP_DEST_UNREACH:
-                switch (code) {
-                case ICMP_SR_FAILED:
-                case ICMP_PORT_UNREACH:
-                        /* Impossible event. */
-                        return;
-                case ICMP_FRAG_NEEDED:
-                        /* And it is the only really necessary thing :-) */
-                        n = ntohs(icmp_hdr(skb)->un.frag.mtu);
-                        if (n < grehlen+68)
-                                return;
-                        n -= grehlen;
-                        /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
-                        if (n > ntohs(eiph->tot_len))
-                                return;
-                        rel_info = htonl(n);
-                        break;
-                default:
-                        /* All others are translated to HOST_UNREACH.
-                           rfc2003 contains "deep thoughts" about NET_UNREACH,
-                           I believe, it is just ether pollution. --ANK
-                         */
-                        rel_type = ICMP_DEST_UNREACH;
-                        rel_code = ICMP_HOST_UNREACH;
-                        break;
-                }
-                break;
-        case ICMP_TIME_EXCEEDED:
-                if (code != ICMP_EXC_TTL)
-                        return;
-                break;
-        }
-        /* Prepare fake skb to feed it to icmp_send */
-        skb2 = skb_clone(skb, GFP_ATOMIC);
-        if (skb2 == NULL)
-                return;
-        dst_release(skb2->dst);
-        skb2->dst = NULL;
-        skb_pull(skb2, skb->data - (u8*)eiph);
-        skb_reset_network_header(skb2);
-        /* Try to guess incoming interface */
-        memset(&fl, 0, sizeof(fl));
-        fl.fl4_dst = eiph->saddr;
-        fl.fl4_tos = RT_TOS(eiph->tos);
-        fl.proto = IPPROTO_GRE;
-        if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
-                kfree_skb(skb2);
-                return;
-        }
-        skb2->dev = rt->u.dst.dev;
-        /* route "incoming" packet */
-        if (rt->rt_flags&RTCF_LOCAL) {
-                ip_rt_put(rt);
-                rt = NULL;
-                fl.fl4_dst = eiph->daddr;
-                fl.fl4_src = eiph->saddr;
-                fl.fl4_tos = eiph->tos;
-                if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
-                    rt->u.dst.dev->type != ARPHRD_IPGRE) {
-                        ip_rt_put(rt);
-                        kfree_skb(skb2);
-                        return;
-                }
-        } else {
-                ip_rt_put(rt);
-                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
-                    skb2->dst->dev->type != ARPHRD_IPGRE) {
-                        kfree_skb(skb2);
-                        return;
-                }
-        }
-        /* change mtu on this route */
-        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-                if (n > dst_mtu(skb2->dst)) {
-                        kfree_skb(skb2);
-                        return;
-                }
-                skb2->dst->ops->update_pmtu(skb2->dst, n);
-        } else if (type == ICMP_TIME_EXCEEDED) {
-                struct ip_tunnel *t = netdev_priv(skb2->dev);
-                if (t->parms.iph.ttl) {
-                        rel_type = ICMP_DEST_UNREACH;
-                        rel_code = ICMP_HOST_UNREACH;
-                }
-        }
-        icmp_send(skb2, rel_type, rel_code, rel_info);
-        kfree_skb(skb2);
-#endif
 }
 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 89dee4346f6..ed45037ce9b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -710,14 +710,14 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
        struct net_device *dev = d->dev;
        struct sk_buff *skb;
        struct bootp_pkt *b;
-        int hh_len = LL_RESERVED_SPACE(dev);
        struct iphdr *h;
        /* Allocate packet */
-        skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL);
+        skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15,
+                        GFP_KERNEL);
        if (!skb)
                return;
-        skb_reserve(skb, hh_len);
+        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
        memset(b, 0, sizeof(struct bootp_pkt));
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 149111f08e8..af5cb53da5c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -278,9 +278,8 @@ static void ipip_tunnel_uninit(struct net_device *dev)
 static int ipip_err(struct sk_buff *skb, u32 info)
 {
-#ifndef I_WISH_WORLD_WERE_PERFECT
-/* It is not :-( All the routers (except for Linux) return only
+/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
@@ -337,133 +336,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 out:
        read_unlock(&ipip_lock);
        return err;
-#else
-        struct iphdr *iph = (struct iphdr*)dp;
-        int hlen = iph->ihl<<2;
-        struct iphdr *eiph;
-        const int type = icmp_hdr(skb)->type;
-        const int code = icmp_hdr(skb)->code;
-        int rel_type = 0;
-        int rel_code = 0;
-        __be32 rel_info = 0;
-        __u32 n = 0;
-        struct sk_buff *skb2;
-        struct flowi fl;
-        struct rtable *rt;
-        if (len < hlen + sizeof(struct iphdr))
-                return 0;
-        eiph = (struct iphdr*)(dp + hlen);
-        switch (type) {
-        default:
-                return 0;
-        case ICMP_PARAMETERPROB:
-                n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
-                if (n < hlen)
-                        return 0;
-                /* So... This guy found something strange INSIDE encapsulated
-                   packet. Well, he is fool, but what can we do ?
-                 */
-                rel_type = ICMP_PARAMETERPROB;
-                rel_info = htonl((n - hlen) << 24);
-                break;
-        case ICMP_DEST_UNREACH:
-                switch (code) {
-                case ICMP_SR_FAILED:
-                case ICMP_PORT_UNREACH:
-                        /* Impossible event. */
-                        return 0;
-                case ICMP_FRAG_NEEDED:
-                        /* And it is the only really necessary thing :-) */
-                        n = ntohs(icmp_hdr(skb)->un.frag.mtu);
-                        if (n < hlen+68)
-                                return 0;
-                        n -= hlen;
-                        /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
-                        if (n > ntohs(eiph->tot_len))
-                                return 0;
-                        rel_info = htonl(n);
-                        break;
-                default:
-                        /* All others are translated to HOST_UNREACH.
-                           rfc2003 contains "deep thoughts" about NET_UNREACH,
-                           I believe, it is just ether pollution. --ANK
-                         */
-                        rel_type = ICMP_DEST_UNREACH;
-                        rel_code = ICMP_HOST_UNREACH;
-                        break;
-                }
-                break;
-        case ICMP_TIME_EXCEEDED:
-                if (code != ICMP_EXC_TTL)
-                        return 0;
-                break;
-        }
-        /* Prepare fake skb to feed it to icmp_send */
-        skb2 = skb_clone(skb, GFP_ATOMIC);
-        if (skb2 == NULL)
-                return 0;
-        dst_release(skb2->dst);
-        skb2->dst = NULL;
-        skb_pull(skb2, skb->data - (u8*)eiph);
-        skb_reset_network_header(skb2);
-        /* Try to guess incoming interface */
-        memset(&fl, 0, sizeof(fl));
-        fl.fl4_daddr = eiph->saddr;
-        fl.fl4_tos = RT_TOS(eiph->tos);
-        fl.proto = IPPROTO_IPIP;
-        if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) {
-                kfree_skb(skb2);
-                return 0;
-        }
-        skb2->dev = rt->u.dst.dev;
-        /* route "incoming" packet */
-        if (rt->rt_flags&RTCF_LOCAL) {
-                ip_rt_put(rt);
-                rt = NULL;
-                fl.fl4_daddr = eiph->daddr;
-                fl.fl4_src = eiph->saddr;
-                fl.fl4_tos = eiph->tos;
-                if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
-                    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
-                        ip_rt_put(rt);
-                        kfree_skb(skb2);
-                        return 0;
-                }
-        } else {
-                ip_rt_put(rt);
-                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
-                    skb2->dst->dev->type != ARPHRD_TUNNEL) {
-                        kfree_skb(skb2);
-                        return 0;
-                }
-        }
-        /* change mtu on this route */
-        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-                if (n > dst_mtu(skb2->dst)) {
-                        kfree_skb(skb2);
-                        return 0;
-                }
-                skb2->dst->ops->update_pmtu(skb2->dst, n);
-        } else if (type == ICMP_TIME_EXCEEDED) {
-                struct ip_tunnel *t = netdev_priv(skb2->dev);
-                if (t->parms.iph.ttl) {
-                        rel_type = ICMP_DEST_UNREACH;
-                        rel_code = ICMP_HOST_UNREACH;
-                }
-        }
-        icmp_send(skb2, rel_type, rel_code, rel_info);
-        kfree_skb(skb2);
-        return 0;
-#endif
 }
 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 5daefad3d19..7750c97fde7 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -232,6 +232,11 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
                        }
                }
        }
+        /* don't trust len bigger than ctx buffer */
+        if (*len > ctx->end - ctx->pointer)
+                return 0;
        return 1;
 }
@@ -250,6 +255,10 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
        if (!asn1_length_decode(ctx, &def, &len))
                return 0;
+        /* primitive shall be definite, indefinite shall be constructed */
+        if (*con == ASN1_PRI && !def)
+                return 0;
        if (def)
                *eoc = ctx->pointer + len;
        else
@@ -434,6 +443,11 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
        unsigned long *optr;
        size = eoc - ctx->pointer + 1;
+        /* first subid actually encodes first two subids */
+        if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
+                return 0;
        *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
        if (*oid == NULL) {
                if (net_ratelimit())
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 11d7f753a82..e7e091d365f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -322,7 +322,6 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
                        unsigned int flags)
 {
        struct inet_sock *inet = inet_sk(sk);
-        int hh_len;
        struct iphdr *iph;
        struct sk_buff *skb;
        unsigned int iphlen;
@@ -336,13 +335,12 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
        if (flags&MSG_PROBE)
                goto out;
-        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+        skb = sock_alloc_send_skb(sk,
+                                  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
-        skb = sock_alloc_send_skb(sk, length+hh_len+15,
+                                  flags & MSG_DONTWAIT, &err);
-                                  flags&MSG_DONTWAIT, &err);
        if (skb == NULL)
                goto error;
-        skb_reserve(skb, hh_len);
+        skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
        skb->priority = sk->sk_priority;
        skb->mark = sk->sk_mark;
@@ -610,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout)
        sk_common_release(sk);
 }
+static int raw_destroy(struct sock *sk)
+{
+        lock_sock(sk);
+        ip_flush_pending_frames(sk);
+        release_sock(sk);
+        return 0;
+}
 /* This gets rid of all the nasties in af_inet. -DaveM */
 static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -822,6 +828,7 @@ struct proto raw_prot = {
        .name              = "RAW",
        .owner             = THIS_MODULE,
        .close             = raw_close,
+        .destroy           = raw_destroy,
        .connect           = ip4_datagram_connect,
        .disconnect        = udp_disconnect,
        .ioctl             = raw_ioctl,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92f90ae46f4..96be336064f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = {
        .negative_advice =      ipv4_negative_advice,
        .link_failure =         ipv4_link_failure,
        .update_pmtu =          ip_rt_update_pmtu,
-        .local_out =            ip_local_out,
+        .local_out =            __ip_local_out,
        .entry_size =           sizeof(struct rtable),
        .entries =              ATOMIC_INIT(0),
 };
@@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb,
        if (err)
                flags |= RTCF_DIRECTSRC;
-        if (out_dev == in_dev && err && !(flags & RTCF_MASQ) &&
+        if (out_dev == in_dev && err &&
            (IN_DEV_SHARED_MEDIA(out_dev) ||
             inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
                flags |= RTCF_DOREDIRECT;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 73ba98921d6..d182a2a2629 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
                cookie_check_timestamp(&tcp_opt);
        ret = NULL;
-        req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
+        req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
        if (!req)
                goto out;
@@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
        ireq->rmt_port          = th->source;
        ireq->loc_addr          = ip_hdr(skb)->daddr;
        ireq->rmt_addr          = ip_hdr(skb)->saddr;
-        ireq->opt               = NULL;
        ireq->snd_wscale        = tcp_opt.snd_wscale;
        ireq->rcv_wscale        = tcp_opt.rcv_wscale;
        ireq->sack_ok           = tcp_opt.sack_ok;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f8865313862..fc54a48fde1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1227,7 +1227,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                                copied += used;
                                offset += used;
                        }
-                        if (offset != skb->len)
+                        /*
+                         * If recv_actor drops the lock (e.g. TCP splice
+                         * receive) the skb pointer might be invalid when
+                         * getting here: tcp_collapse might have deleted it
+                         * while aggregating skbs from the socket queue.
+                         */
+                        skb = tcp_recv_skb(sk, seq-1, &offset);
+                        if (!skb || (offset+1 != skb->len))
                                break;
                }
                if (tcp_hdr(skb)->fin) {
@@ -2105,12 +2112,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                break;
        case TCP_DEFER_ACCEPT:
-                if (val < 0) {
+                icsk->icsk_accept_queue.rskq_defer_accept = 0;
-                        err = -EINVAL;
+                if (val > 0) {
-                } else {
+                        /* Translate value in seconds to number of
-                        if (val > MAX_TCP_ACCEPT_DEFERRED)
+                         * retransmits */
-                                val = MAX_TCP_ACCEPT_DEFERRED;
+                        while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
-                        icsk->icsk_accept_queue.rskq_defer_accept = val;
+                               val > ((TCP_TIMEOUT_INIT / HZ) <<
+                                       icsk->icsk_accept_queue.rskq_defer_accept))
+                                icsk->icsk_accept_queue.rskq_defer_accept++;
+                        icsk->icsk_accept_queue.rskq_defer_accept++;
                }
                break;
@@ -2292,7 +2302,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                        val = (val ? : sysctl_tcp_fin_timeout) / HZ;
                break;
        case TCP_DEFER_ACCEPT:
-                val = icsk->icsk_accept_queue.rskq_defer_accept;
+                val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
+                        ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
                break;
        case TCP_WINDOW_CLAMP:
                val = tp->window_clamp;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 26c936930e9..cad73b7dfef 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1392,9 +1392,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
        if (before(next_dup->start_seq, skip_to_seq)) {
                skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
-                tcp_sacktag_walk(skb, sk, NULL,
+                skb = tcp_sacktag_walk(skb, sk, NULL,
-                                 next_dup->start_seq, next_dup->end_seq,
+                                     next_dup->start_seq, next_dup->end_seq,
-                                 1, fack_count, reord, flag);
+                                     1, fack_count, reord, flag);
        }
        return skb;
@@ -1842,9 +1842,16 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                }
-                /* Don't lost mark skbs that were fwd transmitted after RTO */
+                /* Marking forward transmissions that were made after RTO lost
-                if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) &&
+                 * can cause unnecessary retransmissions in some scenarios,
-                    !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
+                 * SACK blocks will mitigate that in some but not in all cases.
+                 * We used to not mark them but it was causing break-ups with
+                 * receivers that do only in-order receival.
+                 *
+                 * TODO: we could detect presence of such receiver and select
+                 * different behavior per flow.
+                 */
+                if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                        tp->lost_out += tcp_skb_pcount(skb);
                }
@@ -1860,7 +1867,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
        tp->reordering = min_t(unsigned int, tp->reordering,
                               sysctl_tcp_reordering);
        tcp_set_ca_state(sk, TCP_CA_Loss);
-        tp->high_seq = tp->frto_highmark;
+        tp->high_seq = tp->snd_nxt;
        TCP_ECN_queue_cwr(tp);
        tcp_clear_retrans_hints_partial(tp);
@@ -2476,28 +2483,34 @@ static inline void tcp_complete_cwr(struct sock *sk)
        tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
+static void tcp_try_keep_open(struct sock *sk)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+        int state = TCP_CA_Open;
+        if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
+                state = TCP_CA_Disorder;
+        if (inet_csk(sk)->icsk_ca_state != state) {
+                tcp_set_ca_state(sk, state);
+                tp->high_seq = tp->snd_nxt;
+        }
+}
 static void tcp_try_to_open(struct sock *sk, int flag)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        tcp_verify_left_out(tp);
-        if (tp->retrans_out == 0)
+        if (!tp->frto_counter && tp->retrans_out == 0)
                tp->retrans_stamp = 0;
        if (flag & FLAG_ECE)
                tcp_enter_cwr(sk, 1);
        if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
-                int state = TCP_CA_Open;
+                tcp_try_keep_open(sk);
-                if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
-                        state = TCP_CA_Disorder;
-                if (inet_csk(sk)->icsk_ca_state != state) {
-                        tcp_set_ca_state(sk, state);
-                        tp->high_seq = tp->snd_nxt;
-                }
                tcp_moderate_cwnd(tp);
        } else {
                tcp_cwnd_down(sk, flag);
@@ -3303,8 +3316,11 @@ no_queue:
        return 1;
 old_ack:
-        if (TCP_SKB_CB(skb)->sacked)
+        if (TCP_SKB_CB(skb)->sacked) {
                tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+                if (icsk->icsk_ca_state == TCP_CA_Open)
+                        tcp_try_keep_open(sk);
+        }
 uninteresting_ack:
        SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -4525,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
        }
 }
-static int tcp_defer_accept_check(struct sock *sk)
-{
-        struct tcp_sock *tp = tcp_sk(sk);
-        if (tp->defer_tcp_accept.request) {
-                int queued_data =  tp->rcv_nxt - tp->copied_seq;
-                int hasfin =  !skb_queue_empty(&sk->sk_receive_queue) ?
-                        tcp_hdr((struct sk_buff *)
-                                sk->sk_receive_queue.prev)->fin : 0;
-                if (queued_data && hasfin)
-                        queued_data--;
-                if (queued_data &&
-                    tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
-                        if (sock_flag(sk, SOCK_KEEPOPEN)) {
-                                inet_csk_reset_keepalive_timer(sk,
-                                                               keepalive_time_when(tp));
-                        } else {
-                                inet_csk_delete_keepalive_timer(sk);
-                        }
-                        inet_csk_reqsk_queue_add(
-                                tp->defer_tcp_accept.listen_sk,
-                                tp->defer_tcp_accept.request,
-                                sk);
-                        tp->defer_tcp_accept.listen_sk->sk_data_ready(
-                                tp->defer_tcp_accept.listen_sk, 0);
-                        sock_put(tp->defer_tcp_accept.listen_sk);
-                        sock_put(sk);
-                        tp->defer_tcp_accept.listen_sk = NULL;
-                        tp->defer_tcp_accept.request = NULL;
-                } else if (hasfin ||
-                           tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
-                        tcp_reset(sk);
-                        return -1;
-                }
-        }
-        return 0;
-}
 static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 {
        struct tcp_sock *tp = tcp_sk(sk);
@@ -4928,8 +4901,6 @@ step5:
        tcp_data_snd_check(sk);
        tcp_ack_snd_check(sk);
-        tcp_defer_accept_check(sk);
        return 0;
 csum_error:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd601a866c2..97a230026e1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1285,7 +1285,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
                goto drop;
-        req = reqsk_alloc(&tcp_request_sock_ops);
+        req = inet_reqsk_alloc(&tcp_request_sock_ops);
        if (!req)
                goto drop;
@@ -1918,14 +1918,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
                sk->sk_sndmsg_page = NULL;
        }
-        if (tp->defer_tcp_accept.request) {
-                reqsk_free(tp->defer_tcp_accept.request);
-                sock_put(tp->defer_tcp_accept.listen_sk);
-                sock_put(sk);
-                tp->defer_tcp_accept.listen_sk = NULL;
-                tp->defer_tcp_accept.request = NULL;
-        }
        atomic_dec(&tcp_sockets_allocated);
        return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5c..8245247a6ce 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
           does sequence test, SYN is truncated, and thus we consider
           it a bare ACK.
-           Both ends (listening sockets) accept the new incoming
+           If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
-           connection and try to talk to each other. 8-)
+           bare ACK.  Otherwise, we create an established connection.  Both
+           ends (listening sockets) accept the new incoming connection and try
+           to talk to each other. 8-)
           Note: This case is both harmless, and rare.  Possibility is about the
           same as us discovering intelligent life on another plant tomorrow.
@@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
                if (!(flg & TCP_FLAG_ACK))
                        return NULL;
+                /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
+                if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+                    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+                        inet_rsk(req)->acked = 1;
+                        return NULL;
+                }
                /* OK, ACK is valid, create big socket and
                 * feed this segment to it. It will repeat all
                 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
                inet_csk_reqsk_queue_unlink(sk, req, prev);
                inet_csk_reqsk_queue_removed(sk, req);
-                if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+                inet_csk_reqsk_queue_add(sk, req, child);
-                    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
-                        /* the accept queue handling is done is est recv slow
-                         * path so lets make sure to start there
-                         */
-                        tcp_sk(child)->pred_flags = 0;
-                        sock_hold(sk);
-                        sock_hold(child);
-                        tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
-                        tcp_sk(child)->defer_tcp_accept.request = req;
-                        inet_csk_reset_keepalive_timer(child,
-                                                       inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
-                } else {
-                        inet_csk_reqsk_queue_add(sk, req, child);
-                }
                return child;
        listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index debf2358160..ad993ecb481 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1836,7 +1836,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
-        unsigned int cur_mss = tcp_current_mss(sk, 0);
+        unsigned int cur_mss;
        int err;
        /* Inconslusive MTU probe */
@@ -1858,6 +1858,11 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
                        return -ENOMEM;
        }
+        if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+                return -EHOSTUNREACH; /* Routing failure or similar. */
+        cur_mss = tcp_current_mss(sk, 0);
        /* If receiver has shrunk his window, and skb is out of
         * new window, do not retransmit it. The exception is the
         * case, when window is shrunk to zero. In this case
@@ -1884,9 +1889,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
            (sysctl_tcp_retrans_collapse != 0))
                tcp_retrans_try_collapse(sk, skb, cur_mss);
-        if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
-                return -EHOSTUNREACH; /* Routing failure or similar. */
        /* Some Solaris stacks overoptimize and ignore the FIN on a
         * retransmit when old data is attached.  So strip it off
         * since it is cheap to do so and saves bytes on the network.
@@ -2129,6 +2131,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
        TCP_SKB_CB(skb)->when = tcp_time_stamp;
        if (tcp_transmit_skb(sk, skb, 0, priority))
                NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+        TCP_INC_STATS(TCP_MIB_OUTRSTS);
 }
 /* WARNING: This routine must only be called when we have already sent
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4de68cf5f2a..63ed9d6830e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data)
                goto death;
        }
-        if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
-                tcp_send_active_reset(sk, GFP_ATOMIC);
-                goto death;
-        }
        if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
                goto out;
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index d3b709a6f26..cb1f0e83830 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
 {
        struct xfrm_tunnel *handler;
-        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                goto drop;
        for (handler = tunnel64_handlers; handler; handler = handler->next)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db1cb7c96d6..56fcda3694b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info)
 /*
 * Throw away all pending data and cancel the corking. Socket is locked.
 */
-static void udp_flush_pending_frames(struct sock *sk)
+void udp_flush_pending_frames(struct sock *sk)
 {
        struct udp_sock *up = udp_sk(sk);
@@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk)
                ip_flush_pending_frames(sk);
        }
 }
+EXPORT_SYMBOL(udp_flush_pending_frames);
 /**
 *      udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
author	Ingo Molnar <mingo@elte.hu>	2008-06-16 05:20:57 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-06-16 05:20:57 -0400
commit	fd2c17e1777d46cff14c25ea774a4d17459d188a (patch)
tree	e8299216de8e99418195ba64bcf8f679df3a66b3 /net/ipv4
parent	74e411cb6443d8bcb55fbe89fcc7a9ee574df91b (diff)
parent	066519068ad2fbe98c7f45552b1f592903a9c8c8 (diff)