Diffstat (limited to 'net')
 net/core/Makefile        |   3
 net/core/neighbour.c     | 333
 net/core/request_sock.c  |  64
 net/core/rtnetlink.c     |  33
 net/core/sock.c          |  35
 net/decnet/dn_dev.c      |   9
 net/decnet/dn_neigh.c    |   1
 net/decnet/dn_route.c    |  11
 net/decnet/dn_rules.c    |   7
 net/decnet/dn_table.c    |   8
 net/ipv4/devinet.c       |   9
 net/ipv4/fib_hash.c      |   3
 net/ipv4/fib_lookup.h    |   3
 net/ipv4/fib_rules.c     |   7
 net/ipv4/fib_semantics.c |  10
 net/ipv4/ip_sockglue.c   |   6
 net/ipv4/raw.c           |  22
 net/ipv4/route.c         |  11
 net/ipv4/syncookies.c    |  49
 net/ipv4/tcp.c           |  84
 net/ipv4/tcp_diag.c      |  37
 net/ipv4/tcp_ipv4.c      | 172
 net/ipv4/tcp_minisocks.c |  68
 net/ipv4/tcp_output.c    |  27
 net/ipv4/tcp_timer.c     |  18
 net/ipv6/addrconf.c      |  57
 net/ipv6/datagram.c      |   6
 net/ipv6/raw.c           |   8
 net/ipv6/route.c         |  11
 net/ipv6/tcp_ipv6.c      | 148
 net/ipv6/udp.c           |   4
 net/key/af_key.c         | 369
 net/netlink/af_netlink.c |   8
 net/sched/act_api.c      |  11
 net/sched/cls_api.c      |   5
 net/sched/sch_api.c      |  10
 net/sched/sch_dsmark.c   | 357
 net/sched/sch_fifo.c     | 152
 net/sched/sch_generic.c  |  84
 net/sctp/socket.c        |   8
 net/xfrm/xfrm_policy.c   |   8
 net/xfrm/xfrm_state.c    |  81
 net/xfrm/xfrm_user.c     | 288
 43 files changed, 1707 insertions(+), 938 deletions(-)
diff --git a/net/core/Makefile b/net/core/Makefile
index 81f03243fe2f..5e0c56b7f607 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -2,7 +2,8 @@
 # Makefile for the Linux networking core.
 #
 
-obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
+obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
+	 gen_stats.o gen_estimator.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 43bdc521e20d..f6bdcad47da6 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1276,9 +1276,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 		INIT_RCU_HEAD(&p->rcu_head);
 		p->reachable_time =
 				neigh_rand_reach_time(p->base_reachable_time);
-		if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) {
-			kfree(p);
-			return NULL;
+		if (dev) {
+			if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+				kfree(p);
+				return NULL;
+			}
+
+			dev_hold(dev);
+			p->dev = dev;
 		}
 		p->sysctl_table = NULL;
 		write_lock_bh(&tbl->lock);
@@ -1309,6 +1314,8 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
 			*p = parms->next;
 			parms->dead = 1;
 			write_unlock_bh(&tbl->lock);
+			if (parms->dev)
+				dev_put(parms->dev);
 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
 			return;
 		}
@@ -1546,20 +1553,323 @@ out:
 	return err;
 }
 
+static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
+{
+	struct rtattr *nest = NULL;
+
+	nest = RTA_NEST(skb, NDTA_PARMS);
+
+	if (parms->dev)
+		RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+
+	RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+	RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+	RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+	RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+	RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+	RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+	RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+	RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+		      parms->base_reachable_time);
+	RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+	RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+	RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+	RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+	RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+	RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+
+	return RTA_NEST_END(skb, nest);
+
+rtattr_failure:
+	return RTA_NEST_CANCEL(skb, nest);
+}
+
+static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
+			      struct netlink_callback *cb)
+{
+	struct nlmsghdr *nlh;
+	struct ndtmsg *ndtmsg;
+
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
+			       NLM_F_MULTI);
+
+	ndtmsg = NLMSG_DATA(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+
+	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+	RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+	RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+	RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+	RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+
+	{
+		unsigned long now = jiffies;
+		unsigned int flush_delta = now - tbl->last_flush;
+		unsigned int rand_delta = now - tbl->last_rand;
+
+		struct ndt_config ndc = {
+			.ndtc_key_len = tbl->key_len,
+			.ndtc_entry_size = tbl->entry_size,
+			.ndtc_entries = atomic_read(&tbl->entries),
+			.ndtc_last_flush = jiffies_to_msecs(flush_delta),
+			.ndtc_last_rand = jiffies_to_msecs(rand_delta),
+			.ndtc_hash_rnd = tbl->hash_rnd,
+			.ndtc_hash_mask = tbl->hash_mask,
+			.ndtc_hash_chain_gc = tbl->hash_chain_gc,
+			.ndtc_proxy_qlen = tbl->proxy_queue.qlen,
+		};
+
+		RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+	}
+
+	{
+		int cpu;
+		struct ndt_stats ndst;
+
+		memset(&ndst, 0, sizeof(ndst));
+
+		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			struct neigh_statistics *st;
+
+			if (!cpu_possible(cpu))
+				continue;
+
+			st = per_cpu_ptr(tbl->stats, cpu);
+			ndst.ndts_allocs += st->allocs;
+			ndst.ndts_destroys += st->destroys;
+			ndst.ndts_hash_grows += st->hash_grows;
+			ndst.ndts_res_failed += st->res_failed;
+			ndst.ndts_lookups += st->lookups;
+			ndst.ndts_hits += st->hits;
+			ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
+			ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
+			ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
+			ndst.ndts_forced_gc_runs += st->forced_gc_runs;
+		}
+
+		RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+	}
+
+	BUG_ON(tbl->parms.dev);
+	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
+		goto rtattr_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_END(skb, nlh);
+
+rtattr_failure:
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_CANCEL(skb, nlh);
+
+nlmsg_failure:
+	return -1;
+}
+
+static int neightbl_fill_param_info(struct neigh_table *tbl,
+				    struct neigh_parms *parms,
+				    struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	struct ndtmsg *ndtmsg;
+	struct nlmsghdr *nlh;
+
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
+			       NLM_F_MULTI);
+
+	ndtmsg = NLMSG_DATA(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+
+	if (neightbl_fill_parms(skb, parms) < 0)
+		goto rtattr_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_END(skb, nlh);
+
+rtattr_failure:
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_CANCEL(skb, nlh);
+
+nlmsg_failure:
+	return -1;
+}
+
+static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+						      int ifindex)
+{
+	struct neigh_parms *p;
+
+	for (p = &tbl->parms; p; p = p->next)
+		if ((p->dev && p->dev->ifindex == ifindex) ||
+		    (!p->dev && !ifindex))
+			return p;
+
+	return NULL;
+}
+
+int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct neigh_table *tbl;
+	struct ndtmsg *ndtmsg = NLMSG_DATA(nlh);
+	struct rtattr **tb = arg;
+	int err = -EINVAL;
+
+	if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1]))
+		return -EINVAL;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
+			continue;
+
+		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
+			break;
+	}
+
+	if (tbl == NULL) {
+		err = -ENOENT;
+		goto errout;
+	}
+
+	/*
+	 * We acquire tbl->lock to be nice to the periodic timers and
+	 * make sure they always see a consistent set of values.
+	 */
+	write_lock_bh(&tbl->lock);
+
+	if (tb[NDTA_THRESH1 - 1])
+		tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]);
+
+	if (tb[NDTA_THRESH2 - 1])
+		tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
+
+	if (tb[NDTA_THRESH3 - 1])
+		tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
+
+	if (tb[NDTA_GC_INTERVAL - 1])
+		tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
+
+	if (tb[NDTA_PARMS - 1]) {
+		struct rtattr *tbp[NDTPA_MAX];
+		struct neigh_parms *p;
+		u32 ifindex = 0;
+
+		if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0)
+			goto rtattr_failure;
+
+		if (tbp[NDTPA_IFINDEX - 1])
+			ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]);
+
+		p = lookup_neigh_params(tbl, ifindex);
+		if (p == NULL) {
+			err = -ENOENT;
+			goto rtattr_failure;
+		}
+
+		if (tbp[NDTPA_QUEUE_LEN - 1])
+			p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
+
+		if (tbp[NDTPA_PROXY_QLEN - 1])
+			p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
+
+		if (tbp[NDTPA_APP_PROBES - 1])
+			p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
+
+		if (tbp[NDTPA_UCAST_PROBES - 1])
+			p->ucast_probes =
+				RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]);
+
+		if (tbp[NDTPA_MCAST_PROBES - 1])
+			p->mcast_probes =
+				RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]);
+
+		if (tbp[NDTPA_BASE_REACHABLE_TIME - 1])
+			p->base_reachable_time =
+				RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]);
+
+		if (tbp[NDTPA_GC_STALETIME - 1])
+			p->gc_staletime =
+				RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]);
+
+		if (tbp[NDTPA_DELAY_PROBE_TIME - 1])
+			p->delay_probe_time =
+				RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]);
+
+		if (tbp[NDTPA_RETRANS_TIME - 1])
+			p->retrans_time =
+				RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]);
+
+		if (tbp[NDTPA_ANYCAST_DELAY - 1])
+			p->anycast_delay =
+				RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]);
+
+		if (tbp[NDTPA_PROXY_DELAY - 1])
+			p->proxy_delay =
+				RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]);
+
+		if (tbp[NDTPA_LOCKTIME - 1])
+			p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]);
+	}
+
+	err = 0;
+
+rtattr_failure:
+	write_unlock_bh(&tbl->lock);
+errout:
+	read_unlock(&neigh_tbl_lock);
+	return err;
+}
+
+int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx, family;
+	int s_idx = cb->args[0];
+	struct neigh_table *tbl;
+
+	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) {
+		struct neigh_parms *p;
+
+		if (idx < s_idx || (family && tbl->family != family))
+			continue;
+
+		if (neightbl_fill_info(tbl, skb, cb) <= 0)
+			break;
+
+		for (++idx, p = tbl->parms.next; p; p = p->next, idx++) {
+			if (idx < s_idx)
+				continue;
+
+			if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0)
+				goto out;
+		}
+
+	}
+out:
+	read_unlock(&neigh_tbl_lock);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
 
 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
-			   u32 pid, u32 seq, int event)
+			   u32 pid, u32 seq, int event, unsigned int flags)
 {
 	unsigned long now = jiffies;
 	unsigned char *b = skb->tail;
 	struct nda_cacheinfo ci;
 	int locked = 0;
 	u32 probes;
-	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event,
-					 sizeof(struct ndmsg));
+	struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event,
+					 sizeof(struct ndmsg), flags);
 	struct ndmsg *ndm = NLMSG_DATA(nlh);
 
-	nlh->nlmsg_flags = pid ? NLM_F_MULTI : 0;
 	ndm->ndm_family = n->ops->family;
 	ndm->ndm_flags = n->flags;
 	ndm->ndm_type = n->type;
@@ -1609,7 +1919,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 				continue;
 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
 					    cb->nlh->nlmsg_seq,
-					    RTM_NEWNEIGH) <= 0) {
+					    RTM_NEWNEIGH,
+					    NLM_F_MULTI) <= 0) {
 				read_unlock_bh(&tbl->lock);
 				rc = -1;
 				goto out;
@@ -2018,7 +2329,7 @@ void neigh_app_ns(struct neighbour *n)
 	if (!skb)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -2037,7 +2348,7 @@ static void neigh_app_notify(struct neighbour *n)
 	if (!skb)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -2352,6 +2663,8 @@ EXPORT_SYMBOL(neigh_update);
 EXPORT_SYMBOL(neigh_update_hhs);
 EXPORT_SYMBOL(pneigh_enqueue);
 EXPORT_SYMBOL(pneigh_lookup);
+EXPORT_SYMBOL(neightbl_dump_info);
+EXPORT_SYMBOL(neightbl_set);
 
 #ifdef CONFIG_ARPD
 EXPORT_SYMBOL(neigh_app_ns);
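
[Annotation] The new neightbl code lets userspace dump and tune neighbour-table parameters; lookup_neigh_params() resolves NDTPA_IFINDEX by treating ifindex 0 as "the table-wide default parms" and any other value as a per-device entry. A minimal userspace sketch of just that fallback rule, with hypothetical stand-in types (not the kernel's structures):

#include <stdio.h>

struct parms {
	int ifindex;            /* 0 marks the table-wide defaults */
	struct parms *next;
};

static struct parms *lookup(struct parms *head, int ifindex)
{
	struct parms *p;

	/* first match wins, exactly like walking tbl->parms */
	for (p = head; p; p = p->next)
		if (p->ifindex == ifindex)
			return p;
	return NULL;            /* no such device: caller returns -ENOENT */
}

int main(void)
{
	struct parms eth0 = { .ifindex = 2, .next = NULL };
	struct parms dflt = { .ifindex = 0, .next = &eth0 };

	printf("ifindex 0 -> %s\n", lookup(&dflt, 0) ? "defaults" : "none");
	printf("ifindex 2 -> %s\n", lookup(&dflt, 2) ? "eth0 parms" : "none");
	printf("ifindex 9 -> %s\n", lookup(&dflt, 9) ? "found" : "none");
	return 0;
}
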
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
new file mode 100644
index 000000000000..bb55675f0685
--- /dev/null
+++ b/net/core/request_sock.c
@@ -0,0 +1,64 @@
+/*
+ * NET		Generic infrastructure for Network protocols.
+ *
+ * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *		From code originally in include/net/tcp.h
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <net/request_sock.h>
+
+/*
+ * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
+ * One SYN_RECV socket costs about 80bytes on a 32bit machine.
+ * It would be better to replace it with a global counter for all sockets
+ * but then some measure against one socket starving all other sockets
+ * would be needed.
+ *
+ * It was 128 by default. Experiments with real servers show, that
+ * it is absolutely not enough even at 100conn/sec. 256 cures most
+ * of problems. This value is adjusted to 128 for very small machines
+ * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
+ * Further increasing requires to change hash table size.
+ */
+int sysctl_max_syn_backlog = 256;
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
+
+int reqsk_queue_alloc(struct request_sock_queue *queue,
+		      const int nr_table_entries)
+{
+	const int lopt_size = sizeof(struct listen_sock) +
+			      nr_table_entries * sizeof(struct request_sock *);
+	struct listen_sock *lopt = kmalloc(lopt_size, GFP_KERNEL);
+
+	if (lopt == NULL)
+		return -ENOMEM;
+
+	memset(lopt, 0, lopt_size);
+
+	for (lopt->max_qlen_log = 6;
+	     (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
+	     lopt->max_qlen_log++);
+
+	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
+	rwlock_init(&queue->syn_wait_lock);
+	queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+
+	write_lock_bh(&queue->syn_wait_lock);
+	queue->listen_opt = lopt;
+	write_unlock_bh(&queue->syn_wait_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(reqsk_queue_alloc);
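
[Annotation] The sizing loop in reqsk_queue_alloc() picks the smallest exponent n >= 6 such that (1 << n) >= sysctl_max_syn_backlog, so the SYN-queue length bound is always a power of two. A standalone C sketch of that computation (plain userspace code, nothing kernel-specific):

#include <stdio.h>

static unsigned int max_qlen_log(int max_syn_backlog)
{
	unsigned int n;

	/* same loop shape as reqsk_queue_alloc() above */
	for (n = 6; (1 << n) < max_syn_backlog; n++)
		;
	return n;
}

int main(void)
{
	int backlogs[] = { 64, 128, 256, 1000, 1024 };
	unsigned int i;

	for (i = 0; i < sizeof(backlogs) / sizeof(backlogs[0]); i++)
		printf("backlog %4d -> max_qlen_log %2u (limit %d)\n",
		       backlogs[i], max_qlen_log(backlogs[i]),
		       1 << max_qlen_log(backlogs[i]));
	return 0;
}
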
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 00caf4b318b2..e013d836a7ab 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -100,6 +100,7 @@ static const int rtm_min[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+	[RTM_FAM(RTM_NEWNEIGHTBL)] = NLMSG_LENGTH(sizeof(struct ndtmsg)),
 };
 
 static const int rta_max[RTM_NR_FAMILIES] =
@@ -113,6 +114,7 @@ static const int rta_max[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX,
 	[RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX,
 	[RTM_FAM(RTM_NEWACTION)] = TCAA_MAX,
+	[RTM_FAM(RTM_NEWNEIGHTBL)] = NDTA_MAX,
 };
 
 void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
@@ -176,14 +178,14 @@ rtattr_failure:
 
 
 static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
-				 int type, u32 pid, u32 seq, u32 change)
+				 int type, u32 pid, u32 seq, u32 change,
+				 unsigned int flags)
 {
 	struct ifinfomsg *r;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
 	r->ifi_family = AF_UNSPEC;
 	r->ifi_type = dev->type;
@@ -273,7 +275,10 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c
 	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
+		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
+					  NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq, 0,
+					  NLM_F_MULTI) <= 0)
 			break;
 	}
 	read_unlock(&dev_base_lock);
@@ -447,7 +452,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	if (!skb)
 		return;
 
-	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) {
+	if (rtnetlink_fill_ifinfo(skb, dev, type, current->pid, 0, change, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -649,14 +654,16 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 
 static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
 	[RTM_GETLINK - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
 	[RTM_SETLINK - RTM_BASE] = { .doit = do_setlink },
 	[RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
 	[RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
 	[RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add },
 	[RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete },
 	[RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info },
 	[RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
+	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info },
+	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set },
 };
 
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
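
[Annotation] The NLMSG_PUT -> NLMSG_NEW conversions throughout this series move the multipart decision to the callers: dump replies are flagged NLM_F_MULTI (and terminated with NLMSG_DONE by the netlink core), while event notifications go out with flags 0. A small Linux-only sketch filling a header the way a dump callback would; the numeric RTM_NEWLINK value comes from the rtnetlink UAPI:

#include <stdio.h>
#include <string.h>
#include <linux/netlink.h>

int main(void)
{
	struct nlmsghdr nlh;

	memset(&nlh, 0, sizeof(nlh));
	nlh.nlmsg_type = 16;           /* RTM_NEWLINK */
	nlh.nlmsg_flags = NLM_F_MULTI; /* part of a multipart dump */
	nlh.nlmsg_seq = 42;            /* echoed from the request */

	printf("type=%u flags=%#x seq=%u multipart=%s\n",
	       nlh.nlmsg_type, nlh.nlmsg_flags, nlh.nlmsg_seq,
	       (nlh.nlmsg_flags & NLM_F_MULTI) ? "yes" : "no");
	return 0;
}
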
diff --git a/net/core/sock.c b/net/core/sock.c
index 96e00b08698f..a6ec3ada7f9e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -118,6 +118,7 @@
 #include <linux/netdevice.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
+#include <net/request_sock.h>
 #include <net/sock.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
@@ -1363,6 +1364,7 @@ static LIST_HEAD(proto_list);
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
+	char *request_sock_slab_name;
 	int rc = -ENOBUFS;
 
 	if (alloc_slab) {
@@ -1374,6 +1376,25 @@ int proto_register(struct proto *prot, int alloc_slab)
 			       prot->name);
 			goto out;
 		}
+
+		if (prot->rsk_prot != NULL) {
+			static const char mask[] = "request_sock_%s";
+
+			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+			if (request_sock_slab_name == NULL)
+				goto out_free_sock_slab;
+
+			sprintf(request_sock_slab_name, mask, prot->name);
+			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
+								 prot->rsk_prot->obj_size, 0,
+								 SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+			if (prot->rsk_prot->slab == NULL) {
+				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
+				       prot->name);
+				goto out_free_request_sock_slab_name;
+			}
+		}
 	}
 
 	write_lock(&proto_list_lock);
@@ -1382,6 +1403,12 @@ int proto_register(struct proto *prot, int alloc_slab)
 	rc = 0;
 out:
 	return rc;
+out_free_request_sock_slab_name:
+	kfree(request_sock_slab_name);
+out_free_sock_slab:
+	kmem_cache_destroy(prot->slab);
+	prot->slab = NULL;
+	goto out;
 }
 
 EXPORT_SYMBOL(proto_register);
@@ -1395,6 +1422,14 @@ void proto_unregister(struct proto *prot)
 		prot->slab = NULL;
 	}
 
+	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
+		const char *name = kmem_cache_name(prot->rsk_prot->slab);
+
+		kmem_cache_destroy(prot->rsk_prot->slab);
+		kfree(name);
+		prot->rsk_prot->slab = NULL;
+	}
+
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 }
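
[Annotation] The new error paths in proto_register() follow the usual kernel goto-unwind pattern: each failure jumps to a label that releases exactly what was already acquired, in reverse order of acquisition. A userspace illustration of the pattern, with malloc'd buffers standing in for the two slab caches:

#include <stdio.h>
#include <stdlib.h>

static int register_thing(int fail_late)
{
	char *slab, *name;

	slab = malloc(32);              /* first resource (the sock slab) */
	if (slab == NULL)
		goto out;

	name = malloc(32);              /* second resource (the cache name) */
	if (name == NULL)
		goto out_free_slab;

	if (fail_late)                  /* e.g. the request_sock cache fails */
		goto out_free_name;

	free(name);                     /* success path owns both; drop here */
	free(slab);
	return 0;

out_free_name:
	free(name);
out_free_slab:
	free(slab);
out:
	return -1;
}

int main(void)
{
	printf("success path: %d\n", register_thing(0));
	printf("failure path: %d\n", register_thing(1));
	return 0;
}
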
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index ee7bf46eb78a..00233ecbc9cb 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -716,13 +716,13 @@ static int dn_dev_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *a
 }
 
 static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
-			      u32 pid, u32 seq, int event)
+			      u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 
 	ifm->ifa_family = AF_DECnet;
@@ -755,7 +755,7 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
 		netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS);
 		return;
 	}
-	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL);
 		return;
@@ -790,7 +790,8 @@ static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 				if (dn_dev_fill_ifaddr(skb, ifa,
 						       NETLINK_CB(cb->skb).pid,
 						       cb->nlh->nlmsg_seq,
-						       RTM_NEWADDR) <= 0)
+						       RTM_NEWADDR,
+						       NLM_F_MULTI) <= 0)
 					goto done;
 			}
 		}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index f6dfe96f45b7..f32dba9e26fe 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -101,7 +101,6 @@ struct neigh_table dn_neigh_table = {
 	.id = "dn_neigh_cache",
 	.parms ={
 		.tbl = &dn_neigh_table,
-		.entries = 0,
 		.base_reachable_time = 30 * HZ,
 		.retrans_time = 1 * HZ,
 		.gc_staletime = 60 * HZ,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 1e7b5c3ea215..2399fa8a3f86 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1465,7 +1465,8 @@ int dn_route_input(struct sk_buff *skb)
 	return dn_route_input_slow(skb);
 }
 
-static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait)
+static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+			   int event, int nowait, unsigned int flags)
 {
 	struct dn_route *rt = (struct dn_route *)skb->dst;
 	struct rtmsg *r;
@@ -1473,9 +1474,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int
 	unsigned char *b = skb->tail;
 	struct rta_cacheinfo ci;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
-	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
 	r->rtm_family = AF_DECnet;
 	r->rtm_dst_len = 16;
 	r->rtm_src_len = 0;
@@ -1596,7 +1596,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 
 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
 
-	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0);
+	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
 
 	if (err == 0)
 		goto out_free;
@@ -1644,7 +1644,8 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			skb->dst = dst_clone(&rt->u.dst);
 			if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
-					cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) {
+					cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+					1, NLM_F_MULTI) <= 0) {
 				dst_release(xchg(&skb->dst, NULL));
 				rcu_read_unlock_bh();
 				goto done;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 597587d170d8..1060de70bc0c 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -342,14 +342,15 @@ static struct notifier_block dn_fib_rules_notifier = {
 	.notifier_call = dn_fib_rules_event,
 };
 
-static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, struct netlink_callback *cb)
+static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r,
+			    struct netlink_callback *cb, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
 
-	nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm));
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_DECnet;
 	rtm->rtm_dst_len = r->r_dst_len;
@@ -394,7 +395,7 @@ int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 	for(r = dn_fib_rules, idx = 0; r; r = r->r_next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (dn_fib_fill_rule(skb, r, cb) < 0)
+		if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
 			break;
 	}
 	read_unlock(&dn_fib_rules_lock);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index dad5603912be..28ba5777a25a 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -270,13 +270,13 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
 
 static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			    u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
-			    struct dn_fib_info *fi)
+			    struct dn_fib_info *fi, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_DECnet;
 	rtm->rtm_dst_len = dst_len;
@@ -345,7 +345,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
 
 	if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
 			     f->fn_type, f->fn_scope, &f->fn_key, z,
-			     DN_FIB_INFO(f)) < 0) {
+			     DN_FIB_INFO(f), 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -377,7 +377,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
 				tb->n,
 				(f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
 				f->fn_scope, &f->fn_key, dz->dz_order,
-				f->fn_info) < 0) {
+				f->fn_info, NLM_F_MULTI) < 0) {
 			cb->args[3] = i;
 			return -1;
 		}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 478a30179a52..650dcb12d9a1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1030,14 +1030,13 @@ static struct notifier_block ip_netdev_notifier = {
 };
 
 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
-			    u32 pid, u32 seq, int event)
+			    u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 	ifm->ifa_family = AF_INET;
 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
@@ -1090,7 +1089,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
 					     cb->nlh->nlmsg_seq,
-					     RTM_NEWADDR) <= 0) {
+					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
 				rcu_read_unlock();
 				goto done;
 			}
@@ -1113,7 +1112,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
 
 	if (!skb)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS);
-	else if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+	else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL);
 	} else {
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 6506dcc01b46..b10d6bb5ef3d 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -703,7 +703,8 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 				  &f->fn_key,
 				  fz->fz_order,
 				  fa->fa_tos,
-				  fa->fa_info) < 0) {
+				  fa->fa_info,
+				  NLM_F_MULTI) < 0) {
 			cb->args[3] = i;
 			return -1;
 		}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ac4485f75e97..b729d97cfa93 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -30,7 +30,8 @@ extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
 			struct kern_rta *rta, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			 u8 tb_id, u8 type, u8 scope, void *dst,
-			 int dst_len, u8 tos, struct fib_info *fi);
+			 int dst_len, u8 tos, struct fib_info *fi,
+			 unsigned int);
 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 		      int z, int tb_id,
 		      struct nlmsghdr *n, struct netlink_skb_parms *req);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 39d0aadb9a2a..0b298bbc1518 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -367,13 +367,14 @@ static struct notifier_block fib_rules_notifier = {
 
 static __inline__ int inet_fill_rule(struct sk_buff *skb,
 				     struct fib_rule *r,
-				     struct netlink_callback *cb)
+				     struct netlink_callback *cb,
+				     unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm));
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_INET;
 	rtm->rtm_dst_len = r->r_dst_len;
@@ -422,7 +423,7 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 	for (r=fib_rules, idx=0; r; r = r->r_next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (inet_fill_rule(skb, r, cb) < 0)
+		if (inet_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
 			break;
 	}
 	read_unlock(&fib_rules_lock);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 029362d66135..c886b28ba9f5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -276,7 +276,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 	       struct nlmsghdr *n, struct netlink_skb_parms *req)
 {
 	struct sk_buff *skb;
-	u32 pid = req ? req->pid : 0;
+	u32 pid = req ? req->pid : n->nlmsg_pid;
 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
 
 	skb = alloc_skb(size, GFP_KERNEL);
@@ -286,7 +286,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
 			  fa->fa_type, fa->fa_scope, &key, z,
 			  fa->fa_tos,
-			  fa->fa_info) < 0) {
+			  fa->fa_info, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -932,13 +932,13 @@ u32 __fib_res_prefsrc(struct fib_result *res)
 int
 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
-	      struct fib_info *fi)
+	      struct fib_info *fi, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_INET;
 	rtm->rtm_dst_len = dst_len;
@@ -1035,7 +1035,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
 	}
 
 	nl->nlmsg_flags = NLM_F_REQUEST;
-	nl->nlmsg_pid = 0;
+	nl->nlmsg_pid = current->pid;
 	nl->nlmsg_seq = 0;
 	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
 	if (cmd == SIOCDELRT) {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 47012b93cad2..f8b172f89811 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -360,14 +360,14 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
 	err = copied;
 
 	/* Reset and regenerate socket error */
-	spin_lock_irq(&sk->sk_error_queue.lock);
+	spin_lock_bh(&sk->sk_error_queue.lock);
 	sk->sk_err = 0;
 	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
 		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
-		spin_unlock_irq(&sk->sk_error_queue.lock);
+		spin_unlock_bh(&sk->sk_error_queue.lock);
 		sk->sk_error_report(sk);
 	} else
-		spin_unlock_irq(&sk->sk_error_queue.lock);
+		spin_unlock_bh(&sk->sk_error_queue.lock);
 
 out_free_skb:
 	kfree_skb(skb);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5b1ec586bae6..d1835b1bc8c4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -259,7 +259,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-static int raw_send_hdrinc(struct sock *sk, void *from, int length,
+static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 			   struct rtable *rt,
 			   unsigned int flags)
 {
@@ -298,7 +298,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, int length,
 		goto error_fault;
 
 	/* We don't modify invalid header */
-	if (length >= sizeof(*iph) && iph->ihl * 4 <= length) {
+	if (length >= sizeof(*iph) && iph->ihl * 4U <= length) {
 		if (!iph->saddr)
 			iph->saddr = rt->rt_src;
 		iph->check = 0;
@@ -332,7 +332,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 	u8 __user *type = NULL;
 	u8 __user *code = NULL;
 	int probed = 0;
-	int i;
+	unsigned int i;
 
 	if (!msg->msg_iov)
 		return;
@@ -384,7 +384,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int err;
 
 	err = -EMSGSIZE;
-	if (len < 0 || len > 0xFFFF)
+	if (len > 0xFFFF)
 		goto out;
 
 	/*
@@ -514,7 +514,10 @@ done:
 		kfree(ipc.opt);
 	ip_rt_put(rt);
 
-out:	return err < 0 ? err : len;
+out:
+	if (err < 0)
+		return err;
+	return len;
 
 do_confirm:
 	dst_confirm(&rt->u.dst);
@@ -610,7 +613,10 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		copied = skb->len;
 done:
 	skb_free_datagram(sk, skb);
-out:	return err ? err : copied;
+out:
+	if (err)
+		return err;
+	return copied;
 }
 
 static int raw_init(struct sock *sk)
@@ -691,11 +697,11 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
 			struct sk_buff *skb;
 			int amount = 0;
 
-			spin_lock_irq(&sk->sk_receive_queue.lock);
+			spin_lock_bh(&sk->sk_receive_queue.lock);
 			skb = skb_peek(&sk->sk_receive_queue);
 			if (skb != NULL)
 				amount = skb->len;
-			spin_unlock_irq(&sk->sk_receive_queue.lock);
+			spin_unlock_bh(&sk->sk_receive_queue.lock);
 			return put_user(amount, (int __user *)arg);
 		}
 
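
[Annotation] The raw.c type changes (length becomes size_t, the now-dead "len < 0" test is dropped, and ihl is multiplied by 4U) all guard against the same class of bug: once one side of a comparison is unsigned, a negative signed value on the other side is converted to a huge unsigned number. A standalone demonstration of the general pitfall (the -4 is contrived, not a value the kernel code would actually compute):

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	int signed_val = -4;       /* pretend an expression went negative */
	size_t length = 100;

	/* usual arithmetic conversions turn -4 into a huge size_t */
	if (signed_val <= length)
		printf("-4 <= 100u: taken\n");
	else
		printf("-4 <= 100u: NOT taken, -4 became %zu\n",
		       (size_t)signed_val);

	/* a size_t can never be negative, so "len < 0" is dead code */
	printf("(size_t)-1 = %zu\n", (size_t)-1);
	return 0;
}
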
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a682d28e247b..f4d53c919869 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2581,7 +2581,7 @@ int ip_route_output_key(struct rtable **rp, struct flowi *flp)
 }
 
 static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-			int nowait)
+			int nowait, unsigned int flags)
 {
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtmsg *r;
@@ -2591,9 +2591,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 #ifdef CONFIG_IP_MROUTE
 	struct rtattr *eptr;
 #endif
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
-	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
 	r->rtm_family = AF_INET;
 	r->rtm_dst_len = 32;
 	r->rtm_src_len = 0;
@@ -2744,7 +2743,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
 
 	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
-			   RTM_NEWROUTE, 0);
+			   RTM_NEWROUTE, 0, 0);
 	if (!err)
 		goto out_free;
 	if (err < 0) {
@@ -2781,8 +2780,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		skb->dst = dst_clone(&rt->u.dst);
 		if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
-				 cb->nlh->nlmsg_seq,
-				 RTM_NEWROUTE, 1) <= 0) {
+				 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+				 1, NLM_F_MULTI) <= 0) {
 			dst_release(xchg(&skb->dst, NULL));
 			rcu_read_unlock_bh();
 			goto done;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e923d2f021aa..72d014442185 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -169,10 +169,10 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 	return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
 }
 
-extern struct or_calltable or_ipv4;
+extern struct request_sock_ops tcp_request_sock_ops;
 
 static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
-					   struct open_request *req,
+					   struct request_sock *req,
 					   struct dst_entry *dst)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -182,7 +182,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 	if (child)
 		tcp_acceptq_queue(sk, req, child);
 	else
-		tcp_openreq_free(req);
+		reqsk_free(req);
 
 	return child;
 }
@@ -190,10 +190,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			     struct ip_options *opt)
 {
+	struct inet_request_sock *ireq;
+	struct tcp_request_sock *treq;
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 cookie = ntohl(skb->h.th->ack_seq) - 1;
 	struct sock *ret = sk;
-	struct open_request *req;
+	struct request_sock *req;
 	int mss;
 	struct rtable *rt;
 	__u8 rcv_wscale;
@@ -209,19 +211,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
 
-	req = tcp_openreq_alloc();
 	ret = NULL;
+	req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
 	if (!req)
 		goto out;
 
-	req->rcv_isn = htonl(skb->h.th->seq) - 1;
-	req->snt_isn = cookie;
+	ireq = inet_rsk(req);
+	treq = tcp_rsk(req);
+	treq->rcv_isn = htonl(skb->h.th->seq) - 1;
+	treq->snt_isn = cookie;
 	req->mss = mss;
-	req->rmt_port = skb->h.th->source;
-	req->af.v4_req.loc_addr = skb->nh.iph->daddr;
-	req->af.v4_req.rmt_addr = skb->nh.iph->saddr;
-	req->class = &or_ipv4; /* for savety */
-	req->af.v4_req.opt = NULL;
+	ireq->rmt_port = skb->h.th->source;
+	ireq->loc_addr = skb->nh.iph->daddr;
+	ireq->rmt_addr = skb->nh.iph->saddr;
+	ireq->opt = NULL;
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -229,17 +232,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	if (opt && opt->optlen) {
 		int opt_size = sizeof(struct ip_options) + opt->optlen;
 
-		req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC);
-		if (req->af.v4_req.opt) {
-			if (ip_options_echo(req->af.v4_req.opt, skb)) {
-				kfree(req->af.v4_req.opt);
-				req->af.v4_req.opt = NULL;
-			}
+		ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
+		if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) {
+			kfree(ireq->opt);
+			ireq->opt = NULL;
 		}
 	}
 
-	req->snd_wscale = req->rcv_wscale = req->tstamp_ok = 0;
-	req->wscale_ok = req->sack_ok = 0;
+	ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0;
+	ireq->wscale_ok = ireq->sack_ok = 0;
 	req->expires = 0UL;
 	req->retrans = 0;
 
@@ -253,15 +254,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		struct flowi fl = { .nl_u = { .ip4_u =
 				    { .daddr = ((opt && opt->srr) ?
 						opt->faddr :
-						req->af.v4_req.rmt_addr),
-				      .saddr = req->af.v4_req.loc_addr,
+						ireq->rmt_addr),
+				      .saddr = ireq->loc_addr,
 				      .tos = RT_CONN_FLAGS(sk) } },
 				    .proto = IPPROTO_TCP,
 				    .uli_u = { .ports =
 					       { .sport = skb->h.th->dest,
 						 .dport = skb->h.th->source } } };
 		if (ip_route_output_key(&rt, &fl)) {
-			tcp_openreq_free(req);
+			reqsk_free(req);
 			goto out;
 		}
 	}
@@ -272,7 +273,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			  &req->rcv_wnd, &req->window_clamp,
 			  0, &rcv_wscale);
 	/* BTW win scale with syncookies is 0 by definition */
-	req->rcv_wscale = rcv_wscale;
+	ireq->rcv_wscale = rcv_wscale;
 
 	ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
 out:	return ret;
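
[Annotation] The inet_rsk()/tcp_rsk() accessors used above reflect the new request_sock layering: protocol-specific request structs embed the generic struct request_sock as their first member, so a generic pointer can be cast to the containing type. A hypothetical miniature of that layout (the real structs live in include/net/request_sock.h and related headers, with more fields):

#include <stdio.h>

struct request_sock {              /* generic part */
	unsigned int mss;
};

struct inet_request_sock {         /* IPv4 part */
	struct request_sock req;   /* must stay the first member */
	unsigned int loc_addr, rmt_addr;
};

struct tcp_request_sock {          /* TCP part */
	struct inet_request_sock ireq;
	unsigned int rcv_isn, snt_isn;
};

/* valid casts because the embedded base sits at offset 0 */
static struct inet_request_sock *inet_rsk(struct request_sock *sk)
{
	return (struct inet_request_sock *)sk;
}

static struct tcp_request_sock *tcp_rsk(struct request_sock *sk)
{
	return (struct tcp_request_sock *)sk;
}

int main(void)
{
	struct tcp_request_sock treq = { 0 };
	struct request_sock *req = &treq.ireq.req;

	req->mss = 1460;
	inet_rsk(req)->loc_addr = 0x0100007f;  /* 127.0.0.1, LE byte order */
	tcp_rsk(req)->snt_isn = 12345;

	printf("mss=%u loc=%#x isn=%u\n",
	       req->mss, inet_rsk(req)->loc_addr, tcp_rsk(req)->snt_isn);
	return 0;
}
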
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0d9a4fd5f1a4..674bbd8cfd36 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,7 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
271 271
272DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); 272DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
273 273
274kmem_cache_t *tcp_openreq_cachep;
275kmem_cache_t *tcp_bucket_cachep; 274kmem_cache_t *tcp_bucket_cachep;
276kmem_cache_t *tcp_timewait_cachep; 275kmem_cache_t *tcp_timewait_cachep;
277 276
@@ -317,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure);
317static __inline__ unsigned int tcp_listen_poll(struct sock *sk, 316static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
318 poll_table *wait) 317 poll_table *wait)
319{ 318{
320 return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0; 319 return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
321} 320}
322 321
323/* 322/*
@@ -463,28 +462,15 @@ int tcp_listen_start(struct sock *sk)
463{ 462{
464 struct inet_sock *inet = inet_sk(sk); 463 struct inet_sock *inet = inet_sk(sk);
465 struct tcp_sock *tp = tcp_sk(sk); 464 struct tcp_sock *tp = tcp_sk(sk);
466 struct tcp_listen_opt *lopt; 465 int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
466
467 if (rc != 0)
468 return rc;
467 469
468 sk->sk_max_ack_backlog = 0; 470 sk->sk_max_ack_backlog = 0;
469 sk->sk_ack_backlog = 0; 471 sk->sk_ack_backlog = 0;
470 tp->accept_queue = tp->accept_queue_tail = NULL;
471 rwlock_init(&tp->syn_wait_lock);
472 tcp_delack_init(tp); 472 tcp_delack_init(tp);
473 473
474 lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
475 if (!lopt)
476 return -ENOMEM;
477
478 memset(lopt, 0, sizeof(struct tcp_listen_opt));
479 for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
480 if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
481 break;
482 get_random_bytes(&lopt->hash_rnd, 4);
483
484 write_lock_bh(&tp->syn_wait_lock);
485 tp->listen_opt = lopt;
486 write_unlock_bh(&tp->syn_wait_lock);
487
488 /* There is race window here: we announce ourselves listening, 474 /* There is race window here: we announce ourselves listening,
489 * but this transition is still not validated by get_port(). 475 * but this transition is still not validated by get_port().
490 * It is OK, because this socket enters to hash table only 476 * It is OK, because this socket enters to hash table only
@@ -501,10 +487,7 @@ int tcp_listen_start(struct sock *sk)
501 } 487 }
502 488
503 sk->sk_state = TCP_CLOSE; 489 sk->sk_state = TCP_CLOSE;
504 write_lock_bh(&tp->syn_wait_lock); 490 reqsk_queue_destroy(&tp->accept_queue);
505 tp->listen_opt = NULL;
506 write_unlock_bh(&tp->syn_wait_lock);
507 kfree(lopt);
508 return -EADDRINUSE; 491 return -EADDRINUSE;
509} 492}
510 493
@@ -516,25 +499,23 @@ int tcp_listen_start(struct sock *sk)
516static void tcp_listen_stop (struct sock *sk) 499static void tcp_listen_stop (struct sock *sk)
517{ 500{
518 struct tcp_sock *tp = tcp_sk(sk); 501 struct tcp_sock *tp = tcp_sk(sk);
519 struct tcp_listen_opt *lopt = tp->listen_opt; 502 struct listen_sock *lopt;
520 struct open_request *acc_req = tp->accept_queue; 503 struct request_sock *acc_req;
521 struct open_request *req; 504 struct request_sock *req;
522 int i; 505 int i;
523 506
524 tcp_delete_keepalive_timer(sk); 507 tcp_delete_keepalive_timer(sk);
525 508
526 /* make all the listen_opt local to us */ 509 /* make all the listen_opt local to us */
527 write_lock_bh(&tp->syn_wait_lock); 510 lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
528 tp->listen_opt = NULL; 511 acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
529 write_unlock_bh(&tp->syn_wait_lock);
530 tp->accept_queue = tp->accept_queue_tail = NULL;
531 512
532 if (lopt->qlen) { 513 if (lopt->qlen) {
533 for (i = 0; i < TCP_SYNQ_HSIZE; i++) { 514 for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
534 while ((req = lopt->syn_table[i]) != NULL) { 515 while ((req = lopt->syn_table[i]) != NULL) {
535 lopt->syn_table[i] = req->dl_next; 516 lopt->syn_table[i] = req->dl_next;
536 lopt->qlen--; 517 lopt->qlen--;
537 tcp_openreq_free(req); 518 reqsk_free(req);
538 519
539 /* Following specs, it would be better either to send FIN 520 /* Following specs, it would be better either to send FIN
540 * (and enter FIN-WAIT-1, it is normal close) 521 * (and enter FIN-WAIT-1, it is normal close)
@@ -574,7 +555,7 @@ static void tcp_listen_stop (struct sock *sk)
574 sock_put(child); 555 sock_put(child);
575 556
576 sk_acceptq_removed(sk); 557 sk_acceptq_removed(sk);
577 tcp_openreq_fastfree(req); 558 __reqsk_free(req);
578 } 559 }
579 BUG_TRAP(!sk->sk_ack_backlog); 560 BUG_TRAP(!sk->sk_ack_backlog);
580} 561}
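
Note: the two reqsk_queue_yank_*() helpers replace the hand-rolled lock/clear sequence that the removed lines in tcp_listen_stop() performed on listen_opt and the accept queue. Sketches under the same field-name assumptions as above:

    static inline struct listen_sock *
    reqsk_queue_yank_listen_sk(struct request_sock_queue *queue)
    {
            struct listen_sock *lopt;

            write_lock_bh(&queue->syn_wait_lock);
            lopt = queue->listen_opt;
            queue->listen_opt = NULL;
            write_unlock_bh(&queue->syn_wait_lock);

            return lopt;
    }

    static inline struct request_sock *
    reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
    {
            struct request_sock *req = queue->rskq_accept_head;

            queue->rskq_accept_head = NULL;
            return req;
    }
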
@@ -1345,7 +1326,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1345 1326
1346 cleanup_rbuf(sk, copied); 1327 cleanup_rbuf(sk, copied);
1347 1328
1348 if (tp->ucopy.task == user_recv) { 1329 if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
1349 /* Install new reader */ 1330 /* Install new reader */
1350 if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) { 1331 if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
1351 user_recv = current; 1332 user_recv = current;
@@ -1868,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo)
1868 prepare_to_wait_exclusive(sk->sk_sleep, &wait, 1849 prepare_to_wait_exclusive(sk->sk_sleep, &wait,
1869 TASK_INTERRUPTIBLE); 1850 TASK_INTERRUPTIBLE);
1870 release_sock(sk); 1851 release_sock(sk);
1871 if (!tp->accept_queue) 1852 if (reqsk_queue_empty(&tp->accept_queue))
1872 timeo = schedule_timeout(timeo); 1853 timeo = schedule_timeout(timeo);
1873 lock_sock(sk); 1854 lock_sock(sk);
1874 err = 0; 1855 err = 0;
1875 if (tp->accept_queue) 1856 if (!reqsk_queue_empty(&tp->accept_queue))
1876 break; 1857 break;
1877 err = -EINVAL; 1858 err = -EINVAL;
1878 if (sk->sk_state != TCP_LISTEN) 1859 if (sk->sk_state != TCP_LISTEN)
@@ -1895,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo)
1895struct sock *tcp_accept(struct sock *sk, int flags, int *err) 1876struct sock *tcp_accept(struct sock *sk, int flags, int *err)
1896{ 1877{
1897 struct tcp_sock *tp = tcp_sk(sk); 1878 struct tcp_sock *tp = tcp_sk(sk);
1898 struct open_request *req;
1899 struct sock *newsk; 1879 struct sock *newsk;
1900 int error; 1880 int error;
1901 1881
@@ -1906,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err)
1906 */ 1886 */
1907 error = -EINVAL; 1887 error = -EINVAL;
1908 if (sk->sk_state != TCP_LISTEN) 1888 if (sk->sk_state != TCP_LISTEN)
1909 goto out; 1889 goto out_err;
1910 1890
1911 /* Find already established connection */ 1891 /* Find already established connection */
1912 if (!tp->accept_queue) { 1892 if (reqsk_queue_empty(&tp->accept_queue)) {
1913 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 1893 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1914 1894
1915 /* If this is a non-blocking socket, don't sleep */ 1895 /* If this is a non-blocking socket, don't sleep */
1916 error = -EAGAIN; 1896 error = -EAGAIN;
1917 if (!timeo) 1897 if (!timeo)
1918 goto out; 1898 goto out_err;
1919 1899
1920 error = wait_for_connect(sk, timeo); 1900 error = wait_for_connect(sk, timeo);
1921 if (error) 1901 if (error)
1922 goto out; 1902 goto out_err;
1923 } 1903 }
1924 1904
1925 req = tp->accept_queue; 1905 newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
1926 if ((tp->accept_queue = req->dl_next) == NULL)
1927 tp->accept_queue_tail = NULL;
1928
1929 newsk = req->sk;
1930 sk_acceptq_removed(sk);
1931 tcp_openreq_fastfree(req);
1932 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); 1906 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
1933 release_sock(sk);
1934 return newsk;
1935
1936out: 1907out:
1937 release_sock(sk); 1908 release_sock(sk);
1909 return newsk;
1910out_err:
1911 newsk = NULL;
1938 *err = error; 1912 *err = error;
1939 return NULL; 1913 goto out;
1940} 1914}
1941 1915
1942/* 1916/*
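
Note: reqsk_queue_get_child() packages the dequeue that tcp_accept() used to spell out in the removed lines above: pop the head request, detach the child socket, fix the parent's backlog accounting, and free the request without running its destructor. A sketch:

    static inline struct sock *
    reqsk_queue_get_child(struct request_sock_queue *queue, struct sock *parent)
    {
            struct request_sock *req = queue->rskq_accept_head;
            struct sock *child;

            queue->rskq_accept_head = req->dl_next;
            if (queue->rskq_accept_head == NULL)
                    queue->rskq_accept_tail = NULL;

            child = req->sk;
            sk_acceptq_removed(parent);
            __reqsk_free(req);      /* no destructor: child owns any attached state */
            return child;
    }
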
@@ -2271,13 +2245,6 @@ void __init tcp_init(void)
2271 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), 2245 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
2272 sizeof(skb->cb)); 2246 sizeof(skb->cb));
2273 2247
2274 tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
2275 sizeof(struct open_request),
2276 0, SLAB_HWCACHE_ALIGN,
2277 NULL, NULL);
2278 if (!tcp_openreq_cachep)
2279 panic("tcp_init: Cannot alloc open_request cache.");
2280
2281 tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", 2248 tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
2282 sizeof(struct tcp_bind_bucket), 2249 sizeof(struct tcp_bind_bucket),
2283 0, SLAB_HWCACHE_ALIGN, 2250 0, SLAB_HWCACHE_ALIGN,
@@ -2374,7 +2341,6 @@ EXPORT_SYMBOL(tcp_destroy_sock);
2374EXPORT_SYMBOL(tcp_disconnect); 2341EXPORT_SYMBOL(tcp_disconnect);
2375EXPORT_SYMBOL(tcp_getsockopt); 2342EXPORT_SYMBOL(tcp_getsockopt);
2376EXPORT_SYMBOL(tcp_ioctl); 2343EXPORT_SYMBOL(tcp_ioctl);
2377EXPORT_SYMBOL(tcp_openreq_cachep);
2378EXPORT_SYMBOL(tcp_poll); 2344EXPORT_SYMBOL(tcp_poll);
2379EXPORT_SYMBOL(tcp_read_sock); 2345EXPORT_SYMBOL(tcp_read_sock);
2380EXPORT_SYMBOL(tcp_recvmsg); 2346EXPORT_SYMBOL(tcp_recvmsg);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 8faa8948f75c..634befc07921 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -455,9 +455,10 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk,
455} 455}
456 456
457static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, 457static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
458 struct open_request *req, 458 struct request_sock *req,
459 u32 pid, u32 seq) 459 u32 pid, u32 seq)
460{ 460{
461 const struct inet_request_sock *ireq = inet_rsk(req);
461 struct inet_sock *inet = inet_sk(sk); 462 struct inet_sock *inet = inet_sk(sk);
462 unsigned char *b = skb->tail; 463 unsigned char *b = skb->tail;
463 struct tcpdiagmsg *r; 464 struct tcpdiagmsg *r;
@@ -482,9 +483,9 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
482 tmo = 0; 483 tmo = 0;
483 484
484 r->id.tcpdiag_sport = inet->sport; 485 r->id.tcpdiag_sport = inet->sport;
485 r->id.tcpdiag_dport = req->rmt_port; 486 r->id.tcpdiag_dport = ireq->rmt_port;
486 r->id.tcpdiag_src[0] = req->af.v4_req.loc_addr; 487 r->id.tcpdiag_src[0] = ireq->loc_addr;
487 r->id.tcpdiag_dst[0] = req->af.v4_req.rmt_addr; 488 r->id.tcpdiag_dst[0] = ireq->rmt_addr;
488 r->tcpdiag_expires = jiffies_to_msecs(tmo), 489 r->tcpdiag_expires = jiffies_to_msecs(tmo),
489 r->tcpdiag_rqueue = 0; 490 r->tcpdiag_rqueue = 0;
490 r->tcpdiag_wqueue = 0; 491 r->tcpdiag_wqueue = 0;
@@ -493,9 +494,9 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
493#ifdef CONFIG_IP_TCPDIAG_IPV6 494#ifdef CONFIG_IP_TCPDIAG_IPV6
494 if (r->tcpdiag_family == AF_INET6) { 495 if (r->tcpdiag_family == AF_INET6) {
495 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, 496 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
496 &req->af.v6_req.loc_addr); 497 &tcp6_rsk(req)->loc_addr);
497 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, 498 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
498 &req->af.v6_req.rmt_addr); 499 &tcp6_rsk(req)->rmt_addr);
499 } 500 }
500#endif 501#endif
501 nlh->nlmsg_len = skb->tail - b; 502 nlh->nlmsg_len = skb->tail - b;
@@ -513,7 +514,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
513 struct tcpdiag_entry entry; 514 struct tcpdiag_entry entry;
514 struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); 515 struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
515 struct tcp_sock *tp = tcp_sk(sk); 516 struct tcp_sock *tp = tcp_sk(sk);
516 struct tcp_listen_opt *lopt; 517 struct listen_sock *lopt;
517 struct rtattr *bc = NULL; 518 struct rtattr *bc = NULL;
518 struct inet_sock *inet = inet_sk(sk); 519 struct inet_sock *inet = inet_sk(sk);
519 int j, s_j; 520 int j, s_j;
@@ -528,9 +529,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
528 529
529 entry.family = sk->sk_family; 530 entry.family = sk->sk_family;
530 531
531 read_lock_bh(&tp->syn_wait_lock); 532 read_lock_bh(&tp->accept_queue.syn_wait_lock);
532 533
533 lopt = tp->listen_opt; 534 lopt = tp->accept_queue.listen_opt;
534 if (!lopt || !lopt->qlen) 535 if (!lopt || !lopt->qlen)
535 goto out; 536 goto out;
536 537
@@ -541,13 +542,15 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
541 } 542 }
542 543
543 for (j = s_j; j < TCP_SYNQ_HSIZE; j++) { 544 for (j = s_j; j < TCP_SYNQ_HSIZE; j++) {
544 struct open_request *req, *head = lopt->syn_table[j]; 545 struct request_sock *req, *head = lopt->syn_table[j];
545 546
546 reqnum = 0; 547 reqnum = 0;
547 for (req = head; req; reqnum++, req = req->dl_next) { 548 for (req = head; req; reqnum++, req = req->dl_next) {
549 struct inet_request_sock *ireq = inet_rsk(req);
550
548 if (reqnum < s_reqnum) 551 if (reqnum < s_reqnum)
549 continue; 552 continue;
550 if (r->id.tcpdiag_dport != req->rmt_port && 553 if (r->id.tcpdiag_dport != ireq->rmt_port &&
551 r->id.tcpdiag_dport) 554 r->id.tcpdiag_dport)
552 continue; 555 continue;
553 556
@@ -555,16 +558,16 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
555 entry.saddr = 558 entry.saddr =
556#ifdef CONFIG_IP_TCPDIAG_IPV6 559#ifdef CONFIG_IP_TCPDIAG_IPV6
557 (entry.family == AF_INET6) ? 560 (entry.family == AF_INET6) ?
558 req->af.v6_req.loc_addr.s6_addr32 : 561 tcp6_rsk(req)->loc_addr.s6_addr32 :
559#endif 562#endif
560 &req->af.v4_req.loc_addr; 563 &ireq->loc_addr;
561 entry.daddr = 564 entry.daddr =
562#ifdef CONFIG_IP_TCPDIAG_IPV6 565#ifdef CONFIG_IP_TCPDIAG_IPV6
563 (entry.family == AF_INET6) ? 566 (entry.family == AF_INET6) ?
564 req->af.v6_req.rmt_addr.s6_addr32 : 567 tcp6_rsk(req)->rmt_addr.s6_addr32 :
565#endif 568#endif
566 &req->af.v4_req.rmt_addr; 569 &ireq->rmt_addr;
567 entry.dport = ntohs(req->rmt_port); 570 entry.dport = ntohs(ireq->rmt_port);
568 571
569 if (!tcpdiag_bc_run(RTA_DATA(bc), 572 if (!tcpdiag_bc_run(RTA_DATA(bc),
570 RTA_PAYLOAD(bc), &entry)) 573 RTA_PAYLOAD(bc), &entry))
@@ -585,7 +588,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
585 } 588 }
586 589
587out: 590out:
588 read_unlock_bh(&tp->syn_wait_lock); 591 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
589 592
590 return err; 593 return err;
591} 594}
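
Note: tcp_diag now reaches request fields through inet_rsk() and tcp_rsk() instead of the old req->af.v4_req/v6_req union, and the IPv6 side uses tcp6_rsk() following the same pattern. Those accessors are plain downcasts, which only works because the generic struct sits first at every layer. An illustrative layering sketch (field lists abridged and partly assumed):

    struct request_sock {
            struct request_sock     *dl_next;       /* SYN hash chain */
            u16                     mss;
            u8                      retrans;
            unsigned long           expires;
            struct request_sock_ops *rsk_ops;
            struct sock             *sk;
            u32                     rcv_wnd;
            u32                     ts_recent;
            u32                     window_clamp;
    };

    struct inet_request_sock {
            struct request_sock     req;            /* must stay first */
            u32                     loc_addr;
            u32                     rmt_addr;
            u16                     rmt_port;
            unsigned int            snd_wscale : 4,
                                    rcv_wscale : 4,
                                    tstamp_ok  : 1,
                                    sack_ok    : 1,
                                    wscale_ok  : 1,
                                    ecn_ok     : 1,
                                    acked      : 1;
            struct ip_options       *opt;
    };

    struct tcp_request_sock {
            struct inet_request_sock req;           /* must stay first */
            u32                      snt_isn;
            u32                      rcv_isn;
    };

    static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
    {
            return (struct inet_request_sock *)sk;
    }

    static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *sk)
    {
            return (struct tcp_request_sock *)sk;
    }
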
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dad98e4a5043..2d41d5d6ad19 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -36,7 +36,7 @@
36 * ACK bit. 36 * ACK bit.
37 * Andi Kleen : Implemented fast path mtu discovery. 37 * Andi Kleen : Implemented fast path mtu discovery.
38 * Fixed many serious bugs in the 38 * Fixed many serious bugs in the
39 * open_request handling and moved 39 * request_sock handling and moved
40 * most of it into the af independent code. 40 * most of it into the af independent code.
41 * Added tail drop and some other bugfixes. 41 * Added tail drop and some other bugfixes.
42 * Added new listen semantics. 42 * Added new listen semantics.
@@ -869,21 +869,23 @@ static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
869 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); 869 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
870} 870}
871 871
872static struct open_request *tcp_v4_search_req(struct tcp_sock *tp, 872static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
873 struct open_request ***prevp, 873 struct request_sock ***prevp,
874 __u16 rport, 874 __u16 rport,
875 __u32 raddr, __u32 laddr) 875 __u32 raddr, __u32 laddr)
876{ 876{
877 struct tcp_listen_opt *lopt = tp->listen_opt; 877 struct listen_sock *lopt = tp->accept_queue.listen_opt;
878 struct open_request *req, **prev; 878 struct request_sock *req, **prev;
879 879
880 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; 880 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
881 (req = *prev) != NULL; 881 (req = *prev) != NULL;
882 prev = &req->dl_next) { 882 prev = &req->dl_next) {
883 if (req->rmt_port == rport && 883 const struct inet_request_sock *ireq = inet_rsk(req);
884 req->af.v4_req.rmt_addr == raddr && 884
885 req->af.v4_req.loc_addr == laddr && 885 if (ireq->rmt_port == rport &&
886 TCP_INET_FAMILY(req->class->family)) { 886 ireq->rmt_addr == raddr &&
887 ireq->loc_addr == laddr &&
888 TCP_INET_FAMILY(req->rsk_ops->family)) {
887 BUG_TRAP(!req->sk); 889 BUG_TRAP(!req->sk);
888 *prevp = prev; 890 *prevp = prev;
889 break; 891 break;
@@ -893,21 +895,13 @@ static struct open_request *tcp_v4_search_req(struct tcp_sock *tp,
893 return req; 895 return req;
894} 896}
895 897
896static void tcp_v4_synq_add(struct sock *sk, struct open_request *req) 898static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
897{ 899{
898 struct tcp_sock *tp = tcp_sk(sk); 900 struct tcp_sock *tp = tcp_sk(sk);
899 struct tcp_listen_opt *lopt = tp->listen_opt; 901 struct listen_sock *lopt = tp->accept_queue.listen_opt;
900 u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd); 902 u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
901
902 req->expires = jiffies + TCP_TIMEOUT_INIT;
903 req->retrans = 0;
904 req->sk = NULL;
905 req->dl_next = lopt->syn_table[h];
906
907 write_lock(&tp->syn_wait_lock);
908 lopt->syn_table[h] = req;
909 write_unlock(&tp->syn_wait_lock);
910 903
904 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
911 tcp_synq_added(sk); 905 tcp_synq_added(sk);
912} 906}
913 907
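
Note: reqsk_queue_hash_req() takes over the bookkeeping that tcp_v4_synq_add() used to do by hand in the removed lines: stamp the expiry, reset the retransmit count, and splice the request into its SYN hash bucket under syn_wait_lock. A sketch:

    void reqsk_queue_hash_req(struct request_sock_queue *queue, u32 hash,
                              struct request_sock *req, unsigned long timeout)
    {
            struct listen_sock *lopt = queue->listen_opt;

            req->expires = jiffies + timeout;
            req->retrans = 0;
            req->sk = NULL;
            req->dl_next = lopt->syn_table[hash];

            write_lock(&queue->syn_wait_lock);
            lopt->syn_table[hash] = req;
            write_unlock(&queue->syn_wait_lock);
    }
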
@@ -1050,7 +1044,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
1050 } 1044 }
1051 1045
1052 switch (sk->sk_state) { 1046 switch (sk->sk_state) {
1053 struct open_request *req, **prev; 1047 struct request_sock *req, **prev;
1054 case TCP_LISTEN: 1048 case TCP_LISTEN:
1055 if (sock_owned_by_user(sk)) 1049 if (sock_owned_by_user(sk))
1056 goto out; 1050 goto out;
@@ -1065,7 +1059,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
1065 */ 1059 */
1066 BUG_TRAP(!req->sk); 1060 BUG_TRAP(!req->sk);
1067 1061
1068 if (seq != req->snt_isn) { 1062 if (seq != tcp_rsk(req)->snt_isn) {
1069 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 1063 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
1070 goto out; 1064 goto out;
1071 } 1065 }
@@ -1254,28 +1248,29 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1254 tcp_tw_put(tw); 1248 tcp_tw_put(tw);
1255} 1249}
1256 1250
1257static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req) 1251static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1258{ 1252{
1259 tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd, 1253 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
1260 req->ts_recent); 1254 req->ts_recent);
1261} 1255}
1262 1256
1263static struct dst_entry* tcp_v4_route_req(struct sock *sk, 1257static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1264 struct open_request *req) 1258 struct request_sock *req)
1265{ 1259{
1266 struct rtable *rt; 1260 struct rtable *rt;
1267 struct ip_options *opt = req->af.v4_req.opt; 1261 const struct inet_request_sock *ireq = inet_rsk(req);
1262 struct ip_options *opt = inet_rsk(req)->opt;
1268 struct flowi fl = { .oif = sk->sk_bound_dev_if, 1263 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1269 .nl_u = { .ip4_u = 1264 .nl_u = { .ip4_u =
1270 { .daddr = ((opt && opt->srr) ? 1265 { .daddr = ((opt && opt->srr) ?
1271 opt->faddr : 1266 opt->faddr :
1272 req->af.v4_req.rmt_addr), 1267 ireq->rmt_addr),
1273 .saddr = req->af.v4_req.loc_addr, 1268 .saddr = ireq->loc_addr,
1274 .tos = RT_CONN_FLAGS(sk) } }, 1269 .tos = RT_CONN_FLAGS(sk) } },
1275 .proto = IPPROTO_TCP, 1270 .proto = IPPROTO_TCP,
1276 .uli_u = { .ports = 1271 .uli_u = { .ports =
1277 { .sport = inet_sk(sk)->sport, 1272 { .sport = inet_sk(sk)->sport,
1278 .dport = req->rmt_port } } }; 1273 .dport = ireq->rmt_port } } };
1279 1274
1280 if (ip_route_output_flow(&rt, &fl, sk, 0)) { 1275 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
1281 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 1276 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -1291,12 +1286,13 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1291 1286
1292/* 1287/*
1293 * Send a SYN-ACK after having received an ACK. 1288 * Send a SYN-ACK after having received an ACK.
1294 * This still operates on a open_request only, not on a big 1289 * This still operates on a request_sock only, not on a big
1295 * socket. 1290 * socket.
1296 */ 1291 */
1297static int tcp_v4_send_synack(struct sock *sk, struct open_request *req, 1292static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
1298 struct dst_entry *dst) 1293 struct dst_entry *dst)
1299{ 1294{
1295 const struct inet_request_sock *ireq = inet_rsk(req);
1300 int err = -1; 1296 int err = -1;
1301 struct sk_buff * skb; 1297 struct sk_buff * skb;
1302 1298
@@ -1310,14 +1306,14 @@ static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
1310 struct tcphdr *th = skb->h.th; 1306 struct tcphdr *th = skb->h.th;
1311 1307
1312 th->check = tcp_v4_check(th, skb->len, 1308 th->check = tcp_v4_check(th, skb->len,
1313 req->af.v4_req.loc_addr, 1309 ireq->loc_addr,
1314 req->af.v4_req.rmt_addr, 1310 ireq->rmt_addr,
1315 csum_partial((char *)th, skb->len, 1311 csum_partial((char *)th, skb->len,
1316 skb->csum)); 1312 skb->csum));
1317 1313
1318 err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr, 1314 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
1319 req->af.v4_req.rmt_addr, 1315 ireq->rmt_addr,
1320 req->af.v4_req.opt); 1316 ireq->opt);
1321 if (err == NET_XMIT_CN) 1317 if (err == NET_XMIT_CN)
1322 err = 0; 1318 err = 0;
1323 } 1319 }
@@ -1328,12 +1324,12 @@ out:
1328} 1324}
1329 1325
1330/* 1326/*
1331 * IPv4 open_request destructor. 1327 * IPv4 request_sock destructor.
1332 */ 1328 */
1333static void tcp_v4_or_free(struct open_request *req) 1329static void tcp_v4_reqsk_destructor(struct request_sock *req)
1334{ 1330{
1335 if (req->af.v4_req.opt) 1331 if (inet_rsk(req)->opt)
1336 kfree(req->af.v4_req.opt); 1332 kfree(inet_rsk(req)->opt);
1337} 1333}
1338 1334
1339static inline void syn_flood_warning(struct sk_buff *skb) 1335static inline void syn_flood_warning(struct sk_buff *skb)
@@ -1349,7 +1345,7 @@ static inline void syn_flood_warning(struct sk_buff *skb)
1349} 1345}
1350 1346
1351/* 1347/*
1352 * Save and compile IPv4 options into the open_request if needed. 1348 * Save and compile IPv4 options into the request_sock if needed.
1353 */ 1349 */
1354static inline struct ip_options *tcp_v4_save_options(struct sock *sk, 1350static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1355 struct sk_buff *skb) 1351 struct sk_buff *skb)
@@ -1370,33 +1366,20 @@ static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1370 return dopt; 1366 return dopt;
1371} 1367}
1372 1368
1373/* 1369struct request_sock_ops tcp_request_sock_ops = {
1374 * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
1375 * One SYN_RECV socket costs about 80bytes on a 32bit machine.
1376 * It would be better to replace it with a global counter for all sockets
1377 * but then some measure against one socket starving all other sockets
1378 * would be needed.
1379 *
1380 * It was 128 by default. Experiments with real servers show, that
1381 * it is absolutely not enough even at 100conn/sec. 256 cures most
1382 * of problems. This value is adjusted to 128 for very small machines
1383 * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
1384 * Further increasing requires to change hash table size.
1385 */
1386int sysctl_max_syn_backlog = 256;
1387
1388struct or_calltable or_ipv4 = {
1389 .family = PF_INET, 1370 .family = PF_INET,
1371 .obj_size = sizeof(struct tcp_request_sock),
1390 .rtx_syn_ack = tcp_v4_send_synack, 1372 .rtx_syn_ack = tcp_v4_send_synack,
1391 .send_ack = tcp_v4_or_send_ack, 1373 .send_ack = tcp_v4_reqsk_send_ack,
1392 .destructor = tcp_v4_or_free, 1374 .destructor = tcp_v4_reqsk_destructor,
1393 .send_reset = tcp_v4_send_reset, 1375 .send_reset = tcp_v4_send_reset,
1394}; 1376};
1395 1377
1396int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 1378int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1397{ 1379{
1380 struct inet_request_sock *ireq;
1398 struct tcp_options_received tmp_opt; 1381 struct tcp_options_received tmp_opt;
1399 struct open_request *req; 1382 struct request_sock *req;
1400 __u32 saddr = skb->nh.iph->saddr; 1383 __u32 saddr = skb->nh.iph->saddr;
1401 __u32 daddr = skb->nh.iph->daddr; 1384 __u32 daddr = skb->nh.iph->daddr;
1402 __u32 isn = TCP_SKB_CB(skb)->when; 1385 __u32 isn = TCP_SKB_CB(skb)->when;
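
Note: or_calltable becomes request_sock_ops and gains an obj_size, so generic code can size a per-protocol slab that replaces the old global tcp_openreq_cachep (removed from tcp_init() earlier in this patch). A declaration sketched from the initializer above; the slab member is an assumption about where the cache built from obj_size would live:

    struct request_sock_ops {
            int             family;
            int             obj_size;       /* sizeof(struct tcp_request_sock) here */
            kmem_cache_t    *slab;          /* assumed: cache created from obj_size
                                             * when the proto registers rsk_prot */
            int             (*rtx_syn_ack)(struct sock *sk,
                                           struct request_sock *req,
                                           struct dst_entry *dst);
            void            (*send_ack)(struct sk_buff *skb,
                                        struct request_sock *req);
            void            (*send_reset)(struct sk_buff *skb);
            void            (*destructor)(struct request_sock *req);
    };
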
@@ -1433,7 +1416,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1433 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) 1416 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1434 goto drop; 1417 goto drop;
1435 1418
1436 req = tcp_openreq_alloc(); 1419 req = reqsk_alloc(&tcp_request_sock_ops);
1437 if (!req) 1420 if (!req)
1438 goto drop; 1421 goto drop;
1439 1422
@@ -1461,10 +1444,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1461 1444
1462 tcp_openreq_init(req, &tmp_opt, skb); 1445 tcp_openreq_init(req, &tmp_opt, skb);
1463 1446
1464 req->af.v4_req.loc_addr = daddr; 1447 ireq = inet_rsk(req);
1465 req->af.v4_req.rmt_addr = saddr; 1448 ireq->loc_addr = daddr;
1466 req->af.v4_req.opt = tcp_v4_save_options(sk, skb); 1449 ireq->rmt_addr = saddr;
1467 req->class = &or_ipv4; 1450 ireq->opt = tcp_v4_save_options(sk, skb);
1468 if (!want_cookie) 1451 if (!want_cookie)
1469 TCP_ECN_create_request(req, skb->h.th); 1452 TCP_ECN_create_request(req, skb->h.th);
1470 1453
@@ -1523,20 +1506,20 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1523 1506
1524 isn = tcp_v4_init_sequence(sk, skb); 1507 isn = tcp_v4_init_sequence(sk, skb);
1525 } 1508 }
1526 req->snt_isn = isn; 1509 tcp_rsk(req)->snt_isn = isn;
1527 1510
1528 if (tcp_v4_send_synack(sk, req, dst)) 1511 if (tcp_v4_send_synack(sk, req, dst))
1529 goto drop_and_free; 1512 goto drop_and_free;
1530 1513
1531 if (want_cookie) { 1514 if (want_cookie) {
1532 tcp_openreq_free(req); 1515 reqsk_free(req);
1533 } else { 1516 } else {
1534 tcp_v4_synq_add(sk, req); 1517 tcp_v4_synq_add(sk, req);
1535 } 1518 }
1536 return 0; 1519 return 0;
1537 1520
1538drop_and_free: 1521drop_and_free:
1539 tcp_openreq_free(req); 1522 reqsk_free(req);
1540drop: 1523drop:
1541 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 1524 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1542 return 0; 1525 return 0;
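
Note: reqsk_alloc()/reqsk_free() replace tcp_openreq_alloc()/tcp_openreq_free(), and __reqsk_free() replaces tcp_openreq_fastfree(). Sketches, assuming the per-ops slab from the previous note:

    static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops)
    {
            struct request_sock *req = kmem_cache_alloc(ops->slab, SLAB_ATOMIC);

            if (req != NULL)
                    req->rsk_ops = ops;
            return req;
    }

    static inline void __reqsk_free(struct request_sock *req)
    {
            /* Raw free, no destructor: used once the child socket has
             * taken ownership of attached state (e.g. ireq->opt). */
            kmem_cache_free(req->rsk_ops->slab, req);
    }

    static inline void reqsk_free(struct request_sock *req)
    {
            req->rsk_ops->destructor(req);
            __reqsk_free(req);
    }
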
@@ -1548,9 +1531,10 @@ drop:
1548 * now create the new socket. 1531 * now create the new socket.
1549 */ 1532 */
1550struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 1533struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1551 struct open_request *req, 1534 struct request_sock *req,
1552 struct dst_entry *dst) 1535 struct dst_entry *dst)
1553{ 1536{
1537 struct inet_request_sock *ireq;
1554 struct inet_sock *newinet; 1538 struct inet_sock *newinet;
1555 struct tcp_sock *newtp; 1539 struct tcp_sock *newtp;
1556 struct sock *newsk; 1540 struct sock *newsk;
@@ -1570,11 +1554,12 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1570 1554
1571 newtp = tcp_sk(newsk); 1555 newtp = tcp_sk(newsk);
1572 newinet = inet_sk(newsk); 1556 newinet = inet_sk(newsk);
1573 newinet->daddr = req->af.v4_req.rmt_addr; 1557 ireq = inet_rsk(req);
1574 newinet->rcv_saddr = req->af.v4_req.loc_addr; 1558 newinet->daddr = ireq->rmt_addr;
1575 newinet->saddr = req->af.v4_req.loc_addr; 1559 newinet->rcv_saddr = ireq->loc_addr;
1576 newinet->opt = req->af.v4_req.opt; 1560 newinet->saddr = ireq->loc_addr;
1577 req->af.v4_req.opt = NULL; 1561 newinet->opt = ireq->opt;
1562 ireq->opt = NULL;
1578 newinet->mc_index = tcp_v4_iif(skb); 1563 newinet->mc_index = tcp_v4_iif(skb);
1579 newinet->mc_ttl = skb->nh.iph->ttl; 1564 newinet->mc_ttl = skb->nh.iph->ttl;
1580 newtp->ext_header_len = 0; 1565 newtp->ext_header_len = 0;
@@ -1605,9 +1590,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1605 struct iphdr *iph = skb->nh.iph; 1590 struct iphdr *iph = skb->nh.iph;
1606 struct tcp_sock *tp = tcp_sk(sk); 1591 struct tcp_sock *tp = tcp_sk(sk);
1607 struct sock *nsk; 1592 struct sock *nsk;
1608 struct open_request **prev; 1593 struct request_sock **prev;
1609 /* Find possible connection requests. */ 1594 /* Find possible connection requests. */
1610 struct open_request *req = tcp_v4_search_req(tp, &prev, th->source, 1595 struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source,
1611 iph->saddr, iph->daddr); 1596 iph->saddr, iph->daddr);
1612 if (req) 1597 if (req)
1613 return tcp_check_req(sk, skb, req, prev); 1598 return tcp_check_req(sk, skb, req, prev);
@@ -2144,13 +2129,13 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2144 ++st->num; 2129 ++st->num;
2145 2130
2146 if (st->state == TCP_SEQ_STATE_OPENREQ) { 2131 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2147 struct open_request *req = cur; 2132 struct request_sock *req = cur;
2148 2133
2149 tp = tcp_sk(st->syn_wait_sk); 2134 tp = tcp_sk(st->syn_wait_sk);
2150 req = req->dl_next; 2135 req = req->dl_next;
2151 while (1) { 2136 while (1) {
2152 while (req) { 2137 while (req) {
2153 if (req->class->family == st->family) { 2138 if (req->rsk_ops->family == st->family) {
2154 cur = req; 2139 cur = req;
2155 goto out; 2140 goto out;
2156 } 2141 }
@@ -2159,17 +2144,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2159 if (++st->sbucket >= TCP_SYNQ_HSIZE) 2144 if (++st->sbucket >= TCP_SYNQ_HSIZE)
2160 break; 2145 break;
2161get_req: 2146get_req:
2162 req = tp->listen_opt->syn_table[st->sbucket]; 2147 req = tp->accept_queue.listen_opt->syn_table[st->sbucket];
2163 } 2148 }
2164 sk = sk_next(st->syn_wait_sk); 2149 sk = sk_next(st->syn_wait_sk);
2165 st->state = TCP_SEQ_STATE_LISTENING; 2150 st->state = TCP_SEQ_STATE_LISTENING;
2166 read_unlock_bh(&tp->syn_wait_lock); 2151 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2167 } else { 2152 } else {
2168 tp = tcp_sk(sk); 2153 tp = tcp_sk(sk);
2169 read_lock_bh(&tp->syn_wait_lock); 2154 read_lock_bh(&tp->accept_queue.syn_wait_lock);
2170 if (tp->listen_opt && tp->listen_opt->qlen) 2155 if (reqsk_queue_len(&tp->accept_queue))
2171 goto start_req; 2156 goto start_req;
2172 read_unlock_bh(&tp->syn_wait_lock); 2157 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2173 sk = sk_next(sk); 2158 sk = sk_next(sk);
2174 } 2159 }
2175get_sk: 2160get_sk:
@@ -2179,8 +2164,8 @@ get_sk:
2179 goto out; 2164 goto out;
2180 } 2165 }
2181 tp = tcp_sk(sk); 2166 tp = tcp_sk(sk);
2182 read_lock_bh(&tp->syn_wait_lock); 2167 read_lock_bh(&tp->accept_queue.syn_wait_lock);
2183 if (tp->listen_opt && tp->listen_opt->qlen) { 2168 if (reqsk_queue_len(&tp->accept_queue)) {
2184start_req: 2169start_req:
2185 st->uid = sock_i_uid(sk); 2170 st->uid = sock_i_uid(sk);
2186 st->syn_wait_sk = sk; 2171 st->syn_wait_sk = sk;
@@ -2188,7 +2173,7 @@ start_req:
2188 st->sbucket = 0; 2173 st->sbucket = 0;
2189 goto get_req; 2174 goto get_req;
2190 } 2175 }
2191 read_unlock_bh(&tp->syn_wait_lock); 2176 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2192 } 2177 }
2193 if (++st->bucket < TCP_LHTABLE_SIZE) { 2178 if (++st->bucket < TCP_LHTABLE_SIZE) {
2194 sk = sk_head(&tcp_listening_hash[st->bucket]); 2179 sk = sk_head(&tcp_listening_hash[st->bucket]);
@@ -2375,7 +2360,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2375 case TCP_SEQ_STATE_OPENREQ: 2360 case TCP_SEQ_STATE_OPENREQ:
2376 if (v) { 2361 if (v) {
2377 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); 2362 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
2378 read_unlock_bh(&tp->syn_wait_lock); 2363 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2379 } 2364 }
2380 case TCP_SEQ_STATE_LISTENING: 2365 case TCP_SEQ_STATE_LISTENING:
2381 if (v != SEQ_START_TOKEN) 2366 if (v != SEQ_START_TOKEN)
@@ -2451,18 +2436,19 @@ void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2451 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 2436 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2452} 2437}
2453 2438
2454static void get_openreq4(struct sock *sk, struct open_request *req, 2439static void get_openreq4(struct sock *sk, struct request_sock *req,
2455 char *tmpbuf, int i, int uid) 2440 char *tmpbuf, int i, int uid)
2456{ 2441{
2442 const struct inet_request_sock *ireq = inet_rsk(req);
2457 int ttd = req->expires - jiffies; 2443 int ttd = req->expires - jiffies;
2458 2444
2459 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" 2445 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2460 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p", 2446 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2461 i, 2447 i,
2462 req->af.v4_req.loc_addr, 2448 ireq->loc_addr,
2463 ntohs(inet_sk(sk)->sport), 2449 ntohs(inet_sk(sk)->sport),
2464 req->af.v4_req.rmt_addr, 2450 ireq->rmt_addr,
2465 ntohs(req->rmt_port), 2451 ntohs(ireq->rmt_port),
2466 TCP_SYN_RECV, 2452 TCP_SYN_RECV,
2467 0, 0, /* could print option size, but that is af dependent. */ 2453 0, 0, /* could print option size, but that is af dependent. */
2468 1, /* timers active (only the expire timer) */ 2454 1, /* timers active (only the expire timer) */
@@ -2618,6 +2604,7 @@ struct proto tcp_prot = {
2618 .sysctl_rmem = sysctl_tcp_rmem, 2604 .sysctl_rmem = sysctl_tcp_rmem,
2619 .max_header = MAX_TCP_HEADER, 2605 .max_header = MAX_TCP_HEADER,
2620 .obj_size = sizeof(struct tcp_sock), 2606 .obj_size = sizeof(struct tcp_sock),
2607 .rsk_prot = &tcp_request_sock_ops,
2621}; 2608};
2622 2609
2623 2610
@@ -2660,7 +2647,6 @@ EXPORT_SYMBOL(tcp_proc_register);
2660EXPORT_SYMBOL(tcp_proc_unregister); 2647EXPORT_SYMBOL(tcp_proc_unregister);
2661#endif 2648#endif
2662EXPORT_SYMBOL(sysctl_local_port_range); 2649EXPORT_SYMBOL(sysctl_local_port_range);
2663EXPORT_SYMBOL(sysctl_max_syn_backlog);
2664EXPORT_SYMBOL(sysctl_tcp_low_latency); 2650EXPORT_SYMBOL(sysctl_tcp_low_latency);
2665EXPORT_SYMBOL(sysctl_tcp_tw_reuse); 2651EXPORT_SYMBOL(sysctl_tcp_tw_reuse);
2666 2652
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index eea1a17a9ac2..b3943e7562f3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -684,7 +684,7 @@ out:
684 * Actually, we could avoid lots of memory writes here. tp of listening 684 * Actually, we could avoid lots of memory writes here. tp of listening
685 * socket contains all necessary default parameters. 685 * socket contains all necessary default parameters.
686 */ 686 */
687struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb) 687struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
688{ 688{
689 /* allocate the newsk from the same slab of the master sock, 689 /* allocate the newsk from the same slab of the master sock,
690 * if not, at sk_free time we'll try to free it from the wrong 690 * if not, at sk_free time we'll try to free it from the wrong
@@ -692,6 +692,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
692 struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0); 692 struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0);
693 693
694 if(newsk != NULL) { 694 if(newsk != NULL) {
695 struct inet_request_sock *ireq = inet_rsk(req);
696 struct tcp_request_sock *treq = tcp_rsk(req);
695 struct tcp_sock *newtp; 697 struct tcp_sock *newtp;
696 struct sk_filter *filter; 698 struct sk_filter *filter;
697 699
@@ -703,7 +705,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
703 tcp_sk(newsk)->bind_hash = NULL; 705 tcp_sk(newsk)->bind_hash = NULL;
704 706
705 /* Clone the TCP header template */ 707 /* Clone the TCP header template */
706 inet_sk(newsk)->dport = req->rmt_port; 708 inet_sk(newsk)->dport = ireq->rmt_port;
707 709
708 sock_lock_init(newsk); 710 sock_lock_init(newsk);
709 bh_lock_sock(newsk); 711 bh_lock_sock(newsk);
@@ -739,14 +741,14 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
739 /* Now setup tcp_sock */ 741 /* Now setup tcp_sock */
740 newtp = tcp_sk(newsk); 742 newtp = tcp_sk(newsk);
741 newtp->pred_flags = 0; 743 newtp->pred_flags = 0;
742 newtp->rcv_nxt = req->rcv_isn + 1; 744 newtp->rcv_nxt = treq->rcv_isn + 1;
743 newtp->snd_nxt = req->snt_isn + 1; 745 newtp->snd_nxt = treq->snt_isn + 1;
744 newtp->snd_una = req->snt_isn + 1; 746 newtp->snd_una = treq->snt_isn + 1;
745 newtp->snd_sml = req->snt_isn + 1; 747 newtp->snd_sml = treq->snt_isn + 1;
746 748
747 tcp_prequeue_init(newtp); 749 tcp_prequeue_init(newtp);
748 750
749 tcp_init_wl(newtp, req->snt_isn, req->rcv_isn); 751 tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
750 752
751 newtp->retransmits = 0; 753 newtp->retransmits = 0;
752 newtp->backoff = 0; 754 newtp->backoff = 0;
@@ -775,10 +777,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
775 tcp_set_ca_state(newtp, TCP_CA_Open); 777 tcp_set_ca_state(newtp, TCP_CA_Open);
776 tcp_init_xmit_timers(newsk); 778 tcp_init_xmit_timers(newsk);
777 skb_queue_head_init(&newtp->out_of_order_queue); 779 skb_queue_head_init(&newtp->out_of_order_queue);
778 newtp->rcv_wup = req->rcv_isn + 1; 780 newtp->rcv_wup = treq->rcv_isn + 1;
779 newtp->write_seq = req->snt_isn + 1; 781 newtp->write_seq = treq->snt_isn + 1;
780 newtp->pushed_seq = newtp->write_seq; 782 newtp->pushed_seq = newtp->write_seq;
781 newtp->copied_seq = req->rcv_isn + 1; 783 newtp->copied_seq = treq->rcv_isn + 1;
782 784
783 newtp->rx_opt.saw_tstamp = 0; 785 newtp->rx_opt.saw_tstamp = 0;
784 786
@@ -788,10 +790,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
788 newtp->probes_out = 0; 790 newtp->probes_out = 0;
789 newtp->rx_opt.num_sacks = 0; 791 newtp->rx_opt.num_sacks = 0;
790 newtp->urg_data = 0; 792 newtp->urg_data = 0;
791 newtp->listen_opt = NULL; 793 /* Deinitialize accept_queue to trap illegal accesses. */
792 newtp->accept_queue = newtp->accept_queue_tail = NULL; 794 memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue));
793 /* Deinitialize syn_wait_lock to trap illegal accesses. */
794 memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));
795 795
796 /* Back to base struct sock members. */ 796 /* Back to base struct sock members. */
797 newsk->sk_err = 0; 797 newsk->sk_err = 0;
@@ -808,18 +808,18 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
808 newsk->sk_socket = NULL; 808 newsk->sk_socket = NULL;
809 newsk->sk_sleep = NULL; 809 newsk->sk_sleep = NULL;
810 810
811 newtp->rx_opt.tstamp_ok = req->tstamp_ok; 811 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
812 if((newtp->rx_opt.sack_ok = req->sack_ok) != 0) { 812 if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
813 if (sysctl_tcp_fack) 813 if (sysctl_tcp_fack)
814 newtp->rx_opt.sack_ok |= 2; 814 newtp->rx_opt.sack_ok |= 2;
815 } 815 }
816 newtp->window_clamp = req->window_clamp; 816 newtp->window_clamp = req->window_clamp;
817 newtp->rcv_ssthresh = req->rcv_wnd; 817 newtp->rcv_ssthresh = req->rcv_wnd;
818 newtp->rcv_wnd = req->rcv_wnd; 818 newtp->rcv_wnd = req->rcv_wnd;
819 newtp->rx_opt.wscale_ok = req->wscale_ok; 819 newtp->rx_opt.wscale_ok = ireq->wscale_ok;
820 if (newtp->rx_opt.wscale_ok) { 820 if (newtp->rx_opt.wscale_ok) {
821 newtp->rx_opt.snd_wscale = req->snd_wscale; 821 newtp->rx_opt.snd_wscale = ireq->snd_wscale;
822 newtp->rx_opt.rcv_wscale = req->rcv_wscale; 822 newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
823 } else { 823 } else {
824 newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; 824 newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
825 newtp->window_clamp = min(newtp->window_clamp, 65535U); 825 newtp->window_clamp = min(newtp->window_clamp, 65535U);
@@ -851,12 +851,12 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
851 851
852/* 852/*
853 * Process an incoming packet for SYN_RECV sockets represented 853 * Process an incoming packet for SYN_RECV sockets represented
854 * as an open_request. 854 * as a request_sock.
855 */ 855 */
856 856
857struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, 857struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
858 struct open_request *req, 858 struct request_sock *req,
859 struct open_request **prev) 859 struct request_sock **prev)
860{ 860{
861 struct tcphdr *th = skb->h.th; 861 struct tcphdr *th = skb->h.th;
862 struct tcp_sock *tp = tcp_sk(sk); 862 struct tcp_sock *tp = tcp_sk(sk);
@@ -881,7 +881,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
881 } 881 }
882 882
883 /* Check for pure retransmitted SYN. */ 883 /* Check for pure retransmitted SYN. */
884 if (TCP_SKB_CB(skb)->seq == req->rcv_isn && 884 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
885 flg == TCP_FLAG_SYN && 885 flg == TCP_FLAG_SYN &&
886 !paws_reject) { 886 !paws_reject) {
887 /* 887 /*
@@ -901,7 +901,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
901 * Enforce "SYN-ACK" according to figure 8, figure 6 901 * Enforce "SYN-ACK" according to figure 8, figure 6
902 * of RFC793, fixed by RFC1122. 902 * of RFC793, fixed by RFC1122.
903 */ 903 */
904 req->class->rtx_syn_ack(sk, req, NULL); 904 req->rsk_ops->rtx_syn_ack(sk, req, NULL);
905 return NULL; 905 return NULL;
906 } 906 }
907 907
@@ -959,7 +959,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
959 * Invalid ACK: reset will be sent by listening socket 959 * Invalid ACK: reset will be sent by listening socket
960 */ 960 */
961 if ((flg & TCP_FLAG_ACK) && 961 if ((flg & TCP_FLAG_ACK) &&
962 (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)) 962 (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
963 return sk; 963 return sk;
964 964
965 /* Also, it would not be such a bad idea to check rcv_tsecr, which 965 /* Also, it would not be such a bad idea to check rcv_tsecr, which
@@ -970,10 +970,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
970 /* RFC793: "first check sequence number". */ 970 /* RFC793: "first check sequence number". */
971 971
972 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, 972 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
973 req->rcv_isn+1, req->rcv_isn+1+req->rcv_wnd)) { 973 tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
974 /* Out of window: send ACK and drop. */ 974 /* Out of window: send ACK and drop. */
975 if (!(flg & TCP_FLAG_RST)) 975 if (!(flg & TCP_FLAG_RST))
976 req->class->send_ack(skb, req); 976 req->rsk_ops->send_ack(skb, req);
977 if (paws_reject) 977 if (paws_reject)
978 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); 978 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
979 return NULL; 979 return NULL;
@@ -981,12 +981,12 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
981 981
982 /* In sequence, PAWS is OK. */ 982 /* In sequence, PAWS is OK. */
983 983
984 if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1)) 984 if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
985 req->ts_recent = tmp_opt.rcv_tsval; 985 req->ts_recent = tmp_opt.rcv_tsval;
986 986
987 if (TCP_SKB_CB(skb)->seq == req->rcv_isn) { 987 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
988 /* Truncate SYN, it is out of window starting 988 /* Truncate SYN, it is out of window starting
989 at req->rcv_isn+1. */ 989 at tcp_rsk(req)->rcv_isn + 1. */
990 flg &= ~TCP_FLAG_SYN; 990 flg &= ~TCP_FLAG_SYN;
991 } 991 }
992 992
@@ -1003,8 +1003,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1003 return NULL; 1003 return NULL;
1004 1004
1005 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ 1005 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
1006 if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) { 1006 if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
1007 req->acked = 1; 1007 inet_rsk(req)->acked = 1;
1008 return NULL; 1008 return NULL;
1009 } 1009 }
1010 1010
@@ -1026,14 +1026,14 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1026 1026
1027 listen_overflow: 1027 listen_overflow:
1028 if (!sysctl_tcp_abort_on_overflow) { 1028 if (!sysctl_tcp_abort_on_overflow) {
1029 req->acked = 1; 1029 inet_rsk(req)->acked = 1;
1030 return NULL; 1030 return NULL;
1031 } 1031 }
1032 1032
1033 embryonic_reset: 1033 embryonic_reset:
1034 NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS); 1034 NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
1035 if (!(flg & TCP_FLAG_RST)) 1035 if (!(flg & TCP_FLAG_RST))
1036 req->class->send_reset(skb); 1036 req->rsk_ops->send_reset(skb);
1037 1037
1038 tcp_synq_drop(sk, req, prev); 1038 tcp_synq_drop(sk, req, prev);
1039 return NULL; 1039 return NULL;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fa24e7ae1f40..f17c6577e337 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1356,8 +1356,9 @@ int tcp_send_synack(struct sock *sk)
1356 * Prepare a SYN-ACK. 1356 * Prepare a SYN-ACK.
1357 */ 1357 */
1358struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, 1358struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
1359 struct open_request *req) 1359 struct request_sock *req)
1360{ 1360{
1361 struct inet_request_sock *ireq = inet_rsk(req);
1361 struct tcp_sock *tp = tcp_sk(sk); 1362 struct tcp_sock *tp = tcp_sk(sk);
1362 struct tcphdr *th; 1363 struct tcphdr *th;
1363 int tcp_header_size; 1364 int tcp_header_size;
@@ -1373,47 +1374,47 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
1373 skb->dst = dst_clone(dst); 1374 skb->dst = dst_clone(dst);
1374 1375
1375 tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + 1376 tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
1376 (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + 1377 (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
1377 (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + 1378 (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
1378 /* SACK_PERM is in the place of NOP NOP of TS */ 1379 /* SACK_PERM is in the place of NOP NOP of TS */
1379 ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); 1380 ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
1380 skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); 1381 skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
1381 1382
1382 memset(th, 0, sizeof(struct tcphdr)); 1383 memset(th, 0, sizeof(struct tcphdr));
1383 th->syn = 1; 1384 th->syn = 1;
1384 th->ack = 1; 1385 th->ack = 1;
1385 if (dst->dev->features&NETIF_F_TSO) 1386 if (dst->dev->features&NETIF_F_TSO)
1386 req->ecn_ok = 0; 1387 ireq->ecn_ok = 0;
1387 TCP_ECN_make_synack(req, th); 1388 TCP_ECN_make_synack(req, th);
1388 th->source = inet_sk(sk)->sport; 1389 th->source = inet_sk(sk)->sport;
1389 th->dest = req->rmt_port; 1390 th->dest = ireq->rmt_port;
1390 TCP_SKB_CB(skb)->seq = req->snt_isn; 1391 TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
1391 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; 1392 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
1392 TCP_SKB_CB(skb)->sacked = 0; 1393 TCP_SKB_CB(skb)->sacked = 0;
1393 skb_shinfo(skb)->tso_segs = 1; 1394 skb_shinfo(skb)->tso_segs = 1;
1394 skb_shinfo(skb)->tso_size = 0; 1395 skb_shinfo(skb)->tso_size = 0;
1395 th->seq = htonl(TCP_SKB_CB(skb)->seq); 1396 th->seq = htonl(TCP_SKB_CB(skb)->seq);
1396 th->ack_seq = htonl(req->rcv_isn + 1); 1397 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
1397 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ 1398 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
1398 __u8 rcv_wscale; 1399 __u8 rcv_wscale;
1399 /* Set this up on the first call only */ 1400 /* Set this up on the first call only */
1400 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); 1401 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
1401 /* tcp_full_space because it is guaranteed to be the first packet */ 1402 /* tcp_full_space because it is guaranteed to be the first packet */
1402 tcp_select_initial_window(tcp_full_space(sk), 1403 tcp_select_initial_window(tcp_full_space(sk),
1403 dst_metric(dst, RTAX_ADVMSS) - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), 1404 dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
1404 &req->rcv_wnd, 1405 &req->rcv_wnd,
1405 &req->window_clamp, 1406 &req->window_clamp,
1406 req->wscale_ok, 1407 ireq->wscale_ok,
1407 &rcv_wscale); 1408 &rcv_wscale);
1408 req->rcv_wscale = rcv_wscale; 1409 ireq->rcv_wscale = rcv_wscale;
1409 } 1410 }
1410 1411
1411 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ 1412 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
1412 th->window = htons(req->rcv_wnd); 1413 th->window = htons(req->rcv_wnd);
1413 1414
1414 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1415 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1415 tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok, 1416 tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
1416 req->sack_ok, req->wscale_ok, req->rcv_wscale, 1417 ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
1417 TCP_SKB_CB(skb)->when, 1418 TCP_SKB_CB(skb)->when,
1418 req->ts_recent); 1419 req->ts_recent);
1419 1420
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 799ebe061e2c..b127b4498565 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -464,11 +464,11 @@ out_unlock:
464static void tcp_synack_timer(struct sock *sk) 464static void tcp_synack_timer(struct sock *sk)
465{ 465{
466 struct tcp_sock *tp = tcp_sk(sk); 466 struct tcp_sock *tp = tcp_sk(sk);
467 struct tcp_listen_opt *lopt = tp->listen_opt; 467 struct listen_sock *lopt = tp->accept_queue.listen_opt;
468 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; 468 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
469 int thresh = max_retries; 469 int thresh = max_retries;
470 unsigned long now = jiffies; 470 unsigned long now = jiffies;
471 struct open_request **reqp, *req; 471 struct request_sock **reqp, *req;
472 int i, budget; 472 int i, budget;
473 473
474 if (lopt == NULL || lopt->qlen == 0) 474 if (lopt == NULL || lopt->qlen == 0)
@@ -513,8 +513,8 @@ static void tcp_synack_timer(struct sock *sk)
513 while ((req = *reqp) != NULL) { 513 while ((req = *reqp) != NULL) {
514 if (time_after_eq(now, req->expires)) { 514 if (time_after_eq(now, req->expires)) {
515 if ((req->retrans < thresh || 515 if ((req->retrans < thresh ||
516 (req->acked && req->retrans < max_retries)) 516 (inet_rsk(req)->acked && req->retrans < max_retries))
517 && !req->class->rtx_syn_ack(sk, req, NULL)) { 517 && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) {
518 unsigned long timeo; 518 unsigned long timeo;
519 519
520 if (req->retrans++ == 0) 520 if (req->retrans++ == 0)
@@ -527,13 +527,9 @@ static void tcp_synack_timer(struct sock *sk)
527 } 527 }
528 528
529 /* Drop this request */ 529 /* Drop this request */
530 write_lock(&tp->syn_wait_lock); 530 tcp_synq_unlink(tp, req, reqp);
531 *reqp = req->dl_next; 531 reqsk_queue_removed(&tp->accept_queue, req);
532 write_unlock(&tp->syn_wait_lock); 532 reqsk_free(req);
533 lopt->qlen--;
534 if (req->retrans == 0)
535 lopt->qlen_young--;
536 tcp_openreq_free(req);
537 continue; 533 continue;
538 } 534 }
539 reqp = &req->dl_next; 535 reqp = &req->dl_next;
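
Note: the removed lines decremented lopt->qlen and qlen_young inline; reqsk_queue_removed() centralizes that accounting, while tcp_synq_unlink() is assumed to do the list splice under syn_wait_lock. A sketch of the accounting half:

    static inline void reqsk_queue_removed(struct request_sock_queue *queue,
                                           struct request_sock *req)
    {
            struct listen_sock *lopt = queue->listen_opt;

            if (req->retrans == 0)
                    --lopt->qlen_young;     /* never retransmitted -> "young" */
            --lopt->qlen;
    }
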
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2720899d516c..47a30c3188ea 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -131,7 +131,7 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
131 131
132static int addrconf_ifdown(struct net_device *dev, int how); 132static int addrconf_ifdown(struct net_device *dev, int how);
133 133
134static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags); 134static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
135static void addrconf_dad_timer(unsigned long data); 135static void addrconf_dad_timer(unsigned long data);
136static void addrconf_dad_completed(struct inet6_ifaddr *ifp); 136static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
137static void addrconf_rs_timer(unsigned long data); 137static void addrconf_rs_timer(unsigned long data);
@@ -492,7 +492,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
492 492
493static struct inet6_ifaddr * 493static struct inet6_ifaddr *
494ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, 494ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
495 int scope, unsigned flags) 495 int scope, u32 flags)
496{ 496{
497 struct inet6_ifaddr *ifa = NULL; 497 struct inet6_ifaddr *ifa = NULL;
498 struct rt6_info *rt; 498 struct rt6_info *rt;
@@ -1320,7 +1320,7 @@ static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad
1320 1320
1321static void 1321static void
1322addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, 1322addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
1323 unsigned long expires, unsigned flags) 1323 unsigned long expires, u32 flags)
1324{ 1324{
1325 struct in6_rtmsg rtmsg; 1325 struct in6_rtmsg rtmsg;
1326 1326
@@ -2229,7 +2229,7 @@ out:
2229/* 2229/*
2230 * Duplicate Address Detection 2230 * Duplicate Address Detection
2231 */ 2231 */
2232static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags) 2232static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
2233{ 2233{
2234 struct inet6_dev *idev = ifp->idev; 2234 struct inet6_dev *idev = ifp->idev;
2235 struct net_device *dev = idev->dev; 2235 struct net_device *dev = idev->dev;
@@ -2622,15 +2622,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2622} 2622}
2623 2623
2624static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, 2624static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
2625 u32 pid, u32 seq, int event) 2625 u32 pid, u32 seq, int event, unsigned int flags)
2626{ 2626{
2627 struct ifaddrmsg *ifm; 2627 struct ifaddrmsg *ifm;
2628 struct nlmsghdr *nlh; 2628 struct nlmsghdr *nlh;
2629 struct ifa_cacheinfo ci; 2629 struct ifa_cacheinfo ci;
2630 unsigned char *b = skb->tail; 2630 unsigned char *b = skb->tail;
2631 2631
2632 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 2632 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
2633 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2634 ifm = NLMSG_DATA(nlh); 2633 ifm = NLMSG_DATA(nlh);
2635 ifm->ifa_family = AF_INET6; 2634 ifm->ifa_family = AF_INET6;
2636 ifm->ifa_prefixlen = ifa->prefix_len; 2635 ifm->ifa_prefixlen = ifa->prefix_len;
@@ -2672,15 +2671,14 @@ rtattr_failure:
2672} 2671}
2673 2672
2674static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, 2673static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
2675 u32 pid, u32 seq, int event) 2674 u32 pid, u32 seq, int event, u16 flags)
2676{ 2675{
2677 struct ifaddrmsg *ifm; 2676 struct ifaddrmsg *ifm;
2678 struct nlmsghdr *nlh; 2677 struct nlmsghdr *nlh;
2679 struct ifa_cacheinfo ci; 2678 struct ifa_cacheinfo ci;
2680 unsigned char *b = skb->tail; 2679 unsigned char *b = skb->tail;
2681 2680
2682 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 2681 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
2683 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2684 ifm = NLMSG_DATA(nlh); 2682 ifm = NLMSG_DATA(nlh);
2685 ifm->ifa_family = AF_INET6; 2683 ifm->ifa_family = AF_INET6;
2686 ifm->ifa_prefixlen = 128; 2684 ifm->ifa_prefixlen = 128;
@@ -2709,15 +2707,14 @@ rtattr_failure:
2709} 2707}
2710 2708
2711static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, 2709static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
2712 u32 pid, u32 seq, int event) 2710 u32 pid, u32 seq, int event, unsigned int flags)
2713{ 2711{
2714 struct ifaddrmsg *ifm; 2712 struct ifaddrmsg *ifm;
2715 struct nlmsghdr *nlh; 2713 struct nlmsghdr *nlh;
2716 struct ifa_cacheinfo ci; 2714 struct ifa_cacheinfo ci;
2717 unsigned char *b = skb->tail; 2715 unsigned char *b = skb->tail;
2718 2716
2719 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 2717 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
2720 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2721 ifm = NLMSG_DATA(nlh); 2718 ifm = NLMSG_DATA(nlh);
2722 ifm->ifa_family = AF_INET6; 2719 ifm->ifa_family = AF_INET6;
2723 ifm->ifa_prefixlen = 128; 2720 ifm->ifa_prefixlen = 128;
@@ -2786,7 +2783,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2786 continue; 2783 continue;
2787 if ((err = inet6_fill_ifaddr(skb, ifa, 2784 if ((err = inet6_fill_ifaddr(skb, ifa,
2788 NETLINK_CB(cb->skb).pid, 2785 NETLINK_CB(cb->skb).pid,
2789 cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) 2786 cb->nlh->nlmsg_seq, RTM_NEWADDR,
2787 NLM_F_MULTI)) <= 0)
2790 goto done; 2788 goto done;
2791 } 2789 }
2792 /* temp addr */ 2790 /* temp addr */
@@ -2797,7 +2795,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2797 continue; 2795 continue;
2798 if ((err = inet6_fill_ifaddr(skb, ifa, 2796 if ((err = inet6_fill_ifaddr(skb, ifa,
2799 NETLINK_CB(cb->skb).pid, 2797 NETLINK_CB(cb->skb).pid,
2800 cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) 2798 cb->nlh->nlmsg_seq, RTM_NEWADDR,
2799 NLM_F_MULTI)) <= 0)
2801 goto done; 2800 goto done;
2802 } 2801 }
2803#endif 2802#endif
@@ -2810,7 +2809,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2810 continue; 2809 continue;
2811 if ((err = inet6_fill_ifmcaddr(skb, ifmca, 2810 if ((err = inet6_fill_ifmcaddr(skb, ifmca,
2812 NETLINK_CB(cb->skb).pid, 2811 NETLINK_CB(cb->skb).pid,
2813 cb->nlh->nlmsg_seq, RTM_GETMULTICAST)) <= 0) 2812 cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
2813 NLM_F_MULTI)) <= 0)
2814 goto done; 2814 goto done;
2815 } 2815 }
2816 break; 2816 break;
@@ -2822,7 +2822,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2822 continue; 2822 continue;
2823 if ((err = inet6_fill_ifacaddr(skb, ifaca, 2823 if ((err = inet6_fill_ifacaddr(skb, ifaca,
2824 NETLINK_CB(cb->skb).pid, 2824 NETLINK_CB(cb->skb).pid,
2825 cb->nlh->nlmsg_seq, RTM_GETANYCAST)) <= 0) 2825 cb->nlh->nlmsg_seq, RTM_GETANYCAST,
2826 NLM_F_MULTI)) <= 0)
2826 goto done; 2827 goto done;
2827 } 2828 }
2828 break; 2829 break;
@@ -2872,7 +2873,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
2872 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS); 2873 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS);
2873 return; 2874 return;
2874 } 2875 }
2875 if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { 2876 if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
2876 kfree_skb(skb); 2877 kfree_skb(skb);
2877 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL); 2878 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL);
2878 return; 2879 return;
@@ -2907,7 +2908,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
2907} 2908}
2908 2909
2909static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 2910static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
2910 u32 pid, u32 seq, int event) 2911 u32 pid, u32 seq, int event, unsigned int flags)
2911{ 2912{
2912 struct net_device *dev = idev->dev; 2913 struct net_device *dev = idev->dev;
2913 __s32 *array = NULL; 2914 __s32 *array = NULL;
@@ -2918,8 +2919,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
2918 __u32 mtu = dev->mtu; 2919 __u32 mtu = dev->mtu;
2919 struct ifla_cacheinfo ci; 2920 struct ifla_cacheinfo ci;
2920 2921
2921 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r)); 2922 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
2922 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2923 r = NLMSG_DATA(nlh); 2923 r = NLMSG_DATA(nlh);
2924 r->ifi_family = AF_INET6; 2924 r->ifi_family = AF_INET6;
2925 r->ifi_type = dev->type; 2925 r->ifi_type = dev->type;
@@ -2986,7 +2986,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
2986 if ((idev = in6_dev_get(dev)) == NULL) 2986 if ((idev = in6_dev_get(dev)) == NULL)
2987 continue; 2987 continue;
2988 err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, 2988 err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
2989 cb->nlh->nlmsg_seq, RTM_NEWLINK); 2989 cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
2990 in6_dev_put(idev); 2990 in6_dev_put(idev);
2991 if (err <= 0) 2991 if (err <= 0)
2992 break; 2992 break;
@@ -3008,7 +3008,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3008 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS); 3008 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS);
3009 return; 3009 return;
3010 } 3010 }
3011 if (inet6_fill_ifinfo(skb, idev, 0, 0, event) < 0) { 3011 if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
3012 kfree_skb(skb); 3012 kfree_skb(skb);
3013 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL); 3013 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL);
3014 return; 3014 return;
@@ -3018,18 +3018,15 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3018} 3018}
3019 3019
3020static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, 3020static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
3021 struct prefix_info *pinfo, u32 pid, u32 seq, int event) 3021 struct prefix_info *pinfo, u32 pid, u32 seq,
3022 int event, unsigned int flags)
3022{ 3023{
3023 struct prefixmsg *pmsg; 3024 struct prefixmsg *pmsg;
3024 struct nlmsghdr *nlh; 3025 struct nlmsghdr *nlh;
3025 unsigned char *b = skb->tail; 3026 unsigned char *b = skb->tail;
3026 struct prefix_cacheinfo ci; 3027 struct prefix_cacheinfo ci;
3027 3028
3028 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*pmsg)); 3029 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*pmsg), flags);
3029
3030 if (pid)
3031 nlh->nlmsg_flags |= NLM_F_MULTI;
3032
3033 pmsg = NLMSG_DATA(nlh); 3030 pmsg = NLMSG_DATA(nlh);
3034 pmsg->prefix_family = AF_INET6; 3031 pmsg->prefix_family = AF_INET6;
3035 pmsg->prefix_ifindex = idev->dev->ifindex; 3032 pmsg->prefix_ifindex = idev->dev->ifindex;
@@ -3068,7 +3065,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
3068 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS); 3065 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS);
3069 return; 3066 return;
3070 } 3067 }
3071 if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event) < 0) { 3068 if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
3072 kfree_skb(skb); 3069 kfree_skb(skb);
3073 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL); 3070 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL);
3074 return; 3071 return;
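
The addrconf hunks above all follow one pattern, which recurs in route.c and the packet scheduler below: each fill helper gains a flags argument so that NLMSG_NEW() can stamp nlmsg_flags at allocation time (NLM_F_MULTI for dump replies, 0 for one-shot notifications), replacing the old "if (pid) nlh->nlmsg_flags |= NLM_F_MULTI" patch-up. A minimal sketch of that shape, with an illustrative helper name and payload rather than the real addrconf functions:

	static int example_fill(struct sk_buff *skb, u32 pid, u32 seq,
				int event, unsigned int flags)
	{
		struct ifaddrmsg *ifm;
		struct nlmsghdr *nlh;

		/* NLMSG_NEW() writes flags into nlh->nlmsg_flags and jumps
		 * to nlmsg_failure if the skb lacks tailroom. */
		nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
		ifm = NLMSG_DATA(nlh);
		/* ... fill *ifm and append rtattrs ... */
		return skb->len;

	nlmsg_failure:
		return -1;
	}
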
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 65b9375df57d..5229365cd8b4 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -353,14 +353,14 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
353 err = copied; 353 err = copied;
354 354
355 /* Reset and regenerate socket error */ 355 /* Reset and regenerate socket error */
356 spin_lock_irq(&sk->sk_error_queue.lock); 356 spin_lock_bh(&sk->sk_error_queue.lock);
357 sk->sk_err = 0; 357 sk->sk_err = 0;
358 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { 358 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
359 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; 359 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
360 spin_unlock_irq(&sk->sk_error_queue.lock); 360 spin_unlock_bh(&sk->sk_error_queue.lock);
361 sk->sk_error_report(sk); 361 sk->sk_error_report(sk);
362 } else { 362 } else {
363 spin_unlock_irq(&sk->sk_error_queue.lock); 363 spin_unlock_bh(&sk->sk_error_queue.lock);
364 } 364 }
365 365
366out_free_skb: 366out_free_skb:
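
The locking change here (repeated in raw.c and udp.c below) swaps spin_lock_irq() for spin_lock_bh(): these socket queues are only touched from process context and softirq context, so masking bottom halves is sufficient and disabling hard interrupts was needless overhead. Condensed from the hunk above, the resulting error-regeneration idiom:

	struct sk_buff *skb2;

	spin_lock_bh(&sk->sk_error_queue.lock);	/* softirqs off; IRQs stay on */
	sk->sk_err = 0;
	skb2 = skb_peek(&sk->sk_error_queue);
	if (skb2 != NULL)
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
	spin_unlock_bh(&sk->sk_error_queue.lock);
	if (skb2 != NULL)
		sk->sk_error_report(sk);
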
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 617645bc5ed6..e2b848ec9851 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -434,12 +434,12 @@ csum_copy_err:
434 /* Clear queue. */ 434 /* Clear queue. */
435 if (flags&MSG_PEEK) { 435 if (flags&MSG_PEEK) {
436 int clear = 0; 436 int clear = 0;
437 spin_lock_irq(&sk->sk_receive_queue.lock); 437 spin_lock_bh(&sk->sk_receive_queue.lock);
438 if (skb == skb_peek(&sk->sk_receive_queue)) { 438 if (skb == skb_peek(&sk->sk_receive_queue)) {
439 __skb_unlink(skb, &sk->sk_receive_queue); 439 __skb_unlink(skb, &sk->sk_receive_queue);
440 clear = 1; 440 clear = 1;
441 } 441 }
442 spin_unlock_irq(&sk->sk_receive_queue.lock); 442 spin_unlock_bh(&sk->sk_receive_queue.lock);
443 if (clear) 443 if (clear)
444 kfree_skb(skb); 444 kfree_skb(skb);
445 } 445 }
@@ -971,11 +971,11 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
971 struct sk_buff *skb; 971 struct sk_buff *skb;
972 int amount = 0; 972 int amount = 0;
973 973
974 spin_lock_irq(&sk->sk_receive_queue.lock); 974 spin_lock_bh(&sk->sk_receive_queue.lock);
975 skb = skb_peek(&sk->sk_receive_queue); 975 skb = skb_peek(&sk->sk_receive_queue);
976 if (skb != NULL) 976 if (skb != NULL)
977 amount = skb->tail - skb->h.raw; 977 amount = skb->tail - skb->h.raw;
978 spin_unlock_irq(&sk->sk_receive_queue.lock); 978 spin_unlock_bh(&sk->sk_receive_queue.lock);
979 return put_user(amount, (int __user *)arg); 979 return put_user(amount, (int __user *)arg);
980 } 980 }
981 981
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3bf8a0254f81..1f5b226c3573 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1570,7 +1570,8 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1570 struct in6_addr *src, 1570 struct in6_addr *src,
1571 int iif, 1571 int iif,
1572 int type, u32 pid, u32 seq, 1572 int type, u32 pid, u32 seq,
1573 struct nlmsghdr *in_nlh, int prefix) 1573 struct nlmsghdr *in_nlh, int prefix,
1574 unsigned int flags)
1574{ 1575{
1575 struct rtmsg *rtm; 1576 struct rtmsg *rtm;
1576 struct nlmsghdr *nlh; 1577 struct nlmsghdr *nlh;
@@ -1588,7 +1589,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1588 pid = in_nlh->nlmsg_pid; 1589 pid = in_nlh->nlmsg_pid;
1589 } 1590 }
1590 1591
1591 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm)); 1592 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1592 rtm = NLMSG_DATA(nlh); 1593 rtm = NLMSG_DATA(nlh);
1593 rtm->rtm_family = AF_INET6; 1594 rtm->rtm_family = AF_INET6;
1594 rtm->rtm_dst_len = rt->rt6i_dst.plen; 1595 rtm->rtm_dst_len = rt->rt6i_dst.plen;
@@ -1674,7 +1675,7 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1674 1675
1675 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 1676 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1676 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 1677 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1677 NULL, prefix); 1678 NULL, prefix, NLM_F_MULTI);
1678} 1679}
1679 1680
1680static int fib6_dump_node(struct fib6_walker_t *w) 1681static int fib6_dump_node(struct fib6_walker_t *w)
@@ -1822,7 +1823,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1822 &fl.fl6_dst, &fl.fl6_src, 1823 &fl.fl6_dst, &fl.fl6_src,
1823 iif, 1824 iif,
1824 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 1825 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825 nlh->nlmsg_seq, nlh, 0); 1826 nlh->nlmsg_seq, nlh, 0, 0);
1826 if (err < 0) { 1827 if (err < 0) {
1827 err = -EMSGSIZE; 1828 err = -EMSGSIZE;
1828 goto out_free; 1829 goto out_free;
@@ -1848,7 +1849,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1848 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS); 1849 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1849 return; 1850 return;
1850 } 1851 }
1851 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) { 1852 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0, 0) < 0) {
1852 kfree_skb(skb); 1853 kfree_skb(skb);
1853 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL); 1854 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1854 return; 1855 return;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0f69e800a0ad..2414937f2a83 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -65,7 +65,7 @@
65#include <linux/seq_file.h> 65#include <linux/seq_file.h>
66 66
67static void tcp_v6_send_reset(struct sk_buff *skb); 67static void tcp_v6_send_reset(struct sk_buff *skb);
68static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req); 68static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
69static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 69static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
70 struct sk_buff *skb); 70 struct sk_buff *skb);
71 71
@@ -394,24 +394,26 @@ static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
394 return c & (TCP_SYNQ_HSIZE - 1); 394 return c & (TCP_SYNQ_HSIZE - 1);
395} 395}
396 396
397static struct open_request *tcp_v6_search_req(struct tcp_sock *tp, 397static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
398 struct open_request ***prevp, 398 struct request_sock ***prevp,
399 __u16 rport, 399 __u16 rport,
400 struct in6_addr *raddr, 400 struct in6_addr *raddr,
401 struct in6_addr *laddr, 401 struct in6_addr *laddr,
402 int iif) 402 int iif)
403{ 403{
404 struct tcp_listen_opt *lopt = tp->listen_opt; 404 struct listen_sock *lopt = tp->accept_queue.listen_opt;
405 struct open_request *req, **prev; 405 struct request_sock *req, **prev;
406 406
407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; 407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
408 (req = *prev) != NULL; 408 (req = *prev) != NULL;
409 prev = &req->dl_next) { 409 prev = &req->dl_next) {
410 if (req->rmt_port == rport && 410 const struct tcp6_request_sock *treq = tcp6_rsk(req);
411 req->class->family == AF_INET6 && 411
412 ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) && 412 if (inet_rsk(req)->rmt_port == rport &&
413 ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) && 413 req->rsk_ops->family == AF_INET6 &&
414 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) { 414 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
415 ipv6_addr_equal(&treq->loc_addr, laddr) &&
416 (!treq->iif || treq->iif == iif)) {
415 BUG_TRAP(req->sk == NULL); 417 BUG_TRAP(req->sk == NULL);
416 *prevp = prev; 418 *prevp = prev;
417 return req; 419 return req;
@@ -906,9 +908,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
906 908
907 icmpv6_err_convert(type, code, &err); 909 icmpv6_err_convert(type, code, &err);
908 910
909 /* Might be for an open_request */ 911 /* Might be for a request_sock */
910 switch (sk->sk_state) { 912 switch (sk->sk_state) {
911 struct open_request *req, **prev; 913 struct request_sock *req, **prev;
912 case TCP_LISTEN: 914 case TCP_LISTEN:
913 if (sock_owned_by_user(sk)) 915 if (sock_owned_by_user(sk))
914 goto out; 916 goto out;
@@ -923,7 +925,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
923 */ 925 */
924 BUG_TRAP(req->sk == NULL); 926 BUG_TRAP(req->sk == NULL);
925 927
926 if (seq != req->snt_isn) { 928 if (seq != tcp_rsk(req)->snt_isn) {
927 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 929 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
928 goto out; 930 goto out;
929 } 931 }
@@ -957,9 +959,10 @@ out:
957} 959}
958 960
959 961
960static int tcp_v6_send_synack(struct sock *sk, struct open_request *req, 962static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
961 struct dst_entry *dst) 963 struct dst_entry *dst)
962{ 964{
965 struct tcp6_request_sock *treq = tcp6_rsk(req);
963 struct ipv6_pinfo *np = inet6_sk(sk); 966 struct ipv6_pinfo *np = inet6_sk(sk);
964 struct sk_buff * skb; 967 struct sk_buff * skb;
965 struct ipv6_txoptions *opt = NULL; 968 struct ipv6_txoptions *opt = NULL;
@@ -969,19 +972,19 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
969 972
970 memset(&fl, 0, sizeof(fl)); 973 memset(&fl, 0, sizeof(fl));
971 fl.proto = IPPROTO_TCP; 974 fl.proto = IPPROTO_TCP;
972 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr); 975 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
973 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr); 976 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
974 fl.fl6_flowlabel = 0; 977 fl.fl6_flowlabel = 0;
975 fl.oif = req->af.v6_req.iif; 978 fl.oif = treq->iif;
976 fl.fl_ip_dport = req->rmt_port; 979 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
977 fl.fl_ip_sport = inet_sk(sk)->sport; 980 fl.fl_ip_sport = inet_sk(sk)->sport;
978 981
979 if (dst == NULL) { 982 if (dst == NULL) {
980 opt = np->opt; 983 opt = np->opt;
981 if (opt == NULL && 984 if (opt == NULL &&
982 np->rxopt.bits.srcrt == 2 && 985 np->rxopt.bits.srcrt == 2 &&
983 req->af.v6_req.pktopts) { 986 treq->pktopts) {
984 struct sk_buff *pktopts = req->af.v6_req.pktopts; 987 struct sk_buff *pktopts = treq->pktopts;
985 struct inet6_skb_parm *rxopt = IP6CB(pktopts); 988 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
986 if (rxopt->srcrt) 989 if (rxopt->srcrt)
987 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt)); 990 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
@@ -1008,10 +1011,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
1008 struct tcphdr *th = skb->h.th; 1011 struct tcphdr *th = skb->h.th;
1009 1012
1010 th->check = tcp_v6_check(th, skb->len, 1013 th->check = tcp_v6_check(th, skb->len,
1011 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr, 1014 &treq->loc_addr, &treq->rmt_addr,
1012 csum_partial((char *)th, skb->len, skb->csum)); 1015 csum_partial((char *)th, skb->len, skb->csum));
1013 1016
1014 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr); 1017 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1015 err = ip6_xmit(sk, skb, &fl, opt, 0); 1018 err = ip6_xmit(sk, skb, &fl, opt, 0);
1016 if (err == NET_XMIT_CN) 1019 if (err == NET_XMIT_CN)
1017 err = 0; 1020 err = 0;
@@ -1024,17 +1027,18 @@ done:
1024 return err; 1027 return err;
1025} 1028}
1026 1029
1027static void tcp_v6_or_free(struct open_request *req) 1030static void tcp_v6_reqsk_destructor(struct request_sock *req)
1028{ 1031{
1029 if (req->af.v6_req.pktopts) 1032 if (tcp6_rsk(req)->pktopts)
1030 kfree_skb(req->af.v6_req.pktopts); 1033 kfree_skb(tcp6_rsk(req)->pktopts);
1031} 1034}
1032 1035
1033static struct or_calltable or_ipv6 = { 1036static struct request_sock_ops tcp6_request_sock_ops = {
1034 .family = AF_INET6, 1037 .family = AF_INET6,
1038 .obj_size = sizeof(struct tcp6_request_sock),
1035 .rtx_syn_ack = tcp_v6_send_synack, 1039 .rtx_syn_ack = tcp_v6_send_synack,
1036 .send_ack = tcp_v6_or_send_ack, 1040 .send_ack = tcp_v6_reqsk_send_ack,
1037 .destructor = tcp_v6_or_free, 1041 .destructor = tcp_v6_reqsk_destructor,
1038 .send_reset = tcp_v6_send_reset 1042 .send_reset = tcp_v6_send_reset
1039}; 1043};
1040 1044
@@ -1219,15 +1223,15 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1219 tcp_tw_put(tw); 1223 tcp_tw_put(tw);
1220} 1224}
1221 1225
1222static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req) 1226static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1223{ 1227{
1224 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent); 1228 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1225} 1229}
1226 1230
1227 1231
1228static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) 1232static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1229{ 1233{
1230 struct open_request *req, **prev; 1234 struct request_sock *req, **prev;
1231 struct tcphdr *th = skb->h.th; 1235 struct tcphdr *th = skb->h.th;
1232 struct tcp_sock *tp = tcp_sk(sk); 1236 struct tcp_sock *tp = tcp_sk(sk);
1233 struct sock *nsk; 1237 struct sock *nsk;
@@ -1260,21 +1264,13 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1260 return sk; 1264 return sk;
1261} 1265}
1262 1266
1263static void tcp_v6_synq_add(struct sock *sk, struct open_request *req) 1267static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1264{ 1268{
1265 struct tcp_sock *tp = tcp_sk(sk); 1269 struct tcp_sock *tp = tcp_sk(sk);
1266 struct tcp_listen_opt *lopt = tp->listen_opt; 1270 struct listen_sock *lopt = tp->accept_queue.listen_opt;
1267 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd); 1271 u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1268
1269 req->sk = NULL;
1270 req->expires = jiffies + TCP_TIMEOUT_INIT;
1271 req->retrans = 0;
1272 req->dl_next = lopt->syn_table[h];
1273
1274 write_lock(&tp->syn_wait_lock);
1275 lopt->syn_table[h] = req;
1276 write_unlock(&tp->syn_wait_lock);
1277 1272
1273 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
1278 tcp_synq_added(sk); 1274 tcp_synq_added(sk);
1279} 1275}
1280 1276
@@ -1284,10 +1280,11 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1284 */ 1280 */
1285static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1281static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1286{ 1282{
1283 struct tcp6_request_sock *treq;
1287 struct ipv6_pinfo *np = inet6_sk(sk); 1284 struct ipv6_pinfo *np = inet6_sk(sk);
1288 struct tcp_options_received tmp_opt; 1285 struct tcp_options_received tmp_opt;
1289 struct tcp_sock *tp = tcp_sk(sk); 1286 struct tcp_sock *tp = tcp_sk(sk);
1290 struct open_request *req = NULL; 1287 struct request_sock *req = NULL;
1291 __u32 isn = TCP_SKB_CB(skb)->when; 1288 __u32 isn = TCP_SKB_CB(skb)->when;
1292 1289
1293 if (skb->protocol == htons(ETH_P_IP)) 1290 if (skb->protocol == htons(ETH_P_IP))
@@ -1308,7 +1305,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1308 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) 1305 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1309 goto drop; 1306 goto drop;
1310 1307
1311 req = tcp_openreq_alloc(); 1308 req = reqsk_alloc(&tcp6_request_sock_ops);
1312 if (req == NULL) 1309 if (req == NULL)
1313 goto drop; 1310 goto drop;
1314 1311
@@ -1321,28 +1318,28 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1321 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 1318 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1322 tcp_openreq_init(req, &tmp_opt, skb); 1319 tcp_openreq_init(req, &tmp_opt, skb);
1323 1320
1324 req->class = &or_ipv6; 1321 treq = tcp6_rsk(req);
1325 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr); 1322 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1326 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr); 1323 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1327 TCP_ECN_create_request(req, skb->h.th); 1324 TCP_ECN_create_request(req, skb->h.th);
1328 req->af.v6_req.pktopts = NULL; 1325 treq->pktopts = NULL;
1329 if (ipv6_opt_accepted(sk, skb) || 1326 if (ipv6_opt_accepted(sk, skb) ||
1330 np->rxopt.bits.rxinfo || 1327 np->rxopt.bits.rxinfo ||
1331 np->rxopt.bits.rxhlim) { 1328 np->rxopt.bits.rxhlim) {
1332 atomic_inc(&skb->users); 1329 atomic_inc(&skb->users);
1333 req->af.v6_req.pktopts = skb; 1330 treq->pktopts = skb;
1334 } 1331 }
1335 req->af.v6_req.iif = sk->sk_bound_dev_if; 1332 treq->iif = sk->sk_bound_dev_if;
1336 1333
1337 /* So that link locals have meaning */ 1334 /* So that link locals have meaning */
1338 if (!sk->sk_bound_dev_if && 1335 if (!sk->sk_bound_dev_if &&
1339 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL) 1336 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1340 req->af.v6_req.iif = tcp_v6_iif(skb); 1337 treq->iif = tcp_v6_iif(skb);
1341 1338
1342 if (isn == 0) 1339 if (isn == 0)
1343 isn = tcp_v6_init_sequence(sk,skb); 1340 isn = tcp_v6_init_sequence(sk,skb);
1344 1341
1345 req->snt_isn = isn; 1342 tcp_rsk(req)->snt_isn = isn;
1346 1343
1347 if (tcp_v6_send_synack(sk, req, NULL)) 1344 if (tcp_v6_send_synack(sk, req, NULL))
1348 goto drop; 1345 goto drop;
@@ -1353,16 +1350,17 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1353 1350
1354drop: 1351drop:
1355 if (req) 1352 if (req)
1356 tcp_openreq_free(req); 1353 reqsk_free(req);
1357 1354
1358 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 1355 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1359 return 0; /* don't send reset */ 1356 return 0; /* don't send reset */
1360} 1357}
1361 1358
1362static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 1359static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1363 struct open_request *req, 1360 struct request_sock *req,
1364 struct dst_entry *dst) 1361 struct dst_entry *dst)
1365{ 1362{
1363 struct tcp6_request_sock *treq = tcp6_rsk(req);
1366 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 1364 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1367 struct tcp6_sock *newtcp6sk; 1365 struct tcp6_sock *newtcp6sk;
1368 struct inet_sock *newinet; 1366 struct inet_sock *newinet;
@@ -1426,10 +1424,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1426 goto out_overflow; 1424 goto out_overflow;
1427 1425
1428 if (np->rxopt.bits.srcrt == 2 && 1426 if (np->rxopt.bits.srcrt == 2 &&
1429 opt == NULL && req->af.v6_req.pktopts) { 1427 opt == NULL && treq->pktopts) {
1430 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts); 1428 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1431 if (rxopt->srcrt) 1429 if (rxopt->srcrt)
1432 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt)); 1430 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1433 } 1431 }
1434 1432
1435 if (dst == NULL) { 1433 if (dst == NULL) {
@@ -1438,16 +1436,16 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1438 1436
1439 memset(&fl, 0, sizeof(fl)); 1437 memset(&fl, 0, sizeof(fl));
1440 fl.proto = IPPROTO_TCP; 1438 fl.proto = IPPROTO_TCP;
1441 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr); 1439 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1442 if (opt && opt->srcrt) { 1440 if (opt && opt->srcrt) {
1443 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; 1441 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1444 ipv6_addr_copy(&final, &fl.fl6_dst); 1442 ipv6_addr_copy(&final, &fl.fl6_dst);
1445 ipv6_addr_copy(&fl.fl6_dst, rt0->addr); 1443 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1446 final_p = &final; 1444 final_p = &final;
1447 } 1445 }
1448 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr); 1446 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1449 fl.oif = sk->sk_bound_dev_if; 1447 fl.oif = sk->sk_bound_dev_if;
1450 fl.fl_ip_dport = req->rmt_port; 1448 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1451 fl.fl_ip_sport = inet_sk(sk)->sport; 1449 fl.fl_ip_sport = inet_sk(sk)->sport;
1452 1450
1453 if (ip6_dst_lookup(sk, &dst, &fl)) 1451 if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1482,10 +1480,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1482 1480
1483 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1481 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1484 1482
1485 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr); 1483 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1486 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr); 1484 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1487 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr); 1485 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1488 newsk->sk_bound_dev_if = req->af.v6_req.iif; 1486 newsk->sk_bound_dev_if = treq->iif;
1489 1487
1490 /* Now IPv6 options... 1488 /* Now IPv6 options...
1491 1489
@@ -1498,11 +1496,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1498 1496
1499 /* Clone pktoptions received with SYN */ 1497 /* Clone pktoptions received with SYN */
1500 newnp->pktoptions = NULL; 1498 newnp->pktoptions = NULL;
1501 if (req->af.v6_req.pktopts) { 1499 if (treq->pktopts != NULL) {
1502 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts, 1500 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1503 GFP_ATOMIC); 1501 kfree_skb(treq->pktopts);
1504 kfree_skb(req->af.v6_req.pktopts); 1502 treq->pktopts = NULL;
1505 req->af.v6_req.pktopts = NULL;
1506 if (newnp->pktoptions) 1503 if (newnp->pktoptions)
1507 skb_set_owner_r(newnp->pktoptions, newsk); 1504 skb_set_owner_r(newnp->pktoptions, newsk);
1508 } 1505 }
@@ -2050,7 +2047,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
2050 2047
2051/* Proc filesystem TCPv6 sock list dumping. */ 2048/* Proc filesystem TCPv6 sock list dumping. */
2052static void get_openreq6(struct seq_file *seq, 2049static void get_openreq6(struct seq_file *seq,
2053 struct sock *sk, struct open_request *req, int i, int uid) 2050 struct sock *sk, struct request_sock *req, int i, int uid)
2054{ 2051{
2055 struct in6_addr *dest, *src; 2052 struct in6_addr *dest, *src;
2056 int ttd = req->expires - jiffies; 2053 int ttd = req->expires - jiffies;
@@ -2058,8 +2055,8 @@ static void get_openreq6(struct seq_file *seq,
2058 if (ttd < 0) 2055 if (ttd < 0)
2059 ttd = 0; 2056 ttd = 0;
2060 2057
2061 src = &req->af.v6_req.loc_addr; 2058 src = &tcp6_rsk(req)->loc_addr;
2062 dest = &req->af.v6_req.rmt_addr; 2059 dest = &tcp6_rsk(req)->rmt_addr;
2063 seq_printf(seq, 2060 seq_printf(seq,
2064 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2061 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2065 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", 2062 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
@@ -2069,7 +2066,7 @@ static void get_openreq6(struct seq_file *seq,
2069 ntohs(inet_sk(sk)->sport), 2066 ntohs(inet_sk(sk)->sport),
2070 dest->s6_addr32[0], dest->s6_addr32[1], 2067 dest->s6_addr32[0], dest->s6_addr32[1],
2071 dest->s6_addr32[2], dest->s6_addr32[3], 2068 dest->s6_addr32[2], dest->s6_addr32[3],
2072 ntohs(req->rmt_port), 2069 ntohs(inet_rsk(req)->rmt_port),
2073 TCP_SYN_RECV, 2070 TCP_SYN_RECV,
2074 0,0, /* could print option size, but that is af dependent. */ 2071 0,0, /* could print option size, but that is af dependent. */
2075 1, /* timers active (only the expire timer) */ 2072 1, /* timers active (only the expire timer) */
@@ -2239,6 +2236,7 @@ struct proto tcpv6_prot = {
2239 .sysctl_rmem = sysctl_tcp_rmem, 2236 .sysctl_rmem = sysctl_tcp_rmem,
2240 .max_header = MAX_TCP_HEADER, 2237 .max_header = MAX_TCP_HEADER,
2241 .obj_size = sizeof(struct tcp6_sock), 2238 .obj_size = sizeof(struct tcp6_sock),
2239 .rsk_prot = &tcp6_request_sock_ops,
2242}; 2240};
2243 2241
2244static struct inet6_protocol tcpv6_protocol = { 2242static struct inet6_protocol tcpv6_protocol = {
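
The tcp_ipv6.c conversion retires the union-based open_request (req->af.v6_req.*) in favour of typed request_sock wrappers sized via rsk_ops->obj_size, so reqsk_alloc(&tcp6_request_sock_ops) hands back enough room for the IPv6-specific fields. The tcp6_rsk()/tcp_rsk()/inet_rsk() accessors used throughout are plain downcasts, valid because each struct embeds the more generic one as its first member. A sketch of the assumed layering (field lists abridged from the 2.6 headers):

	struct inet_request_sock {
		struct request_sock	req;	/* generic part, must be first */
		u16			rmt_port;
		/* ... */
	};

	struct tcp_request_sock {
		struct inet_request_sock req;
		u32			 snt_isn;
		u32			 rcv_isn;
		/* ... */
	};

	struct tcp6_request_sock {
		struct tcp_request_sock	req;
		struct in6_addr		loc_addr, rmt_addr;
		struct sk_buff		*pktopts;
		int			iif;
	};

	static inline struct tcp6_request_sock *tcp6_rsk(const struct request_sock *req)
	{
		return (struct tcp6_request_sock *)req;	/* first-member downcast */
	}
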
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e251d0ba4f39..eff050ac7049 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -300,12 +300,12 @@ csum_copy_err:
300 /* Clear queue. */ 300 /* Clear queue. */
301 if (flags&MSG_PEEK) { 301 if (flags&MSG_PEEK) {
302 int clear = 0; 302 int clear = 0;
303 spin_lock_irq(&sk->sk_receive_queue.lock); 303 spin_lock_bh(&sk->sk_receive_queue.lock);
304 if (skb == skb_peek(&sk->sk_receive_queue)) { 304 if (skb == skb_peek(&sk->sk_receive_queue)) {
305 __skb_unlink(skb, &sk->sk_receive_queue); 305 __skb_unlink(skb, &sk->sk_receive_queue);
306 clear = 1; 306 clear = 1;
307 } 307 }
308 spin_unlock_irq(&sk->sk_receive_queue.lock); 308 spin_unlock_bh(&sk->sk_receive_queue.lock);
309 if (clear) 309 if (clear)
310 kfree_skb(skb); 310 kfree_skb(skb);
311 } 311 }
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ce980aa94ed8..98b72f2024ff 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -656,13 +656,18 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
656 sa->sadb_sa_exttype = SADB_EXT_SA; 656 sa->sadb_sa_exttype = SADB_EXT_SA;
657 sa->sadb_sa_spi = x->id.spi; 657 sa->sadb_sa_spi = x->id.spi;
658 sa->sadb_sa_replay = x->props.replay_window; 658 sa->sadb_sa_replay = x->props.replay_window;
659 sa->sadb_sa_state = SADB_SASTATE_DYING; 659 switch (x->km.state) {
660 if (x->km.state == XFRM_STATE_VALID && !x->km.dying) 660 case XFRM_STATE_VALID:
661 sa->sadb_sa_state = SADB_SASTATE_MATURE; 661 sa->sadb_sa_state = x->km.dying ?
662 else if (x->km.state == XFRM_STATE_ACQ) 662 SADB_SASTATE_DYING : SADB_SASTATE_MATURE;
663 break;
664 case XFRM_STATE_ACQ:
663 sa->sadb_sa_state = SADB_SASTATE_LARVAL; 665 sa->sadb_sa_state = SADB_SASTATE_LARVAL;
664 else if (x->km.state == XFRM_STATE_EXPIRED) 666 break;
667 default:
665 sa->sadb_sa_state = SADB_SASTATE_DEAD; 668 sa->sadb_sa_state = SADB_SASTATE_DEAD;
669 break;
670 }
666 sa->sadb_sa_auth = 0; 671 sa->sadb_sa_auth = 0;
667 if (x->aalg) { 672 if (x->aalg) {
668 struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0); 673 struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
@@ -1240,13 +1245,78 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
1240 return 0; 1245 return 0;
1241} 1246}
1242 1247
1248static inline int event2poltype(int event)
1249{
1250 switch (event) {
1251 case XFRM_MSG_DELPOLICY:
1252 return SADB_X_SPDDELETE;
1253 case XFRM_MSG_NEWPOLICY:
1254 return SADB_X_SPDADD;
1255 case XFRM_MSG_UPDPOLICY:
1256 return SADB_X_SPDUPDATE;
1257 case XFRM_MSG_POLEXPIRE:
1258 // return SADB_X_SPDEXPIRE;
1259 default:
1260 printk("pfkey: Unknown policy event %d\n", event);
1261 break;
1262 }
1263
1264 return 0;
1265}
1266
1267static inline int event2keytype(int event)
1268{
1269 switch (event) {
1270 case XFRM_MSG_DELSA:
1271 return SADB_DELETE;
1272 case XFRM_MSG_NEWSA:
1273 return SADB_ADD;
1274 case XFRM_MSG_UPDSA:
1275 return SADB_UPDATE;
1276 case XFRM_MSG_EXPIRE:
1277 return SADB_EXPIRE;
1278 default:
1279 printk("pfkey: Unknown SA event %d\n", event);
1280 break;
1281 }
1282
1283 return 0;
1284}
1285
1286/* ADD/UPD/DEL */
1287static int key_notify_sa(struct xfrm_state *x, struct km_event *c)
1288{
1289 struct sk_buff *skb;
1290 struct sadb_msg *hdr;
1291 int hsc = 3;
1292
1293 if (c->event == XFRM_MSG_DELSA)
1294 hsc = 0;
1295
1296 skb = pfkey_xfrm_state2msg(x, 0, hsc);
1297
1298 if (IS_ERR(skb))
1299 return PTR_ERR(skb);
1300
1301 hdr = (struct sadb_msg *) skb->data;
1302 hdr->sadb_msg_version = PF_KEY_V2;
1303 hdr->sadb_msg_type = event2keytype(c->event);
1304 hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
1305 hdr->sadb_msg_errno = 0;
1306 hdr->sadb_msg_reserved = 0;
1307 hdr->sadb_msg_seq = c->seq;
1308 hdr->sadb_msg_pid = c->pid;
1309
1310 pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
1311
1312 return 0;
1313}
1243 1314
1244static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1315static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1245{ 1316{
1246 struct sk_buff *out_skb;
1247 struct sadb_msg *out_hdr;
1248 struct xfrm_state *x; 1317 struct xfrm_state *x;
1249 int err; 1318 int err;
1319 struct km_event c;
1250 1320
1251 xfrm_probe_algs(); 1321 xfrm_probe_algs();
1252 1322
@@ -1254,6 +1324,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
1254 if (IS_ERR(x)) 1324 if (IS_ERR(x))
1255 return PTR_ERR(x); 1325 return PTR_ERR(x);
1256 1326
1327 xfrm_state_hold(x);
1257 if (hdr->sadb_msg_type == SADB_ADD) 1328 if (hdr->sadb_msg_type == SADB_ADD)
1258 err = xfrm_state_add(x); 1329 err = xfrm_state_add(x);
1259 else 1330 else
@@ -1262,30 +1333,26 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
1262 if (err < 0) { 1333 if (err < 0) {
1263 x->km.state = XFRM_STATE_DEAD; 1334 x->km.state = XFRM_STATE_DEAD;
1264 xfrm_state_put(x); 1335 xfrm_state_put(x);
1265 return err; 1336 goto out;
1266 } 1337 }
1267 1338
1268 out_skb = pfkey_xfrm_state2msg(x, 0, 3); 1339 if (hdr->sadb_msg_type == SADB_ADD)
1269 if (IS_ERR(out_skb)) 1340 c.event = XFRM_MSG_NEWSA;
1270 return PTR_ERR(out_skb); /* XXX Should we return 0 here ? */ 1341 else
1271 1342 c.event = XFRM_MSG_UPDSA;
1272 out_hdr = (struct sadb_msg *) out_skb->data; 1343 c.seq = hdr->sadb_msg_seq;
1273 out_hdr->sadb_msg_version = hdr->sadb_msg_version; 1344 c.pid = hdr->sadb_msg_pid;
1274 out_hdr->sadb_msg_type = hdr->sadb_msg_type; 1345 km_state_notify(x, &c);
1275 out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); 1346out:
1276 out_hdr->sadb_msg_errno = 0; 1347 xfrm_state_put(x);
1277 out_hdr->sadb_msg_reserved = 0; 1348 return err;
1278 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
1279 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
1280
1281 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
1282
1283 return 0;
1284} 1349}
1285 1350
1286static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1351static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1287{ 1352{
1288 struct xfrm_state *x; 1353 struct xfrm_state *x;
1354 struct km_event c;
1355 int err;
1289 1356
1290 if (!ext_hdrs[SADB_EXT_SA-1] || 1357 if (!ext_hdrs[SADB_EXT_SA-1] ||
1291 !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 1358 !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
@@ -1301,13 +1368,19 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1301 return -EPERM; 1368 return -EPERM;
1302 } 1369 }
1303 1370
1304 xfrm_state_delete(x); 1371 err = xfrm_state_delete(x);
1305 xfrm_state_put(x); 1372 if (err < 0) {
1373 xfrm_state_put(x);
1374 return err;
1375 }
1306 1376
1307 pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, 1377 c.seq = hdr->sadb_msg_seq;
1308 BROADCAST_ALL, sk); 1378 c.pid = hdr->sadb_msg_pid;
1379 c.event = XFRM_MSG_DELSA;
1380 km_state_notify(x, &c);
1381 xfrm_state_put(x);
1309 1382
1310 return 0; 1383 return err;
1311} 1384}
1312 1385
1313static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1386static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
@@ -1445,28 +1518,42 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg
1445 return 0; 1518 return 0;
1446} 1519}
1447 1520
1521static int key_notify_sa_flush(struct km_event *c)
1522{
1523 struct sk_buff *skb;
1524 struct sadb_msg *hdr;
1525
1526 skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC);
1527 if (!skb)
1528 return -ENOBUFS;
1529 hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
1530 hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto);
1531 hdr->sadb_msg_seq = c->seq;
1532 hdr->sadb_msg_pid = c->pid;
1533 hdr->sadb_msg_version = PF_KEY_V2;
1534 hdr->sadb_msg_errno = (uint8_t) 0;
1535 hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
1536
1537 pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
1538
1539 return 0;
1540}
1541
1448static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1542static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1449{ 1543{
1450 unsigned proto; 1544 unsigned proto;
1451 struct sk_buff *skb_out; 1545 struct km_event c;
1452 struct sadb_msg *hdr_out;
1453 1546
1454 proto = pfkey_satype2proto(hdr->sadb_msg_satype); 1547 proto = pfkey_satype2proto(hdr->sadb_msg_satype);
1455 if (proto == 0) 1548 if (proto == 0)
1456 return -EINVAL; 1549 return -EINVAL;
1457 1550
1458 skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
1459 if (!skb_out)
1460 return -ENOBUFS;
1461
1462 xfrm_state_flush(proto); 1551 xfrm_state_flush(proto);
1463 1552 c.data.proto = proto;
1464 hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); 1553 c.seq = hdr->sadb_msg_seq;
1465 pfkey_hdr_dup(hdr_out, hdr); 1554 c.pid = hdr->sadb_msg_pid;
1466 hdr_out->sadb_msg_errno = (uint8_t) 0; 1555 c.event = XFRM_MSG_FLUSHSA;
1467 hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); 1556 km_state_notify(NULL, &c);
1468
1469 pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL);
1470 1557
1471 return 0; 1558 return 0;
1472} 1559}
@@ -1859,6 +1946,35 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
1859 hdr->sadb_msg_reserved = atomic_read(&xp->refcnt); 1946 hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
1860} 1947}
1861 1948
1949static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
1950{
1951 struct sk_buff *out_skb;
1952 struct sadb_msg *out_hdr;
1953 int err;
1954
1955 out_skb = pfkey_xfrm_policy2msg_prep(xp);
1956 if (IS_ERR(out_skb)) {
1957 err = PTR_ERR(out_skb);
1958 goto out;
1959 }
1960 pfkey_xfrm_policy2msg(out_skb, xp, dir);
1961
1962 out_hdr = (struct sadb_msg *) out_skb->data;
1963 out_hdr->sadb_msg_version = PF_KEY_V2;
1964
1965 if (c->data.byid && c->event == XFRM_MSG_DELPOLICY)
1966 out_hdr->sadb_msg_type = SADB_X_SPDDELETE2;
1967 else
1968 out_hdr->sadb_msg_type = event2poltype(c->event);
1969 out_hdr->sadb_msg_errno = 0;
1970 out_hdr->sadb_msg_seq = c->seq;
1971 out_hdr->sadb_msg_pid = c->pid;
1972 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
1973out:
1974 return 0;
1975
1976}
1977
1862static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1978static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1863{ 1979{
1864 int err; 1980 int err;
@@ -1866,8 +1982,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1866 struct sadb_address *sa; 1982 struct sadb_address *sa;
1867 struct sadb_x_policy *pol; 1983 struct sadb_x_policy *pol;
1868 struct xfrm_policy *xp; 1984 struct xfrm_policy *xp;
1869 struct sk_buff *out_skb; 1985 struct km_event c;
1870 struct sadb_msg *out_hdr;
1871 1986
1872 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 1987 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1873 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || 1988 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -1935,31 +2050,23 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1935 (err = parse_ipsecrequests(xp, pol)) < 0) 2050 (err = parse_ipsecrequests(xp, pol)) < 0)
1936 goto out; 2051 goto out;
1937 2052
1938 out_skb = pfkey_xfrm_policy2msg_prep(xp);
1939 if (IS_ERR(out_skb)) {
1940 err = PTR_ERR(out_skb);
1941 goto out;
1942 }
1943
1944 err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, 2053 err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
1945 hdr->sadb_msg_type != SADB_X_SPDUPDATE); 2054 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
1946 if (err) { 2055 if (err) {
1947 kfree_skb(out_skb); 2056 kfree(xp);
1948 goto out; 2057 return err;
1949 } 2058 }
1950 2059
1951 pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1); 2060 if (hdr->sadb_msg_type == SADB_X_SPDUPDATE)
2061 c.event = XFRM_MSG_UPDPOLICY;
2062 else
2063 c.event = XFRM_MSG_NEWPOLICY;
1952 2064
1953 xfrm_pol_put(xp); 2065 c.seq = hdr->sadb_msg_seq;
2066 c.pid = hdr->sadb_msg_pid;
1954 2067
1955 out_hdr = (struct sadb_msg *) out_skb->data; 2068 km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
1956 out_hdr->sadb_msg_version = hdr->sadb_msg_version; 2069 xfrm_pol_put(xp);
1957 out_hdr->sadb_msg_type = hdr->sadb_msg_type;
1958 out_hdr->sadb_msg_satype = 0;
1959 out_hdr->sadb_msg_errno = 0;
1960 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
1961 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
1962 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
1963 return 0; 2070 return 0;
1964 2071
1965out: 2072out:
@@ -1973,9 +2080,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
1973 struct sadb_address *sa; 2080 struct sadb_address *sa;
1974 struct sadb_x_policy *pol; 2081 struct sadb_x_policy *pol;
1975 struct xfrm_policy *xp; 2082 struct xfrm_policy *xp;
1976 struct sk_buff *out_skb;
1977 struct sadb_msg *out_hdr;
1978 struct xfrm_selector sel; 2083 struct xfrm_selector sel;
2084 struct km_event c;
1979 2085
1980 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 2086 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1981 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || 2087 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -2010,25 +2116,40 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2010 2116
2011 err = 0; 2117 err = 0;
2012 2118
2119 c.seq = hdr->sadb_msg_seq;
2120 c.pid = hdr->sadb_msg_pid;
2121 c.event = XFRM_MSG_DELPOLICY;
2122 km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
2123
2124 xfrm_pol_put(xp);
2125 return err;
2126}
2127
2128static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb_msg *hdr, int dir)
2129{
2130 int err;
2131 struct sk_buff *out_skb;
2132 struct sadb_msg *out_hdr;
2133 err = 0;
2134
2013 out_skb = pfkey_xfrm_policy2msg_prep(xp); 2135 out_skb = pfkey_xfrm_policy2msg_prep(xp);
2014 if (IS_ERR(out_skb)) { 2136 if (IS_ERR(out_skb)) {
2015 err = PTR_ERR(out_skb); 2137 err = PTR_ERR(out_skb);
2016 goto out; 2138 goto out;
2017 } 2139 }
2018 pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1); 2140 pfkey_xfrm_policy2msg(out_skb, xp, dir);
2019 2141
2020 out_hdr = (struct sadb_msg *) out_skb->data; 2142 out_hdr = (struct sadb_msg *) out_skb->data;
2021 out_hdr->sadb_msg_version = hdr->sadb_msg_version; 2143 out_hdr->sadb_msg_version = hdr->sadb_msg_version;
2022 out_hdr->sadb_msg_type = SADB_X_SPDDELETE; 2144 out_hdr->sadb_msg_type = hdr->sadb_msg_type;
2023 out_hdr->sadb_msg_satype = 0; 2145 out_hdr->sadb_msg_satype = 0;
2024 out_hdr->sadb_msg_errno = 0; 2146 out_hdr->sadb_msg_errno = 0;
2025 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; 2147 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
2026 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; 2148 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
2027 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk); 2149 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk);
2028 err = 0; 2150 err = 0;
2029 2151
2030out: 2152out:
2031 xfrm_pol_put(xp);
2032 return err; 2153 return err;
2033} 2154}
2034 2155
@@ -2037,8 +2158,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2037 int err; 2158 int err;
2038 struct sadb_x_policy *pol; 2159 struct sadb_x_policy *pol;
2039 struct xfrm_policy *xp; 2160 struct xfrm_policy *xp;
2040 struct sk_buff *out_skb; 2161 struct km_event c;
2041 struct sadb_msg *out_hdr;
2042 2162
2043 if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL) 2163 if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL)
2044 return -EINVAL; 2164 return -EINVAL;
@@ -2050,24 +2170,16 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2050 2170
2051 err = 0; 2171 err = 0;
2052 2172
2053 out_skb = pfkey_xfrm_policy2msg_prep(xp); 2173 c.seq = hdr->sadb_msg_seq;
2054 if (IS_ERR(out_skb)) { 2174 c.pid = hdr->sadb_msg_pid;
2055 err = PTR_ERR(out_skb); 2175 if (hdr->sadb_msg_type == SADB_X_SPDDELETE2) {
2056 goto out; 2176 c.data.byid = 1;
2177 c.event = XFRM_MSG_DELPOLICY;
2178 km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
2179 } else {
2180 err = key_pol_get_resp(sk, xp, hdr, pol->sadb_x_policy_dir-1);
2057 } 2181 }
2058 pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
2059
2060 out_hdr = (struct sadb_msg *) out_skb->data;
2061 out_hdr->sadb_msg_version = hdr->sadb_msg_version;
2062 out_hdr->sadb_msg_type = hdr->sadb_msg_type;
2063 out_hdr->sadb_msg_satype = 0;
2064 out_hdr->sadb_msg_errno = 0;
2065 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
2066 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
2067 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
2068 err = 0;
2069 2182
2070out:
2071 xfrm_pol_put(xp); 2183 xfrm_pol_put(xp);
2072 return err; 2184 return err;
2073} 2185}
@@ -2102,22 +2214,34 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
2102 return xfrm_policy_walk(dump_sp, &data); 2214 return xfrm_policy_walk(dump_sp, &data);
2103} 2215}
2104 2216
2105static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2217static int key_notify_policy_flush(struct km_event *c)
2106{ 2218{
2107 struct sk_buff *skb_out; 2219 struct sk_buff *skb_out;
2108 struct sadb_msg *hdr_out; 2220 struct sadb_msg *hdr;
2109 2221
2110 skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL); 2222 skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC);
2111 if (!skb_out) 2223 if (!skb_out)
2112 return -ENOBUFS; 2224 return -ENOBUFS;
2225 hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
2226 hdr->sadb_msg_seq = c->seq;
2227 hdr->sadb_msg_pid = c->pid;
2228 hdr->sadb_msg_version = PF_KEY_V2;
2229 hdr->sadb_msg_errno = (uint8_t) 0;
2230 hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
2231 pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL);
2232 return 0;
2113 2233
2114 xfrm_policy_flush(); 2234}
2235
2236static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
2237{
2238 struct km_event c;
2115 2239
2116 hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); 2240 xfrm_policy_flush();
2117 pfkey_hdr_dup(hdr_out, hdr); 2241 c.event = XFRM_MSG_FLUSHPOLICY;
2118 hdr_out->sadb_msg_errno = (uint8_t) 0; 2242 c.pid = hdr->sadb_msg_pid;
2119 hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); 2243 c.seq = hdr->sadb_msg_seq;
2120 pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL); 2244 km_policy_notify(NULL, 0, &c);
2121 2245
2122 return 0; 2246 return 0;
2123} 2247}
@@ -2317,11 +2441,23 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2317 } 2441 }
2318} 2442}
2319 2443
2320static int pfkey_send_notify(struct xfrm_state *x, int hard) 2444static int key_notify_policy_expire(struct xfrm_policy *xp, struct km_event *c)
2445{
2446 return 0;
2447}
2448
2449static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c)
2321{ 2450{
2322 struct sk_buff *out_skb; 2451 struct sk_buff *out_skb;
2323 struct sadb_msg *out_hdr; 2452 struct sadb_msg *out_hdr;
2324 int hsc = (hard ? 2 : 1); 2453 int hard;
2454 int hsc;
2455
2456 hard = c->data.hard;
2457 if (hard)
2458 hsc = 2;
2459 else
2460 hsc = 1;
2325 2461
2326 out_skb = pfkey_xfrm_state2msg(x, 0, hsc); 2462 out_skb = pfkey_xfrm_state2msg(x, 0, hsc);
2327 if (IS_ERR(out_skb)) 2463 if (IS_ERR(out_skb))
@@ -2340,6 +2476,44 @@ static int pfkey_send_notify(struct xfrm_state *x, int hard)
2340 return 0; 2476 return 0;
2341} 2477}
2342 2478
2479static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
2480{
2481 switch (c->event) {
2482 case XFRM_MSG_EXPIRE:
2483 return key_notify_sa_expire(x, c);
2484 case XFRM_MSG_DELSA:
2485 case XFRM_MSG_NEWSA:
2486 case XFRM_MSG_UPDSA:
2487 return key_notify_sa(x, c);
2488 case XFRM_MSG_FLUSHSA:
2489 return key_notify_sa_flush(c);
2490 default:
2491 printk("pfkey: Unknown SA event %d\n", c->event);
2492 break;
2493 }
2494
2495 return 0;
2496}
2497
2498static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
2499{
2500 switch (c->event) {
2501 case XFRM_MSG_POLEXPIRE:
2502 return key_notify_policy_expire(xp, c);
2503 case XFRM_MSG_DELPOLICY:
2504 case XFRM_MSG_NEWPOLICY:
2505 case XFRM_MSG_UPDPOLICY:
2506 return key_notify_policy(xp, dir, c);
2507 case XFRM_MSG_FLUSHPOLICY:
2508 return key_notify_policy_flush(c);
2509 default:
2510 printk("pfkey: Unknown policy event %d\n", c->event);
2511 break;
2512 }
2513
2514 return 0;
2515}
2516
2343static u32 get_acqseq(void) 2517static u32 get_acqseq(void)
2344{ 2518{
2345 u32 res; 2519 u32 res;
@@ -2856,6 +3030,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
2856 .acquire = pfkey_send_acquire, 3030 .acquire = pfkey_send_acquire,
2857 .compile_policy = pfkey_compile_policy, 3031 .compile_policy = pfkey_compile_policy,
2858 .new_mapping = pfkey_send_new_mapping, 3032 .new_mapping = pfkey_send_new_mapping,
3033 .notify_policy = pfkey_send_policy_notify,
2859}; 3034};
2860 3035
2861static void __exit ipsec_pfkey_exit(void) 3036static void __exit ipsec_pfkey_exit(void)
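
The common thread of the af_key rework: instead of each handler hand-assembling its own PF_KEY reply and broadcasting it, state and policy changes are described by a km_event and pushed through km_state_notify()/km_policy_notify(), which fan out to every registered xfrm_mgr (PF_KEY here, netlink in xfrm_user.c). The calling convention, condensed from the handlers above:

	struct km_event c;

	c.event = XFRM_MSG_DELSA;	/* or NEWSA/UPDSA/FLUSHSA/EXPIRE */
	c.seq   = hdr->sadb_msg_seq;	/* echoed into the broadcast reply */
	c.pid   = hdr->sadb_msg_pid;
	km_state_notify(x, &c);		/* x may be NULL, e.g. for a flush */
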
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e41ce458c2a9..70bcd4744d93 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1095,8 +1095,7 @@ static int netlink_dump(struct sock *sk)
1095 return 0; 1095 return 0;
1096 } 1096 }
1097 1097
1098 nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLMSG_DONE, sizeof(int)); 1098 nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1099 nlh->nlmsg_flags |= NLM_F_MULTI;
1100 memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); 1099 memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
1101 skb_queue_tail(&sk->sk_receive_queue, skb); 1100 skb_queue_tail(&sk->sk_receive_queue, skb);
1102 sk->sk_data_ready(sk, skb->len); 1101 sk->sk_data_ready(sk, skb->len);
@@ -1107,6 +1106,9 @@ static int netlink_dump(struct sock *sk)
1107 1106
1108 netlink_destroy_callback(cb); 1107 netlink_destroy_callback(cb);
1109 return 0; 1108 return 0;
1109
1110nlmsg_failure:
1111 return -ENOBUFS;
1110} 1112}
1111 1113
1112int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1114int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -1178,7 +1180,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1178 } 1180 }
1179 1181
1180 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 1182 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1181 NLMSG_ERROR, sizeof(struct nlmsgerr)); 1183 NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
1182 errmsg = NLMSG_DATA(rep); 1184 errmsg = NLMSG_DATA(rep);
1183 errmsg->error = err; 1185 errmsg->error = err;
1184 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); 1186 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
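
The netlink_dump() hunk illustrates the contract of the NLMSG_NEW family: on insufficient tailroom the macros do not return an error code, they goto a local nlmsg_failure label, which is why the function now carries one. Roughly as the macro reads in this era's <linux/netlink.h>:

	#define NLMSG_NEW(skb, pid, seq, type, len, flags)		\
	({	if (skb_tailroom(skb) < (int)NLMSG_SPACE(len))		\
			goto nlmsg_failure;				\
		__nlmsg_put(skb, pid, seq, type, len, flags); })
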
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 914c85ff8fe6..9594206e6035 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -428,15 +428,15 @@ errout:
428 428
429static int 429static int
430tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, 430tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
431 unsigned flags, int event, int bind, int ref) 431 u16 flags, int event, int bind, int ref)
432{ 432{
433 struct tcamsg *t; 433 struct tcamsg *t;
434 struct nlmsghdr *nlh; 434 struct nlmsghdr *nlh;
435 unsigned char *b = skb->tail; 435 unsigned char *b = skb->tail;
436 struct rtattr *x; 436 struct rtattr *x;
437 437
438 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t)); 438 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
439 nlh->nlmsg_flags = flags; 439
440 t = NLMSG_DATA(nlh); 440 t = NLMSG_DATA(nlh);
441 t->tca_family = AF_UNSPEC; 441 t->tca_family = AF_UNSPEC;
442 442
@@ -669,7 +669,7 @@ err:
669} 669}
670 670
671static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, 671static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
672 unsigned flags) 672 u16 flags)
673{ 673{
674 struct tcamsg *t; 674 struct tcamsg *t;
675 struct nlmsghdr *nlh; 675 struct nlmsghdr *nlh;
@@ -684,8 +684,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
684 684
685 b = (unsigned char *)skb->tail; 685 b = (unsigned char *)skb->tail;
686 686
687 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t)); 687 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
688 nlh->nlmsg_flags = flags;
689 t = NLMSG_DATA(nlh); 688 t = NLMSG_DATA(nlh);
690 t->tca_family = AF_UNSPEC; 689 t->tca_family = AF_UNSPEC;
691 690
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 56e66c3fe0fa..1616bf5c9627 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -322,14 +322,13 @@ errout:
322 322
323static int 323static int
324tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, 324tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
325 u32 pid, u32 seq, unsigned flags, int event) 325 u32 pid, u32 seq, u16 flags, int event)
326{ 326{
327 struct tcmsg *tcm; 327 struct tcmsg *tcm;
328 struct nlmsghdr *nlh; 328 struct nlmsghdr *nlh;
329 unsigned char *b = skb->tail; 329 unsigned char *b = skb->tail;
330 330
331 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 331 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
332 nlh->nlmsg_flags = flags;
333 tcm = NLMSG_DATA(nlh); 332 tcm = NLMSG_DATA(nlh);
334 tcm->tcm_family = AF_UNSPEC; 333 tcm->tcm_family = AF_UNSPEC;
335 tcm->tcm_ifindex = tp->q->dev->ifindex; 334 tcm->tcm_ifindex = tp->q->dev->ifindex;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 07977f8f2679..97c1c75d5c78 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -760,15 +760,14 @@ graft:
760} 760}
761 761
762static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, 762static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
763 u32 pid, u32 seq, unsigned flags, int event) 763 u32 pid, u32 seq, u16 flags, int event)
764{ 764{
765 struct tcmsg *tcm; 765 struct tcmsg *tcm;
766 struct nlmsghdr *nlh; 766 struct nlmsghdr *nlh;
767 unsigned char *b = skb->tail; 767 unsigned char *b = skb->tail;
768 struct gnet_dump d; 768 struct gnet_dump d;
769 769
770 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 770 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
771 nlh->nlmsg_flags = flags;
772 tcm = NLMSG_DATA(nlh); 771 tcm = NLMSG_DATA(nlh);
773 tcm->tcm_family = AF_UNSPEC; 772 tcm->tcm_family = AF_UNSPEC;
774 tcm->tcm_ifindex = q->dev->ifindex; 773 tcm->tcm_ifindex = q->dev->ifindex;
@@ -997,7 +996,7 @@ out:
997 996
998static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, 997static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
999 unsigned long cl, 998 unsigned long cl,
1000 u32 pid, u32 seq, unsigned flags, int event) 999 u32 pid, u32 seq, u16 flags, int event)
1001{ 1000{
1002 struct tcmsg *tcm; 1001 struct tcmsg *tcm;
1003 struct nlmsghdr *nlh; 1002 struct nlmsghdr *nlh;
@@ -1005,8 +1004,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1005 struct gnet_dump d; 1004 struct gnet_dump d;
1006 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; 1005 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1007 1006
1008 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 1007 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1009 nlh->nlmsg_flags = flags;
1010 tcm = NLMSG_DATA(nlh); 1008 tcm = NLMSG_DATA(nlh);
1011 tcm->tcm_family = AF_UNSPEC; 1009 tcm->tcm_family = AF_UNSPEC;
1012 tcm->tcm_ifindex = q->dev->ifindex; 1010 tcm->tcm_ifindex = q->dev->ifindex;
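
The scheduler-wide signature change from "unsigned flags" to "u16 flags" simply tracks the width of the destination field: nlmsg_flags is 16 bits wide. For reference, the fixed header every one of these fill routines populates (per <linux/netlink.h>):

	struct nlmsghdr {
		__u32	nlmsg_len;	/* length of message including header */
		__u16	nlmsg_type;	/* message content */
		__u16	nlmsg_flags;	/* additional flags, e.g. NLM_F_MULTI */
		__u32	nlmsg_seq;	/* sequence number */
		__u32	nlmsg_pid;	/* sending process port id */
	};
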
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index d8bd2a569c7c..13e0e7b3856b 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -31,7 +31,7 @@
31#endif 31#endif
32 32
33 33
34#define PRIV(sch) qdisc_priv(sch) 34#define PRIV(sch) ((struct dsmark_qdisc_data *) qdisc_priv(sch))
35 35
36 36
37/* 37/*
@@ -55,24 +55,38 @@
55struct dsmark_qdisc_data { 55struct dsmark_qdisc_data {
56 struct Qdisc *q; 56 struct Qdisc *q;
57 struct tcf_proto *filter_list; 57 struct tcf_proto *filter_list;
58 __u8 *mask; /* "owns" the array */ 58 u8 *mask; /* "owns" the array */
59 __u8 *value; 59 u8 *value;
60 __u16 indices; 60 u16 indices;
61 __u32 default_index; /* index range is 0...0xffff */ 61 u32 default_index; /* index range is 0...0xffff */
62 int set_tc_index; 62 int set_tc_index;
63}; 63};
64 64
65static inline int dsmark_valid_indices(u16 indices)
66{
67 while (indices != 1) {
68 if (indices & 1)
69 return 0;
70 indices >>= 1;
71 }
72
73 return 1;
74}
65 75
66/* ------------------------- Class/flow operations ------------------------- */ 76static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
77{
78 return (index <= p->indices && index > 0);
79}
67 80
81/* ------------------------- Class/flow operations ------------------------- */
68 82
69static int dsmark_graft(struct Qdisc *sch,unsigned long arg, 83static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
70 struct Qdisc *new,struct Qdisc **old) 84 struct Qdisc *new, struct Qdisc **old)
71{ 85{
72 struct dsmark_qdisc_data *p = PRIV(sch); 86 struct dsmark_qdisc_data *p = PRIV(sch);
73 87
74 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new, 88 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
75 old); 89 sch, p, new, old);
76 90
77 if (new == NULL) { 91 if (new == NULL) {
78 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); 92 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
@@ -81,91 +95,95 @@ static int dsmark_graft(struct Qdisc *sch,unsigned long arg,
81 } 95 }
82 96
83 sch_tree_lock(sch); 97 sch_tree_lock(sch);
84 *old = xchg(&p->q,new); 98 *old = xchg(&p->q, new);
85 if (*old) 99 qdisc_reset(*old);
86 qdisc_reset(*old);
87 sch->q.qlen = 0; 100 sch->q.qlen = 0;
88 sch_tree_unlock(sch); /* @@@ move up ? */ 101 sch_tree_unlock(sch);
102
89 return 0; 103 return 0;
90} 104}
91 105
92
93static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) 106static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
94{ 107{
95 struct dsmark_qdisc_data *p = PRIV(sch); 108 return PRIV(sch)->q;
96
97 return p->q;
98} 109}
99 110
100 111static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
101static unsigned long dsmark_get(struct Qdisc *sch,u32 classid)
102{ 112{
103 struct dsmark_qdisc_data *p __attribute__((unused)) = PRIV(sch); 113 DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
114 sch, PRIV(sch), classid);
104 115
105 DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid); 116 return TC_H_MIN(classid) + 1;
106 return TC_H_MIN(classid)+1;
107} 117}
108 118
109
110static unsigned long dsmark_bind_filter(struct Qdisc *sch, 119static unsigned long dsmark_bind_filter(struct Qdisc *sch,
111 unsigned long parent, u32 classid) 120 unsigned long parent, u32 classid)
112{ 121{
113 return dsmark_get(sch,classid); 122 return dsmark_get(sch, classid);
114} 123}
115 124
116
117static void dsmark_put(struct Qdisc *sch, unsigned long cl) 125static void dsmark_put(struct Qdisc *sch, unsigned long cl)
118{ 126{
119} 127}
120 128
121
122static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, 129static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
123 struct rtattr **tca, unsigned long *arg) 130 struct rtattr **tca, unsigned long *arg)
124{ 131{
125 struct dsmark_qdisc_data *p = PRIV(sch); 132 struct dsmark_qdisc_data *p = PRIV(sch);
126 struct rtattr *opt = tca[TCA_OPTIONS-1]; 133 struct rtattr *opt = tca[TCA_OPTIONS-1];
127 struct rtattr *tb[TCA_DSMARK_MAX]; 134 struct rtattr *tb[TCA_DSMARK_MAX];
135 int err = -EINVAL;
136 u8 mask = 0;
128 137
129 DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x)," 138 DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
130 "arg 0x%lx\n",sch,p,classid,parent,*arg); 139 "arg 0x%lx\n", sch, p, classid, parent, *arg);
131 if (*arg > p->indices) 140
132 return -ENOENT; 141 if (!dsmark_valid_index(p, *arg)) {
133 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt)) 142 err = -ENOENT;
134 return -EINVAL; 143 goto rtattr_failure;
135 if (tb[TCA_DSMARK_MASK-1]) {
136 if (!RTA_PAYLOAD(tb[TCA_DSMARK_MASK-1]))
137 return -EINVAL;
138 p->mask[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_MASK-1]);
139 }
140 if (tb[TCA_DSMARK_VALUE-1]) {
141 if (!RTA_PAYLOAD(tb[TCA_DSMARK_VALUE-1]))
142 return -EINVAL;
143 p->value[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_VALUE-1]);
144 } 144 }
145 return 0;
146}
147 145
146 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt))
147 goto rtattr_failure;
148
149 if (tb[TCA_DSMARK_MASK-1])
150 mask = RTA_GET_U8(tb[TCA_DSMARK_MASK-1]);
151
152 if (tb[TCA_DSMARK_VALUE-1])
153 p->value[*arg-1] = RTA_GET_U8(tb[TCA_DSMARK_VALUE-1]);
154
155 if (tb[TCA_DSMARK_MASK-1])
156 p->mask[*arg-1] = mask;
157
158 err = 0;
148 159
149static int dsmark_delete(struct Qdisc *sch,unsigned long arg) 160rtattr_failure:
161 return err;
162}
163
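Aside: the rewritten dsmark_change() stages the new mask in a local and only commits it to p->mask[] after both attributes have parsed, because RTA_GET_U8() bails out with a goto on a short attribute; a malformed TCA_DSMARK_VALUE can therefore no longer leave a half-updated table behind. Simplified shape of the macro (an approximation of the rtnetlink helper, not a verbatim copy):

    #define RTA_GET_U8(rta) \
    ({      if ((rta) == NULL || RTA_PAYLOAD(rta) < sizeof(u8)) \
                    goto rtattr_failure; \
            *(u8 *) RTA_DATA(rta); })
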
164static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
150{ 165{
151 struct dsmark_qdisc_data *p = PRIV(sch); 166 struct dsmark_qdisc_data *p = PRIV(sch);
152 167
153 if (!arg || arg > p->indices) 168 if (!dsmark_valid_index(p, arg))
154 return -EINVAL; 169 return -EINVAL;
170
155 p->mask[arg-1] = 0xff; 171 p->mask[arg-1] = 0xff;
156 p->value[arg-1] = 0; 172 p->value[arg-1] = 0;
173
157 return 0; 174 return 0;
158} 175}
159 176
160
161static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker) 177static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
162{ 178{
163 struct dsmark_qdisc_data *p = PRIV(sch); 179 struct dsmark_qdisc_data *p = PRIV(sch);
164 int i; 180 int i;
165 181
166 DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker); 182 DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
183
167 if (walker->stop) 184 if (walker->stop)
168 return; 185 return;
186
169 for (i = 0; i < p->indices; i++) { 187 for (i = 0; i < p->indices; i++) {
170 if (p->mask[i] == 0xff && !p->value[i]) 188 if (p->mask[i] == 0xff && !p->value[i])
171 goto ignore; 189 goto ignore;
@@ -180,26 +198,20 @@ ignore:
180 } 198 }
181} 199}
182 200
183
184static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl) 201static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
185{ 202{
186 struct dsmark_qdisc_data *p = PRIV(sch); 203 return &PRIV(sch)->filter_list;
187
188 return &p->filter_list;
189} 204}
190 205
191
192/* --------------------------- Qdisc operations ---------------------------- */ 206/* --------------------------- Qdisc operations ---------------------------- */
193 207
194
195static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) 208static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
196{ 209{
197 struct dsmark_qdisc_data *p = PRIV(sch); 210 struct dsmark_qdisc_data *p = PRIV(sch);
198 struct tcf_result res; 211 int err;
199 int result; 212
200 int ret = NET_XMIT_POLICED; 213 D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
201 214
202 D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
203 if (p->set_tc_index) { 215 if (p->set_tc_index) {
204 /* FIXME: Safe with non-linear skbs? --RR */ 216 /* FIXME: Safe with non-linear skbs? --RR */
205 switch (skb->protocol) { 217 switch (skb->protocol) {
@@ -216,17 +228,21 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
216 break; 228 break;
217 }; 229 };
218 } 230 }
219 result = TC_POLICE_OK; /* be nice to gcc */ 231
220 if (TC_H_MAJ(skb->priority) == sch->handle) { 232 if (TC_H_MAJ(skb->priority) == sch->handle)
221 skb->tc_index = TC_H_MIN(skb->priority); 233 skb->tc_index = TC_H_MIN(skb->priority);
222 } else { 234 else {
223 result = tc_classify(skb,p->filter_list,&res); 235 struct tcf_result res;
224 D2PRINTK("result %d class 0x%04x\n",result,res.classid); 236 int result = tc_classify(skb, p->filter_list, &res);
237
238 D2PRINTK("result %d class 0x%04x\n", result, res.classid);
239
225 switch (result) { 240 switch (result) {
226#ifdef CONFIG_NET_CLS_POLICE 241#ifdef CONFIG_NET_CLS_POLICE
227 case TC_POLICE_SHOT: 242 case TC_POLICE_SHOT:
228 kfree_skb(skb); 243 kfree_skb(skb);
229 break; 244 sch->qstats.drops++;
245 return NET_XMIT_POLICED;
230#if 0 246#if 0
231 case TC_POLICE_RECLASSIFY: 247 case TC_POLICE_RECLASSIFY:
232 /* FIXME: what to do here ??? */ 248 /* FIXME: what to do here ??? */
@@ -243,43 +259,45 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
243 break; 259 break;
244 }; 260 };
245 } 261 }
246 if (
247#ifdef CONFIG_NET_CLS_POLICE
248 result == TC_POLICE_SHOT ||
249#endif
250 262
251 ((ret = p->q->enqueue(skb,p->q)) != 0)) { 263 err = p->q->enqueue(skb,p->q);
264 if (err != NET_XMIT_SUCCESS) {
252 sch->qstats.drops++; 265 sch->qstats.drops++;
253 return ret; 266 return err;
254 } 267 }
268
255 sch->bstats.bytes += skb->len; 269 sch->bstats.bytes += skb->len;
256 sch->bstats.packets++; 270 sch->bstats.packets++;
257 sch->q.qlen++; 271 sch->q.qlen++;
258 return ret;
259}
260 272
273 return NET_XMIT_SUCCESS;
274}
261 275
262static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) 276static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
263{ 277{
264 struct dsmark_qdisc_data *p = PRIV(sch); 278 struct dsmark_qdisc_data *p = PRIV(sch);
265 struct sk_buff *skb; 279 struct sk_buff *skb;
266 int index; 280 u32 index;
281
282 D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
267 283
268 D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n",sch,p);
269 skb = p->q->ops->dequeue(p->q); 284 skb = p->q->ops->dequeue(p->q);
270 if (!skb) 285 if (skb == NULL)
271 return NULL; 286 return NULL;
287
272 sch->q.qlen--; 288 sch->q.qlen--;
273 index = skb->tc_index & (p->indices-1); 289
274 D2PRINTK("index %d->%d\n",skb->tc_index,index); 290 index = skb->tc_index & (p->indices - 1);
291 D2PRINTK("index %d->%d\n", skb->tc_index, index);
292
275 switch (skb->protocol) { 293 switch (skb->protocol) {
276 case __constant_htons(ETH_P_IP): 294 case __constant_htons(ETH_P_IP):
277 ipv4_change_dsfield(skb->nh.iph, 295 ipv4_change_dsfield(skb->nh.iph, p->mask[index],
278 p->mask[index],p->value[index]); 296 p->value[index]);
279 break; 297 break;
280 case __constant_htons(ETH_P_IPV6): 298 case __constant_htons(ETH_P_IPV6):
281 ipv6_change_dsfield(skb->nh.ipv6h, 299 ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index],
282 p->mask[index],p->value[index]); 300 p->value[index]);
283 break; 301 break;
284 default: 302 default:
285 /* 303 /*
@@ -293,152 +311,162 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
293 htons(skb->protocol)); 311 htons(skb->protocol));
294 break; 312 break;
295 }; 313 };
314
296 return skb; 315 return skb;
297} 316}
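
Aside: the per-class rewrite applied on dequeue is new_ds = (old_ds & mask) | value, which is also why dsmark_delete() above resets a class to mask = 0xff, value = 0 — that pair is the identity transform. A standalone sketch of the arithmetic (assuming ipv4_change_dsfield()/ipv6_change_dsfield() implement exactly this masking, plus the checksum fixup not modelled here):

    #include <stdio.h>

    static unsigned char remark(unsigned char ds, unsigned char mask,
                                unsigned char value)
    {
            return (ds & mask) | value;
    }

    int main(void)
    {
            unsigned char old_ds = 0x2a;

            /* keep the two ECN bits, force DSCP to EF (46): 0x2a -> 0xba */
            printf("0x%02x -> 0x%02x\n", old_ds, remark(old_ds, 0x03, 46 << 2));

            /* deleted class (mask 0xff, value 0) leaves the byte untouched */
            printf("0x%02x -> 0x%02x\n", old_ds, remark(old_ds, 0xff, 0));
            return 0;
    }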
298 317
299
300static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch) 318static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
301{ 319{
302 int ret;
303 struct dsmark_qdisc_data *p = PRIV(sch); 320 struct dsmark_qdisc_data *p = PRIV(sch);
321 int err;
304 322
305 D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); 323 D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
306 if ((ret = p->q->ops->requeue(skb, p->q)) == 0) { 324
307 sch->q.qlen++; 325 err = p->q->ops->requeue(skb, p->q);
308 sch->qstats.requeues++; 326 if (err != NET_XMIT_SUCCESS) {
309 return 0; 327 sch->qstats.drops++;
328 return err;
310 } 329 }
311 sch->qstats.drops++;
312 return ret;
313}
314 330
331 sch->q.qlen++;
332 sch->qstats.requeues++;
333
334 return NET_XMIT_SUCCESS;
335}
315 336
316static unsigned int dsmark_drop(struct Qdisc *sch) 337static unsigned int dsmark_drop(struct Qdisc *sch)
317{ 338{
318 struct dsmark_qdisc_data *p = PRIV(sch); 339 struct dsmark_qdisc_data *p = PRIV(sch);
319 unsigned int len; 340 unsigned int len;
320 341
321 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p); 342 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
322 if (!p->q->ops->drop) 343
323 return 0; 344 if (p->q->ops->drop == NULL)
324 if (!(len = p->q->ops->drop(p->q)))
325 return 0; 345 return 0;
326 sch->q.qlen--; 346
347 len = p->q->ops->drop(p->q);
348 if (len)
349 sch->q.qlen--;
350
327 return len; 351 return len;
328} 352}
329 353
330 354static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
331static int dsmark_init(struct Qdisc *sch,struct rtattr *opt)
332{ 355{
333 struct dsmark_qdisc_data *p = PRIV(sch); 356 struct dsmark_qdisc_data *p = PRIV(sch);
334 struct rtattr *tb[TCA_DSMARK_MAX]; 357 struct rtattr *tb[TCA_DSMARK_MAX];
335 __u16 tmp; 358 int err = -EINVAL;
336 359 u32 default_index = NO_DEFAULT_INDEX;
337 DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); 360 u16 indices;
338 if (!opt || 361 u8 *mask;
339 rtattr_parse(tb,TCA_DSMARK_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0 || 362
340 !tb[TCA_DSMARK_INDICES-1] || 363 DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
341 RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16)) 364
342 return -EINVAL; 365 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0)
343 p->indices = *(__u16 *) RTA_DATA(tb[TCA_DSMARK_INDICES-1]); 366 goto errout;
344 if (!p->indices) 367
345 return -EINVAL; 368 indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]);
346 for (tmp = p->indices; tmp != 1; tmp >>= 1) { 369 if (!indices || !dsmark_valid_indices(indices))
347 if (tmp & 1) 370 goto errout;
348 return -EINVAL; 371
349 } 372 if (tb[TCA_DSMARK_DEFAULT_INDEX-1])
350 p->default_index = NO_DEFAULT_INDEX; 373 default_index = RTA_GET_U16(tb[TCA_DSMARK_DEFAULT_INDEX-1]);
351 if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) { 374
352 if (RTA_PAYLOAD(tb[TCA_DSMARK_DEFAULT_INDEX-1]) < sizeof(__u16)) 375 mask = kmalloc(indices * 2, GFP_KERNEL);
353 return -EINVAL; 376 if (mask == NULL) {
354 p->default_index = 377 err = -ENOMEM;
355 *(__u16 *) RTA_DATA(tb[TCA_DSMARK_DEFAULT_INDEX-1]); 378 goto errout;
356 } 379 }
357 p->set_tc_index = !!tb[TCA_DSMARK_SET_TC_INDEX-1]; 380
358 p->mask = kmalloc(p->indices*2,GFP_KERNEL); 381 p->mask = mask;
359 if (!p->mask) 382 memset(p->mask, 0xff, indices);
360 return -ENOMEM; 383
361 p->value = p->mask+p->indices; 384 p->value = p->mask + indices;
362 memset(p->mask,0xff,p->indices); 385 memset(p->value, 0, indices);
363 memset(p->value,0,p->indices); 386
364 if (!(p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) 387 p->indices = indices;
388 p->default_index = default_index;
389 p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]);
390
391 p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
392 if (p->q == NULL)
365 p->q = &noop_qdisc; 393 p->q = &noop_qdisc;
366 DPRINTK("dsmark_init: qdisc %p\n",&p->q);
367 return 0;
368}
369 394
395 DPRINTK("dsmark_init: qdisc %p\n", p->q);
396
397 err = 0;
398errout:
399rtattr_failure:
400 return err;
401}
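
Aside: mask[] and value[] still share a single allocation — one kmalloc() of indices * 2 bytes with value[] starting right after mask[] — which is why dsmark_destroy() frees only p->mask. A plain-C sketch of that layout (malloc standing in for kmalloc):

    #include <stdlib.h>
    #include <string.h>

    static int alloc_tables(unsigned char **mask, unsigned char **value,
                            unsigned short indices)
    {
            unsigned char *m = malloc((size_t)indices * 2);

            if (m == NULL)
                    return -1;
            memset(m, 0xff, indices);       /* default mask: pass DS through */
            *mask = m;
            *value = m + indices;           /* second half of the same block */
            memset(*value, 0, indices);
            return 0;
    }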
370 402
371static void dsmark_reset(struct Qdisc *sch) 403static void dsmark_reset(struct Qdisc *sch)
372{ 404{
373 struct dsmark_qdisc_data *p = PRIV(sch); 405 struct dsmark_qdisc_data *p = PRIV(sch);
374 406
375 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p); 407 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
376 qdisc_reset(p->q); 408 qdisc_reset(p->q);
377 sch->q.qlen = 0; 409 sch->q.qlen = 0;
378} 410}
379 411
380
381static void dsmark_destroy(struct Qdisc *sch) 412static void dsmark_destroy(struct Qdisc *sch)
382{ 413{
383 struct dsmark_qdisc_data *p = PRIV(sch); 414 struct dsmark_qdisc_data *p = PRIV(sch);
384 struct tcf_proto *tp; 415 struct tcf_proto *tp;
385 416
386 DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n",sch,p); 417 DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
418
387 while (p->filter_list) { 419 while (p->filter_list) {
388 tp = p->filter_list; 420 tp = p->filter_list;
389 p->filter_list = tp->next; 421 p->filter_list = tp->next;
390 tcf_destroy(tp); 422 tcf_destroy(tp);
391 } 423 }
424
392 qdisc_destroy(p->q); 425 qdisc_destroy(p->q);
393 kfree(p->mask); 426 kfree(p->mask);
394} 427}
395 428
396
397static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, 429static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
398 struct sk_buff *skb, struct tcmsg *tcm) 430 struct sk_buff *skb, struct tcmsg *tcm)
399{ 431{
400 struct dsmark_qdisc_data *p = PRIV(sch); 432 struct dsmark_qdisc_data *p = PRIV(sch);
401 unsigned char *b = skb->tail; 433 struct rtattr *opts = NULL;
402 struct rtattr *rta; 434
435 DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld)\n", sch, p, cl);
403 436
404 DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n",sch,p,cl); 437 if (!dsmark_valid_index(p, cl))
405 if (!cl || cl > p->indices)
406 return -EINVAL; 438 return -EINVAL;
407 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle),cl-1); 439
408 rta = (struct rtattr *) b; 440 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
409 RTA_PUT(skb,TCA_OPTIONS,0,NULL); 441
410 RTA_PUT(skb,TCA_DSMARK_MASK,1,&p->mask[cl-1]); 442 opts = RTA_NEST(skb, TCA_OPTIONS);
411 RTA_PUT(skb,TCA_DSMARK_VALUE,1,&p->value[cl-1]); 443 RTA_PUT_U8(skb,TCA_DSMARK_MASK, p->mask[cl-1]);
412 rta->rta_len = skb->tail-b; 444 RTA_PUT_U8(skb,TCA_DSMARK_VALUE, p->value[cl-1]);
413 return skb->len; 445
446 return RTA_NEST_END(skb, opts);
414 447
415rtattr_failure: 448rtattr_failure:
416 skb_trim(skb,b-skb->data); 449 return RTA_NEST_CANCEL(skb, opts);
417 return -1;
418} 450}
419 451
420static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) 452static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
421{ 453{
422 struct dsmark_qdisc_data *p = PRIV(sch); 454 struct dsmark_qdisc_data *p = PRIV(sch);
423 unsigned char *b = skb->tail; 455 struct rtattr *opts = NULL;
424 struct rtattr *rta;
425 456
426 rta = (struct rtattr *) b; 457 opts = RTA_NEST(skb, TCA_OPTIONS);
427 RTA_PUT(skb,TCA_OPTIONS,0,NULL); 458 RTA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
428 RTA_PUT(skb,TCA_DSMARK_INDICES,sizeof(__u16),&p->indices); 459
429 if (p->default_index != NO_DEFAULT_INDEX) { 460 if (p->default_index != NO_DEFAULT_INDEX)
430 __u16 tmp = p->default_index; 461 RTA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
431 462
432 RTA_PUT(skb,TCA_DSMARK_DEFAULT_INDEX, sizeof(__u16), &tmp);
433 }
434 if (p->set_tc_index) 463 if (p->set_tc_index)
435 RTA_PUT(skb, TCA_DSMARK_SET_TC_INDEX, 0, NULL); 464 RTA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
436 rta->rta_len = skb->tail-b; 465
437 return skb->len; 466 return RTA_NEST_END(skb, opts);
438 467
439rtattr_failure: 468rtattr_failure:
440 skb_trim(skb,b-skb->data); 469 return RTA_NEST_CANCEL(skb, opts);
441 return -1;
442} 470}
443 471
444static struct Qdisc_class_ops dsmark_class_ops = { 472static struct Qdisc_class_ops dsmark_class_ops = {
@@ -476,10 +504,13 @@ static int __init dsmark_module_init(void)
476{ 504{
477 return register_qdisc(&dsmark_qdisc_ops); 505 return register_qdisc(&dsmark_qdisc_ops);
478} 506}
507
479static void __exit dsmark_module_exit(void) 508static void __exit dsmark_module_exit(void)
480{ 509{
481 unregister_qdisc(&dsmark_qdisc_ops); 510 unregister_qdisc(&dsmark_qdisc_ops);
482} 511}
512
483module_init(dsmark_module_init) 513module_init(dsmark_module_init)
484module_exit(dsmark_module_exit) 514module_exit(dsmark_module_exit)
515
485MODULE_LICENSE("GPL"); 516MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 4888305c96da..033083bf0e74 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -11,131 +11,38 @@
11 11
12#include <linux/config.h> 12#include <linux/config.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <asm/uaccess.h>
15#include <asm/system.h>
16#include <linux/bitops.h>
17#include <linux/types.h> 14#include <linux/types.h>
18#include <linux/kernel.h> 15#include <linux/kernel.h>
19#include <linux/sched.h>
20#include <linux/string.h>
21#include <linux/mm.h>
22#include <linux/socket.h>
23#include <linux/sockios.h>
24#include <linux/in.h>
25#include <linux/errno.h> 16#include <linux/errno.h>
26#include <linux/interrupt.h>
27#include <linux/if_ether.h>
28#include <linux/inet.h>
29#include <linux/netdevice.h> 17#include <linux/netdevice.h>
30#include <linux/etherdevice.h>
31#include <linux/notifier.h>
32#include <net/ip.h>
33#include <net/route.h>
34#include <linux/skbuff.h> 18#include <linux/skbuff.h>
35#include <net/sock.h>
36#include <net/pkt_sched.h> 19#include <net/pkt_sched.h>
37 20
38/* 1 band FIFO pseudo-"scheduler" */ 21/* 1 band FIFO pseudo-"scheduler" */
39 22
40struct fifo_sched_data 23struct fifo_sched_data
41{ 24{
42 unsigned limit; 25 u32 limit;
43}; 26};
44 27
45static int 28static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
46bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
47{ 29{
48 struct fifo_sched_data *q = qdisc_priv(sch); 30 struct fifo_sched_data *q = qdisc_priv(sch);
49 31
50 if (sch->qstats.backlog + skb->len <= q->limit) { 32 if (likely(sch->qstats.backlog + skb->len <= q->limit))
51 __skb_queue_tail(&sch->q, skb); 33 return qdisc_enqueue_tail(skb, sch);
52 sch->qstats.backlog += skb->len;
53 sch->bstats.bytes += skb->len;
54 sch->bstats.packets++;
55 return 0;
56 }
57 sch->qstats.drops++;
58#ifdef CONFIG_NET_CLS_POLICE
59 if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
60#endif
61 kfree_skb(skb);
62 return NET_XMIT_DROP;
63}
64
65static int
66bfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
67{
68 __skb_queue_head(&sch->q, skb);
69 sch->qstats.backlog += skb->len;
70 sch->qstats.requeues++;
71 return 0;
72}
73
74static struct sk_buff *
75bfifo_dequeue(struct Qdisc* sch)
76{
77 struct sk_buff *skb;
78 34
79 skb = __skb_dequeue(&sch->q); 35 return qdisc_reshape_fail(skb, sch);
80 if (skb)
81 sch->qstats.backlog -= skb->len;
82 return skb;
83} 36}
84 37
85static unsigned int 38static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
86fifo_drop(struct Qdisc* sch)
87{
88 struct sk_buff *skb;
89
90 skb = __skb_dequeue_tail(&sch->q);
91 if (skb) {
92 unsigned int len = skb->len;
93 sch->qstats.backlog -= len;
94 kfree_skb(skb);
95 return len;
96 }
97 return 0;
98}
99
100static void
101fifo_reset(struct Qdisc* sch)
102{
103 skb_queue_purge(&sch->q);
104 sch->qstats.backlog = 0;
105}
106
107static int
108pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
109{ 39{
110 struct fifo_sched_data *q = qdisc_priv(sch); 40 struct fifo_sched_data *q = qdisc_priv(sch);
111 41
112 if (sch->q.qlen < q->limit) { 42 if (likely(skb_queue_len(&sch->q) < q->limit))
113 __skb_queue_tail(&sch->q, skb); 43 return qdisc_enqueue_tail(skb, sch);
114 sch->bstats.bytes += skb->len;
115 sch->bstats.packets++;
116 return 0;
117 }
118 sch->qstats.drops++;
119#ifdef CONFIG_NET_CLS_POLICE
120 if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
121#endif
122 kfree_skb(skb);
123 return NET_XMIT_DROP;
124}
125
126static int
127pfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
128{
129 __skb_queue_head(&sch->q, skb);
130 sch->qstats.requeues++;
131 return 0;
132}
133
134 44
135static struct sk_buff * 45 return qdisc_reshape_fail(skb, sch);
136pfifo_dequeue(struct Qdisc* sch)
137{
138 return __skb_dequeue(&sch->q);
139} 46}
140 47
141static int fifo_init(struct Qdisc *sch, struct rtattr *opt) 48static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
@@ -143,66 +50,59 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
143 struct fifo_sched_data *q = qdisc_priv(sch); 50 struct fifo_sched_data *q = qdisc_priv(sch);
144 51
145 if (opt == NULL) { 52 if (opt == NULL) {
146 unsigned int limit = sch->dev->tx_queue_len ? : 1; 53 u32 limit = sch->dev->tx_queue_len ? : 1;
147 54
148 if (sch->ops == &bfifo_qdisc_ops) 55 if (sch->ops == &bfifo_qdisc_ops)
149 q->limit = limit*sch->dev->mtu; 56 limit *= sch->dev->mtu;
150 else 57
151 q->limit = limit; 58 q->limit = limit;
152 } else { 59 } else {
153 struct tc_fifo_qopt *ctl = RTA_DATA(opt); 60 struct tc_fifo_qopt *ctl = RTA_DATA(opt);
154 if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) 61
62 if (RTA_PAYLOAD(opt) < sizeof(*ctl))
155 return -EINVAL; 63 return -EINVAL;
64
156 q->limit = ctl->limit; 65 q->limit = ctl->limit;
157 } 66 }
67
158 return 0; 68 return 0;
159} 69}
160 70
161static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) 71static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
162{ 72{
163 struct fifo_sched_data *q = qdisc_priv(sch); 73 struct fifo_sched_data *q = qdisc_priv(sch);
164 unsigned char *b = skb->tail; 74 struct tc_fifo_qopt opt = { .limit = q->limit };
165 struct tc_fifo_qopt opt;
166 75
167 opt.limit = q->limit;
168 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 76 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
169
170 return skb->len; 77 return skb->len;
171 78
172rtattr_failure: 79rtattr_failure:
173 skb_trim(skb, b - skb->data);
174 return -1; 80 return -1;
175} 81}
176 82
177struct Qdisc_ops pfifo_qdisc_ops = { 83struct Qdisc_ops pfifo_qdisc_ops = {
178 .next = NULL,
179 .cl_ops = NULL,
180 .id = "pfifo", 84 .id = "pfifo",
181 .priv_size = sizeof(struct fifo_sched_data), 85 .priv_size = sizeof(struct fifo_sched_data),
182 .enqueue = pfifo_enqueue, 86 .enqueue = pfifo_enqueue,
183 .dequeue = pfifo_dequeue, 87 .dequeue = qdisc_dequeue_head,
184 .requeue = pfifo_requeue, 88 .requeue = qdisc_requeue,
185 .drop = fifo_drop, 89 .drop = qdisc_queue_drop,
186 .init = fifo_init, 90 .init = fifo_init,
187 .reset = fifo_reset, 91 .reset = qdisc_reset_queue,
188 .destroy = NULL,
189 .change = fifo_init, 92 .change = fifo_init,
190 .dump = fifo_dump, 93 .dump = fifo_dump,
191 .owner = THIS_MODULE, 94 .owner = THIS_MODULE,
192}; 95};
193 96
194struct Qdisc_ops bfifo_qdisc_ops = { 97struct Qdisc_ops bfifo_qdisc_ops = {
195 .next = NULL,
196 .cl_ops = NULL,
197 .id = "bfifo", 98 .id = "bfifo",
198 .priv_size = sizeof(struct fifo_sched_data), 99 .priv_size = sizeof(struct fifo_sched_data),
199 .enqueue = bfifo_enqueue, 100 .enqueue = bfifo_enqueue,
200 .dequeue = bfifo_dequeue, 101 .dequeue = qdisc_dequeue_head,
201 .requeue = bfifo_requeue, 102 .requeue = qdisc_requeue,
202 .drop = fifo_drop, 103 .drop = qdisc_queue_drop,
203 .init = fifo_init, 104 .init = fifo_init,
204 .reset = fifo_reset, 105 .reset = qdisc_reset_queue,
205 .destroy = NULL,
206 .change = fifo_init, 106 .change = fifo_init,
207 .dump = fifo_dump, 107 .dump = fifo_dump,
208 .owner = THIS_MODULE, 108 .owner = THIS_MODULE,
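
Aside: after this refactor both FIFOs are thin wrappers around the generic qdisc_enqueue_tail()/qdisc_reshape_fail() helpers, and the only behavioural difference left is the admission test — bfifo bounds queued bytes (backlog), pfifo bounds queued packets, with fifo_init() deriving the default limit from tx_queue_len (scaled by the device MTU for bfifo). A toy model of the two tests (illustrative names, not kernel API):

    #include <stdbool.h>

    struct fifo_model {
            unsigned int limit;     /* bytes for bfifo, packets for pfifo */
            unsigned int backlog;   /* queued bytes */
            unsigned int qlen;      /* queued packets */
    };

    static bool bfifo_admit(const struct fifo_model *q, unsigned int skb_len)
    {
            return q->backlog + skb_len <= q->limit;        /* byte-bounded */
    }

    static bool pfifo_admit(const struct fifo_model *q)
    {
            return q->qlen < q->limit;                      /* packet-bounded */
    }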
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 87e48a4e1051..7683b34dc6a9 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -243,31 +243,27 @@ static void dev_watchdog_down(struct net_device *dev)
243 cheaper. 243 cheaper.
244 */ 244 */
245 245
246static int 246static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
247noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
248{ 247{
249 kfree_skb(skb); 248 kfree_skb(skb);
250 return NET_XMIT_CN; 249 return NET_XMIT_CN;
251} 250}
252 251
253static struct sk_buff * 252static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
254noop_dequeue(struct Qdisc * qdisc)
255{ 253{
256 return NULL; 254 return NULL;
257} 255}
258 256
259static int 257static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
260noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
261{ 258{
262 if (net_ratelimit()) 259 if (net_ratelimit())
263 printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name); 260 printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
261 skb->dev->name);
264 kfree_skb(skb); 262 kfree_skb(skb);
265 return NET_XMIT_CN; 263 return NET_XMIT_CN;
266} 264}
267 265
268struct Qdisc_ops noop_qdisc_ops = { 266struct Qdisc_ops noop_qdisc_ops = {
269 .next = NULL,
270 .cl_ops = NULL,
271 .id = "noop", 267 .id = "noop",
272 .priv_size = 0, 268 .priv_size = 0,
273 .enqueue = noop_enqueue, 269 .enqueue = noop_enqueue,
@@ -285,8 +281,6 @@ struct Qdisc noop_qdisc = {
285}; 281};
286 282
287static struct Qdisc_ops noqueue_qdisc_ops = { 283static struct Qdisc_ops noqueue_qdisc_ops = {
288 .next = NULL,
289 .cl_ops = NULL,
290 .id = "noqueue", 284 .id = "noqueue",
291 .priv_size = 0, 285 .priv_size = 0,
292 .enqueue = noop_enqueue, 286 .enqueue = noop_enqueue,
@@ -311,97 +305,87 @@ static const u8 prio2band[TC_PRIO_MAX+1] =
311 generic prio+fifo combination. 305 generic prio+fifo combination.
312 */ 306 */
313 307
314static int 308#define PFIFO_FAST_BANDS 3
315pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) 309
310static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
311 struct Qdisc *qdisc)
316{ 312{
317 struct sk_buff_head *list = qdisc_priv(qdisc); 313 struct sk_buff_head *list = qdisc_priv(qdisc);
314 return list + prio2band[skb->priority & TC_PRIO_MAX];
315}
318 316
319 list += prio2band[skb->priority&TC_PRIO_MAX]; 317static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
318{
319 struct sk_buff_head *list = prio2list(skb, qdisc);
320 320
321 if (list->qlen < qdisc->dev->tx_queue_len) { 321 if (skb_queue_len(list) < qdisc->dev->tx_queue_len) {
322 __skb_queue_tail(list, skb);
323 qdisc->q.qlen++; 322 qdisc->q.qlen++;
324 qdisc->bstats.bytes += skb->len; 323 return __qdisc_enqueue_tail(skb, qdisc, list);
325 qdisc->bstats.packets++;
326 return 0;
327 } 324 }
328 qdisc->qstats.drops++; 325
329 kfree_skb(skb); 326 return qdisc_drop(skb, qdisc);
330 return NET_XMIT_DROP;
331} 327}
332 328
333static struct sk_buff * 329static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
334pfifo_fast_dequeue(struct Qdisc* qdisc)
335{ 330{
336 int prio; 331 int prio;
337 struct sk_buff_head *list = qdisc_priv(qdisc); 332 struct sk_buff_head *list = qdisc_priv(qdisc);
338 struct sk_buff *skb;
339 333
340 for (prio = 0; prio < 3; prio++, list++) { 334 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++, list++) {
341 skb = __skb_dequeue(list); 335 struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
342 if (skb) { 336 if (skb) {
343 qdisc->q.qlen--; 337 qdisc->q.qlen--;
344 return skb; 338 return skb;
345 } 339 }
346 } 340 }
341
347 return NULL; 342 return NULL;
348} 343}
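
Aside: pfifo_fast keeps three sk_buff_head bands and the dequeue loop above scans them strictly in order, so a packet in band 0 always goes out before anything in bands 1 or 2. prio2list() picks the band from the low bits of skb->priority via the prio2band table. A userspace sketch of the mapping (table values copied from the kernel source of this era; treat them as illustrative):

    #include <stdio.h>

    #define TC_PRIO_MAX       15
    #define PFIFO_FAST_BANDS   3

    static const unsigned char prio2band[TC_PRIO_MAX + 1] =
            { 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };

    int main(void)
    {
            unsigned int priority;

            for (priority = 0; priority <= TC_PRIO_MAX; priority++)
                    printf("skb->priority %2u -> band %u\n",
                           priority, prio2band[priority & TC_PRIO_MAX]);
            return 0;
    }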
349 344
350static int 345static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
351pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
352{ 346{
353 struct sk_buff_head *list = qdisc_priv(qdisc);
354
355 list += prio2band[skb->priority&TC_PRIO_MAX];
356
357 __skb_queue_head(list, skb);
358 qdisc->q.qlen++; 347 qdisc->q.qlen++;
359 qdisc->qstats.requeues++; 348 return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
360 return 0;
361} 349}
362 350
363static void 351static void pfifo_fast_reset(struct Qdisc* qdisc)
364pfifo_fast_reset(struct Qdisc* qdisc)
365{ 352{
366 int prio; 353 int prio;
367 struct sk_buff_head *list = qdisc_priv(qdisc); 354 struct sk_buff_head *list = qdisc_priv(qdisc);
368 355
369 for (prio=0; prio < 3; prio++) 356 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
370 skb_queue_purge(list+prio); 357 __qdisc_reset_queue(qdisc, list + prio);
358
359 qdisc->qstats.backlog = 0;
371 qdisc->q.qlen = 0; 360 qdisc->q.qlen = 0;
372} 361}
373 362
374static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) 363static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
375{ 364{
376 unsigned char *b = skb->tail; 365 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
377 struct tc_prio_qopt opt;
378 366
379 opt.bands = 3;
380 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); 367 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
381 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 368 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
382 return skb->len; 369 return skb->len;
383 370
384rtattr_failure: 371rtattr_failure:
385 skb_trim(skb, b - skb->data);
386 return -1; 372 return -1;
387} 373}
388 374
389static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt) 375static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
390{ 376{
391 int i; 377 int prio;
392 struct sk_buff_head *list = qdisc_priv(qdisc); 378 struct sk_buff_head *list = qdisc_priv(qdisc);
393 379
394 for (i=0; i<3; i++) 380 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
395 skb_queue_head_init(list+i); 381 skb_queue_head_init(list + prio);
396 382
397 return 0; 383 return 0;
398} 384}
399 385
400static struct Qdisc_ops pfifo_fast_ops = { 386static struct Qdisc_ops pfifo_fast_ops = {
401 .next = NULL,
402 .cl_ops = NULL,
403 .id = "pfifo_fast", 387 .id = "pfifo_fast",
404 .priv_size = 3 * sizeof(struct sk_buff_head), 388 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
405 .enqueue = pfifo_fast_enqueue, 389 .enqueue = pfifo_fast_enqueue,
406 .dequeue = pfifo_fast_dequeue, 390 .dequeue = pfifo_fast_dequeue,
407 .requeue = pfifo_fast_requeue, 391 .requeue = pfifo_fast_requeue,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 2a3c0e08a090..e6926cb19420 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4368,15 +4368,11 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
4368 * However, this function was correct in any case. 8) 4368 * However, this function was correct in any case. 8)
4369 */ 4369 */
4370 if (flags & MSG_PEEK) { 4370 if (flags & MSG_PEEK) {
4371 unsigned long cpu_flags; 4371 spin_lock_bh(&sk->sk_receive_queue.lock);
4372
4373 sctp_spin_lock_irqsave(&sk->sk_receive_queue.lock,
4374 cpu_flags);
4375 skb = skb_peek(&sk->sk_receive_queue); 4372 skb = skb_peek(&sk->sk_receive_queue);
4376 if (skb) 4373 if (skb)
4377 atomic_inc(&skb->users); 4374 atomic_inc(&skb->users);
4378 sctp_spin_unlock_irqrestore(&sk->sk_receive_queue.lock, 4375 spin_unlock_bh(&sk->sk_receive_queue.lock);
4379 cpu_flags);
4380 } else { 4376 } else {
4381 skb = skb_dequeue(&sk->sk_receive_queue); 4377 skb = skb_dequeue(&sk->sk_receive_queue);
4382 } 4378 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d07f5ce31824..0a4260719a12 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -216,8 +216,8 @@ out:
216 216
217expired: 217expired:
218 read_unlock(&xp->lock); 218 read_unlock(&xp->lock);
219 km_policy_expired(xp, dir, 1); 219 if (!xfrm_policy_delete(xp, dir))
220 xfrm_policy_delete(xp, dir); 220 km_policy_expired(xp, dir, 1);
221 xfrm_pol_put(xp); 221 xfrm_pol_put(xp);
222} 222}
223 223
@@ -555,7 +555,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
555 return NULL; 555 return NULL;
556} 556}
557 557
558void xfrm_policy_delete(struct xfrm_policy *pol, int dir) 558int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
559{ 559{
560 write_lock_bh(&xfrm_policy_lock); 560 write_lock_bh(&xfrm_policy_lock);
561 pol = __xfrm_policy_unlink(pol, dir); 561 pol = __xfrm_policy_unlink(pol, dir);
@@ -564,7 +564,9 @@ void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
564 if (dir < XFRM_POLICY_MAX) 564 if (dir < XFRM_POLICY_MAX)
565 atomic_inc(&flow_cache_genid); 565 atomic_inc(&flow_cache_genid);
566 xfrm_policy_kill(pol); 566 xfrm_policy_kill(pol);
567 return 0;
567 } 568 }
569 return -ENOENT;
568} 570}
569 571
570int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) 572int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
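
Aside: xfrm_policy_delete() now reports whether this caller actually unlinked the policy (0) or found it already gone (-ENOENT), and the timer path above uses that to make the hard-expire notification single-shot — only the path that wins the unlink emits km_policy_expired(). A toy single-threaded model of that property (the kernel serializes the real race under xfrm_policy_lock):

    #include <stdio.h>

    static int policy_linked = 1;

    static int policy_delete(void)
    {
            if (policy_linked) {
                    policy_linked = 0;
                    return 0;               /* we unlinked it */
            }
            return -2;                      /* stands in for -ENOENT */
    }

    int main(void)
    {
            int caller;

            for (caller = 0; caller < 2; caller++)
                    if (!policy_delete())
                            printf("caller %d sends the expire event\n", caller);
            return 0;                       /* prints exactly once */
    }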
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d11747c2a763..2537f26f097c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -50,7 +50,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
50 50
51static int xfrm_state_gc_flush_bundles; 51static int xfrm_state_gc_flush_bundles;
52 52
53static void __xfrm_state_delete(struct xfrm_state *x); 53static int __xfrm_state_delete(struct xfrm_state *x);
54 54
55static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); 55static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
56static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); 56static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -154,6 +154,7 @@ static void xfrm_timer_handler(unsigned long data)
154 next = tmo; 154 next = tmo;
155 } 155 }
156 156
157 x->km.dying = warn;
157 if (warn) 158 if (warn)
158 km_state_expired(x, 0); 159 km_state_expired(x, 0);
159resched: 160resched:
@@ -169,9 +170,8 @@ expired:
169 next = 2; 170 next = 2;
170 goto resched; 171 goto resched;
171 } 172 }
172 if (x->id.spi != 0) 173 if (!__xfrm_state_delete(x) && x->id.spi)
173 km_state_expired(x, 1); 174 km_state_expired(x, 1);
174 __xfrm_state_delete(x);
175 175
176out: 176out:
177 spin_unlock(&x->lock); 177 spin_unlock(&x->lock);
@@ -215,8 +215,10 @@ void __xfrm_state_destroy(struct xfrm_state *x)
215} 215}
216EXPORT_SYMBOL(__xfrm_state_destroy); 216EXPORT_SYMBOL(__xfrm_state_destroy);
217 217
218static void __xfrm_state_delete(struct xfrm_state *x) 218static int __xfrm_state_delete(struct xfrm_state *x)
219{ 219{
220 int err = -ESRCH;
221
220 if (x->km.state != XFRM_STATE_DEAD) { 222 if (x->km.state != XFRM_STATE_DEAD) {
221 x->km.state = XFRM_STATE_DEAD; 223 x->km.state = XFRM_STATE_DEAD;
222 spin_lock(&xfrm_state_lock); 224 spin_lock(&xfrm_state_lock);
@@ -245,14 +247,21 @@ static void __xfrm_state_delete(struct xfrm_state *x)
245 * is what we are dropping here. 247 * is what we are dropping here.
246 */ 248 */
247 atomic_dec(&x->refcnt); 249 atomic_dec(&x->refcnt);
250 err = 0;
248 } 251 }
252
253 return err;
249} 254}
250 255
251void xfrm_state_delete(struct xfrm_state *x) 256int xfrm_state_delete(struct xfrm_state *x)
252{ 257{
258 int err;
259
253 spin_lock_bh(&x->lock); 260 spin_lock_bh(&x->lock);
254 __xfrm_state_delete(x); 261 err = __xfrm_state_delete(x);
255 spin_unlock_bh(&x->lock); 262 spin_unlock_bh(&x->lock);
263
264 return err;
256} 265}
257EXPORT_SYMBOL(xfrm_state_delete); 266EXPORT_SYMBOL(xfrm_state_delete);
258 267
@@ -557,16 +566,18 @@ int xfrm_state_check_expire(struct xfrm_state *x)
557 566
558 if (x->curlft.bytes >= x->lft.hard_byte_limit || 567 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
559 x->curlft.packets >= x->lft.hard_packet_limit) { 568 x->curlft.packets >= x->lft.hard_packet_limit) {
560 km_state_expired(x, 1); 569 x->km.state = XFRM_STATE_EXPIRED;
561 if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ)) 570 if (!mod_timer(&x->timer, jiffies))
562 xfrm_state_hold(x); 571 xfrm_state_hold(x);
563 return -EINVAL; 572 return -EINVAL;
564 } 573 }
565 574
566 if (!x->km.dying && 575 if (!x->km.dying &&
567 (x->curlft.bytes >= x->lft.soft_byte_limit || 576 (x->curlft.bytes >= x->lft.soft_byte_limit ||
568 x->curlft.packets >= x->lft.soft_packet_limit)) 577 x->curlft.packets >= x->lft.soft_packet_limit)) {
578 x->km.dying = 1;
569 km_state_expired(x, 0); 579 km_state_expired(x, 0);
580 }
570 return 0; 581 return 0;
571} 582}
572EXPORT_SYMBOL(xfrm_state_check_expire); 583EXPORT_SYMBOL(xfrm_state_check_expire);
@@ -796,34 +807,56 @@ EXPORT_SYMBOL(xfrm_replay_advance);
796static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list); 807static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
797static DEFINE_RWLOCK(xfrm_km_lock); 808static DEFINE_RWLOCK(xfrm_km_lock);
798 809
799static void km_state_expired(struct xfrm_state *x, int hard) 810void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
800{ 811{
801 struct xfrm_mgr *km; 812 struct xfrm_mgr *km;
802 813
803 if (hard) 814 read_lock(&xfrm_km_lock);
804 x->km.state = XFRM_STATE_EXPIRED; 815 list_for_each_entry(km, &xfrm_km_list, list)
805 else 816 if (km->notify_policy)
806 x->km.dying = 1; 817 km->notify_policy(xp, dir, c);
818 read_unlock(&xfrm_km_lock);
819}
807 820
821void km_state_notify(struct xfrm_state *x, struct km_event *c)
822{
823 struct xfrm_mgr *km;
808 read_lock(&xfrm_km_lock); 824 read_lock(&xfrm_km_lock);
809 list_for_each_entry(km, &xfrm_km_list, list) 825 list_for_each_entry(km, &xfrm_km_list, list)
810 km->notify(x, hard); 826 if (km->notify)
827 km->notify(x, c);
811 read_unlock(&xfrm_km_lock); 828 read_unlock(&xfrm_km_lock);
829}
830
831EXPORT_SYMBOL(km_policy_notify);
832EXPORT_SYMBOL(km_state_notify);
833
834static void km_state_expired(struct xfrm_state *x, int hard)
835{
836 struct km_event c;
837
838 c.data.hard = hard;
839 c.event = XFRM_MSG_EXPIRE;
840 km_state_notify(x, &c);
812 841
813 if (hard) 842 if (hard)
814 wake_up(&km_waitq); 843 wake_up(&km_waitq);
815} 844}
816 845
846/*
847 * We send to all registered managers regardless of failure;
848 * we are happy with one success.
849*/
817static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) 850static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
818{ 851{
819 int err = -EINVAL; 852 int err = -EINVAL, acqret;
820 struct xfrm_mgr *km; 853 struct xfrm_mgr *km;
821 854
822 read_lock(&xfrm_km_lock); 855 read_lock(&xfrm_km_lock);
823 list_for_each_entry(km, &xfrm_km_list, list) { 856 list_for_each_entry(km, &xfrm_km_list, list) {
824 err = km->acquire(x, t, pol, XFRM_POLICY_OUT); 857 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
825 if (!err) 858 if (!acqret)
826 break; 859 err = acqret;
827 } 860 }
828 read_unlock(&xfrm_km_lock); 861 read_unlock(&xfrm_km_lock);
829 return err; 862 return err;
@@ -848,13 +881,11 @@ EXPORT_SYMBOL(km_new_mapping);
848 881
849void km_policy_expired(struct xfrm_policy *pol, int dir, int hard) 882void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
850{ 883{
851 struct xfrm_mgr *km; 884 struct km_event c;
852 885
853 read_lock(&xfrm_km_lock); 886 c.data.hard = hard;
854 list_for_each_entry(km, &xfrm_km_list, list) 887 c.event = XFRM_MSG_POLEXPIRE;
855 if (km->notify_policy) 888 km_policy_notify(pol, dir, &c);
856 km->notify_policy(pol, dir, hard);
857 read_unlock(&xfrm_km_lock);
858 889
859 if (hard) 890 if (hard)
860 wake_up(&km_waitq); 891 wake_up(&km_waitq);
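
Aside: the keying-manager callbacks now funnel through a small event descriptor instead of growing per-event argument lists; km_state_notify()/km_policy_notify() walk the registered managers and fan the event out to each. A userspace model of the pattern (struct layout and handler names are illustrative — the real struct km_event comes from the header change accompanying this patch):

    #include <stdio.h>

    struct km_event {
            union { int hard; int proto; int byid; } data;
            unsigned int seq, pid;
            int event;
    };

    struct xfrm_mgr_model {
            const char *id;
            void (*notify)(const struct km_event *c);
    };

    static void netlink_notify(const struct km_event *c)
    {
            printf("netlink: event %d, hard=%d\n", c->event, c->data.hard);
    }

    static void pfkey_notify(const struct km_event *c)
    {
            printf("pfkey: event %d\n", c->event);
    }

    int main(void)
    {
            struct xfrm_mgr_model mgrs[] = {
                    { "netlink", netlink_notify },
                    { "pfkey",   pfkey_notify },
            };
            struct km_event c = { .data = { .hard = 1 },
                                  .seq = 42, .pid = 1000,
                                  .event = 0 /* e.g. XFRM_MSG_EXPIRE */ };
            unsigned int i;

            /* km_state_notify(): deliver to every registered manager */
            for (i = 0; i < sizeof(mgrs) / sizeof(mgrs[0]); i++)
                    if (mgrs[i].notify)
                            mgrs[i].notify(&c);
            return 0;
    }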
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 97509011c274..5ce8558eac91 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -277,6 +277,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
277 struct xfrm_usersa_info *p = NLMSG_DATA(nlh); 277 struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
278 struct xfrm_state *x; 278 struct xfrm_state *x;
279 int err; 279 int err;
280 struct km_event c;
280 281
281 err = verify_newsa_info(p, (struct rtattr **) xfrma); 282 err = verify_newsa_info(p, (struct rtattr **) xfrma);
282 if (err) 283 if (err)
@@ -286,6 +287,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
286 if (!x) 287 if (!x)
287 return err; 288 return err;
288 289
290 xfrm_state_hold(x);
289 if (nlh->nlmsg_type == XFRM_MSG_NEWSA) 291 if (nlh->nlmsg_type == XFRM_MSG_NEWSA)
290 err = xfrm_state_add(x); 292 err = xfrm_state_add(x);
291 else 293 else
@@ -294,14 +296,24 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
294 if (err < 0) { 296 if (err < 0) {
295 x->km.state = XFRM_STATE_DEAD; 297 x->km.state = XFRM_STATE_DEAD;
296 xfrm_state_put(x); 298 xfrm_state_put(x);
299 goto out;
297 } 300 }
298 301
302 c.seq = nlh->nlmsg_seq;
303 c.pid = nlh->nlmsg_pid;
304 c.event = nlh->nlmsg_type;
305
306 km_state_notify(x, &c);
307out:
308 xfrm_state_put(x);
299 return err; 309 return err;
300} 310}
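
Aside: xfrm_add_sa() now takes an extra reference before inserting so that x is guaranteed to stay alive for the km_state_notify() call, and the single xfrm_state_put() at out drops that reference on both the success and the error path. A toy refcount model of the flow (illustrative; in the kernel the state tables hold their own references after a successful add):

    #include <assert.h>
    #include <stdio.h>

    static int refcnt;

    static void hold(void) { refcnt++; }
    static void put(void)  { assert(refcnt > 0); refcnt--; }

    static int add_sa(int insert_err)
    {
            int err;

            refcnt = 1;                     /* reference from creation */
            hold();                         /* extra ref: keep x for notify */
            err = insert_err;               /* stands in for xfrm_state_add() */
            if (err < 0) {
                    put();                  /* drop the creation reference */
                    goto out;
            }
            printf("notify listeners\n");   /* km_state_notify(x, &c) */
    out:
            put();                          /* drop the extra reference */
            return err;
    }

    int main(void)
    {
            assert(add_sa(0) == 0 && refcnt == 1);   /* still owned elsewhere */
            assert(add_sa(-1) == -1 && refcnt == 0); /* fully released */
            return 0;
    }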
301 311
302static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 312static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
303{ 313{
304 struct xfrm_state *x; 314 struct xfrm_state *x;
315 int err;
316 struct km_event c;
305 struct xfrm_usersa_id *p = NLMSG_DATA(nlh); 317 struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
306 318
307 x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); 319 x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
@@ -313,10 +325,19 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
313 return -EPERM; 325 return -EPERM;
314 } 326 }
315 327
316 xfrm_state_delete(x); 328 err = xfrm_state_delete(x);
329 if (err < 0) {
330 xfrm_state_put(x);
331 return err;
332 }
333
334 c.seq = nlh->nlmsg_seq;
335 c.pid = nlh->nlmsg_pid;
336 c.event = nlh->nlmsg_type;
337 km_state_notify(x, &c);
317 xfrm_state_put(x); 338 xfrm_state_put(x);
318 339
319 return 0; 340 return err;
320} 341}
321 342
322static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) 343static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
@@ -681,6 +702,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
681{ 702{
682 struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); 703 struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh);
683 struct xfrm_policy *xp; 704 struct xfrm_policy *xp;
705 struct km_event c;
684 int err; 706 int err;
685 int excl; 707 int excl;
686 708
@@ -692,6 +714,10 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
692 if (!xp) 714 if (!xp)
693 return err; 715 return err;
694 716
717 /* shouldn't excl be based on nlh flags??
718 * Aha! this is anti-netlink really, i.e. more pfkey-derived;
719 * in netlink excl is a flag and you wouldn't need
720 * a type XFRM_MSG_UPDPOLICY - JHS */
695 excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; 721 excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
696 err = xfrm_policy_insert(p->dir, xp, excl); 722 err = xfrm_policy_insert(p->dir, xp, excl);
697 if (err) { 723 if (err) {
@@ -699,6 +725,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
699 return err; 725 return err;
700 } 726 }
701 727
728 c.event = nlh->nlmsg_type;
729 c.seq = nlh->nlmsg_seq;
730 c.pid = nlh->nlmsg_pid;
731 km_policy_notify(xp, p->dir, &c);
732
702 xfrm_pol_put(xp); 733 xfrm_pol_put(xp);
703 734
704 return 0; 735 return 0;
@@ -816,6 +847,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
816 struct xfrm_policy *xp; 847 struct xfrm_policy *xp;
817 struct xfrm_userpolicy_id *p; 848 struct xfrm_userpolicy_id *p;
818 int err; 849 int err;
850 struct km_event c;
819 int delete; 851 int delete;
820 852
821 p = NLMSG_DATA(nlh); 853 p = NLMSG_DATA(nlh);
@@ -843,6 +875,12 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
843 NETLINK_CB(skb).pid, 875 NETLINK_CB(skb).pid,
844 MSG_DONTWAIT); 876 MSG_DONTWAIT);
845 } 877 }
878 } else {
879 c.data.byid = p->index;
880 c.event = nlh->nlmsg_type;
881 c.seq = nlh->nlmsg_seq;
882 c.pid = nlh->nlmsg_pid;
883 km_policy_notify(xp, p->dir, &c);
846 } 884 }
847 885
848 xfrm_pol_put(xp); 886 xfrm_pol_put(xp);
@@ -852,15 +890,28 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
852 890
853static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 891static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
854{ 892{
893 struct km_event c;
855 struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); 894 struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
856 895
857 xfrm_state_flush(p->proto); 896 xfrm_state_flush(p->proto);
897 c.data.proto = p->proto;
898 c.event = nlh->nlmsg_type;
899 c.seq = nlh->nlmsg_seq;
900 c.pid = nlh->nlmsg_pid;
901 km_state_notify(NULL, &c);
902
858 return 0; 903 return 0;
859} 904}
860 905
861static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 906static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
862{ 907{
908 struct km_event c;
909
863 xfrm_policy_flush(); 910 xfrm_policy_flush();
911 c.event = nlh->nlmsg_type;
912 c.seq = nlh->nlmsg_seq;
913 c.pid = nlh->nlmsg_pid;
914 km_policy_notify(NULL, 0, &c);
864 return 0; 915 return 0;
865} 916}
866 917
@@ -1069,15 +1120,16 @@ nlmsg_failure:
1069 return -1; 1120 return -1;
1070} 1121}
1071 1122
1072static int xfrm_send_state_notify(struct xfrm_state *x, int hard) 1123static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
1073{ 1124{
1074 struct sk_buff *skb; 1125 struct sk_buff *skb;
1126 int len = NLMSG_LENGTH(sizeof(struct xfrm_user_expire));
1075 1127
1076 skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC); 1128 skb = alloc_skb(len, GFP_ATOMIC);
1077 if (skb == NULL) 1129 if (skb == NULL)
1078 return -ENOMEM; 1130 return -ENOMEM;
1079 1131
1080 if (build_expire(skb, x, hard) < 0) 1132 if (build_expire(skb, x, c->data.hard) < 0)
1081 BUG(); 1133 BUG();
1082 1134
1083 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; 1135 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
@@ -1085,6 +1137,131 @@ static int xfrm_send_state_notify(struct xfrm_state *x, int hard)
1085 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); 1137 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
1086} 1138}
1087 1139
1140static int xfrm_notify_sa_flush(struct km_event *c)
1141{
1142 struct xfrm_usersa_flush *p;
1143 struct nlmsghdr *nlh;
1144 struct sk_buff *skb;
1145 unsigned char *b;
1146 int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
1147
1148 skb = alloc_skb(len, GFP_ATOMIC);
1149 if (skb == NULL)
1150 return -ENOMEM;
1151 b = skb->tail;
1152
1153 nlh = NLMSG_PUT(skb, c->pid, c->seq,
1154 XFRM_MSG_FLUSHSA, sizeof(*p));
1155 nlh->nlmsg_flags = 0;
1156
1157 p = NLMSG_DATA(nlh);
1158 p->proto = c->data.proto;
1159
1160 nlh->nlmsg_len = skb->tail - b;
1161
1162 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
1163
1164nlmsg_failure:
1165 kfree_skb(skb);
1166 return -1;
1167}
1168
1169static int inline xfrm_sa_len(struct xfrm_state *x)
1170{
1171 int l = 0;
1172 if (x->aalg)
1173 l += RTA_SPACE(sizeof(*x->aalg) + (x->aalg->alg_key_len+7)/8);
1174 if (x->ealg)
1175 l += RTA_SPACE(sizeof(*x->ealg) + (x->ealg->alg_key_len+7)/8);
1176 if (x->calg)
1177 l += RTA_SPACE(sizeof(*x->calg));
1178 if (x->encap)
1179 l += RTA_SPACE(sizeof(*x->encap));
1180
1181 return l;
1182}
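
Aside: xfrm_sa_len() sizes the broadcast skb exactly — algorithm keys are stored as bit counts, so (alg_key_len + 7) / 8 rounds up to whole bytes before RTA_SPACE() adds attribute framing. The rounding on its own:

    #include <assert.h>

    static unsigned int key_bytes(unsigned int key_bits)
    {
            return (key_bits + 7) / 8;      /* round bits up to whole bytes */
    }

    int main(void)
    {
            assert(key_bytes(0)   == 0);
            assert(key_bytes(1)   == 1);
            assert(key_bytes(128) == 16);
            assert(key_bytes(129) == 17);
            return 0;
    }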
1183
1184static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
1185{
1186 struct xfrm_usersa_info *p;
1187 struct xfrm_usersa_id *id;
1188 struct nlmsghdr *nlh;
1189 struct sk_buff *skb;
1190 unsigned char *b;
1191 int len = xfrm_sa_len(x);
1192 int headlen;
1193
1194 headlen = sizeof(*p);
1195 if (c->event == XFRM_MSG_DELSA) {
1196 len += RTA_SPACE(headlen);
1197 headlen = sizeof(*id);
1198 }
1199 len += NLMSG_SPACE(headlen);
1200
1201 skb = alloc_skb(len, GFP_ATOMIC);
1202 if (skb == NULL)
1203 return -ENOMEM;
1204 b = skb->tail;
1205
1206 nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen);
1207 nlh->nlmsg_flags = 0;
1208
1209 p = NLMSG_DATA(nlh);
1210 if (c->event == XFRM_MSG_DELSA) {
1211 id = NLMSG_DATA(nlh);
1212 memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr));
1213 id->spi = x->id.spi;
1214 id->family = x->props.family;
1215 id->proto = x->id.proto;
1216
1217 p = RTA_DATA(__RTA_PUT(skb, XFRMA_SA, sizeof(*p)));
1218 }
1219
1220 copy_to_user_state(x, p);
1221
1222 if (x->aalg)
1223 RTA_PUT(skb, XFRMA_ALG_AUTH,
1224 sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg);
1225 if (x->ealg)
1226 RTA_PUT(skb, XFRMA_ALG_CRYPT,
1227 sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg);
1228 if (x->calg)
1229 RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
1230
1231 if (x->encap)
1232 RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
1233
1234 nlh->nlmsg_len = skb->tail - b;
1235
1236 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
1237
1238nlmsg_failure:
1239rtattr_failure:
1240 kfree_skb(skb);
1241 return -1;
1242}
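
Aside: for delete events the netlink header payload switches to the compact *_id structure while the full *_info still travels as a nested attribute (XFRMA_SA here, XFRMA_POLICY in xfrm_notify_policy() below); the len/headlen shuffle at the top of the function is exactly that layout change. A sketch of the resulting sizes, using the real 16-byte nlmsghdr and 4-byte rtattr framing but symbolic struct sizes:

    #include <stdio.h>

    /* symbolic sizes; the real ones come from the xfrm uapi structs */
    #define INFO_SZ 220     /* ~sizeof(struct xfrm_usersa_info), illustrative */
    #define ID_SZ    24     /* ~sizeof(struct xfrm_usersa_id), illustrative */

    static int align4(int n)      { return (n + 3) & ~3; }
    static int rta_space(int n)   { return align4(n + 4);  }  /* ~RTA_SPACE   */
    static int nlmsg_space(int n) { return align4(n + 16); }  /* ~NLMSG_SPACE */

    int main(void)
    {
            printf("NEWSA/UPDSA header:    %d bytes\n", nlmsg_space(INFO_SZ));
            printf("DELSA header+XFRMA_SA: %d bytes\n",
                   nlmsg_space(ID_SZ) + rta_space(INFO_SZ));
            return 0;
    }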
1243
1244static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c)
1245{
1246
1247 switch (c->event) {
1248 case XFRM_MSG_EXPIRE:
1249 return xfrm_exp_state_notify(x, c);
1250 case XFRM_MSG_DELSA:
1251 case XFRM_MSG_UPDSA:
1252 case XFRM_MSG_NEWSA:
1253 return xfrm_notify_sa(x, c);
1254 case XFRM_MSG_FLUSHSA:
1255 return xfrm_notify_sa_flush(c);
1256 default:
1257 printk("xfrm_user: Unknown SA event %d\n", c->event);
1258 break;
1259 }
1260
1261 return 0;
1262
1263}
1264
1088static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, 1265static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
1089 struct xfrm_tmpl *xt, struct xfrm_policy *xp, 1266 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
1090 int dir) 1267 int dir)
@@ -1218,7 +1395,7 @@ nlmsg_failure:
1218 return -1; 1395 return -1;
1219} 1396}
1220 1397
1221static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard) 1398static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1222{ 1399{
1223 struct sk_buff *skb; 1400 struct sk_buff *skb;
1224 size_t len; 1401 size_t len;
@@ -1229,7 +1406,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
1229 if (skb == NULL) 1406 if (skb == NULL)
1230 return -ENOMEM; 1407 return -ENOMEM;
1231 1408
1232 if (build_polexpire(skb, xp, dir, hard) < 0) 1409 if (build_polexpire(skb, xp, dir, c->data.hard) < 0)
1233 BUG(); 1410 BUG();
1234 1411
1235 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; 1412 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
@@ -1237,6 +1414,103 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
1237 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); 1414 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
1238} 1415}
1239 1416
1417static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
1418{
1419 struct xfrm_userpolicy_info *p;
1420 struct xfrm_userpolicy_id *id;
1421 struct nlmsghdr *nlh;
1422 struct sk_buff *skb;
1423 unsigned char *b;
1424 int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
1425 int headlen;
1426
1427 headlen = sizeof(*p);
1428 if (c->event == XFRM_MSG_DELPOLICY) {
1429 len += RTA_SPACE(headlen);
1430 headlen = sizeof(*id);
1431 }
1432 len += NLMSG_SPACE(headlen);
1433
1434 skb = alloc_skb(len, GFP_ATOMIC);
1435 if (skb == NULL)
1436 return -ENOMEM;
1437 b = skb->tail;
1438
1439 nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen);
1440
1441 p = NLMSG_DATA(nlh);
1442 if (c->event == XFRM_MSG_DELPOLICY) {
1443 id = NLMSG_DATA(nlh);
1444 memset(id, 0, sizeof(*id));
1445 id->dir = dir;
1446 if (c->data.byid)
1447 id->index = xp->index;
1448 else
1449 memcpy(&id->sel, &xp->selector, sizeof(id->sel));
1450
1451 p = RTA_DATA(__RTA_PUT(skb, XFRMA_POLICY, sizeof(*p)));
1452 }
1453
1454 nlh->nlmsg_flags = 0;
1455
1456 copy_to_user_policy(xp, p, dir);
1457 if (copy_to_user_tmpl(xp, skb) < 0)
1458 goto nlmsg_failure;
1459
1460 nlh->nlmsg_len = skb->tail - b;
1461
1462 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
1463
1464nlmsg_failure:
1465rtattr_failure:
1466 kfree_skb(skb);
1467 return -1;
1468}
1469
1470static int xfrm_notify_policy_flush(struct km_event *c)
1471{
1472 struct nlmsghdr *nlh;
1473 struct sk_buff *skb;
1474 unsigned char *b;
1475 int len = NLMSG_LENGTH(0);
1476
1477 skb = alloc_skb(len, GFP_ATOMIC);
1478 if (skb == NULL)
1479 return -ENOMEM;
1480 b = skb->tail;
1481
1482
1483 nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0);
1484
1485 nlh->nlmsg_len = skb->tail - b;
1486
1487 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
1488
1489nlmsg_failure:
1490 kfree_skb(skb);
1491 return -1;
1492}
1493
1494static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1495{
1496
1497 switch (c->event) {
1498 case XFRM_MSG_NEWPOLICY:
1499 case XFRM_MSG_UPDPOLICY:
1500 case XFRM_MSG_DELPOLICY:
1501 return xfrm_notify_policy(xp, dir, c);
1502 case XFRM_MSG_FLUSHPOLICY:
1503 return xfrm_notify_policy_flush(c);
1504 case XFRM_MSG_POLEXPIRE:
1505 return xfrm_exp_policy_notify(xp, dir, c);
1506 default:
1507 printk("xfrm_user: Unknown Policy event %d\n", c->event);
1508 }
1509
1510 return 0;
1511
1512}
1513
1240static struct xfrm_mgr netlink_mgr = { 1514static struct xfrm_mgr netlink_mgr = {
1241 .id = "netlink", 1515 .id = "netlink",
1242 .notify = xfrm_send_state_notify, 1516 .notify = xfrm_send_state_notify,