author    Dave Kleikamp <shaggy@austin.ibm.com>  2005-06-20 09:44:00 -0400
committer Dave Kleikamp <shaggy@austin.ibm.com>  2005-06-20 09:44:00 -0400
commit    d039ba24f135147f60a13bcaa768189a5b773b6e (patch)
tree      444b7596ab8312b5954d15c3135052a7c09c6fbe /net
parent    72e3148a6e987974e3e949c5668e5ca812d7c818 (diff)
parent    8b22c249e7de453961e4d253b19fc2a0bdd65d53 (diff)
Merge with /home/shaggy/git/linus-clean/
Diffstat (limited to 'net')
-rw-r--r--  net/core/Makefile | 3
-rw-r--r--  net/core/dev.c | 1
-rw-r--r--  net/core/ethtool.c | 2
-rw-r--r--  net/core/neighbour.c | 333
-rw-r--r--  net/core/net-sysfs.c | 17
-rw-r--r--  net/core/request_sock.c | 64
-rw-r--r--  net/core/rtnetlink.c | 33
-rw-r--r--  net/core/sock.c | 35
-rw-r--r--  net/core/wireless.c | 74
-rw-r--r--  net/decnet/dn_dev.c | 9
-rw-r--r--  net/decnet/dn_neigh.c | 1
-rw-r--r--  net/decnet/dn_route.c | 11
-rw-r--r--  net/decnet/dn_rules.c | 7
-rw-r--r--  net/decnet/dn_table.c | 8
-rw-r--r--  net/ipv4/af_inet.c | 1
-rw-r--r--  net/ipv4/devinet.c | 9
-rw-r--r--  net/ipv4/fib_hash.c | 3
-rw-r--r--  net/ipv4/fib_lookup.h | 3
-rw-r--r--  net/ipv4/fib_rules.c | 7
-rw-r--r--  net/ipv4/fib_semantics.c | 10
-rw-r--r--  net/ipv4/icmp.c | 9
-rw-r--r--  net/ipv4/ip_sockglue.c | 6
-rw-r--r--  net/ipv4/ipvs/Makefile | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto.c | 3
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_icmp.c | 182
-rw-r--r--  net/ipv4/multipath_drr.c | 2
-rw-r--r--  net/ipv4/multipath_random.c | 2
-rw-r--r--  net/ipv4/multipath_rr.c | 2
-rw-r--r--  net/ipv4/multipath_wrandom.c | 2
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_standalone.c | 1
-rw-r--r--  net/ipv4/netfilter/ipt_recent.c | 10
-rw-r--r--  net/ipv4/raw.c | 22
-rw-r--r--  net/ipv4/route.c | 11
-rw-r--r--  net/ipv4/syncookies.c | 49
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 9
-rw-r--r--  net/ipv4/tcp.c | 86
-rw-r--r--  net/ipv4/tcp_diag.c | 37
-rw-r--r--  net/ipv4/tcp_ipv4.c | 172
-rw-r--r--  net/ipv4/tcp_minisocks.c | 68
-rw-r--r--  net/ipv4/tcp_output.c | 27
-rw-r--r--  net/ipv4/tcp_timer.c | 18
-rw-r--r--  net/ipv6/addrconf.c | 58
-rw-r--r--  net/ipv6/datagram.c | 6
-rw-r--r--  net/ipv6/icmp.c | 14
-rw-r--r--  net/ipv6/ip6_tunnel.c | 1
-rw-r--r--  net/ipv6/ipv6_syms.c | 1
-rw-r--r--  net/ipv6/raw.c | 8
-rw-r--r--  net/ipv6/route.c | 11
-rw-r--r--  net/ipv6/tcp_ipv6.c | 148
-rw-r--r--  net/ipv6/udp.c | 4
-rw-r--r--  net/key/af_key.c | 369
-rw-r--r--  net/netlink/af_netlink.c | 8
-rw-r--r--  net/sched/Kconfig | 2
-rw-r--r--  net/sched/act_api.c | 13
-rw-r--r--  net/sched/cls_api.c | 5
-rw-r--r--  net/sched/cls_basic.c | 3
-rw-r--r--  net/sched/em_meta.c | 295
-rw-r--r--  net/sched/sch_api.c | 10
-rw-r--r--  net/sched/sch_dsmark.c | 357
-rw-r--r--  net/sched/sch_fifo.c | 152
-rw-r--r--  net/sched/sch_generic.c | 84
-rw-r--r--  net/sctp/input.c | 49
-rw-r--r--  net/sctp/ipv6.c | 36
-rw-r--r--  net/sctp/proc.c | 194
-rw-r--r--  net/sctp/protocol.c | 7
-rw-r--r--  net/sctp/socket.c | 20
-rw-r--r--  net/socket.c | 9
-rw-r--r--  net/xfrm/xfrm_policy.c | 8
-rw-r--r--  net/xfrm/xfrm_state.c | 81
-rw-r--r--  net/xfrm/xfrm_user.c | 288
70 files changed, 2341 insertions(+), 1251 deletions(-)
diff --git a/net/core/Makefile b/net/core/Makefile
index 81f03243fe2f..5e0c56b7f607 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -2,7 +2,8 @@
 # Makefile for the Linux networking core.
 #
 
-obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
+obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
+	 gen_stats.o gen_estimator.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
diff --git a/net/core/dev.c b/net/core/dev.c
index f15a3ffff635..ab935778ce81 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1744,6 +1744,7 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
 	unsigned long start_time = jiffies;
 
+	backlog_dev->weight = weight_p;
 	for (;;) {
 		struct sk_buff *skb;
 		struct net_device *dev;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 8ec484894d68..a3eeb88e1c81 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -356,7 +356,7 @@ static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce;
 
-	if (!dev->ethtool_ops->get_coalesce)
+	if (!dev->ethtool_ops->set_coalesce)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 43bdc521e20d..f6bdcad47da6 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1276,9 +1276,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 		INIT_RCU_HEAD(&p->rcu_head);
 		p->reachable_time =
 				neigh_rand_reach_time(p->base_reachable_time);
-		if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) {
-			kfree(p);
-			return NULL;
+		if (dev) {
+			if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+				kfree(p);
+				return NULL;
+			}
+
+			dev_hold(dev);
+			p->dev = dev;
 		}
 		p->sysctl_table = NULL;
 		write_lock_bh(&tbl->lock);
@@ -1309,6 +1314,8 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
 			*p = parms->next;
 			parms->dead = 1;
 			write_unlock_bh(&tbl->lock);
+			if (parms->dev)
+				dev_put(parms->dev);
 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
 			return;
 		}
@@ -1546,20 +1553,323 @@ out:
 	return err;
 }
 
+static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
+{
+	struct rtattr *nest = NULL;
+
+	nest = RTA_NEST(skb, NDTA_PARMS);
+
+	if (parms->dev)
+		RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+
+	RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+	RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+	RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+	RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+	RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+	RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+	RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+	RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+		      parms->base_reachable_time);
+	RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+	RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+	RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+	RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+	RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+	RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+
+	return RTA_NEST_END(skb, nest);
+
+rtattr_failure:
+	return RTA_NEST_CANCEL(skb, nest);
+}
+
+static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
+			      struct netlink_callback *cb)
+{
+	struct nlmsghdr *nlh;
+	struct ndtmsg *ndtmsg;
+
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
+			       NLM_F_MULTI);
+
+	ndtmsg = NLMSG_DATA(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+
+	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+	RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+	RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+	RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+	RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+
+	{
+		unsigned long now = jiffies;
+		unsigned int flush_delta = now - tbl->last_flush;
+		unsigned int rand_delta = now - tbl->last_rand;
+
+		struct ndt_config ndc = {
+			.ndtc_key_len		= tbl->key_len,
+			.ndtc_entry_size	= tbl->entry_size,
+			.ndtc_entries		= atomic_read(&tbl->entries),
+			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
+			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
+			.ndtc_hash_rnd		= tbl->hash_rnd,
+			.ndtc_hash_mask		= tbl->hash_mask,
+			.ndtc_hash_chain_gc	= tbl->hash_chain_gc,
+			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
+		};
+
+		RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+	}
+
+	{
+		int cpu;
+		struct ndt_stats ndst;
+
+		memset(&ndst, 0, sizeof(ndst));
+
+		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			struct neigh_statistics	*st;
+
+			if (!cpu_possible(cpu))
+				continue;
+
+			st = per_cpu_ptr(tbl->stats, cpu);
+			ndst.ndts_allocs		+= st->allocs;
+			ndst.ndts_destroys		+= st->destroys;
+			ndst.ndts_hash_grows		+= st->hash_grows;
+			ndst.ndts_res_failed		+= st->res_failed;
+			ndst.ndts_lookups		+= st->lookups;
+			ndst.ndts_hits			+= st->hits;
+			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
+			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
+			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
+			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
+		}
+
+		RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+	}
+
+	BUG_ON(tbl->parms.dev);
+	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
+		goto rtattr_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_END(skb, nlh);
+
+rtattr_failure:
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_CANCEL(skb, nlh);
+
+nlmsg_failure:
+	return -1;
+}
+
+static int neightbl_fill_param_info(struct neigh_table *tbl,
+				    struct neigh_parms *parms,
+				    struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	struct ndtmsg *ndtmsg;
+	struct nlmsghdr *nlh;
+
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
+			       NLM_F_MULTI);
+
+	ndtmsg = NLMSG_DATA(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+
+	if (neightbl_fill_parms(skb, parms) < 0)
+		goto rtattr_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_END(skb, nlh);
+
+rtattr_failure:
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_CANCEL(skb, nlh);
+
+nlmsg_failure:
+	return -1;
+}
+
+static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+						      int ifindex)
+{
+	struct neigh_parms *p;
+
+	for (p = &tbl->parms; p; p = p->next)
+		if ((p->dev && p->dev->ifindex == ifindex) ||
+		    (!p->dev && !ifindex))
+			return p;
+
+	return NULL;
+}
+
+int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct neigh_table *tbl;
+	struct ndtmsg *ndtmsg = NLMSG_DATA(nlh);
+	struct rtattr **tb = arg;
+	int err = -EINVAL;
+
+	if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1]))
+		return -EINVAL;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
+			continue;
+
+		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
+			break;
+	}
+
+	if (tbl == NULL) {
+		err = -ENOENT;
+		goto errout;
+	}
+
+	/*
+	 * We acquire tbl->lock to be nice to the periodic timers and
+	 * make sure they always see a consistent set of values.
+	 */
+	write_lock_bh(&tbl->lock);
+
+	if (tb[NDTA_THRESH1 - 1])
+		tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]);
+
+	if (tb[NDTA_THRESH2 - 1])
+		tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
+
+	if (tb[NDTA_THRESH3 - 1])
+		tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
+
+	if (tb[NDTA_GC_INTERVAL - 1])
+		tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
+
+	if (tb[NDTA_PARMS - 1]) {
+		struct rtattr *tbp[NDTPA_MAX];
+		struct neigh_parms *p;
+		u32 ifindex = 0;
+
+		if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0)
+			goto rtattr_failure;
+
+		if (tbp[NDTPA_IFINDEX - 1])
+			ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]);
+
+		p = lookup_neigh_params(tbl, ifindex);
+		if (p == NULL) {
+			err = -ENOENT;
+			goto rtattr_failure;
+		}
+
+		if (tbp[NDTPA_QUEUE_LEN - 1])
+			p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
+
+		if (tbp[NDTPA_PROXY_QLEN - 1])
+			p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
+
+		if (tbp[NDTPA_APP_PROBES - 1])
+			p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
+
+		if (tbp[NDTPA_UCAST_PROBES - 1])
+			p->ucast_probes =
+				RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]);
+
+		if (tbp[NDTPA_MCAST_PROBES - 1])
+			p->mcast_probes =
+				RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]);
+
+		if (tbp[NDTPA_BASE_REACHABLE_TIME - 1])
+			p->base_reachable_time =
+				RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]);
+
+		if (tbp[NDTPA_GC_STALETIME - 1])
+			p->gc_staletime =
+				RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]);
+
+		if (tbp[NDTPA_DELAY_PROBE_TIME - 1])
+			p->delay_probe_time =
+				RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]);
+
+		if (tbp[NDTPA_RETRANS_TIME - 1])
+			p->retrans_time =
+				RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]);
+
+		if (tbp[NDTPA_ANYCAST_DELAY - 1])
+			p->anycast_delay =
+				RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]);
+
+		if (tbp[NDTPA_PROXY_DELAY - 1])
+			p->proxy_delay =
+				RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]);
+
+		if (tbp[NDTPA_LOCKTIME - 1])
+			p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]);
+	}
+
+	err = 0;
+
+rtattr_failure:
+	write_unlock_bh(&tbl->lock);
+errout:
+	read_unlock(&neigh_tbl_lock);
+	return err;
+}
+
+int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx, family;
+	int s_idx = cb->args[0];
+	struct neigh_table *tbl;
+
+	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) {
+		struct neigh_parms *p;
+
+		if (idx < s_idx || (family && tbl->family != family))
+			continue;
+
+		if (neightbl_fill_info(tbl, skb, cb) <= 0)
+			break;
+
+		for (++idx, p = tbl->parms.next; p; p = p->next, idx++) {
+			if (idx < s_idx)
+				continue;
+
+			if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0)
+				goto out;
+		}
+
+	}
+out:
+	read_unlock(&neigh_tbl_lock);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
 
 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
-			   u32 pid, u32 seq, int event)
+			   u32 pid, u32 seq, int event, unsigned int flags)
 {
 	unsigned long now = jiffies;
 	unsigned char *b = skb->tail;
 	struct nda_cacheinfo ci;
 	int locked = 0;
 	u32 probes;
-	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event,
-					 sizeof(struct ndmsg));
+	struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event,
+					 sizeof(struct ndmsg), flags);
 	struct ndmsg *ndm = NLMSG_DATA(nlh);
 
-	nlh->nlmsg_flags = pid ? NLM_F_MULTI : 0;
 	ndm->ndm_family	 = n->ops->family;
 	ndm->ndm_flags	 = n->flags;
 	ndm->ndm_type	 = n->type;
@@ -1609,7 +1919,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 			continue;
 		if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
 				    cb->nlh->nlmsg_seq,
-				    RTM_NEWNEIGH) <= 0) {
+				    RTM_NEWNEIGH,
+				    NLM_F_MULTI) <= 0) {
 			read_unlock_bh(&tbl->lock);
 			rc = -1;
 			goto out;
@@ -2018,7 +2329,7 @@ void neigh_app_ns(struct neighbour *n)
 	if (!skb)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -2037,7 +2348,7 @@ static void neigh_app_notify(struct neighbour *n)
 	if (!skb)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -2352,6 +2663,8 @@ EXPORT_SYMBOL(neigh_update);
 EXPORT_SYMBOL(neigh_update_hhs);
 EXPORT_SYMBOL(pneigh_enqueue);
 EXPORT_SYMBOL(pneigh_lookup);
+EXPORT_SYMBOL(neightbl_dump_info);
+EXPORT_SYMBOL(neightbl_set);
 
 #ifdef CONFIG_ARPD
 EXPORT_SYMBOL(neigh_app_ns);
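
Note: the RTM_GETNEIGHTBL handler added above can be driven from userspace with an ordinary rtnetlink dump request. The sketch below is illustrative only, not part of the patch; it assumes nothing beyond the standard NETLINK_ROUTE socket API plus the RTM_GETNEIGHTBL message type introduced here, and it elides reply parsing.

	/* Illustrative userspace sketch: request a dump of all neighbour
	 * tables.  Each reply carries an ndtmsg plus the NDTA_* attributes
	 * built by neightbl_fill_info() above. */
	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/rtnetlink.h>

	int dump_neigh_tables(void)
	{
		struct {
			struct nlmsghdr nlh;
			struct rtgenmsg g;
		} req;
		int err, fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

		if (fd < 0)
			return -1;

		memset(&req, 0, sizeof(req));
		req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(req.g));
		req.nlh.nlmsg_type  = RTM_GETNEIGHTBL;
		req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
		req.g.rtgen_family  = AF_UNSPEC;	/* 0: every family */

		err = send(fd, &req, req.nlh.nlmsg_len, 0) < 0 ? -1 : 0;
		/* ... recv() and walk the NLM_F_MULTI reply stream here ... */
		close(fd);
		return err;
	}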
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 910eb4c05a47..e2137f3e489d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -185,6 +185,22 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, size_t len)
 static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
 			 store_tx_queue_len);
 
+NETDEVICE_SHOW(weight, fmt_dec);
+
+static int change_weight(struct net_device *net, unsigned long new_weight)
+{
+	net->weight = new_weight;
+	return 0;
+}
+
+static ssize_t store_weight(struct class_device *dev, const char *buf, size_t len)
+{
+	return netdev_store(dev, buf, len, change_weight);
+}
+
+static CLASS_DEVICE_ATTR(weight, S_IRUGO | S_IWUSR, show_weight,
+			 store_weight);
+
 
 static struct class_device_attribute *net_class_attributes[] = {
 	&class_device_attr_ifindex,
@@ -194,6 +210,7 @@ static struct class_device_attribute *net_class_attributes[] = {
 	&class_device_attr_features,
 	&class_device_attr_mtu,
 	&class_device_attr_flags,
+	&class_device_attr_weight,
 	&class_device_attr_type,
 	&class_device_attr_address,
 	&class_device_attr_broadcast,
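
Note: with the two hunks above, each device's softirq poll weight becomes an ordinary sysfs attribute. A minimal userspace sketch follows; it is illustrative, not part of the patch, and assumes sysfs mounted at /sys.

	/* Illustrative sketch: read /sys/class/net/<dev>/weight. */
	#include <stdio.h>

	int read_dev_weight(const char *dev)
	{
		char path[64];
		int weight = -1;
		FILE *f;

		snprintf(path, sizeof(path), "/sys/class/net/%s/weight", dev);
		f = fopen(path, "r");
		if (f == NULL)
			return -1;
		if (fscanf(f, "%d", &weight) != 1)
			weight = -1;
		fclose(f);
		return weight;	/* writing a new value back works the same way */
	}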
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
new file mode 100644
index 000000000000..bb55675f0685
--- /dev/null
+++ b/net/core/request_sock.c
@@ -0,0 +1,64 @@
+/*
+ * NET		Generic infrastructure for Network protocols.
+ *
+ * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *		From code originally in include/net/tcp.h
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <net/request_sock.h>
+
+/*
+ * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
+ * One SYN_RECV socket costs about 80bytes on a 32bit machine.
+ * It would be better to replace it with a global counter for all sockets
+ * but then some measure against one socket starving all other sockets
+ * would be needed.
+ *
+ * It was 128 by default. Experiments with real servers show, that
+ * it is absolutely not enough even at 100conn/sec. 256 cures most
+ * of problems. This value is adjusted to 128 for very small machines
+ * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
+ * Further increasing requires to change hash table size.
+ */
+int sysctl_max_syn_backlog = 256;
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
+
+int reqsk_queue_alloc(struct request_sock_queue *queue,
+		      const int nr_table_entries)
+{
+	const int lopt_size = sizeof(struct listen_sock) +
+			      nr_table_entries * sizeof(struct request_sock *);
+	struct listen_sock *lopt = kmalloc(lopt_size, GFP_KERNEL);
+
+	if (lopt == NULL)
+		return -ENOMEM;
+
+	memset(lopt, 0, lopt_size);
+
+	for (lopt->max_qlen_log = 6;
+	     (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
+	     lopt->max_qlen_log++);
+
+	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
+	rwlock_init(&queue->syn_wait_lock);
+	queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+
+	write_lock_bh(&queue->syn_wait_lock);
+	queue->listen_opt = lopt;
+	write_unlock_bh(&queue->syn_wait_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(reqsk_queue_alloc);
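
Note: reqsk_queue_alloc() is intended to run when a socket enters the listening state. A hedged kernel-side sketch follows; the function and variable names are illustrative, not from this patch.

	/* Illustrative sketch only: wiring the new helper into a listen path. */
	#include <net/request_sock.h>

	static int example_listen_start(struct request_sock_queue *queue,
					int nr_table_entries)
	{
		int err = reqsk_queue_alloc(queue, nr_table_entries);

		if (err)
			return err;	/* -ENOMEM: no listen_sock hash table */

		/* queue->listen_opt now holds the SYN_RECV hash table, sized
		 * to the power of two chosen from sysctl_max_syn_backlog. */
		return 0;
	}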
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 00caf4b318b2..e013d836a7ab 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -100,6 +100,7 @@ static const int rtm_min[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWPREFIX)]    = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NLMSG_LENGTH(sizeof(struct ndtmsg)),
 };
 
 static const int rta_max[RTM_NR_FAMILIES] =
@@ -113,6 +114,7 @@ static const int rta_max[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
 	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
 	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
+	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NDTA_MAX,
 };
 
 void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
@@ -176,14 +178,14 @@ rtattr_failure:
 
 
 static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
-				 int type, u32 pid, u32 seq, u32 change)
+				 int type, u32 pid, u32 seq, u32 change,
+				 unsigned int flags)
 {
 	struct ifinfomsg *r;
 	struct nlmsghdr  *nlh;
 	unsigned char	 *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
 	r->ifi_family = AF_UNSPEC;
 	r->ifi_type = dev->type;
@@ -273,7 +275,10 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
+		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
+					  NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq, 0,
+					  NLM_F_MULTI) <= 0)
 			break;
 	}
 	read_unlock(&dev_base_lock);
@@ -447,7 +452,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	if (!skb)
 		return;
 
-	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) {
+	if (rtnetlink_fill_ifinfo(skb, dev, type, current->pid, 0, change, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -649,14 +654,16 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 
 static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
 	[RTM_GETLINK     - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
 	[RTM_SETLINK     - RTM_BASE] = { .doit   = do_setlink		  },
 	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	  },
 	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnetlink_dump_all	  },
 	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		  },
 	[RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete	  },
 	[RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info	  },
 	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnetlink_dump_all	  },
+	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info	  },
+	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set	  },
 };
 
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
diff --git a/net/core/sock.c b/net/core/sock.c
index 96e00b08698f..a6ec3ada7f9e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -118,6 +118,7 @@
 #include <linux/netdevice.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
+#include <net/request_sock.h>
 #include <net/sock.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
@@ -1363,6 +1364,7 @@ static LIST_HEAD(proto_list);
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
+	char *request_sock_slab_name;
 	int rc = -ENOBUFS;
 
 	if (alloc_slab) {
@@ -1374,6 +1376,25 @@ int proto_register(struct proto *prot, int alloc_slab)
 			       prot->name);
 			goto out;
 		}
+
+		if (prot->rsk_prot != NULL) {
+			static const char mask[] = "request_sock_%s";
+
+			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+			if (request_sock_slab_name == NULL)
+				goto out_free_sock_slab;
+
+			sprintf(request_sock_slab_name, mask, prot->name);
+			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
+								 prot->rsk_prot->obj_size, 0,
+								 SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+			if (prot->rsk_prot->slab == NULL) {
+				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
+				       prot->name);
+				goto out_free_request_sock_slab_name;
+			}
+		}
 	}
 
 	write_lock(&proto_list_lock);
@@ -1382,6 +1403,12 @@ int proto_register(struct proto *prot, int alloc_slab)
 	rc = 0;
 out:
 	return rc;
+out_free_request_sock_slab_name:
+	kfree(request_sock_slab_name);
+out_free_sock_slab:
+	kmem_cache_destroy(prot->slab);
+	prot->slab = NULL;
+	goto out;
 }
 
 EXPORT_SYMBOL(proto_register);
@@ -1395,6 +1422,14 @@ void proto_unregister(struct proto *prot)
 		prot->slab = NULL;
 	}
 
+	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
+		const char *name = kmem_cache_name(prot->rsk_prot->slab);
+
+		kmem_cache_destroy(prot->rsk_prot->slab);
+		kfree(name);
+		prot->rsk_prot->slab = NULL;
+	}
+
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 }
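
Note: with the proto_register()/proto_unregister() changes above, a protocol opts into the shared request_sock slab simply by pointing ->rsk_prot at its ops table. A hedged sketch follows; the names and field values here are assumptions for illustration, not from this patch.

	/* Illustrative sketch: a protocol opting into the request_sock slab. */
	static struct request_sock_ops example_rsk_ops = {
		.family		= PF_INET,
		.obj_size	= sizeof(struct request_sock),
	};

	static struct proto example_prot = {
		.name		= "EXAMPLE",
		.obj_size	= sizeof(struct sock),
		.rsk_prot	= &example_rsk_ops,
	};

	/* proto_register(&example_prot, 1) now also creates a
	 * "request_sock_EXAMPLE" kmem cache for its SYN_RECV minisocks,
	 * and proto_unregister() tears it down again. */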
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 750cc5daeb03..b2fe378dfbf8 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -2,7 +2,7 @@
  * This file implement the Wireless Extensions APIs.
  *
  * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2004 Jean Tourrilhes, All Rights Reserved.
+ * Copyright (c) 1997-2005 Jean Tourrilhes, All Rights Reserved.
  *
  * (As all part of the Linux kernel, this file is GPL)
  */
@@ -187,6 +187,12 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
+	[SIOCSIWMLME	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_mlme),
+		.max_tokens	= sizeof(struct iw_mlme),
+	},
 	[SIOCGIWAPLIST	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct sockaddr) +
@@ -195,7 +201,10 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.flags		= IW_DESCR_FLAG_NOMAX,
 	},
 	[SIOCSIWSCAN	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= 0,
+		.max_tokens	= sizeof(struct iw_scan_req),
 	},
 	[SIOCGIWSCAN	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
@@ -273,6 +282,42 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 	[SIOCGIWPOWER	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
+	[SIOCSIWGENIE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[SIOCGIWGENIE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[SIOCSIWAUTH	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWAUTH	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWENCODEEXT - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_encode_ext),
+		.max_tokens	= sizeof(struct iw_encode_ext) +
+				  IW_ENCODING_TOKEN_MAX,
+	},
+	[SIOCGIWENCODEEXT - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_encode_ext),
+		.max_tokens	= sizeof(struct iw_encode_ext) +
+				  IW_ENCODING_TOKEN_MAX,
+	},
+	[SIOCSIWPMKSA - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_pmksa),
+		.max_tokens	= sizeof(struct iw_pmksa),
+	},
 };
 static const int standard_ioctl_num = (sizeof(standard_ioctl) /
 				       sizeof(struct iw_ioctl_description));
@@ -299,6 +344,31 @@ static const struct iw_ioctl_description standard_event[] = {
 	[IWEVEXPIRED	- IWEVFIRST] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
+	[IWEVGENIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVMICHAELMICFAILURE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_michaelmicfailure),
+	},
+	[IWEVASSOCREQIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVASSOCRESPIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVPMKIDCAND	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_pmkid_cand),
+	},
 };
 static const int standard_event_num = (sizeof(standard_event) /
 				       sizeof(struct iw_ioctl_description));
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index ee7bf46eb78a..00233ecbc9cb 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -716,13 +716,13 @@ static int dn_dev_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 }
 
 static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
-				u32 pid, u32 seq, int event)
+				u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 
 	ifm->ifa_family = AF_DECnet;
@@ -755,7 +755,7 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
 		netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS);
 		return;
 	}
-	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL);
 		return;
@@ -790,7 +790,8 @@ static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 			if (dn_dev_fill_ifaddr(skb, ifa,
 					       NETLINK_CB(cb->skb).pid,
 					       cb->nlh->nlmsg_seq,
-					       RTM_NEWADDR) <= 0)
+					       RTM_NEWADDR,
+					       NLM_F_MULTI) <= 0)
 				goto done;
 		}
 	}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index f6dfe96f45b7..f32dba9e26fe 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -101,7 +101,6 @@ struct neigh_table dn_neigh_table = {
 	.id =				"dn_neigh_cache",
 	.parms ={
 		.tbl =			&dn_neigh_table,
-		.entries =		0,
 		.base_reachable_time =	30 * HZ,
 		.retrans_time =		1 * HZ,
 		.gc_staletime =		60 * HZ,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 1e7b5c3ea215..2399fa8a3f86 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1465,7 +1465,8 @@ int dn_route_input(struct sk_buff *skb)
 		return dn_route_input_slow(skb);
 }
 
-static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait)
+static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+			   int event, int nowait, unsigned int flags)
 {
 	struct dn_route *rt = (struct dn_route *)skb->dst;
 	struct rtmsg *r;
@@ -1473,9 +1474,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait)
 	unsigned char *b = skb->tail;
 	struct rta_cacheinfo ci;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
-	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
 	r->rtm_family = AF_DECnet;
 	r->rtm_dst_len = 16;
 	r->rtm_src_len = 0;
@@ -1596,7 +1596,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 
 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
 
-	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0);
+	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
 
 	if (err == 0)
 		goto out_free;
@@ -1644,7 +1644,8 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		skb->dst = dst_clone(&rt->u.dst);
 		if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
-				cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) {
+				cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+				1, NLM_F_MULTI) <= 0) {
 			dst_release(xchg(&skb->dst, NULL));
 			rcu_read_unlock_bh();
 			goto done;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 597587d170d8..1060de70bc0c 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -342,14 +342,15 @@ static struct notifier_block dn_fib_rules_notifier = {
 	.notifier_call =	dn_fib_rules_event,
 };
 
-static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, struct netlink_callback *cb)
+static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r,
+			    struct netlink_callback *cb, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
 
-	nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm));
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_DECnet;
 	rtm->rtm_dst_len = r->r_dst_len;
@@ -394,7 +395,7 @@ int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 	for(r = dn_fib_rules, idx = 0; r; r = r->r_next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (dn_fib_fill_rule(skb, r, cb) < 0)
+		if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
 			break;
 	}
 	read_unlock(&dn_fib_rules_lock);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index dad5603912be..28ba5777a25a 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -270,13 +270,13 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
 
 static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
-			struct dn_fib_info *fi)
+			struct dn_fib_info *fi, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_DECnet;
 	rtm->rtm_dst_len = dst_len;
@@ -345,7 +345,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
 
 	if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
 			     f->fn_type, f->fn_scope, &f->fn_key, z,
-			     DN_FIB_INFO(f)) < 0) {
+			     DN_FIB_INFO(f), 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -377,7 +377,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
 				tb->n,
 				(f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
 				f->fn_scope, &f->fn_key, dz->dz_order,
-				f->fn_info) < 0) {
+				f->fn_info, NLM_F_MULTI) < 0) {
 			cb->args[3] = i;
 			return -1;
 		}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b3cb49ce5fad..03942f133944 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1181,6 +1181,7 @@ EXPORT_SYMBOL(inet_stream_connect);
 EXPORT_SYMBOL(inet_stream_ops);
 EXPORT_SYMBOL(inet_unregister_protosw);
 EXPORT_SYMBOL(net_statistics);
+EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
 
 #ifdef INET_REFCNT_DEBUG
 EXPORT_SYMBOL(inet_sock_nr);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 478a30179a52..650dcb12d9a1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1030,14 +1030,13 @@ static struct notifier_block ip_netdev_notifier = {
 };
 
 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
-			    u32 pid, u32 seq, int event)
+			    u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 	ifm->ifa_family = AF_INET;
 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
@@ -1090,7 +1089,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
 					     cb->nlh->nlmsg_seq,
-					     RTM_NEWADDR) <= 0) {
+					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
 				rcu_read_unlock();
 				goto done;
 			}
@@ -1113,7 +1112,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
 
 	if (!skb)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS);
-	else if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+	else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL);
 	} else {
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 6506dcc01b46..b10d6bb5ef3d 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -703,7 +703,8 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 					&f->fn_key,
 					fz->fz_order,
 					fa->fa_tos,
-					fa->fa_info) < 0) {
+					fa->fa_info,
+					NLM_F_MULTI) < 0) {
 				cb->args[3] = i;
 				return -1;
 			}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ac4485f75e97..b729d97cfa93 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -30,7 +30,8 @@ extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
 			struct kern_rta *rta, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			 u8 tb_id, u8 type, u8 scope, void *dst,
-			 int dst_len, u8 tos, struct fib_info *fi);
+			 int dst_len, u8 tos, struct fib_info *fi,
+			 unsigned int);
 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 		      int z, int tb_id,
 		      struct nlmsghdr *n, struct netlink_skb_parms *req);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 39d0aadb9a2a..0b298bbc1518 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -367,13 +367,14 @@ static struct notifier_block fib_rules_notifier = {
 
 static __inline__ int inet_fill_rule(struct sk_buff *skb,
 				     struct fib_rule *r,
-				     struct netlink_callback *cb)
+				     struct netlink_callback *cb,
+				     unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr  *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm));
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_INET;
 	rtm->rtm_dst_len = r->r_dst_len;
@@ -422,7 +423,7 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 	for (r=fib_rules, idx=0; r; r = r->r_next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (inet_fill_rule(skb, r, cb) < 0)
+		if (inet_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
 			break;
 	}
 	read_unlock(&fib_rules_lock);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 029362d66135..c886b28ba9f5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -276,7 +276,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 	       struct nlmsghdr *n, struct netlink_skb_parms *req)
 {
 	struct sk_buff *skb;
-	u32 pid = req ? req->pid : 0;
+	u32 pid = req ? req->pid : n->nlmsg_pid;
 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
 
 	skb = alloc_skb(size, GFP_KERNEL);
@@ -286,7 +286,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
 	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
 			  fa->fa_type, fa->fa_scope, &key, z,
 			  fa->fa_tos,
-			  fa->fa_info) < 0) {
+			  fa->fa_info, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -932,13 +932,13 @@ u32 __fib_res_prefsrc(struct fib_result *res)
 int
 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
-	      struct fib_info *fi)
+	      struct fib_info *fi, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr  *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_INET;
 	rtm->rtm_dst_len = dst_len;
@@ -1035,7 +1035,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
 	}
 
 	nl->nlmsg_flags = NLM_F_REQUEST;
-	nl->nlmsg_pid = 0;
+	nl->nlmsg_pid = current->pid;
 	nl->nlmsg_seq = 0;
 	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
 	if (cmd == SIOCDELRT) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 85bf0d3e294b..cb759484979d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -207,6 +207,7 @@ int sysctl_icmp_ignore_bogus_error_responses;
 
 int sysctl_icmp_ratelimit = 1 * HZ;
 int sysctl_icmp_ratemask = 0x1818;
+int sysctl_icmp_errors_use_inbound_ifaddr;
 
 /*
  *	ICMP control array. This specifies what to do with each ICMP.
@@ -511,8 +512,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
 	 */
 
 	saddr = iph->daddr;
-	if (!(rt->rt_flags & RTCF_LOCAL))
-		saddr = 0;
+	if (!(rt->rt_flags & RTCF_LOCAL)) {
+		if (sysctl_icmp_errors_use_inbound_ifaddr)
+			saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK);
+		else
+			saddr = 0;
+	}
 
 	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
 					   IPTOS_PREC_INTERNETCONTROL) :
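
Note: the new knob is wired up as an ipv4 sysctl (see the sysctl_net_ipv4.c entry in the diffstat). A hedged userspace sketch for flipping it follows; the proc path is assumed from the usual net.ipv4 sysctl naming convention.

	/* Illustrative sketch: use inbound interface addresses as the
	 * source of ICMP error messages. */
	#include <stdio.h>

	static int set_icmp_errors_use_inbound_ifaddr(int on)
	{
		FILE *f = fopen("/proc/sys/net/ipv4/icmp_errors_use_inbound_ifaddr", "w");

		if (f == NULL)
			return -1;
		fprintf(f, "%d\n", on ? 1 : 0);
		return fclose(f);
	}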
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 47012b93cad2..f8b172f89811 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -360,14 +360,14 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
 	err = copied;
 
 	/* Reset and regenerate socket error */
-	spin_lock_irq(&sk->sk_error_queue.lock);
+	spin_lock_bh(&sk->sk_error_queue.lock);
 	sk->sk_err = 0;
 	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
 		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
-		spin_unlock_irq(&sk->sk_error_queue.lock);
+		spin_unlock_bh(&sk->sk_error_queue.lock);
 		sk->sk_error_report(sk);
 	} else
-		spin_unlock_irq(&sk->sk_error_queue.lock);
+		spin_unlock_bh(&sk->sk_error_queue.lock);
 
 out_free_skb:
 	kfree_skb(skb);
diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile
index a788461a40c9..30e85de9ffff 100644
--- a/net/ipv4/ipvs/Makefile
+++ b/net/ipv4/ipvs/Makefile
@@ -11,7 +11,7 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
 
 ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o	   \
 		ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o			   \
-		ip_vs_est.o ip_vs_proto.o ip_vs_proto_icmp.o		   \
+		ip_vs_est.o ip_vs_proto.o				   \
 		$(ip_vs_proto-objs-y)
 
 
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 253c46252bd5..867d4e9c6594 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -216,9 +216,6 @@ int ip_vs_protocol_init(void)
 #ifdef CONFIG_IP_VS_PROTO_UDP
 	REGISTER_PROTOCOL(&ip_vs_protocol_udp);
 #endif
-#ifdef CONFIG_IP_VS_PROTO_ICMP
-	REGISTER_PROTOCOL(&ip_vs_protocol_icmp);
-#endif
 #ifdef CONFIG_IP_VS_PROTO_AH
 	REGISTER_PROTOCOL(&ip_vs_protocol_ah);
 #endif
diff --git a/net/ipv4/ipvs/ip_vs_proto_icmp.c b/net/ipv4/ipvs/ip_vs_proto_icmp.c
deleted file mode 100644
index 191e94aa1c1f..000000000000
--- a/net/ipv4/ipvs/ip_vs_proto_icmp.c
+++ /dev/null
@@ -1,182 +0,0 @@
1/*
2 * ip_vs_proto_icmp.c: ICMP load balancing support for IP Virtual Server
3 *
4 * Authors: Julian Anastasov <ja@ssi.bg>, March 2002
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation;
9 *
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/icmp.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter_ipv4.h>
17
18#include <net/ip_vs.h>
19
20
21static int icmp_timeouts[1] = { 1*60*HZ };
22
23static char * icmp_state_name_table[1] = { "ICMP" };
24
25static struct ip_vs_conn *
26icmp_conn_in_get(const struct sk_buff *skb,
27 struct ip_vs_protocol *pp,
28 const struct iphdr *iph,
29 unsigned int proto_off,
30 int inverse)
31{
32#if 0
33 struct ip_vs_conn *cp;
34
35 if (likely(!inverse)) {
36 cp = ip_vs_conn_in_get(iph->protocol,
37 iph->saddr, 0,
38 iph->daddr, 0);
39 } else {
40 cp = ip_vs_conn_in_get(iph->protocol,
41 iph->daddr, 0,
42 iph->saddr, 0);
43 }
44
45 return cp;
46
47#else
48 return NULL;
49#endif
50}
51
52static struct ip_vs_conn *
53icmp_conn_out_get(const struct sk_buff *skb,
54 struct ip_vs_protocol *pp,
55 const struct iphdr *iph,
56 unsigned int proto_off,
57 int inverse)
58{
59#if 0
60 struct ip_vs_conn *cp;
61
62 if (likely(!inverse)) {
63 cp = ip_vs_conn_out_get(iph->protocol,
64 iph->saddr, 0,
65 iph->daddr, 0);
66 } else {
67 cp = ip_vs_conn_out_get(IPPROTO_UDP,
68 iph->daddr, 0,
69 iph->saddr, 0);
70 }
71
72 return cp;
73#else
74 return NULL;
75#endif
76}
77
78static int
79icmp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
80 int *verdict, struct ip_vs_conn **cpp)
81{
82 *verdict = NF_ACCEPT;
83 return 0;
84}
85
86static int
87icmp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
88{
89 if (!(skb->nh.iph->frag_off & __constant_htons(IP_OFFSET))) {
90 if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
91 if (ip_vs_checksum_complete(skb, skb->nh.iph->ihl * 4)) {
92 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "Failed checksum for");
93 return 0;
94 }
95 }
96 }
97 return 1;
98}
99
100static void
101icmp_debug_packet(struct ip_vs_protocol *pp,
102 const struct sk_buff *skb,
103 int offset,
104 const char *msg)
105{
106 char buf[256];
107 struct iphdr _iph, *ih;
108
109 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
110 if (ih == NULL)
111 sprintf(buf, "%s TRUNCATED", pp->name);
112 else if (ih->frag_off & __constant_htons(IP_OFFSET))
113 sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
114 pp->name, NIPQUAD(ih->saddr),
115 NIPQUAD(ih->daddr));
116 else {
117 struct icmphdr _icmph, *ic;
118
119 ic = skb_header_pointer(skb, offset + ih->ihl*4,
120 sizeof(_icmph), &_icmph);
121 if (ic == NULL)
122 sprintf(buf, "%s TRUNCATED to %u bytes\n",
123 pp->name, skb->len - offset);
124 else
125 sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d",
126 pp->name, NIPQUAD(ih->saddr),
127 NIPQUAD(ih->daddr),
128 ic->type, ic->code);
129 }
130 printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
131}
132
133static int
134icmp_state_transition(struct ip_vs_conn *cp, int direction,
135 const struct sk_buff *skb,
136 struct ip_vs_protocol *pp)
137{
138 cp->timeout = pp->timeout_table[IP_VS_ICMP_S_NORMAL];
139 return 1;
140}
141
142static int
143icmp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
144{
145 int num;
146 char **names;
147
148 num = IP_VS_ICMP_S_LAST;
149 names = icmp_state_name_table;
150 return ip_vs_set_state_timeout(pp->timeout_table, num, names, sname, to);
151}
152
153
154static void icmp_init(struct ip_vs_protocol *pp)
155{
156 pp->timeout_table = icmp_timeouts;
157}
158
159static void icmp_exit(struct ip_vs_protocol *pp)
160{
161}
162
163struct ip_vs_protocol ip_vs_protocol_icmp = {
164 .name = "ICMP",
165 .protocol = IPPROTO_ICMP,
166 .dont_defrag = 0,
167 .init = icmp_init,
168 .exit = icmp_exit,
169 .conn_schedule = icmp_conn_schedule,
170 .conn_in_get = icmp_conn_in_get,
171 .conn_out_get = icmp_conn_out_get,
172 .snat_handler = NULL,
173 .dnat_handler = NULL,
174 .csum_check = icmp_csum_check,
175 .state_transition = icmp_state_transition,
176 .register_app = NULL,
177 .unregister_app = NULL,
178 .app_conn_bind = NULL,
179 .debug_packet = icmp_debug_packet,
180 .timeout_change = NULL,
181 .set_state_timeout = icmp_set_state_timeout,
182};
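
The handler deleted here was never functional: both connection lookups were compiled out under "#if 0" (the disabled icmp_conn_out_get even passed IPPROTO_UDP in its inverse branch, a copy-paste leftover), and conn_schedule unconditionally returned NF_ACCEPT, so IPVS never actually balanced ICMP. Dropping the file together with its CONFIG_IP_VS_PROTO_ICMP registration above changes no behaviour. For reference, a working lookup would have to key on addresses alone, since ICMP has no ports; this sketch simply re-enables the disabled body:

    static struct ip_vs_conn *
    icmp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
                     const struct iphdr *iph, unsigned int proto_off, int inverse)
    {
            /* same as the "#if 0" body above: port fields are 0 for ICMP */
            if (likely(!inverse))
                    return ip_vs_conn_in_get(iph->protocol, iph->saddr, 0,
                                             iph->daddr, 0);
            return ip_vs_conn_in_get(iph->protocol, iph->daddr, 0,
                                     iph->saddr, 0);
    }
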
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index cf2e6bcf7973..c9cf8726051d 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -31,6 +31,7 @@
31#include <linux/igmp.h> 31#include <linux/igmp.h>
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/module.h>
34#include <linux/mroute.h> 35#include <linux/mroute.h>
35#include <linux/init.h> 36#include <linux/init.h>
36#include <net/ip.h> 37#include <net/ip.h>
@@ -247,3 +248,4 @@ static void __exit drr_exit(void)
247 248
248module_init(drr_init); 249module_init(drr_init);
249module_exit(drr_exit); 250module_exit(drr_exit);
251MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c
index 805a16e47de5..5249dbe7c559 100644
--- a/net/ipv4/multipath_random.c
+++ b/net/ipv4/multipath_random.c
@@ -31,6 +31,7 @@
31#include <linux/igmp.h> 31#include <linux/igmp.h>
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/module.h>
34#include <linux/mroute.h> 35#include <linux/mroute.h>
35#include <linux/init.h> 36#include <linux/init.h>
36#include <net/ip.h> 37#include <net/ip.h>
@@ -126,3 +127,4 @@ static void __exit random_exit(void)
126 127
127module_init(random_init); 128module_init(random_init);
128module_exit(random_exit); 129module_exit(random_exit);
130MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c
index 061b6b253982..b6cd2870478f 100644
--- a/net/ipv4/multipath_rr.c
+++ b/net/ipv4/multipath_rr.c
@@ -31,6 +31,7 @@
31#include <linux/igmp.h> 31#include <linux/igmp.h>
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/module.h>
34#include <linux/mroute.h> 35#include <linux/mroute.h>
35#include <linux/init.h> 36#include <linux/init.h>
36#include <net/ip.h> 37#include <net/ip.h>
@@ -93,3 +94,4 @@ static void __exit rr_exit(void)
93 94
94module_init(rr_init); 95module_init(rr_init);
95module_exit(rr_exit); 96module_exit(rr_exit);
97MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
index c3d2ca1a6781..bd7d75b6abe0 100644
--- a/net/ipv4/multipath_wrandom.c
+++ b/net/ipv4/multipath_wrandom.c
@@ -31,6 +31,7 @@
31#include <linux/igmp.h> 31#include <linux/igmp.h>
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/module.h>
34#include <linux/mroute.h> 35#include <linux/mroute.h>
35#include <linux/init.h> 36#include <linux/init.h>
36#include <net/ip.h> 37#include <net/ip.h>
@@ -342,3 +343,4 @@ static void __exit wrandom_exit(void)
342 343
343module_init(wrandom_init); 344module_init(wrandom_init);
344module_exit(wrandom_exit); 345module_exit(wrandom_exit);
346MODULE_LICENSE("GPL");
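
All four multipath schedulers pick up MODULE_LICENSE("GPL") and the <linux/module.h> include it needs; without the tag, loading the module taints the kernel and GPL-only exports become unavailable to it. The resulting skeleton, with placeholder names:

    #include <linux/module.h>
    #include <linux/init.h>

    static int __init example_init(void)
    {
            /* register the scheduler with the multipath core here */
            return 0;
    }

    static void __exit example_exit(void)
    {
            /* unregister it here */
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");      /* absent: "module license unspecified" taint */
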
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 46ca45f74d85..bc59f7b39805 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -256,6 +256,7 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
256{ 256{
257 struct list_head *e = v; 257 struct list_head *e = v;
258 258
259 ++*pos;
259 e = e->next; 260 e = e->next;
260 261
261 if (e == &ip_conntrack_expect_list) 262 if (e == &ip_conntrack_expect_list)
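
The one-line fix restores the seq_file contract: a ->next() callback must advance *pos along with the cursor, because seq_read() uses *pos to resume partial reads; without it, a read spanning multiple buffers repeats or skips entries. Minimal form of the fixed callback (the list head below stands in for the module's real expectation list):

    #include <linux/list.h>
    #include <linux/seq_file.h>

    static LIST_HEAD(ip_conntrack_expect_list);     /* stand-in for the real list */

    static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
    {
            struct list_head *e = v;

            ++*pos;         /* keep the file position in step with the cursor */
            e = e->next;

            if (e == &ip_conntrack_expect_list)
                    return NULL;
            return e;
    }
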
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 25ab9fabdcba..2d44b07688af 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -223,7 +223,7 @@ static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned
223 curr_table->table[count].last_seen = 0; 223 curr_table->table[count].last_seen = 0;
224 curr_table->table[count].addr = 0; 224 curr_table->table[count].addr = 0;
225 curr_table->table[count].ttl = 0; 225 curr_table->table[count].ttl = 0;
226 memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); 226 memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
227 curr_table->table[count].oldest_pkt = 0; 227 curr_table->table[count].oldest_pkt = 0;
228 curr_table->table[count].time_pos = 0; 228 curr_table->table[count].time_pos = 0;
229 curr_table->time_info[count].position = count; 229 curr_table->time_info[count].position = count;
@@ -502,7 +502,7 @@ match(const struct sk_buff *skb,
502 location = time_info[curr_table->time_pos].position; 502 location = time_info[curr_table->time_pos].position;
503 hash_table[r_list[location].hash_entry] = -1; 503 hash_table[r_list[location].hash_entry] = -1;
504 hash_table[hash_result] = location; 504 hash_table[hash_result] = location;
505 memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); 505 memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
506 r_list[location].time_pos = curr_table->time_pos; 506 r_list[location].time_pos = curr_table->time_pos;
507 r_list[location].addr = addr; 507 r_list[location].addr = addr;
508 r_list[location].ttl = ttl; 508 r_list[location].ttl = ttl;
@@ -631,7 +631,7 @@ match(const struct sk_buff *skb,
631 r_list[location].last_seen = 0; 631 r_list[location].last_seen = 0;
632 r_list[location].addr = 0; 632 r_list[location].addr = 0;
633 r_list[location].ttl = 0; 633 r_list[location].ttl = 0;
634 memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); 634 memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
635 r_list[location].oldest_pkt = 0; 635 r_list[location].oldest_pkt = 0;
636 ans = !info->invert; 636 ans = !info->invert;
637 } 637 }
@@ -734,10 +734,10 @@ checkentry(const char *tablename,
734 memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot); 734 memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot);
735#ifdef DEBUG 735#ifdef DEBUG
736 if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n", 736 if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n",
737 sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot); 737 sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot);
738#endif 738#endif
739 739
740 hold = vmalloc(sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot); 740 hold = vmalloc(sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot);
741#ifdef DEBUG 741#ifdef DEBUG
742 if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n"); 742 if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n");
743#endif 743#endif
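
All four sizeof() changes close the same 64-bit hole: last_pkts holds unsigned long jiffies stamps, so sizing the vmalloc() and memset() calls with sizeof(u_int32_t) under-allocates and under-clears by half on 64-bit machines. A hypothetical helper showing the safer idiom, taking sizeof() from the pointee so the allocation and the element type can never drift apart again:

    #include <linux/vmalloc.h>
    #include <linux/string.h>

    static unsigned long *alloc_pkt_list(unsigned int pkts, unsigned int entries)
    {
            unsigned long *hold = vmalloc(sizeof(*hold) * pkts * entries);

            if (hold != NULL)
                    memset(hold, 0, sizeof(*hold) * pkts * entries);
            return hold;
    }
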
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5b1ec586bae6..d1835b1bc8c4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -259,7 +259,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
259 return 0; 259 return 0;
260} 260}
261 261
262static int raw_send_hdrinc(struct sock *sk, void *from, int length, 262static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
263 struct rtable *rt, 263 struct rtable *rt,
264 unsigned int flags) 264 unsigned int flags)
265{ 265{
@@ -298,7 +298,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, int length,
298 goto error_fault; 298 goto error_fault;
299 299
300 /* We don't modify invalid header */ 300 /* We don't modify invalid header */
301 if (length >= sizeof(*iph) && iph->ihl * 4 <= length) { 301 if (length >= sizeof(*iph) && iph->ihl * 4U <= length) {
302 if (!iph->saddr) 302 if (!iph->saddr)
303 iph->saddr = rt->rt_src; 303 iph->saddr = rt->rt_src;
304 iph->check = 0; 304 iph->check = 0;
@@ -332,7 +332,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
332 u8 __user *type = NULL; 332 u8 __user *type = NULL;
333 u8 __user *code = NULL; 333 u8 __user *code = NULL;
334 int probed = 0; 334 int probed = 0;
335 int i; 335 unsigned int i;
336 336
337 if (!msg->msg_iov) 337 if (!msg->msg_iov)
338 return; 338 return;
@@ -384,7 +384,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
384 int err; 384 int err;
385 385
386 err = -EMSGSIZE; 386 err = -EMSGSIZE;
387 if (len < 0 || len > 0xFFFF) 387 if (len > 0xFFFF)
388 goto out; 388 goto out;
389 389
390 /* 390 /*
@@ -514,7 +514,10 @@ done:
514 kfree(ipc.opt); 514 kfree(ipc.opt);
515 ip_rt_put(rt); 515 ip_rt_put(rt);
516 516
517out: return err < 0 ? err : len; 517out:
518 if (err < 0)
519 return err;
520 return len;
518 521
519do_confirm: 522do_confirm:
520 dst_confirm(&rt->u.dst); 523 dst_confirm(&rt->u.dst);
@@ -610,7 +613,10 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
610 copied = skb->len; 613 copied = skb->len;
611done: 614done:
612 skb_free_datagram(sk, skb); 615 skb_free_datagram(sk, skb);
613out: return err ? err : copied; 616out:
617 if (err)
618 return err;
619 return copied;
614} 620}
615 621
616static int raw_init(struct sock *sk) 622static int raw_init(struct sock *sk)
@@ -691,11 +697,11 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
691 struct sk_buff *skb; 697 struct sk_buff *skb;
692 int amount = 0; 698 int amount = 0;
693 699
694 spin_lock_irq(&sk->sk_receive_queue.lock); 700 spin_lock_bh(&sk->sk_receive_queue.lock);
695 skb = skb_peek(&sk->sk_receive_queue); 701 skb = skb_peek(&sk->sk_receive_queue);
696 if (skb != NULL) 702 if (skb != NULL)
697 amount = skb->len; 703 amount = skb->len;
698 spin_unlock_irq(&sk->sk_receive_queue.lock); 704 spin_unlock_bh(&sk->sk_receive_queue.lock);
699 return put_user(amount, (int __user *)arg); 705 return put_user(amount, (int __user *)arg);
700 } 706 }
701 707
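
Three related cleanups in raw.c: length becomes size_t, so the len < 0 test is dead and is dropped; the header sanity check multiplies ihl by 4U so the comparison against the now-unsigned length stays entirely in unsigned arithmetic; and the receive-queue lock switches from spin_lock_irq to spin_lock_bh, which suffices on the assumption that the queue is only touched from process and softirq context, never from hard interrupts. The out: returns are also unfolded from ternaries into explicit tests, a readability change only. The sanity check, distilled:

    #include <linux/ip.h>

    /* true when the declared IP header length fits inside the buffer */
    static int iph_fits(const struct iphdr *iph, size_t length)
    {
            return length >= sizeof(*iph) && iph->ihl * 4U <= length;
    }
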
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a682d28e247b..f4d53c919869 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2581,7 +2581,7 @@ int ip_route_output_key(struct rtable **rp, struct flowi *flp)
2581} 2581}
2582 2582
2583static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 2583static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2584 int nowait) 2584 int nowait, unsigned int flags)
2585{ 2585{
2586 struct rtable *rt = (struct rtable*)skb->dst; 2586 struct rtable *rt = (struct rtable*)skb->dst;
2587 struct rtmsg *r; 2587 struct rtmsg *r;
@@ -2591,9 +2591,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2591#ifdef CONFIG_IP_MROUTE 2591#ifdef CONFIG_IP_MROUTE
2592 struct rtattr *eptr; 2592 struct rtattr *eptr;
2593#endif 2593#endif
2594 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r)); 2594 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
2595 r = NLMSG_DATA(nlh); 2595 r = NLMSG_DATA(nlh);
2596 nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
2597 r->rtm_family = AF_INET; 2596 r->rtm_family = AF_INET;
2598 r->rtm_dst_len = 32; 2597 r->rtm_dst_len = 32;
2599 r->rtm_src_len = 0; 2598 r->rtm_src_len = 0;
@@ -2744,7 +2743,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2744 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; 2743 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2745 2744
2746 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2745 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2747 RTM_NEWROUTE, 0); 2746 RTM_NEWROUTE, 0, 0);
2748 if (!err) 2747 if (!err)
2749 goto out_free; 2748 goto out_free;
2750 if (err < 0) { 2749 if (err < 0) {
@@ -2781,8 +2780,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2781 continue; 2780 continue;
2782 skb->dst = dst_clone(&rt->u.dst); 2781 skb->dst = dst_clone(&rt->u.dst);
2783 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 2782 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
2784 cb->nlh->nlmsg_seq, 2783 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2785 RTM_NEWROUTE, 1) <= 0) { 2784 1, NLM_F_MULTI) <= 0) {
2786 dst_release(xchg(&skb->dst, NULL)); 2785 dst_release(xchg(&skb->dst, NULL));
2787 rcu_read_unlock_bh(); 2786 rcu_read_unlock_bh();
2788 goto done; 2787 goto done;
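
Dump replies are multipart netlink messages and must carry NLM_F_MULTI until the closing NLMSG_DONE; one-shot replies must not. Passing the flags into rt_fill_info() and stamping them at allocation time replaces the fragile fix-up of nlh->nlmsg_flags after NLMSG_PUT(). Assumed shape of the new macro (not verified against the headers, but implied by the converted call):

    /* NLMSG_PUT() plus caller-supplied flags */
    #define NLMSG_NEW(skb, pid, seq, type, len, flags)              \
    ({      if (skb_tailroom(skb) < (int)NLMSG_SPACE(len))          \
                    goto nlmsg_failure;                             \
            __nlmsg_put(skb, pid, seq, type, len, flags); })

Callers then read naturally: 0 flags from inet_rtm_getroute(), NLM_F_MULTI from the ip_rt_dump() loop.
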
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e923d2f021aa..72d014442185 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -169,10 +169,10 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
169 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; 169 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
170} 170}
171 171
172extern struct or_calltable or_ipv4; 172extern struct request_sock_ops tcp_request_sock_ops;
173 173
174static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, 174static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
175 struct open_request *req, 175 struct request_sock *req,
176 struct dst_entry *dst) 176 struct dst_entry *dst)
177{ 177{
178 struct tcp_sock *tp = tcp_sk(sk); 178 struct tcp_sock *tp = tcp_sk(sk);
@@ -182,7 +182,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
182 if (child) 182 if (child)
183 tcp_acceptq_queue(sk, req, child); 183 tcp_acceptq_queue(sk, req, child);
184 else 184 else
185 tcp_openreq_free(req); 185 reqsk_free(req);
186 186
187 return child; 187 return child;
188} 188}
@@ -190,10 +190,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
190struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 190struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
191 struct ip_options *opt) 191 struct ip_options *opt)
192{ 192{
193 struct inet_request_sock *ireq;
194 struct tcp_request_sock *treq;
193 struct tcp_sock *tp = tcp_sk(sk); 195 struct tcp_sock *tp = tcp_sk(sk);
194 __u32 cookie = ntohl(skb->h.th->ack_seq) - 1; 196 __u32 cookie = ntohl(skb->h.th->ack_seq) - 1;
195 struct sock *ret = sk; 197 struct sock *ret = sk;
196 struct open_request *req; 198 struct request_sock *req;
197 int mss; 199 int mss;
198 struct rtable *rt; 200 struct rtable *rt;
199 __u8 rcv_wscale; 201 __u8 rcv_wscale;
@@ -209,19 +211,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
209 211
210 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); 212 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
211 213
212 req = tcp_openreq_alloc();
213 ret = NULL; 214 ret = NULL;
215 req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
214 if (!req) 216 if (!req)
215 goto out; 217 goto out;
216 218
217 req->rcv_isn = htonl(skb->h.th->seq) - 1; 219 ireq = inet_rsk(req);
218 req->snt_isn = cookie; 220 treq = tcp_rsk(req);
221 treq->rcv_isn = htonl(skb->h.th->seq) - 1;
222 treq->snt_isn = cookie;
219 req->mss = mss; 223 req->mss = mss;
220 req->rmt_port = skb->h.th->source; 224 ireq->rmt_port = skb->h.th->source;
221 req->af.v4_req.loc_addr = skb->nh.iph->daddr; 225 ireq->loc_addr = skb->nh.iph->daddr;
222 req->af.v4_req.rmt_addr = skb->nh.iph->saddr; 226 ireq->rmt_addr = skb->nh.iph->saddr;
223 req->class = &or_ipv4; /* for savety */ 227 ireq->opt = NULL;
224 req->af.v4_req.opt = NULL;
225 228
226 /* We threw the options of the initial SYN away, so we hope 229
227 * the ACK carries the same options again (see RFC1122 4.2.3.8) 230 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -229,17 +232,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
229 if (opt && opt->optlen) { 232 if (opt && opt->optlen) {
230 int opt_size = sizeof(struct ip_options) + opt->optlen; 233 int opt_size = sizeof(struct ip_options) + opt->optlen;
231 234
232 req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC); 235 ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
233 if (req->af.v4_req.opt) { 236 if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) {
234 if (ip_options_echo(req->af.v4_req.opt, skb)) { 237 kfree(ireq->opt);
235 kfree(req->af.v4_req.opt); 238 ireq->opt = NULL;
236 req->af.v4_req.opt = NULL;
237 }
238 } 239 }
239 } 240 }
240 241
241 req->snd_wscale = req->rcv_wscale = req->tstamp_ok = 0; 242 ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0;
242 req->wscale_ok = req->sack_ok = 0; 243 ireq->wscale_ok = ireq->sack_ok = 0;
243 req->expires = 0UL; 244 req->expires = 0UL;
244 req->retrans = 0; 245 req->retrans = 0;
245 246
@@ -253,15 +254,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
253 struct flowi fl = { .nl_u = { .ip4_u = 254 struct flowi fl = { .nl_u = { .ip4_u =
254 { .daddr = ((opt && opt->srr) ? 255 { .daddr = ((opt && opt->srr) ?
255 opt->faddr : 256 opt->faddr :
256 req->af.v4_req.rmt_addr), 257 ireq->rmt_addr),
257 .saddr = req->af.v4_req.loc_addr, 258 .saddr = ireq->loc_addr,
258 .tos = RT_CONN_FLAGS(sk) } }, 259 .tos = RT_CONN_FLAGS(sk) } },
259 .proto = IPPROTO_TCP, 260 .proto = IPPROTO_TCP,
260 .uli_u = { .ports = 261 .uli_u = { .ports =
261 { .sport = skb->h.th->dest, 262 { .sport = skb->h.th->dest,
262 .dport = skb->h.th->source } } }; 263 .dport = skb->h.th->source } } };
263 if (ip_route_output_key(&rt, &fl)) { 264 if (ip_route_output_key(&rt, &fl)) {
264 tcp_openreq_free(req); 265 reqsk_free(req);
265 goto out; 266 goto out;
266 } 267 }
267 } 268 }
@@ -272,7 +273,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
272 &req->rcv_wnd, &req->window_clamp, 273 &req->rcv_wnd, &req->window_clamp,
273 0, &rcv_wscale); 274 0, &rcv_wscale);
274 /* BTW win scale with syncookies is 0 by definition */ 275 /* BTW win scale with syncookies is 0 by definition */
275 req->rcv_wscale = rcv_wscale; 276 ireq->rcv_wscale = rcv_wscale;
276 277
277 ret = get_cookie_sock(sk, skb, req, &rt->u.dst); 278 ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
278out: return ret; 279out: return ret;
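
The syncookie path is converted from the old open_request to the generic request_sock: protocol-independent fields (mss, expires, retrans, rcv_wnd) stay on the request itself, while address-family and TCP-specific state move behind the inet_rsk()/tcp_rsk() accessors. Those accessors are plain casts, which is safe on the assumed nesting below (field comments are illustrative):

    struct request_sock { /* generic part: mss, retrans, expires, ops */ };

    struct inet_request_sock {
            struct request_sock req;        /* must stay first */
            /* loc_addr, rmt_addr, rmt_port, wscale/sack/tstamp bits */
    };

    struct tcp_request_sock {
            struct inet_request_sock ireq;  /* must stay first */
            /* rcv_isn, snt_isn */
    };

    static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
    {
            return (struct inet_request_sock *)sk;
    }

    static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *sk)
    {
            return (struct tcp_request_sock *)sk;
    }

The rewritten option-echo branch also loses a level of nesting: allocation failure and ip_options_echo() failure now collapse into a single test.
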
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3aafb298c1c1..23068bddbf0b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -23,6 +23,7 @@ extern int sysctl_ip_nonlocal_bind;
23extern int sysctl_icmp_echo_ignore_all; 23extern int sysctl_icmp_echo_ignore_all;
24extern int sysctl_icmp_echo_ignore_broadcasts; 24extern int sysctl_icmp_echo_ignore_broadcasts;
25extern int sysctl_icmp_ignore_bogus_error_responses; 25extern int sysctl_icmp_ignore_bogus_error_responses;
26extern int sysctl_icmp_errors_use_inbound_ifaddr;
26 27
27/* From ip_fragment.c */ 28/* From ip_fragment.c */
28extern int sysctl_ipfrag_low_thresh; 29extern int sysctl_ipfrag_low_thresh;
@@ -396,6 +397,14 @@ ctl_table ipv4_table[] = {
396 .proc_handler = &proc_dointvec 397 .proc_handler = &proc_dointvec
397 }, 398 },
398 { 399 {
400 .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR,
401 .procname = "icmp_errors_use_inbound_ifaddr",
402 .data = &sysctl_icmp_errors_use_inbound_ifaddr,
403 .maxlen = sizeof(int),
404 .mode = 0644,
405 .proc_handler = &proc_dointvec
406 },
407 {
399 .ctl_name = NET_IPV4_ROUTE, 408 .ctl_name = NET_IPV4_ROUTE,
400 .procname = "route", 409 .procname = "route",
401 .maxlen = 0, 410 .maxlen = 0,
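
This wires up the new icmp_errors_use_inbound_ifaddr knob: an extern for the flag defined in net/ipv4/icmp.c plus a ctl_table entry whose proc_dointvec handler exposes the int read/write at /proc/sys/net/ipv4/icmp_errors_use_inbound_ifaddr (the NET_IPV4_* binary ctl_name is presumably added to include/linux/sysctl.h in the same merge). The pattern, reduced to its parts:

    extern int sysctl_icmp_errors_use_inbound_ifaddr;   /* defined in icmp.c */

    static struct ctl_table icmp_ifaddr_entry = {
            .ctl_name       = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR,
            .procname       = "icmp_errors_use_inbound_ifaddr",
            .data           = &sysctl_icmp_errors_use_inbound_ifaddr,
            .maxlen         = sizeof(int),
            .mode           = 0644,
            .proc_handler   = &proc_dointvec,
    };

Once set (echo 1 > /proc/sys/net/ipv4/icmp_errors_use_inbound_ifaddr), ICMP errors are sourced from the address of the interface the offending packet arrived on.
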
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a037bafcba3c..674bbd8cfd36 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,7 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
271 271
272DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); 272DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
273 273
274kmem_cache_t *tcp_openreq_cachep;
275kmem_cache_t *tcp_bucket_cachep; 274kmem_cache_t *tcp_bucket_cachep;
276kmem_cache_t *tcp_timewait_cachep; 275kmem_cache_t *tcp_timewait_cachep;
277 276
@@ -317,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure);
317static __inline__ unsigned int tcp_listen_poll(struct sock *sk, 316static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
318 poll_table *wait) 317 poll_table *wait)
319{ 318{
320 return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0; 319 return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
321} 320}
322 321
323/* 322/*
@@ -463,28 +462,15 @@ int tcp_listen_start(struct sock *sk)
463{ 462{
464 struct inet_sock *inet = inet_sk(sk); 463 struct inet_sock *inet = inet_sk(sk);
465 struct tcp_sock *tp = tcp_sk(sk); 464 struct tcp_sock *tp = tcp_sk(sk);
466 struct tcp_listen_opt *lopt; 465 int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
466
467 if (rc != 0)
468 return rc;
467 469
468 sk->sk_max_ack_backlog = 0; 470 sk->sk_max_ack_backlog = 0;
469 sk->sk_ack_backlog = 0; 471 sk->sk_ack_backlog = 0;
470 tp->accept_queue = tp->accept_queue_tail = NULL;
471 rwlock_init(&tp->syn_wait_lock);
472 tcp_delack_init(tp); 472 tcp_delack_init(tp);
473 473
474 lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
475 if (!lopt)
476 return -ENOMEM;
477
478 memset(lopt, 0, sizeof(struct tcp_listen_opt));
479 for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
480 if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
481 break;
482 get_random_bytes(&lopt->hash_rnd, 4);
483
484 write_lock_bh(&tp->syn_wait_lock);
485 tp->listen_opt = lopt;
486 write_unlock_bh(&tp->syn_wait_lock);
487
488 /* There is a race window here: we announce ourselves listening, 474
489 * but this transition is still not validated by get_port(). 475 * but this transition is still not validated by get_port().
490 * It is OK, because this socket enters to hash table only 476 * It is OK, because this socket enters to hash table only
@@ -501,10 +487,7 @@ int tcp_listen_start(struct sock *sk)
501 } 487 }
502 488
503 sk->sk_state = TCP_CLOSE; 489 sk->sk_state = TCP_CLOSE;
504 write_lock_bh(&tp->syn_wait_lock); 490 reqsk_queue_destroy(&tp->accept_queue);
505 tp->listen_opt = NULL;
506 write_unlock_bh(&tp->syn_wait_lock);
507 kfree(lopt);
508 return -EADDRINUSE; 491 return -EADDRINUSE;
509} 492}
510 493
@@ -516,25 +499,23 @@ int tcp_listen_start(struct sock *sk)
516static void tcp_listen_stop (struct sock *sk) 499static void tcp_listen_stop (struct sock *sk)
517{ 500{
518 struct tcp_sock *tp = tcp_sk(sk); 501 struct tcp_sock *tp = tcp_sk(sk);
519 struct tcp_listen_opt *lopt = tp->listen_opt; 502 struct listen_sock *lopt;
520 struct open_request *acc_req = tp->accept_queue; 503 struct request_sock *acc_req;
521 struct open_request *req; 504 struct request_sock *req;
522 int i; 505 int i;
523 506
524 tcp_delete_keepalive_timer(sk); 507 tcp_delete_keepalive_timer(sk);
525 508
526 /* make all the listen_opt local to us */ 509 /* make all the listen_opt local to us */
527 write_lock_bh(&tp->syn_wait_lock); 510 lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
528 tp->listen_opt = NULL; 511 acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
529 write_unlock_bh(&tp->syn_wait_lock);
530 tp->accept_queue = tp->accept_queue_tail = NULL;
531 512
532 if (lopt->qlen) { 513 if (lopt->qlen) {
533 for (i = 0; i < TCP_SYNQ_HSIZE; i++) { 514 for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
534 while ((req = lopt->syn_table[i]) != NULL) { 515 while ((req = lopt->syn_table[i]) != NULL) {
535 lopt->syn_table[i] = req->dl_next; 516 lopt->syn_table[i] = req->dl_next;
536 lopt->qlen--; 517 lopt->qlen--;
537 tcp_openreq_free(req); 518 reqsk_free(req);
538 519
539 /* Following specs, it would be better either to send FIN 520 /* Following specs, it would be better either to send FIN
540 * (and enter FIN-WAIT-1, it is normal close) 521 * (and enter FIN-WAIT-1, it is normal close)
@@ -574,7 +555,7 @@ static void tcp_listen_stop (struct sock *sk)
574 sock_put(child); 555 sock_put(child);
575 556
576 sk_acceptq_removed(sk); 557 sk_acceptq_removed(sk);
577 tcp_openreq_fastfree(req); 558 __reqsk_free(req);
578 } 559 }
579 BUG_TRAP(!sk->sk_ack_backlog); 560 BUG_TRAP(!sk->sk_ack_backlog);
580} 561}
@@ -1345,7 +1326,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1345 1326
1346 cleanup_rbuf(sk, copied); 1327 cleanup_rbuf(sk, copied);
1347 1328
1348 if (tp->ucopy.task == user_recv) { 1329 if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
1349 /* Install new reader */ 1330 /* Install new reader */
1350 if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) { 1331 if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
1351 user_recv = current; 1332 user_recv = current;
@@ -1868,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo)
1868 prepare_to_wait_exclusive(sk->sk_sleep, &wait, 1849 prepare_to_wait_exclusive(sk->sk_sleep, &wait,
1869 TASK_INTERRUPTIBLE); 1850 TASK_INTERRUPTIBLE);
1870 release_sock(sk); 1851 release_sock(sk);
1871 if (!tp->accept_queue) 1852 if (reqsk_queue_empty(&tp->accept_queue))
1872 timeo = schedule_timeout(timeo); 1853 timeo = schedule_timeout(timeo);
1873 lock_sock(sk); 1854 lock_sock(sk);
1874 err = 0; 1855 err = 0;
1875 if (tp->accept_queue) 1856 if (!reqsk_queue_empty(&tp->accept_queue))
1876 break; 1857 break;
1877 err = -EINVAL; 1858 err = -EINVAL;
1878 if (sk->sk_state != TCP_LISTEN) 1859 if (sk->sk_state != TCP_LISTEN)
@@ -1895,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo)
1895struct sock *tcp_accept(struct sock *sk, int flags, int *err) 1876struct sock *tcp_accept(struct sock *sk, int flags, int *err)
1896{ 1877{
1897 struct tcp_sock *tp = tcp_sk(sk); 1878 struct tcp_sock *tp = tcp_sk(sk);
1898 struct open_request *req;
1899 struct sock *newsk; 1879 struct sock *newsk;
1900 int error; 1880 int error;
1901 1881
@@ -1906,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err)
1906 */ 1886 */
1907 error = -EINVAL; 1887 error = -EINVAL;
1908 if (sk->sk_state != TCP_LISTEN) 1888 if (sk->sk_state != TCP_LISTEN)
1909 goto out; 1889 goto out_err;
1910 1890
1911 /* Find already established connection */ 1891 /* Find already established connection */
1912 if (!tp->accept_queue) { 1892 if (reqsk_queue_empty(&tp->accept_queue)) {
1913 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 1893 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1914 1894
1915 /* If this is a non blocking socket don't sleep */ 1895 /* If this is a non blocking socket don't sleep */
1916 error = -EAGAIN; 1896 error = -EAGAIN;
1917 if (!timeo) 1897 if (!timeo)
1918 goto out; 1898 goto out_err;
1919 1899
1920 error = wait_for_connect(sk, timeo); 1900 error = wait_for_connect(sk, timeo);
1921 if (error) 1901 if (error)
1922 goto out; 1902 goto out_err;
1923 } 1903 }
1924 1904
1925 req = tp->accept_queue; 1905 newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
1926 if ((tp->accept_queue = req->dl_next) == NULL)
1927 tp->accept_queue_tail = NULL;
1928
1929 newsk = req->sk;
1930 sk_acceptq_removed(sk);
1931 tcp_openreq_fastfree(req);
1932 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); 1906 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
1933 release_sock(sk);
1934 return newsk;
1935
1936out: 1907out:
1937 release_sock(sk); 1908 release_sock(sk);
1909 return newsk;
1910out_err:
1911 newsk = NULL;
1938 *err = error; 1912 *err = error;
1939 return NULL; 1913 goto out;
1940} 1914}
1941 1915
1942/* 1916/*
@@ -2271,13 +2245,6 @@ void __init tcp_init(void)
2271 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), 2245 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
2272 sizeof(skb->cb)); 2246 sizeof(skb->cb));
2273 2247
2274 tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
2275 sizeof(struct open_request),
2276 0, SLAB_HWCACHE_ALIGN,
2277 NULL, NULL);
2278 if (!tcp_openreq_cachep)
2279 panic("tcp_init: Cannot alloc open_request cache.");
2280
2281 tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", 2248 tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
2282 sizeof(struct tcp_bind_bucket), 2249 sizeof(struct tcp_bind_bucket),
2283 0, SLAB_HWCACHE_ALIGN, 2250 0, SLAB_HWCACHE_ALIGN,
@@ -2338,7 +2305,7 @@ void __init tcp_init(void)
2338 (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket)); 2305 (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket));
2339 order++) 2306 order++)
2340 ; 2307 ;
2341 if (order > 4) { 2308 if (order >= 4) {
2342 sysctl_local_port_range[0] = 32768; 2309 sysctl_local_port_range[0] = 32768;
2343 sysctl_local_port_range[1] = 61000; 2310 sysctl_local_port_range[1] = 61000;
2344 sysctl_tcp_max_tw_buckets = 180000; 2311 sysctl_tcp_max_tw_buckets = 180000;
@@ -2374,7 +2341,6 @@ EXPORT_SYMBOL(tcp_destroy_sock);
2374EXPORT_SYMBOL(tcp_disconnect); 2341EXPORT_SYMBOL(tcp_disconnect);
2375EXPORT_SYMBOL(tcp_getsockopt); 2342EXPORT_SYMBOL(tcp_getsockopt);
2376EXPORT_SYMBOL(tcp_ioctl); 2343EXPORT_SYMBOL(tcp_ioctl);
2377EXPORT_SYMBOL(tcp_openreq_cachep);
2378EXPORT_SYMBOL(tcp_poll); 2344EXPORT_SYMBOL(tcp_poll);
2379EXPORT_SYMBOL(tcp_read_sock); 2345EXPORT_SYMBOL(tcp_read_sock);
2380EXPORT_SYMBOL(tcp_recvmsg); 2346EXPORT_SYMBOL(tcp_recvmsg);
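
The listen-side bookkeeping in tcp.c collapses into the shared request_sock queue: reqsk_queue_alloc()/reqsk_queue_destroy() replace the hand-rolled tcp_listen_opt setup, reqsk_queue_empty() replaces raw accept_queue pointer tests, and the dequeue in tcp_accept() becomes a single reqsk_queue_get_child() call, which is also why tcp_openreq_cachep disappears (the cache moves behind the generic API). A hypothetical helper condensed from the tcp_accept() hunk:

    static struct sock *accept_one(struct sock *sk, int flags, int *err)
    {
            struct tcp_sock *tp = tcp_sk(sk);

            if (reqsk_queue_empty(&tp->accept_queue)) {
                    long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

                    /* non-blocking sockets fail fast instead of sleeping */
                    *err = timeo ? wait_for_connect(sk, timeo) : -EAGAIN;
                    if (*err)
                            return NULL;
            }
            return reqsk_queue_get_child(&tp->accept_queue, sk);
    }

Note also the tcp_recvmsg() hunk, which gates installing a ucopy reader on !sysctl_tcp_low_latency, and the tcp_init() port-range heuristic, which changes from order > 4 to order >= 4.
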
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 8faa8948f75c..634befc07921 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -455,9 +455,10 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk,
455} 455}
456 456
457static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, 457static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
458 struct open_request *req, 458 struct request_sock *req,
459 u32 pid, u32 seq) 459 u32 pid, u32 seq)
460{ 460{
461 const struct inet_request_sock *ireq = inet_rsk(req);
461 struct inet_sock *inet = inet_sk(sk); 462 struct inet_sock *inet = inet_sk(sk);
462 unsigned char *b = skb->tail; 463 unsigned char *b = skb->tail;
463 struct tcpdiagmsg *r; 464 struct tcpdiagmsg *r;
@@ -482,9 +483,9 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
482 tmo = 0; 483 tmo = 0;
483 484
484 r->id.tcpdiag_sport = inet->sport; 485 r->id.tcpdiag_sport = inet->sport;
485 r->id.tcpdiag_dport = req->rmt_port; 486 r->id.tcpdiag_dport = ireq->rmt_port;
486 r->id.tcpdiag_src[0] = req->af.v4_req.loc_addr; 487 r->id.tcpdiag_src[0] = ireq->loc_addr;
487 r->id.tcpdiag_dst[0] = req->af.v4_req.rmt_addr; 488 r->id.tcpdiag_dst[0] = ireq->rmt_addr;
488 r->tcpdiag_expires = jiffies_to_msecs(tmo), 489 r->tcpdiag_expires = jiffies_to_msecs(tmo),
489 r->tcpdiag_rqueue = 0; 490 r->tcpdiag_rqueue = 0;
490 r->tcpdiag_wqueue = 0; 491 r->tcpdiag_wqueue = 0;
@@ -493,9 +494,9 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
493#ifdef CONFIG_IP_TCPDIAG_IPV6 494#ifdef CONFIG_IP_TCPDIAG_IPV6
494 if (r->tcpdiag_family == AF_INET6) { 495 if (r->tcpdiag_family == AF_INET6) {
495 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, 496 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
496 &req->af.v6_req.loc_addr); 497 &tcp6_rsk(req)->loc_addr);
497 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, 498 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
498 &req->af.v6_req.rmt_addr); 499 &tcp6_rsk(req)->rmt_addr);
499 } 500 }
500#endif 501#endif
501 nlh->nlmsg_len = skb->tail - b; 502 nlh->nlmsg_len = skb->tail - b;
@@ -513,7 +514,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
513 struct tcpdiag_entry entry; 514 struct tcpdiag_entry entry;
514 struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); 515 struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
515 struct tcp_sock *tp = tcp_sk(sk); 516 struct tcp_sock *tp = tcp_sk(sk);
516 struct tcp_listen_opt *lopt; 517 struct listen_sock *lopt;
517 struct rtattr *bc = NULL; 518 struct rtattr *bc = NULL;
518 struct inet_sock *inet = inet_sk(sk); 519 struct inet_sock *inet = inet_sk(sk);
519 int j, s_j; 520 int j, s_j;
@@ -528,9 +529,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
528 529
529 entry.family = sk->sk_family; 530 entry.family = sk->sk_family;
530 531
531 read_lock_bh(&tp->syn_wait_lock); 532 read_lock_bh(&tp->accept_queue.syn_wait_lock);
532 533
533 lopt = tp->listen_opt; 534 lopt = tp->accept_queue.listen_opt;
534 if (!lopt || !lopt->qlen) 535 if (!lopt || !lopt->qlen)
535 goto out; 536 goto out;
536 537
@@ -541,13 +542,15 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
541 } 542 }
542 543
543 for (j = s_j; j < TCP_SYNQ_HSIZE; j++) { 544 for (j = s_j; j < TCP_SYNQ_HSIZE; j++) {
544 struct open_request *req, *head = lopt->syn_table[j]; 545 struct request_sock *req, *head = lopt->syn_table[j];
545 546
546 reqnum = 0; 547 reqnum = 0;
547 for (req = head; req; reqnum++, req = req->dl_next) { 548 for (req = head; req; reqnum++, req = req->dl_next) {
549 struct inet_request_sock *ireq = inet_rsk(req);
550
548 if (reqnum < s_reqnum) 551 if (reqnum < s_reqnum)
549 continue; 552 continue;
550 if (r->id.tcpdiag_dport != req->rmt_port && 553 if (r->id.tcpdiag_dport != ireq->rmt_port &&
551 r->id.tcpdiag_dport) 554 r->id.tcpdiag_dport)
552 continue; 555 continue;
553 556
@@ -555,16 +558,16 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
555 entry.saddr = 558 entry.saddr =
556#ifdef CONFIG_IP_TCPDIAG_IPV6 559#ifdef CONFIG_IP_TCPDIAG_IPV6
557 (entry.family == AF_INET6) ? 560 (entry.family == AF_INET6) ?
558 req->af.v6_req.loc_addr.s6_addr32 : 561 tcp6_rsk(req)->loc_addr.s6_addr32 :
559#endif 562#endif
560 &req->af.v4_req.loc_addr; 563 &ireq->loc_addr;
561 entry.daddr = 564 entry.daddr =
562#ifdef CONFIG_IP_TCPDIAG_IPV6 565#ifdef CONFIG_IP_TCPDIAG_IPV6
563 (entry.family == AF_INET6) ? 566 (entry.family == AF_INET6) ?
564 req->af.v6_req.rmt_addr.s6_addr32 : 567 tcp6_rsk(req)->rmt_addr.s6_addr32 :
565#endif 568#endif
566 &req->af.v4_req.rmt_addr; 569 &ireq->rmt_addr;
567 entry.dport = ntohs(req->rmt_port); 570 entry.dport = ntohs(ireq->rmt_port);
568 571
569 if (!tcpdiag_bc_run(RTA_DATA(bc), 572 if (!tcpdiag_bc_run(RTA_DATA(bc),
570 RTA_PAYLOAD(bc), &entry)) 573 RTA_PAYLOAD(bc), &entry))
@@ -585,7 +588,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
585 } 588 }
586 589
587out: 590out:
588 read_unlock_bh(&tp->syn_wait_lock); 591 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
589 592
590 return err; 593 return err;
591} 594}
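
tcp_diag follows the same conversion: syn_wait_lock and listen_opt are now reached through tp->accept_queue, and per-request addresses come from the inet_rsk()/tcp6_rsk() accessors instead of the old req->af union. A hypothetical helper showing the locking pattern every SYN-queue reader now shares:

    static int synq_len(struct tcp_sock *tp)
    {
            struct listen_sock *lopt;
            int qlen = 0;

            read_lock_bh(&tp->accept_queue.syn_wait_lock);
            lopt = tp->accept_queue.listen_opt;
            if (lopt != NULL)
                    qlen = lopt->qlen;
            read_unlock_bh(&tp->accept_queue.syn_wait_lock);

            return qlen;
    }
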
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dad98e4a5043..2d41d5d6ad19 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -36,7 +36,7 @@
36 * ACK bit. 36 * ACK bit.
37 * Andi Kleen : Implemented fast path mtu discovery. 37 * Andi Kleen : Implemented fast path mtu discovery.
38 * Fixed many serious bugs in the 38 * Fixed many serious bugs in the
39 * open_request handling and moved 39 * request_sock handling and moved
40 * most of it into the af independent code. 40 * most of it into the af independent code.
41 * Added tail drop and some other bugfixes. 41 * Added tail drop and some other bugfixes.
42 * Added new listen semantics. 42 * Added new listen semantics.
@@ -869,21 +869,23 @@ static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
869 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); 869 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
870} 870}
871 871
872static struct open_request *tcp_v4_search_req(struct tcp_sock *tp, 872static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
873 struct open_request ***prevp, 873 struct request_sock ***prevp,
874 __u16 rport, 874 __u16 rport,
875 __u32 raddr, __u32 laddr) 875 __u32 raddr, __u32 laddr)
876{ 876{
877 struct tcp_listen_opt *lopt = tp->listen_opt; 877 struct listen_sock *lopt = tp->accept_queue.listen_opt;
878 struct open_request *req, **prev; 878 struct request_sock *req, **prev;
879 879
880 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; 880 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
881 (req = *prev) != NULL; 881 (req = *prev) != NULL;
882 prev = &req->dl_next) { 882 prev = &req->dl_next) {
883 if (req->rmt_port == rport && 883 const struct inet_request_sock *ireq = inet_rsk(req);
884 req->af.v4_req.rmt_addr == raddr && 884
885 req->af.v4_req.loc_addr == laddr && 885 if (ireq->rmt_port == rport &&
886 TCP_INET_FAMILY(req->class->family)) { 886 ireq->rmt_addr == raddr &&
887 ireq->loc_addr == laddr &&
888 TCP_INET_FAMILY(req->rsk_ops->family)) {
887 BUG_TRAP(!req->sk); 889 BUG_TRAP(!req->sk);
888 *prevp = prev; 890 *prevp = prev;
889 break; 891 break;
@@ -893,21 +895,13 @@ static struct open_request *tcp_v4_search_req(struct tcp_sock *tp,
893 return req; 895 return req;
894} 896}
895 897
896static void tcp_v4_synq_add(struct sock *sk, struct open_request *req) 898static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
897{ 899{
898 struct tcp_sock *tp = tcp_sk(sk); 900 struct tcp_sock *tp = tcp_sk(sk);
899 struct tcp_listen_opt *lopt = tp->listen_opt; 901 struct listen_sock *lopt = tp->accept_queue.listen_opt;
900 u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd); 902 u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
901
902 req->expires = jiffies + TCP_TIMEOUT_INIT;
903 req->retrans = 0;
904 req->sk = NULL;
905 req->dl_next = lopt->syn_table[h];
906
907 write_lock(&tp->syn_wait_lock);
908 lopt->syn_table[h] = req;
909 write_unlock(&tp->syn_wait_lock);
910 903
904 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
911 tcp_synq_added(sk); 905 tcp_synq_added(sk);
912} 906}
913 907
@@ -1050,7 +1044,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
1050 } 1044 }
1051 1045
1052 switch (sk->sk_state) { 1046 switch (sk->sk_state) {
1053 struct open_request *req, **prev; 1047 struct request_sock *req, **prev;
1054 case TCP_LISTEN: 1048 case TCP_LISTEN:
1055 if (sock_owned_by_user(sk)) 1049 if (sock_owned_by_user(sk))
1056 goto out; 1050 goto out;
@@ -1065,7 +1059,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
1065 */ 1059 */
1066 BUG_TRAP(!req->sk); 1060 BUG_TRAP(!req->sk);
1067 1061
1068 if (seq != req->snt_isn) { 1062 if (seq != tcp_rsk(req)->snt_isn) {
1069 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 1063 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
1070 goto out; 1064 goto out;
1071 } 1065 }
@@ -1254,28 +1248,29 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1254 tcp_tw_put(tw); 1248 tcp_tw_put(tw);
1255} 1249}
1256 1250
1257static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req) 1251static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1258{ 1252{
1259 tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd, 1253 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
1260 req->ts_recent); 1254 req->ts_recent);
1261} 1255}
1262 1256
1263static struct dst_entry* tcp_v4_route_req(struct sock *sk, 1257static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1264 struct open_request *req) 1258 struct request_sock *req)
1265{ 1259{
1266 struct rtable *rt; 1260 struct rtable *rt;
1267 struct ip_options *opt = req->af.v4_req.opt; 1261 const struct inet_request_sock *ireq = inet_rsk(req);
1262 struct ip_options *opt = inet_rsk(req)->opt;
1268 struct flowi fl = { .oif = sk->sk_bound_dev_if, 1263 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1269 .nl_u = { .ip4_u = 1264 .nl_u = { .ip4_u =
1270 { .daddr = ((opt && opt->srr) ? 1265 { .daddr = ((opt && opt->srr) ?
1271 opt->faddr : 1266 opt->faddr :
1272 req->af.v4_req.rmt_addr), 1267 ireq->rmt_addr),
1273 .saddr = req->af.v4_req.loc_addr, 1268 .saddr = ireq->loc_addr,
1274 .tos = RT_CONN_FLAGS(sk) } }, 1269 .tos = RT_CONN_FLAGS(sk) } },
1275 .proto = IPPROTO_TCP, 1270 .proto = IPPROTO_TCP,
1276 .uli_u = { .ports = 1271 .uli_u = { .ports =
1277 { .sport = inet_sk(sk)->sport, 1272 { .sport = inet_sk(sk)->sport,
1278 .dport = req->rmt_port } } }; 1273 .dport = ireq->rmt_port } } };
1279 1274
1280 if (ip_route_output_flow(&rt, &fl, sk, 0)) { 1275 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
1281 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 1276 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -1291,12 +1286,13 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1291 1286
1292/* 1287/*
1293 * Send a SYN-ACK after having received an ACK. 1288 * Send a SYN-ACK after having received an ACK.
1294 * This still operates on a open_request only, not on a big 1289 * This still operates on a request_sock only, not on a big
1295 * socket. 1290 * socket.
1296 */ 1291 */
1297static int tcp_v4_send_synack(struct sock *sk, struct open_request *req, 1292static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
1298 struct dst_entry *dst) 1293 struct dst_entry *dst)
1299{ 1294{
1295 const struct inet_request_sock *ireq = inet_rsk(req);
1300 int err = -1; 1296 int err = -1;
1301 struct sk_buff * skb; 1297 struct sk_buff * skb;
1302 1298
@@ -1310,14 +1306,14 @@ static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
1310 struct tcphdr *th = skb->h.th; 1306 struct tcphdr *th = skb->h.th;
1311 1307
1312 th->check = tcp_v4_check(th, skb->len, 1308 th->check = tcp_v4_check(th, skb->len,
1313 req->af.v4_req.loc_addr, 1309 ireq->loc_addr,
1314 req->af.v4_req.rmt_addr, 1310 ireq->rmt_addr,
1315 csum_partial((char *)th, skb->len, 1311 csum_partial((char *)th, skb->len,
1316 skb->csum)); 1312 skb->csum));
1317 1313
1318 err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr, 1314 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
1319 req->af.v4_req.rmt_addr, 1315 ireq->rmt_addr,
1320 req->af.v4_req.opt); 1316 ireq->opt);
1321 if (err == NET_XMIT_CN) 1317 if (err == NET_XMIT_CN)
1322 err = 0; 1318 err = 0;
1323 } 1319 }
@@ -1328,12 +1324,12 @@ out:
1328} 1324}
1329 1325
1330/* 1326/*
1331 * IPv4 open_request destructor. 1327 * IPv4 request_sock destructor.
1332 */ 1328 */
1333static void tcp_v4_or_free(struct open_request *req) 1329static void tcp_v4_reqsk_destructor(struct request_sock *req)
1334{ 1330{
1335 if (req->af.v4_req.opt) 1331 if (inet_rsk(req)->opt)
1336 kfree(req->af.v4_req.opt); 1332 kfree(inet_rsk(req)->opt);
1337} 1333}
1338 1334
1339static inline void syn_flood_warning(struct sk_buff *skb) 1335static inline void syn_flood_warning(struct sk_buff *skb)
@@ -1349,7 +1345,7 @@ static inline void syn_flood_warning(struct sk_buff *skb)
1349} 1345}
1350 1346
1351/* 1347/*
1352 * Save and compile IPv4 options into the open_request if needed. 1348 * Save and compile IPv4 options into the request_sock if needed.
1353 */ 1349 */
1354static inline struct ip_options *tcp_v4_save_options(struct sock *sk, 1350static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1355 struct sk_buff *skb) 1351 struct sk_buff *skb)
@@ -1370,33 +1366,20 @@ static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1370 return dopt; 1366 return dopt;
1371} 1367}
1372 1368
1373/* 1369struct request_sock_ops tcp_request_sock_ops = {
1374 * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
1375 * One SYN_RECV socket costs about 80bytes on a 32bit machine.
1376 * It would be better to replace it with a global counter for all sockets
1377 * but then some measure against one socket starving all other sockets
1378 * would be needed.
1379 *
1380 * It was 128 by default. Experiments with real servers show, that
1381 * it is absolutely not enough even at 100conn/sec. 256 cures most
1382 * of problems. This value is adjusted to 128 for very small machines
1383 * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
1384 * Further increasing requires to change hash table size.
1385 */
1386int sysctl_max_syn_backlog = 256;
1387
1388struct or_calltable or_ipv4 = {
1389 .family = PF_INET, 1370 .family = PF_INET,
1371 .obj_size = sizeof(struct tcp_request_sock),
1390 .rtx_syn_ack = tcp_v4_send_synack, 1372 .rtx_syn_ack = tcp_v4_send_synack,
1391 .send_ack = tcp_v4_or_send_ack, 1373 .send_ack = tcp_v4_reqsk_send_ack,
1392 .destructor = tcp_v4_or_free, 1374 .destructor = tcp_v4_reqsk_destructor,
1393 .send_reset = tcp_v4_send_reset, 1375 .send_reset = tcp_v4_send_reset,
1394}; 1376};
1395 1377
1396int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 1378int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1397{ 1379{
1380 struct inet_request_sock *ireq;
1398 struct tcp_options_received tmp_opt; 1381 struct tcp_options_received tmp_opt;
1399 struct open_request *req; 1382 struct request_sock *req;
1400 __u32 saddr = skb->nh.iph->saddr; 1383 __u32 saddr = skb->nh.iph->saddr;
1401 __u32 daddr = skb->nh.iph->daddr; 1384 __u32 daddr = skb->nh.iph->daddr;
1402 __u32 isn = TCP_SKB_CB(skb)->when; 1385 __u32 isn = TCP_SKB_CB(skb)->when;
@@ -1433,7 +1416,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1433 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) 1416 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1434 goto drop; 1417 goto drop;
1435 1418
1436 req = tcp_openreq_alloc(); 1419 req = reqsk_alloc(&tcp_request_sock_ops);
1437 if (!req) 1420 if (!req)
1438 goto drop; 1421 goto drop;
1439 1422
@@ -1461,10 +1444,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1461 1444
1462 tcp_openreq_init(req, &tmp_opt, skb); 1445 tcp_openreq_init(req, &tmp_opt, skb);
1463 1446
1464 req->af.v4_req.loc_addr = daddr; 1447 ireq = inet_rsk(req);
1465 req->af.v4_req.rmt_addr = saddr; 1448 ireq->loc_addr = daddr;
1466 req->af.v4_req.opt = tcp_v4_save_options(sk, skb); 1449 ireq->rmt_addr = saddr;
1467 req->class = &or_ipv4; 1450 ireq->opt = tcp_v4_save_options(sk, skb);
1468 if (!want_cookie) 1451 if (!want_cookie)
1469 TCP_ECN_create_request(req, skb->h.th); 1452 TCP_ECN_create_request(req, skb->h.th);
1470 1453
@@ -1523,20 +1506,20 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1523 1506
1524 isn = tcp_v4_init_sequence(sk, skb); 1507 isn = tcp_v4_init_sequence(sk, skb);
1525 } 1508 }
1526 req->snt_isn = isn; 1509 tcp_rsk(req)->snt_isn = isn;
1527 1510
1528 if (tcp_v4_send_synack(sk, req, dst)) 1511 if (tcp_v4_send_synack(sk, req, dst))
1529 goto drop_and_free; 1512 goto drop_and_free;
1530 1513
1531 if (want_cookie) { 1514 if (want_cookie) {
1532 tcp_openreq_free(req); 1515 reqsk_free(req);
1533 } else { 1516 } else {
1534 tcp_v4_synq_add(sk, req); 1517 tcp_v4_synq_add(sk, req);
1535 } 1518 }
1536 return 0; 1519 return 0;
1537 1520
1538drop_and_free: 1521drop_and_free:
1539 tcp_openreq_free(req); 1522 reqsk_free(req);
1540drop: 1523drop:
1541 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 1524 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1542 return 0; 1525 return 0;
@@ -1548,9 +1531,10 @@ drop:
1548 * now create the new socket. 1531 * now create the new socket.
1549 */ 1532 */
1550struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 1533struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1551 struct open_request *req, 1534 struct request_sock *req,
1552 struct dst_entry *dst) 1535 struct dst_entry *dst)
1553{ 1536{
1537 struct inet_request_sock *ireq;
1554 struct inet_sock *newinet; 1538 struct inet_sock *newinet;
1555 struct tcp_sock *newtp; 1539 struct tcp_sock *newtp;
1556 struct sock *newsk; 1540 struct sock *newsk;
@@ -1570,11 +1554,12 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1570 1554
1571 newtp = tcp_sk(newsk); 1555 newtp = tcp_sk(newsk);
1572 newinet = inet_sk(newsk); 1556 newinet = inet_sk(newsk);
1573 newinet->daddr = req->af.v4_req.rmt_addr; 1557 ireq = inet_rsk(req);
1574 newinet->rcv_saddr = req->af.v4_req.loc_addr; 1558 newinet->daddr = ireq->rmt_addr;
1575 newinet->saddr = req->af.v4_req.loc_addr; 1559 newinet->rcv_saddr = ireq->loc_addr;
1576 newinet->opt = req->af.v4_req.opt; 1560 newinet->saddr = ireq->loc_addr;
1577 req->af.v4_req.opt = NULL; 1561 newinet->opt = ireq->opt;
1562 ireq->opt = NULL;
1578 newinet->mc_index = tcp_v4_iif(skb); 1563 newinet->mc_index = tcp_v4_iif(skb);
1579 newinet->mc_ttl = skb->nh.iph->ttl; 1564 newinet->mc_ttl = skb->nh.iph->ttl;
1580 newtp->ext_header_len = 0; 1565 newtp->ext_header_len = 0;
@@ -1605,9 +1590,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1605 struct iphdr *iph = skb->nh.iph; 1590 struct iphdr *iph = skb->nh.iph;
1606 struct tcp_sock *tp = tcp_sk(sk); 1591 struct tcp_sock *tp = tcp_sk(sk);
1607 struct sock *nsk; 1592 struct sock *nsk;
1608 struct open_request **prev; 1593 struct request_sock **prev;
1609 /* Find possible connection requests. */ 1594 /* Find possible connection requests. */
1610 struct open_request *req = tcp_v4_search_req(tp, &prev, th->source, 1595 struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source,
1611 iph->saddr, iph->daddr); 1596 iph->saddr, iph->daddr);
1612 if (req) 1597 if (req)
1613 return tcp_check_req(sk, skb, req, prev); 1598 return tcp_check_req(sk, skb, req, prev);
@@ -2144,13 +2129,13 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2144 ++st->num; 2129 ++st->num;
2145 2130
2146 if (st->state == TCP_SEQ_STATE_OPENREQ) { 2131 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2147 struct open_request *req = cur; 2132 struct request_sock *req = cur;
2148 2133
2149 tp = tcp_sk(st->syn_wait_sk); 2134 tp = tcp_sk(st->syn_wait_sk);
2150 req = req->dl_next; 2135 req = req->dl_next;
2151 while (1) { 2136 while (1) {
2152 while (req) { 2137 while (req) {
2153 if (req->class->family == st->family) { 2138 if (req->rsk_ops->family == st->family) {
2154 cur = req; 2139 cur = req;
2155 goto out; 2140 goto out;
2156 } 2141 }
@@ -2159,17 +2144,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2159 if (++st->sbucket >= TCP_SYNQ_HSIZE) 2144 if (++st->sbucket >= TCP_SYNQ_HSIZE)
2160 break; 2145 break;
2161get_req: 2146get_req:
2162 req = tp->listen_opt->syn_table[st->sbucket]; 2147 req = tp->accept_queue.listen_opt->syn_table[st->sbucket];
2163 } 2148 }
2164 sk = sk_next(st->syn_wait_sk); 2149 sk = sk_next(st->syn_wait_sk);
2165 st->state = TCP_SEQ_STATE_LISTENING; 2150 st->state = TCP_SEQ_STATE_LISTENING;
2166 read_unlock_bh(&tp->syn_wait_lock); 2151 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2167 } else { 2152 } else {
2168 tp = tcp_sk(sk); 2153 tp = tcp_sk(sk);
2169 read_lock_bh(&tp->syn_wait_lock); 2154 read_lock_bh(&tp->accept_queue.syn_wait_lock);
2170 if (tp->listen_opt && tp->listen_opt->qlen) 2155 if (reqsk_queue_len(&tp->accept_queue))
2171 goto start_req; 2156 goto start_req;
2172 read_unlock_bh(&tp->syn_wait_lock); 2157 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2173 sk = sk_next(sk); 2158 sk = sk_next(sk);
2174 } 2159 }
2175get_sk: 2160get_sk:
@@ -2179,8 +2164,8 @@ get_sk:
2179 goto out; 2164 goto out;
2180 } 2165 }
2181 tp = tcp_sk(sk); 2166 tp = tcp_sk(sk);
2182 read_lock_bh(&tp->syn_wait_lock); 2167 read_lock_bh(&tp->accept_queue.syn_wait_lock);
2183 if (tp->listen_opt && tp->listen_opt->qlen) { 2168 if (reqsk_queue_len(&tp->accept_queue)) {
2184start_req: 2169start_req:
2185 st->uid = sock_i_uid(sk); 2170 st->uid = sock_i_uid(sk);
2186 st->syn_wait_sk = sk; 2171 st->syn_wait_sk = sk;
@@ -2188,7 +2173,7 @@ start_req:
2188 st->sbucket = 0; 2173 st->sbucket = 0;
2189 goto get_req; 2174 goto get_req;
2190 } 2175 }
2191 read_unlock_bh(&tp->syn_wait_lock); 2176 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2192 } 2177 }
2193 if (++st->bucket < TCP_LHTABLE_SIZE) { 2178 if (++st->bucket < TCP_LHTABLE_SIZE) {
2194 sk = sk_head(&tcp_listening_hash[st->bucket]); 2179 sk = sk_head(&tcp_listening_hash[st->bucket]);
@@ -2375,7 +2360,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2375 case TCP_SEQ_STATE_OPENREQ: 2360 case TCP_SEQ_STATE_OPENREQ:
2376 if (v) { 2361 if (v) {
2377 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); 2362 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
2378 read_unlock_bh(&tp->syn_wait_lock); 2363 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2379 } 2364 }
2380 case TCP_SEQ_STATE_LISTENING: 2365 case TCP_SEQ_STATE_LISTENING:
2381 if (v != SEQ_START_TOKEN) 2366 if (v != SEQ_START_TOKEN)
@@ -2451,18 +2436,19 @@ void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2451 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 2436 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2452} 2437}
2453 2438
2454static void get_openreq4(struct sock *sk, struct open_request *req, 2439static void get_openreq4(struct sock *sk, struct request_sock *req,
2455 char *tmpbuf, int i, int uid) 2440 char *tmpbuf, int i, int uid)
2456{ 2441{
2442 const struct inet_request_sock *ireq = inet_rsk(req);
2457 int ttd = req->expires - jiffies; 2443 int ttd = req->expires - jiffies;
2458 2444
2459 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" 2445 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2460 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p", 2446 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2461 i, 2447 i,
2462 req->af.v4_req.loc_addr, 2448 ireq->loc_addr,
2463 ntohs(inet_sk(sk)->sport), 2449 ntohs(inet_sk(sk)->sport),
2464 req->af.v4_req.rmt_addr, 2450 ireq->rmt_addr,
2465 ntohs(req->rmt_port), 2451 ntohs(ireq->rmt_port),
2466 TCP_SYN_RECV, 2452 TCP_SYN_RECV,
2467 0, 0, /* could print option size, but that is af dependent. */ 2453 0, 0, /* could print option size, but that is af dependent. */
2468 1, /* timers active (only the expire timer) */ 2454 1, /* timers active (only the expire timer) */
@@ -2618,6 +2604,7 @@ struct proto tcp_prot = {
2618 .sysctl_rmem = sysctl_tcp_rmem, 2604 .sysctl_rmem = sysctl_tcp_rmem,
2619 .max_header = MAX_TCP_HEADER, 2605 .max_header = MAX_TCP_HEADER,
2620 .obj_size = sizeof(struct tcp_sock), 2606 .obj_size = sizeof(struct tcp_sock),
2607 .rsk_prot = &tcp_request_sock_ops,
2621}; 2608};
2622 2609
2623 2610
@@ -2660,7 +2647,6 @@ EXPORT_SYMBOL(tcp_proc_register);
2660EXPORT_SYMBOL(tcp_proc_unregister); 2647EXPORT_SYMBOL(tcp_proc_unregister);
2661#endif 2648#endif
2662EXPORT_SYMBOL(sysctl_local_port_range); 2649EXPORT_SYMBOL(sysctl_local_port_range);
2663EXPORT_SYMBOL(sysctl_max_syn_backlog);
2664EXPORT_SYMBOL(sysctl_tcp_low_latency); 2650EXPORT_SYMBOL(sysctl_tcp_low_latency);
2665EXPORT_SYMBOL(sysctl_tcp_tw_reuse); 2651EXPORT_SYMBOL(sysctl_tcp_tw_reuse);
2666 2652
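
In tcp_ipv4.c the or_calltable becomes tcp_request_sock_ops, gaining an .obj_size field; hooked into tcp_prot as .rsk_prot, it lets the generic socket code size one request slab per protocol, which is what makes the dedicated tcp_openreq_cachep (removed from tcp.c above) redundant. The sysctl_max_syn_backlog definition, its export, and its sizing comment likewise move out to the new generic request_sock code listed in the diffstat. Assumed shape of the allocator behind reqsk_alloc() (illustrative, not verified):

    static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops)
    {
            struct request_sock *req = kmem_cache_alloc(ops->slab, SLAB_ATOMIC);

            if (req != NULL)
                    req->rsk_ops = ops;     /* replaces the old req->class */

            return req;
    }
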
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index eea1a17a9ac2..b3943e7562f3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -684,7 +684,7 @@ out:
684 * Actually, we could do lots of memory writes here. tp of listening 684 * Actually, we could do lots of memory writes here. tp of listening
685 * socket contains all necessary default parameters. 685 * socket contains all necessary default parameters.
686 */ 686 */
687struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb) 687struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
688{ 688{
689 /* allocate the newsk from the same slab of the master sock, 689 /* allocate the newsk from the same slab of the master sock,
690 * if not, at sk_free time we'll try to free it from the wrong 690 * if not, at sk_free time we'll try to free it from the wrong
@@ -692,6 +692,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
692 struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0); 692 struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0);
693 693
694 if(newsk != NULL) { 694 if(newsk != NULL) {
695 struct inet_request_sock *ireq = inet_rsk(req);
696 struct tcp_request_sock *treq = tcp_rsk(req);
695 struct tcp_sock *newtp; 697 struct tcp_sock *newtp;
696 struct sk_filter *filter; 698 struct sk_filter *filter;
697 699
@@ -703,7 +705,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
703 tcp_sk(newsk)->bind_hash = NULL; 705 tcp_sk(newsk)->bind_hash = NULL;
704 706
705 /* Clone the TCP header template */ 707 /* Clone the TCP header template */
706 inet_sk(newsk)->dport = req->rmt_port; 708 inet_sk(newsk)->dport = ireq->rmt_port;
707 709
708 sock_lock_init(newsk); 710 sock_lock_init(newsk);
709 bh_lock_sock(newsk); 711 bh_lock_sock(newsk);
@@ -739,14 +741,14 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
739 /* Now setup tcp_sock */ 741 /* Now setup tcp_sock */
740 newtp = tcp_sk(newsk); 742 newtp = tcp_sk(newsk);
741 newtp->pred_flags = 0; 743 newtp->pred_flags = 0;
742 newtp->rcv_nxt = req->rcv_isn + 1; 744 newtp->rcv_nxt = treq->rcv_isn + 1;
743 newtp->snd_nxt = req->snt_isn + 1; 745 newtp->snd_nxt = treq->snt_isn + 1;
744 newtp->snd_una = req->snt_isn + 1; 746 newtp->snd_una = treq->snt_isn + 1;
745 newtp->snd_sml = req->snt_isn + 1; 747 newtp->snd_sml = treq->snt_isn + 1;
746 748
747 tcp_prequeue_init(newtp); 749 tcp_prequeue_init(newtp);
748 750
749 tcp_init_wl(newtp, req->snt_isn, req->rcv_isn); 751 tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
750 752
751 newtp->retransmits = 0; 753 newtp->retransmits = 0;
752 newtp->backoff = 0; 754 newtp->backoff = 0;
@@ -775,10 +777,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
775 tcp_set_ca_state(newtp, TCP_CA_Open); 777 tcp_set_ca_state(newtp, TCP_CA_Open);
776 tcp_init_xmit_timers(newsk); 778 tcp_init_xmit_timers(newsk);
777 skb_queue_head_init(&newtp->out_of_order_queue); 779 skb_queue_head_init(&newtp->out_of_order_queue);
778 newtp->rcv_wup = req->rcv_isn + 1; 780 newtp->rcv_wup = treq->rcv_isn + 1;
779 newtp->write_seq = req->snt_isn + 1; 781 newtp->write_seq = treq->snt_isn + 1;
780 newtp->pushed_seq = newtp->write_seq; 782 newtp->pushed_seq = newtp->write_seq;
781 newtp->copied_seq = req->rcv_isn + 1; 783 newtp->copied_seq = treq->rcv_isn + 1;
782 784
783 newtp->rx_opt.saw_tstamp = 0; 785 newtp->rx_opt.saw_tstamp = 0;
784 786
@@ -788,10 +790,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
788 newtp->probes_out = 0; 790 newtp->probes_out = 0;
789 newtp->rx_opt.num_sacks = 0; 791 newtp->rx_opt.num_sacks = 0;
790 newtp->urg_data = 0; 792 newtp->urg_data = 0;
791 newtp->listen_opt = NULL; 793 /* Deinitialize accept_queue to trap illegal accesses. */
792 newtp->accept_queue = newtp->accept_queue_tail = NULL; 794 memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue));
793 /* Deinitialize syn_wait_lock to trap illegal accesses. */
794 memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));
795 795
796 /* Back to base struct sock members. */ 796 /* Back to base struct sock members. */
797 newsk->sk_err = 0; 797 newsk->sk_err = 0;
@@ -808,18 +808,18 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
808 newsk->sk_socket = NULL; 808 newsk->sk_socket = NULL;
809 newsk->sk_sleep = NULL; 809 newsk->sk_sleep = NULL;
810 810
811 newtp->rx_opt.tstamp_ok = req->tstamp_ok; 811 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
812 if((newtp->rx_opt.sack_ok = req->sack_ok) != 0) { 812 if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
813 if (sysctl_tcp_fack) 813 if (sysctl_tcp_fack)
814 newtp->rx_opt.sack_ok |= 2; 814 newtp->rx_opt.sack_ok |= 2;
815 } 815 }
816 newtp->window_clamp = req->window_clamp; 816 newtp->window_clamp = req->window_clamp;
817 newtp->rcv_ssthresh = req->rcv_wnd; 817 newtp->rcv_ssthresh = req->rcv_wnd;
818 newtp->rcv_wnd = req->rcv_wnd; 818 newtp->rcv_wnd = req->rcv_wnd;
819 newtp->rx_opt.wscale_ok = req->wscale_ok; 819 newtp->rx_opt.wscale_ok = ireq->wscale_ok;
820 if (newtp->rx_opt.wscale_ok) { 820 if (newtp->rx_opt.wscale_ok) {
821 newtp->rx_opt.snd_wscale = req->snd_wscale; 821 newtp->rx_opt.snd_wscale = ireq->snd_wscale;
822 newtp->rx_opt.rcv_wscale = req->rcv_wscale; 822 newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
823 } else { 823 } else {
824 newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; 824 newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
825 newtp->window_clamp = min(newtp->window_clamp, 65535U); 825 newtp->window_clamp = min(newtp->window_clamp, 65535U);
@@ -851,12 +851,12 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
851 851
852/* 852/*
853 * Process an incoming packet for SYN_RECV sockets represented 853 * Process an incoming packet for SYN_RECV sockets represented
854 * as an open_request. 854 * as a request_sock.
855 */ 855 */
856 856
857struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, 857struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
858 struct open_request *req, 858 struct request_sock *req,
859 struct open_request **prev) 859 struct request_sock **prev)
860{ 860{
861 struct tcphdr *th = skb->h.th; 861 struct tcphdr *th = skb->h.th;
862 struct tcp_sock *tp = tcp_sk(sk); 862 struct tcp_sock *tp = tcp_sk(sk);
@@ -881,7 +881,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
881 } 881 }
882 882
883 /* Check for pure retransmitted SYN. */ 883 /* Check for pure retransmitted SYN. */
884 if (TCP_SKB_CB(skb)->seq == req->rcv_isn && 884 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
885 flg == TCP_FLAG_SYN && 885 flg == TCP_FLAG_SYN &&
886 !paws_reject) { 886 !paws_reject) {
887 /* 887 /*
@@ -901,7 +901,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
901 * Enforce "SYN-ACK" according to figure 8, figure 6 901 * Enforce "SYN-ACK" according to figure 8, figure 6
902 * of RFC793, fixed by RFC1122. 902 * of RFC793, fixed by RFC1122.
903 */ 903 */
904 req->class->rtx_syn_ack(sk, req, NULL); 904 req->rsk_ops->rtx_syn_ack(sk, req, NULL);
905 return NULL; 905 return NULL;
906 } 906 }
907 907
@@ -959,7 +959,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
959 * Invalid ACK: reset will be sent by listening socket 959 * Invalid ACK: reset will be sent by listening socket
960 */ 960 */
961 if ((flg & TCP_FLAG_ACK) && 961 if ((flg & TCP_FLAG_ACK) &&
962 (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)) 962 (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
963 return sk; 963 return sk;
964 964
965 /* Also, it would be not so bad idea to check rcv_tsecr, which 965 /* Also, it would be not so bad idea to check rcv_tsecr, which
@@ -970,10 +970,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
970 /* RFC793: "first check sequence number". */ 970 /* RFC793: "first check sequence number". */
971 971
972 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, 972 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
973 req->rcv_isn+1, req->rcv_isn+1+req->rcv_wnd)) { 973 tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
974 /* Out of window: send ACK and drop. */ 974 /* Out of window: send ACK and drop. */
975 if (!(flg & TCP_FLAG_RST)) 975 if (!(flg & TCP_FLAG_RST))
976 req->class->send_ack(skb, req); 976 req->rsk_ops->send_ack(skb, req);
977 if (paws_reject) 977 if (paws_reject)
978 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); 978 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
979 return NULL; 979 return NULL;
@@ -981,12 +981,12 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
981 981
982 /* In sequence, PAWS is OK. */ 982 /* In sequence, PAWS is OK. */
983 983
984 if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1)) 984 if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
985 req->ts_recent = tmp_opt.rcv_tsval; 985 req->ts_recent = tmp_opt.rcv_tsval;
986 986
987 if (TCP_SKB_CB(skb)->seq == req->rcv_isn) { 987 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
988 /* Truncate SYN, it is out of window starting 988 /* Truncate SYN, it is out of window starting
989 at req->rcv_isn+1. */ 989 at tcp_rsk(req)->rcv_isn + 1. */
990 flg &= ~TCP_FLAG_SYN; 990 flg &= ~TCP_FLAG_SYN;
991 } 991 }
992 992
@@ -1003,8 +1003,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1003 return NULL; 1003 return NULL;
1004 1004
1005 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ 1005 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
1006 if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) { 1006 if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
1007 req->acked = 1; 1007 inet_rsk(req)->acked = 1;
1008 return NULL; 1008 return NULL;
1009 } 1009 }
1010 1010
@@ -1026,14 +1026,14 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1026 1026
1027 listen_overflow: 1027 listen_overflow:
1028 if (!sysctl_tcp_abort_on_overflow) { 1028 if (!sysctl_tcp_abort_on_overflow) {
1029 req->acked = 1; 1029 inet_rsk(req)->acked = 1;
1030 return NULL; 1030 return NULL;
1031 } 1031 }
1032 1032
1033 embryonic_reset: 1033 embryonic_reset:
1034 NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS); 1034 NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
1035 if (!(flg & TCP_FLAG_RST)) 1035 if (!(flg & TCP_FLAG_RST))
1036 req->class->send_reset(skb); 1036 req->rsk_ops->send_reset(skb);
1037 1037
1038 tcp_synq_drop(sk, req, prev); 1038 tcp_synq_drop(sk, req, prev);
1039 return NULL; 1039 return NULL;
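Most of the mechanical rewrites above (req->rcv_isn becoming tcp_rsk(req)->rcv_isn, req->acked becoming inet_rsk(req)->acked) fall out of one layout decision: the generic request_sock sits at the head of inet_request_sock, which sits at the head of the TCP-specific request, so the accessors are plain downcasts. A compilable sketch of that layering, with simplified and partly assumed field sets:

#include <stdint.h>
#include <stdio.h>

struct request_sock { uint32_t rcv_wnd; };

struct inet_request_sock {
	struct request_sock req;	/* must stay the first member */
	uint16_t rmt_port;
	unsigned int sack_ok : 1, wscale_ok : 1, tstamp_ok : 1, acked : 1;
};

struct tcp_request_sock {
	struct inet_request_sock ireq;	/* must stay the first member */
	uint32_t rcv_isn, snt_isn;
};

static inline struct inet_request_sock *inet_rsk(struct request_sock *sk)
{
	return (struct inet_request_sock *)sk;	/* valid because of the embedding */
}

static inline struct tcp_request_sock *tcp_rsk(struct request_sock *sk)
{
	return (struct tcp_request_sock *)sk;
}

int main(void)
{
	struct tcp_request_sock treq = { .rcv_isn = 1000, .snt_isn = 2000 };
	struct request_sock *req = &treq.ireq.req;

	/* exactly the shape of the rewrites in the hunks above */
	printf("rcv_nxt = %u\n", (unsigned)(tcp_rsk(req)->rcv_isn + 1));
	inet_rsk(req)->acked = 1;
	return 0;
}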
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fa24e7ae1f40..f17c6577e337 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1356,8 +1356,9 @@ int tcp_send_synack(struct sock *sk)
  * Prepare a SYN-ACK.
  */
 struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
-				 struct open_request *req)
+				 struct request_sock *req)
 {
+	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcphdr *th;
 	int tcp_header_size;
@@ -1373,47 +1374,47 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	skb->dst = dst_clone(dst);
 
 	tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
-			   (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
-			   (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
+			   (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
+			   (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
 			   /* SACK_PERM is in the place of NOP NOP of TS */
-			   ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
+			   ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
 	skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
 
 	memset(th, 0, sizeof(struct tcphdr));
 	th->syn = 1;
 	th->ack = 1;
 	if (dst->dev->features&NETIF_F_TSO)
-		req->ecn_ok = 0;
+		ireq->ecn_ok = 0;
 	TCP_ECN_make_synack(req, th);
 	th->source = inet_sk(sk)->sport;
-	th->dest = req->rmt_port;
-	TCP_SKB_CB(skb)->seq = req->snt_isn;
+	th->dest = ireq->rmt_port;
+	TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
 	TCP_SKB_CB(skb)->sacked = 0;
 	skb_shinfo(skb)->tso_segs = 1;
 	skb_shinfo(skb)->tso_size = 0;
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
-	th->ack_seq = htonl(req->rcv_isn + 1);
+	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
 		__u8 rcv_wscale;
 		/* Set this up on the first call only */
 		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
 		/* tcp_full_space because it is guaranteed to be the first packet */
 		tcp_select_initial_window(tcp_full_space(sk),
-			dst_metric(dst, RTAX_ADVMSS) - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
 			&req->rcv_wnd,
 			&req->window_clamp,
-			req->wscale_ok,
+			ireq->wscale_ok,
 			&rcv_wscale);
-		req->rcv_wscale = rcv_wscale;
+		ireq->rcv_wscale = rcv_wscale;
 	}
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 	th->window = htons(req->rcv_wnd);
 
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok,
-			      req->sack_ok, req->wscale_ok, req->rcv_wscale,
+	tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
+			      ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
 			      TCP_SKB_CB(skb)->when,
 			      req->ts_recent);
 
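The tcp_header_size expression in tcp_make_synack() is worth a worked example: each negotiated option adds a fixed 4-byte-aligned chunk, and SACK-permitted only costs extra space when timestamps are absent, because otherwise it rides in the timestamp option's NOP padding. A small standalone check of the arithmetic, using the kernel's TCPOLEN_* values:

#include <stdio.h>

#define TCPHDR_LEN		 20
#define TCPOLEN_MSS		  4
#define TCPOLEN_TSTAMP_ALIGNED	 12
#define TCPOLEN_WSCALE_ALIGNED	  4
#define TCPOLEN_SACKPERM_ALIGNED  4

static int synack_header_size(int tstamp_ok, int wscale_ok, int sack_ok)
{
	return TCPHDR_LEN + TCPOLEN_MSS +
	       (tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
	       (wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
	       /* SACK_PERM sits in the place of NOP NOP of TS */
	       ((sack_ok && !tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0);
}

int main(void)
{
	/* all options negotiated: 20 + 4 + 12 + 4 = 40 bytes */
	printf("%d\n", synack_header_size(1, 1, 1));
	/* SACK without timestamps needs its own 4 bytes: 20 + 4 + 4 + 4 = 32 */
	printf("%d\n", synack_header_size(0, 1, 1));
	return 0;
}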
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 799ebe061e2c..b127b4498565 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -464,11 +464,11 @@ out_unlock:
464static void tcp_synack_timer(struct sock *sk) 464static void tcp_synack_timer(struct sock *sk)
465{ 465{
466 struct tcp_sock *tp = tcp_sk(sk); 466 struct tcp_sock *tp = tcp_sk(sk);
467 struct tcp_listen_opt *lopt = tp->listen_opt; 467 struct listen_sock *lopt = tp->accept_queue.listen_opt;
468 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; 468 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
469 int thresh = max_retries; 469 int thresh = max_retries;
470 unsigned long now = jiffies; 470 unsigned long now = jiffies;
471 struct open_request **reqp, *req; 471 struct request_sock **reqp, *req;
472 int i, budget; 472 int i, budget;
473 473
474 if (lopt == NULL || lopt->qlen == 0) 474 if (lopt == NULL || lopt->qlen == 0)
@@ -513,8 +513,8 @@ static void tcp_synack_timer(struct sock *sk)
513 while ((req = *reqp) != NULL) { 513 while ((req = *reqp) != NULL) {
514 if (time_after_eq(now, req->expires)) { 514 if (time_after_eq(now, req->expires)) {
515 if ((req->retrans < thresh || 515 if ((req->retrans < thresh ||
516 (req->acked && req->retrans < max_retries)) 516 (inet_rsk(req)->acked && req->retrans < max_retries))
517 && !req->class->rtx_syn_ack(sk, req, NULL)) { 517 && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) {
518 unsigned long timeo; 518 unsigned long timeo;
519 519
520 if (req->retrans++ == 0) 520 if (req->retrans++ == 0)
@@ -527,13 +527,9 @@ static void tcp_synack_timer(struct sock *sk)
527 } 527 }
528 528
529 /* Drop this request */ 529 /* Drop this request */
530 write_lock(&tp->syn_wait_lock); 530 tcp_synq_unlink(tp, req, reqp);
531 *reqp = req->dl_next; 531 reqsk_queue_removed(&tp->accept_queue, req);
532 write_unlock(&tp->syn_wait_lock); 532 reqsk_free(req);
533 lopt->qlen--;
534 if (req->retrans == 0)
535 lopt->qlen_young--;
536 tcp_openreq_free(req);
537 continue; 533 continue;
538 } 534 }
539 reqp = &req->dl_next; 535 reqp = &req->dl_next;
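The seven-line open-coded drop replaced above did three jobs -- unlink under syn_wait_lock, fix up the qlen/qlen_young counters, free the request -- which tcp_synq_unlink(), reqsk_queue_removed() and reqsk_free() now centralize. A sketch of the counter bookkeeping being consolidated; the field names come from the removed lines, but the struct is simplified and the code is illustrative:

#include <stdio.h>

struct listen_sock { int qlen, qlen_young; };

static void reqsk_queue_removed(struct listen_sock *lopt, int retrans)
{
	if (retrans == 0)
		lopt->qlen_young--;	/* request never had its SYN-ACK retransmitted */
	lopt->qlen--;
}

int main(void)
{
	struct listen_sock lopt = { .qlen = 2, .qlen_young = 1 };

	reqsk_queue_removed(&lopt, 0);	/* dropping a young request */
	printf("qlen=%d young=%d\n", lopt.qlen, lopt.qlen_young);
	return 0;
}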
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7744a2592693..47a30c3188ea 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -131,7 +131,7 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
 
 static int addrconf_ifdown(struct net_device *dev, int how);
 
-static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags);
+static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
 static void addrconf_dad_timer(unsigned long data);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
 static void addrconf_rs_timer(unsigned long data);
@@ -372,6 +372,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	ndev->regen_timer.data = (unsigned long) ndev;
 	if ((dev->flags&IFF_LOOPBACK) ||
 	    dev->type == ARPHRD_TUNNEL ||
+	    dev->type == ARPHRD_NONE ||
 	    dev->type == ARPHRD_SIT) {
 		printk(KERN_INFO
 		       "Disabled Privacy Extensions on device %p(%s)\n",
@@ -491,7 +492,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 
 static struct inet6_ifaddr *
 ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
-	      int scope, unsigned flags)
+	      int scope, u32 flags)
 {
 	struct inet6_ifaddr *ifa = NULL;
 	struct rt6_info *rt;
@@ -1319,7 +1320,7 @@ static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad
 
 static void
 addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
-		      unsigned long expires, unsigned flags)
+		      unsigned long expires, u32 flags)
 {
 	struct in6_rtmsg rtmsg;
 
@@ -2228,7 +2229,7 @@ out:
 /*
  *	Duplicate Address Detection
  */
-static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
@@ -2621,15 +2622,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 }
 
 static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
-			     u32 pid, u32 seq, int event)
+			     u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 	struct ifa_cacheinfo ci;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 	ifm->ifa_family = AF_INET6;
 	ifm->ifa_prefixlen = ifa->prefix_len;
@@ -2671,15 +2671,14 @@ rtattr_failure:
 }
 
 static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
-				u32 pid, u32 seq, int event)
+				u32 pid, u32 seq, int event, u16 flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 	struct ifa_cacheinfo ci;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 	ifm->ifa_family = AF_INET6;
 	ifm->ifa_prefixlen = 128;
@@ -2708,15 +2707,14 @@ rtattr_failure:
 }
 
 static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
-				u32 pid, u32 seq, int event)
+				u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 	struct ifa_cacheinfo ci;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 	ifm->ifa_family = AF_INET6;
 	ifm->ifa_prefixlen = 128;
@@ -2785,7 +2783,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 				continue;
 			if ((err = inet6_fill_ifaddr(skb, ifa,
 			    NETLINK_CB(cb->skb).pid,
-			    cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0)
+			    cb->nlh->nlmsg_seq, RTM_NEWADDR,
+			    NLM_F_MULTI)) <= 0)
 				goto done;
 		}
 		/* temp addr */
@@ -2796,7 +2795,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 				continue;
 			if ((err = inet6_fill_ifaddr(skb, ifa,
 			    NETLINK_CB(cb->skb).pid,
-			    cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0)
+			    cb->nlh->nlmsg_seq, RTM_NEWADDR,
+			    NLM_F_MULTI)) <= 0)
 				goto done;
 		}
 #endif
@@ -2809,7 +2809,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 				continue;
 			if ((err = inet6_fill_ifmcaddr(skb, ifmca,
 			    NETLINK_CB(cb->skb).pid,
-			    cb->nlh->nlmsg_seq, RTM_GETMULTICAST)) <= 0)
+			    cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
+			    NLM_F_MULTI)) <= 0)
 				goto done;
 		}
 		break;
@@ -2821,7 +2822,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 				continue;
 			if ((err = inet6_fill_ifacaddr(skb, ifaca,
 			    NETLINK_CB(cb->skb).pid,
-			    cb->nlh->nlmsg_seq, RTM_GETANYCAST)) <= 0)
+			    cb->nlh->nlmsg_seq, RTM_GETANYCAST,
+			    NLM_F_MULTI)) <= 0)
 				goto done;
 		}
 		break;
@@ -2871,7 +2873,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS);
 		return;
 	}
-	if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+	if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL);
 		return;
@@ -2906,7 +2908,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
 }
 
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
-			     u32 pid, u32 seq, int event)
+			     u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct net_device *dev = idev->dev;
 	__s32 *array = NULL;
@@ -2917,8 +2919,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	__u32 mtu = dev->mtu;
 	struct ifla_cacheinfo ci;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
 	r->ifi_family = AF_INET6;
 	r->ifi_type = dev->type;
@@ -2985,7 +2986,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 		if ((idev = in6_dev_get(dev)) == NULL)
 			continue;
 		err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
-				cb->nlh->nlmsg_seq, RTM_NEWLINK);
+				cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
 		in6_dev_put(idev);
 		if (err <= 0)
 			break;
@@ -3007,7 +3008,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS);
 		return;
 	}
-	if (inet6_fill_ifinfo(skb, idev, 0, 0, event) < 0) {
+	if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL);
 		return;
@@ -3017,18 +3018,15 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
 }
 
 static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
-			struct prefix_info *pinfo, u32 pid, u32 seq, int event)
+			struct prefix_info *pinfo, u32 pid, u32 seq,
+			int event, unsigned int flags)
 {
 	struct prefixmsg *pmsg;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 	struct prefix_cacheinfo ci;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*pmsg));
-
-	if (pid)
-		nlh->nlmsg_flags |= NLM_F_MULTI;
-
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*pmsg), flags);
 	pmsg = NLMSG_DATA(nlh);
 	pmsg->prefix_family = AF_INET6;
 	pmsg->prefix_ifindex = idev->dev->ifindex;
@@ -3067,7 +3065,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS);
 		return;
 	}
-	if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event) < 0) {
+	if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL);
 		return;
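The recurring NLMSG_PUT -> NLMSG_NEW rewrite in this file moves the NLM_F_MULTI decision from a heuristic ("pid != 0 must mean a dump") to an explicit argument threaded down from the dump or notify caller. A userspace sketch of the difference, with a simplified nlmsg_new() stand-in; the real NLMSG_NEW macro also checks skb tailroom and jumps to nlmsg_failure, which this sketch omits:

#include <stdio.h>
#include <string.h>
#include <linux/netlink.h>

static struct {
	struct nlmsghdr h;
	unsigned char payload[128];
} msg;

static struct nlmsghdr *nlmsg_new(unsigned pid, unsigned seq,
				  int type, int len, unsigned flags)
{
	struct nlmsghdr *nlh = &msg.h;

	memset(&msg, 0, sizeof(msg));
	nlh->nlmsg_len   = NLMSG_LENGTH(len);
	nlh->nlmsg_type  = type;
	nlh->nlmsg_flags = flags;	/* caller decides, e.g. NLM_F_MULTI */
	nlh->nlmsg_seq   = seq;
	nlh->nlmsg_pid   = pid;
	return nlh;
}

int main(void)
{
	/* dump path: every part of a multi-part reply carries NLM_F_MULTI */
	struct nlmsghdr *nlh = nlmsg_new(1234, 1, 20 /* RTM_NEWADDR */, 64,
					 NLM_F_MULTI);
	printf("dump:   multi=%d\n", !!(nlh->nlmsg_flags & NLM_F_MULTI));

	/* notify path: a single message; pid and flags of 0 are now legitimate */
	nlh = nlmsg_new(0, 0, 20 /* RTM_NEWADDR */, 64, 0);
	printf("notify: multi=%d\n", !!(nlh->nlmsg_flags & NLM_F_MULTI));
	return 0;
}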
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 65b9375df57d..5229365cd8b4 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -353,14 +353,14 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 	err = copied;
 
 	/* Reset and regenerate socket error */
-	spin_lock_irq(&sk->sk_error_queue.lock);
+	spin_lock_bh(&sk->sk_error_queue.lock);
 	sk->sk_err = 0;
 	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
 		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
-		spin_unlock_irq(&sk->sk_error_queue.lock);
+		spin_unlock_bh(&sk->sk_error_queue.lock);
 		sk->sk_error_report(sk);
 	} else {
-		spin_unlock_irq(&sk->sk_error_queue.lock);
+		spin_unlock_bh(&sk->sk_error_queue.lock);
 	}
 
 out_free_skb:
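This _irq -> _bh conversion (repeated in raw.c and udp.c below) narrows the protection to what the queues actually need: they are shared with softirq receive processing, not with hard interrupt handlers, so disabling bottom halves around the lock is sufficient and cheaper than masking all interrupts. A stubbed, illustrative composition of the primitives; the real ones are kernel macros, not functions like these:

#include <stdio.h>

static void local_bh_disable(void) { puts("BH off (hard IRQs stay enabled)"); }
static void local_bh_enable(void)  { puts("BH on"); }
static void spin_lock(void)        { puts("lock"); }
static void spin_unlock(void)      { puts("unlock"); }

static void spin_lock_bh(void)   { local_bh_disable(); spin_lock(); }
static void spin_unlock_bh(void) { spin_unlock(); local_bh_enable(); }

int main(void)
{
	spin_lock_bh();
	/* peek/unlink an skb from sk_receive_queue here */
	spin_unlock_bh();
	return 0;
}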
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8e0f569b883e..ff3ec9822e36 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -277,8 +277,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 {
 	struct inet6_dev *idev = NULL;
 	struct ipv6hdr *hdr = skb->nh.ipv6h;
-	struct sock *sk = icmpv6_socket->sk;
-	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sock *sk;
+	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
 	struct dst_entry *dst;
 	struct icmp6hdr tmp_hdr;
@@ -358,6 +358,9 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	if (icmpv6_xmit_lock())
 		return;
 
+	sk = icmpv6_socket->sk;
+	np = inet6_sk(sk);
+
 	if (!icmpv6_xrlim_allow(sk, type, &fl))
 		goto out;
 
@@ -423,9 +426,9 @@ out:
 
 static void icmpv6_echo_reply(struct sk_buff *skb)
 {
-	struct sock *sk = icmpv6_socket->sk;
+	struct sock *sk;
 	struct inet6_dev *idev;
-	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
 	struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
 	struct icmp6hdr tmp_hdr;
@@ -454,6 +457,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	if (icmpv6_xmit_lock())
 		return;
 
+	sk = icmpv6_socket->sk;
+	np = inet6_sk(sk);
+
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
 		fl.oif = np->mcast_oif;
 
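Both hunks in this file make the same ordering fix: the per-protocol icmpv6_socket is shared, so sk and np may only be fetched once icmpv6_xmit_lock() has succeeded, rather than at declaration time before the lock. A small pthread model of the rule -- read shared state only under the lock; the names and harness are illustrative assumptions:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t xmit_lock = PTHREAD_MUTEX_INITIALIZER;
static int shared_sk_state;		/* stands in for icmpv6_socket->sk */

static void send_err(void)
{
	int state;			/* do not read the shared object yet */

	pthread_mutex_lock(&xmit_lock);	/* models icmpv6_xmit_lock() */
	state = shared_sk_state;	/* safe: other users are excluded now */
	printf("sk state %d\n", state);
	pthread_mutex_unlock(&xmit_lock);
}

int main(void)
{
	send_err();
	return 0;
}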
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3b1c9fa184ae..ba3b0c267f75 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -882,6 +882,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
 	t->parms.hop_limit = p->hop_limit;
 	t->parms.encap_limit = p->encap_limit;
 	t->parms.flowinfo = p->flowinfo;
+	t->parms.link = p->link;
 	ip6ip6_tnl_link_config(t);
 	return 0;
 }
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
index 2f4c91ddc9a3..5ade5a5d1990 100644
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -37,5 +37,4 @@ EXPORT_SYMBOL(in6_dev_finish_destroy);
 EXPORT_SYMBOL(xfrm6_rcv);
 #endif
 EXPORT_SYMBOL(rt6_lookup);
-EXPORT_SYMBOL(fl6_sock_lookup);
 EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 617645bc5ed6..e2b848ec9851 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -434,12 +434,12 @@ csum_copy_err:
 	/* Clear queue. */
 	if (flags&MSG_PEEK) {
 		int clear = 0;
-		spin_lock_irq(&sk->sk_receive_queue.lock);
+		spin_lock_bh(&sk->sk_receive_queue.lock);
 		if (skb == skb_peek(&sk->sk_receive_queue)) {
 			__skb_unlink(skb, &sk->sk_receive_queue);
 			clear = 1;
 		}
-		spin_unlock_irq(&sk->sk_receive_queue.lock);
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		if (clear)
 			kfree_skb(skb);
 	}
@@ -971,11 +971,11 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		struct sk_buff *skb;
 		int amount = 0;
 
-		spin_lock_irq(&sk->sk_receive_queue.lock);
+		spin_lock_bh(&sk->sk_receive_queue.lock);
 		skb = skb_peek(&sk->sk_receive_queue);
 		if (skb != NULL)
 			amount = skb->tail - skb->h.raw;
-		spin_unlock_irq(&sk->sk_receive_queue.lock);
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		return put_user(amount, (int __user *)arg);
 	}
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3bf8a0254f81..1f5b226c3573 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1570,7 +1570,8 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *src,
 			 int iif,
 			 int type, u32 pid, u32 seq,
-			 struct nlmsghdr *in_nlh, int prefix)
+			 struct nlmsghdr *in_nlh, int prefix,
+			 unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
@@ -1588,7 +1589,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 		pid = in_nlh->nlmsg_pid;
 	}
 
-	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
+	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_INET6;
 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
@@ -1674,7 +1675,7 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
-		     NULL, prefix);
+		     NULL, prefix, NLM_F_MULTI);
 }
 
 static int fib6_dump_node(struct fib6_walker_t *w)
@@ -1822,7 +1823,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 			&fl.fl6_dst, &fl.fl6_src,
 			iif,
 			RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
-			nlh->nlmsg_seq, nlh, 0);
+			nlh->nlmsg_seq, nlh, 0, 0);
 	if (err < 0) {
 		err = -EMSGSIZE;
 		goto out_free;
@@ -1848,7 +1849,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
 		return;
 	}
-	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
+	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
 		return;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0f69e800a0ad..2414937f2a83 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -65,7 +65,7 @@
 #include <linux/seq_file.h>
 
 static void tcp_v6_send_reset(struct sk_buff *skb);
-static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
+static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
 			      struct sk_buff *skb);
 
@@ -394,24 +394,26 @@ static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
 	return c & (TCP_SYNQ_HSIZE - 1);
 }
 
-static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
-					      struct open_request ***prevp,
+static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
+					      struct request_sock ***prevp,
 					      __u16 rport,
 					      struct in6_addr *raddr,
 					      struct in6_addr *laddr,
 					      int iif)
 {
-	struct tcp_listen_opt *lopt = tp->listen_opt;
-	struct open_request *req, **prev;
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
+	struct request_sock *req, **prev;
 
 	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
 	     (req = *prev) != NULL;
 	     prev = &req->dl_next) {
-		if (req->rmt_port == rport &&
-		    req->class->family == AF_INET6 &&
-		    ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
-		    ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
-		    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
+		const struct tcp6_request_sock *treq = tcp6_rsk(req);
+
+		if (inet_rsk(req)->rmt_port == rport &&
+		    req->rsk_ops->family == AF_INET6 &&
+		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
+		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
+		    (!treq->iif || treq->iif == iif)) {
 			BUG_TRAP(req->sk == NULL);
 			*prevp = prev;
 			return req;
@@ -906,9 +908,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	icmpv6_err_convert(type, code, &err);
 
-	/* Might be for an open_request */
+	/* Might be for an request_sock */
 	switch (sk->sk_state) {
-		struct open_request *req, **prev;
+		struct request_sock *req, **prev;
 	case TCP_LISTEN:
 		if (sock_owned_by_user(sk))
 			goto out;
@@ -923,7 +925,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		 */
 		BUG_TRAP(req->sk == NULL);
 
-		if (seq != req->snt_isn) {
+		if (seq != tcp_rsk(req)->snt_isn) {
 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 			goto out;
 		}
@@ -957,9 +959,10 @@ out:
 }
 
 
-static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
+static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 			      struct dst_entry *dst)
 {
+	struct tcp6_request_sock *treq = tcp6_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff * skb;
 	struct ipv6_txoptions *opt = NULL;
@@ -969,19 +972,19 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
 
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
-	ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
+	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
+	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 	fl.fl6_flowlabel = 0;
-	fl.oif = req->af.v6_req.iif;
-	fl.fl_ip_dport = req->rmt_port;
+	fl.oif = treq->iif;
+	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
 
 	if (dst == NULL) {
 		opt = np->opt;
 		if (opt == NULL &&
 		    np->rxopt.bits.srcrt == 2 &&
-		    req->af.v6_req.pktopts) {
-			struct sk_buff *pktopts = req->af.v6_req.pktopts;
+		    treq->pktopts) {
+			struct sk_buff *pktopts = treq->pktopts;
 			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
 			if (rxopt->srcrt)
 				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
@@ -1008,10 +1011,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
 		struct tcphdr *th = skb->h.th;
 
 		th->check = tcp_v6_check(th, skb->len,
-					 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
+					 &treq->loc_addr, &treq->rmt_addr,
 					 csum_partial((char *)th, skb->len, skb->csum));
 
-		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 		err = ip6_xmit(sk, skb, &fl, opt, 0);
 		if (err == NET_XMIT_CN)
 			err = 0;
@@ -1024,17 +1027,18 @@ done:
 	return err;
 }
 
-static void tcp_v6_or_free(struct open_request *req)
+static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
-	if (req->af.v6_req.pktopts)
-		kfree_skb(req->af.v6_req.pktopts);
+	if (tcp6_rsk(req)->pktopts)
+		kfree_skb(tcp6_rsk(req)->pktopts);
 }
 
-static struct or_calltable or_ipv6 = {
+static struct request_sock_ops tcp6_request_sock_ops = {
 	.family		=	AF_INET6,
+	.obj_size	=	sizeof(struct tcp6_request_sock),
 	.rtx_syn_ack	=	tcp_v6_send_synack,
-	.send_ack	=	tcp_v6_or_send_ack,
-	.destructor	=	tcp_v6_or_free,
+	.send_ack	=	tcp_v6_reqsk_send_ack,
+	.destructor	=	tcp_v6_reqsk_destructor,
 	.send_reset	=	tcp_v6_send_reset
 };
 
@@ -1219,15 +1223,15 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 	tcp_tw_put(tw);
 }
 
-static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
+static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
 {
-	tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
+	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
 }
 
 
 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 {
-	struct open_request *req, **prev;
+	struct request_sock *req, **prev;
 	struct tcphdr *th = skb->h.th;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sock *nsk;
@@ -1260,21 +1264,13 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 	return sk;
 }
 
-static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
+static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->listen_opt;
-	u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
-
-	req->sk = NULL;
-	req->expires = jiffies + TCP_TIMEOUT_INIT;
-	req->retrans = 0;
-	req->dl_next = lopt->syn_table[h];
-
-	write_lock(&tp->syn_wait_lock);
-	lopt->syn_table[h] = req;
-	write_unlock(&tp->syn_wait_lock);
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
+	u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
 
+	reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
 	tcp_synq_added(sk);
 }
 
@@ -1284,10 +1280,11 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_options_received tmp_opt;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct open_request *req = NULL;
+	struct request_sock *req = NULL;
 	__u32 isn = TCP_SKB_CB(skb)->when;
 
 	if (skb->protocol == htons(ETH_P_IP))
@@ -1308,7 +1305,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
 		goto drop;
 
-	req = tcp_openreq_alloc();
+	req = reqsk_alloc(&tcp6_request_sock_ops);
 	if (req == NULL)
 		goto drop;
 
@@ -1321,28 +1318,28 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
-	req->class = &or_ipv6;
-	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
+	treq = tcp6_rsk(req);
+	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
 	TCP_ECN_create_request(req, skb->h.th);
-	req->af.v6_req.pktopts = NULL;
+	treq->pktopts = NULL;
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo ||
 	    np->rxopt.bits.rxhlim) {
 		atomic_inc(&skb->users);
-		req->af.v6_req.pktopts = skb;
+		treq->pktopts = skb;
 	}
-	req->af.v6_req.iif = sk->sk_bound_dev_if;
+	treq->iif = sk->sk_bound_dev_if;
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		req->af.v6_req.iif = tcp_v6_iif(skb);
+	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		treq->iif = tcp_v6_iif(skb);
 
 	if (isn == 0)
 		isn = tcp_v6_init_sequence(sk,skb);
 
-	req->snt_isn = isn;
+	tcp_rsk(req)->snt_isn = isn;
 
 	if (tcp_v6_send_synack(sk, req, NULL))
 		goto drop;
@@ -1353,16 +1350,17 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 drop:
 	if (req)
-		tcp_openreq_free(req);
+		reqsk_free(req);
 
 	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 	return 0; /* don't send reset */
 }
 
 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
-					  struct open_request *req,
+					  struct request_sock *req,
 					  struct dst_entry *dst)
 {
+	struct tcp6_request_sock *treq = tcp6_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
@@ -1426,10 +1424,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		goto out_overflow;
 
 	if (np->rxopt.bits.srcrt == 2 &&
-	    opt == NULL && req->af.v6_req.pktopts) {
-		struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
+	    opt == NULL && treq->pktopts) {
+		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
 		if (rxopt->srcrt)
-			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
+			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
 	}
 
 	if (dst == NULL) {
@@ -1438,16 +1436,16 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 		if (opt && opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
 			ipv6_addr_copy(&final, &fl.fl6_dst);
 			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 			final_p = &final;
 		}
-		ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
+		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
-		fl.fl_ip_dport = req->rmt_port;
+		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1482,10 +1480,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
-	ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
-	ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
-	newsk->sk_bound_dev_if = req->af.v6_req.iif;
+	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
+	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
+	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
+	newsk->sk_bound_dev_if = treq->iif;
 
 	/* Now IPv6 options...
 
@@ -1498,11 +1496,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	/* Clone pktoptions received with SYN */
 	newnp->pktoptions = NULL;
-	if (req->af.v6_req.pktopts) {
-		newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
-					      GFP_ATOMIC);
-		kfree_skb(req->af.v6_req.pktopts);
-		req->af.v6_req.pktopts = NULL;
+	if (treq->pktopts != NULL) {
+		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
+		kfree_skb(treq->pktopts);
+		treq->pktopts = NULL;
 		if (newnp->pktoptions)
 			skb_set_owner_r(newnp->pktoptions, newsk);
 	}
@@ -2050,7 +2047,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
 
 /* Proc filesystem TCPv6 sock list dumping. */
 static void get_openreq6(struct seq_file *seq,
-			 struct sock *sk, struct open_request *req, int i, int uid)
+			 struct sock *sk, struct request_sock *req, int i, int uid)
 {
 	struct in6_addr *dest, *src;
 	int ttd = req->expires - jiffies;
@@ -2058,8 +2055,8 @@ static void get_openreq6(struct seq_file *seq,
 	if (ttd < 0)
 		ttd = 0;
 
-	src = &req->af.v6_req.loc_addr;
-	dest = &req->af.v6_req.rmt_addr;
+	src = &tcp6_rsk(req)->loc_addr;
+	dest = &tcp6_rsk(req)->rmt_addr;
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
@@ -2069,7 +2066,7 @@ static void get_openreq6(struct seq_file *seq,
 		   ntohs(inet_sk(sk)->sport),
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
-		   ntohs(req->rmt_port),
+		   ntohs(inet_rsk(req)->rmt_port),
 		   TCP_SYN_RECV,
 		   0,0, /* could print option size, but that is af dependent. */
 		   1,   /* timers active (only the expire timer) */
@@ -2239,6 +2236,7 @@ struct proto tcpv6_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
+	.rsk_prot		= &tcp6_request_sock_ops,
 };
 
 static struct inet6_protocol tcpv6_protocol = {
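tcp_v6_synq_add() above shrinks to a hash computation plus reqsk_queue_hash_req(); the removed lines show what the helper now owns: stamping the expiry and prepending the request to its bucket under the queue's lock. A single-threaded sketch of that insert path, with assumed field sizes and a deliberately simplified hash:

#include <stdint.h>
#include <stdio.h>

#define SYNQ_HSIZE 512

struct request_sock {
	struct request_sock *dl_next;
	uint32_t raddr;
	uint16_t rport;
	unsigned long expires;
};

static struct request_sock *syn_table[SYNQ_HSIZE];

static unsigned synq_hash(uint32_t raddr, uint16_t rport, uint32_t rnd)
{
	uint32_t h = raddr ^ ((uint32_t)rport << 16) ^ rnd;

	h ^= h >> 16;			/* cheap stand-in for the real hash */
	return h & (SYNQ_HSIZE - 1);
}

static void synq_add(struct request_sock *req, uint32_t rnd,
		     unsigned long now, unsigned long timeout)
{
	unsigned h = synq_hash(req->raddr, req->rport, rnd);

	req->expires = now + timeout;	/* TCP_TIMEOUT_INIT in the kernel */
	req->dl_next = syn_table[h];	/* prepend to the bucket */
	syn_table[h] = req;
}

int main(void)
{
	struct request_sock req = { .raddr = 0x0a000001, .rport = 4321 };

	synq_add(&req, 0xdeadbeef, 1000, 3000);
	printf("bucket head expires at %lu\n", req.expires);
	return 0;
}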
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e251d0ba4f39..eff050ac7049 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -300,12 +300,12 @@ csum_copy_err:
 	/* Clear queue. */
 	if (flags&MSG_PEEK) {
 		int clear = 0;
-		spin_lock_irq(&sk->sk_receive_queue.lock);
+		spin_lock_bh(&sk->sk_receive_queue.lock);
 		if (skb == skb_peek(&sk->sk_receive_queue)) {
 			__skb_unlink(skb, &sk->sk_receive_queue);
 			clear = 1;
 		}
-		spin_unlock_irq(&sk->sk_receive_queue.lock);
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		if (clear)
 			kfree_skb(skb);
 	}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ce980aa94ed8..98b72f2024ff 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -656,13 +656,18 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 	sa->sadb_sa_exttype = SADB_EXT_SA;
 	sa->sadb_sa_spi = x->id.spi;
 	sa->sadb_sa_replay = x->props.replay_window;
-	sa->sadb_sa_state = SADB_SASTATE_DYING;
-	if (x->km.state == XFRM_STATE_VALID && !x->km.dying)
-		sa->sadb_sa_state = SADB_SASTATE_MATURE;
-	else if (x->km.state == XFRM_STATE_ACQ)
+	switch (x->km.state) {
+	case XFRM_STATE_VALID:
+		sa->sadb_sa_state = x->km.dying ?
+			SADB_SASTATE_DYING : SADB_SASTATE_MATURE;
+		break;
+	case XFRM_STATE_ACQ:
 		sa->sadb_sa_state = SADB_SASTATE_LARVAL;
-	else if (x->km.state == XFRM_STATE_EXPIRED)
+		break;
+	default:
 		sa->sadb_sa_state = SADB_SASTATE_DEAD;
+		break;
+	}
 	sa->sadb_sa_auth = 0;
 	if (x->aalg) {
 		struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
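The switch above replaces an if/else chain whose fall-through default ("DYING") was easy to misread; the mapping it makes explicit is: VALID splits on km.dying, ACQ is larval, and anything else reports dead. A compilable restatement of just that mapping, with illustrative enum values that are not the kernel's:

#include <stdio.h>

enum { XFRM_STATE_VALID, XFRM_STATE_ACQ, XFRM_STATE_EXPIRED, XFRM_STATE_DEAD };
enum { SADB_SASTATE_LARVAL, SADB_SASTATE_MATURE, SADB_SASTATE_DYING,
       SADB_SASTATE_DEAD };

static int sa_state(int km_state, int dying)
{
	switch (km_state) {
	case XFRM_STATE_VALID:
		return dying ? SADB_SASTATE_DYING : SADB_SASTATE_MATURE;
	case XFRM_STATE_ACQ:
		return SADB_SASTATE_LARVAL;
	default:			/* EXPIRED, DEAD, ... */
		return SADB_SASTATE_DEAD;
	}
}

int main(void)
{
	printf("%d %d\n", sa_state(XFRM_STATE_VALID, 0),
	       sa_state(XFRM_STATE_EXPIRED, 0));
	return 0;
}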
@@ -1240,13 +1245,78 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
 	return 0;
 }
 
+static inline int event2poltype(int event)
+{
+	switch (event) {
+	case XFRM_MSG_DELPOLICY:
+		return SADB_X_SPDDELETE;
+	case XFRM_MSG_NEWPOLICY:
+		return SADB_X_SPDADD;
+	case XFRM_MSG_UPDPOLICY:
+		return SADB_X_SPDUPDATE;
+	case XFRM_MSG_POLEXPIRE:
+	//	return SADB_X_SPDEXPIRE;
+	default:
+		printk("pfkey: Unknown policy event %d\n", event);
+		break;
+	}
+
+	return 0;
+}
+
+static inline int event2keytype(int event)
+{
+	switch (event) {
+	case XFRM_MSG_DELSA:
+		return SADB_DELETE;
+	case XFRM_MSG_NEWSA:
+		return SADB_ADD;
+	case XFRM_MSG_UPDSA:
+		return SADB_UPDATE;
+	case XFRM_MSG_EXPIRE:
+		return SADB_EXPIRE;
+	default:
+		printk("pfkey: Unknown SA event %d\n", event);
+		break;
+	}
+
+	return 0;
+}
+
+/* ADD/UPD/DEL */
+static int key_notify_sa(struct xfrm_state *x, struct km_event *c)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	int hsc = 3;
+
+	if (c->event == XFRM_MSG_DELSA)
+		hsc = 0;
+
+	skb = pfkey_xfrm_state2msg(x, 0, hsc);
+
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	hdr = (struct sadb_msg *) skb->data;
+	hdr->sadb_msg_version = PF_KEY_V2;
+	hdr->sadb_msg_type = event2keytype(c->event);
+	hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
+	hdr->sadb_msg_errno = 0;
+	hdr->sadb_msg_reserved = 0;
+	hdr->sadb_msg_seq = c->seq;
+	hdr->sadb_msg_pid = c->pid;
+
+	pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
+
+	return 0;
+}
 
 static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
 {
-	struct sk_buff *out_skb;
-	struct sadb_msg *out_hdr;
 	struct xfrm_state *x;
 	int err;
+	struct km_event c;
 
 	xfrm_probe_algs();
 
@@ -1254,6 +1324,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
 	if (IS_ERR(x))
 		return PTR_ERR(x);
 
+	xfrm_state_hold(x);
 	if (hdr->sadb_msg_type == SADB_ADD)
 		err = xfrm_state_add(x);
 	else
@@ -1262,30 +1333,26 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
 		xfrm_state_put(x);
-		return err;
+		goto out;
 	}
 
-	out_skb = pfkey_xfrm_state2msg(x, 0, 3);
-	if (IS_ERR(out_skb))
-		return PTR_ERR(out_skb); /* XXX Should we return 0 here ? */
-
-	out_hdr = (struct sadb_msg *) out_skb->data;
-	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
-	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
-	out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
-	out_hdr->sadb_msg_errno = 0;
-	out_hdr->sadb_msg_reserved = 0;
-	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
-	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
-
-	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
+	if (hdr->sadb_msg_type == SADB_ADD)
+		c.event = XFRM_MSG_NEWSA;
+	else
+		c.event = XFRM_MSG_UPDSA;
+	c.seq = hdr->sadb_msg_seq;
+	c.pid = hdr->sadb_msg_pid;
+	km_state_notify(x, &c);
+out:
+	xfrm_state_put(x);
+	return err;
1282
1283 return 0;
1284} 1349}
1285 1350
1286static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1351static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1287{ 1352{
1288 struct xfrm_state *x; 1353 struct xfrm_state *x;
1354 struct km_event c;
1355 int err;
1289 1356
1290 if (!ext_hdrs[SADB_EXT_SA-1] || 1357 if (!ext_hdrs[SADB_EXT_SA-1] ||
1291 !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 1358 !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
@@ -1301,13 +1368,19 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1301 return -EPERM; 1368 return -EPERM;
1302 } 1369 }
1303 1370
1304 xfrm_state_delete(x); 1371 err = xfrm_state_delete(x);
1305 xfrm_state_put(x); 1372 if (err < 0) {
1373 xfrm_state_put(x);
1374 return err;
1375 }
1306 1376
1307 pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, 1377 c.seq = hdr->sadb_msg_seq;
1308 BROADCAST_ALL, sk); 1378 c.pid = hdr->sadb_msg_pid;
1379 c.event = XFRM_MSG_DELSA;
1380 km_state_notify(x, &c);
1381 xfrm_state_put(x);
1309 1382
1310 return 0; 1383 return err;
1311} 1384}
1312 1385
1313static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1386static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
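
The pfkey_add() and pfkey_delete() rewrites above share one pattern: instead of each handler hand-building a reply skb and broadcasting it, they fill a struct km_event and call km_state_notify(), which fans the event out to every registered key manager; the PF_KEY-specific encoding now lives once in key_notify_sa(). A sketch of the pattern, using only the fields visible in the hunks:

    struct km_event c;

    c.event = XFRM_MSG_NEWSA;        /* or UPDSA/DELSA, from the request type */
    c.seq   = hdr->sadb_msg_seq;     /* echo the requester's sequence number */
    c.pid   = hdr->sadb_msg_pid;     /* ... and its PF_KEY pid */
    km_state_notify(x, &c);          /* reaches pfkey_send_notify() below */

Note also the reference-count discipline this forces: pfkey_add() takes an extra xfrm_state_hold() before inserting so the state is guaranteed alive across the notification, then drops it on the common exit path.
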
@@ -1445,28 +1518,42 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg
1445 return 0; 1518 return 0;
1446} 1519}
1447 1520
1521static int key_notify_sa_flush(struct km_event *c)
1522{
1523 struct sk_buff *skb;
1524 struct sadb_msg *hdr;
1525
1526 skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC);
1527 if (!skb)
1528 return -ENOBUFS;
1529 hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
1530 hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto);
1531 hdr->sadb_msg_seq = c->seq;
1532 hdr->sadb_msg_pid = c->pid;
1533 hdr->sadb_msg_version = PF_KEY_V2;
1534 hdr->sadb_msg_errno = (uint8_t) 0;
1535 hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
1536
1537 pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
1538
1539 return 0;
1540}
1541
1448static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1542static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1449{ 1543{
1450 unsigned proto; 1544 unsigned proto;
1451 struct sk_buff *skb_out; 1545 struct km_event c;
1452 struct sadb_msg *hdr_out;
1453 1546
1454 proto = pfkey_satype2proto(hdr->sadb_msg_satype); 1547 proto = pfkey_satype2proto(hdr->sadb_msg_satype);
1455 if (proto == 0) 1548 if (proto == 0)
1456 return -EINVAL; 1549 return -EINVAL;
1457 1550
1458 skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
1459 if (!skb_out)
1460 return -ENOBUFS;
1461
1462 xfrm_state_flush(proto); 1551 xfrm_state_flush(proto);
1463 1552 c.data.proto = proto;
1464 hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); 1553 c.seq = hdr->sadb_msg_seq;
1465 pfkey_hdr_dup(hdr_out, hdr); 1554 c.pid = hdr->sadb_msg_pid;
1466 hdr_out->sadb_msg_errno = (uint8_t) 0; 1555 c.event = XFRM_MSG_FLUSHSA;
1467 hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); 1556 km_state_notify(NULL, &c);
1468
1469 pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL);
1470 1557
1471 return 0; 1558 return 0;
1472} 1559}
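
key_notify_sa_flush() sets sadb_msg_len to sizeof(struct sadb_msg) / sizeof(uint64_t) because PF_KEY v2 (RFC 2367) measures all message lengths in 64-bit words. A standalone check of that arithmetic, using a stand-in struct laid out per the RFC (16 bytes, so the length field comes out as 2):

    #include <stdint.h>
    #include <stdio.h>

    /* Model of struct sadb_msg per RFC 2367; not the kernel's definition. */
    struct sadb_msg_model {
            uint8_t  version, type, error, satype;
            uint16_t len;
            uint16_t reserved;
            uint32_t seq, pid;
    };

    int main(void)
    {
            printf("sadb_msg_len = %zu\n",
                   sizeof(struct sadb_msg_model) / sizeof(uint64_t)); /* 2 */
            return 0;
    }
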
@@ -1859,6 +1946,35 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
1859 hdr->sadb_msg_reserved = atomic_read(&xp->refcnt); 1946 hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
1860} 1947}
1861 1948
1949static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
1950{
1951 struct sk_buff *out_skb;
1952 struct sadb_msg *out_hdr;
1953 int err;
1954
1955 out_skb = pfkey_xfrm_policy2msg_prep(xp);
1956 if (IS_ERR(out_skb)) {
1957 err = PTR_ERR(out_skb);
1958 goto out;
1959 }
1960 pfkey_xfrm_policy2msg(out_skb, xp, dir);
1961
1962 out_hdr = (struct sadb_msg *) out_skb->data;
1963 out_hdr->sadb_msg_version = PF_KEY_V2;
1964
1965 if (c->data.byid && c->event == XFRM_MSG_DELPOLICY)
1966 out_hdr->sadb_msg_type = SADB_X_SPDDELETE2;
1967 else
1968 out_hdr->sadb_msg_type = event2poltype(c->event);
1969 out_hdr->sadb_msg_errno = 0;
1970 out_hdr->sadb_msg_seq = c->seq;
1971 out_hdr->sadb_msg_pid = c->pid;
1972 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
1973out:
1974 return 0;
1975
1976}
1977
1862static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1978static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1863{ 1979{
1864 int err; 1980 int err;
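
key_notify_policy() above maps the generic event back to a PF_KEY message type through event2poltype(), with one special case: a delete addressed by policy id (c->data.byid) must be reported as SADB_X_SPDDELETE2 rather than SADB_X_SPDDELETE. A runnable model of the selection; the constants are placeholders for the real SADB/XFRM values:

    #include <stdio.h>

    enum { XFRM_MSG_NEWPOLICY = 1, XFRM_MSG_UPDPOLICY, XFRM_MSG_DELPOLICY };
    enum { SADB_X_SPDADD = 1, SADB_X_SPDUPDATE,
           SADB_X_SPDDELETE, SADB_X_SPDDELETE2 };

    static int pol_msg_type(int event, int byid)
    {
            if (byid && event == XFRM_MSG_DELPOLICY)
                    return SADB_X_SPDDELETE2;   /* delete-by-id variant */
            switch (event) {
            case XFRM_MSG_NEWPOLICY: return SADB_X_SPDADD;
            case XFRM_MSG_UPDPOLICY: return SADB_X_SPDUPDATE;
            case XFRM_MSG_DELPOLICY: return SADB_X_SPDDELETE;
            default:                 return 0;  /* unknown event */
            }
    }

    int main(void)
    {
            printf("%d\n", pol_msg_type(XFRM_MSG_DELPOLICY, 1)); /* 4 */
            return 0;
    }
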
@@ -1866,8 +1982,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1866 struct sadb_address *sa; 1982 struct sadb_address *sa;
1867 struct sadb_x_policy *pol; 1983 struct sadb_x_policy *pol;
1868 struct xfrm_policy *xp; 1984 struct xfrm_policy *xp;
1869 struct sk_buff *out_skb; 1985 struct km_event c;
1870 struct sadb_msg *out_hdr;
1871 1986
1872 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 1987 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1873 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || 1988 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -1935,31 +2050,23 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1935 (err = parse_ipsecrequests(xp, pol)) < 0) 2050 (err = parse_ipsecrequests(xp, pol)) < 0)
1936 goto out; 2051 goto out;
1937 2052
1938 out_skb = pfkey_xfrm_policy2msg_prep(xp);
1939 if (IS_ERR(out_skb)) {
1940 err = PTR_ERR(out_skb);
1941 goto out;
1942 }
1943
1944 err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, 2053 err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
1945 hdr->sadb_msg_type != SADB_X_SPDUPDATE); 2054 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
1946 if (err) { 2055 if (err) {
1947 kfree_skb(out_skb); 2056 kfree(xp);
1948 goto out; 2057 return err;
1949 } 2058 }
1950 2059
1951 pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1); 2060 if (hdr->sadb_msg_type == SADB_X_SPDUPDATE)
2061 c.event = XFRM_MSG_UPDPOLICY;
2062 else
2063 c.event = XFRM_MSG_NEWPOLICY;
1952 2064
1953 xfrm_pol_put(xp); 2065 c.seq = hdr->sadb_msg_seq;
2066 c.pid = hdr->sadb_msg_pid;
1954 2067
1955 out_hdr = (struct sadb_msg *) out_skb->data; 2068 km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
1956 out_hdr->sadb_msg_version = hdr->sadb_msg_version; 2069 xfrm_pol_put(xp);
1957 out_hdr->sadb_msg_type = hdr->sadb_msg_type;
1958 out_hdr->sadb_msg_satype = 0;
1959 out_hdr->sadb_msg_errno = 0;
1960 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
1961 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
1962 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
1963 return 0; 2070 return 0;
1964 2071
1965out: 2072out:
@@ -1973,9 +2080,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
1973 struct sadb_address *sa; 2080 struct sadb_address *sa;
1974 struct sadb_x_policy *pol; 2081 struct sadb_x_policy *pol;
1975 struct xfrm_policy *xp; 2082 struct xfrm_policy *xp;
1976 struct sk_buff *out_skb;
1977 struct sadb_msg *out_hdr;
1978 struct xfrm_selector sel; 2083 struct xfrm_selector sel;
2084 struct km_event c;
1979 2085
1980 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 2086 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1981 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || 2087 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -2010,25 +2116,40 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2010 2116
2011 err = 0; 2117 err = 0;
2012 2118
2119 c.seq = hdr->sadb_msg_seq;
2120 c.pid = hdr->sadb_msg_pid;
2121 c.event = XFRM_MSG_DELPOLICY;
2122 km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
2123
2124 xfrm_pol_put(xp);
2125 return err;
2126}
2127
2128static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb_msg *hdr, int dir)
2129{
2130 int err;
2131 struct sk_buff *out_skb;
2132 struct sadb_msg *out_hdr;
2133 err = 0;
2134
2013 out_skb = pfkey_xfrm_policy2msg_prep(xp); 2135 out_skb = pfkey_xfrm_policy2msg_prep(xp);
2014 if (IS_ERR(out_skb)) { 2136 if (IS_ERR(out_skb)) {
2015 err = PTR_ERR(out_skb); 2137 err = PTR_ERR(out_skb);
2016 goto out; 2138 goto out;
2017 } 2139 }
2018 pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1); 2140 pfkey_xfrm_policy2msg(out_skb, xp, dir);
2019 2141
2020 out_hdr = (struct sadb_msg *) out_skb->data; 2142 out_hdr = (struct sadb_msg *) out_skb->data;
2021 out_hdr->sadb_msg_version = hdr->sadb_msg_version; 2143 out_hdr->sadb_msg_version = hdr->sadb_msg_version;
2022 out_hdr->sadb_msg_type = SADB_X_SPDDELETE; 2144 out_hdr->sadb_msg_type = hdr->sadb_msg_type;
2023 out_hdr->sadb_msg_satype = 0; 2145 out_hdr->sadb_msg_satype = 0;
2024 out_hdr->sadb_msg_errno = 0; 2146 out_hdr->sadb_msg_errno = 0;
2025 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; 2147 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
2026 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; 2148 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
2027 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk); 2149 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk);
2028 err = 0; 2150 err = 0;
2029 2151
2030out: 2152out:
2031 xfrm_pol_put(xp);
2032 return err; 2153 return err;
2033} 2154}
2034 2155
@@ -2037,8 +2158,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2037 int err; 2158 int err;
2038 struct sadb_x_policy *pol; 2159 struct sadb_x_policy *pol;
2039 struct xfrm_policy *xp; 2160 struct xfrm_policy *xp;
2040 struct sk_buff *out_skb; 2161 struct km_event c;
2041 struct sadb_msg *out_hdr;
2042 2162
2043 if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL) 2163 if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL)
2044 return -EINVAL; 2164 return -EINVAL;
@@ -2050,24 +2170,16 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2050 2170
2051 err = 0; 2171 err = 0;
2052 2172
2053 out_skb = pfkey_xfrm_policy2msg_prep(xp); 2173 c.seq = hdr->sadb_msg_seq;
2054 if (IS_ERR(out_skb)) { 2174 c.pid = hdr->sadb_msg_pid;
2055 err = PTR_ERR(out_skb); 2175 if (hdr->sadb_msg_type == SADB_X_SPDDELETE2) {
2056 goto out; 2176 c.data.byid = 1;
2177 c.event = XFRM_MSG_DELPOLICY;
2178 km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
2179 } else {
2180 err = key_pol_get_resp(sk, xp, hdr, pol->sadb_x_policy_dir-1);
2057 } 2181 }
2058 pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
2059
2060 out_hdr = (struct sadb_msg *) out_skb->data;
2061 out_hdr->sadb_msg_version = hdr->sadb_msg_version;
2062 out_hdr->sadb_msg_type = hdr->sadb_msg_type;
2063 out_hdr->sadb_msg_satype = 0;
2064 out_hdr->sadb_msg_errno = 0;
2065 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
2066 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
2067 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
2068 err = 0;
2069 2182
2070out:
2071 xfrm_pol_put(xp); 2183 xfrm_pol_put(xp);
2072 return err; 2184 return err;
2073} 2185}
@@ -2102,22 +2214,34 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
2102 return xfrm_policy_walk(dump_sp, &data); 2214 return xfrm_policy_walk(dump_sp, &data);
2103} 2215}
2104 2216
2105static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2217static int key_notify_policy_flush(struct km_event *c)
2106{ 2218{
2107 struct sk_buff *skb_out; 2219 struct sk_buff *skb_out;
2108 struct sadb_msg *hdr_out; 2220 struct sadb_msg *hdr;
2109 2221
2110 skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL); 2222 skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC);
2111 if (!skb_out) 2223 if (!skb_out)
2112 return -ENOBUFS; 2224 return -ENOBUFS;
2225 hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
2226 hdr->sadb_msg_seq = c->seq;
2227 hdr->sadb_msg_pid = c->pid;
2228 hdr->sadb_msg_version = PF_KEY_V2;
2229 hdr->sadb_msg_errno = (uint8_t) 0;
2230 hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
2231 pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL);
2232 return 0;
2113 2233
2114 xfrm_policy_flush(); 2234}
2235
2236static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
2237{
2238 struct km_event c;
2115 2239
2116 hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); 2240 xfrm_policy_flush();
2117 pfkey_hdr_dup(hdr_out, hdr); 2241 c.event = XFRM_MSG_FLUSHPOLICY;
2118 hdr_out->sadb_msg_errno = (uint8_t) 0; 2242 c.pid = hdr->sadb_msg_pid;
2119 hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); 2243 c.seq = hdr->sadb_msg_seq;
2120 pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL); 2244 km_policy_notify(NULL, 0, &c);
2121 2245
2122 return 0; 2246 return 0;
2123} 2247}
@@ -2317,11 +2441,23 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2317 } 2441 }
2318} 2442}
2319 2443
2320static int pfkey_send_notify(struct xfrm_state *x, int hard) 2444static int key_notify_policy_expire(struct xfrm_policy *xp, struct km_event *c)
2445{
2446 return 0;
2447}
2448
2449static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c)
2321{ 2450{
2322 struct sk_buff *out_skb; 2451 struct sk_buff *out_skb;
2323 struct sadb_msg *out_hdr; 2452 struct sadb_msg *out_hdr;
2324 int hsc = (hard ? 2 : 1); 2453 int hard;
2454 int hsc;
2455
2456 hard = c->data.hard;
2457 if (hard)
2458 hsc = 2;
2459 else
2460 hsc = 1;
2325 2461
2326 out_skb = pfkey_xfrm_state2msg(x, 0, hsc); 2462 out_skb = pfkey_xfrm_state2msg(x, 0, hsc);
2327 if (IS_ERR(out_skb)) 2463 if (IS_ERR(out_skb))
@@ -2340,6 +2476,44 @@ static int pfkey_send_notify(struct xfrm_state *x, int hard)
2340 return 0; 2476 return 0;
2341} 2477}
2342 2478
2479static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
2480{
2481 switch (c->event) {
2482 case XFRM_MSG_EXPIRE:
2483 return key_notify_sa_expire(x, c);
2484 case XFRM_MSG_DELSA:
2485 case XFRM_MSG_NEWSA:
2486 case XFRM_MSG_UPDSA:
2487 return key_notify_sa(x, c);
2488 case XFRM_MSG_FLUSHSA:
2489 return key_notify_sa_flush(c);
2490 default:
2491 printk("pfkey: Unknown SA event %d\n", c->event);
2492 break;
2493 }
2494
2495 return 0;
2496}
2497
2498static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
2499{
2500 switch (c->event) {
2501 case XFRM_MSG_POLEXPIRE:
2502 return key_notify_policy_expire(xp, c);
2503 case XFRM_MSG_DELPOLICY:
2504 case XFRM_MSG_NEWPOLICY:
2505 case XFRM_MSG_UPDPOLICY:
2506 return key_notify_policy(xp, dir, c);
2507 case XFRM_MSG_FLUSHPOLICY:
2508 return key_notify_policy_flush(c);
2509 default:
2510 printk("pfkey: Unknown policy event %d\n", c->event);
2511 break;
2512 }
2513
2514 return 0;
2515}
2516
2343static u32 get_acqseq(void) 2517static u32 get_acqseq(void)
2344{ 2518{
2345 u32 res; 2519 u32 res;
@@ -2856,6 +3030,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
2856 .acquire = pfkey_send_acquire, 3030 .acquire = pfkey_send_acquire,
2857 .compile_policy = pfkey_compile_policy, 3031 .compile_policy = pfkey_compile_policy,
2858 .new_mapping = pfkey_send_new_mapping, 3032 .new_mapping = pfkey_send_new_mapping,
3033 .notify_policy = pfkey_send_policy_notify,
2859}; 3034};
2860 3035
2861static void __exit ipsec_pfkey_exit(void) 3036static void __exit ipsec_pfkey_exit(void)
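
The one-line addition to pfkeyv2_mgr registers the new policy notifier alongside the existing hooks; km_state_notify() and km_policy_notify() walk every struct xfrm_mgr registered with the xfrm core, so PF_KEY and netlink listeners observe the same km_event stream. A sketch of the resulting table (the .id and .notify members are assumed from context, not shown in this hunk):

    static struct xfrm_mgr pfkeyv2_mgr = {
            .id             = "pfkeyv2",
            .notify         = pfkey_send_notify,        /* SA events */
            .acquire        = pfkey_send_acquire,
            .compile_policy = pfkey_compile_policy,
            .new_mapping    = pfkey_send_new_mapping,
            .notify_policy  = pfkey_send_policy_notify, /* new: SPD events */
    };
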
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e41ce458c2a9..70bcd4744d93 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1095,8 +1095,7 @@ static int netlink_dump(struct sock *sk)
1095 return 0; 1095 return 0;
1096 } 1096 }
1097 1097
1098 nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLMSG_DONE, sizeof(int)); 1098 nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1099 nlh->nlmsg_flags |= NLM_F_MULTI;
1100 memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); 1099 memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
1101 skb_queue_tail(&sk->sk_receive_queue, skb); 1100 skb_queue_tail(&sk->sk_receive_queue, skb);
1102 sk->sk_data_ready(sk, skb->len); 1101 sk->sk_data_ready(sk, skb->len);
@@ -1107,6 +1106,9 @@ static int netlink_dump(struct sock *sk)
1107 1106
1108 netlink_destroy_callback(cb); 1107 netlink_destroy_callback(cb);
1109 return 0; 1108 return 0;
1109
1110nlmsg_failure:
1111 return -ENOBUFS;
1110} 1112}
1111 1113
1112int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1114int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -1178,7 +1180,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1178 } 1180 }
1179 1181
1180 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 1182 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1181 NLMSG_ERROR, sizeof(struct nlmsgerr)); 1183 NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
1182 errmsg = NLMSG_DATA(rep); 1184 errmsg = NLMSG_DATA(rep);
1183 errmsg->error = err; 1185 errmsg->error = err;
1184 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); 1186 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
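
Both af_netlink.c changes follow from __nlmsg_put() growing a flags parameter: the NLMSG_NEW()/NLMSG_NEW_ANSWER() macros now set nlmsg_flags at allocation time and jump to a local nlmsg_failure label when the skb runs out of room, so callers drop the separate nlh->nlmsg_flags store. The same mechanical conversion repeats in the net/sched fill functions below; a sketch of the shape it produces in a typical fill function:

    nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
    tcm = NLMSG_DATA(nlh);
    /* ... fill the payload and trailing rtattrs ... */
    return skb->len;

    nlmsg_failure:                       /* NLMSG_NEW jumps here on overflow */
            skb_trim(skb, b - skb->data);    /* drop the partial message */
            return -1;
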
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index b0941186f867..b22c9beb604d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -405,7 +405,7 @@ config NET_EMATCH_STACK
405 ---help--- 405 ---help---
406 Size of the local stack variable used while evaluating the tree of 406 Size of the local stack variable used while evaluating the tree of
407 ematches. Limits the depth of the tree, i.e. the number of 407 ematches. Limits the depth of the tree, i.e. the number of
408 encapsulated precedences. Every level requires 4 bytes of addtional 408 encapsulated precedences. Every level requires 4 bytes of additional
409 stack space. 409 stack space.
410 410
411config NET_EMATCH_CMP 411config NET_EMATCH_CMP
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cafcb084098d..9594206e6035 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -428,15 +428,15 @@ errout:
428 428
429static int 429static int
430tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, 430tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
431 unsigned flags, int event, int bind, int ref) 431 u16 flags, int event, int bind, int ref)
432{ 432{
433 struct tcamsg *t; 433 struct tcamsg *t;
434 struct nlmsghdr *nlh; 434 struct nlmsghdr *nlh;
435 unsigned char *b = skb->tail; 435 unsigned char *b = skb->tail;
436 struct rtattr *x; 436 struct rtattr *x;
437 437
438 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t)); 438 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
439 nlh->nlmsg_flags = flags; 439
440 t = NLMSG_DATA(nlh); 440 t = NLMSG_DATA(nlh);
441 t->tca_family = AF_UNSPEC; 441 t->tca_family = AF_UNSPEC;
442 442
@@ -669,7 +669,7 @@ err:
669} 669}
670 670
671static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, 671static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
672 unsigned flags) 672 u16 flags)
673{ 673{
674 struct tcamsg *t; 674 struct tcamsg *t;
675 struct nlmsghdr *nlh; 675 struct nlmsghdr *nlh;
@@ -684,8 +684,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
684 684
685 b = (unsigned char *)skb->tail; 685 b = (unsigned char *)skb->tail;
686 686
687 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t)); 687 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
688 nlh->nlmsg_flags = flags;
689 t = NLMSG_DATA(nlh); 688 t = NLMSG_DATA(nlh);
690 t->tca_family = AF_UNSPEC; 689 t->tca_family = AF_UNSPEC;
691 690
@@ -881,7 +880,7 @@ static int __init tc_action_init(void)
881 link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action; 880 link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
882 } 881 }
883 882
884 printk("TC classifier action (bugs to netdev@oss.sgi.com cc " 883 printk("TC classifier action (bugs to netdev@vger.kernel.org cc "
885 "hadi@cyberus.ca)\n"); 884 "hadi@cyberus.ca)\n");
886 return 0; 885 return 0;
887} 886}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 56e66c3fe0fa..1616bf5c9627 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -322,14 +322,13 @@ errout:
322 322
323static int 323static int
324tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, 324tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
325 u32 pid, u32 seq, unsigned flags, int event) 325 u32 pid, u32 seq, u16 flags, int event)
326{ 326{
327 struct tcmsg *tcm; 327 struct tcmsg *tcm;
328 struct nlmsghdr *nlh; 328 struct nlmsghdr *nlh;
329 unsigned char *b = skb->tail; 329 unsigned char *b = skb->tail;
330 330
331 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 331 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
332 nlh->nlmsg_flags = flags;
333 tcm = NLMSG_DATA(nlh); 332 tcm = NLMSG_DATA(nlh);
334 tcm->tcm_family = AF_UNSPEC; 333 tcm->tcm_family = AF_UNSPEC;
335 tcm->tcm_ifindex = tp->q->dev->ifindex; 334 tcm->tcm_ifindex = tp->q->dev->ifindex;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0d2d4415f334..dfb300bb6baa 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -261,6 +261,9 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
261 rta = (struct rtattr *) b; 261 rta = (struct rtattr *) b;
262 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 262 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
263 263
264 if (f->res.classid)
265 RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid);
266
264 if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 || 267 if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
265 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) 268 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
266 goto rtattr_failure; 269 goto rtattr_failure;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index f1eeaf65cee5..48bb23c2a35a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -32,7 +32,7 @@
32 * +-----------+ +-----------+ 32 * +-----------+ +-----------+
33 * | | 33 * | |
34 * ---> meta_ops[INT][INDEV](...) | 34 * ---> meta_ops[INT][INDEV](...) |
35 * | | 35 * | |
36 * ----------- | 36 * ----------- |
37 * V V 37 * V V
38 * +-----------+ +-----------+ 38 * +-----------+ +-----------+
@@ -70,6 +70,7 @@
70#include <net/dst.h> 70#include <net/dst.h>
71#include <net/route.h> 71#include <net/route.h>
72#include <net/pkt_cls.h> 72#include <net/pkt_cls.h>
73#include <net/sock.h>
73 74
74struct meta_obj 75struct meta_obj
75{ 76{
@@ -284,6 +285,214 @@ META_COLLECTOR(int_rtiif)
284} 285}
285 286
286/************************************************************************** 287/**************************************************************************
288 * Socket Attributes
289 **************************************************************************/
290
291#define SKIP_NONLOCAL(skb) \
292 if (unlikely(skb->sk == NULL)) { \
293 *err = -1; \
294 return; \
295 }
296
297META_COLLECTOR(int_sk_family)
298{
299 SKIP_NONLOCAL(skb);
300 dst->value = skb->sk->sk_family;
301}
302
303META_COLLECTOR(int_sk_state)
304{
305 SKIP_NONLOCAL(skb);
306 dst->value = skb->sk->sk_state;
307}
308
309META_COLLECTOR(int_sk_reuse)
310{
311 SKIP_NONLOCAL(skb);
312 dst->value = skb->sk->sk_reuse;
313}
314
315META_COLLECTOR(int_sk_bound_if)
316{
317 SKIP_NONLOCAL(skb);
318 /* No error if bound_dev_if is 0, legal userspace check */
319 dst->value = skb->sk->sk_bound_dev_if;
320}
321
322META_COLLECTOR(var_sk_bound_if)
323{
324 SKIP_NONLOCAL(skb);
325
326 if (skb->sk->sk_bound_dev_if == 0) {
327 dst->value = (unsigned long) "any";
328 dst->len = 3;
329 } else {
330 struct net_device *dev;
331
332 dev = dev_get_by_index(skb->sk->sk_bound_dev_if);
333 *err = var_dev(dev, dst);
334 if (dev)
335 dev_put(dev);
336 }
337}
338
339META_COLLECTOR(int_sk_refcnt)
340{
341 SKIP_NONLOCAL(skb);
342 dst->value = atomic_read(&skb->sk->sk_refcnt);
343}
344
345META_COLLECTOR(int_sk_rcvbuf)
346{
347 SKIP_NONLOCAL(skb);
348 dst->value = skb->sk->sk_rcvbuf;
349}
350
351META_COLLECTOR(int_sk_shutdown)
352{
353 SKIP_NONLOCAL(skb);
354 dst->value = skb->sk->sk_shutdown;
355}
356
357META_COLLECTOR(int_sk_proto)
358{
359 SKIP_NONLOCAL(skb);
360 dst->value = skb->sk->sk_protocol;
361}
362
363META_COLLECTOR(int_sk_type)
364{
365 SKIP_NONLOCAL(skb);
366 dst->value = skb->sk->sk_type;
367}
368
369META_COLLECTOR(int_sk_rmem_alloc)
370{
371 SKIP_NONLOCAL(skb);
372 dst->value = atomic_read(&skb->sk->sk_rmem_alloc);
373}
374
375META_COLLECTOR(int_sk_wmem_alloc)
376{
377 SKIP_NONLOCAL(skb);
378 dst->value = atomic_read(&skb->sk->sk_wmem_alloc);
379}
380
381META_COLLECTOR(int_sk_omem_alloc)
382{
383 SKIP_NONLOCAL(skb);
384 dst->value = atomic_read(&skb->sk->sk_omem_alloc);
385}
386
387META_COLLECTOR(int_sk_rcv_qlen)
388{
389 SKIP_NONLOCAL(skb);
390 dst->value = skb->sk->sk_receive_queue.qlen;
391}
392
393META_COLLECTOR(int_sk_snd_qlen)
394{
395 SKIP_NONLOCAL(skb);
396 dst->value = skb->sk->sk_write_queue.qlen;
397}
398
399META_COLLECTOR(int_sk_wmem_queued)
400{
401 SKIP_NONLOCAL(skb);
402 dst->value = skb->sk->sk_wmem_queued;
403}
404
405META_COLLECTOR(int_sk_fwd_alloc)
406{
407 SKIP_NONLOCAL(skb);
408 dst->value = skb->sk->sk_forward_alloc;
409}
410
411META_COLLECTOR(int_sk_sndbuf)
412{
413 SKIP_NONLOCAL(skb);
414 dst->value = skb->sk->sk_sndbuf;
415}
416
417META_COLLECTOR(int_sk_alloc)
418{
419 SKIP_NONLOCAL(skb);
420 dst->value = skb->sk->sk_allocation;
421}
422
423META_COLLECTOR(int_sk_route_caps)
424{
425 SKIP_NONLOCAL(skb);
426 dst->value = skb->sk->sk_route_caps;
427}
428
429META_COLLECTOR(int_sk_hashent)
430{
431 SKIP_NONLOCAL(skb);
432 dst->value = skb->sk->sk_hashent;
433}
434
435META_COLLECTOR(int_sk_lingertime)
436{
437 SKIP_NONLOCAL(skb);
438 dst->value = skb->sk->sk_lingertime / HZ;
439}
440
441META_COLLECTOR(int_sk_err_qlen)
442{
443 SKIP_NONLOCAL(skb);
444 dst->value = skb->sk->sk_error_queue.qlen;
445}
446
447META_COLLECTOR(int_sk_ack_bl)
448{
449 SKIP_NONLOCAL(skb);
450 dst->value = skb->sk->sk_ack_backlog;
451}
452
453META_COLLECTOR(int_sk_max_ack_bl)
454{
455 SKIP_NONLOCAL(skb);
456 dst->value = skb->sk->sk_max_ack_backlog;
457}
458
459META_COLLECTOR(int_sk_prio)
460{
461 SKIP_NONLOCAL(skb);
462 dst->value = skb->sk->sk_priority;
463}
464
465META_COLLECTOR(int_sk_rcvlowat)
466{
467 SKIP_NONLOCAL(skb);
468 dst->value = skb->sk->sk_rcvlowat;
469}
470
471META_COLLECTOR(int_sk_rcvtimeo)
472{
473 SKIP_NONLOCAL(skb);
474 dst->value = skb->sk->sk_rcvtimeo / HZ;
475}
476
477META_COLLECTOR(int_sk_sndtimeo)
478{
479 SKIP_NONLOCAL(skb);
480 dst->value = skb->sk->sk_sndtimeo / HZ;
481}
482
483META_COLLECTOR(int_sk_sendmsg_off)
484{
485 SKIP_NONLOCAL(skb);
486 dst->value = skb->sk->sk_sndmsg_off;
487}
488
489META_COLLECTOR(int_sk_write_pend)
490{
491 SKIP_NONLOCAL(skb);
492 dst->value = skb->sk->sk_write_pending;
493}
494
495/**************************************************************************
287 * Meta value collectors assignment table 496 * Meta value collectors assignment table
288 **************************************************************************/ 497 **************************************************************************/
289 498
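
Every socket collector above opens with SKIP_NONLOCAL(), which fails the match (*err = -1) when the skb has no local socket attached, as is the case for forwarded traffic; only after that check is dereferencing skb->sk safe. A standalone model of the early-exit pattern (the kernel macro picks up err from the collector's scope implicitly; this model passes it explicitly):

    #include <stdio.h>

    struct mock_sock { int sk_priority; };
    struct mock_skb  { struct mock_sock *sk; };

    #define SKIP_NONLOCAL(skb, err)         \
            if ((skb)->sk == NULL) {        \
                    *(err) = -1;            \
                    return;                 \
            }

    static void int_sk_prio(struct mock_skb *skb, int *dst, int *err)
    {
            SKIP_NONLOCAL(skb, err);
            *dst = skb->sk->sk_priority;    /* safe: sk checked above */
    }

    int main(void)
    {
            struct mock_skb forwarded = { .sk = NULL };
            int dst = 0, err = 0;

            int_sk_prio(&forwarded, &dst, &err);
            printf("err=%d\n", err);        /* -1: no local socket */
            return 0;
    }
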
@@ -293,41 +502,75 @@ struct meta_ops
293 struct meta_value *, struct meta_obj *, int *); 502 struct meta_value *, struct meta_obj *, int *);
294}; 503};
295 504
505#define META_ID(name) TCF_META_ID_##name
506#define META_FUNC(name) { .get = meta_##name }
507
296/* Meta value operations table listing all meta value collectors and 508/* Meta value operations table listing all meta value collectors and
297 * assigns them to a type and meta id. */ 509 * assigns them to a type and meta id. */
298static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { 510static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
299 [TCF_META_TYPE_VAR] = { 511 [TCF_META_TYPE_VAR] = {
300 [TCF_META_ID_DEV] = { .get = meta_var_dev }, 512 [META_ID(DEV)] = META_FUNC(var_dev),
301 [TCF_META_ID_INDEV] = { .get = meta_var_indev }, 513 [META_ID(INDEV)] = META_FUNC(var_indev),
302 [TCF_META_ID_REALDEV] = { .get = meta_var_realdev } 514 [META_ID(REALDEV)] = META_FUNC(var_realdev),
515 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
303 }, 516 },
304 [TCF_META_TYPE_INT] = { 517 [TCF_META_TYPE_INT] = {
305 [TCF_META_ID_RANDOM] = { .get = meta_int_random }, 518 [META_ID(RANDOM)] = META_FUNC(int_random),
306 [TCF_META_ID_LOADAVG_0] = { .get = meta_int_loadavg_0 }, 519 [META_ID(LOADAVG_0)] = META_FUNC(int_loadavg_0),
307 [TCF_META_ID_LOADAVG_1] = { .get = meta_int_loadavg_1 }, 520 [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1),
308 [TCF_META_ID_LOADAVG_2] = { .get = meta_int_loadavg_2 }, 521 [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2),
309 [TCF_META_ID_DEV] = { .get = meta_int_dev }, 522 [META_ID(DEV)] = META_FUNC(int_dev),
310 [TCF_META_ID_INDEV] = { .get = meta_int_indev }, 523 [META_ID(INDEV)] = META_FUNC(int_indev),
311 [TCF_META_ID_REALDEV] = { .get = meta_int_realdev }, 524 [META_ID(REALDEV)] = META_FUNC(int_realdev),
312 [TCF_META_ID_PRIORITY] = { .get = meta_int_priority }, 525 [META_ID(PRIORITY)] = META_FUNC(int_priority),
313 [TCF_META_ID_PROTOCOL] = { .get = meta_int_protocol }, 526 [META_ID(PROTOCOL)] = META_FUNC(int_protocol),
314 [TCF_META_ID_SECURITY] = { .get = meta_int_security }, 527 [META_ID(SECURITY)] = META_FUNC(int_security),
315 [TCF_META_ID_PKTTYPE] = { .get = meta_int_pkttype }, 528 [META_ID(PKTTYPE)] = META_FUNC(int_pkttype),
316 [TCF_META_ID_PKTLEN] = { .get = meta_int_pktlen }, 529 [META_ID(PKTLEN)] = META_FUNC(int_pktlen),
317 [TCF_META_ID_DATALEN] = { .get = meta_int_datalen }, 530 [META_ID(DATALEN)] = META_FUNC(int_datalen),
318 [TCF_META_ID_MACLEN] = { .get = meta_int_maclen }, 531 [META_ID(MACLEN)] = META_FUNC(int_maclen),
319#ifdef CONFIG_NETFILTER 532#ifdef CONFIG_NETFILTER
320 [TCF_META_ID_NFMARK] = { .get = meta_int_nfmark }, 533 [META_ID(NFMARK)] = META_FUNC(int_nfmark),
321#endif 534#endif
322 [TCF_META_ID_TCINDEX] = { .get = meta_int_tcindex }, 535 [META_ID(TCINDEX)] = META_FUNC(int_tcindex),
323#ifdef CONFIG_NET_CLS_ACT 536#ifdef CONFIG_NET_CLS_ACT
324 [TCF_META_ID_TCVERDICT] = { .get = meta_int_tcverd }, 537 [META_ID(TCVERDICT)] = META_FUNC(int_tcverd),
325 [TCF_META_ID_TCCLASSID] = { .get = meta_int_tcclassid }, 538 [META_ID(TCCLASSID)] = META_FUNC(int_tcclassid),
326#endif 539#endif
327#ifdef CONFIG_NET_CLS_ROUTE 540#ifdef CONFIG_NET_CLS_ROUTE
328 [TCF_META_ID_RTCLASSID] = { .get = meta_int_rtclassid }, 541 [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid),
329#endif 542#endif
330 [TCF_META_ID_RTIIF] = { .get = meta_int_rtiif } 543 [META_ID(RTIIF)] = META_FUNC(int_rtiif),
544 [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family),
545 [META_ID(SK_STATE)] = META_FUNC(int_sk_state),
546 [META_ID(SK_REUSE)] = META_FUNC(int_sk_reuse),
547 [META_ID(SK_BOUND_IF)] = META_FUNC(int_sk_bound_if),
548 [META_ID(SK_REFCNT)] = META_FUNC(int_sk_refcnt),
549 [META_ID(SK_RCVBUF)] = META_FUNC(int_sk_rcvbuf),
550 [META_ID(SK_SNDBUF)] = META_FUNC(int_sk_sndbuf),
551 [META_ID(SK_SHUTDOWN)] = META_FUNC(int_sk_shutdown),
552 [META_ID(SK_PROTO)] = META_FUNC(int_sk_proto),
553 [META_ID(SK_TYPE)] = META_FUNC(int_sk_type),
554 [META_ID(SK_RMEM_ALLOC)] = META_FUNC(int_sk_rmem_alloc),
555 [META_ID(SK_WMEM_ALLOC)] = META_FUNC(int_sk_wmem_alloc),
556 [META_ID(SK_OMEM_ALLOC)] = META_FUNC(int_sk_omem_alloc),
557 [META_ID(SK_WMEM_QUEUED)] = META_FUNC(int_sk_wmem_queued),
558 [META_ID(SK_RCV_QLEN)] = META_FUNC(int_sk_rcv_qlen),
559 [META_ID(SK_SND_QLEN)] = META_FUNC(int_sk_snd_qlen),
560 [META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen),
561 [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc),
562 [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc),
563 [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps),
564 [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent),
565 [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime),
566 [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl),
567 [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl),
568 [META_ID(SK_PRIO)] = META_FUNC(int_sk_prio),
569 [META_ID(SK_RCVLOWAT)] = META_FUNC(int_sk_rcvlowat),
570 [META_ID(SK_RCVTIMEO)] = META_FUNC(int_sk_rcvtimeo),
571 [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo),
572 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
573 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
331 } 574 }
332}; 575};
333 576
@@ -396,9 +639,9 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
396 /* Let gcc optimize it, the unlikely is not really based on 639 /* Let gcc optimize it, the unlikely is not really based on
397 * some numbers but jump free code for mismatches seems 640 * some numbers but jump free code for mismatches seems
398 * more logical. */ 641 * more logical. */
399 if (unlikely(a == b)) 642 if (unlikely(a->value == b->value))
400 return 0; 643 return 0;
401 else if (a < b) 644 else if (a->value < b->value)
402 return -1; 645 return -1;
403 else 646 else
404 return 1; 647 return 1;
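
The meta_int_compare() hunk is a genuine bug fix, not a cleanup: the old code compared the struct meta_obj pointers a and b rather than their values, so two equal values stored in distinct objects never compared equal. A standalone check of the corrected logic:

    #include <stdio.h>

    struct meta_obj { unsigned long value; };

    /* Fixed comparator: order by value, not by object address. */
    static int meta_int_compare(const struct meta_obj *a,
                                const struct meta_obj *b)
    {
            if (a->value == b->value)
                    return 0;
            return a->value < b->value ? -1 : 1;
    }

    int main(void)
    {
            struct meta_obj x = { 7 }, y = { 7 };

            /* The pre-fix version compared &x with &y: nonzero result. */
            printf("%d\n", meta_int_compare(&x, &y));       /* 0 */
            return 0;
    }
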
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 07977f8f2679..97c1c75d5c78 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -760,15 +760,14 @@ graft:
760} 760}
761 761
762static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, 762static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
763 u32 pid, u32 seq, unsigned flags, int event) 763 u32 pid, u32 seq, u16 flags, int event)
764{ 764{
765 struct tcmsg *tcm; 765 struct tcmsg *tcm;
766 struct nlmsghdr *nlh; 766 struct nlmsghdr *nlh;
767 unsigned char *b = skb->tail; 767 unsigned char *b = skb->tail;
768 struct gnet_dump d; 768 struct gnet_dump d;
769 769
770 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 770 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
771 nlh->nlmsg_flags = flags;
772 tcm = NLMSG_DATA(nlh); 771 tcm = NLMSG_DATA(nlh);
773 tcm->tcm_family = AF_UNSPEC; 772 tcm->tcm_family = AF_UNSPEC;
774 tcm->tcm_ifindex = q->dev->ifindex; 773 tcm->tcm_ifindex = q->dev->ifindex;
@@ -997,7 +996,7 @@ out:
997 996
998static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, 997static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
999 unsigned long cl, 998 unsigned long cl,
1000 u32 pid, u32 seq, unsigned flags, int event) 999 u32 pid, u32 seq, u16 flags, int event)
1001{ 1000{
1002 struct tcmsg *tcm; 1001 struct tcmsg *tcm;
1003 struct nlmsghdr *nlh; 1002 struct nlmsghdr *nlh;
@@ -1005,8 +1004,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1005 struct gnet_dump d; 1004 struct gnet_dump d;
1006 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; 1005 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1007 1006
1008 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 1007 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1009 nlh->nlmsg_flags = flags;
1010 tcm = NLMSG_DATA(nlh); 1008 tcm = NLMSG_DATA(nlh);
1011 tcm->tcm_family = AF_UNSPEC; 1009 tcm->tcm_family = AF_UNSPEC;
1012 tcm->tcm_ifindex = q->dev->ifindex; 1010 tcm->tcm_ifindex = q->dev->ifindex;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index d8bd2a569c7c..13e0e7b3856b 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -31,7 +31,7 @@
31#endif 31#endif
32 32
33 33
34#define PRIV(sch) qdisc_priv(sch) 34#define PRIV(sch) ((struct dsmark_qdisc_data *) qdisc_priv(sch))
35 35
36 36
37/* 37/*
@@ -55,24 +55,38 @@
55struct dsmark_qdisc_data { 55struct dsmark_qdisc_data {
56 struct Qdisc *q; 56 struct Qdisc *q;
57 struct tcf_proto *filter_list; 57 struct tcf_proto *filter_list;
58 __u8 *mask; /* "owns" the array */ 58 u8 *mask; /* "owns" the array */
59 __u8 *value; 59 u8 *value;
60 __u16 indices; 60 u16 indices;
61 __u32 default_index; /* index range is 0...0xffff */ 61 u32 default_index; /* index range is 0...0xffff */
62 int set_tc_index; 62 int set_tc_index;
63}; 63};
64 64
65static inline int dsmark_valid_indices(u16 indices)
66{
67 while (indices != 1) {
68 if (indices & 1)
69 return 0;
70 indices >>= 1;
71 }
72
73 return 1;
74}
65 75
66/* ------------------------- Class/flow operations ------------------------- */ 76static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
77{
78 return (index <= p->indices && index > 0);
79}
67 80
81/* ------------------------- Class/flow operations ------------------------- */
68 82
69static int dsmark_graft(struct Qdisc *sch,unsigned long arg, 83static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
70 struct Qdisc *new,struct Qdisc **old) 84 struct Qdisc *new, struct Qdisc **old)
71{ 85{
72 struct dsmark_qdisc_data *p = PRIV(sch); 86 struct dsmark_qdisc_data *p = PRIV(sch);
73 87
74 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new, 88 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
75 old); 89 sch, p, new, old);
76 90
77 if (new == NULL) { 91 if (new == NULL) {
78 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); 92 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
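
dsmark_valid_indices() insists on a power of two because dsmark_dequeue() masks the tc_index with (indices - 1); callers must still reject zero first, as dsmark_init() does with its !indices test, since the shift loop would not terminate on 0. A standalone copy of the check:

    #include <stdio.h>

    /* Accept only powers of two (caller must reject 0 beforehand). */
    static int dsmark_valid_indices(unsigned short indices)
    {
            while (indices != 1) {
                    if (indices & 1)
                            return 0;
                    indices >>= 1;
            }
            return 1;
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   dsmark_valid_indices(64),        /* 1 */
                   dsmark_valid_indices(3),         /* 0 */
                   dsmark_valid_indices(1));        /* 1 */
            return 0;
    }

For nonzero values the loop is equivalent to the constant-time test (indices & (indices - 1)) == 0.
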
@@ -81,91 +95,95 @@ static int dsmark_graft(struct Qdisc *sch,unsigned long arg,
81 } 95 }
82 96
83 sch_tree_lock(sch); 97 sch_tree_lock(sch);
84 *old = xchg(&p->q,new); 98 *old = xchg(&p->q, new);
85 if (*old) 99 qdisc_reset(*old);
86 qdisc_reset(*old);
87 sch->q.qlen = 0; 100 sch->q.qlen = 0;
88 sch_tree_unlock(sch); /* @@@ move up ? */ 101 sch_tree_unlock(sch);
102
89 return 0; 103 return 0;
90} 104}
91 105
92
93static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) 106static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
94{ 107{
95 struct dsmark_qdisc_data *p = PRIV(sch); 108 return PRIV(sch)->q;
96
97 return p->q;
98} 109}
99 110
100 111static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
101static unsigned long dsmark_get(struct Qdisc *sch,u32 classid)
102{ 112{
103 struct dsmark_qdisc_data *p __attribute__((unused)) = PRIV(sch); 113 DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
114 sch, PRIV(sch), classid);
104 115
105 DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid); 116 return TC_H_MIN(classid) + 1;
106 return TC_H_MIN(classid)+1;
107} 117}
108 118
109
110static unsigned long dsmark_bind_filter(struct Qdisc *sch, 119static unsigned long dsmark_bind_filter(struct Qdisc *sch,
111 unsigned long parent, u32 classid) 120 unsigned long parent, u32 classid)
112{ 121{
113 return dsmark_get(sch,classid); 122 return dsmark_get(sch, classid);
114} 123}
115 124
116
117static void dsmark_put(struct Qdisc *sch, unsigned long cl) 125static void dsmark_put(struct Qdisc *sch, unsigned long cl)
118{ 126{
119} 127}
120 128
121
122static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, 129static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
123 struct rtattr **tca, unsigned long *arg) 130 struct rtattr **tca, unsigned long *arg)
124{ 131{
125 struct dsmark_qdisc_data *p = PRIV(sch); 132 struct dsmark_qdisc_data *p = PRIV(sch);
126 struct rtattr *opt = tca[TCA_OPTIONS-1]; 133 struct rtattr *opt = tca[TCA_OPTIONS-1];
127 struct rtattr *tb[TCA_DSMARK_MAX]; 134 struct rtattr *tb[TCA_DSMARK_MAX];
135 int err = -EINVAL;
136 u8 mask = 0;
128 137
129 DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x)," 138 DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
130 "arg 0x%lx\n",sch,p,classid,parent,*arg); 139 "arg 0x%lx\n", sch, p, classid, parent, *arg);
131 if (*arg > p->indices) 140
132 return -ENOENT; 141 if (!dsmark_valid_index(p, *arg)) {
133 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt)) 142 err = -ENOENT;
134 return -EINVAL; 143 goto rtattr_failure;
135 if (tb[TCA_DSMARK_MASK-1]) {
136 if (!RTA_PAYLOAD(tb[TCA_DSMARK_MASK-1]))
137 return -EINVAL;
138 p->mask[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_MASK-1]);
139 }
140 if (tb[TCA_DSMARK_VALUE-1]) {
141 if (!RTA_PAYLOAD(tb[TCA_DSMARK_VALUE-1]))
142 return -EINVAL;
143 p->value[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_VALUE-1]);
144 } 144 }
145 return 0;
146}
147 145
146 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt))
147 goto rtattr_failure;
148
149 if (tb[TCA_DSMARK_MASK-1])
150 mask = RTA_GET_U8(tb[TCA_DSMARK_MASK-1]);
151
152 if (tb[TCA_DSMARK_VALUE-1])
153 p->value[*arg-1] = RTA_GET_U8(tb[TCA_DSMARK_VALUE-1]);
154
155 if (tb[TCA_DSMARK_MASK-1])
156 p->mask[*arg-1] = mask;
157
158 err = 0;
148 159
149static int dsmark_delete(struct Qdisc *sch,unsigned long arg) 160rtattr_failure:
161 return err;
162}
163
164static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
150{ 165{
151 struct dsmark_qdisc_data *p = PRIV(sch); 166 struct dsmark_qdisc_data *p = PRIV(sch);
152 167
153 if (!arg || arg > p->indices) 168 if (!dsmark_valid_index(p, arg))
154 return -EINVAL; 169 return -EINVAL;
170
155 p->mask[arg-1] = 0xff; 171 p->mask[arg-1] = 0xff;
156 p->value[arg-1] = 0; 172 p->value[arg-1] = 0;
173
157 return 0; 174 return 0;
158} 175}
159 176
160
161static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker) 177static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
162{ 178{
163 struct dsmark_qdisc_data *p = PRIV(sch); 179 struct dsmark_qdisc_data *p = PRIV(sch);
164 int i; 180 int i;
165 181
166 DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker); 182 DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
183
167 if (walker->stop) 184 if (walker->stop)
168 return; 185 return;
186
169 for (i = 0; i < p->indices; i++) { 187 for (i = 0; i < p->indices; i++) {
170 if (p->mask[i] == 0xff && !p->value[i]) 188 if (p->mask[i] == 0xff && !p->value[i])
171 goto ignore; 189 goto ignore;
@@ -180,26 +198,20 @@ ignore:
180 } 198 }
181} 199}
182 200
183
184static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl) 201static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
185{ 202{
186 struct dsmark_qdisc_data *p = PRIV(sch); 203 return &PRIV(sch)->filter_list;
187
188 return &p->filter_list;
189} 204}
190 205
191
192/* --------------------------- Qdisc operations ---------------------------- */ 206/* --------------------------- Qdisc operations ---------------------------- */
193 207
194
195static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) 208static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
196{ 209{
197 struct dsmark_qdisc_data *p = PRIV(sch); 210 struct dsmark_qdisc_data *p = PRIV(sch);
198 struct tcf_result res; 211 int err;
199 int result; 212
200 int ret = NET_XMIT_POLICED; 213 D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
201 214
202 D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
203 if (p->set_tc_index) { 215 if (p->set_tc_index) {
204 /* FIXME: Safe with non-linear skbs? --RR */ 216 /* FIXME: Safe with non-linear skbs? --RR */
205 switch (skb->protocol) { 217 switch (skb->protocol) {
@@ -216,17 +228,21 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
216 break; 228 break;
217 }; 229 };
218 } 230 }
219 result = TC_POLICE_OK; /* be nice to gcc */ 231
220 if (TC_H_MAJ(skb->priority) == sch->handle) { 232 if (TC_H_MAJ(skb->priority) == sch->handle)
221 skb->tc_index = TC_H_MIN(skb->priority); 233 skb->tc_index = TC_H_MIN(skb->priority);
222 } else { 234 else {
223 result = tc_classify(skb,p->filter_list,&res); 235 struct tcf_result res;
224 D2PRINTK("result %d class 0x%04x\n",result,res.classid); 236 int result = tc_classify(skb, p->filter_list, &res);
237
238 D2PRINTK("result %d class 0x%04x\n", result, res.classid);
239
225 switch (result) { 240 switch (result) {
226#ifdef CONFIG_NET_CLS_POLICE 241#ifdef CONFIG_NET_CLS_POLICE
227 case TC_POLICE_SHOT: 242 case TC_POLICE_SHOT:
228 kfree_skb(skb); 243 kfree_skb(skb);
229 break; 244 sch->qstats.drops++;
245 return NET_XMIT_POLICED;
230#if 0 246#if 0
231 case TC_POLICE_RECLASSIFY: 247 case TC_POLICE_RECLASSIFY:
232 /* FIXME: what to do here ??? */ 248 /* FIXME: what to do here ??? */
@@ -243,43 +259,45 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
243 break; 259 break;
244 }; 260 };
245 } 261 }
246 if (
247#ifdef CONFIG_NET_CLS_POLICE
248 result == TC_POLICE_SHOT ||
249#endif
250 262
251 ((ret = p->q->enqueue(skb,p->q)) != 0)) { 263 err = p->q->enqueue(skb,p->q);
264 if (err != NET_XMIT_SUCCESS) {
252 sch->qstats.drops++; 265 sch->qstats.drops++;
253 return ret; 266 return err;
254 } 267 }
268
255 sch->bstats.bytes += skb->len; 269 sch->bstats.bytes += skb->len;
256 sch->bstats.packets++; 270 sch->bstats.packets++;
257 sch->q.qlen++; 271 sch->q.qlen++;
258 return ret;
259}
260 272
273 return NET_XMIT_SUCCESS;
274}
261 275
262static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) 276static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
263{ 277{
264 struct dsmark_qdisc_data *p = PRIV(sch); 278 struct dsmark_qdisc_data *p = PRIV(sch);
265 struct sk_buff *skb; 279 struct sk_buff *skb;
266 int index; 280 u32 index;
281
282 D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
267 283
268 D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n",sch,p);
269 skb = p->q->ops->dequeue(p->q); 284 skb = p->q->ops->dequeue(p->q);
270 if (!skb) 285 if (skb == NULL)
271 return NULL; 286 return NULL;
287
272 sch->q.qlen--; 288 sch->q.qlen--;
273 index = skb->tc_index & (p->indices-1); 289
274 D2PRINTK("index %d->%d\n",skb->tc_index,index); 290 index = skb->tc_index & (p->indices - 1);
291 D2PRINTK("index %d->%d\n", skb->tc_index, index);
292
275 switch (skb->protocol) { 293 switch (skb->protocol) {
276 case __constant_htons(ETH_P_IP): 294 case __constant_htons(ETH_P_IP):
277 ipv4_change_dsfield(skb->nh.iph, 295 ipv4_change_dsfield(skb->nh.iph, p->mask[index],
278 p->mask[index],p->value[index]); 296 p->value[index]);
279 break; 297 break;
280 case __constant_htons(ETH_P_IPV6): 298 case __constant_htons(ETH_P_IPV6):
281 ipv6_change_dsfield(skb->nh.ipv6h, 299 ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index],
282 p->mask[index],p->value[index]); 300 p->value[index]);
283 break; 301 break;
284 default: 302 default:
285 /* 303 /*
@@ -293,152 +311,162 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
293 htons(skb->protocol)); 311 htons(skb->protocol));
294 break; 312 break;
295 }; 313 };
314
296 return skb; 315 return skb;
297} 316}
298 317
299
300static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch) 318static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
301{ 319{
302 int ret;
303 struct dsmark_qdisc_data *p = PRIV(sch); 320 struct dsmark_qdisc_data *p = PRIV(sch);
321 int err;
304 322
305 D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); 323 D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
306 if ((ret = p->q->ops->requeue(skb, p->q)) == 0) { 324
307 sch->q.qlen++; 325 err = p->q->ops->requeue(skb, p->q);
308 sch->qstats.requeues++; 326 if (err != NET_XMIT_SUCCESS) {
309 return 0; 327 sch->qstats.drops++;
328 return err;
310 } 329 }
311 sch->qstats.drops++;
312 return ret;
313}
314 330
331 sch->q.qlen++;
332 sch->qstats.requeues++;
333
334 return NET_XMIT_SUCCESS;
335}
315 336
316static unsigned int dsmark_drop(struct Qdisc *sch) 337static unsigned int dsmark_drop(struct Qdisc *sch)
317{ 338{
318 struct dsmark_qdisc_data *p = PRIV(sch); 339 struct dsmark_qdisc_data *p = PRIV(sch);
319 unsigned int len; 340 unsigned int len;
320 341
321 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p); 342 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
322 if (!p->q->ops->drop) 343
323 return 0; 344 if (p->q->ops->drop == NULL)
324 if (!(len = p->q->ops->drop(p->q)))
325 return 0; 345 return 0;
326 sch->q.qlen--; 346
347 len = p->q->ops->drop(p->q);
348 if (len)
349 sch->q.qlen--;
350
327 return len; 351 return len;
328} 352}
329 353
330 354static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
331static int dsmark_init(struct Qdisc *sch,struct rtattr *opt)
332{ 355{
333 struct dsmark_qdisc_data *p = PRIV(sch); 356 struct dsmark_qdisc_data *p = PRIV(sch);
334 struct rtattr *tb[TCA_DSMARK_MAX]; 357 struct rtattr *tb[TCA_DSMARK_MAX];
335 __u16 tmp; 358 int err = -EINVAL;
336 359 u32 default_index = NO_DEFAULT_INDEX;
337 DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); 360 u16 indices;
338 if (!opt || 361 u8 *mask;
339 rtattr_parse(tb,TCA_DSMARK_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0 || 362
340 !tb[TCA_DSMARK_INDICES-1] || 363 DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
341 RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16)) 364
342 return -EINVAL; 365 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0)
343 p->indices = *(__u16 *) RTA_DATA(tb[TCA_DSMARK_INDICES-1]); 366 goto errout;
344 if (!p->indices) 367
345 return -EINVAL; 368 indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]);
346 for (tmp = p->indices; tmp != 1; tmp >>= 1) { 369 if (!indices || !dsmark_valid_indices(indices))
347 if (tmp & 1) 370 goto errout;
348 return -EINVAL; 371
349 } 372 if (tb[TCA_DSMARK_DEFAULT_INDEX-1])
350 p->default_index = NO_DEFAULT_INDEX; 373 default_index = RTA_GET_U16(tb[TCA_DSMARK_DEFAULT_INDEX-1]);
351 if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) { 374
352 if (RTA_PAYLOAD(tb[TCA_DSMARK_DEFAULT_INDEX-1]) < sizeof(__u16)) 375 mask = kmalloc(indices * 2, GFP_KERNEL);
353 return -EINVAL; 376 if (mask == NULL) {
354 p->default_index = 377 err = -ENOMEM;
355 *(__u16 *) RTA_DATA(tb[TCA_DSMARK_DEFAULT_INDEX-1]); 378 goto errout;
356 } 379 }
357 p->set_tc_index = !!tb[TCA_DSMARK_SET_TC_INDEX-1]; 380
358 p->mask = kmalloc(p->indices*2,GFP_KERNEL); 381 p->mask = mask;
359 if (!p->mask) 382 memset(p->mask, 0xff, indices);
360 return -ENOMEM; 383
361 p->value = p->mask+p->indices; 384 p->value = p->mask + indices;
362 memset(p->mask,0xff,p->indices); 385 memset(p->value, 0, indices);
363 memset(p->value,0,p->indices); 386
364 if (!(p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) 387 p->indices = indices;
388 p->default_index = default_index;
389 p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]);
390
391 p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
392 if (p->q == NULL)
365 p->q = &noop_qdisc; 393 p->q = &noop_qdisc;
366 DPRINTK("dsmark_init: qdisc %p\n",&p->q);
367 return 0;
368}
369 394
395 DPRINTK("dsmark_init: qdisc %p\n", p->q);
396
397 err = 0;
398errout:
399rtattr_failure:
400 return err;
401}
370 402
371static void dsmark_reset(struct Qdisc *sch) 403static void dsmark_reset(struct Qdisc *sch)
372{ 404{
373 struct dsmark_qdisc_data *p = PRIV(sch); 405 struct dsmark_qdisc_data *p = PRIV(sch);
374 406
375 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p); 407 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
376 qdisc_reset(p->q); 408 qdisc_reset(p->q);
377 sch->q.qlen = 0; 409 sch->q.qlen = 0;
378} 410}
379 411
380
381static void dsmark_destroy(struct Qdisc *sch) 412static void dsmark_destroy(struct Qdisc *sch)
382{ 413{
383 struct dsmark_qdisc_data *p = PRIV(sch); 414 struct dsmark_qdisc_data *p = PRIV(sch);
384 struct tcf_proto *tp; 415 struct tcf_proto *tp;
385 416
386 DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n",sch,p); 417 DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
418
387 while (p->filter_list) { 419 while (p->filter_list) {
388 tp = p->filter_list; 420 tp = p->filter_list;
389 p->filter_list = tp->next; 421 p->filter_list = tp->next;
390 tcf_destroy(tp); 422 tcf_destroy(tp);
391 } 423 }
424
392 qdisc_destroy(p->q); 425 qdisc_destroy(p->q);
393 kfree(p->mask); 426 kfree(p->mask);
394} 427}
395 428
396
397static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, 429static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
398 struct sk_buff *skb, struct tcmsg *tcm) 430 struct sk_buff *skb, struct tcmsg *tcm)
399{ 431{
400 struct dsmark_qdisc_data *p = PRIV(sch); 432 struct dsmark_qdisc_data *p = PRIV(sch);
401 unsigned char *b = skb->tail; 433 struct rtattr *opts = NULL;
402 struct rtattr *rta; 434
435 DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
403 436
404 DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n",sch,p,cl); 437 if (!dsmark_valid_index(p, cl))
405 if (!cl || cl > p->indices)
406 return -EINVAL; 438 return -EINVAL;
407 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle),cl-1); 439
408 rta = (struct rtattr *) b; 440 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
409 RTA_PUT(skb,TCA_OPTIONS,0,NULL); 441
410 RTA_PUT(skb,TCA_DSMARK_MASK,1,&p->mask[cl-1]); 442 opts = RTA_NEST(skb, TCA_OPTIONS);
411 RTA_PUT(skb,TCA_DSMARK_VALUE,1,&p->value[cl-1]); 443 RTA_PUT_U8(skb,TCA_DSMARK_MASK, p->mask[cl-1]);
412 rta->rta_len = skb->tail-b; 444 RTA_PUT_U8(skb,TCA_DSMARK_VALUE, p->value[cl-1]);
413 return skb->len; 445
446 return RTA_NEST_END(skb, opts);
414 447
415rtattr_failure: 448rtattr_failure:
416 skb_trim(skb,b-skb->data); 449 return RTA_NEST_CANCEL(skb, opts);
417 return -1;
418} 450}
419 451
420static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) 452static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
421{ 453{
422 struct dsmark_qdisc_data *p = PRIV(sch); 454 struct dsmark_qdisc_data *p = PRIV(sch);
423 unsigned char *b = skb->tail; 455 struct rtattr *opts = NULL;
424 struct rtattr *rta;
425 456
426 rta = (struct rtattr *) b; 457 opts = RTA_NEST(skb, TCA_OPTIONS);
427 RTA_PUT(skb,TCA_OPTIONS,0,NULL); 458 RTA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
428 RTA_PUT(skb,TCA_DSMARK_INDICES,sizeof(__u16),&p->indices); 459
429 if (p->default_index != NO_DEFAULT_INDEX) { 460 if (p->default_index != NO_DEFAULT_INDEX)
430 __u16 tmp = p->default_index; 461 RTA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
431 462
432 RTA_PUT(skb,TCA_DSMARK_DEFAULT_INDEX, sizeof(__u16), &tmp);
433 }
434 if (p->set_tc_index) 463 if (p->set_tc_index)
435 RTA_PUT(skb, TCA_DSMARK_SET_TC_INDEX, 0, NULL); 464 RTA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
436 rta->rta_len = skb->tail-b; 465
437 return skb->len; 466 return RTA_NEST_END(skb, opts);
438 467
439rtattr_failure: 468rtattr_failure:
440 skb_trim(skb,b-skb->data); 469 return RTA_NEST_CANCEL(skb, opts);
441 return -1;
442} 470}
443 471
444static struct Qdisc_class_ops dsmark_class_ops = { 472static struct Qdisc_class_ops dsmark_class_ops = {
@@ -476,10 +504,13 @@ static int __init dsmark_module_init(void)
476{ 504{
477 return register_qdisc(&dsmark_qdisc_ops); 505 return register_qdisc(&dsmark_qdisc_ops);
478} 506}
507
479static void __exit dsmark_module_exit(void) 508static void __exit dsmark_module_exit(void)
480{ 509{
481 unregister_qdisc(&dsmark_qdisc_ops); 510 unregister_qdisc(&dsmark_qdisc_ops);
482} 511}
512
483module_init(dsmark_module_init) 513module_init(dsmark_module_init)
484module_exit(dsmark_module_exit) 514module_exit(dsmark_module_exit)
515
485MODULE_LICENSE("GPL"); 516MODULE_LICENSE("GPL");
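Note: the dsmark dump conversion above replaces hand-rolled rtattr bookkeeping (save skb->tail, patch rta_len, skb_trim on failure) with the RTA_NEST()/RTA_NEST_END()/RTA_NEST_CANCEL() helpers. Below is a minimal user-space sketch of that open/fill/close-or-cancel pattern; the TLV layout and helper names are illustrative stand-ins, not the kernel macros, which operate on skb tail room.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Toy TLV buffer standing in for an skb. */
    static unsigned char buf[256];
    static size_t used;

    struct tlv { uint16_t type, len; };      /* header precedes the payload */

    static size_t nest_open(uint16_t type)   /* cf. RTA_NEST() */
    {
        size_t off = used;
        struct tlv h = { type, 0 };          /* length patched by nest_end() */
        memcpy(buf + used, &h, sizeof(h));
        used += sizeof(h);
        return off;
    }

    static int put_u8(uint16_t type, uint8_t v)   /* cf. RTA_PUT_U8() */
    {
        struct tlv h = { type, sizeof(h) + 1 };
        if (used + h.len > sizeof(buf))
            return -1;                       /* caller cancels the nest */
        memcpy(buf + used, &h, sizeof(h));
        buf[used + sizeof(h)] = v;
        used += h.len;
        return 0;
    }

    static void nest_end(size_t off)         /* cf. RTA_NEST_END() */
    {
        struct tlv h;
        memcpy(&h, buf + off, sizeof(h));
        h.len = (uint16_t)(used - off);      /* close: patch the outer length */
        memcpy(buf + off, &h, sizeof(h));
    }

    static void nest_cancel(size_t off)      /* cf. RTA_NEST_CANCEL() */
    {
        used = off;                          /* drop the partial attribute */
    }

    int main(void)
    {
        size_t opts = nest_open(1);          /* outer TCA_OPTIONS-style nest */
        if (put_u8(2, 0xfc) < 0 || put_u8(3, 0x02) < 0) {
            nest_cancel(opts);
            return 1;
        }
        nest_end(opts);
        printf("encoded %zu bytes\n", used);
        return 0;
    }

The payoff in the diff is that the rtattr_failure path collapses to a single RTA_NEST_CANCEL() instead of a manual skb_trim().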
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 4888305c96da..033083bf0e74 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -11,131 +11,38 @@
11 11
12#include <linux/config.h> 12#include <linux/config.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <asm/uaccess.h>
15#include <asm/system.h>
16#include <linux/bitops.h>
17#include <linux/types.h> 14#include <linux/types.h>
18#include <linux/kernel.h> 15#include <linux/kernel.h>
19#include <linux/sched.h>
20#include <linux/string.h>
21#include <linux/mm.h>
22#include <linux/socket.h>
23#include <linux/sockios.h>
24#include <linux/in.h>
25#include <linux/errno.h> 16#include <linux/errno.h>
26#include <linux/interrupt.h>
27#include <linux/if_ether.h>
28#include <linux/inet.h>
29#include <linux/netdevice.h> 17#include <linux/netdevice.h>
30#include <linux/etherdevice.h>
31#include <linux/notifier.h>
32#include <net/ip.h>
33#include <net/route.h>
34#include <linux/skbuff.h> 18#include <linux/skbuff.h>
35#include <net/sock.h>
36#include <net/pkt_sched.h> 19#include <net/pkt_sched.h>
37 20
38/* 1 band FIFO pseudo-"scheduler" */ 21/* 1 band FIFO pseudo-"scheduler" */
39 22
40struct fifo_sched_data 23struct fifo_sched_data
41{ 24{
42 unsigned limit; 25 u32 limit;
43}; 26};
44 27
45static int 28static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
46bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
47{ 29{
48 struct fifo_sched_data *q = qdisc_priv(sch); 30 struct fifo_sched_data *q = qdisc_priv(sch);
49 31
50 if (sch->qstats.backlog + skb->len <= q->limit) { 32 if (likely(sch->qstats.backlog + skb->len <= q->limit))
51 __skb_queue_tail(&sch->q, skb); 33 return qdisc_enqueue_tail(skb, sch);
52 sch->qstats.backlog += skb->len;
53 sch->bstats.bytes += skb->len;
54 sch->bstats.packets++;
55 return 0;
56 }
57 sch->qstats.drops++;
58#ifdef CONFIG_NET_CLS_POLICE
59 if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
60#endif
61 kfree_skb(skb);
62 return NET_XMIT_DROP;
63}
64
65static int
66bfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
67{
68 __skb_queue_head(&sch->q, skb);
69 sch->qstats.backlog += skb->len;
70 sch->qstats.requeues++;
71 return 0;
72}
73
74static struct sk_buff *
75bfifo_dequeue(struct Qdisc* sch)
76{
77 struct sk_buff *skb;
78 34
79 skb = __skb_dequeue(&sch->q); 35 return qdisc_reshape_fail(skb, sch);
80 if (skb)
81 sch->qstats.backlog -= skb->len;
82 return skb;
83} 36}
84 37
85static unsigned int 38static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
86fifo_drop(struct Qdisc* sch)
87{
88 struct sk_buff *skb;
89
90 skb = __skb_dequeue_tail(&sch->q);
91 if (skb) {
92 unsigned int len = skb->len;
93 sch->qstats.backlog -= len;
94 kfree_skb(skb);
95 return len;
96 }
97 return 0;
98}
99
100static void
101fifo_reset(struct Qdisc* sch)
102{
103 skb_queue_purge(&sch->q);
104 sch->qstats.backlog = 0;
105}
106
107static int
108pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
109{ 39{
110 struct fifo_sched_data *q = qdisc_priv(sch); 40 struct fifo_sched_data *q = qdisc_priv(sch);
111 41
112 if (sch->q.qlen < q->limit) { 42 if (likely(skb_queue_len(&sch->q) < q->limit))
113 __skb_queue_tail(&sch->q, skb); 43 return qdisc_enqueue_tail(skb, sch);
114 sch->bstats.bytes += skb->len;
115 sch->bstats.packets++;
116 return 0;
117 }
118 sch->qstats.drops++;
119#ifdef CONFIG_NET_CLS_POLICE
120 if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
121#endif
122 kfree_skb(skb);
123 return NET_XMIT_DROP;
124}
125
126static int
127pfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
128{
129 __skb_queue_head(&sch->q, skb);
130 sch->qstats.requeues++;
131 return 0;
132}
133
134 44
135static struct sk_buff * 45 return qdisc_reshape_fail(skb, sch);
136pfifo_dequeue(struct Qdisc* sch)
137{
138 return __skb_dequeue(&sch->q);
139} 46}
140 47
141static int fifo_init(struct Qdisc *sch, struct rtattr *opt) 48static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
@@ -143,66 +50,59 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
143 struct fifo_sched_data *q = qdisc_priv(sch); 50 struct fifo_sched_data *q = qdisc_priv(sch);
144 51
145 if (opt == NULL) { 52 if (opt == NULL) {
146 unsigned int limit = sch->dev->tx_queue_len ? : 1; 53 u32 limit = sch->dev->tx_queue_len ? : 1;
147 54
148 if (sch->ops == &bfifo_qdisc_ops) 55 if (sch->ops == &bfifo_qdisc_ops)
149 q->limit = limit*sch->dev->mtu; 56 limit *= sch->dev->mtu;
150 else 57
151 q->limit = limit; 58 q->limit = limit;
152 } else { 59 } else {
153 struct tc_fifo_qopt *ctl = RTA_DATA(opt); 60 struct tc_fifo_qopt *ctl = RTA_DATA(opt);
154 if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) 61
62 if (RTA_PAYLOAD(opt) < sizeof(*ctl))
155 return -EINVAL; 63 return -EINVAL;
64
156 q->limit = ctl->limit; 65 q->limit = ctl->limit;
157 } 66 }
67
158 return 0; 68 return 0;
159} 69}
160 70
161static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) 71static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
162{ 72{
163 struct fifo_sched_data *q = qdisc_priv(sch); 73 struct fifo_sched_data *q = qdisc_priv(sch);
164 unsigned char *b = skb->tail; 74 struct tc_fifo_qopt opt = { .limit = q->limit };
165 struct tc_fifo_qopt opt;
166 75
167 opt.limit = q->limit;
168 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 76 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
169
170 return skb->len; 77 return skb->len;
171 78
172rtattr_failure: 79rtattr_failure:
173 skb_trim(skb, b - skb->data);
174 return -1; 80 return -1;
175} 81}
176 82
177struct Qdisc_ops pfifo_qdisc_ops = { 83struct Qdisc_ops pfifo_qdisc_ops = {
178 .next = NULL,
179 .cl_ops = NULL,
180 .id = "pfifo", 84 .id = "pfifo",
181 .priv_size = sizeof(struct fifo_sched_data), 85 .priv_size = sizeof(struct fifo_sched_data),
182 .enqueue = pfifo_enqueue, 86 .enqueue = pfifo_enqueue,
183 .dequeue = pfifo_dequeue, 87 .dequeue = qdisc_dequeue_head,
184 .requeue = pfifo_requeue, 88 .requeue = qdisc_requeue,
185 .drop = fifo_drop, 89 .drop = qdisc_queue_drop,
186 .init = fifo_init, 90 .init = fifo_init,
187 .reset = fifo_reset, 91 .reset = qdisc_reset_queue,
188 .destroy = NULL,
189 .change = fifo_init, 92 .change = fifo_init,
190 .dump = fifo_dump, 93 .dump = fifo_dump,
191 .owner = THIS_MODULE, 94 .owner = THIS_MODULE,
192}; 95};
193 96
194struct Qdisc_ops bfifo_qdisc_ops = { 97struct Qdisc_ops bfifo_qdisc_ops = {
195 .next = NULL,
196 .cl_ops = NULL,
197 .id = "bfifo", 98 .id = "bfifo",
198 .priv_size = sizeof(struct fifo_sched_data), 99 .priv_size = sizeof(struct fifo_sched_data),
199 .enqueue = bfifo_enqueue, 100 .enqueue = bfifo_enqueue,
200 .dequeue = bfifo_dequeue, 101 .dequeue = qdisc_dequeue_head,
201 .requeue = bfifo_requeue, 102 .requeue = qdisc_requeue,
202 .drop = fifo_drop, 103 .drop = qdisc_queue_drop,
203 .init = fifo_init, 104 .init = fifo_init,
204 .reset = fifo_reset, 105 .reset = qdisc_reset_queue,
205 .destroy = NULL,
206 .change = fifo_init, 106 .change = fifo_init,
207 .dump = fifo_dump, 107 .dump = fifo_dump,
208 .owner = THIS_MODULE, 108 .owner = THIS_MODULE,
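Note: after this rewrite, bfifo and pfifo differ only in their admission test (bytes of backlog versus packets queued); queueing, stats and drop handling are delegated to the shared qdisc_*() helpers named in the ops tables. A self-contained model of that split, with kernel types replaced by a toy struct:

    #include <stdbool.h>
    #include <stdio.h>

    struct fifo {
        unsigned int limit;     /* bytes for bfifo, packets for pfifo */
        unsigned int backlog;   /* queued bytes */
        unsigned int qlen;      /* queued packets */
        bool bytemode;          /* true: bfifo, false: pfifo */
    };

    /* Mirrors bfifo_enqueue()/pfifo_enqueue(): admit or drop, nothing else. */
    static bool fifo_admit(struct fifo *q, unsigned int pktlen)
    {
        bool ok = q->bytemode ? q->backlog + pktlen <= q->limit
                              : q->qlen < q->limit;
        if (!ok)
            return false;   /* kernel: qdisc_reshape_fail() or drop */
        q->backlog += pktlen;   /* kernel: qdisc_enqueue_tail() keeps stats */
        q->qlen++;
        return true;
    }

    int main(void)
    {
        struct fifo b = { .limit = 3000, .bytemode = true };
        struct fifo p = { .limit = 2, .bytemode = false };
        unsigned int lens[] = { 1500, 1500, 1500 };

        for (int i = 0; i < 3; i++)
            printf("bfifo pkt%d: %s  pfifo pkt%d: %s\n",
                   i, fifo_admit(&b, lens[i]) ? "queued" : "dropped",
                   i, fifo_admit(&p, lens[i]) ? "queued" : "dropped");
        return 0;
    }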
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 87e48a4e1051..7683b34dc6a9 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -243,31 +243,27 @@ static void dev_watchdog_down(struct net_device *dev)
243 cheaper. 243 cheaper.
244 */ 244 */
245 245
246static int 246static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
247noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
248{ 247{
249 kfree_skb(skb); 248 kfree_skb(skb);
250 return NET_XMIT_CN; 249 return NET_XMIT_CN;
251} 250}
252 251
253static struct sk_buff * 252static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
254noop_dequeue(struct Qdisc * qdisc)
255{ 253{
256 return NULL; 254 return NULL;
257} 255}
258 256
259static int 257static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
260noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
261{ 258{
262 if (net_ratelimit()) 259 if (net_ratelimit())
263 printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name); 260 printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
261 skb->dev->name);
264 kfree_skb(skb); 262 kfree_skb(skb);
265 return NET_XMIT_CN; 263 return NET_XMIT_CN;
266} 264}
267 265
268struct Qdisc_ops noop_qdisc_ops = { 266struct Qdisc_ops noop_qdisc_ops = {
269 .next = NULL,
270 .cl_ops = NULL,
271 .id = "noop", 267 .id = "noop",
272 .priv_size = 0, 268 .priv_size = 0,
273 .enqueue = noop_enqueue, 269 .enqueue = noop_enqueue,
@@ -285,8 +281,6 @@ struct Qdisc noop_qdisc = {
285}; 281};
286 282
287static struct Qdisc_ops noqueue_qdisc_ops = { 283static struct Qdisc_ops noqueue_qdisc_ops = {
288 .next = NULL,
289 .cl_ops = NULL,
290 .id = "noqueue", 284 .id = "noqueue",
291 .priv_size = 0, 285 .priv_size = 0,
292 .enqueue = noop_enqueue, 286 .enqueue = noop_enqueue,
@@ -311,97 +305,87 @@ static const u8 prio2band[TC_PRIO_MAX+1] =
311 generic prio+fifo combination. 305 generic prio+fifo combination.
312 */ 306 */
313 307
314static int 308#define PFIFO_FAST_BANDS 3
315pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) 309
310static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
311 struct Qdisc *qdisc)
316{ 312{
317 struct sk_buff_head *list = qdisc_priv(qdisc); 313 struct sk_buff_head *list = qdisc_priv(qdisc);
314 return list + prio2band[skb->priority & TC_PRIO_MAX];
315}
318 316
319 list += prio2band[skb->priority&TC_PRIO_MAX]; 317static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
318{
319 struct sk_buff_head *list = prio2list(skb, qdisc);
320 320
321 if (list->qlen < qdisc->dev->tx_queue_len) { 321 if (skb_queue_len(list) < qdisc->dev->tx_queue_len) {
322 __skb_queue_tail(list, skb);
323 qdisc->q.qlen++; 322 qdisc->q.qlen++;
324 qdisc->bstats.bytes += skb->len; 323 return __qdisc_enqueue_tail(skb, qdisc, list);
325 qdisc->bstats.packets++;
326 return 0;
327 } 324 }
328 qdisc->qstats.drops++; 325
329 kfree_skb(skb); 326 return qdisc_drop(skb, qdisc);
330 return NET_XMIT_DROP;
331} 327}
332 328
333static struct sk_buff * 329static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
334pfifo_fast_dequeue(struct Qdisc* qdisc)
335{ 330{
336 int prio; 331 int prio;
337 struct sk_buff_head *list = qdisc_priv(qdisc); 332 struct sk_buff_head *list = qdisc_priv(qdisc);
338 struct sk_buff *skb;
339 333
340 for (prio = 0; prio < 3; prio++, list++) { 334 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++, list++) {
341 skb = __skb_dequeue(list); 335 struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
342 if (skb) { 336 if (skb) {
343 qdisc->q.qlen--; 337 qdisc->q.qlen--;
344 return skb; 338 return skb;
345 } 339 }
346 } 340 }
341
347 return NULL; 342 return NULL;
348} 343}
349 344
350static int 345static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
351pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
352{ 346{
353 struct sk_buff_head *list = qdisc_priv(qdisc);
354
355 list += prio2band[skb->priority&TC_PRIO_MAX];
356
357 __skb_queue_head(list, skb);
358 qdisc->q.qlen++; 347 qdisc->q.qlen++;
359 qdisc->qstats.requeues++; 348 return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
360 return 0;
361} 349}
362 350
363static void 351static void pfifo_fast_reset(struct Qdisc* qdisc)
364pfifo_fast_reset(struct Qdisc* qdisc)
365{ 352{
366 int prio; 353 int prio;
367 struct sk_buff_head *list = qdisc_priv(qdisc); 354 struct sk_buff_head *list = qdisc_priv(qdisc);
368 355
369 for (prio=0; prio < 3; prio++) 356 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
370 skb_queue_purge(list+prio); 357 __qdisc_reset_queue(qdisc, list + prio);
358
359 qdisc->qstats.backlog = 0;
371 qdisc->q.qlen = 0; 360 qdisc->q.qlen = 0;
372} 361}
373 362
374static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) 363static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
375{ 364{
376 unsigned char *b = skb->tail; 365 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
377 struct tc_prio_qopt opt;
378 366
379 opt.bands = 3;
380 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); 367 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
381 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 368 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
382 return skb->len; 369 return skb->len;
383 370
384rtattr_failure: 371rtattr_failure:
385 skb_trim(skb, b - skb->data);
386 return -1; 372 return -1;
387} 373}
388 374
389static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt) 375static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
390{ 376{
391 int i; 377 int prio;
392 struct sk_buff_head *list = qdisc_priv(qdisc); 378 struct sk_buff_head *list = qdisc_priv(qdisc);
393 379
394 for (i=0; i<3; i++) 380 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
395 skb_queue_head_init(list+i); 381 skb_queue_head_init(list + prio);
396 382
397 return 0; 383 return 0;
398} 384}
399 385
400static struct Qdisc_ops pfifo_fast_ops = { 386static struct Qdisc_ops pfifo_fast_ops = {
401 .next = NULL,
402 .cl_ops = NULL,
403 .id = "pfifo_fast", 387 .id = "pfifo_fast",
404 .priv_size = 3 * sizeof(struct sk_buff_head), 388 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
405 .enqueue = pfifo_fast_enqueue, 389 .enqueue = pfifo_fast_enqueue,
406 .dequeue = pfifo_fast_dequeue, 390 .dequeue = pfifo_fast_dequeue,
407 .requeue = pfifo_fast_requeue, 391 .requeue = pfifo_fast_requeue,
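Note: pfifo_fast keeps PFIFO_FAST_BANDS (three) independent FIFOs and always serves the lowest-numbered non-empty band; prio2band folds the sixteen TC_PRIO_* values into those bands. A compact user-space model of the band selection and strict-priority dequeue follows; the mapping table below is illustrative, so consult the kernel's prio2band for the authoritative values.

    #include <stdio.h>

    #define BANDS 3
    #define TC_PRIO_MAX 15

    /* Illustrative mapping; the real table lives in sch_generic.c. */
    static const unsigned char prio2band[TC_PRIO_MAX + 1] = {
        1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
    };

    static int count[BANDS];   /* stand-in for the three sk_buff_head lists */

    static void enqueue(unsigned int priority)
    {
        count[prio2band[priority & TC_PRIO_MAX]]++;
    }

    /* Strict priority: always drain the lowest-numbered non-empty band. */
    static int dequeue(void)
    {
        for (int band = 0; band < BANDS; band++)
            if (count[band]) {
                count[band]--;
                return band;
            }
        return -1;              /* all bands empty */
    }

    int main(void)
    {
        enqueue(0);             /* best effort  -> band 1 */
        enqueue(6);             /* interactive  -> band 0 */
        enqueue(2);             /* bulk         -> band 2 */

        for (int band; (band = dequeue()) >= 0; )
            printf("dequeued from band %d\n", band);
        return 0;
    }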
diff --git a/net/sctp/input.c b/net/sctp/input.c
index b719a77d66b4..fffc880a646d 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -178,6 +178,37 @@ int sctp_rcv(struct sk_buff *skb)
178 178
179 asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport); 179 asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport);
180 180
181 if (!asoc)
182 ep = __sctp_rcv_lookup_endpoint(&dest);
183
184 /* Retrieve the common input handling substructure. */
185 rcvr = asoc ? &asoc->base : &ep->base;
186 sk = rcvr->sk;
187
188 /*
189 * If a frame arrives on an interface and the receiving socket is
190 * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
191 */
192 if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb)))
193 {
194 sock_put(sk);
195 if (asoc) {
196 sctp_association_put(asoc);
197 asoc = NULL;
198 } else {
199 sctp_endpoint_put(ep);
200 ep = NULL;
201 }
202 sk = sctp_get_ctl_sock();
203 ep = sctp_sk(sk)->ep;
204 sctp_endpoint_hold(ep);
205 sock_hold(sk);
206 rcvr = &ep->base;
207 }
208
209 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
210 goto discard_release;
211
181 /* 212 /*
182 * RFC 2960, 8.4 - Handle "Out of the blue" Packets. 213 * RFC 2960, 8.4 - Handle "Out of the blue" Packets.
183 * An SCTP packet is called an "out of the blue" (OOTB) 214 * An SCTP packet is called an "out of the blue" (OOTB)
@@ -187,22 +218,12 @@ int sctp_rcv(struct sk_buff *skb)
187 * packet belongs. 218 * packet belongs.
188 */ 219 */
189 if (!asoc) { 220 if (!asoc) {
190 ep = __sctp_rcv_lookup_endpoint(&dest);
191 if (sctp_rcv_ootb(skb)) { 221 if (sctp_rcv_ootb(skb)) {
192 SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES); 222 SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES);
193 goto discard_release; 223 goto discard_release;
194 } 224 }
195 } 225 }
196 226
197 /* Retrieve the common input handling substructure. */
198 rcvr = asoc ? &asoc->base : &ep->base;
199 sk = rcvr->sk;
200
201 if ((sk) && (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)) {
202 goto discard_release;
203 }
204
205
206 /* SCTP seems to always need a timestamp right now (FIXME) */ 227 /* SCTP seems to always need a timestamp right now (FIXME) */
207 if (skb->stamp.tv_sec == 0) { 228 if (skb->stamp.tv_sec == 0) {
208 do_gettimeofday(&skb->stamp); 229 do_gettimeofday(&skb->stamp);
@@ -265,13 +286,11 @@ discard_it:
265 286
266discard_release: 287discard_release:
267 /* Release any structures we may be holding. */ 288 /* Release any structures we may be holding. */
268 if (asoc) { 289 sock_put(sk);
269 sock_put(asoc->base.sk); 290 if (asoc)
270 sctp_association_put(asoc); 291 sctp_association_put(asoc);
271 } else { 292 else
272 sock_put(ep->base.sk);
273 sctp_endpoint_put(ep); 293 sctp_endpoint_put(ep);
274 }
275 294
276 goto discard_it; 295 goto discard_it;
277} 296}
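Note: the reworked sctp_rcv() takes its references up front, the socket plus exactly one of the association or the endpoint, so every exit (including the OOTB redirect to the control socket) funnels through the single discard_release label. A sketch of that single-owner release discipline, with toy refcounts standing in for sock_put()/sctp_association_put()/sctp_endpoint_put():

    #include <assert.h>
    #include <stdio.h>

    struct obj { const char *name; int refs; };

    static void hold(struct obj *o) { o->refs++; }

    static void put(struct obj *o)
    {
        assert(o->refs > 0);
        if (--o->refs == 0)
            printf("%s freed\n", o->name);
    }

    /* One receiver is held: the association if lookup found one, else the
     * endpoint. The release path never has to reconstruct which it was. */
    static void release(struct obj *sk, struct obj *asoc, struct obj *ep)
    {
        put(sk);
        if (asoc)
            put(asoc);
        else
            put(ep);
    }

    int main(void)
    {
        struct obj sk = { "sock", 0 }, ep = { "endpoint", 0 };

        /* lookup found no association; fall back to the endpoint */
        hold(&sk);
        hold(&ep);
        release(&sk, NULL, &ep);
        return 0;
    }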
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index c9d9ea064734..c7e42d125b9c 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -812,26 +812,23 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
812 if (addr->sa.sa_family != AF_INET6) 812 if (addr->sa.sa_family != AF_INET6)
813 af = sctp_get_af_specific(addr->sa.sa_family); 813 af = sctp_get_af_specific(addr->sa.sa_family);
814 else { 814 else {
815 struct sock *sk;
816 int type = ipv6_addr_type(&addr->v6.sin6_addr); 815 int type = ipv6_addr_type(&addr->v6.sin6_addr);
817 sk = sctp_opt2sk(opt); 816 struct net_device *dev;
817
818 if (type & IPV6_ADDR_LINKLOCAL) { 818 if (type & IPV6_ADDR_LINKLOCAL) {
819 /* Note: Behavior similar to af_inet6.c: 819 if (!addr->v6.sin6_scope_id)
820 * 1) Overrides previous bound_dev_if 820 return 0;
821 * 2) Destructive even if bind isn't successful. 821 dev = dev_get_by_index(addr->v6.sin6_scope_id);
822 */ 822 if (!dev)
823
824 if (addr->v6.sin6_scope_id)
825 sk->sk_bound_dev_if = addr->v6.sin6_scope_id;
826 if (!sk->sk_bound_dev_if)
827 return 0; 823 return 0;
824 dev_put(dev);
828 } 825 }
829 af = opt->pf->af; 826 af = opt->pf->af;
830 } 827 }
831 return af->available(addr, opt); 828 return af->available(addr, opt);
832} 829}
833 830
834/* Verify that the provided sockaddr looks bindable. Common verification 831 * Verify that the provided sockaddr looks sendable. Common verification
835 * has already been taken care of. 832 * has already been taken care of.
836 */ 833 */
837static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) 834static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
@@ -842,19 +839,16 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
842 if (addr->sa.sa_family != AF_INET6) 839 if (addr->sa.sa_family != AF_INET6)
843 af = sctp_get_af_specific(addr->sa.sa_family); 840 af = sctp_get_af_specific(addr->sa.sa_family);
844 else { 841 else {
845 struct sock *sk;
846 int type = ipv6_addr_type(&addr->v6.sin6_addr); 842 int type = ipv6_addr_type(&addr->v6.sin6_addr);
847 sk = sctp_opt2sk(opt); 843 struct net_device *dev;
844
848 if (type & IPV6_ADDR_LINKLOCAL) { 845 if (type & IPV6_ADDR_LINKLOCAL) {
849 /* Note: Behavior similar to af_inet6.c: 846 if (!addr->v6.sin6_scope_id)
850 * 1) Overrides previous bound_dev_if 847 return 0;
851 * 2) Destructive even if bind isn't successful. 848 dev = dev_get_by_index(addr->v6.sin6_scope_id);
852 */ 849 if (!dev)
853
854 if (addr->v6.sin6_scope_id)
855 sk->sk_bound_dev_if = addr->v6.sin6_scope_id;
856 if (!sk->sk_bound_dev_if)
857 return 0; 850 return 0;
851 dev_put(dev);
858 } 852 }
859 af = opt->pf->af; 853 af = opt->pf->af;
860 } 854 }
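Note: both IPv6 verify paths now treat a link-local address as usable only when its sin6_scope_id resolves to a live interface, and they no longer write sk->sk_bound_dev_if as a side effect. A standalone sketch of the check; dev_exists() here is a stub for the kernel's dev_get_by_index()/dev_put() pair.

    #include <stdbool.h>
    #include <stdio.h>

    /* Stub: pretend interface indexes 1..4 exist. */
    static bool dev_exists(unsigned int ifindex)
    {
        return ifindex >= 1 && ifindex <= 4;
    }

    /* A link-local sockaddr is acceptable only if it names a scope and
     * that scope is a live device. Pure check, no side effects. */
    static bool linklocal_ok(bool is_linklocal, unsigned int scope_id)
    {
        if (!is_linklocal)
            return true;
        if (!scope_id)
            return false;           /* fe80::/10 without %ifname */
        return dev_exists(scope_id);/* kernel: dev_get_by_index + dev_put */
    }

    int main(void)
    {
        printf("%d\n", linklocal_ok(true, 0));   /* 0: no scope given */
        printf("%d\n", linklocal_ok(true, 2));   /* 1: scope resolves */
        printf("%d\n", linklocal_ok(true, 9));   /* 0: no such device */
        printf("%d\n", linklocal_ok(false, 0));  /* 1: not link-local */
        return 0;
    }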
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index e42fd8c2916b..98d49ec9b74b 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -132,14 +132,25 @@ void sctp_snmp_proc_exit(void)
132static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb) 132static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
133{ 133{
134 struct list_head *pos; 134 struct list_head *pos;
135 struct sctp_association *asoc;
135 struct sctp_sockaddr_entry *laddr; 136 struct sctp_sockaddr_entry *laddr;
136 union sctp_addr *addr; 137 struct sctp_transport *peer;
138 union sctp_addr *addr, *primary = NULL;
137 struct sctp_af *af; 139 struct sctp_af *af;
138 140
141 if (epb->type == SCTP_EP_TYPE_ASSOCIATION) {
142 asoc = sctp_assoc(epb);
143 peer = asoc->peer.primary_path;
144 primary = &peer->saddr;
145 }
146
139 list_for_each(pos, &epb->bind_addr.address_list) { 147 list_for_each(pos, &epb->bind_addr.address_list) {
140 laddr = list_entry(pos, struct sctp_sockaddr_entry, list); 148 laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
141 addr = (union sctp_addr *)&laddr->a; 149 addr = (union sctp_addr *)&laddr->a;
142 af = sctp_get_af_specific(addr->sa.sa_family); 150 af = sctp_get_af_specific(addr->sa.sa_family);
151 if (primary && af->cmp_addr(addr, primary)) {
152 seq_printf(seq, "*");
153 }
143 af->seq_dump_addr(seq, addr); 154 af->seq_dump_addr(seq, addr);
144 } 155 }
145} 156}
@@ -149,17 +160,54 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
149{ 160{
150 struct list_head *pos; 161 struct list_head *pos;
151 struct sctp_transport *transport; 162 struct sctp_transport *transport;
152 union sctp_addr *addr; 163 union sctp_addr *addr, *primary;
153 struct sctp_af *af; 164 struct sctp_af *af;
154 165
166 primary = &(assoc->peer.primary_addr);
155 list_for_each(pos, &assoc->peer.transport_addr_list) { 167 list_for_each(pos, &assoc->peer.transport_addr_list) {
156 transport = list_entry(pos, struct sctp_transport, transports); 168 transport = list_entry(pos, struct sctp_transport, transports);
157 addr = (union sctp_addr *)&transport->ipaddr; 169 addr = (union sctp_addr *)&transport->ipaddr;
158 af = sctp_get_af_specific(addr->sa.sa_family); 170 af = sctp_get_af_specific(addr->sa.sa_family);
171 if (af->cmp_addr(addr, primary)) {
172 seq_printf(seq, "*");
173 }
159 af->seq_dump_addr(seq, addr); 174 af->seq_dump_addr(seq, addr);
160 } 175 }
161} 176}
162 177
178static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
179{
180 if (*pos > sctp_ep_hashsize)
181 return NULL;
182
183 if (*pos < 0)
184 *pos = 0;
185
186 if (*pos == 0)
187 seq_printf(seq, " ENDPT SOCK STY SST HBKT LPORT UID INODE LADDRS\n");
188
189 ++*pos;
190
191 return (void *)pos;
192}
193
194static void sctp_eps_seq_stop(struct seq_file *seq, void *v)
195{
196 return;
197}
198
199
200static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos)
201{
202 if (*pos > sctp_ep_hashsize)
203 return NULL;
204
205 ++*pos;
206
207 return pos;
208}
209
210
163/* Display sctp endpoints (/proc/net/sctp/eps). */ 211/* Display sctp endpoints (/proc/net/sctp/eps). */
164static int sctp_eps_seq_show(struct seq_file *seq, void *v) 212static int sctp_eps_seq_show(struct seq_file *seq, void *v)
165{ 213{
@@ -167,38 +215,50 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
167 struct sctp_ep_common *epb; 215 struct sctp_ep_common *epb;
168 struct sctp_endpoint *ep; 216 struct sctp_endpoint *ep;
169 struct sock *sk; 217 struct sock *sk;
170 int hash; 218 int hash = *(int *)v;
171 219
172 seq_printf(seq, " ENDPT SOCK STY SST HBKT LPORT LADDRS\n"); 220 if (hash > sctp_ep_hashsize)
173 for (hash = 0; hash < sctp_ep_hashsize; hash++) { 221 return -ENOMEM;
174 head = &sctp_ep_hashtable[hash]; 222
175 read_lock(&head->lock); 223 head = &sctp_ep_hashtable[hash-1];
176 for (epb = head->chain; epb; epb = epb->next) { 224 sctp_local_bh_disable();
177 ep = sctp_ep(epb); 225 read_lock(&head->lock);
178 sk = epb->sk; 226 for (epb = head->chain; epb; epb = epb->next) {
179 seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d ", ep, sk, 227 ep = sctp_ep(epb);
180 sctp_sk(sk)->type, sk->sk_state, hash, 228 sk = epb->sk;
181 epb->bind_addr.port); 229 seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk,
182 sctp_seq_dump_local_addrs(seq, epb); 230 sctp_sk(sk)->type, sk->sk_state, hash-1,
183 seq_printf(seq, "\n"); 231 epb->bind_addr.port,
184 } 232 sock_i_uid(sk), sock_i_ino(sk));
185 read_unlock(&head->lock); 233
234 sctp_seq_dump_local_addrs(seq, epb);
235 seq_printf(seq, "\n");
186 } 236 }
237 read_unlock(&head->lock);
238 sctp_local_bh_enable();
187 239
188 return 0; 240 return 0;
189} 241}
190 242
243static struct seq_operations sctp_eps_ops = {
244 .start = sctp_eps_seq_start,
245 .next = sctp_eps_seq_next,
246 .stop = sctp_eps_seq_stop,
247 .show = sctp_eps_seq_show,
248};
249
250
191/* Initialize the seq file operations for 'eps' object. */ 251/* Initialize the seq file operations for 'eps' object. */
192static int sctp_eps_seq_open(struct inode *inode, struct file *file) 252static int sctp_eps_seq_open(struct inode *inode, struct file *file)
193{ 253{
194 return single_open(file, sctp_eps_seq_show, NULL); 254 return seq_open(file, &sctp_eps_ops);
195} 255}
196 256
197static struct file_operations sctp_eps_seq_fops = { 257static struct file_operations sctp_eps_seq_fops = {
198 .open = sctp_eps_seq_open, 258 .open = sctp_eps_seq_open,
199 .read = seq_read, 259 .read = seq_read,
200 .llseek = seq_lseek, 260 .llseek = seq_lseek,
201 .release = single_release, 261 .release = seq_release,
202}; 262};
203 263
204/* Set up the proc fs entry for 'eps' object. */ 264/* Set up the proc fs entry for 'eps' object. */
@@ -221,6 +281,40 @@ void sctp_eps_proc_exit(void)
221 remove_proc_entry("eps", proc_net_sctp); 281 remove_proc_entry("eps", proc_net_sctp);
222} 282}
223 283
284
285static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
286{
287 if (*pos > sctp_assoc_hashsize)
288 return NULL;
289
290 if (*pos < 0)
291 *pos = 0;
292
293 if (*pos == 0)
294 seq_printf(seq, " ASSOC SOCK STY SST ST HBKT ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
295 "RPORT LADDRS <-> RADDRS\n");
296
297 ++*pos;
298
299 return (void *)pos;
300}
301
302static void sctp_assocs_seq_stop(struct seq_file *seq, void *v)
303{
304 return;
305}
306
307
308static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos)
309{
310 if (*pos > sctp_assoc_hashsize)
311 return NULL;
312
313 ++*pos;
314
315 return pos;
316}
317
224/* Display sctp associations (/proc/net/sctp/assocs). */ 318/* Display sctp associations (/proc/net/sctp/assocs). */
225static int sctp_assocs_seq_show(struct seq_file *seq, void *v) 319static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
226{ 320{
@@ -228,43 +322,57 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
228 struct sctp_ep_common *epb; 322 struct sctp_ep_common *epb;
229 struct sctp_association *assoc; 323 struct sctp_association *assoc;
230 struct sock *sk; 324 struct sock *sk;
231 int hash; 325 int hash = *(int *)v;
232 326
233 seq_printf(seq, " ASSOC SOCK STY SST ST HBKT LPORT RPORT " 327 if (hash > sctp_assoc_hashsize)
234 "LADDRS <-> RADDRS\n"); 328 return -ENOMEM;
235 for (hash = 0; hash < sctp_assoc_hashsize; hash++) { 329
236 head = &sctp_assoc_hashtable[hash]; 330 head = &sctp_assoc_hashtable[hash-1];
237 read_lock(&head->lock); 331 sctp_local_bh_disable();
238 for (epb = head->chain; epb; epb = epb->next) { 332 read_lock(&head->lock);
239 assoc = sctp_assoc(epb); 333 for (epb = head->chain; epb; epb = epb->next) {
240 sk = epb->sk; 334 assoc = sctp_assoc(epb);
241 seq_printf(seq, 335 sk = epb->sk;
242 "%8p %8p %-3d %-3d %-2d %-4d %-5d %-5d ", 336 seq_printf(seq,
243 assoc, sk, sctp_sk(sk)->type, sk->sk_state, 337 "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ",
244 assoc->state, hash, epb->bind_addr.port, 338 assoc, sk, sctp_sk(sk)->type, sk->sk_state,
245 assoc->peer.port); 339 assoc->state, hash-1, assoc->assoc_id,
246 sctp_seq_dump_local_addrs(seq, epb); 340 (sk->sk_rcvbuf - assoc->rwnd),
247 seq_printf(seq, "<-> "); 341 assoc->sndbuf_used,
248 sctp_seq_dump_remote_addrs(seq, assoc); 342 sock_i_uid(sk), sock_i_ino(sk),
249 seq_printf(seq, "\n"); 343 epb->bind_addr.port,
250 } 344 assoc->peer.port);
251 read_unlock(&head->lock); 345
346 seq_printf(seq, " ");
347 sctp_seq_dump_local_addrs(seq, epb);
348 seq_printf(seq, "<-> ");
349 sctp_seq_dump_remote_addrs(seq, assoc);
350 seq_printf(seq, "\n");
252 } 351 }
352 read_unlock(&head->lock);
353 sctp_local_bh_enable();
253 354
254 return 0; 355 return 0;
255} 356}
256 357
358static struct seq_operations sctp_assoc_ops = {
359 .start = sctp_assocs_seq_start,
360 .next = sctp_assocs_seq_next,
361 .stop = sctp_assocs_seq_stop,
362 .show = sctp_assocs_seq_show,
363};
364
257/* Initialize the seq file operations for 'assocs' object. */ 365/* Initialize the seq file operations for 'assocs' object. */
258static int sctp_assocs_seq_open(struct inode *inode, struct file *file) 366static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
259{ 367{
260 return single_open(file, sctp_assocs_seq_show, NULL); 368 return seq_open(file, &sctp_assoc_ops);
261} 369}
262 370
263static struct file_operations sctp_assocs_seq_fops = { 371static struct file_operations sctp_assocs_seq_fops = {
264 .open = sctp_assocs_seq_open, 372 .open = sctp_assocs_seq_open,
265 .read = seq_read, 373 .read = seq_read,
266 .llseek = seq_lseek, 374 .llseek = seq_lseek,
267 .release = single_release, 375 .release = seq_release,
268}; 376};
269 377
270/* Set up the proc fs entry for 'assocs' object. */ 378/* Set up the proc fs entry for 'assocs' object. */
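Note: the /proc/net/sctp files move from single_open(), which renders everything in one ->show() call, to a real seq_file iterator that visits one hash bucket per step; *pos counts buckets, with position 0 reserved for the header row. A user-space model of that start/next/stop/show contract (no locking, fixed toy table):

    #include <stdio.h>

    #define HASHSIZE 4
    static const char *table[HASHSIZE] = { "ep-a", NULL, "ep-b ep-c", NULL };

    static long *start(long *pos)
    {
        if (*pos > HASHSIZE)
            return NULL;
        if (*pos == 0) {
            puts("BUCKET ENTRIES");   /* header printed exactly once */
            ++*pos;                   /* positions 1..HASHSIZE are buckets */
        }
        return pos;
    }

    static long *next(long *pos)
    {
        ++*pos;
        return *pos > HASHSIZE ? NULL : pos;
    }

    static void show(long pos)
    {
        printf("%6ld %s\n", pos - 1, table[pos - 1] ? table[pos - 1] : "-");
    }

    int main(void)
    {
        long pos = 0;
        for (long *v = start(&pos); v; v = next(v))
            show(*v);
        return 0;
    }

The design choice mirrored from the diff: per-bucket iteration lets the kernel drop and retake the bucket lock between ->show() calls instead of holding every lock for the whole dump.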
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 2e1f9c3556f5..5135e1a25d25 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -378,10 +378,13 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
378{ 378{
379 int ret = inet_addr_type(addr->v4.sin_addr.s_addr); 379 int ret = inet_addr_type(addr->v4.sin_addr.s_addr);
380 380
381 /* FIXME: ip_nonlocal_bind sysctl support. */
382 381
383 if (addr->v4.sin_addr.s_addr != INADDR_ANY && ret != RTN_LOCAL) 382 if (addr->v4.sin_addr.s_addr != INADDR_ANY &&
383 ret != RTN_LOCAL &&
384 !sp->inet.freebind &&
385 !sysctl_ip_nonlocal_bind)
384 return 0; 386 return 0;
387
385 return 1; 388 return 1;
386} 389}
387 390
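Note: sctp_v4_available() now follows the same rule as TCP and UDP: a non-local unicast address remains bindable when the socket has IP_FREEBIND set or the ip_nonlocal_bind sysctl is enabled. The decision, reduced to a sketch whose boolean parameters stand in for sp->inet.freebind and sysctl_ip_nonlocal_bind:

    #include <stdbool.h>
    #include <stdio.h>

    static bool v4_available(bool addr_is_any, bool addr_is_local,
                             bool freebind, bool sysctl_nonlocal)
    {
        /* Reject only when nothing authorizes a foreign bind. */
        if (!addr_is_any && !addr_is_local && !freebind && !sysctl_nonlocal)
            return false;
        return true;
    }

    int main(void)
    {
        printf("%d\n", v4_available(false, false, false, false)); /* 0 */
        printf("%d\n", v4_available(false, false, true,  false)); /* 1 */
        printf("%d\n", v4_available(false, false, false, true));  /* 1 */
        printf("%d\n", v4_available(true,  false, false, false)); /* 1 */
        return 0;
    }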
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0b338eca6dc0..e6926cb19420 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4368,15 +4368,11 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
4368 * However, this function was correct in any case. 8) 4368 * However, this function was correct in any case. 8)
4369 */ 4369 */
4370 if (flags & MSG_PEEK) { 4370 if (flags & MSG_PEEK) {
4371 unsigned long cpu_flags; 4371 spin_lock_bh(&sk->sk_receive_queue.lock);
4372
4373 sctp_spin_lock_irqsave(&sk->sk_receive_queue.lock,
4374 cpu_flags);
4375 skb = skb_peek(&sk->sk_receive_queue); 4372 skb = skb_peek(&sk->sk_receive_queue);
4376 if (skb) 4373 if (skb)
4377 atomic_inc(&skb->users); 4374 atomic_inc(&skb->users);
4378 sctp_spin_unlock_irqrestore(&sk->sk_receive_queue.lock, 4375 spin_unlock_bh(&sk->sk_receive_queue.lock);
4379 cpu_flags);
4380 } else { 4376 } else {
4381 skb = skb_dequeue(&sk->sk_receive_queue); 4377 skb = skb_dequeue(&sk->sk_receive_queue);
4382 } 4378 }
@@ -4686,6 +4682,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
4686 struct sctp_endpoint *newep = newsp->ep; 4682 struct sctp_endpoint *newep = newsp->ep;
4687 struct sk_buff *skb, *tmp; 4683 struct sk_buff *skb, *tmp;
4688 struct sctp_ulpevent *event; 4684 struct sctp_ulpevent *event;
4685 int flags = 0;
4689 4686
4690 /* Migrate socket buffer sizes and all the socket level options to the 4687 /* Migrate socket buffer sizes and all the socket level options to the
4691 * new socket. 4688 * new socket.
@@ -4707,6 +4704,17 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
4707 sctp_sk(newsk)->bind_hash = pp; 4704 sctp_sk(newsk)->bind_hash = pp;
4708 inet_sk(newsk)->num = inet_sk(oldsk)->num; 4705 inet_sk(newsk)->num = inet_sk(oldsk)->num;
4709 4706
4707 /* Copy the bind_addr list from the original endpoint to the new
4708 * endpoint so that we can handle restarts properly
4709 */
4710 if (assoc->peer.ipv4_address)
4711 flags |= SCTP_ADDR4_PEERSUPP;
4712 if (assoc->peer.ipv6_address)
4713 flags |= SCTP_ADDR6_PEERSUPP;
4714 sctp_bind_addr_copy(&newsp->ep->base.bind_addr,
4715 &oldsp->ep->base.bind_addr,
4716 SCTP_SCOPE_GLOBAL, GFP_KERNEL, flags);
4717
4710 /* Move any messages in the old socket's receive queue that are for the 4718 /* Move any messages in the old socket's receive queue that are for the
4711 * peeled off association to the new socket's receive queue. 4719 * peeled off association to the new socket's receive queue.
4712 */ 4720 */
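Note: the MSG_PEEK branch switches to spin_lock_bh() on the receive-queue lock: a peek must bump skb->users under that lock so the buffer cannot be freed between the peek and the caller's use of it. A toy model of peek-with-reference versus dequeue, with a pthread mutex standing in for the BH-disabling spinlock (link with -pthread):

    #include <pthread.h>
    #include <stdio.h>

    struct skb { int users; int data; struct skb *next; };

    static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
    static struct skb *qhead;

    /* Peek: leave the skb on the queue but take a reference under the
     * lock, so a concurrent reader cannot free it out from under us.
     * A real caller would later drop the reference (kfree_skb). */
    static struct skb *peek(void)
    {
        pthread_mutex_lock(&qlock);
        struct skb *skb = qhead;
        if (skb)
            skb->users++;
        pthread_mutex_unlock(&qlock);
        return skb;
    }

    static struct skb *dequeue(void)
    {
        pthread_mutex_lock(&qlock);
        struct skb *skb = qhead;
        if (skb)
            qhead = skb->next;
        pthread_mutex_unlock(&qlock);
        return skb;
    }

    int main(void)
    {
        struct skb one = { .users = 1, .data = 42 };
        qhead = &one;

        struct skb *p = peek();
        printf("peeked %d, users now %d\n", p->data, p->users);
        struct skb *d = dequeue();
        printf("dequeued %d, queue %s\n", d->data,
               qhead ? "nonempty" : "empty");
        return 0;
    }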
diff --git a/net/socket.c b/net/socket.c
index cec0cb38b9ce..38729af09461 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -81,6 +81,7 @@
81#include <linux/syscalls.h> 81#include <linux/syscalls.h>
82#include <linux/compat.h> 82#include <linux/compat.h>
83#include <linux/kmod.h> 83#include <linux/kmod.h>
84#include <linux/audit.h>
84 85
85#ifdef CONFIG_NET_RADIO 86#ifdef CONFIG_NET_RADIO
86#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ 87#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
@@ -226,7 +227,7 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
226 return 0; 227 return 0;
227 if(copy_from_user(kaddr,uaddr,ulen)) 228 if(copy_from_user(kaddr,uaddr,ulen))
228 return -EFAULT; 229 return -EFAULT;
229 return 0; 230 return audit_sockaddr(ulen, kaddr);
230} 231}
231 232
232/** 233/**
@@ -1906,7 +1907,11 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1906 /* copy_from_user should be SMP safe. */ 1907 /* copy_from_user should be SMP safe. */
1907 if (copy_from_user(a, args, nargs[call])) 1908 if (copy_from_user(a, args, nargs[call]))
1908 return -EFAULT; 1909 return -EFAULT;
1909 1910
1911 err = audit_socketcall(nargs[call]/sizeof(unsigned long), a);
1912 if (err)
1913 return err;
1914
1910 a0=a[0]; 1915 a0=a[0];
1911 a1=a[1]; 1916 a1=a[1];
1912 1917
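Note: both changed entry points follow one shape: copy the user data in, hand it to an audit hook that may record or veto the call, and propagate the hook's error. A generic sketch of that shape; audit_check() is a hypothetical stand-in for audit_sockaddr()/audit_socketcall(), and the size policy inside it is invented purely for illustration.

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical hook: records the arguments, may refuse the call. */
    static int audit_check(const void *data, size_t len)
    {
        (void)data;
        return len <= 128 ? 0 : -EPERM;   /* illustrative policy only */
    }

    static int move_to_kernel(void *kbuf, const void *ubuf, size_t len)
    {
        memcpy(kbuf, ubuf, len);          /* kernel: copy_from_user() */
        return audit_check(kbuf, len);    /* veto propagates to the caller */
    }

    int main(void)
    {
        char user[16] = "sockaddr bytes", kern[16];
        int err = move_to_kernel(kern, user, sizeof(user));
        printf("copy-in: %s\n", err ? "rejected by audit" : "ok");
        return 0;
    }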
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d07f5ce31824..0a4260719a12 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -216,8 +216,8 @@ out:
216 216
217expired: 217expired:
218 read_unlock(&xp->lock); 218 read_unlock(&xp->lock);
219 km_policy_expired(xp, dir, 1); 219 if (!xfrm_policy_delete(xp, dir))
220 xfrm_policy_delete(xp, dir); 220 km_policy_expired(xp, dir, 1);
221 xfrm_pol_put(xp); 221 xfrm_pol_put(xp);
222} 222}
223 223
@@ -555,7 +555,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
555 return NULL; 555 return NULL;
556} 556}
557 557
558void xfrm_policy_delete(struct xfrm_policy *pol, int dir) 558int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
559{ 559{
560 write_lock_bh(&xfrm_policy_lock); 560 write_lock_bh(&xfrm_policy_lock);
561 pol = __xfrm_policy_unlink(pol, dir); 561 pol = __xfrm_policy_unlink(pol, dir);
@@ -564,7 +564,9 @@ void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
564 if (dir < XFRM_POLICY_MAX) 564 if (dir < XFRM_POLICY_MAX)
565 atomic_inc(&flow_cache_genid); 565 atomic_inc(&flow_cache_genid);
566 xfrm_policy_kill(pol); 566 xfrm_policy_kill(pol);
567 return 0;
567 } 568 }
569 return -ENOENT;
568} 570}
569 571
570int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) 572int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
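Note: xfrm_policy_delete() now reports whether this caller actually unlinked the policy, and the timer path emits km_policy_expired() only when it did, so two racing deleters can no longer both announce the same policy. A minimal single-threaded model of that exactly-one-winner-notifies idiom (the kernel serializes the unlink under xfrm_policy_lock):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct policy { bool linked; };

    /* Returns 0 to exactly one caller; everyone else sees -ENOENT. */
    static int policy_delete(struct policy *p)
    {
        if (!p->linked)
            return -ENOENT;
        p->linked = false;
        return 0;
    }

    static void try_expire(struct policy *p, const char *who)
    {
        if (!policy_delete(p))
            printf("%s: unlinked, sending expire notification\n", who);
        else
            printf("%s: already gone, staying quiet\n", who);
    }

    int main(void)
    {
        struct policy p = { .linked = true };
        try_expire(&p, "timer");      /* wins the race, notifies */
        try_expire(&p, "user");       /* loses, no duplicate event */
        return 0;
    }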
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d11747c2a763..2537f26f097c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -50,7 +50,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
50 50
51static int xfrm_state_gc_flush_bundles; 51static int xfrm_state_gc_flush_bundles;
52 52
53static void __xfrm_state_delete(struct xfrm_state *x); 53static int __xfrm_state_delete(struct xfrm_state *x);
54 54
55static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); 55static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
56static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); 56static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -154,6 +154,7 @@ static void xfrm_timer_handler(unsigned long data)
154 next = tmo; 154 next = tmo;
155 } 155 }
156 156
157 x->km.dying = warn;
157 if (warn) 158 if (warn)
158 km_state_expired(x, 0); 159 km_state_expired(x, 0);
159resched: 160resched:
@@ -169,9 +170,8 @@ expired:
169 next = 2; 170 next = 2;
170 goto resched; 171 goto resched;
171 } 172 }
172 if (x->id.spi != 0) 173 if (!__xfrm_state_delete(x) && x->id.spi)
173 km_state_expired(x, 1); 174 km_state_expired(x, 1);
174 __xfrm_state_delete(x);
175 175
176out: 176out:
177 spin_unlock(&x->lock); 177 spin_unlock(&x->lock);
@@ -215,8 +215,10 @@ void __xfrm_state_destroy(struct xfrm_state *x)
215} 215}
216EXPORT_SYMBOL(__xfrm_state_destroy); 216EXPORT_SYMBOL(__xfrm_state_destroy);
217 217
218static void __xfrm_state_delete(struct xfrm_state *x) 218static int __xfrm_state_delete(struct xfrm_state *x)
219{ 219{
220 int err = -ESRCH;
221
220 if (x->km.state != XFRM_STATE_DEAD) { 222 if (x->km.state != XFRM_STATE_DEAD) {
221 x->km.state = XFRM_STATE_DEAD; 223 x->km.state = XFRM_STATE_DEAD;
222 spin_lock(&xfrm_state_lock); 224 spin_lock(&xfrm_state_lock);
@@ -245,14 +247,21 @@ static void __xfrm_state_delete(struct xfrm_state *x)
245 * is what we are dropping here. 247 * is what we are dropping here.
246 */ 248 */
247 atomic_dec(&x->refcnt); 249 atomic_dec(&x->refcnt);
250 err = 0;
248 } 251 }
252
253 return err;
249} 254}
250 255
251void xfrm_state_delete(struct xfrm_state *x) 256int xfrm_state_delete(struct xfrm_state *x)
252{ 257{
258 int err;
259
253 spin_lock_bh(&x->lock); 260 spin_lock_bh(&x->lock);
254 __xfrm_state_delete(x); 261 err = __xfrm_state_delete(x);
255 spin_unlock_bh(&x->lock); 262 spin_unlock_bh(&x->lock);
263
264 return err;
256} 265}
257EXPORT_SYMBOL(xfrm_state_delete); 266EXPORT_SYMBOL(xfrm_state_delete);
258 267
@@ -557,16 +566,18 @@ int xfrm_state_check_expire(struct xfrm_state *x)
557 566
558 if (x->curlft.bytes >= x->lft.hard_byte_limit || 567 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
559 x->curlft.packets >= x->lft.hard_packet_limit) { 568 x->curlft.packets >= x->lft.hard_packet_limit) {
560 km_state_expired(x, 1); 569 x->km.state = XFRM_STATE_EXPIRED;
561 if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ)) 570 if (!mod_timer(&x->timer, jiffies))
562 xfrm_state_hold(x); 571 xfrm_state_hold(x);
563 return -EINVAL; 572 return -EINVAL;
564 } 573 }
565 574
566 if (!x->km.dying && 575 if (!x->km.dying &&
567 (x->curlft.bytes >= x->lft.soft_byte_limit || 576 (x->curlft.bytes >= x->lft.soft_byte_limit ||
568 x->curlft.packets >= x->lft.soft_packet_limit)) 577 x->curlft.packets >= x->lft.soft_packet_limit)) {
578 x->km.dying = 1;
569 km_state_expired(x, 0); 579 km_state_expired(x, 0);
580 }
570 return 0; 581 return 0;
571} 582}
572EXPORT_SYMBOL(xfrm_state_check_expire); 583EXPORT_SYMBOL(xfrm_state_check_expire);
@@ -796,34 +807,56 @@ EXPORT_SYMBOL(xfrm_replay_advance);
796static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list); 807static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
797static DEFINE_RWLOCK(xfrm_km_lock); 808static DEFINE_RWLOCK(xfrm_km_lock);
798 809
799static void km_state_expired(struct xfrm_state *x, int hard) 810void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
800{ 811{
801 struct xfrm_mgr *km; 812 struct xfrm_mgr *km;
802 813
803 if (hard) 814 read_lock(&xfrm_km_lock);
804 x->km.state = XFRM_STATE_EXPIRED; 815 list_for_each_entry(km, &xfrm_km_list, list)
805 else 816 if (km->notify_policy)
806 x->km.dying = 1; 817 km->notify_policy(xp, dir, c);
818 read_unlock(&xfrm_km_lock);
819}
807 820
821void km_state_notify(struct xfrm_state *x, struct km_event *c)
822{
823 struct xfrm_mgr *km;
808 read_lock(&xfrm_km_lock); 824 read_lock(&xfrm_km_lock);
809 list_for_each_entry(km, &xfrm_km_list, list) 825 list_for_each_entry(km, &xfrm_km_list, list)
810 km->notify(x, hard); 826 if (km->notify)
827 km->notify(x, c);
811 read_unlock(&xfrm_km_lock); 828 read_unlock(&xfrm_km_lock);
829}
830
831EXPORT_SYMBOL(km_policy_notify);
832EXPORT_SYMBOL(km_state_notify);
833
834static void km_state_expired(struct xfrm_state *x, int hard)
835{
836 struct km_event c;
837
838 c.data.hard = hard;
839 c.event = XFRM_MSG_EXPIRE;
840 km_state_notify(x, &c);
812 841
813 if (hard) 842 if (hard)
814 wake_up(&km_waitq); 843 wake_up(&km_waitq);
815} 844}
816 845
846/*
847 * We send to all registered managers regardless of failures;
848 * we are happy with one success.
849 */
817static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) 850static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
818{ 851{
819 int err = -EINVAL; 852 int err = -EINVAL, acqret;
820 struct xfrm_mgr *km; 853 struct xfrm_mgr *km;
821 854
822 read_lock(&xfrm_km_lock); 855 read_lock(&xfrm_km_lock);
823 list_for_each_entry(km, &xfrm_km_list, list) { 856 list_for_each_entry(km, &xfrm_km_list, list) {
824 err = km->acquire(x, t, pol, XFRM_POLICY_OUT); 857 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
825 if (!err) 858 if (!acqret)
826 break; 859 err = acqret;
827 } 860 }
828 read_unlock(&xfrm_km_lock); 861 read_unlock(&xfrm_km_lock);
829 return err; 862 return err;
@@ -848,13 +881,11 @@ EXPORT_SYMBOL(km_new_mapping);
848 881
849void km_policy_expired(struct xfrm_policy *pol, int dir, int hard) 882void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
850{ 883{
851 struct xfrm_mgr *km; 884 struct km_event c;
852 885
853 read_lock(&xfrm_km_lock); 886 c.data.hard = hard;
854 list_for_each_entry(km, &xfrm_km_list, list) 887 c.event = XFRM_MSG_POLEXPIRE;
855 if (km->notify_policy) 888 km_policy_notify(pol, dir, &c);
856 km->notify_policy(pol, dir, hard);
857 read_unlock(&xfrm_km_lock);
858 889
859 if (hard) 890 if (hard)
860 wake_up(&km_waitq); 891 wake_up(&km_waitq);
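Note: managers are no longer called with a bare hard flag; callers fill a km_event (event type, netlink seq/pid, event-specific data) and km_state_notify()/km_policy_notify() broadcast it to every registered manager that implements the hook. A condensed model of the registration-and-broadcast scheme; these structs are heavily abbreviated relative to the real km_event and xfrm_mgr.

    #include <stdio.h>

    struct event { int type; int hard; };

    struct mgr {
        const char *id;
        void (*notify)(const struct event *);   /* may be NULL */
        struct mgr *next;
    };

    static struct mgr *managers;

    static void mgr_register(struct mgr *m)
    {
        m->next = managers;        /* kernel: list_add under xfrm_km_lock */
        managers = m;
    }

    /* Broadcast to every manager that cares; absent hooks are skipped. */
    static void state_notify(const struct event *c)
    {
        for (struct mgr *m = managers; m; m = m->next)
            if (m->notify)
                m->notify(c);
    }

    static void netlink_notify(const struct event *c)
    {
        printf("netlink: event %d (hard=%d)\n", c->type, c->hard);
    }

    int main(void)
    {
        struct mgr nl = { "netlink", netlink_notify };
        struct mgr pf = { "pfkey", NULL };       /* no notify hook */

        mgr_register(&nl);
        mgr_register(&pf);

        struct event expire = { .type = 1 /* expire */, .hard = 1 };
        state_notify(&expire);
        return 0;
    }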
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 97509011c274..5ce8558eac91 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -277,6 +277,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
277 struct xfrm_usersa_info *p = NLMSG_DATA(nlh); 277 struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
278 struct xfrm_state *x; 278 struct xfrm_state *x;
279 int err; 279 int err;
280 struct km_event c;
280 281
281 err = verify_newsa_info(p, (struct rtattr **) xfrma); 282 err = verify_newsa_info(p, (struct rtattr **) xfrma);
282 if (err) 283 if (err)
@@ -286,6 +287,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
286 if (!x) 287 if (!x)
287 return err; 288 return err;
288 289
290 xfrm_state_hold(x);
289 if (nlh->nlmsg_type == XFRM_MSG_NEWSA) 291 if (nlh->nlmsg_type == XFRM_MSG_NEWSA)
290 err = xfrm_state_add(x); 292 err = xfrm_state_add(x);
291 else 293 else
@@ -294,14 +296,24 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
294 if (err < 0) { 296 if (err < 0) {
295 x->km.state = XFRM_STATE_DEAD; 297 x->km.state = XFRM_STATE_DEAD;
296 xfrm_state_put(x); 298 xfrm_state_put(x);
299 goto out;
297 } 300 }
298 301
302 c.seq = nlh->nlmsg_seq;
303 c.pid = nlh->nlmsg_pid;
304 c.event = nlh->nlmsg_type;
305
306 km_state_notify(x, &c);
307out:
308 xfrm_state_put(x);
299 return err; 309 return err;
300} 310}
301 311
302static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 312static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
303{ 313{
304 struct xfrm_state *x; 314 struct xfrm_state *x;
315 int err;
316 struct km_event c;
305 struct xfrm_usersa_id *p = NLMSG_DATA(nlh); 317 struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
306 318
307 x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); 319 x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
@@ -313,10 +325,19 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
313 return -EPERM; 325 return -EPERM;
314 } 326 }
315 327
316 xfrm_state_delete(x); 328 err = xfrm_state_delete(x);
329 if (err < 0) {
330 xfrm_state_put(x);
331 return err;
332 }
333
334 c.seq = nlh->nlmsg_seq;
335 c.pid = nlh->nlmsg_pid;
336 c.event = nlh->nlmsg_type;
337 km_state_notify(x, &c);
317 xfrm_state_put(x); 338 xfrm_state_put(x);
318 339
319 return 0; 340 return err;
320} 341}
321 342
322static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) 343static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
@@ -681,6 +702,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
681{ 702{
682 struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); 703 struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh);
683 struct xfrm_policy *xp; 704 struct xfrm_policy *xp;
705 struct km_event c;
684 int err; 706 int err;
685 int excl; 707 int excl;
686 708
@@ -692,6 +714,10 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
692 if (!xp) 714 if (!xp)
693 return err; 715 return err;
694 716
717 /* shouldn't excl be based on nlh flags?
718 * Aha! this is anti-netlink really, i.e. more pfkey-derived:
719 * in netlink excl is a flag and you wouldn't need
720 * a type XFRM_MSG_UPDPOLICY - JHS */
695 excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; 721 excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
696 err = xfrm_policy_insert(p->dir, xp, excl); 722 err = xfrm_policy_insert(p->dir, xp, excl);
697 if (err) { 723 if (err) {
@@ -699,6 +725,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
699 return err; 725 return err;
700 } 726 }
701 727
728 c.event = nlh->nlmsg_type;
729 c.seq = nlh->nlmsg_seq;
730 c.pid = nlh->nlmsg_pid;
731 km_policy_notify(xp, p->dir, &c);
732
702 xfrm_pol_put(xp); 733 xfrm_pol_put(xp);
703 734
704 return 0; 735 return 0;
@@ -816,6 +847,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
816 struct xfrm_policy *xp; 847 struct xfrm_policy *xp;
817 struct xfrm_userpolicy_id *p; 848 struct xfrm_userpolicy_id *p;
818 int err; 849 int err;
850 struct km_event c;
819 int delete; 851 int delete;
820 852
821 p = NLMSG_DATA(nlh); 853 p = NLMSG_DATA(nlh);
@@ -843,6 +875,12 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
843 NETLINK_CB(skb).pid, 875 NETLINK_CB(skb).pid,
844 MSG_DONTWAIT); 876 MSG_DONTWAIT);
845 } 877 }
878 } else {
879 c.data.byid = p->index;
880 c.event = nlh->nlmsg_type;
881 c.seq = nlh->nlmsg_seq;
882 c.pid = nlh->nlmsg_pid;
883 km_policy_notify(xp, p->dir, &c);
846 } 884 }
847 885
848 xfrm_pol_put(xp); 886 xfrm_pol_put(xp);
@@ -852,15 +890,28 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
852 890
853static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 891static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
854{ 892{
893 struct km_event c;
855 struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); 894 struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
856 895
857 xfrm_state_flush(p->proto); 896 xfrm_state_flush(p->proto);
897 c.data.proto = p->proto;
898 c.event = nlh->nlmsg_type;
899 c.seq = nlh->nlmsg_seq;
900 c.pid = nlh->nlmsg_pid;
901 km_state_notify(NULL, &c);
902
858 return 0; 903 return 0;
859} 904}
860 905
861static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 906static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
862{ 907{
908 struct km_event c;
909
863 xfrm_policy_flush(); 910 xfrm_policy_flush();
911 c.event = nlh->nlmsg_type;
912 c.seq = nlh->nlmsg_seq;
913 c.pid = nlh->nlmsg_pid;
914 km_policy_notify(NULL, 0, &c);
864 return 0; 915 return 0;
865} 916}
866 917
@@ -1069,15 +1120,16 @@ nlmsg_failure:
1069 return -1; 1120 return -1;
1070} 1121}
1071 1122
1072static int xfrm_send_state_notify(struct xfrm_state *x, int hard) 1123static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
1073{ 1124{
1074 struct sk_buff *skb; 1125 struct sk_buff *skb;
1126 int len = NLMSG_LENGTH(sizeof(struct xfrm_user_expire));
1075 1127
1076 skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC); 1128 skb = alloc_skb(len, GFP_ATOMIC);
1077 if (skb == NULL) 1129 if (skb == NULL)
1078 return -ENOMEM; 1130 return -ENOMEM;
1079 1131
1080 if (build_expire(skb, x, hard) < 0) 1132 if (build_expire(skb, x, c->data.hard) < 0)
1081 BUG(); 1133 BUG();
1082 1134
1083 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; 1135 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
@@ -1085,6 +1137,131 @@ static int xfrm_send_state_notify(struct xfrm_state *x, int hard)
1085 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); 1137 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
1086} 1138}
1087 1139
1140static int xfrm_notify_sa_flush(struct km_event *c)
1141{
1142 struct xfrm_usersa_flush *p;
1143 struct nlmsghdr *nlh;
1144 struct sk_buff *skb;
1145 unsigned char *b;
1146 int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
1147
1148 skb = alloc_skb(len, GFP_ATOMIC);
1149 if (skb == NULL)
1150 return -ENOMEM;
1151 b = skb->tail;
1152
1153 nlh = NLMSG_PUT(skb, c->pid, c->seq,
1154 XFRM_MSG_FLUSHSA, sizeof(*p));
1155 nlh->nlmsg_flags = 0;
1156
1157 p = NLMSG_DATA(nlh);
1158 p->proto = c->data.proto;
1159
1160 nlh->nlmsg_len = skb->tail - b;
1161
1162 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
1163
1164nlmsg_failure:
1165 kfree_skb(skb);
1166 return -1;
1167}
1168
1169static int inline xfrm_sa_len(struct xfrm_state *x)
1170{
1171 int l = 0;
1172 if (x->aalg)
1173 l += RTA_SPACE(sizeof(*x->aalg) + (x->aalg->alg_key_len+7)/8);
1174 if (x->ealg)
1175 l += RTA_SPACE(sizeof(*x->ealg) + (x->ealg->alg_key_len+7)/8);
1176 if (x->calg)
1177 l += RTA_SPACE(sizeof(*x->calg));
1178 if (x->encap)
1179 l += RTA_SPACE(sizeof(*x->encap));
1180
1181 return l;
1182}
1183
1184static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
1185{
1186 struct xfrm_usersa_info *p;
1187 struct xfrm_usersa_id *id;
1188 struct nlmsghdr *nlh;
1189 struct sk_buff *skb;
1190 unsigned char *b;
1191 int len = xfrm_sa_len(x);
1192 int headlen;
1193
1194 headlen = sizeof(*p);
1195 if (c->event == XFRM_MSG_DELSA) {
1196 len += RTA_SPACE(headlen);
1197 headlen = sizeof(*id);
1198 }
1199 len += NLMSG_SPACE(headlen);
1200
1201 skb = alloc_skb(len, GFP_ATOMIC);
1202 if (skb == NULL)
1203 return -ENOMEM;
1204 b = skb->tail;
1205
1206 nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen);
1207 nlh->nlmsg_flags = 0;
1208
1209 p = NLMSG_DATA(nlh);
1210 if (c->event == XFRM_MSG_DELSA) {
1211 id = NLMSG_DATA(nlh);
1212 memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr));
1213 id->spi = x->id.spi;
1214 id->family = x->props.family;
1215 id->proto = x->id.proto;
1216
1217 p = RTA_DATA(__RTA_PUT(skb, XFRMA_SA, sizeof(*p)));
1218 }
1219
1220 copy_to_user_state(x, p);
1221
1222 if (x->aalg)
1223 RTA_PUT(skb, XFRMA_ALG_AUTH,
1224 sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg);
1225 if (x->ealg)
1226 RTA_PUT(skb, XFRMA_ALG_CRYPT,
1227 sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg);
1228 if (x->calg)
1229 RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
1230
1231 if (x->encap)
1232 RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
1233
1234 nlh->nlmsg_len = skb->tail - b;
1235
1236 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
1237
1238nlmsg_failure:
1239rtattr_failure:
1240 kfree_skb(skb);
1241 return -1;
1242}
1243
1244static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c)
1245{
1246
1247 switch (c->event) {
1248 case XFRM_MSG_EXPIRE:
1249 return xfrm_exp_state_notify(x, c);
1250 case XFRM_MSG_DELSA:
1251 case XFRM_MSG_UPDSA:
1252 case XFRM_MSG_NEWSA:
1253 return xfrm_notify_sa(x, c);
1254 case XFRM_MSG_FLUSHSA:
1255 return xfrm_notify_sa_flush(c);
1256 default:
1257 printk("xfrm_user: Unknown SA event %d\n", c->event);
1258 break;
1259 }
1260
1261 return 0;
1262
1263}
1264
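Note: xfrm_notify_sa() sizes its skb before filling it: xfrm_sa_len() adds up the space each optional attribute would occupy, and a DELSA message additionally reserves room for the full SA as a nested attribute next to the smaller id header. A user-space model of that measure-then-allocate-then-fill discipline; SPACE() only approximates RTA_SPACE() and every size constant below is illustrative.

    #include <stdio.h>

    #define ALIGNTO  4
    #define ALIGN4(x) (((x) + ALIGNTO - 1) & ~(ALIGNTO - 1))
    #define HDRLEN   4                       /* per-attribute header */
    #define SPACE(x) (HDRLEN + ALIGN4(x))    /* approximates RTA_SPACE() */

    struct sa {                              /* which optional parts exist */
        int aalg_keybits, ealg_keybits, has_calg, has_encap;
    };

    /* Mirrors xfrm_sa_len(): sum the space of each present attribute. */
    static int sa_len(const struct sa *x)
    {
        int l = 0;
        if (x->aalg_keybits)
            l += SPACE(8 + (x->aalg_keybits + 7) / 8); /* hdr + key bytes */
        if (x->ealg_keybits)
            l += SPACE(8 + (x->ealg_keybits + 7) / 8);
        if (x->has_calg)
            l += SPACE(8);
        if (x->has_encap)
            l += SPACE(12);
        return l;
    }

    int main(void)
    {
        struct sa x = { .aalg_keybits = 160, .ealg_keybits = 192,
                        .has_encap = 1 };
        printf("optional attrs need %d bytes; alloc once, then fill\n",
               sa_len(&x));
        return 0;
    }

Measuring first keeps the broadcast path allocation-exact: the skb is sized in one GFP_ATOMIC alloc_skb() and the fill phase cannot overrun it.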
1088static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, 1265static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
1089 struct xfrm_tmpl *xt, struct xfrm_policy *xp, 1266 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
1090 int dir) 1267 int dir)
@@ -1218,7 +1395,7 @@ nlmsg_failure:
1218 return -1; 1395 return -1;
1219} 1396}
1220 1397
1221static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard) 1398static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1222{ 1399{
1223 struct sk_buff *skb; 1400 struct sk_buff *skb;
1224 size_t len; 1401 size_t len;
@@ -1229,7 +1406,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
1229 if (skb == NULL) 1406 if (skb == NULL)
1230 return -ENOMEM; 1407 return -ENOMEM;
1231 1408
1232 if (build_polexpire(skb, xp, dir, hard) < 0) 1409 if (build_polexpire(skb, xp, dir, c->data.hard) < 0)
1233 BUG(); 1410 BUG();
1234 1411
1235 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; 1412 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
@@ -1237,6 +1414,103 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
1237 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); 1414 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
1238} 1415}
1239 1416
1417static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
1418{
1419 struct xfrm_userpolicy_info *p;
1420 struct xfrm_userpolicy_id *id;
1421 struct nlmsghdr *nlh;
1422 struct sk_buff *skb;
1423 unsigned char *b;
1424 int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
1425 int headlen;
1426
1427 headlen = sizeof(*p);
1428 if (c->event == XFRM_MSG_DELPOLICY) {
1429 len += RTA_SPACE(headlen);
1430 headlen = sizeof(*id);
1431 }
1432 len += NLMSG_SPACE(headlen);
1433
1434 skb = alloc_skb(len, GFP_ATOMIC);
1435 if (skb == NULL)
1436 return -ENOMEM;
1437 b = skb->tail;
1438
1439 nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen);
1440
1441 p = NLMSG_DATA(nlh);
1442 if (c->event == XFRM_MSG_DELPOLICY) {
1443 id = NLMSG_DATA(nlh);
1444 memset(id, 0, sizeof(*id));
1445 id->dir = dir;
1446 if (c->data.byid)
1447 id->index = xp->index;
1448 else
1449 memcpy(&id->sel, &xp->selector, sizeof(id->sel));
1450
1451 p = RTA_DATA(__RTA_PUT(skb, XFRMA_POLICY, sizeof(*p)));
1452 }
1453
1454 nlh->nlmsg_flags = 0;
1455
1456 copy_to_user_policy(xp, p, dir);
1457 if (copy_to_user_tmpl(xp, skb) < 0)
1458 goto nlmsg_failure;
1459
1460 nlh->nlmsg_len = skb->tail - b;
1461
1462 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
1463
1464nlmsg_failure:
1465rtattr_failure:
1466 kfree_skb(skb);
1467 return -1;
1468}
1469
1470static int xfrm_notify_policy_flush(struct km_event *c)
1471{
1472 struct nlmsghdr *nlh;
1473 struct sk_buff *skb;
1474 unsigned char *b;
1475 int len = NLMSG_LENGTH(0);
1476
1477 skb = alloc_skb(len, GFP_ATOMIC);
1478 if (skb == NULL)
1479 return -ENOMEM;
1480 b = skb->tail;
1481
1482
1483 nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0);
1484
1485 nlh->nlmsg_len = skb->tail - b;
1486
1487 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
1488
1489nlmsg_failure:
1490 kfree_skb(skb);
1491 return -1;
1492}
1493
1494static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1495{
1496
1497 switch (c->event) {
1498 case XFRM_MSG_NEWPOLICY:
1499 case XFRM_MSG_UPDPOLICY:
1500 case XFRM_MSG_DELPOLICY:
1501 return xfrm_notify_policy(xp, dir, c);
1502 case XFRM_MSG_FLUSHPOLICY:
1503 return xfrm_notify_policy_flush(c);
1504 case XFRM_MSG_POLEXPIRE:
1505 return xfrm_exp_policy_notify(xp, dir, c);
1506 default:
1507 printk("xfrm_user: Unknown Policy event %d\n", c->event);
1508 }
1509
1510 return 0;
1511
1512}
1513
1240static struct xfrm_mgr netlink_mgr = { 1514static struct xfrm_mgr netlink_mgr = {
1241 .id = "netlink", 1515 .id = "netlink",
1242 .notify = xfrm_send_state_notify, 1516 .notify = xfrm_send_state_notify,