diff options
Diffstat (limited to 'net')
171 files changed, 5437 insertions, 2366 deletions
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7982656b9c83..a5144e43aae1 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c | |||
@@ -63,7 +63,7 @@ | |||
63 | #include <linux/atalk.h> | 63 | #include <linux/atalk.h> |
64 | 64 | ||
65 | struct datalink_proto *ddp_dl, *aarp_dl; | 65 | struct datalink_proto *ddp_dl, *aarp_dl; |
66 | static struct proto_ops atalk_dgram_ops; | 66 | static const struct proto_ops atalk_dgram_ops; |
67 | 67 | ||
68 | /**************************************************************************\ | 68 | /**************************************************************************\ |
69 | * * | 69 | * * |
@@ -1763,7 +1763,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr | |||
1763 | */ | 1763 | */ |
1764 | static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | 1764 | static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) |
1765 | { | 1765 | { |
1766 | int rc = -EINVAL; | 1766 | int rc = -ENOIOCTLCMD; |
1767 | struct sock *sk = sock->sk; | 1767 | struct sock *sk = sock->sk; |
1768 | void __user *argp = (void __user *)arg; | 1768 | void __user *argp = (void __user *)arg; |
1769 | 1769 | ||
@@ -1813,23 +1813,6 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1813 | rc = atif_ioctl(cmd, argp); | 1813 | rc = atif_ioctl(cmd, argp); |
1814 | rtnl_unlock(); | 1814 | rtnl_unlock(); |
1815 | break; | 1815 | break; |
1816 | /* Physical layer ioctl calls */ | ||
1817 | case SIOCSIFLINK: | ||
1818 | case SIOCGIFHWADDR: | ||
1819 | case SIOCSIFHWADDR: | ||
1820 | case SIOCGIFFLAGS: | ||
1821 | case SIOCSIFFLAGS: | ||
1822 | case SIOCGIFTXQLEN: | ||
1823 | case SIOCSIFTXQLEN: | ||
1824 | case SIOCGIFMTU: | ||
1825 | case SIOCGIFCONF: | ||
1826 | case SIOCADDMULTI: | ||
1827 | case SIOCDELMULTI: | ||
1828 | case SIOCGIFCOUNT: | ||
1829 | case SIOCGIFINDEX: | ||
1830 | case SIOCGIFNAME: | ||
1831 | rc = dev_ioctl(cmd, argp); | ||
1832 | break; | ||
1833 | } | 1816 | } |
1834 | 1817 | ||
1835 | return rc; | 1818 | return rc; |
@@ -1841,7 +1824,7 @@ static struct net_proto_family atalk_family_ops = { | |||
1841 | .owner = THIS_MODULE, | 1824 | .owner = THIS_MODULE, |
1842 | }; | 1825 | }; |
1843 | 1826 | ||
1844 | static struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { | 1827 | static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { |
1845 | .family = PF_APPLETALK, | 1828 | .family = PF_APPLETALK, |
1846 | .owner = THIS_MODULE, | 1829 | .owner = THIS_MODULE, |
1847 | .release = atalk_release, | 1830 | .release = atalk_release, |
diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 2684a92da22b..f2c541774dcd 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c | |||
@@ -102,7 +102,7 @@ static int pvc_getname(struct socket *sock,struct sockaddr *sockaddr, | |||
102 | } | 102 | } |
103 | 103 | ||
104 | 104 | ||
105 | static struct proto_ops pvc_proto_ops = { | 105 | static const struct proto_ops pvc_proto_ops = { |
106 | .family = PF_ATMPVC, | 106 | .family = PF_ATMPVC, |
107 | .owner = THIS_MODULE, | 107 | .owner = THIS_MODULE, |
108 | 108 | ||
diff --git a/net/atm/svc.c b/net/atm/svc.c index d7b266136bf6..3a180cfd7b48 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c | |||
@@ -613,7 +613,7 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
613 | return error; | 613 | return error; |
614 | } | 614 | } |
615 | 615 | ||
616 | static struct proto_ops svc_proto_ops = { | 616 | static const struct proto_ops svc_proto_ops = { |
617 | .family = PF_ATMSVC, | 617 | .family = PF_ATMSVC, |
618 | .owner = THIS_MODULE, | 618 | .owner = THIS_MODULE, |
619 | 619 | ||
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 1b683f302657..e8753c7fcad1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c | |||
@@ -54,7 +54,7 @@ | |||
54 | HLIST_HEAD(ax25_list); | 54 | HLIST_HEAD(ax25_list); |
55 | DEFINE_SPINLOCK(ax25_list_lock); | 55 | DEFINE_SPINLOCK(ax25_list_lock); |
56 | 56 | ||
57 | static struct proto_ops ax25_proto_ops; | 57 | static const struct proto_ops ax25_proto_ops; |
58 | 58 | ||
59 | static void ax25_free_sock(struct sock *sk) | 59 | static void ax25_free_sock(struct sock *sk) |
60 | { | 60 | { |
@@ -1827,7 +1827,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1827 | break; | 1827 | break; |
1828 | 1828 | ||
1829 | default: | 1829 | default: |
1830 | res = dev_ioctl(cmd, argp); | 1830 | res = -ENOIOCTLCMD; |
1831 | break; | 1831 | break; |
1832 | } | 1832 | } |
1833 | release_sock(sk); | 1833 | release_sock(sk); |
@@ -1944,7 +1944,7 @@ static struct net_proto_family ax25_family_ops = { | |||
1944 | .owner = THIS_MODULE, | 1944 | .owner = THIS_MODULE, |
1945 | }; | 1945 | }; |
1946 | 1946 | ||
1947 | static struct proto_ops ax25_proto_ops = { | 1947 | static const struct proto_ops ax25_proto_ops = { |
1948 | .family = PF_AX25, | 1948 | .family = PF_AX25, |
1949 | .owner = THIS_MODULE, | 1949 | .owner = THIS_MODULE, |
1950 | .release = ax25_release, | 1950 | .release = ax25_release, |
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ea616e3fc98e..fb031fe9be9e 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c | |||
@@ -287,10 +287,9 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) | |||
287 | timeo = schedule_timeout(timeo); | 287 | timeo = schedule_timeout(timeo); |
288 | lock_sock(sk); | 288 | lock_sock(sk); |
289 | 289 | ||
290 | if (sk->sk_err) { | 290 | err = sock_error(sk); |
291 | err = sock_error(sk); | 291 | if (err) |
292 | break; | 292 | break; |
293 | } | ||
294 | } | 293 | } |
295 | set_current_state(TASK_RUNNING); | 294 | set_current_state(TASK_RUNNING); |
296 | remove_wait_queue(sk->sk_sleep, &wait); | 295 | remove_wait_queue(sk->sk_sleep, &wait); |
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 9778c6acd53b..ccbaf69afc5b 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c | |||
@@ -146,7 +146,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long | |||
146 | return 0; | 146 | return 0; |
147 | } | 147 | } |
148 | 148 | ||
149 | static struct proto_ops bnep_sock_ops = { | 149 | static const struct proto_ops bnep_sock_ops = { |
150 | .family = PF_BLUETOOTH, | 150 | .family = PF_BLUETOOTH, |
151 | .owner = THIS_MODULE, | 151 | .owner = THIS_MODULE, |
152 | .release = bnep_sock_release, | 152 | .release = bnep_sock_release, |
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index beb045bf5714..5e22343b6090 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c | |||
@@ -137,7 +137,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long | |||
137 | return -EINVAL; | 137 | return -EINVAL; |
138 | } | 138 | } |
139 | 139 | ||
140 | static struct proto_ops cmtp_sock_ops = { | 140 | static const struct proto_ops cmtp_sock_ops = { |
141 | .family = PF_BLUETOOTH, | 141 | .family = PF_BLUETOOTH, |
142 | .owner = THIS_MODULE, | 142 | .owner = THIS_MODULE, |
143 | .release = cmtp_sock_release, | 143 | .release = cmtp_sock_release, |
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1d6d0a15c099..84e6c93a044a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c | |||
@@ -575,7 +575,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char | |||
575 | return 0; | 575 | return 0; |
576 | } | 576 | } |
577 | 577 | ||
578 | static struct proto_ops hci_sock_ops = { | 578 | static const struct proto_ops hci_sock_ops = { |
579 | .family = PF_BLUETOOTH, | 579 | .family = PF_BLUETOOTH, |
580 | .owner = THIS_MODULE, | 580 | .owner = THIS_MODULE, |
581 | .release = hci_sock_release, | 581 | .release = hci_sock_release, |
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index bd7568ac87fc..0ed38740388c 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c | |||
@@ -78,7 +78,7 @@ static struct class_device_attribute *bt_attrs[] = { | |||
78 | }; | 78 | }; |
79 | 79 | ||
80 | #ifdef CONFIG_HOTPLUG | 80 | #ifdef CONFIG_HOTPLUG |
81 | static int bt_hotplug(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) | 81 | static int bt_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) |
82 | { | 82 | { |
83 | struct hci_dev *hdev = class_get_devdata(cdev); | 83 | struct hci_dev *hdev = class_get_devdata(cdev); |
84 | int n, i = 0; | 84 | int n, i = 0; |
@@ -107,7 +107,7 @@ struct class bt_class = { | |||
107 | .name = "bluetooth", | 107 | .name = "bluetooth", |
108 | .release = bt_release, | 108 | .release = bt_release, |
109 | #ifdef CONFIG_HOTPLUG | 109 | #ifdef CONFIG_HOTPLUG |
110 | .hotplug = bt_hotplug, | 110 | .uevent = bt_uevent, |
111 | #endif | 111 | #endif |
112 | }; | 112 | }; |
113 | 113 | ||
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index f8986f881431..8f8dd931b294 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c | |||
@@ -143,7 +143,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long | |||
143 | return -EINVAL; | 143 | return -EINVAL; |
144 | } | 144 | } |
145 | 145 | ||
146 | static struct proto_ops hidp_sock_ops = { | 146 | static const struct proto_ops hidp_sock_ops = { |
147 | .family = PF_BLUETOOTH, | 147 | .family = PF_BLUETOOTH, |
148 | .owner = THIS_MODULE, | 148 | .owner = THIS_MODULE, |
149 | .release = hidp_sock_release, | 149 | .release = hidp_sock_release, |
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index e3bb11ca4235..7f0781e4326f 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c | |||
@@ -57,7 +57,7 @@ | |||
57 | 57 | ||
58 | #define VERSION "2.8" | 58 | #define VERSION "2.8" |
59 | 59 | ||
60 | static struct proto_ops l2cap_sock_ops; | 60 | static const struct proto_ops l2cap_sock_ops; |
61 | 61 | ||
62 | static struct bt_sock_list l2cap_sk_list = { | 62 | static struct bt_sock_list l2cap_sk_list = { |
63 | .lock = RW_LOCK_UNLOCKED | 63 | .lock = RW_LOCK_UNLOCKED |
@@ -767,8 +767,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms | |||
767 | 767 | ||
768 | BT_DBG("sock %p, sk %p", sock, sk); | 768 | BT_DBG("sock %p, sk %p", sock, sk); |
769 | 769 | ||
770 | if (sk->sk_err) | 770 | err = sock_error(sk); |
771 | return sock_error(sk); | 771 | if (err) |
772 | return err; | ||
772 | 773 | ||
773 | if (msg->msg_flags & MSG_OOB) | 774 | if (msg->msg_flags & MSG_OOB) |
774 | return -EOPNOTSUPP; | 775 | return -EOPNOTSUPP; |
@@ -2160,7 +2161,7 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) | |||
2160 | 2161 | ||
2161 | static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL); | 2162 | static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL); |
2162 | 2163 | ||
2163 | static struct proto_ops l2cap_sock_ops = { | 2164 | static const struct proto_ops l2cap_sock_ops = { |
2164 | .family = PF_BLUETOOTH, | 2165 | .family = PF_BLUETOOTH, |
2165 | .owner = THIS_MODULE, | 2166 | .owner = THIS_MODULE, |
2166 | .release = l2cap_sock_release, | 2167 | .release = l2cap_sock_release, |
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 6c34261b232e..757d2dd3b02f 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c | |||
@@ -58,7 +58,7 @@ | |||
58 | #define BT_DBG(D...) | 58 | #define BT_DBG(D...) |
59 | #endif | 59 | #endif |
60 | 60 | ||
61 | static struct proto_ops rfcomm_sock_ops; | 61 | static const struct proto_ops rfcomm_sock_ops; |
62 | 62 | ||
63 | static struct bt_sock_list rfcomm_sk_list = { | 63 | static struct bt_sock_list rfcomm_sk_list = { |
64 | .lock = RW_LOCK_UNLOCKED | 64 | .lock = RW_LOCK_UNLOCKED |
@@ -907,7 +907,7 @@ static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf) | |||
907 | 907 | ||
908 | static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL); | 908 | static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL); |
909 | 909 | ||
910 | static struct proto_ops rfcomm_sock_ops = { | 910 | static const struct proto_ops rfcomm_sock_ops = { |
911 | .family = PF_BLUETOOTH, | 911 | .family = PF_BLUETOOTH, |
912 | .owner = THIS_MODULE, | 912 | .owner = THIS_MODULE, |
913 | .release = rfcomm_sock_release, | 913 | .release = rfcomm_sock_release, |
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 9cb00dc6c08c..6b61323ce23c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c | |||
@@ -56,7 +56,7 @@ | |||
56 | 56 | ||
57 | #define VERSION "0.5" | 57 | #define VERSION "0.5" |
58 | 58 | ||
59 | static struct proto_ops sco_sock_ops; | 59 | static const struct proto_ops sco_sock_ops; |
60 | 60 | ||
61 | static struct bt_sock_list sco_sk_list = { | 61 | static struct bt_sock_list sco_sk_list = { |
62 | .lock = RW_LOCK_UNLOCKED | 62 | .lock = RW_LOCK_UNLOCKED |
@@ -637,8 +637,9 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
637 | 637 | ||
638 | BT_DBG("sock %p, sk %p", sock, sk); | 638 | BT_DBG("sock %p, sk %p", sock, sk); |
639 | 639 | ||
640 | if (sk->sk_err) | 640 | err = sock_error(sk); |
641 | return sock_error(sk); | 641 | if (err) |
642 | return err; | ||
642 | 643 | ||
643 | if (msg->msg_flags & MSG_OOB) | 644 | if (msg->msg_flags & MSG_OOB) |
644 | return -EOPNOTSUPP; | 645 | return -EOPNOTSUPP; |
@@ -913,7 +914,7 @@ static ssize_t sco_sysfs_show(struct class *dev, char *buf) | |||
913 | 914 | ||
914 | static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL); | 915 | static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL); |
915 | 916 | ||
916 | static struct proto_ops sco_sock_ops = { | 917 | static const struct proto_ops sco_sock_ops = { |
917 | .family = PF_BLUETOOTH, | 918 | .family = PF_BLUETOOTH, |
918 | .owner = THIS_MODULE, | 919 | .owner = THIS_MODULE, |
919 | .release = sco_sock_release, | 920 | .release = sco_sock_release, |
diff --git a/net/bridge/br.c b/net/bridge/br.c index f8f184942aaf..188cc1ac49eb 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c | |||
@@ -67,3 +67,4 @@ EXPORT_SYMBOL(br_should_route_hook); | |||
67 | module_init(br_init) | 67 | module_init(br_init) |
68 | module_exit(br_deinit) | 68 | module_exit(br_deinit) |
69 | MODULE_LICENSE("GPL"); | 69 | MODULE_LICENSE("GPL"); |
70 | MODULE_VERSION(BR_VERSION); | ||
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f564ee99782d..0b33a7b3a00c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c | |||
@@ -15,7 +15,9 @@ | |||
15 | 15 | ||
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/netdevice.h> | 17 | #include <linux/netdevice.h> |
18 | #include <linux/module.h> | 18 | #include <linux/etherdevice.h> |
19 | #include <linux/ethtool.h> | ||
20 | |||
19 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
20 | #include "br_private.h" | 22 | #include "br_private.h" |
21 | 23 | ||
@@ -82,6 +84,87 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) | |||
82 | return 0; | 84 | return 0; |
83 | } | 85 | } |
84 | 86 | ||
87 | /* Allow setting mac address of pseudo-bridge to be same as | ||
88 | * any of the bound interfaces | ||
89 | */ | ||
90 | static int br_set_mac_address(struct net_device *dev, void *p) | ||
91 | { | ||
92 | struct net_bridge *br = netdev_priv(dev); | ||
93 | struct sockaddr *addr = p; | ||
94 | struct net_bridge_port *port; | ||
95 | int err = -EADDRNOTAVAIL; | ||
96 | |||
97 | spin_lock_bh(&br->lock); | ||
98 | list_for_each_entry(port, &br->port_list, list) { | ||
99 | if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) { | ||
100 | br_stp_change_bridge_id(br, addr->sa_data); | ||
101 | err = 0; | ||
102 | break; | ||
103 | } | ||
104 | } | ||
105 | spin_unlock_bh(&br->lock); | ||
106 | |||
107 | return err; | ||
108 | } | ||
109 | |||
110 | static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info) | ||
111 | { | ||
112 | strcpy(info->driver, "bridge"); | ||
113 | strcpy(info->version, BR_VERSION); | ||
114 | strcpy(info->fw_version, "N/A"); | ||
115 | strcpy(info->bus_info, "N/A"); | ||
116 | } | ||
117 | |||
118 | static int br_set_sg(struct net_device *dev, u32 data) | ||
119 | { | ||
120 | struct net_bridge *br = netdev_priv(dev); | ||
121 | |||
122 | if (data) | ||
123 | br->feature_mask |= NETIF_F_SG; | ||
124 | else | ||
125 | br->feature_mask &= ~NETIF_F_SG; | ||
126 | |||
127 | br_features_recompute(br); | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | static int br_set_tso(struct net_device *dev, u32 data) | ||
132 | { | ||
133 | struct net_bridge *br = netdev_priv(dev); | ||
134 | |||
135 | if (data) | ||
136 | br->feature_mask |= NETIF_F_TSO; | ||
137 | else | ||
138 | br->feature_mask &= ~NETIF_F_TSO; | ||
139 | |||
140 | br_features_recompute(br); | ||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | static int br_set_tx_csum(struct net_device *dev, u32 data) | ||
145 | { | ||
146 | struct net_bridge *br = netdev_priv(dev); | ||
147 | |||
148 | if (data) | ||
149 | br->feature_mask |= NETIF_F_IP_CSUM; | ||
150 | else | ||
151 | br->feature_mask &= ~NETIF_F_IP_CSUM; | ||
152 | |||
153 | br_features_recompute(br); | ||
154 | return 0; | ||
155 | } | ||
156 | |||
157 | static struct ethtool_ops br_ethtool_ops = { | ||
158 | .get_drvinfo = br_getinfo, | ||
159 | .get_link = ethtool_op_get_link, | ||
160 | .get_sg = ethtool_op_get_sg, | ||
161 | .set_sg = br_set_sg, | ||
162 | .get_tx_csum = ethtool_op_get_tx_csum, | ||
163 | .set_tx_csum = br_set_tx_csum, | ||
164 | .get_tso = ethtool_op_get_tso, | ||
165 | .set_tso = br_set_tso, | ||
166 | }; | ||
167 | |||
85 | void br_dev_setup(struct net_device *dev) | 168 | void br_dev_setup(struct net_device *dev) |
86 | { | 169 | { |
87 | memset(dev->dev_addr, 0, ETH_ALEN); | 170 | memset(dev->dev_addr, 0, ETH_ALEN); |
@@ -96,8 +179,12 @@ void br_dev_setup(struct net_device *dev) | |||
96 | dev->change_mtu = br_change_mtu; | 179 | dev->change_mtu = br_change_mtu; |
97 | dev->destructor = free_netdev; | 180 | dev->destructor = free_netdev; |
98 | SET_MODULE_OWNER(dev); | 181 | SET_MODULE_OWNER(dev); |
182 | SET_ETHTOOL_OPS(dev, &br_ethtool_ops); | ||
99 | dev->stop = br_dev_stop; | 183 | dev->stop = br_dev_stop; |
100 | dev->tx_queue_len = 0; | 184 | dev->tx_queue_len = 0; |
101 | dev->set_mac_address = NULL; | 185 | dev->set_mac_address = br_set_mac_address; |
102 | dev->priv_flags = IFF_EBRIDGE; | 186 | dev->priv_flags = IFF_EBRIDGE; |
187 | |||
188 | dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | ||
189 | | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM; | ||
103 | } | 190 | } |
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 975abe254b7a..11321197338e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c | |||
@@ -32,9 +32,8 @@ | |||
32 | * ethtool, use ethtool_ops. Also, since driver might sleep need to | 32 | * ethtool, use ethtool_ops. Also, since driver might sleep need to |
33 | * not be holding any locks. | 33 | * not be holding any locks. |
34 | */ | 34 | */ |
35 | static int br_initial_port_cost(struct net_device *dev) | 35 | static int port_cost(struct net_device *dev) |
36 | { | 36 | { |
37 | |||
38 | struct ethtool_cmd ecmd = { ETHTOOL_GSET }; | 37 | struct ethtool_cmd ecmd = { ETHTOOL_GSET }; |
39 | struct ifreq ifr; | 38 | struct ifreq ifr; |
40 | mm_segment_t old_fs; | 39 | mm_segment_t old_fs; |
@@ -58,10 +57,6 @@ static int br_initial_port_cost(struct net_device *dev) | |||
58 | return 2; | 57 | return 2; |
59 | case SPEED_10: | 58 | case SPEED_10: |
60 | return 100; | 59 | return 100; |
61 | default: | ||
62 | pr_info("bridge: can't decode speed from %s: %d\n", | ||
63 | dev->name, ecmd.speed); | ||
64 | return 100; | ||
65 | } | 60 | } |
66 | } | 61 | } |
67 | 62 | ||
@@ -75,6 +70,35 @@ static int br_initial_port_cost(struct net_device *dev) | |||
75 | return 100; /* assume old 10Mbps */ | 70 | return 100; /* assume old 10Mbps */ |
76 | } | 71 | } |
77 | 72 | ||
73 | |||
74 | /* | ||
75 | * Check for port carrier transistions. | ||
76 | * Called from work queue to allow for calling functions that | ||
77 | * might sleep (such as speed check), and to debounce. | ||
78 | */ | ||
79 | static void port_carrier_check(void *arg) | ||
80 | { | ||
81 | struct net_bridge_port *p = arg; | ||
82 | |||
83 | rtnl_lock(); | ||
84 | if (netif_carrier_ok(p->dev)) { | ||
85 | u32 cost = port_cost(p->dev); | ||
86 | |||
87 | spin_lock_bh(&p->br->lock); | ||
88 | if (p->state == BR_STATE_DISABLED) { | ||
89 | p->path_cost = cost; | ||
90 | br_stp_enable_port(p); | ||
91 | } | ||
92 | spin_unlock_bh(&p->br->lock); | ||
93 | } else { | ||
94 | spin_lock_bh(&p->br->lock); | ||
95 | if (p->state != BR_STATE_DISABLED) | ||
96 | br_stp_disable_port(p); | ||
97 | spin_unlock_bh(&p->br->lock); | ||
98 | } | ||
99 | rtnl_unlock(); | ||
100 | } | ||
101 | |||
78 | static void destroy_nbp(struct net_bridge_port *p) | 102 | static void destroy_nbp(struct net_bridge_port *p) |
79 | { | 103 | { |
80 | struct net_device *dev = p->dev; | 104 | struct net_device *dev = p->dev; |
@@ -102,6 +126,9 @@ static void del_nbp(struct net_bridge_port *p) | |||
102 | dev->br_port = NULL; | 126 | dev->br_port = NULL; |
103 | dev_set_promiscuity(dev, -1); | 127 | dev_set_promiscuity(dev, -1); |
104 | 128 | ||
129 | cancel_delayed_work(&p->carrier_check); | ||
130 | flush_scheduled_work(); | ||
131 | |||
105 | spin_lock_bh(&br->lock); | 132 | spin_lock_bh(&br->lock); |
106 | br_stp_disable_port(p); | 133 | br_stp_disable_port(p); |
107 | spin_unlock_bh(&br->lock); | 134 | spin_unlock_bh(&br->lock); |
@@ -155,6 +182,7 @@ static struct net_device *new_bridge_dev(const char *name) | |||
155 | br->bridge_id.prio[1] = 0x00; | 182 | br->bridge_id.prio[1] = 0x00; |
156 | memset(br->bridge_id.addr, 0, ETH_ALEN); | 183 | memset(br->bridge_id.addr, 0, ETH_ALEN); |
157 | 184 | ||
185 | br->feature_mask = dev->features; | ||
158 | br->stp_enabled = 0; | 186 | br->stp_enabled = 0; |
159 | br->designated_root = br->bridge_id; | 187 | br->designated_root = br->bridge_id; |
160 | br->root_path_cost = 0; | 188 | br->root_path_cost = 0; |
@@ -195,10 +223,9 @@ static int find_portno(struct net_bridge *br) | |||
195 | return (index >= BR_MAX_PORTS) ? -EXFULL : index; | 223 | return (index >= BR_MAX_PORTS) ? -EXFULL : index; |
196 | } | 224 | } |
197 | 225 | ||
198 | /* called with RTNL */ | 226 | /* called with RTNL but without bridge lock */ |
199 | static struct net_bridge_port *new_nbp(struct net_bridge *br, | 227 | static struct net_bridge_port *new_nbp(struct net_bridge *br, |
200 | struct net_device *dev, | 228 | struct net_device *dev) |
201 | unsigned long cost) | ||
202 | { | 229 | { |
203 | int index; | 230 | int index; |
204 | struct net_bridge_port *p; | 231 | struct net_bridge_port *p; |
@@ -215,12 +242,13 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, | |||
215 | p->br = br; | 242 | p->br = br; |
216 | dev_hold(dev); | 243 | dev_hold(dev); |
217 | p->dev = dev; | 244 | p->dev = dev; |
218 | p->path_cost = cost; | 245 | p->path_cost = port_cost(dev); |
219 | p->priority = 0x8000 >> BR_PORT_BITS; | 246 | p->priority = 0x8000 >> BR_PORT_BITS; |
220 | dev->br_port = p; | 247 | dev->br_port = p; |
221 | p->port_no = index; | 248 | p->port_no = index; |
222 | br_init_port(p); | 249 | br_init_port(p); |
223 | p->state = BR_STATE_DISABLED; | 250 | p->state = BR_STATE_DISABLED; |
251 | INIT_WORK(&p->carrier_check, port_carrier_check, p); | ||
224 | kobject_init(&p->kobj); | 252 | kobject_init(&p->kobj); |
225 | 253 | ||
226 | return p; | 254 | return p; |
@@ -322,9 +350,8 @@ void br_features_recompute(struct net_bridge *br) | |||
322 | struct net_bridge_port *p; | 350 | struct net_bridge_port *p; |
323 | unsigned long features, checksum; | 351 | unsigned long features, checksum; |
324 | 352 | ||
325 | features = NETIF_F_SG | NETIF_F_FRAGLIST | 353 | features = br->feature_mask &~ NETIF_F_IP_CSUM; |
326 | | NETIF_F_HIGHDMA | NETIF_F_TSO; | 354 | checksum = br->feature_mask & NETIF_F_IP_CSUM; |
327 | checksum = NETIF_F_IP_CSUM; /* least commmon subset */ | ||
328 | 355 | ||
329 | list_for_each_entry(p, &br->port_list, list) { | 356 | list_for_each_entry(p, &br->port_list, list) { |
330 | if (!(p->dev->features | 357 | if (!(p->dev->features |
@@ -351,7 +378,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) | |||
351 | if (dev->br_port != NULL) | 378 | if (dev->br_port != NULL) |
352 | return -EBUSY; | 379 | return -EBUSY; |
353 | 380 | ||
354 | if (IS_ERR(p = new_nbp(br, dev, br_initial_port_cost(dev)))) | 381 | if (IS_ERR(p = new_nbp(br, dev))) |
355 | return PTR_ERR(p); | 382 | return PTR_ERR(p); |
356 | 383 | ||
357 | if ((err = br_fdb_insert(br, p, dev->dev_addr))) | 384 | if ((err = br_fdb_insert(br, p, dev->dev_addr))) |
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index b88220a64cd8..c387852f753a 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c | |||
@@ -53,6 +53,11 @@ int br_handle_frame_finish(struct sk_buff *skb) | |||
53 | /* insert into forwarding database after filtering to avoid spoofing */ | 53 | /* insert into forwarding database after filtering to avoid spoofing */ |
54 | br_fdb_update(p->br, p, eth_hdr(skb)->h_source); | 54 | br_fdb_update(p->br, p, eth_hdr(skb)->h_source); |
55 | 55 | ||
56 | if (p->state == BR_STATE_LEARNING) { | ||
57 | kfree_skb(skb); | ||
58 | goto out; | ||
59 | } | ||
60 | |||
56 | if (br->dev->flags & IFF_PROMISC) { | 61 | if (br->dev->flags & IFF_PROMISC) { |
57 | struct sk_buff *skb2; | 62 | struct sk_buff *skb2; |
58 | 63 | ||
@@ -107,9 +112,6 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) | |||
107 | if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) | 112 | if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) |
108 | goto err; | 113 | goto err; |
109 | 114 | ||
110 | if (p->state == BR_STATE_LEARNING) | ||
111 | br_fdb_update(p->br, p, eth_hdr(skb)->h_source); | ||
112 | |||
113 | if (p->br->stp_enabled && | 115 | if (p->br->stp_enabled && |
114 | !memcmp(dest, bridge_ula, 5) && | 116 | !memcmp(dest, bridge_ula, 5) && |
115 | !(dest[5] & 0xF0)) { | 117 | !(dest[5] & 0xF0)) { |
@@ -118,9 +120,10 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) | |||
118 | NULL, br_stp_handle_bpdu); | 120 | NULL, br_stp_handle_bpdu); |
119 | return 1; | 121 | return 1; |
120 | } | 122 | } |
123 | goto err; | ||
121 | } | 124 | } |
122 | 125 | ||
123 | else if (p->state == BR_STATE_FORWARDING) { | 126 | if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) { |
124 | if (br_should_route_hook) { | 127 | if (br_should_route_hook) { |
125 | if (br_should_route_hook(pskb)) | 128 | if (br_should_route_hook(pskb)) |
126 | return 0; | 129 | return 0; |
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 23422bd53a5e..223f8270daee 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/ip.h> | 26 | #include <linux/ip.h> |
27 | #include <linux/netdevice.h> | 27 | #include <linux/netdevice.h> |
28 | #include <linux/skbuff.h> | 28 | #include <linux/skbuff.h> |
29 | #include <linux/if_arp.h> | ||
29 | #include <linux/if_ether.h> | 30 | #include <linux/if_ether.h> |
30 | #include <linux/if_vlan.h> | 31 | #include <linux/if_vlan.h> |
31 | #include <linux/netfilter_bridge.h> | 32 | #include <linux/netfilter_bridge.h> |
@@ -33,8 +34,11 @@ | |||
33 | #include <linux/netfilter_ipv6.h> | 34 | #include <linux/netfilter_ipv6.h> |
34 | #include <linux/netfilter_arp.h> | 35 | #include <linux/netfilter_arp.h> |
35 | #include <linux/in_route.h> | 36 | #include <linux/in_route.h> |
37 | |||
36 | #include <net/ip.h> | 38 | #include <net/ip.h> |
37 | #include <net/ipv6.h> | 39 | #include <net/ipv6.h> |
40 | #include <net/route.h> | ||
41 | |||
38 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
39 | #include <asm/checksum.h> | 43 | #include <asm/checksum.h> |
40 | #include "br_private.h" | 44 | #include "br_private.h" |
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 917311c6828b..a43a9c1d50d7 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c | |||
@@ -52,17 +52,9 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v | |||
52 | br_stp_recalculate_bridge_id(br); | 52 | br_stp_recalculate_bridge_id(br); |
53 | break; | 53 | break; |
54 | 54 | ||
55 | case NETDEV_CHANGE: /* device is up but carrier changed */ | 55 | case NETDEV_CHANGE: |
56 | if (!(br->dev->flags & IFF_UP)) | 56 | if (br->dev->flags & IFF_UP) |
57 | break; | 57 | schedule_delayed_work(&p->carrier_check, BR_PORT_DEBOUNCE); |
58 | |||
59 | if (netif_carrier_ok(dev)) { | ||
60 | if (p->state == BR_STATE_DISABLED) | ||
61 | br_stp_enable_port(p); | ||
62 | } else { | ||
63 | if (p->state != BR_STATE_DISABLED) | ||
64 | br_stp_disable_port(p); | ||
65 | } | ||
66 | break; | 58 | break; |
67 | 59 | ||
68 | case NETDEV_FEAT_CHANGE: | 60 | case NETDEV_FEAT_CHANGE: |
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index bdf95a74d8cd..c5bd631ffcd5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h | |||
@@ -27,6 +27,10 @@ | |||
27 | #define BR_PORT_BITS 10 | 27 | #define BR_PORT_BITS 10 |
28 | #define BR_MAX_PORTS (1<<BR_PORT_BITS) | 28 | #define BR_MAX_PORTS (1<<BR_PORT_BITS) |
29 | 29 | ||
30 | #define BR_PORT_DEBOUNCE (HZ/10) | ||
31 | |||
32 | #define BR_VERSION "2.1" | ||
33 | |||
30 | typedef struct bridge_id bridge_id; | 34 | typedef struct bridge_id bridge_id; |
31 | typedef struct mac_addr mac_addr; | 35 | typedef struct mac_addr mac_addr; |
32 | typedef __u16 port_id; | 36 | typedef __u16 port_id; |
@@ -78,6 +82,7 @@ struct net_bridge_port | |||
78 | struct timer_list hold_timer; | 82 | struct timer_list hold_timer; |
79 | struct timer_list message_age_timer; | 83 | struct timer_list message_age_timer; |
80 | struct kobject kobj; | 84 | struct kobject kobj; |
85 | struct work_struct carrier_check; | ||
81 | struct rcu_head rcu; | 86 | struct rcu_head rcu; |
82 | }; | 87 | }; |
83 | 88 | ||
@@ -90,6 +95,7 @@ struct net_bridge | |||
90 | spinlock_t hash_lock; | 95 | spinlock_t hash_lock; |
91 | struct hlist_head hash[BR_HASH_SIZE]; | 96 | struct hlist_head hash[BR_HASH_SIZE]; |
92 | struct list_head age_list; | 97 | struct list_head age_list; |
98 | unsigned long feature_mask; | ||
93 | 99 | ||
94 | /* STP */ | 100 | /* STP */ |
95 | bridge_id designated_root; | 101 | bridge_id designated_root; |
@@ -201,6 +207,7 @@ extern void br_stp_disable_bridge(struct net_bridge *br); | |||
201 | extern void br_stp_enable_port(struct net_bridge_port *p); | 207 | extern void br_stp_enable_port(struct net_bridge_port *p); |
202 | extern void br_stp_disable_port(struct net_bridge_port *p); | 208 | extern void br_stp_disable_port(struct net_bridge_port *p); |
203 | extern void br_stp_recalculate_bridge_id(struct net_bridge *br); | 209 | extern void br_stp_recalculate_bridge_id(struct net_bridge *br); |
210 | extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); | ||
204 | extern void br_stp_set_bridge_priority(struct net_bridge *br, | 211 | extern void br_stp_set_bridge_priority(struct net_bridge *br, |
205 | u16 newprio); | 212 | u16 newprio); |
206 | extern void br_stp_set_port_priority(struct net_bridge_port *p, | 213 | extern void br_stp_set_port_priority(struct net_bridge_port *p, |
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index ac09b6a23523..cc047f7fb6ef 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c | |||
@@ -120,8 +120,7 @@ void br_stp_disable_port(struct net_bridge_port *p) | |||
120 | } | 120 | } |
121 | 121 | ||
122 | /* called under bridge lock */ | 122 | /* called under bridge lock */ |
123 | static void br_stp_change_bridge_id(struct net_bridge *br, | 123 | void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) |
124 | const unsigned char *addr) | ||
125 | { | 124 | { |
126 | unsigned char oldaddr[6]; | 125 | unsigned char oldaddr[6]; |
127 | struct net_bridge_port *p; | 126 | struct net_bridge_port *p; |
@@ -158,7 +157,7 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br) | |||
158 | 157 | ||
159 | list_for_each_entry(p, &br->port_list, list) { | 158 | list_for_each_entry(p, &br->port_list, list) { |
160 | if (addr == br_mac_zero || | 159 | if (addr == br_mac_zero || |
161 | compare_ether_addr(p->dev->dev_addr, addr) < 0) | 160 | memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) |
162 | addr = p->dev->dev_addr; | 161 | addr = p->dev->dev_addr; |
163 | 162 | ||
164 | } | 163 | } |
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index f6a19d53eaeb..2ebdc23bbe26 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c | |||
@@ -248,7 +248,7 @@ int br_sysfs_addif(struct net_bridge_port *p) | |||
248 | if (err) | 248 | if (err) |
249 | goto out2; | 249 | goto out2; |
250 | 250 | ||
251 | kobject_hotplug(&p->kobj, KOBJ_ADD); | 251 | kobject_uevent(&p->kobj, KOBJ_ADD); |
252 | return 0; | 252 | return 0; |
253 | out2: | 253 | out2: |
254 | kobject_del(&p->kobj); | 254 | kobject_del(&p->kobj); |
@@ -260,7 +260,7 @@ void br_sysfs_removeif(struct net_bridge_port *p) | |||
260 | { | 260 | { |
261 | pr_debug("br_sysfs_removeif\n"); | 261 | pr_debug("br_sysfs_removeif\n"); |
262 | sysfs_remove_link(&p->br->ifobj, p->dev->name); | 262 | sysfs_remove_link(&p->br->ifobj, p->dev->name); |
263 | kobject_hotplug(&p->kobj, KOBJ_REMOVE); | 263 | kobject_uevent(&p->kobj, KOBJ_REMOVE); |
264 | kobject_del(&p->kobj); | 264 | kobject_del(&p->kobj); |
265 | } | 265 | } |
266 | 266 | ||
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index c70b3be23026..b84fc6075fe1 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig | |||
@@ -196,9 +196,13 @@ config BRIDGE_EBT_LOG | |||
196 | To compile it as a module, choose M here. If unsure, say N. | 196 | To compile it as a module, choose M here. If unsure, say N. |
197 | 197 | ||
198 | config BRIDGE_EBT_ULOG | 198 | config BRIDGE_EBT_ULOG |
199 | tristate "ebt: ulog support" | 199 | tristate "ebt: ulog support (OBSOLETE)" |
200 | depends on BRIDGE_NF_EBTABLES | 200 | depends on BRIDGE_NF_EBTABLES |
201 | help | 201 | help |
202 | This option enables the old bridge-specific "ebt_ulog" implementation | ||
203 | which has been obsoleted by the new "nfnetlink_log" code (see | ||
204 | CONFIG_NETFILTER_NETLINK_LOG). | ||
205 | |||
202 | This option adds the ulog watcher, that you can use in any rule | 206 | This option adds the ulog watcher, that you can use in any rule |
203 | in any ebtables table. The packet is passed to a userspace | 207 | in any ebtables table. The packet is passed to a userspace |
204 | logging daemon using netlink multicast sockets. This differs | 208 | logging daemon using netlink multicast sockets. This differs |
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 662975be3d1d..9f6e0193ae10 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c | |||
@@ -3,13 +3,16 @@ | |||
3 | * | 3 | * |
4 | * Authors: | 4 | * Authors: |
5 | * Bart De Schuymer <bdschuym@pandora.be> | 5 | * Bart De Schuymer <bdschuym@pandora.be> |
6 | * Harald Welte <laforge@netfilter.org> | ||
6 | * | 7 | * |
7 | * April, 2002 | 8 | * April, 2002 |
8 | * | 9 | * |
9 | */ | 10 | */ |
10 | 11 | ||
12 | #include <linux/in.h> | ||
11 | #include <linux/netfilter_bridge/ebtables.h> | 13 | #include <linux/netfilter_bridge/ebtables.h> |
12 | #include <linux/netfilter_bridge/ebt_log.h> | 14 | #include <linux/netfilter_bridge/ebt_log.h> |
15 | #include <linux/netfilter.h> | ||
13 | #include <linux/module.h> | 16 | #include <linux/module.h> |
14 | #include <linux/ip.h> | 17 | #include <linux/ip.h> |
15 | #include <linux/if_arp.h> | 18 | #include <linux/if_arp.h> |
@@ -55,27 +58,30 @@ static void print_MAC(unsigned char *p) | |||
55 | } | 58 | } |
56 | 59 | ||
57 | #define myNIPQUAD(a) a[0], a[1], a[2], a[3] | 60 | #define myNIPQUAD(a) a[0], a[1], a[2], a[3] |
58 | static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, | 61 | static void |
59 | const struct net_device *in, const struct net_device *out, | 62 | ebt_log_packet(unsigned int pf, unsigned int hooknum, |
60 | const void *data, unsigned int datalen) | 63 | const struct sk_buff *skb, const struct net_device *in, |
64 | const struct net_device *out, const struct nf_loginfo *loginfo, | ||
65 | const char *prefix) | ||
61 | { | 66 | { |
62 | struct ebt_log_info *info = (struct ebt_log_info *)data; | 67 | unsigned int bitmask; |
63 | char level_string[4] = "< >"; | ||
64 | 68 | ||
65 | level_string[1] = '0' + info->loglevel; | ||
66 | spin_lock_bh(&ebt_log_lock); | 69 | spin_lock_bh(&ebt_log_lock); |
67 | printk(level_string); | 70 | printk("<%c>%s IN=%s OUT=%s MAC source = ", '0' + loginfo->u.log.level, |
68 | printk("%s IN=%s OUT=%s ", info->prefix, in ? in->name : "", | 71 | prefix, in ? in->name : "", out ? out->name : ""); |
69 | out ? out->name : ""); | ||
70 | 72 | ||
71 | printk("MAC source = "); | ||
72 | print_MAC(eth_hdr(skb)->h_source); | 73 | print_MAC(eth_hdr(skb)->h_source); |
73 | printk("MAC dest = "); | 74 | printk("MAC dest = "); |
74 | print_MAC(eth_hdr(skb)->h_dest); | 75 | print_MAC(eth_hdr(skb)->h_dest); |
75 | 76 | ||
76 | printk("proto = 0x%04x", ntohs(eth_hdr(skb)->h_proto)); | 77 | printk("proto = 0x%04x", ntohs(eth_hdr(skb)->h_proto)); |
77 | 78 | ||
78 | if ((info->bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == | 79 | if (loginfo->type == NF_LOG_TYPE_LOG) |
80 | bitmask = loginfo->u.log.logflags; | ||
81 | else | ||
82 | bitmask = NF_LOG_MASK; | ||
83 | |||
84 | if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == | ||
79 | htons(ETH_P_IP)){ | 85 | htons(ETH_P_IP)){ |
80 | struct iphdr _iph, *ih; | 86 | struct iphdr _iph, *ih; |
81 | 87 | ||
@@ -84,10 +90,9 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, | |||
84 | printk(" INCOMPLETE IP header"); | 90 | printk(" INCOMPLETE IP header"); |
85 | goto out; | 91 | goto out; |
86 | } | 92 | } |
87 | printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u,", | 93 | printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP " |
88 | NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); | 94 | "tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr), |
89 | printk(" IP tos=0x%02X, IP proto=%d", ih->tos, | 95 | NIPQUAD(ih->daddr), ih->tos, ih->protocol); |
90 | ih->protocol); | ||
91 | if (ih->protocol == IPPROTO_TCP || | 96 | if (ih->protocol == IPPROTO_TCP || |
92 | ih->protocol == IPPROTO_UDP) { | 97 | ih->protocol == IPPROTO_UDP) { |
93 | struct tcpudphdr _ports, *pptr; | 98 | struct tcpudphdr _ports, *pptr; |
@@ -104,7 +109,7 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, | |||
104 | goto out; | 109 | goto out; |
105 | } | 110 | } |
106 | 111 | ||
107 | if ((info->bitmask & EBT_LOG_ARP) && | 112 | if ((bitmask & EBT_LOG_ARP) && |
108 | ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || | 113 | ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || |
109 | (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) { | 114 | (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) { |
110 | struct arphdr _arph, *ah; | 115 | struct arphdr _arph, *ah; |
@@ -144,6 +149,21 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, | |||
144 | out: | 149 | out: |
145 | printk("\n"); | 150 | printk("\n"); |
146 | spin_unlock_bh(&ebt_log_lock); | 151 | spin_unlock_bh(&ebt_log_lock); |
152 | |||
153 | } | ||
154 | |||
155 | static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, | ||
156 | const struct net_device *in, const struct net_device *out, | ||
157 | const void *data, unsigned int datalen) | ||
158 | { | ||
159 | struct ebt_log_info *info = (struct ebt_log_info *)data; | ||
160 | struct nf_loginfo li; | ||
161 | |||
162 | li.type = NF_LOG_TYPE_LOG; | ||
163 | li.u.log.level = info->loglevel; | ||
164 | li.u.log.logflags = info->bitmask; | ||
165 | |||
166 | nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, info->prefix); | ||
147 | } | 167 | } |
148 | 168 | ||
149 | static struct ebt_watcher log = | 169 | static struct ebt_watcher log = |
@@ -154,13 +174,32 @@ static struct ebt_watcher log = | |||
154 | .me = THIS_MODULE, | 174 | .me = THIS_MODULE, |
155 | }; | 175 | }; |
156 | 176 | ||
177 | static struct nf_logger ebt_log_logger = { | ||
178 | .name = "ebt_log", | ||
179 | .logfn = &ebt_log_packet, | ||
180 | .me = THIS_MODULE, | ||
181 | }; | ||
182 | |||
157 | static int __init init(void) | 183 | static int __init init(void) |
158 | { | 184 | { |
159 | return ebt_register_watcher(&log); | 185 | int ret; |
186 | |||
187 | ret = ebt_register_watcher(&log); | ||
188 | if (ret < 0) | ||
189 | return ret; | ||
190 | if (nf_log_register(PF_BRIDGE, &ebt_log_logger) < 0) { | ||
191 | printk(KERN_WARNING "ebt_log: not logging via system console " | ||
192 | "since somebody else already registered for PF_INET\n"); | ||
193 | /* we cannot make module load fail here, since otherwise | ||
194 | * ebtables userspace would abort */ | ||
195 | } | ||
196 | |||
197 | return 0; | ||
160 | } | 198 | } |
161 | 199 | ||
162 | static void __exit fini(void) | 200 | static void __exit fini(void) |
163 | { | 201 | { |
202 | nf_log_unregister_logger(&ebt_log_logger); | ||
164 | ebt_unregister_watcher(&log); | 203 | ebt_unregister_watcher(&log); |
165 | } | 204 | } |
166 | 205 | ||
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index aae26ae2e61f..ce617b3dbbb8 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c | |||
@@ -3,6 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Authors: | 4 | * Authors: |
5 | * Bart De Schuymer <bdschuym@pandora.be> | 5 | * Bart De Schuymer <bdschuym@pandora.be> |
6 | * Harald Welte <laforge@netfilter.org> | ||
6 | * | 7 | * |
7 | * November, 2004 | 8 | * November, 2004 |
8 | * | 9 | * |
@@ -115,14 +116,13 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size) | |||
115 | return skb; | 116 | return skb; |
116 | } | 117 | } |
117 | 118 | ||
118 | static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, | 119 | static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, |
119 | const struct net_device *in, const struct net_device *out, | 120 | const struct net_device *in, const struct net_device *out, |
120 | const void *data, unsigned int datalen) | 121 | const struct ebt_ulog_info *uloginfo, const char *prefix) |
121 | { | 122 | { |
122 | ebt_ulog_packet_msg_t *pm; | 123 | ebt_ulog_packet_msg_t *pm; |
123 | size_t size, copy_len; | 124 | size_t size, copy_len; |
124 | struct nlmsghdr *nlh; | 125 | struct nlmsghdr *nlh; |
125 | struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; | ||
126 | unsigned int group = uloginfo->nlgroup; | 126 | unsigned int group = uloginfo->nlgroup; |
127 | ebt_ulog_buff_t *ub = &ulog_buffers[group]; | 127 | ebt_ulog_buff_t *ub = &ulog_buffers[group]; |
128 | spinlock_t *lock = &ub->lock; | 128 | spinlock_t *lock = &ub->lock; |
@@ -216,6 +216,39 @@ alloc_failure: | |||
216 | goto unlock; | 216 | goto unlock; |
217 | } | 217 | } |
218 | 218 | ||
219 | /* this function is registered with the netfilter core */ | ||
220 | static void ebt_log_packet(unsigned int pf, unsigned int hooknum, | ||
221 | const struct sk_buff *skb, const struct net_device *in, | ||
222 | const struct net_device *out, const struct nf_loginfo *li, | ||
223 | const char *prefix) | ||
224 | { | ||
225 | struct ebt_ulog_info loginfo; | ||
226 | |||
227 | if (!li || li->type != NF_LOG_TYPE_ULOG) { | ||
228 | loginfo.nlgroup = EBT_ULOG_DEFAULT_NLGROUP; | ||
229 | loginfo.cprange = 0; | ||
230 | loginfo.qthreshold = EBT_ULOG_DEFAULT_QTHRESHOLD; | ||
231 | loginfo.prefix[0] = '\0'; | ||
232 | } else { | ||
233 | loginfo.nlgroup = li->u.ulog.group; | ||
234 | loginfo.cprange = li->u.ulog.copy_len; | ||
235 | loginfo.qthreshold = li->u.ulog.qthreshold; | ||
236 | strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); | ||
237 | } | ||
238 | |||
239 | ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); | ||
240 | } | ||
241 | |||
242 | static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, | ||
243 | const struct net_device *in, const struct net_device *out, | ||
244 | const void *data, unsigned int datalen) | ||
245 | { | ||
246 | struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; | ||
247 | |||
248 | ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL); | ||
249 | } | ||
250 | |||
251 | |||
219 | static int ebt_ulog_check(const char *tablename, unsigned int hookmask, | 252 | static int ebt_ulog_check(const char *tablename, unsigned int hookmask, |
220 | const struct ebt_entry *e, void *data, unsigned int datalen) | 253 | const struct ebt_entry *e, void *data, unsigned int datalen) |
221 | { | 254 | { |
@@ -240,6 +273,12 @@ static struct ebt_watcher ulog = { | |||
240 | .me = THIS_MODULE, | 273 | .me = THIS_MODULE, |
241 | }; | 274 | }; |
242 | 275 | ||
276 | static struct nf_logger ebt_ulog_logger = { | ||
277 | .name = EBT_ULOG_WATCHER, | ||
278 | .logfn = &ebt_log_packet, | ||
279 | .me = THIS_MODULE, | ||
280 | }; | ||
281 | |||
243 | static int __init init(void) | 282 | static int __init init(void) |
244 | { | 283 | { |
245 | int i, ret = 0; | 284 | int i, ret = 0; |
@@ -265,6 +304,13 @@ static int __init init(void) | |||
265 | else if ((ret = ebt_register_watcher(&ulog))) | 304 | else if ((ret = ebt_register_watcher(&ulog))) |
266 | sock_release(ebtulognl->sk_socket); | 305 | sock_release(ebtulognl->sk_socket); |
267 | 306 | ||
307 | if (nf_log_register(PF_BRIDGE, &ebt_ulog_logger) < 0) { | ||
308 | printk(KERN_WARNING "ebt_ulog: not logging via ulog " | ||
309 | "since somebody else already registered for PF_BRIDGE\n"); | ||
310 | /* we cannot make module load fail here, since otherwise | ||
311 | * ebtables userspace would abort */ | ||
312 | } | ||
313 | |||
268 | return ret; | 314 | return ret; |
269 | } | 315 | } |
270 | 316 | ||
@@ -273,6 +319,7 @@ static void __exit fini(void) | |||
273 | ebt_ulog_buff_t *ub; | 319 | ebt_ulog_buff_t *ub; |
274 | int i; | 320 | int i; |
275 | 321 | ||
322 | nf_log_unregister_logger(&ebt_ulog_logger); | ||
276 | ebt_unregister_watcher(&ulog); | 323 | ebt_unregister_watcher(&ulog); |
277 | for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { | 324 | for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { |
278 | ub = &ulog_buffers[i]; | 325 | ub = &ulog_buffers[i]; |
diff --git a/net/core/datagram.c b/net/core/datagram.c index 1bcfef51ac58..f8d322e1ea92 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/rtnetlink.h> | 47 | #include <linux/rtnetlink.h> |
48 | #include <linux/poll.h> | 48 | #include <linux/poll.h> |
49 | #include <linux/highmem.h> | 49 | #include <linux/highmem.h> |
50 | #include <linux/spinlock.h> | ||
50 | 51 | ||
51 | #include <net/protocol.h> | 52 | #include <net/protocol.h> |
52 | #include <linux/skbuff.h> | 53 | #include <linux/skbuff.h> |
@@ -200,6 +201,41 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) | |||
200 | } | 201 | } |
201 | 202 | ||
202 | /** | 203 | /** |
204 | * skb_kill_datagram - Free a datagram skbuff forcibly | ||
205 | * @sk: socket | ||
206 | * @skb: datagram skbuff | ||
207 | * @flags: MSG_ flags | ||
208 | * | ||
209 | * This function frees a datagram skbuff that was received by | ||
210 | * skb_recv_datagram. The flags argument must match the one | ||
211 | * used for skb_recv_datagram. | ||
212 | * | ||
213 | * If the MSG_PEEK flag is set, and the packet is still on the | ||
214 | * receive queue of the socket, it will be taken off the queue | ||
215 | * before it is freed. | ||
216 | * | ||
217 | * This function currently only disables BH when acquiring the | ||
218 | * sk_receive_queue lock. Therefore it must not be used in a | ||
219 | * context where that lock is acquired in an IRQ context. | ||
220 | */ | ||
221 | |||
222 | void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) | ||
223 | { | ||
224 | if (flags & MSG_PEEK) { | ||
225 | spin_lock_bh(&sk->sk_receive_queue.lock); | ||
226 | if (skb == skb_peek(&sk->sk_receive_queue)) { | ||
227 | __skb_unlink(skb, &sk->sk_receive_queue); | ||
228 | atomic_dec(&skb->users); | ||
229 | } | ||
230 | spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
231 | } | ||
232 | |||
233 | kfree_skb(skb); | ||
234 | } | ||
235 | |||
236 | EXPORT_SYMBOL(skb_kill_datagram); | ||
237 | |||
238 | /** | ||
203 | * skb_copy_datagram_iovec - Copy a datagram to an iovec. | 239 | * skb_copy_datagram_iovec - Copy a datagram to an iovec. |
204 | * @skb: buffer to copy | 240 | * @skb: buffer to copy |
205 | * @offset: offset in the buffer to start copying from | 241 | * @offset: offset in the buffer to start copying from |
diff --git a/net/core/dev.c b/net/core/dev.c index a5efc9ae010b..5081287923d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -626,7 +626,7 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas | |||
626 | * Network device names need to be valid file names to | 626 | * Network device names need to be valid file names to |
627 | * to allow sysfs to work | 627 | * to allow sysfs to work |
628 | */ | 628 | */ |
629 | static int dev_valid_name(const char *name) | 629 | int dev_valid_name(const char *name) |
630 | { | 630 | { |
631 | return !(*name == '\0' | 631 | return !(*name == '\0' |
632 | || !strcmp(name, ".") | 632 | || !strcmp(name, ".") |
@@ -3270,13 +3270,13 @@ EXPORT_SYMBOL(__dev_get_by_index); | |||
3270 | EXPORT_SYMBOL(__dev_get_by_name); | 3270 | EXPORT_SYMBOL(__dev_get_by_name); |
3271 | EXPORT_SYMBOL(__dev_remove_pack); | 3271 | EXPORT_SYMBOL(__dev_remove_pack); |
3272 | EXPORT_SYMBOL(__skb_linearize); | 3272 | EXPORT_SYMBOL(__skb_linearize); |
3273 | EXPORT_SYMBOL(dev_valid_name); | ||
3273 | EXPORT_SYMBOL(dev_add_pack); | 3274 | EXPORT_SYMBOL(dev_add_pack); |
3274 | EXPORT_SYMBOL(dev_alloc_name); | 3275 | EXPORT_SYMBOL(dev_alloc_name); |
3275 | EXPORT_SYMBOL(dev_close); | 3276 | EXPORT_SYMBOL(dev_close); |
3276 | EXPORT_SYMBOL(dev_get_by_flags); | 3277 | EXPORT_SYMBOL(dev_get_by_flags); |
3277 | EXPORT_SYMBOL(dev_get_by_index); | 3278 | EXPORT_SYMBOL(dev_get_by_index); |
3278 | EXPORT_SYMBOL(dev_get_by_name); | 3279 | EXPORT_SYMBOL(dev_get_by_name); |
3279 | EXPORT_SYMBOL(dev_ioctl); | ||
3280 | EXPORT_SYMBOL(dev_open); | 3280 | EXPORT_SYMBOL(dev_open); |
3281 | EXPORT_SYMBOL(dev_queue_xmit); | 3281 | EXPORT_SYMBOL(dev_queue_xmit); |
3282 | EXPORT_SYMBOL(dev_remove_pack); | 3282 | EXPORT_SYMBOL(dev_remove_pack); |
diff --git a/net/core/filter.c b/net/core/filter.c index 3a10e0bc90e8..8964d3445588 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -13,6 +13,7 @@ | |||
13 | * 2 of the License, or (at your option) any later version. | 13 | * 2 of the License, or (at your option) any later version. |
14 | * | 14 | * |
15 | * Andi Kleen - Fix a few bad bugs and races. | 15 | * Andi Kleen - Fix a few bad bugs and races. |
16 | * Kris Katterjohn - Added many additional checks in sk_chk_filter() | ||
16 | */ | 17 | */ |
17 | 18 | ||
18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
@@ -250,7 +251,7 @@ load_b: | |||
250 | mem[fentry->k] = X; | 251 | mem[fentry->k] = X; |
251 | continue; | 252 | continue; |
252 | default: | 253 | default: |
253 | /* Invalid instruction counts as RET */ | 254 | WARN_ON(1); |
254 | return 0; | 255 | return 0; |
255 | } | 256 | } |
256 | 257 | ||
@@ -283,8 +284,8 @@ load_b: | |||
283 | * | 284 | * |
284 | * Check the user's filter code. If we let some ugly | 285 | * Check the user's filter code. If we let some ugly |
285 | * filter code slip through kaboom! The filter must contain | 286 | * filter code slip through kaboom! The filter must contain |
286 | * no references or jumps that are out of range, no illegal instructions | 287 | * no references or jumps that are out of range, no illegal |
287 | * and no backward jumps. It must end with a RET instruction | 288 | * instructions, and must end with a RET instruction. |
288 | * | 289 | * |
289 | * Returns 0 if the rule set is legal or a negative errno code if not. | 290 | * Returns 0 if the rule set is legal or a negative errno code if not. |
290 | */ | 291 | */ |
@@ -300,38 +301,85 @@ int sk_chk_filter(struct sock_filter *filter, int flen) | |||
300 | for (pc = 0; pc < flen; pc++) { | 301 | for (pc = 0; pc < flen; pc++) { |
301 | /* all jumps are forward as they are not signed */ | 302 | /* all jumps are forward as they are not signed */ |
302 | ftest = &filter[pc]; | 303 | ftest = &filter[pc]; |
303 | if (BPF_CLASS(ftest->code) == BPF_JMP) { | ||
304 | /* but they mustn't jump off the end */ | ||
305 | if (BPF_OP(ftest->code) == BPF_JA) { | ||
306 | /* | ||
307 | * Note, the large ftest->k might cause loops. | ||
308 | * Compare this with conditional jumps below, | ||
309 | * where offsets are limited. --ANK (981016) | ||
310 | */ | ||
311 | if (ftest->k >= (unsigned)(flen-pc-1)) | ||
312 | return -EINVAL; | ||
313 | } else { | ||
314 | /* for conditionals both must be safe */ | ||
315 | if (pc + ftest->jt +1 >= flen || | ||
316 | pc + ftest->jf +1 >= flen) | ||
317 | return -EINVAL; | ||
318 | } | ||
319 | } | ||
320 | 304 | ||
321 | /* check for division by zero -Kris Katterjohn 2005-10-30 */ | 305 | /* Only allow valid instructions */ |
322 | if (ftest->code == (BPF_ALU|BPF_DIV|BPF_K) && ftest->k == 0) | 306 | switch (ftest->code) { |
323 | return -EINVAL; | 307 | case BPF_ALU|BPF_ADD|BPF_K: |
308 | case BPF_ALU|BPF_ADD|BPF_X: | ||
309 | case BPF_ALU|BPF_SUB|BPF_K: | ||
310 | case BPF_ALU|BPF_SUB|BPF_X: | ||
311 | case BPF_ALU|BPF_MUL|BPF_K: | ||
312 | case BPF_ALU|BPF_MUL|BPF_X: | ||
313 | case BPF_ALU|BPF_DIV|BPF_X: | ||
314 | case BPF_ALU|BPF_AND|BPF_K: | ||
315 | case BPF_ALU|BPF_AND|BPF_X: | ||
316 | case BPF_ALU|BPF_OR|BPF_K: | ||
317 | case BPF_ALU|BPF_OR|BPF_X: | ||
318 | case BPF_ALU|BPF_LSH|BPF_K: | ||
319 | case BPF_ALU|BPF_LSH|BPF_X: | ||
320 | case BPF_ALU|BPF_RSH|BPF_K: | ||
321 | case BPF_ALU|BPF_RSH|BPF_X: | ||
322 | case BPF_ALU|BPF_NEG: | ||
323 | case BPF_LD|BPF_W|BPF_ABS: | ||
324 | case BPF_LD|BPF_H|BPF_ABS: | ||
325 | case BPF_LD|BPF_B|BPF_ABS: | ||
326 | case BPF_LD|BPF_W|BPF_LEN: | ||
327 | case BPF_LD|BPF_W|BPF_IND: | ||
328 | case BPF_LD|BPF_H|BPF_IND: | ||
329 | case BPF_LD|BPF_B|BPF_IND: | ||
330 | case BPF_LD|BPF_IMM: | ||
331 | case BPF_LDX|BPF_W|BPF_LEN: | ||
332 | case BPF_LDX|BPF_B|BPF_MSH: | ||
333 | case BPF_LDX|BPF_IMM: | ||
334 | case BPF_MISC|BPF_TAX: | ||
335 | case BPF_MISC|BPF_TXA: | ||
336 | case BPF_RET|BPF_K: | ||
337 | case BPF_RET|BPF_A: | ||
338 | break; | ||
339 | |||
340 | /* Some instructions need special checks */ | ||
324 | 341 | ||
325 | /* check that memory operations use valid addresses. */ | 342 | case BPF_ALU|BPF_DIV|BPF_K: |
326 | if (ftest->k >= BPF_MEMWORDS) { | 343 | /* check for division by zero */ |
327 | /* but it might not be a memory operation... */ | 344 | if (ftest->k == 0) |
328 | switch (ftest->code) { | ||
329 | case BPF_ST: | ||
330 | case BPF_STX: | ||
331 | case BPF_LD|BPF_MEM: | ||
332 | case BPF_LDX|BPF_MEM: | ||
333 | return -EINVAL; | 345 | return -EINVAL; |
334 | } | 346 | break; |
347 | |||
348 | case BPF_LD|BPF_MEM: | ||
349 | case BPF_LDX|BPF_MEM: | ||
350 | case BPF_ST: | ||
351 | case BPF_STX: | ||
352 | /* check for invalid memory addresses */ | ||
353 | if (ftest->k >= BPF_MEMWORDS) | ||
354 | return -EINVAL; | ||
355 | break; | ||
356 | |||
357 | case BPF_JMP|BPF_JA: | ||
358 | /* | ||
359 | * Note, the large ftest->k might cause loops. | ||
360 | * Compare this with conditional jumps below, | ||
361 | * where offsets are limited. --ANK (981016) | ||
362 | */ | ||
363 | if (ftest->k >= (unsigned)(flen-pc-1)) | ||
364 | return -EINVAL; | ||
365 | break; | ||
366 | |||
367 | case BPF_JMP|BPF_JEQ|BPF_K: | ||
368 | case BPF_JMP|BPF_JEQ|BPF_X: | ||
369 | case BPF_JMP|BPF_JGE|BPF_K: | ||
370 | case BPF_JMP|BPF_JGE|BPF_X: | ||
371 | case BPF_JMP|BPF_JGT|BPF_K: | ||
372 | case BPF_JMP|BPF_JGT|BPF_X: | ||
373 | case BPF_JMP|BPF_JSET|BPF_K: | ||
374 | case BPF_JMP|BPF_JSET|BPF_X: | ||
375 | /* for conditionals both must be safe */ | ||
376 | if (pc + ftest->jt + 1 >= flen || | ||
377 | pc + ftest->jf + 1 >= flen) | ||
378 | return -EINVAL; | ||
379 | break; | ||
380 | |||
381 | default: | ||
382 | return -EINVAL; | ||
335 | } | 383 | } |
336 | } | 384 | } |
337 | 385 | ||
diff --git a/net/core/flow.c b/net/core/flow.c index 7e95b39de9fd..c4f25385029f 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <net/flow.h> | 23 | #include <net/flow.h> |
24 | #include <asm/atomic.h> | 24 | #include <asm/atomic.h> |
25 | #include <asm/semaphore.h> | 25 | #include <asm/semaphore.h> |
26 | #include <linux/security.h> | ||
26 | 27 | ||
27 | struct flow_cache_entry { | 28 | struct flow_cache_entry { |
28 | struct flow_cache_entry *next; | 29 | struct flow_cache_entry *next; |
@@ -30,6 +31,7 @@ struct flow_cache_entry { | |||
30 | u8 dir; | 31 | u8 dir; |
31 | struct flowi key; | 32 | struct flowi key; |
32 | u32 genid; | 33 | u32 genid; |
34 | u32 sk_sid; | ||
33 | void *object; | 35 | void *object; |
34 | atomic_t *object_ref; | 36 | atomic_t *object_ref; |
35 | }; | 37 | }; |
@@ -162,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) | |||
162 | return 0; | 164 | return 0; |
163 | } | 165 | } |
164 | 166 | ||
165 | void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, | 167 | void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, |
166 | flow_resolve_t resolver) | 168 | flow_resolve_t resolver) |
167 | { | 169 | { |
168 | struct flow_cache_entry *fle, **head; | 170 | struct flow_cache_entry *fle, **head; |
@@ -186,6 +188,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, | |||
186 | for (fle = *head; fle; fle = fle->next) { | 188 | for (fle = *head; fle; fle = fle->next) { |
187 | if (fle->family == family && | 189 | if (fle->family == family && |
188 | fle->dir == dir && | 190 | fle->dir == dir && |
191 | fle->sk_sid == sk_sid && | ||
189 | flow_key_compare(key, &fle->key) == 0) { | 192 | flow_key_compare(key, &fle->key) == 0) { |
190 | if (fle->genid == atomic_read(&flow_cache_genid)) { | 193 | if (fle->genid == atomic_read(&flow_cache_genid)) { |
191 | void *ret = fle->object; | 194 | void *ret = fle->object; |
@@ -210,6 +213,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, | |||
210 | *head = fle; | 213 | *head = fle; |
211 | fle->family = family; | 214 | fle->family = family; |
212 | fle->dir = dir; | 215 | fle->dir = dir; |
216 | fle->sk_sid = sk_sid; | ||
213 | memcpy(&fle->key, key, sizeof(*key)); | 217 | memcpy(&fle->key, key, sizeof(*key)); |
214 | fle->object = NULL; | 218 | fle->object = NULL; |
215 | flow_count(cpu)++; | 219 | flow_count(cpu)++; |
@@ -221,7 +225,7 @@ nocache: | |||
221 | void *obj; | 225 | void *obj; |
222 | atomic_t *obj_ref; | 226 | atomic_t *obj_ref; |
223 | 227 | ||
224 | resolver(key, family, dir, &obj, &obj_ref); | 228 | resolver(key, sk_sid, family, dir, &obj, &obj_ref); |
225 | 229 | ||
226 | if (fle) { | 230 | if (fle) { |
227 | fle->genid = atomic_read(&flow_cache_genid); | 231 | fle->genid = atomic_read(&flow_cache_genid); |
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e2137f3e489d..e1da81d261d1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -84,16 +84,11 @@ static ssize_t netdev_store(struct class_device *dev, | |||
84 | return ret; | 84 | return ret; |
85 | } | 85 | } |
86 | 86 | ||
87 | /* generate a read-only network device class attribute */ | 87 | NETDEVICE_SHOW(addr_len, fmt_dec); |
88 | #define NETDEVICE_ATTR(field, format_string) \ | 88 | NETDEVICE_SHOW(iflink, fmt_dec); |
89 | NETDEVICE_SHOW(field, format_string) \ | 89 | NETDEVICE_SHOW(ifindex, fmt_dec); |
90 | static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL) \ | 90 | NETDEVICE_SHOW(features, fmt_long_hex); |
91 | 91 | NETDEVICE_SHOW(type, fmt_dec); | |
92 | NETDEVICE_ATTR(addr_len, fmt_dec); | ||
93 | NETDEVICE_ATTR(iflink, fmt_dec); | ||
94 | NETDEVICE_ATTR(ifindex, fmt_dec); | ||
95 | NETDEVICE_ATTR(features, fmt_long_hex); | ||
96 | NETDEVICE_ATTR(type, fmt_dec); | ||
97 | 92 | ||
98 | /* use same locking rules as GIFHWADDR ioctl's */ | 93 | /* use same locking rules as GIFHWADDR ioctl's */ |
99 | static ssize_t format_addr(char *buf, const unsigned char *addr, int len) | 94 | static ssize_t format_addr(char *buf, const unsigned char *addr, int len) |
@@ -136,10 +131,6 @@ static ssize_t show_carrier(struct class_device *dev, char *buf) | |||
136 | return -EINVAL; | 131 | return -EINVAL; |
137 | } | 132 | } |
138 | 133 | ||
139 | static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL); | ||
140 | static CLASS_DEVICE_ATTR(broadcast, S_IRUGO, show_broadcast, NULL); | ||
141 | static CLASS_DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL); | ||
142 | |||
143 | /* read-write attributes */ | 134 | /* read-write attributes */ |
144 | NETDEVICE_SHOW(mtu, fmt_dec); | 135 | NETDEVICE_SHOW(mtu, fmt_dec); |
145 | 136 | ||
@@ -153,8 +144,6 @@ static ssize_t store_mtu(struct class_device *dev, const char *buf, size_t len) | |||
153 | return netdev_store(dev, buf, len, change_mtu); | 144 | return netdev_store(dev, buf, len, change_mtu); |
154 | } | 145 | } |
155 | 146 | ||
156 | static CLASS_DEVICE_ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu); | ||
157 | |||
158 | NETDEVICE_SHOW(flags, fmt_hex); | 147 | NETDEVICE_SHOW(flags, fmt_hex); |
159 | 148 | ||
160 | static int change_flags(struct net_device *net, unsigned long new_flags) | 149 | static int change_flags(struct net_device *net, unsigned long new_flags) |
@@ -167,8 +156,6 @@ static ssize_t store_flags(struct class_device *dev, const char *buf, size_t len | |||
167 | return netdev_store(dev, buf, len, change_flags); | 156 | return netdev_store(dev, buf, len, change_flags); |
168 | } | 157 | } |
169 | 158 | ||
170 | static CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags); | ||
171 | |||
172 | NETDEVICE_SHOW(tx_queue_len, fmt_ulong); | 159 | NETDEVICE_SHOW(tx_queue_len, fmt_ulong); |
173 | 160 | ||
174 | static int change_tx_queue_len(struct net_device *net, unsigned long new_len) | 161 | static int change_tx_queue_len(struct net_device *net, unsigned long new_len) |
@@ -182,9 +169,6 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz | |||
182 | return netdev_store(dev, buf, len, change_tx_queue_len); | 169 | return netdev_store(dev, buf, len, change_tx_queue_len); |
183 | } | 170 | } |
184 | 171 | ||
185 | static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, | ||
186 | store_tx_queue_len); | ||
187 | |||
188 | NETDEVICE_SHOW(weight, fmt_dec); | 172 | NETDEVICE_SHOW(weight, fmt_dec); |
189 | 173 | ||
190 | static int change_weight(struct net_device *net, unsigned long new_weight) | 174 | static int change_weight(struct net_device *net, unsigned long new_weight) |
@@ -198,24 +182,21 @@ static ssize_t store_weight(struct class_device *dev, const char *buf, size_t le | |||
198 | return netdev_store(dev, buf, len, change_weight); | 182 | return netdev_store(dev, buf, len, change_weight); |
199 | } | 183 | } |
200 | 184 | ||
201 | static CLASS_DEVICE_ATTR(weight, S_IRUGO | S_IWUSR, show_weight, | 185 | static struct class_device_attribute net_class_attributes[] = { |
202 | store_weight); | 186 | __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), |
203 | 187 | __ATTR(iflink, S_IRUGO, show_iflink, NULL), | |
204 | 188 | __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), | |
205 | static struct class_device_attribute *net_class_attributes[] = { | 189 | __ATTR(features, S_IRUGO, show_features, NULL), |
206 | &class_device_attr_ifindex, | 190 | __ATTR(type, S_IRUGO, show_type, NULL), |
207 | &class_device_attr_iflink, | 191 | __ATTR(address, S_IRUGO, show_address, NULL), |
208 | &class_device_attr_addr_len, | 192 | __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), |
209 | &class_device_attr_tx_queue_len, | 193 | __ATTR(carrier, S_IRUGO, show_carrier, NULL), |
210 | &class_device_attr_features, | 194 | __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu), |
211 | &class_device_attr_mtu, | 195 | __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), |
212 | &class_device_attr_flags, | 196 | __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, |
213 | &class_device_attr_weight, | 197 | store_tx_queue_len), |
214 | &class_device_attr_type, | 198 | __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight), |
215 | &class_device_attr_address, | 199 | {} |
216 | &class_device_attr_broadcast, | ||
217 | &class_device_attr_carrier, | ||
218 | NULL | ||
219 | }; | 200 | }; |
220 | 201 | ||
221 | /* Show a given an attribute in the statistics group */ | 202 | /* Show a given an attribute in the statistics group */ |
@@ -369,14 +350,14 @@ static struct attribute_group wireless_group = { | |||
369 | #endif | 350 | #endif |
370 | 351 | ||
371 | #ifdef CONFIG_HOTPLUG | 352 | #ifdef CONFIG_HOTPLUG |
372 | static int netdev_hotplug(struct class_device *cd, char **envp, | 353 | static int netdev_uevent(struct class_device *cd, char **envp, |
373 | int num_envp, char *buf, int size) | 354 | int num_envp, char *buf, int size) |
374 | { | 355 | { |
375 | struct net_device *dev = to_net_dev(cd); | 356 | struct net_device *dev = to_net_dev(cd); |
376 | int i = 0; | 357 | int i = 0; |
377 | int n; | 358 | int n; |
378 | 359 | ||
379 | /* pass interface in env to hotplug. */ | 360 | /* pass interface to uevent. */ |
380 | envp[i++] = buf; | 361 | envp[i++] = buf; |
381 | n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1; | 362 | n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1; |
382 | buf += n; | 363 | buf += n; |
@@ -407,8 +388,9 @@ static void netdev_release(struct class_device *cd) | |||
407 | static struct class net_class = { | 388 | static struct class net_class = { |
408 | .name = "net", | 389 | .name = "net", |
409 | .release = netdev_release, | 390 | .release = netdev_release, |
391 | .class_dev_attrs = net_class_attributes, | ||
410 | #ifdef CONFIG_HOTPLUG | 392 | #ifdef CONFIG_HOTPLUG |
411 | .hotplug = netdev_hotplug, | 393 | .uevent = netdev_uevent, |
412 | #endif | 394 | #endif |
413 | }; | 395 | }; |
414 | 396 | ||
@@ -431,8 +413,6 @@ void netdev_unregister_sysfs(struct net_device * net) | |||
431 | int netdev_register_sysfs(struct net_device *net) | 413 | int netdev_register_sysfs(struct net_device *net) |
432 | { | 414 | { |
433 | struct class_device *class_dev = &(net->class_dev); | 415 | struct class_device *class_dev = &(net->class_dev); |
434 | int i; | ||
435 | struct class_device_attribute *attr; | ||
436 | int ret; | 416 | int ret; |
437 | 417 | ||
438 | class_dev->class = &net_class; | 418 | class_dev->class = &net_class; |
@@ -442,12 +422,6 @@ int netdev_register_sysfs(struct net_device *net) | |||
442 | if ((ret = class_device_register(class_dev))) | 422 | if ((ret = class_device_register(class_dev))) |
443 | goto out; | 423 | goto out; |
444 | 424 | ||
445 | for (i = 0; (attr = net_class_attributes[i]) != NULL; i++) { | ||
446 | if ((ret = class_device_create_file(class_dev, attr))) | ||
447 | goto out_unreg; | ||
448 | } | ||
449 | |||
450 | |||
451 | if (net->get_stats && | 425 | if (net->get_stats && |
452 | (ret = sysfs_create_group(&class_dev->kobj, &netstat_group))) | 426 | (ret = sysfs_create_group(&class_dev->kobj, &netstat_group))) |
453 | goto out_unreg; | 427 | goto out_unreg; |
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 49424a42a2c0..281a632fa6a6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/netdevice.h> | 13 | #include <linux/netdevice.h> |
14 | #include <linux/etherdevice.h> | 14 | #include <linux/etherdevice.h> |
15 | #include <linux/string.h> | 15 | #include <linux/string.h> |
16 | #include <linux/if_arp.h> | ||
16 | #include <linux/inetdevice.h> | 17 | #include <linux/inetdevice.h> |
17 | #include <linux/inet.h> | 18 | #include <linux/inet.h> |
18 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 7fc3e9e28c34..06cad2d63e8a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -487,9 +487,9 @@ static unsigned int fmt_ip6(char *s,const char ip[16]); | |||
487 | 487 | ||
488 | /* Module parameters, defaults. */ | 488 | /* Module parameters, defaults. */ |
489 | static int pg_count_d = 1000; /* 1000 pkts by default */ | 489 | static int pg_count_d = 1000; /* 1000 pkts by default */ |
490 | static int pg_delay_d = 0; | 490 | static int pg_delay_d; |
491 | static int pg_clone_skb_d = 0; | 491 | static int pg_clone_skb_d; |
492 | static int debug = 0; | 492 | static int debug; |
493 | 493 | ||
494 | static DECLARE_MUTEX(pktgen_sem); | 494 | static DECLARE_MUTEX(pktgen_sem); |
495 | static struct pktgen_thread *pktgen_threads = NULL; | 495 | static struct pktgen_thread *pktgen_threads = NULL; |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 83fee37de38e..070f91cfde59 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -135,17 +135,13 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
135 | struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | 135 | struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, |
136 | int fclone) | 136 | int fclone) |
137 | { | 137 | { |
138 | struct skb_shared_info *shinfo; | ||
138 | struct sk_buff *skb; | 139 | struct sk_buff *skb; |
139 | u8 *data; | 140 | u8 *data; |
140 | 141 | ||
141 | /* Get the HEAD */ | 142 | /* Get the HEAD */ |
142 | if (fclone) | 143 | skb = kmem_cache_alloc(fclone ? skbuff_fclone_cache : skbuff_head_cache, |
143 | skb = kmem_cache_alloc(skbuff_fclone_cache, | 144 | gfp_mask & ~__GFP_DMA); |
144 | gfp_mask & ~__GFP_DMA); | ||
145 | else | ||
146 | skb = kmem_cache_alloc(skbuff_head_cache, | ||
147 | gfp_mask & ~__GFP_DMA); | ||
148 | |||
149 | if (!skb) | 145 | if (!skb) |
150 | goto out; | 146 | goto out; |
151 | 147 | ||
@@ -162,6 +158,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
162 | skb->data = data; | 158 | skb->data = data; |
163 | skb->tail = data; | 159 | skb->tail = data; |
164 | skb->end = data + size; | 160 | skb->end = data + size; |
161 | /* make sure we initialize shinfo sequentially */ | ||
162 | shinfo = skb_shinfo(skb); | ||
163 | atomic_set(&shinfo->dataref, 1); | ||
164 | shinfo->nr_frags = 0; | ||
165 | shinfo->tso_size = 0; | ||
166 | shinfo->tso_segs = 0; | ||
167 | shinfo->ufo_size = 0; | ||
168 | shinfo->ip6_frag_id = 0; | ||
169 | shinfo->frag_list = NULL; | ||
170 | |||
165 | if (fclone) { | 171 | if (fclone) { |
166 | struct sk_buff *child = skb + 1; | 172 | struct sk_buff *child = skb + 1; |
167 | atomic_t *fclone_ref = (atomic_t *) (child + 1); | 173 | atomic_t *fclone_ref = (atomic_t *) (child + 1); |
@@ -171,13 +177,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
171 | 177 | ||
172 | child->fclone = SKB_FCLONE_UNAVAILABLE; | 178 | child->fclone = SKB_FCLONE_UNAVAILABLE; |
173 | } | 179 | } |
174 | atomic_set(&(skb_shinfo(skb)->dataref), 1); | ||
175 | skb_shinfo(skb)->nr_frags = 0; | ||
176 | skb_shinfo(skb)->tso_size = 0; | ||
177 | skb_shinfo(skb)->tso_segs = 0; | ||
178 | skb_shinfo(skb)->frag_list = NULL; | ||
179 | skb_shinfo(skb)->ufo_size = 0; | ||
180 | skb_shinfo(skb)->ip6_frag_id = 0; | ||
181 | out: | 180 | out: |
182 | return skb; | 181 | return skb; |
183 | nodata: | 182 | nodata: |
diff --git a/net/core/sock.c b/net/core/sock.c index 13cc3be4f056..6465b0e4c8cb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1488,7 +1488,7 @@ int proto_register(struct proto *prot, int alloc_slab) | |||
1488 | } | 1488 | } |
1489 | } | 1489 | } |
1490 | 1490 | ||
1491 | if (prot->twsk_obj_size) { | 1491 | if (prot->twsk_prot != NULL) { |
1492 | static const char mask[] = "tw_sock_%s"; | 1492 | static const char mask[] = "tw_sock_%s"; |
1493 | 1493 | ||
1494 | timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); | 1494 | timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); |
@@ -1497,11 +1497,12 @@ int proto_register(struct proto *prot, int alloc_slab) | |||
1497 | goto out_free_request_sock_slab; | 1497 | goto out_free_request_sock_slab; |
1498 | 1498 | ||
1499 | sprintf(timewait_sock_slab_name, mask, prot->name); | 1499 | sprintf(timewait_sock_slab_name, mask, prot->name); |
1500 | prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, | 1500 | prot->twsk_prot->twsk_slab = |
1501 | prot->twsk_obj_size, | 1501 | kmem_cache_create(timewait_sock_slab_name, |
1502 | 0, SLAB_HWCACHE_ALIGN, | 1502 | prot->twsk_prot->twsk_obj_size, |
1503 | NULL, NULL); | 1503 | 0, SLAB_HWCACHE_ALIGN, |
1504 | if (prot->twsk_slab == NULL) | 1504 | NULL, NULL); |
1505 | if (prot->twsk_prot->twsk_slab == NULL) | ||
1505 | goto out_free_timewait_sock_slab_name; | 1506 | goto out_free_timewait_sock_slab_name; |
1506 | } | 1507 | } |
1507 | } | 1508 | } |
@@ -1548,12 +1549,12 @@ void proto_unregister(struct proto *prot) | |||
1548 | prot->rsk_prot->slab = NULL; | 1549 | prot->rsk_prot->slab = NULL; |
1549 | } | 1550 | } |
1550 | 1551 | ||
1551 | if (prot->twsk_slab != NULL) { | 1552 | if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { |
1552 | const char *name = kmem_cache_name(prot->twsk_slab); | 1553 | const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab); |
1553 | 1554 | ||
1554 | kmem_cache_destroy(prot->twsk_slab); | 1555 | kmem_cache_destroy(prot->twsk_prot->twsk_slab); |
1555 | kfree(name); | 1556 | kfree(name); |
1556 | prot->twsk_slab = NULL; | 1557 | prot->twsk_prot->twsk_slab = NULL; |
1557 | } | 1558 | } |
1558 | } | 1559 | } |
1559 | 1560 | ||
diff --git a/net/core/stream.c b/net/core/stream.c index 15bfd03e8024..35e25259fd95 100644 --- a/net/core/stream.c +++ b/net/core/stream.c | |||
@@ -55,8 +55,9 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) | |||
55 | int done; | 55 | int done; |
56 | 56 | ||
57 | do { | 57 | do { |
58 | if (sk->sk_err) | 58 | int err = sock_error(sk); |
59 | return sock_error(sk); | 59 | if (err) |
60 | return err; | ||
60 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) | 61 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) |
61 | return -EPIPE; | 62 | return -EPIPE; |
62 | if (!*timeo_p) | 63 | if (!*timeo_p) |
@@ -67,6 +68,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) | |||
67 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 68 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); |
68 | sk->sk_write_pending++; | 69 | sk->sk_write_pending++; |
69 | done = sk_wait_event(sk, timeo_p, | 70 | done = sk_wait_event(sk, timeo_p, |
71 | !sk->sk_err && | ||
70 | !((1 << sk->sk_state) & | 72 | !((1 << sk->sk_state) & |
71 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); | 73 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); |
72 | finish_wait(sk->sk_sleep, &wait); | 74 | finish_wait(sk->sk_sleep, &wait); |
@@ -137,7 +139,9 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) | |||
137 | 139 | ||
138 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 140 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
139 | sk->sk_write_pending++; | 141 | sk->sk_write_pending++; |
140 | sk_wait_event(sk, ¤t_timeo, sk_stream_memory_free(sk) && | 142 | sk_wait_event(sk, ¤t_timeo, !sk->sk_err && |
143 | !(sk->sk_shutdown & SEND_SHUTDOWN) && | ||
144 | sk_stream_memory_free(sk) && | ||
141 | vm_wait); | 145 | vm_wait); |
142 | sk->sk_write_pending--; | 146 | sk->sk_write_pending--; |
143 | 147 | ||
diff --git a/net/core/utils.c b/net/core/utils.c index 7b5970fc9e40..587eb7787deb 100644 --- a/net/core/utils.c +++ b/net/core/utils.c | |||
@@ -175,7 +175,7 @@ __u32 in_aton(const char *str) | |||
175 | if (*str != '\0') | 175 | if (*str != '\0') |
176 | { | 176 | { |
177 | val = 0; | 177 | val = 0; |
178 | while (*str != '\0' && *str != '.') | 178 | while (*str != '\0' && *str != '.' && *str != '\n') |
179 | { | 179 | { |
180 | val *= 10; | 180 | val *= 10; |
181 | val += *str - '0'; | 181 | val += *str - '0'; |
diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 344a8da153fc..87b27fff6e3b 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile | |||
@@ -1,3 +1,7 @@ | |||
1 | obj-$(CONFIG_IPV6) += dccp_ipv6.o | ||
2 | |||
3 | dccp_ipv6-y := ipv6.o | ||
4 | |||
1 | obj-$(CONFIG_IP_DCCP) += dccp.o | 5 | obj-$(CONFIG_IP_DCCP) += dccp.o |
2 | 6 | ||
3 | dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ | 7 | dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ |
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index c9a62cca22fc..ce9cb77c5c29 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c | |||
@@ -55,8 +55,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | |||
55 | from = av->dccpav_buf + av->dccpav_buf_head; | 55 | from = av->dccpav_buf + av->dccpav_buf_head; |
56 | 56 | ||
57 | /* Check if buf_head wraps */ | 57 | /* Check if buf_head wraps */ |
58 | if (av->dccpav_buf_head + len > av->dccpav_vec_len) { | 58 | if ((int)av->dccpav_buf_head + len > av->dccpav_vec_len) { |
59 | const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head); | 59 | const u32 tailsize = av->dccpav_vec_len - av->dccpav_buf_head; |
60 | 60 | ||
61 | memcpy(to, from, tailsize); | 61 | memcpy(to, from, tailsize); |
62 | to += tailsize; | 62 | to += tailsize; |
@@ -93,8 +93,14 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) | |||
93 | struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, | 93 | struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, |
94 | const gfp_t priority) | 94 | const gfp_t priority) |
95 | { | 95 | { |
96 | struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); | 96 | struct dccp_ackvec *av; |
97 | 97 | ||
98 | BUG_ON(len == 0); | ||
99 | |||
100 | if (len > DCCP_MAX_ACKVEC_LEN) | ||
101 | return NULL; | ||
102 | |||
103 | av = kmalloc(sizeof(*av) + len, priority); | ||
98 | if (av != NULL) { | 104 | if (av != NULL) { |
99 | av->dccpav_buf_len = len; | 105 | av->dccpav_buf_len = len; |
100 | av->dccpav_buf_head = | 106 | av->dccpav_buf_head = |
@@ -117,13 +123,13 @@ void dccp_ackvec_free(struct dccp_ackvec *av) | |||
117 | } | 123 | } |
118 | 124 | ||
119 | static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, | 125 | static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, |
120 | const unsigned int index) | 126 | const u8 index) |
121 | { | 127 | { |
122 | return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; | 128 | return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; |
123 | } | 129 | } |
124 | 130 | ||
125 | static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, | 131 | static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, |
126 | const unsigned int index) | 132 | const u8 index) |
127 | { | 133 | { |
128 | return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; | 134 | return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; |
129 | } | 135 | } |
@@ -135,7 +141,7 @@ static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, | |||
135 | */ | 141 | */ |
136 | static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, | 142 | static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, |
137 | const unsigned int packets, | 143 | const unsigned int packets, |
138 | const unsigned char state) | 144 | const unsigned char state) |
139 | { | 145 | { |
140 | unsigned int gap; | 146 | unsigned int gap; |
141 | signed long new_head; | 147 | signed long new_head; |
@@ -223,7 +229,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, | |||
223 | * could reduce the complexity of this scan.) | 229 | * could reduce the complexity of this scan.) |
224 | */ | 230 | */ |
225 | u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); | 231 | u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); |
226 | unsigned int index = av->dccpav_buf_head; | 232 | u8 index = av->dccpav_buf_head; |
227 | 233 | ||
228 | while (1) { | 234 | while (1) { |
229 | const u8 len = dccp_ackvec_len(av, index); | 235 | const u8 len = dccp_ackvec_len(av, index); |
@@ -291,7 +297,7 @@ void dccp_ackvec_print(const struct dccp_ackvec *av) | |||
291 | } | 297 | } |
292 | #endif | 298 | #endif |
293 | 299 | ||
294 | static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) | 300 | static void dccp_ackvec_throw_away_ack_record(struct dccp_ackvec *av) |
295 | { | 301 | { |
296 | /* | 302 | /* |
297 | * As we're keeping track of the ack vector size (dccpav_vec_len) and | 303 | * As we're keeping track of the ack vector size (dccpav_vec_len) and |
@@ -301,9 +307,10 @@ static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) | |||
301 | * draft-ietf-dccp-spec-11.txt Appendix A. -acme | 307 | * draft-ietf-dccp-spec-11.txt Appendix A. -acme |
302 | */ | 308 | */ |
303 | #if 0 | 309 | #if 0 |
304 | av->dccpav_buf_tail = av->dccpav_ack_ptr + 1; | 310 | u32 new_buf_tail = av->dccpav_ack_ptr + 1; |
305 | if (av->dccpav_buf_tail >= av->dccpav_vec_len) | 311 | if (new_buf_tail >= av->dccpav_vec_len) |
306 | av->dccpav_buf_tail -= av->dccpav_vec_len; | 312 | new_buf_tail -= av->dccpav_vec_len; |
313 | av->dccpav_buf_tail = new_buf_tail; | ||
307 | #endif | 314 | #endif |
308 | av->dccpav_vec_len -= av->dccpav_sent_len; | 315 | av->dccpav_vec_len -= av->dccpav_sent_len; |
309 | } | 316 | } |
@@ -326,7 +333,7 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, | |||
326 | debug_prefix, 1, | 333 | debug_prefix, 1, |
327 | (unsigned long long)av->dccpav_ack_seqno, | 334 | (unsigned long long)av->dccpav_ack_seqno, |
328 | (unsigned long long)av->dccpav_ack_ackno); | 335 | (unsigned long long)av->dccpav_ack_ackno); |
329 | dccp_ackvec_trow_away_ack_record(av); | 336 | dccp_ackvec_throw_away_ack_record(av); |
330 | av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; | 337 | av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; |
331 | } | 338 | } |
332 | } | 339 | } |
@@ -389,7 +396,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, | |||
389 | av->dccpav_ack_seqno, | 396 | av->dccpav_ack_seqno, |
390 | (unsigned long long) | 397 | (unsigned long long) |
391 | av->dccpav_ack_ackno); | 398 | av->dccpav_ack_ackno); |
392 | dccp_ackvec_trow_away_ack_record(av); | 399 | dccp_ackvec_throw_away_ack_record(av); |
393 | } | 400 | } |
394 | /* | 401 | /* |
395 | * If dccpav_ack_seqno was not received, no problem | 402 | * If dccpav_ack_seqno was not received, no problem |
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index d0fd6c60c574..f7dfb5f67b87 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h | |||
@@ -54,16 +54,16 @@ | |||
54 | * @dccpav_buf - circular buffer of acknowledgeable packets | 54 | * @dccpav_buf - circular buffer of acknowledgeable packets |
55 | */ | 55 | */ |
56 | struct dccp_ackvec { | 56 | struct dccp_ackvec { |
57 | unsigned int dccpav_buf_head; | ||
58 | unsigned int dccpav_buf_tail; | ||
59 | u64 dccpav_buf_ackno; | 57 | u64 dccpav_buf_ackno; |
60 | u64 dccpav_ack_seqno; | 58 | u64 dccpav_ack_seqno; |
61 | u64 dccpav_ack_ackno; | 59 | u64 dccpav_ack_ackno; |
62 | unsigned int dccpav_ack_ptr; | ||
63 | unsigned int dccpav_sent_len; | ||
64 | unsigned int dccpav_vec_len; | ||
65 | unsigned int dccpav_buf_len; | ||
66 | struct timeval dccpav_time; | 60 | struct timeval dccpav_time; |
61 | u8 dccpav_buf_head; | ||
62 | u8 dccpav_buf_tail; | ||
63 | u8 dccpav_ack_ptr; | ||
64 | u8 dccpav_sent_len; | ||
65 | u8 dccpav_vec_len; | ||
66 | u8 dccpav_buf_len; | ||
67 | u8 dccpav_buf_nonce; | 67 | u8 dccpav_buf_nonce; |
68 | u8 dccpav_ack_nonce; | 68 | u8 dccpav_ack_nonce; |
69 | u8 dccpav_buf[0]; | 69 | u8 dccpav_buf[0]; |
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index c37eeeaf5c6e..de681c6ad081 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h | |||
@@ -21,6 +21,8 @@ | |||
21 | 21 | ||
22 | #define CCID_MAX 255 | 22 | #define CCID_MAX 255 |
23 | 23 | ||
24 | struct tcp_info; | ||
25 | |||
24 | struct ccid { | 26 | struct ccid { |
25 | unsigned char ccid_id; | 27 | unsigned char ccid_id; |
26 | const char *ccid_name; | 28 | const char *ccid_name; |
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index f97b85d55ad8..93f26dd6e6cb 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -59,7 +59,7 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); | |||
59 | 59 | ||
60 | #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ | 60 | #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ |
61 | 61 | ||
62 | extern struct proto dccp_v4_prot; | 62 | extern struct proto dccp_prot; |
63 | 63 | ||
64 | /* is seq1 < seq2 ? */ | 64 | /* is seq1 < seq2 ? */ |
65 | static inline int before48(const u64 seq1, const u64 seq2) | 65 | static inline int before48(const u64 seq1, const u64 seq2) |
@@ -228,6 +228,9 @@ extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
228 | extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | 228 | extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, |
229 | const struct dccp_hdr *dh, const unsigned len); | 229 | const struct dccp_hdr *dh, const unsigned len); |
230 | 230 | ||
231 | extern int dccp_v4_init_sock(struct sock *sk); | ||
232 | extern int dccp_v4_destroy_sock(struct sock *sk); | ||
233 | |||
231 | extern void dccp_close(struct sock *sk, long timeout); | 234 | extern void dccp_close(struct sock *sk, long timeout); |
232 | extern struct sk_buff *dccp_make_response(struct sock *sk, | 235 | extern struct sk_buff *dccp_make_response(struct sock *sk, |
233 | struct dst_entry *dst, | 236 | struct dst_entry *dst, |
@@ -238,6 +241,7 @@ extern struct sk_buff *dccp_make_reset(struct sock *sk, | |||
238 | 241 | ||
239 | extern int dccp_connect(struct sock *sk); | 242 | extern int dccp_connect(struct sock *sk); |
240 | extern int dccp_disconnect(struct sock *sk, int flags); | 243 | extern int dccp_disconnect(struct sock *sk, int flags); |
244 | extern void dccp_unhash(struct sock *sk); | ||
241 | extern int dccp_getsockopt(struct sock *sk, int level, int optname, | 245 | extern int dccp_getsockopt(struct sock *sk, int level, int optname, |
242 | char __user *optval, int __user *optlen); | 246 | char __user *optval, int __user *optlen); |
243 | extern int dccp_setsockopt(struct sock *sk, int level, int optname, | 247 | extern int dccp_setsockopt(struct sock *sk, int level, int optname, |
@@ -249,6 +253,13 @@ extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, | |||
249 | struct msghdr *msg, size_t len, int nonblock, | 253 | struct msghdr *msg, size_t len, int nonblock, |
250 | int flags, int *addr_len); | 254 | int flags, int *addr_len); |
251 | extern void dccp_shutdown(struct sock *sk, int how); | 255 | extern void dccp_shutdown(struct sock *sk, int how); |
256 | extern int inet_dccp_listen(struct socket *sock, int backlog); | ||
257 | extern unsigned int dccp_poll(struct file *file, struct socket *sock, | ||
258 | poll_table *wait); | ||
259 | extern void dccp_v4_send_check(struct sock *sk, int len, | ||
260 | struct sk_buff *skb); | ||
261 | extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, | ||
262 | int addr_len); | ||
252 | 263 | ||
253 | extern int dccp_v4_checksum(const struct sk_buff *skb, | 264 | extern int dccp_v4_checksum(const struct sk_buff *skb, |
254 | const u32 saddr, const u32 daddr); | 265 | const u32 saddr, const u32 daddr); |
@@ -256,6 +267,17 @@ extern int dccp_v4_checksum(const struct sk_buff *skb, | |||
256 | extern int dccp_v4_send_reset(struct sock *sk, | 267 | extern int dccp_v4_send_reset(struct sock *sk, |
257 | enum dccp_reset_codes code); | 268 | enum dccp_reset_codes code); |
258 | extern void dccp_send_close(struct sock *sk, const int active); | 269 | extern void dccp_send_close(struct sock *sk, const int active); |
270 | extern int dccp_invalid_packet(struct sk_buff *skb); | ||
271 | |||
272 | static inline int dccp_bad_service_code(const struct sock *sk, | ||
273 | const __u32 service) | ||
274 | { | ||
275 | const struct dccp_sock *dp = dccp_sk(sk); | ||
276 | |||
277 | if (dp->dccps_service == service) | ||
278 | return 0; | ||
279 | return !dccp_list_has_service(dp->dccps_service_list, service); | ||
280 | } | ||
259 | 281 | ||
260 | struct dccp_skb_cb { | 282 | struct dccp_skb_cb { |
261 | __u8 dccpd_type:4; | 283 | __u8 dccpd_type:4; |
diff --git a/net/dccp/diag.c b/net/dccp/diag.c index f675d8e642d3..3f78c00e3822 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c | |||
@@ -28,7 +28,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info) | |||
28 | info->tcpi_retransmits = icsk->icsk_retransmits; | 28 | info->tcpi_retransmits = icsk->icsk_retransmits; |
29 | info->tcpi_probes = icsk->icsk_probes_out; | 29 | info->tcpi_probes = icsk->icsk_probes_out; |
30 | info->tcpi_backoff = icsk->icsk_backoff; | 30 | info->tcpi_backoff = icsk->icsk_backoff; |
31 | info->tcpi_pmtu = dp->dccps_pmtu_cookie; | 31 | info->tcpi_pmtu = icsk->icsk_pmtu_cookie; |
32 | 32 | ||
33 | if (dp->dccps_options.dccpo_send_ack_vector) | 33 | if (dp->dccps_options.dccpo_send_ack_vector) |
34 | info->tcpi_options |= TCPI_OPT_SACK; | 34 | info->tcpi_options |= TCPI_OPT_SACK; |
diff --git a/net/dccp/input.c b/net/dccp/input.c index 3454d5941900..b6cba72b44e8 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c | |||
@@ -151,29 +151,12 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) | |||
151 | return 0; | 151 | return 0; |
152 | } | 152 | } |
153 | 153 | ||
154 | int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | 154 | static inline int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, |
155 | const struct dccp_hdr *dh, const unsigned len) | 155 | const struct dccp_hdr *dh, |
156 | const unsigned len) | ||
156 | { | 157 | { |
157 | struct dccp_sock *dp = dccp_sk(sk); | 158 | struct dccp_sock *dp = dccp_sk(sk); |
158 | 159 | ||
159 | if (dccp_check_seqno(sk, skb)) | ||
160 | goto discard; | ||
161 | |||
162 | if (dccp_parse_options(sk, skb)) | ||
163 | goto discard; | ||
164 | |||
165 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | ||
166 | dccp_event_ack_recv(sk, skb); | ||
167 | |||
168 | if (dp->dccps_options.dccpo_send_ack_vector && | ||
169 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, | ||
170 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
171 | DCCP_ACKVEC_STATE_RECEIVED)) | ||
172 | goto discard; | ||
173 | |||
174 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
175 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
176 | |||
177 | switch (dccp_hdr(skb)->dccph_type) { | 160 | switch (dccp_hdr(skb)->dccph_type) { |
178 | case DCCP_PKT_DATAACK: | 161 | case DCCP_PKT_DATAACK: |
179 | case DCCP_PKT_DATA: | 162 | case DCCP_PKT_DATA: |
@@ -250,6 +233,37 @@ discard: | |||
250 | return 0; | 233 | return 0; |
251 | } | 234 | } |
252 | 235 | ||
236 | int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, | ||
237 | const struct dccp_hdr *dh, const unsigned len) | ||
238 | { | ||
239 | struct dccp_sock *dp = dccp_sk(sk); | ||
240 | |||
241 | if (dccp_check_seqno(sk, skb)) | ||
242 | goto discard; | ||
243 | |||
244 | if (dccp_parse_options(sk, skb)) | ||
245 | goto discard; | ||
246 | |||
247 | if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | ||
248 | dccp_event_ack_recv(sk, skb); | ||
249 | |||
250 | if (dp->dccps_options.dccpo_send_ack_vector && | ||
251 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, | ||
252 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
253 | DCCP_ACKVEC_STATE_RECEIVED)) | ||
254 | goto discard; | ||
255 | |||
256 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
257 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
258 | |||
259 | return __dccp_rcv_established(sk, skb, dh, len); | ||
260 | discard: | ||
261 | __kfree_skb(skb); | ||
262 | return 0; | ||
263 | } | ||
264 | |||
265 | EXPORT_SYMBOL_GPL(dccp_rcv_established); | ||
266 | |||
253 | static int dccp_rcv_request_sent_state_process(struct sock *sk, | 267 | static int dccp_rcv_request_sent_state_process(struct sock *sk, |
254 | struct sk_buff *skb, | 268 | struct sk_buff *skb, |
255 | const struct dccp_hdr *dh, | 269 | const struct dccp_hdr *dh, |
@@ -286,6 +300,12 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, | |||
286 | goto out_invalid_packet; | 300 | goto out_invalid_packet; |
287 | } | 301 | } |
288 | 302 | ||
303 | if (dp->dccps_options.dccpo_send_ack_vector && | ||
304 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, | ||
305 | DCCP_SKB_CB(skb)->dccpd_seq, | ||
306 | DCCP_ACKVEC_STATE_RECEIVED)) | ||
307 | goto out_invalid_packet; /* FIXME: change error code */ | ||
308 | |||
289 | dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; | 309 | dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; |
290 | dccp_update_gsr(sk, dp->dccps_isr); | 310 | dccp_update_gsr(sk, dp->dccps_isr); |
291 | /* | 311 | /* |
@@ -309,7 +329,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, | |||
309 | goto out_invalid_packet; | 329 | goto out_invalid_packet; |
310 | } | 330 | } |
311 | 331 | ||
312 | dccp_sync_mss(sk, dp->dccps_pmtu_cookie); | 332 | dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); |
313 | 333 | ||
314 | /* | 334 | /* |
315 | * Step 10: Process REQUEST state (second part) | 335 | * Step 10: Process REQUEST state (second part) |
@@ -329,7 +349,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, | |||
329 | dccp_set_state(sk, DCCP_PARTOPEN); | 349 | dccp_set_state(sk, DCCP_PARTOPEN); |
330 | 350 | ||
331 | /* Make sure socket is routed, for correct metrics. */ | 351 | /* Make sure socket is routed, for correct metrics. */ |
332 | inet_sk_rebuild_header(sk); | 352 | icsk->icsk_af_ops->rebuild_header(sk); |
333 | 353 | ||
334 | if (!sock_flag(sk, SOCK_DEAD)) { | 354 | if (!sock_flag(sk, SOCK_DEAD)) { |
335 | sk->sk_state_change(sk); | 355 | sk->sk_state_change(sk); |
@@ -398,9 +418,9 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, | |||
398 | 418 | ||
399 | if (dh->dccph_type == DCCP_PKT_DATAACK || | 419 | if (dh->dccph_type == DCCP_PKT_DATAACK || |
400 | dh->dccph_type == DCCP_PKT_DATA) { | 420 | dh->dccph_type == DCCP_PKT_DATA) { |
401 | dccp_rcv_established(sk, skb, dh, len); | 421 | __dccp_rcv_established(sk, skb, dh, len); |
402 | queued = 1; /* packet was queued | 422 | queued = 1; /* packet was queued |
403 | (by dccp_rcv_established) */ | 423 | (by __dccp_rcv_established) */ |
404 | } | 424 | } |
405 | break; | 425 | break; |
406 | } | 426 | } |
@@ -444,7 +464,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
444 | */ | 464 | */ |
445 | if (sk->sk_state == DCCP_LISTEN) { | 465 | if (sk->sk_state == DCCP_LISTEN) { |
446 | if (dh->dccph_type == DCCP_PKT_REQUEST) { | 466 | if (dh->dccph_type == DCCP_PKT_REQUEST) { |
447 | if (dccp_v4_conn_request(sk, skb) < 0) | 467 | if (inet_csk(sk)->icsk_af_ops->conn_request(sk, |
468 | skb) < 0) | ||
448 | return 1; | 469 | return 1; |
449 | 470 | ||
450 | /* FIXME: do congestion control initialization */ | 471 | /* FIXME: do congestion control initialization */ |
@@ -471,14 +492,14 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
471 | if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | 492 | if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) |
472 | dccp_event_ack_recv(sk, skb); | 493 | dccp_event_ack_recv(sk, skb); |
473 | 494 | ||
474 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
475 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
476 | |||
477 | if (dp->dccps_options.dccpo_send_ack_vector && | 495 | if (dp->dccps_options.dccpo_send_ack_vector && |
478 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, | 496 | dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, |
479 | DCCP_SKB_CB(skb)->dccpd_seq, | 497 | DCCP_SKB_CB(skb)->dccpd_seq, |
480 | DCCP_ACKVEC_STATE_RECEIVED)) | 498 | DCCP_ACKVEC_STATE_RECEIVED)) |
481 | goto discard; | 499 | goto discard; |
500 | |||
501 | ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); | ||
502 | ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); | ||
482 | } | 503 | } |
483 | 504 | ||
484 | /* | 505 | /* |
@@ -566,3 +587,5 @@ discard: | |||
566 | } | 587 | } |
567 | return 0; | 588 | return 0; |
568 | } | 589 | } |
590 | |||
591 | EXPORT_SYMBOL_GPL(dccp_rcv_state_process); | ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 656e13e38cfb..3f244670764a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -19,7 +19,9 @@ | |||
19 | 19 | ||
20 | #include <net/icmp.h> | 20 | #include <net/icmp.h> |
21 | #include <net/inet_hashtables.h> | 21 | #include <net/inet_hashtables.h> |
22 | #include <net/inet_sock.h> | ||
22 | #include <net/sock.h> | 23 | #include <net/sock.h> |
24 | #include <net/timewait_sock.h> | ||
23 | #include <net/tcp_states.h> | 25 | #include <net/tcp_states.h> |
24 | #include <net/xfrm.h> | 26 | #include <net/xfrm.h> |
25 | 27 | ||
@@ -37,7 +39,8 @@ EXPORT_SYMBOL_GPL(dccp_hashinfo); | |||
37 | 39 | ||
38 | static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) | 40 | static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) |
39 | { | 41 | { |
40 | return inet_csk_get_port(&dccp_hashinfo, sk, snum); | 42 | return inet_csk_get_port(&dccp_hashinfo, sk, snum, |
43 | inet_csk_bind_conflict); | ||
41 | } | 44 | } |
42 | 45 | ||
43 | static void dccp_v4_hash(struct sock *sk) | 46 | static void dccp_v4_hash(struct sock *sk) |
@@ -45,171 +48,14 @@ static void dccp_v4_hash(struct sock *sk) | |||
45 | inet_hash(&dccp_hashinfo, sk); | 48 | inet_hash(&dccp_hashinfo, sk); |
46 | } | 49 | } |
47 | 50 | ||
48 | static void dccp_v4_unhash(struct sock *sk) | 51 | void dccp_unhash(struct sock *sk) |
49 | { | 52 | { |
50 | inet_unhash(&dccp_hashinfo, sk); | 53 | inet_unhash(&dccp_hashinfo, sk); |
51 | } | 54 | } |
52 | 55 | ||
53 | /* called with local bh disabled */ | 56 | EXPORT_SYMBOL_GPL(dccp_unhash); |
54 | static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, | ||
55 | struct inet_timewait_sock **twp) | ||
56 | { | ||
57 | struct inet_sock *inet = inet_sk(sk); | ||
58 | const u32 daddr = inet->rcv_saddr; | ||
59 | const u32 saddr = inet->daddr; | ||
60 | const int dif = sk->sk_bound_dev_if; | ||
61 | INET_ADDR_COOKIE(acookie, saddr, daddr) | ||
62 | const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); | ||
63 | unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); | ||
64 | struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash); | ||
65 | const struct sock *sk2; | ||
66 | const struct hlist_node *node; | ||
67 | struct inet_timewait_sock *tw; | ||
68 | |||
69 | prefetch(head->chain.first); | ||
70 | write_lock(&head->lock); | ||
71 | |||
72 | /* Check TIME-WAIT sockets first. */ | ||
73 | sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { | ||
74 | tw = inet_twsk(sk2); | ||
75 | |||
76 | if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) | ||
77 | goto not_unique; | ||
78 | } | ||
79 | tw = NULL; | ||
80 | |||
81 | /* And established part... */ | ||
82 | sk_for_each(sk2, node, &head->chain) { | ||
83 | if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) | ||
84 | goto not_unique; | ||
85 | } | ||
86 | 57 | ||
87 | /* Must record num and sport now. Otherwise we will see | 58 | int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
88 | * in hash table socket with a funny identity. */ | ||
89 | inet->num = lport; | ||
90 | inet->sport = htons(lport); | ||
91 | sk->sk_hash = hash; | ||
92 | BUG_TRAP(sk_unhashed(sk)); | ||
93 | __sk_add_node(sk, &head->chain); | ||
94 | sock_prot_inc_use(sk->sk_prot); | ||
95 | write_unlock(&head->lock); | ||
96 | |||
97 | if (twp != NULL) { | ||
98 | *twp = tw; | ||
99 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
100 | } else if (tw != NULL) { | ||
101 | /* Silly. Should hash-dance instead... */ | ||
102 | inet_twsk_deschedule(tw, &dccp_death_row); | ||
103 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
104 | |||
105 | inet_twsk_put(tw); | ||
106 | } | ||
107 | |||
108 | return 0; | ||
109 | |||
110 | not_unique: | ||
111 | write_unlock(&head->lock); | ||
112 | return -EADDRNOTAVAIL; | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * Bind a port for a connect operation and hash it. | ||
117 | */ | ||
118 | static int dccp_v4_hash_connect(struct sock *sk) | ||
119 | { | ||
120 | const unsigned short snum = inet_sk(sk)->num; | ||
121 | struct inet_bind_hashbucket *head; | ||
122 | struct inet_bind_bucket *tb; | ||
123 | int ret; | ||
124 | |||
125 | if (snum == 0) { | ||
126 | int low = sysctl_local_port_range[0]; | ||
127 | int high = sysctl_local_port_range[1]; | ||
128 | int remaining = (high - low) + 1; | ||
129 | int rover = net_random() % (high - low) + low; | ||
130 | struct hlist_node *node; | ||
131 | struct inet_timewait_sock *tw = NULL; | ||
132 | |||
133 | local_bh_disable(); | ||
134 | do { | ||
135 | head = &dccp_hashinfo.bhash[inet_bhashfn(rover, | ||
136 | dccp_hashinfo.bhash_size)]; | ||
137 | spin_lock(&head->lock); | ||
138 | |||
139 | /* Does not bother with rcv_saddr checks, | ||
140 | * because the established check is already | ||
141 | * unique enough. | ||
142 | */ | ||
143 | inet_bind_bucket_for_each(tb, node, &head->chain) { | ||
144 | if (tb->port == rover) { | ||
145 | BUG_TRAP(!hlist_empty(&tb->owners)); | ||
146 | if (tb->fastreuse >= 0) | ||
147 | goto next_port; | ||
148 | if (!__dccp_v4_check_established(sk, | ||
149 | rover, | ||
150 | &tw)) | ||
151 | goto ok; | ||
152 | goto next_port; | ||
153 | } | ||
154 | } | ||
155 | |||
156 | tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, | ||
157 | head, rover); | ||
158 | if (tb == NULL) { | ||
159 | spin_unlock(&head->lock); | ||
160 | break; | ||
161 | } | ||
162 | tb->fastreuse = -1; | ||
163 | goto ok; | ||
164 | |||
165 | next_port: | ||
166 | spin_unlock(&head->lock); | ||
167 | if (++rover > high) | ||
168 | rover = low; | ||
169 | } while (--remaining > 0); | ||
170 | |||
171 | local_bh_enable(); | ||
172 | |||
173 | return -EADDRNOTAVAIL; | ||
174 | |||
175 | ok: | ||
176 | /* All locks still held and bhs disabled */ | ||
177 | inet_bind_hash(sk, tb, rover); | ||
178 | if (sk_unhashed(sk)) { | ||
179 | inet_sk(sk)->sport = htons(rover); | ||
180 | __inet_hash(&dccp_hashinfo, sk, 0); | ||
181 | } | ||
182 | spin_unlock(&head->lock); | ||
183 | |||
184 | if (tw != NULL) { | ||
185 | inet_twsk_deschedule(tw, &dccp_death_row); | ||
186 | inet_twsk_put(tw); | ||
187 | } | ||
188 | |||
189 | ret = 0; | ||
190 | goto out; | ||
191 | } | ||
192 | |||
193 | head = &dccp_hashinfo.bhash[inet_bhashfn(snum, | ||
194 | dccp_hashinfo.bhash_size)]; | ||
195 | tb = inet_csk(sk)->icsk_bind_hash; | ||
196 | spin_lock_bh(&head->lock); | ||
197 | if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { | ||
198 | __inet_hash(&dccp_hashinfo, sk, 0); | ||
199 | spin_unlock_bh(&head->lock); | ||
200 | return 0; | ||
201 | } else { | ||
202 | spin_unlock(&head->lock); | ||
203 | /* No definite answer... Walk to established hash table */ | ||
204 | ret = __dccp_v4_check_established(sk, snum, NULL); | ||
205 | out: | ||
206 | local_bh_enable(); | ||
207 | return ret; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, | ||
212 | int addr_len) | ||
213 | { | 59 | { |
214 | struct inet_sock *inet = inet_sk(sk); | 60 | struct inet_sock *inet = inet_sk(sk); |
215 | struct dccp_sock *dp = dccp_sk(sk); | 61 | struct dccp_sock *dp = dccp_sk(sk); |
@@ -259,9 +105,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, | |||
259 | inet->dport = usin->sin_port; | 105 | inet->dport = usin->sin_port; |
260 | inet->daddr = daddr; | 106 | inet->daddr = daddr; |
261 | 107 | ||
262 | dp->dccps_ext_header_len = 0; | 108 | inet_csk(sk)->icsk_ext_hdr_len = 0; |
263 | if (inet->opt != NULL) | 109 | if (inet->opt != NULL) |
264 | dp->dccps_ext_header_len = inet->opt->optlen; | 110 | inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; |
265 | /* | 111 | /* |
266 | * Socket identity is still unknown (sport may be zero). | 112 | * Socket identity is still unknown (sport may be zero). |
267 | * However we set state to DCCP_REQUESTING and not releasing socket | 113 | * However we set state to DCCP_REQUESTING and not releasing socket |
@@ -269,7 +115,7 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, | |||
269 | * complete initialization after this. | 115 | * complete initialization after this. |
270 | */ | 116 | */ |
271 | dccp_set_state(sk, DCCP_REQUESTING); | 117 | dccp_set_state(sk, DCCP_REQUESTING); |
272 | err = dccp_v4_hash_connect(sk); | 118 | err = inet_hash_connect(&dccp_death_row, sk); |
273 | if (err != 0) | 119 | if (err != 0) |
274 | goto failure; | 120 | goto failure; |
275 | 121 | ||
@@ -287,16 +133,6 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, | |||
287 | usin->sin_port); | 133 | usin->sin_port); |
288 | dccp_update_gss(sk, dp->dccps_iss); | 134 | dccp_update_gss(sk, dp->dccps_iss); |
289 | 135 | ||
290 | /* | ||
291 | * SWL and AWL are initially adjusted so that they are not less than | ||
292 | * the initial Sequence Numbers received and sent, respectively: | ||
293 | * SWL := max(GSR + 1 - floor(W/4), ISR), | ||
294 | * AWL := max(GSS - W' + 1, ISS). | ||
295 | * These adjustments MUST be applied only at the beginning of the | ||
296 | * connection. | ||
297 | */ | ||
298 | dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); | ||
299 | |||
300 | inet->id = dp->dccps_iss ^ jiffies; | 136 | inet->id = dp->dccps_iss ^ jiffies; |
301 | 137 | ||
302 | err = dccp_connect(sk); | 138 | err = dccp_connect(sk); |
@@ -316,6 +152,8 @@ failure: | |||
316 | goto out; | 152 | goto out; |
317 | } | 153 | } |
318 | 154 | ||
155 | EXPORT_SYMBOL_GPL(dccp_v4_connect); | ||
156 | |||
319 | /* | 157 | /* |
320 | * This routine does path mtu discovery as defined in RFC1191. | 158 | * This routine does path mtu discovery as defined in RFC1191. |
321 | */ | 159 | */ |
@@ -354,7 +192,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, | |||
354 | mtu = dst_mtu(dst); | 192 | mtu = dst_mtu(dst); |
355 | 193 | ||
356 | if (inet->pmtudisc != IP_PMTUDISC_DONT && | 194 | if (inet->pmtudisc != IP_PMTUDISC_DONT && |
357 | dp->dccps_pmtu_cookie > mtu) { | 195 | inet_csk(sk)->icsk_pmtu_cookie > mtu) { |
358 | dccp_sync_mss(sk, mtu); | 196 | dccp_sync_mss(sk, mtu); |
359 | 197 | ||
360 | /* | 198 | /* |
@@ -606,6 +444,17 @@ out: | |||
606 | sock_put(sk); | 444 | sock_put(sk); |
607 | } | 445 | } |
608 | 446 | ||
447 | /* This routine computes an IPv4 DCCP checksum. */ | ||
448 | void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) | ||
449 | { | ||
450 | const struct inet_sock *inet = inet_sk(sk); | ||
451 | struct dccp_hdr *dh = dccp_hdr(skb); | ||
452 | |||
453 | dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); | ||
454 | } | ||
455 | |||
456 | EXPORT_SYMBOL_GPL(dccp_v4_send_check); | ||
457 | |||
609 | int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) | 458 | int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) |
610 | { | 459 | { |
611 | struct sk_buff *skb; | 460 | struct sk_buff *skb; |
@@ -641,16 +490,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, | |||
641 | dccp_hdr(skb)->dccph_sport); | 490 | dccp_hdr(skb)->dccph_sport); |
642 | } | 491 | } |
643 | 492 | ||
644 | static inline int dccp_bad_service_code(const struct sock *sk, | ||
645 | const __u32 service) | ||
646 | { | ||
647 | const struct dccp_sock *dp = dccp_sk(sk); | ||
648 | |||
649 | if (dp->dccps_service == service) | ||
650 | return 0; | ||
651 | return !dccp_list_has_service(dp->dccps_service_list, service); | ||
652 | } | ||
653 | |||
654 | int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | 493 | int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
655 | { | 494 | { |
656 | struct inet_request_sock *ireq; | 495 | struct inet_request_sock *ireq; |
@@ -662,7 +501,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
662 | const __u32 service = dccp_hdr_request(skb)->dccph_req_service; | 501 | const __u32 service = dccp_hdr_request(skb)->dccph_req_service; |
663 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | 502 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); |
664 | __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; | 503 | __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; |
665 | struct dst_entry *dst = NULL; | ||
666 | 504 | ||
667 | /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ | 505 | /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ |
668 | if (((struct rtable *)skb->dst)->rt_flags & | 506 | if (((struct rtable *)skb->dst)->rt_flags & |
@@ -703,7 +541,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
703 | ireq = inet_rsk(req); | 541 | ireq = inet_rsk(req); |
704 | ireq->loc_addr = daddr; | 542 | ireq->loc_addr = daddr; |
705 | ireq->rmt_addr = saddr; | 543 | ireq->rmt_addr = saddr; |
706 | /* FIXME: Merge Aristeu's option parsing code when ready */ | ||
707 | req->rcv_wnd = 100; /* Fake, option parsing will get the | 544 | req->rcv_wnd = 100; /* Fake, option parsing will get the |
708 | right value */ | 545 | right value */ |
709 | ireq->opt = NULL; | 546 | ireq->opt = NULL; |
@@ -721,23 +558,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
721 | dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); | 558 | dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); |
722 | dreq->dreq_service = service; | 559 | dreq->dreq_service = service; |
723 | 560 | ||
724 | if (dccp_v4_send_response(sk, req, dst)) | 561 | if (dccp_v4_send_response(sk, req, NULL)) |
725 | goto drop_and_free; | 562 | goto drop_and_free; |
726 | 563 | ||
727 | inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); | 564 | inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); |
728 | return 0; | 565 | return 0; |
729 | 566 | ||
730 | drop_and_free: | 567 | drop_and_free: |
731 | /* | 568 | reqsk_free(req); |
732 | * FIXME: should be reqsk_free after implementing req->rsk_ops | ||
733 | */ | ||
734 | __reqsk_free(req); | ||
735 | drop: | 569 | drop: |
736 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | 570 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); |
737 | dcb->dccpd_reset_code = reset_code; | 571 | dcb->dccpd_reset_code = reset_code; |
738 | return -1; | 572 | return -1; |
739 | } | 573 | } |
740 | 574 | ||
575 | EXPORT_SYMBOL_GPL(dccp_v4_conn_request); | ||
576 | |||
741 | /* | 577 | /* |
742 | * The three way handshake has completed - we got a valid ACK or DATAACK - | 578 | * The three way handshake has completed - we got a valid ACK or DATAACK - |
743 | * now create the new socket. | 579 | * now create the new socket. |
@@ -792,6 +628,8 @@ exit: | |||
792 | return NULL; | 628 | return NULL; |
793 | } | 629 | } |
794 | 630 | ||
631 | EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); | ||
632 | |||
795 | static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | 633 | static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) |
796 | { | 634 | { |
797 | const struct dccp_hdr *dh = dccp_hdr(skb); | 635 | const struct dccp_hdr *dh = dccp_hdr(skb); |
@@ -1011,7 +849,9 @@ discard: | |||
1011 | return 0; | 849 | return 0; |
1012 | } | 850 | } |
1013 | 851 | ||
1014 | static inline int dccp_invalid_packet(struct sk_buff *skb) | 852 | EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); |
853 | |||
854 | int dccp_invalid_packet(struct sk_buff *skb) | ||
1015 | { | 855 | { |
1016 | const struct dccp_hdr *dh; | 856 | const struct dccp_hdr *dh; |
1017 | 857 | ||
@@ -1065,29 +905,30 @@ static inline int dccp_invalid_packet(struct sk_buff *skb) | |||
1065 | return 1; | 905 | return 1; |
1066 | } | 906 | } |
1067 | 907 | ||
1068 | /* If the header checksum is incorrect, drop packet and return */ | ||
1069 | if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, | ||
1070 | skb->nh.iph->daddr) < 0) { | ||
1071 | LIMIT_NETDEBUG(KERN_WARNING "DCCP: header checksum is " | ||
1072 | "incorrect\n"); | ||
1073 | return 1; | ||
1074 | } | ||
1075 | |||
1076 | return 0; | 908 | return 0; |
1077 | } | 909 | } |
1078 | 910 | ||
911 | EXPORT_SYMBOL_GPL(dccp_invalid_packet); | ||
912 | |||
1079 | /* this is called when real data arrives */ | 913 | /* this is called when real data arrives */ |
1080 | int dccp_v4_rcv(struct sk_buff *skb) | 914 | int dccp_v4_rcv(struct sk_buff *skb) |
1081 | { | 915 | { |
1082 | const struct dccp_hdr *dh; | 916 | const struct dccp_hdr *dh; |
1083 | struct sock *sk; | 917 | struct sock *sk; |
1084 | int rc; | ||
1085 | 918 | ||
1086 | /* Step 1: Check header basics: */ | 919 | /* Step 1: Check header basics: */ |
1087 | 920 | ||
1088 | if (dccp_invalid_packet(skb)) | 921 | if (dccp_invalid_packet(skb)) |
1089 | goto discard_it; | 922 | goto discard_it; |
1090 | 923 | ||
924 | /* If the header checksum is incorrect, drop packet and return */ | ||
925 | if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, | ||
926 | skb->nh.iph->daddr) < 0) { | ||
927 | LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n", | ||
928 | __FUNCTION__); | ||
929 | goto discard_it; | ||
930 | } | ||
931 | |||
1091 | dh = dccp_hdr(skb); | 932 | dh = dccp_hdr(skb); |
1092 | 933 | ||
1093 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); | 934 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); |
@@ -1143,28 +984,10 @@ int dccp_v4_rcv(struct sk_buff *skb) | |||
1143 | goto do_time_wait; | 984 | goto do_time_wait; |
1144 | } | 985 | } |
1145 | 986 | ||
1146 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { | 987 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) |
1147 | dccp_pr_debug("xfrm4_policy_check failed\n"); | ||
1148 | goto discard_and_relse; | 988 | goto discard_and_relse; |
1149 | } | ||
1150 | |||
1151 | if (sk_filter(sk, skb, 0)) { | ||
1152 | dccp_pr_debug("sk_filter failed\n"); | ||
1153 | goto discard_and_relse; | ||
1154 | } | ||
1155 | |||
1156 | skb->dev = NULL; | ||
1157 | |||
1158 | bh_lock_sock(sk); | ||
1159 | rc = 0; | ||
1160 | if (!sock_owned_by_user(sk)) | ||
1161 | rc = dccp_v4_do_rcv(sk, skb); | ||
1162 | else | ||
1163 | sk_add_backlog(sk, skb); | ||
1164 | bh_unlock_sock(sk); | ||
1165 | 989 | ||
1166 | sock_put(sk); | 990 | return sk_receive_skb(sk, skb); |
1167 | return rc; | ||
1168 | 991 | ||
1169 | no_dccp_socket: | 992 | no_dccp_socket: |
1170 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | 993 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) |
@@ -1194,9 +1017,23 @@ do_time_wait: | |||
1194 | goto no_dccp_socket; | 1017 | goto no_dccp_socket; |
1195 | } | 1018 | } |
1196 | 1019 | ||
1197 | static int dccp_v4_init_sock(struct sock *sk) | 1020 | struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { |
1021 | .queue_xmit = ip_queue_xmit, | ||
1022 | .send_check = dccp_v4_send_check, | ||
1023 | .rebuild_header = inet_sk_rebuild_header, | ||
1024 | .conn_request = dccp_v4_conn_request, | ||
1025 | .syn_recv_sock = dccp_v4_request_recv_sock, | ||
1026 | .net_header_len = sizeof(struct iphdr), | ||
1027 | .setsockopt = ip_setsockopt, | ||
1028 | .getsockopt = ip_getsockopt, | ||
1029 | .addr2sockaddr = inet_csk_addr2sockaddr, | ||
1030 | .sockaddr_len = sizeof(struct sockaddr_in), | ||
1031 | }; | ||
1032 | |||
1033 | int dccp_v4_init_sock(struct sock *sk) | ||
1198 | { | 1034 | { |
1199 | struct dccp_sock *dp = dccp_sk(sk); | 1035 | struct dccp_sock *dp = dccp_sk(sk); |
1036 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
1200 | static int dccp_ctl_socket_init = 1; | 1037 | static int dccp_ctl_socket_init = 1; |
1201 | 1038 | ||
1202 | dccp_options_init(&dp->dccps_options); | 1039 | dccp_options_init(&dp->dccps_options); |
@@ -1236,9 +1073,11 @@ static int dccp_v4_init_sock(struct sock *sk) | |||
1236 | dccp_ctl_socket_init = 0; | 1073 | dccp_ctl_socket_init = 0; |
1237 | 1074 | ||
1238 | dccp_init_xmit_timers(sk); | 1075 | dccp_init_xmit_timers(sk); |
1239 | inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT; | 1076 | icsk->icsk_rto = DCCP_TIMEOUT_INIT; |
1240 | sk->sk_state = DCCP_CLOSED; | 1077 | sk->sk_state = DCCP_CLOSED; |
1241 | sk->sk_write_space = dccp_write_space; | 1078 | sk->sk_write_space = dccp_write_space; |
1079 | icsk->icsk_af_ops = &dccp_ipv4_af_ops; | ||
1080 | icsk->icsk_sync_mss = dccp_sync_mss; | ||
1242 | dp->dccps_mss_cache = 536; | 1081 | dp->dccps_mss_cache = 536; |
1243 | dp->dccps_role = DCCP_ROLE_UNDEFINED; | 1082 | dp->dccps_role = DCCP_ROLE_UNDEFINED; |
1244 | dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; | 1083 | dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; |
@@ -1246,7 +1085,9 @@ static int dccp_v4_init_sock(struct sock *sk) | |||
1246 | return 0; | 1085 | return 0; |
1247 | } | 1086 | } |
1248 | 1087 | ||
1249 | static int dccp_v4_destroy_sock(struct sock *sk) | 1088 | EXPORT_SYMBOL_GPL(dccp_v4_init_sock); |
1089 | |||
1090 | int dccp_v4_destroy_sock(struct sock *sk) | ||
1250 | { | 1091 | { |
1251 | struct dccp_sock *dp = dccp_sk(sk); | 1092 | struct dccp_sock *dp = dccp_sk(sk); |
1252 | 1093 | ||
@@ -1279,6 +1120,8 @@ static int dccp_v4_destroy_sock(struct sock *sk) | |||
1279 | return 0; | 1120 | return 0; |
1280 | } | 1121 | } |
1281 | 1122 | ||
1123 | EXPORT_SYMBOL_GPL(dccp_v4_destroy_sock); | ||
1124 | |||
1282 | static void dccp_v4_reqsk_destructor(struct request_sock *req) | 1125 | static void dccp_v4_reqsk_destructor(struct request_sock *req) |
1283 | { | 1126 | { |
1284 | kfree(inet_rsk(req)->opt); | 1127 | kfree(inet_rsk(req)->opt); |
@@ -1293,7 +1136,11 @@ static struct request_sock_ops dccp_request_sock_ops = { | |||
1293 | .send_reset = dccp_v4_ctl_send_reset, | 1136 | .send_reset = dccp_v4_ctl_send_reset, |
1294 | }; | 1137 | }; |
1295 | 1138 | ||
1296 | struct proto dccp_v4_prot = { | 1139 | static struct timewait_sock_ops dccp_timewait_sock_ops = { |
1140 | .twsk_obj_size = sizeof(struct inet_timewait_sock), | ||
1141 | }; | ||
1142 | |||
1143 | struct proto dccp_prot = { | ||
1297 | .name = "DCCP", | 1144 | .name = "DCCP", |
1298 | .owner = THIS_MODULE, | 1145 | .owner = THIS_MODULE, |
1299 | .close = dccp_close, | 1146 | .close = dccp_close, |
@@ -1307,7 +1154,7 @@ struct proto dccp_v4_prot = { | |||
1307 | .recvmsg = dccp_recvmsg, | 1154 | .recvmsg = dccp_recvmsg, |
1308 | .backlog_rcv = dccp_v4_do_rcv, | 1155 | .backlog_rcv = dccp_v4_do_rcv, |
1309 | .hash = dccp_v4_hash, | 1156 | .hash = dccp_v4_hash, |
1310 | .unhash = dccp_v4_unhash, | 1157 | .unhash = dccp_unhash, |
1311 | .accept = inet_csk_accept, | 1158 | .accept = inet_csk_accept, |
1312 | .get_port = dccp_v4_get_port, | 1159 | .get_port = dccp_v4_get_port, |
1313 | .shutdown = dccp_shutdown, | 1160 | .shutdown = dccp_shutdown, |
@@ -1316,5 +1163,7 @@ struct proto dccp_v4_prot = { | |||
1316 | .max_header = MAX_DCCP_HEADER, | 1163 | .max_header = MAX_DCCP_HEADER, |
1317 | .obj_size = sizeof(struct dccp_sock), | 1164 | .obj_size = sizeof(struct dccp_sock), |
1318 | .rsk_prot = &dccp_request_sock_ops, | 1165 | .rsk_prot = &dccp_request_sock_ops, |
1319 | .twsk_obj_size = sizeof(struct inet_timewait_sock), | 1166 | .twsk_prot = &dccp_timewait_sock_ops, |
1320 | }; | 1167 | }; |
1168 | |||
1169 | EXPORT_SYMBOL_GPL(dccp_prot); | ||
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c new file mode 100644 index 000000000000..c609dc78f487 --- /dev/null +++ b/net/dccp/ipv6.c | |||
@@ -0,0 +1,1261 @@ | |||
1 | /* | ||
2 | * DCCP over IPv6 | ||
3 | * Linux INET6 implementation | ||
4 | * | ||
5 | * Based on net/dccp6/ipv6.c | ||
6 | * | ||
7 | * Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <linux/config.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/random.h> | ||
18 | #include <linux/xfrm.h> | ||
19 | |||
20 | #include <net/addrconf.h> | ||
21 | #include <net/inet_common.h> | ||
22 | #include <net/inet_hashtables.h> | ||
23 | #include <net/inet_sock.h> | ||
24 | #include <net/inet6_connection_sock.h> | ||
25 | #include <net/inet6_hashtables.h> | ||
26 | #include <net/ip6_route.h> | ||
27 | #include <net/ipv6.h> | ||
28 | #include <net/protocol.h> | ||
29 | #include <net/transp_v6.h> | ||
30 | #include <net/xfrm.h> | ||
31 | |||
32 | #include "dccp.h" | ||
33 | #include "ipv6.h" | ||
34 | |||
35 | static void dccp_v6_ctl_send_reset(struct sk_buff *skb); | ||
36 | static void dccp_v6_reqsk_send_ack(struct sk_buff *skb, | ||
37 | struct request_sock *req); | ||
38 | static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); | ||
39 | |||
40 | static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); | ||
41 | |||
42 | static struct inet_connection_sock_af_ops dccp_ipv6_mapped; | ||
43 | static struct inet_connection_sock_af_ops dccp_ipv6_af_ops; | ||
44 | |||
45 | static int dccp_v6_get_port(struct sock *sk, unsigned short snum) | ||
46 | { | ||
47 | return inet_csk_get_port(&dccp_hashinfo, sk, snum, | ||
48 | inet6_csk_bind_conflict); | ||
49 | } | ||
50 | |||
51 | static void dccp_v6_hash(struct sock *sk) | ||
52 | { | ||
53 | if (sk->sk_state != DCCP_CLOSED) { | ||
54 | if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) { | ||
55 | dccp_prot.hash(sk); | ||
56 | return; | ||
57 | } | ||
58 | local_bh_disable(); | ||
59 | __inet6_hash(&dccp_hashinfo, sk); | ||
60 | local_bh_enable(); | ||
61 | } | ||
62 | } | ||
63 | |||
64 | static inline u16 dccp_v6_check(struct dccp_hdr *dh, int len, | ||
65 | struct in6_addr *saddr, | ||
66 | struct in6_addr *daddr, | ||
67 | unsigned long base) | ||
68 | { | ||
69 | return csum_ipv6_magic(saddr, daddr, len, IPPROTO_DCCP, base); | ||
70 | } | ||
71 | |||
72 | static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) | ||
73 | { | ||
74 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
75 | |||
76 | if (skb->protocol == htons(ETH_P_IPV6)) | ||
77 | return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32, | ||
78 | skb->nh.ipv6h->saddr.s6_addr32, | ||
79 | dh->dccph_dport, | ||
80 | dh->dccph_sport); | ||
81 | else | ||
82 | return secure_dccp_sequence_number(skb->nh.iph->daddr, | ||
83 | skb->nh.iph->saddr, | ||
84 | dh->dccph_dport, | ||
85 | dh->dccph_sport); | ||
86 | } | ||
87 | |||
88 | static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | ||
89 | int addr_len) | ||
90 | { | ||
91 | struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; | ||
92 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
93 | struct inet_sock *inet = inet_sk(sk); | ||
94 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
95 | struct dccp_sock *dp = dccp_sk(sk); | ||
96 | struct in6_addr *saddr = NULL, *final_p = NULL, final; | ||
97 | struct flowi fl; | ||
98 | struct dst_entry *dst; | ||
99 | int addr_type; | ||
100 | int err; | ||
101 | |||
102 | dp->dccps_role = DCCP_ROLE_CLIENT; | ||
103 | |||
104 | if (addr_len < SIN6_LEN_RFC2133) | ||
105 | return -EINVAL; | ||
106 | |||
107 | if (usin->sin6_family != AF_INET6) | ||
108 | return -EAFNOSUPPORT; | ||
109 | |||
110 | memset(&fl, 0, sizeof(fl)); | ||
111 | |||
112 | if (np->sndflow) { | ||
113 | fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; | ||
114 | IP6_ECN_flow_init(fl.fl6_flowlabel); | ||
115 | if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { | ||
116 | struct ip6_flowlabel *flowlabel; | ||
117 | flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); | ||
118 | if (flowlabel == NULL) | ||
119 | return -EINVAL; | ||
120 | ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); | ||
121 | fl6_sock_release(flowlabel); | ||
122 | } | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * connect() to INADDR_ANY means loopback (BSD'ism). | ||
127 | */ | ||
128 | |||
129 | if (ipv6_addr_any(&usin->sin6_addr)) | ||
130 | usin->sin6_addr.s6_addr[15] = 0x1; | ||
131 | |||
132 | addr_type = ipv6_addr_type(&usin->sin6_addr); | ||
133 | |||
134 | if(addr_type & IPV6_ADDR_MULTICAST) | ||
135 | return -ENETUNREACH; | ||
136 | |||
137 | if (addr_type & IPV6_ADDR_LINKLOCAL) { | ||
138 | if (addr_len >= sizeof(struct sockaddr_in6) && | ||
139 | usin->sin6_scope_id) { | ||
140 | /* If interface is set while binding, indices | ||
141 | * must coincide. | ||
142 | */ | ||
143 | if (sk->sk_bound_dev_if && | ||
144 | sk->sk_bound_dev_if != usin->sin6_scope_id) | ||
145 | return -EINVAL; | ||
146 | |||
147 | sk->sk_bound_dev_if = usin->sin6_scope_id; | ||
148 | } | ||
149 | |||
150 | /* Connect to link-local address requires an interface */ | ||
151 | if (!sk->sk_bound_dev_if) | ||
152 | return -EINVAL; | ||
153 | } | ||
154 | |||
155 | ipv6_addr_copy(&np->daddr, &usin->sin6_addr); | ||
156 | np->flow_label = fl.fl6_flowlabel; | ||
157 | |||
158 | /* | ||
159 | * DCCP over IPv4 | ||
160 | */ | ||
161 | |||
162 | if (addr_type == IPV6_ADDR_MAPPED) { | ||
163 | u32 exthdrlen = icsk->icsk_ext_hdr_len; | ||
164 | struct sockaddr_in sin; | ||
165 | |||
166 | SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); | ||
167 | |||
168 | if (__ipv6_only_sock(sk)) | ||
169 | return -ENETUNREACH; | ||
170 | |||
171 | sin.sin_family = AF_INET; | ||
172 | sin.sin_port = usin->sin6_port; | ||
173 | sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; | ||
174 | |||
175 | icsk->icsk_af_ops = &dccp_ipv6_mapped; | ||
176 | sk->sk_backlog_rcv = dccp_v4_do_rcv; | ||
177 | |||
178 | err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); | ||
179 | |||
180 | if (err) { | ||
181 | icsk->icsk_ext_hdr_len = exthdrlen; | ||
182 | icsk->icsk_af_ops = &dccp_ipv6_af_ops; | ||
183 | sk->sk_backlog_rcv = dccp_v6_do_rcv; | ||
184 | goto failure; | ||
185 | } else { | ||
186 | ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), | ||
187 | inet->saddr); | ||
188 | ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), | ||
189 | inet->rcv_saddr); | ||
190 | } | ||
191 | |||
192 | return err; | ||
193 | } | ||
194 | |||
195 | if (!ipv6_addr_any(&np->rcv_saddr)) | ||
196 | saddr = &np->rcv_saddr; | ||
197 | |||
198 | fl.proto = IPPROTO_DCCP; | ||
199 | ipv6_addr_copy(&fl.fl6_dst, &np->daddr); | ||
200 | ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); | ||
201 | fl.oif = sk->sk_bound_dev_if; | ||
202 | fl.fl_ip_dport = usin->sin6_port; | ||
203 | fl.fl_ip_sport = inet->sport; | ||
204 | |||
205 | if (np->opt && np->opt->srcrt) { | ||
206 | struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; | ||
207 | ipv6_addr_copy(&final, &fl.fl6_dst); | ||
208 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); | ||
209 | final_p = &final; | ||
210 | } | ||
211 | |||
212 | err = ip6_dst_lookup(sk, &dst, &fl); | ||
213 | if (err) | ||
214 | goto failure; | ||
215 | if (final_p) | ||
216 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
217 | |||
218 | if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) | ||
219 | goto failure; | ||
220 | |||
221 | if (saddr == NULL) { | ||
222 | saddr = &fl.fl6_src; | ||
223 | ipv6_addr_copy(&np->rcv_saddr, saddr); | ||
224 | } | ||
225 | |||
226 | /* set the source address */ | ||
227 | ipv6_addr_copy(&np->saddr, saddr); | ||
228 | inet->rcv_saddr = LOOPBACK4_IPV6; | ||
229 | |||
230 | ip6_dst_store(sk, dst, NULL); | ||
231 | |||
232 | icsk->icsk_ext_hdr_len = 0; | ||
233 | if (np->opt) | ||
234 | icsk->icsk_ext_hdr_len = (np->opt->opt_flen + | ||
235 | np->opt->opt_nflen); | ||
236 | |||
237 | inet->dport = usin->sin6_port; | ||
238 | |||
239 | dccp_set_state(sk, DCCP_REQUESTING); | ||
240 | err = inet6_hash_connect(&dccp_death_row, sk); | ||
241 | if (err) | ||
242 | goto late_failure; | ||
243 | /* FIXME */ | ||
244 | #if 0 | ||
245 | dp->dccps_gar = secure_dccp_v6_sequence_number(np->saddr.s6_addr32, | ||
246 | np->daddr.s6_addr32, | ||
247 | inet->sport, | ||
248 | inet->dport); | ||
249 | #endif | ||
250 | err = dccp_connect(sk); | ||
251 | if (err) | ||
252 | goto late_failure; | ||
253 | |||
254 | return 0; | ||
255 | |||
256 | late_failure: | ||
257 | dccp_set_state(sk, DCCP_CLOSED); | ||
258 | __sk_dst_reset(sk); | ||
259 | failure: | ||
260 | inet->dport = 0; | ||
261 | sk->sk_route_caps = 0; | ||
262 | return err; | ||
263 | } | ||
264 | |||
265 | static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | ||
266 | int type, int code, int offset, __u32 info) | ||
267 | { | ||
268 | struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; | ||
269 | const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); | ||
270 | struct ipv6_pinfo *np; | ||
271 | struct sock *sk; | ||
272 | int err; | ||
273 | __u64 seq; | ||
274 | |||
275 | sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport, | ||
276 | &hdr->saddr, dh->dccph_sport, skb->dev->ifindex); | ||
277 | |||
278 | if (sk == NULL) { | ||
279 | ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); | ||
280 | return; | ||
281 | } | ||
282 | |||
283 | if (sk->sk_state == DCCP_TIME_WAIT) { | ||
284 | inet_twsk_put((struct inet_timewait_sock *)sk); | ||
285 | return; | ||
286 | } | ||
287 | |||
288 | bh_lock_sock(sk); | ||
289 | if (sock_owned_by_user(sk)) | ||
290 | NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); | ||
291 | |||
292 | if (sk->sk_state == DCCP_CLOSED) | ||
293 | goto out; | ||
294 | |||
295 | np = inet6_sk(sk); | ||
296 | |||
297 | if (type == ICMPV6_PKT_TOOBIG) { | ||
298 | struct dst_entry *dst = NULL; | ||
299 | |||
300 | if (sock_owned_by_user(sk)) | ||
301 | goto out; | ||
302 | if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) | ||
303 | goto out; | ||
304 | |||
305 | /* icmp should have updated the destination cache entry */ | ||
306 | dst = __sk_dst_check(sk, np->dst_cookie); | ||
307 | |||
308 | if (dst == NULL) { | ||
309 | struct inet_sock *inet = inet_sk(sk); | ||
310 | struct flowi fl; | ||
311 | |||
312 | /* BUGGG_FUTURE: Again, it is not clear how | ||
313 | to handle rthdr case. Ignore this complexity | ||
314 | for now. | ||
315 | */ | ||
316 | memset(&fl, 0, sizeof(fl)); | ||
317 | fl.proto = IPPROTO_DCCP; | ||
318 | ipv6_addr_copy(&fl.fl6_dst, &np->daddr); | ||
319 | ipv6_addr_copy(&fl.fl6_src, &np->saddr); | ||
320 | fl.oif = sk->sk_bound_dev_if; | ||
321 | fl.fl_ip_dport = inet->dport; | ||
322 | fl.fl_ip_sport = inet->sport; | ||
323 | |||
324 | if ((err = ip6_dst_lookup(sk, &dst, &fl))) { | ||
325 | sk->sk_err_soft = -err; | ||
326 | goto out; | ||
327 | } | ||
328 | |||
329 | if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { | ||
330 | sk->sk_err_soft = -err; | ||
331 | goto out; | ||
332 | } | ||
333 | |||
334 | } else | ||
335 | dst_hold(dst); | ||
336 | |||
337 | if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { | ||
338 | dccp_sync_mss(sk, dst_mtu(dst)); | ||
339 | } /* else let the usual retransmit timer handle it */ | ||
340 | dst_release(dst); | ||
341 | goto out; | ||
342 | } | ||
343 | |||
344 | icmpv6_err_convert(type, code, &err); | ||
345 | |||
346 | seq = DCCP_SKB_CB(skb)->dccpd_seq; | ||
347 | /* Might be for an request_sock */ | ||
348 | switch (sk->sk_state) { | ||
349 | struct request_sock *req, **prev; | ||
350 | case DCCP_LISTEN: | ||
351 | if (sock_owned_by_user(sk)) | ||
352 | goto out; | ||
353 | |||
354 | req = inet6_csk_search_req(sk, &prev, dh->dccph_dport, | ||
355 | &hdr->daddr, &hdr->saddr, | ||
356 | inet6_iif(skb)); | ||
357 | if (!req) | ||
358 | goto out; | ||
359 | |||
360 | /* ICMPs are not backlogged, hence we cannot get | ||
361 | * an established socket here. | ||
362 | */ | ||
363 | BUG_TRAP(req->sk == NULL); | ||
364 | |||
365 | if (seq != dccp_rsk(req)->dreq_iss) { | ||
366 | NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); | ||
367 | goto out; | ||
368 | } | ||
369 | |||
370 | inet_csk_reqsk_queue_drop(sk, req, prev); | ||
371 | goto out; | ||
372 | |||
373 | case DCCP_REQUESTING: | ||
374 | case DCCP_RESPOND: /* Cannot happen. | ||
375 | It can, it SYNs are crossed. --ANK */ | ||
376 | if (!sock_owned_by_user(sk)) { | ||
377 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | ||
378 | sk->sk_err = err; | ||
379 | /* | ||
380 | * Wake people up to see the error | ||
381 | * (see connect in sock.c) | ||
382 | */ | ||
383 | sk->sk_error_report(sk); | ||
384 | |||
385 | dccp_done(sk); | ||
386 | } else | ||
387 | sk->sk_err_soft = err; | ||
388 | goto out; | ||
389 | } | ||
390 | |||
391 | if (!sock_owned_by_user(sk) && np->recverr) { | ||
392 | sk->sk_err = err; | ||
393 | sk->sk_error_report(sk); | ||
394 | } else | ||
395 | sk->sk_err_soft = err; | ||
396 | |||
397 | out: | ||
398 | bh_unlock_sock(sk); | ||
399 | sock_put(sk); | ||
400 | } | ||
401 | |||
402 | |||
403 | static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, | ||
404 | struct dst_entry *dst) | ||
405 | { | ||
406 | struct inet6_request_sock *ireq6 = inet6_rsk(req); | ||
407 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
408 | struct sk_buff *skb; | ||
409 | struct ipv6_txoptions *opt = NULL; | ||
410 | struct in6_addr *final_p = NULL, final; | ||
411 | struct flowi fl; | ||
412 | int err = -1; | ||
413 | |||
414 | memset(&fl, 0, sizeof(fl)); | ||
415 | fl.proto = IPPROTO_DCCP; | ||
416 | ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); | ||
417 | ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); | ||
418 | fl.fl6_flowlabel = 0; | ||
419 | fl.oif = ireq6->iif; | ||
420 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; | ||
421 | fl.fl_ip_sport = inet_sk(sk)->sport; | ||
422 | |||
423 | if (dst == NULL) { | ||
424 | opt = np->opt; | ||
425 | if (opt == NULL && | ||
426 | np->rxopt.bits.osrcrt == 2 && | ||
427 | ireq6->pktopts) { | ||
428 | struct sk_buff *pktopts = ireq6->pktopts; | ||
429 | struct inet6_skb_parm *rxopt = IP6CB(pktopts); | ||
430 | if (rxopt->srcrt) | ||
431 | opt = ipv6_invert_rthdr(sk, | ||
432 | (struct ipv6_rt_hdr *)(pktopts->nh.raw + | ||
433 | rxopt->srcrt)); | ||
434 | } | ||
435 | |||
436 | if (opt && opt->srcrt) { | ||
437 | struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; | ||
438 | ipv6_addr_copy(&final, &fl.fl6_dst); | ||
439 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); | ||
440 | final_p = &final; | ||
441 | } | ||
442 | |||
443 | err = ip6_dst_lookup(sk, &dst, &fl); | ||
444 | if (err) | ||
445 | goto done; | ||
446 | if (final_p) | ||
447 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
448 | if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) | ||
449 | goto done; | ||
450 | } | ||
451 | |||
452 | skb = dccp_make_response(sk, dst, req); | ||
453 | if (skb != NULL) { | ||
454 | struct dccp_hdr *dh = dccp_hdr(skb); | ||
455 | dh->dccph_checksum = dccp_v6_check(dh, skb->len, | ||
456 | &ireq6->loc_addr, | ||
457 | &ireq6->rmt_addr, | ||
458 | csum_partial((char *)dh, | ||
459 | skb->len, | ||
460 | skb->csum)); | ||
461 | ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); | ||
462 | err = ip6_xmit(sk, skb, &fl, opt, 0); | ||
463 | if (err == NET_XMIT_CN) | ||
464 | err = 0; | ||
465 | } | ||
466 | |||
467 | done: | ||
468 | if (opt && opt != np->opt) | ||
469 | sock_kfree_s(sk, opt, opt->tot_len); | ||
470 | return err; | ||
471 | } | ||
472 | |||
473 | static void dccp_v6_reqsk_destructor(struct request_sock *req) | ||
474 | { | ||
475 | if (inet6_rsk(req)->pktopts != NULL) | ||
476 | kfree_skb(inet6_rsk(req)->pktopts); | ||
477 | } | ||
478 | |||
479 | static struct request_sock_ops dccp6_request_sock_ops = { | ||
480 | .family = AF_INET6, | ||
481 | .obj_size = sizeof(struct dccp6_request_sock), | ||
482 | .rtx_syn_ack = dccp_v6_send_response, | ||
483 | .send_ack = dccp_v6_reqsk_send_ack, | ||
484 | .destructor = dccp_v6_reqsk_destructor, | ||
485 | .send_reset = dccp_v6_ctl_send_reset, | ||
486 | }; | ||
487 | |||
488 | static struct timewait_sock_ops dccp6_timewait_sock_ops = { | ||
489 | .twsk_obj_size = sizeof(struct dccp6_timewait_sock), | ||
490 | }; | ||
491 | |||
492 | static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) | ||
493 | { | ||
494 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
495 | struct dccp_hdr *dh = dccp_hdr(skb); | ||
496 | |||
497 | dh->dccph_checksum = csum_ipv6_magic(&np->saddr, &np->daddr, | ||
498 | len, IPPROTO_DCCP, | ||
499 | csum_partial((char *)dh, | ||
500 | dh->dccph_doff << 2, | ||
501 | skb->csum)); | ||
502 | } | ||
503 | |||
504 | static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) | ||
505 | { | ||
506 | struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; | ||
507 | const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + | ||
508 | sizeof(struct dccp_hdr_ext) + | ||
509 | sizeof(struct dccp_hdr_reset); | ||
510 | struct sk_buff *skb; | ||
511 | struct flowi fl; | ||
512 | u64 seqno; | ||
513 | |||
514 | if (rxdh->dccph_type == DCCP_PKT_RESET) | ||
515 | return; | ||
516 | |||
517 | if (!ipv6_unicast_destination(rxskb)) | ||
518 | return; | ||
519 | |||
520 | /* | ||
521 | * We need to grab some memory, and put together an RST, | ||
522 | * and then put it into the queue to be sent. | ||
523 | */ | ||
524 | |||
525 | skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + | ||
526 | dccp_hdr_reset_len, GFP_ATOMIC); | ||
527 | if (skb == NULL) | ||
528 | return; | ||
529 | |||
530 | skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr) + | ||
531 | dccp_hdr_reset_len); | ||
532 | |||
533 | skb->h.raw = skb_push(skb, dccp_hdr_reset_len); | ||
534 | dh = dccp_hdr(skb); | ||
535 | memset(dh, 0, dccp_hdr_reset_len); | ||
536 | |||
537 | /* Swap the send and the receive. */ | ||
538 | dh->dccph_type = DCCP_PKT_RESET; | ||
539 | dh->dccph_sport = rxdh->dccph_dport; | ||
540 | dh->dccph_dport = rxdh->dccph_sport; | ||
541 | dh->dccph_doff = dccp_hdr_reset_len / 4; | ||
542 | dh->dccph_x = 1; | ||
543 | dccp_hdr_reset(skb)->dccph_reset_code = | ||
544 | DCCP_SKB_CB(rxskb)->dccpd_reset_code; | ||
545 | |||
546 | /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */ | ||
547 | seqno = 0; | ||
548 | if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) | ||
549 | dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); | ||
550 | |||
551 | dccp_hdr_set_seq(dh, seqno); | ||
552 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), | ||
553 | DCCP_SKB_CB(rxskb)->dccpd_seq); | ||
554 | |||
555 | memset(&fl, 0, sizeof(fl)); | ||
556 | ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); | ||
557 | ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr); | ||
558 | dh->dccph_checksum = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, | ||
559 | sizeof(*dh), IPPROTO_DCCP, | ||
560 | skb->csum); | ||
561 | fl.proto = IPPROTO_DCCP; | ||
562 | fl.oif = inet6_iif(rxskb); | ||
563 | fl.fl_ip_dport = dh->dccph_dport; | ||
564 | fl.fl_ip_sport = dh->dccph_sport; | ||
565 | |||
566 | /* sk = NULL, but it is safe for now. RST socket required. */ | ||
567 | if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { | ||
568 | if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { | ||
569 | ip6_xmit(NULL, skb, &fl, NULL, 0); | ||
570 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); | ||
571 | DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); | ||
572 | return; | ||
573 | } | ||
574 | } | ||
575 | |||
576 | kfree_skb(skb); | ||
577 | } | ||
578 | |||
579 | static void dccp_v6_ctl_send_ack(struct sk_buff *rxskb) | ||
580 | { | ||
581 | struct flowi fl; | ||
582 | struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; | ||
583 | const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) + | ||
584 | sizeof(struct dccp_hdr_ext) + | ||
585 | sizeof(struct dccp_hdr_ack_bits); | ||
586 | struct sk_buff *skb; | ||
587 | |||
588 | skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + | ||
589 | dccp_hdr_ack_len, GFP_ATOMIC); | ||
590 | if (skb == NULL) | ||
591 | return; | ||
592 | |||
593 | skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr) + | ||
594 | dccp_hdr_ack_len); | ||
595 | |||
596 | skb->h.raw = skb_push(skb, dccp_hdr_ack_len); | ||
597 | dh = dccp_hdr(skb); | ||
598 | memset(dh, 0, dccp_hdr_ack_len); | ||
599 | |||
600 | /* Build DCCP header and checksum it. */ | ||
601 | dh->dccph_type = DCCP_PKT_ACK; | ||
602 | dh->dccph_sport = rxdh->dccph_dport; | ||
603 | dh->dccph_dport = rxdh->dccph_sport; | ||
604 | dh->dccph_doff = dccp_hdr_ack_len / 4; | ||
605 | dh->dccph_x = 1; | ||
606 | |||
607 | dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); | ||
608 | dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), | ||
609 | DCCP_SKB_CB(rxskb)->dccpd_seq); | ||
610 | |||
611 | memset(&fl, 0, sizeof(fl)); | ||
612 | ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr); | ||
613 | ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr); | ||
614 | |||
615 | /* FIXME: calculate checksum, IPv4 also should... */ | ||
616 | |||
617 | fl.proto = IPPROTO_DCCP; | ||
618 | fl.oif = inet6_iif(rxskb); | ||
619 | fl.fl_ip_dport = dh->dccph_dport; | ||
620 | fl.fl_ip_sport = dh->dccph_sport; | ||
621 | |||
622 | if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { | ||
623 | if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { | ||
624 | ip6_xmit(NULL, skb, &fl, NULL, 0); | ||
625 | DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); | ||
626 | return; | ||
627 | } | ||
628 | } | ||
629 | |||
630 | kfree_skb(skb); | ||
631 | } | ||
632 | |||
633 | static void dccp_v6_reqsk_send_ack(struct sk_buff *skb, | ||
634 | struct request_sock *req) | ||
635 | { | ||
636 | dccp_v6_ctl_send_ack(skb); | ||
637 | } | ||
638 | |||
639 | static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) | ||
640 | { | ||
641 | const struct dccp_hdr *dh = dccp_hdr(skb); | ||
642 | const struct ipv6hdr *iph = skb->nh.ipv6h; | ||
643 | struct sock *nsk; | ||
644 | struct request_sock **prev; | ||
645 | /* Find possible connection requests. */ | ||
646 | struct request_sock *req = inet6_csk_search_req(sk, &prev, | ||
647 | dh->dccph_sport, | ||
648 | &iph->saddr, | ||
649 | &iph->daddr, | ||
650 | inet6_iif(skb)); | ||
651 | if (req != NULL) | ||
652 | return dccp_check_req(sk, skb, req, prev); | ||
653 | |||
654 | nsk = __inet6_lookup_established(&dccp_hashinfo, | ||
655 | &iph->saddr, dh->dccph_sport, | ||
656 | &iph->daddr, ntohs(dh->dccph_dport), | ||
657 | inet6_iif(skb)); | ||
658 | |||
659 | if (nsk != NULL) { | ||
660 | if (nsk->sk_state != DCCP_TIME_WAIT) { | ||
661 | bh_lock_sock(nsk); | ||
662 | return nsk; | ||
663 | } | ||
664 | inet_twsk_put((struct inet_timewait_sock *)nsk); | ||
665 | return NULL; | ||
666 | } | ||
667 | |||
668 | return sk; | ||
669 | } | ||
670 | |||
671 | static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | ||
672 | { | ||
673 | struct inet_request_sock *ireq; | ||
674 | struct dccp_sock dp; | ||
675 | struct request_sock *req; | ||
676 | struct dccp_request_sock *dreq; | ||
677 | struct inet6_request_sock *ireq6; | ||
678 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
679 | const __u32 service = dccp_hdr_request(skb)->dccph_req_service; | ||
680 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | ||
681 | __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; | ||
682 | |||
683 | if (skb->protocol == htons(ETH_P_IP)) | ||
684 | return dccp_v4_conn_request(sk, skb); | ||
685 | |||
686 | if (!ipv6_unicast_destination(skb)) | ||
687 | goto drop; | ||
688 | |||
689 | if (dccp_bad_service_code(sk, service)) { | ||
690 | reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; | ||
691 | goto drop; | ||
692 | } | ||
693 | /* | ||
694 | * There are no SYN attacks on IPv6, yet... | ||
695 | */ | ||
696 | if (inet_csk_reqsk_queue_is_full(sk)) | ||
697 | goto drop; | ||
698 | |||
699 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) | ||
700 | goto drop; | ||
701 | |||
702 | req = inet6_reqsk_alloc(sk->sk_prot->rsk_prot); | ||
703 | if (req == NULL) | ||
704 | goto drop; | ||
705 | |||
706 | /* FIXME: process options */ | ||
707 | |||
708 | dccp_openreq_init(req, &dp, skb); | ||
709 | |||
710 | ireq6 = inet6_rsk(req); | ||
711 | ireq = inet_rsk(req); | ||
712 | ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); | ||
713 | ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr); | ||
714 | req->rcv_wnd = 100; /* Fake, option parsing will get the | ||
715 | right value */ | ||
716 | ireq6->pktopts = NULL; | ||
717 | |||
718 | if (ipv6_opt_accepted(sk, skb) || | ||
719 | np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || | ||
720 | np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { | ||
721 | atomic_inc(&skb->users); | ||
722 | ireq6->pktopts = skb; | ||
723 | } | ||
724 | ireq6->iif = sk->sk_bound_dev_if; | ||
725 | |||
726 | /* So that link locals have meaning */ | ||
727 | if (!sk->sk_bound_dev_if && | ||
728 | ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) | ||
729 | ireq6->iif = inet6_iif(skb); | ||
730 | |||
731 | /* | ||
732 | * Step 3: Process LISTEN state | ||
733 | * | ||
734 | * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie | ||
735 | * | ||
736 | * In fact we defer setting S.GSR, S.SWL, S.SWH to | ||
737 | * dccp_create_openreq_child. | ||
738 | */ | ||
739 | dreq = dccp_rsk(req); | ||
740 | dreq->dreq_isr = dcb->dccpd_seq; | ||
741 | dreq->dreq_iss = dccp_v6_init_sequence(sk, skb); | ||
742 | dreq->dreq_service = service; | ||
743 | |||
744 | if (dccp_v6_send_response(sk, req, NULL)) | ||
745 | goto drop_and_free; | ||
746 | |||
747 | inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); | ||
748 | return 0; | ||
749 | |||
750 | drop_and_free: | ||
751 | reqsk_free(req); | ||
752 | drop: | ||
753 | DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); | ||
754 | dcb->dccpd_reset_code = reset_code; | ||
755 | return -1; | ||
756 | } | ||
757 | |||
758 | static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | ||
759 | struct sk_buff *skb, | ||
760 | struct request_sock *req, | ||
761 | struct dst_entry *dst) | ||
762 | { | ||
763 | struct inet6_request_sock *ireq6 = inet6_rsk(req); | ||
764 | struct ipv6_pinfo *newnp, *np = inet6_sk(sk); | ||
765 | struct inet_sock *newinet; | ||
766 | struct dccp_sock *newdp; | ||
767 | struct dccp6_sock *newdp6; | ||
768 | struct sock *newsk; | ||
769 | struct ipv6_txoptions *opt; | ||
770 | |||
771 | if (skb->protocol == htons(ETH_P_IP)) { | ||
772 | /* | ||
773 | * v6 mapped | ||
774 | */ | ||
775 | |||
776 | newsk = dccp_v4_request_recv_sock(sk, skb, req, dst); | ||
777 | if (newsk == NULL) | ||
778 | return NULL; | ||
779 | |||
780 | newdp6 = (struct dccp6_sock *)newsk; | ||
781 | newdp = dccp_sk(newsk); | ||
782 | newinet = inet_sk(newsk); | ||
783 | newinet->pinet6 = &newdp6->inet6; | ||
784 | newnp = inet6_sk(newsk); | ||
785 | |||
786 | memcpy(newnp, np, sizeof(struct ipv6_pinfo)); | ||
787 | |||
788 | ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF), | ||
789 | newinet->daddr); | ||
790 | |||
791 | ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF), | ||
792 | newinet->saddr); | ||
793 | |||
794 | ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); | ||
795 | |||
796 | inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; | ||
797 | newsk->sk_backlog_rcv = dccp_v4_do_rcv; | ||
798 | newnp->pktoptions = NULL; | ||
799 | newnp->opt = NULL; | ||
800 | newnp->mcast_oif = inet6_iif(skb); | ||
801 | newnp->mcast_hops = skb->nh.ipv6h->hop_limit; | ||
802 | |||
803 | /* | ||
804 | * No need to charge this sock to the relevant IPv6 refcnt debug socks count | ||
805 | * here, dccp_create_openreq_child now does this for us, see the comment in | ||
806 | * that function for the gory details. -acme | ||
807 | */ | ||
808 | |||
809 | /* It is tricky place. Until this moment IPv4 tcp | ||
810 | worked with IPv6 icsk.icsk_af_ops. | ||
811 | Sync it now. | ||
812 | */ | ||
813 | dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); | ||
814 | |||
815 | return newsk; | ||
816 | } | ||
817 | |||
818 | opt = np->opt; | ||
819 | |||
820 | if (sk_acceptq_is_full(sk)) | ||
821 | goto out_overflow; | ||
822 | |||
823 | if (np->rxopt.bits.osrcrt == 2 && | ||
824 | opt == NULL && ireq6->pktopts) { | ||
825 | struct inet6_skb_parm *rxopt = IP6CB(ireq6->pktopts); | ||
826 | if (rxopt->srcrt) | ||
827 | opt = ipv6_invert_rthdr(sk, | ||
828 | (struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw + | ||
829 | rxopt->srcrt)); | ||
830 | } | ||
831 | |||
832 | if (dst == NULL) { | ||
833 | struct in6_addr *final_p = NULL, final; | ||
834 | struct flowi fl; | ||
835 | |||
836 | memset(&fl, 0, sizeof(fl)); | ||
837 | fl.proto = IPPROTO_DCCP; | ||
838 | ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); | ||
839 | if (opt && opt->srcrt) { | ||
840 | struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; | ||
841 | ipv6_addr_copy(&final, &fl.fl6_dst); | ||
842 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); | ||
843 | final_p = &final; | ||
844 | } | ||
845 | ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); | ||
846 | fl.oif = sk->sk_bound_dev_if; | ||
847 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; | ||
848 | fl.fl_ip_sport = inet_sk(sk)->sport; | ||
849 | |||
850 | if (ip6_dst_lookup(sk, &dst, &fl)) | ||
851 | goto out; | ||
852 | |||
853 | if (final_p) | ||
854 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
855 | |||
856 | if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) | ||
857 | goto out; | ||
858 | } | ||
859 | |||
860 | newsk = dccp_create_openreq_child(sk, req, skb); | ||
861 | if (newsk == NULL) | ||
862 | goto out; | ||
863 | |||
864 | /* | ||
865 | * No need to charge this sock to the relevant IPv6 refcnt debug socks | ||
866 | * count here, dccp_create_openreq_child now does this for us, see the | ||
867 | * comment in that function for the gory details. -acme | ||
868 | */ | ||
869 | |||
870 | ip6_dst_store(newsk, dst, NULL); | ||
871 | newsk->sk_route_caps = dst->dev->features & | ||
872 | ~(NETIF_F_IP_CSUM | NETIF_F_TSO); | ||
873 | |||
874 | newdp6 = (struct dccp6_sock *)newsk; | ||
875 | newinet = inet_sk(newsk); | ||
876 | newinet->pinet6 = &newdp6->inet6; | ||
877 | newdp = dccp_sk(newsk); | ||
878 | newnp = inet6_sk(newsk); | ||
879 | |||
880 | memcpy(newnp, np, sizeof(struct ipv6_pinfo)); | ||
881 | |||
882 | ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr); | ||
883 | ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr); | ||
884 | ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr); | ||
885 | newsk->sk_bound_dev_if = ireq6->iif; | ||
886 | |||
887 | /* Now IPv6 options... | ||
888 | |||
889 | First: no IPv4 options. | ||
890 | */ | ||
891 | newinet->opt = NULL; | ||
892 | |||
893 | /* Clone RX bits */ | ||
894 | newnp->rxopt.all = np->rxopt.all; | ||
895 | |||
896 | /* Clone pktoptions received with SYN */ | ||
897 | newnp->pktoptions = NULL; | ||
898 | if (ireq6->pktopts != NULL) { | ||
899 | newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC); | ||
900 | kfree_skb(ireq6->pktopts); | ||
901 | ireq6->pktopts = NULL; | ||
902 | if (newnp->pktoptions) | ||
903 | skb_set_owner_r(newnp->pktoptions, newsk); | ||
904 | } | ||
905 | newnp->opt = NULL; | ||
906 | newnp->mcast_oif = inet6_iif(skb); | ||
907 | newnp->mcast_hops = skb->nh.ipv6h->hop_limit; | ||
908 | |||
909 | /* Clone native IPv6 options from listening socket (if any) | ||
910 | |||
911 | Yes, keeping reference count would be much more clever, | ||
912 | but we make one more one thing there: reattach optmem | ||
913 | to newsk. | ||
914 | */ | ||
915 | if (opt) { | ||
916 | newnp->opt = ipv6_dup_options(newsk, opt); | ||
917 | if (opt != np->opt) | ||
918 | sock_kfree_s(sk, opt, opt->tot_len); | ||
919 | } | ||
920 | |||
921 | inet_csk(newsk)->icsk_ext_hdr_len = 0; | ||
922 | if (newnp->opt) | ||
923 | inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + | ||
924 | newnp->opt->opt_flen); | ||
925 | |||
926 | dccp_sync_mss(newsk, dst_mtu(dst)); | ||
927 | |||
928 | newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; | ||
929 | |||
930 | __inet6_hash(&dccp_hashinfo, newsk); | ||
931 | inet_inherit_port(&dccp_hashinfo, sk, newsk); | ||
932 | |||
933 | return newsk; | ||
934 | |||
935 | out_overflow: | ||
936 | NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); | ||
937 | out: | ||
938 | NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); | ||
939 | if (opt && opt != np->opt) | ||
940 | sock_kfree_s(sk, opt, opt->tot_len); | ||
941 | dst_release(dst); | ||
942 | return NULL; | ||
943 | } | ||
944 | |||
945 | /* The socket must have it's spinlock held when we get | ||
946 | * here. | ||
947 | * | ||
948 | * We have a potential double-lock case here, so even when | ||
949 | * doing backlog processing we use the BH locking scheme. | ||
950 | * This is because we cannot sleep with the original spinlock | ||
951 | * held. | ||
952 | */ | ||
953 | static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) | ||
954 | { | ||
955 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
956 | struct sk_buff *opt_skb = NULL; | ||
957 | |||
958 | /* Imagine: socket is IPv6. IPv4 packet arrives, | ||
959 | goes to IPv4 receive handler and backlogged. | ||
960 | From backlog it always goes here. Kerboom... | ||
961 | Fortunately, dccp_rcv_established and rcv_established | ||
962 | handle them correctly, but it is not case with | ||
963 | dccp_v6_hnd_req and dccp_v6_ctl_send_reset(). --ANK | ||
964 | */ | ||
965 | |||
966 | if (skb->protocol == htons(ETH_P_IP)) | ||
967 | return dccp_v4_do_rcv(sk, skb); | ||
968 | |||
969 | if (sk_filter(sk, skb, 0)) | ||
970 | goto discard; | ||
971 | |||
972 | /* | ||
973 | * socket locking is here for SMP purposes as backlog rcv | ||
974 | * is currently called with bh processing disabled. | ||
975 | */ | ||
976 | |||
977 | /* Do Stevens' IPV6_PKTOPTIONS. | ||
978 | |||
979 | Yes, guys, it is the only place in our code, where we | ||
980 | may make it not affecting IPv4. | ||
981 | The rest of code is protocol independent, | ||
982 | and I do not like idea to uglify IPv4. | ||
983 | |||
984 | Actually, all the idea behind IPV6_PKTOPTIONS | ||
985 | looks not very well thought. For now we latch | ||
986 | options, received in the last packet, enqueued | ||
987 | by tcp. Feel free to propose better solution. | ||
988 | --ANK (980728) | ||
989 | */ | ||
990 | if (np->rxopt.all) | ||
991 | opt_skb = skb_clone(skb, GFP_ATOMIC); | ||
992 | |||
993 | if (sk->sk_state == DCCP_OPEN) { /* Fast path */ | ||
994 | if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) | ||
995 | goto reset; | ||
996 | return 0; | ||
997 | } | ||
998 | |||
999 | if (sk->sk_state == DCCP_LISTEN) { | ||
1000 | struct sock *nsk = dccp_v6_hnd_req(sk, skb); | ||
1001 | if (!nsk) | ||
1002 | goto discard; | ||
1003 | |||
1004 | /* | ||
1005 | * Queue it on the new socket if the new socket is active, | ||
1006 | * otherwise we just shortcircuit this and continue with | ||
1007 | * the new socket.. | ||
1008 | */ | ||
1009 | if(nsk != sk) { | ||
1010 | if (dccp_child_process(sk, nsk, skb)) | ||
1011 | goto reset; | ||
1012 | if (opt_skb) | ||
1013 | __kfree_skb(opt_skb); | ||
1014 | return 0; | ||
1015 | } | ||
1016 | } | ||
1017 | |||
1018 | if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) | ||
1019 | goto reset; | ||
1020 | return 0; | ||
1021 | |||
1022 | reset: | ||
1023 | dccp_v6_ctl_send_reset(skb); | ||
1024 | discard: | ||
1025 | if (opt_skb) | ||
1026 | __kfree_skb(opt_skb); | ||
1027 | kfree_skb(skb); | ||
1028 | return 0; | ||
1029 | } | ||
1030 | |||
1031 | static int dccp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | ||
1032 | { | ||
1033 | const struct dccp_hdr *dh; | ||
1034 | struct sk_buff *skb = *pskb; | ||
1035 | struct sock *sk; | ||
1036 | |||
1037 | /* Step 1: Check header basics: */ | ||
1038 | |||
1039 | if (dccp_invalid_packet(skb)) | ||
1040 | goto discard_it; | ||
1041 | |||
1042 | dh = dccp_hdr(skb); | ||
1043 | |||
1044 | DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); | ||
1045 | DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; | ||
1046 | |||
1047 | if (dccp_packet_without_ack(skb)) | ||
1048 | DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; | ||
1049 | else | ||
1050 | DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); | ||
1051 | |||
1052 | /* Step 2: | ||
1053 | * Look up flow ID in table and get corresponding socket */ | ||
1054 | sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr, | ||
1055 | dh->dccph_sport, | ||
1056 | &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport), | ||
1057 | inet6_iif(skb)); | ||
1058 | /* | ||
1059 | * Step 2: | ||
1060 | * If no socket ... | ||
1061 | * Generate Reset(No Connection) unless P.type == Reset | ||
1062 | * Drop packet and return | ||
1063 | */ | ||
1064 | if (sk == NULL) | ||
1065 | goto no_dccp_socket; | ||
1066 | |||
1067 | /* | ||
1068 | * Step 2: | ||
1069 | * ... or S.state == TIMEWAIT, | ||
1070 | * Generate Reset(No Connection) unless P.type == Reset | ||
1071 | * Drop packet and return | ||
1072 | */ | ||
1073 | |||
1074 | if (sk->sk_state == DCCP_TIME_WAIT) | ||
1075 | goto do_time_wait; | ||
1076 | |||
1077 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) | ||
1078 | goto discard_and_relse; | ||
1079 | |||
1080 | return sk_receive_skb(sk, skb) ? -1 : 0; | ||
1081 | |||
1082 | no_dccp_socket: | ||
1083 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) | ||
1084 | goto discard_it; | ||
1085 | /* | ||
1086 | * Step 2: | ||
1087 | * Generate Reset(No Connection) unless P.type == Reset | ||
1088 | * Drop packet and return | ||
1089 | */ | ||
1090 | if (dh->dccph_type != DCCP_PKT_RESET) { | ||
1091 | DCCP_SKB_CB(skb)->dccpd_reset_code = | ||
1092 | DCCP_RESET_CODE_NO_CONNECTION; | ||
1093 | dccp_v6_ctl_send_reset(skb); | ||
1094 | } | ||
1095 | discard_it: | ||
1096 | |||
1097 | /* | ||
1098 | * Discard frame | ||
1099 | */ | ||
1100 | |||
1101 | kfree_skb(skb); | ||
1102 | return 0; | ||
1103 | |||
1104 | discard_and_relse: | ||
1105 | sock_put(sk); | ||
1106 | goto discard_it; | ||
1107 | |||
1108 | do_time_wait: | ||
1109 | inet_twsk_put((struct inet_timewait_sock *)sk); | ||
1110 | goto no_dccp_socket; | ||
1111 | } | ||
1112 | |||
1113 | static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { | ||
1114 | .queue_xmit = inet6_csk_xmit, | ||
1115 | .send_check = dccp_v6_send_check, | ||
1116 | .rebuild_header = inet6_sk_rebuild_header, | ||
1117 | .conn_request = dccp_v6_conn_request, | ||
1118 | .syn_recv_sock = dccp_v6_request_recv_sock, | ||
1119 | .net_header_len = sizeof(struct ipv6hdr), | ||
1120 | .setsockopt = ipv6_setsockopt, | ||
1121 | .getsockopt = ipv6_getsockopt, | ||
1122 | .addr2sockaddr = inet6_csk_addr2sockaddr, | ||
1123 | .sockaddr_len = sizeof(struct sockaddr_in6) | ||
1124 | }; | ||
1125 | |||
1126 | /* | ||
1127 | * DCCP over IPv4 via INET6 API | ||
1128 | */ | ||
1129 | static struct inet_connection_sock_af_ops dccp_ipv6_mapped = { | ||
1130 | .queue_xmit = ip_queue_xmit, | ||
1131 | .send_check = dccp_v4_send_check, | ||
1132 | .rebuild_header = inet_sk_rebuild_header, | ||
1133 | .conn_request = dccp_v6_conn_request, | ||
1134 | .syn_recv_sock = dccp_v6_request_recv_sock, | ||
1135 | .net_header_len = sizeof(struct iphdr), | ||
1136 | .setsockopt = ipv6_setsockopt, | ||
1137 | .getsockopt = ipv6_getsockopt, | ||
1138 | .addr2sockaddr = inet6_csk_addr2sockaddr, | ||
1139 | .sockaddr_len = sizeof(struct sockaddr_in6) | ||
1140 | }; | ||
1141 | |||
1142 | /* NOTE: A lot of things set to zero explicitly by call to | ||
1143 | * sk_alloc() so need not be done here. | ||
1144 | */ | ||
1145 | static int dccp_v6_init_sock(struct sock *sk) | ||
1146 | { | ||
1147 | int err = dccp_v4_init_sock(sk); | ||
1148 | |||
1149 | if (err == 0) | ||
1150 | inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; | ||
1151 | |||
1152 | return err; | ||
1153 | } | ||
1154 | |||
1155 | static int dccp_v6_destroy_sock(struct sock *sk) | ||
1156 | { | ||
1157 | dccp_v4_destroy_sock(sk); | ||
1158 | return inet6_destroy_sock(sk); | ||
1159 | } | ||
1160 | |||
1161 | static struct proto dccp_v6_prot = { | ||
1162 | .name = "DCCPv6", | ||
1163 | .owner = THIS_MODULE, | ||
1164 | .close = dccp_close, | ||
1165 | .connect = dccp_v6_connect, | ||
1166 | .disconnect = dccp_disconnect, | ||
1167 | .ioctl = dccp_ioctl, | ||
1168 | .init = dccp_v6_init_sock, | ||
1169 | .setsockopt = dccp_setsockopt, | ||
1170 | .getsockopt = dccp_getsockopt, | ||
1171 | .sendmsg = dccp_sendmsg, | ||
1172 | .recvmsg = dccp_recvmsg, | ||
1173 | .backlog_rcv = dccp_v6_do_rcv, | ||
1174 | .hash = dccp_v6_hash, | ||
1175 | .unhash = dccp_unhash, | ||
1176 | .accept = inet_csk_accept, | ||
1177 | .get_port = dccp_v6_get_port, | ||
1178 | .shutdown = dccp_shutdown, | ||
1179 | .destroy = dccp_v6_destroy_sock, | ||
1180 | .orphan_count = &dccp_orphan_count, | ||
1181 | .max_header = MAX_DCCP_HEADER, | ||
1182 | .obj_size = sizeof(struct dccp6_sock), | ||
1183 | .rsk_prot = &dccp6_request_sock_ops, | ||
1184 | .twsk_prot = &dccp6_timewait_sock_ops, | ||
1185 | }; | ||
1186 | |||
1187 | static struct inet6_protocol dccp_v6_protocol = { | ||
1188 | .handler = dccp_v6_rcv, | ||
1189 | .err_handler = dccp_v6_err, | ||
1190 | .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, | ||
1191 | }; | ||
1192 | |||
1193 | static struct proto_ops inet6_dccp_ops = { | ||
1194 | .family = PF_INET6, | ||
1195 | .owner = THIS_MODULE, | ||
1196 | .release = inet6_release, | ||
1197 | .bind = inet6_bind, | ||
1198 | .connect = inet_stream_connect, | ||
1199 | .socketpair = sock_no_socketpair, | ||
1200 | .accept = inet_accept, | ||
1201 | .getname = inet6_getname, | ||
1202 | .poll = dccp_poll, | ||
1203 | .ioctl = inet6_ioctl, | ||
1204 | .listen = inet_dccp_listen, | ||
1205 | .shutdown = inet_shutdown, | ||
1206 | .setsockopt = sock_common_setsockopt, | ||
1207 | .getsockopt = sock_common_getsockopt, | ||
1208 | .sendmsg = inet_sendmsg, | ||
1209 | .recvmsg = sock_common_recvmsg, | ||
1210 | .mmap = sock_no_mmap, | ||
1211 | .sendpage = sock_no_sendpage, | ||
1212 | }; | ||
1213 | |||
1214 | static struct inet_protosw dccp_v6_protosw = { | ||
1215 | .type = SOCK_DCCP, | ||
1216 | .protocol = IPPROTO_DCCP, | ||
1217 | .prot = &dccp_v6_prot, | ||
1218 | .ops = &inet6_dccp_ops, | ||
1219 | .capability = -1, | ||
1220 | .flags = INET_PROTOSW_ICSK, | ||
1221 | }; | ||
1222 | |||
1223 | static int __init dccp_v6_init(void) | ||
1224 | { | ||
1225 | int err = proto_register(&dccp_v6_prot, 1); | ||
1226 | |||
1227 | if (err != 0) | ||
1228 | goto out; | ||
1229 | |||
1230 | err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP); | ||
1231 | if (err != 0) | ||
1232 | goto out_unregister_proto; | ||
1233 | |||
1234 | inet6_register_protosw(&dccp_v6_protosw); | ||
1235 | out: | ||
1236 | return err; | ||
1237 | out_unregister_proto: | ||
1238 | proto_unregister(&dccp_v6_prot); | ||
1239 | goto out; | ||
1240 | } | ||
1241 | |||
1242 | static void __exit dccp_v6_exit(void) | ||
1243 | { | ||
1244 | inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP); | ||
1245 | inet6_unregister_protosw(&dccp_v6_protosw); | ||
1246 | proto_unregister(&dccp_v6_prot); | ||
1247 | } | ||
1248 | |||
1249 | module_init(dccp_v6_init); | ||
1250 | module_exit(dccp_v6_exit); | ||
1251 | |||
1252 | /* | ||
1253 | * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) | ||
1254 | * values directly, Also cover the case where the protocol is not specified, | ||
1255 | * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP | ||
1256 | */ | ||
1257 | MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-33-type-6"); | ||
1258 | MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-0-type-6"); | ||
1259 | MODULE_LICENSE("GPL"); | ||
1260 | MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); | ||
1261 | MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol"); | ||
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h new file mode 100644 index 000000000000..e4d4e9309270 --- /dev/null +++ b/net/dccp/ipv6.h | |||
@@ -0,0 +1,37 @@ | |||
1 | #ifndef _DCCP_IPV6_H | ||
2 | #define _DCCP_IPV6_H | ||
3 | /* | ||
4 | * net/dccp/ipv6.h | ||
5 | * | ||
6 | * An implementation of the DCCP protocol | ||
7 | * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | */ | ||
13 | |||
14 | #include <linux/config.h> | ||
15 | #include <linux/dccp.h> | ||
16 | #include <linux/ipv6.h> | ||
17 | |||
18 | struct dccp6_sock { | ||
19 | struct dccp_sock dccp; | ||
20 | /* | ||
21 | * ipv6_pinfo has to be the last member of dccp6_sock, | ||
22 | * see inet6_sk_generic. | ||
23 | */ | ||
24 | struct ipv6_pinfo inet6; | ||
25 | }; | ||
26 | |||
27 | struct dccp6_request_sock { | ||
28 | struct dccp_request_sock dccp; | ||
29 | struct inet6_request_sock inet6; | ||
30 | }; | ||
31 | |||
32 | struct dccp6_timewait_sock { | ||
33 | struct inet_timewait_sock inet; | ||
34 | struct inet6_timewait_sock tw6; | ||
35 | }; | ||
36 | |||
37 | #endif /* _DCCP_IPV6_H */ | ||
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 1393461898bb..29261fc198e7 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c | |||
@@ -40,6 +40,8 @@ struct inet_timewait_death_row dccp_death_row = { | |||
40 | (unsigned long)&dccp_death_row), | 40 | (unsigned long)&dccp_death_row), |
41 | }; | 41 | }; |
42 | 42 | ||
43 | EXPORT_SYMBOL_GPL(dccp_death_row); | ||
44 | |||
43 | void dccp_time_wait(struct sock *sk, int state, int timeo) | 45 | void dccp_time_wait(struct sock *sk, int state, int timeo) |
44 | { | 46 | { |
45 | struct inet_timewait_sock *tw = NULL; | 47 | struct inet_timewait_sock *tw = NULL; |
@@ -50,7 +52,18 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) | |||
50 | if (tw != NULL) { | 52 | if (tw != NULL) { |
51 | const struct inet_connection_sock *icsk = inet_csk(sk); | 53 | const struct inet_connection_sock *icsk = inet_csk(sk); |
52 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); | 54 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); |
53 | 55 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | |
56 | if (tw->tw_family == PF_INET6) { | ||
57 | const struct ipv6_pinfo *np = inet6_sk(sk); | ||
58 | struct inet6_timewait_sock *tw6; | ||
59 | |||
60 | tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); | ||
61 | tw6 = inet6_twsk((struct sock *)tw); | ||
62 | ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); | ||
63 | ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); | ||
64 | tw->tw_ipv6only = np->ipv6only; | ||
65 | } | ||
66 | #endif | ||
54 | /* Linkage updates. */ | 67 | /* Linkage updates. */ |
55 | __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); | 68 | __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); |
56 | 69 | ||
@@ -170,6 +183,8 @@ out_free: | |||
170 | return newsk; | 183 | return newsk; |
171 | } | 184 | } |
172 | 185 | ||
186 | EXPORT_SYMBOL_GPL(dccp_create_openreq_child); | ||
187 | |||
173 | /* | 188 | /* |
174 | * Process an incoming packet for RESPOND sockets represented | 189 | * Process an incoming packet for RESPOND sockets represented |
175 | * as an request_sock. | 190 | * as an request_sock. |
@@ -214,7 +229,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, | |||
214 | goto drop; | 229 | goto drop; |
215 | } | 230 | } |
216 | 231 | ||
217 | child = dccp_v4_request_recv_sock(sk, skb, req, NULL); | 232 | child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); |
218 | if (child == NULL) | 233 | if (child == NULL) |
219 | goto listen_overflow; | 234 | goto listen_overflow; |
220 | 235 | ||
@@ -236,6 +251,8 @@ drop: | |||
236 | goto out; | 251 | goto out; |
237 | } | 252 | } |
238 | 253 | ||
254 | EXPORT_SYMBOL_GPL(dccp_check_req); | ||
255 | |||
239 | /* | 256 | /* |
240 | * Queue segment on the new socket if the new socket is active, | 257 | * Queue segment on the new socket if the new socket is active, |
241 | * otherwise we just shortcircuit this and continue with | 258 | * otherwise we just shortcircuit this and continue with |
@@ -266,3 +283,5 @@ int dccp_child_process(struct sock *parent, struct sock *child, | |||
266 | sock_put(child); | 283 | sock_put(child); |
267 | return ret; | 284 | return ret; |
268 | } | 285 | } |
286 | |||
287 | EXPORT_SYMBOL_GPL(dccp_child_process); | ||
diff --git a/net/dccp/output.c b/net/dccp/output.c index 74ff87025878..efd7ffb903a1 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
16 | #include <linux/skbuff.h> | 16 | #include <linux/skbuff.h> |
17 | 17 | ||
18 | #include <net/inet_sock.h> | ||
18 | #include <net/sock.h> | 19 | #include <net/sock.h> |
19 | 20 | ||
20 | #include "ackvec.h" | 21 | #include "ackvec.h" |
@@ -43,6 +44,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
43 | { | 44 | { |
44 | if (likely(skb != NULL)) { | 45 | if (likely(skb != NULL)) { |
45 | const struct inet_sock *inet = inet_sk(sk); | 46 | const struct inet_sock *inet = inet_sk(sk); |
47 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
46 | struct dccp_sock *dp = dccp_sk(sk); | 48 | struct dccp_sock *dp = dccp_sk(sk); |
47 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | 49 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); |
48 | struct dccp_hdr *dh; | 50 | struct dccp_hdr *dh; |
@@ -108,8 +110,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
108 | break; | 110 | break; |
109 | } | 111 | } |
110 | 112 | ||
111 | dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, | 113 | icsk->icsk_af_ops->send_check(sk, skb->len, skb); |
112 | inet->daddr); | ||
113 | 114 | ||
114 | if (set_ack) | 115 | if (set_ack) |
115 | dccp_event_ack_sent(sk); | 116 | dccp_event_ack_sent(sk); |
@@ -117,7 +118,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
117 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); | 118 | DCCP_INC_STATS(DCCP_MIB_OUTSEGS); |
118 | 119 | ||
119 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 120 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
120 | err = ip_queue_xmit(skb, 0); | 121 | err = icsk->icsk_af_ops->queue_xmit(skb, 0); |
121 | if (err <= 0) | 122 | if (err <= 0) |
122 | return err; | 123 | return err; |
123 | 124 | ||
@@ -134,20 +135,13 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) | |||
134 | 135 | ||
135 | unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) | 136 | unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) |
136 | { | 137 | { |
138 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
137 | struct dccp_sock *dp = dccp_sk(sk); | 139 | struct dccp_sock *dp = dccp_sk(sk); |
138 | int mss_now; | 140 | int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - |
139 | 141 | sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext)); | |
140 | /* | ||
141 | * FIXME: we really should be using the af_specific thing to support | ||
142 | * IPv6. | ||
143 | * mss_now = pmtu - tp->af_specific->net_header_len - | ||
144 | * sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); | ||
145 | */ | ||
146 | mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - | ||
147 | sizeof(struct dccp_hdr_ext); | ||
148 | 142 | ||
149 | /* Now subtract optional transport overhead */ | 143 | /* Now subtract optional transport overhead */ |
150 | mss_now -= dp->dccps_ext_header_len; | 144 | mss_now -= icsk->icsk_ext_hdr_len; |
151 | 145 | ||
152 | /* | 146 | /* |
153 | * FIXME: this should come from the CCID infrastructure, where, say, | 147 | * FIXME: this should come from the CCID infrastructure, where, say, |
@@ -160,12 +154,14 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) | |||
160 | mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; | 154 | mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; |
161 | 155 | ||
162 | /* And store cached results */ | 156 | /* And store cached results */ |
163 | dp->dccps_pmtu_cookie = pmtu; | 157 | icsk->icsk_pmtu_cookie = pmtu; |
164 | dp->dccps_mss_cache = mss_now; | 158 | dp->dccps_mss_cache = mss_now; |
165 | 159 | ||
166 | return mss_now; | 160 | return mss_now; |
167 | } | 161 | } |
168 | 162 | ||
163 | EXPORT_SYMBOL_GPL(dccp_sync_mss); | ||
164 | |||
169 | void dccp_write_space(struct sock *sk) | 165 | void dccp_write_space(struct sock *sk) |
170 | { | 166 | { |
171 | read_lock(&sk->sk_callback_lock); | 167 | read_lock(&sk->sk_callback_lock); |
@@ -266,7 +262,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) | |||
266 | 262 | ||
267 | int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | 263 | int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) |
268 | { | 264 | { |
269 | if (inet_sk_rebuild_header(sk) != 0) | 265 | if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0) |
270 | return -EHOSTUNREACH; /* Routing failure or similar. */ | 266 | return -EHOSTUNREACH; /* Routing failure or similar. */ |
271 | 267 | ||
272 | return dccp_transmit_skb(sk, (skb_cloned(skb) ? | 268 | return dccp_transmit_skb(sk, (skb_cloned(skb) ? |
@@ -321,6 +317,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, | |||
321 | return skb; | 317 | return skb; |
322 | } | 318 | } |
323 | 319 | ||
320 | EXPORT_SYMBOL_GPL(dccp_make_response); | ||
321 | |||
324 | struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, | 322 | struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, |
325 | const enum dccp_reset_codes code) | 323 | const enum dccp_reset_codes code) |
326 | 324 | ||
@@ -377,6 +375,7 @@ struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, | |||
377 | */ | 375 | */ |
378 | static inline void dccp_connect_init(struct sock *sk) | 376 | static inline void dccp_connect_init(struct sock *sk) |
379 | { | 377 | { |
378 | struct dccp_sock *dp = dccp_sk(sk); | ||
380 | struct dst_entry *dst = __sk_dst_get(sk); | 379 | struct dst_entry *dst = __sk_dst_get(sk); |
381 | struct inet_connection_sock *icsk = inet_csk(sk); | 380 | struct inet_connection_sock *icsk = inet_csk(sk); |
382 | 381 | ||
@@ -385,10 +384,16 @@ static inline void dccp_connect_init(struct sock *sk) | |||
385 | 384 | ||
386 | dccp_sync_mss(sk, dst_mtu(dst)); | 385 | dccp_sync_mss(sk, dst_mtu(dst)); |
387 | 386 | ||
388 | /* | 387 | dccp_update_gss(sk, dp->dccps_iss); |
389 | * FIXME: set dp->{dccps_swh,dccps_swl}, with | 388 | /* |
390 | * something like dccp_inc_seq | 389 | * SWL and AWL are initially adjusted so that they are not less than |
391 | */ | 390 | * the initial Sequence Numbers received and sent, respectively: |
391 | * SWL := max(GSR + 1 - floor(W/4), ISR), | ||
392 | * AWL := max(GSS - W' + 1, ISS). | ||
393 | * These adjustments MUST be applied only at the beginning of the | ||
394 | * connection. | ||
395 | */ | ||
396 | dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); | ||
392 | 397 | ||
393 | icsk->icsk_retransmits = 0; | 398 | icsk->icsk_retransmits = 0; |
394 | } | 399 | } |
@@ -420,6 +425,8 @@ int dccp_connect(struct sock *sk) | |||
420 | return 0; | 425 | return 0; |
421 | } | 426 | } |
422 | 427 | ||
428 | EXPORT_SYMBOL_GPL(dccp_connect); | ||
429 | |||
423 | void dccp_send_ack(struct sock *sk) | 430 | void dccp_send_ack(struct sock *sk) |
424 | { | 431 | { |
425 | /* If we have been reset, we may not send again. */ | 432 | /* If we have been reset, we may not send again. */ |
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 8a6b2a9e4581..65b11ea90d85 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -24,7 +24,7 @@ | |||
24 | #include <net/checksum.h> | 24 | #include <net/checksum.h> |
25 | 25 | ||
26 | #include <net/inet_common.h> | 26 | #include <net/inet_common.h> |
27 | #include <net/ip.h> | 27 | #include <net/inet_sock.h> |
28 | #include <net/protocol.h> | 28 | #include <net/protocol.h> |
29 | #include <net/sock.h> | 29 | #include <net/sock.h> |
30 | #include <net/xfrm.h> | 30 | #include <net/xfrm.h> |
@@ -34,15 +34,18 @@ | |||
34 | #include <linux/timer.h> | 34 | #include <linux/timer.h> |
35 | #include <linux/delay.h> | 35 | #include <linux/delay.h> |
36 | #include <linux/poll.h> | 36 | #include <linux/poll.h> |
37 | #include <linux/dccp.h> | ||
38 | 37 | ||
39 | #include "ccid.h" | 38 | #include "ccid.h" |
40 | #include "dccp.h" | 39 | #include "dccp.h" |
41 | 40 | ||
42 | DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; | 41 | DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; |
43 | 42 | ||
43 | EXPORT_SYMBOL_GPL(dccp_statistics); | ||
44 | |||
44 | atomic_t dccp_orphan_count = ATOMIC_INIT(0); | 45 | atomic_t dccp_orphan_count = ATOMIC_INIT(0); |
45 | 46 | ||
47 | EXPORT_SYMBOL_GPL(dccp_orphan_count); | ||
48 | |||
46 | static struct net_protocol dccp_protocol = { | 49 | static struct net_protocol dccp_protocol = { |
47 | .handler = dccp_v4_rcv, | 50 | .handler = dccp_v4_rcv, |
48 | .err_handler = dccp_v4_err, | 51 | .err_handler = dccp_v4_err, |
@@ -149,6 +152,8 @@ int dccp_disconnect(struct sock *sk, int flags) | |||
149 | return err; | 152 | return err; |
150 | } | 153 | } |
151 | 154 | ||
155 | EXPORT_SYMBOL_GPL(dccp_disconnect); | ||
156 | |||
152 | /* | 157 | /* |
153 | * Wait for a DCCP event. | 158 | * Wait for a DCCP event. |
154 | * | 159 | * |
@@ -156,8 +161,8 @@ int dccp_disconnect(struct sock *sk, int flags) | |||
156 | * take care of normal races (between the test and the event) and we don't | 161 | * take care of normal races (between the test and the event) and we don't |
157 | * go look at any of the socket buffers directly. | 162 | * go look at any of the socket buffers directly. |
158 | */ | 163 | */ |
159 | static unsigned int dccp_poll(struct file *file, struct socket *sock, | 164 | unsigned int dccp_poll(struct file *file, struct socket *sock, |
160 | poll_table *wait) | 165 | poll_table *wait) |
161 | { | 166 | { |
162 | unsigned int mask; | 167 | unsigned int mask; |
163 | struct sock *sk = sock->sk; | 168 | struct sock *sk = sock->sk; |
@@ -205,12 +210,16 @@ static unsigned int dccp_poll(struct file *file, struct socket *sock, | |||
205 | return mask; | 210 | return mask; |
206 | } | 211 | } |
207 | 212 | ||
213 | EXPORT_SYMBOL_GPL(dccp_poll); | ||
214 | |||
208 | int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) | 215 | int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) |
209 | { | 216 | { |
210 | dccp_pr_debug("entry\n"); | 217 | dccp_pr_debug("entry\n"); |
211 | return -ENOIOCTLCMD; | 218 | return -ENOIOCTLCMD; |
212 | } | 219 | } |
213 | 220 | ||
221 | EXPORT_SYMBOL_GPL(dccp_ioctl); | ||
222 | |||
214 | static int dccp_setsockopt_service(struct sock *sk, const u32 service, | 223 | static int dccp_setsockopt_service(struct sock *sk, const u32 service, |
215 | char __user *optval, int optlen) | 224 | char __user *optval, int optlen) |
216 | { | 225 | { |
@@ -254,7 +263,9 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, | |||
254 | int val; | 263 | int val; |
255 | 264 | ||
256 | if (level != SOL_DCCP) | 265 | if (level != SOL_DCCP) |
257 | return ip_setsockopt(sk, level, optname, optval, optlen); | 266 | return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, |
267 | optname, optval, | ||
268 | optlen); | ||
258 | 269 | ||
259 | if (optlen < sizeof(int)) | 270 | if (optlen < sizeof(int)) |
260 | return -EINVAL; | 271 | return -EINVAL; |
@@ -282,6 +293,8 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, | |||
282 | return err; | 293 | return err; |
283 | } | 294 | } |
284 | 295 | ||
296 | EXPORT_SYMBOL_GPL(dccp_setsockopt); | ||
297 | |||
285 | static int dccp_getsockopt_service(struct sock *sk, int len, | 298 | static int dccp_getsockopt_service(struct sock *sk, int len, |
286 | u32 __user *optval, | 299 | u32 __user *optval, |
287 | int __user *optlen) | 300 | int __user *optlen) |
@@ -320,8 +333,9 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, | |||
320 | int val, len; | 333 | int val, len; |
321 | 334 | ||
322 | if (level != SOL_DCCP) | 335 | if (level != SOL_DCCP) |
323 | return ip_getsockopt(sk, level, optname, optval, optlen); | 336 | return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level, |
324 | 337 | optname, optval, | |
338 | optlen); | ||
325 | if (get_user(len, optlen)) | 339 | if (get_user(len, optlen)) |
326 | return -EFAULT; | 340 | return -EFAULT; |
327 | 341 | ||
@@ -354,6 +368,8 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, | |||
354 | return 0; | 368 | return 0; |
355 | } | 369 | } |
356 | 370 | ||
371 | EXPORT_SYMBOL_GPL(dccp_getsockopt); | ||
372 | |||
357 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 373 | int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
358 | size_t len) | 374 | size_t len) |
359 | { | 375 | { |
@@ -410,6 +426,8 @@ out_discard: | |||
410 | goto out_release; | 426 | goto out_release; |
411 | } | 427 | } |
412 | 428 | ||
429 | EXPORT_SYMBOL_GPL(dccp_sendmsg); | ||
430 | |||
413 | int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 431 | int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
414 | size_t len, int nonblock, int flags, int *addr_len) | 432 | size_t len, int nonblock, int flags, int *addr_len) |
415 | { | 433 | { |
@@ -507,7 +525,9 @@ out: | |||
507 | return len; | 525 | return len; |
508 | } | 526 | } |
509 | 527 | ||
510 | static int inet_dccp_listen(struct socket *sock, int backlog) | 528 | EXPORT_SYMBOL_GPL(dccp_recvmsg); |
529 | |||
530 | int inet_dccp_listen(struct socket *sock, int backlog) | ||
511 | { | 531 | { |
512 | struct sock *sk = sock->sk; | 532 | struct sock *sk = sock->sk; |
513 | unsigned char old_state; | 533 | unsigned char old_state; |
@@ -543,6 +563,8 @@ out: | |||
543 | return err; | 563 | return err; |
544 | } | 564 | } |
545 | 565 | ||
566 | EXPORT_SYMBOL_GPL(inet_dccp_listen); | ||
567 | |||
546 | static const unsigned char dccp_new_state[] = { | 568 | static const unsigned char dccp_new_state[] = { |
547 | /* current state: new state: action: */ | 569 | /* current state: new state: action: */ |
548 | [0] = DCCP_CLOSED, | 570 | [0] = DCCP_CLOSED, |
@@ -648,12 +670,16 @@ adjudge_to_death: | |||
648 | sock_put(sk); | 670 | sock_put(sk); |
649 | } | 671 | } |
650 | 672 | ||
673 | EXPORT_SYMBOL_GPL(dccp_close); | ||
674 | |||
651 | void dccp_shutdown(struct sock *sk, int how) | 675 | void dccp_shutdown(struct sock *sk, int how) |
652 | { | 676 | { |
653 | dccp_pr_debug("entry\n"); | 677 | dccp_pr_debug("entry\n"); |
654 | } | 678 | } |
655 | 679 | ||
656 | static struct proto_ops inet_dccp_ops = { | 680 | EXPORT_SYMBOL_GPL(dccp_shutdown); |
681 | |||
682 | static const struct proto_ops inet_dccp_ops = { | ||
657 | .family = PF_INET, | 683 | .family = PF_INET, |
658 | .owner = THIS_MODULE, | 684 | .owner = THIS_MODULE, |
659 | .release = inet_release, | 685 | .release = inet_release, |
@@ -681,11 +707,11 @@ extern struct net_proto_family inet_family_ops; | |||
681 | static struct inet_protosw dccp_v4_protosw = { | 707 | static struct inet_protosw dccp_v4_protosw = { |
682 | .type = SOCK_DCCP, | 708 | .type = SOCK_DCCP, |
683 | .protocol = IPPROTO_DCCP, | 709 | .protocol = IPPROTO_DCCP, |
684 | .prot = &dccp_v4_prot, | 710 | .prot = &dccp_prot, |
685 | .ops = &inet_dccp_ops, | 711 | .ops = &inet_dccp_ops, |
686 | .capability = -1, | 712 | .capability = -1, |
687 | .no_check = 0, | 713 | .no_check = 0, |
688 | .flags = 0, | 714 | .flags = INET_PROTOSW_ICSK, |
689 | }; | 715 | }; |
690 | 716 | ||
691 | /* | 717 | /* |
@@ -760,13 +786,15 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); | |||
760 | int dccp_debug; | 786 | int dccp_debug; |
761 | module_param(dccp_debug, int, 0444); | 787 | module_param(dccp_debug, int, 0444); |
762 | MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); | 788 | MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); |
789 | |||
790 | EXPORT_SYMBOL_GPL(dccp_debug); | ||
763 | #endif | 791 | #endif |
764 | 792 | ||
765 | static int __init dccp_init(void) | 793 | static int __init dccp_init(void) |
766 | { | 794 | { |
767 | unsigned long goal; | 795 | unsigned long goal; |
768 | int ehash_order, bhash_order, i; | 796 | int ehash_order, bhash_order, i; |
769 | int rc = proto_register(&dccp_v4_prot, 1); | 797 | int rc = proto_register(&dccp_prot, 1); |
770 | 798 | ||
771 | if (rc) | 799 | if (rc) |
772 | goto out; | 800 | goto out; |
@@ -869,7 +897,7 @@ out_free_bind_bucket_cachep: | |||
869 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | 897 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); |
870 | dccp_hashinfo.bind_bucket_cachep = NULL; | 898 | dccp_hashinfo.bind_bucket_cachep = NULL; |
871 | out_proto_unregister: | 899 | out_proto_unregister: |
872 | proto_unregister(&dccp_v4_prot); | 900 | proto_unregister(&dccp_prot); |
873 | goto out; | 901 | goto out; |
874 | } | 902 | } |
875 | 903 | ||
@@ -892,7 +920,7 @@ static void __exit dccp_fini(void) | |||
892 | get_order(dccp_hashinfo.ehash_size * | 920 | get_order(dccp_hashinfo.ehash_size * |
893 | sizeof(struct inet_ehash_bucket))); | 921 | sizeof(struct inet_ehash_bucket))); |
894 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); | 922 | kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); |
895 | proto_unregister(&dccp_v4_prot); | 923 | proto_unregister(&dccp_prot); |
896 | } | 924 | } |
897 | 925 | ||
898 | module_init(dccp_init); | 926 | module_init(dccp_init); |
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index d402e9020c68..78ec5344be86 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c | |||
@@ -149,7 +149,7 @@ static void dn_keepalive(struct sock *sk); | |||
149 | #define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1) | 149 | #define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1) |
150 | 150 | ||
151 | 151 | ||
152 | static struct proto_ops dn_proto_ops; | 152 | static const struct proto_ops dn_proto_ops; |
153 | static DEFINE_RWLOCK(dn_hash_lock); | 153 | static DEFINE_RWLOCK(dn_hash_lock); |
154 | static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; | 154 | static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; |
155 | static struct hlist_head dn_wild_sk; | 155 | static struct hlist_head dn_wild_sk; |
@@ -1252,7 +1252,7 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1252 | break; | 1252 | break; |
1253 | 1253 | ||
1254 | default: | 1254 | default: |
1255 | err = dev_ioctl(cmd, (void __user *)arg); | 1255 | err = -ENOIOCTLCMD; |
1256 | break; | 1256 | break; |
1257 | } | 1257 | } |
1258 | 1258 | ||
@@ -2342,7 +2342,7 @@ static struct net_proto_family dn_family_ops = { | |||
2342 | .owner = THIS_MODULE, | 2342 | .owner = THIS_MODULE, |
2343 | }; | 2343 | }; |
2344 | 2344 | ||
2345 | static struct proto_ops dn_proto_ops = { | 2345 | static const struct proto_ops dn_proto_ops = { |
2346 | .family = AF_DECnet, | 2346 | .family = AF_DECnet, |
2347 | .owner = THIS_MODULE, | 2347 | .owner = THIS_MODULE, |
2348 | .release = dn_release, | 2348 | .release = dn_release, |
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 8d0cc3cf3e49..33ab256cfd4a 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c | |||
@@ -408,11 +408,14 @@ int dn_neigh_router_hello(struct sk_buff *skb) | |||
408 | } | 408 | } |
409 | } | 409 | } |
410 | 410 | ||
411 | if (!dn_db->router) { | 411 | /* Only use routers in our area */ |
412 | dn_db->router = neigh_clone(neigh); | 412 | if ((dn_ntohs(src)>>10) == dn_ntohs((decnet_address)>>10)) { |
413 | } else { | 413 | if (!dn_db->router) { |
414 | if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority) | 414 | dn_db->router = neigh_clone(neigh); |
415 | neigh_release(xchg(&dn_db->router, neigh_clone(neigh))); | 415 | } else { |
416 | if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority) | ||
417 | neigh_release(xchg(&dn_db->router, neigh_clone(neigh))); | ||
418 | } | ||
416 | } | 419 | } |
417 | write_unlock(&neigh->lock); | 420 | write_unlock(&neigh->lock); |
418 | neigh_release(neigh); | 421 | neigh_release(neigh); |
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 369f25b60f3f..44bda85e678f 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c | |||
@@ -793,7 +793,6 @@ static int dn_nsp_rx_packet(struct sk_buff *skb) | |||
793 | got_it: | 793 | got_it: |
794 | if (sk != NULL) { | 794 | if (sk != NULL) { |
795 | struct dn_scp *scp = DN_SK(sk); | 795 | struct dn_scp *scp = DN_SK(sk); |
796 | int ret; | ||
797 | 796 | ||
798 | /* Reset backoff */ | 797 | /* Reset backoff */ |
799 | scp->nsp_rxtshift = 0; | 798 | scp->nsp_rxtshift = 0; |
@@ -807,21 +806,7 @@ got_it: | |||
807 | goto free_out; | 806 | goto free_out; |
808 | } | 807 | } |
809 | 808 | ||
810 | bh_lock_sock(sk); | 809 | return sk_receive_skb(sk, skb); |
811 | ret = NET_RX_SUCCESS; | ||
812 | if (decnet_debug_level & 8) | ||
813 | printk(KERN_DEBUG "NSP: 0x%02x 0x%02x 0x%04x 0x%04x %d\n", | ||
814 | (int)cb->rt_flags, (int)cb->nsp_flags, | ||
815 | (int)cb->src_port, (int)cb->dst_port, | ||
816 | !!sock_owned_by_user(sk)); | ||
817 | if (!sock_owned_by_user(sk)) | ||
818 | ret = dn_nsp_backlog_rcv(sk, skb); | ||
819 | else | ||
820 | sk_add_backlog(sk, skb); | ||
821 | bh_unlock_sock(sk); | ||
822 | sock_put(sk); | ||
823 | |||
824 | return ret; | ||
825 | } | 810 | } |
826 | 811 | ||
827 | return dn_nsp_no_socket(skb, reason); | 812 | return dn_nsp_no_socket(skb, reason); |
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 34fdac51df96..c792994d7952 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/if_arp.h> | 31 | #include <linux/if_arp.h> |
32 | #include <linux/wireless.h> | 32 | #include <linux/wireless.h> |
33 | #include <linux/skbuff.h> | 33 | #include <linux/skbuff.h> |
34 | #include <linux/udp.h> | ||
34 | #include <net/sock.h> | 35 | #include <net/sock.h> |
35 | #include <net/inet_common.h> | 36 | #include <net/inet_common.h> |
36 | #include <linux/stat.h> | 37 | #include <linux/stat.h> |
@@ -45,7 +46,7 @@ | |||
45 | #include <asm/uaccess.h> | 46 | #include <asm/uaccess.h> |
46 | #include <asm/system.h> | 47 | #include <asm/system.h> |
47 | 48 | ||
48 | static struct proto_ops econet_ops; | 49 | static const struct proto_ops econet_ops; |
49 | static struct hlist_head econet_sklist; | 50 | static struct hlist_head econet_sklist; |
50 | static DEFINE_RWLOCK(econet_lock); | 51 | static DEFINE_RWLOCK(econet_lock); |
51 | 52 | ||
@@ -56,7 +57,7 @@ static struct net_device *net2dev_map[256]; | |||
56 | #define EC_PORT_IP 0xd2 | 57 | #define EC_PORT_IP 0xd2 |
57 | 58 | ||
58 | #ifdef CONFIG_ECONET_AUNUDP | 59 | #ifdef CONFIG_ECONET_AUNUDP |
59 | static spinlock_t aun_queue_lock; | 60 | static DEFINE_SPINLOCK(aun_queue_lock); |
60 | static struct socket *udpsock; | 61 | static struct socket *udpsock; |
61 | #define AUN_PORT 0x8000 | 62 | #define AUN_PORT 0x8000 |
62 | 63 | ||
@@ -686,7 +687,7 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg | |||
686 | break; | 687 | break; |
687 | 688 | ||
688 | default: | 689 | default: |
689 | return dev_ioctl(cmd, argp); | 690 | return -ENOIOCTLCMD; |
690 | } | 691 | } |
691 | /*NOTREACHED*/ | 692 | /*NOTREACHED*/ |
692 | return 0; | 693 | return 0; |
@@ -698,7 +699,7 @@ static struct net_proto_family econet_family_ops = { | |||
698 | .owner = THIS_MODULE, | 699 | .owner = THIS_MODULE, |
699 | }; | 700 | }; |
700 | 701 | ||
701 | static struct proto_ops SOCKOPS_WRAPPED(econet_ops) = { | 702 | static const struct proto_ops SOCKOPS_WRAPPED(econet_ops) = { |
702 | .family = PF_ECONET, | 703 | .family = PF_ECONET, |
703 | .owner = THIS_MODULE, | 704 | .owner = THIS_MODULE, |
704 | .release = econet_release, | 705 | .release = econet_release, |
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 03efaacbdb73..4cc6f41c6930 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c | |||
@@ -410,9 +410,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, | |||
410 | return 1; | 410 | return 1; |
411 | } | 411 | } |
412 | 412 | ||
413 | if ((is_multicast_ether_addr(hdr->addr1) || | 413 | if (is_multicast_ether_addr(hdr->addr1) |
414 | is_broadcast_ether_addr(hdr->addr2)) ? ieee->host_mc_decrypt : | 414 | ? ieee->host_mc_decrypt : ieee->host_decrypt) { |
415 | ieee->host_decrypt) { | ||
416 | int idx = 0; | 415 | int idx = 0; |
417 | if (skb->len >= hdrlen + 3) | 416 | if (skb->len >= hdrlen + 3) |
418 | idx = skb->data[hdrlen + 3] >> 6; | 417 | idx = skb->data[hdrlen + 3] >> 6; |
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e55136ae09f4..011cca7ae02b 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -456,6 +456,14 @@ config TCP_CONG_BIC | |||
456 | increase provides TCP friendliness. | 456 | increase provides TCP friendliness. |
457 | See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/ | 457 | See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/ |
458 | 458 | ||
459 | config TCP_CONG_CUBIC | ||
460 | tristate "CUBIC TCP" | ||
461 | default m | ||
462 | ---help--- | ||
463 | This is version 2.0 of BIC-TCP which uses a cubic growth function | ||
464 | among other techniques. | ||
465 | See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf | ||
466 | |||
459 | config TCP_CONG_WESTWOOD | 467 | config TCP_CONG_WESTWOOD |
460 | tristate "TCP Westwood+" | 468 | tristate "TCP Westwood+" |
461 | default m | 469 | default m |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f0435d00db6b..c54edd76de09 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -34,6 +34,7 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o | |||
34 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o | 34 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o |
35 | obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o | 35 | obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o |
36 | obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o | 36 | obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o |
37 | obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o | ||
37 | obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o | 38 | obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o |
38 | obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o | 39 | obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o |
39 | obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o | 40 | obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d368cf249000..966a071a408c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -93,6 +93,7 @@ | |||
93 | #include <linux/smp_lock.h> | 93 | #include <linux/smp_lock.h> |
94 | #include <linux/inet.h> | 94 | #include <linux/inet.h> |
95 | #include <linux/igmp.h> | 95 | #include <linux/igmp.h> |
96 | #include <linux/inetdevice.h> | ||
96 | #include <linux/netdevice.h> | 97 | #include <linux/netdevice.h> |
97 | #include <net/ip.h> | 98 | #include <net/ip.h> |
98 | #include <net/protocol.h> | 99 | #include <net/protocol.h> |
@@ -302,6 +303,7 @@ lookup_protocol: | |||
302 | sk->sk_reuse = 1; | 303 | sk->sk_reuse = 1; |
303 | 304 | ||
304 | inet = inet_sk(sk); | 305 | inet = inet_sk(sk); |
306 | inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; | ||
305 | 307 | ||
306 | if (SOCK_RAW == sock->type) { | 308 | if (SOCK_RAW == sock->type) { |
307 | inet->num = protocol; | 309 | inet->num = protocol; |
@@ -775,16 +777,16 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
775 | err = devinet_ioctl(cmd, (void __user *)arg); | 777 | err = devinet_ioctl(cmd, (void __user *)arg); |
776 | break; | 778 | break; |
777 | default: | 779 | default: |
778 | if (!sk->sk_prot->ioctl || | 780 | if (sk->sk_prot->ioctl) |
779 | (err = sk->sk_prot->ioctl(sk, cmd, arg)) == | 781 | err = sk->sk_prot->ioctl(sk, cmd, arg); |
780 | -ENOIOCTLCMD) | 782 | else |
781 | err = dev_ioctl(cmd, (void __user *)arg); | 783 | err = -ENOIOCTLCMD; |
782 | break; | 784 | break; |
783 | } | 785 | } |
784 | return err; | 786 | return err; |
785 | } | 787 | } |
786 | 788 | ||
787 | struct proto_ops inet_stream_ops = { | 789 | const struct proto_ops inet_stream_ops = { |
788 | .family = PF_INET, | 790 | .family = PF_INET, |
789 | .owner = THIS_MODULE, | 791 | .owner = THIS_MODULE, |
790 | .release = inet_release, | 792 | .release = inet_release, |
@@ -805,7 +807,7 @@ struct proto_ops inet_stream_ops = { | |||
805 | .sendpage = tcp_sendpage | 807 | .sendpage = tcp_sendpage |
806 | }; | 808 | }; |
807 | 809 | ||
808 | struct proto_ops inet_dgram_ops = { | 810 | const struct proto_ops inet_dgram_ops = { |
809 | .family = PF_INET, | 811 | .family = PF_INET, |
810 | .owner = THIS_MODULE, | 812 | .owner = THIS_MODULE, |
811 | .release = inet_release, | 813 | .release = inet_release, |
@@ -830,7 +832,7 @@ struct proto_ops inet_dgram_ops = { | |||
830 | * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without | 832 | * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without |
831 | * udp_poll | 833 | * udp_poll |
832 | */ | 834 | */ |
833 | static struct proto_ops inet_sockraw_ops = { | 835 | static const struct proto_ops inet_sockraw_ops = { |
834 | .family = PF_INET, | 836 | .family = PF_INET, |
835 | .owner = THIS_MODULE, | 837 | .owner = THIS_MODULE, |
836 | .release = inet_release, | 838 | .release = inet_release, |
@@ -869,7 +871,8 @@ static struct inet_protosw inetsw_array[] = | |||
869 | .ops = &inet_stream_ops, | 871 | .ops = &inet_stream_ops, |
870 | .capability = -1, | 872 | .capability = -1, |
871 | .no_check = 0, | 873 | .no_check = 0, |
872 | .flags = INET_PROTOSW_PERMANENT, | 874 | .flags = INET_PROTOSW_PERMANENT | |
875 | INET_PROTOSW_ICSK, | ||
873 | }, | 876 | }, |
874 | 877 | ||
875 | { | 878 | { |
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 035ad2c9e1ba..aed537fa2c88 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/crypto.h> | 6 | #include <linux/crypto.h> |
7 | #include <linux/pfkeyv2.h> | 7 | #include <linux/pfkeyv2.h> |
8 | #include <net/icmp.h> | 8 | #include <net/icmp.h> |
9 | #include <net/protocol.h> | ||
9 | #include <asm/scatterlist.h> | 10 | #include <asm/scatterlist.h> |
10 | 11 | ||
11 | 12 | ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index b425748f02d7..37432088fe6d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -86,6 +86,7 @@ | |||
86 | #include <linux/in.h> | 86 | #include <linux/in.h> |
87 | #include <linux/mm.h> | 87 | #include <linux/mm.h> |
88 | #include <linux/inet.h> | 88 | #include <linux/inet.h> |
89 | #include <linux/inetdevice.h> | ||
89 | #include <linux/netdevice.h> | 90 | #include <linux/netdevice.h> |
90 | #include <linux/etherdevice.h> | 91 | #include <linux/etherdevice.h> |
91 | #include <linux/fddidevice.h> | 92 | #include <linux/fddidevice.h> |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 04a6fe3e95a2..7b9bb28e2ee9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #endif | 58 | #endif |
59 | #include <linux/kmod.h> | 59 | #include <linux/kmod.h> |
60 | 60 | ||
61 | #include <net/arp.h> | ||
61 | #include <net/ip.h> | 62 | #include <net/ip.h> |
62 | #include <net/route.h> | 63 | #include <net/route.h> |
63 | #include <net/ip_fib.h> | 64 | #include <net/ip_fib.h> |
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1b18ce66e7b7..73bfcae8af9c 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/pfkeyv2.h> | 9 | #include <linux/pfkeyv2.h> |
10 | #include <linux/random.h> | 10 | #include <linux/random.h> |
11 | #include <net/icmp.h> | 11 | #include <net/icmp.h> |
12 | #include <net/protocol.h> | ||
12 | #include <net/udp.h> | 13 | #include <net/udp.h> |
13 | 14 | ||
14 | /* decapsulation data for use when post-processing */ | 15 | /* decapsulation data for use when post-processing */ |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 19b1b984d687..18f5e509281a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/errno.h> | 30 | #include <linux/errno.h> |
31 | #include <linux/in.h> | 31 | #include <linux/in.h> |
32 | #include <linux/inet.h> | 32 | #include <linux/inet.h> |
33 | #include <linux/inetdevice.h> | ||
33 | #include <linux/netdevice.h> | 34 | #include <linux/netdevice.h> |
34 | #include <linux/if_arp.h> | 35 | #include <linux/if_arp.h> |
35 | #include <linux/skbuff.h> | 36 | #include <linux/skbuff.h> |
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 7ea0209cb169..e2890ec8159e 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/errno.h> | 29 | #include <linux/errno.h> |
30 | #include <linux/in.h> | 30 | #include <linux/in.h> |
31 | #include <linux/inet.h> | 31 | #include <linux/inet.h> |
32 | #include <linux/inetdevice.h> | ||
32 | #include <linux/netdevice.h> | 33 | #include <linux/netdevice.h> |
33 | #include <linux/if_arp.h> | 34 | #include <linux/if_arp.h> |
34 | #include <linux/proc_fs.h> | 35 | #include <linux/proc_fs.h> |
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 0b298bbc1518..0dd4d06e456d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/errno.h> | 33 | #include <linux/errno.h> |
34 | #include <linux/in.h> | 34 | #include <linux/in.h> |
35 | #include <linux/inet.h> | 35 | #include <linux/inet.h> |
36 | #include <linux/inetdevice.h> | ||
36 | #include <linux/netdevice.h> | 37 | #include <linux/netdevice.h> |
37 | #include <linux/if_arp.h> | 38 | #include <linux/if_arp.h> |
38 | #include <linux/proc_fs.h> | 39 | #include <linux/proc_fs.h> |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6d2a6ac070e3..ef4724de7350 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/errno.h> | 29 | #include <linux/errno.h> |
30 | #include <linux/in.h> | 30 | #include <linux/in.h> |
31 | #include <linux/inet.h> | 31 | #include <linux/inet.h> |
32 | #include <linux/inetdevice.h> | ||
32 | #include <linux/netdevice.h> | 33 | #include <linux/netdevice.h> |
33 | #include <linux/if_arp.h> | 34 | #include <linux/if_arp.h> |
34 | #include <linux/proc_fs.h> | 35 | #include <linux/proc_fs.h> |
@@ -36,6 +37,7 @@ | |||
36 | #include <linux/netlink.h> | 37 | #include <linux/netlink.h> |
37 | #include <linux/init.h> | 38 | #include <linux/init.h> |
38 | 39 | ||
40 | #include <net/arp.h> | ||
39 | #include <net/ip.h> | 41 | #include <net/ip.h> |
40 | #include <net/protocol.h> | 42 | #include <net/protocol.h> |
41 | #include <net/route.h> | 43 | #include <net/route.h> |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 705e3ce86df9..e320b32373e5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -41,6 +41,13 @@ | |||
41 | * modify it under the terms of the GNU General Public License | 41 | * modify it under the terms of the GNU General Public License |
42 | * as published by the Free Software Foundation; either version | 42 | * as published by the Free Software Foundation; either version |
43 | * 2 of the License, or (at your option) any later version. | 43 | * 2 of the License, or (at your option) any later version. |
44 | * | ||
45 | * Substantial contributions to this work comes from: | ||
46 | * | ||
47 | * David S. Miller, <davem@davemloft.net> | ||
48 | * Stephen Hemminger <shemminger@osdl.org> | ||
49 | * Paul E. McKenney <paulmck@us.ibm.com> | ||
50 | * Patrick McHardy <kaber@trash.net> | ||
44 | */ | 51 | */ |
45 | 52 | ||
46 | #define VERSION "0.404" | 53 | #define VERSION "0.404" |
@@ -59,6 +66,7 @@ | |||
59 | #include <linux/errno.h> | 66 | #include <linux/errno.h> |
60 | #include <linux/in.h> | 67 | #include <linux/in.h> |
61 | #include <linux/inet.h> | 68 | #include <linux/inet.h> |
69 | #include <linux/inetdevice.h> | ||
62 | #include <linux/netdevice.h> | 70 | #include <linux/netdevice.h> |
63 | #include <linux/if_arp.h> | 71 | #include <linux/if_arp.h> |
64 | #include <linux/proc_fs.h> | 72 | #include <linux/proc_fs.h> |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 92e23b2ad4d2..be5a519cd2f8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -73,6 +73,7 @@ | |||
73 | #include <linux/socket.h> | 73 | #include <linux/socket.h> |
74 | #include <linux/in.h> | 74 | #include <linux/in.h> |
75 | #include <linux/inet.h> | 75 | #include <linux/inet.h> |
76 | #include <linux/inetdevice.h> | ||
76 | #include <linux/netdevice.h> | 77 | #include <linux/netdevice.h> |
77 | #include <linux/string.h> | 78 | #include <linux/string.h> |
78 | #include <linux/netfilter_ipv4.h> | 79 | #include <linux/netfilter_ipv4.h> |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 4a195c724f01..34758118c10c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -91,6 +91,8 @@ | |||
91 | #include <linux/if_arp.h> | 91 | #include <linux/if_arp.h> |
92 | #include <linux/rtnetlink.h> | 92 | #include <linux/rtnetlink.h> |
93 | #include <linux/times.h> | 93 | #include <linux/times.h> |
94 | |||
95 | #include <net/arp.h> | ||
94 | #include <net/ip.h> | 96 | #include <net/ip.h> |
95 | #include <net/protocol.h> | 97 | #include <net/protocol.h> |
96 | #include <net/route.h> | 98 | #include <net/route.h> |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3fe021f1a566..ae20281d8deb 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -37,7 +37,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); | |||
37 | */ | 37 | */ |
38 | int sysctl_local_port_range[2] = { 1024, 4999 }; | 38 | int sysctl_local_port_range[2] = { 1024, 4999 }; |
39 | 39 | ||
40 | static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) | 40 | int inet_csk_bind_conflict(const struct sock *sk, |
41 | const struct inet_bind_bucket *tb) | ||
41 | { | 42 | { |
42 | const u32 sk_rcv_saddr = inet_rcv_saddr(sk); | 43 | const u32 sk_rcv_saddr = inet_rcv_saddr(sk); |
43 | struct sock *sk2; | 44 | struct sock *sk2; |
@@ -62,11 +63,15 @@ static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucke | |||
62 | return node != NULL; | 63 | return node != NULL; |
63 | } | 64 | } |
64 | 65 | ||
66 | EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); | ||
67 | |||
65 | /* Obtain a reference to a local port for the given sock, | 68 | /* Obtain a reference to a local port for the given sock, |
66 | * if snum is zero it means select any available local port. | 69 | * if snum is zero it means select any available local port. |
67 | */ | 70 | */ |
68 | int inet_csk_get_port(struct inet_hashinfo *hashinfo, | 71 | int inet_csk_get_port(struct inet_hashinfo *hashinfo, |
69 | struct sock *sk, unsigned short snum) | 72 | struct sock *sk, unsigned short snum, |
73 | int (*bind_conflict)(const struct sock *sk, | ||
74 | const struct inet_bind_bucket *tb)) | ||
70 | { | 75 | { |
71 | struct inet_bind_hashbucket *head; | 76 | struct inet_bind_hashbucket *head; |
72 | struct hlist_node *node; | 77 | struct hlist_node *node; |
@@ -125,7 +130,7 @@ tb_found: | |||
125 | goto success; | 130 | goto success; |
126 | } else { | 131 | } else { |
127 | ret = 1; | 132 | ret = 1; |
128 | if (inet_csk_bind_conflict(sk, tb)) | 133 | if (bind_conflict(sk, tb)) |
129 | goto fail_unlock; | 134 | goto fail_unlock; |
130 | } | 135 | } |
131 | } | 136 | } |
@@ -380,7 +385,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, | |||
380 | EXPORT_SYMBOL_GPL(inet_csk_search_req); | 385 | EXPORT_SYMBOL_GPL(inet_csk_search_req); |
381 | 386 | ||
382 | void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, | 387 | void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, |
383 | const unsigned timeout) | 388 | unsigned long timeout) |
384 | { | 389 | { |
385 | struct inet_connection_sock *icsk = inet_csk(sk); | 390 | struct inet_connection_sock *icsk = inet_csk(sk); |
386 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | 391 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; |
@@ -631,3 +636,15 @@ void inet_csk_listen_stop(struct sock *sk) | |||
631 | } | 636 | } |
632 | 637 | ||
633 | EXPORT_SYMBOL_GPL(inet_csk_listen_stop); | 638 | EXPORT_SYMBOL_GPL(inet_csk_listen_stop); |
639 | |||
640 | void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) | ||
641 | { | ||
642 | struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; | ||
643 | const struct inet_sock *inet = inet_sk(sk); | ||
644 | |||
645 | sin->sin_family = AF_INET; | ||
646 | sin->sin_addr.s_addr = inet->daddr; | ||
647 | sin->sin_port = inet->dport; | ||
648 | } | ||
649 | |||
650 | EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); | ||
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 39061ed53cfd..c49908192047 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -112,12 +112,12 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, | |||
112 | r->idiag_inode = 0; | 112 | r->idiag_inode = 0; |
113 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 113 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) |
114 | if (r->idiag_family == AF_INET6) { | 114 | if (r->idiag_family == AF_INET6) { |
115 | const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); | 115 | const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); |
116 | 116 | ||
117 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, | 117 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, |
118 | &tcp6tw->tw_v6_rcv_saddr); | 118 | &tw6->tw_v6_rcv_saddr); |
119 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, | 119 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, |
120 | &tcp6tw->tw_v6_daddr); | 120 | &tw6->tw_v6_daddr); |
121 | } | 121 | } |
122 | #endif | 122 | #endif |
123 | nlh->nlmsg_len = skb->tail - b; | 123 | nlh->nlmsg_len = skb->tail - b; |
@@ -489,9 +489,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, | |||
489 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 489 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) |
490 | if (r->idiag_family == AF_INET6) { | 490 | if (r->idiag_family == AF_INET6) { |
491 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, | 491 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, |
492 | &tcp6_rsk(req)->loc_addr); | 492 | &inet6_rsk(req)->loc_addr); |
493 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, | 493 | ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, |
494 | &tcp6_rsk(req)->rmt_addr); | 494 | &inet6_rsk(req)->rmt_addr); |
495 | } | 495 | } |
496 | #endif | 496 | #endif |
497 | nlh->nlmsg_len = skb->tail - b; | 497 | nlh->nlmsg_len = skb->tail - b; |
@@ -553,13 +553,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, | |||
553 | entry.saddr = | 553 | entry.saddr = |
554 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 554 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) |
555 | (entry.family == AF_INET6) ? | 555 | (entry.family == AF_INET6) ? |
556 | tcp6_rsk(req)->loc_addr.s6_addr32 : | 556 | inet6_rsk(req)->loc_addr.s6_addr32 : |
557 | #endif | 557 | #endif |
558 | &ireq->loc_addr; | 558 | &ireq->loc_addr; |
559 | entry.daddr = | 559 | entry.daddr = |
560 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 560 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) |
561 | (entry.family == AF_INET6) ? | 561 | (entry.family == AF_INET6) ? |
562 | tcp6_rsk(req)->rmt_addr.s6_addr32 : | 562 | inet6_rsk(req)->rmt_addr.s6_addr32 : |
563 | #endif | 563 | #endif |
564 | &ireq->rmt_addr; | 564 | &ireq->rmt_addr; |
565 | entry.dport = ntohs(ireq->rmt_port); | 565 | entry.dport = ntohs(ireq->rmt_port); |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e8d29fe736d2..33228115cda4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -15,12 +15,14 @@ | |||
15 | 15 | ||
16 | #include <linux/config.h> | 16 | #include <linux/config.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/random.h> | ||
18 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
19 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
20 | #include <linux/wait.h> | 21 | #include <linux/wait.h> |
21 | 22 | ||
22 | #include <net/inet_connection_sock.h> | 23 | #include <net/inet_connection_sock.h> |
23 | #include <net/inet_hashtables.h> | 24 | #include <net/inet_hashtables.h> |
25 | #include <net/ip.h> | ||
24 | 26 | ||
25 | /* | 27 | /* |
26 | * Allocate and initialize a new local port bind bucket. | 28 | * Allocate and initialize a new local port bind bucket. |
@@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad | |||
163 | } | 165 | } |
164 | 166 | ||
165 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); | 167 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); |
168 | |||
169 | /* called with local bh disabled */ | ||
170 | static int __inet_check_established(struct inet_timewait_death_row *death_row, | ||
171 | struct sock *sk, __u16 lport, | ||
172 | struct inet_timewait_sock **twp) | ||
173 | { | ||
174 | struct inet_hashinfo *hinfo = death_row->hashinfo; | ||
175 | struct inet_sock *inet = inet_sk(sk); | ||
176 | u32 daddr = inet->rcv_saddr; | ||
177 | u32 saddr = inet->daddr; | ||
178 | int dif = sk->sk_bound_dev_if; | ||
179 | INET_ADDR_COOKIE(acookie, saddr, daddr) | ||
180 | const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); | ||
181 | unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); | ||
182 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | ||
183 | struct sock *sk2; | ||
184 | const struct hlist_node *node; | ||
185 | struct inet_timewait_sock *tw; | ||
186 | |||
187 | prefetch(head->chain.first); | ||
188 | write_lock(&head->lock); | ||
189 | |||
190 | /* Check TIME-WAIT sockets first. */ | ||
191 | sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { | ||
192 | tw = inet_twsk(sk2); | ||
193 | |||
194 | if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { | ||
195 | if (twsk_unique(sk, sk2, twp)) | ||
196 | goto unique; | ||
197 | else | ||
198 | goto not_unique; | ||
199 | } | ||
200 | } | ||
201 | tw = NULL; | ||
202 | |||
203 | /* And established part... */ | ||
204 | sk_for_each(sk2, node, &head->chain) { | ||
205 | if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) | ||
206 | goto not_unique; | ||
207 | } | ||
208 | |||
209 | unique: | ||
210 | /* Must record num and sport now. Otherwise we will see | ||
211 | * in hash table socket with a funny identity. */ | ||
212 | inet->num = lport; | ||
213 | inet->sport = htons(lport); | ||
214 | sk->sk_hash = hash; | ||
215 | BUG_TRAP(sk_unhashed(sk)); | ||
216 | __sk_add_node(sk, &head->chain); | ||
217 | sock_prot_inc_use(sk->sk_prot); | ||
218 | write_unlock(&head->lock); | ||
219 | |||
220 | if (twp) { | ||
221 | *twp = tw; | ||
222 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
223 | } else if (tw) { | ||
224 | /* Silly. Should hash-dance instead... */ | ||
225 | inet_twsk_deschedule(tw, death_row); | ||
226 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
227 | |||
228 | inet_twsk_put(tw); | ||
229 | } | ||
230 | |||
231 | return 0; | ||
232 | |||
233 | not_unique: | ||
234 | write_unlock(&head->lock); | ||
235 | return -EADDRNOTAVAIL; | ||
236 | } | ||
237 | |||
238 | static inline u32 inet_sk_port_offset(const struct sock *sk) | ||
239 | { | ||
240 | const struct inet_sock *inet = inet_sk(sk); | ||
241 | return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, | ||
242 | inet->dport); | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * Bind a port for a connect operation and hash it. | ||
247 | */ | ||
248 | int inet_hash_connect(struct inet_timewait_death_row *death_row, | ||
249 | struct sock *sk) | ||
250 | { | ||
251 | struct inet_hashinfo *hinfo = death_row->hashinfo; | ||
252 | const unsigned short snum = inet_sk(sk)->num; | ||
253 | struct inet_bind_hashbucket *head; | ||
254 | struct inet_bind_bucket *tb; | ||
255 | int ret; | ||
256 | |||
257 | if (!snum) { | ||
258 | int low = sysctl_local_port_range[0]; | ||
259 | int high = sysctl_local_port_range[1]; | ||
260 | int range = high - low; | ||
261 | int i; | ||
262 | int port; | ||
263 | static u32 hint; | ||
264 | u32 offset = hint + inet_sk_port_offset(sk); | ||
265 | struct hlist_node *node; | ||
266 | struct inet_timewait_sock *tw = NULL; | ||
267 | |||
268 | local_bh_disable(); | ||
269 | for (i = 1; i <= range; i++) { | ||
270 | port = low + (i + offset) % range; | ||
271 | head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; | ||
272 | spin_lock(&head->lock); | ||
273 | |||
274 | /* Does not bother with rcv_saddr checks, | ||
275 | * because the established check is already | ||
276 | * unique enough. | ||
277 | */ | ||
278 | inet_bind_bucket_for_each(tb, node, &head->chain) { | ||
279 | if (tb->port == port) { | ||
280 | BUG_TRAP(!hlist_empty(&tb->owners)); | ||
281 | if (tb->fastreuse >= 0) | ||
282 | goto next_port; | ||
283 | if (!__inet_check_established(death_row, | ||
284 | sk, port, | ||
285 | &tw)) | ||
286 | goto ok; | ||
287 | goto next_port; | ||
288 | } | ||
289 | } | ||
290 | |||
291 | tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); | ||
292 | if (!tb) { | ||
293 | spin_unlock(&head->lock); | ||
294 | break; | ||
295 | } | ||
296 | tb->fastreuse = -1; | ||
297 | goto ok; | ||
298 | |||
299 | next_port: | ||
300 | spin_unlock(&head->lock); | ||
301 | } | ||
302 | local_bh_enable(); | ||
303 | |||
304 | return -EADDRNOTAVAIL; | ||
305 | |||
306 | ok: | ||
307 | hint += i; | ||
308 | |||
309 | /* Head lock still held and bh's disabled */ | ||
310 | inet_bind_hash(sk, tb, port); | ||
311 | if (sk_unhashed(sk)) { | ||
312 | inet_sk(sk)->sport = htons(port); | ||
313 | __inet_hash(hinfo, sk, 0); | ||
314 | } | ||
315 | spin_unlock(&head->lock); | ||
316 | |||
317 | if (tw) { | ||
318 | inet_twsk_deschedule(tw, death_row);; | ||
319 | inet_twsk_put(tw); | ||
320 | } | ||
321 | |||
322 | ret = 0; | ||
323 | goto out; | ||
324 | } | ||
325 | |||
326 | head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; | ||
327 | tb = inet_csk(sk)->icsk_bind_hash; | ||
328 | spin_lock_bh(&head->lock); | ||
329 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | ||
330 | __inet_hash(hinfo, sk, 0); | ||
331 | spin_unlock_bh(&head->lock); | ||
332 | return 0; | ||
333 | } else { | ||
334 | spin_unlock(&head->lock); | ||
335 | /* No definite answer... Walk to established hash table */ | ||
336 | ret = __inet_check_established(death_row, sk, snum, NULL); | ||
337 | out: | ||
338 | local_bh_enable(); | ||
339 | return ret; | ||
340 | } | ||
341 | } | ||
342 | |||
343 | EXPORT_SYMBOL_GPL(inet_hash_connect); | ||
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index a010e9a68811..417f126c749e 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -90,8 +90,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); | |||
90 | 90 | ||
91 | struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) | 91 | struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) |
92 | { | 92 | { |
93 | struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, | 93 | struct inet_timewait_sock *tw = |
94 | SLAB_ATOMIC); | 94 | kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, |
95 | SLAB_ATOMIC); | ||
95 | if (tw != NULL) { | 96 | if (tw != NULL) { |
96 | const struct inet_sock *inet = inet_sk(sk); | 97 | const struct inet_sock *inet = inet_sk(sk); |
97 | 98 | ||
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2fc3fd38924f..ce5fe3f74a3d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -401,6 +401,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create) | |||
401 | return NULL; | 401 | return NULL; |
402 | n->v4daddr = daddr; | 402 | n->v4daddr = daddr; |
403 | atomic_set(&n->refcnt, 1); | 403 | atomic_set(&n->refcnt, 1); |
404 | atomic_set(&n->rid, 0); | ||
404 | n->ip_id_count = secure_ip_id(daddr); | 405 | n->ip_id_count = secure_ip_id(daddr); |
405 | n->tcp_ts_stamp = 0; | 406 | n->tcp_ts_stamp = 0; |
406 | 407 | ||
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8ce0ce2ee48e..ce2b70ce4018 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -22,6 +22,7 @@ | |||
22 | * Patrick McHardy : LRU queue of frag heads for evictor. | 22 | * Patrick McHardy : LRU queue of frag heads for evictor. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/compiler.h> | ||
25 | #include <linux/config.h> | 26 | #include <linux/config.h> |
26 | #include <linux/module.h> | 27 | #include <linux/module.h> |
27 | #include <linux/types.h> | 28 | #include <linux/types.h> |
@@ -38,6 +39,7 @@ | |||
38 | #include <net/ip.h> | 39 | #include <net/ip.h> |
39 | #include <net/icmp.h> | 40 | #include <net/icmp.h> |
40 | #include <net/checksum.h> | 41 | #include <net/checksum.h> |
42 | #include <net/inetpeer.h> | ||
41 | #include <linux/tcp.h> | 43 | #include <linux/tcp.h> |
42 | #include <linux/udp.h> | 44 | #include <linux/udp.h> |
43 | #include <linux/inet.h> | 45 | #include <linux/inet.h> |
@@ -56,6 +58,8 @@ | |||
56 | int sysctl_ipfrag_high_thresh = 256*1024; | 58 | int sysctl_ipfrag_high_thresh = 256*1024; |
57 | int sysctl_ipfrag_low_thresh = 192*1024; | 59 | int sysctl_ipfrag_low_thresh = 192*1024; |
58 | 60 | ||
61 | int sysctl_ipfrag_max_dist = 64; | ||
62 | |||
59 | /* Important NOTE! Fragment queue must be destroyed before MSL expires. | 63 | /* Important NOTE! Fragment queue must be destroyed before MSL expires. |
60 | * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. | 64 | * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. |
61 | */ | 65 | */ |
@@ -89,8 +93,10 @@ struct ipq { | |||
89 | spinlock_t lock; | 93 | spinlock_t lock; |
90 | atomic_t refcnt; | 94 | atomic_t refcnt; |
91 | struct timer_list timer; /* when will this queue expire? */ | 95 | struct timer_list timer; /* when will this queue expire? */ |
92 | int iif; | ||
93 | struct timeval stamp; | 96 | struct timeval stamp; |
97 | int iif; | ||
98 | unsigned int rid; | ||
99 | struct inet_peer *peer; | ||
94 | }; | 100 | }; |
95 | 101 | ||
96 | /* Hash table. */ | 102 | /* Hash table. */ |
@@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work) | |||
195 | BUG_TRAP(qp->last_in&COMPLETE); | 201 | BUG_TRAP(qp->last_in&COMPLETE); |
196 | BUG_TRAP(del_timer(&qp->timer) == 0); | 202 | BUG_TRAP(del_timer(&qp->timer) == 0); |
197 | 203 | ||
204 | if (qp->peer) | ||
205 | inet_putpeer(qp->peer); | ||
206 | |||
198 | /* Release all fragment data. */ | 207 | /* Release all fragment data. */ |
199 | fp = qp->fragments; | 208 | fp = qp->fragments; |
200 | while (fp) { | 209 | while (fp) { |
@@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) | |||
353 | qp->meat = 0; | 362 | qp->meat = 0; |
354 | qp->fragments = NULL; | 363 | qp->fragments = NULL; |
355 | qp->iif = 0; | 364 | qp->iif = 0; |
365 | qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; | ||
356 | 366 | ||
357 | /* Initialize a timer for this entry. */ | 367 | /* Initialize a timer for this entry. */ |
358 | init_timer(&qp->timer); | 368 | init_timer(&qp->timer); |
@@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) | |||
398 | return ip_frag_create(hash, iph, user); | 408 | return ip_frag_create(hash, iph, user); |
399 | } | 409 | } |
400 | 410 | ||
411 | /* Is the fragment too far ahead to be part of ipq? */ | ||
412 | static inline int ip_frag_too_far(struct ipq *qp) | ||
413 | { | ||
414 | struct inet_peer *peer = qp->peer; | ||
415 | unsigned int max = sysctl_ipfrag_max_dist; | ||
416 | unsigned int start, end; | ||
417 | |||
418 | int rc; | ||
419 | |||
420 | if (!peer || !max) | ||
421 | return 0; | ||
422 | |||
423 | start = qp->rid; | ||
424 | end = atomic_inc_return(&peer->rid); | ||
425 | qp->rid = end; | ||
426 | |||
427 | rc = qp->fragments && (end - start) > max; | ||
428 | |||
429 | if (rc) { | ||
430 | IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); | ||
431 | } | ||
432 | |||
433 | return rc; | ||
434 | } | ||
435 | |||
436 | static int ip_frag_reinit(struct ipq *qp) | ||
437 | { | ||
438 | struct sk_buff *fp; | ||
439 | |||
440 | if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { | ||
441 | atomic_inc(&qp->refcnt); | ||
442 | return -ETIMEDOUT; | ||
443 | } | ||
444 | |||
445 | fp = qp->fragments; | ||
446 | do { | ||
447 | struct sk_buff *xp = fp->next; | ||
448 | frag_kfree_skb(fp, NULL); | ||
449 | fp = xp; | ||
450 | } while (fp); | ||
451 | |||
452 | qp->last_in = 0; | ||
453 | qp->len = 0; | ||
454 | qp->meat = 0; | ||
455 | qp->fragments = NULL; | ||
456 | qp->iif = 0; | ||
457 | |||
458 | return 0; | ||
459 | } | ||
460 | |||
401 | /* Add new segment to existing queue. */ | 461 | /* Add new segment to existing queue. */ |
402 | static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | 462 | static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) |
403 | { | 463 | { |
@@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
408 | if (qp->last_in & COMPLETE) | 468 | if (qp->last_in & COMPLETE) |
409 | goto err; | 469 | goto err; |
410 | 470 | ||
471 | if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && | ||
472 | unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { | ||
473 | ipq_kill(qp); | ||
474 | goto err; | ||
475 | } | ||
476 | |||
411 | offset = ntohs(skb->nh.iph->frag_off); | 477 | offset = ntohs(skb->nh.iph->frag_off); |
412 | flags = offset & ~IP_OFFSET; | 478 | flags = offset & ~IP_OFFSET; |
413 | offset &= IP_OFFSET; | 479 | offset &= IP_OFFSET; |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 473d0f2b2e0d..e45846ae570b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -128,6 +128,7 @@ | |||
128 | #include <linux/sockios.h> | 128 | #include <linux/sockios.h> |
129 | #include <linux/in.h> | 129 | #include <linux/in.h> |
130 | #include <linux/inet.h> | 130 | #include <linux/inet.h> |
131 | #include <linux/inetdevice.h> | ||
131 | #include <linux/netdevice.h> | 132 | #include <linux/netdevice.h> |
132 | #include <linux/etherdevice.h> | 133 | #include <linux/etherdevice.h> |
133 | 134 | ||
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index dbe12da8d8b3..d3f6c468faf4 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <net/sock.h> | 22 | #include <net/sock.h> |
23 | #include <net/ip.h> | 23 | #include <net/ip.h> |
24 | #include <net/icmp.h> | 24 | #include <net/icmp.h> |
25 | #include <net/route.h> | ||
25 | 26 | ||
26 | /* | 27 | /* |
27 | * Write options to IP header, record destination address to | 28 | * Write options to IP header, record destination address to |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index eba64e2bd397..2a830de3a699 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -445,6 +445,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
445 | 445 | ||
446 | hlen = iph->ihl * 4; | 446 | hlen = iph->ihl * 4; |
447 | mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ | 447 | mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ |
448 | IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; | ||
448 | 449 | ||
449 | /* When frag_list is given, use it. First, check its validity: | 450 | /* When frag_list is given, use it. First, check its validity: |
450 | * some transformers could create wrong frag_list or break existing | 451 | * some transformers could create wrong frag_list or break existing |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4f2d87257309..6986e11d65cc 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -25,12 +25,12 @@ | |||
25 | #include <linux/skbuff.h> | 25 | #include <linux/skbuff.h> |
26 | #include <linux/ip.h> | 26 | #include <linux/ip.h> |
27 | #include <linux/icmp.h> | 27 | #include <linux/icmp.h> |
28 | #include <linux/inetdevice.h> | ||
28 | #include <linux/netdevice.h> | 29 | #include <linux/netdevice.h> |
29 | #include <net/sock.h> | 30 | #include <net/sock.h> |
30 | #include <net/ip.h> | 31 | #include <net/ip.h> |
31 | #include <net/icmp.h> | 32 | #include <net/icmp.h> |
32 | #include <net/tcp.h> | 33 | #include <net/tcp_states.h> |
33 | #include <linux/tcp.h> | ||
34 | #include <linux/udp.h> | 34 | #include <linux/udp.h> |
35 | #include <linux/igmp.h> | 35 | #include <linux/igmp.h> |
36 | #include <linux/netfilter.h> | 36 | #include <linux/netfilter.h> |
@@ -427,8 +427,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
427 | err = ip_options_get_from_user(&opt, optval, optlen); | 427 | err = ip_options_get_from_user(&opt, optval, optlen); |
428 | if (err) | 428 | if (err) |
429 | break; | 429 | break; |
430 | if (sk->sk_type == SOCK_STREAM) { | 430 | if (inet->is_icsk) { |
431 | struct tcp_sock *tp = tcp_sk(sk); | 431 | struct inet_connection_sock *icsk = inet_csk(sk); |
432 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 432 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
433 | if (sk->sk_family == PF_INET || | 433 | if (sk->sk_family == PF_INET || |
434 | (!((1 << sk->sk_state) & | 434 | (!((1 << sk->sk_state) & |
@@ -436,10 +436,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
436 | inet->daddr != LOOPBACK4_IPV6)) { | 436 | inet->daddr != LOOPBACK4_IPV6)) { |
437 | #endif | 437 | #endif |
438 | if (inet->opt) | 438 | if (inet->opt) |
439 | tp->ext_header_len -= inet->opt->optlen; | 439 | icsk->icsk_ext_hdr_len -= inet->opt->optlen; |
440 | if (opt) | 440 | if (opt) |
441 | tp->ext_header_len += opt->optlen; | 441 | icsk->icsk_ext_hdr_len += opt->optlen; |
442 | tcp_sync_mss(sk, tp->pmtu_cookie); | 442 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); |
443 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 443 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
444 | } | 444 | } |
445 | #endif | 445 | #endif |
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index fc718df17b40..d64e2ec8da7b 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <net/xfrm.h> | 28 | #include <net/xfrm.h> |
29 | #include <net/icmp.h> | 29 | #include <net/icmp.h> |
30 | #include <net/ipcomp.h> | 30 | #include <net/ipcomp.h> |
31 | #include <net/protocol.h> | ||
31 | 32 | ||
32 | struct ipcomp_tfms { | 33 | struct ipcomp_tfms { |
33 | struct list_head list; | 34 | struct list_head list; |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e8674baaa8d9..bb3613ec448c 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/in.h> | 42 | #include <linux/in.h> |
43 | #include <linux/if.h> | 43 | #include <linux/if.h> |
44 | #include <linux/inet.h> | 44 | #include <linux/inet.h> |
45 | #include <linux/inetdevice.h> | ||
45 | #include <linux/netdevice.h> | 46 | #include <linux/netdevice.h> |
46 | #include <linux/if_arp.h> | 47 | #include <linux/if_arp.h> |
47 | #include <linux/skbuff.h> | 48 | #include <linux/skbuff.h> |
@@ -58,6 +59,7 @@ | |||
58 | #include <net/arp.h> | 59 | #include <net/arp.h> |
59 | #include <net/ip.h> | 60 | #include <net/ip.h> |
60 | #include <net/ipconfig.h> | 61 | #include <net/ipconfig.h> |
62 | #include <net/route.h> | ||
61 | 63 | ||
62 | #include <asm/uaccess.h> | 64 | #include <asm/uaccess.h> |
63 | #include <net/checksum.h> | 65 | #include <net/checksum.h> |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 302b7eb507c9..caa3b7d2e48a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <net/ip.h> | 52 | #include <net/ip.h> |
53 | #include <net/protocol.h> | 53 | #include <net/protocol.h> |
54 | #include <linux/skbuff.h> | 54 | #include <linux/skbuff.h> |
55 | #include <net/route.h> | ||
55 | #include <net/sock.h> | 56 | #include <net/sock.h> |
56 | #include <net/icmp.h> | 57 | #include <net/icmp.h> |
57 | #include <net/udp.h> | 58 | #include <net/udp.h> |
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index d7eb680101c2..9b176a942ac5 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c | |||
@@ -224,34 +224,6 @@ void unregister_ip_vs_app(struct ip_vs_app *app) | |||
224 | } | 224 | } |
225 | 225 | ||
226 | 226 | ||
227 | #if 0000 | ||
228 | /* | ||
229 | * Get reference to app by name (called from user context) | ||
230 | */ | ||
231 | struct ip_vs_app *ip_vs_app_get_by_name(char *appname) | ||
232 | { | ||
233 | struct ip_vs_app *app, *a = NULL; | ||
234 | |||
235 | down(&__ip_vs_app_mutex); | ||
236 | |||
237 | list_for_each_entry(ent, &ip_vs_app_list, a_list) { | ||
238 | if (strcmp(app->name, appname)) | ||
239 | continue; | ||
240 | |||
241 | /* softirq may call ip_vs_app_get too, so the caller | ||
242 | must disable softirq on the current CPU */ | ||
243 | if (ip_vs_app_get(app)) | ||
244 | a = app; | ||
245 | break; | ||
246 | } | ||
247 | |||
248 | up(&__ip_vs_app_mutex); | ||
249 | |||
250 | return a; | ||
251 | } | ||
252 | #endif | ||
253 | |||
254 | |||
255 | /* | 227 | /* |
256 | * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) | 228 | * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) |
257 | */ | 229 | */ |
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 2a3a8c59c655..81d90354c928 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c | |||
@@ -24,7 +24,10 @@ | |||
24 | * | 24 | * |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/in.h> | ||
28 | #include <linux/net.h> | ||
27 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
30 | #include <linux/module.h> | ||
28 | #include <linux/vmalloc.h> | 31 | #include <linux/vmalloc.h> |
29 | #include <linux/proc_fs.h> /* for proc_net_* */ | 32 | #include <linux/proc_fs.h> /* for proc_net_* */ |
30 | #include <linux/seq_file.h> | 33 | #include <linux/seq_file.h> |
@@ -219,7 +222,7 @@ struct ip_vs_conn *ip_vs_conn_in_get | |||
219 | if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) | 222 | if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) |
220 | cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); | 223 | cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); |
221 | 224 | ||
222 | IP_VS_DBG(7, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", | 225 | IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", |
223 | ip_vs_proto_name(protocol), | 226 | ip_vs_proto_name(protocol), |
224 | NIPQUAD(s_addr), ntohs(s_port), | 227 | NIPQUAD(s_addr), ntohs(s_port), |
225 | NIPQUAD(d_addr), ntohs(d_port), | 228 | NIPQUAD(d_addr), ntohs(d_port), |
@@ -254,7 +257,7 @@ struct ip_vs_conn *ip_vs_ct_in_get | |||
254 | out: | 257 | out: |
255 | ct_read_unlock(hash); | 258 | ct_read_unlock(hash); |
256 | 259 | ||
257 | IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", | 260 | IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", |
258 | ip_vs_proto_name(protocol), | 261 | ip_vs_proto_name(protocol), |
259 | NIPQUAD(s_addr), ntohs(s_port), | 262 | NIPQUAD(s_addr), ntohs(s_port), |
260 | NIPQUAD(d_addr), ntohs(d_port), | 263 | NIPQUAD(d_addr), ntohs(d_port), |
@@ -295,7 +298,7 @@ struct ip_vs_conn *ip_vs_conn_out_get | |||
295 | 298 | ||
296 | ct_read_unlock(hash); | 299 | ct_read_unlock(hash); |
297 | 300 | ||
298 | IP_VS_DBG(7, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", | 301 | IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", |
299 | ip_vs_proto_name(protocol), | 302 | ip_vs_proto_name(protocol), |
300 | NIPQUAD(s_addr), ntohs(s_port), | 303 | NIPQUAD(s_addr), ntohs(s_port), |
301 | NIPQUAD(d_addr), ntohs(d_port), | 304 | NIPQUAD(d_addr), ntohs(d_port), |
@@ -391,8 +394,9 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) | |||
391 | cp->flags |= atomic_read(&dest->conn_flags); | 394 | cp->flags |= atomic_read(&dest->conn_flags); |
392 | cp->dest = dest; | 395 | cp->dest = dest; |
393 | 396 | ||
394 | IP_VS_DBG(9, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " | 397 | IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " |
395 | "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n", | 398 | "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " |
399 | "dest->refcnt:%d\n", | ||
396 | ip_vs_proto_name(cp->protocol), | 400 | ip_vs_proto_name(cp->protocol), |
397 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 401 | NIPQUAD(cp->caddr), ntohs(cp->cport), |
398 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | 402 | NIPQUAD(cp->vaddr), ntohs(cp->vport), |
@@ -430,8 +434,9 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) | |||
430 | if (!dest) | 434 | if (!dest) |
431 | return; | 435 | return; |
432 | 436 | ||
433 | IP_VS_DBG(9, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " | 437 | IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " |
434 | "d:%u.%u.%u.%u:%d fwd:%c s:%u flg:%X cnt:%d destcnt:%d\n", | 438 | "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " |
439 | "dest->refcnt:%d\n", | ||
435 | ip_vs_proto_name(cp->protocol), | 440 | ip_vs_proto_name(cp->protocol), |
436 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 441 | NIPQUAD(cp->caddr), ntohs(cp->cport), |
437 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | 442 | NIPQUAD(cp->vaddr), ntohs(cp->vport), |
@@ -571,7 +576,7 @@ static void ip_vs_conn_expire(unsigned long data) | |||
571 | ip_vs_conn_hash(cp); | 576 | ip_vs_conn_hash(cp); |
572 | 577 | ||
573 | expire_later: | 578 | expire_later: |
574 | IP_VS_DBG(7, "delayed: refcnt-1=%d conn.n_control=%d\n", | 579 | IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n", |
575 | atomic_read(&cp->refcnt)-1, | 580 | atomic_read(&cp->refcnt)-1, |
576 | atomic_read(&cp->n_control)); | 581 | atomic_read(&cp->n_control)); |
577 | 582 | ||
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 1a0843cd58a9..1aca94a9fd8b 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c | |||
@@ -426,7 +426,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
426 | return NULL; | 426 | return NULL; |
427 | 427 | ||
428 | IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " | 428 | IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " |
429 | "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n", | 429 | "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n", |
430 | ip_vs_fwd_tag(cp), | 430 | ip_vs_fwd_tag(cp), |
431 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 431 | NIPQUAD(cp->caddr), ntohs(cp->cport), |
432 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | 432 | NIPQUAD(cp->vaddr), ntohs(cp->vport), |
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 9bdcf31b760e..c935c5086d33 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/netfilter_ipv4.h> | 35 | #include <linux/netfilter_ipv4.h> |
36 | 36 | ||
37 | #include <net/ip.h> | 37 | #include <net/ip.h> |
38 | #include <net/route.h> | ||
38 | #include <net/sock.h> | 39 | #include <net/sock.h> |
39 | 40 | ||
40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
@@ -447,7 +448,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport) | |||
447 | out: | 448 | out: |
448 | read_unlock(&__ip_vs_svc_lock); | 449 | read_unlock(&__ip_vs_svc_lock); |
449 | 450 | ||
450 | IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", | 451 | IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", |
451 | fwmark, ip_vs_proto_name(protocol), | 452 | fwmark, ip_vs_proto_name(protocol), |
452 | NIPQUAD(vaddr), ntohs(vport), | 453 | NIPQUAD(vaddr), ntohs(vport), |
453 | svc?"hit":"not hit"); | 454 | svc?"hit":"not hit"); |
@@ -597,7 +598,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) | |||
597 | */ | 598 | */ |
598 | list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { | 599 | list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { |
599 | IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " | 600 | IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " |
600 | "refcnt=%d\n", | 601 | "dest->refcnt=%d\n", |
601 | dest->vfwmark, | 602 | dest->vfwmark, |
602 | NIPQUAD(dest->addr), ntohs(dest->port), | 603 | NIPQUAD(dest->addr), ntohs(dest->port), |
603 | atomic_read(&dest->refcnt)); | 604 | atomic_read(&dest->refcnt)); |
@@ -804,7 +805,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) | |||
804 | dest = ip_vs_trash_get_dest(svc, daddr, dport); | 805 | dest = ip_vs_trash_get_dest(svc, daddr, dport); |
805 | if (dest != NULL) { | 806 | if (dest != NULL) { |
806 | IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " | 807 | IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " |
807 | "refcnt=%d, service %u/%u.%u.%u.%u:%u\n", | 808 | "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", |
808 | NIPQUAD(daddr), ntohs(dport), | 809 | NIPQUAD(daddr), ntohs(dport), |
809 | atomic_read(&dest->refcnt), | 810 | atomic_read(&dest->refcnt), |
810 | dest->vfwmark, | 811 | dest->vfwmark, |
@@ -949,7 +950,8 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) | |||
949 | atomic_dec(&dest->svc->refcnt); | 950 | atomic_dec(&dest->svc->refcnt); |
950 | kfree(dest); | 951 | kfree(dest); |
951 | } else { | 952 | } else { |
952 | IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n", | 953 | IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, " |
954 | "dest->refcnt=%d\n", | ||
953 | NIPQUAD(dest->addr), ntohs(dest->port), | 955 | NIPQUAD(dest->addr), ntohs(dest->port), |
954 | atomic_read(&dest->refcnt)); | 956 | atomic_read(&dest->refcnt)); |
955 | list_add(&dest->n_list, &ip_vs_dest_trash); | 957 | list_add(&dest->n_list, &ip_vs_dest_trash); |
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index f3bc320dce93..9fee19c4c617 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c | |||
@@ -37,8 +37,10 @@ | |||
37 | * | 37 | * |
38 | */ | 38 | */ |
39 | 39 | ||
40 | #include <linux/ip.h> | ||
40 | #include <linux/module.h> | 41 | #include <linux/module.h> |
41 | #include <linux/kernel.h> | 42 | #include <linux/kernel.h> |
43 | #include <linux/skbuff.h> | ||
42 | 44 | ||
43 | #include <net/ip_vs.h> | 45 | #include <net/ip_vs.h> |
44 | 46 | ||
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index 67b3e2fc1fa1..e7004741ac73 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c | |||
@@ -13,7 +13,10 @@ | |||
13 | * Changes: | 13 | * Changes: |
14 | * | 14 | * |
15 | */ | 15 | */ |
16 | #include <linux/config.h> | ||
16 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
18 | #include <linux/jiffies.h> | ||
19 | #include <linux/slab.h> | ||
17 | #include <linux/types.h> | 20 | #include <linux/types.h> |
18 | 21 | ||
19 | #include <net/ip_vs.h> | 22 | #include <net/ip_vs.h> |
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 561cda326fa8..6e5cb92a5c83 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c | |||
@@ -41,8 +41,10 @@ | |||
41 | * me to write this module. | 41 | * me to write this module. |
42 | */ | 42 | */ |
43 | 43 | ||
44 | #include <linux/ip.h> | ||
44 | #include <linux/module.h> | 45 | #include <linux/module.h> |
45 | #include <linux/kernel.h> | 46 | #include <linux/kernel.h> |
47 | #include <linux/skbuff.h> | ||
46 | 48 | ||
47 | /* for sysctl */ | 49 | /* for sysctl */ |
48 | #include <linux/fs.h> | 50 | #include <linux/fs.h> |
@@ -228,33 +230,6 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) | |||
228 | } | 230 | } |
229 | 231 | ||
230 | 232 | ||
231 | #if 0000 | ||
232 | /* | ||
233 | * Unhash ip_vs_lblc_entry from ip_vs_lblc_table. | ||
234 | * returns bool success. | ||
235 | */ | ||
236 | static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl, | ||
237 | struct ip_vs_lblc_entry *en) | ||
238 | { | ||
239 | if (list_empty(&en->list)) { | ||
240 | IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, " | ||
241 | "called from %p\n", __builtin_return_address(0)); | ||
242 | return 0; | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * Remove it from the table | ||
247 | */ | ||
248 | write_lock(&tbl->lock); | ||
249 | list_del(&en->list); | ||
250 | INIT_LIST_HEAD(&en->list); | ||
251 | write_unlock(&tbl->lock); | ||
252 | |||
253 | return 1; | ||
254 | } | ||
255 | #endif | ||
256 | |||
257 | |||
258 | /* | 233 | /* |
259 | * Get ip_vs_lblc_entry associated with supplied parameters. | 234 | * Get ip_vs_lblc_entry associated with supplied parameters. |
260 | */ | 235 | */ |
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index ce456dbf09a5..32ba37ba72d8 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c | |||
@@ -39,8 +39,10 @@ | |||
39 | * | 39 | * |
40 | */ | 40 | */ |
41 | 41 | ||
42 | #include <linux/ip.h> | ||
42 | #include <linux/module.h> | 43 | #include <linux/module.h> |
43 | #include <linux/kernel.h> | 44 | #include <linux/kernel.h> |
45 | #include <linux/skbuff.h> | ||
44 | 46 | ||
45 | /* for sysctl */ | 47 | /* for sysctl */ |
46 | #include <linux/fs.h> | 48 | #include <linux/fs.h> |
@@ -414,33 +416,6 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) | |||
414 | } | 416 | } |
415 | 417 | ||
416 | 418 | ||
417 | #if 0000 | ||
418 | /* | ||
419 | * Unhash ip_vs_lblcr_entry from ip_vs_lblcr_table. | ||
420 | * returns bool success. | ||
421 | */ | ||
422 | static int ip_vs_lblcr_unhash(struct ip_vs_lblcr_table *tbl, | ||
423 | struct ip_vs_lblcr_entry *en) | ||
424 | { | ||
425 | if (list_empty(&en->list)) { | ||
426 | IP_VS_ERR("ip_vs_lblcr_unhash(): request for not hashed entry, " | ||
427 | "called from %p\n", __builtin_return_address(0)); | ||
428 | return 0; | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * Remove it from the table | ||
433 | */ | ||
434 | write_lock(&tbl->lock); | ||
435 | list_del(&en->list); | ||
436 | INIT_LIST_HEAD(&en->list); | ||
437 | write_unlock(&tbl->lock); | ||
438 | |||
439 | return 1; | ||
440 | } | ||
441 | #endif | ||
442 | |||
443 | |||
444 | /* | 419 | /* |
445 | * Get ip_vs_lblcr_entry associated with supplied parameters. | 420 | * Get ip_vs_lblcr_entry associated with supplied parameters. |
446 | */ | 421 | */ |
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c index 453e94a0bbd7..8b0505b09317 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah.c | |||
@@ -12,6 +12,8 @@ | |||
12 | * | 12 | * |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/in.h> | ||
16 | #include <linux/ip.h> | ||
15 | #include <linux/module.h> | 17 | #include <linux/module.h> |
16 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
17 | #include <linux/netfilter.h> | 19 | #include <linux/netfilter.h> |
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c index 478e5c7c7e8e..c36ccf057a19 100644 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ b/net/ipv4/ipvs/ip_vs_proto_esp.c | |||
@@ -12,6 +12,8 @@ | |||
12 | * | 12 | * |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/in.h> | ||
16 | #include <linux/ip.h> | ||
15 | #include <linux/module.h> | 17 | #include <linux/module.h> |
16 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
17 | #include <linux/netfilter.h> | 19 | #include <linux/netfilter.h> |
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 0e878fd6215c..bc28b1160a3a 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c | |||
@@ -275,28 +275,6 @@ static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { | |||
275 | [IP_VS_TCP_S_LAST] = 2*HZ, | 275 | [IP_VS_TCP_S_LAST] = 2*HZ, |
276 | }; | 276 | }; |
277 | 277 | ||
278 | |||
279 | #if 0 | ||
280 | |||
281 | /* FIXME: This is going to die */ | ||
282 | |||
283 | static int tcp_timeouts_dos[IP_VS_TCP_S_LAST+1] = { | ||
284 | [IP_VS_TCP_S_NONE] = 2*HZ, | ||
285 | [IP_VS_TCP_S_ESTABLISHED] = 8*60*HZ, | ||
286 | [IP_VS_TCP_S_SYN_SENT] = 60*HZ, | ||
287 | [IP_VS_TCP_S_SYN_RECV] = 10*HZ, | ||
288 | [IP_VS_TCP_S_FIN_WAIT] = 60*HZ, | ||
289 | [IP_VS_TCP_S_TIME_WAIT] = 60*HZ, | ||
290 | [IP_VS_TCP_S_CLOSE] = 10*HZ, | ||
291 | [IP_VS_TCP_S_CLOSE_WAIT] = 60*HZ, | ||
292 | [IP_VS_TCP_S_LAST_ACK] = 30*HZ, | ||
293 | [IP_VS_TCP_S_LISTEN] = 2*60*HZ, | ||
294 | [IP_VS_TCP_S_SYNACK] = 100*HZ, | ||
295 | [IP_VS_TCP_S_LAST] = 2*HZ, | ||
296 | }; | ||
297 | |||
298 | #endif | ||
299 | |||
300 | static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { | 278 | static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { |
301 | [IP_VS_TCP_S_NONE] = "NONE", | 279 | [IP_VS_TCP_S_NONE] = "NONE", |
302 | [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED", | 280 | [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED", |
@@ -448,7 +426,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, | |||
448 | struct ip_vs_dest *dest = cp->dest; | 426 | struct ip_vs_dest *dest = cp->dest; |
449 | 427 | ||
450 | IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->" | 428 | IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->" |
451 | "%u.%u.%u.%u:%d state: %s->%s cnt:%d\n", | 429 | "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n", |
452 | pp->name, | 430 | pp->name, |
453 | (state_off==TCP_DIR_OUTPUT)?"output ":"input ", | 431 | (state_off==TCP_DIR_OUTPUT)?"output ":"input ", |
454 | th->syn? 'S' : '.', | 432 | th->syn? 'S' : '.', |
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 8ae5f2e0aefa..89d9175d8f28 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c | |||
@@ -15,8 +15,11 @@ | |||
15 | * | 15 | * |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/in.h> | ||
19 | #include <linux/ip.h> | ||
18 | #include <linux/kernel.h> | 20 | #include <linux/kernel.h> |
19 | #include <linux/netfilter_ipv4.h> | 21 | #include <linux/netfilter_ipv4.h> |
22 | #include <linux/udp.h> | ||
20 | 23 | ||
21 | #include <net/ip_vs.h> | 24 | #include <net/ip_vs.h> |
22 | 25 | ||
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 6f7c50e44a39..7775e6cc68be 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c | |||
@@ -34,8 +34,10 @@ | |||
34 | * | 34 | * |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #include <linux/ip.h> | ||
37 | #include <linux/module.h> | 38 | #include <linux/module.h> |
38 | #include <linux/kernel.h> | 39 | #include <linux/kernel.h> |
40 | #include <linux/skbuff.h> | ||
39 | 41 | ||
40 | #include <net/ip_vs.h> | 42 | #include <net/ip_vs.h> |
41 | 43 | ||
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 2e5ced3d8062..1bca714bda3d 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c | |||
@@ -21,12 +21,14 @@ | |||
21 | 21 | ||
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/inetdevice.h> | ||
24 | #include <linux/net.h> | 25 | #include <linux/net.h> |
25 | #include <linux/completion.h> | 26 | #include <linux/completion.h> |
26 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
27 | #include <linux/skbuff.h> | 28 | #include <linux/skbuff.h> |
28 | #include <linux/in.h> | 29 | #include <linux/in.h> |
29 | #include <linux/igmp.h> /* for ip_mc_join_group */ | 30 | #include <linux/igmp.h> /* for ip_mc_join_group */ |
31 | #include <linux/udp.h> | ||
30 | 32 | ||
31 | #include <net/ip.h> | 33 | #include <net/ip.h> |
32 | #include <net/sock.h> | 34 | #include <net/sock.h> |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3c2e9639bba6..bba156304695 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -68,19 +68,14 @@ struct arpt_table_info { | |||
68 | unsigned int initial_entries; | 68 | unsigned int initial_entries; |
69 | unsigned int hook_entry[NF_ARP_NUMHOOKS]; | 69 | unsigned int hook_entry[NF_ARP_NUMHOOKS]; |
70 | unsigned int underflow[NF_ARP_NUMHOOKS]; | 70 | unsigned int underflow[NF_ARP_NUMHOOKS]; |
71 | char entries[0] __attribute__((aligned(SMP_CACHE_BYTES))); | 71 | void *entries[NR_CPUS]; |
72 | }; | 72 | }; |
73 | 73 | ||
74 | static LIST_HEAD(arpt_target); | 74 | static LIST_HEAD(arpt_target); |
75 | static LIST_HEAD(arpt_tables); | 75 | static LIST_HEAD(arpt_tables); |
76 | #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) | ||
76 | #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) | 77 | #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) |
77 | 78 | ||
78 | #ifdef CONFIG_SMP | ||
79 | #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) | ||
80 | #else | ||
81 | #define TABLE_OFFSET(t,p) 0 | ||
82 | #endif | ||
83 | |||
84 | static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, | 79 | static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, |
85 | char *hdr_addr, int len) | 80 | char *hdr_addr, int len) |
86 | { | 81 | { |
@@ -269,9 +264,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, | |||
269 | outdev = out ? out->name : nulldevname; | 264 | outdev = out ? out->name : nulldevname; |
270 | 265 | ||
271 | read_lock_bh(&table->lock); | 266 | read_lock_bh(&table->lock); |
272 | table_base = (void *)table->private->entries | 267 | table_base = (void *)table->private->entries[smp_processor_id()]; |
273 | + TABLE_OFFSET(table->private, | ||
274 | smp_processor_id()); | ||
275 | e = get_entry(table_base, table->private->hook_entry[hook]); | 268 | e = get_entry(table_base, table->private->hook_entry[hook]); |
276 | back = get_entry(table_base, table->private->underflow[hook]); | 269 | back = get_entry(table_base, table->private->underflow[hook]); |
277 | 270 | ||
@@ -462,7 +455,8 @@ static inline int unconditional(const struct arpt_arp *arp) | |||
462 | /* Figures out from what hook each rule can be called: returns 0 if | 455 | /* Figures out from what hook each rule can be called: returns 0 if |
463 | * there are loops. Puts hook bitmask in comefrom. | 456 | * there are loops. Puts hook bitmask in comefrom. |
464 | */ | 457 | */ |
465 | static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int valid_hooks) | 458 | static int mark_source_chains(struct arpt_table_info *newinfo, |
459 | unsigned int valid_hooks, void *entry0) | ||
466 | { | 460 | { |
467 | unsigned int hook; | 461 | unsigned int hook; |
468 | 462 | ||
@@ -472,7 +466,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali | |||
472 | for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { | 466 | for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { |
473 | unsigned int pos = newinfo->hook_entry[hook]; | 467 | unsigned int pos = newinfo->hook_entry[hook]; |
474 | struct arpt_entry *e | 468 | struct arpt_entry *e |
475 | = (struct arpt_entry *)(newinfo->entries + pos); | 469 | = (struct arpt_entry *)(entry0 + pos); |
476 | 470 | ||
477 | if (!(valid_hooks & (1 << hook))) | 471 | if (!(valid_hooks & (1 << hook))) |
478 | continue; | 472 | continue; |
@@ -514,13 +508,13 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali | |||
514 | goto next; | 508 | goto next; |
515 | 509 | ||
516 | e = (struct arpt_entry *) | 510 | e = (struct arpt_entry *) |
517 | (newinfo->entries + pos); | 511 | (entry0 + pos); |
518 | } while (oldpos == pos + e->next_offset); | 512 | } while (oldpos == pos + e->next_offset); |
519 | 513 | ||
520 | /* Move along one */ | 514 | /* Move along one */ |
521 | size = e->next_offset; | 515 | size = e->next_offset; |
522 | e = (struct arpt_entry *) | 516 | e = (struct arpt_entry *) |
523 | (newinfo->entries + pos + size); | 517 | (entry0 + pos + size); |
524 | e->counters.pcnt = pos; | 518 | e->counters.pcnt = pos; |
525 | pos += size; | 519 | pos += size; |
526 | } else { | 520 | } else { |
@@ -537,7 +531,7 @@ static int mark_source_chains(struct arpt_table_info *newinfo, unsigned int vali | |||
537 | newpos = pos + e->next_offset; | 531 | newpos = pos + e->next_offset; |
538 | } | 532 | } |
539 | e = (struct arpt_entry *) | 533 | e = (struct arpt_entry *) |
540 | (newinfo->entries + newpos); | 534 | (entry0 + newpos); |
541 | e->counters.pcnt = pos; | 535 | e->counters.pcnt = pos; |
542 | pos = newpos; | 536 | pos = newpos; |
543 | } | 537 | } |
@@ -689,6 +683,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) | |||
689 | static int translate_table(const char *name, | 683 | static int translate_table(const char *name, |
690 | unsigned int valid_hooks, | 684 | unsigned int valid_hooks, |
691 | struct arpt_table_info *newinfo, | 685 | struct arpt_table_info *newinfo, |
686 | void *entry0, | ||
692 | unsigned int size, | 687 | unsigned int size, |
693 | unsigned int number, | 688 | unsigned int number, |
694 | const unsigned int *hook_entries, | 689 | const unsigned int *hook_entries, |
@@ -710,11 +705,11 @@ static int translate_table(const char *name, | |||
710 | i = 0; | 705 | i = 0; |
711 | 706 | ||
712 | /* Walk through entries, checking offsets. */ | 707 | /* Walk through entries, checking offsets. */ |
713 | ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 708 | ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, |
714 | check_entry_size_and_hooks, | 709 | check_entry_size_and_hooks, |
715 | newinfo, | 710 | newinfo, |
716 | newinfo->entries, | 711 | entry0, |
717 | newinfo->entries + size, | 712 | entry0 + size, |
718 | hook_entries, underflows, &i); | 713 | hook_entries, underflows, &i); |
719 | duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); | 714 | duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); |
720 | if (ret != 0) | 715 | if (ret != 0) |
@@ -743,29 +738,26 @@ static int translate_table(const char *name, | |||
743 | } | 738 | } |
744 | } | 739 | } |
745 | 740 | ||
746 | if (!mark_source_chains(newinfo, valid_hooks)) { | 741 | if (!mark_source_chains(newinfo, valid_hooks, entry0)) { |
747 | duprintf("Looping hook\n"); | 742 | duprintf("Looping hook\n"); |
748 | return -ELOOP; | 743 | return -ELOOP; |
749 | } | 744 | } |
750 | 745 | ||
751 | /* Finally, each sanity check must pass */ | 746 | /* Finally, each sanity check must pass */ |
752 | i = 0; | 747 | i = 0; |
753 | ret = ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 748 | ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, |
754 | check_entry, name, size, &i); | 749 | check_entry, name, size, &i); |
755 | 750 | ||
756 | if (ret != 0) { | 751 | if (ret != 0) { |
757 | ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 752 | ARPT_ENTRY_ITERATE(entry0, newinfo->size, |
758 | cleanup_entry, &i); | 753 | cleanup_entry, &i); |
759 | return ret; | 754 | return ret; |
760 | } | 755 | } |
761 | 756 | ||
762 | /* And one copy for every other CPU */ | 757 | /* And one copy for every other CPU */ |
763 | for_each_cpu(i) { | 758 | for_each_cpu(i) { |
764 | if (i == 0) | 759 | if (newinfo->entries[i] && newinfo->entries[i] != entry0) |
765 | continue; | 760 | memcpy(newinfo->entries[i], entry0, newinfo->size); |
766 | memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, | ||
767 | newinfo->entries, | ||
768 | SMP_ALIGN(newinfo->size)); | ||
769 | } | 761 | } |
770 | 762 | ||
771 | return ret; | 763 | return ret; |
@@ -807,15 +799,42 @@ static inline int add_entry_to_counter(const struct arpt_entry *e, | |||
807 | return 0; | 799 | return 0; |
808 | } | 800 | } |
809 | 801 | ||
802 | static inline int set_entry_to_counter(const struct arpt_entry *e, | ||
803 | struct arpt_counters total[], | ||
804 | unsigned int *i) | ||
805 | { | ||
806 | SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
807 | |||
808 | (*i)++; | ||
809 | return 0; | ||
810 | } | ||
811 | |||
810 | static void get_counters(const struct arpt_table_info *t, | 812 | static void get_counters(const struct arpt_table_info *t, |
811 | struct arpt_counters counters[]) | 813 | struct arpt_counters counters[]) |
812 | { | 814 | { |
813 | unsigned int cpu; | 815 | unsigned int cpu; |
814 | unsigned int i; | 816 | unsigned int i; |
817 | unsigned int curcpu; | ||
818 | |||
819 | /* Instead of clearing (by a previous call to memset()) | ||
820 | * the counters and using adds, we set the counters | ||
821 | * with data used by 'current' CPU | ||
822 | * We dont care about preemption here. | ||
823 | */ | ||
824 | curcpu = raw_smp_processor_id(); | ||
825 | |||
826 | i = 0; | ||
827 | ARPT_ENTRY_ITERATE(t->entries[curcpu], | ||
828 | t->size, | ||
829 | set_entry_to_counter, | ||
830 | counters, | ||
831 | &i); | ||
815 | 832 | ||
816 | for_each_cpu(cpu) { | 833 | for_each_cpu(cpu) { |
834 | if (cpu == curcpu) | ||
835 | continue; | ||
817 | i = 0; | 836 | i = 0; |
818 | ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), | 837 | ARPT_ENTRY_ITERATE(t->entries[cpu], |
819 | t->size, | 838 | t->size, |
820 | add_entry_to_counter, | 839 | add_entry_to_counter, |
821 | counters, | 840 | counters, |
@@ -831,6 +850,7 @@ static int copy_entries_to_user(unsigned int total_size, | |||
831 | struct arpt_entry *e; | 850 | struct arpt_entry *e; |
832 | struct arpt_counters *counters; | 851 | struct arpt_counters *counters; |
833 | int ret = 0; | 852 | int ret = 0; |
853 | void *loc_cpu_entry; | ||
834 | 854 | ||
835 | /* We need atomic snapshot of counters: rest doesn't change | 855 | /* We need atomic snapshot of counters: rest doesn't change |
836 | * (other than comefrom, which userspace doesn't care | 856 | * (other than comefrom, which userspace doesn't care |
@@ -843,13 +863,13 @@ static int copy_entries_to_user(unsigned int total_size, | |||
843 | return -ENOMEM; | 863 | return -ENOMEM; |
844 | 864 | ||
845 | /* First, sum counters... */ | 865 | /* First, sum counters... */ |
846 | memset(counters, 0, countersize); | ||
847 | write_lock_bh(&table->lock); | 866 | write_lock_bh(&table->lock); |
848 | get_counters(table->private, counters); | 867 | get_counters(table->private, counters); |
849 | write_unlock_bh(&table->lock); | 868 | write_unlock_bh(&table->lock); |
850 | 869 | ||
851 | /* ... then copy entire thing from CPU 0... */ | 870 | loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; |
852 | if (copy_to_user(userptr, table->private->entries, total_size) != 0) { | 871 | /* ... then copy entire thing ... */ |
872 | if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { | ||
853 | ret = -EFAULT; | 873 | ret = -EFAULT; |
854 | goto free_counters; | 874 | goto free_counters; |
855 | } | 875 | } |
@@ -859,7 +879,7 @@ static int copy_entries_to_user(unsigned int total_size, | |||
859 | for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ | 879 | for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ |
860 | struct arpt_entry_target *t; | 880 | struct arpt_entry_target *t; |
861 | 881 | ||
862 | e = (struct arpt_entry *)(table->private->entries + off); | 882 | e = (struct arpt_entry *)(loc_cpu_entry + off); |
863 | if (copy_to_user(userptr + off | 883 | if (copy_to_user(userptr + off |
864 | + offsetof(struct arpt_entry, counters), | 884 | + offsetof(struct arpt_entry, counters), |
865 | &counters[num], | 885 | &counters[num], |
@@ -911,6 +931,47 @@ static int get_entries(const struct arpt_get_entries *entries, | |||
911 | return ret; | 931 | return ret; |
912 | } | 932 | } |
913 | 933 | ||
934 | static void free_table_info(struct arpt_table_info *info) | ||
935 | { | ||
936 | int cpu; | ||
937 | for_each_cpu(cpu) { | ||
938 | if (info->size <= PAGE_SIZE) | ||
939 | kfree(info->entries[cpu]); | ||
940 | else | ||
941 | vfree(info->entries[cpu]); | ||
942 | } | ||
943 | kfree(info); | ||
944 | } | ||
945 | |||
946 | static struct arpt_table_info *alloc_table_info(unsigned int size) | ||
947 | { | ||
948 | struct arpt_table_info *newinfo; | ||
949 | int cpu; | ||
950 | |||
951 | newinfo = kzalloc(sizeof(struct arpt_table_info), GFP_KERNEL); | ||
952 | if (!newinfo) | ||
953 | return NULL; | ||
954 | |||
955 | newinfo->size = size; | ||
956 | |||
957 | for_each_cpu(cpu) { | ||
958 | if (size <= PAGE_SIZE) | ||
959 | newinfo->entries[cpu] = kmalloc_node(size, | ||
960 | GFP_KERNEL, | ||
961 | cpu_to_node(cpu)); | ||
962 | else | ||
963 | newinfo->entries[cpu] = vmalloc_node(size, | ||
964 | cpu_to_node(cpu)); | ||
965 | |||
966 | if (newinfo->entries[cpu] == NULL) { | ||
967 | free_table_info(newinfo); | ||
968 | return NULL; | ||
969 | } | ||
970 | } | ||
971 | |||
972 | return newinfo; | ||
973 | } | ||
974 | |||
914 | static int do_replace(void __user *user, unsigned int len) | 975 | static int do_replace(void __user *user, unsigned int len) |
915 | { | 976 | { |
916 | int ret; | 977 | int ret; |
@@ -918,6 +979,7 @@ static int do_replace(void __user *user, unsigned int len) | |||
918 | struct arpt_table *t; | 979 | struct arpt_table *t; |
919 | struct arpt_table_info *newinfo, *oldinfo; | 980 | struct arpt_table_info *newinfo, *oldinfo; |
920 | struct arpt_counters *counters; | 981 | struct arpt_counters *counters; |
982 | void *loc_cpu_entry, *loc_cpu_old_entry; | ||
921 | 983 | ||
922 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 984 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
923 | return -EFAULT; | 985 | return -EFAULT; |
@@ -930,13 +992,13 @@ static int do_replace(void __user *user, unsigned int len) | |||
930 | if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) | 992 | if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) |
931 | return -ENOMEM; | 993 | return -ENOMEM; |
932 | 994 | ||
933 | newinfo = vmalloc(sizeof(struct arpt_table_info) | 995 | newinfo = alloc_table_info(tmp.size); |
934 | + SMP_ALIGN(tmp.size) * | ||
935 | (highest_possible_processor_id()+1)); | ||
936 | if (!newinfo) | 996 | if (!newinfo) |
937 | return -ENOMEM; | 997 | return -ENOMEM; |
938 | 998 | ||
939 | if (copy_from_user(newinfo->entries, user + sizeof(tmp), | 999 | /* choose the copy that is on our node/cpu */ |
1000 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | ||
1001 | if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), | ||
940 | tmp.size) != 0) { | 1002 | tmp.size) != 0) { |
941 | ret = -EFAULT; | 1003 | ret = -EFAULT; |
942 | goto free_newinfo; | 1004 | goto free_newinfo; |
@@ -947,10 +1009,9 @@ static int do_replace(void __user *user, unsigned int len) | |||
947 | ret = -ENOMEM; | 1009 | ret = -ENOMEM; |
948 | goto free_newinfo; | 1010 | goto free_newinfo; |
949 | } | 1011 | } |
950 | memset(counters, 0, tmp.num_counters * sizeof(struct arpt_counters)); | ||
951 | 1012 | ||
952 | ret = translate_table(tmp.name, tmp.valid_hooks, | 1013 | ret = translate_table(tmp.name, tmp.valid_hooks, |
953 | newinfo, tmp.size, tmp.num_entries, | 1014 | newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, |
954 | tmp.hook_entry, tmp.underflow); | 1015 | tmp.hook_entry, tmp.underflow); |
955 | if (ret != 0) | 1016 | if (ret != 0) |
956 | goto free_newinfo_counters; | 1017 | goto free_newinfo_counters; |
@@ -989,8 +1050,10 @@ static int do_replace(void __user *user, unsigned int len) | |||
989 | /* Get the old counters. */ | 1050 | /* Get the old counters. */ |
990 | get_counters(oldinfo, counters); | 1051 | get_counters(oldinfo, counters); |
991 | /* Decrease module usage counts and free resource */ | 1052 | /* Decrease module usage counts and free resource */ |
992 | ARPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); | 1053 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
993 | vfree(oldinfo); | 1054 | ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); |
1055 | |||
1056 | free_table_info(oldinfo); | ||
994 | if (copy_to_user(tmp.counters, counters, | 1057 | if (copy_to_user(tmp.counters, counters, |
995 | sizeof(struct arpt_counters) * tmp.num_counters) != 0) | 1058 | sizeof(struct arpt_counters) * tmp.num_counters) != 0) |
996 | ret = -EFAULT; | 1059 | ret = -EFAULT; |
@@ -1002,11 +1065,11 @@ static int do_replace(void __user *user, unsigned int len) | |||
1002 | module_put(t->me); | 1065 | module_put(t->me); |
1003 | up(&arpt_mutex); | 1066 | up(&arpt_mutex); |
1004 | free_newinfo_counters_untrans: | 1067 | free_newinfo_counters_untrans: |
1005 | ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL); | 1068 | ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); |
1006 | free_newinfo_counters: | 1069 | free_newinfo_counters: |
1007 | vfree(counters); | 1070 | vfree(counters); |
1008 | free_newinfo: | 1071 | free_newinfo: |
1009 | vfree(newinfo); | 1072 | free_table_info(newinfo); |
1010 | return ret; | 1073 | return ret; |
1011 | } | 1074 | } |
1012 | 1075 | ||
@@ -1030,6 +1093,7 @@ static int do_add_counters(void __user *user, unsigned int len) | |||
1030 | struct arpt_counters_info tmp, *paddc; | 1093 | struct arpt_counters_info tmp, *paddc; |
1031 | struct arpt_table *t; | 1094 | struct arpt_table *t; |
1032 | int ret = 0; | 1095 | int ret = 0; |
1096 | void *loc_cpu_entry; | ||
1033 | 1097 | ||
1034 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1098 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1035 | return -EFAULT; | 1099 | return -EFAULT; |
@@ -1059,7 +1123,9 @@ static int do_add_counters(void __user *user, unsigned int len) | |||
1059 | } | 1123 | } |
1060 | 1124 | ||
1061 | i = 0; | 1125 | i = 0; |
1062 | ARPT_ENTRY_ITERATE(t->private->entries, | 1126 | /* Choose the copy that is on our node */ |
1127 | loc_cpu_entry = t->private->entries[smp_processor_id()]; | ||
1128 | ARPT_ENTRY_ITERATE(loc_cpu_entry, | ||
1063 | t->private->size, | 1129 | t->private->size, |
1064 | add_counter_to_entry, | 1130 | add_counter_to_entry, |
1065 | paddc->counters, | 1131 | paddc->counters, |
@@ -1220,30 +1286,32 @@ int arpt_register_table(struct arpt_table *table, | |||
1220 | struct arpt_table_info *newinfo; | 1286 | struct arpt_table_info *newinfo; |
1221 | static struct arpt_table_info bootstrap | 1287 | static struct arpt_table_info bootstrap |
1222 | = { 0, 0, 0, { 0 }, { 0 }, { } }; | 1288 | = { 0, 0, 0, { 0 }, { 0 }, { } }; |
1289 | void *loc_cpu_entry; | ||
1223 | 1290 | ||
1224 | newinfo = vmalloc(sizeof(struct arpt_table_info) | 1291 | newinfo = alloc_table_info(repl->size); |
1225 | + SMP_ALIGN(repl->size) * | ||
1226 | (highest_possible_processor_id()+1)); | ||
1227 | if (!newinfo) { | 1292 | if (!newinfo) { |
1228 | ret = -ENOMEM; | 1293 | ret = -ENOMEM; |
1229 | return ret; | 1294 | return ret; |
1230 | } | 1295 | } |
1231 | memcpy(newinfo->entries, repl->entries, repl->size); | 1296 | |
1297 | /* choose the copy on our node/cpu */ | ||
1298 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | ||
1299 | memcpy(loc_cpu_entry, repl->entries, repl->size); | ||
1232 | 1300 | ||
1233 | ret = translate_table(table->name, table->valid_hooks, | 1301 | ret = translate_table(table->name, table->valid_hooks, |
1234 | newinfo, repl->size, | 1302 | newinfo, loc_cpu_entry, repl->size, |
1235 | repl->num_entries, | 1303 | repl->num_entries, |
1236 | repl->hook_entry, | 1304 | repl->hook_entry, |
1237 | repl->underflow); | 1305 | repl->underflow); |
1238 | duprintf("arpt_register_table: translate table gives %d\n", ret); | 1306 | duprintf("arpt_register_table: translate table gives %d\n", ret); |
1239 | if (ret != 0) { | 1307 | if (ret != 0) { |
1240 | vfree(newinfo); | 1308 | free_table_info(newinfo); |
1241 | return ret; | 1309 | return ret; |
1242 | } | 1310 | } |
1243 | 1311 | ||
1244 | ret = down_interruptible(&arpt_mutex); | 1312 | ret = down_interruptible(&arpt_mutex); |
1245 | if (ret != 0) { | 1313 | if (ret != 0) { |
1246 | vfree(newinfo); | 1314 | free_table_info(newinfo); |
1247 | return ret; | 1315 | return ret; |
1248 | } | 1316 | } |
1249 | 1317 | ||
@@ -1272,20 +1340,23 @@ int arpt_register_table(struct arpt_table *table, | |||
1272 | return ret; | 1340 | return ret; |
1273 | 1341 | ||
1274 | free_unlock: | 1342 | free_unlock: |
1275 | vfree(newinfo); | 1343 | free_table_info(newinfo); |
1276 | goto unlock; | 1344 | goto unlock; |
1277 | } | 1345 | } |
1278 | 1346 | ||
1279 | void arpt_unregister_table(struct arpt_table *table) | 1347 | void arpt_unregister_table(struct arpt_table *table) |
1280 | { | 1348 | { |
1349 | void *loc_cpu_entry; | ||
1350 | |||
1281 | down(&arpt_mutex); | 1351 | down(&arpt_mutex); |
1282 | LIST_DELETE(&arpt_tables, table); | 1352 | LIST_DELETE(&arpt_tables, table); |
1283 | up(&arpt_mutex); | 1353 | up(&arpt_mutex); |
1284 | 1354 | ||
1285 | /* Decrease module usage counts and free resources */ | 1355 | /* Decrease module usage counts and free resources */ |
1286 | ARPT_ENTRY_ITERATE(table->private->entries, table->private->size, | 1356 | loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; |
1357 | ARPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size, | ||
1287 | cleanup_entry, NULL); | 1358 | cleanup_entry, NULL); |
1288 | vfree(table->private); | 1359 | free_table_info(table->private); |
1289 | } | 1360 | } |
1290 | 1361 | ||
1291 | /* The built-in targets: standard (NULL) and error. */ | 1362 | /* The built-in targets: standard (NULL) and error. */ |
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index e52847fa10f5..0366eedb4d70 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c | |||
@@ -18,11 +18,13 @@ | |||
18 | * | 18 | * |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/in.h> | ||
21 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
22 | #include <linux/module.h> | 23 | #include <linux/module.h> |
23 | #include <linux/netfilter.h> | 24 | #include <linux/netfilter.h> |
24 | #include <linux/ip.h> | 25 | #include <linux/ip.h> |
25 | #include <linux/moduleparam.h> | 26 | #include <linux/moduleparam.h> |
27 | #include <linux/udp.h> | ||
26 | #include <net/checksum.h> | 28 | #include <net/checksum.h> |
27 | #include <net/udp.h> | 29 | #include <net/udp.h> |
28 | 30 | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 744abb9d377a..57956dee60c8 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/ip.h> | 31 | #include <linux/ip.h> |
32 | #include <linux/in.h> | 32 | #include <linux/in.h> |
33 | #include <linux/list.h> | 33 | #include <linux/list.h> |
34 | #include <linux/seq_file.h> | ||
34 | 35 | ||
35 | static DEFINE_RWLOCK(ip_ct_gre_lock); | 36 | static DEFINE_RWLOCK(ip_ct_gre_lock); |
36 | #define ASSERT_READ_LOCK(x) | 37 | #define ASSERT_READ_LOCK(x) |
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index f2dcac7c7660..46becbe4fe58 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/timer.h> | 11 | #include <linux/timer.h> |
12 | #include <linux/netfilter.h> | 12 | #include <linux/netfilter.h> |
13 | #include <linux/in.h> | 13 | #include <linux/in.h> |
14 | #include <linux/ip.h> | ||
14 | #include <linux/udp.h> | 15 | #include <linux/udp.h> |
15 | #include <linux/seq_file.h> | 16 | #include <linux/seq_file.h> |
16 | #include <net/checksum.h> | 17 | #include <net/checksum.h> |
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index dd476b191f4b..a88bcc551244 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #endif | 27 | #endif |
28 | #include <net/checksum.h> | 28 | #include <net/checksum.h> |
29 | #include <net/ip.h> | 29 | #include <net/ip.h> |
30 | #include <net/route.h> | ||
30 | 31 | ||
31 | #define ASSERT_READ_LOCK(x) | 32 | #define ASSERT_READ_LOCK(x) |
32 | #define ASSERT_WRITE_LOCK(x) | 33 | #define ASSERT_WRITE_LOCK(x) |
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 8acb7ed40b47..4f95d477805c 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c | |||
@@ -44,6 +44,7 @@ | |||
44 | * | 44 | * |
45 | */ | 45 | */ |
46 | #include <linux/config.h> | 46 | #include <linux/config.h> |
47 | #include <linux/in.h> | ||
47 | #include <linux/module.h> | 48 | #include <linux/module.h> |
48 | #include <linux/types.h> | 49 | #include <linux/types.h> |
49 | #include <linux/kernel.h> | 50 | #include <linux/kernel.h> |
@@ -53,6 +54,7 @@ | |||
53 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 54 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> |
54 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | 55 | #include <linux/netfilter_ipv4/ip_nat_helper.h> |
55 | #include <linux/ip.h> | 56 | #include <linux/ip.h> |
57 | #include <linux/udp.h> | ||
56 | #include <net/checksum.h> | 58 | #include <net/checksum.h> |
57 | #include <net/udp.h> | 59 | #include <net/udp.h> |
58 | #include <asm/uaccess.h> | 60 | #include <asm/uaccess.h> |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 45886c8475e8..2a26d167e149 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -83,11 +83,6 @@ static DECLARE_MUTEX(ipt_mutex); | |||
83 | context stops packets coming through and allows user context to read | 83 | context stops packets coming through and allows user context to read |
84 | the counters or update the rules. | 84 | the counters or update the rules. |
85 | 85 | ||
86 | To be cache friendly on SMP, we arrange them like so: | ||
87 | [ n-entries ] | ||
88 | ... cache-align padding ... | ||
89 | [ n-entries ] | ||
90 | |||
91 | Hence the start of any table is given by get_table() below. */ | 86 | Hence the start of any table is given by get_table() below. */ |
92 | 87 | ||
93 | /* The table itself */ | 88 | /* The table itself */ |
@@ -105,20 +100,15 @@ struct ipt_table_info | |||
105 | unsigned int underflow[NF_IP_NUMHOOKS]; | 100 | unsigned int underflow[NF_IP_NUMHOOKS]; |
106 | 101 | ||
107 | /* ipt_entry tables: one per CPU */ | 102 | /* ipt_entry tables: one per CPU */ |
108 | char entries[0] ____cacheline_aligned; | 103 | void *entries[NR_CPUS]; |
109 | }; | 104 | }; |
110 | 105 | ||
111 | static LIST_HEAD(ipt_target); | 106 | static LIST_HEAD(ipt_target); |
112 | static LIST_HEAD(ipt_match); | 107 | static LIST_HEAD(ipt_match); |
113 | static LIST_HEAD(ipt_tables); | 108 | static LIST_HEAD(ipt_tables); |
109 | #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) | ||
114 | #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) | 110 | #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) |
115 | 111 | ||
116 | #ifdef CONFIG_SMP | ||
117 | #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) | ||
118 | #else | ||
119 | #define TABLE_OFFSET(t,p) 0 | ||
120 | #endif | ||
121 | |||
122 | #if 0 | 112 | #if 0 |
123 | #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) | 113 | #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) |
124 | #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) | 114 | #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) |
@@ -290,8 +280,7 @@ ipt_do_table(struct sk_buff **pskb, | |||
290 | 280 | ||
291 | read_lock_bh(&table->lock); | 281 | read_lock_bh(&table->lock); |
292 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | 282 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
293 | table_base = (void *)table->private->entries | 283 | table_base = (void *)table->private->entries[smp_processor_id()]; |
294 | + TABLE_OFFSET(table->private, smp_processor_id()); | ||
295 | e = get_entry(table_base, table->private->hook_entry[hook]); | 284 | e = get_entry(table_base, table->private->hook_entry[hook]); |
296 | 285 | ||
297 | #ifdef CONFIG_NETFILTER_DEBUG | 286 | #ifdef CONFIG_NETFILTER_DEBUG |
@@ -563,7 +552,8 @@ unconditional(const struct ipt_ip *ip) | |||
563 | /* Figures out from what hook each rule can be called: returns 0 if | 552 | /* Figures out from what hook each rule can be called: returns 0 if |
564 | there are loops. Puts hook bitmask in comefrom. */ | 553 | there are loops. Puts hook bitmask in comefrom. */ |
565 | static int | 554 | static int |
566 | mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) | 555 | mark_source_chains(struct ipt_table_info *newinfo, |
556 | unsigned int valid_hooks, void *entry0) | ||
567 | { | 557 | { |
568 | unsigned int hook; | 558 | unsigned int hook; |
569 | 559 | ||
@@ -572,7 +562,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) | |||
572 | for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { | 562 | for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { |
573 | unsigned int pos = newinfo->hook_entry[hook]; | 563 | unsigned int pos = newinfo->hook_entry[hook]; |
574 | struct ipt_entry *e | 564 | struct ipt_entry *e |
575 | = (struct ipt_entry *)(newinfo->entries + pos); | 565 | = (struct ipt_entry *)(entry0 + pos); |
576 | 566 | ||
577 | if (!(valid_hooks & (1 << hook))) | 567 | if (!(valid_hooks & (1 << hook))) |
578 | continue; | 568 | continue; |
@@ -622,13 +612,13 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) | |||
622 | goto next; | 612 | goto next; |
623 | 613 | ||
624 | e = (struct ipt_entry *) | 614 | e = (struct ipt_entry *) |
625 | (newinfo->entries + pos); | 615 | (entry0 + pos); |
626 | } while (oldpos == pos + e->next_offset); | 616 | } while (oldpos == pos + e->next_offset); |
627 | 617 | ||
628 | /* Move along one */ | 618 | /* Move along one */ |
629 | size = e->next_offset; | 619 | size = e->next_offset; |
630 | e = (struct ipt_entry *) | 620 | e = (struct ipt_entry *) |
631 | (newinfo->entries + pos + size); | 621 | (entry0 + pos + size); |
632 | e->counters.pcnt = pos; | 622 | e->counters.pcnt = pos; |
633 | pos += size; | 623 | pos += size; |
634 | } else { | 624 | } else { |
@@ -645,7 +635,7 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) | |||
645 | newpos = pos + e->next_offset; | 635 | newpos = pos + e->next_offset; |
646 | } | 636 | } |
647 | e = (struct ipt_entry *) | 637 | e = (struct ipt_entry *) |
648 | (newinfo->entries + newpos); | 638 | (entry0 + newpos); |
649 | e->counters.pcnt = pos; | 639 | e->counters.pcnt = pos; |
650 | pos = newpos; | 640 | pos = newpos; |
651 | } | 641 | } |
@@ -855,6 +845,7 @@ static int | |||
855 | translate_table(const char *name, | 845 | translate_table(const char *name, |
856 | unsigned int valid_hooks, | 846 | unsigned int valid_hooks, |
857 | struct ipt_table_info *newinfo, | 847 | struct ipt_table_info *newinfo, |
848 | void *entry0, | ||
858 | unsigned int size, | 849 | unsigned int size, |
859 | unsigned int number, | 850 | unsigned int number, |
860 | const unsigned int *hook_entries, | 851 | const unsigned int *hook_entries, |
@@ -875,11 +866,11 @@ translate_table(const char *name, | |||
875 | duprintf("translate_table: size %u\n", newinfo->size); | 866 | duprintf("translate_table: size %u\n", newinfo->size); |
876 | i = 0; | 867 | i = 0; |
877 | /* Walk through entries, checking offsets. */ | 868 | /* Walk through entries, checking offsets. */ |
878 | ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 869 | ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, |
879 | check_entry_size_and_hooks, | 870 | check_entry_size_and_hooks, |
880 | newinfo, | 871 | newinfo, |
881 | newinfo->entries, | 872 | entry0, |
882 | newinfo->entries + size, | 873 | entry0 + size, |
883 | hook_entries, underflows, &i); | 874 | hook_entries, underflows, &i); |
884 | if (ret != 0) | 875 | if (ret != 0) |
885 | return ret; | 876 | return ret; |
@@ -907,27 +898,24 @@ translate_table(const char *name, | |||
907 | } | 898 | } |
908 | } | 899 | } |
909 | 900 | ||
910 | if (!mark_source_chains(newinfo, valid_hooks)) | 901 | if (!mark_source_chains(newinfo, valid_hooks, entry0)) |
911 | return -ELOOP; | 902 | return -ELOOP; |
912 | 903 | ||
913 | /* Finally, each sanity check must pass */ | 904 | /* Finally, each sanity check must pass */ |
914 | i = 0; | 905 | i = 0; |
915 | ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 906 | ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, |
916 | check_entry, name, size, &i); | 907 | check_entry, name, size, &i); |
917 | 908 | ||
918 | if (ret != 0) { | 909 | if (ret != 0) { |
919 | IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 910 | IPT_ENTRY_ITERATE(entry0, newinfo->size, |
920 | cleanup_entry, &i); | 911 | cleanup_entry, &i); |
921 | return ret; | 912 | return ret; |
922 | } | 913 | } |
923 | 914 | ||
924 | /* And one copy for every other CPU */ | 915 | /* And one copy for every other CPU */ |
925 | for_each_cpu(i) { | 916 | for_each_cpu(i) { |
926 | if (i == 0) | 917 | if (newinfo->entries[i] && newinfo->entries[i] != entry0) |
927 | continue; | 918 | memcpy(newinfo->entries[i], entry0, newinfo->size); |
928 | memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, | ||
929 | newinfo->entries, | ||
930 | SMP_ALIGN(newinfo->size)); | ||
931 | } | 919 | } |
932 | 920 | ||
933 | return ret; | 921 | return ret; |
@@ -943,15 +931,12 @@ replace_table(struct ipt_table *table, | |||
943 | 931 | ||
944 | #ifdef CONFIG_NETFILTER_DEBUG | 932 | #ifdef CONFIG_NETFILTER_DEBUG |
945 | { | 933 | { |
946 | struct ipt_entry *table_base; | 934 | int cpu; |
947 | unsigned int i; | ||
948 | 935 | ||
949 | for_each_cpu(i) { | 936 | for_each_cpu(cpu) { |
950 | table_base = | 937 | struct ipt_entry *table_base = newinfo->entries[cpu]; |
951 | (void *)newinfo->entries | 938 | if (table_base) |
952 | + TABLE_OFFSET(newinfo, i); | 939 | table_base->comefrom = 0xdead57ac; |
953 | |||
954 | table_base->comefrom = 0xdead57ac; | ||
955 | } | 940 | } |
956 | } | 941 | } |
957 | #endif | 942 | #endif |
@@ -986,16 +971,44 @@ add_entry_to_counter(const struct ipt_entry *e, | |||
986 | return 0; | 971 | return 0; |
987 | } | 972 | } |
988 | 973 | ||
974 | static inline int | ||
975 | set_entry_to_counter(const struct ipt_entry *e, | ||
976 | struct ipt_counters total[], | ||
977 | unsigned int *i) | ||
978 | { | ||
979 | SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
980 | |||
981 | (*i)++; | ||
982 | return 0; | ||
983 | } | ||
984 | |||
989 | static void | 985 | static void |
990 | get_counters(const struct ipt_table_info *t, | 986 | get_counters(const struct ipt_table_info *t, |
991 | struct ipt_counters counters[]) | 987 | struct ipt_counters counters[]) |
992 | { | 988 | { |
993 | unsigned int cpu; | 989 | unsigned int cpu; |
994 | unsigned int i; | 990 | unsigned int i; |
991 | unsigned int curcpu; | ||
992 | |||
993 | /* Instead of clearing (by a previous call to memset()) | ||
994 | * the counters and using adds, we set the counters | ||
995 | * with data used by 'current' CPU | ||
996 | * We dont care about preemption here. | ||
997 | */ | ||
998 | curcpu = raw_smp_processor_id(); | ||
999 | |||
1000 | i = 0; | ||
1001 | IPT_ENTRY_ITERATE(t->entries[curcpu], | ||
1002 | t->size, | ||
1003 | set_entry_to_counter, | ||
1004 | counters, | ||
1005 | &i); | ||
995 | 1006 | ||
996 | for_each_cpu(cpu) { | 1007 | for_each_cpu(cpu) { |
1008 | if (cpu == curcpu) | ||
1009 | continue; | ||
997 | i = 0; | 1010 | i = 0; |
998 | IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), | 1011 | IPT_ENTRY_ITERATE(t->entries[cpu], |
999 | t->size, | 1012 | t->size, |
1000 | add_entry_to_counter, | 1013 | add_entry_to_counter, |
1001 | counters, | 1014 | counters, |
@@ -1012,24 +1025,29 @@ copy_entries_to_user(unsigned int total_size, | |||
1012 | struct ipt_entry *e; | 1025 | struct ipt_entry *e; |
1013 | struct ipt_counters *counters; | 1026 | struct ipt_counters *counters; |
1014 | int ret = 0; | 1027 | int ret = 0; |
1028 | void *loc_cpu_entry; | ||
1015 | 1029 | ||
1016 | /* We need atomic snapshot of counters: rest doesn't change | 1030 | /* We need atomic snapshot of counters: rest doesn't change |
1017 | (other than comefrom, which userspace doesn't care | 1031 | (other than comefrom, which userspace doesn't care |
1018 | about). */ | 1032 | about). */ |
1019 | countersize = sizeof(struct ipt_counters) * table->private->number; | 1033 | countersize = sizeof(struct ipt_counters) * table->private->number; |
1020 | counters = vmalloc(countersize); | 1034 | counters = vmalloc_node(countersize, numa_node_id()); |
1021 | 1035 | ||
1022 | if (counters == NULL) | 1036 | if (counters == NULL) |
1023 | return -ENOMEM; | 1037 | return -ENOMEM; |
1024 | 1038 | ||
1025 | /* First, sum counters... */ | 1039 | /* First, sum counters... */ |
1026 | memset(counters, 0, countersize); | ||
1027 | write_lock_bh(&table->lock); | 1040 | write_lock_bh(&table->lock); |
1028 | get_counters(table->private, counters); | 1041 | get_counters(table->private, counters); |
1029 | write_unlock_bh(&table->lock); | 1042 | write_unlock_bh(&table->lock); |
1030 | 1043 | ||
1031 | /* ... then copy entire thing from CPU 0... */ | 1044 | /* choose the copy that is on our node/cpu, ... |
1032 | if (copy_to_user(userptr, table->private->entries, total_size) != 0) { | 1045 | * This choice is lazy (because current thread is |
1046 | * allowed to migrate to another cpu) | ||
1047 | */ | ||
1048 | loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; | ||
1049 | /* ... then copy entire thing ... */ | ||
1050 | if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { | ||
1033 | ret = -EFAULT; | 1051 | ret = -EFAULT; |
1034 | goto free_counters; | 1052 | goto free_counters; |
1035 | } | 1053 | } |
@@ -1041,7 +1059,7 @@ copy_entries_to_user(unsigned int total_size, | |||
1041 | struct ipt_entry_match *m; | 1059 | struct ipt_entry_match *m; |
1042 | struct ipt_entry_target *t; | 1060 | struct ipt_entry_target *t; |
1043 | 1061 | ||
1044 | e = (struct ipt_entry *)(table->private->entries + off); | 1062 | e = (struct ipt_entry *)(loc_cpu_entry + off); |
1045 | if (copy_to_user(userptr + off | 1063 | if (copy_to_user(userptr + off |
1046 | + offsetof(struct ipt_entry, counters), | 1064 | + offsetof(struct ipt_entry, counters), |
1047 | &counters[num], | 1065 | &counters[num], |
@@ -1110,6 +1128,45 @@ get_entries(const struct ipt_get_entries *entries, | |||
1110 | return ret; | 1128 | return ret; |
1111 | } | 1129 | } |
1112 | 1130 | ||
1131 | static void free_table_info(struct ipt_table_info *info) | ||
1132 | { | ||
1133 | int cpu; | ||
1134 | for_each_cpu(cpu) { | ||
1135 | if (info->size <= PAGE_SIZE) | ||
1136 | kfree(info->entries[cpu]); | ||
1137 | else | ||
1138 | vfree(info->entries[cpu]); | ||
1139 | } | ||
1140 | kfree(info); | ||
1141 | } | ||
1142 | |||
1143 | static struct ipt_table_info *alloc_table_info(unsigned int size) | ||
1144 | { | ||
1145 | struct ipt_table_info *newinfo; | ||
1146 | int cpu; | ||
1147 | |||
1148 | newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL); | ||
1149 | if (!newinfo) | ||
1150 | return NULL; | ||
1151 | |||
1152 | newinfo->size = size; | ||
1153 | |||
1154 | for_each_cpu(cpu) { | ||
1155 | if (size <= PAGE_SIZE) | ||
1156 | newinfo->entries[cpu] = kmalloc_node(size, | ||
1157 | GFP_KERNEL, | ||
1158 | cpu_to_node(cpu)); | ||
1159 | else | ||
1160 | newinfo->entries[cpu] = vmalloc_node(size, cpu_to_node(cpu)); | ||
1161 | if (newinfo->entries[cpu] == 0) { | ||
1162 | free_table_info(newinfo); | ||
1163 | return NULL; | ||
1164 | } | ||
1165 | } | ||
1166 | |||
1167 | return newinfo; | ||
1168 | } | ||
1169 | |||
1113 | static int | 1170 | static int |
1114 | do_replace(void __user *user, unsigned int len) | 1171 | do_replace(void __user *user, unsigned int len) |
1115 | { | 1172 | { |
@@ -1118,6 +1175,7 @@ do_replace(void __user *user, unsigned int len) | |||
1118 | struct ipt_table *t; | 1175 | struct ipt_table *t; |
1119 | struct ipt_table_info *newinfo, *oldinfo; | 1176 | struct ipt_table_info *newinfo, *oldinfo; |
1120 | struct ipt_counters *counters; | 1177 | struct ipt_counters *counters; |
1178 | void *loc_cpu_entry, *loc_cpu_old_entry; | ||
1121 | 1179 | ||
1122 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1180 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1123 | return -EFAULT; | 1181 | return -EFAULT; |
@@ -1130,13 +1188,13 @@ do_replace(void __user *user, unsigned int len) | |||
1130 | if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) | 1188 | if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) |
1131 | return -ENOMEM; | 1189 | return -ENOMEM; |
1132 | 1190 | ||
1133 | newinfo = vmalloc(sizeof(struct ipt_table_info) | 1191 | newinfo = alloc_table_info(tmp.size); |
1134 | + SMP_ALIGN(tmp.size) * | ||
1135 | (highest_possible_processor_id()+1)); | ||
1136 | if (!newinfo) | 1192 | if (!newinfo) |
1137 | return -ENOMEM; | 1193 | return -ENOMEM; |
1138 | 1194 | ||
1139 | if (copy_from_user(newinfo->entries, user + sizeof(tmp), | 1195 | /* choose the copy that is our node/cpu */ |
1196 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | ||
1197 | if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), | ||
1140 | tmp.size) != 0) { | 1198 | tmp.size) != 0) { |
1141 | ret = -EFAULT; | 1199 | ret = -EFAULT; |
1142 | goto free_newinfo; | 1200 | goto free_newinfo; |
@@ -1147,10 +1205,9 @@ do_replace(void __user *user, unsigned int len) | |||
1147 | ret = -ENOMEM; | 1205 | ret = -ENOMEM; |
1148 | goto free_newinfo; | 1206 | goto free_newinfo; |
1149 | } | 1207 | } |
1150 | memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters)); | ||
1151 | 1208 | ||
1152 | ret = translate_table(tmp.name, tmp.valid_hooks, | 1209 | ret = translate_table(tmp.name, tmp.valid_hooks, |
1153 | newinfo, tmp.size, tmp.num_entries, | 1210 | newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, |
1154 | tmp.hook_entry, tmp.underflow); | 1211 | tmp.hook_entry, tmp.underflow); |
1155 | if (ret != 0) | 1212 | if (ret != 0) |
1156 | goto free_newinfo_counters; | 1213 | goto free_newinfo_counters; |
@@ -1189,8 +1246,9 @@ do_replace(void __user *user, unsigned int len) | |||
1189 | /* Get the old counters. */ | 1246 | /* Get the old counters. */ |
1190 | get_counters(oldinfo, counters); | 1247 | get_counters(oldinfo, counters); |
1191 | /* Decrease module usage counts and free resource */ | 1248 | /* Decrease module usage counts and free resource */ |
1192 | IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); | 1249 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
1193 | vfree(oldinfo); | 1250 | IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); |
1251 | free_table_info(oldinfo); | ||
1194 | if (copy_to_user(tmp.counters, counters, | 1252 | if (copy_to_user(tmp.counters, counters, |
1195 | sizeof(struct ipt_counters) * tmp.num_counters) != 0) | 1253 | sizeof(struct ipt_counters) * tmp.num_counters) != 0) |
1196 | ret = -EFAULT; | 1254 | ret = -EFAULT; |
@@ -1202,11 +1260,11 @@ do_replace(void __user *user, unsigned int len) | |||
1202 | module_put(t->me); | 1260 | module_put(t->me); |
1203 | up(&ipt_mutex); | 1261 | up(&ipt_mutex); |
1204 | free_newinfo_counters_untrans: | 1262 | free_newinfo_counters_untrans: |
1205 | IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); | 1263 | IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); |
1206 | free_newinfo_counters: | 1264 | free_newinfo_counters: |
1207 | vfree(counters); | 1265 | vfree(counters); |
1208 | free_newinfo: | 1266 | free_newinfo: |
1209 | vfree(newinfo); | 1267 | free_table_info(newinfo); |
1210 | return ret; | 1268 | return ret; |
1211 | } | 1269 | } |
1212 | 1270 | ||
@@ -1239,6 +1297,7 @@ do_add_counters(void __user *user, unsigned int len) | |||
1239 | struct ipt_counters_info tmp, *paddc; | 1297 | struct ipt_counters_info tmp, *paddc; |
1240 | struct ipt_table *t; | 1298 | struct ipt_table *t; |
1241 | int ret = 0; | 1299 | int ret = 0; |
1300 | void *loc_cpu_entry; | ||
1242 | 1301 | ||
1243 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1302 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1244 | return -EFAULT; | 1303 | return -EFAULT; |
@@ -1246,7 +1305,7 @@ do_add_counters(void __user *user, unsigned int len) | |||
1246 | if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters)) | 1305 | if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters)) |
1247 | return -EINVAL; | 1306 | return -EINVAL; |
1248 | 1307 | ||
1249 | paddc = vmalloc(len); | 1308 | paddc = vmalloc_node(len, numa_node_id()); |
1250 | if (!paddc) | 1309 | if (!paddc) |
1251 | return -ENOMEM; | 1310 | return -ENOMEM; |
1252 | 1311 | ||
@@ -1268,7 +1327,9 @@ do_add_counters(void __user *user, unsigned int len) | |||
1268 | } | 1327 | } |
1269 | 1328 | ||
1270 | i = 0; | 1329 | i = 0; |
1271 | IPT_ENTRY_ITERATE(t->private->entries, | 1330 | /* Choose the copy that is on our node */ |
1331 | loc_cpu_entry = t->private->entries[raw_smp_processor_id()]; | ||
1332 | IPT_ENTRY_ITERATE(loc_cpu_entry, | ||
1272 | t->private->size, | 1333 | t->private->size, |
1273 | add_counter_to_entry, | 1334 | add_counter_to_entry, |
1274 | paddc->counters, | 1335 | paddc->counters, |
@@ -1460,28 +1521,31 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) | |||
1460 | struct ipt_table_info *newinfo; | 1521 | struct ipt_table_info *newinfo; |
1461 | static struct ipt_table_info bootstrap | 1522 | static struct ipt_table_info bootstrap |
1462 | = { 0, 0, 0, { 0 }, { 0 }, { } }; | 1523 | = { 0, 0, 0, { 0 }, { 0 }, { } }; |
1524 | void *loc_cpu_entry; | ||
1463 | 1525 | ||
1464 | newinfo = vmalloc(sizeof(struct ipt_table_info) | 1526 | newinfo = alloc_table_info(repl->size); |
1465 | + SMP_ALIGN(repl->size) * | ||
1466 | (highest_possible_processor_id()+1)); | ||
1467 | if (!newinfo) | 1527 | if (!newinfo) |
1468 | return -ENOMEM; | 1528 | return -ENOMEM; |
1469 | 1529 | ||
1470 | memcpy(newinfo->entries, repl->entries, repl->size); | 1530 | /* choose the copy on our node/cpu |
1531 | * but dont care of preemption | ||
1532 | */ | ||
1533 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | ||
1534 | memcpy(loc_cpu_entry, repl->entries, repl->size); | ||
1471 | 1535 | ||
1472 | ret = translate_table(table->name, table->valid_hooks, | 1536 | ret = translate_table(table->name, table->valid_hooks, |
1473 | newinfo, repl->size, | 1537 | newinfo, loc_cpu_entry, repl->size, |
1474 | repl->num_entries, | 1538 | repl->num_entries, |
1475 | repl->hook_entry, | 1539 | repl->hook_entry, |
1476 | repl->underflow); | 1540 | repl->underflow); |
1477 | if (ret != 0) { | 1541 | if (ret != 0) { |
1478 | vfree(newinfo); | 1542 | free_table_info(newinfo); |
1479 | return ret; | 1543 | return ret; |
1480 | } | 1544 | } |
1481 | 1545 | ||
1482 | ret = down_interruptible(&ipt_mutex); | 1546 | ret = down_interruptible(&ipt_mutex); |
1483 | if (ret != 0) { | 1547 | if (ret != 0) { |
1484 | vfree(newinfo); | 1548 | free_table_info(newinfo); |
1485 | return ret; | 1549 | return ret; |
1486 | } | 1550 | } |
1487 | 1551 | ||
@@ -1510,20 +1574,23 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl) | |||
1510 | return ret; | 1574 | return ret; |
1511 | 1575 | ||
1512 | free_unlock: | 1576 | free_unlock: |
1513 | vfree(newinfo); | 1577 | free_table_info(newinfo); |
1514 | goto unlock; | 1578 | goto unlock; |
1515 | } | 1579 | } |
1516 | 1580 | ||
1517 | void ipt_unregister_table(struct ipt_table *table) | 1581 | void ipt_unregister_table(struct ipt_table *table) |
1518 | { | 1582 | { |
1583 | void *loc_cpu_entry; | ||
1584 | |||
1519 | down(&ipt_mutex); | 1585 | down(&ipt_mutex); |
1520 | LIST_DELETE(&ipt_tables, table); | 1586 | LIST_DELETE(&ipt_tables, table); |
1521 | up(&ipt_mutex); | 1587 | up(&ipt_mutex); |
1522 | 1588 | ||
1523 | /* Decrease module usage counts and free resources */ | 1589 | /* Decrease module usage counts and free resources */ |
1524 | IPT_ENTRY_ITERATE(table->private->entries, table->private->size, | 1590 | loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; |
1591 | IPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size, | ||
1525 | cleanup_entry, NULL); | 1592 | cleanup_entry, NULL); |
1526 | vfree(table->private); | 1593 | free_table_info(table->private); |
1527 | } | 1594 | } |
1528 | 1595 | ||
1529 | /* Returns 1 if the port is matched by the range, 0 otherwise */ | 1596 | /* Returns 1 if the port is matched by the range, 0 otherwise */ |
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 275a174c6fe6..27860510ca6d 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/config.h> | 12 | #include <linux/config.h> |
13 | #include <linux/types.h> | 13 | #include <linux/types.h> |
14 | #include <linux/inetdevice.h> | ||
14 | #include <linux/ip.h> | 15 | #include <linux/ip.h> |
15 | #include <linux/timer.h> | 16 | #include <linux/timer.h> |
16 | #include <linux/module.h> | 17 | #include <linux/module.h> |
@@ -18,6 +19,7 @@ | |||
18 | #include <net/protocol.h> | 19 | #include <net/protocol.h> |
19 | #include <net/ip.h> | 20 | #include <net/ip.h> |
20 | #include <net/checksum.h> | 21 | #include <net/checksum.h> |
22 | #include <net/route.h> | ||
21 | #include <linux/netfilter_ipv4.h> | 23 | #include <linux/netfilter_ipv4.h> |
22 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | 24 | #include <linux/netfilter_ipv4/ip_nat_rule.h> |
23 | #include <linux/netfilter_ipv4/ip_tables.h> | 25 | #include <linux/netfilter_ipv4/ip_tables.h> |
diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c index 1a53924041fc..03f554857a4d 100644 --- a/net/ipv4/netfilter/ipt_physdev.c +++ b/net/ipv4/netfilter/ipt_physdev.c | |||
@@ -9,6 +9,7 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/netdevice.h> | ||
12 | #include <linux/skbuff.h> | 13 | #include <linux/skbuff.h> |
13 | #include <linux/netfilter_ipv4/ipt_physdev.h> | 14 | #include <linux/netfilter_ipv4/ipt_physdev.h> |
14 | #include <linux/netfilter_ipv4/ip_tables.h> | 15 | #include <linux/netfilter_ipv4/ip_tables.h> |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 0d7dc668db46..39d49dc333a7 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <net/protocol.h> | 38 | #include <net/protocol.h> |
39 | #include <net/tcp.h> | 39 | #include <net/tcp.h> |
40 | #include <net/udp.h> | 40 | #include <net/udp.h> |
41 | #include <linux/inetdevice.h> | ||
41 | #include <linux/proc_fs.h> | 42 | #include <linux/proc_fs.h> |
42 | #include <linux/seq_file.h> | 43 | #include <linux/seq_file.h> |
43 | #include <net/sock.h> | 44 | #include <net/sock.h> |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index a34e60ea48a1..e20be3331f67 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -173,10 +173,10 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, | |||
173 | struct request_sock *req, | 173 | struct request_sock *req, |
174 | struct dst_entry *dst) | 174 | struct dst_entry *dst) |
175 | { | 175 | { |
176 | struct tcp_sock *tp = tcp_sk(sk); | 176 | struct inet_connection_sock *icsk = inet_csk(sk); |
177 | struct sock *child; | 177 | struct sock *child; |
178 | 178 | ||
179 | child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); | 179 | child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); |
180 | if (child) | 180 | if (child) |
181 | inet_csk_reqsk_queue_add(sk, req, child); | 181 | inet_csk_reqsk_queue_add(sk, req, child); |
182 | else | 182 | else |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 01444a02b48b..16984d4a8a06 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/sysctl.h> | 12 | #include <linux/sysctl.h> |
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/igmp.h> | 14 | #include <linux/igmp.h> |
15 | #include <linux/inetdevice.h> | ||
15 | #include <net/snmp.h> | 16 | #include <net/snmp.h> |
16 | #include <net/icmp.h> | 17 | #include <net/icmp.h> |
17 | #include <net/ip.h> | 18 | #include <net/ip.h> |
@@ -22,6 +23,7 @@ | |||
22 | extern int sysctl_ip_nonlocal_bind; | 23 | extern int sysctl_ip_nonlocal_bind; |
23 | 24 | ||
24 | #ifdef CONFIG_SYSCTL | 25 | #ifdef CONFIG_SYSCTL |
26 | static int zero; | ||
25 | static int tcp_retr1_max = 255; | 27 | static int tcp_retr1_max = 255; |
26 | static int ip_local_port_range_min[] = { 1, 1 }; | 28 | static int ip_local_port_range_min[] = { 1, 1 }; |
27 | static int ip_local_port_range_max[] = { 65535, 65535 }; | 29 | static int ip_local_port_range_max[] = { 65535, 65535 }; |
@@ -614,6 +616,15 @@ ctl_table ipv4_table[] = { | |||
614 | .strategy = &sysctl_jiffies | 616 | .strategy = &sysctl_jiffies |
615 | }, | 617 | }, |
616 | { | 618 | { |
619 | .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, | ||
620 | .procname = "ipfrag_max_dist", | ||
621 | .data = &sysctl_ipfrag_max_dist, | ||
622 | .maxlen = sizeof(int), | ||
623 | .mode = 0644, | ||
624 | .proc_handler = &proc_dointvec_minmax, | ||
625 | .extra1 = &zero | ||
626 | }, | ||
627 | { | ||
617 | .ctl_name = NET_TCP_NO_METRICS_SAVE, | 628 | .ctl_name = NET_TCP_NO_METRICS_SAVE, |
618 | .procname = "tcp_no_metrics_save", | 629 | .procname = "tcp_no_metrics_save", |
619 | .data = &sysctl_tcp_nometrics_save, | 630 | .data = &sysctl_tcp_nometrics_save, |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ef98b14ac56d..00aa80e93243 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1696,8 +1696,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
1696 | int err = 0; | 1696 | int err = 0; |
1697 | 1697 | ||
1698 | if (level != SOL_TCP) | 1698 | if (level != SOL_TCP) |
1699 | return tp->af_specific->setsockopt(sk, level, optname, | 1699 | return icsk->icsk_af_ops->setsockopt(sk, level, optname, |
1700 | optval, optlen); | 1700 | optval, optlen); |
1701 | 1701 | ||
1702 | /* This is a string value all the others are int's */ | 1702 | /* This is a string value all the others are int's */ |
1703 | if (optname == TCP_CONGESTION) { | 1703 | if (optname == TCP_CONGESTION) { |
@@ -1914,7 +1914,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) | |||
1914 | info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); | 1914 | info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); |
1915 | info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); | 1915 | info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); |
1916 | 1916 | ||
1917 | info->tcpi_pmtu = tp->pmtu_cookie; | 1917 | info->tcpi_pmtu = icsk->icsk_pmtu_cookie; |
1918 | info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; | 1918 | info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; |
1919 | info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; | 1919 | info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; |
1920 | info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; | 1920 | info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; |
@@ -1939,8 +1939,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
1939 | int val, len; | 1939 | int val, len; |
1940 | 1940 | ||
1941 | if (level != SOL_TCP) | 1941 | if (level != SOL_TCP) |
1942 | return tp->af_specific->getsockopt(sk, level, optname, | 1942 | return icsk->icsk_af_ops->getsockopt(sk, level, optname, |
1943 | optval, optlen); | 1943 | optval, optlen); |
1944 | 1944 | ||
1945 | if (get_user(len, optlen)) | 1945 | if (get_user(len, optlen)) |
1946 | return -EFAULT; | 1946 | return -EFAULT; |
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 1d0cd86621b1..035f2092d73a 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
@@ -30,8 +30,6 @@ static int fast_convergence = 1; | |||
30 | static int max_increment = 16; | 30 | static int max_increment = 16; |
31 | static int low_window = 14; | 31 | static int low_window = 14; |
32 | static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ | 32 | static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ |
33 | static int low_utilization_threshold = 153; | ||
34 | static int low_utilization_period = 2; | ||
35 | static int initial_ssthresh = 100; | 33 | static int initial_ssthresh = 100; |
36 | static int smooth_part = 20; | 34 | static int smooth_part = 20; |
37 | 35 | ||
@@ -43,10 +41,6 @@ module_param(low_window, int, 0644); | |||
43 | MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)"); | 41 | MODULE_PARM_DESC(low_window, "lower bound on congestion window (for TCP friendliness)"); |
44 | module_param(beta, int, 0644); | 42 | module_param(beta, int, 0644); |
45 | MODULE_PARM_DESC(beta, "beta for multiplicative increase"); | 43 | MODULE_PARM_DESC(beta, "beta for multiplicative increase"); |
46 | module_param(low_utilization_threshold, int, 0644); | ||
47 | MODULE_PARM_DESC(low_utilization_threshold, "percent (scaled by 1024) for low utilization mode"); | ||
48 | module_param(low_utilization_period, int, 0644); | ||
49 | MODULE_PARM_DESC(low_utilization_period, "if average delay exceeds then goto to low utilization mode (seconds)"); | ||
50 | module_param(initial_ssthresh, int, 0644); | 44 | module_param(initial_ssthresh, int, 0644); |
51 | MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); | 45 | MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); |
52 | module_param(smooth_part, int, 0644); | 46 | module_param(smooth_part, int, 0644); |
@@ -60,11 +54,6 @@ struct bictcp { | |||
60 | u32 loss_cwnd; /* congestion window at last loss */ | 54 | u32 loss_cwnd; /* congestion window at last loss */ |
61 | u32 last_cwnd; /* the last snd_cwnd */ | 55 | u32 last_cwnd; /* the last snd_cwnd */ |
62 | u32 last_time; /* time when updated last_cwnd */ | 56 | u32 last_time; /* time when updated last_cwnd */ |
63 | u32 delay_min; /* min delay */ | ||
64 | u32 delay_max; /* max delay */ | ||
65 | u32 last_delay; | ||
66 | u8 low_utilization;/* 0: high; 1: low */ | ||
67 | u32 low_utilization_start; /* starting time of low utilization detection*/ | ||
68 | u32 epoch_start; /* beginning of an epoch */ | 57 | u32 epoch_start; /* beginning of an epoch */ |
69 | #define ACK_RATIO_SHIFT 4 | 58 | #define ACK_RATIO_SHIFT 4 |
70 | u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ | 59 | u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ |
@@ -77,11 +66,6 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
77 | ca->loss_cwnd = 0; | 66 | ca->loss_cwnd = 0; |
78 | ca->last_cwnd = 0; | 67 | ca->last_cwnd = 0; |
79 | ca->last_time = 0; | 68 | ca->last_time = 0; |
80 | ca->delay_min = 0; | ||
81 | ca->delay_max = 0; | ||
82 | ca->last_delay = 0; | ||
83 | ca->low_utilization = 0; | ||
84 | ca->low_utilization_start = 0; | ||
85 | ca->epoch_start = 0; | 69 | ca->epoch_start = 0; |
86 | ca->delayed_ack = 2 << ACK_RATIO_SHIFT; | 70 | ca->delayed_ack = 2 << ACK_RATIO_SHIFT; |
87 | } | 71 | } |
@@ -143,8 +127,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
143 | } | 127 | } |
144 | 128 | ||
145 | /* if in slow start or link utilization is very low */ | 129 | /* if in slow start or link utilization is very low */ |
146 | if ( ca->loss_cwnd == 0 || | 130 | if (ca->loss_cwnd == 0) { |
147 | (cwnd > ca->loss_cwnd && ca->low_utilization)) { | ||
148 | if (ca->cnt > 20) /* increase cwnd 5% per RTT */ | 131 | if (ca->cnt > 20) /* increase cwnd 5% per RTT */ |
149 | ca->cnt = 20; | 132 | ca->cnt = 20; |
150 | } | 133 | } |
@@ -154,69 +137,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
154 | ca->cnt = 1; | 137 | ca->cnt = 1; |
155 | } | 138 | } |
156 | 139 | ||
157 | |||
158 | /* Detect low utilization in congestion avoidance */ | ||
159 | static inline void bictcp_low_utilization(struct sock *sk, int flag) | ||
160 | { | ||
161 | const struct tcp_sock *tp = tcp_sk(sk); | ||
162 | struct bictcp *ca = inet_csk_ca(sk); | ||
163 | u32 dist, delay; | ||
164 | |||
165 | /* No time stamp */ | ||
166 | if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) || | ||
167 | /* Discard delay samples right after fast recovery */ | ||
168 | tcp_time_stamp < ca->epoch_start + HZ || | ||
169 | /* this delay samples may not be accurate */ | ||
170 | flag == 0) { | ||
171 | ca->last_delay = 0; | ||
172 | goto notlow; | ||
173 | } | ||
174 | |||
175 | delay = ca->last_delay<<3; /* use the same scale as tp->srtt*/ | ||
176 | ca->last_delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr; | ||
177 | if (delay == 0) /* no previous delay sample */ | ||
178 | goto notlow; | ||
179 | |||
180 | /* first time call or link delay decreases */ | ||
181 | if (ca->delay_min == 0 || ca->delay_min > delay) { | ||
182 | ca->delay_min = ca->delay_max = delay; | ||
183 | goto notlow; | ||
184 | } | ||
185 | |||
186 | if (ca->delay_max < delay) | ||
187 | ca->delay_max = delay; | ||
188 | |||
189 | /* utilization is low, if avg delay < dist*threshold | ||
190 | for checking_period time */ | ||
191 | dist = ca->delay_max - ca->delay_min; | ||
192 | if (dist <= ca->delay_min>>6 || | ||
193 | tp->srtt - ca->delay_min >= (dist*low_utilization_threshold)>>10) | ||
194 | goto notlow; | ||
195 | |||
196 | if (ca->low_utilization_start == 0) { | ||
197 | ca->low_utilization = 0; | ||
198 | ca->low_utilization_start = tcp_time_stamp; | ||
199 | } else if ((s32)(tcp_time_stamp - ca->low_utilization_start) | ||
200 | > low_utilization_period*HZ) { | ||
201 | ca->low_utilization = 1; | ||
202 | } | ||
203 | |||
204 | return; | ||
205 | |||
206 | notlow: | ||
207 | ca->low_utilization = 0; | ||
208 | ca->low_utilization_start = 0; | ||
209 | |||
210 | } | ||
211 | |||
212 | static void bictcp_cong_avoid(struct sock *sk, u32 ack, | 140 | static void bictcp_cong_avoid(struct sock *sk, u32 ack, |
213 | u32 seq_rtt, u32 in_flight, int data_acked) | 141 | u32 seq_rtt, u32 in_flight, int data_acked) |
214 | { | 142 | { |
215 | struct tcp_sock *tp = tcp_sk(sk); | 143 | struct tcp_sock *tp = tcp_sk(sk); |
216 | struct bictcp *ca = inet_csk_ca(sk); | 144 | struct bictcp *ca = inet_csk_ca(sk); |
217 | 145 | ||
218 | bictcp_low_utilization(sk, data_acked); | ||
219 | |||
220 | if (!tcp_is_cwnd_limited(sk, in_flight)) | 146 | if (!tcp_is_cwnd_limited(sk, in_flight)) |
221 | return; | 147 | return; |
222 | 148 | ||
@@ -249,11 +175,6 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk) | |||
249 | 175 | ||
250 | ca->epoch_start = 0; /* end of epoch */ | 176 | ca->epoch_start = 0; /* end of epoch */ |
251 | 177 | ||
252 | /* in case of wrong delay_max*/ | ||
253 | if (ca->delay_min > 0 && ca->delay_max > ca->delay_min) | ||
254 | ca->delay_max = ca->delay_min | ||
255 | + ((ca->delay_max - ca->delay_min)* 90) / 100; | ||
256 | |||
257 | /* Wmax and fast convergence */ | 178 | /* Wmax and fast convergence */ |
258 | if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) | 179 | if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) |
259 | ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) | 180 | ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) |
@@ -289,14 +210,14 @@ static void bictcp_state(struct sock *sk, u8 new_state) | |||
289 | bictcp_reset(inet_csk_ca(sk)); | 210 | bictcp_reset(inet_csk_ca(sk)); |
290 | } | 211 | } |
291 | 212 | ||
292 | /* Track delayed acknowledgement ratio using sliding window | 213 | /* Track delayed acknowledgment ratio using sliding window |
293 | * ratio = (15*ratio + sample) / 16 | 214 | * ratio = (15*ratio + sample) / 16 |
294 | */ | 215 | */ |
295 | static void bictcp_acked(struct sock *sk, u32 cnt) | 216 | static void bictcp_acked(struct sock *sk, u32 cnt) |
296 | { | 217 | { |
297 | const struct inet_connection_sock *icsk = inet_csk(sk); | 218 | const struct inet_connection_sock *icsk = inet_csk(sk); |
298 | 219 | ||
299 | if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { | 220 | if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { |
300 | struct bictcp *ca = inet_csk_ca(sk); | 221 | struct bictcp *ca = inet_csk_ca(sk); |
301 | cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; | 222 | cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; |
302 | ca->delayed_ack += cnt; | 223 | ca->delayed_ack += cnt; |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index c7cc62c8dc12..e688c687d62d 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -174,6 +174,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) | |||
174 | return err; | 174 | return err; |
175 | } | 175 | } |
176 | 176 | ||
177 | |||
178 | /* | ||
179 | * Linear increase during slow start | ||
180 | */ | ||
181 | void tcp_slow_start(struct tcp_sock *tp) | ||
182 | { | ||
183 | if (sysctl_tcp_abc) { | ||
184 | /* RFC3465: Slow Start | ||
185 | * TCP sender SHOULD increase cwnd by the number of | ||
186 | * previously unacknowledged bytes ACKed by each incoming | ||
187 | * acknowledgment, provided the increase is not more than L | ||
188 | */ | ||
189 | if (tp->bytes_acked < tp->mss_cache) | ||
190 | return; | ||
191 | |||
192 | /* We MAY increase by 2 if discovered delayed ack */ | ||
193 | if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { | ||
194 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
195 | tp->snd_cwnd++; | ||
196 | } | ||
197 | } | ||
198 | tp->bytes_acked = 0; | ||
199 | |||
200 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
201 | tp->snd_cwnd++; | ||
202 | } | ||
203 | EXPORT_SYMBOL_GPL(tcp_slow_start); | ||
204 | |||
177 | /* | 205 | /* |
178 | * TCP Reno congestion control | 206 | * TCP Reno congestion control |
179 | * This is special case used for fallback as well. | 207 | * This is special case used for fallback as well. |
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c new file mode 100644 index 000000000000..31a4986dfbf7 --- /dev/null +++ b/net/ipv4/tcp_cubic.c | |||
@@ -0,0 +1,411 @@ | |||
1 | /* | ||
2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.0 | ||
3 | * | ||
4 | * This is from the implementation of CUBIC TCP in | ||
5 | * Injong Rhee, Lisong Xu. | ||
6 | * "CUBIC: A New TCP-Friendly High-Speed TCP Variant | ||
7 | * in PFLDnet 2005 | ||
8 | * Available from: | ||
9 | * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf | ||
10 | * | ||
11 | * Unless CUBIC is enabled and congestion window is large | ||
12 | * this behaves the same as the original Reno. | ||
13 | */ | ||
14 | |||
15 | #include <linux/config.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <net/tcp.h> | ||
19 | #include <asm/div64.h> | ||
20 | |||
21 | #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation | ||
22 | * max_cwnd = snd_cwnd * beta | ||
23 | */ | ||
24 | #define BICTCP_B 4 /* | ||
25 | * In binary search, | ||
26 | * go to point (max+min)/N | ||
27 | */ | ||
28 | #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ | ||
29 | |||
30 | static int fast_convergence = 1; | ||
31 | static int max_increment = 16; | ||
32 | static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ | ||
33 | static int initial_ssthresh = 100; | ||
34 | static int bic_scale = 41; | ||
35 | static int tcp_friendliness = 1; | ||
36 | |||
37 | static u32 cube_rtt_scale; | ||
38 | static u32 beta_scale; | ||
39 | static u64 cube_factor; | ||
40 | |||
41 | /* Note parameters that are used for precomputing scale factors are read-only */ | ||
42 | module_param(fast_convergence, int, 0644); | ||
43 | MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); | ||
44 | module_param(max_increment, int, 0644); | ||
45 | MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search"); | ||
46 | module_param(beta, int, 0444); | ||
47 | MODULE_PARM_DESC(beta, "beta for multiplicative increase"); | ||
48 | module_param(initial_ssthresh, int, 0644); | ||
49 | MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); | ||
50 | module_param(bic_scale, int, 0444); | ||
51 | MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); | ||
52 | module_param(tcp_friendliness, int, 0644); | ||
53 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); | ||
54 | |||
55 | #include <asm/div64.h> | ||
56 | |||
57 | /* BIC TCP Parameters */ | ||
58 | struct bictcp { | ||
59 | u32 cnt; /* increase cwnd by 1 after ACKs */ | ||
60 | u32 last_max_cwnd; /* last maximum snd_cwnd */ | ||
61 | u32 loss_cwnd; /* congestion window at last loss */ | ||
62 | u32 last_cwnd; /* the last snd_cwnd */ | ||
63 | u32 last_time; /* time when updated last_cwnd */ | ||
64 | u32 bic_origin_point;/* origin point of bic function */ | ||
65 | u32 bic_K; /* time to origin point from the beginning of the current epoch */ | ||
66 | u32 delay_min; /* min delay */ | ||
67 | u32 epoch_start; /* beginning of an epoch */ | ||
68 | u32 ack_cnt; /* number of acks */ | ||
69 | u32 tcp_cwnd; /* estimated tcp cwnd */ | ||
70 | #define ACK_RATIO_SHIFT 4 | ||
71 | u32 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ | ||
72 | }; | ||
73 | |||
74 | static inline void bictcp_reset(struct bictcp *ca) | ||
75 | { | ||
76 | ca->cnt = 0; | ||
77 | ca->last_max_cwnd = 0; | ||
78 | ca->loss_cwnd = 0; | ||
79 | ca->last_cwnd = 0; | ||
80 | ca->last_time = 0; | ||
81 | ca->bic_origin_point = 0; | ||
82 | ca->bic_K = 0; | ||
83 | ca->delay_min = 0; | ||
84 | ca->epoch_start = 0; | ||
85 | ca->delayed_ack = 2 << ACK_RATIO_SHIFT; | ||
86 | ca->ack_cnt = 0; | ||
87 | ca->tcp_cwnd = 0; | ||
88 | } | ||
89 | |||
90 | static void bictcp_init(struct sock *sk) | ||
91 | { | ||
92 | bictcp_reset(inet_csk_ca(sk)); | ||
93 | if (initial_ssthresh) | ||
94 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; | ||
95 | } | ||
96 | |||
97 | /* 64bit divisor, dividend and result. dynamic precision */ | ||
98 | static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor) | ||
99 | { | ||
100 | u_int32_t d = divisor; | ||
101 | |||
102 | if (divisor > 0xffffffffULL) { | ||
103 | unsigned int shift = fls(divisor >> 32); | ||
104 | |||
105 | d = divisor >> shift; | ||
106 | dividend >>= shift; | ||
107 | } | ||
108 | |||
109 | /* avoid 64 bit division if possible */ | ||
110 | if (dividend >> 32) | ||
111 | do_div(dividend, d); | ||
112 | else | ||
113 | dividend = (uint32_t) dividend / d; | ||
114 | |||
115 | return dividend; | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * calculate the cubic root of x using Newton-Raphson | ||
120 | */ | ||
121 | static u32 cubic_root(u64 a) | ||
122 | { | ||
123 | u32 x, x1; | ||
124 | |||
125 | /* Initial estimate is based on: | ||
126 | * cbrt(x) = exp(log(x) / 3) | ||
127 | */ | ||
128 | x = 1u << (fls64(a)/3); | ||
129 | |||
130 | /* | ||
131 | * Iteration based on: | ||
132 | * 2 | ||
133 | * x = ( 2 * x + a / x ) / 3 | ||
134 | * k+1 k k | ||
135 | */ | ||
136 | do { | ||
137 | x1 = x; | ||
138 | x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3; | ||
139 | } while (abs(x1 - x) > 1); | ||
140 | |||
141 | return x; | ||
142 | } | ||
143 | |||
144 | /* | ||
145 | * Compute congestion window to use. | ||
146 | */ | ||
147 | static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | ||
148 | { | ||
149 | u64 offs; | ||
150 | u32 delta, t, bic_target, min_cnt, max_cnt; | ||
151 | |||
152 | ca->ack_cnt++; /* count the number of ACKs */ | ||
153 | |||
154 | if (ca->last_cwnd == cwnd && | ||
155 | (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) | ||
156 | return; | ||
157 | |||
158 | ca->last_cwnd = cwnd; | ||
159 | ca->last_time = tcp_time_stamp; | ||
160 | |||
161 | if (ca->epoch_start == 0) { | ||
162 | ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */ | ||
163 | ca->ack_cnt = 1; /* start counting */ | ||
164 | ca->tcp_cwnd = cwnd; /* syn with cubic */ | ||
165 | |||
166 | if (ca->last_max_cwnd <= cwnd) { | ||
167 | ca->bic_K = 0; | ||
168 | ca->bic_origin_point = cwnd; | ||
169 | } else { | ||
170 | /* Compute new K based on | ||
171 | * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) | ||
172 | */ | ||
173 | ca->bic_K = cubic_root(cube_factor | ||
174 | * (ca->last_max_cwnd - cwnd)); | ||
175 | ca->bic_origin_point = ca->last_max_cwnd; | ||
176 | } | ||
177 | } | ||
178 | |||
179 | /* cubic function - calc*/ | ||
180 | /* calculate c * time^3 / rtt, | ||
181 | * while considering overflow in calculation of time^3 | ||
182 | * (so time^3 is done by using 64 bit) | ||
183 | * and without the support of division of 64bit numbers | ||
184 | * (so all divisions are done by using 32 bit) | ||
185 | * also NOTE the unit of those veriables | ||
186 | * time = (t - K) / 2^bictcp_HZ | ||
187 | * c = bic_scale >> 10 | ||
188 | * rtt = (srtt >> 3) / HZ | ||
189 | * !!! The following code does not have overflow problems, | ||
190 | * if the cwnd < 1 million packets !!! | ||
191 | */ | ||
192 | |||
193 | /* change the unit from HZ to bictcp_HZ */ | ||
194 | t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start) | ||
195 | << BICTCP_HZ) / HZ; | ||
196 | |||
197 | if (t < ca->bic_K) /* t - K */ | ||
198 | offs = ca->bic_K - t; | ||
199 | else | ||
200 | offs = t - ca->bic_K; | ||
201 | |||
202 | /* c/rtt * (t-K)^3 */ | ||
203 | delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); | ||
204 | if (t < ca->bic_K) /* below origin*/ | ||
205 | bic_target = ca->bic_origin_point - delta; | ||
206 | else /* above origin*/ | ||
207 | bic_target = ca->bic_origin_point + delta; | ||
208 | |||
209 | /* cubic function - calc bictcp_cnt*/ | ||
210 | if (bic_target > cwnd) { | ||
211 | ca->cnt = cwnd / (bic_target - cwnd); | ||
212 | } else { | ||
213 | ca->cnt = 100 * cwnd; /* very small increment*/ | ||
214 | } | ||
215 | |||
216 | if (ca->delay_min > 0) { | ||
217 | /* max increment = Smax * rtt / 0.1 */ | ||
218 | min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min); | ||
219 | if (ca->cnt < min_cnt) | ||
220 | ca->cnt = min_cnt; | ||
221 | } | ||
222 | |||
223 | /* slow start and low utilization */ | ||
224 | if (ca->loss_cwnd == 0) /* could be aggressive in slow start */ | ||
225 | ca->cnt = 50; | ||
226 | |||
227 | /* TCP Friendly */ | ||
228 | if (tcp_friendliness) { | ||
229 | u32 scale = beta_scale; | ||
230 | delta = (cwnd * scale) >> 3; | ||
231 | while (ca->ack_cnt > delta) { /* update tcp cwnd */ | ||
232 | ca->ack_cnt -= delta; | ||
233 | ca->tcp_cwnd++; | ||
234 | } | ||
235 | |||
236 | if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */ | ||
237 | delta = ca->tcp_cwnd - cwnd; | ||
238 | max_cnt = cwnd / delta; | ||
239 | if (ca->cnt > max_cnt) | ||
240 | ca->cnt = max_cnt; | ||
241 | } | ||
242 | } | ||
243 | |||
244 | ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack; | ||
245 | if (ca->cnt == 0) /* cannot be zero */ | ||
246 | ca->cnt = 1; | ||
247 | } | ||
248 | |||
249 | |||
250 | /* Keep track of minimum rtt */ | ||
251 | static inline void measure_delay(struct sock *sk) | ||
252 | { | ||
253 | const struct tcp_sock *tp = tcp_sk(sk); | ||
254 | struct bictcp *ca = inet_csk_ca(sk); | ||
255 | u32 delay; | ||
256 | |||
257 | /* No time stamp */ | ||
258 | if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) || | ||
259 | /* Discard delay samples right after fast recovery */ | ||
260 | (s32)(tcp_time_stamp - ca->epoch_start) < HZ) | ||
261 | return; | ||
262 | |||
263 | delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr; | ||
264 | if (delay == 0) | ||
265 | delay = 1; | ||
266 | |||
267 | /* first time call or link delay decreases */ | ||
268 | if (ca->delay_min == 0 || ca->delay_min > delay) | ||
269 | ca->delay_min = delay; | ||
270 | } | ||
271 | |||
272 | static void bictcp_cong_avoid(struct sock *sk, u32 ack, | ||
273 | u32 seq_rtt, u32 in_flight, int data_acked) | ||
274 | { | ||
275 | struct tcp_sock *tp = tcp_sk(sk); | ||
276 | struct bictcp *ca = inet_csk_ca(sk); | ||
277 | |||
278 | if (data_acked) | ||
279 | measure_delay(sk); | ||
280 | |||
281 | if (!tcp_is_cwnd_limited(sk, in_flight)) | ||
282 | return; | ||
283 | |||
284 | if (tp->snd_cwnd <= tp->snd_ssthresh) | ||
285 | tcp_slow_start(tp); | ||
286 | else { | ||
287 | bictcp_update(ca, tp->snd_cwnd); | ||
288 | |||
289 | /* In dangerous area, increase slowly. | ||
290 | * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd | ||
291 | */ | ||
292 | if (tp->snd_cwnd_cnt >= ca->cnt) { | ||
293 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
294 | tp->snd_cwnd++; | ||
295 | tp->snd_cwnd_cnt = 0; | ||
296 | } else | ||
297 | tp->snd_cwnd_cnt++; | ||
298 | } | ||
299 | |||
300 | } | ||
301 | |||
302 | static u32 bictcp_recalc_ssthresh(struct sock *sk) | ||
303 | { | ||
304 | const struct tcp_sock *tp = tcp_sk(sk); | ||
305 | struct bictcp *ca = inet_csk_ca(sk); | ||
306 | |||
307 | ca->epoch_start = 0; /* end of epoch */ | ||
308 | |||
309 | /* Wmax and fast convergence */ | ||
310 | if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) | ||
311 | ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) | ||
312 | / (2 * BICTCP_BETA_SCALE); | ||
313 | else | ||
314 | ca->last_max_cwnd = tp->snd_cwnd; | ||
315 | |||
316 | ca->loss_cwnd = tp->snd_cwnd; | ||
317 | |||
318 | return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); | ||
319 | } | ||
320 | |||
321 | static u32 bictcp_undo_cwnd(struct sock *sk) | ||
322 | { | ||
323 | struct bictcp *ca = inet_csk_ca(sk); | ||
324 | |||
325 | return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd); | ||
326 | } | ||
327 | |||
328 | static u32 bictcp_min_cwnd(struct sock *sk) | ||
329 | { | ||
330 | return tcp_sk(sk)->snd_ssthresh; | ||
331 | } | ||
332 | |||
333 | static void bictcp_state(struct sock *sk, u8 new_state) | ||
334 | { | ||
335 | if (new_state == TCP_CA_Loss) | ||
336 | bictcp_reset(inet_csk_ca(sk)); | ||
337 | } | ||
338 | |||
339 | /* Track delayed acknowledgment ratio using sliding window | ||
340 | * ratio = (15*ratio + sample) / 16 | ||
341 | */ | ||
342 | static void bictcp_acked(struct sock *sk, u32 cnt) | ||
343 | { | ||
344 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
345 | |||
346 | if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { | ||
347 | struct bictcp *ca = inet_csk_ca(sk); | ||
348 | cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; | ||
349 | ca->delayed_ack += cnt; | ||
350 | } | ||
351 | } | ||
352 | |||
353 | |||
354 | static struct tcp_congestion_ops cubictcp = { | ||
355 | .init = bictcp_init, | ||
356 | .ssthresh = bictcp_recalc_ssthresh, | ||
357 | .cong_avoid = bictcp_cong_avoid, | ||
358 | .set_state = bictcp_state, | ||
359 | .undo_cwnd = bictcp_undo_cwnd, | ||
360 | .min_cwnd = bictcp_min_cwnd, | ||
361 | .pkts_acked = bictcp_acked, | ||
362 | .owner = THIS_MODULE, | ||
363 | .name = "cubic", | ||
364 | }; | ||
365 | |||
366 | static int __init cubictcp_register(void) | ||
367 | { | ||
368 | BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); | ||
369 | |||
370 | /* Precompute a bunch of the scaling factors that are used per-packet | ||
371 | * based on SRTT of 100ms | ||
372 | */ | ||
373 | |||
374 | beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta); | ||
375 | |||
376 | cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */ | ||
377 | |||
378 | /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 | ||
379 | * so K = cubic_root( (wmax-cwnd)*rtt/c ) | ||
380 | * the unit of K is bictcp_HZ=2^10, not HZ | ||
381 | * | ||
382 | * c = bic_scale >> 10 | ||
383 | * rtt = 100ms | ||
384 | * | ||
385 | * the following code has been designed and tested for | ||
386 | * cwnd < 1 million packets | ||
387 | * RTT < 100 seconds | ||
388 | * HZ < 1,000,00 (corresponding to 10 nano-second) | ||
389 | */ | ||
390 | |||
391 | /* 1/c * 2^2*bictcp_HZ * srtt */ | ||
392 | cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */ | ||
393 | |||
394 | /* divide by bic_scale and by constant Srtt (100ms) */ | ||
395 | do_div(cube_factor, bic_scale * 10); | ||
396 | |||
397 | return tcp_register_congestion_control(&cubictcp); | ||
398 | } | ||
399 | |||
400 | static void __exit cubictcp_unregister(void) | ||
401 | { | ||
402 | tcp_unregister_congestion_control(&cubictcp); | ||
403 | } | ||
404 | |||
405 | module_init(cubictcp_register); | ||
406 | module_exit(cubictcp_unregister); | ||
407 | |||
408 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); | ||
409 | MODULE_LICENSE("GPL"); | ||
410 | MODULE_DESCRIPTION("CUBIC TCP"); | ||
411 | MODULE_VERSION("2.0"); | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bf2e23086bce..0a461232329f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -115,8 +115,8 @@ int sysctl_tcp_abc = 1; | |||
115 | /* Adapt the MSS value used to make delayed ack decision to the | 115 | /* Adapt the MSS value used to make delayed ack decision to the |
116 | * real world. | 116 | * real world. |
117 | */ | 117 | */ |
118 | static inline void tcp_measure_rcv_mss(struct sock *sk, | 118 | static void tcp_measure_rcv_mss(struct sock *sk, |
119 | const struct sk_buff *skb) | 119 | const struct sk_buff *skb) |
120 | { | 120 | { |
121 | struct inet_connection_sock *icsk = inet_csk(sk); | 121 | struct inet_connection_sock *icsk = inet_csk(sk); |
122 | const unsigned int lss = icsk->icsk_ack.last_seg_size; | 122 | const unsigned int lss = icsk->icsk_ack.last_seg_size; |
@@ -246,8 +246,8 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, | |||
246 | return 0; | 246 | return 0; |
247 | } | 247 | } |
248 | 248 | ||
249 | static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, | 249 | static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, |
250 | struct sk_buff *skb) | 250 | struct sk_buff *skb) |
251 | { | 251 | { |
252 | /* Check #1 */ | 252 | /* Check #1 */ |
253 | if (tp->rcv_ssthresh < tp->window_clamp && | 253 | if (tp->rcv_ssthresh < tp->window_clamp && |
@@ -341,6 +341,26 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) | |||
341 | tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); | 341 | tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); |
342 | } | 342 | } |
343 | 343 | ||
344 | |||
345 | /* Initialize RCV_MSS value. | ||
346 | * RCV_MSS is an our guess about MSS used by the peer. | ||
347 | * We haven't any direct information about the MSS. | ||
348 | * It's better to underestimate the RCV_MSS rather than overestimate. | ||
349 | * Overestimations make us ACKing less frequently than needed. | ||
350 | * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss(). | ||
351 | */ | ||
352 | void tcp_initialize_rcv_mss(struct sock *sk) | ||
353 | { | ||
354 | struct tcp_sock *tp = tcp_sk(sk); | ||
355 | unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); | ||
356 | |||
357 | hint = min(hint, tp->rcv_wnd/2); | ||
358 | hint = min(hint, TCP_MIN_RCVMSS); | ||
359 | hint = max(hint, TCP_MIN_MSS); | ||
360 | |||
361 | inet_csk(sk)->icsk_ack.rcv_mss = hint; | ||
362 | } | ||
363 | |||
344 | /* Receiver "autotuning" code. | 364 | /* Receiver "autotuning" code. |
345 | * | 365 | * |
346 | * The algorithm for RTT estimation w/o timestamps is based on | 366 | * The algorithm for RTT estimation w/o timestamps is based on |
@@ -735,6 +755,27 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) | |||
735 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); | 755 | return min_t(__u32, cwnd, tp->snd_cwnd_clamp); |
736 | } | 756 | } |
737 | 757 | ||
758 | /* Set slow start threshold and cwnd not falling to slow start */ | ||
759 | void tcp_enter_cwr(struct sock *sk) | ||
760 | { | ||
761 | struct tcp_sock *tp = tcp_sk(sk); | ||
762 | |||
763 | tp->prior_ssthresh = 0; | ||
764 | tp->bytes_acked = 0; | ||
765 | if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { | ||
766 | tp->undo_marker = 0; | ||
767 | tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); | ||
768 | tp->snd_cwnd = min(tp->snd_cwnd, | ||
769 | tcp_packets_in_flight(tp) + 1U); | ||
770 | tp->snd_cwnd_cnt = 0; | ||
771 | tp->high_seq = tp->snd_nxt; | ||
772 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
773 | TCP_ECN_queue_cwr(tp); | ||
774 | |||
775 | tcp_set_ca_state(sk, TCP_CA_CWR); | ||
776 | } | ||
777 | } | ||
778 | |||
738 | /* Initialize metrics on socket. */ | 779 | /* Initialize metrics on socket. */ |
739 | 780 | ||
740 | static void tcp_init_metrics(struct sock *sk) | 781 | static void tcp_init_metrics(struct sock *sk) |
@@ -2070,8 +2111,8 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, | |||
2070 | tcp_ack_no_tstamp(sk, seq_rtt, flag); | 2111 | tcp_ack_no_tstamp(sk, seq_rtt, flag); |
2071 | } | 2112 | } |
2072 | 2113 | ||
2073 | static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | 2114 | static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, |
2074 | u32 in_flight, int good) | 2115 | u32 in_flight, int good) |
2075 | { | 2116 | { |
2076 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2117 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2077 | icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); | 2118 | icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); |
@@ -2082,7 +2123,7 @@ static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | |||
2082 | * RFC2988 recommends to restart timer to now+rto. | 2123 | * RFC2988 recommends to restart timer to now+rto. |
2083 | */ | 2124 | */ |
2084 | 2125 | ||
2085 | static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) | 2126 | static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) |
2086 | { | 2127 | { |
2087 | if (!tp->packets_out) { | 2128 | if (!tp->packets_out) { |
2088 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | 2129 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
@@ -2147,7 +2188,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, | |||
2147 | return acked; | 2188 | return acked; |
2148 | } | 2189 | } |
2149 | 2190 | ||
2150 | static inline u32 tcp_usrtt(const struct sk_buff *skb) | 2191 | static u32 tcp_usrtt(const struct sk_buff *skb) |
2151 | { | 2192 | { |
2152 | struct timeval tv, now; | 2193 | struct timeval tv, now; |
2153 | 2194 | ||
@@ -2342,7 +2383,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, | |||
2342 | 2383 | ||
2343 | if (nwin > tp->max_window) { | 2384 | if (nwin > tp->max_window) { |
2344 | tp->max_window = nwin; | 2385 | tp->max_window = nwin; |
2345 | tcp_sync_mss(sk, tp->pmtu_cookie); | 2386 | tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie); |
2346 | } | 2387 | } |
2347 | } | 2388 | } |
2348 | } | 2389 | } |
@@ -2583,8 +2624,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
2583 | /* Fast parse options. This hopes to only see timestamps. | 2624 | /* Fast parse options. This hopes to only see timestamps. |
2584 | * If it is wrong it falls back on tcp_parse_options(). | 2625 | * If it is wrong it falls back on tcp_parse_options(). |
2585 | */ | 2626 | */ |
2586 | static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, | 2627 | static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, |
2587 | struct tcp_sock *tp) | 2628 | struct tcp_sock *tp) |
2588 | { | 2629 | { |
2589 | if (th->doff == sizeof(struct tcphdr)>>2) { | 2630 | if (th->doff == sizeof(struct tcphdr)>>2) { |
2590 | tp->rx_opt.saw_tstamp = 0; | 2631 | tp->rx_opt.saw_tstamp = 0; |
@@ -2804,8 +2845,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) | |||
2804 | } | 2845 | } |
2805 | } | 2846 | } |
2806 | 2847 | ||
2807 | static __inline__ int | 2848 | static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) |
2808 | tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) | ||
2809 | { | 2849 | { |
2810 | if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { | 2850 | if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { |
2811 | if (before(seq, sp->start_seq)) | 2851 | if (before(seq, sp->start_seq)) |
@@ -2817,7 +2857,7 @@ tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) | |||
2817 | return 0; | 2857 | return 0; |
2818 | } | 2858 | } |
2819 | 2859 | ||
2820 | static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) | 2860 | static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) |
2821 | { | 2861 | { |
2822 | if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { | 2862 | if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { |
2823 | if (before(seq, tp->rcv_nxt)) | 2863 | if (before(seq, tp->rcv_nxt)) |
@@ -2832,7 +2872,7 @@ static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) | |||
2832 | } | 2872 | } |
2833 | } | 2873 | } |
2834 | 2874 | ||
2835 | static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) | 2875 | static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) |
2836 | { | 2876 | { |
2837 | if (!tp->rx_opt.dsack) | 2877 | if (!tp->rx_opt.dsack) |
2838 | tcp_dsack_set(tp, seq, end_seq); | 2878 | tcp_dsack_set(tp, seq, end_seq); |
@@ -2890,7 +2930,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) | |||
2890 | } | 2930 | } |
2891 | } | 2931 | } |
2892 | 2932 | ||
2893 | static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) | 2933 | static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) |
2894 | { | 2934 | { |
2895 | __u32 tmp; | 2935 | __u32 tmp; |
2896 | 2936 | ||
@@ -3455,7 +3495,7 @@ void tcp_cwnd_application_limited(struct sock *sk) | |||
3455 | tp->snd_cwnd_stamp = tcp_time_stamp; | 3495 | tp->snd_cwnd_stamp = tcp_time_stamp; |
3456 | } | 3496 | } |
3457 | 3497 | ||
3458 | static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) | 3498 | static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) |
3459 | { | 3499 | { |
3460 | /* If the user specified a specific send buffer setting, do | 3500 | /* If the user specified a specific send buffer setting, do |
3461 | * not modify it. | 3501 | * not modify it. |
@@ -3502,7 +3542,7 @@ static void tcp_new_space(struct sock *sk) | |||
3502 | sk->sk_write_space(sk); | 3542 | sk->sk_write_space(sk); |
3503 | } | 3543 | } |
3504 | 3544 | ||
3505 | static inline void tcp_check_space(struct sock *sk) | 3545 | static void tcp_check_space(struct sock *sk) |
3506 | { | 3546 | { |
3507 | if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { | 3547 | if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { |
3508 | sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); | 3548 | sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); |
@@ -3512,7 +3552,7 @@ static inline void tcp_check_space(struct sock *sk) | |||
3512 | } | 3552 | } |
3513 | } | 3553 | } |
3514 | 3554 | ||
3515 | static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) | 3555 | static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) |
3516 | { | 3556 | { |
3517 | tcp_push_pending_frames(sk, tp); | 3557 | tcp_push_pending_frames(sk, tp); |
3518 | tcp_check_space(sk); | 3558 | tcp_check_space(sk); |
@@ -3544,7 +3584,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) | |||
3544 | } | 3584 | } |
3545 | } | 3585 | } |
3546 | 3586 | ||
3547 | static __inline__ void tcp_ack_snd_check(struct sock *sk) | 3587 | static inline void tcp_ack_snd_check(struct sock *sk) |
3548 | { | 3588 | { |
3549 | if (!inet_csk_ack_scheduled(sk)) { | 3589 | if (!inet_csk_ack_scheduled(sk)) { |
3550 | /* We sent a data segment already. */ | 3590 | /* We sent a data segment already. */ |
@@ -3692,8 +3732,7 @@ static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) | |||
3692 | return result; | 3732 | return result; |
3693 | } | 3733 | } |
3694 | 3734 | ||
3695 | static __inline__ int | 3735 | static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) |
3696 | tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) | ||
3697 | { | 3736 | { |
3698 | return skb->ip_summed != CHECKSUM_UNNECESSARY && | 3737 | return skb->ip_summed != CHECKSUM_UNNECESSARY && |
3699 | __tcp_checksum_complete_user(sk, skb); | 3738 | __tcp_checksum_complete_user(sk, skb); |
@@ -3967,12 +4006,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
3967 | struct tcphdr *th, unsigned len) | 4006 | struct tcphdr *th, unsigned len) |
3968 | { | 4007 | { |
3969 | struct tcp_sock *tp = tcp_sk(sk); | 4008 | struct tcp_sock *tp = tcp_sk(sk); |
4009 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
3970 | int saved_clamp = tp->rx_opt.mss_clamp; | 4010 | int saved_clamp = tp->rx_opt.mss_clamp; |
3971 | 4011 | ||
3972 | tcp_parse_options(skb, &tp->rx_opt, 0); | 4012 | tcp_parse_options(skb, &tp->rx_opt, 0); |
3973 | 4013 | ||
3974 | if (th->ack) { | 4014 | if (th->ack) { |
3975 | struct inet_connection_sock *icsk; | ||
3976 | /* rfc793: | 4015 | /* rfc793: |
3977 | * "If the state is SYN-SENT then | 4016 | * "If the state is SYN-SENT then |
3978 | * first check the ACK bit | 4017 | * first check the ACK bit |
@@ -4061,7 +4100,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
4061 | if (tp->rx_opt.sack_ok && sysctl_tcp_fack) | 4100 | if (tp->rx_opt.sack_ok && sysctl_tcp_fack) |
4062 | tp->rx_opt.sack_ok |= 2; | 4101 | tp->rx_opt.sack_ok |= 2; |
4063 | 4102 | ||
4064 | tcp_sync_mss(sk, tp->pmtu_cookie); | 4103 | tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); |
4065 | tcp_initialize_rcv_mss(sk); | 4104 | tcp_initialize_rcv_mss(sk); |
4066 | 4105 | ||
4067 | /* Remember, tcp_poll() does not lock socket! | 4106 | /* Remember, tcp_poll() does not lock socket! |
@@ -4072,7 +4111,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
4072 | tcp_set_state(sk, TCP_ESTABLISHED); | 4111 | tcp_set_state(sk, TCP_ESTABLISHED); |
4073 | 4112 | ||
4074 | /* Make sure socket is routed, for correct metrics. */ | 4113 | /* Make sure socket is routed, for correct metrics. */ |
4075 | tp->af_specific->rebuild_header(sk); | 4114 | icsk->icsk_af_ops->rebuild_header(sk); |
4076 | 4115 | ||
4077 | tcp_init_metrics(sk); | 4116 | tcp_init_metrics(sk); |
4078 | 4117 | ||
@@ -4098,8 +4137,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
4098 | sk_wake_async(sk, 0, POLL_OUT); | 4137 | sk_wake_async(sk, 0, POLL_OUT); |
4099 | } | 4138 | } |
4100 | 4139 | ||
4101 | icsk = inet_csk(sk); | ||
4102 | |||
4103 | if (sk->sk_write_pending || | 4140 | if (sk->sk_write_pending || |
4104 | icsk->icsk_accept_queue.rskq_defer_accept || | 4141 | icsk->icsk_accept_queue.rskq_defer_accept || |
4105 | icsk->icsk_ack.pingpong) { | 4142 | icsk->icsk_ack.pingpong) { |
@@ -4173,7 +4210,7 @@ discard: | |||
4173 | if (tp->ecn_flags&TCP_ECN_OK) | 4210 | if (tp->ecn_flags&TCP_ECN_OK) |
4174 | sock_set_flag(sk, SOCK_NO_LARGESEND); | 4211 | sock_set_flag(sk, SOCK_NO_LARGESEND); |
4175 | 4212 | ||
4176 | tcp_sync_mss(sk, tp->pmtu_cookie); | 4213 | tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); |
4177 | tcp_initialize_rcv_mss(sk); | 4214 | tcp_initialize_rcv_mss(sk); |
4178 | 4215 | ||
4179 | 4216 | ||
@@ -4220,6 +4257,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4220 | struct tcphdr *th, unsigned len) | 4257 | struct tcphdr *th, unsigned len) |
4221 | { | 4258 | { |
4222 | struct tcp_sock *tp = tcp_sk(sk); | 4259 | struct tcp_sock *tp = tcp_sk(sk); |
4260 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
4223 | int queued = 0; | 4261 | int queued = 0; |
4224 | 4262 | ||
4225 | tp->rx_opt.saw_tstamp = 0; | 4263 | tp->rx_opt.saw_tstamp = 0; |
@@ -4236,7 +4274,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4236 | goto discard; | 4274 | goto discard; |
4237 | 4275 | ||
4238 | if(th->syn) { | 4276 | if(th->syn) { |
4239 | if(tp->af_specific->conn_request(sk, skb) < 0) | 4277 | if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) |
4240 | return 1; | 4278 | return 1; |
4241 | 4279 | ||
4242 | /* Now we have several options: In theory there is | 4280 | /* Now we have several options: In theory there is |
@@ -4349,7 +4387,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4349 | /* Make sure socket is routed, for | 4387 | /* Make sure socket is routed, for |
4350 | * correct metrics. | 4388 | * correct metrics. |
4351 | */ | 4389 | */ |
4352 | tp->af_specific->rebuild_header(sk); | 4390 | icsk->icsk_af_ops->rebuild_header(sk); |
4353 | 4391 | ||
4354 | tcp_init_metrics(sk); | 4392 | tcp_init_metrics(sk); |
4355 | 4393 | ||
@@ -4475,3 +4513,4 @@ EXPORT_SYMBOL(sysctl_tcp_abc); | |||
4475 | EXPORT_SYMBOL(tcp_parse_options); | 4513 | EXPORT_SYMBOL(tcp_parse_options); |
4476 | EXPORT_SYMBOL(tcp_rcv_established); | 4514 | EXPORT_SYMBOL(tcp_rcv_established); |
4477 | EXPORT_SYMBOL(tcp_rcv_state_process); | 4515 | EXPORT_SYMBOL(tcp_rcv_state_process); |
4516 | EXPORT_SYMBOL(tcp_initialize_rcv_mss); | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4d5021e1929b..e9f83e5b28ce 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -69,6 +69,7 @@ | |||
69 | #include <net/transp_v6.h> | 69 | #include <net/transp_v6.h> |
70 | #include <net/ipv6.h> | 70 | #include <net/ipv6.h> |
71 | #include <net/inet_common.h> | 71 | #include <net/inet_common.h> |
72 | #include <net/timewait_sock.h> | ||
72 | #include <net/xfrm.h> | 73 | #include <net/xfrm.h> |
73 | 74 | ||
74 | #include <linux/inet.h> | 75 | #include <linux/inet.h> |
@@ -86,8 +87,7 @@ int sysctl_tcp_low_latency; | |||
86 | /* Socket used for sending RSTs */ | 87 | /* Socket used for sending RSTs */ |
87 | static struct socket *tcp_socket; | 88 | static struct socket *tcp_socket; |
88 | 89 | ||
89 | void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, | 90 | void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); |
90 | struct sk_buff *skb); | ||
91 | 91 | ||
92 | struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { | 92 | struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { |
93 | .lhash_lock = RW_LOCK_UNLOCKED, | 93 | .lhash_lock = RW_LOCK_UNLOCKED, |
@@ -97,7 +97,8 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { | |||
97 | 97 | ||
98 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) | 98 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) |
99 | { | 99 | { |
100 | return inet_csk_get_port(&tcp_hashinfo, sk, snum); | 100 | return inet_csk_get_port(&tcp_hashinfo, sk, snum, |
101 | inet_csk_bind_conflict); | ||
101 | } | 102 | } |
102 | 103 | ||
103 | static void tcp_v4_hash(struct sock *sk) | 104 | static void tcp_v4_hash(struct sock *sk) |
@@ -118,202 +119,38 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) | |||
118 | skb->h.th->source); | 119 | skb->h.th->source); |
119 | } | 120 | } |
120 | 121 | ||
121 | /* called with local bh disabled */ | 122 | int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) |
122 | static int __tcp_v4_check_established(struct sock *sk, __u16 lport, | ||
123 | struct inet_timewait_sock **twp) | ||
124 | { | 123 | { |
125 | struct inet_sock *inet = inet_sk(sk); | 124 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); |
126 | u32 daddr = inet->rcv_saddr; | 125 | struct tcp_sock *tp = tcp_sk(sk); |
127 | u32 saddr = inet->daddr; | ||
128 | int dif = sk->sk_bound_dev_if; | ||
129 | INET_ADDR_COOKIE(acookie, saddr, daddr) | ||
130 | const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); | ||
131 | unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); | ||
132 | struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); | ||
133 | struct sock *sk2; | ||
134 | const struct hlist_node *node; | ||
135 | struct inet_timewait_sock *tw; | ||
136 | |||
137 | prefetch(head->chain.first); | ||
138 | write_lock(&head->lock); | ||
139 | |||
140 | /* Check TIME-WAIT sockets first. */ | ||
141 | sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { | ||
142 | tw = inet_twsk(sk2); | ||
143 | |||
144 | if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { | ||
145 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); | ||
146 | struct tcp_sock *tp = tcp_sk(sk); | ||
147 | |||
148 | /* With PAWS, it is safe from the viewpoint | ||
149 | of data integrity. Even without PAWS it | ||
150 | is safe provided sequence spaces do not | ||
151 | overlap i.e. at data rates <= 80Mbit/sec. | ||
152 | |||
153 | Actually, the idea is close to VJ's one, | ||
154 | only timestamp cache is held not per host, | ||
155 | but per port pair and TW bucket is used | ||
156 | as state holder. | ||
157 | 126 | ||
158 | If TW bucket has been already destroyed we | 127 | /* With PAWS, it is safe from the viewpoint |
159 | fall back to VJ's scheme and use initial | 128 | of data integrity. Even without PAWS it is safe provided sequence |
160 | timestamp retrieved from peer table. | 129 | spaces do not overlap i.e. at data rates <= 80Mbit/sec. |
161 | */ | ||
162 | if (tcptw->tw_ts_recent_stamp && | ||
163 | (!twp || (sysctl_tcp_tw_reuse && | ||
164 | xtime.tv_sec - | ||
165 | tcptw->tw_ts_recent_stamp > 1))) { | ||
166 | tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; | ||
167 | if (tp->write_seq == 0) | ||
168 | tp->write_seq = 1; | ||
169 | tp->rx_opt.ts_recent = tcptw->tw_ts_recent; | ||
170 | tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; | ||
171 | sock_hold(sk2); | ||
172 | goto unique; | ||
173 | } else | ||
174 | goto not_unique; | ||
175 | } | ||
176 | } | ||
177 | tw = NULL; | ||
178 | 130 | ||
179 | /* And established part... */ | 131 | Actually, the idea is close to VJ's one, only timestamp cache is |
180 | sk_for_each(sk2, node, &head->chain) { | 132 | held not per host, but per port pair and TW bucket is used as state |
181 | if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) | 133 | holder. |
182 | goto not_unique; | ||
183 | } | ||
184 | 134 | ||
185 | unique: | 135 | If TW bucket has been already destroyed we fall back to VJ's scheme |
186 | /* Must record num and sport now. Otherwise we will see | 136 | and use initial timestamp retrieved from peer table. |
187 | * in hash table socket with a funny identity. */ | 137 | */ |
188 | inet->num = lport; | 138 | if (tcptw->tw_ts_recent_stamp && |
189 | inet->sport = htons(lport); | 139 | (twp == NULL || (sysctl_tcp_tw_reuse && |
190 | sk->sk_hash = hash; | 140 | xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { |
191 | BUG_TRAP(sk_unhashed(sk)); | 141 | tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; |
192 | __sk_add_node(sk, &head->chain); | 142 | if (tp->write_seq == 0) |
193 | sock_prot_inc_use(sk->sk_prot); | 143 | tp->write_seq = 1; |
194 | write_unlock(&head->lock); | 144 | tp->rx_opt.ts_recent = tcptw->tw_ts_recent; |
195 | 145 | tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; | |
196 | if (twp) { | 146 | sock_hold(sktw); |
197 | *twp = tw; | 147 | return 1; |
198 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
199 | } else if (tw) { | ||
200 | /* Silly. Should hash-dance instead... */ | ||
201 | inet_twsk_deschedule(tw, &tcp_death_row); | ||
202 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
203 | |||
204 | inet_twsk_put(tw); | ||
205 | } | 148 | } |
206 | 149 | ||
207 | return 0; | 150 | return 0; |
208 | |||
209 | not_unique: | ||
210 | write_unlock(&head->lock); | ||
211 | return -EADDRNOTAVAIL; | ||
212 | } | 151 | } |
213 | 152 | ||
214 | static inline u32 connect_port_offset(const struct sock *sk) | 153 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); |
215 | { | ||
216 | const struct inet_sock *inet = inet_sk(sk); | ||
217 | |||
218 | return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr, | ||
219 | inet->dport); | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * Bind a port for a connect operation and hash it. | ||
224 | */ | ||
225 | static inline int tcp_v4_hash_connect(struct sock *sk) | ||
226 | { | ||
227 | const unsigned short snum = inet_sk(sk)->num; | ||
228 | struct inet_bind_hashbucket *head; | ||
229 | struct inet_bind_bucket *tb; | ||
230 | int ret; | ||
231 | |||
232 | if (!snum) { | ||
233 | int low = sysctl_local_port_range[0]; | ||
234 | int high = sysctl_local_port_range[1]; | ||
235 | int range = high - low; | ||
236 | int i; | ||
237 | int port; | ||
238 | static u32 hint; | ||
239 | u32 offset = hint + connect_port_offset(sk); | ||
240 | struct hlist_node *node; | ||
241 | struct inet_timewait_sock *tw = NULL; | ||
242 | |||
243 | local_bh_disable(); | ||
244 | for (i = 1; i <= range; i++) { | ||
245 | port = low + (i + offset) % range; | ||
246 | head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; | ||
247 | spin_lock(&head->lock); | ||
248 | |||
249 | /* Does not bother with rcv_saddr checks, | ||
250 | * because the established check is already | ||
251 | * unique enough. | ||
252 | */ | ||
253 | inet_bind_bucket_for_each(tb, node, &head->chain) { | ||
254 | if (tb->port == port) { | ||
255 | BUG_TRAP(!hlist_empty(&tb->owners)); | ||
256 | if (tb->fastreuse >= 0) | ||
257 | goto next_port; | ||
258 | if (!__tcp_v4_check_established(sk, | ||
259 | port, | ||
260 | &tw)) | ||
261 | goto ok; | ||
262 | goto next_port; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); | ||
267 | if (!tb) { | ||
268 | spin_unlock(&head->lock); | ||
269 | break; | ||
270 | } | ||
271 | tb->fastreuse = -1; | ||
272 | goto ok; | ||
273 | |||
274 | next_port: | ||
275 | spin_unlock(&head->lock); | ||
276 | } | ||
277 | local_bh_enable(); | ||
278 | |||
279 | return -EADDRNOTAVAIL; | ||
280 | |||
281 | ok: | ||
282 | hint += i; | ||
283 | |||
284 | /* Head lock still held and bh's disabled */ | ||
285 | inet_bind_hash(sk, tb, port); | ||
286 | if (sk_unhashed(sk)) { | ||
287 | inet_sk(sk)->sport = htons(port); | ||
288 | __inet_hash(&tcp_hashinfo, sk, 0); | ||
289 | } | ||
290 | spin_unlock(&head->lock); | ||
291 | |||
292 | if (tw) { | ||
293 | inet_twsk_deschedule(tw, &tcp_death_row);; | ||
294 | inet_twsk_put(tw); | ||
295 | } | ||
296 | |||
297 | ret = 0; | ||
298 | goto out; | ||
299 | } | ||
300 | |||
301 | head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; | ||
302 | tb = inet_csk(sk)->icsk_bind_hash; | ||
303 | spin_lock_bh(&head->lock); | ||
304 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | ||
305 | __inet_hash(&tcp_hashinfo, sk, 0); | ||
306 | spin_unlock_bh(&head->lock); | ||
307 | return 0; | ||
308 | } else { | ||
309 | spin_unlock(&head->lock); | ||
310 | /* No definite answer... Walk to established hash table */ | ||
311 | ret = __tcp_v4_check_established(sk, snum, NULL); | ||
312 | out: | ||
313 | local_bh_enable(); | ||
314 | return ret; | ||
315 | } | ||
316 | } | ||
317 | 154 | ||
318 | /* This will initiate an outgoing connection. */ | 155 | /* This will initiate an outgoing connection. */ |
319 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | 156 | int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
@@ -383,9 +220,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
383 | inet->dport = usin->sin_port; | 220 | inet->dport = usin->sin_port; |
384 | inet->daddr = daddr; | 221 | inet->daddr = daddr; |
385 | 222 | ||
386 | tp->ext_header_len = 0; | 223 | inet_csk(sk)->icsk_ext_hdr_len = 0; |
387 | if (inet->opt) | 224 | if (inet->opt) |
388 | tp->ext_header_len = inet->opt->optlen; | 225 | inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; |
389 | 226 | ||
390 | tp->rx_opt.mss_clamp = 536; | 227 | tp->rx_opt.mss_clamp = 536; |
391 | 228 | ||
@@ -395,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
395 | * complete initialization after this. | 232 | * complete initialization after this. |
396 | */ | 233 | */ |
397 | tcp_set_state(sk, TCP_SYN_SENT); | 234 | tcp_set_state(sk, TCP_SYN_SENT); |
398 | err = tcp_v4_hash_connect(sk); | 235 | err = inet_hash_connect(&tcp_death_row, sk); |
399 | if (err) | 236 | if (err) |
400 | goto failure; | 237 | goto failure; |
401 | 238 | ||
@@ -433,12 +270,10 @@ failure: | |||
433 | /* | 270 | /* |
434 | * This routine does path mtu discovery as defined in RFC1191. | 271 | * This routine does path mtu discovery as defined in RFC1191. |
435 | */ | 272 | */ |
436 | static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, | 273 | static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) |
437 | u32 mtu) | ||
438 | { | 274 | { |
439 | struct dst_entry *dst; | 275 | struct dst_entry *dst; |
440 | struct inet_sock *inet = inet_sk(sk); | 276 | struct inet_sock *inet = inet_sk(sk); |
441 | struct tcp_sock *tp = tcp_sk(sk); | ||
442 | 277 | ||
443 | /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs | 278 | /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs |
444 | * send out by Linux are always <576bytes so they should go through | 279 | * send out by Linux are always <576bytes so they should go through |
@@ -467,7 +302,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, | |||
467 | mtu = dst_mtu(dst); | 302 | mtu = dst_mtu(dst); |
468 | 303 | ||
469 | if (inet->pmtudisc != IP_PMTUDISC_DONT && | 304 | if (inet->pmtudisc != IP_PMTUDISC_DONT && |
470 | tp->pmtu_cookie > mtu) { | 305 | inet_csk(sk)->icsk_pmtu_cookie > mtu) { |
471 | tcp_sync_mss(sk, mtu); | 306 | tcp_sync_mss(sk, mtu); |
472 | 307 | ||
473 | /* Resend the TCP packet because it's | 308 | /* Resend the TCP packet because it's |
@@ -644,10 +479,10 @@ out: | |||
644 | } | 479 | } |
645 | 480 | ||
646 | /* This routine computes an IPv4 TCP checksum. */ | 481 | /* This routine computes an IPv4 TCP checksum. */ |
647 | void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, | 482 | void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) |
648 | struct sk_buff *skb) | ||
649 | { | 483 | { |
650 | struct inet_sock *inet = inet_sk(sk); | 484 | struct inet_sock *inet = inet_sk(sk); |
485 | struct tcphdr *th = skb->h.th; | ||
651 | 486 | ||
652 | if (skb->ip_summed == CHECKSUM_HW) { | 487 | if (skb->ip_summed == CHECKSUM_HW) { |
653 | th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); | 488 | th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); |
@@ -826,7 +661,8 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) | |||
826 | kfree(inet_rsk(req)->opt); | 661 | kfree(inet_rsk(req)->opt); |
827 | } | 662 | } |
828 | 663 | ||
829 | static inline void syn_flood_warning(struct sk_buff *skb) | 664 | #ifdef CONFIG_SYN_COOKIES |
665 | static void syn_flood_warning(struct sk_buff *skb) | ||
830 | { | 666 | { |
831 | static unsigned long warntime; | 667 | static unsigned long warntime; |
832 | 668 | ||
@@ -837,12 +673,13 @@ static inline void syn_flood_warning(struct sk_buff *skb) | |||
837 | ntohs(skb->h.th->dest)); | 673 | ntohs(skb->h.th->dest)); |
838 | } | 674 | } |
839 | } | 675 | } |
676 | #endif | ||
840 | 677 | ||
841 | /* | 678 | /* |
842 | * Save and compile IPv4 options into the request_sock if needed. | 679 | * Save and compile IPv4 options into the request_sock if needed. |
843 | */ | 680 | */ |
844 | static inline struct ip_options *tcp_v4_save_options(struct sock *sk, | 681 | static struct ip_options *tcp_v4_save_options(struct sock *sk, |
845 | struct sk_buff *skb) | 682 | struct sk_buff *skb) |
846 | { | 683 | { |
847 | struct ip_options *opt = &(IPCB(skb)->opt); | 684 | struct ip_options *opt = &(IPCB(skb)->opt); |
848 | struct ip_options *dopt = NULL; | 685 | struct ip_options *dopt = NULL; |
@@ -869,6 +706,11 @@ struct request_sock_ops tcp_request_sock_ops = { | |||
869 | .send_reset = tcp_v4_send_reset, | 706 | .send_reset = tcp_v4_send_reset, |
870 | }; | 707 | }; |
871 | 708 | ||
709 | static struct timewait_sock_ops tcp_timewait_sock_ops = { | ||
710 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), | ||
711 | .twsk_unique = tcp_twsk_unique, | ||
712 | }; | ||
713 | |||
872 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | 714 | int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) |
873 | { | 715 | { |
874 | struct inet_request_sock *ireq; | 716 | struct inet_request_sock *ireq; |
@@ -1053,9 +895,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1053 | ireq->opt = NULL; | 895 | ireq->opt = NULL; |
1054 | newinet->mc_index = inet_iif(skb); | 896 | newinet->mc_index = inet_iif(skb); |
1055 | newinet->mc_ttl = skb->nh.iph->ttl; | 897 | newinet->mc_ttl = skb->nh.iph->ttl; |
1056 | newtp->ext_header_len = 0; | 898 | inet_csk(newsk)->icsk_ext_hdr_len = 0; |
1057 | if (newinet->opt) | 899 | if (newinet->opt) |
1058 | newtp->ext_header_len = newinet->opt->optlen; | 900 | inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; |
1059 | newinet->id = newtp->write_seq ^ jiffies; | 901 | newinet->id = newtp->write_seq ^ jiffies; |
1060 | 902 | ||
1061 | tcp_sync_mss(newsk, dst_mtu(dst)); | 903 | tcp_sync_mss(newsk, dst_mtu(dst)); |
@@ -1314,16 +1156,6 @@ do_time_wait: | |||
1314 | goto discard_it; | 1156 | goto discard_it; |
1315 | } | 1157 | } |
1316 | 1158 | ||
1317 | static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) | ||
1318 | { | ||
1319 | struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; | ||
1320 | struct inet_sock *inet = inet_sk(sk); | ||
1321 | |||
1322 | sin->sin_family = AF_INET; | ||
1323 | sin->sin_addr.s_addr = inet->daddr; | ||
1324 | sin->sin_port = inet->dport; | ||
1325 | } | ||
1326 | |||
1327 | /* VJ's idea. Save last timestamp seen from this destination | 1159 | /* VJ's idea. Save last timestamp seen from this destination |
1328 | * and hold it at least for normal timewait interval to use for duplicate | 1160 | * and hold it at least for normal timewait interval to use for duplicate |
1329 | * segment detection in subsequent connections, before they enter synchronized | 1161 | * segment detection in subsequent connections, before they enter synchronized |
@@ -1382,7 +1214,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) | |||
1382 | return 0; | 1214 | return 0; |
1383 | } | 1215 | } |
1384 | 1216 | ||
1385 | struct tcp_func ipv4_specific = { | 1217 | struct inet_connection_sock_af_ops ipv4_specific = { |
1386 | .queue_xmit = ip_queue_xmit, | 1218 | .queue_xmit = ip_queue_xmit, |
1387 | .send_check = tcp_v4_send_check, | 1219 | .send_check = tcp_v4_send_check, |
1388 | .rebuild_header = inet_sk_rebuild_header, | 1220 | .rebuild_header = inet_sk_rebuild_header, |
@@ -1392,7 +1224,7 @@ struct tcp_func ipv4_specific = { | |||
1392 | .net_header_len = sizeof(struct iphdr), | 1224 | .net_header_len = sizeof(struct iphdr), |
1393 | .setsockopt = ip_setsockopt, | 1225 | .setsockopt = ip_setsockopt, |
1394 | .getsockopt = ip_getsockopt, | 1226 | .getsockopt = ip_getsockopt, |
1395 | .addr2sockaddr = v4_addr2sockaddr, | 1227 | .addr2sockaddr = inet_csk_addr2sockaddr, |
1396 | .sockaddr_len = sizeof(struct sockaddr_in), | 1228 | .sockaddr_len = sizeof(struct sockaddr_in), |
1397 | }; | 1229 | }; |
1398 | 1230 | ||
@@ -1433,7 +1265,8 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
1433 | sk->sk_write_space = sk_stream_write_space; | 1265 | sk->sk_write_space = sk_stream_write_space; |
1434 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | 1266 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); |
1435 | 1267 | ||
1436 | tp->af_specific = &ipv4_specific; | 1268 | icsk->icsk_af_ops = &ipv4_specific; |
1269 | icsk->icsk_sync_mss = tcp_sync_mss; | ||
1437 | 1270 | ||
1438 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; | 1271 | sk->sk_sndbuf = sysctl_tcp_wmem[1]; |
1439 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; | 1272 | sk->sk_rcvbuf = sysctl_tcp_rmem[1]; |
@@ -1989,7 +1822,7 @@ struct proto tcp_prot = { | |||
1989 | .sysctl_rmem = sysctl_tcp_rmem, | 1822 | .sysctl_rmem = sysctl_tcp_rmem, |
1990 | .max_header = MAX_TCP_HEADER, | 1823 | .max_header = MAX_TCP_HEADER, |
1991 | .obj_size = sizeof(struct tcp_sock), | 1824 | .obj_size = sizeof(struct tcp_sock), |
1992 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), | 1825 | .twsk_prot = &tcp_timewait_sock_ops, |
1993 | .rsk_prot = &tcp_request_sock_ops, | 1826 | .rsk_prot = &tcp_request_sock_ops, |
1994 | }; | 1827 | }; |
1995 | 1828 | ||
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1b66a2ac4321..2b9b7f6c7f7c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -274,18 +274,18 @@ kill: | |||
274 | void tcp_time_wait(struct sock *sk, int state, int timeo) | 274 | void tcp_time_wait(struct sock *sk, int state, int timeo) |
275 | { | 275 | { |
276 | struct inet_timewait_sock *tw = NULL; | 276 | struct inet_timewait_sock *tw = NULL; |
277 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
277 | const struct tcp_sock *tp = tcp_sk(sk); | 278 | const struct tcp_sock *tp = tcp_sk(sk); |
278 | int recycle_ok = 0; | 279 | int recycle_ok = 0; |
279 | 280 | ||
280 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) | 281 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) |
281 | recycle_ok = tp->af_specific->remember_stamp(sk); | 282 | recycle_ok = icsk->icsk_af_ops->remember_stamp(sk); |
282 | 283 | ||
283 | if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) | 284 | if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) |
284 | tw = inet_twsk_alloc(sk, state); | 285 | tw = inet_twsk_alloc(sk, state); |
285 | 286 | ||
286 | if (tw != NULL) { | 287 | if (tw != NULL) { |
287 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | 288 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
288 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
289 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); | 289 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); |
290 | 290 | ||
291 | tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; | 291 | tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; |
@@ -298,10 +298,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
298 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 298 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
299 | if (tw->tw_family == PF_INET6) { | 299 | if (tw->tw_family == PF_INET6) { |
300 | struct ipv6_pinfo *np = inet6_sk(sk); | 300 | struct ipv6_pinfo *np = inet6_sk(sk); |
301 | struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); | 301 | struct inet6_timewait_sock *tw6; |
302 | 302 | ||
303 | ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); | 303 | tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); |
304 | ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); | 304 | tw6 = inet6_twsk((struct sock *)tw); |
305 | ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); | ||
306 | ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); | ||
305 | tw->tw_ipv6only = np->ipv6only; | 307 | tw->tw_ipv6only = np->ipv6only; |
306 | } | 308 | } |
307 | #endif | 309 | #endif |
@@ -456,7 +458,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
456 | struct request_sock **prev) | 458 | struct request_sock **prev) |
457 | { | 459 | { |
458 | struct tcphdr *th = skb->h.th; | 460 | struct tcphdr *th = skb->h.th; |
459 | struct tcp_sock *tp = tcp_sk(sk); | ||
460 | u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); | 461 | u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); |
461 | int paws_reject = 0; | 462 | int paws_reject = 0; |
462 | struct tcp_options_received tmp_opt; | 463 | struct tcp_options_received tmp_opt; |
@@ -613,7 +614,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
613 | * ESTABLISHED STATE. If it will be dropped after | 614 | * ESTABLISHED STATE. If it will be dropped after |
614 | * socket is created, wait for troubles. | 615 | * socket is created, wait for troubles. |
615 | */ | 616 | */ |
616 | child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); | 617 | child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, |
618 | req, NULL); | ||
617 | if (child == NULL) | 619 | if (child == NULL) |
618 | goto listen_overflow; | 620 | goto listen_overflow; |
619 | 621 | ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b7325e0b406a..a7623ead39a8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -51,8 +51,8 @@ int sysctl_tcp_retrans_collapse = 1; | |||
51 | */ | 51 | */ |
52 | int sysctl_tcp_tso_win_divisor = 3; | 52 | int sysctl_tcp_tso_win_divisor = 3; |
53 | 53 | ||
54 | static inline void update_send_head(struct sock *sk, struct tcp_sock *tp, | 54 | static void update_send_head(struct sock *sk, struct tcp_sock *tp, |
55 | struct sk_buff *skb) | 55 | struct sk_buff *skb) |
56 | { | 56 | { |
57 | sk->sk_send_head = skb->next; | 57 | sk->sk_send_head = skb->next; |
58 | if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) | 58 | if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) |
@@ -124,8 +124,8 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) | |||
124 | tp->snd_cwnd_used = 0; | 124 | tp->snd_cwnd_used = 0; |
125 | } | 125 | } |
126 | 126 | ||
127 | static inline void tcp_event_data_sent(struct tcp_sock *tp, | 127 | static void tcp_event_data_sent(struct tcp_sock *tp, |
128 | struct sk_buff *skb, struct sock *sk) | 128 | struct sk_buff *skb, struct sock *sk) |
129 | { | 129 | { |
130 | struct inet_connection_sock *icsk = inet_csk(sk); | 130 | struct inet_connection_sock *icsk = inet_csk(sk); |
131 | const u32 now = tcp_time_stamp; | 131 | const u32 now = tcp_time_stamp; |
@@ -142,7 +142,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp, | |||
142 | icsk->icsk_ack.pingpong = 1; | 142 | icsk->icsk_ack.pingpong = 1; |
143 | } | 143 | } |
144 | 144 | ||
145 | static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) | 145 | static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) |
146 | { | 146 | { |
147 | tcp_dec_quickack_mode(sk, pkts); | 147 | tcp_dec_quickack_mode(sk, pkts); |
148 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); | 148 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); |
@@ -212,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss, | |||
212 | * value can be stuffed directly into th->window for an outgoing | 212 | * value can be stuffed directly into th->window for an outgoing |
213 | * frame. | 213 | * frame. |
214 | */ | 214 | */ |
215 | static __inline__ u16 tcp_select_window(struct sock *sk) | 215 | static u16 tcp_select_window(struct sock *sk) |
216 | { | 216 | { |
217 | struct tcp_sock *tp = tcp_sk(sk); | 217 | struct tcp_sock *tp = tcp_sk(sk); |
218 | u32 cur_win = tcp_receive_window(tp); | 218 | u32 cur_win = tcp_receive_window(tp); |
@@ -250,6 +250,75 @@ static __inline__ u16 tcp_select_window(struct sock *sk) | |||
250 | return new_win; | 250 | return new_win; |
251 | } | 251 | } |
252 | 252 | ||
253 | static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, | ||
254 | __u32 tstamp) | ||
255 | { | ||
256 | if (tp->rx_opt.tstamp_ok) { | ||
257 | *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | | ||
258 | (TCPOPT_NOP << 16) | | ||
259 | (TCPOPT_TIMESTAMP << 8) | | ||
260 | TCPOLEN_TIMESTAMP); | ||
261 | *ptr++ = htonl(tstamp); | ||
262 | *ptr++ = htonl(tp->rx_opt.ts_recent); | ||
263 | } | ||
264 | if (tp->rx_opt.eff_sacks) { | ||
265 | struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; | ||
266 | int this_sack; | ||
267 | |||
268 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
269 | (TCPOPT_NOP << 16) | | ||
270 | (TCPOPT_SACK << 8) | | ||
271 | (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * | ||
272 | TCPOLEN_SACK_PERBLOCK))); | ||
273 | for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { | ||
274 | *ptr++ = htonl(sp[this_sack].start_seq); | ||
275 | *ptr++ = htonl(sp[this_sack].end_seq); | ||
276 | } | ||
277 | if (tp->rx_opt.dsack) { | ||
278 | tp->rx_opt.dsack = 0; | ||
279 | tp->rx_opt.eff_sacks--; | ||
280 | } | ||
281 | } | ||
282 | } | ||
283 | |||
284 | /* Construct a tcp options header for a SYN or SYN_ACK packet. | ||
285 | * If this is every changed make sure to change the definition of | ||
286 | * MAX_SYN_SIZE to match the new maximum number of options that you | ||
287 | * can generate. | ||
288 | */ | ||
289 | static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, | ||
290 | int offer_wscale, int wscale, __u32 tstamp, | ||
291 | __u32 ts_recent) | ||
292 | { | ||
293 | /* We always get an MSS option. | ||
294 | * The option bytes which will be seen in normal data | ||
295 | * packets should timestamps be used, must be in the MSS | ||
296 | * advertised. But we subtract them from tp->mss_cache so | ||
297 | * that calculations in tcp_sendmsg are simpler etc. | ||
298 | * So account for this fact here if necessary. If we | ||
299 | * don't do this correctly, as a receiver we won't | ||
300 | * recognize data packets as being full sized when we | ||
301 | * should, and thus we won't abide by the delayed ACK | ||
302 | * rules correctly. | ||
303 | * SACKs don't matter, we never delay an ACK when we | ||
304 | * have any of those going out. | ||
305 | */ | ||
306 | *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); | ||
307 | if (ts) { | ||
308 | if(sack) | ||
309 | *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | | ||
310 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); | ||
311 | else | ||
312 | *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | | ||
313 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); | ||
314 | *ptr++ = htonl(tstamp); /* TSVAL */ | ||
315 | *ptr++ = htonl(ts_recent); /* TSECR */ | ||
316 | } else if(sack) | ||
317 | *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | | ||
318 | (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); | ||
319 | if (offer_wscale) | ||
320 | *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); | ||
321 | } | ||
253 | 322 | ||
254 | /* This routine actually transmits TCP packets queued in by | 323 | /* This routine actually transmits TCP packets queued in by |
255 | * tcp_do_sendmsg(). This is used by both the initial | 324 | * tcp_do_sendmsg(). This is used by both the initial |
@@ -371,7 +440,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
371 | TCP_ECN_send(sk, tp, skb, tcp_header_size); | 440 | TCP_ECN_send(sk, tp, skb, tcp_header_size); |
372 | } | 441 | } |
373 | 442 | ||
374 | tp->af_specific->send_check(sk, th, skb->len, skb); | 443 | icsk->icsk_af_ops->send_check(sk, skb->len, skb); |
375 | 444 | ||
376 | if (likely(tcb->flags & TCPCB_FLAG_ACK)) | 445 | if (likely(tcb->flags & TCPCB_FLAG_ACK)) |
377 | tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); | 446 | tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); |
@@ -381,7 +450,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
381 | 450 | ||
382 | TCP_INC_STATS(TCP_MIB_OUTSEGS); | 451 | TCP_INC_STATS(TCP_MIB_OUTSEGS); |
383 | 452 | ||
384 | err = tp->af_specific->queue_xmit(skb, 0); | 453 | err = icsk->icsk_af_ops->queue_xmit(skb, 0); |
385 | if (unlikely(err <= 0)) | 454 | if (unlikely(err <= 0)) |
386 | return err; | 455 | return err; |
387 | 456 | ||
@@ -621,7 +690,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
621 | It is minimum of user_mss and mss received with SYN. | 690 | It is minimum of user_mss and mss received with SYN. |
622 | It also does not include TCP options. | 691 | It also does not include TCP options. |
623 | 692 | ||
624 | tp->pmtu_cookie is last pmtu, seen by this function. | 693 | inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function. |
625 | 694 | ||
626 | tp->mss_cache is current effective sending mss, including | 695 | tp->mss_cache is current effective sending mss, including |
627 | all tcp options except for SACKs. It is evaluated, | 696 | all tcp options except for SACKs. It is evaluated, |
@@ -631,26 +700,26 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
631 | NOTE1. rfc1122 clearly states that advertised MSS | 700 | NOTE1. rfc1122 clearly states that advertised MSS |
632 | DOES NOT include either tcp or ip options. | 701 | DOES NOT include either tcp or ip options. |
633 | 702 | ||
634 | NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside | 703 | NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache |
635 | this function. --ANK (980731) | 704 | are READ ONLY outside this function. --ANK (980731) |
636 | */ | 705 | */ |
637 | 706 | ||
638 | unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) | 707 | unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) |
639 | { | 708 | { |
640 | struct tcp_sock *tp = tcp_sk(sk); | 709 | struct tcp_sock *tp = tcp_sk(sk); |
641 | int mss_now; | 710 | struct inet_connection_sock *icsk = inet_csk(sk); |
642 | |||
643 | /* Calculate base mss without TCP options: | 711 | /* Calculate base mss without TCP options: |
644 | It is MMS_S - sizeof(tcphdr) of rfc1122 | 712 | It is MMS_S - sizeof(tcphdr) of rfc1122 |
645 | */ | 713 | */ |
646 | mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr); | 714 | int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - |
715 | sizeof(struct tcphdr)); | ||
647 | 716 | ||
648 | /* Clamp it (mss_clamp does not include tcp options) */ | 717 | /* Clamp it (mss_clamp does not include tcp options) */ |
649 | if (mss_now > tp->rx_opt.mss_clamp) | 718 | if (mss_now > tp->rx_opt.mss_clamp) |
650 | mss_now = tp->rx_opt.mss_clamp; | 719 | mss_now = tp->rx_opt.mss_clamp; |
651 | 720 | ||
652 | /* Now subtract optional transport overhead */ | 721 | /* Now subtract optional transport overhead */ |
653 | mss_now -= tp->ext_header_len; | 722 | mss_now -= icsk->icsk_ext_hdr_len; |
654 | 723 | ||
655 | /* Then reserve room for full set of TCP options and 8 bytes of data */ | 724 | /* Then reserve room for full set of TCP options and 8 bytes of data */ |
656 | if (mss_now < 48) | 725 | if (mss_now < 48) |
@@ -664,7 +733,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) | |||
664 | mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); | 733 | mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); |
665 | 734 | ||
666 | /* And store cached results */ | 735 | /* And store cached results */ |
667 | tp->pmtu_cookie = pmtu; | 736 | icsk->icsk_pmtu_cookie = pmtu; |
668 | tp->mss_cache = mss_now; | 737 | tp->mss_cache = mss_now; |
669 | 738 | ||
670 | return mss_now; | 739 | return mss_now; |
@@ -694,7 +763,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
694 | 763 | ||
695 | if (dst) { | 764 | if (dst) { |
696 | u32 mtu = dst_mtu(dst); | 765 | u32 mtu = dst_mtu(dst); |
697 | if (mtu != tp->pmtu_cookie) | 766 | if (mtu != inet_csk(sk)->icsk_pmtu_cookie) |
698 | mss_now = tcp_sync_mss(sk, mtu); | 767 | mss_now = tcp_sync_mss(sk, mtu); |
699 | } | 768 | } |
700 | 769 | ||
@@ -705,9 +774,10 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
705 | xmit_size_goal = mss_now; | 774 | xmit_size_goal = mss_now; |
706 | 775 | ||
707 | if (doing_tso) { | 776 | if (doing_tso) { |
708 | xmit_size_goal = 65535 - | 777 | xmit_size_goal = (65535 - |
709 | tp->af_specific->net_header_len - | 778 | inet_csk(sk)->icsk_af_ops->net_header_len - |
710 | tp->ext_header_len - tp->tcp_header_len; | 779 | inet_csk(sk)->icsk_ext_hdr_len - |
780 | tp->tcp_header_len); | ||
711 | 781 | ||
712 | if (tp->max_window && | 782 | if (tp->max_window && |
713 | (xmit_size_goal > (tp->max_window >> 1))) | 783 | (xmit_size_goal > (tp->max_window >> 1))) |
@@ -723,7 +793,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
723 | 793 | ||
724 | /* Congestion window validation. (RFC2861) */ | 794 | /* Congestion window validation. (RFC2861) */ |
725 | 795 | ||
726 | static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) | 796 | static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) |
727 | { | 797 | { |
728 | __u32 packets_out = tp->packets_out; | 798 | __u32 packets_out = tp->packets_out; |
729 | 799 | ||
@@ -772,7 +842,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk | |||
772 | /* This must be invoked the first time we consider transmitting | 842 | /* This must be invoked the first time we consider transmitting |
773 | * SKB onto the wire. | 843 | * SKB onto the wire. |
774 | */ | 844 | */ |
775 | static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) | 845 | static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) |
776 | { | 846 | { |
777 | int tso_segs = tcp_skb_pcount(skb); | 847 | int tso_segs = tcp_skb_pcount(skb); |
778 | 848 | ||
@@ -1422,7 +1492,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1422 | (sysctl_tcp_retrans_collapse != 0)) | 1492 | (sysctl_tcp_retrans_collapse != 0)) |
1423 | tcp_retrans_try_collapse(sk, skb, cur_mss); | 1493 | tcp_retrans_try_collapse(sk, skb, cur_mss); |
1424 | 1494 | ||
1425 | if(tp->af_specific->rebuild_header(sk)) | 1495 | if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) |
1426 | return -EHOSTUNREACH; /* Routing failure or similar. */ | 1496 | return -EHOSTUNREACH; /* Routing failure or similar. */ |
1427 | 1497 | ||
1428 | /* Some Solaris stacks overoptimize and ignore the FIN on a | 1498 | /* Some Solaris stacks overoptimize and ignore the FIN on a |
@@ -1793,7 +1863,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
1793 | /* | 1863 | /* |
1794 | * Do all connect socket setups that can be done AF independent. | 1864 | * Do all connect socket setups that can be done AF independent. |
1795 | */ | 1865 | */ |
1796 | static inline void tcp_connect_init(struct sock *sk) | 1866 | static void tcp_connect_init(struct sock *sk) |
1797 | { | 1867 | { |
1798 | struct dst_entry *dst = __sk_dst_get(sk); | 1868 | struct dst_entry *dst = __sk_dst_get(sk); |
1799 | struct tcp_sock *tp = tcp_sk(sk); | 1869 | struct tcp_sock *tp = tcp_sk(sk); |
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 13e7e6e8df16..3b7403495052 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
@@ -330,6 +330,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | |||
330 | vegas->cntRTT = 0; | 330 | vegas->cntRTT = 0; |
331 | vegas->minRTT = 0x7fffffff; | 331 | vegas->minRTT = 0x7fffffff; |
332 | } | 332 | } |
333 | /* Use normal slow start */ | ||
334 | else if (tp->snd_cwnd <= tp->snd_ssthresh) | ||
335 | tcp_slow_start(tp); | ||
336 | |||
333 | } | 337 | } |
334 | 338 | ||
335 | /* Extract info for Tcp socket info provided via netlink. */ | 339 | /* Extract info for Tcp socket info provided via netlink. */ |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2422a5f7195d..223abaa72bc5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -86,6 +86,7 @@ | |||
86 | #include <linux/module.h> | 86 | #include <linux/module.h> |
87 | #include <linux/socket.h> | 87 | #include <linux/socket.h> |
88 | #include <linux/sockios.h> | 88 | #include <linux/sockios.h> |
89 | #include <linux/igmp.h> | ||
89 | #include <linux/in.h> | 90 | #include <linux/in.h> |
90 | #include <linux/errno.h> | 91 | #include <linux/errno.h> |
91 | #include <linux/timer.h> | 92 | #include <linux/timer.h> |
@@ -846,20 +847,7 @@ out: | |||
846 | csum_copy_err: | 847 | csum_copy_err: |
847 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 848 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); |
848 | 849 | ||
849 | /* Clear queue. */ | 850 | skb_kill_datagram(sk, skb, flags); |
850 | if (flags&MSG_PEEK) { | ||
851 | int clear = 0; | ||
852 | spin_lock_bh(&sk->sk_receive_queue.lock); | ||
853 | if (skb == skb_peek(&sk->sk_receive_queue)) { | ||
854 | __skb_unlink(skb, &sk->sk_receive_queue); | ||
855 | clear = 1; | ||
856 | } | ||
857 | spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
858 | if (clear) | ||
859 | kfree_skb(skb); | ||
860 | } | ||
861 | |||
862 | skb_free_datagram(sk, skb); | ||
863 | 851 | ||
864 | if (noblock) | 852 | if (noblock) |
865 | return -EAGAIN; | 853 | return -EAGAIN; |
@@ -1094,7 +1082,7 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, | |||
1094 | * Otherwise, csum completion requires chacksumming packet body, | 1082 | * Otherwise, csum completion requires chacksumming packet body, |
1095 | * including udp header and folding it to skb->csum. | 1083 | * including udp header and folding it to skb->csum. |
1096 | */ | 1084 | */ |
1097 | static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, | 1085 | static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, |
1098 | unsigned short ulen, u32 saddr, u32 daddr) | 1086 | unsigned short ulen, u32 saddr, u32 daddr) |
1099 | { | 1087 | { |
1100 | if (uh->check == 0) { | 1088 | if (uh->check == 0) { |
@@ -1108,7 +1096,6 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, | |||
1108 | /* Probably, we should checksum udp header (it should be in cache | 1096 | /* Probably, we should checksum udp header (it should be in cache |
1109 | * in any case) and data in tiny packets (< rx copybreak). | 1097 | * in any case) and data in tiny packets (< rx copybreak). |
1110 | */ | 1098 | */ |
1111 | return 0; | ||
1112 | } | 1099 | } |
1113 | 1100 | ||
1114 | /* | 1101 | /* |
@@ -1141,8 +1128,7 @@ int udp_rcv(struct sk_buff *skb) | |||
1141 | if (pskb_trim_rcsum(skb, ulen)) | 1128 | if (pskb_trim_rcsum(skb, ulen)) |
1142 | goto short_packet; | 1129 | goto short_packet; |
1143 | 1130 | ||
1144 | if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0) | 1131 | udp_checksum_init(skb, uh, ulen, saddr, daddr); |
1145 | goto csum_error; | ||
1146 | 1132 | ||
1147 | if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | 1133 | if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) |
1148 | return udp_v4_mcast_deliver(skb, uh, saddr, daddr); | 1134 | return udp_v4_mcast_deliver(skb, uh, saddr, daddr); |
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 6460eec834b7..9601fd7f9d66 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile | |||
@@ -8,7 +8,8 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ | |||
8 | route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ | 8 | route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ |
9 | protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ | 9 | protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ |
10 | exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ | 10 | exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ |
11 | ip6_flowlabel.o ipv6_syms.o netfilter.o | 11 | ip6_flowlabel.o ipv6_syms.o netfilter.o \ |
12 | inet6_connection_sock.o | ||
12 | 13 | ||
13 | ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ | 14 | ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ |
14 | xfrm6_output.o | 15 | xfrm6_output.o |
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a60585fd85ad..704fb73e6c5f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -1195,7 +1195,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device * | |||
1195 | int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) | 1195 | int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) |
1196 | { | 1196 | { |
1197 | const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; | 1197 | const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; |
1198 | const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); | 1198 | const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); |
1199 | u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; | 1199 | u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; |
1200 | u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); | 1200 | u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); |
1201 | int sk_ipv6only = ipv6_only_sock(sk); | 1201 | int sk_ipv6only = ipv6_only_sock(sk); |
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d9546380fa04..68afc53be662 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c | |||
@@ -167,6 +167,7 @@ lookup_protocol: | |||
167 | sk->sk_reuse = 1; | 167 | sk->sk_reuse = 1; |
168 | 168 | ||
169 | inet = inet_sk(sk); | 169 | inet = inet_sk(sk); |
170 | inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; | ||
170 | 171 | ||
171 | if (SOCK_RAW == sock->type) { | 172 | if (SOCK_RAW == sock->type) { |
172 | inet->num = protocol; | 173 | inet->num = protocol; |
@@ -389,6 +390,8 @@ int inet6_destroy_sock(struct sock *sk) | |||
389 | return 0; | 390 | return 0; |
390 | } | 391 | } |
391 | 392 | ||
393 | EXPORT_SYMBOL_GPL(inet6_destroy_sock); | ||
394 | |||
392 | /* | 395 | /* |
393 | * This does both peername and sockname. | 396 | * This does both peername and sockname. |
394 | */ | 397 | */ |
@@ -431,7 +434,6 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, | |||
431 | int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | 434 | int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) |
432 | { | 435 | { |
433 | struct sock *sk = sock->sk; | 436 | struct sock *sk = sock->sk; |
434 | int err = -EINVAL; | ||
435 | 437 | ||
436 | switch(cmd) | 438 | switch(cmd) |
437 | { | 439 | { |
@@ -450,16 +452,15 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
450 | case SIOCSIFDSTADDR: | 452 | case SIOCSIFDSTADDR: |
451 | return addrconf_set_dstaddr((void __user *) arg); | 453 | return addrconf_set_dstaddr((void __user *) arg); |
452 | default: | 454 | default: |
453 | if (!sk->sk_prot->ioctl || | 455 | if (!sk->sk_prot->ioctl) |
454 | (err = sk->sk_prot->ioctl(sk, cmd, arg)) == -ENOIOCTLCMD) | 456 | return -ENOIOCTLCMD; |
455 | return(dev_ioctl(cmd,(void __user *) arg)); | 457 | return sk->sk_prot->ioctl(sk, cmd, arg); |
456 | return err; | ||
457 | } | 458 | } |
458 | /*NOTREACHED*/ | 459 | /*NOTREACHED*/ |
459 | return(0); | 460 | return(0); |
460 | } | 461 | } |
461 | 462 | ||
462 | struct proto_ops inet6_stream_ops = { | 463 | const struct proto_ops inet6_stream_ops = { |
463 | .family = PF_INET6, | 464 | .family = PF_INET6, |
464 | .owner = THIS_MODULE, | 465 | .owner = THIS_MODULE, |
465 | .release = inet6_release, | 466 | .release = inet6_release, |
@@ -480,7 +481,7 @@ struct proto_ops inet6_stream_ops = { | |||
480 | .sendpage = tcp_sendpage | 481 | .sendpage = tcp_sendpage |
481 | }; | 482 | }; |
482 | 483 | ||
483 | struct proto_ops inet6_dgram_ops = { | 484 | const struct proto_ops inet6_dgram_ops = { |
484 | .family = PF_INET6, | 485 | .family = PF_INET6, |
485 | .owner = THIS_MODULE, | 486 | .owner = THIS_MODULE, |
486 | .release = inet6_release, | 487 | .release = inet6_release, |
@@ -508,7 +509,7 @@ static struct net_proto_family inet6_family_ops = { | |||
508 | }; | 509 | }; |
509 | 510 | ||
510 | /* Same as inet6_dgram_ops, sans udp_poll. */ | 511 | /* Same as inet6_dgram_ops, sans udp_poll. */ |
511 | static struct proto_ops inet6_sockraw_ops = { | 512 | static const struct proto_ops inet6_sockraw_ops = { |
512 | .family = PF_INET6, | 513 | .family = PF_INET6, |
513 | .owner = THIS_MODULE, | 514 | .owner = THIS_MODULE, |
514 | .release = inet6_release, | 515 | .release = inet6_release, |
@@ -609,6 +610,79 @@ inet6_unregister_protosw(struct inet_protosw *p) | |||
609 | } | 610 | } |
610 | } | 611 | } |
611 | 612 | ||
613 | int inet6_sk_rebuild_header(struct sock *sk) | ||
614 | { | ||
615 | int err; | ||
616 | struct dst_entry *dst; | ||
617 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
618 | |||
619 | dst = __sk_dst_check(sk, np->dst_cookie); | ||
620 | |||
621 | if (dst == NULL) { | ||
622 | struct inet_sock *inet = inet_sk(sk); | ||
623 | struct in6_addr *final_p = NULL, final; | ||
624 | struct flowi fl; | ||
625 | |||
626 | memset(&fl, 0, sizeof(fl)); | ||
627 | fl.proto = sk->sk_protocol; | ||
628 | ipv6_addr_copy(&fl.fl6_dst, &np->daddr); | ||
629 | ipv6_addr_copy(&fl.fl6_src, &np->saddr); | ||
630 | fl.fl6_flowlabel = np->flow_label; | ||
631 | fl.oif = sk->sk_bound_dev_if; | ||
632 | fl.fl_ip_dport = inet->dport; | ||
633 | fl.fl_ip_sport = inet->sport; | ||
634 | |||
635 | if (np->opt && np->opt->srcrt) { | ||
636 | struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; | ||
637 | ipv6_addr_copy(&final, &fl.fl6_dst); | ||
638 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); | ||
639 | final_p = &final; | ||
640 | } | ||
641 | |||
642 | err = ip6_dst_lookup(sk, &dst, &fl); | ||
643 | if (err) { | ||
644 | sk->sk_route_caps = 0; | ||
645 | return err; | ||
646 | } | ||
647 | if (final_p) | ||
648 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
649 | |||
650 | if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { | ||
651 | sk->sk_err_soft = -err; | ||
652 | return err; | ||
653 | } | ||
654 | |||
655 | ip6_dst_store(sk, dst, NULL); | ||
656 | sk->sk_route_caps = dst->dev->features & | ||
657 | ~(NETIF_F_IP_CSUM | NETIF_F_TSO); | ||
658 | } | ||
659 | |||
660 | return 0; | ||
661 | } | ||
662 | |||
663 | EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); | ||
664 | |||
665 | int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) | ||
666 | { | ||
667 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
668 | struct inet6_skb_parm *opt = IP6CB(skb); | ||
669 | |||
670 | if (np->rxopt.all) { | ||
671 | if ((opt->hop && (np->rxopt.bits.hopopts || | ||
672 | np->rxopt.bits.ohopopts)) || | ||
673 | ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && | ||
674 | np->rxopt.bits.rxflow) || | ||
675 | (opt->srcrt && (np->rxopt.bits.srcrt || | ||
676 | np->rxopt.bits.osrcrt)) || | ||
677 | ((opt->dst1 || opt->dst0) && | ||
678 | (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) | ||
679 | return 1; | ||
680 | } | ||
681 | return 0; | ||
682 | } | ||
683 | |||
684 | EXPORT_SYMBOL_GPL(ipv6_opt_accepted); | ||
685 | |||
612 | int | 686 | int |
613 | snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) | 687 | snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) |
614 | { | 688 | { |
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f3629730eb15..13cc7f895583 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <net/icmp.h> | 34 | #include <net/icmp.h> |
35 | #include <net/ipv6.h> | 35 | #include <net/ipv6.h> |
36 | #include <net/protocol.h> | ||
36 | #include <net/xfrm.h> | 37 | #include <net/xfrm.h> |
37 | #include <asm/scatterlist.h> | 38 | #include <asm/scatterlist.h> |
38 | 39 | ||
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 8bfbe9970793..6de8ee1a5ad9 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/random.h> | 36 | #include <linux/random.h> |
37 | #include <net/icmp.h> | 37 | #include <net/icmp.h> |
38 | #include <net/ipv6.h> | 38 | #include <net/ipv6.h> |
39 | #include <net/protocol.h> | ||
39 | #include <linux/icmpv6.h> | 40 | #include <linux/icmpv6.h> |
40 | 41 | ||
41 | static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) | 42 | static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) |
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index be6faf311387..113374dc342c 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c | |||
@@ -413,6 +413,8 @@ ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr) | |||
413 | return opt; | 413 | return opt; |
414 | } | 414 | } |
415 | 415 | ||
416 | EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); | ||
417 | |||
416 | /********************************** | 418 | /********************************** |
417 | Hop-by-hop options. | 419 | Hop-by-hop options. |
418 | **********************************/ | 420 | **********************************/ |
@@ -579,6 +581,8 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) | |||
579 | return opt2; | 581 | return opt2; |
580 | } | 582 | } |
581 | 583 | ||
584 | EXPORT_SYMBOL_GPL(ipv6_dup_options); | ||
585 | |||
582 | static int ipv6_renew_option(void *ohdr, | 586 | static int ipv6_renew_option(void *ohdr, |
583 | struct ipv6_opt_hdr __user *newopt, int newoptlen, | 587 | struct ipv6_opt_hdr __user *newopt, int newoptlen, |
584 | int inherit, | 588 | int inherit, |
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c new file mode 100644 index 000000000000..792f90f0f9ec --- /dev/null +++ b/net/ipv6/inet6_connection_sock.c | |||
@@ -0,0 +1,199 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * Support for INET6 connection oriented protocols. | ||
7 | * | ||
8 | * Authors: See the TCPv6 sources | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or(at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/in6.h> | ||
19 | #include <linux/ipv6.h> | ||
20 | #include <linux/jhash.h> | ||
21 | |||
22 | #include <net/addrconf.h> | ||
23 | #include <net/inet_connection_sock.h> | ||
24 | #include <net/inet_ecn.h> | ||
25 | #include <net/inet_hashtables.h> | ||
26 | #include <net/ip6_route.h> | ||
27 | #include <net/sock.h> | ||
28 | |||
29 | int inet6_csk_bind_conflict(const struct sock *sk, | ||
30 | const struct inet_bind_bucket *tb) | ||
31 | { | ||
32 | const struct sock *sk2; | ||
33 | const struct hlist_node *node; | ||
34 | |||
35 | /* We must walk the whole port owner list in this case. -DaveM */ | ||
36 | sk_for_each_bound(sk2, node, &tb->owners) { | ||
37 | if (sk != sk2 && | ||
38 | (!sk->sk_bound_dev_if || | ||
39 | !sk2->sk_bound_dev_if || | ||
40 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && | ||
41 | (!sk->sk_reuse || !sk2->sk_reuse || | ||
42 | sk2->sk_state == TCP_LISTEN) && | ||
43 | ipv6_rcv_saddr_equal(sk, sk2)) | ||
44 | break; | ||
45 | } | ||
46 | |||
47 | return node != NULL; | ||
48 | } | ||
49 | |||
50 | EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); | ||
51 | |||
52 | /* | ||
53 | * request_sock (formerly open request) hash tables. | ||
54 | */ | ||
55 | static u32 inet6_synq_hash(const struct in6_addr *raddr, const u16 rport, | ||
56 | const u32 rnd, const u16 synq_hsize) | ||
57 | { | ||
58 | u32 a = raddr->s6_addr32[0]; | ||
59 | u32 b = raddr->s6_addr32[1]; | ||
60 | u32 c = raddr->s6_addr32[2]; | ||
61 | |||
62 | a += JHASH_GOLDEN_RATIO; | ||
63 | b += JHASH_GOLDEN_RATIO; | ||
64 | c += rnd; | ||
65 | __jhash_mix(a, b, c); | ||
66 | |||
67 | a += raddr->s6_addr32[3]; | ||
68 | b += (u32)rport; | ||
69 | __jhash_mix(a, b, c); | ||
70 | |||
71 | return c & (synq_hsize - 1); | ||
72 | } | ||
73 | |||
74 | struct request_sock *inet6_csk_search_req(const struct sock *sk, | ||
75 | struct request_sock ***prevp, | ||
76 | const __u16 rport, | ||
77 | const struct in6_addr *raddr, | ||
78 | const struct in6_addr *laddr, | ||
79 | const int iif) | ||
80 | { | ||
81 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
82 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
83 | struct request_sock *req, **prev; | ||
84 | |||
85 | for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport, | ||
86 | lopt->hash_rnd, | ||
87 | lopt->nr_table_entries)]; | ||
88 | (req = *prev) != NULL; | ||
89 | prev = &req->dl_next) { | ||
90 | const struct inet6_request_sock *treq = inet6_rsk(req); | ||
91 | |||
92 | if (inet_rsk(req)->rmt_port == rport && | ||
93 | req->rsk_ops->family == AF_INET6 && | ||
94 | ipv6_addr_equal(&treq->rmt_addr, raddr) && | ||
95 | ipv6_addr_equal(&treq->loc_addr, laddr) && | ||
96 | (!treq->iif || treq->iif == iif)) { | ||
97 | BUG_TRAP(req->sk == NULL); | ||
98 | *prevp = prev; | ||
99 | return req; | ||
100 | } | ||
101 | } | ||
102 | |||
103 | return NULL; | ||
104 | } | ||
105 | |||
106 | EXPORT_SYMBOL_GPL(inet6_csk_search_req); | ||
107 | |||
108 | void inet6_csk_reqsk_queue_hash_add(struct sock *sk, | ||
109 | struct request_sock *req, | ||
110 | const unsigned long timeout) | ||
111 | { | ||
112 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
113 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
114 | const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr, | ||
115 | inet_rsk(req)->rmt_port, | ||
116 | lopt->hash_rnd, lopt->nr_table_entries); | ||
117 | |||
118 | reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); | ||
119 | inet_csk_reqsk_queue_added(sk, timeout); | ||
120 | } | ||
121 | |||
122 | EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); | ||
123 | |||
124 | void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) | ||
125 | { | ||
126 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
127 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; | ||
128 | |||
129 | sin6->sin6_family = AF_INET6; | ||
130 | ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); | ||
131 | sin6->sin6_port = inet_sk(sk)->dport; | ||
132 | /* We do not store received flowlabel for TCP */ | ||
133 | sin6->sin6_flowinfo = 0; | ||
134 | sin6->sin6_scope_id = 0; | ||
135 | if (sk->sk_bound_dev_if && | ||
136 | ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) | ||
137 | sin6->sin6_scope_id = sk->sk_bound_dev_if; | ||
138 | } | ||
139 | |||
140 | EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); | ||
141 | |||
142 | int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) | ||
143 | { | ||
144 | struct sock *sk = skb->sk; | ||
145 | struct inet_sock *inet = inet_sk(sk); | ||
146 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
147 | struct flowi fl; | ||
148 | struct dst_entry *dst; | ||
149 | struct in6_addr *final_p = NULL, final; | ||
150 | |||
151 | memset(&fl, 0, sizeof(fl)); | ||
152 | fl.proto = sk->sk_protocol; | ||
153 | ipv6_addr_copy(&fl.fl6_dst, &np->daddr); | ||
154 | ipv6_addr_copy(&fl.fl6_src, &np->saddr); | ||
155 | fl.fl6_flowlabel = np->flow_label; | ||
156 | IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); | ||
157 | fl.oif = sk->sk_bound_dev_if; | ||
158 | fl.fl_ip_sport = inet->sport; | ||
159 | fl.fl_ip_dport = inet->dport; | ||
160 | |||
161 | if (np->opt && np->opt->srcrt) { | ||
162 | struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; | ||
163 | ipv6_addr_copy(&final, &fl.fl6_dst); | ||
164 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); | ||
165 | final_p = &final; | ||
166 | } | ||
167 | |||
168 | dst = __sk_dst_check(sk, np->dst_cookie); | ||
169 | |||
170 | if (dst == NULL) { | ||
171 | int err = ip6_dst_lookup(sk, &dst, &fl); | ||
172 | |||
173 | if (err) { | ||
174 | sk->sk_err_soft = -err; | ||
175 | return err; | ||
176 | } | ||
177 | |||
178 | if (final_p) | ||
179 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
180 | |||
181 | if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { | ||
182 | sk->sk_route_caps = 0; | ||
183 | return err; | ||
184 | } | ||
185 | |||
186 | ip6_dst_store(sk, dst, NULL); | ||
187 | sk->sk_route_caps = dst->dev->features & | ||
188 | ~(NETIF_F_IP_CSUM | NETIF_F_TSO); | ||
189 | } | ||
190 | |||
191 | skb->dst = dst_clone(dst); | ||
192 | |||
193 | /* Restore final destination back after routing done */ | ||
194 | ipv6_addr_copy(&fl.fl6_dst, &np->daddr); | ||
195 | |||
196 | return ip6_xmit(sk, skb, &fl, np->opt, 0); | ||
197 | } | ||
198 | |||
199 | EXPORT_SYMBOL_GPL(inet6_csk_xmit); | ||
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 01d5f46d4e40..4154f3a8b6cf 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c | |||
@@ -5,7 +5,8 @@ | |||
5 | * | 5 | * |
6 | * Generic INET6 transport hashtables | 6 | * Generic INET6 transport hashtables |
7 | * | 7 | * |
8 | * Authors: Lotsa people, from code originally in tcp | 8 | * Authors: Lotsa people, from code originally in tcp, generalised here |
9 | * by Arnaldo Carvalho de Melo <acme@mandriva.com> | ||
9 | * | 10 | * |
10 | * This program is free software; you can redistribute it and/or | 11 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public License | 12 | * modify it under the terms of the GNU General Public License |
@@ -14,12 +15,13 @@ | |||
14 | */ | 15 | */ |
15 | 16 | ||
16 | #include <linux/config.h> | 17 | #include <linux/config.h> |
17 | |||
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/random.h> | ||
19 | 20 | ||
20 | #include <net/inet_connection_sock.h> | 21 | #include <net/inet_connection_sock.h> |
21 | #include <net/inet_hashtables.h> | 22 | #include <net/inet_hashtables.h> |
22 | #include <net/inet6_hashtables.h> | 23 | #include <net/inet6_hashtables.h> |
24 | #include <net/ip.h> | ||
23 | 25 | ||
24 | struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, | 26 | struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, |
25 | const struct in6_addr *daddr, | 27 | const struct in6_addr *daddr, |
@@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, | |||
79 | } | 81 | } |
80 | 82 | ||
81 | EXPORT_SYMBOL_GPL(inet6_lookup); | 83 | EXPORT_SYMBOL_GPL(inet6_lookup); |
84 | |||
85 | static int __inet6_check_established(struct inet_timewait_death_row *death_row, | ||
86 | struct sock *sk, const __u16 lport, | ||
87 | struct inet_timewait_sock **twp) | ||
88 | { | ||
89 | struct inet_hashinfo *hinfo = death_row->hashinfo; | ||
90 | const struct inet_sock *inet = inet_sk(sk); | ||
91 | const struct ipv6_pinfo *np = inet6_sk(sk); | ||
92 | const struct in6_addr *daddr = &np->rcv_saddr; | ||
93 | const struct in6_addr *saddr = &np->daddr; | ||
94 | const int dif = sk->sk_bound_dev_if; | ||
95 | const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); | ||
96 | const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, | ||
97 | inet->dport); | ||
98 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | ||
99 | struct sock *sk2; | ||
100 | const struct hlist_node *node; | ||
101 | struct inet_timewait_sock *tw; | ||
102 | |||
103 | prefetch(head->chain.first); | ||
104 | write_lock(&head->lock); | ||
105 | |||
106 | /* Check TIME-WAIT sockets first. */ | ||
107 | sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { | ||
108 | const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); | ||
109 | |||
110 | tw = inet_twsk(sk2); | ||
111 | |||
112 | if(*((__u32 *)&(tw->tw_dport)) == ports && | ||
113 | sk2->sk_family == PF_INET6 && | ||
114 | ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && | ||
115 | ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && | ||
116 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { | ||
117 | if (twsk_unique(sk, sk2, twp)) | ||
118 | goto unique; | ||
119 | else | ||
120 | goto not_unique; | ||
121 | } | ||
122 | } | ||
123 | tw = NULL; | ||
124 | |||
125 | /* And established part... */ | ||
126 | sk_for_each(sk2, node, &head->chain) { | ||
127 | if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) | ||
128 | goto not_unique; | ||
129 | } | ||
130 | |||
131 | unique: | ||
132 | BUG_TRAP(sk_unhashed(sk)); | ||
133 | __sk_add_node(sk, &head->chain); | ||
134 | sk->sk_hash = hash; | ||
135 | sock_prot_inc_use(sk->sk_prot); | ||
136 | write_unlock(&head->lock); | ||
137 | |||
138 | if (twp != NULL) { | ||
139 | *twp = tw; | ||
140 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
141 | } else if (tw != NULL) { | ||
142 | /* Silly. Should hash-dance instead... */ | ||
143 | inet_twsk_deschedule(tw, death_row); | ||
144 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
145 | |||
146 | inet_twsk_put(tw); | ||
147 | } | ||
148 | return 0; | ||
149 | |||
150 | not_unique: | ||
151 | write_unlock(&head->lock); | ||
152 | return -EADDRNOTAVAIL; | ||
153 | } | ||
154 | |||
155 | static inline u32 inet6_sk_port_offset(const struct sock *sk) | ||
156 | { | ||
157 | const struct inet_sock *inet = inet_sk(sk); | ||
158 | const struct ipv6_pinfo *np = inet6_sk(sk); | ||
159 | return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, | ||
160 | np->daddr.s6_addr32, | ||
161 | inet->dport); | ||
162 | } | ||
163 | |||
164 | int inet6_hash_connect(struct inet_timewait_death_row *death_row, | ||
165 | struct sock *sk) | ||
166 | { | ||
167 | struct inet_hashinfo *hinfo = death_row->hashinfo; | ||
168 | const unsigned short snum = inet_sk(sk)->num; | ||
169 | struct inet_bind_hashbucket *head; | ||
170 | struct inet_bind_bucket *tb; | ||
171 | int ret; | ||
172 | |||
173 | if (snum == 0) { | ||
174 | const int low = sysctl_local_port_range[0]; | ||
175 | const int high = sysctl_local_port_range[1]; | ||
176 | const int range = high - low; | ||
177 | int i, port; | ||
178 | static u32 hint; | ||
179 | const u32 offset = hint + inet6_sk_port_offset(sk); | ||
180 | struct hlist_node *node; | ||
181 | struct inet_timewait_sock *tw = NULL; | ||
182 | |||
183 | local_bh_disable(); | ||
184 | for (i = 1; i <= range; i++) { | ||
185 | port = low + (i + offset) % range; | ||
186 | head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; | ||
187 | spin_lock(&head->lock); | ||
188 | |||
189 | /* Does not bother with rcv_saddr checks, | ||
190 | * because the established check is already | ||
191 | * unique enough. | ||
192 | */ | ||
193 | inet_bind_bucket_for_each(tb, node, &head->chain) { | ||
194 | if (tb->port == port) { | ||
195 | BUG_TRAP(!hlist_empty(&tb->owners)); | ||
196 | if (tb->fastreuse >= 0) | ||
197 | goto next_port; | ||
198 | if (!__inet6_check_established(death_row, | ||
199 | sk, port, | ||
200 | &tw)) | ||
201 | goto ok; | ||
202 | goto next_port; | ||
203 | } | ||
204 | } | ||
205 | |||
206 | tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, | ||
207 | head, port); | ||
208 | if (!tb) { | ||
209 | spin_unlock(&head->lock); | ||
210 | break; | ||
211 | } | ||
212 | tb->fastreuse = -1; | ||
213 | goto ok; | ||
214 | |||
215 | next_port: | ||
216 | spin_unlock(&head->lock); | ||
217 | } | ||
218 | local_bh_enable(); | ||
219 | |||
220 | return -EADDRNOTAVAIL; | ||
221 | |||
222 | ok: | ||
223 | hint += i; | ||
224 | |||
225 | /* Head lock still held and bh's disabled */ | ||
226 | inet_bind_hash(sk, tb, port); | ||
227 | if (sk_unhashed(sk)) { | ||
228 | inet_sk(sk)->sport = htons(port); | ||
229 | __inet6_hash(hinfo, sk); | ||
230 | } | ||
231 | spin_unlock(&head->lock); | ||
232 | |||
233 | if (tw) { | ||
234 | inet_twsk_deschedule(tw, death_row); | ||
235 | inet_twsk_put(tw); | ||
236 | } | ||
237 | |||
238 | ret = 0; | ||
239 | goto out; | ||
240 | } | ||
241 | |||
242 | head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; | ||
243 | tb = inet_csk(sk)->icsk_bind_hash; | ||
244 | spin_lock_bh(&head->lock); | ||
245 | |||
246 | if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { | ||
247 | __inet6_hash(hinfo, sk); | ||
248 | spin_unlock_bh(&head->lock); | ||
249 | return 0; | ||
250 | } else { | ||
251 | spin_unlock(&head->lock); | ||
252 | /* No definite answer... Walk to established hash table */ | ||
253 | ret = __inet6_check_established(death_row, sk, snum, NULL); | ||
254 | out: | ||
255 | local_bh_enable(); | ||
256 | return ret; | ||
257 | } | ||
258 | } | ||
259 | |||
260 | EXPORT_SYMBOL_GPL(inet6_hash_connect); | ||
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 1cf02765fb5c..89d12b4817a9 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c | |||
@@ -200,6 +200,8 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label) | |||
200 | return NULL; | 200 | return NULL; |
201 | } | 201 | } |
202 | 202 | ||
203 | EXPORT_SYMBOL_GPL(fl6_sock_lookup); | ||
204 | |||
203 | void fl6_free_socklist(struct sock *sk) | 205 | void fl6_free_socklist(struct sock *sk) |
204 | { | 206 | { |
205 | struct ipv6_pinfo *np = inet6_sk(sk); | 207 | struct ipv6_pinfo *np = inet6_sk(sk); |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8523c76ebf76..b4c4beba0ede 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -775,6 +775,8 @@ out_err_release: | |||
775 | return err; | 775 | return err; |
776 | } | 776 | } |
777 | 777 | ||
778 | EXPORT_SYMBOL_GPL(ip6_dst_lookup); | ||
779 | |||
778 | static inline int ip6_ufo_append_data(struct sock *sk, | 780 | static inline int ip6_ufo_append_data(struct sock *sk, |
779 | int getfrag(void *from, char *to, int offset, int len, | 781 | int getfrag(void *from, char *to, int offset, int len, |
780 | int odd, struct sk_buff *skb), | 782 | int odd, struct sk_buff *skb), |
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 55917fb17094..626dd39685f2 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/rtnetlink.h> | 47 | #include <linux/rtnetlink.h> |
48 | #include <net/icmp.h> | 48 | #include <net/icmp.h> |
49 | #include <net/ipv6.h> | 49 | #include <net/ipv6.h> |
50 | #include <net/protocol.h> | ||
50 | #include <linux/ipv6.h> | 51 | #include <linux/ipv6.h> |
51 | #include <linux/icmpv6.h> | 52 | #include <linux/icmpv6.h> |
52 | 53 | ||
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3620718defe6..c63868dd2ca2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c | |||
@@ -163,17 +163,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, | |||
163 | sk_refcnt_debug_dec(sk); | 163 | sk_refcnt_debug_dec(sk); |
164 | 164 | ||
165 | if (sk->sk_protocol == IPPROTO_TCP) { | 165 | if (sk->sk_protocol == IPPROTO_TCP) { |
166 | struct tcp_sock *tp = tcp_sk(sk); | 166 | struct inet_connection_sock *icsk = inet_csk(sk); |
167 | 167 | ||
168 | local_bh_disable(); | 168 | local_bh_disable(); |
169 | sock_prot_dec_use(sk->sk_prot); | 169 | sock_prot_dec_use(sk->sk_prot); |
170 | sock_prot_inc_use(&tcp_prot); | 170 | sock_prot_inc_use(&tcp_prot); |
171 | local_bh_enable(); | 171 | local_bh_enable(); |
172 | sk->sk_prot = &tcp_prot; | 172 | sk->sk_prot = &tcp_prot; |
173 | tp->af_specific = &ipv4_specific; | 173 | icsk->icsk_af_ops = &ipv4_specific; |
174 | sk->sk_socket->ops = &inet_stream_ops; | 174 | sk->sk_socket->ops = &inet_stream_ops; |
175 | sk->sk_family = PF_INET; | 175 | sk->sk_family = PF_INET; |
176 | tcp_sync_mss(sk, tp->pmtu_cookie); | 176 | tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); |
177 | } else { | 177 | } else { |
178 | local_bh_disable(); | 178 | local_bh_disable(); |
179 | sock_prot_dec_use(sk->sk_prot); | 179 | sock_prot_dec_use(sk->sk_prot); |
@@ -317,14 +317,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, | |||
317 | } | 317 | } |
318 | 318 | ||
319 | retv = 0; | 319 | retv = 0; |
320 | if (sk->sk_type == SOCK_STREAM) { | 320 | if (inet_sk(sk)->is_icsk) { |
321 | if (opt) { | 321 | if (opt) { |
322 | struct tcp_sock *tp = tcp_sk(sk); | 322 | struct inet_connection_sock *icsk = inet_csk(sk); |
323 | if (!((1 << sk->sk_state) & | 323 | if (!((1 << sk->sk_state) & |
324 | (TCPF_LISTEN | TCPF_CLOSE)) | 324 | (TCPF_LISTEN | TCPF_CLOSE)) |
325 | && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { | 325 | && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { |
326 | tp->ext_header_len = opt->opt_flen + opt->opt_nflen; | 326 | icsk->icsk_ext_hdr_len = |
327 | tcp_sync_mss(sk, tp->pmtu_cookie); | 327 | opt->opt_flen + opt->opt_nflen; |
328 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); | ||
328 | } | 329 | } |
329 | } | 330 | } |
330 | opt = xchg(&np->opt, opt); | 331 | opt = xchg(&np->opt, opt); |
@@ -380,14 +381,15 @@ sticky_done: | |||
380 | goto done; | 381 | goto done; |
381 | update: | 382 | update: |
382 | retv = 0; | 383 | retv = 0; |
383 | if (sk->sk_type == SOCK_STREAM) { | 384 | if (inet_sk(sk)->is_icsk) { |
384 | if (opt) { | 385 | if (opt) { |
385 | struct tcp_sock *tp = tcp_sk(sk); | 386 | struct inet_connection_sock *icsk = inet_csk(sk); |
386 | if (!((1 << sk->sk_state) & | 387 | if (!((1 << sk->sk_state) & |
387 | (TCPF_LISTEN | TCPF_CLOSE)) | 388 | (TCPF_LISTEN | TCPF_CLOSE)) |
388 | && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { | 389 | && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { |
389 | tp->ext_header_len = opt->opt_flen + opt->opt_nflen; | 390 | icsk->icsk_ext_hdr_len = |
390 | tcp_sync_mss(sk, tp->pmtu_cookie); | 391 | opt->opt_flen + opt->opt_nflen; |
392 | icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); | ||
391 | } | 393 | } |
392 | } | 394 | } |
393 | opt = xchg(&np->opt, opt); | 395 | opt = xchg(&np->opt, opt); |
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f829a4ad3ccc..1cf305a9f8dd 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c | |||
@@ -224,7 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) | |||
224 | 224 | ||
225 | mc_lst->ifindex = dev->ifindex; | 225 | mc_lst->ifindex = dev->ifindex; |
226 | mc_lst->sfmode = MCAST_EXCLUDE; | 226 | mc_lst->sfmode = MCAST_EXCLUDE; |
227 | mc_lst->sflock = RW_LOCK_UNLOCKED; | 227 | rwlock_init(&mc_lst->sflock); |
228 | mc_lst->sflist = NULL; | 228 | mc_lst->sflist = NULL; |
229 | 229 | ||
230 | /* | 230 | /* |
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 95d469271c4d..ea43ef1d94a7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c | |||
@@ -15,6 +15,7 @@ | |||
15 | * - new extension header parser code | 15 | * - new extension header parser code |
16 | */ | 16 | */ |
17 | #include <linux/config.h> | 17 | #include <linux/config.h> |
18 | #include <linux/in.h> | ||
18 | #include <linux/skbuff.h> | 19 | #include <linux/skbuff.h> |
19 | #include <linux/kmod.h> | 20 | #include <linux/kmod.h> |
20 | #include <linux/vmalloc.h> | 21 | #include <linux/vmalloc.h> |
@@ -86,11 +87,6 @@ static DECLARE_MUTEX(ip6t_mutex); | |||
86 | context stops packets coming through and allows user context to read | 87 | context stops packets coming through and allows user context to read |
87 | the counters or update the rules. | 88 | the counters or update the rules. |
88 | 89 | ||
89 | To be cache friendly on SMP, we arrange them like so: | ||
90 | [ n-entries ] | ||
91 | ... cache-align padding ... | ||
92 | [ n-entries ] | ||
93 | |||
94 | Hence the start of any table is given by get_table() below. */ | 90 | Hence the start of any table is given by get_table() below. */ |
95 | 91 | ||
96 | /* The table itself */ | 92 | /* The table itself */ |
@@ -108,20 +104,15 @@ struct ip6t_table_info | |||
108 | unsigned int underflow[NF_IP6_NUMHOOKS]; | 104 | unsigned int underflow[NF_IP6_NUMHOOKS]; |
109 | 105 | ||
110 | /* ip6t_entry tables: one per CPU */ | 106 | /* ip6t_entry tables: one per CPU */ |
111 | char entries[0] ____cacheline_aligned; | 107 | void *entries[NR_CPUS]; |
112 | }; | 108 | }; |
113 | 109 | ||
114 | static LIST_HEAD(ip6t_target); | 110 | static LIST_HEAD(ip6t_target); |
115 | static LIST_HEAD(ip6t_match); | 111 | static LIST_HEAD(ip6t_match); |
116 | static LIST_HEAD(ip6t_tables); | 112 | static LIST_HEAD(ip6t_tables); |
113 | #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) | ||
117 | #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) | 114 | #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) |
118 | 115 | ||
119 | #ifdef CONFIG_SMP | ||
120 | #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p)) | ||
121 | #else | ||
122 | #define TABLE_OFFSET(t,p) 0 | ||
123 | #endif | ||
124 | |||
125 | #if 0 | 116 | #if 0 |
126 | #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) | 117 | #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) |
127 | #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) | 118 | #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) |
@@ -376,8 +367,7 @@ ip6t_do_table(struct sk_buff **pskb, | |||
376 | 367 | ||
377 | read_lock_bh(&table->lock); | 368 | read_lock_bh(&table->lock); |
378 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | 369 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
379 | table_base = (void *)table->private->entries | 370 | table_base = (void *)table->private->entries[smp_processor_id()]; |
380 | + TABLE_OFFSET(table->private, smp_processor_id()); | ||
381 | e = get_entry(table_base, table->private->hook_entry[hook]); | 371 | e = get_entry(table_base, table->private->hook_entry[hook]); |
382 | 372 | ||
383 | #ifdef CONFIG_NETFILTER_DEBUG | 373 | #ifdef CONFIG_NETFILTER_DEBUG |
@@ -649,7 +639,8 @@ unconditional(const struct ip6t_ip6 *ipv6) | |||
649 | /* Figures out from what hook each rule can be called: returns 0 if | 639 | /* Figures out from what hook each rule can be called: returns 0 if |
650 | there are loops. Puts hook bitmask in comefrom. */ | 640 | there are loops. Puts hook bitmask in comefrom. */ |
651 | static int | 641 | static int |
652 | mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) | 642 | mark_source_chains(struct ip6t_table_info *newinfo, |
643 | unsigned int valid_hooks, void *entry0) | ||
653 | { | 644 | { |
654 | unsigned int hook; | 645 | unsigned int hook; |
655 | 646 | ||
@@ -658,7 +649,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) | |||
658 | for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { | 649 | for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { |
659 | unsigned int pos = newinfo->hook_entry[hook]; | 650 | unsigned int pos = newinfo->hook_entry[hook]; |
660 | struct ip6t_entry *e | 651 | struct ip6t_entry *e |
661 | = (struct ip6t_entry *)(newinfo->entries + pos); | 652 | = (struct ip6t_entry *)(entry0 + pos); |
662 | 653 | ||
663 | if (!(valid_hooks & (1 << hook))) | 654 | if (!(valid_hooks & (1 << hook))) |
664 | continue; | 655 | continue; |
@@ -708,13 +699,13 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) | |||
708 | goto next; | 699 | goto next; |
709 | 700 | ||
710 | e = (struct ip6t_entry *) | 701 | e = (struct ip6t_entry *) |
711 | (newinfo->entries + pos); | 702 | (entry0 + pos); |
712 | } while (oldpos == pos + e->next_offset); | 703 | } while (oldpos == pos + e->next_offset); |
713 | 704 | ||
714 | /* Move along one */ | 705 | /* Move along one */ |
715 | size = e->next_offset; | 706 | size = e->next_offset; |
716 | e = (struct ip6t_entry *) | 707 | e = (struct ip6t_entry *) |
717 | (newinfo->entries + pos + size); | 708 | (entry0 + pos + size); |
718 | e->counters.pcnt = pos; | 709 | e->counters.pcnt = pos; |
719 | pos += size; | 710 | pos += size; |
720 | } else { | 711 | } else { |
@@ -731,7 +722,7 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) | |||
731 | newpos = pos + e->next_offset; | 722 | newpos = pos + e->next_offset; |
732 | } | 723 | } |
733 | e = (struct ip6t_entry *) | 724 | e = (struct ip6t_entry *) |
734 | (newinfo->entries + newpos); | 725 | (entry0 + newpos); |
735 | e->counters.pcnt = pos; | 726 | e->counters.pcnt = pos; |
736 | pos = newpos; | 727 | pos = newpos; |
737 | } | 728 | } |
@@ -941,6 +932,7 @@ static int | |||
941 | translate_table(const char *name, | 932 | translate_table(const char *name, |
942 | unsigned int valid_hooks, | 933 | unsigned int valid_hooks, |
943 | struct ip6t_table_info *newinfo, | 934 | struct ip6t_table_info *newinfo, |
935 | void *entry0, | ||
944 | unsigned int size, | 936 | unsigned int size, |
945 | unsigned int number, | 937 | unsigned int number, |
946 | const unsigned int *hook_entries, | 938 | const unsigned int *hook_entries, |
@@ -961,11 +953,11 @@ translate_table(const char *name, | |||
961 | duprintf("translate_table: size %u\n", newinfo->size); | 953 | duprintf("translate_table: size %u\n", newinfo->size); |
962 | i = 0; | 954 | i = 0; |
963 | /* Walk through entries, checking offsets. */ | 955 | /* Walk through entries, checking offsets. */ |
964 | ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 956 | ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, |
965 | check_entry_size_and_hooks, | 957 | check_entry_size_and_hooks, |
966 | newinfo, | 958 | newinfo, |
967 | newinfo->entries, | 959 | entry0, |
968 | newinfo->entries + size, | 960 | entry0 + size, |
969 | hook_entries, underflows, &i); | 961 | hook_entries, underflows, &i); |
970 | if (ret != 0) | 962 | if (ret != 0) |
971 | return ret; | 963 | return ret; |
@@ -993,27 +985,24 @@ translate_table(const char *name, | |||
993 | } | 985 | } |
994 | } | 986 | } |
995 | 987 | ||
996 | if (!mark_source_chains(newinfo, valid_hooks)) | 988 | if (!mark_source_chains(newinfo, valid_hooks, entry0)) |
997 | return -ELOOP; | 989 | return -ELOOP; |
998 | 990 | ||
999 | /* Finally, each sanity check must pass */ | 991 | /* Finally, each sanity check must pass */ |
1000 | i = 0; | 992 | i = 0; |
1001 | ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 993 | ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, |
1002 | check_entry, name, size, &i); | 994 | check_entry, name, size, &i); |
1003 | 995 | ||
1004 | if (ret != 0) { | 996 | if (ret != 0) { |
1005 | IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, | 997 | IP6T_ENTRY_ITERATE(entry0, newinfo->size, |
1006 | cleanup_entry, &i); | 998 | cleanup_entry, &i); |
1007 | return ret; | 999 | return ret; |
1008 | } | 1000 | } |
1009 | 1001 | ||
1010 | /* And one copy for every other CPU */ | 1002 | /* And one copy for every other CPU */ |
1011 | for_each_cpu(i) { | 1003 | for_each_cpu(i) { |
1012 | if (i == 0) | 1004 | if (newinfo->entries[i] && newinfo->entries[i] != entry0) |
1013 | continue; | 1005 | memcpy(newinfo->entries[i], entry0, newinfo->size); |
1014 | memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i, | ||
1015 | newinfo->entries, | ||
1016 | SMP_ALIGN(newinfo->size)); | ||
1017 | } | 1006 | } |
1018 | 1007 | ||
1019 | return ret; | 1008 | return ret; |
@@ -1029,15 +1018,12 @@ replace_table(struct ip6t_table *table, | |||
1029 | 1018 | ||
1030 | #ifdef CONFIG_NETFILTER_DEBUG | 1019 | #ifdef CONFIG_NETFILTER_DEBUG |
1031 | { | 1020 | { |
1032 | struct ip6t_entry *table_base; | 1021 | int cpu; |
1033 | unsigned int i; | ||
1034 | 1022 | ||
1035 | for_each_cpu(i) { | 1023 | for_each_cpu(cpu) { |
1036 | table_base = | 1024 | struct ip6t_entry *table_base = newinfo->entries[cpu]; |
1037 | (void *)newinfo->entries | 1025 | if (table_base) |
1038 | + TABLE_OFFSET(newinfo, i); | 1026 | table_base->comefrom = 0xdead57ac; |
1039 | |||
1040 | table_base->comefrom = 0xdead57ac; | ||
1041 | } | 1027 | } |
1042 | } | 1028 | } |
1043 | #endif | 1029 | #endif |
@@ -1072,16 +1058,44 @@ add_entry_to_counter(const struct ip6t_entry *e, | |||
1072 | return 0; | 1058 | return 0; |
1073 | } | 1059 | } |
1074 | 1060 | ||
1061 | static inline int | ||
1062 | set_entry_to_counter(const struct ip6t_entry *e, | ||
1063 | struct ip6t_counters total[], | ||
1064 | unsigned int *i) | ||
1065 | { | ||
1066 | SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); | ||
1067 | |||
1068 | (*i)++; | ||
1069 | return 0; | ||
1070 | } | ||
1071 | |||
1075 | static void | 1072 | static void |
1076 | get_counters(const struct ip6t_table_info *t, | 1073 | get_counters(const struct ip6t_table_info *t, |
1077 | struct ip6t_counters counters[]) | 1074 | struct ip6t_counters counters[]) |
1078 | { | 1075 | { |
1079 | unsigned int cpu; | 1076 | unsigned int cpu; |
1080 | unsigned int i; | 1077 | unsigned int i; |
1078 | unsigned int curcpu; | ||
1079 | |||
1080 | /* Instead of clearing (by a previous call to memset()) | ||
1081 | * the counters and using adds, we set the counters | ||
1082 | * with data used by 'current' CPU | ||
1083 | * We dont care about preemption here. | ||
1084 | */ | ||
1085 | curcpu = raw_smp_processor_id(); | ||
1086 | |||
1087 | i = 0; | ||
1088 | IP6T_ENTRY_ITERATE(t->entries[curcpu], | ||
1089 | t->size, | ||
1090 | set_entry_to_counter, | ||
1091 | counters, | ||
1092 | &i); | ||
1081 | 1093 | ||
1082 | for_each_cpu(cpu) { | 1094 | for_each_cpu(cpu) { |
1095 | if (cpu == curcpu) | ||
1096 | continue; | ||
1083 | i = 0; | 1097 | i = 0; |
1084 | IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), | 1098 | IP6T_ENTRY_ITERATE(t->entries[cpu], |
1085 | t->size, | 1099 | t->size, |
1086 | add_entry_to_counter, | 1100 | add_entry_to_counter, |
1087 | counters, | 1101 | counters, |
@@ -1098,6 +1112,7 @@ copy_entries_to_user(unsigned int total_size, | |||
1098 | struct ip6t_entry *e; | 1112 | struct ip6t_entry *e; |
1099 | struct ip6t_counters *counters; | 1113 | struct ip6t_counters *counters; |
1100 | int ret = 0; | 1114 | int ret = 0; |
1115 | void *loc_cpu_entry; | ||
1101 | 1116 | ||
1102 | /* We need atomic snapshot of counters: rest doesn't change | 1117 | /* We need atomic snapshot of counters: rest doesn't change |
1103 | (other than comefrom, which userspace doesn't care | 1118 | (other than comefrom, which userspace doesn't care |
@@ -1109,13 +1124,13 @@ copy_entries_to_user(unsigned int total_size, | |||
1109 | return -ENOMEM; | 1124 | return -ENOMEM; |
1110 | 1125 | ||
1111 | /* First, sum counters... */ | 1126 | /* First, sum counters... */ |
1112 | memset(counters, 0, countersize); | ||
1113 | write_lock_bh(&table->lock); | 1127 | write_lock_bh(&table->lock); |
1114 | get_counters(table->private, counters); | 1128 | get_counters(table->private, counters); |
1115 | write_unlock_bh(&table->lock); | 1129 | write_unlock_bh(&table->lock); |
1116 | 1130 | ||
1117 | /* ... then copy entire thing from CPU 0... */ | 1131 | /* choose the copy that is on ourc node/cpu */ |
1118 | if (copy_to_user(userptr, table->private->entries, total_size) != 0) { | 1132 | loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; |
1133 | if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { | ||
1119 | ret = -EFAULT; | 1134 | ret = -EFAULT; |
1120 | goto free_counters; | 1135 | goto free_counters; |
1121 | } | 1136 | } |
@@ -1127,7 +1142,7 @@ copy_entries_to_user(unsigned int total_size, | |||
1127 | struct ip6t_entry_match *m; | 1142 | struct ip6t_entry_match *m; |
1128 | struct ip6t_entry_target *t; | 1143 | struct ip6t_entry_target *t; |
1129 | 1144 | ||
1130 | e = (struct ip6t_entry *)(table->private->entries + off); | 1145 | e = (struct ip6t_entry *)(loc_cpu_entry + off); |
1131 | if (copy_to_user(userptr + off | 1146 | if (copy_to_user(userptr + off |
1132 | + offsetof(struct ip6t_entry, counters), | 1147 | + offsetof(struct ip6t_entry, counters), |
1133 | &counters[num], | 1148 | &counters[num], |
@@ -1196,6 +1211,46 @@ get_entries(const struct ip6t_get_entries *entries, | |||
1196 | return ret; | 1211 | return ret; |
1197 | } | 1212 | } |
1198 | 1213 | ||
1214 | static void free_table_info(struct ip6t_table_info *info) | ||
1215 | { | ||
1216 | int cpu; | ||
1217 | for_each_cpu(cpu) { | ||
1218 | if (info->size <= PAGE_SIZE) | ||
1219 | kfree(info->entries[cpu]); | ||
1220 | else | ||
1221 | vfree(info->entries[cpu]); | ||
1222 | } | ||
1223 | kfree(info); | ||
1224 | } | ||
1225 | |||
1226 | static struct ip6t_table_info *alloc_table_info(unsigned int size) | ||
1227 | { | ||
1228 | struct ip6t_table_info *newinfo; | ||
1229 | int cpu; | ||
1230 | |||
1231 | newinfo = kzalloc(sizeof(struct ip6t_table_info), GFP_KERNEL); | ||
1232 | if (!newinfo) | ||
1233 | return NULL; | ||
1234 | |||
1235 | newinfo->size = size; | ||
1236 | |||
1237 | for_each_cpu(cpu) { | ||
1238 | if (size <= PAGE_SIZE) | ||
1239 | newinfo->entries[cpu] = kmalloc_node(size, | ||
1240 | GFP_KERNEL, | ||
1241 | cpu_to_node(cpu)); | ||
1242 | else | ||
1243 | newinfo->entries[cpu] = vmalloc_node(size, | ||
1244 | cpu_to_node(cpu)); | ||
1245 | if (newinfo->entries[cpu] == NULL) { | ||
1246 | free_table_info(newinfo); | ||
1247 | return NULL; | ||
1248 | } | ||
1249 | } | ||
1250 | |||
1251 | return newinfo; | ||
1252 | } | ||
1253 | |||
1199 | static int | 1254 | static int |
1200 | do_replace(void __user *user, unsigned int len) | 1255 | do_replace(void __user *user, unsigned int len) |
1201 | { | 1256 | { |
@@ -1204,6 +1259,7 @@ do_replace(void __user *user, unsigned int len) | |||
1204 | struct ip6t_table *t; | 1259 | struct ip6t_table *t; |
1205 | struct ip6t_table_info *newinfo, *oldinfo; | 1260 | struct ip6t_table_info *newinfo, *oldinfo; |
1206 | struct ip6t_counters *counters; | 1261 | struct ip6t_counters *counters; |
1262 | void *loc_cpu_entry, *loc_cpu_old_entry; | ||
1207 | 1263 | ||
1208 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1264 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1209 | return -EFAULT; | 1265 | return -EFAULT; |
@@ -1212,13 +1268,13 @@ do_replace(void __user *user, unsigned int len) | |||
1212 | if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) | 1268 | if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages) |
1213 | return -ENOMEM; | 1269 | return -ENOMEM; |
1214 | 1270 | ||
1215 | newinfo = vmalloc(sizeof(struct ip6t_table_info) | 1271 | newinfo = alloc_table_info(tmp.size); |
1216 | + SMP_ALIGN(tmp.size) * | ||
1217 | (highest_possible_processor_id()+1)); | ||
1218 | if (!newinfo) | 1272 | if (!newinfo) |
1219 | return -ENOMEM; | 1273 | return -ENOMEM; |
1220 | 1274 | ||
1221 | if (copy_from_user(newinfo->entries, user + sizeof(tmp), | 1275 | /* choose the copy that is on our node/cpu */ |
1276 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | ||
1277 | if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), | ||
1222 | tmp.size) != 0) { | 1278 | tmp.size) != 0) { |
1223 | ret = -EFAULT; | 1279 | ret = -EFAULT; |
1224 | goto free_newinfo; | 1280 | goto free_newinfo; |
@@ -1229,10 +1285,9 @@ do_replace(void __user *user, unsigned int len) | |||
1229 | ret = -ENOMEM; | 1285 | ret = -ENOMEM; |
1230 | goto free_newinfo; | 1286 | goto free_newinfo; |
1231 | } | 1287 | } |
1232 | memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters)); | ||
1233 | 1288 | ||
1234 | ret = translate_table(tmp.name, tmp.valid_hooks, | 1289 | ret = translate_table(tmp.name, tmp.valid_hooks, |
1235 | newinfo, tmp.size, tmp.num_entries, | 1290 | newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, |
1236 | tmp.hook_entry, tmp.underflow); | 1291 | tmp.hook_entry, tmp.underflow); |
1237 | if (ret != 0) | 1292 | if (ret != 0) |
1238 | goto free_newinfo_counters; | 1293 | goto free_newinfo_counters; |
@@ -1271,8 +1326,9 @@ do_replace(void __user *user, unsigned int len) | |||
1271 | /* Get the old counters. */ | 1326 | /* Get the old counters. */ |
1272 | get_counters(oldinfo, counters); | 1327 | get_counters(oldinfo, counters); |
1273 | /* Decrease module usage counts and free resource */ | 1328 | /* Decrease module usage counts and free resource */ |
1274 | IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL); | 1329 | loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; |
1275 | vfree(oldinfo); | 1330 | IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); |
1331 | free_table_info(oldinfo); | ||
1276 | if (copy_to_user(tmp.counters, counters, | 1332 | if (copy_to_user(tmp.counters, counters, |
1277 | sizeof(struct ip6t_counters) * tmp.num_counters) != 0) | 1333 | sizeof(struct ip6t_counters) * tmp.num_counters) != 0) |
1278 | ret = -EFAULT; | 1334 | ret = -EFAULT; |
@@ -1284,11 +1340,11 @@ do_replace(void __user *user, unsigned int len) | |||
1284 | module_put(t->me); | 1340 | module_put(t->me); |
1285 | up(&ip6t_mutex); | 1341 | up(&ip6t_mutex); |
1286 | free_newinfo_counters_untrans: | 1342 | free_newinfo_counters_untrans: |
1287 | IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); | 1343 | IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); |
1288 | free_newinfo_counters: | 1344 | free_newinfo_counters: |
1289 | vfree(counters); | 1345 | vfree(counters); |
1290 | free_newinfo: | 1346 | free_newinfo: |
1291 | vfree(newinfo); | 1347 | free_table_info(newinfo); |
1292 | return ret; | 1348 | return ret; |
1293 | } | 1349 | } |
1294 | 1350 | ||
@@ -1321,6 +1377,7 @@ do_add_counters(void __user *user, unsigned int len) | |||
1321 | struct ip6t_counters_info tmp, *paddc; | 1377 | struct ip6t_counters_info tmp, *paddc; |
1322 | struct ip6t_table *t; | 1378 | struct ip6t_table *t; |
1323 | int ret = 0; | 1379 | int ret = 0; |
1380 | void *loc_cpu_entry; | ||
1324 | 1381 | ||
1325 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) | 1382 | if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) |
1326 | return -EFAULT; | 1383 | return -EFAULT; |
@@ -1350,7 +1407,9 @@ do_add_counters(void __user *user, unsigned int len) | |||
1350 | } | 1407 | } |
1351 | 1408 | ||
1352 | i = 0; | 1409 | i = 0; |
1353 | IP6T_ENTRY_ITERATE(t->private->entries, | 1410 | /* Choose the copy that is on our node */ |
1411 | loc_cpu_entry = t->private->entries[smp_processor_id()]; | ||
1412 | IP6T_ENTRY_ITERATE(loc_cpu_entry, | ||
1354 | t->private->size, | 1413 | t->private->size, |
1355 | add_counter_to_entry, | 1414 | add_counter_to_entry, |
1356 | paddc->counters, | 1415 | paddc->counters, |
@@ -1543,28 +1602,29 @@ int ip6t_register_table(struct ip6t_table *table, | |||
1543 | struct ip6t_table_info *newinfo; | 1602 | struct ip6t_table_info *newinfo; |
1544 | static struct ip6t_table_info bootstrap | 1603 | static struct ip6t_table_info bootstrap |
1545 | = { 0, 0, 0, { 0 }, { 0 }, { } }; | 1604 | = { 0, 0, 0, { 0 }, { 0 }, { } }; |
1605 | void *loc_cpu_entry; | ||
1546 | 1606 | ||
1547 | newinfo = vmalloc(sizeof(struct ip6t_table_info) | 1607 | newinfo = alloc_table_info(repl->size); |
1548 | + SMP_ALIGN(repl->size) * | ||
1549 | (highest_possible_processor_id()+1)); | ||
1550 | if (!newinfo) | 1608 | if (!newinfo) |
1551 | return -ENOMEM; | 1609 | return -ENOMEM; |
1552 | 1610 | ||
1553 | memcpy(newinfo->entries, repl->entries, repl->size); | 1611 | /* choose the copy on our node/cpu */ |
1612 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | ||
1613 | memcpy(loc_cpu_entry, repl->entries, repl->size); | ||
1554 | 1614 | ||
1555 | ret = translate_table(table->name, table->valid_hooks, | 1615 | ret = translate_table(table->name, table->valid_hooks, |
1556 | newinfo, repl->size, | 1616 | newinfo, loc_cpu_entry, repl->size, |
1557 | repl->num_entries, | 1617 | repl->num_entries, |
1558 | repl->hook_entry, | 1618 | repl->hook_entry, |
1559 | repl->underflow); | 1619 | repl->underflow); |
1560 | if (ret != 0) { | 1620 | if (ret != 0) { |
1561 | vfree(newinfo); | 1621 | free_table_info(newinfo); |
1562 | return ret; | 1622 | return ret; |
1563 | } | 1623 | } |
1564 | 1624 | ||
1565 | ret = down_interruptible(&ip6t_mutex); | 1625 | ret = down_interruptible(&ip6t_mutex); |
1566 | if (ret != 0) { | 1626 | if (ret != 0) { |
1567 | vfree(newinfo); | 1627 | free_table_info(newinfo); |
1568 | return ret; | 1628 | return ret; |
1569 | } | 1629 | } |
1570 | 1630 | ||
@@ -1593,20 +1653,23 @@ int ip6t_register_table(struct ip6t_table *table, | |||
1593 | return ret; | 1653 | return ret; |
1594 | 1654 | ||
1595 | free_unlock: | 1655 | free_unlock: |
1596 | vfree(newinfo); | 1656 | free_table_info(newinfo); |
1597 | goto unlock; | 1657 | goto unlock; |
1598 | } | 1658 | } |
1599 | 1659 | ||
1600 | void ip6t_unregister_table(struct ip6t_table *table) | 1660 | void ip6t_unregister_table(struct ip6t_table *table) |
1601 | { | 1661 | { |
1662 | void *loc_cpu_entry; | ||
1663 | |||
1602 | down(&ip6t_mutex); | 1664 | down(&ip6t_mutex); |
1603 | LIST_DELETE(&ip6t_tables, table); | 1665 | LIST_DELETE(&ip6t_tables, table); |
1604 | up(&ip6t_mutex); | 1666 | up(&ip6t_mutex); |
1605 | 1667 | ||
1606 | /* Decrease module usage counts and free resources */ | 1668 | /* Decrease module usage counts and free resources */ |
1607 | IP6T_ENTRY_ITERATE(table->private->entries, table->private->size, | 1669 | loc_cpu_entry = table->private->entries[raw_smp_processor_id()]; |
1670 | IP6T_ENTRY_ITERATE(loc_cpu_entry, table->private->size, | ||
1608 | cleanup_entry, NULL); | 1671 | cleanup_entry, NULL); |
1609 | vfree(table->private); | 1672 | free_table_info(table->private); |
1610 | } | 1673 | } |
1611 | 1674 | ||
1612 | /* Returns 1 if the port is matched by the range, 0 otherwise */ | 1675 | /* Returns 1 if the port is matched by the range, 0 otherwise */ |
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 0cd1d1bd9033..ae4653bfd654 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
15 | #include <linux/skbuff.h> | 15 | #include <linux/skbuff.h> |
16 | #include <linux/if_arp.h> | ||
16 | #include <linux/ip.h> | 17 | #include <linux/ip.h> |
17 | #include <linux/spinlock.h> | 18 | #include <linux/spinlock.h> |
18 | #include <linux/icmpv6.h> | 19 | #include <linux/icmpv6.h> |
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index dde37793d20b..268918d5deea 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c | |||
@@ -9,6 +9,7 @@ | |||
9 | 9 | ||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/skbuff.h> | 11 | #include <linux/skbuff.h> |
12 | #include <linux/ip.h> | ||
12 | #include <linux/ipv6.h> | 13 | #include <linux/ipv6.h> |
13 | #include <linux/types.h> | 14 | #include <linux/types.h> |
14 | #include <net/checksum.h> | 15 | #include <net/checksum.h> |
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index 24bc0cde43a1..65937de1b58c 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c | |||
@@ -9,6 +9,7 @@ | |||
9 | 9 | ||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/skbuff.h> | 11 | #include <linux/skbuff.h> |
12 | #include <linux/ip.h> | ||
12 | #include <linux/ipv6.h> | 13 | #include <linux/ipv6.h> |
13 | #include <linux/types.h> | 14 | #include <linux/types.h> |
14 | #include <net/checksum.h> | 15 | #include <net/checksum.h> |
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c2c52af9e560..f3e5ffbd592f 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c | |||
@@ -98,7 +98,7 @@ struct nf_ct_frag6_queue | |||
98 | #define FRAG6Q_HASHSZ 64 | 98 | #define FRAG6Q_HASHSZ 64 |
99 | 99 | ||
100 | static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ]; | 100 | static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ]; |
101 | static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED; | 101 | static DEFINE_RWLOCK(nf_ct_frag6_lock); |
102 | static u32 nf_ct_frag6_hash_rnd; | 102 | static u32 nf_ct_frag6_hash_rnd; |
103 | static LIST_HEAD(nf_ct_frag6_lru_list); | 103 | static LIST_HEAD(nf_ct_frag6_lru_list); |
104 | int nf_ct_frag6_nqueues = 0; | 104 | int nf_ct_frag6_nqueues = 0; |
@@ -371,7 +371,7 @@ nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct | |||
371 | init_timer(&fq->timer); | 371 | init_timer(&fq->timer); |
372 | fq->timer.function = nf_ct_frag6_expire; | 372 | fq->timer.function = nf_ct_frag6_expire; |
373 | fq->timer.data = (long) fq; | 373 | fq->timer.data = (long) fq; |
374 | fq->lock = SPIN_LOCK_UNLOCKED; | 374 | spin_lock_init(&fq->lock); |
375 | atomic_set(&fq->refcnt, 1); | 375 | atomic_set(&fq->refcnt, 1); |
376 | 376 | ||
377 | return nf_ct_frag6_intern(hash, fq); | 377 | return nf_ct_frag6_intern(hash, fq); |
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a66900cda2af..66f1d12ea578 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/icmpv6.h> | 32 | #include <linux/icmpv6.h> |
33 | #include <linux/netfilter.h> | 33 | #include <linux/netfilter.h> |
34 | #include <linux/netfilter_ipv6.h> | 34 | #include <linux/netfilter_ipv6.h> |
35 | #include <linux/skbuff.h> | ||
35 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
36 | #include <asm/ioctls.h> | 37 | #include <asm/ioctls.h> |
37 | #include <asm/bug.h> | 38 | #include <asm/bug.h> |
@@ -433,25 +434,14 @@ out: | |||
433 | return err; | 434 | return err; |
434 | 435 | ||
435 | csum_copy_err: | 436 | csum_copy_err: |
436 | /* Clear queue. */ | 437 | skb_kill_datagram(sk, skb, flags); |
437 | if (flags&MSG_PEEK) { | ||
438 | int clear = 0; | ||
439 | spin_lock_bh(&sk->sk_receive_queue.lock); | ||
440 | if (skb == skb_peek(&sk->sk_receive_queue)) { | ||
441 | __skb_unlink(skb, &sk->sk_receive_queue); | ||
442 | clear = 1; | ||
443 | } | ||
444 | spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
445 | if (clear) | ||
446 | kfree_skb(skb); | ||
447 | } | ||
448 | 438 | ||
449 | /* Error for blocking case is chosen to masquerade | 439 | /* Error for blocking case is chosen to masquerade |
450 | as some normal condition. | 440 | as some normal condition. |
451 | */ | 441 | */ |
452 | err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; | 442 | err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; |
453 | /* FIXME: increment a raw6 drops counter here */ | 443 | /* FIXME: increment a raw6 drops counter here */ |
454 | goto out_free; | 444 | goto out; |
455 | } | 445 | } |
456 | 446 | ||
457 | static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, | 447 | static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8827389abaf7..2947bc56d8a0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <net/tcp.h> | 48 | #include <net/tcp.h> |
49 | #include <net/ndisc.h> | 49 | #include <net/ndisc.h> |
50 | #include <net/inet6_hashtables.h> | 50 | #include <net/inet6_hashtables.h> |
51 | #include <net/inet6_connection_sock.h> | ||
51 | #include <net/ipv6.h> | 52 | #include <net/ipv6.h> |
52 | #include <net/transp_v6.h> | 53 | #include <net/transp_v6.h> |
53 | #include <net/addrconf.h> | 54 | #include <net/addrconf.h> |
@@ -59,6 +60,7 @@ | |||
59 | #include <net/addrconf.h> | 60 | #include <net/addrconf.h> |
60 | #include <net/snmp.h> | 61 | #include <net/snmp.h> |
61 | #include <net/dsfield.h> | 62 | #include <net/dsfield.h> |
63 | #include <net/timewait_sock.h> | ||
62 | 64 | ||
63 | #include <asm/uaccess.h> | 65 | #include <asm/uaccess.h> |
64 | 66 | ||
@@ -67,224 +69,33 @@ | |||
67 | 69 | ||
68 | static void tcp_v6_send_reset(struct sk_buff *skb); | 70 | static void tcp_v6_send_reset(struct sk_buff *skb); |
69 | static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); | 71 | static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); |
70 | static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, | 72 | static void tcp_v6_send_check(struct sock *sk, int len, |
71 | struct sk_buff *skb); | 73 | struct sk_buff *skb); |
72 | 74 | ||
73 | static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); | 75 | static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); |
74 | static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); | ||
75 | 76 | ||
76 | static struct tcp_func ipv6_mapped; | 77 | static struct inet_connection_sock_af_ops ipv6_mapped; |
77 | static struct tcp_func ipv6_specific; | 78 | static struct inet_connection_sock_af_ops ipv6_specific; |
78 | 79 | ||
79 | static inline int tcp_v6_bind_conflict(const struct sock *sk, | ||
80 | const struct inet_bind_bucket *tb) | ||
81 | { | ||
82 | const struct sock *sk2; | ||
83 | const struct hlist_node *node; | ||
84 | |||
85 | /* We must walk the whole port owner list in this case. -DaveM */ | ||
86 | sk_for_each_bound(sk2, node, &tb->owners) { | ||
87 | if (sk != sk2 && | ||
88 | (!sk->sk_bound_dev_if || | ||
89 | !sk2->sk_bound_dev_if || | ||
90 | sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && | ||
91 | (!sk->sk_reuse || !sk2->sk_reuse || | ||
92 | sk2->sk_state == TCP_LISTEN) && | ||
93 | ipv6_rcv_saddr_equal(sk, sk2)) | ||
94 | break; | ||
95 | } | ||
96 | |||
97 | return node != NULL; | ||
98 | } | ||
99 | |||
100 | /* Grrr, addr_type already calculated by caller, but I don't want | ||
101 | * to add some silly "cookie" argument to this method just for that. | ||
102 | * But it doesn't matter, the recalculation is in the rarest path | ||
103 | * this function ever takes. | ||
104 | */ | ||
105 | static int tcp_v6_get_port(struct sock *sk, unsigned short snum) | 80 | static int tcp_v6_get_port(struct sock *sk, unsigned short snum) |
106 | { | 81 | { |
107 | struct inet_bind_hashbucket *head; | 82 | return inet_csk_get_port(&tcp_hashinfo, sk, snum, |
108 | struct inet_bind_bucket *tb; | 83 | inet6_csk_bind_conflict); |
109 | struct hlist_node *node; | ||
110 | int ret; | ||
111 | |||
112 | local_bh_disable(); | ||
113 | if (snum == 0) { | ||
114 | int low = sysctl_local_port_range[0]; | ||
115 | int high = sysctl_local_port_range[1]; | ||
116 | int remaining = (high - low) + 1; | ||
117 | int rover = net_random() % (high - low) + low; | ||
118 | |||
119 | do { | ||
120 | head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; | ||
121 | spin_lock(&head->lock); | ||
122 | inet_bind_bucket_for_each(tb, node, &head->chain) | ||
123 | if (tb->port == rover) | ||
124 | goto next; | ||
125 | break; | ||
126 | next: | ||
127 | spin_unlock(&head->lock); | ||
128 | if (++rover > high) | ||
129 | rover = low; | ||
130 | } while (--remaining > 0); | ||
131 | |||
132 | /* Exhausted local port range during search? It is not | ||
133 | * possible for us to be holding one of the bind hash | ||
134 | * locks if this test triggers, because if 'remaining' | ||
135 | * drops to zero, we broke out of the do/while loop at | ||
136 | * the top level, not from the 'break;' statement. | ||
137 | */ | ||
138 | ret = 1; | ||
139 | if (unlikely(remaining <= 0)) | ||
140 | goto fail; | ||
141 | |||
142 | /* OK, here is the one we will use. */ | ||
143 | snum = rover; | ||
144 | } else { | ||
145 | head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; | ||
146 | spin_lock(&head->lock); | ||
147 | inet_bind_bucket_for_each(tb, node, &head->chain) | ||
148 | if (tb->port == snum) | ||
149 | goto tb_found; | ||
150 | } | ||
151 | tb = NULL; | ||
152 | goto tb_not_found; | ||
153 | tb_found: | ||
154 | if (tb && !hlist_empty(&tb->owners)) { | ||
155 | if (tb->fastreuse > 0 && sk->sk_reuse && | ||
156 | sk->sk_state != TCP_LISTEN) { | ||
157 | goto success; | ||
158 | } else { | ||
159 | ret = 1; | ||
160 | if (tcp_v6_bind_conflict(sk, tb)) | ||
161 | goto fail_unlock; | ||
162 | } | ||
163 | } | ||
164 | tb_not_found: | ||
165 | ret = 1; | ||
166 | if (tb == NULL) { | ||
167 | tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum); | ||
168 | if (tb == NULL) | ||
169 | goto fail_unlock; | ||
170 | } | ||
171 | if (hlist_empty(&tb->owners)) { | ||
172 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) | ||
173 | tb->fastreuse = 1; | ||
174 | else | ||
175 | tb->fastreuse = 0; | ||
176 | } else if (tb->fastreuse && | ||
177 | (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) | ||
178 | tb->fastreuse = 0; | ||
179 | |||
180 | success: | ||
181 | if (!inet_csk(sk)->icsk_bind_hash) | ||
182 | inet_bind_hash(sk, tb, snum); | ||
183 | BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); | ||
184 | ret = 0; | ||
185 | |||
186 | fail_unlock: | ||
187 | spin_unlock(&head->lock); | ||
188 | fail: | ||
189 | local_bh_enable(); | ||
190 | return ret; | ||
191 | } | ||
192 | |||
193 | static __inline__ void __tcp_v6_hash(struct sock *sk) | ||
194 | { | ||
195 | struct hlist_head *list; | ||
196 | rwlock_t *lock; | ||
197 | |||
198 | BUG_TRAP(sk_unhashed(sk)); | ||
199 | |||
200 | if (sk->sk_state == TCP_LISTEN) { | ||
201 | list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; | ||
202 | lock = &tcp_hashinfo.lhash_lock; | ||
203 | inet_listen_wlock(&tcp_hashinfo); | ||
204 | } else { | ||
205 | unsigned int hash; | ||
206 | sk->sk_hash = hash = inet6_sk_ehashfn(sk); | ||
207 | hash &= (tcp_hashinfo.ehash_size - 1); | ||
208 | list = &tcp_hashinfo.ehash[hash].chain; | ||
209 | lock = &tcp_hashinfo.ehash[hash].lock; | ||
210 | write_lock(lock); | ||
211 | } | ||
212 | |||
213 | __sk_add_node(sk, list); | ||
214 | sock_prot_inc_use(sk->sk_prot); | ||
215 | write_unlock(lock); | ||
216 | } | 84 | } |
217 | 85 | ||
218 | |||
219 | static void tcp_v6_hash(struct sock *sk) | 86 | static void tcp_v6_hash(struct sock *sk) |
220 | { | 87 | { |
221 | if (sk->sk_state != TCP_CLOSE) { | 88 | if (sk->sk_state != TCP_CLOSE) { |
222 | struct tcp_sock *tp = tcp_sk(sk); | 89 | if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) { |
223 | |||
224 | if (tp->af_specific == &ipv6_mapped) { | ||
225 | tcp_prot.hash(sk); | 90 | tcp_prot.hash(sk); |
226 | return; | 91 | return; |
227 | } | 92 | } |
228 | local_bh_disable(); | 93 | local_bh_disable(); |
229 | __tcp_v6_hash(sk); | 94 | __inet6_hash(&tcp_hashinfo, sk); |
230 | local_bh_enable(); | 95 | local_bh_enable(); |
231 | } | 96 | } |
232 | } | 97 | } |
233 | 98 | ||
234 | /* | ||
235 | * Open request hash tables. | ||
236 | */ | ||
237 | |||
238 | static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd) | ||
239 | { | ||
240 | u32 a, b, c; | ||
241 | |||
242 | a = raddr->s6_addr32[0]; | ||
243 | b = raddr->s6_addr32[1]; | ||
244 | c = raddr->s6_addr32[2]; | ||
245 | |||
246 | a += JHASH_GOLDEN_RATIO; | ||
247 | b += JHASH_GOLDEN_RATIO; | ||
248 | c += rnd; | ||
249 | __jhash_mix(a, b, c); | ||
250 | |||
251 | a += raddr->s6_addr32[3]; | ||
252 | b += (u32) rport; | ||
253 | __jhash_mix(a, b, c); | ||
254 | |||
255 | return c & (TCP_SYNQ_HSIZE - 1); | ||
256 | } | ||
257 | |||
258 | static struct request_sock *tcp_v6_search_req(const struct sock *sk, | ||
259 | struct request_sock ***prevp, | ||
260 | __u16 rport, | ||
261 | struct in6_addr *raddr, | ||
262 | struct in6_addr *laddr, | ||
263 | int iif) | ||
264 | { | ||
265 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
266 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
267 | struct request_sock *req, **prev; | ||
268 | |||
269 | for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; | ||
270 | (req = *prev) != NULL; | ||
271 | prev = &req->dl_next) { | ||
272 | const struct tcp6_request_sock *treq = tcp6_rsk(req); | ||
273 | |||
274 | if (inet_rsk(req)->rmt_port == rport && | ||
275 | req->rsk_ops->family == AF_INET6 && | ||
276 | ipv6_addr_equal(&treq->rmt_addr, raddr) && | ||
277 | ipv6_addr_equal(&treq->loc_addr, laddr) && | ||
278 | (!treq->iif || treq->iif == iif)) { | ||
279 | BUG_TRAP(req->sk == NULL); | ||
280 | *prevp = prev; | ||
281 | return req; | ||
282 | } | ||
283 | } | ||
284 | |||
285 | return NULL; | ||
286 | } | ||
287 | |||
288 | static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, | 99 | static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len, |
289 | struct in6_addr *saddr, | 100 | struct in6_addr *saddr, |
290 | struct in6_addr *daddr, | 101 | struct in6_addr *daddr, |
@@ -308,195 +119,12 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) | |||
308 | } | 119 | } |
309 | } | 120 | } |
310 | 121 | ||
311 | static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, | ||
312 | struct inet_timewait_sock **twp) | ||
313 | { | ||
314 | struct inet_sock *inet = inet_sk(sk); | ||
315 | const struct ipv6_pinfo *np = inet6_sk(sk); | ||
316 | const struct in6_addr *daddr = &np->rcv_saddr; | ||
317 | const struct in6_addr *saddr = &np->daddr; | ||
318 | const int dif = sk->sk_bound_dev_if; | ||
319 | const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); | ||
320 | unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); | ||
321 | struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); | ||
322 | struct sock *sk2; | ||
323 | const struct hlist_node *node; | ||
324 | struct inet_timewait_sock *tw; | ||
325 | |||
326 | prefetch(head->chain.first); | ||
327 | write_lock(&head->lock); | ||
328 | |||
329 | /* Check TIME-WAIT sockets first. */ | ||
330 | sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { | ||
331 | const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2); | ||
332 | |||
333 | tw = inet_twsk(sk2); | ||
334 | |||
335 | if(*((__u32 *)&(tw->tw_dport)) == ports && | ||
336 | sk2->sk_family == PF_INET6 && | ||
337 | ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && | ||
338 | ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && | ||
339 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { | ||
340 | const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); | ||
341 | struct tcp_sock *tp = tcp_sk(sk); | ||
342 | |||
343 | if (tcptw->tw_ts_recent_stamp && | ||
344 | (!twp || | ||
345 | (sysctl_tcp_tw_reuse && | ||
346 | xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { | ||
347 | /* See comment in tcp_ipv4.c */ | ||
348 | tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; | ||
349 | if (!tp->write_seq) | ||
350 | tp->write_seq = 1; | ||
351 | tp->rx_opt.ts_recent = tcptw->tw_ts_recent; | ||
352 | tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; | ||
353 | sock_hold(sk2); | ||
354 | goto unique; | ||
355 | } else | ||
356 | goto not_unique; | ||
357 | } | ||
358 | } | ||
359 | tw = NULL; | ||
360 | |||
361 | /* And established part... */ | ||
362 | sk_for_each(sk2, node, &head->chain) { | ||
363 | if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) | ||
364 | goto not_unique; | ||
365 | } | ||
366 | |||
367 | unique: | ||
368 | BUG_TRAP(sk_unhashed(sk)); | ||
369 | __sk_add_node(sk, &head->chain); | ||
370 | sk->sk_hash = hash; | ||
371 | sock_prot_inc_use(sk->sk_prot); | ||
372 | write_unlock(&head->lock); | ||
373 | |||
374 | if (twp) { | ||
375 | *twp = tw; | ||
376 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
377 | } else if (tw) { | ||
378 | /* Silly. Should hash-dance instead... */ | ||
379 | inet_twsk_deschedule(tw, &tcp_death_row); | ||
380 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | ||
381 | |||
382 | inet_twsk_put(tw); | ||
383 | } | ||
384 | return 0; | ||
385 | |||
386 | not_unique: | ||
387 | write_unlock(&head->lock); | ||
388 | return -EADDRNOTAVAIL; | ||
389 | } | ||
390 | |||
391 | static inline u32 tcpv6_port_offset(const struct sock *sk) | ||
392 | { | ||
393 | const struct inet_sock *inet = inet_sk(sk); | ||
394 | const struct ipv6_pinfo *np = inet6_sk(sk); | ||
395 | |||
396 | return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32, | ||
397 | np->daddr.s6_addr32, | ||
398 | inet->dport); | ||
399 | } | ||
400 | |||
401 | static int tcp_v6_hash_connect(struct sock *sk) | ||
402 | { | ||
403 | unsigned short snum = inet_sk(sk)->num; | ||
404 | struct inet_bind_hashbucket *head; | ||
405 | struct inet_bind_bucket *tb; | ||
406 | int ret; | ||
407 | |||
408 | if (!snum) { | ||
409 | int low = sysctl_local_port_range[0]; | ||
410 | int high = sysctl_local_port_range[1]; | ||
411 | int range = high - low; | ||
412 | int i; | ||
413 | int port; | ||
414 | static u32 hint; | ||
415 | u32 offset = hint + tcpv6_port_offset(sk); | ||
416 | struct hlist_node *node; | ||
417 | struct inet_timewait_sock *tw = NULL; | ||
418 | |||
419 | local_bh_disable(); | ||
420 | for (i = 1; i <= range; i++) { | ||
421 | port = low + (i + offset) % range; | ||
422 | head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; | ||
423 | spin_lock(&head->lock); | ||
424 | |||
425 | /* Does not bother with rcv_saddr checks, | ||
426 | * because the established check is already | ||
427 | * unique enough. | ||
428 | */ | ||
429 | inet_bind_bucket_for_each(tb, node, &head->chain) { | ||
430 | if (tb->port == port) { | ||
431 | BUG_TRAP(!hlist_empty(&tb->owners)); | ||
432 | if (tb->fastreuse >= 0) | ||
433 | goto next_port; | ||
434 | if (!__tcp_v6_check_established(sk, | ||
435 | port, | ||
436 | &tw)) | ||
437 | goto ok; | ||
438 | goto next_port; | ||
439 | } | ||
440 | } | ||
441 | |||
442 | tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); | ||
443 | if (!tb) { | ||
444 | spin_unlock(&head->lock); | ||
445 | break; | ||
446 | } | ||
447 | tb->fastreuse = -1; | ||
448 | goto ok; | ||
449 | |||
450 | next_port: | ||
451 | spin_unlock(&head->lock); | ||
452 | } | ||
453 | local_bh_enable(); | ||
454 | |||
455 | return -EADDRNOTAVAIL; | ||
456 | |||
457 | ok: | ||
458 | hint += i; | ||
459 | |||
460 | /* Head lock still held and bh's disabled */ | ||
461 | inet_bind_hash(sk, tb, port); | ||
462 | if (sk_unhashed(sk)) { | ||
463 | inet_sk(sk)->sport = htons(port); | ||
464 | __tcp_v6_hash(sk); | ||
465 | } | ||
466 | spin_unlock(&head->lock); | ||
467 | |||
468 | if (tw) { | ||
469 | inet_twsk_deschedule(tw, &tcp_death_row); | ||
470 | inet_twsk_put(tw); | ||
471 | } | ||
472 | |||
473 | ret = 0; | ||
474 | goto out; | ||
475 | } | ||
476 | |||
477 | head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; | ||
478 | tb = inet_csk(sk)->icsk_bind_hash; | ||
479 | spin_lock_bh(&head->lock); | ||
480 | |||
481 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | ||
482 | __tcp_v6_hash(sk); | ||
483 | spin_unlock_bh(&head->lock); | ||
484 | return 0; | ||
485 | } else { | ||
486 | spin_unlock(&head->lock); | ||
487 | /* No definite answer... Walk to established hash table */ | ||
488 | ret = __tcp_v6_check_established(sk, snum, NULL); | ||
489 | out: | ||
490 | local_bh_enable(); | ||
491 | return ret; | ||
492 | } | ||
493 | } | ||
494 | |||
495 | static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | 122 | static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, |
496 | int addr_len) | 123 | int addr_len) |
497 | { | 124 | { |
498 | struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; | 125 | struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; |
499 | struct inet_sock *inet = inet_sk(sk); | 126 | struct inet_sock *inet = inet_sk(sk); |
127 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
500 | struct ipv6_pinfo *np = inet6_sk(sk); | 128 | struct ipv6_pinfo *np = inet6_sk(sk); |
501 | struct tcp_sock *tp = tcp_sk(sk); | 129 | struct tcp_sock *tp = tcp_sk(sk); |
502 | struct in6_addr *saddr = NULL, *final_p = NULL, final; | 130 | struct in6_addr *saddr = NULL, *final_p = NULL, final; |
@@ -571,7 +199,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
571 | */ | 199 | */ |
572 | 200 | ||
573 | if (addr_type == IPV6_ADDR_MAPPED) { | 201 | if (addr_type == IPV6_ADDR_MAPPED) { |
574 | u32 exthdrlen = tp->ext_header_len; | 202 | u32 exthdrlen = icsk->icsk_ext_hdr_len; |
575 | struct sockaddr_in sin; | 203 | struct sockaddr_in sin; |
576 | 204 | ||
577 | SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); | 205 | SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); |
@@ -583,14 +211,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
583 | sin.sin_port = usin->sin6_port; | 211 | sin.sin_port = usin->sin6_port; |
584 | sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; | 212 | sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; |
585 | 213 | ||
586 | tp->af_specific = &ipv6_mapped; | 214 | icsk->icsk_af_ops = &ipv6_mapped; |
587 | sk->sk_backlog_rcv = tcp_v4_do_rcv; | 215 | sk->sk_backlog_rcv = tcp_v4_do_rcv; |
588 | 216 | ||
589 | err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); | 217 | err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); |
590 | 218 | ||
591 | if (err) { | 219 | if (err) { |
592 | tp->ext_header_len = exthdrlen; | 220 | icsk->icsk_ext_hdr_len = exthdrlen; |
593 | tp->af_specific = &ipv6_specific; | 221 | icsk->icsk_af_ops = &ipv6_specific; |
594 | sk->sk_backlog_rcv = tcp_v6_do_rcv; | 222 | sk->sk_backlog_rcv = tcp_v6_do_rcv; |
595 | goto failure; | 223 | goto failure; |
596 | } else { | 224 | } else { |
@@ -643,16 +271,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
643 | sk->sk_route_caps = dst->dev->features & | 271 | sk->sk_route_caps = dst->dev->features & |
644 | ~(NETIF_F_IP_CSUM | NETIF_F_TSO); | 272 | ~(NETIF_F_IP_CSUM | NETIF_F_TSO); |
645 | 273 | ||
646 | tp->ext_header_len = 0; | 274 | icsk->icsk_ext_hdr_len = 0; |
647 | if (np->opt) | 275 | if (np->opt) |
648 | tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; | 276 | icsk->icsk_ext_hdr_len = (np->opt->opt_flen + |
277 | np->opt->opt_nflen); | ||
649 | 278 | ||
650 | tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); | 279 | tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); |
651 | 280 | ||
652 | inet->dport = usin->sin6_port; | 281 | inet->dport = usin->sin6_port; |
653 | 282 | ||
654 | tcp_set_state(sk, TCP_SYN_SENT); | 283 | tcp_set_state(sk, TCP_SYN_SENT); |
655 | err = tcp_v6_hash_connect(sk); | 284 | err = inet6_hash_connect(&tcp_death_row, sk); |
656 | if (err) | 285 | if (err) |
657 | goto late_failure; | 286 | goto late_failure; |
658 | 287 | ||
@@ -758,7 +387,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
758 | } else | 387 | } else |
759 | dst_hold(dst); | 388 | dst_hold(dst); |
760 | 389 | ||
761 | if (tp->pmtu_cookie > dst_mtu(dst)) { | 390 | if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { |
762 | tcp_sync_mss(sk, dst_mtu(dst)); | 391 | tcp_sync_mss(sk, dst_mtu(dst)); |
763 | tcp_simple_retransmit(sk); | 392 | tcp_simple_retransmit(sk); |
764 | } /* else let the usual retransmit timer handle it */ | 393 | } /* else let the usual retransmit timer handle it */ |
@@ -775,8 +404,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
775 | if (sock_owned_by_user(sk)) | 404 | if (sock_owned_by_user(sk)) |
776 | goto out; | 405 | goto out; |
777 | 406 | ||
778 | req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, | 407 | req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, |
779 | &hdr->saddr, inet6_iif(skb)); | 408 | &hdr->saddr, inet6_iif(skb)); |
780 | if (!req) | 409 | if (!req) |
781 | goto out; | 410 | goto out; |
782 | 411 | ||
@@ -822,7 +451,7 @@ out: | |||
822 | static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, | 451 | static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, |
823 | struct dst_entry *dst) | 452 | struct dst_entry *dst) |
824 | { | 453 | { |
825 | struct tcp6_request_sock *treq = tcp6_rsk(req); | 454 | struct inet6_request_sock *treq = inet6_rsk(req); |
826 | struct ipv6_pinfo *np = inet6_sk(sk); | 455 | struct ipv6_pinfo *np = inet6_sk(sk); |
827 | struct sk_buff * skb; | 456 | struct sk_buff * skb; |
828 | struct ipv6_txoptions *opt = NULL; | 457 | struct ipv6_txoptions *opt = NULL; |
@@ -888,8 +517,8 @@ done: | |||
888 | 517 | ||
889 | static void tcp_v6_reqsk_destructor(struct request_sock *req) | 518 | static void tcp_v6_reqsk_destructor(struct request_sock *req) |
890 | { | 519 | { |
891 | if (tcp6_rsk(req)->pktopts) | 520 | if (inet6_rsk(req)->pktopts) |
892 | kfree_skb(tcp6_rsk(req)->pktopts); | 521 | kfree_skb(inet6_rsk(req)->pktopts); |
893 | } | 522 | } |
894 | 523 | ||
895 | static struct request_sock_ops tcp6_request_sock_ops = { | 524 | static struct request_sock_ops tcp6_request_sock_ops = { |
@@ -901,26 +530,15 @@ static struct request_sock_ops tcp6_request_sock_ops = { | |||
901 | .send_reset = tcp_v6_send_reset | 530 | .send_reset = tcp_v6_send_reset |
902 | }; | 531 | }; |
903 | 532 | ||
904 | static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) | 533 | static struct timewait_sock_ops tcp6_timewait_sock_ops = { |
905 | { | 534 | .twsk_obj_size = sizeof(struct tcp6_timewait_sock), |
906 | struct ipv6_pinfo *np = inet6_sk(sk); | 535 | .twsk_unique = tcp_twsk_unique, |
907 | struct inet6_skb_parm *opt = IP6CB(skb); | 536 | }; |
908 | |||
909 | if (np->rxopt.all) { | ||
910 | if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || | ||
911 | ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) || | ||
912 | (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || | ||
913 | ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) | ||
914 | return 1; | ||
915 | } | ||
916 | return 0; | ||
917 | } | ||
918 | |||
919 | 537 | ||
920 | static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, | 538 | static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) |
921 | struct sk_buff *skb) | ||
922 | { | 539 | { |
923 | struct ipv6_pinfo *np = inet6_sk(sk); | 540 | struct ipv6_pinfo *np = inet6_sk(sk); |
541 | struct tcphdr *th = skb->h.th; | ||
924 | 542 | ||
925 | if (skb->ip_summed == CHECKSUM_HW) { | 543 | if (skb->ip_summed == CHECKSUM_HW) { |
926 | th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); | 544 | th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); |
@@ -1091,8 +709,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) | |||
1091 | struct sock *nsk; | 709 | struct sock *nsk; |
1092 | 710 | ||
1093 | /* Find possible connection requests. */ | 711 | /* Find possible connection requests. */ |
1094 | req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, | 712 | req = inet6_csk_search_req(sk, &prev, th->source, |
1095 | &skb->nh.ipv6h->daddr, inet6_iif(skb)); | 713 | &skb->nh.ipv6h->saddr, |
714 | &skb->nh.ipv6h->daddr, inet6_iif(skb)); | ||
1096 | if (req) | 715 | if (req) |
1097 | return tcp_check_req(sk, skb, req, prev); | 716 | return tcp_check_req(sk, skb, req, prev); |
1098 | 717 | ||
@@ -1116,23 +735,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) | |||
1116 | return sk; | 735 | return sk; |
1117 | } | 736 | } |
1118 | 737 | ||
1119 | static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) | ||
1120 | { | ||
1121 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
1122 | struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; | ||
1123 | const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); | ||
1124 | |||
1125 | reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); | ||
1126 | inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); | ||
1127 | } | ||
1128 | |||
1129 | |||
1130 | /* FIXME: this is substantially similar to the ipv4 code. | 738 | /* FIXME: this is substantially similar to the ipv4 code. |
1131 | * Can some kind of merge be done? -- erics | 739 | * Can some kind of merge be done? -- erics |
1132 | */ | 740 | */ |
1133 | static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | 741 | static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) |
1134 | { | 742 | { |
1135 | struct tcp6_request_sock *treq; | 743 | struct inet6_request_sock *treq; |
1136 | struct ipv6_pinfo *np = inet6_sk(sk); | 744 | struct ipv6_pinfo *np = inet6_sk(sk); |
1137 | struct tcp_options_received tmp_opt; | 745 | struct tcp_options_received tmp_opt; |
1138 | struct tcp_sock *tp = tcp_sk(sk); | 746 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -1157,7 +765,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1157 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) | 765 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) |
1158 | goto drop; | 766 | goto drop; |
1159 | 767 | ||
1160 | req = reqsk_alloc(&tcp6_request_sock_ops); | 768 | req = inet6_reqsk_alloc(&tcp6_request_sock_ops); |
1161 | if (req == NULL) | 769 | if (req == NULL) |
1162 | goto drop; | 770 | goto drop; |
1163 | 771 | ||
@@ -1170,7 +778,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1170 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; | 778 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; |
1171 | tcp_openreq_init(req, &tmp_opt, skb); | 779 | tcp_openreq_init(req, &tmp_opt, skb); |
1172 | 780 | ||
1173 | treq = tcp6_rsk(req); | 781 | treq = inet6_rsk(req); |
1174 | ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr); | 782 | ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr); |
1175 | ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr); | 783 | ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr); |
1176 | TCP_ECN_create_request(req, skb->h.th); | 784 | TCP_ECN_create_request(req, skb->h.th); |
@@ -1196,8 +804,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1196 | if (tcp_v6_send_synack(sk, req, NULL)) | 804 | if (tcp_v6_send_synack(sk, req, NULL)) |
1197 | goto drop; | 805 | goto drop; |
1198 | 806 | ||
1199 | tcp_v6_synq_add(sk, req); | 807 | inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); |
1200 | |||
1201 | return 0; | 808 | return 0; |
1202 | 809 | ||
1203 | drop: | 810 | drop: |
@@ -1212,7 +819,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1212 | struct request_sock *req, | 819 | struct request_sock *req, |
1213 | struct dst_entry *dst) | 820 | struct dst_entry *dst) |
1214 | { | 821 | { |
1215 | struct tcp6_request_sock *treq = tcp6_rsk(req); | 822 | struct inet6_request_sock *treq = inet6_rsk(req); |
1216 | struct ipv6_pinfo *newnp, *np = inet6_sk(sk); | 823 | struct ipv6_pinfo *newnp, *np = inet6_sk(sk); |
1217 | struct tcp6_sock *newtcp6sk; | 824 | struct tcp6_sock *newtcp6sk; |
1218 | struct inet_sock *newinet; | 825 | struct inet_sock *newinet; |
@@ -1247,7 +854,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1247 | 854 | ||
1248 | ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); | 855 | ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); |
1249 | 856 | ||
1250 | newtp->af_specific = &ipv6_mapped; | 857 | inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; |
1251 | newsk->sk_backlog_rcv = tcp_v4_do_rcv; | 858 | newsk->sk_backlog_rcv = tcp_v4_do_rcv; |
1252 | newnp->pktoptions = NULL; | 859 | newnp->pktoptions = NULL; |
1253 | newnp->opt = NULL; | 860 | newnp->opt = NULL; |
@@ -1261,10 +868,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1261 | */ | 868 | */ |
1262 | 869 | ||
1263 | /* It is tricky place. Until this moment IPv4 tcp | 870 | /* It is tricky place. Until this moment IPv4 tcp |
1264 | worked with IPv6 af_tcp.af_specific. | 871 | worked with IPv6 icsk.icsk_af_ops. |
1265 | Sync it now. | 872 | Sync it now. |
1266 | */ | 873 | */ |
1267 | tcp_sync_mss(newsk, newtp->pmtu_cookie); | 874 | tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); |
1268 | 875 | ||
1269 | return newsk; | 876 | return newsk; |
1270 | } | 877 | } |
@@ -1371,10 +978,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1371 | sock_kfree_s(sk, opt, opt->tot_len); | 978 | sock_kfree_s(sk, opt, opt->tot_len); |
1372 | } | 979 | } |
1373 | 980 | ||
1374 | newtp->ext_header_len = 0; | 981 | inet_csk(newsk)->icsk_ext_hdr_len = 0; |
1375 | if (newnp->opt) | 982 | if (newnp->opt) |
1376 | newtp->ext_header_len = newnp->opt->opt_nflen + | 983 | inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + |
1377 | newnp->opt->opt_flen; | 984 | newnp->opt->opt_flen); |
1378 | 985 | ||
1379 | tcp_sync_mss(newsk, dst_mtu(dst)); | 986 | tcp_sync_mss(newsk, dst_mtu(dst)); |
1380 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); | 987 | newtp->advmss = dst_metric(dst, RTAX_ADVMSS); |
@@ -1382,7 +989,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1382 | 989 | ||
1383 | newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; | 990 | newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; |
1384 | 991 | ||
1385 | __tcp_v6_hash(newsk); | 992 | __inet6_hash(&tcp_hashinfo, newsk); |
1386 | inet_inherit_port(&tcp_hashinfo, sk, newsk); | 993 | inet_inherit_port(&tcp_hashinfo, sk, newsk); |
1387 | 994 | ||
1388 | return newsk; | 995 | return newsk; |
@@ -1679,139 +1286,16 @@ do_time_wait: | |||
1679 | goto discard_it; | 1286 | goto discard_it; |
1680 | } | 1287 | } |
1681 | 1288 | ||
1682 | static int tcp_v6_rebuild_header(struct sock *sk) | ||
1683 | { | ||
1684 | int err; | ||
1685 | struct dst_entry *dst; | ||
1686 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
1687 | |||
1688 | dst = __sk_dst_check(sk, np->dst_cookie); | ||
1689 | |||
1690 | if (dst == NULL) { | ||
1691 | struct inet_sock *inet = inet_sk(sk); | ||
1692 | struct in6_addr *final_p = NULL, final; | ||
1693 | struct flowi fl; | ||
1694 | |||
1695 | memset(&fl, 0, sizeof(fl)); | ||
1696 | fl.proto = IPPROTO_TCP; | ||
1697 | ipv6_addr_copy(&fl.fl6_dst, &np->daddr); | ||
1698 | ipv6_addr_copy(&fl.fl6_src, &np->saddr); | ||
1699 | fl.fl6_flowlabel = np->flow_label; | ||
1700 | fl.oif = sk->sk_bound_dev_if; | ||
1701 | fl.fl_ip_dport = inet->dport; | ||
1702 | fl.fl_ip_sport = inet->sport; | ||
1703 | |||
1704 | if (np->opt && np->opt->srcrt) { | ||
1705 | struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; | ||
1706 | ipv6_addr_copy(&final, &fl.fl6_dst); | ||
1707 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); | ||
1708 | final_p = &final; | ||
1709 | } | ||
1710 | |||
1711 | err = ip6_dst_lookup(sk, &dst, &fl); | ||
1712 | if (err) { | ||
1713 | sk->sk_route_caps = 0; | ||
1714 | return err; | ||
1715 | } | ||
1716 | if (final_p) | ||
1717 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
1718 | |||
1719 | if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { | ||
1720 | sk->sk_err_soft = -err; | ||
1721 | return err; | ||
1722 | } | ||
1723 | |||
1724 | ip6_dst_store(sk, dst, NULL); | ||
1725 | sk->sk_route_caps = dst->dev->features & | ||
1726 | ~(NETIF_F_IP_CSUM | NETIF_F_TSO); | ||
1727 | } | ||
1728 | |||
1729 | return 0; | ||
1730 | } | ||
1731 | |||
1732 | static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) | ||
1733 | { | ||
1734 | struct sock *sk = skb->sk; | ||
1735 | struct inet_sock *inet = inet_sk(sk); | ||
1736 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
1737 | struct flowi fl; | ||
1738 | struct dst_entry *dst; | ||
1739 | struct in6_addr *final_p = NULL, final; | ||
1740 | |||
1741 | memset(&fl, 0, sizeof(fl)); | ||
1742 | fl.proto = IPPROTO_TCP; | ||
1743 | ipv6_addr_copy(&fl.fl6_dst, &np->daddr); | ||
1744 | ipv6_addr_copy(&fl.fl6_src, &np->saddr); | ||
1745 | fl.fl6_flowlabel = np->flow_label; | ||
1746 | IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); | ||
1747 | fl.oif = sk->sk_bound_dev_if; | ||
1748 | fl.fl_ip_sport = inet->sport; | ||
1749 | fl.fl_ip_dport = inet->dport; | ||
1750 | |||
1751 | if (np->opt && np->opt->srcrt) { | ||
1752 | struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; | ||
1753 | ipv6_addr_copy(&final, &fl.fl6_dst); | ||
1754 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); | ||
1755 | final_p = &final; | ||
1756 | } | ||
1757 | |||
1758 | dst = __sk_dst_check(sk, np->dst_cookie); | ||
1759 | |||
1760 | if (dst == NULL) { | ||
1761 | int err = ip6_dst_lookup(sk, &dst, &fl); | ||
1762 | |||
1763 | if (err) { | ||
1764 | sk->sk_err_soft = -err; | ||
1765 | return err; | ||
1766 | } | ||
1767 | |||
1768 | if (final_p) | ||
1769 | ipv6_addr_copy(&fl.fl6_dst, final_p); | ||
1770 | |||
1771 | if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { | ||
1772 | sk->sk_route_caps = 0; | ||
1773 | return err; | ||
1774 | } | ||
1775 | |||
1776 | ip6_dst_store(sk, dst, NULL); | ||
1777 | sk->sk_route_caps = dst->dev->features & | ||
1778 | ~(NETIF_F_IP_CSUM | NETIF_F_TSO); | ||
1779 | } | ||
1780 | |||
1781 | skb->dst = dst_clone(dst); | ||
1782 | |||
1783 | /* Restore final destination back after routing done */ | ||
1784 | ipv6_addr_copy(&fl.fl6_dst, &np->daddr); | ||
1785 | |||
1786 | return ip6_xmit(sk, skb, &fl, np->opt, 0); | ||
1787 | } | ||
1788 | |||
1789 | static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) | ||
1790 | { | ||
1791 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
1792 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; | ||
1793 | |||
1794 | sin6->sin6_family = AF_INET6; | ||
1795 | ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); | ||
1796 | sin6->sin6_port = inet_sk(sk)->dport; | ||
1797 | /* We do not store received flowlabel for TCP */ | ||
1798 | sin6->sin6_flowinfo = 0; | ||
1799 | sin6->sin6_scope_id = 0; | ||
1800 | if (sk->sk_bound_dev_if && | ||
1801 | ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) | ||
1802 | sin6->sin6_scope_id = sk->sk_bound_dev_if; | ||
1803 | } | ||
1804 | |||
1805 | static int tcp_v6_remember_stamp(struct sock *sk) | 1289 | static int tcp_v6_remember_stamp(struct sock *sk) |
1806 | { | 1290 | { |
1807 | /* Alas, not yet... */ | 1291 | /* Alas, not yet... */ |
1808 | return 0; | 1292 | return 0; |
1809 | } | 1293 | } |
1810 | 1294 | ||
1811 | static struct tcp_func ipv6_specific = { | 1295 | static struct inet_connection_sock_af_ops ipv6_specific = { |
1812 | .queue_xmit = tcp_v6_xmit, | 1296 | .queue_xmit = inet6_csk_xmit, |
1813 | .send_check = tcp_v6_send_check, | 1297 | .send_check = tcp_v6_send_check, |
1814 | .rebuild_header = tcp_v6_rebuild_header, | 1298 | .rebuild_header = inet6_sk_rebuild_header, |
1815 | .conn_request = tcp_v6_conn_request, | 1299 | .conn_request = tcp_v6_conn_request, |
1816 | .syn_recv_sock = tcp_v6_syn_recv_sock, | 1300 | .syn_recv_sock = tcp_v6_syn_recv_sock, |
1817 | .remember_stamp = tcp_v6_remember_stamp, | 1301 | .remember_stamp = tcp_v6_remember_stamp, |
@@ -1819,7 +1303,7 @@ static struct tcp_func ipv6_specific = { | |||
1819 | 1303 | ||
1820 | .setsockopt = ipv6_setsockopt, | 1304 | .setsockopt = ipv6_setsockopt, |
1821 | .getsockopt = ipv6_getsockopt, | 1305 | .getsockopt = ipv6_getsockopt, |
1822 | .addr2sockaddr = v6_addr2sockaddr, | 1306 | .addr2sockaddr = inet6_csk_addr2sockaddr, |
1823 | .sockaddr_len = sizeof(struct sockaddr_in6) | 1307 | .sockaddr_len = sizeof(struct sockaddr_in6) |
1824 | }; | 1308 | }; |
1825 | 1309 | ||
@@ -1827,7 +1311,7 @@ static struct tcp_func ipv6_specific = { | |||
1827 | * TCP over IPv4 via INET6 API | 1311 | * TCP over IPv4 via INET6 API |
1828 | */ | 1312 | */ |
1829 | 1313 | ||
1830 | static struct tcp_func ipv6_mapped = { | 1314 | static struct inet_connection_sock_af_ops ipv6_mapped = { |
1831 | .queue_xmit = ip_queue_xmit, | 1315 | .queue_xmit = ip_queue_xmit, |
1832 | .send_check = tcp_v4_send_check, | 1316 | .send_check = tcp_v4_send_check, |
1833 | .rebuild_header = inet_sk_rebuild_header, | 1317 | .rebuild_header = inet_sk_rebuild_header, |
@@ -1838,7 +1322,7 @@ static struct tcp_func ipv6_mapped = { | |||
1838 | 1322 | ||
1839 | .setsockopt = ipv6_setsockopt, | 1323 | .setsockopt = ipv6_setsockopt, |
1840 | .getsockopt = ipv6_getsockopt, | 1324 | .getsockopt = ipv6_getsockopt, |
1841 | .addr2sockaddr = v6_addr2sockaddr, | 1325 | .addr2sockaddr = inet6_csk_addr2sockaddr, |
1842 | .sockaddr_len = sizeof(struct sockaddr_in6) | 1326 | .sockaddr_len = sizeof(struct sockaddr_in6) |
1843 | }; | 1327 | }; |
1844 | 1328 | ||
@@ -1877,8 +1361,9 @@ static int tcp_v6_init_sock(struct sock *sk) | |||
1877 | 1361 | ||
1878 | sk->sk_state = TCP_CLOSE; | 1362 | sk->sk_state = TCP_CLOSE; |
1879 | 1363 | ||
1880 | tp->af_specific = &ipv6_specific; | 1364 | icsk->icsk_af_ops = &ipv6_specific; |
1881 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; | 1365 | icsk->icsk_ca_ops = &tcp_init_congestion_ops; |
1366 | icsk->icsk_sync_mss = tcp_sync_mss; | ||
1882 | sk->sk_write_space = sk_stream_write_space; | 1367 | sk->sk_write_space = sk_stream_write_space; |
1883 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); | 1368 | sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); |
1884 | 1369 | ||
@@ -1900,14 +1385,13 @@ static int tcp_v6_destroy_sock(struct sock *sk) | |||
1900 | static void get_openreq6(struct seq_file *seq, | 1385 | static void get_openreq6(struct seq_file *seq, |
1901 | struct sock *sk, struct request_sock *req, int i, int uid) | 1386 | struct sock *sk, struct request_sock *req, int i, int uid) |
1902 | { | 1387 | { |
1903 | struct in6_addr *dest, *src; | ||
1904 | int ttd = req->expires - jiffies; | 1388 | int ttd = req->expires - jiffies; |
1389 | struct in6_addr *src = &inet6_rsk(req)->loc_addr; | ||
1390 | struct in6_addr *dest = &inet6_rsk(req)->rmt_addr; | ||
1905 | 1391 | ||
1906 | if (ttd < 0) | 1392 | if (ttd < 0) |
1907 | ttd = 0; | 1393 | ttd = 0; |
1908 | 1394 | ||
1909 | src = &tcp6_rsk(req)->loc_addr; | ||
1910 | dest = &tcp6_rsk(req)->rmt_addr; | ||
1911 | seq_printf(seq, | 1395 | seq_printf(seq, |
1912 | "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " | 1396 | "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " |
1913 | "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", | 1397 | "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", |
@@ -1988,14 +1472,14 @@ static void get_timewait6_sock(struct seq_file *seq, | |||
1988 | { | 1472 | { |
1989 | struct in6_addr *dest, *src; | 1473 | struct in6_addr *dest, *src; |
1990 | __u16 destp, srcp; | 1474 | __u16 destp, srcp; |
1991 | struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); | 1475 | struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); |
1992 | int ttd = tw->tw_ttd - jiffies; | 1476 | int ttd = tw->tw_ttd - jiffies; |
1993 | 1477 | ||
1994 | if (ttd < 0) | 1478 | if (ttd < 0) |
1995 | ttd = 0; | 1479 | ttd = 0; |
1996 | 1480 | ||
1997 | dest = &tcp6tw->tw_v6_daddr; | 1481 | dest = &tw6->tw_v6_daddr; |
1998 | src = &tcp6tw->tw_v6_rcv_saddr; | 1482 | src = &tw6->tw_v6_rcv_saddr; |
1999 | destp = ntohs(tw->tw_dport); | 1483 | destp = ntohs(tw->tw_dport); |
2000 | srcp = ntohs(tw->tw_sport); | 1484 | srcp = ntohs(tw->tw_sport); |
2001 | 1485 | ||
@@ -2093,7 +1577,7 @@ struct proto tcpv6_prot = { | |||
2093 | .sysctl_rmem = sysctl_tcp_rmem, | 1577 | .sysctl_rmem = sysctl_tcp_rmem, |
2094 | .max_header = MAX_TCP_HEADER, | 1578 | .max_header = MAX_TCP_HEADER, |
2095 | .obj_size = sizeof(struct tcp6_sock), | 1579 | .obj_size = sizeof(struct tcp6_sock), |
2096 | .twsk_obj_size = sizeof(struct tcp6_timewait_sock), | 1580 | .twsk_prot = &tcp6_timewait_sock_ops, |
2097 | .rsk_prot = &tcp6_request_sock_ops, | 1581 | .rsk_prot = &tcp6_request_sock_ops, |
2098 | }; | 1582 | }; |
2099 | 1583 | ||
@@ -2110,7 +1594,8 @@ static struct inet_protosw tcpv6_protosw = { | |||
2110 | .ops = &inet6_stream_ops, | 1594 | .ops = &inet6_stream_ops, |
2111 | .capability = -1, | 1595 | .capability = -1, |
2112 | .no_check = 0, | 1596 | .no_check = 0, |
2113 | .flags = INET_PROTOSW_PERMANENT, | 1597 | .flags = INET_PROTOSW_PERMANENT | |
1598 | INET_PROTOSW_ICSK, | ||
2114 | }; | 1599 | }; |
2115 | 1600 | ||
2116 | void __init tcpv6_init(void) | 1601 | void __init tcpv6_init(void) |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5cc8731eb55b..d8538dcea813 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/ipv6.h> | 36 | #include <linux/ipv6.h> |
37 | #include <linux/icmpv6.h> | 37 | #include <linux/icmpv6.h> |
38 | #include <linux/init.h> | 38 | #include <linux/init.h> |
39 | #include <linux/skbuff.h> | ||
39 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
40 | 41 | ||
41 | #include <net/sock.h> | 42 | #include <net/sock.h> |
@@ -300,20 +301,7 @@ out: | |||
300 | return err; | 301 | return err; |
301 | 302 | ||
302 | csum_copy_err: | 303 | csum_copy_err: |
303 | /* Clear queue. */ | 304 | skb_kill_datagram(sk, skb, flags); |
304 | if (flags&MSG_PEEK) { | ||
305 | int clear = 0; | ||
306 | spin_lock_bh(&sk->sk_receive_queue.lock); | ||
307 | if (skb == skb_peek(&sk->sk_receive_queue)) { | ||
308 | __skb_unlink(skb, &sk->sk_receive_queue); | ||
309 | clear = 1; | ||
310 | } | ||
311 | spin_unlock_bh(&sk->sk_receive_queue.lock); | ||
312 | if (clear) | ||
313 | kfree_skb(skb); | ||
314 | } | ||
315 | |||
316 | skb_free_datagram(sk, skb); | ||
317 | 305 | ||
318 | if (flags & MSG_DONTWAIT) { | 306 | if (flags & MSG_DONTWAIT) { |
319 | UDP6_INC_STATS_USER(UDP_MIB_INERRORS); | 307 | UDP6_INC_STATS_USER(UDP_MIB_INERRORS); |
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 34b3bb868409..0dc519b40404 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c | |||
@@ -75,7 +75,7 @@ static struct datalink_proto *pEII_datalink; | |||
75 | static struct datalink_proto *p8023_datalink; | 75 | static struct datalink_proto *p8023_datalink; |
76 | static struct datalink_proto *pSNAP_datalink; | 76 | static struct datalink_proto *pSNAP_datalink; |
77 | 77 | ||
78 | static struct proto_ops ipx_dgram_ops; | 78 | static const struct proto_ops ipx_dgram_ops; |
79 | 79 | ||
80 | LIST_HEAD(ipx_interfaces); | 80 | LIST_HEAD(ipx_interfaces); |
81 | DEFINE_SPINLOCK(ipx_interfaces_lock); | 81 | DEFINE_SPINLOCK(ipx_interfaces_lock); |
@@ -1884,7 +1884,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1884 | rc = -EINVAL; | 1884 | rc = -EINVAL; |
1885 | break; | 1885 | break; |
1886 | default: | 1886 | default: |
1887 | rc = dev_ioctl(cmd, argp); | 1887 | rc = -ENOIOCTLCMD; |
1888 | break; | 1888 | break; |
1889 | } | 1889 | } |
1890 | 1890 | ||
@@ -1901,7 +1901,7 @@ static struct net_proto_family ipx_family_ops = { | |||
1901 | .owner = THIS_MODULE, | 1901 | .owner = THIS_MODULE, |
1902 | }; | 1902 | }; |
1903 | 1903 | ||
1904 | static struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { | 1904 | static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { |
1905 | .family = PF_IPX, | 1905 | .family = PF_IPX, |
1906 | .owner = THIS_MODULE, | 1906 | .owner = THIS_MODULE, |
1907 | .release = ipx_release, | 1907 | .release = ipx_release, |
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 6f92f9c62990..fbfa96754417 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c | |||
@@ -62,12 +62,12 @@ | |||
62 | 62 | ||
63 | static int irda_create(struct socket *sock, int protocol); | 63 | static int irda_create(struct socket *sock, int protocol); |
64 | 64 | ||
65 | static struct proto_ops irda_stream_ops; | 65 | static const struct proto_ops irda_stream_ops; |
66 | static struct proto_ops irda_seqpacket_ops; | 66 | static const struct proto_ops irda_seqpacket_ops; |
67 | static struct proto_ops irda_dgram_ops; | 67 | static const struct proto_ops irda_dgram_ops; |
68 | 68 | ||
69 | #ifdef CONFIG_IRDA_ULTRA | 69 | #ifdef CONFIG_IRDA_ULTRA |
70 | static struct proto_ops irda_ultra_ops; | 70 | static const struct proto_ops irda_ultra_ops; |
71 | #define ULTRA_MAX_DATA 382 | 71 | #define ULTRA_MAX_DATA 382 |
72 | #endif /* CONFIG_IRDA_ULTRA */ | 72 | #endif /* CONFIG_IRDA_ULTRA */ |
73 | 73 | ||
@@ -1438,8 +1438,9 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, | |||
1438 | /* | 1438 | /* |
1439 | * POSIX 1003.1g mandates this order. | 1439 | * POSIX 1003.1g mandates this order. |
1440 | */ | 1440 | */ |
1441 | if (sk->sk_err) | 1441 | ret = sock_error(sk); |
1442 | ret = sock_error(sk); | 1442 | if (ret) |
1443 | break; | ||
1443 | else if (sk->sk_shutdown & RCV_SHUTDOWN) | 1444 | else if (sk->sk_shutdown & RCV_SHUTDOWN) |
1444 | ; | 1445 | ; |
1445 | else if (noblock) | 1446 | else if (noblock) |
@@ -1821,7 +1822,7 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1821 | return -EINVAL; | 1822 | return -EINVAL; |
1822 | default: | 1823 | default: |
1823 | IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __FUNCTION__); | 1824 | IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __FUNCTION__); |
1824 | return dev_ioctl(cmd, (void __user *) arg); | 1825 | return -ENOIOCTLCMD; |
1825 | } | 1826 | } |
1826 | 1827 | ||
1827 | /*NOTREACHED*/ | 1828 | /*NOTREACHED*/ |
@@ -2463,7 +2464,7 @@ static struct net_proto_family irda_family_ops = { | |||
2463 | .owner = THIS_MODULE, | 2464 | .owner = THIS_MODULE, |
2464 | }; | 2465 | }; |
2465 | 2466 | ||
2466 | static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { | 2467 | static const struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { |
2467 | .family = PF_IRDA, | 2468 | .family = PF_IRDA, |
2468 | .owner = THIS_MODULE, | 2469 | .owner = THIS_MODULE, |
2469 | .release = irda_release, | 2470 | .release = irda_release, |
@@ -2484,7 +2485,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { | |||
2484 | .sendpage = sock_no_sendpage, | 2485 | .sendpage = sock_no_sendpage, |
2485 | }; | 2486 | }; |
2486 | 2487 | ||
2487 | static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { | 2488 | static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { |
2488 | .family = PF_IRDA, | 2489 | .family = PF_IRDA, |
2489 | .owner = THIS_MODULE, | 2490 | .owner = THIS_MODULE, |
2490 | .release = irda_release, | 2491 | .release = irda_release, |
@@ -2505,7 +2506,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { | |||
2505 | .sendpage = sock_no_sendpage, | 2506 | .sendpage = sock_no_sendpage, |
2506 | }; | 2507 | }; |
2507 | 2508 | ||
2508 | static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { | 2509 | static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { |
2509 | .family = PF_IRDA, | 2510 | .family = PF_IRDA, |
2510 | .owner = THIS_MODULE, | 2511 | .owner = THIS_MODULE, |
2511 | .release = irda_release, | 2512 | .release = irda_release, |
@@ -2527,7 +2528,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { | |||
2527 | }; | 2528 | }; |
2528 | 2529 | ||
2529 | #ifdef CONFIG_IRDA_ULTRA | 2530 | #ifdef CONFIG_IRDA_ULTRA |
2530 | static struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { | 2531 | static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { |
2531 | .family = PF_IRDA, | 2532 | .family = PF_IRDA, |
2532 | .owner = THIS_MODULE, | 2533 | .owner = THIS_MODULE, |
2533 | .release = irda_release, | 2534 | .release = irda_release, |
diff --git a/net/irda/iriap.c b/net/irda/iriap.c index b8bb78af8b8a..254f90746900 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c | |||
@@ -364,7 +364,7 @@ static void iriap_disconnect_request(struct iriap_cb *self) | |||
364 | /* | 364 | /* |
365 | * Function iriap_getvaluebyclass (addr, name, attr) | 365 | * Function iriap_getvaluebyclass (addr, name, attr) |
366 | * | 366 | * |
367 | * Retreive all values from attribute in all objects with given class | 367 | * Retrieve all values from attribute in all objects with given class |
368 | * name | 368 | * name |
369 | */ | 369 | */ |
370 | int iriap_getvaluebyclass_request(struct iriap_cb *self, | 370 | int iriap_getvaluebyclass_request(struct iriap_cb *self, |
diff --git a/net/key/af_key.c b/net/key/af_key.c index 39031684b65c..52efd04cbedb 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c | |||
@@ -113,7 +113,7 @@ static __inline__ void pfkey_unlock_table(void) | |||
113 | } | 113 | } |
114 | 114 | ||
115 | 115 | ||
116 | static struct proto_ops pfkey_ops; | 116 | static const struct proto_ops pfkey_ops; |
117 | 117 | ||
118 | static void pfkey_insert(struct sock *sk) | 118 | static void pfkey_insert(struct sock *sk) |
119 | { | 119 | { |
@@ -336,6 +336,7 @@ static u8 sadb_ext_min_len[] = { | |||
336 | [SADB_X_EXT_NAT_T_SPORT] = (u8) sizeof(struct sadb_x_nat_t_port), | 336 | [SADB_X_EXT_NAT_T_SPORT] = (u8) sizeof(struct sadb_x_nat_t_port), |
337 | [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), | 337 | [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), |
338 | [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), | 338 | [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), |
339 | [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), | ||
339 | }; | 340 | }; |
340 | 341 | ||
341 | /* Verify sadb_address_{len,prefixlen} against sa_family. */ | 342 | /* Verify sadb_address_{len,prefixlen} against sa_family. */ |
@@ -383,6 +384,55 @@ static int verify_address_len(void *p) | |||
383 | return 0; | 384 | return 0; |
384 | } | 385 | } |
385 | 386 | ||
387 | static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx) | ||
388 | { | ||
389 | int len = 0; | ||
390 | |||
391 | len += sizeof(struct sadb_x_sec_ctx); | ||
392 | len += sec_ctx->sadb_x_ctx_len; | ||
393 | len += sizeof(uint64_t) - 1; | ||
394 | len /= sizeof(uint64_t); | ||
395 | |||
396 | return len; | ||
397 | } | ||
398 | |||
399 | static inline int verify_sec_ctx_len(void *p) | ||
400 | { | ||
401 | struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p; | ||
402 | int len; | ||
403 | |||
404 | if (sec_ctx->sadb_x_ctx_len > PAGE_SIZE) | ||
405 | return -EINVAL; | ||
406 | |||
407 | len = pfkey_sec_ctx_len(sec_ctx); | ||
408 | |||
409 | if (sec_ctx->sadb_x_sec_len != len) | ||
410 | return -EINVAL; | ||
411 | |||
412 | return 0; | ||
413 | } | ||
414 | |||
415 | static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx) | ||
416 | { | ||
417 | struct xfrm_user_sec_ctx *uctx = NULL; | ||
418 | int ctx_size = sec_ctx->sadb_x_ctx_len; | ||
419 | |||
420 | uctx = kmalloc((sizeof(*uctx)+ctx_size), GFP_KERNEL); | ||
421 | |||
422 | if (!uctx) | ||
423 | return NULL; | ||
424 | |||
425 | uctx->len = pfkey_sec_ctx_len(sec_ctx); | ||
426 | uctx->exttype = sec_ctx->sadb_x_sec_exttype; | ||
427 | uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi; | ||
428 | uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg; | ||
429 | uctx->ctx_len = sec_ctx->sadb_x_ctx_len; | ||
430 | memcpy(uctx + 1, sec_ctx + 1, | ||
431 | uctx->ctx_len); | ||
432 | |||
433 | return uctx; | ||
434 | } | ||
435 | |||
386 | static int present_and_same_family(struct sadb_address *src, | 436 | static int present_and_same_family(struct sadb_address *src, |
387 | struct sadb_address *dst) | 437 | struct sadb_address *dst) |
388 | { | 438 | { |
@@ -438,6 +488,10 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h | |||
438 | if (verify_address_len(p)) | 488 | if (verify_address_len(p)) |
439 | return -EINVAL; | 489 | return -EINVAL; |
440 | } | 490 | } |
491 | if (ext_type == SADB_X_EXT_SEC_CTX) { | ||
492 | if (verify_sec_ctx_len(p)) | ||
493 | return -EINVAL; | ||
494 | } | ||
441 | ext_hdrs[ext_type-1] = p; | 495 | ext_hdrs[ext_type-1] = p; |
442 | } | 496 | } |
443 | p += ext_len; | 497 | p += ext_len; |
@@ -586,6 +640,9 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, | |||
586 | struct sadb_key *key; | 640 | struct sadb_key *key; |
587 | struct sadb_x_sa2 *sa2; | 641 | struct sadb_x_sa2 *sa2; |
588 | struct sockaddr_in *sin; | 642 | struct sockaddr_in *sin; |
643 | struct sadb_x_sec_ctx *sec_ctx; | ||
644 | struct xfrm_sec_ctx *xfrm_ctx; | ||
645 | int ctx_size = 0; | ||
589 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 646 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
590 | struct sockaddr_in6 *sin6; | 647 | struct sockaddr_in6 *sin6; |
591 | #endif | 648 | #endif |
@@ -609,6 +666,12 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, | |||
609 | sizeof(struct sadb_address)*2 + | 666 | sizeof(struct sadb_address)*2 + |
610 | sockaddr_size*2 + | 667 | sockaddr_size*2 + |
611 | sizeof(struct sadb_x_sa2); | 668 | sizeof(struct sadb_x_sa2); |
669 | |||
670 | if ((xfrm_ctx = x->security)) { | ||
671 | ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); | ||
672 | size += sizeof(struct sadb_x_sec_ctx) + ctx_size; | ||
673 | } | ||
674 | |||
612 | /* identity & sensitivity */ | 675 | /* identity & sensitivity */ |
613 | 676 | ||
614 | if ((x->props.family == AF_INET && | 677 | if ((x->props.family == AF_INET && |
@@ -899,6 +962,20 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, | |||
899 | n_port->sadb_x_nat_t_port_reserved = 0; | 962 | n_port->sadb_x_nat_t_port_reserved = 0; |
900 | } | 963 | } |
901 | 964 | ||
965 | /* security context */ | ||
966 | if (xfrm_ctx) { | ||
967 | sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, | ||
968 | sizeof(struct sadb_x_sec_ctx) + ctx_size); | ||
969 | sec_ctx->sadb_x_sec_len = | ||
970 | (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); | ||
971 | sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; | ||
972 | sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; | ||
973 | sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; | ||
974 | sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; | ||
975 | memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, | ||
976 | xfrm_ctx->ctx_len); | ||
977 | } | ||
978 | |||
902 | return skb; | 979 | return skb; |
903 | } | 980 | } |
904 | 981 | ||
@@ -909,6 +986,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, | |||
909 | struct sadb_lifetime *lifetime; | 986 | struct sadb_lifetime *lifetime; |
910 | struct sadb_sa *sa; | 987 | struct sadb_sa *sa; |
911 | struct sadb_key *key; | 988 | struct sadb_key *key; |
989 | struct sadb_x_sec_ctx *sec_ctx; | ||
912 | uint16_t proto; | 990 | uint16_t proto; |
913 | int err; | 991 | int err; |
914 | 992 | ||
@@ -993,6 +1071,21 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, | |||
993 | x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; | 1071 | x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; |
994 | x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; | 1072 | x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; |
995 | } | 1073 | } |
1074 | |||
1075 | sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; | ||
1076 | if (sec_ctx != NULL) { | ||
1077 | struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); | ||
1078 | |||
1079 | if (!uctx) | ||
1080 | goto out; | ||
1081 | |||
1082 | err = security_xfrm_state_alloc(x, uctx); | ||
1083 | kfree(uctx); | ||
1084 | |||
1085 | if (err) | ||
1086 | goto out; | ||
1087 | } | ||
1088 | |||
996 | key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; | 1089 | key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; |
997 | if (sa->sadb_sa_auth) { | 1090 | if (sa->sadb_sa_auth) { |
998 | int keysize = 0; | 1091 | int keysize = 0; |
@@ -1720,6 +1813,18 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) | |||
1720 | return 0; | 1813 | return 0; |
1721 | } | 1814 | } |
1722 | 1815 | ||
1816 | static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp) | ||
1817 | { | ||
1818 | struct xfrm_sec_ctx *xfrm_ctx = xp->security; | ||
1819 | |||
1820 | if (xfrm_ctx) { | ||
1821 | int len = sizeof(struct sadb_x_sec_ctx); | ||
1822 | len += xfrm_ctx->ctx_len; | ||
1823 | return PFKEY_ALIGN8(len); | ||
1824 | } | ||
1825 | return 0; | ||
1826 | } | ||
1827 | |||
1723 | static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) | 1828 | static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) |
1724 | { | 1829 | { |
1725 | int sockaddr_size = pfkey_sockaddr_size(xp->family); | 1830 | int sockaddr_size = pfkey_sockaddr_size(xp->family); |
@@ -1733,7 +1838,8 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) | |||
1733 | (sockaddr_size * 2) + | 1838 | (sockaddr_size * 2) + |
1734 | sizeof(struct sadb_x_policy) + | 1839 | sizeof(struct sadb_x_policy) + |
1735 | (xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) + | 1840 | (xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) + |
1736 | (socklen * 2))); | 1841 | (socklen * 2))) + |
1842 | pfkey_xfrm_policy2sec_ctx_size(xp); | ||
1737 | } | 1843 | } |
1738 | 1844 | ||
1739 | static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) | 1845 | static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) |
@@ -1757,6 +1863,8 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i | |||
1757 | struct sadb_lifetime *lifetime; | 1863 | struct sadb_lifetime *lifetime; |
1758 | struct sadb_x_policy *pol; | 1864 | struct sadb_x_policy *pol; |
1759 | struct sockaddr_in *sin; | 1865 | struct sockaddr_in *sin; |
1866 | struct sadb_x_sec_ctx *sec_ctx; | ||
1867 | struct xfrm_sec_ctx *xfrm_ctx; | ||
1760 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 1868 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
1761 | struct sockaddr_in6 *sin6; | 1869 | struct sockaddr_in6 *sin6; |
1762 | #endif | 1870 | #endif |
@@ -1941,6 +2049,21 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i | |||
1941 | } | 2049 | } |
1942 | } | 2050 | } |
1943 | } | 2051 | } |
2052 | |||
2053 | /* security context */ | ||
2054 | if ((xfrm_ctx = xp->security)) { | ||
2055 | int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp); | ||
2056 | |||
2057 | sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, ctx_size); | ||
2058 | sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t); | ||
2059 | sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; | ||
2060 | sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; | ||
2061 | sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; | ||
2062 | sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; | ||
2063 | memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, | ||
2064 | xfrm_ctx->ctx_len); | ||
2065 | } | ||
2066 | |||
1944 | hdr->sadb_msg_len = size / sizeof(uint64_t); | 2067 | hdr->sadb_msg_len = size / sizeof(uint64_t); |
1945 | hdr->sadb_msg_reserved = atomic_read(&xp->refcnt); | 2068 | hdr->sadb_msg_reserved = atomic_read(&xp->refcnt); |
1946 | } | 2069 | } |
@@ -1976,12 +2099,13 @@ out: | |||
1976 | 2099 | ||
1977 | static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) | 2100 | static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) |
1978 | { | 2101 | { |
1979 | int err; | 2102 | int err = 0; |
1980 | struct sadb_lifetime *lifetime; | 2103 | struct sadb_lifetime *lifetime; |
1981 | struct sadb_address *sa; | 2104 | struct sadb_address *sa; |
1982 | struct sadb_x_policy *pol; | 2105 | struct sadb_x_policy *pol; |
1983 | struct xfrm_policy *xp; | 2106 | struct xfrm_policy *xp; |
1984 | struct km_event c; | 2107 | struct km_event c; |
2108 | struct sadb_x_sec_ctx *sec_ctx; | ||
1985 | 2109 | ||
1986 | if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], | 2110 | if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], |
1987 | ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || | 2111 | ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || |
@@ -2028,6 +2152,22 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h | |||
2028 | if (xp->selector.dport) | 2152 | if (xp->selector.dport) |
2029 | xp->selector.dport_mask = ~0; | 2153 | xp->selector.dport_mask = ~0; |
2030 | 2154 | ||
2155 | sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; | ||
2156 | if (sec_ctx != NULL) { | ||
2157 | struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); | ||
2158 | |||
2159 | if (!uctx) { | ||
2160 | err = -ENOBUFS; | ||
2161 | goto out; | ||
2162 | } | ||
2163 | |||
2164 | err = security_xfrm_policy_alloc(xp, uctx); | ||
2165 | kfree(uctx); | ||
2166 | |||
2167 | if (err) | ||
2168 | goto out; | ||
2169 | } | ||
2170 | |||
2031 | xp->lft.soft_byte_limit = XFRM_INF; | 2171 | xp->lft.soft_byte_limit = XFRM_INF; |
2032 | xp->lft.hard_byte_limit = XFRM_INF; | 2172 | xp->lft.hard_byte_limit = XFRM_INF; |
2033 | xp->lft.soft_packet_limit = XFRM_INF; | 2173 | xp->lft.soft_packet_limit = XFRM_INF; |
@@ -2051,10 +2191,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h | |||
2051 | 2191 | ||
2052 | err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, | 2192 | err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, |
2053 | hdr->sadb_msg_type != SADB_X_SPDUPDATE); | 2193 | hdr->sadb_msg_type != SADB_X_SPDUPDATE); |
2054 | if (err) { | 2194 | |
2055 | kfree(xp); | 2195 | if (err) |
2056 | return err; | 2196 | goto out; |
2057 | } | ||
2058 | 2197 | ||
2059 | if (hdr->sadb_msg_type == SADB_X_SPDUPDATE) | 2198 | if (hdr->sadb_msg_type == SADB_X_SPDUPDATE) |
2060 | c.event = XFRM_MSG_UPDPOLICY; | 2199 | c.event = XFRM_MSG_UPDPOLICY; |
@@ -2069,6 +2208,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h | |||
2069 | return 0; | 2208 | return 0; |
2070 | 2209 | ||
2071 | out: | 2210 | out: |
2211 | security_xfrm_policy_free(xp); | ||
2072 | kfree(xp); | 2212 | kfree(xp); |
2073 | return err; | 2213 | return err; |
2074 | } | 2214 | } |
@@ -2078,9 +2218,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg | |||
2078 | int err; | 2218 | int err; |
2079 | struct sadb_address *sa; | 2219 | struct sadb_address *sa; |
2080 | struct sadb_x_policy *pol; | 2220 | struct sadb_x_policy *pol; |
2081 | struct xfrm_policy *xp; | 2221 | struct xfrm_policy *xp, tmp; |
2082 | struct xfrm_selector sel; | 2222 | struct xfrm_selector sel; |
2083 | struct km_event c; | 2223 | struct km_event c; |
2224 | struct sadb_x_sec_ctx *sec_ctx; | ||
2084 | 2225 | ||
2085 | if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], | 2226 | if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], |
2086 | ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || | 2227 | ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || |
@@ -2109,7 +2250,24 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg | |||
2109 | if (sel.dport) | 2250 | if (sel.dport) |
2110 | sel.dport_mask = ~0; | 2251 | sel.dport_mask = ~0; |
2111 | 2252 | ||
2112 | xp = xfrm_policy_bysel(pol->sadb_x_policy_dir-1, &sel, 1); | 2253 | sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; |
2254 | memset(&tmp, 0, sizeof(struct xfrm_policy)); | ||
2255 | |||
2256 | if (sec_ctx != NULL) { | ||
2257 | struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); | ||
2258 | |||
2259 | if (!uctx) | ||
2260 | return -ENOMEM; | ||
2261 | |||
2262 | err = security_xfrm_policy_alloc(&tmp, uctx); | ||
2263 | kfree(uctx); | ||
2264 | |||
2265 | if (err) | ||
2266 | return err; | ||
2267 | } | ||
2268 | |||
2269 | xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); | ||
2270 | security_xfrm_policy_free(&tmp); | ||
2113 | if (xp == NULL) | 2271 | if (xp == NULL) |
2114 | return -ENOENT; | 2272 | return -ENOENT; |
2115 | 2273 | ||
@@ -2660,6 +2818,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, | |||
2660 | { | 2818 | { |
2661 | struct xfrm_policy *xp; | 2819 | struct xfrm_policy *xp; |
2662 | struct sadb_x_policy *pol = (struct sadb_x_policy*)data; | 2820 | struct sadb_x_policy *pol = (struct sadb_x_policy*)data; |
2821 | struct sadb_x_sec_ctx *sec_ctx; | ||
2663 | 2822 | ||
2664 | switch (family) { | 2823 | switch (family) { |
2665 | case AF_INET: | 2824 | case AF_INET: |
@@ -2709,10 +2868,32 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, | |||
2709 | (*dir = parse_ipsecrequests(xp, pol)) < 0) | 2868 | (*dir = parse_ipsecrequests(xp, pol)) < 0) |
2710 | goto out; | 2869 | goto out; |
2711 | 2870 | ||
2871 | /* security context too */ | ||
2872 | if (len >= (pol->sadb_x_policy_len*8 + | ||
2873 | sizeof(struct sadb_x_sec_ctx))) { | ||
2874 | char *p = (char *)pol; | ||
2875 | struct xfrm_user_sec_ctx *uctx; | ||
2876 | |||
2877 | p += pol->sadb_x_policy_len*8; | ||
2878 | sec_ctx = (struct sadb_x_sec_ctx *)p; | ||
2879 | if (len < pol->sadb_x_policy_len*8 + | ||
2880 | sec_ctx->sadb_x_sec_len) | ||
2881 | goto out; | ||
2882 | if ((*dir = verify_sec_ctx_len(p))) | ||
2883 | goto out; | ||
2884 | uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); | ||
2885 | *dir = security_xfrm_policy_alloc(xp, uctx); | ||
2886 | kfree(uctx); | ||
2887 | |||
2888 | if (*dir) | ||
2889 | goto out; | ||
2890 | } | ||
2891 | |||
2712 | *dir = pol->sadb_x_policy_dir-1; | 2892 | *dir = pol->sadb_x_policy_dir-1; |
2713 | return xp; | 2893 | return xp; |
2714 | 2894 | ||
2715 | out: | 2895 | out: |
2896 | security_xfrm_policy_free(xp); | ||
2716 | kfree(xp); | 2897 | kfree(xp); |
2717 | return NULL; | 2898 | return NULL; |
2718 | } | 2899 | } |
@@ -2946,7 +3127,7 @@ out: | |||
2946 | return err; | 3127 | return err; |
2947 | } | 3128 | } |
2948 | 3129 | ||
2949 | static struct proto_ops pfkey_ops = { | 3130 | static const struct proto_ops pfkey_ops = { |
2950 | .family = PF_KEY, | 3131 | .family = PF_KEY, |
2951 | .owner = THIS_MODULE, | 3132 | .owner = THIS_MODULE, |
2952 | /* Operations that make no sense on pfkey sockets. */ | 3133 | /* Operations that make no sense on pfkey sockets. */ |
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index c3f0b0783453..8171c53bc0ed 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c | |||
@@ -36,7 +36,7 @@ | |||
36 | static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; | 36 | static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; |
37 | static u16 llc_ui_sap_link_no_max[256]; | 37 | static u16 llc_ui_sap_link_no_max[256]; |
38 | static struct sockaddr_llc llc_ui_addrnull; | 38 | static struct sockaddr_llc llc_ui_addrnull; |
39 | static struct proto_ops llc_ui_ops; | 39 | static const struct proto_ops llc_ui_ops; |
40 | 40 | ||
41 | static int llc_ui_wait_for_conn(struct sock *sk, long timeout); | 41 | static int llc_ui_wait_for_conn(struct sock *sk, long timeout); |
42 | static int llc_ui_wait_for_disc(struct sock *sk, long timeout); | 42 | static int llc_ui_wait_for_disc(struct sock *sk, long timeout); |
@@ -566,10 +566,9 @@ static int llc_wait_data(struct sock *sk, long timeo) | |||
566 | /* | 566 | /* |
567 | * POSIX 1003.1g mandates this order. | 567 | * POSIX 1003.1g mandates this order. |
568 | */ | 568 | */ |
569 | if (sk->sk_err) { | 569 | rc = sock_error(sk); |
570 | rc = sock_error(sk); | 570 | if (rc) |
571 | break; | 571 | break; |
572 | } | ||
573 | rc = 0; | 572 | rc = 0; |
574 | if (sk->sk_shutdown & RCV_SHUTDOWN) | 573 | if (sk->sk_shutdown & RCV_SHUTDOWN) |
575 | break; | 574 | break; |
@@ -960,7 +959,7 @@ out: | |||
960 | static int llc_ui_ioctl(struct socket *sock, unsigned int cmd, | 959 | static int llc_ui_ioctl(struct socket *sock, unsigned int cmd, |
961 | unsigned long arg) | 960 | unsigned long arg) |
962 | { | 961 | { |
963 | return dev_ioctl(cmd, (void __user *)arg); | 962 | return -ENOIOCTLCMD; |
964 | } | 963 | } |
965 | 964 | ||
966 | /** | 965 | /** |
@@ -1099,7 +1098,7 @@ static struct net_proto_family llc_ui_family_ops = { | |||
1099 | .owner = THIS_MODULE, | 1098 | .owner = THIS_MODULE, |
1100 | }; | 1099 | }; |
1101 | 1100 | ||
1102 | static struct proto_ops llc_ui_ops = { | 1101 | static const struct proto_ops llc_ui_ops = { |
1103 | .family = PF_LLC, | 1102 | .family = PF_LLC, |
1104 | .owner = THIS_MODULE, | 1103 | .owner = THIS_MODULE, |
1105 | .release = llc_ui_release, | 1104 | .release = llc_ui_release, |
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index cba63729313d..e10512e229b6 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c | |||
@@ -151,7 +151,7 @@ instance_create(u_int16_t group_num, int pid) | |||
151 | goto out_unlock; | 151 | goto out_unlock; |
152 | 152 | ||
153 | INIT_HLIST_NODE(&inst->hlist); | 153 | INIT_HLIST_NODE(&inst->hlist); |
154 | inst->lock = SPIN_LOCK_UNLOCKED; | 154 | spin_lock_init(&inst->lock); |
155 | /* needs to be two, since we _put() after creation */ | 155 | /* needs to be two, since we _put() after creation */ |
156 | atomic_set(&inst->use, 2); | 156 | atomic_set(&inst->use, 2); |
157 | 157 | ||
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f28460b61e47..55afdda3d940 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c | |||
@@ -148,7 +148,7 @@ instance_create(u_int16_t queue_num, int pid) | |||
148 | atomic_set(&inst->id_sequence, 0); | 148 | atomic_set(&inst->id_sequence, 0); |
149 | /* needs to be two, since we _put() after creation */ | 149 | /* needs to be two, since we _put() after creation */ |
150 | atomic_set(&inst->use, 2); | 150 | atomic_set(&inst->use, 2); |
151 | inst->lock = SPIN_LOCK_UNLOCKED; | 151 | spin_lock_init(&inst->lock); |
152 | INIT_LIST_HEAD(&inst->queue_list); | 152 | INIT_LIST_HEAD(&inst->queue_list); |
153 | 153 | ||
154 | if (!try_module_get(THIS_MODULE)) | 154 | if (!try_module_get(THIS_MODULE)) |
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 96020d7087e8..7849cac14d3a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -293,7 +293,7 @@ static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) | |||
293 | return 0; | 293 | return 0; |
294 | } | 294 | } |
295 | 295 | ||
296 | static struct proto_ops netlink_ops; | 296 | static const struct proto_ops netlink_ops; |
297 | 297 | ||
298 | static int netlink_insert(struct sock *sk, u32 pid) | 298 | static int netlink_insert(struct sock *sk, u32 pid) |
299 | { | 299 | { |
@@ -1656,7 +1656,7 @@ int netlink_unregister_notifier(struct notifier_block *nb) | |||
1656 | return notifier_chain_unregister(&netlink_chain, nb); | 1656 | return notifier_chain_unregister(&netlink_chain, nb); |
1657 | } | 1657 | } |
1658 | 1658 | ||
1659 | static struct proto_ops netlink_ops = { | 1659 | static const struct proto_ops netlink_ops = { |
1660 | .family = PF_NETLINK, | 1660 | .family = PF_NETLINK, |
1661 | .owner = THIS_MODULE, | 1661 | .owner = THIS_MODULE, |
1662 | .release = netlink_release, | 1662 | .release = netlink_release, |
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 287cfcc56951..3b1378498d50 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c | |||
@@ -441,7 +441,7 @@ errout: | |||
441 | } | 441 | } |
442 | 442 | ||
443 | static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, | 443 | static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, |
444 | int seq, int cmd) | 444 | int seq, u8 cmd) |
445 | { | 445 | { |
446 | struct sk_buff *skb; | 446 | struct sk_buff *skb; |
447 | int err; | 447 | int err; |
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index e5d82d711cae..63b0e4afeb33 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c | |||
@@ -63,7 +63,7 @@ static unsigned short circuit = 0x101; | |||
63 | static HLIST_HEAD(nr_list); | 63 | static HLIST_HEAD(nr_list); |
64 | static DEFINE_SPINLOCK(nr_list_lock); | 64 | static DEFINE_SPINLOCK(nr_list_lock); |
65 | 65 | ||
66 | static struct proto_ops nr_proto_ops; | 66 | static const struct proto_ops nr_proto_ops; |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Socket removal during an interrupt is now safe. | 69 | * Socket removal during an interrupt is now safe. |
@@ -1166,10 +1166,11 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1166 | void __user *argp = (void __user *)arg; | 1166 | void __user *argp = (void __user *)arg; |
1167 | int ret; | 1167 | int ret; |
1168 | 1168 | ||
1169 | lock_sock(sk); | ||
1170 | switch (cmd) { | 1169 | switch (cmd) { |
1171 | case TIOCOUTQ: { | 1170 | case TIOCOUTQ: { |
1172 | long amount; | 1171 | long amount; |
1172 | |||
1173 | lock_sock(sk); | ||
1173 | amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); | 1174 | amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); |
1174 | if (amount < 0) | 1175 | if (amount < 0) |
1175 | amount = 0; | 1176 | amount = 0; |
@@ -1180,6 +1181,8 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1180 | case TIOCINQ: { | 1181 | case TIOCINQ: { |
1181 | struct sk_buff *skb; | 1182 | struct sk_buff *skb; |
1182 | long amount = 0L; | 1183 | long amount = 0L; |
1184 | |||
1185 | lock_sock(sk); | ||
1183 | /* These two are safe on a single CPU system as only user tasks fiddle here */ | 1186 | /* These two are safe on a single CPU system as only user tasks fiddle here */ |
1184 | if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) | 1187 | if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) |
1185 | amount = skb->len; | 1188 | amount = skb->len; |
@@ -1188,6 +1191,7 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1188 | } | 1191 | } |
1189 | 1192 | ||
1190 | case SIOCGSTAMP: | 1193 | case SIOCGSTAMP: |
1194 | lock_sock(sk); | ||
1191 | ret = sock_get_timestamp(sk, argp); | 1195 | ret = sock_get_timestamp(sk, argp); |
1192 | release_sock(sk); | 1196 | release_sock(sk); |
1193 | return ret; | 1197 | return ret; |
@@ -1202,21 +1206,17 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1202 | case SIOCSIFNETMASK: | 1206 | case SIOCSIFNETMASK: |
1203 | case SIOCGIFMETRIC: | 1207 | case SIOCGIFMETRIC: |
1204 | case SIOCSIFMETRIC: | 1208 | case SIOCSIFMETRIC: |
1205 | release_sock(sk); | ||
1206 | return -EINVAL; | 1209 | return -EINVAL; |
1207 | 1210 | ||
1208 | case SIOCADDRT: | 1211 | case SIOCADDRT: |
1209 | case SIOCDELRT: | 1212 | case SIOCDELRT: |
1210 | case SIOCNRDECOBS: | 1213 | case SIOCNRDECOBS: |
1211 | release_sock(sk); | ||
1212 | if (!capable(CAP_NET_ADMIN)) return -EPERM; | 1214 | if (!capable(CAP_NET_ADMIN)) return -EPERM; |
1213 | return nr_rt_ioctl(cmd, argp); | 1215 | return nr_rt_ioctl(cmd, argp); |
1214 | 1216 | ||
1215 | default: | 1217 | default: |
1216 | release_sock(sk); | 1218 | return -ENOIOCTLCMD; |
1217 | return dev_ioctl(cmd, argp); | ||
1218 | } | 1219 | } |
1219 | release_sock(sk); | ||
1220 | 1220 | ||
1221 | return 0; | 1221 | return 0; |
1222 | } | 1222 | } |
@@ -1337,7 +1337,7 @@ static struct net_proto_family nr_family_ops = { | |||
1337 | .owner = THIS_MODULE, | 1337 | .owner = THIS_MODULE, |
1338 | }; | 1338 | }; |
1339 | 1339 | ||
1340 | static struct proto_ops nr_proto_ops = { | 1340 | static const struct proto_ops nr_proto_ops = { |
1341 | .family = PF_NETROM, | 1341 | .family = PF_NETROM, |
1342 | .owner = THIS_MODULE, | 1342 | .owner = THIS_MODULE, |
1343 | .release = nr_release, | 1343 | .release = nr_release, |
diff --git a/net/nonet.c b/net/nonet.c index e5241dceaa57..1230f0ae832e 100644 --- a/net/nonet.c +++ b/net/nonet.c | |||
@@ -14,11 +14,6 @@ | |||
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
16 | 16 | ||
17 | void __init sock_init(void) | ||
18 | { | ||
19 | printk(KERN_INFO "Linux NoNET1.0 for Linux 2.6\n"); | ||
20 | } | ||
21 | |||
22 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare) | 17 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare) |
23 | { | 18 | { |
24 | return -ENXIO; | 19 | return -ENXIO; |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3e2462760413..f69e5ed9bd06 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -251,10 +251,10 @@ static void packet_sock_destruct(struct sock *sk) | |||
251 | } | 251 | } |
252 | 252 | ||
253 | 253 | ||
254 | static struct proto_ops packet_ops; | 254 | static const struct proto_ops packet_ops; |
255 | 255 | ||
256 | #ifdef CONFIG_SOCK_PACKET | 256 | #ifdef CONFIG_SOCK_PACKET |
257 | static struct proto_ops packet_ops_spkt; | 257 | static const struct proto_ops packet_ops_spkt; |
258 | 258 | ||
259 | static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) | 259 | static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) |
260 | { | 260 | { |
@@ -1521,7 +1521,7 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, | |||
1521 | #endif | 1521 | #endif |
1522 | 1522 | ||
1523 | default: | 1523 | default: |
1524 | return dev_ioctl(cmd, (void __user *)arg); | 1524 | return -ENOIOCTLCMD; |
1525 | } | 1525 | } |
1526 | return 0; | 1526 | return 0; |
1527 | } | 1527 | } |
@@ -1784,7 +1784,7 @@ out: | |||
1784 | 1784 | ||
1785 | 1785 | ||
1786 | #ifdef CONFIG_SOCK_PACKET | 1786 | #ifdef CONFIG_SOCK_PACKET |
1787 | static struct proto_ops packet_ops_spkt = { | 1787 | static const struct proto_ops packet_ops_spkt = { |
1788 | .family = PF_PACKET, | 1788 | .family = PF_PACKET, |
1789 | .owner = THIS_MODULE, | 1789 | .owner = THIS_MODULE, |
1790 | .release = packet_release, | 1790 | .release = packet_release, |
@@ -1806,7 +1806,7 @@ static struct proto_ops packet_ops_spkt = { | |||
1806 | }; | 1806 | }; |
1807 | #endif | 1807 | #endif |
1808 | 1808 | ||
1809 | static struct proto_ops packet_ops = { | 1809 | static const struct proto_ops packet_ops = { |
1810 | .family = PF_PACKET, | 1810 | .family = PF_PACKET, |
1811 | .owner = THIS_MODULE, | 1811 | .owner = THIS_MODULE, |
1812 | .release = packet_release, | 1812 | .release = packet_release, |
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 829fdbc4400b..63090be2315a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c | |||
@@ -1320,7 +1320,7 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1320 | return 0; | 1320 | return 0; |
1321 | 1321 | ||
1322 | default: | 1322 | default: |
1323 | return dev_ioctl(cmd, argp); | 1323 | return -ENOIOCTLCMD; |
1324 | } | 1324 | } |
1325 | 1325 | ||
1326 | return 0; | 1326 | return 0; |
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 82fb07aa06a5..ba5283204837 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c | |||
@@ -25,7 +25,7 @@ | |||
25 | 25 | ||
26 | #include <net/pkt_sched.h> | 26 | #include <net/pkt_sched.h> |
27 | 27 | ||
28 | #define VERSION "1.1" | 28 | #define VERSION "1.2" |
29 | 29 | ||
30 | /* Network Emulation Queuing algorithm. | 30 | /* Network Emulation Queuing algorithm. |
31 | ==================================== | 31 | ==================================== |
@@ -65,11 +65,12 @@ struct netem_sched_data { | |||
65 | u32 jitter; | 65 | u32 jitter; |
66 | u32 duplicate; | 66 | u32 duplicate; |
67 | u32 reorder; | 67 | u32 reorder; |
68 | u32 corrupt; | ||
68 | 69 | ||
69 | struct crndstate { | 70 | struct crndstate { |
70 | unsigned long last; | 71 | unsigned long last; |
71 | unsigned long rho; | 72 | unsigned long rho; |
72 | } delay_cor, loss_cor, dup_cor, reorder_cor; | 73 | } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; |
73 | 74 | ||
74 | struct disttable { | 75 | struct disttable { |
75 | u32 size; | 76 | u32 size; |
@@ -183,6 +184,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
183 | q->duplicate = dupsave; | 184 | q->duplicate = dupsave; |
184 | } | 185 | } |
185 | 186 | ||
187 | /* | ||
188 | * Randomized packet corruption. | ||
189 | * Make copy if needed since we are modifying | ||
190 | * If packet is going to be hardware checksummed, then | ||
191 | * do it now in software before we mangle it. | ||
192 | */ | ||
193 | if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { | ||
194 | if (!(skb = skb_unshare(skb, GFP_ATOMIC)) | ||
195 | || (skb->ip_summed == CHECKSUM_HW | ||
196 | && skb_checksum_help(skb, 0))) { | ||
197 | sch->qstats.drops++; | ||
198 | return NET_XMIT_DROP; | ||
199 | } | ||
200 | |||
201 | skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); | ||
202 | } | ||
203 | |||
186 | if (q->gap == 0 /* not doing reordering */ | 204 | if (q->gap == 0 /* not doing reordering */ |
187 | || q->counter < q->gap /* inside last reordering gap */ | 205 | || q->counter < q->gap /* inside last reordering gap */ |
188 | || q->reorder < get_crandom(&q->reorder_cor)) { | 206 | || q->reorder < get_crandom(&q->reorder_cor)) { |
@@ -382,6 +400,20 @@ static int get_reorder(struct Qdisc *sch, const struct rtattr *attr) | |||
382 | return 0; | 400 | return 0; |
383 | } | 401 | } |
384 | 402 | ||
403 | static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr) | ||
404 | { | ||
405 | struct netem_sched_data *q = qdisc_priv(sch); | ||
406 | const struct tc_netem_corrupt *r = RTA_DATA(attr); | ||
407 | |||
408 | if (RTA_PAYLOAD(attr) != sizeof(*r)) | ||
409 | return -EINVAL; | ||
410 | |||
411 | q->corrupt = r->probability; | ||
412 | init_crandom(&q->corrupt_cor, r->correlation); | ||
413 | return 0; | ||
414 | } | ||
415 | |||
416 | /* Parse netlink message to set options */ | ||
385 | static int netem_change(struct Qdisc *sch, struct rtattr *opt) | 417 | static int netem_change(struct Qdisc *sch, struct rtattr *opt) |
386 | { | 418 | { |
387 | struct netem_sched_data *q = qdisc_priv(sch); | 419 | struct netem_sched_data *q = qdisc_priv(sch); |
@@ -432,13 +464,19 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) | |||
432 | if (ret) | 464 | if (ret) |
433 | return ret; | 465 | return ret; |
434 | } | 466 | } |
467 | |||
435 | if (tb[TCA_NETEM_REORDER-1]) { | 468 | if (tb[TCA_NETEM_REORDER-1]) { |
436 | ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); | 469 | ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); |
437 | if (ret) | 470 | if (ret) |
438 | return ret; | 471 | return ret; |
439 | } | 472 | } |
440 | } | ||
441 | 473 | ||
474 | if (tb[TCA_NETEM_CORRUPT-1]) { | ||
475 | ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]); | ||
476 | if (ret) | ||
477 | return ret; | ||
478 | } | ||
479 | } | ||
442 | 480 | ||
443 | return 0; | 481 | return 0; |
444 | } | 482 | } |
@@ -564,6 +602,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
564 | struct tc_netem_qopt qopt; | 602 | struct tc_netem_qopt qopt; |
565 | struct tc_netem_corr cor; | 603 | struct tc_netem_corr cor; |
566 | struct tc_netem_reorder reorder; | 604 | struct tc_netem_reorder reorder; |
605 | struct tc_netem_corrupt corrupt; | ||
567 | 606 | ||
568 | qopt.latency = q->latency; | 607 | qopt.latency = q->latency; |
569 | qopt.jitter = q->jitter; | 608 | qopt.jitter = q->jitter; |
@@ -582,6 +621,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
582 | reorder.correlation = q->reorder_cor.rho; | 621 | reorder.correlation = q->reorder_cor.rho; |
583 | RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); | 622 | RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); |
584 | 623 | ||
624 | corrupt.probability = q->corrupt; | ||
625 | corrupt.correlation = q->corrupt_cor.rho; | ||
626 | RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); | ||
627 | |||
585 | rta->rta_len = skb->tail - b; | 628 | rta->rta_len = skb->tail - b; |
586 | 629 | ||
587 | return skb->len; | 630 | return skb->len; |
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6cf0342706b5..c4a2a8c4c339 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/in.h> | 22 | #include <linux/in.h> |
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
25 | #include <linux/if_arp.h> | ||
25 | #include <linux/if_ether.h> | 26 | #include <linux/if_ether.h> |
26 | #include <linux/inet.h> | 27 | #include <linux/inet.h> |
27 | #include <linux/netdevice.h> | 28 | #include <linux/netdevice.h> |
diff --git a/net/sctp/associola.c b/net/sctp/associola.c index dec68a604773..9d05e13e92f6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c | |||
@@ -110,7 +110,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a | |||
110 | asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000; | 110 | asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000; |
111 | asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000) | 111 | asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000) |
112 | * 1000; | 112 | * 1000; |
113 | asoc->pmtu = 0; | ||
114 | asoc->frag_point = 0; | 113 | asoc->frag_point = 0; |
115 | 114 | ||
116 | /* Set the association max_retrans and RTO values from the | 115 | /* Set the association max_retrans and RTO values from the |
@@ -123,6 +122,25 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a | |||
123 | 122 | ||
124 | asoc->overall_error_count = 0; | 123 | asoc->overall_error_count = 0; |
125 | 124 | ||
125 | /* Initialize the association's heartbeat interval based on the | ||
126 | * sock configured value. | ||
127 | */ | ||
128 | asoc->hbinterval = msecs_to_jiffies(sp->hbinterval); | ||
129 | |||
130 | /* Initialize path max retrans value. */ | ||
131 | asoc->pathmaxrxt = sp->pathmaxrxt; | ||
132 | |||
133 | /* Initialize default path MTU. */ | ||
134 | asoc->pathmtu = sp->pathmtu; | ||
135 | |||
136 | /* Set association default SACK delay */ | ||
137 | asoc->sackdelay = msecs_to_jiffies(sp->sackdelay); | ||
138 | |||
139 | /* Set the association default flags controlling | ||
140 | * Heartbeat, SACK delay, and Path MTU Discovery. | ||
141 | */ | ||
142 | asoc->param_flags = sp->param_flags; | ||
143 | |||
126 | /* Initialize the maximum mumber of new data packets that can be sent | 144 | /* Initialize the maximum mumber of new data packets that can be sent |
127 | * in a burst. | 145 | * in a burst. |
128 | */ | 146 | */ |
@@ -144,8 +162,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a | |||
144 | = 5 * asoc->rto_max; | 162 | = 5 * asoc->rto_max; |
145 | 163 | ||
146 | asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; | 164 | asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; |
147 | asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = | 165 | asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; |
148 | SCTP_DEFAULT_TIMEOUT_SACK; | ||
149 | asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = | 166 | asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = |
150 | sp->autoclose * HZ; | 167 | sp->autoclose * HZ; |
151 | 168 | ||
@@ -540,23 +557,46 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, | |||
540 | 557 | ||
541 | sctp_transport_set_owner(peer, asoc); | 558 | sctp_transport_set_owner(peer, asoc); |
542 | 559 | ||
560 | /* Initialize the peer's heartbeat interval based on the | ||
561 | * association configured value. | ||
562 | */ | ||
563 | peer->hbinterval = asoc->hbinterval; | ||
564 | |||
565 | /* Set the path max_retrans. */ | ||
566 | peer->pathmaxrxt = asoc->pathmaxrxt; | ||
567 | |||
568 | /* Initialize the peer's SACK delay timeout based on the | ||
569 | * association configured value. | ||
570 | */ | ||
571 | peer->sackdelay = asoc->sackdelay; | ||
572 | |||
573 | /* Enable/disable heartbeat, SACK delay, and path MTU discovery | ||
574 | * based on association setting. | ||
575 | */ | ||
576 | peer->param_flags = asoc->param_flags; | ||
577 | |||
543 | /* Initialize the pmtu of the transport. */ | 578 | /* Initialize the pmtu of the transport. */ |
544 | sctp_transport_pmtu(peer); | 579 | if (peer->param_flags & SPP_PMTUD_ENABLE) |
580 | sctp_transport_pmtu(peer); | ||
581 | else if (asoc->pathmtu) | ||
582 | peer->pathmtu = asoc->pathmtu; | ||
583 | else | ||
584 | peer->pathmtu = SCTP_DEFAULT_MAXSEGMENT; | ||
545 | 585 | ||
546 | /* If this is the first transport addr on this association, | 586 | /* If this is the first transport addr on this association, |
547 | * initialize the association PMTU to the peer's PMTU. | 587 | * initialize the association PMTU to the peer's PMTU. |
548 | * If not and the current association PMTU is higher than the new | 588 | * If not and the current association PMTU is higher than the new |
549 | * peer's PMTU, reset the association PMTU to the new peer's PMTU. | 589 | * peer's PMTU, reset the association PMTU to the new peer's PMTU. |
550 | */ | 590 | */ |
551 | if (asoc->pmtu) | 591 | if (asoc->pathmtu) |
552 | asoc->pmtu = min_t(int, peer->pmtu, asoc->pmtu); | 592 | asoc->pathmtu = min_t(int, peer->pathmtu, asoc->pathmtu); |
553 | else | 593 | else |
554 | asoc->pmtu = peer->pmtu; | 594 | asoc->pathmtu = peer->pathmtu; |
555 | 595 | ||
556 | SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " | 596 | SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " |
557 | "%d\n", asoc, asoc->pmtu); | 597 | "%d\n", asoc, asoc->pathmtu); |
558 | 598 | ||
559 | asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); | 599 | asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); |
560 | 600 | ||
561 | /* The asoc->peer.port might not be meaningful yet, but | 601 | /* The asoc->peer.port might not be meaningful yet, but |
562 | * initialize the packet structure anyway. | 602 | * initialize the packet structure anyway. |
@@ -574,7 +614,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, | |||
574 | * (for example, implementations MAY use the size of the | 614 | * (for example, implementations MAY use the size of the |
575 | * receiver advertised window). | 615 | * receiver advertised window). |
576 | */ | 616 | */ |
577 | peer->cwnd = min(4*asoc->pmtu, max_t(__u32, 2*asoc->pmtu, 4380)); | 617 | peer->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); |
578 | 618 | ||
579 | /* At this point, we may not have the receiver's advertised window, | 619 | /* At this point, we may not have the receiver's advertised window, |
580 | * so initialize ssthresh to the default value and it will be set | 620 | * so initialize ssthresh to the default value and it will be set |
@@ -585,17 +625,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, | |||
585 | peer->partial_bytes_acked = 0; | 625 | peer->partial_bytes_acked = 0; |
586 | peer->flight_size = 0; | 626 | peer->flight_size = 0; |
587 | 627 | ||
588 | /* By default, enable heartbeat for peer address. */ | ||
589 | peer->hb_allowed = 1; | ||
590 | |||
591 | /* Initialize the peer's heartbeat interval based on the | ||
592 | * sock configured value. | ||
593 | */ | ||
594 | peer->hb_interval = msecs_to_jiffies(sp->paddrparam.spp_hbinterval); | ||
595 | |||
596 | /* Set the path max_retrans. */ | ||
597 | peer->max_retrans = sp->paddrparam.spp_pathmaxrxt; | ||
598 | |||
599 | /* Set the transport's RTO.initial value */ | 628 | /* Set the transport's RTO.initial value */ |
600 | peer->rto = asoc->rto_initial; | 629 | peer->rto = asoc->rto_initial; |
601 | 630 | ||
@@ -1155,18 +1184,18 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) | |||
1155 | /* Get the lowest pmtu of all the transports. */ | 1184 | /* Get the lowest pmtu of all the transports. */ |
1156 | list_for_each(pos, &asoc->peer.transport_addr_list) { | 1185 | list_for_each(pos, &asoc->peer.transport_addr_list) { |
1157 | t = list_entry(pos, struct sctp_transport, transports); | 1186 | t = list_entry(pos, struct sctp_transport, transports); |
1158 | if (!pmtu || (t->pmtu < pmtu)) | 1187 | if (!pmtu || (t->pathmtu < pmtu)) |
1159 | pmtu = t->pmtu; | 1188 | pmtu = t->pathmtu; |
1160 | } | 1189 | } |
1161 | 1190 | ||
1162 | if (pmtu) { | 1191 | if (pmtu) { |
1163 | struct sctp_sock *sp = sctp_sk(asoc->base.sk); | 1192 | struct sctp_sock *sp = sctp_sk(asoc->base.sk); |
1164 | asoc->pmtu = pmtu; | 1193 | asoc->pathmtu = pmtu; |
1165 | asoc->frag_point = sctp_frag_point(sp, pmtu); | 1194 | asoc->frag_point = sctp_frag_point(sp, pmtu); |
1166 | } | 1195 | } |
1167 | 1196 | ||
1168 | SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", | 1197 | SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", |
1169 | __FUNCTION__, asoc, asoc->pmtu, asoc->frag_point); | 1198 | __FUNCTION__, asoc, asoc->pathmtu, asoc->frag_point); |
1170 | } | 1199 | } |
1171 | 1200 | ||
1172 | /* Should we send a SACK to update our peer? */ | 1201 | /* Should we send a SACK to update our peer? */ |
@@ -1179,7 +1208,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) | |||
1179 | case SCTP_STATE_SHUTDOWN_SENT: | 1208 | case SCTP_STATE_SHUTDOWN_SENT: |
1180 | if ((asoc->rwnd > asoc->a_rwnd) && | 1209 | if ((asoc->rwnd > asoc->a_rwnd) && |
1181 | ((asoc->rwnd - asoc->a_rwnd) >= | 1210 | ((asoc->rwnd - asoc->a_rwnd) >= |
1182 | min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pmtu))) | 1211 | min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu))) |
1183 | return 1; | 1212 | return 1; |
1184 | break; | 1213 | break; |
1185 | default: | 1214 | default: |
diff --git a/net/sctp/input.c b/net/sctp/input.c index b24ff2c1aef5..238f1bffa684 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c | |||
@@ -305,18 +305,36 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) | |||
305 | void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, | 305 | void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, |
306 | struct sctp_transport *t, __u32 pmtu) | 306 | struct sctp_transport *t, __u32 pmtu) |
307 | { | 307 | { |
308 | if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { | 308 | if (sock_owned_by_user(sk) || !t || (t->pathmtu == pmtu)) |
309 | printk(KERN_WARNING "%s: Reported pmtu %d too low, " | 309 | return; |
310 | "using default minimum of %d\n", __FUNCTION__, pmtu, | ||
311 | SCTP_DEFAULT_MINSEGMENT); | ||
312 | pmtu = SCTP_DEFAULT_MINSEGMENT; | ||
313 | } | ||
314 | 310 | ||
315 | if (!sock_owned_by_user(sk) && t && (t->pmtu != pmtu)) { | 311 | if (t->param_flags & SPP_PMTUD_ENABLE) { |
316 | t->pmtu = pmtu; | 312 | if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { |
313 | printk(KERN_WARNING "%s: Reported pmtu %d too low, " | ||
314 | "using default minimum of %d\n", | ||
315 | __FUNCTION__, pmtu, | ||
316 | SCTP_DEFAULT_MINSEGMENT); | ||
317 | /* Use default minimum segment size and disable | ||
318 | * pmtu discovery on this transport. | ||
319 | */ | ||
320 | t->pathmtu = SCTP_DEFAULT_MINSEGMENT; | ||
321 | t->param_flags = (t->param_flags & ~SPP_HB) | | ||
322 | SPP_PMTUD_DISABLE; | ||
323 | } else { | ||
324 | t->pathmtu = pmtu; | ||
325 | } | ||
326 | |||
327 | /* Update association pmtu. */ | ||
317 | sctp_assoc_sync_pmtu(asoc); | 328 | sctp_assoc_sync_pmtu(asoc); |
318 | sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); | ||
319 | } | 329 | } |
330 | |||
331 | /* Retransmit with the new pmtu setting. | ||
332 | * Normally, if PMTU discovery is disabled, an ICMP Fragmentation | ||
333 | * Needed will never be sent, but if a message was sent before | ||
334 | * PMTU discovery was disabled that was larger than the PMTU, it | ||
335 | * would not be fragmented, so it must be re-transmitted fragmented. | ||
336 | */ | ||
337 | sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); | ||
320 | } | 338 | } |
321 | 339 | ||
322 | /* | 340 | /* |
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index fa3be2b8fb5f..15c05165c905 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c | |||
@@ -866,7 +866,7 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, | |||
866 | return 2; | 866 | return 2; |
867 | } | 867 | } |
868 | 868 | ||
869 | static struct proto_ops inet6_seqpacket_ops = { | 869 | static const struct proto_ops inet6_seqpacket_ops = { |
870 | .family = PF_INET6, | 870 | .family = PF_INET6, |
871 | .owner = THIS_MODULE, | 871 | .owner = THIS_MODULE, |
872 | .release = inet6_release, | 872 | .release = inet6_release, |
diff --git a/net/sctp/output.c b/net/sctp/output.c index 931371633464..a40991ef72c9 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c | |||
@@ -234,8 +234,8 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, | |||
234 | goto finish; | 234 | goto finish; |
235 | 235 | ||
236 | pmtu = ((packet->transport->asoc) ? | 236 | pmtu = ((packet->transport->asoc) ? |
237 | (packet->transport->asoc->pmtu) : | 237 | (packet->transport->asoc->pathmtu) : |
238 | (packet->transport->pmtu)); | 238 | (packet->transport->pathmtu)); |
239 | 239 | ||
240 | too_big = (psize + chunk_len > pmtu); | 240 | too_big = (psize + chunk_len > pmtu); |
241 | 241 | ||
@@ -482,7 +482,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) | |||
482 | if (!dst || (dst->obsolete > 1)) { | 482 | if (!dst || (dst->obsolete > 1)) { |
483 | dst_release(dst); | 483 | dst_release(dst); |
484 | sctp_transport_route(tp, NULL, sctp_sk(sk)); | 484 | sctp_transport_route(tp, NULL, sctp_sk(sk)); |
485 | sctp_assoc_sync_pmtu(asoc); | 485 | if (asoc->param_flags & SPP_PMTUD_ENABLE) { |
486 | sctp_assoc_sync_pmtu(asoc); | ||
487 | } | ||
486 | } | 488 | } |
487 | 489 | ||
488 | nskb->dst = dst_clone(tp->dst); | 490 | nskb->dst = dst_clone(tp->dst); |
@@ -492,7 +494,10 @@ int sctp_packet_transmit(struct sctp_packet *packet) | |||
492 | SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", | 494 | SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", |
493 | nskb->len); | 495 | nskb->len); |
494 | 496 | ||
495 | (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok); | 497 | if (tp->param_flags & SPP_PMTUD_ENABLE) |
498 | (*tp->af_specific->sctp_xmit)(nskb, tp, packet->ipfragok); | ||
499 | else | ||
500 | (*tp->af_specific->sctp_xmit)(nskb, tp, 1); | ||
496 | 501 | ||
497 | out: | 502 | out: |
498 | packet->size = packet->overhead; | 503 | packet->size = packet->overhead; |
@@ -577,7 +582,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, | |||
577 | * if ((flightsize + Max.Burst * MTU) < cwnd) | 582 | * if ((flightsize + Max.Burst * MTU) < cwnd) |
578 | * cwnd = flightsize + Max.Burst * MTU | 583 | * cwnd = flightsize + Max.Burst * MTU |
579 | */ | 584 | */ |
580 | max_burst_bytes = asoc->max_burst * asoc->pmtu; | 585 | max_burst_bytes = asoc->max_burst * asoc->pathmtu; |
581 | if ((transport->flight_size + max_burst_bytes) < transport->cwnd) { | 586 | if ((transport->flight_size + max_burst_bytes) < transport->cwnd) { |
582 | transport->cwnd = transport->flight_size + max_burst_bytes; | 587 | transport->cwnd = transport->flight_size + max_burst_bytes; |
583 | SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " | 588 | SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " |
@@ -622,7 +627,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, | |||
622 | * data will fit or delay in hopes of bundling a full | 627 | * data will fit or delay in hopes of bundling a full |
623 | * sized packet. | 628 | * sized packet. |
624 | */ | 629 | */ |
625 | if (len < asoc->pmtu - packet->overhead) { | 630 | if (len < asoc->pathmtu - packet->overhead) { |
626 | retval = SCTP_XMIT_NAGLE_DELAY; | 631 | retval = SCTP_XMIT_NAGLE_DELAY; |
627 | goto finish; | 632 | goto finish; |
628 | } | 633 | } |
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f775d78aa59d..de693b43c8ea 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <net/protocol.h> | 54 | #include <net/protocol.h> |
55 | #include <net/ip.h> | 55 | #include <net/ip.h> |
56 | #include <net/ipv6.h> | 56 | #include <net/ipv6.h> |
57 | #include <net/route.h> | ||
57 | #include <net/sctp/sctp.h> | 58 | #include <net/sctp/sctp.h> |
58 | #include <net/addrconf.h> | 59 | #include <net/addrconf.h> |
59 | #include <net/inet_common.h> | 60 | #include <net/inet_common.h> |
@@ -829,7 +830,7 @@ static struct notifier_block sctp_inetaddr_notifier = { | |||
829 | }; | 830 | }; |
830 | 831 | ||
831 | /* Socket operations. */ | 832 | /* Socket operations. */ |
832 | static struct proto_ops inet_seqpacket_ops = { | 833 | static const struct proto_ops inet_seqpacket_ops = { |
833 | .family = PF_INET, | 834 | .family = PF_INET, |
834 | .owner = THIS_MODULE, | 835 | .owner = THIS_MODULE, |
835 | .release = inet_release, /* Needs to be wrapped... */ | 836 | .release = inet_release, /* Needs to be wrapped... */ |
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 823947170a33..2d7d8a5db2ac 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c | |||
@@ -157,9 +157,12 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, | |||
157 | { | 157 | { |
158 | __u32 ctsn, max_tsn_seen; | 158 | __u32 ctsn, max_tsn_seen; |
159 | struct sctp_chunk *sack; | 159 | struct sctp_chunk *sack; |
160 | struct sctp_transport *trans = asoc->peer.last_data_from; | ||
160 | int error = 0; | 161 | int error = 0; |
161 | 162 | ||
162 | if (force) | 163 | if (force || |
164 | (!trans && (asoc->param_flags & SPP_SACKDELAY_DISABLE)) || | ||
165 | (trans && (trans->param_flags & SPP_SACKDELAY_DISABLE))) | ||
163 | asoc->peer.sack_needed = 1; | 166 | asoc->peer.sack_needed = 1; |
164 | 167 | ||
165 | ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); | 168 | ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); |
@@ -189,7 +192,22 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, | |||
189 | if (!asoc->peer.sack_needed) { | 192 | if (!asoc->peer.sack_needed) { |
190 | /* We will need a SACK for the next packet. */ | 193 | /* We will need a SACK for the next packet. */ |
191 | asoc->peer.sack_needed = 1; | 194 | asoc->peer.sack_needed = 1; |
192 | goto out; | 195 | |
196 | /* Set the SACK delay timeout based on the | ||
197 | * SACK delay for the last transport | ||
198 | * data was received from, or the default | ||
199 | * for the association. | ||
200 | */ | ||
201 | if (trans) | ||
202 | asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = | ||
203 | trans->sackdelay; | ||
204 | else | ||
205 | asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = | ||
206 | asoc->sackdelay; | ||
207 | |||
208 | /* Restart the SACK timer. */ | ||
209 | sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, | ||
210 | SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); | ||
193 | } else { | 211 | } else { |
194 | if (asoc->a_rwnd > asoc->rwnd) | 212 | if (asoc->a_rwnd > asoc->rwnd) |
195 | asoc->a_rwnd = asoc->rwnd; | 213 | asoc->a_rwnd = asoc->rwnd; |
@@ -205,7 +223,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, | |||
205 | sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, | 223 | sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, |
206 | SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); | 224 | SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); |
207 | } | 225 | } |
208 | out: | 226 | |
209 | return error; | 227 | return error; |
210 | nomem: | 228 | nomem: |
211 | error = -ENOMEM; | 229 | error = -ENOMEM; |
@@ -415,7 +433,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, | |||
415 | asoc->overall_error_count++; | 433 | asoc->overall_error_count++; |
416 | 434 | ||
417 | if (transport->state != SCTP_INACTIVE && | 435 | if (transport->state != SCTP_INACTIVE && |
418 | (transport->error_count++ >= transport->max_retrans)) { | 436 | (transport->error_count++ >= transport->pathmaxrxt)) { |
419 | SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", | 437 | SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", |
420 | " transport IP: port:%d failed.\n", | 438 | " transport IP: port:%d failed.\n", |
421 | asoc, | 439 | asoc, |
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 475bfb4972d9..557a7d90b92a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c | |||
@@ -900,7 +900,7 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, | |||
900 | * HEARTBEAT is sent (see Section 8.3). | 900 | * HEARTBEAT is sent (see Section 8.3). |
901 | */ | 901 | */ |
902 | 902 | ||
903 | if (transport->hb_allowed) { | 903 | if (transport->param_flags & SPP_HB_ENABLE) { |
904 | if (SCTP_DISPOSITION_NOMEM == | 904 | if (SCTP_DISPOSITION_NOMEM == |
905 | sctp_sf_heartbeat(ep, asoc, type, arg, | 905 | sctp_sf_heartbeat(ep, asoc, type, arg, |
906 | commands)) | 906 | commands)) |
@@ -1051,7 +1051,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, | |||
1051 | return SCTP_DISPOSITION_DISCARD; | 1051 | return SCTP_DISPOSITION_DISCARD; |
1052 | } | 1052 | } |
1053 | 1053 | ||
1054 | max_interval = link->hb_interval + link->rto; | 1054 | max_interval = link->hbinterval + link->rto; |
1055 | 1055 | ||
1056 | /* Check if the timestamp looks valid. */ | 1056 | /* Check if the timestamp looks valid. */ |
1057 | if (time_after(hbinfo->sent_at, jiffies) || | 1057 | if (time_after(hbinfo->sent_at, jiffies) || |
@@ -2691,14 +2691,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, | |||
2691 | * document allow. However, an SCTP transmitter MUST NOT be | 2691 | * document allow. However, an SCTP transmitter MUST NOT be |
2692 | * more aggressive than the following algorithms allow. | 2692 | * more aggressive than the following algorithms allow. |
2693 | */ | 2693 | */ |
2694 | if (chunk->end_of_packet) { | 2694 | if (chunk->end_of_packet) |
2695 | sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); | 2695 | sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); |
2696 | 2696 | ||
2697 | /* Start the SACK timer. */ | ||
2698 | sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, | ||
2699 | SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); | ||
2700 | } | ||
2701 | |||
2702 | return SCTP_DISPOSITION_CONSUME; | 2697 | return SCTP_DISPOSITION_CONSUME; |
2703 | 2698 | ||
2704 | discard_force: | 2699 | discard_force: |
@@ -2721,13 +2716,9 @@ discard_force: | |||
2721 | return SCTP_DISPOSITION_DISCARD; | 2716 | return SCTP_DISPOSITION_DISCARD; |
2722 | 2717 | ||
2723 | discard_noforce: | 2718 | discard_noforce: |
2724 | if (chunk->end_of_packet) { | 2719 | if (chunk->end_of_packet) |
2725 | sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); | 2720 | sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); |
2726 | 2721 | ||
2727 | /* Start the SACK timer. */ | ||
2728 | sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, | ||
2729 | SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); | ||
2730 | } | ||
2731 | return SCTP_DISPOSITION_DISCARD; | 2722 | return SCTP_DISPOSITION_DISCARD; |
2732 | consume: | 2723 | consume: |
2733 | return SCTP_DISPOSITION_CONSUME; | 2724 | return SCTP_DISPOSITION_CONSUME; |
@@ -3442,9 +3433,6 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep, | |||
3442 | * send another. | 3433 | * send another. |
3443 | */ | 3434 | */ |
3444 | sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); | 3435 | sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); |
3445 | /* Start the SACK timer. */ | ||
3446 | sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, | ||
3447 | SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); | ||
3448 | 3436 | ||
3449 | return SCTP_DISPOSITION_CONSUME; | 3437 | return SCTP_DISPOSITION_CONSUME; |
3450 | 3438 | ||
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9df888e932c5..fc04d185fa33 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c | |||
@@ -1941,107 +1941,379 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, | |||
1941 | * address's parameters: | 1941 | * address's parameters: |
1942 | * | 1942 | * |
1943 | * struct sctp_paddrparams { | 1943 | * struct sctp_paddrparams { |
1944 | * sctp_assoc_t spp_assoc_id; | 1944 | * sctp_assoc_t spp_assoc_id; |
1945 | * struct sockaddr_storage spp_address; | 1945 | * struct sockaddr_storage spp_address; |
1946 | * uint32_t spp_hbinterval; | 1946 | * uint32_t spp_hbinterval; |
1947 | * uint16_t spp_pathmaxrxt; | 1947 | * uint16_t spp_pathmaxrxt; |
1948 | * }; | 1948 | * uint32_t spp_pathmtu; |
1949 | * | 1949 | * uint32_t spp_sackdelay; |
1950 | * spp_assoc_id - (UDP style socket) This is filled in the application, | 1950 | * uint32_t spp_flags; |
1951 | * and identifies the association for this query. | 1951 | * }; |
1952 | * | ||
1953 | * spp_assoc_id - (one-to-many style socket) This is filled in the | ||
1954 | * application, and identifies the association for | ||
1955 | * this query. | ||
1952 | * spp_address - This specifies which address is of interest. | 1956 | * spp_address - This specifies which address is of interest. |
1953 | * spp_hbinterval - This contains the value of the heartbeat interval, | 1957 | * spp_hbinterval - This contains the value of the heartbeat interval, |
1954 | * in milliseconds. A value of 0, when modifying the | 1958 | * in milliseconds. If a value of zero |
1955 | * parameter, specifies that the heartbeat on this | 1959 | * is present in this field then no changes are to |
1956 | * address should be disabled. A value of UINT32_MAX | 1960 | * be made to this parameter. |
1957 | * (4294967295), when modifying the parameter, | ||
1958 | * specifies that a heartbeat should be sent | ||
1959 | * immediately to the peer address, and the current | ||
1960 | * interval should remain unchanged. | ||
1961 | * spp_pathmaxrxt - This contains the maximum number of | 1961 | * spp_pathmaxrxt - This contains the maximum number of |
1962 | * retransmissions before this address shall be | 1962 | * retransmissions before this address shall be |
1963 | * considered unreachable. | 1963 | * considered unreachable. If a value of zero |
1964 | * is present in this field then no changes are to | ||
1965 | * be made to this parameter. | ||
1966 | * spp_pathmtu - When Path MTU discovery is disabled the value | ||
1967 | * specified here will be the "fixed" path mtu. | ||
1968 | * Note that if the spp_address field is empty | ||
1969 | * then all associations on this address will | ||
1970 | * have this fixed path mtu set upon them. | ||
1971 | * | ||
1972 | * spp_sackdelay - When delayed sack is enabled, this value specifies | ||
1973 | * the number of milliseconds that sacks will be delayed | ||
1974 | * for. This value will apply to all addresses of an | ||
1975 | * association if the spp_address field is empty. Note | ||
1976 | * also, that if delayed sack is enabled and this | ||
1977 | * value is set to 0, no change is made to the last | ||
1978 | * recorded delayed sack timer value. | ||
1979 | * | ||
1980 | * spp_flags - These flags are used to control various features | ||
1981 | * on an association. The flag field may contain | ||
1982 | * zero or more of the following options. | ||
1983 | * | ||
1984 | * SPP_HB_ENABLE - Enable heartbeats on the | ||
1985 | * specified address. Note that if the address | ||
1986 | * field is empty all addresses for the association | ||
1987 | * have heartbeats enabled upon them. | ||
1988 | * | ||
1989 | * SPP_HB_DISABLE - Disable heartbeats on the | ||
1990 | * speicifed address. Note that if the address | ||
1991 | * field is empty all addresses for the association | ||
1992 | * will have their heartbeats disabled. Note also | ||
1993 | * that SPP_HB_ENABLE and SPP_HB_DISABLE are | ||
1994 | * mutually exclusive, only one of these two should | ||
1995 | * be specified. Enabling both fields will have | ||
1996 | * undetermined results. | ||
1997 | * | ||
1998 | * SPP_HB_DEMAND - Request a user initiated heartbeat | ||
1999 | * to be made immediately. | ||
2000 | * | ||
2001 | * SPP_PMTUD_ENABLE - This field will enable PMTU | ||
2002 | * discovery upon the specified address. Note that | ||
2003 | * if the address feild is empty then all addresses | ||
2004 | * on the association are effected. | ||
2005 | * | ||
2006 | * SPP_PMTUD_DISABLE - This field will disable PMTU | ||
2007 | * discovery upon the specified address. Note that | ||
2008 | * if the address feild is empty then all addresses | ||
2009 | * on the association are effected. Not also that | ||
2010 | * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually | ||
2011 | * exclusive. Enabling both will have undetermined | ||
2012 | * results. | ||
2013 | * | ||
2014 | * SPP_SACKDELAY_ENABLE - Setting this flag turns | ||
2015 | * on delayed sack. The time specified in spp_sackdelay | ||
2016 | * is used to specify the sack delay for this address. Note | ||
2017 | * that if spp_address is empty then all addresses will | ||
2018 | * enable delayed sack and take on the sack delay | ||
2019 | * value specified in spp_sackdelay. | ||
2020 | * SPP_SACKDELAY_DISABLE - Setting this flag turns | ||
2021 | * off delayed sack. If the spp_address field is blank then | ||
2022 | * delayed sack is disabled for the entire association. Note | ||
2023 | * also that this field is mutually exclusive to | ||
2024 | * SPP_SACKDELAY_ENABLE, setting both will have undefined | ||
2025 | * results. | ||
1964 | */ | 2026 | */ |
2027 | int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, | ||
2028 | struct sctp_transport *trans, | ||
2029 | struct sctp_association *asoc, | ||
2030 | struct sctp_sock *sp, | ||
2031 | int hb_change, | ||
2032 | int pmtud_change, | ||
2033 | int sackdelay_change) | ||
2034 | { | ||
2035 | int error; | ||
2036 | |||
2037 | if (params->spp_flags & SPP_HB_DEMAND && trans) { | ||
2038 | error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); | ||
2039 | if (error) | ||
2040 | return error; | ||
2041 | } | ||
2042 | |||
2043 | if (params->spp_hbinterval) { | ||
2044 | if (trans) { | ||
2045 | trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval); | ||
2046 | } else if (asoc) { | ||
2047 | asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval); | ||
2048 | } else { | ||
2049 | sp->hbinterval = params->spp_hbinterval; | ||
2050 | } | ||
2051 | } | ||
2052 | |||
2053 | if (hb_change) { | ||
2054 | if (trans) { | ||
2055 | trans->param_flags = | ||
2056 | (trans->param_flags & ~SPP_HB) | hb_change; | ||
2057 | } else if (asoc) { | ||
2058 | asoc->param_flags = | ||
2059 | (asoc->param_flags & ~SPP_HB) | hb_change; | ||
2060 | } else { | ||
2061 | sp->param_flags = | ||
2062 | (sp->param_flags & ~SPP_HB) | hb_change; | ||
2063 | } | ||
2064 | } | ||
2065 | |||
2066 | if (params->spp_pathmtu) { | ||
2067 | if (trans) { | ||
2068 | trans->pathmtu = params->spp_pathmtu; | ||
2069 | sctp_assoc_sync_pmtu(asoc); | ||
2070 | } else if (asoc) { | ||
2071 | asoc->pathmtu = params->spp_pathmtu; | ||
2072 | sctp_frag_point(sp, params->spp_pathmtu); | ||
2073 | } else { | ||
2074 | sp->pathmtu = params->spp_pathmtu; | ||
2075 | } | ||
2076 | } | ||
2077 | |||
2078 | if (pmtud_change) { | ||
2079 | if (trans) { | ||
2080 | int update = (trans->param_flags & SPP_PMTUD_DISABLE) && | ||
2081 | (params->spp_flags & SPP_PMTUD_ENABLE); | ||
2082 | trans->param_flags = | ||
2083 | (trans->param_flags & ~SPP_PMTUD) | pmtud_change; | ||
2084 | if (update) { | ||
2085 | sctp_transport_pmtu(trans); | ||
2086 | sctp_assoc_sync_pmtu(asoc); | ||
2087 | } | ||
2088 | } else if (asoc) { | ||
2089 | asoc->param_flags = | ||
2090 | (asoc->param_flags & ~SPP_PMTUD) | pmtud_change; | ||
2091 | } else { | ||
2092 | sp->param_flags = | ||
2093 | (sp->param_flags & ~SPP_PMTUD) | pmtud_change; | ||
2094 | } | ||
2095 | } | ||
2096 | |||
2097 | if (params->spp_sackdelay) { | ||
2098 | if (trans) { | ||
2099 | trans->sackdelay = | ||
2100 | msecs_to_jiffies(params->spp_sackdelay); | ||
2101 | } else if (asoc) { | ||
2102 | asoc->sackdelay = | ||
2103 | msecs_to_jiffies(params->spp_sackdelay); | ||
2104 | } else { | ||
2105 | sp->sackdelay = params->spp_sackdelay; | ||
2106 | } | ||
2107 | } | ||
2108 | |||
2109 | if (sackdelay_change) { | ||
2110 | if (trans) { | ||
2111 | trans->param_flags = | ||
2112 | (trans->param_flags & ~SPP_SACKDELAY) | | ||
2113 | sackdelay_change; | ||
2114 | } else if (asoc) { | ||
2115 | asoc->param_flags = | ||
2116 | (asoc->param_flags & ~SPP_SACKDELAY) | | ||
2117 | sackdelay_change; | ||
2118 | } else { | ||
2119 | sp->param_flags = | ||
2120 | (sp->param_flags & ~SPP_SACKDELAY) | | ||
2121 | sackdelay_change; | ||
2122 | } | ||
2123 | } | ||
2124 | |||
2125 | if (params->spp_pathmaxrxt) { | ||
2126 | if (trans) { | ||
2127 | trans->pathmaxrxt = params->spp_pathmaxrxt; | ||
2128 | } else if (asoc) { | ||
2129 | asoc->pathmaxrxt = params->spp_pathmaxrxt; | ||
2130 | } else { | ||
2131 | sp->pathmaxrxt = params->spp_pathmaxrxt; | ||
2132 | } | ||
2133 | } | ||
2134 | |||
2135 | return 0; | ||
2136 | } | ||
2137 | |||
1965 | static int sctp_setsockopt_peer_addr_params(struct sock *sk, | 2138 | static int sctp_setsockopt_peer_addr_params(struct sock *sk, |
1966 | char __user *optval, int optlen) | 2139 | char __user *optval, int optlen) |
1967 | { | 2140 | { |
1968 | struct sctp_paddrparams params; | 2141 | struct sctp_paddrparams params; |
1969 | struct sctp_transport *trans; | 2142 | struct sctp_transport *trans = NULL; |
2143 | struct sctp_association *asoc = NULL; | ||
2144 | struct sctp_sock *sp = sctp_sk(sk); | ||
1970 | int error; | 2145 | int error; |
2146 | int hb_change, pmtud_change, sackdelay_change; | ||
1971 | 2147 | ||
1972 | if (optlen != sizeof(struct sctp_paddrparams)) | 2148 | if (optlen != sizeof(struct sctp_paddrparams)) |
1973 | return -EINVAL; | 2149 | return - EINVAL; |
2150 | |||
1974 | if (copy_from_user(¶ms, optval, optlen)) | 2151 | if (copy_from_user(¶ms, optval, optlen)) |
1975 | return -EFAULT; | 2152 | return -EFAULT; |
1976 | 2153 | ||
1977 | /* | 2154 | /* Validate flags and value parameters. */ |
1978 | * API 7. Socket Options (setting the default value for the endpoint) | 2155 | hb_change = params.spp_flags & SPP_HB; |
1979 | * All options that support specific settings on an association by | 2156 | pmtud_change = params.spp_flags & SPP_PMTUD; |
1980 | * filling in either an association id variable or a sockaddr_storage | 2157 | sackdelay_change = params.spp_flags & SPP_SACKDELAY; |
1981 | * SHOULD also support setting of the same value for the entire endpoint | 2158 | |
1982 | * (i.e. future associations). To accomplish this the following logic is | 2159 | if (hb_change == SPP_HB || |
1983 | * used when setting one of these options: | 2160 | pmtud_change == SPP_PMTUD || |
1984 | 2161 | sackdelay_change == SPP_SACKDELAY || | |
1985 | * c) If neither the sockaddr_storage or association identification is | 2162 | params.spp_sackdelay > 500 || |
1986 | * set i.e. the sockaddr_storage is set to all 0's (INADDR_ANY) and | 2163 | (params.spp_pathmtu |
1987 | * the association identification is 0, the settings are a default | 2164 | && params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) |
1988 | * and to be applied to the endpoint (all future associations). | 2165 | return -EINVAL; |
1989 | */ | ||
1990 | 2166 | ||
1991 | /* update default value for endpoint (all future associations) */ | 2167 | /* If an address other than INADDR_ANY is specified, and |
1992 | if (!params.spp_assoc_id && | 2168 | * no transport is found, then the request is invalid. |
1993 | sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { | 2169 | */ |
1994 | /* Manual heartbeat on an endpoint is invalid. */ | 2170 | if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { |
1995 | if (0xffffffff == params.spp_hbinterval) | 2171 | trans = sctp_addr_id2transport(sk, ¶ms.spp_address, |
2172 | params.spp_assoc_id); | ||
2173 | if (!trans) | ||
1996 | return -EINVAL; | 2174 | return -EINVAL; |
1997 | else if (params.spp_hbinterval) | ||
1998 | sctp_sk(sk)->paddrparam.spp_hbinterval = | ||
1999 | params.spp_hbinterval; | ||
2000 | if (params.spp_pathmaxrxt) | ||
2001 | sctp_sk(sk)->paddrparam.spp_pathmaxrxt = | ||
2002 | params.spp_pathmaxrxt; | ||
2003 | return 0; | ||
2004 | } | 2175 | } |
2005 | 2176 | ||
2006 | trans = sctp_addr_id2transport(sk, ¶ms.spp_address, | 2177 | /* Get association, if assoc_id != 0 and the socket is a one |
2007 | params.spp_assoc_id); | 2178 | * to many style socket, and an association was not found, then |
2008 | if (!trans) | 2179 | * the id was invalid. |
2180 | */ | ||
2181 | asoc = sctp_id2assoc(sk, params.spp_assoc_id); | ||
2182 | if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) | ||
2009 | return -EINVAL; | 2183 | return -EINVAL; |
2010 | 2184 | ||
2011 | /* Applications can enable or disable heartbeats for any peer address | 2185 | /* Heartbeat demand can only be sent on a transport or |
2012 | * of an association, modify an address's heartbeat interval, force a | 2186 | * association, but not a socket. |
2013 | * heartbeat to be sent immediately, and adjust the address's maximum | ||
2014 | * number of retransmissions sent before an address is considered | ||
2015 | * unreachable. | ||
2016 | * | ||
2017 | * The value of the heartbeat interval, in milliseconds. A value of | ||
2018 | * UINT32_MAX (4294967295), when modifying the parameter, specifies | ||
2019 | * that a heartbeat should be sent immediately to the peer address, | ||
2020 | * and the current interval should remain unchanged. | ||
2021 | */ | 2187 | */ |
2022 | if (0xffffffff == params.spp_hbinterval) { | 2188 | if (params.spp_flags & SPP_HB_DEMAND && !trans && !asoc) |
2023 | error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans); | 2189 | return -EINVAL; |
2024 | if (error) | 2190 | |
2025 | return error; | 2191 | /* Process parameters. */ |
2026 | } else { | 2192 | error = sctp_apply_peer_addr_params(¶ms, trans, asoc, sp, |
2027 | /* The value of the heartbeat interval, in milliseconds. A value of 0, | 2193 | hb_change, pmtud_change, |
2028 | * when modifying the parameter, specifies that the heartbeat on this | 2194 | sackdelay_change); |
2029 | * address should be disabled. | 2195 | |
2196 | if (error) | ||
2197 | return error; | ||
2198 | |||
2199 | /* If changes are for association, also apply parameters to each | ||
2200 | * transport. | ||
2030 | */ | 2201 | */ |
2031 | if (params.spp_hbinterval) { | 2202 | if (!trans && asoc) { |
2032 | trans->hb_allowed = 1; | 2203 | struct list_head *pos; |
2033 | trans->hb_interval = | 2204 | |
2034 | msecs_to_jiffies(params.spp_hbinterval); | 2205 | list_for_each(pos, &asoc->peer.transport_addr_list) { |
2035 | } else | 2206 | trans = list_entry(pos, struct sctp_transport, |
2036 | trans->hb_allowed = 0; | 2207 | transports); |
2208 | sctp_apply_peer_addr_params(¶ms, trans, asoc, sp, | ||
2209 | hb_change, pmtud_change, | ||
2210 | sackdelay_change); | ||
2211 | } | ||
2037 | } | 2212 | } |
2038 | 2213 | ||
2039 | /* spp_pathmaxrxt contains the maximum number of retransmissions | 2214 | return 0; |
2040 | * before this address shall be considered unreachable. | 2215 | } |
2041 | */ | 2216 | |
2042 | if (params.spp_pathmaxrxt) | 2217 | /* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) |
2043 | trans->max_retrans = params.spp_pathmaxrxt; | 2218 | * |
2219 | * This options will get or set the delayed ack timer. The time is set | ||
2220 | * in milliseconds. If the assoc_id is 0, then this sets or gets the | ||
2221 | * endpoints default delayed ack timer value. If the assoc_id field is | ||
2222 | * non-zero, then the set or get effects the specified association. | ||
2223 | * | ||
2224 | * struct sctp_assoc_value { | ||
2225 | * sctp_assoc_t assoc_id; | ||
2226 | * uint32_t assoc_value; | ||
2227 | * }; | ||
2228 | * | ||
2229 | * assoc_id - This parameter, indicates which association the | ||
2230 | * user is preforming an action upon. Note that if | ||
2231 | * this field's value is zero then the endpoints | ||
2232 | * default value is changed (effecting future | ||
2233 | * associations only). | ||
2234 | * | ||
2235 | * assoc_value - This parameter contains the number of milliseconds | ||
2236 | * that the user is requesting the delayed ACK timer | ||
2237 | * be set to. Note that this value is defined in | ||
2238 | * the standard to be between 200 and 500 milliseconds. | ||
2239 | * | ||
2240 | * Note: a value of zero will leave the value alone, | ||
2241 | * but disable SACK delay. A non-zero value will also | ||
2242 | * enable SACK delay. | ||
2243 | */ | ||
2044 | 2244 | ||
2245 | static int sctp_setsockopt_delayed_ack_time(struct sock *sk, | ||
2246 | char __user *optval, int optlen) | ||
2247 | { | ||
2248 | struct sctp_assoc_value params; | ||
2249 | struct sctp_transport *trans = NULL; | ||
2250 | struct sctp_association *asoc = NULL; | ||
2251 | struct sctp_sock *sp = sctp_sk(sk); | ||
2252 | |||
2253 | if (optlen != sizeof(struct sctp_assoc_value)) | ||
2254 | return - EINVAL; | ||
2255 | |||
2256 | if (copy_from_user(¶ms, optval, optlen)) | ||
2257 | return -EFAULT; | ||
2258 | |||
2259 | /* Validate value parameter. */ | ||
2260 | if (params.assoc_value > 500) | ||
2261 | return -EINVAL; | ||
2262 | |||
2263 | /* Get association, if assoc_id != 0 and the socket is a one | ||
2264 | * to many style socket, and an association was not found, then | ||
2265 | * the id was invalid. | ||
2266 | */ | ||
2267 | asoc = sctp_id2assoc(sk, params.assoc_id); | ||
2268 | if (!asoc && params.assoc_id && sctp_style(sk, UDP)) | ||
2269 | return -EINVAL; | ||
2270 | |||
2271 | if (params.assoc_value) { | ||
2272 | if (asoc) { | ||
2273 | asoc->sackdelay = | ||
2274 | msecs_to_jiffies(params.assoc_value); | ||
2275 | asoc->param_flags = | ||
2276 | (asoc->param_flags & ~SPP_SACKDELAY) | | ||
2277 | SPP_SACKDELAY_ENABLE; | ||
2278 | } else { | ||
2279 | sp->sackdelay = params.assoc_value; | ||
2280 | sp->param_flags = | ||
2281 | (sp->param_flags & ~SPP_SACKDELAY) | | ||
2282 | SPP_SACKDELAY_ENABLE; | ||
2283 | } | ||
2284 | } else { | ||
2285 | if (asoc) { | ||
2286 | asoc->param_flags = | ||
2287 | (asoc->param_flags & ~SPP_SACKDELAY) | | ||
2288 | SPP_SACKDELAY_DISABLE; | ||
2289 | } else { | ||
2290 | sp->param_flags = | ||
2291 | (sp->param_flags & ~SPP_SACKDELAY) | | ||
2292 | SPP_SACKDELAY_DISABLE; | ||
2293 | } | ||
2294 | } | ||
2295 | |||
2296 | /* If change is for association, also apply to each transport. */ | ||
2297 | if (asoc) { | ||
2298 | struct list_head *pos; | ||
2299 | |||
2300 | list_for_each(pos, &asoc->peer.transport_addr_list) { | ||
2301 | trans = list_entry(pos, struct sctp_transport, | ||
2302 | transports); | ||
2303 | if (params.assoc_value) { | ||
2304 | trans->sackdelay = | ||
2305 | msecs_to_jiffies(params.assoc_value); | ||
2306 | trans->param_flags = | ||
2307 | (trans->param_flags & ~SPP_SACKDELAY) | | ||
2308 | SPP_SACKDELAY_ENABLE; | ||
2309 | } else { | ||
2310 | trans->param_flags = | ||
2311 | (trans->param_flags & ~SPP_SACKDELAY) | | ||
2312 | SPP_SACKDELAY_DISABLE; | ||
2313 | } | ||
2314 | } | ||
2315 | } | ||
2316 | |||
2045 | return 0; | 2317 | return 0; |
2046 | } | 2318 | } |
2047 | 2319 | ||
@@ -2334,7 +2606,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl | |||
2334 | /* Update the frag_point of the existing associations. */ | 2606 | /* Update the frag_point of the existing associations. */ |
2335 | list_for_each(pos, &(sp->ep->asocs)) { | 2607 | list_for_each(pos, &(sp->ep->asocs)) { |
2336 | asoc = list_entry(pos, struct sctp_association, asocs); | 2608 | asoc = list_entry(pos, struct sctp_association, asocs); |
2337 | asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); | 2609 | asoc->frag_point = sctp_frag_point(sp, asoc->pathmtu); |
2338 | } | 2610 | } |
2339 | 2611 | ||
2340 | return 0; | 2612 | return 0; |
@@ -2491,6 +2763,10 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, | |||
2491 | retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); | 2763 | retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); |
2492 | break; | 2764 | break; |
2493 | 2765 | ||
2766 | case SCTP_DELAYED_ACK_TIME: | ||
2767 | retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen); | ||
2768 | break; | ||
2769 | |||
2494 | case SCTP_INITMSG: | 2770 | case SCTP_INITMSG: |
2495 | retval = sctp_setsockopt_initmsg(sk, optval, optlen); | 2771 | retval = sctp_setsockopt_initmsg(sk, optval, optlen); |
2496 | break; | 2772 | break; |
@@ -2715,8 +2991,13 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) | |||
2715 | /* Default Peer Address Parameters. These defaults can | 2991 | /* Default Peer Address Parameters. These defaults can |
2716 | * be modified via SCTP_PEER_ADDR_PARAMS | 2992 | * be modified via SCTP_PEER_ADDR_PARAMS |
2717 | */ | 2993 | */ |
2718 | sp->paddrparam.spp_hbinterval = jiffies_to_msecs(sctp_hb_interval); | 2994 | sp->hbinterval = jiffies_to_msecs(sctp_hb_interval); |
2719 | sp->paddrparam.spp_pathmaxrxt = sctp_max_retrans_path; | 2995 | sp->pathmaxrxt = sctp_max_retrans_path; |
2996 | sp->pathmtu = 0; // allow default discovery | ||
2997 | sp->sackdelay = sctp_sack_timeout; | ||
2998 | sp->param_flags = SPP_HB_ENABLE | | ||
2999 | SPP_PMTUD_ENABLE | | ||
3000 | SPP_SACKDELAY_ENABLE; | ||
2720 | 3001 | ||
2721 | /* If enabled no SCTP message fragmentation will be performed. | 3002 | /* If enabled no SCTP message fragmentation will be performed. |
2722 | * Configure through SCTP_DISABLE_FRAGMENTS socket option. | 3003 | * Configure through SCTP_DISABLE_FRAGMENTS socket option. |
@@ -2865,7 +3146,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, | |||
2865 | status.sstat_primary.spinfo_cwnd = transport->cwnd; | 3146 | status.sstat_primary.spinfo_cwnd = transport->cwnd; |
2866 | status.sstat_primary.spinfo_srtt = transport->srtt; | 3147 | status.sstat_primary.spinfo_srtt = transport->srtt; |
2867 | status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto); | 3148 | status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto); |
2868 | status.sstat_primary.spinfo_mtu = transport->pmtu; | 3149 | status.sstat_primary.spinfo_mtu = transport->pathmtu; |
2869 | 3150 | ||
2870 | if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN) | 3151 | if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN) |
2871 | status.sstat_primary.spinfo_state = SCTP_ACTIVE; | 3152 | status.sstat_primary.spinfo_state = SCTP_ACTIVE; |
@@ -2924,7 +3205,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len, | |||
2924 | pinfo.spinfo_cwnd = transport->cwnd; | 3205 | pinfo.spinfo_cwnd = transport->cwnd; |
2925 | pinfo.spinfo_srtt = transport->srtt; | 3206 | pinfo.spinfo_srtt = transport->srtt; |
2926 | pinfo.spinfo_rto = jiffies_to_msecs(transport->rto); | 3207 | pinfo.spinfo_rto = jiffies_to_msecs(transport->rto); |
2927 | pinfo.spinfo_mtu = transport->pmtu; | 3208 | pinfo.spinfo_mtu = transport->pathmtu; |
2928 | 3209 | ||
2929 | if (pinfo.spinfo_state == SCTP_UNKNOWN) | 3210 | if (pinfo.spinfo_state == SCTP_UNKNOWN) |
2930 | pinfo.spinfo_state = SCTP_ACTIVE; | 3211 | pinfo.spinfo_state = SCTP_ACTIVE; |
@@ -3086,69 +3367,227 @@ out: | |||
3086 | * address's parameters: | 3367 | * address's parameters: |
3087 | * | 3368 | * |
3088 | * struct sctp_paddrparams { | 3369 | * struct sctp_paddrparams { |
3089 | * sctp_assoc_t spp_assoc_id; | 3370 | * sctp_assoc_t spp_assoc_id; |
3090 | * struct sockaddr_storage spp_address; | 3371 | * struct sockaddr_storage spp_address; |
3091 | * uint32_t spp_hbinterval; | 3372 | * uint32_t spp_hbinterval; |
3092 | * uint16_t spp_pathmaxrxt; | 3373 | * uint16_t spp_pathmaxrxt; |
3093 | * }; | 3374 | * uint32_t spp_pathmtu; |
3094 | * | 3375 | * uint32_t spp_sackdelay; |
3095 | * spp_assoc_id - (UDP style socket) This is filled in the application, | 3376 | * uint32_t spp_flags; |
3096 | * and identifies the association for this query. | 3377 | * }; |
3378 | * | ||
3379 | * spp_assoc_id - (one-to-many style socket) This is filled in the | ||
3380 | * application, and identifies the association for | ||
3381 | * this query. | ||
3097 | * spp_address - This specifies which address is of interest. | 3382 | * spp_address - This specifies which address is of interest. |
3098 | * spp_hbinterval - This contains the value of the heartbeat interval, | 3383 | * spp_hbinterval - This contains the value of the heartbeat interval, |
3099 | * in milliseconds. A value of 0, when modifying the | 3384 | * in milliseconds. If a value of zero |
3100 | * parameter, specifies that the heartbeat on this | 3385 | * is present in this field then no changes are to |
3101 | * address should be disabled. A value of UINT32_MAX | 3386 | * be made to this parameter. |
3102 | * (4294967295), when modifying the parameter, | ||
3103 | * specifies that a heartbeat should be sent | ||
3104 | * immediately to the peer address, and the current | ||
3105 | * interval should remain unchanged. | ||
3106 | * spp_pathmaxrxt - This contains the maximum number of | 3387 | * spp_pathmaxrxt - This contains the maximum number of |
3107 | * retransmissions before this address shall be | 3388 | * retransmissions before this address shall be |
3108 | * considered unreachable. | 3389 | * considered unreachable. If a value of zero |
3390 | * is present in this field then no changes are to | ||
3391 | * be made to this parameter. | ||
3392 | * spp_pathmtu - When Path MTU discovery is disabled the value | ||
3393 | * specified here will be the "fixed" path mtu. | ||
3394 | * Note that if the spp_address field is empty | ||
3395 | * then all associations on this address will | ||
3396 | * have this fixed path mtu set upon them. | ||
3397 | * | ||
3398 | * spp_sackdelay - When delayed sack is enabled, this value specifies | ||
3399 | * the number of milliseconds that sacks will be delayed | ||
3400 | * for. This value will apply to all addresses of an | ||
3401 | * association if the spp_address field is empty. Note | ||
3402 | * also, that if delayed sack is enabled and this | ||
3403 | * value is set to 0, no change is made to the last | ||
3404 | * recorded delayed sack timer value. | ||
3405 | * | ||
3406 | * spp_flags - These flags are used to control various features | ||
3407 | * on an association. The flag field may contain | ||
3408 | * zero or more of the following options. | ||
3409 | * | ||
3410 | * SPP_HB_ENABLE - Enable heartbeats on the | ||
3411 | * specified address. Note that if the address | ||
3412 | * field is empty all addresses for the association | ||
3413 | * have heartbeats enabled upon them. | ||
3414 | * | ||
3415 | * SPP_HB_DISABLE - Disable heartbeats on the | ||
3416 | * speicifed address. Note that if the address | ||
3417 | * field is empty all addresses for the association | ||
3418 | * will have their heartbeats disabled. Note also | ||
3419 | * that SPP_HB_ENABLE and SPP_HB_DISABLE are | ||
3420 | * mutually exclusive, only one of these two should | ||
3421 | * be specified. Enabling both fields will have | ||
3422 | * undetermined results. | ||
3423 | * | ||
3424 | * SPP_HB_DEMAND - Request a user initiated heartbeat | ||
3425 | * to be made immediately. | ||
3426 | * | ||
3427 | * SPP_PMTUD_ENABLE - This field will enable PMTU | ||
3428 | * discovery upon the specified address. Note that | ||
3429 | * if the address feild is empty then all addresses | ||
3430 | * on the association are effected. | ||
3431 | * | ||
3432 | * SPP_PMTUD_DISABLE - This field will disable PMTU | ||
3433 | * discovery upon the specified address. Note that | ||
3434 | * if the address feild is empty then all addresses | ||
3435 | * on the association are effected. Not also that | ||
3436 | * SPP_PMTUD_ENABLE and SPP_PMTUD_DISABLE are mutually | ||
3437 | * exclusive. Enabling both will have undetermined | ||
3438 | * results. | ||
3439 | * | ||
3440 | * SPP_SACKDELAY_ENABLE - Setting this flag turns | ||
3441 | * on delayed sack. The time specified in spp_sackdelay | ||
3442 | * is used to specify the sack delay for this address. Note | ||
3443 | * that if spp_address is empty then all addresses will | ||
3444 | * enable delayed sack and take on the sack delay | ||
3445 | * value specified in spp_sackdelay. | ||
3446 | * SPP_SACKDELAY_DISABLE - Setting this flag turns | ||
3447 | * off delayed sack. If the spp_address field is blank then | ||
3448 | * delayed sack is disabled for the entire association. Note | ||
3449 | * also that this field is mutually exclusive to | ||
3450 | * SPP_SACKDELAY_ENABLE, setting both will have undefined | ||
3451 | * results. | ||
3109 | */ | 3452 | */ |
3110 | static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, | 3453 | static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, |
3111 | char __user *optval, int __user *optlen) | 3454 | char __user *optval, int __user *optlen) |
3112 | { | 3455 | { |
3113 | struct sctp_paddrparams params; | 3456 | struct sctp_paddrparams params; |
3114 | struct sctp_transport *trans; | 3457 | struct sctp_transport *trans = NULL; |
3458 | struct sctp_association *asoc = NULL; | ||
3459 | struct sctp_sock *sp = sctp_sk(sk); | ||
3115 | 3460 | ||
3116 | if (len != sizeof(struct sctp_paddrparams)) | 3461 | if (len != sizeof(struct sctp_paddrparams)) |
3117 | return -EINVAL; | 3462 | return -EINVAL; |
3463 | |||
3118 | if (copy_from_user(¶ms, optval, len)) | 3464 | if (copy_from_user(¶ms, optval, len)) |
3119 | return -EFAULT; | 3465 | return -EFAULT; |
3120 | 3466 | ||
3121 | /* If no association id is specified retrieve the default value | 3467 | /* If an address other than INADDR_ANY is specified, and |
3122 | * for the endpoint that will be used for all future associations | 3468 | * no transport is found, then the request is invalid. |
3123 | */ | 3469 | */ |
3124 | if (!params.spp_assoc_id && | 3470 | if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { |
3125 | sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { | 3471 | trans = sctp_addr_id2transport(sk, ¶ms.spp_address, |
3126 | params.spp_hbinterval = sctp_sk(sk)->paddrparam.spp_hbinterval; | 3472 | params.spp_assoc_id); |
3127 | params.spp_pathmaxrxt = sctp_sk(sk)->paddrparam.spp_pathmaxrxt; | 3473 | if (!trans) { |
3128 | 3474 | SCTP_DEBUG_PRINTK("Failed no transport\n"); | |
3129 | goto done; | 3475 | return -EINVAL; |
3476 | } | ||
3130 | } | 3477 | } |
3131 | 3478 | ||
3132 | trans = sctp_addr_id2transport(sk, ¶ms.spp_address, | 3479 | /* Get association, if assoc_id != 0 and the socket is a one |
3133 | params.spp_assoc_id); | 3480 | * to many style socket, and an association was not found, then |
3134 | if (!trans) | 3481 | * the id was invalid. |
3482 | */ | ||
3483 | asoc = sctp_id2assoc(sk, params.spp_assoc_id); | ||
3484 | if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { | ||
3485 | SCTP_DEBUG_PRINTK("Failed no association\n"); | ||
3135 | return -EINVAL; | 3486 | return -EINVAL; |
3487 | } | ||
3136 | 3488 | ||
3137 | /* The value of the heartbeat interval, in milliseconds. A value of 0, | 3489 | if (trans) { |
3138 | * when modifying the parameter, specifies that the heartbeat on this | 3490 | /* Fetch transport values. */ |
3139 | * address should be disabled. | 3491 | params.spp_hbinterval = jiffies_to_msecs(trans->hbinterval); |
3140 | */ | 3492 | params.spp_pathmtu = trans->pathmtu; |
3141 | if (!trans->hb_allowed) | 3493 | params.spp_pathmaxrxt = trans->pathmaxrxt; |
3142 | params.spp_hbinterval = 0; | 3494 | params.spp_sackdelay = jiffies_to_msecs(trans->sackdelay); |
3143 | else | 3495 | |
3144 | params.spp_hbinterval = jiffies_to_msecs(trans->hb_interval); | 3496 | /*draft-11 doesn't say what to return in spp_flags*/ |
3497 | params.spp_flags = trans->param_flags; | ||
3498 | } else if (asoc) { | ||
3499 | /* Fetch association values. */ | ||
3500 | params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval); | ||
3501 | params.spp_pathmtu = asoc->pathmtu; | ||
3502 | params.spp_pathmaxrxt = asoc->pathmaxrxt; | ||
3503 | params.spp_sackdelay = jiffies_to_msecs(asoc->sackdelay); | ||
3504 | |||
3505 | /*draft-11 doesn't say what to return in spp_flags*/ | ||
3506 | params.spp_flags = asoc->param_flags; | ||
3507 | } else { | ||
3508 | /* Fetch socket values. */ | ||
3509 | params.spp_hbinterval = sp->hbinterval; | ||
3510 | params.spp_pathmtu = sp->pathmtu; | ||
3511 | params.spp_sackdelay = sp->sackdelay; | ||
3512 | params.spp_pathmaxrxt = sp->pathmaxrxt; | ||
3513 | |||
3514 | /*draft-11 doesn't say what to return in spp_flags*/ | ||
3515 | params.spp_flags = sp->param_flags; | ||
3516 | } | ||
3145 | 3517 | ||
3146 | /* spp_pathmaxrxt contains the maximum number of retransmissions | 3518 | if (copy_to_user(optval, ¶ms, len)) |
3147 | * before this address shall be considered unreachable. | 3519 | return -EFAULT; |
3148 | */ | 3520 | |
3149 | params.spp_pathmaxrxt = trans->max_retrans; | 3521 | if (put_user(len, optlen)) |
3522 | return -EFAULT; | ||
3523 | |||
3524 | return 0; | ||
3525 | } | ||
3526 | |||
3527 | /* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME) | ||
3528 | * | ||
3529 | * This options will get or set the delayed ack timer. The time is set | ||
3530 | * in milliseconds. If the assoc_id is 0, then this sets or gets the | ||
3531 | * endpoints default delayed ack timer value. If the assoc_id field is | ||
3532 | * non-zero, then the set or get effects the specified association. | ||
3533 | * | ||
3534 | * struct sctp_assoc_value { | ||
3535 | * sctp_assoc_t assoc_id; | ||
3536 | * uint32_t assoc_value; | ||
3537 | * }; | ||
3538 | * | ||
3539 | * assoc_id - This parameter, indicates which association the | ||
3540 | * user is preforming an action upon. Note that if | ||
3541 | * this field's value is zero then the endpoints | ||
3542 | * default value is changed (effecting future | ||
3543 | * associations only). | ||
3544 | * | ||
3545 | * assoc_value - This parameter contains the number of milliseconds | ||
3546 | * that the user is requesting the delayed ACK timer | ||
3547 | * be set to. Note that this value is defined in | ||
3548 | * the standard to be between 200 and 500 milliseconds. | ||
3549 | * | ||
3550 | * Note: a value of zero will leave the value alone, | ||
3551 | * but disable SACK delay. A non-zero value will also | ||
3552 | * enable SACK delay. | ||
3553 | */ | ||
3554 | static int sctp_getsockopt_delayed_ack_time(struct sock *sk, int len, | ||
3555 | char __user *optval, | ||
3556 | int __user *optlen) | ||
3557 | { | ||
3558 | struct sctp_assoc_value params; | ||
3559 | struct sctp_association *asoc = NULL; | ||
3560 | struct sctp_sock *sp = sctp_sk(sk); | ||
3561 | |||
3562 | if (len != sizeof(struct sctp_assoc_value)) | ||
3563 | return - EINVAL; | ||
3564 | |||
3565 | if (copy_from_user(¶ms, optval, len)) | ||
3566 | return -EFAULT; | ||
3567 | |||
3568 | /* Get association, if assoc_id != 0 and the socket is a one | ||
3569 | * to many style socket, and an association was not found, then | ||
3570 | * the id was invalid. | ||
3571 | */ | ||
3572 | asoc = sctp_id2assoc(sk, params.assoc_id); | ||
3573 | if (!asoc && params.assoc_id && sctp_style(sk, UDP)) | ||
3574 | return -EINVAL; | ||
3575 | |||
3576 | if (asoc) { | ||
3577 | /* Fetch association values. */ | ||
3578 | if (asoc->param_flags & SPP_SACKDELAY_ENABLE) | ||
3579 | params.assoc_value = jiffies_to_msecs( | ||
3580 | asoc->sackdelay); | ||
3581 | else | ||
3582 | params.assoc_value = 0; | ||
3583 | } else { | ||
3584 | /* Fetch socket values. */ | ||
3585 | if (sp->param_flags & SPP_SACKDELAY_ENABLE) | ||
3586 | params.assoc_value = sp->sackdelay; | ||
3587 | else | ||
3588 | params.assoc_value = 0; | ||
3589 | } | ||
3150 | 3590 | ||
3151 | done: | ||
3152 | if (copy_to_user(optval, ¶ms, len)) | 3591 | if (copy_to_user(optval, ¶ms, len)) |
3153 | return -EFAULT; | 3592 | return -EFAULT; |
3154 | 3593 | ||
@@ -4015,6 +4454,10 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, | |||
4015 | retval = sctp_getsockopt_peer_addr_params(sk, len, optval, | 4454 | retval = sctp_getsockopt_peer_addr_params(sk, len, optval, |
4016 | optlen); | 4455 | optlen); |
4017 | break; | 4456 | break; |
4457 | case SCTP_DELAYED_ACK_TIME: | ||
4458 | retval = sctp_getsockopt_delayed_ack_time(sk, len, optval, | ||
4459 | optlen); | ||
4460 | break; | ||
4018 | case SCTP_INITMSG: | 4461 | case SCTP_INITMSG: |
4019 | retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); | 4462 | retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); |
4020 | break; | 4463 | break; |
diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 268ddaf2dc0f..68d73e2dd155 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c | |||
@@ -86,10 +86,13 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, | |||
86 | peer->init_sent_count = 0; | 86 | peer->init_sent_count = 0; |
87 | 87 | ||
88 | peer->state = SCTP_ACTIVE; | 88 | peer->state = SCTP_ACTIVE; |
89 | peer->hb_allowed = 0; | 89 | peer->param_flags = SPP_HB_DISABLE | |
90 | SPP_PMTUD_ENABLE | | ||
91 | SPP_SACKDELAY_ENABLE; | ||
92 | peer->hbinterval = 0; | ||
90 | 93 | ||
91 | /* Initialize the default path max_retrans. */ | 94 | /* Initialize the default path max_retrans. */ |
92 | peer->max_retrans = sctp_max_retrans_path; | 95 | peer->pathmaxrxt = sctp_max_retrans_path; |
93 | peer->error_count = 0; | 96 | peer->error_count = 0; |
94 | 97 | ||
95 | INIT_LIST_HEAD(&peer->transmitted); | 98 | INIT_LIST_HEAD(&peer->transmitted); |
@@ -229,10 +232,10 @@ void sctp_transport_pmtu(struct sctp_transport *transport) | |||
229 | dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL); | 232 | dst = transport->af_specific->get_dst(NULL, &transport->ipaddr, NULL); |
230 | 233 | ||
231 | if (dst) { | 234 | if (dst) { |
232 | transport->pmtu = dst_mtu(dst); | 235 | transport->pathmtu = dst_mtu(dst); |
233 | dst_release(dst); | 236 | dst_release(dst); |
234 | } else | 237 | } else |
235 | transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; | 238 | transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; |
236 | } | 239 | } |
237 | 240 | ||
238 | /* Caches the dst entry and source address for a transport's destination | 241 | /* Caches the dst entry and source address for a transport's destination |
@@ -254,8 +257,11 @@ void sctp_transport_route(struct sctp_transport *transport, | |||
254 | af->get_saddr(asoc, dst, daddr, &transport->saddr); | 257 | af->get_saddr(asoc, dst, daddr, &transport->saddr); |
255 | 258 | ||
256 | transport->dst = dst; | 259 | transport->dst = dst; |
260 | if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { | ||
261 | return; | ||
262 | } | ||
257 | if (dst) { | 263 | if (dst) { |
258 | transport->pmtu = dst_mtu(dst); | 264 | transport->pathmtu = dst_mtu(dst); |
259 | 265 | ||
260 | /* Initialize sk->sk_rcv_saddr, if the transport is the | 266 | /* Initialize sk->sk_rcv_saddr, if the transport is the |
261 | * association's active path for getsockname(). | 267 | * association's active path for getsockname(). |
@@ -264,7 +270,7 @@ void sctp_transport_route(struct sctp_transport *transport, | |||
264 | opt->pf->af->to_sk_saddr(&transport->saddr, | 270 | opt->pf->af->to_sk_saddr(&transport->saddr, |
265 | asoc->base.sk); | 271 | asoc->base.sk); |
266 | } else | 272 | } else |
267 | transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; | 273 | transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; |
268 | } | 274 | } |
269 | 275 | ||
270 | /* Hold a reference to a transport. */ | 276 | /* Hold a reference to a transport. */ |
@@ -369,7 +375,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, | |||
369 | 375 | ||
370 | ssthresh = transport->ssthresh; | 376 | ssthresh = transport->ssthresh; |
371 | pba = transport->partial_bytes_acked; | 377 | pba = transport->partial_bytes_acked; |
372 | pmtu = transport->asoc->pmtu; | 378 | pmtu = transport->asoc->pathmtu; |
373 | 379 | ||
374 | if (cwnd <= ssthresh) { | 380 | if (cwnd <= ssthresh) { |
375 | /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less | 381 | /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less |
@@ -441,8 +447,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, | |||
441 | * partial_bytes_acked = 0 | 447 | * partial_bytes_acked = 0 |
442 | */ | 448 | */ |
443 | transport->ssthresh = max(transport->cwnd/2, | 449 | transport->ssthresh = max(transport->cwnd/2, |
444 | 4*transport->asoc->pmtu); | 450 | 4*transport->asoc->pathmtu); |
445 | transport->cwnd = transport->asoc->pmtu; | 451 | transport->cwnd = transport->asoc->pathmtu; |
446 | break; | 452 | break; |
447 | 453 | ||
448 | case SCTP_LOWER_CWND_FAST_RTX: | 454 | case SCTP_LOWER_CWND_FAST_RTX: |
@@ -459,7 +465,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, | |||
459 | * partial_bytes_acked = 0 | 465 | * partial_bytes_acked = 0 |
460 | */ | 466 | */ |
461 | transport->ssthresh = max(transport->cwnd/2, | 467 | transport->ssthresh = max(transport->cwnd/2, |
462 | 4*transport->asoc->pmtu); | 468 | 4*transport->asoc->pathmtu); |
463 | transport->cwnd = transport->ssthresh; | 469 | transport->cwnd = transport->ssthresh; |
464 | break; | 470 | break; |
465 | 471 | ||
@@ -479,7 +485,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, | |||
479 | if ((jiffies - transport->last_time_ecne_reduced) > | 485 | if ((jiffies - transport->last_time_ecne_reduced) > |
480 | transport->rtt) { | 486 | transport->rtt) { |
481 | transport->ssthresh = max(transport->cwnd/2, | 487 | transport->ssthresh = max(transport->cwnd/2, |
482 | 4*transport->asoc->pmtu); | 488 | 4*transport->asoc->pathmtu); |
483 | transport->cwnd = transport->ssthresh; | 489 | transport->cwnd = transport->ssthresh; |
484 | transport->last_time_ecne_reduced = jiffies; | 490 | transport->last_time_ecne_reduced = jiffies; |
485 | } | 491 | } |
@@ -496,7 +502,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, | |||
496 | */ | 502 | */ |
497 | if ((jiffies - transport->last_time_used) > transport->rto) | 503 | if ((jiffies - transport->last_time_used) > transport->rto) |
498 | transport->cwnd = max(transport->cwnd/2, | 504 | transport->cwnd = max(transport->cwnd/2, |
499 | 4*transport->asoc->pmtu); | 505 | 4*transport->asoc->pathmtu); |
500 | break; | 506 | break; |
501 | }; | 507 | }; |
502 | 508 | ||
@@ -511,7 +517,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, | |||
511 | unsigned long sctp_transport_timeout(struct sctp_transport *t) | 517 | unsigned long sctp_transport_timeout(struct sctp_transport *t) |
512 | { | 518 | { |
513 | unsigned long timeout; | 519 | unsigned long timeout; |
514 | timeout = t->hb_interval + t->rto + sctp_jitter(t->rto); | 520 | timeout = t->hbinterval + t->rto + sctp_jitter(t->rto); |
515 | timeout += jiffies; | 521 | timeout += jiffies; |
516 | return timeout; | 522 | return timeout; |
517 | } | 523 | } |
diff --git a/net/socket.c b/net/socket.c index 3145103cdf54..06fa217f58a9 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -640,154 +640,150 @@ static void sock_aio_dtor(struct kiocb *iocb) | |||
640 | kfree(iocb->private); | 640 | kfree(iocb->private); |
641 | } | 641 | } |
642 | 642 | ||
643 | /* | 643 | static ssize_t sock_sendpage(struct file *file, struct page *page, |
644 | * Read data from a socket. ubuf is a user mode pointer. We make sure the user | 644 | int offset, size_t size, loff_t *ppos, int more) |
645 | * area ubuf...ubuf+size-1 is writable before asking the protocol. | ||
646 | */ | ||
647 | |||
648 | static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, | ||
649 | size_t size, loff_t pos) | ||
650 | { | 645 | { |
651 | struct sock_iocb *x, siocb; | ||
652 | struct socket *sock; | 646 | struct socket *sock; |
653 | int flags; | 647 | int flags; |
654 | 648 | ||
655 | if (pos != 0) | 649 | sock = file->private_data; |
656 | return -ESPIPE; | ||
657 | if (size==0) /* Match SYS5 behaviour */ | ||
658 | return 0; | ||
659 | 650 | ||
660 | if (is_sync_kiocb(iocb)) | 651 | flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; |
661 | x = &siocb; | 652 | if (more) |
662 | else { | 653 | flags |= MSG_MORE; |
663 | x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL); | 654 | |
664 | if (!x) | 655 | return sock->ops->sendpage(sock, page, offset, size, flags); |
665 | return -ENOMEM; | 656 | } |
657 | |||
658 | static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, | ||
659 | char __user *ubuf, size_t size, struct sock_iocb *siocb) | ||
660 | { | ||
661 | if (!is_sync_kiocb(iocb)) { | ||
662 | siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); | ||
663 | if (!siocb) | ||
664 | return NULL; | ||
666 | iocb->ki_dtor = sock_aio_dtor; | 665 | iocb->ki_dtor = sock_aio_dtor; |
667 | } | 666 | } |
668 | iocb->private = x; | ||
669 | x->kiocb = iocb; | ||
670 | sock = iocb->ki_filp->private_data; | ||
671 | 667 | ||
672 | x->async_msg.msg_name = NULL; | 668 | siocb->kiocb = iocb; |
673 | x->async_msg.msg_namelen = 0; | 669 | siocb->async_iov.iov_base = ubuf; |
674 | x->async_msg.msg_iov = &x->async_iov; | 670 | siocb->async_iov.iov_len = size; |
675 | x->async_msg.msg_iovlen = 1; | ||
676 | x->async_msg.msg_control = NULL; | ||
677 | x->async_msg.msg_controllen = 0; | ||
678 | x->async_iov.iov_base = ubuf; | ||
679 | x->async_iov.iov_len = size; | ||
680 | flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; | ||
681 | 671 | ||
682 | return __sock_recvmsg(iocb, sock, &x->async_msg, size, flags); | 672 | iocb->private = siocb; |
673 | return siocb; | ||
683 | } | 674 | } |
684 | 675 | ||
676 | static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, | ||
677 | struct file *file, struct iovec *iov, unsigned long nr_segs) | ||
678 | { | ||
679 | struct socket *sock = file->private_data; | ||
680 | size_t size = 0; | ||
681 | int i; | ||
685 | 682 | ||
686 | /* | 683 | for (i = 0 ; i < nr_segs ; i++) |
687 | * Write data to a socket. We verify that the user area ubuf..ubuf+size-1 | 684 | size += iov[i].iov_len; |
688 | * is readable by the user process. | ||
689 | */ | ||
690 | 685 | ||
691 | static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, | 686 | msg->msg_name = NULL; |
692 | size_t size, loff_t pos) | 687 | msg->msg_namelen = 0; |
688 | msg->msg_control = NULL; | ||
689 | msg->msg_controllen = 0; | ||
690 | msg->msg_iov = (struct iovec *) iov; | ||
691 | msg->msg_iovlen = nr_segs; | ||
692 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; | ||
693 | |||
694 | return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); | ||
695 | } | ||
696 | |||
697 | static ssize_t sock_readv(struct file *file, const struct iovec *iov, | ||
698 | unsigned long nr_segs, loff_t *ppos) | ||
693 | { | 699 | { |
694 | struct sock_iocb *x, siocb; | 700 | struct kiocb iocb; |
695 | struct socket *sock; | 701 | struct sock_iocb siocb; |
696 | 702 | struct msghdr msg; | |
703 | int ret; | ||
704 | |||
705 | init_sync_kiocb(&iocb, NULL); | ||
706 | iocb.private = &siocb; | ||
707 | |||
708 | ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); | ||
709 | if (-EIOCBQUEUED == ret) | ||
710 | ret = wait_on_sync_kiocb(&iocb); | ||
711 | return ret; | ||
712 | } | ||
713 | |||
714 | static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, | ||
715 | size_t count, loff_t pos) | ||
716 | { | ||
717 | struct sock_iocb siocb, *x; | ||
718 | |||
697 | if (pos != 0) | 719 | if (pos != 0) |
698 | return -ESPIPE; | 720 | return -ESPIPE; |
699 | if(size==0) /* Match SYS5 behaviour */ | 721 | if (count == 0) /* Match SYS5 behaviour */ |
700 | return 0; | 722 | return 0; |
701 | 723 | ||
702 | if (is_sync_kiocb(iocb)) | 724 | x = alloc_sock_iocb(iocb, ubuf, count, &siocb); |
703 | x = &siocb; | 725 | if (!x) |
704 | else { | 726 | return -ENOMEM; |
705 | x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL); | 727 | return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, |
706 | if (!x) | 728 | &x->async_iov, 1); |
707 | return -ENOMEM; | ||
708 | iocb->ki_dtor = sock_aio_dtor; | ||
709 | } | ||
710 | iocb->private = x; | ||
711 | x->kiocb = iocb; | ||
712 | sock = iocb->ki_filp->private_data; | ||
713 | |||
714 | x->async_msg.msg_name = NULL; | ||
715 | x->async_msg.msg_namelen = 0; | ||
716 | x->async_msg.msg_iov = &x->async_iov; | ||
717 | x->async_msg.msg_iovlen = 1; | ||
718 | x->async_msg.msg_control = NULL; | ||
719 | x->async_msg.msg_controllen = 0; | ||
720 | x->async_msg.msg_flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; | ||
721 | if (sock->type == SOCK_SEQPACKET) | ||
722 | x->async_msg.msg_flags |= MSG_EOR; | ||
723 | x->async_iov.iov_base = (void __user *)ubuf; | ||
724 | x->async_iov.iov_len = size; | ||
725 | |||
726 | return __sock_sendmsg(iocb, sock, &x->async_msg, size); | ||
727 | } | 729 | } |
728 | 730 | ||
729 | static ssize_t sock_sendpage(struct file *file, struct page *page, | 731 | static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, |
730 | int offset, size_t size, loff_t *ppos, int more) | 732 | struct file *file, struct iovec *iov, unsigned long nr_segs) |
731 | { | 733 | { |
732 | struct socket *sock; | 734 | struct socket *sock = file->private_data; |
733 | int flags; | 735 | size_t size = 0; |
736 | int i; | ||
734 | 737 | ||
735 | sock = file->private_data; | 738 | for (i = 0 ; i < nr_segs ; i++) |
739 | size += iov[i].iov_len; | ||
736 | 740 | ||
737 | flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; | 741 | msg->msg_name = NULL; |
738 | if (more) | 742 | msg->msg_namelen = 0; |
739 | flags |= MSG_MORE; | 743 | msg->msg_control = NULL; |
744 | msg->msg_controllen = 0; | ||
745 | msg->msg_iov = (struct iovec *) iov; | ||
746 | msg->msg_iovlen = nr_segs; | ||
747 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; | ||
748 | if (sock->type == SOCK_SEQPACKET) | ||
749 | msg->msg_flags |= MSG_EOR; | ||
740 | 750 | ||
741 | return sock->ops->sendpage(sock, page, offset, size, flags); | 751 | return __sock_sendmsg(iocb, sock, msg, size); |
742 | } | 752 | } |
743 | 753 | ||
744 | static int sock_readv_writev(int type, | 754 | static ssize_t sock_writev(struct file *file, const struct iovec *iov, |
745 | struct file * file, const struct iovec * iov, | 755 | unsigned long nr_segs, loff_t *ppos) |
746 | long count, size_t size) | ||
747 | { | 756 | { |
748 | struct msghdr msg; | 757 | struct msghdr msg; |
749 | struct socket *sock; | 758 | struct kiocb iocb; |
759 | struct sock_iocb siocb; | ||
760 | int ret; | ||
750 | 761 | ||
751 | sock = file->private_data; | 762 | init_sync_kiocb(&iocb, NULL); |
763 | iocb.private = &siocb; | ||
752 | 764 | ||
753 | msg.msg_name = NULL; | 765 | ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs); |
754 | msg.msg_namelen = 0; | 766 | if (-EIOCBQUEUED == ret) |
755 | msg.msg_control = NULL; | 767 | ret = wait_on_sync_kiocb(&iocb); |
756 | msg.msg_controllen = 0; | 768 | return ret; |
757 | msg.msg_iov = (struct iovec *) iov; | 769 | } |
758 | msg.msg_iovlen = count; | ||
759 | msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; | ||
760 | 770 | ||
761 | /* read() does a VERIFY_WRITE */ | 771 | static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, |
762 | if (type == VERIFY_WRITE) | 772 | size_t count, loff_t pos) |
763 | return sock_recvmsg(sock, &msg, size, msg.msg_flags); | 773 | { |
774 | struct sock_iocb siocb, *x; | ||
764 | 775 | ||
765 | if (sock->type == SOCK_SEQPACKET) | 776 | if (pos != 0) |
766 | msg.msg_flags |= MSG_EOR; | 777 | return -ESPIPE; |
778 | if (count == 0) /* Match SYS5 behaviour */ | ||
779 | return 0; | ||
767 | 780 | ||
768 | return sock_sendmsg(sock, &msg, size); | 781 | x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb); |
769 | } | 782 | if (!x) |
783 | return -ENOMEM; | ||
770 | 784 | ||
771 | static ssize_t sock_readv(struct file *file, const struct iovec *vector, | 785 | return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, |
772 | unsigned long count, loff_t *ppos) | 786 | &x->async_iov, 1); |
773 | { | ||
774 | size_t tot_len = 0; | ||
775 | int i; | ||
776 | for (i = 0 ; i < count ; i++) | ||
777 | tot_len += vector[i].iov_len; | ||
778 | return sock_readv_writev(VERIFY_WRITE, | ||
779 | file, vector, count, tot_len); | ||
780 | } | ||
781 | |||
782 | static ssize_t sock_writev(struct file *file, const struct iovec *vector, | ||
783 | unsigned long count, loff_t *ppos) | ||
784 | { | ||
785 | size_t tot_len = 0; | ||
786 | int i; | ||
787 | for (i = 0 ; i < count ; i++) | ||
788 | tot_len += vector[i].iov_len; | ||
789 | return sock_readv_writev(VERIFY_READ, | ||
790 | file, vector, count, tot_len); | ||
791 | } | 787 | } |
792 | 788 | ||
793 | 789 | ||
@@ -904,6 +900,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
904 | break; | 900 | break; |
905 | default: | 901 | default: |
906 | err = sock->ops->ioctl(sock, cmd, arg); | 902 | err = sock->ops->ioctl(sock, cmd, arg); |
903 | |||
904 | /* | ||
905 | * If this ioctl is unknown try to hand it down | ||
906 | * to the NIC driver. | ||
907 | */ | ||
908 | if (err == -ENOIOCTLCMD) | ||
909 | err = dev_ioctl(cmd, argp); | ||
907 | break; | 910 | break; |
908 | } | 911 | } |
909 | return err; | 912 | return err; |
@@ -2036,7 +2039,7 @@ int sock_unregister(int family) | |||
2036 | return 0; | 2039 | return 0; |
2037 | } | 2040 | } |
2038 | 2041 | ||
2039 | void __init sock_init(void) | 2042 | static int __init sock_init(void) |
2040 | { | 2043 | { |
2041 | /* | 2044 | /* |
2042 | * Initialize sock SLAB cache. | 2045 | * Initialize sock SLAB cache. |
@@ -2044,12 +2047,10 @@ void __init sock_init(void) | |||
2044 | 2047 | ||
2045 | sk_init(); | 2048 | sk_init(); |
2046 | 2049 | ||
2047 | #ifdef SLAB_SKB | ||
2048 | /* | 2050 | /* |
2049 | * Initialize skbuff SLAB cache | 2051 | * Initialize skbuff SLAB cache |
2050 | */ | 2052 | */ |
2051 | skb_init(); | 2053 | skb_init(); |
2052 | #endif | ||
2053 | 2054 | ||
2054 | /* | 2055 | /* |
2055 | * Initialize the protocols module. | 2056 | * Initialize the protocols module. |
@@ -2058,15 +2059,19 @@ void __init sock_init(void) | |||
2058 | init_inodecache(); | 2059 | init_inodecache(); |
2059 | register_filesystem(&sock_fs_type); | 2060 | register_filesystem(&sock_fs_type); |
2060 | sock_mnt = kern_mount(&sock_fs_type); | 2061 | sock_mnt = kern_mount(&sock_fs_type); |
2061 | /* The real protocol initialization is performed when | 2062 | |
2062 | * do_initcalls is run. | 2063 | /* The real protocol initialization is performed in later initcalls. |
2063 | */ | 2064 | */ |
2064 | 2065 | ||
2065 | #ifdef CONFIG_NETFILTER | 2066 | #ifdef CONFIG_NETFILTER |
2066 | netfilter_init(); | 2067 | netfilter_init(); |
2067 | #endif | 2068 | #endif |
2069 | |||
2070 | return 0; | ||
2068 | } | 2071 | } |
2069 | 2072 | ||
2073 | core_initcall(sock_init); /* early initcall */ | ||
2074 | |||
2070 | #ifdef CONFIG_PROC_FS | 2075 | #ifdef CONFIG_PROC_FS |
2071 | void socket_seq_show(struct seq_file *seq) | 2076 | void socket_seq_show(struct seq_file *seq) |
2072 | { | 2077 | { |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c6a51911e71e..d68eba481291 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -758,7 +758,7 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
758 | struct svc_serv *serv = svsk->sk_server; | 758 | struct svc_serv *serv = svsk->sk_server; |
759 | struct socket *sock = svsk->sk_sock; | 759 | struct socket *sock = svsk->sk_sock; |
760 | struct socket *newsock; | 760 | struct socket *newsock; |
761 | struct proto_ops *ops; | 761 | const struct proto_ops *ops; |
762 | struct svc_sock *newsvsk; | 762 | struct svc_sock *newsvsk; |
763 | int err, slen; | 763 | int err, slen; |
764 | 764 | ||
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index acc73ba8bade..5f6ae79b8b16 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -121,7 +121,7 @@ | |||
121 | int sysctl_unix_max_dgram_qlen = 10; | 121 | int sysctl_unix_max_dgram_qlen = 10; |
122 | 122 | ||
123 | struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; | 123 | struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; |
124 | DEFINE_RWLOCK(unix_table_lock); | 124 | DEFINE_SPINLOCK(unix_table_lock); |
125 | static atomic_t unix_nr_socks = ATOMIC_INIT(0); | 125 | static atomic_t unix_nr_socks = ATOMIC_INIT(0); |
126 | 126 | ||
127 | #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) | 127 | #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) |
@@ -130,7 +130,7 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); | |||
130 | 130 | ||
131 | /* | 131 | /* |
132 | * SMP locking strategy: | 132 | * SMP locking strategy: |
133 | * hash table is protected with rwlock unix_table_lock | 133 | * hash table is protected with spinlock unix_table_lock |
134 | * each socket state is protected by separate rwlock. | 134 | * each socket state is protected by separate rwlock. |
135 | */ | 135 | */ |
136 | 136 | ||
@@ -214,16 +214,16 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) | |||
214 | 214 | ||
215 | static inline void unix_remove_socket(struct sock *sk) | 215 | static inline void unix_remove_socket(struct sock *sk) |
216 | { | 216 | { |
217 | write_lock(&unix_table_lock); | 217 | spin_lock(&unix_table_lock); |
218 | __unix_remove_socket(sk); | 218 | __unix_remove_socket(sk); |
219 | write_unlock(&unix_table_lock); | 219 | spin_unlock(&unix_table_lock); |
220 | } | 220 | } |
221 | 221 | ||
222 | static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) | 222 | static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) |
223 | { | 223 | { |
224 | write_lock(&unix_table_lock); | 224 | spin_lock(&unix_table_lock); |
225 | __unix_insert_socket(list, sk); | 225 | __unix_insert_socket(list, sk); |
226 | write_unlock(&unix_table_lock); | 226 | spin_unlock(&unix_table_lock); |
227 | } | 227 | } |
228 | 228 | ||
229 | static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, | 229 | static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, |
@@ -250,11 +250,11 @@ static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, | |||
250 | { | 250 | { |
251 | struct sock *s; | 251 | struct sock *s; |
252 | 252 | ||
253 | read_lock(&unix_table_lock); | 253 | spin_lock(&unix_table_lock); |
254 | s = __unix_find_socket_byname(sunname, len, type, hash); | 254 | s = __unix_find_socket_byname(sunname, len, type, hash); |
255 | if (s) | 255 | if (s) |
256 | sock_hold(s); | 256 | sock_hold(s); |
257 | read_unlock(&unix_table_lock); | 257 | spin_unlock(&unix_table_lock); |
258 | return s; | 258 | return s; |
259 | } | 259 | } |
260 | 260 | ||
@@ -263,7 +263,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) | |||
263 | struct sock *s; | 263 | struct sock *s; |
264 | struct hlist_node *node; | 264 | struct hlist_node *node; |
265 | 265 | ||
266 | read_lock(&unix_table_lock); | 266 | spin_lock(&unix_table_lock); |
267 | sk_for_each(s, node, | 267 | sk_for_each(s, node, |
268 | &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { | 268 | &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { |
269 | struct dentry *dentry = unix_sk(s)->dentry; | 269 | struct dentry *dentry = unix_sk(s)->dentry; |
@@ -276,7 +276,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i) | |||
276 | } | 276 | } |
277 | s = NULL; | 277 | s = NULL; |
278 | found: | 278 | found: |
279 | read_unlock(&unix_table_lock); | 279 | spin_unlock(&unix_table_lock); |
280 | return s; | 280 | return s; |
281 | } | 281 | } |
282 | 282 | ||
@@ -473,7 +473,7 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *, | |||
473 | static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, | 473 | static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, |
474 | struct msghdr *, size_t); | 474 | struct msghdr *, size_t); |
475 | 475 | ||
476 | static struct proto_ops unix_stream_ops = { | 476 | static const struct proto_ops unix_stream_ops = { |
477 | .family = PF_UNIX, | 477 | .family = PF_UNIX, |
478 | .owner = THIS_MODULE, | 478 | .owner = THIS_MODULE, |
479 | .release = unix_release, | 479 | .release = unix_release, |
@@ -494,7 +494,7 @@ static struct proto_ops unix_stream_ops = { | |||
494 | .sendpage = sock_no_sendpage, | 494 | .sendpage = sock_no_sendpage, |
495 | }; | 495 | }; |
496 | 496 | ||
497 | static struct proto_ops unix_dgram_ops = { | 497 | static const struct proto_ops unix_dgram_ops = { |
498 | .family = PF_UNIX, | 498 | .family = PF_UNIX, |
499 | .owner = THIS_MODULE, | 499 | .owner = THIS_MODULE, |
500 | .release = unix_release, | 500 | .release = unix_release, |
@@ -515,7 +515,7 @@ static struct proto_ops unix_dgram_ops = { | |||
515 | .sendpage = sock_no_sendpage, | 515 | .sendpage = sock_no_sendpage, |
516 | }; | 516 | }; |
517 | 517 | ||
518 | static struct proto_ops unix_seqpacket_ops = { | 518 | static const struct proto_ops unix_seqpacket_ops = { |
519 | .family = PF_UNIX, | 519 | .family = PF_UNIX, |
520 | .owner = THIS_MODULE, | 520 | .owner = THIS_MODULE, |
521 | .release = unix_release, | 521 | .release = unix_release, |
@@ -564,7 +564,7 @@ static struct sock * unix_create1(struct socket *sock) | |||
564 | u = unix_sk(sk); | 564 | u = unix_sk(sk); |
565 | u->dentry = NULL; | 565 | u->dentry = NULL; |
566 | u->mnt = NULL; | 566 | u->mnt = NULL; |
567 | rwlock_init(&u->lock); | 567 | spin_lock_init(&u->lock); |
568 | atomic_set(&u->inflight, sock ? 0 : -1); | 568 | atomic_set(&u->inflight, sock ? 0 : -1); |
569 | init_MUTEX(&u->readsem); /* single task reading lock */ | 569 | init_MUTEX(&u->readsem); /* single task reading lock */ |
570 | init_waitqueue_head(&u->peer_wait); | 570 | init_waitqueue_head(&u->peer_wait); |
@@ -642,12 +642,12 @@ retry: | |||
642 | addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); | 642 | addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); |
643 | addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); | 643 | addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0)); |
644 | 644 | ||
645 | write_lock(&unix_table_lock); | 645 | spin_lock(&unix_table_lock); |
646 | ordernum = (ordernum+1)&0xFFFFF; | 646 | ordernum = (ordernum+1)&0xFFFFF; |
647 | 647 | ||
648 | if (__unix_find_socket_byname(addr->name, addr->len, sock->type, | 648 | if (__unix_find_socket_byname(addr->name, addr->len, sock->type, |
649 | addr->hash)) { | 649 | addr->hash)) { |
650 | write_unlock(&unix_table_lock); | 650 | spin_unlock(&unix_table_lock); |
651 | /* Sanity yield. It is unusual case, but yet... */ | 651 | /* Sanity yield. It is unusual case, but yet... */ |
652 | if (!(ordernum&0xFF)) | 652 | if (!(ordernum&0xFF)) |
653 | yield(); | 653 | yield(); |
@@ -658,7 +658,7 @@ retry: | |||
658 | __unix_remove_socket(sk); | 658 | __unix_remove_socket(sk); |
659 | u->addr = addr; | 659 | u->addr = addr; |
660 | __unix_insert_socket(&unix_socket_table[addr->hash], sk); | 660 | __unix_insert_socket(&unix_socket_table[addr->hash], sk); |
661 | write_unlock(&unix_table_lock); | 661 | spin_unlock(&unix_table_lock); |
662 | err = 0; | 662 | err = 0; |
663 | 663 | ||
664 | out: up(&u->readsem); | 664 | out: up(&u->readsem); |
@@ -791,7 +791,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
791 | addr->hash = UNIX_HASH_SIZE; | 791 | addr->hash = UNIX_HASH_SIZE; |
792 | } | 792 | } |
793 | 793 | ||
794 | write_lock(&unix_table_lock); | 794 | spin_lock(&unix_table_lock); |
795 | 795 | ||
796 | if (!sunaddr->sun_path[0]) { | 796 | if (!sunaddr->sun_path[0]) { |
797 | err = -EADDRINUSE; | 797 | err = -EADDRINUSE; |
@@ -814,7 +814,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
814 | __unix_insert_socket(list, sk); | 814 | __unix_insert_socket(list, sk); |
815 | 815 | ||
816 | out_unlock: | 816 | out_unlock: |
817 | write_unlock(&unix_table_lock); | 817 | spin_unlock(&unix_table_lock); |
818 | out_up: | 818 | out_up: |
819 | up(&u->readsem); | 819 | up(&u->readsem); |
820 | out: | 820 | out: |
@@ -1063,10 +1063,12 @@ restart: | |||
1063 | /* Set credentials */ | 1063 | /* Set credentials */ |
1064 | sk->sk_peercred = other->sk_peercred; | 1064 | sk->sk_peercred = other->sk_peercred; |
1065 | 1065 | ||
1066 | sock_hold(newsk); | ||
1067 | unix_peer(sk) = newsk; | ||
1068 | sock->state = SS_CONNECTED; | 1066 | sock->state = SS_CONNECTED; |
1069 | sk->sk_state = TCP_ESTABLISHED; | 1067 | sk->sk_state = TCP_ESTABLISHED; |
1068 | sock_hold(newsk); | ||
1069 | |||
1070 | smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */ | ||
1071 | unix_peer(sk) = newsk; | ||
1070 | 1072 | ||
1071 | unix_state_wunlock(sk); | 1073 | unix_state_wunlock(sk); |
1072 | 1074 | ||
@@ -1414,7 +1416,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
1414 | } else { | 1416 | } else { |
1415 | sunaddr = NULL; | 1417 | sunaddr = NULL; |
1416 | err = -ENOTCONN; | 1418 | err = -ENOTCONN; |
1417 | other = unix_peer_get(sk); | 1419 | other = unix_peer(sk); |
1418 | if (!other) | 1420 | if (!other) |
1419 | goto out_err; | 1421 | goto out_err; |
1420 | } | 1422 | } |
@@ -1476,7 +1478,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
1476 | other->sk_data_ready(other, size); | 1478 | other->sk_data_ready(other, size); |
1477 | sent+=size; | 1479 | sent+=size; |
1478 | } | 1480 | } |
1479 | sock_put(other); | ||
1480 | 1481 | ||
1481 | scm_destroy(siocb->scm); | 1482 | scm_destroy(siocb->scm); |
1482 | siocb->scm = NULL; | 1483 | siocb->scm = NULL; |
@@ -1491,8 +1492,6 @@ pipe_err: | |||
1491 | send_sig(SIGPIPE,current,0); | 1492 | send_sig(SIGPIPE,current,0); |
1492 | err = -EPIPE; | 1493 | err = -EPIPE; |
1493 | out_err: | 1494 | out_err: |
1494 | if (other) | ||
1495 | sock_put(other); | ||
1496 | scm_destroy(siocb->scm); | 1495 | scm_destroy(siocb->scm); |
1497 | siocb->scm = NULL; | 1496 | siocb->scm = NULL; |
1498 | return sent ? : err; | 1497 | return sent ? : err; |
@@ -1860,7 +1859,7 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1860 | } | 1859 | } |
1861 | 1860 | ||
1862 | default: | 1861 | default: |
1863 | err = dev_ioctl(cmd, (void __user *)arg); | 1862 | err = -ENOIOCTLCMD; |
1864 | break; | 1863 | break; |
1865 | } | 1864 | } |
1866 | return err; | 1865 | return err; |
@@ -1917,7 +1916,7 @@ static struct sock *unix_seq_idx(int *iter, loff_t pos) | |||
1917 | 1916 | ||
1918 | static void *unix_seq_start(struct seq_file *seq, loff_t *pos) | 1917 | static void *unix_seq_start(struct seq_file *seq, loff_t *pos) |
1919 | { | 1918 | { |
1920 | read_lock(&unix_table_lock); | 1919 | spin_lock(&unix_table_lock); |
1921 | return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); | 1920 | return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); |
1922 | } | 1921 | } |
1923 | 1922 | ||
@@ -1932,7 +1931,7 @@ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1932 | 1931 | ||
1933 | static void unix_seq_stop(struct seq_file *seq, void *v) | 1932 | static void unix_seq_stop(struct seq_file *seq, void *v) |
1934 | { | 1933 | { |
1935 | read_unlock(&unix_table_lock); | 1934 | spin_unlock(&unix_table_lock); |
1936 | } | 1935 | } |
1937 | 1936 | ||
1938 | static int unix_seq_show(struct seq_file *seq, void *v) | 1937 | static int unix_seq_show(struct seq_file *seq, void *v) |
diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6ffc64e1712d..411802bd4d37 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c | |||
@@ -182,7 +182,7 @@ void unix_gc(void) | |||
182 | if (down_trylock(&unix_gc_sem)) | 182 | if (down_trylock(&unix_gc_sem)) |
183 | return; | 183 | return; |
184 | 184 | ||
185 | read_lock(&unix_table_lock); | 185 | spin_lock(&unix_table_lock); |
186 | 186 | ||
187 | forall_unix_sockets(i, s) | 187 | forall_unix_sockets(i, s) |
188 | { | 188 | { |
@@ -301,7 +301,7 @@ void unix_gc(void) | |||
301 | } | 301 | } |
302 | u->gc_tree = GC_ORPHAN; | 302 | u->gc_tree = GC_ORPHAN; |
303 | } | 303 | } |
304 | read_unlock(&unix_table_lock); | 304 | spin_unlock(&unix_table_lock); |
305 | 305 | ||
306 | /* | 306 | /* |
307 | * Here we are. Hitlist is filled. Die. | 307 | * Here we are. Hitlist is filled. Die. |
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index 59fec59b2132..7a43ae4721ed 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c | |||
@@ -181,7 +181,7 @@ struct wanpipe_opt | |||
181 | #endif | 181 | #endif |
182 | 182 | ||
183 | static int sk_count; | 183 | static int sk_count; |
184 | extern struct proto_ops wanpipe_ops; | 184 | extern const struct proto_ops wanpipe_ops; |
185 | static unsigned long find_free_critical; | 185 | static unsigned long find_free_critical; |
186 | 186 | ||
187 | static void wanpipe_unlink_driver(struct sock *sk); | 187 | static void wanpipe_unlink_driver(struct sock *sk); |
@@ -1839,7 +1839,7 @@ static int wanpipe_ioctl(struct socket *sock, unsigned int cmd, unsigned long ar | |||
1839 | #endif | 1839 | #endif |
1840 | 1840 | ||
1841 | default: | 1841 | default: |
1842 | return dev_ioctl(cmd,(void __user *) arg); | 1842 | return -ENOIOCTLCMD; |
1843 | } | 1843 | } |
1844 | /*NOTREACHED*/ | 1844 | /*NOTREACHED*/ |
1845 | } | 1845 | } |
@@ -2546,7 +2546,7 @@ static int wanpipe_connect(struct socket *sock, struct sockaddr *uaddr, int addr | |||
2546 | return 0; | 2546 | return 0; |
2547 | } | 2547 | } |
2548 | 2548 | ||
2549 | struct proto_ops wanpipe_ops = { | 2549 | const struct proto_ops wanpipe_ops = { |
2550 | .family = PF_WANPIPE, | 2550 | .family = PF_WANPIPE, |
2551 | .owner = THIS_MODULE, | 2551 | .owner = THIS_MODULE, |
2552 | .release = wanpipe_release, | 2552 | .release = wanpipe_release, |
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 020d73cc8414..16459c7f54b2 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c | |||
@@ -64,7 +64,7 @@ int sysctl_x25_ack_holdback_timeout = X25_DEFAULT_T2; | |||
64 | HLIST_HEAD(x25_list); | 64 | HLIST_HEAD(x25_list); |
65 | DEFINE_RWLOCK(x25_list_lock); | 65 | DEFINE_RWLOCK(x25_list_lock); |
66 | 66 | ||
67 | static struct proto_ops x25_proto_ops; | 67 | static const struct proto_ops x25_proto_ops; |
68 | 68 | ||
69 | static struct x25_address null_x25_address = {" "}; | 69 | static struct x25_address null_x25_address = {" "}; |
70 | 70 | ||
@@ -1378,7 +1378,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1378 | } | 1378 | } |
1379 | 1379 | ||
1380 | default: | 1380 | default: |
1381 | rc = dev_ioctl(cmd, argp); | 1381 | rc = -ENOIOCTLCMD; |
1382 | break; | 1382 | break; |
1383 | } | 1383 | } |
1384 | 1384 | ||
@@ -1391,7 +1391,7 @@ static struct net_proto_family x25_family_ops = { | |||
1391 | .owner = THIS_MODULE, | 1391 | .owner = THIS_MODULE, |
1392 | }; | 1392 | }; |
1393 | 1393 | ||
1394 | static struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { | 1394 | static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { |
1395 | .family = AF_X25, | 1395 | .family = AF_X25, |
1396 | .owner = THIS_MODULE, | 1396 | .owner = THIS_MODULE, |
1397 | .release = x25_release, | 1397 | .release = x25_release, |
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d19e274b9c4a..64a447375fdb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * YOSHIFUJI Hideaki | 10 | * YOSHIFUJI Hideaki |
11 | * Split up af-specific portion | 11 | * Split up af-specific portion |
12 | * Derek Atkins <derek@ihtfp.com> Add the post_input processor | 12 | * Derek Atkins <derek@ihtfp.com> Add the post_input processor |
13 | * | 13 | * |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <asm/bug.h> | 16 | #include <asm/bug.h> |
@@ -256,6 +256,7 @@ void __xfrm_policy_destroy(struct xfrm_policy *policy) | |||
256 | if (del_timer(&policy->timer)) | 256 | if (del_timer(&policy->timer)) |
257 | BUG(); | 257 | BUG(); |
258 | 258 | ||
259 | security_xfrm_policy_free(policy); | ||
259 | kfree(policy); | 260 | kfree(policy); |
260 | } | 261 | } |
261 | EXPORT_SYMBOL(__xfrm_policy_destroy); | 262 | EXPORT_SYMBOL(__xfrm_policy_destroy); |
@@ -350,7 +351,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) | |||
350 | 351 | ||
351 | write_lock_bh(&xfrm_policy_lock); | 352 | write_lock_bh(&xfrm_policy_lock); |
352 | for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { | 353 | for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { |
353 | if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) { | 354 | if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && |
355 | xfrm_sec_ctx_match(pol->security, policy->security)) { | ||
354 | if (excl) { | 356 | if (excl) { |
355 | write_unlock_bh(&xfrm_policy_lock); | 357 | write_unlock_bh(&xfrm_policy_lock); |
356 | return -EEXIST; | 358 | return -EEXIST; |
@@ -416,14 +418,15 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) | |||
416 | } | 418 | } |
417 | EXPORT_SYMBOL(xfrm_policy_insert); | 419 | EXPORT_SYMBOL(xfrm_policy_insert); |
418 | 420 | ||
419 | struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, | 421 | struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, |
420 | int delete) | 422 | struct xfrm_sec_ctx *ctx, int delete) |
421 | { | 423 | { |
422 | struct xfrm_policy *pol, **p; | 424 | struct xfrm_policy *pol, **p; |
423 | 425 | ||
424 | write_lock_bh(&xfrm_policy_lock); | 426 | write_lock_bh(&xfrm_policy_lock); |
425 | for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { | 427 | for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { |
426 | if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) { | 428 | if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && |
429 | (xfrm_sec_ctx_match(ctx, pol->security))) { | ||
427 | xfrm_pol_hold(pol); | 430 | xfrm_pol_hold(pol); |
428 | if (delete) | 431 | if (delete) |
429 | *p = pol->next; | 432 | *p = pol->next; |
@@ -438,7 +441,7 @@ struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, | |||
438 | } | 441 | } |
439 | return pol; | 442 | return pol; |
440 | } | 443 | } |
441 | EXPORT_SYMBOL(xfrm_policy_bysel); | 444 | EXPORT_SYMBOL(xfrm_policy_bysel_ctx); |
442 | 445 | ||
443 | struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) | 446 | struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) |
444 | { | 447 | { |
@@ -519,7 +522,7 @@ EXPORT_SYMBOL(xfrm_policy_walk); | |||
519 | 522 | ||
520 | /* Find policy to apply to this flow. */ | 523 | /* Find policy to apply to this flow. */ |
521 | 524 | ||
522 | static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, | 525 | static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, |
523 | void **objp, atomic_t **obj_refp) | 526 | void **objp, atomic_t **obj_refp) |
524 | { | 527 | { |
525 | struct xfrm_policy *pol; | 528 | struct xfrm_policy *pol; |
@@ -533,9 +536,12 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, | |||
533 | continue; | 536 | continue; |
534 | 537 | ||
535 | match = xfrm_selector_match(sel, fl, family); | 538 | match = xfrm_selector_match(sel, fl, family); |
539 | |||
536 | if (match) { | 540 | if (match) { |
537 | xfrm_pol_hold(pol); | 541 | if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { |
538 | break; | 542 | xfrm_pol_hold(pol); |
543 | break; | ||
544 | } | ||
539 | } | 545 | } |
540 | } | 546 | } |
541 | read_unlock_bh(&xfrm_policy_lock); | 547 | read_unlock_bh(&xfrm_policy_lock); |
@@ -543,15 +549,37 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, | |||
543 | *obj_refp = &pol->refcnt; | 549 | *obj_refp = &pol->refcnt; |
544 | } | 550 | } |
545 | 551 | ||
546 | static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) | 552 | static inline int policy_to_flow_dir(int dir) |
553 | { | ||
554 | if (XFRM_POLICY_IN == FLOW_DIR_IN && | ||
555 | XFRM_POLICY_OUT == FLOW_DIR_OUT && | ||
556 | XFRM_POLICY_FWD == FLOW_DIR_FWD) | ||
557 | return dir; | ||
558 | switch (dir) { | ||
559 | default: | ||
560 | case XFRM_POLICY_IN: | ||
561 | return FLOW_DIR_IN; | ||
562 | case XFRM_POLICY_OUT: | ||
563 | return FLOW_DIR_OUT; | ||
564 | case XFRM_POLICY_FWD: | ||
565 | return FLOW_DIR_FWD; | ||
566 | }; | ||
567 | } | ||
568 | |||
569 | static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) | ||
547 | { | 570 | { |
548 | struct xfrm_policy *pol; | 571 | struct xfrm_policy *pol; |
549 | 572 | ||
550 | read_lock_bh(&xfrm_policy_lock); | 573 | read_lock_bh(&xfrm_policy_lock); |
551 | if ((pol = sk->sk_policy[dir]) != NULL) { | 574 | if ((pol = sk->sk_policy[dir]) != NULL) { |
552 | int match = xfrm_selector_match(&pol->selector, fl, | 575 | int match = xfrm_selector_match(&pol->selector, fl, |
553 | sk->sk_family); | 576 | sk->sk_family); |
577 | int err = 0; | ||
578 | |||
554 | if (match) | 579 | if (match) |
580 | err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); | ||
581 | |||
582 | if (match && !err) | ||
555 | xfrm_pol_hold(pol); | 583 | xfrm_pol_hold(pol); |
556 | else | 584 | else |
557 | pol = NULL; | 585 | pol = NULL; |
@@ -624,6 +652,10 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) | |||
624 | 652 | ||
625 | if (newp) { | 653 | if (newp) { |
626 | newp->selector = old->selector; | 654 | newp->selector = old->selector; |
655 | if (security_xfrm_policy_clone(old, newp)) { | ||
656 | kfree(newp); | ||
657 | return NULL; /* ENOMEM */ | ||
658 | } | ||
627 | newp->lft = old->lft; | 659 | newp->lft = old->lft; |
628 | newp->curlft = old->curlft; | 660 | newp->curlft = old->curlft; |
629 | newp->action = old->action; | 661 | newp->action = old->action; |
@@ -735,22 +767,6 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, | |||
735 | return err; | 767 | return err; |
736 | } | 768 | } |
737 | 769 | ||
738 | static inline int policy_to_flow_dir(int dir) | ||
739 | { | ||
740 | if (XFRM_POLICY_IN == FLOW_DIR_IN && | ||
741 | XFRM_POLICY_OUT == FLOW_DIR_OUT && | ||
742 | XFRM_POLICY_FWD == FLOW_DIR_FWD) | ||
743 | return dir; | ||
744 | switch (dir) { | ||
745 | default: | ||
746 | case XFRM_POLICY_IN: | ||
747 | return FLOW_DIR_IN; | ||
748 | case XFRM_POLICY_OUT: | ||
749 | return FLOW_DIR_OUT; | ||
750 | case XFRM_POLICY_FWD: | ||
751 | return FLOW_DIR_FWD; | ||
752 | }; | ||
753 | } | ||
754 | 770 | ||
755 | static int stale_bundle(struct dst_entry *dst); | 771 | static int stale_bundle(struct dst_entry *dst); |
756 | 772 | ||
@@ -769,19 +785,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, | |||
769 | int err; | 785 | int err; |
770 | u32 genid; | 786 | u32 genid; |
771 | u16 family = dst_orig->ops->family; | 787 | u16 family = dst_orig->ops->family; |
788 | u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); | ||
789 | u32 sk_sid = security_sk_sid(sk, fl, dir); | ||
772 | restart: | 790 | restart: |
773 | genid = atomic_read(&flow_cache_genid); | 791 | genid = atomic_read(&flow_cache_genid); |
774 | policy = NULL; | 792 | policy = NULL; |
775 | if (sk && sk->sk_policy[1]) | 793 | if (sk && sk->sk_policy[1]) |
776 | policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); | 794 | policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); |
777 | 795 | ||
778 | if (!policy) { | 796 | if (!policy) { |
779 | /* To accelerate a bit... */ | 797 | /* To accelerate a bit... */ |
780 | if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) | 798 | if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) |
781 | return 0; | 799 | return 0; |
782 | 800 | ||
783 | policy = flow_cache_lookup(fl, family, | 801 | policy = flow_cache_lookup(fl, sk_sid, family, dir, |
784 | policy_to_flow_dir(XFRM_POLICY_OUT), | ||
785 | xfrm_policy_lookup); | 802 | xfrm_policy_lookup); |
786 | } | 803 | } |
787 | 804 | ||
@@ -962,16 +979,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, | |||
962 | { | 979 | { |
963 | struct xfrm_policy *pol; | 980 | struct xfrm_policy *pol; |
964 | struct flowi fl; | 981 | struct flowi fl; |
982 | u8 fl_dir = policy_to_flow_dir(dir); | ||
983 | u32 sk_sid; | ||
965 | 984 | ||
966 | if (_decode_session(skb, &fl, family) < 0) | 985 | if (_decode_session(skb, &fl, family) < 0) |
967 | return 0; | 986 | return 0; |
968 | 987 | ||
988 | sk_sid = security_sk_sid(sk, &fl, fl_dir); | ||
989 | |||
969 | /* First, check used SA against their selectors. */ | 990 | /* First, check used SA against their selectors. */ |
970 | if (skb->sp) { | 991 | if (skb->sp) { |
971 | int i; | 992 | int i; |
972 | 993 | ||
973 | for (i=skb->sp->len-1; i>=0; i--) { | 994 | for (i=skb->sp->len-1; i>=0; i--) { |
974 | struct sec_decap_state *xvec = &(skb->sp->x[i]); | 995 | struct sec_decap_state *xvec = &(skb->sp->x[i]); |
975 | if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family)) | 996 | if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family)) |
976 | return 0; | 997 | return 0; |
977 | 998 | ||
@@ -986,11 +1007,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, | |||
986 | 1007 | ||
987 | pol = NULL; | 1008 | pol = NULL; |
988 | if (sk && sk->sk_policy[dir]) | 1009 | if (sk && sk->sk_policy[dir]) |
989 | pol = xfrm_sk_policy_lookup(sk, dir, &fl); | 1010 | pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); |
990 | 1011 | ||
991 | if (!pol) | 1012 | if (!pol) |
992 | pol = flow_cache_lookup(&fl, family, | 1013 | pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, |
993 | policy_to_flow_dir(dir), | ||
994 | xfrm_policy_lookup); | 1014 | xfrm_policy_lookup); |
995 | 1015 | ||
996 | if (!pol) | 1016 | if (!pol) |
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 479effc97666..e12d0be5f976 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * Split up af-specific functions | 10 | * Split up af-specific functions |
11 | * Derek Atkins <derek@ihtfp.com> | 11 | * Derek Atkins <derek@ihtfp.com> |
12 | * Add UDP Encapsulation | 12 | * Add UDP Encapsulation |
13 | * | 13 | * |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/workqueue.h> | 16 | #include <linux/workqueue.h> |
@@ -70,6 +70,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) | |||
70 | x->type->destructor(x); | 70 | x->type->destructor(x); |
71 | xfrm_put_type(x->type); | 71 | xfrm_put_type(x->type); |
72 | } | 72 | } |
73 | security_xfrm_state_free(x); | ||
73 | kfree(x); | 74 | kfree(x); |
74 | } | 75 | } |
75 | 76 | ||
@@ -343,7 +344,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, | |||
343 | selector. | 344 | selector. |
344 | */ | 345 | */ |
345 | if (x->km.state == XFRM_STATE_VALID) { | 346 | if (x->km.state == XFRM_STATE_VALID) { |
346 | if (!xfrm_selector_match(&x->sel, fl, family)) | 347 | if (!xfrm_selector_match(&x->sel, fl, family) || |
348 | !xfrm_sec_ctx_match(pol->security, x->security)) | ||
347 | continue; | 349 | continue; |
348 | if (!best || | 350 | if (!best || |
349 | best->km.dying > x->km.dying || | 351 | best->km.dying > x->km.dying || |
@@ -354,7 +356,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, | |||
354 | acquire_in_progress = 1; | 356 | acquire_in_progress = 1; |
355 | } else if (x->km.state == XFRM_STATE_ERROR || | 357 | } else if (x->km.state == XFRM_STATE_ERROR || |
356 | x->km.state == XFRM_STATE_EXPIRED) { | 358 | x->km.state == XFRM_STATE_EXPIRED) { |
357 | if (xfrm_selector_match(&x->sel, fl, family)) | 359 | if (xfrm_selector_match(&x->sel, fl, family) && |
360 | xfrm_sec_ctx_match(pol->security, x->security)) | ||
358 | error = -ESRCH; | 361 | error = -ESRCH; |
359 | } | 362 | } |
360 | } | 363 | } |
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0cdd9a07e043..92e2b804c606 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c | |||
@@ -7,7 +7,7 @@ | |||
7 | * Kazunori MIYAZAWA @USAGI | 7 | * Kazunori MIYAZAWA @USAGI |
8 | * Kunihiro Ishiguro <kunihiro@ipinfusion.com> | 8 | * Kunihiro Ishiguro <kunihiro@ipinfusion.com> |
9 | * IPv6 support | 9 | * IPv6 support |
10 | * | 10 | * |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
@@ -88,6 +88,34 @@ static int verify_encap_tmpl(struct rtattr **xfrma) | |||
88 | return 0; | 88 | return 0; |
89 | } | 89 | } |
90 | 90 | ||
91 | |||
92 | static inline int verify_sec_ctx_len(struct rtattr **xfrma) | ||
93 | { | ||
94 | struct rtattr *rt = xfrma[XFRMA_SEC_CTX - 1]; | ||
95 | struct xfrm_user_sec_ctx *uctx; | ||
96 | int len = 0; | ||
97 | |||
98 | if (!rt) | ||
99 | return 0; | ||
100 | |||
101 | if (rt->rta_len < sizeof(*uctx)) | ||
102 | return -EINVAL; | ||
103 | |||
104 | uctx = RTA_DATA(rt); | ||
105 | |||
106 | if (uctx->ctx_len > PAGE_SIZE) | ||
107 | return -EINVAL; | ||
108 | |||
109 | len += sizeof(struct xfrm_user_sec_ctx); | ||
110 | len += uctx->ctx_len; | ||
111 | |||
112 | if (uctx->len != len) | ||
113 | return -EINVAL; | ||
114 | |||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | |||
91 | static int verify_newsa_info(struct xfrm_usersa_info *p, | 119 | static int verify_newsa_info(struct xfrm_usersa_info *p, |
92 | struct rtattr **xfrma) | 120 | struct rtattr **xfrma) |
93 | { | 121 | { |
@@ -145,6 +173,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, | |||
145 | goto out; | 173 | goto out; |
146 | if ((err = verify_encap_tmpl(xfrma))) | 174 | if ((err = verify_encap_tmpl(xfrma))) |
147 | goto out; | 175 | goto out; |
176 | if ((err = verify_sec_ctx_len(xfrma))) | ||
177 | goto out; | ||
148 | 178 | ||
149 | err = -EINVAL; | 179 | err = -EINVAL; |
150 | switch (p->mode) { | 180 | switch (p->mode) { |
@@ -209,6 +239,30 @@ static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_a | |||
209 | return 0; | 239 | return 0; |
210 | } | 240 | } |
211 | 241 | ||
242 | |||
243 | static inline int xfrm_user_sec_ctx_size(struct xfrm_policy *xp) | ||
244 | { | ||
245 | struct xfrm_sec_ctx *xfrm_ctx = xp->security; | ||
246 | int len = 0; | ||
247 | |||
248 | if (xfrm_ctx) { | ||
249 | len += sizeof(struct xfrm_user_sec_ctx); | ||
250 | len += xfrm_ctx->ctx_len; | ||
251 | } | ||
252 | return len; | ||
253 | } | ||
254 | |||
255 | static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) | ||
256 | { | ||
257 | struct xfrm_user_sec_ctx *uctx; | ||
258 | |||
259 | if (!u_arg) | ||
260 | return 0; | ||
261 | |||
262 | uctx = RTA_DATA(u_arg); | ||
263 | return security_xfrm_state_alloc(x, uctx); | ||
264 | } | ||
265 | |||
212 | static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) | 266 | static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) |
213 | { | 267 | { |
214 | memcpy(&x->id, &p->id, sizeof(x->id)); | 268 | memcpy(&x->id, &p->id, sizeof(x->id)); |
@@ -253,6 +307,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, | |||
253 | if (err) | 307 | if (err) |
254 | goto error; | 308 | goto error; |
255 | 309 | ||
310 | if ((err = attach_sec_ctx(x, xfrma[XFRMA_SEC_CTX-1]))) | ||
311 | goto error; | ||
312 | |||
256 | x->km.seq = p->seq; | 313 | x->km.seq = p->seq; |
257 | 314 | ||
258 | return x; | 315 | return x; |
@@ -272,11 +329,11 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) | |||
272 | int err; | 329 | int err; |
273 | struct km_event c; | 330 | struct km_event c; |
274 | 331 | ||
275 | err = verify_newsa_info(p, (struct rtattr **) xfrma); | 332 | err = verify_newsa_info(p, (struct rtattr **)xfrma); |
276 | if (err) | 333 | if (err) |
277 | return err; | 334 | return err; |
278 | 335 | ||
279 | x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err); | 336 | x = xfrm_state_construct(p, (struct rtattr **)xfrma, &err); |
280 | if (!x) | 337 | if (!x) |
281 | return err; | 338 | return err; |
282 | 339 | ||
@@ -390,6 +447,19 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) | |||
390 | if (x->encap) | 447 | if (x->encap) |
391 | RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); | 448 | RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); |
392 | 449 | ||
450 | if (x->security) { | ||
451 | int ctx_size = sizeof(struct xfrm_sec_ctx) + | ||
452 | x->security->ctx_len; | ||
453 | struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); | ||
454 | struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); | ||
455 | |||
456 | uctx->exttype = XFRMA_SEC_CTX; | ||
457 | uctx->len = ctx_size; | ||
458 | uctx->ctx_doi = x->security->ctx_doi; | ||
459 | uctx->ctx_alg = x->security->ctx_alg; | ||
460 | uctx->ctx_len = x->security->ctx_len; | ||
461 | memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); | ||
462 | } | ||
393 | nlh->nlmsg_len = skb->tail - b; | 463 | nlh->nlmsg_len = skb->tail - b; |
394 | out: | 464 | out: |
395 | sp->this_idx++; | 465 | sp->this_idx++; |
@@ -603,6 +673,18 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) | |||
603 | return verify_policy_dir(p->dir); | 673 | return verify_policy_dir(p->dir); |
604 | } | 674 | } |
605 | 675 | ||
676 | static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct rtattr **xfrma) | ||
677 | { | ||
678 | struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; | ||
679 | struct xfrm_user_sec_ctx *uctx; | ||
680 | |||
681 | if (!rt) | ||
682 | return 0; | ||
683 | |||
684 | uctx = RTA_DATA(rt); | ||
685 | return security_xfrm_policy_alloc(pol, uctx); | ||
686 | } | ||
687 | |||
606 | static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, | 688 | static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, |
607 | int nr) | 689 | int nr) |
608 | { | 690 | { |
@@ -681,7 +763,10 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, | |||
681 | } | 763 | } |
682 | 764 | ||
683 | copy_from_user_policy(xp, p); | 765 | copy_from_user_policy(xp, p); |
684 | err = copy_from_user_tmpl(xp, xfrma); | 766 | |
767 | if (!(err = copy_from_user_tmpl(xp, xfrma))) | ||
768 | err = copy_from_user_sec_ctx(xp, xfrma); | ||
769 | |||
685 | if (err) { | 770 | if (err) { |
686 | *errp = err; | 771 | *errp = err; |
687 | kfree(xp); | 772 | kfree(xp); |
@@ -702,8 +787,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr | |||
702 | err = verify_newpolicy_info(p); | 787 | err = verify_newpolicy_info(p); |
703 | if (err) | 788 | if (err) |
704 | return err; | 789 | return err; |
790 | err = verify_sec_ctx_len((struct rtattr **)xfrma); | ||
791 | if (err) | ||
792 | return err; | ||
705 | 793 | ||
706 | xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err); | 794 | xp = xfrm_policy_construct(p, (struct rtattr **)xfrma, &err); |
707 | if (!xp) | 795 | if (!xp) |
708 | return err; | 796 | return err; |
709 | 797 | ||
@@ -761,6 +849,27 @@ rtattr_failure: | |||
761 | return -1; | 849 | return -1; |
762 | } | 850 | } |
763 | 851 | ||
852 | static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) | ||
853 | { | ||
854 | if (xp->security) { | ||
855 | int ctx_size = sizeof(struct xfrm_sec_ctx) + | ||
856 | xp->security->ctx_len; | ||
857 | struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); | ||
858 | struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); | ||
859 | |||
860 | uctx->exttype = XFRMA_SEC_CTX; | ||
861 | uctx->len = ctx_size; | ||
862 | uctx->ctx_doi = xp->security->ctx_doi; | ||
863 | uctx->ctx_alg = xp->security->ctx_alg; | ||
864 | uctx->ctx_len = xp->security->ctx_len; | ||
865 | memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len); | ||
866 | } | ||
867 | return 0; | ||
868 | |||
869 | rtattr_failure: | ||
870 | return -1; | ||
871 | } | ||
872 | |||
764 | static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) | 873 | static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) |
765 | { | 874 | { |
766 | struct xfrm_dump_info *sp = ptr; | 875 | struct xfrm_dump_info *sp = ptr; |
@@ -782,6 +891,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr | |||
782 | copy_to_user_policy(xp, p, dir); | 891 | copy_to_user_policy(xp, p, dir); |
783 | if (copy_to_user_tmpl(xp, skb) < 0) | 892 | if (copy_to_user_tmpl(xp, skb) < 0) |
784 | goto nlmsg_failure; | 893 | goto nlmsg_failure; |
894 | if (copy_to_user_sec_ctx(xp, skb)) | ||
895 | goto nlmsg_failure; | ||
785 | 896 | ||
786 | nlh->nlmsg_len = skb->tail - b; | 897 | nlh->nlmsg_len = skb->tail - b; |
787 | out: | 898 | out: |
@@ -852,8 +963,25 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr | |||
852 | 963 | ||
853 | if (p->index) | 964 | if (p->index) |
854 | xp = xfrm_policy_byid(p->dir, p->index, delete); | 965 | xp = xfrm_policy_byid(p->dir, p->index, delete); |
855 | else | 966 | else { |
856 | xp = xfrm_policy_bysel(p->dir, &p->sel, delete); | 967 | struct rtattr **rtattrs = (struct rtattr **)xfrma; |
968 | struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; | ||
969 | struct xfrm_policy tmp; | ||
970 | |||
971 | err = verify_sec_ctx_len(rtattrs); | ||
972 | if (err) | ||
973 | return err; | ||
974 | |||
975 | memset(&tmp, 0, sizeof(struct xfrm_policy)); | ||
976 | if (rt) { | ||
977 | struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); | ||
978 | |||
979 | if ((err = security_xfrm_policy_alloc(&tmp, uctx))) | ||
980 | return err; | ||
981 | } | ||
982 | xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete); | ||
983 | security_xfrm_policy_free(&tmp); | ||
984 | } | ||
857 | if (xp == NULL) | 985 | if (xp == NULL) |
858 | return -ENOENT; | 986 | return -ENOENT; |
859 | 987 | ||
@@ -1224,6 +1352,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, | |||
1224 | 1352 | ||
1225 | if (copy_to_user_tmpl(xp, skb) < 0) | 1353 | if (copy_to_user_tmpl(xp, skb) < 0) |
1226 | goto nlmsg_failure; | 1354 | goto nlmsg_failure; |
1355 | if (copy_to_user_sec_ctx(xp, skb)) | ||
1356 | goto nlmsg_failure; | ||
1227 | 1357 | ||
1228 | nlh->nlmsg_len = skb->tail - b; | 1358 | nlh->nlmsg_len = skb->tail - b; |
1229 | return skb->len; | 1359 | return skb->len; |
@@ -1241,6 +1371,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, | |||
1241 | 1371 | ||
1242 | len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); | 1372 | len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); |
1243 | len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); | 1373 | len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); |
1374 | len += RTA_SPACE(xfrm_user_sec_ctx_size(xp)); | ||
1244 | skb = alloc_skb(len, GFP_ATOMIC); | 1375 | skb = alloc_skb(len, GFP_ATOMIC); |
1245 | if (skb == NULL) | 1376 | if (skb == NULL) |
1246 | return -ENOMEM; | 1377 | return -ENOMEM; |
@@ -1324,6 +1455,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, | |||
1324 | copy_to_user_policy(xp, &upe->pol, dir); | 1455 | copy_to_user_policy(xp, &upe->pol, dir); |
1325 | if (copy_to_user_tmpl(xp, skb) < 0) | 1456 | if (copy_to_user_tmpl(xp, skb) < 0) |
1326 | goto nlmsg_failure; | 1457 | goto nlmsg_failure; |
1458 | if (copy_to_user_sec_ctx(xp, skb)) | ||
1459 | goto nlmsg_failure; | ||
1327 | upe->hard = !!hard; | 1460 | upe->hard = !!hard; |
1328 | 1461 | ||
1329 | nlh->nlmsg_len = skb->tail - b; | 1462 | nlh->nlmsg_len = skb->tail - b; |
@@ -1341,6 +1474,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve | |||
1341 | 1474 | ||
1342 | len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); | 1475 | len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); |
1343 | len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); | 1476 | len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); |
1477 | len += RTA_SPACE(xfrm_user_sec_ctx_size(xp)); | ||
1344 | skb = alloc_skb(len, GFP_ATOMIC); | 1478 | skb = alloc_skb(len, GFP_ATOMIC); |
1345 | if (skb == NULL) | 1479 | if (skb == NULL) |
1346 | return -ENOMEM; | 1480 | return -ENOMEM; |