diff options
| author | Jeff Garzik <jgarzik@pobox.com> | 2005-11-10 04:12:10 -0500 | 
|---|---|---|
| committer | Jeff Garzik <jgarzik@pobox.com> | 2005-11-10 04:12:10 -0500 | 
| commit | 2f67bdb23d74a6c6fd4f98f64239c5c34d1833cc (patch) | |
| tree | fe533abe3e7c400848647b95e4806f5125c654c3 /net | |
| parent | d40d9d29c020f8466c96f8e3ad4b7c014ff1085d (diff) | |
| parent | 3b44f137b9a846c5452d9e6e1271b79b1dbcc942 (diff) | |
Merge branch 'master'
Diffstat (limited to 'net')
130 files changed, 10616 insertions, 2022 deletions
diff --git a/net/802/p8023.c b/net/802/p8023.c index 6368d3dce444..d23e906456eb 100644 --- a/net/802/p8023.c +++ b/net/802/p8023.c  | |||
| @@ -54,8 +54,7 @@ struct datalink_proto *make_8023_client(void) | |||
| 54 | */ | 54 | */ | 
| 55 | void destroy_8023_client(struct datalink_proto *dl) | 55 | void destroy_8023_client(struct datalink_proto *dl) | 
| 56 | { | 56 | { | 
| 57 | if (dl) | 57 | kfree(dl); | 
| 58 | kfree(dl); | ||
| 59 | } | 58 | } | 
| 60 | 59 | ||
| 61 | EXPORT_SYMBOL(destroy_8023_client); | 60 | EXPORT_SYMBOL(destroy_8023_client); | 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 8e37e71e34ff..1b683f302657 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c  | |||
| @@ -1138,10 +1138,8 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, | |||
| 1138 | sk->sk_state = TCP_CLOSE; | 1138 | sk->sk_state = TCP_CLOSE; | 
| 1139 | sock->state = SS_UNCONNECTED; | 1139 | sock->state = SS_UNCONNECTED; | 
| 1140 | 1140 | ||
| 1141 | if (ax25->digipeat != NULL) { | 1141 | kfree(ax25->digipeat); | 
| 1142 | kfree(ax25->digipeat); | 1142 | ax25->digipeat = NULL; | 
| 1143 | ax25->digipeat = NULL; | ||
| 1144 | } | ||
| 1145 | 1143 | ||
| 1146 | /* | 1144 | /* | 
| 1147 | * Handle digi-peaters to be used. | 1145 | * Handle digi-peaters to be used. | 
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 73cfc3411c46..4cf87540fb3a 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c  | |||
| @@ -401,10 +401,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, | |||
| 401 | } | 401 | } | 
| 402 | 402 | ||
| 403 | if (dp.ndigi == 0) { | 403 | if (dp.ndigi == 0) { | 
| 404 | if (ax25->digipeat != NULL) { | 404 | kfree(ax25->digipeat); | 
| 405 | kfree(ax25->digipeat); | 405 | ax25->digipeat = NULL; | 
| 406 | ax25->digipeat = NULL; | ||
| 407 | } | ||
| 408 | } else { | 406 | } else { | 
| 409 | /* Reverse the source SABM's path */ | 407 | /* Reverse the source SABM's path */ | 
| 410 | memcpy(ax25->digipeat, &reverse_dp, sizeof(ax25_digi)); | 408 | memcpy(ax25->digipeat, &reverse_dp, sizeof(ax25_digi)); | 
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 26b77d972220..b1e945bd6ed3 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c  | |||
| @@ -54,15 +54,13 @@ void ax25_rt_device_down(struct net_device *dev) | |||
| 54 | if (s->dev == dev) { | 54 | if (s->dev == dev) { | 
| 55 | if (ax25_route_list == s) { | 55 | if (ax25_route_list == s) { | 
| 56 | ax25_route_list = s->next; | 56 | ax25_route_list = s->next; | 
| 57 | if (s->digipeat != NULL) | 57 | kfree(s->digipeat); | 
| 58 | kfree(s->digipeat); | ||
| 59 | kfree(s); | 58 | kfree(s); | 
| 60 | } else { | 59 | } else { | 
| 61 | for (t = ax25_route_list; t != NULL; t = t->next) { | 60 | for (t = ax25_route_list; t != NULL; t = t->next) { | 
| 62 | if (t->next == s) { | 61 | if (t->next == s) { | 
| 63 | t->next = s->next; | 62 | t->next = s->next; | 
| 64 | if (s->digipeat != NULL) | 63 | kfree(s->digipeat); | 
| 65 | kfree(s->digipeat); | ||
| 66 | kfree(s); | 64 | kfree(s); | 
| 67 | break; | 65 | break; | 
| 68 | } | 66 | } | 
| @@ -90,10 +88,8 @@ static int ax25_rt_add(struct ax25_routes_struct *route) | |||
| 90 | while (ax25_rt != NULL) { | 88 | while (ax25_rt != NULL) { | 
| 91 | if (ax25cmp(&ax25_rt->callsign, &route->dest_addr) == 0 && | 89 | if (ax25cmp(&ax25_rt->callsign, &route->dest_addr) == 0 && | 
| 92 | ax25_rt->dev == ax25_dev->dev) { | 90 | ax25_rt->dev == ax25_dev->dev) { | 
| 93 | if (ax25_rt->digipeat != NULL) { | 91 | kfree(ax25_rt->digipeat); | 
| 94 | kfree(ax25_rt->digipeat); | 92 | ax25_rt->digipeat = NULL; | 
| 95 | ax25_rt->digipeat = NULL; | ||
| 96 | } | ||
| 97 | if (route->digi_count != 0) { | 93 | if (route->digi_count != 0) { | 
| 98 | if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { | 94 | if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { | 
| 99 | write_unlock(&ax25_route_lock); | 95 | write_unlock(&ax25_route_lock); | 
| @@ -145,8 +141,7 @@ static int ax25_rt_add(struct ax25_routes_struct *route) | |||
| 145 | static void ax25_rt_destroy(ax25_route *ax25_rt) | 141 | static void ax25_rt_destroy(ax25_route *ax25_rt) | 
| 146 | { | 142 | { | 
| 147 | if (atomic_read(&ax25_rt->ref) == 0) { | 143 | if (atomic_read(&ax25_rt->ref) == 0) { | 
| 148 | if (ax25_rt->digipeat != NULL) | 144 | kfree(ax25_rt->digipeat); | 
| 149 | kfree(ax25_rt->digipeat); | ||
| 150 | kfree(ax25_rt); | 145 | kfree(ax25_rt); | 
| 151 | return; | 146 | return; | 
| 152 | } | 147 | } | 
| @@ -530,9 +525,7 @@ void __exit ax25_rt_free(void) | |||
| 530 | s = ax25_rt; | 525 | s = ax25_rt; | 
| 531 | ax25_rt = ax25_rt->next; | 526 | ax25_rt = ax25_rt->next; | 
| 532 | 527 | ||
| 533 | if (s->digipeat != NULL) | 528 | kfree(s->digipeat); | 
| 534 | kfree(s->digipeat); | ||
| 535 | |||
| 536 | kfree(s); | 529 | kfree(s); | 
| 537 | } | 530 | } | 
| 538 | write_unlock(&ax25_route_lock); | 531 | write_unlock(&ax25_route_lock); | 
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 03532062a46a..ea616e3fc98e 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c  | |||
| @@ -36,7 +36,6 @@ | |||
| 36 | #include <linux/skbuff.h> | 36 | #include <linux/skbuff.h> | 
| 37 | #include <linux/init.h> | 37 | #include <linux/init.h> | 
| 38 | #include <linux/poll.h> | 38 | #include <linux/poll.h> | 
| 39 | #include <linux/proc_fs.h> | ||
| 40 | #include <net/sock.h> | 39 | #include <net/sock.h> | 
| 41 | 40 | ||
| 42 | #if defined(CONFIG_KMOD) | 41 | #if defined(CONFIG_KMOD) | 
| @@ -50,10 +49,7 @@ | |||
| 50 | #define BT_DBG(D...) | 49 | #define BT_DBG(D...) | 
| 51 | #endif | 50 | #endif | 
| 52 | 51 | ||
| 53 | #define VERSION "2.7" | 52 | #define VERSION "2.8" | 
| 54 | |||
| 55 | struct proc_dir_entry *proc_bt; | ||
| 56 | EXPORT_SYMBOL(proc_bt); | ||
| 57 | 53 | ||
| 58 | /* Bluetooth sockets */ | 54 | /* Bluetooth sockets */ | 
| 59 | #define BT_MAX_PROTO 8 | 55 | #define BT_MAX_PROTO 8 | 
| @@ -312,10 +308,6 @@ static int __init bt_init(void) | |||
| 312 | { | 308 | { | 
| 313 | BT_INFO("Core ver %s", VERSION); | 309 | BT_INFO("Core ver %s", VERSION); | 
| 314 | 310 | ||
| 315 | proc_bt = proc_mkdir("bluetooth", NULL); | ||
| 316 | if (proc_bt) | ||
| 317 | proc_bt->owner = THIS_MODULE; | ||
| 318 | |||
| 319 | sock_register(&bt_sock_family_ops); | 311 | sock_register(&bt_sock_family_ops); | 
| 320 | 312 | ||
| 321 | BT_INFO("HCI device and connection manager initialized"); | 313 | BT_INFO("HCI device and connection manager initialized"); | 
| @@ -334,8 +326,6 @@ static void __exit bt_exit(void) | |||
| 334 | bt_sysfs_cleanup(); | 326 | bt_sysfs_cleanup(); | 
| 335 | 327 | ||
| 336 | sock_unregister(PF_BLUETOOTH); | 328 | sock_unregister(PF_BLUETOOTH); | 
| 337 | |||
| 338 | remove_proc_entry("bluetooth", NULL); | ||
| 339 | } | 329 | } | 
| 340 | 330 | ||
| 341 | subsys_initcall(bt_init); | 331 | subsys_initcall(bt_init); | 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index cf0df1c8c933..9106354c781e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c  | |||
| @@ -183,7 +183,7 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) | |||
| 183 | static void hci_init_req(struct hci_dev *hdev, unsigned long opt) | 183 | static void hci_init_req(struct hci_dev *hdev, unsigned long opt) | 
| 184 | { | 184 | { | 
| 185 | struct sk_buff *skb; | 185 | struct sk_buff *skb; | 
| 186 | __u16 param; | 186 | __le16 param; | 
| 187 | 187 | ||
| 188 | BT_DBG("%s %ld", hdev->name, opt); | 188 | BT_DBG("%s %ld", hdev->name, opt); | 
| 189 | 189 | ||
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b61b4e8e36fd..eb64555d1fb3 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c  | |||
| @@ -242,7 +242,7 @@ static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb | |||
| 242 | break; | 242 | break; | 
| 243 | 243 | ||
| 244 | status = *((__u8 *) skb->data); | 244 | status = *((__u8 *) skb->data); | 
| 245 | setting = __le16_to_cpu(get_unaligned((__u16 *) sent)); | 245 | setting = __le16_to_cpu(get_unaligned((__le16 *) sent)); | 
| 246 | 246 | ||
| 247 | if (!status && hdev->voice_setting != setting) { | 247 | if (!status && hdev->voice_setting != setting) { | 
| 248 | hdev->voice_setting = setting; | 248 | hdev->voice_setting = setting; | 
| @@ -728,7 +728,7 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff | |||
| 728 | static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) | 728 | static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) | 
| 729 | { | 729 | { | 
| 730 | struct hci_ev_num_comp_pkts *ev = (struct hci_ev_num_comp_pkts *) skb->data; | 730 | struct hci_ev_num_comp_pkts *ev = (struct hci_ev_num_comp_pkts *) skb->data; | 
| 731 | __u16 *ptr; | 731 | __le16 *ptr; | 
| 732 | int i; | 732 | int i; | 
| 733 | 733 | ||
| 734 | skb_pull(skb, sizeof(*ev)); | 734 | skb_pull(skb, sizeof(*ev)); | 
| @@ -742,7 +742,7 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s | |||
| 742 | 742 | ||
| 743 | tasklet_disable(&hdev->tx_task); | 743 | tasklet_disable(&hdev->tx_task); | 
| 744 | 744 | ||
| 745 | for (i = 0, ptr = (__u16 *) skb->data; i < ev->num_hndl; i++) { | 745 | for (i = 0, ptr = (__le16 *) skb->data; i < ev->num_hndl; i++) { | 
| 746 | struct hci_conn *conn; | 746 | struct hci_conn *conn; | 
| 747 | __u16 handle, count; | 747 | __u16 handle, count; | 
| 748 | 748 | ||
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 799e448750ad..1d6d0a15c099 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c  | |||
| @@ -416,7 +416,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
| 416 | skb->dev = (void *) hdev; | 416 | skb->dev = (void *) hdev; | 
| 417 | 417 | ||
| 418 | if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { | 418 | if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { | 
| 419 | u16 opcode = __le16_to_cpu(get_unaligned((u16 *)skb->data)); | 419 | u16 opcode = __le16_to_cpu(get_unaligned((__le16 *) skb->data)); | 
| 420 | u16 ogf = hci_opcode_ogf(opcode); | 420 | u16 ogf = hci_opcode_ogf(opcode); | 
| 421 | u16 ocf = hci_opcode_ocf(opcode); | 421 | u16 ocf = hci_opcode_ocf(opcode); | 
| 422 | 422 | ||
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 7856bc26accb..bd7568ac87fc 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c  | |||
| @@ -103,7 +103,7 @@ static void bt_release(struct class_device *cdev) | |||
| 103 | kfree(hdev); | 103 | kfree(hdev); | 
| 104 | } | 104 | } | 
| 105 | 105 | ||
| 106 | static struct class bt_class = { | 106 | struct class bt_class = { | 
| 107 | .name = "bluetooth", | 107 | .name = "bluetooth", | 
| 108 | .release = bt_release, | 108 | .release = bt_release, | 
| 109 | #ifdef CONFIG_HOTPLUG | 109 | #ifdef CONFIG_HOTPLUG | 
| @@ -111,6 +111,8 @@ static struct class bt_class = { | |||
| 111 | #endif | 111 | #endif | 
| 112 | }; | 112 | }; | 
| 113 | 113 | ||
| 114 | EXPORT_SYMBOL_GPL(bt_class); | ||
| 115 | |||
| 114 | int hci_register_sysfs(struct hci_dev *hdev) | 116 | int hci_register_sysfs(struct hci_dev *hdev) | 
| 115 | { | 117 | { | 
| 116 | struct class_device *cdev = &hdev->class_dev; | 118 | struct class_device *cdev = &hdev->class_dev; | 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 860444a7fc0f..cdb9cfafd960 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c  | |||
| @@ -660,9 +660,7 @@ unlink: | |||
| 660 | failed: | 660 | failed: | 
| 661 | up_write(&hidp_session_sem); | 661 | up_write(&hidp_session_sem); | 
| 662 | 662 | ||
| 663 | if (session->input) | 663 | kfree(session->input); | 
| 664 | kfree(session->input); | ||
| 665 | |||
| 666 | kfree(session); | 664 | kfree(session); | 
| 667 | return err; | 665 | return err; | 
| 668 | } | 666 | } | 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 59b2dd36baa7..e3bb11ca4235 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c  | |||
| @@ -38,9 +38,8 @@ | |||
| 38 | #include <linux/interrupt.h> | 38 | #include <linux/interrupt.h> | 
| 39 | #include <linux/socket.h> | 39 | #include <linux/socket.h> | 
| 40 | #include <linux/skbuff.h> | 40 | #include <linux/skbuff.h> | 
| 41 | #include <linux/proc_fs.h> | ||
| 42 | #include <linux/seq_file.h> | ||
| 43 | #include <linux/list.h> | 41 | #include <linux/list.h> | 
| 42 | #include <linux/device.h> | ||
| 44 | #include <net/sock.h> | 43 | #include <net/sock.h> | 
| 45 | 44 | ||
| 46 | #include <asm/system.h> | 45 | #include <asm/system.h> | 
| @@ -56,7 +55,7 @@ | |||
| 56 | #define BT_DBG(D...) | 55 | #define BT_DBG(D...) | 
| 57 | #endif | 56 | #endif | 
| 58 | 57 | ||
| 59 | #define VERSION "2.7" | 58 | #define VERSION "2.8" | 
| 60 | 59 | ||
| 61 | static struct proto_ops l2cap_sock_ops; | 60 | static struct proto_ops l2cap_sock_ops; | 
| 62 | 61 | ||
| @@ -2137,94 +2136,29 @@ drop: | |||
| 2137 | return 0; | 2136 | return 0; | 
| 2138 | } | 2137 | } | 
| 2139 | 2138 | ||
| 2140 | /* ---- Proc fs support ---- */ | 2139 | static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) | 
| 2141 | #ifdef CONFIG_PROC_FS | ||
| 2142 | static void *l2cap_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 2143 | { | 2140 | { | 
| 2144 | struct sock *sk; | 2141 | struct sock *sk; | 
| 2145 | struct hlist_node *node; | 2142 | struct hlist_node *node; | 
| 2146 | loff_t l = *pos; | 2143 | char *str = buf; | 
| 2147 | 2144 | ||
| 2148 | read_lock_bh(&l2cap_sk_list.lock); | 2145 | read_lock_bh(&l2cap_sk_list.lock); | 
| 2149 | 2146 | ||
| 2150 | sk_for_each(sk, node, &l2cap_sk_list.head) | 2147 | sk_for_each(sk, node, &l2cap_sk_list.head) { | 
| 2151 | if (!l--) | 2148 | struct l2cap_pinfo *pi = l2cap_pi(sk); | 
| 2152 | goto found; | ||
| 2153 | sk = NULL; | ||
| 2154 | found: | ||
| 2155 | return sk; | ||
| 2156 | } | ||
| 2157 | 2149 | ||
| 2158 | static void *l2cap_seq_next(struct seq_file *seq, void *e, loff_t *pos) | 2150 | str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d 0x%x\n", | 
| 2159 | { | 2151 | batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), | 
| 2160 | (*pos)++; | 2152 | sk->sk_state, pi->psm, pi->scid, pi->dcid, pi->imtu, | 
| 2161 | return sk_next(e); | 2153 | pi->omtu, pi->link_mode); | 
| 2162 | } | 2154 | } | 
| 2163 | 2155 | ||
| 2164 | static void l2cap_seq_stop(struct seq_file *seq, void *e) | ||
| 2165 | { | ||
| 2166 | read_unlock_bh(&l2cap_sk_list.lock); | 2156 | read_unlock_bh(&l2cap_sk_list.lock); | 
| 2167 | } | ||
| 2168 | 2157 | ||
| 2169 | static int l2cap_seq_show(struct seq_file *seq, void *e) | 2158 | return (str - buf); | 
| 2170 | { | ||
| 2171 | struct sock *sk = e; | ||
| 2172 | struct l2cap_pinfo *pi = l2cap_pi(sk); | ||
| 2173 | |||
| 2174 | seq_printf(seq, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d 0x%x\n", | ||
| 2175 | batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), | ||
| 2176 | sk->sk_state, pi->psm, pi->scid, pi->dcid, pi->imtu, | ||
| 2177 | pi->omtu, pi->link_mode); | ||
| 2178 | return 0; | ||
| 2179 | } | 2159 | } | 
| 2180 | 2160 | ||
| 2181 | static struct seq_operations l2cap_seq_ops = { | 2161 | static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL); | 
| 2182 | .start = l2cap_seq_start, | ||
| 2183 | .next = l2cap_seq_next, | ||
| 2184 | .stop = l2cap_seq_stop, | ||
| 2185 | .show = l2cap_seq_show | ||
| 2186 | }; | ||
| 2187 | |||
| 2188 | static int l2cap_seq_open(struct inode *inode, struct file *file) | ||
| 2189 | { | ||
| 2190 | return seq_open(file, &l2cap_seq_ops); | ||
| 2191 | } | ||
| 2192 | |||
| 2193 | static struct file_operations l2cap_seq_fops = { | ||
| 2194 | .owner = THIS_MODULE, | ||
| 2195 | .open = l2cap_seq_open, | ||
| 2196 | .read = seq_read, | ||
| 2197 | .llseek = seq_lseek, | ||
| 2198 | .release = seq_release, | ||
| 2199 | }; | ||
| 2200 | |||
| 2201 | static int __init l2cap_proc_init(void) | ||
| 2202 | { | ||
| 2203 | struct proc_dir_entry *p = create_proc_entry("l2cap", S_IRUGO, proc_bt); | ||
| 2204 | if (!p) | ||
| 2205 | return -ENOMEM; | ||
| 2206 | p->owner = THIS_MODULE; | ||
| 2207 | p->proc_fops = &l2cap_seq_fops; | ||
| 2208 | return 0; | ||
| 2209 | } | ||
| 2210 | |||
| 2211 | static void __exit l2cap_proc_cleanup(void) | ||
| 2212 | { | ||
| 2213 | remove_proc_entry("l2cap", proc_bt); | ||
| 2214 | } | ||
| 2215 | |||
| 2216 | #else /* CONFIG_PROC_FS */ | ||
| 2217 | |||
| 2218 | static int __init l2cap_proc_init(void) | ||
| 2219 | { | ||
| 2220 | return 0; | ||
| 2221 | } | ||
| 2222 | |||
| 2223 | static void __exit l2cap_proc_cleanup(void) | ||
| 2224 | { | ||
| 2225 | return; | ||
| 2226 | } | ||
| 2227 | #endif /* CONFIG_PROC_FS */ | ||
| 2228 | 2162 | ||
| 2229 | static struct proto_ops l2cap_sock_ops = { | 2163 | static struct proto_ops l2cap_sock_ops = { | 
| 2230 | .family = PF_BLUETOOTH, | 2164 | .family = PF_BLUETOOTH, | 
| @@ -2266,7 +2200,7 @@ static struct hci_proto l2cap_hci_proto = { | |||
| 2266 | static int __init l2cap_init(void) | 2200 | static int __init l2cap_init(void) | 
| 2267 | { | 2201 | { | 
| 2268 | int err; | 2202 | int err; | 
| 2269 | 2203 | ||
| 2270 | err = proto_register(&l2cap_proto, 0); | 2204 | err = proto_register(&l2cap_proto, 0); | 
| 2271 | if (err < 0) | 2205 | if (err < 0) | 
| 2272 | return err; | 2206 | return err; | 
| @@ -2284,7 +2218,7 @@ static int __init l2cap_init(void) | |||
| 2284 | goto error; | 2218 | goto error; | 
| 2285 | } | 2219 | } | 
| 2286 | 2220 | ||
| 2287 | l2cap_proc_init(); | 2221 | class_create_file(&bt_class, &class_attr_l2cap); | 
| 2288 | 2222 | ||
| 2289 | BT_INFO("L2CAP ver %s", VERSION); | 2223 | BT_INFO("L2CAP ver %s", VERSION); | 
| 2290 | BT_INFO("L2CAP socket layer initialized"); | 2224 | BT_INFO("L2CAP socket layer initialized"); | 
| @@ -2298,7 +2232,7 @@ error: | |||
| 2298 | 2232 | ||
| 2299 | static void __exit l2cap_exit(void) | 2233 | static void __exit l2cap_exit(void) | 
| 2300 | { | 2234 | { | 
| 2301 | l2cap_proc_cleanup(); | 2235 | class_remove_file(&bt_class, &class_attr_l2cap); | 
| 2302 | 2236 | ||
| 2303 | if (bt_sock_unregister(BTPROTO_L2CAP) < 0) | 2237 | if (bt_sock_unregister(BTPROTO_L2CAP) < 0) | 
| 2304 | BT_ERR("L2CAP socket unregistration failed"); | 2238 | BT_ERR("L2CAP socket unregistration failed"); | 
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index c3d56ead840c..0d89d6434136 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c  | |||
| @@ -35,9 +35,8 @@ | |||
| 35 | #include <linux/signal.h> | 35 | #include <linux/signal.h> | 
| 36 | #include <linux/init.h> | 36 | #include <linux/init.h> | 
| 37 | #include <linux/wait.h> | 37 | #include <linux/wait.h> | 
| 38 | #include <linux/device.h> | ||
| 38 | #include <linux/net.h> | 39 | #include <linux/net.h> | 
| 39 | #include <linux/proc_fs.h> | ||
| 40 | #include <linux/seq_file.h> | ||
| 41 | #include <net/sock.h> | 40 | #include <net/sock.h> | 
| 42 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> | 
| 43 | #include <asm/unaligned.h> | 42 | #include <asm/unaligned.h> | 
| @@ -47,17 +46,13 @@ | |||
| 47 | #include <net/bluetooth/l2cap.h> | 46 | #include <net/bluetooth/l2cap.h> | 
| 48 | #include <net/bluetooth/rfcomm.h> | 47 | #include <net/bluetooth/rfcomm.h> | 
| 49 | 48 | ||
| 50 | #define VERSION "1.5" | 49 | #define VERSION "1.6" | 
| 51 | 50 | ||
| 52 | #ifndef CONFIG_BT_RFCOMM_DEBUG | 51 | #ifndef CONFIG_BT_RFCOMM_DEBUG | 
| 53 | #undef BT_DBG | 52 | #undef BT_DBG | 
| 54 | #define BT_DBG(D...) | 53 | #define BT_DBG(D...) | 
| 55 | #endif | 54 | #endif | 
| 56 | 55 | ||
| 57 | #ifdef CONFIG_PROC_FS | ||
| 58 | struct proc_dir_entry *proc_bt_rfcomm; | ||
| 59 | #endif | ||
| 60 | |||
| 61 | static struct task_struct *rfcomm_thread; | 56 | static struct task_struct *rfcomm_thread; | 
| 62 | 57 | ||
| 63 | static DECLARE_MUTEX(rfcomm_sem); | 58 | static DECLARE_MUTEX(rfcomm_sem); | 
| @@ -2001,117 +1996,32 @@ static struct hci_cb rfcomm_cb = { | |||
| 2001 | .encrypt_cfm = rfcomm_encrypt_cfm | 1996 | .encrypt_cfm = rfcomm_encrypt_cfm | 
| 2002 | }; | 1997 | }; | 
| 2003 | 1998 | ||
| 2004 | /* ---- Proc fs support ---- */ | 1999 | static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf) | 
| 2005 | #ifdef CONFIG_PROC_FS | ||
| 2006 | static void *rfcomm_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 2007 | { | 2000 | { | 
| 2008 | struct rfcomm_session *s; | 2001 | struct rfcomm_session *s; | 
| 2009 | struct list_head *pp, *p; | 2002 | struct list_head *pp, *p; | 
| 2010 | loff_t l = *pos; | 2003 | char *str = buf; | 
| 2011 | 2004 | ||
| 2012 | rfcomm_lock(); | 2005 | rfcomm_lock(); | 
| 2013 | 2006 | ||
| 2014 | list_for_each(p, &session_list) { | 2007 | list_for_each(p, &session_list) { | 
| 2015 | s = list_entry(p, struct rfcomm_session, list); | 2008 | s = list_entry(p, struct rfcomm_session, list); | 
| 2016 | list_for_each(pp, &s->dlcs) | 2009 | list_for_each(pp, &s->dlcs) { | 
| 2017 | if (!l--) { | 2010 | struct sock *sk = s->sock->sk; | 
| 2018 | seq->private = s; | 2011 | struct rfcomm_dlc *d = list_entry(pp, struct rfcomm_dlc, list); | 
| 2019 | return pp; | ||
| 2020 | } | ||
| 2021 | } | ||
| 2022 | return NULL; | ||
| 2023 | } | ||
| 2024 | 2012 | ||
| 2025 | static void *rfcomm_seq_next(struct seq_file *seq, void *e, loff_t *pos) | 2013 | str += sprintf(str, "%s %s %ld %d %d %d %d\n", | 
| 2026 | { | 2014 | batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), | 
| 2027 | struct rfcomm_session *s = seq->private; | 2015 | d->state, d->dlci, d->mtu, d->rx_credits, d->tx_credits); | 
| 2028 | struct list_head *pp, *p = e; | ||
| 2029 | (*pos)++; | ||
| 2030 | |||
| 2031 | if (p->next != &s->dlcs) | ||
| 2032 | return p->next; | ||
| 2033 | |||
| 2034 | list_for_each(p, &session_list) { | ||
| 2035 | s = list_entry(p, struct rfcomm_session, list); | ||
| 2036 | __list_for_each(pp, &s->dlcs) { | ||
| 2037 | seq->private = s; | ||
| 2038 | return pp; | ||
| 2039 | } | 2016 | } | 
| 2040 | } | 2017 | } | 
| 2041 | return NULL; | ||
| 2042 | } | ||
| 2043 | 2018 | ||
| 2044 | static void rfcomm_seq_stop(struct seq_file *seq, void *e) | ||
| 2045 | { | ||
| 2046 | rfcomm_unlock(); | 2019 | rfcomm_unlock(); | 
| 2047 | } | ||
| 2048 | |||
| 2049 | static int rfcomm_seq_show(struct seq_file *seq, void *e) | ||
| 2050 | { | ||
| 2051 | struct rfcomm_session *s = seq->private; | ||
| 2052 | struct sock *sk = s->sock->sk; | ||
| 2053 | struct rfcomm_dlc *d = list_entry(e, struct rfcomm_dlc, list); | ||
| 2054 | |||
| 2055 | seq_printf(seq, "%s %s %ld %d %d %d %d\n", | ||
| 2056 | batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), | ||
| 2057 | d->state, d->dlci, d->mtu, d->rx_credits, d->tx_credits); | ||
| 2058 | return 0; | ||
| 2059 | } | ||
| 2060 | |||
| 2061 | static struct seq_operations rfcomm_seq_ops = { | ||
| 2062 | .start = rfcomm_seq_start, | ||
| 2063 | .next = rfcomm_seq_next, | ||
| 2064 | .stop = rfcomm_seq_stop, | ||
| 2065 | .show = rfcomm_seq_show | ||
| 2066 | }; | ||
| 2067 | |||
| 2068 | static int rfcomm_seq_open(struct inode *inode, struct file *file) | ||
| 2069 | { | ||
| 2070 | return seq_open(file, &rfcomm_seq_ops); | ||
| 2071 | } | ||
| 2072 | |||
| 2073 | static struct file_operations rfcomm_seq_fops = { | ||
| 2074 | .owner = THIS_MODULE, | ||
| 2075 | .open = rfcomm_seq_open, | ||
| 2076 | .read = seq_read, | ||
| 2077 | .llseek = seq_lseek, | ||
| 2078 | .release = seq_release, | ||
| 2079 | }; | ||
| 2080 | |||
| 2081 | static int __init rfcomm_proc_init(void) | ||
| 2082 | { | ||
| 2083 | struct proc_dir_entry *p; | ||
| 2084 | |||
| 2085 | proc_bt_rfcomm = proc_mkdir("rfcomm", proc_bt); | ||
| 2086 | if (proc_bt_rfcomm) { | ||
| 2087 | proc_bt_rfcomm->owner = THIS_MODULE; | ||
| 2088 | |||
| 2089 | p = create_proc_entry("dlc", S_IRUGO, proc_bt_rfcomm); | ||
| 2090 | if (p) | ||
| 2091 | p->proc_fops = &rfcomm_seq_fops; | ||
| 2092 | } | ||
| 2093 | return 0; | ||
| 2094 | } | ||
| 2095 | |||
| 2096 | static void __exit rfcomm_proc_cleanup(void) | ||
| 2097 | { | ||
| 2098 | remove_proc_entry("dlc", proc_bt_rfcomm); | ||
| 2099 | 2020 | ||
| 2100 | remove_proc_entry("rfcomm", proc_bt); | 2021 | return (str - buf); | 
| 2101 | } | 2022 | } | 
| 2102 | 2023 | ||
| 2103 | #else /* CONFIG_PROC_FS */ | 2024 | static CLASS_ATTR(rfcomm_dlc, S_IRUGO, rfcomm_dlc_sysfs_show, NULL); | 
| 2104 | |||
| 2105 | static int __init rfcomm_proc_init(void) | ||
| 2106 | { | ||
| 2107 | return 0; | ||
| 2108 | } | ||
| 2109 | |||
| 2110 | static void __exit rfcomm_proc_cleanup(void) | ||
| 2111 | { | ||
| 2112 | return; | ||
| 2113 | } | ||
| 2114 | #endif /* CONFIG_PROC_FS */ | ||
| 2115 | 2025 | ||
| 2116 | /* ---- Initialization ---- */ | 2026 | /* ---- Initialization ---- */ | 
| 2117 | static int __init rfcomm_init(void) | 2027 | static int __init rfcomm_init(void) | 
| @@ -2122,9 +2032,7 @@ static int __init rfcomm_init(void) | |||
| 2122 | 2032 | ||
| 2123 | kernel_thread(rfcomm_run, NULL, CLONE_KERNEL); | 2033 | kernel_thread(rfcomm_run, NULL, CLONE_KERNEL); | 
| 2124 | 2034 | ||
| 2125 | BT_INFO("RFCOMM ver %s", VERSION); | 2035 | class_create_file(&bt_class, &class_attr_rfcomm_dlc); | 
| 2126 | |||
| 2127 | rfcomm_proc_init(); | ||
| 2128 | 2036 | ||
| 2129 | rfcomm_init_sockets(); | 2037 | rfcomm_init_sockets(); | 
| 2130 | 2038 | ||
| @@ -2132,11 +2040,15 @@ static int __init rfcomm_init(void) | |||
| 2132 | rfcomm_init_ttys(); | 2040 | rfcomm_init_ttys(); | 
| 2133 | #endif | 2041 | #endif | 
| 2134 | 2042 | ||
| 2043 | BT_INFO("RFCOMM ver %s", VERSION); | ||
| 2044 | |||
| 2135 | return 0; | 2045 | return 0; | 
| 2136 | } | 2046 | } | 
| 2137 | 2047 | ||
| 2138 | static void __exit rfcomm_exit(void) | 2048 | static void __exit rfcomm_exit(void) | 
| 2139 | { | 2049 | { | 
| 2050 | class_remove_file(&bt_class, &class_attr_rfcomm_dlc); | ||
| 2051 | |||
| 2140 | hci_unregister_cb(&rfcomm_cb); | 2052 | hci_unregister_cb(&rfcomm_cb); | 
| 2141 | 2053 | ||
| 2142 | /* Terminate working thread. | 2054 | /* Terminate working thread. | 
| @@ -2153,8 +2065,6 @@ static void __exit rfcomm_exit(void) | |||
| 2153 | #endif | 2065 | #endif | 
| 2154 | 2066 | ||
| 2155 | rfcomm_cleanup_sockets(); | 2067 | rfcomm_cleanup_sockets(); | 
| 2156 | |||
| 2157 | rfcomm_proc_cleanup(); | ||
| 2158 | } | 2068 | } | 
| 2159 | 2069 | ||
| 2160 | module_init(rfcomm_init); | 2070 | module_init(rfcomm_init); | 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index a2b30f0aedb7..6c34261b232e 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c  | |||
| @@ -42,8 +42,7 @@ | |||
| 42 | #include <linux/socket.h> | 42 | #include <linux/socket.h> | 
| 43 | #include <linux/skbuff.h> | 43 | #include <linux/skbuff.h> | 
| 44 | #include <linux/list.h> | 44 | #include <linux/list.h> | 
| 45 | #include <linux/proc_fs.h> | 45 | #include <linux/device.h> | 
| 46 | #include <linux/seq_file.h> | ||
| 47 | #include <net/sock.h> | 46 | #include <net/sock.h> | 
| 48 | 47 | ||
| 49 | #include <asm/system.h> | 48 | #include <asm/system.h> | 
| @@ -887,89 +886,26 @@ done: | |||
| 887 | return result; | 886 | return result; | 
| 888 | } | 887 | } | 
| 889 | 888 | ||
| 890 | /* ---- Proc fs support ---- */ | 889 | static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf) | 
| 891 | #ifdef CONFIG_PROC_FS | ||
| 892 | static void *rfcomm_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 893 | { | 890 | { | 
| 894 | struct sock *sk; | 891 | struct sock *sk; | 
| 895 | struct hlist_node *node; | 892 | struct hlist_node *node; | 
| 896 | loff_t l = *pos; | 893 | char *str = buf; | 
| 897 | 894 | ||
| 898 | read_lock_bh(&rfcomm_sk_list.lock); | 895 | read_lock_bh(&rfcomm_sk_list.lock); | 
| 899 | 896 | ||
| 900 | sk_for_each(sk, node, &rfcomm_sk_list.head) | 897 | sk_for_each(sk, node, &rfcomm_sk_list.head) { | 
| 901 | if (!l--) | 898 | str += sprintf(str, "%s %s %d %d\n", | 
| 902 | return sk; | 899 | batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), | 
| 903 | return NULL; | 900 | sk->sk_state, rfcomm_pi(sk)->channel); | 
| 904 | } | 901 | } | 
| 905 | |||
| 906 | static void *rfcomm_seq_next(struct seq_file *seq, void *e, loff_t *pos) | ||
| 907 | { | ||
| 908 | struct sock *sk = e; | ||
| 909 | (*pos)++; | ||
| 910 | return sk_next(sk); | ||
| 911 | } | ||
| 912 | 902 | ||
| 913 | static void rfcomm_seq_stop(struct seq_file *seq, void *e) | ||
| 914 | { | ||
| 915 | read_unlock_bh(&rfcomm_sk_list.lock); | 903 | read_unlock_bh(&rfcomm_sk_list.lock); | 
| 916 | } | ||
| 917 | 904 | ||
| 918 | static int rfcomm_seq_show(struct seq_file *seq, void *e) | 905 | return (str - buf); | 
| 919 | { | ||
| 920 | struct sock *sk = e; | ||
| 921 | seq_printf(seq, "%s %s %d %d\n", | ||
| 922 | batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), | ||
| 923 | sk->sk_state, rfcomm_pi(sk)->channel); | ||
| 924 | return 0; | ||
| 925 | } | ||
| 926 | |||
| 927 | static struct seq_operations rfcomm_seq_ops = { | ||
| 928 | .start = rfcomm_seq_start, | ||
| 929 | .next = rfcomm_seq_next, | ||
| 930 | .stop = rfcomm_seq_stop, | ||
| 931 | .show = rfcomm_seq_show | ||
| 932 | }; | ||
| 933 | |||
| 934 | static int rfcomm_seq_open(struct inode *inode, struct file *file) | ||
| 935 | { | ||
| 936 | return seq_open(file, &rfcomm_seq_ops); | ||
| 937 | } | 906 | } | 
| 938 | 907 | ||
| 939 | static struct file_operations rfcomm_seq_fops = { | 908 | static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL); | 
| 940 | .owner = THIS_MODULE, | ||
| 941 | .open = rfcomm_seq_open, | ||
| 942 | .read = seq_read, | ||
| 943 | .llseek = seq_lseek, | ||
| 944 | .release = seq_release, | ||
| 945 | }; | ||
| 946 | |||
| 947 | static int __init rfcomm_sock_proc_init(void) | ||
| 948 | { | ||
| 949 | struct proc_dir_entry *p = create_proc_entry("sock", S_IRUGO, proc_bt_rfcomm); | ||
| 950 | if (!p) | ||
| 951 | return -ENOMEM; | ||
| 952 | p->proc_fops = &rfcomm_seq_fops; | ||
| 953 | return 0; | ||
| 954 | } | ||
| 955 | |||
| 956 | static void __exit rfcomm_sock_proc_cleanup(void) | ||
| 957 | { | ||
| 958 | remove_proc_entry("sock", proc_bt_rfcomm); | ||
| 959 | } | ||
| 960 | |||
| 961 | #else /* CONFIG_PROC_FS */ | ||
| 962 | |||
| 963 | static int __init rfcomm_sock_proc_init(void) | ||
| 964 | { | ||
| 965 | return 0; | ||
| 966 | } | ||
| 967 | |||
| 968 | static void __exit rfcomm_sock_proc_cleanup(void) | ||
| 969 | { | ||
| 970 | return; | ||
| 971 | } | ||
| 972 | #endif /* CONFIG_PROC_FS */ | ||
| 973 | 909 | ||
| 974 | static struct proto_ops rfcomm_sock_ops = { | 910 | static struct proto_ops rfcomm_sock_ops = { | 
| 975 | .family = PF_BLUETOOTH, | 911 | .family = PF_BLUETOOTH, | 
| @@ -997,7 +933,7 @@ static struct net_proto_family rfcomm_sock_family_ops = { | |||
| 997 | .create = rfcomm_sock_create | 933 | .create = rfcomm_sock_create | 
| 998 | }; | 934 | }; | 
| 999 | 935 | ||
| 1000 | int __init rfcomm_init_sockets(void) | 936 | int __init rfcomm_init_sockets(void) | 
| 1001 | { | 937 | { | 
| 1002 | int err; | 938 | int err; | 
| 1003 | 939 | ||
| @@ -1009,7 +945,7 @@ int __init rfcomm_init_sockets(void) | |||
| 1009 | if (err < 0) | 945 | if (err < 0) | 
| 1010 | goto error; | 946 | goto error; | 
| 1011 | 947 | ||
| 1012 | rfcomm_sock_proc_init(); | 948 | class_create_file(&bt_class, &class_attr_rfcomm); | 
| 1013 | 949 | ||
| 1014 | BT_INFO("RFCOMM socket layer initialized"); | 950 | BT_INFO("RFCOMM socket layer initialized"); | 
| 1015 | 951 | ||
| @@ -1023,7 +959,7 @@ error: | |||
| 1023 | 959 | ||
| 1024 | void __exit rfcomm_cleanup_sockets(void) | 960 | void __exit rfcomm_cleanup_sockets(void) | 
| 1025 | { | 961 | { | 
| 1026 | rfcomm_sock_proc_cleanup(); | 962 | class_remove_file(&bt_class, &class_attr_rfcomm); | 
| 1027 | 963 | ||
| 1028 | if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) | 964 | if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) | 
| 1029 | BT_ERR("RFCOMM socket layer unregistration failed"); | 965 | BT_ERR("RFCOMM socket layer unregistration failed"); | 
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 997e42df115c..9cb00dc6c08c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c  | |||
| @@ -38,8 +38,7 @@ | |||
| 38 | #include <linux/interrupt.h> | 38 | #include <linux/interrupt.h> | 
| 39 | #include <linux/socket.h> | 39 | #include <linux/socket.h> | 
| 40 | #include <linux/skbuff.h> | 40 | #include <linux/skbuff.h> | 
| 41 | #include <linux/proc_fs.h> | 41 | #include <linux/device.h> | 
| 42 | #include <linux/seq_file.h> | ||
| 43 | #include <linux/list.h> | 42 | #include <linux/list.h> | 
| 44 | #include <net/sock.h> | 43 | #include <net/sock.h> | 
| 45 | 44 | ||
| @@ -55,7 +54,7 @@ | |||
| 55 | #define BT_DBG(D...) | 54 | #define BT_DBG(D...) | 
| 56 | #endif | 55 | #endif | 
| 57 | 56 | ||
| 58 | #define VERSION "0.4" | 57 | #define VERSION "0.5" | 
| 59 | 58 | ||
| 60 | static struct proto_ops sco_sock_ops; | 59 | static struct proto_ops sco_sock_ops; | 
| 61 | 60 | ||
| @@ -893,91 +892,26 @@ drop: | |||
| 893 | return 0; | 892 | return 0; | 
| 894 | } | 893 | } | 
| 895 | 894 | ||
| 896 | /* ---- Proc fs support ---- */ | 895 | static ssize_t sco_sysfs_show(struct class *dev, char *buf) | 
| 897 | #ifdef CONFIG_PROC_FS | ||
| 898 | static void *sco_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 899 | { | 896 | { | 
| 900 | struct sock *sk; | 897 | struct sock *sk; | 
| 901 | struct hlist_node *node; | 898 | struct hlist_node *node; | 
| 902 | loff_t l = *pos; | 899 | char *str = buf; | 
| 903 | 900 | ||
| 904 | read_lock_bh(&sco_sk_list.lock); | 901 | read_lock_bh(&sco_sk_list.lock); | 
| 905 | 902 | ||
| 906 | sk_for_each(sk, node, &sco_sk_list.head) | 903 | sk_for_each(sk, node, &sco_sk_list.head) { | 
| 907 | if (!l--) | 904 | str += sprintf(str, "%s %s %d\n", | 
| 908 | goto found; | 905 | batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), | 
| 909 | sk = NULL; | 906 | sk->sk_state); | 
| 910 | found: | 907 | } | 
| 911 | return sk; | ||
| 912 | } | ||
| 913 | |||
| 914 | static void *sco_seq_next(struct seq_file *seq, void *e, loff_t *pos) | ||
| 915 | { | ||
| 916 | struct sock *sk = e; | ||
| 917 | (*pos)++; | ||
| 918 | return sk_next(sk); | ||
| 919 | } | ||
| 920 | 908 | ||
| 921 | static void sco_seq_stop(struct seq_file *seq, void *e) | ||
| 922 | { | ||
| 923 | read_unlock_bh(&sco_sk_list.lock); | 909 | read_unlock_bh(&sco_sk_list.lock); | 
| 924 | } | ||
| 925 | |||
| 926 | static int sco_seq_show(struct seq_file *seq, void *e) | ||
| 927 | { | ||
| 928 | struct sock *sk = e; | ||
| 929 | seq_printf(seq, "%s %s %d\n", | ||
| 930 | batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state); | ||
| 931 | return 0; | ||
| 932 | } | ||
| 933 | 910 | ||
| 934 | static struct seq_operations sco_seq_ops = { | 911 | return (str - buf); | 
| 935 | .start = sco_seq_start, | ||
| 936 | .next = sco_seq_next, | ||
| 937 | .stop = sco_seq_stop, | ||
| 938 | .show = sco_seq_show | ||
| 939 | }; | ||
| 940 | |||
| 941 | static int sco_seq_open(struct inode *inode, struct file *file) | ||
| 942 | { | ||
| 943 | return seq_open(file, &sco_seq_ops); | ||
| 944 | } | 912 | } | 
| 945 | 913 | ||
| 946 | static struct file_operations sco_seq_fops = { | 914 | static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL); | 
| 947 | .owner = THIS_MODULE, | ||
| 948 | .open = sco_seq_open, | ||
| 949 | .read = seq_read, | ||
| 950 | .llseek = seq_lseek, | ||
| 951 | .release = seq_release, | ||
| 952 | }; | ||
| 953 | |||
| 954 | static int __init sco_proc_init(void) | ||
| 955 | { | ||
| 956 | struct proc_dir_entry *p = create_proc_entry("sco", S_IRUGO, proc_bt); | ||
| 957 | if (!p) | ||
| 958 | return -ENOMEM; | ||
| 959 | p->owner = THIS_MODULE; | ||
| 960 | p->proc_fops = &sco_seq_fops; | ||
| 961 | return 0; | ||
| 962 | } | ||
| 963 | |||
| 964 | static void __exit sco_proc_cleanup(void) | ||
| 965 | { | ||
| 966 | remove_proc_entry("sco", proc_bt); | ||
| 967 | } | ||
| 968 | |||
| 969 | #else /* CONFIG_PROC_FS */ | ||
| 970 | |||
| 971 | static int __init sco_proc_init(void) | ||
| 972 | { | ||
| 973 | return 0; | ||
| 974 | } | ||
| 975 | |||
| 976 | static void __exit sco_proc_cleanup(void) | ||
| 977 | { | ||
| 978 | return; | ||
| 979 | } | ||
| 980 | #endif /* CONFIG_PROC_FS */ | ||
| 981 | 915 | ||
| 982 | static struct proto_ops sco_sock_ops = { | 916 | static struct proto_ops sco_sock_ops = { | 
| 983 | .family = PF_BLUETOOTH, | 917 | .family = PF_BLUETOOTH, | 
| @@ -1035,7 +969,7 @@ static int __init sco_init(void) | |||
| 1035 | goto error; | 969 | goto error; | 
| 1036 | } | 970 | } | 
| 1037 | 971 | ||
| 1038 | sco_proc_init(); | 972 | class_create_file(&bt_class, &class_attr_sco); | 
| 1039 | 973 | ||
| 1040 | BT_INFO("SCO (Voice Link) ver %s", VERSION); | 974 | BT_INFO("SCO (Voice Link) ver %s", VERSION); | 
| 1041 | BT_INFO("SCO socket layer initialized"); | 975 | BT_INFO("SCO socket layer initialized"); | 
| @@ -1049,7 +983,7 @@ error: | |||
| 1049 | 983 | ||
| 1050 | static void __exit sco_exit(void) | 984 | static void __exit sco_exit(void) | 
| 1051 | { | 985 | { | 
| 1052 | sco_proc_cleanup(); | 986 | class_remove_file(&bt_class, &class_attr_sco); | 
| 1053 | 987 | ||
| 1054 | if (bt_sock_unregister(BTPROTO_SCO) < 0) | 988 | if (bt_sock_unregister(BTPROTO_SCO) < 0) | 
| 1055 | BT_ERR("SCO socket unregistration failed"); | 989 | BT_ERR("SCO socket unregistration failed"); | 
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index db098ff3cd6a..cb530eef0e39 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c  | |||
| @@ -194,8 +194,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) | |||
| 194 | 194 | ||
| 195 | done: | 195 | done: | 
| 196 | spin_unlock_bh(&dev->xmit_lock); | 196 | spin_unlock_bh(&dev->xmit_lock); | 
| 197 | if (dmi1) | 197 | kfree(dmi1); | 
| 198 | kfree(dmi1); | ||
| 199 | return err; | 198 | return err; | 
| 200 | } | 199 | } | 
| 201 | 200 | ||
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9bed7569ce3f..8700379685e0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c  | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | #include <net/udp.h> | 49 | #include <net/udp.h> | 
| 50 | #include <net/sock.h> | 50 | #include <net/sock.h> | 
| 51 | #include <net/pkt_sched.h> | 51 | #include <net/pkt_sched.h> | 
| 52 | #include <net/netlink.h> | ||
| 52 | 53 | ||
| 53 | DECLARE_MUTEX(rtnl_sem); | 54 | DECLARE_MUTEX(rtnl_sem); | 
| 54 | 55 | ||
| @@ -462,11 +463,6 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) | |||
| 462 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); | 463 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); | 
| 463 | } | 464 | } | 
| 464 | 465 | ||
| 465 | static int rtnetlink_done(struct netlink_callback *cb) | ||
| 466 | { | ||
| 467 | return 0; | ||
| 468 | } | ||
| 469 | |||
| 470 | /* Protected by RTNL sempahore. */ | 466 | /* Protected by RTNL sempahore. */ | 
| 471 | static struct rtattr **rta_buf; | 467 | static struct rtattr **rta_buf; | 
| 472 | static int rtattr_max; | 468 | static int rtattr_max; | 
| @@ -524,8 +520,6 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) | |||
| 524 | } | 520 | } | 
| 525 | 521 | ||
| 526 | if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { | 522 | if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { | 
| 527 | u32 rlen; | ||
| 528 | |||
| 529 | if (link->dumpit == NULL) | 523 | if (link->dumpit == NULL) | 
| 530 | link = &(rtnetlink_links[PF_UNSPEC][type]); | 524 | link = &(rtnetlink_links[PF_UNSPEC][type]); | 
| 531 | 525 | ||
| @@ -533,14 +527,11 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) | |||
| 533 | goto err_inval; | 527 | goto err_inval; | 
| 534 | 528 | ||
| 535 | if ((*errp = netlink_dump_start(rtnl, skb, nlh, | 529 | if ((*errp = netlink_dump_start(rtnl, skb, nlh, | 
| 536 | link->dumpit, | 530 | link->dumpit, NULL)) != 0) { | 
| 537 | rtnetlink_done)) != 0) { | ||
| 538 | return -1; | 531 | return -1; | 
| 539 | } | 532 | } | 
| 540 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | 533 | |
| 541 | if (rlen > skb->len) | 534 | netlink_queue_skip(nlh, skb); | 
| 542 | rlen = skb->len; | ||
| 543 | skb_pull(skb, rlen); | ||
| 544 | return -1; | 535 | return -1; | 
| 545 | } | 536 | } | 
| 546 | 537 | ||
| @@ -579,75 +570,13 @@ err_inval: | |||
| 579 | return -1; | 570 | return -1; | 
| 580 | } | 571 | } | 
| 581 | 572 | ||
| 582 | /* | ||
| 583 | * Process one packet of messages. | ||
| 584 | * Malformed skbs with wrong lengths of messages are discarded silently. | ||
| 585 | */ | ||
| 586 | |||
| 587 | static inline int rtnetlink_rcv_skb(struct sk_buff *skb) | ||
| 588 | { | ||
| 589 | int err; | ||
| 590 | struct nlmsghdr * nlh; | ||
| 591 | |||
| 592 | while (skb->len >= NLMSG_SPACE(0)) { | ||
| 593 | u32 rlen; | ||
| 594 | |||
| 595 | nlh = (struct nlmsghdr *)skb->data; | ||
| 596 | if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) | ||
| 597 | return 0; | ||
| 598 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | ||
| 599 | if (rlen > skb->len) | ||
| 600 | rlen = skb->len; | ||
| 601 | if (rtnetlink_rcv_msg(skb, nlh, &err)) { | ||
| 602 | /* Not error, but we must interrupt processing here: | ||
| 603 | * Note, that in this case we do not pull message | ||
| 604 | * from skb, it will be processed later. | ||
| 605 | */ | ||
| 606 | if (err == 0) | ||
| 607 | return -1; | ||
| 608 | netlink_ack(skb, nlh, err); | ||
| 609 | } else if (nlh->nlmsg_flags&NLM_F_ACK) | ||
| 610 | netlink_ack(skb, nlh, 0); | ||
| 611 | skb_pull(skb, rlen); | ||
| 612 | } | ||
| 613 | |||
| 614 | return 0; | ||
| 615 | } | ||
| 616 | |||
| 617 | /* | ||
| 618 | * rtnetlink input queue processing routine: | ||
| 619 | * - process as much as there was in the queue upon entry. | ||
| 620 | * - feed skbs to rtnetlink_rcv_skb, until it refuse a message, | ||
| 621 | * that will occur, when a dump started. | ||
| 622 | */ | ||
| 623 | |||
| 624 | static void rtnetlink_rcv(struct sock *sk, int len) | 573 | static void rtnetlink_rcv(struct sock *sk, int len) | 
| 625 | { | 574 | { | 
| 626 | unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); | 575 | unsigned int qlen = 0; | 
| 627 | 576 | ||
| 628 | do { | 577 | do { | 
| 629 | struct sk_buff *skb; | ||
| 630 | |||
| 631 | rtnl_lock(); | 578 | rtnl_lock(); | 
| 632 | 579 | netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg); | |
| 633 | if (qlen > skb_queue_len(&sk->sk_receive_queue)) | ||
| 634 | qlen = skb_queue_len(&sk->sk_receive_queue); | ||
| 635 | |||
| 636 | for (; qlen; qlen--) { | ||
| 637 | skb = skb_dequeue(&sk->sk_receive_queue); | ||
| 638 | if (rtnetlink_rcv_skb(skb)) { | ||
| 639 | if (skb->len) | ||
| 640 | skb_queue_head(&sk->sk_receive_queue, | ||
| 641 | skb); | ||
| 642 | else { | ||
| 643 | kfree_skb(skb); | ||
| 644 | qlen--; | ||
| 645 | } | ||
| 646 | break; | ||
| 647 | } | ||
| 648 | kfree_skb(skb); | ||
| 649 | } | ||
| 650 | |||
| 651 | up(&rtnl_sem); | 580 | up(&rtnl_sem); | 
| 652 | 581 | ||
| 653 | netdev_run_todo(); | 582 | netdev_run_todo(); | 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 95501e40100e..b7d13a4fff48 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c  | |||
| @@ -336,6 +336,9 @@ void __kfree_skb(struct sk_buff *skb) | |||
| 336 | } | 336 | } | 
| 337 | #ifdef CONFIG_NETFILTER | 337 | #ifdef CONFIG_NETFILTER | 
| 338 | nf_conntrack_put(skb->nfct); | 338 | nf_conntrack_put(skb->nfct); | 
| 339 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
| 340 | nf_conntrack_put_reasm(skb->nfct_reasm); | ||
| 341 | #endif | ||
| 339 | #ifdef CONFIG_BRIDGE_NETFILTER | 342 | #ifdef CONFIG_BRIDGE_NETFILTER | 
| 340 | nf_bridge_put(skb->nf_bridge); | 343 | nf_bridge_put(skb->nf_bridge); | 
| 341 | #endif | 344 | #endif | 
| @@ -414,9 +417,17 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) | |||
| 414 | C(nfct); | 417 | C(nfct); | 
| 415 | nf_conntrack_get(skb->nfct); | 418 | nf_conntrack_get(skb->nfct); | 
| 416 | C(nfctinfo); | 419 | C(nfctinfo); | 
| 420 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
| 421 | C(nfct_reasm); | ||
| 422 | nf_conntrack_get_reasm(skb->nfct_reasm); | ||
| 423 | #endif | ||
| 417 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 424 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 
| 418 | C(ipvs_property); | 425 | C(ipvs_property); | 
| 419 | #endif | 426 | #endif | 
| 427 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
| 428 | C(nfct_reasm); | ||
| 429 | nf_conntrack_get_reasm(skb->nfct_reasm); | ||
| 430 | #endif | ||
| 420 | #ifdef CONFIG_BRIDGE_NETFILTER | 431 | #ifdef CONFIG_BRIDGE_NETFILTER | 
| 421 | C(nf_bridge); | 432 | C(nf_bridge); | 
| 422 | nf_bridge_get(skb->nf_bridge); | 433 | nf_bridge_get(skb->nf_bridge); | 
| @@ -474,6 +485,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
| 474 | new->nfct = old->nfct; | 485 | new->nfct = old->nfct; | 
| 475 | nf_conntrack_get(old->nfct); | 486 | nf_conntrack_get(old->nfct); | 
| 476 | new->nfctinfo = old->nfctinfo; | 487 | new->nfctinfo = old->nfctinfo; | 
| 488 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
| 489 | new->nfct_reasm = old->nfct_reasm; | ||
| 490 | nf_conntrack_get_reasm(old->nfct_reasm); | ||
| 491 | #endif | ||
| 477 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 492 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 
| 478 | new->ipvs_property = old->ipvs_property; | 493 | new->ipvs_property = old->ipvs_property; | 
| 479 | #endif | 494 | #endif | 
diff --git a/net/core/sock.c b/net/core/sock.c index 9602ceb3bac9..13cc3be4f056 100644 --- a/net/core/sock.c +++ b/net/core/sock.c  | |||
| @@ -1242,8 +1242,7 @@ static void sock_def_write_space(struct sock *sk) | |||
| 1242 | 1242 | ||
| 1243 | static void sock_def_destruct(struct sock *sk) | 1243 | static void sock_def_destruct(struct sock *sk) | 
| 1244 | { | 1244 | { | 
| 1245 | if (sk->sk_protinfo) | 1245 | kfree(sk->sk_protinfo); | 
| 1246 | kfree(sk->sk_protinfo); | ||
| 1247 | } | 1246 | } | 
| 1248 | 1247 | ||
| 1249 | void sk_send_sigurg(struct sock *sk) | 1248 | void sk_send_sigurg(struct sock *sk) | 
diff --git a/net/core/stream.c b/net/core/stream.c index ac9edfdf8742..15bfd03e8024 100644 --- a/net/core/stream.c +++ b/net/core/stream.c  | |||
| @@ -52,8 +52,9 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) | |||
| 52 | { | 52 | { | 
| 53 | struct task_struct *tsk = current; | 53 | struct task_struct *tsk = current; | 
| 54 | DEFINE_WAIT(wait); | 54 | DEFINE_WAIT(wait); | 
| 55 | int done; | ||
| 55 | 56 | ||
| 56 | while (1) { | 57 | do { | 
| 57 | if (sk->sk_err) | 58 | if (sk->sk_err) | 
| 58 | return sock_error(sk); | 59 | return sock_error(sk); | 
| 59 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) | 60 | if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) | 
| @@ -65,13 +66,12 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) | |||
| 65 | 66 | ||
| 66 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 67 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 
| 67 | sk->sk_write_pending++; | 68 | sk->sk_write_pending++; | 
| 68 | if (sk_wait_event(sk, timeo_p, | 69 | done = sk_wait_event(sk, timeo_p, | 
| 69 | !((1 << sk->sk_state) & | 70 | !((1 << sk->sk_state) & | 
| 70 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))) | 71 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); | 
| 71 | break; | ||
| 72 | finish_wait(sk->sk_sleep, &wait); | 72 | finish_wait(sk->sk_sleep, &wait); | 
| 73 | sk->sk_write_pending--; | 73 | sk->sk_write_pending--; | 
| 74 | } | 74 | } while (!done); | 
| 75 | return 0; | 75 | return 0; | 
| 76 | } | 76 | } | 
| 77 | 77 | ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6298cf58ff9e..ca03521112c5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c  | |||
| @@ -31,8 +31,6 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { | |||
| 31 | .lhash_lock = RW_LOCK_UNLOCKED, | 31 | .lhash_lock = RW_LOCK_UNLOCKED, | 
| 32 | .lhash_users = ATOMIC_INIT(0), | 32 | .lhash_users = ATOMIC_INIT(0), | 
| 33 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), | 33 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), | 
| 34 | .portalloc_lock = SPIN_LOCK_UNLOCKED, | ||
| 35 | .port_rover = 1024 - 1, | ||
| 36 | }; | 34 | }; | 
| 37 | 35 | ||
| 38 | EXPORT_SYMBOL_GPL(dccp_hashinfo); | 36 | EXPORT_SYMBOL_GPL(dccp_hashinfo); | 
| @@ -125,36 +123,15 @@ static int dccp_v4_hash_connect(struct sock *sk) | |||
| 125 | int ret; | 123 | int ret; | 
| 126 | 124 | ||
| 127 | if (snum == 0) { | 125 | if (snum == 0) { | 
| 128 | int rover; | ||
| 129 | int low = sysctl_local_port_range[0]; | 126 | int low = sysctl_local_port_range[0]; | 
| 130 | int high = sysctl_local_port_range[1]; | 127 | int high = sysctl_local_port_range[1]; | 
| 131 | int remaining = (high - low) + 1; | 128 | int remaining = (high - low) + 1; | 
| 129 | int rover = net_random() % (high - low) + low; | ||
| 132 | struct hlist_node *node; | 130 | struct hlist_node *node; | 
| 133 | struct inet_timewait_sock *tw = NULL; | 131 | struct inet_timewait_sock *tw = NULL; | 
| 134 | 132 | ||
| 135 | local_bh_disable(); | 133 | local_bh_disable(); | 
| 136 | |||
| 137 | /* TODO. Actually it is not so bad idea to remove | ||
| 138 | * dccp_hashinfo.portalloc_lock before next submission to | ||
| 139 | * Linus. | ||
| 140 | * As soon as we touch this place at all it is time to think. | ||
| 141 | * | ||
| 142 | * Now it protects single _advisory_ variable | ||
| 143 | * dccp_hashinfo.port_rover, hence it is mostly useless. | ||
| 144 | * Code will work nicely if we just delete it, but | ||
| 145 | * I am afraid in contented case it will work not better or | ||
| 146 | * even worse: another cpu just will hit the same bucket | ||
| 147 | * and spin there. | ||
| 148 | * So some cpu salt could remove both contention and | ||
| 149 | * memory pingpong. Any ideas how to do this in a nice way? | ||
| 150 | */ | ||
| 151 | spin_lock(&dccp_hashinfo.portalloc_lock); | ||
| 152 | rover = dccp_hashinfo.port_rover; | ||
| 153 | |||
| 154 | do { | 134 | do { | 
| 155 | rover++; | ||
| 156 | if ((rover < low) || (rover > high)) | ||
| 157 | rover = low; | ||
| 158 | head = &dccp_hashinfo.bhash[inet_bhashfn(rover, | 135 | head = &dccp_hashinfo.bhash[inet_bhashfn(rover, | 
| 159 | dccp_hashinfo.bhash_size)]; | 136 | dccp_hashinfo.bhash_size)]; | 
| 160 | spin_lock(&head->lock); | 137 | spin_lock(&head->lock); | 
| @@ -187,9 +164,9 @@ static int dccp_v4_hash_connect(struct sock *sk) | |||
| 187 | 164 | ||
| 188 | next_port: | 165 | next_port: | 
| 189 | spin_unlock(&head->lock); | 166 | spin_unlock(&head->lock); | 
| 167 | if (++rover > high) | ||
| 168 | rover = low; | ||
| 190 | } while (--remaining > 0); | 169 | } while (--remaining > 0); | 
| 191 | dccp_hashinfo.port_rover = rover; | ||
| 192 | spin_unlock(&dccp_hashinfo.portalloc_lock); | ||
| 193 | 170 | ||
| 194 | local_bh_enable(); | 171 | local_bh_enable(); | 
| 195 | 172 | ||
| @@ -197,9 +174,6 @@ static int dccp_v4_hash_connect(struct sock *sk) | |||
| 197 | 174 | ||
| 198 | ok: | 175 | ok: | 
| 199 | /* All locks still held and bhs disabled */ | 176 | /* All locks still held and bhs disabled */ | 
| 200 | dccp_hashinfo.port_rover = rover; | ||
| 201 | spin_unlock(&dccp_hashinfo.portalloc_lock); | ||
| 202 | |||
| 203 | inet_bind_hash(sk, tb, rover); | 177 | inet_bind_hash(sk, tb, rover); | 
| 204 | if (sk_unhashed(sk)) { | 178 | if (sk_unhashed(sk)) { | 
| 205 | inet_sk(sk)->sport = htons(rover); | 179 | inet_sk(sk)->sport = htons(rover); | 
| @@ -1289,10 +1263,8 @@ static int dccp_v4_destroy_sock(struct sock *sk) | |||
| 1289 | if (inet_csk(sk)->icsk_bind_hash != NULL) | 1263 | if (inet_csk(sk)->icsk_bind_hash != NULL) | 
| 1290 | inet_put_port(&dccp_hashinfo, sk); | 1264 | inet_put_port(&dccp_hashinfo, sk); | 
| 1291 | 1265 | ||
| 1292 | if (dp->dccps_service_list != NULL) { | 1266 | kfree(dp->dccps_service_list); | 
| 1293 | kfree(dp->dccps_service_list); | 1267 | dp->dccps_service_list = NULL; | 
| 1294 | dp->dccps_service_list = NULL; | ||
| 1295 | } | ||
| 1296 | 1268 | ||
| 1297 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | 1269 | ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); | 
| 1298 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | 1270 | ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); | 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a021c3422f67..e0ace7cbb996 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c  | |||
| @@ -238,8 +238,7 @@ static int dccp_setsockopt_service(struct sock *sk, const u32 service, | |||
| 238 | lock_sock(sk); | 238 | lock_sock(sk); | 
| 239 | dp->dccps_service = service; | 239 | dp->dccps_service = service; | 
| 240 | 240 | ||
| 241 | if (dp->dccps_service_list != NULL) | 241 | kfree(dp->dccps_service_list); | 
| 242 | kfree(dp->dccps_service_list); | ||
| 243 | 242 | ||
| 244 | dp->dccps_service_list = sl; | 243 | dp->dccps_service_list = sl; | 
| 245 | release_sock(sk); | 244 | release_sock(sk); | 
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index eeba56f99323..6f8b5658cb4e 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c  | |||
| @@ -784,16 +784,14 @@ struct dn_fib_table *dn_fib_get_table(int n, int create) | |||
| 784 | 784 | ||
| 785 | static void dn_fib_del_tree(int n) | 785 | static void dn_fib_del_tree(int n) | 
| 786 | { | 786 | { | 
| 787 | struct dn_fib_table *t; | 787 | struct dn_fib_table *t; | 
| 788 | 788 | ||
| 789 | write_lock(&dn_fib_tables_lock); | 789 | write_lock(&dn_fib_tables_lock); | 
| 790 | t = dn_fib_tables[n]; | 790 | t = dn_fib_tables[n]; | 
| 791 | dn_fib_tables[n] = NULL; | 791 | dn_fib_tables[n] = NULL; | 
| 792 | write_unlock(&dn_fib_tables_lock); | 792 | write_unlock(&dn_fib_tables_lock); | 
| 793 | 793 | ||
| 794 | if (t) { | 794 | kfree(t); | 
| 795 | kfree(t); | ||
| 796 | } | ||
| 797 | } | 795 | } | 
| 798 | 796 | ||
| 799 | struct dn_fib_table *dn_fib_empty_table(void) | 797 | struct dn_fib_table *dn_fib_empty_table(void) | 
diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c index 98a494be6039..9d57b4fb6440 100644 --- a/net/ethernet/pe2.c +++ b/net/ethernet/pe2.c  | |||
| @@ -32,8 +32,7 @@ struct datalink_proto *make_EII_client(void) | |||
| 32 | 32 | ||
| 33 | void destroy_EII_client(struct datalink_proto *dl) | 33 | void destroy_EII_client(struct datalink_proto *dl) | 
| 34 | { | 34 | { | 
| 35 | if (dl) | 35 | kfree(dl); | 
| 36 | kfree(dl); | ||
| 37 | } | 36 | } | 
| 38 | 37 | ||
| 39 | EXPORT_SYMBOL(destroy_EII_client); | 38 | EXPORT_SYMBOL(destroy_EII_client); | 
diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c index f3b6aa3be638..ecc9bb196abc 100644 --- a/net/ieee80211/ieee80211_crypt.c +++ b/net/ieee80211/ieee80211_crypt.c  | |||
| @@ -11,16 +11,14 @@ | |||
| 11 | * | 11 | * | 
| 12 | */ | 12 | */ | 
| 13 | 13 | ||
| 14 | #include <linux/config.h> | 14 | #include <linux/errno.h> | 
| 15 | #include <linux/version.h> | ||
| 16 | #include <linux/module.h> | 15 | #include <linux/module.h> | 
| 17 | #include <linux/init.h> | 16 | #include <linux/init.h> | 
| 18 | #include <linux/slab.h> | 17 | #include <linux/slab.h> | 
| 19 | #include <asm/string.h> | 18 | #include <linux/string.h> | 
| 20 | #include <asm/errno.h> | ||
| 21 | |||
| 22 | #include <net/ieee80211.h> | 19 | #include <net/ieee80211.h> | 
| 23 | 20 | ||
| 21 | |||
| 24 | MODULE_AUTHOR("Jouni Malinen"); | 22 | MODULE_AUTHOR("Jouni Malinen"); | 
| 25 | MODULE_DESCRIPTION("HostAP crypto"); | 23 | MODULE_DESCRIPTION("HostAP crypto"); | 
| 26 | MODULE_LICENSE("GPL"); | 24 | MODULE_LICENSE("GPL"); | 
| @@ -30,32 +28,20 @@ struct ieee80211_crypto_alg { | |||
| 30 | struct ieee80211_crypto_ops *ops; | 28 | struct ieee80211_crypto_ops *ops; | 
| 31 | }; | 29 | }; | 
| 32 | 30 | ||
| 33 | struct ieee80211_crypto { | 31 | static LIST_HEAD(ieee80211_crypto_algs); | 
| 34 | struct list_head algs; | 32 | static DEFINE_SPINLOCK(ieee80211_crypto_lock); | 
| 35 | spinlock_t lock; | ||
| 36 | }; | ||
| 37 | |||
| 38 | static struct ieee80211_crypto *hcrypt; | ||
| 39 | 33 | ||
| 40 | void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force) | 34 | void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force) | 
| 41 | { | 35 | { | 
| 42 | struct list_head *ptr, *n; | 36 | struct ieee80211_crypt_data *entry, *next; | 
| 43 | struct ieee80211_crypt_data *entry; | ||
| 44 | unsigned long flags; | 37 | unsigned long flags; | 
| 45 | 38 | ||
| 46 | spin_lock_irqsave(&ieee->lock, flags); | 39 | spin_lock_irqsave(&ieee->lock, flags); | 
| 47 | 40 | list_for_each_entry_safe(entry, next, &ieee->crypt_deinit_list, list) { | |
| 48 | if (list_empty(&ieee->crypt_deinit_list)) | ||
| 49 | goto unlock; | ||
| 50 | |||
| 51 | for (ptr = ieee->crypt_deinit_list.next, n = ptr->next; | ||
| 52 | ptr != &ieee->crypt_deinit_list; ptr = n, n = ptr->next) { | ||
| 53 | entry = list_entry(ptr, struct ieee80211_crypt_data, list); | ||
| 54 | |||
| 55 | if (atomic_read(&entry->refcnt) != 0 && !force) | 41 | if (atomic_read(&entry->refcnt) != 0 && !force) | 
| 56 | continue; | 42 | continue; | 
| 57 | 43 | ||
| 58 | list_del(ptr); | 44 | list_del(&entry->list); | 
| 59 | 45 | ||
| 60 | if (entry->ops) { | 46 | if (entry->ops) { | 
| 61 | entry->ops->deinit(entry->priv); | 47 | entry->ops->deinit(entry->priv); | 
| @@ -63,7 +49,6 @@ void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force) | |||
| 63 | } | 49 | } | 
| 64 | kfree(entry); | 50 | kfree(entry); | 
| 65 | } | 51 | } | 
| 66 | unlock: | ||
| 67 | spin_unlock_irqrestore(&ieee->lock, flags); | 52 | spin_unlock_irqrestore(&ieee->lock, flags); | 
| 68 | } | 53 | } | 
| 69 | 54 | ||
| @@ -126,9 +111,6 @@ int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops) | |||
| 126 | unsigned long flags; | 111 | unsigned long flags; | 
| 127 | struct ieee80211_crypto_alg *alg; | 112 | struct ieee80211_crypto_alg *alg; | 
| 128 | 113 | ||
| 129 | if (hcrypt == NULL) | ||
| 130 | return -1; | ||
| 131 | |||
| 132 | alg = kmalloc(sizeof(*alg), GFP_KERNEL); | 114 | alg = kmalloc(sizeof(*alg), GFP_KERNEL); | 
| 133 | if (alg == NULL) | 115 | if (alg == NULL) | 
| 134 | return -ENOMEM; | 116 | return -ENOMEM; | 
| @@ -136,9 +118,9 @@ int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops) | |||
| 136 | memset(alg, 0, sizeof(*alg)); | 118 | memset(alg, 0, sizeof(*alg)); | 
| 137 | alg->ops = ops; | 119 | alg->ops = ops; | 
| 138 | 120 | ||
| 139 | spin_lock_irqsave(&hcrypt->lock, flags); | 121 | spin_lock_irqsave(&ieee80211_crypto_lock, flags); | 
| 140 | list_add(&alg->list, &hcrypt->algs); | 122 | list_add(&alg->list, &ieee80211_crypto_algs); | 
| 141 | spin_unlock_irqrestore(&hcrypt->lock, flags); | 123 | spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); | 
| 142 | 124 | ||
| 143 | printk(KERN_DEBUG "ieee80211_crypt: registered algorithm '%s'\n", | 125 | printk(KERN_DEBUG "ieee80211_crypt: registered algorithm '%s'\n", | 
| 144 | ops->name); | 126 | ops->name); | 
| @@ -148,64 +130,49 @@ int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops) | |||
| 148 | 130 | ||
| 149 | int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops) | 131 | int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops) | 
| 150 | { | 132 | { | 
| 133 | struct ieee80211_crypto_alg *alg; | ||
| 151 | unsigned long flags; | 134 | unsigned long flags; | 
| 152 | struct list_head *ptr; | ||
| 153 | struct ieee80211_crypto_alg *del_alg = NULL; | ||
| 154 | |||
| 155 | if (hcrypt == NULL) | ||
| 156 | return -1; | ||
| 157 | |||
| 158 | spin_lock_irqsave(&hcrypt->lock, flags); | ||
| 159 | for (ptr = hcrypt->algs.next; ptr != &hcrypt->algs; ptr = ptr->next) { | ||
| 160 | struct ieee80211_crypto_alg *alg = | ||
| 161 | (struct ieee80211_crypto_alg *)ptr; | ||
| 162 | if (alg->ops == ops) { | ||
| 163 | list_del(&alg->list); | ||
| 164 | del_alg = alg; | ||
| 165 | break; | ||
| 166 | } | ||
| 167 | } | ||
| 168 | spin_unlock_irqrestore(&hcrypt->lock, flags); | ||
| 169 | 135 | ||
| 170 | if (del_alg) { | 136 | spin_lock_irqsave(&ieee80211_crypto_lock, flags); | 
| 171 | printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm " | 137 | list_for_each_entry(alg, &ieee80211_crypto_algs, list) { | 
| 172 | "'%s'\n", ops->name); | 138 | if (alg->ops == ops) | 
| 173 | kfree(del_alg); | 139 | goto found; | 
| 174 | } | 140 | } | 
| 175 | 141 | spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); | |
| 176 | return del_alg ? 0 : -1; | 142 | return -EINVAL; | 
| 143 | |||
| 144 | found: | ||
| 145 | printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm " | ||
| 146 | "'%s'\n", ops->name); | ||
| 147 | list_del(&alg->list); | ||
| 148 | spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); | ||
| 149 | kfree(alg); | ||
| 150 | return 0; | ||
| 177 | } | 151 | } | 
| 178 | 152 | ||
| 179 | struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name) | 153 | struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name) | 
| 180 | { | 154 | { | 
| 155 | struct ieee80211_crypto_alg *alg; | ||
| 181 | unsigned long flags; | 156 | unsigned long flags; | 
| 182 | struct list_head *ptr; | 157 | |
| 183 | struct ieee80211_crypto_alg *found_alg = NULL; | 158 | spin_lock_irqsave(&ieee80211_crypto_lock, flags); | 
| 184 | 159 | list_for_each_entry(alg, &ieee80211_crypto_algs, list) { | |
| 185 | if (hcrypt == NULL) | 160 | if (strcmp(alg->ops->name, name) == 0) | 
| 186 | return NULL; | 161 | goto found; | 
| 187 | |||
| 188 | spin_lock_irqsave(&hcrypt->lock, flags); | ||
| 189 | for (ptr = hcrypt->algs.next; ptr != &hcrypt->algs; ptr = ptr->next) { | ||
| 190 | struct ieee80211_crypto_alg *alg = | ||
| 191 | (struct ieee80211_crypto_alg *)ptr; | ||
| 192 | if (strcmp(alg->ops->name, name) == 0) { | ||
| 193 | found_alg = alg; | ||
| 194 | break; | ||
| 195 | } | ||
| 196 | } | 162 | } | 
| 197 | spin_unlock_irqrestore(&hcrypt->lock, flags); | 163 | spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); | 
| 164 | return NULL; | ||
| 198 | 165 | ||
| 199 | if (found_alg) | 166 | found: | 
| 200 | return found_alg->ops; | 167 | spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); | 
| 201 | else | 168 | return alg->ops; | 
| 202 | return NULL; | ||
| 203 | } | 169 | } | 
| 204 | 170 | ||
| 205 | static void *ieee80211_crypt_null_init(int keyidx) | 171 | static void *ieee80211_crypt_null_init(int keyidx) | 
| 206 | { | 172 | { | 
| 207 | return (void *)1; | 173 | return (void *)1; | 
| 208 | } | 174 | } | 
| 175 | |||
| 209 | static void ieee80211_crypt_null_deinit(void *priv) | 176 | static void ieee80211_crypt_null_deinit(void *priv) | 
| 210 | { | 177 | { | 
| 211 | } | 178 | } | 
| @@ -214,56 +181,18 @@ static struct ieee80211_crypto_ops ieee80211_crypt_null = { | |||
| 214 | .name = "NULL", | 181 | .name = "NULL", | 
| 215 | .init = ieee80211_crypt_null_init, | 182 | .init = ieee80211_crypt_null_init, | 
| 216 | .deinit = ieee80211_crypt_null_deinit, | 183 | .deinit = ieee80211_crypt_null_deinit, | 
| 217 | .encrypt_mpdu = NULL, | ||
| 218 | .decrypt_mpdu = NULL, | ||
| 219 | .encrypt_msdu = NULL, | ||
| 220 | .decrypt_msdu = NULL, | ||
| 221 | .set_key = NULL, | ||
| 222 | .get_key = NULL, | ||
| 223 | .extra_mpdu_prefix_len = 0, | ||
| 224 | .extra_mpdu_postfix_len = 0, | ||
| 225 | .owner = THIS_MODULE, | 184 | .owner = THIS_MODULE, | 
| 226 | }; | 185 | }; | 
| 227 | 186 | ||
| 228 | static int __init ieee80211_crypto_init(void) | 187 | static int __init ieee80211_crypto_init(void) | 
| 229 | { | 188 | { | 
| 230 | int ret = -ENOMEM; | 189 | return ieee80211_register_crypto_ops(&ieee80211_crypt_null); | 
| 231 | |||
| 232 | hcrypt = kmalloc(sizeof(*hcrypt), GFP_KERNEL); | ||
| 233 | if (!hcrypt) | ||
| 234 | goto out; | ||
| 235 | |||
| 236 | memset(hcrypt, 0, sizeof(*hcrypt)); | ||
| 237 | INIT_LIST_HEAD(&hcrypt->algs); | ||
| 238 | spin_lock_init(&hcrypt->lock); | ||
| 239 | |||
| 240 | ret = ieee80211_register_crypto_ops(&ieee80211_crypt_null); | ||
| 241 | if (ret < 0) { | ||
| 242 | kfree(hcrypt); | ||
| 243 | hcrypt = NULL; | ||
| 244 | } | ||
| 245 | out: | ||
| 246 | return ret; | ||
| 247 | } | 190 | } | 
| 248 | 191 | ||
| 249 | static void __exit ieee80211_crypto_deinit(void) | 192 | static void __exit ieee80211_crypto_deinit(void) | 
| 250 | { | 193 | { | 
| 251 | struct list_head *ptr, *n; | 194 | ieee80211_unregister_crypto_ops(&ieee80211_crypt_null); | 
| 252 | 195 | BUG_ON(!list_empty(&ieee80211_crypto_algs)); | |
| 253 | if (hcrypt == NULL) | ||
| 254 | return; | ||
| 255 | |||
| 256 | for (ptr = hcrypt->algs.next, n = ptr->next; ptr != &hcrypt->algs; | ||
| 257 | ptr = n, n = ptr->next) { | ||
| 258 | struct ieee80211_crypto_alg *alg = | ||
| 259 | (struct ieee80211_crypto_alg *)ptr; | ||
| 260 | list_del(ptr); | ||
| 261 | printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm " | ||
| 262 | "'%s' (deinit)\n", alg->ops->name); | ||
| 263 | kfree(alg); | ||
| 264 | } | ||
| 265 | |||
| 266 | kfree(hcrypt); | ||
| 267 | } | 196 | } | 
| 268 | 197 | ||
| 269 | EXPORT_SYMBOL(ieee80211_crypt_deinit_entries); | 198 | EXPORT_SYMBOL(ieee80211_crypt_deinit_entries); | 
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index 05a853c13012..470221728503 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c  | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | */ | 10 | */ | 
| 11 | 11 | ||
| 12 | #include <linux/config.h> | 12 | #include <linux/config.h> | 
| 13 | #include <linux/version.h> | ||
| 14 | #include <linux/module.h> | 13 | #include <linux/module.h> | 
| 15 | #include <linux/init.h> | 14 | #include <linux/init.h> | 
| 16 | #include <linux/slab.h> | 15 | #include <linux/slab.h> | 
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 2e34f29b7956..e0988320efbf 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c  | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | */ | 10 | */ | 
| 11 | 11 | ||
| 12 | #include <linux/config.h> | 12 | #include <linux/config.h> | 
| 13 | #include <linux/version.h> | ||
| 14 | #include <linux/module.h> | 13 | #include <linux/module.h> | 
| 15 | #include <linux/init.h> | 14 | #include <linux/init.h> | 
| 16 | #include <linux/slab.h> | 15 | #include <linux/slab.h> | 
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 7c08ed2f2628..073aebdf0f67 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c  | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | */ | 10 | */ | 
| 11 | 11 | ||
| 12 | #include <linux/config.h> | 12 | #include <linux/config.h> | 
| 13 | #include <linux/version.h> | ||
| 14 | #include <linux/module.h> | 13 | #include <linux/module.h> | 
| 15 | #include <linux/init.h> | 14 | #include <linux/init.h> | 
| 16 | #include <linux/slab.h> | 15 | #include <linux/slab.h> | 
diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c index c4b54ef8f6d5..610cc5cbc252 100644 --- a/net/ieee80211/ieee80211_geo.c +++ b/net/ieee80211/ieee80211_geo.c  | |||
| @@ -38,7 +38,6 @@ | |||
| 38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> | 
| 39 | #include <linux/tcp.h> | 39 | #include <linux/tcp.h> | 
| 40 | #include <linux/types.h> | 40 | #include <linux/types.h> | 
| 41 | #include <linux/version.h> | ||
| 42 | #include <linux/wireless.h> | 41 | #include <linux/wireless.h> | 
| 43 | #include <linux/etherdevice.h> | 42 | #include <linux/etherdevice.h> | 
| 44 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> | 
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index f66d792cd204..321287bc887f 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c  | |||
| @@ -45,7 +45,6 @@ | |||
| 45 | #include <linux/slab.h> | 45 | #include <linux/slab.h> | 
| 46 | #include <linux/tcp.h> | 46 | #include <linux/tcp.h> | 
| 47 | #include <linux/types.h> | 47 | #include <linux/types.h> | 
| 48 | #include <linux/version.h> | ||
| 49 | #include <linux/wireless.h> | 48 | #include <linux/wireless.h> | 
| 50 | #include <linux/etherdevice.h> | 49 | #include <linux/etherdevice.h> | 
| 51 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> | 
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index ce694cf5c160..03efaacbdb73 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c  | |||
| @@ -28,7 +28,6 @@ | |||
| 28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> | 
| 29 | #include <linux/tcp.h> | 29 | #include <linux/tcp.h> | 
| 30 | #include <linux/types.h> | 30 | #include <linux/types.h> | 
| 31 | #include <linux/version.h> | ||
| 32 | #include <linux/wireless.h> | 31 | #include <linux/wireless.h> | 
| 33 | #include <linux/etherdevice.h> | 32 | #include <linux/etherdevice.h> | 
| 34 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> | 
| @@ -370,6 +369,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, | |||
| 370 | /* Put this code here so that we avoid duplicating it in all | 369 | /* Put this code here so that we avoid duplicating it in all | 
| 371 | * Rx paths. - Jean II */ | 370 | * Rx paths. - Jean II */ | 
| 372 | #ifdef IW_WIRELESS_SPY /* defined in iw_handler.h */ | 371 | #ifdef IW_WIRELESS_SPY /* defined in iw_handler.h */ | 
| 372 | #ifdef CONFIG_NET_RADIO | ||
| 373 | /* If spy monitoring on */ | 373 | /* If spy monitoring on */ | 
| 374 | if (ieee->spy_data.spy_number > 0) { | 374 | if (ieee->spy_data.spy_number > 0) { | 
| 375 | struct iw_quality wstats; | 375 | struct iw_quality wstats; | 
| @@ -396,6 +396,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, | |||
| 396 | /* Update spy records */ | 396 | /* Update spy records */ | 
| 397 | wireless_spy_update(ieee->dev, hdr->addr2, &wstats); | 397 | wireless_spy_update(ieee->dev, hdr->addr2, &wstats); | 
| 398 | } | 398 | } | 
| 399 | #endif /* CONFIG_NET_RADIO */ | ||
| 399 | #endif /* IW_WIRELESS_SPY */ | 400 | #endif /* IW_WIRELESS_SPY */ | 
| 400 | 401 | ||
| 401 | #ifdef NOT_YET | 402 | #ifdef NOT_YET | 
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index 95ccbadbf55b..445f206e65e0 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c  | |||
| @@ -38,7 +38,6 @@ | |||
| 38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> | 
| 39 | #include <linux/tcp.h> | 39 | #include <linux/tcp.h> | 
| 40 | #include <linux/types.h> | 40 | #include <linux/types.h> | 
| 41 | #include <linux/version.h> | ||
| 42 | #include <linux/wireless.h> | 41 | #include <linux/wireless.h> | 
| 43 | #include <linux/etherdevice.h> | 42 | #include <linux/etherdevice.h> | 
| 44 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> | 
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 1ce7af9bec35..181755f2aa8b 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c  | |||
| @@ -161,9 +161,11 @@ static inline char *ipw2100_translate_scan(struct ieee80211_device *ieee, | |||
| 161 | (ieee->perfect_rssi - ieee->worst_rssi) - | 161 | (ieee->perfect_rssi - ieee->worst_rssi) - | 
| 162 | (ieee->perfect_rssi - network->stats.rssi) * | 162 | (ieee->perfect_rssi - network->stats.rssi) * | 
| 163 | (15 * (ieee->perfect_rssi - ieee->worst_rssi) + | 163 | (15 * (ieee->perfect_rssi - ieee->worst_rssi) + | 
| 164 | 62 * (ieee->perfect_rssi - network->stats.rssi))) / | 164 | 62 * (ieee->perfect_rssi - | 
| 165 | ((ieee->perfect_rssi - ieee->worst_rssi) * | 165 | network->stats.rssi))) / | 
| 166 | (ieee->perfect_rssi - ieee->worst_rssi)); | 166 | ((ieee->perfect_rssi - | 
| 167 | ieee->worst_rssi) * (ieee->perfect_rssi - | ||
| 168 | ieee->worst_rssi)); | ||
| 167 | if (iwe.u.qual.qual > 100) | 169 | if (iwe.u.qual.qual > 100) | 
| 168 | iwe.u.qual.qual = 100; | 170 | iwe.u.qual.qual = 100; | 
| 169 | else if (iwe.u.qual.qual < 1) | 171 | else if (iwe.u.qual.qual < 1) | 
| @@ -520,7 +522,8 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, | |||
| 520 | crypt = &ieee->crypt[idx]; | 522 | crypt = &ieee->crypt[idx]; | 
| 521 | group_key = 1; | 523 | group_key = 1; | 
| 522 | } else { | 524 | } else { | 
| 523 | if (idx != 0) | 525 | /* some Cisco APs use idx>0 for unicast in dynamic WEP */ | 
| 526 | if (idx != 0 && ext->alg != IW_ENCODE_ALG_WEP) | ||
| 524 | return -EINVAL; | 527 | return -EINVAL; | 
| 525 | if (ieee->iw_mode == IW_MODE_INFRA) | 528 | if (ieee->iw_mode == IW_MODE_INFRA) | 
| 526 | crypt = &ieee->crypt[idx]; | 529 | crypt = &ieee->crypt[idx]; | 
| @@ -688,7 +691,8 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, | |||
| 688 | } else | 691 | } else | 
| 689 | idx = ieee->tx_keyidx; | 692 | idx = ieee->tx_keyidx; | 
| 690 | 693 | ||
| 691 | if (!ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) | 694 | if (!ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY && | 
| 695 | ext->alg != IW_ENCODE_ALG_WEP) | ||
| 692 | if (idx != 0 || ieee->iw_mode != IW_MODE_INFRA) | 696 | if (idx != 0 || ieee->iw_mode != IW_MODE_INFRA) | 
| 693 | return -EINVAL; | 697 | return -EINVAL; | 
| 694 | 698 | ||
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a9d84f93442c..eaa150c33b04 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c  | |||
| @@ -147,8 +147,7 @@ void inet_sock_destruct(struct sock *sk) | |||
| 147 | BUG_TRAP(!sk->sk_wmem_queued); | 147 | BUG_TRAP(!sk->sk_wmem_queued); | 
| 148 | BUG_TRAP(!sk->sk_forward_alloc); | 148 | BUG_TRAP(!sk->sk_forward_alloc); | 
| 149 | 149 | ||
| 150 | if (inet->opt) | 150 | kfree(inet->opt); | 
| 151 | kfree(inet->opt); | ||
| 152 | dst_release(sk->sk_dst_cache); | 151 | dst_release(sk->sk_dst_cache); | 
| 153 | sk_refcnt_debug_dec(sk); | 152 | sk_refcnt_debug_dec(sk); | 
| 154 | } | 153 | } | 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 990633c09dfe..2267c1fad879 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c  | |||
| @@ -266,8 +266,7 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg) | |||
| 266 | if (tb) | 266 | if (tb) | 
| 267 | err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); | 267 | err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); | 
| 268 | } | 268 | } | 
| 269 | if (rta.rta_mx) | 269 | kfree(rta.rta_mx); | 
| 270 | kfree(rta.rta_mx); | ||
| 271 | } | 270 | } | 
| 272 | rtnl_unlock(); | 271 | rtnl_unlock(); | 
| 273 | return err; | 272 | return err; | 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 94468a76c5b4..3fe021f1a566 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c  | |||
| @@ -78,17 +78,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, | |||
| 78 | int low = sysctl_local_port_range[0]; | 78 | int low = sysctl_local_port_range[0]; | 
| 79 | int high = sysctl_local_port_range[1]; | 79 | int high = sysctl_local_port_range[1]; | 
| 80 | int remaining = (high - low) + 1; | 80 | int remaining = (high - low) + 1; | 
| 81 | int rover; | 81 | int rover = net_random() % (high - low) + low; | 
| 82 | 82 | ||
| 83 | spin_lock(&hashinfo->portalloc_lock); | ||
| 84 | if (hashinfo->port_rover < low) | ||
| 85 | rover = low; | ||
| 86 | else | ||
| 87 | rover = hashinfo->port_rover; | ||
| 88 | do { | 83 | do { | 
| 89 | rover++; | ||
| 90 | if (rover > high) | ||
| 91 | rover = low; | ||
| 92 | head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; | 84 | head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; | 
| 93 | spin_lock(&head->lock); | 85 | spin_lock(&head->lock); | 
| 94 | inet_bind_bucket_for_each(tb, node, &head->chain) | 86 | inet_bind_bucket_for_each(tb, node, &head->chain) | 
| @@ -97,9 +89,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, | |||
| 97 | break; | 89 | break; | 
| 98 | next: | 90 | next: | 
| 99 | spin_unlock(&head->lock); | 91 | spin_unlock(&head->lock); | 
| 92 | if (++rover > high) | ||
| 93 | rover = low; | ||
| 100 | } while (--remaining > 0); | 94 | } while (--remaining > 0); | 
| 101 | hashinfo->port_rover = rover; | ||
| 102 | spin_unlock(&hashinfo->portalloc_lock); | ||
| 103 | 95 | ||
| 104 | /* Exhausted local port range during search? It is not | 96 | /* Exhausted local port range during search? It is not | 
| 105 | * possible for us to be holding one of the bind hash | 97 | * possible for us to be holding one of the bind hash | 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 71f3c7350c6e..39061ed53cfd 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c  | |||
| @@ -724,12 +724,6 @@ done: | |||
| 724 | return skb->len; | 724 | return skb->len; | 
| 725 | } | 725 | } | 
| 726 | 726 | ||
| 727 | static int inet_diag_dump_done(struct netlink_callback *cb) | ||
| 728 | { | ||
| 729 | return 0; | ||
| 730 | } | ||
| 731 | |||
| 732 | |||
| 733 | static __inline__ int | 727 | static __inline__ int | 
| 734 | inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | 728 | inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | 
| 735 | { | 729 | { | 
| @@ -760,8 +754,7 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 760 | goto err_inval; | 754 | goto err_inval; | 
| 761 | } | 755 | } | 
| 762 | return netlink_dump_start(idiagnl, skb, nlh, | 756 | return netlink_dump_start(idiagnl, skb, nlh, | 
| 763 | inet_diag_dump, | 757 | inet_diag_dump, NULL); | 
| 764 | inet_diag_dump_done); | ||
| 765 | } else { | 758 | } else { | 
| 766 | return inet_diag_get_exact(skb, nlh); | 759 | return inet_diag_get_exact(skb, nlh); | 
| 767 | } | 760 | } | 
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index bce4e875193b..dbe12da8d8b3 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c  | |||
| @@ -510,8 +510,7 @@ static int ip_options_get_finish(struct ip_options **optp, | |||
| 510 | kfree(opt); | 510 | kfree(opt); | 
| 511 | return -EINVAL; | 511 | return -EINVAL; | 
| 512 | } | 512 | } | 
| 513 | if (*optp) | 513 | kfree(*optp); | 
| 514 | kfree(*optp); | ||
| 515 | *optp = opt; | 514 | *optp = opt; | 
| 516 | return 0; | 515 | return 0; | 
| 517 | } | 516 | } | 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 17758234a3e3..11c2f68254f0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c  | |||
| @@ -353,7 +353,8 @@ packet_routed: | |||
| 353 | ip_options_build(skb, opt, inet->daddr, rt, 0); | 353 | ip_options_build(skb, opt, inet->daddr, rt, 0); | 
| 354 | } | 354 | } | 
| 355 | 355 | ||
| 356 | ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs); | 356 | ip_select_ident_more(iph, &rt->u.dst, sk, | 
| 357 | (skb_shinfo(skb)->tso_segs ?: 1) - 1); | ||
| 357 | 358 | ||
| 358 | /* Add an IP checksum. */ | 359 | /* Add an IP checksum. */ | 
| 359 | ip_send_check(iph); | 360 | ip_send_check(iph); | 
| @@ -1262,10 +1263,8 @@ int ip_push_pending_frames(struct sock *sk) | |||
| 1262 | 1263 | ||
| 1263 | out: | 1264 | out: | 
| 1264 | inet->cork.flags &= ~IPCORK_OPT; | 1265 | inet->cork.flags &= ~IPCORK_OPT; | 
| 1265 | if (inet->cork.opt) { | 1266 | kfree(inet->cork.opt); | 
| 1266 | kfree(inet->cork.opt); | 1267 | inet->cork.opt = NULL; | 
| 1267 | inet->cork.opt = NULL; | ||
| 1268 | } | ||
| 1269 | if (inet->cork.rt) { | 1268 | if (inet->cork.rt) { | 
| 1270 | ip_rt_put(inet->cork.rt); | 1269 | ip_rt_put(inet->cork.rt); | 
| 1271 | inet->cork.rt = NULL; | 1270 | inet->cork.rt = NULL; | 
| @@ -1289,10 +1288,8 @@ void ip_flush_pending_frames(struct sock *sk) | |||
| 1289 | kfree_skb(skb); | 1288 | kfree_skb(skb); | 
| 1290 | 1289 | ||
| 1291 | inet->cork.flags &= ~IPCORK_OPT; | 1290 | inet->cork.flags &= ~IPCORK_OPT; | 
| 1292 | if (inet->cork.opt) { | 1291 | kfree(inet->cork.opt); | 
| 1293 | kfree(inet->cork.opt); | 1292 | inet->cork.opt = NULL; | 
| 1294 | inet->cork.opt = NULL; | ||
| 1295 | } | ||
| 1296 | if (inet->cork.rt) { | 1293 | if (inet->cork.rt) { | 
| 1297 | ip_rt_put(inet->cork.rt); | 1294 | ip_rt_put(inet->cork.rt); | 
| 1298 | inet->cork.rt = NULL; | 1295 | inet->cork.rt = NULL; | 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 2f0b47da5b37..4f2d87257309 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c  | |||
| @@ -202,8 +202,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s | |||
| 202 | if (ra->sk == sk) { | 202 | if (ra->sk == sk) { | 
| 203 | if (on) { | 203 | if (on) { | 
| 204 | write_unlock_bh(&ip_ra_lock); | 204 | write_unlock_bh(&ip_ra_lock); | 
| 205 | if (new_ra) | 205 | kfree(new_ra); | 
| 206 | kfree(new_ra); | ||
| 207 | return -EADDRINUSE; | 206 | return -EADDRINUSE; | 
| 208 | } | 207 | } | 
| 209 | *rap = ra->next; | 208 | *rap = ra->next; | 
| @@ -446,8 +445,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
| 446 | #endif | 445 | #endif | 
| 447 | } | 446 | } | 
| 448 | opt = xchg(&inet->opt, opt); | 447 | opt = xchg(&inet->opt, opt); | 
| 449 | if (opt) | 448 | kfree(opt); | 
| 450 | kfree(opt); | ||
| 451 | break; | 449 | break; | 
| 452 | } | 450 | } | 
| 453 | case IP_PKTINFO: | 451 | case IP_PKTINFO: | 
| @@ -828,10 +826,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
| 828 | 826 | ||
| 829 | err = ip_mc_msfilter(sk, msf, ifindex); | 827 | err = ip_mc_msfilter(sk, msf, ifindex); | 
| 830 | mc_msf_out: | 828 | mc_msf_out: | 
| 831 | if (msf) | 829 | kfree(msf); | 
| 832 | kfree(msf); | 830 | kfree(gsf); | 
| 833 | if (gsf) | ||
| 834 | kfree(gsf); | ||
| 835 | break; | 831 | break; | 
| 836 | } | 832 | } | 
| 837 | case IP_ROUTER_ALERT: | 833 | case IP_ROUTER_ALERT: | 
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index fc6f95aaa969..d7eb680101c2 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c  | |||
| @@ -110,8 +110,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) | |||
| 110 | return 0; | 110 | return 0; | 
| 111 | 111 | ||
| 112 | out: | 112 | out: | 
| 113 | if (inc->timeout_table) | 113 | kfree(inc->timeout_table); | 
| 114 | kfree(inc->timeout_table); | ||
| 115 | kfree(inc); | 114 | kfree(inc); | 
| 116 | return ret; | 115 | return ret; | 
| 117 | } | 116 | } | 
| @@ -136,8 +135,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc) | |||
| 136 | 135 | ||
| 137 | list_del(&inc->a_list); | 136 | list_del(&inc->a_list); | 
| 138 | 137 | ||
| 139 | if (inc->timeout_table != NULL) | 138 | kfree(inc->timeout_table); | 
| 140 | kfree(inc->timeout_table); | ||
| 141 | kfree(inc); | 139 | kfree(inc); | 
| 142 | } | 140 | } | 
| 143 | 141 | ||
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 981cc3244ef2..1a0843cd58a9 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c  | |||
| @@ -1009,11 +1009,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, | |||
| 1009 | if (sysctl_ip_vs_expire_nodest_conn) { | 1009 | if (sysctl_ip_vs_expire_nodest_conn) { | 
| 1010 | /* try to expire the connection immediately */ | 1010 | /* try to expire the connection immediately */ | 
| 1011 | ip_vs_conn_expire_now(cp); | 1011 | ip_vs_conn_expire_now(cp); | 
| 1012 | } else { | ||
| 1013 | /* don't restart its timer, and silently | ||
| 1014 | drop the packet. */ | ||
| 1015 | __ip_vs_conn_put(cp); | ||
| 1016 | } | 1012 | } | 
| 1013 | /* don't restart its timer, and silently | ||
| 1014 | drop the packet. */ | ||
| 1015 | __ip_vs_conn_put(cp); | ||
| 1017 | return NF_DROP; | 1016 | return NF_DROP; | 
| 1018 | } | 1017 | } | 
| 1019 | 1018 | ||
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c index bd7d75b6abe0..d34a9fa608e0 100644 --- a/net/ipv4/multipath_wrandom.c +++ b/net/ipv4/multipath_wrandom.c  | |||
| @@ -207,16 +207,12 @@ static void wrandom_select_route(const struct flowi *flp, | |||
| 207 | decision = mpc->rt; | 207 | decision = mpc->rt; | 
| 208 | 208 | ||
| 209 | last_power = mpc->power; | 209 | last_power = mpc->power; | 
| 210 | if (last_mpc) | 210 | kfree(last_mpc); | 
| 211 | kfree(last_mpc); | ||
| 212 | |||
| 213 | last_mpc = mpc; | 211 | last_mpc = mpc; | 
| 214 | } | 212 | } | 
| 215 | 213 | ||
| 216 | if (last_mpc) { | 214 | /* concurrent __multipath_flush may lead to !last_mpc */ | 
| 217 | /* concurrent __multipath_flush may lead to !last_mpc */ | 215 | kfree(last_mpc); | 
| 218 | kfree(last_mpc); | ||
| 219 | } | ||
| 220 | 216 | ||
| 221 | decision->u.dst.__use++; | 217 | decision->u.dst.__use++; | 
| 222 | *rp = decision; | 218 | *rp = decision; | 
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 7d917e4ce1d9..9d3c8b5f327e 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig  | |||
| @@ -5,6 +5,20 @@ | |||
| 5 | menu "IP: Netfilter Configuration" | 5 | menu "IP: Netfilter Configuration" | 
| 6 | depends on INET && NETFILTER | 6 | depends on INET && NETFILTER | 
| 7 | 7 | ||
| 8 | config NF_CONNTRACK_IPV4 | ||
| 9 | tristate "IPv4 support for new connection tracking (EXPERIMENTAL)" | ||
| 10 | depends on EXPERIMENTAL && NF_CONNTRACK | ||
| 11 | ---help--- | ||
| 12 | Connection tracking keeps a record of what packets have passed | ||
| 13 | through your machine, in order to figure out how they are related | ||
| 14 | into connections. | ||
| 15 | |||
| 16 | This is IPv4 support on Layer 3 independent connection tracking. | ||
| 17 | Layer 3 independent connection tracking is experimental scheme | ||
| 18 | which generalize ip_conntrack to support other layer 3 protocols. | ||
| 19 | |||
| 20 | To compile it as a module, choose M here. If unsure, say N. | ||
| 21 | |||
| 8 | # connection tracking, helpers and protocols | 22 | # connection tracking, helpers and protocols | 
| 9 | config IP_NF_CONNTRACK | 23 | config IP_NF_CONNTRACK | 
| 10 | tristate "Connection tracking (required for masq/NAT)" | 24 | tristate "Connection tracking (required for masq/NAT)" | 
| @@ -209,8 +223,8 @@ config IP_NF_MATCH_PKTTYPE | |||
| 209 | tristate "Packet type match support" | 223 | tristate "Packet type match support" | 
| 210 | depends on IP_NF_IPTABLES | 224 | depends on IP_NF_IPTABLES | 
| 211 | help | 225 | help | 
| 212 | Packet type matching allows you to match a packet by | 226 | Packet type matching allows you to match a packet by | 
| 213 | its "class", eg. BROADCAST, MULTICAST, ... | 227 | its "class", eg. BROADCAST, MULTICAST, ... | 
| 214 | 228 | ||
| 215 | Typical usage: | 229 | Typical usage: | 
| 216 | iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG | 230 | iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG | 
| @@ -317,7 +331,8 @@ config IP_NF_MATCH_TCPMSS | |||
| 317 | 331 | ||
| 318 | config IP_NF_MATCH_HELPER | 332 | config IP_NF_MATCH_HELPER | 
| 319 | tristate "Helper match support" | 333 | tristate "Helper match support" | 
| 320 | depends on IP_NF_CONNTRACK && IP_NF_IPTABLES | 334 | depends on IP_NF_IPTABLES | 
| 335 | depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 | ||
| 321 | help | 336 | help | 
| 322 | Helper matching allows you to match packets in dynamic connections | 337 | Helper matching allows you to match packets in dynamic connections | 
| 323 | tracked by a conntrack-helper, ie. ip_conntrack_ftp | 338 | tracked by a conntrack-helper, ie. ip_conntrack_ftp | 
| @@ -326,7 +341,8 @@ config IP_NF_MATCH_HELPER | |||
| 326 | 341 | ||
| 327 | config IP_NF_MATCH_STATE | 342 | config IP_NF_MATCH_STATE | 
| 328 | tristate "Connection state match support" | 343 | tristate "Connection state match support" | 
| 329 | depends on IP_NF_CONNTRACK && IP_NF_IPTABLES | 344 | depends on IP_NF_IPTABLES | 
| 345 | depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 | ||
| 330 | help | 346 | help | 
| 331 | Connection state matching allows you to match packets based on their | 347 | Connection state matching allows you to match packets based on their | 
| 332 | relationship to a tracked connection (ie. previous packets). This | 348 | relationship to a tracked connection (ie. previous packets). This | 
| @@ -336,7 +352,8 @@ config IP_NF_MATCH_STATE | |||
| 336 | 352 | ||
| 337 | config IP_NF_MATCH_CONNTRACK | 353 | config IP_NF_MATCH_CONNTRACK | 
| 338 | tristate "Connection tracking match support" | 354 | tristate "Connection tracking match support" | 
| 339 | depends on IP_NF_CONNTRACK && IP_NF_IPTABLES | 355 | depends on IP_NF_IPTABLES | 
| 356 | depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 | ||
| 340 | help | 357 | help | 
| 341 | This is a general conntrack match module, a superset of the state match. | 358 | This is a general conntrack match module, a superset of the state match. | 
| 342 | 359 | ||
| @@ -422,7 +439,8 @@ config IP_NF_MATCH_COMMENT | |||
| 422 | 439 | ||
| 423 | config IP_NF_MATCH_CONNMARK | 440 | config IP_NF_MATCH_CONNMARK | 
| 424 | tristate 'Connection mark match support' | 441 | tristate 'Connection mark match support' | 
| 425 | depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES | 442 | depends on IP_NF_IPTABLES | 
| 443 | depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4) | ||
| 426 | help | 444 | help | 
| 427 | This option adds a `connmark' match, which allows you to match the | 445 | This option adds a `connmark' match, which allows you to match the | 
| 428 | connection mark value previously set for the session by `CONNMARK'. | 446 | connection mark value previously set for the session by `CONNMARK'. | 
| @@ -433,7 +451,8 @@ config IP_NF_MATCH_CONNMARK | |||
| 433 | 451 | ||
| 434 | config IP_NF_MATCH_CONNBYTES | 452 | config IP_NF_MATCH_CONNBYTES | 
| 435 | tristate 'Connection byte/packet counter match support' | 453 | tristate 'Connection byte/packet counter match support' | 
| 436 | depends on IP_NF_CT_ACCT && IP_NF_IPTABLES | 454 | depends on IP_NF_IPTABLES | 
| 455 | depends on IP_NF_CT_ACCT || (NF_CT_ACCT && NF_CONNTRACK_IPV4) | ||
| 437 | help | 456 | help | 
| 438 | This option adds a `connbytes' match, which allows you to match the | 457 | This option adds a `connbytes' match, which allows you to match the | 
| 439 | number of bytes and/or packets for each direction within a connection. | 458 | number of bytes and/or packets for each direction within a connection. | 
| @@ -747,7 +766,8 @@ config IP_NF_TARGET_TTL | |||
| 747 | 766 | ||
| 748 | config IP_NF_TARGET_CONNMARK | 767 | config IP_NF_TARGET_CONNMARK | 
| 749 | tristate 'CONNMARK target support' | 768 | tristate 'CONNMARK target support' | 
| 750 | depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE | 769 | depends on IP_NF_MANGLE | 
| 770 | depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4) | ||
| 751 | help | 771 | help | 
| 752 | This option adds a `CONNMARK' target, which allows one to manipulate | 772 | This option adds a `CONNMARK' target, which allows one to manipulate | 
| 753 | the connection mark value. Similar to the MARK target, but | 773 | the connection mark value. Similar to the MARK target, but | 
| @@ -759,7 +779,8 @@ config IP_NF_TARGET_CONNMARK | |||
| 759 | 779 | ||
| 760 | config IP_NF_TARGET_CLUSTERIP | 780 | config IP_NF_TARGET_CLUSTERIP | 
| 761 | tristate "CLUSTERIP target support (EXPERIMENTAL)" | 781 | tristate "CLUSTERIP target support (EXPERIMENTAL)" | 
| 762 | depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES && EXPERIMENTAL | 782 | depends on IP_NF_IPTABLES && EXPERIMENTAL | 
| 783 | depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4) | ||
| 763 | help | 784 | help | 
| 764 | The CLUSTERIP target allows you to build load-balancing clusters of | 785 | The CLUSTERIP target allows you to build load-balancing clusters of | 
| 765 | network servers without having a dedicated load-balancing | 786 | network servers without having a dedicated load-balancing | 
| @@ -782,7 +803,7 @@ config IP_NF_RAW | |||
| 782 | config IP_NF_TARGET_NOTRACK | 803 | config IP_NF_TARGET_NOTRACK | 
| 783 | tristate 'NOTRACK target support' | 804 | tristate 'NOTRACK target support' | 
| 784 | depends on IP_NF_RAW | 805 | depends on IP_NF_RAW | 
| 785 | depends on IP_NF_CONNTRACK | 806 | depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 | 
| 786 | help | 807 | help | 
| 787 | The NOTRACK target allows a select rule to specify | 808 | The NOTRACK target allows a select rule to specify | 
| 788 | which packets *not* to enter the conntrack/NAT | 809 | which packets *not* to enter the conntrack/NAT | 
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index dab4b58dd31e..058c48e258fc 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile  | |||
| @@ -103,3 +103,9 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o | |||
| 103 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o | 103 | obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o | 
| 104 | 104 | ||
| 105 | obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o | 105 | obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o | 
| 106 | |||
| 107 | # objects for l3 independent conntrack | ||
| 108 | nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o | ||
| 109 | |||
| 110 | # l3 independent conntrack | ||
| 111 | obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index 926a6684643d..4108a5e12b3c 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c  | |||
| @@ -270,14 +270,10 @@ exp_gre(struct ip_conntrack *master, | |||
| 270 | exp_orig->expectfn = pptp_expectfn; | 270 | exp_orig->expectfn = pptp_expectfn; | 
| 271 | exp_orig->flags = 0; | 271 | exp_orig->flags = 0; | 
| 272 | 272 | ||
| 273 | exp_orig->dir = IP_CT_DIR_ORIGINAL; | ||
| 274 | |||
| 275 | /* both expectations are identical apart from tuple */ | 273 | /* both expectations are identical apart from tuple */ | 
| 276 | memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); | 274 | memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); | 
| 277 | memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); | 275 | memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); | 
| 278 | 276 | ||
| 279 | exp_reply->dir = !exp_orig->dir; | ||
| 280 | |||
| 281 | if (ip_nat_pptp_hook_exp_gre) | 277 | if (ip_nat_pptp_hook_exp_gre) | 
| 282 | ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); | 278 | ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); | 
| 283 | else { | 279 | else { | 
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 166e6069f121..d2a4fec22862 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c  | |||
| @@ -28,11 +28,8 @@ | |||
| 28 | #include <linux/netlink.h> | 28 | #include <linux/netlink.h> | 
| 29 | #include <linux/spinlock.h> | 29 | #include <linux/spinlock.h> | 
| 30 | #include <linux/notifier.h> | 30 | #include <linux/notifier.h> | 
| 31 | #include <linux/rtnetlink.h> | ||
| 32 | 31 | ||
| 33 | #include <linux/netfilter.h> | 32 | #include <linux/netfilter.h> | 
| 34 | #include <linux/netfilter_ipv4.h> | ||
| 35 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
| 36 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 33 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 
| 37 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 34 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 
| 38 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 35 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 
| @@ -58,14 +55,17 @@ ctnetlink_dump_tuples_proto(struct sk_buff *skb, | |||
| 58 | const struct ip_conntrack_tuple *tuple) | 55 | const struct ip_conntrack_tuple *tuple) | 
| 59 | { | 56 | { | 
| 60 | struct ip_conntrack_protocol *proto; | 57 | struct ip_conntrack_protocol *proto; | 
| 58 | int ret = 0; | ||
| 61 | 59 | ||
| 62 | NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); | 60 | NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); | 
| 63 | 61 | ||
| 64 | proto = ip_conntrack_proto_find_get(tuple->dst.protonum); | 62 | proto = ip_conntrack_proto_find_get(tuple->dst.protonum); | 
| 65 | if (proto && proto->tuple_to_nfattr) | 63 | if (likely(proto && proto->tuple_to_nfattr)) { | 
| 66 | return proto->tuple_to_nfattr(skb, tuple); | 64 | ret = proto->tuple_to_nfattr(skb, tuple); | 
| 65 | ip_conntrack_proto_put(proto); | ||
| 66 | } | ||
| 67 | 67 | ||
| 68 | return 0; | 68 | return ret; | 
| 69 | 69 | ||
| 70 | nfattr_failure: | 70 | nfattr_failure: | 
| 71 | return -1; | 71 | return -1; | 
| @@ -175,7 +175,7 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, | |||
| 175 | { | 175 | { | 
| 176 | enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; | 176 | enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; | 
| 177 | struct nfattr *nest_count = NFA_NEST(skb, type); | 177 | struct nfattr *nest_count = NFA_NEST(skb, type); | 
| 178 | u_int64_t tmp; | 178 | u_int32_t tmp; | 
| 179 | 179 | ||
| 180 | tmp = htonl(ct->counters[dir].packets); | 180 | tmp = htonl(ct->counters[dir].packets); | 
| 181 | NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); | 181 | NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); | 
| @@ -479,9 +479,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) | |||
| 479 | 479 | ||
| 480 | DEBUGP("entered %s\n", __FUNCTION__); | 480 | DEBUGP("entered %s\n", __FUNCTION__); | 
| 481 | 481 | ||
| 482 | 482 | nfattr_parse_nested(tb, CTA_IP_MAX, attr); | |
| 483 | if (nfattr_parse_nested(tb, CTA_IP_MAX, attr) < 0) | ||
| 484 | goto nfattr_failure; | ||
| 485 | 483 | ||
| 486 | if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) | 484 | if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) | 
| 487 | return -EINVAL; | 485 | return -EINVAL; | 
| @@ -497,9 +495,6 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) | |||
| 497 | DEBUGP("leaving\n"); | 495 | DEBUGP("leaving\n"); | 
| 498 | 496 | ||
| 499 | return 0; | 497 | return 0; | 
| 500 | |||
| 501 | nfattr_failure: | ||
| 502 | return -1; | ||
| 503 | } | 498 | } | 
| 504 | 499 | ||
| 505 | static const int cta_min_proto[CTA_PROTO_MAX] = { | 500 | static const int cta_min_proto[CTA_PROTO_MAX] = { | 
| @@ -521,8 +516,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr, | |||
| 521 | 516 | ||
| 522 | DEBUGP("entered %s\n", __FUNCTION__); | 517 | DEBUGP("entered %s\n", __FUNCTION__); | 
| 523 | 518 | ||
| 524 | if (nfattr_parse_nested(tb, CTA_PROTO_MAX, attr) < 0) | 519 | nfattr_parse_nested(tb, CTA_PROTO_MAX, attr); | 
| 525 | goto nfattr_failure; | ||
| 526 | 520 | ||
| 527 | if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) | 521 | if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) | 
| 528 | return -EINVAL; | 522 | return -EINVAL; | 
| @@ -539,9 +533,6 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr, | |||
| 539 | } | 533 | } | 
| 540 | 534 | ||
| 541 | return ret; | 535 | return ret; | 
| 542 | |||
| 543 | nfattr_failure: | ||
| 544 | return -1; | ||
| 545 | } | 536 | } | 
| 546 | 537 | ||
| 547 | static inline int | 538 | static inline int | 
| @@ -555,8 +546,7 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple, | |||
| 555 | 546 | ||
| 556 | memset(tuple, 0, sizeof(*tuple)); | 547 | memset(tuple, 0, sizeof(*tuple)); | 
| 557 | 548 | ||
| 558 | if (nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]) < 0) | 549 | nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]); | 
| 559 | goto nfattr_failure; | ||
| 560 | 550 | ||
| 561 | if (!tb[CTA_TUPLE_IP-1]) | 551 | if (!tb[CTA_TUPLE_IP-1]) | 
| 562 | return -EINVAL; | 552 | return -EINVAL; | 
| @@ -583,9 +573,6 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple, | |||
| 583 | DEBUGP("leaving\n"); | 573 | DEBUGP("leaving\n"); | 
| 584 | 574 | ||
| 585 | return 0; | 575 | return 0; | 
| 586 | |||
| 587 | nfattr_failure: | ||
| 588 | return -1; | ||
| 589 | } | 576 | } | 
| 590 | 577 | ||
| 591 | #ifdef CONFIG_IP_NF_NAT_NEEDED | 578 | #ifdef CONFIG_IP_NF_NAT_NEEDED | 
| @@ -603,11 +590,10 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr, | |||
| 603 | 590 | ||
| 604 | DEBUGP("entered %s\n", __FUNCTION__); | 591 | DEBUGP("entered %s\n", __FUNCTION__); | 
| 605 | 592 | ||
| 606 | if (nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr) < 0) | 593 | nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr); | 
| 607 | goto nfattr_failure; | ||
| 608 | 594 | ||
| 609 | if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) | 595 | if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) | 
| 610 | goto nfattr_failure; | 596 | return -EINVAL; | 
| 611 | 597 | ||
| 612 | npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); | 598 | npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); | 
| 613 | if (!npt) | 599 | if (!npt) | 
| @@ -626,9 +612,6 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr, | |||
| 626 | 612 | ||
| 627 | DEBUGP("leaving\n"); | 613 | DEBUGP("leaving\n"); | 
| 628 | return 0; | 614 | return 0; | 
| 629 | |||
| 630 | nfattr_failure: | ||
| 631 | return -1; | ||
| 632 | } | 615 | } | 
| 633 | 616 | ||
| 634 | static inline int | 617 | static inline int | 
| @@ -642,8 +625,7 @@ ctnetlink_parse_nat(struct nfattr *cda[], | |||
| 642 | 625 | ||
| 643 | memset(range, 0, sizeof(*range)); | 626 | memset(range, 0, sizeof(*range)); | 
| 644 | 627 | ||
| 645 | if (nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]) < 0) | 628 | nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]); | 
| 646 | goto nfattr_failure; | ||
| 647 | 629 | ||
| 648 | if (tb[CTA_NAT_MINIP-1]) | 630 | if (tb[CTA_NAT_MINIP-1]) | 
| 649 | range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); | 631 | range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); | 
| @@ -665,9 +647,6 @@ ctnetlink_parse_nat(struct nfattr *cda[], | |||
| 665 | 647 | ||
| 666 | DEBUGP("leaving\n"); | 648 | DEBUGP("leaving\n"); | 
| 667 | return 0; | 649 | return 0; | 
| 668 | |||
| 669 | nfattr_failure: | ||
| 670 | return -1; | ||
| 671 | } | 650 | } | 
| 672 | #endif | 651 | #endif | 
| 673 | 652 | ||
| @@ -678,8 +657,7 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name) | |||
| 678 | 657 | ||
| 679 | DEBUGP("entered %s\n", __FUNCTION__); | 658 | DEBUGP("entered %s\n", __FUNCTION__); | 
| 680 | 659 | ||
| 681 | if (nfattr_parse_nested(tb, CTA_HELP_MAX, attr) < 0) | 660 | nfattr_parse_nested(tb, CTA_HELP_MAX, attr); | 
| 682 | goto nfattr_failure; | ||
| 683 | 661 | ||
| 684 | if (!tb[CTA_HELP_NAME-1]) | 662 | if (!tb[CTA_HELP_NAME-1]) | 
| 685 | return -EINVAL; | 663 | return -EINVAL; | 
| @@ -687,9 +665,6 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name) | |||
| 687 | *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); | 665 | *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); | 
| 688 | 666 | ||
| 689 | return 0; | 667 | return 0; | 
| 690 | |||
| 691 | nfattr_failure: | ||
| 692 | return -1; | ||
| 693 | } | 668 | } | 
| 694 | 669 | ||
| 695 | static int | 670 | static int | 
| @@ -804,7 +779,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, | |||
| 804 | ct = tuplehash_to_ctrack(h); | 779 | ct = tuplehash_to_ctrack(h); | 
| 805 | 780 | ||
| 806 | err = -ENOMEM; | 781 | err = -ENOMEM; | 
| 807 | skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); | 782 | skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | 
| 808 | if (!skb2) { | 783 | if (!skb2) { | 
| 809 | ip_conntrack_put(ct); | 784 | ip_conntrack_put(ct); | 
| 810 | return -ENOMEM; | 785 | return -ENOMEM; | 
| @@ -815,7 +790,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, | |||
| 815 | IPCTNL_MSG_CT_NEW, 1, ct); | 790 | IPCTNL_MSG_CT_NEW, 1, ct); | 
| 816 | ip_conntrack_put(ct); | 791 | ip_conntrack_put(ct); | 
| 817 | if (err <= 0) | 792 | if (err <= 0) | 
| 818 | goto out; | 793 | goto free; | 
| 819 | 794 | ||
| 820 | err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); | 795 | err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); | 
| 821 | if (err < 0) | 796 | if (err < 0) | 
| @@ -824,10 +799,10 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, | |||
| 824 | DEBUGP("leaving\n"); | 799 | DEBUGP("leaving\n"); | 
| 825 | return 0; | 800 | return 0; | 
| 826 | 801 | ||
| 802 | free: | ||
| 803 | kfree_skb(skb2); | ||
| 827 | out: | 804 | out: | 
| 828 | if (skb2) | 805 | return err; | 
| 829 | kfree_skb(skb2); | ||
| 830 | return -1; | ||
| 831 | } | 806 | } | 
| 832 | 807 | ||
| 833 | static inline int | 808 | static inline int | 
| @@ -957,8 +932,7 @@ ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[]) | |||
| 957 | u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; | 932 | u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; | 
| 958 | int err = 0; | 933 | int err = 0; | 
| 959 | 934 | ||
| 960 | if (nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr) < 0) | 935 | nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr); | 
| 961 | goto nfattr_failure; | ||
| 962 | 936 | ||
| 963 | proto = ip_conntrack_proto_find_get(npt); | 937 | proto = ip_conntrack_proto_find_get(npt); | 
| 964 | if (!proto) | 938 | if (!proto) | 
| @@ -969,9 +943,6 @@ ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[]) | |||
| 969 | ip_conntrack_proto_put(proto); | 943 | ip_conntrack_proto_put(proto); | 
| 970 | 944 | ||
| 971 | return err; | 945 | return err; | 
| 972 | |||
| 973 | nfattr_failure: | ||
| 974 | return -ENOMEM; | ||
| 975 | } | 946 | } | 
| 976 | 947 | ||
| 977 | static int | 948 | static int | 
| @@ -1005,6 +976,11 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) | |||
| 1005 | return err; | 976 | return err; | 
| 1006 | } | 977 | } | 
| 1007 | 978 | ||
| 979 | #if defined(CONFIG_IP_NF_CONNTRACK_MARK) | ||
| 980 | if (cda[CTA_MARK-1]) | ||
| 981 | ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); | ||
| 982 | #endif | ||
| 983 | |||
| 1008 | DEBUGP("all done\n"); | 984 | DEBUGP("all done\n"); | 
| 1009 | return 0; | 985 | return 0; | 
| 1010 | } | 986 | } | 
| @@ -1048,6 +1024,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[], | |||
| 1048 | if (ct->helper) | 1024 | if (ct->helper) | 
| 1049 | ip_conntrack_helper_put(ct->helper); | 1025 | ip_conntrack_helper_put(ct->helper); | 
| 1050 | 1026 | ||
| 1027 | #if defined(CONFIG_IP_NF_CONNTRACK_MARK) | ||
| 1028 | if (cda[CTA_MARK-1]) | ||
| 1029 | ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); | ||
| 1030 | #endif | ||
| 1031 | |||
| 1051 | DEBUGP("conntrack with id %u inserted\n", ct->id); | 1032 | DEBUGP("conntrack with id %u inserted\n", ct->id); | 
| 1052 | return 0; | 1033 | return 0; | 
| 1053 | 1034 | ||
| @@ -1312,6 +1293,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, | |||
| 1312 | if (!exp) | 1293 | if (!exp) | 
| 1313 | return -ENOENT; | 1294 | return -ENOENT; | 
| 1314 | 1295 | ||
| 1296 | if (cda[CTA_EXPECT_ID-1]) { | ||
| 1297 | u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); | ||
| 1298 | if (exp->id != ntohl(id)) { | ||
| 1299 | ip_conntrack_expect_put(exp); | ||
| 1300 | return -ENOENT; | ||
| 1301 | } | ||
| 1302 | } | ||
| 1303 | |||
| 1315 | err = -ENOMEM; | 1304 | err = -ENOMEM; | 
| 1316 | skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | 1305 | skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | 
| 1317 | if (!skb2) | 1306 | if (!skb2) | 
| @@ -1322,21 +1311,16 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, | |||
| 1322 | nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, | 1311 | nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, | 
| 1323 | 1, exp); | 1312 | 1, exp); | 
| 1324 | if (err <= 0) | 1313 | if (err <= 0) | 
| 1325 | goto out; | 1314 | goto free; | 
| 1326 | 1315 | ||
| 1327 | ip_conntrack_expect_put(exp); | 1316 | ip_conntrack_expect_put(exp); | 
| 1328 | 1317 | ||
| 1329 | err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); | 1318 | return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); | 
| 1330 | if (err < 0) | ||
| 1331 | goto free; | ||
| 1332 | |||
| 1333 | return err; | ||
| 1334 | 1319 | ||
| 1320 | free: | ||
| 1321 | kfree_skb(skb2); | ||
| 1335 | out: | 1322 | out: | 
| 1336 | ip_conntrack_expect_put(exp); | 1323 | ip_conntrack_expect_put(exp); | 
| 1337 | free: | ||
| 1338 | if (skb2) | ||
| 1339 | kfree_skb(skb2); | ||
| 1340 | return err; | 1324 | return err; | 
| 1341 | } | 1325 | } | 
| 1342 | 1326 | ||
| @@ -1392,7 +1376,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, | |||
| 1392 | ip_conntrack_expect_put(exp); | 1376 | ip_conntrack_expect_put(exp); | 
| 1393 | } | 1377 | } | 
| 1394 | } | 1378 | } | 
| 1395 | write_unlock(&ip_conntrack_lock); | 1379 | write_unlock_bh(&ip_conntrack_lock); | 
| 1396 | } else { | 1380 | } else { | 
| 1397 | /* This basically means we have to flush everything*/ | 1381 | /* This basically means we have to flush everything*/ | 
| 1398 | write_lock_bh(&ip_conntrack_lock); | 1382 | write_lock_bh(&ip_conntrack_lock); | 
| @@ -1559,6 +1543,8 @@ static struct nfnetlink_subsystem ctnl_exp_subsys = { | |||
| 1559 | .cb = ctnl_exp_cb, | 1543 | .cb = ctnl_exp_cb, | 
| 1560 | }; | 1544 | }; | 
| 1561 | 1545 | ||
| 1546 | MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK); | ||
| 1547 | |||
| 1562 | static int __init ctnetlink_init(void) | 1548 | static int __init ctnetlink_init(void) | 
| 1563 | { | 1549 | { | 
| 1564 | int ret; | 1550 | int ret; | 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 98f0015dd255..5198f3a1e2cd 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c  | |||
| @@ -151,13 +151,13 @@ icmp_error_message(struct sk_buff *skb, | |||
| 151 | /* Not enough header? */ | 151 | /* Not enough header? */ | 
| 152 | inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in); | 152 | inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in); | 
| 153 | if (inside == NULL) | 153 | if (inside == NULL) | 
| 154 | return NF_ACCEPT; | 154 | return -NF_ACCEPT; | 
| 155 | 155 | ||
| 156 | /* Ignore ICMP's containing fragments (shouldn't happen) */ | 156 | /* Ignore ICMP's containing fragments (shouldn't happen) */ | 
| 157 | if (inside->ip.frag_off & htons(IP_OFFSET)) { | 157 | if (inside->ip.frag_off & htons(IP_OFFSET)) { | 
| 158 | DEBUGP("icmp_error_track: fragment of proto %u\n", | 158 | DEBUGP("icmp_error_track: fragment of proto %u\n", | 
| 159 | inside->ip.protocol); | 159 | inside->ip.protocol); | 
| 160 | return NF_ACCEPT; | 160 | return -NF_ACCEPT; | 
| 161 | } | 161 | } | 
| 162 | 162 | ||
| 163 | innerproto = ip_conntrack_proto_find_get(inside->ip.protocol); | 163 | innerproto = ip_conntrack_proto_find_get(inside->ip.protocol); | 
| @@ -166,7 +166,7 @@ icmp_error_message(struct sk_buff *skb, | |||
| 166 | if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) { | 166 | if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) { | 
| 167 | DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol); | 167 | DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol); | 
| 168 | ip_conntrack_proto_put(innerproto); | 168 | ip_conntrack_proto_put(innerproto); | 
| 169 | return NF_ACCEPT; | 169 | return -NF_ACCEPT; | 
| 170 | } | 170 | } | 
| 171 | 171 | ||
| 172 | /* Ordinarily, we'd expect the inverted tupleproto, but it's | 172 | /* Ordinarily, we'd expect the inverted tupleproto, but it's | 
| @@ -174,7 +174,7 @@ icmp_error_message(struct sk_buff *skb, | |||
| 174 | if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { | 174 | if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { | 
| 175 | DEBUGP("icmp_error_track: Can't invert tuple\n"); | 175 | DEBUGP("icmp_error_track: Can't invert tuple\n"); | 
| 176 | ip_conntrack_proto_put(innerproto); | 176 | ip_conntrack_proto_put(innerproto); | 
| 177 | return NF_ACCEPT; | 177 | return -NF_ACCEPT; | 
| 178 | } | 178 | } | 
| 179 | ip_conntrack_proto_put(innerproto); | 179 | ip_conntrack_proto_put(innerproto); | 
| 180 | 180 | ||
| @@ -190,7 +190,7 @@ icmp_error_message(struct sk_buff *skb, | |||
| 190 | 190 | ||
| 191 | if (!h) { | 191 | if (!h) { | 
| 192 | DEBUGP("icmp_error_track: no match\n"); | 192 | DEBUGP("icmp_error_track: no match\n"); | 
| 193 | return NF_ACCEPT; | 193 | return -NF_ACCEPT; | 
| 194 | } | 194 | } | 
| 195 | /* Reverse direction from that found */ | 195 | /* Reverse direction from that found */ | 
| 196 | if (DIRECTION(h) != IP_CT_DIR_REPLY) | 196 | if (DIRECTION(h) != IP_CT_DIR_REPLY) | 
| @@ -296,7 +296,8 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], | |||
| 296 | struct ip_conntrack_tuple *tuple) | 296 | struct ip_conntrack_tuple *tuple) | 
| 297 | { | 297 | { | 
| 298 | if (!tb[CTA_PROTO_ICMP_TYPE-1] | 298 | if (!tb[CTA_PROTO_ICMP_TYPE-1] | 
| 299 | || !tb[CTA_PROTO_ICMP_CODE-1]) | 299 | || !tb[CTA_PROTO_ICMP_CODE-1] | 
| 300 | || !tb[CTA_PROTO_ICMP_ID-1]) | ||
| 300 | return -1; | 301 | return -1; | 
| 301 | 302 | ||
| 302 | tuple->dst.u.icmp.type = | 303 | tuple->dst.u.icmp.type = | 
| @@ -304,7 +305,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], | |||
| 304 | tuple->dst.u.icmp.code = | 305 | tuple->dst.u.icmp.code = | 
| 305 | *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); | 306 | *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); | 
| 306 | tuple->src.u.icmp.id = | 307 | tuple->src.u.icmp.id = | 
| 307 | *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); | 308 | *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); | 
| 308 | 309 | ||
| 309 | return 0; | 310 | return 0; | 
| 310 | } | 311 | } | 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index d6701cafbcc2..468c6003b4c7 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c  | |||
| @@ -362,8 +362,12 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) | |||
| 362 | struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; | 362 | struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; | 
| 363 | struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; | 363 | struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; | 
| 364 | 364 | ||
| 365 | if (nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr) < 0) | 365 | /* updates could not contain anything about the private | 
| 366 | goto nfattr_failure; | 366 | * protocol info, in that case skip the parsing */ | 
| 367 | if (!attr) | ||
| 368 | return 0; | ||
| 369 | |||
| 370 | nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); | ||
| 367 | 371 | ||
| 368 | if (!tb[CTA_PROTOINFO_TCP_STATE-1]) | 372 | if (!tb[CTA_PROTOINFO_TCP_STATE-1]) | 
| 369 | return -EINVAL; | 373 | return -EINVAL; | 
| @@ -374,9 +378,6 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) | |||
| 374 | write_unlock_bh(&tcp_lock); | 378 | write_unlock_bh(&tcp_lock); | 
| 375 | 379 | ||
| 376 | return 0; | 380 | return 0; | 
| 377 | |||
| 378 | nfattr_failure: | ||
| 379 | return -1; | ||
| 380 | } | 381 | } | 
| 381 | #endif | 382 | #endif | 
| 382 | 383 | ||
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index c5e3abd24672..762f4d93936b 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c  | |||
| @@ -66,10 +66,8 @@ ip_nat_proto_find_get(u_int8_t protonum) | |||
| 66 | * removed until we've grabbed the reference */ | 66 | * removed until we've grabbed the reference */ | 
| 67 | preempt_disable(); | 67 | preempt_disable(); | 
| 68 | p = __ip_nat_proto_find(protonum); | 68 | p = __ip_nat_proto_find(protonum); | 
| 69 | if (p) { | 69 | if (!try_module_get(p->me)) | 
| 70 | if (!try_module_get(p->me)) | 70 | p = &ip_nat_unknown_protocol; | 
| 71 | p = &ip_nat_unknown_protocol; | ||
| 72 | } | ||
| 73 | preempt_enable(); | 71 | preempt_enable(); | 
| 74 | 72 | ||
| 75 | return p; | 73 | return p; | 
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 3cdd0684d30d..e546203f5662 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c  | |||
| @@ -73,6 +73,7 @@ static void pptp_nat_expected(struct ip_conntrack *ct, | |||
| 73 | struct ip_conntrack_tuple t; | 73 | struct ip_conntrack_tuple t; | 
| 74 | struct ip_ct_pptp_master *ct_pptp_info; | 74 | struct ip_ct_pptp_master *ct_pptp_info; | 
| 75 | struct ip_nat_pptp *nat_pptp_info; | 75 | struct ip_nat_pptp *nat_pptp_info; | 
| 76 | struct ip_nat_range range; | ||
| 76 | 77 | ||
| 77 | ct_pptp_info = &master->help.ct_pptp_info; | 78 | ct_pptp_info = &master->help.ct_pptp_info; | 
| 78 | nat_pptp_info = &master->nat.help.nat_pptp_info; | 79 | nat_pptp_info = &master->nat.help.nat_pptp_info; | 
| @@ -110,7 +111,30 @@ static void pptp_nat_expected(struct ip_conntrack *ct, | |||
| 110 | DEBUGP("not found!\n"); | 111 | DEBUGP("not found!\n"); | 
| 111 | } | 112 | } | 
| 112 | 113 | ||
| 113 | ip_nat_follow_master(ct, exp); | 114 | /* This must be a fresh one. */ | 
| 115 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); | ||
| 116 | |||
| 117 | /* Change src to where master sends to */ | ||
| 118 | range.flags = IP_NAT_RANGE_MAP_IPS; | ||
| 119 | range.min_ip = range.max_ip | ||
| 120 | = ct->master->tuplehash[!exp->dir].tuple.dst.ip; | ||
| 121 | if (exp->dir == IP_CT_DIR_ORIGINAL) { | ||
| 122 | range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; | ||
| 123 | range.min = range.max = exp->saved_proto; | ||
| 124 | } | ||
| 125 | /* hook doesn't matter, but it has to do source manip */ | ||
| 126 | ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); | ||
| 127 | |||
| 128 | /* For DST manip, map port here to where it's expected. */ | ||
| 129 | range.flags = IP_NAT_RANGE_MAP_IPS; | ||
| 130 | range.min_ip = range.max_ip | ||
| 131 | = ct->master->tuplehash[!exp->dir].tuple.src.ip; | ||
| 132 | if (exp->dir == IP_CT_DIR_REPLY) { | ||
| 133 | range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; | ||
| 134 | range.min = range.max = exp->saved_proto; | ||
| 135 | } | ||
| 136 | /* hook doesn't matter, but it has to do destination manip */ | ||
| 137 | ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); | ||
| 114 | } | 138 | } | 
| 115 | 139 | ||
| 116 | /* outbound packets == from PNS to PAC */ | 140 | /* outbound packets == from PNS to PAC */ | 
| @@ -213,9 +237,10 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig, | |||
| 213 | 237 | ||
| 214 | /* alter expectation for PNS->PAC direction */ | 238 | /* alter expectation for PNS->PAC direction */ | 
| 215 | invert_tuplepr(&inv_t, &expect_orig->tuple); | 239 | invert_tuplepr(&inv_t, &expect_orig->tuple); | 
| 216 | expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id); | 240 | expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id); | 
| 217 | expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); | 241 | expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); | 
| 218 | expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); | 242 | expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); | 
| 243 | expect_orig->dir = IP_CT_DIR_ORIGINAL; | ||
| 219 | inv_t.src.ip = reply_t->src.ip; | 244 | inv_t.src.ip = reply_t->src.ip; | 
| 220 | inv_t.dst.ip = reply_t->dst.ip; | 245 | inv_t.dst.ip = reply_t->dst.ip; | 
| 221 | inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); | 246 | inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); | 
| @@ -233,6 +258,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig, | |||
| 233 | expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); | 258 | expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); | 
| 234 | expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); | 259 | expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); | 
| 235 | expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); | 260 | expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); | 
| 261 | expect_reply->dir = IP_CT_DIR_REPLY; | ||
| 236 | inv_t.src.ip = orig_t->src.ip; | 262 | inv_t.src.ip = orig_t->src.ip; | 
| 237 | inv_t.dst.ip = orig_t->dst.ip; | 263 | inv_t.dst.ip = orig_t->dst.ip; | 
| 238 | inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); | 264 | inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); | 
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index 7c1285401672..f7cad7cf1aec 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c  | |||
| @@ -139,8 +139,8 @@ gre_manip_pkt(struct sk_buff **pskb, | |||
| 139 | break; | 139 | break; | 
| 140 | case GRE_VERSION_PPTP: | 140 | case GRE_VERSION_PPTP: | 
| 141 | DEBUGP("call_id -> 0x%04x\n", | 141 | DEBUGP("call_id -> 0x%04x\n", | 
| 142 | ntohl(tuple->dst.u.gre.key)); | 142 | ntohs(tuple->dst.u.gre.key)); | 
| 143 | pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key)); | 143 | pgreh->call_id = tuple->dst.u.gre.key; | 
| 144 | break; | 144 | break; | 
| 145 | default: | 145 | default: | 
| 146 | DEBUGP("can't nat unknown GRE version\n"); | 146 | DEBUGP("can't nat unknown GRE version\n"); | 
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c index 99bbef56f84e..f0099a646a0b 100644 --- a/net/ipv4/netfilter/ip_nat_proto_unknown.c +++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c  | |||
| @@ -62,7 +62,7 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range) | |||
| 62 | 62 | ||
| 63 | struct ip_nat_protocol ip_nat_unknown_protocol = { | 63 | struct ip_nat_protocol ip_nat_unknown_protocol = { | 
| 64 | .name = "unknown", | 64 | .name = "unknown", | 
| 65 | .me = THIS_MODULE, | 65 | /* .me isn't set: getting a ref to this cannot fail. */ | 
| 66 | .manip_pkt = unknown_manip_pkt, | 66 | .manip_pkt = unknown_manip_pkt, | 
| 67 | .in_range = unknown_in_range, | 67 | .in_range = unknown_in_range, | 
| 68 | .unique_tuple = unknown_unique_tuple, | 68 | .unique_tuple = unknown_unique_tuple, | 
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 93b2c5111bb2..8acb7ed40b47 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c  | |||
| @@ -1161,8 +1161,7 @@ static int snmp_parse_mangle(unsigned char *msg, | |||
| 1161 | 1161 | ||
| 1162 | if (!snmp_object_decode(&ctx, obj)) { | 1162 | if (!snmp_object_decode(&ctx, obj)) { | 
| 1163 | if (*obj) { | 1163 | if (*obj) { | 
| 1164 | if ((*obj)->id) | 1164 | kfree((*obj)->id); | 
| 1165 | kfree((*obj)->id); | ||
| 1166 | kfree(*obj); | 1165 | kfree(*obj); | 
| 1167 | } | 1166 | } | 
| 1168 | kfree(obj); | 1167 | kfree(obj); | 
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 9bcb398fbc1f..45c52d8f4d99 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c  | |||
| @@ -29,7 +29,7 @@ | |||
| 29 | 29 | ||
| 30 | #include <linux/netfilter_ipv4/ip_tables.h> | 30 | #include <linux/netfilter_ipv4/ip_tables.h> | 
| 31 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | 31 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | 
| 32 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 32 | #include <net/netfilter/nf_conntrack_compat.h> | 
| 33 | 33 | ||
| 34 | #define CLUSTERIP_VERSION "0.8" | 34 | #define CLUSTERIP_VERSION "0.8" | 
| 35 | 35 | ||
| @@ -316,14 +316,14 @@ target(struct sk_buff **pskb, | |||
| 316 | { | 316 | { | 
| 317 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 317 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 
| 318 | enum ip_conntrack_info ctinfo; | 318 | enum ip_conntrack_info ctinfo; | 
| 319 | struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); | 319 | u_int32_t *mark, hash; | 
| 320 | u_int32_t hash; | ||
| 321 | 320 | ||
| 322 | /* don't need to clusterip_config_get() here, since refcount | 321 | /* don't need to clusterip_config_get() here, since refcount | 
| 323 | * is only decremented by destroy() - and ip_tables guarantees | 322 | * is only decremented by destroy() - and ip_tables guarantees | 
| 324 | * that the ->target() function isn't called after ->destroy() */ | 323 | * that the ->target() function isn't called after ->destroy() */ | 
| 325 | 324 | ||
| 326 | if (!ct) { | 325 | mark = nf_ct_get_mark((*pskb), &ctinfo); | 
| 326 | if (mark == NULL) { | ||
| 327 | printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); | 327 | printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); | 
| 328 | /* FIXME: need to drop invalid ones, since replies | 328 | /* FIXME: need to drop invalid ones, since replies | 
| 329 | * to outgoing connections of other nodes will be | 329 | * to outgoing connections of other nodes will be | 
| @@ -346,7 +346,7 @@ target(struct sk_buff **pskb, | |||
| 346 | 346 | ||
| 347 | switch (ctinfo) { | 347 | switch (ctinfo) { | 
| 348 | case IP_CT_NEW: | 348 | case IP_CT_NEW: | 
| 349 | ct->mark = hash; | 349 | *mark = hash; | 
| 350 | break; | 350 | break; | 
| 351 | case IP_CT_RELATED: | 351 | case IP_CT_RELATED: | 
| 352 | case IP_CT_RELATED+IP_CT_IS_REPLY: | 352 | case IP_CT_RELATED+IP_CT_IS_REPLY: | 
| @@ -363,7 +363,7 @@ target(struct sk_buff **pskb, | |||
| 363 | #ifdef DEBUG_CLUSTERP | 363 | #ifdef DEBUG_CLUSTERP | 
| 364 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 364 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 
| 365 | #endif | 365 | #endif | 
| 366 | DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); | 366 | DEBUGP("hash=%u ct_hash=%u ", hash, *mark); | 
| 367 | if (!clusterip_responsible(cipinfo->config, hash)) { | 367 | if (!clusterip_responsible(cipinfo->config, hash)) { | 
| 368 | DEBUGP("not responsible\n"); | 368 | DEBUGP("not responsible\n"); | 
| 369 | return NF_DROP; | 369 | return NF_DROP; | 
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c index 134638021339..8acac5a40a92 100644 --- a/net/ipv4/netfilter/ipt_CONNMARK.c +++ b/net/ipv4/netfilter/ipt_CONNMARK.c  | |||
| @@ -29,7 +29,7 @@ MODULE_LICENSE("GPL"); | |||
| 29 | 29 | ||
| 30 | #include <linux/netfilter_ipv4/ip_tables.h> | 30 | #include <linux/netfilter_ipv4/ip_tables.h> | 
| 31 | #include <linux/netfilter_ipv4/ipt_CONNMARK.h> | 31 | #include <linux/netfilter_ipv4/ipt_CONNMARK.h> | 
| 32 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 32 | #include <net/netfilter/nf_conntrack_compat.h> | 
| 33 | 33 | ||
| 34 | static unsigned int | 34 | static unsigned int | 
| 35 | target(struct sk_buff **pskb, | 35 | target(struct sk_buff **pskb, | 
| @@ -43,24 +43,24 @@ target(struct sk_buff **pskb, | |||
| 43 | u_int32_t diff; | 43 | u_int32_t diff; | 
| 44 | u_int32_t nfmark; | 44 | u_int32_t nfmark; | 
| 45 | u_int32_t newmark; | 45 | u_int32_t newmark; | 
| 46 | u_int32_t ctinfo; | ||
| 47 | u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo); | ||
| 46 | 48 | ||
| 47 | enum ip_conntrack_info ctinfo; | 49 | if (ctmark) { | 
| 48 | struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); | ||
| 49 | if (ct) { | ||
| 50 | switch(markinfo->mode) { | 50 | switch(markinfo->mode) { | 
| 51 | case IPT_CONNMARK_SET: | 51 | case IPT_CONNMARK_SET: | 
| 52 | newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; | 52 | newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; | 
| 53 | if (newmark != ct->mark) | 53 | if (newmark != *ctmark) | 
| 54 | ct->mark = newmark; | 54 | *ctmark = newmark; | 
| 55 | break; | 55 | break; | 
| 56 | case IPT_CONNMARK_SAVE: | 56 | case IPT_CONNMARK_SAVE: | 
| 57 | newmark = (ct->mark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); | 57 | newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); | 
| 58 | if (ct->mark != newmark) | 58 | if (*ctmark != newmark) | 
| 59 | ct->mark = newmark; | 59 | *ctmark = newmark; | 
| 60 | break; | 60 | break; | 
| 61 | case IPT_CONNMARK_RESTORE: | 61 | case IPT_CONNMARK_RESTORE: | 
| 62 | nfmark = (*pskb)->nfmark; | 62 | nfmark = (*pskb)->nfmark; | 
| 63 | diff = (ct->mark ^ nfmark) & markinfo->mask; | 63 | diff = (*ctmark ^ nfmark) & markinfo->mask; | 
| 64 | if (diff != 0) | 64 | if (diff != 0) | 
| 65 | (*pskb)->nfmark = nfmark ^ diff; | 65 | (*pskb)->nfmark = nfmark ^ diff; | 
| 66 | break; | 66 | break; | 
| @@ -109,6 +109,7 @@ static struct ipt_target ipt_connmark_reg = { | |||
| 109 | 109 | ||
| 110 | static int __init init(void) | 110 | static int __init init(void) | 
| 111 | { | 111 | { | 
| 112 | need_ip_conntrack(); | ||
| 112 | return ipt_register_target(&ipt_connmark_reg); | 113 | return ipt_register_target(&ipt_connmark_reg); | 
| 113 | } | 114 | } | 
| 114 | 115 | ||
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/ipv4/netfilter/ipt_NOTRACK.c index a4bb9b3bc292..e3c69d072c6e 100644 --- a/net/ipv4/netfilter/ipt_NOTRACK.c +++ b/net/ipv4/netfilter/ipt_NOTRACK.c  | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | #include <linux/skbuff.h> | 5 | #include <linux/skbuff.h> | 
| 6 | 6 | ||
| 7 | #include <linux/netfilter_ipv4/ip_tables.h> | 7 | #include <linux/netfilter_ipv4/ip_tables.h> | 
| 8 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 8 | #include <net/netfilter/nf_conntrack_compat.h> | 
| 9 | 9 | ||
| 10 | static unsigned int | 10 | static unsigned int | 
| 11 | target(struct sk_buff **pskb, | 11 | target(struct sk_buff **pskb, | 
| @@ -23,7 +23,7 @@ target(struct sk_buff **pskb, | |||
| 23 | If there is a real ct entry corresponding to this packet, | 23 | If there is a real ct entry corresponding to this packet, | 
| 24 | it'll hang around till timing out. We don't deal with it | 24 | it'll hang around till timing out. We don't deal with it | 
| 25 | for performance reasons. JK */ | 25 | for performance reasons. JK */ | 
| 26 | (*pskb)->nfct = &ip_conntrack_untracked.ct_general; | 26 | nf_ct_untrack(*pskb); | 
| 27 | (*pskb)->nfctinfo = IP_CT_NEW; | 27 | (*pskb)->nfctinfo = IP_CT_NEW; | 
| 28 | nf_conntrack_get((*pskb)->nfct); | 28 | nf_conntrack_get((*pskb)->nfct); | 
| 29 | 29 | ||
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c index df4a42c6da22..d68a048b7176 100644 --- a/net/ipv4/netfilter/ipt_connbytes.c +++ b/net/ipv4/netfilter/ipt_connbytes.c  | |||
| @@ -10,7 +10,7 @@ | |||
| 10 | */ | 10 | */ | 
| 11 | #include <linux/module.h> | 11 | #include <linux/module.h> | 
| 12 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> | 
| 13 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 13 | #include <net/netfilter/nf_conntrack_compat.h> | 
| 14 | #include <linux/netfilter_ipv4/ip_tables.h> | 14 | #include <linux/netfilter_ipv4/ip_tables.h> | 
| 15 | #include <linux/netfilter_ipv4/ipt_connbytes.h> | 15 | #include <linux/netfilter_ipv4/ipt_connbytes.h> | 
| 16 | 16 | ||
| @@ -46,60 +46,59 @@ match(const struct sk_buff *skb, | |||
| 46 | int *hotdrop) | 46 | int *hotdrop) | 
| 47 | { | 47 | { | 
| 48 | const struct ipt_connbytes_info *sinfo = matchinfo; | 48 | const struct ipt_connbytes_info *sinfo = matchinfo; | 
| 49 | enum ip_conntrack_info ctinfo; | ||
| 50 | struct ip_conntrack *ct; | ||
| 51 | u_int64_t what = 0; /* initialize to make gcc happy */ | 49 | u_int64_t what = 0; /* initialize to make gcc happy */ | 
| 50 | const struct ip_conntrack_counter *counters; | ||
| 52 | 51 | ||
| 53 | if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo))) | 52 | if (!(counters = nf_ct_get_counters(skb))) | 
| 54 | return 0; /* no match */ | 53 | return 0; /* no match */ | 
| 55 | 54 | ||
| 56 | switch (sinfo->what) { | 55 | switch (sinfo->what) { | 
| 57 | case IPT_CONNBYTES_PKTS: | 56 | case IPT_CONNBYTES_PKTS: | 
| 58 | switch (sinfo->direction) { | 57 | switch (sinfo->direction) { | 
| 59 | case IPT_CONNBYTES_DIR_ORIGINAL: | 58 | case IPT_CONNBYTES_DIR_ORIGINAL: | 
| 60 | what = ct->counters[IP_CT_DIR_ORIGINAL].packets; | 59 | what = counters[IP_CT_DIR_ORIGINAL].packets; | 
| 61 | break; | 60 | break; | 
| 62 | case IPT_CONNBYTES_DIR_REPLY: | 61 | case IPT_CONNBYTES_DIR_REPLY: | 
| 63 | what = ct->counters[IP_CT_DIR_REPLY].packets; | 62 | what = counters[IP_CT_DIR_REPLY].packets; | 
| 64 | break; | 63 | break; | 
| 65 | case IPT_CONNBYTES_DIR_BOTH: | 64 | case IPT_CONNBYTES_DIR_BOTH: | 
| 66 | what = ct->counters[IP_CT_DIR_ORIGINAL].packets; | 65 | what = counters[IP_CT_DIR_ORIGINAL].packets; | 
| 67 | what += ct->counters[IP_CT_DIR_REPLY].packets; | 66 | what += counters[IP_CT_DIR_REPLY].packets; | 
| 68 | break; | 67 | break; | 
| 69 | } | 68 | } | 
| 70 | break; | 69 | break; | 
| 71 | case IPT_CONNBYTES_BYTES: | 70 | case IPT_CONNBYTES_BYTES: | 
| 72 | switch (sinfo->direction) { | 71 | switch (sinfo->direction) { | 
| 73 | case IPT_CONNBYTES_DIR_ORIGINAL: | 72 | case IPT_CONNBYTES_DIR_ORIGINAL: | 
| 74 | what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; | 73 | what = counters[IP_CT_DIR_ORIGINAL].bytes; | 
| 75 | break; | 74 | break; | 
| 76 | case IPT_CONNBYTES_DIR_REPLY: | 75 | case IPT_CONNBYTES_DIR_REPLY: | 
| 77 | what = ct->counters[IP_CT_DIR_REPLY].bytes; | 76 | what = counters[IP_CT_DIR_REPLY].bytes; | 
| 78 | break; | 77 | break; | 
| 79 | case IPT_CONNBYTES_DIR_BOTH: | 78 | case IPT_CONNBYTES_DIR_BOTH: | 
| 80 | what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; | 79 | what = counters[IP_CT_DIR_ORIGINAL].bytes; | 
| 81 | what += ct->counters[IP_CT_DIR_REPLY].bytes; | 80 | what += counters[IP_CT_DIR_REPLY].bytes; | 
| 82 | break; | 81 | break; | 
| 83 | } | 82 | } | 
| 84 | break; | 83 | break; | 
| 85 | case IPT_CONNBYTES_AVGPKT: | 84 | case IPT_CONNBYTES_AVGPKT: | 
| 86 | switch (sinfo->direction) { | 85 | switch (sinfo->direction) { | 
| 87 | case IPT_CONNBYTES_DIR_ORIGINAL: | 86 | case IPT_CONNBYTES_DIR_ORIGINAL: | 
| 88 | what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes, | 87 | what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes, | 
| 89 | ct->counters[IP_CT_DIR_ORIGINAL].packets); | 88 | counters[IP_CT_DIR_ORIGINAL].packets); | 
| 90 | break; | 89 | break; | 
| 91 | case IPT_CONNBYTES_DIR_REPLY: | 90 | case IPT_CONNBYTES_DIR_REPLY: | 
| 92 | what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes, | 91 | what = div64_64(counters[IP_CT_DIR_REPLY].bytes, | 
| 93 | ct->counters[IP_CT_DIR_REPLY].packets); | 92 | counters[IP_CT_DIR_REPLY].packets); | 
| 94 | break; | 93 | break; | 
| 95 | case IPT_CONNBYTES_DIR_BOTH: | 94 | case IPT_CONNBYTES_DIR_BOTH: | 
| 96 | { | 95 | { | 
| 97 | u_int64_t bytes; | 96 | u_int64_t bytes; | 
| 98 | u_int64_t pkts; | 97 | u_int64_t pkts; | 
| 99 | bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes + | 98 | bytes = counters[IP_CT_DIR_ORIGINAL].bytes + | 
| 100 | ct->counters[IP_CT_DIR_REPLY].bytes; | 99 | counters[IP_CT_DIR_REPLY].bytes; | 
| 101 | pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+ | 100 | pkts = counters[IP_CT_DIR_ORIGINAL].packets+ | 
| 102 | ct->counters[IP_CT_DIR_REPLY].packets; | 101 | counters[IP_CT_DIR_REPLY].packets; | 
| 103 | 102 | ||
| 104 | /* FIXME_THEORETICAL: what to do if sum | 103 | /* FIXME_THEORETICAL: what to do if sum | 
| 105 | * overflows ? */ | 104 | * overflows ? */ | 
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c index bf8de47ce004..5306ef293b92 100644 --- a/net/ipv4/netfilter/ipt_connmark.c +++ b/net/ipv4/netfilter/ipt_connmark.c  | |||
| @@ -28,7 +28,7 @@ MODULE_LICENSE("GPL"); | |||
| 28 | 28 | ||
| 29 | #include <linux/netfilter_ipv4/ip_tables.h> | 29 | #include <linux/netfilter_ipv4/ip_tables.h> | 
| 30 | #include <linux/netfilter_ipv4/ipt_connmark.h> | 30 | #include <linux/netfilter_ipv4/ipt_connmark.h> | 
| 31 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 31 | #include <net/netfilter/nf_conntrack_compat.h> | 
| 32 | 32 | ||
| 33 | static int | 33 | static int | 
| 34 | match(const struct sk_buff *skb, | 34 | match(const struct sk_buff *skb, | 
| @@ -39,12 +39,12 @@ match(const struct sk_buff *skb, | |||
| 39 | int *hotdrop) | 39 | int *hotdrop) | 
| 40 | { | 40 | { | 
| 41 | const struct ipt_connmark_info *info = matchinfo; | 41 | const struct ipt_connmark_info *info = matchinfo; | 
| 42 | enum ip_conntrack_info ctinfo; | 42 | u_int32_t ctinfo; | 
| 43 | struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); | 43 | const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo); | 
| 44 | if (!ct) | 44 | if (!ctmark) | 
| 45 | return 0; | 45 | return 0; | 
| 46 | 46 | ||
| 47 | return ((ct->mark & info->mask) == info->mark) ^ info->invert; | 47 | return (((*ctmark) & info->mask) == info->mark) ^ info->invert; | 
| 48 | } | 48 | } | 
| 49 | 49 | ||
| 50 | static int | 50 | static int | 
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c index c1d22801b7cf..c8d18705469b 100644 --- a/net/ipv4/netfilter/ipt_conntrack.c +++ b/net/ipv4/netfilter/ipt_conntrack.c  | |||
| @@ -10,7 +10,14 @@ | |||
| 10 | 10 | ||
| 11 | #include <linux/module.h> | 11 | #include <linux/module.h> | 
| 12 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> | 
| 13 | |||
| 14 | #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) | ||
| 13 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 15 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 
| 16 | #include <linux/netfilter_ipv4/ip_conntrack_tuple.h> | ||
| 17 | #else | ||
| 18 | #include <net/netfilter/nf_conntrack.h> | ||
| 19 | #endif | ||
| 20 | |||
| 14 | #include <linux/netfilter_ipv4/ip_tables.h> | 21 | #include <linux/netfilter_ipv4/ip_tables.h> | 
| 15 | #include <linux/netfilter_ipv4/ipt_conntrack.h> | 22 | #include <linux/netfilter_ipv4/ipt_conntrack.h> | 
| 16 | 23 | ||
| @@ -18,6 +25,8 @@ MODULE_LICENSE("GPL"); | |||
| 18 | MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); | 25 | MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); | 
| 19 | MODULE_DESCRIPTION("iptables connection tracking match module"); | 26 | MODULE_DESCRIPTION("iptables connection tracking match module"); | 
| 20 | 27 | ||
| 28 | #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) | ||
| 29 | |||
| 21 | static int | 30 | static int | 
| 22 | match(const struct sk_buff *skb, | 31 | match(const struct sk_buff *skb, | 
| 23 | const struct net_device *in, | 32 | const struct net_device *in, | 
| @@ -102,6 +111,93 @@ match(const struct sk_buff *skb, | |||
| 102 | return 1; | 111 | return 1; | 
| 103 | } | 112 | } | 
| 104 | 113 | ||
| 114 | #else /* CONFIG_IP_NF_CONNTRACK */ | ||
| 115 | static int | ||
| 116 | match(const struct sk_buff *skb, | ||
| 117 | const struct net_device *in, | ||
| 118 | const struct net_device *out, | ||
| 119 | const void *matchinfo, | ||
| 120 | int offset, | ||
| 121 | int *hotdrop) | ||
| 122 | { | ||
| 123 | const struct ipt_conntrack_info *sinfo = matchinfo; | ||
| 124 | struct nf_conn *ct; | ||
| 125 | enum ip_conntrack_info ctinfo; | ||
| 126 | unsigned int statebit; | ||
| 127 | |||
| 128 | ct = nf_ct_get((struct sk_buff *)skb, &ctinfo); | ||
| 129 | |||
| 130 | #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) | ||
| 131 | |||
| 132 | if (ct == &nf_conntrack_untracked) | ||
| 133 | statebit = IPT_CONNTRACK_STATE_UNTRACKED; | ||
| 134 | else if (ct) | ||
| 135 | statebit = IPT_CONNTRACK_STATE_BIT(ctinfo); | ||
| 136 | else | ||
| 137 | statebit = IPT_CONNTRACK_STATE_INVALID; | ||
| 138 | |||
| 139 | if(sinfo->flags & IPT_CONNTRACK_STATE) { | ||
| 140 | if (ct) { | ||
| 141 | if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip != | ||
| 142 | ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip) | ||
| 143 | statebit |= IPT_CONNTRACK_STATE_SNAT; | ||
| 144 | |||
| 145 | if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip != | ||
| 146 | ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip) | ||
| 147 | statebit |= IPT_CONNTRACK_STATE_DNAT; | ||
| 148 | } | ||
| 149 | |||
| 150 | if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE)) | ||
| 151 | return 0; | ||
| 152 | } | ||
| 153 | |||
| 154 | if(sinfo->flags & IPT_CONNTRACK_PROTO) { | ||
| 155 | if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO)) | ||
| 156 | return 0; | ||
| 157 | } | ||
| 158 | |||
| 159 | if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) { | ||
| 160 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC)) | ||
| 161 | return 0; | ||
| 162 | } | ||
| 163 | |||
| 164 | if(sinfo->flags & IPT_CONNTRACK_ORIGDST) { | ||
| 165 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST)) | ||
| 166 | return 0; | ||
| 167 | } | ||
| 168 | |||
| 169 | if(sinfo->flags & IPT_CONNTRACK_REPLSRC) { | ||
| 170 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC)) | ||
| 171 | return 0; | ||
| 172 | } | ||
| 173 | |||
| 174 | if(sinfo->flags & IPT_CONNTRACK_REPLDST) { | ||
| 175 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST)) | ||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | |||
| 179 | if(sinfo->flags & IPT_CONNTRACK_STATUS) { | ||
| 180 | if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS)) | ||
| 181 | return 0; | ||
| 182 | } | ||
| 183 | |||
| 184 | if(sinfo->flags & IPT_CONNTRACK_EXPIRES) { | ||
| 185 | unsigned long expires; | ||
| 186 | |||
| 187 | if(!ct) | ||
| 188 | return 0; | ||
| 189 | |||
| 190 | expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; | ||
| 191 | |||
| 192 | if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES)) | ||
| 193 | return 0; | ||
| 194 | } | ||
| 195 | |||
| 196 | return 1; | ||
| 197 | } | ||
| 198 | |||
| 199 | #endif /* CONFIG_IP_NF_CONNTRACK */ | ||
| 200 | |||
| 105 | static int check(const char *tablename, | 201 | static int check(const char *tablename, | 
| 106 | const struct ipt_ip *ip, | 202 | const struct ipt_ip *ip, | 
| 107 | void *matchinfo, | 203 | void *matchinfo, | 
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c index 3e7dd014de43..bf14e1c7798a 100644 --- a/net/ipv4/netfilter/ipt_helper.c +++ b/net/ipv4/netfilter/ipt_helper.c  | |||
| @@ -13,9 +13,15 @@ | |||
| 13 | #include <linux/module.h> | 13 | #include <linux/module.h> | 
| 14 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> | 
| 15 | #include <linux/netfilter.h> | 15 | #include <linux/netfilter.h> | 
| 16 | #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) | ||
| 16 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 17 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 
| 17 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 18 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 
| 18 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 19 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 
| 20 | #else | ||
| 21 | #include <net/netfilter/nf_conntrack.h> | ||
| 22 | #include <net/netfilter/nf_conntrack_core.h> | ||
| 23 | #include <net/netfilter/nf_conntrack_helper.h> | ||
| 24 | #endif | ||
| 19 | #include <linux/netfilter_ipv4/ip_tables.h> | 25 | #include <linux/netfilter_ipv4/ip_tables.h> | 
| 20 | #include <linux/netfilter_ipv4/ipt_helper.h> | 26 | #include <linux/netfilter_ipv4/ipt_helper.h> | 
| 21 | 27 | ||
| @@ -29,6 +35,7 @@ MODULE_DESCRIPTION("iptables helper match module"); | |||
| 29 | #define DEBUGP(format, args...) | 35 | #define DEBUGP(format, args...) | 
| 30 | #endif | 36 | #endif | 
| 31 | 37 | ||
| 38 | #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) | ||
| 32 | static int | 39 | static int | 
| 33 | match(const struct sk_buff *skb, | 40 | match(const struct sk_buff *skb, | 
| 34 | const struct net_device *in, | 41 | const struct net_device *in, | 
| @@ -73,6 +80,53 @@ out_unlock: | |||
| 73 | return ret; | 80 | return ret; | 
| 74 | } | 81 | } | 
| 75 | 82 | ||
| 83 | #else /* CONFIG_IP_NF_CONNTRACK */ | ||
| 84 | |||
| 85 | static int | ||
| 86 | match(const struct sk_buff *skb, | ||
| 87 | const struct net_device *in, | ||
| 88 | const struct net_device *out, | ||
| 89 | const void *matchinfo, | ||
| 90 | int offset, | ||
| 91 | int *hotdrop) | ||
| 92 | { | ||
| 93 | const struct ipt_helper_info *info = matchinfo; | ||
| 94 | struct nf_conn *ct; | ||
| 95 | enum ip_conntrack_info ctinfo; | ||
| 96 | int ret = info->invert; | ||
| 97 | |||
| 98 | ct = nf_ct_get((struct sk_buff *)skb, &ctinfo); | ||
| 99 | if (!ct) { | ||
| 100 | DEBUGP("ipt_helper: Eek! invalid conntrack?\n"); | ||
| 101 | return ret; | ||
| 102 | } | ||
| 103 | |||
| 104 | if (!ct->master) { | ||
| 105 | DEBUGP("ipt_helper: conntrack %p has no master\n", ct); | ||
| 106 | return ret; | ||
| 107 | } | ||
| 108 | |||
| 109 | read_lock_bh(&nf_conntrack_lock); | ||
| 110 | if (!ct->master->helper) { | ||
| 111 | DEBUGP("ipt_helper: master ct %p has no helper\n", | ||
| 112 | exp->expectant); | ||
| 113 | goto out_unlock; | ||
| 114 | } | ||
| 115 | |||
| 116 | DEBUGP("master's name = %s , info->name = %s\n", | ||
| 117 | ct->master->helper->name, info->name); | ||
| 118 | |||
| 119 | if (info->name[0] == '\0') | ||
| 120 | ret ^= 1; | ||
| 121 | else | ||
| 122 | ret ^= !strncmp(ct->master->helper->name, info->name, | ||
| 123 | strlen(ct->master->helper->name)); | ||
| 124 | out_unlock: | ||
| 125 | read_unlock_bh(&nf_conntrack_lock); | ||
| 126 | return ret; | ||
| 127 | } | ||
| 128 | #endif | ||
| 129 | |||
| 76 | static int check(const char *tablename, | 130 | static int check(const char *tablename, | 
| 77 | const struct ipt_ip *ip, | 131 | const struct ipt_ip *ip, | 
| 78 | void *matchinfo, | 132 | void *matchinfo, | 
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c index b1511b97ea5f..4d7f16b70cec 100644 --- a/net/ipv4/netfilter/ipt_state.c +++ b/net/ipv4/netfilter/ipt_state.c  | |||
| @@ -10,7 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include <linux/module.h> | 11 | #include <linux/module.h> | 
| 12 | #include <linux/skbuff.h> | 12 | #include <linux/skbuff.h> | 
| 13 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 13 | #include <net/netfilter/nf_conntrack_compat.h> | 
| 14 | #include <linux/netfilter_ipv4/ip_tables.h> | 14 | #include <linux/netfilter_ipv4/ip_tables.h> | 
| 15 | #include <linux/netfilter_ipv4/ipt_state.h> | 15 | #include <linux/netfilter_ipv4/ipt_state.h> | 
| 16 | 16 | ||
| @@ -30,9 +30,9 @@ match(const struct sk_buff *skb, | |||
| 30 | enum ip_conntrack_info ctinfo; | 30 | enum ip_conntrack_info ctinfo; | 
| 31 | unsigned int statebit; | 31 | unsigned int statebit; | 
| 32 | 32 | ||
| 33 | if (skb->nfct == &ip_conntrack_untracked.ct_general) | 33 | if (nf_ct_is_untracked(skb)) | 
| 34 | statebit = IPT_STATE_UNTRACKED; | 34 | statebit = IPT_STATE_UNTRACKED; | 
| 35 | else if (!ip_conntrack_get(skb, &ctinfo)) | 35 | else if (!nf_ct_get_ctinfo(skb, &ctinfo)) | 
| 36 | statebit = IPT_STATE_INVALID; | 36 | statebit = IPT_STATE_INVALID; | 
| 37 | else | 37 | else | 
| 38 | statebit = IPT_STATE_BIT(ctinfo); | 38 | statebit = IPT_STATE_BIT(ctinfo); | 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c new file mode 100644 index 000000000000..8202c1c0afad --- /dev/null +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c  | |||
| @@ -0,0 +1,571 @@ | |||
| 1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
| 2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License version 2 as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 9 | * - move L3 protocol dependent part to this file. | ||
| 10 | * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 11 | * - add get_features() to support various size of conntrack | ||
| 12 | * structures. | ||
| 13 | * | ||
| 14 | * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/config.h> | ||
| 18 | #include <linux/types.h> | ||
| 19 | #include <linux/ip.h> | ||
| 20 | #include <linux/netfilter.h> | ||
| 21 | #include <linux/module.h> | ||
| 22 | #include <linux/skbuff.h> | ||
| 23 | #include <linux/icmp.h> | ||
| 24 | #include <linux/sysctl.h> | ||
| 25 | #include <net/ip.h> | ||
| 26 | |||
| 27 | #include <linux/netfilter_ipv4.h> | ||
| 28 | #include <net/netfilter/nf_conntrack.h> | ||
| 29 | #include <net/netfilter/nf_conntrack_helper.h> | ||
| 30 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 31 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
| 32 | #include <net/netfilter/nf_conntrack_core.h> | ||
| 33 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | ||
| 34 | |||
| 35 | #if 0 | ||
| 36 | #define DEBUGP printk | ||
| 37 | #else | ||
| 38 | #define DEBUGP(format, args...) | ||
| 39 | #endif | ||
| 40 | |||
| 41 | DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); | ||
| 42 | |||
| 43 | static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | ||
| 44 | struct nf_conntrack_tuple *tuple) | ||
| 45 | { | ||
| 46 | u_int32_t _addrs[2], *ap; | ||
| 47 | ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), | ||
| 48 | sizeof(u_int32_t) * 2, _addrs); | ||
| 49 | if (ap == NULL) | ||
| 50 | return 0; | ||
| 51 | |||
| 52 | tuple->src.u3.ip = ap[0]; | ||
| 53 | tuple->dst.u3.ip = ap[1]; | ||
| 54 | |||
| 55 | return 1; | ||
| 56 | } | ||
| 57 | |||
| 58 | static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
| 59 | const struct nf_conntrack_tuple *orig) | ||
| 60 | { | ||
| 61 | tuple->src.u3.ip = orig->dst.u3.ip; | ||
| 62 | tuple->dst.u3.ip = orig->src.u3.ip; | ||
| 63 | |||
| 64 | return 1; | ||
| 65 | } | ||
| 66 | |||
| 67 | static int ipv4_print_tuple(struct seq_file *s, | ||
| 68 | const struct nf_conntrack_tuple *tuple) | ||
| 69 | { | ||
| 70 | return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", | ||
| 71 | NIPQUAD(tuple->src.u3.ip), | ||
| 72 | NIPQUAD(tuple->dst.u3.ip)); | ||
| 73 | } | ||
| 74 | |||
| 75 | static int ipv4_print_conntrack(struct seq_file *s, | ||
| 76 | const struct nf_conn *conntrack) | ||
| 77 | { | ||
| 78 | return 0; | ||
| 79 | } | ||
| 80 | |||
| 81 | /* Returns new sk_buff, or NULL */ | ||
| 82 | static struct sk_buff * | ||
| 83 | nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | ||
| 84 | { | ||
| 85 | skb_orphan(skb); | ||
| 86 | |||
| 87 | local_bh_disable(); | ||
| 88 | skb = ip_defrag(skb, user); | ||
| 89 | local_bh_enable(); | ||
| 90 | |||
| 91 | if (skb) | ||
| 92 | ip_send_check(skb->nh.iph); | ||
| 93 | |||
| 94 | return skb; | ||
| 95 | } | ||
| 96 | |||
| 97 | static int | ||
| 98 | ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, | ||
| 99 | u_int8_t *protonum) | ||
| 100 | { | ||
| 101 | /* Should never happen */ | ||
| 102 | if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) { | ||
| 103 | if (net_ratelimit()) { | ||
| 104 | printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n", | ||
| 105 | (*pskb)->nh.iph->protocol, hooknum); | ||
| 106 | } | ||
| 107 | return -NF_DROP; | ||
| 108 | } | ||
| 109 | |||
| 110 | *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4; | ||
| 111 | *protonum = (*pskb)->nh.iph->protocol; | ||
| 112 | |||
| 113 | return NF_ACCEPT; | ||
| 114 | } | ||
| 115 | |||
| 116 | int nat_module_is_loaded = 0; | ||
| 117 | static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) | ||
| 118 | { | ||
| 119 | if (nat_module_is_loaded) | ||
| 120 | return NF_CT_F_NAT; | ||
| 121 | |||
| 122 | return NF_CT_F_BASIC; | ||
| 123 | } | ||
| 124 | |||
| 125 | static unsigned int ipv4_confirm(unsigned int hooknum, | ||
| 126 | struct sk_buff **pskb, | ||
| 127 | const struct net_device *in, | ||
| 128 | const struct net_device *out, | ||
| 129 | int (*okfn)(struct sk_buff *)) | ||
| 130 | { | ||
| 131 | /* We've seen it coming out the other side: confirm it */ | ||
| 132 | return nf_conntrack_confirm(pskb); | ||
| 133 | } | ||
| 134 | |||
| 135 | static unsigned int ipv4_conntrack_help(unsigned int hooknum, | ||
| 136 | struct sk_buff **pskb, | ||
| 137 | const struct net_device *in, | ||
| 138 | const struct net_device *out, | ||
| 139 | int (*okfn)(struct sk_buff *)) | ||
| 140 | { | ||
| 141 | struct nf_conn *ct; | ||
| 142 | enum ip_conntrack_info ctinfo; | ||
| 143 | |||
| 144 | /* This is where we call the helper: as the packet goes out. */ | ||
| 145 | ct = nf_ct_get(*pskb, &ctinfo); | ||
| 146 | if (ct && ct->helper) { | ||
| 147 | unsigned int ret; | ||
| 148 | ret = ct->helper->help(pskb, | ||
| 149 | (*pskb)->nh.raw - (*pskb)->data | ||
| 150 | + (*pskb)->nh.iph->ihl*4, | ||
| 151 | ct, ctinfo); | ||
| 152 | if (ret != NF_ACCEPT) | ||
| 153 | return ret; | ||
| 154 | } | ||
| 155 | return NF_ACCEPT; | ||
| 156 | } | ||
| 157 | |||
| 158 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | ||
| 159 | struct sk_buff **pskb, | ||
| 160 | const struct net_device *in, | ||
| 161 | const struct net_device *out, | ||
| 162 | int (*okfn)(struct sk_buff *)) | ||
| 163 | { | ||
| 164 | #if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE) | ||
| 165 | /* Previously seen (loopback)? Ignore. Do this before | ||
| 166 | fragment check. */ | ||
| 167 | if ((*pskb)->nfct) | ||
| 168 | return NF_ACCEPT; | ||
| 169 | #endif | ||
| 170 | |||
| 171 | /* Gather fragments. */ | ||
| 172 | if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { | ||
| 173 | *pskb = nf_ct_ipv4_gather_frags(*pskb, | ||
| 174 | hooknum == NF_IP_PRE_ROUTING ? | ||
| 175 | IP_DEFRAG_CONNTRACK_IN : | ||
| 176 | IP_DEFRAG_CONNTRACK_OUT); | ||
| 177 | if (!*pskb) | ||
| 178 | return NF_STOLEN; | ||
| 179 | } | ||
| 180 | return NF_ACCEPT; | ||
| 181 | } | ||
| 182 | |||
| 183 | static unsigned int ipv4_refrag(unsigned int hooknum, | ||
| 184 | struct sk_buff **pskb, | ||
| 185 | const struct net_device *in, | ||
| 186 | const struct net_device *out, | ||
| 187 | int (*okfn)(struct sk_buff *)) | ||
| 188 | { | ||
| 189 | struct rtable *rt = (struct rtable *)(*pskb)->dst; | ||
| 190 | |||
| 191 | /* We've seen it coming out the other side: confirm */ | ||
| 192 | if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT) | ||
| 193 | return NF_DROP; | ||
| 194 | |||
| 195 | /* Local packets are never produced too large for their | ||
| 196 | interface. We defragment them at LOCAL_OUT, however, | ||
| 197 | so we have to refragment them here. */ | ||
| 198 | if ((*pskb)->len > dst_mtu(&rt->u.dst) && | ||
| 199 | !skb_shinfo(*pskb)->tso_size) { | ||
| 200 | /* No hook can be after us, so this should be OK. */ | ||
| 201 | ip_fragment(*pskb, okfn); | ||
| 202 | return NF_STOLEN; | ||
| 203 | } | ||
| 204 | return NF_ACCEPT; | ||
| 205 | } | ||
| 206 | |||
| 207 | static unsigned int ipv4_conntrack_in(unsigned int hooknum, | ||
| 208 | struct sk_buff **pskb, | ||
| 209 | const struct net_device *in, | ||
| 210 | const struct net_device *out, | ||
| 211 | int (*okfn)(struct sk_buff *)) | ||
| 212 | { | ||
| 213 | return nf_conntrack_in(PF_INET, hooknum, pskb); | ||
| 214 | } | ||
| 215 | |||
| 216 | static unsigned int ipv4_conntrack_local(unsigned int hooknum, | ||
| 217 | struct sk_buff **pskb, | ||
| 218 | const struct net_device *in, | ||
| 219 | const struct net_device *out, | ||
| 220 | int (*okfn)(struct sk_buff *)) | ||
| 221 | { | ||
| 222 | /* root is playing with raw sockets. */ | ||
| 223 | if ((*pskb)->len < sizeof(struct iphdr) | ||
| 224 | || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { | ||
| 225 | if (net_ratelimit()) | ||
| 226 | printk("ipt_hook: happy cracking.\n"); | ||
| 227 | return NF_ACCEPT; | ||
| 228 | } | ||
| 229 | return nf_conntrack_in(PF_INET, hooknum, pskb); | ||
| 230 | } | ||
| 231 | |||
| 232 | /* Connection tracking may drop packets, but never alters them, so | ||
| 233 | make it the first hook. */ | ||
| 234 | static struct nf_hook_ops ipv4_conntrack_defrag_ops = { | ||
| 235 | .hook = ipv4_conntrack_defrag, | ||
| 236 | .owner = THIS_MODULE, | ||
| 237 | .pf = PF_INET, | ||
| 238 | .hooknum = NF_IP_PRE_ROUTING, | ||
| 239 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, | ||
| 240 | }; | ||
| 241 | |||
| 242 | static struct nf_hook_ops ipv4_conntrack_in_ops = { | ||
| 243 | .hook = ipv4_conntrack_in, | ||
| 244 | .owner = THIS_MODULE, | ||
| 245 | .pf = PF_INET, | ||
| 246 | .hooknum = NF_IP_PRE_ROUTING, | ||
| 247 | .priority = NF_IP_PRI_CONNTRACK, | ||
| 248 | }; | ||
| 249 | |||
| 250 | static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = { | ||
| 251 | .hook = ipv4_conntrack_defrag, | ||
| 252 | .owner = THIS_MODULE, | ||
| 253 | .pf = PF_INET, | ||
| 254 | .hooknum = NF_IP_LOCAL_OUT, | ||
| 255 | .priority = NF_IP_PRI_CONNTRACK_DEFRAG, | ||
| 256 | }; | ||
| 257 | |||
| 258 | static struct nf_hook_ops ipv4_conntrack_local_out_ops = { | ||
| 259 | .hook = ipv4_conntrack_local, | ||
| 260 | .owner = THIS_MODULE, | ||
| 261 | .pf = PF_INET, | ||
| 262 | .hooknum = NF_IP_LOCAL_OUT, | ||
| 263 | .priority = NF_IP_PRI_CONNTRACK, | ||
| 264 | }; | ||
| 265 | |||
| 266 | /* helpers */ | ||
| 267 | static struct nf_hook_ops ipv4_conntrack_helper_out_ops = { | ||
| 268 | .hook = ipv4_conntrack_help, | ||
| 269 | .owner = THIS_MODULE, | ||
| 270 | .pf = PF_INET, | ||
| 271 | .hooknum = NF_IP_POST_ROUTING, | ||
| 272 | .priority = NF_IP_PRI_CONNTRACK_HELPER, | ||
| 273 | }; | ||
| 274 | |||
| 275 | static struct nf_hook_ops ipv4_conntrack_helper_in_ops = { | ||
| 276 | .hook = ipv4_conntrack_help, | ||
| 277 | .owner = THIS_MODULE, | ||
| 278 | .pf = PF_INET, | ||
| 279 | .hooknum = NF_IP_LOCAL_IN, | ||
| 280 | .priority = NF_IP_PRI_CONNTRACK_HELPER, | ||
| 281 | }; | ||
| 282 | |||
| 283 | |||
| 284 | /* Refragmenter; last chance. */ | ||
| 285 | static struct nf_hook_ops ipv4_conntrack_out_ops = { | ||
| 286 | .hook = ipv4_refrag, | ||
| 287 | .owner = THIS_MODULE, | ||
| 288 | .pf = PF_INET, | ||
| 289 | .hooknum = NF_IP_POST_ROUTING, | ||
| 290 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, | ||
| 291 | }; | ||
| 292 | |||
| 293 | static struct nf_hook_ops ipv4_conntrack_local_in_ops = { | ||
| 294 | .hook = ipv4_confirm, | ||
| 295 | .owner = THIS_MODULE, | ||
| 296 | .pf = PF_INET, | ||
| 297 | .hooknum = NF_IP_LOCAL_IN, | ||
| 298 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, | ||
| 299 | }; | ||
| 300 | |||
#ifdef CONFIG_SYSCTL
/* From nf_conntrack_proto_icmp.c */
extern unsigned long nf_ct_icmp_timeout;
/* Handle returned by register_sysctl_table(); needed to unregister. */
static struct ctl_table_header *nf_ct_ipv4_sysctl_header;

/*
 * Leaf table: exposes the ICMP timeout as "nf_conntrack_icmp_timeout".
 *
 * NOTE(review): .data points at an object declared `unsigned long`
 * above, while .maxlen is sizeof(unsigned int) and the handler is
 * proc_dointvec_jiffies (an int-vector handler, judging by its name).
 * Where long is wider than int these disagree -- confirm the intended
 * type of nf_ct_icmp_timeout.
 */
static ctl_table nf_ct_sysctl_table[] = {
	{
		.ctl_name	= NET_NF_CONNTRACK_ICMP_TIMEOUT,
		.procname	= "nf_conntrack_icmp_timeout",
		.data		= &nf_ct_icmp_timeout,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{ .ctl_name = 0 }
};

/* Directory "netfilter", parent of the leaf table above. */
static ctl_table nf_ct_netfilter_table[] = {
	{
		.ctl_name	= NET_NETFILTER,
		.procname	= "netfilter",
		.mode		= 0555,
		.child		= nf_ct_sysctl_table,
	},
	{ .ctl_name = 0 }
};

/* Directory "net"; this is the root table passed to register_sysctl_table(). */
static ctl_table nf_ct_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= nf_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};
#endif
| 338 | |||
| 339 | /* Fast function for those who don't want to parse /proc (and I don't | ||
| 340 | blame them). */ | ||
| 341 | /* Reversing the socket's dst/src point of view gives us the reply | ||
| 342 | mapping. */ | ||
| 343 | static int | ||
| 344 | getorigdst(struct sock *sk, int optval, void __user *user, int *len) | ||
| 345 | { | ||
| 346 | struct inet_sock *inet = inet_sk(sk); | ||
| 347 | struct nf_conntrack_tuple_hash *h; | ||
| 348 | struct nf_conntrack_tuple tuple; | ||
| 349 | |||
| 350 | NF_CT_TUPLE_U_BLANK(&tuple); | ||
| 351 | tuple.src.u3.ip = inet->rcv_saddr; | ||
| 352 | tuple.src.u.tcp.port = inet->sport; | ||
| 353 | tuple.dst.u3.ip = inet->daddr; | ||
| 354 | tuple.dst.u.tcp.port = inet->dport; | ||
| 355 | tuple.src.l3num = PF_INET; | ||
| 356 | tuple.dst.protonum = IPPROTO_TCP; | ||
| 357 | |||
| 358 | /* We only do TCP at the moment: is there a better way? */ | ||
| 359 | if (strcmp(sk->sk_prot->name, "TCP")) { | ||
| 360 | DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); | ||
| 361 | return -ENOPROTOOPT; | ||
| 362 | } | ||
| 363 | |||
| 364 | if ((unsigned int) *len < sizeof(struct sockaddr_in)) { | ||
| 365 | DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", | ||
| 366 | *len, sizeof(struct sockaddr_in)); | ||
| 367 | return -EINVAL; | ||
| 368 | } | ||
| 369 | |||
| 370 | h = nf_conntrack_find_get(&tuple, NULL); | ||
| 371 | if (h) { | ||
| 372 | struct sockaddr_in sin; | ||
| 373 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | ||
| 374 | |||
| 375 | sin.sin_family = AF_INET; | ||
| 376 | sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] | ||
| 377 | .tuple.dst.u.tcp.port; | ||
| 378 | sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] | ||
| 379 | .tuple.dst.u3.ip; | ||
| 380 | |||
| 381 | DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", | ||
| 382 | NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); | ||
| 383 | nf_ct_put(ct); | ||
| 384 | if (copy_to_user(user, &sin, sizeof(sin)) != 0) | ||
| 385 | return -EFAULT; | ||
| 386 | else | ||
| 387 | return 0; | ||
| 388 | } | ||
| 389 | DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", | ||
| 390 | NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), | ||
| 391 | NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); | ||
| 392 | return -ENOENT; | ||
| 393 | } | ||
| 394 | |||
/*
 * getsockopt registration for SO_ORIGINAL_DST on PF_INET sockets.
 * The option range is [get_optmin, get_optmax), i.e. exactly
 * SO_ORIGINAL_DST, and is served by getorigdst().
 */
static struct nf_sockopt_ops so_getorigdst = {
	.pf		= PF_INET,
	.get_optmin	= SO_ORIGINAL_DST,
	.get_optmax	= SO_ORIGINAL_DST+1,
	.get		= &getorigdst,
};
| 401 | |||
/*
 * Layer-3 protocol descriptor for IPv4: binds the ipv4_* callbacks
 * defined in this file to PF_INET under the name "ipv4". Registered by
 * init_or_cleanup() via nf_conntrack_l3proto_register().
 */
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
	.l3proto	 = PF_INET,
	.name		 = "ipv4",
	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
	.invert_tuple	 = ipv4_invert_tuple,
	.print_tuple	 = ipv4_print_tuple,
	.print_conntrack = ipv4_print_conntrack,
	.prepare	 = ipv4_prepare,
	.get_features	 = ipv4_get_features,
	.me		 = THIS_MODULE,
};
| 413 | |||
/* Layer-4 protocol descriptors defined in other files. */
extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4;
extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp;

/*
 * init_or_cleanup - register (init != 0) or unregister (init == 0)
 * everything this module provides.
 *
 * Registration order: sockopt, tcp/udp/icmp protocols, the ipv4 l3proto,
 * the netfilter hooks, then (with CONFIG_SYSCTL) the sysctl table, and
 * finally the ip_ct_attach pointer.  If any step fails, the labels at
 * the bottom unwind exactly the steps already completed, in reverse
 * order, and the error code is returned.
 *
 * With init == 0 the function jumps straight to "cleanup" and falls
 * through every label, undoing the full registration; ret is still 0
 * in that case.
 */
static int init_or_cleanup(int init)
{
	int ret = 0;

	if (!init) goto cleanup;

	ret = nf_register_sockopt(&so_getorigdst);
	if (ret < 0) {
		printk(KERN_ERR "Unable to register netfilter socket option\n");
		goto cleanup_nothing;
	}

	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register tcp.\n");
		goto cleanup_sockopt;
	}

	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register udp.\n");
		goto cleanup_tcp;
	}

	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register icmp.\n");
		goto cleanup_udp;
	}

	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register ipv4\n");
		goto cleanup_icmp;
	}

	ret = nf_register_hook(&ipv4_conntrack_defrag_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n");
		goto cleanup_ipv4;
	}
	ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n");
		goto cleanup_defragops;
	}

	ret = nf_register_hook(&ipv4_conntrack_in_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register pre-routing hook.\n");
		goto cleanup_defraglocalops;
	}

	ret = nf_register_hook(&ipv4_conntrack_local_out_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register local out hook.\n");
		goto cleanup_inops;
	}

	ret = nf_register_hook(&ipv4_conntrack_helper_in_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register local helper hook.\n");
		goto cleanup_inandlocalops;
	}

	ret = nf_register_hook(&ipv4_conntrack_helper_out_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register postrouting helper hook.\n");
		goto cleanup_helperinops;
	}

	ret = nf_register_hook(&ipv4_conntrack_out_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register post-routing hook.\n");
		goto cleanup_helperoutops;
	}

	ret = nf_register_hook(&ipv4_conntrack_local_in_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv4: can't register local in hook.\n");
		goto cleanup_inoutandlocalops;
	}

#ifdef CONFIG_SYSCTL
	nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
	if (nf_ct_ipv4_sysctl_header == NULL) {
		printk("nf_conntrack: can't register to sysctl.\n");
		/* register_sysctl_table() reports failure only as NULL */
		ret = -ENOMEM;
		goto cleanup_localinops;
	}
#endif

	/* For use by REJECT target */
	ip_ct_attach = __nf_conntrack_attach;

	return ret;

	/*
	 * Unwind ladder.  Each label undoes the step registered just
	 * before the one whose failure jumps to it, then falls through.
	 */
 cleanup:
	synchronize_net();
	ip_ct_attach = NULL;
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(nf_ct_ipv4_sysctl_header);
 cleanup_localinops:
#endif
	nf_unregister_hook(&ipv4_conntrack_local_in_ops);
 cleanup_inoutandlocalops:
	nf_unregister_hook(&ipv4_conntrack_out_ops);
 cleanup_helperoutops:
	nf_unregister_hook(&ipv4_conntrack_helper_out_ops);
 cleanup_helperinops:
	nf_unregister_hook(&ipv4_conntrack_helper_in_ops);
 cleanup_inandlocalops:
	nf_unregister_hook(&ipv4_conntrack_local_out_ops);
 cleanup_inops:
	nf_unregister_hook(&ipv4_conntrack_in_ops);
 cleanup_defraglocalops:
	nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops);
 cleanup_defragops:
	nf_unregister_hook(&ipv4_conntrack_defrag_ops);
 cleanup_ipv4:
	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
 cleanup_icmp:
	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp);
 cleanup_udp:
	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4);
 cleanup_tcp:
	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4);
 cleanup_sockopt:
	nf_unregister_sockopt(&so_getorigdst);
 cleanup_nothing:
	return ret;
}
| 549 | |||
| 550 | MODULE_LICENSE("GPL"); | ||
| 551 | |||
/*
 * Module entry point: calls need_nf_conntrack() (defined elsewhere;
 * presumably an empty symbol that forces the core module to load --
 * TODO confirm) and then performs the full registration.
 * Returns the result of init_or_cleanup(1).
 */
static int __init init(void)
{
	need_nf_conntrack();
	return init_or_cleanup(1);
}
| 557 | |||
/* Module exit point: undoes everything init() registered. */
static void __exit fini(void)
{
	init_or_cleanup(0);
}
| 562 | |||
| 563 | module_init(init); | ||
| 564 | module_exit(fini); | ||
| 565 | |||
/*
 * Intentionally empty; exported below.
 * NOTE(review): presumably exists only so that other modules can
 * reference the symbol and thereby depend on this module -- confirm
 * against the callers.
 */
void need_ip_conntrack(void)
{
}
| 569 | |||
| 570 | EXPORT_SYMBOL(need_ip_conntrack); | ||
| 571 | EXPORT_SYMBOL(nf_ct_ipv4_gather_frags); | ||
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c new file mode 100644 index 000000000000..7ddb5c08f7b8 --- /dev/null +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c  | |||
| @@ -0,0 +1,301 @@ | |||
| 1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
| 2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License version 2 as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 9 | * - enable working with Layer 3 protocol independent connection tracking. | ||
| 10 | * | ||
| 11 | * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/types.h> | ||
| 15 | #include <linux/sched.h> | ||
| 16 | #include <linux/timer.h> | ||
| 17 | #include <linux/netfilter.h> | ||
| 18 | #include <linux/in.h> | ||
| 19 | #include <linux/icmp.h> | ||
| 20 | #include <linux/seq_file.h> | ||
| 21 | #include <net/ip.h> | ||
| 22 | #include <net/checksum.h> | ||
| 23 | #include <linux/netfilter_ipv4.h> | ||
| 24 | #include <net/netfilter/nf_conntrack_tuple.h> | ||
| 25 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 26 | #include <net/netfilter/nf_conntrack_core.h> | ||
| 27 | |||
| 28 | unsigned long nf_ct_icmp_timeout = 30*HZ; | ||
| 29 | |||
| 30 | #if 0 | ||
| 31 | #define DEBUGP printk | ||
| 32 | #else | ||
| 33 | #define DEBUGP(format, args...) | ||
| 34 | #endif | ||
| 35 | |||
| 36 | static int icmp_pkt_to_tuple(const struct sk_buff *skb, | ||
| 37 | unsigned int dataoff, | ||
| 38 | struct nf_conntrack_tuple *tuple) | ||
| 39 | { | ||
| 40 | struct icmphdr _hdr, *hp; | ||
| 41 | |||
| 42 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | ||
| 43 | if (hp == NULL) | ||
| 44 | return 0; | ||
| 45 | |||
| 46 | tuple->dst.u.icmp.type = hp->type; | ||
| 47 | tuple->src.u.icmp.id = hp->un.echo.id; | ||
| 48 | tuple->dst.u.icmp.code = hp->code; | ||
| 49 | |||
| 50 | return 1; | ||
| 51 | } | ||
| 52 | |||
| 53 | static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
| 54 | const struct nf_conntrack_tuple *orig) | ||
| 55 | { | ||
| 56 | /* Add 1; spaces filled with 0. */ | ||
| 57 | static u_int8_t invmap[] | ||
| 58 | = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1, | ||
| 59 | [ICMP_ECHOREPLY] = ICMP_ECHO + 1, | ||
| 60 | [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, | ||
| 61 | [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, | ||
| 62 | [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, | ||
| 63 | [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, | ||
| 64 | [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, | ||
| 65 | [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1}; | ||
| 66 | |||
| 67 | if (orig->dst.u.icmp.type >= sizeof(invmap) | ||
| 68 | || !invmap[orig->dst.u.icmp.type]) | ||
| 69 | return 0; | ||
| 70 | |||
| 71 | tuple->src.u.icmp.id = orig->src.u.icmp.id; | ||
| 72 | tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; | ||
| 73 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; | ||
| 74 | return 1; | ||
| 75 | } | ||
| 76 | |||
| 77 | /* Print out the per-protocol part of the tuple. */ | ||
| 78 | static int icmp_print_tuple(struct seq_file *s, | ||
| 79 | const struct nf_conntrack_tuple *tuple) | ||
| 80 | { | ||
| 81 | return seq_printf(s, "type=%u code=%u id=%u ", | ||
| 82 | tuple->dst.u.icmp.type, | ||
| 83 | tuple->dst.u.icmp.code, | ||
| 84 | ntohs(tuple->src.u.icmp.id)); | ||
| 85 | } | ||
| 86 | |||
/*
 * Print out the private part of the conntrack.
 * Nothing is printed for ICMP; always returns 0.
 */
static int icmp_print_conntrack(struct seq_file *s,
				const struct nf_conn *conntrack)
{
	return 0;
}
| 93 | |||
/*
 * Returns verdict for packet, or -1 for invalid.
 *
 * Per-packet handler for a tracked ICMP exchange.  Packets in the
 * original direction increment ct->proto.icmp.count and refresh the
 * timeout to nf_ct_icmp_timeout; packets in the reply direction
 * decrement the count and, when it reaches zero, fire the timeout
 * handler at once.  This implementation always returns NF_ACCEPT;
 * dataoff, pf and hooknum are not used.
 */
static int icmp_packet(struct nf_conn *ct,
		       const struct sk_buff *skb,
		       unsigned int dataoff,
		       enum ip_conntrack_info ctinfo,
		       int pf,
		       unsigned int hooknum)
{
	/* Try to delete connection immediately after all replies:
	   won't actually vanish as we still have skb, and del_timer
	   means this will only run once even if count hits zero twice
	   (theoretically possible with SMP) */
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
		/* the handler is called only if del_timer() returned non-zero */
		if (atomic_dec_and_test(&ct->proto.icmp.count)
		    && del_timer(&ct->timeout))
			ct->timeout.function((unsigned long)ct);
	} else {
		atomic_inc(&ct->proto.icmp.count);
		nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
	}

	return NF_ACCEPT;
}
| 118 | |||
| 119 | /* Called when a new connection for this protocol found. */ | ||
| 120 | static int icmp_new(struct nf_conn *conntrack, | ||
| 121 | const struct sk_buff *skb, unsigned int dataoff) | ||
| 122 | { | ||
| 123 | static u_int8_t valid_new[] | ||
| 124 | = { [ICMP_ECHO] = 1, | ||
| 125 | [ICMP_TIMESTAMP] = 1, | ||
| 126 | [ICMP_INFO_REQUEST] = 1, | ||
| 127 | [ICMP_ADDRESS] = 1 }; | ||
| 128 | |||
| 129 | if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) | ||
| 130 | || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { | ||
| 131 | /* Can't create a new ICMP `conn' with this. */ | ||
| 132 | DEBUGP("icmp: can't create new conn with type %u\n", | ||
| 133 | conntrack->tuplehash[0].tuple.dst.u.icmp.type); | ||
| 134 | NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); | ||
| 135 | return 0; | ||
| 136 | } | ||
| 137 | atomic_set(&conntrack->proto.icmp.count, 0); | ||
| 138 | return 1; | ||
| 139 | } | ||
| 140 | |||
| 141 | extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; | ||
| 142 | /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ | ||
| 143 | static int | ||
| 144 | icmp_error_message(struct sk_buff *skb, | ||
| 145 | enum ip_conntrack_info *ctinfo, | ||
| 146 | unsigned int hooknum) | ||
| 147 | { | ||
| 148 | struct nf_conntrack_tuple innertuple, origtuple; | ||
| 149 | struct { | ||
| 150 | struct icmphdr icmp; | ||
| 151 | struct iphdr ip; | ||
| 152 | } _in, *inside; | ||
| 153 | struct nf_conntrack_protocol *innerproto; | ||
| 154 | struct nf_conntrack_tuple_hash *h; | ||
| 155 | int dataoff; | ||
| 156 | |||
| 157 | NF_CT_ASSERT(skb->nfct == NULL); | ||
| 158 | |||
| 159 | /* Not enough header? */ | ||
| 160 | inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in); | ||
| 161 | if (inside == NULL) | ||
| 162 | return -NF_ACCEPT; | ||
| 163 | |||
| 164 | /* Ignore ICMP's containing fragments (shouldn't happen) */ | ||
| 165 | if (inside->ip.frag_off & htons(IP_OFFSET)) { | ||
| 166 | DEBUGP("icmp_error_message: fragment of proto %u\n", | ||
| 167 | inside->ip.protocol); | ||
| 168 | return -NF_ACCEPT; | ||
| 169 | } | ||
| 170 | |||
| 171 | innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol); | ||
| 172 | dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); | ||
| 173 | /* Are they talking about one of our connections? */ | ||
| 174 | if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, | ||
| 175 | inside->ip.protocol, &origtuple, | ||
| 176 | &nf_conntrack_l3proto_ipv4, innerproto)) { | ||
| 177 | DEBUGP("icmp_error_message: ! get_tuple p=%u", | ||
| 178 | inside->ip.protocol); | ||
| 179 | return -NF_ACCEPT; | ||
| 180 | } | ||
| 181 | |||
| 182 | /* Ordinarily, we'd expect the inverted tupleproto, but it's | ||
| 183 | been preserved inside the ICMP. */ | ||
| 184 | if (!nf_ct_invert_tuple(&innertuple, &origtuple, | ||
| 185 | &nf_conntrack_l3proto_ipv4, innerproto)) { | ||
| 186 | DEBUGP("icmp_error_message: no match\n"); | ||
| 187 | return -NF_ACCEPT; | ||
| 188 | } | ||
| 189 | |||
| 190 | *ctinfo = IP_CT_RELATED; | ||
| 191 | |||
| 192 | h = nf_conntrack_find_get(&innertuple, NULL); | ||
| 193 | if (!h) { | ||
| 194 | /* Locally generated ICMPs will match inverted if they | ||
| 195 | haven't been SNAT'ed yet */ | ||
| 196 | /* FIXME: NAT code has to handle half-done double NAT --RR */ | ||
| 197 | if (hooknum == NF_IP_LOCAL_OUT) | ||
| 198 | h = nf_conntrack_find_get(&origtuple, NULL); | ||
| 199 | |||
| 200 | if (!h) { | ||
| 201 | DEBUGP("icmp_error_message: no match\n"); | ||
| 202 | return -NF_ACCEPT; | ||
| 203 | } | ||
| 204 | |||
| 205 | /* Reverse direction from that found */ | ||
| 206 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) | ||
| 207 | *ctinfo += IP_CT_IS_REPLY; | ||
| 208 | } else { | ||
| 209 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) | ||
| 210 | *ctinfo += IP_CT_IS_REPLY; | ||
| 211 | } | ||
| 212 | |||
| 213 | /* Update skb to refer to this connection */ | ||
| 214 | skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; | ||
| 215 | skb->nfctinfo = *ctinfo; | ||
| 216 | return -NF_ACCEPT; | ||
| 217 | } | ||
| 218 | |||
| 219 | /* Small and modified version of icmp_rcv */ | ||
| 220 | static int | ||
| 221 | icmp_error(struct sk_buff *skb, unsigned int dataoff, | ||
| 222 | enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) | ||
| 223 | { | ||
| 224 | struct icmphdr _ih, *icmph; | ||
| 225 | |||
| 226 | /* Not enough header? */ | ||
| 227 | icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); | ||
| 228 | if (icmph == NULL) { | ||
| 229 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
| 230 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
| 231 | "nf_ct_icmp: short packet "); | ||
| 232 | return -NF_ACCEPT; | ||
| 233 | } | ||
| 234 | |||
| 235 | /* See ip_conntrack_proto_tcp.c */ | ||
| 236 | if (hooknum != NF_IP_PRE_ROUTING) | ||
| 237 | goto checksum_skipped; | ||
| 238 | |||
| 239 | switch (skb->ip_summed) { | ||
| 240 | case CHECKSUM_HW: | ||
| 241 | if (!(u16)csum_fold(skb->csum)) | ||
| 242 | break; | ||
| 243 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
| 244 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
| 245 | "nf_ct_icmp: bad HW ICMP checksum "); | ||
| 246 | return -NF_ACCEPT; | ||
| 247 | case CHECKSUM_NONE: | ||
| 248 | if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { | ||
| 249 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
| 250 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, | ||
| 251 | NULL, | ||
| 252 | "nf_ct_icmp: bad ICMP checksum "); | ||
| 253 | return -NF_ACCEPT; | ||
| 254 | } | ||
| 255 | default: | ||
| 256 | break; | ||
| 257 | } | ||
| 258 | |||
| 259 | checksum_skipped: | ||
| 260 | /* | ||
| 261 | * 18 is the highest 'known' ICMP type. Anything else is a mystery | ||
| 262 | * | ||
| 263 | * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently | ||
| 264 | * discarded. | ||
| 265 | */ | ||
| 266 | if (icmph->type > NR_ICMP_TYPES) { | ||
| 267 | if (LOG_INVALID(IPPROTO_ICMP)) | ||
| 268 | nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, | ||
| 269 | "nf_ct_icmp: invalid ICMP type "); | ||
| 270 | return -NF_ACCEPT; | ||
| 271 | } | ||
| 272 | |||
| 273 | /* Need to track icmp error message? */ | ||
| 274 | if (icmph->type != ICMP_DEST_UNREACH | ||
| 275 | && icmph->type != ICMP_SOURCE_QUENCH | ||
| 276 | && icmph->type != ICMP_TIME_EXCEEDED | ||
| 277 | && icmph->type != ICMP_PARAMETERPROB | ||
| 278 | && icmph->type != ICMP_REDIRECT) | ||
| 279 | return NF_ACCEPT; | ||
| 280 | |||
| 281 | return icmp_error_message(skb, ctinfo, hooknum); | ||
| 282 | } | ||
| 283 | |||
/*
 * Layer-4 protocol descriptor for ICMP over IPv4 (PF_INET /
 * IPPROTO_ICMP).  Wires up the icmp_* callbacks defined above; there
 * is no destroy callback and no owning module is recorded (.me is
 * NULL).
 */
struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
{
	.list			= { NULL, NULL },
	.l3proto		= PF_INET,
	.proto			= IPPROTO_ICMP,
	.name			= "icmp",
	.pkt_to_tuple		= icmp_pkt_to_tuple,
	.invert_tuple		= icmp_invert_tuple,
	.print_tuple		= icmp_print_tuple,
	.print_conntrack	= icmp_print_conntrack,
	.packet			= icmp_packet,
	.new			= icmp_new,
	.error			= icmp_error,
	.destroy		= NULL,
	.me			= NULL
};
| 300 | |||
| 301 | EXPORT_SYMBOL(nf_conntrack_protocol_icmp); | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f3f0013a9580..72b7c22e1ea5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c  | |||
| @@ -2112,7 +2112,6 @@ void __init tcp_init(void) | |||
| 2112 | sysctl_tcp_max_orphans >>= (3 - order); | 2112 | sysctl_tcp_max_orphans >>= (3 - order); | 
| 2113 | sysctl_max_syn_backlog = 128; | 2113 | sysctl_max_syn_backlog = 128; | 
| 2114 | } | 2114 | } | 
| 2115 | tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1; | ||
| 2116 | 2115 | ||
| 2117 | sysctl_tcp_mem[0] = 768 << order; | 2116 | sysctl_tcp_mem[0] = 768 << order; | 
| 2118 | sysctl_tcp_mem[1] = 1024 << order; | 2117 | sysctl_tcp_mem[1] = 1024 << order; | 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c85819d8474b..634dabb558fd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c  | |||
| @@ -93,8 +93,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { | |||
| 93 | .lhash_lock = RW_LOCK_UNLOCKED, | 93 | .lhash_lock = RW_LOCK_UNLOCKED, | 
| 94 | .lhash_users = ATOMIC_INIT(0), | 94 | .lhash_users = ATOMIC_INIT(0), | 
| 95 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), | 95 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), | 
| 96 | .portalloc_lock = SPIN_LOCK_UNLOCKED, | ||
| 97 | .port_rover = 1024 - 1, | ||
| 98 | }; | 96 | }; | 
| 99 | 97 | ||
| 100 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) | 98 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) | 
| @@ -825,8 +823,7 @@ out: | |||
| 825 | */ | 823 | */ | 
| 826 | static void tcp_v4_reqsk_destructor(struct request_sock *req) | 824 | static void tcp_v4_reqsk_destructor(struct request_sock *req) | 
| 827 | { | 825 | { | 
| 828 | if (inet_rsk(req)->opt) | 826 | kfree(inet_rsk(req)->opt); | 
| 829 | kfree(inet_rsk(req)->opt); | ||
| 830 | } | 827 | } | 
| 831 | 828 | ||
| 832 | static inline void syn_flood_warning(struct sk_buff *skb) | 829 | static inline void syn_flood_warning(struct sk_buff *skb) | 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2c5f57299d63..ddcf7754eec2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c  | |||
| @@ -35,6 +35,9 @@ | |||
| 35 | * YOSHIFUJI Hideaki @USAGI : ARCnet support | 35 | * YOSHIFUJI Hideaki @USAGI : ARCnet support | 
| 36 | * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to | 36 | * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to | 
| 37 | * seq_file. | 37 | * seq_file. | 
| 38 | * YOSHIFUJI Hideaki @USAGI : improved source address | ||
| 39 | * selection; consider scope, | ||
| 40 | * status etc. | ||
| 38 | */ | 41 | */ | 
| 39 | 42 | ||
| 40 | #include <linux/config.h> | 43 | #include <linux/config.h> | 
| @@ -193,46 +196,51 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; | |||
| 193 | #endif | 196 | #endif | 
| 194 | const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; | 197 | const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; | 
| 195 | 198 | ||
| 196 | int ipv6_addr_type(const struct in6_addr *addr) | 199 | #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) | 
| 200 | |||
| 201 | static inline unsigned ipv6_addr_scope2type(unsigned scope) | ||
| 202 | { | ||
| 203 | switch(scope) { | ||
| 204 | case IPV6_ADDR_SCOPE_NODELOCAL: | ||
| 205 | return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_NODELOCAL) | | ||
| 206 | IPV6_ADDR_LOOPBACK); | ||
| 207 | case IPV6_ADDR_SCOPE_LINKLOCAL: | ||
| 208 | return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL) | | ||
| 209 | IPV6_ADDR_LINKLOCAL); | ||
| 210 | case IPV6_ADDR_SCOPE_SITELOCAL: | ||
| 211 | return (IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL) | | ||
| 212 | IPV6_ADDR_SITELOCAL); | ||
| 213 | } | ||
| 214 | return IPV6_ADDR_SCOPE_TYPE(scope); | ||
| 215 | } | ||
| 216 | |||
| 217 | int __ipv6_addr_type(const struct in6_addr *addr) | ||
| 197 | { | 218 | { | 
| 198 | int type; | ||
| 199 | u32 st; | 219 | u32 st; | 
| 200 | 220 | ||
| 201 | st = addr->s6_addr32[0]; | 221 | st = addr->s6_addr32[0]; | 
| 202 | 222 | ||
| 203 | if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) { | ||
| 204 | type = IPV6_ADDR_MULTICAST; | ||
| 205 | |||
| 206 | switch((st & htonl(0x00FF0000))) { | ||
| 207 | case __constant_htonl(0x00010000): | ||
| 208 | type |= IPV6_ADDR_LOOPBACK; | ||
| 209 | break; | ||
| 210 | |||
| 211 | case __constant_htonl(0x00020000): | ||
| 212 | type |= IPV6_ADDR_LINKLOCAL; | ||
| 213 | break; | ||
| 214 | |||
| 215 | case __constant_htonl(0x00050000): | ||
| 216 | type |= IPV6_ADDR_SITELOCAL; | ||
| 217 | break; | ||
| 218 | }; | ||
| 219 | return type; | ||
| 220 | } | ||
| 221 | |||
| 222 | type = IPV6_ADDR_UNICAST; | ||
| 223 | |||
| 224 | /* Consider all addresses with the first three bits different of | 223 | /* Consider all addresses with the first three bits different of | 
| 225 | 000 and 111 as finished. | 224 | 000 and 111 as unicasts. | 
| 226 | */ | 225 | */ | 
| 227 | if ((st & htonl(0xE0000000)) != htonl(0x00000000) && | 226 | if ((st & htonl(0xE0000000)) != htonl(0x00000000) && | 
| 228 | (st & htonl(0xE0000000)) != htonl(0xE0000000)) | 227 | (st & htonl(0xE0000000)) != htonl(0xE0000000)) | 
| 229 | return type; | 228 | return (IPV6_ADDR_UNICAST | | 
| 230 | 229 | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); | |
| 231 | if ((st & htonl(0xFFC00000)) == htonl(0xFE800000)) | 230 | |
| 232 | return (IPV6_ADDR_LINKLOCAL | type); | 231 | if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) { | 
| 232 | /* multicast */ | ||
| 233 | /* addr-select 3.1 */ | ||
| 234 | return (IPV6_ADDR_MULTICAST | | ||
| 235 | ipv6_addr_scope2type(IPV6_ADDR_MC_SCOPE(addr))); | ||
| 236 | } | ||
| 233 | 237 | ||
| 238 | if ((st & htonl(0xFFC00000)) == htonl(0xFE800000)) | ||
| 239 | return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST | | ||
| 240 | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.1 */ | ||
| 234 | if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000)) | 241 | if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000)) | 
| 235 | return (IPV6_ADDR_SITELOCAL | type); | 242 | return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST | | 
| 243 | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_SITELOCAL)); /* addr-select 3.1 */ | ||
| 236 | 244 | ||
| 237 | if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { | 245 | if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) { | 
| 238 | if (addr->s6_addr32[2] == 0) { | 246 | if (addr->s6_addr32[2] == 0) { | 
| @@ -240,24 +248,20 @@ int ipv6_addr_type(const struct in6_addr *addr) | |||
| 240 | return IPV6_ADDR_ANY; | 248 | return IPV6_ADDR_ANY; | 
| 241 | 249 | ||
| 242 | if (addr->s6_addr32[3] == htonl(0x00000001)) | 250 | if (addr->s6_addr32[3] == htonl(0x00000001)) | 
| 243 | return (IPV6_ADDR_LOOPBACK | type); | 251 | return (IPV6_ADDR_LOOPBACK | IPV6_ADDR_UNICAST | | 
| 252 | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_LINKLOCAL)); /* addr-select 3.4 */ | ||
| 244 | 253 | ||
| 245 | return (IPV6_ADDR_COMPATv4 | type); | 254 | return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST | | 
| 255 | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ | ||
| 246 | } | 256 | } | 
| 247 | 257 | ||
| 248 | if (addr->s6_addr32[2] == htonl(0x0000ffff)) | 258 | if (addr->s6_addr32[2] == htonl(0x0000ffff)) | 
| 249 | return IPV6_ADDR_MAPPED; | 259 | return (IPV6_ADDR_MAPPED | | 
| 260 | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.3 */ | ||
| 250 | } | 261 | } | 
| 251 | 262 | ||
| 252 | st &= htonl(0xFF000000); | 263 | return (IPV6_ADDR_RESERVED | | 
| 253 | if (st == 0) | 264 | IPV6_ADDR_SCOPE_TYPE(IPV6_ADDR_SCOPE_GLOBAL)); /* addr-select 3.4 */ | 
| 254 | return IPV6_ADDR_RESERVED; | ||
| 255 | st &= htonl(0xFE000000); | ||
| 256 | if (st == htonl(0x02000000)) | ||
| 257 | return IPV6_ADDR_RESERVED; /* for NSAP */ | ||
| 258 | if (st == htonl(0x04000000)) | ||
| 259 | return IPV6_ADDR_RESERVED; /* for IPX */ | ||
| 260 | return type; | ||
| 261 | } | 265 | } | 
| 262 | 266 | ||
| 263 | static void addrconf_del_timer(struct inet6_ifaddr *ifp) | 267 | static void addrconf_del_timer(struct inet6_ifaddr *ifp) | 
| @@ -805,138 +809,275 @@ out: | |||
| 805 | #endif | 809 | #endif | 
| 806 | 810 | ||
| 807 | /* | 811 | /* | 
| 808 | * Choose an appropriate source address | 812 | * Choose an appropriate source address (RFC3484) | 
| 809 | * should do: | ||
| 810 | * i) get an address with an appropriate scope | ||
| 811 | * ii) see if there is a specific route for the destination and use | ||
| 812 | * an address of the attached interface | ||
| 813 | * iii) don't use deprecated addresses | ||
| 814 | */ | 813 | */ | 
| 815 | static int inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref) | 814 | struct ipv6_saddr_score { | 
| 815 | int addr_type; | ||
| 816 | unsigned int attrs; | ||
| 817 | int matchlen; | ||
| 818 | unsigned int scope; | ||
| 819 | unsigned int rule; | ||
| 820 | }; | ||
| 821 | |||
| 822 | #define IPV6_SADDR_SCORE_LOCAL 0x0001 | ||
| 823 | #define IPV6_SADDR_SCORE_PREFERRED 0x0004 | ||
| 824 | #define IPV6_SADDR_SCORE_HOA 0x0008 | ||
| 825 | #define IPV6_SADDR_SCORE_OIF 0x0010 | ||
| 826 | #define IPV6_SADDR_SCORE_LABEL 0x0020 | ||
| 827 | #define IPV6_SADDR_SCORE_PRIVACY 0x0040 | ||
| 828 | |||
| 829 | static int inline ipv6_saddr_preferred(int type) | ||
| 816 | { | 830 | { | 
| 817 | int pref; | 831 | if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4| | 
| 818 | pref = ifp->flags&IFA_F_DEPRECATED ? 0 : 2; | 832 | IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED)) | 
| 819 | #ifdef CONFIG_IPV6_PRIVACY | 833 | return 1; | 
| 820 | pref |= (ifp->flags^invpref)&IFA_F_TEMPORARY ? 0 : 1; | 834 | return 0; | 
| 821 | #endif | ||
| 822 | return pref; | ||
| 823 | } | 835 | } | 
| 824 | 836 | ||
| 825 | #ifdef CONFIG_IPV6_PRIVACY | 837 | /* static matching label */ | 
| 826 | #define IPV6_GET_SADDR_MAXSCORE(score) ((score) == 3) | 838 | static int inline ipv6_saddr_label(const struct in6_addr *addr, int type) | 
| 827 | #else | 839 | { | 
| 828 | #define IPV6_GET_SADDR_MAXSCORE(score) (score) | 840 | /* | 
| 829 | #endif | 841 | * prefix (longest match) label | 
| 842 | * ----------------------------- | ||
| 843 | * ::1/128 0 | ||
| 844 | * ::/0 1 | ||
| 845 | * 2002::/16 2 | ||
| 846 | * ::/96 3 | ||
| 847 | * ::ffff:0:0/96 4 | ||
| 848 | */ | ||
| 849 | if (type & IPV6_ADDR_LOOPBACK) | ||
| 850 | return 0; | ||
| 851 | else if (type & IPV6_ADDR_COMPATv4) | ||
| 852 | return 3; | ||
| 853 | else if (type & IPV6_ADDR_MAPPED) | ||
| 854 | return 4; | ||
| 855 | else if (addr->s6_addr16[0] == htons(0x2002)) | ||
| 856 | return 2; | ||
| 857 | return 1; | ||
| 858 | } | ||
| 830 | 859 | ||
| 831 | int ipv6_dev_get_saddr(struct net_device *dev, | 860 | int ipv6_dev_get_saddr(struct net_device *daddr_dev, | 
| 832 | struct in6_addr *daddr, struct in6_addr *saddr) | 861 | struct in6_addr *daddr, struct in6_addr *saddr) | 
| 833 | { | 862 | { | 
| 834 | struct inet6_ifaddr *ifp = NULL; | 863 | struct ipv6_saddr_score hiscore; | 
| 835 | struct inet6_ifaddr *match = NULL; | 864 | struct inet6_ifaddr *ifa_result = NULL; | 
| 836 | struct inet6_dev *idev; | 865 | int daddr_type = __ipv6_addr_type(daddr); | 
| 837 | int scope; | 866 | int daddr_scope = __ipv6_addr_src_scope(daddr_type); | 
| 838 | int err; | 867 | u32 daddr_label = ipv6_saddr_label(daddr, daddr_type); | 
| 839 | int hiscore = -1, score; | 868 | struct net_device *dev; | 
| 840 | 869 | ||
| 841 | scope = ipv6_addr_scope(daddr); | 870 | memset(&hiscore, 0, sizeof(hiscore)); | 
| 842 | 871 | ||
| 843 | /* | 872 | read_lock(&dev_base_lock); | 
| 844 | * known dev | 873 | read_lock(&addrconf_lock); | 
| 845 | * search dev and walk through dev addresses | ||
| 846 | */ | ||
| 847 | 874 | ||
| 848 | if (dev) { | 875 | for (dev = dev_base; dev; dev=dev->next) { | 
| 849 | if (dev->flags & IFF_LOOPBACK) | 876 | struct inet6_dev *idev; | 
| 850 | scope = IFA_HOST; | 877 | struct inet6_ifaddr *ifa; | 
| 878 | |||
| 879 | /* Rule 0: Candidate Source Address (section 4) | ||
| 880 | * - multicast and link-local destination address, | ||
| 881 | * the set of candidate source address MUST only | ||
| 882 | * include addresses assigned to interfaces | ||
| 883 | * belonging to the same link as the outgoing | ||
| 884 | * interface. | ||
| 885 | * (- For site-local destination addresses, the | ||
| 886 | * set of candidate source addresses MUST only | ||
| 887 | * include addresses assigned to interfaces | ||
| 888 | * belonging to the same site as the outgoing | ||
| 889 | * interface.) | ||
| 890 | */ | ||
| 891 | if ((daddr_type & IPV6_ADDR_MULTICAST || | ||
| 892 | daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && | ||
| 893 | daddr_dev && dev != daddr_dev) | ||
| 894 | continue; | ||
| 851 | 895 | ||
| 852 | read_lock(&addrconf_lock); | ||
| 853 | idev = __in6_dev_get(dev); | 896 | idev = __in6_dev_get(dev); | 
| 854 | if (idev) { | 897 | if (!idev) | 
| 855 | read_lock_bh(&idev->lock); | 898 | continue; | 
| 856 | for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { | ||
| 857 | if (ifp->scope == scope) { | ||
| 858 | if (ifp->flags&IFA_F_TENTATIVE) | ||
| 859 | continue; | ||
| 860 | #ifdef CONFIG_IPV6_PRIVACY | ||
| 861 | score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0); | ||
| 862 | #else | ||
| 863 | score = ipv6_saddr_pref(ifp, 0); | ||
| 864 | #endif | ||
| 865 | if (score <= hiscore) | ||
| 866 | continue; | ||
| 867 | 899 | ||
| 868 | if (match) | 900 | read_lock_bh(&idev->lock); | 
| 869 | in6_ifa_put(match); | 901 | for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { | 
| 870 | match = ifp; | 902 | struct ipv6_saddr_score score; | 
| 871 | hiscore = score; | ||
| 872 | in6_ifa_hold(ifp); | ||
| 873 | 903 | ||
| 874 | if (IPV6_GET_SADDR_MAXSCORE(score)) { | 904 | score.addr_type = __ipv6_addr_type(&ifa->addr); | 
| 875 | read_unlock_bh(&idev->lock); | 905 | |
| 876 | read_unlock(&addrconf_lock); | 906 | /* Rule 0: Candidate Source Address (section 4) | 
| 877 | goto out; | 907 | * - In any case, anycast addresses, multicast | 
| 878 | } | 908 | * addresses, and the unspecified address MUST | 
| 909 | * NOT be included in a candidate set. | ||
| 910 | */ | ||
| 911 | if (unlikely(score.addr_type == IPV6_ADDR_ANY || | ||
| 912 | score.addr_type & IPV6_ADDR_MULTICAST)) { | ||
| 913 | LIMIT_NETDEBUG(KERN_DEBUG | ||
| 914 | "ADDRCONF: unspecified / multicast address" | ||
| 915 | "assigned as unicast address on %s", | ||
| 916 | dev->name); | ||
| 917 | continue; | ||
| 918 | } | ||
| 919 | |||
| 920 | score.attrs = 0; | ||
| 921 | score.matchlen = 0; | ||
| 922 | score.scope = 0; | ||
| 923 | score.rule = 0; | ||
| 924 | |||
| 925 | if (ifa_result == NULL) { | ||
| 926 | /* record it if the first available entry */ | ||
| 927 | goto record_it; | ||
| 928 | } | ||
| 929 | |||
| 930 | /* Rule 1: Prefer same address */ | ||
| 931 | if (hiscore.rule < 1) { | ||
| 932 | if (ipv6_addr_equal(&ifa_result->addr, daddr)) | ||
| 933 | hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL; | ||
| 934 | hiscore.rule++; | ||
| 935 | } | ||
| 936 | if (ipv6_addr_equal(&ifa->addr, daddr)) { | ||
| 937 | score.attrs |= IPV6_SADDR_SCORE_LOCAL; | ||
| 938 | if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) { | ||
| 939 | score.rule = 1; | ||
| 940 | goto record_it; | ||
| 879 | } | 941 | } | 
| 942 | } else { | ||
| 943 | if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL) | ||
| 944 | continue; | ||
| 880 | } | 945 | } | 
| 881 | read_unlock_bh(&idev->lock); | ||
| 882 | } | ||
| 883 | read_unlock(&addrconf_lock); | ||
| 884 | } | ||
| 885 | 946 | ||
| 886 | if (scope == IFA_LINK) | 947 | /* Rule 2: Prefer appropriate scope */ | 
| 887 | goto out; | 948 | if (hiscore.rule < 2) { | 
| 949 | hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type); | ||
| 950 | hiscore.rule++; | ||
| 951 | } | ||
| 952 | score.scope = __ipv6_addr_src_scope(score.addr_type); | ||
| 953 | if (hiscore.scope < score.scope) { | ||
| 954 | if (hiscore.scope < daddr_scope) { | ||
| 955 | score.rule = 2; | ||
| 956 | goto record_it; | ||
| 957 | } else | ||
| 958 | continue; | ||
| 959 | } else if (score.scope < hiscore.scope) { | ||
| 960 | if (score.scope < daddr_scope) | ||
| 961 | continue; | ||
| 962 | else { | ||
| 963 | score.rule = 2; | ||
| 964 | goto record_it; | ||
| 965 | } | ||
| 966 | } | ||
| 888 | 967 | ||
| 889 | /* | 968 | /* Rule 3: Avoid deprecated address */ | 
| 890 | * dev == NULL or search failed for specified dev | 969 | if (hiscore.rule < 3) { | 
| 891 | */ | 970 | if (ipv6_saddr_preferred(hiscore.addr_type) || | 
| 971 | !(ifa_result->flags & IFA_F_DEPRECATED)) | ||
| 972 | hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED; | ||
| 973 | hiscore.rule++; | ||
| 974 | } | ||
| 975 | if (ipv6_saddr_preferred(score.addr_type) || | ||
| 976 | !(ifa->flags & IFA_F_DEPRECATED)) { | ||
| 977 | score.attrs |= IPV6_SADDR_SCORE_PREFERRED; | ||
| 978 | if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { | ||
| 979 | score.rule = 3; | ||
| 980 | goto record_it; | ||
| 981 | } | ||
| 982 | } else { | ||
| 983 | if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED) | ||
| 984 | continue; | ||
| 985 | } | ||
| 892 | 986 | ||
| 893 | read_lock(&dev_base_lock); | 987 | /* Rule 4: Prefer home address -- not implemented yet */ | 
| 894 | read_lock(&addrconf_lock); | ||
| 895 | for (dev = dev_base; dev; dev=dev->next) { | ||
| 896 | idev = __in6_dev_get(dev); | ||
| 897 | if (idev) { | ||
| 898 | read_lock_bh(&idev->lock); | ||
| 899 | for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { | ||
| 900 | if (ifp->scope == scope) { | ||
| 901 | if (ifp->flags&IFA_F_TENTATIVE) | ||
| 902 | continue; | ||
| 903 | #ifdef CONFIG_IPV6_PRIVACY | ||
| 904 | score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0); | ||
| 905 | #else | ||
| 906 | score = ipv6_saddr_pref(ifp, 0); | ||
| 907 | #endif | ||
| 908 | if (score <= hiscore) | ||
| 909 | continue; | ||
| 910 | 988 | ||
| 911 | if (match) | 989 | /* Rule 5: Prefer outgoing interface */ | 
| 912 | in6_ifa_put(match); | 990 | if (hiscore.rule < 5) { | 
| 913 | match = ifp; | 991 | if (daddr_dev == NULL || | 
| 914 | hiscore = score; | 992 | daddr_dev == ifa_result->idev->dev) | 
| 915 | in6_ifa_hold(ifp); | 993 | hiscore.attrs |= IPV6_SADDR_SCORE_OIF; | 
| 994 | hiscore.rule++; | ||
| 995 | } | ||
| 996 | if (daddr_dev == NULL || | ||
| 997 | daddr_dev == ifa->idev->dev) { | ||
| 998 | score.attrs |= IPV6_SADDR_SCORE_OIF; | ||
| 999 | if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) { | ||
| 1000 | score.rule = 5; | ||
| 1001 | goto record_it; | ||
| 1002 | } | ||
| 1003 | } else { | ||
| 1004 | if (hiscore.attrs & IPV6_SADDR_SCORE_OIF) | ||
| 1005 | continue; | ||
| 1006 | } | ||
| 916 | 1007 | ||
| 917 | if (IPV6_GET_SADDR_MAXSCORE(score)) { | 1008 | /* Rule 6: Prefer matching label */ | 
| 918 | read_unlock_bh(&idev->lock); | 1009 | if (hiscore.rule < 6) { | 
| 919 | goto out_unlock_base; | 1010 | if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label) | 
| 920 | } | 1011 | hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; | 
| 1012 | hiscore.rule++; | ||
| 1013 | } | ||
| 1014 | if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) { | ||
| 1015 | score.attrs |= IPV6_SADDR_SCORE_LABEL; | ||
| 1016 | if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { | ||
| 1017 | score.rule = 6; | ||
| 1018 | goto record_it; | ||
| 921 | } | 1019 | } | 
| 1020 | } else { | ||
| 1021 | if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL) | ||
| 1022 | continue; | ||
| 922 | } | 1023 | } | 
| 923 | read_unlock_bh(&idev->lock); | 1024 | |
| 1025 | #ifdef CONFIG_IPV6_PRIVACY | ||
| 1026 | /* Rule 7: Prefer public address | ||
| 1027 | * Note: prefer temporary address if use_tempaddr >= 2 | ||
| 1028 | */ | ||
| 1029 | if (hiscore.rule < 7) { | ||
| 1030 | if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^ | ||
| 1031 | (ifa_result->idev->cnf.use_tempaddr >= 2)) | ||
| 1032 | hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY; | ||
| 1033 | hiscore.rule++; | ||
| 1034 | } | ||
| 1035 | if ((!(ifa->flags & IFA_F_TEMPORARY)) ^ | ||
| 1036 | (ifa->idev->cnf.use_tempaddr >= 2)) { | ||
| 1037 | score.attrs |= IPV6_SADDR_SCORE_PRIVACY; | ||
| 1038 | if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) { | ||
| 1039 | score.rule = 7; | ||
| 1040 | goto record_it; | ||
| 1041 | } | ||
| 1042 | } else { | ||
| 1043 | if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) | ||
| 1044 | continue; | ||
| 1045 | } | ||
| 1046 | #endif | ||
| 1047 | /* Rule 8: Use longest matching prefix */ | ||
| 1048 | if (hiscore.rule < 8) | ||
| 1049 | hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr); | ||
| 1050 | score.rule++; | ||
| 1051 | score.matchlen = ipv6_addr_diff(&ifa->addr, daddr); | ||
| 1052 | if (score.matchlen > hiscore.matchlen) { | ||
| 1053 | score.rule = 8; | ||
| 1054 | goto record_it; | ||
| 1055 | } | ||
| 1056 | #if 0 | ||
| 1057 | else if (score.matchlen < hiscore.matchlen) | ||
| 1058 | continue; | ||
| 1059 | #endif | ||
| 1060 | |||
| 1061 | /* Final Rule: choose first available one */ | ||
| 1062 | continue; | ||
| 1063 | record_it: | ||
| 1064 | if (ifa_result) | ||
| 1065 | in6_ifa_put(ifa_result); | ||
| 1066 | in6_ifa_hold(ifa); | ||
| 1067 | ifa_result = ifa; | ||
| 1068 | hiscore = score; | ||
| 924 | } | 1069 | } | 
| 1070 | read_unlock_bh(&idev->lock); | ||
| 925 | } | 1071 | } | 
| 926 | |||
| 927 | out_unlock_base: | ||
| 928 | read_unlock(&addrconf_lock); | 1072 | read_unlock(&addrconf_lock); | 
| 929 | read_unlock(&dev_base_lock); | 1073 | read_unlock(&dev_base_lock); | 
| 930 | 1074 | ||
| 931 | out: | 1075 | if (!ifa_result) | 
| 932 | err = -EADDRNOTAVAIL; | 1076 | return -EADDRNOTAVAIL; | 
| 933 | if (match) { | 1077 | |
| 934 | ipv6_addr_copy(saddr, &match->addr); | 1078 | ipv6_addr_copy(saddr, &ifa_result->addr); | 
| 935 | err = 0; | 1079 | in6_ifa_put(ifa_result); | 
| 936 | in6_ifa_put(match); | 1080 | return 0; | 
| 937 | } | ||
| 938 | |||
| 939 | return err; | ||
| 940 | } | 1081 | } | 
| 941 | 1082 | ||
| 942 | 1083 | ||
| @@ -2950,8 +3091,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, | |||
| 2950 | 3091 | ||
| 2951 | nlmsg_failure: | 3092 | nlmsg_failure: | 
| 2952 | rtattr_failure: | 3093 | rtattr_failure: | 
| 2953 | if (array) | 3094 | kfree(array); | 
| 2954 | kfree(array); | ||
| 2955 | skb_trim(skb, b - skb->data); | 3095 | skb_trim(skb, b - skb->data); | 
| 2956 | return -1; | 3096 | return -1; | 
| 2957 | } | 3097 | } | 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 4fcc5a7acf6e..1bf6d9a769e6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c  | |||
| @@ -127,56 +127,6 @@ static __inline__ int addr_bit_set(void *token, int fn_bit) | |||
| 127 | return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; | 127 | return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; | 
| 128 | } | 128 | } | 
| 129 | 129 | ||
| 130 | /* | ||
| 131 | * find the first different bit between two addresses | ||
| 132 | * length of address must be a multiple of 32bits | ||
| 133 | */ | ||
| 134 | |||
| 135 | static __inline__ int addr_diff(void *token1, void *token2, int addrlen) | ||
| 136 | { | ||
| 137 | __u32 *a1 = token1; | ||
| 138 | __u32 *a2 = token2; | ||
| 139 | int i; | ||
| 140 | |||
| 141 | addrlen >>= 2; | ||
| 142 | |||
| 143 | for (i = 0; i < addrlen; i++) { | ||
| 144 | __u32 xb; | ||
| 145 | |||
| 146 | xb = a1[i] ^ a2[i]; | ||
| 147 | |||
| 148 | if (xb) { | ||
| 149 | int j = 31; | ||
| 150 | |||
| 151 | xb = ntohl(xb); | ||
| 152 | |||
| 153 | while ((xb & (1 << j)) == 0) | ||
| 154 | j--; | ||
| 155 | |||
| 156 | return (i * 32 + 31 - j); | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | /* | ||
| 161 | * we should *never* get to this point since that | ||
| 162 | * would mean the addrs are equal | ||
| 163 | * | ||
| 164 | * However, we do get to it 8) And exacly, when | ||
| 165 | * addresses are equal 8) | ||
| 166 | * | ||
| 167 | * ip route add 1111::/128 via ... | ||
| 168 | * ip route add 1111::/64 via ... | ||
| 169 | * and we are here. | ||
| 170 | * | ||
| 171 | * Ideally, this function should stop comparison | ||
| 172 | * at prefix length. It does not, but it is still OK, | ||
| 173 | * if returned value is greater than prefix length. | ||
| 174 | * --ANK (980803) | ||
| 175 | */ | ||
| 176 | |||
| 177 | return addrlen<<5; | ||
| 178 | } | ||
| 179 | |||
| 180 | static __inline__ struct fib6_node * node_alloc(void) | 130 | static __inline__ struct fib6_node * node_alloc(void) | 
| 181 | { | 131 | { | 
| 182 | struct fib6_node *fn; | 132 | struct fib6_node *fn; | 
| @@ -296,11 +246,11 @@ insert_above: | |||
| 296 | 246 | ||
| 297 | /* find 1st bit in difference between the 2 addrs. | 247 | /* find 1st bit in difference between the 2 addrs. | 
| 298 | 248 | ||
| 299 | See comment in addr_diff: bit may be an invalid value, | 249 | See comment in __ipv6_addr_diff: bit may be an invalid value, | 
| 300 | but if it is >= plen, the value is ignored in any case. | 250 | but if it is >= plen, the value is ignored in any case. | 
| 301 | */ | 251 | */ | 
| 302 | 252 | ||
| 303 | bit = addr_diff(addr, &key->addr, addrlen); | 253 | bit = __ipv6_addr_diff(addr, &key->addr, addrlen); | 
| 304 | 254 | ||
| 305 | /* | 255 | /* | 
| 306 | * (intermediate)[in] | 256 | * (intermediate)[in] | 
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 6e3480426939..a6026d2787d2 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c  | |||
| @@ -176,6 +176,11 @@ resubmit: | |||
| 176 | if (ipprot->flags & INET6_PROTO_FINAL) { | 176 | if (ipprot->flags & INET6_PROTO_FINAL) { | 
| 177 | struct ipv6hdr *hdr; | 177 | struct ipv6hdr *hdr; | 
| 178 | 178 | ||
| 179 | /* Free reference early: we don't need it any more, | ||
| 180 | and it may hold ip_conntrack module loaded | ||
| 181 | indefinitely. */ | ||
| 182 | nf_reset(skb); | ||
| 183 | |||
| 179 | skb_postpull_rcsum(skb, skb->nh.raw, | 184 | skb_postpull_rcsum(skb, skb->nh.raw, | 
| 180 | skb->h.raw - skb->nh.raw); | 185 | skb->h.raw - skb->nh.raw); | 
| 181 | hdr = skb->nh.ipv6h; | 186 | hdr = skb->nh.ipv6h; | 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 614296a920c6..c1fa693511a1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c  | |||
| @@ -441,9 +441,15 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
| 441 | #ifdef CONFIG_NETFILTER | 441 | #ifdef CONFIG_NETFILTER | 
| 442 | to->nfmark = from->nfmark; | 442 | to->nfmark = from->nfmark; | 
| 443 | /* Connection association is same as pre-frag packet */ | 443 | /* Connection association is same as pre-frag packet */ | 
| 444 | nf_conntrack_put(to->nfct); | ||
| 444 | to->nfct = from->nfct; | 445 | to->nfct = from->nfct; | 
| 445 | nf_conntrack_get(to->nfct); | 446 | nf_conntrack_get(to->nfct); | 
| 446 | to->nfctinfo = from->nfctinfo; | 447 | to->nfctinfo = from->nfctinfo; | 
| 448 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | ||
| 449 | nf_conntrack_put_reasm(to->nfct_reasm); | ||
| 450 | to->nfct_reasm = from->nfct_reasm; | ||
| 451 | nf_conntrack_get_reasm(to->nfct_reasm); | ||
| 452 | #endif | ||
| 447 | #ifdef CONFIG_BRIDGE_NETFILTER | 453 | #ifdef CONFIG_BRIDGE_NETFILTER | 
| 448 | nf_bridge_put(to->nf_bridge); | 454 | nf_bridge_put(to->nf_bridge); | 
| 449 | to->nf_bridge = from->nf_bridge; | 455 | to->nf_bridge = from->nf_bridge; | 
| @@ -587,8 +593,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
| 587 | skb->next = NULL; | 593 | skb->next = NULL; | 
| 588 | } | 594 | } | 
| 589 | 595 | ||
| 590 | if (tmp_hdr) | 596 | kfree(tmp_hdr); | 
| 591 | kfree(tmp_hdr); | ||
| 592 | 597 | ||
| 593 | if (err == 0) { | 598 | if (err == 0) { | 
| 594 | IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); | 599 | IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); | 
| @@ -1186,10 +1191,8 @@ int ip6_push_pending_frames(struct sock *sk) | |||
| 1186 | 1191 | ||
| 1187 | out: | 1192 | out: | 
| 1188 | inet->cork.flags &= ~IPCORK_OPT; | 1193 | inet->cork.flags &= ~IPCORK_OPT; | 
| 1189 | if (np->cork.opt) { | 1194 | kfree(np->cork.opt); | 
| 1190 | kfree(np->cork.opt); | 1195 | np->cork.opt = NULL; | 
| 1191 | np->cork.opt = NULL; | ||
| 1192 | } | ||
| 1193 | if (np->cork.rt) { | 1196 | if (np->cork.rt) { | 
| 1194 | dst_release(&np->cork.rt->u.dst); | 1197 | dst_release(&np->cork.rt->u.dst); | 
| 1195 | np->cork.rt = NULL; | 1198 | np->cork.rt = NULL; | 
| @@ -1214,10 +1217,8 @@ void ip6_flush_pending_frames(struct sock *sk) | |||
| 1214 | 1217 | ||
| 1215 | inet->cork.flags &= ~IPCORK_OPT; | 1218 | inet->cork.flags &= ~IPCORK_OPT; | 
| 1216 | 1219 | ||
| 1217 | if (np->cork.opt) { | 1220 | kfree(np->cork.opt); | 
| 1218 | kfree(np->cork.opt); | 1221 | np->cork.opt = NULL; | 
| 1219 | np->cork.opt = NULL; | ||
| 1220 | } | ||
| 1221 | if (np->cork.rt) { | 1222 | if (np->cork.rt) { | 
| 1222 | dst_release(&np->cork.rt->u.dst); | 1223 | dst_release(&np->cork.rt->u.dst); | 
| 1223 | np->cork.rt = NULL; | 1224 | np->cork.rt = NULL; | 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index cf94372d1af3..e315d0f80af1 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c  | |||
| @@ -525,6 +525,7 @@ ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) | |||
| 525 | 525 | ||
| 526 | if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) { | 526 | if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) { | 
| 527 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 527 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 
| 528 | read_unlock(&ip6ip6_lock); | ||
| 528 | kfree_skb(skb); | 529 | kfree_skb(skb); | 
| 529 | return 0; | 530 | return 0; | 
| 530 | } | 531 | } | 
| @@ -756,8 +757,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) | |||
| 756 | } | 757 | } | 
| 757 | ip6_tnl_dst_store(t, dst); | 758 | ip6_tnl_dst_store(t, dst); | 
| 758 | 759 | ||
| 759 | if (opt) | 760 | kfree(opt); | 
| 760 | kfree(opt); | ||
| 761 | 761 | ||
| 762 | t->recursion--; | 762 | t->recursion--; | 
| 763 | return 0; | 763 | return 0; | 
| @@ -766,8 +766,7 @@ tx_err_link_failure: | |||
| 766 | dst_link_failure(skb); | 766 | dst_link_failure(skb); | 
| 767 | tx_err_dst_release: | 767 | tx_err_dst_release: | 
| 768 | dst_release(dst); | 768 | dst_release(dst); | 
| 769 | if (opt) | 769 | kfree(opt); | 
| 770 | kfree(opt); | ||
| 771 | tx_err: | 770 | tx_err: | 
| 772 | stats->tx_errors++; | 771 | stats->tx_errors++; | 
| 773 | stats->tx_dropped++; | 772 | stats->tx_dropped++; | 
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 85bfbc69b2c3..55917fb17094 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c  | |||
| @@ -130,8 +130,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, s | |||
| 130 | out_put_cpu: | 130 | out_put_cpu: | 
| 131 | put_cpu(); | 131 | put_cpu(); | 
| 132 | out: | 132 | out: | 
| 133 | if (tmp_hdr) | 133 | kfree(tmp_hdr); | 
| 134 | kfree(tmp_hdr); | ||
| 135 | if (err) | 134 | if (err) | 
| 136 | goto error_out; | 135 | goto error_out; | 
| 137 | return nexthdr; | 136 | return nexthdr; | 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 8567873d0dd8..003fd99ff597 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c  | |||
| @@ -80,8 +80,7 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) | |||
| 80 | if (ra->sk == sk) { | 80 | if (ra->sk == sk) { | 
| 81 | if (sel>=0) { | 81 | if (sel>=0) { | 
| 82 | write_unlock_bh(&ip6_ra_lock); | 82 | write_unlock_bh(&ip6_ra_lock); | 
| 83 | if (new_ra) | 83 | kfree(new_ra); | 
| 84 | kfree(new_ra); | ||
| 85 | return -EADDRINUSE; | 84 | return -EADDRINUSE; | 
| 86 | } | 85 | } | 
| 87 | 86 | ||
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 37a4a99c9fe9..16482785bdfd 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c  | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <net/ip6_route.h> | 7 | #include <net/ip6_route.h> | 
| 8 | #include <net/xfrm.h> | 8 | #include <net/xfrm.h> | 
| 9 | 9 | ||
| 10 | EXPORT_SYMBOL(ipv6_addr_type); | 10 | EXPORT_SYMBOL(__ipv6_addr_type); | 
| 11 | EXPORT_SYMBOL(icmpv6_send); | 11 | EXPORT_SYMBOL(icmpv6_send); | 
| 12 | EXPORT_SYMBOL(icmpv6_statistics); | 12 | EXPORT_SYMBOL(icmpv6_statistics); | 
| 13 | EXPORT_SYMBOL(icmpv6_err_convert); | 13 | EXPORT_SYMBOL(icmpv6_err_convert); | 
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index bb7ccfe33f23..971ba60bf6e9 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig  | |||
| @@ -278,5 +278,19 @@ config IP6_NF_RAW | |||
| 278 | If you want to compile it as a module, say M here and read | 278 | If you want to compile it as a module, say M here and read | 
| 279 | <file:Documentation/modules.txt>. If unsure, say `N'. | 279 | <file:Documentation/modules.txt>. If unsure, say `N'. | 
| 280 | 280 | ||
| 281 | config NF_CONNTRACK_IPV6 | ||
| 282 | tristate "IPv6 support for new connection tracking (EXPERIMENTAL)" | ||
| 283 | depends on EXPERIMENTAL && NF_CONNTRACK | ||
| 284 | ---help--- | ||
| 285 | Connection tracking keeps a record of what packets have passed | ||
| 286 | through your machine, in order to figure out how they are related | ||
| 287 | into connections. | ||
| 288 | |||
| 289 | This is IPv6 support on Layer 3 independent connection tracking. | ||
| 290 | Layer 3 independent connection tracking is an experimental scheme | ||
| 291 | which generalizes ip_conntrack to support other layer 3 protocols. | ||
| 292 | |||
| 293 | To compile it as a module, choose M here. If unsure, say N. | ||
| 294 | |||
| 281 | endmenu | 295 | endmenu | 
| 282 | 296 | ||
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2b2c370e8b1c..9ab5b2ca1f59 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile  | |||
| @@ -27,3 +27,9 @@ obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o | |||
| 27 | obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o | 27 | obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o | 
| 28 | obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o | 28 | obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o | 
| 29 | obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o | 29 | obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o | 
| 30 | |||
| 31 | # objects for l3 independent conntrack | ||
| 32 | nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o | ||
| 33 | |||
| 34 | # l3 independent conntrack | ||
| 35 | obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o | ||
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c index 0c7584f92172..eab8fb864ee0 100644 --- a/net/ipv6/netfilter/ip6t_MARK.c +++ b/net/ipv6/netfilter/ip6t_MARK.c  | |||
| @@ -56,9 +56,9 @@ checkentry(const char *tablename, | |||
| 56 | return 1; | 56 | return 1; | 
| 57 | } | 57 | } | 
| 58 | 58 | ||
| 59 | static struct ip6t_target ip6t_mark_reg = { | 59 | static struct ip6t_target ip6t_mark_reg = { | 
| 60 | .name = "MARK", | 60 | .name = "MARK", | 
| 61 | .target = target, | 61 | .target = target, | 
| 62 | .checkentry = checkentry, | 62 | .checkentry = checkentry, | 
| 63 | .me = THIS_MODULE | 63 | .me = THIS_MODULE | 
| 64 | }; | 64 | }; | 
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c new file mode 100644 index 000000000000..e2c90b3a8074 --- /dev/null +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c  | |||
| @@ -0,0 +1,556 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C)2004 USAGI/WIDE Project | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License version 2 as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * Author: | ||
| 9 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 10 | * | ||
| 11 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 12 | * - support Layer 3 protocol independent connection tracking. | ||
| 13 | * Based on the original ip_conntrack code which had the following | ||
| 14 | * copyright information: | ||
| 15 | * (C) 1999-2001 Paul `Rusty' Russell | ||
| 16 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
| 17 | * | ||
| 18 | * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 19 | * - add get_features() to support various size of conntrack | ||
| 20 | * structures. | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/config.h> | ||
| 24 | #include <linux/types.h> | ||
| 25 | #include <linux/ipv6.h> | ||
| 26 | #include <linux/in6.h> | ||
| 27 | #include <linux/netfilter.h> | ||
| 28 | #include <linux/module.h> | ||
| 29 | #include <linux/skbuff.h> | ||
| 30 | #include <linux/icmp.h> | ||
| 31 | #include <linux/sysctl.h> | ||
| 32 | #include <net/ipv6.h> | ||
| 33 | |||
| 34 | #include <linux/netfilter_ipv6.h> | ||
| 35 | #include <net/netfilter/nf_conntrack.h> | ||
| 36 | #include <net/netfilter/nf_conntrack_helper.h> | ||
| 37 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 38 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
| 39 | #include <net/netfilter/nf_conntrack_core.h> | ||
| 40 | |||
| 41 | #if 0 | ||
| 42 | #define DEBUGP printk | ||
| 43 | #else | ||
| 44 | #define DEBUGP(format, args...) | ||
| 45 | #endif | ||
| 46 | |||
| 47 | DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); | ||
| 48 | |||
| 49 | static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | ||
| 50 | struct nf_conntrack_tuple *tuple) | ||
| 51 | { | ||
| 52 | u_int32_t _addrs[8], *ap; | ||
| 53 | |||
| 54 | ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), | ||
| 55 | sizeof(_addrs), _addrs); | ||
| 56 | if (ap == NULL) | ||
| 57 | return 0; | ||
| 58 | |||
| 59 | memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); | ||
| 60 | memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); | ||
| 61 | |||
| 62 | return 1; | ||
| 63 | } | ||
| 64 | |||
| 65 | static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
| 66 | const struct nf_conntrack_tuple *orig) | ||
| 67 | { | ||
| 68 | memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6)); | ||
| 69 | memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6)); | ||
| 70 | |||
| 71 | return 1; | ||
| 72 | } | ||
| 73 | |||
| 74 | static int ipv6_print_tuple(struct seq_file *s, | ||
| 75 | const struct nf_conntrack_tuple *tuple) | ||
| 76 | { | ||
| 77 | return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ", | ||
| 78 | NIP6(*((struct in6_addr *)tuple->src.u3.ip6)), | ||
| 79 | NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); | ||
| 80 | } | ||
| 81 | |||
| 82 | static int ipv6_print_conntrack(struct seq_file *s, | ||
| 83 | const struct nf_conn *conntrack) | ||
| 84 | { | ||
| 85 | return 0; | ||
| 86 | } | ||
| 87 | |||
| 88 | /* | ||
| 89 | * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c | ||
| 90 | * | ||
| 91 | * This function parses (probably truncated) exthdr set "hdr" | ||
| 92 | * of length "len". "nexthdrp" initially points to some place, | ||
| 93 | * where type of the first header can be found. | ||
| 94 | * | ||
| 95 | * It skips all well-known exthdrs, and returns pointer to the start | ||
| 96 | * of unparsable area i.e. the first header with unknown type. | ||
| 97 | * On success, *nexthdrp is updated with the type/protocol of this header. | ||
| 98 | * | ||
| 99 | * NOTES: - it may return pointer pointing beyond end of packet, | ||
| 100 | * if the last recognized header is truncated in the middle. | ||
| 101 | * - if packet is truncated, so that all parsed headers are skipped, | ||
| 102 | * it returns -1. | ||
| 103 | * - if packet is fragmented, return pointer of the fragment header. | ||
| 104 | * - ESP is unparsable for now and considered like | ||
| 105 | * normal payload protocol. | ||
| 106 | * - Note also special handling of AUTH header. Thanks to IPsec wizards. | ||
| 107 | */ | ||
| 108 | |||
| 109 | int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, | ||
| 110 | int len) | ||
| 111 | { | ||
| 112 | u8 nexthdr = *nexthdrp; | ||
| 113 | |||
| 114 | while (ipv6_ext_hdr(nexthdr)) { | ||
| 115 | struct ipv6_opt_hdr hdr; | ||
| 116 | int hdrlen; | ||
| 117 | |||
| 118 | if (len < (int)sizeof(struct ipv6_opt_hdr)) | ||
| 119 | return -1; | ||
| 120 | if (nexthdr == NEXTHDR_NONE) | ||
| 121 | break; | ||
| 122 | if (nexthdr == NEXTHDR_FRAGMENT) | ||
| 123 | break; | ||
| 124 | if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) | ||
| 125 | BUG(); | ||
| 126 | if (nexthdr == NEXTHDR_AUTH) | ||
| 127 | hdrlen = (hdr.hdrlen+2)<<2; | ||
| 128 | else | ||
| 129 | hdrlen = ipv6_optlen(&hdr); | ||
| 130 | |||
| 131 | nexthdr = hdr.nexthdr; | ||
| 132 | len -= hdrlen; | ||
| 133 | start += hdrlen; | ||
| 134 | } | ||
| 135 | |||
| 136 | *nexthdrp = nexthdr; | ||
| 137 | return start; | ||
| 138 | } | ||
| 139 | |||
| 140 | static int | ||
| 141 | ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, | ||
| 142 | u_int8_t *protonum) | ||
| 143 | { | ||
| 144 | unsigned int extoff; | ||
| 145 | unsigned char pnum; | ||
| 146 | int protoff; | ||
| 147 | |||
| 148 | extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data; | ||
| 149 | pnum = (*pskb)->nh.ipv6h->nexthdr; | ||
| 150 | |||
| 151 | protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, | ||
| 152 | (*pskb)->len - extoff); | ||
| 153 | |||
| 154 | /* | ||
| 155 | * (protoff == (*pskb)->len) mean that the packet doesn't have no data | ||
| 156 | * except of IPv6 & ext headers. but it's tracked anyway. - YK | ||
| 157 | */ | ||
| 158 | if ((protoff < 0) || (protoff > (*pskb)->len)) { | ||
| 159 | DEBUGP("ip6_conntrack_core: can't find proto in pkt\n"); | ||
| 160 | NF_CT_STAT_INC(error); | ||
| 161 | NF_CT_STAT_INC(invalid); | ||
| 162 | return -NF_ACCEPT; | ||
| 163 | } | ||
| 164 | |||
| 165 | *dataoff = protoff; | ||
| 166 | *protonum = pnum; | ||
| 167 | return NF_ACCEPT; | ||
| 168 | } | ||
| 169 | |||
| 170 | static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple) | ||
| 171 | { | ||
| 172 | return NF_CT_F_BASIC; | ||
| 173 | } | ||
| 174 | |||
| 175 | static unsigned int ipv6_confirm(unsigned int hooknum, | ||
| 176 | struct sk_buff **pskb, | ||
| 177 | const struct net_device *in, | ||
| 178 | const struct net_device *out, | ||
| 179 | int (*okfn)(struct sk_buff *)) | ||
| 180 | { | ||
| 181 | struct nf_conn *ct; | ||
| 182 | enum ip_conntrack_info ctinfo; | ||
| 183 | |||
| 184 | /* This is where we call the helper: as the packet goes out. */ | ||
| 185 | ct = nf_ct_get(*pskb, &ctinfo); | ||
| 186 | if (ct && ct->helper) { | ||
| 187 | unsigned int ret, protoff; | ||
| 188 | unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1) | ||
| 189 | - (*pskb)->data; | ||
| 190 | unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr; | ||
| 191 | |||
| 192 | protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, | ||
| 193 | (*pskb)->len - extoff); | ||
| 194 | if (protoff < 0 || protoff > (*pskb)->len || | ||
| 195 | pnum == NEXTHDR_FRAGMENT) { | ||
| 196 | DEBUGP("proto header not found\n"); | ||
| 197 | return NF_ACCEPT; | ||
| 198 | } | ||
| 199 | |||
| 200 | ret = ct->helper->help(pskb, protoff, ct, ctinfo); | ||
| 201 | if (ret != NF_ACCEPT) | ||
| 202 | return ret; | ||
| 203 | } | ||
| 204 | |||
| 205 | /* We've seen it coming out the other side: confirm it */ | ||
| 206 | |||
| 207 | return nf_conntrack_confirm(pskb); | ||
| 208 | } | ||
| 209 | |||
| 210 | extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb); | ||
| 211 | extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, | ||
| 212 | struct net_device *in, | ||
| 213 | struct net_device *out, | ||
| 214 | int (*okfn)(struct sk_buff *)); | ||
| 215 | static unsigned int ipv6_defrag(unsigned int hooknum, | ||
| 216 | struct sk_buff **pskb, | ||
| 217 | const struct net_device *in, | ||
| 218 | const struct net_device *out, | ||
| 219 | int (*okfn)(struct sk_buff *)) | ||
| 220 | { | ||
| 221 | struct sk_buff *reasm; | ||
| 222 | |||
| 223 | /* Previously seen (loopback)? */ | ||
| 224 | if ((*pskb)->nfct) | ||
| 225 | return NF_ACCEPT; | ||
| 226 | |||
| 227 | reasm = nf_ct_frag6_gather(*pskb); | ||
| 228 | |||
| 229 | /* queued */ | ||
| 230 | if (reasm == NULL) | ||
| 231 | return NF_STOLEN; | ||
| 232 | |||
| 233 | /* error occured or not fragmented */ | ||
| 234 | if (reasm == *pskb) | ||
| 235 | return NF_ACCEPT; | ||
| 236 | |||
| 237 | nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, | ||
| 238 | (struct net_device *)out, okfn); | ||
| 239 | |||
| 240 | return NF_STOLEN; | ||
| 241 | } | ||
| 242 | |||
| 243 | static unsigned int ipv6_conntrack_in(unsigned int hooknum, | ||
| 244 | struct sk_buff **pskb, | ||
| 245 | const struct net_device *in, | ||
| 246 | const struct net_device *out, | ||
| 247 | int (*okfn)(struct sk_buff *)) | ||
| 248 | { | ||
| 249 | struct sk_buff *reasm = (*pskb)->nfct_reasm; | ||
| 250 | |||
| 251 | /* This packet is fragmented and has reassembled packet. */ | ||
| 252 | if (reasm) { | ||
| 253 | /* Reassembled packet isn't parsed yet ? */ | ||
| 254 | if (!reasm->nfct) { | ||
| 255 | unsigned int ret; | ||
| 256 | |||
| 257 | ret = nf_conntrack_in(PF_INET6, hooknum, &reasm); | ||
| 258 | if (ret != NF_ACCEPT) | ||
| 259 | return ret; | ||
| 260 | } | ||
| 261 | nf_conntrack_get(reasm->nfct); | ||
| 262 | (*pskb)->nfct = reasm->nfct; | ||
| 263 | return NF_ACCEPT; | ||
| 264 | } | ||
| 265 | |||
| 266 | return nf_conntrack_in(PF_INET6, hooknum, pskb); | ||
| 267 | } | ||
| 268 | |||
| 269 | static unsigned int ipv6_conntrack_local(unsigned int hooknum, | ||
| 270 | struct sk_buff **pskb, | ||
| 271 | const struct net_device *in, | ||
| 272 | const struct net_device *out, | ||
| 273 | int (*okfn)(struct sk_buff *)) | ||
| 274 | { | ||
| 275 | /* root is playing with raw sockets. */ | ||
| 276 | if ((*pskb)->len < sizeof(struct ipv6hdr)) { | ||
| 277 | if (net_ratelimit()) | ||
| 278 | printk("ipv6_conntrack_local: packet too short\n"); | ||
| 279 | return NF_ACCEPT; | ||
| 280 | } | ||
| 281 | return ipv6_conntrack_in(hooknum, pskb, in, out, okfn); | ||
| 282 | } | ||
| 283 | |||
| 284 | /* Connection tracking may drop packets, but never alters them, so | ||
| 285 | make it the first hook. */ | ||
| 286 | static struct nf_hook_ops ipv6_conntrack_defrag_ops = { | ||
| 287 | .hook = ipv6_defrag, | ||
| 288 | .owner = THIS_MODULE, | ||
| 289 | .pf = PF_INET6, | ||
| 290 | .hooknum = NF_IP6_PRE_ROUTING, | ||
| 291 | .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, | ||
| 292 | }; | ||
| 293 | |||
| 294 | static struct nf_hook_ops ipv6_conntrack_in_ops = { | ||
| 295 | .hook = ipv6_conntrack_in, | ||
| 296 | .owner = THIS_MODULE, | ||
| 297 | .pf = PF_INET6, | ||
| 298 | .hooknum = NF_IP6_PRE_ROUTING, | ||
| 299 | .priority = NF_IP6_PRI_CONNTRACK, | ||
| 300 | }; | ||
| 301 | |||
| 302 | static struct nf_hook_ops ipv6_conntrack_local_out_ops = { | ||
| 303 | .hook = ipv6_conntrack_local, | ||
| 304 | .owner = THIS_MODULE, | ||
| 305 | .pf = PF_INET6, | ||
| 306 | .hooknum = NF_IP6_LOCAL_OUT, | ||
| 307 | .priority = NF_IP6_PRI_CONNTRACK, | ||
| 308 | }; | ||
| 309 | |||
| 310 | static struct nf_hook_ops ipv6_conntrack_defrag_local_out_ops = { | ||
| 311 | .hook = ipv6_defrag, | ||
| 312 | .owner = THIS_MODULE, | ||
| 313 | .pf = PF_INET6, | ||
| 314 | .hooknum = NF_IP6_LOCAL_OUT, | ||
| 315 | .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, | ||
| 316 | }; | ||
| 317 | |||
| 318 | /* Refragmenter; last chance. */ | ||
| 319 | static struct nf_hook_ops ipv6_conntrack_out_ops = { | ||
| 320 | .hook = ipv6_confirm, | ||
| 321 | .owner = THIS_MODULE, | ||
| 322 | .pf = PF_INET6, | ||
| 323 | .hooknum = NF_IP6_POST_ROUTING, | ||
| 324 | .priority = NF_IP6_PRI_LAST, | ||
| 325 | }; | ||
| 326 | |||
| 327 | static struct nf_hook_ops ipv6_conntrack_local_in_ops = { | ||
| 328 | .hook = ipv6_confirm, | ||
| 329 | .owner = THIS_MODULE, | ||
| 330 | .pf = PF_INET6, | ||
| 331 | .hooknum = NF_IP6_LOCAL_IN, | ||
| 332 | .priority = NF_IP6_PRI_LAST-1, | ||
| 333 | }; | ||
| 334 | |||
#ifdef CONFIG_SYSCTL

/* From nf_conntrack_proto_icmpv6.c */
extern unsigned long nf_ct_icmpv6_timeout;

/*
 * From nf_conntrack_reasm.c.  The three variables are defined there as
 * plain int, so they are declared as int here too; declaring them with a
 * different type than their definition is undefined behaviour and would
 * also disagree with the int-sized sysctl handlers below.
 */
extern int nf_ct_frag6_timeout;
extern int nf_ct_frag6_low_thresh;
extern int nf_ct_frag6_high_thresh;

static struct ctl_table_header *nf_ct_ipv6_sysctl_header;

/* Tunables published under net/netfilter/. */
static ctl_table nf_ct_sysctl_table[] = {
	{
		/*
		 * NOTE(review): the variable is an unsigned long while the
		 * handler works on an int-sized object; this only lines up
		 * where the low-order int of the long is at offset 0.
		 * Fixing it requires changing the definition in
		 * nf_conntrack_proto_icmpv6.c as well.
		 */
		.ctl_name	= NET_NF_CONNTRACK_ICMPV6_TIMEOUT,
		.procname	= "nf_conntrack_icmpv6_timeout",
		.data		= &nf_ct_icmpv6_timeout,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		/* A time value kept in jiffies, shown to the user in seconds. */
		.ctl_name	= NET_NF_CONNTRACK_FRAG6_TIMEOUT,
		.procname	= "nf_conntrack_frag6_timeout",
		.data		= &nf_ct_frag6_timeout,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		/*
		 * Memory threshold in bytes, not a time: it must use the
		 * plain integer handler, otherwise every value written or
		 * read would be scaled by HZ.
		 */
		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
		.procname	= "nf_conntrack_frag6_low_thresh",
		.data		= &nf_ct_frag6_low_thresh,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		/* Memory threshold in bytes; see the low threshold above. */
		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
		.procname	= "nf_conntrack_frag6_high_thresh",
		.data		= &nf_ct_frag6_high_thresh,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{ .ctl_name = 0 }
};

/* "netfilter" directory holding the table above. */
static ctl_table nf_ct_netfilter_table[] = {
	{
		.ctl_name	= NET_NETFILTER,
		.procname	= "netfilter",
		.mode		= 0555,
		.child		= nf_ct_sysctl_table,
	},
	{ .ctl_name = 0 }
};

/* "net" root directory; this is the table that gets registered. */
static ctl_table nf_ct_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= nf_ct_netfilter_table,
	},
	{ .ctl_name = 0 }
};
#endif
| 403 | |||
| 404 | struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { | ||
| 405 | .l3proto = PF_INET6, | ||
| 406 | .name = "ipv6", | ||
| 407 | .pkt_to_tuple = ipv6_pkt_to_tuple, | ||
| 408 | .invert_tuple = ipv6_invert_tuple, | ||
| 409 | .print_tuple = ipv6_print_tuple, | ||
| 410 | .print_conntrack = ipv6_print_conntrack, | ||
| 411 | .prepare = ipv6_prepare, | ||
| 412 | .get_features = ipv6_get_features, | ||
| 413 | .me = THIS_MODULE, | ||
| 414 | }; | ||
| 415 | |||
/* Layer-4 trackers and fragment-reassembly entry points defined elsewhere. */
extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6;
extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6;
extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6;
extern int nf_ct_frag6_init(void);
extern void nf_ct_frag6_cleanup(void);

/*
 * Bring the IPv6 conntrack support up (init != 0) or tear it down
 * (init == 0).
 *
 * Set-up registers, in order: fragment reassembly, the tcp/udp/icmpv6
 * protocol trackers, the IPv6 l3proto, the six netfilter hooks and, with
 * CONFIG_SYSCTL, the sysctl table.  The labels at the bottom undo those
 * steps in exactly the reverse order; a failing step jumps to the label
 * that unwinds only what was already registered, and tear-down enters at
 * "cleanup" and falls through all of them.
 *
 * Returns the failing step's error code, or the value left in ret otherwise
 * (0 on the tear-down path).
 */
static int init_or_cleanup(int init)
{
	int ret = 0;

	if (!init) goto cleanup;

	ret = nf_ct_frag6_init();
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't initialize frag6.\n");
		goto cleanup_nothing;
	}
	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6);
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't register tcp.\n");
		goto cleanup_frag6;
	}

	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6);
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't register udp.\n");
		goto cleanup_tcp;
	}

	ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6);
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't register icmpv6.\n");
		goto cleanup_udp;
	}

	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't register ipv6\n");
		goto cleanup_icmpv6;
	}

	ret = nf_register_hook(&ipv6_conntrack_defrag_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't register pre-routing defrag "
		       "hook.\n");
		goto cleanup_ipv6;
	}

	ret = nf_register_hook(&ipv6_conntrack_defrag_local_out_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't register local_out defrag "
		       "hook.\n");
		goto cleanup_defragops;
	}

	ret = nf_register_hook(&ipv6_conntrack_in_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't register pre-routing hook.\n");
		goto cleanup_defraglocalops;
	}

	ret = nf_register_hook(&ipv6_conntrack_local_out_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't register local out hook.\n");
		goto cleanup_inops;
	}

	ret = nf_register_hook(&ipv6_conntrack_out_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't register post-routing hook.\n");
		goto cleanup_inandlocalops;
	}

	ret = nf_register_hook(&ipv6_conntrack_local_in_ops);
	if (ret < 0) {
		printk("nf_conntrack_ipv6: can't register local in hook.\n");
		goto cleanup_inoutandlocalops;
	}

#ifdef CONFIG_SYSCTL
	nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
	if (nf_ct_ipv6_sysctl_header == NULL) {
		printk("nf_conntrack: can't register to sysctl.\n");
		/* register_sysctl_table() gives no error code; pick one. */
		ret = -ENOMEM;
		goto cleanup_localinops;
	}
#endif
	return ret;

	/*
	 * Unwind ladder: each label undoes one registration and falls
	 * through to the next, so the order below must stay the exact
	 * reverse of the set-up order above.
	 */
 cleanup:
	synchronize_net();
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(nf_ct_ipv6_sysctl_header);
 cleanup_localinops:
#endif
	nf_unregister_hook(&ipv6_conntrack_local_in_ops);
 cleanup_inoutandlocalops:
	nf_unregister_hook(&ipv6_conntrack_out_ops);
 cleanup_inandlocalops:
	nf_unregister_hook(&ipv6_conntrack_local_out_ops);
 cleanup_inops:
	nf_unregister_hook(&ipv6_conntrack_in_ops);
 cleanup_defraglocalops:
	nf_unregister_hook(&ipv6_conntrack_defrag_local_out_ops);
 cleanup_defragops:
	nf_unregister_hook(&ipv6_conntrack_defrag_ops);
 cleanup_ipv6:
	nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
 cleanup_icmpv6:
	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6);
 cleanup_udp:
	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6);
 cleanup_tcp:
	nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6);
 cleanup_frag6:
	nf_ct_frag6_cleanup();
 cleanup_nothing:
	return ret;
}
| 534 | |||
| 535 | MODULE_LICENSE("GPL"); | ||
| 536 | MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); | ||
| 537 | |||
| 538 | static int __init init(void) | ||
| 539 | { | ||
| 540 | need_nf_conntrack(); | ||
| 541 | return init_or_cleanup(1); | ||
| 542 | } | ||
| 543 | |||
| 544 | static void __exit fini(void) | ||
| 545 | { | ||
| 546 | init_or_cleanup(0); | ||
| 547 | } | ||
| 548 | |||
| 549 | module_init(init); | ||
| 550 | module_exit(fini); | ||
| 551 | |||
| 552 | void need_ip6_conntrack(void) | ||
| 553 | { | ||
| 554 | } | ||
| 555 | |||
| 556 | EXPORT_SYMBOL(need_ip6_conntrack); | ||
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c new file mode 100644 index 000000000000..c0f1da5497a9 --- /dev/null +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c  | |||
| @@ -0,0 +1,272 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C)2003,2004 USAGI/WIDE Project | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License version 2 as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * Author: | ||
| 9 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 10 | * | ||
| 11 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 12 | * - ICMPv6 tracking support. Derived from the original ip_conntrack code | ||
| 13 | * net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following | ||
| 14 | * copyright information: | ||
| 15 | * (C) 1999-2001 Paul `Rusty' Russell | ||
| 16 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
| 17 | */ | ||
| 18 | |||
| 19 | #include <linux/types.h> | ||
| 20 | #include <linux/sched.h> | ||
| 21 | #include <linux/timer.h> | ||
| 22 | #include <linux/module.h> | ||
| 23 | #include <linux/netfilter.h> | ||
| 24 | #include <linux/in6.h> | ||
| 25 | #include <linux/icmpv6.h> | ||
| 26 | #include <linux/ipv6.h> | ||
| 27 | #include <net/ipv6.h> | ||
| 28 | #include <net/ip6_checksum.h> | ||
| 29 | #include <linux/seq_file.h> | ||
| 30 | #include <linux/netfilter_ipv6.h> | ||
| 31 | #include <net/netfilter/nf_conntrack_tuple.h> | ||
| 32 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 33 | #include <net/netfilter/nf_conntrack_core.h> | ||
| 34 | #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> | ||
| 35 | |||
| 36 | unsigned long nf_ct_icmpv6_timeout = 30*HZ; | ||
| 37 | |||
| 38 | #if 0 | ||
| 39 | #define DEBUGP printk | ||
| 40 | #else | ||
| 41 | #define DEBUGP(format, args...) | ||
| 42 | #endif | ||
| 43 | |||
| 44 | static int icmpv6_pkt_to_tuple(const struct sk_buff *skb, | ||
| 45 | unsigned int dataoff, | ||
| 46 | struct nf_conntrack_tuple *tuple) | ||
| 47 | { | ||
| 48 | struct icmp6hdr _hdr, *hp; | ||
| 49 | |||
| 50 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | ||
| 51 | if (hp == NULL) | ||
| 52 | return 0; | ||
| 53 | tuple->dst.u.icmp.type = hp->icmp6_type; | ||
| 54 | tuple->src.u.icmp.id = hp->icmp6_identifier; | ||
| 55 | tuple->dst.u.icmp.code = hp->icmp6_code; | ||
| 56 | |||
| 57 | return 1; | ||
| 58 | } | ||
| 59 | |||
| 60 | static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
| 61 | const struct nf_conntrack_tuple *orig) | ||
| 62 | { | ||
| 63 | /* Add 1; spaces filled with 0. */ | ||
| 64 | static u_int8_t invmap[] = { | ||
| 65 | [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, | ||
| 66 | [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, | ||
| 67 | [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, | ||
| 68 | [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 | ||
| 69 | }; | ||
| 70 | |||
| 71 | __u8 type = orig->dst.u.icmp.type - 128; | ||
| 72 | if (type >= sizeof(invmap) || !invmap[type]) | ||
| 73 | return 0; | ||
| 74 | |||
| 75 | tuple->src.u.icmp.id = orig->src.u.icmp.id; | ||
| 76 | tuple->dst.u.icmp.type = invmap[type] - 1; | ||
| 77 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; | ||
| 78 | return 1; | ||
| 79 | } | ||
| 80 | |||
| 81 | /* Print out the per-protocol part of the tuple. */ | ||
| 82 | static int icmpv6_print_tuple(struct seq_file *s, | ||
| 83 | const struct nf_conntrack_tuple *tuple) | ||
| 84 | { | ||
| 85 | return seq_printf(s, "type=%u code=%u id=%u ", | ||
| 86 | tuple->dst.u.icmp.type, | ||
| 87 | tuple->dst.u.icmp.code, | ||
| 88 | ntohs(tuple->src.u.icmp.id)); | ||
| 89 | } | ||
| 90 | |||
/* ICMPv6 has no private conntrack state to print: writes nothing, returns 0. */
static int
icmpv6_print_conntrack(struct seq_file *s, const struct nf_conn *conntrack)
{
	return 0;
}
| 97 | |||
| 98 | /* Returns verdict for packet, or -1 for invalid. */ | ||
| 99 | static int icmpv6_packet(struct nf_conn *ct, | ||
| 100 | const struct sk_buff *skb, | ||
| 101 | unsigned int dataoff, | ||
| 102 | enum ip_conntrack_info ctinfo, | ||
| 103 | int pf, | ||
| 104 | unsigned int hooknum) | ||
| 105 | { | ||
| 106 | /* Try to delete connection immediately after all replies: | ||
| 107 | won't actually vanish as we still have skb, and del_timer | ||
| 108 | means this will only run once even if count hits zero twice | ||
| 109 | (theoretically possible with SMP) */ | ||
| 110 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { | ||
| 111 | if (atomic_dec_and_test(&ct->proto.icmp.count) | ||
| 112 | && del_timer(&ct->timeout)) | ||
| 113 | ct->timeout.function((unsigned long)ct); | ||
| 114 | } else { | ||
| 115 | atomic_inc(&ct->proto.icmp.count); | ||
| 116 | nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); | ||
| 117 | nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); | ||
| 118 | } | ||
| 119 | |||
| 120 | return NF_ACCEPT; | ||
| 121 | } | ||
| 122 | |||
| 123 | /* Called when a new connection for this protocol found. */ | ||
| 124 | static int icmpv6_new(struct nf_conn *conntrack, | ||
| 125 | const struct sk_buff *skb, | ||
| 126 | unsigned int dataoff) | ||
| 127 | { | ||
| 128 | static u_int8_t valid_new[] = { | ||
| 129 | [ICMPV6_ECHO_REQUEST - 128] = 1, | ||
| 130 | [ICMPV6_NI_QUERY - 128] = 1 | ||
| 131 | }; | ||
| 132 | |||
| 133 | if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new) | ||
| 134 | || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) { | ||
| 135 | /* Can't create a new ICMPv6 `conn' with this. */ | ||
| 136 | DEBUGP("icmp: can't create new conn with type %u\n", | ||
| 137 | conntrack->tuplehash[0].tuple.dst.u.icmp.type); | ||
| 138 | NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); | ||
| 139 | return 0; | ||
| 140 | } | ||
| 141 | atomic_set(&conntrack->proto.icmp.count, 0); | ||
| 142 | return 1; | ||
| 143 | } | ||
| 144 | |||
| 145 | extern int | ||
| 146 | nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len); | ||
| 147 | extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; | ||
| 148 | static int | ||
| 149 | icmpv6_error_message(struct sk_buff *skb, | ||
| 150 | unsigned int icmp6off, | ||
| 151 | enum ip_conntrack_info *ctinfo, | ||
| 152 | unsigned int hooknum) | ||
| 153 | { | ||
| 154 | struct nf_conntrack_tuple intuple, origtuple; | ||
| 155 | struct nf_conntrack_tuple_hash *h; | ||
| 156 | struct icmp6hdr _hdr, *hp; | ||
| 157 | unsigned int inip6off; | ||
| 158 | struct nf_conntrack_protocol *inproto; | ||
| 159 | u_int8_t inprotonum; | ||
| 160 | unsigned int inprotoff; | ||
| 161 | |||
| 162 | NF_CT_ASSERT(skb->nfct == NULL); | ||
| 163 | |||
| 164 | hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr); | ||
| 165 | if (hp == NULL) { | ||
| 166 | DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n"); | ||
| 167 | return -NF_ACCEPT; | ||
| 168 | } | ||
| 169 | |||
| 170 | inip6off = icmp6off + sizeof(_hdr); | ||
| 171 | if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr), | ||
| 172 | &inprotonum, sizeof(inprotonum)) != 0) { | ||
| 173 | DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n"); | ||
| 174 | return -NF_ACCEPT; | ||
| 175 | } | ||
| 176 | inprotoff = nf_ct_ipv6_skip_exthdr(skb, | ||
| 177 | inip6off + sizeof(struct ipv6hdr), | ||
| 178 | &inprotonum, | ||
| 179 | skb->len - inip6off | ||
| 180 | - sizeof(struct ipv6hdr)); | ||
| 181 | |||
| 182 | if ((inprotoff < 0) || (inprotoff > skb->len) || | ||
| 183 | (inprotonum == NEXTHDR_FRAGMENT)) { | ||
| 184 | DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n"); | ||
| 185 | return -NF_ACCEPT; | ||
| 186 | } | ||
| 187 | |||
| 188 | inproto = nf_ct_find_proto(PF_INET6, inprotonum); | ||
| 189 | |||
| 190 | /* Are they talking about one of our connections? */ | ||
| 191 | if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, | ||
| 192 | &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) { | ||
| 193 | DEBUGP("icmpv6_error: Can't get tuple\n"); | ||
| 194 | return -NF_ACCEPT; | ||
| 195 | } | ||
| 196 | |||
| 197 | /* Ordinarily, we'd expect the inverted tupleproto, but it's | ||
| 198 | been preserved inside the ICMP. */ | ||
| 199 | if (!nf_ct_invert_tuple(&intuple, &origtuple, | ||
| 200 | &nf_conntrack_l3proto_ipv6, inproto)) { | ||
| 201 | DEBUGP("icmpv6_error: Can't invert tuple\n"); | ||
| 202 | return -NF_ACCEPT; | ||
| 203 | } | ||
| 204 | |||
| 205 | *ctinfo = IP_CT_RELATED; | ||
| 206 | |||
| 207 | h = nf_conntrack_find_get(&intuple, NULL); | ||
| 208 | if (!h) { | ||
| 209 | DEBUGP("icmpv6_error: no match\n"); | ||
| 210 | return -NF_ACCEPT; | ||
| 211 | } else { | ||
| 212 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) | ||
| 213 | *ctinfo += IP_CT_IS_REPLY; | ||
| 214 | } | ||
| 215 | |||
| 216 | /* Update skb to refer to this connection */ | ||
| 217 | skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; | ||
| 218 | skb->nfctinfo = *ctinfo; | ||
| 219 | return -NF_ACCEPT; | ||
| 220 | } | ||
| 221 | |||
| 222 | static int | ||
| 223 | icmpv6_error(struct sk_buff *skb, unsigned int dataoff, | ||
| 224 | enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) | ||
| 225 | { | ||
| 226 | struct icmp6hdr _ih, *icmp6h; | ||
| 227 | |||
| 228 | icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); | ||
| 229 | if (icmp6h == NULL) { | ||
| 230 | if (LOG_INVALID(IPPROTO_ICMPV6)) | ||
| 231 | nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, | ||
| 232 | "nf_ct_icmpv6: short packet "); | ||
| 233 | return -NF_ACCEPT; | ||
| 234 | } | ||
| 235 | |||
| 236 | if (hooknum != NF_IP6_PRE_ROUTING) | ||
| 237 | goto skipped; | ||
| 238 | |||
| 239 | /* Ignore it if the checksum's bogus. */ | ||
| 240 | if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, | ||
| 241 | skb->len - dataoff, IPPROTO_ICMPV6, | ||
| 242 | skb_checksum(skb, dataoff, | ||
| 243 | skb->len - dataoff, 0))) { | ||
| 244 | nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, | ||
| 245 | "nf_ct_icmpv6: ICMPv6 checksum failed\n"); | ||
| 246 | return -NF_ACCEPT; | ||
| 247 | } | ||
| 248 | |||
| 249 | skipped: | ||
| 250 | |||
| 251 | /* is not error message ? */ | ||
| 252 | if (icmp6h->icmp6_type >= 128) | ||
| 253 | return NF_ACCEPT; | ||
| 254 | |||
| 255 | return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); | ||
| 256 | } | ||
| 257 | |||
| 258 | struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = | ||
| 259 | { | ||
| 260 | .l3proto = PF_INET6, | ||
| 261 | .proto = IPPROTO_ICMPV6, | ||
| 262 | .name = "icmpv6", | ||
| 263 | .pkt_to_tuple = icmpv6_pkt_to_tuple, | ||
| 264 | .invert_tuple = icmpv6_invert_tuple, | ||
| 265 | .print_tuple = icmpv6_print_tuple, | ||
| 266 | .print_conntrack = icmpv6_print_conntrack, | ||
| 267 | .packet = icmpv6_packet, | ||
| 268 | .new = icmpv6_new, | ||
| 269 | .error = icmpv6_error, | ||
| 270 | }; | ||
| 271 | |||
| 272 | EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6); | ||
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c new file mode 100644 index 000000000000..7640b9bb7694 --- /dev/null +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c  | |||
| @@ -0,0 +1,885 @@ | |||
| 1 | /* | ||
| 2 | * IPv6 fragment reassembly for connection tracking | ||
| 3 | * | ||
| 4 | * Copyright (C)2004 USAGI/WIDE Project | ||
| 5 | * | ||
| 6 | * Author: | ||
| 7 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 8 | * | ||
| 9 | * Based on: net/ipv6/reassembly.c | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or | ||
| 12 | * modify it under the terms of the GNU General Public License | ||
| 13 | * as published by the Free Software Foundation; either version | ||
| 14 | * 2 of the License, or (at your option) any later version. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <linux/config.h> | ||
| 18 | #include <linux/errno.h> | ||
| 19 | #include <linux/types.h> | ||
| 20 | #include <linux/string.h> | ||
| 21 | #include <linux/socket.h> | ||
| 22 | #include <linux/sockios.h> | ||
| 23 | #include <linux/jiffies.h> | ||
| 24 | #include <linux/net.h> | ||
| 25 | #include <linux/list.h> | ||
| 26 | #include <linux/netdevice.h> | ||
| 27 | #include <linux/in6.h> | ||
| 28 | #include <linux/ipv6.h> | ||
| 29 | #include <linux/icmpv6.h> | ||
| 30 | #include <linux/random.h> | ||
| 31 | #include <linux/jhash.h> | ||
| 32 | |||
| 33 | #include <net/sock.h> | ||
| 34 | #include <net/snmp.h> | ||
| 35 | |||
| 36 | #include <net/ipv6.h> | ||
| 37 | #include <net/protocol.h> | ||
| 38 | #include <net/transp_v6.h> | ||
| 39 | #include <net/rawv6.h> | ||
| 40 | #include <net/ndisc.h> | ||
| 41 | #include <net/addrconf.h> | ||
| 42 | #include <linux/sysctl.h> | ||
| 43 | #include <linux/netfilter.h> | ||
| 44 | #include <linux/netfilter_ipv6.h> | ||
| 45 | #include <linux/kernel.h> | ||
| 46 | #include <linux/module.h> | ||
| 47 | |||
| 48 | #if 0 | ||
| 49 | #define DEBUGP printk | ||
| 50 | #else | ||
| 51 | #define DEBUGP(format, args...) | ||
| 52 | #endif | ||
| 53 | |||
| 54 | #define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */ | ||
| 55 | #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ | ||
| 56 | #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT | ||
| 57 | |||
| 58 | int nf_ct_frag6_high_thresh = 256*1024; | ||
| 59 | int nf_ct_frag6_low_thresh = 192*1024; | ||
| 60 | int nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; | ||
| 61 | |||
/*
 * Per-skb private data, stored in the skb's control buffer (skb->cb) and
 * reached through NFCT_FRAG6_CB().
 */
struct nf_ct_frag6_skb_cb
{
	struct inet6_skb_parm h;
	int offset;		/* NOTE(review): presumably the fragment offset - confirm */
	struct sk_buff *orig;	/* if set, released by frag_kfree_skb() along with the skb */
};

/* View of skb->cb as a struct nf_ct_frag6_skb_cb. */
#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
| 70 | |||
/*
 * One reassembly queue.  Queues are hashed on (id, saddr, daddr) into
 * nf_ct_frag6_hash[] and chained through next/pprev; they are also linked
 * on nf_ct_frag6_lru_list.
 */
struct nf_ct_frag6_queue
{
	struct nf_ct_frag6_queue *next;		/* next queue in the same hash chain */
	struct list_head lru_list; /* lru list member */

	__u32 id; /* fragment id */
	struct in6_addr saddr;
	struct in6_addr daddr;

	spinlock_t lock;
	atomic_t refcnt;
	struct timer_list timer; /* expire timer */
	struct sk_buff *fragments;		/* list of queued fragments, linked via skb->next */
	int len;
	int meat;
	struct timeval stamp;
	unsigned int csum;
	__u8 last_in; /* has first/last segment arrived? */
/* Bit flags stored in last_in. */
#define COMPLETE 4
#define FIRST_IN 2
#define LAST_IN 1
	__u16 nhoffset;
	struct nf_ct_frag6_queue **pprev;	/* address of the pointer that points at this queue */
};
| 95 | |||
/* Hash table. */

/* Number of buckets; ip6qhashfn() masks with (size - 1), so keep it a power of two. */
#define FRAG6Q_HASHSZ 64

static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
/* Taken for writing while queues are unlinked or the table is rehashed. */
static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED;
/* Random value mixed into the hash; replaced by nf_ct_frag6_secret_rebuild(). */
static u32 nf_ct_frag6_hash_rnd;
static LIST_HEAD(nf_ct_frag6_lru_list);
/* Decremented in __fq_unlink() each time a queue is unlinked. */
int nf_ct_frag6_nqueues = 0;
| 105 | |||
| 106 | static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq) | ||
| 107 | { | ||
| 108 | if (fq->next) | ||
| 109 | fq->next->pprev = fq->pprev; | ||
| 110 | *fq->pprev = fq->next; | ||
| 111 | list_del(&fq->lru_list); | ||
| 112 | nf_ct_frag6_nqueues--; | ||
| 113 | } | ||
| 114 | |||
| 115 | static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq) | ||
| 116 | { | ||
| 117 | write_lock(&nf_ct_frag6_lock); | ||
| 118 | __fq_unlink(fq); | ||
| 119 | write_unlock(&nf_ct_frag6_lock); | ||
| 120 | } | ||
| 121 | |||
| 122 | static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, | ||
| 123 | struct in6_addr *daddr) | ||
| 124 | { | ||
| 125 | u32 a, b, c; | ||
| 126 | |||
| 127 | a = saddr->s6_addr32[0]; | ||
| 128 | b = saddr->s6_addr32[1]; | ||
| 129 | c = saddr->s6_addr32[2]; | ||
| 130 | |||
| 131 | a += JHASH_GOLDEN_RATIO; | ||
| 132 | b += JHASH_GOLDEN_RATIO; | ||
| 133 | c += nf_ct_frag6_hash_rnd; | ||
| 134 | __jhash_mix(a, b, c); | ||
| 135 | |||
| 136 | a += saddr->s6_addr32[3]; | ||
| 137 | b += daddr->s6_addr32[0]; | ||
| 138 | c += daddr->s6_addr32[1]; | ||
| 139 | __jhash_mix(a, b, c); | ||
| 140 | |||
| 141 | a += daddr->s6_addr32[2]; | ||
| 142 | b += daddr->s6_addr32[3]; | ||
| 143 | c += id; | ||
| 144 | __jhash_mix(a, b, c); | ||
| 145 | |||
| 146 | return c & (FRAG6Q_HASHSZ - 1); | ||
| 147 | } | ||
| 148 | |||
| 149 | static struct timer_list nf_ct_frag6_secret_timer; | ||
| 150 | int nf_ct_frag6_secret_interval = 10 * 60 * HZ; | ||
| 151 | |||
| 152 | static void nf_ct_frag6_secret_rebuild(unsigned long dummy) | ||
| 153 | { | ||
| 154 | unsigned long now = jiffies; | ||
| 155 | int i; | ||
| 156 | |||
| 157 | write_lock(&nf_ct_frag6_lock); | ||
| 158 | get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32)); | ||
| 159 | for (i = 0; i < FRAG6Q_HASHSZ; i++) { | ||
| 160 | struct nf_ct_frag6_queue *q; | ||
| 161 | |||
| 162 | q = nf_ct_frag6_hash[i]; | ||
| 163 | while (q) { | ||
| 164 | struct nf_ct_frag6_queue *next = q->next; | ||
| 165 | unsigned int hval = ip6qhashfn(q->id, | ||
| 166 | &q->saddr, | ||
| 167 | &q->daddr); | ||
| 168 | |||
| 169 | if (hval != i) { | ||
| 170 | /* Unlink. */ | ||
| 171 | if (q->next) | ||
| 172 | q->next->pprev = q->pprev; | ||
| 173 | *q->pprev = q->next; | ||
| 174 | |||
| 175 | /* Relink to new hash chain. */ | ||
| 176 | if ((q->next = nf_ct_frag6_hash[hval]) != NULL) | ||
| 177 | q->next->pprev = &q->next; | ||
| 178 | nf_ct_frag6_hash[hval] = q; | ||
| 179 | q->pprev = &nf_ct_frag6_hash[hval]; | ||
| 180 | } | ||
| 181 | |||
| 182 | q = next; | ||
| 183 | } | ||
| 184 | } | ||
| 185 | write_unlock(&nf_ct_frag6_lock); | ||
| 186 | |||
| 187 | mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval); | ||
| 188 | } | ||
| 189 | |||
| 190 | atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0); | ||
| 191 | |||
| 192 | /* Memory Tracking Functions. */ | ||
| 193 | static inline void frag_kfree_skb(struct sk_buff *skb) | ||
| 194 | { | ||
| 195 | atomic_sub(skb->truesize, &nf_ct_frag6_mem); | ||
| 196 | if (NFCT_FRAG6_CB(skb)->orig) | ||
| 197 | kfree_skb(NFCT_FRAG6_CB(skb)->orig); | ||
| 198 | |||
| 199 | kfree_skb(skb); | ||
| 200 | } | ||
| 201 | |||
| 202 | static inline void frag_free_queue(struct nf_ct_frag6_queue *fq) | ||
| 203 | { | ||
| 204 | atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); | ||
| 205 | kfree(fq); | ||
| 206 | } | ||
| 207 | |||
| 208 | static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) | ||
| 209 | { | ||
| 210 | struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); | ||
| 211 | |||
| 212 | if (!fq) | ||
| 213 | return NULL; | ||
| 214 | atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); | ||
| 215 | return fq; | ||
| 216 | } | ||
| 217 | |||
| 218 | /* Destruction primitives. */ | ||
| 219 | |||
| 220 | /* Complete destruction of fq. */ | ||
| 221 | static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq) | ||
| 222 | { | ||
| 223 | struct sk_buff *fp; | ||
| 224 | |||
| 225 | BUG_TRAP(fq->last_in&COMPLETE); | ||
| 226 | BUG_TRAP(del_timer(&fq->timer) == 0); | ||
| 227 | |||
| 228 | /* Release all fragment data. */ | ||
| 229 | fp = fq->fragments; | ||
| 230 | while (fp) { | ||
| 231 | struct sk_buff *xp = fp->next; | ||
| 232 | |||
| 233 | frag_kfree_skb(fp); | ||
| 234 | fp = xp; | ||
| 235 | } | ||
| 236 | |||
| 237 | frag_free_queue(fq); | ||
| 238 | } | ||
| 239 | |||
| 240 | static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) | ||
| 241 | { | ||
| 242 | if (atomic_dec_and_test(&fq->refcnt)) | ||
| 243 | nf_ct_frag6_destroy(fq); | ||
| 244 | } | ||
| 245 | |||
| 246 | /* Kill fq entry. It is not destroyed immediately, | ||
| 247 | * because caller (and someone more) holds reference count. | ||
| 248 | */ | ||
| 249 | static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) | ||
| 250 | { | ||
| 251 | if (del_timer(&fq->timer)) | ||
| 252 | atomic_dec(&fq->refcnt); | ||
| 253 | |||
| 254 | if (!(fq->last_in & COMPLETE)) { | ||
| 255 | fq_unlink(fq); | ||
| 256 | atomic_dec(&fq->refcnt); | ||
| 257 | fq->last_in |= COMPLETE; | ||
| 258 | } | ||
| 259 | } | ||
| 260 | |||
| 261 | static void nf_ct_frag6_evictor(void) | ||
| 262 | { | ||
| 263 | struct nf_ct_frag6_queue *fq; | ||
| 264 | struct list_head *tmp; | ||
| 265 | |||
| 266 | for (;;) { | ||
| 267 | if (atomic_read(&nf_ct_frag6_mem) <= nf_ct_frag6_low_thresh) | ||
| 268 | return; | ||
| 269 | read_lock(&nf_ct_frag6_lock); | ||
| 270 | if (list_empty(&nf_ct_frag6_lru_list)) { | ||
| 271 | read_unlock(&nf_ct_frag6_lock); | ||
| 272 | return; | ||
| 273 | } | ||
| 274 | tmp = nf_ct_frag6_lru_list.next; | ||
| 275 | fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list); | ||
| 276 | atomic_inc(&fq->refcnt); | ||
| 277 | read_unlock(&nf_ct_frag6_lock); | ||
| 278 | |||
| 279 | spin_lock(&fq->lock); | ||
| 280 | if (!(fq->last_in&COMPLETE)) | ||
| 281 | fq_kill(fq); | ||
| 282 | spin_unlock(&fq->lock); | ||
| 283 | |||
| 284 | fq_put(fq); | ||
| 285 | } | ||
| 286 | } | ||
| 287 | |||
| 288 | static void nf_ct_frag6_expire(unsigned long data) | ||
| 289 | { | ||
| 290 | struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; | ||
| 291 | |||
| 292 | spin_lock(&fq->lock); | ||
| 293 | |||
| 294 | if (fq->last_in & COMPLETE) | ||
| 295 | goto out; | ||
| 296 | |||
| 297 | fq_kill(fq); | ||
| 298 | |||
| 299 | out: | ||
| 300 | spin_unlock(&fq->lock); | ||
| 301 | fq_put(fq); | ||
| 302 | } | ||
| 303 | |||
| 304 | /* Creation primitives. */ | ||
| 305 | |||
| 306 | |||
| 307 | static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, | ||
| 308 | struct nf_ct_frag6_queue *fq_in) | ||
| 309 | { | ||
| 310 | struct nf_ct_frag6_queue *fq; | ||
| 311 | |||
| 312 | write_lock(&nf_ct_frag6_lock); | ||
| 313 | #ifdef CONFIG_SMP | ||
| 314 | for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) { | ||
| 315 | if (fq->id == fq_in->id && | ||
| 316 | !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) && | ||
| 317 | !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) { | ||
| 318 | atomic_inc(&fq->refcnt); | ||
| 319 | write_unlock(&nf_ct_frag6_lock); | ||
| 320 | fq_in->last_in |= COMPLETE; | ||
| 321 | fq_put(fq_in); | ||
| 322 | return fq; | ||
| 323 | } | ||
| 324 | } | ||
| 325 | #endif | ||
| 326 | fq = fq_in; | ||
| 327 | |||
| 328 | if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout)) | ||
| 329 | atomic_inc(&fq->refcnt); | ||
| 330 | |||
| 331 | atomic_inc(&fq->refcnt); | ||
| 332 | if ((fq->next = nf_ct_frag6_hash[hash]) != NULL) | ||
| 333 | fq->next->pprev = &fq->next; | ||
| 334 | nf_ct_frag6_hash[hash] = fq; | ||
| 335 | fq->pprev = &nf_ct_frag6_hash[hash]; | ||
| 336 | INIT_LIST_HEAD(&fq->lru_list); | ||
| 337 | list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list); | ||
| 338 | nf_ct_frag6_nqueues++; | ||
| 339 | write_unlock(&nf_ct_frag6_lock); | ||
| 340 | return fq; | ||
| 341 | } | ||
| 342 | |||
| 343 | |||
| 344 | static struct nf_ct_frag6_queue * | ||
| 345 | nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst) | ||
| 346 | { | ||
| 347 | struct nf_ct_frag6_queue *fq; | ||
| 348 | |||
| 349 | if ((fq = frag_alloc_queue()) == NULL) { | ||
| 350 | DEBUGP("Can't alloc new queue\n"); | ||
| 351 | goto oom; | ||
| 352 | } | ||
| 353 | |||
| 354 | memset(fq, 0, sizeof(struct nf_ct_frag6_queue)); | ||
| 355 | |||
| 356 | fq->id = id; | ||
| 357 | ipv6_addr_copy(&fq->saddr, src); | ||
| 358 | ipv6_addr_copy(&fq->daddr, dst); | ||
| 359 | |||
| 360 | init_timer(&fq->timer); | ||
| 361 | fq->timer.function = nf_ct_frag6_expire; | ||
| 362 | fq->timer.data = (long) fq; | ||
| 363 | fq->lock = SPIN_LOCK_UNLOCKED; | ||
| 364 | atomic_set(&fq->refcnt, 1); | ||
| 365 | |||
| 366 | return nf_ct_frag6_intern(hash, fq); | ||
| 367 | |||
| 368 | oom: | ||
| 369 | return NULL; | ||
| 370 | } | ||
| 371 | |||
| 372 | static __inline__ struct nf_ct_frag6_queue * | ||
| 373 | fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) | ||
| 374 | { | ||
| 375 | struct nf_ct_frag6_queue *fq; | ||
| 376 | unsigned int hash = ip6qhashfn(id, src, dst); | ||
| 377 | |||
| 378 | read_lock(&nf_ct_frag6_lock); | ||
| 379 | for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) { | ||
| 380 | if (fq->id == id && | ||
| 381 | !ipv6_addr_cmp(src, &fq->saddr) && | ||
| 382 | !ipv6_addr_cmp(dst, &fq->daddr)) { | ||
| 383 | atomic_inc(&fq->refcnt); | ||
| 384 | read_unlock(&nf_ct_frag6_lock); | ||
| 385 | return fq; | ||
| 386 | } | ||
| 387 | } | ||
| 388 | read_unlock(&nf_ct_frag6_lock); | ||
| 389 | |||
| 390 | return nf_ct_frag6_create(hash, id, src, dst); | ||
| 391 | } | ||
| 392 | |||
| 393 | |||
| 394 | static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, | ||
| 395 | struct frag_hdr *fhdr, int nhoff) | ||
| 396 | { | ||
| 397 | struct sk_buff *prev, *next; | ||
| 398 | int offset, end; | ||
| 399 | |||
| 400 | if (fq->last_in & COMPLETE) { | ||
| 401 | DEBUGP("Allready completed\n"); | ||
| 402 | goto err; | ||
| 403 | } | ||
| 404 | |||
| 405 | offset = ntohs(fhdr->frag_off) & ~0x7; | ||
| 406 | end = offset + (ntohs(skb->nh.ipv6h->payload_len) - | ||
| 407 | ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); | ||
| 408 | |||
| 409 | if ((unsigned int)end > IPV6_MAXPLEN) { | ||
| 410 | DEBUGP("offset is too large.\n"); | ||
| 411 | return -1; | ||
| 412 | } | ||
| 413 | |||
| 414 | if (skb->ip_summed == CHECKSUM_HW) | ||
| 415 | skb->csum = csum_sub(skb->csum, | ||
| 416 | csum_partial(skb->nh.raw, | ||
| 417 | (u8*)(fhdr + 1) - skb->nh.raw, | ||
| 418 | 0)); | ||
| 419 | |||
| 420 | /* Is this the final fragment? */ | ||
| 421 | if (!(fhdr->frag_off & htons(IP6_MF))) { | ||
| 422 | /* If we already have some bits beyond end | ||
| 423 | * or have different end, the segment is corrupted. | ||
| 424 | */ | ||
| 425 | if (end < fq->len || | ||
| 426 | ((fq->last_in & LAST_IN) && end != fq->len)) { | ||
| 427 | DEBUGP("already received last fragment\n"); | ||
| 428 | goto err; | ||
| 429 | } | ||
| 430 | fq->last_in |= LAST_IN; | ||
| 431 | fq->len = end; | ||
| 432 | } else { | ||
| 433 | /* Check if the fragment is rounded to 8 bytes. | ||
| 434 | * Required by the RFC. | ||
| 435 | */ | ||
| 436 | if (end & 0x7) { | ||
| 437 | /* RFC2460 says always send parameter problem in | ||
| 438 | * this case. -DaveM | ||
| 439 | */ | ||
| 440 | DEBUGP("the end of this fragment is not rounded to 8 bytes.\n"); | ||
| 441 | return -1; | ||
| 442 | } | ||
| 443 | if (end > fq->len) { | ||
| 444 | /* Some bits beyond end -> corruption. */ | ||
| 445 | if (fq->last_in & LAST_IN) { | ||
| 446 | DEBUGP("last packet already reached.\n"); | ||
| 447 | goto err; | ||
| 448 | } | ||
| 449 | fq->len = end; | ||
| 450 | } | ||
| 451 | } | ||
| 452 | |||
| 453 | if (end == offset) | ||
| 454 | goto err; | ||
| 455 | |||
| 456 | /* Point into the IP datagram 'data' part. */ | ||
| 457 | if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) { | ||
| 458 | DEBUGP("queue: message is too short.\n"); | ||
| 459 | goto err; | ||
| 460 | } | ||
| 461 | if (end-offset < skb->len) { | ||
| 462 | if (pskb_trim(skb, end - offset)) { | ||
| 463 | DEBUGP("Can't trim\n"); | ||
| 464 | goto err; | ||
| 465 | } | ||
| 466 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | ||
| 467 | skb->ip_summed = CHECKSUM_NONE; | ||
| 468 | } | ||
| 469 | |||
| 470 | /* Find out which fragments are in front and at the back of us | ||
| 471 | * in the chain of fragments so far. We must know where to put | ||
| 472 | * this fragment, right? | ||
| 473 | */ | ||
| 474 | prev = NULL; | ||
| 475 | for (next = fq->fragments; next != NULL; next = next->next) { | ||
| 476 | if (NFCT_FRAG6_CB(next)->offset >= offset) | ||
| 477 | break; /* bingo! */ | ||
| 478 | prev = next; | ||
| 479 | } | ||
| 480 | |||
| 481 | /* We found where to put this one. Check for overlap with | ||
| 482 | * preceding fragment, and, if needed, align things so that | ||
| 483 | * any overlaps are eliminated. | ||
| 484 | */ | ||
| 485 | if (prev) { | ||
| 486 | int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset; | ||
| 487 | |||
| 488 | if (i > 0) { | ||
| 489 | offset += i; | ||
| 490 | if (end <= offset) { | ||
| 491 | DEBUGP("overlap\n"); | ||
| 492 | goto err; | ||
| 493 | } | ||
| 494 | if (!pskb_pull(skb, i)) { | ||
| 495 | DEBUGP("Can't pull\n"); | ||
| 496 | goto err; | ||
| 497 | } | ||
| 498 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) | ||
| 499 | skb->ip_summed = CHECKSUM_NONE; | ||
| 500 | } | ||
| 501 | } | ||
| 502 | |||
| 503 | /* Look for overlap with succeeding segments. | ||
| 504 | * If we can merge fragments, do it. | ||
| 505 | */ | ||
| 506 | while (next && NFCT_FRAG6_CB(next)->offset < end) { | ||
| 507 | /* overlap is 'i' bytes */ | ||
| 508 | int i = end - NFCT_FRAG6_CB(next)->offset; | ||
| 509 | |||
| 510 | if (i < next->len) { | ||
| 511 | /* Eat head of the next overlapped fragment | ||
| 512 | * and leave the loop. The next ones cannot overlap. | ||
| 513 | */ | ||
| 514 | DEBUGP("Eat head of the overlapped parts.: %d", i); | ||
| 515 | if (!pskb_pull(next, i)) | ||
| 516 | goto err; | ||
| 517 | |||
| 518 | /* next fragment */ | ||
| 519 | NFCT_FRAG6_CB(next)->offset += i; | ||
| 520 | fq->meat -= i; | ||
| 521 | if (next->ip_summed != CHECKSUM_UNNECESSARY) | ||
| 522 | next->ip_summed = CHECKSUM_NONE; | ||
| 523 | break; | ||
| 524 | } else { | ||
| 525 | struct sk_buff *free_it = next; | ||
| 526 | |||
| 527 | /* Old fragmnet is completely overridden with | ||
| 528 | * new one drop it. | ||
| 529 | */ | ||
| 530 | next = next->next; | ||
| 531 | |||
| 532 | if (prev) | ||
| 533 | prev->next = next; | ||
| 534 | else | ||
| 535 | fq->fragments = next; | ||
| 536 | |||
| 537 | fq->meat -= free_it->len; | ||
| 538 | frag_kfree_skb(free_it); | ||
| 539 | } | ||
| 540 | } | ||
| 541 | |||
| 542 | NFCT_FRAG6_CB(skb)->offset = offset; | ||
| 543 | |||
| 544 | /* Insert this fragment in the chain of fragments. */ | ||
| 545 | skb->next = next; | ||
| 546 | if (prev) | ||
| 547 | prev->next = skb; | ||
| 548 | else | ||
| 549 | fq->fragments = skb; | ||
| 550 | |||
| 551 | skb->dev = NULL; | ||
| 552 | skb_get_timestamp(skb, &fq->stamp); | ||
| 553 | fq->meat += skb->len; | ||
| 554 | atomic_add(skb->truesize, &nf_ct_frag6_mem); | ||
| 555 | |||
| 556 | /* The first fragment. | ||
| 557 | * nhoffset is obtained from the first fragment, of course. | ||
| 558 | */ | ||
| 559 | if (offset == 0) { | ||
| 560 | fq->nhoffset = nhoff; | ||
| 561 | fq->last_in |= FIRST_IN; | ||
| 562 | } | ||
| 563 | write_lock(&nf_ct_frag6_lock); | ||
| 564 | list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list); | ||
| 565 | write_unlock(&nf_ct_frag6_lock); | ||
| 566 | return 0; | ||
| 567 | |||
| 568 | err: | ||
| 569 | return -1; | ||
| 570 | } | ||
| 571 | |||
| 572 | /* | ||
| 573 | * Check if this packet is complete. | ||
| 574 | * Returns NULL on failure by any reason, and pointer | ||
| 575 | * to current nexthdr field in reassembled frame. | ||
| 576 | * | ||
| 577 | * It is called with locked fq, and caller must check that | ||
| 578 | * queue is eligible for reassembly i.e. it is not COMPLETE, | ||
| 579 | * the last and the first frames arrived and all the bits are here. | ||
| 580 | */ | ||
| 581 | static struct sk_buff * | ||
| 582 | nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) | ||
| 583 | { | ||
| 584 | struct sk_buff *fp, *op, *head = fq->fragments; | ||
| 585 | int payload_len; | ||
| 586 | |||
| 587 | fq_kill(fq); | ||
| 588 | |||
| 589 | BUG_TRAP(head != NULL); | ||
| 590 | BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0); | ||
| 591 | |||
| 592 | /* Unfragmented part is taken from the first segment. */ | ||
| 593 | payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr); | ||
| 594 | if (payload_len > IPV6_MAXPLEN) { | ||
| 595 | DEBUGP("payload len is too large.\n"); | ||
| 596 | goto out_oversize; | ||
| 597 | } | ||
| 598 | |||
| 599 | /* Head of list must not be cloned. */ | ||
| 600 | if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { | ||
| 601 | DEBUGP("skb is cloned but can't expand head"); | ||
| 602 | goto out_oom; | ||
| 603 | } | ||
| 604 | |||
| 605 | /* If the first fragment is fragmented itself, we split | ||
| 606 | * it to two chunks: the first with data and paged part | ||
| 607 | * and the second, holding only fragments. */ | ||
| 608 | if (skb_shinfo(head)->frag_list) { | ||
| 609 | struct sk_buff *clone; | ||
| 610 | int i, plen = 0; | ||
| 611 | |||
| 612 | if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) { | ||
| 613 | DEBUGP("Can't alloc skb\n"); | ||
| 614 | goto out_oom; | ||
| 615 | } | ||
| 616 | clone->next = head->next; | ||
| 617 | head->next = clone; | ||
| 618 | skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; | ||
| 619 | skb_shinfo(head)->frag_list = NULL; | ||
| 620 | for (i=0; i<skb_shinfo(head)->nr_frags; i++) | ||
| 621 | plen += skb_shinfo(head)->frags[i].size; | ||
| 622 | clone->len = clone->data_len = head->data_len - plen; | ||
| 623 | head->data_len -= clone->len; | ||
| 624 | head->len -= clone->len; | ||
| 625 | clone->csum = 0; | ||
| 626 | clone->ip_summed = head->ip_summed; | ||
| 627 | |||
| 628 | NFCT_FRAG6_CB(clone)->orig = NULL; | ||
| 629 | atomic_add(clone->truesize, &nf_ct_frag6_mem); | ||
| 630 | } | ||
| 631 | |||
| 632 | /* We have to remove fragment header from datagram and to relocate | ||
| 633 | * header in order to calculate ICV correctly. */ | ||
| 634 | head->nh.raw[fq->nhoffset] = head->h.raw[0]; | ||
| 635 | memmove(head->head + sizeof(struct frag_hdr), head->head, | ||
| 636 | (head->data - head->head) - sizeof(struct frag_hdr)); | ||
| 637 | head->mac.raw += sizeof(struct frag_hdr); | ||
| 638 | head->nh.raw += sizeof(struct frag_hdr); | ||
| 639 | |||
| 640 | skb_shinfo(head)->frag_list = head->next; | ||
| 641 | head->h.raw = head->data; | ||
| 642 | skb_push(head, head->data - head->nh.raw); | ||
| 643 | atomic_sub(head->truesize, &nf_ct_frag6_mem); | ||
| 644 | |||
| 645 | for (fp=head->next; fp; fp = fp->next) { | ||
| 646 | head->data_len += fp->len; | ||
| 647 | head->len += fp->len; | ||
| 648 | if (head->ip_summed != fp->ip_summed) | ||
| 649 | head->ip_summed = CHECKSUM_NONE; | ||
| 650 | else if (head->ip_summed == CHECKSUM_HW) | ||
| 651 | head->csum = csum_add(head->csum, fp->csum); | ||
| 652 | head->truesize += fp->truesize; | ||
| 653 | atomic_sub(fp->truesize, &nf_ct_frag6_mem); | ||
| 654 | } | ||
| 655 | |||
| 656 | head->next = NULL; | ||
| 657 | head->dev = dev; | ||
| 658 | skb_set_timestamp(head, &fq->stamp); | ||
| 659 | head->nh.ipv6h->payload_len = htons(payload_len); | ||
| 660 | |||
| 661 | /* Yes, and fold redundant checksum back. 8) */ | ||
| 662 | if (head->ip_summed == CHECKSUM_HW) | ||
| 663 | head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); | ||
| 664 | |||
| 665 | fq->fragments = NULL; | ||
| 666 | |||
| 667 | /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ | ||
| 668 | fp = skb_shinfo(head)->frag_list; | ||
| 669 | if (NFCT_FRAG6_CB(fp)->orig == NULL) | ||
| 670 | /* at above code, head skb is divided into two skbs. */ | ||
| 671 | fp = fp->next; | ||
| 672 | |||
| 673 | op = NFCT_FRAG6_CB(head)->orig; | ||
| 674 | for (; fp; fp = fp->next) { | ||
| 675 | struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig; | ||
| 676 | |||
| 677 | op->next = orig; | ||
| 678 | op = orig; | ||
| 679 | NFCT_FRAG6_CB(fp)->orig = NULL; | ||
| 680 | } | ||
| 681 | |||
| 682 | return head; | ||
| 683 | |||
| 684 | out_oversize: | ||
| 685 | if (net_ratelimit()) | ||
| 686 | printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len); | ||
| 687 | goto out_fail; | ||
| 688 | out_oom: | ||
| 689 | if (net_ratelimit()) | ||
| 690 | printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n"); | ||
| 691 | out_fail: | ||
| 692 | return NULL; | ||
| 693 | } | ||
| 694 | |||
| 695 | /* | ||
| 696 | * find the header just before Fragment Header. | ||
| 697 | * | ||
| 698 | * if success return 0 and set ... | ||
| 699 | * (*prevhdrp): the value of "Next Header Field" in the header | ||
| 700 | * just before Fragment Header. | ||
| 701 | * (*prevhoff): the offset of "Next Header Field" in the header | ||
| 702 | * just before Fragment Header. | ||
| 703 | * (*fhoff) : the offset of Fragment Header. | ||
| 704 | * | ||
| 705 | * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c | ||
| 706 | * | ||
| 707 | */ | ||
| 708 | static int | ||
| 709 | find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) | ||
| 710 | { | ||
| 711 | u8 nexthdr = skb->nh.ipv6h->nexthdr; | ||
| 712 | u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data; | ||
| 713 | int start = (u8 *)(skb->nh.ipv6h+1) - skb->data; | ||
| 714 | int len = skb->len - start; | ||
| 715 | u8 prevhdr = NEXTHDR_IPV6; | ||
| 716 | |||
| 717 | while (nexthdr != NEXTHDR_FRAGMENT) { | ||
| 718 | struct ipv6_opt_hdr hdr; | ||
| 719 | int hdrlen; | ||
| 720 | |||
| 721 | if (!ipv6_ext_hdr(nexthdr)) { | ||
| 722 | return -1; | ||
| 723 | } | ||
| 724 | if (len < (int)sizeof(struct ipv6_opt_hdr)) { | ||
| 725 | DEBUGP("too short\n"); | ||
| 726 | return -1; | ||
| 727 | } | ||
| 728 | if (nexthdr == NEXTHDR_NONE) { | ||
| 729 | DEBUGP("next header is none\n"); | ||
| 730 | return -1; | ||
| 731 | } | ||
| 732 | if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) | ||
| 733 | BUG(); | ||
| 734 | if (nexthdr == NEXTHDR_AUTH) | ||
| 735 | hdrlen = (hdr.hdrlen+2)<<2; | ||
| 736 | else | ||
| 737 | hdrlen = ipv6_optlen(&hdr); | ||
| 738 | |||
| 739 | prevhdr = nexthdr; | ||
| 740 | prev_nhoff = start; | ||
| 741 | |||
| 742 | nexthdr = hdr.nexthdr; | ||
| 743 | len -= hdrlen; | ||
| 744 | start += hdrlen; | ||
| 745 | } | ||
| 746 | |||
| 747 | if (len < 0) | ||
| 748 | return -1; | ||
| 749 | |||
| 750 | *prevhdrp = prevhdr; | ||
| 751 | *prevhoff = prev_nhoff; | ||
| 752 | *fhoff = start; | ||
| 753 | |||
| 754 | return 0; | ||
| 755 | } | ||
| 756 | |||
| 757 | struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) | ||
| 758 | { | ||
| 759 | struct sk_buff *clone; | ||
| 760 | struct net_device *dev = skb->dev; | ||
| 761 | struct frag_hdr *fhdr; | ||
| 762 | struct nf_ct_frag6_queue *fq; | ||
| 763 | struct ipv6hdr *hdr; | ||
| 764 | int fhoff, nhoff; | ||
| 765 | u8 prevhdr; | ||
| 766 | struct sk_buff *ret_skb = NULL; | ||
| 767 | |||
| 768 | /* Jumbo payload inhibits frag. header */ | ||
| 769 | if (skb->nh.ipv6h->payload_len == 0) { | ||
| 770 | DEBUGP("payload len = 0\n"); | ||
| 771 | return skb; | ||
| 772 | } | ||
| 773 | |||
| 774 | if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) | ||
| 775 | return skb; | ||
| 776 | |||
| 777 | clone = skb_clone(skb, GFP_ATOMIC); | ||
| 778 | if (clone == NULL) { | ||
| 779 | DEBUGP("Can't clone skb\n"); | ||
| 780 | return skb; | ||
| 781 | } | ||
| 782 | |||
| 783 | NFCT_FRAG6_CB(clone)->orig = skb; | ||
| 784 | |||
| 785 | if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { | ||
| 786 | DEBUGP("message is too short.\n"); | ||
| 787 | goto ret_orig; | ||
| 788 | } | ||
| 789 | |||
| 790 | clone->h.raw = clone->data + fhoff; | ||
| 791 | hdr = clone->nh.ipv6h; | ||
| 792 | fhdr = (struct frag_hdr *)clone->h.raw; | ||
| 793 | |||
| 794 | if (!(fhdr->frag_off & htons(0xFFF9))) { | ||
| 795 | DEBUGP("Invalid fragment offset\n"); | ||
| 796 | /* It is not a fragmented frame */ | ||
| 797 | goto ret_orig; | ||
| 798 | } | ||
| 799 | |||
| 800 | if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh) | ||
| 801 | nf_ct_frag6_evictor(); | ||
| 802 | |||
| 803 | fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); | ||
| 804 | if (fq == NULL) { | ||
| 805 | DEBUGP("Can't find and can't create new queue\n"); | ||
| 806 | goto ret_orig; | ||
| 807 | } | ||
| 808 | |||
| 809 | spin_lock(&fq->lock); | ||
| 810 | |||
| 811 | if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { | ||
| 812 | spin_unlock(&fq->lock); | ||
| 813 | DEBUGP("Can't insert skb to queue\n"); | ||
| 814 | fq_put(fq); | ||
| 815 | goto ret_orig; | ||
| 816 | } | ||
| 817 | |||
| 818 | if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) { | ||
| 819 | ret_skb = nf_ct_frag6_reasm(fq, dev); | ||
| 820 | if (ret_skb == NULL) | ||
| 821 | DEBUGP("Can't reassemble fragmented packets\n"); | ||
| 822 | } | ||
| 823 | spin_unlock(&fq->lock); | ||
| 824 | |||
| 825 | fq_put(fq); | ||
| 826 | return ret_skb; | ||
| 827 | |||
| 828 | ret_orig: | ||
| 829 | kfree_skb(clone); | ||
| 830 | return skb; | ||
| 831 | } | ||
| 832 | |||
| 833 | void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, | ||
| 834 | struct net_device *in, struct net_device *out, | ||
| 835 | int (*okfn)(struct sk_buff *)) | ||
| 836 | { | ||
| 837 | struct sk_buff *s, *s2; | ||
| 838 | |||
| 839 | for (s = NFCT_FRAG6_CB(skb)->orig; s;) { | ||
| 840 | nf_conntrack_put_reasm(s->nfct_reasm); | ||
| 841 | nf_conntrack_get_reasm(skb); | ||
| 842 | s->nfct_reasm = skb; | ||
| 843 | |||
| 844 | s2 = s->next; | ||
| 845 | NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn, | ||
| 846 | NF_IP6_PRI_CONNTRACK_DEFRAG + 1); | ||
| 847 | s = s2; | ||
| 848 | } | ||
| 849 | nf_conntrack_put_reasm(skb); | ||
| 850 | } | ||
| 851 | |||
| 852 | int nf_ct_frag6_kfree_frags(struct sk_buff *skb) | ||
| 853 | { | ||
| 854 | struct sk_buff *s, *s2; | ||
| 855 | |||
| 856 | for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) { | ||
| 857 | |||
| 858 | s2 = s->next; | ||
| 859 | kfree_skb(s); | ||
| 860 | } | ||
| 861 | |||
| 862 | kfree_skb(skb); | ||
| 863 | |||
| 864 | return 0; | ||
| 865 | } | ||
| 866 | |||
| 867 | int nf_ct_frag6_init(void) | ||
| 868 | { | ||
| 869 | nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ | ||
| 870 | (jiffies ^ (jiffies >> 6))); | ||
| 871 | |||
| 872 | init_timer(&nf_ct_frag6_secret_timer); | ||
| 873 | nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild; | ||
| 874 | nf_ct_frag6_secret_timer.expires = jiffies | ||
| 875 | + nf_ct_frag6_secret_interval; | ||
| 876 | add_timer(&nf_ct_frag6_secret_timer); | ||
| 877 | |||
| 878 | return 0; | ||
| 879 | } | ||
| 880 | |||
| 881 | void nf_ct_frag6_cleanup(void) | ||
| 882 | { | ||
| 883 | del_timer(&nf_ct_frag6_secret_timer); | ||
| 884 | nf_ct_frag6_evictor(); | ||
| 885 | } | ||
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a1265a320b11..651c79b41eeb 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c  | |||
| @@ -174,8 +174,10 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) | |||
| 174 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); | 174 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); | 
| 175 | 175 | ||
| 176 | /* Not releasing hash table! */ | 176 | /* Not releasing hash table! */ | 
| 177 | if (clone) | 177 | if (clone) { | 
| 178 | nf_reset(clone); | ||
| 178 | rawv6_rcv(sk, clone); | 179 | rawv6_rcv(sk, clone); | 
| 180 | } | ||
| 179 | } | 181 | } | 
| 180 | sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, | 182 | sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, | 
| 181 | IP6CB(skb)->iif); | 183 | IP6CB(skb)->iif); | 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 227e99ed510c..f7f42c3e96cb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c  | |||
| @@ -1710,7 +1710,7 @@ static void fib6_dump_end(struct netlink_callback *cb) | |||
| 1710 | static int fib6_dump_done(struct netlink_callback *cb) | 1710 | static int fib6_dump_done(struct netlink_callback *cb) | 
| 1711 | { | 1711 | { | 
| 1712 | fib6_dump_end(cb); | 1712 | fib6_dump_end(cb); | 
| 1713 | return cb->done(cb); | 1713 | return cb->done ? cb->done(cb) : 0; | 
| 1714 | } | 1714 | } | 
| 1715 | 1715 | ||
| 1716 | int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | 1716 | int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d693cb988b78..d746d3b27efb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c  | |||
| @@ -114,16 +114,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) | |||
| 114 | int low = sysctl_local_port_range[0]; | 114 | int low = sysctl_local_port_range[0]; | 
| 115 | int high = sysctl_local_port_range[1]; | 115 | int high = sysctl_local_port_range[1]; | 
| 116 | int remaining = (high - low) + 1; | 116 | int remaining = (high - low) + 1; | 
| 117 | int rover; | 117 | int rover = net_random() % (high - low) + low; | 
| 118 | 118 | ||
| 119 | spin_lock(&tcp_hashinfo.portalloc_lock); | 119 | do { | 
| 120 | if (tcp_hashinfo.port_rover < low) | ||
| 121 | rover = low; | ||
| 122 | else | ||
| 123 | rover = tcp_hashinfo.port_rover; | ||
| 124 | do { rover++; | ||
| 125 | if (rover > high) | ||
| 126 | rover = low; | ||
| 127 | head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; | 120 | head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; | 
| 128 | spin_lock(&head->lock); | 121 | spin_lock(&head->lock); | 
| 129 | inet_bind_bucket_for_each(tb, node, &head->chain) | 122 | inet_bind_bucket_for_each(tb, node, &head->chain) | 
| @@ -132,9 +125,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) | |||
| 132 | break; | 125 | break; | 
| 133 | next: | 126 | next: | 
| 134 | spin_unlock(&head->lock); | 127 | spin_unlock(&head->lock); | 
| 128 | if (++rover > high) | ||
| 129 | rover = low; | ||
| 135 | } while (--remaining > 0); | 130 | } while (--remaining > 0); | 
| 136 | tcp_hashinfo.port_rover = rover; | ||
| 137 | spin_unlock(&tcp_hashinfo.portalloc_lock); | ||
| 138 | 131 | ||
| 139 | /* Exhausted local port range during search? It is not | 132 | /* Exhausted local port range during search? It is not | 
| 140 | * possible for us to be holding one of the bind hash | 133 | * possible for us to be holding one of the bind hash | 
diff --git a/net/irda/discovery.c b/net/irda/discovery.c index c4ba5fa1446a..3fefc822c1c0 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c  | |||
| @@ -194,8 +194,7 @@ void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force) | |||
| 194 | 194 | ||
| 195 | /* Remove it from the log */ | 195 | /* Remove it from the log */ | 
| 196 | curr = hashbin_remove_this(log, (irda_queue_t *) curr); | 196 | curr = hashbin_remove_this(log, (irda_queue_t *) curr); | 
| 197 | if (curr) | 197 | kfree(curr); | 
| 198 | kfree(curr); | ||
| 199 | } | 198 | } | 
| 200 | } | 199 | } | 
| 201 | 200 | ||
diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index 6fec428b4512..75f2666e8630 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c  | |||
| @@ -122,8 +122,7 @@ static void __irias_delete_attrib(struct ias_attrib *attrib) | |||
| 122 | IRDA_ASSERT(attrib != NULL, return;); | 122 | IRDA_ASSERT(attrib != NULL, return;); | 
| 123 | IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;); | 123 | IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;); | 
| 124 | 124 | ||
| 125 | if (attrib->name) | 125 | kfree(attrib->name); | 
| 126 | kfree(attrib->name); | ||
| 127 | 126 | ||
| 128 | irias_delete_value(attrib->value); | 127 | irias_delete_value(attrib->value); | 
| 129 | attrib->magic = ~IAS_ATTRIB_MAGIC; | 128 | attrib->magic = ~IAS_ATTRIB_MAGIC; | 
| @@ -136,8 +135,7 @@ void __irias_delete_object(struct ias_object *obj) | |||
| 136 | IRDA_ASSERT(obj != NULL, return;); | 135 | IRDA_ASSERT(obj != NULL, return;); | 
| 137 | IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); | 136 | IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); | 
| 138 | 137 | ||
| 139 | if (obj->name) | 138 | kfree(obj->name); | 
| 140 | kfree(obj->name); | ||
| 141 | 139 | ||
| 142 | hashbin_delete(obj->attribs, (FREE_FUNC) __irias_delete_attrib); | 140 | hashbin_delete(obj->attribs, (FREE_FUNC) __irias_delete_attrib); | 
| 143 | 141 | ||
| @@ -562,14 +560,12 @@ void irias_delete_value(struct ias_value *value) | |||
| 562 | /* No need to deallocate */ | 560 | /* No need to deallocate */ | 
| 563 | break; | 561 | break; | 
| 564 | case IAS_STRING: | 562 | case IAS_STRING: | 
| 565 | /* If string, deallocate string */ | 563 | /* Deallocate string */ | 
| 566 | if (value->t.string != NULL) | 564 | kfree(value->t.string); | 
| 567 | kfree(value->t.string); | ||
| 568 | break; | 565 | break; | 
| 569 | case IAS_OCT_SEQ: | 566 | case IAS_OCT_SEQ: | 
| 570 | /* If byte stream, deallocate byte stream */ | 567 | /* Deallocate byte stream */ | 
| 571 | if (value->t.oct_seq != NULL) | 568 | kfree(value->t.oct_seq); | 
| 572 | kfree(value->t.oct_seq); | ||
| 573 | break; | 569 | break; | 
| 574 | default: | 570 | default: | 
| 575 | IRDA_DEBUG(0, "%s(), Unknown value type!\n", __FUNCTION__); | 571 | IRDA_DEBUG(0, "%s(), Unknown value type!\n", __FUNCTION__); | 
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8296b38bf270..a84f9221e5f0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig  | |||
| @@ -1,3 +1,6 @@ | |||
| 1 | menu "Core Netfilter Configuration" | ||
| 2 | depends on NET && NETFILTER | ||
| 3 | |||
| 1 | config NETFILTER_NETLINK | 4 | config NETFILTER_NETLINK | 
| 2 | tristate "Netfilter netlink interface" | 5 | tristate "Netfilter netlink interface" | 
| 3 | help | 6 | help | 
| @@ -22,3 +25,74 @@ config NETFILTER_NETLINK_LOG | |||
| 22 | and is also scheduled to replace the old syslog-based ipt_LOG | 25 | and is also scheduled to replace the old syslog-based ipt_LOG | 
| 23 | and ip6t_LOG modules. | 26 | and ip6t_LOG modules. | 
| 24 | 27 | ||
| 28 | config NF_CONNTRACK | ||
| 29 | tristate "Layer 3 Independent Connection tracking (EXPERIMENTAL)" | ||
| 30 | depends on EXPERIMENTAL && IP_NF_CONNTRACK=n | ||
| 31 | default n | ||
| 32 | ---help--- | ||
| 33 | Connection tracking keeps a record of what packets have passed | ||
| 34 | through your machine, in order to figure out how they are related | ||
| 35 | into connections. | ||
| 36 | |||
| 37 | Layer 3 independent connection tracking is an experimental scheme | ||
| 38 | which generalizes ip_conntrack to support other layer 3 protocols. | ||
| 39 | |||
| 40 | To compile it as a module, choose M here. If unsure, say N. | ||
| 41 | |||
| 42 | config NF_CT_ACCT | ||
| 43 | bool "Connection tracking flow accounting" | ||
| 44 | depends on NF_CONNTRACK | ||
| 45 | help | ||
| 46 | If this option is enabled, the connection tracking code will | ||
| 47 | keep per-flow packet and byte counters. | ||
| 48 | |||
| 49 | Those counters can be used for flow-based accounting or the | ||
| 50 | `connbytes' match. | ||
| 51 | |||
| 52 | If unsure, say `N'. | ||
| 53 | |||
| 54 | config NF_CONNTRACK_MARK | ||
| 55 | bool 'Connection mark tracking support' | ||
| 56 | depends on NF_CONNTRACK | ||
| 57 | help | ||
| 58 | This option enables support for connection marks, used by the | ||
| 59 | `CONNMARK' target and `connmark' match. Similar to the mark value | ||
| 60 | of packets, but this mark value is kept in the conntrack session | ||
| 61 | instead of the individual packets. | ||
| 62 | |||
| 63 | config NF_CONNTRACK_EVENTS | ||
| 64 | bool "Connection tracking events" | ||
| 65 | depends on NF_CONNTRACK | ||
| 66 | help | ||
| 67 | If this option is enabled, the connection tracking code will | ||
| 68 | provide a notifier chain that can be used by other kernel code | ||
| 69 | to get notified about changes in the connection tracking state. | ||
| 70 | |||
| 71 | If unsure, say `N'. | ||
| 72 | |||
| 73 | config NF_CT_PROTO_SCTP | ||
| 74 | tristate 'SCTP protocol on new connection tracking support (EXPERIMENTAL)' | ||
| 75 | depends on EXPERIMENTAL && NF_CONNTRACK | ||
| 76 | default n | ||
| 77 | help | ||
| 78 | With this option enabled, the layer 3 independent connection | ||
| 79 | tracking code will be able to do state tracking on SCTP connections. | ||
| 80 | |||
| 81 | If you want to compile it as a module, say M here and read | ||
| 82 | Documentation/modules.txt. If unsure, say `N'. | ||
| 83 | |||
| 84 | config NF_CONNTRACK_FTP | ||
| 85 | tristate "FTP support on new connection tracking (EXPERIMENTAL)" | ||
| 86 | depends on EXPERIMENTAL && NF_CONNTRACK | ||
| 87 | help | ||
| 88 | Tracking FTP connections is problematic: special helpers are | ||
| 89 | required for tracking them, and doing masquerading and other forms | ||
| 90 | of Network Address Translation on them. | ||
| 91 | |||
| 92 | This is FTP support on Layer 3 independent connection tracking. | ||
| 93 | Layer 3 independent connection tracking is an experimental scheme | ||
| 94 | which generalizes ip_conntrack to support other layer 3 protocols. | ||
| 95 | |||
| 96 | To compile it as a module, choose M here. If unsure, say N. | ||
| 97 | |||
| 98 | endmenu | ||
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index b3b44f8b415a..55f019ad2c08 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile  | |||
| @@ -5,3 +5,11 @@ obj-$(CONFIG_NETFILTER) = netfilter.o | |||
| 5 | obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o | 5 | obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o | 
| 6 | obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o | 6 | obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o | 
| 7 | obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o | 7 | obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o | 
| 8 | |||
| 9 | nf_conntrack-objs := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o | ||
| 10 | |||
| 11 | obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o | ||
| 12 | obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o | ||
| 13 | |||
| 14 | # SCTP protocol connection tracking | ||
| 15 | obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o | ||
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c new file mode 100644 index 000000000000..9a67c796b385 --- /dev/null +++ b/net/netfilter/nf_conntrack_core.c  | |||
| @@ -0,0 +1,1538 @@ | |||
| 1 | /* Connection state tracking for netfilter. This is separated from, | ||
| 2 | but required by, the NAT layer; it can also be used by an iptables | ||
| 3 | extension. */ | ||
| 4 | |||
| 5 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
| 6 | * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
| 7 | * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> | ||
| 8 | * | ||
| 9 | * This program is free software; you can redistribute it and/or modify | ||
| 10 | * it under the terms of the GNU General Public License version 2 as | ||
| 11 | * published by the Free Software Foundation. | ||
| 12 | * | ||
| 13 | * 23 Apr 2001: Harald Welte <laforge@gnumonks.org> | ||
| 14 | * - new API and handling of conntrack/nat helpers | ||
| 15 | * - now capable of multiple expectations for one master | ||
| 16 | * 16 Jul 2002: Harald Welte <laforge@gnumonks.org> | ||
| 17 | * - add usage/reference counts to ip_conntrack_expect | ||
| 18 | * - export ip_conntrack[_expect]_{find_get,put} functions | ||
| 19 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 20 | * - generalize L3 protocol dependent part. | ||
| 21 | * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 22 | * - add support for various sizes of conntrack structures. | ||
| 23 | * | ||
| 24 | * Derived from net/ipv4/netfilter/ip_conntrack_core.c | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <linux/config.h> | ||
| 28 | #include <linux/types.h> | ||
| 29 | #include <linux/netfilter.h> | ||
| 30 | #include <linux/module.h> | ||
| 31 | #include <linux/skbuff.h> | ||
| 32 | #include <linux/proc_fs.h> | ||
| 33 | #include <linux/vmalloc.h> | ||
| 34 | #include <linux/stddef.h> | ||
| 35 | #include <linux/slab.h> | ||
| 36 | #include <linux/random.h> | ||
| 37 | #include <linux/jhash.h> | ||
| 38 | #include <linux/err.h> | ||
| 39 | #include <linux/percpu.h> | ||
| 40 | #include <linux/moduleparam.h> | ||
| 41 | #include <linux/notifier.h> | ||
| 42 | #include <linux/kernel.h> | ||
| 43 | #include <linux/netdevice.h> | ||
| 44 | #include <linux/socket.h> | ||
| 45 | |||
| 46 | /* This rwlock protects the main hash table, protocol/helper/expected | ||
| 47 | registrations, conntrack timers*/ | ||
| 48 | #define ASSERT_READ_LOCK(x) | ||
| 49 | #define ASSERT_WRITE_LOCK(x) | ||
| 50 | |||
| 51 | #include <net/netfilter/nf_conntrack.h> | ||
| 52 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
| 53 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 54 | #include <net/netfilter/nf_conntrack_helper.h> | ||
| 55 | #include <net/netfilter/nf_conntrack_core.h> | ||
| 56 | #include <linux/netfilter_ipv4/listhelp.h> | ||
| 57 | |||
| 58 | #define NF_CONNTRACK_VERSION "0.4.1" | ||
| 59 | |||
| 60 | #if 0 | ||
| 61 | #define DEBUGP printk | ||
| 62 | #else | ||
| 63 | #define DEBUGP(format, args...) | ||
| 64 | #endif | ||
| 65 | |||
| 66 | DEFINE_RWLOCK(nf_conntrack_lock); | ||
| 67 | |||
| 68 | /* nf_conntrack_standalone needs this */ | ||
| 69 | atomic_t nf_conntrack_count = ATOMIC_INIT(0); | ||
| 70 | |||
| 71 | void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL; | ||
| 72 | LIST_HEAD(nf_conntrack_expect_list); | ||
| 73 | struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; | ||
| 74 | struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX]; | ||
| 75 | static LIST_HEAD(helpers); | ||
| 76 | unsigned int nf_conntrack_htable_size = 0; | ||
| 77 | int nf_conntrack_max; | ||
| 78 | struct list_head *nf_conntrack_hash; | ||
| 79 | static kmem_cache_t *nf_conntrack_expect_cachep; | ||
| 80 | struct nf_conn nf_conntrack_untracked; | ||
| 81 | unsigned int nf_ct_log_invalid; | ||
| 82 | static LIST_HEAD(unconfirmed); | ||
| 83 | static int nf_conntrack_vmalloc; | ||
| 84 | |||
| 85 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | ||
| 86 | struct notifier_block *nf_conntrack_chain; | ||
| 87 | struct notifier_block *nf_conntrack_expect_chain; | ||
| 88 | |||
| 89 | DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); | ||
| 90 | |||
| 91 | /* deliver cached events and clear cache entry - must be called with locally | ||
| 92 | * disabled softirqs */ | ||
| 93 | static inline void | ||
| 94 | __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) | ||
| 95 | { | ||
| 96 | DEBUGP("ecache: delivering events for %p\n", ecache->ct); | ||
| 97 | if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) | ||
| 98 | && ecache->events) | ||
| 99 | notifier_call_chain(&nf_conntrack_chain, ecache->events, | ||
| 100 | ecache->ct); | ||
| 101 | |||
| 102 | ecache->events = 0; | ||
| 103 | nf_ct_put(ecache->ct); | ||
| 104 | ecache->ct = NULL; | ||
| 105 | } | ||
| 106 | |||
| 107 | /* Deliver all cached events for a particular conntrack. This is called | ||
| 108 | * by code prior to async packet handling for freeing the skb */ | ||
| 109 | void nf_ct_deliver_cached_events(const struct nf_conn *ct) | ||
| 110 | { | ||
| 111 | struct nf_conntrack_ecache *ecache; | ||
| 112 | |||
| 113 | local_bh_disable(); | ||
| 114 | ecache = &__get_cpu_var(nf_conntrack_ecache); | ||
| 115 | if (ecache->ct == ct) | ||
| 116 | __nf_ct_deliver_cached_events(ecache); | ||
| 117 | local_bh_enable(); | ||
| 118 | } | ||
| 119 | |||
| 120 | /* Deliver cached events for old pending events, if current conntrack != old */ | ||
| 121 | void __nf_ct_event_cache_init(struct nf_conn *ct) | ||
| 122 | { | ||
| 123 | struct nf_conntrack_ecache *ecache; | ||
| 124 | |||
| 125 | /* take care of delivering potentially old events */ | ||
| 126 | ecache = &__get_cpu_var(nf_conntrack_ecache); | ||
| 127 | BUG_ON(ecache->ct == ct); | ||
| 128 | if (ecache->ct) | ||
| 129 | __nf_ct_deliver_cached_events(ecache); | ||
| 130 | /* initialize for this conntrack/packet */ | ||
| 131 | ecache->ct = ct; | ||
| 132 | nf_conntrack_get(&ct->ct_general); | ||
| 133 | } | ||
| 134 | |||
| 135 | /* flush the event cache - touches other CPU's data and must not be called | ||
| 136 | * while packets are still passing through the code */ | ||
| 137 | static void nf_ct_event_cache_flush(void) | ||
| 138 | { | ||
| 139 | struct nf_conntrack_ecache *ecache; | ||
| 140 | int cpu; | ||
| 141 | |||
| 142 | for_each_cpu(cpu) { | ||
| 143 | ecache = &per_cpu(nf_conntrack_ecache, cpu); | ||
| 144 | if (ecache->ct) | ||
| 145 | nf_ct_put(ecache->ct); | ||
| 146 | } | ||
| 147 | } | ||
| 148 | #else | ||
| 149 | static inline void nf_ct_event_cache_flush(void) {} | ||
| 150 | #endif /* CONFIG_NF_CONNTRACK_EVENTS */ | ||
| 151 | |||
| 152 | DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); | ||
| 153 | EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); | ||
| 154 | |||
| 155 | /* | ||
| 156 | * This scheme offers various sizes of "struct nf_conn" depending on | ||
| 157 | * features (helper, nat, ...) | ||
| 158 | */ | ||
| 159 | |||
| 160 | #define NF_CT_FEATURES_NAMELEN 256 | ||
| 161 | static struct { | ||
| 162 | /* name of slab cache. printed in /proc/slabinfo */ | ||
| 163 | char *name; | ||
| 164 | |||
| 165 | /* size of slab cache */ | ||
| 166 | size_t size; | ||
| 167 | |||
| 168 | /* slab cache pointer */ | ||
| 169 | kmem_cache_t *cachep; | ||
| 170 | |||
| 171 | /* allocated slab cache + modules which use this slab cache */ | ||
| 172 | int use; | ||
| 173 | |||
| 174 | /* Initialization */ | ||
| 175 | int (*init_conntrack)(struct nf_conn *, u_int32_t); | ||
| 176 | |||
| 177 | } nf_ct_cache[NF_CT_F_NUM]; | ||
| 178 | |||
| 179 | /* protects members of nf_ct_cache except for "use" */ | ||
| 180 | DEFINE_RWLOCK(nf_ct_cache_lock); | ||
| 181 | |||
| 182 | /* This avoids calling kmem_cache_create() with same name simultaneously */ | ||
| 183 | DECLARE_MUTEX(nf_ct_cache_mutex); | ||
| 184 | |||
| 185 | extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; | ||
| 186 | struct nf_conntrack_protocol * | ||
| 187 | nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol) | ||
| 188 | { | ||
| 189 | if (unlikely(nf_ct_protos[l3proto] == NULL)) | ||
| 190 | return &nf_conntrack_generic_protocol; | ||
| 191 | |||
| 192 | return nf_ct_protos[l3proto][protocol]; | ||
| 193 | } | ||
| 194 | |||
| 195 | static int nf_conntrack_hash_rnd_initted; | ||
| 196 | static unsigned int nf_conntrack_hash_rnd; | ||
| 197 | |||
| 198 | static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, | ||
| 199 | unsigned int size, unsigned int rnd) | ||
| 200 | { | ||
| 201 | unsigned int a, b; | ||
| 202 | a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all), | ||
| 203 | ((tuple->src.l3num) << 16) | tuple->dst.protonum); | ||
| 204 | b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all), | ||
| 205 | (tuple->src.u.all << 16) | tuple->dst.u.all); | ||
| 206 | |||
| 207 | return jhash_2words(a, b, rnd) % size; | ||
| 208 | } | ||
| 209 | |||
| 210 | static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) | ||
| 211 | { | ||
| 212 | return __hash_conntrack(tuple, nf_conntrack_htable_size, | ||
| 213 | nf_conntrack_hash_rnd); | ||
| 214 | } | ||
| 215 | |||
| 216 | /* Initialize "struct nf_conn" which has space for a helper */ | ||
| 217 | static int | ||
| 218 | init_conntrack_for_helper(struct nf_conn *conntrack, u_int32_t features) | ||
| 219 | { | ||
| 220 | |||
| 221 | conntrack->help = (union nf_conntrack_help *) | ||
| 222 | (((unsigned long)conntrack->data | ||
| 223 | + (__alignof__(union nf_conntrack_help) - 1)) | ||
| 224 | & (~((unsigned long)(__alignof__(union nf_conntrack_help) -1)))); | ||
| 225 | return 0; | ||
| 226 | } | ||
| 227 | |||
| 228 | int nf_conntrack_register_cache(u_int32_t features, const char *name, | ||
| 229 | size_t size, | ||
| 230 | int (*init)(struct nf_conn *, u_int32_t)) | ||
| 231 | { | ||
| 232 | int ret = 0; | ||
| 233 | char *cache_name; | ||
| 234 | kmem_cache_t *cachep; | ||
| 235 | |||
| 236 | DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n", | ||
| 237 | features, name, size); | ||
| 238 | |||
| 239 | if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) { | ||
| 240 | DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n", | ||
| 241 | features); | ||
| 242 | return -EINVAL; | ||
| 243 | } | ||
| 244 | |||
| 245 | down(&nf_ct_cache_mutex); | ||
| 246 | |||
| 247 | write_lock_bh(&nf_ct_cache_lock); | ||
| 248 | /* e.g: multiple helpers are loaded */ | ||
| 249 | if (nf_ct_cache[features].use > 0) { | ||
| 250 | DEBUGP("nf_conntrack_register_cache: already resisterd.\n"); | ||
| 251 | if ((!strncmp(nf_ct_cache[features].name, name, | ||
| 252 | NF_CT_FEATURES_NAMELEN)) | ||
| 253 | && nf_ct_cache[features].size == size | ||
| 254 | && nf_ct_cache[features].init_conntrack == init) { | ||
| 255 | DEBUGP("nf_conntrack_register_cache: reusing.\n"); | ||
| 256 | nf_ct_cache[features].use++; | ||
| 257 | ret = 0; | ||
| 258 | } else | ||
| 259 | ret = -EBUSY; | ||
| 260 | |||
| 261 | write_unlock_bh(&nf_ct_cache_lock); | ||
| 262 | up(&nf_ct_cache_mutex); | ||
| 263 | return ret; | ||
| 264 | } | ||
| 265 | write_unlock_bh(&nf_ct_cache_lock); | ||
| 266 | |||
| 267 | /* | ||
| 268 | * The memory space for name of slab cache must be alive until | ||
| 269 | * cache is destroyed. | ||
| 270 | */ | ||
| 271 | cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC); | ||
| 272 | if (cache_name == NULL) { | ||
| 273 | DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n"); | ||
| 274 | ret = -ENOMEM; | ||
| 275 | goto out_up_mutex; | ||
| 276 | } | ||
| 277 | |||
| 278 | if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN) | ||
| 279 | >= NF_CT_FEATURES_NAMELEN) { | ||
| 280 | printk("nf_conntrack_register_cache: name too long\n"); | ||
| 281 | ret = -EINVAL; | ||
| 282 | goto out_free_name; | ||
| 283 | } | ||
| 284 | |||
| 285 | cachep = kmem_cache_create(cache_name, size, 0, 0, | ||
| 286 | NULL, NULL); | ||
| 287 | if (!cachep) { | ||
| 288 | printk("nf_conntrack_register_cache: Can't create slab cache " | ||
| 289 | "for the features = 0x%x\n", features); | ||
| 290 | ret = -ENOMEM; | ||
| 291 | goto out_free_name; | ||
| 292 | } | ||
| 293 | |||
| 294 | write_lock_bh(&nf_ct_cache_lock); | ||
| 295 | nf_ct_cache[features].use = 1; | ||
| 296 | nf_ct_cache[features].size = size; | ||
| 297 | nf_ct_cache[features].init_conntrack = init; | ||
| 298 | nf_ct_cache[features].cachep = cachep; | ||
| 299 | nf_ct_cache[features].name = cache_name; | ||
| 300 | write_unlock_bh(&nf_ct_cache_lock); | ||
| 301 | |||
| 302 | goto out_up_mutex; | ||
| 303 | |||
| 304 | out_free_name: | ||
| 305 | kfree(cache_name); | ||
| 306 | out_up_mutex: | ||
| 307 | up(&nf_ct_cache_mutex); | ||
| 308 | return ret; | ||
| 309 | } | ||
| 310 | |||
| 311 | /* FIXME: Currently, only nf_conntrack_cleanup() can call this function. */ | ||
| 312 | void nf_conntrack_unregister_cache(u_int32_t features) | ||
| 313 | { | ||
| 314 | kmem_cache_t *cachep; | ||
| 315 | char *name; | ||
| 316 | |||
| 317 | /* | ||
| 318 | * This assures that kmem_cache_create() isn't called before destroying | ||
| 319 | * slab cache. | ||
| 320 | */ | ||
| 321 | DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features); | ||
| 322 | down(&nf_ct_cache_mutex); | ||
| 323 | |||
| 324 | write_lock_bh(&nf_ct_cache_lock); | ||
| 325 | if (--nf_ct_cache[features].use > 0) { | ||
| 326 | write_unlock_bh(&nf_ct_cache_lock); | ||
| 327 | up(&nf_ct_cache_mutex); | ||
| 328 | return; | ||
| 329 | } | ||
| 330 | cachep = nf_ct_cache[features].cachep; | ||
| 331 | name = nf_ct_cache[features].name; | ||
| 332 | nf_ct_cache[features].cachep = NULL; | ||
| 333 | nf_ct_cache[features].name = NULL; | ||
| 334 | nf_ct_cache[features].init_conntrack = NULL; | ||
| 335 | nf_ct_cache[features].size = 0; | ||
| 336 | write_unlock_bh(&nf_ct_cache_lock); | ||
| 337 | |||
| 338 | synchronize_net(); | ||
| 339 | |||
| 340 | kmem_cache_destroy(cachep); | ||
| 341 | kfree(name); | ||
| 342 | |||
| 343 | up(&nf_ct_cache_mutex); | ||
| 344 | } | ||
| 345 | |||
| 346 | int | ||
| 347 | nf_ct_get_tuple(const struct sk_buff *skb, | ||
| 348 | unsigned int nhoff, | ||
| 349 | unsigned int dataoff, | ||
| 350 | u_int16_t l3num, | ||
| 351 | u_int8_t protonum, | ||
| 352 | struct nf_conntrack_tuple *tuple, | ||
| 353 | const struct nf_conntrack_l3proto *l3proto, | ||
| 354 | const struct nf_conntrack_protocol *protocol) | ||
| 355 | { | ||
| 356 | NF_CT_TUPLE_U_BLANK(tuple); | ||
| 357 | |||
| 358 | tuple->src.l3num = l3num; | ||
| 359 | if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0) | ||
| 360 | return 0; | ||
| 361 | |||
| 362 | tuple->dst.protonum = protonum; | ||
| 363 | tuple->dst.dir = IP_CT_DIR_ORIGINAL; | ||
| 364 | |||
| 365 | return protocol->pkt_to_tuple(skb, dataoff, tuple); | ||
| 366 | } | ||
| 367 | |||
| 368 | int | ||
| 369 | nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, | ||
| 370 | const struct nf_conntrack_tuple *orig, | ||
| 371 | const struct nf_conntrack_l3proto *l3proto, | ||
| 372 | const struct nf_conntrack_protocol *protocol) | ||
| 373 | { | ||
| 374 | NF_CT_TUPLE_U_BLANK(inverse); | ||
| 375 | |||
| 376 | inverse->src.l3num = orig->src.l3num; | ||
| 377 | if (l3proto->invert_tuple(inverse, orig) == 0) | ||
| 378 | return 0; | ||
| 379 | |||
| 380 | inverse->dst.dir = !orig->dst.dir; | ||
| 381 | |||
| 382 | inverse->dst.protonum = orig->dst.protonum; | ||
| 383 | return protocol->invert_tuple(inverse, orig); | ||
| 384 | } | ||
| 385 | |||
| 386 | /* nf_conntrack_expect helper functions */ | ||
| 387 | static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) | ||
| 388 | { | ||
| 389 | ASSERT_WRITE_LOCK(&nf_conntrack_lock); | ||
| 390 | NF_CT_ASSERT(!timer_pending(&exp_timeout)); | ||
| 391 | list_del(&exp->list); | ||
| 392 | NF_CT_STAT_INC(expect_delete); | ||
| 393 | exp->master->expecting--; | ||
| 394 | nf_conntrack_expect_put(exp); | ||
| 395 | } | ||
| 396 | |||
| 397 | static void expectation_timed_out(unsigned long ul_expect) | ||
| 398 | { | ||
| 399 | struct nf_conntrack_expect *exp = (void *)ul_expect; | ||
| 400 | |||
| 401 | write_lock_bh(&nf_conntrack_lock); | ||
| 402 | nf_ct_unlink_expect(exp); | ||
| 403 | write_unlock_bh(&nf_conntrack_lock); | ||
| 404 | nf_conntrack_expect_put(exp); | ||
| 405 | } | ||
| 406 | |||
| 407 | /* If an expectation for this connection is found, it gets deleted from | ||
| 408 | * the global list and is then returned. */ | ||
| 409 | static struct nf_conntrack_expect * | ||
| 410 | find_expectation(const struct nf_conntrack_tuple *tuple) | ||
| 411 | { | ||
| 412 | struct nf_conntrack_expect *i; | ||
| 413 | |||
| 414 | list_for_each_entry(i, &nf_conntrack_expect_list, list) { | ||
| 415 | /* If master is not in hash table yet (ie. packet hasn't left | ||
| 416 | this machine yet), how can other end know about expected? | ||
| 417 | Hence these are not the droids you are looking for (if | ||
| 418 | master ct never got confirmed, we'd hold a reference to it | ||
| 419 | and weird things would happen to future packets). */ | ||
| 420 | if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) | ||
| 421 | && nf_ct_is_confirmed(i->master)) { | ||
| 422 | if (i->flags & NF_CT_EXPECT_PERMANENT) { | ||
| 423 | atomic_inc(&i->use); | ||
| 424 | return i; | ||
| 425 | } else if (del_timer(&i->timeout)) { | ||
| 426 | nf_ct_unlink_expect(i); | ||
| 427 | return i; | ||
| 428 | } | ||
| 429 | } | ||
| 430 | } | ||
| 431 | return NULL; | ||
| 432 | } | ||
| 433 | |||
| 434 | /* delete all expectations for this conntrack */ | ||
| 435 | static void remove_expectations(struct nf_conn *ct) | ||
| 436 | { | ||
| 437 | struct nf_conntrack_expect *i, *tmp; | ||
| 438 | |||
| 439 | /* Optimization: most connections never expect any others. */ | ||
| 440 | if (ct->expecting == 0) | ||
| 441 | return; | ||
| 442 | |||
| 443 | list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) { | ||
| 444 | if (i->master == ct && del_timer(&i->timeout)) { | ||
| 445 | nf_ct_unlink_expect(i); | ||
| 446 | nf_conntrack_expect_put(i); | ||
| 447 | } | ||
| 448 | } | ||
| 449 | } | ||
| 450 | |||
| 451 | static void | ||
| 452 | clean_from_lists(struct nf_conn *ct) | ||
| 453 | { | ||
| 454 | unsigned int ho, hr; | ||
| 455 | |||
| 456 | DEBUGP("clean_from_lists(%p)\n", ct); | ||
| 457 | ASSERT_WRITE_LOCK(&nf_conntrack_lock); | ||
| 458 | |||
| 459 | ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
| 460 | hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
| 461 | LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); | ||
| 462 | LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); | ||
| 463 | |||
| 464 | /* Destroy all pending expectations */ | ||
| 465 | remove_expectations(ct); | ||
| 466 | } | ||
| 467 | |||
| 468 | static void | ||
| 469 | destroy_conntrack(struct nf_conntrack *nfct) | ||
| 470 | { | ||
| 471 | struct nf_conn *ct = (struct nf_conn *)nfct; | ||
| 472 | struct nf_conntrack_l3proto *l3proto; | ||
| 473 | struct nf_conntrack_protocol *proto; | ||
| 474 | |||
| 475 | DEBUGP("destroy_conntrack(%p)\n", ct); | ||
| 476 | NF_CT_ASSERT(atomic_read(&nfct->use) == 0); | ||
| 477 | NF_CT_ASSERT(!timer_pending(&ct->timeout)); | ||
| 478 | |||
| 479 | nf_conntrack_event(IPCT_DESTROY, ct); | ||
| 480 | set_bit(IPS_DYING_BIT, &ct->status); | ||
| 481 | |||
| 482 | /* To make sure we don't get any weird locking issues here: | ||
| 483 | * destroy_conntrack() MUST NOT be called with a write lock | ||
| 484 | * to nf_conntrack_lock!!! -HW */ | ||
| 485 | l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); | ||
| 486 | if (l3proto && l3proto->destroy) | ||
| 487 | l3proto->destroy(ct); | ||
| 488 | |||
| 489 | proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, | ||
| 490 | ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); | ||
| 491 | if (proto && proto->destroy) | ||
| 492 | proto->destroy(ct); | ||
| 493 | |||
| 494 | if (nf_conntrack_destroyed) | ||
| 495 | nf_conntrack_destroyed(ct); | ||
| 496 | |||
| 497 | write_lock_bh(&nf_conntrack_lock); | ||
| 498 | /* Expectations will have been removed in clean_from_lists, | ||
| 499 | * except TFTP can create an expectation on the first packet, | ||
| 500 | * before connection is in the list, so we need to clean here, | ||
| 501 | * too. */ | ||
| 502 | remove_expectations(ct); | ||
| 503 | |||
| 504 | /* We overload first tuple to link into unconfirmed list. */ | ||
| 505 | if (!nf_ct_is_confirmed(ct)) { | ||
| 506 | BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list)); | ||
| 507 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | ||
| 508 | } | ||
| 509 | |||
| 510 | NF_CT_STAT_INC(delete); | ||
| 511 | write_unlock_bh(&nf_conntrack_lock); | ||
| 512 | |||
| 513 | if (ct->master) | ||
| 514 | nf_ct_put(ct->master); | ||
| 515 | |||
| 516 | DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); | ||
| 517 | nf_conntrack_free(ct); | ||
| 518 | } | ||
| 519 | |||
| 520 | static void death_by_timeout(unsigned long ul_conntrack) | ||
| 521 | { | ||
| 522 | struct nf_conn *ct = (void *)ul_conntrack; | ||
| 523 | |||
| 524 | write_lock_bh(&nf_conntrack_lock); | ||
| 525 | /* Inside lock so preempt is disabled on module removal path. | ||
| 526 | * Otherwise we can get spurious warnings. */ | ||
| 527 | NF_CT_STAT_INC(delete_list); | ||
| 528 | clean_from_lists(ct); | ||
| 529 | write_unlock_bh(&nf_conntrack_lock); | ||
| 530 | nf_ct_put(ct); | ||
| 531 | } | ||
| 532 | |||
| 533 | static inline int | ||
| 534 | conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, | ||
| 535 | const struct nf_conntrack_tuple *tuple, | ||
| 536 | const struct nf_conn *ignored_conntrack) | ||
| 537 | { | ||
| 538 | ASSERT_READ_LOCK(&nf_conntrack_lock); | ||
| 539 | return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack | ||
| 540 | && nf_ct_tuple_equal(tuple, &i->tuple); | ||
| 541 | } | ||
| 542 | |||
| 543 | static struct nf_conntrack_tuple_hash * | ||
| 544 | __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, | ||
| 545 | const struct nf_conn *ignored_conntrack) | ||
| 546 | { | ||
| 547 | struct nf_conntrack_tuple_hash *h; | ||
| 548 | unsigned int hash = hash_conntrack(tuple); | ||
| 549 | |||
| 550 | ASSERT_READ_LOCK(&nf_conntrack_lock); | ||
| 551 | list_for_each_entry(h, &nf_conntrack_hash[hash], list) { | ||
| 552 | if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { | ||
| 553 | NF_CT_STAT_INC(found); | ||
| 554 | return h; | ||
| 555 | } | ||
| 556 | NF_CT_STAT_INC(searched); | ||
| 557 | } | ||
| 558 | |||
| 559 | return NULL; | ||
| 560 | } | ||
| 561 | |||
| 562 | /* Find a connection corresponding to a tuple. */ | ||
| 563 | struct nf_conntrack_tuple_hash * | ||
| 564 | nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, | ||
| 565 | const struct nf_conn *ignored_conntrack) | ||
| 566 | { | ||
| 567 | struct nf_conntrack_tuple_hash *h; | ||
| 568 | |||
| 569 | read_lock_bh(&nf_conntrack_lock); | ||
| 570 | h = __nf_conntrack_find(tuple, ignored_conntrack); | ||
| 571 | if (h) | ||
| 572 | atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); | ||
| 573 | read_unlock_bh(&nf_conntrack_lock); | ||
| 574 | |||
| 575 | return h; | ||
| 576 | } | ||
| 577 | |||
| 578 | /* Confirm a connection given skb; places it in hash table */ | ||
| 579 | int | ||
| 580 | __nf_conntrack_confirm(struct sk_buff **pskb) | ||
| 581 | { | ||
| 582 | unsigned int hash, repl_hash; | ||
| 583 | struct nf_conn *ct; | ||
| 584 | enum ip_conntrack_info ctinfo; | ||
| 585 | |||
| 586 | ct = nf_ct_get(*pskb, &ctinfo); | ||
| 587 | |||
| 588 | /* ipt_REJECT uses nf_conntrack_attach to attach related | ||
| 589 | ICMP/TCP RST packets in other direction. Actual packet | ||
| 590 | which created connection will be IP_CT_NEW or for an | ||
| 591 | expected connection, IP_CT_RELATED. */ | ||
| 592 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | ||
| 593 | return NF_ACCEPT; | ||
| 594 | |||
| 595 | hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | ||
| 596 | repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
| 597 | |||
| 598 | /* We're not in hash table, and we refuse to set up related | ||
| 599 | connections for unconfirmed conns. But packet copies and | ||
| 600 | REJECT will give spurious warnings here. */ | ||
| 601 | /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ | ||
| 602 | |||
| 603 | /* No external references means no one else could have | ||
| 604 | confirmed us. */ | ||
| 605 | NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); | ||
| 606 | DEBUGP("Confirming conntrack %p\n", ct); | ||
| 607 | |||
| 608 | write_lock_bh(&nf_conntrack_lock); | ||
| 609 | |||
| 610 | /* See if there's one in the list already, including reverse: | ||
| 611 | NAT could have grabbed it without realizing, since we're | ||
| 612 | not in the hash. If there is, we lost race. */ | ||
| 613 | if (!LIST_FIND(&nf_conntrack_hash[hash], | ||
| 614 | conntrack_tuple_cmp, | ||
| 615 | struct nf_conntrack_tuple_hash *, | ||
| 616 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) | ||
| 617 | && !LIST_FIND(&nf_conntrack_hash[repl_hash], | ||
| 618 | conntrack_tuple_cmp, | ||
| 619 | struct nf_conntrack_tuple_hash *, | ||
| 620 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { | ||
| 621 | /* Remove from unconfirmed list */ | ||
| 622 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | ||
| 623 | |||
| 624 | list_prepend(&nf_conntrack_hash[hash], | ||
| 625 | &ct->tuplehash[IP_CT_DIR_ORIGINAL]); | ||
| 626 | list_prepend(&nf_conntrack_hash[repl_hash], | ||
| 627 | &ct->tuplehash[IP_CT_DIR_REPLY]); | ||
| 628 | /* Timer relative to confirmation time, not original | ||
| 629 | setting time, otherwise we'd get timer wrap in | ||
| 630 | weird delay cases. */ | ||
| 631 | ct->timeout.expires += jiffies; | ||
| 632 | add_timer(&ct->timeout); | ||
| 633 | atomic_inc(&ct->ct_general.use); | ||
| 634 | set_bit(IPS_CONFIRMED_BIT, &ct->status); | ||
| 635 | NF_CT_STAT_INC(insert); | ||
| 636 | write_unlock_bh(&nf_conntrack_lock); | ||
| 637 | if (ct->helper) | ||
| 638 | nf_conntrack_event_cache(IPCT_HELPER, *pskb); | ||
| 639 | #ifdef CONFIG_NF_NAT_NEEDED | ||
| 640 | if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || | ||
| 641 | test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) | ||
| 642 | nf_conntrack_event_cache(IPCT_NATINFO, *pskb); | ||
| 643 | #endif | ||
| 644 | nf_conntrack_event_cache(master_ct(ct) ? | ||
| 645 | IPCT_RELATED : IPCT_NEW, *pskb); | ||
| 646 | return NF_ACCEPT; | ||
| 647 | } | ||
| 648 | |||
| 649 | NF_CT_STAT_INC(insert_failed); | ||
| 650 | write_unlock_bh(&nf_conntrack_lock); | ||
| 651 | return NF_DROP; | ||
| 652 | } | ||
| 653 | |||
| 654 | /* Returns true if a connection corresponds to the tuple (required | ||
| 655 | for NAT). */ | ||
| 656 | int | ||
| 657 | nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, | ||
| 658 | const struct nf_conn *ignored_conntrack) | ||
| 659 | { | ||
| 660 | struct nf_conntrack_tuple_hash *h; | ||
| 661 | |||
| 662 | read_lock_bh(&nf_conntrack_lock); | ||
| 663 | h = __nf_conntrack_find(tuple, ignored_conntrack); | ||
| 664 | read_unlock_bh(&nf_conntrack_lock); | ||
| 665 | |||
| 666 | return h != NULL; | ||
| 667 | } | ||
| 668 | |||
| 669 | /* There's a small race here where we may free a just-assured | ||
| 670 | connection. Too bad: we're in trouble anyway. */ | ||
| 671 | static inline int unreplied(const struct nf_conntrack_tuple_hash *i) | ||
| 672 | { | ||
| 673 | return !(test_bit(IPS_ASSURED_BIT, | ||
| 674 | &nf_ct_tuplehash_to_ctrack(i)->status)); | ||
| 675 | } | ||
| 676 | |||
| 677 | static int early_drop(struct list_head *chain) | ||
| 678 | { | ||
| 679 | /* Traverse backwards: gives us oldest, which is roughly LRU */ | ||
| 680 | struct nf_conntrack_tuple_hash *h; | ||
| 681 | struct nf_conn *ct = NULL; | ||
| 682 | int dropped = 0; | ||
| 683 | |||
| 684 | read_lock_bh(&nf_conntrack_lock); | ||
| 685 | h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *); | ||
| 686 | if (h) { | ||
| 687 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
| 688 | atomic_inc(&ct->ct_general.use); | ||
| 689 | } | ||
| 690 | read_unlock_bh(&nf_conntrack_lock); | ||
| 691 | |||
| 692 | if (!ct) | ||
| 693 | return dropped; | ||
| 694 | |||
| 695 | if (del_timer(&ct->timeout)) { | ||
| 696 | death_by_timeout((unsigned long)ct); | ||
| 697 | dropped = 1; | ||
| 698 | NF_CT_STAT_INC(early_drop); | ||
| 699 | } | ||
| 700 | nf_ct_put(ct); | ||
| 701 | return dropped; | ||
| 702 | } | ||
| 703 | |||
| 704 | static inline int helper_cmp(const struct nf_conntrack_helper *i, | ||
| 705 | const struct nf_conntrack_tuple *rtuple) | ||
| 706 | { | ||
| 707 | return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); | ||
| 708 | } | ||
| 709 | |||
| 710 | static struct nf_conntrack_helper * | ||
| 711 | nf_ct_find_helper(const struct nf_conntrack_tuple *tuple) | ||
| 712 | { | ||
| 713 | return LIST_FIND(&helpers, helper_cmp, | ||
| 714 | struct nf_conntrack_helper *, | ||
| 715 | tuple); | ||
| 716 | } | ||
| 717 | |||
| 718 | static struct nf_conn * | ||
| 719 | __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, | ||
| 720 | const struct nf_conntrack_tuple *repl, | ||
| 721 | const struct nf_conntrack_l3proto *l3proto) | ||
| 722 | { | ||
| 723 | struct nf_conn *conntrack = NULL; | ||
| 724 | u_int32_t features = 0; | ||
| 725 | |||
| 726 | if (!nf_conntrack_hash_rnd_initted) { | ||
| 727 | get_random_bytes(&nf_conntrack_hash_rnd, 4); | ||
| 728 | nf_conntrack_hash_rnd_initted = 1; | ||
| 729 | } | ||
| 730 | |||
| 731 | if (nf_conntrack_max | ||
| 732 | && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) { | ||
| 733 | unsigned int hash = hash_conntrack(orig); | ||
| 734 | /* Try dropping from this hash chain. */ | ||
| 735 | if (!early_drop(&nf_conntrack_hash[hash])) { | ||
| 736 | if (net_ratelimit()) | ||
| 737 | printk(KERN_WARNING | ||
| 738 | "nf_conntrack: table full, dropping" | ||
| 739 | " packet.\n"); | ||
| 740 | return ERR_PTR(-ENOMEM); | ||
| 741 | } | ||
| 742 | } | ||
| 743 | |||
| 744 | /* find features needed by this conntrack. */ | ||
| 745 | features = l3proto->get_features(orig); | ||
| 746 | read_lock_bh(&nf_conntrack_lock); | ||
| 747 | if (nf_ct_find_helper(repl) != NULL) | ||
| 748 | features |= NF_CT_F_HELP; | ||
| 749 | read_unlock_bh(&nf_conntrack_lock); | ||
| 750 | |||
| 751 | DEBUGP("nf_conntrack_alloc: features=0x%x\n", features); | ||
| 752 | |||
| 753 | read_lock_bh(&nf_ct_cache_lock); | ||
| 754 | |||
| 755 | if (!nf_ct_cache[features].use) { | ||
| 756 | DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n", | ||
| 757 | features); | ||
| 758 | goto out; | ||
| 759 | } | ||
| 760 | |||
| 761 | conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC); | ||
| 762 | if (conntrack == NULL) { | ||
| 763 | DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n"); | ||
| 764 | goto out; | ||
| 765 | } | ||
| 766 | |||
| 767 | memset(conntrack, 0, nf_ct_cache[features].size); | ||
| 768 | conntrack->features = features; | ||
| 769 | if (nf_ct_cache[features].init_conntrack && | ||
| 770 | nf_ct_cache[features].init_conntrack(conntrack, features) < 0) { | ||
| 771 | DEBUGP("nf_conntrack_alloc: failed to init\n"); | ||
| 772 | kmem_cache_free(nf_ct_cache[features].cachep, conntrack); | ||
| 773 | conntrack = NULL; | ||
| 774 | goto out; | ||
| 775 | } | ||
| 776 | |||
| 777 | atomic_set(&conntrack->ct_general.use, 1); | ||
| 778 | conntrack->ct_general.destroy = destroy_conntrack; | ||
| 779 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; | ||
| 780 | conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; | ||
| 781 | /* Don't set timer yet: wait for confirmation */ | ||
| 782 | init_timer(&conntrack->timeout); | ||
| 783 | conntrack->timeout.data = (unsigned long)conntrack; | ||
| 784 | conntrack->timeout.function = death_by_timeout; | ||
| 785 | |||
| 786 | atomic_inc(&nf_conntrack_count); | ||
| 787 | out: | ||
| 788 | read_unlock_bh(&nf_ct_cache_lock); | ||
| 789 | return conntrack; | ||
| 790 | } | ||
| 791 | |||
| 792 | struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, | ||
| 793 | const struct nf_conntrack_tuple *repl) | ||
| 794 | { | ||
| 795 | struct nf_conntrack_l3proto *l3proto; | ||
| 796 | |||
| 797 | l3proto = nf_ct_find_l3proto(orig->src.l3num); | ||
| 798 | return __nf_conntrack_alloc(orig, repl, l3proto); | ||
| 799 | } | ||
| 800 | |||
| 801 | void nf_conntrack_free(struct nf_conn *conntrack) | ||
| 802 | { | ||
| 803 | u_int32_t features = conntrack->features; | ||
| 804 | NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM); | ||
| 805 | DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features, | ||
| 806 | conntrack); | ||
| 807 | kmem_cache_free(nf_ct_cache[features].cachep, conntrack); | ||
| 808 | atomic_dec(&nf_conntrack_count); | ||
| 809 | } | ||
| 810 | |||
| 811 | /* Allocate a new conntrack: we return -ENOMEM if classification | ||
| 812 | failed due to stress. Otherwise it really is unclassifiable. */ | ||
| 813 | static struct nf_conntrack_tuple_hash * | ||
| 814 | init_conntrack(const struct nf_conntrack_tuple *tuple, | ||
| 815 | struct nf_conntrack_l3proto *l3proto, | ||
| 816 | struct nf_conntrack_protocol *protocol, | ||
| 817 | struct sk_buff *skb, | ||
| 818 | unsigned int dataoff) | ||
| 819 | { | ||
| 820 | struct nf_conn *conntrack; | ||
| 821 | struct nf_conntrack_tuple repl_tuple; | ||
| 822 | struct nf_conntrack_expect *exp; | ||
| 823 | |||
| 824 | if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) { | ||
| 825 | DEBUGP("Can't invert tuple.\n"); | ||
| 826 | return NULL; | ||
| 827 | } | ||
| 828 | |||
| 829 | conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto); | ||
| 830 | if (conntrack == NULL || IS_ERR(conntrack)) { | ||
| 831 | DEBUGP("Can't allocate conntrack.\n"); | ||
| 832 | return (struct nf_conntrack_tuple_hash *)conntrack; | ||
| 833 | } | ||
| 834 | |||
| 835 | if (!protocol->new(conntrack, skb, dataoff)) { | ||
| 836 | nf_conntrack_free(conntrack); | ||
| 837 | DEBUGP("init conntrack: can't track with proto module\n"); | ||
| 838 | return NULL; | ||
| 839 | } | ||
| 840 | |||
| 841 | write_lock_bh(&nf_conntrack_lock); | ||
| 842 | exp = find_expectation(tuple); | ||
| 843 | |||
| 844 | if (exp) { | ||
| 845 | DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", | ||
| 846 | conntrack, exp); | ||
| 847 | /* Welcome, Mr. Bond. We've been expecting you... */ | ||
| 848 | __set_bit(IPS_EXPECTED_BIT, &conntrack->status); | ||
| 849 | conntrack->master = exp->master; | ||
| 850 | #ifdef CONFIG_NF_CONNTRACK_MARK | ||
| 851 | conntrack->mark = exp->master->mark; | ||
| 852 | #endif | ||
| 853 | nf_conntrack_get(&conntrack->master->ct_general); | ||
| 854 | NF_CT_STAT_INC(expect_new); | ||
| 855 | } else { | ||
| 856 | conntrack->helper = nf_ct_find_helper(&repl_tuple); | ||
| 857 | |||
| 858 | NF_CT_STAT_INC(new); | ||
| 859 | } | ||
| 860 | |||
| 861 | /* Overload tuple linked list to put us in unconfirmed list. */ | ||
| 862 | list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); | ||
| 863 | |||
| 864 | write_unlock_bh(&nf_conntrack_lock); | ||
| 865 | |||
| 866 | if (exp) { | ||
| 867 | if (exp->expectfn) | ||
| 868 | exp->expectfn(conntrack, exp); | ||
| 869 | nf_conntrack_expect_put(exp); | ||
| 870 | } | ||
| 871 | |||
| 872 | return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; | ||
| 873 | } | ||
| 874 | |||
| 875 | /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ | ||
| 876 | static inline struct nf_conn * | ||
| 877 | resolve_normal_ct(struct sk_buff *skb, | ||
| 878 | unsigned int dataoff, | ||
| 879 | u_int16_t l3num, | ||
| 880 | u_int8_t protonum, | ||
| 881 | struct nf_conntrack_l3proto *l3proto, | ||
| 882 | struct nf_conntrack_protocol *proto, | ||
| 883 | int *set_reply, | ||
| 884 | enum ip_conntrack_info *ctinfo) | ||
| 885 | { | ||
| 886 | struct nf_conntrack_tuple tuple; | ||
| 887 | struct nf_conntrack_tuple_hash *h; | ||
| 888 | struct nf_conn *ct; | ||
| 889 | |||
| 890 | if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data), | ||
| 891 | dataoff, l3num, protonum, &tuple, l3proto, | ||
| 892 | proto)) { | ||
| 893 | DEBUGP("resolve_normal_ct: Can't get tuple\n"); | ||
| 894 | return NULL; | ||
| 895 | } | ||
| 896 | |||
| 897 | /* look for tuple match */ | ||
| 898 | h = nf_conntrack_find_get(&tuple, NULL); | ||
| 899 | if (!h) { | ||
| 900 | h = init_conntrack(&tuple, l3proto, proto, skb, dataoff); | ||
| 901 | if (!h) | ||
| 902 | return NULL; | ||
| 903 | if (IS_ERR(h)) | ||
| 904 | return (void *)h; | ||
| 905 | } | ||
| 906 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
| 907 | |||
| 908 | /* It exists; we have (non-exclusive) reference. */ | ||
| 909 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { | ||
| 910 | *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; | ||
| 911 | /* Please set reply bit if this packet OK */ | ||
| 912 | *set_reply = 1; | ||
| 913 | } else { | ||
| 914 | /* Once we've had two way comms, always ESTABLISHED. */ | ||
| 915 | if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { | ||
| 916 | DEBUGP("nf_conntrack_in: normal packet for %p\n", ct); | ||
| 917 | *ctinfo = IP_CT_ESTABLISHED; | ||
| 918 | } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { | ||
| 919 | DEBUGP("nf_conntrack_in: related packet for %p\n", ct); | ||
| 920 | *ctinfo = IP_CT_RELATED; | ||
| 921 | } else { | ||
| 922 | DEBUGP("nf_conntrack_in: new packet for %p\n", ct); | ||
| 923 | *ctinfo = IP_CT_NEW; | ||
| 924 | } | ||
| 925 | *set_reply = 0; | ||
| 926 | } | ||
| 927 | skb->nfct = &ct->ct_general; | ||
| 928 | skb->nfctinfo = *ctinfo; | ||
| 929 | return ct; | ||
| 930 | } | ||
| 931 | |||
| 932 | unsigned int | ||
| 933 | nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) | ||
| 934 | { | ||
| 935 | struct nf_conn *ct; | ||
| 936 | enum ip_conntrack_info ctinfo; | ||
| 937 | struct nf_conntrack_l3proto *l3proto; | ||
| 938 | struct nf_conntrack_protocol *proto; | ||
| 939 | unsigned int dataoff; | ||
| 940 | u_int8_t protonum; | ||
| 941 | int set_reply = 0; | ||
| 942 | int ret; | ||
| 943 | |||
| 944 | /* Previously seen (loopback or untracked)? Ignore. */ | ||
| 945 | if ((*pskb)->nfct) { | ||
| 946 | NF_CT_STAT_INC(ignore); | ||
| 947 | return NF_ACCEPT; | ||
| 948 | } | ||
| 949 | |||
| 950 | l3proto = nf_ct_find_l3proto((u_int16_t)pf); | ||
| 951 | if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { | ||
| 952 | DEBUGP("not prepared to track yet or error occured\n"); | ||
| 953 | return -ret; | ||
| 954 | } | ||
| 955 | |||
| 956 | proto = nf_ct_find_proto((u_int16_t)pf, protonum); | ||
| 957 | |||
| 958 | /* It may be a special packet, error, unclean... | ||
| 959 | * The inverse of the return code tells the netfilter | ||
| 960 | * core what to do with the packet. */ | ||
| 961 | if (proto->error != NULL && | ||
| 962 | (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { | ||
| 963 | NF_CT_STAT_INC(error); | ||
| 964 | NF_CT_STAT_INC(invalid); | ||
| 965 | return -ret; | ||
| 966 | } | ||
| 967 | |||
| 968 | ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto, | ||
| 969 | &set_reply, &ctinfo); | ||
| 970 | if (!ct) { | ||
| 971 | /* Not valid part of a connection */ | ||
| 972 | NF_CT_STAT_INC(invalid); | ||
| 973 | return NF_ACCEPT; | ||
| 974 | } | ||
| 975 | |||
| 976 | if (IS_ERR(ct)) { | ||
| 977 | /* Too stressed to deal. */ | ||
| 978 | NF_CT_STAT_INC(drop); | ||
| 979 | return NF_DROP; | ||
| 980 | } | ||
| 981 | |||
| 982 | NF_CT_ASSERT((*pskb)->nfct); | ||
| 983 | |||
| 984 | ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum); | ||
| 985 | if (ret < 0) { | ||
| 986 | /* Invalid: inverse of the return code tells | ||
| 987 | * the netfilter core what to do */ | ||
| 988 | DEBUGP("nf_conntrack_in: Can't track with proto module\n"); | ||
| 989 | nf_conntrack_put((*pskb)->nfct); | ||
| 990 | (*pskb)->nfct = NULL; | ||
| 991 | NF_CT_STAT_INC(invalid); | ||
| 992 | return -ret; | ||
| 993 | } | ||
| 994 | |||
| 995 | if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) | ||
| 996 | nf_conntrack_event_cache(IPCT_STATUS, *pskb); | ||
| 997 | |||
| 998 | return ret; | ||
| 999 | } | ||
| 1000 | |||
| 1001 | int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, | ||
| 1002 | const struct nf_conntrack_tuple *orig) | ||
| 1003 | { | ||
| 1004 | return nf_ct_invert_tuple(inverse, orig, | ||
| 1005 | nf_ct_find_l3proto(orig->src.l3num), | ||
| 1006 | nf_ct_find_proto(orig->src.l3num, | ||
| 1007 | orig->dst.protonum)); | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | /* Would two expected things clash? */ | ||
| 1011 | static inline int expect_clash(const struct nf_conntrack_expect *a, | ||
| 1012 | const struct nf_conntrack_expect *b) | ||
| 1013 | { | ||
| 1014 | /* Part covered by intersection of masks must be unequal, | ||
| 1015 | otherwise they clash */ | ||
| 1016 | struct nf_conntrack_tuple intersect_mask; | ||
| 1017 | int count; | ||
| 1018 | |||
| 1019 | intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num; | ||
| 1020 | intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all; | ||
| 1021 | intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all; | ||
| 1022 | intersect_mask.dst.protonum = a->mask.dst.protonum | ||
| 1023 | & b->mask.dst.protonum; | ||
| 1024 | |||
| 1025 | for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ | ||
| 1026 | intersect_mask.src.u3.all[count] = | ||
| 1027 | a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; | ||
| 1028 | } | ||
| 1029 | |||
| 1030 | for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ | ||
| 1031 | intersect_mask.dst.u3.all[count] = | ||
| 1032 | a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count]; | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); | ||
| 1036 | } | ||
| 1037 | |||
| 1038 | static inline int expect_matches(const struct nf_conntrack_expect *a, | ||
| 1039 | const struct nf_conntrack_expect *b) | ||
| 1040 | { | ||
| 1041 | return a->master == b->master | ||
| 1042 | && nf_ct_tuple_equal(&a->tuple, &b->tuple) | ||
| 1043 | && nf_ct_tuple_equal(&a->mask, &b->mask); | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | /* Generally a bad idea to call this: could have matched already. */ | ||
| 1047 | void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp) | ||
| 1048 | { | ||
| 1049 | struct nf_conntrack_expect *i; | ||
| 1050 | |||
| 1051 | write_lock_bh(&nf_conntrack_lock); | ||
| 1052 | /* choose the oldest expectation to evict */ | ||
| 1053 | list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { | ||
| 1054 | if (expect_matches(i, exp) && del_timer(&i->timeout)) { | ||
| 1055 | nf_ct_unlink_expect(i); | ||
| 1056 | write_unlock_bh(&nf_conntrack_lock); | ||
| 1057 | nf_conntrack_expect_put(i); | ||
| 1058 | return; | ||
| 1059 | } | ||
| 1060 | } | ||
| 1061 | write_unlock_bh(&nf_conntrack_lock); | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | /* We don't increase the master conntrack refcount for non-fulfilled | ||
| 1065 | * conntracks. During the conntrack destruction, the expectations are | ||
| 1066 | * always killed before the conntrack itself */ | ||
| 1067 | struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me) | ||
| 1068 | { | ||
| 1069 | struct nf_conntrack_expect *new; | ||
| 1070 | |||
| 1071 | new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC); | ||
| 1072 | if (!new) { | ||
| 1073 | DEBUGP("expect_related: OOM allocating expect\n"); | ||
| 1074 | return NULL; | ||
| 1075 | } | ||
| 1076 | new->master = me; | ||
| 1077 | atomic_set(&new->use, 1); | ||
| 1078 | return new; | ||
| 1079 | } | ||
| 1080 | |||
| 1081 | void nf_conntrack_expect_put(struct nf_conntrack_expect *exp) | ||
| 1082 | { | ||
| 1083 | if (atomic_dec_and_test(&exp->use)) | ||
| 1084 | kmem_cache_free(nf_conntrack_expect_cachep, exp); | ||
| 1085 | } | ||
| 1086 | |||
| 1087 | static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) | ||
| 1088 | { | ||
| 1089 | atomic_inc(&exp->use); | ||
| 1090 | exp->master->expecting++; | ||
| 1091 | list_add(&exp->list, &nf_conntrack_expect_list); | ||
| 1092 | |||
| 1093 | init_timer(&exp->timeout); | ||
| 1094 | exp->timeout.data = (unsigned long)exp; | ||
| 1095 | exp->timeout.function = expectation_timed_out; | ||
| 1096 | exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; | ||
| 1097 | add_timer(&exp->timeout); | ||
| 1098 | |||
| 1099 | atomic_inc(&exp->use); | ||
| 1100 | NF_CT_STAT_INC(expect_create); | ||
| 1101 | } | ||
| 1102 | |||
| 1103 | /* Race with expectations being used means we could have none to find; OK. */ | ||
| 1104 | static void evict_oldest_expect(struct nf_conn *master) | ||
| 1105 | { | ||
| 1106 | struct nf_conntrack_expect *i; | ||
| 1107 | |||
| 1108 | list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { | ||
| 1109 | if (i->master == master) { | ||
| 1110 | if (del_timer(&i->timeout)) { | ||
| 1111 | nf_ct_unlink_expect(i); | ||
| 1112 | nf_conntrack_expect_put(i); | ||
| 1113 | } | ||
| 1114 | break; | ||
| 1115 | } | ||
| 1116 | } | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | static inline int refresh_timer(struct nf_conntrack_expect *i) | ||
| 1120 | { | ||
| 1121 | if (!del_timer(&i->timeout)) | ||
| 1122 | return 0; | ||
| 1123 | |||
| 1124 | i->timeout.expires = jiffies + i->master->helper->timeout*HZ; | ||
| 1125 | add_timer(&i->timeout); | ||
| 1126 | return 1; | ||
| 1127 | } | ||
| 1128 | |||
| 1129 | int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) | ||
| 1130 | { | ||
| 1131 | struct nf_conntrack_expect *i; | ||
| 1132 | int ret; | ||
| 1133 | |||
| 1134 | DEBUGP("nf_conntrack_expect_related %p\n", related_to); | ||
| 1135 | DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple); | ||
| 1136 | DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask); | ||
| 1137 | |||
| 1138 | write_lock_bh(&nf_conntrack_lock); | ||
| 1139 | list_for_each_entry(i, &nf_conntrack_expect_list, list) { | ||
| 1140 | if (expect_matches(i, expect)) { | ||
| 1141 | /* Refresh timer: if it's dying, ignore.. */ | ||
| 1142 | if (refresh_timer(i)) { | ||
| 1143 | ret = 0; | ||
| 1144 | goto out; | ||
| 1145 | } | ||
| 1146 | } else if (expect_clash(i, expect)) { | ||
| 1147 | ret = -EBUSY; | ||
| 1148 | goto out; | ||
| 1149 | } | ||
| 1150 | } | ||
| 1151 | /* Will be over limit? */ | ||
| 1152 | if (expect->master->helper->max_expected && | ||
| 1153 | expect->master->expecting >= expect->master->helper->max_expected) | ||
| 1154 | evict_oldest_expect(expect->master); | ||
| 1155 | |||
| 1156 | nf_conntrack_expect_insert(expect); | ||
| 1157 | nf_conntrack_expect_event(IPEXP_NEW, expect); | ||
| 1158 | ret = 0; | ||
| 1159 | out: | ||
| 1160 | write_unlock_bh(&nf_conntrack_lock); | ||
| 1161 | return ret; | ||
| 1162 | } | ||
| 1163 | |||
| 1164 | /* Alter reply tuple (maybe alter helper). This is for NAT, and is | ||
| 1165 | implicitly racy: see __nf_conntrack_confirm */ | ||
| 1166 | void nf_conntrack_alter_reply(struct nf_conn *conntrack, | ||
| 1167 | const struct nf_conntrack_tuple *newreply) | ||
| 1168 | { | ||
| 1169 | write_lock_bh(&nf_conntrack_lock); | ||
| 1170 | /* Should be unconfirmed, so not in hash table yet */ | ||
| 1171 | NF_CT_ASSERT(!nf_ct_is_confirmed(conntrack)); | ||
| 1172 | |||
| 1173 | DEBUGP("Altering reply tuple of %p to ", conntrack); | ||
| 1174 | NF_CT_DUMP_TUPLE(newreply); | ||
| 1175 | |||
| 1176 | conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; | ||
| 1177 | if (!conntrack->master && conntrack->expecting == 0) | ||
| 1178 | conntrack->helper = nf_ct_find_helper(newreply); | ||
| 1179 | write_unlock_bh(&nf_conntrack_lock); | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | int nf_conntrack_helper_register(struct nf_conntrack_helper *me) | ||
| 1183 | { | ||
| 1184 | int ret; | ||
| 1185 | BUG_ON(me->timeout == 0); | ||
| 1186 | |||
| 1187 | ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help", | ||
| 1188 | sizeof(struct nf_conn) | ||
| 1189 | + sizeof(union nf_conntrack_help) | ||
| 1190 | + __alignof__(union nf_conntrack_help), | ||
| 1191 | init_conntrack_for_helper); | ||
| 1192 | if (ret < 0) { | ||
| 1193 | printk(KERN_ERR "nf_conntrack_helper_reigster: Unable to create slab cache for conntracks\n"); | ||
| 1194 | return ret; | ||
| 1195 | } | ||
| 1196 | write_lock_bh(&nf_conntrack_lock); | ||
| 1197 | list_prepend(&helpers, me); | ||
| 1198 | write_unlock_bh(&nf_conntrack_lock); | ||
| 1199 | |||
| 1200 | return 0; | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | static inline int unhelp(struct nf_conntrack_tuple_hash *i, | ||
| 1204 | const struct nf_conntrack_helper *me) | ||
| 1205 | { | ||
| 1206 | if (nf_ct_tuplehash_to_ctrack(i)->helper == me) { | ||
| 1207 | nf_conntrack_event(IPCT_HELPER, nf_ct_tuplehash_to_ctrack(i)); | ||
| 1208 | nf_ct_tuplehash_to_ctrack(i)->helper = NULL; | ||
| 1209 | } | ||
| 1210 | return 0; | ||
| 1211 | } | ||
| 1212 | |||
| 1213 | void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) | ||
| 1214 | { | ||
| 1215 | unsigned int i; | ||
| 1216 | struct nf_conntrack_expect *exp, *tmp; | ||
| 1217 | |||
| 1218 | /* Need write lock here, to delete helper. */ | ||
| 1219 | write_lock_bh(&nf_conntrack_lock); | ||
| 1220 | LIST_DELETE(&helpers, me); | ||
| 1221 | |||
| 1222 | /* Get rid of expectations */ | ||
| 1223 | list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { | ||
| 1224 | if (exp->master->helper == me && del_timer(&exp->timeout)) { | ||
| 1225 | nf_ct_unlink_expect(exp); | ||
| 1226 | nf_conntrack_expect_put(exp); | ||
| 1227 | } | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | /* Get rid of expecteds, set helpers to NULL. */ | ||
| 1231 | LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me); | ||
| 1232 | for (i = 0; i < nf_conntrack_htable_size; i++) | ||
| 1233 | LIST_FIND_W(&nf_conntrack_hash[i], unhelp, | ||
| 1234 | struct nf_conntrack_tuple_hash *, me); | ||
| 1235 | write_unlock_bh(&nf_conntrack_lock); | ||
| 1236 | |||
| 1237 | /* Someone could be still looking at the helper in a bh. */ | ||
| 1238 | synchronize_net(); | ||
| 1239 | } | ||
| 1240 | |||
| 1241 | /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ | ||
| 1242 | void __nf_ct_refresh_acct(struct nf_conn *ct, | ||
| 1243 | enum ip_conntrack_info ctinfo, | ||
| 1244 | const struct sk_buff *skb, | ||
| 1245 | unsigned long extra_jiffies, | ||
| 1246 | int do_acct) | ||
| 1247 | { | ||
| 1248 | int event = 0; | ||
| 1249 | |||
| 1250 | NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); | ||
| 1251 | NF_CT_ASSERT(skb); | ||
| 1252 | |||
| 1253 | write_lock_bh(&nf_conntrack_lock); | ||
| 1254 | |||
| 1255 | /* If not in hash table, timer will not be active yet */ | ||
| 1256 | if (!nf_ct_is_confirmed(ct)) { | ||
| 1257 | ct->timeout.expires = extra_jiffies; | ||
| 1258 | event = IPCT_REFRESH; | ||
| 1259 | } else { | ||
| 1260 | /* Need del_timer for race avoidance (may already be dying). */ | ||
| 1261 | if (del_timer(&ct->timeout)) { | ||
| 1262 | ct->timeout.expires = jiffies + extra_jiffies; | ||
| 1263 | add_timer(&ct->timeout); | ||
| 1264 | event = IPCT_REFRESH; | ||
| 1265 | } | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | #ifdef CONFIG_NF_CT_ACCT | ||
| 1269 | if (do_acct) { | ||
| 1270 | ct->counters[CTINFO2DIR(ctinfo)].packets++; | ||
| 1271 | ct->counters[CTINFO2DIR(ctinfo)].bytes += | ||
| 1272 | skb->len - (unsigned int)(skb->nh.raw - skb->data); | ||
| 1273 | if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) | ||
| 1274 | || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) | ||
| 1275 | event |= IPCT_COUNTER_FILLING; | ||
| 1276 | } | ||
| 1277 | #endif | ||
| 1278 | |||
| 1279 | write_unlock_bh(&nf_conntrack_lock); | ||
| 1280 | |||
| 1281 | /* must be unlocked when calling event cache */ | ||
| 1282 | if (event) | ||
| 1283 | nf_conntrack_event_cache(event, skb); | ||
| 1284 | } | ||
| 1285 | |||
| 1286 | /* Used by ipt_REJECT and ip6t_REJECT. */ | ||
| 1287 | void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) | ||
| 1288 | { | ||
| 1289 | struct nf_conn *ct; | ||
| 1290 | enum ip_conntrack_info ctinfo; | ||
| 1291 | |||
| 1292 | /* This ICMP is in reverse direction to the packet which caused it */ | ||
| 1293 | ct = nf_ct_get(skb, &ctinfo); | ||
| 1294 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) | ||
| 1295 | ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; | ||
| 1296 | else | ||
| 1297 | ctinfo = IP_CT_RELATED; | ||
| 1298 | |||
| 1299 | /* Attach to new skbuff, and increment count */ | ||
| 1300 | nskb->nfct = &ct->ct_general; | ||
| 1301 | nskb->nfctinfo = ctinfo; | ||
| 1302 | nf_conntrack_get(nskb->nfct); | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | static inline int | ||
| 1306 | do_iter(const struct nf_conntrack_tuple_hash *i, | ||
| 1307 | int (*iter)(struct nf_conn *i, void *data), | ||
| 1308 | void *data) | ||
| 1309 | { | ||
| 1310 | return iter(nf_ct_tuplehash_to_ctrack(i), data); | ||
| 1311 | } | ||
| 1312 | |||
| 1313 | /* Bring out ya dead! */ | ||
| 1314 | static struct nf_conntrack_tuple_hash * | ||
| 1315 | get_next_corpse(int (*iter)(struct nf_conn *i, void *data), | ||
| 1316 | void *data, unsigned int *bucket) | ||
| 1317 | { | ||
| 1318 | struct nf_conntrack_tuple_hash *h = NULL; | ||
| 1319 | |||
| 1320 | write_lock_bh(&nf_conntrack_lock); | ||
| 1321 | for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { | ||
| 1322 | h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter, | ||
| 1323 | struct nf_conntrack_tuple_hash *, iter, data); | ||
| 1324 | if (h) | ||
| 1325 | break; | ||
| 1326 | } | ||
| 1327 | if (!h) | ||
| 1328 | h = LIST_FIND_W(&unconfirmed, do_iter, | ||
| 1329 | struct nf_conntrack_tuple_hash *, iter, data); | ||
| 1330 | if (h) | ||
| 1331 | atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); | ||
| 1332 | write_unlock_bh(&nf_conntrack_lock); | ||
| 1333 | |||
| 1334 | return h; | ||
| 1335 | } | ||
| 1336 | |||
| 1337 | void | ||
| 1338 | nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) | ||
| 1339 | { | ||
| 1340 | struct nf_conntrack_tuple_hash *h; | ||
| 1341 | unsigned int bucket = 0; | ||
| 1342 | |||
| 1343 | while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { | ||
| 1344 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | ||
| 1345 | /* Time to push up daisies... */ | ||
| 1346 | if (del_timer(&ct->timeout)) | ||
| 1347 | death_by_timeout((unsigned long)ct); | ||
| 1348 | /* ... else the timer will get him soon. */ | ||
| 1349 | |||
| 1350 | nf_ct_put(ct); | ||
| 1351 | } | ||
| 1352 | } | ||
| 1353 | |||
| 1354 | static int kill_all(struct nf_conn *i, void *data) | ||
| 1355 | { | ||
| 1356 | return 1; | ||
| 1357 | } | ||
| 1358 | |||
| 1359 | static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size) | ||
| 1360 | { | ||
| 1361 | if (vmalloced) | ||
| 1362 | vfree(hash); | ||
| 1363 | else | ||
| 1364 | free_pages((unsigned long)hash, | ||
| 1365 | get_order(sizeof(struct list_head) * size)); | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | /* Mishearing the voices in his head, our hero wonders how he's | ||
| 1369 | supposed to kill the mall. */ | ||
| 1370 | void nf_conntrack_cleanup(void) | ||
| 1371 | { | ||
| 1372 | int i; | ||
| 1373 | |||
| 1374 | /* This makes sure all current packets have passed through | ||
| 1375 | netfilter framework. Roll on, two-stage module | ||
| 1376 | delete... */ | ||
| 1377 | synchronize_net(); | ||
| 1378 | |||
| 1379 | nf_ct_event_cache_flush(); | ||
| 1380 | i_see_dead_people: | ||
| 1381 | nf_ct_iterate_cleanup(kill_all, NULL); | ||
| 1382 | if (atomic_read(&nf_conntrack_count) != 0) { | ||
| 1383 | schedule(); | ||
| 1384 | goto i_see_dead_people; | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | for (i = 0; i < NF_CT_F_NUM; i++) { | ||
| 1388 | if (nf_ct_cache[i].use == 0) | ||
| 1389 | continue; | ||
| 1390 | |||
| 1391 | NF_CT_ASSERT(nf_ct_cache[i].use == 1); | ||
| 1392 | nf_ct_cache[i].use = 1; | ||
| 1393 | nf_conntrack_unregister_cache(i); | ||
| 1394 | } | ||
| 1395 | kmem_cache_destroy(nf_conntrack_expect_cachep); | ||
| 1396 | free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, | ||
| 1397 | nf_conntrack_htable_size); | ||
| 1398 | } | ||
| 1399 | |||
| 1400 | static struct list_head *alloc_hashtable(int size, int *vmalloced) | ||
| 1401 | { | ||
| 1402 | struct list_head *hash; | ||
| 1403 | unsigned int i; | ||
| 1404 | |||
| 1405 | *vmalloced = 0; | ||
| 1406 | hash = (void*)__get_free_pages(GFP_KERNEL, | ||
| 1407 | get_order(sizeof(struct list_head) | ||
| 1408 | * size)); | ||
| 1409 | if (!hash) { | ||
| 1410 | *vmalloced = 1; | ||
| 1411 | printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); | ||
| 1412 | hash = vmalloc(sizeof(struct list_head) * size); | ||
| 1413 | } | ||
| 1414 | |||
| 1415 | if (hash) | ||
| 1416 | for (i = 0; i < size; i++) | ||
| 1417 | INIT_LIST_HEAD(&hash[i]); | ||
| 1418 | |||
| 1419 | return hash; | ||
| 1420 | } | ||
| 1421 | |||
| 1422 | int set_hashsize(const char *val, struct kernel_param *kp) | ||
| 1423 | { | ||
| 1424 | int i, bucket, hashsize, vmalloced; | ||
| 1425 | int old_vmalloced, old_size; | ||
| 1426 | int rnd; | ||
| 1427 | struct list_head *hash, *old_hash; | ||
| 1428 | struct nf_conntrack_tuple_hash *h; | ||
| 1429 | |||
| 1430 | /* On boot, we can set this without any fancy locking. */ | ||
| 1431 | if (!nf_conntrack_htable_size) | ||
| 1432 | return param_set_uint(val, kp); | ||
| 1433 | |||
| 1434 | hashsize = simple_strtol(val, NULL, 0); | ||
| 1435 | if (!hashsize) | ||
| 1436 | return -EINVAL; | ||
| 1437 | |||
| 1438 | hash = alloc_hashtable(hashsize, &vmalloced); | ||
| 1439 | if (!hash) | ||
| 1440 | return -ENOMEM; | ||
| 1441 | |||
| 1442 | /* We have to rehash for the new table anyway, so we can also | ||
| 1443 | * use a new random seed */ | ||
| 1444 | get_random_bytes(&rnd, 4); | ||
| 1445 | |||
| 1446 | write_lock_bh(&nf_conntrack_lock); | ||
| 1447 | for (i = 0; i < nf_conntrack_htable_size; i++) { | ||
| 1448 | while (!list_empty(&nf_conntrack_hash[i])) { | ||
| 1449 | h = list_entry(nf_conntrack_hash[i].next, | ||
| 1450 | struct nf_conntrack_tuple_hash, list); | ||
| 1451 | list_del(&h->list); | ||
| 1452 | bucket = __hash_conntrack(&h->tuple, hashsize, rnd); | ||
| 1453 | list_add_tail(&h->list, &hash[bucket]); | ||
| 1454 | } | ||
| 1455 | } | ||
| 1456 | old_size = nf_conntrack_htable_size; | ||
| 1457 | old_vmalloced = nf_conntrack_vmalloc; | ||
| 1458 | old_hash = nf_conntrack_hash; | ||
| 1459 | |||
| 1460 | nf_conntrack_htable_size = hashsize; | ||
| 1461 | nf_conntrack_vmalloc = vmalloced; | ||
| 1462 | nf_conntrack_hash = hash; | ||
| 1463 | nf_conntrack_hash_rnd = rnd; | ||
| 1464 | write_unlock_bh(&nf_conntrack_lock); | ||
| 1465 | |||
| 1466 | free_conntrack_hash(old_hash, old_vmalloced, old_size); | ||
| 1467 | return 0; | ||
| 1468 | } | ||
| 1469 | |||
| 1470 | module_param_call(hashsize, set_hashsize, param_get_uint, | ||
| 1471 | &nf_conntrack_htable_size, 0600); | ||
| 1472 | |||
| 1473 | int __init nf_conntrack_init(void) | ||
| 1474 | { | ||
| 1475 | unsigned int i; | ||
| 1476 | int ret; | ||
| 1477 | |||
| 1478 | /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB | ||
| 1479 | * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ | ||
| 1480 | if (!nf_conntrack_htable_size) { | ||
| 1481 | nf_conntrack_htable_size | ||
| 1482 | = (((num_physpages << PAGE_SHIFT) / 16384) | ||
| 1483 | / sizeof(struct list_head)); | ||
| 1484 | if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) | ||
| 1485 | nf_conntrack_htable_size = 8192; | ||
| 1486 | if (nf_conntrack_htable_size < 16) | ||
| 1487 | nf_conntrack_htable_size = 16; | ||
| 1488 | } | ||
| 1489 | nf_conntrack_max = 8 * nf_conntrack_htable_size; | ||
| 1490 | |||
| 1491 | printk("nf_conntrack version %s (%u buckets, %d max)\n", | ||
| 1492 | NF_CONNTRACK_VERSION, nf_conntrack_htable_size, | ||
| 1493 | nf_conntrack_max); | ||
| 1494 | |||
| 1495 | nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size, | ||
| 1496 | &nf_conntrack_vmalloc); | ||
| 1497 | if (!nf_conntrack_hash) { | ||
| 1498 | printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); | ||
| 1499 | goto err_out; | ||
| 1500 | } | ||
| 1501 | |||
| 1502 | ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic", | ||
| 1503 | sizeof(struct nf_conn), NULL); | ||
| 1504 | if (ret < 0) { | ||
| 1505 | printk(KERN_ERR "Unable to create nf_conn slab cache\n"); | ||
| 1506 | goto err_free_hash; | ||
| 1507 | } | ||
| 1508 | |||
| 1509 | nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect", | ||
| 1510 | sizeof(struct nf_conntrack_expect), | ||
| 1511 | 0, 0, NULL, NULL); | ||
| 1512 | if (!nf_conntrack_expect_cachep) { | ||
| 1513 | printk(KERN_ERR "Unable to create nf_expect slab cache\n"); | ||
| 1514 | goto err_free_conntrack_slab; | ||
| 1515 | } | ||
| 1516 | |||
| 1517 | /* Don't NEED lock here, but good form anyway. */ | ||
| 1518 | write_lock_bh(&nf_conntrack_lock); | ||
| 1519 | for (i = 0; i < PF_MAX; i++) | ||
| 1520 | nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto; | ||
| 1521 | write_unlock_bh(&nf_conntrack_lock); | ||
| 1522 | |||
| 1523 | /* Set up fake conntrack: | ||
| 1524 | - to never be deleted, not in any hashes */ | ||
| 1525 | atomic_set(&nf_conntrack_untracked.ct_general.use, 1); | ||
| 1526 | /* - and look it like as a confirmed connection */ | ||
| 1527 | set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); | ||
| 1528 | |||
| 1529 | return ret; | ||
| 1530 | |||
| 1531 | err_free_conntrack_slab: | ||
| 1532 | nf_conntrack_unregister_cache(NF_CT_F_BASIC); | ||
| 1533 | err_free_hash: | ||
| 1534 | free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc, | ||
| 1535 | nf_conntrack_htable_size); | ||
| 1536 | err_out: | ||
| 1537 | return -ENOMEM; | ||
| 1538 | } | ||
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c new file mode 100644 index 000000000000..65080e269f27 --- /dev/null +++ b/net/netfilter/nf_conntrack_ftp.c  | |||
| @@ -0,0 +1,698 @@ | |||
| 1 | /* FTP extension for connection tracking. */ | ||
| 2 | |||
| 3 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
| 4 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
| 5 | * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License version 2 as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 12 | * - enable working with Layer 3 protocol independent connection tracking. | ||
| 13 | * - track EPRT and EPSV commands with IPv6 address. | ||
| 14 | * | ||
| 15 | * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <linux/config.h> | ||
| 19 | #include <linux/module.h> | ||
| 20 | #include <linux/moduleparam.h> | ||
| 21 | #include <linux/netfilter.h> | ||
| 22 | #include <linux/ip.h> | ||
| 23 | #include <linux/ipv6.h> | ||
| 24 | #include <linux/ctype.h> | ||
| 25 | #include <net/checksum.h> | ||
| 26 | #include <net/tcp.h> | ||
| 27 | |||
| 28 | #include <net/netfilter/nf_conntrack.h> | ||
| 29 | #include <net/netfilter/nf_conntrack_helper.h> | ||
| 30 | #include <linux/netfilter/nf_conntrack_ftp.h> | ||
| 31 | |||
| 32 | MODULE_LICENSE("GPL"); | ||
| 33 | MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); | ||
| 34 | MODULE_DESCRIPTION("ftp connection tracking helper"); | ||
| 35 | |||
| 36 | /* This is slow, but it's simple. --RR */ | ||
| 37 | static char *ftp_buffer; | ||
| 38 | |||
| 39 | static DEFINE_SPINLOCK(nf_ftp_lock); | ||
| 40 | |||
| 41 | #define MAX_PORTS 8 | ||
| 42 | static u_int16_t ports[MAX_PORTS]; | ||
| 43 | static unsigned int ports_c; | ||
| 44 | module_param_array(ports, ushort, &ports_c, 0400); | ||
| 45 | |||
| 46 | static int loose; | ||
| 47 | module_param(loose, int, 0600); | ||
| 48 | |||
| 49 | unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, | ||
| 50 | enum ip_conntrack_info ctinfo, | ||
| 51 | enum ip_ct_ftp_type type, | ||
| 52 | unsigned int matchoff, | ||
| 53 | unsigned int matchlen, | ||
| 54 | struct nf_conntrack_expect *exp, | ||
| 55 | u32 *seq); | ||
| 56 | EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); | ||
| 57 | |||
| 58 | #if 0 | ||
| 59 | #define DEBUGP printk | ||
| 60 | #else | ||
| 61 | #define DEBUGP(format, args...) | ||
| 62 | #endif | ||
| 63 | |||
| 64 | static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); | ||
| 65 | static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); | ||
| 66 | static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, | ||
| 67 | char); | ||
| 68 | |||
| 69 | static struct ftp_search { | ||
| 70 | enum ip_conntrack_dir dir; | ||
| 71 | const char *pattern; | ||
| 72 | size_t plen; | ||
| 73 | char skip; | ||
| 74 | char term; | ||
| 75 | enum ip_ct_ftp_type ftptype; | ||
| 76 | int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); | ||
| 77 | } search[] = { | ||
| 78 | { | ||
| 79 | IP_CT_DIR_ORIGINAL, | ||
| 80 | "PORT", sizeof("PORT") - 1, ' ', '\r', | ||
| 81 | IP_CT_FTP_PORT, | ||
| 82 | try_rfc959, | ||
| 83 | }, | ||
| 84 | { | ||
| 85 | IP_CT_DIR_REPLY, | ||
| 86 | "227 ", sizeof("227 ") - 1, '(', ')', | ||
| 87 | IP_CT_FTP_PASV, | ||
| 88 | try_rfc959, | ||
| 89 | }, | ||
| 90 | { | ||
| 91 | IP_CT_DIR_ORIGINAL, | ||
| 92 | "EPRT", sizeof("EPRT") - 1, ' ', '\r', | ||
| 93 | IP_CT_FTP_EPRT, | ||
| 94 | try_eprt, | ||
| 95 | }, | ||
| 96 | { | ||
| 97 | IP_CT_DIR_REPLY, | ||
| 98 | "229 ", sizeof("229 ") - 1, '(', ')', | ||
| 99 | IP_CT_FTP_EPSV, | ||
| 100 | try_epsv_response, | ||
| 101 | }, | ||
| 102 | }; | ||
| 103 | |||
/*
 * Parse a textual IPv6 address out of packet data.
 * This code is based on inet_pton() in glibc-2.2.4.
 *
 * @src:  start of the address text; NOT NUL-terminated, only @dlen bytes
 *        may be read
 * @dlen: number of bytes available at @src
 * @dst:  receives the 16 address bytes on success, untouched otherwise
 * @term: character that has to follow the address directly
 *
 * Returns the number of characters consumed (not counting @term), or 0
 * if the text is not a complete, valid address followed by @term within
 * the first @dlen bytes.
 */
static int
get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
{
	static const char xdigits[] = "0123456789abcdef";
	u_int8_t tmp[16], *tp, *endp, *colonp;
	int ch, saw_xdigit;
	u_int32_t val;
	size_t clen = 0;

	/* Nothing to parse, and nothing we are allowed to look at. */
	if (dlen == 0)
		return 0;

	tp = memset(tmp, '\0', sizeof(tmp));
	endp = tp + sizeof(tmp);
	colonp = NULL;

	/* Leading :: requires some special handling. */
	if (*src == ':'){
		/* The second ':' must exist inside the data we were given. */
		if (dlen < 2 || *++src != ':') {
			DEBUGP("invalid \":\" at the head of addr\n");
			return 0;
		}
		clen++;
	}

	saw_xdigit = 0;
	val = 0;
	while ((clen < dlen) && (*src != term)) {
		const char *pch;

		ch = tolower(*src);
		src++;
		clen++;

		/*
		 * strchr() treats the terminating NUL of xdigits as part of
		 * the string, so a NUL byte in the packet would be taken for
		 * a "digit" with value 16.  Unlike inet_pton() we are not
		 * working on a C string, so rule that out explicitly.
		 */
		pch = (ch != '\0') ? strchr(xdigits, ch) : NULL;
		if (pch != NULL) {
			val <<= 4;
			val |= (pch - xdigits);
			if (val > 0xffff)
				return 0;

			saw_xdigit = 1;
			continue;
		}
		if (ch != ':') {
			DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch);
			return 0;
		}

		if (!saw_xdigit) {
			/* Two colons in a row: only one "::" is allowed. */
			if (colonp) {
				DEBUGP("invalid location of \"::\".\n");
				return 0;
			}
			colonp = tp;
			continue;
		} else if (clen >= dlen || *src == term) {
			/* A group separator must be followed by more text. */
			DEBUGP("trancated IPv6 addr\n");
			return 0;
		}

		/* End of a 16 bit group: store it in network byte order. */
		if (tp + 2 > endp)
			return 0;
		*tp++ = (u_int8_t) (val >> 8) & 0xff;
		*tp++ = (u_int8_t) val & 0xff;

		saw_xdigit = 0;
		val = 0;
		continue;
	}
	/* Store the last group, which has no trailing ':'. */
	if (saw_xdigit) {
		if (tp + 2 > endp)
			return 0;
		*tp++ = (u_int8_t) (val >> 8) & 0xff;
		*tp++ = (u_int8_t) val & 0xff;
	}
	if (colonp != NULL) {
		/*
		 * Expand "::": move everything written after it to the end
		 * of the buffer, leaving zeroes in between.
		 * Since some memmove()'s erroneously fail to handle
		 * overlapping regions, we'll do the shift by hand.
		 */
		const int n = tp - colonp;
		int i;

		/* "::" has to stand for at least one group. */
		if (tp == endp)
			return 0;

		for (i = 1; i <= n; i++) {
			endp[- i] = colonp[n - i];
			colonp[n - i] = 0;
		}
		tp = endp;
	}
	/*
	 * We need exactly 16 bytes, and the terminator has to be inside the
	 * data; do not peek at the byte behind the end of the buffer.
	 */
	if (tp != endp || clen >= dlen || (*src != term))
		return 0;

	memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr));
	return clen;
}
| 200 | |||
/*
 * Read array_size decimal numbers separated by 'sep' and followed by
 * 'term' from the first dlen bytes of data into array[].
 *
 * Returns the offset of the terminator (i.e. the length of the number
 * text), or 0 if the input does not have that form.
 */
static int try_number(const char *data, size_t dlen, u_int32_t array[],
		      int array_size, char sep, char term)
{
	u_int32_t filled = 0, pos;

	memset(array, 0, sizeof(array[0])*array_size);

	for (pos = 0; pos < dlen && filled < array_size; pos++) {
		const char c = data[pos];

		if (c >= '0' && c <= '9') {
			array[filled] = array[filled]*10 + c - '0';
			continue;
		}
		if (c == sep) {
			/* Separator: go on with the next number. */
			filled++;
			continue;
		}

		/* Unexpected character; fine if it's the
		   terminator and we're on the last number. */
		if (c == term && filled == array_size - 1)
			return pos;

		DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
		       pos, filled, c);
		return 0;
	}
	DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);

	return 0;
}
| 230 | |||
| 231 | /* Returns 0, or length of numbers: 192,168,1,1,5,6 */ | ||
| 232 | static int try_rfc959(const char *data, size_t dlen, | ||
| 233 | struct nf_conntrack_man *cmd, char term) | ||
| 234 | { | ||
| 235 | int length; | ||
| 236 | u_int32_t array[6]; | ||
| 237 | |||
| 238 | length = try_number(data, dlen, array, 6, ',', term); | ||
| 239 | if (length == 0) | ||
| 240 | return 0; | ||
| 241 | |||
| 242 | cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | | ||
| 243 | (array[2] << 8) | array[3]); | ||
| 244 | cmd->u.tcp.port = htons((array[4] << 8) | array[5]); | ||
| 245 | return length; | ||
| 246 | } | ||
| 247 | |||
/*
 * Grab port: decimal number running from data[start] up to delim.
 *
 * @data:  buffer holding the command text
 * @start: offset of the first digit
 * @dlen:  number of valid bytes in @data
 * @delim: character that ends the number
 * @port:  receives the port in network byte order on success
 *
 * Returns the offset just behind the delimiter, or 0 if there is no
 * delimiter, a non-digit shows up, or the number is zero or does not
 * fit a TCP port (larger than 65535).
 */
static int get_port(const char *data, int start, size_t dlen, char delim,
		    u_int16_t *port)
{
	/* Wider than 16 bits so that overlong numbers are noticed instead
	   of silently wrapping around to some other port. */
	u_int32_t tmp_port = 0;
	int i;

	for (i = start; i < dlen; i++) {
		/* Finished? */
		if (data[i] == delim) {
			if (tmp_port == 0)
				break;
			*port = htons(tmp_port);
			DEBUGP("get_port: return %d\n", tmp_port);
			return i + 1;
		}
		else if (data[i] >= '0' && data[i] <= '9') {
			tmp_port = tmp_port*10 + data[i] - '0';
			/* Checking after every digit also keeps tmp_port
			   itself from overflowing. */
			if (tmp_port > 65535) {
				DEBUGP("get_port: port out of range.\n");
				break;
			}
		}
		else { /* Some other crap */
			DEBUGP("get_port: invalid char.\n");
			break;
		}
	}
	return 0;
}
| 273 | |||
| 274 | /* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ | ||
| 275 | static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, | ||
| 276 | char term) | ||
| 277 | { | ||
| 278 | char delim; | ||
| 279 | int length; | ||
| 280 | |||
| 281 | /* First character is delimiter, then "1" for IPv4 or "2" for IPv6, | ||
| 282 | then delimiter again. */ | ||
| 283 | if (dlen <= 3) { | ||
| 284 | DEBUGP("EPRT: too short\n"); | ||
| 285 | return 0; | ||
| 286 | } | ||
| 287 | delim = data[0]; | ||
| 288 | if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) { | ||
| 289 | DEBUGP("try_eprt: invalid delimitter.\n"); | ||
| 290 | return 0; | ||
| 291 | } | ||
| 292 | |||
| 293 | if ((cmd->l3num == PF_INET && data[1] != '1') || | ||
| 294 | (cmd->l3num == PF_INET6 && data[1] != '2')) { | ||
| 295 | DEBUGP("EPRT: invalid protocol number.\n"); | ||
| 296 | return 0; | ||
| 297 | } | ||
| 298 | |||
| 299 | DEBUGP("EPRT: Got %c%c%c\n", delim, data[1], delim); | ||
| 300 | |||
| 301 | if (data[1] == '1') { | ||
| 302 | u_int32_t array[4]; | ||
| 303 | |||
| 304 | /* Now we have IP address. */ | ||
| 305 | length = try_number(data + 3, dlen - 3, array, 4, '.', delim); | ||
| 306 | if (length != 0) | ||
| 307 | cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | ||
| 308 | | (array[2] << 8) | array[3]); | ||
| 309 | } else { | ||
| 310 | /* Now we have IPv6 address. */ | ||
| 311 | length = get_ipv6_addr(data + 3, dlen - 3, | ||
| 312 | (struct in6_addr *)cmd->u3.ip6, delim); | ||
| 313 | } | ||
| 314 | |||
| 315 | if (length == 0) | ||
| 316 | return 0; | ||
| 317 | DEBUGP("EPRT: Got IP address!\n"); | ||
| 318 | /* Start offset includes initial "|1|", and trailing delimiter */ | ||
| 319 | return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port); | ||
| 320 | } | ||
| 321 | |||
| 322 | /* Returns 0, or length of numbers: |||6446| */ | ||
| 323 | static int try_epsv_response(const char *data, size_t dlen, | ||
| 324 | struct nf_conntrack_man *cmd, char term) | ||
| 325 | { | ||
| 326 | char delim; | ||
| 327 | |||
| 328 | /* Three delimiters. */ | ||
| 329 | if (dlen <= 3) return 0; | ||
| 330 | delim = data[0]; | ||
| 331 | if (isdigit(delim) || delim < 33 || delim > 126 | ||
| 332 | || data[1] != delim || data[2] != delim) | ||
| 333 | return 0; | ||
| 334 | |||
| 335 | return get_port(data, 3, dlen, delim, &cmd->u.tcp.port); | ||
| 336 | } | ||
| 337 | |||
/*
 * Check whether data starts with pattern (ignoring case) and, if so,
 * let getnum parse what follows the first 'skip' character.
 *
 * Return 1 for match, 0 for accept, -1 for partial.
 * On a match *numoff is the offset of the parsed text inside data and
 * *numlen the length reported by getnum.
 */
static int find_pattern(const char *data, size_t dlen,
			const char *pattern, size_t plen,
			char skip, char term,
			unsigned int *numoff,
			unsigned int *numlen,
			struct nf_conntrack_man *cmd,
			int (*getnum)(const char *, size_t,
				      struct nf_conntrack_man *, char))
{
	size_t pos;

	DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
	if (dlen == 0)
		return 0;

	/* Short packet: it may still be the beginning of the pattern. */
	if (dlen <= plen)
		return (strnicmp(data, pattern, dlen) == 0) ? -1 : 0;

	if (strnicmp(data, pattern, plen) != 0) {
#if 0
		size_t i;

		DEBUGP("ftp: string mismatch\n");
		for (i = 0; i < plen; i++) {
			DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
				i, data[i], data[i],
				pattern[i], pattern[i]);
		}
#endif
		return 0;
	}

	DEBUGP("Pattern matches!\n");
	/* Now we've found the constant string, try to skip
	   to the 'skip' character; running into the end of the
	   data first means the command is incomplete. */
	pos = plen;
	while (data[pos] != skip) {
		if (pos == dlen - 1)
			return -1;
		pos++;
	}

	/* Step over the 'skip' character itself. */
	pos++;

	DEBUGP("Skipped up to `%c'!\n", skip);

	*numoff = pos;
	*numlen = getnum(data + pos, dlen - pos, cmd, term);
	if (*numlen == 0)
		return -1;

	DEBUGP("Match succeeded!\n");
	return 1;
}
| 394 | |||
| 395 | /* Look up to see if we're just after a \n. */ | ||
| 396 | static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir) | ||
| 397 | { | ||
| 398 | unsigned int i; | ||
| 399 | |||
| 400 | for (i = 0; i < info->seq_aft_nl_num[dir]; i++) | ||
| 401 | if (info->seq_aft_nl[dir][i] == seq) | ||
| 402 | return 1; | ||
| 403 | return 0; | ||
| 404 | } | ||
| 405 | |||
| 406 | /* We don't update if it's older than what we have. */ | ||
| 407 | static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir, | ||
| 408 | struct sk_buff *skb) | ||
| 409 | { | ||
| 410 | unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; | ||
| 411 | |||
| 412 | /* Look for oldest: if we find exact match, we're done. */ | ||
| 413 | for (i = 0; i < info->seq_aft_nl_num[dir]; i++) { | ||
| 414 | if (info->seq_aft_nl[dir][i] == nl_seq) | ||
| 415 | return; | ||
| 416 | |||
| 417 | if (oldest == info->seq_aft_nl_num[dir] | ||
| 418 | || before(info->seq_aft_nl[dir][i], oldest)) | ||
| 419 | oldest = i; | ||
| 420 | } | ||
| 421 | |||
| 422 | if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { | ||
| 423 | info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; | ||
| 424 | nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); | ||
| 425 | } else if (oldest != NUM_SEQ_TO_REMEMBER) { | ||
| 426 | info->seq_aft_nl[dir][oldest] = nl_seq; | ||
| 427 | nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 431 | static int help(struct sk_buff **pskb, | ||
| 432 | unsigned int protoff, | ||
| 433 | struct nf_conn *ct, | ||
| 434 | enum ip_conntrack_info ctinfo) | ||
| 435 | { | ||
| 436 | unsigned int dataoff, datalen; | ||
| 437 | struct tcphdr _tcph, *th; | ||
| 438 | char *fb_ptr; | ||
| 439 | int ret; | ||
| 440 | u32 seq; | ||
| 441 | int dir = CTINFO2DIR(ctinfo); | ||
| 442 | unsigned int matchlen, matchoff; | ||
| 443 | struct ip_ct_ftp_master *ct_ftp_info = &ct->help->ct_ftp_info; | ||
| 444 | struct nf_conntrack_expect *exp; | ||
| 445 | struct nf_conntrack_man cmd = {}; | ||
| 446 | |||
| 447 | unsigned int i; | ||
| 448 | int found = 0, ends_in_nl; | ||
| 449 | |||
| 450 | /* Until there's been traffic both ways, don't look in packets. */ | ||
| 451 | if (ctinfo != IP_CT_ESTABLISHED | ||
| 452 | && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { | ||
| 453 | DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); | ||
| 454 | return NF_ACCEPT; | ||
| 455 | } | ||
| 456 | |||
| 457 | th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); | ||
| 458 | if (th == NULL) | ||
| 459 | return NF_ACCEPT; | ||
| 460 | |||
| 461 | dataoff = protoff + th->doff * 4; | ||
| 462 | /* No data? */ | ||
| 463 | if (dataoff >= (*pskb)->len) { | ||
| 464 | DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, | ||
| 465 | (*pskb)->len); | ||
| 466 | return NF_ACCEPT; | ||
| 467 | } | ||
| 468 | datalen = (*pskb)->len - dataoff; | ||
| 469 | |||
| 470 | spin_lock_bh(&nf_ftp_lock); | ||
| 471 | fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer); | ||
| 472 | BUG_ON(fb_ptr == NULL); | ||
| 473 | |||
| 474 | ends_in_nl = (fb_ptr[datalen - 1] == '\n'); | ||
| 475 | seq = ntohl(th->seq) + datalen; | ||
| 476 | |||
| 477 | /* Look up to see if we're just after a \n. */ | ||
| 478 | if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { | ||
| 479 | /* Now if this ends in \n, update ftp info. */ | ||
| 480 | DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n", | ||
| 481 | ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)", | ||
| 482 | ct_ftp_info->seq_aft_nl[dir][0], | ||
| 483 | ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)", | ||
| 484 | ct_ftp_info->seq_aft_nl[dir][1]); | ||
| 485 | ret = NF_ACCEPT; | ||
| 486 | goto out_update_nl; | ||
| 487 | } | ||
| 488 | |||
| 489 | /* Initialize IP/IPv6 addr to expected address (it's not mentioned | ||
| 490 | in EPSV responses) */ | ||
| 491 | cmd.l3num = ct->tuplehash[dir].tuple.src.l3num; | ||
| 492 | memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all, | ||
| 493 | sizeof(cmd.u3.all)); | ||
| 494 | |||
| 495 | for (i = 0; i < ARRAY_SIZE(search); i++) { | ||
| 496 | if (search[i].dir != dir) continue; | ||
| 497 | |||
| 498 | found = find_pattern(fb_ptr, datalen, | ||
| 499 | search[i].pattern, | ||
| 500 | search[i].plen, | ||
| 501 | search[i].skip, | ||
| 502 | search[i].term, | ||
| 503 | &matchoff, &matchlen, | ||
| 504 | &cmd, | ||
| 505 | search[i].getnum); | ||
| 506 | if (found) break; | ||
| 507 | } | ||
| 508 | if (found == -1) { | ||
| 509 | /* We don't usually drop packets. After all, this is | ||
| 510 | connection tracking, not packet filtering. | ||
| 511 | However, it is necessary for accurate tracking in | ||
| 512 | this case. */ | ||
| 513 | if (net_ratelimit()) | ||
| 514 | printk("conntrack_ftp: partial %s %u+%u\n", | ||
| 515 | search[i].pattern, | ||
| 516 | ntohl(th->seq), datalen); | ||
| 517 | ret = NF_DROP; | ||
| 518 | goto out; | ||
| 519 | } else if (found == 0) { /* No match */ | ||
| 520 | ret = NF_ACCEPT; | ||
| 521 | goto out_update_nl; | ||
| 522 | } | ||
| 523 | |||
| 524 | DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", | ||
| 525 | (int)matchlen, fb_ptr + matchoff, | ||
| 526 | matchlen, ntohl(th->seq) + matchoff); | ||
| 527 | |||
| 528 | exp = nf_conntrack_expect_alloc(ct); | ||
| 529 | if (exp == NULL) { | ||
| 530 | ret = NF_DROP; | ||
| 531 | goto out; | ||
| 532 | } | ||
| 533 | |||
| 534 | /* We refer to the reverse direction ("!dir") tuples here, | ||
| 535 | * because we're expecting something in the other direction. | ||
| 536 | * Doesn't matter unless NAT is happening. */ | ||
| 537 | exp->tuple.dst.u3 = ct->tuplehash[!dir].tuple.dst.u3; | ||
| 538 | |||
| 539 | /* Update the ftp info */ | ||
| 540 | if ((cmd.l3num == ct->tuplehash[dir].tuple.src.l3num) && | ||
| 541 | memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all, | ||
| 542 | sizeof(cmd.u3.all))) { | ||
| 543 | /* Enrico Scholz's passive FTP to partially RNAT'd ftp | ||
| 544 | server: it really wants us to connect to a | ||
| 545 | different IP address. Simply don't record it for | ||
| 546 | NAT. */ | ||
| 547 | if (cmd.l3num == PF_INET) { | ||
| 548 | DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n", | ||
| 549 | NIPQUAD(cmd.u3.ip), | ||
| 550 | NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); | ||
| 551 | } else { | ||
| 552 | DEBUGP("conntrack_ftp: NOT RECORDING: %x:%x:%x:%x:%x:%x:%x:%x != %x:%x:%x:%x:%x:%x:%x:%x\n", | ||
| 553 | NIP6(*((struct in6_addr *)cmd.u3.ip6)), | ||
| 554 | NIP6(*((struct in6_addr *)ct->tuplehash[dir] | ||
| 555 | .tuple.src.u3.ip6))); | ||
| 556 | } | ||
| 557 | |||
| 558 | /* Thanks to Cristiano Lincoln Mattos | ||
| 559 | <lincoln@cesar.org.br> for reporting this potential | ||
| 560 | problem (DMZ machines opening holes to internal | ||
| 561 | networks, or the packet filter itself). */ | ||
| 562 | if (!loose) { | ||
| 563 | ret = NF_ACCEPT; | ||
| 564 | goto out_put_expect; | ||
| 565 | } | ||
| 566 | memcpy(&exp->tuple.dst.u3, &cmd.u3.all, | ||
| 567 | sizeof(exp->tuple.dst.u3)); | ||
| 568 | } | ||
| 569 | |||
| 570 | exp->tuple.src.u3 = ct->tuplehash[!dir].tuple.src.u3; | ||
| 571 | exp->tuple.src.l3num = cmd.l3num; | ||
| 572 | exp->tuple.src.u.tcp.port = 0; | ||
| 573 | exp->tuple.dst.u.tcp.port = cmd.u.tcp.port; | ||
| 574 | exp->tuple.dst.protonum = IPPROTO_TCP; | ||
| 575 | |||
| 576 | exp->mask = (struct nf_conntrack_tuple) | ||
| 577 | { .src = { .l3num = 0xFFFF, | ||
| 578 | .u = { .tcp = { 0 }}, | ||
| 579 | }, | ||
| 580 | .dst = { .protonum = 0xFF, | ||
| 581 | .u = { .tcp = { 0xFFFF }}, | ||
| 582 | }, | ||
| 583 | }; | ||
| 584 | if (cmd.l3num == PF_INET) { | ||
| 585 | exp->mask.src.u3.ip = 0xFFFFFFFF; | ||
| 586 | exp->mask.dst.u3.ip = 0xFFFFFFFF; | ||
| 587 | } else { | ||
| 588 | memset(exp->mask.src.u3.ip6, 0xFF, | ||
| 589 | sizeof(exp->mask.src.u3.ip6)); | ||
| 590 | memset(exp->mask.dst.u3.ip6, 0xFF, | ||
| 591 | sizeof(exp->mask.src.u3.ip6)); | ||
| 592 | } | ||
| 593 | |||
| 594 | exp->expectfn = NULL; | ||
| 595 | exp->flags = 0; | ||
| 596 | |||
| 597 | /* Now, NAT might want to mangle the packet, and register the | ||
| 598 | * (possibly changed) expectation itself. */ | ||
| 599 | if (nf_nat_ftp_hook) | ||
| 600 | ret = nf_nat_ftp_hook(pskb, ctinfo, search[i].ftptype, | ||
| 601 | matchoff, matchlen, exp, &seq); | ||
| 602 | else { | ||
| 603 | /* Can't expect this? Best to drop packet now. */ | ||
| 604 | if (nf_conntrack_expect_related(exp) != 0) | ||
| 605 | ret = NF_DROP; | ||
| 606 | else | ||
| 607 | ret = NF_ACCEPT; | ||
| 608 | } | ||
| 609 | |||
| 610 | out_put_expect: | ||
| 611 | nf_conntrack_expect_put(exp); | ||
| 612 | |||
| 613 | out_update_nl: | ||
| 614 | /* Now if this ends in \n, update ftp info. Seq may have been | ||
| 615 | * adjusted by NAT code. */ | ||
| 616 | if (ends_in_nl) | ||
| 617 | update_nl_seq(seq, ct_ftp_info, dir, *pskb); | ||
| 618 | out: | ||
| 619 | spin_unlock_bh(&nf_ftp_lock); | ||
| 620 | return ret; | ||
| 621 | } | ||
| 622 | |||
| 623 | static struct nf_conntrack_helper ftp[MAX_PORTS][2]; | ||
| 624 | static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")]; | ||
| 625 | |||
| 626 | /* don't make this __exit, since it's called from __init ! */ | ||
| 627 | static void fini(void) | ||
| 628 | { | ||
| 629 | int i, j; | ||
| 630 | for (i = 0; i < ports_c; i++) { | ||
| 631 | for (j = 0; j < 2; j++) { | ||
| 632 | if (ftp[i][j].me == NULL) | ||
| 633 | continue; | ||
| 634 | |||
| 635 | DEBUGP("nf_ct_ftp: unregistering helper for pf: %d " | ||
| 636 | "port: %d\n", | ||
| 637 | ftp[i][j].tuple.src.l3num, ports[i]); | ||
| 638 | nf_conntrack_helper_unregister(&ftp[i][j]); | ||
| 639 | } | ||
| 640 | } | ||
| 641 | |||
| 642 | kfree(ftp_buffer); | ||
| 643 | } | ||
| 644 | |||
| 645 | static int __init init(void) | ||
| 646 | { | ||
| 647 | int i, j = -1, ret = 0; | ||
| 648 | char *tmpname; | ||
| 649 | |||
| 650 | ftp_buffer = kmalloc(65536, GFP_KERNEL); | ||
| 651 | if (!ftp_buffer) | ||
| 652 | return -ENOMEM; | ||
| 653 | |||
| 654 | if (ports_c == 0) | ||
| 655 | ports[ports_c++] = FTP_PORT; | ||
| 656 | |||
| 657 | /* FIXME should be configurable whether IPv4 and IPv6 FTP connections | ||
| 658 | are tracked or not - YK */ | ||
| 659 | for (i = 0; i < ports_c; i++) { | ||
| 660 | memset(&ftp[i], 0, sizeof(struct nf_conntrack_helper)); | ||
| 661 | |||
| 662 | ftp[i][0].tuple.src.l3num = PF_INET; | ||
| 663 | ftp[i][1].tuple.src.l3num = PF_INET6; | ||
| 664 | for (j = 0; j < 2; j++) { | ||
| 665 | ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); | ||
| 666 | ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; | ||
| 667 | ftp[i][j].mask.src.u.tcp.port = 0xFFFF; | ||
| 668 | ftp[i][j].mask.dst.protonum = 0xFF; | ||
| 669 | ftp[i][j].max_expected = 1; | ||
| 670 | ftp[i][j].timeout = 5 * 60; /* 5 Minutes */ | ||
| 671 | ftp[i][j].me = THIS_MODULE; | ||
| 672 | ftp[i][j].help = help; | ||
| 673 | tmpname = &ftp_names[i][j][0]; | ||
| 674 | if (ports[i] == FTP_PORT) | ||
| 675 | sprintf(tmpname, "ftp"); | ||
| 676 | else | ||
| 677 | sprintf(tmpname, "ftp-%d", ports[i]); | ||
| 678 | ftp[i][j].name = tmpname; | ||
| 679 | |||
| 680 | DEBUGP("nf_ct_ftp: registering helper for pf: %d " | ||
| 681 | "port: %d\n", | ||
| 682 | ftp[i][j].tuple.src.l3num, ports[i]); | ||
| 683 | ret = nf_conntrack_helper_register(&ftp[i][j]); | ||
| 684 | if (ret) { | ||
| 685 | printk("nf_ct_ftp: failed to register helper " | ||
| 686 | " for pf: %d port: %d\n", | ||
| 687 | ftp[i][j].tuple.src.l3num, ports[i]); | ||
| 688 | fini(); | ||
| 689 | return ret; | ||
| 690 | } | ||
| 691 | } | ||
| 692 | } | ||
| 693 | |||
| 694 | return 0; | ||
| 695 | } | ||
| 696 | |||
| 697 | module_init(init); | ||
| 698 | module_exit(fini); | ||
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c new file mode 100644 index 000000000000..7de4f06c63c5 --- /dev/null +++ b/net/netfilter/nf_conntrack_l3proto_generic.c  | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | /* | ||
| 2 | * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> | ||
| 3 | * | ||
| 4 | * Based largely upon the original ip_conntrack code which | ||
| 5 | * had the following copyright information: | ||
| 6 | * | ||
| 7 | * (C) 1999-2001 Paul `Rusty' Russell | ||
| 8 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or modify | ||
| 11 | * it under the terms of the GNU General Public License version 2 as | ||
| 12 | * published by the Free Software Foundation. | ||
| 13 | * | ||
| 14 | * Author: | ||
| 15 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <linux/config.h> | ||
| 19 | #include <linux/types.h> | ||
| 20 | #include <linux/ip.h> | ||
| 21 | #include <linux/netfilter.h> | ||
| 22 | #include <linux/module.h> | ||
| 23 | #include <linux/skbuff.h> | ||
| 24 | #include <linux/icmp.h> | ||
| 25 | #include <linux/sysctl.h> | ||
| 26 | #include <net/ip.h> | ||
| 27 | |||
| 28 | #include <linux/netfilter_ipv4.h> | ||
| 29 | #include <net/netfilter/nf_conntrack.h> | ||
| 30 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 31 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
| 32 | #include <net/netfilter/nf_conntrack_core.h> | ||
| 33 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | ||
| 34 | |||
| 35 | #if 0 | ||
| 36 | #define DEBUGP printk | ||
| 37 | #else | ||
| 38 | #define DEBUGP(format, args...) | ||
| 39 | #endif | ||
| 40 | |||
| 41 | DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); | ||
| 42 | |||
| 43 | static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | ||
| 44 | struct nf_conntrack_tuple *tuple) | ||
| 45 | { | ||
| 46 | memset(&tuple->src.u3, 0, sizeof(tuple->src.u3)); | ||
| 47 | memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3)); | ||
| 48 | |||
| 49 | return 1; | ||
| 50 | } | ||
| 51 | |||
| 52 | static int generic_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
| 53 | const struct nf_conntrack_tuple *orig) | ||
| 54 | { | ||
| 55 | memset(&tuple->src.u3, 0, sizeof(tuple->src.u3)); | ||
| 56 | memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3)); | ||
| 57 | |||
| 58 | return 1; | ||
| 59 | } | ||
| 60 | |||
/* The generic l3 protocol prints nothing for a tuple; returns 0. */
static int generic_print_tuple(struct seq_file *s,
			       const struct nf_conntrack_tuple *tuple)
{
	return 0;
}
| 66 | |||
/* The generic l3 protocol prints nothing for a conntrack; returns 0. */
static int generic_print_conntrack(struct seq_file *s,
				   const struct nf_conn *conntrack)
{
	return 0;
}
| 72 | |||
| 73 | static int | ||
| 74 | generic_prepare(struct sk_buff **pskb, unsigned int hooknum, | ||
| 75 | unsigned int *dataoff, u_int8_t *protonum) | ||
| 76 | { | ||
| 77 | /* Never track !!! */ | ||
| 78 | return -NF_ACCEPT; | ||
| 79 | } | ||
| 80 | |||
| 81 | |||
| 82 | static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple) | ||
| 83 | |||
| 84 | { | ||
| 85 | return NF_CT_F_BASIC; | ||
| 86 | } | ||
| 87 | |||
| 88 | struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = { | ||
| 89 | .l3proto = PF_UNSPEC, | ||
| 90 | .name = "unknown", | ||
| 91 | .pkt_to_tuple = generic_pkt_to_tuple, | ||
| 92 | .invert_tuple = generic_invert_tuple, | ||
| 93 | .print_tuple = generic_print_tuple, | ||
| 94 | .print_conntrack = generic_print_conntrack, | ||
| 95 | .prepare = generic_prepare, | ||
| 96 | .get_features = generic_get_features, | ||
| 97 | .me = THIS_MODULE, | ||
| 98 | }; | ||
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c new file mode 100644 index 000000000000..36425f6c833f --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_generic.c  | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
| 2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License version 2 as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 9 | * - enable working with L3 protocol independent connection tracking. | ||
| 10 | * | ||
| 11 | * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/types.h> | ||
| 15 | #include <linux/sched.h> | ||
| 16 | #include <linux/timer.h> | ||
| 17 | #include <linux/netfilter.h> | ||
| 18 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 19 | |||
| 20 | unsigned long nf_ct_generic_timeout = 600*HZ; | ||
| 21 | |||
| 22 | static int generic_pkt_to_tuple(const struct sk_buff *skb, | ||
| 23 | unsigned int dataoff, | ||
| 24 | struct nf_conntrack_tuple *tuple) | ||
| 25 | { | ||
| 26 | tuple->src.u.all = 0; | ||
| 27 | tuple->dst.u.all = 0; | ||
| 28 | |||
| 29 | return 1; | ||
| 30 | } | ||
| 31 | |||
| 32 | static int generic_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
| 33 | const struct nf_conntrack_tuple *orig) | ||
| 34 | { | ||
| 35 | tuple->src.u.all = 0; | ||
| 36 | tuple->dst.u.all = 0; | ||
| 37 | |||
| 38 | return 1; | ||
| 39 | } | ||
| 40 | |||
/*
 * Print out the per-protocol part of the tuple.
 * The generic protocol has nothing to print here; always returns 0.
 */
static int generic_print_tuple(struct seq_file *s,
			       const struct nf_conntrack_tuple *tuple)
{
	return 0;
}
| 47 | |||
/*
 * Print out the private part of the conntrack.
 * The generic protocol has nothing to print here; always returns 0.
 */
static int generic_print_conntrack(struct seq_file *s,
				   const struct nf_conn *state)
{
	return 0;
}
| 54 | |||
| 55 | /* Returns verdict for packet, or -1 for invalid. */ | ||
| 56 | static int packet(struct nf_conn *conntrack, | ||
| 57 | const struct sk_buff *skb, | ||
| 58 | unsigned int dataoff, | ||
| 59 | enum ip_conntrack_info ctinfo, | ||
| 60 | int pf, | ||
| 61 | unsigned int hooknum) | ||
| 62 | { | ||
| 63 | nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_generic_timeout); | ||
| 64 | return NF_ACCEPT; | ||
| 65 | } | ||
| 66 | |||
/*
 * Called when a new connection for this protocol found.
 * The generic protocol performs no checks and always returns 1.
 */
static int new(struct nf_conn *conntrack, const struct sk_buff *skb,
	       unsigned int dataoff)
{
	return 1;
}
| 73 | |||
| 74 | struct nf_conntrack_protocol nf_conntrack_generic_protocol = | ||
| 75 | { | ||
| 76 | .l3proto = PF_UNSPEC, | ||
| 77 | .proto = 0, | ||
| 78 | .name = "unknown", | ||
| 79 | .pkt_to_tuple = generic_pkt_to_tuple, | ||
| 80 | .invert_tuple = generic_invert_tuple, | ||
| 81 | .print_tuple = generic_print_tuple, | ||
| 82 | .print_conntrack = generic_print_conntrack, | ||
| 83 | .packet = packet, | ||
| 84 | .new = new, | ||
| 85 | }; | ||
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c new file mode 100644 index 000000000000..3a600f77b4e0 --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_sctp.c  | |||
| @@ -0,0 +1,670 @@ | |||
| 1 | /* | ||
| 2 | * Connection tracking protocol helper module for SCTP. | ||
| 3 | * | ||
| 4 | * SCTP is defined in RFC 2960. References to various sections in this code | ||
| 5 | * are to this RFC. | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License version 2 as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | * | ||
| 11 | * 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 12 | * - enable working with L3 protocol independent connection tracking. | ||
| 13 | * | ||
| 14 | * Derived from net/ipv4/ip_conntrack_sctp.c | ||
| 15 | */ | ||
| 16 | |||
| 17 | /* | ||
| 18 | * Added support for proc manipulation of timeouts. | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <linux/types.h> | ||
| 22 | #include <linux/sched.h> | ||
| 23 | #include <linux/timer.h> | ||
| 24 | #include <linux/netfilter.h> | ||
| 25 | #include <linux/module.h> | ||
| 26 | #include <linux/in.h> | ||
| 27 | #include <linux/ip.h> | ||
| 28 | #include <linux/sctp.h> | ||
| 29 | #include <linux/string.h> | ||
| 30 | #include <linux/seq_file.h> | ||
| 31 | |||
| 32 | #include <net/netfilter/nf_conntrack.h> | ||
| 33 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 34 | |||
/* Debug tracing is compiled out; change "#if 0" to route DEBUGP() to printk(). */
#if 0
#define DEBUGP(format, args...) printk(format, ## args)
#else
#define DEBUGP(format, args...)
#endif
| 41 | /* Protects conntrack->proto.sctp */ | ||
| 42 | static DEFINE_RWLOCK(sctp_lock); | ||
| 43 | |||
| 44 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more | ||
| 45 | closely. They're more complex. --RR | ||
| 46 | |||
| 47 | And so for me for SCTP :D -Kiran */ | ||
| 48 | |||
/*
 * Printable state names, indexed by conntrack->proto.sctp.state
 * (see sctp_print_conntrack()).
 */
static const char *sctp_conntrack_names[] = {
	"NONE",
	"CLOSED",
	"COOKIE_WAIT",
	"COOKIE_ECHOED",
	"ESTABLISHED",
	"SHUTDOWN_SENT",
	"SHUTDOWN_RECD",
	"SHUTDOWN_ACK_SENT",
};
| 59 | |||
| 60 | #define SECS * HZ | ||
| 61 | #define MINS * 60 SECS | ||
| 62 | #define HOURS * 60 MINS | ||
| 63 | #define DAYS * 24 HOURS | ||
| 64 | |||
| 65 | static unsigned long nf_ct_sctp_timeout_closed = 10 SECS; | ||
| 66 | static unsigned long nf_ct_sctp_timeout_cookie_wait = 3 SECS; | ||
| 67 | static unsigned long nf_ct_sctp_timeout_cookie_echoed = 3 SECS; | ||
| 68 | static unsigned long nf_ct_sctp_timeout_established = 5 DAYS; | ||
| 69 | static unsigned long nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; | ||
| 70 | static unsigned long nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; | ||
| 71 | static unsigned long nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; | ||
| 72 | |||
| 73 | static unsigned long * sctp_timeouts[] | ||
| 74 | = { NULL, /* SCTP_CONNTRACK_NONE */ | ||
| 75 | &nf_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */ | ||
| 76 | &nf_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */ | ||
| 77 | &nf_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */ | ||
| 78 | &nf_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */ | ||
| 79 | &nf_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */ | ||
| 80 | &nf_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */ | ||
| 81 | &nf_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */ | ||
| 82 | }; | ||
| 83 | |||
| 84 | #define sNO SCTP_CONNTRACK_NONE | ||
| 85 | #define sCL SCTP_CONNTRACK_CLOSED | ||
| 86 | #define sCW SCTP_CONNTRACK_COOKIE_WAIT | ||
| 87 | #define sCE SCTP_CONNTRACK_COOKIE_ECHOED | ||
| 88 | #define sES SCTP_CONNTRACK_ESTABLISHED | ||
| 89 | #define sSS SCTP_CONNTRACK_SHUTDOWN_SENT | ||
| 90 | #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD | ||
| 91 | #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT | ||
| 92 | #define sIV SCTP_CONNTRACK_MAX | ||
| 93 | |||
| 94 | /* | ||
| 95 | These are the descriptions of the states: | ||
| 96 | |||
| 97 | NOTE: These state names are tantalizingly similar to the states of an | ||
| 98 | SCTP endpoint. But the interpretation of the states is a little different, | ||
| 99 | considering that these are the states of the connection and not of an end | ||
| 100 | point. Please note the subtleties. -Kiran | ||
| 101 | |||
| 102 | NONE - Nothing so far. | ||
| 103 | COOKIE WAIT - We have seen an INIT chunk in the original direction, or also | ||
| 104 | an INIT_ACK chunk in the reply direction. | ||
| 105 | COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction. | ||
| 106 | ESTABLISHED - We have seen a COOKIE_ACK in the reply direction. | ||
| 107 | SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction. | ||
| 108 | SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply direction. | ||
| 109 | SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite | ||
| 110 | to that of the SHUTDOWN chunk. | ||
| 111 | CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of | ||
| 112 | the SHUTDOWN chunk. Connection is closed. | ||
| 113 | */ | ||
| 114 | |||
| 115 | /* TODO | ||
| 116 | - I have assumed that the first INIT is in the original direction. | ||
| 117 | This messes things when an INIT comes in the reply direction in CLOSED | ||
| 118 | state. | ||
| 119 | - Check the error type in the reply dir before transitioning from | ||
| 120 | cookie echoed to closed. | ||
| 121 | - Sec 5.2.4 of RFC 2960 | ||
| 122 | - Multi Homing support. | ||
| 123 | */ | ||
| 124 | |||
| 125 | /* SCTP conntrack state transitions */ | ||
| 126 | static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { | ||
| 127 | { | ||
| 128 | /* ORIGINAL */ | ||
| 129 | /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ | ||
| 130 | /* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA}, | ||
| 131 | /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA}, | ||
| 132 | /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | ||
| 133 | /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, | ||
| 134 | /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, | ||
| 135 | /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/ | ||
| 136 | /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ | ||
| 137 | /* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */ | ||
| 138 | /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} | ||
| 139 | }, | ||
| 140 | { | ||
| 141 | /* REPLY */ | ||
| 142 | /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ | ||
| 143 | /* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */ | ||
| 144 | /* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA}, | ||
| 145 | /* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | ||
| 146 | /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, | ||
| 147 | /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, | ||
| 148 | /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, | ||
| 149 | /* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */ | ||
| 150 | /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, | ||
| 151 | /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} | ||
| 152 | } | ||
| 153 | }; | ||
| 154 | |||
| 155 | static int sctp_pkt_to_tuple(const struct sk_buff *skb, | ||
| 156 | unsigned int dataoff, | ||
| 157 | struct nf_conntrack_tuple *tuple) | ||
| 158 | { | ||
| 159 | sctp_sctphdr_t _hdr, *hp; | ||
| 160 | |||
| 161 | DEBUGP(__FUNCTION__); | ||
| 162 | DEBUGP("\n"); | ||
| 163 | |||
| 164 | /* Actually only need first 8 bytes. */ | ||
| 165 | hp = skb_header_pointer(skb, dataoff, 8, &_hdr); | ||
| 166 | if (hp == NULL) | ||
| 167 | return 0; | ||
| 168 | |||
| 169 | tuple->src.u.sctp.port = hp->source; | ||
| 170 | tuple->dst.u.sctp.port = hp->dest; | ||
| 171 | return 1; | ||
| 172 | } | ||
| 173 | |||
| 174 | static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
| 175 | const struct nf_conntrack_tuple *orig) | ||
| 176 | { | ||
| 177 | DEBUGP(__FUNCTION__); | ||
| 178 | DEBUGP("\n"); | ||
| 179 | |||
| 180 | tuple->src.u.sctp.port = orig->dst.u.sctp.port; | ||
| 181 | tuple->dst.u.sctp.port = orig->src.u.sctp.port; | ||
| 182 | return 1; | ||
| 183 | } | ||
| 184 | |||
| 185 | /* Print out the per-protocol part of the tuple. */ | ||
| 186 | static int sctp_print_tuple(struct seq_file *s, | ||
| 187 | const struct nf_conntrack_tuple *tuple) | ||
| 188 | { | ||
| 189 | DEBUGP(__FUNCTION__); | ||
| 190 | DEBUGP("\n"); | ||
| 191 | |||
| 192 | return seq_printf(s, "sport=%hu dport=%hu ", | ||
| 193 | ntohs(tuple->src.u.sctp.port), | ||
| 194 | ntohs(tuple->dst.u.sctp.port)); | ||
| 195 | } | ||
| 196 | |||
| 197 | /* Print out the private part of the conntrack. */ | ||
| 198 | static int sctp_print_conntrack(struct seq_file *s, | ||
| 199 | const struct nf_conn *conntrack) | ||
| 200 | { | ||
| 201 | enum sctp_conntrack state; | ||
| 202 | |||
| 203 | DEBUGP(__FUNCTION__); | ||
| 204 | DEBUGP("\n"); | ||
| 205 | |||
| 206 | read_lock_bh(&sctp_lock); | ||
| 207 | state = conntrack->proto.sctp.state; | ||
| 208 | read_unlock_bh(&sctp_lock); | ||
| 209 | |||
| 210 | return seq_printf(s, "%s ", sctp_conntrack_names[state]); | ||
| 211 | } | ||
| 212 | |||
/*
 * Iterate over the chunk headers of the SCTP packet in @skb.
 *
 * @sch:     set to the current chunk header on every iteration
 * @_sch:    caller-provided buffer handed to skb_header_pointer()
 * @offset:  running offset of the current chunk; starts right behind the
 *           SCTP common header located at @dataoff
 * @count:   number of chunks visited so far, starting at 0
 *
 * The chunk length is carried in network byte order (hence ntohs()) and
 * is rounded up to a multiple of 4 to reach the next chunk.
 *
 * NOTE: a chunk whose length field is 0 does not advance @offset, so the
 * loop body must bail out on such chunks or the loop never terminates.
 */
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)	\
for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0;	\
	(offset) < (skb)->len &&					\
	((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
	(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
| 218 | |||
| 219 | /* Some validity checks to make sure the chunks are fine */ | ||
| 220 | static int do_basic_checks(struct nf_conn *conntrack, | ||
| 221 | const struct sk_buff *skb, | ||
| 222 | unsigned int dataoff, | ||
| 223 | char *map) | ||
| 224 | { | ||
| 225 | u_int32_t offset, count; | ||
| 226 | sctp_chunkhdr_t _sch, *sch; | ||
| 227 | int flag; | ||
| 228 | |||
| 229 | DEBUGP(__FUNCTION__); | ||
| 230 | DEBUGP("\n"); | ||
| 231 | |||
| 232 | flag = 0; | ||
| 233 | |||
| 234 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { | ||
| 235 | DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type); | ||
| 236 | |||
| 237 | if (sch->type == SCTP_CID_INIT | ||
| 238 | || sch->type == SCTP_CID_INIT_ACK | ||
| 239 | || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { | ||
| 240 | flag = 1; | ||
| 241 | } | ||
| 242 | |||
| 243 | /* Cookie Ack/Echo chunks not the first OR | ||
| 244 | Init / Init Ack / Shutdown compl chunks not the only chunks */ | ||
| 245 | if ((sch->type == SCTP_CID_COOKIE_ACK | ||
| 246 | || sch->type == SCTP_CID_COOKIE_ECHO | ||
| 247 | || flag) | ||
| 248 | && count !=0 ) { | ||
| 249 | DEBUGP("Basic checks failed\n"); | ||
| 250 | return 1; | ||
| 251 | } | ||
| 252 | |||
| 253 | if (map) { | ||
| 254 | set_bit(sch->type, (void *)map); | ||
| 255 | } | ||
| 256 | } | ||
| 257 | |||
| 258 | DEBUGP("Basic checks passed\n"); | ||
| 259 | return 0; | ||
| 260 | } | ||
| 261 | |||
| 262 | static int new_state(enum ip_conntrack_dir dir, | ||
| 263 | enum sctp_conntrack cur_state, | ||
| 264 | int chunk_type) | ||
| 265 | { | ||
| 266 | int i; | ||
| 267 | |||
| 268 | DEBUGP(__FUNCTION__); | ||
| 269 | DEBUGP("\n"); | ||
| 270 | |||
| 271 | DEBUGP("Chunk type: %d\n", chunk_type); | ||
| 272 | |||
| 273 | switch (chunk_type) { | ||
| 274 | case SCTP_CID_INIT: | ||
| 275 | DEBUGP("SCTP_CID_INIT\n"); | ||
| 276 | i = 0; break; | ||
| 277 | case SCTP_CID_INIT_ACK: | ||
| 278 | DEBUGP("SCTP_CID_INIT_ACK\n"); | ||
| 279 | i = 1; break; | ||
| 280 | case SCTP_CID_ABORT: | ||
| 281 | DEBUGP("SCTP_CID_ABORT\n"); | ||
| 282 | i = 2; break; | ||
| 283 | case SCTP_CID_SHUTDOWN: | ||
| 284 | DEBUGP("SCTP_CID_SHUTDOWN\n"); | ||
| 285 | i = 3; break; | ||
| 286 | case SCTP_CID_SHUTDOWN_ACK: | ||
| 287 | DEBUGP("SCTP_CID_SHUTDOWN_ACK\n"); | ||
| 288 | i = 4; break; | ||
| 289 | case SCTP_CID_ERROR: | ||
| 290 | DEBUGP("SCTP_CID_ERROR\n"); | ||
| 291 | i = 5; break; | ||
| 292 | case SCTP_CID_COOKIE_ECHO: | ||
| 293 | DEBUGP("SCTP_CID_COOKIE_ECHO\n"); | ||
| 294 | i = 6; break; | ||
| 295 | case SCTP_CID_COOKIE_ACK: | ||
| 296 | DEBUGP("SCTP_CID_COOKIE_ACK\n"); | ||
| 297 | i = 7; break; | ||
| 298 | case SCTP_CID_SHUTDOWN_COMPLETE: | ||
| 299 | DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n"); | ||
| 300 | i = 8; break; | ||
| 301 | default: | ||
| 302 | /* Other chunks like DATA, SACK, HEARTBEAT and | ||
| 303 | its ACK do not cause a change in state */ | ||
| 304 | DEBUGP("Unknown chunk type, Will stay in %s\n", | ||
| 305 | sctp_conntrack_names[cur_state]); | ||
| 306 | return cur_state; | ||
| 307 | } | ||
| 308 | |||
| 309 | DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", | ||
| 310 | dir, sctp_conntrack_names[cur_state], chunk_type, | ||
| 311 | sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); | ||
| 312 | |||
| 313 | return sctp_conntracks[dir][i][cur_state]; | ||
| 314 | } | ||
| 315 | |||
| 316 | /* Returns verdict for packet, or -1 for invalid. */ | ||
| 317 | static int sctp_packet(struct nf_conn *conntrack, | ||
| 318 | const struct sk_buff *skb, | ||
| 319 | unsigned int dataoff, | ||
| 320 | enum ip_conntrack_info ctinfo, | ||
| 321 | int pf, | ||
| 322 | unsigned int hooknum) | ||
| 323 | { | ||
| 324 | enum sctp_conntrack newconntrack, oldsctpstate; | ||
| 325 | sctp_sctphdr_t _sctph, *sh; | ||
| 326 | sctp_chunkhdr_t _sch, *sch; | ||
| 327 | u_int32_t offset, count; | ||
| 328 | char map[256 / sizeof (char)] = {0}; | ||
| 329 | |||
| 330 | DEBUGP(__FUNCTION__); | ||
| 331 | DEBUGP("\n"); | ||
| 332 | |||
| 333 | sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); | ||
| 334 | if (sh == NULL) | ||
| 335 | return -1; | ||
| 336 | |||
| 337 | if (do_basic_checks(conntrack, skb, dataoff, map) != 0) | ||
| 338 | return -1; | ||
| 339 | |||
| 340 | /* Check the verification tag (Sec 8.5) */ | ||
| 341 | if (!test_bit(SCTP_CID_INIT, (void *)map) | ||
| 342 | && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) | ||
| 343 | && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map) | ||
| 344 | && !test_bit(SCTP_CID_ABORT, (void *)map) | ||
| 345 | && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) | ||
| 346 | && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { | ||
| 347 | DEBUGP("Verification tag check failed\n"); | ||
| 348 | return -1; | ||
| 349 | } | ||
| 350 | |||
| 351 | oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX; | ||
| 352 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { | ||
| 353 | write_lock_bh(&sctp_lock); | ||
| 354 | |||
| 355 | /* Special cases of Verification tag check (Sec 8.5.1) */ | ||
| 356 | if (sch->type == SCTP_CID_INIT) { | ||
| 357 | /* Sec 8.5.1 (A) */ | ||
| 358 | if (sh->vtag != 0) { | ||
| 359 | write_unlock_bh(&sctp_lock); | ||
| 360 | return -1; | ||
| 361 | } | ||
| 362 | } else if (sch->type == SCTP_CID_ABORT) { | ||
| 363 | /* Sec 8.5.1 (B) */ | ||
| 364 | if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) | ||
| 365 | && !(sh->vtag == conntrack->proto.sctp.vtag | ||
| 366 | [1 - CTINFO2DIR(ctinfo)])) { | ||
| 367 | write_unlock_bh(&sctp_lock); | ||
| 368 | return -1; | ||
| 369 | } | ||
| 370 | } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { | ||
| 371 | /* Sec 8.5.1 (C) */ | ||
| 372 | if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) | ||
| 373 | && !(sh->vtag == conntrack->proto.sctp.vtag | ||
| 374 | [1 - CTINFO2DIR(ctinfo)] | ||
| 375 | && (sch->flags & 1))) { | ||
| 376 | write_unlock_bh(&sctp_lock); | ||
| 377 | return -1; | ||
| 378 | } | ||
| 379 | } else if (sch->type == SCTP_CID_COOKIE_ECHO) { | ||
| 380 | /* Sec 8.5.1 (D) */ | ||
| 381 | if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { | ||
| 382 | write_unlock_bh(&sctp_lock); | ||
| 383 | return -1; | ||
| 384 | } | ||
| 385 | } | ||
| 386 | |||
| 387 | oldsctpstate = conntrack->proto.sctp.state; | ||
| 388 | newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type); | ||
| 389 | |||
| 390 | /* Invalid */ | ||
| 391 | if (newconntrack == SCTP_CONNTRACK_MAX) { | ||
| 392 | DEBUGP("nf_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n", | ||
| 393 | CTINFO2DIR(ctinfo), sch->type, oldsctpstate); | ||
| 394 | write_unlock_bh(&sctp_lock); | ||
| 395 | return -1; | ||
| 396 | } | ||
| 397 | |||
| 398 | /* If it is an INIT or an INIT ACK note down the vtag */ | ||
| 399 | if (sch->type == SCTP_CID_INIT | ||
| 400 | || sch->type == SCTP_CID_INIT_ACK) { | ||
| 401 | sctp_inithdr_t _inithdr, *ih; | ||
| 402 | |||
| 403 | ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), | ||
| 404 | sizeof(_inithdr), &_inithdr); | ||
| 405 | if (ih == NULL) { | ||
| 406 | write_unlock_bh(&sctp_lock); | ||
| 407 | return -1; | ||
| 408 | } | ||
| 409 | DEBUGP("Setting vtag %x for dir %d\n", | ||
| 410 | ih->init_tag, !CTINFO2DIR(ctinfo)); | ||
| 411 | conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag; | ||
| 412 | } | ||
| 413 | |||
| 414 | conntrack->proto.sctp.state = newconntrack; | ||
| 415 | if (oldsctpstate != newconntrack) | ||
| 416 | nf_conntrack_event_cache(IPCT_PROTOINFO, skb); | ||
| 417 | write_unlock_bh(&sctp_lock); | ||
| 418 | } | ||
| 419 | |||
| 420 | nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]); | ||
| 421 | |||
| 422 | if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED | ||
| 423 | && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY | ||
| 424 | && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { | ||
| 425 | DEBUGP("Setting assured bit\n"); | ||
| 426 | set_bit(IPS_ASSURED_BIT, &conntrack->status); | ||
| 427 | nf_conntrack_event_cache(IPCT_STATUS, skb); | ||
| 428 | } | ||
| 429 | |||
| 430 | return NF_ACCEPT; | ||
| 431 | } | ||
| 432 | |||
| 433 | /* Called when a new connection for this protocol found. */ | ||
| 434 | static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, | ||
| 435 | unsigned int dataoff) | ||
| 436 | { | ||
| 437 | enum sctp_conntrack newconntrack; | ||
| 438 | sctp_sctphdr_t _sctph, *sh; | ||
| 439 | sctp_chunkhdr_t _sch, *sch; | ||
| 440 | u_int32_t offset, count; | ||
| 441 | char map[256 / sizeof (char)] = {0}; | ||
| 442 | |||
| 443 | DEBUGP(__FUNCTION__); | ||
| 444 | DEBUGP("\n"); | ||
| 445 | |||
| 446 | sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); | ||
| 447 | if (sh == NULL) | ||
| 448 | return 0; | ||
| 449 | |||
| 450 | if (do_basic_checks(conntrack, skb, dataoff, map) != 0) | ||
| 451 | return 0; | ||
| 452 | |||
| 453 | /* If an OOTB packet has any of these chunks discard (Sec 8.4) */ | ||
| 454 | if ((test_bit (SCTP_CID_ABORT, (void *)map)) | ||
| 455 | || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)) | ||
| 456 | || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) { | ||
| 457 | return 0; | ||
| 458 | } | ||
| 459 | |||
| 460 | newconntrack = SCTP_CONNTRACK_MAX; | ||
| 461 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { | ||
| 462 | /* Don't need lock here: this conntrack not in circulation yet */ | ||
| 463 | newconntrack = new_state(IP_CT_DIR_ORIGINAL, | ||
| 464 | SCTP_CONNTRACK_NONE, sch->type); | ||
| 465 | |||
| 466 | /* Invalid: delete conntrack */ | ||
| 467 | if (newconntrack == SCTP_CONNTRACK_MAX) { | ||
| 468 | DEBUGP("nf_conntrack_sctp: invalid new deleting.\n"); | ||
| 469 | return 0; | ||
| 470 | } | ||
| 471 | |||
| 472 | /* Copy the vtag into the state info */ | ||
| 473 | if (sch->type == SCTP_CID_INIT) { | ||
| 474 | if (sh->vtag == 0) { | ||
| 475 | sctp_inithdr_t _inithdr, *ih; | ||
| 476 | |||
| 477 | ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), | ||
| 478 | sizeof(_inithdr), &_inithdr); | ||
| 479 | if (ih == NULL) | ||
| 480 | return 0; | ||
| 481 | |||
| 482 | DEBUGP("Setting vtag %x for new conn\n", | ||
| 483 | ih->init_tag); | ||
| 484 | |||
| 485 | conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = | ||
| 486 | ih->init_tag; | ||
| 487 | } else { | ||
| 488 | /* Sec 8.5.1 (A) */ | ||
| 489 | return 0; | ||
| 490 | } | ||
| 491 | } | ||
| 492 | /* If it is a shutdown ack OOTB packet, we expect a return | ||
| 493 | shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ | ||
| 494 | else { | ||
| 495 | DEBUGP("Setting vtag %x for new conn OOTB\n", | ||
| 496 | sh->vtag); | ||
| 497 | conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag; | ||
| 498 | } | ||
| 499 | |||
| 500 | conntrack->proto.sctp.state = newconntrack; | ||
| 501 | } | ||
| 502 | |||
| 503 | return 1; | ||
| 504 | } | ||
| 505 | |||
| 506 | struct nf_conntrack_protocol nf_conntrack_protocol_sctp4 = { | ||
| 507 | .l3proto = PF_INET, | ||
| 508 | .proto = IPPROTO_SCTP, | ||
| 509 | .name = "sctp", | ||
| 510 | .pkt_to_tuple = sctp_pkt_to_tuple, | ||
| 511 | .invert_tuple = sctp_invert_tuple, | ||
| 512 | .print_tuple = sctp_print_tuple, | ||
| 513 | .print_conntrack = sctp_print_conntrack, | ||
| 514 | .packet = sctp_packet, | ||
| 515 | .new = sctp_new, | ||
| 516 | .destroy = NULL, | ||
| 517 | .me = THIS_MODULE | ||
| 518 | }; | ||
| 519 | |||
| 520 | struct nf_conntrack_protocol nf_conntrack_protocol_sctp6 = { | ||
| 521 | .l3proto = PF_INET6, | ||
| 522 | .proto = IPPROTO_SCTP, | ||
| 523 | .name = "sctp", | ||
| 524 | .pkt_to_tuple = sctp_pkt_to_tuple, | ||
| 525 | .invert_tuple = sctp_invert_tuple, | ||
| 526 | .print_tuple = sctp_print_tuple, | ||
| 527 | .print_conntrack = sctp_print_conntrack, | ||
| 528 | .packet = sctp_packet, | ||
| 529 | .new = sctp_new, | ||
| 530 | .destroy = NULL, | ||
| 531 | .me = THIS_MODULE | ||
| 532 | }; | ||
| 533 | |||
| 534 | #ifdef CONFIG_SYSCTL | ||
| 535 | static ctl_table nf_ct_sysctl_table[] = { | ||
| 536 | { | ||
| 537 | .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, | ||
| 538 | .procname = "nf_conntrack_sctp_timeout_closed", | ||
| 539 | .data = &nf_ct_sctp_timeout_closed, | ||
| 540 | .maxlen = sizeof(unsigned int), | ||
| 541 | .mode = 0644, | ||
| 542 | .proc_handler = &proc_dointvec_jiffies, | ||
| 543 | }, | ||
| 544 | { | ||
| 545 | .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, | ||
| 546 | .procname = "nf_conntrack_sctp_timeout_cookie_wait", | ||
| 547 | .data = &nf_ct_sctp_timeout_cookie_wait, | ||
| 548 | .maxlen = sizeof(unsigned int), | ||
| 549 | .mode = 0644, | ||
| 550 | .proc_handler = &proc_dointvec_jiffies, | ||
| 551 | }, | ||
| 552 | { | ||
| 553 | .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, | ||
| 554 | .procname = "nf_conntrack_sctp_timeout_cookie_echoed", | ||
| 555 | .data = &nf_ct_sctp_timeout_cookie_echoed, | ||
| 556 | .maxlen = sizeof(unsigned int), | ||
| 557 | .mode = 0644, | ||
| 558 | .proc_handler = &proc_dointvec_jiffies, | ||
| 559 | }, | ||
| 560 | { | ||
| 561 | .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, | ||
| 562 | .procname = "nf_conntrack_sctp_timeout_established", | ||
| 563 | .data = &nf_ct_sctp_timeout_established, | ||
| 564 | .maxlen = sizeof(unsigned int), | ||
| 565 | .mode = 0644, | ||
| 566 | .proc_handler = &proc_dointvec_jiffies, | ||
| 567 | }, | ||
| 568 | { | ||
| 569 | .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, | ||
| 570 | .procname = "nf_conntrack_sctp_timeout_shutdown_sent", | ||
| 571 | .data = &nf_ct_sctp_timeout_shutdown_sent, | ||
| 572 | .maxlen = sizeof(unsigned int), | ||
| 573 | .mode = 0644, | ||
| 574 | .proc_handler = &proc_dointvec_jiffies, | ||
| 575 | }, | ||
| 576 | { | ||
| 577 | .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, | ||
| 578 | .procname = "nf_conntrack_sctp_timeout_shutdown_recd", | ||
| 579 | .data = &nf_ct_sctp_timeout_shutdown_recd, | ||
| 580 | .maxlen = sizeof(unsigned int), | ||
| 581 | .mode = 0644, | ||
| 582 | .proc_handler = &proc_dointvec_jiffies, | ||
| 583 | }, | ||
| 584 | { | ||
| 585 | .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, | ||
| 586 | .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", | ||
| 587 | .data = &nf_ct_sctp_timeout_shutdown_ack_sent, | ||
| 588 | .maxlen = sizeof(unsigned int), | ||
| 589 | .mode = 0644, | ||
| 590 | .proc_handler = &proc_dointvec_jiffies, | ||
| 591 | }, | ||
| 592 | { .ctl_name = 0 } | ||
| 593 | }; | ||
| 594 | |||
| 595 | static ctl_table nf_ct_netfilter_table[] = { | ||
| 596 | { | ||
| 597 | .ctl_name = NET_NETFILTER, | ||
| 598 | .procname = "netfilter", | ||
| 599 | .mode = 0555, | ||
| 600 | .child = nf_ct_sysctl_table, | ||
| 601 | }, | ||
| 602 | { .ctl_name = 0 } | ||
| 603 | }; | ||
| 604 | |||
| 605 | static ctl_table nf_ct_net_table[] = { | ||
| 606 | { | ||
| 607 | .ctl_name = CTL_NET, | ||
| 608 | .procname = "net", | ||
| 609 | .mode = 0555, | ||
| 610 | .child = nf_ct_netfilter_table, | ||
| 611 | }, | ||
| 612 | { .ctl_name = 0 } | ||
| 613 | }; | ||
| 614 | |||
| 615 | static struct ctl_table_header *nf_ct_sysctl_header; | ||
| 616 | #endif | ||
| 617 | |||
| 618 | int __init init(void) | ||
| 619 | { | ||
| 620 | int ret; | ||
| 621 | |||
| 622 | ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp4); | ||
| 623 | if (ret) { | ||
| 624 | printk("nf_conntrack_proto_sctp4: protocol register failed\n"); | ||
| 625 | goto out; | ||
| 626 | } | ||
| 627 | ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp6); | ||
| 628 | if (ret) { | ||
| 629 | printk("nf_conntrack_proto_sctp6: protocol register failed\n"); | ||
| 630 | goto cleanup_sctp4; | ||
| 631 | } | ||
| 632 | |||
| 633 | #ifdef CONFIG_SYSCTL | ||
| 634 | nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); | ||
| 635 | if (nf_ct_sysctl_header == NULL) { | ||
| 636 | printk("nf_conntrack_proto_sctp: can't register to sysctl.\n"); | ||
| 637 | goto cleanup; | ||
| 638 | } | ||
| 639 | #endif | ||
| 640 | |||
| 641 | return ret; | ||
| 642 | |||
| 643 | #ifdef CONFIG_SYSCTL | ||
| 644 | cleanup: | ||
| 645 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6); | ||
| 646 | #endif | ||
| 647 | cleanup_sctp4: | ||
| 648 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4); | ||
| 649 | out: | ||
| 650 | DEBUGP("SCTP conntrack module loading %s\n", | ||
| 651 | ret ? "failed": "succeeded"); | ||
| 652 | return ret; | ||
| 653 | } | ||
| 654 | |||
| 655 | void __exit fini(void) | ||
| 656 | { | ||
| 657 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6); | ||
| 658 | nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4); | ||
| 659 | #ifdef CONFIG_SYSCTL | ||
| 660 | unregister_sysctl_table(nf_ct_sysctl_header); | ||
| 661 | #endif | ||
| 662 | DEBUGP("SCTP conntrack module unloaded\n"); | ||
| 663 | } | ||
| 664 | |||
| 665 | module_init(init); | ||
| 666 | module_exit(fini); | ||
| 667 | |||
| 668 | MODULE_LICENSE("GPL"); | ||
| 669 | MODULE_AUTHOR("Kiran Kumar Immidi"); | ||
| 670 | MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP"); | ||
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c new file mode 100644 index 000000000000..83d90dd624f0 --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_tcp.c  | |||
| @@ -0,0 +1,1162 @@ | |||
| 1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
| 2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License version 2 as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>: | ||
| 9 | * - Real stateful connection tracking | ||
| 10 | * - Modified state transitions table | ||
| 11 | * - Window scaling support added | ||
| 12 | * - SACK support added | ||
| 13 | * | ||
| 14 | * Willy Tarreau: | ||
| 15 | * - State table bugfixes | ||
| 16 | * - More robust state changes | ||
| 17 | * - Tuning timer parameters | ||
| 18 | * | ||
| 19 | * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 20 | * - generalized Layer 3 protocol part. | ||
| 21 | * | ||
| 22 | * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c | ||
| 23 | * | ||
| 24 | * version 2.2 | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <linux/config.h> | ||
| 28 | #include <linux/types.h> | ||
| 29 | #include <linux/sched.h> | ||
| 30 | #include <linux/timer.h> | ||
| 31 | #include <linux/netfilter.h> | ||
| 32 | #include <linux/module.h> | ||
| 33 | #include <linux/in.h> | ||
| 34 | #include <linux/tcp.h> | ||
| 35 | #include <linux/spinlock.h> | ||
| 36 | #include <linux/skbuff.h> | ||
| 37 | #include <linux/ipv6.h> | ||
| 38 | #include <net/ip6_checksum.h> | ||
| 39 | |||
| 40 | #include <net/tcp.h> | ||
| 41 | |||
| 42 | #include <linux/netfilter.h> | ||
| 43 | #include <linux/netfilter_ipv4.h> | ||
| 44 | #include <linux/netfilter_ipv6.h> | ||
| 45 | #include <net/netfilter/nf_conntrack.h> | ||
| 46 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 47 | |||
| 48 | #if 0 | ||
| 49 | #define DEBUGP printk | ||
| 50 | #define DEBUGP_VARS | ||
| 51 | #else | ||
| 52 | #define DEBUGP(format, args...) | ||
| 53 | #endif | ||
| 54 | |||
| 55 | /* Protects conntrack->proto.tcp */ | ||
| 56 | static DEFINE_RWLOCK(tcp_lock); | ||
| 57 | |||
| 58 | /* "Be conservative in what you do, | ||
| 59 | be liberal in what you accept from others." | ||
| 60 | If it's non-zero, we mark only out of window RST segments as INVALID. */ | ||
| 61 | int nf_ct_tcp_be_liberal = 0; | ||
| 62 | |||
| 63 | /* When connection is picked up from the middle, how many packets are required | ||
| 64 | to pass in each direction when we assume we are in sync - if any side uses | ||
| 65 | window scaling, we lost the game. | ||
| 66 | If it is set to zero, we disable picking up already established | ||
| 67 | connections. */ | ||
| 68 | int nf_ct_tcp_loose = 3; | ||
| 69 | |||
| 70 | /* Max number of the retransmitted packets without receiving an (acceptable) | ||
| 71 | ACK from the destination. If this number is reached, a shorter timer | ||
| 72 | will be started. */ | ||
| 73 | int nf_ct_tcp_max_retrans = 3; | ||
| 74 | |||
| 75 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more | ||
| 76 | closely. They're more complex. --RR */ | ||
| 77 | |||
/* Printable names of the TCP conntrack states.  tcp_print_conntrack()
   indexes this table with the connection's enum tcp_conntrack state.
   Neither the strings nor the table are ever modified, so both are const. */
static const char * const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"LISTEN"
};
| 90 | |||
| 91 | #define SECS * HZ | ||
| 92 | #define MINS * 60 SECS | ||
| 93 | #define HOURS * 60 MINS | ||
| 94 | #define DAYS * 24 HOURS | ||
| 95 | |||
| 96 | unsigned long nf_ct_tcp_timeout_syn_sent = 2 MINS; | ||
| 97 | unsigned long nf_ct_tcp_timeout_syn_recv = 60 SECS; | ||
| 98 | unsigned long nf_ct_tcp_timeout_established = 5 DAYS; | ||
| 99 | unsigned long nf_ct_tcp_timeout_fin_wait = 2 MINS; | ||
| 100 | unsigned long nf_ct_tcp_timeout_close_wait = 60 SECS; | ||
| 101 | unsigned long nf_ct_tcp_timeout_last_ack = 30 SECS; | ||
| 102 | unsigned long nf_ct_tcp_timeout_time_wait = 2 MINS; | ||
| 103 | unsigned long nf_ct_tcp_timeout_close = 10 SECS; | ||
| 104 | |||
| 105 | /* RFC1122 says the R2 limit should be at least 100 seconds. | ||
| 106 | Linux uses 15 packets as limit, which corresponds | ||
| 107 | to ~13-30min depending on RTO. */ | ||
| 108 | unsigned long nf_ct_tcp_timeout_max_retrans = 5 MINS; | ||
| 109 | |||
| 110 | static unsigned long * tcp_timeouts[] | ||
| 111 | = { NULL, /* TCP_CONNTRACK_NONE */ | ||
| 112 | &nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ | ||
| 113 | &nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ | ||
| 114 | &nf_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ | ||
| 115 | &nf_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */ | ||
| 116 | &nf_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */ | ||
| 117 | &nf_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */ | ||
| 118 | &nf_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */ | ||
| 119 | &nf_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */ | ||
| 120 | NULL, /* TCP_CONNTRACK_LISTEN */ | ||
| 121 | }; | ||
| 122 | |||
| 123 | #define sNO TCP_CONNTRACK_NONE | ||
| 124 | #define sSS TCP_CONNTRACK_SYN_SENT | ||
| 125 | #define sSR TCP_CONNTRACK_SYN_RECV | ||
| 126 | #define sES TCP_CONNTRACK_ESTABLISHED | ||
| 127 | #define sFW TCP_CONNTRACK_FIN_WAIT | ||
| 128 | #define sCW TCP_CONNTRACK_CLOSE_WAIT | ||
| 129 | #define sLA TCP_CONNTRACK_LAST_ACK | ||
| 130 | #define sTW TCP_CONNTRACK_TIME_WAIT | ||
| 131 | #define sCL TCP_CONNTRACK_CLOSE | ||
| 132 | #define sLI TCP_CONNTRACK_LISTEN | ||
| 133 | #define sIV TCP_CONNTRACK_MAX | ||
| 134 | #define sIG TCP_CONNTRACK_IGNORE | ||
| 135 | |||
/* Classification of a segment by its RST/SYN/FIN/ACK flags.  The value
   is used as the second index into the tcp_conntracks[][6][] state table,
   so the numbering below must stay dense and in this order. */
enum tcp_bit_set {
	TCP_SYN_SET	= 0,
	TCP_SYNACK_SET	= 1,
	TCP_FIN_SET	= 2,
	TCP_ACK_SET	= 3,
	TCP_RST_SET	= 4,
	TCP_NONE_SET	= 5,
};
| 145 | |||
| 146 | /* | ||
| 147 | * The TCP state transition table needs a few words... | ||
| 148 | * | ||
| 149 | * We are the man in the middle. All the packets go through us | ||
| 150 | * but might get lost in transit to the destination. | ||
| 151 | * It is assumed that the destinations can't receive segments | ||
| 152 | * we haven't seen. | ||
| 153 | * | ||
| 154 | * The checked segment is in window, but our windows are *not* | ||
| 155 | * equivalent with the ones of the sender/receiver. We always | ||
| 156 | * try to guess the state of the current sender. | ||
| 157 | * | ||
| 158 | * The meaning of the states are: | ||
| 159 | * | ||
| 160 | * NONE: initial state | ||
| 161 | * SYN_SENT: SYN-only packet seen | ||
| 162 | * SYN_RECV: SYN-ACK packet seen | ||
| 163 | * ESTABLISHED: ACK packet seen | ||
| 164 | * FIN_WAIT: FIN packet seen | ||
| 165 | * CLOSE_WAIT: ACK seen (after FIN) | ||
| 166 | * LAST_ACK: FIN seen (after FIN) | ||
| 167 | * TIME_WAIT: last ACK seen | ||
| 168 | * CLOSE: closed connection | ||
| 169 | * | ||
| 170 | * LISTEN state is not used. | ||
| 171 | * | ||
| 172 | * Packets marked as IGNORED (sIG): | ||
| 173 | * if they may be either invalid or valid | ||
| 174 | * and the receiver may send back a connection | ||
| 175 | * closing RST or a SYN/ACK. | ||
| 176 | * | ||
| 177 | * Packets marked as INVALID (sIV): | ||
| 178 | * if they are invalid | ||
| 179 | * or we do not support the request (simultaneous open) | ||
| 180 | */ | ||
| 181 | static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { | ||
| 182 | { | ||
| 183 | /* ORIGINAL */ | ||
| 184 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ | ||
| 185 | /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, | ||
| 186 | /* | ||
| 187 | * sNO -> sSS Initialize a new connection | ||
| 188 | * sSS -> sSS Retransmitted SYN | ||
| 189 | * sSR -> sIG Late retransmitted SYN? | ||
| 190 | * sES -> sIG Error: SYNs in window outside the SYN_SENT state | ||
| 191 | * are errors. Receiver will reply with RST | ||
| 192 | * and close the connection. | ||
| 193 | * Or we are not in sync and hold a dead connection. | ||
| 194 | * sFW -> sIG | ||
| 195 | * sCW -> sIG | ||
| 196 | * sLA -> sIG | ||
| 197 | * sTW -> sSS Reopened connection (RFC 1122). | ||
| 198 | * sCL -> sSS | ||
| 199 | */ | ||
| 200 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ | ||
| 201 | /*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, | ||
| 202 | /* | ||
| 203 | * A SYN/ACK from the client is always invalid: | ||
| 204 | * - either it tries to set up a simultaneous open, which is | ||
| 205 | * not supported; | ||
| 206 | * - or the firewall has just been inserted between the two hosts | ||
| 207 | * during the session set-up. The SYN will be retransmitted | ||
| 208 | * by the true client (or it'll time out). | ||
| 209 | */ | ||
| 210 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ | ||
| 211 | /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, | ||
| 212 | /* | ||
| 213 | * sNO -> sIV Too late and no reason to do anything... | ||
| 214 | * sSS -> sIV Client might not send FIN in this state: | ||
| 215 | * we enforce waiting for a SYN/ACK reply first. | ||
| 216 | * sSR -> sFW Close started. | ||
| 217 | * sES -> sFW | ||
| 218 | * sFW -> sLA FIN seen in both directions, waiting for | ||
| 219 | * the last ACK. | ||
| 220 | * Might be a retransmitted FIN as well... | ||
| 221 | * sCW -> sLA | ||
| 222 | * sLA -> sLA Retransmitted FIN. Remain in the same state. | ||
| 223 | * sTW -> sTW | ||
| 224 | * sCL -> sCL | ||
| 225 | */ | ||
| 226 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ | ||
| 227 | /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, | ||
| 228 | /* | ||
| 229 | * sNO -> sES Assumed. | ||
| 230 | * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. | ||
| 231 | * sSR -> sES Established state is reached. | ||
| 232 | * sES -> sES :-) | ||
| 233 | * sFW -> sCW Normal close request answered by ACK. | ||
| 234 | * sCW -> sCW | ||
| 235 | * sLA -> sTW Last ACK detected. | ||
| 236 | * sTW -> sTW Retransmitted last ACK. Remain in the same state. | ||
| 237 | * sCL -> sCL | ||
| 238 | */ | ||
| 239 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ | ||
| 240 | /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, | ||
| 241 | /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } | ||
| 242 | }, | ||
| 243 | { | ||
| 244 | /* REPLY */ | ||
| 245 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ | ||
| 246 | /*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, | ||
| 247 | /* | ||
| 248 | * sNO -> sIV Never reached. | ||
| 249 | * sSS -> sIV Simultaneous open, not supported | ||
| 250 | * sSR -> sIV Simultaneous open, not supported. | ||
| 251 | * sES -> sIV Server may not initiate a connection. | ||
| 252 | * sFW -> sIV | ||
| 253 | * sCW -> sIV | ||
| 254 | * sLA -> sIV | ||
| 255 | * sTW -> sIV Reopened connection, but server may not do it. | ||
| 256 | * sCL -> sIV | ||
| 257 | */ | ||
| 258 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ | ||
| 259 | /*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, | ||
| 260 | /* | ||
| 261 | * sSS -> sSR Standard open. | ||
| 262 | * sSR -> sSR Retransmitted SYN/ACK. | ||
| 263 | * sES -> sIG Late retransmitted SYN/ACK? | ||
| 264 | * sFW -> sIG Might be SYN/ACK answering ignored SYN | ||
| 265 | * sCW -> sIG | ||
| 266 | * sLA -> sIG | ||
| 267 | * sTW -> sIG | ||
| 268 | * sCL -> sIG | ||
| 269 | */ | ||
| 270 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ | ||
| 271 | /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, | ||
| 272 | /* | ||
| 273 | * sSS -> sIV Server might not send FIN in this state. | ||
| 274 | * sSR -> sFW Close started. | ||
| 275 | * sES -> sFW | ||
| 276 | * sFW -> sLA FIN seen in both directions. | ||
| 277 | * sCW -> sLA | ||
| 278 | * sLA -> sLA Retransmitted FIN. | ||
| 279 | * sTW -> sTW | ||
| 280 | * sCL -> sCL | ||
| 281 | */ | ||
| 282 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ | ||
| 283 | /*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, | ||
| 284 | /* | ||
| 285 | * sSS -> sIV Might be a half-open connection. | ||
| 286 | * sSR -> sSR Might answer late resent SYN. | ||
| 287 | * sES -> sES :-) | ||
| 288 | * sFW -> sCW Normal close request answered by ACK. | ||
| 289 | * sCW -> sCW | ||
| 290 | * sLA -> sTW Last ACK detected. | ||
| 291 | * sTW -> sTW Retransmitted last ACK. | ||
| 292 | * sCL -> sCL | ||
| 293 | */ | ||
| 294 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ | ||
| 295 | /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, | ||
| 296 | /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } | ||
| 297 | } | ||
| 298 | }; | ||
| 299 | |||
| 300 | static int tcp_pkt_to_tuple(const struct sk_buff *skb, | ||
| 301 | unsigned int dataoff, | ||
| 302 | struct nf_conntrack_tuple *tuple) | ||
| 303 | { | ||
| 304 | struct tcphdr _hdr, *hp; | ||
| 305 | |||
| 306 | /* Actually only need first 8 bytes. */ | ||
| 307 | hp = skb_header_pointer(skb, dataoff, 8, &_hdr); | ||
| 308 | if (hp == NULL) | ||
| 309 | return 0; | ||
| 310 | |||
| 311 | tuple->src.u.tcp.port = hp->source; | ||
| 312 | tuple->dst.u.tcp.port = hp->dest; | ||
| 313 | |||
| 314 | return 1; | ||
| 315 | } | ||
| 316 | |||
| 317 | static int tcp_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
| 318 | const struct nf_conntrack_tuple *orig) | ||
| 319 | { | ||
| 320 | tuple->src.u.tcp.port = orig->dst.u.tcp.port; | ||
| 321 | tuple->dst.u.tcp.port = orig->src.u.tcp.port; | ||
| 322 | return 1; | ||
| 323 | } | ||
| 324 | |||
| 325 | /* Print out the per-protocol part of the tuple. */ | ||
| 326 | static int tcp_print_tuple(struct seq_file *s, | ||
| 327 | const struct nf_conntrack_tuple *tuple) | ||
| 328 | { | ||
| 329 | return seq_printf(s, "sport=%hu dport=%hu ", | ||
| 330 | ntohs(tuple->src.u.tcp.port), | ||
| 331 | ntohs(tuple->dst.u.tcp.port)); | ||
| 332 | } | ||
| 333 | |||
| 334 | /* Print out the private part of the conntrack. */ | ||
| 335 | static int tcp_print_conntrack(struct seq_file *s, | ||
| 336 | const struct nf_conn *conntrack) | ||
| 337 | { | ||
| 338 | enum tcp_conntrack state; | ||
| 339 | |||
| 340 | read_lock_bh(&tcp_lock); | ||
| 341 | state = conntrack->proto.tcp.state; | ||
| 342 | read_unlock_bh(&tcp_lock); | ||
| 343 | |||
| 344 | return seq_printf(s, "%s ", tcp_conntrack_names[state]); | ||
| 345 | } | ||
| 346 | |||
| 347 | static unsigned int get_conntrack_index(const struct tcphdr *tcph) | ||
| 348 | { | ||
| 349 | if (tcph->rst) return TCP_RST_SET; | ||
| 350 | else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); | ||
| 351 | else if (tcph->fin) return TCP_FIN_SET; | ||
| 352 | else if (tcph->ack) return TCP_ACK_SET; | ||
| 353 | else return TCP_NONE_SET; | ||
| 354 | } | ||
| 355 | |||
| 356 | /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering | ||
| 357 | in IP Filter' by Guido van Rooij. | ||
| 358 | |||
| 359 | http://www.nluug.nl/events/sane2000/papers.html | ||
| 360 | http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz | ||
| 361 | |||
| 362 | The boundaries and the conditions are changed according to RFC793: | ||
| 363 | the packet must intersect the window (i.e. segments may be | ||
| 364 | after the right or before the left edge) and thus receivers may ACK | ||
| 365 | segments after the right edge of the window. | ||
| 366 | |||
| 367 | td_maxend = max(sack + max(win,1)) seen in reply packets | ||
| 368 | td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets | ||
| 369 | td_maxwin += seq + len - sender.td_maxend | ||
| 370 | if seq + len > sender.td_maxend | ||
| 371 | td_end = max(seq + len) seen in sent packets | ||
| 372 | |||
| 373 | I. Upper bound for valid data: seq <= sender.td_maxend | ||
| 374 | II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin | ||
| 375 | III. Upper bound for valid ack: sack <= receiver.td_end | ||
| 376 | IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW | ||
| 377 | |||
| 378 | where sack is the highest right edge of sack block found in the packet. | ||
| 379 | |||
| 380 | The upper bound limit for a valid ack is not ignored - | ||
| 381 | we don't have to deal with fragments. | ||
| 382 | */ | ||
| 383 | |||
| 384 | static inline __u32 segment_seq_plus_len(__u32 seq, | ||
| 385 | size_t len, | ||
| 386 | unsigned int dataoff, | ||
| 387 | struct tcphdr *tcph) | ||
| 388 | { | ||
| 389 | /* XXX Should I use payload length field in IP/IPv6 header ? | ||
| 390 | * - YK */ | ||
| 391 | return (seq + len - dataoff - tcph->doff*4 | ||
| 392 | + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); | ||
| 393 | } | ||
| 394 | |||
| 395 | /* Fixme: what about big packets? */ | ||
| 396 | #define MAXACKWINCONST 66000 | ||
| 397 | #define MAXACKWINDOW(sender) \ | ||
| 398 | ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \ | ||
| 399 | : MAXACKWINCONST) | ||
| 400 | |||
| 401 | /* | ||
| 402 | * Simplified tcp_parse_options routine from tcp_input.c | ||
| 403 | */ | ||
| 404 | static void tcp_options(const struct sk_buff *skb, | ||
| 405 | unsigned int dataoff, | ||
| 406 | struct tcphdr *tcph, | ||
| 407 | struct ip_ct_tcp_state *state) | ||
| 408 | { | ||
| 409 | unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; | ||
| 410 | unsigned char *ptr; | ||
| 411 | int length = (tcph->doff*4) - sizeof(struct tcphdr); | ||
| 412 | |||
| 413 | if (!length) | ||
| 414 | return; | ||
| 415 | |||
| 416 | ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), | ||
| 417 | length, buff); | ||
| 418 | BUG_ON(ptr == NULL); | ||
| 419 | |||
| 420 | state->td_scale = | ||
| 421 | state->flags = 0; | ||
| 422 | |||
| 423 | while (length > 0) { | ||
| 424 | int opcode=*ptr++; | ||
| 425 | int opsize; | ||
| 426 | |||
| 427 | switch (opcode) { | ||
| 428 | case TCPOPT_EOL: | ||
| 429 | return; | ||
| 430 | case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ | ||
| 431 | length--; | ||
| 432 | continue; | ||
| 433 | default: | ||
| 434 | opsize=*ptr++; | ||
| 435 | if (opsize < 2) /* "silly options" */ | ||
| 436 | return; | ||
| 437 | if (opsize > length) | ||
| 438 | break; /* don't parse partial options */ | ||
| 439 | |||
| 440 | if (opcode == TCPOPT_SACK_PERM | ||
| 441 | && opsize == TCPOLEN_SACK_PERM) | ||
| 442 | state->flags |= IP_CT_TCP_FLAG_SACK_PERM; | ||
| 443 | else if (opcode == TCPOPT_WINDOW | ||
| 444 | && opsize == TCPOLEN_WINDOW) { | ||
| 445 | state->td_scale = *(u_int8_t *)ptr; | ||
| 446 | |||
| 447 | if (state->td_scale > 14) { | ||
| 448 | /* See RFC1323 */ | ||
| 449 | state->td_scale = 14; | ||
| 450 | } | ||
| 451 | state->flags |= | ||
| 452 | IP_CT_TCP_FLAG_WINDOW_SCALE; | ||
| 453 | } | ||
| 454 | ptr += opsize - 2; | ||
| 455 | length -= opsize; | ||
| 456 | } | ||
| 457 | } | ||
| 458 | } | ||
| 459 | |||
| 460 | static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, | ||
| 461 | struct tcphdr *tcph, __u32 *sack) | ||
| 462 | { | ||
| 463 | unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; | ||
| 464 | unsigned char *ptr; | ||
| 465 | int length = (tcph->doff*4) - sizeof(struct tcphdr); | ||
| 466 | __u32 tmp; | ||
| 467 | |||
| 468 | if (!length) | ||
| 469 | return; | ||
| 470 | |||
| 471 | ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), | ||
| 472 | length, buff); | ||
| 473 | BUG_ON(ptr == NULL); | ||
| 474 | |||
| 475 | /* Fast path for timestamp-only option */ | ||
| 476 | if (length == TCPOLEN_TSTAMP_ALIGNED*4 | ||
| 477 | && *(__u32 *)ptr == | ||
| 478 | __constant_ntohl((TCPOPT_NOP << 24) | ||
| 479 | | (TCPOPT_NOP << 16) | ||
| 480 | | (TCPOPT_TIMESTAMP << 8) | ||
| 481 | | TCPOLEN_TIMESTAMP)) | ||
| 482 | return; | ||
| 483 | |||
| 484 | while (length > 0) { | ||
| 485 | int opcode = *ptr++; | ||
| 486 | int opsize, i; | ||
| 487 | |||
| 488 | switch (opcode) { | ||
| 489 | case TCPOPT_EOL: | ||
| 490 | return; | ||
| 491 | case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ | ||
| 492 | length--; | ||
| 493 | continue; | ||
| 494 | default: | ||
| 495 | opsize = *ptr++; | ||
| 496 | if (opsize < 2) /* "silly options" */ | ||
| 497 | return; | ||
| 498 | if (opsize > length) | ||
| 499 | break; /* don't parse partial options */ | ||
| 500 | |||
| 501 | if (opcode == TCPOPT_SACK | ||
| 502 | && opsize >= (TCPOLEN_SACK_BASE | ||
| 503 | + TCPOLEN_SACK_PERBLOCK) | ||
| 504 | && !((opsize - TCPOLEN_SACK_BASE) | ||
| 505 | % TCPOLEN_SACK_PERBLOCK)) { | ||
| 506 | for (i = 0; | ||
| 507 | i < (opsize - TCPOLEN_SACK_BASE); | ||
| 508 | i += TCPOLEN_SACK_PERBLOCK) { | ||
| 509 | memcpy(&tmp, (__u32 *)(ptr + i) + 1, | ||
| 510 | sizeof(__u32)); | ||
| 511 | tmp = ntohl(tmp); | ||
| 512 | |||
| 513 | if (after(tmp, *sack)) | ||
| 514 | *sack = tmp; | ||
| 515 | } | ||
| 516 | return; | ||
| 517 | } | ||
| 518 | ptr += opsize - 2; | ||
| 519 | length -= opsize; | ||
| 520 | } | ||
| 521 | } | ||
| 522 | } | ||
| 523 | |||
| 524 | static int tcp_in_window(struct ip_ct_tcp *state, | ||
| 525 | enum ip_conntrack_dir dir, | ||
| 526 | unsigned int index, | ||
| 527 | const struct sk_buff *skb, | ||
| 528 | unsigned int dataoff, | ||
| 529 | struct tcphdr *tcph, | ||
| 530 | int pf) | ||
| 531 | { | ||
| 532 | struct ip_ct_tcp_state *sender = &state->seen[dir]; | ||
| 533 | struct ip_ct_tcp_state *receiver = &state->seen[!dir]; | ||
| 534 | __u32 seq, ack, sack, end, win, swin; | ||
| 535 | int res; | ||
| 536 | |||
| 537 | /* | ||
| 538 | * Get the required data from the packet. | ||
| 539 | */ | ||
| 540 | seq = ntohl(tcph->seq); | ||
| 541 | ack = sack = ntohl(tcph->ack_seq); | ||
| 542 | win = ntohs(tcph->window); | ||
| 543 | end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); | ||
| 544 | |||
| 545 | if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) | ||
| 546 | tcp_sack(skb, dataoff, tcph, &sack); | ||
| 547 | |||
| 548 | DEBUGP("tcp_in_window: START\n"); | ||
| 549 | DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " | ||
| 550 | "seq=%u ack=%u sack=%u win=%u end=%u\n", | ||
| 551 | NIPQUAD(iph->saddr), ntohs(tcph->source), | ||
| 552 | NIPQUAD(iph->daddr), ntohs(tcph->dest), | ||
| 553 | seq, ack, sack, win, end); | ||
| 554 | DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " | ||
| 555 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | ||
| 556 | sender->td_end, sender->td_maxend, sender->td_maxwin, | ||
| 557 | sender->td_scale, | ||
| 558 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin, | ||
| 559 | receiver->td_scale); | ||
| 560 | |||
| 561 | if (sender->td_end == 0) { | ||
| 562 | /* | ||
| 563 | * Initialize sender data. | ||
| 564 | */ | ||
| 565 | if (tcph->syn && tcph->ack) { | ||
| 566 | /* | ||
| 567 | * Outgoing SYN-ACK in reply to a SYN. | ||
| 568 | */ | ||
| 569 | sender->td_end = | ||
| 570 | sender->td_maxend = end; | ||
| 571 | sender->td_maxwin = (win == 0 ? 1 : win); | ||
| 572 | |||
| 573 | tcp_options(skb, dataoff, tcph, sender); | ||
| 574 | /* | ||
| 575 | * RFC 1323: | ||
| 576 | * Both sides must send the Window Scale option | ||
| 577 | * to enable window scaling in either direction. | ||
| 578 | */ | ||
| 579 | if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE | ||
| 580 | && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) | ||
| 581 | sender->td_scale = | ||
| 582 | receiver->td_scale = 0; | ||
| 583 | } else { | ||
| 584 | /* | ||
| 585 | * We are in the middle of a connection, | ||
| 586 | * its history is lost for us. | ||
| 587 | * Let's try to use the data from the packet. | ||
| 588 | */ | ||
| 589 | sender->td_end = end; | ||
| 590 | sender->td_maxwin = (win == 0 ? 1 : win); | ||
| 591 | sender->td_maxend = end + sender->td_maxwin; | ||
| 592 | } | ||
| 593 | } else if (((state->state == TCP_CONNTRACK_SYN_SENT | ||
| 594 | && dir == IP_CT_DIR_ORIGINAL) | ||
| 595 | || (state->state == TCP_CONNTRACK_SYN_RECV | ||
| 596 | && dir == IP_CT_DIR_REPLY)) | ||
| 597 | && after(end, sender->td_end)) { | ||
| 598 | /* | ||
| 599 | * RFC 793: "if a TCP is reinitialized ... then it need | ||
| 600 | * not wait at all; it must only be sure to use sequence | ||
| 601 | * numbers larger than those recently used." | ||
| 602 | */ | ||
| 603 | sender->td_end = | ||
| 604 | sender->td_maxend = end; | ||
| 605 | sender->td_maxwin = (win == 0 ? 1 : win); | ||
| 606 | |||
| 607 | tcp_options(skb, dataoff, tcph, sender); | ||
| 608 | } | ||
| 609 | |||
| 610 | if (!(tcph->ack)) { | ||
| 611 | /* | ||
| 612 | * If there is no ACK, just pretend it was set and OK. | ||
| 613 | */ | ||
| 614 | ack = sack = receiver->td_end; | ||
| 615 | } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == | ||
| 616 | (TCP_FLAG_ACK|TCP_FLAG_RST)) | ||
| 617 | && (ack == 0)) { | ||
| 618 | /* | ||
| 619 | * Broken TCP stacks, that set ACK in RST packets as well | ||
| 620 | * with zero ack value. | ||
| 621 | */ | ||
| 622 | ack = sack = receiver->td_end; | ||
| 623 | } | ||
| 624 | |||
| 625 | if (seq == end | ||
| 626 | && (!tcph->rst | ||
| 627 | || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) | ||
| 628 | /* | ||
| 629 | * Packets contains no data: we assume it is valid | ||
| 630 | * and check the ack value only. | ||
| 631 | * However RST segments are always validated by their | ||
| 632 | * SEQ number, except when seq == 0 (reset sent answering | ||
| 633 | * SYN. | ||
| 634 | */ | ||
| 635 | seq = end = sender->td_end; | ||
| 636 | |||
| 637 | DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " | ||
| 638 | "seq=%u ack=%u sack =%u win=%u end=%u\n", | ||
| 639 | NIPQUAD(iph->saddr), ntohs(tcph->source), | ||
| 640 | NIPQUAD(iph->daddr), ntohs(tcph->dest), | ||
| 641 | seq, ack, sack, win, end); | ||
| 642 | DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " | ||
| 643 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | ||
| 644 | sender->td_end, sender->td_maxend, sender->td_maxwin, | ||
| 645 | sender->td_scale, | ||
| 646 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin, | ||
| 647 | receiver->td_scale); | ||
| 648 | |||
| 649 | DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", | ||
| 650 | before(seq, sender->td_maxend + 1), | ||
| 651 | after(end, sender->td_end - receiver->td_maxwin - 1), | ||
| 652 | before(sack, receiver->td_end + 1), | ||
| 653 | after(ack, receiver->td_end - MAXACKWINDOW(sender))); | ||
| 654 | |||
| 655 | if (sender->loose || receiver->loose || | ||
| 656 | (before(seq, sender->td_maxend + 1) && | ||
| 657 | after(end, sender->td_end - receiver->td_maxwin - 1) && | ||
| 658 | before(sack, receiver->td_end + 1) && | ||
| 659 | after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { | ||
| 660 | /* | ||
| 661 | * Take into account window scaling (RFC 1323). | ||
| 662 | */ | ||
| 663 | if (!tcph->syn) | ||
| 664 | win <<= sender->td_scale; | ||
| 665 | |||
| 666 | /* | ||
| 667 | * Update sender data. | ||
| 668 | */ | ||
| 669 | swin = win + (sack - ack); | ||
| 670 | if (sender->td_maxwin < swin) | ||
| 671 | sender->td_maxwin = swin; | ||
| 672 | if (after(end, sender->td_end)) | ||
| 673 | sender->td_end = end; | ||
| 674 | /* | ||
| 675 | * Update receiver data. | ||
| 676 | */ | ||
| 677 | if (after(end, sender->td_maxend)) | ||
| 678 | receiver->td_maxwin += end - sender->td_maxend; | ||
| 679 | if (after(sack + win, receiver->td_maxend - 1)) { | ||
| 680 | receiver->td_maxend = sack + win; | ||
| 681 | if (win == 0) | ||
| 682 | receiver->td_maxend++; | ||
| 683 | } | ||
| 684 | |||
| 685 | /* | ||
| 686 | * Check retransmissions. | ||
| 687 | */ | ||
| 688 | if (index == TCP_ACK_SET) { | ||
| 689 | if (state->last_dir == dir | ||
| 690 | && state->last_seq == seq | ||
| 691 | && state->last_ack == ack | ||
| 692 | && state->last_end == end) | ||
| 693 | state->retrans++; | ||
| 694 | else { | ||
| 695 | state->last_dir = dir; | ||
| 696 | state->last_seq = seq; | ||
| 697 | state->last_ack = ack; | ||
| 698 | state->last_end = end; | ||
| 699 | state->retrans = 0; | ||
| 700 | } | ||
| 701 | } | ||
| 702 | /* | ||
| 703 | * Close the window of disabled window tracking :-) | ||
| 704 | */ | ||
| 705 | if (sender->loose) | ||
| 706 | sender->loose--; | ||
| 707 | |||
| 708 | res = 1; | ||
| 709 | } else { | ||
| 710 | if (LOG_INVALID(IPPROTO_TCP)) | ||
| 711 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 712 | "nf_ct_tcp: %s ", | ||
| 713 | before(seq, sender->td_maxend + 1) ? | ||
| 714 | after(end, sender->td_end - receiver->td_maxwin - 1) ? | ||
| 715 | before(sack, receiver->td_end + 1) ? | ||
| 716 | after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" | ||
| 717 | : "ACK is under the lower bound (possible overly delayed ACK)" | ||
| 718 | : "ACK is over the upper bound (ACKed data not seen yet)" | ||
| 719 | : "SEQ is under the lower bound (already ACKed data retransmitted)" | ||
| 720 | : "SEQ is over the upper bound (over the window of the receiver)"); | ||
| 721 | |||
| 722 | res = nf_ct_tcp_be_liberal; | ||
| 723 | } | ||
| 724 | |||
| 725 | DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " | ||
| 726 | "receiver end=%u maxend=%u maxwin=%u\n", | ||
| 727 | res, sender->td_end, sender->td_maxend, sender->td_maxwin, | ||
| 728 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin); | ||
| 729 | |||
| 730 | return res; | ||
| 731 | } | ||
| 732 | |||
#ifdef CONFIG_IP_NF_NAT_NEEDED
/* Re-derive the end of the segment after NAT has mangled the packet and
   fold it into the tracking data of direction @dir: td_end only ever
   moves forward, last_end is always overwritten.  The update is done
   under tcp_lock (write side).
   Caller must linearize skb at tcp header. */
void nf_conntrack_tcp_update(struct sk_buff *skb,
			     unsigned int dataoff,
			     struct nf_conn *conntrack,
			     int dir)
{
	struct tcphdr *tcph = (void *)skb->data + dataoff;
	struct ip_ct_tcp_state *dir_state = &conntrack->proto.tcp.seen[dir];
	__u32 seg_end;
#ifdef DEBUGP_VARS
	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
#endif

	seg_end = segment_seq_plus_len(ntohl(tcph->seq), skb->len,
				       dataoff, tcph);

	write_lock_bh(&tcp_lock);
	/*
	 * We have to worry for the ack in the reply packet only...
	 */
	if (after(seg_end, dir_state->td_end))
		dir_state->td_end = seg_end;
	conntrack->proto.tcp.last_end = seg_end;
	write_unlock_bh(&tcp_lock);

	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		sender->td_end, sender->td_maxend, sender->td_maxwin,
		sender->td_scale,
		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		receiver->td_scale);
}

#endif
| 767 | |||
| 768 | #define TH_FIN 0x01 | ||
| 769 | #define TH_SYN 0x02 | ||
| 770 | #define TH_RST 0x04 | ||
| 771 | #define TH_PUSH 0x08 | ||
| 772 | #define TH_ACK 0x10 | ||
| 773 | #define TH_URG 0x20 | ||
| 774 | #define TH_ECE 0x40 | ||
| 775 | #define TH_CWR 0x80 | ||
| 776 | |||
| 777 | /* table of valid flag combinations - ECE and CWR are always valid */ | ||
| 778 | static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = | ||
| 779 | { | ||
| 780 | [TH_SYN] = 1, | ||
| 781 | [TH_SYN|TH_ACK] = 1, | ||
| 782 | [TH_SYN|TH_ACK|TH_PUSH] = 1, | ||
| 783 | [TH_RST] = 1, | ||
| 784 | [TH_RST|TH_ACK] = 1, | ||
| 785 | [TH_RST|TH_ACK|TH_PUSH] = 1, | ||
| 786 | [TH_FIN|TH_ACK] = 1, | ||
| 787 | [TH_ACK] = 1, | ||
| 788 | [TH_ACK|TH_PUSH] = 1, | ||
| 789 | [TH_ACK|TH_URG] = 1, | ||
| 790 | [TH_ACK|TH_URG|TH_PUSH] = 1, | ||
| 791 | [TH_FIN|TH_ACK|TH_PUSH] = 1, | ||
| 792 | [TH_FIN|TH_ACK|TH_URG] = 1, | ||
| 793 | [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1, | ||
| 794 | }; | ||
| 795 | |||
| 796 | /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ | ||
| 797 | static int tcp_error(struct sk_buff *skb, | ||
| 798 | unsigned int dataoff, | ||
| 799 | enum ip_conntrack_info *ctinfo, | ||
| 800 | int pf, | ||
| 801 | unsigned int hooknum, | ||
| 802 | int(*csum)(const struct sk_buff *,unsigned int)) | ||
| 803 | { | ||
| 804 | struct tcphdr _tcph, *th; | ||
| 805 | unsigned int tcplen = skb->len - dataoff; | ||
| 806 | u_int8_t tcpflags; | ||
| 807 | |||
| 808 | /* Smaller that minimal TCP header? */ | ||
| 809 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); | ||
| 810 | if (th == NULL) { | ||
| 811 | if (LOG_INVALID(IPPROTO_TCP)) | ||
| 812 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 813 | "nf_ct_tcp: short packet "); | ||
| 814 | return -NF_ACCEPT; | ||
| 815 | } | ||
| 816 | |||
| 817 | /* Not whole TCP header or malformed packet */ | ||
| 818 | if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { | ||
| 819 | if (LOG_INVALID(IPPROTO_TCP)) | ||
| 820 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 821 | "nf_ct_tcp: truncated/malformed packet "); | ||
| 822 | return -NF_ACCEPT; | ||
| 823 | } | ||
| 824 | |||
| 825 | /* Checksum invalid? Ignore. | ||
| 826 | * We skip checking packets on the outgoing path | ||
| 827 | * because the semantic of CHECKSUM_HW is different there | ||
| 828 | * and moreover root might send raw packets. | ||
| 829 | */ | ||
| 830 | /* FIXME: Source route IP option packets --RR */ | ||
| 831 | if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || | ||
| 832 | (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) | ||
| 833 | && skb->ip_summed != CHECKSUM_UNNECESSARY | ||
| 834 | && csum(skb, dataoff)) { | ||
| 835 | if (LOG_INVALID(IPPROTO_TCP)) | ||
| 836 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 837 | "nf_ct_tcp: bad TCP checksum "); | ||
| 838 | return -NF_ACCEPT; | ||
| 839 | } | ||
| 840 | |||
| 841 | /* Check TCP flags. */ | ||
| 842 | tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); | ||
| 843 | if (!tcp_valid_flags[tcpflags]) { | ||
| 844 | if (LOG_INVALID(IPPROTO_TCP)) | ||
| 845 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 846 | "nf_ct_tcp: invalid TCP flag combination "); | ||
| 847 | return -NF_ACCEPT; | ||
| 848 | } | ||
| 849 | |||
| 850 | return NF_ACCEPT; | ||
| 851 | } | ||
| 852 | |||
| 853 | static int csum4(const struct sk_buff *skb, unsigned int dataoff) | ||
| 854 | { | ||
| 855 | return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, | ||
| 856 | skb->len - dataoff, IPPROTO_TCP, | ||
| 857 | skb->ip_summed == CHECKSUM_HW ? skb->csum | ||
| 858 | : skb_checksum(skb, dataoff, | ||
| 859 | skb->len - dataoff, 0)); | ||
| 860 | } | ||
| 861 | |||
| 862 | static int csum6(const struct sk_buff *skb, unsigned int dataoff) | ||
| 863 | { | ||
| 864 | return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, | ||
| 865 | skb->len - dataoff, IPPROTO_TCP, | ||
| 866 | skb->ip_summed == CHECKSUM_HW ? skb->csum | ||
| 867 | : skb_checksum(skb, dataoff, skb->len - dataoff, | ||
| 868 | 0)); | ||
| 869 | } | ||
| 870 | |||
| 871 | static int tcp_error4(struct sk_buff *skb, | ||
| 872 | unsigned int dataoff, | ||
| 873 | enum ip_conntrack_info *ctinfo, | ||
| 874 | int pf, | ||
| 875 | unsigned int hooknum) | ||
| 876 | { | ||
| 877 | return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum4); | ||
| 878 | } | ||
| 879 | |||
| 880 | static int tcp_error6(struct sk_buff *skb, | ||
| 881 | unsigned int dataoff, | ||
| 882 | enum ip_conntrack_info *ctinfo, | ||
| 883 | int pf, | ||
| 884 | unsigned int hooknum) | ||
| 885 | { | ||
| 886 | return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum6); | ||
| 887 | } | ||
| 888 | |||
| 889 | /* Returns verdict for packet, or -1 for invalid. */ | ||
| 890 | static int tcp_packet(struct nf_conn *conntrack, | ||
| 891 | const struct sk_buff *skb, | ||
| 892 | unsigned int dataoff, | ||
| 893 | enum ip_conntrack_info ctinfo, | ||
| 894 | int pf, | ||
| 895 | unsigned int hooknum) | ||
| 896 | { | ||
| 897 | enum tcp_conntrack new_state, old_state; | ||
| 898 | enum ip_conntrack_dir dir; | ||
| 899 | struct tcphdr *th, _tcph; | ||
| 900 | unsigned long timeout; | ||
| 901 | unsigned int index; | ||
| 902 | |||
| 903 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); | ||
| 904 | BUG_ON(th == NULL); | ||
| 905 | |||
| 906 | write_lock_bh(&tcp_lock); | ||
| 907 | old_state = conntrack->proto.tcp.state; | ||
| 908 | dir = CTINFO2DIR(ctinfo); | ||
| 909 | index = get_conntrack_index(th); | ||
| 910 | new_state = tcp_conntracks[dir][index][old_state]; | ||
| 911 | |||
| 912 | switch (new_state) { | ||
| 913 | case TCP_CONNTRACK_IGNORE: | ||
| 914 | /* Either SYN in ORIGINAL | ||
| 915 | * or SYN/ACK in REPLY. */ | ||
| 916 | if (index == TCP_SYNACK_SET | ||
| 917 | && conntrack->proto.tcp.last_index == TCP_SYN_SET | ||
| 918 | && conntrack->proto.tcp.last_dir != dir | ||
| 919 | && ntohl(th->ack_seq) == | ||
| 920 | conntrack->proto.tcp.last_end) { | ||
| 921 | /* This SYN/ACK acknowledges a SYN that we earlier | ||
| 922 | * ignored as invalid. This means that the client and | ||
| 923 | * the server are both in sync, while the firewall is | ||
| 924 | * not. We kill this session and block the SYN/ACK so | ||
| 925 | * that the client cannot but retransmit its SYN and | ||
| 926 | * thus initiate a clean new session. | ||
| 927 | */ | ||
| 928 | write_unlock_bh(&tcp_lock); | ||
| 929 | if (LOG_INVALID(IPPROTO_TCP)) | ||
| 930 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 931 | "nf_ct_tcp: killing out of sync session "); | ||
| 932 | if (del_timer(&conntrack->timeout)) | ||
| 933 | conntrack->timeout.function((unsigned long) | ||
| 934 | conntrack); | ||
| 935 | return -NF_DROP; | ||
| 936 | } | ||
| 937 | conntrack->proto.tcp.last_index = index; | ||
| 938 | conntrack->proto.tcp.last_dir = dir; | ||
| 939 | conntrack->proto.tcp.last_seq = ntohl(th->seq); | ||
| 940 | conntrack->proto.tcp.last_end = | ||
| 941 | segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); | ||
| 942 | |||
| 943 | write_unlock_bh(&tcp_lock); | ||
| 944 | if (LOG_INVALID(IPPROTO_TCP)) | ||
| 945 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 946 | "nf_ct_tcp: invalid packed ignored "); | ||
| 947 | return NF_ACCEPT; | ||
| 948 | case TCP_CONNTRACK_MAX: | ||
| 949 | /* Invalid packet */ | ||
| 950 | DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", | ||
| 951 | dir, get_conntrack_index(th), | ||
| 952 | old_state); | ||
| 953 | write_unlock_bh(&tcp_lock); | ||
| 954 | if (LOG_INVALID(IPPROTO_TCP)) | ||
| 955 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 956 | "nf_ct_tcp: invalid state "); | ||
| 957 | return -NF_ACCEPT; | ||
| 958 | case TCP_CONNTRACK_SYN_SENT: | ||
| 959 | if (old_state < TCP_CONNTRACK_TIME_WAIT) | ||
| 960 | break; | ||
| 961 | if ((conntrack->proto.tcp.seen[dir].flags & | ||
| 962 | IP_CT_TCP_FLAG_CLOSE_INIT) | ||
| 963 | || after(ntohl(th->seq), | ||
| 964 | conntrack->proto.tcp.seen[dir].td_end)) { | ||
| 965 | /* Attempt to reopen a closed connection. | ||
| 966 | * Delete this connection and look up again. */ | ||
| 967 | write_unlock_bh(&tcp_lock); | ||
| 968 | if (del_timer(&conntrack->timeout)) | ||
| 969 | conntrack->timeout.function((unsigned long) | ||
| 970 | conntrack); | ||
| 971 | return -NF_REPEAT; | ||
| 972 | } | ||
| 973 | case TCP_CONNTRACK_CLOSE: | ||
| 974 | if (index == TCP_RST_SET | ||
| 975 | && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) | ||
| 976 | && conntrack->proto.tcp.last_index == TCP_SYN_SET | ||
| 977 | && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { | ||
| 978 | /* RST sent to invalid SYN we had let trough | ||
| 979 | * SYN was in window then, tear down connection. | ||
| 980 | * We skip window checking, because packet might ACK | ||
| 981 | * segments we ignored in the SYN. */ | ||
| 982 | goto in_window; | ||
| 983 | } | ||
| 984 | /* Just fall trough */ | ||
| 985 | default: | ||
| 986 | /* Keep compilers happy. */ | ||
| 987 | break; | ||
| 988 | } | ||
| 989 | |||
| 990 | if (!tcp_in_window(&conntrack->proto.tcp, dir, index, | ||
| 991 | skb, dataoff, th, pf)) { | ||
| 992 | write_unlock_bh(&tcp_lock); | ||
| 993 | return -NF_ACCEPT; | ||
| 994 | } | ||
| 995 | in_window: | ||
| 996 | /* From now on we have got in-window packets */ | ||
| 997 | conntrack->proto.tcp.last_index = index; | ||
| 998 | |||
| 999 | DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " | ||
| 1000 | "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", | ||
| 1001 | NIPQUAD(iph->saddr), ntohs(th->source), | ||
| 1002 | NIPQUAD(iph->daddr), ntohs(th->dest), | ||
| 1003 | (th->syn ? 1 : 0), (th->ack ? 1 : 0), | ||
| 1004 | (th->fin ? 1 : 0), (th->rst ? 1 : 0), | ||
| 1005 | old_state, new_state); | ||
| 1006 | |||
| 1007 | conntrack->proto.tcp.state = new_state; | ||
| 1008 | if (old_state != new_state | ||
| 1009 | && (new_state == TCP_CONNTRACK_FIN_WAIT | ||
| 1010 | || new_state == TCP_CONNTRACK_CLOSE)) | ||
| 1011 | conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; | ||
| 1012 | timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans | ||
| 1013 | && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans | ||
| 1014 | ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; | ||
| 1015 | write_unlock_bh(&tcp_lock); | ||
| 1016 | |||
| 1017 | nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); | ||
| 1018 | if (new_state != old_state) | ||
| 1019 | nf_conntrack_event_cache(IPCT_PROTOINFO, skb); | ||
| 1020 | |||
| 1021 | if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { | ||
| 1022 | /* If only reply is a RST, we can consider ourselves not to | ||
| 1023 | have an established connection: this is a fairly common | ||
| 1024 | problem case, so we can delete the conntrack | ||
| 1025 | immediately. --RR */ | ||
| 1026 | if (th->rst) { | ||
| 1027 | if (del_timer(&conntrack->timeout)) | ||
| 1028 | conntrack->timeout.function((unsigned long) | ||
| 1029 | conntrack); | ||
| 1030 | return NF_ACCEPT; | ||
| 1031 | } | ||
| 1032 | } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) | ||
| 1033 | && (old_state == TCP_CONNTRACK_SYN_RECV | ||
| 1034 | || old_state == TCP_CONNTRACK_ESTABLISHED) | ||
| 1035 | && new_state == TCP_CONNTRACK_ESTABLISHED) { | ||
| 1036 | /* Set ASSURED if we see see valid ack in ESTABLISHED | ||
| 1037 | after SYN_RECV or a valid answer for a picked up | ||
| 1038 | connection. */ | ||
| 1039 | set_bit(IPS_ASSURED_BIT, &conntrack->status); | ||
| 1040 | nf_conntrack_event_cache(IPCT_STATUS, skb); | ||
| 1041 | } | ||
| 1042 | nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout); | ||
| 1043 | |||
| 1044 | return NF_ACCEPT; | ||
| 1045 | } | ||
| 1046 | |||
| 1047 | /* Called when a new connection for this protocol found. */ | ||
| 1048 | static int tcp_new(struct nf_conn *conntrack, | ||
| 1049 | const struct sk_buff *skb, | ||
| 1050 | unsigned int dataoff) | ||
| 1051 | { | ||
| 1052 | enum tcp_conntrack new_state; | ||
| 1053 | struct tcphdr *th, _tcph; | ||
| 1054 | #ifdef DEBUGP_VARS | ||
| 1055 | struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; | ||
| 1056 | struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; | ||
| 1057 | #endif | ||
| 1058 | |||
| 1059 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); | ||
| 1060 | BUG_ON(th == NULL); | ||
| 1061 | |||
| 1062 | /* Don't need lock here: this conntrack not in circulation yet */ | ||
| 1063 | new_state | ||
| 1064 | = tcp_conntracks[0][get_conntrack_index(th)] | ||
| 1065 | [TCP_CONNTRACK_NONE]; | ||
| 1066 | |||
| 1067 | /* Invalid: delete conntrack */ | ||
| 1068 | if (new_state >= TCP_CONNTRACK_MAX) { | ||
| 1069 | DEBUGP("nf_ct_tcp: invalid new deleting.\n"); | ||
| 1070 | return 0; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | if (new_state == TCP_CONNTRACK_SYN_SENT) { | ||
| 1074 | /* SYN packet */ | ||
| 1075 | conntrack->proto.tcp.seen[0].td_end = | ||
| 1076 | segment_seq_plus_len(ntohl(th->seq), skb->len, | ||
| 1077 | dataoff, th); | ||
| 1078 | conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); | ||
| 1079 | if (conntrack->proto.tcp.seen[0].td_maxwin == 0) | ||
| 1080 | conntrack->proto.tcp.seen[0].td_maxwin = 1; | ||
| 1081 | conntrack->proto.tcp.seen[0].td_maxend = | ||
| 1082 | conntrack->proto.tcp.seen[0].td_end; | ||
| 1083 | |||
| 1084 | tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]); | ||
| 1085 | conntrack->proto.tcp.seen[1].flags = 0; | ||
| 1086 | conntrack->proto.tcp.seen[0].loose = | ||
| 1087 | conntrack->proto.tcp.seen[1].loose = 0; | ||
| 1088 | } else if (nf_ct_tcp_loose == 0) { | ||
| 1089 | /* Don't try to pick up connections. */ | ||
| 1090 | return 0; | ||
| 1091 | } else { | ||
| 1092 | /* | ||
| 1093 | * We are in the middle of a connection, | ||
| 1094 | * its history is lost for us. | ||
| 1095 | * Let's try to use the data from the packet. | ||
| 1096 | */ | ||
| 1097 | conntrack->proto.tcp.seen[0].td_end = | ||
| 1098 | segment_seq_plus_len(ntohl(th->seq), skb->len, | ||
| 1099 | dataoff, th); | ||
| 1100 | conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); | ||
| 1101 | if (conntrack->proto.tcp.seen[0].td_maxwin == 0) | ||
| 1102 | conntrack->proto.tcp.seen[0].td_maxwin = 1; | ||
| 1103 | conntrack->proto.tcp.seen[0].td_maxend = | ||
| 1104 | conntrack->proto.tcp.seen[0].td_end + | ||
| 1105 | conntrack->proto.tcp.seen[0].td_maxwin; | ||
| 1106 | conntrack->proto.tcp.seen[0].td_scale = 0; | ||
| 1107 | |||
| 1108 | /* We assume SACK. Should we assume window scaling too? */ | ||
| 1109 | conntrack->proto.tcp.seen[0].flags = | ||
| 1110 | conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; | ||
| 1111 | conntrack->proto.tcp.seen[0].loose = | ||
| 1112 | conntrack->proto.tcp.seen[1].loose = nf_ct_tcp_loose; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | conntrack->proto.tcp.seen[1].td_end = 0; | ||
| 1116 | conntrack->proto.tcp.seen[1].td_maxend = 0; | ||
| 1117 | conntrack->proto.tcp.seen[1].td_maxwin = 1; | ||
| 1118 | conntrack->proto.tcp.seen[1].td_scale = 0; | ||
| 1119 | |||
| 1120 | /* tcp_packet will set them */ | ||
| 1121 | conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; | ||
| 1122 | conntrack->proto.tcp.last_index = TCP_NONE_SET; | ||
| 1123 | |||
| 1124 | DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " | ||
| 1125 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | ||
| 1126 | sender->td_end, sender->td_maxend, sender->td_maxwin, | ||
| 1127 | sender->td_scale, | ||
| 1128 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin, | ||
| 1129 | receiver->td_scale); | ||
| 1130 | return 1; | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = | ||
| 1134 | { | ||
| 1135 | .l3proto = PF_INET, | ||
| 1136 | .proto = IPPROTO_TCP, | ||
| 1137 | .name = "tcp", | ||
| 1138 | .pkt_to_tuple = tcp_pkt_to_tuple, | ||
| 1139 | .invert_tuple = tcp_invert_tuple, | ||
| 1140 | .print_tuple = tcp_print_tuple, | ||
| 1141 | .print_conntrack = tcp_print_conntrack, | ||
| 1142 | .packet = tcp_packet, | ||
| 1143 | .new = tcp_new, | ||
| 1144 | .error = tcp_error4, | ||
| 1145 | }; | ||
| 1146 | |||
| 1147 | struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = | ||
| 1148 | { | ||
| 1149 | .l3proto = PF_INET6, | ||
| 1150 | .proto = IPPROTO_TCP, | ||
| 1151 | .name = "tcp", | ||
| 1152 | .pkt_to_tuple = tcp_pkt_to_tuple, | ||
| 1153 | .invert_tuple = tcp_invert_tuple, | ||
| 1154 | .print_tuple = tcp_print_tuple, | ||
| 1155 | .print_conntrack = tcp_print_conntrack, | ||
| 1156 | .packet = tcp_packet, | ||
| 1157 | .new = tcp_new, | ||
| 1158 | .error = tcp_error6, | ||
| 1159 | }; | ||
| 1160 | |||
| 1161 | EXPORT_SYMBOL(nf_conntrack_protocol_tcp4); | ||
| 1162 | EXPORT_SYMBOL(nf_conntrack_protocol_tcp6); | ||
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c new file mode 100644 index 000000000000..3cae7ce420dd --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_udp.c  | |||
| @@ -0,0 +1,216 @@ | |||
| 1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
| 2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License version 2 as | ||
| 6 | * published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 9 | * - enable working with Layer 3 protocol independent connection tracking. | ||
| 10 | * | ||
| 11 | * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/types.h> | ||
| 15 | #include <linux/sched.h> | ||
| 16 | #include <linux/timer.h> | ||
| 17 | #include <linux/module.h> | ||
| 18 | #include <linux/netfilter.h> | ||
| 19 | #include <linux/udp.h> | ||
| 20 | #include <linux/seq_file.h> | ||
| 21 | #include <linux/skbuff.h> | ||
| 22 | #include <linux/ipv6.h> | ||
| 23 | #include <net/ip6_checksum.h> | ||
| 24 | #include <net/checksum.h> | ||
| 25 | #include <linux/netfilter.h> | ||
| 26 | #include <linux/netfilter_ipv4.h> | ||
| 27 | #include <linux/netfilter_ipv6.h> | ||
| 28 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 29 | |||
| 30 | unsigned long nf_ct_udp_timeout = 30*HZ; | ||
| 31 | unsigned long nf_ct_udp_timeout_stream = 180*HZ; | ||
| 32 | |||
| 33 | static int udp_pkt_to_tuple(const struct sk_buff *skb, | ||
| 34 | unsigned int dataoff, | ||
| 35 | struct nf_conntrack_tuple *tuple) | ||
| 36 | { | ||
| 37 | struct udphdr _hdr, *hp; | ||
| 38 | |||
| 39 | /* Actually only need first 8 bytes. */ | ||
| 40 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | ||
| 41 | if (hp == NULL) | ||
| 42 | return 0; | ||
| 43 | |||
| 44 | tuple->src.u.udp.port = hp->source; | ||
| 45 | tuple->dst.u.udp.port = hp->dest; | ||
| 46 | |||
| 47 | return 1; | ||
| 48 | } | ||
| 49 | |||
| 50 | static int udp_invert_tuple(struct nf_conntrack_tuple *tuple, | ||
| 51 | const struct nf_conntrack_tuple *orig) | ||
| 52 | { | ||
| 53 | tuple->src.u.udp.port = orig->dst.u.udp.port; | ||
| 54 | tuple->dst.u.udp.port = orig->src.u.udp.port; | ||
| 55 | return 1; | ||
| 56 | } | ||
| 57 | |||
| 58 | /* Print out the per-protocol part of the tuple. */ | ||
| 59 | static int udp_print_tuple(struct seq_file *s, | ||
| 60 | const struct nf_conntrack_tuple *tuple) | ||
| 61 | { | ||
| 62 | return seq_printf(s, "sport=%hu dport=%hu ", | ||
| 63 | ntohs(tuple->src.u.udp.port), | ||
| 64 | ntohs(tuple->dst.u.udp.port)); | ||
| 65 | } | ||
| 66 | |||
/* Print out the private part of the conntrack: UDP prints nothing and
 * always returns 0. */
static int udp_print_conntrack(struct seq_file *s,
			       const struct nf_conn *conntrack)
{
	return 0;
}
| 73 | |||
| 74 | /* Returns verdict for packet, and may modify conntracktype */ | ||
| 75 | static int udp_packet(struct nf_conn *conntrack, | ||
| 76 | const struct sk_buff *skb, | ||
| 77 | unsigned int dataoff, | ||
| 78 | enum ip_conntrack_info ctinfo, | ||
| 79 | int pf, | ||
| 80 | unsigned int hooknum) | ||
| 81 | { | ||
| 82 | /* If we've seen traffic both ways, this is some kind of UDP | ||
| 83 | stream. Extend timeout. */ | ||
| 84 | if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { | ||
| 85 | nf_ct_refresh_acct(conntrack, ctinfo, skb, | ||
| 86 | nf_ct_udp_timeout_stream); | ||
| 87 | /* Also, more likely to be important, and not a probe */ | ||
| 88 | if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) | ||
| 89 | nf_conntrack_event_cache(IPCT_STATUS, skb); | ||
| 90 | } else | ||
| 91 | nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout); | ||
| 92 | |||
| 93 | return NF_ACCEPT; | ||
| 94 | } | ||
| 95 | |||
/* Called when a new connection for this protocol is found: UDP has no
 * per-connection state to set up, so this always returns 1. */
static int udp_new(struct nf_conn *conntrack,
		   const struct sk_buff *skb,
		   unsigned int dataoff)
{
	return 1;
}
| 102 | |||
| 103 | static int udp_error(struct sk_buff *skb, unsigned int dataoff, | ||
| 104 | enum ip_conntrack_info *ctinfo, | ||
| 105 | int pf, | ||
| 106 | unsigned int hooknum, | ||
| 107 | int (*csum)(const struct sk_buff *, unsigned int)) | ||
| 108 | { | ||
| 109 | unsigned int udplen = skb->len - dataoff; | ||
| 110 | struct udphdr _hdr, *hdr; | ||
| 111 | |||
| 112 | /* Header is too small? */ | ||
| 113 | hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | ||
| 114 | if (hdr == NULL) { | ||
| 115 | if (LOG_INVALID(IPPROTO_UDP)) | ||
| 116 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 117 | "nf_ct_udp: short packet "); | ||
| 118 | return -NF_ACCEPT; | ||
| 119 | } | ||
| 120 | |||
| 121 | /* Truncated/malformed packets */ | ||
| 122 | if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { | ||
| 123 | if (LOG_INVALID(IPPROTO_UDP)) | ||
| 124 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 125 | "nf_ct_udp: truncated/malformed packet "); | ||
| 126 | return -NF_ACCEPT; | ||
| 127 | } | ||
| 128 | |||
| 129 | /* Packet with no checksum */ | ||
| 130 | if (!hdr->check) | ||
| 131 | return NF_ACCEPT; | ||
| 132 | |||
| 133 | /* Checksum invalid? Ignore. | ||
| 134 | * We skip checking packets on the outgoing path | ||
| 135 | * because the semantic of CHECKSUM_HW is different there | ||
| 136 | * and moreover root might send raw packets. | ||
| 137 | * FIXME: Source route IP option packets --RR */ | ||
| 138 | if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || | ||
| 139 | (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) | ||
| 140 | && skb->ip_summed != CHECKSUM_UNNECESSARY | ||
| 141 | && csum(skb, dataoff)) { | ||
| 142 | if (LOG_INVALID(IPPROTO_UDP)) | ||
| 143 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | ||
| 144 | "nf_ct_udp: bad UDP checksum "); | ||
| 145 | return -NF_ACCEPT; | ||
| 146 | } | ||
| 147 | |||
| 148 | return NF_ACCEPT; | ||
| 149 | } | ||
| 150 | |||
| 151 | static int csum4(const struct sk_buff *skb, unsigned int dataoff) | ||
| 152 | { | ||
| 153 | return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, | ||
| 154 | skb->len - dataoff, IPPROTO_UDP, | ||
| 155 | skb->ip_summed == CHECKSUM_HW ? skb->csum | ||
| 156 | : skb_checksum(skb, dataoff, | ||
| 157 | skb->len - dataoff, 0)); | ||
| 158 | } | ||
| 159 | |||
| 160 | static int csum6(const struct sk_buff *skb, unsigned int dataoff) | ||
| 161 | { | ||
| 162 | return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, | ||
| 163 | skb->len - dataoff, IPPROTO_UDP, | ||
| 164 | skb->ip_summed == CHECKSUM_HW ? skb->csum | ||
| 165 | : skb_checksum(skb, dataoff, skb->len - dataoff, | ||
| 166 | 0)); | ||
| 167 | } | ||
| 168 | |||
| 169 | static int udp_error4(struct sk_buff *skb, | ||
| 170 | unsigned int dataoff, | ||
| 171 | enum ip_conntrack_info *ctinfo, | ||
| 172 | int pf, | ||
| 173 | unsigned int hooknum) | ||
| 174 | { | ||
| 175 | return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum4); | ||
| 176 | } | ||
| 177 | |||
| 178 | static int udp_error6(struct sk_buff *skb, | ||
| 179 | unsigned int dataoff, | ||
| 180 | enum ip_conntrack_info *ctinfo, | ||
| 181 | int pf, | ||
| 182 | unsigned int hooknum) | ||
| 183 | { | ||
| 184 | return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum6); | ||
| 185 | } | ||
| 186 | |||
| 187 | struct nf_conntrack_protocol nf_conntrack_protocol_udp4 = | ||
| 188 | { | ||
| 189 | .l3proto = PF_INET, | ||
| 190 | .proto = IPPROTO_UDP, | ||
| 191 | .name = "udp", | ||
| 192 | .pkt_to_tuple = udp_pkt_to_tuple, | ||
| 193 | .invert_tuple = udp_invert_tuple, | ||
| 194 | .print_tuple = udp_print_tuple, | ||
| 195 | .print_conntrack = udp_print_conntrack, | ||
| 196 | .packet = udp_packet, | ||
| 197 | .new = udp_new, | ||
| 198 | .error = udp_error4, | ||
| 199 | }; | ||
| 200 | |||
| 201 | struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = | ||
| 202 | { | ||
| 203 | .l3proto = PF_INET6, | ||
| 204 | .proto = IPPROTO_UDP, | ||
| 205 | .name = "udp", | ||
| 206 | .pkt_to_tuple = udp_pkt_to_tuple, | ||
| 207 | .invert_tuple = udp_invert_tuple, | ||
| 208 | .print_tuple = udp_print_tuple, | ||
| 209 | .print_conntrack = udp_print_conntrack, | ||
| 210 | .packet = udp_packet, | ||
| 211 | .new = udp_new, | ||
| 212 | .error = udp_error6, | ||
| 213 | }; | ||
| 214 | |||
| 215 | EXPORT_SYMBOL(nf_conntrack_protocol_udp4); | ||
| 216 | EXPORT_SYMBOL(nf_conntrack_protocol_udp6); | ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c new file mode 100644 index 000000000000..45224db4fe2f --- /dev/null +++ b/net/netfilter/nf_conntrack_standalone.c  | |||
| @@ -0,0 +1,869 @@ | |||
| 1 | /* This file contains all the functions required for the standalone | ||
| 2 | nf_conntrack module. | ||
| 3 | |||
| 4 | These are not required by the compatibility layer. | ||
| 5 | */ | ||
| 6 | |||
| 7 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
| 8 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or modify | ||
| 11 | * it under the terms of the GNU General Public License version 2 as | ||
| 12 | * published by the Free Software Foundation. | ||
| 13 | * | ||
| 14 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | ||
| 15 | * - generalize L3 protocol dependent part. | ||
| 16 | * | ||
| 17 | * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c | ||
| 18 | */ | ||
| 19 | |||
| 20 | #include <linux/config.h> | ||
| 21 | #include <linux/types.h> | ||
| 22 | #include <linux/netfilter.h> | ||
| 23 | #include <linux/module.h> | ||
| 24 | #include <linux/skbuff.h> | ||
| 25 | #include <linux/proc_fs.h> | ||
| 26 | #include <linux/seq_file.h> | ||
| 27 | #include <linux/percpu.h> | ||
| 28 | #include <linux/netdevice.h> | ||
| 29 | #ifdef CONFIG_SYSCTL | ||
| 30 | #include <linux/sysctl.h> | ||
| 31 | #endif | ||
| 32 | |||
| 33 | #define ASSERT_READ_LOCK(x) | ||
| 34 | #define ASSERT_WRITE_LOCK(x) | ||
| 35 | |||
| 36 | #include <net/netfilter/nf_conntrack.h> | ||
| 37 | #include <net/netfilter/nf_conntrack_l3proto.h> | ||
| 38 | #include <net/netfilter/nf_conntrack_protocol.h> | ||
| 39 | #include <net/netfilter/nf_conntrack_core.h> | ||
| 40 | #include <net/netfilter/nf_conntrack_helper.h> | ||
| 41 | #include <linux/netfilter_ipv4/listhelp.h> | ||
| 42 | |||
| 43 | #if 0 | ||
| 44 | #define DEBUGP printk | ||
| 45 | #else | ||
| 46 | #define DEBUGP(format, args...) | ||
| 47 | #endif | ||
| 48 | |||
| 49 | MODULE_LICENSE("GPL"); | ||
| 50 | |||
| 51 | extern atomic_t nf_conntrack_count; | ||
| 52 | DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); | ||
| 53 | |||
| 54 | static int kill_l3proto(struct nf_conn *i, void *data) | ||
| 55 | { | ||
| 56 | return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num == | ||
| 57 | ((struct nf_conntrack_l3proto *)data)->l3proto); | ||
| 58 | } | ||
| 59 | |||
| 60 | static int kill_proto(struct nf_conn *i, void *data) | ||
| 61 | { | ||
| 62 | struct nf_conntrack_protocol *proto; | ||
| 63 | proto = (struct nf_conntrack_protocol *)data; | ||
| 64 | return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == | ||
| 65 | proto->proto) && | ||
| 66 | (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num == | ||
| 67 | proto->l3proto); | ||
| 68 | } | ||
| 69 | |||
| 70 | #ifdef CONFIG_PROC_FS | ||
| 71 | static int | ||
| 72 | print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, | ||
| 73 | struct nf_conntrack_l3proto *l3proto, | ||
| 74 | struct nf_conntrack_protocol *proto) | ||
| 75 | { | ||
| 76 | return l3proto->print_tuple(s, tuple) || proto->print_tuple(s, tuple); | ||
| 77 | } | ||
| 78 | |||
| 79 | #ifdef CONFIG_NF_CT_ACCT | ||
| 80 | static unsigned int | ||
| 81 | seq_print_counters(struct seq_file *s, | ||
| 82 | const struct ip_conntrack_counter *counter) | ||
| 83 | { | ||
| 84 | return seq_printf(s, "packets=%llu bytes=%llu ", | ||
| 85 | (unsigned long long)counter->packets, | ||
| 86 | (unsigned long long)counter->bytes); | ||
| 87 | } | ||
| 88 | #else | ||
| 89 | #define seq_print_counters(x, y) 0 | ||
| 90 | #endif | ||
| 91 | |||
/* Per-open iterator state for the conntrack seq_file listing. */
struct ct_iter_state {
	unsigned int bucket;	/* index into nf_conntrack_hash[] being walked */
};
| 95 | |||
| 96 | static struct list_head *ct_get_first(struct seq_file *seq) | ||
| 97 | { | ||
| 98 | struct ct_iter_state *st = seq->private; | ||
| 99 | |||
| 100 | for (st->bucket = 0; | ||
| 101 | st->bucket < nf_conntrack_htable_size; | ||
| 102 | st->bucket++) { | ||
| 103 | if (!list_empty(&nf_conntrack_hash[st->bucket])) | ||
| 104 | return nf_conntrack_hash[st->bucket].next; | ||
| 105 | } | ||
| 106 | return NULL; | ||
| 107 | } | ||
| 108 | |||
| 109 | static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head) | ||
| 110 | { | ||
| 111 | struct ct_iter_state *st = seq->private; | ||
| 112 | |||
| 113 | head = head->next; | ||
| 114 | while (head == &nf_conntrack_hash[st->bucket]) { | ||
| 115 | if (++st->bucket >= nf_conntrack_htable_size) | ||
| 116 | return NULL; | ||
| 117 | head = nf_conntrack_hash[st->bucket].next; | ||
| 118 | } | ||
| 119 | return head; | ||
| 120 | } | ||
| 121 | |||
| 122 | static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos) | ||
| 123 | { | ||
| 124 | struct list_head *head = ct_get_first(seq); | ||
| 125 | |||
| 126 | if (head) | ||
| 127 | while (pos && (head = ct_get_next(seq, head))) | ||
| 128 | pos--; | ||
| 129 | return pos ? NULL : head; | ||
| 130 | } | ||
| 131 | |||
| 132 | static void *ct_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 133 | { | ||
| 134 | read_lock_bh(&nf_conntrack_lock); | ||
| 135 | return ct_get_idx(seq, *pos); | ||
| 136 | } | ||
| 137 | |||
| 138 | static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) | ||
| 139 | { | ||
| 140 | (*pos)++; | ||
| 141 | return ct_get_next(s, v); | ||
| 142 | } | ||
| 143 | |||
| 144 | static void ct_seq_stop(struct seq_file *s, void *v) | ||
| 145 | { | ||
| 146 | read_unlock_bh(&nf_conntrack_lock); | ||
| 147 | } | ||
| 148 | |||
| 149 | /* return 0 on success, 1 in case of error */ | ||
| 150 | static int ct_seq_show(struct seq_file *s, void *v) | ||
| 151 | { | ||
| 152 | const struct nf_conntrack_tuple_hash *hash = v; | ||
| 153 | const struct nf_conn *conntrack = nf_ct_tuplehash_to_ctrack(hash); | ||
| 154 | struct nf_conntrack_l3proto *l3proto; | ||
| 155 | struct nf_conntrack_protocol *proto; | ||
| 156 | |||
| 157 | ASSERT_READ_LOCK(&nf_conntrack_lock); | ||
| 158 | NF_CT_ASSERT(conntrack); | ||
| 159 | |||
| 160 | /* we only want to print DIR_ORIGINAL */ | ||
| 161 | if (NF_CT_DIRECTION(hash)) | ||
| 162 | return 0; | ||
| 163 | |||
| 164 | l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] | ||
| 165 | .tuple.src.l3num); | ||
| 166 | |||
| 167 | NF_CT_ASSERT(l3proto); | ||
| 168 | proto = nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] | ||
| 169 | .tuple.src.l3num, | ||
| 170 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL] | ||
| 171 | .tuple.dst.protonum); | ||
| 172 | NF_CT_ASSERT(proto); | ||
| 173 | |||
| 174 | if (seq_printf(s, "%-8s %u %-8s %u %ld ", | ||
| 175 | l3proto->name, | ||
| 176 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, | ||
| 177 | proto->name, | ||
| 178 | conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, | ||
| 179 | timer_pending(&conntrack->timeout) | ||
| 180 | ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0) | ||
| 181 | return -ENOSPC; | ||
| 182 | |||
| 183 | if (l3proto->print_conntrack(s, conntrack)) | ||
| 184 | return -ENOSPC; | ||
| 185 | |||
| 186 | if (proto->print_conntrack(s, conntrack)) | ||
| 187 | return -ENOSPC; | ||
| 188 | |||
| 189 | if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, | ||
| 190 | l3proto, proto)) | ||
| 191 | return -ENOSPC; | ||
| 192 | |||
| 193 | if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL])) | ||
| 194 | return -ENOSPC; | ||
| 195 | |||
| 196 | if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) | ||
| 197 | if (seq_printf(s, "[UNREPLIED] ")) | ||
| 198 | return -ENOSPC; | ||
| 199 | |||
| 200 | if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, | ||
| 201 | l3proto, proto)) | ||
| 202 | return -ENOSPC; | ||
| 203 | |||
| 204 | if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY])) | ||
| 205 | return -ENOSPC; | ||
| 206 | |||
| 207 | if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) | ||
| 208 | if (seq_printf(s, "[ASSURED] ")) | ||
| 209 | return -ENOSPC; | ||
| 210 | |||
| 211 | #if defined(CONFIG_NF_CONNTRACK_MARK) | ||
| 212 | if (seq_printf(s, "mark=%u ", conntrack->mark)) | ||
| 213 | return -ENOSPC; | ||
| 214 | #endif | ||
| 215 | |||
| 216 | if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) | ||
| 217 | return -ENOSPC; | ||
| 218 | |||
| 219 | return 0; | ||
| 220 | } | ||
| 221 | |||
| 222 | static struct seq_operations ct_seq_ops = { | ||
| 223 | .start = ct_seq_start, | ||
| 224 | .next = ct_seq_next, | ||
| 225 | .stop = ct_seq_stop, | ||
| 226 | .show = ct_seq_show | ||
| 227 | }; | ||
| 228 | |||
| 229 | static int ct_open(struct inode *inode, struct file *file) | ||
| 230 | { | ||
| 231 | struct seq_file *seq; | ||
| 232 | struct ct_iter_state *st; | ||
| 233 | int ret; | ||
| 234 | |||
| 235 | st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL); | ||
| 236 | if (st == NULL) | ||
| 237 | return -ENOMEM; | ||
| 238 | ret = seq_open(file, &ct_seq_ops); | ||
| 239 | if (ret) | ||
| 240 | goto out_free; | ||
| 241 | seq = file->private_data; | ||
| 242 | seq->private = st; | ||
| 243 | memset(st, 0, sizeof(struct ct_iter_state)); | ||
| 244 | return ret; | ||
| 245 | out_free: | ||
| 246 | kfree(st); | ||
| 247 | return ret; | ||
| 248 | } | ||
| 249 | |||
| 250 | static struct file_operations ct_file_ops = { | ||
| 251 | .owner = THIS_MODULE, | ||
| 252 | .open = ct_open, | ||
| 253 | .read = seq_read, | ||
| 254 | .llseek = seq_lseek, | ||
| 255 | .release = seq_release_private, | ||
| 256 | }; | ||
| 257 | |||
| 258 | /* expects */ | ||
| 259 | static void *exp_seq_start(struct seq_file *s, loff_t *pos) | ||
| 260 | { | ||
| 261 | struct list_head *e = &nf_conntrack_expect_list; | ||
| 262 | loff_t i; | ||
| 263 | |||
| 264 | /* strange seq_file api calls stop even if we fail, | ||
| 265 | * thus we need to grab lock since stop unlocks */ | ||
| 266 | read_lock_bh(&nf_conntrack_lock); | ||
| 267 | |||
| 268 | if (list_empty(e)) | ||
| 269 | return NULL; | ||
| 270 | |||
| 271 | for (i = 0; i <= *pos; i++) { | ||
| 272 | e = e->next; | ||
| 273 | if (e == &nf_conntrack_expect_list) | ||
| 274 | return NULL; | ||
| 275 | } | ||
| 276 | return e; | ||
| 277 | } | ||
| 278 | |||
| 279 | static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) | ||
| 280 | { | ||
| 281 | struct list_head *e = v; | ||
| 282 | |||
| 283 | ++*pos; | ||
| 284 | e = e->next; | ||
| 285 | |||
| 286 | if (e == &nf_conntrack_expect_list) | ||
| 287 | return NULL; | ||
| 288 | |||
| 289 | return e; | ||
| 290 | } | ||
| 291 | |||
| 292 | static void exp_seq_stop(struct seq_file *s, void *v) | ||
| 293 | { | ||
| 294 | read_unlock_bh(&nf_conntrack_lock); | ||
| 295 | } | ||
| 296 | |||
| 297 | static int exp_seq_show(struct seq_file *s, void *v) | ||
| 298 | { | ||
| 299 | struct nf_conntrack_expect *expect = v; | ||
| 300 | |||
| 301 | if (expect->timeout.function) | ||
| 302 | seq_printf(s, "%ld ", timer_pending(&expect->timeout) | ||
| 303 | ? (long)(expect->timeout.expires - jiffies)/HZ : 0); | ||
| 304 | else | ||
| 305 | seq_printf(s, "- "); | ||
| 306 | seq_printf(s, "l3proto = %u proto=%u ", | ||
| 307 | expect->tuple.src.l3num, | ||
| 308 | expect->tuple.dst.protonum); | ||
| 309 | print_tuple(s, &expect->tuple, | ||
| 310 | nf_ct_find_l3proto(expect->tuple.src.l3num), | ||
| 311 | nf_ct_find_proto(expect->tuple.src.l3num, | ||
| 312 | expect->tuple.dst.protonum)); | ||
| 313 | return seq_putc(s, '\n'); | ||
| 314 | } | ||
| 315 | |||
| 316 | static struct seq_operations exp_seq_ops = { | ||
| 317 | .start = exp_seq_start, | ||
| 318 | .next = exp_seq_next, | ||
| 319 | .stop = exp_seq_stop, | ||
| 320 | .show = exp_seq_show | ||
| 321 | }; | ||
| 322 | |||
| 323 | static int exp_open(struct inode *inode, struct file *file) | ||
| 324 | { | ||
| 325 | return seq_open(file, &exp_seq_ops); | ||
| 326 | } | ||
| 327 | |||
| 328 | static struct file_operations exp_file_ops = { | ||
| 329 | .owner = THIS_MODULE, | ||
| 330 | .open = exp_open, | ||
| 331 | .read = seq_read, | ||
| 332 | .llseek = seq_lseek, | ||
| 333 | .release = seq_release | ||
| 334 | }; | ||
| 335 | |||
| 336 | static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 337 | { | ||
| 338 | int cpu; | ||
| 339 | |||
| 340 | if (*pos == 0) | ||
| 341 | return SEQ_START_TOKEN; | ||
| 342 | |||
| 343 | for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { | ||
| 344 | if (!cpu_possible(cpu)) | ||
| 345 | continue; | ||
| 346 | *pos = cpu + 1; | ||
| 347 | return &per_cpu(nf_conntrack_stat, cpu); | ||
| 348 | } | ||
| 349 | |||
| 350 | return NULL; | ||
| 351 | } | ||
| 352 | |||
| 353 | static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
| 354 | { | ||
| 355 | int cpu; | ||
| 356 | |||
| 357 | for (cpu = *pos; cpu < NR_CPUS; ++cpu) { | ||
| 358 | if (!cpu_possible(cpu)) | ||
| 359 | continue; | ||
| 360 | *pos = cpu + 1; | ||
| 361 | return &per_cpu(nf_conntrack_stat, cpu); | ||
| 362 | } | ||
| 363 | |||
| 364 | return NULL; | ||
| 365 | } | ||
| 366 | |||
/* seq_file ->stop for the per-CPU statistics file: intentionally empty,
 * the start/next callbacks above acquire nothing. */
static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
| 370 | |||
| 371 | static int ct_cpu_seq_show(struct seq_file *seq, void *v) | ||
| 372 | { | ||
| 373 | unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); | ||
| 374 | struct ip_conntrack_stat *st = v; | ||
| 375 | |||
| 376 | if (v == SEQ_START_TOKEN) { | ||
| 377 | seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); | ||
| 378 | return 0; | ||
| 379 | } | ||
| 380 | |||
| 381 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " | ||
| 382 | "%08x %08x %08x %08x %08x %08x %08x %08x \n", | ||
| 383 | nr_conntracks, | ||
| 384 | st->searched, | ||
| 385 | st->found, | ||
| 386 | st->new, | ||
| 387 | st->invalid, | ||
| 388 | st->ignore, | ||
| 389 | st->delete, | ||
| 390 | st->delete_list, | ||
| 391 | st->insert, | ||
| 392 | st->insert_failed, | ||
| 393 | st->drop, | ||
| 394 | st->early_drop, | ||
| 395 | st->error, | ||
| 396 | |||
| 397 | st->expect_new, | ||
| 398 | st->expect_create, | ||
| 399 | st->expect_delete | ||
| 400 | ); | ||
| 401 | return 0; | ||
| 402 | } | ||
| 403 | |||
| 404 | static struct seq_operations ct_cpu_seq_ops = { | ||
| 405 | .start = ct_cpu_seq_start, | ||
| 406 | .next = ct_cpu_seq_next, | ||
| 407 | .stop = ct_cpu_seq_stop, | ||
| 408 | .show = ct_cpu_seq_show, | ||
| 409 | }; | ||
| 410 | |||
| 411 | static int ct_cpu_seq_open(struct inode *inode, struct file *file) | ||
| 412 | { | ||
| 413 | return seq_open(file, &ct_cpu_seq_ops); | ||
| 414 | } | ||
| 415 | |||
| 416 | static struct file_operations ct_cpu_seq_fops = { | ||
| 417 | .owner = THIS_MODULE, | ||
| 418 | .open = ct_cpu_seq_open, | ||
| 419 | .read = seq_read, | ||
| 420 | .llseek = seq_lseek, | ||
| 421 | .release = seq_release_private, | ||
| 422 | }; | ||
| 423 | #endif /* CONFIG_PROC_FS */ | ||
| 424 | |||
| 425 | /* Sysctl support */ | ||
| 426 | |||
| 427 | #ifdef CONFIG_SYSCTL | ||
| 428 | |||
| 429 | /* From nf_conntrack_core.c */ | ||
| 430 | extern int nf_conntrack_max; | ||
| 431 | extern unsigned int nf_conntrack_htable_size; | ||
| 432 | |||
| 433 | /* From nf_conntrack_proto_tcp.c */ | ||
| 434 | extern unsigned long nf_ct_tcp_timeout_syn_sent; | ||
| 435 | extern unsigned long nf_ct_tcp_timeout_syn_recv; | ||
| 436 | extern unsigned long nf_ct_tcp_timeout_established; | ||
| 437 | extern unsigned long nf_ct_tcp_timeout_fin_wait; | ||
| 438 | extern unsigned long nf_ct_tcp_timeout_close_wait; | ||
| 439 | extern unsigned long nf_ct_tcp_timeout_last_ack; | ||
| 440 | extern unsigned long nf_ct_tcp_timeout_time_wait; | ||
| 441 | extern unsigned long nf_ct_tcp_timeout_close; | ||
| 442 | extern unsigned long nf_ct_tcp_timeout_max_retrans; | ||
| 443 | extern int nf_ct_tcp_loose; | ||
| 444 | extern int nf_ct_tcp_be_liberal; | ||
| 445 | extern int nf_ct_tcp_max_retrans; | ||
| 446 | |||
| 447 | /* From nf_conntrack_proto_udp.c */ | ||
| 448 | extern unsigned long nf_ct_udp_timeout; | ||
| 449 | extern unsigned long nf_ct_udp_timeout_stream; | ||
| 450 | |||
| 451 | /* From nf_conntrack_proto_generic.c */ | ||
| 452 | extern unsigned long nf_ct_generic_timeout; | ||
| 453 | |||
/*
 * Log invalid packets of a given protocol: these two values are passed as
 * .extra1/.extra2 of the nf_conntrack_log_invalid sysctl entry.
 */
static int log_invalid_proto_min;	/* static storage: starts at 0 */
static int log_invalid_proto_max = 255;

/* Handle returned by register_sysctl_table() in init_or_cleanup(). */
static struct ctl_table_header *nf_ct_sysctl_header;
| 459 | |||
| 460 | static ctl_table nf_ct_sysctl_table[] = { | ||
| 461 | { | ||
| 462 | .ctl_name = NET_NF_CONNTRACK_MAX, | ||
| 463 | .procname = "nf_conntrack_max", | ||
| 464 | .data = &nf_conntrack_max, | ||
| 465 | .maxlen = sizeof(int), | ||
| 466 | .mode = 0644, | ||
| 467 | .proc_handler = &proc_dointvec, | ||
| 468 | }, | ||
| 469 | { | ||
| 470 | .ctl_name = NET_NF_CONNTRACK_COUNT, | ||
| 471 | .procname = "nf_conntrack_count", | ||
| 472 | .data = &nf_conntrack_count, | ||
| 473 | .maxlen = sizeof(int), | ||
| 474 | .mode = 0444, | ||
| 475 | .proc_handler = &proc_dointvec, | ||
| 476 | }, | ||
| 477 | { | ||
| 478 | .ctl_name = NET_NF_CONNTRACK_BUCKETS, | ||
| 479 | .procname = "nf_conntrack_buckets", | ||
| 480 | .data = &nf_conntrack_htable_size, | ||
| 481 | .maxlen = sizeof(unsigned int), | ||
| 482 | .mode = 0444, | ||
| 483 | .proc_handler = &proc_dointvec, | ||
| 484 | }, | ||
| 485 | { | ||
| 486 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, | ||
| 487 | .procname = "nf_conntrack_tcp_timeout_syn_sent", | ||
| 488 | .data = &nf_ct_tcp_timeout_syn_sent, | ||
| 489 | .maxlen = sizeof(unsigned int), | ||
| 490 | .mode = 0644, | ||
| 491 | .proc_handler = &proc_dointvec_jiffies, | ||
| 492 | }, | ||
| 493 | { | ||
| 494 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, | ||
| 495 | .procname = "nf_conntrack_tcp_timeout_syn_recv", | ||
| 496 | .data = &nf_ct_tcp_timeout_syn_recv, | ||
| 497 | .maxlen = sizeof(unsigned int), | ||
| 498 | .mode = 0644, | ||
| 499 | .proc_handler = &proc_dointvec_jiffies, | ||
| 500 | }, | ||
| 501 | { | ||
| 502 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, | ||
| 503 | .procname = "nf_conntrack_tcp_timeout_established", | ||
| 504 | .data = &nf_ct_tcp_timeout_established, | ||
| 505 | .maxlen = sizeof(unsigned int), | ||
| 506 | .mode = 0644, | ||
| 507 | .proc_handler = &proc_dointvec_jiffies, | ||
| 508 | }, | ||
| 509 | { | ||
| 510 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, | ||
| 511 | .procname = "nf_conntrack_tcp_timeout_fin_wait", | ||
| 512 | .data = &nf_ct_tcp_timeout_fin_wait, | ||
| 513 | .maxlen = sizeof(unsigned int), | ||
| 514 | .mode = 0644, | ||
| 515 | .proc_handler = &proc_dointvec_jiffies, | ||
| 516 | }, | ||
| 517 | { | ||
| 518 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, | ||
| 519 | .procname = "nf_conntrack_tcp_timeout_close_wait", | ||
| 520 | .data = &nf_ct_tcp_timeout_close_wait, | ||
| 521 | .maxlen = sizeof(unsigned int), | ||
| 522 | .mode = 0644, | ||
| 523 | .proc_handler = &proc_dointvec_jiffies, | ||
| 524 | }, | ||
| 525 | { | ||
| 526 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, | ||
| 527 | .procname = "nf_conntrack_tcp_timeout_last_ack", | ||
| 528 | .data = &nf_ct_tcp_timeout_last_ack, | ||
| 529 | .maxlen = sizeof(unsigned int), | ||
| 530 | .mode = 0644, | ||
| 531 | .proc_handler = &proc_dointvec_jiffies, | ||
| 532 | }, | ||
| 533 | { | ||
| 534 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, | ||
| 535 | .procname = "nf_conntrack_tcp_timeout_time_wait", | ||
| 536 | .data = &nf_ct_tcp_timeout_time_wait, | ||
| 537 | .maxlen = sizeof(unsigned int), | ||
| 538 | .mode = 0644, | ||
| 539 | .proc_handler = &proc_dointvec_jiffies, | ||
| 540 | }, | ||
| 541 | { | ||
| 542 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, | ||
| 543 | .procname = "nf_conntrack_tcp_timeout_close", | ||
| 544 | .data = &nf_ct_tcp_timeout_close, | ||
| 545 | .maxlen = sizeof(unsigned int), | ||
| 546 | .mode = 0644, | ||
| 547 | .proc_handler = &proc_dointvec_jiffies, | ||
| 548 | }, | ||
| 549 | { | ||
| 550 | .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT, | ||
| 551 | .procname = "nf_conntrack_udp_timeout", | ||
| 552 | .data = &nf_ct_udp_timeout, | ||
| 553 | .maxlen = sizeof(unsigned int), | ||
| 554 | .mode = 0644, | ||
| 555 | .proc_handler = &proc_dointvec_jiffies, | ||
| 556 | }, | ||
| 557 | { | ||
| 558 | .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM, | ||
| 559 | .procname = "nf_conntrack_udp_timeout_stream", | ||
| 560 | .data = &nf_ct_udp_timeout_stream, | ||
| 561 | .maxlen = sizeof(unsigned int), | ||
| 562 | .mode = 0644, | ||
| 563 | .proc_handler = &proc_dointvec_jiffies, | ||
| 564 | }, | ||
| 565 | { | ||
| 566 | .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT, | ||
| 567 | .procname = "nf_conntrack_generic_timeout", | ||
| 568 | .data = &nf_ct_generic_timeout, | ||
| 569 | .maxlen = sizeof(unsigned int), | ||
| 570 | .mode = 0644, | ||
| 571 | .proc_handler = &proc_dointvec_jiffies, | ||
| 572 | }, | ||
| 573 | { | ||
| 574 | .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, | ||
| 575 | .procname = "nf_conntrack_log_invalid", | ||
| 576 | .data = &nf_ct_log_invalid, | ||
| 577 | .maxlen = sizeof(unsigned int), | ||
| 578 | .mode = 0644, | ||
| 579 | .proc_handler = &proc_dointvec_minmax, | ||
| 580 | .strategy = &sysctl_intvec, | ||
| 581 | .extra1 = &log_invalid_proto_min, | ||
| 582 | .extra2 = &log_invalid_proto_max, | ||
| 583 | }, | ||
| 584 | { | ||
| 585 | .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, | ||
| 586 | .procname = "nf_conntrack_tcp_timeout_max_retrans", | ||
| 587 | .data = &nf_ct_tcp_timeout_max_retrans, | ||
| 588 | .maxlen = sizeof(unsigned int), | ||
| 589 | .mode = 0644, | ||
| 590 | .proc_handler = &proc_dointvec_jiffies, | ||
| 591 | }, | ||
| 592 | { | ||
| 593 | .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, | ||
| 594 | .procname = "nf_conntrack_tcp_loose", | ||
| 595 | .data = &nf_ct_tcp_loose, | ||
| 596 | .maxlen = sizeof(unsigned int), | ||
| 597 | .mode = 0644, | ||
| 598 | .proc_handler = &proc_dointvec, | ||
| 599 | }, | ||
| 600 | { | ||
| 601 | .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, | ||
| 602 | .procname = "nf_conntrack_tcp_be_liberal", | ||
| 603 | .data = &nf_ct_tcp_be_liberal, | ||
| 604 | .maxlen = sizeof(unsigned int), | ||
| 605 | .mode = 0644, | ||
| 606 | .proc_handler = &proc_dointvec, | ||
| 607 | }, | ||
| 608 | { | ||
| 609 | .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, | ||
| 610 | .procname = "nf_conntrack_tcp_max_retrans", | ||
| 611 | .data = &nf_ct_tcp_max_retrans, | ||
| 612 | .maxlen = sizeof(unsigned int), | ||
| 613 | .mode = 0644, | ||
| 614 | .proc_handler = &proc_dointvec, | ||
| 615 | }, | ||
| 616 | |||
| 617 | { .ctl_name = 0 } | ||
| 618 | }; | ||
| 619 | |||
| 620 | #define NET_NF_CONNTRACK_MAX 2089 | ||
| 621 | |||
| 622 | static ctl_table nf_ct_netfilter_table[] = { | ||
| 623 | { | ||
| 624 | .ctl_name = NET_NETFILTER, | ||
| 625 | .procname = "netfilter", | ||
| 626 | .mode = 0555, | ||
| 627 | .child = nf_ct_sysctl_table, | ||
| 628 | }, | ||
| 629 | { | ||
| 630 | .ctl_name = NET_NF_CONNTRACK_MAX, | ||
| 631 | .procname = "nf_conntrack_max", | ||
| 632 | .data = &nf_conntrack_max, | ||
| 633 | .maxlen = sizeof(int), | ||
| 634 | .mode = 0644, | ||
| 635 | .proc_handler = &proc_dointvec, | ||
| 636 | }, | ||
| 637 | { .ctl_name = 0 } | ||
| 638 | }; | ||
| 639 | |||
| 640 | static ctl_table nf_ct_net_table[] = { | ||
| 641 | { | ||
| 642 | .ctl_name = CTL_NET, | ||
| 643 | .procname = "net", | ||
| 644 | .mode = 0555, | ||
| 645 | .child = nf_ct_netfilter_table, | ||
| 646 | }, | ||
| 647 | { .ctl_name = 0 } | ||
| 648 | }; | ||
| 649 | EXPORT_SYMBOL(nf_ct_log_invalid); | ||
| 650 | #endif /* CONFIG_SYSCTL */ | ||
| 651 | |||
/*
 * Combined module set-up / tear-down.
 *
 * @init: non-zero to initialise, zero to tear everything down.
 *
 * Initialisation runs nf_conntrack_init(), then (CONFIG_PROC_FS) creates
 * the "nf_conntrack" and "nf_conntrack_expect" proc files plus the
 * "nf_conntrack" entry under proc_net_stat, then (CONFIG_SYSCTL)
 * registers nf_ct_net_table.  A failure at any step unwinds the steps
 * already completed by falling through the cleanup labels in reverse
 * order.
 *
 * Returns 0 on success (and on tear-down), the error from
 * nf_conntrack_init(), or -ENOMEM when a proc entry or the sysctl table
 * cannot be registered.
 */
static int init_or_cleanup(int init)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
#endif
	int ret = 0;

	if (!init)
		goto cleanup;

	ret = nf_conntrack_init();
	if (ret < 0)
		goto cleanup_nothing;

#ifdef CONFIG_PROC_FS
	/*
	 * Each failure below must set an error code: ret still holds the
	 * non-negative result of nf_conntrack_init() here, and returning it
	 * after unwinding would report a torn-down module as loaded.
	 */
	proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops);
	if (!proc) {
		ret = -ENOMEM;
		goto cleanup_init;
	}

	proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440,
					&exp_file_ops);
	if (!proc_exp) {
		ret = -ENOMEM;
		goto cleanup_proc;
	}

	proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat);
	if (!proc_stat) {
		ret = -ENOMEM;
		goto cleanup_proc_exp;
	}

	proc_stat->proc_fops = &ct_cpu_seq_fops;
	proc_stat->owner = THIS_MODULE;
#endif
#ifdef CONFIG_SYSCTL
	nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
	if (nf_ct_sysctl_header == NULL) {
		printk("nf_conntrack: can't register to sysctl.\n");
		ret = -ENOMEM;
		goto cleanup_proc_stat;
	}
#endif

	return ret;

 cleanup:
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(nf_ct_sysctl_header);
 cleanup_proc_stat:
#endif
#ifdef CONFIG_PROC_FS
	/* Must mirror the create_proc_entry() call above: same name, same
	 * parent directory. */
	remove_proc_entry("nf_conntrack", proc_net_stat);
 cleanup_proc_exp:
	proc_net_remove("nf_conntrack_expect");
 cleanup_proc:
	proc_net_remove("nf_conntrack");
 cleanup_init:
#endif /* CONFIG_PROC_FS */
	nf_conntrack_cleanup();
 cleanup_nothing:
	return ret;
}
| 708 | |||
| 709 | int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) | ||
| 710 | { | ||
| 711 | int ret = 0; | ||
| 712 | |||
| 713 | write_lock_bh(&nf_conntrack_lock); | ||
| 714 | if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_generic_l3proto) { | ||
| 715 | ret = -EBUSY; | ||
| 716 | goto out; | ||
| 717 | } | ||
| 718 | nf_ct_l3protos[proto->l3proto] = proto; | ||
| 719 | out: | ||
| 720 | write_unlock_bh(&nf_conntrack_lock); | ||
| 721 | |||
| 722 | return ret; | ||
| 723 | } | ||
| 724 | |||
| 725 | void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) | ||
| 726 | { | ||
| 727 | write_lock_bh(&nf_conntrack_lock); | ||
| 728 | nf_ct_l3protos[proto->l3proto] = &nf_conntrack_generic_l3proto; | ||
| 729 | write_unlock_bh(&nf_conntrack_lock); | ||
| 730 | |||
| 731 | /* Somebody could be still looking at the proto in bh. */ | ||
| 732 | synchronize_net(); | ||
| 733 | |||
| 734 | /* Remove all contrack entries for this protocol */ | ||
| 735 | nf_ct_iterate_cleanup(kill_l3proto, proto); | ||
| 736 | } | ||
| 737 | |||
| 738 | /* FIXME: Allow NULL functions and sub in pointers to generic for | ||
| 739 | them. --RR */ | ||
| 740 | int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto) | ||
| 741 | { | ||
| 742 | int ret = 0; | ||
| 743 | |||
| 744 | retry: | ||
| 745 | write_lock_bh(&nf_conntrack_lock); | ||
| 746 | if (nf_ct_protos[proto->l3proto]) { | ||
| 747 | if (nf_ct_protos[proto->l3proto][proto->proto] | ||
| 748 | != &nf_conntrack_generic_protocol) { | ||
| 749 | ret = -EBUSY; | ||
| 750 | goto out_unlock; | ||
| 751 | } | ||
| 752 | } else { | ||
| 753 | /* l3proto may be loaded latter. */ | ||
| 754 | struct nf_conntrack_protocol **proto_array; | ||
| 755 | int i; | ||
| 756 | |||
| 757 | write_unlock_bh(&nf_conntrack_lock); | ||
| 758 | |||
| 759 | proto_array = (struct nf_conntrack_protocol **) | ||
| 760 | kmalloc(MAX_NF_CT_PROTO * | ||
| 761 | sizeof(struct nf_conntrack_protocol *), | ||
| 762 | GFP_KERNEL); | ||
| 763 | if (proto_array == NULL) { | ||
| 764 | ret = -ENOMEM; | ||
| 765 | goto out; | ||
| 766 | } | ||
| 767 | for (i = 0; i < MAX_NF_CT_PROTO; i++) | ||
| 768 | proto_array[i] = &nf_conntrack_generic_protocol; | ||
| 769 | |||
| 770 | write_lock_bh(&nf_conntrack_lock); | ||
| 771 | if (nf_ct_protos[proto->l3proto]) { | ||
| 772 | /* bad timing, but no problem */ | ||
| 773 | write_unlock_bh(&nf_conntrack_lock); | ||
| 774 | kfree(proto_array); | ||
| 775 | } else { | ||
| 776 | nf_ct_protos[proto->l3proto] = proto_array; | ||
| 777 | write_unlock_bh(&nf_conntrack_lock); | ||
| 778 | } | ||
| 779 | |||
| 780 | /* | ||
| 781 | * Just once because array is never freed until unloading | ||
| 782 | * nf_conntrack.ko | ||
| 783 | */ | ||
| 784 | goto retry; | ||
| 785 | } | ||
| 786 | |||
| 787 | nf_ct_protos[proto->l3proto][proto->proto] = proto; | ||
| 788 | |||
| 789 | out_unlock: | ||
| 790 | write_unlock_bh(&nf_conntrack_lock); | ||
| 791 | out: | ||
| 792 | return ret; | ||
| 793 | } | ||
| 794 | |||
| 795 | void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto) | ||
| 796 | { | ||
| 797 | write_lock_bh(&nf_conntrack_lock); | ||
| 798 | nf_ct_protos[proto->l3proto][proto->proto] | ||
| 799 | = &nf_conntrack_generic_protocol; | ||
| 800 | write_unlock_bh(&nf_conntrack_lock); | ||
| 801 | |||
| 802 | /* Somebody could be still looking at the proto in bh. */ | ||
| 803 | synchronize_net(); | ||
| 804 | |||
| 805 | /* Remove all contrack entries for this protocol */ | ||
| 806 | nf_ct_iterate_cleanup(kill_proto, proto); | ||
| 807 | } | ||
| 808 | |||
| 809 | static int __init init(void) | ||
| 810 | { | ||
| 811 | return init_or_cleanup(1); | ||
| 812 | } | ||
| 813 | |||
| 814 | static void __exit fini(void) | ||
| 815 | { | ||
| 816 | init_or_cleanup(0); | ||
| 817 | } | ||
| 818 | |||
| 819 | module_init(init); | ||
| 820 | module_exit(fini); | ||
| 821 | |||
/*
 * Deliberately empty.  Some modules need this one loaded without using
 * any of its symbols directly; they should call this function to create
 * that dependency.
 */
void need_nf_conntrack(void)
{
}
| 827 | |||
| 828 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | ||
| 829 | EXPORT_SYMBOL_GPL(nf_conntrack_chain); | ||
| 830 | EXPORT_SYMBOL_GPL(nf_conntrack_expect_chain); | ||
| 831 | EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); | ||
| 832 | EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); | ||
| 833 | EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init); | ||
| 834 | EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); | ||
| 835 | EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); | ||
| 836 | #endif | ||
| 837 | EXPORT_SYMBOL(nf_conntrack_l3proto_register); | ||
| 838 | EXPORT_SYMBOL(nf_conntrack_l3proto_unregister); | ||
| 839 | EXPORT_SYMBOL(nf_conntrack_protocol_register); | ||
| 840 | EXPORT_SYMBOL(nf_conntrack_protocol_unregister); | ||
| 841 | EXPORT_SYMBOL(nf_ct_invert_tuplepr); | ||
| 842 | EXPORT_SYMBOL(nf_conntrack_alter_reply); | ||
| 843 | EXPORT_SYMBOL(nf_conntrack_destroyed); | ||
| 844 | EXPORT_SYMBOL(need_nf_conntrack); | ||
| 845 | EXPORT_SYMBOL(nf_conntrack_helper_register); | ||
| 846 | EXPORT_SYMBOL(nf_conntrack_helper_unregister); | ||
| 847 | EXPORT_SYMBOL(nf_ct_iterate_cleanup); | ||
| 848 | EXPORT_SYMBOL(__nf_ct_refresh_acct); | ||
| 849 | EXPORT_SYMBOL(nf_ct_protos); | ||
| 850 | EXPORT_SYMBOL(nf_ct_find_proto); | ||
| 851 | EXPORT_SYMBOL(nf_ct_l3protos); | ||
| 852 | EXPORT_SYMBOL(nf_conntrack_expect_alloc); | ||
| 853 | EXPORT_SYMBOL(nf_conntrack_expect_put); | ||
| 854 | EXPORT_SYMBOL(nf_conntrack_expect_related); | ||
| 855 | EXPORT_SYMBOL(nf_conntrack_unexpect_related); | ||
| 856 | EXPORT_SYMBOL(nf_conntrack_tuple_taken); | ||
| 857 | EXPORT_SYMBOL(nf_conntrack_htable_size); | ||
| 858 | EXPORT_SYMBOL(nf_conntrack_lock); | ||
| 859 | EXPORT_SYMBOL(nf_conntrack_hash); | ||
| 860 | EXPORT_SYMBOL(nf_conntrack_untracked); | ||
| 861 | EXPORT_SYMBOL_GPL(nf_conntrack_find_get); | ||
| 862 | #ifdef CONFIG_IP_NF_NAT_NEEDED | ||
| 863 | EXPORT_SYMBOL(nf_conntrack_tcp_update); | ||
| 864 | #endif | ||
| 865 | EXPORT_SYMBOL(__nf_conntrack_confirm); | ||
| 866 | EXPORT_SYMBOL(nf_ct_get_tuple); | ||
| 867 | EXPORT_SYMBOL(nf_ct_invert_tuple); | ||
| 868 | EXPORT_SYMBOL(nf_conntrack_in); | ||
| 869 | EXPORT_SYMBOL(__nf_conntrack_attach); | ||
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index d10d552d9c40..d3a4f30a7f22 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c  | |||
| @@ -117,7 +117,7 @@ int nf_queue(struct sk_buff **skb, | |||
| 117 | 117 | ||
| 118 | /* QUEUE == DROP if noone is waiting, to be safe. */ | 118 | /* QUEUE == DROP if noone is waiting, to be safe. */ | 
| 119 | read_lock(&queue_handler_lock); | 119 | read_lock(&queue_handler_lock); | 
| 120 | if (!queue_handler[pf]->outfn) { | 120 | if (!queue_handler[pf] || !queue_handler[pf]->outfn) { | 
| 121 | read_unlock(&queue_handler_lock); | 121 | read_unlock(&queue_handler_lock); | 
| 122 | kfree_skb(*skb); | 122 | kfree_skb(*skb); | 
| 123 | return 1; | 123 | return 1; | 
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 4bc27a6334c1..83f4c53030fc 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c  | |||
| @@ -128,7 +128,7 @@ void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen, | |||
| 128 | memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size); | 128 | memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size); | 
| 129 | } | 129 | } | 
| 130 | 130 | ||
| 131 | int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) | 131 | void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) | 
| 132 | { | 132 | { | 
| 133 | memset(tb, 0, sizeof(struct nfattr *) * maxattr); | 133 | memset(tb, 0, sizeof(struct nfattr *) * maxattr); | 
| 134 | 134 | ||
| @@ -138,8 +138,6 @@ int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) | |||
| 138 | tb[flavor-1] = nfa; | 138 | tb[flavor-1] = nfa; | 
| 139 | nfa = NFA_NEXT(nfa, len); | 139 | nfa = NFA_NEXT(nfa, len); | 
| 140 | } | 140 | } | 
| 141 | |||
| 142 | return 0; | ||
| 143 | } | 141 | } | 
| 144 | 142 | ||
| 145 | /** | 143 | /** | 
| @@ -242,15 +240,18 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, | |||
| 242 | ss = nfnetlink_get_subsys(type); | 240 | ss = nfnetlink_get_subsys(type); | 
| 243 | if (!ss) { | 241 | if (!ss) { | 
| 244 | #ifdef CONFIG_KMOD | 242 | #ifdef CONFIG_KMOD | 
| 245 | /* don't call nfnl_shunlock, since it would reenter | 243 | if (cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) { | 
| 246 | * with further packet processing */ | 244 | /* don't call nfnl_shunlock, since it would reenter | 
| 247 | up(&nfnl_sem); | 245 | * with further packet processing */ | 
| 248 | request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); | 246 | up(&nfnl_sem); | 
| 249 | nfnl_shlock(); | 247 | request_module("nfnetlink-subsys-%d", | 
| 250 | ss = nfnetlink_get_subsys(type); | 248 | NFNL_SUBSYS_ID(type)); | 
| 249 | nfnl_shlock(); | ||
| 250 | ss = nfnetlink_get_subsys(type); | ||
| 251 | } | ||
| 251 | if (!ss) | 252 | if (!ss) | 
| 252 | #endif | 253 | #endif | 
| 253 | goto err_inval; | 254 | goto err_inval; | 
| 254 | } | 255 | } | 
| 255 | 256 | ||
| 256 | nc = nfnetlink_find_client(type, ss); | 257 | nc = nfnetlink_find_client(type, ss); | 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index efcd10f996ba..d194676f3655 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c  | |||
| @@ -146,11 +146,10 @@ instance_create(u_int16_t group_num, int pid) | |||
| 146 | goto out_unlock; | 146 | goto out_unlock; | 
| 147 | } | 147 | } | 
| 148 | 148 | ||
| 149 | inst = kmalloc(sizeof(*inst), GFP_ATOMIC); | 149 | inst = kzalloc(sizeof(*inst), GFP_ATOMIC); | 
| 150 | if (!inst) | 150 | if (!inst) | 
| 151 | goto out_unlock; | 151 | goto out_unlock; | 
| 152 | 152 | ||
| 153 | memset(inst, 0, sizeof(*inst)); | ||
| 154 | INIT_HLIST_NODE(&inst->hlist); | 153 | INIT_HLIST_NODE(&inst->hlist); | 
| 155 | inst->lock = SPIN_LOCK_UNLOCKED; | 154 | inst->lock = SPIN_LOCK_UNLOCKED; | 
| 156 | /* needs to be two, since we _put() after creation */ | 155 | /* needs to be two, since we _put() after creation */ | 
| @@ -962,10 +961,9 @@ static int nful_open(struct inode *inode, struct file *file) | |||
| 962 | struct iter_state *is; | 961 | struct iter_state *is; | 
| 963 | int ret; | 962 | int ret; | 
| 964 | 963 | ||
| 965 | is = kmalloc(sizeof(*is), GFP_KERNEL); | 964 | is = kzalloc(sizeof(*is), GFP_KERNEL); | 
| 966 | if (!is) | 965 | if (!is) | 
| 967 | return -ENOMEM; | 966 | return -ENOMEM; | 
| 968 | memset(is, 0, sizeof(*is)); | ||
| 969 | ret = seq_open(file, &nful_seq_ops); | 967 | ret = seq_open(file, &nful_seq_ops); | 
| 970 | if (ret < 0) | 968 | if (ret < 0) | 
| 971 | goto out_free; | 969 | goto out_free; | 
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index eaa44c49567b..f065a6c94953 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c  | |||
| @@ -136,11 +136,10 @@ instance_create(u_int16_t queue_num, int pid) | |||
| 136 | goto out_unlock; | 136 | goto out_unlock; | 
| 137 | } | 137 | } | 
| 138 | 138 | ||
| 139 | inst = kmalloc(sizeof(*inst), GFP_ATOMIC); | 139 | inst = kzalloc(sizeof(*inst), GFP_ATOMIC); | 
| 140 | if (!inst) | 140 | if (!inst) | 
| 141 | goto out_unlock; | 141 | goto out_unlock; | 
| 142 | 142 | ||
| 143 | memset(inst, 0, sizeof(*inst)); | ||
| 144 | inst->queue_num = queue_num; | 143 | inst->queue_num = queue_num; | 
| 145 | inst->peer_pid = pid; | 144 | inst->peer_pid = pid; | 
| 146 | inst->queue_maxlen = NFQNL_QMAX_DEFAULT; | 145 | inst->queue_maxlen = NFQNL_QMAX_DEFAULT; | 
| @@ -1036,10 +1035,9 @@ static int nfqnl_open(struct inode *inode, struct file *file) | |||
| 1036 | struct iter_state *is; | 1035 | struct iter_state *is; | 
| 1037 | int ret; | 1036 | int ret; | 
| 1038 | 1037 | ||
| 1039 | is = kmalloc(sizeof(*is), GFP_KERNEL); | 1038 | is = kzalloc(sizeof(*is), GFP_KERNEL); | 
| 1040 | if (!is) | 1039 | if (!is) | 
| 1041 | return -ENOMEM; | 1040 | return -ENOMEM; | 
| 1042 | memset(is, 0, sizeof(*is)); | ||
| 1043 | ret = seq_open(file, &nfqnl_seq_ops); | 1041 | ret = seq_open(file, &nfqnl_seq_ops); | 
| 1044 | if (ret < 0) | 1042 | if (ret < 0) | 
| 1045 | goto out_free; | 1043 | goto out_free; | 
diff --git a/net/netlink/Makefile b/net/netlink/Makefile index 39d9c2dcd03c..e3589c2de49e 100644 --- a/net/netlink/Makefile +++ b/net/netlink/Makefile  | |||
| @@ -2,4 +2,4 @@ | |||
| 2 | # Makefile for the netlink driver. | 2 | # Makefile for the netlink driver. | 
| 3 | # | 3 | # | 
| 4 | 4 | ||
| 5 | obj-y := af_netlink.o | 5 | obj-y := af_netlink.o attr.o genetlink.o | 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5ca283537bc6..8c38ee6d255e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c  | |||
| @@ -58,6 +58,7 @@ | |||
| 58 | 58 | ||
| 59 | #include <net/sock.h> | 59 | #include <net/sock.h> | 
| 60 | #include <net/scm.h> | 60 | #include <net/scm.h> | 
| 61 | #include <net/netlink.h> | ||
| 61 | 62 | ||
| 62 | #define Nprintk(a...) | 63 | #define Nprintk(a...) | 
| 63 | #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) | 64 | #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) | 
| @@ -427,7 +428,8 @@ static int netlink_release(struct socket *sock) | |||
| 427 | 428 | ||
| 428 | spin_lock(&nlk->cb_lock); | 429 | spin_lock(&nlk->cb_lock); | 
| 429 | if (nlk->cb) { | 430 | if (nlk->cb) { | 
| 430 | nlk->cb->done(nlk->cb); | 431 | if (nlk->cb->done) | 
| 432 | nlk->cb->done(nlk->cb); | ||
| 431 | netlink_destroy_callback(nlk->cb); | 433 | netlink_destroy_callback(nlk->cb); | 
| 432 | nlk->cb = NULL; | 434 | nlk->cb = NULL; | 
| 433 | } | 435 | } | 
| @@ -1322,7 +1324,8 @@ static int netlink_dump(struct sock *sk) | |||
| 1322 | skb_queue_tail(&sk->sk_receive_queue, skb); | 1324 | skb_queue_tail(&sk->sk_receive_queue, skb); | 
| 1323 | sk->sk_data_ready(sk, skb->len); | 1325 | sk->sk_data_ready(sk, skb->len); | 
| 1324 | 1326 | ||
| 1325 | cb->done(cb); | 1327 | if (cb->done) | 
| 1328 | cb->done(cb); | ||
| 1326 | nlk->cb = NULL; | 1329 | nlk->cb = NULL; | 
| 1327 | spin_unlock(&nlk->cb_lock); | 1330 | spin_unlock(&nlk->cb_lock); | 
| 1328 | 1331 | ||
| @@ -1409,6 +1412,94 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) | |||
| 1409 | netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | 1412 | netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | 
| 1410 | } | 1413 | } | 
| 1411 | 1414 | ||
| 1415 | static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, | ||
| 1416 | struct nlmsghdr *, int *)) | ||
| 1417 | { | ||
| 1418 | unsigned int total_len; | ||
| 1419 | struct nlmsghdr *nlh; | ||
| 1420 | int err; | ||
| 1421 | |||
| 1422 | while (skb->len >= nlmsg_total_size(0)) { | ||
| 1423 | nlh = (struct nlmsghdr *) skb->data; | ||
| 1424 | |||
| 1425 | if (skb->len < nlh->nlmsg_len) | ||
| 1426 | return 0; | ||
| 1427 | |||
| 1428 | total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len); | ||
| 1429 | |||
| 1430 | if (cb(skb, nlh, &err) < 0) { | ||
| 1431 | /* Not an error, but we have to interrupt processing | ||
| 1432 | * here. Note that in this case we do not pull the | ||
| 1433 | * message from the skb; it will be processed later. | ||
| 1434 | */ | ||
| 1435 | if (err == 0) | ||
| 1436 | return -1; | ||
| 1437 | netlink_ack(skb, nlh, err); | ||
| 1438 | } else if (nlh->nlmsg_flags & NLM_F_ACK) | ||
| 1439 | netlink_ack(skb, nlh, 0); | ||
| 1440 | |||
| 1441 | skb_pull(skb, total_len); | ||
| 1442 | } | ||
| 1443 | |||
| 1444 | return 0; | ||
| 1445 | } | ||
| 1446 | |||
| 1447 | /** | ||
| 1448 | * netlink_run_queue - Process netlink receive queue. | ||
| 1449 | * @sk: Netlink socket containing the queue | ||
| 1450 | * @qlen: Place to store queue length upon entry | ||
| 1451 | * @cb: Callback function invoked for each netlink message found | ||
| 1452 | * | ||
| 1453 | * Processes as much as there was in the queue upon entry and invokes | ||
| 1454 | * a callback function for each netlink message found. The callback | ||
| 1455 | * function may refuse a message by returning a negative error code | ||
| 1456 | * but setting the error pointer to 0 in which case this function | ||
| 1457 | * returns with a qlen != 0. | ||
| 1458 | * | ||
| 1459 | * qlen must be initialized to 0 before the initial entry, afterwards | ||
| 1460 | * the function may be called repeatedly until qlen reaches 0. | ||
| 1461 | */ | ||
| 1462 | void netlink_run_queue(struct sock *sk, unsigned int *qlen, | ||
| 1463 | int (*cb)(struct sk_buff *, struct nlmsghdr *, int *)) | ||
| 1464 | { | ||
| 1465 | struct sk_buff *skb; | ||
| 1466 | |||
| 1467 | if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue)) | ||
| 1468 | *qlen = skb_queue_len(&sk->sk_receive_queue); | ||
| 1469 | |||
| 1470 | for (; *qlen; (*qlen)--) { | ||
| 1471 | skb = skb_dequeue(&sk->sk_receive_queue); | ||
| 1472 | if (netlink_rcv_skb(skb, cb)) { | ||
| 1473 | if (skb->len) | ||
| 1474 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
| 1475 | else { | ||
| 1476 | kfree_skb(skb); | ||
| 1477 | (*qlen)--; | ||
| 1478 | } | ||
| 1479 | break; | ||
| 1480 | } | ||
| 1481 | |||
| 1482 | kfree_skb(skb); | ||
| 1483 | } | ||
| 1484 | } | ||
| 1485 | |||
| 1486 | /** | ||
| 1487 | * netlink_queue_skip - Skip netlink message while processing queue. | ||
| 1488 | * @nlh: Netlink message to be skipped | ||
| 1489 | * @skb: Socket buffer containing the netlink messages. | ||
| 1490 | * | ||
| 1491 | * Pulls the given netlink message off the socket buffer so the next | ||
| 1492 | * call to netlink_run_queue() will not reconsider the message. | ||
| 1493 | */ | ||
| 1494 | void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) | ||
| 1495 | { | ||
| 1496 | int msglen = NLMSG_ALIGN(nlh->nlmsg_len); | ||
| 1497 | |||
| 1498 | if (msglen > skb->len) | ||
| 1499 | msglen = skb->len; | ||
| 1500 | |||
| 1501 | skb_pull(skb, msglen); | ||
| 1502 | } | ||
| 1412 | 1503 | ||
| 1413 | #ifdef CONFIG_PROC_FS | 1504 | #ifdef CONFIG_PROC_FS | 
| 1414 | struct nl_seq_iter { | 1505 | struct nl_seq_iter { | 
| @@ -1657,6 +1748,8 @@ out: | |||
| 1657 | core_initcall(netlink_proto_init); | 1748 | core_initcall(netlink_proto_init); | 
| 1658 | 1749 | ||
| 1659 | EXPORT_SYMBOL(netlink_ack); | 1750 | EXPORT_SYMBOL(netlink_ack); | 
| 1751 | EXPORT_SYMBOL(netlink_run_queue); | ||
| 1752 | EXPORT_SYMBOL(netlink_queue_skip); | ||
| 1660 | EXPORT_SYMBOL(netlink_broadcast); | 1753 | EXPORT_SYMBOL(netlink_broadcast); | 
| 1661 | EXPORT_SYMBOL(netlink_dump_start); | 1754 | EXPORT_SYMBOL(netlink_dump_start); | 
| 1662 | EXPORT_SYMBOL(netlink_kernel_create); | 1755 | EXPORT_SYMBOL(netlink_kernel_create); | 
diff --git a/net/netlink/attr.c b/net/netlink/attr.c new file mode 100644 index 000000000000..fffef4ab276f --- /dev/null +++ b/net/netlink/attr.c  | |||
| @@ -0,0 +1,328 @@ | |||
| 1 | /* | ||
| 2 | * NETLINK Netlink attributes | ||
| 3 | * | ||
| 4 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
| 5 | * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <linux/config.h> | ||
| 9 | #include <linux/module.h> | ||
| 10 | #include <linux/kernel.h> | ||
| 11 | #include <linux/errno.h> | ||
| 12 | #include <linux/jiffies.h> | ||
| 13 | #include <linux/netdevice.h> | ||
| 14 | #include <linux/skbuff.h> | ||
| 15 | #include <linux/string.h> | ||
| 16 | #include <linux/types.h> | ||
| 17 | #include <net/netlink.h> | ||
| 18 | |||
| 19 | static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { | ||
| 20 | [NLA_U8] = sizeof(u8), | ||
| 21 | [NLA_U16] = sizeof(u16), | ||
| 22 | [NLA_U32] = sizeof(u32), | ||
| 23 | [NLA_U64] = sizeof(u64), | ||
| 24 | [NLA_STRING] = 1, | ||
| 25 | [NLA_NESTED] = NLA_HDRLEN, | ||
| 26 | }; | ||
| 27 | |||
| 28 | static int validate_nla(struct nlattr *nla, int maxtype, | ||
| 29 | struct nla_policy *policy) | ||
| 30 | { | ||
| 31 | struct nla_policy *pt; | ||
| 32 | int minlen = 0; | ||
| 33 | |||
| 34 | if (nla->nla_type <= 0 || nla->nla_type > maxtype) | ||
| 35 | return 0; | ||
| 36 | |||
| 37 | pt = &policy[nla->nla_type]; | ||
| 38 | |||
| 39 | BUG_ON(pt->type > NLA_TYPE_MAX); | ||
| 40 | |||
| 41 | if (pt->minlen) | ||
| 42 | minlen = pt->minlen; | ||
| 43 | else if (pt->type != NLA_UNSPEC) | ||
| 44 | minlen = nla_attr_minlen[pt->type]; | ||
| 45 | |||
| 46 | if (pt->type == NLA_FLAG && nla_len(nla) > 0) | ||
| 47 | return -ERANGE; | ||
| 48 | |||
| 49 | if (nla_len(nla) < minlen) | ||
| 50 | return -ERANGE; | ||
| 51 | |||
| 52 | return 0; | ||
| 53 | } | ||
| 54 | |||
| 55 | /** | ||
| 56 | * nla_validate - Validate a stream of attributes | ||
| 57 | * @head: head of attribute stream | ||
| 58 | * @len: length of attribute stream | ||
| 59 | * @maxtype: maximum attribute type to be expected | ||
| 60 | * @policy: validation policy | ||
| 61 | * | ||
| 62 | * Validates all attributes in the specified attribute stream against the | ||
| 63 | * specified policy. Attributes with a type exceeding maxtype will be | ||
| 64 | * ignored. See documentation of struct nla_policy for more details. | ||
| 65 | * | ||
| 66 | * Returns 0 on success or a negative error code. | ||
| 67 | */ | ||
| 68 | int nla_validate(struct nlattr *head, int len, int maxtype, | ||
| 69 | struct nla_policy *policy) | ||
| 70 | { | ||
| 71 | struct nlattr *nla; | ||
| 72 | int rem, err; | ||
| 73 | |||
| 74 | nla_for_each_attr(nla, head, len, rem) { | ||
| 75 | err = validate_nla(nla, maxtype, policy); | ||
| 76 | if (err < 0) | ||
| 77 | goto errout; | ||
| 78 | } | ||
| 79 | |||
| 80 | err = 0; | ||
| 81 | errout: | ||
| 82 | return err; | ||
| 83 | } | ||
| 84 | |||
| 85 | /** | ||
| 86 | * nla_parse - Parse a stream of attributes into a tb buffer | ||
| 87 | * @tb: destination array with maxtype+1 elements | ||
| 88 | * @maxtype: maximum attribute type to be expected | ||
| 89 | * @head: head of attribute stream | ||
| 90 | * @len: length of attribute stream | ||
| 91 | * | ||
| 92 | * Parses a stream of attributes and stores a pointer to each attribute in | ||
| 93 | * the tb array accessible via the attribute type. Attributes with a type | ||
| 94 | * exceeding maxtype will be silently ignored for backwards compatibility | ||
| 95 | * reasons. policy may be set to NULL if no validation is required. | ||
| 96 | * | ||
| 97 | * Returns 0 on success or a negative error code. | ||
| 98 | */ | ||
| 99 | int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, | ||
| 100 | struct nla_policy *policy) | ||
| 101 | { | ||
| 102 | struct nlattr *nla; | ||
| 103 | int rem, err; | ||
| 104 | |||
| 105 | memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); | ||
| 106 | |||
| 107 | nla_for_each_attr(nla, head, len, rem) { | ||
| 108 | u16 type = nla->nla_type; | ||
| 109 | |||
| 110 | if (type > 0 && type <= maxtype) { | ||
| 111 | if (policy) { | ||
| 112 | err = validate_nla(nla, maxtype, policy); | ||
| 113 | if (err < 0) | ||
| 114 | goto errout; | ||
| 115 | } | ||
| 116 | |||
| 117 | tb[type] = nla; | ||
| 118 | } | ||
| 119 | } | ||
| 120 | |||
| 121 | if (unlikely(rem > 0)) | ||
| 122 | printk(KERN_WARNING "netlink: %d bytes leftover after parsing " | ||
| 123 | "attributes.\n", rem); | ||
| 124 | |||
| 125 | err = 0; | ||
| 126 | errout: | ||
| 127 | return err; | ||
| 128 | } | ||
| 129 | |||
| 130 | /** | ||
| 131 | * nla_find - Find a specific attribute in a stream of attributes | ||
| 132 | * @head: head of attribute stream | ||
| 133 | * @len: length of attribute stream | ||
| 134 | * @attrtype: type of attribute to look for | ||
| 135 | * | ||
| 136 | * Returns the first attribute in the stream matching the specified type. | ||
| 137 | */ | ||
| 138 | struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) | ||
| 139 | { | ||
| 140 | struct nlattr *nla; | ||
| 141 | int rem; | ||
| 142 | |||
| 143 | nla_for_each_attr(nla, head, len, rem) | ||
| 144 | if (nla->nla_type == attrtype) | ||
| 145 | return nla; | ||
| 146 | |||
| 147 | return NULL; | ||
| 148 | } | ||
| 149 | |||
| 150 | /** | ||
| 151 | * nla_strlcpy - Copy string attribute payload into a sized buffer | ||
| 152 | * @dst: where to copy the string to | ||
| 153 | * @nla: attribute to copy the string from | ||
| 154 | * @dstsize: size of destination buffer | ||
| 155 | * | ||
| 156 | * Copies at most dstsize - 1 bytes into the destination buffer. | ||
| 157 | * The result is always a valid NUL-terminated string. Unlike | ||
| 158 | * strlcpy the destination buffer is always padded out. | ||
| 159 | * | ||
| 160 | * Returns the length of the source buffer. | ||
| 161 | */ | ||
| 162 | size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) | ||
| 163 | { | ||
| 164 | size_t srclen = nla_len(nla); | ||
| 165 | char *src = nla_data(nla); | ||
| 166 | |||
| 167 | if (srclen > 0 && src[srclen - 1] == '\0') | ||
| 168 | srclen--; | ||
| 169 | |||
| 170 | if (dstsize > 0) { | ||
| 171 | size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; | ||
| 172 | |||
| 173 | memset(dst, 0, dstsize); | ||
| 174 | memcpy(dst, src, len); | ||
| 175 | } | ||
| 176 | |||
| 177 | return srclen; | ||
| 178 | } | ||
| 179 | |||
| 180 | /** | ||
| 181 | * nla_memcpy - Copy a netlink attribute into another memory area | ||
| 182 | * @dest: where to copy to | ||
| 183 | * @src: netlink attribute to copy from | ||
| 184 | * @count: size of the destination area | ||
| 185 | * | ||
| 186 | * Note: The number of bytes copied is limited by the length of | ||
| 187 | * attribute's payload. | ||
| 188 | * | ||
| 189 | * Returns the number of bytes copied. | ||
| 190 | */ | ||
| 191 | int nla_memcpy(void *dest, struct nlattr *src, int count) | ||
| 192 | { | ||
| 193 | int minlen = min_t(int, count, nla_len(src)); | ||
| 194 | |||
| 195 | memcpy(dest, nla_data(src), minlen); | ||
| 196 | |||
| 197 | return minlen; | ||
| 198 | } | ||
| 199 | |||
| 200 | /** | ||
| 201 | * nla_memcmp - Compare an attribute with sized memory area | ||
| 202 | * @nla: netlink attribute | ||
| 203 | * @data: memory area | ||
| 204 | * @size: size of memory area | ||
| 205 | */ | ||
| 206 | int nla_memcmp(const struct nlattr *nla, const void *data, | ||
| 207 | size_t size) | ||
| 208 | { | ||
| 209 | int d = nla_len(nla) - size; | ||
| 210 | |||
| 211 | if (d == 0) | ||
| 212 | d = memcmp(nla_data(nla), data, size); | ||
| 213 | |||
| 214 | return d; | ||
| 215 | } | ||
| 216 | |||
| 217 | /** | ||
| 218 | * nla_strcmp - Compare a string attribute against a string | ||
| 219 | * @nla: netlink string attribute | ||
| 220 | * @str: another string | ||
| 221 | */ | ||
| 222 | int nla_strcmp(const struct nlattr *nla, const char *str) | ||
| 223 | { | ||
| 224 | int len = strlen(str) + 1; | ||
| 225 | int d = nla_len(nla) - len; | ||
| 226 | |||
| 227 | if (d == 0) | ||
| 228 | d = memcmp(nla_data(nla), str, len); | ||
| 229 | |||
| 230 | return d; | ||
| 231 | } | ||
| 232 | |||
| 233 | /** | ||
| 234 | * __nla_reserve - reserve room for attribute on the skb | ||
| 235 | * @skb: socket buffer to reserve room on | ||
| 236 | * @attrtype: attribute type | ||
| 237 | * @attrlen: length of attribute payload | ||
| 238 | * | ||
| 239 | * Adds a netlink attribute header to a socket buffer and reserves | ||
| 240 | * room for the payload but does not copy it. | ||
| 241 | * | ||
| 242 | * The caller is responsible to ensure that the skb provides enough | ||
| 243 | * tailroom for the attribute header and payload. | ||
| 244 | */ | ||
| 245 | struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) | ||
| 246 | { | ||
| 247 | struct nlattr *nla; | ||
| 248 | |||
| 249 | nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); | ||
| 250 | nla->nla_type = attrtype; | ||
| 251 | nla->nla_len = nla_attr_size(attrlen); | ||
| 252 | |||
| 253 | memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); | ||
| 254 | |||
| 255 | return nla; | ||
| 256 | } | ||
| 257 | |||
| 258 | /** | ||
| 259 | * nla_reserve - reserve room for attribute on the skb | ||
| 260 | * @skb: socket buffer to reserve room on | ||
| 261 | * @attrtype: attribute type | ||
| 262 | * @attrlen: length of attribute payload | ||
| 263 | * | ||
| 264 | * Adds a netlink attribute header to a socket buffer and reserves | ||
| 265 | * room for the payload but does not copy it. | ||
| 266 | * | ||
| 267 | * Returns NULL if the tailroom of the skb is insufficient to store | ||
| 268 | * the attribute header and payload. | ||
| 269 | */ | ||
| 270 | struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) | ||
| 271 | { | ||
| 272 | if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) | ||
| 273 | return NULL; | ||
| 274 | |||
| 275 | return __nla_reserve(skb, attrtype, attrlen); | ||
| 276 | } | ||
| 277 | |||
| 278 | /** | ||
| 279 | * __nla_put - Add a netlink attribute to a socket buffer | ||
| 280 | * @skb: socket buffer to add attribute to | ||
| 281 | * @attrtype: attribute type | ||
| 282 | * @attrlen: length of attribute payload | ||
| 283 | * @data: head of attribute payload | ||
| 284 | * | ||
| 285 | * The caller is responsible to ensure that the skb provides enough | ||
| 286 | * tailroom for the attribute header and payload. | ||
| 287 | */ | ||
| 288 | void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, | ||
| 289 | const void *data) | ||
| 290 | { | ||
| 291 | struct nlattr *nla; | ||
| 292 | |||
| 293 | nla = __nla_reserve(skb, attrtype, attrlen); | ||
| 294 | memcpy(nla_data(nla), data, attrlen); | ||
| 295 | } | ||
| 296 | |||
| 297 | |||
| 298 | /** | ||
| 299 | * nla_put - Add a netlink attribute to a socket buffer | ||
| 300 | * @skb: socket buffer to add attribute to | ||
| 301 | * @attrtype: attribute type | ||
| 302 | * @attrlen: length of attribute payload | ||
| 303 | * @data: head of attribute payload | ||
| 304 | * | ||
| 305 | * Returns -1 if the tailroom of the skb is insufficient to store | ||
| 306 | * the attribute header and payload. | ||
| 307 | */ | ||
| 308 | int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) | ||
| 309 | { | ||
| 310 | if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) | ||
| 311 | return -1; | ||
| 312 | |||
| 313 | __nla_put(skb, attrtype, attrlen, data); | ||
| 314 | return 0; | ||
| 315 | } | ||
| 316 | |||
| 317 | |||
| 318 | EXPORT_SYMBOL(nla_validate); | ||
| 319 | EXPORT_SYMBOL(nla_parse); | ||
| 320 | EXPORT_SYMBOL(nla_find); | ||
| 321 | EXPORT_SYMBOL(nla_strlcpy); | ||
| 322 | EXPORT_SYMBOL(__nla_reserve); | ||
| 323 | EXPORT_SYMBOL(nla_reserve); | ||
| 324 | EXPORT_SYMBOL(__nla_put); | ||
| 325 | EXPORT_SYMBOL(nla_put); | ||
| 326 | EXPORT_SYMBOL(nla_memcpy); | ||
| 327 | EXPORT_SYMBOL(nla_memcmp); | ||
| 328 | EXPORT_SYMBOL(nla_strcmp); | ||
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c new file mode 100644 index 000000000000..287cfcc56951 --- /dev/null +++ b/net/netlink/genetlink.c  | |||
| @@ -0,0 +1,579 @@ | |||
| 1 | /* | ||
| 2 | * NETLINK Generic Netlink Family | ||
| 3 | * | ||
| 4 | * Authors: Jamal Hadi Salim | ||
| 5 | * Thomas Graf <tgraf@suug.ch> | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <linux/config.h> | ||
| 9 | #include <linux/module.h> | ||
| 10 | #include <linux/kernel.h> | ||
| 11 | #include <linux/errno.h> | ||
| 12 | #include <linux/types.h> | ||
| 13 | #include <linux/socket.h> | ||
| 14 | #include <linux/string.h> | ||
| 15 | #include <linux/skbuff.h> | ||
| 16 | #include <net/sock.h> | ||
| 17 | #include <net/genetlink.h> | ||
| 18 | |||
| 19 | struct sock *genl_sock = NULL; | ||
| 20 | |||
| 21 | static DECLARE_MUTEX(genl_sem); /* serialization of message processing */ | ||
| 22 | |||
| 23 | static void genl_lock(void) | ||
| 24 | { | ||
| 25 | down(&genl_sem); | ||
| 26 | } | ||
| 27 | |||
| 28 | static int genl_trylock(void) | ||
| 29 | { | ||
| 30 | return down_trylock(&genl_sem); | ||
| 31 | } | ||
| 32 | |||
| 33 | static void genl_unlock(void) | ||
| 34 | { | ||
| 35 | up(&genl_sem); | ||
| 36 | |||
| 37 | if (genl_sock && genl_sock->sk_receive_queue.qlen) | ||
| 38 | genl_sock->sk_data_ready(genl_sock, 0); | ||
| 39 | } | ||
| 40 | |||
| 41 | #define GENL_FAM_TAB_SIZE 16 | ||
| 42 | #define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) | ||
| 43 | |||
| 44 | static struct list_head family_ht[GENL_FAM_TAB_SIZE]; | ||
| 45 | |||
| 46 | static int genl_ctrl_event(int event, void *data); | ||
| 47 | |||
| 48 | static inline unsigned int genl_family_hash(unsigned int id) | ||
| 49 | { | ||
| 50 | return id & GENL_FAM_TAB_MASK; | ||
| 51 | } | ||
| 52 | |||
| 53 | static inline struct list_head *genl_family_chain(unsigned int id) | ||
| 54 | { | ||
| 55 | return &family_ht[genl_family_hash(id)]; | ||
| 56 | } | ||
| 57 | |||
| 58 | static struct genl_family *genl_family_find_byid(unsigned int id) | ||
| 59 | { | ||
| 60 | struct genl_family *f; | ||
| 61 | |||
| 62 | list_for_each_entry(f, genl_family_chain(id), family_list) | ||
| 63 | if (f->id == id) | ||
| 64 | return f; | ||
| 65 | |||
| 66 | return NULL; | ||
| 67 | } | ||
| 68 | |||
| 69 | static struct genl_family *genl_family_find_byname(char *name) | ||
| 70 | { | ||
| 71 | struct genl_family *f; | ||
| 72 | int i; | ||
| 73 | |||
| 74 | for (i = 0; i < GENL_FAM_TAB_SIZE; i++) | ||
| 75 | list_for_each_entry(f, genl_family_chain(i), family_list) | ||
| 76 | if (strcmp(f->name, name) == 0) | ||
| 77 | return f; | ||
| 78 | |||
| 79 | return NULL; | ||
| 80 | } | ||
| 81 | |||
| 82 | static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) | ||
| 83 | { | ||
| 84 | struct genl_ops *ops; | ||
| 85 | |||
| 86 | list_for_each_entry(ops, &family->ops_list, ops_list) | ||
| 87 | if (ops->cmd == cmd) | ||
| 88 | return ops; | ||
| 89 | |||
| 90 | return NULL; | ||
| 91 | } | ||
| 92 | |||
| 93 | /* Of course we are going to have problems once we hit | ||
| 94 | * 2^16 alive types, but that can only happen by year 2K | ||
| 95 | */ | ||
| 96 | static inline u16 genl_generate_id(void) | ||
| 97 | { | ||
| 98 | static u16 id_gen_idx; | ||
| 99 | int overflowed = 0; | ||
| 100 | |||
| 101 | do { | ||
| 102 | if (id_gen_idx == 0) | ||
| 103 | id_gen_idx = GENL_MIN_ID; | ||
| 104 | |||
| 105 | if (++id_gen_idx > GENL_MAX_ID) { | ||
| 106 | if (!overflowed) { | ||
| 107 | overflowed = 1; | ||
| 108 | id_gen_idx = 0; | ||
| 109 | continue; | ||
| 110 | } else | ||
| 111 | return 0; | ||
| 112 | } | ||
| 113 | |||
| 114 | } while (genl_family_find_byid(id_gen_idx)); | ||
| 115 | |||
| 116 | return id_gen_idx; | ||
| 117 | } | ||
| 118 | |||
| 119 | /** | ||
| 120 | * genl_register_ops - register generic netlink operations | ||
| 121 | * @family: generic netlink family | ||
| 122 | * @ops: operations to be registered | ||
| 123 | * | ||
| 124 | * Registers the specified operations and assigns them to the specified | ||
| 125 | * family. Either a doit or dumpit callback must be specified or the | ||
| 126 | * operation will fail. Only one operation structure per command | ||
| 127 | * identifier may be registered. | ||
| 128 | * | ||
| 129 | * See include/net/genetlink.h for more documentation on the operations | ||
| 130 | * structure. | ||
| 131 | * | ||
| 132 | * Returns 0 on success or a negative error code. | ||
| 133 | */ | ||
| 134 | int genl_register_ops(struct genl_family *family, struct genl_ops *ops) | ||
| 135 | { | ||
| 136 | int err = -EINVAL; | ||
| 137 | |||
| 138 | if (ops->dumpit == NULL && ops->doit == NULL) | ||
| 139 | goto errout; | ||
| 140 | |||
| 141 | if (genl_get_cmd(ops->cmd, family)) { | ||
| 142 | err = -EEXIST; | ||
| 143 | goto errout; | ||
| 144 | } | ||
| 145 | |||
| 146 | genl_lock(); | ||
| 147 | list_add_tail(&ops->ops_list, &family->ops_list); | ||
| 148 | genl_unlock(); | ||
| 149 | |||
| 150 | genl_ctrl_event(CTRL_CMD_NEWOPS, ops); | ||
| 151 | err = 0; | ||
| 152 | errout: | ||
| 153 | return err; | ||
| 154 | } | ||
| 155 | |||
| 156 | /** | ||
| 157 | * genl_unregister_ops - unregister generic netlink operations | ||
| 158 | * @family: generic netlink family | ||
| 159 | * @ops: operations to be unregistered | ||
| 160 | * | ||
| 161 | * Unregisters the specified operations and unassigns them from the | ||
| 162 | * specified family. The operation blocks until the current message | ||
| 163 | * processing has finished and doesn't start again until the | ||
| 164 | * unregister process has finished. | ||
| 165 | * | ||
| 166 | * Note: It is not necessary to unregister all operations before | ||
| 167 | * unregistering the family, unregistering the family will cause | ||
| 168 | * all assigned operations to be unregistered automatically. | ||
| 169 | * | ||
| 170 | * Returns 0 on success or a negative error code. | ||
| 171 | */ | ||
| 172 | int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) | ||
| 173 | { | ||
| 174 | struct genl_ops *rc; | ||
| 175 | |||
| 176 | genl_lock(); | ||
| 177 | list_for_each_entry(rc, &family->ops_list, ops_list) { | ||
| 178 | if (rc == ops) { | ||
| 179 | list_del(&ops->ops_list); | ||
| 180 | genl_unlock(); | ||
| 181 | genl_ctrl_event(CTRL_CMD_DELOPS, ops); | ||
| 182 | return 0; | ||
| 183 | } | ||
| 184 | } | ||
| 185 | genl_unlock(); | ||
| 186 | |||
| 187 | return -ENOENT; | ||
| 188 | } | ||
| 189 | |||
| 190 | /** | ||
| 191 | * genl_register_family - register a generic netlink family | ||
| 192 | * @family: generic netlink family | ||
| 193 | * | ||
| 194 | * Registers the specified family after validating it first. Only one | ||
| 195 | * family may be registered with the same family name or identifier. | ||
| 196 | * The family id may equal GENL_ID_GENERATE causing a unique id to | ||
| 197 | * be automatically generated and assigned. | ||
| 198 | * | ||
| 199 | * Returns 0 on success or a negative error code. | ||
| 200 | */ | ||
| 201 | int genl_register_family(struct genl_family *family) | ||
| 202 | { | ||
| 203 | int err = -EINVAL; | ||
| 204 | |||
| 205 | if (family->id && family->id < GENL_MIN_ID) | ||
| 206 | goto errout; | ||
| 207 | |||
| 208 | if (family->id > GENL_MAX_ID) | ||
| 209 | goto errout; | ||
| 210 | |||
| 211 | INIT_LIST_HEAD(&family->ops_list); | ||
| 212 | |||
| 213 | genl_lock(); | ||
| 214 | |||
| 215 | if (genl_family_find_byname(family->name)) { | ||
| 216 | err = -EEXIST; | ||
| 217 | goto errout_locked; | ||
| 218 | } | ||
| 219 | |||
| 220 | if (genl_family_find_byid(family->id)) { | ||
| 221 | err = -EEXIST; | ||
| 222 | goto errout_locked; | ||
| 223 | } | ||
| 224 | |||
| 225 | if (!try_module_get(family->owner)) { | ||
| 226 | err = -EBUSY; | ||
| 227 | goto errout_locked; | ||
| 228 | } | ||
| 229 | |||
| 230 | if (family->id == GENL_ID_GENERATE) { | ||
| 231 | u16 newid = genl_generate_id(); | ||
| 232 | |||
| 233 | if (!newid) { | ||
| 234 | err = -ENOMEM; | ||
| 235 | goto errout_locked; | ||
| 236 | } | ||
| 237 | |||
| 238 | family->id = newid; | ||
| 239 | } | ||
| 240 | |||
| 241 | if (family->maxattr) { | ||
| 242 | family->attrbuf = kmalloc((family->maxattr+1) * | ||
| 243 | sizeof(struct nlattr *), GFP_KERNEL); | ||
| 244 | if (family->attrbuf == NULL) { | ||
| 245 | err = -ENOMEM; | ||
| 246 | goto errout; | ||
| 247 | } | ||
| 248 | } else | ||
| 249 | family->attrbuf = NULL; | ||
| 250 | |||
| 251 | list_add_tail(&family->family_list, genl_family_chain(family->id)); | ||
| 252 | genl_unlock(); | ||
| 253 | |||
| 254 | genl_ctrl_event(CTRL_CMD_NEWFAMILY, family); | ||
| 255 | |||
| 256 | return 0; | ||
| 257 | |||
| 258 | errout_locked: | ||
| 259 | genl_unlock(); | ||
| 260 | errout: | ||
| 261 | return err; | ||
| 262 | } | ||
| 263 | |||
| 264 | /** | ||
| 265 | * genl_unregister_family - unregister generic netlink family | ||
| 266 | * @family: generic netlink family | ||
| 267 | * | ||
| 268 | * Unregisters the specified family. | ||
| 269 | * | ||
| 270 | * Returns 0 on success or a negative error code. | ||
| 271 | */ | ||
| 272 | int genl_unregister_family(struct genl_family *family) | ||
| 273 | { | ||
| 274 | struct genl_family *rc; | ||
| 275 | |||
| 276 | genl_lock(); | ||
| 277 | |||
| 278 | list_for_each_entry(rc, genl_family_chain(family->id), family_list) { | ||
| 279 | if (family->id != rc->id || strcmp(rc->name, family->name)) | ||
| 280 | continue; | ||
| 281 | |||
| 282 | list_del(&rc->family_list); | ||
| 283 | INIT_LIST_HEAD(&family->ops_list); | ||
| 284 | genl_unlock(); | ||
| 285 | |||
| 286 | module_put(family->owner); | ||
| 287 | kfree(family->attrbuf); | ||
| 288 | genl_ctrl_event(CTRL_CMD_DELFAMILY, family); | ||
| 289 | return 0; | ||
| 290 | } | ||
| 291 | |||
| 292 | genl_unlock(); | ||
| 293 | |||
| 294 | return -ENOENT; | ||
| 295 | } | ||
| 296 | |||
| 297 | static inline int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, | ||
| 298 | int *errp) | ||
| 299 | { | ||
| 300 | struct genl_ops *ops; | ||
| 301 | struct genl_family *family; | ||
| 302 | struct genl_info info; | ||
| 303 | struct genlmsghdr *hdr = nlmsg_data(nlh); | ||
| 304 | int hdrlen, err = -EINVAL; | ||
| 305 | |||
| 306 | if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) | ||
| 307 | goto ignore; | ||
| 308 | |||
| 309 | if (nlh->nlmsg_type < NLMSG_MIN_TYPE) | ||
| 310 | goto ignore; | ||
| 311 | |||
| 312 | family = genl_family_find_byid(nlh->nlmsg_type); | ||
| 313 | if (family == NULL) { | ||
| 314 | err = -ENOENT; | ||
| 315 | goto errout; | ||
| 316 | } | ||
| 317 | |||
| 318 | hdrlen = GENL_HDRLEN + family->hdrsize; | ||
| 319 | if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) | ||
| 320 | goto errout; | ||
| 321 | |||
| 322 | ops = genl_get_cmd(hdr->cmd, family); | ||
| 323 | if (ops == NULL) { | ||
| 324 | err = -EOPNOTSUPP; | ||
| 325 | goto errout; | ||
| 326 | } | ||
| 327 | |||
| 328 | if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb)) { | ||
| 329 | err = -EPERM; | ||
| 330 | goto errout; | ||
| 331 | } | ||
| 332 | |||
| 333 | if (nlh->nlmsg_flags & NLM_F_DUMP) { | ||
| 334 | if (ops->dumpit == NULL) { | ||
| 335 | err = -EOPNOTSUPP; | ||
| 336 | goto errout; | ||
| 337 | } | ||
| 338 | |||
| 339 | *errp = err = netlink_dump_start(genl_sock, skb, nlh, | ||
| 340 | ops->dumpit, NULL); | ||
| 341 | if (err == 0) | ||
| 342 | skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len), | ||
| 343 | skb->len)); | ||
| 344 | return -1; | ||
| 345 | } | ||
| 346 | |||
| 347 | if (ops->doit == NULL) { | ||
| 348 | err = -EOPNOTSUPP; | ||
| 349 | goto errout; | ||
| 350 | } | ||
| 351 | |||
| 352 | if (family->attrbuf) { | ||
| 353 | err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr, | ||
| 354 | ops->policy); | ||
| 355 | if (err < 0) | ||
| 356 | goto errout; | ||
| 357 | } | ||
| 358 | |||
| 359 | info.snd_seq = nlh->nlmsg_seq; | ||
| 360 | info.snd_pid = NETLINK_CB(skb).pid; | ||
| 361 | info.nlhdr = nlh; | ||
| 362 | info.genlhdr = nlmsg_data(nlh); | ||
| 363 | info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; | ||
| 364 | info.attrs = family->attrbuf; | ||
| 365 | |||
| 366 | *errp = err = ops->doit(skb, &info); | ||
| 367 | return err; | ||
| 368 | |||
| 369 | ignore: | ||
| 370 | return 0; | ||
| 371 | |||
| 372 | errout: | ||
| 373 | *errp = err; | ||
| 374 | return -1; | ||
| 375 | } | ||
| 376 | |||
| 377 | static void genl_rcv(struct sock *sk, int len) | ||
| 378 | { | ||
| 379 | unsigned int qlen = 0; | ||
| 380 | |||
| 381 | do { | ||
| 382 | if (genl_trylock()) | ||
| 383 | return; | ||
| 384 | netlink_run_queue(sk, &qlen, &genl_rcv_msg); | ||
| 385 | genl_unlock(); | ||
| 386 | } while (qlen && genl_sock && genl_sock->sk_receive_queue.qlen); | ||
| 387 | } | ||
| 388 | |||
| 389 | /************************************************************************** | ||
| 390 | * Controller | ||
| 391 | **************************************************************************/ | ||
| 392 | |||
| 393 | static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, | ||
| 394 | u32 flags, struct sk_buff *skb, u8 cmd) | ||
| 395 | { | ||
| 396 | void *hdr; | ||
| 397 | |||
| 398 | hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, | ||
| 399 | family->version); | ||
| 400 | if (hdr == NULL) | ||
| 401 | return -1; | ||
| 402 | |||
| 403 | NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); | ||
| 404 | NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); | ||
| 405 | |||
| 406 | return genlmsg_end(skb, hdr); | ||
| 407 | |||
| 408 | nla_put_failure: | ||
| 409 | return genlmsg_cancel(skb, hdr); | ||
| 410 | } | ||
| 411 | |||
| 412 | static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) | ||
| 413 | { | ||
| 414 | |||
| 415 | int i, n = 0; | ||
| 416 | struct genl_family *rt; | ||
| 417 | int chains_to_skip = cb->args[0]; | ||
| 418 | int fams_to_skip = cb->args[1]; | ||
| 419 | |||
| 420 | for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { | ||
| 421 | if (i < chains_to_skip) | ||
| 422 | continue; | ||
| 423 | n = 0; | ||
| 424 | list_for_each_entry(rt, genl_family_chain(i), family_list) { | ||
| 425 | if (++n < fams_to_skip) | ||
| 426 | continue; | ||
| 427 | if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid, | ||
| 428 | cb->nlh->nlmsg_seq, NLM_F_MULTI, | ||
| 429 | skb, CTRL_CMD_NEWFAMILY) < 0) | ||
| 430 | goto errout; | ||
| 431 | } | ||
| 432 | |||
| 433 | fams_to_skip = 0; | ||
| 434 | } | ||
| 435 | |||
| 436 | errout: | ||
| 437 | cb->args[0] = i; | ||
| 438 | cb->args[1] = n; | ||
| 439 | |||
| 440 | return skb->len; | ||
| 441 | } | ||
| 442 | |||
| 443 | static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, | ||
| 444 | int seq, int cmd) | ||
| 445 | { | ||
| 446 | struct sk_buff *skb; | ||
| 447 | int err; | ||
| 448 | |||
| 449 | skb = nlmsg_new(NLMSG_GOODSIZE); | ||
| 450 | if (skb == NULL) | ||
| 451 | return ERR_PTR(-ENOBUFS); | ||
| 452 | |||
| 453 | err = ctrl_fill_info(family, pid, seq, 0, skb, cmd); | ||
| 454 | if (err < 0) { | ||
| 455 | nlmsg_free(skb); | ||
| 456 | return ERR_PTR(err); | ||
| 457 | } | ||
| 458 | |||
| 459 | return skb; | ||
| 460 | } | ||
| 461 | |||
| 462 | static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = { | ||
| 463 | [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, | ||
| 464 | [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING }, | ||
| 465 | }; | ||
| 466 | |||
| 467 | static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) | ||
| 468 | { | ||
| 469 | struct sk_buff *msg; | ||
| 470 | struct genl_family *res = NULL; | ||
| 471 | int err = -EINVAL; | ||
| 472 | |||
| 473 | if (info->attrs[CTRL_ATTR_FAMILY_ID]) { | ||
| 474 | u16 id = nla_get_u16(info->attrs[CTRL_ATTR_FAMILY_ID]); | ||
| 475 | res = genl_family_find_byid(id); | ||
| 476 | } | ||
| 477 | |||
| 478 | if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { | ||
| 479 | char name[GENL_NAMSIZ]; | ||
| 480 | |||
| 481 | if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME], | ||
| 482 | GENL_NAMSIZ) >= GENL_NAMSIZ) | ||
| 483 | goto errout; | ||
| 484 | |||
| 485 | res = genl_family_find_byname(name); | ||
| 486 | } | ||
| 487 | |||
| 488 | if (res == NULL) { | ||
| 489 | err = -ENOENT; | ||
| 490 | goto errout; | ||
| 491 | } | ||
| 492 | |||
| 493 | msg = ctrl_build_msg(res, info->snd_pid, info->snd_seq, | ||
| 494 | CTRL_CMD_NEWFAMILY); | ||
| 495 | if (IS_ERR(msg)) { | ||
| 496 | err = PTR_ERR(msg); | ||
| 497 | goto errout; | ||
| 498 | } | ||
| 499 | |||
| 500 | err = genlmsg_unicast(msg, info->snd_pid); | ||
| 501 | errout: | ||
| 502 | return err; | ||
| 503 | } | ||
| 504 | |||
| 505 | static int genl_ctrl_event(int event, void *data) | ||
| 506 | { | ||
| 507 | struct sk_buff *msg; | ||
| 508 | |||
| 509 | if (genl_sock == NULL) | ||
| 510 | return 0; | ||
| 511 | |||
| 512 | switch (event) { | ||
| 513 | case CTRL_CMD_NEWFAMILY: | ||
| 514 | case CTRL_CMD_DELFAMILY: | ||
| 515 | msg = ctrl_build_msg(data, 0, 0, event); | ||
| 516 | if (IS_ERR(msg)) | ||
| 517 | return PTR_ERR(msg); | ||
| 518 | |||
| 519 | genlmsg_multicast(msg, 0, GENL_ID_CTRL); | ||
| 520 | break; | ||
| 521 | } | ||
| 522 | |||
| 523 | return 0; | ||
| 524 | } | ||
| 525 | |||
| 526 | static struct genl_ops genl_ctrl_ops = { | ||
| 527 | .cmd = CTRL_CMD_GETFAMILY, | ||
| 528 | .doit = ctrl_getfamily, | ||
| 529 | .dumpit = ctrl_dumpfamily, | ||
| 530 | .policy = ctrl_policy, | ||
| 531 | }; | ||
| 532 | |||
| 533 | static struct genl_family genl_ctrl = { | ||
| 534 | .id = GENL_ID_CTRL, | ||
| 535 | .name = "nlctrl", | ||
| 536 | .version = 0x1, | ||
| 537 | .maxattr = CTRL_ATTR_MAX, | ||
| 538 | .owner = THIS_MODULE, | ||
| 539 | }; | ||
| 540 | |||
| 541 | static int __init genl_init(void) | ||
| 542 | { | ||
| 543 | int i, err; | ||
| 544 | |||
| 545 | for (i = 0; i < GENL_FAM_TAB_SIZE; i++) | ||
| 546 | INIT_LIST_HEAD(&family_ht[i]); | ||
| 547 | |||
| 548 | err = genl_register_family(&genl_ctrl); | ||
| 549 | if (err < 0) | ||
| 550 | goto errout; | ||
| 551 | |||
| 552 | err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops); | ||
| 553 | if (err < 0) | ||
| 554 | goto errout_register; | ||
| 555 | |||
| 556 | netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); | ||
| 557 | genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID, | ||
| 558 | genl_rcv, THIS_MODULE); | ||
| 559 | if (genl_sock == NULL) { | ||
| 560 | panic("GENL: Cannot initialize generic netlink\n"); | ||
| 561 | return -ENOMEM; | ||
| 562 | } | ||
| 563 | |||
| 564 | return 0; | ||
| 565 | |||
| 566 | errout_register: | ||
| 567 | genl_unregister_family(&genl_ctrl); | ||
| 568 | errout: | ||
| 569 | panic("GENL: Cannot register controller: %d\n", err); | ||
| 570 | return err; | ||
| 571 | } | ||
| 572 | |||
| 573 | subsys_initcall(genl_init); | ||
| 574 | |||
| 575 | EXPORT_SYMBOL(genl_sock); | ||
| 576 | EXPORT_SYMBOL(genl_register_ops); | ||
| 577 | EXPORT_SYMBOL(genl_unregister_ops); | ||
| 578 | EXPORT_SYMBOL(genl_register_family); | ||
| 579 | EXPORT_SYMBOL(genl_unregister_family); | ||
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index b18fe5043019..8631b65a7312 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c  | |||
| @@ -240,8 +240,7 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) | |||
| 240 | if ((s = rose_neigh_list) == rose_neigh) { | 240 | if ((s = rose_neigh_list) == rose_neigh) { | 
| 241 | rose_neigh_list = rose_neigh->next; | 241 | rose_neigh_list = rose_neigh->next; | 
| 242 | spin_unlock_bh(&rose_neigh_list_lock); | 242 | spin_unlock_bh(&rose_neigh_list_lock); | 
| 243 | if (rose_neigh->digipeat != NULL) | 243 | kfree(rose_neigh->digipeat); | 
| 244 | kfree(rose_neigh->digipeat); | ||
| 245 | kfree(rose_neigh); | 244 | kfree(rose_neigh); | 
| 246 | return; | 245 | return; | 
| 247 | } | 246 | } | 
| @@ -250,8 +249,7 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) | |||
| 250 | if (s->next == rose_neigh) { | 249 | if (s->next == rose_neigh) { | 
| 251 | s->next = rose_neigh->next; | 250 | s->next = rose_neigh->next; | 
| 252 | spin_unlock_bh(&rose_neigh_list_lock); | 251 | spin_unlock_bh(&rose_neigh_list_lock); | 
| 253 | if (rose_neigh->digipeat != NULL) | 252 | kfree(rose_neigh->digipeat); | 
| 254 | kfree(rose_neigh->digipeat); | ||
| 255 | kfree(rose_neigh); | 253 | kfree(rose_neigh); | 
| 256 | return; | 254 | return; | 
| 257 | } | 255 | } | 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 29d8b9a4d162..75470486e405 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c  | |||
| @@ -298,8 +298,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base, | |||
| 298 | return 0; | 298 | return 0; | 
| 299 | 299 | ||
| 300 | errout: | 300 | errout: | 
| 301 | if (f) | 301 | kfree(f); | 
| 302 | kfree(f); | ||
| 303 | return err; | 302 | return err; | 
| 304 | } | 303 | } | 
| 305 | 304 | ||
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 02996ac05c75..520ff716dab2 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c  | |||
| @@ -525,8 +525,7 @@ reinsert: | |||
| 525 | return 0; | 525 | return 0; | 
| 526 | 526 | ||
| 527 | errout: | 527 | errout: | 
| 528 | if (f) | 528 | kfree(f); | 
| 529 | kfree(f); | ||
| 530 | return err; | 529 | return err; | 
| 531 | } | 530 | } | 
| 532 | 531 | ||
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 006168d69376..572f06be3b02 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h  | |||
| @@ -555,8 +555,7 @@ insert: | |||
| 555 | goto insert; | 555 | goto insert; | 
| 556 | 556 | ||
| 557 | errout: | 557 | errout: | 
| 558 | if (f) | 558 | kfree(f); | 
| 559 | kfree(f); | ||
| 560 | errout2: | 559 | errout2: | 
| 561 | tcf_exts_destroy(tp, &e); | 560 | tcf_exts_destroy(tp, &e); | 
| 562 | return err; | 561 | return err; | 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 404d9d83a7fa..9f921174c8ab 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c  | |||
| @@ -194,8 +194,7 @@ found: | |||
| 194 | } | 194 | } | 
| 195 | tcf_unbind_filter(tp, &r->res); | 195 | tcf_unbind_filter(tp, &r->res); | 
| 196 | tcf_exts_destroy(tp, &r->exts); | 196 | tcf_exts_destroy(tp, &r->exts); | 
| 197 | if (f) | 197 | kfree(f); | 
| 198 | kfree(f); | ||
| 199 | return 0; | 198 | return 0; | 
| 200 | } | 199 | } | 
| 201 | 200 | ||
| @@ -442,10 +441,8 @@ static void tcindex_destroy(struct tcf_proto *tp) | |||
| 442 | walker.skip = 0; | 441 | walker.skip = 0; | 
| 443 | walker.fn = &tcindex_destroy_element; | 442 | walker.fn = &tcindex_destroy_element; | 
| 444 | tcindex_walk(tp,&walker); | 443 | tcindex_walk(tp,&walker); | 
| 445 | if (p->perfect) | 444 | kfree(p->perfect); | 
| 446 | kfree(p->perfect); | 445 | kfree(p->h); | 
| 447 | if (p->h) | ||
| 448 | kfree(p->h); | ||
| 449 | kfree(p); | 446 | kfree(p); | 
| 450 | tp->root = NULL; | 447 | tp->root = NULL; | 
| 451 | } | 448 | } | 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 364b87d86455..2b670479dde1 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c  | |||
| @@ -347,7 +347,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n) | |||
| 347 | if (n->ht_down) | 347 | if (n->ht_down) | 
| 348 | n->ht_down->refcnt--; | 348 | n->ht_down->refcnt--; | 
| 349 | #ifdef CONFIG_CLS_U32_PERF | 349 | #ifdef CONFIG_CLS_U32_PERF | 
| 350 | if (n && (NULL != n->pf)) | 350 | if (n) | 
| 351 | kfree(n->pf); | 351 | kfree(n->pf); | 
| 352 | #endif | 352 | #endif | 
| 353 | kfree(n); | 353 | kfree(n); | 
| @@ -680,7 +680,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
| 680 | return 0; | 680 | return 0; | 
| 681 | } | 681 | } | 
| 682 | #ifdef CONFIG_CLS_U32_PERF | 682 | #ifdef CONFIG_CLS_U32_PERF | 
| 683 | if (n && (NULL != n->pf)) | 683 | if (n) | 
| 684 | kfree(n->pf); | 684 | kfree(n->pf); | 
| 685 | #endif | 685 | #endif | 
| 686 | kfree(n); | 686 | kfree(n); | 
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index cf68a59fdc5a..700844d49d79 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c  | |||
| @@ -561,8 +561,7 @@ static int meta_var_change(struct meta_value *dst, struct rtattr *rta) | |||
| 561 | 561 | ||
| 562 | static void meta_var_destroy(struct meta_value *v) | 562 | static void meta_var_destroy(struct meta_value *v) | 
| 563 | { | 563 | { | 
| 564 | if (v->val) | 564 | kfree((void *) v->val); | 
| 565 | kfree((void *) v->val); | ||
| 566 | } | 565 | } | 
| 567 | 566 | ||
| 568 | static void meta_var_apply_extras(struct meta_value *v, | 567 | static void meta_var_apply_extras(struct meta_value *v, | 
diff --git a/net/sched/ematch.c b/net/sched/ematch.c index ebfe2e7d21bd..64b047c65568 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c  | |||
| @@ -298,6 +298,11 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta, | |||
| 298 | struct tcf_ematch_tree_hdr *tree_hdr; | 298 | struct tcf_ematch_tree_hdr *tree_hdr; | 
| 299 | struct tcf_ematch *em; | 299 | struct tcf_ematch *em; | 
| 300 | 300 | ||
| 301 | if (!rta) { | ||
| 302 | memset(tree, 0, sizeof(*tree)); | ||
| 303 | return 0; | ||
| 304 | } | ||
| 305 | |||
| 301 | if (rtattr_parse_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0) | 306 | if (rtattr_parse_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0) | 
| 302 | goto errout; | 307 | goto errout; | 
| 303 | 308 | ||
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 25c171c32715..29a2dd9f3029 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c  | |||
| @@ -15,247 +15,281 @@ | |||
| 15 | * from Ren Liu | 15 | * from Ren Liu | 
| 16 | * - More error checks | 16 | * - More error checks | 
| 17 | * | 17 | * | 
| 18 | * | 18 | * For all the glorious comments look at include/net/red.h | 
| 19 | * | ||
| 20 | * For all the glorious comments look at Alexey's sch_red.c | ||
| 21 | */ | 19 | */ | 
| 22 | 20 | ||
| 23 | #include <linux/config.h> | 21 | #include <linux/config.h> | 
| 24 | #include <linux/module.h> | 22 | #include <linux/module.h> | 
| 25 | #include <asm/uaccess.h> | ||
| 26 | #include <asm/system.h> | ||
| 27 | #include <linux/bitops.h> | ||
| 28 | #include <linux/types.h> | 23 | #include <linux/types.h> | 
| 29 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> | 
| 30 | #include <linux/sched.h> | ||
| 31 | #include <linux/string.h> | ||
| 32 | #include <linux/mm.h> | ||
| 33 | #include <linux/socket.h> | ||
| 34 | #include <linux/sockios.h> | ||
| 35 | #include <linux/in.h> | ||
| 36 | #include <linux/errno.h> | ||
| 37 | #include <linux/interrupt.h> | ||
| 38 | #include <linux/if_ether.h> | ||
| 39 | #include <linux/inet.h> | ||
| 40 | #include <linux/netdevice.h> | 25 | #include <linux/netdevice.h> | 
| 41 | #include <linux/etherdevice.h> | ||
| 42 | #include <linux/notifier.h> | ||
| 43 | #include <net/ip.h> | ||
| 44 | #include <net/route.h> | ||
| 45 | #include <linux/skbuff.h> | 26 | #include <linux/skbuff.h> | 
| 46 | #include <net/sock.h> | ||
| 47 | #include <net/pkt_sched.h> | 27 | #include <net/pkt_sched.h> | 
| 28 | #include <net/red.h> | ||
| 48 | 29 | ||
| 49 | #if 1 /* control */ | 30 | #define GRED_DEF_PRIO (MAX_DPs / 2) | 
| 50 | #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) | 31 | #define GRED_VQ_MASK (MAX_DPs - 1) | 
| 51 | #else | ||
| 52 | #define DPRINTK(format,args...) | ||
| 53 | #endif | ||
| 54 | |||
| 55 | #if 0 /* data */ | ||
| 56 | #define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args) | ||
| 57 | #else | ||
| 58 | #define D2PRINTK(format,args...) | ||
| 59 | #endif | ||
| 60 | 32 | ||
| 61 | struct gred_sched_data; | 33 | struct gred_sched_data; | 
| 62 | struct gred_sched; | 34 | struct gred_sched; | 
| 63 | 35 | ||
| 64 | struct gred_sched_data | 36 | struct gred_sched_data | 
| 65 | { | 37 | { | 
| 66 | /* Parameters */ | ||
| 67 | u32 limit; /* HARD maximal queue length */ | 38 | u32 limit; /* HARD maximal queue length */ | 
| 68 | u32 qth_min; /* Min average length threshold: A scaled */ | ||
| 69 | u32 qth_max; /* Max average length threshold: A scaled */ | ||
| 70 | u32 DP; /* the drop pramaters */ | 39 | u32 DP; /* the drop pramaters */ | 
| 71 | char Wlog; /* log(W) */ | ||
| 72 | char Plog; /* random number bits */ | ||
| 73 | u32 Scell_max; | ||
| 74 | u32 Rmask; | ||
| 75 | u32 bytesin; /* bytes seen on virtualQ so far*/ | 40 | u32 bytesin; /* bytes seen on virtualQ so far*/ | 
| 76 | u32 packetsin; /* packets seen on virtualQ so far*/ | 41 | u32 packetsin; /* packets seen on virtualQ so far*/ | 
| 77 | u32 backlog; /* bytes on the virtualQ */ | 42 | u32 backlog; /* bytes on the virtualQ */ | 
| 78 | u32 forced; /* packets dropped for exceeding limits */ | 43 | u8 prio; /* the prio of this vq */ | 
| 79 | u32 early; /* packets dropped as a warning */ | 44 | |
| 80 | u32 other; /* packets dropped by invoking drop() */ | 45 | struct red_parms parms; | 
| 81 | u32 pdrop; /* packets dropped because we exceeded physical queue limits */ | 46 | struct red_stats stats; | 
| 82 | char Scell_log; | 47 | }; | 
| 83 | u8 Stab[256]; | 48 | |
| 84 | u8 prio; /* the prio of this vq */ | 49 | enum { | 
| 85 | 50 | GRED_WRED_MODE = 1, | |
| 86 | /* Variables */ | 51 | GRED_RIO_MODE, | 
| 87 | unsigned long qave; /* Average queue length: A scaled */ | ||
| 88 | int qcount; /* Packets since last random number generation */ | ||
| 89 | u32 qR; /* Cached random number */ | ||
| 90 | |||
| 91 | psched_time_t qidlestart; /* Start of idle period */ | ||
| 92 | }; | 52 | }; | 
| 93 | 53 | ||
| 94 | struct gred_sched | 54 | struct gred_sched | 
| 95 | { | 55 | { | 
| 96 | struct gred_sched_data *tab[MAX_DPs]; | 56 | struct gred_sched_data *tab[MAX_DPs]; | 
| 97 | u32 DPs; | 57 | unsigned long flags; | 
| 98 | u32 def; | 58 | u32 red_flags; | 
| 99 | u8 initd; | 59 | u32 DPs; | 
| 100 | u8 grio; | 60 | u32 def; | 
| 101 | u8 eqp; | 61 | struct red_parms wred_set; | 
| 102 | }; | 62 | }; | 
| 103 | 63 | ||
| 104 | static int | 64 | static inline int gred_wred_mode(struct gred_sched *table) | 
| 105 | gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) | ||
| 106 | { | 65 | { | 
| 107 | psched_time_t now; | 66 | return test_bit(GRED_WRED_MODE, &table->flags); | 
| 108 | struct gred_sched_data *q=NULL; | 67 | } | 
| 109 | struct gred_sched *t= qdisc_priv(sch); | 68 | |
| 110 | unsigned long qave=0; | 69 | static inline void gred_enable_wred_mode(struct gred_sched *table) | 
| 111 | int i=0; | 70 | { | 
| 71 | __set_bit(GRED_WRED_MODE, &table->flags); | ||
| 72 | } | ||
| 73 | |||
| 74 | static inline void gred_disable_wred_mode(struct gred_sched *table) | ||
| 75 | { | ||
| 76 | __clear_bit(GRED_WRED_MODE, &table->flags); | ||
| 77 | } | ||
| 78 | |||
| 79 | static inline int gred_rio_mode(struct gred_sched *table) | ||
| 80 | { | ||
| 81 | return test_bit(GRED_RIO_MODE, &table->flags); | ||
| 82 | } | ||
| 83 | |||
| 84 | static inline void gred_enable_rio_mode(struct gred_sched *table) | ||
| 85 | { | ||
| 86 | __set_bit(GRED_RIO_MODE, &table->flags); | ||
| 87 | } | ||
| 88 | |||
| 89 | static inline void gred_disable_rio_mode(struct gred_sched *table) | ||
| 90 | { | ||
| 91 | __clear_bit(GRED_RIO_MODE, &table->flags); | ||
| 92 | } | ||
| 93 | |||
| 94 | static inline int gred_wred_mode_check(struct Qdisc *sch) | ||
| 95 | { | ||
| 96 | struct gred_sched *table = qdisc_priv(sch); | ||
| 97 | int i; | ||
| 112 | 98 | ||
| 113 | if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) { | 99 | /* Really ugly O(n^2) but shouldn't be necessary too frequent. */ | 
| 114 | D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n"); | 100 | for (i = 0; i < table->DPs; i++) { | 
| 115 | goto do_enqueue; | 101 | struct gred_sched_data *q = table->tab[i]; | 
| 102 | int n; | ||
| 103 | |||
| 104 | if (q == NULL) | ||
| 105 | continue; | ||
| 106 | |||
| 107 | for (n = 0; n < table->DPs; n++) | ||
| 108 | if (table->tab[n] && table->tab[n] != q && | ||
| 109 | table->tab[n]->prio == q->prio) | ||
| 110 | return 1; | ||
| 116 | } | 111 | } | 
| 117 | 112 | ||
| 113 | return 0; | ||
| 114 | } | ||
| 115 | |||
| 116 | static inline unsigned int gred_backlog(struct gred_sched *table, | ||
| 117 | struct gred_sched_data *q, | ||
| 118 | struct Qdisc *sch) | ||
| 119 | { | ||
| 120 | if (gred_wred_mode(table)) | ||
| 121 | return sch->qstats.backlog; | ||
| 122 | else | ||
| 123 | return q->backlog; | ||
| 124 | } | ||
| 125 | |||
| 126 | static inline u16 tc_index_to_dp(struct sk_buff *skb) | ||
| 127 | { | ||
| 128 | return skb->tc_index & GRED_VQ_MASK; | ||
| 129 | } | ||
| 130 | |||
| 131 | static inline void gred_load_wred_set(struct gred_sched *table, | ||
| 132 | struct gred_sched_data *q) | ||
| 133 | { | ||
| 134 | q->parms.qavg = table->wred_set.qavg; | ||
| 135 | q->parms.qidlestart = table->wred_set.qidlestart; | ||
| 136 | } | ||
| 137 | |||
| 138 | static inline void gred_store_wred_set(struct gred_sched *table, | ||
| 139 | struct gred_sched_data *q) | ||
| 140 | { | ||
| 141 | table->wred_set.qavg = q->parms.qavg; | ||
| 142 | } | ||
| 143 | |||
| 144 | static inline int gred_use_ecn(struct gred_sched *t) | ||
| 145 | { | ||
| 146 | return t->red_flags & TC_RED_ECN; | ||
| 147 | } | ||
| 118 | 148 | ||
| 119 | if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) { | 149 | static inline int gred_use_harddrop(struct gred_sched *t) | 
| 120 | printk("GRED: setting to default (%d)\n ",t->def); | 150 | { | 
| 121 | if (!(q=t->tab[t->def])) { | 151 | return t->red_flags & TC_RED_HARDDROP; | 
| 122 | DPRINTK("GRED: setting to default FAILED! dropping!! " | 152 | } | 
| 123 | "(%d)\n ", t->def); | 153 | |
| 124 | goto drop; | 154 | static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 
| 155 | { | ||
| 156 | struct gred_sched_data *q=NULL; | ||
| 157 | struct gred_sched *t= qdisc_priv(sch); | ||
| 158 | unsigned long qavg = 0; | ||
| 159 | u16 dp = tc_index_to_dp(skb); | ||
| 160 | |||
| 161 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | ||
| 162 | dp = t->def; | ||
| 163 | |||
| 164 | if ((q = t->tab[dp]) == NULL) { | ||
| 165 | /* Pass through packets not assigned to a DP | ||
| 166 | * if no default DP has been configured. This | ||
| 167 | * allows for DP flows to be left untouched. | ||
| 168 | */ | ||
| 169 | if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) | ||
| 170 | return qdisc_enqueue_tail(skb, sch); | ||
| 171 | else | ||
| 172 | goto drop; | ||
| 125 | } | 173 | } | 
| 174 | |||
| 126 | /* fix tc_index? --could be controvesial but needed for | 175 | /* fix tc_index? --could be controvesial but needed for | 
| 127 | requeueing */ | 176 | requeueing */ | 
| 128 | skb->tc_index=(skb->tc_index&0xfffffff0) | t->def; | 177 | skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; | 
| 129 | } | 178 | } | 
| 130 | 179 | ||
| 131 | D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d " | 180 | /* sum up all the qaves of prios <= to ours to get the new qave */ | 
| 132 | "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog, | 181 | if (!gred_wred_mode(t) && gred_rio_mode(t)) { | 
| 133 | sch->qstats.backlog); | 182 | int i; | 
| 134 | /* sum up all the qaves of prios <= to ours to get the new qave*/ | 183 | |
| 135 | if (!t->eqp && t->grio) { | 184 | for (i = 0; i < t->DPs; i++) { | 
| 136 | for (i=0;i<t->DPs;i++) { | 185 | if (t->tab[i] && t->tab[i]->prio < q->prio && | 
| 137 | if ((!t->tab[i]) || (i==q->DP)) | 186 | !red_is_idling(&t->tab[i]->parms)) | 
| 138 | continue; | 187 | qavg +=t->tab[i]->parms.qavg; | 
| 139 | |||
| 140 | if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart))) | ||
| 141 | qave +=t->tab[i]->qave; | ||
| 142 | } | 188 | } | 
| 143 | 189 | ||
| 144 | } | 190 | } | 
| 145 | 191 | ||
| 146 | q->packetsin++; | 192 | q->packetsin++; | 
| 147 | q->bytesin+=skb->len; | 193 | q->bytesin += skb->len; | 
| 148 | 194 | ||
| 149 | if (t->eqp && t->grio) { | 195 | if (gred_wred_mode(t)) | 
| 150 | qave=0; | 196 | gred_load_wred_set(t, q); | 
| 151 | q->qave=t->tab[t->def]->qave; | ||
| 152 | q->qidlestart=t->tab[t->def]->qidlestart; | ||
| 153 | } | ||
| 154 | 197 | ||
| 155 | if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { | 198 | q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch)); | 
| 156 | long us_idle; | ||
| 157 | PSCHED_GET_TIME(now); | ||
| 158 | us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); | ||
| 159 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
| 160 | 199 | ||
| 161 | q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF]; | 200 | if (red_is_idling(&q->parms)) | 
| 162 | } else { | 201 | red_end_of_idle_period(&q->parms); | 
| 163 | if (t->eqp) { | ||
| 164 | q->qave += sch->qstats.backlog - (q->qave >> q->Wlog); | ||
| 165 | } else { | ||
| 166 | q->qave += q->backlog - (q->qave >> q->Wlog); | ||
| 167 | } | ||
| 168 | 202 | ||
| 169 | } | 203 | if (gred_wred_mode(t)) | 
| 170 | 204 | gred_store_wred_set(t, q); | |
| 171 | |||
| 172 | if (t->eqp && t->grio) | ||
| 173 | t->tab[t->def]->qave=q->qave; | ||
| 174 | |||
| 175 | if ((q->qave+qave) < q->qth_min) { | ||
| 176 | q->qcount = -1; | ||
| 177 | enqueue: | ||
| 178 | if (q->backlog + skb->len <= q->limit) { | ||
| 179 | q->backlog += skb->len; | ||
| 180 | do_enqueue: | ||
| 181 | __skb_queue_tail(&sch->q, skb); | ||
| 182 | sch->qstats.backlog += skb->len; | ||
| 183 | sch->bstats.bytes += skb->len; | ||
| 184 | sch->bstats.packets++; | ||
| 185 | return 0; | ||
| 186 | } else { | ||
| 187 | q->pdrop++; | ||
| 188 | } | ||
| 189 | 205 | ||
| 190 | drop: | 206 | switch (red_action(&q->parms, q->parms.qavg + qavg)) { | 
| 191 | kfree_skb(skb); | 207 | case RED_DONT_MARK: | 
| 192 | sch->qstats.drops++; | 208 | break; | 
| 193 | return NET_XMIT_DROP; | 209 | |
| 194 | } | 210 | case RED_PROB_MARK: | 
| 195 | if ((q->qave+qave) >= q->qth_max) { | 211 | sch->qstats.overlimits++; | 
| 196 | q->qcount = -1; | 212 | if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { | 
| 197 | sch->qstats.overlimits++; | 213 | q->stats.prob_drop++; | 
| 198 | q->forced++; | 214 | goto congestion_drop; | 
| 199 | goto drop; | 215 | } | 
| 216 | |||
| 217 | q->stats.prob_mark++; | ||
| 218 | break; | ||
| 219 | |||
| 220 | case RED_HARD_MARK: | ||
| 221 | sch->qstats.overlimits++; | ||
| 222 | if (gred_use_harddrop(t) || !gred_use_ecn(t) || | ||
| 223 | !INET_ECN_set_ce(skb)) { | ||
| 224 | q->stats.forced_drop++; | ||
| 225 | goto congestion_drop; | ||
| 226 | } | ||
| 227 | q->stats.forced_mark++; | ||
| 228 | break; | ||
| 200 | } | 229 | } | 
| 201 | if (++q->qcount) { | 230 | |
| 202 | if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR) | 231 | if (q->backlog + skb->len <= q->limit) { | 
| 203 | goto enqueue; | 232 | q->backlog += skb->len; | 
| 204 | q->qcount = 0; | 233 | return qdisc_enqueue_tail(skb, sch); | 
| 205 | q->qR = net_random()&q->Rmask; | ||
| 206 | sch->qstats.overlimits++; | ||
| 207 | q->early++; | ||
| 208 | goto drop; | ||
| 209 | } | 234 | } | 
| 210 | q->qR = net_random()&q->Rmask; | 235 | |
| 211 | goto enqueue; | 236 | q->stats.pdrop++; | 
| 237 | drop: | ||
| 238 | return qdisc_drop(skb, sch); | ||
| 239 | |||
| 240 | congestion_drop: | ||
| 241 | qdisc_drop(skb, sch); | ||
| 242 | return NET_XMIT_CN; | ||
| 212 | } | 243 | } | 
| 213 | 244 | ||
| 214 | static int | 245 | static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) | 
| 215 | gred_requeue(struct sk_buff *skb, struct Qdisc* sch) | ||
| 216 | { | 246 | { | 
| 247 | struct gred_sched *t = qdisc_priv(sch); | ||
| 217 | struct gred_sched_data *q; | 248 | struct gred_sched_data *q; | 
| 218 | struct gred_sched *t= qdisc_priv(sch); | 249 | u16 dp = tc_index_to_dp(skb); | 
| 219 | q= t->tab[(skb->tc_index&0xf)]; | 250 | |
| 220 | /* error checking here -- probably unnecessary */ | 251 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | 
| 221 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 252 | if (net_ratelimit()) | 
| 222 | 253 | printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x " | |
| 223 | __skb_queue_head(&sch->q, skb); | 254 | "for requeue, screwing up backlog.\n", | 
| 224 | sch->qstats.backlog += skb->len; | 255 | tc_index_to_dp(skb)); | 
| 225 | sch->qstats.requeues++; | 256 | } else { | 
| 226 | q->backlog += skb->len; | 257 | if (red_is_idling(&q->parms)) | 
| 227 | return 0; | 258 | red_end_of_idle_period(&q->parms); | 
| 259 | q->backlog += skb->len; | ||
| 260 | } | ||
| 261 | |||
| 262 | return qdisc_requeue(skb, sch); | ||
| 228 | } | 263 | } | 
| 229 | 264 | ||
| 230 | static struct sk_buff * | 265 | static struct sk_buff *gred_dequeue(struct Qdisc* sch) | 
| 231 | gred_dequeue(struct Qdisc* sch) | ||
| 232 | { | 266 | { | 
| 233 | struct sk_buff *skb; | 267 | struct sk_buff *skb; | 
| 234 | struct gred_sched_data *q; | 268 | struct gred_sched *t = qdisc_priv(sch); | 
| 235 | struct gred_sched *t= qdisc_priv(sch); | 269 | |
| 270 | skb = qdisc_dequeue_head(sch); | ||
| 236 | 271 | ||
| 237 | skb = __skb_dequeue(&sch->q); | ||
| 238 | if (skb) { | 272 | if (skb) { | 
| 239 | sch->qstats.backlog -= skb->len; | 273 | struct gred_sched_data *q; | 
| 240 | q= t->tab[(skb->tc_index&0xf)]; | 274 | u16 dp = tc_index_to_dp(skb); | 
| 241 | if (q) { | 275 | |
| 242 | q->backlog -= skb->len; | 276 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | 
| 243 | if (!q->backlog && !t->eqp) | 277 | if (net_ratelimit()) | 
| 244 | PSCHED_GET_TIME(q->qidlestart); | 278 | printk(KERN_WARNING "GRED: Unable to relocate " | 
| 279 | "VQ 0x%x after dequeue, screwing up " | ||
| 280 | "backlog.\n", tc_index_to_dp(skb)); | ||
| 245 | } else { | 281 | } else { | 
| 246 | D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); | 282 | q->backlog -= skb->len; | 
| 283 | |||
| 284 | if (!q->backlog && !gred_wred_mode(t)) | ||
| 285 | red_start_of_idle_period(&q->parms); | ||
| 247 | } | 286 | } | 
| 287 | |||
| 248 | return skb; | 288 | return skb; | 
| 249 | } | 289 | } | 
| 250 | 290 | ||
| 251 | if (t->eqp) { | 291 | if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) | 
| 252 | q= t->tab[t->def]; | 292 | red_start_of_idle_period(&t->wred_set); | 
| 253 | if (!q) | ||
| 254 | D2PRINTK("no default VQ set: Results will be " | ||
| 255 | "screwed up\n"); | ||
| 256 | else | ||
| 257 | PSCHED_GET_TIME(q->qidlestart); | ||
| 258 | } | ||
| 259 | 293 | ||
| 260 | return NULL; | 294 | return NULL; | 
| 261 | } | 295 | } | 
| @@ -263,36 +297,34 @@ gred_dequeue(struct Qdisc* sch) | |||
| 263 | static unsigned int gred_drop(struct Qdisc* sch) | 297 | static unsigned int gred_drop(struct Qdisc* sch) | 
| 264 | { | 298 | { | 
| 265 | struct sk_buff *skb; | 299 | struct sk_buff *skb; | 
| 300 | struct gred_sched *t = qdisc_priv(sch); | ||
| 266 | 301 | ||
| 267 | struct gred_sched_data *q; | 302 | skb = qdisc_dequeue_tail(sch); | 
| 268 | struct gred_sched *t= qdisc_priv(sch); | ||
| 269 | |||
| 270 | skb = __skb_dequeue_tail(&sch->q); | ||
| 271 | if (skb) { | 303 | if (skb) { | 
| 272 | unsigned int len = skb->len; | 304 | unsigned int len = skb->len; | 
| 273 | sch->qstats.backlog -= len; | 305 | struct gred_sched_data *q; | 
| 274 | sch->qstats.drops++; | 306 | u16 dp = tc_index_to_dp(skb); | 
| 275 | q= t->tab[(skb->tc_index&0xf)]; | 307 | |
| 276 | if (q) { | 308 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | 
| 277 | q->backlog -= len; | 309 | if (net_ratelimit()) | 
| 278 | q->other++; | 310 | printk(KERN_WARNING "GRED: Unable to relocate " | 
| 279 | if (!q->backlog && !t->eqp) | 311 | "VQ 0x%x while dropping, screwing up " | 
| 280 | PSCHED_GET_TIME(q->qidlestart); | 312 | "backlog.\n", tc_index_to_dp(skb)); | 
| 281 | } else { | 313 | } else { | 
| 282 | D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); | 314 | q->backlog -= len; | 
| 315 | q->stats.other++; | ||
| 316 | |||
| 317 | if (!q->backlog && !gred_wred_mode(t)) | ||
| 318 | red_start_of_idle_period(&q->parms); | ||
| 283 | } | 319 | } | 
| 284 | 320 | ||
| 285 | kfree_skb(skb); | 321 | qdisc_drop(skb, sch); | 
| 286 | return len; | 322 | return len; | 
| 287 | } | 323 | } | 
| 288 | 324 | ||
| 289 | q=t->tab[t->def]; | 325 | if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) | 
| 290 | if (!q) { | 326 | red_start_of_idle_period(&t->wred_set); | 
| 291 | D2PRINTK("no default VQ set: Results might be screwed up\n"); | ||
| 292 | return 0; | ||
| 293 | } | ||
| 294 | 327 | ||
| 295 | PSCHED_GET_TIME(q->qidlestart); | ||
| 296 | return 0; | 328 | return 0; | 
| 297 | 329 | ||
| 298 | } | 330 | } | 
| @@ -300,293 +332,241 @@ static unsigned int gred_drop(struct Qdisc* sch) | |||
| 300 | static void gred_reset(struct Qdisc* sch) | 332 | static void gred_reset(struct Qdisc* sch) | 
| 301 | { | 333 | { | 
| 302 | int i; | 334 | int i; | 
| 303 | struct gred_sched_data *q; | 335 | struct gred_sched *t = qdisc_priv(sch); | 
| 304 | struct gred_sched *t= qdisc_priv(sch); | 336 | |
| 337 | qdisc_reset_queue(sch); | ||
| 305 | 338 | ||
| 306 | __skb_queue_purge(&sch->q); | 339 | for (i = 0; i < t->DPs; i++) { | 
| 340 | struct gred_sched_data *q = t->tab[i]; | ||
| 307 | 341 | ||
| 308 | sch->qstats.backlog = 0; | 342 | if (!q) | 
| 343 | continue; | ||
| 309 | 344 | ||
| 310 | for (i=0;i<t->DPs;i++) { | 345 | red_restart(&q->parms); | 
| 311 | q= t->tab[i]; | ||
| 312 | if (!q) | ||
| 313 | continue; | ||
| 314 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
| 315 | q->qave = 0; | ||
| 316 | q->qcount = -1; | ||
| 317 | q->backlog = 0; | 346 | q->backlog = 0; | 
| 318 | q->other=0; | ||
| 319 | q->forced=0; | ||
| 320 | q->pdrop=0; | ||
| 321 | q->early=0; | ||
| 322 | } | 347 | } | 
| 323 | } | 348 | } | 
| 324 | 349 | ||
| 325 | static int gred_change(struct Qdisc *sch, struct rtattr *opt) | 350 | static inline void gred_destroy_vq(struct gred_sched_data *q) | 
| 351 | { | ||
| 352 | kfree(q); | ||
| 353 | } | ||
| 354 | |||
| 355 | static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps) | ||
| 326 | { | 356 | { | 
| 327 | struct gred_sched *table = qdisc_priv(sch); | 357 | struct gred_sched *table = qdisc_priv(sch); | 
| 328 | struct gred_sched_data *q; | ||
| 329 | struct tc_gred_qopt *ctl; | ||
| 330 | struct tc_gred_sopt *sopt; | 358 | struct tc_gred_sopt *sopt; | 
| 331 | struct rtattr *tb[TCA_GRED_STAB]; | ||
| 332 | struct rtattr *tb2[TCA_GRED_DPS]; | ||
| 333 | int i; | 359 | int i; | 
| 334 | 360 | ||
| 335 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt)) | 361 | if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt)) | 
| 336 | return -EINVAL; | 362 | return -EINVAL; | 
| 337 | 363 | ||
| 338 | if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) { | 364 | sopt = RTA_DATA(dps); | 
| 339 | rtattr_parse_nested(tb2, TCA_GRED_DPS, opt); | 365 | |
| 366 | if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs) | ||
| 367 | return -EINVAL; | ||
| 340 | 368 | ||
| 341 | if (tb2[TCA_GRED_DPS-1] == 0) | 369 | sch_tree_lock(sch); | 
| 342 | return -EINVAL; | 370 | table->DPs = sopt->DPs; | 
| 371 | table->def = sopt->def_DP; | ||
| 372 | table->red_flags = sopt->flags; | ||
| 373 | |||
| 374 | /* | ||
| 375 | * Every entry point to GRED is synchronized with the above code | ||
| 376 | * and the DP is checked against DPs, i.e. shadowed VQs can no | ||
| 377 | * longer be found so we can unlock right here. | ||
| 378 | */ | ||
| 379 | sch_tree_unlock(sch); | ||
| 380 | |||
| 381 | if (sopt->grio) { | ||
| 382 | gred_enable_rio_mode(table); | ||
| 383 | gred_disable_wred_mode(table); | ||
| 384 | if (gred_wred_mode_check(sch)) | ||
| 385 | gred_enable_wred_mode(table); | ||
| 386 | } else { | ||
| 387 | gred_disable_rio_mode(table); | ||
| 388 | gred_disable_wred_mode(table); | ||
| 389 | } | ||
| 343 | 390 | ||
| 344 | sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); | 391 | for (i = table->DPs; i < MAX_DPs; i++) { | 
| 345 | table->DPs=sopt->DPs; | 392 | if (table->tab[i]) { | 
| 346 | table->def=sopt->def_DP; | 393 | printk(KERN_WARNING "GRED: Warning: Destroying " | 
| 347 | table->grio=sopt->grio; | 394 | "shadowed VQ 0x%x\n", i); | 
| 348 | table->initd=0; | 395 | gred_destroy_vq(table->tab[i]); | 
| 349 | /* probably need to clear all the table DP entries as well */ | 396 | table->tab[i] = NULL; | 
| 350 | return 0; | 397 | } | 
| 351 | } | 398 | } | 
| 352 | 399 | ||
| 400 | return 0; | ||
| 401 | } | ||
| 353 | 402 | ||
| 354 | if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 || | 403 | static inline int gred_change_vq(struct Qdisc *sch, int dp, | 
| 355 | RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || | 404 | struct tc_gred_qopt *ctl, int prio, u8 *stab) | 
| 356 | RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256) | 405 | { | 
| 357 | return -EINVAL; | 406 | struct gred_sched *table = qdisc_priv(sch); | 
| 407 | struct gred_sched_data *q; | ||
| 358 | 408 | ||
| 359 | ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); | 409 | if (table->tab[dp] == NULL) { | 
| 360 | if (ctl->DP > MAX_DPs-1 ) { | 410 | table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL); | 
| 361 | /* misbehaving is punished! Put in the default drop probability */ | 411 | if (table->tab[dp] == NULL) | 
| 362 | DPRINTK("\nGRED: DP %u not in the proper range fixed. New DP " | ||
| 363 | "set to default at %d\n",ctl->DP,table->def); | ||
| 364 | ctl->DP=table->def; | ||
| 365 | } | ||
| 366 | |||
| 367 | if (table->tab[ctl->DP] == NULL) { | ||
| 368 | table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data), | ||
| 369 | GFP_KERNEL); | ||
| 370 | if (NULL == table->tab[ctl->DP]) | ||
| 371 | return -ENOMEM; | 412 | return -ENOMEM; | 
| 372 | memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data))); | 413 | memset(table->tab[dp], 0, sizeof(*q)); | 
| 373 | } | ||
| 374 | q= table->tab[ctl->DP]; | ||
| 375 | |||
| 376 | if (table->grio) { | ||
| 377 | if (ctl->prio <=0) { | ||
| 378 | if (table->def && table->tab[table->def]) { | ||
| 379 | DPRINTK("\nGRED: DP %u does not have a prio" | ||
| 380 | "setting default to %d\n",ctl->DP, | ||
| 381 | table->tab[table->def]->prio); | ||
| 382 | q->prio=table->tab[table->def]->prio; | ||
| 383 | } else { | ||
| 384 | DPRINTK("\nGRED: DP %u does not have a prio" | ||
| 385 | " setting default to 8\n",ctl->DP); | ||
| 386 | q->prio=8; | ||
| 387 | } | ||
| 388 | } else { | ||
| 389 | q->prio=ctl->prio; | ||
| 390 | } | ||
| 391 | } else { | ||
| 392 | q->prio=8; | ||
| 393 | } | 414 | } | 
| 394 | 415 | ||
| 395 | 416 | q = table->tab[dp]; | |
| 396 | q->DP=ctl->DP; | 417 | q->DP = dp; | 
| 397 | q->Wlog = ctl->Wlog; | 418 | q->prio = prio; | 
| 398 | q->Plog = ctl->Plog; | ||
| 399 | q->limit = ctl->limit; | 419 | q->limit = ctl->limit; | 
| 400 | q->Scell_log = ctl->Scell_log; | ||
| 401 | q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; | ||
| 402 | q->Scell_max = (255<<q->Scell_log); | ||
| 403 | q->qth_min = ctl->qth_min<<ctl->Wlog; | ||
| 404 | q->qth_max = ctl->qth_max<<ctl->Wlog; | ||
| 405 | q->qave=0; | ||
| 406 | q->backlog=0; | ||
| 407 | q->qcount = -1; | ||
| 408 | q->other=0; | ||
| 409 | q->forced=0; | ||
| 410 | q->pdrop=0; | ||
| 411 | q->early=0; | ||
| 412 | |||
| 413 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
| 414 | memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); | ||
| 415 | |||
| 416 | if ( table->initd && table->grio) { | ||
| 417 | /* this looks ugly but it's not in the fast path */ | ||
| 418 | for (i=0;i<table->DPs;i++) { | ||
| 419 | if ((!table->tab[i]) || (i==q->DP) ) | ||
| 420 | continue; | ||
| 421 | if (table->tab[i]->prio == q->prio ){ | ||
| 422 | /* WRED mode detected */ | ||
| 423 | table->eqp=1; | ||
| 424 | break; | ||
| 425 | } | ||
| 426 | } | ||
| 427 | } | ||
| 428 | 420 | ||
| 429 | if (!table->initd) { | 421 | if (q->backlog == 0) | 
| 430 | table->initd=1; | 422 | red_end_of_idle_period(&q->parms); | 
| 431 | /* | ||
| 432 | the first entry also goes into the default until | ||
| 433 | over-written | ||
| 434 | */ | ||
| 435 | |||
| 436 | if (table->tab[table->def] == NULL) { | ||
| 437 | table->tab[table->def]= | ||
| 438 | kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL); | ||
| 439 | if (NULL == table->tab[table->def]) | ||
| 440 | return -ENOMEM; | ||
| 441 | |||
| 442 | memset(table->tab[table->def], 0, | ||
| 443 | (sizeof(struct gred_sched_data))); | ||
| 444 | } | ||
| 445 | q= table->tab[table->def]; | ||
| 446 | q->DP=table->def; | ||
| 447 | q->Wlog = ctl->Wlog; | ||
| 448 | q->Plog = ctl->Plog; | ||
| 449 | q->limit = ctl->limit; | ||
| 450 | q->Scell_log = ctl->Scell_log; | ||
| 451 | q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; | ||
| 452 | q->Scell_max = (255<<q->Scell_log); | ||
| 453 | q->qth_min = ctl->qth_min<<ctl->Wlog; | ||
| 454 | q->qth_max = ctl->qth_max<<ctl->Wlog; | ||
| 455 | |||
| 456 | if (table->grio) | ||
| 457 | q->prio=table->tab[ctl->DP]->prio; | ||
| 458 | else | ||
| 459 | q->prio=8; | ||
| 460 | |||
| 461 | q->qcount = -1; | ||
| 462 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
| 463 | memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); | ||
| 464 | } | ||
| 465 | return 0; | ||
| 466 | 423 | ||
| 424 | red_set_parms(&q->parms, | ||
| 425 | ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, | ||
| 426 | ctl->Scell_log, stab); | ||
| 427 | |||
| 428 | return 0; | ||
| 467 | } | 429 | } | 
| 468 | 430 | ||
| 469 | static int gred_init(struct Qdisc *sch, struct rtattr *opt) | 431 | static int gred_change(struct Qdisc *sch, struct rtattr *opt) | 
| 470 | { | 432 | { | 
| 471 | struct gred_sched *table = qdisc_priv(sch); | 433 | struct gred_sched *table = qdisc_priv(sch); | 
| 472 | struct tc_gred_sopt *sopt; | 434 | struct tc_gred_qopt *ctl; | 
| 473 | struct rtattr *tb[TCA_GRED_STAB]; | 435 | struct rtattr *tb[TCA_GRED_MAX]; | 
| 474 | struct rtattr *tb2[TCA_GRED_DPS]; | 436 | int err = -EINVAL, prio = GRED_DEF_PRIO; | 
| 437 | u8 *stab; | ||
| 475 | 438 | ||
| 476 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt)) | 439 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) | 
| 477 | return -EINVAL; | 440 | return -EINVAL; | 
| 478 | 441 | ||
| 479 | if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) { | 442 | if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL) | 
| 480 | rtattr_parse_nested(tb2, TCA_GRED_DPS, opt); | 443 | return gred_change_table_def(sch, opt); | 
| 444 | |||
| 445 | if (tb[TCA_GRED_PARMS-1] == NULL || | ||
| 446 | RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || | ||
| 447 | tb[TCA_GRED_STAB-1] == NULL || | ||
| 448 | RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256) | ||
| 449 | return -EINVAL; | ||
| 450 | |||
| 451 | ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); | ||
| 452 | stab = RTA_DATA(tb[TCA_GRED_STAB-1]); | ||
| 453 | |||
| 454 | if (ctl->DP >= table->DPs) | ||
| 455 | goto errout; | ||
| 481 | 456 | ||
| 482 | if (tb2[TCA_GRED_DPS-1] == 0) | 457 | if (gred_rio_mode(table)) { | 
| 483 | return -EINVAL; | 458 | if (ctl->prio == 0) { | 
| 459 | int def_prio = GRED_DEF_PRIO; | ||
| 484 | 460 | ||
| 485 | sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); | 461 | if (table->tab[table->def]) | 
| 486 | table->DPs=sopt->DPs; | 462 | def_prio = table->tab[table->def]->prio; | 
| 487 | table->def=sopt->def_DP; | 463 | |
| 488 | table->grio=sopt->grio; | 464 | printk(KERN_DEBUG "GRED: DP %u does not have a prio " | 
| 489 | table->initd=0; | 465 | "setting default to %d\n", ctl->DP, def_prio); | 
| 490 | return 0; | 466 | |
| 467 | prio = def_prio; | ||
| 468 | } else | ||
| 469 | prio = ctl->prio; | ||
| 470 | } | ||
| 471 | |||
| 472 | sch_tree_lock(sch); | ||
| 473 | |||
| 474 | err = gred_change_vq(sch, ctl->DP, ctl, prio, stab); | ||
| 475 | if (err < 0) | ||
| 476 | goto errout_locked; | ||
| 477 | |||
| 478 | if (gred_rio_mode(table)) { | ||
| 479 | gred_disable_wred_mode(table); | ||
| 480 | if (gred_wred_mode_check(sch)) | ||
| 481 | gred_enable_wred_mode(table); | ||
| 491 | } | 482 | } | 
| 492 | 483 | ||
| 493 | DPRINTK("\n GRED_INIT error!\n"); | 484 | err = 0; | 
| 494 | return -EINVAL; | 485 | |
| 486 | errout_locked: | ||
| 487 | sch_tree_unlock(sch); | ||
| 488 | errout: | ||
| 489 | return err; | ||
| 495 | } | 490 | } | 
| 496 | 491 | ||
| 497 | static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) | 492 | static int gred_init(struct Qdisc *sch, struct rtattr *opt) | 
| 498 | { | 493 | { | 
| 499 | unsigned long qave; | 494 | struct rtattr *tb[TCA_GRED_MAX]; | 
| 500 | struct rtattr *rta; | ||
| 501 | struct tc_gred_qopt *opt = NULL ; | ||
| 502 | struct tc_gred_qopt *dst; | ||
| 503 | struct gred_sched *table = qdisc_priv(sch); | ||
| 504 | struct gred_sched_data *q; | ||
| 505 | int i; | ||
| 506 | unsigned char *b = skb->tail; | ||
| 507 | 495 | ||
| 508 | rta = (struct rtattr*)b; | 496 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) | 
| 509 | RTA_PUT(skb, TCA_OPTIONS, 0, NULL); | 497 | return -EINVAL; | 
| 510 | 498 | ||
| 511 | opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL); | 499 | if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1]) | 
| 500 | return -EINVAL; | ||
| 512 | 501 | ||
| 513 | if (opt == NULL) { | 502 | return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]); | 
| 514 | DPRINTK("gred_dump:failed to malloc for %Zd\n", | 503 | } | 
| 515 | sizeof(struct tc_gred_qopt)*MAX_DPs); | ||
| 516 | goto rtattr_failure; | ||
| 517 | } | ||
| 518 | 504 | ||
| 519 | memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs); | 505 | static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) | 
| 506 | { | ||
| 507 | struct gred_sched *table = qdisc_priv(sch); | ||
| 508 | struct rtattr *parms, *opts = NULL; | ||
| 509 | int i; | ||
| 510 | struct tc_gred_sopt sopt = { | ||
| 511 | .DPs = table->DPs, | ||
| 512 | .def_DP = table->def, | ||
| 513 | .grio = gred_rio_mode(table), | ||
| 514 | .flags = table->red_flags, | ||
| 515 | }; | ||
| 520 | 516 | ||
| 521 | if (!table->initd) { | 517 | opts = RTA_NEST(skb, TCA_OPTIONS); | 
| 522 | DPRINTK("NO GRED Queues setup!\n"); | 518 | RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt); | 
| 523 | } | 519 | parms = RTA_NEST(skb, TCA_GRED_PARMS); | 
| 520 | |||
| 521 | for (i = 0; i < MAX_DPs; i++) { | ||
| 522 | struct gred_sched_data *q = table->tab[i]; | ||
| 523 | struct tc_gred_qopt opt; | ||
| 524 | 524 | ||
| 525 | for (i=0;i<MAX_DPs;i++) { | 525 | memset(&opt, 0, sizeof(opt)); | 
| 526 | dst= &opt[i]; | ||
| 527 | q= table->tab[i]; | ||
| 528 | 526 | ||
| 529 | if (!q) { | 527 | if (!q) { | 
| 530 | /* hack -- fix at some point with proper message | 528 | /* hack -- fix at some point with proper message | 
| 531 | This is how we indicate to tc that there is no VQ | 529 | This is how we indicate to tc that there is no VQ | 
| 532 | at this DP */ | 530 | at this DP */ | 
| 533 | 531 | ||
| 534 | dst->DP=MAX_DPs+i; | 532 | opt.DP = MAX_DPs + i; | 
| 535 | continue; | 533 | goto append_opt; | 
| 536 | } | 534 | } | 
| 537 | 535 | ||
| 538 | dst->limit=q->limit; | 536 | opt.limit = q->limit; | 
| 539 | dst->qth_min=q->qth_min>>q->Wlog; | 537 | opt.DP = q->DP; | 
| 540 | dst->qth_max=q->qth_max>>q->Wlog; | 538 | opt.backlog = q->backlog; | 
| 541 | dst->DP=q->DP; | 539 | opt.prio = q->prio; | 
| 542 | dst->backlog=q->backlog; | 540 | opt.qth_min = q->parms.qth_min >> q->parms.Wlog; | 
| 543 | if (q->qave) { | 541 | opt.qth_max = q->parms.qth_max >> q->parms.Wlog; | 
| 544 | if (table->eqp && table->grio) { | 542 | opt.Wlog = q->parms.Wlog; | 
| 545 | q->qidlestart=table->tab[table->def]->qidlestart; | 543 | opt.Plog = q->parms.Plog; | 
| 546 | q->qave=table->tab[table->def]->qave; | 544 | opt.Scell_log = q->parms.Scell_log; | 
| 547 | } | 545 | opt.other = q->stats.other; | 
| 548 | if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { | 546 | opt.early = q->stats.prob_drop; | 
| 549 | long idle; | 547 | opt.forced = q->stats.forced_drop; | 
| 550 | psched_time_t now; | 548 | opt.pdrop = q->stats.pdrop; | 
| 551 | PSCHED_GET_TIME(now); | 549 | opt.packets = q->packetsin; | 
| 552 | idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); | 550 | opt.bytesin = q->bytesin; | 
| 553 | qave = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF]; | 551 | |
| 554 | dst->qave = qave >> q->Wlog; | 552 | if (gred_wred_mode(table)) { | 
| 555 | 553 | q->parms.qidlestart = | |
| 556 | } else { | 554 | table->tab[table->def]->parms.qidlestart; | 
| 557 | dst->qave = q->qave >> q->Wlog; | 555 | q->parms.qavg = table->tab[table->def]->parms.qavg; | 
| 558 | } | ||
| 559 | } else { | ||
| 560 | dst->qave = 0; | ||
| 561 | } | 556 | } | 
| 562 | 557 | ||
| 563 | 558 | opt.qave = red_calc_qavg(&q->parms, q->parms.qavg); | |
| 564 | dst->Wlog = q->Wlog; | 559 | |
| 565 | dst->Plog = q->Plog; | 560 | append_opt: | 
| 566 | dst->Scell_log = q->Scell_log; | 561 | RTA_APPEND(skb, sizeof(opt), &opt); | 
| 567 | dst->other = q->other; | ||
| 568 | dst->forced = q->forced; | ||
| 569 | dst->early = q->early; | ||
| 570 | dst->pdrop = q->pdrop; | ||
| 571 | dst->prio = q->prio; | ||
| 572 | dst->packets=q->packetsin; | ||
| 573 | dst->bytesin=q->bytesin; | ||
| 574 | } | 562 | } | 
| 575 | 563 | ||
| 576 | RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt); | 564 | RTA_NEST_END(skb, parms); | 
| 577 | rta->rta_len = skb->tail - b; | ||
| 578 | 565 | ||
| 579 | kfree(opt); | 566 | return RTA_NEST_END(skb, opts); | 
| 580 | return skb->len; | ||
| 581 | 567 | ||
| 582 | rtattr_failure: | 568 | rtattr_failure: | 
| 583 | if (opt) | 569 | return RTA_NEST_CANCEL(skb, opts); | 
| 584 | kfree(opt); | ||
| 585 | DPRINTK("gred_dump: FAILURE!!!!\n"); | ||
| 586 | |||
| 587 | /* also free the opt struct here */ | ||
| 588 | skb_trim(skb, b - skb->data); | ||
| 589 | return -1; | ||
| 590 | } | 570 | } | 
| 591 | 571 | ||
| 592 | static void gred_destroy(struct Qdisc *sch) | 572 | static void gred_destroy(struct Qdisc *sch) | 
| @@ -594,15 +574,13 @@ static void gred_destroy(struct Qdisc *sch) | |||
| 594 | struct gred_sched *table = qdisc_priv(sch); | 574 | struct gred_sched *table = qdisc_priv(sch); | 
| 595 | int i; | 575 | int i; | 
| 596 | 576 | ||
| 597 | for (i = 0;i < table->DPs; i++) { | 577 | for (i = 0; i < table->DPs; i++) { | 
| 598 | if (table->tab[i]) | 578 | if (table->tab[i]) | 
| 599 | kfree(table->tab[i]); | 579 | gred_destroy_vq(table->tab[i]); | 
| 600 | } | 580 | } | 
| 601 | } | 581 | } | 
| 602 | 582 | ||
| 603 | static struct Qdisc_ops gred_qdisc_ops = { | 583 | static struct Qdisc_ops gred_qdisc_ops = { | 
| 604 | .next = NULL, | ||
| 605 | .cl_ops = NULL, | ||
| 606 | .id = "gred", | 584 | .id = "gred", | 
| 607 | .priv_size = sizeof(struct gred_sched), | 585 | .priv_size = sizeof(struct gred_sched), | 
| 608 | .enqueue = gred_enqueue, | 586 | .enqueue = gred_enqueue, | 
| @@ -621,10 +599,13 @@ static int __init gred_module_init(void) | |||
| 621 | { | 599 | { | 
| 622 | return register_qdisc(&gred_qdisc_ops); | 600 | return register_qdisc(&gred_qdisc_ops); | 
| 623 | } | 601 | } | 
| 624 | static void __exit gred_module_exit(void) | 602 | |
| 603 | static void __exit gred_module_exit(void) | ||
| 625 | { | 604 | { | 
| 626 | unregister_qdisc(&gred_qdisc_ops); | 605 | unregister_qdisc(&gred_qdisc_ops); | 
| 627 | } | 606 | } | 
| 607 | |||
| 628 | module_init(gred_module_init) | 608 | module_init(gred_module_init) | 
| 629 | module_exit(gred_module_exit) | 609 | module_exit(gred_module_exit) | 
| 610 | |||
| 630 | MODULE_LICENSE("GPL"); | 611 | MODULE_LICENSE("GPL"); | 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bb9bf8d5003c..cdc8d283791c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c  | |||
| @@ -25,6 +25,8 @@ | |||
| 25 | 25 | ||
| 26 | #include <net/pkt_sched.h> | 26 | #include <net/pkt_sched.h> | 
| 27 | 27 | ||
| 28 | #define VERSION "1.1" | ||
| 29 | |||
| 28 | /* Network Emulation Queuing algorithm. | 30 | /* Network Emulation Queuing algorithm. | 
| 29 | ==================================== | 31 | ==================================== | 
| 30 | 32 | ||
| @@ -185,10 +187,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
| 185 | || q->counter < q->gap /* inside last reordering gap */ | 187 | || q->counter < q->gap /* inside last reordering gap */ | 
| 186 | || q->reorder < get_crandom(&q->reorder_cor)) { | 188 | || q->reorder < get_crandom(&q->reorder_cor)) { | 
| 187 | psched_time_t now; | 189 | psched_time_t now; | 
| 190 | psched_tdiff_t delay; | ||
| 191 | |||
| 192 | delay = tabledist(q->latency, q->jitter, | ||
| 193 | &q->delay_cor, q->delay_dist); | ||
| 194 | |||
| 188 | PSCHED_GET_TIME(now); | 195 | PSCHED_GET_TIME(now); | 
| 189 | PSCHED_TADD2(now, tabledist(q->latency, q->jitter, | 196 | PSCHED_TADD2(now, delay, cb->time_to_send); | 
| 190 | &q->delay_cor, q->delay_dist), | ||
| 191 | cb->time_to_send); | ||
| 192 | ++q->counter; | 197 | ++q->counter; | 
| 193 | ret = q->qdisc->enqueue(skb, q->qdisc); | 198 | ret = q->qdisc->enqueue(skb, q->qdisc); | 
| 194 | } else { | 199 | } else { | 
| @@ -248,24 +253,31 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) | |||
| 248 | const struct netem_skb_cb *cb | 253 | const struct netem_skb_cb *cb | 
| 249 | = (const struct netem_skb_cb *)skb->cb; | 254 | = (const struct netem_skb_cb *)skb->cb; | 
| 250 | psched_time_t now; | 255 | psched_time_t now; | 
| 251 | long delay; | ||
| 252 | 256 | ||
| 253 | /* if more time remaining? */ | 257 | /* if more time remaining? */ | 
| 254 | PSCHED_GET_TIME(now); | 258 | PSCHED_GET_TIME(now); | 
| 255 | delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); | 259 | |
| 256 | pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); | 260 | if (PSCHED_TLESS(cb->time_to_send, now)) { | 
| 257 | if (delay <= 0) { | ||
| 258 | pr_debug("netem_dequeue: return skb=%p\n", skb); | 261 | pr_debug("netem_dequeue: return skb=%p\n", skb); | 
| 259 | sch->q.qlen--; | 262 | sch->q.qlen--; | 
| 260 | sch->flags &= ~TCQ_F_THROTTLED; | 263 | sch->flags &= ~TCQ_F_THROTTLED; | 
| 261 | return skb; | 264 | return skb; | 
| 262 | } | 265 | } else { | 
| 266 | psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now); | ||
| 267 | |||
| 268 | if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { | ||
| 269 | sch->qstats.drops++; | ||
| 263 | 270 | ||
| 264 | mod_timer(&q->timer, jiffies + delay); | 271 | /* After this qlen is confused */ | 
| 265 | sch->flags |= TCQ_F_THROTTLED; | 272 | printk(KERN_ERR "netem: queue discpline %s could not requeue\n", | 
| 273 | q->qdisc->ops->id); | ||
| 266 | 274 | ||
| 267 | if (q->qdisc->ops->requeue(skb, q->qdisc) != 0) | 275 | sch->q.qlen--; | 
| 268 | sch->qstats.drops++; | 276 | } | 
| 277 | |||
| 278 | mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay)); | ||
| 279 | sch->flags |= TCQ_F_THROTTLED; | ||
| 280 | } | ||
| 269 | } | 281 | } | 
| 270 | 282 | ||
| 271 | return NULL; | 283 | return NULL; | 
| @@ -290,11 +302,16 @@ static void netem_reset(struct Qdisc *sch) | |||
| 290 | del_timer_sync(&q->timer); | 302 | del_timer_sync(&q->timer); | 
| 291 | } | 303 | } | 
| 292 | 304 | ||
| 305 | /* Pass size change message down to embedded FIFO */ | ||
| 293 | static int set_fifo_limit(struct Qdisc *q, int limit) | 306 | static int set_fifo_limit(struct Qdisc *q, int limit) | 
| 294 | { | 307 | { | 
| 295 | struct rtattr *rta; | 308 | struct rtattr *rta; | 
| 296 | int ret = -ENOMEM; | 309 | int ret = -ENOMEM; | 
| 297 | 310 | ||
| 311 | /* Hack to avoid sending change message to non-FIFO */ | ||
| 312 | if (strncmp(q->ops->id + 1, "fifo", 4) != 0) | ||
| 313 | return 0; | ||
| 314 | |||
| 298 | rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); | 315 | rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); | 
| 299 | if (rta) { | 316 | if (rta) { | 
| 300 | rta->rta_type = RTM_NEWQDISC; | 317 | rta->rta_type = RTM_NEWQDISC; | 
| @@ -426,6 +443,84 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) | |||
| 426 | return 0; | 443 | return 0; | 
| 427 | } | 444 | } | 
| 428 | 445 | ||
| 446 | /* | ||
| 447 | * Special case version of FIFO queue for use by netem. | ||
| 448 | * It queues in order based on timestamps in skb's | ||
| 449 | */ | ||
| 450 | struct fifo_sched_data { | ||
| 451 | u32 limit; | ||
| 452 | }; | ||
| 453 | |||
| 454 | static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) | ||
| 455 | { | ||
| 456 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
| 457 | struct sk_buff_head *list = &sch->q; | ||
| 458 | const struct netem_skb_cb *ncb | ||
| 459 | = (const struct netem_skb_cb *)nskb->cb; | ||
| 460 | struct sk_buff *skb; | ||
| 461 | |||
| 462 | if (likely(skb_queue_len(list) < q->limit)) { | ||
| 463 | skb_queue_reverse_walk(list, skb) { | ||
| 464 | const struct netem_skb_cb *cb | ||
| 465 | = (const struct netem_skb_cb *)skb->cb; | ||
| 466 | |||
| 467 | if (PSCHED_TLESS(cb->time_to_send, ncb->time_to_send)) | ||
| 468 | break; | ||
| 469 | } | ||
| 470 | |||
| 471 | __skb_queue_after(list, skb, nskb); | ||
| 472 | |||
| 473 | sch->qstats.backlog += nskb->len; | ||
| 474 | sch->bstats.bytes += nskb->len; | ||
| 475 | sch->bstats.packets++; | ||
| 476 | |||
| 477 | return NET_XMIT_SUCCESS; | ||
| 478 | } | ||
| 479 | |||
| 480 | return qdisc_drop(nskb, sch); | ||
| 481 | } | ||
| 482 | |||
| 483 | static int tfifo_init(struct Qdisc *sch, struct rtattr *opt) | ||
| 484 | { | ||
| 485 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
| 486 | |||
| 487 | if (opt) { | ||
| 488 | struct tc_fifo_qopt *ctl = RTA_DATA(opt); | ||
| 489 | if (RTA_PAYLOAD(opt) < sizeof(*ctl)) | ||
| 490 | return -EINVAL; | ||
| 491 | |||
| 492 | q->limit = ctl->limit; | ||
| 493 | } else | ||
| 494 | q->limit = max_t(u32, sch->dev->tx_queue_len, 1); | ||
| 495 | |||
| 496 | return 0; | ||
| 497 | } | ||
| 498 | |||
| 499 | static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb) | ||
| 500 | { | ||
| 501 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
| 502 | struct tc_fifo_qopt opt = { .limit = q->limit }; | ||
| 503 | |||
| 504 | RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | ||
| 505 | return skb->len; | ||
| 506 | |||
| 507 | rtattr_failure: | ||
| 508 | return -1; | ||
| 509 | } | ||
| 510 | |||
| 511 | static struct Qdisc_ops tfifo_qdisc_ops = { | ||
| 512 | .id = "tfifo", | ||
| 513 | .priv_size = sizeof(struct fifo_sched_data), | ||
| 514 | .enqueue = tfifo_enqueue, | ||
| 515 | .dequeue = qdisc_dequeue_head, | ||
| 516 | .requeue = qdisc_requeue, | ||
| 517 | .drop = qdisc_queue_drop, | ||
| 518 | .init = tfifo_init, | ||
| 519 | .reset = qdisc_reset_queue, | ||
| 520 | .change = tfifo_init, | ||
| 521 | .dump = tfifo_dump, | ||
| 522 | }; | ||
| 523 | |||
| 429 | static int netem_init(struct Qdisc *sch, struct rtattr *opt) | 524 | static int netem_init(struct Qdisc *sch, struct rtattr *opt) | 
| 430 | { | 525 | { | 
| 431 | struct netem_sched_data *q = qdisc_priv(sch); | 526 | struct netem_sched_data *q = qdisc_priv(sch); | 
| @@ -438,7 +533,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) | |||
| 438 | q->timer.function = netem_watchdog; | 533 | q->timer.function = netem_watchdog; | 
| 439 | q->timer.data = (unsigned long) sch; | 534 | q->timer.data = (unsigned long) sch; | 
| 440 | 535 | ||
| 441 | q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); | 536 | q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops); | 
| 442 | if (!q->qdisc) { | 537 | if (!q->qdisc) { | 
| 443 | pr_debug("netem: qdisc create failed\n"); | 538 | pr_debug("netem: qdisc create failed\n"); | 
| 444 | return -ENOMEM; | 539 | return -ENOMEM; | 
| @@ -601,6 +696,7 @@ static struct Qdisc_ops netem_qdisc_ops = { | |||
| 601 | 696 | ||
| 602 | static int __init netem_module_init(void) | 697 | static int __init netem_module_init(void) | 
| 603 | { | 698 | { | 
| 699 | pr_info("netem: version " VERSION "\n"); | ||
| 604 | return register_qdisc(&netem_qdisc_ops); | 700 | return register_qdisc(&netem_qdisc_ops); | 
| 605 | } | 701 | } | 
| 606 | static void __exit netem_module_exit(void) | 702 | static void __exit netem_module_exit(void) | 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 7845d045eec4..dccfa44c2d71 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c  | |||
| @@ -9,76 +9,23 @@ | |||
| 9 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | 9 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | 
| 10 | * | 10 | * | 
| 11 | * Changes: | 11 | * Changes: | 
| 12 | * J Hadi Salim <hadi@nortel.com> 980914: computation fixes | 12 | * J Hadi Salim 980914: computation fixes | 
| 13 | * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly. | 13 | * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly. | 
| 14 | * J Hadi Salim <hadi@nortelnetworks.com> 980816: ECN support | 14 | * J Hadi Salim 980816: ECN support | 
| 15 | */ | 15 | */ | 
| 16 | 16 | ||
| 17 | #include <linux/config.h> | 17 | #include <linux/config.h> | 
| 18 | #include <linux/module.h> | 18 | #include <linux/module.h> | 
| 19 | #include <asm/uaccess.h> | ||
| 20 | #include <asm/system.h> | ||
| 21 | #include <linux/bitops.h> | ||
| 22 | #include <linux/types.h> | 19 | #include <linux/types.h> | 
| 23 | #include <linux/kernel.h> | 20 | #include <linux/kernel.h> | 
| 24 | #include <linux/sched.h> | ||
| 25 | #include <linux/string.h> | ||
| 26 | #include <linux/mm.h> | ||
| 27 | #include <linux/socket.h> | ||
| 28 | #include <linux/sockios.h> | ||
| 29 | #include <linux/in.h> | ||
| 30 | #include <linux/errno.h> | ||
| 31 | #include <linux/interrupt.h> | ||
| 32 | #include <linux/if_ether.h> | ||
| 33 | #include <linux/inet.h> | ||
| 34 | #include <linux/netdevice.h> | 21 | #include <linux/netdevice.h> | 
| 35 | #include <linux/etherdevice.h> | ||
| 36 | #include <linux/notifier.h> | ||
| 37 | #include <net/ip.h> | ||
| 38 | #include <net/route.h> | ||
| 39 | #include <linux/skbuff.h> | 22 | #include <linux/skbuff.h> | 
| 40 | #include <net/sock.h> | ||
| 41 | #include <net/pkt_sched.h> | 23 | #include <net/pkt_sched.h> | 
| 42 | #include <net/inet_ecn.h> | 24 | #include <net/inet_ecn.h> | 
| 43 | #include <net/dsfield.h> | 25 | #include <net/red.h> | 
| 44 | 26 | ||
| 45 | 27 | ||
| 46 | /* Random Early Detection (RED) algorithm. | 28 | /* Parameters, settable by user: | 
| 47 | ======================================= | ||
| 48 | |||
| 49 | Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways | ||
| 50 | for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking. | ||
| 51 | |||
| 52 | This file codes a "divisionless" version of RED algorithm | ||
| 53 | as written down in Fig.17 of the paper. | ||
| 54 | |||
| 55 | Short description. | ||
| 56 | ------------------ | ||
| 57 | |||
| 58 | When a new packet arrives we calculate the average queue length: | ||
| 59 | |||
| 60 | avg = (1-W)*avg + W*current_queue_len, | ||
| 61 | |||
| 62 | W is the filter time constant (chosen as 2^(-Wlog)), it controls | ||
| 63 | the inertia of the algorithm. To allow larger bursts, W should be | ||
| 64 | decreased. | ||
| 65 | |||
| 66 | if (avg > th_max) -> packet marked (dropped). | ||
| 67 | if (avg < th_min) -> packet passes. | ||
| 68 | if (th_min < avg < th_max) we calculate probability: | ||
| 69 | |||
| 70 | Pb = max_P * (avg - th_min)/(th_max-th_min) | ||
| 71 | |||
| 72 | and mark (drop) packet with this probability. | ||
| 73 | Pb changes from 0 (at avg==th_min) to max_P (avg==th_max). | ||
| 74 | max_P should be small (not 1), usually 0.01..0.02 is good value. | ||
| 75 | |||
| 76 | max_P is chosen as a number, so that max_P/(th_max-th_min) | ||
| 77 | is a negative power of two in order arithmetics to contain | ||
| 78 | only shifts. | ||
| 79 | |||
| 80 | |||
| 81 | Parameters, settable by user: | ||
| 82 | ----------------------------- | 29 | ----------------------------- | 
| 83 | 30 | ||
| 84 | limit - bytes (must be > qth_max + burst) | 31 | limit - bytes (must be > qth_max + burst) | 
| @@ -89,243 +36,93 @@ Short description. | |||
| 89 | arbitrarily high (well, less than ram size) | 36 | arbitrarily high (well, less than ram size) | 
| 90 | Really, this limit will never be reached | 37 | Really, this limit will never be reached | 
| 91 | if RED works correctly. | 38 | if RED works correctly. | 
| 92 | |||
| 93 | qth_min - bytes (should be < qth_max/2) | ||
| 94 | qth_max - bytes (should be at least 2*qth_min and less limit) | ||
| 95 | Wlog - bits (<32) log(1/W). | ||
| 96 | Plog - bits (<32) | ||
| 97 | |||
| 98 | Plog is related to max_P by formula: | ||
| 99 | |||
| 100 | max_P = (qth_max-qth_min)/2^Plog; | ||
| 101 | |||
| 102 | F.e. if qth_max=128K and qth_min=32K, then Plog=22 | ||
| 103 | corresponds to max_P=0.02 | ||
| 104 | |||
| 105 | Scell_log | ||
| 106 | Stab | ||
| 107 | |||
| 108 | Lookup table for log((1-W)^(t/t_ave). | ||
| 109 | |||
| 110 | |||
| 111 | NOTES: | ||
| 112 | |||
| 113 | Upper bound on W. | ||
| 114 | ----------------- | ||
| 115 | |||
| 116 | If you want to allow bursts of L packets of size S, | ||
| 117 | you should choose W: | ||
| 118 | |||
| 119 | L + 1 - th_min/S < (1-(1-W)^L)/W | ||
| 120 | |||
| 121 | th_min/S = 32 th_min/S = 4 | ||
| 122 | |||
| 123 | log(W) L | ||
| 124 | -1 33 | ||
| 125 | -2 35 | ||
| 126 | -3 39 | ||
| 127 | -4 46 | ||
| 128 | -5 57 | ||
| 129 | -6 75 | ||
| 130 | -7 101 | ||
| 131 | -8 135 | ||
| 132 | -9 190 | ||
| 133 | etc. | ||
| 134 | */ | 39 | */ | 
| 135 | 40 | ||
| 136 | struct red_sched_data | 41 | struct red_sched_data | 
| 137 | { | 42 | { | 
| 138 | /* Parameters */ | 43 | u32 limit; /* HARD maximal queue length */ | 
| 139 | u32 limit; /* HARD maximal queue length */ | 44 | unsigned char flags; | 
| 140 | u32 qth_min; /* Min average length threshold: A scaled */ | 45 | struct red_parms parms; | 
| 141 | u32 qth_max; /* Max average length threshold: A scaled */ | 46 | struct red_stats stats; | 
| 142 | u32 Rmask; | ||
| 143 | u32 Scell_max; | ||
| 144 | unsigned char flags; | ||
| 145 | char Wlog; /* log(W) */ | ||
| 146 | char Plog; /* random number bits */ | ||
| 147 | char Scell_log; | ||
| 148 | u8 Stab[256]; | ||
| 149 | |||
| 150 | /* Variables */ | ||
| 151 | unsigned long qave; /* Average queue length: A scaled */ | ||
| 152 | int qcount; /* Packets since last random number generation */ | ||
| 153 | u32 qR; /* Cached random number */ | ||
| 154 | |||
| 155 | psched_time_t qidlestart; /* Start of idle period */ | ||
| 156 | struct tc_red_xstats st; | ||
| 157 | }; | 47 | }; | 
| 158 | 48 | ||
| 159 | static int red_ecn_mark(struct sk_buff *skb) | 49 | static inline int red_use_ecn(struct red_sched_data *q) | 
| 160 | { | 50 | { | 
| 161 | if (skb->nh.raw + 20 > skb->tail) | 51 | return q->flags & TC_RED_ECN; | 
| 162 | return 0; | ||
| 163 | |||
| 164 | switch (skb->protocol) { | ||
| 165 | case __constant_htons(ETH_P_IP): | ||
| 166 | if (INET_ECN_is_not_ect(skb->nh.iph->tos)) | ||
| 167 | return 0; | ||
| 168 | IP_ECN_set_ce(skb->nh.iph); | ||
| 169 | return 1; | ||
| 170 | case __constant_htons(ETH_P_IPV6): | ||
| 171 | if (INET_ECN_is_not_ect(ipv6_get_dsfield(skb->nh.ipv6h))) | ||
| 172 | return 0; | ||
| 173 | IP6_ECN_set_ce(skb->nh.ipv6h); | ||
| 174 | return 1; | ||
| 175 | default: | ||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | } | 52 | } | 
| 179 | 53 | ||
| 180 | static int | 54 | static inline int red_use_harddrop(struct red_sched_data *q) | 
| 181 | red_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 55 | { | 
| 56 | return q->flags & TC_RED_HARDDROP; | ||
| 57 | } | ||
| 58 | |||
| 59 | static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) | ||
| 182 | { | 60 | { | 
| 183 | struct red_sched_data *q = qdisc_priv(sch); | 61 | struct red_sched_data *q = qdisc_priv(sch); | 
| 184 | 62 | ||
| 185 | psched_time_t now; | 63 | q->parms.qavg = red_calc_qavg(&q->parms, sch->qstats.backlog); | 
| 186 | 64 | ||
| 187 | if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { | 65 | if (red_is_idling(&q->parms)) | 
| 188 | long us_idle; | 66 | red_end_of_idle_period(&q->parms); | 
| 189 | int shift; | ||
| 190 | 67 | ||
| 191 | PSCHED_GET_TIME(now); | 68 | switch (red_action(&q->parms, q->parms.qavg)) { | 
| 192 | us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); | 69 | case RED_DONT_MARK: | 
| 193 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 70 | break; | 
| 194 | 71 | ||
| 195 | /* | 72 | case RED_PROB_MARK: | 
| 196 | The problem: ideally, average length queue recalcultion should | 73 | sch->qstats.overlimits++; | 
| 197 | be done over constant clock intervals. This is too expensive, so that | 74 | if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { | 
| 198 | the calculation is driven by outgoing packets. | 75 | q->stats.prob_drop++; | 
| 199 | When the queue is idle we have to model this clock by hand. | 76 | goto congestion_drop; | 
| 200 | 77 | } | |
| 201 | SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth) | ||
| 202 | dummy packets as a burst after idle time, i.e. | ||
| 203 | |||
| 204 | q->qave *= (1-W)^m | ||
| 205 | |||
| 206 | This is an apparently overcomplicated solution (f.e. we have to precompute | ||
| 207 | a table to make this calculation in reasonable time) | ||
| 208 | I believe that a simpler model may be used here, | ||
| 209 | but it is field for experiments. | ||
| 210 | */ | ||
| 211 | shift = q->Stab[us_idle>>q->Scell_log]; | ||
| 212 | |||
| 213 | if (shift) { | ||
| 214 | q->qave >>= shift; | ||
| 215 | } else { | ||
| 216 | /* Approximate initial part of exponent | ||
| 217 | with linear function: | ||
| 218 | (1-W)^m ~= 1-mW + ... | ||
| 219 | |||
| 220 | Seems, it is the best solution to | ||
| 221 | problem of too coarce exponent tabulation. | ||
| 222 | */ | ||
| 223 | |||
| 224 | us_idle = (q->qave * us_idle)>>q->Scell_log; | ||
| 225 | if (us_idle < q->qave/2) | ||
| 226 | q->qave -= us_idle; | ||
| 227 | else | ||
| 228 | q->qave >>= 1; | ||
| 229 | } | ||
| 230 | } else { | ||
| 231 | q->qave += sch->qstats.backlog - (q->qave >> q->Wlog); | ||
| 232 | /* NOTE: | ||
| 233 | q->qave is fixed point number with point at Wlog. | ||
| 234 | The formulae above is equvalent to floating point | ||
| 235 | version: | ||
| 236 | |||
| 237 | qave = qave*(1-W) + sch->qstats.backlog*W; | ||
| 238 | --ANK (980924) | ||
| 239 | */ | ||
| 240 | } | ||
| 241 | 78 | ||
| 242 | if (q->qave < q->qth_min) { | 79 | q->stats.prob_mark++; | 
| 243 | q->qcount = -1; | 80 | break; | 
| 244 | enqueue: | 81 | |
| 245 | if (sch->qstats.backlog + skb->len <= q->limit) { | 82 | case RED_HARD_MARK: | 
| 246 | __skb_queue_tail(&sch->q, skb); | 83 | sch->qstats.overlimits++; | 
| 247 | sch->qstats.backlog += skb->len; | 84 | if (red_use_harddrop(q) || !red_use_ecn(q) || | 
| 248 | sch->bstats.bytes += skb->len; | 85 | !INET_ECN_set_ce(skb)) { | 
| 249 | sch->bstats.packets++; | 86 | q->stats.forced_drop++; | 
| 250 | return NET_XMIT_SUCCESS; | 87 | goto congestion_drop; | 
| 251 | } else { | 88 | } | 
| 252 | q->st.pdrop++; | ||
| 253 | } | ||
| 254 | kfree_skb(skb); | ||
| 255 | sch->qstats.drops++; | ||
| 256 | return NET_XMIT_DROP; | ||
| 257 | } | ||
| 258 | if (q->qave >= q->qth_max) { | ||
| 259 | q->qcount = -1; | ||
| 260 | sch->qstats.overlimits++; | ||
| 261 | mark: | ||
| 262 | if (!(q->flags&TC_RED_ECN) || !red_ecn_mark(skb)) { | ||
| 263 | q->st.early++; | ||
| 264 | goto drop; | ||
| 265 | } | ||
| 266 | q->st.marked++; | ||
| 267 | goto enqueue; | ||
| 268 | } | ||
| 269 | 89 | ||
| 270 | if (++q->qcount) { | 90 | q->stats.forced_mark++; | 
| 271 | /* The formula used below causes questions. | 91 | break; | 
| 272 | |||
| 273 | OK. qR is random number in the interval 0..Rmask | ||
| 274 | i.e. 0..(2^Plog). If we used floating point | ||
| 275 | arithmetics, it would be: (2^Plog)*rnd_num, | ||
| 276 | where rnd_num is less 1. | ||
| 277 | |||
| 278 | Taking into account, that qave have fixed | ||
| 279 | point at Wlog, and Plog is related to max_P by | ||
| 280 | max_P = (qth_max-qth_min)/2^Plog; two lines | ||
| 281 | below have the following floating point equivalent: | ||
| 282 | |||
| 283 | max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount | ||
| 284 | |||
| 285 | Any questions? --ANK (980924) | ||
| 286 | */ | ||
| 287 | if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR) | ||
| 288 | goto enqueue; | ||
| 289 | q->qcount = 0; | ||
| 290 | q->qR = net_random()&q->Rmask; | ||
| 291 | sch->qstats.overlimits++; | ||
| 292 | goto mark; | ||
| 293 | } | 92 | } | 
| 294 | q->qR = net_random()&q->Rmask; | ||
| 295 | goto enqueue; | ||
| 296 | 93 | ||
| 297 | drop: | 94 | if (sch->qstats.backlog + skb->len <= q->limit) | 
| 298 | kfree_skb(skb); | 95 | return qdisc_enqueue_tail(skb, sch); | 
| 299 | sch->qstats.drops++; | 96 | |
| 97 | q->stats.pdrop++; | ||
| 98 | return qdisc_drop(skb, sch); | ||
| 99 | |||
| 100 | congestion_drop: | ||
| 101 | qdisc_drop(skb, sch); | ||
| 300 | return NET_XMIT_CN; | 102 | return NET_XMIT_CN; | 
| 301 | } | 103 | } | 
| 302 | 104 | ||
| 303 | static int | 105 | static int red_requeue(struct sk_buff *skb, struct Qdisc* sch) | 
| 304 | red_requeue(struct sk_buff *skb, struct Qdisc* sch) | ||
| 305 | { | 106 | { | 
| 306 | struct red_sched_data *q = qdisc_priv(sch); | 107 | struct red_sched_data *q = qdisc_priv(sch); | 
| 307 | 108 | ||
| 308 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 109 | if (red_is_idling(&q->parms)) | 
| 110 | red_end_of_idle_period(&q->parms); | ||
| 309 | 111 | ||
| 310 | __skb_queue_head(&sch->q, skb); | 112 | return qdisc_requeue(skb, sch); | 
| 311 | sch->qstats.backlog += skb->len; | ||
| 312 | sch->qstats.requeues++; | ||
| 313 | return 0; | ||
| 314 | } | 113 | } | 
| 315 | 114 | ||
| 316 | static struct sk_buff * | 115 | static struct sk_buff * red_dequeue(struct Qdisc* sch) | 
| 317 | red_dequeue(struct Qdisc* sch) | ||
| 318 | { | 116 | { | 
| 319 | struct sk_buff *skb; | 117 | struct sk_buff *skb; | 
| 320 | struct red_sched_data *q = qdisc_priv(sch); | 118 | struct red_sched_data *q = qdisc_priv(sch); | 
| 321 | 119 | ||
| 322 | skb = __skb_dequeue(&sch->q); | 120 | skb = qdisc_dequeue_head(sch); | 
| 323 | if (skb) { | 121 | |
| 324 | sch->qstats.backlog -= skb->len; | 122 | if (skb == NULL && !red_is_idling(&q->parms)) | 
| 325 | return skb; | 123 | red_start_of_idle_period(&q->parms); | 
| 326 | } | 124 | |
| 327 | PSCHED_GET_TIME(q->qidlestart); | 125 | return skb; | 
| 328 | return NULL; | ||
| 329 | } | 126 | } | 
| 330 | 127 | ||
| 331 | static unsigned int red_drop(struct Qdisc* sch) | 128 | static unsigned int red_drop(struct Qdisc* sch) | 
| @@ -333,16 +130,17 @@ static unsigned int red_drop(struct Qdisc* sch) | |||
| 333 | struct sk_buff *skb; | 130 | struct sk_buff *skb; | 
| 334 | struct red_sched_data *q = qdisc_priv(sch); | 131 | struct red_sched_data *q = qdisc_priv(sch); | 
| 335 | 132 | ||
| 336 | skb = __skb_dequeue_tail(&sch->q); | 133 | skb = qdisc_dequeue_tail(sch); | 
| 337 | if (skb) { | 134 | if (skb) { | 
| 338 | unsigned int len = skb->len; | 135 | unsigned int len = skb->len; | 
| 339 | sch->qstats.backlog -= len; | 136 | q->stats.other++; | 
| 340 | sch->qstats.drops++; | 137 | qdisc_drop(skb, sch); | 
| 341 | q->st.other++; | ||
| 342 | kfree_skb(skb); | ||
| 343 | return len; | 138 | return len; | 
| 344 | } | 139 | } | 
| 345 | PSCHED_GET_TIME(q->qidlestart); | 140 | |
| 141 | if (!red_is_idling(&q->parms)) | ||
| 142 | red_start_of_idle_period(&q->parms); | ||
| 143 | |||
| 346 | return 0; | 144 | return 0; | 
| 347 | } | 145 | } | 
| 348 | 146 | ||
| @@ -350,43 +148,38 @@ static void red_reset(struct Qdisc* sch) | |||
| 350 | { | 148 | { | 
| 351 | struct red_sched_data *q = qdisc_priv(sch); | 149 | struct red_sched_data *q = qdisc_priv(sch); | 
| 352 | 150 | ||
| 353 | __skb_queue_purge(&sch->q); | 151 | qdisc_reset_queue(sch); | 
| 354 | sch->qstats.backlog = 0; | 152 | red_restart(&q->parms); | 
| 355 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
| 356 | q->qave = 0; | ||
| 357 | q->qcount = -1; | ||
| 358 | } | 153 | } | 
| 359 | 154 | ||
| 360 | static int red_change(struct Qdisc *sch, struct rtattr *opt) | 155 | static int red_change(struct Qdisc *sch, struct rtattr *opt) | 
| 361 | { | 156 | { | 
| 362 | struct red_sched_data *q = qdisc_priv(sch); | 157 | struct red_sched_data *q = qdisc_priv(sch); | 
| 363 | struct rtattr *tb[TCA_RED_STAB]; | 158 | struct rtattr *tb[TCA_RED_MAX]; | 
| 364 | struct tc_red_qopt *ctl; | 159 | struct tc_red_qopt *ctl; | 
| 365 | 160 | ||
| 366 | if (opt == NULL || | 161 | if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt)) | 
| 367 | rtattr_parse_nested(tb, TCA_RED_STAB, opt) || | 162 | return -EINVAL; | 
| 368 | tb[TCA_RED_PARMS-1] == 0 || tb[TCA_RED_STAB-1] == 0 || | 163 | |
| 164 | if (tb[TCA_RED_PARMS-1] == NULL || | ||
| 369 | RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || | 165 | RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || | 
| 370 | RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < 256) | 166 | tb[TCA_RED_STAB-1] == NULL || | 
| 167 | RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE) | ||
| 371 | return -EINVAL; | 168 | return -EINVAL; | 
| 372 | 169 | ||
| 373 | ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); | 170 | ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); | 
| 374 | 171 | ||
| 375 | sch_tree_lock(sch); | 172 | sch_tree_lock(sch); | 
| 376 | q->flags = ctl->flags; | 173 | q->flags = ctl->flags; | 
| 377 | q->Wlog = ctl->Wlog; | ||
| 378 | q->Plog = ctl->Plog; | ||
| 379 | q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; | ||
| 380 | q->Scell_log = ctl->Scell_log; | ||
| 381 | q->Scell_max = (255<<q->Scell_log); | ||
| 382 | q->qth_min = ctl->qth_min<<ctl->Wlog; | ||
| 383 | q->qth_max = ctl->qth_max<<ctl->Wlog; | ||
| 384 | q->limit = ctl->limit; | 174 | q->limit = ctl->limit; | 
| 385 | memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256); | ||
| 386 | 175 | ||
| 387 | q->qcount = -1; | 176 | red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, | 
| 177 | ctl->Plog, ctl->Scell_log, | ||
| 178 | RTA_DATA(tb[TCA_RED_STAB-1])); | ||
| 179 | |||
| 388 | if (skb_queue_empty(&sch->q)) | 180 | if (skb_queue_empty(&sch->q)) | 
| 389 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 181 | red_end_of_idle_period(&q->parms); | 
| 182 | |||
| 390 | sch_tree_unlock(sch); | 183 | sch_tree_unlock(sch); | 
| 391 | return 0; | 184 | return 0; | 
| 392 | } | 185 | } | 
| @@ -399,39 +192,39 @@ static int red_init(struct Qdisc* sch, struct rtattr *opt) | |||
| 399 | static int red_dump(struct Qdisc *sch, struct sk_buff *skb) | 192 | static int red_dump(struct Qdisc *sch, struct sk_buff *skb) | 
| 400 | { | 193 | { | 
| 401 | struct red_sched_data *q = qdisc_priv(sch); | 194 | struct red_sched_data *q = qdisc_priv(sch); | 
| 402 | unsigned char *b = skb->tail; | 195 | struct rtattr *opts = NULL; | 
| 403 | struct rtattr *rta; | 196 | struct tc_red_qopt opt = { | 
| 404 | struct tc_red_qopt opt; | 197 | .limit = q->limit, | 
| 405 | 198 | .flags = q->flags, | |
| 406 | rta = (struct rtattr*)b; | 199 | .qth_min = q->parms.qth_min >> q->parms.Wlog, | 
| 407 | RTA_PUT(skb, TCA_OPTIONS, 0, NULL); | 200 | .qth_max = q->parms.qth_max >> q->parms.Wlog, | 
| 408 | opt.limit = q->limit; | 201 | .Wlog = q->parms.Wlog, | 
| 409 | opt.qth_min = q->qth_min>>q->Wlog; | 202 | .Plog = q->parms.Plog, | 
| 410 | opt.qth_max = q->qth_max>>q->Wlog; | 203 | .Scell_log = q->parms.Scell_log, | 
| 411 | opt.Wlog = q->Wlog; | 204 | }; | 
| 412 | opt.Plog = q->Plog; | 205 | |
| 413 | opt.Scell_log = q->Scell_log; | 206 | opts = RTA_NEST(skb, TCA_OPTIONS); | 
| 414 | opt.flags = q->flags; | ||
| 415 | RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); | 207 | RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); | 
| 416 | rta->rta_len = skb->tail - b; | 208 | return RTA_NEST_END(skb, opts); | 
| 417 | |||
| 418 | return skb->len; | ||
| 419 | 209 | ||
| 420 | rtattr_failure: | 210 | rtattr_failure: | 
| 421 | skb_trim(skb, b - skb->data); | 211 | return RTA_NEST_CANCEL(skb, opts); | 
| 422 | return -1; | ||
| 423 | } | 212 | } | 
| 424 | 213 | ||
| 425 | static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | 214 | static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | 
| 426 | { | 215 | { | 
| 427 | struct red_sched_data *q = qdisc_priv(sch); | 216 | struct red_sched_data *q = qdisc_priv(sch); | 
| 428 | 217 | struct tc_red_xstats st = { | |
| 429 | return gnet_stats_copy_app(d, &q->st, sizeof(q->st)); | 218 | .early = q->stats.prob_drop + q->stats.forced_drop, | 
| 219 | .pdrop = q->stats.pdrop, | ||
| 220 | .other = q->stats.other, | ||
| 221 | .marked = q->stats.prob_mark + q->stats.forced_mark, | ||
| 222 | }; | ||
| 223 | |||
| 224 | return gnet_stats_copy_app(d, &st, sizeof(st)); | ||
| 430 | } | 225 | } | 
| 431 | 226 | ||
| 432 | static struct Qdisc_ops red_qdisc_ops = { | 227 | static struct Qdisc_ops red_qdisc_ops = { | 
| 433 | .next = NULL, | ||
| 434 | .cl_ops = NULL, | ||
| 435 | .id = "red", | 228 | .id = "red", | 
| 436 | .priv_size = sizeof(struct red_sched_data), | 229 | .priv_size = sizeof(struct red_sched_data), | 
| 437 | .enqueue = red_enqueue, | 230 | .enqueue = red_enqueue, | 
| @@ -450,10 +243,13 @@ static int __init red_module_init(void) | |||
| 450 | { | 243 | { | 
| 451 | return register_qdisc(&red_qdisc_ops); | 244 | return register_qdisc(&red_qdisc_ops); | 
| 452 | } | 245 | } | 
| 453 | static void __exit red_module_exit(void) | 246 | |
| 247 | static void __exit red_module_exit(void) | ||
| 454 | { | 248 | { | 
| 455 | unregister_qdisc(&red_qdisc_ops); | 249 | unregister_qdisc(&red_qdisc_ops); | 
| 456 | } | 250 | } | 
| 251 | |||
| 457 | module_init(red_module_init) | 252 | module_init(red_module_init) | 
| 458 | module_exit(red_module_exit) | 253 | module_exit(red_module_exit) | 
| 254 | |||
| 459 | MODULE_LICENSE("GPL"); | 255 | MODULE_LICENSE("GPL"); | 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 12b0f582a66b..8c8ddf7f9b61 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c  | |||
| @@ -344,9 +344,7 @@ void sctp_association_free(struct sctp_association *asoc) | |||
| 344 | } | 344 | } | 
| 345 | 345 | ||
| 346 | /* Free peer's cached cookie. */ | 346 | /* Free peer's cached cookie. */ | 
| 347 | if (asoc->peer.cookie) { | 347 | kfree(asoc->peer.cookie); | 
| 348 | kfree(asoc->peer.cookie); | ||
| 349 | } | ||
| 350 | 348 | ||
| 351 | /* Release the transport structures. */ | 349 | /* Release the transport structures. */ | 
| 352 | list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { | 350 | list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { | 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 660c61bdf164..f9573eba5c7a 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c  | |||
| @@ -254,8 +254,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, | |||
| 254 | aiparam.adaption_ind = htonl(sp->adaption_ind); | 254 | aiparam.adaption_ind = htonl(sp->adaption_ind); | 
| 255 | sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); | 255 | sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); | 
| 256 | nodata: | 256 | nodata: | 
| 257 | if (addrs.v) | 257 | kfree(addrs.v); | 
| 258 | kfree(addrs.v); | ||
| 259 | return retval; | 258 | return retval; | 
| 260 | } | 259 | } | 
| 261 | 260 | ||
| @@ -347,8 +346,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, | |||
| 347 | nomem_chunk: | 346 | nomem_chunk: | 
| 348 | kfree(cookie); | 347 | kfree(cookie); | 
| 349 | nomem_cookie: | 348 | nomem_cookie: | 
| 350 | if (addrs.v) | 349 | kfree(addrs.v); | 
| 351 | kfree(addrs.v); | ||
| 352 | return retval; | 350 | return retval; | 
| 353 | } | 351 | } | 
| 354 | 352 | ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 13f8ae979454..d0dfdfd5e79e 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c  | |||
| @@ -143,6 +143,6 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, | |||
| 143 | 143 | ||
| 144 | return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); | 144 | return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); | 
| 145 | out_err: | 145 | out_err: | 
| 146 | if (md5cksum.data) kfree(md5cksum.data); | 146 | kfree(md5cksum.data); | 
| 147 | return GSS_S_FAILURE; | 147 | return GSS_S_FAILURE; | 
| 148 | } | 148 | } | 
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 2030475d98ed..db055fd7d778 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c  | |||
| @@ -176,6 +176,6 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, | |||
| 176 | 176 | ||
| 177 | ret = GSS_S_COMPLETE; | 177 | ret = GSS_S_COMPLETE; | 
| 178 | out: | 178 | out: | 
| 179 | if (md5cksum.data) kfree(md5cksum.data); | 179 | kfree(md5cksum.data); | 
| 180 | return ret; | 180 | return ret; | 
| 181 | } | 181 | } | 
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index b048bf672da2..f8bac6ccd524 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c  | |||
| @@ -60,8 +60,7 @@ gss_mech_free(struct gss_api_mech *gm) | |||
| 60 | 60 | ||
| 61 | for (i = 0; i < gm->gm_pf_num; i++) { | 61 | for (i = 0; i < gm->gm_pf_num; i++) { | 
| 62 | pf = &gm->gm_pfs[i]; | 62 | pf = &gm->gm_pfs[i]; | 
| 63 | if (pf->auth_domain_name) | 63 | kfree(pf->auth_domain_name); | 
| 64 | kfree(pf->auth_domain_name); | ||
| 65 | pf->auth_domain_name = NULL; | 64 | pf->auth_domain_name = NULL; | 
| 66 | } | 65 | } | 
| 67 | } | 66 | } | 
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index 148201e929d0..d1e12b25d6e2 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c  | |||
| @@ -122,8 +122,7 @@ spkm3_make_token(struct spkm3_ctx *ctx, | |||
| 122 | 122 | ||
| 123 | return GSS_S_COMPLETE; | 123 | return GSS_S_COMPLETE; | 
| 124 | out_err: | 124 | out_err: | 
| 125 | if (md5cksum.data) | 125 | kfree(md5cksum.data); | 
| 126 | kfree(md5cksum.data); | ||
| 127 | token->data = NULL; | 126 | token->data = NULL; | 
| 128 | token->len = 0; | 127 | token->len = 0; | 
| 129 | return GSS_S_FAILURE; | 128 | return GSS_S_FAILURE; | 
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 46c08a0710f6..1f824578d773 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c  | |||
| @@ -259,8 +259,7 @@ spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **ck | |||
| 259 | 259 | ||
| 260 | ret = GSS_S_COMPLETE; | 260 | ret = GSS_S_COMPLETE; | 
| 261 | out: | 261 | out: | 
| 262 | if (spkm3_ctx_id.data) | 262 | kfree(spkm3_ctx_id.data); | 
| 263 | kfree(spkm3_ctx_id.data); | ||
| 264 | return ret; | 263 | return ret; | 
| 265 | } | 264 | } | 
| 266 | 265 | ||
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c index c3c0d9586103..241d5b30dfcb 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c  | |||
| @@ -120,9 +120,7 @@ spkm3_read_token(struct spkm3_ctx *ctx, | |||
| 120 | /* XXX: need to add expiration and sequencing */ | 120 | /* XXX: need to add expiration and sequencing */ | 
| 121 | ret = GSS_S_COMPLETE; | 121 | ret = GSS_S_COMPLETE; | 
| 122 | out: | 122 | out: | 
| 123 | if (md5cksum.data) | 123 | kfree(md5cksum.data); | 
| 124 | kfree(md5cksum.data); | 124 | kfree(wire_cksum.data); | 
| 125 | if (wire_cksum.data) | ||
| 126 | kfree(wire_cksum.data); | ||
| 127 | return ret; | 125 | return ret; | 
| 128 | } | 126 | } | 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 702ede309b06..61c3abeaccae 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c  | |||
| @@ -55,6 +55,7 @@ static void call_bind(struct rpc_task *task); | |||
| 55 | static void call_bind_status(struct rpc_task *task); | 55 | static void call_bind_status(struct rpc_task *task); | 
| 56 | static void call_transmit(struct rpc_task *task); | 56 | static void call_transmit(struct rpc_task *task); | 
| 57 | static void call_status(struct rpc_task *task); | 57 | static void call_status(struct rpc_task *task); | 
| 58 | static void call_transmit_status(struct rpc_task *task); | ||
| 58 | static void call_refresh(struct rpc_task *task); | 59 | static void call_refresh(struct rpc_task *task); | 
| 59 | static void call_refreshresult(struct rpc_task *task); | 60 | static void call_refreshresult(struct rpc_task *task); | 
| 60 | static void call_timeout(struct rpc_task *task); | 61 | static void call_timeout(struct rpc_task *task); | 
| @@ -672,6 +673,18 @@ call_allocate(struct rpc_task *task) | |||
| 672 | rpc_exit(task, -ERESTARTSYS); | 673 | rpc_exit(task, -ERESTARTSYS); | 
| 673 | } | 674 | } | 
| 674 | 675 | ||
| 676 | static inline int | ||
| 677 | rpc_task_need_encode(struct rpc_task *task) | ||
| 678 | { | ||
| 679 | return task->tk_rqstp->rq_snd_buf.len == 0; | ||
| 680 | } | ||
| 681 | |||
| 682 | static inline void | ||
| 683 | rpc_task_force_reencode(struct rpc_task *task) | ||
| 684 | { | ||
| 685 | task->tk_rqstp->rq_snd_buf.len = 0; | ||
| 686 | } | ||
| 687 | |||
| 675 | /* | 688 | /* | 
| 676 | * 3. Encode arguments of an RPC call | 689 | * 3. Encode arguments of an RPC call | 
| 677 | */ | 690 | */ | 
| @@ -867,12 +880,14 @@ call_transmit(struct rpc_task *task) | |||
| 867 | if (task->tk_status != 0) | 880 | if (task->tk_status != 0) | 
| 868 | return; | 881 | return; | 
| 869 | /* Encode here so that rpcsec_gss can use correct sequence number. */ | 882 | /* Encode here so that rpcsec_gss can use correct sequence number. */ | 
| 870 | if (task->tk_rqstp->rq_bytes_sent == 0) { | 883 | if (rpc_task_need_encode(task)) { | 
| 884 | task->tk_rqstp->rq_bytes_sent = 0; | ||
| 871 | call_encode(task); | 885 | call_encode(task); | 
| 872 | /* Did the encode result in an error condition? */ | 886 | /* Did the encode result in an error condition? */ | 
| 873 | if (task->tk_status != 0) | 887 | if (task->tk_status != 0) | 
| 874 | goto out_nosend; | 888 | goto out_nosend; | 
| 875 | } | 889 | } | 
| 890 | task->tk_action = call_transmit_status; | ||
| 876 | xprt_transmit(task); | 891 | xprt_transmit(task); | 
| 877 | if (task->tk_status < 0) | 892 | if (task->tk_status < 0) | 
| 878 | return; | 893 | return; | 
| @@ -884,6 +899,7 @@ call_transmit(struct rpc_task *task) | |||
| 884 | out_nosend: | 899 | out_nosend: | 
| 885 | /* release socket write lock before attempting to handle error */ | 900 | /* release socket write lock before attempting to handle error */ | 
| 886 | xprt_abort_transmit(task); | 901 | xprt_abort_transmit(task); | 
| 902 | rpc_task_force_reencode(task); | ||
| 887 | } | 903 | } | 
| 888 | 904 | ||
| 889 | /* | 905 | /* | 
| @@ -915,7 +931,6 @@ call_status(struct rpc_task *task) | |||
| 915 | break; | 931 | break; | 
| 916 | case -ECONNREFUSED: | 932 | case -ECONNREFUSED: | 
| 917 | case -ENOTCONN: | 933 | case -ENOTCONN: | 
| 918 | req->rq_bytes_sent = 0; | ||
| 919 | if (clnt->cl_autobind) | 934 | if (clnt->cl_autobind) | 
| 920 | clnt->cl_port = 0; | 935 | clnt->cl_port = 0; | 
| 921 | task->tk_action = call_bind; | 936 | task->tk_action = call_bind; | 
| @@ -937,7 +952,18 @@ call_status(struct rpc_task *task) | |||
| 937 | } | 952 | } | 
| 938 | 953 | ||
| 939 | /* | 954 | /* | 
| 940 | * 6a. Handle RPC timeout | 955 | * 6a. Handle transmission errors. | 
| 956 | */ | ||
| 957 | static void | ||
| 958 | call_transmit_status(struct rpc_task *task) | ||
| 959 | { | ||
| 960 | if (task->tk_status != -EAGAIN) | ||
| 961 | rpc_task_force_reencode(task); | ||
| 962 | call_status(task); | ||
| 963 | } | ||
| 964 | |||
| 965 | /* | ||
| 966 | * 6b. Handle RPC timeout | ||
| 941 | * We do not release the request slot, so we keep using the | 967 | * We do not release the request slot, so we keep using the | 
| 942 | * same XID for all retransmits. | 968 | * same XID for all retransmits. | 
| 943 | */ | 969 | */ | 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 4f188d0a5d11..81e00a6c19de 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c  | |||
| @@ -603,7 +603,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) | |||
| 603 | return ERR_PTR(error); | 603 | return ERR_PTR(error); | 
| 604 | dir = nd->dentry->d_inode; | 604 | dir = nd->dentry->d_inode; | 
| 605 | down(&dir->i_sem); | 605 | down(&dir->i_sem); | 
| 606 | dentry = lookup_hash(&nd->last, nd->dentry); | 606 | dentry = lookup_hash(nd); | 
| 607 | if (IS_ERR(dentry)) | 607 | if (IS_ERR(dentry)) | 
| 608 | goto out_err; | 608 | goto out_err; | 
| 609 | if (dentry->d_inode) { | 609 | if (dentry->d_inode) { | 
| @@ -665,7 +665,7 @@ rpc_rmdir(char *path) | |||
| 665 | return error; | 665 | return error; | 
| 666 | dir = nd.dentry->d_inode; | 666 | dir = nd.dentry->d_inode; | 
| 667 | down(&dir->i_sem); | 667 | down(&dir->i_sem); | 
| 668 | dentry = lookup_hash(&nd.last, nd.dentry); | 668 | dentry = lookup_hash(&nd); | 
| 669 | if (IS_ERR(dentry)) { | 669 | if (IS_ERR(dentry)) { | 
| 670 | error = PTR_ERR(dentry); | 670 | error = PTR_ERR(dentry); | 
| 671 | goto out_release; | 671 | goto out_release; | 
| @@ -726,7 +726,7 @@ rpc_unlink(char *path) | |||
| 726 | return error; | 726 | return error; | 
| 727 | dir = nd.dentry->d_inode; | 727 | dir = nd.dentry->d_inode; | 
| 728 | down(&dir->i_sem); | 728 | down(&dir->i_sem); | 
| 729 | dentry = lookup_hash(&nd.last, nd.dentry); | 729 | dentry = lookup_hash(&nd); | 
| 730 | if (IS_ERR(dentry)) { | 730 | if (IS_ERR(dentry)) { | 
| 731 | error = PTR_ERR(dentry); | 731 | error = PTR_ERR(dentry); | 
| 732 | goto out_release; | 732 | goto out_release; | 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e9bd91265f70..e4296c8b861e 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c  | |||
| @@ -196,12 +196,9 @@ svc_exit_thread(struct svc_rqst *rqstp) | |||
| 196 | struct svc_serv *serv = rqstp->rq_server; | 196 | struct svc_serv *serv = rqstp->rq_server; | 
| 197 | 197 | ||
| 198 | svc_release_buffer(rqstp); | 198 | svc_release_buffer(rqstp); | 
| 199 | if (rqstp->rq_resp) | 199 | kfree(rqstp->rq_resp); | 
| 200 | kfree(rqstp->rq_resp); | 200 | kfree(rqstp->rq_argp); | 
| 201 | if (rqstp->rq_argp) | 201 | kfree(rqstp->rq_auth_data); | 
| 202 | kfree(rqstp->rq_argp); | ||
| 203 | if (rqstp->rq_auth_data) | ||
| 204 | kfree(rqstp->rq_auth_data); | ||
| 205 | kfree(rqstp); | 202 | kfree(rqstp); | 
| 206 | 203 | ||
| 207 | /* Release the server */ | 204 | /* Release the server */ | 
| @@ -313,6 +310,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) | |||
| 313 | rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */ | 310 | rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */ | 
| 314 | 311 | ||
| 315 | progp = serv->sv_program; | 312 | progp = serv->sv_program; | 
| 313 | |||
| 314 | for (progp = serv->sv_program; progp; progp = progp->pg_next) | ||
| 315 | if (prog == progp->pg_prog) | ||
| 316 | break; | ||
| 317 | |||
| 316 | /* | 318 | /* | 
| 317 | * Decode auth data, and add verifier to reply buffer. | 319 | * Decode auth data, and add verifier to reply buffer. | 
| 318 | * We do this before anything else in order to get a decent | 320 | * We do this before anything else in order to get a decent | 
| @@ -320,7 +322,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) | |||
| 320 | */ | 322 | */ | 
| 321 | auth_res = svc_authenticate(rqstp, &auth_stat); | 323 | auth_res = svc_authenticate(rqstp, &auth_stat); | 
| 322 | /* Also give the program a chance to reject this call: */ | 324 | /* Also give the program a chance to reject this call: */ | 
| 323 | if (auth_res == SVC_OK) { | 325 | if (auth_res == SVC_OK && progp) { | 
| 324 | auth_stat = rpc_autherr_badcred; | 326 | auth_stat = rpc_autherr_badcred; | 
| 325 | auth_res = progp->pg_authenticate(rqstp); | 327 | auth_res = progp->pg_authenticate(rqstp); | 
| 326 | } | 328 | } | 
| @@ -340,10 +342,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) | |||
| 340 | case SVC_COMPLETE: | 342 | case SVC_COMPLETE: | 
| 341 | goto sendit; | 343 | goto sendit; | 
| 342 | } | 344 | } | 
| 343 | 345 | ||
| 344 | for (progp = serv->sv_program; progp; progp = progp->pg_next) | ||
| 345 | if (prog == progp->pg_prog) | ||
| 346 | break; | ||
| 347 | if (progp == NULL) | 346 | if (progp == NULL) | 
| 348 | goto err_bad_prog; | 347 | goto err_bad_prog; | 
| 349 | 348 | ||
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 32df43372ee9..aaf08cdd19f0 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c  | |||
| @@ -992,8 +992,7 @@ xdr_xcode_array2(struct xdr_buf *buf, unsigned int base, | |||
| 992 | err = 0; | 992 | err = 0; | 
| 993 | 993 | ||
| 994 | out: | 994 | out: | 
| 995 | if (elem) | 995 | kfree(elem); | 
| 996 | kfree(elem); | ||
| 997 | if (ppages) | 996 | if (ppages) | 
| 998 | kunmap(*ppages); | 997 | kunmap(*ppages); | 
| 999 | return err; | 998 | return err; | 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 41feca3bef86..acc73ba8bade 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c  | |||
| @@ -676,7 +676,7 @@ static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, | |||
| 676 | err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); | 676 | err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); | 
| 677 | if (err) | 677 | if (err) | 
| 678 | goto fail; | 678 | goto fail; | 
| 679 | err = permission(nd.dentry->d_inode,MAY_WRITE, &nd); | 679 | err = vfs_permission(&nd, MAY_WRITE); | 
| 680 | if (err) | 680 | if (err) | 
| 681 | goto put_fail; | 681 | goto put_fail; | 
| 682 | 682 | ||
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index 596cb96e5f47..59fec59b2132 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c  | |||
| @@ -1099,7 +1099,7 @@ static void release_driver(struct sock *sk) | |||
| 1099 | sock_reset_flag(sk, SOCK_ZAPPED); | 1099 | sock_reset_flag(sk, SOCK_ZAPPED); | 
| 1100 | wp = wp_sk(sk); | 1100 | wp = wp_sk(sk); | 
| 1101 | 1101 | ||
| 1102 | if (wp && wp->mbox) { | 1102 | if (wp) { | 
| 1103 | kfree(wp->mbox); | 1103 | kfree(wp->mbox); | 
| 1104 | wp->mbox = NULL; | 1104 | wp->mbox = NULL; | 
| 1105 | } | 1105 | } | 
| @@ -1186,10 +1186,8 @@ static void wanpipe_kill_sock_timer (unsigned long data) | |||
| 1186 | return; | 1186 | return; | 
| 1187 | } | 1187 | } | 
| 1188 | 1188 | ||
| 1189 | if (wp_sk(sk)) { | 1189 | kfree(wp_sk(sk)); | 
| 1190 | kfree(wp_sk(sk)); | 1190 | wp_sk(sk) = NULL; | 
| 1191 | wp_sk(sk) = NULL; | ||
| 1192 | } | ||
| 1193 | 1191 | ||
| 1194 | if (atomic_read(&sk->sk_refcnt) != 1) { | 1192 | if (atomic_read(&sk->sk_refcnt) != 1) { | 
| 1195 | atomic_set(&sk->sk_refcnt, 1); | 1193 | atomic_set(&sk->sk_refcnt, 1); | 
| @@ -1219,10 +1217,8 @@ static void wanpipe_kill_sock_accept (struct sock *sk) | |||
| 1219 | sk->sk_socket = NULL; | 1217 | sk->sk_socket = NULL; | 
| 1220 | 1218 | ||
| 1221 | 1219 | ||
| 1222 | if (wp_sk(sk)) { | 1220 | kfree(wp_sk(sk)); | 
| 1223 | kfree(wp_sk(sk)); | 1221 | wp_sk(sk) = NULL; | 
| 1224 | wp_sk(sk) = NULL; | ||
| 1225 | } | ||
| 1226 | 1222 | ||
| 1227 | if (atomic_read(&sk->sk_refcnt) != 1) { | 1223 | if (atomic_read(&sk->sk_refcnt) != 1) { | 
| 1228 | atomic_set(&sk->sk_refcnt, 1); | 1224 | atomic_set(&sk->sk_refcnt, 1); | 
| @@ -1243,10 +1239,8 @@ static void wanpipe_kill_sock_irq (struct sock *sk) | |||
| 1243 | 1239 | ||
| 1244 | sk->sk_socket = NULL; | 1240 | sk->sk_socket = NULL; | 
| 1245 | 1241 | ||
| 1246 | if (wp_sk(sk)) { | 1242 | kfree(wp_sk(sk)); | 
| 1247 | kfree(wp_sk(sk)); | 1243 | wp_sk(sk) = NULL; | 
| 1248 | wp_sk(sk) = NULL; | ||
| 1249 | } | ||
| 1250 | 1244 | ||
| 1251 | if (atomic_read(&sk->sk_refcnt) != 1) { | 1245 | if (atomic_read(&sk->sk_refcnt) != 1) { | 
| 1252 | atomic_set(&sk->sk_refcnt, 1); | 1246 | atomic_set(&sk->sk_refcnt, 1); | 
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 13b650ad22e2..bcf7b3faa76a 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c  | |||
| @@ -714,10 +714,8 @@ static int wanrouter_device_new_if(struct wan_device *wandev, | |||
| 714 | } | 714 | } | 
| 715 | 715 | ||
| 716 | /* This code has moved from del_if() function */ | 716 | /* This code has moved from del_if() function */ | 
| 717 | if (dev->priv) { | 717 | kfree(dev->priv); | 
| 718 | kfree(dev->priv); | 718 | dev->priv = NULL; | 
| 719 | dev->priv = NULL; | ||
| 720 | } | ||
| 721 | 719 | ||
| 722 | #ifdef CONFIG_WANPIPE_MULTPPP | 720 | #ifdef CONFIG_WANPIPE_MULTPPP | 
| 723 | if (cnf->config_id == WANCONFIG_MPPP) | 721 | if (cnf->config_id == WANCONFIG_MPPP) | 
| @@ -851,10 +849,8 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name) | |||
| 851 | 849 | ||
| 852 | /* Due to new interface linking method using dev->priv, | 850 | /* Due to new interface linking method using dev->priv, | 
| 853 | * this code has moved from del_if() function.*/ | 851 | * this code has moved from del_if() function.*/ | 
| 854 | if (dev->priv){ | 852 | kfree(dev->priv); | 
| 855 | kfree(dev->priv); | 853 | dev->priv=NULL; | 
| 856 | dev->priv=NULL; | ||
| 857 | } | ||
| 858 | 854 | ||
| 859 | unregister_netdev(dev); | 855 | unregister_netdev(dev); | 
| 860 | 856 | ||
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8b9a4747417d..7cf48aa6c95b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c  | |||
| @@ -62,14 +62,10 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) | |||
| 62 | { | 62 | { | 
| 63 | if (del_timer(&x->timer)) | 63 | if (del_timer(&x->timer)) | 
| 64 | BUG(); | 64 | BUG(); | 
| 65 | if (x->aalg) | 65 | kfree(x->aalg); | 
| 66 | kfree(x->aalg); | 66 | kfree(x->ealg); | 
| 67 | if (x->ealg) | 67 | kfree(x->calg); | 
| 68 | kfree(x->ealg); | 68 | kfree(x->encap); | 
| 69 | if (x->calg) | ||
| 70 | kfree(x->calg); | ||
| 71 | if (x->encap) | ||
| 72 | kfree(x->encap); | ||
| 73 | if (x->type) { | 69 | if (x->type) { | 
| 74 | x->type->destructor(x); | 70 | x->type->destructor(x); | 
| 75 | xfrm_put_type(x->type); | 71 | xfrm_put_type(x->type); | 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c35336a0f71b..0cdd9a07e043 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c  | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include <linux/string.h> | 18 | #include <linux/string.h> | 
| 19 | #include <linux/net.h> | 19 | #include <linux/net.h> | 
| 20 | #include <linux/skbuff.h> | 20 | #include <linux/skbuff.h> | 
| 21 | #include <linux/netlink.h> | ||
| 22 | #include <linux/rtnetlink.h> | 21 | #include <linux/rtnetlink.h> | 
| 23 | #include <linux/pfkeyv2.h> | 22 | #include <linux/pfkeyv2.h> | 
| 24 | #include <linux/ipsec.h> | 23 | #include <linux/ipsec.h> | 
| @@ -26,6 +25,7 @@ | |||
| 26 | #include <linux/security.h> | 25 | #include <linux/security.h> | 
| 27 | #include <net/sock.h> | 26 | #include <net/sock.h> | 
| 28 | #include <net/xfrm.h> | 27 | #include <net/xfrm.h> | 
| 28 | #include <net/netlink.h> | ||
| 29 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> | 
| 30 | 30 | ||
| 31 | static struct sock *xfrm_nl; | 31 | static struct sock *xfrm_nl; | 
| @@ -948,11 +948,6 @@ static struct xfrm_link { | |||
| 948 | [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy }, | 948 | [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy }, | 
| 949 | }; | 949 | }; | 
| 950 | 950 | ||
| 951 | static int xfrm_done(struct netlink_callback *cb) | ||
| 952 | { | ||
| 953 | return 0; | ||
| 954 | } | ||
| 955 | |||
| 956 | static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) | 951 | static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp) | 
| 957 | { | 952 | { | 
| 958 | struct rtattr *xfrma[XFRMA_MAX]; | 953 | struct rtattr *xfrma[XFRMA_MAX]; | 
| @@ -984,20 +979,15 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err | |||
| 984 | if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || | 979 | if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || | 
| 985 | type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && | 980 | type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && | 
| 986 | (nlh->nlmsg_flags & NLM_F_DUMP)) { | 981 | (nlh->nlmsg_flags & NLM_F_DUMP)) { | 
| 987 | u32 rlen; | ||
| 988 | |||
| 989 | if (link->dump == NULL) | 982 | if (link->dump == NULL) | 
| 990 | goto err_einval; | 983 | goto err_einval; | 
| 991 | 984 | ||
| 992 | if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh, | 985 | if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh, | 
| 993 | link->dump, | 986 | link->dump, NULL)) != 0) { | 
| 994 | xfrm_done)) != 0) { | ||
| 995 | return -1; | 987 | return -1; | 
| 996 | } | 988 | } | 
| 997 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | 989 | |
| 998 | if (rlen > skb->len) | 990 | netlink_queue_skip(nlh, skb); | 
| 999 | rlen = skb->len; | ||
| 1000 | skb_pull(skb, rlen); | ||
| 1001 | return -1; | 991 | return -1; | 
| 1002 | } | 992 | } | 
| 1003 | 993 | ||
| @@ -1032,60 +1022,13 @@ err_einval: | |||
| 1032 | return -1; | 1022 | return -1; | 
| 1033 | } | 1023 | } | 
| 1034 | 1024 | ||
| 1035 | static int xfrm_user_rcv_skb(struct sk_buff *skb) | ||
| 1036 | { | ||
| 1037 | int err; | ||
| 1038 | struct nlmsghdr *nlh; | ||
| 1039 | |||
| 1040 | while (skb->len >= NLMSG_SPACE(0)) { | ||
| 1041 | u32 rlen; | ||
| 1042 | |||
| 1043 | nlh = (struct nlmsghdr *) skb->data; | ||
| 1044 | if (nlh->nlmsg_len < sizeof(*nlh) || | ||
| 1045 | skb->len < nlh->nlmsg_len) | ||
| 1046 | return 0; | ||
| 1047 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | ||
| 1048 | if (rlen > skb->len) | ||
| 1049 | rlen = skb->len; | ||
| 1050 | if (xfrm_user_rcv_msg(skb, nlh, &err) < 0) { | ||
| 1051 | if (err == 0) | ||
| 1052 | return -1; | ||
| 1053 | netlink_ack(skb, nlh, err); | ||
| 1054 | } else if (nlh->nlmsg_flags & NLM_F_ACK) | ||
| 1055 | netlink_ack(skb, nlh, 0); | ||
| 1056 | skb_pull(skb, rlen); | ||
| 1057 | } | ||
| 1058 | |||
| 1059 | return 0; | ||
| 1060 | } | ||
| 1061 | |||
| 1062 | static void xfrm_netlink_rcv(struct sock *sk, int len) | 1025 | static void xfrm_netlink_rcv(struct sock *sk, int len) | 
| 1063 | { | 1026 | { | 
| 1064 | unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); | 1027 | unsigned int qlen = 0; | 
| 1065 | 1028 | ||
| 1066 | do { | 1029 | do { | 
| 1067 | struct sk_buff *skb; | ||
| 1068 | |||
| 1069 | down(&xfrm_cfg_sem); | 1030 | down(&xfrm_cfg_sem); | 
| 1070 | 1031 | netlink_run_queue(sk, &qlen, &xfrm_user_rcv_msg); | |
| 1071 | if (qlen > skb_queue_len(&sk->sk_receive_queue)) | ||
| 1072 | qlen = skb_queue_len(&sk->sk_receive_queue); | ||
| 1073 | |||
| 1074 | for (; qlen; qlen--) { | ||
| 1075 | skb = skb_dequeue(&sk->sk_receive_queue); | ||
| 1076 | if (xfrm_user_rcv_skb(skb)) { | ||
| 1077 | if (skb->len) | ||
| 1078 | skb_queue_head(&sk->sk_receive_queue, | ||
| 1079 | skb); | ||
| 1080 | else { | ||
| 1081 | kfree_skb(skb); | ||
| 1082 | qlen--; | ||
| 1083 | } | ||
| 1084 | break; | ||
| 1085 | } | ||
| 1086 | kfree_skb(skb); | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | up(&xfrm_cfg_sem); | 1032 | up(&xfrm_cfg_sem); | 
| 1090 | 1033 | ||
| 1091 | } while (qlen); | 1034 | } while (qlen); | 
