diff options
Diffstat (limited to 'net')
313 files changed, 19091 insertions, 9897 deletions
diff --git a/net/Kconfig b/net/Kconfig index 4959a4e1e0fe..6528a935622c 100644 --- a/net/Kconfig +++ b/net/Kconfig | |||
@@ -249,6 +249,11 @@ source "net/ieee80211/Kconfig" | |||
249 | config WIRELESS_EXT | 249 | config WIRELESS_EXT |
250 | bool | 250 | bool |
251 | 251 | ||
252 | source "net/netlabel/Kconfig" | ||
253 | |||
254 | config FIB_RULES | ||
255 | bool | ||
256 | |||
252 | endif # if NET | 257 | endif # if NET |
253 | endmenu # Networking | 258 | endmenu # Networking |
254 | 259 | ||
diff --git a/net/Makefile b/net/Makefile index 065796f5fb17..ad4d14f4bb29 100644 --- a/net/Makefile +++ b/net/Makefile | |||
@@ -46,6 +46,7 @@ obj-$(CONFIG_IP_DCCP) += dccp/ | |||
46 | obj-$(CONFIG_IP_SCTP) += sctp/ | 46 | obj-$(CONFIG_IP_SCTP) += sctp/ |
47 | obj-$(CONFIG_IEEE80211) += ieee80211/ | 47 | obj-$(CONFIG_IEEE80211) += ieee80211/ |
48 | obj-$(CONFIG_TIPC) += tipc/ | 48 | obj-$(CONFIG_TIPC) += tipc/ |
49 | obj-$(CONFIG_NETLABEL) += netlabel/ | ||
49 | 50 | ||
50 | ifeq ($(CONFIG_NET),y) | 51 | ifeq ($(CONFIG_NET),y) |
51 | obj-$(CONFIG_SYSCTL) += sysctl_net.o | 52 | obj-$(CONFIG_SYSCTL) += sysctl_net.o |
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 5df4b9a068bb..c0a4ae28fcfa 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c | |||
@@ -1,6 +1,5 @@ | |||
1 | /* ATM driver model support. */ | 1 | /* ATM driver model support. */ |
2 | 2 | ||
3 | #include <linux/config.h> | ||
4 | #include <linux/kernel.h> | 3 | #include <linux/kernel.h> |
5 | #include <linux/init.h> | 4 | #include <linux/init.h> |
6 | #include <linux/kobject.h> | 5 | #include <linux/kobject.h> |
diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 00704661e83f..b87c2a88bdce 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c | |||
@@ -98,11 +98,6 @@ static struct notifier_block mpoa_notifier = { | |||
98 | 0 | 98 | 0 |
99 | }; | 99 | }; |
100 | 100 | ||
101 | #ifdef CONFIG_PROC_FS | ||
102 | extern int mpc_proc_init(void); | ||
103 | extern void mpc_proc_clean(void); | ||
104 | #endif | ||
105 | |||
106 | struct mpoa_client *mpcs = NULL; /* FIXME */ | 101 | struct mpoa_client *mpcs = NULL; /* FIXME */ |
107 | static struct atm_mpoa_qos *qos_head = NULL; | 102 | static struct atm_mpoa_qos *qos_head = NULL; |
108 | static DEFINE_TIMER(mpc_timer, NULL, 0, 0); | 103 | static DEFINE_TIMER(mpc_timer, NULL, 0, 0); |
@@ -1439,12 +1434,8 @@ static __init int atm_mpoa_init(void) | |||
1439 | { | 1434 | { |
1440 | register_atm_ioctl(&atm_ioctl_ops); | 1435 | register_atm_ioctl(&atm_ioctl_ops); |
1441 | 1436 | ||
1442 | #ifdef CONFIG_PROC_FS | ||
1443 | if (mpc_proc_init() != 0) | 1437 | if (mpc_proc_init() != 0) |
1444 | printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n"); | 1438 | printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n"); |
1445 | else | ||
1446 | printk(KERN_INFO "mpoa: /proc/mpoa initialized\n"); | ||
1447 | #endif | ||
1448 | 1439 | ||
1449 | printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); | 1440 | printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); |
1450 | 1441 | ||
@@ -1457,9 +1448,7 @@ static void __exit atm_mpoa_cleanup(void) | |||
1457 | struct atm_mpoa_qos *qos, *nextqos; | 1448 | struct atm_mpoa_qos *qos, *nextqos; |
1458 | struct lec_priv *priv; | 1449 | struct lec_priv *priv; |
1459 | 1450 | ||
1460 | #ifdef CONFIG_PROC_FS | ||
1461 | mpc_proc_clean(); | 1451 | mpc_proc_clean(); |
1462 | #endif | ||
1463 | 1452 | ||
1464 | del_timer(&mpc_timer); | 1453 | del_timer(&mpc_timer); |
1465 | unregister_netdevice_notifier(&mpoa_notifier); | 1454 | unregister_netdevice_notifier(&mpoa_notifier); |
diff --git a/net/atm/mpc.h b/net/atm/mpc.h index 863ddf6079e1..3c7981a229e8 100644 --- a/net/atm/mpc.h +++ b/net/atm/mpc.h | |||
@@ -50,4 +50,12 @@ int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos); | |||
50 | struct seq_file; | 50 | struct seq_file; |
51 | void atm_mpoa_disp_qos(struct seq_file *m); | 51 | void atm_mpoa_disp_qos(struct seq_file *m); |
52 | 52 | ||
53 | #ifdef CONFIG_PROC_FS | ||
54 | int mpc_proc_init(void); | ||
55 | void mpc_proc_clean(void); | ||
56 | #else | ||
57 | #define mpc_proc_init() (0) | ||
58 | #define mpc_proc_clean() do { } while(0) | ||
59 | #endif | ||
60 | |||
53 | #endif /* _MPC_H_ */ | 61 | #endif /* _MPC_H_ */ |
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 864fbbc7b24d..191b861e5e53 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c | |||
@@ -38,13 +38,10 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) | |||
38 | if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) | 38 | if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) |
39 | kfree_skb(skb); | 39 | kfree_skb(skb); |
40 | else { | 40 | else { |
41 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
42 | /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ | 41 | /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ |
43 | if (nf_bridge_maybe_copy_header(skb)) | 42 | if (nf_bridge_maybe_copy_header(skb)) |
44 | kfree_skb(skb); | 43 | kfree_skb(skb); |
45 | else | 44 | else { |
46 | #endif | ||
47 | { | ||
48 | skb_push(skb, ETH_HLEN); | 45 | skb_push(skb, ETH_HLEN); |
49 | 46 | ||
50 | dev_queue_xmit(skb); | 47 | dev_queue_xmit(skb); |
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 05b3de888243..ac181be13d83 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c | |||
@@ -53,10 +53,10 @@ | |||
53 | 53 | ||
54 | #ifdef CONFIG_SYSCTL | 54 | #ifdef CONFIG_SYSCTL |
55 | static struct ctl_table_header *brnf_sysctl_header; | 55 | static struct ctl_table_header *brnf_sysctl_header; |
56 | static int brnf_call_iptables = 1; | 56 | static int brnf_call_iptables __read_mostly = 1; |
57 | static int brnf_call_ip6tables = 1; | 57 | static int brnf_call_ip6tables __read_mostly = 1; |
58 | static int brnf_call_arptables = 1; | 58 | static int brnf_call_arptables __read_mostly = 1; |
59 | static int brnf_filter_vlan_tagged = 1; | 59 | static int brnf_filter_vlan_tagged __read_mostly = 1; |
60 | #else | 60 | #else |
61 | #define brnf_filter_vlan_tagged 1 | 61 | #define brnf_filter_vlan_tagged 1 |
62 | #endif | 62 | #endif |
@@ -127,14 +127,37 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) | |||
127 | 127 | ||
128 | static inline void nf_bridge_save_header(struct sk_buff *skb) | 128 | static inline void nf_bridge_save_header(struct sk_buff *skb) |
129 | { | 129 | { |
130 | int header_size = 16; | 130 | int header_size = ETH_HLEN; |
131 | 131 | ||
132 | if (skb->protocol == htons(ETH_P_8021Q)) | 132 | if (skb->protocol == htons(ETH_P_8021Q)) |
133 | header_size = 18; | 133 | header_size += VLAN_HLEN; |
134 | 134 | ||
135 | memcpy(skb->nf_bridge->data, skb->data - header_size, header_size); | 135 | memcpy(skb->nf_bridge->data, skb->data - header_size, header_size); |
136 | } | 136 | } |
137 | 137 | ||
138 | /* | ||
139 | * When forwarding bridge frames, we save a copy of the original | ||
140 | * header before processing. | ||
141 | */ | ||
142 | int nf_bridge_copy_header(struct sk_buff *skb) | ||
143 | { | ||
144 | int err; | ||
145 | int header_size = ETH_HLEN; | ||
146 | |||
147 | if (skb->protocol == htons(ETH_P_8021Q)) | ||
148 | header_size += VLAN_HLEN; | ||
149 | |||
150 | err = skb_cow(skb, header_size); | ||
151 | if (err) | ||
152 | return err; | ||
153 | |||
154 | memcpy(skb->data - header_size, skb->nf_bridge->data, header_size); | ||
155 | |||
156 | if (skb->protocol == htons(ETH_P_8021Q)) | ||
157 | __skb_push(skb, VLAN_HLEN); | ||
158 | return 0; | ||
159 | } | ||
160 | |||
138 | /* PF_BRIDGE/PRE_ROUTING *********************************************/ | 161 | /* PF_BRIDGE/PRE_ROUTING *********************************************/ |
139 | /* Undo the changes made for ip6tables PREROUTING and continue the | 162 | /* Undo the changes made for ip6tables PREROUTING and continue the |
140 | * bridge PRE_ROUTING hook. */ | 163 | * bridge PRE_ROUTING hook. */ |
@@ -695,16 +718,6 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, | |||
695 | else | 718 | else |
696 | pf = PF_INET6; | 719 | pf = PF_INET6; |
697 | 720 | ||
698 | #ifdef CONFIG_NETFILTER_DEBUG | ||
699 | /* Sometimes we get packets with NULL ->dst here (for example, | ||
700 | * running a dhcp client daemon triggers this). This should now | ||
701 | * be fixed, but let's keep the check around. */ | ||
702 | if (skb->dst == NULL) { | ||
703 | printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); | ||
704 | return NF_ACCEPT; | ||
705 | } | ||
706 | #endif | ||
707 | |||
708 | nf_bridge = skb->nf_bridge; | 721 | nf_bridge = skb->nf_bridge; |
709 | nf_bridge->physoutdev = skb->dev; | 722 | nf_bridge->physoutdev = skb->dev; |
710 | realindev = nf_bridge->physindev; | 723 | realindev = nf_bridge->physindev; |
@@ -786,7 +799,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, | |||
786 | * keep the check just to be sure... */ | 799 | * keep the check just to be sure... */ |
787 | if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) { | 800 | if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) { |
788 | printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: " | 801 | printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: " |
789 | "bad mac.raw pointer."); | 802 | "bad mac.raw pointer.\n"); |
790 | goto print_error; | 803 | goto print_error; |
791 | } | 804 | } |
792 | #endif | 805 | #endif |
@@ -804,7 +817,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, | |||
804 | 817 | ||
805 | #ifdef CONFIG_NETFILTER_DEBUG | 818 | #ifdef CONFIG_NETFILTER_DEBUG |
806 | if (skb->dst == NULL) { | 819 | if (skb->dst == NULL) { |
807 | printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); | 820 | printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n"); |
808 | goto print_error; | 821 | goto print_error; |
809 | } | 822 | } |
810 | #endif | 823 | #endif |
@@ -841,6 +854,7 @@ print_error: | |||
841 | } | 854 | } |
842 | printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw, | 855 | printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw, |
843 | skb->data); | 856 | skb->data); |
857 | dump_stack(); | ||
844 | return NF_ACCEPT; | 858 | return NF_ACCEPT; |
845 | #endif | 859 | #endif |
846 | } | 860 | } |
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 53086fb75089..8f661195d09d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/rtnetlink.h> | 14 | #include <linux/rtnetlink.h> |
15 | #include <net/netlink.h> | ||
15 | #include "br_private.h" | 16 | #include "br_private.h" |
16 | 17 | ||
17 | /* | 18 | /* |
@@ -76,26 +77,24 @@ rtattr_failure: | |||
76 | void br_ifinfo_notify(int event, struct net_bridge_port *port) | 77 | void br_ifinfo_notify(int event, struct net_bridge_port *port) |
77 | { | 78 | { |
78 | struct sk_buff *skb; | 79 | struct sk_buff *skb; |
79 | int err = -ENOMEM; | 80 | int payload = sizeof(struct ifinfomsg) + 128; |
81 | int err = -ENOBUFS; | ||
80 | 82 | ||
81 | pr_debug("bridge notify event=%d\n", event); | 83 | pr_debug("bridge notify event=%d\n", event); |
82 | skb = alloc_skb(NLMSG_SPACE(sizeof(struct ifinfomsg) + 128), | 84 | skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); |
83 | GFP_ATOMIC); | 85 | if (skb == NULL) |
84 | if (!skb) | 86 | goto errout; |
85 | goto err_out; | 87 | |
88 | err = br_fill_ifinfo(skb, port, 0, 0, event, 0); | ||
89 | if (err < 0) { | ||
90 | kfree_skb(skb); | ||
91 | goto errout; | ||
92 | } | ||
86 | 93 | ||
87 | err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0); | 94 | err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); |
95 | errout: | ||
88 | if (err < 0) | 96 | if (err < 0) |
89 | goto err_kfree; | 97 | rtnl_set_sk_err(RTNLGRP_LINK, err); |
90 | |||
91 | NETLINK_CB(skb).dst_group = RTNLGRP_LINK; | ||
92 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); | ||
93 | return; | ||
94 | |||
95 | err_kfree: | ||
96 | kfree_skb(skb); | ||
97 | err_out: | ||
98 | netlink_set_err(rtnl, 0, RTNLGRP_LINK, err); | ||
99 | } | 98 | } |
100 | 99 | ||
101 | /* | 100 | /* |
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3a13ed643459..3df55b2bd91d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/vmalloc.h> | 24 | #include <linux/vmalloc.h> |
25 | #include <linux/netfilter_bridge/ebtables.h> | 25 | #include <linux/netfilter_bridge/ebtables.h> |
26 | #include <linux/spinlock.h> | 26 | #include <linux/spinlock.h> |
27 | #include <linux/mutex.h> | ||
27 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
28 | #include <linux/smp.h> | 29 | #include <linux/smp.h> |
29 | #include <linux/cpumask.h> | 30 | #include <linux/cpumask.h> |
@@ -31,36 +32,9 @@ | |||
31 | /* needed for logical [in,out]-dev filtering */ | 32 | /* needed for logical [in,out]-dev filtering */ |
32 | #include "../br_private.h" | 33 | #include "../br_private.h" |
33 | 34 | ||
34 | /* list_named_find */ | ||
35 | #define ASSERT_READ_LOCK(x) | ||
36 | #define ASSERT_WRITE_LOCK(x) | ||
37 | #include <linux/netfilter_ipv4/listhelp.h> | ||
38 | #include <linux/mutex.h> | ||
39 | |||
40 | #if 0 | ||
41 | /* use this for remote debugging | ||
42 | * Copyright (C) 1998 by Ori Pomerantz | ||
43 | * Print the string to the appropriate tty, the one | ||
44 | * the current task uses | ||
45 | */ | ||
46 | static void print_string(char *str) | ||
47 | { | ||
48 | struct tty_struct *my_tty; | ||
49 | |||
50 | /* The tty for the current task */ | ||
51 | my_tty = current->signal->tty; | ||
52 | if (my_tty != NULL) { | ||
53 | my_tty->driver->write(my_tty, 0, str, strlen(str)); | ||
54 | my_tty->driver->write(my_tty, 0, "\015\012", 2); | ||
55 | } | ||
56 | } | ||
57 | |||
58 | #define BUGPRINT(args) print_string(args); | ||
59 | #else | ||
60 | #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ | 35 | #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ |
61 | "report to author: "format, ## args) | 36 | "report to author: "format, ## args) |
62 | /* #define BUGPRINT(format, args...) */ | 37 | /* #define BUGPRINT(format, args...) */ |
63 | #endif | ||
64 | #define MEMPRINT(format, args...) printk("kernel msg: ebtables "\ | 38 | #define MEMPRINT(format, args...) printk("kernel msg: ebtables "\ |
65 | ": out of memory: "format, ## args) | 39 | ": out of memory: "format, ## args) |
66 | /* #define MEMPRINT(format, args...) */ | 40 | /* #define MEMPRINT(format, args...) */ |
@@ -299,18 +273,22 @@ static inline void * | |||
299 | find_inlist_lock_noload(struct list_head *head, const char *name, int *error, | 273 | find_inlist_lock_noload(struct list_head *head, const char *name, int *error, |
300 | struct mutex *mutex) | 274 | struct mutex *mutex) |
301 | { | 275 | { |
302 | void *ret; | 276 | struct { |
277 | struct list_head list; | ||
278 | char name[EBT_FUNCTION_MAXNAMELEN]; | ||
279 | } *e; | ||
303 | 280 | ||
304 | *error = mutex_lock_interruptible(mutex); | 281 | *error = mutex_lock_interruptible(mutex); |
305 | if (*error != 0) | 282 | if (*error != 0) |
306 | return NULL; | 283 | return NULL; |
307 | 284 | ||
308 | ret = list_named_find(head, name); | 285 | list_for_each_entry(e, head, list) { |
309 | if (!ret) { | 286 | if (strcmp(e->name, name) == 0) |
310 | *error = -ENOENT; | 287 | return e; |
311 | mutex_unlock(mutex); | ||
312 | } | 288 | } |
313 | return ret; | 289 | *error = -ENOENT; |
290 | mutex_unlock(mutex); | ||
291 | return NULL; | ||
314 | } | 292 | } |
315 | 293 | ||
316 | #ifndef CONFIG_KMOD | 294 | #ifndef CONFIG_KMOD |
@@ -1064,15 +1042,19 @@ free_newinfo: | |||
1064 | 1042 | ||
1065 | int ebt_register_target(struct ebt_target *target) | 1043 | int ebt_register_target(struct ebt_target *target) |
1066 | { | 1044 | { |
1045 | struct ebt_target *t; | ||
1067 | int ret; | 1046 | int ret; |
1068 | 1047 | ||
1069 | ret = mutex_lock_interruptible(&ebt_mutex); | 1048 | ret = mutex_lock_interruptible(&ebt_mutex); |
1070 | if (ret != 0) | 1049 | if (ret != 0) |
1071 | return ret; | 1050 | return ret; |
1072 | if (!list_named_insert(&ebt_targets, target)) { | 1051 | list_for_each_entry(t, &ebt_targets, list) { |
1073 | mutex_unlock(&ebt_mutex); | 1052 | if (strcmp(t->name, target->name) == 0) { |
1074 | return -EEXIST; | 1053 | mutex_unlock(&ebt_mutex); |
1054 | return -EEXIST; | ||
1055 | } | ||
1075 | } | 1056 | } |
1057 | list_add(&target->list, &ebt_targets); | ||
1076 | mutex_unlock(&ebt_mutex); | 1058 | mutex_unlock(&ebt_mutex); |
1077 | 1059 | ||
1078 | return 0; | 1060 | return 0; |
@@ -1081,21 +1063,25 @@ int ebt_register_target(struct ebt_target *target) | |||
1081 | void ebt_unregister_target(struct ebt_target *target) | 1063 | void ebt_unregister_target(struct ebt_target *target) |
1082 | { | 1064 | { |
1083 | mutex_lock(&ebt_mutex); | 1065 | mutex_lock(&ebt_mutex); |
1084 | LIST_DELETE(&ebt_targets, target); | 1066 | list_del(&target->list); |
1085 | mutex_unlock(&ebt_mutex); | 1067 | mutex_unlock(&ebt_mutex); |
1086 | } | 1068 | } |
1087 | 1069 | ||
1088 | int ebt_register_match(struct ebt_match *match) | 1070 | int ebt_register_match(struct ebt_match *match) |
1089 | { | 1071 | { |
1072 | struct ebt_match *m; | ||
1090 | int ret; | 1073 | int ret; |
1091 | 1074 | ||
1092 | ret = mutex_lock_interruptible(&ebt_mutex); | 1075 | ret = mutex_lock_interruptible(&ebt_mutex); |
1093 | if (ret != 0) | 1076 | if (ret != 0) |
1094 | return ret; | 1077 | return ret; |
1095 | if (!list_named_insert(&ebt_matches, match)) { | 1078 | list_for_each_entry(m, &ebt_matches, list) { |
1096 | mutex_unlock(&ebt_mutex); | 1079 | if (strcmp(m->name, match->name) == 0) { |
1097 | return -EEXIST; | 1080 | mutex_unlock(&ebt_mutex); |
1081 | return -EEXIST; | ||
1082 | } | ||
1098 | } | 1083 | } |
1084 | list_add(&match->list, &ebt_matches); | ||
1099 | mutex_unlock(&ebt_mutex); | 1085 | mutex_unlock(&ebt_mutex); |
1100 | 1086 | ||
1101 | return 0; | 1087 | return 0; |
@@ -1104,21 +1090,25 @@ int ebt_register_match(struct ebt_match *match) | |||
1104 | void ebt_unregister_match(struct ebt_match *match) | 1090 | void ebt_unregister_match(struct ebt_match *match) |
1105 | { | 1091 | { |
1106 | mutex_lock(&ebt_mutex); | 1092 | mutex_lock(&ebt_mutex); |
1107 | LIST_DELETE(&ebt_matches, match); | 1093 | list_del(&match->list); |
1108 | mutex_unlock(&ebt_mutex); | 1094 | mutex_unlock(&ebt_mutex); |
1109 | } | 1095 | } |
1110 | 1096 | ||
1111 | int ebt_register_watcher(struct ebt_watcher *watcher) | 1097 | int ebt_register_watcher(struct ebt_watcher *watcher) |
1112 | { | 1098 | { |
1099 | struct ebt_watcher *w; | ||
1113 | int ret; | 1100 | int ret; |
1114 | 1101 | ||
1115 | ret = mutex_lock_interruptible(&ebt_mutex); | 1102 | ret = mutex_lock_interruptible(&ebt_mutex); |
1116 | if (ret != 0) | 1103 | if (ret != 0) |
1117 | return ret; | 1104 | return ret; |
1118 | if (!list_named_insert(&ebt_watchers, watcher)) { | 1105 | list_for_each_entry(w, &ebt_watchers, list) { |
1119 | mutex_unlock(&ebt_mutex); | 1106 | if (strcmp(w->name, watcher->name) == 0) { |
1120 | return -EEXIST; | 1107 | mutex_unlock(&ebt_mutex); |
1108 | return -EEXIST; | ||
1109 | } | ||
1121 | } | 1110 | } |
1111 | list_add(&watcher->list, &ebt_watchers); | ||
1122 | mutex_unlock(&ebt_mutex); | 1112 | mutex_unlock(&ebt_mutex); |
1123 | 1113 | ||
1124 | return 0; | 1114 | return 0; |
@@ -1127,13 +1117,14 @@ int ebt_register_watcher(struct ebt_watcher *watcher) | |||
1127 | void ebt_unregister_watcher(struct ebt_watcher *watcher) | 1117 | void ebt_unregister_watcher(struct ebt_watcher *watcher) |
1128 | { | 1118 | { |
1129 | mutex_lock(&ebt_mutex); | 1119 | mutex_lock(&ebt_mutex); |
1130 | LIST_DELETE(&ebt_watchers, watcher); | 1120 | list_del(&watcher->list); |
1131 | mutex_unlock(&ebt_mutex); | 1121 | mutex_unlock(&ebt_mutex); |
1132 | } | 1122 | } |
1133 | 1123 | ||
1134 | int ebt_register_table(struct ebt_table *table) | 1124 | int ebt_register_table(struct ebt_table *table) |
1135 | { | 1125 | { |
1136 | struct ebt_table_info *newinfo; | 1126 | struct ebt_table_info *newinfo; |
1127 | struct ebt_table *t; | ||
1137 | int ret, i, countersize; | 1128 | int ret, i, countersize; |
1138 | 1129 | ||
1139 | if (!table || !table->table ||!table->table->entries || | 1130 | if (!table || !table->table ||!table->table->entries || |
@@ -1179,10 +1170,12 @@ int ebt_register_table(struct ebt_table *table) | |||
1179 | if (ret != 0) | 1170 | if (ret != 0) |
1180 | goto free_chainstack; | 1171 | goto free_chainstack; |
1181 | 1172 | ||
1182 | if (list_named_find(&ebt_tables, table->name)) { | 1173 | list_for_each_entry(t, &ebt_tables, list) { |
1183 | ret = -EEXIST; | 1174 | if (strcmp(t->name, table->name) == 0) { |
1184 | BUGPRINT("Table name already exists\n"); | 1175 | ret = -EEXIST; |
1185 | goto free_unlock; | 1176 | BUGPRINT("Table name already exists\n"); |
1177 | goto free_unlock; | ||
1178 | } | ||
1186 | } | 1179 | } |
1187 | 1180 | ||
1188 | /* Hold a reference count if the chains aren't empty */ | 1181 | /* Hold a reference count if the chains aren't empty */ |
@@ -1190,7 +1183,7 @@ int ebt_register_table(struct ebt_table *table) | |||
1190 | ret = -ENOENT; | 1183 | ret = -ENOENT; |
1191 | goto free_unlock; | 1184 | goto free_unlock; |
1192 | } | 1185 | } |
1193 | list_prepend(&ebt_tables, table); | 1186 | list_add(&table->list, &ebt_tables); |
1194 | mutex_unlock(&ebt_mutex); | 1187 | mutex_unlock(&ebt_mutex); |
1195 | return 0; | 1188 | return 0; |
1196 | free_unlock: | 1189 | free_unlock: |
@@ -1216,7 +1209,7 @@ void ebt_unregister_table(struct ebt_table *table) | |||
1216 | return; | 1209 | return; |
1217 | } | 1210 | } |
1218 | mutex_lock(&ebt_mutex); | 1211 | mutex_lock(&ebt_mutex); |
1219 | LIST_DELETE(&ebt_tables, table); | 1212 | list_del(&table->list); |
1220 | mutex_unlock(&ebt_mutex); | 1213 | mutex_unlock(&ebt_mutex); |
1221 | vfree(table->private->entries); | 1214 | vfree(table->private->entries); |
1222 | if (table->private->chainstack) { | 1215 | if (table->private->chainstack) { |
@@ -1486,7 +1479,7 @@ static int __init ebtables_init(void) | |||
1486 | int ret; | 1479 | int ret; |
1487 | 1480 | ||
1488 | mutex_lock(&ebt_mutex); | 1481 | mutex_lock(&ebt_mutex); |
1489 | list_named_insert(&ebt_targets, &ebt_standard_target); | 1482 | list_add(&ebt_standard_target.list, &ebt_targets); |
1490 | mutex_unlock(&ebt_mutex); | 1483 | mutex_unlock(&ebt_mutex); |
1491 | if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) | 1484 | if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) |
1492 | return ret; | 1485 | return ret; |
diff --git a/net/core/Makefile b/net/core/Makefile index 2645ba428d48..119568077dab 100644 --- a/net/core/Makefile +++ b/net/core/Makefile | |||
@@ -17,3 +17,4 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o | |||
17 | obj-$(CONFIG_WIRELESS_EXT) += wireless.o | 17 | obj-$(CONFIG_WIRELESS_EXT) += wireless.o |
18 | obj-$(CONFIG_NETPOLL) += netpoll.o | 18 | obj-$(CONFIG_NETPOLL) += netpoll.o |
19 | obj-$(CONFIG_NET_DMA) += user_dma.o | 19 | obj-$(CONFIG_NET_DMA) += user_dma.o |
20 | obj-$(CONFIG_FIB_RULES) += fib_rules.o | ||
diff --git a/net/core/datagram.c b/net/core/datagram.c index aecddcc30401..f558c61aecc7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c | |||
@@ -417,7 +417,7 @@ unsigned int __skb_checksum_complete(struct sk_buff *skb) | |||
417 | 417 | ||
418 | sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); | 418 | sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); |
419 | if (likely(!sum)) { | 419 | if (likely(!sum)) { |
420 | if (unlikely(skb->ip_summed == CHECKSUM_HW)) | 420 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) |
421 | netdev_rx_csum_fault(skb->dev); | 421 | netdev_rx_csum_fault(skb->dev); |
422 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 422 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
423 | } | 423 | } |
@@ -462,7 +462,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, | |||
462 | goto fault; | 462 | goto fault; |
463 | if ((unsigned short)csum_fold(csum)) | 463 | if ((unsigned short)csum_fold(csum)) |
464 | goto csum_error; | 464 | goto csum_error; |
465 | if (unlikely(skb->ip_summed == CHECKSUM_HW)) | 465 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) |
466 | netdev_rx_csum_fault(skb->dev); | 466 | netdev_rx_csum_fault(skb->dev); |
467 | iov->iov_len -= chunk; | 467 | iov->iov_len -= chunk; |
468 | iov->iov_base += chunk; | 468 | iov->iov_base += chunk; |
diff --git a/net/core/dev.c b/net/core/dev.c index d4a1ec3bded5..14de297d024d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -640,6 +640,8 @@ int dev_valid_name(const char *name) | |||
640 | { | 640 | { |
641 | if (*name == '\0') | 641 | if (*name == '\0') |
642 | return 0; | 642 | return 0; |
643 | if (strlen(name) >= IFNAMSIZ) | ||
644 | return 0; | ||
643 | if (!strcmp(name, ".") || !strcmp(name, "..")) | 645 | if (!strcmp(name, ".") || !strcmp(name, "..")) |
644 | return 0; | 646 | return 0; |
645 | 647 | ||
@@ -1166,12 +1168,12 @@ EXPORT_SYMBOL(netif_device_attach); | |||
1166 | * Invalidate hardware checksum when packet is to be mangled, and | 1168 | * Invalidate hardware checksum when packet is to be mangled, and |
1167 | * complete checksum manually on outgoing path. | 1169 | * complete checksum manually on outgoing path. |
1168 | */ | 1170 | */ |
1169 | int skb_checksum_help(struct sk_buff *skb, int inward) | 1171 | int skb_checksum_help(struct sk_buff *skb) |
1170 | { | 1172 | { |
1171 | unsigned int csum; | 1173 | unsigned int csum; |
1172 | int ret = 0, offset = skb->h.raw - skb->data; | 1174 | int ret = 0, offset = skb->h.raw - skb->data; |
1173 | 1175 | ||
1174 | if (inward) | 1176 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
1175 | goto out_set_summed; | 1177 | goto out_set_summed; |
1176 | 1178 | ||
1177 | if (unlikely(skb_shinfo(skb)->gso_size)) { | 1179 | if (unlikely(skb_shinfo(skb)->gso_size)) { |
@@ -1223,7 +1225,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) | |||
1223 | skb->mac_len = skb->nh.raw - skb->data; | 1225 | skb->mac_len = skb->nh.raw - skb->data; |
1224 | __skb_pull(skb, skb->mac_len); | 1226 | __skb_pull(skb, skb->mac_len); |
1225 | 1227 | ||
1226 | if (unlikely(skb->ip_summed != CHECKSUM_HW)) { | 1228 | if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { |
1227 | if (skb_header_cloned(skb) && | 1229 | if (skb_header_cloned(skb) && |
1228 | (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) | 1230 | (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) |
1229 | return ERR_PTR(err); | 1231 | return ERR_PTR(err); |
@@ -1232,7 +1234,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) | |||
1232 | rcu_read_lock(); | 1234 | rcu_read_lock(); |
1233 | list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { | 1235 | list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { |
1234 | if (ptype->type == type && !ptype->dev && ptype->gso_segment) { | 1236 | if (ptype->type == type && !ptype->dev && ptype->gso_segment) { |
1235 | if (unlikely(skb->ip_summed != CHECKSUM_HW)) { | 1237 | if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { |
1236 | err = ptype->gso_send_check(skb); | 1238 | err = ptype->gso_send_check(skb); |
1237 | segs = ERR_PTR(err); | 1239 | segs = ERR_PTR(err); |
1238 | if (err || skb_gso_ok(skb, features)) | 1240 | if (err || skb_gso_ok(skb, features)) |
@@ -1444,11 +1446,11 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
1444 | /* If packet is not checksummed and device does not support | 1446 | /* If packet is not checksummed and device does not support |
1445 | * checksumming for this protocol, complete checksumming here. | 1447 | * checksumming for this protocol, complete checksumming here. |
1446 | */ | 1448 | */ |
1447 | if (skb->ip_summed == CHECKSUM_HW && | 1449 | if (skb->ip_summed == CHECKSUM_PARTIAL && |
1448 | (!(dev->features & NETIF_F_GEN_CSUM) && | 1450 | (!(dev->features & NETIF_F_GEN_CSUM) && |
1449 | (!(dev->features & NETIF_F_IP_CSUM) || | 1451 | (!(dev->features & NETIF_F_IP_CSUM) || |
1450 | skb->protocol != htons(ETH_P_IP)))) | 1452 | skb->protocol != htons(ETH_P_IP)))) |
1451 | if (skb_checksum_help(skb, 0)) | 1453 | if (skb_checksum_help(skb)) |
1452 | goto out_kfree_skb; | 1454 | goto out_kfree_skb; |
1453 | 1455 | ||
1454 | gso: | 1456 | gso: |
@@ -3191,13 +3193,15 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, | |||
3191 | struct net_device *dev; | 3193 | struct net_device *dev; |
3192 | int alloc_size; | 3194 | int alloc_size; |
3193 | 3195 | ||
3196 | BUG_ON(strlen(name) >= sizeof(dev->name)); | ||
3197 | |||
3194 | /* ensure 32-byte alignment of both the device and private area */ | 3198 | /* ensure 32-byte alignment of both the device and private area */ |
3195 | alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; | 3199 | alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; |
3196 | alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; | 3200 | alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; |
3197 | 3201 | ||
3198 | p = kzalloc(alloc_size, GFP_KERNEL); | 3202 | p = kzalloc(alloc_size, GFP_KERNEL); |
3199 | if (!p) { | 3203 | if (!p) { |
3200 | printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); | 3204 | printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); |
3201 | return NULL; | 3205 | return NULL; |
3202 | } | 3206 | } |
3203 | 3207 | ||
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index c57d887da2ef..b22648d04d36 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c | |||
@@ -21,8 +21,7 @@ | |||
21 | * 2 of the License, or (at your option) any later version. | 21 | * 2 of the License, or (at your option) any later version. |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/config.h> | 24 | #include <linux/module.h> |
25 | #include <linux/module.h> | ||
26 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
27 | #include <asm/system.h> | 26 | #include <asm/system.h> |
28 | #include <linux/bitops.h> | 27 | #include <linux/bitops.h> |
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c new file mode 100644 index 000000000000..a99d87d82b7f --- /dev/null +++ b/net/core/fib_rules.c | |||
@@ -0,0 +1,421 @@ | |||
1 | /* | ||
2 | * net/core/fib_rules.c Generic Routing Rules | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License as | ||
6 | * published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * Authors: Thomas Graf <tgraf@suug.ch> | ||
9 | */ | ||
10 | |||
11 | #include <linux/config.h> | ||
12 | #include <linux/types.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/list.h> | ||
15 | #include <net/fib_rules.h> | ||
16 | |||
17 | static LIST_HEAD(rules_ops); | ||
18 | static DEFINE_SPINLOCK(rules_mod_lock); | ||
19 | |||
20 | static void notify_rule_change(int event, struct fib_rule *rule, | ||
21 | struct fib_rules_ops *ops, struct nlmsghdr *nlh, | ||
22 | u32 pid); | ||
23 | |||
24 | static struct fib_rules_ops *lookup_rules_ops(int family) | ||
25 | { | ||
26 | struct fib_rules_ops *ops; | ||
27 | |||
28 | rcu_read_lock(); | ||
29 | list_for_each_entry_rcu(ops, &rules_ops, list) { | ||
30 | if (ops->family == family) { | ||
31 | if (!try_module_get(ops->owner)) | ||
32 | ops = NULL; | ||
33 | rcu_read_unlock(); | ||
34 | return ops; | ||
35 | } | ||
36 | } | ||
37 | rcu_read_unlock(); | ||
38 | |||
39 | return NULL; | ||
40 | } | ||
41 | |||
42 | static void rules_ops_put(struct fib_rules_ops *ops) | ||
43 | { | ||
44 | if (ops) | ||
45 | module_put(ops->owner); | ||
46 | } | ||
47 | |||
48 | int fib_rules_register(struct fib_rules_ops *ops) | ||
49 | { | ||
50 | int err = -EEXIST; | ||
51 | struct fib_rules_ops *o; | ||
52 | |||
53 | if (ops->rule_size < sizeof(struct fib_rule)) | ||
54 | return -EINVAL; | ||
55 | |||
56 | if (ops->match == NULL || ops->configure == NULL || | ||
57 | ops->compare == NULL || ops->fill == NULL || | ||
58 | ops->action == NULL) | ||
59 | return -EINVAL; | ||
60 | |||
61 | spin_lock(&rules_mod_lock); | ||
62 | list_for_each_entry(o, &rules_ops, list) | ||
63 | if (ops->family == o->family) | ||
64 | goto errout; | ||
65 | |||
66 | list_add_tail_rcu(&ops->list, &rules_ops); | ||
67 | err = 0; | ||
68 | errout: | ||
69 | spin_unlock(&rules_mod_lock); | ||
70 | |||
71 | return err; | ||
72 | } | ||
73 | |||
74 | EXPORT_SYMBOL_GPL(fib_rules_register); | ||
75 | |||
76 | static void cleanup_ops(struct fib_rules_ops *ops) | ||
77 | { | ||
78 | struct fib_rule *rule, *tmp; | ||
79 | |||
80 | list_for_each_entry_safe(rule, tmp, ops->rules_list, list) { | ||
81 | list_del_rcu(&rule->list); | ||
82 | fib_rule_put(rule); | ||
83 | } | ||
84 | } | ||
85 | |||
86 | int fib_rules_unregister(struct fib_rules_ops *ops) | ||
87 | { | ||
88 | int err = 0; | ||
89 | struct fib_rules_ops *o; | ||
90 | |||
91 | spin_lock(&rules_mod_lock); | ||
92 | list_for_each_entry(o, &rules_ops, list) { | ||
93 | if (o == ops) { | ||
94 | list_del_rcu(&o->list); | ||
95 | cleanup_ops(ops); | ||
96 | goto out; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | err = -ENOENT; | ||
101 | out: | ||
102 | spin_unlock(&rules_mod_lock); | ||
103 | |||
104 | synchronize_rcu(); | ||
105 | |||
106 | return err; | ||
107 | } | ||
108 | |||
109 | EXPORT_SYMBOL_GPL(fib_rules_unregister); | ||
110 | |||
111 | int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, | ||
112 | int flags, struct fib_lookup_arg *arg) | ||
113 | { | ||
114 | struct fib_rule *rule; | ||
115 | int err; | ||
116 | |||
117 | rcu_read_lock(); | ||
118 | |||
119 | list_for_each_entry_rcu(rule, ops->rules_list, list) { | ||
120 | if (rule->ifindex && (rule->ifindex != fl->iif)) | ||
121 | continue; | ||
122 | |||
123 | if (!ops->match(rule, fl, flags)) | ||
124 | continue; | ||
125 | |||
126 | err = ops->action(rule, fl, flags, arg); | ||
127 | if (err != -EAGAIN) { | ||
128 | fib_rule_get(rule); | ||
129 | arg->rule = rule; | ||
130 | goto out; | ||
131 | } | ||
132 | } | ||
133 | |||
134 | err = -ENETUNREACH; | ||
135 | out: | ||
136 | rcu_read_unlock(); | ||
137 | |||
138 | return err; | ||
139 | } | ||
140 | |||
141 | EXPORT_SYMBOL_GPL(fib_rules_lookup); | ||
142 | |||
143 | int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | ||
144 | { | ||
145 | struct fib_rule_hdr *frh = nlmsg_data(nlh); | ||
146 | struct fib_rules_ops *ops = NULL; | ||
147 | struct fib_rule *rule, *r, *last = NULL; | ||
148 | struct nlattr *tb[FRA_MAX+1]; | ||
149 | int err = -EINVAL; | ||
150 | |||
151 | if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) | ||
152 | goto errout; | ||
153 | |||
154 | ops = lookup_rules_ops(frh->family); | ||
155 | if (ops == NULL) { | ||
156 | err = EAFNOSUPPORT; | ||
157 | goto errout; | ||
158 | } | ||
159 | |||
160 | err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); | ||
161 | if (err < 0) | ||
162 | goto errout; | ||
163 | |||
164 | rule = kzalloc(ops->rule_size, GFP_KERNEL); | ||
165 | if (rule == NULL) { | ||
166 | err = -ENOMEM; | ||
167 | goto errout; | ||
168 | } | ||
169 | |||
170 | if (tb[FRA_PRIORITY]) | ||
171 | rule->pref = nla_get_u32(tb[FRA_PRIORITY]); | ||
172 | |||
173 | if (tb[FRA_IFNAME]) { | ||
174 | struct net_device *dev; | ||
175 | |||
176 | rule->ifindex = -1; | ||
177 | nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); | ||
178 | dev = __dev_get_by_name(rule->ifname); | ||
179 | if (dev) | ||
180 | rule->ifindex = dev->ifindex; | ||
181 | } | ||
182 | |||
183 | rule->action = frh->action; | ||
184 | rule->flags = frh->flags; | ||
185 | rule->table = frh_get_table(frh, tb); | ||
186 | |||
187 | if (!rule->pref && ops->default_pref) | ||
188 | rule->pref = ops->default_pref(); | ||
189 | |||
190 | err = ops->configure(rule, skb, nlh, frh, tb); | ||
191 | if (err < 0) | ||
192 | goto errout_free; | ||
193 | |||
194 | list_for_each_entry(r, ops->rules_list, list) { | ||
195 | if (r->pref > rule->pref) | ||
196 | break; | ||
197 | last = r; | ||
198 | } | ||
199 | |||
200 | fib_rule_get(rule); | ||
201 | |||
202 | if (last) | ||
203 | list_add_rcu(&rule->list, &last->list); | ||
204 | else | ||
205 | list_add_rcu(&rule->list, ops->rules_list); | ||
206 | |||
207 | notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); | ||
208 | rules_ops_put(ops); | ||
209 | return 0; | ||
210 | |||
211 | errout_free: | ||
212 | kfree(rule); | ||
213 | errout: | ||
214 | rules_ops_put(ops); | ||
215 | return err; | ||
216 | } | ||
217 | |||
218 | int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | ||
219 | { | ||
220 | struct fib_rule_hdr *frh = nlmsg_data(nlh); | ||
221 | struct fib_rules_ops *ops = NULL; | ||
222 | struct fib_rule *rule; | ||
223 | struct nlattr *tb[FRA_MAX+1]; | ||
224 | int err = -EINVAL; | ||
225 | |||
226 | if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) | ||
227 | goto errout; | ||
228 | |||
229 | ops = lookup_rules_ops(frh->family); | ||
230 | if (ops == NULL) { | ||
231 | err = EAFNOSUPPORT; | ||
232 | goto errout; | ||
233 | } | ||
234 | |||
235 | err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); | ||
236 | if (err < 0) | ||
237 | goto errout; | ||
238 | |||
239 | list_for_each_entry(rule, ops->rules_list, list) { | ||
240 | if (frh->action && (frh->action != rule->action)) | ||
241 | continue; | ||
242 | |||
243 | if (frh->table && (frh_get_table(frh, tb) != rule->table)) | ||
244 | continue; | ||
245 | |||
246 | if (tb[FRA_PRIORITY] && | ||
247 | (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) | ||
248 | continue; | ||
249 | |||
250 | if (tb[FRA_IFNAME] && | ||
251 | nla_strcmp(tb[FRA_IFNAME], rule->ifname)) | ||
252 | continue; | ||
253 | |||
254 | if (!ops->compare(rule, frh, tb)) | ||
255 | continue; | ||
256 | |||
257 | if (rule->flags & FIB_RULE_PERMANENT) { | ||
258 | err = -EPERM; | ||
259 | goto errout; | ||
260 | } | ||
261 | |||
262 | list_del_rcu(&rule->list); | ||
263 | synchronize_rcu(); | ||
264 | notify_rule_change(RTM_DELRULE, rule, ops, nlh, | ||
265 | NETLINK_CB(skb).pid); | ||
266 | fib_rule_put(rule); | ||
267 | rules_ops_put(ops); | ||
268 | return 0; | ||
269 | } | ||
270 | |||
271 | err = -ENOENT; | ||
272 | errout: | ||
273 | rules_ops_put(ops); | ||
274 | return err; | ||
275 | } | ||
276 | |||
277 | static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, | ||
278 | u32 pid, u32 seq, int type, int flags, | ||
279 | struct fib_rules_ops *ops) | ||
280 | { | ||
281 | struct nlmsghdr *nlh; | ||
282 | struct fib_rule_hdr *frh; | ||
283 | |||
284 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags); | ||
285 | if (nlh == NULL) | ||
286 | return -1; | ||
287 | |||
288 | frh = nlmsg_data(nlh); | ||
289 | frh->table = rule->table; | ||
290 | NLA_PUT_U32(skb, FRA_TABLE, rule->table); | ||
291 | frh->res1 = 0; | ||
292 | frh->res2 = 0; | ||
293 | frh->action = rule->action; | ||
294 | frh->flags = rule->flags; | ||
295 | |||
296 | if (rule->ifname[0]) | ||
297 | NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); | ||
298 | |||
299 | if (rule->pref) | ||
300 | NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); | ||
301 | |||
302 | if (ops->fill(rule, skb, nlh, frh) < 0) | ||
303 | goto nla_put_failure; | ||
304 | |||
305 | return nlmsg_end(skb, nlh); | ||
306 | |||
307 | nla_put_failure: | ||
308 | return nlmsg_cancel(skb, nlh); | ||
309 | } | ||
310 | |||
311 | int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family) | ||
312 | { | ||
313 | int idx = 0; | ||
314 | struct fib_rule *rule; | ||
315 | struct fib_rules_ops *ops; | ||
316 | |||
317 | ops = lookup_rules_ops(family); | ||
318 | if (ops == NULL) | ||
319 | return -EAFNOSUPPORT; | ||
320 | |||
321 | rcu_read_lock(); | ||
322 | list_for_each_entry(rule, ops->rules_list, list) { | ||
323 | if (idx < cb->args[0]) | ||
324 | goto skip; | ||
325 | |||
326 | if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, | ||
327 | cb->nlh->nlmsg_seq, RTM_NEWRULE, | ||
328 | NLM_F_MULTI, ops) < 0) | ||
329 | break; | ||
330 | skip: | ||
331 | idx++; | ||
332 | } | ||
333 | rcu_read_unlock(); | ||
334 | cb->args[0] = idx; | ||
335 | rules_ops_put(ops); | ||
336 | |||
337 | return skb->len; | ||
338 | } | ||
339 | |||
340 | EXPORT_SYMBOL_GPL(fib_rules_dump); | ||
341 | |||
342 | static void notify_rule_change(int event, struct fib_rule *rule, | ||
343 | struct fib_rules_ops *ops, struct nlmsghdr *nlh, | ||
344 | u32 pid) | ||
345 | { | ||
346 | struct sk_buff *skb; | ||
347 | int err = -ENOBUFS; | ||
348 | |||
349 | skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); | ||
350 | if (skb == NULL) | ||
351 | goto errout; | ||
352 | |||
353 | err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); | ||
354 | if (err < 0) { | ||
355 | kfree_skb(skb); | ||
356 | goto errout; | ||
357 | } | ||
358 | |||
359 | err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); | ||
360 | errout: | ||
361 | if (err < 0) | ||
362 | rtnl_set_sk_err(ops->nlgroup, err); | ||
363 | } | ||
364 | |||
365 | static void attach_rules(struct list_head *rules, struct net_device *dev) | ||
366 | { | ||
367 | struct fib_rule *rule; | ||
368 | |||
369 | list_for_each_entry(rule, rules, list) { | ||
370 | if (rule->ifindex == -1 && | ||
371 | strcmp(dev->name, rule->ifname) == 0) | ||
372 | rule->ifindex = dev->ifindex; | ||
373 | } | ||
374 | } | ||
375 | |||
376 | static void detach_rules(struct list_head *rules, struct net_device *dev) | ||
377 | { | ||
378 | struct fib_rule *rule; | ||
379 | |||
380 | list_for_each_entry(rule, rules, list) | ||
381 | if (rule->ifindex == dev->ifindex) | ||
382 | rule->ifindex = -1; | ||
383 | } | ||
384 | |||
385 | |||
386 | static int fib_rules_event(struct notifier_block *this, unsigned long event, | ||
387 | void *ptr) | ||
388 | { | ||
389 | struct net_device *dev = ptr; | ||
390 | struct fib_rules_ops *ops; | ||
391 | |||
392 | ASSERT_RTNL(); | ||
393 | rcu_read_lock(); | ||
394 | |||
395 | switch (event) { | ||
396 | case NETDEV_REGISTER: | ||
397 | list_for_each_entry(ops, &rules_ops, list) | ||
398 | attach_rules(ops->rules_list, dev); | ||
399 | break; | ||
400 | |||
401 | case NETDEV_UNREGISTER: | ||
402 | list_for_each_entry(ops, &rules_ops, list) | ||
403 | detach_rules(ops->rules_list, dev); | ||
404 | break; | ||
405 | } | ||
406 | |||
407 | rcu_read_unlock(); | ||
408 | |||
409 | return NOTIFY_DONE; | ||
410 | } | ||
411 | |||
412 | static struct notifier_block fib_rules_notifier = { | ||
413 | .notifier_call = fib_rules_event, | ||
414 | }; | ||
415 | |||
416 | static int __init fib_rules_init(void) | ||
417 | { | ||
418 | return register_netdevice_notifier(&fib_rules_notifier); | ||
419 | } | ||
420 | |||
421 | subsys_initcall(fib_rules_init); | ||
diff --git a/net/core/filter.c b/net/core/filter.c index 5b4486a60cf6..6732782a5a40 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) | |||
422 | if (!err) { | 422 | if (!err) { |
423 | struct sk_filter *old_fp; | 423 | struct sk_filter *old_fp; |
424 | 424 | ||
425 | spin_lock_bh(&sk->sk_lock.slock); | 425 | rcu_read_lock_bh(); |
426 | old_fp = sk->sk_filter; | 426 | old_fp = rcu_dereference(sk->sk_filter); |
427 | sk->sk_filter = fp; | 427 | rcu_assign_pointer(sk->sk_filter, fp); |
428 | spin_unlock_bh(&sk->sk_lock.slock); | 428 | rcu_read_unlock_bh(); |
429 | fp = old_fp; | 429 | fp = old_fp; |
430 | } | 430 | } |
431 | 431 | ||
diff --git a/net/core/flow.c b/net/core/flow.c index 2191af5f26ac..f23e7e386543 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
@@ -32,7 +32,6 @@ struct flow_cache_entry { | |||
32 | u8 dir; | 32 | u8 dir; |
33 | struct flowi key; | 33 | struct flowi key; |
34 | u32 genid; | 34 | u32 genid; |
35 | u32 sk_sid; | ||
36 | void *object; | 35 | void *object; |
37 | atomic_t *object_ref; | 36 | atomic_t *object_ref; |
38 | }; | 37 | }; |
@@ -165,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) | |||
165 | return 0; | 164 | return 0; |
166 | } | 165 | } |
167 | 166 | ||
168 | void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, | 167 | void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, |
169 | flow_resolve_t resolver) | 168 | flow_resolve_t resolver) |
170 | { | 169 | { |
171 | struct flow_cache_entry *fle, **head; | 170 | struct flow_cache_entry *fle, **head; |
@@ -189,7 +188,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, | |||
189 | for (fle = *head; fle; fle = fle->next) { | 188 | for (fle = *head; fle; fle = fle->next) { |
190 | if (fle->family == family && | 189 | if (fle->family == family && |
191 | fle->dir == dir && | 190 | fle->dir == dir && |
192 | fle->sk_sid == sk_sid && | ||
193 | flow_key_compare(key, &fle->key) == 0) { | 191 | flow_key_compare(key, &fle->key) == 0) { |
194 | if (fle->genid == atomic_read(&flow_cache_genid)) { | 192 | if (fle->genid == atomic_read(&flow_cache_genid)) { |
195 | void *ret = fle->object; | 193 | void *ret = fle->object; |
@@ -214,7 +212,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, | |||
214 | *head = fle; | 212 | *head = fle; |
215 | fle->family = family; | 213 | fle->family = family; |
216 | fle->dir = dir; | 214 | fle->dir = dir; |
217 | fle->sk_sid = sk_sid; | ||
218 | memcpy(&fle->key, key, sizeof(*key)); | 215 | memcpy(&fle->key, key, sizeof(*key)); |
219 | fle->object = NULL; | 216 | fle->object = NULL; |
220 | flow_count(cpu)++; | 217 | flow_count(cpu)++; |
@@ -226,7 +223,7 @@ nocache: | |||
226 | void *obj; | 223 | void *obj; |
227 | atomic_t *obj_ref; | 224 | atomic_t *obj_ref; |
228 | 225 | ||
229 | resolver(key, sk_sid, family, dir, &obj, &obj_ref); | 226 | resolver(key, family, dir, &obj, &obj_ref); |
230 | 227 | ||
231 | if (fle) { | 228 | if (fle) { |
232 | fle->genid = atomic_read(&flow_cache_genid); | 229 | fle->genid = atomic_read(&flow_cache_genid); |
@@ -346,12 +343,8 @@ static int __init flow_cache_init(void) | |||
346 | 343 | ||
347 | flow_cachep = kmem_cache_create("flow_cache", | 344 | flow_cachep = kmem_cache_create("flow_cache", |
348 | sizeof(struct flow_cache_entry), | 345 | sizeof(struct flow_cache_entry), |
349 | 0, SLAB_HWCACHE_ALIGN, | 346 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
350 | NULL, NULL); | 347 | NULL, NULL); |
351 | |||
352 | if (!flow_cachep) | ||
353 | panic("NET: failed to allocate flow cache slab\n"); | ||
354 | |||
355 | flow_hash_shift = 10; | 348 | flow_hash_shift = 10; |
356 | flow_lwm = 2 * flow_hash_size; | 349 | flow_lwm = 2 * flow_hash_size; |
357 | flow_hwm = 4 * flow_hash_size; | 350 | flow_hwm = 4 * flow_hash_size; |
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index fe2113f54e2b..b6c69e1463e8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <net/dst.h> | 30 | #include <net/dst.h> |
31 | #include <net/sock.h> | 31 | #include <net/sock.h> |
32 | #include <net/netevent.h> | 32 | #include <net/netevent.h> |
33 | #include <net/netlink.h> | ||
33 | #include <linux/rtnetlink.h> | 34 | #include <linux/rtnetlink.h> |
34 | #include <linux/random.h> | 35 | #include <linux/random.h> |
35 | #include <linux/string.h> | 36 | #include <linux/string.h> |
@@ -888,7 +889,7 @@ out_unlock_bh: | |||
888 | return rc; | 889 | return rc; |
889 | } | 890 | } |
890 | 891 | ||
891 | static __inline__ void neigh_update_hhs(struct neighbour *neigh) | 892 | static void neigh_update_hhs(struct neighbour *neigh) |
892 | { | 893 | { |
893 | struct hh_cache *hh; | 894 | struct hh_cache *hh; |
894 | void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = | 895 | void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = |
@@ -1338,14 +1339,10 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) | |||
1338 | neigh_rand_reach_time(tbl->parms.base_reachable_time); | 1339 | neigh_rand_reach_time(tbl->parms.base_reachable_time); |
1339 | 1340 | ||
1340 | if (!tbl->kmem_cachep) | 1341 | if (!tbl->kmem_cachep) |
1341 | tbl->kmem_cachep = kmem_cache_create(tbl->id, | 1342 | tbl->kmem_cachep = |
1342 | tbl->entry_size, | 1343 | kmem_cache_create(tbl->id, tbl->entry_size, 0, |
1343 | 0, SLAB_HWCACHE_ALIGN, | 1344 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
1344 | NULL, NULL); | 1345 | NULL, NULL); |
1345 | |||
1346 | if (!tbl->kmem_cachep) | ||
1347 | panic("cannot create neighbour cache"); | ||
1348 | |||
1349 | tbl->stats = alloc_percpu(struct neigh_statistics); | 1346 | tbl->stats = alloc_percpu(struct neigh_statistics); |
1350 | if (!tbl->stats) | 1347 | if (!tbl->stats) |
1351 | panic("cannot create neighbour cache statistics"); | 1348 | panic("cannot create neighbour cache statistics"); |
@@ -1440,48 +1437,62 @@ int neigh_table_clear(struct neigh_table *tbl) | |||
1440 | 1437 | ||
1441 | int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 1438 | int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
1442 | { | 1439 | { |
1443 | struct ndmsg *ndm = NLMSG_DATA(nlh); | 1440 | struct ndmsg *ndm; |
1444 | struct rtattr **nda = arg; | 1441 | struct nlattr *dst_attr; |
1445 | struct neigh_table *tbl; | 1442 | struct neigh_table *tbl; |
1446 | struct net_device *dev = NULL; | 1443 | struct net_device *dev = NULL; |
1447 | int err = -ENODEV; | 1444 | int err = -EINVAL; |
1448 | 1445 | ||
1449 | if (ndm->ndm_ifindex && | 1446 | if (nlmsg_len(nlh) < sizeof(*ndm)) |
1450 | (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) | ||
1451 | goto out; | 1447 | goto out; |
1452 | 1448 | ||
1449 | dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST); | ||
1450 | if (dst_attr == NULL) | ||
1451 | goto out; | ||
1452 | |||
1453 | ndm = nlmsg_data(nlh); | ||
1454 | if (ndm->ndm_ifindex) { | ||
1455 | dev = dev_get_by_index(ndm->ndm_ifindex); | ||
1456 | if (dev == NULL) { | ||
1457 | err = -ENODEV; | ||
1458 | goto out; | ||
1459 | } | ||
1460 | } | ||
1461 | |||
1453 | read_lock(&neigh_tbl_lock); | 1462 | read_lock(&neigh_tbl_lock); |
1454 | for (tbl = neigh_tables; tbl; tbl = tbl->next) { | 1463 | for (tbl = neigh_tables; tbl; tbl = tbl->next) { |
1455 | struct rtattr *dst_attr = nda[NDA_DST - 1]; | 1464 | struct neighbour *neigh; |
1456 | struct neighbour *n; | ||
1457 | 1465 | ||
1458 | if (tbl->family != ndm->ndm_family) | 1466 | if (tbl->family != ndm->ndm_family) |
1459 | continue; | 1467 | continue; |
1460 | read_unlock(&neigh_tbl_lock); | 1468 | read_unlock(&neigh_tbl_lock); |
1461 | 1469 | ||
1462 | err = -EINVAL; | 1470 | if (nla_len(dst_attr) < tbl->key_len) |
1463 | if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len) | ||
1464 | goto out_dev_put; | 1471 | goto out_dev_put; |
1465 | 1472 | ||
1466 | if (ndm->ndm_flags & NTF_PROXY) { | 1473 | if (ndm->ndm_flags & NTF_PROXY) { |
1467 | err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev); | 1474 | err = pneigh_delete(tbl, nla_data(dst_attr), dev); |
1468 | goto out_dev_put; | 1475 | goto out_dev_put; |
1469 | } | 1476 | } |
1470 | 1477 | ||
1471 | if (!dev) | 1478 | if (dev == NULL) |
1472 | goto out; | 1479 | goto out_dev_put; |
1473 | 1480 | ||
1474 | n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); | 1481 | neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); |
1475 | if (n) { | 1482 | if (neigh == NULL) { |
1476 | err = neigh_update(n, NULL, NUD_FAILED, | 1483 | err = -ENOENT; |
1477 | NEIGH_UPDATE_F_OVERRIDE| | 1484 | goto out_dev_put; |
1478 | NEIGH_UPDATE_F_ADMIN); | ||
1479 | neigh_release(n); | ||
1480 | } | 1485 | } |
1486 | |||
1487 | err = neigh_update(neigh, NULL, NUD_FAILED, | ||
1488 | NEIGH_UPDATE_F_OVERRIDE | | ||
1489 | NEIGH_UPDATE_F_ADMIN); | ||
1490 | neigh_release(neigh); | ||
1481 | goto out_dev_put; | 1491 | goto out_dev_put; |
1482 | } | 1492 | } |
1483 | read_unlock(&neigh_tbl_lock); | 1493 | read_unlock(&neigh_tbl_lock); |
1484 | err = -EADDRNOTAVAIL; | 1494 | err = -EAFNOSUPPORT; |
1495 | |||
1485 | out_dev_put: | 1496 | out_dev_put: |
1486 | if (dev) | 1497 | if (dev) |
1487 | dev_put(dev); | 1498 | dev_put(dev); |
@@ -1491,76 +1502,93 @@ out: | |||
1491 | 1502 | ||
1492 | int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 1503 | int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
1493 | { | 1504 | { |
1494 | struct ndmsg *ndm = NLMSG_DATA(nlh); | 1505 | struct ndmsg *ndm; |
1495 | struct rtattr **nda = arg; | 1506 | struct nlattr *tb[NDA_MAX+1]; |
1496 | struct neigh_table *tbl; | 1507 | struct neigh_table *tbl; |
1497 | struct net_device *dev = NULL; | 1508 | struct net_device *dev = NULL; |
1498 | int err = -ENODEV; | 1509 | int err; |
1499 | 1510 | ||
1500 | if (ndm->ndm_ifindex && | 1511 | err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); |
1501 | (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) | 1512 | if (err < 0) |
1502 | goto out; | 1513 | goto out; |
1503 | 1514 | ||
1515 | err = -EINVAL; | ||
1516 | if (tb[NDA_DST] == NULL) | ||
1517 | goto out; | ||
1518 | |||
1519 | ndm = nlmsg_data(nlh); | ||
1520 | if (ndm->ndm_ifindex) { | ||
1521 | dev = dev_get_by_index(ndm->ndm_ifindex); | ||
1522 | if (dev == NULL) { | ||
1523 | err = -ENODEV; | ||
1524 | goto out; | ||
1525 | } | ||
1526 | |||
1527 | if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) | ||
1528 | goto out_dev_put; | ||
1529 | } | ||
1530 | |||
1504 | read_lock(&neigh_tbl_lock); | 1531 | read_lock(&neigh_tbl_lock); |
1505 | for (tbl = neigh_tables; tbl; tbl = tbl->next) { | 1532 | for (tbl = neigh_tables; tbl; tbl = tbl->next) { |
1506 | struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1]; | 1533 | int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; |
1507 | struct rtattr *dst_attr = nda[NDA_DST - 1]; | 1534 | struct neighbour *neigh; |
1508 | int override = 1; | 1535 | void *dst, *lladdr; |
1509 | struct neighbour *n; | ||
1510 | 1536 | ||
1511 | if (tbl->family != ndm->ndm_family) | 1537 | if (tbl->family != ndm->ndm_family) |
1512 | continue; | 1538 | continue; |
1513 | read_unlock(&neigh_tbl_lock); | 1539 | read_unlock(&neigh_tbl_lock); |
1514 | 1540 | ||
1515 | err = -EINVAL; | 1541 | if (nla_len(tb[NDA_DST]) < tbl->key_len) |
1516 | if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len) | ||
1517 | goto out_dev_put; | 1542 | goto out_dev_put; |
1543 | dst = nla_data(tb[NDA_DST]); | ||
1544 | lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; | ||
1518 | 1545 | ||
1519 | if (ndm->ndm_flags & NTF_PROXY) { | 1546 | if (ndm->ndm_flags & NTF_PROXY) { |
1547 | struct pneigh_entry *pn; | ||
1548 | |||
1520 | err = -ENOBUFS; | 1549 | err = -ENOBUFS; |
1521 | if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1)) | 1550 | pn = pneigh_lookup(tbl, dst, dev, 1); |
1551 | if (pn) { | ||
1552 | pn->flags = ndm->ndm_flags; | ||
1522 | err = 0; | 1553 | err = 0; |
1554 | } | ||
1523 | goto out_dev_put; | 1555 | goto out_dev_put; |
1524 | } | 1556 | } |
1525 | 1557 | ||
1526 | err = -EINVAL; | 1558 | if (dev == NULL) |
1527 | if (!dev) | ||
1528 | goto out; | ||
1529 | if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len) | ||
1530 | goto out_dev_put; | 1559 | goto out_dev_put; |
1560 | |||
1561 | neigh = neigh_lookup(tbl, dst, dev); | ||
1562 | if (neigh == NULL) { | ||
1563 | if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { | ||
1564 | err = -ENOENT; | ||
1565 | goto out_dev_put; | ||
1566 | } | ||
1531 | 1567 | ||
1532 | n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); | 1568 | neigh = __neigh_lookup_errno(tbl, dst, dev); |
1533 | if (n) { | 1569 | if (IS_ERR(neigh)) { |
1534 | if (nlh->nlmsg_flags & NLM_F_EXCL) { | 1570 | err = PTR_ERR(neigh); |
1535 | err = -EEXIST; | ||
1536 | neigh_release(n); | ||
1537 | goto out_dev_put; | 1571 | goto out_dev_put; |
1538 | } | 1572 | } |
1539 | |||
1540 | override = nlh->nlmsg_flags & NLM_F_REPLACE; | ||
1541 | } else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { | ||
1542 | err = -ENOENT; | ||
1543 | goto out_dev_put; | ||
1544 | } else { | 1573 | } else { |
1545 | n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev); | 1574 | if (nlh->nlmsg_flags & NLM_F_EXCL) { |
1546 | if (IS_ERR(n)) { | 1575 | err = -EEXIST; |
1547 | err = PTR_ERR(n); | 1576 | neigh_release(neigh); |
1548 | goto out_dev_put; | 1577 | goto out_dev_put; |
1549 | } | 1578 | } |
1550 | } | ||
1551 | 1579 | ||
1552 | err = neigh_update(n, | 1580 | if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) |
1553 | lladdr_attr ? RTA_DATA(lladdr_attr) : NULL, | 1581 | flags &= ~NEIGH_UPDATE_F_OVERRIDE; |
1554 | ndm->ndm_state, | 1582 | } |
1555 | (override ? NEIGH_UPDATE_F_OVERRIDE : 0) | | ||
1556 | NEIGH_UPDATE_F_ADMIN); | ||
1557 | 1583 | ||
1558 | neigh_release(n); | 1584 | err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); |
1585 | neigh_release(neigh); | ||
1559 | goto out_dev_put; | 1586 | goto out_dev_put; |
1560 | } | 1587 | } |
1561 | 1588 | ||
1562 | read_unlock(&neigh_tbl_lock); | 1589 | read_unlock(&neigh_tbl_lock); |
1563 | err = -EADDRNOTAVAIL; | 1590 | err = -EAFNOSUPPORT; |
1591 | |||
1564 | out_dev_put: | 1592 | out_dev_put: |
1565 | if (dev) | 1593 | if (dev) |
1566 | dev_put(dev); | 1594 | dev_put(dev); |
@@ -1570,56 +1598,59 @@ out: | |||
1570 | 1598 | ||
1571 | static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) | 1599 | static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) |
1572 | { | 1600 | { |
1573 | struct rtattr *nest = NULL; | 1601 | struct nlattr *nest; |
1574 | 1602 | ||
1575 | nest = RTA_NEST(skb, NDTA_PARMS); | 1603 | nest = nla_nest_start(skb, NDTA_PARMS); |
1604 | if (nest == NULL) | ||
1605 | return -ENOBUFS; | ||
1576 | 1606 | ||
1577 | if (parms->dev) | 1607 | if (parms->dev) |
1578 | RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); | 1608 | NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); |
1579 | 1609 | ||
1580 | RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); | 1610 | NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); |
1581 | RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); | 1611 | NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); |
1582 | RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); | 1612 | NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); |
1583 | RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); | 1613 | NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); |
1584 | RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); | 1614 | NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); |
1585 | RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); | 1615 | NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); |
1586 | RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); | 1616 | NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); |
1587 | RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, | 1617 | NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, |
1588 | parms->base_reachable_time); | 1618 | parms->base_reachable_time); |
1589 | RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); | 1619 | NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); |
1590 | RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); | 1620 | NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); |
1591 | RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); | 1621 | NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); |
1592 | RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); | 1622 | NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); |
1593 | RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); | 1623 | NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); |
1594 | RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); | 1624 | NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); |
1595 | 1625 | ||
1596 | return RTA_NEST_END(skb, nest); | 1626 | return nla_nest_end(skb, nest); |
1597 | 1627 | ||
1598 | rtattr_failure: | 1628 | nla_put_failure: |
1599 | return RTA_NEST_CANCEL(skb, nest); | 1629 | return nla_nest_cancel(skb, nest); |
1600 | } | 1630 | } |
1601 | 1631 | ||
1602 | static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, | 1632 | static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, |
1603 | struct netlink_callback *cb) | 1633 | u32 pid, u32 seq, int type, int flags) |
1604 | { | 1634 | { |
1605 | struct nlmsghdr *nlh; | 1635 | struct nlmsghdr *nlh; |
1606 | struct ndtmsg *ndtmsg; | 1636 | struct ndtmsg *ndtmsg; |
1607 | 1637 | ||
1608 | nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), | 1638 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); |
1609 | NLM_F_MULTI); | 1639 | if (nlh == NULL) |
1640 | return -ENOBUFS; | ||
1610 | 1641 | ||
1611 | ndtmsg = NLMSG_DATA(nlh); | 1642 | ndtmsg = nlmsg_data(nlh); |
1612 | 1643 | ||
1613 | read_lock_bh(&tbl->lock); | 1644 | read_lock_bh(&tbl->lock); |
1614 | ndtmsg->ndtm_family = tbl->family; | 1645 | ndtmsg->ndtm_family = tbl->family; |
1615 | ndtmsg->ndtm_pad1 = 0; | 1646 | ndtmsg->ndtm_pad1 = 0; |
1616 | ndtmsg->ndtm_pad2 = 0; | 1647 | ndtmsg->ndtm_pad2 = 0; |
1617 | 1648 | ||
1618 | RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); | 1649 | NLA_PUT_STRING(skb, NDTA_NAME, tbl->id); |
1619 | RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); | 1650 | NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); |
1620 | RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); | 1651 | NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); |
1621 | RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); | 1652 | NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); |
1622 | RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); | 1653 | NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); |
1623 | 1654 | ||
1624 | { | 1655 | { |
1625 | unsigned long now = jiffies; | 1656 | unsigned long now = jiffies; |
@@ -1638,7 +1669,7 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, | |||
1638 | .ndtc_proxy_qlen = tbl->proxy_queue.qlen, | 1669 | .ndtc_proxy_qlen = tbl->proxy_queue.qlen, |
1639 | }; | 1670 | }; |
1640 | 1671 | ||
1641 | RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); | 1672 | NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); |
1642 | } | 1673 | } |
1643 | 1674 | ||
1644 | { | 1675 | { |
@@ -1663,55 +1694,50 @@ static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, | |||
1663 | ndst.ndts_forced_gc_runs += st->forced_gc_runs; | 1694 | ndst.ndts_forced_gc_runs += st->forced_gc_runs; |
1664 | } | 1695 | } |
1665 | 1696 | ||
1666 | RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); | 1697 | NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); |
1667 | } | 1698 | } |
1668 | 1699 | ||
1669 | BUG_ON(tbl->parms.dev); | 1700 | BUG_ON(tbl->parms.dev); |
1670 | if (neightbl_fill_parms(skb, &tbl->parms) < 0) | 1701 | if (neightbl_fill_parms(skb, &tbl->parms) < 0) |
1671 | goto rtattr_failure; | 1702 | goto nla_put_failure; |
1672 | 1703 | ||
1673 | read_unlock_bh(&tbl->lock); | 1704 | read_unlock_bh(&tbl->lock); |
1674 | return NLMSG_END(skb, nlh); | 1705 | return nlmsg_end(skb, nlh); |
1675 | 1706 | ||
1676 | rtattr_failure: | 1707 | nla_put_failure: |
1677 | read_unlock_bh(&tbl->lock); | 1708 | read_unlock_bh(&tbl->lock); |
1678 | return NLMSG_CANCEL(skb, nlh); | 1709 | return nlmsg_cancel(skb, nlh); |
1679 | |||
1680 | nlmsg_failure: | ||
1681 | return -1; | ||
1682 | } | 1710 | } |
1683 | 1711 | ||
1684 | static int neightbl_fill_param_info(struct neigh_table *tbl, | 1712 | static int neightbl_fill_param_info(struct sk_buff *skb, |
1713 | struct neigh_table *tbl, | ||
1685 | struct neigh_parms *parms, | 1714 | struct neigh_parms *parms, |
1686 | struct sk_buff *skb, | 1715 | u32 pid, u32 seq, int type, |
1687 | struct netlink_callback *cb) | 1716 | unsigned int flags) |
1688 | { | 1717 | { |
1689 | struct ndtmsg *ndtmsg; | 1718 | struct ndtmsg *ndtmsg; |
1690 | struct nlmsghdr *nlh; | 1719 | struct nlmsghdr *nlh; |
1691 | 1720 | ||
1692 | nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), | 1721 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); |
1693 | NLM_F_MULTI); | 1722 | if (nlh == NULL) |
1723 | return -ENOBUFS; | ||
1694 | 1724 | ||
1695 | ndtmsg = NLMSG_DATA(nlh); | 1725 | ndtmsg = nlmsg_data(nlh); |
1696 | 1726 | ||
1697 | read_lock_bh(&tbl->lock); | 1727 | read_lock_bh(&tbl->lock); |
1698 | ndtmsg->ndtm_family = tbl->family; | 1728 | ndtmsg->ndtm_family = tbl->family; |
1699 | ndtmsg->ndtm_pad1 = 0; | 1729 | ndtmsg->ndtm_pad1 = 0; |
1700 | ndtmsg->ndtm_pad2 = 0; | 1730 | ndtmsg->ndtm_pad2 = 0; |
1701 | RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); | ||
1702 | 1731 | ||
1703 | if (neightbl_fill_parms(skb, parms) < 0) | 1732 | if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 || |
1704 | goto rtattr_failure; | 1733 | neightbl_fill_parms(skb, parms) < 0) |
1734 | goto errout; | ||
1705 | 1735 | ||
1706 | read_unlock_bh(&tbl->lock); | 1736 | read_unlock_bh(&tbl->lock); |
1707 | return NLMSG_END(skb, nlh); | 1737 | return nlmsg_end(skb, nlh); |
1708 | 1738 | errout: | |
1709 | rtattr_failure: | ||
1710 | read_unlock_bh(&tbl->lock); | 1739 | read_unlock_bh(&tbl->lock); |
1711 | return NLMSG_CANCEL(skb, nlh); | 1740 | return nlmsg_cancel(skb, nlh); |
1712 | |||
1713 | nlmsg_failure: | ||
1714 | return -1; | ||
1715 | } | 1741 | } |
1716 | 1742 | ||
1717 | static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, | 1743 | static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, |
@@ -1727,28 +1753,61 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, | |||
1727 | return NULL; | 1753 | return NULL; |
1728 | } | 1754 | } |
1729 | 1755 | ||
1756 | static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = { | ||
1757 | [NDTA_NAME] = { .type = NLA_STRING }, | ||
1758 | [NDTA_THRESH1] = { .type = NLA_U32 }, | ||
1759 | [NDTA_THRESH2] = { .type = NLA_U32 }, | ||
1760 | [NDTA_THRESH3] = { .type = NLA_U32 }, | ||
1761 | [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, | ||
1762 | [NDTA_PARMS] = { .type = NLA_NESTED }, | ||
1763 | }; | ||
1764 | |||
1765 | static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = { | ||
1766 | [NDTPA_IFINDEX] = { .type = NLA_U32 }, | ||
1767 | [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, | ||
1768 | [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, | ||
1769 | [NDTPA_APP_PROBES] = { .type = NLA_U32 }, | ||
1770 | [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, | ||
1771 | [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, | ||
1772 | [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, | ||
1773 | [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, | ||
1774 | [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, | ||
1775 | [NDTPA_RETRANS_TIME] = { .type = NLA_U64 }, | ||
1776 | [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, | ||
1777 | [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, | ||
1778 | [NDTPA_LOCKTIME] = { .type = NLA_U64 }, | ||
1779 | }; | ||
1780 | |||
1730 | int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 1781 | int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
1731 | { | 1782 | { |
1732 | struct neigh_table *tbl; | 1783 | struct neigh_table *tbl; |
1733 | struct ndtmsg *ndtmsg = NLMSG_DATA(nlh); | 1784 | struct ndtmsg *ndtmsg; |
1734 | struct rtattr **tb = arg; | 1785 | struct nlattr *tb[NDTA_MAX+1]; |
1735 | int err = -EINVAL; | 1786 | int err; |
1736 | 1787 | ||
1737 | if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1])) | 1788 | err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, |
1738 | return -EINVAL; | 1789 | nl_neightbl_policy); |
1790 | if (err < 0) | ||
1791 | goto errout; | ||
1792 | |||
1793 | if (tb[NDTA_NAME] == NULL) { | ||
1794 | err = -EINVAL; | ||
1795 | goto errout; | ||
1796 | } | ||
1739 | 1797 | ||
1798 | ndtmsg = nlmsg_data(nlh); | ||
1740 | read_lock(&neigh_tbl_lock); | 1799 | read_lock(&neigh_tbl_lock); |
1741 | for (tbl = neigh_tables; tbl; tbl = tbl->next) { | 1800 | for (tbl = neigh_tables; tbl; tbl = tbl->next) { |
1742 | if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) | 1801 | if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) |
1743 | continue; | 1802 | continue; |
1744 | 1803 | ||
1745 | if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id)) | 1804 | if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) |
1746 | break; | 1805 | break; |
1747 | } | 1806 | } |
1748 | 1807 | ||
1749 | if (tbl == NULL) { | 1808 | if (tbl == NULL) { |
1750 | err = -ENOENT; | 1809 | err = -ENOENT; |
1751 | goto errout; | 1810 | goto errout_locked; |
1752 | } | 1811 | } |
1753 | 1812 | ||
1754 | /* | 1813 | /* |
@@ -1757,165 +1816,178 @@ int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1757 | */ | 1816 | */ |
1758 | write_lock_bh(&tbl->lock); | 1817 | write_lock_bh(&tbl->lock); |
1759 | 1818 | ||
1760 | if (tb[NDTA_THRESH1 - 1]) | 1819 | if (tb[NDTA_PARMS]) { |
1761 | tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]); | 1820 | struct nlattr *tbp[NDTPA_MAX+1]; |
1762 | |||
1763 | if (tb[NDTA_THRESH2 - 1]) | ||
1764 | tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]); | ||
1765 | |||
1766 | if (tb[NDTA_THRESH3 - 1]) | ||
1767 | tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]); | ||
1768 | |||
1769 | if (tb[NDTA_GC_INTERVAL - 1]) | ||
1770 | tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]); | ||
1771 | |||
1772 | if (tb[NDTA_PARMS - 1]) { | ||
1773 | struct rtattr *tbp[NDTPA_MAX]; | ||
1774 | struct neigh_parms *p; | 1821 | struct neigh_parms *p; |
1775 | u32 ifindex = 0; | 1822 | int i, ifindex = 0; |
1776 | 1823 | ||
1777 | if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0) | 1824 | err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], |
1778 | goto rtattr_failure; | 1825 | nl_ntbl_parm_policy); |
1826 | if (err < 0) | ||
1827 | goto errout_tbl_lock; | ||
1779 | 1828 | ||
1780 | if (tbp[NDTPA_IFINDEX - 1]) | 1829 | if (tbp[NDTPA_IFINDEX]) |
1781 | ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]); | 1830 | ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); |
1782 | 1831 | ||
1783 | p = lookup_neigh_params(tbl, ifindex); | 1832 | p = lookup_neigh_params(tbl, ifindex); |
1784 | if (p == NULL) { | 1833 | if (p == NULL) { |
1785 | err = -ENOENT; | 1834 | err = -ENOENT; |
1786 | goto rtattr_failure; | 1835 | goto errout_tbl_lock; |
1787 | } | 1836 | } |
1788 | |||
1789 | if (tbp[NDTPA_QUEUE_LEN - 1]) | ||
1790 | p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]); | ||
1791 | |||
1792 | if (tbp[NDTPA_PROXY_QLEN - 1]) | ||
1793 | p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]); | ||
1794 | |||
1795 | if (tbp[NDTPA_APP_PROBES - 1]) | ||
1796 | p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]); | ||
1797 | 1837 | ||
1798 | if (tbp[NDTPA_UCAST_PROBES - 1]) | 1838 | for (i = 1; i <= NDTPA_MAX; i++) { |
1799 | p->ucast_probes = | 1839 | if (tbp[i] == NULL) |
1800 | RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]); | 1840 | continue; |
1801 | |||
1802 | if (tbp[NDTPA_MCAST_PROBES - 1]) | ||
1803 | p->mcast_probes = | ||
1804 | RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]); | ||
1805 | |||
1806 | if (tbp[NDTPA_BASE_REACHABLE_TIME - 1]) | ||
1807 | p->base_reachable_time = | ||
1808 | RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]); | ||
1809 | |||
1810 | if (tbp[NDTPA_GC_STALETIME - 1]) | ||
1811 | p->gc_staletime = | ||
1812 | RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]); | ||
1813 | 1841 | ||
1814 | if (tbp[NDTPA_DELAY_PROBE_TIME - 1]) | 1842 | switch (i) { |
1815 | p->delay_probe_time = | 1843 | case NDTPA_QUEUE_LEN: |
1816 | RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]); | 1844 | p->queue_len = nla_get_u32(tbp[i]); |
1845 | break; | ||
1846 | case NDTPA_PROXY_QLEN: | ||
1847 | p->proxy_qlen = nla_get_u32(tbp[i]); | ||
1848 | break; | ||
1849 | case NDTPA_APP_PROBES: | ||
1850 | p->app_probes = nla_get_u32(tbp[i]); | ||
1851 | break; | ||
1852 | case NDTPA_UCAST_PROBES: | ||
1853 | p->ucast_probes = nla_get_u32(tbp[i]); | ||
1854 | break; | ||
1855 | case NDTPA_MCAST_PROBES: | ||
1856 | p->mcast_probes = nla_get_u32(tbp[i]); | ||
1857 | break; | ||
1858 | case NDTPA_BASE_REACHABLE_TIME: | ||
1859 | p->base_reachable_time = nla_get_msecs(tbp[i]); | ||
1860 | break; | ||
1861 | case NDTPA_GC_STALETIME: | ||
1862 | p->gc_staletime = nla_get_msecs(tbp[i]); | ||
1863 | break; | ||
1864 | case NDTPA_DELAY_PROBE_TIME: | ||
1865 | p->delay_probe_time = nla_get_msecs(tbp[i]); | ||
1866 | break; | ||
1867 | case NDTPA_RETRANS_TIME: | ||
1868 | p->retrans_time = nla_get_msecs(tbp[i]); | ||
1869 | break; | ||
1870 | case NDTPA_ANYCAST_DELAY: | ||
1871 | p->anycast_delay = nla_get_msecs(tbp[i]); | ||
1872 | break; | ||
1873 | case NDTPA_PROXY_DELAY: | ||
1874 | p->proxy_delay = nla_get_msecs(tbp[i]); | ||
1875 | break; | ||
1876 | case NDTPA_LOCKTIME: | ||
1877 | p->locktime = nla_get_msecs(tbp[i]); | ||
1878 | break; | ||
1879 | } | ||
1880 | } | ||
1881 | } | ||
1817 | 1882 | ||
1818 | if (tbp[NDTPA_RETRANS_TIME - 1]) | 1883 | if (tb[NDTA_THRESH1]) |
1819 | p->retrans_time = | 1884 | tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); |
1820 | RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]); | ||
1821 | 1885 | ||
1822 | if (tbp[NDTPA_ANYCAST_DELAY - 1]) | 1886 | if (tb[NDTA_THRESH2]) |
1823 | p->anycast_delay = | 1887 | tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); |
1824 | RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]); | ||
1825 | 1888 | ||
1826 | if (tbp[NDTPA_PROXY_DELAY - 1]) | 1889 | if (tb[NDTA_THRESH3]) |
1827 | p->proxy_delay = | 1890 | tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); |
1828 | RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]); | ||
1829 | 1891 | ||
1830 | if (tbp[NDTPA_LOCKTIME - 1]) | 1892 | if (tb[NDTA_GC_INTERVAL]) |
1831 | p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]); | 1893 | tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]); |
1832 | } | ||
1833 | 1894 | ||
1834 | err = 0; | 1895 | err = 0; |
1835 | 1896 | ||
1836 | rtattr_failure: | 1897 | errout_tbl_lock: |
1837 | write_unlock_bh(&tbl->lock); | 1898 | write_unlock_bh(&tbl->lock); |
1838 | errout: | 1899 | errout_locked: |
1839 | read_unlock(&neigh_tbl_lock); | 1900 | read_unlock(&neigh_tbl_lock); |
1901 | errout: | ||
1840 | return err; | 1902 | return err; |
1841 | } | 1903 | } |
1842 | 1904 | ||
1843 | int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) | 1905 | int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) |
1844 | { | 1906 | { |
1845 | int idx, family; | 1907 | int family, tidx, nidx = 0; |
1846 | int s_idx = cb->args[0]; | 1908 | int tbl_skip = cb->args[0]; |
1909 | int neigh_skip = cb->args[1]; | ||
1847 | struct neigh_table *tbl; | 1910 | struct neigh_table *tbl; |
1848 | 1911 | ||
1849 | family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; | 1912 | family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; |
1850 | 1913 | ||
1851 | read_lock(&neigh_tbl_lock); | 1914 | read_lock(&neigh_tbl_lock); |
1852 | for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) { | 1915 | for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) { |
1853 | struct neigh_parms *p; | 1916 | struct neigh_parms *p; |
1854 | 1917 | ||
1855 | if (idx < s_idx || (family && tbl->family != family)) | 1918 | if (tidx < tbl_skip || (family && tbl->family != family)) |
1856 | continue; | 1919 | continue; |
1857 | 1920 | ||
1858 | if (neightbl_fill_info(tbl, skb, cb) <= 0) | 1921 | if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, |
1922 | cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, | ||
1923 | NLM_F_MULTI) <= 0) | ||
1859 | break; | 1924 | break; |
1860 | 1925 | ||
1861 | for (++idx, p = tbl->parms.next; p; p = p->next, idx++) { | 1926 | for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { |
1862 | if (idx < s_idx) | 1927 | if (nidx < neigh_skip) |
1863 | continue; | 1928 | continue; |
1864 | 1929 | ||
1865 | if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0) | 1930 | if (neightbl_fill_param_info(skb, tbl, p, |
1931 | NETLINK_CB(cb->skb).pid, | ||
1932 | cb->nlh->nlmsg_seq, | ||
1933 | RTM_NEWNEIGHTBL, | ||
1934 | NLM_F_MULTI) <= 0) | ||
1866 | goto out; | 1935 | goto out; |
1867 | } | 1936 | } |
1868 | 1937 | ||
1938 | neigh_skip = 0; | ||
1869 | } | 1939 | } |
1870 | out: | 1940 | out: |
1871 | read_unlock(&neigh_tbl_lock); | 1941 | read_unlock(&neigh_tbl_lock); |
1872 | cb->args[0] = idx; | 1942 | cb->args[0] = tidx; |
1943 | cb->args[1] = nidx; | ||
1873 | 1944 | ||
1874 | return skb->len; | 1945 | return skb->len; |
1875 | } | 1946 | } |
1876 | 1947 | ||
1877 | static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, | 1948 | static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, |
1878 | u32 pid, u32 seq, int event, unsigned int flags) | 1949 | u32 pid, u32 seq, int type, unsigned int flags) |
1879 | { | 1950 | { |
1880 | unsigned long now = jiffies; | 1951 | unsigned long now = jiffies; |
1881 | unsigned char *b = skb->tail; | ||
1882 | struct nda_cacheinfo ci; | 1952 | struct nda_cacheinfo ci; |
1883 | int locked = 0; | 1953 | struct nlmsghdr *nlh; |
1884 | u32 probes; | 1954 | struct ndmsg *ndm; |
1885 | struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event, | 1955 | |
1886 | sizeof(struct ndmsg), flags); | 1956 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); |
1887 | struct ndmsg *ndm = NLMSG_DATA(nlh); | 1957 | if (nlh == NULL) |
1958 | return -ENOBUFS; | ||
1888 | 1959 | ||
1889 | ndm->ndm_family = n->ops->family; | 1960 | ndm = nlmsg_data(nlh); |
1961 | ndm->ndm_family = neigh->ops->family; | ||
1890 | ndm->ndm_pad1 = 0; | 1962 | ndm->ndm_pad1 = 0; |
1891 | ndm->ndm_pad2 = 0; | 1963 | ndm->ndm_pad2 = 0; |
1892 | ndm->ndm_flags = n->flags; | 1964 | ndm->ndm_flags = neigh->flags; |
1893 | ndm->ndm_type = n->type; | 1965 | ndm->ndm_type = neigh->type; |
1894 | ndm->ndm_ifindex = n->dev->ifindex; | 1966 | ndm->ndm_ifindex = neigh->dev->ifindex; |
1895 | RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key); | 1967 | |
1896 | read_lock_bh(&n->lock); | 1968 | NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key); |
1897 | locked = 1; | ||
1898 | ndm->ndm_state = n->nud_state; | ||
1899 | if (n->nud_state & NUD_VALID) | ||
1900 | RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha); | ||
1901 | ci.ndm_used = now - n->used; | ||
1902 | ci.ndm_confirmed = now - n->confirmed; | ||
1903 | ci.ndm_updated = now - n->updated; | ||
1904 | ci.ndm_refcnt = atomic_read(&n->refcnt) - 1; | ||
1905 | probes = atomic_read(&n->probes); | ||
1906 | read_unlock_bh(&n->lock); | ||
1907 | locked = 0; | ||
1908 | RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); | ||
1909 | RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes); | ||
1910 | nlh->nlmsg_len = skb->tail - b; | ||
1911 | return skb->len; | ||
1912 | 1969 | ||
1913 | nlmsg_failure: | 1970 | read_lock_bh(&neigh->lock); |
1914 | rtattr_failure: | 1971 | ndm->ndm_state = neigh->nud_state; |
1915 | if (locked) | 1972 | if ((neigh->nud_state & NUD_VALID) && |
1916 | read_unlock_bh(&n->lock); | 1973 | nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { |
1917 | skb_trim(skb, b - skb->data); | 1974 | read_unlock_bh(&neigh->lock); |
1918 | return -1; | 1975 | goto nla_put_failure; |
1976 | } | ||
1977 | |||
1978 | ci.ndm_used = now - neigh->used; | ||
1979 | ci.ndm_confirmed = now - neigh->confirmed; | ||
1980 | ci.ndm_updated = now - neigh->updated; | ||
1981 | ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; | ||
1982 | read_unlock_bh(&neigh->lock); | ||
1983 | |||
1984 | NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes)); | ||
1985 | NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); | ||
1986 | |||
1987 | return nlmsg_end(skb, nlh); | ||
1988 | |||
1989 | nla_put_failure: | ||
1990 | return nlmsg_cancel(skb, nlh); | ||
1919 | } | 1991 | } |
1920 | 1992 | ||
1921 | 1993 | ||
@@ -1959,7 +2031,7 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) | |||
1959 | int t, family, s_t; | 2031 | int t, family, s_t; |
1960 | 2032 | ||
1961 | read_lock(&neigh_tbl_lock); | 2033 | read_lock(&neigh_tbl_lock); |
1962 | family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; | 2034 | family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; |
1963 | s_t = cb->args[0]; | 2035 | s_t = cb->args[0]; |
1964 | 2036 | ||
1965 | for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { | 2037 | for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { |
@@ -2338,41 +2410,35 @@ static struct file_operations neigh_stat_seq_fops = { | |||
2338 | #endif /* CONFIG_PROC_FS */ | 2410 | #endif /* CONFIG_PROC_FS */ |
2339 | 2411 | ||
2340 | #ifdef CONFIG_ARPD | 2412 | #ifdef CONFIG_ARPD |
2341 | void neigh_app_ns(struct neighbour *n) | 2413 | static void __neigh_notify(struct neighbour *n, int type, int flags) |
2342 | { | 2414 | { |
2343 | struct nlmsghdr *nlh; | 2415 | struct sk_buff *skb; |
2344 | int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); | 2416 | int err = -ENOBUFS; |
2345 | struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); | ||
2346 | 2417 | ||
2347 | if (!skb) | 2418 | skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); |
2348 | return; | 2419 | if (skb == NULL) |
2420 | goto errout; | ||
2349 | 2421 | ||
2350 | if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) { | 2422 | err = neigh_fill_info(skb, n, 0, 0, type, flags); |
2423 | if (err < 0) { | ||
2351 | kfree_skb(skb); | 2424 | kfree_skb(skb); |
2352 | return; | 2425 | goto errout; |
2353 | } | 2426 | } |
2354 | nlh = (struct nlmsghdr *)skb->data; | 2427 | |
2355 | nlh->nlmsg_flags = NLM_F_REQUEST; | 2428 | err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); |
2356 | NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; | 2429 | errout: |
2357 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); | 2430 | if (err < 0) |
2431 | rtnl_set_sk_err(RTNLGRP_NEIGH, err); | ||
2358 | } | 2432 | } |
2359 | 2433 | ||
2360 | static void neigh_app_notify(struct neighbour *n) | 2434 | void neigh_app_ns(struct neighbour *n) |
2361 | { | 2435 | { |
2362 | struct nlmsghdr *nlh; | 2436 | __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); |
2363 | int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); | 2437 | } |
2364 | struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); | ||
2365 | |||
2366 | if (!skb) | ||
2367 | return; | ||
2368 | 2438 | ||
2369 | if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) { | 2439 | static void neigh_app_notify(struct neighbour *n) |
2370 | kfree_skb(skb); | 2440 | { |
2371 | return; | 2441 | __neigh_notify(n, RTM_NEWNEIGH, 0); |
2372 | } | ||
2373 | nlh = (struct nlmsghdr *)skb->data; | ||
2374 | NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; | ||
2375 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); | ||
2376 | } | 2442 | } |
2377 | 2443 | ||
2378 | #endif /* CONFIG_ARPD */ | 2444 | #endif /* CONFIG_ARPD */ |
@@ -2386,7 +2452,7 @@ static struct neigh_sysctl_table { | |||
2386 | ctl_table neigh_neigh_dir[2]; | 2452 | ctl_table neigh_neigh_dir[2]; |
2387 | ctl_table neigh_proto_dir[2]; | 2453 | ctl_table neigh_proto_dir[2]; |
2388 | ctl_table neigh_root_dir[2]; | 2454 | ctl_table neigh_root_dir[2]; |
2389 | } neigh_sysctl_template = { | 2455 | } neigh_sysctl_template __read_mostly = { |
2390 | .neigh_vars = { | 2456 | .neigh_vars = { |
2391 | { | 2457 | { |
2392 | .ctl_name = NET_NEIGH_MCAST_SOLICIT, | 2458 | .ctl_name = NET_NEIGH_MCAST_SOLICIT, |
@@ -2659,7 +2725,6 @@ void neigh_sysctl_unregister(struct neigh_parms *p) | |||
2659 | #endif /* CONFIG_SYSCTL */ | 2725 | #endif /* CONFIG_SYSCTL */ |
2660 | 2726 | ||
2661 | EXPORT_SYMBOL(__neigh_event_send); | 2727 | EXPORT_SYMBOL(__neigh_event_send); |
2662 | EXPORT_SYMBOL(neigh_add); | ||
2663 | EXPORT_SYMBOL(neigh_changeaddr); | 2728 | EXPORT_SYMBOL(neigh_changeaddr); |
2664 | EXPORT_SYMBOL(neigh_compat_output); | 2729 | EXPORT_SYMBOL(neigh_compat_output); |
2665 | EXPORT_SYMBOL(neigh_connected_output); | 2730 | EXPORT_SYMBOL(neigh_connected_output); |
@@ -2679,11 +2744,8 @@ EXPORT_SYMBOL(neigh_table_clear); | |||
2679 | EXPORT_SYMBOL(neigh_table_init); | 2744 | EXPORT_SYMBOL(neigh_table_init); |
2680 | EXPORT_SYMBOL(neigh_table_init_no_netlink); | 2745 | EXPORT_SYMBOL(neigh_table_init_no_netlink); |
2681 | EXPORT_SYMBOL(neigh_update); | 2746 | EXPORT_SYMBOL(neigh_update); |
2682 | EXPORT_SYMBOL(neigh_update_hhs); | ||
2683 | EXPORT_SYMBOL(pneigh_enqueue); | 2747 | EXPORT_SYMBOL(pneigh_enqueue); |
2684 | EXPORT_SYMBOL(pneigh_lookup); | 2748 | EXPORT_SYMBOL(pneigh_lookup); |
2685 | EXPORT_SYMBOL(neightbl_dump_info); | ||
2686 | EXPORT_SYMBOL(neightbl_set); | ||
2687 | 2749 | ||
2688 | #ifdef CONFIG_ARPD | 2750 | #ifdef CONFIG_ARPD |
2689 | EXPORT_SYMBOL(neigh_app_ns); | 2751 | EXPORT_SYMBOL(neigh_app_ns); |
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 471da451cd48..ead5920c26d6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
@@ -110,7 +110,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, | |||
110 | 110 | ||
111 | psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); | 111 | psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); |
112 | 112 | ||
113 | if (skb->ip_summed == CHECKSUM_HW && | 113 | if (skb->ip_summed == CHECKSUM_COMPLETE && |
114 | !(u16)csum_fold(csum_add(psum, skb->csum))) | 114 | !(u16)csum_fold(csum_add(psum, skb->csum))) |
115 | return 0; | 115 | return 0; |
116 | 116 | ||
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6a7320b39ed0..72145d4a2600 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -1786,7 +1786,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) | |||
1786 | * use ipv6_get_lladdr if/when it's get exported | 1786 | * use ipv6_get_lladdr if/when it's get exported |
1787 | */ | 1787 | */ |
1788 | 1788 | ||
1789 | read_lock(&addrconf_lock); | 1789 | rcu_read_lock(); |
1790 | if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) { | 1790 | if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) { |
1791 | struct inet6_ifaddr *ifp; | 1791 | struct inet6_ifaddr *ifp; |
1792 | 1792 | ||
@@ -1805,7 +1805,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) | |||
1805 | } | 1805 | } |
1806 | read_unlock_bh(&idev->lock); | 1806 | read_unlock_bh(&idev->lock); |
1807 | } | 1807 | } |
1808 | read_unlock(&addrconf_lock); | 1808 | rcu_read_unlock(); |
1809 | if (err) | 1809 | if (err) |
1810 | printk("pktgen: ERROR: IPv6 link address not availble.\n"); | 1810 | printk("pktgen: ERROR: IPv6 link address not availble.\n"); |
1811 | } | 1811 | } |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 30cc1ba6ed5c..d8e25e08cb7e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
36 | #include <linux/security.h> | 36 | #include <linux/security.h> |
37 | #include <linux/mutex.h> | 37 | #include <linux/mutex.h> |
38 | #include <linux/if_addr.h> | ||
38 | 39 | ||
39 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
40 | #include <asm/system.h> | 41 | #include <asm/system.h> |
@@ -49,6 +50,7 @@ | |||
49 | #include <net/udp.h> | 50 | #include <net/udp.h> |
50 | #include <net/sock.h> | 51 | #include <net/sock.h> |
51 | #include <net/pkt_sched.h> | 52 | #include <net/pkt_sched.h> |
53 | #include <net/fib_rules.h> | ||
52 | #include <net/netlink.h> | 54 | #include <net/netlink.h> |
53 | #ifdef CONFIG_NET_WIRELESS_RTNETLINK | 55 | #ifdef CONFIG_NET_WIRELESS_RTNETLINK |
54 | #include <linux/wireless.h> | 56 | #include <linux/wireless.h> |
@@ -56,6 +58,7 @@ | |||
56 | #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ | 58 | #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ |
57 | 59 | ||
58 | static DEFINE_MUTEX(rtnl_mutex); | 60 | static DEFINE_MUTEX(rtnl_mutex); |
61 | static struct sock *rtnl; | ||
59 | 62 | ||
60 | void rtnl_lock(void) | 63 | void rtnl_lock(void) |
61 | { | 64 | { |
@@ -93,8 +96,6 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) | |||
93 | return 0; | 96 | return 0; |
94 | } | 97 | } |
95 | 98 | ||
96 | struct sock *rtnl; | ||
97 | |||
98 | struct rtnetlink_link * rtnetlink_links[NPROTO]; | 99 | struct rtnetlink_link * rtnetlink_links[NPROTO]; |
99 | 100 | ||
100 | static const int rtm_min[RTM_NR_FAMILIES] = | 101 | static const int rtm_min[RTM_NR_FAMILIES] = |
@@ -102,8 +103,7 @@ static const int rtm_min[RTM_NR_FAMILIES] = | |||
102 | [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), | 103 | [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), |
103 | [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), | 104 | [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), |
104 | [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), | 105 | [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), |
105 | [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), | 106 | [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), |
106 | [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), | ||
107 | [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), | 107 | [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), |
108 | [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), | 108 | [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), |
109 | [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), | 109 | [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), |
@@ -111,7 +111,6 @@ static const int rtm_min[RTM_NR_FAMILIES] = | |||
111 | [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), | 111 | [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), |
112 | [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), | 112 | [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), |
113 | [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), | 113 | [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), |
114 | [RTM_FAM(RTM_NEWNEIGHTBL)] = NLMSG_LENGTH(sizeof(struct ndtmsg)), | ||
115 | }; | 114 | }; |
116 | 115 | ||
117 | static const int rta_max[RTM_NR_FAMILIES] = | 116 | static const int rta_max[RTM_NR_FAMILIES] = |
@@ -119,13 +118,11 @@ static const int rta_max[RTM_NR_FAMILIES] = | |||
119 | [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, | 118 | [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, |
120 | [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, | 119 | [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, |
121 | [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, | 120 | [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, |
122 | [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, | 121 | [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, |
123 | [RTM_FAM(RTM_NEWRULE)] = RTA_MAX, | ||
124 | [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, | 122 | [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, |
125 | [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, | 123 | [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, |
126 | [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, | 124 | [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, |
127 | [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, | 125 | [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, |
128 | [RTM_FAM(RTM_NEWNEIGHTBL)] = NDTA_MAX, | ||
129 | }; | 126 | }; |
130 | 127 | ||
131 | void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) | 128 | void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) |
@@ -168,24 +165,52 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) | |||
168 | return err; | 165 | return err; |
169 | } | 166 | } |
170 | 167 | ||
168 | int rtnl_unicast(struct sk_buff *skb, u32 pid) | ||
169 | { | ||
170 | return nlmsg_unicast(rtnl, skb, pid); | ||
171 | } | ||
172 | |||
173 | int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, | ||
174 | struct nlmsghdr *nlh, gfp_t flags) | ||
175 | { | ||
176 | int report = 0; | ||
177 | |||
178 | if (nlh) | ||
179 | report = nlmsg_report(nlh); | ||
180 | |||
181 | return nlmsg_notify(rtnl, skb, pid, group, report, flags); | ||
182 | } | ||
183 | |||
184 | void rtnl_set_sk_err(u32 group, int error) | ||
185 | { | ||
186 | netlink_set_err(rtnl, 0, group, error); | ||
187 | } | ||
188 | |||
171 | int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) | 189 | int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) |
172 | { | 190 | { |
173 | struct rtattr *mx = (struct rtattr*)skb->tail; | 191 | struct nlattr *mx; |
174 | int i; | 192 | int i, valid = 0; |
193 | |||
194 | mx = nla_nest_start(skb, RTA_METRICS); | ||
195 | if (mx == NULL) | ||
196 | return -ENOBUFS; | ||
197 | |||
198 | for (i = 0; i < RTAX_MAX; i++) { | ||
199 | if (metrics[i]) { | ||
200 | valid++; | ||
201 | NLA_PUT_U32(skb, i+1, metrics[i]); | ||
202 | } | ||
203 | } | ||
175 | 204 | ||
176 | RTA_PUT(skb, RTA_METRICS, 0, NULL); | 205 | if (!valid) { |
177 | for (i=0; i<RTAX_MAX; i++) { | 206 | nla_nest_cancel(skb, mx); |
178 | if (metrics[i]) | 207 | return 0; |
179 | RTA_PUT(skb, i+1, sizeof(u32), metrics+i); | ||
180 | } | 208 | } |
181 | mx->rta_len = skb->tail - (u8*)mx; | ||
182 | if (mx->rta_len == RTA_LENGTH(0)) | ||
183 | skb_trim(skb, (u8*)mx - skb->data); | ||
184 | return 0; | ||
185 | 209 | ||
186 | rtattr_failure: | 210 | return nla_nest_end(skb, mx); |
187 | skb_trim(skb, (u8*)mx - skb->data); | 211 | |
188 | return -1; | 212 | nla_put_failure: |
213 | return nla_nest_cancel(skb, mx); | ||
189 | } | 214 | } |
190 | 215 | ||
191 | 216 | ||
@@ -216,41 +241,73 @@ static void set_operstate(struct net_device *dev, unsigned char transition) | |||
216 | } | 241 | } |
217 | } | 242 | } |
218 | 243 | ||
219 | static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | 244 | static void copy_rtnl_link_stats(struct rtnl_link_stats *a, |
220 | int type, u32 pid, u32 seq, u32 change, | 245 | struct net_device_stats *b) |
221 | unsigned int flags) | ||
222 | { | 246 | { |
223 | struct ifinfomsg *r; | 247 | a->rx_packets = b->rx_packets; |
224 | struct nlmsghdr *nlh; | 248 | a->tx_packets = b->tx_packets; |
225 | unsigned char *b = skb->tail; | 249 | a->rx_bytes = b->rx_bytes; |
226 | 250 | a->tx_bytes = b->tx_bytes; | |
227 | nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags); | 251 | a->rx_errors = b->rx_errors; |
228 | r = NLMSG_DATA(nlh); | 252 | a->tx_errors = b->tx_errors; |
229 | r->ifi_family = AF_UNSPEC; | 253 | a->rx_dropped = b->rx_dropped; |
230 | r->__ifi_pad = 0; | 254 | a->tx_dropped = b->tx_dropped; |
231 | r->ifi_type = dev->type; | 255 | |
232 | r->ifi_index = dev->ifindex; | 256 | a->multicast = b->multicast; |
233 | r->ifi_flags = dev_get_flags(dev); | 257 | a->collisions = b->collisions; |
234 | r->ifi_change = change; | 258 | |
235 | 259 | a->rx_length_errors = b->rx_length_errors; | |
236 | RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); | 260 | a->rx_over_errors = b->rx_over_errors; |
237 | 261 | a->rx_crc_errors = b->rx_crc_errors; | |
238 | if (1) { | 262 | a->rx_frame_errors = b->rx_frame_errors; |
239 | u32 txqlen = dev->tx_queue_len; | 263 | a->rx_fifo_errors = b->rx_fifo_errors; |
240 | RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen); | 264 | a->rx_missed_errors = b->rx_missed_errors; |
241 | } | 265 | |
266 | a->tx_aborted_errors = b->tx_aborted_errors; | ||
267 | a->tx_carrier_errors = b->tx_carrier_errors; | ||
268 | a->tx_fifo_errors = b->tx_fifo_errors; | ||
269 | a->tx_heartbeat_errors = b->tx_heartbeat_errors; | ||
270 | a->tx_window_errors = b->tx_window_errors; | ||
271 | |||
272 | a->rx_compressed = b->rx_compressed; | ||
273 | a->tx_compressed = b->tx_compressed; | ||
274 | }; | ||
242 | 275 | ||
243 | if (1) { | 276 | static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, |
244 | u32 weight = dev->weight; | 277 | void *iwbuf, int iwbuflen, int type, u32 pid, |
245 | RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight); | 278 | u32 seq, u32 change, unsigned int flags) |
246 | } | 279 | { |
280 | struct ifinfomsg *ifm; | ||
281 | struct nlmsghdr *nlh; | ||
282 | |||
283 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); | ||
284 | if (nlh == NULL) | ||
285 | return -ENOBUFS; | ||
286 | |||
287 | ifm = nlmsg_data(nlh); | ||
288 | ifm->ifi_family = AF_UNSPEC; | ||
289 | ifm->__ifi_pad = 0; | ||
290 | ifm->ifi_type = dev->type; | ||
291 | ifm->ifi_index = dev->ifindex; | ||
292 | ifm->ifi_flags = dev_get_flags(dev); | ||
293 | ifm->ifi_change = change; | ||
294 | |||
295 | NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); | ||
296 | NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); | ||
297 | NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight); | ||
298 | NLA_PUT_U8(skb, IFLA_OPERSTATE, | ||
299 | netif_running(dev) ? dev->operstate : IF_OPER_DOWN); | ||
300 | NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); | ||
301 | NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); | ||
302 | |||
303 | if (dev->ifindex != dev->iflink) | ||
304 | NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); | ||
305 | |||
306 | if (dev->master) | ||
307 | NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); | ||
247 | 308 | ||
248 | if (1) { | 309 | if (dev->qdisc_sleeping) |
249 | u8 operstate = netif_running(dev)?dev->operstate:IF_OPER_DOWN; | 310 | NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id); |
250 | u8 link_mode = dev->link_mode; | ||
251 | RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate); | ||
252 | RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode); | ||
253 | } | ||
254 | 311 | ||
255 | if (1) { | 312 | if (1) { |
256 | struct rtnl_link_ifmap map = { | 313 | struct rtnl_link_ifmap map = { |
@@ -261,58 +318,38 @@ static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
261 | .dma = dev->dma, | 318 | .dma = dev->dma, |
262 | .port = dev->if_port, | 319 | .port = dev->if_port, |
263 | }; | 320 | }; |
264 | RTA_PUT(skb, IFLA_MAP, sizeof(map), &map); | 321 | NLA_PUT(skb, IFLA_MAP, sizeof(map), &map); |
265 | } | 322 | } |
266 | 323 | ||
267 | if (dev->addr_len) { | 324 | if (dev->addr_len) { |
268 | RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); | 325 | NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); |
269 | RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); | 326 | NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); |
270 | } | ||
271 | |||
272 | if (1) { | ||
273 | u32 mtu = dev->mtu; | ||
274 | RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu); | ||
275 | } | ||
276 | |||
277 | if (dev->ifindex != dev->iflink) { | ||
278 | u32 iflink = dev->iflink; | ||
279 | RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink); | ||
280 | } | ||
281 | |||
282 | if (dev->qdisc_sleeping) | ||
283 | RTA_PUT(skb, IFLA_QDISC, | ||
284 | strlen(dev->qdisc_sleeping->ops->id) + 1, | ||
285 | dev->qdisc_sleeping->ops->id); | ||
286 | |||
287 | if (dev->master) { | ||
288 | u32 master = dev->master->ifindex; | ||
289 | RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master); | ||
290 | } | 327 | } |
291 | 328 | ||
292 | if (dev->get_stats) { | 329 | if (dev->get_stats) { |
293 | unsigned long *stats = (unsigned long*)dev->get_stats(dev); | 330 | struct net_device_stats *stats = dev->get_stats(dev); |
294 | if (stats) { | 331 | if (stats) { |
295 | struct rtattr *a; | 332 | struct nlattr *attr; |
296 | __u32 *s; | 333 | |
297 | int i; | 334 | attr = nla_reserve(skb, IFLA_STATS, |
298 | int n = sizeof(struct rtnl_link_stats)/4; | 335 | sizeof(struct rtnl_link_stats)); |
299 | 336 | if (attr == NULL) | |
300 | a = __RTA_PUT(skb, IFLA_STATS, n*4); | 337 | goto nla_put_failure; |
301 | s = RTA_DATA(a); | 338 | |
302 | for (i=0; i<n; i++) | 339 | copy_rtnl_link_stats(nla_data(attr), stats); |
303 | s[i] = stats[i]; | ||
304 | } | 340 | } |
305 | } | 341 | } |
306 | nlh->nlmsg_len = skb->tail - b; | ||
307 | return skb->len; | ||
308 | 342 | ||
309 | nlmsg_failure: | 343 | if (iwbuf) |
310 | rtattr_failure: | 344 | NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf); |
311 | skb_trim(skb, b - skb->data); | 345 | |
312 | return -1; | 346 | return nlmsg_end(skb, nlh); |
347 | |||
348 | nla_put_failure: | ||
349 | return nlmsg_cancel(skb, nlh); | ||
313 | } | 350 | } |
314 | 351 | ||
315 | static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | 352 | static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) |
316 | { | 353 | { |
317 | int idx; | 354 | int idx; |
318 | int s_idx = cb->args[0]; | 355 | int s_idx = cb->args[0]; |
@@ -322,10 +359,9 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c | |||
322 | for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { | 359 | for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { |
323 | if (idx < s_idx) | 360 | if (idx < s_idx) |
324 | continue; | 361 | continue; |
325 | if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, | 362 | if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK, |
326 | NETLINK_CB(cb->skb).pid, | 363 | NETLINK_CB(cb->skb).pid, |
327 | cb->nlh->nlmsg_seq, 0, | 364 | cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) |
328 | NLM_F_MULTI) <= 0) | ||
329 | break; | 365 | break; |
330 | } | 366 | } |
331 | read_unlock(&dev_base_lock); | 367 | read_unlock(&dev_base_lock); |
@@ -334,52 +370,70 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c | |||
334 | return skb->len; | 370 | return skb->len; |
335 | } | 371 | } |
336 | 372 | ||
337 | static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 373 | static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = { |
374 | [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, | ||
375 | [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, | ||
376 | [IFLA_MTU] = { .type = NLA_U32 }, | ||
377 | [IFLA_TXQLEN] = { .type = NLA_U32 }, | ||
378 | [IFLA_WEIGHT] = { .type = NLA_U32 }, | ||
379 | [IFLA_OPERSTATE] = { .type = NLA_U8 }, | ||
380 | [IFLA_LINKMODE] = { .type = NLA_U8 }, | ||
381 | }; | ||
382 | |||
383 | static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | ||
338 | { | 384 | { |
339 | struct ifinfomsg *ifm = NLMSG_DATA(nlh); | 385 | struct ifinfomsg *ifm; |
340 | struct rtattr **ida = arg; | ||
341 | struct net_device *dev; | 386 | struct net_device *dev; |
342 | int err, send_addr_notify = 0; | 387 | int err, send_addr_notify = 0, modified = 0; |
388 | struct nlattr *tb[IFLA_MAX+1]; | ||
389 | char ifname[IFNAMSIZ]; | ||
390 | |||
391 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); | ||
392 | if (err < 0) | ||
393 | goto errout; | ||
394 | |||
395 | if (tb[IFLA_IFNAME]) | ||
396 | nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); | ||
397 | else | ||
398 | ifname[0] = '\0'; | ||
343 | 399 | ||
400 | err = -EINVAL; | ||
401 | ifm = nlmsg_data(nlh); | ||
344 | if (ifm->ifi_index >= 0) | 402 | if (ifm->ifi_index >= 0) |
345 | dev = dev_get_by_index(ifm->ifi_index); | 403 | dev = dev_get_by_index(ifm->ifi_index); |
346 | else if (ida[IFLA_IFNAME - 1]) { | 404 | else if (tb[IFLA_IFNAME]) |
347 | char ifname[IFNAMSIZ]; | ||
348 | |||
349 | if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1], | ||
350 | IFNAMSIZ) >= IFNAMSIZ) | ||
351 | return -EINVAL; | ||
352 | dev = dev_get_by_name(ifname); | 405 | dev = dev_get_by_name(ifname); |
353 | } else | 406 | else |
354 | return -EINVAL; | 407 | goto errout; |
355 | 408 | ||
356 | if (!dev) | 409 | if (dev == NULL) { |
357 | return -ENODEV; | 410 | err = -ENODEV; |
411 | goto errout; | ||
412 | } | ||
358 | 413 | ||
359 | err = -EINVAL; | 414 | if (tb[IFLA_ADDRESS] && |
415 | nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) | ||
416 | goto errout_dev; | ||
360 | 417 | ||
361 | if (ifm->ifi_flags) | 418 | if (tb[IFLA_BROADCAST] && |
362 | dev_change_flags(dev, ifm->ifi_flags); | 419 | nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) |
420 | goto errout_dev; | ||
363 | 421 | ||
364 | if (ida[IFLA_MAP - 1]) { | 422 | if (tb[IFLA_MAP]) { |
365 | struct rtnl_link_ifmap *u_map; | 423 | struct rtnl_link_ifmap *u_map; |
366 | struct ifmap k_map; | 424 | struct ifmap k_map; |
367 | 425 | ||
368 | if (!dev->set_config) { | 426 | if (!dev->set_config) { |
369 | err = -EOPNOTSUPP; | 427 | err = -EOPNOTSUPP; |
370 | goto out; | 428 | goto errout_dev; |
371 | } | 429 | } |
372 | 430 | ||
373 | if (!netif_device_present(dev)) { | 431 | if (!netif_device_present(dev)) { |
374 | err = -ENODEV; | 432 | err = -ENODEV; |
375 | goto out; | 433 | goto errout_dev; |
376 | } | 434 | } |
377 | |||
378 | if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map))) | ||
379 | goto out; | ||
380 | |||
381 | u_map = RTA_DATA(ida[IFLA_MAP - 1]); | ||
382 | 435 | ||
436 | u_map = nla_data(tb[IFLA_MAP]); | ||
383 | k_map.mem_start = (unsigned long) u_map->mem_start; | 437 | k_map.mem_start = (unsigned long) u_map->mem_start; |
384 | k_map.mem_end = (unsigned long) u_map->mem_end; | 438 | k_map.mem_end = (unsigned long) u_map->mem_end; |
385 | k_map.base_addr = (unsigned short) u_map->base_addr; | 439 | k_map.base_addr = (unsigned short) u_map->base_addr; |
@@ -388,200 +442,175 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
388 | k_map.port = (unsigned char) u_map->port; | 442 | k_map.port = (unsigned char) u_map->port; |
389 | 443 | ||
390 | err = dev->set_config(dev, &k_map); | 444 | err = dev->set_config(dev, &k_map); |
445 | if (err < 0) | ||
446 | goto errout_dev; | ||
391 | 447 | ||
392 | if (err) | 448 | modified = 1; |
393 | goto out; | ||
394 | } | 449 | } |
395 | 450 | ||
396 | if (ida[IFLA_ADDRESS - 1]) { | 451 | if (tb[IFLA_ADDRESS]) { |
397 | struct sockaddr *sa; | 452 | struct sockaddr *sa; |
398 | int len; | 453 | int len; |
399 | 454 | ||
400 | if (!dev->set_mac_address) { | 455 | if (!dev->set_mac_address) { |
401 | err = -EOPNOTSUPP; | 456 | err = -EOPNOTSUPP; |
402 | goto out; | 457 | goto errout_dev; |
403 | } | 458 | } |
459 | |||
404 | if (!netif_device_present(dev)) { | 460 | if (!netif_device_present(dev)) { |
405 | err = -ENODEV; | 461 | err = -ENODEV; |
406 | goto out; | 462 | goto errout_dev; |
407 | } | 463 | } |
408 | if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len)) | ||
409 | goto out; | ||
410 | 464 | ||
411 | len = sizeof(sa_family_t) + dev->addr_len; | 465 | len = sizeof(sa_family_t) + dev->addr_len; |
412 | sa = kmalloc(len, GFP_KERNEL); | 466 | sa = kmalloc(len, GFP_KERNEL); |
413 | if (!sa) { | 467 | if (!sa) { |
414 | err = -ENOMEM; | 468 | err = -ENOMEM; |
415 | goto out; | 469 | goto errout_dev; |
416 | } | 470 | } |
417 | sa->sa_family = dev->type; | 471 | sa->sa_family = dev->type; |
418 | memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]), | 472 | memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), |
419 | dev->addr_len); | 473 | dev->addr_len); |
420 | err = dev->set_mac_address(dev, sa); | 474 | err = dev->set_mac_address(dev, sa); |
421 | kfree(sa); | 475 | kfree(sa); |
422 | if (err) | 476 | if (err) |
423 | goto out; | 477 | goto errout_dev; |
424 | send_addr_notify = 1; | 478 | send_addr_notify = 1; |
479 | modified = 1; | ||
425 | } | 480 | } |
426 | 481 | ||
427 | if (ida[IFLA_BROADCAST - 1]) { | 482 | if (tb[IFLA_MTU]) { |
428 | if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len)) | 483 | err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); |
429 | goto out; | 484 | if (err < 0) |
430 | memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]), | 485 | goto errout_dev; |
431 | dev->addr_len); | 486 | modified = 1; |
432 | send_addr_notify = 1; | ||
433 | } | 487 | } |
434 | 488 | ||
435 | if (ida[IFLA_MTU - 1]) { | 489 | /* |
436 | if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32))) | 490 | * Interface selected by interface index but interface |
437 | goto out; | 491 | * name provided implies that a name change has been |
438 | err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1]))); | 492 | * requested. |
439 | 493 | */ | |
440 | if (err) | 494 | if (ifm->ifi_index >= 0 && ifname[0]) { |
441 | goto out; | 495 | err = dev_change_name(dev, ifname); |
442 | 496 | if (err < 0) | |
497 | goto errout_dev; | ||
498 | modified = 1; | ||
443 | } | 499 | } |
444 | 500 | ||
445 | if (ida[IFLA_TXQLEN - 1]) { | 501 | #ifdef CONFIG_NET_WIRELESS_RTNETLINK |
446 | if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32))) | 502 | if (tb[IFLA_WIRELESS]) { |
447 | goto out; | 503 | /* Call Wireless Extensions. |
504 | * Various stuff checked in there... */ | ||
505 | err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]), | ||
506 | nla_len(tb[IFLA_WIRELESS])); | ||
507 | if (err < 0) | ||
508 | goto errout_dev; | ||
509 | } | ||
510 | #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ | ||
448 | 511 | ||
449 | dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1])); | 512 | if (tb[IFLA_BROADCAST]) { |
513 | nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); | ||
514 | send_addr_notify = 1; | ||
450 | } | 515 | } |
451 | 516 | ||
452 | if (ida[IFLA_WEIGHT - 1]) { | ||
453 | if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32))) | ||
454 | goto out; | ||
455 | 517 | ||
456 | dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1])); | 518 | if (ifm->ifi_flags) |
457 | } | 519 | dev_change_flags(dev, ifm->ifi_flags); |
458 | 520 | ||
459 | if (ida[IFLA_OPERSTATE - 1]) { | 521 | if (tb[IFLA_TXQLEN]) |
460 | if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) | 522 | dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); |
461 | goto out; | ||
462 | 523 | ||
463 | set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1]))); | 524 | if (tb[IFLA_WEIGHT]) |
464 | } | 525 | dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); |
465 | 526 | ||
466 | if (ida[IFLA_LINKMODE - 1]) { | 527 | if (tb[IFLA_OPERSTATE]) |
467 | if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) | 528 | set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); |
468 | goto out; | ||
469 | 529 | ||
530 | if (tb[IFLA_LINKMODE]) { | ||
470 | write_lock_bh(&dev_base_lock); | 531 | write_lock_bh(&dev_base_lock); |
471 | dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1])); | 532 | dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); |
472 | write_unlock_bh(&dev_base_lock); | 533 | write_unlock_bh(&dev_base_lock); |
473 | } | 534 | } |
474 | 535 | ||
475 | if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) { | ||
476 | char ifname[IFNAMSIZ]; | ||
477 | |||
478 | if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1], | ||
479 | IFNAMSIZ) >= IFNAMSIZ) | ||
480 | goto out; | ||
481 | err = dev_change_name(dev, ifname); | ||
482 | if (err) | ||
483 | goto out; | ||
484 | } | ||
485 | |||
486 | #ifdef CONFIG_NET_WIRELESS_RTNETLINK | ||
487 | if (ida[IFLA_WIRELESS - 1]) { | ||
488 | |||
489 | /* Call Wireless Extensions. | ||
490 | * Various stuff checked in there... */ | ||
491 | err = wireless_rtnetlink_set(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len); | ||
492 | if (err) | ||
493 | goto out; | ||
494 | } | ||
495 | #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ | ||
496 | |||
497 | err = 0; | 536 | err = 0; |
498 | 537 | ||
499 | out: | 538 | errout_dev: |
539 | if (err < 0 && modified && net_ratelimit()) | ||
540 | printk(KERN_WARNING "A link change request failed with " | ||
541 | "some changes comitted already. Interface %s may " | ||
542 | "have been left with an inconsistent configuration, " | ||
543 | "please check.\n", dev->name); | ||
544 | |||
500 | if (send_addr_notify) | 545 | if (send_addr_notify) |
501 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | 546 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
502 | 547 | ||
503 | dev_put(dev); | 548 | dev_put(dev); |
549 | errout: | ||
504 | return err; | 550 | return err; |
505 | } | 551 | } |
506 | 552 | ||
507 | #ifdef CONFIG_NET_WIRELESS_RTNETLINK | 553 | static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
508 | static int do_getlink(struct sk_buff *in_skb, struct nlmsghdr* in_nlh, void *arg) | ||
509 | { | 554 | { |
510 | struct ifinfomsg *ifm = NLMSG_DATA(in_nlh); | 555 | struct ifinfomsg *ifm; |
511 | struct rtattr **ida = arg; | 556 | struct nlattr *tb[IFLA_MAX+1]; |
512 | struct net_device *dev; | 557 | struct net_device *dev = NULL; |
513 | struct ifinfomsg *r; | 558 | struct sk_buff *nskb; |
514 | struct nlmsghdr *nlh; | 559 | char *iw_buf = NULL, *iw = NULL; |
515 | int err = -ENOBUFS; | ||
516 | struct sk_buff *skb; | ||
517 | unsigned char *b; | ||
518 | char *iw_buf = NULL; | ||
519 | int iw_buf_len = 0; | 560 | int iw_buf_len = 0; |
561 | int err, payload; | ||
520 | 562 | ||
521 | if (ifm->ifi_index >= 0) | 563 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); |
564 | if (err < 0) | ||
565 | goto errout; | ||
566 | |||
567 | ifm = nlmsg_data(nlh); | ||
568 | if (ifm->ifi_index >= 0) { | ||
522 | dev = dev_get_by_index(ifm->ifi_index); | 569 | dev = dev_get_by_index(ifm->ifi_index); |
523 | else | 570 | if (dev == NULL) |
571 | return -ENODEV; | ||
572 | } else | ||
524 | return -EINVAL; | 573 | return -EINVAL; |
525 | if (!dev) | ||
526 | return -ENODEV; | ||
527 | 574 | ||
528 | #ifdef CONFIG_NET_WIRELESS_RTNETLINK | ||
529 | if (ida[IFLA_WIRELESS - 1]) { | ||
530 | 575 | ||
576 | #ifdef CONFIG_NET_WIRELESS_RTNETLINK | ||
577 | if (tb[IFLA_WIRELESS]) { | ||
531 | /* Call Wireless Extensions. We need to know the size before | 578 | /* Call Wireless Extensions. We need to know the size before |
532 | * we can alloc. Various stuff checked in there... */ | 579 | * we can alloc. Various stuff checked in there... */ |
533 | err = wireless_rtnetlink_get(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len, &iw_buf, &iw_buf_len); | 580 | err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]), |
534 | if (err) | 581 | nla_len(tb[IFLA_WIRELESS]), |
535 | goto out; | 582 | &iw_buf, &iw_buf_len); |
583 | if (err < 0) | ||
584 | goto errout; | ||
585 | |||
586 | iw += IW_EV_POINT_OFF; | ||
536 | } | 587 | } |
537 | #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ | 588 | #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ |
538 | 589 | ||
539 | /* Create a skb big enough to include all the data. | 590 | payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) + |
540 | * Some requests are way bigger than 4k... Jean II */ | 591 | nla_total_size(iw_buf_len)); |
541 | skb = alloc_skb((NLMSG_LENGTH(sizeof(*r))) + (RTA_SPACE(iw_buf_len)), | 592 | nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); |
542 | GFP_KERNEL); | 593 | if (nskb == NULL) { |
543 | if (!skb) | 594 | err = -ENOBUFS; |
544 | goto out; | 595 | goto errout; |
545 | b = skb->tail; | 596 | } |
546 | 597 | ||
547 | /* Put in the message the usual good stuff */ | 598 | err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, |
548 | nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, in_nlh->nlmsg_seq, | 599 | NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); |
549 | RTM_NEWLINK, sizeof(*r)); | 600 | if (err <= 0) { |
550 | r = NLMSG_DATA(nlh); | 601 | kfree_skb(nskb); |
551 | r->ifi_family = AF_UNSPEC; | 602 | goto errout; |
552 | r->__ifi_pad = 0; | 603 | } |
553 | r->ifi_type = dev->type; | 604 | |
554 | r->ifi_index = dev->ifindex; | 605 | err = rtnl_unicast(skb, NETLINK_CB(skb).pid); |
555 | r->ifi_flags = dev->flags; | 606 | errout: |
556 | r->ifi_change = 0; | 607 | kfree(iw_buf); |
557 | |||
558 | /* Put the wireless payload if it exist */ | ||
559 | if(iw_buf != NULL) | ||
560 | RTA_PUT(skb, IFLA_WIRELESS, iw_buf_len, | ||
561 | iw_buf + IW_EV_POINT_OFF); | ||
562 | |||
563 | nlh->nlmsg_len = skb->tail - b; | ||
564 | |||
565 | /* Needed ? */ | ||
566 | NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; | ||
567 | |||
568 | err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | ||
569 | if (err > 0) | ||
570 | err = 0; | ||
571 | out: | ||
572 | if(iw_buf != NULL) | ||
573 | kfree(iw_buf); | ||
574 | dev_put(dev); | 608 | dev_put(dev); |
575 | return err; | ||
576 | 609 | ||
577 | rtattr_failure: | 610 | return err; |
578 | nlmsg_failure: | ||
579 | kfree_skb(skb); | ||
580 | goto out; | ||
581 | } | 611 | } |
582 | #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ | ||
583 | 612 | ||
584 | static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) | 613 | static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) |
585 | { | 614 | { |
586 | int idx; | 615 | int idx; |
587 | int s_idx = cb->family; | 616 | int s_idx = cb->family; |
@@ -608,20 +637,22 @@ static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) | |||
608 | void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) | 637 | void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) |
609 | { | 638 | { |
610 | struct sk_buff *skb; | 639 | struct sk_buff *skb; |
611 | int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + | 640 | int err = -ENOBUFS; |
612 | sizeof(struct rtnl_link_ifmap) + | ||
613 | sizeof(struct rtnl_link_stats) + 128); | ||
614 | 641 | ||
615 | skb = alloc_skb(size, GFP_KERNEL); | 642 | skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); |
616 | if (!skb) | 643 | if (skb == NULL) |
617 | return; | 644 | goto errout; |
618 | 645 | ||
619 | if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) { | 646 | err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0); |
647 | if (err < 0) { | ||
620 | kfree_skb(skb); | 648 | kfree_skb(skb); |
621 | return; | 649 | goto errout; |
622 | } | 650 | } |
623 | NETLINK_CB(skb).dst_group = RTNLGRP_LINK; | 651 | |
624 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); | 652 | err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); |
653 | errout: | ||
654 | if (err < 0) | ||
655 | rtnl_set_sk_err(RTNLGRP_LINK, err); | ||
625 | } | 656 | } |
626 | 657 | ||
627 | /* Protected by RTNL sempahore. */ | 658 | /* Protected by RTNL sempahore. */ |
@@ -746,18 +777,19 @@ static void rtnetlink_rcv(struct sock *sk, int len) | |||
746 | 777 | ||
747 | static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = | 778 | static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = |
748 | { | 779 | { |
749 | [RTM_GETLINK - RTM_BASE] = { | 780 | [RTM_GETLINK - RTM_BASE] = { .doit = rtnl_getlink, |
750 | #ifdef CONFIG_NET_WIRELESS_RTNETLINK | 781 | .dumpit = rtnl_dump_ifinfo }, |
751 | .doit = do_getlink, | 782 | [RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink }, |
752 | #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ | 783 | [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnl_dump_all }, |
753 | .dumpit = rtnetlink_dump_ifinfo }, | 784 | [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnl_dump_all }, |
754 | [RTM_SETLINK - RTM_BASE] = { .doit = do_setlink }, | ||
755 | [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, | ||
756 | [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, | ||
757 | [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, | 785 | [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, |
758 | [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete }, | 786 | [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete }, |
759 | [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info }, | 787 | [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info }, |
760 | [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, | 788 | #ifdef CONFIG_FIB_RULES |
789 | [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule }, | ||
790 | [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule }, | ||
791 | #endif | ||
792 | [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnl_dump_all }, | ||
761 | [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info }, | 793 | [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info }, |
762 | [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set }, | 794 | [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set }, |
763 | }; | 795 | }; |
@@ -817,7 +849,9 @@ EXPORT_SYMBOL(rtattr_strlcpy); | |||
817 | EXPORT_SYMBOL(rtattr_parse); | 849 | EXPORT_SYMBOL(rtattr_parse); |
818 | EXPORT_SYMBOL(rtnetlink_links); | 850 | EXPORT_SYMBOL(rtnetlink_links); |
819 | EXPORT_SYMBOL(rtnetlink_put_metrics); | 851 | EXPORT_SYMBOL(rtnetlink_put_metrics); |
820 | EXPORT_SYMBOL(rtnl); | ||
821 | EXPORT_SYMBOL(rtnl_lock); | 852 | EXPORT_SYMBOL(rtnl_lock); |
822 | EXPORT_SYMBOL(rtnl_trylock); | 853 | EXPORT_SYMBOL(rtnl_trylock); |
823 | EXPORT_SYMBOL(rtnl_unlock); | 854 | EXPORT_SYMBOL(rtnl_unlock); |
855 | EXPORT_SYMBOL(rtnl_unicast); | ||
856 | EXPORT_SYMBOL(rtnl_notify); | ||
857 | EXPORT_SYMBOL(rtnl_set_sk_err); | ||
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c54f3664bce5..c448c7f6fde2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -1397,7 +1397,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) | |||
1397 | unsigned int csum; | 1397 | unsigned int csum; |
1398 | long csstart; | 1398 | long csstart; |
1399 | 1399 | ||
1400 | if (skb->ip_summed == CHECKSUM_HW) | 1400 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
1401 | csstart = skb->h.raw - skb->data; | 1401 | csstart = skb->h.raw - skb->data; |
1402 | else | 1402 | else |
1403 | csstart = skb_headlen(skb); | 1403 | csstart = skb_headlen(skb); |
@@ -1411,7 +1411,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) | |||
1411 | csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, | 1411 | csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, |
1412 | skb->len - csstart, 0); | 1412 | skb->len - csstart, 0); |
1413 | 1413 | ||
1414 | if (skb->ip_summed == CHECKSUM_HW) { | 1414 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
1415 | long csstuff = csstart + skb->csum; | 1415 | long csstuff = csstart + skb->csum; |
1416 | 1416 | ||
1417 | *((unsigned short *)(to + csstuff)) = csum_fold(csum); | 1417 | *((unsigned short *)(to + csstuff)) = csum_fold(csum); |
@@ -1898,10 +1898,10 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, | |||
1898 | * @len: length of data pulled | 1898 | * @len: length of data pulled |
1899 | * | 1899 | * |
1900 | * This function performs an skb_pull on the packet and updates | 1900 | * This function performs an skb_pull on the packet and updates |
1901 | * update the CHECKSUM_HW checksum. It should be used on receive | 1901 | * update the CHECKSUM_COMPLETE checksum. It should be used on |
1902 | * path processing instead of skb_pull unless you know that the | 1902 | * receive path processing instead of skb_pull unless you know |
1903 | * checksum difference is zero (e.g., a valid IP header) or you | 1903 | * that the checksum difference is zero (e.g., a valid IP header) |
1904 | * are setting ip_summed to CHECKSUM_NONE. | 1904 | * or you are setting ip_summed to CHECKSUM_NONE. |
1905 | */ | 1905 | */ |
1906 | unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) | 1906 | unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) |
1907 | { | 1907 | { |
@@ -1994,7 +1994,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) | |||
1994 | frag = skb_shinfo(nskb)->frags; | 1994 | frag = skb_shinfo(nskb)->frags; |
1995 | k = 0; | 1995 | k = 0; |
1996 | 1996 | ||
1997 | nskb->ip_summed = CHECKSUM_HW; | 1997 | nskb->ip_summed = CHECKSUM_PARTIAL; |
1998 | nskb->csum = skb->csum; | 1998 | nskb->csum = skb->csum; |
1999 | memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); | 1999 | memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); |
2000 | 2000 | ||
@@ -2046,19 +2046,14 @@ void __init skb_init(void) | |||
2046 | skbuff_head_cache = kmem_cache_create("skbuff_head_cache", | 2046 | skbuff_head_cache = kmem_cache_create("skbuff_head_cache", |
2047 | sizeof(struct sk_buff), | 2047 | sizeof(struct sk_buff), |
2048 | 0, | 2048 | 0, |
2049 | SLAB_HWCACHE_ALIGN, | 2049 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
2050 | NULL, NULL); | 2050 | NULL, NULL); |
2051 | if (!skbuff_head_cache) | ||
2052 | panic("cannot create skbuff cache"); | ||
2053 | |||
2054 | skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", | 2051 | skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", |
2055 | (2*sizeof(struct sk_buff)) + | 2052 | (2*sizeof(struct sk_buff)) + |
2056 | sizeof(atomic_t), | 2053 | sizeof(atomic_t), |
2057 | 0, | 2054 | 0, |
2058 | SLAB_HWCACHE_ALIGN, | 2055 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
2059 | NULL, NULL); | 2056 | NULL, NULL); |
2060 | if (!skbuff_fclone_cache) | ||
2061 | panic("cannot create skbuff cache"); | ||
2062 | } | 2057 | } |
2063 | 2058 | ||
2064 | EXPORT_SYMBOL(___pskb_trim); | 2059 | EXPORT_SYMBOL(___pskb_trim); |
diff --git a/net/core/sock.c b/net/core/sock.c index 51fcfbc041a7..b77e155cbe6c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -187,13 +187,13 @@ static struct lock_class_key af_callback_keys[AF_MAX]; | |||
187 | #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) | 187 | #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) |
188 | 188 | ||
189 | /* Run time adjustable parameters. */ | 189 | /* Run time adjustable parameters. */ |
190 | __u32 sysctl_wmem_max = SK_WMEM_MAX; | 190 | __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; |
191 | __u32 sysctl_rmem_max = SK_RMEM_MAX; | 191 | __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; |
192 | __u32 sysctl_wmem_default = SK_WMEM_MAX; | 192 | __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; |
193 | __u32 sysctl_rmem_default = SK_RMEM_MAX; | 193 | __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; |
194 | 194 | ||
195 | /* Maximal space eaten by iovec or ancilliary data plus some space */ | 195 | /* Maximal space eaten by iovec or ancilliary data plus some space */ |
196 | int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512); | 196 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); |
197 | 197 | ||
198 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) | 198 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) |
199 | { | 199 | { |
@@ -247,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
247 | goto out; | 247 | goto out; |
248 | } | 248 | } |
249 | 249 | ||
250 | /* It would be deadlock, if sock_queue_rcv_skb is used | 250 | err = sk_filter(sk, skb); |
251 | with socket lock! We assume that users of this | ||
252 | function are lock free. | ||
253 | */ | ||
254 | err = sk_filter(sk, skb, 1); | ||
255 | if (err) | 251 | if (err) |
256 | goto out; | 252 | goto out; |
257 | 253 | ||
@@ -278,7 +274,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) | |||
278 | { | 274 | { |
279 | int rc = NET_RX_SUCCESS; | 275 | int rc = NET_RX_SUCCESS; |
280 | 276 | ||
281 | if (sk_filter(sk, skb, 0)) | 277 | if (sk_filter(sk, skb)) |
282 | goto discard_and_relse; | 278 | goto discard_and_relse; |
283 | 279 | ||
284 | skb->dev = NULL; | 280 | skb->dev = NULL; |
@@ -606,15 +602,15 @@ set_rcvbuf: | |||
606 | break; | 602 | break; |
607 | 603 | ||
608 | case SO_DETACH_FILTER: | 604 | case SO_DETACH_FILTER: |
609 | spin_lock_bh(&sk->sk_lock.slock); | 605 | rcu_read_lock_bh(); |
610 | filter = sk->sk_filter; | 606 | filter = rcu_dereference(sk->sk_filter); |
611 | if (filter) { | 607 | if (filter) { |
612 | sk->sk_filter = NULL; | 608 | rcu_assign_pointer(sk->sk_filter, NULL); |
613 | spin_unlock_bh(&sk->sk_lock.slock); | ||
614 | sk_filter_release(sk, filter); | 609 | sk_filter_release(sk, filter); |
610 | rcu_read_unlock_bh(); | ||
615 | break; | 611 | break; |
616 | } | 612 | } |
617 | spin_unlock_bh(&sk->sk_lock.slock); | 613 | rcu_read_unlock_bh(); |
618 | ret = -ENONET; | 614 | ret = -ENONET; |
619 | break; | 615 | break; |
620 | 616 | ||
@@ -884,10 +880,10 @@ void sk_free(struct sock *sk) | |||
884 | if (sk->sk_destruct) | 880 | if (sk->sk_destruct) |
885 | sk->sk_destruct(sk); | 881 | sk->sk_destruct(sk); |
886 | 882 | ||
887 | filter = sk->sk_filter; | 883 | filter = rcu_dereference(sk->sk_filter); |
888 | if (filter) { | 884 | if (filter) { |
889 | sk_filter_release(sk, filter); | 885 | sk_filter_release(sk, filter); |
890 | sk->sk_filter = NULL; | 886 | rcu_assign_pointer(sk->sk_filter, NULL); |
891 | } | 887 | } |
892 | 888 | ||
893 | sock_disable_timestamp(sk); | 889 | sock_disable_timestamp(sk); |
@@ -911,7 +907,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
911 | if (newsk != NULL) { | 907 | if (newsk != NULL) { |
912 | struct sk_filter *filter; | 908 | struct sk_filter *filter; |
913 | 909 | ||
914 | memcpy(newsk, sk, sk->sk_prot->obj_size); | 910 | sock_copy(newsk, sk); |
915 | 911 | ||
916 | /* SANITY */ | 912 | /* SANITY */ |
917 | sk_node_init(&newsk->sk_node); | 913 | sk_node_init(&newsk->sk_node); |
diff --git a/net/core/utils.c b/net/core/utils.c index e31c90e05594..2682490777de 100644 --- a/net/core/utils.c +++ b/net/core/utils.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * Authors: | 4 | * Authors: |
5 | * net_random Alan Cox | 5 | * net_random Alan Cox |
6 | * net_ratelimit Andy Kleen | 6 | * net_ratelimit Andy Kleen |
7 | * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project | ||
7 | * | 8 | * |
8 | * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | 9 | * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> |
9 | * | 10 | * |
@@ -191,3 +192,215 @@ __be32 in_aton(const char *str) | |||
191 | } | 192 | } |
192 | 193 | ||
193 | EXPORT_SYMBOL(in_aton); | 194 | EXPORT_SYMBOL(in_aton); |
195 | |||
196 | #define IN6PTON_XDIGIT 0x00010000 | ||
197 | #define IN6PTON_DIGIT 0x00020000 | ||
198 | #define IN6PTON_COLON_MASK 0x00700000 | ||
199 | #define IN6PTON_COLON_1 0x00100000 /* single : requested */ | ||
200 | #define IN6PTON_COLON_2 0x00200000 /* second : requested */ | ||
201 | #define IN6PTON_COLON_1_2 0x00400000 /* :: requested */ | ||
202 | #define IN6PTON_DOT 0x00800000 /* . */ | ||
203 | #define IN6PTON_DELIM 0x10000000 | ||
204 | #define IN6PTON_NULL 0x20000000 /* first/tail */ | ||
205 | #define IN6PTON_UNKNOWN 0x40000000 | ||
206 | |||
207 | static inline int digit2bin(char c, char delim) | ||
208 | { | ||
209 | if (c == delim || c == '\0') | ||
210 | return IN6PTON_DELIM; | ||
211 | if (c == '.') | ||
212 | return IN6PTON_DOT; | ||
213 | if (c >= '0' && c <= '9') | ||
214 | return (IN6PTON_DIGIT | (c - '0')); | ||
215 | return IN6PTON_UNKNOWN; | ||
216 | } | ||
217 | |||
218 | static inline int xdigit2bin(char c, char delim) | ||
219 | { | ||
220 | if (c == delim || c == '\0') | ||
221 | return IN6PTON_DELIM; | ||
222 | if (c == ':') | ||
223 | return IN6PTON_COLON_MASK; | ||
224 | if (c == '.') | ||
225 | return IN6PTON_DOT; | ||
226 | if (c >= '0' && c <= '9') | ||
227 | return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); | ||
228 | if (c >= 'a' && c <= 'f') | ||
229 | return (IN6PTON_XDIGIT | (c - 'a' + 10)); | ||
230 | if (c >= 'A' && c <= 'F') | ||
231 | return (IN6PTON_XDIGIT | (c - 'A' + 10)); | ||
232 | return IN6PTON_UNKNOWN; | ||
233 | } | ||
234 | |||
235 | int in4_pton(const char *src, int srclen, | ||
236 | u8 *dst, | ||
237 | char delim, const char **end) | ||
238 | { | ||
239 | const char *s; | ||
240 | u8 *d; | ||
241 | u8 dbuf[4]; | ||
242 | int ret = 0; | ||
243 | int i; | ||
244 | int w = 0; | ||
245 | |||
246 | if (srclen < 0) | ||
247 | srclen = strlen(src); | ||
248 | s = src; | ||
249 | d = dbuf; | ||
250 | i = 0; | ||
251 | while(1) { | ||
252 | int c; | ||
253 | c = xdigit2bin(srclen > 0 ? *s : '\0', delim); | ||
254 | if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM))) { | ||
255 | goto out; | ||
256 | } | ||
257 | if (c & (IN6PTON_DOT | IN6PTON_DELIM)) { | ||
258 | if (w == 0) | ||
259 | goto out; | ||
260 | *d++ = w & 0xff; | ||
261 | w = 0; | ||
262 | i++; | ||
263 | if (c & IN6PTON_DELIM) { | ||
264 | if (i != 4) | ||
265 | goto out; | ||
266 | break; | ||
267 | } | ||
268 | goto cont; | ||
269 | } | ||
270 | w = (w * 10) + c; | ||
271 | if ((w & 0xffff) > 255) { | ||
272 | goto out; | ||
273 | } | ||
274 | cont: | ||
275 | if (i >= 4) | ||
276 | goto out; | ||
277 | s++; | ||
278 | srclen--; | ||
279 | } | ||
280 | ret = 1; | ||
281 | memcpy(dst, dbuf, sizeof(dbuf)); | ||
282 | out: | ||
283 | if (end) | ||
284 | *end = s; | ||
285 | return ret; | ||
286 | } | ||
287 | |||
288 | EXPORT_SYMBOL(in4_pton); | ||
289 | |||
290 | int in6_pton(const char *src, int srclen, | ||
291 | u8 *dst, | ||
292 | char delim, const char **end) | ||
293 | { | ||
294 | const char *s, *tok = NULL; | ||
295 | u8 *d, *dc = NULL; | ||
296 | u8 dbuf[16]; | ||
297 | int ret = 0; | ||
298 | int i; | ||
299 | int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL; | ||
300 | int w = 0; | ||
301 | |||
302 | memset(dbuf, 0, sizeof(dbuf)); | ||
303 | |||
304 | s = src; | ||
305 | d = dbuf; | ||
306 | if (srclen < 0) | ||
307 | srclen = strlen(src); | ||
308 | |||
309 | while (1) { | ||
310 | int c; | ||
311 | |||
312 | c = xdigit2bin(srclen > 0 ? *s : '\0', delim); | ||
313 | if (!(c & state)) | ||
314 | goto out; | ||
315 | if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { | ||
316 | /* process one 16-bit word */ | ||
317 | if (!(state & IN6PTON_NULL)) { | ||
318 | *d++ = (w >> 8) & 0xff; | ||
319 | *d++ = w & 0xff; | ||
320 | } | ||
321 | w = 0; | ||
322 | if (c & IN6PTON_DELIM) { | ||
323 | /* We've processed last word */ | ||
324 | break; | ||
325 | } | ||
326 | /* | ||
327 | * COLON_1 => XDIGIT | ||
328 | * COLON_2 => XDIGIT|DELIM | ||
329 | * COLON_1_2 => COLON_2 | ||
330 | */ | ||
331 | switch (state & IN6PTON_COLON_MASK) { | ||
332 | case IN6PTON_COLON_2: | ||
333 | dc = d; | ||
334 | state = IN6PTON_XDIGIT | IN6PTON_DELIM; | ||
335 | if (dc - dbuf >= sizeof(dbuf)) | ||
336 | state |= IN6PTON_NULL; | ||
337 | break; | ||
338 | case IN6PTON_COLON_1|IN6PTON_COLON_1_2: | ||
339 | state = IN6PTON_XDIGIT | IN6PTON_COLON_2; | ||
340 | break; | ||
341 | case IN6PTON_COLON_1: | ||
342 | state = IN6PTON_XDIGIT; | ||
343 | break; | ||
344 | case IN6PTON_COLON_1_2: | ||
345 | state = IN6PTON_COLON_2; | ||
346 | break; | ||
347 | default: | ||
348 | state = 0; | ||
349 | } | ||
350 | tok = s + 1; | ||
351 | goto cont; | ||
352 | } | ||
353 | |||
354 | if (c & IN6PTON_DOT) { | ||
355 | ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s); | ||
356 | if (ret > 0) { | ||
357 | d += 4; | ||
358 | break; | ||
359 | } | ||
360 | goto out; | ||
361 | } | ||
362 | |||
363 | w = (w << 4) | (0xff & c); | ||
364 | state = IN6PTON_COLON_1 | IN6PTON_DELIM; | ||
365 | if (!(w & 0xf000)) { | ||
366 | state |= IN6PTON_XDIGIT; | ||
367 | } | ||
368 | if (!dc && d + 2 < dbuf + sizeof(dbuf)) { | ||
369 | state |= IN6PTON_COLON_1_2; | ||
370 | state &= ~IN6PTON_DELIM; | ||
371 | } | ||
372 | if (d + 2 >= dbuf + sizeof(dbuf)) { | ||
373 | state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2); | ||
374 | } | ||
375 | cont: | ||
376 | if ((dc && d + 4 < dbuf + sizeof(dbuf)) || | ||
377 | d + 4 == dbuf + sizeof(dbuf)) { | ||
378 | state |= IN6PTON_DOT; | ||
379 | } | ||
380 | if (d >= dbuf + sizeof(dbuf)) { | ||
381 | state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK); | ||
382 | } | ||
383 | s++; | ||
384 | srclen--; | ||
385 | } | ||
386 | |||
387 | i = 15; d--; | ||
388 | |||
389 | if (dc) { | ||
390 | while(d >= dc) | ||
391 | dst[i--] = *d--; | ||
392 | while(i >= dc - dbuf) | ||
393 | dst[i--] = 0; | ||
394 | while(i >= 0) | ||
395 | dst[i--] = *d--; | ||
396 | } else | ||
397 | memcpy(dst, dbuf, sizeof(dbuf)); | ||
398 | |||
399 | ret = 1; | ||
400 | out: | ||
401 | if (end) | ||
402 | *end = s; | ||
403 | return ret; | ||
404 | } | ||
405 | |||
406 | EXPORT_SYMBOL(in6_pton); | ||
diff --git a/net/core/wireless.c b/net/core/wireless.c index de0bde4b51dd..3168fca312f7 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c | |||
@@ -72,7 +72,6 @@ | |||
72 | 72 | ||
73 | /***************************** INCLUDES *****************************/ | 73 | /***************************** INCLUDES *****************************/ |
74 | 74 | ||
75 | #include <linux/config.h> /* Not needed ??? */ | ||
76 | #include <linux/module.h> | 75 | #include <linux/module.h> |
77 | #include <linux/types.h> /* off_t */ | 76 | #include <linux/types.h> /* off_t */ |
78 | #include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */ | 77 | #include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */ |
@@ -86,6 +85,7 @@ | |||
86 | 85 | ||
87 | #include <linux/wireless.h> /* Pretty obvious */ | 86 | #include <linux/wireless.h> /* Pretty obvious */ |
88 | #include <net/iw_handler.h> /* New driver API */ | 87 | #include <net/iw_handler.h> /* New driver API */ |
88 | #include <net/netlink.h> | ||
89 | 89 | ||
90 | #include <asm/uaccess.h> /* copy_to_user() */ | 90 | #include <asm/uaccess.h> /* copy_to_user() */ |
91 | 91 | ||
@@ -1850,7 +1850,7 @@ static void wireless_nlevent_process(unsigned long data) | |||
1850 | struct sk_buff *skb; | 1850 | struct sk_buff *skb; |
1851 | 1851 | ||
1852 | while ((skb = skb_dequeue(&wireless_nlevent_queue))) | 1852 | while ((skb = skb_dequeue(&wireless_nlevent_queue))) |
1853 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); | 1853 | rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); |
1854 | } | 1854 | } |
1855 | 1855 | ||
1856 | static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); | 1856 | static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); |
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 8c211c58893b..4d176d33983f 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c | |||
@@ -142,14 +142,13 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) | |||
142 | struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); | 142 | struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); |
143 | 143 | ||
144 | if (av != NULL) { | 144 | if (av != NULL) { |
145 | av->dccpav_buf_head = | 145 | av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; |
146 | av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1; | ||
147 | av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1; | 146 | av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1; |
148 | av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; | 147 | av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; |
149 | av->dccpav_ack_ptr = 0; | 148 | av->dccpav_ack_ptr = 0; |
150 | av->dccpav_time.tv_sec = 0; | 149 | av->dccpav_time.tv_sec = 0; |
151 | av->dccpav_time.tv_usec = 0; | 150 | av->dccpav_time.tv_usec = 0; |
152 | av->dccpav_sent_len = av->dccpav_vec_len = 0; | 151 | av->dccpav_vec_len = 0; |
153 | INIT_LIST_HEAD(&av->dccpav_records); | 152 | INIT_LIST_HEAD(&av->dccpav_records); |
154 | } | 153 | } |
155 | 154 | ||
@@ -353,11 +352,13 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av, | |||
353 | { | 352 | { |
354 | struct dccp_ackvec_record *next; | 353 | struct dccp_ackvec_record *next; |
355 | 354 | ||
356 | av->dccpav_buf_tail = avr->dccpavr_ack_ptr - 1; | 355 | /* sort out vector length */ |
357 | if (av->dccpav_buf_tail == 0) | 356 | if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr) |
358 | av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1; | 357 | av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head; |
359 | 358 | else | |
360 | av->dccpav_vec_len -= avr->dccpavr_sent_len; | 359 | av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1 |
360 | - av->dccpav_buf_head | ||
361 | + avr->dccpavr_ack_ptr; | ||
361 | 362 | ||
362 | /* free records */ | 363 | /* free records */ |
363 | list_for_each_entry_safe_from(avr, next, &av->dccpav_records, | 364 | list_for_each_entry_safe_from(avr, next, &av->dccpav_records, |
@@ -434,8 +435,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, | |||
434 | break; | 435 | break; |
435 | found: | 436 | found: |
436 | if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) { | 437 | if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) { |
437 | const u8 state = (*vector & | 438 | const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; |
438 | DCCP_ACKVEC_STATE_MASK) >> 6; | ||
439 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { | 439 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { |
440 | #ifdef CONFIG_IP_DCCP_DEBUG | 440 | #ifdef CONFIG_IP_DCCP_DEBUG |
441 | struct dccp_sock *dp = dccp_sk(sk); | 441 | struct dccp_sock *dp = dccp_sk(sk); |
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 0adf4b56c34c..2424effac7f6 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h | |||
@@ -54,9 +54,7 @@ struct dccp_ackvec { | |||
54 | struct list_head dccpav_records; | 54 | struct list_head dccpav_records; |
55 | struct timeval dccpav_time; | 55 | struct timeval dccpav_time; |
56 | u8 dccpav_buf_head; | 56 | u8 dccpav_buf_head; |
57 | u8 dccpav_buf_tail; | ||
58 | u8 dccpav_ack_ptr; | 57 | u8 dccpav_ack_ptr; |
59 | u8 dccpav_sent_len; | ||
60 | u8 dccpav_vec_len; | 58 | u8 dccpav_vec_len; |
61 | u8 dccpav_buf_nonce; | 59 | u8 dccpav_buf_nonce; |
62 | u8 dccpav_ack_nonce; | 60 | u8 dccpav_ack_nonce; |
@@ -107,7 +105,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); | |||
107 | 105 | ||
108 | static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) | 106 | static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) |
109 | { | 107 | { |
110 | return av->dccpav_sent_len != av->dccpav_vec_len; | 108 | return av->dccpav_vec_len; |
111 | } | 109 | } |
112 | #else /* CONFIG_IP_DCCP_ACKVEC */ | 110 | #else /* CONFIG_IP_DCCP_ACKVEC */ |
113 | static inline int dccp_ackvec_init(void) | 111 | static inline int dccp_ackvec_init(void) |
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index ca00191628f7..32752f750447 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig | |||
@@ -30,6 +30,14 @@ config IP_DCCP_CCID2 | |||
30 | 30 | ||
31 | If in doubt, say M. | 31 | If in doubt, say M. |
32 | 32 | ||
33 | config IP_DCCP_CCID2_DEBUG | ||
34 | bool "CCID2 debug" | ||
35 | depends on IP_DCCP_CCID2 | ||
36 | ---help--- | ||
37 | Enable CCID2 debug messages. | ||
38 | |||
39 | If in doubt, say N. | ||
40 | |||
33 | config IP_DCCP_CCID3 | 41 | config IP_DCCP_CCID3 |
34 | tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" | 42 | tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" |
35 | depends on IP_DCCP | 43 | depends on IP_DCCP |
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index e9615627dcd6..457dd3db7f41 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c | |||
@@ -27,7 +27,6 @@ | |||
27 | * | 27 | * |
28 | * BUGS: | 28 | * BUGS: |
29 | * - sequence number wrapping | 29 | * - sequence number wrapping |
30 | * - jiffies wrapping | ||
31 | */ | 30 | */ |
32 | 31 | ||
33 | #include "../ccid.h" | 32 | #include "../ccid.h" |
@@ -36,8 +35,7 @@ | |||
36 | 35 | ||
37 | static int ccid2_debug; | 36 | static int ccid2_debug; |
38 | 37 | ||
39 | #undef CCID2_DEBUG | 38 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
40 | #ifdef CCID2_DEBUG | ||
41 | #define ccid2_pr_debug(format, a...) \ | 39 | #define ccid2_pr_debug(format, a...) \ |
42 | do { if (ccid2_debug) \ | 40 | do { if (ccid2_debug) \ |
43 | printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ | 41 | printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ |
@@ -46,9 +44,7 @@ static int ccid2_debug; | |||
46 | #define ccid2_pr_debug(format, a...) | 44 | #define ccid2_pr_debug(format, a...) |
47 | #endif | 45 | #endif |
48 | 46 | ||
49 | static const int ccid2_seq_len = 128; | 47 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
50 | |||
51 | #ifdef CCID2_DEBUG | ||
52 | static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) | 48 | static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) |
53 | { | 49 | { |
54 | int len = 0; | 50 | int len = 0; |
@@ -71,8 +67,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) | |||
71 | 67 | ||
72 | /* packets are sent sequentially */ | 68 | /* packets are sent sequentially */ |
73 | BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); | 69 | BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); |
74 | BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent); | 70 | BUG_ON(time_before(seqp->ccid2s_sent, |
75 | BUG_ON(len > ccid2_seq_len); | 71 | prev->ccid2s_sent)); |
76 | 72 | ||
77 | seqp = prev; | 73 | seqp = prev; |
78 | } | 74 | } |
@@ -84,16 +80,57 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) | |||
84 | do { | 80 | do { |
85 | seqp = seqp->ccid2s_prev; | 81 | seqp = seqp->ccid2s_prev; |
86 | len++; | 82 | len++; |
87 | BUG_ON(len > ccid2_seq_len); | ||
88 | } while (seqp != hctx->ccid2hctx_seqh); | 83 | } while (seqp != hctx->ccid2hctx_seqh); |
89 | 84 | ||
90 | BUG_ON(len != ccid2_seq_len); | ||
91 | ccid2_pr_debug("total len=%d\n", len); | 85 | ccid2_pr_debug("total len=%d\n", len); |
86 | BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN); | ||
92 | } | 87 | } |
93 | #else | 88 | #else |
94 | #define ccid2_hc_tx_check_sanity(hctx) do {} while (0) | 89 | #define ccid2_hc_tx_check_sanity(hctx) do {} while (0) |
95 | #endif | 90 | #endif |
96 | 91 | ||
92 | static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, | ||
93 | gfp_t gfp) | ||
94 | { | ||
95 | struct ccid2_seq *seqp; | ||
96 | int i; | ||
97 | |||
98 | /* check if we have space to preserve the pointer to the buffer */ | ||
99 | if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) / | ||
100 | sizeof(struct ccid2_seq*))) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | /* allocate buffer and initialize linked list */ | ||
104 | seqp = kmalloc(sizeof(*seqp) * num, gfp); | ||
105 | if (seqp == NULL) | ||
106 | return -ENOMEM; | ||
107 | |||
108 | for (i = 0; i < (num - 1); i++) { | ||
109 | seqp[i].ccid2s_next = &seqp[i + 1]; | ||
110 | seqp[i + 1].ccid2s_prev = &seqp[i]; | ||
111 | } | ||
112 | seqp[num - 1].ccid2s_next = seqp; | ||
113 | seqp->ccid2s_prev = &seqp[num - 1]; | ||
114 | |||
115 | /* This is the first allocation. Initiate the head and tail. */ | ||
116 | if (hctx->ccid2hctx_seqbufc == 0) | ||
117 | hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp; | ||
118 | else { | ||
119 | /* link the existing list with the one we just created */ | ||
120 | hctx->ccid2hctx_seqh->ccid2s_next = seqp; | ||
121 | seqp->ccid2s_prev = hctx->ccid2hctx_seqh; | ||
122 | |||
123 | hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1]; | ||
124 | seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt; | ||
125 | } | ||
126 | |||
127 | /* store the original pointer to the buffer so we can free it */ | ||
128 | hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp; | ||
129 | hctx->ccid2hctx_seqbufc++; | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
97 | static int ccid2_hc_tx_send_packet(struct sock *sk, | 134 | static int ccid2_hc_tx_send_packet(struct sock *sk, |
98 | struct sk_buff *skb, int len) | 135 | struct sk_buff *skb, int len) |
99 | { | 136 | { |
@@ -122,7 +159,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, | |||
122 | } | 159 | } |
123 | } | 160 | } |
124 | 161 | ||
125 | return 100; /* XXX */ | 162 | return 1; /* XXX CCID should dequeue when ready instead of polling */ |
126 | } | 163 | } |
127 | 164 | ||
128 | static void ccid2_change_l_ack_ratio(struct sock *sk, int val) | 165 | static void ccid2_change_l_ack_ratio(struct sock *sk, int val) |
@@ -150,10 +187,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val) | |||
150 | dp->dccps_l_ack_ratio = val; | 187 | dp->dccps_l_ack_ratio = val; |
151 | } | 188 | } |
152 | 189 | ||
153 | static void ccid2_change_cwnd(struct sock *sk, int val) | 190 | static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val) |
154 | { | 191 | { |
155 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | ||
156 | |||
157 | if (val == 0) | 192 | if (val == 0) |
158 | val = 1; | 193 | val = 1; |
159 | 194 | ||
@@ -164,6 +199,17 @@ static void ccid2_change_cwnd(struct sock *sk, int val) | |||
164 | hctx->ccid2hctx_cwnd = val; | 199 | hctx->ccid2hctx_cwnd = val; |
165 | } | 200 | } |
166 | 201 | ||
202 | static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) | ||
203 | { | ||
204 | ccid2_pr_debug("change SRTT to %ld\n", val); | ||
205 | hctx->ccid2hctx_srtt = val; | ||
206 | } | ||
207 | |||
208 | static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val) | ||
209 | { | ||
210 | hctx->ccid2hctx_pipe = val; | ||
211 | } | ||
212 | |||
167 | static void ccid2_start_rto_timer(struct sock *sk); | 213 | static void ccid2_start_rto_timer(struct sock *sk); |
168 | 214 | ||
169 | static void ccid2_hc_tx_rto_expire(unsigned long data) | 215 | static void ccid2_hc_tx_rto_expire(unsigned long data) |
@@ -193,11 +239,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) | |||
193 | ccid2_start_rto_timer(sk); | 239 | ccid2_start_rto_timer(sk); |
194 | 240 | ||
195 | /* adjust pipe, cwnd etc */ | 241 | /* adjust pipe, cwnd etc */ |
196 | hctx->ccid2hctx_pipe = 0; | 242 | ccid2_change_pipe(hctx, 0); |
197 | hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1; | 243 | hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1; |
198 | if (hctx->ccid2hctx_ssthresh < 2) | 244 | if (hctx->ccid2hctx_ssthresh < 2) |
199 | hctx->ccid2hctx_ssthresh = 2; | 245 | hctx->ccid2hctx_ssthresh = 2; |
200 | ccid2_change_cwnd(sk, 1); | 246 | ccid2_change_cwnd(hctx, 1); |
201 | 247 | ||
202 | /* clear state about stuff we sent */ | 248 | /* clear state about stuff we sent */ |
203 | hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; | 249 | hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; |
@@ -232,13 +278,14 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) | |||
232 | { | 278 | { |
233 | struct dccp_sock *dp = dccp_sk(sk); | 279 | struct dccp_sock *dp = dccp_sk(sk); |
234 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 280 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
281 | struct ccid2_seq *next; | ||
235 | u64 seq; | 282 | u64 seq; |
236 | 283 | ||
237 | ccid2_hc_tx_check_sanity(hctx); | 284 | ccid2_hc_tx_check_sanity(hctx); |
238 | 285 | ||
239 | BUG_ON(!hctx->ccid2hctx_sendwait); | 286 | BUG_ON(!hctx->ccid2hctx_sendwait); |
240 | hctx->ccid2hctx_sendwait = 0; | 287 | hctx->ccid2hctx_sendwait = 0; |
241 | hctx->ccid2hctx_pipe++; | 288 | ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1); |
242 | BUG_ON(hctx->ccid2hctx_pipe < 0); | 289 | BUG_ON(hctx->ccid2hctx_pipe < 0); |
243 | 290 | ||
244 | /* There is an issue. What if another packet is sent between | 291 | /* There is an issue. What if another packet is sent between |
@@ -251,15 +298,23 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) | |||
251 | hctx->ccid2hctx_seqh->ccid2s_seq = seq; | 298 | hctx->ccid2hctx_seqh->ccid2s_seq = seq; |
252 | hctx->ccid2hctx_seqh->ccid2s_acked = 0; | 299 | hctx->ccid2hctx_seqh->ccid2s_acked = 0; |
253 | hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; | 300 | hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; |
254 | hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next; | ||
255 | 301 | ||
256 | ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, | 302 | next = hctx->ccid2hctx_seqh->ccid2s_next; |
257 | hctx->ccid2hctx_pipe); | 303 | /* check if we need to alloc more space */ |
304 | if (next == hctx->ccid2hctx_seqt) { | ||
305 | int rc; | ||
306 | |||
307 | ccid2_pr_debug("allocating more space in history\n"); | ||
308 | rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_KERNEL); | ||
309 | BUG_ON(rc); /* XXX what do we do? */ | ||
258 | 310 | ||
259 | if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) { | 311 | next = hctx->ccid2hctx_seqh->ccid2s_next; |
260 | /* XXX allocate more space */ | 312 | BUG_ON(next == hctx->ccid2hctx_seqt); |
261 | WARN_ON(1); | ||
262 | } | 313 | } |
314 | hctx->ccid2hctx_seqh = next; | ||
315 | |||
316 | ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, | ||
317 | hctx->ccid2hctx_pipe); | ||
263 | 318 | ||
264 | hctx->ccid2hctx_sent++; | 319 | hctx->ccid2hctx_sent++; |
265 | 320 | ||
@@ -295,7 +350,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) | |||
295 | if (!timer_pending(&hctx->ccid2hctx_rtotimer)) | 350 | if (!timer_pending(&hctx->ccid2hctx_rtotimer)) |
296 | ccid2_start_rto_timer(sk); | 351 | ccid2_start_rto_timer(sk); |
297 | 352 | ||
298 | #ifdef CCID2_DEBUG | 353 | #ifdef CONFIG_IP_DCCP_CCID2_DEBUG |
299 | ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe); | 354 | ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe); |
300 | ccid2_pr_debug("Sent: seq=%llu\n", seq); | 355 | ccid2_pr_debug("Sent: seq=%llu\n", seq); |
301 | do { | 356 | do { |
@@ -398,7 +453,7 @@ static inline void ccid2_new_ack(struct sock *sk, | |||
398 | /* increase every 2 acks */ | 453 | /* increase every 2 acks */ |
399 | hctx->ccid2hctx_ssacks++; | 454 | hctx->ccid2hctx_ssacks++; |
400 | if (hctx->ccid2hctx_ssacks == 2) { | 455 | if (hctx->ccid2hctx_ssacks == 2) { |
401 | ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); | 456 | ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1); |
402 | hctx->ccid2hctx_ssacks = 0; | 457 | hctx->ccid2hctx_ssacks = 0; |
403 | *maxincr = *maxincr - 1; | 458 | *maxincr = *maxincr - 1; |
404 | } | 459 | } |
@@ -411,26 +466,28 @@ static inline void ccid2_new_ack(struct sock *sk, | |||
411 | hctx->ccid2hctx_acks++; | 466 | hctx->ccid2hctx_acks++; |
412 | 467 | ||
413 | if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) { | 468 | if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) { |
414 | ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); | 469 | ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1); |
415 | hctx->ccid2hctx_acks = 0; | 470 | hctx->ccid2hctx_acks = 0; |
416 | } | 471 | } |
417 | } | 472 | } |
418 | 473 | ||
419 | /* update RTO */ | 474 | /* update RTO */ |
420 | if (hctx->ccid2hctx_srtt == -1 || | 475 | if (hctx->ccid2hctx_srtt == -1 || |
421 | (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) { | 476 | time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) { |
422 | unsigned long r = jiffies - seqp->ccid2s_sent; | 477 | unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; |
423 | int s; | 478 | int s; |
424 | 479 | ||
425 | /* first measurement */ | 480 | /* first measurement */ |
426 | if (hctx->ccid2hctx_srtt == -1) { | 481 | if (hctx->ccid2hctx_srtt == -1) { |
427 | ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", | 482 | ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", |
428 | r, jiffies, seqp->ccid2s_seq); | 483 | r, jiffies, seqp->ccid2s_seq); |
429 | hctx->ccid2hctx_srtt = r; | 484 | ccid2_change_srtt(hctx, r); |
430 | hctx->ccid2hctx_rttvar = r >> 1; | 485 | hctx->ccid2hctx_rttvar = r >> 1; |
431 | } else { | 486 | } else { |
432 | /* RTTVAR */ | 487 | /* RTTVAR */ |
433 | long tmp = hctx->ccid2hctx_srtt - r; | 488 | long tmp = hctx->ccid2hctx_srtt - r; |
489 | long srtt; | ||
490 | |||
434 | if (tmp < 0) | 491 | if (tmp < 0) |
435 | tmp *= -1; | 492 | tmp *= -1; |
436 | 493 | ||
@@ -440,10 +497,12 @@ static inline void ccid2_new_ack(struct sock *sk, | |||
440 | hctx->ccid2hctx_rttvar += tmp; | 497 | hctx->ccid2hctx_rttvar += tmp; |
441 | 498 | ||
442 | /* SRTT */ | 499 | /* SRTT */ |
443 | hctx->ccid2hctx_srtt *= 7; | 500 | srtt = hctx->ccid2hctx_srtt; |
444 | hctx->ccid2hctx_srtt >>= 3; | 501 | srtt *= 7; |
502 | srtt >>= 3; | ||
445 | tmp = r >> 3; | 503 | tmp = r >> 3; |
446 | hctx->ccid2hctx_srtt += tmp; | 504 | srtt += tmp; |
505 | ccid2_change_srtt(hctx, srtt); | ||
447 | } | 506 | } |
448 | s = hctx->ccid2hctx_rttvar << 2; | 507 | s = hctx->ccid2hctx_rttvar << 2; |
449 | /* clock granularity is 1 when based on jiffies */ | 508 | /* clock granularity is 1 when based on jiffies */ |
@@ -479,13 +538,29 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk) | |||
479 | { | 538 | { |
480 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 539 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
481 | 540 | ||
482 | hctx->ccid2hctx_pipe--; | 541 | ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1); |
483 | BUG_ON(hctx->ccid2hctx_pipe < 0); | 542 | BUG_ON(hctx->ccid2hctx_pipe < 0); |
484 | 543 | ||
485 | if (hctx->ccid2hctx_pipe == 0) | 544 | if (hctx->ccid2hctx_pipe == 0) |
486 | ccid2_hc_tx_kill_rto_timer(sk); | 545 | ccid2_hc_tx_kill_rto_timer(sk); |
487 | } | 546 | } |
488 | 547 | ||
548 | static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx, | ||
549 | struct ccid2_seq *seqp) | ||
550 | { | ||
551 | if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { | ||
552 | ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); | ||
553 | return; | ||
554 | } | ||
555 | |||
556 | hctx->ccid2hctx_last_cong = jiffies; | ||
557 | |||
558 | ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1); | ||
559 | hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; | ||
560 | if (hctx->ccid2hctx_ssthresh < 2) | ||
561 | hctx->ccid2hctx_ssthresh = 2; | ||
562 | } | ||
563 | |||
489 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | 564 | static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) |
490 | { | 565 | { |
491 | struct dccp_sock *dp = dccp_sk(sk); | 566 | struct dccp_sock *dp = dccp_sk(sk); |
@@ -496,7 +571,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
496 | unsigned char veclen; | 571 | unsigned char veclen; |
497 | int offset = 0; | 572 | int offset = 0; |
498 | int done = 0; | 573 | int done = 0; |
499 | int loss = 0; | ||
500 | unsigned int maxincr = 0; | 574 | unsigned int maxincr = 0; |
501 | 575 | ||
502 | ccid2_hc_tx_check_sanity(hctx); | 576 | ccid2_hc_tx_check_sanity(hctx); |
@@ -582,15 +656,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
582 | * run length | 656 | * run length |
583 | */ | 657 | */ |
584 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { | 658 | while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { |
585 | const u8 state = (*vector & | 659 | const u8 state = *vector & |
586 | DCCP_ACKVEC_STATE_MASK) >> 6; | 660 | DCCP_ACKVEC_STATE_MASK; |
587 | 661 | ||
588 | /* new packet received or marked */ | 662 | /* new packet received or marked */ |
589 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && | 663 | if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && |
590 | !seqp->ccid2s_acked) { | 664 | !seqp->ccid2s_acked) { |
591 | if (state == | 665 | if (state == |
592 | DCCP_ACKVEC_STATE_ECN_MARKED) { | 666 | DCCP_ACKVEC_STATE_ECN_MARKED) { |
593 | loss = 1; | 667 | ccid2_congestion_event(hctx, |
668 | seqp); | ||
594 | } else | 669 | } else |
595 | ccid2_new_ack(sk, seqp, | 670 | ccid2_new_ack(sk, seqp, |
596 | &maxincr); | 671 | &maxincr); |
@@ -642,7 +717,13 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
642 | /* check for lost packets */ | 717 | /* check for lost packets */ |
643 | while (1) { | 718 | while (1) { |
644 | if (!seqp->ccid2s_acked) { | 719 | if (!seqp->ccid2s_acked) { |
645 | loss = 1; | 720 | ccid2_pr_debug("Packet lost: %llu\n", |
721 | seqp->ccid2s_seq); | ||
722 | /* XXX need to traverse from tail -> head in | ||
723 | * order to detect multiple congestion events in | ||
724 | * one ack vector. | ||
725 | */ | ||
726 | ccid2_congestion_event(hctx, seqp); | ||
646 | ccid2_hc_tx_dec_pipe(sk); | 727 | ccid2_hc_tx_dec_pipe(sk); |
647 | } | 728 | } |
648 | if (seqp == hctx->ccid2hctx_seqt) | 729 | if (seqp == hctx->ccid2hctx_seqt) |
@@ -661,53 +742,33 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) | |||
661 | hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; | 742 | hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; |
662 | } | 743 | } |
663 | 744 | ||
664 | if (loss) { | ||
665 | /* XXX do bit shifts guarantee a 0 as the new bit? */ | ||
666 | ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1); | ||
667 | hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; | ||
668 | if (hctx->ccid2hctx_ssthresh < 2) | ||
669 | hctx->ccid2hctx_ssthresh = 2; | ||
670 | } | ||
671 | |||
672 | ccid2_hc_tx_check_sanity(hctx); | 745 | ccid2_hc_tx_check_sanity(hctx); |
673 | } | 746 | } |
674 | 747 | ||
675 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | 748 | static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) |
676 | { | 749 | { |
677 | struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); | 750 | struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); |
678 | int seqcount = ccid2_seq_len; | ||
679 | int i; | ||
680 | 751 | ||
681 | /* XXX init variables with proper values */ | 752 | ccid2_change_cwnd(hctx, 1); |
682 | hctx->ccid2hctx_cwnd = 1; | 753 | /* Initialize ssthresh to infinity. This means that we will exit the |
683 | hctx->ccid2hctx_ssthresh = 10; | 754 | * initial slow-start after the first packet loss. This is what we |
755 | * want. | ||
756 | */ | ||
757 | hctx->ccid2hctx_ssthresh = ~0; | ||
684 | hctx->ccid2hctx_numdupack = 3; | 758 | hctx->ccid2hctx_numdupack = 3; |
759 | hctx->ccid2hctx_seqbufc = 0; | ||
685 | 760 | ||
686 | /* XXX init ~ to window size... */ | 761 | /* XXX init ~ to window size... */ |
687 | hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) * | 762 | if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0) |
688 | seqcount, gfp_any()); | ||
689 | if (hctx->ccid2hctx_seqbuf == NULL) | ||
690 | return -ENOMEM; | 763 | return -ENOMEM; |
691 | 764 | ||
692 | for (i = 0; i < (seqcount - 1); i++) { | ||
693 | hctx->ccid2hctx_seqbuf[i].ccid2s_next = | ||
694 | &hctx->ccid2hctx_seqbuf[i + 1]; | ||
695 | hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev = | ||
696 | &hctx->ccid2hctx_seqbuf[i]; | ||
697 | } | ||
698 | hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next = | ||
699 | hctx->ccid2hctx_seqbuf; | ||
700 | hctx->ccid2hctx_seqbuf->ccid2s_prev = | ||
701 | &hctx->ccid2hctx_seqbuf[seqcount - 1]; | ||
702 | |||
703 | hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqbuf; | ||
704 | hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; | ||
705 | hctx->ccid2hctx_sent = 0; | 765 | hctx->ccid2hctx_sent = 0; |
706 | hctx->ccid2hctx_rto = 3 * HZ; | 766 | hctx->ccid2hctx_rto = 3 * HZ; |
707 | hctx->ccid2hctx_srtt = -1; | 767 | ccid2_change_srtt(hctx, -1); |
708 | hctx->ccid2hctx_rttvar = -1; | 768 | hctx->ccid2hctx_rttvar = -1; |
709 | hctx->ccid2hctx_lastrtt = 0; | 769 | hctx->ccid2hctx_lastrtt = 0; |
710 | hctx->ccid2hctx_rpdupack = -1; | 770 | hctx->ccid2hctx_rpdupack = -1; |
771 | hctx->ccid2hctx_last_cong = jiffies; | ||
711 | 772 | ||
712 | hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; | 773 | hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; |
713 | hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; | 774 | hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; |
@@ -720,10 +781,13 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) | |||
720 | static void ccid2_hc_tx_exit(struct sock *sk) | 781 | static void ccid2_hc_tx_exit(struct sock *sk) |
721 | { | 782 | { |
722 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); | 783 | struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); |
784 | int i; | ||
723 | 785 | ||
724 | ccid2_hc_tx_kill_rto_timer(sk); | 786 | ccid2_hc_tx_kill_rto_timer(sk); |
725 | kfree(hctx->ccid2hctx_seqbuf); | 787 | |
726 | hctx->ccid2hctx_seqbuf = NULL; | 788 | for (i = 0; i < hctx->ccid2hctx_seqbufc; i++) |
789 | kfree(hctx->ccid2hctx_seqbuf[i]); | ||
790 | hctx->ccid2hctx_seqbufc = 0; | ||
727 | } | 791 | } |
728 | 792 | ||
729 | static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | 793 | static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) |
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 451a87464fa5..5b2ef4acb300 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h | |||
@@ -35,6 +35,9 @@ struct ccid2_seq { | |||
35 | struct ccid2_seq *ccid2s_next; | 35 | struct ccid2_seq *ccid2s_next; |
36 | }; | 36 | }; |
37 | 37 | ||
38 | #define CCID2_SEQBUF_LEN 256 | ||
39 | #define CCID2_SEQBUF_MAX 128 | ||
40 | |||
38 | /** struct ccid2_hc_tx_sock - CCID2 TX half connection | 41 | /** struct ccid2_hc_tx_sock - CCID2 TX half connection |
39 | * | 42 | * |
40 | * @ccid2hctx_ssacks - ACKs recv in slow start | 43 | * @ccid2hctx_ssacks - ACKs recv in slow start |
@@ -50,10 +53,11 @@ struct ccid2_hc_tx_sock { | |||
50 | int ccid2hctx_cwnd; | 53 | int ccid2hctx_cwnd; |
51 | int ccid2hctx_ssacks; | 54 | int ccid2hctx_ssacks; |
52 | int ccid2hctx_acks; | 55 | int ccid2hctx_acks; |
53 | int ccid2hctx_ssthresh; | 56 | unsigned int ccid2hctx_ssthresh; |
54 | int ccid2hctx_pipe; | 57 | int ccid2hctx_pipe; |
55 | int ccid2hctx_numdupack; | 58 | int ccid2hctx_numdupack; |
56 | struct ccid2_seq *ccid2hctx_seqbuf; | 59 | struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; |
60 | int ccid2hctx_seqbufc; | ||
57 | struct ccid2_seq *ccid2hctx_seqh; | 61 | struct ccid2_seq *ccid2hctx_seqh; |
58 | struct ccid2_seq *ccid2hctx_seqt; | 62 | struct ccid2_seq *ccid2hctx_seqt; |
59 | long ccid2hctx_rto; | 63 | long ccid2hctx_rto; |
@@ -67,6 +71,7 @@ struct ccid2_hc_tx_sock { | |||
67 | u64 ccid2hctx_rpseq; | 71 | u64 ccid2hctx_rpseq; |
68 | int ccid2hctx_rpdupack; | 72 | int ccid2hctx_rpdupack; |
69 | int ccid2hctx_sendwait; | 73 | int ccid2hctx_sendwait; |
74 | unsigned long ccid2hctx_last_cong; | ||
70 | }; | 75 | }; |
71 | 76 | ||
72 | struct ccid2_hc_rx_sock { | 77 | struct ccid2_hc_rx_sock { |
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 090bc39e8199..195aa9566228 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
@@ -900,7 +900,7 @@ found: | |||
900 | static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) | 900 | static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) |
901 | { | 901 | { |
902 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 902 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
903 | struct dccp_li_hist_entry *next, *head; | 903 | struct dccp_li_hist_entry *head; |
904 | u64 seq_temp; | 904 | u64 seq_temp; |
905 | 905 | ||
906 | if (list_empty(&hcrx->ccid3hcrx_li_hist)) { | 906 | if (list_empty(&hcrx->ccid3hcrx_li_hist)) { |
@@ -908,15 +908,15 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) | |||
908 | &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) | 908 | &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) |
909 | return; | 909 | return; |
910 | 910 | ||
911 | next = (struct dccp_li_hist_entry *) | 911 | head = list_entry(hcrx->ccid3hcrx_li_hist.next, |
912 | hcrx->ccid3hcrx_li_hist.next; | 912 | struct dccp_li_hist_entry, dccplih_node); |
913 | next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); | 913 | head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); |
914 | } else { | 914 | } else { |
915 | struct dccp_li_hist_entry *entry; | 915 | struct dccp_li_hist_entry *entry; |
916 | struct list_head *tail; | 916 | struct list_head *tail; |
917 | 917 | ||
918 | head = (struct dccp_li_hist_entry *) | 918 | head = list_entry(hcrx->ccid3hcrx_li_hist.next, |
919 | hcrx->ccid3hcrx_li_hist.next; | 919 | struct dccp_li_hist_entry, dccplih_node); |
920 | /* FIXME win count check removed as was wrong */ | 920 | /* FIXME win count check removed as was wrong */ |
921 | /* should make this check with receive history */ | 921 | /* should make this check with receive history */ |
922 | /* and compare there as per section 10.2 of RFC4342 */ | 922 | /* and compare there as per section 10.2 of RFC4342 */ |
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index a5c5475724c0..0a21be437ed3 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -130,7 +130,7 @@ extern void dccp_send_delayed_ack(struct sock *sk); | |||
130 | extern void dccp_send_sync(struct sock *sk, const u64 seq, | 130 | extern void dccp_send_sync(struct sock *sk, const u64 seq, |
131 | const enum dccp_pkt_type pkt_type); | 131 | const enum dccp_pkt_type pkt_type); |
132 | 132 | ||
133 | extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); | 133 | extern void dccp_write_xmit(struct sock *sk, int block); |
134 | extern void dccp_write_space(struct sock *sk); | 134 | extern void dccp_write_space(struct sock *sk); |
135 | 135 | ||
136 | extern void dccp_init_xmit_timers(struct sock *sk); | 136 | extern void dccp_init_xmit_timers(struct sock *sk); |
diff --git a/net/dccp/feat.h b/net/dccp/feat.h index b44c45504fb6..cee553d416ca 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h | |||
@@ -27,5 +27,10 @@ extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); | |||
27 | extern int dccp_feat_init(struct dccp_minisock *dmsk); | 27 | extern int dccp_feat_init(struct dccp_minisock *dmsk); |
28 | 28 | ||
29 | extern int dccp_feat_default_sequence_window; | 29 | extern int dccp_feat_default_sequence_window; |
30 | extern int dccp_feat_default_rx_ccid; | ||
31 | extern int dccp_feat_default_tx_ccid; | ||
32 | extern int dccp_feat_default_ack_ratio; | ||
33 | extern int dccp_feat_default_send_ack_vector; | ||
34 | extern int dccp_feat_default_send_ndp_count; | ||
30 | 35 | ||
31 | #endif /* _DCCP_FEAT_H */ | 36 | #endif /* _DCCP_FEAT_H */ |
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7f56f7e8f571..9a1a76a7dc41 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -501,6 +501,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
501 | 501 | ||
502 | dccp_openreq_init(req, &dp, skb); | 502 | dccp_openreq_init(req, &dp, skb); |
503 | 503 | ||
504 | if (security_inet_conn_request(sk, skb, req)) | ||
505 | goto drop_and_free; | ||
506 | |||
504 | ireq = inet_rsk(req); | 507 | ireq = inet_rsk(req); |
505 | ireq->loc_addr = daddr; | 508 | ireq->loc_addr = daddr; |
506 | ireq->rmt_addr = saddr; | 509 | ireq->rmt_addr = saddr; |
@@ -605,10 +608,10 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
605 | if (req != NULL) | 608 | if (req != NULL) |
606 | return dccp_check_req(sk, skb, req, prev); | 609 | return dccp_check_req(sk, skb, req, prev); |
607 | 610 | ||
608 | nsk = __inet_lookup_established(&dccp_hashinfo, | 611 | nsk = inet_lookup_established(&dccp_hashinfo, |
609 | iph->saddr, dh->dccph_sport, | 612 | iph->saddr, dh->dccph_sport, |
610 | iph->daddr, ntohs(dh->dccph_dport), | 613 | iph->daddr, dh->dccph_dport, |
611 | inet_iif(skb)); | 614 | inet_iif(skb)); |
612 | if (nsk != NULL) { | 615 | if (nsk != NULL) { |
613 | if (nsk->sk_state != DCCP_TIME_WAIT) { | 616 | if (nsk->sk_state != DCCP_TIME_WAIT) { |
614 | bh_lock_sock(nsk); | 617 | bh_lock_sock(nsk); |
@@ -678,6 +681,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, | |||
678 | } | 681 | } |
679 | }; | 682 | }; |
680 | 683 | ||
684 | security_skb_classify_flow(skb, &fl); | ||
681 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | 685 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { |
682 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | 686 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); |
683 | return NULL; | 687 | return NULL; |
@@ -921,7 +925,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) | |||
921 | * Look up flow ID in table and get corresponding socket */ | 925 | * Look up flow ID in table and get corresponding socket */ |
922 | sk = __inet_lookup(&dccp_hashinfo, | 926 | sk = __inet_lookup(&dccp_hashinfo, |
923 | skb->nh.iph->saddr, dh->dccph_sport, | 927 | skb->nh.iph->saddr, dh->dccph_sport, |
924 | skb->nh.iph->daddr, ntohs(dh->dccph_dport), | 928 | skb->nh.iph->daddr, dh->dccph_dport, |
925 | inet_iif(skb)); | 929 | inet_iif(skb)); |
926 | 930 | ||
927 | /* | 931 | /* |
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 610c722ac27f..7a47399cf31f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c | |||
@@ -201,6 +201,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
201 | fl.oif = sk->sk_bound_dev_if; | 201 | fl.oif = sk->sk_bound_dev_if; |
202 | fl.fl_ip_dport = usin->sin6_port; | 202 | fl.fl_ip_dport = usin->sin6_port; |
203 | fl.fl_ip_sport = inet->sport; | 203 | fl.fl_ip_sport = inet->sport; |
204 | security_sk_classify_flow(sk, &fl); | ||
204 | 205 | ||
205 | if (np->opt != NULL && np->opt->srcrt != NULL) { | 206 | if (np->opt != NULL && np->opt->srcrt != NULL) { |
206 | const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; | 207 | const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; |
@@ -230,7 +231,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
230 | ipv6_addr_copy(&np->saddr, saddr); | 231 | ipv6_addr_copy(&np->saddr, saddr); |
231 | inet->rcv_saddr = LOOPBACK4_IPV6; | 232 | inet->rcv_saddr = LOOPBACK4_IPV6; |
232 | 233 | ||
233 | __ip6_dst_store(sk, dst, NULL); | 234 | __ip6_dst_store(sk, dst, NULL, NULL); |
234 | 235 | ||
235 | icsk->icsk_ext_hdr_len = 0; | 236 | icsk->icsk_ext_hdr_len = 0; |
236 | if (np->opt != NULL) | 237 | if (np->opt != NULL) |
@@ -322,6 +323,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
322 | fl.oif = sk->sk_bound_dev_if; | 323 | fl.oif = sk->sk_bound_dev_if; |
323 | fl.fl_ip_dport = inet->dport; | 324 | fl.fl_ip_dport = inet->dport; |
324 | fl.fl_ip_sport = inet->sport; | 325 | fl.fl_ip_sport = inet->sport; |
326 | security_sk_classify_flow(sk, &fl); | ||
325 | 327 | ||
326 | err = ip6_dst_lookup(sk, &dst, &fl); | 328 | err = ip6_dst_lookup(sk, &dst, &fl); |
327 | if (err) { | 329 | if (err) { |
@@ -422,6 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, | |||
422 | fl.oif = ireq6->iif; | 424 | fl.oif = ireq6->iif; |
423 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; | 425 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; |
424 | fl.fl_ip_sport = inet_sk(sk)->sport; | 426 | fl.fl_ip_sport = inet_sk(sk)->sport; |
427 | security_req_classify_flow(req, &fl); | ||
425 | 428 | ||
426 | if (dst == NULL) { | 429 | if (dst == NULL) { |
427 | opt = np->opt; | 430 | opt = np->opt; |
@@ -566,6 +569,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) | |||
566 | fl.oif = inet6_iif(rxskb); | 569 | fl.oif = inet6_iif(rxskb); |
567 | fl.fl_ip_dport = dh->dccph_dport; | 570 | fl.fl_ip_dport = dh->dccph_dport; |
568 | fl.fl_ip_sport = dh->dccph_sport; | 571 | fl.fl_ip_sport = dh->dccph_sport; |
572 | security_skb_classify_flow(rxskb, &fl); | ||
569 | 573 | ||
570 | /* sk = NULL, but it is safe for now. RST socket required. */ | 574 | /* sk = NULL, but it is safe for now. RST socket required. */ |
571 | if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { | 575 | if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { |
@@ -622,6 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, | |||
622 | fl.oif = inet6_iif(rxskb); | 626 | fl.oif = inet6_iif(rxskb); |
623 | fl.fl_ip_dport = dh->dccph_dport; | 627 | fl.fl_ip_dport = dh->dccph_dport; |
624 | fl.fl_ip_sport = dh->dccph_sport; | 628 | fl.fl_ip_sport = dh->dccph_sport; |
629 | security_req_classify_flow(req, &fl); | ||
625 | 630 | ||
626 | if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { | 631 | if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { |
627 | if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { | 632 | if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { |
@@ -704,6 +709,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
704 | 709 | ||
705 | dccp_openreq_init(req, &dp, skb); | 710 | dccp_openreq_init(req, &dp, skb); |
706 | 711 | ||
712 | if (security_inet_conn_request(sk, skb, req)) | ||
713 | goto drop_and_free; | ||
714 | |||
707 | ireq6 = inet6_rsk(req); | 715 | ireq6 = inet6_rsk(req); |
708 | ireq = inet_rsk(req); | 716 | ireq = inet_rsk(req); |
709 | ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); | 717 | ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); |
@@ -842,6 +850,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
842 | fl.oif = sk->sk_bound_dev_if; | 850 | fl.oif = sk->sk_bound_dev_if; |
843 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; | 851 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; |
844 | fl.fl_ip_sport = inet_sk(sk)->sport; | 852 | fl.fl_ip_sport = inet_sk(sk)->sport; |
853 | security_sk_classify_flow(sk, &fl); | ||
845 | 854 | ||
846 | if (ip6_dst_lookup(sk, &dst, &fl)) | 855 | if (ip6_dst_lookup(sk, &dst, &fl)) |
847 | goto out; | 856 | goto out; |
@@ -863,7 +872,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
863 | * comment in that function for the gory details. -acme | 872 | * comment in that function for the gory details. -acme |
864 | */ | 873 | */ |
865 | 874 | ||
866 | __ip6_dst_store(newsk, dst, NULL); | 875 | __ip6_dst_store(newsk, dst, NULL, NULL); |
867 | newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | | 876 | newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | |
868 | NETIF_F_TSO); | 877 | NETIF_F_TSO); |
869 | newdp6 = (struct dccp6_sock *)newsk; | 878 | newdp6 = (struct dccp6_sock *)newsk; |
@@ -961,7 +970,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
961 | if (skb->protocol == htons(ETH_P_IP)) | 970 | if (skb->protocol == htons(ETH_P_IP)) |
962 | return dccp_v4_do_rcv(sk, skb); | 971 | return dccp_v4_do_rcv(sk, skb); |
963 | 972 | ||
964 | if (sk_filter(sk, skb, 0)) | 973 | if (sk_filter(sk, skb)) |
965 | goto discard; | 974 | goto discard; |
966 | 975 | ||
967 | /* | 976 | /* |
diff --git a/net/dccp/output.c b/net/dccp/output.c index 58669beee132..7102e3aed4ca 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -198,7 +198,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, | |||
198 | while (1) { | 198 | while (1) { |
199 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 199 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); |
200 | 200 | ||
201 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 201 | if (sk->sk_err) |
202 | goto do_error; | 202 | goto do_error; |
203 | if (!*timeo) | 203 | if (!*timeo) |
204 | goto do_nonblock; | 204 | goto do_nonblock; |
@@ -234,37 +234,72 @@ do_interrupted: | |||
234 | goto out; | 234 | goto out; |
235 | } | 235 | } |
236 | 236 | ||
237 | int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) | 237 | static void dccp_write_xmit_timer(unsigned long data) { |
238 | struct sock *sk = (struct sock *)data; | ||
239 | struct dccp_sock *dp = dccp_sk(sk); | ||
240 | |||
241 | bh_lock_sock(sk); | ||
242 | if (sock_owned_by_user(sk)) | ||
243 | sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); | ||
244 | else | ||
245 | dccp_write_xmit(sk, 0); | ||
246 | bh_unlock_sock(sk); | ||
247 | sock_put(sk); | ||
248 | } | ||
249 | |||
250 | void dccp_write_xmit(struct sock *sk, int block) | ||
238 | { | 251 | { |
239 | const struct dccp_sock *dp = dccp_sk(sk); | 252 | struct dccp_sock *dp = dccp_sk(sk); |
240 | int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, | 253 | struct sk_buff *skb; |
254 | long timeo = 30000; /* If a packet is taking longer than 2 secs | ||
255 | we have other issues */ | ||
256 | |||
257 | while ((skb = skb_peek(&sk->sk_write_queue))) { | ||
258 | int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, | ||
241 | skb->len); | 259 | skb->len); |
242 | 260 | ||
243 | if (err > 0) | 261 | if (err > 0) { |
244 | err = dccp_wait_for_ccid(sk, skb, timeo); | 262 | if (!block) { |
263 | sk_reset_timer(sk, &dp->dccps_xmit_timer, | ||
264 | msecs_to_jiffies(err)+jiffies); | ||
265 | break; | ||
266 | } else | ||
267 | err = dccp_wait_for_ccid(sk, skb, &timeo); | ||
268 | if (err) { | ||
269 | printk(KERN_CRIT "%s:err at dccp_wait_for_ccid" | ||
270 | " %d\n", __FUNCTION__, err); | ||
271 | dump_stack(); | ||
272 | } | ||
273 | } | ||
245 | 274 | ||
246 | if (err == 0) { | 275 | skb_dequeue(&sk->sk_write_queue); |
247 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); | 276 | if (err == 0) { |
248 | const int len = skb->len; | 277 | struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); |
278 | const int len = skb->len; | ||
249 | 279 | ||
250 | if (sk->sk_state == DCCP_PARTOPEN) { | 280 | if (sk->sk_state == DCCP_PARTOPEN) { |
251 | /* See 8.1.5. Handshake Completion */ | 281 | /* See 8.1.5. Handshake Completion */ |
252 | inet_csk_schedule_ack(sk); | 282 | inet_csk_schedule_ack(sk); |
253 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | 283 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
254 | inet_csk(sk)->icsk_rto, | 284 | inet_csk(sk)->icsk_rto, |
255 | DCCP_RTO_MAX); | 285 | DCCP_RTO_MAX); |
256 | dcb->dccpd_type = DCCP_PKT_DATAACK; | 286 | dcb->dccpd_type = DCCP_PKT_DATAACK; |
257 | } else if (dccp_ack_pending(sk)) | 287 | } else if (dccp_ack_pending(sk)) |
258 | dcb->dccpd_type = DCCP_PKT_DATAACK; | 288 | dcb->dccpd_type = DCCP_PKT_DATAACK; |
259 | else | 289 | else |
260 | dcb->dccpd_type = DCCP_PKT_DATA; | 290 | dcb->dccpd_type = DCCP_PKT_DATA; |
261 | 291 | ||
262 | err = dccp_transmit_skb(sk, skb); | 292 | err = dccp_transmit_skb(sk, skb); |
263 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); | 293 | ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); |
264 | } else | 294 | if (err) { |
265 | kfree_skb(skb); | 295 | printk(KERN_CRIT "%s:err from " |
266 | 296 | "ccid_hc_tx_packet_sent %d\n", | |
267 | return err; | 297 | __FUNCTION__, err); |
298 | dump_stack(); | ||
299 | } | ||
300 | } else | ||
301 | kfree(skb); | ||
302 | } | ||
268 | } | 303 | } |
269 | 304 | ||
270 | int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | 305 | int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) |
@@ -426,6 +461,9 @@ static inline void dccp_connect_init(struct sock *sk) | |||
426 | dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); | 461 | dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); |
427 | 462 | ||
428 | icsk->icsk_retransmits = 0; | 463 | icsk->icsk_retransmits = 0; |
464 | init_timer(&dp->dccps_xmit_timer); | ||
465 | dp->dccps_xmit_timer.data = (unsigned long)sk; | ||
466 | dp->dccps_xmit_timer.function = dccp_write_xmit_timer; | ||
429 | } | 467 | } |
430 | 468 | ||
431 | int dccp_connect(struct sock *sk) | 469 | int dccp_connect(struct sock *sk) |
@@ -560,8 +598,10 @@ void dccp_send_close(struct sock *sk, const int active) | |||
560 | DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; | 598 | DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; |
561 | 599 | ||
562 | if (active) { | 600 | if (active) { |
601 | dccp_write_xmit(sk, 1); | ||
563 | dccp_skb_entail(sk, skb); | 602 | dccp_skb_entail(sk, skb); |
564 | dccp_transmit_skb(sk, skb_clone(skb, prio)); | 603 | dccp_transmit_skb(sk, skb_clone(skb, prio)); |
604 | /* FIXME do we need a retransmit timer here? */ | ||
565 | } else | 605 | } else |
566 | dccp_transmit_skb(sk, skb); | 606 | dccp_transmit_skb(sk, skb); |
567 | } | 607 | } |
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 6f14bb5a28d4..962df0ea31aa 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -662,17 +662,8 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
662 | if (rc != 0) | 662 | if (rc != 0) |
663 | goto out_discard; | 663 | goto out_discard; |
664 | 664 | ||
665 | rc = dccp_write_xmit(sk, skb, &timeo); | 665 | skb_queue_tail(&sk->sk_write_queue, skb); |
666 | /* | 666 | dccp_write_xmit(sk,0); |
667 | * XXX we don't use sk_write_queue, so just discard the packet. | ||
668 | * Current plan however is to _use_ sk_write_queue with | ||
669 | * an algorith similar to tcp_sendmsg, where the main difference | ||
670 | * is that in DCCP we have to respect packet boundaries, so | ||
671 | * no coalescing of skbs. | ||
672 | * | ||
673 | * This bug was _quickly_ found & fixed by just looking at an OSTRA | ||
674 | * generated callgraph 8) -acme | ||
675 | */ | ||
676 | out_release: | 667 | out_release: |
677 | release_sock(sk); | 668 | release_sock(sk); |
678 | return rc ? : len; | 669 | return rc ? : len; |
@@ -846,6 +837,7 @@ static int dccp_close_state(struct sock *sk) | |||
846 | 837 | ||
847 | void dccp_close(struct sock *sk, long timeout) | 838 | void dccp_close(struct sock *sk, long timeout) |
848 | { | 839 | { |
840 | struct dccp_sock *dp = dccp_sk(sk); | ||
849 | struct sk_buff *skb; | 841 | struct sk_buff *skb; |
850 | int state; | 842 | int state; |
851 | 843 | ||
@@ -862,6 +854,8 @@ void dccp_close(struct sock *sk, long timeout) | |||
862 | goto adjudge_to_death; | 854 | goto adjudge_to_death; |
863 | } | 855 | } |
864 | 856 | ||
857 | sk_stop_timer(sk, &dp->dccps_xmit_timer); | ||
858 | |||
865 | /* | 859 | /* |
866 | * We need to flush the recv. buffs. We do this only on the | 860 | * We need to flush the recv. buffs. We do this only on the |
867 | * descriptor close, not protocol-sourced closes, because the | 861 | * descriptor close, not protocol-sourced closes, because the |
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index c1ba9451bc3d..38bc157876f3 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c | |||
@@ -11,18 +11,12 @@ | |||
11 | 11 | ||
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/sysctl.h> | 13 | #include <linux/sysctl.h> |
14 | #include "feat.h" | ||
14 | 15 | ||
15 | #ifndef CONFIG_SYSCTL | 16 | #ifndef CONFIG_SYSCTL |
16 | #error This file should not be compiled without CONFIG_SYSCTL defined | 17 | #error This file should not be compiled without CONFIG_SYSCTL defined |
17 | #endif | 18 | #endif |
18 | 19 | ||
19 | extern int dccp_feat_default_sequence_window; | ||
20 | extern int dccp_feat_default_rx_ccid; | ||
21 | extern int dccp_feat_default_tx_ccid; | ||
22 | extern int dccp_feat_default_ack_ratio; | ||
23 | extern int dccp_feat_default_send_ack_vector; | ||
24 | extern int dccp_feat_default_send_ndp_count; | ||
25 | |||
26 | static struct ctl_table dccp_default_table[] = { | 20 | static struct ctl_table dccp_default_table[] = { |
27 | { | 21 | { |
28 | .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW, | 22 | .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW, |
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig index 92f2ec46fd22..36e72cb145b0 100644 --- a/net/decnet/Kconfig +++ b/net/decnet/Kconfig | |||
@@ -27,6 +27,7 @@ config DECNET | |||
27 | config DECNET_ROUTER | 27 | config DECNET_ROUTER |
28 | bool "DECnet: router support (EXPERIMENTAL)" | 28 | bool "DECnet: router support (EXPERIMENTAL)" |
29 | depends on DECNET && EXPERIMENTAL | 29 | depends on DECNET && EXPERIMENTAL |
30 | select FIB_RULES | ||
30 | ---help--- | 31 | ---help--- |
31 | Add support for turning your DECnet Endnode into a level 1 or 2 | 32 | Add support for turning your DECnet Endnode into a level 1 or 2 |
32 | router. This is an experimental, but functional option. If you | 33 | router. This is an experimental, but functional option. If you |
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 5486247735f6..70e027375682 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c | |||
@@ -130,6 +130,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat | |||
130 | #include <linux/poll.h> | 130 | #include <linux/poll.h> |
131 | #include <net/neighbour.h> | 131 | #include <net/neighbour.h> |
132 | #include <net/dst.h> | 132 | #include <net/dst.h> |
133 | #include <net/fib_rules.h> | ||
133 | #include <net/dn.h> | 134 | #include <net/dn.h> |
134 | #include <net/dn_nsp.h> | 135 | #include <net/dn_nsp.h> |
135 | #include <net/dn_dev.h> | 136 | #include <net/dn_dev.h> |
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 476455fbdb03..01861feb608d 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/seq_file.h> | 34 | #include <linux/seq_file.h> |
35 | #include <linux/timer.h> | 35 | #include <linux/timer.h> |
36 | #include <linux/string.h> | 36 | #include <linux/string.h> |
37 | #include <linux/if_addr.h> | ||
37 | #include <linux/if_arp.h> | 38 | #include <linux/if_arp.h> |
38 | #include <linux/if_ether.h> | 39 | #include <linux/if_ether.h> |
39 | #include <linux/skbuff.h> | 40 | #include <linux/skbuff.h> |
@@ -45,6 +46,7 @@ | |||
45 | #include <net/neighbour.h> | 46 | #include <net/neighbour.h> |
46 | #include <net/dst.h> | 47 | #include <net/dst.h> |
47 | #include <net/flow.h> | 48 | #include <net/flow.h> |
49 | #include <net/fib_rules.h> | ||
48 | #include <net/dn.h> | 50 | #include <net/dn.h> |
49 | #include <net/dn_dev.h> | 51 | #include <net/dn_dev.h> |
50 | #include <net/dn_route.h> | 52 | #include <net/dn_route.h> |
@@ -744,20 +746,23 @@ rtattr_failure: | |||
744 | static void rtmsg_ifa(int event, struct dn_ifaddr *ifa) | 746 | static void rtmsg_ifa(int event, struct dn_ifaddr *ifa) |
745 | { | 747 | { |
746 | struct sk_buff *skb; | 748 | struct sk_buff *skb; |
747 | int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128); | 749 | int payload = sizeof(struct ifaddrmsg) + 128; |
750 | int err = -ENOBUFS; | ||
748 | 751 | ||
749 | skb = alloc_skb(size, GFP_KERNEL); | 752 | skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL); |
750 | if (!skb) { | 753 | if (skb == NULL) |
751 | netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS); | 754 | goto errout; |
752 | return; | 755 | |
753 | } | 756 | err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0); |
754 | if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { | 757 | if (err < 0) { |
755 | kfree_skb(skb); | 758 | kfree_skb(skb); |
756 | netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL); | 759 | goto errout; |
757 | return; | ||
758 | } | 760 | } |
759 | NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR; | 761 | |
760 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL); | 762 | err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); |
763 | errout: | ||
764 | if (err < 0) | ||
765 | rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err); | ||
761 | } | 766 | } |
762 | 767 | ||
763 | static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | 768 | static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) |
@@ -1417,8 +1422,6 @@ static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] = | |||
1417 | [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, }, | 1422 | [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, }, |
1418 | [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, | 1423 | [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, |
1419 | .dumpit = dn_fib_dump, }, | 1424 | .dumpit = dn_fib_dump, }, |
1420 | [RTM_NEWRULE - RTM_BASE] = { .doit = dn_fib_rtm_newrule, }, | ||
1421 | [RTM_DELRULE - RTM_BASE] = { .doit = dn_fib_rtm_delrule, }, | ||
1422 | [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, }, | 1425 | [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, }, |
1423 | #else | 1426 | #else |
1424 | [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, | 1427 | [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, |
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index fa20e2efcfc1..1cf010124ec5 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <net/neighbour.h> | 34 | #include <net/neighbour.h> |
35 | #include <net/dst.h> | 35 | #include <net/dst.h> |
36 | #include <net/flow.h> | 36 | #include <net/flow.h> |
37 | #include <net/fib_rules.h> | ||
37 | #include <net/dn.h> | 38 | #include <net/dn.h> |
38 | #include <net/dn_route.h> | 39 | #include <net/dn_route.h> |
39 | #include <net/dn_fib.h> | 40 | #include <net/dn_fib.h> |
@@ -54,11 +55,9 @@ | |||
54 | 55 | ||
55 | #define endfor_nexthops(fi) } | 56 | #define endfor_nexthops(fi) } |
56 | 57 | ||
57 | extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); | ||
58 | |||
59 | static DEFINE_SPINLOCK(dn_fib_multipath_lock); | 58 | static DEFINE_SPINLOCK(dn_fib_multipath_lock); |
60 | static struct dn_fib_info *dn_fib_info_list; | 59 | static struct dn_fib_info *dn_fib_info_list; |
61 | static DEFINE_RWLOCK(dn_fib_info_lock); | 60 | static DEFINE_SPINLOCK(dn_fib_info_lock); |
62 | 61 | ||
63 | static struct | 62 | static struct |
64 | { | 63 | { |
@@ -79,6 +78,9 @@ static struct | |||
79 | [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, | 78 | [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, |
80 | }; | 79 | }; |
81 | 80 | ||
81 | static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force); | ||
82 | static int dn_fib_sync_up(struct net_device *dev); | ||
83 | |||
82 | void dn_fib_free_info(struct dn_fib_info *fi) | 84 | void dn_fib_free_info(struct dn_fib_info *fi) |
83 | { | 85 | { |
84 | if (fi->fib_dead == 0) { | 86 | if (fi->fib_dead == 0) { |
@@ -96,7 +98,7 @@ void dn_fib_free_info(struct dn_fib_info *fi) | |||
96 | 98 | ||
97 | void dn_fib_release_info(struct dn_fib_info *fi) | 99 | void dn_fib_release_info(struct dn_fib_info *fi) |
98 | { | 100 | { |
99 | write_lock(&dn_fib_info_lock); | 101 | spin_lock(&dn_fib_info_lock); |
100 | if (fi && --fi->fib_treeref == 0) { | 102 | if (fi && --fi->fib_treeref == 0) { |
101 | if (fi->fib_next) | 103 | if (fi->fib_next) |
102 | fi->fib_next->fib_prev = fi->fib_prev; | 104 | fi->fib_next->fib_prev = fi->fib_prev; |
@@ -107,7 +109,7 @@ void dn_fib_release_info(struct dn_fib_info *fi) | |||
107 | fi->fib_dead = 1; | 109 | fi->fib_dead = 1; |
108 | dn_fib_info_put(fi); | 110 | dn_fib_info_put(fi); |
109 | } | 111 | } |
110 | write_unlock(&dn_fib_info_lock); | 112 | spin_unlock(&dn_fib_info_lock); |
111 | } | 113 | } |
112 | 114 | ||
113 | static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi) | 115 | static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi) |
@@ -378,13 +380,13 @@ link_it: | |||
378 | 380 | ||
379 | fi->fib_treeref++; | 381 | fi->fib_treeref++; |
380 | atomic_inc(&fi->fib_clntref); | 382 | atomic_inc(&fi->fib_clntref); |
381 | write_lock(&dn_fib_info_lock); | 383 | spin_lock(&dn_fib_info_lock); |
382 | fi->fib_next = dn_fib_info_list; | 384 | fi->fib_next = dn_fib_info_list; |
383 | fi->fib_prev = NULL; | 385 | fi->fib_prev = NULL; |
384 | if (dn_fib_info_list) | 386 | if (dn_fib_info_list) |
385 | dn_fib_info_list->fib_prev = fi; | 387 | dn_fib_info_list->fib_prev = fi; |
386 | dn_fib_info_list = fi; | 388 | dn_fib_info_list = fi; |
387 | write_unlock(&dn_fib_info_lock); | 389 | spin_unlock(&dn_fib_info_lock); |
388 | return fi; | 390 | return fi; |
389 | 391 | ||
390 | err_inval: | 392 | err_inval: |
@@ -490,7 +492,8 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) | |||
490 | if (attr) { | 492 | if (attr) { |
491 | if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) | 493 | if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) |
492 | return -EINVAL; | 494 | return -EINVAL; |
493 | if (i != RTA_MULTIPATH && i != RTA_METRICS) | 495 | if (i != RTA_MULTIPATH && i != RTA_METRICS && |
496 | i != RTA_TABLE) | ||
494 | rta[i-1] = (struct rtattr *)RTA_DATA(attr); | 497 | rta[i-1] = (struct rtattr *)RTA_DATA(attr); |
495 | } | 498 | } |
496 | } | 499 | } |
@@ -507,7 +510,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
507 | if (dn_fib_check_attr(r, rta)) | 510 | if (dn_fib_check_attr(r, rta)) |
508 | return -EINVAL; | 511 | return -EINVAL; |
509 | 512 | ||
510 | tb = dn_fib_get_table(r->rtm_table, 0); | 513 | tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); |
511 | if (tb) | 514 | if (tb) |
512 | return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); | 515 | return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); |
513 | 516 | ||
@@ -523,46 +526,13 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
523 | if (dn_fib_check_attr(r, rta)) | 526 | if (dn_fib_check_attr(r, rta)) |
524 | return -EINVAL; | 527 | return -EINVAL; |
525 | 528 | ||
526 | tb = dn_fib_get_table(r->rtm_table, 1); | 529 | tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); |
527 | if (tb) | 530 | if (tb) |
528 | return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); | 531 | return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); |
529 | 532 | ||
530 | return -ENOBUFS; | 533 | return -ENOBUFS; |
531 | } | 534 | } |
532 | 535 | ||
533 | |||
534 | int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) | ||
535 | { | ||
536 | int t; | ||
537 | int s_t; | ||
538 | struct dn_fib_table *tb; | ||
539 | |||
540 | if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && | ||
541 | ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) | ||
542 | return dn_cache_dump(skb, cb); | ||
543 | |||
544 | s_t = cb->args[0]; | ||
545 | if (s_t == 0) | ||
546 | s_t = cb->args[0] = RT_MIN_TABLE; | ||
547 | |||
548 | for(t = s_t; t <= RT_TABLE_MAX; t++) { | ||
549 | if (t < s_t) | ||
550 | continue; | ||
551 | if (t > s_t) | ||
552 | memset(&cb->args[1], 0, | ||
553 | sizeof(cb->args) - sizeof(cb->args[0])); | ||
554 | tb = dn_fib_get_table(t, 0); | ||
555 | if (tb == NULL) | ||
556 | continue; | ||
557 | if (tb->dump(tb, skb, cb) < 0) | ||
558 | break; | ||
559 | } | ||
560 | |||
561 | cb->args[0] = t; | ||
562 | |||
563 | return skb->len; | ||
564 | } | ||
565 | |||
566 | static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) | 536 | static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) |
567 | { | 537 | { |
568 | struct dn_fib_table *tb; | 538 | struct dn_fib_table *tb; |
@@ -682,7 +652,7 @@ static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, | |||
682 | return NOTIFY_DONE; | 652 | return NOTIFY_DONE; |
683 | } | 653 | } |
684 | 654 | ||
685 | int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) | 655 | static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) |
686 | { | 656 | { |
687 | int ret = 0; | 657 | int ret = 0; |
688 | int scope = RT_SCOPE_NOWHERE; | 658 | int scope = RT_SCOPE_NOWHERE; |
@@ -726,7 +696,7 @@ int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) | |||
726 | } | 696 | } |
727 | 697 | ||
728 | 698 | ||
729 | int dn_fib_sync_up(struct net_device *dev) | 699 | static int dn_fib_sync_up(struct net_device *dev) |
730 | { | 700 | { |
731 | int ret = 0; | 701 | int ret = 0; |
732 | 702 | ||
@@ -760,22 +730,6 @@ int dn_fib_sync_up(struct net_device *dev) | |||
760 | return ret; | 730 | return ret; |
761 | } | 731 | } |
762 | 732 | ||
763 | void dn_fib_flush(void) | ||
764 | { | ||
765 | int flushed = 0; | ||
766 | struct dn_fib_table *tb; | ||
767 | int id; | ||
768 | |||
769 | for(id = RT_TABLE_MAX; id > 0; id--) { | ||
770 | if ((tb = dn_fib_get_table(id, 0)) == NULL) | ||
771 | continue; | ||
772 | flushed += tb->flush(tb); | ||
773 | } | ||
774 | |||
775 | if (flushed) | ||
776 | dn_rt_cache_flush(-1); | ||
777 | } | ||
778 | |||
779 | static struct notifier_block dn_fib_dnaddr_notifier = { | 733 | static struct notifier_block dn_fib_dnaddr_notifier = { |
780 | .notifier_call = dn_fib_dnaddr_event, | 734 | .notifier_call = dn_fib_dnaddr_event, |
781 | }; | 735 | }; |
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 86f7f3b28e70..72ecc6e62ec4 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c | |||
@@ -586,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig | |||
586 | goto out; | 586 | goto out; |
587 | } | 587 | } |
588 | 588 | ||
589 | err = sk_filter(sk, skb, 0); | 589 | err = sk_filter(sk, skb); |
590 | if (err) | 590 | if (err) |
591 | goto out; | 591 | goto out; |
592 | 592 | ||
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 743e9fcf7c5a..dd0761e3d280 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c | |||
@@ -80,6 +80,7 @@ | |||
80 | #include <net/neighbour.h> | 80 | #include <net/neighbour.h> |
81 | #include <net/dst.h> | 81 | #include <net/dst.h> |
82 | #include <net/flow.h> | 82 | #include <net/flow.h> |
83 | #include <net/fib_rules.h> | ||
83 | #include <net/dn.h> | 84 | #include <net/dn.h> |
84 | #include <net/dn_dev.h> | 85 | #include <net/dn_dev.h> |
85 | #include <net/dn_nsp.h> | 86 | #include <net/dn_nsp.h> |
@@ -1284,7 +1285,7 @@ static int dn_route_input_slow(struct sk_buff *skb) | |||
1284 | dev_hold(out_dev); | 1285 | dev_hold(out_dev); |
1285 | 1286 | ||
1286 | if (res.r) | 1287 | if (res.r) |
1287 | src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags); | 1288 | src_map = fl.fld_src; /* no NAT support for now */ |
1288 | 1289 | ||
1289 | gateway = DN_FIB_RES_GW(res); | 1290 | gateway = DN_FIB_RES_GW(res); |
1290 | if (res.type == RTN_NAT) { | 1291 | if (res.type == RTN_NAT) { |
@@ -1485,6 +1486,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, | |||
1485 | r->rtm_src_len = 0; | 1486 | r->rtm_src_len = 0; |
1486 | r->rtm_tos = 0; | 1487 | r->rtm_tos = 0; |
1487 | r->rtm_table = RT_TABLE_MAIN; | 1488 | r->rtm_table = RT_TABLE_MAIN; |
1489 | RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | ||
1488 | r->rtm_type = rt->rt_type; | 1490 | r->rtm_type = rt->rt_type; |
1489 | r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; | 1491 | r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; |
1490 | r->rtm_scope = RT_SCOPE_UNIVERSE; | 1492 | r->rtm_scope = RT_SCOPE_UNIVERSE; |
@@ -1609,9 +1611,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) | |||
1609 | goto out_free; | 1611 | goto out_free; |
1610 | } | 1612 | } |
1611 | 1613 | ||
1612 | err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | 1614 | return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); |
1613 | |||
1614 | return err; | ||
1615 | 1615 | ||
1616 | out_free: | 1616 | out_free: |
1617 | kfree_skb(skb); | 1617 | kfree_skb(skb); |
@@ -1781,14 +1781,9 @@ void __init dn_route_init(void) | |||
1781 | { | 1781 | { |
1782 | int i, goal, order; | 1782 | int i, goal, order; |
1783 | 1783 | ||
1784 | dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache", | 1784 | dn_dst_ops.kmem_cachep = |
1785 | sizeof(struct dn_route), | 1785 | kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, |
1786 | 0, SLAB_HWCACHE_ALIGN, | 1786 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); |
1787 | NULL, NULL); | ||
1788 | |||
1789 | if (!dn_dst_ops.kmem_cachep) | ||
1790 | panic("DECnet: Failed to allocate dn_dst_cache\n"); | ||
1791 | |||
1792 | init_timer(&dn_route_timer); | 1787 | init_timer(&dn_route_timer); |
1793 | dn_route_timer.function = dn_dst_check_expire; | 1788 | dn_route_timer.function = dn_dst_check_expire; |
1794 | dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; | 1789 | dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; |
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 6986be754ef2..3e0c882c90bf 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c | |||
@@ -11,259 +11,213 @@ | |||
11 | * | 11 | * |
12 | * | 12 | * |
13 | * Changes: | 13 | * Changes: |
14 | * Steve Whitehouse <steve@chygwyn.com> | ||
15 | * Updated for Thomas Graf's generic rules | ||
14 | * | 16 | * |
15 | */ | 17 | */ |
16 | #include <linux/string.h> | ||
17 | #include <linux/net.h> | 18 | #include <linux/net.h> |
18 | #include <linux/socket.h> | ||
19 | #include <linux/sockios.h> | ||
20 | #include <linux/init.h> | 19 | #include <linux/init.h> |
21 | #include <linux/skbuff.h> | ||
22 | #include <linux/netlink.h> | 20 | #include <linux/netlink.h> |
23 | #include <linux/rtnetlink.h> | 21 | #include <linux/rtnetlink.h> |
24 | #include <linux/proc_fs.h> | ||
25 | #include <linux/netdevice.h> | 22 | #include <linux/netdevice.h> |
26 | #include <linux/timer.h> | ||
27 | #include <linux/spinlock.h> | 23 | #include <linux/spinlock.h> |
28 | #include <linux/in_route.h> | ||
29 | #include <linux/list.h> | 24 | #include <linux/list.h> |
30 | #include <linux/rcupdate.h> | 25 | #include <linux/rcupdate.h> |
31 | #include <asm/atomic.h> | ||
32 | #include <asm/uaccess.h> | ||
33 | #include <net/neighbour.h> | 26 | #include <net/neighbour.h> |
34 | #include <net/dst.h> | 27 | #include <net/dst.h> |
35 | #include <net/flow.h> | 28 | #include <net/flow.h> |
29 | #include <net/fib_rules.h> | ||
36 | #include <net/dn.h> | 30 | #include <net/dn.h> |
37 | #include <net/dn_fib.h> | 31 | #include <net/dn_fib.h> |
38 | #include <net/dn_neigh.h> | 32 | #include <net/dn_neigh.h> |
39 | #include <net/dn_dev.h> | 33 | #include <net/dn_dev.h> |
40 | 34 | ||
35 | static struct fib_rules_ops dn_fib_rules_ops; | ||
36 | |||
41 | struct dn_fib_rule | 37 | struct dn_fib_rule |
42 | { | 38 | { |
43 | struct hlist_node r_hlist; | 39 | struct fib_rule common; |
44 | atomic_t r_clntref; | 40 | unsigned char dst_len; |
45 | u32 r_preference; | 41 | unsigned char src_len; |
46 | unsigned char r_table; | 42 | __le16 src; |
47 | unsigned char r_action; | 43 | __le16 srcmask; |
48 | unsigned char r_dst_len; | 44 | __le16 dst; |
49 | unsigned char r_src_len; | 45 | __le16 dstmask; |
50 | __le16 r_src; | 46 | __le16 srcmap; |
51 | __le16 r_srcmask; | 47 | u8 flags; |
52 | __le16 r_dst; | ||
53 | __le16 r_dstmask; | ||
54 | __le16 r_srcmap; | ||
55 | u8 r_flags; | ||
56 | #ifdef CONFIG_DECNET_ROUTE_FWMARK | 48 | #ifdef CONFIG_DECNET_ROUTE_FWMARK |
57 | u32 r_fwmark; | 49 | u32 fwmark; |
50 | u32 fwmask; | ||
58 | #endif | 51 | #endif |
59 | int r_ifindex; | ||
60 | char r_ifname[IFNAMSIZ]; | ||
61 | int r_dead; | ||
62 | struct rcu_head rcu; | ||
63 | }; | 52 | }; |
64 | 53 | ||
65 | static struct dn_fib_rule default_rule = { | 54 | static struct dn_fib_rule default_rule = { |
66 | .r_clntref = ATOMIC_INIT(2), | 55 | .common = { |
67 | .r_preference = 0x7fff, | 56 | .refcnt = ATOMIC_INIT(2), |
68 | .r_table = RT_TABLE_MAIN, | 57 | .pref = 0x7fff, |
69 | .r_action = RTN_UNICAST | 58 | .table = RT_TABLE_MAIN, |
59 | .action = FR_ACT_TO_TBL, | ||
60 | }, | ||
70 | }; | 61 | }; |
71 | 62 | ||
72 | static struct hlist_head dn_fib_rules; | 63 | static LIST_HEAD(dn_fib_rules); |
64 | |||
73 | 65 | ||
74 | int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 66 | int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) |
75 | { | 67 | { |
76 | struct rtattr **rta = arg; | 68 | struct fib_lookup_arg arg = { |
77 | struct rtmsg *rtm = NLMSG_DATA(nlh); | 69 | .result = res, |
78 | struct dn_fib_rule *r; | 70 | }; |
79 | struct hlist_node *node; | 71 | int err; |
80 | int err = -ESRCH; | 72 | |
81 | 73 | err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg); | |
82 | hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { | 74 | res->r = arg.rule; |
83 | if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) && | ||
84 | rtm->rtm_src_len == r->r_src_len && | ||
85 | rtm->rtm_dst_len == r->r_dst_len && | ||
86 | (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) && | ||
87 | #ifdef CONFIG_DECNET_ROUTE_FWMARK | ||
88 | (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && | ||
89 | #endif | ||
90 | (!rtm->rtm_type || rtm->rtm_type == r->r_action) && | ||
91 | (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && | ||
92 | (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && | ||
93 | (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { | ||
94 | |||
95 | err = -EPERM; | ||
96 | if (r == &default_rule) | ||
97 | break; | ||
98 | |||
99 | hlist_del_rcu(&r->r_hlist); | ||
100 | r->r_dead = 1; | ||
101 | dn_fib_rule_put(r); | ||
102 | err = 0; | ||
103 | break; | ||
104 | } | ||
105 | } | ||
106 | 75 | ||
107 | return err; | 76 | return err; |
108 | } | 77 | } |
109 | 78 | ||
110 | static inline void dn_fib_rule_put_rcu(struct rcu_head *head) | 79 | static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, |
80 | int flags, struct fib_lookup_arg *arg) | ||
111 | { | 81 | { |
112 | struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu); | 82 | int err = -EAGAIN; |
113 | kfree(r); | 83 | struct dn_fib_table *tbl; |
114 | } | ||
115 | 84 | ||
116 | void dn_fib_rule_put(struct dn_fib_rule *r) | 85 | switch(rule->action) { |
117 | { | 86 | case FR_ACT_TO_TBL: |
118 | if (atomic_dec_and_test(&r->r_clntref)) { | 87 | break; |
119 | if (r->r_dead) | 88 | |
120 | call_rcu(&r->rcu, dn_fib_rule_put_rcu); | 89 | case FR_ACT_UNREACHABLE: |
121 | else | 90 | err = -ENETUNREACH; |
122 | printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n"); | 91 | goto errout; |
92 | |||
93 | case FR_ACT_PROHIBIT: | ||
94 | err = -EACCES; | ||
95 | goto errout; | ||
96 | |||
97 | case FR_ACT_BLACKHOLE: | ||
98 | default: | ||
99 | err = -EINVAL; | ||
100 | goto errout; | ||
123 | } | 101 | } |
102 | |||
103 | tbl = dn_fib_get_table(rule->table, 0); | ||
104 | if (tbl == NULL) | ||
105 | goto errout; | ||
106 | |||
107 | err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result); | ||
108 | if (err > 0) | ||
109 | err = -EAGAIN; | ||
110 | errout: | ||
111 | return err; | ||
124 | } | 112 | } |
125 | 113 | ||
114 | static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { | ||
115 | [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, | ||
116 | [FRA_PRIORITY] = { .type = NLA_U32 }, | ||
117 | [FRA_SRC] = { .type = NLA_U16 }, | ||
118 | [FRA_DST] = { .type = NLA_U16 }, | ||
119 | [FRA_FWMARK] = { .type = NLA_U32 }, | ||
120 | [FRA_FWMASK] = { .type = NLA_U32 }, | ||
121 | [FRA_TABLE] = { .type = NLA_U32 }, | ||
122 | }; | ||
126 | 123 | ||
127 | int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 124 | static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) |
128 | { | 125 | { |
129 | struct rtattr **rta = arg; | 126 | struct dn_fib_rule *r = (struct dn_fib_rule *)rule; |
130 | struct rtmsg *rtm = NLMSG_DATA(nlh); | 127 | u16 daddr = fl->fld_dst; |
131 | struct dn_fib_rule *r, *new_r, *last = NULL; | 128 | u16 saddr = fl->fld_src; |
132 | struct hlist_node *node = NULL; | 129 | |
133 | unsigned char table_id; | 130 | if (((saddr ^ r->src) & r->srcmask) || |
134 | 131 | ((daddr ^ r->dst) & r->dstmask)) | |
135 | if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16) | 132 | return 0; |
136 | return -EINVAL; | ||
137 | |||
138 | if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) | ||
139 | return -EINVAL; | ||
140 | |||
141 | if (rtm->rtm_type == RTN_NAT) | ||
142 | return -EINVAL; | ||
143 | |||
144 | table_id = rtm->rtm_table; | ||
145 | if (table_id == RT_TABLE_UNSPEC) { | ||
146 | struct dn_fib_table *tb; | ||
147 | if (rtm->rtm_type == RTN_UNICAST) { | ||
148 | if ((tb = dn_fib_empty_table()) == NULL) | ||
149 | return -ENOBUFS; | ||
150 | table_id = tb->n; | ||
151 | } | ||
152 | } | ||
153 | 133 | ||
154 | new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); | ||
155 | if (!new_r) | ||
156 | return -ENOMEM; | ||
157 | |||
158 | if (rta[RTA_SRC-1]) | ||
159 | memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2); | ||
160 | if (rta[RTA_DST-1]) | ||
161 | memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2); | ||
162 | if (rta[RTA_GATEWAY-1]) | ||
163 | memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2); | ||
164 | new_r->r_src_len = rtm->rtm_src_len; | ||
165 | new_r->r_dst_len = rtm->rtm_dst_len; | ||
166 | new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len); | ||
167 | new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len); | ||
168 | #ifdef CONFIG_DECNET_ROUTE_FWMARK | 134 | #ifdef CONFIG_DECNET_ROUTE_FWMARK |
169 | if (rta[RTA_PROTOINFO-1]) | 135 | if ((r->fwmark ^ fl->fld_fwmark) & r->fwmask) |
170 | memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); | 136 | return 0; |
171 | #endif | 137 | #endif |
172 | new_r->r_action = rtm->rtm_type; | ||
173 | new_r->r_flags = rtm->rtm_flags; | ||
174 | if (rta[RTA_PRIORITY-1]) | ||
175 | memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); | ||
176 | new_r->r_table = table_id; | ||
177 | if (rta[RTA_IIF-1]) { | ||
178 | struct net_device *dev; | ||
179 | rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); | ||
180 | new_r->r_ifindex = -1; | ||
181 | dev = dev_get_by_name(new_r->r_ifname); | ||
182 | if (dev) { | ||
183 | new_r->r_ifindex = dev->ifindex; | ||
184 | dev_put(dev); | ||
185 | } | ||
186 | } | ||
187 | 138 | ||
188 | r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist); | 139 | return 1; |
189 | if (!new_r->r_preference) { | 140 | } |
190 | if (r && r->r_hlist.next != NULL) { | 141 | |
191 | r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist); | 142 | static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, |
192 | if (r->r_preference) | 143 | struct nlmsghdr *nlh, struct fib_rule_hdr *frh, |
193 | new_r->r_preference = r->r_preference - 1; | 144 | struct nlattr **tb) |
145 | { | ||
146 | int err = -EINVAL; | ||
147 | struct dn_fib_rule *r = (struct dn_fib_rule *)rule; | ||
148 | |||
149 | if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos) | ||
150 | goto errout; | ||
151 | |||
152 | if (rule->table == RT_TABLE_UNSPEC) { | ||
153 | if (rule->action == FR_ACT_TO_TBL) { | ||
154 | struct dn_fib_table *table; | ||
155 | |||
156 | table = dn_fib_empty_table(); | ||
157 | if (table == NULL) { | ||
158 | err = -ENOBUFS; | ||
159 | goto errout; | ||
160 | } | ||
161 | |||
162 | rule->table = table->n; | ||
194 | } | 163 | } |
195 | } | 164 | } |
196 | 165 | ||
197 | hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { | 166 | if (tb[FRA_SRC]) |
198 | if (r->r_preference > new_r->r_preference) | 167 | r->src = nla_get_u16(tb[FRA_SRC]); |
199 | break; | 168 | |
200 | last = r; | 169 | if (tb[FRA_DST]) |
170 | r->dst = nla_get_u16(tb[FRA_DST]); | ||
171 | |||
172 | #ifdef CONFIG_DECNET_ROUTE_FWMARK | ||
173 | if (tb[FRA_FWMARK]) { | ||
174 | r->fwmark = nla_get_u32(tb[FRA_FWMARK]); | ||
175 | if (r->fwmark) | ||
176 | /* compatibility: if the mark value is non-zero all bits | ||
177 | * are compared unless a mask is explicitly specified. | ||
178 | */ | ||
179 | r->fwmask = 0xFFFFFFFF; | ||
201 | } | 180 | } |
202 | atomic_inc(&new_r->r_clntref); | ||
203 | 181 | ||
204 | if (last) | 182 | if (tb[FRA_FWMASK]) |
205 | hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist); | 183 | r->fwmask = nla_get_u32(tb[FRA_FWMASK]); |
206 | else | 184 | #endif |
207 | hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist); | ||
208 | return 0; | ||
209 | } | ||
210 | 185 | ||
186 | r->src_len = frh->src_len; | ||
187 | r->srcmask = dnet_make_mask(r->src_len); | ||
188 | r->dst_len = frh->dst_len; | ||
189 | r->dstmask = dnet_make_mask(r->dst_len); | ||
190 | err = 0; | ||
191 | errout: | ||
192 | return err; | ||
193 | } | ||
211 | 194 | ||
212 | int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res) | 195 | static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, |
196 | struct nlattr **tb) | ||
213 | { | 197 | { |
214 | struct dn_fib_rule *r, *policy; | 198 | struct dn_fib_rule *r = (struct dn_fib_rule *)rule; |
215 | struct dn_fib_table *tb; | 199 | |
216 | __le16 saddr = flp->fld_src; | 200 | if (frh->src_len && (r->src_len != frh->src_len)) |
217 | __le16 daddr = flp->fld_dst; | 201 | return 0; |
218 | struct hlist_node *node; | ||
219 | int err; | ||
220 | 202 | ||
221 | rcu_read_lock(); | 203 | if (frh->dst_len && (r->dst_len != frh->dst_len)) |
204 | return 0; | ||
222 | 205 | ||
223 | hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) { | ||
224 | if (((saddr^r->r_src) & r->r_srcmask) || | ||
225 | ((daddr^r->r_dst) & r->r_dstmask) || | ||
226 | #ifdef CONFIG_DECNET_ROUTE_FWMARK | 206 | #ifdef CONFIG_DECNET_ROUTE_FWMARK |
227 | (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) || | 207 | if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK]))) |
208 | return 0; | ||
209 | |||
210 | if (tb[FRA_FWMASK] && (r->fwmask != nla_get_u32(tb[FRA_FWMASK]))) | ||
211 | return 0; | ||
228 | #endif | 212 | #endif |
229 | (r->r_ifindex && r->r_ifindex != flp->iif)) | ||
230 | continue; | ||
231 | |||
232 | switch(r->r_action) { | ||
233 | case RTN_UNICAST: | ||
234 | case RTN_NAT: | ||
235 | policy = r; | ||
236 | break; | ||
237 | case RTN_UNREACHABLE: | ||
238 | rcu_read_unlock(); | ||
239 | return -ENETUNREACH; | ||
240 | default: | ||
241 | case RTN_BLACKHOLE: | ||
242 | rcu_read_unlock(); | ||
243 | return -EINVAL; | ||
244 | case RTN_PROHIBIT: | ||
245 | rcu_read_unlock(); | ||
246 | return -EACCES; | ||
247 | } | ||
248 | 213 | ||
249 | if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL) | 214 | if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC]))) |
250 | continue; | 215 | return 0; |
251 | err = tb->lookup(tb, flp, res); | 216 | |
252 | if (err == 0) { | 217 | if (tb[FRA_DST] && (r->dst != nla_get_u16(tb[FRA_DST]))) |
253 | res->r = policy; | 218 | return 0; |
254 | if (policy) | ||
255 | atomic_inc(&policy->r_clntref); | ||
256 | rcu_read_unlock(); | ||
257 | return 0; | ||
258 | } | ||
259 | if (err < 0 && err != -EAGAIN) { | ||
260 | rcu_read_unlock(); | ||
261 | return err; | ||
262 | } | ||
263 | } | ||
264 | 219 | ||
265 | rcu_read_unlock(); | 220 | return 1; |
266 | return -ESRCH; | ||
267 | } | 221 | } |
268 | 222 | ||
269 | unsigned dnet_addr_type(__le16 addr) | 223 | unsigned dnet_addr_type(__le16 addr) |
@@ -271,7 +225,7 @@ unsigned dnet_addr_type(__le16 addr) | |||
271 | struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; | 225 | struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; |
272 | struct dn_fib_res res; | 226 | struct dn_fib_res res; |
273 | unsigned ret = RTN_UNICAST; | 227 | unsigned ret = RTN_UNICAST; |
274 | struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL]; | 228 | struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); |
275 | 229 | ||
276 | res.r = NULL; | 230 | res.r = NULL; |
277 | 231 | ||
@@ -284,142 +238,79 @@ unsigned dnet_addr_type(__le16 addr) | |||
284 | return ret; | 238 | return ret; |
285 | } | 239 | } |
286 | 240 | ||
287 | __le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags) | 241 | static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, |
242 | struct nlmsghdr *nlh, struct fib_rule_hdr *frh) | ||
288 | { | 243 | { |
289 | struct dn_fib_rule *r = res->r; | 244 | struct dn_fib_rule *r = (struct dn_fib_rule *)rule; |
290 | 245 | ||
291 | if (r->r_action == RTN_NAT) { | 246 | frh->family = AF_DECnet; |
292 | int addrtype = dnet_addr_type(r->r_srcmap); | 247 | frh->dst_len = r->dst_len; |
248 | frh->src_len = r->src_len; | ||
249 | frh->tos = 0; | ||
293 | 250 | ||
294 | if (addrtype == RTN_NAT) { | 251 | #ifdef CONFIG_DECNET_ROUTE_FWMARK |
295 | saddr = (saddr&~r->r_srcmask)|r->r_srcmap; | 252 | if (r->fwmark) |
296 | *flags |= RTCF_SNAT; | 253 | NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark); |
297 | } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) { | 254 | if (r->fwmask || r->fwmark) |
298 | saddr = r->r_srcmap; | 255 | NLA_PUT_U32(skb, FRA_FWMASK, r->fwmask); |
299 | *flags |= RTCF_MASQ; | 256 | #endif |
300 | } | 257 | if (r->dst_len) |
301 | } | 258 | NLA_PUT_U16(skb, FRA_DST, r->dst); |
302 | return saddr; | 259 | if (r->src_len) |
303 | } | 260 | NLA_PUT_U16(skb, FRA_SRC, r->src); |
304 | |||
305 | static void dn_fib_rules_detach(struct net_device *dev) | ||
306 | { | ||
307 | struct hlist_node *node; | ||
308 | struct dn_fib_rule *r; | ||
309 | |||
310 | hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { | ||
311 | if (r->r_ifindex == dev->ifindex) | ||
312 | r->r_ifindex = -1; | ||
313 | } | ||
314 | } | ||
315 | 261 | ||
316 | static void dn_fib_rules_attach(struct net_device *dev) | 262 | return 0; |
317 | { | ||
318 | struct hlist_node *node; | ||
319 | struct dn_fib_rule *r; | ||
320 | 263 | ||
321 | hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { | 264 | nla_put_failure: |
322 | if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) | 265 | return -ENOBUFS; |
323 | r->r_ifindex = dev->ifindex; | ||
324 | } | ||
325 | } | 266 | } |
326 | 267 | ||
327 | static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) | 268 | static u32 dn_fib_rule_default_pref(void) |
328 | { | 269 | { |
329 | struct net_device *dev = ptr; | 270 | struct list_head *pos; |
330 | 271 | struct fib_rule *rule; | |
331 | switch(event) { | 272 | |
332 | case NETDEV_UNREGISTER: | 273 | if (!list_empty(&dn_fib_rules)) { |
333 | dn_fib_rules_detach(dev); | 274 | pos = dn_fib_rules.next; |
334 | dn_fib_sync_down(0, dev, 1); | 275 | if (pos->next != &dn_fib_rules) { |
335 | case NETDEV_REGISTER: | 276 | rule = list_entry(pos->next, struct fib_rule, list); |
336 | dn_fib_rules_attach(dev); | 277 | if (rule->pref) |
337 | dn_fib_sync_up(dev); | 278 | return rule->pref - 1; |
279 | } | ||
338 | } | 280 | } |
339 | 281 | ||
340 | return NOTIFY_DONE; | 282 | return 0; |
341 | } | ||
342 | |||
343 | |||
344 | static struct notifier_block dn_fib_rules_notifier = { | ||
345 | .notifier_call = dn_fib_rules_event, | ||
346 | }; | ||
347 | |||
348 | static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, | ||
349 | struct netlink_callback *cb, unsigned int flags) | ||
350 | { | ||
351 | struct rtmsg *rtm; | ||
352 | struct nlmsghdr *nlh; | ||
353 | unsigned char *b = skb->tail; | ||
354 | |||
355 | |||
356 | nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags); | ||
357 | rtm = NLMSG_DATA(nlh); | ||
358 | rtm->rtm_family = AF_DECnet; | ||
359 | rtm->rtm_dst_len = r->r_dst_len; | ||
360 | rtm->rtm_src_len = r->r_src_len; | ||
361 | rtm->rtm_tos = 0; | ||
362 | #ifdef CONFIG_DECNET_ROUTE_FWMARK | ||
363 | if (r->r_fwmark) | ||
364 | RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); | ||
365 | #endif | ||
366 | rtm->rtm_table = r->r_table; | ||
367 | rtm->rtm_protocol = 0; | ||
368 | rtm->rtm_scope = 0; | ||
369 | rtm->rtm_type = r->r_action; | ||
370 | rtm->rtm_flags = r->r_flags; | ||
371 | |||
372 | if (r->r_dst_len) | ||
373 | RTA_PUT(skb, RTA_DST, 2, &r->r_dst); | ||
374 | if (r->r_src_len) | ||
375 | RTA_PUT(skb, RTA_SRC, 2, &r->r_src); | ||
376 | if (r->r_ifname[0]) | ||
377 | RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); | ||
378 | if (r->r_preference) | ||
379 | RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); | ||
380 | if (r->r_srcmap) | ||
381 | RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap); | ||
382 | nlh->nlmsg_len = skb->tail - b; | ||
383 | return skb->len; | ||
384 | |||
385 | nlmsg_failure: | ||
386 | rtattr_failure: | ||
387 | skb_trim(skb, b - skb->data); | ||
388 | return -1; | ||
389 | } | 283 | } |
390 | 284 | ||
391 | int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) | 285 | int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) |
392 | { | 286 | { |
393 | int idx = 0; | 287 | return fib_rules_dump(skb, cb, AF_DECnet); |
394 | int s_idx = cb->args[0]; | ||
395 | struct dn_fib_rule *r; | ||
396 | struct hlist_node *node; | ||
397 | |||
398 | rcu_read_lock(); | ||
399 | hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { | ||
400 | if (idx < s_idx) | ||
401 | goto next; | ||
402 | if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0) | ||
403 | break; | ||
404 | next: | ||
405 | idx++; | ||
406 | } | ||
407 | rcu_read_unlock(); | ||
408 | cb->args[0] = idx; | ||
409 | |||
410 | return skb->len; | ||
411 | } | 288 | } |
412 | 289 | ||
290 | static struct fib_rules_ops dn_fib_rules_ops = { | ||
291 | .family = AF_DECnet, | ||
292 | .rule_size = sizeof(struct dn_fib_rule), | ||
293 | .action = dn_fib_rule_action, | ||
294 | .match = dn_fib_rule_match, | ||
295 | .configure = dn_fib_rule_configure, | ||
296 | .compare = dn_fib_rule_compare, | ||
297 | .fill = dn_fib_rule_fill, | ||
298 | .default_pref = dn_fib_rule_default_pref, | ||
299 | .nlgroup = RTNLGRP_DECnet_RULE, | ||
300 | .policy = dn_fib_rule_policy, | ||
301 | .rules_list = &dn_fib_rules, | ||
302 | .owner = THIS_MODULE, | ||
303 | }; | ||
304 | |||
413 | void __init dn_fib_rules_init(void) | 305 | void __init dn_fib_rules_init(void) |
414 | { | 306 | { |
415 | INIT_HLIST_HEAD(&dn_fib_rules); | 307 | list_add_tail(&default_rule.common.list, &dn_fib_rules); |
416 | hlist_add_head(&default_rule.r_hlist, &dn_fib_rules); | 308 | fib_rules_register(&dn_fib_rules_ops); |
417 | register_netdevice_notifier(&dn_fib_rules_notifier); | ||
418 | } | 309 | } |
419 | 310 | ||
420 | void __exit dn_fib_rules_cleanup(void) | 311 | void __exit dn_fib_rules_cleanup(void) |
421 | { | 312 | { |
422 | unregister_netdevice_notifier(&dn_fib_rules_notifier); | 313 | fib_rules_unregister(&dn_fib_rules_ops); |
423 | } | 314 | } |
424 | 315 | ||
425 | 316 | ||
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index e926c952e363..317904bb5896 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <net/neighbour.h> | 30 | #include <net/neighbour.h> |
31 | #include <net/dst.h> | 31 | #include <net/dst.h> |
32 | #include <net/flow.h> | 32 | #include <net/flow.h> |
33 | #include <net/fib_rules.h> | ||
33 | #include <net/dn.h> | 34 | #include <net/dn.h> |
34 | #include <net/dn_route.h> | 35 | #include <net/dn_route.h> |
35 | #include <net/dn_fib.h> | 36 | #include <net/dn_fib.h> |
@@ -74,9 +75,9 @@ for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next) | |||
74 | for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) | 75 | for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) |
75 | 76 | ||
76 | #define RT_TABLE_MIN 1 | 77 | #define RT_TABLE_MIN 1 |
77 | 78 | #define DN_FIB_TABLE_HASHSZ 256 | |
79 | static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ]; | ||
78 | static DEFINE_RWLOCK(dn_fib_tables_lock); | 80 | static DEFINE_RWLOCK(dn_fib_tables_lock); |
79 | struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1]; | ||
80 | 81 | ||
81 | static kmem_cache_t *dn_hash_kmem __read_mostly; | 82 | static kmem_cache_t *dn_hash_kmem __read_mostly; |
82 | static int dn_fib_hash_zombies; | 83 | static int dn_fib_hash_zombies; |
@@ -263,7 +264,7 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern | |||
263 | } | 264 | } |
264 | 265 | ||
265 | static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | 266 | static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, |
266 | u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, | 267 | u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, |
267 | struct dn_fib_info *fi, unsigned int flags) | 268 | struct dn_fib_info *fi, unsigned int flags) |
268 | { | 269 | { |
269 | struct rtmsg *rtm; | 270 | struct rtmsg *rtm; |
@@ -277,6 +278,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
277 | rtm->rtm_src_len = 0; | 278 | rtm->rtm_src_len = 0; |
278 | rtm->rtm_tos = 0; | 279 | rtm->rtm_tos = 0; |
279 | rtm->rtm_table = tb_id; | 280 | rtm->rtm_table = tb_id; |
281 | RTA_PUT_U32(skb, RTA_TABLE, tb_id); | ||
280 | rtm->rtm_flags = fi->fib_flags; | 282 | rtm->rtm_flags = fi->fib_flags; |
281 | rtm->rtm_scope = scope; | 283 | rtm->rtm_scope = scope; |
282 | rtm->rtm_type = type; | 284 | rtm->rtm_type = type; |
@@ -326,29 +328,29 @@ rtattr_failure: | |||
326 | } | 328 | } |
327 | 329 | ||
328 | 330 | ||
329 | static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id, | 331 | static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, |
330 | struct nlmsghdr *nlh, struct netlink_skb_parms *req) | 332 | struct nlmsghdr *nlh, struct netlink_skb_parms *req) |
331 | { | 333 | { |
332 | struct sk_buff *skb; | 334 | struct sk_buff *skb; |
333 | u32 pid = req ? req->pid : 0; | 335 | u32 pid = req ? req->pid : 0; |
334 | int size = NLMSG_SPACE(sizeof(struct rtmsg) + 256); | 336 | int err = -ENOBUFS; |
335 | 337 | ||
336 | skb = alloc_skb(size, GFP_KERNEL); | 338 | skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); |
337 | if (!skb) | 339 | if (skb == NULL) |
338 | return; | 340 | goto errout; |
339 | 341 | ||
340 | if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, | 342 | err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, |
341 | f->fn_type, f->fn_scope, &f->fn_key, z, | 343 | f->fn_type, f->fn_scope, &f->fn_key, z, |
342 | DN_FIB_INFO(f), 0) < 0) { | 344 | DN_FIB_INFO(f), 0); |
345 | if (err < 0) { | ||
343 | kfree_skb(skb); | 346 | kfree_skb(skb); |
344 | return; | 347 | goto errout; |
345 | } | 348 | } |
346 | NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE; | 349 | |
347 | if (nlh->nlmsg_flags & NLM_F_ECHO) | 350 | err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); |
348 | atomic_inc(&skb->users); | 351 | errout: |
349 | netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL); | 352 | if (err < 0) |
350 | if (nlh->nlmsg_flags & NLM_F_ECHO) | 353 | rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err); |
351 | netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); | ||
352 | } | 354 | } |
353 | 355 | ||
354 | static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, | 356 | static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, |
@@ -359,7 +361,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, | |||
359 | { | 361 | { |
360 | int i, s_i; | 362 | int i, s_i; |
361 | 363 | ||
362 | s_i = cb->args[3]; | 364 | s_i = cb->args[4]; |
363 | for(i = 0; f; i++, f = f->fn_next) { | 365 | for(i = 0; f; i++, f = f->fn_next) { |
364 | if (i < s_i) | 366 | if (i < s_i) |
365 | continue; | 367 | continue; |
@@ -372,11 +374,11 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, | |||
372 | (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type, | 374 | (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type, |
373 | f->fn_scope, &f->fn_key, dz->dz_order, | 375 | f->fn_scope, &f->fn_key, dz->dz_order, |
374 | f->fn_info, NLM_F_MULTI) < 0) { | 376 | f->fn_info, NLM_F_MULTI) < 0) { |
375 | cb->args[3] = i; | 377 | cb->args[4] = i; |
376 | return -1; | 378 | return -1; |
377 | } | 379 | } |
378 | } | 380 | } |
379 | cb->args[3] = i; | 381 | cb->args[4] = i; |
380 | return skb->len; | 382 | return skb->len; |
381 | } | 383 | } |
382 | 384 | ||
@@ -387,20 +389,20 @@ static __inline__ int dn_hash_dump_zone(struct sk_buff *skb, | |||
387 | { | 389 | { |
388 | int h, s_h; | 390 | int h, s_h; |
389 | 391 | ||
390 | s_h = cb->args[2]; | 392 | s_h = cb->args[3]; |
391 | for(h = 0; h < dz->dz_divisor; h++) { | 393 | for(h = 0; h < dz->dz_divisor; h++) { |
392 | if (h < s_h) | 394 | if (h < s_h) |
393 | continue; | 395 | continue; |
394 | if (h > s_h) | 396 | if (h > s_h) |
395 | memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); | 397 | memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0])); |
396 | if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL) | 398 | if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL) |
397 | continue; | 399 | continue; |
398 | if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) { | 400 | if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) { |
399 | cb->args[2] = h; | 401 | cb->args[3] = h; |
400 | return -1; | 402 | return -1; |
401 | } | 403 | } |
402 | } | 404 | } |
403 | cb->args[2] = h; | 405 | cb->args[3] = h; |
404 | return skb->len; | 406 | return skb->len; |
405 | } | 407 | } |
406 | 408 | ||
@@ -411,26 +413,63 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, | |||
411 | struct dn_zone *dz; | 413 | struct dn_zone *dz; |
412 | struct dn_hash *table = (struct dn_hash *)tb->data; | 414 | struct dn_hash *table = (struct dn_hash *)tb->data; |
413 | 415 | ||
414 | s_m = cb->args[1]; | 416 | s_m = cb->args[2]; |
415 | read_lock(&dn_fib_tables_lock); | 417 | read_lock(&dn_fib_tables_lock); |
416 | for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) { | 418 | for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) { |
417 | if (m < s_m) | 419 | if (m < s_m) |
418 | continue; | 420 | continue; |
419 | if (m > s_m) | 421 | if (m > s_m) |
420 | memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0])); | 422 | memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); |
421 | 423 | ||
422 | if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) { | 424 | if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) { |
423 | cb->args[1] = m; | 425 | cb->args[2] = m; |
424 | read_unlock(&dn_fib_tables_lock); | 426 | read_unlock(&dn_fib_tables_lock); |
425 | return -1; | 427 | return -1; |
426 | } | 428 | } |
427 | } | 429 | } |
428 | read_unlock(&dn_fib_tables_lock); | 430 | read_unlock(&dn_fib_tables_lock); |
429 | cb->args[1] = m; | 431 | cb->args[2] = m; |
430 | 432 | ||
431 | return skb->len; | 433 | return skb->len; |
432 | } | 434 | } |
433 | 435 | ||
436 | int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) | ||
437 | { | ||
438 | unsigned int h, s_h; | ||
439 | unsigned int e = 0, s_e; | ||
440 | struct dn_fib_table *tb; | ||
441 | struct hlist_node *node; | ||
442 | int dumped = 0; | ||
443 | |||
444 | if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && | ||
445 | ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) | ||
446 | return dn_cache_dump(skb, cb); | ||
447 | |||
448 | s_h = cb->args[0]; | ||
449 | s_e = cb->args[1]; | ||
450 | |||
451 | for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { | ||
452 | e = 0; | ||
453 | hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) { | ||
454 | if (e < s_e) | ||
455 | goto next; | ||
456 | if (dumped) | ||
457 | memset(&cb->args[2], 0, sizeof(cb->args) - | ||
458 | 2 * sizeof(cb->args[0])); | ||
459 | if (tb->dump(tb, skb, cb) < 0) | ||
460 | goto out; | ||
461 | dumped = 1; | ||
462 | next: | ||
463 | e++; | ||
464 | } | ||
465 | } | ||
466 | out: | ||
467 | cb->args[1] = e; | ||
468 | cb->args[0] = h; | ||
469 | |||
470 | return skb->len; | ||
471 | } | ||
472 | |||
434 | static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) | 473 | static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) |
435 | { | 474 | { |
436 | struct dn_hash *table = (struct dn_hash *)tb->data; | 475 | struct dn_hash *table = (struct dn_hash *)tb->data; |
@@ -739,9 +778,11 @@ out: | |||
739 | } | 778 | } |
740 | 779 | ||
741 | 780 | ||
742 | struct dn_fib_table *dn_fib_get_table(int n, int create) | 781 | struct dn_fib_table *dn_fib_get_table(u32 n, int create) |
743 | { | 782 | { |
744 | struct dn_fib_table *t; | 783 | struct dn_fib_table *t; |
784 | struct hlist_node *node; | ||
785 | unsigned int h; | ||
745 | 786 | ||
746 | if (n < RT_TABLE_MIN) | 787 | if (n < RT_TABLE_MIN) |
747 | return NULL; | 788 | return NULL; |
@@ -749,8 +790,15 @@ struct dn_fib_table *dn_fib_get_table(int n, int create) | |||
749 | if (n > RT_TABLE_MAX) | 790 | if (n > RT_TABLE_MAX) |
750 | return NULL; | 791 | return NULL; |
751 | 792 | ||
752 | if (dn_fib_tables[n]) | 793 | h = n & (DN_FIB_TABLE_HASHSZ - 1); |
753 | return dn_fib_tables[n]; | 794 | rcu_read_lock(); |
795 | hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) { | ||
796 | if (t->n == n) { | ||
797 | rcu_read_unlock(); | ||
798 | return t; | ||
799 | } | ||
800 | } | ||
801 | rcu_read_unlock(); | ||
754 | 802 | ||
755 | if (!create) | 803 | if (!create) |
756 | return NULL; | 804 | return NULL; |
@@ -771,33 +819,37 @@ struct dn_fib_table *dn_fib_get_table(int n, int create) | |||
771 | t->flush = dn_fib_table_flush; | 819 | t->flush = dn_fib_table_flush; |
772 | t->dump = dn_fib_table_dump; | 820 | t->dump = dn_fib_table_dump; |
773 | memset(t->data, 0, sizeof(struct dn_hash)); | 821 | memset(t->data, 0, sizeof(struct dn_hash)); |
774 | dn_fib_tables[n] = t; | 822 | hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]); |
775 | 823 | ||
776 | return t; | 824 | return t; |
777 | } | 825 | } |
778 | 826 | ||
779 | static void dn_fib_del_tree(int n) | ||
780 | { | ||
781 | struct dn_fib_table *t; | ||
782 | |||
783 | write_lock(&dn_fib_tables_lock); | ||
784 | t = dn_fib_tables[n]; | ||
785 | dn_fib_tables[n] = NULL; | ||
786 | write_unlock(&dn_fib_tables_lock); | ||
787 | |||
788 | kfree(t); | ||
789 | } | ||
790 | |||
791 | struct dn_fib_table *dn_fib_empty_table(void) | 827 | struct dn_fib_table *dn_fib_empty_table(void) |
792 | { | 828 | { |
793 | int id; | 829 | u32 id; |
794 | 830 | ||
795 | for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) | 831 | for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) |
796 | if (dn_fib_tables[id] == NULL) | 832 | if (dn_fib_get_table(id, 0) == NULL) |
797 | return dn_fib_get_table(id, 1); | 833 | return dn_fib_get_table(id, 1); |
798 | return NULL; | 834 | return NULL; |
799 | } | 835 | } |
800 | 836 | ||
837 | void dn_fib_flush(void) | ||
838 | { | ||
839 | int flushed = 0; | ||
840 | struct dn_fib_table *tb; | ||
841 | struct hlist_node *node; | ||
842 | unsigned int h; | ||
843 | |||
844 | for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { | ||
845 | hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) | ||
846 | flushed += tb->flush(tb); | ||
847 | } | ||
848 | |||
849 | if (flushed) | ||
850 | dn_rt_cache_flush(-1); | ||
851 | } | ||
852 | |||
801 | void __init dn_fib_table_init(void) | 853 | void __init dn_fib_table_init(void) |
802 | { | 854 | { |
803 | dn_hash_kmem = kmem_cache_create("dn_fib_info_cache", | 855 | dn_hash_kmem = kmem_cache_create("dn_fib_info_cache", |
@@ -808,10 +860,17 @@ void __init dn_fib_table_init(void) | |||
808 | 860 | ||
809 | void __exit dn_fib_table_cleanup(void) | 861 | void __exit dn_fib_table_cleanup(void) |
810 | { | 862 | { |
811 | int i; | 863 | struct dn_fib_table *t; |
812 | 864 | struct hlist_node *node, *next; | |
813 | for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i) | 865 | unsigned int h; |
814 | dn_fib_del_tree(i); | ||
815 | 866 | ||
816 | return; | 867 | write_lock(&dn_fib_tables_lock); |
868 | for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { | ||
869 | hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h], | ||
870 | hlist) { | ||
871 | hlist_del(&t->hlist); | ||
872 | kfree(t); | ||
873 | } | ||
874 | } | ||
875 | write_unlock(&dn_fib_tables_lock); | ||
817 | } | 876 | } |
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 387c71c584ee..43863933f27f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c | |||
@@ -64,81 +64,79 @@ | |||
64 | 64 | ||
65 | __setup("ether=", netdev_boot_setup); | 65 | __setup("ether=", netdev_boot_setup); |
66 | 66 | ||
67 | /* | 67 | /** |
68 | * Create the Ethernet MAC header for an arbitrary protocol layer | 68 | * eth_header - create the Ethernet header |
69 | * @skb: buffer to alter | ||
70 | * @dev: source device | ||
71 | * @type: Ethernet type field | ||
72 | * @daddr: destination address (NULL leave destination address) | ||
73 | * @saddr: source address (NULL use device source address) | ||
74 | * @len: packet length (<= skb->len) | ||
69 | * | 75 | * |
70 | * saddr=NULL means use device source address | 76 | * |
71 | * daddr=NULL means leave destination address (eg unresolved arp) | 77 | * Set the protocol type. For a packet of type ETH_P_802_3 we put the length |
78 | * in here instead. It is up to the 802.2 layer to carry protocol information. | ||
72 | */ | 79 | */ |
73 | |||
74 | int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, | 80 | int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, |
75 | void *daddr, void *saddr, unsigned len) | 81 | void *daddr, void *saddr, unsigned len) |
76 | { | 82 | { |
77 | struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN); | 83 | struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); |
78 | 84 | ||
79 | /* | 85 | if (type != ETH_P_802_3) |
80 | * Set the protocol type. For a packet of type ETH_P_802_3 we put the length | ||
81 | * in here instead. It is up to the 802.2 layer to carry protocol information. | ||
82 | */ | ||
83 | |||
84 | if(type!=ETH_P_802_3) | ||
85 | eth->h_proto = htons(type); | 86 | eth->h_proto = htons(type); |
86 | else | 87 | else |
87 | eth->h_proto = htons(len); | 88 | eth->h_proto = htons(len); |
88 | 89 | ||
89 | /* | 90 | /* |
90 | * Set the source hardware address. | 91 | * Set the source hardware address. |
91 | */ | 92 | */ |
92 | 93 | ||
93 | if(!saddr) | 94 | if (!saddr) |
94 | saddr = dev->dev_addr; | 95 | saddr = dev->dev_addr; |
95 | memcpy(eth->h_source,saddr,dev->addr_len); | 96 | memcpy(eth->h_source, saddr, dev->addr_len); |
96 | 97 | ||
97 | if(daddr) | 98 | if (daddr) { |
98 | { | 99 | memcpy(eth->h_dest, daddr, dev->addr_len); |
99 | memcpy(eth->h_dest,daddr,dev->addr_len); | ||
100 | return ETH_HLEN; | 100 | return ETH_HLEN; |
101 | } | 101 | } |
102 | 102 | ||
103 | /* | 103 | /* |
104 | * Anyway, the loopback-device should never use this function... | 104 | * Anyway, the loopback-device should never use this function... |
105 | */ | 105 | */ |
106 | 106 | ||
107 | if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) | 107 | if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { |
108 | { | ||
109 | memset(eth->h_dest, 0, dev->addr_len); | 108 | memset(eth->h_dest, 0, dev->addr_len); |
110 | return ETH_HLEN; | 109 | return ETH_HLEN; |
111 | } | 110 | } |
112 | 111 | ||
113 | return -ETH_HLEN; | 112 | return -ETH_HLEN; |
114 | } | 113 | } |
115 | 114 | ||
116 | 115 | /** | |
117 | /* | 116 | * eth_rebuild_header- rebuild the Ethernet MAC header. |
118 | * Rebuild the Ethernet MAC header. This is called after an ARP | 117 | * @skb: socket buffer to update |
119 | * (or in future other address resolution) has completed on this | ||
120 | * sk_buff. We now let ARP fill in the other fields. | ||
121 | * | 118 | * |
122 | * This routine CANNOT use cached dst->neigh! | 119 | * This is called after an ARP or IPV6 ndisc it's resolution on this |
123 | * Really, it is used only when dst->neigh is wrong. | 120 | * sk_buff. We now let protocol (ARP) fill in the other fields. |
121 | * | ||
122 | * This routine CANNOT use cached dst->neigh! | ||
123 | * Really, it is used only when dst->neigh is wrong. | ||
124 | */ | 124 | */ |
125 | |||
126 | int eth_rebuild_header(struct sk_buff *skb) | 125 | int eth_rebuild_header(struct sk_buff *skb) |
127 | { | 126 | { |
128 | struct ethhdr *eth = (struct ethhdr *)skb->data; | 127 | struct ethhdr *eth = (struct ethhdr *)skb->data; |
129 | struct net_device *dev = skb->dev; | 128 | struct net_device *dev = skb->dev; |
130 | 129 | ||
131 | switch (eth->h_proto) | 130 | switch (eth->h_proto) { |
132 | { | ||
133 | #ifdef CONFIG_INET | 131 | #ifdef CONFIG_INET |
134 | case __constant_htons(ETH_P_IP): | 132 | case __constant_htons(ETH_P_IP): |
135 | return arp_find(eth->h_dest, skb); | 133 | return arp_find(eth->h_dest, skb); |
136 | #endif | 134 | #endif |
137 | default: | 135 | default: |
138 | printk(KERN_DEBUG | 136 | printk(KERN_DEBUG |
139 | "%s: unable to resolve type %X addresses.\n", | 137 | "%s: unable to resolve type %X addresses.\n", |
140 | dev->name, (int)eth->h_proto); | 138 | dev->name, (int)eth->h_proto); |
141 | 139 | ||
142 | memcpy(eth->h_source, dev->dev_addr, dev->addr_len); | 140 | memcpy(eth->h_source, dev->dev_addr, dev->addr_len); |
143 | break; | 141 | break; |
144 | } | 142 | } |
@@ -146,62 +144,70 @@ int eth_rebuild_header(struct sk_buff *skb) | |||
146 | return 0; | 144 | return 0; |
147 | } | 145 | } |
148 | 146 | ||
149 | 147 | /** | |
150 | /* | 148 | * eth_type_trans - determine the packet's protocol ID. |
151 | * Determine the packet's protocol ID. The rule here is that we | 149 | * @skb: received socket data |
152 | * assume 802.3 if the type field is short enough to be a length. | 150 | * @dev: receiving network device |
153 | * This is normal practice and works for any 'now in use' protocol. | 151 | * |
152 | * The rule here is that we | ||
153 | * assume 802.3 if the type field is short enough to be a length. | ||
154 | * This is normal practice and works for any 'now in use' protocol. | ||
154 | */ | 155 | */ |
155 | |||
156 | __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) | 156 | __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) |
157 | { | 157 | { |
158 | struct ethhdr *eth; | 158 | struct ethhdr *eth; |
159 | unsigned char *rawp; | 159 | unsigned char *rawp; |
160 | 160 | ||
161 | skb->mac.raw = skb->data; | 161 | skb->mac.raw = skb->data; |
162 | skb_pull(skb,ETH_HLEN); | 162 | skb_pull(skb, ETH_HLEN); |
163 | eth = eth_hdr(skb); | 163 | eth = eth_hdr(skb); |
164 | 164 | ||
165 | if (is_multicast_ether_addr(eth->h_dest)) { | 165 | if (is_multicast_ether_addr(eth->h_dest)) { |
166 | if (!compare_ether_addr(eth->h_dest, dev->broadcast)) | 166 | if (!compare_ether_addr(eth->h_dest, dev->broadcast)) |
167 | skb->pkt_type = PACKET_BROADCAST; | 167 | skb->pkt_type = PACKET_BROADCAST; |
168 | else | 168 | else |
169 | skb->pkt_type = PACKET_MULTICAST; | 169 | skb->pkt_type = PACKET_MULTICAST; |
170 | } | 170 | } |
171 | 171 | ||
172 | /* | 172 | /* |
173 | * This ALLMULTI check should be redundant by 1.4 | 173 | * This ALLMULTI check should be redundant by 1.4 |
174 | * so don't forget to remove it. | 174 | * so don't forget to remove it. |
175 | * | 175 | * |
176 | * Seems, you forgot to remove it. All silly devices | 176 | * Seems, you forgot to remove it. All silly devices |
177 | * seems to set IFF_PROMISC. | 177 | * seems to set IFF_PROMISC. |
178 | */ | 178 | */ |
179 | 179 | ||
180 | else if(1 /*dev->flags&IFF_PROMISC*/) { | 180 | else if (1 /*dev->flags&IFF_PROMISC */ ) { |
181 | if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr))) | 181 | if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr))) |
182 | skb->pkt_type = PACKET_OTHERHOST; | 182 | skb->pkt_type = PACKET_OTHERHOST; |
183 | } | 183 | } |
184 | 184 | ||
185 | if (ntohs(eth->h_proto) >= 1536) | 185 | if (ntohs(eth->h_proto) >= 1536) |
186 | return eth->h_proto; | 186 | return eth->h_proto; |
187 | 187 | ||
188 | rawp = skb->data; | 188 | rawp = skb->data; |
189 | 189 | ||
190 | /* | 190 | /* |
191 | * This is a magic hack to spot IPX packets. Older Novell breaks | 191 | * This is a magic hack to spot IPX packets. Older Novell breaks |
192 | * the protocol design and runs IPX over 802.3 without an 802.2 LLC | 192 | * the protocol design and runs IPX over 802.3 without an 802.2 LLC |
193 | * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This | 193 | * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This |
194 | * won't work for fault tolerant netware but does for the rest. | 194 | * won't work for fault tolerant netware but does for the rest. |
195 | */ | 195 | */ |
196 | if (*(unsigned short *)rawp == 0xFFFF) | 196 | if (*(unsigned short *)rawp == 0xFFFF) |
197 | return htons(ETH_P_802_3); | 197 | return htons(ETH_P_802_3); |
198 | 198 | ||
199 | /* | 199 | /* |
200 | * Real 802.2 LLC | 200 | * Real 802.2 LLC |
201 | */ | 201 | */ |
202 | return htons(ETH_P_802_2); | 202 | return htons(ETH_P_802_2); |
203 | } | 203 | } |
204 | EXPORT_SYMBOL(eth_type_trans); | ||
204 | 205 | ||
206 | /** | ||
207 | * eth_header_parse - extract hardware address from packet | ||
208 | * @skb: packet to extract header from | ||
209 | * @haddr: destination buffer | ||
210 | */ | ||
205 | static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) | 211 | static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) |
206 | { | 212 | { |
207 | struct ethhdr *eth = eth_hdr(skb); | 213 | struct ethhdr *eth = eth_hdr(skb); |
@@ -209,14 +215,20 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) | |||
209 | return ETH_ALEN; | 215 | return ETH_ALEN; |
210 | } | 216 | } |
211 | 217 | ||
218 | /** | ||
219 | * eth_header_cache - fill cache entry from neighbour | ||
220 | * @neigh: source neighbour | ||
221 | * @hh: destination cache entry | ||
222 | * Create an Ethernet header template from the neighbour. | ||
223 | */ | ||
212 | int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) | 224 | int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) |
213 | { | 225 | { |
214 | unsigned short type = hh->hh_type; | 226 | unsigned short type = hh->hh_type; |
215 | struct ethhdr *eth; | 227 | struct ethhdr *eth; |
216 | struct net_device *dev = neigh->dev; | 228 | struct net_device *dev = neigh->dev; |
217 | 229 | ||
218 | eth = (struct ethhdr*) | 230 | eth = (struct ethhdr *) |
219 | (((u8*)hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); | 231 | (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); |
220 | 232 | ||
221 | if (type == __constant_htons(ETH_P_802_3)) | 233 | if (type == __constant_htons(ETH_P_802_3)) |
222 | return -1; | 234 | return -1; |
@@ -228,27 +240,47 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) | |||
228 | return 0; | 240 | return 0; |
229 | } | 241 | } |
230 | 242 | ||
231 | /* | 243 | /** |
244 | * eth_header_cache_update - update cache entry | ||
245 | * @hh: destination cache entry | ||
246 | * @dev: network device | ||
247 | * @haddr: new hardware address | ||
248 | * | ||
232 | * Called by Address Resolution module to notify changes in address. | 249 | * Called by Address Resolution module to notify changes in address. |
233 | */ | 250 | */ |
234 | 251 | void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, | |
235 | void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr) | 252 | unsigned char *haddr) |
236 | { | 253 | { |
237 | memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), | 254 | memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), |
238 | haddr, dev->addr_len); | 255 | haddr, dev->addr_len); |
239 | } | 256 | } |
240 | 257 | ||
241 | EXPORT_SYMBOL(eth_type_trans); | 258 | /** |
242 | 259 | * eth_mac_addr - set new Ethernet hardware address | |
260 | * @dev: network device | ||
261 | * @p: socket address | ||
262 | * Change hardware address of device. | ||
263 | * | ||
264 | * This doesn't change hardware matching, so needs to be overridden | ||
265 | * for most real devices. | ||
266 | */ | ||
243 | static int eth_mac_addr(struct net_device *dev, void *p) | 267 | static int eth_mac_addr(struct net_device *dev, void *p) |
244 | { | 268 | { |
245 | struct sockaddr *addr=p; | 269 | struct sockaddr *addr = p; |
246 | if (netif_running(dev)) | 270 | if (netif_running(dev)) |
247 | return -EBUSY; | 271 | return -EBUSY; |
248 | memcpy(dev->dev_addr, addr->sa_data,dev->addr_len); | 272 | memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); |
249 | return 0; | 273 | return 0; |
250 | } | 274 | } |
251 | 275 | ||
276 | /** | ||
277 | * eth_change_mtu - set new MTU size | ||
278 | * @dev: network device | ||
279 | * @new_mtu: new Maximum Transfer Unit | ||
280 | * | ||
281 | * Allow changing MTU size. Needs to be overridden for devices | ||
282 | * supporting jumbo frames. | ||
283 | */ | ||
252 | static int eth_change_mtu(struct net_device *dev, int new_mtu) | 284 | static int eth_change_mtu(struct net_device *dev, int new_mtu) |
253 | { | 285 | { |
254 | if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) | 286 | if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) |
@@ -257,8 +289,10 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu) | |||
257 | return 0; | 289 | return 0; |
258 | } | 290 | } |
259 | 291 | ||
260 | /* | 292 | /** |
261 | * Fill in the fields of the device structure with ethernet-generic values. | 293 | * ether_setup - setup Ethernet network device |
294 | * @dev: network device | ||
295 | * Fill in the fields of the device structure with Ethernet-generic values. | ||
262 | */ | 296 | */ |
263 | void ether_setup(struct net_device *dev) | 297 | void ether_setup(struct net_device *dev) |
264 | { | 298 | { |
@@ -277,21 +311,21 @@ void ether_setup(struct net_device *dev) | |||
277 | dev->tx_queue_len = 1000; /* Ethernet wants good queues */ | 311 | dev->tx_queue_len = 1000; /* Ethernet wants good queues */ |
278 | dev->flags = IFF_BROADCAST|IFF_MULTICAST; | 312 | dev->flags = IFF_BROADCAST|IFF_MULTICAST; |
279 | 313 | ||
280 | memset(dev->broadcast,0xFF, ETH_ALEN); | 314 | memset(dev->broadcast, 0xFF, ETH_ALEN); |
281 | 315 | ||
282 | } | 316 | } |
283 | EXPORT_SYMBOL(ether_setup); | 317 | EXPORT_SYMBOL(ether_setup); |
284 | 318 | ||
285 | /** | 319 | /** |
286 | * alloc_etherdev - Allocates and sets up an ethernet device | 320 | * alloc_etherdev - Allocates and sets up an Ethernet device |
287 | * @sizeof_priv: Size of additional driver-private structure to be allocated | 321 | * @sizeof_priv: Size of additional driver-private structure to be allocated |
288 | * for this ethernet device | 322 | * for this Ethernet device |
289 | * | 323 | * |
290 | * Fill in the fields of the device structure with ethernet-generic | 324 | * Fill in the fields of the device structure with Ethernet-generic |
291 | * values. Basically does everything except registering the device. | 325 | * values. Basically does everything except registering the device. |
292 | * | 326 | * |
293 | * Constructs a new net device, complete with a private data area of | 327 | * Constructs a new net device, complete with a private data area of |
294 | * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for | 328 | * size (sizeof_priv). A 32-byte (not bit) alignment is enforced for |
295 | * this private data area. | 329 | * this private data area. |
296 | */ | 330 | */ |
297 | 331 | ||
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 3b5d504a74be..1650b64415aa 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -88,6 +88,7 @@ config IP_FIB_HASH | |||
88 | config IP_MULTIPLE_TABLES | 88 | config IP_MULTIPLE_TABLES |
89 | bool "IP: policy routing" | 89 | bool "IP: policy routing" |
90 | depends on IP_ADVANCED_ROUTER | 90 | depends on IP_ADVANCED_ROUTER |
91 | select FIB_RULES | ||
91 | ---help--- | 92 | ---help--- |
92 | Normally, a router decides what to do with a received packet based | 93 | Normally, a router decides what to do with a received packet based |
93 | solely on the packet's final destination address. If you say Y here, | 94 | solely on the packet's final destination address. If you say Y here, |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4878fc5be85f..f66049e28aeb 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -47,6 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o | |||
47 | obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o | 47 | obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o |
48 | obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o | 48 | obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o |
49 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o | 49 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o |
50 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o | ||
50 | 51 | ||
51 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ | 52 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ |
52 | xfrm4_output.o | 53 | xfrm4_output.o |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c84a32070f8d..fdd89e37b9aa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -67,7 +67,6 @@ | |||
67 | * 2 of the License, or (at your option) any later version. | 67 | * 2 of the License, or (at your option) any later version. |
68 | */ | 68 | */ |
69 | 69 | ||
70 | #include <linux/config.h> | ||
71 | #include <linux/err.h> | 70 | #include <linux/err.h> |
72 | #include <linux/errno.h> | 71 | #include <linux/errno.h> |
73 | #include <linux/types.h> | 72 | #include <linux/types.h> |
@@ -392,7 +391,7 @@ int inet_release(struct socket *sock) | |||
392 | } | 391 | } |
393 | 392 | ||
394 | /* It is off by default, see below. */ | 393 | /* It is off by default, see below. */ |
395 | int sysctl_ip_nonlocal_bind; | 394 | int sysctl_ip_nonlocal_bind __read_mostly; |
396 | 395 | ||
397 | int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | 396 | int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) |
398 | { | 397 | { |
@@ -988,7 +987,7 @@ void inet_unregister_protosw(struct inet_protosw *p) | |||
988 | * Shall we try to damage output packets if routing dev changes? | 987 | * Shall we try to damage output packets if routing dev changes? |
989 | */ | 988 | */ |
990 | 989 | ||
991 | int sysctl_ip_dynaddr; | 990 | int sysctl_ip_dynaddr __read_mostly; |
992 | 991 | ||
993 | static int inet_sk_reselect_saddr(struct sock *sk) | 992 | static int inet_sk_reselect_saddr(struct sock *sk) |
994 | { | 993 | { |
@@ -1074,6 +1073,7 @@ int inet_sk_rebuild_header(struct sock *sk) | |||
1074 | }, | 1073 | }, |
1075 | }; | 1074 | }; |
1076 | 1075 | ||
1076 | security_sk_classify_flow(sk, &fl); | ||
1077 | err = ip_route_output_flow(&rt, &fl, sk, 0); | 1077 | err = ip_route_output_flow(&rt, &fl, sk, 0); |
1078 | } | 1078 | } |
1079 | if (!err) | 1079 | if (!err) |
@@ -1254,10 +1254,7 @@ static int __init inet_init(void) | |||
1254 | struct list_head *r; | 1254 | struct list_head *r; |
1255 | int rc = -EINVAL; | 1255 | int rc = -EINVAL; |
1256 | 1256 | ||
1257 | if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) { | 1257 | BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); |
1258 | printk(KERN_CRIT "%s: panic\n", __FUNCTION__); | ||
1259 | goto out; | ||
1260 | } | ||
1261 | 1258 | ||
1262 | rc = proto_register(&tcp_prot, 1); | 1259 | rc = proto_register(&tcp_prot, 1); |
1263 | if (rc) | 1260 | if (rc) |
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 2b98943e6b02..99542977e47e 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c | |||
@@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr) | |||
35 | switch (*optptr) { | 35 | switch (*optptr) { |
36 | case IPOPT_SEC: | 36 | case IPOPT_SEC: |
37 | case 0x85: /* Some "Extended Security" crap. */ | 37 | case 0x85: /* Some "Extended Security" crap. */ |
38 | case 0x86: /* Another "Commercial Security" crap. */ | 38 | case IPOPT_CIPSO: |
39 | case IPOPT_RA: | 39 | case IPOPT_RA: |
40 | case 0x80|21: /* RFC1770 */ | 40 | case 0x80|21: /* RFC1770 */ |
41 | break; | 41 | break; |
@@ -265,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x) | |||
265 | goto error; | 265 | goto error; |
266 | 266 | ||
267 | x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); | 267 | x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); |
268 | if (x->props.mode) | 268 | if (x->props.mode == XFRM_MODE_TUNNEL) |
269 | x->props.header_len += sizeof(struct iphdr); | 269 | x->props.header_len += sizeof(struct iphdr); |
270 | x->data = ahp; | 270 | x->data = ahp; |
271 | 271 | ||
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c new file mode 100644 index 000000000000..80a2a0911b49 --- /dev/null +++ b/net/ipv4/cipso_ipv4.c | |||
@@ -0,0 +1,1607 @@ | |||
1 | /* | ||
2 | * CIPSO - Commercial IP Security Option | ||
3 | * | ||
4 | * This is an implementation of the CIPSO 2.2 protocol as specified in | ||
5 | * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in | ||
6 | * FIPS-188, copies of both documents can be found in the Documentation | ||
7 | * directory. While CIPSO never became a full IETF RFC standard many vendors | ||
8 | * have chosen to adopt the protocol and over the years it has become a | ||
9 | * de-facto standard for labeled networking. | ||
10 | * | ||
11 | * Author: Paul Moore <paul.moore@hp.com> | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | /* | ||
16 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
17 | * | ||
18 | * This program is free software; you can redistribute it and/or modify | ||
19 | * it under the terms of the GNU General Public License as published by | ||
20 | * the Free Software Foundation; either version 2 of the License, or | ||
21 | * (at your option) any later version. | ||
22 | * | ||
23 | * This program is distributed in the hope that it will be useful, | ||
24 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
25 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
26 | * the GNU General Public License for more details. | ||
27 | * | ||
28 | * You should have received a copy of the GNU General Public License | ||
29 | * along with this program; if not, write to the Free Software | ||
30 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
31 | * | ||
32 | */ | ||
33 | |||
34 | #include <linux/init.h> | ||
35 | #include <linux/types.h> | ||
36 | #include <linux/rcupdate.h> | ||
37 | #include <linux/list.h> | ||
38 | #include <linux/spinlock.h> | ||
39 | #include <linux/string.h> | ||
40 | #include <linux/jhash.h> | ||
41 | #include <net/ip.h> | ||
42 | #include <net/icmp.h> | ||
43 | #include <net/tcp.h> | ||
44 | #include <net/netlabel.h> | ||
45 | #include <net/cipso_ipv4.h> | ||
46 | #include <asm/bug.h> | ||
47 | |||
48 | struct cipso_v4_domhsh_entry { | ||
49 | char *domain; | ||
50 | u32 valid; | ||
51 | struct list_head list; | ||
52 | struct rcu_head rcu; | ||
53 | }; | ||
54 | |||
55 | /* List of available DOI definitions */ | ||
56 | /* XXX - Updates should be minimal so having a single lock for the | ||
57 | * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be | ||
58 | * okay. */ | ||
59 | /* XXX - This currently assumes a minimal number of different DOIs in use, | ||
60 | * if in practice there are a lot of different DOIs this list should | ||
61 | * probably be turned into a hash table or something similar so we | ||
62 | * can do quick lookups. */ | ||
63 | static DEFINE_SPINLOCK(cipso_v4_doi_list_lock); | ||
64 | static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list); | ||
65 | |||
66 | /* Label mapping cache */ | ||
67 | int cipso_v4_cache_enabled = 1; | ||
68 | int cipso_v4_cache_bucketsize = 10; | ||
69 | #define CIPSO_V4_CACHE_BUCKETBITS 7 | ||
70 | #define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS) | ||
71 | #define CIPSO_V4_CACHE_REORDERLIMIT 10 | ||
72 | struct cipso_v4_map_cache_bkt { | ||
73 | spinlock_t lock; | ||
74 | u32 size; | ||
75 | struct list_head list; | ||
76 | }; | ||
77 | struct cipso_v4_map_cache_entry { | ||
78 | u32 hash; | ||
79 | unsigned char *key; | ||
80 | size_t key_len; | ||
81 | |||
82 | struct netlbl_lsm_cache lsm_data; | ||
83 | |||
84 | u32 activity; | ||
85 | struct list_head list; | ||
86 | }; | ||
87 | static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL; | ||
88 | |||
89 | /* Restricted bitmap (tag #1) flags */ | ||
90 | int cipso_v4_rbm_optfmt = 0; | ||
91 | int cipso_v4_rbm_strictvalid = 1; | ||
92 | |||
93 | /* | ||
94 | * Helper Functions | ||
95 | */ | ||
96 | |||
97 | /** | ||
98 | * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit | ||
99 | * @bitmap: the bitmap | ||
100 | * @bitmap_len: length in bits | ||
101 | * @offset: starting offset | ||
102 | * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit | ||
103 | * | ||
104 | * Description: | ||
105 | * Starting at @offset, walk the bitmap from left to right until either the | ||
106 | * desired bit is found or we reach the end. Return the bit offset, -1 if | ||
107 | * not found, or -2 if error. | ||
108 | */ | ||
109 | static int cipso_v4_bitmap_walk(const unsigned char *bitmap, | ||
110 | u32 bitmap_len, | ||
111 | u32 offset, | ||
112 | u8 state) | ||
113 | { | ||
114 | u32 bit_spot; | ||
115 | u32 byte_offset; | ||
116 | unsigned char bitmask; | ||
117 | unsigned char byte; | ||
118 | |||
119 | /* gcc always rounds to zero when doing integer division */ | ||
120 | byte_offset = offset / 8; | ||
121 | byte = bitmap[byte_offset]; | ||
122 | bit_spot = offset; | ||
123 | bitmask = 0x80 >> (offset % 8); | ||
124 | |||
125 | while (bit_spot < bitmap_len) { | ||
126 | if ((state && (byte & bitmask) == bitmask) || | ||
127 | (state == 0 && (byte & bitmask) == 0)) | ||
128 | return bit_spot; | ||
129 | |||
130 | bit_spot++; | ||
131 | bitmask >>= 1; | ||
132 | if (bitmask == 0) { | ||
133 | byte = bitmap[++byte_offset]; | ||
134 | bitmask = 0x80; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | return -1; | ||
139 | } | ||
140 | |||
141 | /** | ||
142 | * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap | ||
143 | * @bitmap: the bitmap | ||
144 | * @bit: the bit | ||
145 | * @state: if non-zero, set the bit (1) else clear the bit (0) | ||
146 | * | ||
147 | * Description: | ||
148 | * Set a single bit in the bitmask. Returns zero on success, negative values | ||
149 | * on error. | ||
150 | */ | ||
151 | static void cipso_v4_bitmap_setbit(unsigned char *bitmap, | ||
152 | u32 bit, | ||
153 | u8 state) | ||
154 | { | ||
155 | u32 byte_spot; | ||
156 | u8 bitmask; | ||
157 | |||
158 | /* gcc always rounds to zero when doing integer division */ | ||
159 | byte_spot = bit / 8; | ||
160 | bitmask = 0x80 >> (bit % 8); | ||
161 | if (state) | ||
162 | bitmap[byte_spot] |= bitmask; | ||
163 | else | ||
164 | bitmap[byte_spot] &= ~bitmask; | ||
165 | } | ||
166 | |||
167 | /** | ||
168 | * cipso_v4_doi_domhsh_free - Frees a domain list entry | ||
169 | * @entry: the entry's RCU field | ||
170 | * | ||
171 | * Description: | ||
172 | * This function is designed to be used as a callback to the call_rcu() | ||
173 | * function so that the memory allocated to a domain list entry can be released | ||
174 | * safely. | ||
175 | * | ||
176 | */ | ||
177 | static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) | ||
178 | { | ||
179 | struct cipso_v4_domhsh_entry *ptr; | ||
180 | |||
181 | ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); | ||
182 | kfree(ptr->domain); | ||
183 | kfree(ptr); | ||
184 | } | ||
185 | |||
186 | /** | ||
187 | * cipso_v4_cache_entry_free - Frees a cache entry | ||
188 | * @entry: the entry to free | ||
189 | * | ||
190 | * Description: | ||
191 | * This function frees the memory associated with a cache entry. | ||
192 | * | ||
193 | */ | ||
194 | static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry) | ||
195 | { | ||
196 | if (entry->lsm_data.free) | ||
197 | entry->lsm_data.free(entry->lsm_data.data); | ||
198 | kfree(entry->key); | ||
199 | kfree(entry); | ||
200 | } | ||
201 | |||
202 | /** | ||
203 | * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache | ||
204 | * @key: the hash key | ||
205 | * @key_len: the length of the key in bytes | ||
206 | * | ||
207 | * Description: | ||
208 | * The CIPSO tag hashing function. Returns a 32-bit hash value. | ||
209 | * | ||
210 | */ | ||
211 | static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len) | ||
212 | { | ||
213 | return jhash(key, key_len, 0); | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * Label Mapping Cache Functions | ||
218 | */ | ||
219 | |||
220 | /** | ||
221 | * cipso_v4_cache_init - Initialize the CIPSO cache | ||
222 | * | ||
223 | * Description: | ||
224 | * Initializes the CIPSO label mapping cache, this function should be called | ||
225 | * before any of the other functions defined in this file. Returns zero on | ||
226 | * success, negative values on error. | ||
227 | * | ||
228 | */ | ||
229 | static int cipso_v4_cache_init(void) | ||
230 | { | ||
231 | u32 iter; | ||
232 | |||
233 | cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS, | ||
234 | sizeof(struct cipso_v4_map_cache_bkt), | ||
235 | GFP_KERNEL); | ||
236 | if (cipso_v4_cache == NULL) | ||
237 | return -ENOMEM; | ||
238 | |||
239 | for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { | ||
240 | spin_lock_init(&cipso_v4_cache[iter].lock); | ||
241 | cipso_v4_cache[iter].size = 0; | ||
242 | INIT_LIST_HEAD(&cipso_v4_cache[iter].list); | ||
243 | } | ||
244 | |||
245 | return 0; | ||
246 | } | ||
247 | |||
248 | /** | ||
249 | * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache | ||
250 | * | ||
251 | * Description: | ||
252 | * Invalidates and frees any entries in the CIPSO cache. Returns zero on | ||
253 | * success and negative values on failure. | ||
254 | * | ||
255 | */ | ||
256 | void cipso_v4_cache_invalidate(void) | ||
257 | { | ||
258 | struct cipso_v4_map_cache_entry *entry, *tmp_entry; | ||
259 | u32 iter; | ||
260 | |||
261 | for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { | ||
262 | spin_lock(&cipso_v4_cache[iter].lock); | ||
263 | list_for_each_entry_safe(entry, | ||
264 | tmp_entry, | ||
265 | &cipso_v4_cache[iter].list, list) { | ||
266 | list_del(&entry->list); | ||
267 | cipso_v4_cache_entry_free(entry); | ||
268 | } | ||
269 | cipso_v4_cache[iter].size = 0; | ||
270 | spin_unlock(&cipso_v4_cache[iter].lock); | ||
271 | } | ||
272 | |||
273 | return; | ||
274 | } | ||
275 | |||
276 | /** | ||
277 | * cipso_v4_cache_check - Check the CIPSO cache for a label mapping | ||
278 | * @key: the buffer to check | ||
279 | * @key_len: buffer length in bytes | ||
280 | * @secattr: the security attribute struct to use | ||
281 | * | ||
282 | * Description: | ||
283 | * This function checks the cache to see if a label mapping already exists for | ||
284 | * the given key. If there is a match then the cache is adjusted and the | ||
285 | * @secattr struct is populated with the correct LSM security attributes. The | ||
286 | * cache is adjusted in the following manner if the entry is not already the | ||
287 | * first in the cache bucket: | ||
288 | * | ||
289 | * 1. The cache entry's activity counter is incremented | ||
290 | * 2. The previous (higher ranking) entry's activity counter is decremented | ||
291 | * 3. If the difference between the two activity counters is geater than | ||
292 | * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped | ||
293 | * | ||
294 | * Returns zero on success, -ENOENT for a cache miss, and other negative values | ||
295 | * on error. | ||
296 | * | ||
297 | */ | ||
298 | static int cipso_v4_cache_check(const unsigned char *key, | ||
299 | u32 key_len, | ||
300 | struct netlbl_lsm_secattr *secattr) | ||
301 | { | ||
302 | u32 bkt; | ||
303 | struct cipso_v4_map_cache_entry *entry; | ||
304 | struct cipso_v4_map_cache_entry *prev_entry = NULL; | ||
305 | u32 hash; | ||
306 | |||
307 | if (!cipso_v4_cache_enabled) | ||
308 | return -ENOENT; | ||
309 | |||
310 | hash = cipso_v4_map_cache_hash(key, key_len); | ||
311 | bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); | ||
312 | spin_lock(&cipso_v4_cache[bkt].lock); | ||
313 | list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { | ||
314 | if (entry->hash == hash && | ||
315 | entry->key_len == key_len && | ||
316 | memcmp(entry->key, key, key_len) == 0) { | ||
317 | entry->activity += 1; | ||
318 | secattr->cache.free = entry->lsm_data.free; | ||
319 | secattr->cache.data = entry->lsm_data.data; | ||
320 | if (prev_entry == NULL) { | ||
321 | spin_unlock(&cipso_v4_cache[bkt].lock); | ||
322 | return 0; | ||
323 | } | ||
324 | |||
325 | if (prev_entry->activity > 0) | ||
326 | prev_entry->activity -= 1; | ||
327 | if (entry->activity > prev_entry->activity && | ||
328 | entry->activity - prev_entry->activity > | ||
329 | CIPSO_V4_CACHE_REORDERLIMIT) { | ||
330 | __list_del(entry->list.prev, entry->list.next); | ||
331 | __list_add(&entry->list, | ||
332 | prev_entry->list.prev, | ||
333 | &prev_entry->list); | ||
334 | } | ||
335 | |||
336 | spin_unlock(&cipso_v4_cache[bkt].lock); | ||
337 | return 0; | ||
338 | } | ||
339 | prev_entry = entry; | ||
340 | } | ||
341 | spin_unlock(&cipso_v4_cache[bkt].lock); | ||
342 | |||
343 | return -ENOENT; | ||
344 | } | ||
345 | |||
346 | /** | ||
347 | * cipso_v4_cache_add - Add an entry to the CIPSO cache | ||
348 | * @skb: the packet | ||
349 | * @secattr: the packet's security attributes | ||
350 | * | ||
351 | * Description: | ||
352 | * Add a new entry into the CIPSO label mapping cache. Add the new entry to | ||
353 | * head of the cache bucket's list, if the cache bucket is out of room remove | ||
354 | * the last entry in the list first. It is important to note that there is | ||
355 | * currently no checking for duplicate keys. Returns zero on success, | ||
356 | * negative values on failure. | ||
357 | * | ||
358 | */ | ||
359 | int cipso_v4_cache_add(const struct sk_buff *skb, | ||
360 | const struct netlbl_lsm_secattr *secattr) | ||
361 | { | ||
362 | int ret_val = -EPERM; | ||
363 | u32 bkt; | ||
364 | struct cipso_v4_map_cache_entry *entry = NULL; | ||
365 | struct cipso_v4_map_cache_entry *old_entry = NULL; | ||
366 | unsigned char *cipso_ptr; | ||
367 | u32 cipso_ptr_len; | ||
368 | |||
369 | if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) | ||
370 | return 0; | ||
371 | |||
372 | cipso_ptr = CIPSO_V4_OPTPTR(skb); | ||
373 | cipso_ptr_len = cipso_ptr[1]; | ||
374 | |||
375 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); | ||
376 | if (entry == NULL) | ||
377 | return -ENOMEM; | ||
378 | entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC); | ||
379 | if (entry->key == NULL) { | ||
380 | ret_val = -ENOMEM; | ||
381 | goto cache_add_failure; | ||
382 | } | ||
383 | memcpy(entry->key, cipso_ptr, cipso_ptr_len); | ||
384 | entry->key_len = cipso_ptr_len; | ||
385 | entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); | ||
386 | entry->lsm_data.free = secattr->cache.free; | ||
387 | entry->lsm_data.data = secattr->cache.data; | ||
388 | |||
389 | bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); | ||
390 | spin_lock(&cipso_v4_cache[bkt].lock); | ||
391 | if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { | ||
392 | list_add(&entry->list, &cipso_v4_cache[bkt].list); | ||
393 | cipso_v4_cache[bkt].size += 1; | ||
394 | } else { | ||
395 | old_entry = list_entry(cipso_v4_cache[bkt].list.prev, | ||
396 | struct cipso_v4_map_cache_entry, list); | ||
397 | list_del(&old_entry->list); | ||
398 | list_add(&entry->list, &cipso_v4_cache[bkt].list); | ||
399 | cipso_v4_cache_entry_free(old_entry); | ||
400 | } | ||
401 | spin_unlock(&cipso_v4_cache[bkt].lock); | ||
402 | |||
403 | return 0; | ||
404 | |||
405 | cache_add_failure: | ||
406 | if (entry) | ||
407 | cipso_v4_cache_entry_free(entry); | ||
408 | return ret_val; | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * DOI List Functions | ||
413 | */ | ||
414 | |||
415 | /** | ||
416 | * cipso_v4_doi_search - Searches for a DOI definition | ||
417 | * @doi: the DOI to search for | ||
418 | * | ||
419 | * Description: | ||
420 | * Search the DOI definition list for a DOI definition with a DOI value that | ||
421 | * matches @doi. The caller is responsibile for calling rcu_read_[un]lock(). | ||
422 | * Returns a pointer to the DOI definition on success and NULL on failure. | ||
423 | */ | ||
424 | static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) | ||
425 | { | ||
426 | struct cipso_v4_doi *iter; | ||
427 | |||
428 | list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) | ||
429 | if (iter->doi == doi && iter->valid) | ||
430 | return iter; | ||
431 | return NULL; | ||
432 | } | ||
433 | |||
434 | /** | ||
435 | * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine | ||
436 | * @doi_def: the DOI structure | ||
437 | * | ||
438 | * Description: | ||
439 | * The caller defines a new DOI for use by the CIPSO engine and calls this | ||
440 | * function to add it to the list of acceptable domains. The caller must | ||
441 | * ensure that the mapping table specified in @doi_def->map meets all of the | ||
442 | * requirements of the mapping type (see cipso_ipv4.h for details). Returns | ||
443 | * zero on success and non-zero on failure. | ||
444 | * | ||
445 | */ | ||
446 | int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) | ||
447 | { | ||
448 | if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN) | ||
449 | return -EINVAL; | ||
450 | |||
451 | doi_def->valid = 1; | ||
452 | INIT_RCU_HEAD(&doi_def->rcu); | ||
453 | INIT_LIST_HEAD(&doi_def->dom_list); | ||
454 | |||
455 | rcu_read_lock(); | ||
456 | if (cipso_v4_doi_search(doi_def->doi) != NULL) | ||
457 | goto doi_add_failure_rlock; | ||
458 | spin_lock(&cipso_v4_doi_list_lock); | ||
459 | if (cipso_v4_doi_search(doi_def->doi) != NULL) | ||
460 | goto doi_add_failure_slock; | ||
461 | list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list); | ||
462 | spin_unlock(&cipso_v4_doi_list_lock); | ||
463 | rcu_read_unlock(); | ||
464 | |||
465 | return 0; | ||
466 | |||
467 | doi_add_failure_slock: | ||
468 | spin_unlock(&cipso_v4_doi_list_lock); | ||
469 | doi_add_failure_rlock: | ||
470 | rcu_read_unlock(); | ||
471 | return -EEXIST; | ||
472 | } | ||
473 | |||
474 | /** | ||
475 | * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine | ||
476 | * @doi: the DOI value | ||
477 | * @callback: the DOI cleanup/free callback | ||
478 | * | ||
479 | * Description: | ||
480 | * Removes a DOI definition from the CIPSO engine, @callback is called to | ||
481 | * free any memory. The NetLabel routines will be called to release their own | ||
482 | * LSM domain mappings as well as our own domain list. Returns zero on | ||
483 | * success and negative values on failure. | ||
484 | * | ||
485 | */ | ||
486 | int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)) | ||
487 | { | ||
488 | struct cipso_v4_doi *doi_def; | ||
489 | struct cipso_v4_domhsh_entry *dom_iter; | ||
490 | |||
491 | rcu_read_lock(); | ||
492 | if (cipso_v4_doi_search(doi) != NULL) { | ||
493 | spin_lock(&cipso_v4_doi_list_lock); | ||
494 | doi_def = cipso_v4_doi_search(doi); | ||
495 | if (doi_def == NULL) { | ||
496 | spin_unlock(&cipso_v4_doi_list_lock); | ||
497 | rcu_read_unlock(); | ||
498 | return -ENOENT; | ||
499 | } | ||
500 | doi_def->valid = 0; | ||
501 | list_del_rcu(&doi_def->list); | ||
502 | spin_unlock(&cipso_v4_doi_list_lock); | ||
503 | list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) | ||
504 | if (dom_iter->valid) | ||
505 | netlbl_domhsh_remove(dom_iter->domain); | ||
506 | cipso_v4_cache_invalidate(); | ||
507 | rcu_read_unlock(); | ||
508 | |||
509 | call_rcu(&doi_def->rcu, callback); | ||
510 | return 0; | ||
511 | } | ||
512 | rcu_read_unlock(); | ||
513 | |||
514 | return -ENOENT; | ||
515 | } | ||
516 | |||
517 | /** | ||
518 | * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition | ||
519 | * @doi: the DOI value | ||
520 | * | ||
521 | * Description: | ||
522 | * Searches for a valid DOI definition and if one is found it is returned to | ||
523 | * the caller. Otherwise NULL is returned. The caller must ensure that | ||
524 | * rcu_read_lock() is held while accessing the returned definition. | ||
525 | * | ||
526 | */ | ||
527 | struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) | ||
528 | { | ||
529 | return cipso_v4_doi_search(doi); | ||
530 | } | ||
531 | |||
532 | /** | ||
533 | * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff | ||
534 | * @headroom: the amount of headroom to allocate for the sk_buff | ||
535 | * | ||
536 | * Description: | ||
537 | * Dump a list of all the configured DOI values into a sk_buff. The returned | ||
538 | * sk_buff has room at the front of the sk_buff for @headroom bytes. See | ||
539 | * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This | ||
540 | * function may fail if another process is changing the DOI list at the same | ||
541 | * time. Returns a pointer to a sk_buff on success, NULL on error. | ||
542 | * | ||
543 | */ | ||
544 | struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) | ||
545 | { | ||
546 | struct sk_buff *skb = NULL; | ||
547 | struct cipso_v4_doi *iter; | ||
548 | u32 doi_cnt = 0; | ||
549 | ssize_t buf_len; | ||
550 | |||
551 | buf_len = NETLBL_LEN_U32; | ||
552 | rcu_read_lock(); | ||
553 | list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) | ||
554 | if (iter->valid) { | ||
555 | doi_cnt += 1; | ||
556 | buf_len += 2 * NETLBL_LEN_U32; | ||
557 | } | ||
558 | |||
559 | skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); | ||
560 | if (skb == NULL) | ||
561 | goto doi_dump_all_failure; | ||
562 | |||
563 | if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0) | ||
564 | goto doi_dump_all_failure; | ||
565 | buf_len -= NETLBL_LEN_U32; | ||
566 | list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) | ||
567 | if (iter->valid) { | ||
568 | if (buf_len < 2 * NETLBL_LEN_U32) | ||
569 | goto doi_dump_all_failure; | ||
570 | if (nla_put_u32(skb, NLA_U32, iter->doi) != 0) | ||
571 | goto doi_dump_all_failure; | ||
572 | if (nla_put_u32(skb, NLA_U32, iter->type) != 0) | ||
573 | goto doi_dump_all_failure; | ||
574 | buf_len -= 2 * NETLBL_LEN_U32; | ||
575 | } | ||
576 | rcu_read_unlock(); | ||
577 | |||
578 | return skb; | ||
579 | |||
580 | doi_dump_all_failure: | ||
581 | rcu_read_unlock(); | ||
582 | kfree(skb); | ||
583 | return NULL; | ||
584 | } | ||
585 | |||
586 | /** | ||
587 | * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff | ||
588 | * @doi: the DOI value | ||
589 | * @headroom: the amount of headroom to allocate for the sk_buff | ||
590 | * | ||
591 | * Description: | ||
592 | * Lookup the DOI definition matching @doi and dump it's contents into a | ||
593 | * sk_buff. The returned sk_buff has room at the front of the sk_buff for | ||
594 | * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message | ||
595 | * format. This function may fail if another process is changing the DOI list | ||
596 | * at the same time. Returns a pointer to a sk_buff on success, NULL on error. | ||
597 | * | ||
598 | */ | ||
599 | struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) | ||
600 | { | ||
601 | struct sk_buff *skb = NULL; | ||
602 | struct cipso_v4_doi *iter; | ||
603 | u32 tag_cnt = 0; | ||
604 | u32 lvl_cnt = 0; | ||
605 | u32 cat_cnt = 0; | ||
606 | ssize_t buf_len; | ||
607 | ssize_t tmp; | ||
608 | |||
609 | rcu_read_lock(); | ||
610 | iter = cipso_v4_doi_getdef(doi); | ||
611 | if (iter == NULL) | ||
612 | goto doi_dump_failure; | ||
613 | buf_len = NETLBL_LEN_U32; | ||
614 | switch (iter->type) { | ||
615 | case CIPSO_V4_MAP_PASS: | ||
616 | buf_len += NETLBL_LEN_U32; | ||
617 | while(tag_cnt < CIPSO_V4_TAG_MAXCNT && | ||
618 | iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { | ||
619 | tag_cnt += 1; | ||
620 | buf_len += NETLBL_LEN_U8; | ||
621 | } | ||
622 | break; | ||
623 | case CIPSO_V4_MAP_STD: | ||
624 | buf_len += 3 * NETLBL_LEN_U32; | ||
625 | while (tag_cnt < CIPSO_V4_TAG_MAXCNT && | ||
626 | iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { | ||
627 | tag_cnt += 1; | ||
628 | buf_len += NETLBL_LEN_U8; | ||
629 | } | ||
630 | for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) | ||
631 | if (iter->map.std->lvl.local[tmp] != | ||
632 | CIPSO_V4_INV_LVL) { | ||
633 | lvl_cnt += 1; | ||
634 | buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8; | ||
635 | } | ||
636 | for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) | ||
637 | if (iter->map.std->cat.local[tmp] != | ||
638 | CIPSO_V4_INV_CAT) { | ||
639 | cat_cnt += 1; | ||
640 | buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16; | ||
641 | } | ||
642 | break; | ||
643 | } | ||
644 | |||
645 | skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); | ||
646 | if (skb == NULL) | ||
647 | goto doi_dump_failure; | ||
648 | |||
649 | if (nla_put_u32(skb, NLA_U32, iter->type) != 0) | ||
650 | goto doi_dump_failure; | ||
651 | buf_len -= NETLBL_LEN_U32; | ||
652 | if (iter != cipso_v4_doi_getdef(doi)) | ||
653 | goto doi_dump_failure; | ||
654 | switch (iter->type) { | ||
655 | case CIPSO_V4_MAP_PASS: | ||
656 | if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) | ||
657 | goto doi_dump_failure; | ||
658 | buf_len -= NETLBL_LEN_U32; | ||
659 | for (tmp = 0; | ||
660 | tmp < CIPSO_V4_TAG_MAXCNT && | ||
661 | iter->tags[tmp] != CIPSO_V4_TAG_INVALID; | ||
662 | tmp++) { | ||
663 | if (buf_len < NETLBL_LEN_U8) | ||
664 | goto doi_dump_failure; | ||
665 | if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) | ||
666 | goto doi_dump_failure; | ||
667 | buf_len -= NETLBL_LEN_U8; | ||
668 | } | ||
669 | break; | ||
670 | case CIPSO_V4_MAP_STD: | ||
671 | if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) | ||
672 | goto doi_dump_failure; | ||
673 | if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0) | ||
674 | goto doi_dump_failure; | ||
675 | if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0) | ||
676 | goto doi_dump_failure; | ||
677 | buf_len -= 3 * NETLBL_LEN_U32; | ||
678 | for (tmp = 0; | ||
679 | tmp < CIPSO_V4_TAG_MAXCNT && | ||
680 | iter->tags[tmp] != CIPSO_V4_TAG_INVALID; | ||
681 | tmp++) { | ||
682 | if (buf_len < NETLBL_LEN_U8) | ||
683 | goto doi_dump_failure; | ||
684 | if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) | ||
685 | goto doi_dump_failure; | ||
686 | buf_len -= NETLBL_LEN_U8; | ||
687 | } | ||
688 | for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) | ||
689 | if (iter->map.std->lvl.local[tmp] != | ||
690 | CIPSO_V4_INV_LVL) { | ||
691 | if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8) | ||
692 | goto doi_dump_failure; | ||
693 | if (nla_put_u32(skb, NLA_U32, tmp) != 0) | ||
694 | goto doi_dump_failure; | ||
695 | if (nla_put_u8(skb, | ||
696 | NLA_U8, | ||
697 | iter->map.std->lvl.local[tmp]) != 0) | ||
698 | goto doi_dump_failure; | ||
699 | buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8; | ||
700 | } | ||
701 | for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) | ||
702 | if (iter->map.std->cat.local[tmp] != | ||
703 | CIPSO_V4_INV_CAT) { | ||
704 | if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16) | ||
705 | goto doi_dump_failure; | ||
706 | if (nla_put_u32(skb, NLA_U32, tmp) != 0) | ||
707 | goto doi_dump_failure; | ||
708 | if (nla_put_u16(skb, | ||
709 | NLA_U16, | ||
710 | iter->map.std->cat.local[tmp]) != 0) | ||
711 | goto doi_dump_failure; | ||
712 | buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16; | ||
713 | } | ||
714 | break; | ||
715 | } | ||
716 | rcu_read_unlock(); | ||
717 | |||
718 | return skb; | ||
719 | |||
720 | doi_dump_failure: | ||
721 | rcu_read_unlock(); | ||
722 | kfree(skb); | ||
723 | return NULL; | ||
724 | } | ||
725 | |||
726 | /** | ||
727 | * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition | ||
728 | * @doi_def: the DOI definition | ||
729 | * @domain: the domain to add | ||
730 | * | ||
731 | * Description: | ||
732 | * Adds the @domain to the the DOI specified by @doi_def, this function | ||
733 | * should only be called by external functions (i.e. NetLabel). This function | ||
734 | * does allocate memory. Returns zero on success, negative values on failure. | ||
735 | * | ||
736 | */ | ||
737 | int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) | ||
738 | { | ||
739 | struct cipso_v4_domhsh_entry *iter; | ||
740 | struct cipso_v4_domhsh_entry *new_dom; | ||
741 | |||
742 | new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); | ||
743 | if (new_dom == NULL) | ||
744 | return -ENOMEM; | ||
745 | if (domain) { | ||
746 | new_dom->domain = kstrdup(domain, GFP_KERNEL); | ||
747 | if (new_dom->domain == NULL) { | ||
748 | kfree(new_dom); | ||
749 | return -ENOMEM; | ||
750 | } | ||
751 | } | ||
752 | new_dom->valid = 1; | ||
753 | INIT_RCU_HEAD(&new_dom->rcu); | ||
754 | |||
755 | rcu_read_lock(); | ||
756 | spin_lock(&cipso_v4_doi_list_lock); | ||
757 | list_for_each_entry_rcu(iter, &doi_def->dom_list, list) | ||
758 | if (iter->valid && | ||
759 | ((domain != NULL && iter->domain != NULL && | ||
760 | strcmp(iter->domain, domain) == 0) || | ||
761 | (domain == NULL && iter->domain == NULL))) { | ||
762 | spin_unlock(&cipso_v4_doi_list_lock); | ||
763 | rcu_read_unlock(); | ||
764 | kfree(new_dom->domain); | ||
765 | kfree(new_dom); | ||
766 | return -EEXIST; | ||
767 | } | ||
768 | list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); | ||
769 | spin_unlock(&cipso_v4_doi_list_lock); | ||
770 | rcu_read_unlock(); | ||
771 | |||
772 | return 0; | ||
773 | } | ||
774 | |||
/**
 * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
 * @doi_def: the DOI definition
 * @domain: the domain to remove
 *
 * Description:
 * Removes the @domain from the DOI specified by @doi_def, this function
 * should only be called by external functions (i.e. NetLabel).  Returns zero
 * on success and negative values on error.
 *
 */
int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
			       const char *domain)
{
	struct cipso_v4_domhsh_entry *iter;

	rcu_read_lock();
	spin_lock(&cipso_v4_doi_list_lock);
	list_for_each_entry_rcu(iter, &doi_def->dom_list, list)
		/* a NULL domain is a legitimate entry (the default mapping),
		 * so match either an exact string or the NULL<->NULL pair */
		if (iter->valid &&
		    ((domain != NULL && iter->domain != NULL &&
		      strcmp(iter->domain, domain) == 0) ||
		     (domain == NULL && iter->domain == NULL))) {
			/* clear the valid flag before unlinking so that
			 * concurrent RCU readers stop honoring the entry */
			iter->valid = 0;
			list_del_rcu(&iter->list);
			spin_unlock(&cipso_v4_doi_list_lock);
			rcu_read_unlock();
			/* defer the free until all current RCU readers have
			 * finished with the entry */
			call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);

			return 0;
		}
	spin_unlock(&cipso_v4_doi_list_lock);
	rcu_read_unlock();

	return -ENOENT;
}
811 | |||
812 | /* | ||
813 | * Label Mapping Functions | ||
814 | */ | ||
815 | |||
816 | /** | ||
817 | * cipso_v4_map_lvl_valid - Checks to see if the given level is understood | ||
818 | * @doi_def: the DOI definition | ||
819 | * @level: the level to check | ||
820 | * | ||
821 | * Description: | ||
822 | * Checks the given level against the given DOI definition and returns a | ||
823 | * negative value if the level does not have a valid mapping and a zero value | ||
824 | * if the level is defined by the DOI. | ||
825 | * | ||
826 | */ | ||
827 | static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) | ||
828 | { | ||
829 | switch (doi_def->type) { | ||
830 | case CIPSO_V4_MAP_PASS: | ||
831 | return 0; | ||
832 | case CIPSO_V4_MAP_STD: | ||
833 | if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) | ||
834 | return 0; | ||
835 | break; | ||
836 | } | ||
837 | |||
838 | return -EFAULT; | ||
839 | } | ||
840 | |||
841 | /** | ||
842 | * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network | ||
843 | * @doi_def: the DOI definition | ||
844 | * @host_lvl: the host MLS level | ||
845 | * @net_lvl: the network/CIPSO MLS level | ||
846 | * | ||
847 | * Description: | ||
848 | * Perform a label mapping to translate a local MLS level to the correct | ||
849 | * CIPSO level using the given DOI definition. Returns zero on success, | ||
850 | * negative values otherwise. | ||
851 | * | ||
852 | */ | ||
853 | static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, | ||
854 | u32 host_lvl, | ||
855 | u32 *net_lvl) | ||
856 | { | ||
857 | switch (doi_def->type) { | ||
858 | case CIPSO_V4_MAP_PASS: | ||
859 | *net_lvl = host_lvl; | ||
860 | return 0; | ||
861 | case CIPSO_V4_MAP_STD: | ||
862 | if (host_lvl < doi_def->map.std->lvl.local_size) { | ||
863 | *net_lvl = doi_def->map.std->lvl.local[host_lvl]; | ||
864 | return 0; | ||
865 | } | ||
866 | break; | ||
867 | } | ||
868 | |||
869 | return -EINVAL; | ||
870 | } | ||
871 | |||
872 | /** | ||
873 | * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host | ||
874 | * @doi_def: the DOI definition | ||
875 | * @net_lvl: the network/CIPSO MLS level | ||
876 | * @host_lvl: the host MLS level | ||
877 | * | ||
878 | * Description: | ||
879 | * Perform a label mapping to translate a CIPSO level to the correct local MLS | ||
880 | * level using the given DOI definition. Returns zero on success, negative | ||
881 | * values otherwise. | ||
882 | * | ||
883 | */ | ||
884 | static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, | ||
885 | u32 net_lvl, | ||
886 | u32 *host_lvl) | ||
887 | { | ||
888 | struct cipso_v4_std_map_tbl *map_tbl; | ||
889 | |||
890 | switch (doi_def->type) { | ||
891 | case CIPSO_V4_MAP_PASS: | ||
892 | *host_lvl = net_lvl; | ||
893 | return 0; | ||
894 | case CIPSO_V4_MAP_STD: | ||
895 | map_tbl = doi_def->map.std; | ||
896 | if (net_lvl < map_tbl->lvl.cipso_size && | ||
897 | map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { | ||
898 | *host_lvl = doi_def->map.std->lvl.cipso[net_lvl]; | ||
899 | return 0; | ||
900 | } | ||
901 | break; | ||
902 | } | ||
903 | |||
904 | return -EINVAL; | ||
905 | } | ||
906 | |||
907 | /** | ||
908 | * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid | ||
909 | * @doi_def: the DOI definition | ||
910 | * @bitmap: category bitmap | ||
911 | * @bitmap_len: bitmap length in bytes | ||
912 | * | ||
913 | * Description: | ||
914 | * Checks the given category bitmap against the given DOI definition and | ||
915 | * returns a negative value if any of the categories in the bitmap do not have | ||
916 | * a valid mapping and a zero value if all of the categories are valid. | ||
917 | * | ||
918 | */ | ||
919 | static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, | ||
920 | const unsigned char *bitmap, | ||
921 | u32 bitmap_len) | ||
922 | { | ||
923 | int cat = -1; | ||
924 | u32 bitmap_len_bits = bitmap_len * 8; | ||
925 | u32 cipso_cat_size = doi_def->map.std->cat.cipso_size; | ||
926 | u32 *cipso_array = doi_def->map.std->cat.cipso; | ||
927 | |||
928 | switch (doi_def->type) { | ||
929 | case CIPSO_V4_MAP_PASS: | ||
930 | return 0; | ||
931 | case CIPSO_V4_MAP_STD: | ||
932 | for (;;) { | ||
933 | cat = cipso_v4_bitmap_walk(bitmap, | ||
934 | bitmap_len_bits, | ||
935 | cat + 1, | ||
936 | 1); | ||
937 | if (cat < 0) | ||
938 | break; | ||
939 | if (cat >= cipso_cat_size || | ||
940 | cipso_array[cat] >= CIPSO_V4_INV_CAT) | ||
941 | return -EFAULT; | ||
942 | } | ||
943 | |||
944 | if (cat == -1) | ||
945 | return 0; | ||
946 | break; | ||
947 | } | ||
948 | |||
949 | return -EFAULT; | ||
950 | } | ||
951 | |||
952 | /** | ||
953 | * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network | ||
954 | * @doi_def: the DOI definition | ||
955 | * @host_cat: the category bitmap in host format | ||
956 | * @host_cat_len: the length of the host's category bitmap in bytes | ||
957 | * @net_cat: the zero'd out category bitmap in network/CIPSO format | ||
958 | * @net_cat_len: the length of the CIPSO bitmap in bytes | ||
959 | * | ||
960 | * Description: | ||
961 | * Perform a label mapping to translate a local MLS category bitmap to the | ||
962 | * correct CIPSO bitmap using the given DOI definition. Returns the minimum | ||
963 | * size in bytes of the network bitmap on success, negative values otherwise. | ||
964 | * | ||
965 | */ | ||
966 | static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, | ||
967 | const unsigned char *host_cat, | ||
968 | u32 host_cat_len, | ||
969 | unsigned char *net_cat, | ||
970 | u32 net_cat_len) | ||
971 | { | ||
972 | int host_spot = -1; | ||
973 | u32 net_spot; | ||
974 | u32 net_spot_max = 0; | ||
975 | u32 host_clen_bits = host_cat_len * 8; | ||
976 | u32 net_clen_bits = net_cat_len * 8; | ||
977 | u32 host_cat_size = doi_def->map.std->cat.local_size; | ||
978 | u32 *host_cat_array = doi_def->map.std->cat.local; | ||
979 | |||
980 | switch (doi_def->type) { | ||
981 | case CIPSO_V4_MAP_PASS: | ||
982 | net_spot_max = host_cat_len - 1; | ||
983 | while (net_spot_max > 0 && host_cat[net_spot_max] == 0) | ||
984 | net_spot_max--; | ||
985 | if (net_spot_max > net_cat_len) | ||
986 | return -EINVAL; | ||
987 | memcpy(net_cat, host_cat, net_spot_max); | ||
988 | return net_spot_max; | ||
989 | case CIPSO_V4_MAP_STD: | ||
990 | for (;;) { | ||
991 | host_spot = cipso_v4_bitmap_walk(host_cat, | ||
992 | host_clen_bits, | ||
993 | host_spot + 1, | ||
994 | 1); | ||
995 | if (host_spot < 0) | ||
996 | break; | ||
997 | if (host_spot >= host_cat_size) | ||
998 | return -EPERM; | ||
999 | |||
1000 | net_spot = host_cat_array[host_spot]; | ||
1001 | if (net_spot >= net_clen_bits) | ||
1002 | return -ENOSPC; | ||
1003 | cipso_v4_bitmap_setbit(net_cat, net_spot, 1); | ||
1004 | |||
1005 | if (net_spot > net_spot_max) | ||
1006 | net_spot_max = net_spot; | ||
1007 | } | ||
1008 | |||
1009 | if (host_spot == -2) | ||
1010 | return -EFAULT; | ||
1011 | |||
1012 | if (++net_spot_max % 8) | ||
1013 | return net_spot_max / 8 + 1; | ||
1014 | return net_spot_max / 8; | ||
1015 | } | ||
1016 | |||
1017 | return -EINVAL; | ||
1018 | } | ||
1019 | |||
1020 | /** | ||
1021 | * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host | ||
1022 | * @doi_def: the DOI definition | ||
1023 | * @net_cat: the category bitmap in network/CIPSO format | ||
1024 | * @net_cat_len: the length of the CIPSO bitmap in bytes | ||
1025 | * @host_cat: the zero'd out category bitmap in host format | ||
1026 | * @host_cat_len: the length of the host's category bitmap in bytes | ||
1027 | * | ||
1028 | * Description: | ||
1029 | * Perform a label mapping to translate a CIPSO bitmap to the correct local | ||
1030 | * MLS category bitmap using the given DOI definition. Returns the minimum | ||
1031 | * size in bytes of the host bitmap on success, negative values otherwise. | ||
1032 | * | ||
1033 | */ | ||
1034 | static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, | ||
1035 | const unsigned char *net_cat, | ||
1036 | u32 net_cat_len, | ||
1037 | unsigned char *host_cat, | ||
1038 | u32 host_cat_len) | ||
1039 | { | ||
1040 | u32 host_spot; | ||
1041 | u32 host_spot_max = 0; | ||
1042 | int net_spot = -1; | ||
1043 | u32 net_clen_bits = net_cat_len * 8; | ||
1044 | u32 host_clen_bits = host_cat_len * 8; | ||
1045 | u32 net_cat_size = doi_def->map.std->cat.cipso_size; | ||
1046 | u32 *net_cat_array = doi_def->map.std->cat.cipso; | ||
1047 | |||
1048 | switch (doi_def->type) { | ||
1049 | case CIPSO_V4_MAP_PASS: | ||
1050 | if (net_cat_len > host_cat_len) | ||
1051 | return -EINVAL; | ||
1052 | memcpy(host_cat, net_cat, net_cat_len); | ||
1053 | return net_cat_len; | ||
1054 | case CIPSO_V4_MAP_STD: | ||
1055 | for (;;) { | ||
1056 | net_spot = cipso_v4_bitmap_walk(net_cat, | ||
1057 | net_clen_bits, | ||
1058 | net_spot + 1, | ||
1059 | 1); | ||
1060 | if (net_spot < 0) | ||
1061 | break; | ||
1062 | if (net_spot >= net_cat_size || | ||
1063 | net_cat_array[net_spot] >= CIPSO_V4_INV_CAT) | ||
1064 | return -EPERM; | ||
1065 | |||
1066 | host_spot = net_cat_array[net_spot]; | ||
1067 | if (host_spot >= host_clen_bits) | ||
1068 | return -ENOSPC; | ||
1069 | cipso_v4_bitmap_setbit(host_cat, host_spot, 1); | ||
1070 | |||
1071 | if (host_spot > host_spot_max) | ||
1072 | host_spot_max = host_spot; | ||
1073 | } | ||
1074 | |||
1075 | if (net_spot == -2) | ||
1076 | return -EFAULT; | ||
1077 | |||
1078 | if (++host_spot_max % 8) | ||
1079 | return host_spot_max / 8 + 1; | ||
1080 | return host_spot_max / 8; | ||
1081 | } | ||
1082 | |||
1083 | return -EINVAL; | ||
1084 | } | ||
1085 | |||
1086 | /* | ||
1087 | * Protocol Handling Functions | ||
1088 | */ | ||
1089 | |||
1090 | #define CIPSO_V4_HDR_LEN 6 | ||
1091 | |||
1092 | /** | ||
1093 | * cipso_v4_gentag_hdr - Generate a CIPSO option header | ||
1094 | * @doi_def: the DOI definition | ||
1095 | * @len: the total tag length in bytes | ||
1096 | * @buf: the CIPSO option buffer | ||
1097 | * | ||
1098 | * Description: | ||
1099 | * Write a CIPSO header into the beginning of @buffer. Return zero on success, | ||
1100 | * negative values on failure. | ||
1101 | * | ||
1102 | */ | ||
1103 | static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, | ||
1104 | u32 len, | ||
1105 | unsigned char *buf) | ||
1106 | { | ||
1107 | if (CIPSO_V4_HDR_LEN + len > 40) | ||
1108 | return -ENOSPC; | ||
1109 | |||
1110 | buf[0] = IPOPT_CIPSO; | ||
1111 | buf[1] = CIPSO_V4_HDR_LEN + len; | ||
1112 | *(u32 *)&buf[2] = htonl(doi_def->doi); | ||
1113 | |||
1114 | return 0; | ||
1115 | } | ||
1116 | |||
1117 | #define CIPSO_V4_TAG1_CAT_LEN 30 | ||
1118 | |||
/**
 * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1)
 * @doi_def: the DOI definition
 * @secattr: the security attributes
 * @buffer: the option buffer
 * @buffer_len: length of buffer in bytes
 *
 * Description:
 * Generate a CIPSO option using the restricted bitmap tag, tag type #1.  The
 * actual buffer length may be larger than the indicated size due to
 * translation between host and network category bitmaps.  On success the
 * newly allocated option is returned through @buffer and @buffer_len and the
 * caller owns the allocation.  Returns zero on success, negative values on
 * failure.
 *
 */
static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
			       const struct netlbl_lsm_secattr *secattr,
			       unsigned char **buffer,
			       u32 *buffer_len)
{
	int ret_val = -EPERM;
	unsigned char *buf = NULL;
	u32 buf_len;
	u32 level;

	if (secattr->mls_cat) {
		/* room for the option header, the 4 byte tag header, and
		 * the largest possible type #1 category bitmap */
		buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN,
			      GFP_ATOMIC);
		if (buf == NULL)
			return -ENOMEM;

		/* on success ret_val is the number of bitmap bytes used */
		ret_val = cipso_v4_map_cat_rbm_hton(doi_def,
						    secattr->mls_cat,
						    secattr->mls_cat_len,
						    &buf[CIPSO_V4_HDR_LEN + 4],
						    CIPSO_V4_TAG1_CAT_LEN);
		if (ret_val < 0)
			goto gentag_failure;

		/* This will send packets using the "optimized" format when
		 * possible as specified in section 3.4.2.6 of the
		 * CIPSO draft. */
		if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10))
			ret_val = 10;

		buf_len = 4 + ret_val;
	} else {
		/* no categories: the tag is just its 4 byte header */
		buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC);
		if (buf == NULL)
			return -ENOMEM;
		buf_len = 4;
	}

	ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level);
	if (ret_val != 0)
		goto gentag_failure;

	ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf);
	if (ret_val != 0)
		goto gentag_failure;

	/* tag layout: type (0x01), length, alignment octet (left zero by
	 * kzalloc), level, then the category bitmap written above */
	buf[CIPSO_V4_HDR_LEN] = 0x01;
	buf[CIPSO_V4_HDR_LEN + 1] = buf_len;
	buf[CIPSO_V4_HDR_LEN + 3] = level;

	*buffer = buf;
	*buffer_len = CIPSO_V4_HDR_LEN + buf_len;

	return 0;

gentag_failure:
	kfree(buf);
	return ret_val;
}
1192 | |||
1193 | /** | ||
1194 | * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag | ||
1195 | * @doi_def: the DOI definition | ||
1196 | * @tag: the CIPSO tag | ||
1197 | * @secattr: the security attributes | ||
1198 | * | ||
1199 | * Description: | ||
1200 | * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security | ||
1201 | * attributes in @secattr. Return zero on success, negatives values on | ||
1202 | * failure. | ||
1203 | * | ||
1204 | */ | ||
1205 | static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, | ||
1206 | const unsigned char *tag, | ||
1207 | struct netlbl_lsm_secattr *secattr) | ||
1208 | { | ||
1209 | int ret_val; | ||
1210 | u8 tag_len = tag[1]; | ||
1211 | u32 level; | ||
1212 | |||
1213 | ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); | ||
1214 | if (ret_val != 0) | ||
1215 | return ret_val; | ||
1216 | secattr->mls_lvl = level; | ||
1217 | secattr->mls_lvl_vld = 1; | ||
1218 | |||
1219 | if (tag_len > 4) { | ||
1220 | switch (doi_def->type) { | ||
1221 | case CIPSO_V4_MAP_PASS: | ||
1222 | secattr->mls_cat_len = tag_len - 4; | ||
1223 | break; | ||
1224 | case CIPSO_V4_MAP_STD: | ||
1225 | secattr->mls_cat_len = | ||
1226 | doi_def->map.std->cat.local_size; | ||
1227 | break; | ||
1228 | } | ||
1229 | secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC); | ||
1230 | if (secattr->mls_cat == NULL) | ||
1231 | return -ENOMEM; | ||
1232 | |||
1233 | ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, | ||
1234 | &tag[4], | ||
1235 | tag_len - 4, | ||
1236 | secattr->mls_cat, | ||
1237 | secattr->mls_cat_len); | ||
1238 | if (ret_val < 0) { | ||
1239 | kfree(secattr->mls_cat); | ||
1240 | return ret_val; | ||
1241 | } | ||
1242 | secattr->mls_cat_len = ret_val; | ||
1243 | } | ||
1244 | |||
1245 | return 0; | ||
1246 | } | ||
1247 | |||
/**
 * cipso_v4_validate - Validate a CIPSO option
 * @option: the start of the option, on error it is set to point to the error
 *
 * Description:
 * This routine is called to validate a CIPSO option, it checks all of the
 * fields to ensure that they are at least valid, see the draft snippet below
 * for details.  If the option is valid then a zero value is returned and
 * the value of @option is unchanged.  If the option is invalid then a
 * non-zero value is returned and @option is adjusted to point to the
 * offending portion of the option.  From the IETF draft ...
 *
 *  "If any field within the CIPSO options, such as the DOI identifier, is not
 *   recognized the IP datagram is discarded and an ICMP 'parameter problem'
 *   (type 12) is generated and returned.  The ICMP code field is set to 'bad
 *   parameter' (code 0) and the pointer is set to the start of the CIPSO field
 *   that is unrecognized."
 *
 */
int cipso_v4_validate(unsigned char **option)
{
	unsigned char *opt = *option;
	unsigned char *tag;
	unsigned char opt_iter;
	unsigned char err_offset = 0;
	u8 opt_len;
	u8 tag_len;
	struct cipso_v4_doi *doi_def = NULL;
	u32 tag_iter;

	/* caller already checks for length values that are too large */
	/* smallest possible option: 6 byte header plus a minimal tag */
	opt_len = opt[1];
	if (opt_len < 8) {
		err_offset = 1;
		goto validate_return;
	}

	/* the DOI occupies bytes 2-5 of the option, network byte order;
	 * the lookup result is only valid while the RCU read lock is held */
	rcu_read_lock();
	doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2])));
	if (doi_def == NULL) {
		err_offset = 2;
		goto validate_return_locked;
	}

	/* walk the tags, which start right after the 6 byte header */
	opt_iter = 6;
	tag = opt + opt_iter;
	while (opt_iter < opt_len) {
		/* the tag type must appear in the DOI's accepted tag list */
		for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];)
			if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID ||
			    ++tag_iter == CIPSO_V4_TAG_MAXCNT) {
				err_offset = opt_iter;
				goto validate_return_locked;
			}

		/* the tag must fit within the remaining option bytes */
		tag_len = tag[1];
		if (tag_len > (opt_len - opt_iter)) {
			err_offset = opt_iter + 1;
			goto validate_return_locked;
		}

		switch (tag[0]) {
		case CIPSO_V4_TAG_RBITMAP:
			/* a type #1 tag needs at least its 4 byte header
			 * (this also guarantees the loop advances below) */
			if (tag_len < 4) {
				err_offset = opt_iter + 1;
				goto validate_return_locked;
			}

			/* We are already going to do all the verification
			 * necessary at the socket layer so from our point of
			 * view it is safe to turn these checks off (and less
			 * work), however, the CIPSO draft says we should do
			 * all the CIPSO validations here but it doesn't
			 * really specify _exactly_ what we need to validate
			 * ... so, just make it a sysctl tunable. */
			if (cipso_v4_rbm_strictvalid) {
				/* tag[3] is the level octet */
				if (cipso_v4_map_lvl_valid(doi_def,
							   tag[3]) < 0) {
					err_offset = opt_iter + 3;
					goto validate_return_locked;
				}
				/* bytes past the tag header are the
				 * category bitmap */
				if (tag_len > 4 &&
				    cipso_v4_map_cat_rbm_valid(doi_def,
							    &tag[4],
							    tag_len - 4) < 0) {
					err_offset = opt_iter + 4;
					goto validate_return_locked;
				}
			}
			break;
		default:
			err_offset = opt_iter;
			goto validate_return_locked;
		}

		tag += tag_len;
		opt_iter += tag_len;
	}

validate_return_locked:
	rcu_read_unlock();
validate_return:
	/* a zero err_offset leaves *option unchanged and means success */
	*option = opt + err_offset;
	return err_offset;
}
1352 | |||
1353 | /** | ||
1354 | * cipso_v4_error - Send the correct reponse for a bad packet | ||
1355 | * @skb: the packet | ||
1356 | * @error: the error code | ||
1357 | * @gateway: CIPSO gateway flag | ||
1358 | * | ||
1359 | * Description: | ||
1360 | * Based on the error code given in @error, send an ICMP error message back to | ||
1361 | * the originating host. From the IETF draft ... | ||
1362 | * | ||
1363 | * "If the contents of the CIPSO [option] are valid but the security label is | ||
1364 | * outside of the configured host or port label range, the datagram is | ||
1365 | * discarded and an ICMP 'destination unreachable' (type 3) is generated and | ||
1366 | * returned. The code field of the ICMP is set to 'communication with | ||
1367 | * destination network administratively prohibited' (code 9) or to | ||
1368 | * 'communication with destination host administratively prohibited' | ||
1369 | * (code 10). The value of the code is dependent on whether the originator | ||
1370 | * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The | ||
1371 | * recipient of the ICMP message MUST be able to handle either value. The | ||
1372 | * same procedure is performed if a CIPSO [option] can not be added to an | ||
1373 | * IP packet because it is too large to fit in the IP options area." | ||
1374 | * | ||
1375 | * "If the error is triggered by receipt of an ICMP message, the message is | ||
1376 | * discarded and no response is permitted (consistent with general ICMP | ||
1377 | * processing rules)." | ||
1378 | * | ||
1379 | */ | ||
1380 | void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) | ||
1381 | { | ||
1382 | if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES) | ||
1383 | return; | ||
1384 | |||
1385 | if (gateway) | ||
1386 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); | ||
1387 | else | ||
1388 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); | ||
1389 | } | ||
1390 | |||
/**
 * cipso_v4_socket_setattr - Add a CIPSO option to a socket
 * @sock: the socket
 * @doi_def: the CIPSO DOI to use
 * @secattr: the specific security attributes of the socket
 *
 * Description:
 * Set the CIPSO option on the given socket using the DOI definition and
 * security attributes passed to the function.  This function requires
 * exclusive access to @sock->sk, which means it either needs to be in the
 * process of being created or locked via lock_sock(sock->sk). Returns zero on
 * success and negative values on failure.
 *
 */
int cipso_v4_socket_setattr(const struct socket *sock,
			    const struct cipso_v4_doi *doi_def,
			    const struct netlbl_lsm_secattr *secattr)
{
	int ret_val = -EPERM;
	u32 iter;
	unsigned char *buf = NULL;
	u32 buf_len = 0;
	u32 opt_len;
	struct ip_options *opt = NULL;
	struct sock *sk;
	struct inet_sock *sk_inet;
	struct inet_connection_sock *sk_conn;

	/* In the case of sock_create_lite(), the sock->sk field is not
	 * defined yet but it is not a problem as the only users of these
	 * "lite" PF_INET sockets are functions which do an accept() call
	 * afterwards so we will label the socket as part of the accept(). */
	sk = sock->sk;
	if (sk == NULL)
		return 0;

	/* XXX - This code assumes only one tag per CIPSO option which isn't
	 * really a good assumption to make but since we only support the MAC
	 * tags right now it is a safe assumption. */
	/* try each tag type listed in the DOI, in order, until one
	 * generates successfully */
	iter = 0;
	do {
		switch (doi_def->tags[iter]) {
		case CIPSO_V4_TAG_RBITMAP:
			/* on success buf/buf_len describe a freshly
			 * allocated option that we own until handed to
			 * the socket below */
			ret_val = cipso_v4_gentag_rbm(doi_def,
						      secattr,
						      &buf,
						      &buf_len);
			break;
		default:
			ret_val = -EPERM;
			goto socket_setattr_failure;
		}

		iter++;
	} while (ret_val != 0 &&
		 iter < CIPSO_V4_TAG_MAXCNT &&
		 doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
	if (ret_val != 0)
		goto socket_setattr_failure;

	/* We can't use ip_options_get() directly because it makes a call to
	 * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
	 * we can't block here. */
	/* round the option length up to the next 4-byte boundary as
	 * required for the IPv4 options area; kzalloc leaves the padding
	 * bytes zeroed */
	opt_len = (buf_len + 3) & ~3;
	opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC);
	if (opt == NULL) {
		ret_val = -ENOMEM;
		goto socket_setattr_failure;
	}
	memcpy(opt->__data, buf, buf_len);
	opt->optlen = opt_len;
	opt->is_data = 1;
	kfree(buf);
	buf = NULL;
	/* let the stack parse/validate the raw option bytes we built */
	ret_val = ip_options_compile(opt, NULL);
	if (ret_val != 0)
		goto socket_setattr_failure;

	sk_inet = inet_sk(sk);
	if (sk_inet->is_icsk) {
		/* connection-oriented sockets account for the option bytes
		 * in their header-length/MSS bookkeeping; swap the old
		 * option length for the new one and resync the MSS */
		sk_conn = inet_csk(sk);
		if (sk_inet->opt)
			sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen;
		sk_conn->icsk_ext_hdr_len += opt->optlen;
		sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
	}
	/* install the new options and free whatever was there before */
	opt = xchg(&sk_inet->opt, opt);
	kfree(opt);

	return 0;

socket_setattr_failure:
	kfree(buf);
	kfree(opt);
	return ret_val;
}
1487 | |||
/**
 * cipso_v4_socket_getattr - Get the security attributes from a socket
 * @sock: the socket
 * @secattr: the security attributes
 *
 * Description:
 * Query @sock to see if there is a CIPSO option attached to the socket and if
 * there is return the CIPSO security attributes in @secattr.  Returns zero on
 * success and negative values on failure.
 *
 */
int cipso_v4_socket_getattr(const struct socket *sock,
			    struct netlbl_lsm_secattr *secattr)
{
	int ret_val = -ENOMSG;
	struct sock *sk;
	struct inet_sock *sk_inet;
	unsigned char *cipso_ptr;
	u32 doi;
	struct cipso_v4_doi *doi_def;

	sk = sock->sk;
	lock_sock(sk);
	sk_inet = inet_sk(sk);
	/* bail out if the socket has no IP options or no CIPSO option */
	if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
		goto socket_getattr_return;
	/* opt->cipso is the option's offset from the start of the IP
	 * header; convert it into a pointer within the options data */
	cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
		sizeof(struct iphdr);
	/* fast path: try the label mapping cache before a full parse */
	ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
	if (ret_val == 0)
		goto socket_getattr_return;

	/* slow path: look up the DOI (bytes 2-5 of the option) and parse
	 * the first tag; the DOI definition is only valid under RCU */
	doi = ntohl(*(u32 *)&cipso_ptr[2]);
	rcu_read_lock();
	doi_def = cipso_v4_doi_getdef(doi);
	if (doi_def == NULL) {
		rcu_read_unlock();
		goto socket_getattr_return;
	}
	/* cipso_ptr[6] is the first tag's type octet */
	switch (cipso_ptr[6]) {
	case CIPSO_V4_TAG_RBITMAP:
		ret_val = cipso_v4_parsetag_rbm(doi_def,
						&cipso_ptr[6],
						secattr);
		break;
	}
	rcu_read_unlock();

socket_getattr_return:
	release_sock(sk);
	return ret_val;
}
1540 | |||
1541 | /** | ||
1542 | * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option | ||
1543 | * @skb: the packet | ||
1544 | * @secattr: the security attributes | ||
1545 | * | ||
1546 | * Description: | ||
1547 | * Parse the given packet's CIPSO option and return the security attributes. | ||
1548 | * Returns zero on success and negative values on failure. | ||
1549 | * | ||
1550 | */ | ||
1551 | int cipso_v4_skbuff_getattr(const struct sk_buff *skb, | ||
1552 | struct netlbl_lsm_secattr *secattr) | ||
1553 | { | ||
1554 | int ret_val = -ENOMSG; | ||
1555 | unsigned char *cipso_ptr; | ||
1556 | u32 doi; | ||
1557 | struct cipso_v4_doi *doi_def; | ||
1558 | |||
1559 | if (!CIPSO_V4_OPTEXIST(skb)) | ||
1560 | return -ENOMSG; | ||
1561 | cipso_ptr = CIPSO_V4_OPTPTR(skb); | ||
1562 | if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0) | ||
1563 | return 0; | ||
1564 | |||
1565 | doi = ntohl(*(u32 *)&cipso_ptr[2]); | ||
1566 | rcu_read_lock(); | ||
1567 | doi_def = cipso_v4_doi_getdef(doi); | ||
1568 | if (doi_def == NULL) | ||
1569 | goto skbuff_getattr_return; | ||
1570 | switch (cipso_ptr[6]) { | ||
1571 | case CIPSO_V4_TAG_RBITMAP: | ||
1572 | ret_val = cipso_v4_parsetag_rbm(doi_def, | ||
1573 | &cipso_ptr[6], | ||
1574 | secattr); | ||
1575 | break; | ||
1576 | } | ||
1577 | |||
1578 | skbuff_getattr_return: | ||
1579 | rcu_read_unlock(); | ||
1580 | return ret_val; | ||
1581 | } | ||
1582 | |||
1583 | /* | ||
1584 | * Setup Functions | ||
1585 | */ | ||
1586 | |||
1587 | /** | ||
1588 | * cipso_v4_init - Initialize the CIPSO module | ||
1589 | * | ||
1590 | * Description: | ||
1591 | * Initialize the CIPSO module and prepare it for use. Returns zero on success | ||
1592 | * and negative values on failure. | ||
1593 | * | ||
1594 | */ | ||
1595 | static int __init cipso_v4_init(void) | ||
1596 | { | ||
1597 | int ret_val; | ||
1598 | |||
1599 | ret_val = cipso_v4_cache_init(); | ||
1600 | if (ret_val != 0) | ||
1601 | panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n", | ||
1602 | ret_val); | ||
1603 | |||
1604 | return 0; | ||
1605 | } | ||
1606 | |||
1607 | subsys_initcall(cipso_v4_init); | ||
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a6cc31d911eb..8e8d1f17d77a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/in.h> | 43 | #include <linux/in.h> |
44 | #include <linux/errno.h> | 44 | #include <linux/errno.h> |
45 | #include <linux/interrupt.h> | 45 | #include <linux/interrupt.h> |
46 | #include <linux/if_addr.h> | ||
46 | #include <linux/if_ether.h> | 47 | #include <linux/if_ether.h> |
47 | #include <linux/inet.h> | 48 | #include <linux/inet.h> |
48 | #include <linux/netdevice.h> | 49 | #include <linux/netdevice.h> |
@@ -62,6 +63,7 @@ | |||
62 | #include <net/ip.h> | 63 | #include <net/ip.h> |
63 | #include <net/route.h> | 64 | #include <net/route.h> |
64 | #include <net/ip_fib.h> | 65 | #include <net/ip_fib.h> |
66 | #include <net/netlink.h> | ||
65 | 67 | ||
66 | struct ipv4_devconf ipv4_devconf = { | 68 | struct ipv4_devconf ipv4_devconf = { |
67 | .accept_redirects = 1, | 69 | .accept_redirects = 1, |
@@ -78,7 +80,15 @@ static struct ipv4_devconf ipv4_devconf_dflt = { | |||
78 | .accept_source_route = 1, | 80 | .accept_source_route = 1, |
79 | }; | 81 | }; |
80 | 82 | ||
81 | static void rtmsg_ifa(int event, struct in_ifaddr *); | 83 | static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = { |
84 | [IFA_LOCAL] = { .type = NLA_U32 }, | ||
85 | [IFA_ADDRESS] = { .type = NLA_U32 }, | ||
86 | [IFA_BROADCAST] = { .type = NLA_U32 }, | ||
87 | [IFA_ANYCAST] = { .type = NLA_U32 }, | ||
88 | [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, | ||
89 | }; | ||
90 | |||
91 | static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); | ||
82 | 92 | ||
83 | static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); | 93 | static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); |
84 | static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | 94 | static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, |
@@ -229,8 +239,8 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) | |||
229 | return 0; | 239 | return 0; |
230 | } | 240 | } |
231 | 241 | ||
232 | static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | 242 | static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, |
233 | int destroy) | 243 | int destroy, struct nlmsghdr *nlh, u32 pid) |
234 | { | 244 | { |
235 | struct in_ifaddr *promote = NULL; | 245 | struct in_ifaddr *promote = NULL; |
236 | struct in_ifaddr *ifa, *ifa1 = *ifap; | 246 | struct in_ifaddr *ifa, *ifa1 = *ifap; |
@@ -263,7 +273,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
263 | if (!do_promote) { | 273 | if (!do_promote) { |
264 | *ifap1 = ifa->ifa_next; | 274 | *ifap1 = ifa->ifa_next; |
265 | 275 | ||
266 | rtmsg_ifa(RTM_DELADDR, ifa); | 276 | rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); |
267 | blocking_notifier_call_chain(&inetaddr_chain, | 277 | blocking_notifier_call_chain(&inetaddr_chain, |
268 | NETDEV_DOWN, ifa); | 278 | NETDEV_DOWN, ifa); |
269 | inet_free_ifa(ifa); | 279 | inet_free_ifa(ifa); |
@@ -288,7 +298,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
288 | is valid, it will try to restore deleted routes... Grr. | 298 | is valid, it will try to restore deleted routes... Grr. |
289 | So that, this order is correct. | 299 | So that, this order is correct. |
290 | */ | 300 | */ |
291 | rtmsg_ifa(RTM_DELADDR, ifa1); | 301 | rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); |
292 | blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); | 302 | blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); |
293 | 303 | ||
294 | if (promote) { | 304 | if (promote) { |
@@ -300,7 +310,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
300 | } | 310 | } |
301 | 311 | ||
302 | promote->ifa_flags &= ~IFA_F_SECONDARY; | 312 | promote->ifa_flags &= ~IFA_F_SECONDARY; |
303 | rtmsg_ifa(RTM_NEWADDR, promote); | 313 | rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); |
304 | blocking_notifier_call_chain(&inetaddr_chain, | 314 | blocking_notifier_call_chain(&inetaddr_chain, |
305 | NETDEV_UP, promote); | 315 | NETDEV_UP, promote); |
306 | for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { | 316 | for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { |
@@ -319,7 +329,14 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, | |||
319 | } | 329 | } |
320 | } | 330 | } |
321 | 331 | ||
322 | static int inet_insert_ifa(struct in_ifaddr *ifa) | 332 | static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, |
333 | int destroy) | ||
334 | { | ||
335 | __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); | ||
336 | } | ||
337 | |||
338 | static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, | ||
339 | u32 pid) | ||
323 | { | 340 | { |
324 | struct in_device *in_dev = ifa->ifa_dev; | 341 | struct in_device *in_dev = ifa->ifa_dev; |
325 | struct in_ifaddr *ifa1, **ifap, **last_primary; | 342 | struct in_ifaddr *ifa1, **ifap, **last_primary; |
@@ -364,12 +381,17 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) | |||
364 | /* Send message first, then call notifier. | 381 | /* Send message first, then call notifier. |
365 | Notifier will trigger FIB update, so that | 382 | Notifier will trigger FIB update, so that |
366 | listeners of netlink will know about new ifaddr */ | 383 | listeners of netlink will know about new ifaddr */ |
367 | rtmsg_ifa(RTM_NEWADDR, ifa); | 384 | rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); |
368 | blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); | 385 | blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); |
369 | 386 | ||
370 | return 0; | 387 | return 0; |
371 | } | 388 | } |
372 | 389 | ||
390 | static int inet_insert_ifa(struct in_ifaddr *ifa) | ||
391 | { | ||
392 | return __inet_insert_ifa(ifa, NULL, 0); | ||
393 | } | ||
394 | |||
373 | static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) | 395 | static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) |
374 | { | 396 | { |
375 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 397 | struct in_device *in_dev = __in_dev_get_rtnl(dev); |
@@ -421,87 +443,134 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, | |||
421 | 443 | ||
422 | static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 444 | static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
423 | { | 445 | { |
424 | struct rtattr **rta = arg; | 446 | struct nlattr *tb[IFA_MAX+1]; |
425 | struct in_device *in_dev; | 447 | struct in_device *in_dev; |
426 | struct ifaddrmsg *ifm = NLMSG_DATA(nlh); | 448 | struct ifaddrmsg *ifm; |
427 | struct in_ifaddr *ifa, **ifap; | 449 | struct in_ifaddr *ifa, **ifap; |
450 | int err = -EINVAL; | ||
428 | 451 | ||
429 | ASSERT_RTNL(); | 452 | ASSERT_RTNL(); |
430 | 453 | ||
431 | if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL) | 454 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); |
432 | goto out; | 455 | if (err < 0) |
456 | goto errout; | ||
457 | |||
458 | ifm = nlmsg_data(nlh); | ||
459 | in_dev = inetdev_by_index(ifm->ifa_index); | ||
460 | if (in_dev == NULL) { | ||
461 | err = -ENODEV; | ||
462 | goto errout; | ||
463 | } | ||
464 | |||
433 | __in_dev_put(in_dev); | 465 | __in_dev_put(in_dev); |
434 | 466 | ||
435 | for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; | 467 | for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; |
436 | ifap = &ifa->ifa_next) { | 468 | ifap = &ifa->ifa_next) { |
437 | if ((rta[IFA_LOCAL - 1] && | 469 | if (tb[IFA_LOCAL] && |
438 | memcmp(RTA_DATA(rta[IFA_LOCAL - 1]), | 470 | ifa->ifa_local != nla_get_u32(tb[IFA_LOCAL])) |
439 | &ifa->ifa_local, 4)) || | 471 | continue; |
440 | (rta[IFA_LABEL - 1] && | 472 | |
441 | rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) || | 473 | if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) |
442 | (rta[IFA_ADDRESS - 1] && | 474 | continue; |
443 | (ifm->ifa_prefixlen != ifa->ifa_prefixlen || | 475 | |
444 | !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]), | 476 | if (tb[IFA_ADDRESS] && |
445 | ifa)))) | 477 | (ifm->ifa_prefixlen != ifa->ifa_prefixlen || |
478 | !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa))) | ||
446 | continue; | 479 | continue; |
447 | inet_del_ifa(in_dev, ifap, 1); | 480 | |
481 | __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); | ||
448 | return 0; | 482 | return 0; |
449 | } | 483 | } |
450 | out: | 484 | |
451 | return -EADDRNOTAVAIL; | 485 | err = -EADDRNOTAVAIL; |
486 | errout: | ||
487 | return err; | ||
452 | } | 488 | } |
453 | 489 | ||
454 | static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 490 | static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) |
455 | { | 491 | { |
456 | struct rtattr **rta = arg; | 492 | struct nlattr *tb[IFA_MAX+1]; |
493 | struct in_ifaddr *ifa; | ||
494 | struct ifaddrmsg *ifm; | ||
457 | struct net_device *dev; | 495 | struct net_device *dev; |
458 | struct in_device *in_dev; | 496 | struct in_device *in_dev; |
459 | struct ifaddrmsg *ifm = NLMSG_DATA(nlh); | 497 | int err = -EINVAL; |
460 | struct in_ifaddr *ifa; | ||
461 | int rc = -EINVAL; | ||
462 | 498 | ||
463 | ASSERT_RTNL(); | 499 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); |
500 | if (err < 0) | ||
501 | goto errout; | ||
464 | 502 | ||
465 | if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1]) | 503 | ifm = nlmsg_data(nlh); |
466 | goto out; | 504 | if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) |
505 | goto errout; | ||
467 | 506 | ||
468 | rc = -ENODEV; | 507 | dev = __dev_get_by_index(ifm->ifa_index); |
469 | if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) | 508 | if (dev == NULL) { |
470 | goto out; | 509 | err = -ENODEV; |
510 | goto errout; | ||
511 | } | ||
471 | 512 | ||
472 | rc = -ENOBUFS; | 513 | in_dev = __in_dev_get_rtnl(dev); |
473 | if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { | 514 | if (in_dev == NULL) { |
474 | in_dev = inetdev_init(dev); | 515 | in_dev = inetdev_init(dev); |
475 | if (!in_dev) | 516 | if (in_dev == NULL) { |
476 | goto out; | 517 | err = -ENOBUFS; |
518 | goto errout; | ||
519 | } | ||
477 | } | 520 | } |
478 | 521 | ||
479 | if ((ifa = inet_alloc_ifa()) == NULL) | 522 | ifa = inet_alloc_ifa(); |
480 | goto out; | 523 | if (ifa == NULL) { |
524 | /* | ||
525 | * A potential indev allocation can be left alive, it stays | ||
526 | * assigned to its device and is destroy with it. | ||
527 | */ | ||
528 | err = -ENOBUFS; | ||
529 | goto errout; | ||
530 | } | ||
531 | |||
532 | in_dev_hold(in_dev); | ||
533 | |||
534 | if (tb[IFA_ADDRESS] == NULL) | ||
535 | tb[IFA_ADDRESS] = tb[IFA_LOCAL]; | ||
481 | 536 | ||
482 | if (!rta[IFA_ADDRESS - 1]) | ||
483 | rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1]; | ||
484 | memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4); | ||
485 | memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4); | ||
486 | ifa->ifa_prefixlen = ifm->ifa_prefixlen; | 537 | ifa->ifa_prefixlen = ifm->ifa_prefixlen; |
487 | ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); | 538 | ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); |
488 | if (rta[IFA_BROADCAST - 1]) | ||
489 | memcpy(&ifa->ifa_broadcast, | ||
490 | RTA_DATA(rta[IFA_BROADCAST - 1]), 4); | ||
491 | if (rta[IFA_ANYCAST - 1]) | ||
492 | memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4); | ||
493 | ifa->ifa_flags = ifm->ifa_flags; | 539 | ifa->ifa_flags = ifm->ifa_flags; |
494 | ifa->ifa_scope = ifm->ifa_scope; | 540 | ifa->ifa_scope = ifm->ifa_scope; |
495 | in_dev_hold(in_dev); | 541 | ifa->ifa_dev = in_dev; |
496 | ifa->ifa_dev = in_dev; | 542 | |
497 | if (rta[IFA_LABEL - 1]) | 543 | ifa->ifa_local = nla_get_u32(tb[IFA_LOCAL]); |
498 | rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ); | 544 | ifa->ifa_address = nla_get_u32(tb[IFA_ADDRESS]); |
545 | |||
546 | if (tb[IFA_BROADCAST]) | ||
547 | ifa->ifa_broadcast = nla_get_u32(tb[IFA_BROADCAST]); | ||
548 | |||
549 | if (tb[IFA_ANYCAST]) | ||
550 | ifa->ifa_anycast = nla_get_u32(tb[IFA_ANYCAST]); | ||
551 | |||
552 | if (tb[IFA_LABEL]) | ||
553 | nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); | ||
499 | else | 554 | else |
500 | memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); | 555 | memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); |
501 | 556 | ||
502 | rc = inet_insert_ifa(ifa); | 557 | return ifa; |
503 | out: | 558 | |
504 | return rc; | 559 | errout: |
560 | return ERR_PTR(err); | ||
561 | } | ||
562 | |||
563 | static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | ||
564 | { | ||
565 | struct in_ifaddr *ifa; | ||
566 | |||
567 | ASSERT_RTNL(); | ||
568 | |||
569 | ifa = rtm_to_ifaddr(nlh); | ||
570 | if (IS_ERR(ifa)) | ||
571 | return PTR_ERR(ifa); | ||
572 | |||
573 | return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); | ||
505 | } | 574 | } |
506 | 575 | ||
507 | /* | 576 | /* |
@@ -1056,32 +1125,37 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, | |||
1056 | { | 1125 | { |
1057 | struct ifaddrmsg *ifm; | 1126 | struct ifaddrmsg *ifm; |
1058 | struct nlmsghdr *nlh; | 1127 | struct nlmsghdr *nlh; |
1059 | unsigned char *b = skb->tail; | ||
1060 | 1128 | ||
1061 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); | 1129 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); |
1062 | ifm = NLMSG_DATA(nlh); | 1130 | if (nlh == NULL) |
1131 | return -ENOBUFS; | ||
1132 | |||
1133 | ifm = nlmsg_data(nlh); | ||
1063 | ifm->ifa_family = AF_INET; | 1134 | ifm->ifa_family = AF_INET; |
1064 | ifm->ifa_prefixlen = ifa->ifa_prefixlen; | 1135 | ifm->ifa_prefixlen = ifa->ifa_prefixlen; |
1065 | ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; | 1136 | ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; |
1066 | ifm->ifa_scope = ifa->ifa_scope; | 1137 | ifm->ifa_scope = ifa->ifa_scope; |
1067 | ifm->ifa_index = ifa->ifa_dev->dev->ifindex; | 1138 | ifm->ifa_index = ifa->ifa_dev->dev->ifindex; |
1139 | |||
1068 | if (ifa->ifa_address) | 1140 | if (ifa->ifa_address) |
1069 | RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address); | 1141 | NLA_PUT_U32(skb, IFA_ADDRESS, ifa->ifa_address); |
1142 | |||
1070 | if (ifa->ifa_local) | 1143 | if (ifa->ifa_local) |
1071 | RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local); | 1144 | NLA_PUT_U32(skb, IFA_LOCAL, ifa->ifa_local); |
1145 | |||
1072 | if (ifa->ifa_broadcast) | 1146 | if (ifa->ifa_broadcast) |
1073 | RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast); | 1147 | NLA_PUT_U32(skb, IFA_BROADCAST, ifa->ifa_broadcast); |
1148 | |||
1074 | if (ifa->ifa_anycast) | 1149 | if (ifa->ifa_anycast) |
1075 | RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast); | 1150 | NLA_PUT_U32(skb, IFA_ANYCAST, ifa->ifa_anycast); |
1151 | |||
1076 | if (ifa->ifa_label[0]) | 1152 | if (ifa->ifa_label[0]) |
1077 | RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label); | 1153 | NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); |
1078 | nlh->nlmsg_len = skb->tail - b; | ||
1079 | return skb->len; | ||
1080 | 1154 | ||
1081 | nlmsg_failure: | 1155 | return nlmsg_end(skb, nlh); |
1082 | rtattr_failure: | 1156 | |
1083 | skb_trim(skb, b - skb->data); | 1157 | nla_put_failure: |
1084 | return -1; | 1158 | return nlmsg_cancel(skb, nlh); |
1085 | } | 1159 | } |
1086 | 1160 | ||
1087 | static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | 1161 | static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) |
@@ -1127,19 +1201,27 @@ done: | |||
1127 | return skb->len; | 1201 | return skb->len; |
1128 | } | 1202 | } |
1129 | 1203 | ||
1130 | static void rtmsg_ifa(int event, struct in_ifaddr* ifa) | 1204 | static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, |
1205 | u32 pid) | ||
1131 | { | 1206 | { |
1132 | int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128); | 1207 | struct sk_buff *skb; |
1133 | struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); | 1208 | u32 seq = nlh ? nlh->nlmsg_seq : 0; |
1209 | int err = -ENOBUFS; | ||
1210 | |||
1211 | skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); | ||
1212 | if (skb == NULL) | ||
1213 | goto errout; | ||
1134 | 1214 | ||
1135 | if (!skb) | 1215 | err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); |
1136 | netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); | 1216 | if (err < 0) { |
1137 | else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { | ||
1138 | kfree_skb(skb); | 1217 | kfree_skb(skb); |
1139 | netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); | 1218 | goto errout; |
1140 | } else { | ||
1141 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL); | ||
1142 | } | 1219 | } |
1220 | |||
1221 | err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); | ||
1222 | errout: | ||
1223 | if (err < 0) | ||
1224 | rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); | ||
1143 | } | 1225 | } |
1144 | 1226 | ||
1145 | static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { | 1227 | static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { |
@@ -1151,9 +1233,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { | |||
1151 | [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, | 1233 | [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, |
1152 | .dumpit = inet_dump_fib, }, | 1234 | .dumpit = inet_dump_fib, }, |
1153 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1235 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
1154 | [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, }, | 1236 | [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, }, |
1155 | [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, }, | ||
1156 | [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, }, | ||
1157 | #endif | 1237 | #endif |
1158 | }; | 1238 | }; |
1159 | 1239 | ||
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b428489f6ccd..13b29360d102 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c | |||
@@ -95,8 +95,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) | |||
95 | esph->seq_no = htonl(++x->replay.oseq); | 95 | esph->seq_no = htonl(++x->replay.oseq); |
96 | xfrm_aevent_doreplay(x); | 96 | xfrm_aevent_doreplay(x); |
97 | 97 | ||
98 | if (esp->conf.ivlen) | 98 | if (esp->conf.ivlen) { |
99 | if (unlikely(!esp->conf.ivinitted)) { | ||
100 | get_random_bytes(esp->conf.ivec, esp->conf.ivlen); | ||
101 | esp->conf.ivinitted = 1; | ||
102 | } | ||
99 | crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); | 103 | crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); |
104 | } | ||
100 | 105 | ||
101 | do { | 106 | do { |
102 | struct scatterlist *sg = &esp->sgbuf[0]; | 107 | struct scatterlist *sg = &esp->sgbuf[0]; |
@@ -248,7 +253,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) | |||
248 | * as per draft-ietf-ipsec-udp-encaps-06, | 253 | * as per draft-ietf-ipsec-udp-encaps-06, |
249 | * section 3.1.2 | 254 | * section 3.1.2 |
250 | */ | 255 | */ |
251 | if (!x->props.mode) | 256 | if (x->props.mode == XFRM_MODE_TRANSPORT) |
252 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 257 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
253 | } | 258 | } |
254 | 259 | ||
@@ -267,7 +272,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) | |||
267 | struct esp_data *esp = x->data; | 272 | struct esp_data *esp = x->data; |
268 | u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); | 273 | u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); |
269 | 274 | ||
270 | if (x->props.mode) { | 275 | if (x->props.mode == XFRM_MODE_TUNNEL) { |
271 | mtu = ALIGN(mtu + 2, blksize); | 276 | mtu = ALIGN(mtu + 2, blksize); |
272 | } else { | 277 | } else { |
273 | /* The worst case. */ | 278 | /* The worst case. */ |
@@ -378,12 +383,12 @@ static int esp_init_state(struct xfrm_state *x) | |||
378 | esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); | 383 | esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); |
379 | if (unlikely(esp->conf.ivec == NULL)) | 384 | if (unlikely(esp->conf.ivec == NULL)) |
380 | goto error; | 385 | goto error; |
381 | get_random_bytes(esp->conf.ivec, esp->conf.ivlen); | 386 | esp->conf.ivinitted = 0; |
382 | } | 387 | } |
383 | if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) | 388 | if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) |
384 | goto error; | 389 | goto error; |
385 | x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; | 390 | x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; |
386 | if (x->props.mode) | 391 | if (x->props.mode == XFRM_MODE_TUNNEL) |
387 | x->props.header_len += sizeof(struct iphdr); | 392 | x->props.header_len += sizeof(struct iphdr); |
388 | if (x->encap) { | 393 | if (x->encap) { |
389 | struct xfrm_encap_tmpl *encap = x->encap; | 394 | struct xfrm_encap_tmpl *encap = x->encap; |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ba2a70745a63..cfb527c060e4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -32,10 +32,12 @@ | |||
32 | #include <linux/inet.h> | 32 | #include <linux/inet.h> |
33 | #include <linux/inetdevice.h> | 33 | #include <linux/inetdevice.h> |
34 | #include <linux/netdevice.h> | 34 | #include <linux/netdevice.h> |
35 | #include <linux/if_addr.h> | ||
35 | #include <linux/if_arp.h> | 36 | #include <linux/if_arp.h> |
36 | #include <linux/skbuff.h> | 37 | #include <linux/skbuff.h> |
37 | #include <linux/netlink.h> | 38 | #include <linux/netlink.h> |
38 | #include <linux/init.h> | 39 | #include <linux/init.h> |
40 | #include <linux/list.h> | ||
39 | 41 | ||
40 | #include <net/ip.h> | 42 | #include <net/ip.h> |
41 | #include <net/protocol.h> | 43 | #include <net/protocol.h> |
@@ -50,48 +52,67 @@ | |||
50 | 52 | ||
51 | #ifndef CONFIG_IP_MULTIPLE_TABLES | 53 | #ifndef CONFIG_IP_MULTIPLE_TABLES |
52 | 54 | ||
53 | #define RT_TABLE_MIN RT_TABLE_MAIN | ||
54 | |||
55 | struct fib_table *ip_fib_local_table; | 55 | struct fib_table *ip_fib_local_table; |
56 | struct fib_table *ip_fib_main_table; | 56 | struct fib_table *ip_fib_main_table; |
57 | 57 | ||
58 | #else | 58 | #define FIB_TABLE_HASHSZ 1 |
59 | static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; | ||
59 | 60 | ||
60 | #define RT_TABLE_MIN 1 | 61 | #else |
61 | 62 | ||
62 | struct fib_table *fib_tables[RT_TABLE_MAX+1]; | 63 | #define FIB_TABLE_HASHSZ 256 |
64 | static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; | ||
63 | 65 | ||
64 | struct fib_table *__fib_new_table(int id) | 66 | struct fib_table *fib_new_table(u32 id) |
65 | { | 67 | { |
66 | struct fib_table *tb; | 68 | struct fib_table *tb; |
69 | unsigned int h; | ||
67 | 70 | ||
71 | if (id == 0) | ||
72 | id = RT_TABLE_MAIN; | ||
73 | tb = fib_get_table(id); | ||
74 | if (tb) | ||
75 | return tb; | ||
68 | tb = fib_hash_init(id); | 76 | tb = fib_hash_init(id); |
69 | if (!tb) | 77 | if (!tb) |
70 | return NULL; | 78 | return NULL; |
71 | fib_tables[id] = tb; | 79 | h = id & (FIB_TABLE_HASHSZ - 1); |
80 | hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); | ||
72 | return tb; | 81 | return tb; |
73 | } | 82 | } |
74 | 83 | ||
84 | struct fib_table *fib_get_table(u32 id) | ||
85 | { | ||
86 | struct fib_table *tb; | ||
87 | struct hlist_node *node; | ||
88 | unsigned int h; | ||
75 | 89 | ||
90 | if (id == 0) | ||
91 | id = RT_TABLE_MAIN; | ||
92 | h = id & (FIB_TABLE_HASHSZ - 1); | ||
93 | rcu_read_lock(); | ||
94 | hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { | ||
95 | if (tb->tb_id == id) { | ||
96 | rcu_read_unlock(); | ||
97 | return tb; | ||
98 | } | ||
99 | } | ||
100 | rcu_read_unlock(); | ||
101 | return NULL; | ||
102 | } | ||
76 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ | 103 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ |
77 | 104 | ||
78 | |||
79 | static void fib_flush(void) | 105 | static void fib_flush(void) |
80 | { | 106 | { |
81 | int flushed = 0; | 107 | int flushed = 0; |
82 | #ifdef CONFIG_IP_MULTIPLE_TABLES | ||
83 | struct fib_table *tb; | 108 | struct fib_table *tb; |
84 | int id; | 109 | struct hlist_node *node; |
110 | unsigned int h; | ||
85 | 111 | ||
86 | for (id = RT_TABLE_MAX; id>0; id--) { | 112 | for (h = 0; h < FIB_TABLE_HASHSZ; h++) { |
87 | if ((tb = fib_get_table(id))==NULL) | 113 | hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) |
88 | continue; | 114 | flushed += tb->tb_flush(tb); |
89 | flushed += tb->tb_flush(tb); | ||
90 | } | 115 | } |
91 | #else /* CONFIG_IP_MULTIPLE_TABLES */ | ||
92 | flushed += ip_fib_main_table->tb_flush(ip_fib_main_table); | ||
93 | flushed += ip_fib_local_table->tb_flush(ip_fib_local_table); | ||
94 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ | ||
95 | 116 | ||
96 | if (flushed) | 117 | if (flushed) |
97 | rt_cache_flush(-1); | 118 | rt_cache_flush(-1); |
@@ -232,42 +253,190 @@ e_inval: | |||
232 | 253 | ||
233 | #ifndef CONFIG_IP_NOSIOCRT | 254 | #ifndef CONFIG_IP_NOSIOCRT |
234 | 255 | ||
256 | static inline u32 sk_extract_addr(struct sockaddr *addr) | ||
257 | { | ||
258 | return ((struct sockaddr_in *) addr)->sin_addr.s_addr; | ||
259 | } | ||
260 | |||
261 | static int put_rtax(struct nlattr *mx, int len, int type, u32 value) | ||
262 | { | ||
263 | struct nlattr *nla; | ||
264 | |||
265 | nla = (struct nlattr *) ((char *) mx + len); | ||
266 | nla->nla_type = type; | ||
267 | nla->nla_len = nla_attr_size(4); | ||
268 | *(u32 *) nla_data(nla) = value; | ||
269 | |||
270 | return len + nla_total_size(4); | ||
271 | } | ||
272 | |||
273 | static int rtentry_to_fib_config(int cmd, struct rtentry *rt, | ||
274 | struct fib_config *cfg) | ||
275 | { | ||
276 | u32 addr; | ||
277 | int plen; | ||
278 | |||
279 | memset(cfg, 0, sizeof(*cfg)); | ||
280 | |||
281 | if (rt->rt_dst.sa_family != AF_INET) | ||
282 | return -EAFNOSUPPORT; | ||
283 | |||
284 | /* | ||
285 | * Check mask for validity: | ||
286 | * a) it must be contiguous. | ||
287 | * b) destination must have all host bits clear. | ||
288 | * c) if application forgot to set correct family (AF_INET), | ||
289 | * reject request unless it is absolutely clear i.e. | ||
290 | * both family and mask are zero. | ||
291 | */ | ||
292 | plen = 32; | ||
293 | addr = sk_extract_addr(&rt->rt_dst); | ||
294 | if (!(rt->rt_flags & RTF_HOST)) { | ||
295 | u32 mask = sk_extract_addr(&rt->rt_genmask); | ||
296 | |||
297 | if (rt->rt_genmask.sa_family != AF_INET) { | ||
298 | if (mask || rt->rt_genmask.sa_family) | ||
299 | return -EAFNOSUPPORT; | ||
300 | } | ||
301 | |||
302 | if (bad_mask(mask, addr)) | ||
303 | return -EINVAL; | ||
304 | |||
305 | plen = inet_mask_len(mask); | ||
306 | } | ||
307 | |||
308 | cfg->fc_dst_len = plen; | ||
309 | cfg->fc_dst = addr; | ||
310 | |||
311 | if (cmd != SIOCDELRT) { | ||
312 | cfg->fc_nlflags = NLM_F_CREATE; | ||
313 | cfg->fc_protocol = RTPROT_BOOT; | ||
314 | } | ||
315 | |||
316 | if (rt->rt_metric) | ||
317 | cfg->fc_priority = rt->rt_metric - 1; | ||
318 | |||
319 | if (rt->rt_flags & RTF_REJECT) { | ||
320 | cfg->fc_scope = RT_SCOPE_HOST; | ||
321 | cfg->fc_type = RTN_UNREACHABLE; | ||
322 | return 0; | ||
323 | } | ||
324 | |||
325 | cfg->fc_scope = RT_SCOPE_NOWHERE; | ||
326 | cfg->fc_type = RTN_UNICAST; | ||
327 | |||
328 | if (rt->rt_dev) { | ||
329 | char *colon; | ||
330 | struct net_device *dev; | ||
331 | char devname[IFNAMSIZ]; | ||
332 | |||
333 | if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) | ||
334 | return -EFAULT; | ||
335 | |||
336 | devname[IFNAMSIZ-1] = 0; | ||
337 | colon = strchr(devname, ':'); | ||
338 | if (colon) | ||
339 | *colon = 0; | ||
340 | dev = __dev_get_by_name(devname); | ||
341 | if (!dev) | ||
342 | return -ENODEV; | ||
343 | cfg->fc_oif = dev->ifindex; | ||
344 | if (colon) { | ||
345 | struct in_ifaddr *ifa; | ||
346 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | ||
347 | if (!in_dev) | ||
348 | return -ENODEV; | ||
349 | *colon = ':'; | ||
350 | for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) | ||
351 | if (strcmp(ifa->ifa_label, devname) == 0) | ||
352 | break; | ||
353 | if (ifa == NULL) | ||
354 | return -ENODEV; | ||
355 | cfg->fc_prefsrc = ifa->ifa_local; | ||
356 | } | ||
357 | } | ||
358 | |||
359 | addr = sk_extract_addr(&rt->rt_gateway); | ||
360 | if (rt->rt_gateway.sa_family == AF_INET && addr) { | ||
361 | cfg->fc_gw = addr; | ||
362 | if (rt->rt_flags & RTF_GATEWAY && | ||
363 | inet_addr_type(addr) == RTN_UNICAST) | ||
364 | cfg->fc_scope = RT_SCOPE_UNIVERSE; | ||
365 | } | ||
366 | |||
367 | if (cmd == SIOCDELRT) | ||
368 | return 0; | ||
369 | |||
370 | if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) | ||
371 | return -EINVAL; | ||
372 | |||
373 | if (cfg->fc_scope == RT_SCOPE_NOWHERE) | ||
374 | cfg->fc_scope = RT_SCOPE_LINK; | ||
375 | |||
376 | if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { | ||
377 | struct nlattr *mx; | ||
378 | int len = 0; | ||
379 | |||
380 | mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); | ||
381 | if (mx == NULL) | ||
382 | return -ENOMEM; | ||
383 | |||
384 | if (rt->rt_flags & RTF_MTU) | ||
385 | len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); | ||
386 | |||
387 | if (rt->rt_flags & RTF_WINDOW) | ||
388 | len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); | ||
389 | |||
390 | if (rt->rt_flags & RTF_IRTT) | ||
391 | len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); | ||
392 | |||
393 | cfg->fc_mx = mx; | ||
394 | cfg->fc_mx_len = len; | ||
395 | } | ||
396 | |||
397 | return 0; | ||
398 | } | ||
399 | |||
235 | /* | 400 | /* |
236 | * Handle IP routing ioctl calls. These are used to manipulate the routing tables | 401 | * Handle IP routing ioctl calls. These are used to manipulate the routing tables |
237 | */ | 402 | */ |
238 | 403 | ||
239 | int ip_rt_ioctl(unsigned int cmd, void __user *arg) | 404 | int ip_rt_ioctl(unsigned int cmd, void __user *arg) |
240 | { | 405 | { |
406 | struct fib_config cfg; | ||
407 | struct rtentry rt; | ||
241 | int err; | 408 | int err; |
242 | struct kern_rta rta; | ||
243 | struct rtentry r; | ||
244 | struct { | ||
245 | struct nlmsghdr nlh; | ||
246 | struct rtmsg rtm; | ||
247 | } req; | ||
248 | 409 | ||
249 | switch (cmd) { | 410 | switch (cmd) { |
250 | case SIOCADDRT: /* Add a route */ | 411 | case SIOCADDRT: /* Add a route */ |
251 | case SIOCDELRT: /* Delete a route */ | 412 | case SIOCDELRT: /* Delete a route */ |
252 | if (!capable(CAP_NET_ADMIN)) | 413 | if (!capable(CAP_NET_ADMIN)) |
253 | return -EPERM; | 414 | return -EPERM; |
254 | if (copy_from_user(&r, arg, sizeof(struct rtentry))) | 415 | |
416 | if (copy_from_user(&rt, arg, sizeof(rt))) | ||
255 | return -EFAULT; | 417 | return -EFAULT; |
418 | |||
256 | rtnl_lock(); | 419 | rtnl_lock(); |
257 | err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r); | 420 | err = rtentry_to_fib_config(cmd, &rt, &cfg); |
258 | if (err == 0) { | 421 | if (err == 0) { |
422 | struct fib_table *tb; | ||
423 | |||
259 | if (cmd == SIOCDELRT) { | 424 | if (cmd == SIOCDELRT) { |
260 | struct fib_table *tb = fib_get_table(req.rtm.rtm_table); | 425 | tb = fib_get_table(cfg.fc_table); |
261 | err = -ESRCH; | ||
262 | if (tb) | 426 | if (tb) |
263 | err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); | 427 | err = tb->tb_delete(tb, &cfg); |
428 | else | ||
429 | err = -ESRCH; | ||
264 | } else { | 430 | } else { |
265 | struct fib_table *tb = fib_new_table(req.rtm.rtm_table); | 431 | tb = fib_new_table(cfg.fc_table); |
266 | err = -ENOBUFS; | ||
267 | if (tb) | 432 | if (tb) |
268 | err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); | 433 | err = tb->tb_insert(tb, &cfg); |
434 | else | ||
435 | err = -ENOBUFS; | ||
269 | } | 436 | } |
270 | kfree(rta.rta_mx); | 437 | |
438 | /* allocated by rtentry_to_fib_config() */ | ||
439 | kfree(cfg.fc_mx); | ||
271 | } | 440 | } |
272 | rtnl_unlock(); | 441 | rtnl_unlock(); |
273 | return err; | 442 | return err; |
@@ -284,77 +453,169 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) | |||
284 | 453 | ||
285 | #endif | 454 | #endif |
286 | 455 | ||
287 | static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) | 456 | struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { |
457 | [RTA_DST] = { .type = NLA_U32 }, | ||
458 | [RTA_SRC] = { .type = NLA_U32 }, | ||
459 | [RTA_IIF] = { .type = NLA_U32 }, | ||
460 | [RTA_OIF] = { .type = NLA_U32 }, | ||
461 | [RTA_GATEWAY] = { .type = NLA_U32 }, | ||
462 | [RTA_PRIORITY] = { .type = NLA_U32 }, | ||
463 | [RTA_PREFSRC] = { .type = NLA_U32 }, | ||
464 | [RTA_METRICS] = { .type = NLA_NESTED }, | ||
465 | [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, | ||
466 | [RTA_PROTOINFO] = { .type = NLA_U32 }, | ||
467 | [RTA_FLOW] = { .type = NLA_U32 }, | ||
468 | [RTA_MP_ALGO] = { .type = NLA_U32 }, | ||
469 | }; | ||
470 | |||
471 | static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, | ||
472 | struct fib_config *cfg) | ||
288 | { | 473 | { |
289 | int i; | 474 | struct nlattr *attr; |
290 | 475 | int err, remaining; | |
291 | for (i=1; i<=RTA_MAX; i++, rta++) { | 476 | struct rtmsg *rtm; |
292 | struct rtattr *attr = *rta; | 477 | |
293 | if (attr) { | 478 | err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); |
294 | if (RTA_PAYLOAD(attr) < 4) | 479 | if (err < 0) |
295 | return -EINVAL; | 480 | goto errout; |
296 | if (i != RTA_MULTIPATH && i != RTA_METRICS) | 481 | |
297 | *rta = (struct rtattr*)RTA_DATA(attr); | 482 | memset(cfg, 0, sizeof(*cfg)); |
483 | |||
484 | rtm = nlmsg_data(nlh); | ||
485 | cfg->fc_family = rtm->rtm_family; | ||
486 | cfg->fc_dst_len = rtm->rtm_dst_len; | ||
487 | cfg->fc_src_len = rtm->rtm_src_len; | ||
488 | cfg->fc_tos = rtm->rtm_tos; | ||
489 | cfg->fc_table = rtm->rtm_table; | ||
490 | cfg->fc_protocol = rtm->rtm_protocol; | ||
491 | cfg->fc_scope = rtm->rtm_scope; | ||
492 | cfg->fc_type = rtm->rtm_type; | ||
493 | cfg->fc_flags = rtm->rtm_flags; | ||
494 | cfg->fc_nlflags = nlh->nlmsg_flags; | ||
495 | |||
496 | cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; | ||
497 | cfg->fc_nlinfo.nlh = nlh; | ||
498 | |||
499 | nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { | ||
500 | switch (attr->nla_type) { | ||
501 | case RTA_DST: | ||
502 | cfg->fc_dst = nla_get_u32(attr); | ||
503 | break; | ||
504 | case RTA_SRC: | ||
505 | cfg->fc_src = nla_get_u32(attr); | ||
506 | break; | ||
507 | case RTA_OIF: | ||
508 | cfg->fc_oif = nla_get_u32(attr); | ||
509 | break; | ||
510 | case RTA_GATEWAY: | ||
511 | cfg->fc_gw = nla_get_u32(attr); | ||
512 | break; | ||
513 | case RTA_PRIORITY: | ||
514 | cfg->fc_priority = nla_get_u32(attr); | ||
515 | break; | ||
516 | case RTA_PREFSRC: | ||
517 | cfg->fc_prefsrc = nla_get_u32(attr); | ||
518 | break; | ||
519 | case RTA_METRICS: | ||
520 | cfg->fc_mx = nla_data(attr); | ||
521 | cfg->fc_mx_len = nla_len(attr); | ||
522 | break; | ||
523 | case RTA_MULTIPATH: | ||
524 | cfg->fc_mp = nla_data(attr); | ||
525 | cfg->fc_mp_len = nla_len(attr); | ||
526 | break; | ||
527 | case RTA_FLOW: | ||
528 | cfg->fc_flow = nla_get_u32(attr); | ||
529 | break; | ||
530 | case RTA_MP_ALGO: | ||
531 | cfg->fc_mp_alg = nla_get_u32(attr); | ||
532 | break; | ||
533 | case RTA_TABLE: | ||
534 | cfg->fc_table = nla_get_u32(attr); | ||
535 | break; | ||
298 | } | 536 | } |
299 | } | 537 | } |
538 | |||
300 | return 0; | 539 | return 0; |
540 | errout: | ||
541 | return err; | ||
301 | } | 542 | } |
302 | 543 | ||
303 | int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 544 | int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
304 | { | 545 | { |
305 | struct fib_table * tb; | 546 | struct fib_config cfg; |
306 | struct rtattr **rta = arg; | 547 | struct fib_table *tb; |
307 | struct rtmsg *r = NLMSG_DATA(nlh); | 548 | int err; |
308 | 549 | ||
309 | if (inet_check_attr(r, rta)) | 550 | err = rtm_to_fib_config(skb, nlh, &cfg); |
310 | return -EINVAL; | 551 | if (err < 0) |
552 | goto errout; | ||
311 | 553 | ||
312 | tb = fib_get_table(r->rtm_table); | 554 | tb = fib_get_table(cfg.fc_table); |
313 | if (tb) | 555 | if (tb == NULL) { |
314 | return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); | 556 | err = -ESRCH; |
315 | return -ESRCH; | 557 | goto errout; |
558 | } | ||
559 | |||
560 | err = tb->tb_delete(tb, &cfg); | ||
561 | errout: | ||
562 | return err; | ||
316 | } | 563 | } |
317 | 564 | ||
318 | int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 565 | int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
319 | { | 566 | { |
320 | struct fib_table * tb; | 567 | struct fib_config cfg; |
321 | struct rtattr **rta = arg; | 568 | struct fib_table *tb; |
322 | struct rtmsg *r = NLMSG_DATA(nlh); | 569 | int err; |
323 | 570 | ||
324 | if (inet_check_attr(r, rta)) | 571 | err = rtm_to_fib_config(skb, nlh, &cfg); |
325 | return -EINVAL; | 572 | if (err < 0) |
573 | goto errout; | ||
326 | 574 | ||
327 | tb = fib_new_table(r->rtm_table); | 575 | tb = fib_new_table(cfg.fc_table); |
328 | if (tb) | 576 | if (tb == NULL) { |
329 | return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); | 577 | err = -ENOBUFS; |
330 | return -ENOBUFS; | 578 | goto errout; |
579 | } | ||
580 | |||
581 | err = tb->tb_insert(tb, &cfg); | ||
582 | errout: | ||
583 | return err; | ||
331 | } | 584 | } |
332 | 585 | ||
333 | int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | 586 | int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) |
334 | { | 587 | { |
335 | int t; | 588 | unsigned int h, s_h; |
336 | int s_t; | 589 | unsigned int e = 0, s_e; |
337 | struct fib_table *tb; | 590 | struct fib_table *tb; |
591 | struct hlist_node *node; | ||
592 | int dumped = 0; | ||
338 | 593 | ||
339 | if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && | 594 | if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && |
340 | ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) | 595 | ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) |
341 | return ip_rt_dump(skb, cb); | 596 | return ip_rt_dump(skb, cb); |
342 | 597 | ||
343 | s_t = cb->args[0]; | 598 | s_h = cb->args[0]; |
344 | if (s_t == 0) | 599 | s_e = cb->args[1]; |
345 | s_t = cb->args[0] = RT_TABLE_MIN; | 600 | |
346 | 601 | for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { | |
347 | for (t=s_t; t<=RT_TABLE_MAX; t++) { | 602 | e = 0; |
348 | if (t < s_t) continue; | 603 | hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { |
349 | if (t > s_t) | 604 | if (e < s_e) |
350 | memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); | 605 | goto next; |
351 | if ((tb = fib_get_table(t))==NULL) | 606 | if (dumped) |
352 | continue; | 607 | memset(&cb->args[2], 0, sizeof(cb->args) - |
353 | if (tb->tb_dump(tb, skb, cb) < 0) | 608 | 2 * sizeof(cb->args[0])); |
354 | break; | 609 | if (tb->tb_dump(tb, skb, cb) < 0) |
610 | goto out; | ||
611 | dumped = 1; | ||
612 | next: | ||
613 | e++; | ||
614 | } | ||
355 | } | 615 | } |
356 | 616 | out: | |
357 | cb->args[0] = t; | 617 | cb->args[1] = e; |
618 | cb->args[0] = h; | ||
358 | 619 | ||
359 | return skb->len; | 620 | return skb->len; |
360 | } | 621 | } |
@@ -366,17 +627,19 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | |||
366 | only when netlink is already locked. | 627 | only when netlink is already locked. |
367 | */ | 628 | */ |
368 | 629 | ||
369 | static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) | 630 | static void fib_magic(int cmd, int type, u32 dst, int dst_len, |
631 | struct in_ifaddr *ifa) | ||
370 | { | 632 | { |
371 | struct fib_table * tb; | 633 | struct fib_table *tb; |
372 | struct { | 634 | struct fib_config cfg = { |
373 | struct nlmsghdr nlh; | 635 | .fc_protocol = RTPROT_KERNEL, |
374 | struct rtmsg rtm; | 636 | .fc_type = type, |
375 | } req; | 637 | .fc_dst = dst, |
376 | struct kern_rta rta; | 638 | .fc_dst_len = dst_len, |
377 | 639 | .fc_prefsrc = ifa->ifa_local, | |
378 | memset(&req.rtm, 0, sizeof(req.rtm)); | 640 | .fc_oif = ifa->ifa_dev->dev->ifindex, |
379 | memset(&rta, 0, sizeof(rta)); | 641 | .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, |
642 | }; | ||
380 | 643 | ||
381 | if (type == RTN_UNICAST) | 644 | if (type == RTN_UNICAST) |
382 | tb = fib_new_table(RT_TABLE_MAIN); | 645 | tb = fib_new_table(RT_TABLE_MAIN); |
@@ -386,26 +649,17 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr | |||
386 | if (tb == NULL) | 649 | if (tb == NULL) |
387 | return; | 650 | return; |
388 | 651 | ||
389 | req.nlh.nlmsg_len = sizeof(req); | 652 | cfg.fc_table = tb->tb_id; |
390 | req.nlh.nlmsg_type = cmd; | ||
391 | req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND; | ||
392 | req.nlh.nlmsg_pid = 0; | ||
393 | req.nlh.nlmsg_seq = 0; | ||
394 | 653 | ||
395 | req.rtm.rtm_dst_len = dst_len; | 654 | if (type != RTN_LOCAL) |
396 | req.rtm.rtm_table = tb->tb_id; | 655 | cfg.fc_scope = RT_SCOPE_LINK; |
397 | req.rtm.rtm_protocol = RTPROT_KERNEL; | 656 | else |
398 | req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); | 657 | cfg.fc_scope = RT_SCOPE_HOST; |
399 | req.rtm.rtm_type = type; | ||
400 | |||
401 | rta.rta_dst = &dst; | ||
402 | rta.rta_prefsrc = &ifa->ifa_local; | ||
403 | rta.rta_oif = &ifa->ifa_dev->dev->ifindex; | ||
404 | 658 | ||
405 | if (cmd == RTM_NEWROUTE) | 659 | if (cmd == RTM_NEWROUTE) |
406 | tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); | 660 | tb->tb_insert(tb, &cfg); |
407 | else | 661 | else |
408 | tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); | 662 | tb->tb_delete(tb, &cfg); |
409 | } | 663 | } |
410 | 664 | ||
411 | void fib_add_ifaddr(struct in_ifaddr *ifa) | 665 | void fib_add_ifaddr(struct in_ifaddr *ifa) |
@@ -652,11 +906,17 @@ static struct notifier_block fib_netdev_notifier = { | |||
652 | 906 | ||
653 | void __init ip_fib_init(void) | 907 | void __init ip_fib_init(void) |
654 | { | 908 | { |
909 | unsigned int i; | ||
910 | |||
911 | for (i = 0; i < FIB_TABLE_HASHSZ; i++) | ||
912 | INIT_HLIST_HEAD(&fib_table_hash[i]); | ||
655 | #ifndef CONFIG_IP_MULTIPLE_TABLES | 913 | #ifndef CONFIG_IP_MULTIPLE_TABLES |
656 | ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); | 914 | ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); |
915 | hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); | ||
657 | ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); | 916 | ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); |
917 | hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); | ||
658 | #else | 918 | #else |
659 | fib_rules_init(); | 919 | fib4_rules_init(); |
660 | #endif | 920 | #endif |
661 | 921 | ||
662 | register_netdevice_notifier(&fib_netdev_notifier); | 922 | register_netdevice_notifier(&fib_netdev_notifier); |
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 72c633b357cf..88133b383dc5 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c | |||
@@ -379,42 +379,39 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key) | |||
379 | return NULL; | 379 | return NULL; |
380 | } | 380 | } |
381 | 381 | ||
382 | static int | 382 | static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) |
383 | fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | ||
384 | struct nlmsghdr *n, struct netlink_skb_parms *req) | ||
385 | { | 383 | { |
386 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; | 384 | struct fn_hash *table = (struct fn_hash *) tb->tb_data; |
387 | struct fib_node *new_f, *f; | 385 | struct fib_node *new_f, *f; |
388 | struct fib_alias *fa, *new_fa; | 386 | struct fib_alias *fa, *new_fa; |
389 | struct fn_zone *fz; | 387 | struct fn_zone *fz; |
390 | struct fib_info *fi; | 388 | struct fib_info *fi; |
391 | int z = r->rtm_dst_len; | 389 | u8 tos = cfg->fc_tos; |
392 | int type = r->rtm_type; | ||
393 | u8 tos = r->rtm_tos; | ||
394 | u32 key; | 390 | u32 key; |
395 | int err; | 391 | int err; |
396 | 392 | ||
397 | if (z > 32) | 393 | if (cfg->fc_dst_len > 32) |
398 | return -EINVAL; | 394 | return -EINVAL; |
399 | fz = table->fn_zones[z]; | 395 | |
400 | if (!fz && !(fz = fn_new_zone(table, z))) | 396 | fz = table->fn_zones[cfg->fc_dst_len]; |
397 | if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) | ||
401 | return -ENOBUFS; | 398 | return -ENOBUFS; |
402 | 399 | ||
403 | key = 0; | 400 | key = 0; |
404 | if (rta->rta_dst) { | 401 | if (cfg->fc_dst) { |
405 | u32 dst; | 402 | if (cfg->fc_dst & ~FZ_MASK(fz)) |
406 | memcpy(&dst, rta->rta_dst, 4); | ||
407 | if (dst & ~FZ_MASK(fz)) | ||
408 | return -EINVAL; | 403 | return -EINVAL; |
409 | key = fz_key(dst, fz); | 404 | key = fz_key(cfg->fc_dst, fz); |
410 | } | 405 | } |
411 | 406 | ||
412 | if ((fi = fib_create_info(r, rta, n, &err)) == NULL) | 407 | fi = fib_create_info(cfg); |
413 | return err; | 408 | if (IS_ERR(fi)) |
409 | return PTR_ERR(fi); | ||
414 | 410 | ||
415 | if (fz->fz_nent > (fz->fz_divisor<<1) && | 411 | if (fz->fz_nent > (fz->fz_divisor<<1) && |
416 | fz->fz_divisor < FZ_MAX_DIVISOR && | 412 | fz->fz_divisor < FZ_MAX_DIVISOR && |
417 | (z==32 || (1<<z) > fz->fz_divisor)) | 413 | (cfg->fc_dst_len == 32 || |
414 | (1 << cfg->fc_dst_len) > fz->fz_divisor)) | ||
418 | fn_rehash_zone(fz); | 415 | fn_rehash_zone(fz); |
419 | 416 | ||
420 | f = fib_find_node(fz, key); | 417 | f = fib_find_node(fz, key); |
@@ -440,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
440 | struct fib_alias *fa_orig; | 437 | struct fib_alias *fa_orig; |
441 | 438 | ||
442 | err = -EEXIST; | 439 | err = -EEXIST; |
443 | if (n->nlmsg_flags & NLM_F_EXCL) | 440 | if (cfg->fc_nlflags & NLM_F_EXCL) |
444 | goto out; | 441 | goto out; |
445 | 442 | ||
446 | if (n->nlmsg_flags & NLM_F_REPLACE) { | 443 | if (cfg->fc_nlflags & NLM_F_REPLACE) { |
447 | struct fib_info *fi_drop; | 444 | struct fib_info *fi_drop; |
448 | u8 state; | 445 | u8 state; |
449 | 446 | ||
450 | write_lock_bh(&fib_hash_lock); | 447 | write_lock_bh(&fib_hash_lock); |
451 | fi_drop = fa->fa_info; | 448 | fi_drop = fa->fa_info; |
452 | fa->fa_info = fi; | 449 | fa->fa_info = fi; |
453 | fa->fa_type = type; | 450 | fa->fa_type = cfg->fc_type; |
454 | fa->fa_scope = r->rtm_scope; | 451 | fa->fa_scope = cfg->fc_scope; |
455 | state = fa->fa_state; | 452 | state = fa->fa_state; |
456 | fa->fa_state &= ~FA_S_ACCESSED; | 453 | fa->fa_state &= ~FA_S_ACCESSED; |
457 | fib_hash_genid++; | 454 | fib_hash_genid++; |
@@ -474,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
474 | break; | 471 | break; |
475 | if (fa->fa_info->fib_priority != fi->fib_priority) | 472 | if (fa->fa_info->fib_priority != fi->fib_priority) |
476 | break; | 473 | break; |
477 | if (fa->fa_type == type && | 474 | if (fa->fa_type == cfg->fc_type && |
478 | fa->fa_scope == r->rtm_scope && | 475 | fa->fa_scope == cfg->fc_scope && |
479 | fa->fa_info == fi) | 476 | fa->fa_info == fi) |
480 | goto out; | 477 | goto out; |
481 | } | 478 | } |
482 | if (!(n->nlmsg_flags & NLM_F_APPEND)) | 479 | if (!(cfg->fc_nlflags & NLM_F_APPEND)) |
483 | fa = fa_orig; | 480 | fa = fa_orig; |
484 | } | 481 | } |
485 | 482 | ||
486 | err = -ENOENT; | 483 | err = -ENOENT; |
487 | if (!(n->nlmsg_flags&NLM_F_CREATE)) | 484 | if (!(cfg->fc_nlflags & NLM_F_CREATE)) |
488 | goto out; | 485 | goto out; |
489 | 486 | ||
490 | err = -ENOBUFS; | 487 | err = -ENOBUFS; |
@@ -506,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
506 | 503 | ||
507 | new_fa->fa_info = fi; | 504 | new_fa->fa_info = fi; |
508 | new_fa->fa_tos = tos; | 505 | new_fa->fa_tos = tos; |
509 | new_fa->fa_type = type; | 506 | new_fa->fa_type = cfg->fc_type; |
510 | new_fa->fa_scope = r->rtm_scope; | 507 | new_fa->fa_scope = cfg->fc_scope; |
511 | new_fa->fa_state = 0; | 508 | new_fa->fa_state = 0; |
512 | 509 | ||
513 | /* | 510 | /* |
@@ -526,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
526 | fz->fz_nent++; | 523 | fz->fz_nent++; |
527 | rt_cache_flush(-1); | 524 | rt_cache_flush(-1); |
528 | 525 | ||
529 | rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req); | 526 | rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, |
527 | &cfg->fc_nlinfo); | ||
530 | return 0; | 528 | return 0; |
531 | 529 | ||
532 | out_free_new_fa: | 530 | out_free_new_fa: |
@@ -537,30 +535,25 @@ out: | |||
537 | } | 535 | } |
538 | 536 | ||
539 | 537 | ||
540 | static int | 538 | static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) |
541 | fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | ||
542 | struct nlmsghdr *n, struct netlink_skb_parms *req) | ||
543 | { | 539 | { |
544 | struct fn_hash *table = (struct fn_hash*)tb->tb_data; | 540 | struct fn_hash *table = (struct fn_hash*)tb->tb_data; |
545 | struct fib_node *f; | 541 | struct fib_node *f; |
546 | struct fib_alias *fa, *fa_to_delete; | 542 | struct fib_alias *fa, *fa_to_delete; |
547 | int z = r->rtm_dst_len; | ||
548 | struct fn_zone *fz; | 543 | struct fn_zone *fz; |
549 | u32 key; | 544 | u32 key; |
550 | u8 tos = r->rtm_tos; | ||
551 | 545 | ||
552 | if (z > 32) | 546 | if (cfg->fc_dst_len > 32) |
553 | return -EINVAL; | 547 | return -EINVAL; |
554 | if ((fz = table->fn_zones[z]) == NULL) | 548 | |
549 | if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) | ||
555 | return -ESRCH; | 550 | return -ESRCH; |
556 | 551 | ||
557 | key = 0; | 552 | key = 0; |
558 | if (rta->rta_dst) { | 553 | if (cfg->fc_dst) { |
559 | u32 dst; | 554 | if (cfg->fc_dst & ~FZ_MASK(fz)) |
560 | memcpy(&dst, rta->rta_dst, 4); | ||
561 | if (dst & ~FZ_MASK(fz)) | ||
562 | return -EINVAL; | 555 | return -EINVAL; |
563 | key = fz_key(dst, fz); | 556 | key = fz_key(cfg->fc_dst, fz); |
564 | } | 557 | } |
565 | 558 | ||
566 | f = fib_find_node(fz, key); | 559 | f = fib_find_node(fz, key); |
@@ -568,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
568 | if (!f) | 561 | if (!f) |
569 | fa = NULL; | 562 | fa = NULL; |
570 | else | 563 | else |
571 | fa = fib_find_alias(&f->fn_alias, tos, 0); | 564 | fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); |
572 | if (!fa) | 565 | if (!fa) |
573 | return -ESRCH; | 566 | return -ESRCH; |
574 | 567 | ||
@@ -577,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
577 | list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { | 570 | list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { |
578 | struct fib_info *fi = fa->fa_info; | 571 | struct fib_info *fi = fa->fa_info; |
579 | 572 | ||
580 | if (fa->fa_tos != tos) | 573 | if (fa->fa_tos != cfg->fc_tos) |
581 | break; | 574 | break; |
582 | 575 | ||
583 | if ((!r->rtm_type || | 576 | if ((!cfg->fc_type || |
584 | fa->fa_type == r->rtm_type) && | 577 | fa->fa_type == cfg->fc_type) && |
585 | (r->rtm_scope == RT_SCOPE_NOWHERE || | 578 | (cfg->fc_scope == RT_SCOPE_NOWHERE || |
586 | fa->fa_scope == r->rtm_scope) && | 579 | fa->fa_scope == cfg->fc_scope) && |
587 | (!r->rtm_protocol || | 580 | (!cfg->fc_protocol || |
588 | fi->fib_protocol == r->rtm_protocol) && | 581 | fi->fib_protocol == cfg->fc_protocol) && |
589 | fib_nh_match(r, n, rta, fi) == 0) { | 582 | fib_nh_match(cfg, fi) == 0) { |
590 | fa_to_delete = fa; | 583 | fa_to_delete = fa; |
591 | break; | 584 | break; |
592 | } | 585 | } |
@@ -596,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
596 | int kill_fn; | 589 | int kill_fn; |
597 | 590 | ||
598 | fa = fa_to_delete; | 591 | fa = fa_to_delete; |
599 | rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req); | 592 | rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, |
593 | tb->tb_id, &cfg->fc_nlinfo); | ||
600 | 594 | ||
601 | kill_fn = 0; | 595 | kill_fn = 0; |
602 | write_lock_bh(&fib_hash_lock); | 596 | write_lock_bh(&fib_hash_lock); |
@@ -684,7 +678,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, | |||
684 | struct fib_node *f; | 678 | struct fib_node *f; |
685 | int i, s_i; | 679 | int i, s_i; |
686 | 680 | ||
687 | s_i = cb->args[3]; | 681 | s_i = cb->args[4]; |
688 | i = 0; | 682 | i = 0; |
689 | hlist_for_each_entry(f, node, head, fn_hash) { | 683 | hlist_for_each_entry(f, node, head, fn_hash) { |
690 | struct fib_alias *fa; | 684 | struct fib_alias *fa; |
@@ -699,19 +693,19 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, | |||
699 | tb->tb_id, | 693 | tb->tb_id, |
700 | fa->fa_type, | 694 | fa->fa_type, |
701 | fa->fa_scope, | 695 | fa->fa_scope, |
702 | &f->fn_key, | 696 | f->fn_key, |
703 | fz->fz_order, | 697 | fz->fz_order, |
704 | fa->fa_tos, | 698 | fa->fa_tos, |
705 | fa->fa_info, | 699 | fa->fa_info, |
706 | NLM_F_MULTI) < 0) { | 700 | NLM_F_MULTI) < 0) { |
707 | cb->args[3] = i; | 701 | cb->args[4] = i; |
708 | return -1; | 702 | return -1; |
709 | } | 703 | } |
710 | next: | 704 | next: |
711 | i++; | 705 | i++; |
712 | } | 706 | } |
713 | } | 707 | } |
714 | cb->args[3] = i; | 708 | cb->args[4] = i; |
715 | return skb->len; | 709 | return skb->len; |
716 | } | 710 | } |
717 | 711 | ||
@@ -722,21 +716,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, | |||
722 | { | 716 | { |
723 | int h, s_h; | 717 | int h, s_h; |
724 | 718 | ||
725 | s_h = cb->args[2]; | 719 | s_h = cb->args[3]; |
726 | for (h=0; h < fz->fz_divisor; h++) { | 720 | for (h=0; h < fz->fz_divisor; h++) { |
727 | if (h < s_h) continue; | 721 | if (h < s_h) continue; |
728 | if (h > s_h) | 722 | if (h > s_h) |
729 | memset(&cb->args[3], 0, | 723 | memset(&cb->args[4], 0, |
730 | sizeof(cb->args) - 3*sizeof(cb->args[0])); | 724 | sizeof(cb->args) - 4*sizeof(cb->args[0])); |
731 | if (fz->fz_hash == NULL || | 725 | if (fz->fz_hash == NULL || |
732 | hlist_empty(&fz->fz_hash[h])) | 726 | hlist_empty(&fz->fz_hash[h])) |
733 | continue; | 727 | continue; |
734 | if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) { | 728 | if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) { |
735 | cb->args[2] = h; | 729 | cb->args[3] = h; |
736 | return -1; | 730 | return -1; |
737 | } | 731 | } |
738 | } | 732 | } |
739 | cb->args[2] = h; | 733 | cb->args[3] = h; |
740 | return skb->len; | 734 | return skb->len; |
741 | } | 735 | } |
742 | 736 | ||
@@ -746,28 +740,28 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin | |||
746 | struct fn_zone *fz; | 740 | struct fn_zone *fz; |
747 | struct fn_hash *table = (struct fn_hash*)tb->tb_data; | 741 | struct fn_hash *table = (struct fn_hash*)tb->tb_data; |
748 | 742 | ||
749 | s_m = cb->args[1]; | 743 | s_m = cb->args[2]; |
750 | read_lock(&fib_hash_lock); | 744 | read_lock(&fib_hash_lock); |
751 | for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { | 745 | for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { |
752 | if (m < s_m) continue; | 746 | if (m < s_m) continue; |
753 | if (m > s_m) | 747 | if (m > s_m) |
754 | memset(&cb->args[2], 0, | 748 | memset(&cb->args[3], 0, |
755 | sizeof(cb->args) - 2*sizeof(cb->args[0])); | 749 | sizeof(cb->args) - 3*sizeof(cb->args[0])); |
756 | if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { | 750 | if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { |
757 | cb->args[1] = m; | 751 | cb->args[2] = m; |
758 | read_unlock(&fib_hash_lock); | 752 | read_unlock(&fib_hash_lock); |
759 | return -1; | 753 | return -1; |
760 | } | 754 | } |
761 | } | 755 | } |
762 | read_unlock(&fib_hash_lock); | 756 | read_unlock(&fib_hash_lock); |
763 | cb->args[1] = m; | 757 | cb->args[2] = m; |
764 | return skb->len; | 758 | return skb->len; |
765 | } | 759 | } |
766 | 760 | ||
767 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 761 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
768 | struct fib_table * fib_hash_init(int id) | 762 | struct fib_table * fib_hash_init(u32 id) |
769 | #else | 763 | #else |
770 | struct fib_table * __init fib_hash_init(int id) | 764 | struct fib_table * __init fib_hash_init(u32 id) |
771 | #endif | 765 | #endif |
772 | { | 766 | { |
773 | struct fib_table *tb; | 767 | struct fib_table *tb; |
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index ef6609ea0eb7..fd6f7769f8ab 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h | |||
@@ -23,19 +23,14 @@ extern int fib_semantic_match(struct list_head *head, | |||
23 | struct fib_result *res, __u32 zone, __u32 mask, | 23 | struct fib_result *res, __u32 zone, __u32 mask, |
24 | int prefixlen); | 24 | int prefixlen); |
25 | extern void fib_release_info(struct fib_info *); | 25 | extern void fib_release_info(struct fib_info *); |
26 | extern struct fib_info *fib_create_info(const struct rtmsg *r, | 26 | extern struct fib_info *fib_create_info(struct fib_config *cfg); |
27 | struct kern_rta *rta, | 27 | extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); |
28 | const struct nlmsghdr *, | ||
29 | int *err); | ||
30 | extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, | ||
31 | struct kern_rta *rta, struct fib_info *fi); | ||
32 | extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | 28 | extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, |
33 | u8 tb_id, u8 type, u8 scope, void *dst, | 29 | u32 tb_id, u8 type, u8 scope, u32 dst, |
34 | int dst_len, u8 tos, struct fib_info *fi, | 30 | int dst_len, u8 tos, struct fib_info *fi, |
35 | unsigned int); | 31 | unsigned int); |
36 | extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, | 32 | extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, |
37 | int z, int tb_id, | 33 | int dst_len, u32 tb_id, struct nl_info *info); |
38 | struct nlmsghdr *n, struct netlink_skb_parms *req); | ||
39 | extern struct fib_alias *fib_find_alias(struct list_head *fah, | 34 | extern struct fib_alias *fib_find_alias(struct list_head *fah, |
40 | u8 tos, u32 prio); | 35 | u8 tos, u32 prio); |
41 | extern int fib_detect_death(struct fib_info *fi, int order, | 36 | extern int fib_detect_death(struct fib_info *fi, int order, |
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 79b04718bdfd..52b2adae4f22 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -5,9 +5,8 @@ | |||
5 | * | 5 | * |
6 | * IPv4 Forwarding Information Base: policy rules. | 6 | * IPv4 Forwarding Information Base: policy rules. |
7 | * | 7 | * |
8 | * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $ | ||
9 | * | ||
10 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | 8 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
9 | * Thomas Graf <tgraf@suug.ch> | ||
11 | * | 10 | * |
12 | * This program is free software; you can redistribute it and/or | 11 | * This program is free software; you can redistribute it and/or |
13 | * modify it under the terms of the GNU General Public License | 12 | * modify it under the terms of the GNU General Public License |
@@ -19,463 +18,350 @@ | |||
19 | * Marc Boucher : routing by fwmark | 18 | * Marc Boucher : routing by fwmark |
20 | */ | 19 | */ |
21 | 20 | ||
22 | #include <asm/uaccess.h> | ||
23 | #include <asm/system.h> | ||
24 | #include <linux/bitops.h> | ||
25 | #include <linux/types.h> | 21 | #include <linux/types.h> |
26 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
27 | #include <linux/sched.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/string.h> | ||
30 | #include <linux/socket.h> | ||
31 | #include <linux/sockios.h> | ||
32 | #include <linux/errno.h> | ||
33 | #include <linux/in.h> | ||
34 | #include <linux/inet.h> | ||
35 | #include <linux/inetdevice.h> | ||
36 | #include <linux/netdevice.h> | 23 | #include <linux/netdevice.h> |
37 | #include <linux/if_arp.h> | ||
38 | #include <linux/proc_fs.h> | ||
39 | #include <linux/skbuff.h> | ||
40 | #include <linux/netlink.h> | 24 | #include <linux/netlink.h> |
25 | #include <linux/inetdevice.h> | ||
41 | #include <linux/init.h> | 26 | #include <linux/init.h> |
42 | #include <linux/list.h> | 27 | #include <linux/list.h> |
43 | #include <linux/rcupdate.h> | 28 | #include <linux/rcupdate.h> |
44 | |||
45 | #include <net/ip.h> | 29 | #include <net/ip.h> |
46 | #include <net/protocol.h> | ||
47 | #include <net/route.h> | 30 | #include <net/route.h> |
48 | #include <net/tcp.h> | 31 | #include <net/tcp.h> |
49 | #include <net/sock.h> | ||
50 | #include <net/ip_fib.h> | 32 | #include <net/ip_fib.h> |
33 | #include <net/fib_rules.h> | ||
51 | 34 | ||
52 | #define FRprintk(a...) | 35 | static struct fib_rules_ops fib4_rules_ops; |
53 | 36 | ||
54 | struct fib_rule | 37 | struct fib4_rule |
55 | { | 38 | { |
56 | struct hlist_node hlist; | 39 | struct fib_rule common; |
57 | atomic_t r_clntref; | 40 | u8 dst_len; |
58 | u32 r_preference; | 41 | u8 src_len; |
59 | unsigned char r_table; | 42 | u8 tos; |
60 | unsigned char r_action; | 43 | u32 src; |
61 | unsigned char r_dst_len; | 44 | u32 srcmask; |
62 | unsigned char r_src_len; | 45 | u32 dst; |
63 | u32 r_src; | 46 | u32 dstmask; |
64 | u32 r_srcmask; | ||
65 | u32 r_dst; | ||
66 | u32 r_dstmask; | ||
67 | u32 r_srcmap; | ||
68 | u8 r_flags; | ||
69 | u8 r_tos; | ||
70 | #ifdef CONFIG_IP_ROUTE_FWMARK | 47 | #ifdef CONFIG_IP_ROUTE_FWMARK |
71 | u32 r_fwmark; | 48 | u32 fwmark; |
49 | u32 fwmask; | ||
72 | #endif | 50 | #endif |
73 | int r_ifindex; | ||
74 | #ifdef CONFIG_NET_CLS_ROUTE | 51 | #ifdef CONFIG_NET_CLS_ROUTE |
75 | __u32 r_tclassid; | 52 | u32 tclassid; |
76 | #endif | 53 | #endif |
77 | char r_ifname[IFNAMSIZ]; | ||
78 | int r_dead; | ||
79 | struct rcu_head rcu; | ||
80 | }; | 54 | }; |
81 | 55 | ||
82 | static struct fib_rule default_rule = { | 56 | static struct fib4_rule default_rule = { |
83 | .r_clntref = ATOMIC_INIT(2), | 57 | .common = { |
84 | .r_preference = 0x7FFF, | 58 | .refcnt = ATOMIC_INIT(2), |
85 | .r_table = RT_TABLE_DEFAULT, | 59 | .pref = 0x7FFF, |
86 | .r_action = RTN_UNICAST, | 60 | .table = RT_TABLE_DEFAULT, |
61 | .action = FR_ACT_TO_TBL, | ||
62 | }, | ||
87 | }; | 63 | }; |
88 | 64 | ||
89 | static struct fib_rule main_rule = { | 65 | static struct fib4_rule main_rule = { |
90 | .r_clntref = ATOMIC_INIT(2), | 66 | .common = { |
91 | .r_preference = 0x7FFE, | 67 | .refcnt = ATOMIC_INIT(2), |
92 | .r_table = RT_TABLE_MAIN, | 68 | .pref = 0x7FFE, |
93 | .r_action = RTN_UNICAST, | 69 | .table = RT_TABLE_MAIN, |
70 | .action = FR_ACT_TO_TBL, | ||
71 | }, | ||
94 | }; | 72 | }; |
95 | 73 | ||
96 | static struct fib_rule local_rule = { | 74 | static struct fib4_rule local_rule = { |
97 | .r_clntref = ATOMIC_INIT(2), | 75 | .common = { |
98 | .r_table = RT_TABLE_LOCAL, | 76 | .refcnt = ATOMIC_INIT(2), |
99 | .r_action = RTN_UNICAST, | 77 | .table = RT_TABLE_LOCAL, |
78 | .action = FR_ACT_TO_TBL, | ||
79 | .flags = FIB_RULE_PERMANENT, | ||
80 | }, | ||
100 | }; | 81 | }; |
101 | 82 | ||
102 | static struct hlist_head fib_rules; | 83 | static LIST_HEAD(fib4_rules); |
103 | 84 | ||
104 | /* writer func called from netlink -- rtnl_sem hold*/ | 85 | #ifdef CONFIG_NET_CLS_ROUTE |
105 | 86 | u32 fib_rules_tclass(struct fib_result *res) | |
106 | static void rtmsg_rule(int, struct fib_rule *); | ||
107 | |||
108 | int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | ||
109 | { | 87 | { |
110 | struct rtattr **rta = arg; | 88 | return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; |
111 | struct rtmsg *rtm = NLMSG_DATA(nlh); | ||
112 | struct fib_rule *r; | ||
113 | struct hlist_node *node; | ||
114 | int err = -ESRCH; | ||
115 | |||
116 | hlist_for_each_entry(r, node, &fib_rules, hlist) { | ||
117 | if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) && | ||
118 | rtm->rtm_src_len == r->r_src_len && | ||
119 | rtm->rtm_dst_len == r->r_dst_len && | ||
120 | (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) && | ||
121 | rtm->rtm_tos == r->r_tos && | ||
122 | #ifdef CONFIG_IP_ROUTE_FWMARK | ||
123 | (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && | ||
124 | #endif | ||
125 | (!rtm->rtm_type || rtm->rtm_type == r->r_action) && | ||
126 | (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && | ||
127 | (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && | ||
128 | (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { | ||
129 | err = -EPERM; | ||
130 | if (r == &local_rule) | ||
131 | break; | ||
132 | |||
133 | hlist_del_rcu(&r->hlist); | ||
134 | r->r_dead = 1; | ||
135 | rtmsg_rule(RTM_DELRULE, r); | ||
136 | fib_rule_put(r); | ||
137 | err = 0; | ||
138 | break; | ||
139 | } | ||
140 | } | ||
141 | return err; | ||
142 | } | 89 | } |
90 | #endif | ||
143 | 91 | ||
144 | /* Allocate new unique table id */ | 92 | int fib_lookup(struct flowi *flp, struct fib_result *res) |
145 | |||
146 | static struct fib_table *fib_empty_table(void) | ||
147 | { | 93 | { |
148 | int id; | 94 | struct fib_lookup_arg arg = { |
95 | .result = res, | ||
96 | }; | ||
97 | int err; | ||
149 | 98 | ||
150 | for (id = 1; id <= RT_TABLE_MAX; id++) | 99 | err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); |
151 | if (fib_tables[id] == NULL) | 100 | res->r = arg.rule; |
152 | return __fib_new_table(id); | ||
153 | return NULL; | ||
154 | } | ||
155 | 101 | ||
156 | static inline void fib_rule_put_rcu(struct rcu_head *head) | 102 | return err; |
157 | { | ||
158 | struct fib_rule *r = container_of(head, struct fib_rule, rcu); | ||
159 | kfree(r); | ||
160 | } | 103 | } |
161 | 104 | ||
162 | void fib_rule_put(struct fib_rule *r) | 105 | static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, |
106 | int flags, struct fib_lookup_arg *arg) | ||
163 | { | 107 | { |
164 | if (atomic_dec_and_test(&r->r_clntref)) { | 108 | int err = -EAGAIN; |
165 | if (r->r_dead) | 109 | struct fib_table *tbl; |
166 | call_rcu(&r->rcu, fib_rule_put_rcu); | 110 | |
167 | else | 111 | switch (rule->action) { |
168 | printk("Freeing alive rule %p\n", r); | 112 | case FR_ACT_TO_TBL: |
113 | break; | ||
114 | |||
115 | case FR_ACT_UNREACHABLE: | ||
116 | err = -ENETUNREACH; | ||
117 | goto errout; | ||
118 | |||
119 | case FR_ACT_PROHIBIT: | ||
120 | err = -EACCES; | ||
121 | goto errout; | ||
122 | |||
123 | case FR_ACT_BLACKHOLE: | ||
124 | default: | ||
125 | err = -EINVAL; | ||
126 | goto errout; | ||
169 | } | 127 | } |
128 | |||
129 | if ((tbl = fib_get_table(rule->table)) == NULL) | ||
130 | goto errout; | ||
131 | |||
132 | err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); | ||
133 | if (err > 0) | ||
134 | err = -EAGAIN; | ||
135 | errout: | ||
136 | return err; | ||
170 | } | 137 | } |
171 | 138 | ||
172 | /* writer func called from netlink -- rtnl_sem hold*/ | ||
173 | 139 | ||
174 | int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 140 | void fib_select_default(const struct flowi *flp, struct fib_result *res) |
175 | { | 141 | { |
176 | struct rtattr **rta = arg; | 142 | if (res->r && res->r->action == FR_ACT_TO_TBL && |
177 | struct rtmsg *rtm = NLMSG_DATA(nlh); | 143 | FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { |
178 | struct fib_rule *r, *new_r, *last = NULL; | 144 | struct fib_table *tb; |
179 | struct hlist_node *node = NULL; | 145 | if ((tb = fib_get_table(res->r->table)) != NULL) |
180 | unsigned char table_id; | 146 | tb->tb_select_default(tb, flp, res); |
181 | |||
182 | if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || | ||
183 | (rtm->rtm_tos & ~IPTOS_TOS_MASK)) | ||
184 | return -EINVAL; | ||
185 | |||
186 | if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) | ||
187 | return -EINVAL; | ||
188 | |||
189 | table_id = rtm->rtm_table; | ||
190 | if (table_id == RT_TABLE_UNSPEC) { | ||
191 | struct fib_table *table; | ||
192 | if (rtm->rtm_type == RTN_UNICAST) { | ||
193 | if ((table = fib_empty_table()) == NULL) | ||
194 | return -ENOBUFS; | ||
195 | table_id = table->tb_id; | ||
196 | } | ||
197 | } | 147 | } |
148 | } | ||
198 | 149 | ||
199 | new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); | 150 | static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) |
200 | if (!new_r) | 151 | { |
201 | return -ENOMEM; | 152 | struct fib4_rule *r = (struct fib4_rule *) rule; |
202 | 153 | u32 daddr = fl->fl4_dst; | |
203 | if (rta[RTA_SRC-1]) | 154 | u32 saddr = fl->fl4_src; |
204 | memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4); | ||
205 | if (rta[RTA_DST-1]) | ||
206 | memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4); | ||
207 | if (rta[RTA_GATEWAY-1]) | ||
208 | memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4); | ||
209 | new_r->r_src_len = rtm->rtm_src_len; | ||
210 | new_r->r_dst_len = rtm->rtm_dst_len; | ||
211 | new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len); | ||
212 | new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len); | ||
213 | new_r->r_tos = rtm->rtm_tos; | ||
214 | #ifdef CONFIG_IP_ROUTE_FWMARK | ||
215 | if (rta[RTA_PROTOINFO-1]) | ||
216 | memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); | ||
217 | #endif | ||
218 | new_r->r_action = rtm->rtm_type; | ||
219 | new_r->r_flags = rtm->rtm_flags; | ||
220 | if (rta[RTA_PRIORITY-1]) | ||
221 | memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); | ||
222 | new_r->r_table = table_id; | ||
223 | if (rta[RTA_IIF-1]) { | ||
224 | struct net_device *dev; | ||
225 | rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); | ||
226 | new_r->r_ifindex = -1; | ||
227 | dev = __dev_get_by_name(new_r->r_ifname); | ||
228 | if (dev) | ||
229 | new_r->r_ifindex = dev->ifindex; | ||
230 | } | ||
231 | #ifdef CONFIG_NET_CLS_ROUTE | ||
232 | if (rta[RTA_FLOW-1]) | ||
233 | memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); | ||
234 | #endif | ||
235 | r = container_of(fib_rules.first, struct fib_rule, hlist); | ||
236 | 155 | ||
237 | if (!new_r->r_preference) { | 156 | if (((saddr ^ r->src) & r->srcmask) || |
238 | if (r && r->hlist.next != NULL) { | 157 | ((daddr ^ r->dst) & r->dstmask)) |
239 | r = container_of(r->hlist.next, struct fib_rule, hlist); | 158 | return 0; |
240 | if (r->r_preference) | ||
241 | new_r->r_preference = r->r_preference - 1; | ||
242 | } | ||
243 | } | ||
244 | 159 | ||
245 | hlist_for_each_entry(r, node, &fib_rules, hlist) { | 160 | if (r->tos && (r->tos != fl->fl4_tos)) |
246 | if (r->r_preference > new_r->r_preference) | 161 | return 0; |
247 | break; | ||
248 | last = r; | ||
249 | } | ||
250 | atomic_inc(&new_r->r_clntref); | ||
251 | 162 | ||
252 | if (last) | 163 | #ifdef CONFIG_IP_ROUTE_FWMARK |
253 | hlist_add_after_rcu(&last->hlist, &new_r->hlist); | 164 | if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask) |
254 | else | 165 | return 0; |
255 | hlist_add_before_rcu(&new_r->hlist, &r->hlist); | 166 | #endif |
256 | 167 | ||
257 | rtmsg_rule(RTM_NEWRULE, new_r); | 168 | return 1; |
258 | return 0; | ||
259 | } | 169 | } |
260 | 170 | ||
261 | #ifdef CONFIG_NET_CLS_ROUTE | 171 | static struct fib_table *fib_empty_table(void) |
262 | u32 fib_rules_tclass(struct fib_result *res) | ||
263 | { | 172 | { |
264 | if (res->r) | 173 | u32 id; |
265 | return res->r->r_tclassid; | 174 | |
266 | return 0; | 175 | for (id = 1; id <= RT_TABLE_MAX; id++) |
176 | if (fib_get_table(id) == NULL) | ||
177 | return fib_new_table(id); | ||
178 | return NULL; | ||
267 | } | 179 | } |
268 | #endif | ||
269 | 180 | ||
270 | /* callers should hold rtnl semaphore */ | 181 | static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { |
182 | [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, | ||
183 | [FRA_PRIORITY] = { .type = NLA_U32 }, | ||
184 | [FRA_SRC] = { .type = NLA_U32 }, | ||
185 | [FRA_DST] = { .type = NLA_U32 }, | ||
186 | [FRA_FWMARK] = { .type = NLA_U32 }, | ||
187 | [FRA_FWMASK] = { .type = NLA_U32 }, | ||
188 | [FRA_FLOW] = { .type = NLA_U32 }, | ||
189 | [FRA_TABLE] = { .type = NLA_U32 }, | ||
190 | }; | ||
271 | 191 | ||
272 | static void fib_rules_detach(struct net_device *dev) | 192 | static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, |
193 | struct nlmsghdr *nlh, struct fib_rule_hdr *frh, | ||
194 | struct nlattr **tb) | ||
273 | { | 195 | { |
274 | struct hlist_node *node; | 196 | int err = -EINVAL; |
275 | struct fib_rule *r; | 197 | struct fib4_rule *rule4 = (struct fib4_rule *) rule; |
198 | |||
199 | if (frh->src_len > 32 || frh->dst_len > 32 || | ||
200 | (frh->tos & ~IPTOS_TOS_MASK)) | ||
201 | goto errout; | ||
202 | |||
203 | if (rule->table == RT_TABLE_UNSPEC) { | ||
204 | if (rule->action == FR_ACT_TO_TBL) { | ||
205 | struct fib_table *table; | ||
276 | 206 | ||
277 | hlist_for_each_entry(r, node, &fib_rules, hlist) { | 207 | table = fib_empty_table(); |
278 | if (r->r_ifindex == dev->ifindex) | 208 | if (table == NULL) { |
279 | r->r_ifindex = -1; | 209 | err = -ENOBUFS; |
210 | goto errout; | ||
211 | } | ||
280 | 212 | ||
213 | rule->table = table->tb_id; | ||
214 | } | ||
281 | } | 215 | } |
282 | } | ||
283 | 216 | ||
284 | /* callers should hold rtnl semaphore */ | 217 | if (tb[FRA_SRC]) |
218 | rule4->src = nla_get_u32(tb[FRA_SRC]); | ||
285 | 219 | ||
286 | static void fib_rules_attach(struct net_device *dev) | 220 | if (tb[FRA_DST]) |
287 | { | 221 | rule4->dst = nla_get_u32(tb[FRA_DST]); |
288 | struct hlist_node *node; | ||
289 | struct fib_rule *r; | ||
290 | 222 | ||
291 | hlist_for_each_entry(r, node, &fib_rules, hlist) { | 223 | #ifdef CONFIG_IP_ROUTE_FWMARK |
292 | if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) | 224 | if (tb[FRA_FWMARK]) { |
293 | r->r_ifindex = dev->ifindex; | 225 | rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]); |
226 | if (rule4->fwmark) | ||
227 | /* compatibility: if the mark value is non-zero all bits | ||
228 | * are compared unless a mask is explicitly specified. | ||
229 | */ | ||
230 | rule4->fwmask = 0xFFFFFFFF; | ||
294 | } | 231 | } |
232 | |||
233 | if (tb[FRA_FWMASK]) | ||
234 | rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]); | ||
235 | #endif | ||
236 | |||
237 | #ifdef CONFIG_NET_CLS_ROUTE | ||
238 | if (tb[FRA_FLOW]) | ||
239 | rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); | ||
240 | #endif | ||
241 | |||
242 | rule4->src_len = frh->src_len; | ||
243 | rule4->srcmask = inet_make_mask(rule4->src_len); | ||
244 | rule4->dst_len = frh->dst_len; | ||
245 | rule4->dstmask = inet_make_mask(rule4->dst_len); | ||
246 | rule4->tos = frh->tos; | ||
247 | |||
248 | err = 0; | ||
249 | errout: | ||
250 | return err; | ||
295 | } | 251 | } |
296 | 252 | ||
297 | int fib_lookup(const struct flowi *flp, struct fib_result *res) | 253 | static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, |
254 | struct nlattr **tb) | ||
298 | { | 255 | { |
299 | int err; | 256 | struct fib4_rule *rule4 = (struct fib4_rule *) rule; |
300 | struct fib_rule *r, *policy; | ||
301 | struct fib_table *tb; | ||
302 | struct hlist_node *node; | ||
303 | 257 | ||
304 | u32 daddr = flp->fl4_dst; | 258 | if (frh->src_len && (rule4->src_len != frh->src_len)) |
305 | u32 saddr = flp->fl4_src; | 259 | return 0; |
306 | 260 | ||
307 | FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", | 261 | if (frh->dst_len && (rule4->dst_len != frh->dst_len)) |
308 | NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src)); | 262 | return 0; |
309 | 263 | ||
310 | rcu_read_lock(); | 264 | if (frh->tos && (rule4->tos != frh->tos)) |
265 | return 0; | ||
311 | 266 | ||
312 | hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) { | ||
313 | if (((saddr^r->r_src) & r->r_srcmask) || | ||
314 | ((daddr^r->r_dst) & r->r_dstmask) || | ||
315 | (r->r_tos && r->r_tos != flp->fl4_tos) || | ||
316 | #ifdef CONFIG_IP_ROUTE_FWMARK | 267 | #ifdef CONFIG_IP_ROUTE_FWMARK |
317 | (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) || | 268 | if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK]))) |
269 | return 0; | ||
270 | |||
271 | if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK]))) | ||
272 | return 0; | ||
318 | #endif | 273 | #endif |
319 | (r->r_ifindex && r->r_ifindex != flp->iif)) | ||
320 | continue; | ||
321 | |||
322 | FRprintk("tb %d r %d ", r->r_table, r->r_action); | ||
323 | switch (r->r_action) { | ||
324 | case RTN_UNICAST: | ||
325 | policy = r; | ||
326 | break; | ||
327 | case RTN_UNREACHABLE: | ||
328 | rcu_read_unlock(); | ||
329 | return -ENETUNREACH; | ||
330 | default: | ||
331 | case RTN_BLACKHOLE: | ||
332 | rcu_read_unlock(); | ||
333 | return -EINVAL; | ||
334 | case RTN_PROHIBIT: | ||
335 | rcu_read_unlock(); | ||
336 | return -EACCES; | ||
337 | } | ||
338 | 274 | ||
339 | if ((tb = fib_get_table(r->r_table)) == NULL) | 275 | #ifdef CONFIG_NET_CLS_ROUTE |
340 | continue; | 276 | if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) |
341 | err = tb->tb_lookup(tb, flp, res); | 277 | return 0; |
342 | if (err == 0) { | 278 | #endif |
343 | res->r = policy; | ||
344 | if (policy) | ||
345 | atomic_inc(&policy->r_clntref); | ||
346 | rcu_read_unlock(); | ||
347 | return 0; | ||
348 | } | ||
349 | if (err < 0 && err != -EAGAIN) { | ||
350 | rcu_read_unlock(); | ||
351 | return err; | ||
352 | } | ||
353 | } | ||
354 | FRprintk("FAILURE\n"); | ||
355 | rcu_read_unlock(); | ||
356 | return -ENETUNREACH; | ||
357 | } | ||
358 | 279 | ||
359 | void fib_select_default(const struct flowi *flp, struct fib_result *res) | 280 | if (tb[FRA_SRC] && (rule4->src != nla_get_u32(tb[FRA_SRC]))) |
360 | { | 281 | return 0; |
361 | if (res->r && res->r->r_action == RTN_UNICAST && | ||
362 | FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { | ||
363 | struct fib_table *tb; | ||
364 | if ((tb = fib_get_table(res->r->r_table)) != NULL) | ||
365 | tb->tb_select_default(tb, flp, res); | ||
366 | } | ||
367 | } | ||
368 | 282 | ||
369 | static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) | 283 | if (tb[FRA_DST] && (rule4->dst != nla_get_u32(tb[FRA_DST]))) |
370 | { | 284 | return 0; |
371 | struct net_device *dev = ptr; | ||
372 | 285 | ||
373 | if (event == NETDEV_UNREGISTER) | 286 | return 1; |
374 | fib_rules_detach(dev); | ||
375 | else if (event == NETDEV_REGISTER) | ||
376 | fib_rules_attach(dev); | ||
377 | return NOTIFY_DONE; | ||
378 | } | 287 | } |
379 | 288 | ||
289 | static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | ||
290 | struct nlmsghdr *nlh, struct fib_rule_hdr *frh) | ||
291 | { | ||
292 | struct fib4_rule *rule4 = (struct fib4_rule *) rule; | ||
380 | 293 | ||
381 | static struct notifier_block fib_rules_notifier = { | 294 | frh->family = AF_INET; |
382 | .notifier_call =fib_rules_event, | 295 | frh->dst_len = rule4->dst_len; |
383 | }; | 296 | frh->src_len = rule4->src_len; |
297 | frh->tos = rule4->tos; | ||
384 | 298 | ||
385 | static __inline__ int inet_fill_rule(struct sk_buff *skb, | ||
386 | struct fib_rule *r, | ||
387 | u32 pid, u32 seq, int event, | ||
388 | unsigned int flags) | ||
389 | { | ||
390 | struct rtmsg *rtm; | ||
391 | struct nlmsghdr *nlh; | ||
392 | unsigned char *b = skb->tail; | ||
393 | |||
394 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); | ||
395 | rtm = NLMSG_DATA(nlh); | ||
396 | rtm->rtm_family = AF_INET; | ||
397 | rtm->rtm_dst_len = r->r_dst_len; | ||
398 | rtm->rtm_src_len = r->r_src_len; | ||
399 | rtm->rtm_tos = r->r_tos; | ||
400 | #ifdef CONFIG_IP_ROUTE_FWMARK | 299 | #ifdef CONFIG_IP_ROUTE_FWMARK |
401 | if (r->r_fwmark) | 300 | if (rule4->fwmark) |
402 | RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); | 301 | NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark); |
302 | |||
303 | if (rule4->fwmask || rule4->fwmark) | ||
304 | NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask); | ||
403 | #endif | 305 | #endif |
404 | rtm->rtm_table = r->r_table; | 306 | |
405 | rtm->rtm_protocol = 0; | 307 | if (rule4->dst_len) |
406 | rtm->rtm_scope = 0; | 308 | NLA_PUT_U32(skb, FRA_DST, rule4->dst); |
407 | rtm->rtm_type = r->r_action; | 309 | |
408 | rtm->rtm_flags = r->r_flags; | 310 | if (rule4->src_len) |
409 | 311 | NLA_PUT_U32(skb, FRA_SRC, rule4->src); | |
410 | if (r->r_dst_len) | 312 | |
411 | RTA_PUT(skb, RTA_DST, 4, &r->r_dst); | ||
412 | if (r->r_src_len) | ||
413 | RTA_PUT(skb, RTA_SRC, 4, &r->r_src); | ||
414 | if (r->r_ifname[0]) | ||
415 | RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); | ||
416 | if (r->r_preference) | ||
417 | RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); | ||
418 | if (r->r_srcmap) | ||
419 | RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap); | ||
420 | #ifdef CONFIG_NET_CLS_ROUTE | 313 | #ifdef CONFIG_NET_CLS_ROUTE |
421 | if (r->r_tclassid) | 314 | if (rule4->tclassid) |
422 | RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); | 315 | NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); |
423 | #endif | 316 | #endif |
424 | nlh->nlmsg_len = skb->tail - b; | 317 | return 0; |
425 | return skb->len; | ||
426 | 318 | ||
427 | nlmsg_failure: | 319 | nla_put_failure: |
428 | rtattr_failure: | 320 | return -ENOBUFS; |
429 | skb_trim(skb, b - skb->data); | ||
430 | return -1; | ||
431 | } | 321 | } |
432 | 322 | ||
433 | /* callers should hold rtnl semaphore */ | 323 | int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) |
434 | |||
435 | static void rtmsg_rule(int event, struct fib_rule *r) | ||
436 | { | 324 | { |
437 | int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128); | 325 | return fib_rules_dump(skb, cb, AF_INET); |
438 | struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); | ||
439 | |||
440 | if (!skb) | ||
441 | netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS); | ||
442 | else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) { | ||
443 | kfree_skb(skb); | ||
444 | netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL); | ||
445 | } else { | ||
446 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL); | ||
447 | } | ||
448 | } | 326 | } |
449 | 327 | ||
450 | int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) | 328 | static u32 fib4_rule_default_pref(void) |
451 | { | 329 | { |
452 | int idx = 0; | 330 | struct list_head *pos; |
453 | int s_idx = cb->args[0]; | 331 | struct fib_rule *rule; |
454 | struct fib_rule *r; | 332 | |
455 | struct hlist_node *node; | 333 | if (!list_empty(&fib4_rules)) { |
456 | 334 | pos = fib4_rules.next; | |
457 | rcu_read_lock(); | 335 | if (pos->next != &fib4_rules) { |
458 | hlist_for_each_entry(r, node, &fib_rules, hlist) { | 336 | rule = list_entry(pos->next, struct fib_rule, list); |
459 | if (idx < s_idx) | 337 | if (rule->pref) |
460 | goto next; | 338 | return rule->pref - 1; |
461 | if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid, | 339 | } |
462 | cb->nlh->nlmsg_seq, | ||
463 | RTM_NEWRULE, NLM_F_MULTI) < 0) | ||
464 | break; | ||
465 | next: | ||
466 | idx++; | ||
467 | } | 340 | } |
468 | rcu_read_unlock(); | ||
469 | cb->args[0] = idx; | ||
470 | 341 | ||
471 | return skb->len; | 342 | return 0; |
472 | } | 343 | } |
473 | 344 | ||
474 | void __init fib_rules_init(void) | 345 | static struct fib_rules_ops fib4_rules_ops = { |
346 | .family = AF_INET, | ||
347 | .rule_size = sizeof(struct fib4_rule), | ||
348 | .action = fib4_rule_action, | ||
349 | .match = fib4_rule_match, | ||
350 | .configure = fib4_rule_configure, | ||
351 | .compare = fib4_rule_compare, | ||
352 | .fill = fib4_rule_fill, | ||
353 | .default_pref = fib4_rule_default_pref, | ||
354 | .nlgroup = RTNLGRP_IPV4_RULE, | ||
355 | .policy = fib4_rule_policy, | ||
356 | .rules_list = &fib4_rules, | ||
357 | .owner = THIS_MODULE, | ||
358 | }; | ||
359 | |||
360 | void __init fib4_rules_init(void) | ||
475 | { | 361 | { |
476 | INIT_HLIST_HEAD(&fib_rules); | 362 | list_add_tail(&local_rule.common.list, &fib4_rules); |
477 | hlist_add_head(&local_rule.hlist, &fib_rules); | 363 | list_add_tail(&main_rule.common.list, &fib4_rules); |
478 | hlist_add_after(&local_rule.hlist, &main_rule.hlist); | 364 | list_add_tail(&default_rule.common.list, &fib4_rules); |
479 | hlist_add_after(&main_rule.hlist, &default_rule.hlist); | 365 | |
480 | register_netdevice_notifier(&fib_rules_notifier); | 366 | fib_rules_register(&fib4_rules_ops); |
481 | } | 367 | } |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 51738000f3dc..2ead09543f68 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/if_arp.h> | 33 | #include <linux/if_arp.h> |
34 | #include <linux/proc_fs.h> | 34 | #include <linux/proc_fs.h> |
35 | #include <linux/skbuff.h> | 35 | #include <linux/skbuff.h> |
36 | #include <linux/netlink.h> | ||
37 | #include <linux/init.h> | 36 | #include <linux/init.h> |
38 | 37 | ||
39 | #include <net/arp.h> | 38 | #include <net/arp.h> |
@@ -44,12 +43,14 @@ | |||
44 | #include <net/sock.h> | 43 | #include <net/sock.h> |
45 | #include <net/ip_fib.h> | 44 | #include <net/ip_fib.h> |
46 | #include <net/ip_mp_alg.h> | 45 | #include <net/ip_mp_alg.h> |
46 | #include <net/netlink.h> | ||
47 | #include <net/nexthop.h> | ||
47 | 48 | ||
48 | #include "fib_lookup.h" | 49 | #include "fib_lookup.h" |
49 | 50 | ||
50 | #define FSprintk(a...) | 51 | #define FSprintk(a...) |
51 | 52 | ||
52 | static DEFINE_RWLOCK(fib_info_lock); | 53 | static DEFINE_SPINLOCK(fib_info_lock); |
53 | static struct hlist_head *fib_info_hash; | 54 | static struct hlist_head *fib_info_hash; |
54 | static struct hlist_head *fib_info_laddrhash; | 55 | static struct hlist_head *fib_info_laddrhash; |
55 | static unsigned int fib_hash_size; | 56 | static unsigned int fib_hash_size; |
@@ -159,7 +160,7 @@ void free_fib_info(struct fib_info *fi) | |||
159 | 160 | ||
160 | void fib_release_info(struct fib_info *fi) | 161 | void fib_release_info(struct fib_info *fi) |
161 | { | 162 | { |
162 | write_lock_bh(&fib_info_lock); | 163 | spin_lock_bh(&fib_info_lock); |
163 | if (fi && --fi->fib_treeref == 0) { | 164 | if (fi && --fi->fib_treeref == 0) { |
164 | hlist_del(&fi->fib_hash); | 165 | hlist_del(&fi->fib_hash); |
165 | if (fi->fib_prefsrc) | 166 | if (fi->fib_prefsrc) |
@@ -172,7 +173,7 @@ void fib_release_info(struct fib_info *fi) | |||
172 | fi->fib_dead = 1; | 173 | fi->fib_dead = 1; |
173 | fib_info_put(fi); | 174 | fib_info_put(fi); |
174 | } | 175 | } |
175 | write_unlock_bh(&fib_info_lock); | 176 | spin_unlock_bh(&fib_info_lock); |
176 | } | 177 | } |
177 | 178 | ||
178 | static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) | 179 | static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) |
@@ -254,7 +255,7 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) | |||
254 | struct fib_nh *nh; | 255 | struct fib_nh *nh; |
255 | unsigned int hash; | 256 | unsigned int hash; |
256 | 257 | ||
257 | read_lock(&fib_info_lock); | 258 | spin_lock(&fib_info_lock); |
258 | 259 | ||
259 | hash = fib_devindex_hashfn(dev->ifindex); | 260 | hash = fib_devindex_hashfn(dev->ifindex); |
260 | head = &fib_info_devhash[hash]; | 261 | head = &fib_info_devhash[hash]; |
@@ -262,41 +263,41 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) | |||
262 | if (nh->nh_dev == dev && | 263 | if (nh->nh_dev == dev && |
263 | nh->nh_gw == gw && | 264 | nh->nh_gw == gw && |
264 | !(nh->nh_flags&RTNH_F_DEAD)) { | 265 | !(nh->nh_flags&RTNH_F_DEAD)) { |
265 | read_unlock(&fib_info_lock); | 266 | spin_unlock(&fib_info_lock); |
266 | return 0; | 267 | return 0; |
267 | } | 268 | } |
268 | } | 269 | } |
269 | 270 | ||
270 | read_unlock(&fib_info_lock); | 271 | spin_unlock(&fib_info_lock); |
271 | 272 | ||
272 | return -1; | 273 | return -1; |
273 | } | 274 | } |
274 | 275 | ||
275 | void rtmsg_fib(int event, u32 key, struct fib_alias *fa, | 276 | void rtmsg_fib(int event, u32 key, struct fib_alias *fa, |
276 | int z, int tb_id, | 277 | int dst_len, u32 tb_id, struct nl_info *info) |
277 | struct nlmsghdr *n, struct netlink_skb_parms *req) | ||
278 | { | 278 | { |
279 | struct sk_buff *skb; | 279 | struct sk_buff *skb; |
280 | u32 pid = req ? req->pid : n->nlmsg_pid; | 280 | int payload = sizeof(struct rtmsg) + 256; |
281 | int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); | 281 | u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; |
282 | 282 | int err = -ENOBUFS; | |
283 | skb = alloc_skb(size, GFP_KERNEL); | 283 | |
284 | if (!skb) | 284 | skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); |
285 | return; | 285 | if (skb == NULL) |
286 | 286 | goto errout; | |
287 | if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, | 287 | |
288 | fa->fa_type, fa->fa_scope, &key, z, | 288 | err = fib_dump_info(skb, info->pid, seq, event, tb_id, |
289 | fa->fa_tos, | 289 | fa->fa_type, fa->fa_scope, key, dst_len, |
290 | fa->fa_info, 0) < 0) { | 290 | fa->fa_tos, fa->fa_info, 0); |
291 | if (err < 0) { | ||
291 | kfree_skb(skb); | 292 | kfree_skb(skb); |
292 | return; | 293 | goto errout; |
293 | } | 294 | } |
294 | NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; | 295 | |
295 | if (n->nlmsg_flags&NLM_F_ECHO) | 296 | err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, |
296 | atomic_inc(&skb->users); | 297 | info->nlh, GFP_KERNEL); |
297 | netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); | 298 | errout: |
298 | if (n->nlmsg_flags&NLM_F_ECHO) | 299 | if (err < 0) |
299 | netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); | 300 | rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); |
300 | } | 301 | } |
301 | 302 | ||
302 | /* Return the first fib alias matching TOS with | 303 | /* Return the first fib alias matching TOS with |
@@ -342,102 +343,100 @@ int fib_detect_death(struct fib_info *fi, int order, | |||
342 | 343 | ||
343 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 344 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
344 | 345 | ||
345 | static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) | 346 | static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) |
346 | { | ||
347 | while (RTA_OK(attr,attrlen)) { | ||
348 | if (attr->rta_type == type) | ||
349 | return *(u32*)RTA_DATA(attr); | ||
350 | attr = RTA_NEXT(attr, attrlen); | ||
351 | } | ||
352 | return 0; | ||
353 | } | ||
354 | |||
355 | static int | ||
356 | fib_count_nexthops(struct rtattr *rta) | ||
357 | { | 347 | { |
358 | int nhs = 0; | 348 | int nhs = 0; |
359 | struct rtnexthop *nhp = RTA_DATA(rta); | ||
360 | int nhlen = RTA_PAYLOAD(rta); | ||
361 | 349 | ||
362 | while (nhlen >= (int)sizeof(struct rtnexthop)) { | 350 | while (rtnh_ok(rtnh, remaining)) { |
363 | if ((nhlen -= nhp->rtnh_len) < 0) | ||
364 | return 0; | ||
365 | nhs++; | 351 | nhs++; |
366 | nhp = RTNH_NEXT(nhp); | 352 | rtnh = rtnh_next(rtnh, &remaining); |
367 | }; | 353 | } |
368 | return nhs; | 354 | |
355 | /* leftover implies invalid nexthop configuration, discard it */ | ||
356 | return remaining > 0 ? 0 : nhs; | ||
369 | } | 357 | } |
370 | 358 | ||
371 | static int | 359 | static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, |
372 | fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) | 360 | int remaining, struct fib_config *cfg) |
373 | { | 361 | { |
374 | struct rtnexthop *nhp = RTA_DATA(rta); | ||
375 | int nhlen = RTA_PAYLOAD(rta); | ||
376 | |||
377 | change_nexthops(fi) { | 362 | change_nexthops(fi) { |
378 | int attrlen = nhlen - sizeof(struct rtnexthop); | 363 | int attrlen; |
379 | if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) | 364 | |
365 | if (!rtnh_ok(rtnh, remaining)) | ||
380 | return -EINVAL; | 366 | return -EINVAL; |
381 | nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; | 367 | |
382 | nh->nh_oif = nhp->rtnh_ifindex; | 368 | nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; |
383 | nh->nh_weight = nhp->rtnh_hops + 1; | 369 | nh->nh_oif = rtnh->rtnh_ifindex; |
384 | if (attrlen) { | 370 | nh->nh_weight = rtnh->rtnh_hops + 1; |
385 | nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); | 371 | |
372 | attrlen = rtnh_attrlen(rtnh); | ||
373 | if (attrlen > 0) { | ||
374 | struct nlattr *nla, *attrs = rtnh_attrs(rtnh); | ||
375 | |||
376 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | ||
377 | nh->nh_gw = nla ? nla_get_u32(nla) : 0; | ||
386 | #ifdef CONFIG_NET_CLS_ROUTE | 378 | #ifdef CONFIG_NET_CLS_ROUTE |
387 | nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); | 379 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
380 | nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; | ||
388 | #endif | 381 | #endif |
389 | } | 382 | } |
390 | nhp = RTNH_NEXT(nhp); | 383 | |
384 | rtnh = rtnh_next(rtnh, &remaining); | ||
391 | } endfor_nexthops(fi); | 385 | } endfor_nexthops(fi); |
386 | |||
392 | return 0; | 387 | return 0; |
393 | } | 388 | } |
394 | 389 | ||
395 | #endif | 390 | #endif |
396 | 391 | ||
397 | int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, | 392 | int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) |
398 | struct fib_info *fi) | ||
399 | { | 393 | { |
400 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 394 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
401 | struct rtnexthop *nhp; | 395 | struct rtnexthop *rtnh; |
402 | int nhlen; | 396 | int remaining; |
403 | #endif | 397 | #endif |
404 | 398 | ||
405 | if (rta->rta_priority && | 399 | if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) |
406 | *rta->rta_priority != fi->fib_priority) | ||
407 | return 1; | 400 | return 1; |
408 | 401 | ||
409 | if (rta->rta_oif || rta->rta_gw) { | 402 | if (cfg->fc_oif || cfg->fc_gw) { |
410 | if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && | 403 | if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && |
411 | (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) | 404 | (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) |
412 | return 0; | 405 | return 0; |
413 | return 1; | 406 | return 1; |
414 | } | 407 | } |
415 | 408 | ||
416 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 409 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
417 | if (rta->rta_mp == NULL) | 410 | if (cfg->fc_mp == NULL) |
418 | return 0; | 411 | return 0; |
419 | nhp = RTA_DATA(rta->rta_mp); | 412 | |
420 | nhlen = RTA_PAYLOAD(rta->rta_mp); | 413 | rtnh = cfg->fc_mp; |
414 | remaining = cfg->fc_mp_len; | ||
421 | 415 | ||
422 | for_nexthops(fi) { | 416 | for_nexthops(fi) { |
423 | int attrlen = nhlen - sizeof(struct rtnexthop); | 417 | int attrlen; |
424 | u32 gw; | ||
425 | 418 | ||
426 | if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) | 419 | if (!rtnh_ok(rtnh, remaining)) |
427 | return -EINVAL; | 420 | return -EINVAL; |
428 | if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) | 421 | |
422 | if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) | ||
429 | return 1; | 423 | return 1; |
430 | if (attrlen) { | 424 | |
431 | gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); | 425 | attrlen = rtnh_attrlen(rtnh); |
432 | if (gw && gw != nh->nh_gw) | 426 | if (attrlen < 0) { |
427 | struct nlattr *nla, *attrs = rtnh_attrs(rtnh); | ||
428 | |||
429 | nla = nla_find(attrs, attrlen, RTA_GATEWAY); | ||
430 | if (nla && nla_get_u32(nla) != nh->nh_gw) | ||
433 | return 1; | 431 | return 1; |
434 | #ifdef CONFIG_NET_CLS_ROUTE | 432 | #ifdef CONFIG_NET_CLS_ROUTE |
435 | gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); | 433 | nla = nla_find(attrs, attrlen, RTA_FLOW); |
436 | if (gw && gw != nh->nh_tclassid) | 434 | if (nla && nla_get_u32(nla) != nh->nh_tclassid) |
437 | return 1; | 435 | return 1; |
438 | #endif | 436 | #endif |
439 | } | 437 | } |
440 | nhp = RTNH_NEXT(nhp); | 438 | |
439 | rtnh = rtnh_next(rtnh, &remaining); | ||
441 | } endfor_nexthops(fi); | 440 | } endfor_nexthops(fi); |
442 | #endif | 441 | #endif |
443 | return 0; | 442 | return 0; |
@@ -488,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, | |||
488 | |-> {local prefix} (terminal node) | 487 | |-> {local prefix} (terminal node) |
489 | */ | 488 | */ |
490 | 489 | ||
491 | static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) | 490 | static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, |
491 | struct fib_nh *nh) | ||
492 | { | 492 | { |
493 | int err; | 493 | int err; |
494 | 494 | ||
@@ -502,7 +502,7 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n | |||
502 | if (nh->nh_flags&RTNH_F_ONLINK) { | 502 | if (nh->nh_flags&RTNH_F_ONLINK) { |
503 | struct net_device *dev; | 503 | struct net_device *dev; |
504 | 504 | ||
505 | if (r->rtm_scope >= RT_SCOPE_LINK) | 505 | if (cfg->fc_scope >= RT_SCOPE_LINK) |
506 | return -EINVAL; | 506 | return -EINVAL; |
507 | if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) | 507 | if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) |
508 | return -EINVAL; | 508 | return -EINVAL; |
@@ -516,10 +516,15 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n | |||
516 | return 0; | 516 | return 0; |
517 | } | 517 | } |
518 | { | 518 | { |
519 | struct flowi fl = { .nl_u = { .ip4_u = | 519 | struct flowi fl = { |
520 | { .daddr = nh->nh_gw, | 520 | .nl_u = { |
521 | .scope = r->rtm_scope + 1 } }, | 521 | .ip4_u = { |
522 | .oif = nh->nh_oif }; | 522 | .daddr = nh->nh_gw, |
523 | .scope = cfg->fc_scope + 1, | ||
524 | }, | ||
525 | }, | ||
526 | .oif = nh->nh_oif, | ||
527 | }; | ||
523 | 528 | ||
524 | /* It is not necessary, but requires a bit of thinking */ | 529 | /* It is not necessary, but requires a bit of thinking */ |
525 | if (fl.fl4_scope < RT_SCOPE_LINK) | 530 | if (fl.fl4_scope < RT_SCOPE_LINK) |
@@ -598,7 +603,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash, | |||
598 | unsigned int old_size = fib_hash_size; | 603 | unsigned int old_size = fib_hash_size; |
599 | unsigned int i, bytes; | 604 | unsigned int i, bytes; |
600 | 605 | ||
601 | write_lock_bh(&fib_info_lock); | 606 | spin_lock_bh(&fib_info_lock); |
602 | old_info_hash = fib_info_hash; | 607 | old_info_hash = fib_info_hash; |
603 | old_laddrhash = fib_info_laddrhash; | 608 | old_laddrhash = fib_info_laddrhash; |
604 | fib_hash_size = new_size; | 609 | fib_hash_size = new_size; |
@@ -639,46 +644,35 @@ static void fib_hash_move(struct hlist_head *new_info_hash, | |||
639 | } | 644 | } |
640 | fib_info_laddrhash = new_laddrhash; | 645 | fib_info_laddrhash = new_laddrhash; |
641 | 646 | ||
642 | write_unlock_bh(&fib_info_lock); | 647 | spin_unlock_bh(&fib_info_lock); |
643 | 648 | ||
644 | bytes = old_size * sizeof(struct hlist_head *); | 649 | bytes = old_size * sizeof(struct hlist_head *); |
645 | fib_hash_free(old_info_hash, bytes); | 650 | fib_hash_free(old_info_hash, bytes); |
646 | fib_hash_free(old_laddrhash, bytes); | 651 | fib_hash_free(old_laddrhash, bytes); |
647 | } | 652 | } |
648 | 653 | ||
649 | struct fib_info * | 654 | struct fib_info *fib_create_info(struct fib_config *cfg) |
650 | fib_create_info(const struct rtmsg *r, struct kern_rta *rta, | ||
651 | const struct nlmsghdr *nlh, int *errp) | ||
652 | { | 655 | { |
653 | int err; | 656 | int err; |
654 | struct fib_info *fi = NULL; | 657 | struct fib_info *fi = NULL; |
655 | struct fib_info *ofi; | 658 | struct fib_info *ofi; |
656 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
657 | int nhs = 1; | 659 | int nhs = 1; |
658 | #else | ||
659 | const int nhs = 1; | ||
660 | #endif | ||
661 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
662 | u32 mp_alg = IP_MP_ALG_NONE; | ||
663 | #endif | ||
664 | 660 | ||
665 | /* Fast check to catch the most weird cases */ | 661 | /* Fast check to catch the most weird cases */ |
666 | if (fib_props[r->rtm_type].scope > r->rtm_scope) | 662 | if (fib_props[cfg->fc_type].scope > cfg->fc_scope) |
667 | goto err_inval; | 663 | goto err_inval; |
668 | 664 | ||
669 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 665 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
670 | if (rta->rta_mp) { | 666 | if (cfg->fc_mp) { |
671 | nhs = fib_count_nexthops(rta->rta_mp); | 667 | nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); |
672 | if (nhs == 0) | 668 | if (nhs == 0) |
673 | goto err_inval; | 669 | goto err_inval; |
674 | } | 670 | } |
675 | #endif | 671 | #endif |
676 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | 672 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED |
677 | if (rta->rta_mp_alg) { | 673 | if (cfg->fc_mp_alg) { |
678 | mp_alg = *rta->rta_mp_alg; | 674 | if (cfg->fc_mp_alg < IP_MP_ALG_NONE || |
679 | 675 | cfg->fc_mp_alg > IP_MP_ALG_MAX) | |
680 | if (mp_alg < IP_MP_ALG_NONE || | ||
681 | mp_alg > IP_MP_ALG_MAX) | ||
682 | goto err_inval; | 676 | goto err_inval; |
683 | } | 677 | } |
684 | #endif | 678 | #endif |
@@ -714,43 +708,42 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, | |||
714 | goto failure; | 708 | goto failure; |
715 | fib_info_cnt++; | 709 | fib_info_cnt++; |
716 | 710 | ||
717 | fi->fib_protocol = r->rtm_protocol; | 711 | fi->fib_protocol = cfg->fc_protocol; |
712 | fi->fib_flags = cfg->fc_flags; | ||
713 | fi->fib_priority = cfg->fc_priority; | ||
714 | fi->fib_prefsrc = cfg->fc_prefsrc; | ||
718 | 715 | ||
719 | fi->fib_nhs = nhs; | 716 | fi->fib_nhs = nhs; |
720 | change_nexthops(fi) { | 717 | change_nexthops(fi) { |
721 | nh->nh_parent = fi; | 718 | nh->nh_parent = fi; |
722 | } endfor_nexthops(fi) | 719 | } endfor_nexthops(fi) |
723 | 720 | ||
724 | fi->fib_flags = r->rtm_flags; | 721 | if (cfg->fc_mx) { |
725 | if (rta->rta_priority) | 722 | struct nlattr *nla; |
726 | fi->fib_priority = *rta->rta_priority; | 723 | int remaining; |
727 | if (rta->rta_mx) { | 724 | |
728 | int attrlen = RTA_PAYLOAD(rta->rta_mx); | 725 | nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { |
729 | struct rtattr *attr = RTA_DATA(rta->rta_mx); | 726 | int type = nla->nla_type; |
730 | 727 | ||
731 | while (RTA_OK(attr, attrlen)) { | 728 | if (type) { |
732 | unsigned flavor = attr->rta_type; | 729 | if (type > RTAX_MAX) |
733 | if (flavor) { | ||
734 | if (flavor > RTAX_MAX) | ||
735 | goto err_inval; | 730 | goto err_inval; |
736 | fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); | 731 | fi->fib_metrics[type - 1] = nla_get_u32(nla); |
737 | } | 732 | } |
738 | attr = RTA_NEXT(attr, attrlen); | ||
739 | } | 733 | } |
740 | } | 734 | } |
741 | if (rta->rta_prefsrc) | ||
742 | memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); | ||
743 | 735 | ||
744 | if (rta->rta_mp) { | 736 | if (cfg->fc_mp) { |
745 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 737 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
746 | if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) | 738 | err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); |
739 | if (err != 0) | ||
747 | goto failure; | 740 | goto failure; |
748 | if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) | 741 | if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) |
749 | goto err_inval; | 742 | goto err_inval; |
750 | if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) | 743 | if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) |
751 | goto err_inval; | 744 | goto err_inval; |
752 | #ifdef CONFIG_NET_CLS_ROUTE | 745 | #ifdef CONFIG_NET_CLS_ROUTE |
753 | if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) | 746 | if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) |
754 | goto err_inval; | 747 | goto err_inval; |
755 | #endif | 748 | #endif |
756 | #else | 749 | #else |
@@ -758,34 +751,32 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, | |||
758 | #endif | 751 | #endif |
759 | } else { | 752 | } else { |
760 | struct fib_nh *nh = fi->fib_nh; | 753 | struct fib_nh *nh = fi->fib_nh; |
761 | if (rta->rta_oif) | 754 | |
762 | nh->nh_oif = *rta->rta_oif; | 755 | nh->nh_oif = cfg->fc_oif; |
763 | if (rta->rta_gw) | 756 | nh->nh_gw = cfg->fc_gw; |
764 | memcpy(&nh->nh_gw, rta->rta_gw, 4); | 757 | nh->nh_flags = cfg->fc_flags; |
765 | #ifdef CONFIG_NET_CLS_ROUTE | 758 | #ifdef CONFIG_NET_CLS_ROUTE |
766 | if (rta->rta_flow) | 759 | nh->nh_tclassid = cfg->fc_flow; |
767 | memcpy(&nh->nh_tclassid, rta->rta_flow, 4); | ||
768 | #endif | 760 | #endif |
769 | nh->nh_flags = r->rtm_flags; | ||
770 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 761 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
771 | nh->nh_weight = 1; | 762 | nh->nh_weight = 1; |
772 | #endif | 763 | #endif |
773 | } | 764 | } |
774 | 765 | ||
775 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | 766 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED |
776 | fi->fib_mp_alg = mp_alg; | 767 | fi->fib_mp_alg = cfg->fc_mp_alg; |
777 | #endif | 768 | #endif |
778 | 769 | ||
779 | if (fib_props[r->rtm_type].error) { | 770 | if (fib_props[cfg->fc_type].error) { |
780 | if (rta->rta_gw || rta->rta_oif || rta->rta_mp) | 771 | if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) |
781 | goto err_inval; | 772 | goto err_inval; |
782 | goto link_it; | 773 | goto link_it; |
783 | } | 774 | } |
784 | 775 | ||
785 | if (r->rtm_scope > RT_SCOPE_HOST) | 776 | if (cfg->fc_scope > RT_SCOPE_HOST) |
786 | goto err_inval; | 777 | goto err_inval; |
787 | 778 | ||
788 | if (r->rtm_scope == RT_SCOPE_HOST) { | 779 | if (cfg->fc_scope == RT_SCOPE_HOST) { |
789 | struct fib_nh *nh = fi->fib_nh; | 780 | struct fib_nh *nh = fi->fib_nh; |
790 | 781 | ||
791 | /* Local address is added. */ | 782 | /* Local address is added. */ |
@@ -798,14 +789,14 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, | |||
798 | goto failure; | 789 | goto failure; |
799 | } else { | 790 | } else { |
800 | change_nexthops(fi) { | 791 | change_nexthops(fi) { |
801 | if ((err = fib_check_nh(r, fi, nh)) != 0) | 792 | if ((err = fib_check_nh(cfg, fi, nh)) != 0) |
802 | goto failure; | 793 | goto failure; |
803 | } endfor_nexthops(fi) | 794 | } endfor_nexthops(fi) |
804 | } | 795 | } |
805 | 796 | ||
806 | if (fi->fib_prefsrc) { | 797 | if (fi->fib_prefsrc) { |
807 | if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || | 798 | if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || |
808 | memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) | 799 | fi->fib_prefsrc != cfg->fc_dst) |
809 | if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) | 800 | if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) |
810 | goto err_inval; | 801 | goto err_inval; |
811 | } | 802 | } |
@@ -820,7 +811,7 @@ link_it: | |||
820 | 811 | ||
821 | fi->fib_treeref++; | 812 | fi->fib_treeref++; |
822 | atomic_inc(&fi->fib_clntref); | 813 | atomic_inc(&fi->fib_clntref); |
823 | write_lock_bh(&fib_info_lock); | 814 | spin_lock_bh(&fib_info_lock); |
824 | hlist_add_head(&fi->fib_hash, | 815 | hlist_add_head(&fi->fib_hash, |
825 | &fib_info_hash[fib_info_hashfn(fi)]); | 816 | &fib_info_hash[fib_info_hashfn(fi)]); |
826 | if (fi->fib_prefsrc) { | 817 | if (fi->fib_prefsrc) { |
@@ -839,19 +830,19 @@ link_it: | |||
839 | head = &fib_info_devhash[hash]; | 830 | head = &fib_info_devhash[hash]; |
840 | hlist_add_head(&nh->nh_hash, head); | 831 | hlist_add_head(&nh->nh_hash, head); |
841 | } endfor_nexthops(fi) | 832 | } endfor_nexthops(fi) |
842 | write_unlock_bh(&fib_info_lock); | 833 | spin_unlock_bh(&fib_info_lock); |
843 | return fi; | 834 | return fi; |
844 | 835 | ||
845 | err_inval: | 836 | err_inval: |
846 | err = -EINVAL; | 837 | err = -EINVAL; |
847 | 838 | ||
848 | failure: | 839 | failure: |
849 | *errp = err; | ||
850 | if (fi) { | 840 | if (fi) { |
851 | fi->fib_dead = 1; | 841 | fi->fib_dead = 1; |
852 | free_fib_info(fi); | 842 | free_fib_info(fi); |
853 | } | 843 | } |
854 | return NULL; | 844 | |
845 | return ERR_PTR(err); | ||
855 | } | 846 | } |
856 | 847 | ||
857 | /* Note! fib_semantic_match intentionally uses RCU list functions. */ | 848 | /* Note! fib_semantic_match intentionally uses RCU list functions. */ |
@@ -937,224 +928,89 @@ u32 __fib_res_prefsrc(struct fib_result *res) | |||
937 | return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); | 928 | return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); |
938 | } | 929 | } |
939 | 930 | ||
940 | int | 931 | int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, |
941 | fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | 932 | u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos, |
942 | u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, | 933 | struct fib_info *fi, unsigned int flags) |
943 | struct fib_info *fi, unsigned int flags) | ||
944 | { | 934 | { |
935 | struct nlmsghdr *nlh; | ||
945 | struct rtmsg *rtm; | 936 | struct rtmsg *rtm; |
946 | struct nlmsghdr *nlh; | ||
947 | unsigned char *b = skb->tail; | ||
948 | 937 | ||
949 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); | 938 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); |
950 | rtm = NLMSG_DATA(nlh); | 939 | if (nlh == NULL) |
940 | return -ENOBUFS; | ||
941 | |||
942 | rtm = nlmsg_data(nlh); | ||
951 | rtm->rtm_family = AF_INET; | 943 | rtm->rtm_family = AF_INET; |
952 | rtm->rtm_dst_len = dst_len; | 944 | rtm->rtm_dst_len = dst_len; |
953 | rtm->rtm_src_len = 0; | 945 | rtm->rtm_src_len = 0; |
954 | rtm->rtm_tos = tos; | 946 | rtm->rtm_tos = tos; |
955 | rtm->rtm_table = tb_id; | 947 | rtm->rtm_table = tb_id; |
948 | NLA_PUT_U32(skb, RTA_TABLE, tb_id); | ||
956 | rtm->rtm_type = type; | 949 | rtm->rtm_type = type; |
957 | rtm->rtm_flags = fi->fib_flags; | 950 | rtm->rtm_flags = fi->fib_flags; |
958 | rtm->rtm_scope = scope; | 951 | rtm->rtm_scope = scope; |
959 | if (rtm->rtm_dst_len) | ||
960 | RTA_PUT(skb, RTA_DST, 4, dst); | ||
961 | rtm->rtm_protocol = fi->fib_protocol; | 952 | rtm->rtm_protocol = fi->fib_protocol; |
953 | |||
954 | if (rtm->rtm_dst_len) | ||
955 | NLA_PUT_U32(skb, RTA_DST, dst); | ||
956 | |||
962 | if (fi->fib_priority) | 957 | if (fi->fib_priority) |
963 | RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); | 958 | NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); |
959 | |||
964 | if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) | 960 | if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) |
965 | goto rtattr_failure; | 961 | goto nla_put_failure; |
962 | |||
966 | if (fi->fib_prefsrc) | 963 | if (fi->fib_prefsrc) |
967 | RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); | 964 | NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc); |
965 | |||
968 | if (fi->fib_nhs == 1) { | 966 | if (fi->fib_nhs == 1) { |
969 | if (fi->fib_nh->nh_gw) | 967 | if (fi->fib_nh->nh_gw) |
970 | RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); | 968 | NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); |
969 | |||
971 | if (fi->fib_nh->nh_oif) | 970 | if (fi->fib_nh->nh_oif) |
972 | RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); | 971 | NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); |
973 | #ifdef CONFIG_NET_CLS_ROUTE | 972 | #ifdef CONFIG_NET_CLS_ROUTE |
974 | if (fi->fib_nh[0].nh_tclassid) | 973 | if (fi->fib_nh[0].nh_tclassid) |
975 | RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); | 974 | NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); |
976 | #endif | 975 | #endif |
977 | } | 976 | } |
978 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 977 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
979 | if (fi->fib_nhs > 1) { | 978 | if (fi->fib_nhs > 1) { |
980 | struct rtnexthop *nhp; | 979 | struct rtnexthop *rtnh; |
981 | struct rtattr *mp_head; | 980 | struct nlattr *mp; |
982 | if (skb_tailroom(skb) <= RTA_SPACE(0)) | 981 | |
983 | goto rtattr_failure; | 982 | mp = nla_nest_start(skb, RTA_MULTIPATH); |
984 | mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0)); | 983 | if (mp == NULL) |
984 | goto nla_put_failure; | ||
985 | 985 | ||
986 | for_nexthops(fi) { | 986 | for_nexthops(fi) { |
987 | if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) | 987 | rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); |
988 | goto rtattr_failure; | 988 | if (rtnh == NULL) |
989 | nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); | 989 | goto nla_put_failure; |
990 | nhp->rtnh_flags = nh->nh_flags & 0xFF; | 990 | |
991 | nhp->rtnh_hops = nh->nh_weight-1; | 991 | rtnh->rtnh_flags = nh->nh_flags & 0xFF; |
992 | nhp->rtnh_ifindex = nh->nh_oif; | 992 | rtnh->rtnh_hops = nh->nh_weight - 1; |
993 | rtnh->rtnh_ifindex = nh->nh_oif; | ||
994 | |||
993 | if (nh->nh_gw) | 995 | if (nh->nh_gw) |
994 | RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); | 996 | NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw); |
995 | #ifdef CONFIG_NET_CLS_ROUTE | 997 | #ifdef CONFIG_NET_CLS_ROUTE |
996 | if (nh->nh_tclassid) | 998 | if (nh->nh_tclassid) |
997 | RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid); | 999 | NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); |
998 | #endif | 1000 | #endif |
999 | nhp->rtnh_len = skb->tail - (unsigned char*)nhp; | 1001 | /* length of rtnetlink header + attributes */ |
1002 | rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; | ||
1000 | } endfor_nexthops(fi); | 1003 | } endfor_nexthops(fi); |
1001 | mp_head->rta_type = RTA_MULTIPATH; | ||
1002 | mp_head->rta_len = skb->tail - (u8*)mp_head; | ||
1003 | } | ||
1004 | #endif | ||
1005 | nlh->nlmsg_len = skb->tail - b; | ||
1006 | return skb->len; | ||
1007 | |||
1008 | nlmsg_failure: | ||
1009 | rtattr_failure: | ||
1010 | skb_trim(skb, b - skb->data); | ||
1011 | return -1; | ||
1012 | } | ||
1013 | |||
1014 | #ifndef CONFIG_IP_NOSIOCRT | ||
1015 | |||
1016 | int | ||
1017 | fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, | ||
1018 | struct kern_rta *rta, struct rtentry *r) | ||
1019 | { | ||
1020 | int plen; | ||
1021 | u32 *ptr; | ||
1022 | |||
1023 | memset(rtm, 0, sizeof(*rtm)); | ||
1024 | memset(rta, 0, sizeof(*rta)); | ||
1025 | |||
1026 | if (r->rt_dst.sa_family != AF_INET) | ||
1027 | return -EAFNOSUPPORT; | ||
1028 | |||
1029 | /* Check mask for validity: | ||
1030 | a) it must be contiguous. | ||
1031 | b) destination must have all host bits clear. | ||
1032 | c) if application forgot to set correct family (AF_INET), | ||
1033 | reject request unless it is absolutely clear i.e. | ||
1034 | both family and mask are zero. | ||
1035 | */ | ||
1036 | plen = 32; | ||
1037 | ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; | ||
1038 | if (!(r->rt_flags&RTF_HOST)) { | ||
1039 | u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; | ||
1040 | if (r->rt_genmask.sa_family != AF_INET) { | ||
1041 | if (mask || r->rt_genmask.sa_family) | ||
1042 | return -EAFNOSUPPORT; | ||
1043 | } | ||
1044 | if (bad_mask(mask, *ptr)) | ||
1045 | return -EINVAL; | ||
1046 | plen = inet_mask_len(mask); | ||
1047 | } | ||
1048 | |||
1049 | nl->nlmsg_flags = NLM_F_REQUEST; | ||
1050 | nl->nlmsg_pid = 0; | ||
1051 | nl->nlmsg_seq = 0; | ||
1052 | nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); | ||
1053 | if (cmd == SIOCDELRT) { | ||
1054 | nl->nlmsg_type = RTM_DELROUTE; | ||
1055 | nl->nlmsg_flags = 0; | ||
1056 | } else { | ||
1057 | nl->nlmsg_type = RTM_NEWROUTE; | ||
1058 | nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; | ||
1059 | rtm->rtm_protocol = RTPROT_BOOT; | ||
1060 | } | ||
1061 | |||
1062 | rtm->rtm_dst_len = plen; | ||
1063 | rta->rta_dst = ptr; | ||
1064 | |||
1065 | if (r->rt_metric) { | ||
1066 | *(u32*)&r->rt_pad3 = r->rt_metric - 1; | ||
1067 | rta->rta_priority = (u32*)&r->rt_pad3; | ||
1068 | } | ||
1069 | if (r->rt_flags&RTF_REJECT) { | ||
1070 | rtm->rtm_scope = RT_SCOPE_HOST; | ||
1071 | rtm->rtm_type = RTN_UNREACHABLE; | ||
1072 | return 0; | ||
1073 | } | ||
1074 | rtm->rtm_scope = RT_SCOPE_NOWHERE; | ||
1075 | rtm->rtm_type = RTN_UNICAST; | ||
1076 | |||
1077 | if (r->rt_dev) { | ||
1078 | char *colon; | ||
1079 | struct net_device *dev; | ||
1080 | char devname[IFNAMSIZ]; | ||
1081 | |||
1082 | if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) | ||
1083 | return -EFAULT; | ||
1084 | devname[IFNAMSIZ-1] = 0; | ||
1085 | colon = strchr(devname, ':'); | ||
1086 | if (colon) | ||
1087 | *colon = 0; | ||
1088 | dev = __dev_get_by_name(devname); | ||
1089 | if (!dev) | ||
1090 | return -ENODEV; | ||
1091 | rta->rta_oif = &dev->ifindex; | ||
1092 | if (colon) { | ||
1093 | struct in_ifaddr *ifa; | ||
1094 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | ||
1095 | if (!in_dev) | ||
1096 | return -ENODEV; | ||
1097 | *colon = ':'; | ||
1098 | for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) | ||
1099 | if (strcmp(ifa->ifa_label, devname) == 0) | ||
1100 | break; | ||
1101 | if (ifa == NULL) | ||
1102 | return -ENODEV; | ||
1103 | rta->rta_prefsrc = &ifa->ifa_local; | ||
1104 | } | ||
1105 | } | ||
1106 | 1004 | ||
1107 | ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; | 1005 | nla_nest_end(skb, mp); |
1108 | if (r->rt_gateway.sa_family == AF_INET && *ptr) { | ||
1109 | rta->rta_gw = ptr; | ||
1110 | if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) | ||
1111 | rtm->rtm_scope = RT_SCOPE_UNIVERSE; | ||
1112 | } | 1006 | } |
1007 | #endif | ||
1008 | return nlmsg_end(skb, nlh); | ||
1113 | 1009 | ||
1114 | if (cmd == SIOCDELRT) | 1010 | nla_put_failure: |
1115 | return 0; | 1011 | return nlmsg_cancel(skb, nlh); |
1116 | |||
1117 | if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) | ||
1118 | return -EINVAL; | ||
1119 | |||
1120 | if (rtm->rtm_scope == RT_SCOPE_NOWHERE) | ||
1121 | rtm->rtm_scope = RT_SCOPE_LINK; | ||
1122 | |||
1123 | if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { | ||
1124 | struct rtattr *rec; | ||
1125 | struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); | ||
1126 | if (mx == NULL) | ||
1127 | return -ENOMEM; | ||
1128 | rta->rta_mx = mx; | ||
1129 | mx->rta_type = RTA_METRICS; | ||
1130 | mx->rta_len = RTA_LENGTH(0); | ||
1131 | if (r->rt_flags&RTF_MTU) { | ||
1132 | rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); | ||
1133 | rec->rta_type = RTAX_ADVMSS; | ||
1134 | rec->rta_len = RTA_LENGTH(4); | ||
1135 | mx->rta_len += RTA_LENGTH(4); | ||
1136 | *(u32*)RTA_DATA(rec) = r->rt_mtu - 40; | ||
1137 | } | ||
1138 | if (r->rt_flags&RTF_WINDOW) { | ||
1139 | rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); | ||
1140 | rec->rta_type = RTAX_WINDOW; | ||
1141 | rec->rta_len = RTA_LENGTH(4); | ||
1142 | mx->rta_len += RTA_LENGTH(4); | ||
1143 | *(u32*)RTA_DATA(rec) = r->rt_window; | ||
1144 | } | ||
1145 | if (r->rt_flags&RTF_IRTT) { | ||
1146 | rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); | ||
1147 | rec->rta_type = RTAX_RTT; | ||
1148 | rec->rta_len = RTA_LENGTH(4); | ||
1149 | mx->rta_len += RTA_LENGTH(4); | ||
1150 | *(u32*)RTA_DATA(rec) = r->rt_irtt<<3; | ||
1151 | } | ||
1152 | } | ||
1153 | return 0; | ||
1154 | } | 1012 | } |
1155 | 1013 | ||
1156 | #endif | ||
1157 | |||
1158 | /* | 1014 | /* |
1159 | Update FIB if: | 1015 | Update FIB if: |
1160 | - local address disappeared -> we must delete all the entries | 1016 | - local address disappeared -> we must delete all the entries |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 01801c0f885d..9c3ff6ba6e21 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -1124,17 +1124,14 @@ err: | |||
1124 | return fa_head; | 1124 | return fa_head; |
1125 | } | 1125 | } |
1126 | 1126 | ||
1127 | static int | 1127 | static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) |
1128 | fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | ||
1129 | struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) | ||
1130 | { | 1128 | { |
1131 | struct trie *t = (struct trie *) tb->tb_data; | 1129 | struct trie *t = (struct trie *) tb->tb_data; |
1132 | struct fib_alias *fa, *new_fa; | 1130 | struct fib_alias *fa, *new_fa; |
1133 | struct list_head *fa_head = NULL; | 1131 | struct list_head *fa_head = NULL; |
1134 | struct fib_info *fi; | 1132 | struct fib_info *fi; |
1135 | int plen = r->rtm_dst_len; | 1133 | int plen = cfg->fc_dst_len; |
1136 | int type = r->rtm_type; | 1134 | u8 tos = cfg->fc_tos; |
1137 | u8 tos = r->rtm_tos; | ||
1138 | u32 key, mask; | 1135 | u32 key, mask; |
1139 | int err; | 1136 | int err; |
1140 | struct leaf *l; | 1137 | struct leaf *l; |
@@ -1142,13 +1139,9 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1142 | if (plen > 32) | 1139 | if (plen > 32) |
1143 | return -EINVAL; | 1140 | return -EINVAL; |
1144 | 1141 | ||
1145 | key = 0; | 1142 | key = ntohl(cfg->fc_dst); |
1146 | if (rta->rta_dst) | ||
1147 | memcpy(&key, rta->rta_dst, 4); | ||
1148 | |||
1149 | key = ntohl(key); | ||
1150 | 1143 | ||
1151 | pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); | 1144 | pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); |
1152 | 1145 | ||
1153 | mask = ntohl(inet_make_mask(plen)); | 1146 | mask = ntohl(inet_make_mask(plen)); |
1154 | 1147 | ||
@@ -1157,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1157 | 1150 | ||
1158 | key = key & mask; | 1151 | key = key & mask; |
1159 | 1152 | ||
1160 | fi = fib_create_info(r, rta, nlhdr, &err); | 1153 | fi = fib_create_info(cfg); |
1161 | 1154 | if (IS_ERR(fi)) { | |
1162 | if (!fi) | 1155 | err = PTR_ERR(fi); |
1163 | goto err; | 1156 | goto err; |
1157 | } | ||
1164 | 1158 | ||
1165 | l = fib_find_node(t, key); | 1159 | l = fib_find_node(t, key); |
1166 | fa = NULL; | 1160 | fa = NULL; |
@@ -1185,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1185 | struct fib_alias *fa_orig; | 1179 | struct fib_alias *fa_orig; |
1186 | 1180 | ||
1187 | err = -EEXIST; | 1181 | err = -EEXIST; |
1188 | if (nlhdr->nlmsg_flags & NLM_F_EXCL) | 1182 | if (cfg->fc_nlflags & NLM_F_EXCL) |
1189 | goto out; | 1183 | goto out; |
1190 | 1184 | ||
1191 | if (nlhdr->nlmsg_flags & NLM_F_REPLACE) { | 1185 | if (cfg->fc_nlflags & NLM_F_REPLACE) { |
1192 | struct fib_info *fi_drop; | 1186 | struct fib_info *fi_drop; |
1193 | u8 state; | 1187 | u8 state; |
1194 | 1188 | ||
@@ -1200,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1200 | fi_drop = fa->fa_info; | 1194 | fi_drop = fa->fa_info; |
1201 | new_fa->fa_tos = fa->fa_tos; | 1195 | new_fa->fa_tos = fa->fa_tos; |
1202 | new_fa->fa_info = fi; | 1196 | new_fa->fa_info = fi; |
1203 | new_fa->fa_type = type; | 1197 | new_fa->fa_type = cfg->fc_type; |
1204 | new_fa->fa_scope = r->rtm_scope; | 1198 | new_fa->fa_scope = cfg->fc_scope; |
1205 | state = fa->fa_state; | 1199 | state = fa->fa_state; |
1206 | new_fa->fa_state &= ~FA_S_ACCESSED; | 1200 | new_fa->fa_state &= ~FA_S_ACCESSED; |
1207 | 1201 | ||
@@ -1224,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1224 | break; | 1218 | break; |
1225 | if (fa->fa_info->fib_priority != fi->fib_priority) | 1219 | if (fa->fa_info->fib_priority != fi->fib_priority) |
1226 | break; | 1220 | break; |
1227 | if (fa->fa_type == type && | 1221 | if (fa->fa_type == cfg->fc_type && |
1228 | fa->fa_scope == r->rtm_scope && | 1222 | fa->fa_scope == cfg->fc_scope && |
1229 | fa->fa_info == fi) { | 1223 | fa->fa_info == fi) { |
1230 | goto out; | 1224 | goto out; |
1231 | } | 1225 | } |
1232 | } | 1226 | } |
1233 | if (!(nlhdr->nlmsg_flags & NLM_F_APPEND)) | 1227 | if (!(cfg->fc_nlflags & NLM_F_APPEND)) |
1234 | fa = fa_orig; | 1228 | fa = fa_orig; |
1235 | } | 1229 | } |
1236 | err = -ENOENT; | 1230 | err = -ENOENT; |
1237 | if (!(nlhdr->nlmsg_flags & NLM_F_CREATE)) | 1231 | if (!(cfg->fc_nlflags & NLM_F_CREATE)) |
1238 | goto out; | 1232 | goto out; |
1239 | 1233 | ||
1240 | err = -ENOBUFS; | 1234 | err = -ENOBUFS; |
@@ -1244,8 +1238,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1244 | 1238 | ||
1245 | new_fa->fa_info = fi; | 1239 | new_fa->fa_info = fi; |
1246 | new_fa->fa_tos = tos; | 1240 | new_fa->fa_tos = tos; |
1247 | new_fa->fa_type = type; | 1241 | new_fa->fa_type = cfg->fc_type; |
1248 | new_fa->fa_scope = r->rtm_scope; | 1242 | new_fa->fa_scope = cfg->fc_scope; |
1249 | new_fa->fa_state = 0; | 1243 | new_fa->fa_state = 0; |
1250 | /* | 1244 | /* |
1251 | * Insert new entry to the list. | 1245 | * Insert new entry to the list. |
@@ -1262,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1262 | (fa ? &fa->fa_list : fa_head)); | 1256 | (fa ? &fa->fa_list : fa_head)); |
1263 | 1257 | ||
1264 | rt_cache_flush(-1); | 1258 | rt_cache_flush(-1); |
1265 | rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); | 1259 | rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, |
1260 | &cfg->fc_nlinfo); | ||
1266 | succeeded: | 1261 | succeeded: |
1267 | return 0; | 1262 | return 0; |
1268 | 1263 | ||
@@ -1548,28 +1543,21 @@ static int trie_leaf_remove(struct trie *t, t_key key) | |||
1548 | return 1; | 1543 | return 1; |
1549 | } | 1544 | } |
1550 | 1545 | ||
1551 | static int | 1546 | static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) |
1552 | fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | ||
1553 | struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) | ||
1554 | { | 1547 | { |
1555 | struct trie *t = (struct trie *) tb->tb_data; | 1548 | struct trie *t = (struct trie *) tb->tb_data; |
1556 | u32 key, mask; | 1549 | u32 key, mask; |
1557 | int plen = r->rtm_dst_len; | 1550 | int plen = cfg->fc_dst_len; |
1558 | u8 tos = r->rtm_tos; | 1551 | u8 tos = cfg->fc_tos; |
1559 | struct fib_alias *fa, *fa_to_delete; | 1552 | struct fib_alias *fa, *fa_to_delete; |
1560 | struct list_head *fa_head; | 1553 | struct list_head *fa_head; |
1561 | struct leaf *l; | 1554 | struct leaf *l; |
1562 | struct leaf_info *li; | 1555 | struct leaf_info *li; |
1563 | 1556 | ||
1564 | |||
1565 | if (plen > 32) | 1557 | if (plen > 32) |
1566 | return -EINVAL; | 1558 | return -EINVAL; |
1567 | 1559 | ||
1568 | key = 0; | 1560 | key = ntohl(cfg->fc_dst); |
1569 | if (rta->rta_dst) | ||
1570 | memcpy(&key, rta->rta_dst, 4); | ||
1571 | |||
1572 | key = ntohl(key); | ||
1573 | mask = ntohl(inet_make_mask(plen)); | 1561 | mask = ntohl(inet_make_mask(plen)); |
1574 | 1562 | ||
1575 | if (key & ~mask) | 1563 | if (key & ~mask) |
@@ -1598,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1598 | if (fa->fa_tos != tos) | 1586 | if (fa->fa_tos != tos) |
1599 | break; | 1587 | break; |
1600 | 1588 | ||
1601 | if ((!r->rtm_type || | 1589 | if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && |
1602 | fa->fa_type == r->rtm_type) && | 1590 | (cfg->fc_scope == RT_SCOPE_NOWHERE || |
1603 | (r->rtm_scope == RT_SCOPE_NOWHERE || | 1591 | fa->fa_scope == cfg->fc_scope) && |
1604 | fa->fa_scope == r->rtm_scope) && | 1592 | (!cfg->fc_protocol || |
1605 | (!r->rtm_protocol || | 1593 | fi->fib_protocol == cfg->fc_protocol) && |
1606 | fi->fib_protocol == r->rtm_protocol) && | 1594 | fib_nh_match(cfg, fi) == 0) { |
1607 | fib_nh_match(r, nlhdr, rta, fi) == 0) { | ||
1608 | fa_to_delete = fa; | 1595 | fa_to_delete = fa; |
1609 | break; | 1596 | break; |
1610 | } | 1597 | } |
@@ -1614,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, | |||
1614 | return -ESRCH; | 1601 | return -ESRCH; |
1615 | 1602 | ||
1616 | fa = fa_to_delete; | 1603 | fa = fa_to_delete; |
1617 | rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); | 1604 | rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, |
1605 | &cfg->fc_nlinfo); | ||
1618 | 1606 | ||
1619 | l = fib_find_node(t, key); | 1607 | l = fib_find_node(t, key); |
1620 | li = find_leaf_info(l, plen); | 1608 | li = find_leaf_info(l, plen); |
@@ -1848,7 +1836,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi | |||
1848 | 1836 | ||
1849 | u32 xkey = htonl(key); | 1837 | u32 xkey = htonl(key); |
1850 | 1838 | ||
1851 | s_i = cb->args[3]; | 1839 | s_i = cb->args[4]; |
1852 | i = 0; | 1840 | i = 0; |
1853 | 1841 | ||
1854 | /* rcu_read_lock is hold by caller */ | 1842 | /* rcu_read_lock is hold by caller */ |
@@ -1866,16 +1854,16 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi | |||
1866 | tb->tb_id, | 1854 | tb->tb_id, |
1867 | fa->fa_type, | 1855 | fa->fa_type, |
1868 | fa->fa_scope, | 1856 | fa->fa_scope, |
1869 | &xkey, | 1857 | xkey, |
1870 | plen, | 1858 | plen, |
1871 | fa->fa_tos, | 1859 | fa->fa_tos, |
1872 | fa->fa_info, 0) < 0) { | 1860 | fa->fa_info, 0) < 0) { |
1873 | cb->args[3] = i; | 1861 | cb->args[4] = i; |
1874 | return -1; | 1862 | return -1; |
1875 | } | 1863 | } |
1876 | i++; | 1864 | i++; |
1877 | } | 1865 | } |
1878 | cb->args[3] = i; | 1866 | cb->args[4] = i; |
1879 | return skb->len; | 1867 | return skb->len; |
1880 | } | 1868 | } |
1881 | 1869 | ||
@@ -1886,14 +1874,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str | |||
1886 | struct list_head *fa_head; | 1874 | struct list_head *fa_head; |
1887 | struct leaf *l = NULL; | 1875 | struct leaf *l = NULL; |
1888 | 1876 | ||
1889 | s_h = cb->args[2]; | 1877 | s_h = cb->args[3]; |
1890 | 1878 | ||
1891 | for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { | 1879 | for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { |
1892 | if (h < s_h) | 1880 | if (h < s_h) |
1893 | continue; | 1881 | continue; |
1894 | if (h > s_h) | 1882 | if (h > s_h) |
1895 | memset(&cb->args[3], 0, | 1883 | memset(&cb->args[4], 0, |
1896 | sizeof(cb->args) - 3*sizeof(cb->args[0])); | 1884 | sizeof(cb->args) - 4*sizeof(cb->args[0])); |
1897 | 1885 | ||
1898 | fa_head = get_fa_head(l, plen); | 1886 | fa_head = get_fa_head(l, plen); |
1899 | 1887 | ||
@@ -1904,11 +1892,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str | |||
1904 | continue; | 1892 | continue; |
1905 | 1893 | ||
1906 | if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { | 1894 | if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { |
1907 | cb->args[2] = h; | 1895 | cb->args[3] = h; |
1908 | return -1; | 1896 | return -1; |
1909 | } | 1897 | } |
1910 | } | 1898 | } |
1911 | cb->args[2] = h; | 1899 | cb->args[3] = h; |
1912 | return skb->len; | 1900 | return skb->len; |
1913 | } | 1901 | } |
1914 | 1902 | ||
@@ -1917,23 +1905,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin | |||
1917 | int m, s_m; | 1905 | int m, s_m; |
1918 | struct trie *t = (struct trie *) tb->tb_data; | 1906 | struct trie *t = (struct trie *) tb->tb_data; |
1919 | 1907 | ||
1920 | s_m = cb->args[1]; | 1908 | s_m = cb->args[2]; |
1921 | 1909 | ||
1922 | rcu_read_lock(); | 1910 | rcu_read_lock(); |
1923 | for (m = 0; m <= 32; m++) { | 1911 | for (m = 0; m <= 32; m++) { |
1924 | if (m < s_m) | 1912 | if (m < s_m) |
1925 | continue; | 1913 | continue; |
1926 | if (m > s_m) | 1914 | if (m > s_m) |
1927 | memset(&cb->args[2], 0, | 1915 | memset(&cb->args[3], 0, |
1928 | sizeof(cb->args) - 2*sizeof(cb->args[0])); | 1916 | sizeof(cb->args) - 3*sizeof(cb->args[0])); |
1929 | 1917 | ||
1930 | if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { | 1918 | if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { |
1931 | cb->args[1] = m; | 1919 | cb->args[2] = m; |
1932 | goto out; | 1920 | goto out; |
1933 | } | 1921 | } |
1934 | } | 1922 | } |
1935 | rcu_read_unlock(); | 1923 | rcu_read_unlock(); |
1936 | cb->args[1] = m; | 1924 | cb->args[2] = m; |
1937 | return skb->len; | 1925 | return skb->len; |
1938 | out: | 1926 | out: |
1939 | rcu_read_unlock(); | 1927 | rcu_read_unlock(); |
@@ -1943,9 +1931,9 @@ out: | |||
1943 | /* Fix more generic FIB names for init later */ | 1931 | /* Fix more generic FIB names for init later */ |
1944 | 1932 | ||
1945 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1933 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
1946 | struct fib_table * fib_hash_init(int id) | 1934 | struct fib_table * fib_hash_init(u32 id) |
1947 | #else | 1935 | #else |
1948 | struct fib_table * __init fib_hash_init(int id) | 1936 | struct fib_table * __init fib_hash_init(u32 id) |
1949 | #endif | 1937 | #endif |
1950 | { | 1938 | { |
1951 | struct fib_table *tb; | 1939 | struct fib_table *tb; |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4c86ac3d882d..c2ad07e48ab4 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = { | |||
187 | }; | 187 | }; |
188 | 188 | ||
189 | /* Control parameters for ECHO replies. */ | 189 | /* Control parameters for ECHO replies. */ |
190 | int sysctl_icmp_echo_ignore_all; | 190 | int sysctl_icmp_echo_ignore_all __read_mostly; |
191 | int sysctl_icmp_echo_ignore_broadcasts = 1; | 191 | int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1; |
192 | 192 | ||
193 | /* Control parameter - ignore bogus broadcast responses? */ | 193 | /* Control parameter - ignore bogus broadcast responses? */ |
194 | int sysctl_icmp_ignore_bogus_error_responses = 1; | 194 | int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1; |
195 | 195 | ||
196 | /* | 196 | /* |
197 | * Configurable global rate limit. | 197 | * Configurable global rate limit. |
@@ -205,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses = 1; | |||
205 | * time exceeded (11), parameter problem (12) | 205 | * time exceeded (11), parameter problem (12) |
206 | */ | 206 | */ |
207 | 207 | ||
208 | int sysctl_icmp_ratelimit = 1 * HZ; | 208 | int sysctl_icmp_ratelimit __read_mostly = 1 * HZ; |
209 | int sysctl_icmp_ratemask = 0x1818; | 209 | int sysctl_icmp_ratemask __read_mostly = 0x1818; |
210 | int sysctl_icmp_errors_use_inbound_ifaddr; | 210 | int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly; |
211 | 211 | ||
212 | /* | 212 | /* |
213 | * ICMP control array. This specifies what to do with each ICMP. | 213 | * ICMP control array. This specifies what to do with each ICMP. |
@@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
406 | .saddr = rt->rt_spec_dst, | 406 | .saddr = rt->rt_spec_dst, |
407 | .tos = RT_TOS(skb->nh.iph->tos) } }, | 407 | .tos = RT_TOS(skb->nh.iph->tos) } }, |
408 | .proto = IPPROTO_ICMP }; | 408 | .proto = IPPROTO_ICMP }; |
409 | security_skb_classify_flow(skb, &fl); | ||
409 | if (ip_route_output_key(&rt, &fl)) | 410 | if (ip_route_output_key(&rt, &fl)) |
410 | goto out_unlock; | 411 | goto out_unlock; |
411 | } | 412 | } |
@@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) | |||
560 | } | 561 | } |
561 | } | 562 | } |
562 | }; | 563 | }; |
564 | security_skb_classify_flow(skb_in, &fl); | ||
563 | if (ip_route_output_key(&rt, &fl)) | 565 | if (ip_route_output_key(&rt, &fl)) |
564 | goto out_unlock; | 566 | goto out_unlock; |
565 | } | 567 | } |
@@ -928,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb) | |||
928 | ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); | 930 | ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); |
929 | 931 | ||
930 | switch (skb->ip_summed) { | 932 | switch (skb->ip_summed) { |
931 | case CHECKSUM_HW: | 933 | case CHECKSUM_COMPLETE: |
932 | if (!(u16)csum_fold(skb->csum)) | 934 | if (!(u16)csum_fold(skb->csum)) |
933 | break; | 935 | break; |
934 | /* fall through */ | 936 | /* fall through */ |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8e8117c19e4d..58be8227b0cb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -931,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb) | |||
931 | goto drop; | 931 | goto drop; |
932 | 932 | ||
933 | switch (skb->ip_summed) { | 933 | switch (skb->ip_summed) { |
934 | case CHECKSUM_HW: | 934 | case CHECKSUM_COMPLETE: |
935 | if (!(u16)csum_fold(skb->csum)) | 935 | if (!(u16)csum_fold(skb->csum)) |
936 | break; | 936 | break; |
937 | /* fall through */ | 937 | /* fall through */ |
@@ -1397,8 +1397,8 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) | |||
1397 | /* | 1397 | /* |
1398 | * Join a socket to a group | 1398 | * Join a socket to a group |
1399 | */ | 1399 | */ |
1400 | int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS; | 1400 | int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; |
1401 | int sysctl_igmp_max_msf = IP_MAX_MSF; | 1401 | int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; |
1402 | 1402 | ||
1403 | 1403 | ||
1404 | static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, | 1404 | static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e50a1bfd7ccc..07204391d083 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, | |||
327 | { .sport = inet_sk(sk)->sport, | 327 | { .sport = inet_sk(sk)->sport, |
328 | .dport = ireq->rmt_port } } }; | 328 | .dport = ireq->rmt_port } } }; |
329 | 329 | ||
330 | security_req_classify_flow(req, &fl); | ||
330 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { | 331 | if (ip_route_output_flow(&rt, &fl, sk, 0)) { |
331 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | 332 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); |
332 | return NULL; | 333 | return NULL; |
@@ -509,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, | |||
509 | 510 | ||
510 | /* Deinitialize accept_queue to trap illegal accesses. */ | 511 | /* Deinitialize accept_queue to trap illegal accesses. */ |
511 | memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); | 512 | memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); |
513 | |||
514 | security_inet_csk_clone(newsk, req); | ||
512 | } | 515 | } |
513 | return newsk; | 516 | return newsk; |
514 | } | 517 | } |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 95fac5532994..fb296c9a7f3f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -124,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock); | |||
124 | * remote address for the connection. So always assume those are both | 124 | * remote address for the connection. So always assume those are both |
125 | * wildcarded during the search since they can never be otherwise. | 125 | * wildcarded during the search since they can never be otherwise. |
126 | */ | 126 | */ |
127 | struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, | 127 | static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, |
128 | const unsigned short hnum, const int dif) | 128 | const u32 daddr, |
129 | const unsigned short hnum, | ||
130 | const int dif) | ||
129 | { | 131 | { |
130 | struct sock *result = NULL, *sk; | 132 | struct sock *result = NULL, *sk; |
131 | const struct hlist_node *node; | 133 | const struct hlist_node *node; |
@@ -159,6 +161,33 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad | |||
159 | return result; | 161 | return result; |
160 | } | 162 | } |
161 | 163 | ||
164 | /* Optimize the common listener case. */ | ||
165 | struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, | ||
166 | const u32 daddr, const unsigned short hnum, | ||
167 | const int dif) | ||
168 | { | ||
169 | struct sock *sk = NULL; | ||
170 | const struct hlist_head *head; | ||
171 | |||
172 | read_lock(&hashinfo->lhash_lock); | ||
173 | head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; | ||
174 | if (!hlist_empty(head)) { | ||
175 | const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); | ||
176 | |||
177 | if (inet->num == hnum && !sk->sk_node.next && | ||
178 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && | ||
179 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && | ||
180 | !sk->sk_bound_dev_if) | ||
181 | goto sherry_cache; | ||
182 | sk = inet_lookup_listener_slow(head, daddr, hnum, dif); | ||
183 | } | ||
184 | if (sk) { | ||
185 | sherry_cache: | ||
186 | sock_hold(sk); | ||
187 | } | ||
188 | read_unlock(&hashinfo->lhash_lock); | ||
189 | return sk; | ||
190 | } | ||
162 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); | 191 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); |
163 | 192 | ||
164 | /* called with local bh disabled */ | 193 | /* called with local bh disabled */ |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 03ff62ebcfeb..a675602ef295 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -126,12 +126,9 @@ void __init inet_initpeers(void) | |||
126 | 126 | ||
127 | peer_cachep = kmem_cache_create("inet_peer_cache", | 127 | peer_cachep = kmem_cache_create("inet_peer_cache", |
128 | sizeof(struct inet_peer), | 128 | sizeof(struct inet_peer), |
129 | 0, SLAB_HWCACHE_ALIGN, | 129 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
130 | NULL, NULL); | 130 | NULL, NULL); |
131 | 131 | ||
132 | if (!peer_cachep) | ||
133 | panic("cannot create inet_peer_cache"); | ||
134 | |||
135 | /* All the timers, started at system startup tend | 132 | /* All the timers, started at system startup tend |
136 | to synchronize. Perturb it a bit. | 133 | to synchronize. Perturb it a bit. |
137 | */ | 134 | */ |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b84b53a47526..165d72859ddf 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -54,15 +54,15 @@ | |||
54 | * even the most extreme cases without allowing an attacker to measurably | 54 | * even the most extreme cases without allowing an attacker to measurably |
55 | * harm machine performance. | 55 | * harm machine performance. |
56 | */ | 56 | */ |
57 | int sysctl_ipfrag_high_thresh = 256*1024; | 57 | int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; |
58 | int sysctl_ipfrag_low_thresh = 192*1024; | 58 | int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; |
59 | 59 | ||
60 | int sysctl_ipfrag_max_dist = 64; | 60 | int sysctl_ipfrag_max_dist __read_mostly = 64; |
61 | 61 | ||
62 | /* Important NOTE! Fragment queue must be destroyed before MSL expires. | 62 | /* Important NOTE! Fragment queue must be destroyed before MSL expires. |
63 | * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. | 63 | * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. |
64 | */ | 64 | */ |
65 | int sysctl_ipfrag_time = IP_FRAG_TIME; | 65 | int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; |
66 | 66 | ||
67 | struct ipfrag_skb_cb | 67 | struct ipfrag_skb_cb |
68 | { | 68 | { |
@@ -130,7 +130,7 @@ static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot) | |||
130 | } | 130 | } |
131 | 131 | ||
132 | static struct timer_list ipfrag_secret_timer; | 132 | static struct timer_list ipfrag_secret_timer; |
133 | int sysctl_ipfrag_secret_interval = 10 * 60 * HZ; | 133 | int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; |
134 | 134 | ||
135 | static void ipfrag_secret_rebuild(unsigned long dummy) | 135 | static void ipfrag_secret_rebuild(unsigned long dummy) |
136 | { | 136 | { |
@@ -665,7 +665,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) | |||
665 | head->len += fp->len; | 665 | head->len += fp->len; |
666 | if (head->ip_summed != fp->ip_summed) | 666 | if (head->ip_summed != fp->ip_summed) |
667 | head->ip_summed = CHECKSUM_NONE; | 667 | head->ip_summed = CHECKSUM_NONE; |
668 | else if (head->ip_summed == CHECKSUM_HW) | 668 | else if (head->ip_summed == CHECKSUM_COMPLETE) |
669 | head->csum = csum_add(head->csum, fp->csum); | 669 | head->csum = csum_add(head->csum, fp->csum); |
670 | head->truesize += fp->truesize; | 670 | head->truesize += fp->truesize; |
671 | atomic_sub(fp->truesize, &ip_frag_mem); | 671 | atomic_sub(fp->truesize, &ip_frag_mem); |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0f9b3a31997b..f5fba051df3d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -393,7 +393,8 @@ out: | |||
393 | int code = skb->h.icmph->code; | 393 | int code = skb->h.icmph->code; |
394 | int rel_type = 0; | 394 | int rel_type = 0; |
395 | int rel_code = 0; | 395 | int rel_code = 0; |
396 | int rel_info = 0; | 396 | __be32 rel_info = 0; |
397 | __u32 n = 0; | ||
397 | u16 flags; | 398 | u16 flags; |
398 | int grehlen = (iph->ihl<<2) + 4; | 399 | int grehlen = (iph->ihl<<2) + 4; |
399 | struct sk_buff *skb2; | 400 | struct sk_buff *skb2; |
@@ -422,14 +423,16 @@ out: | |||
422 | default: | 423 | default: |
423 | return; | 424 | return; |
424 | case ICMP_PARAMETERPROB: | 425 | case ICMP_PARAMETERPROB: |
425 | if (skb->h.icmph->un.gateway < (iph->ihl<<2)) | 426 | n = ntohl(skb->h.icmph->un.gateway) >> 24; |
427 | if (n < (iph->ihl<<2)) | ||
426 | return; | 428 | return; |
427 | 429 | ||
428 | /* So... This guy found something strange INSIDE encapsulated | 430 | /* So... This guy found something strange INSIDE encapsulated |
429 | packet. Well, he is fool, but what can we do ? | 431 | packet. Well, he is fool, but what can we do ? |
430 | */ | 432 | */ |
431 | rel_type = ICMP_PARAMETERPROB; | 433 | rel_type = ICMP_PARAMETERPROB; |
432 | rel_info = skb->h.icmph->un.gateway - grehlen; | 434 | n -= grehlen; |
435 | rel_info = htonl(n << 24); | ||
433 | break; | 436 | break; |
434 | 437 | ||
435 | case ICMP_DEST_UNREACH: | 438 | case ICMP_DEST_UNREACH: |
@@ -440,13 +443,14 @@ out: | |||
440 | return; | 443 | return; |
441 | case ICMP_FRAG_NEEDED: | 444 | case ICMP_FRAG_NEEDED: |
442 | /* And it is the only really necessary thing :-) */ | 445 | /* And it is the only really necessary thing :-) */ |
443 | rel_info = ntohs(skb->h.icmph->un.frag.mtu); | 446 | n = ntohs(skb->h.icmph->un.frag.mtu); |
444 | if (rel_info < grehlen+68) | 447 | if (n < grehlen+68) |
445 | return; | 448 | return; |
446 | rel_info -= grehlen; | 449 | n -= grehlen; |
447 | /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ | 450 | /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ |
448 | if (rel_info > ntohs(eiph->tot_len)) | 451 | if (n > ntohs(eiph->tot_len)) |
449 | return; | 452 | return; |
453 | rel_info = htonl(n); | ||
450 | break; | 454 | break; |
451 | default: | 455 | default: |
452 | /* All others are translated to HOST_UNREACH. | 456 | /* All others are translated to HOST_UNREACH. |
@@ -508,12 +512,11 @@ out: | |||
508 | 512 | ||
509 | /* change mtu on this route */ | 513 | /* change mtu on this route */ |
510 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { | 514 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { |
511 | if (rel_info > dst_mtu(skb2->dst)) { | 515 | if (n > dst_mtu(skb2->dst)) { |
512 | kfree_skb(skb2); | 516 | kfree_skb(skb2); |
513 | return; | 517 | return; |
514 | } | 518 | } |
515 | skb2->dst->ops->update_pmtu(skb2->dst, rel_info); | 519 | skb2->dst->ops->update_pmtu(skb2->dst, n); |
516 | rel_info = htonl(rel_info); | ||
517 | } else if (type == ICMP_TIME_EXCEEDED) { | 520 | } else if (type == ICMP_TIME_EXCEEDED) { |
518 | struct ip_tunnel *t = netdev_priv(skb2->dev); | 521 | struct ip_tunnel *t = netdev_priv(skb2->dev); |
519 | if (t->parms.iph.ttl) { | 522 | if (t->parms.iph.ttl) { |
@@ -576,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
576 | 579 | ||
577 | if (flags&GRE_CSUM) { | 580 | if (flags&GRE_CSUM) { |
578 | switch (skb->ip_summed) { | 581 | switch (skb->ip_summed) { |
579 | case CHECKSUM_HW: | 582 | case CHECKSUM_COMPLETE: |
580 | csum = (u16)csum_fold(skb->csum); | 583 | csum = (u16)csum_fold(skb->csum); |
581 | if (!csum) | 584 | if (!csum) |
582 | break; | 585 | break; |
@@ -584,7 +587,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
584 | case CHECKSUM_NONE: | 587 | case CHECKSUM_NONE: |
585 | skb->csum = 0; | 588 | skb->csum = 0; |
586 | csum = __skb_checksum_complete(skb); | 589 | csum = __skb_checksum_complete(skb); |
587 | skb->ip_summed = CHECKSUM_HW; | 590 | skb->ip_summed = CHECKSUM_COMPLETE; |
588 | } | 591 | } |
589 | offset += 4; | 592 | offset += 4; |
590 | } | 593 | } |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 406056edc02b..e7437c091326 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <net/ip.h> | 24 | #include <net/ip.h> |
25 | #include <net/icmp.h> | 25 | #include <net/icmp.h> |
26 | #include <net/route.h> | 26 | #include <net/route.h> |
27 | #include <net/cipso_ipv4.h> | ||
27 | 28 | ||
28 | /* | 29 | /* |
29 | * Write options to IP header, record destination address to | 30 | * Write options to IP header, record destination address to |
@@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
194 | dopt->is_strictroute = sopt->is_strictroute; | 195 | dopt->is_strictroute = sopt->is_strictroute; |
195 | } | 196 | } |
196 | } | 197 | } |
198 | if (sopt->cipso) { | ||
199 | optlen = sptr[sopt->cipso+1]; | ||
200 | dopt->cipso = dopt->optlen+sizeof(struct iphdr); | ||
201 | memcpy(dptr, sptr+sopt->cipso, optlen); | ||
202 | dptr += optlen; | ||
203 | dopt->optlen += optlen; | ||
204 | } | ||
197 | while (dopt->optlen & 3) { | 205 | while (dopt->optlen & 3) { |
198 | *dptr++ = IPOPT_END; | 206 | *dptr++ = IPOPT_END; |
199 | dopt->optlen++; | 207 | dopt->optlen++; |
@@ -434,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) | |||
434 | if (optptr[2] == 0 && optptr[3] == 0) | 442 | if (optptr[2] == 0 && optptr[3] == 0) |
435 | opt->router_alert = optptr - iph; | 443 | opt->router_alert = optptr - iph; |
436 | break; | 444 | break; |
445 | case IPOPT_CIPSO: | ||
446 | if (opt->cipso) { | ||
447 | pp_ptr = optptr; | ||
448 | goto error; | ||
449 | } | ||
450 | opt->cipso = optptr - iph; | ||
451 | if (cipso_v4_validate(&optptr)) { | ||
452 | pp_ptr = optptr; | ||
453 | goto error; | ||
454 | } | ||
455 | break; | ||
437 | case IPOPT_SEC: | 456 | case IPOPT_SEC: |
438 | case IPOPT_SID: | 457 | case IPOPT_SID: |
439 | default: | 458 | default: |
@@ -506,7 +525,6 @@ static int ip_options_get_finish(struct ip_options **optp, | |||
506 | opt->__data[optlen++] = IPOPT_END; | 525 | opt->__data[optlen++] = IPOPT_END; |
507 | opt->optlen = optlen; | 526 | opt->optlen = optlen; |
508 | opt->is_data = 1; | 527 | opt->is_data = 1; |
509 | opt->is_setbyuser = 1; | ||
510 | if (optlen && ip_options_compile(opt, NULL)) { | 528 | if (optlen && ip_options_compile(opt, NULL)) { |
511 | kfree(opt); | 529 | kfree(opt); |
512 | return -EINVAL; | 530 | return -EINVAL; |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a2ede167e045..97aee76fb746 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -83,7 +83,7 @@ | |||
83 | #include <linux/netlink.h> | 83 | #include <linux/netlink.h> |
84 | #include <linux/tcp.h> | 84 | #include <linux/tcp.h> |
85 | 85 | ||
86 | int sysctl_ip_default_ttl = IPDEFTTL; | 86 | int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; |
87 | 87 | ||
88 | /* Generate a checksum for an outgoing IP datagram. */ | 88 | /* Generate a checksum for an outgoing IP datagram. */ |
89 | __inline__ void ip_send_check(struct iphdr *iph) | 89 | __inline__ void ip_send_check(struct iphdr *iph) |
@@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) | |||
328 | * keep trying until route appears or the connection times | 328 | * keep trying until route appears or the connection times |
329 | * itself out. | 329 | * itself out. |
330 | */ | 330 | */ |
331 | security_sk_classify_flow(sk, &fl); | ||
331 | if (ip_route_output_flow(&rt, &fl, sk, 0)) | 332 | if (ip_route_output_flow(&rt, &fl, sk, 0)) |
332 | goto no_route; | 333 | goto no_route; |
333 | } | 334 | } |
@@ -425,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
425 | int ptr; | 426 | int ptr; |
426 | struct net_device *dev; | 427 | struct net_device *dev; |
427 | struct sk_buff *skb2; | 428 | struct sk_buff *skb2; |
428 | unsigned int mtu, hlen, left, len, ll_rs; | 429 | unsigned int mtu, hlen, left, len, ll_rs, pad; |
429 | int offset; | 430 | int offset; |
430 | __be16 not_last_frag; | 431 | __be16 not_last_frag; |
431 | struct rtable *rt = (struct rtable*)skb->dst; | 432 | struct rtable *rt = (struct rtable*)skb->dst; |
@@ -555,14 +556,13 @@ slow_path: | |||
555 | left = skb->len - hlen; /* Space per frame */ | 556 | left = skb->len - hlen; /* Space per frame */ |
556 | ptr = raw + hlen; /* Where to start from */ | 557 | ptr = raw + hlen; /* Where to start from */ |
557 | 558 | ||
558 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
559 | /* for bridged IP traffic encapsulated inside f.e. a vlan header, | 559 | /* for bridged IP traffic encapsulated inside f.e. a vlan header, |
560 | * we need to make room for the encapsulating header */ | 560 | * we need to make room for the encapsulating header |
561 | ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb)); | 561 | */ |
562 | mtu -= nf_bridge_pad(skb); | 562 | pad = nf_bridge_pad(skb); |
563 | #else | 563 | ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); |
564 | ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev); | 564 | mtu -= pad; |
565 | #endif | 565 | |
566 | /* | 566 | /* |
567 | * Fragment the datagram. | 567 | * Fragment the datagram. |
568 | */ | 568 | */ |
@@ -679,7 +679,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk | |||
679 | { | 679 | { |
680 | struct iovec *iov = from; | 680 | struct iovec *iov = from; |
681 | 681 | ||
682 | if (skb->ip_summed == CHECKSUM_HW) { | 682 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
683 | if (memcpy_fromiovecend(to, iov, offset, len) < 0) | 683 | if (memcpy_fromiovecend(to, iov, offset, len) < 0) |
684 | return -EFAULT; | 684 | return -EFAULT; |
685 | } else { | 685 | } else { |
@@ -735,7 +735,7 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
735 | /* initialize protocol header pointer */ | 735 | /* initialize protocol header pointer */ |
736 | skb->h.raw = skb->data + fragheaderlen; | 736 | skb->h.raw = skb->data + fragheaderlen; |
737 | 737 | ||
738 | skb->ip_summed = CHECKSUM_HW; | 738 | skb->ip_summed = CHECKSUM_PARTIAL; |
739 | skb->csum = 0; | 739 | skb->csum = 0; |
740 | sk->sk_sndmsg_off = 0; | 740 | sk->sk_sndmsg_off = 0; |
741 | } | 741 | } |
@@ -843,7 +843,7 @@ int ip_append_data(struct sock *sk, | |||
843 | length + fragheaderlen <= mtu && | 843 | length + fragheaderlen <= mtu && |
844 | rt->u.dst.dev->features & NETIF_F_ALL_CSUM && | 844 | rt->u.dst.dev->features & NETIF_F_ALL_CSUM && |
845 | !exthdrlen) | 845 | !exthdrlen) |
846 | csummode = CHECKSUM_HW; | 846 | csummode = CHECKSUM_PARTIAL; |
847 | 847 | ||
848 | inet->cork.length += length; | 848 | inet->cork.length += length; |
849 | if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && | 849 | if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && |
@@ -1366,6 +1366,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1366 | { .sport = skb->h.th->dest, | 1366 | { .sport = skb->h.th->dest, |
1367 | .dport = skb->h.th->source } }, | 1367 | .dport = skb->h.th->source } }, |
1368 | .proto = sk->sk_protocol }; | 1368 | .proto = sk->sk_protocol }; |
1369 | security_skb_classify_flow(skb, &fl); | ||
1369 | if (ip_route_output_key(&rt, &fl)) | 1370 | if (ip_route_output_key(&rt, &fl)) |
1370 | return; | 1371 | return; |
1371 | } | 1372 | } |
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 5bb9c9f03fb6..17342430a843 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -176,7 +176,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) | |||
176 | return 0; | 176 | return 0; |
177 | 177 | ||
178 | out_ok: | 178 | out_ok: |
179 | if (x->props.mode) | 179 | if (x->props.mode == XFRM_MODE_TUNNEL) |
180 | ip_send_check(iph); | 180 | ip_send_check(iph); |
181 | return 0; | 181 | return 0; |
182 | } | 182 | } |
@@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) | |||
216 | t->id.daddr.a4 = x->id.daddr.a4; | 216 | t->id.daddr.a4 = x->id.daddr.a4; |
217 | memcpy(&t->sel, &x->sel, sizeof(t->sel)); | 217 | memcpy(&t->sel, &x->sel, sizeof(t->sel)); |
218 | t->props.family = AF_INET; | 218 | t->props.family = AF_INET; |
219 | t->props.mode = 1; | 219 | t->props.mode = XFRM_MODE_TUNNEL; |
220 | t->props.saddr.a4 = x->props.saddr.a4; | 220 | t->props.saddr.a4 = x->props.saddr.a4; |
221 | t->props.flags = x->props.flags; | 221 | t->props.flags = x->props.flags; |
222 | 222 | ||
@@ -416,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x) | |||
416 | goto out; | 416 | goto out; |
417 | 417 | ||
418 | x->props.header_len = 0; | 418 | x->props.header_len = 0; |
419 | if (x->props.mode) | 419 | if (x->props.mode == XFRM_MODE_TUNNEL) |
420 | x->props.header_len += sizeof(struct iphdr); | 420 | x->props.header_len += sizeof(struct iphdr); |
421 | 421 | ||
422 | mutex_lock(&ipcomp_resource_mutex); | 422 | mutex_lock(&ipcomp_resource_mutex); |
@@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x) | |||
428 | goto error; | 428 | goto error; |
429 | mutex_unlock(&ipcomp_resource_mutex); | 429 | mutex_unlock(&ipcomp_resource_mutex); |
430 | 430 | ||
431 | if (x->props.mode) { | 431 | if (x->props.mode == XFRM_MODE_TUNNEL) { |
432 | err = ipcomp_tunnel_attach(x); | 432 | err = ipcomp_tunnel_attach(x); |
433 | if (err) | 433 | if (err) |
434 | goto error_tunnel; | 434 | goto error_tunnel; |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index cb8a92f18ef6..1fbb38415b19 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -31,7 +31,6 @@ | |||
31 | * -- Josef Siemes <jsiemes@web.de>, Aug 2002 | 31 | * -- Josef Siemes <jsiemes@web.de>, Aug 2002 |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include <linux/config.h> | ||
35 | #include <linux/types.h> | 34 | #include <linux/types.h> |
36 | #include <linux/string.h> | 35 | #include <linux/string.h> |
37 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 76ab50b0d6ef..0c4556529228 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -341,7 +341,8 @@ out: | |||
341 | int code = skb->h.icmph->code; | 341 | int code = skb->h.icmph->code; |
342 | int rel_type = 0; | 342 | int rel_type = 0; |
343 | int rel_code = 0; | 343 | int rel_code = 0; |
344 | int rel_info = 0; | 344 | __be32 rel_info = 0; |
345 | __u32 n = 0; | ||
345 | struct sk_buff *skb2; | 346 | struct sk_buff *skb2; |
346 | struct flowi fl; | 347 | struct flowi fl; |
347 | struct rtable *rt; | 348 | struct rtable *rt; |
@@ -354,14 +355,15 @@ out: | |||
354 | default: | 355 | default: |
355 | return 0; | 356 | return 0; |
356 | case ICMP_PARAMETERPROB: | 357 | case ICMP_PARAMETERPROB: |
357 | if (skb->h.icmph->un.gateway < hlen) | 358 | n = ntohl(skb->h.icmph->un.gateway) >> 24; |
359 | if (n < hlen) | ||
358 | return 0; | 360 | return 0; |
359 | 361 | ||
360 | /* So... This guy found something strange INSIDE encapsulated | 362 | /* So... This guy found something strange INSIDE encapsulated |
361 | packet. Well, he is fool, but what can we do ? | 363 | packet. Well, he is fool, but what can we do ? |
362 | */ | 364 | */ |
363 | rel_type = ICMP_PARAMETERPROB; | 365 | rel_type = ICMP_PARAMETERPROB; |
364 | rel_info = skb->h.icmph->un.gateway - hlen; | 366 | rel_info = htonl((n - hlen) << 24); |
365 | break; | 367 | break; |
366 | 368 | ||
367 | case ICMP_DEST_UNREACH: | 369 | case ICMP_DEST_UNREACH: |
@@ -372,13 +374,14 @@ out: | |||
372 | return 0; | 374 | return 0; |
373 | case ICMP_FRAG_NEEDED: | 375 | case ICMP_FRAG_NEEDED: |
374 | /* And it is the only really necessary thing :-) */ | 376 | /* And it is the only really necessary thing :-) */ |
375 | rel_info = ntohs(skb->h.icmph->un.frag.mtu); | 377 | n = ntohs(skb->h.icmph->un.frag.mtu); |
376 | if (rel_info < hlen+68) | 378 | if (n < hlen+68) |
377 | return 0; | 379 | return 0; |
378 | rel_info -= hlen; | 380 | n -= hlen; |
379 | /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ | 381 | /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ |
380 | if (rel_info > ntohs(eiph->tot_len)) | 382 | if (n > ntohs(eiph->tot_len)) |
381 | return 0; | 383 | return 0; |
384 | rel_info = htonl(n); | ||
382 | break; | 385 | break; |
383 | default: | 386 | default: |
384 | /* All others are translated to HOST_UNREACH. | 387 | /* All others are translated to HOST_UNREACH. |
@@ -440,12 +443,11 @@ out: | |||
440 | 443 | ||
441 | /* change mtu on this route */ | 444 | /* change mtu on this route */ |
442 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { | 445 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { |
443 | if (rel_info > dst_mtu(skb2->dst)) { | 446 | if (n > dst_mtu(skb2->dst)) { |
444 | kfree_skb(skb2); | 447 | kfree_skb(skb2); |
445 | return 0; | 448 | return 0; |
446 | } | 449 | } |
447 | skb2->dst->ops->update_pmtu(skb2->dst, rel_info); | 450 | skb2->dst->ops->update_pmtu(skb2->dst, n); |
448 | rel_info = htonl(rel_info); | ||
449 | } else if (type == ICMP_TIME_EXCEEDED) { | 451 | } else if (type == ICMP_TIME_EXCEEDED) { |
450 | struct ip_tunnel *t = netdev_priv(skb2->dev); | 452 | struct ip_tunnel *t = netdev_priv(skb2->dev); |
451 | if (t->parms.iph.ttl) { | 453 | if (t->parms.iph.ttl) { |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 85893eef6b16..ba49588da242 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -312,7 +312,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c) | |||
312 | e = NLMSG_DATA(nlh); | 312 | e = NLMSG_DATA(nlh); |
313 | e->error = -ETIMEDOUT; | 313 | e->error = -ETIMEDOUT; |
314 | memset(&e->msg, 0, sizeof(e->msg)); | 314 | memset(&e->msg, 0, sizeof(e->msg)); |
315 | netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); | 315 | |
316 | rtnl_unicast(skb, NETLINK_CB(skb).pid); | ||
316 | } else | 317 | } else |
317 | kfree_skb(skb); | 318 | kfree_skb(skb); |
318 | } | 319 | } |
@@ -512,7 +513,6 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | |||
512 | 513 | ||
513 | while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { | 514 | while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { |
514 | if (skb->nh.iph->version == 0) { | 515 | if (skb->nh.iph->version == 0) { |
515 | int err; | ||
516 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); | 516 | struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); |
517 | 517 | ||
518 | if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { | 518 | if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { |
@@ -525,7 +525,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | |||
525 | e->error = -EMSGSIZE; | 525 | e->error = -EMSGSIZE; |
526 | memset(&e->msg, 0, sizeof(e->msg)); | 526 | memset(&e->msg, 0, sizeof(e->msg)); |
527 | } | 527 | } |
528 | err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); | 528 | |
529 | rtnl_unicast(skb, NETLINK_CB(skb).pid); | ||
529 | } else | 530 | } else |
530 | ip_mr_forward(skb, c, 0); | 531 | ip_mr_forward(skb, c, 0); |
531 | } | 532 | } |
@@ -1899,11 +1900,8 @@ void __init ip_mr_init(void) | |||
1899 | { | 1900 | { |
1900 | mrt_cachep = kmem_cache_create("ip_mrt_cache", | 1901 | mrt_cachep = kmem_cache_create("ip_mrt_cache", |
1901 | sizeof(struct mfc_cache), | 1902 | sizeof(struct mfc_cache), |
1902 | 0, SLAB_HWCACHE_ALIGN, | 1903 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
1903 | NULL, NULL); | 1904 | NULL, NULL); |
1904 | if (!mrt_cachep) | ||
1905 | panic("cannot allocate ip_mrt_cache"); | ||
1906 | |||
1907 | init_timer(&ipmr_expire_timer); | 1905 | init_timer(&ipmr_expire_timer); |
1908 | ipmr_expire_timer.function=ipmr_expire_process; | 1906 | ipmr_expire_timer.function=ipmr_expire_process; |
1909 | register_netdevice_notifier(&ip_mr_notifier); | 1907 | register_netdevice_notifier(&ip_mr_notifier); |
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index bc28b1160a3a..820e8318d10d 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c | |||
@@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb, | |||
151 | /* Only port and addr are changed, do fast csum update */ | 151 | /* Only port and addr are changed, do fast csum update */ |
152 | tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, | 152 | tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, |
153 | cp->dport, cp->vport); | 153 | cp->dport, cp->vport); |
154 | if ((*pskb)->ip_summed == CHECKSUM_HW) | 154 | if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) |
155 | (*pskb)->ip_summed = CHECKSUM_NONE; | 155 | (*pskb)->ip_summed = CHECKSUM_NONE; |
156 | } else { | 156 | } else { |
157 | /* full checksum calculation */ | 157 | /* full checksum calculation */ |
@@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb, | |||
204 | /* Only port and addr are changed, do fast csum update */ | 204 | /* Only port and addr are changed, do fast csum update */ |
205 | tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, | 205 | tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, |
206 | cp->vport, cp->dport); | 206 | cp->vport, cp->dport); |
207 | if ((*pskb)->ip_summed == CHECKSUM_HW) | 207 | if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) |
208 | (*pskb)->ip_summed = CHECKSUM_NONE; | 208 | (*pskb)->ip_summed = CHECKSUM_NONE; |
209 | } else { | 209 | } else { |
210 | /* full checksum calculation */ | 210 | /* full checksum calculation */ |
@@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
229 | switch (skb->ip_summed) { | 229 | switch (skb->ip_summed) { |
230 | case CHECKSUM_NONE: | 230 | case CHECKSUM_NONE: |
231 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | 231 | skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); |
232 | case CHECKSUM_HW: | 232 | case CHECKSUM_COMPLETE: |
233 | if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, | 233 | if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, |
234 | skb->len - tcphoff, | 234 | skb->len - tcphoff, |
235 | skb->nh.iph->protocol, skb->csum)) { | 235 | skb->nh.iph->protocol, skb->csum)) { |
@@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
239 | } | 239 | } |
240 | break; | 240 | break; |
241 | default: | 241 | default: |
242 | /* CHECKSUM_UNNECESSARY */ | 242 | /* No need to checksum. */ |
243 | break; | 243 | break; |
244 | } | 244 | } |
245 | 245 | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 89d9175d8f28..90c8166c0ec1 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c | |||
@@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb, | |||
161 | /* Only port and addr are changed, do fast csum update */ | 161 | /* Only port and addr are changed, do fast csum update */ |
162 | udp_fast_csum_update(udph, cp->daddr, cp->vaddr, | 162 | udp_fast_csum_update(udph, cp->daddr, cp->vaddr, |
163 | cp->dport, cp->vport); | 163 | cp->dport, cp->vport); |
164 | if ((*pskb)->ip_summed == CHECKSUM_HW) | 164 | if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) |
165 | (*pskb)->ip_summed = CHECKSUM_NONE; | 165 | (*pskb)->ip_summed = CHECKSUM_NONE; |
166 | } else { | 166 | } else { |
167 | /* full checksum calculation */ | 167 | /* full checksum calculation */ |
@@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb, | |||
216 | /* Only port and addr are changed, do fast csum update */ | 216 | /* Only port and addr are changed, do fast csum update */ |
217 | udp_fast_csum_update(udph, cp->vaddr, cp->daddr, | 217 | udp_fast_csum_update(udph, cp->vaddr, cp->daddr, |
218 | cp->vport, cp->dport); | 218 | cp->vport, cp->dport); |
219 | if ((*pskb)->ip_summed == CHECKSUM_HW) | 219 | if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) |
220 | (*pskb)->ip_summed = CHECKSUM_NONE; | 220 | (*pskb)->ip_summed = CHECKSUM_NONE; |
221 | } else { | 221 | } else { |
222 | /* full checksum calculation */ | 222 | /* full checksum calculation */ |
@@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
250 | case CHECKSUM_NONE: | 250 | case CHECKSUM_NONE: |
251 | skb->csum = skb_checksum(skb, udphoff, | 251 | skb->csum = skb_checksum(skb, udphoff, |
252 | skb->len - udphoff, 0); | 252 | skb->len - udphoff, 0); |
253 | case CHECKSUM_HW: | 253 | case CHECKSUM_COMPLETE: |
254 | if (csum_tcpudp_magic(skb->nh.iph->saddr, | 254 | if (csum_tcpudp_magic(skb->nh.iph->saddr, |
255 | skb->nh.iph->daddr, | 255 | skb->nh.iph->daddr, |
256 | skb->len - udphoff, | 256 | skb->len - udphoff, |
@@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | |||
262 | } | 262 | } |
263 | break; | 263 | break; |
264 | default: | 264 | default: |
265 | /* CHECKSUM_UNNECESSARY */ | 265 | /* No need to checksum. */ |
266 | break; | 266 | break; |
267 | } | 267 | } |
268 | } | 268 | } |
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6a9e34b794bc..f88347de21a9 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook, | |||
168 | unsigned int csum = 0; | 168 | unsigned int csum = 0; |
169 | 169 | ||
170 | switch (skb->ip_summed) { | 170 | switch (skb->ip_summed) { |
171 | case CHECKSUM_HW: | 171 | case CHECKSUM_COMPLETE: |
172 | if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) | 172 | if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) |
173 | break; | 173 | break; |
174 | if ((protocol == 0 && !(u16)csum_fold(skb->csum)) || | 174 | if ((protocol == 0 && !(u16)csum_fold(skb->csum)) || |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ef0b5aac5838..a55b8ff70ded 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -278,17 +278,6 @@ config IP_NF_MATCH_ECN | |||
278 | 278 | ||
279 | To compile it as a module, choose M here. If unsure, say N. | 279 | To compile it as a module, choose M here. If unsure, say N. |
280 | 280 | ||
281 | config IP_NF_MATCH_DSCP | ||
282 | tristate "DSCP match support" | ||
283 | depends on IP_NF_IPTABLES | ||
284 | help | ||
285 | This option adds a `DSCP' match, which allows you to match against | ||
286 | the IPv4 header DSCP field (DSCP codepoint). | ||
287 | |||
288 | The DSCP codepoint can have any value between 0x0 and 0x4f. | ||
289 | |||
290 | To compile it as a module, choose M here. If unsure, say N. | ||
291 | |||
292 | config IP_NF_MATCH_AH | 281 | config IP_NF_MATCH_AH |
293 | tristate "AH match support" | 282 | tristate "AH match support" |
294 | depends on IP_NF_IPTABLES | 283 | depends on IP_NF_IPTABLES |
@@ -568,17 +557,6 @@ config IP_NF_TARGET_ECN | |||
568 | 557 | ||
569 | To compile it as a module, choose M here. If unsure, say N. | 558 | To compile it as a module, choose M here. If unsure, say N. |
570 | 559 | ||
571 | config IP_NF_TARGET_DSCP | ||
572 | tristate "DSCP target support" | ||
573 | depends on IP_NF_MANGLE | ||
574 | help | ||
575 | This option adds a `DSCP' match, which allows you to match against | ||
576 | the IPv4 header DSCP field (DSCP codepoint). | ||
577 | |||
578 | The DSCP codepoint can have any value between 0x0 and 0x4f. | ||
579 | |||
580 | To compile it as a module, choose M here. If unsure, say N. | ||
581 | |||
582 | config IP_NF_TARGET_TTL | 560 | config IP_NF_TARGET_TTL |
583 | tristate 'TTL target support' | 561 | tristate 'TTL target support' |
584 | depends on IP_NF_MANGLE | 562 | depends on IP_NF_MANGLE |
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3ded4a3af59c..09aaed1a8063 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -59,7 +59,6 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o | |||
59 | obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o | 59 | obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o |
60 | obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o | 60 | obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o |
61 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o | 61 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o |
62 | obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o | ||
63 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o | 62 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o |
64 | obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o | 63 | obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o |
65 | obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o | 64 | obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o |
@@ -68,7 +67,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o | |||
68 | obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o | 67 | obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o |
69 | obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o | 68 | obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o |
70 | obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o | 69 | obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o |
71 | obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o | ||
72 | obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o | 70 | obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o |
73 | obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o | 71 | obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o |
74 | obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o | 72 | obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8d1d7a6e72a5..85f0d73ebfb4 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -56,8 +56,6 @@ do { \ | |||
56 | #define ARP_NF_ASSERT(x) | 56 | #define ARP_NF_ASSERT(x) |
57 | #endif | 57 | #endif |
58 | 58 | ||
59 | #include <linux/netfilter_ipv4/listhelp.h> | ||
60 | |||
61 | static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, | 59 | static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, |
62 | char *hdr_addr, int len) | 60 | char *hdr_addr, int len) |
63 | { | 61 | { |
@@ -208,8 +206,7 @@ static unsigned int arpt_error(struct sk_buff **pskb, | |||
208 | const struct net_device *out, | 206 | const struct net_device *out, |
209 | unsigned int hooknum, | 207 | unsigned int hooknum, |
210 | const struct xt_target *target, | 208 | const struct xt_target *target, |
211 | const void *targinfo, | 209 | const void *targinfo) |
212 | void *userinfo) | ||
213 | { | 210 | { |
214 | if (net_ratelimit()) | 211 | if (net_ratelimit()) |
215 | printk("arp_tables: error: '%s'\n", (char *)targinfo); | 212 | printk("arp_tables: error: '%s'\n", (char *)targinfo); |
@@ -226,8 +223,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, | |||
226 | unsigned int hook, | 223 | unsigned int hook, |
227 | const struct net_device *in, | 224 | const struct net_device *in, |
228 | const struct net_device *out, | 225 | const struct net_device *out, |
229 | struct arpt_table *table, | 226 | struct arpt_table *table) |
230 | void *userdata) | ||
231 | { | 227 | { |
232 | static const char nulldevname[IFNAMSIZ]; | 228 | static const char nulldevname[IFNAMSIZ]; |
233 | unsigned int verdict = NF_DROP; | 229 | unsigned int verdict = NF_DROP; |
@@ -302,8 +298,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, | |||
302 | in, out, | 298 | in, out, |
303 | hook, | 299 | hook, |
304 | t->u.kernel.target, | 300 | t->u.kernel.target, |
305 | t->data, | 301 | t->data); |
306 | userdata); | ||
307 | 302 | ||
308 | /* Target might have changed stuff. */ | 303 | /* Target might have changed stuff. */ |
309 | arp = (*pskb)->nh.arph; | 304 | arp = (*pskb)->nh.arph; |
@@ -490,12 +485,10 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i | |||
490 | if (t->u.kernel.target == &arpt_standard_target) { | 485 | if (t->u.kernel.target == &arpt_standard_target) { |
491 | if (!standard_check(t, size)) { | 486 | if (!standard_check(t, size)) { |
492 | ret = -EINVAL; | 487 | ret = -EINVAL; |
493 | goto out; | 488 | goto err; |
494 | } | 489 | } |
495 | } else if (t->u.kernel.target->checkentry | 490 | } else if (t->u.kernel.target->checkentry |
496 | && !t->u.kernel.target->checkentry(name, e, target, t->data, | 491 | && !t->u.kernel.target->checkentry(name, e, target, t->data, |
497 | t->u.target_size | ||
498 | - sizeof(*t), | ||
499 | e->comefrom)) { | 492 | e->comefrom)) { |
500 | duprintf("arp_tables: check failed for `%s'.\n", | 493 | duprintf("arp_tables: check failed for `%s'.\n", |
501 | t->u.kernel.target->name); | 494 | t->u.kernel.target->name); |
@@ -562,8 +555,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) | |||
562 | 555 | ||
563 | t = arpt_get_target(e); | 556 | t = arpt_get_target(e); |
564 | if (t->u.kernel.target->destroy) | 557 | if (t->u.kernel.target->destroy) |
565 | t->u.kernel.target->destroy(t->u.kernel.target, t->data, | 558 | t->u.kernel.target->destroy(t->u.kernel.target, t->data); |
566 | t->u.target_size - sizeof(*t)); | ||
567 | module_put(t->u.kernel.target->me); | 559 | module_put(t->u.kernel.target->me); |
568 | return 0; | 560 | return 0; |
569 | } | 561 | } |
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a58325c1ceb9..d12b1df252a1 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c | |||
@@ -11,7 +11,7 @@ static unsigned int | |||
11 | target(struct sk_buff **pskb, | 11 | target(struct sk_buff **pskb, |
12 | const struct net_device *in, const struct net_device *out, | 12 | const struct net_device *in, const struct net_device *out, |
13 | unsigned int hooknum, const struct xt_target *target, | 13 | unsigned int hooknum, const struct xt_target *target, |
14 | const void *targinfo, void *userinfo) | 14 | const void *targinfo) |
15 | { | 15 | { |
16 | const struct arpt_mangle *mangle = targinfo; | 16 | const struct arpt_mangle *mangle = targinfo; |
17 | struct arphdr *arp; | 17 | struct arphdr *arp; |
@@ -67,7 +67,7 @@ target(struct sk_buff **pskb, | |||
67 | 67 | ||
68 | static int | 68 | static int |
69 | checkentry(const char *tablename, const void *e, const struct xt_target *target, | 69 | checkentry(const char *tablename, const void *e, const struct xt_target *target, |
70 | void *targinfo, unsigned int targinfosize, unsigned int hook_mask) | 70 | void *targinfo, unsigned int hook_mask) |
71 | { | 71 | { |
72 | const struct arpt_mangle *mangle = targinfo; | 72 | const struct arpt_mangle *mangle = targinfo; |
73 | 73 | ||
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index d7c472faa53b..7edea2a1696c 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c | |||
@@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook, | |||
155 | const struct net_device *out, | 155 | const struct net_device *out, |
156 | int (*okfn)(struct sk_buff *)) | 156 | int (*okfn)(struct sk_buff *)) |
157 | { | 157 | { |
158 | return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL); | 158 | return arpt_do_table(pskb, hook, in, out, &packet_filter); |
159 | } | 159 | } |
160 | 160 | ||
161 | static struct nf_hook_ops arpt_ops[] = { | 161 | static struct nf_hook_ops arpt_ops[] = { |
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index aa459177c3f8..c432b3163609 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c | |||
@@ -47,7 +47,6 @@ | |||
47 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | 47 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> |
48 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 48 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> |
49 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 49 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> |
50 | #include <linux/netfilter_ipv4/listhelp.h> | ||
51 | 50 | ||
52 | #define IP_CONNTRACK_VERSION "2.4" | 51 | #define IP_CONNTRACK_VERSION "2.4" |
53 | 52 | ||
@@ -64,17 +63,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0); | |||
64 | 63 | ||
65 | void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; | 64 | void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; |
66 | LIST_HEAD(ip_conntrack_expect_list); | 65 | LIST_HEAD(ip_conntrack_expect_list); |
67 | struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; | 66 | struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly; |
68 | static LIST_HEAD(helpers); | 67 | static LIST_HEAD(helpers); |
69 | unsigned int ip_conntrack_htable_size = 0; | 68 | unsigned int ip_conntrack_htable_size __read_mostly = 0; |
70 | int ip_conntrack_max; | 69 | int ip_conntrack_max __read_mostly; |
71 | struct list_head *ip_conntrack_hash; | 70 | struct list_head *ip_conntrack_hash __read_mostly; |
72 | static kmem_cache_t *ip_conntrack_cachep __read_mostly; | 71 | static kmem_cache_t *ip_conntrack_cachep __read_mostly; |
73 | static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; | 72 | static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; |
74 | struct ip_conntrack ip_conntrack_untracked; | 73 | struct ip_conntrack ip_conntrack_untracked; |
75 | unsigned int ip_ct_log_invalid; | 74 | unsigned int ip_ct_log_invalid __read_mostly; |
76 | static LIST_HEAD(unconfirmed); | 75 | static LIST_HEAD(unconfirmed); |
77 | static int ip_conntrack_vmalloc; | 76 | static int ip_conntrack_vmalloc __read_mostly; |
78 | 77 | ||
79 | static unsigned int ip_conntrack_next_id; | 78 | static unsigned int ip_conntrack_next_id; |
80 | static unsigned int ip_conntrack_expect_next_id; | 79 | static unsigned int ip_conntrack_expect_next_id; |
@@ -294,15 +293,10 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct) | |||
294 | static void | 293 | static void |
295 | clean_from_lists(struct ip_conntrack *ct) | 294 | clean_from_lists(struct ip_conntrack *ct) |
296 | { | 295 | { |
297 | unsigned int ho, hr; | ||
298 | |||
299 | DEBUGP("clean_from_lists(%p)\n", ct); | 296 | DEBUGP("clean_from_lists(%p)\n", ct); |
300 | ASSERT_WRITE_LOCK(&ip_conntrack_lock); | 297 | ASSERT_WRITE_LOCK(&ip_conntrack_lock); |
301 | 298 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | |
302 | ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 299 | list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); |
303 | hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
304 | LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); | ||
305 | LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); | ||
306 | 300 | ||
307 | /* Destroy all pending expectations */ | 301 | /* Destroy all pending expectations */ |
308 | ip_ct_remove_expectations(ct); | 302 | ip_ct_remove_expectations(ct); |
@@ -313,6 +307,7 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
313 | { | 307 | { |
314 | struct ip_conntrack *ct = (struct ip_conntrack *)nfct; | 308 | struct ip_conntrack *ct = (struct ip_conntrack *)nfct; |
315 | struct ip_conntrack_protocol *proto; | 309 | struct ip_conntrack_protocol *proto; |
310 | struct ip_conntrack_helper *helper; | ||
316 | 311 | ||
317 | DEBUGP("destroy_conntrack(%p)\n", ct); | 312 | DEBUGP("destroy_conntrack(%p)\n", ct); |
318 | IP_NF_ASSERT(atomic_read(&nfct->use) == 0); | 313 | IP_NF_ASSERT(atomic_read(&nfct->use) == 0); |
@@ -321,6 +316,10 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
321 | ip_conntrack_event(IPCT_DESTROY, ct); | 316 | ip_conntrack_event(IPCT_DESTROY, ct); |
322 | set_bit(IPS_DYING_BIT, &ct->status); | 317 | set_bit(IPS_DYING_BIT, &ct->status); |
323 | 318 | ||
319 | helper = ct->helper; | ||
320 | if (helper && helper->destroy) | ||
321 | helper->destroy(ct); | ||
322 | |||
324 | /* To make sure we don't get any weird locking issues here: | 323 | /* To make sure we don't get any weird locking issues here: |
325 | * destroy_conntrack() MUST NOT be called with a write lock | 324 | * destroy_conntrack() MUST NOT be called with a write lock |
326 | * to ip_conntrack_lock!!! -HW */ | 325 | * to ip_conntrack_lock!!! -HW */ |
@@ -367,16 +366,6 @@ static void death_by_timeout(unsigned long ul_conntrack) | |||
367 | ip_conntrack_put(ct); | 366 | ip_conntrack_put(ct); |
368 | } | 367 | } |
369 | 368 | ||
370 | static inline int | ||
371 | conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, | ||
372 | const struct ip_conntrack_tuple *tuple, | ||
373 | const struct ip_conntrack *ignored_conntrack) | ||
374 | { | ||
375 | ASSERT_READ_LOCK(&ip_conntrack_lock); | ||
376 | return tuplehash_to_ctrack(i) != ignored_conntrack | ||
377 | && ip_ct_tuple_equal(tuple, &i->tuple); | ||
378 | } | ||
379 | |||
380 | struct ip_conntrack_tuple_hash * | 369 | struct ip_conntrack_tuple_hash * |
381 | __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, | 370 | __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, |
382 | const struct ip_conntrack *ignored_conntrack) | 371 | const struct ip_conntrack *ignored_conntrack) |
@@ -386,7 +375,8 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, | |||
386 | 375 | ||
387 | ASSERT_READ_LOCK(&ip_conntrack_lock); | 376 | ASSERT_READ_LOCK(&ip_conntrack_lock); |
388 | list_for_each_entry(h, &ip_conntrack_hash[hash], list) { | 377 | list_for_each_entry(h, &ip_conntrack_hash[hash], list) { |
389 | if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { | 378 | if (tuplehash_to_ctrack(h) != ignored_conntrack && |
379 | ip_ct_tuple_equal(tuple, &h->tuple)) { | ||
390 | CONNTRACK_STAT_INC(found); | 380 | CONNTRACK_STAT_INC(found); |
391 | return h; | 381 | return h; |
392 | } | 382 | } |
@@ -417,10 +407,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, | |||
417 | unsigned int repl_hash) | 407 | unsigned int repl_hash) |
418 | { | 408 | { |
419 | ct->id = ++ip_conntrack_next_id; | 409 | ct->id = ++ip_conntrack_next_id; |
420 | list_prepend(&ip_conntrack_hash[hash], | 410 | list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, |
421 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | 411 | &ip_conntrack_hash[hash]); |
422 | list_prepend(&ip_conntrack_hash[repl_hash], | 412 | list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, |
423 | &ct->tuplehash[IP_CT_DIR_REPLY].list); | 413 | &ip_conntrack_hash[repl_hash]); |
424 | } | 414 | } |
425 | 415 | ||
426 | void ip_conntrack_hash_insert(struct ip_conntrack *ct) | 416 | void ip_conntrack_hash_insert(struct ip_conntrack *ct) |
@@ -440,6 +430,7 @@ int | |||
440 | __ip_conntrack_confirm(struct sk_buff **pskb) | 430 | __ip_conntrack_confirm(struct sk_buff **pskb) |
441 | { | 431 | { |
442 | unsigned int hash, repl_hash; | 432 | unsigned int hash, repl_hash; |
433 | struct ip_conntrack_tuple_hash *h; | ||
443 | struct ip_conntrack *ct; | 434 | struct ip_conntrack *ct; |
444 | enum ip_conntrack_info ctinfo; | 435 | enum ip_conntrack_info ctinfo; |
445 | 436 | ||
@@ -470,43 +461,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb) | |||
470 | /* See if there's one in the list already, including reverse: | 461 | /* See if there's one in the list already, including reverse: |
471 | NAT could have grabbed it without realizing, since we're | 462 | NAT could have grabbed it without realizing, since we're |
472 | not in the hash. If there is, we lost race. */ | 463 | not in the hash. If there is, we lost race. */ |
473 | if (!LIST_FIND(&ip_conntrack_hash[hash], | 464 | list_for_each_entry(h, &ip_conntrack_hash[hash], list) |
474 | conntrack_tuple_cmp, | 465 | if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, |
475 | struct ip_conntrack_tuple_hash *, | 466 | &h->tuple)) |
476 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) | 467 | goto out; |
477 | && !LIST_FIND(&ip_conntrack_hash[repl_hash], | 468 | list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list) |
478 | conntrack_tuple_cmp, | 469 | if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, |
479 | struct ip_conntrack_tuple_hash *, | 470 | &h->tuple)) |
480 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { | 471 | goto out; |
481 | /* Remove from unconfirmed list */ | ||
482 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | ||
483 | 472 | ||
484 | __ip_conntrack_hash_insert(ct, hash, repl_hash); | 473 | /* Remove from unconfirmed list */ |
485 | /* Timer relative to confirmation time, not original | 474 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); |
486 | setting time, otherwise we'd get timer wrap in | 475 | |
487 | weird delay cases. */ | 476 | __ip_conntrack_hash_insert(ct, hash, repl_hash); |
488 | ct->timeout.expires += jiffies; | 477 | /* Timer relative to confirmation time, not original |
489 | add_timer(&ct->timeout); | 478 | setting time, otherwise we'd get timer wrap in |
490 | atomic_inc(&ct->ct_general.use); | 479 | weird delay cases. */ |
491 | set_bit(IPS_CONFIRMED_BIT, &ct->status); | 480 | ct->timeout.expires += jiffies; |
492 | CONNTRACK_STAT_INC(insert); | 481 | add_timer(&ct->timeout); |
493 | write_unlock_bh(&ip_conntrack_lock); | 482 | atomic_inc(&ct->ct_general.use); |
494 | if (ct->helper) | 483 | set_bit(IPS_CONFIRMED_BIT, &ct->status); |
495 | ip_conntrack_event_cache(IPCT_HELPER, *pskb); | 484 | CONNTRACK_STAT_INC(insert); |
485 | write_unlock_bh(&ip_conntrack_lock); | ||
486 | if (ct->helper) | ||
487 | ip_conntrack_event_cache(IPCT_HELPER, *pskb); | ||
496 | #ifdef CONFIG_IP_NF_NAT_NEEDED | 488 | #ifdef CONFIG_IP_NF_NAT_NEEDED |
497 | if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || | 489 | if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || |
498 | test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) | 490 | test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) |
499 | ip_conntrack_event_cache(IPCT_NATINFO, *pskb); | 491 | ip_conntrack_event_cache(IPCT_NATINFO, *pskb); |
500 | #endif | 492 | #endif |
501 | ip_conntrack_event_cache(master_ct(ct) ? | 493 | ip_conntrack_event_cache(master_ct(ct) ? |
502 | IPCT_RELATED : IPCT_NEW, *pskb); | 494 | IPCT_RELATED : IPCT_NEW, *pskb); |
503 | 495 | ||
504 | return NF_ACCEPT; | 496 | return NF_ACCEPT; |
505 | } | ||
506 | 497 | ||
498 | out: | ||
507 | CONNTRACK_STAT_INC(insert_failed); | 499 | CONNTRACK_STAT_INC(insert_failed); |
508 | write_unlock_bh(&ip_conntrack_lock); | 500 | write_unlock_bh(&ip_conntrack_lock); |
509 | |||
510 | return NF_DROP; | 501 | return NF_DROP; |
511 | } | 502 | } |
512 | 503 | ||
@@ -527,23 +518,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, | |||
527 | 518 | ||
528 | /* There's a small race here where we may free a just-assured | 519 | /* There's a small race here where we may free a just-assured |
529 | connection. Too bad: we're in trouble anyway. */ | 520 | connection. Too bad: we're in trouble anyway. */ |
530 | static inline int unreplied(const struct ip_conntrack_tuple_hash *i) | ||
531 | { | ||
532 | return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); | ||
533 | } | ||
534 | |||
535 | static int early_drop(struct list_head *chain) | 521 | static int early_drop(struct list_head *chain) |
536 | { | 522 | { |
537 | /* Traverse backwards: gives us oldest, which is roughly LRU */ | 523 | /* Traverse backwards: gives us oldest, which is roughly LRU */ |
538 | struct ip_conntrack_tuple_hash *h; | 524 | struct ip_conntrack_tuple_hash *h; |
539 | struct ip_conntrack *ct = NULL; | 525 | struct ip_conntrack *ct = NULL, *tmp; |
540 | int dropped = 0; | 526 | int dropped = 0; |
541 | 527 | ||
542 | read_lock_bh(&ip_conntrack_lock); | 528 | read_lock_bh(&ip_conntrack_lock); |
543 | h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); | 529 | list_for_each_entry_reverse(h, chain, list) { |
544 | if (h) { | 530 | tmp = tuplehash_to_ctrack(h); |
545 | ct = tuplehash_to_ctrack(h); | 531 | if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { |
546 | atomic_inc(&ct->ct_general.use); | 532 | ct = tmp; |
533 | atomic_inc(&ct->ct_general.use); | ||
534 | break; | ||
535 | } | ||
547 | } | 536 | } |
548 | read_unlock_bh(&ip_conntrack_lock); | 537 | read_unlock_bh(&ip_conntrack_lock); |
549 | 538 | ||
@@ -559,18 +548,16 @@ static int early_drop(struct list_head *chain) | |||
559 | return dropped; | 548 | return dropped; |
560 | } | 549 | } |
561 | 550 | ||
562 | static inline int helper_cmp(const struct ip_conntrack_helper *i, | ||
563 | const struct ip_conntrack_tuple *rtuple) | ||
564 | { | ||
565 | return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); | ||
566 | } | ||
567 | |||
568 | static struct ip_conntrack_helper * | 551 | static struct ip_conntrack_helper * |
569 | __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) | 552 | __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) |
570 | { | 553 | { |
571 | return LIST_FIND(&helpers, helper_cmp, | 554 | struct ip_conntrack_helper *h; |
572 | struct ip_conntrack_helper *, | 555 | |
573 | tuple); | 556 | list_for_each_entry(h, &helpers, list) { |
557 | if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) | ||
558 | return h; | ||
559 | } | ||
560 | return NULL; | ||
574 | } | 561 | } |
575 | 562 | ||
576 | struct ip_conntrack_helper * | 563 | struct ip_conntrack_helper * |
@@ -640,11 +627,15 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, | |||
640 | ip_conntrack_hash_rnd_initted = 1; | 627 | ip_conntrack_hash_rnd_initted = 1; |
641 | } | 628 | } |
642 | 629 | ||
630 | /* We don't want any race condition at early drop stage */ | ||
631 | atomic_inc(&ip_conntrack_count); | ||
632 | |||
643 | if (ip_conntrack_max | 633 | if (ip_conntrack_max |
644 | && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { | 634 | && atomic_read(&ip_conntrack_count) > ip_conntrack_max) { |
645 | unsigned int hash = hash_conntrack(orig); | 635 | unsigned int hash = hash_conntrack(orig); |
646 | /* Try dropping from this hash chain. */ | 636 | /* Try dropping from this hash chain. */ |
647 | if (!early_drop(&ip_conntrack_hash[hash])) { | 637 | if (!early_drop(&ip_conntrack_hash[hash])) { |
638 | atomic_dec(&ip_conntrack_count); | ||
648 | if (net_ratelimit()) | 639 | if (net_ratelimit()) |
649 | printk(KERN_WARNING | 640 | printk(KERN_WARNING |
650 | "ip_conntrack: table full, dropping" | 641 | "ip_conntrack: table full, dropping" |
@@ -656,6 +647,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, | |||
656 | conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); | 647 | conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); |
657 | if (!conntrack) { | 648 | if (!conntrack) { |
658 | DEBUGP("Can't allocate conntrack.\n"); | 649 | DEBUGP("Can't allocate conntrack.\n"); |
650 | atomic_dec(&ip_conntrack_count); | ||
659 | return ERR_PTR(-ENOMEM); | 651 | return ERR_PTR(-ENOMEM); |
660 | } | 652 | } |
661 | 653 | ||
@@ -669,8 +661,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, | |||
669 | conntrack->timeout.data = (unsigned long)conntrack; | 661 | conntrack->timeout.data = (unsigned long)conntrack; |
670 | conntrack->timeout.function = death_by_timeout; | 662 | conntrack->timeout.function = death_by_timeout; |
671 | 663 | ||
672 | atomic_inc(&ip_conntrack_count); | ||
673 | |||
674 | return conntrack; | 664 | return conntrack; |
675 | } | 665 | } |
676 | 666 | ||
@@ -1062,7 +1052,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) | |||
1062 | { | 1052 | { |
1063 | BUG_ON(me->timeout == 0); | 1053 | BUG_ON(me->timeout == 0); |
1064 | write_lock_bh(&ip_conntrack_lock); | 1054 | write_lock_bh(&ip_conntrack_lock); |
1065 | list_prepend(&helpers, me); | 1055 | list_add(&me->list, &helpers); |
1066 | write_unlock_bh(&ip_conntrack_lock); | 1056 | write_unlock_bh(&ip_conntrack_lock); |
1067 | 1057 | ||
1068 | return 0; | 1058 | return 0; |
@@ -1081,24 +1071,24 @@ __ip_conntrack_helper_find_byname(const char *name) | |||
1081 | return NULL; | 1071 | return NULL; |
1082 | } | 1072 | } |
1083 | 1073 | ||
1084 | static inline int unhelp(struct ip_conntrack_tuple_hash *i, | 1074 | static inline void unhelp(struct ip_conntrack_tuple_hash *i, |
1085 | const struct ip_conntrack_helper *me) | 1075 | const struct ip_conntrack_helper *me) |
1086 | { | 1076 | { |
1087 | if (tuplehash_to_ctrack(i)->helper == me) { | 1077 | if (tuplehash_to_ctrack(i)->helper == me) { |
1088 | ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); | 1078 | ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); |
1089 | tuplehash_to_ctrack(i)->helper = NULL; | 1079 | tuplehash_to_ctrack(i)->helper = NULL; |
1090 | } | 1080 | } |
1091 | return 0; | ||
1092 | } | 1081 | } |
1093 | 1082 | ||
1094 | void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) | 1083 | void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) |
1095 | { | 1084 | { |
1096 | unsigned int i; | 1085 | unsigned int i; |
1086 | struct ip_conntrack_tuple_hash *h; | ||
1097 | struct ip_conntrack_expect *exp, *tmp; | 1087 | struct ip_conntrack_expect *exp, *tmp; |
1098 | 1088 | ||
1099 | /* Need write lock here, to delete helper. */ | 1089 | /* Need write lock here, to delete helper. */ |
1100 | write_lock_bh(&ip_conntrack_lock); | 1090 | write_lock_bh(&ip_conntrack_lock); |
1101 | LIST_DELETE(&helpers, me); | 1091 | list_del(&me->list); |
1102 | 1092 | ||
1103 | /* Get rid of expectations */ | 1093 | /* Get rid of expectations */ |
1104 | list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { | 1094 | list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { |
@@ -1108,10 +1098,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) | |||
1108 | } | 1098 | } |
1109 | } | 1099 | } |
1110 | /* Get rid of expecteds, set helpers to NULL. */ | 1100 | /* Get rid of expecteds, set helpers to NULL. */ |
1111 | LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); | 1101 | list_for_each_entry(h, &unconfirmed, list) |
1112 | for (i = 0; i < ip_conntrack_htable_size; i++) | 1102 | unhelp(h, me); |
1113 | LIST_FIND_W(&ip_conntrack_hash[i], unhelp, | 1103 | for (i = 0; i < ip_conntrack_htable_size; i++) { |
1114 | struct ip_conntrack_tuple_hash *, me); | 1104 | list_for_each_entry(h, &ip_conntrack_hash[i], list) |
1105 | unhelp(h, me); | ||
1106 | } | ||
1115 | write_unlock_bh(&ip_conntrack_lock); | 1107 | write_unlock_bh(&ip_conntrack_lock); |
1116 | 1108 | ||
1117 | /* Someone could be still looking at the helper in a bh. */ | 1109 | /* Someone could be still looking at the helper in a bh. */ |
@@ -1237,46 +1229,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) | |||
1237 | nf_conntrack_get(nskb->nfct); | 1229 | nf_conntrack_get(nskb->nfct); |
1238 | } | 1230 | } |
1239 | 1231 | ||
1240 | static inline int | ||
1241 | do_iter(const struct ip_conntrack_tuple_hash *i, | ||
1242 | int (*iter)(struct ip_conntrack *i, void *data), | ||
1243 | void *data) | ||
1244 | { | ||
1245 | return iter(tuplehash_to_ctrack(i), data); | ||
1246 | } | ||
1247 | |||
1248 | /* Bring out ya dead! */ | 1232 | /* Bring out ya dead! */ |
1249 | static struct ip_conntrack_tuple_hash * | 1233 | static struct ip_conntrack * |
1250 | get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), | 1234 | get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), |
1251 | void *data, unsigned int *bucket) | 1235 | void *data, unsigned int *bucket) |
1252 | { | 1236 | { |
1253 | struct ip_conntrack_tuple_hash *h = NULL; | 1237 | struct ip_conntrack_tuple_hash *h; |
1238 | struct ip_conntrack *ct; | ||
1254 | 1239 | ||
1255 | write_lock_bh(&ip_conntrack_lock); | 1240 | write_lock_bh(&ip_conntrack_lock); |
1256 | for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { | 1241 | for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { |
1257 | h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, | 1242 | list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) { |
1258 | struct ip_conntrack_tuple_hash *, iter, data); | 1243 | ct = tuplehash_to_ctrack(h); |
1259 | if (h) | 1244 | if (iter(ct, data)) |
1260 | break; | 1245 | goto found; |
1246 | } | ||
1247 | } | ||
1248 | list_for_each_entry(h, &unconfirmed, list) { | ||
1249 | ct = tuplehash_to_ctrack(h); | ||
1250 | if (iter(ct, data)) | ||
1251 | goto found; | ||
1261 | } | 1252 | } |
1262 | if (!h) | ||
1263 | h = LIST_FIND_W(&unconfirmed, do_iter, | ||
1264 | struct ip_conntrack_tuple_hash *, iter, data); | ||
1265 | if (h) | ||
1266 | atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); | ||
1267 | write_unlock_bh(&ip_conntrack_lock); | 1253 | write_unlock_bh(&ip_conntrack_lock); |
1254 | return NULL; | ||
1268 | 1255 | ||
1269 | return h; | 1256 | found: |
1257 | atomic_inc(&ct->ct_general.use); | ||
1258 | write_unlock_bh(&ip_conntrack_lock); | ||
1259 | return ct; | ||
1270 | } | 1260 | } |
1271 | 1261 | ||
1272 | void | 1262 | void |
1273 | ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) | 1263 | ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) |
1274 | { | 1264 | { |
1275 | struct ip_conntrack_tuple_hash *h; | 1265 | struct ip_conntrack *ct; |
1276 | unsigned int bucket = 0; | 1266 | unsigned int bucket = 0; |
1277 | 1267 | ||
1278 | while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { | 1268 | while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { |
1279 | struct ip_conntrack *ct = tuplehash_to_ctrack(h); | ||
1280 | /* Time to push up daises... */ | 1269 | /* Time to push up daises... */ |
1281 | if (del_timer(&ct->timeout)) | 1270 | if (del_timer(&ct->timeout)) |
1282 | death_by_timeout((unsigned long)ct); | 1271 | death_by_timeout((unsigned long)ct); |
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index b020a33e65e9..fb0aee691721 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c | |||
@@ -20,11 +20,11 @@ | |||
20 | * - We can only support one single call within each session | 20 | * - We can only support one single call within each session |
21 | * | 21 | * |
22 | * TODO: | 22 | * TODO: |
23 | * - testing of incoming PPTP calls | 23 | * - testing of incoming PPTP calls |
24 | * | 24 | * |
25 | * Changes: | 25 | * Changes: |
26 | * 2002-02-05 - Version 1.3 | 26 | * 2002-02-05 - Version 1.3 |
27 | * - Call ip_conntrack_unexpect_related() from | 27 | * - Call ip_conntrack_unexpect_related() from |
28 | * pptp_destroy_siblings() to destroy expectations in case | 28 | * pptp_destroy_siblings() to destroy expectations in case |
29 | * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen | 29 | * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen |
30 | * (Philip Craig <philipc@snapgear.com>) | 30 | * (Philip Craig <philipc@snapgear.com>) |
@@ -80,7 +80,7 @@ int | |||
80 | struct PptpControlHeader *ctlh, | 80 | struct PptpControlHeader *ctlh, |
81 | union pptp_ctrl_union *pptpReq); | 81 | union pptp_ctrl_union *pptpReq); |
82 | 82 | ||
83 | int | 83 | void |
84 | (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, | 84 | (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, |
85 | struct ip_conntrack_expect *expect_reply); | 85 | struct ip_conntrack_expect *expect_reply); |
86 | 86 | ||
@@ -141,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct, | |||
141 | invert_tuplepr(&inv_t, &exp->tuple); | 141 | invert_tuplepr(&inv_t, &exp->tuple); |
142 | DEBUGP("trying to unexpect other dir: "); | 142 | DEBUGP("trying to unexpect other dir: "); |
143 | DUMP_TUPLE(&inv_t); | 143 | DUMP_TUPLE(&inv_t); |
144 | 144 | ||
145 | exp_other = ip_conntrack_expect_find(&inv_t); | 145 | exp_other = ip_conntrack_expect_find(&inv_t); |
146 | if (exp_other) { | 146 | if (exp_other) { |
147 | /* delete other expectation. */ | 147 | /* delete other expectation. */ |
@@ -194,15 +194,16 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) | |||
194 | { | 194 | { |
195 | struct ip_conntrack_tuple t; | 195 | struct ip_conntrack_tuple t; |
196 | 196 | ||
197 | /* Since ct->sibling_list has literally rusted away in 2.6.11, | 197 | ip_ct_gre_keymap_destroy(ct); |
198 | /* Since ct->sibling_list has literally rusted away in 2.6.11, | ||
198 | * we now need another way to find out about our sibling | 199 | * we now need another way to find out about our sibling |
199 | * contrack and expects... -HW */ | 200 | * contrack and expects... -HW */ |
200 | 201 | ||
201 | /* try original (pns->pac) tuple */ | 202 | /* try original (pns->pac) tuple */ |
202 | memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); | 203 | memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); |
203 | t.dst.protonum = IPPROTO_GRE; | 204 | t.dst.protonum = IPPROTO_GRE; |
204 | t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); | 205 | t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id; |
205 | t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); | 206 | t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id; |
206 | 207 | ||
207 | if (!destroy_sibling_or_exp(&t)) | 208 | if (!destroy_sibling_or_exp(&t)) |
208 | DEBUGP("failed to timeout original pns->pac ct/exp\n"); | 209 | DEBUGP("failed to timeout original pns->pac ct/exp\n"); |
@@ -210,8 +211,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) | |||
210 | /* try reply (pac->pns) tuple */ | 211 | /* try reply (pac->pns) tuple */ |
211 | memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); | 212 | memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); |
212 | t.dst.protonum = IPPROTO_GRE; | 213 | t.dst.protonum = IPPROTO_GRE; |
213 | t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); | 214 | t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id; |
214 | t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); | 215 | t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id; |
215 | 216 | ||
216 | if (!destroy_sibling_or_exp(&t)) | 217 | if (!destroy_sibling_or_exp(&t)) |
217 | DEBUGP("failed to timeout reply pac->pns ct/exp\n"); | 218 | DEBUGP("failed to timeout reply pac->pns ct/exp\n"); |
@@ -219,94 +220,63 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) | |||
219 | 220 | ||
220 | /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ | 221 | /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ |
221 | static inline int | 222 | static inline int |
222 | exp_gre(struct ip_conntrack *master, | 223 | exp_gre(struct ip_conntrack *ct, |
223 | u_int32_t seq, | ||
224 | __be16 callid, | 224 | __be16 callid, |
225 | __be16 peer_callid) | 225 | __be16 peer_callid) |
226 | { | 226 | { |
227 | struct ip_conntrack_tuple inv_tuple; | ||
228 | struct ip_conntrack_tuple exp_tuples[] = { | ||
229 | /* tuple in original direction, PNS->PAC */ | ||
230 | { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, | ||
231 | .u = { .gre = { .key = peer_callid } } | ||
232 | }, | ||
233 | .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, | ||
234 | .u = { .gre = { .key = callid } }, | ||
235 | .protonum = IPPROTO_GRE | ||
236 | }, | ||
237 | }, | ||
238 | /* tuple in reply direction, PAC->PNS */ | ||
239 | { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, | ||
240 | .u = { .gre = { .key = callid } } | ||
241 | }, | ||
242 | .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, | ||
243 | .u = { .gre = { .key = peer_callid } }, | ||
244 | .protonum = IPPROTO_GRE | ||
245 | }, | ||
246 | } | ||
247 | }; | ||
248 | struct ip_conntrack_expect *exp_orig, *exp_reply; | 227 | struct ip_conntrack_expect *exp_orig, *exp_reply; |
249 | int ret = 1; | 228 | int ret = 1; |
250 | 229 | ||
251 | exp_orig = ip_conntrack_expect_alloc(master); | 230 | exp_orig = ip_conntrack_expect_alloc(ct); |
252 | if (exp_orig == NULL) | 231 | if (exp_orig == NULL) |
253 | goto out; | 232 | goto out; |
254 | 233 | ||
255 | exp_reply = ip_conntrack_expect_alloc(master); | 234 | exp_reply = ip_conntrack_expect_alloc(ct); |
256 | if (exp_reply == NULL) | 235 | if (exp_reply == NULL) |
257 | goto out_put_orig; | 236 | goto out_put_orig; |
258 | 237 | ||
259 | memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); | 238 | /* original direction, PNS->PAC */ |
239 | exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; | ||
240 | exp_orig->tuple.src.u.gre.key = peer_callid; | ||
241 | exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; | ||
242 | exp_orig->tuple.dst.u.gre.key = callid; | ||
243 | exp_orig->tuple.dst.protonum = IPPROTO_GRE; | ||
260 | 244 | ||
261 | exp_orig->mask.src.ip = 0xffffffff; | 245 | exp_orig->mask.src.ip = 0xffffffff; |
262 | exp_orig->mask.src.u.all = 0; | 246 | exp_orig->mask.src.u.all = 0; |
263 | exp_orig->mask.dst.u.all = 0; | ||
264 | exp_orig->mask.dst.u.gre.key = htons(0xffff); | 247 | exp_orig->mask.dst.u.gre.key = htons(0xffff); |
265 | exp_orig->mask.dst.ip = 0xffffffff; | 248 | exp_orig->mask.dst.ip = 0xffffffff; |
266 | exp_orig->mask.dst.protonum = 0xff; | 249 | exp_orig->mask.dst.protonum = 0xff; |
267 | 250 | ||
268 | exp_orig->master = master; | 251 | exp_orig->master = ct; |
269 | exp_orig->expectfn = pptp_expectfn; | 252 | exp_orig->expectfn = pptp_expectfn; |
270 | exp_orig->flags = 0; | 253 | exp_orig->flags = 0; |
271 | 254 | ||
272 | /* both expectations are identical apart from tuple */ | 255 | /* both expectations are identical apart from tuple */ |
273 | memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); | 256 | memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); |
274 | memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); | ||
275 | 257 | ||
276 | if (ip_nat_pptp_hook_exp_gre) | 258 | /* reply direction, PAC->PNS */ |
277 | ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); | 259 | exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; |
278 | else { | 260 | exp_reply->tuple.src.u.gre.key = callid; |
279 | 261 | exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; | |
280 | DEBUGP("calling expect_related PNS->PAC"); | 262 | exp_reply->tuple.dst.u.gre.key = peer_callid; |
281 | DUMP_TUPLE(&exp_orig->tuple); | 263 | exp_reply->tuple.dst.protonum = IPPROTO_GRE; |
282 | |||
283 | if (ip_conntrack_expect_related(exp_orig) != 0) { | ||
284 | DEBUGP("cannot expect_related()\n"); | ||
285 | goto out_put_both; | ||
286 | } | ||
287 | 264 | ||
288 | DEBUGP("calling expect_related PAC->PNS"); | 265 | if (ip_nat_pptp_hook_exp_gre) |
289 | DUMP_TUPLE(&exp_reply->tuple); | 266 | ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); |
290 | 267 | if (ip_conntrack_expect_related(exp_orig) != 0) | |
291 | if (ip_conntrack_expect_related(exp_reply) != 0) { | 268 | goto out_put_both; |
292 | DEBUGP("cannot expect_related()\n"); | 269 | if (ip_conntrack_expect_related(exp_reply) != 0) |
293 | goto out_unexpect_orig; | 270 | goto out_unexpect_orig; |
294 | } | 271 | |
295 | 272 | /* Add GRE keymap entries */ | |
296 | /* Add GRE keymap entries */ | 273 | if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0) |
297 | if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { | 274 | goto out_unexpect_both; |
298 | DEBUGP("cannot keymap_add() exp\n"); | 275 | if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) { |
299 | goto out_unexpect_both; | 276 | ip_ct_gre_keymap_destroy(ct); |
300 | } | 277 | goto out_unexpect_both; |
301 | |||
302 | invert_tuplepr(&inv_tuple, &exp_reply->tuple); | ||
303 | if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { | ||
304 | ip_ct_gre_keymap_destroy(master); | ||
305 | DEBUGP("cannot keymap_add() exp_inv\n"); | ||
306 | goto out_unexpect_both; | ||
307 | } | ||
308 | ret = 0; | ||
309 | } | 278 | } |
279 | ret = 0; | ||
310 | 280 | ||
311 | out_put_both: | 281 | out_put_both: |
312 | ip_conntrack_expect_put(exp_reply); | 282 | ip_conntrack_expect_put(exp_reply); |
@@ -322,73 +292,36 @@ out_unexpect_orig: | |||
322 | goto out_put_both; | 292 | goto out_put_both; |
323 | } | 293 | } |
324 | 294 | ||
325 | static inline int | 295 | static inline int |
326 | pptp_inbound_pkt(struct sk_buff **pskb, | 296 | pptp_inbound_pkt(struct sk_buff **pskb, |
327 | struct tcphdr *tcph, | 297 | struct PptpControlHeader *ctlh, |
328 | unsigned int nexthdr_off, | 298 | union pptp_ctrl_union *pptpReq, |
329 | unsigned int datalen, | 299 | unsigned int reqlen, |
330 | struct ip_conntrack *ct, | 300 | struct ip_conntrack *ct, |
331 | enum ip_conntrack_info ctinfo) | 301 | enum ip_conntrack_info ctinfo) |
332 | { | 302 | { |
333 | struct PptpControlHeader _ctlh, *ctlh; | ||
334 | unsigned int reqlen; | ||
335 | union pptp_ctrl_union _pptpReq, *pptpReq; | ||
336 | struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; | 303 | struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; |
337 | u_int16_t msg; | 304 | u_int16_t msg; |
338 | __be16 *cid, *pcid; | 305 | __be16 cid = 0, pcid = 0; |
339 | u_int32_t seq; | ||
340 | |||
341 | ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); | ||
342 | if (!ctlh) { | ||
343 | DEBUGP("error during skb_header_pointer\n"); | ||
344 | return NF_ACCEPT; | ||
345 | } | ||
346 | nexthdr_off += sizeof(_ctlh); | ||
347 | datalen -= sizeof(_ctlh); | ||
348 | |||
349 | reqlen = datalen; | ||
350 | if (reqlen > sizeof(*pptpReq)) | ||
351 | reqlen = sizeof(*pptpReq); | ||
352 | pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); | ||
353 | if (!pptpReq) { | ||
354 | DEBUGP("error during skb_header_pointer\n"); | ||
355 | return NF_ACCEPT; | ||
356 | } | ||
357 | 306 | ||
358 | msg = ntohs(ctlh->messageType); | 307 | msg = ntohs(ctlh->messageType); |
359 | DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); | 308 | DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); |
360 | 309 | ||
361 | switch (msg) { | 310 | switch (msg) { |
362 | case PPTP_START_SESSION_REPLY: | 311 | case PPTP_START_SESSION_REPLY: |
363 | if (reqlen < sizeof(_pptpReq.srep)) { | ||
364 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
365 | break; | ||
366 | } | ||
367 | |||
368 | /* server confirms new control session */ | 312 | /* server confirms new control session */ |
369 | if (info->sstate < PPTP_SESSION_REQUESTED) { | 313 | if (info->sstate < PPTP_SESSION_REQUESTED) |
370 | DEBUGP("%s without START_SESS_REQUEST\n", | 314 | goto invalid; |
371 | pptp_msg_name[msg]); | ||
372 | break; | ||
373 | } | ||
374 | if (pptpReq->srep.resultCode == PPTP_START_OK) | 315 | if (pptpReq->srep.resultCode == PPTP_START_OK) |
375 | info->sstate = PPTP_SESSION_CONFIRMED; | 316 | info->sstate = PPTP_SESSION_CONFIRMED; |
376 | else | 317 | else |
377 | info->sstate = PPTP_SESSION_ERROR; | 318 | info->sstate = PPTP_SESSION_ERROR; |
378 | break; | 319 | break; |
379 | 320 | ||
380 | case PPTP_STOP_SESSION_REPLY: | 321 | case PPTP_STOP_SESSION_REPLY: |
381 | if (reqlen < sizeof(_pptpReq.strep)) { | ||
382 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
383 | break; | ||
384 | } | ||
385 | |||
386 | /* server confirms end of control session */ | 322 | /* server confirms end of control session */ |
387 | if (info->sstate > PPTP_SESSION_STOPREQ) { | 323 | if (info->sstate > PPTP_SESSION_STOPREQ) |
388 | DEBUGP("%s without STOP_SESS_REQUEST\n", | 324 | goto invalid; |
389 | pptp_msg_name[msg]); | ||
390 | break; | ||
391 | } | ||
392 | if (pptpReq->strep.resultCode == PPTP_STOP_OK) | 325 | if (pptpReq->strep.resultCode == PPTP_STOP_OK) |
393 | info->sstate = PPTP_SESSION_NONE; | 326 | info->sstate = PPTP_SESSION_NONE; |
394 | else | 327 | else |
@@ -396,116 +329,64 @@ pptp_inbound_pkt(struct sk_buff **pskb, | |||
396 | break; | 329 | break; |
397 | 330 | ||
398 | case PPTP_OUT_CALL_REPLY: | 331 | case PPTP_OUT_CALL_REPLY: |
399 | if (reqlen < sizeof(_pptpReq.ocack)) { | ||
400 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
401 | break; | ||
402 | } | ||
403 | |||
404 | /* server accepted call, we now expect GRE frames */ | 332 | /* server accepted call, we now expect GRE frames */ |
405 | if (info->sstate != PPTP_SESSION_CONFIRMED) { | 333 | if (info->sstate != PPTP_SESSION_CONFIRMED) |
406 | DEBUGP("%s but no session\n", pptp_msg_name[msg]); | 334 | goto invalid; |
407 | break; | ||
408 | } | ||
409 | if (info->cstate != PPTP_CALL_OUT_REQ && | 335 | if (info->cstate != PPTP_CALL_OUT_REQ && |
410 | info->cstate != PPTP_CALL_OUT_CONF) { | 336 | info->cstate != PPTP_CALL_OUT_CONF) |
411 | DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); | 337 | goto invalid; |
412 | break; | 338 | |
413 | } | 339 | cid = pptpReq->ocack.callID; |
414 | if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { | 340 | pcid = pptpReq->ocack.peersCallID; |
341 | if (info->pns_call_id != pcid) | ||
342 | goto invalid; | ||
343 | DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], | ||
344 | ntohs(cid), ntohs(pcid)); | ||
345 | |||
346 | if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) { | ||
347 | info->cstate = PPTP_CALL_OUT_CONF; | ||
348 | info->pac_call_id = cid; | ||
349 | exp_gre(ct, cid, pcid); | ||
350 | } else | ||
415 | info->cstate = PPTP_CALL_NONE; | 351 | info->cstate = PPTP_CALL_NONE; |
416 | break; | ||
417 | } | ||
418 | |||
419 | cid = &pptpReq->ocack.callID; | ||
420 | pcid = &pptpReq->ocack.peersCallID; | ||
421 | |||
422 | info->pac_call_id = ntohs(*cid); | ||
423 | |||
424 | if (htons(info->pns_call_id) != *pcid) { | ||
425 | DEBUGP("%s for unknown callid %u\n", | ||
426 | pptp_msg_name[msg], ntohs(*pcid)); | ||
427 | break; | ||
428 | } | ||
429 | |||
430 | DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], | ||
431 | ntohs(*cid), ntohs(*pcid)); | ||
432 | |||
433 | info->cstate = PPTP_CALL_OUT_CONF; | ||
434 | |||
435 | seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) | ||
436 | + sizeof(struct PptpControlHeader) | ||
437 | + ((void *)pcid - (void *)pptpReq); | ||
438 | |||
439 | if (exp_gre(ct, seq, *cid, *pcid) != 0) | ||
440 | printk("ip_conntrack_pptp: error during exp_gre\n"); | ||
441 | break; | 352 | break; |
442 | 353 | ||
443 | case PPTP_IN_CALL_REQUEST: | 354 | case PPTP_IN_CALL_REQUEST: |
444 | if (reqlen < sizeof(_pptpReq.icack)) { | ||
445 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
446 | break; | ||
447 | } | ||
448 | |||
449 | /* server tells us about incoming call request */ | 355 | /* server tells us about incoming call request */ |
450 | if (info->sstate != PPTP_SESSION_CONFIRMED) { | 356 | if (info->sstate != PPTP_SESSION_CONFIRMED) |
451 | DEBUGP("%s but no session\n", pptp_msg_name[msg]); | 357 | goto invalid; |
452 | break; | 358 | |
453 | } | 359 | cid = pptpReq->icreq.callID; |
454 | pcid = &pptpReq->icack.peersCallID; | 360 | DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); |
455 | DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); | ||
456 | info->cstate = PPTP_CALL_IN_REQ; | 361 | info->cstate = PPTP_CALL_IN_REQ; |
457 | info->pac_call_id = ntohs(*pcid); | 362 | info->pac_call_id = cid; |
458 | break; | 363 | break; |
459 | 364 | ||
460 | case PPTP_IN_CALL_CONNECT: | 365 | case PPTP_IN_CALL_CONNECT: |
461 | if (reqlen < sizeof(_pptpReq.iccon)) { | ||
462 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
463 | break; | ||
464 | } | ||
465 | |||
466 | /* server tells us about incoming call established */ | 366 | /* server tells us about incoming call established */ |
467 | if (info->sstate != PPTP_SESSION_CONFIRMED) { | 367 | if (info->sstate != PPTP_SESSION_CONFIRMED) |
468 | DEBUGP("%s but no session\n", pptp_msg_name[msg]); | 368 | goto invalid; |
469 | break; | 369 | if (info->cstate != PPTP_CALL_IN_REP && |
470 | } | 370 | info->cstate != PPTP_CALL_IN_CONF) |
471 | if (info->cstate != PPTP_CALL_IN_REP | 371 | goto invalid; |
472 | && info->cstate != PPTP_CALL_IN_CONF) { | ||
473 | DEBUGP("%s but never sent IN_CALL_REPLY\n", | ||
474 | pptp_msg_name[msg]); | ||
475 | break; | ||
476 | } | ||
477 | 372 | ||
478 | pcid = &pptpReq->iccon.peersCallID; | 373 | pcid = pptpReq->iccon.peersCallID; |
479 | cid = &info->pac_call_id; | 374 | cid = info->pac_call_id; |
480 | 375 | ||
481 | if (info->pns_call_id != ntohs(*pcid)) { | 376 | if (info->pns_call_id != pcid) |
482 | DEBUGP("%s for unknown CallID %u\n", | 377 | goto invalid; |
483 | pptp_msg_name[msg], ntohs(*pcid)); | ||
484 | break; | ||
485 | } | ||
486 | 378 | ||
487 | DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); | 379 | DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); |
488 | info->cstate = PPTP_CALL_IN_CONF; | 380 | info->cstate = PPTP_CALL_IN_CONF; |
489 | 381 | ||
490 | /* we expect a GRE connection from PAC to PNS */ | 382 | /* we expect a GRE connection from PAC to PNS */ |
491 | seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) | 383 | exp_gre(ct, cid, pcid); |
492 | + sizeof(struct PptpControlHeader) | ||
493 | + ((void *)pcid - (void *)pptpReq); | ||
494 | |||
495 | if (exp_gre(ct, seq, *cid, *pcid) != 0) | ||
496 | printk("ip_conntrack_pptp: error during exp_gre\n"); | ||
497 | |||
498 | break; | 384 | break; |
499 | 385 | ||
500 | case PPTP_CALL_DISCONNECT_NOTIFY: | 386 | case PPTP_CALL_DISCONNECT_NOTIFY: |
501 | if (reqlen < sizeof(_pptpReq.disc)) { | ||
502 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
503 | break; | ||
504 | } | ||
505 | |||
506 | /* server confirms disconnect */ | 387 | /* server confirms disconnect */ |
507 | cid = &pptpReq->disc.callID; | 388 | cid = pptpReq->disc.callID; |
508 | DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); | 389 | DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); |
509 | info->cstate = PPTP_CALL_NONE; | 390 | info->cstate = PPTP_CALL_NONE; |
510 | 391 | ||
511 | /* untrack this call id, unexpect GRE packets */ | 392 | /* untrack this call id, unexpect GRE packets */ |
@@ -513,54 +394,39 @@ pptp_inbound_pkt(struct sk_buff **pskb, | |||
513 | break; | 394 | break; |
514 | 395 | ||
515 | case PPTP_WAN_ERROR_NOTIFY: | 396 | case PPTP_WAN_ERROR_NOTIFY: |
516 | break; | ||
517 | |||
518 | case PPTP_ECHO_REQUEST: | 397 | case PPTP_ECHO_REQUEST: |
519 | case PPTP_ECHO_REPLY: | 398 | case PPTP_ECHO_REPLY: |
520 | /* I don't have to explain these ;) */ | 399 | /* I don't have to explain these ;) */ |
521 | break; | 400 | break; |
522 | default: | 401 | default: |
523 | DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) | 402 | goto invalid; |
524 | ? pptp_msg_name[msg]:pptp_msg_name[0], msg); | ||
525 | break; | ||
526 | } | 403 | } |
527 | 404 | ||
528 | |||
529 | if (ip_nat_pptp_hook_inbound) | 405 | if (ip_nat_pptp_hook_inbound) |
530 | return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, | 406 | return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, |
531 | pptpReq); | 407 | pptpReq); |
532 | |||
533 | return NF_ACCEPT; | 408 | return NF_ACCEPT; |
534 | 409 | ||
410 | invalid: | ||
411 | DEBUGP("invalid %s: type=%d cid=%u pcid=%u " | ||
412 | "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", | ||
413 | msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], | ||
414 | msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, | ||
415 | ntohs(info->pns_call_id), ntohs(info->pac_call_id)); | ||
416 | return NF_ACCEPT; | ||
535 | } | 417 | } |
536 | 418 | ||
537 | static inline int | 419 | static inline int |
538 | pptp_outbound_pkt(struct sk_buff **pskb, | 420 | pptp_outbound_pkt(struct sk_buff **pskb, |
539 | struct tcphdr *tcph, | 421 | struct PptpControlHeader *ctlh, |
540 | unsigned int nexthdr_off, | 422 | union pptp_ctrl_union *pptpReq, |
541 | unsigned int datalen, | 423 | unsigned int reqlen, |
542 | struct ip_conntrack *ct, | 424 | struct ip_conntrack *ct, |
543 | enum ip_conntrack_info ctinfo) | 425 | enum ip_conntrack_info ctinfo) |
544 | { | 426 | { |
545 | struct PptpControlHeader _ctlh, *ctlh; | ||
546 | unsigned int reqlen; | ||
547 | union pptp_ctrl_union _pptpReq, *pptpReq; | ||
548 | struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; | 427 | struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; |
549 | u_int16_t msg; | 428 | u_int16_t msg; |
550 | __be16 *cid, *pcid; | 429 | __be16 cid = 0, pcid = 0; |
551 | |||
552 | ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); | ||
553 | if (!ctlh) | ||
554 | return NF_ACCEPT; | ||
555 | nexthdr_off += sizeof(_ctlh); | ||
556 | datalen -= sizeof(_ctlh); | ||
557 | |||
558 | reqlen = datalen; | ||
559 | if (reqlen > sizeof(*pptpReq)) | ||
560 | reqlen = sizeof(*pptpReq); | ||
561 | pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); | ||
562 | if (!pptpReq) | ||
563 | return NF_ACCEPT; | ||
564 | 430 | ||
565 | msg = ntohs(ctlh->messageType); | 431 | msg = ntohs(ctlh->messageType); |
566 | DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); | 432 | DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); |
@@ -568,10 +434,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, | |||
568 | switch (msg) { | 434 | switch (msg) { |
569 | case PPTP_START_SESSION_REQUEST: | 435 | case PPTP_START_SESSION_REQUEST: |
570 | /* client requests for new control session */ | 436 | /* client requests for new control session */ |
571 | if (info->sstate != PPTP_SESSION_NONE) { | 437 | if (info->sstate != PPTP_SESSION_NONE) |
572 | DEBUGP("%s but we already have one", | 438 | goto invalid; |
573 | pptp_msg_name[msg]); | ||
574 | } | ||
575 | info->sstate = PPTP_SESSION_REQUESTED; | 439 | info->sstate = PPTP_SESSION_REQUESTED; |
576 | break; | 440 | break; |
577 | case PPTP_STOP_SESSION_REQUEST: | 441 | case PPTP_STOP_SESSION_REQUEST: |
@@ -580,123 +444,115 @@ pptp_outbound_pkt(struct sk_buff **pskb, | |||
580 | break; | 444 | break; |
581 | 445 | ||
582 | case PPTP_OUT_CALL_REQUEST: | 446 | case PPTP_OUT_CALL_REQUEST: |
583 | if (reqlen < sizeof(_pptpReq.ocreq)) { | ||
584 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
585 | /* FIXME: break; */ | ||
586 | } | ||
587 | |||
588 | /* client initiating connection to server */ | 447 | /* client initiating connection to server */ |
589 | if (info->sstate != PPTP_SESSION_CONFIRMED) { | 448 | if (info->sstate != PPTP_SESSION_CONFIRMED) |
590 | DEBUGP("%s but no session\n", | 449 | goto invalid; |
591 | pptp_msg_name[msg]); | ||
592 | break; | ||
593 | } | ||
594 | info->cstate = PPTP_CALL_OUT_REQ; | 450 | info->cstate = PPTP_CALL_OUT_REQ; |
595 | /* track PNS call id */ | 451 | /* track PNS call id */ |
596 | cid = &pptpReq->ocreq.callID; | 452 | cid = pptpReq->ocreq.callID; |
597 | DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); | 453 | DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); |
598 | info->pns_call_id = ntohs(*cid); | 454 | info->pns_call_id = cid; |
599 | break; | 455 | break; |
600 | case PPTP_IN_CALL_REPLY: | 456 | case PPTP_IN_CALL_REPLY: |
601 | if (reqlen < sizeof(_pptpReq.icack)) { | ||
602 | DEBUGP("%s: short packet\n", pptp_msg_name[msg]); | ||
603 | break; | ||
604 | } | ||
605 | |||
606 | /* client answers incoming call */ | 457 | /* client answers incoming call */ |
607 | if (info->cstate != PPTP_CALL_IN_REQ | 458 | if (info->cstate != PPTP_CALL_IN_REQ && |
608 | && info->cstate != PPTP_CALL_IN_REP) { | 459 | info->cstate != PPTP_CALL_IN_REP) |
609 | DEBUGP("%s without incall_req\n", | 460 | goto invalid; |
610 | pptp_msg_name[msg]); | 461 | |
611 | break; | 462 | cid = pptpReq->icack.callID; |
612 | } | 463 | pcid = pptpReq->icack.peersCallID; |
613 | if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { | 464 | if (info->pac_call_id != pcid) |
465 | goto invalid; | ||
466 | DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], | ||
467 | ntohs(cid), ntohs(pcid)); | ||
468 | |||
469 | if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) { | ||
470 | /* part two of the three-way handshake */ | ||
471 | info->cstate = PPTP_CALL_IN_REP; | ||
472 | info->pns_call_id = cid; | ||
473 | } else | ||
614 | info->cstate = PPTP_CALL_NONE; | 474 | info->cstate = PPTP_CALL_NONE; |
615 | break; | ||
616 | } | ||
617 | pcid = &pptpReq->icack.peersCallID; | ||
618 | if (info->pac_call_id != ntohs(*pcid)) { | ||
619 | DEBUGP("%s for unknown call %u\n", | ||
620 | pptp_msg_name[msg], ntohs(*pcid)); | ||
621 | break; | ||
622 | } | ||
623 | DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); | ||
624 | /* part two of the three-way handshake */ | ||
625 | info->cstate = PPTP_CALL_IN_REP; | ||
626 | info->pns_call_id = ntohs(pptpReq->icack.callID); | ||
627 | break; | 475 | break; |
628 | 476 | ||
629 | case PPTP_CALL_CLEAR_REQUEST: | 477 | case PPTP_CALL_CLEAR_REQUEST: |
630 | /* client requests hangup of call */ | 478 | /* client requests hangup of call */ |
631 | if (info->sstate != PPTP_SESSION_CONFIRMED) { | 479 | if (info->sstate != PPTP_SESSION_CONFIRMED) |
632 | DEBUGP("CLEAR_CALL but no session\n"); | 480 | goto invalid; |
633 | break; | ||
634 | } | ||
635 | /* FUTURE: iterate over all calls and check if | 481 | /* FUTURE: iterate over all calls and check if |
636 | * call ID is valid. We don't do this without newnat, | 482 | * call ID is valid. We don't do this without newnat, |
637 | * because we only know about last call */ | 483 | * because we only know about last call */ |
638 | info->cstate = PPTP_CALL_CLEAR_REQ; | 484 | info->cstate = PPTP_CALL_CLEAR_REQ; |
639 | break; | 485 | break; |
640 | case PPTP_SET_LINK_INFO: | 486 | case PPTP_SET_LINK_INFO: |
641 | break; | ||
642 | case PPTP_ECHO_REQUEST: | 487 | case PPTP_ECHO_REQUEST: |
643 | case PPTP_ECHO_REPLY: | 488 | case PPTP_ECHO_REPLY: |
644 | /* I don't have to explain these ;) */ | 489 | /* I don't have to explain these ;) */ |
645 | break; | 490 | break; |
646 | default: | 491 | default: |
647 | DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? | 492 | goto invalid; |
648 | pptp_msg_name[msg]:pptp_msg_name[0], msg); | ||
649 | /* unknown: no need to create GRE masq table entry */ | ||
650 | break; | ||
651 | } | 493 | } |
652 | 494 | ||
653 | if (ip_nat_pptp_hook_outbound) | 495 | if (ip_nat_pptp_hook_outbound) |
654 | return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, | 496 | return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, |
655 | pptpReq); | 497 | pptpReq); |
498 | return NF_ACCEPT; | ||
656 | 499 | ||
500 | invalid: | ||
501 | DEBUGP("invalid %s: type=%d cid=%u pcid=%u " | ||
502 | "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", | ||
503 | msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], | ||
504 | msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, | ||
505 | ntohs(info->pns_call_id), ntohs(info->pac_call_id)); | ||
657 | return NF_ACCEPT; | 506 | return NF_ACCEPT; |
658 | } | 507 | } |
659 | 508 | ||
509 | static const unsigned int pptp_msg_size[] = { | ||
510 | [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest), | ||
511 | [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply), | ||
512 | [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest), | ||
513 | [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply), | ||
514 | [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest), | ||
515 | [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply), | ||
516 | [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest), | ||
517 | [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply), | ||
518 | [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected), | ||
519 | [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest), | ||
520 | [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify), | ||
521 | [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify), | ||
522 | [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo), | ||
523 | }; | ||
660 | 524 | ||
661 | /* track caller id inside control connection, call expect_related */ | 525 | /* track caller id inside control connection, call expect_related */ |
662 | static int | 526 | static int |
663 | conntrack_pptp_help(struct sk_buff **pskb, | 527 | conntrack_pptp_help(struct sk_buff **pskb, |
664 | struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) | 528 | struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) |
665 | 529 | ||
666 | { | 530 | { |
667 | struct pptp_pkt_hdr _pptph, *pptph; | ||
668 | struct tcphdr _tcph, *tcph; | ||
669 | u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; | ||
670 | u_int32_t datalen; | ||
671 | int dir = CTINFO2DIR(ctinfo); | 531 | int dir = CTINFO2DIR(ctinfo); |
672 | struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; | 532 | struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; |
673 | unsigned int nexthdr_off; | 533 | struct tcphdr _tcph, *tcph; |
674 | 534 | struct pptp_pkt_hdr _pptph, *pptph; | |
535 | struct PptpControlHeader _ctlh, *ctlh; | ||
536 | union pptp_ctrl_union _pptpReq, *pptpReq; | ||
537 | unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; | ||
538 | unsigned int datalen, reqlen, nexthdr_off; | ||
675 | int oldsstate, oldcstate; | 539 | int oldsstate, oldcstate; |
676 | int ret; | 540 | int ret; |
541 | u_int16_t msg; | ||
677 | 542 | ||
678 | /* don't do any tracking before tcp handshake complete */ | 543 | /* don't do any tracking before tcp handshake complete */ |
679 | if (ctinfo != IP_CT_ESTABLISHED | 544 | if (ctinfo != IP_CT_ESTABLISHED |
680 | && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { | 545 | && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { |
681 | DEBUGP("ctinfo = %u, skipping\n", ctinfo); | 546 | DEBUGP("ctinfo = %u, skipping\n", ctinfo); |
682 | return NF_ACCEPT; | 547 | return NF_ACCEPT; |
683 | } | 548 | } |
684 | 549 | ||
685 | nexthdr_off = (*pskb)->nh.iph->ihl*4; | 550 | nexthdr_off = (*pskb)->nh.iph->ihl*4; |
686 | tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); | 551 | tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); |
687 | BUG_ON(!tcph); | 552 | BUG_ON(!tcph); |
688 | nexthdr_off += tcph->doff * 4; | 553 | nexthdr_off += tcph->doff * 4; |
689 | datalen = tcplen - tcph->doff * 4; | 554 | datalen = tcplen - tcph->doff * 4; |
690 | 555 | ||
691 | if (tcph->fin || tcph->rst) { | ||
692 | DEBUGP("RST/FIN received, timeouting GRE\n"); | ||
693 | /* can't do this after real newnat */ | ||
694 | info->cstate = PPTP_CALL_NONE; | ||
695 | |||
696 | /* untrack this call id, unexpect GRE packets */ | ||
697 | pptp_destroy_siblings(ct); | ||
698 | } | ||
699 | |||
700 | pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); | 556 | pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); |
701 | if (!pptph) { | 557 | if (!pptph) { |
702 | DEBUGP("no full PPTP header, can't track\n"); | 558 | DEBUGP("no full PPTP header, can't track\n"); |
@@ -712,6 +568,23 @@ conntrack_pptp_help(struct sk_buff **pskb, | |||
712 | return NF_ACCEPT; | 568 | return NF_ACCEPT; |
713 | } | 569 | } |
714 | 570 | ||
571 | ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); | ||
572 | if (!ctlh) | ||
573 | return NF_ACCEPT; | ||
574 | nexthdr_off += sizeof(_ctlh); | ||
575 | datalen -= sizeof(_ctlh); | ||
576 | |||
577 | reqlen = datalen; | ||
578 | msg = ntohs(ctlh->messageType); | ||
579 | if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) | ||
580 | return NF_ACCEPT; | ||
581 | if (reqlen > sizeof(*pptpReq)) | ||
582 | reqlen = sizeof(*pptpReq); | ||
583 | |||
584 | pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); | ||
585 | if (!pptpReq) | ||
586 | return NF_ACCEPT; | ||
587 | |||
715 | oldsstate = info->sstate; | 588 | oldsstate = info->sstate; |
716 | oldcstate = info->cstate; | 589 | oldcstate = info->cstate; |
717 | 590 | ||
@@ -721,11 +594,11 @@ conntrack_pptp_help(struct sk_buff **pskb, | |||
721 | * established from PNS->PAC. However, RFC makes no guarantee */ | 594 | * established from PNS->PAC. However, RFC makes no guarantee */ |
722 | if (dir == IP_CT_DIR_ORIGINAL) | 595 | if (dir == IP_CT_DIR_ORIGINAL) |
723 | /* client -> server (PNS -> PAC) */ | 596 | /* client -> server (PNS -> PAC) */ |
724 | ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, | 597 | ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, |
725 | ctinfo); | 598 | ctinfo); |
726 | else | 599 | else |
727 | /* server -> client (PAC -> PNS) */ | 600 | /* server -> client (PAC -> PNS) */ |
728 | ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, | 601 | ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, |
729 | ctinfo); | 602 | ctinfo); |
730 | DEBUGP("sstate: %d->%d, cstate: %d->%d\n", | 603 | DEBUGP("sstate: %d->%d, cstate: %d->%d\n", |
731 | oldsstate, info->sstate, oldcstate, info->cstate); | 604 | oldsstate, info->sstate, oldcstate, info->cstate); |
@@ -735,30 +608,31 @@ conntrack_pptp_help(struct sk_buff **pskb, | |||
735 | } | 608 | } |
736 | 609 | ||
737 | /* control protocol helper */ | 610 | /* control protocol helper */ |
738 | static struct ip_conntrack_helper pptp = { | 611 | static struct ip_conntrack_helper pptp = { |
739 | .list = { NULL, NULL }, | 612 | .list = { NULL, NULL }, |
740 | .name = "pptp", | 613 | .name = "pptp", |
741 | .me = THIS_MODULE, | 614 | .me = THIS_MODULE, |
742 | .max_expected = 2, | 615 | .max_expected = 2, |
743 | .timeout = 5 * 60, | 616 | .timeout = 5 * 60, |
744 | .tuple = { .src = { .ip = 0, | 617 | .tuple = { .src = { .ip = 0, |
745 | .u = { .tcp = { .port = | 618 | .u = { .tcp = { .port = |
746 | __constant_htons(PPTP_CONTROL_PORT) } } | 619 | __constant_htons(PPTP_CONTROL_PORT) } } |
747 | }, | 620 | }, |
748 | .dst = { .ip = 0, | 621 | .dst = { .ip = 0, |
749 | .u = { .all = 0 }, | 622 | .u = { .all = 0 }, |
750 | .protonum = IPPROTO_TCP | 623 | .protonum = IPPROTO_TCP |
751 | } | 624 | } |
752 | }, | 625 | }, |
753 | .mask = { .src = { .ip = 0, | 626 | .mask = { .src = { .ip = 0, |
754 | .u = { .tcp = { .port = __constant_htons(0xffff) } } | 627 | .u = { .tcp = { .port = __constant_htons(0xffff) } } |
755 | }, | 628 | }, |
756 | .dst = { .ip = 0, | 629 | .dst = { .ip = 0, |
757 | .u = { .all = 0 }, | 630 | .u = { .all = 0 }, |
758 | .protonum = 0xff | 631 | .protonum = 0xff |
759 | } | 632 | } |
760 | }, | 633 | }, |
761 | .help = conntrack_pptp_help | 634 | .help = conntrack_pptp_help, |
635 | .destroy = pptp_destroy_siblings, | ||
762 | }; | 636 | }; |
763 | 637 | ||
764 | extern void ip_ct_proto_gre_fini(void); | 638 | extern void ip_ct_proto_gre_fini(void); |
@@ -768,7 +642,7 @@ extern int __init ip_ct_proto_gre_init(void); | |||
768 | static int __init ip_conntrack_helper_pptp_init(void) | 642 | static int __init ip_conntrack_helper_pptp_init(void) |
769 | { | 643 | { |
770 | int retcode; | 644 | int retcode; |
771 | 645 | ||
772 | retcode = ip_ct_proto_gre_init(); | 646 | retcode = ip_ct_proto_gre_init(); |
773 | if (retcode < 0) | 647 | if (retcode < 0) |
774 | return retcode; | 648 | return retcode; |
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index a566a81325b2..3d0b438783db 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/skbuff.h> | 21 | #include <linux/skbuff.h> |
22 | #include <linux/netdevice.h> | 22 | #include <linux/netdevice.h> |
23 | #include <linux/inetdevice.h> | 23 | #include <linux/inetdevice.h> |
24 | #include <linux/if_addr.h> | ||
24 | #include <linux/in.h> | 25 | #include <linux/in.h> |
25 | #include <linux/ip.h> | 26 | #include <linux/ip.h> |
26 | #include <net/route.h> | 27 | #include <net/route.h> |
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 0d4cc92391fa..52eddea27e93 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c | |||
@@ -329,11 +329,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, | |||
329 | /* dump everything */ | 329 | /* dump everything */ |
330 | events = ~0UL; | 330 | events = ~0UL; |
331 | group = NFNLGRP_CONNTRACK_NEW; | 331 | group = NFNLGRP_CONNTRACK_NEW; |
332 | } else if (events & (IPCT_STATUS | | 332 | } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { |
333 | IPCT_PROTOINFO | | ||
334 | IPCT_HELPER | | ||
335 | IPCT_HELPINFO | | ||
336 | IPCT_NATINFO)) { | ||
337 | type = IPCTNL_MSG_CT_NEW; | 333 | type = IPCTNL_MSG_CT_NEW; |
338 | group = NFNLGRP_CONNTRACK_UPDATE; | 334 | group = NFNLGRP_CONNTRACK_UPDATE; |
339 | } else | 335 | } else |
@@ -385,6 +381,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, | |||
385 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) | 381 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) |
386 | goto nfattr_failure; | 382 | goto nfattr_failure; |
387 | 383 | ||
384 | if (events & IPCT_MARK | ||
385 | && ctnetlink_dump_mark(skb, ct) < 0) | ||
386 | goto nfattr_failure; | ||
387 | |||
388 | nlh->nlmsg_len = skb->tail - b; | 388 | nlh->nlmsg_len = skb->tail - b; |
389 | nfnetlink_send(skb, 0, group, 0); | 389 | nfnetlink_send(skb, 0, group, 0); |
390 | return NOTIFY_DONE; | 390 | return NOTIFY_DONE; |
@@ -436,6 +436,11 @@ restart: | |||
436 | cb->args[1] = (unsigned long)ct; | 436 | cb->args[1] = (unsigned long)ct; |
437 | goto out; | 437 | goto out; |
438 | } | 438 | } |
439 | #ifdef CONFIG_NF_CT_ACCT | ||
440 | if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == | ||
441 | IPCTNL_MSG_CT_GET_CTRZERO) | ||
442 | memset(&ct->counters, 0, sizeof(ct->counters)); | ||
443 | #endif | ||
439 | } | 444 | } |
440 | if (cb->args[1]) { | 445 | if (cb->args[1]) { |
441 | cb->args[1] = 0; | 446 | cb->args[1] = 0; |
@@ -451,46 +456,6 @@ out: | |||
451 | return skb->len; | 456 | return skb->len; |
452 | } | 457 | } |
453 | 458 | ||
454 | #ifdef CONFIG_IP_NF_CT_ACCT | ||
455 | static int | ||
456 | ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) | ||
457 | { | ||
458 | struct ip_conntrack *ct = NULL; | ||
459 | struct ip_conntrack_tuple_hash *h; | ||
460 | struct list_head *i; | ||
461 | u_int32_t *id = (u_int32_t *) &cb->args[1]; | ||
462 | |||
463 | DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, | ||
464 | cb->args[0], *id); | ||
465 | |||
466 | write_lock_bh(&ip_conntrack_lock); | ||
467 | for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { | ||
468 | list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { | ||
469 | h = (struct ip_conntrack_tuple_hash *) i; | ||
470 | if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) | ||
471 | continue; | ||
472 | ct = tuplehash_to_ctrack(h); | ||
473 | if (ct->id <= *id) | ||
474 | continue; | ||
475 | if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, | ||
476 | cb->nlh->nlmsg_seq, | ||
477 | IPCTNL_MSG_CT_NEW, | ||
478 | 1, ct) < 0) | ||
479 | goto out; | ||
480 | *id = ct->id; | ||
481 | |||
482 | memset(&ct->counters, 0, sizeof(ct->counters)); | ||
483 | } | ||
484 | } | ||
485 | out: | ||
486 | write_unlock_bh(&ip_conntrack_lock); | ||
487 | |||
488 | DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); | ||
489 | |||
490 | return skb->len; | ||
491 | } | ||
492 | #endif | ||
493 | |||
494 | static const size_t cta_min_ip[CTA_IP_MAX] = { | 459 | static const size_t cta_min_ip[CTA_IP_MAX] = { |
495 | [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), | 460 | [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), |
496 | [CTA_IP_V4_DST-1] = sizeof(u_int32_t), | 461 | [CTA_IP_V4_DST-1] = sizeof(u_int32_t), |
@@ -775,22 +740,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, | |||
775 | if (msg->nfgen_family != AF_INET) | 740 | if (msg->nfgen_family != AF_INET) |
776 | return -EAFNOSUPPORT; | 741 | return -EAFNOSUPPORT; |
777 | 742 | ||
778 | if (NFNL_MSG_TYPE(nlh->nlmsg_type) == | 743 | #ifndef CONFIG_IP_NF_CT_ACCT |
779 | IPCTNL_MSG_CT_GET_CTRZERO) { | 744 | if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) |
780 | #ifdef CONFIG_IP_NF_CT_ACCT | ||
781 | if ((*errp = netlink_dump_start(ctnl, skb, nlh, | ||
782 | ctnetlink_dump_table_w, | ||
783 | ctnetlink_done)) != 0) | ||
784 | return -EINVAL; | ||
785 | #else | ||
786 | return -ENOTSUPP; | 745 | return -ENOTSUPP; |
787 | #endif | 746 | #endif |
788 | } else { | 747 | if ((*errp = netlink_dump_start(ctnl, skb, nlh, |
789 | if ((*errp = netlink_dump_start(ctnl, skb, nlh, | 748 | ctnetlink_dump_table, |
790 | ctnetlink_dump_table, | 749 | ctnetlink_done)) != 0) |
791 | ctnetlink_done)) != 0) | ||
792 | return -EINVAL; | 750 | return -EINVAL; |
793 | } | ||
794 | 751 | ||
795 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | 752 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); |
796 | if (rlen > skb->len) | 753 | if (rlen > skb->len) |
@@ -1253,6 +1210,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, | |||
1253 | } else | 1210 | } else |
1254 | return NOTIFY_DONE; | 1211 | return NOTIFY_DONE; |
1255 | 1212 | ||
1213 | if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) | ||
1214 | return NOTIFY_DONE; | ||
1215 | |||
1256 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); | 1216 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); |
1257 | if (!skb) | 1217 | if (!skb) |
1258 | return NOTIFY_DONE; | 1218 | return NOTIFY_DONE; |
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c index f891308b5e4c..36f2b5e5d80a 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <linux/netfilter.h> | 12 | #include <linux/netfilter.h> |
13 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | 13 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> |
14 | 14 | ||
15 | unsigned int ip_ct_generic_timeout = 600*HZ; | 15 | unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ; |
16 | 16 | ||
17 | static int generic_pkt_to_tuple(const struct sk_buff *skb, | 17 | static int generic_pkt_to_tuple(const struct sk_buff *skb, |
18 | unsigned int dataoff, | 18 | unsigned int dataoff, |
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 4ee016c427b4..5fe026f467d3 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c | |||
@@ -1,15 +1,15 @@ | |||
1 | /* | 1 | /* |
2 | * ip_conntrack_proto_gre.c - Version 3.0 | 2 | * ip_conntrack_proto_gre.c - Version 3.0 |
3 | * | 3 | * |
4 | * Connection tracking protocol helper module for GRE. | 4 | * Connection tracking protocol helper module for GRE. |
5 | * | 5 | * |
6 | * GRE is a generic encapsulation protocol, which is generally not very | 6 | * GRE is a generic encapsulation protocol, which is generally not very |
7 | * suited for NAT, as it has no protocol-specific part as port numbers. | 7 | * suited for NAT, as it has no protocol-specific part as port numbers. |
8 | * | 8 | * |
9 | * It has an optional key field, which may help us distinguishing two | 9 | * It has an optional key field, which may help us distinguishing two |
10 | * connections between the same two hosts. | 10 | * connections between the same two hosts. |
11 | * | 11 | * |
12 | * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 | 12 | * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 |
13 | * | 13 | * |
14 | * PPTP is built on top of a modified version of GRE, and has a mandatory | 14 | * PPTP is built on top of a modified version of GRE, and has a mandatory |
15 | * field called "CallID", which serves us for the same purpose as the key | 15 | * field called "CallID", which serves us for the same purpose as the key |
@@ -37,7 +37,6 @@ static DEFINE_RWLOCK(ip_ct_gre_lock); | |||
37 | #define ASSERT_READ_LOCK(x) | 37 | #define ASSERT_READ_LOCK(x) |
38 | #define ASSERT_WRITE_LOCK(x) | 38 | #define ASSERT_WRITE_LOCK(x) |
39 | 39 | ||
40 | #include <linux/netfilter_ipv4/listhelp.h> | ||
41 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | 40 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> |
42 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 41 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> |
43 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 42 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> |
@@ -62,7 +61,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); | |||
62 | #define DEBUGP(x, args...) | 61 | #define DEBUGP(x, args...) |
63 | #define DUMP_TUPLE_GRE(x) | 62 | #define DUMP_TUPLE_GRE(x) |
64 | #endif | 63 | #endif |
65 | 64 | ||
66 | /* GRE KEYMAP HANDLING FUNCTIONS */ | 65 | /* GRE KEYMAP HANDLING FUNCTIONS */ |
67 | static LIST_HEAD(gre_keymap_list); | 66 | static LIST_HEAD(gre_keymap_list); |
68 | 67 | ||
@@ -82,12 +81,14 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t) | |||
82 | __be16 key = 0; | 81 | __be16 key = 0; |
83 | 82 | ||
84 | read_lock_bh(&ip_ct_gre_lock); | 83 | read_lock_bh(&ip_ct_gre_lock); |
85 | km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, | 84 | list_for_each_entry(km, &gre_keymap_list, list) { |
86 | struct ip_ct_gre_keymap *, t); | 85 | if (gre_key_cmpfn(km, t)) { |
87 | if (km) | 86 | key = km->tuple.src.u.gre.key; |
88 | key = km->tuple.src.u.gre.key; | 87 | break; |
88 | } | ||
89 | } | ||
89 | read_unlock_bh(&ip_ct_gre_lock); | 90 | read_unlock_bh(&ip_ct_gre_lock); |
90 | 91 | ||
91 | DEBUGP("lookup src key 0x%x up key for ", key); | 92 | DEBUGP("lookup src key 0x%x up key for ", key); |
92 | DUMP_TUPLE_GRE(t); | 93 | DUMP_TUPLE_GRE(t); |
93 | 94 | ||
@@ -99,28 +100,25 @@ int | |||
99 | ip_ct_gre_keymap_add(struct ip_conntrack *ct, | 100 | ip_ct_gre_keymap_add(struct ip_conntrack *ct, |
100 | struct ip_conntrack_tuple *t, int reply) | 101 | struct ip_conntrack_tuple *t, int reply) |
101 | { | 102 | { |
102 | struct ip_ct_gre_keymap **exist_km, *km, *old; | 103 | struct ip_ct_gre_keymap **exist_km, *km; |
103 | 104 | ||
104 | if (!ct->helper || strcmp(ct->helper->name, "pptp")) { | 105 | if (!ct->helper || strcmp(ct->helper->name, "pptp")) { |
105 | DEBUGP("refusing to add GRE keymap to non-pptp session\n"); | 106 | DEBUGP("refusing to add GRE keymap to non-pptp session\n"); |
106 | return -1; | 107 | return -1; |
107 | } | 108 | } |
108 | 109 | ||
109 | if (!reply) | 110 | if (!reply) |
110 | exist_km = &ct->help.ct_pptp_info.keymap_orig; | 111 | exist_km = &ct->help.ct_pptp_info.keymap_orig; |
111 | else | 112 | else |
112 | exist_km = &ct->help.ct_pptp_info.keymap_reply; | 113 | exist_km = &ct->help.ct_pptp_info.keymap_reply; |
113 | 114 | ||
114 | if (*exist_km) { | 115 | if (*exist_km) { |
115 | /* check whether it's a retransmission */ | 116 | /* check whether it's a retransmission */ |
116 | old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, | 117 | list_for_each_entry(km, &gre_keymap_list, list) { |
117 | struct ip_ct_gre_keymap *, t); | 118 | if (gre_key_cmpfn(km, t) && km == *exist_km) |
118 | if (old == *exist_km) { | 119 | return 0; |
119 | DEBUGP("retransmission\n"); | ||
120 | return 0; | ||
121 | } | 120 | } |
122 | 121 | DEBUGP("trying to override keymap_%s for ct %p\n", | |
123 | DEBUGP("trying to override keymap_%s for ct %p\n", | ||
124 | reply? "reply":"orig", ct); | 122 | reply? "reply":"orig", ct); |
125 | return -EEXIST; | 123 | return -EEXIST; |
126 | } | 124 | } |
@@ -136,7 +134,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, | |||
136 | DUMP_TUPLE_GRE(&km->tuple); | 134 | DUMP_TUPLE_GRE(&km->tuple); |
137 | 135 | ||
138 | write_lock_bh(&ip_ct_gre_lock); | 136 | write_lock_bh(&ip_ct_gre_lock); |
139 | list_append(&gre_keymap_list, km); | 137 | list_add_tail(&km->list, &gre_keymap_list); |
140 | write_unlock_bh(&ip_ct_gre_lock); | 138 | write_unlock_bh(&ip_ct_gre_lock); |
141 | 139 | ||
142 | return 0; | 140 | return 0; |
@@ -154,7 +152,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) | |||
154 | 152 | ||
155 | write_lock_bh(&ip_ct_gre_lock); | 153 | write_lock_bh(&ip_ct_gre_lock); |
156 | if (ct->help.ct_pptp_info.keymap_orig) { | 154 | if (ct->help.ct_pptp_info.keymap_orig) { |
157 | DEBUGP("removing %p from list\n", | 155 | DEBUGP("removing %p from list\n", |
158 | ct->help.ct_pptp_info.keymap_orig); | 156 | ct->help.ct_pptp_info.keymap_orig); |
159 | list_del(&ct->help.ct_pptp_info.keymap_orig->list); | 157 | list_del(&ct->help.ct_pptp_info.keymap_orig->list); |
160 | kfree(ct->help.ct_pptp_info.keymap_orig); | 158 | kfree(ct->help.ct_pptp_info.keymap_orig); |
@@ -222,7 +220,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb, | |||
222 | static int gre_print_tuple(struct seq_file *s, | 220 | static int gre_print_tuple(struct seq_file *s, |
223 | const struct ip_conntrack_tuple *tuple) | 221 | const struct ip_conntrack_tuple *tuple) |
224 | { | 222 | { |
225 | return seq_printf(s, "srckey=0x%x dstkey=0x%x ", | 223 | return seq_printf(s, "srckey=0x%x dstkey=0x%x ", |
226 | ntohs(tuple->src.u.gre.key), | 224 | ntohs(tuple->src.u.gre.key), |
227 | ntohs(tuple->dst.u.gre.key)); | 225 | ntohs(tuple->dst.u.gre.key)); |
228 | } | 226 | } |
@@ -252,14 +250,14 @@ static int gre_packet(struct ip_conntrack *ct, | |||
252 | } else | 250 | } else |
253 | ip_ct_refresh_acct(ct, conntrackinfo, skb, | 251 | ip_ct_refresh_acct(ct, conntrackinfo, skb, |
254 | ct->proto.gre.timeout); | 252 | ct->proto.gre.timeout); |
255 | 253 | ||
256 | return NF_ACCEPT; | 254 | return NF_ACCEPT; |
257 | } | 255 | } |
258 | 256 | ||
259 | /* Called when a new connection for this protocol found. */ | 257 | /* Called when a new connection for this protocol found. */ |
260 | static int gre_new(struct ip_conntrack *ct, | 258 | static int gre_new(struct ip_conntrack *ct, |
261 | const struct sk_buff *skb) | 259 | const struct sk_buff *skb) |
262 | { | 260 | { |
263 | DEBUGP(": "); | 261 | DEBUGP(": "); |
264 | DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 262 | DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
265 | 263 | ||
@@ -285,9 +283,9 @@ static void gre_destroy(struct ip_conntrack *ct) | |||
285 | } | 283 | } |
286 | 284 | ||
287 | /* protocol helper struct */ | 285 | /* protocol helper struct */ |
288 | static struct ip_conntrack_protocol gre = { | 286 | static struct ip_conntrack_protocol gre = { |
289 | .proto = IPPROTO_GRE, | 287 | .proto = IPPROTO_GRE, |
290 | .name = "gre", | 288 | .name = "gre", |
291 | .pkt_to_tuple = gre_pkt_to_tuple, | 289 | .pkt_to_tuple = gre_pkt_to_tuple, |
292 | .invert_tuple = gre_invert_tuple, | 290 | .invert_tuple = gre_invert_tuple, |
293 | .print_tuple = gre_print_tuple, | 291 | .print_tuple = gre_print_tuple, |
@@ -325,7 +323,7 @@ void ip_ct_proto_gre_fini(void) | |||
325 | } | 323 | } |
326 | write_unlock_bh(&ip_ct_gre_lock); | 324 | write_unlock_bh(&ip_ct_gre_lock); |
327 | 325 | ||
328 | ip_conntrack_protocol_unregister(&gre); | 326 | ip_conntrack_protocol_unregister(&gre); |
329 | } | 327 | } |
330 | 328 | ||
331 | EXPORT_SYMBOL(ip_ct_gre_keymap_add); | 329 | EXPORT_SYMBOL(ip_ct_gre_keymap_add); |
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 23f1c504586d..09c40ebe3345 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 21 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> |
22 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | 22 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> |
23 | 23 | ||
24 | unsigned int ip_ct_icmp_timeout = 30*HZ; | 24 | unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ; |
25 | 25 | ||
26 | #if 0 | 26 | #if 0 |
27 | #define DEBUGP printk | 27 | #define DEBUGP printk |
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 2d3612cd5f18..b908a4842e18 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c | |||
@@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = { | |||
58 | #define HOURS * 60 MINS | 58 | #define HOURS * 60 MINS |
59 | #define DAYS * 24 HOURS | 59 | #define DAYS * 24 HOURS |
60 | 60 | ||
61 | static unsigned int ip_ct_sctp_timeout_closed = 10 SECS; | 61 | static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS; |
62 | static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS; | 62 | static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; |
63 | static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS; | 63 | static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; |
64 | static unsigned int ip_ct_sctp_timeout_established = 5 DAYS; | 64 | static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS; |
65 | static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; | 65 | static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; |
66 | static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; | 66 | static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000; |
67 | static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; | 67 | static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; |
68 | 68 | ||
69 | static const unsigned int * sctp_timeouts[] | 69 | static const unsigned int * sctp_timeouts[] |
70 | = { NULL, /* SCTP_CONNTRACK_NONE */ | 70 | = { NULL, /* SCTP_CONNTRACK_NONE */ |
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index fb920e76ec10..03ae9a04cb37 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c | |||
@@ -48,19 +48,19 @@ static DEFINE_RWLOCK(tcp_lock); | |||
48 | /* "Be conservative in what you do, | 48 | /* "Be conservative in what you do, |
49 | be liberal in what you accept from others." | 49 | be liberal in what you accept from others." |
50 | If it's non-zero, we mark only out of window RST segments as INVALID. */ | 50 | If it's non-zero, we mark only out of window RST segments as INVALID. */ |
51 | int ip_ct_tcp_be_liberal = 0; | 51 | int ip_ct_tcp_be_liberal __read_mostly = 0; |
52 | 52 | ||
53 | /* When connection is picked up from the middle, how many packets are required | 53 | /* When connection is picked up from the middle, how many packets are required |
54 | to pass in each direction when we assume we are in sync - if any side uses | 54 | to pass in each direction when we assume we are in sync - if any side uses |
55 | window scaling, we lost the game. | 55 | window scaling, we lost the game. |
56 | If it is set to zero, we disable picking up already established | 56 | If it is set to zero, we disable picking up already established |
57 | connections. */ | 57 | connections. */ |
58 | int ip_ct_tcp_loose = 3; | 58 | int ip_ct_tcp_loose __read_mostly = 3; |
59 | 59 | ||
60 | /* Max number of the retransmitted packets without receiving an (acceptable) | 60 | /* Max number of the retransmitted packets without receiving an (acceptable) |
61 | ACK from the destination. If this number is reached, a shorter timer | 61 | ACK from the destination. If this number is reached, a shorter timer |
62 | will be started. */ | 62 | will be started. */ |
63 | int ip_ct_tcp_max_retrans = 3; | 63 | int ip_ct_tcp_max_retrans __read_mostly = 3; |
64 | 64 | ||
65 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more | 65 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more |
66 | closely. They're more complex. --RR */ | 66 | closely. They're more complex. --RR */ |
@@ -83,19 +83,19 @@ static const char *tcp_conntrack_names[] = { | |||
83 | #define HOURS * 60 MINS | 83 | #define HOURS * 60 MINS |
84 | #define DAYS * 24 HOURS | 84 | #define DAYS * 24 HOURS |
85 | 85 | ||
86 | unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS; | 86 | unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; |
87 | unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS; | 87 | unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; |
88 | unsigned int ip_ct_tcp_timeout_established = 5 DAYS; | 88 | unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS; |
89 | unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS; | 89 | unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; |
90 | unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS; | 90 | unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; |
91 | unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS; | 91 | unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; |
92 | unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS; | 92 | unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; |
93 | unsigned int ip_ct_tcp_timeout_close = 10 SECS; | 93 | unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS; |
94 | 94 | ||
95 | /* RFC1122 says the R2 limit should be at least 100 seconds. | 95 | /* RFC1122 says the R2 limit should be at least 100 seconds. |
96 | Linux uses 15 packets as limit, which corresponds | 96 | Linux uses 15 packets as limit, which corresponds |
97 | to ~13-30min depending on RTO. */ | 97 | to ~13-30min depending on RTO. */ |
98 | unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; | 98 | unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; |
99 | 99 | ||
100 | static const unsigned int * tcp_timeouts[] | 100 | static const unsigned int * tcp_timeouts[] |
101 | = { NULL, /* TCP_CONNTRACK_NONE */ | 101 | = { NULL, /* TCP_CONNTRACK_NONE */ |
@@ -731,13 +731,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, | |||
731 | if (state->last_dir == dir | 731 | if (state->last_dir == dir |
732 | && state->last_seq == seq | 732 | && state->last_seq == seq |
733 | && state->last_ack == ack | 733 | && state->last_ack == ack |
734 | && state->last_end == end) | 734 | && state->last_end == end |
735 | && state->last_win == win) | ||
735 | state->retrans++; | 736 | state->retrans++; |
736 | else { | 737 | else { |
737 | state->last_dir = dir; | 738 | state->last_dir = dir; |
738 | state->last_seq = seq; | 739 | state->last_seq = seq; |
739 | state->last_ack = ack; | 740 | state->last_ack = ack; |
740 | state->last_end = end; | 741 | state->last_end = end; |
742 | state->last_win = win; | ||
741 | state->retrans = 0; | 743 | state->retrans = 0; |
742 | } | 744 | } |
743 | } | 745 | } |
@@ -865,8 +867,7 @@ static int tcp_error(struct sk_buff *skb, | |||
865 | 867 | ||
866 | /* Checksum invalid? Ignore. | 868 | /* Checksum invalid? Ignore. |
867 | * We skip checking packets on the outgoing path | 869 | * We skip checking packets on the outgoing path |
868 | * because the semantic of CHECKSUM_HW is different there | 870 | * because it is assumed to be correct. |
869 | * and moreover root might send raw packets. | ||
870 | */ | 871 | */ |
871 | /* FIXME: Source route IP option packets --RR */ | 872 | /* FIXME: Source route IP option packets --RR */ |
872 | if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && | 873 | if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && |
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 9b2c16b4d2ff..d0e8a16970ec 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c | |||
@@ -18,8 +18,8 @@ | |||
18 | #include <linux/netfilter_ipv4.h> | 18 | #include <linux/netfilter_ipv4.h> |
19 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | 19 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> |
20 | 20 | ||
21 | unsigned int ip_ct_udp_timeout = 30*HZ; | 21 | unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ; |
22 | unsigned int ip_ct_udp_timeout_stream = 180*HZ; | 22 | unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ; |
23 | 23 | ||
24 | static int udp_pkt_to_tuple(const struct sk_buff *skb, | 24 | static int udp_pkt_to_tuple(const struct sk_buff *skb, |
25 | unsigned int dataoff, | 25 | unsigned int dataoff, |
@@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, | |||
117 | 117 | ||
118 | /* Checksum invalid? Ignore. | 118 | /* Checksum invalid? Ignore. |
119 | * We skip checking packets on the outgoing path | 119 | * We skip checking packets on the outgoing path |
120 | * because the semantic of CHECKSUM_HW is different there | 120 | * because the checksum is assumed to be correct. |
121 | * and moreover root might send raw packets. | ||
122 | * FIXME: Source route IP option packets --RR */ | 121 | * FIXME: Source route IP option packets --RR */ |
123 | if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && | 122 | if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && |
124 | nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) { | 123 | nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) { |
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index 4f222d6be009..2893e9c74850 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c | |||
@@ -8,7 +8,6 @@ | |||
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/config.h> | ||
12 | #include <linux/module.h> | 11 | #include <linux/module.h> |
13 | #include <linux/ctype.h> | 12 | #include <linux/ctype.h> |
14 | #include <linux/skbuff.h> | 13 | #include <linux/skbuff.h> |
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 7a9fa04a467a..02135756562e 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c | |||
@@ -35,7 +35,6 @@ | |||
35 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | 35 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> |
36 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 36 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> |
37 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 37 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> |
38 | #include <linux/netfilter_ipv4/listhelp.h> | ||
39 | 38 | ||
40 | #if 0 | 39 | #if 0 |
41 | #define DEBUGP printk | 40 | #define DEBUGP printk |
@@ -534,7 +533,7 @@ static struct nf_hook_ops ip_conntrack_ops[] = { | |||
534 | 533 | ||
535 | /* Sysctl support */ | 534 | /* Sysctl support */ |
536 | 535 | ||
537 | int ip_conntrack_checksum = 1; | 536 | int ip_conntrack_checksum __read_mostly = 1; |
538 | 537 | ||
539 | #ifdef CONFIG_SYSCTL | 538 | #ifdef CONFIG_SYSCTL |
540 | 539 | ||
@@ -563,7 +562,7 @@ extern unsigned int ip_ct_udp_timeout_stream; | |||
563 | /* From ip_conntrack_proto_icmp.c */ | 562 | /* From ip_conntrack_proto_icmp.c */ |
564 | extern unsigned int ip_ct_icmp_timeout; | 563 | extern unsigned int ip_ct_icmp_timeout; |
565 | 564 | ||
566 | /* From ip_conntrack_proto_icmp.c */ | 565 | /* From ip_conntrack_proto_generic.c */ |
567 | extern unsigned int ip_ct_generic_timeout; | 566 | extern unsigned int ip_ct_generic_timeout; |
568 | 567 | ||
569 | /* Log invalid packets of a given protocol */ | 568 | /* Log invalid packets of a given protocol */ |
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1741d555ad0d..71f3e09cbc84 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c | |||
@@ -22,9 +22,6 @@ | |||
22 | #include <linux/udp.h> | 22 | #include <linux/udp.h> |
23 | #include <linux/jhash.h> | 23 | #include <linux/jhash.h> |
24 | 24 | ||
25 | #define ASSERT_READ_LOCK(x) | ||
26 | #define ASSERT_WRITE_LOCK(x) | ||
27 | |||
28 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 25 | #include <linux/netfilter_ipv4/ip_conntrack.h> |
29 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 26 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> |
30 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> | 27 | #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> |
@@ -33,7 +30,6 @@ | |||
33 | #include <linux/netfilter_ipv4/ip_nat_core.h> | 30 | #include <linux/netfilter_ipv4/ip_nat_core.h> |
34 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | 31 | #include <linux/netfilter_ipv4/ip_nat_helper.h> |
35 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 32 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> |
36 | #include <linux/netfilter_ipv4/listhelp.h> | ||
37 | 33 | ||
38 | #if 0 | 34 | #if 0 |
39 | #define DEBUGP printk | 35 | #define DEBUGP printk |
@@ -101,18 +97,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) | |||
101 | write_unlock_bh(&ip_nat_lock); | 97 | write_unlock_bh(&ip_nat_lock); |
102 | } | 98 | } |
103 | 99 | ||
104 | /* We do checksum mangling, so if they were wrong before they're still | ||
105 | * wrong. Also works for incomplete packets (eg. ICMP dest | ||
106 | * unreachables.) */ | ||
107 | u_int16_t | ||
108 | ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) | ||
109 | { | ||
110 | u_int32_t diffs[] = { oldvalinv, newval }; | ||
111 | return csum_fold(csum_partial((char *)diffs, sizeof(diffs), | ||
112 | oldcheck^0xFFFF)); | ||
113 | } | ||
114 | EXPORT_SYMBOL(ip_nat_cheat_check); | ||
115 | |||
116 | /* Is this tuple already taken? (not by us) */ | 100 | /* Is this tuple already taken? (not by us) */ |
117 | int | 101 | int |
118 | ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, | 102 | ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, |
@@ -378,12 +362,12 @@ manip_pkt(u_int16_t proto, | |||
378 | iph = (void *)(*pskb)->data + iphdroff; | 362 | iph = (void *)(*pskb)->data + iphdroff; |
379 | 363 | ||
380 | if (maniptype == IP_NAT_MANIP_SRC) { | 364 | if (maniptype == IP_NAT_MANIP_SRC) { |
381 | iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, | 365 | iph->check = nf_csum_update(~iph->saddr, target->src.ip, |
382 | iph->check); | 366 | iph->check); |
383 | iph->saddr = target->src.ip; | 367 | iph->saddr = target->src.ip; |
384 | } else { | 368 | } else { |
385 | iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, | 369 | iph->check = nf_csum_update(~iph->daddr, target->dst.ip, |
386 | iph->check); | 370 | iph->check); |
387 | iph->daddr = target->dst.ip; | 371 | iph->daddr = target->dst.ip; |
388 | } | 372 | } |
389 | return 1; | 373 | return 1; |
@@ -423,10 +407,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct, | |||
423 | EXPORT_SYMBOL_GPL(ip_nat_packet); | 407 | EXPORT_SYMBOL_GPL(ip_nat_packet); |
424 | 408 | ||
425 | /* Dir is direction ICMP is coming from (opposite to packet it contains) */ | 409 | /* Dir is direction ICMP is coming from (opposite to packet it contains) */ |
426 | int ip_nat_icmp_reply_translation(struct sk_buff **pskb, | 410 | int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, |
427 | struct ip_conntrack *ct, | 411 | enum ip_conntrack_info ctinfo, |
428 | enum ip_nat_manip_type manip, | 412 | unsigned int hooknum, |
429 | enum ip_conntrack_dir dir) | 413 | struct sk_buff **pskb) |
430 | { | 414 | { |
431 | struct { | 415 | struct { |
432 | struct icmphdr icmp; | 416 | struct icmphdr icmp; |
@@ -434,7 +418,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, | |||
434 | } *inside; | 418 | } *inside; |
435 | struct ip_conntrack_tuple inner, target; | 419 | struct ip_conntrack_tuple inner, target; |
436 | int hdrlen = (*pskb)->nh.iph->ihl * 4; | 420 | int hdrlen = (*pskb)->nh.iph->ihl * 4; |
421 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
437 | unsigned long statusbit; | 422 | unsigned long statusbit; |
423 | enum ip_nat_manip_type manip = HOOK2MANIP(hooknum); | ||
438 | 424 | ||
439 | if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) | 425 | if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) |
440 | return 0; | 426 | return 0; |
@@ -443,12 +429,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, | |||
443 | 429 | ||
444 | /* We're actually going to mangle it beyond trivial checksum | 430 | /* We're actually going to mangle it beyond trivial checksum |
445 | adjustment, so make sure the current checksum is correct. */ | 431 | adjustment, so make sure the current checksum is correct. */ |
446 | if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) { | 432 | if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) |
447 | hdrlen = (*pskb)->nh.iph->ihl * 4; | 433 | return 0; |
448 | if ((u16)csum_fold(skb_checksum(*pskb, hdrlen, | ||
449 | (*pskb)->len - hdrlen, 0))) | ||
450 | return 0; | ||
451 | } | ||
452 | 434 | ||
453 | /* Must be RELATED */ | 435 | /* Must be RELATED */ |
454 | IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || | 436 | IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || |
@@ -487,12 +469,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, | |||
487 | !manip)) | 469 | !manip)) |
488 | return 0; | 470 | return 0; |
489 | 471 | ||
490 | /* Reloading "inside" here since manip_pkt inner. */ | 472 | if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { |
491 | inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; | 473 | /* Reloading "inside" here since manip_pkt inner. */ |
492 | inside->icmp.checksum = 0; | 474 | inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; |
493 | inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, | 475 | inside->icmp.checksum = 0; |
494 | (*pskb)->len - hdrlen, | 476 | inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, |
495 | 0)); | 477 | (*pskb)->len - hdrlen, |
478 | 0)); | ||
479 | } | ||
496 | 480 | ||
497 | /* Change outer to look the reply to an incoming packet | 481 | /* Change outer to look the reply to an incoming packet |
498 | * (proto 0 means don't invert per-proto part). */ | 482 | * (proto 0 means don't invert per-proto part). */ |
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index cbcaa45370ae..7f6a75984f6c 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c | |||
@@ -27,16 +27,12 @@ | |||
27 | #include <net/tcp.h> | 27 | #include <net/tcp.h> |
28 | #include <net/udp.h> | 28 | #include <net/udp.h> |
29 | 29 | ||
30 | #define ASSERT_READ_LOCK(x) | ||
31 | #define ASSERT_WRITE_LOCK(x) | ||
32 | |||
33 | #include <linux/netfilter_ipv4/ip_conntrack.h> | 30 | #include <linux/netfilter_ipv4/ip_conntrack.h> |
34 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> | 31 | #include <linux/netfilter_ipv4/ip_conntrack_helper.h> |
35 | #include <linux/netfilter_ipv4/ip_nat.h> | 32 | #include <linux/netfilter_ipv4/ip_nat.h> |
36 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | 33 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> |
37 | #include <linux/netfilter_ipv4/ip_nat_core.h> | 34 | #include <linux/netfilter_ipv4/ip_nat_core.h> |
38 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | 35 | #include <linux/netfilter_ipv4/ip_nat_helper.h> |
39 | #include <linux/netfilter_ipv4/listhelp.h> | ||
40 | 36 | ||
41 | #if 0 | 37 | #if 0 |
42 | #define DEBUGP printk | 38 | #define DEBUGP printk |
@@ -165,7 +161,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, | |||
165 | { | 161 | { |
166 | struct iphdr *iph; | 162 | struct iphdr *iph; |
167 | struct tcphdr *tcph; | 163 | struct tcphdr *tcph; |
168 | int datalen; | 164 | int oldlen, datalen; |
169 | 165 | ||
170 | if (!skb_make_writable(pskb, (*pskb)->len)) | 166 | if (!skb_make_writable(pskb, (*pskb)->len)) |
171 | return 0; | 167 | return 0; |
@@ -180,13 +176,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, | |||
180 | iph = (*pskb)->nh.iph; | 176 | iph = (*pskb)->nh.iph; |
181 | tcph = (void *)iph + iph->ihl*4; | 177 | tcph = (void *)iph + iph->ihl*4; |
182 | 178 | ||
179 | oldlen = (*pskb)->len - iph->ihl*4; | ||
183 | mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, | 180 | mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, |
184 | match_offset, match_len, rep_buffer, rep_len); | 181 | match_offset, match_len, rep_buffer, rep_len); |
185 | 182 | ||
186 | datalen = (*pskb)->len - iph->ihl*4; | 183 | datalen = (*pskb)->len - iph->ihl*4; |
187 | tcph->check = 0; | 184 | if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { |
188 | tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr, | 185 | tcph->check = 0; |
189 | csum_partial((char *)tcph, datalen, 0)); | 186 | tcph->check = tcp_v4_check(tcph, datalen, |
187 | iph->saddr, iph->daddr, | ||
188 | csum_partial((char *)tcph, | ||
189 | datalen, 0)); | ||
190 | } else | ||
191 | tcph->check = nf_proto_csum_update(*pskb, | ||
192 | htons(oldlen) ^ 0xFFFF, | ||
193 | htons(datalen), | ||
194 | tcph->check, 1); | ||
190 | 195 | ||
191 | if (rep_len != match_len) { | 196 | if (rep_len != match_len) { |
192 | set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); | 197 | set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); |
@@ -221,6 +226,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, | |||
221 | { | 226 | { |
222 | struct iphdr *iph; | 227 | struct iphdr *iph; |
223 | struct udphdr *udph; | 228 | struct udphdr *udph; |
229 | int datalen, oldlen; | ||
224 | 230 | ||
225 | /* UDP helpers might accidentally mangle the wrong packet */ | 231 | /* UDP helpers might accidentally mangle the wrong packet */ |
226 | iph = (*pskb)->nh.iph; | 232 | iph = (*pskb)->nh.iph; |
@@ -238,22 +244,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, | |||
238 | 244 | ||
239 | iph = (*pskb)->nh.iph; | 245 | iph = (*pskb)->nh.iph; |
240 | udph = (void *)iph + iph->ihl*4; | 246 | udph = (void *)iph + iph->ihl*4; |
247 | |||
248 | oldlen = (*pskb)->len - iph->ihl*4; | ||
241 | mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), | 249 | mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), |
242 | match_offset, match_len, rep_buffer, rep_len); | 250 | match_offset, match_len, rep_buffer, rep_len); |
243 | 251 | ||
244 | /* update the length of the UDP packet */ | 252 | /* update the length of the UDP packet */ |
245 | udph->len = htons((*pskb)->len - iph->ihl*4); | 253 | datalen = (*pskb)->len - iph->ihl*4; |
254 | udph->len = htons(datalen); | ||
246 | 255 | ||
247 | /* fix udp checksum if udp checksum was previously calculated */ | 256 | /* fix udp checksum if udp checksum was previously calculated */ |
248 | if (udph->check) { | 257 | if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) |
249 | int datalen = (*pskb)->len - iph->ihl * 4; | 258 | return 1; |
259 | |||
260 | if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { | ||
250 | udph->check = 0; | 261 | udph->check = 0; |
251 | udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, | 262 | udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, |
252 | datalen, IPPROTO_UDP, | 263 | datalen, IPPROTO_UDP, |
253 | csum_partial((char *)udph, | 264 | csum_partial((char *)udph, |
254 | datalen, 0)); | 265 | datalen, 0)); |
255 | } | 266 | if (!udph->check) |
256 | 267 | udph->check = -1; | |
268 | } else | ||
269 | udph->check = nf_proto_csum_update(*pskb, | ||
270 | htons(oldlen) ^ 0xFFFF, | ||
271 | htons(datalen), | ||
272 | udph->check, 1); | ||
257 | return 1; | 273 | return 1; |
258 | } | 274 | } |
259 | EXPORT_SYMBOL(ip_nat_mangle_udp_packet); | 275 | EXPORT_SYMBOL(ip_nat_mangle_udp_packet); |
@@ -293,11 +309,14 @@ sack_adjust(struct sk_buff *skb, | |||
293 | ntohl(sack->start_seq), new_start_seq, | 309 | ntohl(sack->start_seq), new_start_seq, |
294 | ntohl(sack->end_seq), new_end_seq); | 310 | ntohl(sack->end_seq), new_end_seq); |
295 | 311 | ||
296 | tcph->check = | 312 | tcph->check = nf_proto_csum_update(skb, |
297 | ip_nat_cheat_check(~sack->start_seq, new_start_seq, | 313 | ~sack->start_seq, |
298 | ip_nat_cheat_check(~sack->end_seq, | 314 | new_start_seq, |
299 | new_end_seq, | 315 | tcph->check, 0); |
300 | tcph->check)); | 316 | tcph->check = nf_proto_csum_update(skb, |
317 | ~sack->end_seq, | ||
318 | new_end_seq, | ||
319 | tcph->check, 0); | ||
301 | sack->start_seq = new_start_seq; | 320 | sack->start_seq = new_start_seq; |
302 | sack->end_seq = new_end_seq; | 321 | sack->end_seq = new_end_seq; |
303 | sackoff += sizeof(*sack); | 322 | sackoff += sizeof(*sack); |
@@ -381,10 +400,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb, | |||
381 | newack = ntohl(tcph->ack_seq) - other_way->offset_before; | 400 | newack = ntohl(tcph->ack_seq) - other_way->offset_before; |
382 | newack = htonl(newack); | 401 | newack = htonl(newack); |
383 | 402 | ||
384 | tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, | 403 | tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq, |
385 | ip_nat_cheat_check(~tcph->ack_seq, | 404 | tcph->check, 0); |
386 | newack, | 405 | tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack, |
387 | tcph->check)); | 406 | tcph->check, 0); |
388 | 407 | ||
389 | DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", | 408 | DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", |
390 | ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), | 409 | ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), |
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 1d149964dc38..2ff578807123 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c | |||
@@ -32,7 +32,7 @@ | |||
32 | * 2005-06-10 - Version 3.0 | 32 | * 2005-06-10 - Version 3.0 |
33 | * - kernel >= 2.6.11 version, | 33 | * - kernel >= 2.6.11 version, |
34 | * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) | 34 | * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) |
35 | * | 35 | * |
36 | */ | 36 | */ |
37 | 37 | ||
38 | #include <linux/module.h> | 38 | #include <linux/module.h> |
@@ -85,19 +85,17 @@ static void pptp_nat_expected(struct ip_conntrack *ct, | |||
85 | DEBUGP("we are PNS->PAC\n"); | 85 | DEBUGP("we are PNS->PAC\n"); |
86 | /* therefore, build tuple for PAC->PNS */ | 86 | /* therefore, build tuple for PAC->PNS */ |
87 | t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; | 87 | t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; |
88 | t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); | 88 | t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id; |
89 | t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; | 89 | t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; |
90 | t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); | 90 | t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id; |
91 | t.dst.protonum = IPPROTO_GRE; | 91 | t.dst.protonum = IPPROTO_GRE; |
92 | } else { | 92 | } else { |
93 | DEBUGP("we are PAC->PNS\n"); | 93 | DEBUGP("we are PAC->PNS\n"); |
94 | /* build tuple for PNS->PAC */ | 94 | /* build tuple for PNS->PAC */ |
95 | t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; | 95 | t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; |
96 | t.src.u.gre.key = | 96 | t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id; |
97 | htons(master->nat.help.nat_pptp_info.pns_call_id); | ||
98 | t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; | 97 | t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; |
99 | t.dst.u.gre.key = | 98 | t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id; |
100 | htons(master->nat.help.nat_pptp_info.pac_call_id); | ||
101 | t.dst.protonum = IPPROTO_GRE; | 99 | t.dst.protonum = IPPROTO_GRE; |
102 | } | 100 | } |
103 | 101 | ||
@@ -149,51 +147,52 @@ pptp_outbound_pkt(struct sk_buff **pskb, | |||
149 | { | 147 | { |
150 | struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; | 148 | struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; |
151 | struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; | 149 | struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; |
152 | u_int16_t msg, new_callid; | 150 | u_int16_t msg; |
151 | __be16 new_callid; | ||
153 | unsigned int cid_off; | 152 | unsigned int cid_off; |
154 | 153 | ||
155 | new_callid = htons(ct_pptp_info->pns_call_id); | 154 | new_callid = ct_pptp_info->pns_call_id; |
156 | 155 | ||
157 | switch (msg = ntohs(ctlh->messageType)) { | 156 | switch (msg = ntohs(ctlh->messageType)) { |
158 | case PPTP_OUT_CALL_REQUEST: | 157 | case PPTP_OUT_CALL_REQUEST: |
159 | cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); | 158 | cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); |
160 | /* FIXME: ideally we would want to reserve a call ID | 159 | /* FIXME: ideally we would want to reserve a call ID |
161 | * here. current netfilter NAT core is not able to do | 160 | * here. current netfilter NAT core is not able to do |
162 | * this :( For now we use TCP source port. This breaks | 161 | * this :( For now we use TCP source port. This breaks |
163 | * multiple calls within one control session */ | 162 | * multiple calls within one control session */ |
164 | 163 | ||
165 | /* save original call ID in nat_info */ | 164 | /* save original call ID in nat_info */ |
166 | nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; | 165 | nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; |
167 | 166 | ||
168 | /* don't use tcph->source since we are at a DSTmanip | 167 | /* don't use tcph->source since we are at a DSTmanip |
169 | * hook (e.g. PREROUTING) and pkt is not mangled yet */ | 168 | * hook (e.g. PREROUTING) and pkt is not mangled yet */ |
170 | new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; | 169 | new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; |
171 | 170 | ||
172 | /* save new call ID in ct info */ | 171 | /* save new call ID in ct info */ |
173 | ct_pptp_info->pns_call_id = ntohs(new_callid); | 172 | ct_pptp_info->pns_call_id = new_callid; |
174 | break; | 173 | break; |
175 | case PPTP_IN_CALL_REPLY: | 174 | case PPTP_IN_CALL_REPLY: |
176 | cid_off = offsetof(union pptp_ctrl_union, icreq.callID); | 175 | cid_off = offsetof(union pptp_ctrl_union, icack.callID); |
177 | break; | 176 | break; |
178 | case PPTP_CALL_CLEAR_REQUEST: | 177 | case PPTP_CALL_CLEAR_REQUEST: |
179 | cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); | 178 | cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); |
180 | break; | 179 | break; |
181 | default: | 180 | default: |
182 | DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, | 181 | DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, |
183 | (msg <= PPTP_MSG_MAX)? | 182 | (msg <= PPTP_MSG_MAX)? |
184 | pptp_msg_name[msg]:pptp_msg_name[0]); | 183 | pptp_msg_name[msg]:pptp_msg_name[0]); |
185 | /* fall through */ | 184 | /* fall through */ |
186 | 185 | ||
187 | case PPTP_SET_LINK_INFO: | 186 | case PPTP_SET_LINK_INFO: |
188 | /* only need to NAT in case PAC is behind NAT box */ | 187 | /* only need to NAT in case PAC is behind NAT box */ |
189 | case PPTP_START_SESSION_REQUEST: | 188 | case PPTP_START_SESSION_REQUEST: |
190 | case PPTP_START_SESSION_REPLY: | 189 | case PPTP_START_SESSION_REPLY: |
191 | case PPTP_STOP_SESSION_REQUEST: | 190 | case PPTP_STOP_SESSION_REQUEST: |
192 | case PPTP_STOP_SESSION_REPLY: | 191 | case PPTP_STOP_SESSION_REPLY: |
193 | case PPTP_ECHO_REQUEST: | 192 | case PPTP_ECHO_REQUEST: |
194 | case PPTP_ECHO_REPLY: | 193 | case PPTP_ECHO_REPLY: |
195 | /* no need to alter packet */ | 194 | /* no need to alter packet */ |
196 | return NF_ACCEPT; | 195 | return NF_ACCEPT; |
197 | } | 196 | } |
198 | 197 | ||
199 | /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass | 198 | /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass |
@@ -212,80 +211,28 @@ pptp_outbound_pkt(struct sk_buff **pskb, | |||
212 | return NF_ACCEPT; | 211 | return NF_ACCEPT; |
213 | } | 212 | } |
214 | 213 | ||
215 | static int | 214 | static void |
216 | pptp_exp_gre(struct ip_conntrack_expect *expect_orig, | 215 | pptp_exp_gre(struct ip_conntrack_expect *expect_orig, |
217 | struct ip_conntrack_expect *expect_reply) | 216 | struct ip_conntrack_expect *expect_reply) |
218 | { | 217 | { |
219 | struct ip_ct_pptp_master *ct_pptp_info = | ||
220 | &expect_orig->master->help.ct_pptp_info; | ||
221 | struct ip_nat_pptp *nat_pptp_info = | ||
222 | &expect_orig->master->nat.help.nat_pptp_info; | ||
223 | |||
224 | struct ip_conntrack *ct = expect_orig->master; | 218 | struct ip_conntrack *ct = expect_orig->master; |
225 | 219 | struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; | |
226 | struct ip_conntrack_tuple inv_t; | 220 | struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; |
227 | struct ip_conntrack_tuple *orig_t, *reply_t; | ||
228 | 221 | ||
229 | /* save original PAC call ID in nat_info */ | 222 | /* save original PAC call ID in nat_info */ |
230 | nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; | 223 | nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; |
231 | 224 | ||
232 | /* alter expectation */ | ||
233 | orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | ||
234 | reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; | ||
235 | |||
236 | /* alter expectation for PNS->PAC direction */ | 225 | /* alter expectation for PNS->PAC direction */ |
237 | invert_tuplepr(&inv_t, &expect_orig->tuple); | 226 | expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; |
238 | expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id); | 227 | expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; |
239 | expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); | 228 | expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; |
240 | expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); | ||
241 | expect_orig->dir = IP_CT_DIR_ORIGINAL; | 229 | expect_orig->dir = IP_CT_DIR_ORIGINAL; |
242 | inv_t.src.ip = reply_t->src.ip; | ||
243 | inv_t.dst.ip = reply_t->dst.ip; | ||
244 | inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); | ||
245 | inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); | ||
246 | |||
247 | if (!ip_conntrack_expect_related(expect_orig)) { | ||
248 | DEBUGP("successfully registered expect\n"); | ||
249 | } else { | ||
250 | DEBUGP("can't expect_related(expect_orig)\n"); | ||
251 | return 1; | ||
252 | } | ||
253 | 230 | ||
254 | /* alter expectation for PAC->PNS direction */ | 231 | /* alter expectation for PAC->PNS direction */ |
255 | invert_tuplepr(&inv_t, &expect_reply->tuple); | 232 | expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; |
256 | expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); | 233 | expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; |
257 | expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); | 234 | expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; |
258 | expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); | ||
259 | expect_reply->dir = IP_CT_DIR_REPLY; | 235 | expect_reply->dir = IP_CT_DIR_REPLY; |
260 | inv_t.src.ip = orig_t->src.ip; | ||
261 | inv_t.dst.ip = orig_t->dst.ip; | ||
262 | inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); | ||
263 | inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); | ||
264 | |||
265 | if (!ip_conntrack_expect_related(expect_reply)) { | ||
266 | DEBUGP("successfully registered expect\n"); | ||
267 | } else { | ||
268 | DEBUGP("can't expect_related(expect_reply)\n"); | ||
269 | ip_conntrack_unexpect_related(expect_orig); | ||
270 | return 1; | ||
271 | } | ||
272 | |||
273 | if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { | ||
274 | DEBUGP("can't register original keymap\n"); | ||
275 | ip_conntrack_unexpect_related(expect_orig); | ||
276 | ip_conntrack_unexpect_related(expect_reply); | ||
277 | return 1; | ||
278 | } | ||
279 | |||
280 | if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { | ||
281 | DEBUGP("can't register reply keymap\n"); | ||
282 | ip_conntrack_unexpect_related(expect_orig); | ||
283 | ip_conntrack_unexpect_related(expect_reply); | ||
284 | ip_ct_gre_keymap_destroy(ct); | ||
285 | return 1; | ||
286 | } | ||
287 | |||
288 | return 0; | ||
289 | } | 236 | } |
290 | 237 | ||
291 | /* inbound packets == from PAC to PNS */ | 238 | /* inbound packets == from PAC to PNS */ |
@@ -297,15 +244,15 @@ pptp_inbound_pkt(struct sk_buff **pskb, | |||
297 | union pptp_ctrl_union *pptpReq) | 244 | union pptp_ctrl_union *pptpReq) |
298 | { | 245 | { |
299 | struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; | 246 | struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; |
300 | u_int16_t msg, new_cid = 0, new_pcid; | 247 | u_int16_t msg; |
301 | unsigned int pcid_off, cid_off = 0; | 248 | __be16 new_pcid; |
249 | unsigned int pcid_off; | ||
302 | 250 | ||
303 | new_pcid = htons(nat_pptp_info->pns_call_id); | 251 | new_pcid = nat_pptp_info->pns_call_id; |
304 | 252 | ||
305 | switch (msg = ntohs(ctlh->messageType)) { | 253 | switch (msg = ntohs(ctlh->messageType)) { |
306 | case PPTP_OUT_CALL_REPLY: | 254 | case PPTP_OUT_CALL_REPLY: |
307 | pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); | 255 | pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); |
308 | cid_off = offsetof(union pptp_ctrl_union, ocack.callID); | ||
309 | break; | 256 | break; |
310 | case PPTP_IN_CALL_CONNECT: | 257 | case PPTP_IN_CALL_CONNECT: |
311 | pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); | 258 | pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); |
@@ -324,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, | |||
324 | break; | 271 | break; |
325 | 272 | ||
326 | default: | 273 | default: |
327 | DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? | 274 | DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? |
328 | pptp_msg_name[msg]:pptp_msg_name[0]); | 275 | pptp_msg_name[msg]:pptp_msg_name[0]); |
329 | /* fall through */ | 276 | /* fall through */ |
330 | 277 | ||
@@ -351,17 +298,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, | |||
351 | sizeof(new_pcid), (char *)&new_pcid, | 298 | sizeof(new_pcid), (char *)&new_pcid, |
352 | sizeof(new_pcid)) == 0) | 299 | sizeof(new_pcid)) == 0) |
353 | return NF_DROP; | 300 | return NF_DROP; |
354 | |||
355 | if (new_cid) { | ||
356 | DEBUGP("altering call id from 0x%04x to 0x%04x\n", | ||
357 | ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid)); | ||
358 | if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, | ||
359 | cid_off + sizeof(struct pptp_pkt_hdr) + | ||
360 | sizeof(struct PptpControlHeader), | ||
361 | sizeof(new_cid), (char *)&new_cid, | ||
362 | sizeof(new_cid)) == 0) | ||
363 | return NF_DROP; | ||
364 | } | ||
365 | return NF_ACCEPT; | 301 | return NF_ACCEPT; |
366 | } | 302 | } |
367 | 303 | ||
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index 38acfdf540eb..bf91f9312b3c 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c | |||
@@ -6,10 +6,10 @@ | |||
6 | * GRE is a generic encapsulation protocol, which is generally not very | 6 | * GRE is a generic encapsulation protocol, which is generally not very |
7 | * suited for NAT, as it has no protocol-specific part as port numbers. | 7 | * suited for NAT, as it has no protocol-specific part as port numbers. |
8 | * | 8 | * |
9 | * It has an optional key field, which may help us distinguishing two | 9 | * It has an optional key field, which may help us distinguishing two |
10 | * connections between the same two hosts. | 10 | * connections between the same two hosts. |
11 | * | 11 | * |
12 | * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 | 12 | * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 |
13 | * | 13 | * |
14 | * PPTP is built on top of a modified version of GRE, and has a mandatory | 14 | * PPTP is built on top of a modified version of GRE, and has a mandatory |
15 | * field called "CallID", which serves us for the same purpose as the key | 15 | * field called "CallID", which serves us for the same purpose as the key |
@@ -60,14 +60,14 @@ gre_in_range(const struct ip_conntrack_tuple *tuple, | |||
60 | } | 60 | } |
61 | 61 | ||
62 | /* generate unique tuple ... */ | 62 | /* generate unique tuple ... */ |
63 | static int | 63 | static int |
64 | gre_unique_tuple(struct ip_conntrack_tuple *tuple, | 64 | gre_unique_tuple(struct ip_conntrack_tuple *tuple, |
65 | const struct ip_nat_range *range, | 65 | const struct ip_nat_range *range, |
66 | enum ip_nat_manip_type maniptype, | 66 | enum ip_nat_manip_type maniptype, |
67 | const struct ip_conntrack *conntrack) | 67 | const struct ip_conntrack *conntrack) |
68 | { | 68 | { |
69 | static u_int16_t key; | 69 | static u_int16_t key; |
70 | u_int16_t *keyptr; | 70 | __be16 *keyptr; |
71 | unsigned int min, i, range_size; | 71 | unsigned int min, i, range_size; |
72 | 72 | ||
73 | if (maniptype == IP_NAT_MANIP_SRC) | 73 | if (maniptype == IP_NAT_MANIP_SRC) |
@@ -84,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple, | |||
84 | range_size = ntohs(range->max.gre.key) - min + 1; | 84 | range_size = ntohs(range->max.gre.key) - min + 1; |
85 | } | 85 | } |
86 | 86 | ||
87 | DEBUGP("min = %u, range_size = %u\n", min, range_size); | 87 | DEBUGP("min = %u, range_size = %u\n", min, range_size); |
88 | 88 | ||
89 | for (i = 0; i < range_size; i++, key++) { | 89 | for (i = 0; i < range_size; i++, key++) { |
90 | *keyptr = htons(min + key % range_size); | 90 | *keyptr = htons(min + key % range_size); |
@@ -117,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb, | |||
117 | greh = (void *)(*pskb)->data + hdroff; | 117 | greh = (void *)(*pskb)->data + hdroff; |
118 | pgreh = (struct gre_hdr_pptp *) greh; | 118 | pgreh = (struct gre_hdr_pptp *) greh; |
119 | 119 | ||
120 | /* we only have destination manip of a packet, since 'source key' | 120 | /* we only have destination manip of a packet, since 'source key' |
121 | * is not present in the packet itself */ | 121 | * is not present in the packet itself */ |
122 | if (maniptype == IP_NAT_MANIP_DST) { | 122 | if (maniptype == IP_NAT_MANIP_DST) { |
123 | /* key manipulation is always dest */ | 123 | /* key manipulation is always dest */ |
@@ -129,15 +129,16 @@ gre_manip_pkt(struct sk_buff **pskb, | |||
129 | } | 129 | } |
130 | if (greh->csum) { | 130 | if (greh->csum) { |
131 | /* FIXME: Never tested this code... */ | 131 | /* FIXME: Never tested this code... */ |
132 | *(gre_csum(greh)) = | 132 | *(gre_csum(greh)) = |
133 | ip_nat_cheat_check(~*(gre_key(greh)), | 133 | nf_proto_csum_update(*pskb, |
134 | ~*(gre_key(greh)), | ||
134 | tuple->dst.u.gre.key, | 135 | tuple->dst.u.gre.key, |
135 | *(gre_csum(greh))); | 136 | *(gre_csum(greh)), 0); |
136 | } | 137 | } |
137 | *(gre_key(greh)) = tuple->dst.u.gre.key; | 138 | *(gre_key(greh)) = tuple->dst.u.gre.key; |
138 | break; | 139 | break; |
139 | case GRE_VERSION_PPTP: | 140 | case GRE_VERSION_PPTP: |
140 | DEBUGP("call_id -> 0x%04x\n", | 141 | DEBUGP("call_id -> 0x%04x\n", |
141 | ntohs(tuple->dst.u.gre.key)); | 142 | ntohs(tuple->dst.u.gre.key)); |
142 | pgreh->call_id = tuple->dst.u.gre.key; | 143 | pgreh->call_id = tuple->dst.u.gre.key; |
143 | break; | 144 | break; |
@@ -151,8 +152,8 @@ gre_manip_pkt(struct sk_buff **pskb, | |||
151 | } | 152 | } |
152 | 153 | ||
153 | /* nat helper struct */ | 154 | /* nat helper struct */ |
154 | static struct ip_nat_protocol gre = { | 155 | static struct ip_nat_protocol gre = { |
155 | .name = "GRE", | 156 | .name = "GRE", |
156 | .protonum = IPPROTO_GRE, | 157 | .protonum = IPPROTO_GRE, |
157 | .manip_pkt = gre_manip_pkt, | 158 | .manip_pkt = gre_manip_pkt, |
158 | .in_range = gre_in_range, | 159 | .in_range = gre_in_range, |
@@ -163,7 +164,7 @@ static struct ip_nat_protocol gre = { | |||
163 | .nfattr_to_range = ip_nat_port_nfattr_to_range, | 164 | .nfattr_to_range = ip_nat_port_nfattr_to_range, |
164 | #endif | 165 | #endif |
165 | }; | 166 | }; |
166 | 167 | ||
167 | int __init ip_nat_proto_gre_init(void) | 168 | int __init ip_nat_proto_gre_init(void) |
168 | { | 169 | { |
169 | return ip_nat_protocol_register(&gre); | 170 | return ip_nat_protocol_register(&gre); |
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 31a3f4ccb99c..ec50cc295317 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c | |||
@@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb, | |||
66 | return 0; | 66 | return 0; |
67 | 67 | ||
68 | hdr = (struct icmphdr *)((*pskb)->data + hdroff); | 68 | hdr = (struct icmphdr *)((*pskb)->data + hdroff); |
69 | 69 | hdr->checksum = nf_proto_csum_update(*pskb, | |
70 | hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, | 70 | hdr->un.echo.id ^ 0xFFFF, |
71 | tuple->src.u.icmp.id, | 71 | tuple->src.u.icmp.id, |
72 | hdr->checksum); | 72 | hdr->checksum, 0); |
73 | hdr->un.echo.id = tuple->src.u.icmp.id; | 73 | hdr->un.echo.id = tuple->src.u.icmp.id; |
74 | return 1; | 74 | return 1; |
75 | } | 75 | } |
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index a3d14079eba6..72a6307bd2db 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c | |||
@@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb, | |||
129 | if (hdrsize < sizeof(*hdr)) | 129 | if (hdrsize < sizeof(*hdr)) |
130 | return 1; | 130 | return 1; |
131 | 131 | ||
132 | hdr->check = ip_nat_cheat_check(~oldip, newip, | 132 | hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1); |
133 | ip_nat_cheat_check(oldport ^ 0xFFFF, | 133 | hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport, |
134 | newport, | 134 | hdr->check, 0); |
135 | hdr->check)); | ||
136 | return 1; | 135 | return 1; |
137 | } | 136 | } |
138 | 137 | ||
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index ec6053fdc867..5da196ae758c 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c | |||
@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, | |||
113 | newport = tuple->dst.u.udp.port; | 113 | newport = tuple->dst.u.udp.port; |
114 | portptr = &hdr->dest; | 114 | portptr = &hdr->dest; |
115 | } | 115 | } |
116 | if (hdr->check) /* 0 is a special case meaning no checksum */ | 116 | |
117 | hdr->check = ip_nat_cheat_check(~oldip, newip, | 117 | if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { |
118 | ip_nat_cheat_check(*portptr ^ 0xFFFF, | 118 | hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, |
119 | newport, | 119 | hdr->check, 1); |
120 | hdr->check)); | 120 | hdr->check = nf_proto_csum_update(*pskb, |
121 | *portptr ^ 0xFFFF, newport, | ||
122 | hdr->check, 0); | ||
123 | if (!hdr->check) | ||
124 | hdr->check = -1; | ||
125 | } | ||
121 | *portptr = newport; | 126 | *portptr = newport; |
122 | return 1; | 127 | return 1; |
123 | } | 128 | } |
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 1aba926c1cb0..7b703839aa58 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c | |||
@@ -19,14 +19,10 @@ | |||
19 | #include <net/route.h> | 19 | #include <net/route.h> |
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | 21 | ||
22 | #define ASSERT_READ_LOCK(x) | ||
23 | #define ASSERT_WRITE_LOCK(x) | ||
24 | |||
25 | #include <linux/netfilter_ipv4/ip_tables.h> | 22 | #include <linux/netfilter_ipv4/ip_tables.h> |
26 | #include <linux/netfilter_ipv4/ip_nat.h> | 23 | #include <linux/netfilter_ipv4/ip_nat.h> |
27 | #include <linux/netfilter_ipv4/ip_nat_core.h> | 24 | #include <linux/netfilter_ipv4/ip_nat_core.h> |
28 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | 25 | #include <linux/netfilter_ipv4/ip_nat_rule.h> |
29 | #include <linux/netfilter_ipv4/listhelp.h> | ||
30 | 26 | ||
31 | #if 0 | 27 | #if 0 |
32 | #define DEBUGP printk | 28 | #define DEBUGP printk |
@@ -104,8 +100,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, | |||
104 | const struct net_device *out, | 100 | const struct net_device *out, |
105 | unsigned int hooknum, | 101 | unsigned int hooknum, |
106 | const struct ipt_target *target, | 102 | const struct ipt_target *target, |
107 | const void *targinfo, | 103 | const void *targinfo) |
108 | void *userinfo) | ||
109 | { | 104 | { |
110 | struct ip_conntrack *ct; | 105 | struct ip_conntrack *ct; |
111 | enum ip_conntrack_info ctinfo; | 106 | enum ip_conntrack_info ctinfo; |
@@ -147,8 +142,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, | |||
147 | const struct net_device *out, | 142 | const struct net_device *out, |
148 | unsigned int hooknum, | 143 | unsigned int hooknum, |
149 | const struct ipt_target *target, | 144 | const struct ipt_target *target, |
150 | const void *targinfo, | 145 | const void *targinfo) |
151 | void *userinfo) | ||
152 | { | 146 | { |
153 | struct ip_conntrack *ct; | 147 | struct ip_conntrack *ct; |
154 | enum ip_conntrack_info ctinfo; | 148 | enum ip_conntrack_info ctinfo; |
@@ -174,7 +168,6 @@ static int ipt_snat_checkentry(const char *tablename, | |||
174 | const void *entry, | 168 | const void *entry, |
175 | const struct ipt_target *target, | 169 | const struct ipt_target *target, |
176 | void *targinfo, | 170 | void *targinfo, |
177 | unsigned int targinfosize, | ||
178 | unsigned int hook_mask) | 171 | unsigned int hook_mask) |
179 | { | 172 | { |
180 | struct ip_nat_multi_range_compat *mr = targinfo; | 173 | struct ip_nat_multi_range_compat *mr = targinfo; |
@@ -191,7 +184,6 @@ static int ipt_dnat_checkentry(const char *tablename, | |||
191 | const void *entry, | 184 | const void *entry, |
192 | const struct ipt_target *target, | 185 | const struct ipt_target *target, |
193 | void *targinfo, | 186 | void *targinfo, |
194 | unsigned int targinfosize, | ||
195 | unsigned int hook_mask) | 187 | unsigned int hook_mask) |
196 | { | 188 | { |
197 | struct ip_nat_multi_range_compat *mr = targinfo; | 189 | struct ip_nat_multi_range_compat *mr = targinfo; |
@@ -255,7 +247,7 @@ int ip_nat_rule_find(struct sk_buff **pskb, | |||
255 | { | 247 | { |
256 | int ret; | 248 | int ret; |
257 | 249 | ||
258 | ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); | 250 | ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); |
259 | 251 | ||
260 | if (ret == NF_ACCEPT) { | 252 | if (ret == NF_ACCEPT) { |
261 | if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) | 253 | if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) |
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 17de077a7901..9c577db62047 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c | |||
@@ -30,9 +30,6 @@ | |||
30 | #include <net/checksum.h> | 30 | #include <net/checksum.h> |
31 | #include <linux/spinlock.h> | 31 | #include <linux/spinlock.h> |
32 | 32 | ||
33 | #define ASSERT_READ_LOCK(x) | ||
34 | #define ASSERT_WRITE_LOCK(x) | ||
35 | |||
36 | #include <linux/netfilter_ipv4/ip_nat.h> | 33 | #include <linux/netfilter_ipv4/ip_nat.h> |
37 | #include <linux/netfilter_ipv4/ip_nat_rule.h> | 34 | #include <linux/netfilter_ipv4/ip_nat_rule.h> |
38 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> | 35 | #include <linux/netfilter_ipv4/ip_nat_protocol.h> |
@@ -40,7 +37,6 @@ | |||
40 | #include <linux/netfilter_ipv4/ip_nat_helper.h> | 37 | #include <linux/netfilter_ipv4/ip_nat_helper.h> |
41 | #include <linux/netfilter_ipv4/ip_tables.h> | 38 | #include <linux/netfilter_ipv4/ip_tables.h> |
42 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> | 39 | #include <linux/netfilter_ipv4/ip_conntrack_core.h> |
43 | #include <linux/netfilter_ipv4/listhelp.h> | ||
44 | 40 | ||
45 | #if 0 | 41 | #if 0 |
46 | #define DEBUGP printk | 42 | #define DEBUGP printk |
@@ -110,11 +106,6 @@ ip_nat_fn(unsigned int hooknum, | |||
110 | IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off | 106 | IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off |
111 | & htons(IP_MF|IP_OFFSET))); | 107 | & htons(IP_MF|IP_OFFSET))); |
112 | 108 | ||
113 | /* If we had a hardware checksum before, it's now invalid */ | ||
114 | if ((*pskb)->ip_summed == CHECKSUM_HW) | ||
115 | if (skb_checksum_help(*pskb, (out == NULL))) | ||
116 | return NF_DROP; | ||
117 | |||
118 | ct = ip_conntrack_get(*pskb, &ctinfo); | 109 | ct = ip_conntrack_get(*pskb, &ctinfo); |
119 | /* Can't track? It's not due to stress, or conntrack would | 110 | /* Can't track? It's not due to stress, or conntrack would |
120 | have dropped it. Hence it's the user's responsibilty to | 111 | have dropped it. Hence it's the user's responsibilty to |
@@ -145,8 +136,8 @@ ip_nat_fn(unsigned int hooknum, | |||
145 | case IP_CT_RELATED: | 136 | case IP_CT_RELATED: |
146 | case IP_CT_RELATED+IP_CT_IS_REPLY: | 137 | case IP_CT_RELATED+IP_CT_IS_REPLY: |
147 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { | 138 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { |
148 | if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, | 139 | if (!ip_nat_icmp_reply_translation(ct, ctinfo, |
149 | CTINFO2DIR(ctinfo))) | 140 | hooknum, pskb)) |
150 | return NF_DROP; | 141 | return NF_DROP; |
151 | else | 142 | else |
152 | return NF_ACCEPT; | 143 | return NF_ACCEPT; |
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 198ac36db861..7edad790478a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
@@ -52,15 +52,15 @@ struct ipq_queue_entry { | |||
52 | 52 | ||
53 | typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); | 53 | typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); |
54 | 54 | ||
55 | static unsigned char copy_mode = IPQ_COPY_NONE; | 55 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; |
56 | static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; | 56 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; |
57 | static DEFINE_RWLOCK(queue_lock); | 57 | static DEFINE_RWLOCK(queue_lock); |
58 | static int peer_pid; | 58 | static int peer_pid __read_mostly; |
59 | static unsigned int copy_range; | 59 | static unsigned int copy_range __read_mostly; |
60 | static unsigned int queue_total; | 60 | static unsigned int queue_total; |
61 | static unsigned int queue_dropped = 0; | 61 | static unsigned int queue_dropped = 0; |
62 | static unsigned int queue_user_dropped = 0; | 62 | static unsigned int queue_user_dropped = 0; |
63 | static struct sock *ipqnl; | 63 | static struct sock *ipqnl __read_mostly; |
64 | static LIST_HEAD(queue_list); | 64 | static LIST_HEAD(queue_list); |
65 | static DEFINE_MUTEX(ipqnl_mutex); | 65 | static DEFINE_MUTEX(ipqnl_mutex); |
66 | 66 | ||
@@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) | |||
208 | break; | 208 | break; |
209 | 209 | ||
210 | case IPQ_COPY_PACKET: | 210 | case IPQ_COPY_PACKET: |
211 | if (entry->skb->ip_summed == CHECKSUM_HW && | 211 | if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || |
212 | (*errp = skb_checksum_help(entry->skb, | 212 | entry->skb->ip_summed == CHECKSUM_COMPLETE) && |
213 | entry->info->outdev == NULL))) { | 213 | (*errp = skb_checksum_help(entry->skb))) { |
214 | read_unlock_bh(&queue_lock); | 214 | read_unlock_bh(&queue_lock); |
215 | return NULL; | 215 | return NULL; |
216 | } | 216 | } |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 048514f15f2f..800067d69a9a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -180,8 +180,7 @@ ipt_error(struct sk_buff **pskb, | |||
180 | const struct net_device *out, | 180 | const struct net_device *out, |
181 | unsigned int hooknum, | 181 | unsigned int hooknum, |
182 | const struct xt_target *target, | 182 | const struct xt_target *target, |
183 | const void *targinfo, | 183 | const void *targinfo) |
184 | void *userinfo) | ||
185 | { | 184 | { |
186 | if (net_ratelimit()) | 185 | if (net_ratelimit()) |
187 | printk("ip_tables: error: `%s'\n", (char *)targinfo); | 186 | printk("ip_tables: error: `%s'\n", (char *)targinfo); |
@@ -217,8 +216,7 @@ ipt_do_table(struct sk_buff **pskb, | |||
217 | unsigned int hook, | 216 | unsigned int hook, |
218 | const struct net_device *in, | 217 | const struct net_device *in, |
219 | const struct net_device *out, | 218 | const struct net_device *out, |
220 | struct ipt_table *table, | 219 | struct ipt_table *table) |
221 | void *userdata) | ||
222 | { | 220 | { |
223 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); | 221 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); |
224 | u_int16_t offset; | 222 | u_int16_t offset; |
@@ -308,8 +306,7 @@ ipt_do_table(struct sk_buff **pskb, | |||
308 | in, out, | 306 | in, out, |
309 | hook, | 307 | hook, |
310 | t->u.kernel.target, | 308 | t->u.kernel.target, |
311 | t->data, | 309 | t->data); |
312 | userdata); | ||
313 | 310 | ||
314 | #ifdef CONFIG_NETFILTER_DEBUG | 311 | #ifdef CONFIG_NETFILTER_DEBUG |
315 | if (((struct ipt_entry *)table_base)->comefrom | 312 | if (((struct ipt_entry *)table_base)->comefrom |
@@ -467,8 +464,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i) | |||
467 | return 1; | 464 | return 1; |
468 | 465 | ||
469 | if (m->u.kernel.match->destroy) | 466 | if (m->u.kernel.match->destroy) |
470 | m->u.kernel.match->destroy(m->u.kernel.match, m->data, | 467 | m->u.kernel.match->destroy(m->u.kernel.match, m->data); |
471 | m->u.match_size - sizeof(*m)); | ||
472 | module_put(m->u.kernel.match->me); | 468 | module_put(m->u.kernel.match->me); |
473 | return 0; | 469 | return 0; |
474 | } | 470 | } |
@@ -521,7 +517,6 @@ check_match(struct ipt_entry_match *m, | |||
521 | 517 | ||
522 | if (m->u.kernel.match->checkentry | 518 | if (m->u.kernel.match->checkentry |
523 | && !m->u.kernel.match->checkentry(name, ip, match, m->data, | 519 | && !m->u.kernel.match->checkentry(name, ip, match, m->data, |
524 | m->u.match_size - sizeof(*m), | ||
525 | hookmask)) { | 520 | hookmask)) { |
526 | duprintf("ip_tables: check failed for `%s'.\n", | 521 | duprintf("ip_tables: check failed for `%s'.\n", |
527 | m->u.kernel.match->name); | 522 | m->u.kernel.match->name); |
@@ -578,12 +573,10 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, | |||
578 | if (t->u.kernel.target == &ipt_standard_target) { | 573 | if (t->u.kernel.target == &ipt_standard_target) { |
579 | if (!standard_check(t, size)) { | 574 | if (!standard_check(t, size)) { |
580 | ret = -EINVAL; | 575 | ret = -EINVAL; |
581 | goto cleanup_matches; | 576 | goto err; |
582 | } | 577 | } |
583 | } else if (t->u.kernel.target->checkentry | 578 | } else if (t->u.kernel.target->checkentry |
584 | && !t->u.kernel.target->checkentry(name, e, target, t->data, | 579 | && !t->u.kernel.target->checkentry(name, e, target, t->data, |
585 | t->u.target_size | ||
586 | - sizeof(*t), | ||
587 | e->comefrom)) { | 580 | e->comefrom)) { |
588 | duprintf("ip_tables: check failed for `%s'.\n", | 581 | duprintf("ip_tables: check failed for `%s'.\n", |
589 | t->u.kernel.target->name); | 582 | t->u.kernel.target->name); |
@@ -655,8 +648,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) | |||
655 | IPT_MATCH_ITERATE(e, cleanup_match, NULL); | 648 | IPT_MATCH_ITERATE(e, cleanup_match, NULL); |
656 | t = ipt_get_target(e); | 649 | t = ipt_get_target(e); |
657 | if (t->u.kernel.target->destroy) | 650 | if (t->u.kernel.target->destroy) |
658 | t->u.kernel.target->destroy(t->u.kernel.target, t->data, | 651 | t->u.kernel.target->destroy(t->u.kernel.target, t->data); |
659 | t->u.target_size - sizeof(*t)); | ||
660 | module_put(t->u.kernel.target->me); | 652 | module_put(t->u.kernel.target->me); |
661 | return 0; | 653 | return 0; |
662 | } | 654 | } |
@@ -950,73 +942,28 @@ static short compat_calc_jump(u_int16_t offset) | |||
950 | return delta; | 942 | return delta; |
951 | } | 943 | } |
952 | 944 | ||
953 | struct compat_ipt_standard_target | 945 | static void compat_standard_from_user(void *dst, void *src) |
954 | { | 946 | { |
955 | struct compat_xt_entry_target target; | 947 | int v = *(compat_int_t *)src; |
956 | compat_int_t verdict; | ||
957 | }; | ||
958 | |||
959 | struct compat_ipt_standard | ||
960 | { | ||
961 | struct compat_ipt_entry entry; | ||
962 | struct compat_ipt_standard_target target; | ||
963 | }; | ||
964 | 948 | ||
965 | #define IPT_ST_LEN XT_ALIGN(sizeof(struct ipt_standard_target)) | 949 | if (v > 0) |
966 | #define IPT_ST_COMPAT_LEN COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target)) | 950 | v += compat_calc_jump(v); |
967 | #define IPT_ST_OFFSET (IPT_ST_LEN - IPT_ST_COMPAT_LEN) | 951 | memcpy(dst, &v, sizeof(v)); |
952 | } | ||
968 | 953 | ||
969 | static int compat_ipt_standard_fn(void *target, | 954 | static int compat_standard_to_user(void __user *dst, void *src) |
970 | void **dstptr, int *size, int convert) | ||
971 | { | 955 | { |
972 | struct compat_ipt_standard_target compat_st, *pcompat_st; | 956 | compat_int_t cv = *(int *)src; |
973 | struct ipt_standard_target st, *pst; | ||
974 | int ret; | ||
975 | 957 | ||
976 | ret = 0; | 958 | if (cv > 0) |
977 | switch (convert) { | 959 | cv -= compat_calc_jump(cv); |
978 | case COMPAT_TO_USER: | 960 | return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; |
979 | pst = target; | ||
980 | memcpy(&compat_st.target, &pst->target, | ||
981 | sizeof(compat_st.target)); | ||
982 | compat_st.verdict = pst->verdict; | ||
983 | if (compat_st.verdict > 0) | ||
984 | compat_st.verdict -= | ||
985 | compat_calc_jump(compat_st.verdict); | ||
986 | compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN; | ||
987 | if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN)) | ||
988 | ret = -EFAULT; | ||
989 | *size -= IPT_ST_OFFSET; | ||
990 | *dstptr += IPT_ST_COMPAT_LEN; | ||
991 | break; | ||
992 | case COMPAT_FROM_USER: | ||
993 | pcompat_st = target; | ||
994 | memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN); | ||
995 | st.verdict = pcompat_st->verdict; | ||
996 | if (st.verdict > 0) | ||
997 | st.verdict += compat_calc_jump(st.verdict); | ||
998 | st.target.u.user.target_size = IPT_ST_LEN; | ||
999 | memcpy(*dstptr, &st, IPT_ST_LEN); | ||
1000 | *size += IPT_ST_OFFSET; | ||
1001 | *dstptr += IPT_ST_LEN; | ||
1002 | break; | ||
1003 | case COMPAT_CALC_SIZE: | ||
1004 | *size += IPT_ST_OFFSET; | ||
1005 | break; | ||
1006 | default: | ||
1007 | ret = -ENOPROTOOPT; | ||
1008 | break; | ||
1009 | } | ||
1010 | return ret; | ||
1011 | } | 961 | } |
1012 | 962 | ||
1013 | static inline int | 963 | static inline int |
1014 | compat_calc_match(struct ipt_entry_match *m, int * size) | 964 | compat_calc_match(struct ipt_entry_match *m, int * size) |
1015 | { | 965 | { |
1016 | if (m->u.kernel.match->compat) | 966 | *size += xt_compat_match_offset(m->u.kernel.match); |
1017 | m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); | ||
1018 | else | ||
1019 | xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); | ||
1020 | return 0; | 967 | return 0; |
1021 | } | 968 | } |
1022 | 969 | ||
@@ -1031,10 +978,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, | |||
1031 | entry_offset = (void *)e - base; | 978 | entry_offset = (void *)e - base; |
1032 | IPT_MATCH_ITERATE(e, compat_calc_match, &off); | 979 | IPT_MATCH_ITERATE(e, compat_calc_match, &off); |
1033 | t = ipt_get_target(e); | 980 | t = ipt_get_target(e); |
1034 | if (t->u.kernel.target->compat) | 981 | off += xt_compat_target_offset(t->u.kernel.target); |
1035 | t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); | ||
1036 | else | ||
1037 | xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); | ||
1038 | newinfo->size -= off; | 982 | newinfo->size -= off; |
1039 | ret = compat_add_offset(entry_offset, off); | 983 | ret = compat_add_offset(entry_offset, off); |
1040 | if (ret) | 984 | if (ret) |
@@ -1420,17 +1364,13 @@ struct compat_ipt_replace { | |||
1420 | }; | 1364 | }; |
1421 | 1365 | ||
1422 | static inline int compat_copy_match_to_user(struct ipt_entry_match *m, | 1366 | static inline int compat_copy_match_to_user(struct ipt_entry_match *m, |
1423 | void __user **dstptr, compat_uint_t *size) | 1367 | void * __user *dstptr, compat_uint_t *size) |
1424 | { | 1368 | { |
1425 | if (m->u.kernel.match->compat) | 1369 | return xt_compat_match_to_user(m, dstptr, size); |
1426 | return m->u.kernel.match->compat(m, dstptr, size, | ||
1427 | COMPAT_TO_USER); | ||
1428 | else | ||
1429 | return xt_compat_match(m, dstptr, size, COMPAT_TO_USER); | ||
1430 | } | 1370 | } |
1431 | 1371 | ||
1432 | static int compat_copy_entry_to_user(struct ipt_entry *e, | 1372 | static int compat_copy_entry_to_user(struct ipt_entry *e, |
1433 | void __user **dstptr, compat_uint_t *size) | 1373 | void * __user *dstptr, compat_uint_t *size) |
1434 | { | 1374 | { |
1435 | struct ipt_entry_target __user *t; | 1375 | struct ipt_entry_target __user *t; |
1436 | struct compat_ipt_entry __user *ce; | 1376 | struct compat_ipt_entry __user *ce; |
@@ -1450,11 +1390,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e, | |||
1450 | if (ret) | 1390 | if (ret) |
1451 | goto out; | 1391 | goto out; |
1452 | t = ipt_get_target(e); | 1392 | t = ipt_get_target(e); |
1453 | if (t->u.kernel.target->compat) | 1393 | ret = xt_compat_target_to_user(t, dstptr, size); |
1454 | ret = t->u.kernel.target->compat(t, dstptr, size, | ||
1455 | COMPAT_TO_USER); | ||
1456 | else | ||
1457 | ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER); | ||
1458 | if (ret) | 1394 | if (ret) |
1459 | goto out; | 1395 | goto out; |
1460 | ret = -EFAULT; | 1396 | ret = -EFAULT; |
@@ -1486,11 +1422,7 @@ compat_check_calc_match(struct ipt_entry_match *m, | |||
1486 | return match ? PTR_ERR(match) : -ENOENT; | 1422 | return match ? PTR_ERR(match) : -ENOENT; |
1487 | } | 1423 | } |
1488 | m->u.kernel.match = match; | 1424 | m->u.kernel.match = match; |
1489 | 1425 | *size += xt_compat_match_offset(match); | |
1490 | if (m->u.kernel.match->compat) | ||
1491 | m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); | ||
1492 | else | ||
1493 | xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); | ||
1494 | 1426 | ||
1495 | (*i)++; | 1427 | (*i)++; |
1496 | return 0; | 1428 | return 0; |
@@ -1537,7 +1469,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, | |||
1537 | ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip, | 1469 | ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip, |
1538 | e->comefrom, &off, &j); | 1470 | e->comefrom, &off, &j); |
1539 | if (ret != 0) | 1471 | if (ret != 0) |
1540 | goto out; | 1472 | goto cleanup_matches; |
1541 | 1473 | ||
1542 | t = ipt_get_target(e); | 1474 | t = ipt_get_target(e); |
1543 | target = try_then_request_module(xt_find_target(AF_INET, | 1475 | target = try_then_request_module(xt_find_target(AF_INET, |
@@ -1547,14 +1479,11 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, | |||
1547 | if (IS_ERR(target) || !target) { | 1479 | if (IS_ERR(target) || !target) { |
1548 | duprintf("check_entry: `%s' not found\n", t->u.user.name); | 1480 | duprintf("check_entry: `%s' not found\n", t->u.user.name); |
1549 | ret = target ? PTR_ERR(target) : -ENOENT; | 1481 | ret = target ? PTR_ERR(target) : -ENOENT; |
1550 | goto out; | 1482 | goto cleanup_matches; |
1551 | } | 1483 | } |
1552 | t->u.kernel.target = target; | 1484 | t->u.kernel.target = target; |
1553 | 1485 | ||
1554 | if (t->u.kernel.target->compat) | 1486 | off += xt_compat_target_offset(target); |
1555 | t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); | ||
1556 | else | ||
1557 | xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); | ||
1558 | *size += off; | 1487 | *size += off; |
1559 | ret = compat_add_offset(entry_offset, off); | 1488 | ret = compat_add_offset(entry_offset, off); |
1560 | if (ret) | 1489 | if (ret) |
@@ -1574,14 +1503,17 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, | |||
1574 | 1503 | ||
1575 | (*i)++; | 1504 | (*i)++; |
1576 | return 0; | 1505 | return 0; |
1506 | |||
1577 | out: | 1507 | out: |
1508 | module_put(t->u.kernel.target->me); | ||
1509 | cleanup_matches: | ||
1578 | IPT_MATCH_ITERATE(e, cleanup_match, &j); | 1510 | IPT_MATCH_ITERATE(e, cleanup_match, &j); |
1579 | return ret; | 1511 | return ret; |
1580 | } | 1512 | } |
1581 | 1513 | ||
1582 | static inline int compat_copy_match_from_user(struct ipt_entry_match *m, | 1514 | static inline int compat_copy_match_from_user(struct ipt_entry_match *m, |
1583 | void **dstptr, compat_uint_t *size, const char *name, | 1515 | void **dstptr, compat_uint_t *size, const char *name, |
1584 | const struct ipt_ip *ip, unsigned int hookmask) | 1516 | const struct ipt_ip *ip, unsigned int hookmask, int *i) |
1585 | { | 1517 | { |
1586 | struct ipt_entry_match *dm; | 1518 | struct ipt_entry_match *dm; |
1587 | struct ipt_match *match; | 1519 | struct ipt_match *match; |
@@ -1589,26 +1521,28 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m, | |||
1589 | 1521 | ||
1590 | dm = (struct ipt_entry_match *)*dstptr; | 1522 | dm = (struct ipt_entry_match *)*dstptr; |
1591 | match = m->u.kernel.match; | 1523 | match = m->u.kernel.match; |
1592 | if (match->compat) | 1524 | xt_compat_match_from_user(m, dstptr, size); |
1593 | match->compat(m, dstptr, size, COMPAT_FROM_USER); | ||
1594 | else | ||
1595 | xt_compat_match(m, dstptr, size, COMPAT_FROM_USER); | ||
1596 | 1525 | ||
1597 | ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm), | 1526 | ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm), |
1598 | name, hookmask, ip->proto, | 1527 | name, hookmask, ip->proto, |
1599 | ip->invflags & IPT_INV_PROTO); | 1528 | ip->invflags & IPT_INV_PROTO); |
1600 | if (ret) | 1529 | if (ret) |
1601 | return ret; | 1530 | goto err; |
1602 | 1531 | ||
1603 | if (m->u.kernel.match->checkentry | 1532 | if (m->u.kernel.match->checkentry |
1604 | && !m->u.kernel.match->checkentry(name, ip, match, dm->data, | 1533 | && !m->u.kernel.match->checkentry(name, ip, match, dm->data, |
1605 | dm->u.match_size - sizeof(*dm), | ||
1606 | hookmask)) { | 1534 | hookmask)) { |
1607 | duprintf("ip_tables: check failed for `%s'.\n", | 1535 | duprintf("ip_tables: check failed for `%s'.\n", |
1608 | m->u.kernel.match->name); | 1536 | m->u.kernel.match->name); |
1609 | return -EINVAL; | 1537 | ret = -EINVAL; |
1538 | goto err; | ||
1610 | } | 1539 | } |
1540 | (*i)++; | ||
1611 | return 0; | 1541 | return 0; |
1542 | |||
1543 | err: | ||
1544 | module_put(m->u.kernel.match->me); | ||
1545 | return ret; | ||
1612 | } | 1546 | } |
1613 | 1547 | ||
1614 | static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, | 1548 | static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, |
@@ -1619,25 +1553,23 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, | |||
1619 | struct ipt_target *target; | 1553 | struct ipt_target *target; |
1620 | struct ipt_entry *de; | 1554 | struct ipt_entry *de; |
1621 | unsigned int origsize; | 1555 | unsigned int origsize; |
1622 | int ret, h; | 1556 | int ret, h, j; |
1623 | 1557 | ||
1624 | ret = 0; | 1558 | ret = 0; |
1625 | origsize = *size; | 1559 | origsize = *size; |
1626 | de = (struct ipt_entry *)*dstptr; | 1560 | de = (struct ipt_entry *)*dstptr; |
1627 | memcpy(de, e, sizeof(struct ipt_entry)); | 1561 | memcpy(de, e, sizeof(struct ipt_entry)); |
1628 | 1562 | ||
1563 | j = 0; | ||
1629 | *dstptr += sizeof(struct compat_ipt_entry); | 1564 | *dstptr += sizeof(struct compat_ipt_entry); |
1630 | ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, | 1565 | ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, |
1631 | name, &de->ip, de->comefrom); | 1566 | name, &de->ip, de->comefrom, &j); |
1632 | if (ret) | 1567 | if (ret) |
1633 | goto out; | 1568 | goto cleanup_matches; |
1634 | de->target_offset = e->target_offset - (origsize - *size); | 1569 | de->target_offset = e->target_offset - (origsize - *size); |
1635 | t = ipt_get_target(e); | 1570 | t = ipt_get_target(e); |
1636 | target = t->u.kernel.target; | 1571 | target = t->u.kernel.target; |
1637 | if (target->compat) | 1572 | xt_compat_target_from_user(t, dstptr, size); |
1638 | target->compat(t, dstptr, size, COMPAT_FROM_USER); | ||
1639 | else | ||
1640 | xt_compat_target(t, dstptr, size, COMPAT_FROM_USER); | ||
1641 | 1573 | ||
1642 | de->next_offset = e->next_offset - (origsize - *size); | 1574 | de->next_offset = e->next_offset - (origsize - *size); |
1643 | for (h = 0; h < NF_IP_NUMHOOKS; h++) { | 1575 | for (h = 0; h < NF_IP_NUMHOOKS; h++) { |
@@ -1653,22 +1585,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, | |||
1653 | name, e->comefrom, e->ip.proto, | 1585 | name, e->comefrom, e->ip.proto, |
1654 | e->ip.invflags & IPT_INV_PROTO); | 1586 | e->ip.invflags & IPT_INV_PROTO); |
1655 | if (ret) | 1587 | if (ret) |
1656 | goto out; | 1588 | goto err; |
1657 | 1589 | ||
1658 | ret = -EINVAL; | 1590 | ret = -EINVAL; |
1659 | if (t->u.kernel.target == &ipt_standard_target) { | 1591 | if (t->u.kernel.target == &ipt_standard_target) { |
1660 | if (!standard_check(t, *size)) | 1592 | if (!standard_check(t, *size)) |
1661 | goto out; | 1593 | goto err; |
1662 | } else if (t->u.kernel.target->checkentry | 1594 | } else if (t->u.kernel.target->checkentry |
1663 | && !t->u.kernel.target->checkentry(name, de, target, | 1595 | && !t->u.kernel.target->checkentry(name, de, target, |
1664 | t->data, t->u.target_size - sizeof(*t), | 1596 | t->data, de->comefrom)) { |
1665 | de->comefrom)) { | ||
1666 | duprintf("ip_tables: compat: check failed for `%s'.\n", | 1597 | duprintf("ip_tables: compat: check failed for `%s'.\n", |
1667 | t->u.kernel.target->name); | 1598 | t->u.kernel.target->name); |
1668 | goto out; | 1599 | goto err; |
1669 | } | 1600 | } |
1670 | ret = 0; | 1601 | ret = 0; |
1671 | out: | 1602 | return ret; |
1603 | |||
1604 | err: | ||
1605 | module_put(t->u.kernel.target->me); | ||
1606 | cleanup_matches: | ||
1607 | IPT_MATCH_ITERATE(e, cleanup_match, &j); | ||
1672 | return ret; | 1608 | return ret; |
1673 | } | 1609 | } |
1674 | 1610 | ||
@@ -1989,6 +1925,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) | |||
1989 | return ret; | 1925 | return ret; |
1990 | } | 1926 | } |
1991 | 1927 | ||
1928 | static int do_ipt_get_ctl(struct sock *, int, void __user *, int *); | ||
1929 | |||
1992 | static int | 1930 | static int |
1993 | compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | 1931 | compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) |
1994 | { | 1932 | { |
@@ -2002,8 +1940,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2002 | ret = compat_get_entries(user, len); | 1940 | ret = compat_get_entries(user, len); |
2003 | break; | 1941 | break; |
2004 | default: | 1942 | default: |
2005 | duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd); | 1943 | ret = do_ipt_get_ctl(sk, cmd, user, len); |
2006 | ret = -EINVAL; | ||
2007 | } | 1944 | } |
2008 | return ret; | 1945 | return ret; |
2009 | } | 1946 | } |
@@ -2185,7 +2122,6 @@ icmp_checkentry(const char *tablename, | |||
2185 | const void *info, | 2122 | const void *info, |
2186 | const struct xt_match *match, | 2123 | const struct xt_match *match, |
2187 | void *matchinfo, | 2124 | void *matchinfo, |
2188 | unsigned int matchsize, | ||
2189 | unsigned int hook_mask) | 2125 | unsigned int hook_mask) |
2190 | { | 2126 | { |
2191 | const struct ipt_icmp *icmpinfo = matchinfo; | 2127 | const struct ipt_icmp *icmpinfo = matchinfo; |
@@ -2200,7 +2136,9 @@ static struct ipt_target ipt_standard_target = { | |||
2200 | .targetsize = sizeof(int), | 2136 | .targetsize = sizeof(int), |
2201 | .family = AF_INET, | 2137 | .family = AF_INET, |
2202 | #ifdef CONFIG_COMPAT | 2138 | #ifdef CONFIG_COMPAT |
2203 | .compat = &compat_ipt_standard_fn, | 2139 | .compatsize = sizeof(compat_int_t), |
2140 | .compat_from_user = compat_standard_from_user, | ||
2141 | .compat_to_user = compat_standard_to_user, | ||
2204 | #endif | 2142 | #endif |
2205 | }; | 2143 | }; |
2206 | 2144 | ||
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d994c5f5744c..41589665fc5d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -302,8 +302,7 @@ target(struct sk_buff **pskb, | |||
302 | const struct net_device *out, | 302 | const struct net_device *out, |
303 | unsigned int hooknum, | 303 | unsigned int hooknum, |
304 | const struct xt_target *target, | 304 | const struct xt_target *target, |
305 | const void *targinfo, | 305 | const void *targinfo) |
306 | void *userinfo) | ||
307 | { | 306 | { |
308 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 307 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; |
309 | enum ip_conntrack_info ctinfo; | 308 | enum ip_conntrack_info ctinfo; |
@@ -373,7 +372,6 @@ checkentry(const char *tablename, | |||
373 | const void *e_void, | 372 | const void *e_void, |
374 | const struct xt_target *target, | 373 | const struct xt_target *target, |
375 | void *targinfo, | 374 | void *targinfo, |
376 | unsigned int targinfosize, | ||
377 | unsigned int hook_mask) | 375 | unsigned int hook_mask) |
378 | { | 376 | { |
379 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 377 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; |
@@ -450,8 +448,7 @@ checkentry(const char *tablename, | |||
450 | } | 448 | } |
451 | 449 | ||
452 | /* drop reference count of cluster config when rule is deleted */ | 450 | /* drop reference count of cluster config when rule is deleted */ |
453 | static void destroy(const struct xt_target *target, void *targinfo, | 451 | static void destroy(const struct xt_target *target, void *targinfo) |
454 | unsigned int targinfosize) | ||
455 | { | 452 | { |
456 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 453 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; |
457 | 454 | ||
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c deleted file mode 100644 index c8e971288dfe..000000000000 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ /dev/null | |||
@@ -1,96 +0,0 @@ | |||
1 | /* iptables module for setting the IPv4 DSCP field, Version 1.8 | ||
2 | * | ||
3 | * (C) 2002 by Harald Welte <laforge@netfilter.org> | ||
4 | * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * See RFC2474 for a description of the DSCP field within the IP Header. | ||
11 | * | ||
12 | * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp | ||
13 | */ | ||
14 | |||
15 | #include <linux/module.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/ip.h> | ||
18 | #include <net/checksum.h> | ||
19 | |||
20 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
21 | #include <linux/netfilter_ipv4/ipt_DSCP.h> | ||
22 | |||
23 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
24 | MODULE_DESCRIPTION("iptables DSCP modification module"); | ||
25 | MODULE_LICENSE("GPL"); | ||
26 | |||
27 | static unsigned int | ||
28 | target(struct sk_buff **pskb, | ||
29 | const struct net_device *in, | ||
30 | const struct net_device *out, | ||
31 | unsigned int hooknum, | ||
32 | const struct xt_target *target, | ||
33 | const void *targinfo, | ||
34 | void *userinfo) | ||
35 | { | ||
36 | const struct ipt_DSCP_info *dinfo = targinfo; | ||
37 | u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); | ||
38 | |||
39 | |||
40 | if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { | ||
41 | u_int16_t diffs[2]; | ||
42 | |||
43 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) | ||
44 | return NF_DROP; | ||
45 | |||
46 | diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; | ||
47 | (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK) | ||
48 | | sh_dscp; | ||
49 | diffs[1] = htons((*pskb)->nh.iph->tos); | ||
50 | (*pskb)->nh.iph->check | ||
51 | = csum_fold(csum_partial((char *)diffs, | ||
52 | sizeof(diffs), | ||
53 | (*pskb)->nh.iph->check | ||
54 | ^ 0xFFFF)); | ||
55 | } | ||
56 | return IPT_CONTINUE; | ||
57 | } | ||
58 | |||
59 | static int | ||
60 | checkentry(const char *tablename, | ||
61 | const void *e_void, | ||
62 | const struct xt_target *target, | ||
63 | void *targinfo, | ||
64 | unsigned int targinfosize, | ||
65 | unsigned int hook_mask) | ||
66 | { | ||
67 | const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp; | ||
68 | |||
69 | if ((dscp > IPT_DSCP_MAX)) { | ||
70 | printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); | ||
71 | return 0; | ||
72 | } | ||
73 | return 1; | ||
74 | } | ||
75 | |||
76 | static struct ipt_target ipt_dscp_reg = { | ||
77 | .name = "DSCP", | ||
78 | .target = target, | ||
79 | .targetsize = sizeof(struct ipt_DSCP_info), | ||
80 | .table = "mangle", | ||
81 | .checkentry = checkentry, | ||
82 | .me = THIS_MODULE, | ||
83 | }; | ||
84 | |||
85 | static int __init ipt_dscp_init(void) | ||
86 | { | ||
87 | return ipt_register_target(&ipt_dscp_reg); | ||
88 | } | ||
89 | |||
90 | static void __exit ipt_dscp_fini(void) | ||
91 | { | ||
92 | ipt_unregister_target(&ipt_dscp_reg); | ||
93 | } | ||
94 | |||
95 | module_init(ipt_dscp_init); | ||
96 | module_exit(ipt_dscp_fini); | ||
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4adf5c9d34f5..23f9c7ebe7eb 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c | |||
@@ -27,32 +27,28 @@ MODULE_DESCRIPTION("iptables ECN modification module"); | |||
27 | static inline int | 27 | static inline int |
28 | set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) | 28 | set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) |
29 | { | 29 | { |
30 | if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK) | 30 | struct iphdr *iph = (*pskb)->nh.iph; |
31 | != (einfo->ip_ect & IPT_ECN_IP_MASK)) { | 31 | u_int16_t oldtos; |
32 | u_int16_t diffs[2]; | ||
33 | 32 | ||
33 | if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { | ||
34 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) | 34 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) |
35 | return 0; | 35 | return 0; |
36 | 36 | iph = (*pskb)->nh.iph; | |
37 | diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; | 37 | oldtos = iph->tos; |
38 | (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK; | 38 | iph->tos &= ~IPT_ECN_IP_MASK; |
39 | (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); | 39 | iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); |
40 | diffs[1] = htons((*pskb)->nh.iph->tos); | 40 | iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos, |
41 | (*pskb)->nh.iph->check | 41 | iph->check); |
42 | = csum_fold(csum_partial((char *)diffs, | ||
43 | sizeof(diffs), | ||
44 | (*pskb)->nh.iph->check | ||
45 | ^0xFFFF)); | ||
46 | } | 42 | } |
47 | return 1; | 43 | return 1; |
48 | } | 44 | } |
49 | 45 | ||
50 | /* Return 0 if there was an error. */ | 46 | /* Return 0 if there was an error. */ |
51 | static inline int | 47 | static inline int |
52 | set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) | 48 | set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) |
53 | { | 49 | { |
54 | struct tcphdr _tcph, *tcph; | 50 | struct tcphdr _tcph, *tcph; |
55 | u_int16_t diffs[2]; | 51 | u_int16_t oldval; |
56 | 52 | ||
57 | /* Not enought header? */ | 53 | /* Not enought header? */ |
58 | tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, | 54 | tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, |
@@ -70,22 +66,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) | |||
70 | return 0; | 66 | return 0; |
71 | tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; | 67 | tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; |
72 | 68 | ||
73 | if ((*pskb)->ip_summed == CHECKSUM_HW && | 69 | oldval = ((u_int16_t *)tcph)[6]; |
74 | skb_checksum_help(*pskb, inward)) | ||
75 | return 0; | ||
76 | |||
77 | diffs[0] = ((u_int16_t *)tcph)[6]; | ||
78 | if (einfo->operation & IPT_ECN_OP_SET_ECE) | 70 | if (einfo->operation & IPT_ECN_OP_SET_ECE) |
79 | tcph->ece = einfo->proto.tcp.ece; | 71 | tcph->ece = einfo->proto.tcp.ece; |
80 | if (einfo->operation & IPT_ECN_OP_SET_CWR) | 72 | if (einfo->operation & IPT_ECN_OP_SET_CWR) |
81 | tcph->cwr = einfo->proto.tcp.cwr; | 73 | tcph->cwr = einfo->proto.tcp.cwr; |
82 | diffs[1] = ((u_int16_t *)tcph)[6]; | ||
83 | diffs[0] = diffs[0] ^ 0xFFFF; | ||
84 | 74 | ||
85 | if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) | 75 | tcph->check = nf_proto_csum_update((*pskb), |
86 | tcph->check = csum_fold(csum_partial((char *)diffs, | 76 | oldval ^ 0xFFFF, |
87 | sizeof(diffs), | 77 | ((u_int16_t *)tcph)[6], |
88 | tcph->check^0xFFFF)); | 78 | tcph->check, 0); |
89 | return 1; | 79 | return 1; |
90 | } | 80 | } |
91 | 81 | ||
@@ -95,8 +85,7 @@ target(struct sk_buff **pskb, | |||
95 | const struct net_device *out, | 85 | const struct net_device *out, |
96 | unsigned int hooknum, | 86 | unsigned int hooknum, |
97 | const struct xt_target *target, | 87 | const struct xt_target *target, |
98 | const void *targinfo, | 88 | const void *targinfo) |
99 | void *userinfo) | ||
100 | { | 89 | { |
101 | const struct ipt_ECN_info *einfo = targinfo; | 90 | const struct ipt_ECN_info *einfo = targinfo; |
102 | 91 | ||
@@ -106,7 +95,7 @@ target(struct sk_buff **pskb, | |||
106 | 95 | ||
107 | if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) | 96 | if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) |
108 | && (*pskb)->nh.iph->protocol == IPPROTO_TCP) | 97 | && (*pskb)->nh.iph->protocol == IPPROTO_TCP) |
109 | if (!set_ect_tcp(pskb, einfo, (out == NULL))) | 98 | if (!set_ect_tcp(pskb, einfo)) |
110 | return NF_DROP; | 99 | return NF_DROP; |
111 | 100 | ||
112 | return IPT_CONTINUE; | 101 | return IPT_CONTINUE; |
@@ -117,7 +106,6 @@ checkentry(const char *tablename, | |||
117 | const void *e_void, | 106 | const void *e_void, |
118 | const struct xt_target *target, | 107 | const struct xt_target *target, |
119 | void *targinfo, | 108 | void *targinfo, |
120 | unsigned int targinfosize, | ||
121 | unsigned int hook_mask) | 109 | unsigned int hook_mask) |
122 | { | 110 | { |
123 | const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; | 111 | const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; |
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index b98f7b08b084..7dc820df8bc5 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -416,8 +416,7 @@ ipt_log_target(struct sk_buff **pskb, | |||
416 | const struct net_device *out, | 416 | const struct net_device *out, |
417 | unsigned int hooknum, | 417 | unsigned int hooknum, |
418 | const struct xt_target *target, | 418 | const struct xt_target *target, |
419 | const void *targinfo, | 419 | const void *targinfo) |
420 | void *userinfo) | ||
421 | { | 420 | { |
422 | const struct ipt_log_info *loginfo = targinfo; | 421 | const struct ipt_log_info *loginfo = targinfo; |
423 | struct nf_loginfo li; | 422 | struct nf_loginfo li; |
@@ -440,7 +439,6 @@ static int ipt_log_checkentry(const char *tablename, | |||
440 | const void *e, | 439 | const void *e, |
441 | const struct xt_target *target, | 440 | const struct xt_target *target, |
442 | void *targinfo, | 441 | void *targinfo, |
443 | unsigned int targinfosize, | ||
444 | unsigned int hook_mask) | 442 | unsigned int hook_mask) |
445 | { | 443 | { |
446 | const struct ipt_log_info *loginfo = targinfo; | 444 | const struct ipt_log_info *loginfo = targinfo; |
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index ebd94f2abf0d..bc65168a3437 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
@@ -42,7 +42,6 @@ masquerade_check(const char *tablename, | |||
42 | const void *e, | 42 | const void *e, |
43 | const struct xt_target *target, | 43 | const struct xt_target *target, |
44 | void *targinfo, | 44 | void *targinfo, |
45 | unsigned int targinfosize, | ||
46 | unsigned int hook_mask) | 45 | unsigned int hook_mask) |
47 | { | 46 | { |
48 | const struct ip_nat_multi_range_compat *mr = targinfo; | 47 | const struct ip_nat_multi_range_compat *mr = targinfo; |
@@ -64,8 +63,7 @@ masquerade_target(struct sk_buff **pskb, | |||
64 | const struct net_device *out, | 63 | const struct net_device *out, |
65 | unsigned int hooknum, | 64 | unsigned int hooknum, |
66 | const struct xt_target *target, | 65 | const struct xt_target *target, |
67 | const void *targinfo, | 66 | const void *targinfo) |
68 | void *userinfo) | ||
69 | { | 67 | { |
70 | struct ip_conntrack *ct; | 68 | struct ip_conntrack *ct; |
71 | enum ip_conntrack_info ctinfo; | 69 | enum ip_conntrack_info ctinfo; |
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 736c4b5a86a7..beb2914225ff 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c | |||
@@ -33,7 +33,6 @@ check(const char *tablename, | |||
33 | const void *e, | 33 | const void *e, |
34 | const struct xt_target *target, | 34 | const struct xt_target *target, |
35 | void *targinfo, | 35 | void *targinfo, |
36 | unsigned int targinfosize, | ||
37 | unsigned int hook_mask) | 36 | unsigned int hook_mask) |
38 | { | 37 | { |
39 | const struct ip_nat_multi_range_compat *mr = targinfo; | 38 | const struct ip_nat_multi_range_compat *mr = targinfo; |
@@ -55,8 +54,7 @@ target(struct sk_buff **pskb, | |||
55 | const struct net_device *out, | 54 | const struct net_device *out, |
56 | unsigned int hooknum, | 55 | unsigned int hooknum, |
57 | const struct xt_target *target, | 56 | const struct xt_target *target, |
58 | const void *targinfo, | 57 | const void *targinfo) |
59 | void *userinfo) | ||
60 | { | 58 | { |
61 | struct ip_conntrack *ct; | 59 | struct ip_conntrack *ct; |
62 | enum ip_conntrack_info ctinfo; | 60 | enum ip_conntrack_info ctinfo; |
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index f290463232de..f03d43671c6d 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c | |||
@@ -36,7 +36,6 @@ redirect_check(const char *tablename, | |||
36 | const void *e, | 36 | const void *e, |
37 | const struct xt_target *target, | 37 | const struct xt_target *target, |
38 | void *targinfo, | 38 | void *targinfo, |
39 | unsigned int targinfosize, | ||
40 | unsigned int hook_mask) | 39 | unsigned int hook_mask) |
41 | { | 40 | { |
42 | const struct ip_nat_multi_range_compat *mr = targinfo; | 41 | const struct ip_nat_multi_range_compat *mr = targinfo; |
@@ -58,8 +57,7 @@ redirect_target(struct sk_buff **pskb, | |||
58 | const struct net_device *out, | 57 | const struct net_device *out, |
59 | unsigned int hooknum, | 58 | unsigned int hooknum, |
60 | const struct xt_target *target, | 59 | const struct xt_target *target, |
61 | const void *targinfo, | 60 | const void *targinfo) |
62 | void *userinfo) | ||
63 | { | 61 | { |
64 | struct ip_conntrack *ct; | 62 | struct ip_conntrack *ct; |
65 | enum ip_conntrack_info ctinfo; | 63 | enum ip_conntrack_info ctinfo; |
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 269bc2067cb8..b81821edd893 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
@@ -90,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, | |||
90 | fl.proto = IPPROTO_TCP; | 90 | fl.proto = IPPROTO_TCP; |
91 | fl.fl_ip_sport = tcph->dest; | 91 | fl.fl_ip_sport = tcph->dest; |
92 | fl.fl_ip_dport = tcph->source; | 92 | fl.fl_ip_dport = tcph->source; |
93 | security_skb_classify_flow(skb, &fl); | ||
93 | 94 | ||
94 | xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); | 95 | xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); |
95 | 96 | ||
@@ -184,6 +185,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
184 | tcph->urg_ptr = 0; | 185 | tcph->urg_ptr = 0; |
185 | 186 | ||
186 | /* Adjust TCP checksum */ | 187 | /* Adjust TCP checksum */ |
188 | nskb->ip_summed = CHECKSUM_NONE; | ||
187 | tcph->check = 0; | 189 | tcph->check = 0; |
188 | tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), | 190 | tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), |
189 | nskb->nh.iph->saddr, | 191 | nskb->nh.iph->saddr, |
@@ -226,8 +228,7 @@ static unsigned int reject(struct sk_buff **pskb, | |||
226 | const struct net_device *out, | 228 | const struct net_device *out, |
227 | unsigned int hooknum, | 229 | unsigned int hooknum, |
228 | const struct xt_target *target, | 230 | const struct xt_target *target, |
229 | const void *targinfo, | 231 | const void *targinfo) |
230 | void *userinfo) | ||
231 | { | 232 | { |
232 | const struct ipt_reject_info *reject = targinfo; | 233 | const struct ipt_reject_info *reject = targinfo; |
233 | 234 | ||
@@ -275,7 +276,6 @@ static int check(const char *tablename, | |||
275 | const void *e_void, | 276 | const void *e_void, |
276 | const struct xt_target *target, | 277 | const struct xt_target *target, |
277 | void *targinfo, | 278 | void *targinfo, |
278 | unsigned int targinfosize, | ||
279 | unsigned int hook_mask) | 279 | unsigned int hook_mask) |
280 | { | 280 | { |
281 | const struct ipt_reject_info *rejinfo = targinfo; | 281 | const struct ipt_reject_info *rejinfo = targinfo; |
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 7169b09b5a67..efbcb1198832 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c | |||
@@ -52,7 +52,6 @@ same_check(const char *tablename, | |||
52 | const void *e, | 52 | const void *e, |
53 | const struct xt_target *target, | 53 | const struct xt_target *target, |
54 | void *targinfo, | 54 | void *targinfo, |
55 | unsigned int targinfosize, | ||
56 | unsigned int hook_mask) | 55 | unsigned int hook_mask) |
57 | { | 56 | { |
58 | unsigned int count, countess, rangeip, index = 0; | 57 | unsigned int count, countess, rangeip, index = 0; |
@@ -116,8 +115,7 @@ same_check(const char *tablename, | |||
116 | } | 115 | } |
117 | 116 | ||
118 | static void | 117 | static void |
119 | same_destroy(const struct xt_target *target, void *targinfo, | 118 | same_destroy(const struct xt_target *target, void *targinfo) |
120 | unsigned int targinfosize) | ||
121 | { | 119 | { |
122 | struct ipt_same_info *mr = targinfo; | 120 | struct ipt_same_info *mr = targinfo; |
123 | 121 | ||
@@ -133,8 +131,7 @@ same_target(struct sk_buff **pskb, | |||
133 | const struct net_device *out, | 131 | const struct net_device *out, |
134 | unsigned int hooknum, | 132 | unsigned int hooknum, |
135 | const struct xt_target *target, | 133 | const struct xt_target *target, |
136 | const void *targinfo, | 134 | const void *targinfo) |
137 | void *userinfo) | ||
138 | { | 135 | { |
139 | struct ip_conntrack *ct; | 136 | struct ip_conntrack *ct; |
140 | enum ip_conntrack_info ctinfo; | 137 | enum ip_conntrack_info ctinfo; |
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index ef2fe5b3f0d8..4246c4321e5b 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c | |||
@@ -21,26 +21,14 @@ MODULE_LICENSE("GPL"); | |||
21 | MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); | 21 | MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); |
22 | MODULE_DESCRIPTION("iptables TCP MSS modification module"); | 22 | MODULE_DESCRIPTION("iptables TCP MSS modification module"); |
23 | 23 | ||
24 | #if 0 | ||
25 | #define DEBUGP printk | ||
26 | #else | ||
27 | #define DEBUGP(format, args...) | ||
28 | #endif | ||
29 | |||
30 | static u_int16_t | ||
31 | cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) | ||
32 | { | ||
33 | u_int32_t diffs[] = { oldvalinv, newval }; | ||
34 | return csum_fold(csum_partial((char *)diffs, sizeof(diffs), | ||
35 | oldcheck^0xFFFF)); | ||
36 | } | ||
37 | |||
38 | static inline unsigned int | 24 | static inline unsigned int |
39 | optlen(const u_int8_t *opt, unsigned int offset) | 25 | optlen(const u_int8_t *opt, unsigned int offset) |
40 | { | 26 | { |
41 | /* Beware zero-length options: make finite progress */ | 27 | /* Beware zero-length options: make finite progress */ |
42 | if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1; | 28 | if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) |
43 | else return opt[offset+1]; | 29 | return 1; |
30 | else | ||
31 | return opt[offset+1]; | ||
44 | } | 32 | } |
45 | 33 | ||
46 | static unsigned int | 34 | static unsigned int |
@@ -49,8 +37,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, | |||
49 | const struct net_device *out, | 37 | const struct net_device *out, |
50 | unsigned int hooknum, | 38 | unsigned int hooknum, |
51 | const struct xt_target *target, | 39 | const struct xt_target *target, |
52 | const void *targinfo, | 40 | const void *targinfo) |
53 | void *userinfo) | ||
54 | { | 41 | { |
55 | const struct ipt_tcpmss_info *tcpmssinfo = targinfo; | 42 | const struct ipt_tcpmss_info *tcpmssinfo = targinfo; |
56 | struct tcphdr *tcph; | 43 | struct tcphdr *tcph; |
@@ -62,13 +49,8 @@ ipt_tcpmss_target(struct sk_buff **pskb, | |||
62 | if (!skb_make_writable(pskb, (*pskb)->len)) | 49 | if (!skb_make_writable(pskb, (*pskb)->len)) |
63 | return NF_DROP; | 50 | return NF_DROP; |
64 | 51 | ||
65 | if ((*pskb)->ip_summed == CHECKSUM_HW && | ||
66 | skb_checksum_help(*pskb, out == NULL)) | ||
67 | return NF_DROP; | ||
68 | |||
69 | iph = (*pskb)->nh.iph; | 52 | iph = (*pskb)->nh.iph; |
70 | tcplen = (*pskb)->len - iph->ihl*4; | 53 | tcplen = (*pskb)->len - iph->ihl*4; |
71 | |||
72 | tcph = (void *)iph + iph->ihl*4; | 54 | tcph = (void *)iph + iph->ihl*4; |
73 | 55 | ||
74 | /* Since it passed flags test in tcp match, we know it is is | 56 | /* Since it passed flags test in tcp match, we know it is is |
@@ -84,54 +66,41 @@ ipt_tcpmss_target(struct sk_buff **pskb, | |||
84 | return NF_DROP; | 66 | return NF_DROP; |
85 | } | 67 | } |
86 | 68 | ||
87 | if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { | 69 | if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { |
88 | if(!(*pskb)->dst) { | 70 | if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) + |
71 | sizeof(struct tcphdr)) { | ||
89 | if (net_ratelimit()) | 72 | if (net_ratelimit()) |
90 | printk(KERN_ERR | 73 | printk(KERN_ERR "ipt_tcpmss_target: " |
91 | "ipt_tcpmss_target: no dst?! can't determine path-MTU\n"); | 74 | "unknown or invalid path-MTU (%d)\n", |
75 | dst_mtu((*pskb)->dst)); | ||
92 | return NF_DROP; /* or IPT_CONTINUE ?? */ | 76 | return NF_DROP; /* or IPT_CONTINUE ?? */ |
93 | } | 77 | } |
94 | 78 | ||
95 | if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) { | 79 | newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - |
96 | if (net_ratelimit()) | 80 | sizeof(struct tcphdr); |
97 | printk(KERN_ERR | ||
98 | "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst)); | ||
99 | return NF_DROP; /* or IPT_CONTINUE ?? */ | ||
100 | } | ||
101 | |||
102 | newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr); | ||
103 | } else | 81 | } else |
104 | newmss = tcpmssinfo->mss; | 82 | newmss = tcpmssinfo->mss; |
105 | 83 | ||
106 | opt = (u_int8_t *)tcph; | 84 | opt = (u_int8_t *)tcph; |
107 | for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){ | 85 | for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { |
108 | if ((opt[i] == TCPOPT_MSS) && | 86 | if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && |
109 | ((tcph->doff*4 - i) >= TCPOLEN_MSS) && | 87 | opt[i+1] == TCPOLEN_MSS) { |
110 | (opt[i+1] == TCPOLEN_MSS)) { | ||
111 | u_int16_t oldmss; | 88 | u_int16_t oldmss; |
112 | 89 | ||
113 | oldmss = (opt[i+2] << 8) | opt[i+3]; | 90 | oldmss = (opt[i+2] << 8) | opt[i+3]; |
114 | 91 | ||
115 | if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && | 92 | if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && |
116 | (oldmss <= newmss)) | 93 | oldmss <= newmss) |
117 | return IPT_CONTINUE; | 94 | return IPT_CONTINUE; |
118 | 95 | ||
119 | opt[i+2] = (newmss & 0xff00) >> 8; | 96 | opt[i+2] = (newmss & 0xff00) >> 8; |
120 | opt[i+3] = (newmss & 0x00ff); | 97 | opt[i+3] = (newmss & 0x00ff); |
121 | 98 | ||
122 | tcph->check = cheat_check(htons(oldmss)^0xFFFF, | 99 | tcph->check = nf_proto_csum_update(*pskb, |
123 | htons(newmss), | 100 | htons(oldmss)^0xFFFF, |
124 | tcph->check); | 101 | htons(newmss), |
125 | 102 | tcph->check, 0); | |
126 | DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" | 103 | return IPT_CONTINUE; |
127 | "->%u.%u.%u.%u:%hu changed TCP MSS option" | ||
128 | " (from %u to %u)\n", | ||
129 | NIPQUAD((*pskb)->nh.iph->saddr), | ||
130 | ntohs(tcph->source), | ||
131 | NIPQUAD((*pskb)->nh.iph->daddr), | ||
132 | ntohs(tcph->dest), | ||
133 | oldmss, newmss); | ||
134 | goto retmodified; | ||
135 | } | 104 | } |
136 | } | 105 | } |
137 | 106 | ||
@@ -143,13 +112,8 @@ ipt_tcpmss_target(struct sk_buff **pskb, | |||
143 | 112 | ||
144 | newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), | 113 | newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), |
145 | TCPOLEN_MSS, GFP_ATOMIC); | 114 | TCPOLEN_MSS, GFP_ATOMIC); |
146 | if (!newskb) { | 115 | if (!newskb) |
147 | if (net_ratelimit()) | ||
148 | printk(KERN_ERR "ipt_tcpmss_target:" | ||
149 | " unable to allocate larger skb\n"); | ||
150 | return NF_DROP; | 116 | return NF_DROP; |
151 | } | ||
152 | |||
153 | kfree_skb(*pskb); | 117 | kfree_skb(*pskb); |
154 | *pskb = newskb; | 118 | *pskb = newskb; |
155 | iph = (*pskb)->nh.iph; | 119 | iph = (*pskb)->nh.iph; |
@@ -161,36 +125,29 @@ ipt_tcpmss_target(struct sk_buff **pskb, | |||
161 | opt = (u_int8_t *)tcph + sizeof(struct tcphdr); | 125 | opt = (u_int8_t *)tcph + sizeof(struct tcphdr); |
162 | memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); | 126 | memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); |
163 | 127 | ||
164 | tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF, | 128 | tcph->check = nf_proto_csum_update(*pskb, |
165 | htons(tcplen + TCPOLEN_MSS), tcph->check); | 129 | htons(tcplen) ^ 0xFFFF, |
166 | tcplen += TCPOLEN_MSS; | 130 | htons(tcplen + TCPOLEN_MSS), |
167 | 131 | tcph->check, 1); | |
168 | opt[0] = TCPOPT_MSS; | 132 | opt[0] = TCPOPT_MSS; |
169 | opt[1] = TCPOLEN_MSS; | 133 | opt[1] = TCPOLEN_MSS; |
170 | opt[2] = (newmss & 0xff00) >> 8; | 134 | opt[2] = (newmss & 0xff00) >> 8; |
171 | opt[3] = (newmss & 0x00ff); | 135 | opt[3] = (newmss & 0x00ff); |
172 | 136 | ||
173 | tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check); | 137 | tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt), |
138 | tcph->check, 0); | ||
174 | 139 | ||
175 | oldval = ((u_int16_t *)tcph)[6]; | 140 | oldval = ((u_int16_t *)tcph)[6]; |
176 | tcph->doff += TCPOLEN_MSS/4; | 141 | tcph->doff += TCPOLEN_MSS/4; |
177 | tcph->check = cheat_check(oldval ^ 0xFFFF, | 142 | tcph->check = nf_proto_csum_update(*pskb, |
178 | ((u_int16_t *)tcph)[6], tcph->check); | 143 | oldval ^ 0xFFFF, |
144 | ((u_int16_t *)tcph)[6], | ||
145 | tcph->check, 0); | ||
179 | 146 | ||
180 | newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); | 147 | newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); |
181 | iph->check = cheat_check(iph->tot_len ^ 0xFFFF, | 148 | iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF, |
182 | newtotlen, iph->check); | 149 | newtotlen, iph->check); |
183 | iph->tot_len = newtotlen; | 150 | iph->tot_len = newtotlen; |
184 | |||
185 | DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" | ||
186 | "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n", | ||
187 | NIPQUAD((*pskb)->nh.iph->saddr), | ||
188 | ntohs(tcph->source), | ||
189 | NIPQUAD((*pskb)->nh.iph->daddr), | ||
190 | ntohs(tcph->dest), | ||
191 | newmss); | ||
192 | |||
193 | retmodified: | ||
194 | return IPT_CONTINUE; | 151 | return IPT_CONTINUE; |
195 | } | 152 | } |
196 | 153 | ||
@@ -200,9 +157,9 @@ static inline int find_syn_match(const struct ipt_entry_match *m) | |||
200 | { | 157 | { |
201 | const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; | 158 | const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; |
202 | 159 | ||
203 | if (strcmp(m->u.kernel.match->name, "tcp") == 0 | 160 | if (strcmp(m->u.kernel.match->name, "tcp") == 0 && |
204 | && (tcpinfo->flg_cmp & TH_SYN) | 161 | tcpinfo->flg_cmp & TH_SYN && |
205 | && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) | 162 | !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) |
206 | return 1; | 163 | return 1; |
207 | 164 | ||
208 | return 0; | 165 | return 0; |
@@ -214,17 +171,17 @@ ipt_tcpmss_checkentry(const char *tablename, | |||
214 | const void *e_void, | 171 | const void *e_void, |
215 | const struct xt_target *target, | 172 | const struct xt_target *target, |
216 | void *targinfo, | 173 | void *targinfo, |
217 | unsigned int targinfosize, | ||
218 | unsigned int hook_mask) | 174 | unsigned int hook_mask) |
219 | { | 175 | { |
220 | const struct ipt_tcpmss_info *tcpmssinfo = targinfo; | 176 | const struct ipt_tcpmss_info *tcpmssinfo = targinfo; |
221 | const struct ipt_entry *e = e_void; | 177 | const struct ipt_entry *e = e_void; |
222 | 178 | ||
223 | if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && | 179 | if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && |
224 | ((hook_mask & ~((1 << NF_IP_FORWARD) | 180 | (hook_mask & ~((1 << NF_IP_FORWARD) | |
225 | | (1 << NF_IP_LOCAL_OUT) | 181 | (1 << NF_IP_LOCAL_OUT) | |
226 | | (1 << NF_IP_POST_ROUTING))) != 0)) { | 182 | (1 << NF_IP_POST_ROUTING))) != 0) { |
227 | printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); | 183 | printk("TCPMSS: path-MTU clamping only supported in " |
184 | "FORWARD, OUTPUT and POSTROUTING hooks\n"); | ||
228 | return 0; | 185 | return 0; |
229 | } | 186 | } |
230 | 187 | ||
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 1c7a5ca399b3..471a4c438b0a 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c | |||
@@ -26,27 +26,20 @@ target(struct sk_buff **pskb, | |||
26 | const struct net_device *out, | 26 | const struct net_device *out, |
27 | unsigned int hooknum, | 27 | unsigned int hooknum, |
28 | const struct xt_target *target, | 28 | const struct xt_target *target, |
29 | const void *targinfo, | 29 | const void *targinfo) |
30 | void *userinfo) | ||
31 | { | 30 | { |
32 | const struct ipt_tos_target_info *tosinfo = targinfo; | 31 | const struct ipt_tos_target_info *tosinfo = targinfo; |
32 | struct iphdr *iph = (*pskb)->nh.iph; | ||
33 | u_int16_t oldtos; | ||
33 | 34 | ||
34 | if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { | 35 | if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { |
35 | u_int16_t diffs[2]; | ||
36 | |||
37 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) | 36 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) |
38 | return NF_DROP; | 37 | return NF_DROP; |
39 | 38 | iph = (*pskb)->nh.iph; | |
40 | diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; | 39 | oldtos = iph->tos; |
41 | (*pskb)->nh.iph->tos | 40 | iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; |
42 | = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK) | 41 | iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos, |
43 | | tosinfo->tos; | 42 | iph->check); |
44 | diffs[1] = htons((*pskb)->nh.iph->tos); | ||
45 | (*pskb)->nh.iph->check | ||
46 | = csum_fold(csum_partial((char *)diffs, | ||
47 | sizeof(diffs), | ||
48 | (*pskb)->nh.iph->check | ||
49 | ^0xFFFF)); | ||
50 | } | 43 | } |
51 | return IPT_CONTINUE; | 44 | return IPT_CONTINUE; |
52 | } | 45 | } |
@@ -56,7 +49,6 @@ checkentry(const char *tablename, | |||
56 | const void *e_void, | 49 | const void *e_void, |
57 | const struct xt_target *target, | 50 | const struct xt_target *target, |
58 | void *targinfo, | 51 | void *targinfo, |
59 | unsigned int targinfosize, | ||
60 | unsigned int hook_mask) | 52 | unsigned int hook_mask) |
61 | { | 53 | { |
62 | const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; | 54 | const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; |
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index f48892ae0be5..96e79cc6d0f2 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c | |||
@@ -23,11 +23,10 @@ static unsigned int | |||
23 | ipt_ttl_target(struct sk_buff **pskb, | 23 | ipt_ttl_target(struct sk_buff **pskb, |
24 | const struct net_device *in, const struct net_device *out, | 24 | const struct net_device *in, const struct net_device *out, |
25 | unsigned int hooknum, const struct xt_target *target, | 25 | unsigned int hooknum, const struct xt_target *target, |
26 | const void *targinfo, void *userinfo) | 26 | const void *targinfo) |
27 | { | 27 | { |
28 | struct iphdr *iph; | 28 | struct iphdr *iph; |
29 | const struct ipt_TTL_info *info = targinfo; | 29 | const struct ipt_TTL_info *info = targinfo; |
30 | u_int16_t diffs[2]; | ||
31 | int new_ttl; | 30 | int new_ttl; |
32 | 31 | ||
33 | if (!skb_make_writable(pskb, (*pskb)->len)) | 32 | if (!skb_make_writable(pskb, (*pskb)->len)) |
@@ -55,12 +54,10 @@ ipt_ttl_target(struct sk_buff **pskb, | |||
55 | } | 54 | } |
56 | 55 | ||
57 | if (new_ttl != iph->ttl) { | 56 | if (new_ttl != iph->ttl) { |
58 | diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; | 57 | iph->check = nf_csum_update(ntohs((iph->ttl << 8)) ^ 0xFFFF, |
58 | ntohs(new_ttl << 8), | ||
59 | iph->check); | ||
59 | iph->ttl = new_ttl; | 60 | iph->ttl = new_ttl; |
60 | diffs[1] = htons(((unsigned)iph->ttl) << 8); | ||
61 | iph->check = csum_fold(csum_partial((char *)diffs, | ||
62 | sizeof(diffs), | ||
63 | iph->check^0xFFFF)); | ||
64 | } | 61 | } |
65 | 62 | ||
66 | return IPT_CONTINUE; | 63 | return IPT_CONTINUE; |
@@ -70,7 +67,6 @@ static int ipt_ttl_checkentry(const char *tablename, | |||
70 | const void *e, | 67 | const void *e, |
71 | const struct xt_target *target, | 68 | const struct xt_target *target, |
72 | void *targinfo, | 69 | void *targinfo, |
73 | unsigned int targinfosize, | ||
74 | unsigned int hook_mask) | 70 | unsigned int hook_mask) |
75 | { | 71 | { |
76 | struct ipt_TTL_info *info = targinfo; | 72 | struct ipt_TTL_info *info = targinfo; |
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d46fd677fa11..2b104ea54f48 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c | |||
@@ -308,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, | |||
308 | const struct net_device *out, | 308 | const struct net_device *out, |
309 | unsigned int hooknum, | 309 | unsigned int hooknum, |
310 | const struct xt_target *target, | 310 | const struct xt_target *target, |
311 | const void *targinfo, void *userinfo) | 311 | const void *targinfo) |
312 | { | 312 | { |
313 | struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; | 313 | struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; |
314 | 314 | ||
@@ -346,7 +346,6 @@ static int ipt_ulog_checkentry(const char *tablename, | |||
346 | const void *e, | 346 | const void *e, |
347 | const struct xt_target *target, | 347 | const struct xt_target *target, |
348 | void *targinfo, | 348 | void *targinfo, |
349 | unsigned int targinfosize, | ||
350 | unsigned int hookmask) | 349 | unsigned int hookmask) |
351 | { | 350 | { |
352 | struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; | 351 | struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; |
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 2927135873d7..1798f86bc534 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c | |||
@@ -74,7 +74,6 @@ checkentry(const char *tablename, | |||
74 | const void *ip_void, | 74 | const void *ip_void, |
75 | const struct xt_match *match, | 75 | const struct xt_match *match, |
76 | void *matchinfo, | 76 | void *matchinfo, |
77 | unsigned int matchinfosize, | ||
78 | unsigned int hook_mask) | 77 | unsigned int hook_mask) |
79 | { | 78 | { |
80 | const struct ipt_ah *ahinfo = matchinfo; | 79 | const struct ipt_ah *ahinfo = matchinfo; |
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c deleted file mode 100644 index 47177591aeb6..000000000000 --- a/net/ipv4/netfilter/ipt_dscp.c +++ /dev/null | |||
@@ -1,54 +0,0 @@ | |||
1 | /* IP tables module for matching the value of the IPv4 DSCP field | ||
2 | * | ||
3 | * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp | ||
4 | * | ||
5 | * (C) 2002 by Harald Welte <laforge@netfilter.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | |||
15 | #include <linux/netfilter_ipv4/ipt_dscp.h> | ||
16 | #include <linux/netfilter_ipv4/ip_tables.h> | ||
17 | |||
18 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
19 | MODULE_DESCRIPTION("iptables DSCP matching module"); | ||
20 | MODULE_LICENSE("GPL"); | ||
21 | |||
22 | static int match(const struct sk_buff *skb, | ||
23 | const struct net_device *in, const struct net_device *out, | ||
24 | const struct xt_match *match, const void *matchinfo, | ||
25 | int offset, unsigned int protoff, int *hotdrop) | ||
26 | { | ||
27 | const struct ipt_dscp_info *info = matchinfo; | ||
28 | const struct iphdr *iph = skb->nh.iph; | ||
29 | |||
30 | u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); | ||
31 | |||
32 | return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert; | ||
33 | } | ||
34 | |||
35 | static struct ipt_match dscp_match = { | ||
36 | .name = "dscp", | ||
37 | .match = match, | ||
38 | .matchsize = sizeof(struct ipt_dscp_info), | ||
39 | .me = THIS_MODULE, | ||
40 | }; | ||
41 | |||
42 | static int __init ipt_dscp_init(void) | ||
43 | { | ||
44 | return ipt_register_match(&dscp_match); | ||
45 | } | ||
46 | |||
47 | static void __exit ipt_dscp_fini(void) | ||
48 | { | ||
49 | ipt_unregister_match(&dscp_match); | ||
50 | |||
51 | } | ||
52 | |||
53 | module_init(ipt_dscp_init); | ||
54 | module_exit(ipt_dscp_fini); | ||
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index b28250414933..dafbdec0efc0 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c | |||
@@ -88,8 +88,7 @@ static int match(const struct sk_buff *skb, | |||
88 | 88 | ||
89 | static int checkentry(const char *tablename, const void *ip_void, | 89 | static int checkentry(const char *tablename, const void *ip_void, |
90 | const struct xt_match *match, | 90 | const struct xt_match *match, |
91 | void *matchinfo, unsigned int matchsize, | 91 | void *matchinfo, unsigned int hook_mask) |
92 | unsigned int hook_mask) | ||
93 | { | 92 | { |
94 | const struct ipt_ecn_info *info = matchinfo; | 93 | const struct ipt_ecn_info *info = matchinfo; |
95 | const struct ipt_ip *ip = ip_void; | 94 | const struct ipt_ip *ip = ip_void; |
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 3bd2368e1fc9..4f73a61aa3dd 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c | |||
@@ -478,7 +478,6 @@ hashlimit_checkentry(const char *tablename, | |||
478 | const void *inf, | 478 | const void *inf, |
479 | const struct xt_match *match, | 479 | const struct xt_match *match, |
480 | void *matchinfo, | 480 | void *matchinfo, |
481 | unsigned int matchsize, | ||
482 | unsigned int hook_mask) | 481 | unsigned int hook_mask) |
483 | { | 482 | { |
484 | struct ipt_hashlimit_info *r = matchinfo; | 483 | struct ipt_hashlimit_info *r = matchinfo; |
@@ -529,18 +528,46 @@ hashlimit_checkentry(const char *tablename, | |||
529 | } | 528 | } |
530 | 529 | ||
531 | static void | 530 | static void |
532 | hashlimit_destroy(const struct xt_match *match, void *matchinfo, | 531 | hashlimit_destroy(const struct xt_match *match, void *matchinfo) |
533 | unsigned int matchsize) | ||
534 | { | 532 | { |
535 | struct ipt_hashlimit_info *r = matchinfo; | 533 | struct ipt_hashlimit_info *r = matchinfo; |
536 | 534 | ||
537 | htable_put(r->hinfo); | 535 | htable_put(r->hinfo); |
538 | } | 536 | } |
539 | 537 | ||
538 | #ifdef CONFIG_COMPAT | ||
539 | struct compat_ipt_hashlimit_info { | ||
540 | char name[IFNAMSIZ]; | ||
541 | struct hashlimit_cfg cfg; | ||
542 | compat_uptr_t hinfo; | ||
543 | compat_uptr_t master; | ||
544 | }; | ||
545 | |||
546 | static void compat_from_user(void *dst, void *src) | ||
547 | { | ||
548 | int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); | ||
549 | |||
550 | memcpy(dst, src, off); | ||
551 | memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off); | ||
552 | } | ||
553 | |||
554 | static int compat_to_user(void __user *dst, void *src) | ||
555 | { | ||
556 | int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); | ||
557 | |||
558 | return copy_to_user(dst, src, off) ? -EFAULT : 0; | ||
559 | } | ||
560 | #endif | ||
561 | |||
540 | static struct ipt_match ipt_hashlimit = { | 562 | static struct ipt_match ipt_hashlimit = { |
541 | .name = "hashlimit", | 563 | .name = "hashlimit", |
542 | .match = hashlimit_match, | 564 | .match = hashlimit_match, |
543 | .matchsize = sizeof(struct ipt_hashlimit_info), | 565 | .matchsize = sizeof(struct ipt_hashlimit_info), |
566 | #ifdef CONFIG_COMPAT | ||
567 | .compatsize = sizeof(struct compat_ipt_hashlimit_info), | ||
568 | .compat_from_user = compat_from_user, | ||
569 | .compat_to_user = compat_to_user, | ||
570 | #endif | ||
544 | .checkentry = hashlimit_checkentry, | 571 | .checkentry = hashlimit_checkentry, |
545 | .destroy = hashlimit_destroy, | 572 | .destroy = hashlimit_destroy, |
546 | .me = THIS_MODULE | 573 | .me = THIS_MODULE |
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 5ac6ac023b5e..78c336f12a9e 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c | |||
@@ -56,7 +56,6 @@ checkentry(const char *tablename, | |||
56 | const void *ip, | 56 | const void *ip, |
57 | const struct xt_match *match, | 57 | const struct xt_match *match, |
58 | void *matchinfo, | 58 | void *matchinfo, |
59 | unsigned int matchsize, | ||
60 | unsigned int hook_mask) | 59 | unsigned int hook_mask) |
61 | { | 60 | { |
62 | const struct ipt_owner_info *info = matchinfo; | 61 | const struct ipt_owner_info *info = matchinfo; |
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 61a2139f9cfd..32ae8d7ac506 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c | |||
@@ -35,14 +35,20 @@ static unsigned int ip_list_tot = 100; | |||
35 | static unsigned int ip_pkt_list_tot = 20; | 35 | static unsigned int ip_pkt_list_tot = 20; |
36 | static unsigned int ip_list_hash_size = 0; | 36 | static unsigned int ip_list_hash_size = 0; |
37 | static unsigned int ip_list_perms = 0644; | 37 | static unsigned int ip_list_perms = 0644; |
38 | static unsigned int ip_list_uid = 0; | ||
39 | static unsigned int ip_list_gid = 0; | ||
38 | module_param(ip_list_tot, uint, 0400); | 40 | module_param(ip_list_tot, uint, 0400); |
39 | module_param(ip_pkt_list_tot, uint, 0400); | 41 | module_param(ip_pkt_list_tot, uint, 0400); |
40 | module_param(ip_list_hash_size, uint, 0400); | 42 | module_param(ip_list_hash_size, uint, 0400); |
41 | module_param(ip_list_perms, uint, 0400); | 43 | module_param(ip_list_perms, uint, 0400); |
44 | module_param(ip_list_uid, uint, 0400); | ||
45 | module_param(ip_list_gid, uint, 0400); | ||
42 | MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); | 46 | MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); |
43 | MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); | 47 | MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); |
44 | MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); | 48 | MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); |
45 | MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); | 49 | MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); |
50 | MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files"); | ||
51 | MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files"); | ||
46 | 52 | ||
47 | 53 | ||
48 | struct recent_entry { | 54 | struct recent_entry { |
@@ -232,7 +238,7 @@ out: | |||
232 | static int | 238 | static int |
233 | ipt_recent_checkentry(const char *tablename, const void *ip, | 239 | ipt_recent_checkentry(const char *tablename, const void *ip, |
234 | const struct xt_match *match, void *matchinfo, | 240 | const struct xt_match *match, void *matchinfo, |
235 | unsigned int matchsize, unsigned int hook_mask) | 241 | unsigned int hook_mask) |
236 | { | 242 | { |
237 | const struct ipt_recent_info *info = matchinfo; | 243 | const struct ipt_recent_info *info = matchinfo; |
238 | struct recent_table *t; | 244 | struct recent_table *t; |
@@ -274,6 +280,8 @@ ipt_recent_checkentry(const char *tablename, const void *ip, | |||
274 | goto out; | 280 | goto out; |
275 | } | 281 | } |
276 | t->proc->proc_fops = &recent_fops; | 282 | t->proc->proc_fops = &recent_fops; |
283 | t->proc->uid = ip_list_uid; | ||
284 | t->proc->gid = ip_list_gid; | ||
277 | t->proc->data = t; | 285 | t->proc->data = t; |
278 | #endif | 286 | #endif |
279 | spin_lock_bh(&recent_lock); | 287 | spin_lock_bh(&recent_lock); |
@@ -286,8 +294,7 @@ out: | |||
286 | } | 294 | } |
287 | 295 | ||
288 | static void | 296 | static void |
289 | ipt_recent_destroy(const struct xt_match *match, void *matchinfo, | 297 | ipt_recent_destroy(const struct xt_match *match, void *matchinfo) |
290 | unsigned int matchsize) | ||
291 | { | 298 | { |
292 | const struct ipt_recent_info *info = matchinfo; | 299 | const struct ipt_recent_info *info = matchinfo; |
293 | struct recent_table *t; | 300 | struct recent_table *t; |
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 7f417484bfbf..e2e7dd8d7903 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
@@ -90,7 +90,7 @@ ipt_hook(unsigned int hook, | |||
90 | const struct net_device *out, | 90 | const struct net_device *out, |
91 | int (*okfn)(struct sk_buff *)) | 91 | int (*okfn)(struct sk_buff *)) |
92 | { | 92 | { |
93 | return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); | 93 | return ipt_do_table(pskb, hook, in, out, &packet_filter); |
94 | } | 94 | } |
95 | 95 | ||
96 | static unsigned int | 96 | static unsigned int |
@@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook, | |||
108 | return NF_ACCEPT; | 108 | return NF_ACCEPT; |
109 | } | 109 | } |
110 | 110 | ||
111 | return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); | 111 | return ipt_do_table(pskb, hook, in, out, &packet_filter); |
112 | } | 112 | } |
113 | 113 | ||
114 | static struct nf_hook_ops ipt_ops[] = { | 114 | static struct nf_hook_ops ipt_ops[] = { |
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4e7998beda63..79336cb42527 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -119,7 +119,7 @@ ipt_route_hook(unsigned int hook, | |||
119 | const struct net_device *out, | 119 | const struct net_device *out, |
120 | int (*okfn)(struct sk_buff *)) | 120 | int (*okfn)(struct sk_buff *)) |
121 | { | 121 | { |
122 | return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); | 122 | return ipt_do_table(pskb, hook, in, out, &packet_mangler); |
123 | } | 123 | } |
124 | 124 | ||
125 | static unsigned int | 125 | static unsigned int |
@@ -148,7 +148,7 @@ ipt_local_hook(unsigned int hook, | |||
148 | daddr = (*pskb)->nh.iph->daddr; | 148 | daddr = (*pskb)->nh.iph->daddr; |
149 | tos = (*pskb)->nh.iph->tos; | 149 | tos = (*pskb)->nh.iph->tos; |
150 | 150 | ||
151 | ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); | 151 | ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); |
152 | /* Reroute for ANY change. */ | 152 | /* Reroute for ANY change. */ |
153 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE | 153 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE |
154 | && ((*pskb)->nh.iph->saddr != saddr | 154 | && ((*pskb)->nh.iph->saddr != saddr |
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 7912cce1e1b8..bcbeb4aeacd9 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c | |||
@@ -95,7 +95,7 @@ ipt_hook(unsigned int hook, | |||
95 | const struct net_device *out, | 95 | const struct net_device *out, |
96 | int (*okfn)(struct sk_buff *)) | 96 | int (*okfn)(struct sk_buff *)) |
97 | { | 97 | { |
98 | return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); | 98 | return ipt_do_table(pskb, hook, in, out, &packet_raw); |
99 | } | 99 | } |
100 | 100 | ||
101 | /* 'raw' is the very first table. */ | 101 | /* 'raw' is the very first table. */ |
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 663a73ee3f2f..790f00d500c3 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <net/netfilter/nf_conntrack_protocol.h> | 25 | #include <net/netfilter/nf_conntrack_protocol.h> |
26 | #include <net/netfilter/nf_conntrack_core.h> | 26 | #include <net/netfilter/nf_conntrack_core.h> |
27 | 27 | ||
28 | unsigned long nf_ct_icmp_timeout = 30*HZ; | 28 | unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; |
29 | 29 | ||
30 | #if 0 | 30 | #if 0 |
31 | #define DEBUGP printk | 31 | #define DEBUGP printk |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d61e2a9d394d..9c6cbe3d9fb8 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_list[] = { | |||
173 | SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS), | 173 | SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS), |
174 | SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS), | 174 | SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS), |
175 | SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), | 175 | SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), |
176 | SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), | ||
177 | SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), | ||
176 | SNMP_MIB_SENTINEL | 178 | SNMP_MIB_SENTINEL |
177 | }; | 179 | }; |
178 | 180 | ||
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 62b2762a2420..0e935b4c8741 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -38,8 +38,7 @@ | |||
38 | * as published by the Free Software Foundation; either version | 38 | * as published by the Free Software Foundation; either version |
39 | * 2 of the License, or (at your option) any later version. | 39 | * 2 of the License, or (at your option) any later version. |
40 | */ | 40 | */ |
41 | 41 | ||
42 | #include <linux/config.h> | ||
43 | #include <linux/types.h> | 42 | #include <linux/types.h> |
44 | #include <asm/atomic.h> | 43 | #include <asm/atomic.h> |
45 | #include <asm/byteorder.h> | 44 | #include <asm/byteorder.h> |
@@ -484,6 +483,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
484 | if (!inet->hdrincl) | 483 | if (!inet->hdrincl) |
485 | raw_probe_proto_opt(&fl, msg); | 484 | raw_probe_proto_opt(&fl, msg); |
486 | 485 | ||
486 | security_sk_classify_flow(sk, &fl); | ||
487 | err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); | 487 | err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); |
488 | } | 488 | } |
489 | if (err) | 489 | if (err) |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b873cbcdd0b8..20ffe8e88c0f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -2639,51 +2639,54 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
2639 | { | 2639 | { |
2640 | struct rtable *rt = (struct rtable*)skb->dst; | 2640 | struct rtable *rt = (struct rtable*)skb->dst; |
2641 | struct rtmsg *r; | 2641 | struct rtmsg *r; |
2642 | struct nlmsghdr *nlh; | 2642 | struct nlmsghdr *nlh; |
2643 | unsigned char *b = skb->tail; | ||
2644 | struct rta_cacheinfo ci; | 2643 | struct rta_cacheinfo ci; |
2645 | #ifdef CONFIG_IP_MROUTE | 2644 | |
2646 | struct rtattr *eptr; | 2645 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); |
2647 | #endif | 2646 | if (nlh == NULL) |
2648 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); | 2647 | return -ENOBUFS; |
2649 | r = NLMSG_DATA(nlh); | 2648 | |
2649 | r = nlmsg_data(nlh); | ||
2650 | r->rtm_family = AF_INET; | 2650 | r->rtm_family = AF_INET; |
2651 | r->rtm_dst_len = 32; | 2651 | r->rtm_dst_len = 32; |
2652 | r->rtm_src_len = 0; | 2652 | r->rtm_src_len = 0; |
2653 | r->rtm_tos = rt->fl.fl4_tos; | 2653 | r->rtm_tos = rt->fl.fl4_tos; |
2654 | r->rtm_table = RT_TABLE_MAIN; | 2654 | r->rtm_table = RT_TABLE_MAIN; |
2655 | NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); | ||
2655 | r->rtm_type = rt->rt_type; | 2656 | r->rtm_type = rt->rt_type; |
2656 | r->rtm_scope = RT_SCOPE_UNIVERSE; | 2657 | r->rtm_scope = RT_SCOPE_UNIVERSE; |
2657 | r->rtm_protocol = RTPROT_UNSPEC; | 2658 | r->rtm_protocol = RTPROT_UNSPEC; |
2658 | r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; | 2659 | r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; |
2659 | if (rt->rt_flags & RTCF_NOTIFY) | 2660 | if (rt->rt_flags & RTCF_NOTIFY) |
2660 | r->rtm_flags |= RTM_F_NOTIFY; | 2661 | r->rtm_flags |= RTM_F_NOTIFY; |
2661 | RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); | 2662 | |
2663 | NLA_PUT_U32(skb, RTA_DST, rt->rt_dst); | ||
2664 | |||
2662 | if (rt->fl.fl4_src) { | 2665 | if (rt->fl.fl4_src) { |
2663 | r->rtm_src_len = 32; | 2666 | r->rtm_src_len = 32; |
2664 | RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src); | 2667 | NLA_PUT_U32(skb, RTA_SRC, rt->fl.fl4_src); |
2665 | } | 2668 | } |
2666 | if (rt->u.dst.dev) | 2669 | if (rt->u.dst.dev) |
2667 | RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); | 2670 | NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); |
2668 | #ifdef CONFIG_NET_CLS_ROUTE | 2671 | #ifdef CONFIG_NET_CLS_ROUTE |
2669 | if (rt->u.dst.tclassid) | 2672 | if (rt->u.dst.tclassid) |
2670 | RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid); | 2673 | NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); |
2671 | #endif | 2674 | #endif |
2672 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | 2675 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED |
2673 | if (rt->rt_multipath_alg != IP_MP_ALG_NONE) { | 2676 | if (rt->rt_multipath_alg != IP_MP_ALG_NONE) |
2674 | __u32 alg = rt->rt_multipath_alg; | 2677 | NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); |
2675 | |||
2676 | RTA_PUT(skb, RTA_MP_ALGO, 4, &alg); | ||
2677 | } | ||
2678 | #endif | 2678 | #endif |
2679 | if (rt->fl.iif) | 2679 | if (rt->fl.iif) |
2680 | RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); | 2680 | NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_spec_dst); |
2681 | else if (rt->rt_src != rt->fl.fl4_src) | 2681 | else if (rt->rt_src != rt->fl.fl4_src) |
2682 | RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); | 2682 | NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_src); |
2683 | |||
2683 | if (rt->rt_dst != rt->rt_gateway) | 2684 | if (rt->rt_dst != rt->rt_gateway) |
2684 | RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); | 2685 | NLA_PUT_U32(skb, RTA_GATEWAY, rt->rt_gateway); |
2686 | |||
2685 | if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) | 2687 | if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) |
2686 | goto rtattr_failure; | 2688 | goto nla_put_failure; |
2689 | |||
2687 | ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); | 2690 | ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); |
2688 | ci.rta_used = rt->u.dst.__use; | 2691 | ci.rta_used = rt->u.dst.__use; |
2689 | ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); | 2692 | ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); |
@@ -2700,10 +2703,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
2700 | ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; | 2703 | ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; |
2701 | } | 2704 | } |
2702 | } | 2705 | } |
2703 | #ifdef CONFIG_IP_MROUTE | 2706 | |
2704 | eptr = (struct rtattr*)skb->tail; | ||
2705 | #endif | ||
2706 | RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); | ||
2707 | if (rt->fl.iif) { | 2707 | if (rt->fl.iif) { |
2708 | #ifdef CONFIG_IP_MROUTE | 2708 | #ifdef CONFIG_IP_MROUTE |
2709 | u32 dst = rt->rt_dst; | 2709 | u32 dst = rt->rt_dst; |
@@ -2715,41 +2715,46 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
2715 | if (!nowait) { | 2715 | if (!nowait) { |
2716 | if (err == 0) | 2716 | if (err == 0) |
2717 | return 0; | 2717 | return 0; |
2718 | goto nlmsg_failure; | 2718 | goto nla_put_failure; |
2719 | } else { | 2719 | } else { |
2720 | if (err == -EMSGSIZE) | 2720 | if (err == -EMSGSIZE) |
2721 | goto nlmsg_failure; | 2721 | goto nla_put_failure; |
2722 | ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; | 2722 | ci.rta_error = err; |
2723 | } | 2723 | } |
2724 | } | 2724 | } |
2725 | } else | 2725 | } else |
2726 | #endif | 2726 | #endif |
2727 | RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); | 2727 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); |
2728 | } | 2728 | } |
2729 | 2729 | ||
2730 | nlh->nlmsg_len = skb->tail - b; | 2730 | NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); |
2731 | return skb->len; | 2731 | |
2732 | return nlmsg_end(skb, nlh); | ||
2732 | 2733 | ||
2733 | nlmsg_failure: | 2734 | nla_put_failure: |
2734 | rtattr_failure: | 2735 | return nlmsg_cancel(skb, nlh); |
2735 | skb_trim(skb, b - skb->data); | ||
2736 | return -1; | ||
2737 | } | 2736 | } |
2738 | 2737 | ||
2739 | int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | 2738 | int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) |
2740 | { | 2739 | { |
2741 | struct rtattr **rta = arg; | 2740 | struct rtmsg *rtm; |
2742 | struct rtmsg *rtm = NLMSG_DATA(nlh); | 2741 | struct nlattr *tb[RTA_MAX+1]; |
2743 | struct rtable *rt = NULL; | 2742 | struct rtable *rt = NULL; |
2744 | u32 dst = 0; | 2743 | u32 dst, src, iif; |
2745 | u32 src = 0; | 2744 | int err; |
2746 | int iif = 0; | ||
2747 | int err = -ENOBUFS; | ||
2748 | struct sk_buff *skb; | 2745 | struct sk_buff *skb; |
2749 | 2746 | ||
2747 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); | ||
2748 | if (err < 0) | ||
2749 | goto errout; | ||
2750 | |||
2751 | rtm = nlmsg_data(nlh); | ||
2752 | |||
2750 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | 2753 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); |
2751 | if (!skb) | 2754 | if (skb == NULL) { |
2752 | goto out; | 2755 | err = -ENOBUFS; |
2756 | goto errout; | ||
2757 | } | ||
2753 | 2758 | ||
2754 | /* Reserve room for dummy headers, this skb can pass | 2759 | /* Reserve room for dummy headers, this skb can pass |
2755 | through good chunk of routing engine. | 2760 | through good chunk of routing engine. |
@@ -2760,62 +2765,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | |||
2760 | skb->nh.iph->protocol = IPPROTO_ICMP; | 2765 | skb->nh.iph->protocol = IPPROTO_ICMP; |
2761 | skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); | 2766 | skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); |
2762 | 2767 | ||
2763 | if (rta[RTA_SRC - 1]) | 2768 | src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0; |
2764 | memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4); | 2769 | dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0; |
2765 | if (rta[RTA_DST - 1]) | 2770 | iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; |
2766 | memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4); | ||
2767 | if (rta[RTA_IIF - 1]) | ||
2768 | memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int)); | ||
2769 | 2771 | ||
2770 | if (iif) { | 2772 | if (iif) { |
2771 | struct net_device *dev = __dev_get_by_index(iif); | 2773 | struct net_device *dev; |
2772 | err = -ENODEV; | 2774 | |
2773 | if (!dev) | 2775 | dev = __dev_get_by_index(iif); |
2774 | goto out_free; | 2776 | if (dev == NULL) { |
2777 | err = -ENODEV; | ||
2778 | goto errout_free; | ||
2779 | } | ||
2780 | |||
2775 | skb->protocol = htons(ETH_P_IP); | 2781 | skb->protocol = htons(ETH_P_IP); |
2776 | skb->dev = dev; | 2782 | skb->dev = dev; |
2777 | local_bh_disable(); | 2783 | local_bh_disable(); |
2778 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); | 2784 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); |
2779 | local_bh_enable(); | 2785 | local_bh_enable(); |
2780 | rt = (struct rtable*)skb->dst; | 2786 | |
2781 | if (!err && rt->u.dst.error) | 2787 | rt = (struct rtable*) skb->dst; |
2788 | if (err == 0 && rt->u.dst.error) | ||
2782 | err = -rt->u.dst.error; | 2789 | err = -rt->u.dst.error; |
2783 | } else { | 2790 | } else { |
2784 | struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, | 2791 | struct flowi fl = { |
2785 | .saddr = src, | 2792 | .nl_u = { |
2786 | .tos = rtm->rtm_tos } } }; | 2793 | .ip4_u = { |
2787 | int oif = 0; | 2794 | .daddr = dst, |
2788 | if (rta[RTA_OIF - 1]) | 2795 | .saddr = src, |
2789 | memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); | 2796 | .tos = rtm->rtm_tos, |
2790 | fl.oif = oif; | 2797 | }, |
2798 | }, | ||
2799 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | ||
2800 | }; | ||
2791 | err = ip_route_output_key(&rt, &fl); | 2801 | err = ip_route_output_key(&rt, &fl); |
2792 | } | 2802 | } |
2803 | |||
2793 | if (err) | 2804 | if (err) |
2794 | goto out_free; | 2805 | goto errout_free; |
2795 | 2806 | ||
2796 | skb->dst = &rt->u.dst; | 2807 | skb->dst = &rt->u.dst; |
2797 | if (rtm->rtm_flags & RTM_F_NOTIFY) | 2808 | if (rtm->rtm_flags & RTM_F_NOTIFY) |
2798 | rt->rt_flags |= RTCF_NOTIFY; | 2809 | rt->rt_flags |= RTCF_NOTIFY; |
2799 | 2810 | ||
2800 | NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; | ||
2801 | |||
2802 | err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, | 2811 | err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, |
2803 | RTM_NEWROUTE, 0, 0); | 2812 | RTM_NEWROUTE, 0, 0); |
2804 | if (!err) | 2813 | if (err <= 0) |
2805 | goto out_free; | 2814 | goto errout_free; |
2806 | if (err < 0) { | ||
2807 | err = -EMSGSIZE; | ||
2808 | goto out_free; | ||
2809 | } | ||
2810 | 2815 | ||
2811 | err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | 2816 | err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); |
2812 | if (err > 0) | 2817 | errout: |
2813 | err = 0; | 2818 | return err; |
2814 | out: return err; | ||
2815 | 2819 | ||
2816 | out_free: | 2820 | errout_free: |
2817 | kfree_skb(skb); | 2821 | kfree_skb(skb); |
2818 | goto out; | 2822 | goto errout; |
2819 | } | 2823 | } |
2820 | 2824 | ||
2821 | int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | 2825 | int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) |
@@ -3143,13 +3147,9 @@ int __init ip_rt_init(void) | |||
3143 | } | 3147 | } |
3144 | #endif | 3148 | #endif |
3145 | 3149 | ||
3146 | ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", | 3150 | ipv4_dst_ops.kmem_cachep = |
3147 | sizeof(struct rtable), | 3151 | kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, |
3148 | 0, SLAB_HWCACHE_ALIGN, | 3152 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); |
3149 | NULL, NULL); | ||
3150 | |||
3151 | if (!ipv4_dst_ops.kmem_cachep) | ||
3152 | panic("IP: failed to allocate ip_dst_cache\n"); | ||
3153 | 3153 | ||
3154 | rt_hash_table = (struct rt_hash_bucket *) | 3154 | rt_hash_table = (struct rt_hash_bucket *) |
3155 | alloc_large_system_hash("IP route cache", | 3155 | alloc_large_system_hash("IP route cache", |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e20be3331f67..661e0a4bca72 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
214 | if (!req) | 214 | if (!req) |
215 | goto out; | 215 | goto out; |
216 | 216 | ||
217 | if (security_inet_conn_request(sk, skb, req)) { | ||
218 | reqsk_free(req); | ||
219 | goto out; | ||
220 | } | ||
217 | ireq = inet_rsk(req); | 221 | ireq = inet_rsk(req); |
218 | treq = tcp_rsk(req); | 222 | treq = tcp_rsk(req); |
219 | treq->rcv_isn = htonl(skb->h.th->seq) - 1; | 223 | treq->rcv_isn = htonl(skb->h.th->seq) - 1; |
@@ -259,6 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
259 | .uli_u = { .ports = | 263 | .uli_u = { .ports = |
260 | { .sport = skb->h.th->dest, | 264 | { .sport = skb->h.th->dest, |
261 | .dport = skb->h.th->source } } }; | 265 | .dport = skb->h.th->source } } }; |
266 | security_req_classify_flow(req, &fl); | ||
262 | if (ip_route_output_key(&rt, &fl)) { | 267 | if (ip_route_output_key(&rt, &fl)) { |
263 | reqsk_free(req); | 268 | reqsk_free(req); |
264 | goto out; | 269 | goto out; |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 70cea9d08a38..19b2071ff319 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <net/ip.h> | 17 | #include <net/ip.h> |
18 | #include <net/route.h> | 18 | #include <net/route.h> |
19 | #include <net/tcp.h> | 19 | #include <net/tcp.h> |
20 | #include <net/cipso_ipv4.h> | ||
20 | 21 | ||
21 | /* From af_inet.c */ | 22 | /* From af_inet.c */ |
22 | extern int sysctl_ip_nonlocal_bind; | 23 | extern int sysctl_ip_nonlocal_bind; |
@@ -697,6 +698,40 @@ ctl_table ipv4_table[] = { | |||
697 | .mode = 0644, | 698 | .mode = 0644, |
698 | .proc_handler = &proc_dointvec | 699 | .proc_handler = &proc_dointvec |
699 | }, | 700 | }, |
701 | #ifdef CONFIG_NETLABEL | ||
702 | { | ||
703 | .ctl_name = NET_CIPSOV4_CACHE_ENABLE, | ||
704 | .procname = "cipso_cache_enable", | ||
705 | .data = &cipso_v4_cache_enabled, | ||
706 | .maxlen = sizeof(int), | ||
707 | .mode = 0644, | ||
708 | .proc_handler = &proc_dointvec, | ||
709 | }, | ||
710 | { | ||
711 | .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, | ||
712 | .procname = "cipso_cache_bucket_size", | ||
713 | .data = &cipso_v4_cache_bucketsize, | ||
714 | .maxlen = sizeof(int), | ||
715 | .mode = 0644, | ||
716 | .proc_handler = &proc_dointvec, | ||
717 | }, | ||
718 | { | ||
719 | .ctl_name = NET_CIPSOV4_RBM_OPTFMT, | ||
720 | .procname = "cipso_rbm_optfmt", | ||
721 | .data = &cipso_v4_rbm_optfmt, | ||
722 | .maxlen = sizeof(int), | ||
723 | .mode = 0644, | ||
724 | .proc_handler = &proc_dointvec, | ||
725 | }, | ||
726 | { | ||
727 | .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, | ||
728 | .procname = "cipso_rbm_strictvalid", | ||
729 | .data = &cipso_v4_rbm_strictvalid, | ||
730 | .maxlen = sizeof(int), | ||
731 | .mode = 0644, | ||
732 | .proc_handler = &proc_dointvec, | ||
733 | }, | ||
734 | #endif /* CONFIG_NETLABEL */ | ||
700 | { .ctl_name = 0 } | 735 | { .ctl_name = 0 } |
701 | }; | 736 | }; |
702 | 737 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 934396bb1376..66e9a729f6df 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -268,7 +268,7 @@ | |||
268 | #include <asm/uaccess.h> | 268 | #include <asm/uaccess.h> |
269 | #include <asm/ioctls.h> | 269 | #include <asm/ioctls.h> |
270 | 270 | ||
271 | int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; | 271 | int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; |
272 | 272 | ||
273 | DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; | 273 | DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; |
274 | 274 | ||
@@ -568,7 +568,7 @@ new_segment: | |||
568 | skb->truesize += copy; | 568 | skb->truesize += copy; |
569 | sk->sk_wmem_queued += copy; | 569 | sk->sk_wmem_queued += copy; |
570 | sk->sk_forward_alloc -= copy; | 570 | sk->sk_forward_alloc -= copy; |
571 | skb->ip_summed = CHECKSUM_HW; | 571 | skb->ip_summed = CHECKSUM_PARTIAL; |
572 | tp->write_seq += copy; | 572 | tp->write_seq += copy; |
573 | TCP_SKB_CB(skb)->end_seq += copy; | 573 | TCP_SKB_CB(skb)->end_seq += copy; |
574 | skb_shinfo(skb)->gso_segs = 0; | 574 | skb_shinfo(skb)->gso_segs = 0; |
@@ -723,7 +723,7 @@ new_segment: | |||
723 | * Check whether we can use HW checksum. | 723 | * Check whether we can use HW checksum. |
724 | */ | 724 | */ |
725 | if (sk->sk_route_caps & NETIF_F_ALL_CSUM) | 725 | if (sk->sk_route_caps & NETIF_F_ALL_CSUM) |
726 | skb->ip_summed = CHECKSUM_HW; | 726 | skb->ip_summed = CHECKSUM_PARTIAL; |
727 | 727 | ||
728 | skb_entail(sk, tp, skb); | 728 | skb_entail(sk, tp, skb); |
729 | copy = size_goal; | 729 | copy = size_goal; |
@@ -955,8 +955,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) | |||
955 | * receive buffer and there was a small segment | 955 | * receive buffer and there was a small segment |
956 | * in queue. | 956 | * in queue. |
957 | */ | 957 | */ |
958 | (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && | 958 | (copied > 0 && |
959 | !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) | 959 | ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || |
960 | ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && | ||
961 | !icsk->icsk_ack.pingpong)) && | ||
962 | !atomic_read(&sk->sk_rmem_alloc))) | ||
960 | time_to_ack = 1; | 963 | time_to_ack = 1; |
961 | } | 964 | } |
962 | 965 | ||
@@ -2205,7 +2208,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) | |||
2205 | th->fin = th->psh = 0; | 2208 | th->fin = th->psh = 0; |
2206 | 2209 | ||
2207 | th->check = ~csum_fold(th->check + delta); | 2210 | th->check = ~csum_fold(th->check + delta); |
2208 | if (skb->ip_summed != CHECKSUM_HW) | 2211 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
2209 | th->check = csum_fold(csum_partial(skb->h.raw, thlen, | 2212 | th->check = csum_fold(csum_partial(skb->h.raw, thlen, |
2210 | skb->csum)); | 2213 | skb->csum)); |
2211 | 2214 | ||
@@ -2219,7 +2222,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) | |||
2219 | 2222 | ||
2220 | delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); | 2223 | delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); |
2221 | th->check = ~csum_fold(th->check + delta); | 2224 | th->check = ~csum_fold(th->check + delta); |
2222 | if (skb->ip_summed != CHECKSUM_HW) | 2225 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
2223 | th->check = csum_fold(csum_partial(skb->h.raw, thlen, | 2226 | th->check = csum_fold(csum_partial(skb->h.raw, thlen, |
2224 | skb->csum)); | 2227 | skb->csum)); |
2225 | 2228 | ||
@@ -2254,9 +2257,7 @@ void __init tcp_init(void) | |||
2254 | tcp_hashinfo.bind_bucket_cachep = | 2257 | tcp_hashinfo.bind_bucket_cachep = |
2255 | kmem_cache_create("tcp_bind_bucket", | 2258 | kmem_cache_create("tcp_bind_bucket", |
2256 | sizeof(struct inet_bind_bucket), 0, | 2259 | sizeof(struct inet_bind_bucket), 0, |
2257 | SLAB_HWCACHE_ALIGN, NULL, NULL); | 2260 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); |
2258 | if (!tcp_hashinfo.bind_bucket_cachep) | ||
2259 | panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); | ||
2260 | 2261 | ||
2261 | /* Size and allocate the main established and bind bucket | 2262 | /* Size and allocate the main established and bind bucket |
2262 | * hash tables. | 2263 | * hash tables. |
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index b0134ab08379..5730333cd0ac 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
@@ -231,7 +231,7 @@ static struct tcp_congestion_ops bictcp = { | |||
231 | 231 | ||
232 | static int __init bictcp_register(void) | 232 | static int __init bictcp_register(void) |
233 | { | 233 | { |
234 | BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); | 234 | BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); |
235 | return tcp_register_congestion_control(&bictcp); | 235 | return tcp_register_congestion_control(&bictcp); |
236 | } | 236 | } |
237 | 237 | ||
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 2be27980ca78..a60ef38d75c6 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -358,7 +358,7 @@ static struct tcp_congestion_ops cubictcp = { | |||
358 | 358 | ||
359 | static int __init cubictcp_register(void) | 359 | static int __init cubictcp_register(void) |
360 | { | 360 | { |
361 | BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); | 361 | BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); |
362 | 362 | ||
363 | /* Precompute a bunch of the scaling factors that are used per-packet | 363 | /* Precompute a bunch of the scaling factors that are used per-packet |
364 | * based on SRTT of 100ms | 364 | * based on SRTT of 100ms |
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index fa3e1aad660c..c4fc811bf377 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c | |||
@@ -189,7 +189,7 @@ static struct tcp_congestion_ops tcp_highspeed = { | |||
189 | 189 | ||
190 | static int __init hstcp_register(void) | 190 | static int __init hstcp_register(void) |
191 | { | 191 | { |
192 | BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); | 192 | BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); |
193 | return tcp_register_congestion_control(&tcp_highspeed); | 193 | return tcp_register_congestion_control(&tcp_highspeed); |
194 | } | 194 | } |
195 | 195 | ||
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 6edfe5e4510e..682e7d5b6f2f 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c | |||
@@ -286,7 +286,7 @@ static struct tcp_congestion_ops htcp = { | |||
286 | 286 | ||
287 | static int __init htcp_register(void) | 287 | static int __init htcp_register(void) |
288 | { | 288 | { |
289 | BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); | 289 | BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); |
290 | BUILD_BUG_ON(BETA_MIN >= BETA_MAX); | 290 | BUILD_BUG_ON(BETA_MIN >= BETA_MAX); |
291 | return tcp_register_congestion_control(&htcp); | 291 | return tcp_register_congestion_control(&htcp); |
292 | } | 292 | } |
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 7406e0c5fb8e..59e691d26f64 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c | |||
@@ -170,7 +170,7 @@ static struct tcp_congestion_ops tcp_hybla = { | |||
170 | 170 | ||
171 | static int __init hybla_register(void) | 171 | static int __init hybla_register(void) |
172 | { | 172 | { |
173 | BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); | 173 | BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); |
174 | return tcp_register_congestion_control(&tcp_hybla); | 174 | return tcp_register_congestion_control(&tcp_hybla); |
175 | } | 175 | } |
176 | 176 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 159fa3f1ba67..b3def0df14fb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -72,24 +72,24 @@ | |||
72 | #include <asm/unaligned.h> | 72 | #include <asm/unaligned.h> |
73 | #include <net/netdma.h> | 73 | #include <net/netdma.h> |
74 | 74 | ||
75 | int sysctl_tcp_timestamps = 1; | 75 | int sysctl_tcp_timestamps __read_mostly = 1; |
76 | int sysctl_tcp_window_scaling = 1; | 76 | int sysctl_tcp_window_scaling __read_mostly = 1; |
77 | int sysctl_tcp_sack = 1; | 77 | int sysctl_tcp_sack __read_mostly = 1; |
78 | int sysctl_tcp_fack = 1; | 78 | int sysctl_tcp_fack __read_mostly = 1; |
79 | int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; | 79 | int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; |
80 | int sysctl_tcp_ecn; | 80 | int sysctl_tcp_ecn __read_mostly; |
81 | int sysctl_tcp_dsack = 1; | 81 | int sysctl_tcp_dsack __read_mostly = 1; |
82 | int sysctl_tcp_app_win = 31; | 82 | int sysctl_tcp_app_win __read_mostly = 31; |
83 | int sysctl_tcp_adv_win_scale = 2; | 83 | int sysctl_tcp_adv_win_scale __read_mostly = 2; |
84 | 84 | ||
85 | int sysctl_tcp_stdurg; | 85 | int sysctl_tcp_stdurg __read_mostly; |
86 | int sysctl_tcp_rfc1337; | 86 | int sysctl_tcp_rfc1337 __read_mostly; |
87 | int sysctl_tcp_max_orphans = NR_FILE; | 87 | int sysctl_tcp_max_orphans __read_mostly = NR_FILE; |
88 | int sysctl_tcp_frto; | 88 | int sysctl_tcp_frto __read_mostly; |
89 | int sysctl_tcp_nometrics_save; | 89 | int sysctl_tcp_nometrics_save __read_mostly; |
90 | 90 | ||
91 | int sysctl_tcp_moderate_rcvbuf = 1; | 91 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
92 | int sysctl_tcp_abc; | 92 | int sysctl_tcp_abc __read_mostly; |
93 | 93 | ||
94 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 94 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
95 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 95 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
@@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, | |||
127 | /* skb->len may jitter because of SACKs, even if peer | 127 | /* skb->len may jitter because of SACKs, even if peer |
128 | * sends good full-sized frames. | 128 | * sends good full-sized frames. |
129 | */ | 129 | */ |
130 | len = skb->len; | 130 | len = skb_shinfo(skb)->gso_size ?: skb->len; |
131 | if (len >= icsk->icsk_ack.rcv_mss) { | 131 | if (len >= icsk->icsk_ack.rcv_mss) { |
132 | icsk->icsk_ack.rcv_mss = len; | 132 | icsk->icsk_ack.rcv_mss = len; |
133 | } else { | 133 | } else { |
@@ -156,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk, | |||
156 | return; | 156 | return; |
157 | } | 157 | } |
158 | } | 158 | } |
159 | if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) | ||
160 | icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2; | ||
159 | icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; | 161 | icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; |
160 | } | 162 | } |
161 | } | 163 | } |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4b04c3edd4a9..39b179856082 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -78,8 +78,8 @@ | |||
78 | #include <linux/proc_fs.h> | 78 | #include <linux/proc_fs.h> |
79 | #include <linux/seq_file.h> | 79 | #include <linux/seq_file.h> |
80 | 80 | ||
81 | int sysctl_tcp_tw_reuse; | 81 | int sysctl_tcp_tw_reuse __read_mostly; |
82 | int sysctl_tcp_low_latency; | 82 | int sysctl_tcp_low_latency __read_mostly; |
83 | 83 | ||
84 | /* Check TCP sequence numbers in ICMP packets. */ | 84 | /* Check TCP sequence numbers in ICMP packets. */ |
85 | #define ICMP_MIN_LENGTH 8 | 85 | #define ICMP_MIN_LENGTH 8 |
@@ -484,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) | |||
484 | struct inet_sock *inet = inet_sk(sk); | 484 | struct inet_sock *inet = inet_sk(sk); |
485 | struct tcphdr *th = skb->h.th; | 485 | struct tcphdr *th = skb->h.th; |
486 | 486 | ||
487 | if (skb->ip_summed == CHECKSUM_HW) { | 487 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
488 | th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); | 488 | th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); |
489 | skb->csum = offsetof(struct tcphdr, check); | 489 | skb->csum = offsetof(struct tcphdr, check); |
490 | } else { | 490 | } else { |
@@ -509,7 +509,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) | |||
509 | th->check = 0; | 509 | th->check = 0; |
510 | th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); | 510 | th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); |
511 | skb->csum = offsetof(struct tcphdr, check); | 511 | skb->csum = offsetof(struct tcphdr, check); |
512 | skb->ip_summed = CHECKSUM_HW; | 512 | skb->ip_summed = CHECKSUM_PARTIAL; |
513 | return 0; | 513 | return 0; |
514 | } | 514 | } |
515 | 515 | ||
@@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
798 | 798 | ||
799 | tcp_openreq_init(req, &tmp_opt, skb); | 799 | tcp_openreq_init(req, &tmp_opt, skb); |
800 | 800 | ||
801 | if (security_inet_conn_request(sk, skb, req)) | ||
802 | goto drop_and_free; | ||
803 | |||
801 | ireq = inet_rsk(req); | 804 | ireq = inet_rsk(req); |
802 | ireq->loc_addr = daddr; | 805 | ireq->loc_addr = daddr; |
803 | ireq->rmt_addr = saddr; | 806 | ireq->rmt_addr = saddr; |
@@ -948,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
948 | if (req) | 951 | if (req) |
949 | return tcp_check_req(sk, skb, req, prev); | 952 | return tcp_check_req(sk, skb, req, prev); |
950 | 953 | ||
951 | nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, | 954 | nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, |
952 | th->source, skb->nh.iph->daddr, | 955 | th->source, skb->nh.iph->daddr, |
953 | ntohs(th->dest), inet_iif(skb)); | 956 | th->dest, inet_iif(skb)); |
954 | 957 | ||
955 | if (nsk) { | 958 | if (nsk) { |
956 | if (nsk->sk_state != TCP_TIME_WAIT) { | 959 | if (nsk->sk_state != TCP_TIME_WAIT) { |
@@ -970,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
970 | 973 | ||
971 | static int tcp_v4_checksum_init(struct sk_buff *skb) | 974 | static int tcp_v4_checksum_init(struct sk_buff *skb) |
972 | { | 975 | { |
973 | if (skb->ip_summed == CHECKSUM_HW) { | 976 | if (skb->ip_summed == CHECKSUM_COMPLETE) { |
974 | if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, | 977 | if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, |
975 | skb->nh.iph->daddr, skb->csum)) { | 978 | skb->nh.iph->daddr, skb->csum)) { |
976 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 979 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
@@ -1087,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
1087 | TCP_SKB_CB(skb)->sacked = 0; | 1090 | TCP_SKB_CB(skb)->sacked = 0; |
1088 | 1091 | ||
1089 | sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, | 1092 | sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, |
1090 | skb->nh.iph->daddr, ntohs(th->dest), | 1093 | skb->nh.iph->daddr, th->dest, |
1091 | inet_iif(skb)); | 1094 | inet_iif(skb)); |
1092 | 1095 | ||
1093 | if (!sk) | 1096 | if (!sk) |
@@ -1101,7 +1104,7 @@ process: | |||
1101 | goto discard_and_relse; | 1104 | goto discard_and_relse; |
1102 | nf_reset(skb); | 1105 | nf_reset(skb); |
1103 | 1106 | ||
1104 | if (sk_filter(sk, skb, 0)) | 1107 | if (sk_filter(sk, skb)) |
1105 | goto discard_and_relse; | 1108 | goto discard_and_relse; |
1106 | 1109 | ||
1107 | skb->dev = NULL; | 1110 | skb->dev = NULL; |
@@ -1165,7 +1168,7 @@ do_time_wait: | |||
1165 | case TCP_TW_SYN: { | 1168 | case TCP_TW_SYN: { |
1166 | struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, | 1169 | struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, |
1167 | skb->nh.iph->daddr, | 1170 | skb->nh.iph->daddr, |
1168 | ntohs(th->dest), | 1171 | th->dest, |
1169 | inet_iif(skb)); | 1172 | inet_iif(skb)); |
1170 | if (sk2) { | 1173 | if (sk2) { |
1171 | inet_twsk_deschedule((struct inet_timewait_sock *)sk, | 1174 | inet_twsk_deschedule((struct inet_timewait_sock *)sk, |
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 48f28d617ce6..308fb7e071c5 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c | |||
@@ -35,7 +35,6 @@ | |||
35 | * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $ | 35 | * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $ |
36 | */ | 36 | */ |
37 | 37 | ||
38 | #include <linux/config.h> | ||
39 | #include <linux/module.h> | 38 | #include <linux/module.h> |
40 | #include <net/tcp.h> | 39 | #include <net/tcp.h> |
41 | 40 | ||
@@ -328,7 +327,7 @@ static struct tcp_congestion_ops tcp_lp = { | |||
328 | 327 | ||
329 | static int __init tcp_lp_register(void) | 328 | static int __init tcp_lp_register(void) |
330 | { | 329 | { |
331 | BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); | 330 | BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); |
332 | return tcp_register_congestion_control(&tcp_lp); | 331 | return tcp_register_congestion_control(&tcp_lp); |
333 | } | 332 | } |
334 | 333 | ||
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 624e2b2c7f53..0163d9826907 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -34,8 +34,8 @@ | |||
34 | #define SYNC_INIT 1 | 34 | #define SYNC_INIT 1 |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | int sysctl_tcp_syncookies = SYNC_INIT; | 37 | int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; |
38 | int sysctl_tcp_abort_on_overflow; | 38 | int sysctl_tcp_abort_on_overflow __read_mostly; |
39 | 39 | ||
40 | struct inet_timewait_death_row tcp_death_row = { | 40 | struct inet_timewait_death_row tcp_death_row = { |
41 | .sysctl_max_tw_buckets = NR_FILE * 2, | 41 | .sysctl_max_tw_buckets = NR_FILE * 2, |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4f3ffe1b3b4..061edfae0c29 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -43,24 +43,24 @@ | |||
43 | #include <linux/smp_lock.h> | 43 | #include <linux/smp_lock.h> |
44 | 44 | ||
45 | /* People can turn this off for buggy TCP's found in printers etc. */ | 45 | /* People can turn this off for buggy TCP's found in printers etc. */ |
46 | int sysctl_tcp_retrans_collapse = 1; | 46 | int sysctl_tcp_retrans_collapse __read_mostly = 1; |
47 | 47 | ||
48 | /* People can turn this on to work with those rare, broken TCPs that | 48 | /* People can turn this on to work with those rare, broken TCPs that |
49 | * interpret the window field as a signed quantity. | 49 | * interpret the window field as a signed quantity. |
50 | */ | 50 | */ |
51 | int sysctl_tcp_workaround_signed_windows = 0; | 51 | int sysctl_tcp_workaround_signed_windows __read_mostly = 0; |
52 | 52 | ||
53 | /* This limits the percentage of the congestion window which we | 53 | /* This limits the percentage of the congestion window which we |
54 | * will allow a single TSO frame to consume. Building TSO frames | 54 | * will allow a single TSO frame to consume. Building TSO frames |
55 | * which are too large can cause TCP streams to be bursty. | 55 | * which are too large can cause TCP streams to be bursty. |
56 | */ | 56 | */ |
57 | int sysctl_tcp_tso_win_divisor = 3; | 57 | int sysctl_tcp_tso_win_divisor __read_mostly = 3; |
58 | 58 | ||
59 | int sysctl_tcp_mtu_probing = 0; | 59 | int sysctl_tcp_mtu_probing __read_mostly = 0; |
60 | int sysctl_tcp_base_mss = 512; | 60 | int sysctl_tcp_base_mss __read_mostly = 512; |
61 | 61 | ||
62 | /* By default, RFC2861 behavior. */ | 62 | /* By default, RFC2861 behavior. */ |
63 | int sysctl_tcp_slow_start_after_idle = 1; | 63 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; |
64 | 64 | ||
65 | static void update_send_head(struct sock *sk, struct tcp_sock *tp, | 65 | static void update_send_head(struct sock *sk, struct tcp_sock *tp, |
66 | struct sk_buff *skb) | 66 | struct sk_buff *skb) |
@@ -577,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss | |||
577 | TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; | 577 | TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; |
578 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; | 578 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; |
579 | 579 | ||
580 | if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { | 580 | if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { |
581 | /* Copy and checksum data tail into the new buffer. */ | 581 | /* Copy and checksum data tail into the new buffer. */ |
582 | buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), | 582 | buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), |
583 | nsize, 0); | 583 | nsize, 0); |
@@ -586,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss | |||
586 | 586 | ||
587 | skb->csum = csum_block_sub(skb->csum, buff->csum, len); | 587 | skb->csum = csum_block_sub(skb->csum, buff->csum, len); |
588 | } else { | 588 | } else { |
589 | skb->ip_summed = CHECKSUM_HW; | 589 | skb->ip_summed = CHECKSUM_PARTIAL; |
590 | skb_split(skb, buff, len); | 590 | skb_split(skb, buff, len); |
591 | } | 591 | } |
592 | 592 | ||
@@ -689,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
689 | __pskb_trim_head(skb, len - skb_headlen(skb)); | 689 | __pskb_trim_head(skb, len - skb_headlen(skb)); |
690 | 690 | ||
691 | TCP_SKB_CB(skb)->seq += len; | 691 | TCP_SKB_CB(skb)->seq += len; |
692 | skb->ip_summed = CHECKSUM_HW; | 692 | skb->ip_summed = CHECKSUM_PARTIAL; |
693 | 693 | ||
694 | skb->truesize -= len; | 694 | skb->truesize -= len; |
695 | sk->sk_wmem_queued -= len; | 695 | sk->sk_wmem_queued -= len; |
@@ -1062,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1062 | /* This packet was never sent out yet, so no SACK bits. */ | 1062 | /* This packet was never sent out yet, so no SACK bits. */ |
1063 | TCP_SKB_CB(buff)->sacked = 0; | 1063 | TCP_SKB_CB(buff)->sacked = 0; |
1064 | 1064 | ||
1065 | buff->ip_summed = skb->ip_summed = CHECKSUM_HW; | 1065 | buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; |
1066 | skb_split(skb, buff, len); | 1066 | skb_split(skb, buff, len); |
1067 | 1067 | ||
1068 | /* Fix up tso_factor for both original and new SKB. */ | 1068 | /* Fix up tso_factor for both original and new SKB. */ |
@@ -1206,8 +1206,7 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1206 | TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; | 1206 | TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; |
1207 | TCP_SKB_CB(nskb)->sacked = 0; | 1207 | TCP_SKB_CB(nskb)->sacked = 0; |
1208 | nskb->csum = 0; | 1208 | nskb->csum = 0; |
1209 | if (skb->ip_summed == CHECKSUM_HW) | 1209 | nskb->ip_summed = skb->ip_summed; |
1210 | nskb->ip_summed = CHECKSUM_HW; | ||
1211 | 1210 | ||
1212 | len = 0; | 1211 | len = 0; |
1213 | while (len < probe_size) { | 1212 | while (len < probe_size) { |
@@ -1231,7 +1230,7 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1231 | ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); | 1230 | ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); |
1232 | if (!skb_shinfo(skb)->nr_frags) { | 1231 | if (!skb_shinfo(skb)->nr_frags) { |
1233 | skb_pull(skb, copy); | 1232 | skb_pull(skb, copy); |
1234 | if (skb->ip_summed != CHECKSUM_HW) | 1233 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
1235 | skb->csum = csum_partial(skb->data, skb->len, 0); | 1234 | skb->csum = csum_partial(skb->data, skb->len, 0); |
1236 | } else { | 1235 | } else { |
1237 | __pskb_trim_head(skb, copy); | 1236 | __pskb_trim_head(skb, copy); |
@@ -1572,10 +1571,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m | |||
1572 | 1571 | ||
1573 | memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); | 1572 | memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); |
1574 | 1573 | ||
1575 | if (next_skb->ip_summed == CHECKSUM_HW) | 1574 | skb->ip_summed = next_skb->ip_summed; |
1576 | skb->ip_summed = CHECKSUM_HW; | ||
1577 | 1575 | ||
1578 | if (skb->ip_summed != CHECKSUM_HW) | 1576 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
1579 | skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); | 1577 | skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); |
1580 | 1578 | ||
1581 | /* Update sequence range on original skb. */ | 1579 | /* Update sequence range on original skb. */ |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 7c1bde3cd6cb..fb09ade5897b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -23,14 +23,14 @@ | |||
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <net/tcp.h> | 24 | #include <net/tcp.h> |
25 | 25 | ||
26 | int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; | 26 | int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; |
27 | int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; | 27 | int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; |
28 | int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; | 28 | int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME; |
29 | int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; | 29 | int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; |
30 | int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; | 30 | int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; |
31 | int sysctl_tcp_retries1 = TCP_RETR1; | 31 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; |
32 | int sysctl_tcp_retries2 = TCP_RETR2; | 32 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; |
33 | int sysctl_tcp_orphan_retries; | 33 | int sysctl_tcp_orphan_retries __read_mostly; |
34 | 34 | ||
35 | static void tcp_write_timer(unsigned long); | 35 | static void tcp_write_timer(unsigned long); |
36 | static void tcp_delack_timer(unsigned long); | 36 | static void tcp_delack_timer(unsigned long); |
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 490360b5b4bf..a3b7aa015a2f 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
@@ -370,7 +370,7 @@ static struct tcp_congestion_ops tcp_vegas = { | |||
370 | 370 | ||
371 | static int __init tcp_vegas_register(void) | 371 | static int __init tcp_vegas_register(void) |
372 | { | 372 | { |
373 | BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); | 373 | BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); |
374 | tcp_register_congestion_control(&tcp_vegas); | 374 | tcp_register_congestion_control(&tcp_vegas); |
375 | return 0; | 375 | return 0; |
376 | } | 376 | } |
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 11b42a7135c1..ce57bf302f6c 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c | |||
@@ -9,7 +9,6 @@ | |||
9 | * See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf | 9 | * See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/config.h> | ||
13 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
14 | #include <linux/module.h> | 13 | #include <linux/module.h> |
15 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
@@ -213,7 +212,7 @@ static struct tcp_congestion_ops tcp_veno = { | |||
213 | 212 | ||
214 | static int __init tcp_veno_register(void) | 213 | static int __init tcp_veno_register(void) |
215 | { | 214 | { |
216 | BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); | 215 | BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); |
217 | tcp_register_congestion_control(&tcp_veno); | 216 | tcp_register_congestion_control(&tcp_veno); |
218 | return 0; | 217 | return 0; |
219 | } | 218 | } |
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 5446312ffd2a..4f42a86c77f3 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c | |||
@@ -289,7 +289,7 @@ static struct tcp_congestion_ops tcp_westwood = { | |||
289 | 289 | ||
290 | static int __init tcp_westwood_register(void) | 290 | static int __init tcp_westwood_register(void) |
291 | { | 291 | { |
292 | BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); | 292 | BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); |
293 | return tcp_register_congestion_control(&tcp_westwood); | 293 | return tcp_register_congestion_control(&tcp_westwood); |
294 | } | 294 | } |
295 | 295 | ||
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f136cec96d95..77e265d7bb8f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -118,14 +118,33 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; | |||
118 | struct hlist_head udp_hash[UDP_HTABLE_SIZE]; | 118 | struct hlist_head udp_hash[UDP_HTABLE_SIZE]; |
119 | DEFINE_RWLOCK(udp_hash_lock); | 119 | DEFINE_RWLOCK(udp_hash_lock); |
120 | 120 | ||
121 | /* Shared by v4/v6 udp. */ | 121 | static int udp_port_rover; |
122 | int udp_port_rover; | ||
123 | 122 | ||
124 | static int udp_v4_get_port(struct sock *sk, unsigned short snum) | 123 | static inline int udp_lport_inuse(u16 num) |
124 | { | ||
125 | struct sock *sk; | ||
126 | struct hlist_node *node; | ||
127 | |||
128 | sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) | ||
129 | if (inet_sk(sk)->num == num) | ||
130 | return 1; | ||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | /** | ||
135 | * udp_get_port - common port lookup for IPv4 and IPv6 | ||
136 | * | ||
137 | * @sk: socket struct in question | ||
138 | * @snum: port number to look up | ||
139 | * @saddr_comp: AF-dependent comparison of bound local IP addresses | ||
140 | */ | ||
141 | int udp_get_port(struct sock *sk, unsigned short snum, | ||
142 | int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) | ||
125 | { | 143 | { |
126 | struct hlist_node *node; | 144 | struct hlist_node *node; |
145 | struct hlist_head *head; | ||
127 | struct sock *sk2; | 146 | struct sock *sk2; |
128 | struct inet_sock *inet = inet_sk(sk); | 147 | int error = 1; |
129 | 148 | ||
130 | write_lock_bh(&udp_hash_lock); | 149 | write_lock_bh(&udp_hash_lock); |
131 | if (snum == 0) { | 150 | if (snum == 0) { |
@@ -137,11 +156,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) | |||
137 | best_size_so_far = 32767; | 156 | best_size_so_far = 32767; |
138 | best = result = udp_port_rover; | 157 | best = result = udp_port_rover; |
139 | for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { | 158 | for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { |
140 | struct hlist_head *list; | ||
141 | int size; | 159 | int size; |
142 | 160 | ||
143 | list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; | 161 | head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; |
144 | if (hlist_empty(list)) { | 162 | if (hlist_empty(head)) { |
145 | if (result > sysctl_local_port_range[1]) | 163 | if (result > sysctl_local_port_range[1]) |
146 | result = sysctl_local_port_range[0] + | 164 | result = sysctl_local_port_range[0] + |
147 | ((result - sysctl_local_port_range[0]) & | 165 | ((result - sysctl_local_port_range[0]) & |
@@ -149,12 +167,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) | |||
149 | goto gotit; | 167 | goto gotit; |
150 | } | 168 | } |
151 | size = 0; | 169 | size = 0; |
152 | sk_for_each(sk2, node, list) | 170 | sk_for_each(sk2, node, head) |
153 | if (++size >= best_size_so_far) | 171 | if (++size < best_size_so_far) { |
154 | goto next; | 172 | best_size_so_far = size; |
155 | best_size_so_far = size; | 173 | best = result; |
156 | best = result; | 174 | } |
157 | next:; | ||
158 | } | 175 | } |
159 | result = best; | 176 | result = best; |
160 | for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { | 177 | for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { |
@@ -170,38 +187,44 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) | |||
170 | gotit: | 187 | gotit: |
171 | udp_port_rover = snum = result; | 188 | udp_port_rover = snum = result; |
172 | } else { | 189 | } else { |
173 | sk_for_each(sk2, node, | 190 | head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; |
174 | &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { | 191 | |
175 | struct inet_sock *inet2 = inet_sk(sk2); | 192 | sk_for_each(sk2, node, head) |
176 | 193 | if (inet_sk(sk2)->num == snum && | |
177 | if (inet2->num == snum && | 194 | sk2 != sk && |
178 | sk2 != sk && | 195 | (!sk2->sk_reuse || !sk->sk_reuse) && |
179 | !ipv6_only_sock(sk2) && | 196 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if |
180 | (!sk2->sk_bound_dev_if || | 197 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && |
181 | !sk->sk_bound_dev_if || | 198 | (*saddr_cmp)(sk, sk2) ) |
182 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | ||
183 | (!inet2->rcv_saddr || | ||
184 | !inet->rcv_saddr || | ||
185 | inet2->rcv_saddr == inet->rcv_saddr) && | ||
186 | (!sk2->sk_reuse || !sk->sk_reuse)) | ||
187 | goto fail; | 199 | goto fail; |
188 | } | ||
189 | } | 200 | } |
190 | inet->num = snum; | 201 | inet_sk(sk)->num = snum; |
191 | if (sk_unhashed(sk)) { | 202 | if (sk_unhashed(sk)) { |
192 | struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; | 203 | head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; |
193 | 204 | sk_add_node(sk, head); | |
194 | sk_add_node(sk, h); | ||
195 | sock_prot_inc_use(sk->sk_prot); | 205 | sock_prot_inc_use(sk->sk_prot); |
196 | } | 206 | } |
197 | write_unlock_bh(&udp_hash_lock); | 207 | error = 0; |
198 | return 0; | ||
199 | |||
200 | fail: | 208 | fail: |
201 | write_unlock_bh(&udp_hash_lock); | 209 | write_unlock_bh(&udp_hash_lock); |
202 | return 1; | 210 | return error; |
211 | } | ||
212 | |||
213 | static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | ||
214 | { | ||
215 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | ||
216 | |||
217 | return ( !ipv6_only_sock(sk2) && | ||
218 | (!inet1->rcv_saddr || !inet2->rcv_saddr || | ||
219 | inet1->rcv_saddr == inet2->rcv_saddr )); | ||
203 | } | 220 | } |
204 | 221 | ||
222 | static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) | ||
223 | { | ||
224 | return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); | ||
225 | } | ||
226 | |||
227 | |||
205 | static void udp_v4_hash(struct sock *sk) | 228 | static void udp_v4_hash(struct sock *sk) |
206 | { | 229 | { |
207 | BUG(); | 230 | BUG(); |
@@ -429,7 +452,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) | |||
429 | /* | 452 | /* |
430 | * Only one fragment on the socket. | 453 | * Only one fragment on the socket. |
431 | */ | 454 | */ |
432 | if (skb->ip_summed == CHECKSUM_HW) { | 455 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
433 | skb->csum = offsetof(struct udphdr, check); | 456 | skb->csum = offsetof(struct udphdr, check); |
434 | uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, | 457 | uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, |
435 | up->len, IPPROTO_UDP, 0); | 458 | up->len, IPPROTO_UDP, 0); |
@@ -448,7 +471,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) | |||
448 | * fragments on the socket so that all csums of sk_buffs | 471 | * fragments on the socket so that all csums of sk_buffs |
449 | * should be together. | 472 | * should be together. |
450 | */ | 473 | */ |
451 | if (skb->ip_summed == CHECKSUM_HW) { | 474 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
452 | int offset = (unsigned char *)uh - skb->data; | 475 | int offset = (unsigned char *)uh - skb->data; |
453 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | 476 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); |
454 | 477 | ||
@@ -603,6 +626,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
603 | .uli_u = { .ports = | 626 | .uli_u = { .ports = |
604 | { .sport = inet->sport, | 627 | { .sport = inet->sport, |
605 | .dport = dport } } }; | 628 | .dport = dport } } }; |
629 | security_sk_classify_flow(sk, &fl); | ||
606 | err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); | 630 | err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); |
607 | if (err) | 631 | if (err) |
608 | goto out; | 632 | goto out; |
@@ -661,6 +685,16 @@ out: | |||
661 | UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); | 685 | UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); |
662 | return len; | 686 | return len; |
663 | } | 687 | } |
688 | /* | ||
689 | * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting | ||
690 | * ENOBUFS might not be good (it's not tunable per se), but otherwise | ||
691 | * we don't have a good statistic (IpOutDiscards but it can be too many | ||
692 | * things). We could add another new stat but at least for now that | ||
693 | * seems like overkill. | ||
694 | */ | ||
695 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | ||
696 | UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); | ||
697 | } | ||
664 | return err; | 698 | return err; |
665 | 699 | ||
666 | do_confirm: | 700 | do_confirm: |
@@ -980,6 +1014,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) | |||
980 | static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | 1014 | static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) |
981 | { | 1015 | { |
982 | struct udp_sock *up = udp_sk(sk); | 1016 | struct udp_sock *up = udp_sk(sk); |
1017 | int rc; | ||
983 | 1018 | ||
984 | /* | 1019 | /* |
985 | * Charge it to the socket, dropping if the queue is full. | 1020 | * Charge it to the socket, dropping if the queue is full. |
@@ -1026,7 +1061,10 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | |||
1026 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1061 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1027 | } | 1062 | } |
1028 | 1063 | ||
1029 | if (sock_queue_rcv_skb(sk,skb)<0) { | 1064 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { |
1065 | /* Note that an ENOMEM error is charged twice */ | ||
1066 | if (rc == -ENOMEM) | ||
1067 | UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); | ||
1030 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); | 1068 | UDP_INC_STATS_BH(UDP_MIB_INERRORS); |
1031 | kfree_skb(skb); | 1069 | kfree_skb(skb); |
1032 | return -1; | 1070 | return -1; |
@@ -1087,7 +1125,7 @@ static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, | |||
1087 | { | 1125 | { |
1088 | if (uh->check == 0) { | 1126 | if (uh->check == 0) { |
1089 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1127 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1090 | } else if (skb->ip_summed == CHECKSUM_HW) { | 1128 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { |
1091 | if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) | 1129 | if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) |
1092 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1130 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1093 | } | 1131 | } |
@@ -1581,7 +1619,7 @@ EXPORT_SYMBOL(udp_disconnect); | |||
1581 | EXPORT_SYMBOL(udp_hash); | 1619 | EXPORT_SYMBOL(udp_hash); |
1582 | EXPORT_SYMBOL(udp_hash_lock); | 1620 | EXPORT_SYMBOL(udp_hash_lock); |
1583 | EXPORT_SYMBOL(udp_ioctl); | 1621 | EXPORT_SYMBOL(udp_ioctl); |
1584 | EXPORT_SYMBOL(udp_port_rover); | 1622 | EXPORT_SYMBOL(udp_get_port); |
1585 | EXPORT_SYMBOL(udp_prot); | 1623 | EXPORT_SYMBOL(udp_prot); |
1586 | EXPORT_SYMBOL(udp_sendmsg); | 1624 | EXPORT_SYMBOL(udp_sendmsg); |
1587 | EXPORT_SYMBOL(udp_poll); | 1625 | EXPORT_SYMBOL(udp_poll); |
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 817ed84511a6..040e8475f295 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c | |||
@@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) | |||
106 | if (x->mode->input(x, skb)) | 106 | if (x->mode->input(x, skb)) |
107 | goto drop; | 107 | goto drop; |
108 | 108 | ||
109 | if (x->props.mode) { | 109 | if (x->props.mode == XFRM_MODE_TUNNEL) { |
110 | decaps = 1; | 110 | decaps = 1; |
111 | break; | 111 | break; |
112 | } | 112 | } |
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index a9e6b3dd19c9..92676b7e4034 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c | |||
@@ -21,9 +21,8 @@ | |||
21 | * On exit, skb->h will be set to the start of the payload to be processed | 21 | * On exit, skb->h will be set to the start of the payload to be processed |
22 | * by x->type->output and skb->nh will be set to the top IP header. | 22 | * by x->type->output and skb->nh will be set to the top IP header. |
23 | */ | 23 | */ |
24 | static int xfrm4_transport_output(struct sk_buff *skb) | 24 | static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) |
25 | { | 25 | { |
26 | struct xfrm_state *x; | ||
27 | struct iphdr *iph; | 26 | struct iphdr *iph; |
28 | int ihl; | 27 | int ihl; |
29 | 28 | ||
@@ -33,7 +32,6 @@ static int xfrm4_transport_output(struct sk_buff *skb) | |||
33 | ihl = iph->ihl * 4; | 32 | ihl = iph->ihl * 4; |
34 | skb->h.raw += ihl; | 33 | skb->h.raw += ihl; |
35 | 34 | ||
36 | x = skb->dst->xfrm; | ||
37 | skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl); | 35 | skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl); |
38 | return 0; | 36 | return 0; |
39 | } | 37 | } |
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 13cafbe56ce3..e23c21d31a53 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c | |||
@@ -33,10 +33,9 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) | |||
33 | * On exit, skb->h will be set to the start of the payload to be processed | 33 | * On exit, skb->h will be set to the start of the payload to be processed |
34 | * by x->type->output and skb->nh will be set to the top IP header. | 34 | * by x->type->output and skb->nh will be set to the top IP header. |
35 | */ | 35 | */ |
36 | static int xfrm4_tunnel_output(struct sk_buff *skb) | 36 | static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) |
37 | { | 37 | { |
38 | struct dst_entry *dst = skb->dst; | 38 | struct dst_entry *dst = skb->dst; |
39 | struct xfrm_state *x = dst->xfrm; | ||
40 | struct iphdr *iph, *top_iph; | 39 | struct iphdr *iph, *top_iph; |
41 | int flags; | 40 | int flags; |
42 | 41 | ||
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d16f863cf687..04403fb01a58 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c | |||
@@ -48,13 +48,13 @@ static int xfrm4_output_one(struct sk_buff *skb) | |||
48 | struct xfrm_state *x = dst->xfrm; | 48 | struct xfrm_state *x = dst->xfrm; |
49 | int err; | 49 | int err; |
50 | 50 | ||
51 | if (skb->ip_summed == CHECKSUM_HW) { | 51 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
52 | err = skb_checksum_help(skb, 0); | 52 | err = skb_checksum_help(skb); |
53 | if (err) | 53 | if (err) |
54 | goto error_nolock; | 54 | goto error_nolock; |
55 | } | 55 | } |
56 | 56 | ||
57 | if (x->props.mode) { | 57 | if (x->props.mode == XFRM_MODE_TUNNEL) { |
58 | err = xfrm4_tunnel_check_size(skb); | 58 | err = xfrm4_tunnel_check_size(skb); |
59 | if (err) | 59 | if (err) |
60 | goto error_nolock; | 60 | goto error_nolock; |
@@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb) | |||
66 | if (err) | 66 | if (err) |
67 | goto error; | 67 | goto error; |
68 | 68 | ||
69 | err = x->mode->output(skb); | 69 | err = x->mode->output(x, skb); |
70 | if (err) | 70 | if (err) |
71 | goto error; | 71 | goto error; |
72 | 72 | ||
@@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb) | |||
85 | } | 85 | } |
86 | dst = skb->dst; | 86 | dst = skb->dst; |
87 | x = dst->xfrm; | 87 | x = dst->xfrm; |
88 | } while (x && !x->props.mode); | 88 | } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); |
89 | 89 | ||
90 | IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; | 90 | IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; |
91 | err = 0; | 91 | err = 0; |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8f50eae47d03..eabcd27b1767 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -21,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) | |||
21 | return __ip_route_output_key((struct rtable**)dst, fl); | 21 | return __ip_route_output_key((struct rtable**)dst, fl); |
22 | } | 22 | } |
23 | 23 | ||
24 | static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) | ||
25 | { | ||
26 | struct rtable *rt; | ||
27 | struct flowi fl_tunnel = { | ||
28 | .nl_u = { | ||
29 | .ip4_u = { | ||
30 | .daddr = daddr->a4, | ||
31 | }, | ||
32 | }, | ||
33 | }; | ||
34 | |||
35 | if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { | ||
36 | saddr->a4 = rt->rt_src; | ||
37 | dst_release(&rt->u.dst); | ||
38 | return 0; | ||
39 | } | ||
40 | return -EHOSTUNREACH; | ||
41 | } | ||
42 | |||
24 | static struct dst_entry * | 43 | static struct dst_entry * |
25 | __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) | 44 | __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) |
26 | { | 45 | { |
@@ -33,7 +52,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) | |||
33 | xdst->u.rt.fl.fl4_dst == fl->fl4_dst && | 52 | xdst->u.rt.fl.fl4_dst == fl->fl4_dst && |
34 | xdst->u.rt.fl.fl4_src == fl->fl4_src && | 53 | xdst->u.rt.fl.fl4_src == fl->fl4_src && |
35 | xdst->u.rt.fl.fl4_tos == fl->fl4_tos && | 54 | xdst->u.rt.fl.fl4_tos == fl->fl4_tos && |
36 | xfrm_bundle_ok(xdst, fl, AF_INET)) { | 55 | xfrm_bundle_ok(xdst, fl, AF_INET, 0)) { |
37 | dst_clone(dst); | 56 | dst_clone(dst); |
38 | break; | 57 | break; |
39 | } | 58 | } |
@@ -93,10 +112,11 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int | |||
93 | 112 | ||
94 | xdst = (struct xfrm_dst *)dst1; | 113 | xdst = (struct xfrm_dst *)dst1; |
95 | xdst->route = &rt->u.dst; | 114 | xdst->route = &rt->u.dst; |
115 | xdst->genid = xfrm[i]->genid; | ||
96 | 116 | ||
97 | dst1->next = dst_prev; | 117 | dst1->next = dst_prev; |
98 | dst_prev = dst1; | 118 | dst_prev = dst1; |
99 | if (xfrm[i]->props.mode) { | 119 | if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { |
100 | remote = xfrm[i]->id.daddr.a4; | 120 | remote = xfrm[i]->id.daddr.a4; |
101 | local = xfrm[i]->props.saddr.a4; | 121 | local = xfrm[i]->props.saddr.a4; |
102 | tunnel = 1; | 122 | tunnel = 1; |
@@ -135,6 +155,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int | |||
135 | dst_prev->flags |= DST_HOST; | 155 | dst_prev->flags |= DST_HOST; |
136 | dst_prev->lastuse = jiffies; | 156 | dst_prev->lastuse = jiffies; |
137 | dst_prev->header_len = header_len; | 157 | dst_prev->header_len = header_len; |
158 | dst_prev->nfheader_len = 0; | ||
138 | dst_prev->trailer_len = trailer_len; | 159 | dst_prev->trailer_len = trailer_len; |
139 | memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); | 160 | memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); |
140 | 161 | ||
@@ -296,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | |||
296 | .family = AF_INET, | 317 | .family = AF_INET, |
297 | .dst_ops = &xfrm4_dst_ops, | 318 | .dst_ops = &xfrm4_dst_ops, |
298 | .dst_lookup = xfrm4_dst_lookup, | 319 | .dst_lookup = xfrm4_dst_lookup, |
320 | .get_saddr = xfrm4_get_saddr, | ||
299 | .find_bundle = __xfrm4_find_bundle, | 321 | .find_bundle = __xfrm4_find_bundle, |
300 | .bundle_create = __xfrm4_bundle_create, | 322 | .bundle_create = __xfrm4_bundle_create, |
301 | .decode_session = _decode_session4, | 323 | .decode_session = _decode_session4, |
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 81e1751c966e..fe2034494d08 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c | |||
@@ -42,99 +42,15 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, | |||
42 | x->props.saddr = tmpl->saddr; | 42 | x->props.saddr = tmpl->saddr; |
43 | if (x->props.saddr.a4 == 0) | 43 | if (x->props.saddr.a4 == 0) |
44 | x->props.saddr.a4 = saddr->a4; | 44 | x->props.saddr.a4 = saddr->a4; |
45 | if (tmpl->mode && x->props.saddr.a4 == 0) { | ||
46 | struct rtable *rt; | ||
47 | struct flowi fl_tunnel = { | ||
48 | .nl_u = { | ||
49 | .ip4_u = { | ||
50 | .daddr = x->id.daddr.a4, | ||
51 | } | ||
52 | } | ||
53 | }; | ||
54 | if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, | ||
55 | &fl_tunnel, AF_INET)) { | ||
56 | x->props.saddr.a4 = rt->rt_src; | ||
57 | dst_release(&rt->u.dst); | ||
58 | } | ||
59 | } | ||
60 | x->props.mode = tmpl->mode; | 45 | x->props.mode = tmpl->mode; |
61 | x->props.reqid = tmpl->reqid; | 46 | x->props.reqid = tmpl->reqid; |
62 | x->props.family = AF_INET; | 47 | x->props.family = AF_INET; |
63 | } | 48 | } |
64 | 49 | ||
65 | static struct xfrm_state * | ||
66 | __xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) | ||
67 | { | ||
68 | unsigned h = __xfrm4_spi_hash(daddr, spi, proto); | ||
69 | struct xfrm_state *x; | ||
70 | |||
71 | list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) { | ||
72 | if (x->props.family == AF_INET && | ||
73 | spi == x->id.spi && | ||
74 | daddr->a4 == x->id.daddr.a4 && | ||
75 | proto == x->id.proto) { | ||
76 | xfrm_state_hold(x); | ||
77 | return x; | ||
78 | } | ||
79 | } | ||
80 | return NULL; | ||
81 | } | ||
82 | |||
83 | static struct xfrm_state * | ||
84 | __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, | ||
85 | xfrm_address_t *daddr, xfrm_address_t *saddr, | ||
86 | int create) | ||
87 | { | ||
88 | struct xfrm_state *x, *x0; | ||
89 | unsigned h = __xfrm4_dst_hash(daddr); | ||
90 | |||
91 | x0 = NULL; | ||
92 | |||
93 | list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) { | ||
94 | if (x->props.family == AF_INET && | ||
95 | daddr->a4 == x->id.daddr.a4 && | ||
96 | mode == x->props.mode && | ||
97 | proto == x->id.proto && | ||
98 | saddr->a4 == x->props.saddr.a4 && | ||
99 | reqid == x->props.reqid && | ||
100 | x->km.state == XFRM_STATE_ACQ && | ||
101 | !x->id.spi) { | ||
102 | x0 = x; | ||
103 | break; | ||
104 | } | ||
105 | } | ||
106 | if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { | ||
107 | x0->sel.daddr.a4 = daddr->a4; | ||
108 | x0->sel.saddr.a4 = saddr->a4; | ||
109 | x0->sel.prefixlen_d = 32; | ||
110 | x0->sel.prefixlen_s = 32; | ||
111 | x0->props.saddr.a4 = saddr->a4; | ||
112 | x0->km.state = XFRM_STATE_ACQ; | ||
113 | x0->id.daddr.a4 = daddr->a4; | ||
114 | x0->id.proto = proto; | ||
115 | x0->props.family = AF_INET; | ||
116 | x0->props.mode = mode; | ||
117 | x0->props.reqid = reqid; | ||
118 | x0->props.family = AF_INET; | ||
119 | x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; | ||
120 | xfrm_state_hold(x0); | ||
121 | x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; | ||
122 | add_timer(&x0->timer); | ||
123 | xfrm_state_hold(x0); | ||
124 | list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h); | ||
125 | wake_up(&km_waitq); | ||
126 | } | ||
127 | if (x0) | ||
128 | xfrm_state_hold(x0); | ||
129 | return x0; | ||
130 | } | ||
131 | |||
132 | static struct xfrm_state_afinfo xfrm4_state_afinfo = { | 50 | static struct xfrm_state_afinfo xfrm4_state_afinfo = { |
133 | .family = AF_INET, | 51 | .family = AF_INET, |
134 | .init_flags = xfrm4_init_flags, | 52 | .init_flags = xfrm4_init_flags, |
135 | .init_tempsel = __xfrm4_init_tempsel, | 53 | .init_tempsel = __xfrm4_init_tempsel, |
136 | .state_lookup = __xfrm4_state_lookup, | ||
137 | .find_acq = __xfrm4_find_acq, | ||
138 | }; | 54 | }; |
139 | 55 | ||
140 | void __init xfrm4_state_init(void) | 56 | void __init xfrm4_state_init(void) |
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index f8ceaa127c83..f110af5b1319 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c | |||
@@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) | |||
28 | 28 | ||
29 | static int ipip_init_state(struct xfrm_state *x) | 29 | static int ipip_init_state(struct xfrm_state *x) |
30 | { | 30 | { |
31 | if (!x->props.mode) | 31 | if (x->props.mode != XFRM_MODE_TUNNEL) |
32 | return -EINVAL; | 32 | return -EINVAL; |
33 | 33 | ||
34 | if (x->encap) | 34 | if (x->encap) |
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 0ba06c0c5d39..a2d211da2aba 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig | |||
@@ -98,6 +98,15 @@ config INET6_IPCOMP | |||
98 | 98 | ||
99 | If unsure, say Y. | 99 | If unsure, say Y. |
100 | 100 | ||
101 | config IPV6_MIP6 | ||
102 | bool "IPv6: Mobility (EXPERIMENTAL)" | ||
103 | depends on IPV6 && EXPERIMENTAL | ||
104 | select XFRM | ||
105 | ---help--- | ||
106 | Support for IPv6 Mobility described in RFC 3775. | ||
107 | |||
108 | If unsure, say N. | ||
109 | |||
101 | config INET6_XFRM_TUNNEL | 110 | config INET6_XFRM_TUNNEL |
102 | tristate | 111 | tristate |
103 | select INET6_TUNNEL | 112 | select INET6_TUNNEL |
@@ -127,6 +136,13 @@ config INET6_XFRM_MODE_TUNNEL | |||
127 | 136 | ||
128 | If unsure, say Y. | 137 | If unsure, say Y. |
129 | 138 | ||
139 | config INET6_XFRM_MODE_ROUTEOPTIMIZATION | ||
140 | tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)" | ||
141 | depends on IPV6 && EXPERIMENTAL | ||
142 | select XFRM | ||
143 | ---help--- | ||
144 | Support for MIPv6 route optimization mode. | ||
145 | |||
130 | config IPV6_TUNNEL | 146 | config IPV6_TUNNEL |
131 | tristate "IPv6: IPv6-in-IPv6 tunnel" | 147 | tristate "IPv6: IPv6-in-IPv6 tunnel" |
132 | select INET6_TUNNEL | 148 | select INET6_TUNNEL |
@@ -136,3 +152,31 @@ config IPV6_TUNNEL | |||
136 | 152 | ||
137 | If unsure, say N. | 153 | If unsure, say N. |
138 | 154 | ||
155 | config IPV6_SUBTREES | ||
156 | bool "IPv6: source address based routing" | ||
157 | depends on IPV6 && EXPERIMENTAL | ||
158 | ---help--- | ||
159 | Enable routing by source address or prefix. | ||
160 | |||
161 | The destination address is still the primary routing key, so mixing | ||
162 | normal and source prefix specific routes in the same routing table | ||
163 | may sometimes lead to unintended routing behavior. This can be | ||
164 | avoided by defining different routing tables for the normal and | ||
165 | source prefix specific routes. | ||
166 | |||
167 | If unsure, say N. | ||
168 | |||
169 | config IPV6_MULTIPLE_TABLES | ||
170 | bool "IPv6: Multiple Routing Tables" | ||
171 | depends on IPV6 && EXPERIMENTAL | ||
172 | select FIB_RULES | ||
173 | ---help--- | ||
174 | Support multiple routing tables. | ||
175 | |||
176 | config IPV6_ROUTE_FWMARK | ||
177 | bool "IPv6: use netfilter MARK value as routing key" | ||
178 | depends on IPV6_MULTIPLE_TABLES && NETFILTER | ||
179 | ---help--- | ||
180 | If you say Y here, you will be able to specify different routes for | ||
181 | packets with different mark values (see iptables(8), MARK target). | ||
182 | |||
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 386e0a626948..0213c6612b58 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile | |||
@@ -13,6 +13,9 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ | |||
13 | ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ | 13 | ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ |
14 | xfrm6_output.o | 14 | xfrm6_output.o |
15 | ipv6-$(CONFIG_NETFILTER) += netfilter.o | 15 | ipv6-$(CONFIG_NETFILTER) += netfilter.o |
16 | ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o | ||
17 | ipv6-$(CONFIG_IPV6_MIP6) += mip6.o | ||
18 | |||
16 | ipv6-objs += $(ipv6-y) | 19 | ipv6-objs += $(ipv6-y) |
17 | 20 | ||
18 | obj-$(CONFIG_INET6_AH) += ah6.o | 21 | obj-$(CONFIG_INET6_AH) += ah6.o |
@@ -22,6 +25,7 @@ obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o | |||
22 | obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o | 25 | obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o |
23 | obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o | 26 | obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o |
24 | obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o | 27 | obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o |
28 | obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o | ||
25 | obj-$(CONFIG_NETFILTER) += netfilter/ | 29 | obj-$(CONFIG_NETFILTER) += netfilter/ |
26 | 30 | ||
27 | obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o | 31 | obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o |
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c7852b38e03e..c18676352397 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <linux/net.h> | 48 | #include <linux/net.h> |
49 | #include <linux/in6.h> | 49 | #include <linux/in6.h> |
50 | #include <linux/netdevice.h> | 50 | #include <linux/netdevice.h> |
51 | #include <linux/if_addr.h> | ||
51 | #include <linux/if_arp.h> | 52 | #include <linux/if_arp.h> |
52 | #include <linux/if_arcnet.h> | 53 | #include <linux/if_arcnet.h> |
53 | #include <linux/if_infiniband.h> | 54 | #include <linux/if_infiniband.h> |
@@ -72,6 +73,7 @@ | |||
72 | #include <net/addrconf.h> | 73 | #include <net/addrconf.h> |
73 | #include <net/tcp.h> | 74 | #include <net/tcp.h> |
74 | #include <net/ip.h> | 75 | #include <net/ip.h> |
76 | #include <net/netlink.h> | ||
75 | #include <linux/if_tunnel.h> | 77 | #include <linux/if_tunnel.h> |
76 | #include <linux/rtnetlink.h> | 78 | #include <linux/rtnetlink.h> |
77 | 79 | ||
@@ -117,9 +119,6 @@ static int ipv6_count_addresses(struct inet6_dev *idev); | |||
117 | static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; | 119 | static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; |
118 | static DEFINE_RWLOCK(addrconf_hash_lock); | 120 | static DEFINE_RWLOCK(addrconf_hash_lock); |
119 | 121 | ||
120 | /* Protects inet6 devices */ | ||
121 | DEFINE_RWLOCK(addrconf_lock); | ||
122 | |||
123 | static void addrconf_verify(unsigned long); | 122 | static void addrconf_verify(unsigned long); |
124 | 123 | ||
125 | static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); | 124 | static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); |
@@ -144,7 +143,7 @@ static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *de | |||
144 | 143 | ||
145 | static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); | 144 | static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); |
146 | 145 | ||
147 | struct ipv6_devconf ipv6_devconf = { | 146 | struct ipv6_devconf ipv6_devconf __read_mostly = { |
148 | .forwarding = 0, | 147 | .forwarding = 0, |
149 | .hop_limit = IPV6_DEFAULT_HOPLIMIT, | 148 | .hop_limit = IPV6_DEFAULT_HOPLIMIT, |
150 | .mtu6 = IPV6_MIN_MTU, | 149 | .mtu6 = IPV6_MIN_MTU, |
@@ -173,9 +172,10 @@ struct ipv6_devconf ipv6_devconf = { | |||
173 | .accept_ra_rt_info_max_plen = 0, | 172 | .accept_ra_rt_info_max_plen = 0, |
174 | #endif | 173 | #endif |
175 | #endif | 174 | #endif |
175 | .proxy_ndp = 0, | ||
176 | }; | 176 | }; |
177 | 177 | ||
178 | static struct ipv6_devconf ipv6_devconf_dflt = { | 178 | static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { |
179 | .forwarding = 0, | 179 | .forwarding = 0, |
180 | .hop_limit = IPV6_DEFAULT_HOPLIMIT, | 180 | .hop_limit = IPV6_DEFAULT_HOPLIMIT, |
181 | .mtu6 = IPV6_MIN_MTU, | 181 | .mtu6 = IPV6_MIN_MTU, |
@@ -203,6 +203,7 @@ static struct ipv6_devconf ipv6_devconf_dflt = { | |||
203 | .accept_ra_rt_info_max_plen = 0, | 203 | .accept_ra_rt_info_max_plen = 0, |
204 | #endif | 204 | #endif |
205 | #endif | 205 | #endif |
206 | .proxy_ndp = 0, | ||
206 | }; | 207 | }; |
207 | 208 | ||
208 | /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ | 209 | /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ |
@@ -314,6 +315,12 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, | |||
314 | 315 | ||
315 | /* Nobody refers to this device, we may destroy it. */ | 316 | /* Nobody refers to this device, we may destroy it. */ |
316 | 317 | ||
318 | static void in6_dev_finish_destroy_rcu(struct rcu_head *head) | ||
319 | { | ||
320 | struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); | ||
321 | kfree(idev); | ||
322 | } | ||
323 | |||
317 | void in6_dev_finish_destroy(struct inet6_dev *idev) | 324 | void in6_dev_finish_destroy(struct inet6_dev *idev) |
318 | { | 325 | { |
319 | struct net_device *dev = idev->dev; | 326 | struct net_device *dev = idev->dev; |
@@ -328,7 +335,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) | |||
328 | return; | 335 | return; |
329 | } | 336 | } |
330 | snmp6_free_dev(idev); | 337 | snmp6_free_dev(idev); |
331 | kfree(idev); | 338 | call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); |
332 | } | 339 | } |
333 | 340 | ||
334 | static struct inet6_dev * ipv6_add_dev(struct net_device *dev) | 341 | static struct inet6_dev * ipv6_add_dev(struct net_device *dev) |
@@ -404,9 +411,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) | |||
404 | if (netif_carrier_ok(dev)) | 411 | if (netif_carrier_ok(dev)) |
405 | ndev->if_flags |= IF_READY; | 412 | ndev->if_flags |= IF_READY; |
406 | 413 | ||
407 | write_lock_bh(&addrconf_lock); | 414 | /* protected by rtnl_lock */ |
408 | dev->ip6_ptr = ndev; | 415 | rcu_assign_pointer(dev->ip6_ptr, ndev); |
409 | write_unlock_bh(&addrconf_lock); | ||
410 | 416 | ||
411 | ipv6_mc_init_dev(ndev); | 417 | ipv6_mc_init_dev(ndev); |
412 | ndev->tstamp = jiffies; | 418 | ndev->tstamp = jiffies; |
@@ -470,7 +476,7 @@ static void addrconf_forward_change(void) | |||
470 | 476 | ||
471 | read_lock(&dev_base_lock); | 477 | read_lock(&dev_base_lock); |
472 | for (dev=dev_base; dev; dev=dev->next) { | 478 | for (dev=dev_base; dev; dev=dev->next) { |
473 | read_lock(&addrconf_lock); | 479 | rcu_read_lock(); |
474 | idev = __in6_dev_get(dev); | 480 | idev = __in6_dev_get(dev); |
475 | if (idev) { | 481 | if (idev) { |
476 | int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); | 482 | int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); |
@@ -478,7 +484,7 @@ static void addrconf_forward_change(void) | |||
478 | if (changed) | 484 | if (changed) |
479 | dev_forward_change(idev); | 485 | dev_forward_change(idev); |
480 | } | 486 | } |
481 | read_unlock(&addrconf_lock); | 487 | rcu_read_unlock(); |
482 | } | 488 | } |
483 | read_unlock(&dev_base_lock); | 489 | read_unlock(&dev_base_lock); |
484 | } | 490 | } |
@@ -539,7 +545,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, | |||
539 | int hash; | 545 | int hash; |
540 | int err = 0; | 546 | int err = 0; |
541 | 547 | ||
542 | read_lock_bh(&addrconf_lock); | 548 | rcu_read_lock_bh(); |
543 | if (idev->dead) { | 549 | if (idev->dead) { |
544 | err = -ENODEV; /*XXX*/ | 550 | err = -ENODEV; /*XXX*/ |
545 | goto out2; | 551 | goto out2; |
@@ -608,7 +614,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, | |||
608 | in6_ifa_hold(ifa); | 614 | in6_ifa_hold(ifa); |
609 | write_unlock(&idev->lock); | 615 | write_unlock(&idev->lock); |
610 | out2: | 616 | out2: |
611 | read_unlock_bh(&addrconf_lock); | 617 | rcu_read_unlock_bh(); |
612 | 618 | ||
613 | if (likely(err == 0)) | 619 | if (likely(err == 0)) |
614 | atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); | 620 | atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); |
@@ -734,7 +740,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) | |||
734 | 740 | ||
735 | if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { | 741 | if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { |
736 | if (onlink == 0) { | 742 | if (onlink == 0) { |
737 | ip6_del_rt(rt, NULL, NULL, NULL); | 743 | ip6_del_rt(rt); |
738 | rt = NULL; | 744 | rt = NULL; |
739 | } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { | 745 | } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { |
740 | rt->rt6i_expires = expires; | 746 | rt->rt6i_expires = expires; |
@@ -911,7 +917,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, | |||
911 | memset(&hiscore, 0, sizeof(hiscore)); | 917 | memset(&hiscore, 0, sizeof(hiscore)); |
912 | 918 | ||
913 | read_lock(&dev_base_lock); | 919 | read_lock(&dev_base_lock); |
914 | read_lock(&addrconf_lock); | 920 | rcu_read_lock(); |
915 | 921 | ||
916 | for (dev = dev_base; dev; dev=dev->next) { | 922 | for (dev = dev_base; dev; dev=dev->next) { |
917 | struct inet6_dev *idev; | 923 | struct inet6_dev *idev; |
@@ -1032,9 +1038,27 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, | |||
1032 | continue; | 1038 | continue; |
1033 | } | 1039 | } |
1034 | 1040 | ||
1035 | /* Rule 4: Prefer home address -- not implemented yet */ | 1041 | /* Rule 4: Prefer home address */ |
1042 | #ifdef CONFIG_IPV6_MIP6 | ||
1043 | if (hiscore.rule < 4) { | ||
1044 | if (ifa_result->flags & IFA_F_HOMEADDRESS) | ||
1045 | hiscore.attrs |= IPV6_SADDR_SCORE_HOA; | ||
1046 | hiscore.rule++; | ||
1047 | } | ||
1048 | if (ifa->flags & IFA_F_HOMEADDRESS) { | ||
1049 | score.attrs |= IPV6_SADDR_SCORE_HOA; | ||
1050 | if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { | ||
1051 | score.rule = 4; | ||
1052 | goto record_it; | ||
1053 | } | ||
1054 | } else { | ||
1055 | if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) | ||
1056 | continue; | ||
1057 | } | ||
1058 | #else | ||
1036 | if (hiscore.rule < 4) | 1059 | if (hiscore.rule < 4) |
1037 | hiscore.rule++; | 1060 | hiscore.rule++; |
1061 | #endif | ||
1038 | 1062 | ||
1039 | /* Rule 5: Prefer outgoing interface */ | 1063 | /* Rule 5: Prefer outgoing interface */ |
1040 | if (hiscore.rule < 5) { | 1064 | if (hiscore.rule < 5) { |
@@ -1123,7 +1147,7 @@ record_it: | |||
1123 | } | 1147 | } |
1124 | read_unlock_bh(&idev->lock); | 1148 | read_unlock_bh(&idev->lock); |
1125 | } | 1149 | } |
1126 | read_unlock(&addrconf_lock); | 1150 | rcu_read_unlock(); |
1127 | read_unlock(&dev_base_lock); | 1151 | read_unlock(&dev_base_lock); |
1128 | 1152 | ||
1129 | if (!ifa_result) | 1153 | if (!ifa_result) |
@@ -1147,7 +1171,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) | |||
1147 | struct inet6_dev *idev; | 1171 | struct inet6_dev *idev; |
1148 | int err = -EADDRNOTAVAIL; | 1172 | int err = -EADDRNOTAVAIL; |
1149 | 1173 | ||
1150 | read_lock(&addrconf_lock); | 1174 | rcu_read_lock(); |
1151 | if ((idev = __in6_dev_get(dev)) != NULL) { | 1175 | if ((idev = __in6_dev_get(dev)) != NULL) { |
1152 | struct inet6_ifaddr *ifp; | 1176 | struct inet6_ifaddr *ifp; |
1153 | 1177 | ||
@@ -1161,7 +1185,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) | |||
1161 | } | 1185 | } |
1162 | read_unlock_bh(&idev->lock); | 1186 | read_unlock_bh(&idev->lock); |
1163 | } | 1187 | } |
1164 | read_unlock(&addrconf_lock); | 1188 | rcu_read_unlock(); |
1165 | return err; | 1189 | return err; |
1166 | } | 1190 | } |
1167 | 1191 | ||
@@ -1462,7 +1486,7 @@ static void ipv6_regen_rndid(unsigned long data) | |||
1462 | struct inet6_dev *idev = (struct inet6_dev *) data; | 1486 | struct inet6_dev *idev = (struct inet6_dev *) data; |
1463 | unsigned long expires; | 1487 | unsigned long expires; |
1464 | 1488 | ||
1465 | read_lock_bh(&addrconf_lock); | 1489 | rcu_read_lock_bh(); |
1466 | write_lock_bh(&idev->lock); | 1490 | write_lock_bh(&idev->lock); |
1467 | 1491 | ||
1468 | if (idev->dead) | 1492 | if (idev->dead) |
@@ -1486,7 +1510,7 @@ static void ipv6_regen_rndid(unsigned long data) | |||
1486 | 1510 | ||
1487 | out: | 1511 | out: |
1488 | write_unlock_bh(&idev->lock); | 1512 | write_unlock_bh(&idev->lock); |
1489 | read_unlock_bh(&addrconf_lock); | 1513 | rcu_read_unlock_bh(); |
1490 | in6_dev_put(idev); | 1514 | in6_dev_put(idev); |
1491 | } | 1515 | } |
1492 | 1516 | ||
@@ -1507,59 +1531,56 @@ static void | |||
1507 | addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, | 1531 | addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, |
1508 | unsigned long expires, u32 flags) | 1532 | unsigned long expires, u32 flags) |
1509 | { | 1533 | { |
1510 | struct in6_rtmsg rtmsg; | 1534 | struct fib6_config cfg = { |
1535 | .fc_table = RT6_TABLE_PREFIX, | ||
1536 | .fc_metric = IP6_RT_PRIO_ADDRCONF, | ||
1537 | .fc_ifindex = dev->ifindex, | ||
1538 | .fc_expires = expires, | ||
1539 | .fc_dst_len = plen, | ||
1540 | .fc_flags = RTF_UP | flags, | ||
1541 | }; | ||
1511 | 1542 | ||
1512 | memset(&rtmsg, 0, sizeof(rtmsg)); | 1543 | ipv6_addr_copy(&cfg.fc_dst, pfx); |
1513 | ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx); | ||
1514 | rtmsg.rtmsg_dst_len = plen; | ||
1515 | rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; | ||
1516 | rtmsg.rtmsg_ifindex = dev->ifindex; | ||
1517 | rtmsg.rtmsg_info = expires; | ||
1518 | rtmsg.rtmsg_flags = RTF_UP|flags; | ||
1519 | rtmsg.rtmsg_type = RTMSG_NEWROUTE; | ||
1520 | 1544 | ||
1521 | /* Prevent useless cloning on PtP SIT. | 1545 | /* Prevent useless cloning on PtP SIT. |
1522 | This thing is done here expecting that the whole | 1546 | This thing is done here expecting that the whole |
1523 | class of non-broadcast devices need not cloning. | 1547 | class of non-broadcast devices need not cloning. |
1524 | */ | 1548 | */ |
1525 | if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) | 1549 | if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) |
1526 | rtmsg.rtmsg_flags |= RTF_NONEXTHOP; | 1550 | cfg.fc_flags |= RTF_NONEXTHOP; |
1527 | 1551 | ||
1528 | ip6_route_add(&rtmsg, NULL, NULL, NULL); | 1552 | ip6_route_add(&cfg); |
1529 | } | 1553 | } |
1530 | 1554 | ||
1531 | /* Create "default" multicast route to the interface */ | 1555 | /* Create "default" multicast route to the interface */ |
1532 | 1556 | ||
1533 | static void addrconf_add_mroute(struct net_device *dev) | 1557 | static void addrconf_add_mroute(struct net_device *dev) |
1534 | { | 1558 | { |
1535 | struct in6_rtmsg rtmsg; | 1559 | struct fib6_config cfg = { |
1560 | .fc_table = RT6_TABLE_LOCAL, | ||
1561 | .fc_metric = IP6_RT_PRIO_ADDRCONF, | ||
1562 | .fc_ifindex = dev->ifindex, | ||
1563 | .fc_dst_len = 8, | ||
1564 | .fc_flags = RTF_UP, | ||
1565 | }; | ||
1566 | |||
1567 | ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); | ||
1536 | 1568 | ||
1537 | memset(&rtmsg, 0, sizeof(rtmsg)); | 1569 | ip6_route_add(&cfg); |
1538 | ipv6_addr_set(&rtmsg.rtmsg_dst, | ||
1539 | htonl(0xFF000000), 0, 0, 0); | ||
1540 | rtmsg.rtmsg_dst_len = 8; | ||
1541 | rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; | ||
1542 | rtmsg.rtmsg_ifindex = dev->ifindex; | ||
1543 | rtmsg.rtmsg_flags = RTF_UP; | ||
1544 | rtmsg.rtmsg_type = RTMSG_NEWROUTE; | ||
1545 | ip6_route_add(&rtmsg, NULL, NULL, NULL); | ||
1546 | } | 1570 | } |
1547 | 1571 | ||
1548 | static void sit_route_add(struct net_device *dev) | 1572 | static void sit_route_add(struct net_device *dev) |
1549 | { | 1573 | { |
1550 | struct in6_rtmsg rtmsg; | 1574 | struct fib6_config cfg = { |
1551 | 1575 | .fc_table = RT6_TABLE_MAIN, | |
1552 | memset(&rtmsg, 0, sizeof(rtmsg)); | 1576 | .fc_metric = IP6_RT_PRIO_ADDRCONF, |
1553 | 1577 | .fc_ifindex = dev->ifindex, | |
1554 | rtmsg.rtmsg_type = RTMSG_NEWROUTE; | 1578 | .fc_dst_len = 96, |
1555 | rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; | 1579 | .fc_flags = RTF_UP | RTF_NONEXTHOP, |
1580 | }; | ||
1556 | 1581 | ||
1557 | /* prefix length - 96 bits "::d.d.d.d" */ | 1582 | /* prefix length - 96 bits "::d.d.d.d" */ |
1558 | rtmsg.rtmsg_dst_len = 96; | 1583 | ip6_route_add(&cfg); |
1559 | rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; | ||
1560 | rtmsg.rtmsg_ifindex = dev->ifindex; | ||
1561 | |||
1562 | ip6_route_add(&rtmsg, NULL, NULL, NULL); | ||
1563 | } | 1584 | } |
1564 | 1585 | ||
1565 | static void addrconf_add_lroute(struct net_device *dev) | 1586 | static void addrconf_add_lroute(struct net_device *dev) |
@@ -1660,7 +1681,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) | |||
1660 | if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { | 1681 | if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { |
1661 | if (rt->rt6i_flags&RTF_EXPIRES) { | 1682 | if (rt->rt6i_flags&RTF_EXPIRES) { |
1662 | if (valid_lft == 0) { | 1683 | if (valid_lft == 0) { |
1663 | ip6_del_rt(rt, NULL, NULL, NULL); | 1684 | ip6_del_rt(rt); |
1664 | rt = NULL; | 1685 | rt = NULL; |
1665 | } else { | 1686 | } else { |
1666 | rt->rt6i_expires = jiffies + rt_expires; | 1687 | rt->rt6i_expires = jiffies + rt_expires; |
@@ -1870,12 +1891,11 @@ err_exit: | |||
1870 | * Manual configuration of address on an interface | 1891 | * Manual configuration of address on an interface |
1871 | */ | 1892 | */ |
1872 | static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, | 1893 | static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, |
1873 | __u32 prefered_lft, __u32 valid_lft) | 1894 | __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) |
1874 | { | 1895 | { |
1875 | struct inet6_ifaddr *ifp; | 1896 | struct inet6_ifaddr *ifp; |
1876 | struct inet6_dev *idev; | 1897 | struct inet6_dev *idev; |
1877 | struct net_device *dev; | 1898 | struct net_device *dev; |
1878 | __u8 ifa_flags = 0; | ||
1879 | int scope; | 1899 | int scope; |
1880 | 1900 | ||
1881 | ASSERT_RTNL(); | 1901 | ASSERT_RTNL(); |
@@ -1887,9 +1907,6 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, | |||
1887 | if ((dev = __dev_get_by_index(ifindex)) == NULL) | 1907 | if ((dev = __dev_get_by_index(ifindex)) == NULL) |
1888 | return -ENODEV; | 1908 | return -ENODEV; |
1889 | 1909 | ||
1890 | if (!(dev->flags&IFF_UP)) | ||
1891 | return -ENETDOWN; | ||
1892 | |||
1893 | if ((idev = addrconf_add_dev(dev)) == NULL) | 1910 | if ((idev = addrconf_add_dev(dev)) == NULL) |
1894 | return -ENOBUFS; | 1911 | return -ENOBUFS; |
1895 | 1912 | ||
@@ -1971,7 +1988,7 @@ int addrconf_add_ifaddr(void __user *arg) | |||
1971 | 1988 | ||
1972 | rtnl_lock(); | 1989 | rtnl_lock(); |
1973 | err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, | 1990 | err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, |
1974 | INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); | 1991 | IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); |
1975 | rtnl_unlock(); | 1992 | rtnl_unlock(); |
1976 | return err; | 1993 | return err; |
1977 | } | 1994 | } |
@@ -2344,10 +2361,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) | |||
2344 | Do not dev_put! | 2361 | Do not dev_put! |
2345 | */ | 2362 | */ |
2346 | if (how == 1) { | 2363 | if (how == 1) { |
2347 | write_lock_bh(&addrconf_lock); | ||
2348 | dev->ip6_ptr = NULL; | ||
2349 | idev->dead = 1; | 2364 | idev->dead = 1; |
2350 | write_unlock_bh(&addrconf_lock); | 2365 | |
2366 | /* protected by rtnl_lock */ | ||
2367 | rcu_assign_pointer(dev->ip6_ptr, NULL); | ||
2351 | 2368 | ||
2352 | /* Step 1.5: remove snmp6 entry */ | 2369 | /* Step 1.5: remove snmp6 entry */ |
2353 | snmp6_unregister_dev(idev); | 2370 | snmp6_unregister_dev(idev); |
@@ -2514,7 +2531,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) | |||
2514 | spin_lock_bh(&ifp->lock); | 2531 | spin_lock_bh(&ifp->lock); |
2515 | 2532 | ||
2516 | if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || | 2533 | if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || |
2517 | !(ifp->flags&IFA_F_TENTATIVE)) { | 2534 | !(ifp->flags&IFA_F_TENTATIVE) || |
2535 | ifp->flags & IFA_F_NODAD) { | ||
2518 | ifp->flags &= ~IFA_F_TENTATIVE; | 2536 | ifp->flags &= ~IFA_F_TENTATIVE; |
2519 | spin_unlock_bh(&ifp->lock); | 2537 | spin_unlock_bh(&ifp->lock); |
2520 | read_unlock_bh(&idev->lock); | 2538 | read_unlock_bh(&idev->lock); |
@@ -2759,6 +2777,26 @@ void if6_proc_exit(void) | |||
2759 | } | 2777 | } |
2760 | #endif /* CONFIG_PROC_FS */ | 2778 | #endif /* CONFIG_PROC_FS */ |
2761 | 2779 | ||
2780 | #ifdef CONFIG_IPV6_MIP6 | ||
2781 | /* Check if address is a home address configured on any interface. */ | ||
2782 | int ipv6_chk_home_addr(struct in6_addr *addr) | ||
2783 | { | ||
2784 | int ret = 0; | ||
2785 | struct inet6_ifaddr * ifp; | ||
2786 | u8 hash = ipv6_addr_hash(addr); | ||
2787 | read_lock_bh(&addrconf_hash_lock); | ||
2788 | for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { | ||
2789 | if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && | ||
2790 | (ifp->flags & IFA_F_HOMEADDRESS)) { | ||
2791 | ret = 1; | ||
2792 | break; | ||
2793 | } | ||
2794 | } | ||
2795 | read_unlock_bh(&addrconf_hash_lock); | ||
2796 | return ret; | ||
2797 | } | ||
2798 | #endif | ||
2799 | |||
2762 | /* | 2800 | /* |
2763 | * Periodic address status verification | 2801 | * Periodic address status verification |
2764 | */ | 2802 | */ |
@@ -2869,66 +2907,68 @@ restart: | |||
2869 | spin_unlock_bh(&addrconf_verify_lock); | 2907 | spin_unlock_bh(&addrconf_verify_lock); |
2870 | } | 2908 | } |
2871 | 2909 | ||
2910 | static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) | ||
2911 | { | ||
2912 | struct in6_addr *pfx = NULL; | ||
2913 | |||
2914 | if (addr) | ||
2915 | pfx = nla_data(addr); | ||
2916 | |||
2917 | if (local) { | ||
2918 | if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) | ||
2919 | pfx = NULL; | ||
2920 | else | ||
2921 | pfx = nla_data(local); | ||
2922 | } | ||
2923 | |||
2924 | return pfx; | ||
2925 | } | ||
2926 | |||
2927 | static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = { | ||
2928 | [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, | ||
2929 | [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, | ||
2930 | [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, | ||
2931 | }; | ||
2932 | |||
2872 | static int | 2933 | static int |
2873 | inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 2934 | inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
2874 | { | 2935 | { |
2875 | struct rtattr **rta = arg; | 2936 | struct ifaddrmsg *ifm; |
2876 | struct ifaddrmsg *ifm = NLMSG_DATA(nlh); | 2937 | struct nlattr *tb[IFA_MAX+1]; |
2877 | struct in6_addr *pfx; | 2938 | struct in6_addr *pfx; |
2939 | int err; | ||
2878 | 2940 | ||
2879 | pfx = NULL; | 2941 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); |
2880 | if (rta[IFA_ADDRESS-1]) { | 2942 | if (err < 0) |
2881 | if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) | 2943 | return err; |
2882 | return -EINVAL; | 2944 | |
2883 | pfx = RTA_DATA(rta[IFA_ADDRESS-1]); | 2945 | ifm = nlmsg_data(nlh); |
2884 | } | 2946 | pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); |
2885 | if (rta[IFA_LOCAL-1]) { | ||
2886 | if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || | ||
2887 | (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) | ||
2888 | return -EINVAL; | ||
2889 | pfx = RTA_DATA(rta[IFA_LOCAL-1]); | ||
2890 | } | ||
2891 | if (pfx == NULL) | 2947 | if (pfx == NULL) |
2892 | return -EINVAL; | 2948 | return -EINVAL; |
2893 | 2949 | ||
2894 | return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); | 2950 | return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); |
2895 | } | 2951 | } |
2896 | 2952 | ||
2897 | static int | 2953 | static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, |
2898 | inet6_addr_modify(int ifindex, struct in6_addr *pfx, | 2954 | u32 prefered_lft, u32 valid_lft) |
2899 | __u32 prefered_lft, __u32 valid_lft) | ||
2900 | { | 2955 | { |
2901 | struct inet6_ifaddr *ifp = NULL; | ||
2902 | struct net_device *dev; | ||
2903 | int ifa_flags = 0; | ||
2904 | |||
2905 | if ((dev = __dev_get_by_index(ifindex)) == NULL) | ||
2906 | return -ENODEV; | ||
2907 | |||
2908 | if (!(dev->flags&IFF_UP)) | ||
2909 | return -ENETDOWN; | ||
2910 | |||
2911 | if (!valid_lft || (prefered_lft > valid_lft)) | 2956 | if (!valid_lft || (prefered_lft > valid_lft)) |
2912 | return -EINVAL; | 2957 | return -EINVAL; |
2913 | 2958 | ||
2914 | ifp = ipv6_get_ifaddr(pfx, dev, 1); | ||
2915 | if (ifp == NULL) | ||
2916 | return -ENOENT; | ||
2917 | |||
2918 | if (valid_lft == INFINITY_LIFE_TIME) | 2959 | if (valid_lft == INFINITY_LIFE_TIME) |
2919 | ifa_flags = IFA_F_PERMANENT; | 2960 | ifa_flags |= IFA_F_PERMANENT; |
2920 | else if (valid_lft >= 0x7FFFFFFF/HZ) | 2961 | else if (valid_lft >= 0x7FFFFFFF/HZ) |
2921 | valid_lft = 0x7FFFFFFF/HZ; | 2962 | valid_lft = 0x7FFFFFFF/HZ; |
2922 | 2963 | ||
2923 | if (prefered_lft == 0) | 2964 | if (prefered_lft == 0) |
2924 | ifa_flags = IFA_F_DEPRECATED; | 2965 | ifa_flags |= IFA_F_DEPRECATED; |
2925 | else if ((prefered_lft >= 0x7FFFFFFF/HZ) && | 2966 | else if ((prefered_lft >= 0x7FFFFFFF/HZ) && |
2926 | (prefered_lft != INFINITY_LIFE_TIME)) | 2967 | (prefered_lft != INFINITY_LIFE_TIME)) |
2927 | prefered_lft = 0x7FFFFFFF/HZ; | 2968 | prefered_lft = 0x7FFFFFFF/HZ; |
2928 | 2969 | ||
2929 | spin_lock_bh(&ifp->lock); | 2970 | spin_lock_bh(&ifp->lock); |
2930 | ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags; | 2971 | ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; |
2931 | |||
2932 | ifp->tstamp = jiffies; | 2972 | ifp->tstamp = jiffies; |
2933 | ifp->valid_lft = valid_lft; | 2973 | ifp->valid_lft = valid_lft; |
2934 | ifp->prefered_lft = prefered_lft; | 2974 | ifp->prefered_lft = prefered_lft; |
@@ -2936,7 +2976,6 @@ inet6_addr_modify(int ifindex, struct in6_addr *pfx, | |||
2936 | spin_unlock_bh(&ifp->lock); | 2976 | spin_unlock_bh(&ifp->lock); |
2937 | if (!(ifp->flags&IFA_F_TENTATIVE)) | 2977 | if (!(ifp->flags&IFA_F_TENTATIVE)) |
2938 | ipv6_ifa_notify(0, ifp); | 2978 | ipv6_ifa_notify(0, ifp); |
2939 | in6_ifa_put(ifp); | ||
2940 | 2979 | ||
2941 | addrconf_verify(0); | 2980 | addrconf_verify(0); |
2942 | 2981 | ||
@@ -2946,172 +2985,189 @@ inet6_addr_modify(int ifindex, struct in6_addr *pfx, | |||
2946 | static int | 2985 | static int |
2947 | inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 2986 | inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
2948 | { | 2987 | { |
2949 | struct rtattr **rta = arg; | 2988 | struct ifaddrmsg *ifm; |
2950 | struct ifaddrmsg *ifm = NLMSG_DATA(nlh); | 2989 | struct nlattr *tb[IFA_MAX+1]; |
2951 | struct in6_addr *pfx; | 2990 | struct in6_addr *pfx; |
2952 | __u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME; | 2991 | struct inet6_ifaddr *ifa; |
2992 | struct net_device *dev; | ||
2993 | u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; | ||
2994 | u8 ifa_flags; | ||
2995 | int err; | ||
2953 | 2996 | ||
2954 | pfx = NULL; | 2997 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); |
2955 | if (rta[IFA_ADDRESS-1]) { | 2998 | if (err < 0) |
2956 | if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) | 2999 | return err; |
2957 | return -EINVAL; | 3000 | |
2958 | pfx = RTA_DATA(rta[IFA_ADDRESS-1]); | 3001 | ifm = nlmsg_data(nlh); |
2959 | } | 3002 | pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); |
2960 | if (rta[IFA_LOCAL-1]) { | ||
2961 | if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || | ||
2962 | (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) | ||
2963 | return -EINVAL; | ||
2964 | pfx = RTA_DATA(rta[IFA_LOCAL-1]); | ||
2965 | } | ||
2966 | if (pfx == NULL) | 3003 | if (pfx == NULL) |
2967 | return -EINVAL; | 3004 | return -EINVAL; |
2968 | 3005 | ||
2969 | if (rta[IFA_CACHEINFO-1]) { | 3006 | if (tb[IFA_CACHEINFO]) { |
2970 | struct ifa_cacheinfo *ci; | 3007 | struct ifa_cacheinfo *ci; |
2971 | if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci)) | 3008 | |
2972 | return -EINVAL; | 3009 | ci = nla_data(tb[IFA_CACHEINFO]); |
2973 | ci = RTA_DATA(rta[IFA_CACHEINFO-1]); | ||
2974 | valid_lft = ci->ifa_valid; | 3010 | valid_lft = ci->ifa_valid; |
2975 | prefered_lft = ci->ifa_prefered; | 3011 | preferred_lft = ci->ifa_prefered; |
3012 | } else { | ||
3013 | preferred_lft = INFINITY_LIFE_TIME; | ||
3014 | valid_lft = INFINITY_LIFE_TIME; | ||
2976 | } | 3015 | } |
2977 | 3016 | ||
2978 | if (nlh->nlmsg_flags & NLM_F_REPLACE) { | 3017 | dev = __dev_get_by_index(ifm->ifa_index); |
2979 | int ret; | 3018 | if (dev == NULL) |
2980 | ret = inet6_addr_modify(ifm->ifa_index, pfx, | 3019 | return -ENODEV; |
2981 | prefered_lft, valid_lft); | 3020 | |
2982 | if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) | 3021 | /* We ignore other flags so far. */ |
2983 | return ret; | 3022 | ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); |
3023 | |||
3024 | ifa = ipv6_get_ifaddr(pfx, dev, 1); | ||
3025 | if (ifa == NULL) { | ||
3026 | /* | ||
3027 | * It would be best to check for !NLM_F_CREATE here but | ||
3028 | * userspace alreay relies on not having to provide this. | ||
3029 | */ | ||
3030 | return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, | ||
3031 | ifa_flags, preferred_lft, valid_lft); | ||
2984 | } | 3032 | } |
2985 | 3033 | ||
2986 | return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, | 3034 | if (nlh->nlmsg_flags & NLM_F_EXCL || |
2987 | prefered_lft, valid_lft); | 3035 | !(nlh->nlmsg_flags & NLM_F_REPLACE)) |
3036 | err = -EEXIST; | ||
3037 | else | ||
3038 | err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft); | ||
3039 | |||
3040 | in6_ifa_put(ifa); | ||
3041 | |||
3042 | return err; | ||
3043 | } | ||
3044 | |||
3045 | static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, | ||
3046 | u8 scope, int ifindex) | ||
3047 | { | ||
3048 | struct ifaddrmsg *ifm; | ||
2988 | 3049 | ||
3050 | ifm = nlmsg_data(nlh); | ||
3051 | ifm->ifa_family = AF_INET6; | ||
3052 | ifm->ifa_prefixlen = prefixlen; | ||
3053 | ifm->ifa_flags = flags; | ||
3054 | ifm->ifa_scope = scope; | ||
3055 | ifm->ifa_index = ifindex; | ||
2989 | } | 3056 | } |
2990 | 3057 | ||
2991 | /* Maximum length of ifa_cacheinfo attributes */ | 3058 | static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, |
2992 | #define INET6_IFADDR_RTA_SPACE \ | 3059 | unsigned long tstamp, u32 preferred, u32 valid) |
2993 | RTA_SPACE(16) /* IFA_ADDRESS */ + \ | 3060 | { |
2994 | RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */ | 3061 | struct ifa_cacheinfo ci; |
3062 | |||
3063 | ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 | ||
3064 | + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); | ||
3065 | ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100 | ||
3066 | + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); | ||
3067 | ci.ifa_prefered = preferred; | ||
3068 | ci.ifa_valid = valid; | ||
3069 | |||
3070 | return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); | ||
3071 | } | ||
3072 | |||
3073 | static inline int rt_scope(int ifa_scope) | ||
3074 | { | ||
3075 | if (ifa_scope & IFA_HOST) | ||
3076 | return RT_SCOPE_HOST; | ||
3077 | else if (ifa_scope & IFA_LINK) | ||
3078 | return RT_SCOPE_LINK; | ||
3079 | else if (ifa_scope & IFA_SITE) | ||
3080 | return RT_SCOPE_SITE; | ||
3081 | else | ||
3082 | return RT_SCOPE_UNIVERSE; | ||
3083 | } | ||
3084 | |||
3085 | static inline int inet6_ifaddr_msgsize(void) | ||
3086 | { | ||
3087 | return nlmsg_total_size(sizeof(struct ifaddrmsg) + | ||
3088 | nla_total_size(16) + | ||
3089 | nla_total_size(sizeof(struct ifa_cacheinfo)) + | ||
3090 | 128); | ||
3091 | } | ||
2995 | 3092 | ||
2996 | static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, | 3093 | static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, |
2997 | u32 pid, u32 seq, int event, unsigned int flags) | 3094 | u32 pid, u32 seq, int event, unsigned int flags) |
2998 | { | 3095 | { |
2999 | struct ifaddrmsg *ifm; | ||
3000 | struct nlmsghdr *nlh; | 3096 | struct nlmsghdr *nlh; |
3001 | struct ifa_cacheinfo ci; | 3097 | u32 preferred, valid; |
3002 | unsigned char *b = skb->tail; | 3098 | |
3099 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); | ||
3100 | if (nlh == NULL) | ||
3101 | return -ENOBUFS; | ||
3102 | |||
3103 | put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), | ||
3104 | ifa->idev->dev->ifindex); | ||
3003 | 3105 | ||
3004 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); | ||
3005 | ifm = NLMSG_DATA(nlh); | ||
3006 | ifm->ifa_family = AF_INET6; | ||
3007 | ifm->ifa_prefixlen = ifa->prefix_len; | ||
3008 | ifm->ifa_flags = ifa->flags; | ||
3009 | ifm->ifa_scope = RT_SCOPE_UNIVERSE; | ||
3010 | if (ifa->scope&IFA_HOST) | ||
3011 | ifm->ifa_scope = RT_SCOPE_HOST; | ||
3012 | else if (ifa->scope&IFA_LINK) | ||
3013 | ifm->ifa_scope = RT_SCOPE_LINK; | ||
3014 | else if (ifa->scope&IFA_SITE) | ||
3015 | ifm->ifa_scope = RT_SCOPE_SITE; | ||
3016 | ifm->ifa_index = ifa->idev->dev->ifindex; | ||
3017 | RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr); | ||
3018 | if (!(ifa->flags&IFA_F_PERMANENT)) { | 3106 | if (!(ifa->flags&IFA_F_PERMANENT)) { |
3019 | ci.ifa_prefered = ifa->prefered_lft; | 3107 | preferred = ifa->prefered_lft; |
3020 | ci.ifa_valid = ifa->valid_lft; | 3108 | valid = ifa->valid_lft; |
3021 | if (ci.ifa_prefered != INFINITY_LIFE_TIME) { | 3109 | if (preferred != INFINITY_LIFE_TIME) { |
3022 | long tval = (jiffies - ifa->tstamp)/HZ; | 3110 | long tval = (jiffies - ifa->tstamp)/HZ; |
3023 | ci.ifa_prefered -= tval; | 3111 | preferred -= tval; |
3024 | if (ci.ifa_valid != INFINITY_LIFE_TIME) | 3112 | if (valid != INFINITY_LIFE_TIME) |
3025 | ci.ifa_valid -= tval; | 3113 | valid -= tval; |
3026 | } | 3114 | } |
3027 | } else { | 3115 | } else { |
3028 | ci.ifa_prefered = INFINITY_LIFE_TIME; | 3116 | preferred = INFINITY_LIFE_TIME; |
3029 | ci.ifa_valid = INFINITY_LIFE_TIME; | 3117 | valid = INFINITY_LIFE_TIME; |
3030 | } | 3118 | } |
3031 | ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100 | ||
3032 | + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); | ||
3033 | ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100 | ||
3034 | + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); | ||
3035 | RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); | ||
3036 | nlh->nlmsg_len = skb->tail - b; | ||
3037 | return skb->len; | ||
3038 | 3119 | ||
3039 | nlmsg_failure: | 3120 | if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || |
3040 | rtattr_failure: | 3121 | put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) |
3041 | skb_trim(skb, b - skb->data); | 3122 | return nlmsg_cancel(skb, nlh); |
3042 | return -1; | 3123 | |
3124 | return nlmsg_end(skb, nlh); | ||
3043 | } | 3125 | } |
3044 | 3126 | ||
3045 | static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, | 3127 | static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, |
3046 | u32 pid, u32 seq, int event, u16 flags) | 3128 | u32 pid, u32 seq, int event, u16 flags) |
3047 | { | 3129 | { |
3048 | struct ifaddrmsg *ifm; | ||
3049 | struct nlmsghdr *nlh; | 3130 | struct nlmsghdr *nlh; |
3050 | struct ifa_cacheinfo ci; | 3131 | u8 scope = RT_SCOPE_UNIVERSE; |
3051 | unsigned char *b = skb->tail; | 3132 | int ifindex = ifmca->idev->dev->ifindex; |
3052 | |||
3053 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); | ||
3054 | ifm = NLMSG_DATA(nlh); | ||
3055 | ifm->ifa_family = AF_INET6; | ||
3056 | ifm->ifa_prefixlen = 128; | ||
3057 | ifm->ifa_flags = IFA_F_PERMANENT; | ||
3058 | ifm->ifa_scope = RT_SCOPE_UNIVERSE; | ||
3059 | if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE) | ||
3060 | ifm->ifa_scope = RT_SCOPE_SITE; | ||
3061 | ifm->ifa_index = ifmca->idev->dev->ifindex; | ||
3062 | RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr); | ||
3063 | ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ | ||
3064 | * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ | ||
3065 | * 100 / HZ); | ||
3066 | ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ | ||
3067 | * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ | ||
3068 | * 100 / HZ); | ||
3069 | ci.ifa_prefered = INFINITY_LIFE_TIME; | ||
3070 | ci.ifa_valid = INFINITY_LIFE_TIME; | ||
3071 | RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); | ||
3072 | nlh->nlmsg_len = skb->tail - b; | ||
3073 | return skb->len; | ||
3074 | 3133 | ||
3075 | nlmsg_failure: | 3134 | if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) |
3076 | rtattr_failure: | 3135 | scope = RT_SCOPE_SITE; |
3077 | skb_trim(skb, b - skb->data); | 3136 | |
3078 | return -1; | 3137 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); |
3138 | if (nlh == NULL) | ||
3139 | return -ENOBUFS; | ||
3140 | |||
3141 | put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); | ||
3142 | if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || | ||
3143 | put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, | ||
3144 | INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) | ||
3145 | return nlmsg_cancel(skb, nlh); | ||
3146 | |||
3147 | return nlmsg_end(skb, nlh); | ||
3079 | } | 3148 | } |
3080 | 3149 | ||
3081 | static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, | 3150 | static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, |
3082 | u32 pid, u32 seq, int event, unsigned int flags) | 3151 | u32 pid, u32 seq, int event, unsigned int flags) |
3083 | { | 3152 | { |
3084 | struct ifaddrmsg *ifm; | ||
3085 | struct nlmsghdr *nlh; | 3153 | struct nlmsghdr *nlh; |
3086 | struct ifa_cacheinfo ci; | 3154 | u8 scope = RT_SCOPE_UNIVERSE; |
3087 | unsigned char *b = skb->tail; | 3155 | int ifindex = ifaca->aca_idev->dev->ifindex; |
3088 | |||
3089 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); | ||
3090 | ifm = NLMSG_DATA(nlh); | ||
3091 | ifm->ifa_family = AF_INET6; | ||
3092 | ifm->ifa_prefixlen = 128; | ||
3093 | ifm->ifa_flags = IFA_F_PERMANENT; | ||
3094 | ifm->ifa_scope = RT_SCOPE_UNIVERSE; | ||
3095 | if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE) | ||
3096 | ifm->ifa_scope = RT_SCOPE_SITE; | ||
3097 | ifm->ifa_index = ifaca->aca_idev->dev->ifindex; | ||
3098 | RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr); | ||
3099 | ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ | ||
3100 | * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ | ||
3101 | * 100 / HZ); | ||
3102 | ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ | ||
3103 | * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ | ||
3104 | * 100 / HZ); | ||
3105 | ci.ifa_prefered = INFINITY_LIFE_TIME; | ||
3106 | ci.ifa_valid = INFINITY_LIFE_TIME; | ||
3107 | RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); | ||
3108 | nlh->nlmsg_len = skb->tail - b; | ||
3109 | return skb->len; | ||
3110 | 3156 | ||
3111 | nlmsg_failure: | 3157 | if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) |
3112 | rtattr_failure: | 3158 | scope = RT_SCOPE_SITE; |
3113 | skb_trim(skb, b - skb->data); | 3159 | |
3114 | return -1; | 3160 | nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); |
3161 | if (nlh == NULL) | ||
3162 | return -ENOBUFS; | ||
3163 | |||
3164 | put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); | ||
3165 | if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || | ||
3166 | put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, | ||
3167 | INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) | ||
3168 | return nlmsg_cancel(skb, nlh); | ||
3169 | |||
3170 | return nlmsg_end(skb, nlh); | ||
3115 | } | 3171 | } |
3116 | 3172 | ||
3117 | enum addr_type_t | 3173 | enum addr_type_t |
@@ -3222,79 +3278,74 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) | |||
3222 | return inet6_dump_addr(skb, cb, type); | 3278 | return inet6_dump_addr(skb, cb, type); |
3223 | } | 3279 | } |
3224 | 3280 | ||
3225 | static int inet6_rtm_getaddr(struct sk_buff *in_skb, | 3281 | static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, |
3226 | struct nlmsghdr* nlh, void *arg) | 3282 | void *arg) |
3227 | { | 3283 | { |
3228 | struct rtattr **rta = arg; | 3284 | struct ifaddrmsg *ifm; |
3229 | struct ifaddrmsg *ifm = NLMSG_DATA(nlh); | 3285 | struct nlattr *tb[IFA_MAX+1]; |
3230 | struct in6_addr *addr = NULL; | 3286 | struct in6_addr *addr = NULL; |
3231 | struct net_device *dev = NULL; | 3287 | struct net_device *dev = NULL; |
3232 | struct inet6_ifaddr *ifa; | 3288 | struct inet6_ifaddr *ifa; |
3233 | struct sk_buff *skb; | 3289 | struct sk_buff *skb; |
3234 | int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); | ||
3235 | int err; | 3290 | int err; |
3236 | 3291 | ||
3237 | if (rta[IFA_ADDRESS-1]) { | 3292 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); |
3238 | if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr)) | 3293 | if (err < 0) |
3239 | return -EINVAL; | 3294 | goto errout; |
3240 | addr = RTA_DATA(rta[IFA_ADDRESS-1]); | 3295 | |
3241 | } | 3296 | addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); |
3242 | if (rta[IFA_LOCAL-1]) { | 3297 | if (addr == NULL) { |
3243 | if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) || | 3298 | err = -EINVAL; |
3244 | (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr)))) | 3299 | goto errout; |
3245 | return -EINVAL; | ||
3246 | addr = RTA_DATA(rta[IFA_LOCAL-1]); | ||
3247 | } | 3300 | } |
3248 | if (addr == NULL) | ||
3249 | return -EINVAL; | ||
3250 | 3301 | ||
3302 | ifm = nlmsg_data(nlh); | ||
3251 | if (ifm->ifa_index) | 3303 | if (ifm->ifa_index) |
3252 | dev = __dev_get_by_index(ifm->ifa_index); | 3304 | dev = __dev_get_by_index(ifm->ifa_index); |
3253 | 3305 | ||
3254 | if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) | 3306 | if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { |
3255 | return -EADDRNOTAVAIL; | 3307 | err = -EADDRNOTAVAIL; |
3308 | goto errout; | ||
3309 | } | ||
3256 | 3310 | ||
3257 | if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) { | 3311 | if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { |
3258 | err = -ENOBUFS; | 3312 | err = -ENOBUFS; |
3259 | goto out; | 3313 | goto errout_ifa; |
3260 | } | 3314 | } |
3261 | 3315 | ||
3262 | NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; | ||
3263 | err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, | 3316 | err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, |
3264 | nlh->nlmsg_seq, RTM_NEWADDR, 0); | 3317 | nlh->nlmsg_seq, RTM_NEWADDR, 0); |
3265 | if (err < 0) { | 3318 | if (err < 0) { |
3266 | err = -EMSGSIZE; | 3319 | kfree_skb(skb); |
3267 | goto out_free; | 3320 | goto errout_ifa; |
3268 | } | 3321 | } |
3269 | 3322 | ||
3270 | err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | 3323 | err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); |
3271 | if (err > 0) | 3324 | errout_ifa: |
3272 | err = 0; | ||
3273 | out: | ||
3274 | in6_ifa_put(ifa); | 3325 | in6_ifa_put(ifa); |
3326 | errout: | ||
3275 | return err; | 3327 | return err; |
3276 | out_free: | ||
3277 | kfree_skb(skb); | ||
3278 | goto out; | ||
3279 | } | 3328 | } |
3280 | 3329 | ||
3281 | static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) | 3330 | static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) |
3282 | { | 3331 | { |
3283 | struct sk_buff *skb; | 3332 | struct sk_buff *skb; |
3284 | int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); | 3333 | int err = -ENOBUFS; |
3285 | 3334 | ||
3286 | skb = alloc_skb(size, GFP_ATOMIC); | 3335 | skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); |
3287 | if (!skb) { | 3336 | if (skb == NULL) |
3288 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); | 3337 | goto errout; |
3289 | return; | 3338 | |
3290 | } | 3339 | err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); |
3291 | if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { | 3340 | if (err < 0) { |
3292 | kfree_skb(skb); | 3341 | kfree_skb(skb); |
3293 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); | 3342 | goto errout; |
3294 | return; | ||
3295 | } | 3343 | } |
3296 | NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR; | 3344 | |
3297 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC); | 3345 | err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); |
3346 | errout: | ||
3347 | if (err < 0) | ||
3348 | rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); | ||
3298 | } | 3349 | } |
3299 | 3350 | ||
3300 | static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, | 3351 | static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, |
@@ -3329,6 +3380,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, | |||
3329 | array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; | 3380 | array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; |
3330 | #endif | 3381 | #endif |
3331 | #endif | 3382 | #endif |
3383 | array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; | ||
3332 | } | 3384 | } |
3333 | 3385 | ||
3334 | /* Maximum length of ifinfomsg attributes */ | 3386 | /* Maximum length of ifinfomsg attributes */ |
@@ -3435,20 +3487,23 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) | |||
3435 | void inet6_ifinfo_notify(int event, struct inet6_dev *idev) | 3487 | void inet6_ifinfo_notify(int event, struct inet6_dev *idev) |
3436 | { | 3488 | { |
3437 | struct sk_buff *skb; | 3489 | struct sk_buff *skb; |
3438 | int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE); | 3490 | int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE; |
3491 | int err = -ENOBUFS; | ||
3439 | 3492 | ||
3440 | skb = alloc_skb(size, GFP_ATOMIC); | 3493 | skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); |
3441 | if (!skb) { | 3494 | if (skb == NULL) |
3442 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); | 3495 | goto errout; |
3443 | return; | 3496 | |
3444 | } | 3497 | err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); |
3445 | if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { | 3498 | if (err < 0) { |
3446 | kfree_skb(skb); | 3499 | kfree_skb(skb); |
3447 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); | 3500 | goto errout; |
3448 | return; | ||
3449 | } | 3501 | } |
3450 | NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO; | 3502 | |
3451 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); | 3503 | err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); |
3504 | errout: | ||
3505 | if (err < 0) | ||
3506 | rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); | ||
3452 | } | 3507 | } |
3453 | 3508 | ||
3454 | /* Maximum length of prefix_cacheinfo attributes */ | 3509 | /* Maximum length of prefix_cacheinfo attributes */ |
@@ -3500,20 +3555,23 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, | |||
3500 | struct prefix_info *pinfo) | 3555 | struct prefix_info *pinfo) |
3501 | { | 3556 | { |
3502 | struct sk_buff *skb; | 3557 | struct sk_buff *skb; |
3503 | int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE); | 3558 | int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE; |
3559 | int err = -ENOBUFS; | ||
3504 | 3560 | ||
3505 | skb = alloc_skb(size, GFP_ATOMIC); | 3561 | skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); |
3506 | if (!skb) { | 3562 | if (skb == NULL) |
3507 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); | 3563 | goto errout; |
3508 | return; | 3564 | |
3509 | } | 3565 | err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); |
3510 | if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { | 3566 | if (err < 0) { |
3511 | kfree_skb(skb); | 3567 | kfree_skb(skb); |
3512 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); | 3568 | goto errout; |
3513 | return; | ||
3514 | } | 3569 | } |
3515 | NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX; | 3570 | |
3516 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC); | 3571 | err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); |
3572 | errout: | ||
3573 | if (err < 0) | ||
3574 | rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); | ||
3517 | } | 3575 | } |
3518 | 3576 | ||
3519 | static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { | 3577 | static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { |
@@ -3528,6 +3586,9 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { | |||
3528 | [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, }, | 3586 | [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, }, |
3529 | [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute, | 3587 | [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute, |
3530 | .dumpit = inet6_dump_fib, }, | 3588 | .dumpit = inet6_dump_fib, }, |
3589 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | ||
3590 | [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, }, | ||
3591 | #endif | ||
3531 | }; | 3592 | }; |
3532 | 3593 | ||
3533 | static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) | 3594 | static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) |
@@ -3536,7 +3597,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) | |||
3536 | 3597 | ||
3537 | switch (event) { | 3598 | switch (event) { |
3538 | case RTM_NEWADDR: | 3599 | case RTM_NEWADDR: |
3539 | ip6_ins_rt(ifp->rt, NULL, NULL, NULL); | 3600 | ip6_ins_rt(ifp->rt); |
3540 | if (ifp->idev->cnf.forwarding) | 3601 | if (ifp->idev->cnf.forwarding) |
3541 | addrconf_join_anycast(ifp); | 3602 | addrconf_join_anycast(ifp); |
3542 | break; | 3603 | break; |
@@ -3545,7 +3606,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) | |||
3545 | addrconf_leave_anycast(ifp); | 3606 | addrconf_leave_anycast(ifp); |
3546 | addrconf_leave_solict(ifp->idev, &ifp->addr); | 3607 | addrconf_leave_solict(ifp->idev, &ifp->addr); |
3547 | dst_hold(&ifp->rt->u.dst); | 3608 | dst_hold(&ifp->rt->u.dst); |
3548 | if (ip6_del_rt(ifp->rt, NULL, NULL, NULL)) | 3609 | if (ip6_del_rt(ifp->rt)) |
3549 | dst_free(&ifp->rt->u.dst); | 3610 | dst_free(&ifp->rt->u.dst); |
3550 | break; | 3611 | break; |
3551 | } | 3612 | } |
@@ -3553,10 +3614,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) | |||
3553 | 3614 | ||
3554 | static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) | 3615 | static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) |
3555 | { | 3616 | { |
3556 | read_lock_bh(&addrconf_lock); | 3617 | rcu_read_lock_bh(); |
3557 | if (likely(ifp->idev->dead == 0)) | 3618 | if (likely(ifp->idev->dead == 0)) |
3558 | __ipv6_ifa_notify(event, ifp); | 3619 | __ipv6_ifa_notify(event, ifp); |
3559 | read_unlock_bh(&addrconf_lock); | 3620 | rcu_read_unlock_bh(); |
3560 | } | 3621 | } |
3561 | 3622 | ||
3562 | #ifdef CONFIG_SYSCTL | 3623 | #ifdef CONFIG_SYSCTL |
@@ -3653,7 +3714,7 @@ static struct addrconf_sysctl_table | |||
3653 | ctl_table addrconf_conf_dir[2]; | 3714 | ctl_table addrconf_conf_dir[2]; |
3654 | ctl_table addrconf_proto_dir[2]; | 3715 | ctl_table addrconf_proto_dir[2]; |
3655 | ctl_table addrconf_root_dir[2]; | 3716 | ctl_table addrconf_root_dir[2]; |
3656 | } addrconf_sysctl = { | 3717 | } addrconf_sysctl __read_mostly = { |
3657 | .sysctl_header = NULL, | 3718 | .sysctl_header = NULL, |
3658 | .addrconf_vars = { | 3719 | .addrconf_vars = { |
3659 | { | 3720 | { |
@@ -3843,6 +3904,14 @@ static struct addrconf_sysctl_table | |||
3843 | #endif | 3904 | #endif |
3844 | #endif | 3905 | #endif |
3845 | { | 3906 | { |
3907 | .ctl_name = NET_IPV6_PROXY_NDP, | ||
3908 | .procname = "proxy_ndp", | ||
3909 | .data = &ipv6_devconf.proxy_ndp, | ||
3910 | .maxlen = sizeof(int), | ||
3911 | .mode = 0644, | ||
3912 | .proc_handler = &proc_dointvec, | ||
3913 | }, | ||
3914 | { | ||
3846 | .ctl_name = 0, /* sentinel */ | 3915 | .ctl_name = 0, /* sentinel */ |
3847 | } | 3916 | } |
3848 | }, | 3917 | }, |
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ac85e9c532c2..bf6e8aff19d4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c | |||
@@ -59,6 +59,9 @@ | |||
59 | #ifdef CONFIG_IPV6_TUNNEL | 59 | #ifdef CONFIG_IPV6_TUNNEL |
60 | #include <net/ip6_tunnel.h> | 60 | #include <net/ip6_tunnel.h> |
61 | #endif | 61 | #endif |
62 | #ifdef CONFIG_IPV6_MIP6 | ||
63 | #include <net/mip6.h> | ||
64 | #endif | ||
62 | 65 | ||
63 | #include <asm/uaccess.h> | 66 | #include <asm/uaccess.h> |
64 | #include <asm/system.h> | 67 | #include <asm/system.h> |
@@ -67,7 +70,7 @@ MODULE_AUTHOR("Cast of dozens"); | |||
67 | MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); | 70 | MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); |
68 | MODULE_LICENSE("GPL"); | 71 | MODULE_LICENSE("GPL"); |
69 | 72 | ||
70 | int sysctl_ipv6_bindv6only; | 73 | int sysctl_ipv6_bindv6only __read_mostly; |
71 | 74 | ||
72 | /* The inetsw table contains everything that inet_create needs to | 75 | /* The inetsw table contains everything that inet_create needs to |
73 | * build a new socket. | 76 | * build a new socket. |
@@ -637,6 +640,7 @@ int inet6_sk_rebuild_header(struct sock *sk) | |||
637 | fl.oif = sk->sk_bound_dev_if; | 640 | fl.oif = sk->sk_bound_dev_if; |
638 | fl.fl_ip_dport = inet->dport; | 641 | fl.fl_ip_dport = inet->dport; |
639 | fl.fl_ip_sport = inet->sport; | 642 | fl.fl_ip_sport = inet->sport; |
643 | security_sk_classify_flow(sk, &fl); | ||
640 | 644 | ||
641 | if (np->opt && np->opt->srcrt) { | 645 | if (np->opt && np->opt->srcrt) { |
642 | struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; | 646 | struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; |
@@ -658,7 +662,7 @@ int inet6_sk_rebuild_header(struct sock *sk) | |||
658 | return err; | 662 | return err; |
659 | } | 663 | } |
660 | 664 | ||
661 | __ip6_dst_store(sk, dst, NULL); | 665 | __ip6_dst_store(sk, dst, NULL, NULL); |
662 | } | 666 | } |
663 | 667 | ||
664 | return 0; | 668 | return 0; |
@@ -757,6 +761,8 @@ static int __init inet6_init(void) | |||
757 | struct list_head *r; | 761 | struct list_head *r; |
758 | int err; | 762 | int err; |
759 | 763 | ||
764 | BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); | ||
765 | |||
760 | #ifdef MODULE | 766 | #ifdef MODULE |
761 | #if 0 /* FIXME --RR */ | 767 | #if 0 /* FIXME --RR */ |
762 | if (!mod_member_present(&__this_module, can_unload)) | 768 | if (!mod_member_present(&__this_module, can_unload)) |
@@ -766,11 +772,6 @@ static int __init inet6_init(void) | |||
766 | #endif | 772 | #endif |
767 | #endif | 773 | #endif |
768 | 774 | ||
769 | if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) { | ||
770 | printk(KERN_CRIT "inet6_proto_init: size fault\n"); | ||
771 | return -EINVAL; | ||
772 | } | ||
773 | |||
774 | err = proto_register(&tcpv6_prot, 1); | 775 | err = proto_register(&tcpv6_prot, 1); |
775 | if (err) | 776 | if (err) |
776 | goto out; | 777 | goto out; |
@@ -856,6 +857,9 @@ static int __init inet6_init(void) | |||
856 | ipv6_frag_init(); | 857 | ipv6_frag_init(); |
857 | ipv6_nodata_init(); | 858 | ipv6_nodata_init(); |
858 | ipv6_destopt_init(); | 859 | ipv6_destopt_init(); |
860 | #ifdef CONFIG_IPV6_MIP6 | ||
861 | mip6_init(); | ||
862 | #endif | ||
859 | 863 | ||
860 | /* Init v6 transport protocols. */ | 864 | /* Init v6 transport protocols. */ |
861 | udpv6_init(); | 865 | udpv6_init(); |
@@ -919,6 +923,9 @@ static void __exit inet6_exit(void) | |||
919 | tcp6_proc_exit(); | 923 | tcp6_proc_exit(); |
920 | raw6_proc_exit(); | 924 | raw6_proc_exit(); |
921 | #endif | 925 | #endif |
926 | #ifdef CONFIG_IPV6_MIP6 | ||
927 | mip6_fini(); | ||
928 | #endif | ||
922 | /* Cleanup code parts. */ | 929 | /* Cleanup code parts. */ |
923 | sit_cleanup(); | 930 | sit_cleanup(); |
924 | ip6_flowlabel_cleanup(); | 931 | ip6_flowlabel_cleanup(); |
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 00ffa7bc6c9f..b0d83e8e4252 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c | |||
@@ -74,6 +74,66 @@ bad: | |||
74 | return 0; | 74 | return 0; |
75 | } | 75 | } |
76 | 76 | ||
77 | #ifdef CONFIG_IPV6_MIP6 | ||
78 | /** | ||
79 | * ipv6_rearrange_destopt - rearrange IPv6 destination options header | ||
80 | * @iph: IPv6 header | ||
81 | * @destopt: destionation options header | ||
82 | */ | ||
83 | static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) | ||
84 | { | ||
85 | u8 *opt = (u8 *)destopt; | ||
86 | int len = ipv6_optlen(destopt); | ||
87 | int off = 0; | ||
88 | int optlen = 0; | ||
89 | |||
90 | off += 2; | ||
91 | len -= 2; | ||
92 | |||
93 | while (len > 0) { | ||
94 | |||
95 | switch (opt[off]) { | ||
96 | |||
97 | case IPV6_TLV_PAD0: | ||
98 | optlen = 1; | ||
99 | break; | ||
100 | default: | ||
101 | if (len < 2) | ||
102 | goto bad; | ||
103 | optlen = opt[off+1]+2; | ||
104 | if (len < optlen) | ||
105 | goto bad; | ||
106 | |||
107 | /* Rearrange the source address in @iph and the | ||
108 | * addresses in home address option for final source. | ||
109 | * See 11.3.2 of RFC 3775 for details. | ||
110 | */ | ||
111 | if (opt[off] == IPV6_TLV_HAO) { | ||
112 | struct in6_addr final_addr; | ||
113 | struct ipv6_destopt_hao *hao; | ||
114 | |||
115 | hao = (struct ipv6_destopt_hao *)&opt[off]; | ||
116 | if (hao->length != sizeof(hao->addr)) { | ||
117 | if (net_ratelimit()) | ||
118 | printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length); | ||
119 | goto bad; | ||
120 | } | ||
121 | ipv6_addr_copy(&final_addr, &hao->addr); | ||
122 | ipv6_addr_copy(&hao->addr, &iph->saddr); | ||
123 | ipv6_addr_copy(&iph->saddr, &final_addr); | ||
124 | } | ||
125 | break; | ||
126 | } | ||
127 | |||
128 | off += optlen; | ||
129 | len -= optlen; | ||
130 | } | ||
131 | /* Note: ok if len == 0 */ | ||
132 | bad: | ||
133 | return; | ||
134 | } | ||
135 | #endif | ||
136 | |||
77 | /** | 137 | /** |
78 | * ipv6_rearrange_rthdr - rearrange IPv6 routing header | 138 | * ipv6_rearrange_rthdr - rearrange IPv6 routing header |
79 | * @iph: IPv6 header | 139 | * @iph: IPv6 header |
@@ -113,7 +173,7 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr) | |||
113 | ipv6_addr_copy(&iph->daddr, &final_addr); | 173 | ipv6_addr_copy(&iph->daddr, &final_addr); |
114 | } | 174 | } |
115 | 175 | ||
116 | static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) | 176 | static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) |
117 | { | 177 | { |
118 | union { | 178 | union { |
119 | struct ipv6hdr *iph; | 179 | struct ipv6hdr *iph; |
@@ -128,8 +188,12 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) | |||
128 | 188 | ||
129 | while (exthdr.raw < end) { | 189 | while (exthdr.raw < end) { |
130 | switch (nexthdr) { | 190 | switch (nexthdr) { |
131 | case NEXTHDR_HOP: | ||
132 | case NEXTHDR_DEST: | 191 | case NEXTHDR_DEST: |
192 | #ifdef CONFIG_IPV6_MIP6 | ||
193 | if (dir == XFRM_POLICY_OUT) | ||
194 | ipv6_rearrange_destopt(iph, exthdr.opth); | ||
195 | #endif | ||
196 | case NEXTHDR_HOP: | ||
133 | if (!zero_out_mutable_opts(exthdr.opth)) { | 197 | if (!zero_out_mutable_opts(exthdr.opth)) { |
134 | LIMIT_NETDEBUG( | 198 | LIMIT_NETDEBUG( |
135 | KERN_WARNING "overrun %sopts\n", | 199 | KERN_WARNING "overrun %sopts\n", |
@@ -164,6 +228,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) | |||
164 | u8 nexthdr; | 228 | u8 nexthdr; |
165 | char tmp_base[8]; | 229 | char tmp_base[8]; |
166 | struct { | 230 | struct { |
231 | #ifdef CONFIG_IPV6_MIP6 | ||
232 | struct in6_addr saddr; | ||
233 | #endif | ||
167 | struct in6_addr daddr; | 234 | struct in6_addr daddr; |
168 | char hdrs[0]; | 235 | char hdrs[0]; |
169 | } *tmp_ext; | 236 | } *tmp_ext; |
@@ -188,10 +255,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) | |||
188 | err = -ENOMEM; | 255 | err = -ENOMEM; |
189 | goto error; | 256 | goto error; |
190 | } | 257 | } |
258 | #ifdef CONFIG_IPV6_MIP6 | ||
259 | memcpy(tmp_ext, &top_iph->saddr, extlen); | ||
260 | #else | ||
191 | memcpy(tmp_ext, &top_iph->daddr, extlen); | 261 | memcpy(tmp_ext, &top_iph->daddr, extlen); |
262 | #endif | ||
192 | err = ipv6_clear_mutable_options(top_iph, | 263 | err = ipv6_clear_mutable_options(top_iph, |
193 | extlen - sizeof(*tmp_ext) + | 264 | extlen - sizeof(*tmp_ext) + |
194 | sizeof(*top_iph)); | 265 | sizeof(*top_iph), |
266 | XFRM_POLICY_OUT); | ||
195 | if (err) | 267 | if (err) |
196 | goto error_free_iph; | 268 | goto error_free_iph; |
197 | } | 269 | } |
@@ -222,7 +294,11 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) | |||
222 | 294 | ||
223 | memcpy(top_iph, tmp_base, sizeof(tmp_base)); | 295 | memcpy(top_iph, tmp_base, sizeof(tmp_base)); |
224 | if (tmp_ext) { | 296 | if (tmp_ext) { |
297 | #ifdef CONFIG_IPV6_MIP6 | ||
298 | memcpy(&top_iph->saddr, tmp_ext, extlen); | ||
299 | #else | ||
225 | memcpy(&top_iph->daddr, tmp_ext, extlen); | 300 | memcpy(&top_iph->daddr, tmp_ext, extlen); |
301 | #endif | ||
226 | error_free_iph: | 302 | error_free_iph: |
227 | kfree(tmp_ext); | 303 | kfree(tmp_ext); |
228 | } | 304 | } |
@@ -282,7 +358,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) | |||
282 | if (!tmp_hdr) | 358 | if (!tmp_hdr) |
283 | goto out; | 359 | goto out; |
284 | memcpy(tmp_hdr, skb->nh.raw, hdr_len); | 360 | memcpy(tmp_hdr, skb->nh.raw, hdr_len); |
285 | if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len)) | 361 | if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN)) |
286 | goto free_out; | 362 | goto free_out; |
287 | skb->nh.ipv6h->priority = 0; | 363 | skb->nh.ipv6h->priority = 0; |
288 | skb->nh.ipv6h->flow_lbl[0] = 0; | 364 | skb->nh.ipv6h->flow_lbl[0] = 0; |
@@ -398,7 +474,7 @@ static int ah6_init_state(struct xfrm_state *x) | |||
398 | goto error; | 474 | goto error; |
399 | 475 | ||
400 | x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); | 476 | x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); |
401 | if (x->props.mode) | 477 | if (x->props.mode == XFRM_MODE_TUNNEL) |
402 | x->props.header_len += sizeof(struct ipv6hdr); | 478 | x->props.header_len += sizeof(struct ipv6hdr); |
403 | x->data = ahp; | 479 | x->data = ahp; |
404 | 480 | ||
@@ -435,7 +511,8 @@ static struct xfrm_type ah6_type = | |||
435 | .init_state = ah6_init_state, | 511 | .init_state = ah6_init_state, |
436 | .destructor = ah6_destroy, | 512 | .destructor = ah6_destroy, |
437 | .input = ah6_input, | 513 | .input = ah6_input, |
438 | .output = ah6_output | 514 | .output = ah6_output, |
515 | .hdr_offset = xfrm6_find_1stfragopt, | ||
439 | }; | 516 | }; |
440 | 517 | ||
441 | static struct inet6_protocol ah6_protocol = { | 518 | static struct inet6_protocol ah6_protocol = { |
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f6881d7a0385..a9604764e015 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c | |||
@@ -56,7 +56,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) | |||
56 | int onlink; | 56 | int onlink; |
57 | 57 | ||
58 | onlink = 0; | 58 | onlink = 0; |
59 | read_lock(&addrconf_lock); | 59 | rcu_read_lock(); |
60 | idev = __in6_dev_get(dev); | 60 | idev = __in6_dev_get(dev); |
61 | if (idev) { | 61 | if (idev) { |
62 | read_lock_bh(&idev->lock); | 62 | read_lock_bh(&idev->lock); |
@@ -68,7 +68,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) | |||
68 | } | 68 | } |
69 | read_unlock_bh(&idev->lock); | 69 | read_unlock_bh(&idev->lock); |
70 | } | 70 | } |
71 | read_unlock(&addrconf_lock); | 71 | rcu_read_unlock(); |
72 | return onlink; | 72 | return onlink; |
73 | } | 73 | } |
74 | 74 | ||
@@ -335,7 +335,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) | |||
335 | write_unlock_bh(&idev->lock); | 335 | write_unlock_bh(&idev->lock); |
336 | 336 | ||
337 | dst_hold(&rt->u.dst); | 337 | dst_hold(&rt->u.dst); |
338 | if (ip6_ins_rt(rt, NULL, NULL, NULL)) | 338 | if (ip6_ins_rt(rt)) |
339 | dst_release(&rt->u.dst); | 339 | dst_release(&rt->u.dst); |
340 | 340 | ||
341 | addrconf_join_solict(dev, &aca->aca_addr); | 341 | addrconf_join_solict(dev, &aca->aca_addr); |
@@ -378,7 +378,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) | |||
378 | addrconf_leave_solict(idev, &aca->aca_addr); | 378 | addrconf_leave_solict(idev, &aca->aca_addr); |
379 | 379 | ||
380 | dst_hold(&aca->aca_rt->u.dst); | 380 | dst_hold(&aca->aca_rt->u.dst); |
381 | if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL)) | 381 | if (ip6_del_rt(aca->aca_rt)) |
382 | dst_free(&aca->aca_rt->u.dst); | 382 | dst_free(&aca->aca_rt->u.dst); |
383 | else | 383 | else |
384 | dst_release(&aca->aca_rt->u.dst); | 384 | dst_release(&aca->aca_rt->u.dst); |
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 3b55b4c8e2d1..7206747022fc 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c | |||
@@ -156,6 +156,8 @@ ipv4_connected: | |||
156 | if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) | 156 | if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) |
157 | fl.oif = np->mcast_oif; | 157 | fl.oif = np->mcast_oif; |
158 | 158 | ||
159 | security_sk_classify_flow(sk, &fl); | ||
160 | |||
159 | if (flowlabel) { | 161 | if (flowlabel) { |
160 | if (flowlabel->opt && flowlabel->opt->srcrt) { | 162 | if (flowlabel->opt && flowlabel->opt->srcrt) { |
161 | struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; | 163 | struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; |
@@ -191,7 +193,12 @@ ipv4_connected: | |||
191 | 193 | ||
192 | ip6_dst_store(sk, dst, | 194 | ip6_dst_store(sk, dst, |
193 | ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? | 195 | ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? |
194 | &np->daddr : NULL); | 196 | &np->daddr : NULL, |
197 | #ifdef CONFIG_IPV6_SUBTREES | ||
198 | ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? | ||
199 | &np->saddr : | ||
200 | #endif | ||
201 | NULL); | ||
195 | 202 | ||
196 | sk->sk_state = TCP_ESTABLISHED; | 203 | sk->sk_state = TCP_ESTABLISHED; |
197 | out: | 204 | out: |
@@ -641,10 +648,13 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, | |||
641 | 648 | ||
642 | rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); | 649 | rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); |
643 | 650 | ||
644 | /* | 651 | switch (rthdr->type) { |
645 | * TYPE 0 | 652 | case IPV6_SRCRT_TYPE_0: |
646 | */ | 653 | #ifdef CONFIG_IPV6_MIP6 |
647 | if (rthdr->type) { | 654 | case IPV6_SRCRT_TYPE_2: |
655 | #endif | ||
656 | break; | ||
657 | default: | ||
648 | err = -EINVAL; | 658 | err = -EINVAL; |
649 | goto exit_f; | 659 | goto exit_f; |
650 | } | 660 | } |
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2ebfd281e721..e78680a9985b 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c | |||
@@ -99,8 +99,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) | |||
99 | esph->seq_no = htonl(++x->replay.oseq); | 99 | esph->seq_no = htonl(++x->replay.oseq); |
100 | xfrm_aevent_doreplay(x); | 100 | xfrm_aevent_doreplay(x); |
101 | 101 | ||
102 | if (esp->conf.ivlen) | 102 | if (esp->conf.ivlen) { |
103 | if (unlikely(!esp->conf.ivinitted)) { | ||
104 | get_random_bytes(esp->conf.ivec, esp->conf.ivlen); | ||
105 | esp->conf.ivinitted = 1; | ||
106 | } | ||
103 | crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); | 107 | crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); |
108 | } | ||
104 | 109 | ||
105 | do { | 110 | do { |
106 | struct scatterlist *sg = &esp->sgbuf[0]; | 111 | struct scatterlist *sg = &esp->sgbuf[0]; |
@@ -237,7 +242,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) | |||
237 | struct esp_data *esp = x->data; | 242 | struct esp_data *esp = x->data; |
238 | u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); | 243 | u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); |
239 | 244 | ||
240 | if (x->props.mode) { | 245 | if (x->props.mode == XFRM_MODE_TUNNEL) { |
241 | mtu = ALIGN(mtu + 2, blksize); | 246 | mtu = ALIGN(mtu + 2, blksize); |
242 | } else { | 247 | } else { |
243 | /* The worst case. */ | 248 | /* The worst case. */ |
@@ -353,12 +358,12 @@ static int esp6_init_state(struct xfrm_state *x) | |||
353 | esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); | 358 | esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); |
354 | if (unlikely(esp->conf.ivec == NULL)) | 359 | if (unlikely(esp->conf.ivec == NULL)) |
355 | goto error; | 360 | goto error; |
356 | get_random_bytes(esp->conf.ivec, esp->conf.ivlen); | 361 | esp->conf.ivinitted = 0; |
357 | } | 362 | } |
358 | if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) | 363 | if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) |
359 | goto error; | 364 | goto error; |
360 | x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; | 365 | x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; |
361 | if (x->props.mode) | 366 | if (x->props.mode == XFRM_MODE_TUNNEL) |
362 | x->props.header_len += sizeof(struct ipv6hdr); | 367 | x->props.header_len += sizeof(struct ipv6hdr); |
363 | x->data = esp; | 368 | x->data = esp; |
364 | return 0; | 369 | return 0; |
@@ -379,7 +384,8 @@ static struct xfrm_type esp6_type = | |||
379 | .destructor = esp6_destroy, | 384 | .destructor = esp6_destroy, |
380 | .get_max_size = esp6_get_max_size, | 385 | .get_max_size = esp6_get_max_size, |
381 | .input = esp6_input, | 386 | .input = esp6_input, |
382 | .output = esp6_output | 387 | .output = esp6_output, |
388 | .hdr_offset = xfrm6_find_1stfragopt, | ||
383 | }; | 389 | }; |
384 | 390 | ||
385 | static struct inet6_protocol esp6_protocol = { | 391 | static struct inet6_protocol esp6_protocol = { |
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 86dac106873b..88c96b10684c 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c | |||
@@ -43,9 +43,54 @@ | |||
43 | #include <net/ndisc.h> | 43 | #include <net/ndisc.h> |
44 | #include <net/ip6_route.h> | 44 | #include <net/ip6_route.h> |
45 | #include <net/addrconf.h> | 45 | #include <net/addrconf.h> |
46 | #ifdef CONFIG_IPV6_MIP6 | ||
47 | #include <net/xfrm.h> | ||
48 | #endif | ||
46 | 49 | ||
47 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
48 | 51 | ||
52 | int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) | ||
53 | { | ||
54 | int packet_len = skb->tail - skb->nh.raw; | ||
55 | struct ipv6_opt_hdr *hdr; | ||
56 | int len; | ||
57 | |||
58 | if (offset + 2 > packet_len) | ||
59 | goto bad; | ||
60 | hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); | ||
61 | len = ((hdr->hdrlen + 1) << 3); | ||
62 | |||
63 | if (offset + len > packet_len) | ||
64 | goto bad; | ||
65 | |||
66 | offset += 2; | ||
67 | len -= 2; | ||
68 | |||
69 | while (len > 0) { | ||
70 | int opttype = skb->nh.raw[offset]; | ||
71 | int optlen; | ||
72 | |||
73 | if (opttype == type) | ||
74 | return offset; | ||
75 | |||
76 | switch (opttype) { | ||
77 | case IPV6_TLV_PAD0: | ||
78 | optlen = 1; | ||
79 | break; | ||
80 | default: | ||
81 | optlen = skb->nh.raw[offset + 1] + 2; | ||
82 | if (optlen > len) | ||
83 | goto bad; | ||
84 | break; | ||
85 | } | ||
86 | offset += optlen; | ||
87 | len -= optlen; | ||
88 | } | ||
89 | /* not_found */ | ||
90 | bad: | ||
91 | return -1; | ||
92 | } | ||
93 | |||
49 | /* | 94 | /* |
50 | * Parsing tlv encoded headers. | 95 | * Parsing tlv encoded headers. |
51 | * | 96 | * |
@@ -56,7 +101,7 @@ | |||
56 | 101 | ||
57 | struct tlvtype_proc { | 102 | struct tlvtype_proc { |
58 | int type; | 103 | int type; |
59 | int (*func)(struct sk_buff *skb, int offset); | 104 | int (*func)(struct sk_buff **skbp, int offset); |
60 | }; | 105 | }; |
61 | 106 | ||
62 | /********************* | 107 | /********************* |
@@ -65,8 +110,10 @@ struct tlvtype_proc { | |||
65 | 110 | ||
66 | /* An unknown option is detected, decide what to do */ | 111 | /* An unknown option is detected, decide what to do */ |
67 | 112 | ||
68 | static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) | 113 | static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) |
69 | { | 114 | { |
115 | struct sk_buff *skb = *skbp; | ||
116 | |||
70 | switch ((skb->nh.raw[optoff] & 0xC0) >> 6) { | 117 | switch ((skb->nh.raw[optoff] & 0xC0) >> 6) { |
71 | case 0: /* ignore */ | 118 | case 0: /* ignore */ |
72 | return 1; | 119 | return 1; |
@@ -91,8 +138,9 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) | |||
91 | 138 | ||
92 | /* Parse tlv encoded option header (hop-by-hop or destination) */ | 139 | /* Parse tlv encoded option header (hop-by-hop or destination) */ |
93 | 140 | ||
94 | static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) | 141 | static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) |
95 | { | 142 | { |
143 | struct sk_buff *skb = *skbp; | ||
96 | struct tlvtype_proc *curr; | 144 | struct tlvtype_proc *curr; |
97 | int off = skb->h.raw - skb->nh.raw; | 145 | int off = skb->h.raw - skb->nh.raw; |
98 | int len = ((skb->h.raw[1]+1)<<3); | 146 | int len = ((skb->h.raw[1]+1)<<3); |
@@ -122,13 +170,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) | |||
122 | /* type specific length/alignment | 170 | /* type specific length/alignment |
123 | checks will be performed in the | 171 | checks will be performed in the |
124 | func(). */ | 172 | func(). */ |
125 | if (curr->func(skb, off) == 0) | 173 | if (curr->func(skbp, off) == 0) |
126 | return 0; | 174 | return 0; |
127 | break; | 175 | break; |
128 | } | 176 | } |
129 | } | 177 | } |
130 | if (curr->type < 0) { | 178 | if (curr->type < 0) { |
131 | if (ip6_tlvopt_unknown(skb, off) == 0) | 179 | if (ip6_tlvopt_unknown(skbp, off) == 0) |
132 | return 0; | 180 | return 0; |
133 | } | 181 | } |
134 | break; | 182 | break; |
@@ -147,8 +195,85 @@ bad: | |||
147 | Destination options header. | 195 | Destination options header. |
148 | *****************************/ | 196 | *****************************/ |
149 | 197 | ||
198 | #ifdef CONFIG_IPV6_MIP6 | ||
199 | static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) | ||
200 | { | ||
201 | struct sk_buff *skb = *skbp; | ||
202 | struct ipv6_destopt_hao *hao; | ||
203 | struct inet6_skb_parm *opt = IP6CB(skb); | ||
204 | struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw; | ||
205 | struct in6_addr tmp_addr; | ||
206 | int ret; | ||
207 | |||
208 | if (opt->dsthao) { | ||
209 | LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n"); | ||
210 | goto discard; | ||
211 | } | ||
212 | opt->dsthao = opt->dst1; | ||
213 | opt->dst1 = 0; | ||
214 | |||
215 | hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff); | ||
216 | |||
217 | if (hao->length != 16) { | ||
218 | LIMIT_NETDEBUG( | ||
219 | KERN_DEBUG "hao invalid option length = %d\n", hao->length); | ||
220 | goto discard; | ||
221 | } | ||
222 | |||
223 | if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { | ||
224 | LIMIT_NETDEBUG( | ||
225 | KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr)); | ||
226 | goto discard; | ||
227 | } | ||
228 | |||
229 | ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, | ||
230 | (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS); | ||
231 | if (unlikely(ret < 0)) | ||
232 | goto discard; | ||
233 | |||
234 | if (skb_cloned(skb)) { | ||
235 | struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); | ||
236 | struct inet6_skb_parm *opt2; | ||
237 | |||
238 | if (skb2 == NULL) | ||
239 | goto discard; | ||
240 | |||
241 | opt2 = IP6CB(skb2); | ||
242 | memcpy(opt2, opt, sizeof(*opt2)); | ||
243 | |||
244 | kfree_skb(skb); | ||
245 | |||
246 | /* update all variable using below by copied skbuff */ | ||
247 | *skbp = skb = skb2; | ||
248 | hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff); | ||
249 | ipv6h = (struct ipv6hdr *)skb2->nh.raw; | ||
250 | } | ||
251 | |||
252 | if (skb->ip_summed == CHECKSUM_COMPLETE) | ||
253 | skb->ip_summed = CHECKSUM_NONE; | ||
254 | |||
255 | ipv6_addr_copy(&tmp_addr, &ipv6h->saddr); | ||
256 | ipv6_addr_copy(&ipv6h->saddr, &hao->addr); | ||
257 | ipv6_addr_copy(&hao->addr, &tmp_addr); | ||
258 | |||
259 | if (skb->tstamp.off_sec == 0) | ||
260 | __net_timestamp(skb); | ||
261 | |||
262 | return 1; | ||
263 | |||
264 | discard: | ||
265 | kfree_skb(skb); | ||
266 | return 0; | ||
267 | } | ||
268 | #endif | ||
269 | |||
150 | static struct tlvtype_proc tlvprocdestopt_lst[] = { | 270 | static struct tlvtype_proc tlvprocdestopt_lst[] = { |
151 | /* No destination options are defined now */ | 271 | #ifdef CONFIG_IPV6_MIP6 |
272 | { | ||
273 | .type = IPV6_TLV_HAO, | ||
274 | .func = ipv6_dest_hao, | ||
275 | }, | ||
276 | #endif | ||
152 | {-1, NULL} | 277 | {-1, NULL} |
153 | }; | 278 | }; |
154 | 279 | ||
@@ -156,6 +281,9 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) | |||
156 | { | 281 | { |
157 | struct sk_buff *skb = *skbp; | 282 | struct sk_buff *skb = *skbp; |
158 | struct inet6_skb_parm *opt = IP6CB(skb); | 283 | struct inet6_skb_parm *opt = IP6CB(skb); |
284 | #ifdef CONFIG_IPV6_MIP6 | ||
285 | __u16 dstbuf; | ||
286 | #endif | ||
159 | 287 | ||
160 | if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || | 288 | if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || |
161 | !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { | 289 | !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { |
@@ -166,10 +294,19 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) | |||
166 | 294 | ||
167 | opt->lastopt = skb->h.raw - skb->nh.raw; | 295 | opt->lastopt = skb->h.raw - skb->nh.raw; |
168 | opt->dst1 = skb->h.raw - skb->nh.raw; | 296 | opt->dst1 = skb->h.raw - skb->nh.raw; |
297 | #ifdef CONFIG_IPV6_MIP6 | ||
298 | dstbuf = opt->dst1; | ||
299 | #endif | ||
169 | 300 | ||
170 | if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { | 301 | if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { |
302 | skb = *skbp; | ||
171 | skb->h.raw += ((skb->h.raw[1]+1)<<3); | 303 | skb->h.raw += ((skb->h.raw[1]+1)<<3); |
304 | opt = IP6CB(skb); | ||
305 | #ifdef CONFIG_IPV6_MIP6 | ||
306 | opt->nhoff = dstbuf; | ||
307 | #else | ||
172 | opt->nhoff = opt->dst1; | 308 | opt->nhoff = opt->dst1; |
309 | #endif | ||
173 | return 1; | 310 | return 1; |
174 | } | 311 | } |
175 | 312 | ||
@@ -219,7 +356,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) | |||
219 | { | 356 | { |
220 | struct sk_buff *skb = *skbp; | 357 | struct sk_buff *skb = *skbp; |
221 | struct inet6_skb_parm *opt = IP6CB(skb); | 358 | struct inet6_skb_parm *opt = IP6CB(skb); |
222 | struct in6_addr *addr; | 359 | struct in6_addr *addr = NULL; |
223 | struct in6_addr daddr; | 360 | struct in6_addr daddr; |
224 | int n, i; | 361 | int n, i; |
225 | 362 | ||
@@ -244,6 +381,23 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) | |||
244 | 381 | ||
245 | looped_back: | 382 | looped_back: |
246 | if (hdr->segments_left == 0) { | 383 | if (hdr->segments_left == 0) { |
384 | switch (hdr->type) { | ||
385 | #ifdef CONFIG_IPV6_MIP6 | ||
386 | case IPV6_SRCRT_TYPE_2: | ||
387 | /* Silently discard type 2 header unless it was | ||
388 | * processed by own | ||
389 | */ | ||
390 | if (!addr) { | ||
391 | IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); | ||
392 | kfree_skb(skb); | ||
393 | return -1; | ||
394 | } | ||
395 | break; | ||
396 | #endif | ||
397 | default: | ||
398 | break; | ||
399 | } | ||
400 | |||
247 | opt->lastopt = skb->h.raw - skb->nh.raw; | 401 | opt->lastopt = skb->h.raw - skb->nh.raw; |
248 | opt->srcrt = skb->h.raw - skb->nh.raw; | 402 | opt->srcrt = skb->h.raw - skb->nh.raw; |
249 | skb->h.raw += (hdr->hdrlen + 1) << 3; | 403 | skb->h.raw += (hdr->hdrlen + 1) << 3; |
@@ -253,17 +407,29 @@ looped_back: | |||
253 | return 1; | 407 | return 1; |
254 | } | 408 | } |
255 | 409 | ||
256 | if (hdr->type != IPV6_SRCRT_TYPE_0) { | 410 | switch (hdr->type) { |
411 | case IPV6_SRCRT_TYPE_0: | ||
412 | if (hdr->hdrlen & 0x01) { | ||
413 | IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); | ||
414 | icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); | ||
415 | return -1; | ||
416 | } | ||
417 | break; | ||
418 | #ifdef CONFIG_IPV6_MIP6 | ||
419 | case IPV6_SRCRT_TYPE_2: | ||
420 | /* Silently discard invalid RTH type 2 */ | ||
421 | if (hdr->hdrlen != 2 || hdr->segments_left != 1) { | ||
422 | IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); | ||
423 | kfree_skb(skb); | ||
424 | return -1; | ||
425 | } | ||
426 | break; | ||
427 | #endif | ||
428 | default: | ||
257 | IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); | 429 | IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); |
258 | icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw); | 430 | icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw); |
259 | return -1; | 431 | return -1; |
260 | } | 432 | } |
261 | |||
262 | if (hdr->hdrlen & 0x01) { | ||
263 | IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); | ||
264 | icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); | ||
265 | return -1; | ||
266 | } | ||
267 | 433 | ||
268 | /* | 434 | /* |
269 | * This is the routing header forwarding algorithm from | 435 | * This is the routing header forwarding algorithm from |
@@ -294,7 +460,7 @@ looped_back: | |||
294 | hdr = (struct ipv6_rt_hdr *) skb2->h.raw; | 460 | hdr = (struct ipv6_rt_hdr *) skb2->h.raw; |
295 | } | 461 | } |
296 | 462 | ||
297 | if (skb->ip_summed == CHECKSUM_HW) | 463 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
298 | skb->ip_summed = CHECKSUM_NONE; | 464 | skb->ip_summed = CHECKSUM_NONE; |
299 | 465 | ||
300 | i = n - --hdr->segments_left; | 466 | i = n - --hdr->segments_left; |
@@ -303,6 +469,27 @@ looped_back: | |||
303 | addr = rthdr->addr; | 469 | addr = rthdr->addr; |
304 | addr += i - 1; | 470 | addr += i - 1; |
305 | 471 | ||
472 | switch (hdr->type) { | ||
473 | #ifdef CONFIG_IPV6_MIP6 | ||
474 | case IPV6_SRCRT_TYPE_2: | ||
475 | if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, | ||
476 | (xfrm_address_t *)&skb->nh.ipv6h->saddr, | ||
477 | IPPROTO_ROUTING) < 0) { | ||
478 | IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); | ||
479 | kfree_skb(skb); | ||
480 | return -1; | ||
481 | } | ||
482 | if (!ipv6_chk_home_addr(addr)) { | ||
483 | IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); | ||
484 | kfree_skb(skb); | ||
485 | return -1; | ||
486 | } | ||
487 | break; | ||
488 | #endif | ||
489 | default: | ||
490 | break; | ||
491 | } | ||
492 | |||
306 | if (ipv6_addr_is_multicast(addr)) { | 493 | if (ipv6_addr_is_multicast(addr)) { |
307 | IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); | 494 | IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); |
308 | kfree_skb(skb); | 495 | kfree_skb(skb); |
@@ -421,8 +608,10 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); | |||
421 | 608 | ||
422 | /* Router Alert as of RFC 2711 */ | 609 | /* Router Alert as of RFC 2711 */ |
423 | 610 | ||
424 | static int ipv6_hop_ra(struct sk_buff *skb, int optoff) | 611 | static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) |
425 | { | 612 | { |
613 | struct sk_buff *skb = *skbp; | ||
614 | |||
426 | if (skb->nh.raw[optoff+1] == 2) { | 615 | if (skb->nh.raw[optoff+1] == 2) { |
427 | IP6CB(skb)->ra = optoff; | 616 | IP6CB(skb)->ra = optoff; |
428 | return 1; | 617 | return 1; |
@@ -435,8 +624,9 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff) | |||
435 | 624 | ||
436 | /* Jumbo payload */ | 625 | /* Jumbo payload */ |
437 | 626 | ||
438 | static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) | 627 | static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) |
439 | { | 628 | { |
629 | struct sk_buff *skb = *skbp; | ||
440 | u32 pkt_len; | 630 | u32 pkt_len; |
441 | 631 | ||
442 | if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { | 632 | if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { |
@@ -485,8 +675,9 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { | |||
485 | { -1, } | 675 | { -1, } |
486 | }; | 676 | }; |
487 | 677 | ||
488 | int ipv6_parse_hopopts(struct sk_buff *skb) | 678 | int ipv6_parse_hopopts(struct sk_buff **skbp) |
489 | { | 679 | { |
680 | struct sk_buff *skb = *skbp; | ||
490 | struct inet6_skb_parm *opt = IP6CB(skb); | 681 | struct inet6_skb_parm *opt = IP6CB(skb); |
491 | 682 | ||
492 | /* | 683 | /* |
@@ -502,8 +693,10 @@ int ipv6_parse_hopopts(struct sk_buff *skb) | |||
502 | } | 693 | } |
503 | 694 | ||
504 | opt->hop = sizeof(struct ipv6hdr); | 695 | opt->hop = sizeof(struct ipv6hdr); |
505 | if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { | 696 | if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) { |
697 | skb = *skbp; | ||
506 | skb->h.raw += (skb->h.raw[1]+1)<<3; | 698 | skb->h.raw += (skb->h.raw[1]+1)<<3; |
699 | opt = IP6CB(skb); | ||
507 | opt->nhoff = sizeof(struct ipv6hdr); | 700 | opt->nhoff = sizeof(struct ipv6hdr); |
508 | return 1; | 701 | return 1; |
509 | } | 702 | } |
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c new file mode 100644 index 000000000000..34f5bfaddfc2 --- /dev/null +++ b/net/ipv6/fib6_rules.c | |||
@@ -0,0 +1,305 @@ | |||
1 | /* | ||
2 | * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules | ||
3 | * | ||
4 | * Copyright (C)2003-2006 Helsinki University of Technology | ||
5 | * Copyright (C)2003-2006 USAGI/WIDE Project | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License as | ||
9 | * published by the Free Software Foundation, version 2. | ||
10 | * | ||
11 | * Authors | ||
12 | * Thomas Graf <tgraf@suug.ch> | ||
13 | * Ville Nuorvala <vnuorval@tcs.hut.fi> | ||
14 | */ | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | #include <linux/netdevice.h> | ||
18 | |||
19 | #include <net/fib_rules.h> | ||
20 | #include <net/ipv6.h> | ||
21 | #include <net/ip6_route.h> | ||
22 | #include <net/netlink.h> | ||
23 | |||
24 | struct fib6_rule | ||
25 | { | ||
26 | struct fib_rule common; | ||
27 | struct rt6key src; | ||
28 | struct rt6key dst; | ||
29 | #ifdef CONFIG_IPV6_ROUTE_FWMARK | ||
30 | u32 fwmark; | ||
31 | u32 fwmask; | ||
32 | #endif | ||
33 | u8 tclass; | ||
34 | }; | ||
35 | |||
36 | static struct fib_rules_ops fib6_rules_ops; | ||
37 | |||
38 | static struct fib6_rule main_rule = { | ||
39 | .common = { | ||
40 | .refcnt = ATOMIC_INIT(2), | ||
41 | .pref = 0x7FFE, | ||
42 | .action = FR_ACT_TO_TBL, | ||
43 | .table = RT6_TABLE_MAIN, | ||
44 | }, | ||
45 | }; | ||
46 | |||
47 | static struct fib6_rule local_rule = { | ||
48 | .common = { | ||
49 | .refcnt = ATOMIC_INIT(2), | ||
50 | .pref = 0, | ||
51 | .action = FR_ACT_TO_TBL, | ||
52 | .table = RT6_TABLE_LOCAL, | ||
53 | .flags = FIB_RULE_PERMANENT, | ||
54 | }, | ||
55 | }; | ||
56 | |||
57 | static LIST_HEAD(fib6_rules); | ||
58 | |||
59 | struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, | ||
60 | pol_lookup_t lookup) | ||
61 | { | ||
62 | struct fib_lookup_arg arg = { | ||
63 | .lookup_ptr = lookup, | ||
64 | }; | ||
65 | |||
66 | fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg); | ||
67 | if (arg.rule) | ||
68 | fib_rule_put(arg.rule); | ||
69 | |||
70 | if (arg.result) | ||
71 | return (struct dst_entry *) arg.result; | ||
72 | |||
73 | dst_hold(&ip6_null_entry.u.dst); | ||
74 | return &ip6_null_entry.u.dst; | ||
75 | } | ||
76 | |||
77 | static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, | ||
78 | int flags, struct fib_lookup_arg *arg) | ||
79 | { | ||
80 | struct rt6_info *rt = NULL; | ||
81 | struct fib6_table *table; | ||
82 | pol_lookup_t lookup = arg->lookup_ptr; | ||
83 | |||
84 | switch (rule->action) { | ||
85 | case FR_ACT_TO_TBL: | ||
86 | break; | ||
87 | case FR_ACT_UNREACHABLE: | ||
88 | rt = &ip6_null_entry; | ||
89 | goto discard_pkt; | ||
90 | default: | ||
91 | case FR_ACT_BLACKHOLE: | ||
92 | rt = &ip6_blk_hole_entry; | ||
93 | goto discard_pkt; | ||
94 | case FR_ACT_PROHIBIT: | ||
95 | rt = &ip6_prohibit_entry; | ||
96 | goto discard_pkt; | ||
97 | } | ||
98 | |||
99 | table = fib6_get_table(rule->table); | ||
100 | if (table) | ||
101 | rt = lookup(table, flp, flags); | ||
102 | |||
103 | if (rt != &ip6_null_entry) | ||
104 | goto out; | ||
105 | dst_release(&rt->u.dst); | ||
106 | rt = NULL; | ||
107 | goto out; | ||
108 | |||
109 | discard_pkt: | ||
110 | dst_hold(&rt->u.dst); | ||
111 | out: | ||
112 | arg->result = rt; | ||
113 | return rt == NULL ? -EAGAIN : 0; | ||
114 | } | ||
115 | |||
116 | |||
117 | static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) | ||
118 | { | ||
119 | struct fib6_rule *r = (struct fib6_rule *) rule; | ||
120 | |||
121 | if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) | ||
122 | return 0; | ||
123 | |||
124 | if ((flags & RT6_LOOKUP_F_HAS_SADDR) && | ||
125 | !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen)) | ||
126 | return 0; | ||
127 | |||
128 | if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) | ||
129 | return 0; | ||
130 | |||
131 | #ifdef CONFIG_IPV6_ROUTE_FWMARK | ||
132 | if ((r->fwmark ^ fl->fl6_fwmark) & r->fwmask) | ||
133 | return 0; | ||
134 | #endif | ||
135 | |||
136 | return 1; | ||
137 | } | ||
138 | |||
139 | static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { | ||
140 | [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, | ||
141 | [FRA_PRIORITY] = { .type = NLA_U32 }, | ||
142 | [FRA_SRC] = { .len = sizeof(struct in6_addr) }, | ||
143 | [FRA_DST] = { .len = sizeof(struct in6_addr) }, | ||
144 | [FRA_FWMARK] = { .type = NLA_U32 }, | ||
145 | [FRA_FWMASK] = { .type = NLA_U32 }, | ||
146 | [FRA_TABLE] = { .type = NLA_U32 }, | ||
147 | }; | ||
148 | |||
149 | static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, | ||
150 | struct nlmsghdr *nlh, struct fib_rule_hdr *frh, | ||
151 | struct nlattr **tb) | ||
152 | { | ||
153 | int err = -EINVAL; | ||
154 | struct fib6_rule *rule6 = (struct fib6_rule *) rule; | ||
155 | |||
156 | if (frh->src_len > 128 || frh->dst_len > 128 || | ||
157 | (frh->tos & ~IPV6_FLOWINFO_MASK)) | ||
158 | goto errout; | ||
159 | |||
160 | if (rule->action == FR_ACT_TO_TBL) { | ||
161 | if (rule->table == RT6_TABLE_UNSPEC) | ||
162 | goto errout; | ||
163 | |||
164 | if (fib6_new_table(rule->table) == NULL) { | ||
165 | err = -ENOBUFS; | ||
166 | goto errout; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | if (tb[FRA_SRC]) | ||
171 | nla_memcpy(&rule6->src.addr, tb[FRA_SRC], | ||
172 | sizeof(struct in6_addr)); | ||
173 | |||
174 | if (tb[FRA_DST]) | ||
175 | nla_memcpy(&rule6->dst.addr, tb[FRA_DST], | ||
176 | sizeof(struct in6_addr)); | ||
177 | |||
178 | #ifdef CONFIG_IPV6_ROUTE_FWMARK | ||
179 | if (tb[FRA_FWMARK]) { | ||
180 | rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]); | ||
181 | if (rule6->fwmark) { | ||
182 | /* | ||
183 | * if the mark value is non-zero, | ||
184 | * all bits are compared by default | ||
185 | * unless a mask is explicitly specified. | ||
186 | */ | ||
187 | rule6->fwmask = 0xFFFFFFFF; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | if (tb[FRA_FWMASK]) | ||
192 | rule6->fwmask = nla_get_u32(tb[FRA_FWMASK]); | ||
193 | #endif | ||
194 | |||
195 | rule6->src.plen = frh->src_len; | ||
196 | rule6->dst.plen = frh->dst_len; | ||
197 | rule6->tclass = frh->tos; | ||
198 | |||
199 | err = 0; | ||
200 | errout: | ||
201 | return err; | ||
202 | } | ||
203 | |||
204 | static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, | ||
205 | struct nlattr **tb) | ||
206 | { | ||
207 | struct fib6_rule *rule6 = (struct fib6_rule *) rule; | ||
208 | |||
209 | if (frh->src_len && (rule6->src.plen != frh->src_len)) | ||
210 | return 0; | ||
211 | |||
212 | if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) | ||
213 | return 0; | ||
214 | |||
215 | if (frh->tos && (rule6->tclass != frh->tos)) | ||
216 | return 0; | ||
217 | |||
218 | if (tb[FRA_SRC] && | ||
219 | nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) | ||
220 | return 0; | ||
221 | |||
222 | if (tb[FRA_DST] && | ||
223 | nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) | ||
224 | return 0; | ||
225 | |||
226 | #ifdef CONFIG_IPV6_ROUTE_FWMARK | ||
227 | if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK]))) | ||
228 | return 0; | ||
229 | |||
230 | if (tb[FRA_FWMASK] && (rule6->fwmask != nla_get_u32(tb[FRA_FWMASK]))) | ||
231 | return 0; | ||
232 | #endif | ||
233 | |||
234 | return 1; | ||
235 | } | ||
236 | |||
237 | static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | ||
238 | struct nlmsghdr *nlh, struct fib_rule_hdr *frh) | ||
239 | { | ||
240 | struct fib6_rule *rule6 = (struct fib6_rule *) rule; | ||
241 | |||
242 | frh->family = AF_INET6; | ||
243 | frh->dst_len = rule6->dst.plen; | ||
244 | frh->src_len = rule6->src.plen; | ||
245 | frh->tos = rule6->tclass; | ||
246 | |||
247 | if (rule6->dst.plen) | ||
248 | NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr), | ||
249 | &rule6->dst.addr); | ||
250 | |||
251 | if (rule6->src.plen) | ||
252 | NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), | ||
253 | &rule6->src.addr); | ||
254 | |||
255 | #ifdef CONFIG_IPV6_ROUTE_FWMARK | ||
256 | if (rule6->fwmark) | ||
257 | NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark); | ||
258 | |||
259 | if (rule6->fwmask || rule6->fwmark) | ||
260 | NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask); | ||
261 | #endif | ||
262 | |||
263 | return 0; | ||
264 | |||
265 | nla_put_failure: | ||
266 | return -ENOBUFS; | ||
267 | } | ||
268 | |||
269 | int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) | ||
270 | { | ||
271 | return fib_rules_dump(skb, cb, AF_INET6); | ||
272 | } | ||
273 | |||
274 | static u32 fib6_rule_default_pref(void) | ||
275 | { | ||
276 | return 0x3FFF; | ||
277 | } | ||
278 | |||
279 | static struct fib_rules_ops fib6_rules_ops = { | ||
280 | .family = AF_INET6, | ||
281 | .rule_size = sizeof(struct fib6_rule), | ||
282 | .action = fib6_rule_action, | ||
283 | .match = fib6_rule_match, | ||
284 | .configure = fib6_rule_configure, | ||
285 | .compare = fib6_rule_compare, | ||
286 | .fill = fib6_rule_fill, | ||
287 | .default_pref = fib6_rule_default_pref, | ||
288 | .nlgroup = RTNLGRP_IPV6_RULE, | ||
289 | .policy = fib6_rule_policy, | ||
290 | .rules_list = &fib6_rules, | ||
291 | .owner = THIS_MODULE, | ||
292 | }; | ||
293 | |||
294 | void __init fib6_rules_init(void) | ||
295 | { | ||
296 | list_add_tail(&local_rule.common.list, &fib6_rules); | ||
297 | list_add_tail(&main_rule.common.list, &fib6_rules); | ||
298 | |||
299 | fib_rules_register(&fib6_rules_ops); | ||
300 | } | ||
301 | |||
302 | void fib6_rules_cleanup(void) | ||
303 | { | ||
304 | fib_rules_unregister(&fib6_rules_ops); | ||
305 | } | ||
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 356a8a7ef22a..4ec876066b3f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c | |||
@@ -151,7 +151,7 @@ static int is_ineligible(struct sk_buff *skb) | |||
151 | return 0; | 151 | return 0; |
152 | } | 152 | } |
153 | 153 | ||
154 | static int sysctl_icmpv6_time = 1*HZ; | 154 | static int sysctl_icmpv6_time __read_mostly = 1*HZ; |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * Check the ICMP output rate limit | 157 | * Check the ICMP output rate limit |
@@ -273,6 +273,29 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st | |||
273 | return 0; | 273 | return 0; |
274 | } | 274 | } |
275 | 275 | ||
276 | #ifdef CONFIG_IPV6_MIP6 | ||
277 | static void mip6_addr_swap(struct sk_buff *skb) | ||
278 | { | ||
279 | struct ipv6hdr *iph = skb->nh.ipv6h; | ||
280 | struct inet6_skb_parm *opt = IP6CB(skb); | ||
281 | struct ipv6_destopt_hao *hao; | ||
282 | struct in6_addr tmp; | ||
283 | int off; | ||
284 | |||
285 | if (opt->dsthao) { | ||
286 | off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); | ||
287 | if (likely(off >= 0)) { | ||
288 | hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off); | ||
289 | ipv6_addr_copy(&tmp, &iph->saddr); | ||
290 | ipv6_addr_copy(&iph->saddr, &hao->addr); | ||
291 | ipv6_addr_copy(&hao->addr, &tmp); | ||
292 | } | ||
293 | } | ||
294 | } | ||
295 | #else | ||
296 | static inline void mip6_addr_swap(struct sk_buff *skb) {} | ||
297 | #endif | ||
298 | |||
276 | /* | 299 | /* |
277 | * Send an ICMP message in response to a packet in error | 300 | * Send an ICMP message in response to a packet in error |
278 | */ | 301 | */ |
@@ -350,6 +373,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, | |||
350 | return; | 373 | return; |
351 | } | 374 | } |
352 | 375 | ||
376 | mip6_addr_swap(skb); | ||
377 | |||
353 | memset(&fl, 0, sizeof(fl)); | 378 | memset(&fl, 0, sizeof(fl)); |
354 | fl.proto = IPPROTO_ICMPV6; | 379 | fl.proto = IPPROTO_ICMPV6; |
355 | ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); | 380 | ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); |
@@ -358,6 +383,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, | |||
358 | fl.oif = iif; | 383 | fl.oif = iif; |
359 | fl.fl_icmp_type = type; | 384 | fl.fl_icmp_type = type; |
360 | fl.fl_icmp_code = code; | 385 | fl.fl_icmp_code = code; |
386 | security_skb_classify_flow(skb, &fl); | ||
361 | 387 | ||
362 | if (icmpv6_xmit_lock()) | 388 | if (icmpv6_xmit_lock()) |
363 | return; | 389 | return; |
@@ -472,6 +498,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) | |||
472 | ipv6_addr_copy(&fl.fl6_src, saddr); | 498 | ipv6_addr_copy(&fl.fl6_src, saddr); |
473 | fl.oif = skb->dev->ifindex; | 499 | fl.oif = skb->dev->ifindex; |
474 | fl.fl_icmp_type = ICMPV6_ECHO_REPLY; | 500 | fl.fl_icmp_type = ICMPV6_ECHO_REPLY; |
501 | security_skb_classify_flow(skb, &fl); | ||
475 | 502 | ||
476 | if (icmpv6_xmit_lock()) | 503 | if (icmpv6_xmit_lock()) |
477 | return; | 504 | return; |
@@ -604,7 +631,7 @@ static int icmpv6_rcv(struct sk_buff **pskb) | |||
604 | 631 | ||
605 | /* Perform checksum. */ | 632 | /* Perform checksum. */ |
606 | switch (skb->ip_summed) { | 633 | switch (skb->ip_summed) { |
607 | case CHECKSUM_HW: | 634 | case CHECKSUM_COMPLETE: |
608 | if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, | 635 | if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, |
609 | skb->csum)) | 636 | skb->csum)) |
610 | break; | 637 | break; |
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index bf491077b822..827f41d1478b 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c | |||
@@ -157,6 +157,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) | |||
157 | fl.oif = sk->sk_bound_dev_if; | 157 | fl.oif = sk->sk_bound_dev_if; |
158 | fl.fl_ip_sport = inet->sport; | 158 | fl.fl_ip_sport = inet->sport; |
159 | fl.fl_ip_dport = inet->dport; | 159 | fl.fl_ip_dport = inet->dport; |
160 | security_sk_classify_flow(sk, &fl); | ||
160 | 161 | ||
161 | if (np->opt && np->opt->srcrt) { | 162 | if (np->opt && np->opt->srcrt) { |
162 | struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; | 163 | struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; |
@@ -185,7 +186,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) | |||
185 | return err; | 186 | return err; |
186 | } | 187 | } |
187 | 188 | ||
188 | __ip6_dst_store(sk, dst, NULL); | 189 | __ip6_dst_store(sk, dst, NULL, NULL); |
189 | } | 190 | } |
190 | 191 | ||
191 | skb->dst = dst_clone(dst); | 192 | skb->dst = dst_clone(dst); |
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 764221220afd..8fcae7a6510b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c | |||
@@ -18,6 +18,7 @@ | |||
18 | * Yuji SEKIYA @USAGI: Support default route on router node; | 18 | * Yuji SEKIYA @USAGI: Support default route on router node; |
19 | * remove ip6_null_entry from the top of | 19 | * remove ip6_null_entry from the top of |
20 | * routing table. | 20 | * routing table. |
21 | * Ville Nuorvala: Fixed routing subtrees. | ||
21 | */ | 22 | */ |
22 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
23 | #include <linux/types.h> | 24 | #include <linux/types.h> |
@@ -26,6 +27,7 @@ | |||
26 | #include <linux/netdevice.h> | 27 | #include <linux/netdevice.h> |
27 | #include <linux/in6.h> | 28 | #include <linux/in6.h> |
28 | #include <linux/init.h> | 29 | #include <linux/init.h> |
30 | #include <linux/list.h> | ||
29 | 31 | ||
30 | #ifdef CONFIG_PROC_FS | 32 | #ifdef CONFIG_PROC_FS |
31 | #include <linux/proc_fs.h> | 33 | #include <linux/proc_fs.h> |
@@ -68,19 +70,19 @@ struct fib6_cleaner_t | |||
68 | void *arg; | 70 | void *arg; |
69 | }; | 71 | }; |
70 | 72 | ||
71 | DEFINE_RWLOCK(fib6_walker_lock); | 73 | static DEFINE_RWLOCK(fib6_walker_lock); |
72 | |||
73 | 74 | ||
74 | #ifdef CONFIG_IPV6_SUBTREES | 75 | #ifdef CONFIG_IPV6_SUBTREES |
75 | #define FWS_INIT FWS_S | 76 | #define FWS_INIT FWS_S |
76 | #define SUBTREE(fn) ((fn)->subtree) | ||
77 | #else | 77 | #else |
78 | #define FWS_INIT FWS_L | 78 | #define FWS_INIT FWS_L |
79 | #define SUBTREE(fn) NULL | ||
80 | #endif | 79 | #endif |
81 | 80 | ||
82 | static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); | 81 | static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); |
82 | static struct rt6_info * fib6_find_prefix(struct fib6_node *fn); | ||
83 | static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); | 83 | static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); |
84 | static int fib6_walk(struct fib6_walker_t *w); | ||
85 | static int fib6_walk_continue(struct fib6_walker_t *w); | ||
84 | 86 | ||
85 | /* | 87 | /* |
86 | * A routing update causes an increase of the serial number on the | 88 | * A routing update causes an increase of the serial number on the |
@@ -93,13 +95,31 @@ static __u32 rt_sernum; | |||
93 | 95 | ||
94 | static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); | 96 | static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); |
95 | 97 | ||
96 | struct fib6_walker_t fib6_walker_list = { | 98 | static struct fib6_walker_t fib6_walker_list = { |
97 | .prev = &fib6_walker_list, | 99 | .prev = &fib6_walker_list, |
98 | .next = &fib6_walker_list, | 100 | .next = &fib6_walker_list, |
99 | }; | 101 | }; |
100 | 102 | ||
101 | #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) | 103 | #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) |
102 | 104 | ||
105 | static inline void fib6_walker_link(struct fib6_walker_t *w) | ||
106 | { | ||
107 | write_lock_bh(&fib6_walker_lock); | ||
108 | w->next = fib6_walker_list.next; | ||
109 | w->prev = &fib6_walker_list; | ||
110 | w->next->prev = w; | ||
111 | w->prev->next = w; | ||
112 | write_unlock_bh(&fib6_walker_lock); | ||
113 | } | ||
114 | |||
115 | static inline void fib6_walker_unlink(struct fib6_walker_t *w) | ||
116 | { | ||
117 | write_lock_bh(&fib6_walker_lock); | ||
118 | w->next->prev = w->prev; | ||
119 | w->prev->next = w->next; | ||
120 | w->prev = w->next = w; | ||
121 | write_unlock_bh(&fib6_walker_lock); | ||
122 | } | ||
103 | static __inline__ u32 fib6_new_sernum(void) | 123 | static __inline__ u32 fib6_new_sernum(void) |
104 | { | 124 | { |
105 | u32 n = ++rt_sernum; | 125 | u32 n = ++rt_sernum; |
@@ -147,6 +167,253 @@ static __inline__ void rt6_release(struct rt6_info *rt) | |||
147 | dst_free(&rt->u.dst); | 167 | dst_free(&rt->u.dst); |
148 | } | 168 | } |
149 | 169 | ||
170 | static struct fib6_table fib6_main_tbl = { | ||
171 | .tb6_id = RT6_TABLE_MAIN, | ||
172 | .tb6_lock = RW_LOCK_UNLOCKED, | ||
173 | .tb6_root = { | ||
174 | .leaf = &ip6_null_entry, | ||
175 | .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, | ||
176 | }, | ||
177 | }; | ||
178 | |||
179 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | ||
180 | #define FIB_TABLE_HASHSZ 256 | ||
181 | #else | ||
182 | #define FIB_TABLE_HASHSZ 1 | ||
183 | #endif | ||
184 | static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; | ||
185 | |||
186 | static void fib6_link_table(struct fib6_table *tb) | ||
187 | { | ||
188 | unsigned int h; | ||
189 | |||
190 | h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); | ||
191 | |||
192 | /* | ||
193 | * No protection necessary, this is the only list mutatation | ||
194 | * operation, tables never disappear once they exist. | ||
195 | */ | ||
196 | hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); | ||
197 | } | ||
198 | |||
199 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | ||
200 | static struct fib6_table fib6_local_tbl = { | ||
201 | .tb6_id = RT6_TABLE_LOCAL, | ||
202 | .tb6_lock = RW_LOCK_UNLOCKED, | ||
203 | .tb6_root = { | ||
204 | .leaf = &ip6_null_entry, | ||
205 | .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, | ||
206 | }, | ||
207 | }; | ||
208 | |||
209 | static struct fib6_table *fib6_alloc_table(u32 id) | ||
210 | { | ||
211 | struct fib6_table *table; | ||
212 | |||
213 | table = kzalloc(sizeof(*table), GFP_ATOMIC); | ||
214 | if (table != NULL) { | ||
215 | table->tb6_id = id; | ||
216 | table->tb6_lock = RW_LOCK_UNLOCKED; | ||
217 | table->tb6_root.leaf = &ip6_null_entry; | ||
218 | table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; | ||
219 | } | ||
220 | |||
221 | return table; | ||
222 | } | ||
223 | |||
224 | struct fib6_table *fib6_new_table(u32 id) | ||
225 | { | ||
226 | struct fib6_table *tb; | ||
227 | |||
228 | if (id == 0) | ||
229 | id = RT6_TABLE_MAIN; | ||
230 | tb = fib6_get_table(id); | ||
231 | if (tb) | ||
232 | return tb; | ||
233 | |||
234 | tb = fib6_alloc_table(id); | ||
235 | if (tb != NULL) | ||
236 | fib6_link_table(tb); | ||
237 | |||
238 | return tb; | ||
239 | } | ||
240 | |||
241 | struct fib6_table *fib6_get_table(u32 id) | ||
242 | { | ||
243 | struct fib6_table *tb; | ||
244 | struct hlist_node *node; | ||
245 | unsigned int h; | ||
246 | |||
247 | if (id == 0) | ||
248 | id = RT6_TABLE_MAIN; | ||
249 | h = id & (FIB_TABLE_HASHSZ - 1); | ||
250 | rcu_read_lock(); | ||
251 | hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) { | ||
252 | if (tb->tb6_id == id) { | ||
253 | rcu_read_unlock(); | ||
254 | return tb; | ||
255 | } | ||
256 | } | ||
257 | rcu_read_unlock(); | ||
258 | |||
259 | return NULL; | ||
260 | } | ||
261 | |||
262 | static void __init fib6_tables_init(void) | ||
263 | { | ||
264 | fib6_link_table(&fib6_main_tbl); | ||
265 | fib6_link_table(&fib6_local_tbl); | ||
266 | } | ||
267 | |||
268 | #else | ||
269 | |||
270 | struct fib6_table *fib6_new_table(u32 id) | ||
271 | { | ||
272 | return fib6_get_table(id); | ||
273 | } | ||
274 | |||
275 | struct fib6_table *fib6_get_table(u32 id) | ||
276 | { | ||
277 | return &fib6_main_tbl; | ||
278 | } | ||
279 | |||
280 | struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, | ||
281 | pol_lookup_t lookup) | ||
282 | { | ||
283 | return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags); | ||
284 | } | ||
285 | |||
286 | static void __init fib6_tables_init(void) | ||
287 | { | ||
288 | fib6_link_table(&fib6_main_tbl); | ||
289 | } | ||
290 | |||
291 | #endif | ||
292 | |||
293 | static int fib6_dump_node(struct fib6_walker_t *w) | ||
294 | { | ||
295 | int res; | ||
296 | struct rt6_info *rt; | ||
297 | |||
298 | for (rt = w->leaf; rt; rt = rt->u.next) { | ||
299 | res = rt6_dump_route(rt, w->args); | ||
300 | if (res < 0) { | ||
301 | /* Frame is full, suspend walking */ | ||
302 | w->leaf = rt; | ||
303 | return 1; | ||
304 | } | ||
305 | BUG_TRAP(res!=0); | ||
306 | } | ||
307 | w->leaf = NULL; | ||
308 | return 0; | ||
309 | } | ||
310 | |||
311 | static void fib6_dump_end(struct netlink_callback *cb) | ||
312 | { | ||
313 | struct fib6_walker_t *w = (void*)cb->args[2]; | ||
314 | |||
315 | if (w) { | ||
316 | cb->args[2] = 0; | ||
317 | kfree(w); | ||
318 | } | ||
319 | cb->done = (void*)cb->args[3]; | ||
320 | cb->args[1] = 3; | ||
321 | } | ||
322 | |||
323 | static int fib6_dump_done(struct netlink_callback *cb) | ||
324 | { | ||
325 | fib6_dump_end(cb); | ||
326 | return cb->done ? cb->done(cb) : 0; | ||
327 | } | ||
328 | |||
329 | static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, | ||
330 | struct netlink_callback *cb) | ||
331 | { | ||
332 | struct fib6_walker_t *w; | ||
333 | int res; | ||
334 | |||
335 | w = (void *)cb->args[2]; | ||
336 | w->root = &table->tb6_root; | ||
337 | |||
338 | if (cb->args[4] == 0) { | ||
339 | read_lock_bh(&table->tb6_lock); | ||
340 | res = fib6_walk(w); | ||
341 | read_unlock_bh(&table->tb6_lock); | ||
342 | if (res > 0) | ||
343 | cb->args[4] = 1; | ||
344 | } else { | ||
345 | read_lock_bh(&table->tb6_lock); | ||
346 | res = fib6_walk_continue(w); | ||
347 | read_unlock_bh(&table->tb6_lock); | ||
348 | if (res != 0) { | ||
349 | if (res < 0) | ||
350 | fib6_walker_unlink(w); | ||
351 | goto end; | ||
352 | } | ||
353 | fib6_walker_unlink(w); | ||
354 | cb->args[4] = 0; | ||
355 | } | ||
356 | end: | ||
357 | return res; | ||
358 | } | ||
359 | |||
360 | int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | ||
361 | { | ||
362 | unsigned int h, s_h; | ||
363 | unsigned int e = 0, s_e; | ||
364 | struct rt6_rtnl_dump_arg arg; | ||
365 | struct fib6_walker_t *w; | ||
366 | struct fib6_table *tb; | ||
367 | struct hlist_node *node; | ||
368 | int res = 0; | ||
369 | |||
370 | s_h = cb->args[0]; | ||
371 | s_e = cb->args[1]; | ||
372 | |||
373 | w = (void *)cb->args[2]; | ||
374 | if (w == NULL) { | ||
375 | /* New dump: | ||
376 | * | ||
377 | * 1. hook callback destructor. | ||
378 | */ | ||
379 | cb->args[3] = (long)cb->done; | ||
380 | cb->done = fib6_dump_done; | ||
381 | |||
382 | /* | ||
383 | * 2. allocate and initialize walker. | ||
384 | */ | ||
385 | w = kzalloc(sizeof(*w), GFP_ATOMIC); | ||
386 | if (w == NULL) | ||
387 | return -ENOMEM; | ||
388 | w->func = fib6_dump_node; | ||
389 | cb->args[2] = (long)w; | ||
390 | } | ||
391 | |||
392 | arg.skb = skb; | ||
393 | arg.cb = cb; | ||
394 | w->args = &arg; | ||
395 | |||
396 | for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { | ||
397 | e = 0; | ||
398 | hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) { | ||
399 | if (e < s_e) | ||
400 | goto next; | ||
401 | res = fib6_dump_table(tb, skb, cb); | ||
402 | if (res != 0) | ||
403 | goto out; | ||
404 | next: | ||
405 | e++; | ||
406 | } | ||
407 | } | ||
408 | out: | ||
409 | cb->args[1] = e; | ||
410 | cb->args[0] = h; | ||
411 | |||
412 | res = res < 0 ? res : skb->len; | ||
413 | if (res <= 0) | ||
414 | fib6_dump_end(cb); | ||
415 | return res; | ||
416 | } | ||
150 | 417 | ||
151 | /* | 418 | /* |
152 | * Routing Table | 419 | * Routing Table |
@@ -343,7 +610,7 @@ insert_above: | |||
343 | */ | 610 | */ |
344 | 611 | ||
345 | static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, | 612 | static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, |
346 | struct nlmsghdr *nlh, struct netlink_skb_parms *req) | 613 | struct nl_info *info) |
347 | { | 614 | { |
348 | struct rt6_info *iter = NULL; | 615 | struct rt6_info *iter = NULL; |
349 | struct rt6_info **ins; | 616 | struct rt6_info **ins; |
@@ -398,7 +665,7 @@ out: | |||
398 | *ins = rt; | 665 | *ins = rt; |
399 | rt->rt6i_node = fn; | 666 | rt->rt6i_node = fn; |
400 | atomic_inc(&rt->rt6i_ref); | 667 | atomic_inc(&rt->rt6i_ref); |
401 | inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req); | 668 | inet6_rt_notify(RTM_NEWROUTE, rt, info); |
402 | rt6_stats.fib_rt_entries++; | 669 | rt6_stats.fib_rt_entries++; |
403 | 670 | ||
404 | if ((fn->fn_flags & RTN_RTINFO) == 0) { | 671 | if ((fn->fn_flags & RTN_RTINFO) == 0) { |
@@ -428,10 +695,9 @@ void fib6_force_start_gc(void) | |||
428 | * with source addr info in sub-trees | 695 | * with source addr info in sub-trees |
429 | */ | 696 | */ |
430 | 697 | ||
431 | int fib6_add(struct fib6_node *root, struct rt6_info *rt, | 698 | int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) |
432 | struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) | ||
433 | { | 699 | { |
434 | struct fib6_node *fn; | 700 | struct fib6_node *fn, *pn = NULL; |
435 | int err = -ENOMEM; | 701 | int err = -ENOMEM; |
436 | 702 | ||
437 | fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), | 703 | fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), |
@@ -440,6 +706,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, | |||
440 | if (fn == NULL) | 706 | if (fn == NULL) |
441 | goto out; | 707 | goto out; |
442 | 708 | ||
709 | pn = fn; | ||
710 | |||
443 | #ifdef CONFIG_IPV6_SUBTREES | 711 | #ifdef CONFIG_IPV6_SUBTREES |
444 | if (rt->rt6i_src.plen) { | 712 | if (rt->rt6i_src.plen) { |
445 | struct fib6_node *sn; | 713 | struct fib6_node *sn; |
@@ -485,10 +753,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, | |||
485 | /* Now link new subtree to main tree */ | 753 | /* Now link new subtree to main tree */ |
486 | sfn->parent = fn; | 754 | sfn->parent = fn; |
487 | fn->subtree = sfn; | 755 | fn->subtree = sfn; |
488 | if (fn->leaf == NULL) { | ||
489 | fn->leaf = rt; | ||
490 | atomic_inc(&rt->rt6i_ref); | ||
491 | } | ||
492 | } else { | 756 | } else { |
493 | sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, | 757 | sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, |
494 | sizeof(struct in6_addr), rt->rt6i_src.plen, | 758 | sizeof(struct in6_addr), rt->rt6i_src.plen, |
@@ -498,21 +762,42 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, | |||
498 | goto st_failure; | 762 | goto st_failure; |
499 | } | 763 | } |
500 | 764 | ||
765 | if (fn->leaf == NULL) { | ||
766 | fn->leaf = rt; | ||
767 | atomic_inc(&rt->rt6i_ref); | ||
768 | } | ||
501 | fn = sn; | 769 | fn = sn; |
502 | } | 770 | } |
503 | #endif | 771 | #endif |
504 | 772 | ||
505 | err = fib6_add_rt2node(fn, rt, nlh, req); | 773 | err = fib6_add_rt2node(fn, rt, info); |
506 | 774 | ||
507 | if (err == 0) { | 775 | if (err == 0) { |
508 | fib6_start_gc(rt); | 776 | fib6_start_gc(rt); |
509 | if (!(rt->rt6i_flags&RTF_CACHE)) | 777 | if (!(rt->rt6i_flags&RTF_CACHE)) |
510 | fib6_prune_clones(fn, rt); | 778 | fib6_prune_clones(pn, rt); |
511 | } | 779 | } |
512 | 780 | ||
513 | out: | 781 | out: |
514 | if (err) | 782 | if (err) { |
783 | #ifdef CONFIG_IPV6_SUBTREES | ||
784 | /* | ||
785 | * If fib6_add_1 has cleared the old leaf pointer in the | ||
786 | * super-tree leaf node we have to find a new one for it. | ||
787 | */ | ||
788 | if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { | ||
789 | pn->leaf = fib6_find_prefix(pn); | ||
790 | #if RT6_DEBUG >= 2 | ||
791 | if (!pn->leaf) { | ||
792 | BUG_TRAP(pn->leaf != NULL); | ||
793 | pn->leaf = &ip6_null_entry; | ||
794 | } | ||
795 | #endif | ||
796 | atomic_inc(&pn->leaf->rt6i_ref); | ||
797 | } | ||
798 | #endif | ||
515 | dst_free(&rt->u.dst); | 799 | dst_free(&rt->u.dst); |
800 | } | ||
516 | return err; | 801 | return err; |
517 | 802 | ||
518 | #ifdef CONFIG_IPV6_SUBTREES | 803 | #ifdef CONFIG_IPV6_SUBTREES |
@@ -543,6 +828,9 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, | |||
543 | struct fib6_node *fn; | 828 | struct fib6_node *fn; |
544 | int dir; | 829 | int dir; |
545 | 830 | ||
831 | if (unlikely(args->offset == 0)) | ||
832 | return NULL; | ||
833 | |||
546 | /* | 834 | /* |
547 | * Descend on a tree | 835 | * Descend on a tree |
548 | */ | 836 | */ |
@@ -564,33 +852,26 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, | |||
564 | break; | 852 | break; |
565 | } | 853 | } |
566 | 854 | ||
567 | while ((fn->fn_flags & RTN_ROOT) == 0) { | 855 | while(fn) { |
568 | #ifdef CONFIG_IPV6_SUBTREES | 856 | if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { |
569 | if (fn->subtree) { | ||
570 | struct fib6_node *st; | ||
571 | struct lookup_args *narg; | ||
572 | |||
573 | narg = args + 1; | ||
574 | |||
575 | if (narg->addr) { | ||
576 | st = fib6_lookup_1(fn->subtree, narg); | ||
577 | |||
578 | if (st && !(st->fn_flags & RTN_ROOT)) | ||
579 | return st; | ||
580 | } | ||
581 | } | ||
582 | #endif | ||
583 | |||
584 | if (fn->fn_flags & RTN_RTINFO) { | ||
585 | struct rt6key *key; | 857 | struct rt6key *key; |
586 | 858 | ||
587 | key = (struct rt6key *) ((u8 *) fn->leaf + | 859 | key = (struct rt6key *) ((u8 *) fn->leaf + |
588 | args->offset); | 860 | args->offset); |
589 | 861 | ||
590 | if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) | 862 | if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { |
591 | return fn; | 863 | #ifdef CONFIG_IPV6_SUBTREES |
864 | if (fn->subtree) | ||
865 | fn = fib6_lookup_1(fn->subtree, args + 1); | ||
866 | #endif | ||
867 | if (!fn || fn->fn_flags & RTN_RTINFO) | ||
868 | return fn; | ||
869 | } | ||
592 | } | 870 | } |
593 | 871 | ||
872 | if (fn->fn_flags & RTN_ROOT) | ||
873 | break; | ||
874 | |||
594 | fn = fn->parent; | 875 | fn = fn->parent; |
595 | } | 876 | } |
596 | 877 | ||
@@ -600,18 +881,24 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, | |||
600 | struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, | 881 | struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, |
601 | struct in6_addr *saddr) | 882 | struct in6_addr *saddr) |
602 | { | 883 | { |
603 | struct lookup_args args[2]; | ||
604 | struct fib6_node *fn; | 884 | struct fib6_node *fn; |
605 | 885 | struct lookup_args args[] = { | |
606 | args[0].offset = offsetof(struct rt6_info, rt6i_dst); | 886 | { |
607 | args[0].addr = daddr; | 887 | .offset = offsetof(struct rt6_info, rt6i_dst), |
608 | 888 | .addr = daddr, | |
889 | }, | ||
609 | #ifdef CONFIG_IPV6_SUBTREES | 890 | #ifdef CONFIG_IPV6_SUBTREES |
610 | args[1].offset = offsetof(struct rt6_info, rt6i_src); | 891 | { |
611 | args[1].addr = saddr; | 892 | .offset = offsetof(struct rt6_info, rt6i_src), |
893 | .addr = saddr, | ||
894 | }, | ||
612 | #endif | 895 | #endif |
896 | { | ||
897 | .offset = 0, /* sentinel */ | ||
898 | } | ||
899 | }; | ||
613 | 900 | ||
614 | fn = fib6_lookup_1(root, args); | 901 | fn = fib6_lookup_1(root, daddr ? args : args + 1); |
615 | 902 | ||
616 | if (fn == NULL || fn->fn_flags & RTN_TL_ROOT) | 903 | if (fn == NULL || fn->fn_flags & RTN_TL_ROOT) |
617 | fn = root; | 904 | fn = root; |
@@ -667,10 +954,8 @@ struct fib6_node * fib6_locate(struct fib6_node *root, | |||
667 | #ifdef CONFIG_IPV6_SUBTREES | 954 | #ifdef CONFIG_IPV6_SUBTREES |
668 | if (src_len) { | 955 | if (src_len) { |
669 | BUG_TRAP(saddr!=NULL); | 956 | BUG_TRAP(saddr!=NULL); |
670 | if (fn == NULL) | 957 | if (fn && fn->subtree) |
671 | fn = fn->subtree; | 958 | fn = fib6_locate_1(fn->subtree, saddr, src_len, |
672 | if (fn) | ||
673 | fn = fib6_locate_1(fn, saddr, src_len, | ||
674 | offsetof(struct rt6_info, rt6i_src)); | 959 | offsetof(struct rt6_info, rt6i_src)); |
675 | } | 960 | } |
676 | #endif | 961 | #endif |
@@ -699,7 +984,7 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn) | |||
699 | if(fn->right) | 984 | if(fn->right) |
700 | return fn->right->leaf; | 985 | return fn->right->leaf; |
701 | 986 | ||
702 | fn = SUBTREE(fn); | 987 | fn = FIB6_SUBTREE(fn); |
703 | } | 988 | } |
704 | return NULL; | 989 | return NULL; |
705 | } | 990 | } |
@@ -730,7 +1015,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) | |||
730 | if (fn->right) child = fn->right, children |= 1; | 1015 | if (fn->right) child = fn->right, children |= 1; |
731 | if (fn->left) child = fn->left, children |= 2; | 1016 | if (fn->left) child = fn->left, children |= 2; |
732 | 1017 | ||
733 | if (children == 3 || SUBTREE(fn) | 1018 | if (children == 3 || FIB6_SUBTREE(fn) |
734 | #ifdef CONFIG_IPV6_SUBTREES | 1019 | #ifdef CONFIG_IPV6_SUBTREES |
735 | /* Subtree root (i.e. fn) may have one child */ | 1020 | /* Subtree root (i.e. fn) may have one child */ |
736 | || (children && fn->fn_flags&RTN_ROOT) | 1021 | || (children && fn->fn_flags&RTN_ROOT) |
@@ -749,9 +1034,9 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) | |||
749 | 1034 | ||
750 | pn = fn->parent; | 1035 | pn = fn->parent; |
751 | #ifdef CONFIG_IPV6_SUBTREES | 1036 | #ifdef CONFIG_IPV6_SUBTREES |
752 | if (SUBTREE(pn) == fn) { | 1037 | if (FIB6_SUBTREE(pn) == fn) { |
753 | BUG_TRAP(fn->fn_flags&RTN_ROOT); | 1038 | BUG_TRAP(fn->fn_flags&RTN_ROOT); |
754 | SUBTREE(pn) = NULL; | 1039 | FIB6_SUBTREE(pn) = NULL; |
755 | nstate = FWS_L; | 1040 | nstate = FWS_L; |
756 | } else { | 1041 | } else { |
757 | BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); | 1042 | BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); |
@@ -799,7 +1084,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) | |||
799 | read_unlock(&fib6_walker_lock); | 1084 | read_unlock(&fib6_walker_lock); |
800 | 1085 | ||
801 | node_free(fn); | 1086 | node_free(fn); |
802 | if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn)) | 1087 | if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn)) |
803 | return pn; | 1088 | return pn; |
804 | 1089 | ||
805 | rt6_release(pn->leaf); | 1090 | rt6_release(pn->leaf); |
@@ -809,7 +1094,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) | |||
809 | } | 1094 | } |
810 | 1095 | ||
811 | static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, | 1096 | static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, |
812 | struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) | 1097 | struct nl_info *info) |
813 | { | 1098 | { |
814 | struct fib6_walker_t *w; | 1099 | struct fib6_walker_t *w; |
815 | struct rt6_info *rt = *rtp; | 1100 | struct rt6_info *rt = *rtp; |
@@ -865,11 +1150,11 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, | |||
865 | if (atomic_read(&rt->rt6i_ref) != 1) BUG(); | 1150 | if (atomic_read(&rt->rt6i_ref) != 1) BUG(); |
866 | } | 1151 | } |
867 | 1152 | ||
868 | inet6_rt_notify(RTM_DELROUTE, rt, nlh, req); | 1153 | inet6_rt_notify(RTM_DELROUTE, rt, info); |
869 | rt6_release(rt); | 1154 | rt6_release(rt); |
870 | } | 1155 | } |
871 | 1156 | ||
872 | int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) | 1157 | int fib6_del(struct rt6_info *rt, struct nl_info *info) |
873 | { | 1158 | { |
874 | struct fib6_node *fn = rt->rt6i_node; | 1159 | struct fib6_node *fn = rt->rt6i_node; |
875 | struct rt6_info **rtp; | 1160 | struct rt6_info **rtp; |
@@ -885,8 +1170,18 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne | |||
885 | 1170 | ||
886 | BUG_TRAP(fn->fn_flags&RTN_RTINFO); | 1171 | BUG_TRAP(fn->fn_flags&RTN_RTINFO); |
887 | 1172 | ||
888 | if (!(rt->rt6i_flags&RTF_CACHE)) | 1173 | if (!(rt->rt6i_flags&RTF_CACHE)) { |
889 | fib6_prune_clones(fn, rt); | 1174 | struct fib6_node *pn = fn; |
1175 | #ifdef CONFIG_IPV6_SUBTREES | ||
1176 | /* clones of this route might be in another subtree */ | ||
1177 | if (rt->rt6i_src.plen) { | ||
1178 | while (!(pn->fn_flags&RTN_ROOT)) | ||
1179 | pn = pn->parent; | ||
1180 | pn = pn->parent; | ||
1181 | } | ||
1182 | #endif | ||
1183 | fib6_prune_clones(pn, rt); | ||
1184 | } | ||
890 | 1185 | ||
891 | /* | 1186 | /* |
892 | * Walk the leaf entries looking for ourself | 1187 | * Walk the leaf entries looking for ourself |
@@ -894,7 +1189,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne | |||
894 | 1189 | ||
895 | for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) { | 1190 | for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) { |
896 | if (*rtp == rt) { | 1191 | if (*rtp == rt) { |
897 | fib6_del_route(fn, rtp, nlh, _rtattr, req); | 1192 | fib6_del_route(fn, rtp, info); |
898 | return 0; | 1193 | return 0; |
899 | } | 1194 | } |
900 | } | 1195 | } |
@@ -925,7 +1220,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne | |||
925 | * <0 -> walk is terminated by an error. | 1220 | * <0 -> walk is terminated by an error. |
926 | */ | 1221 | */ |
927 | 1222 | ||
928 | int fib6_walk_continue(struct fib6_walker_t *w) | 1223 | static int fib6_walk_continue(struct fib6_walker_t *w) |
929 | { | 1224 | { |
930 | struct fib6_node *fn, *pn; | 1225 | struct fib6_node *fn, *pn; |
931 | 1226 | ||
@@ -942,8 +1237,8 @@ int fib6_walk_continue(struct fib6_walker_t *w) | |||
942 | switch (w->state) { | 1237 | switch (w->state) { |
943 | #ifdef CONFIG_IPV6_SUBTREES | 1238 | #ifdef CONFIG_IPV6_SUBTREES |
944 | case FWS_S: | 1239 | case FWS_S: |
945 | if (SUBTREE(fn)) { | 1240 | if (FIB6_SUBTREE(fn)) { |
946 | w->node = SUBTREE(fn); | 1241 | w->node = FIB6_SUBTREE(fn); |
947 | continue; | 1242 | continue; |
948 | } | 1243 | } |
949 | w->state = FWS_L; | 1244 | w->state = FWS_L; |
@@ -977,7 +1272,7 @@ int fib6_walk_continue(struct fib6_walker_t *w) | |||
977 | pn = fn->parent; | 1272 | pn = fn->parent; |
978 | w->node = pn; | 1273 | w->node = pn; |
979 | #ifdef CONFIG_IPV6_SUBTREES | 1274 | #ifdef CONFIG_IPV6_SUBTREES |
980 | if (SUBTREE(pn) == fn) { | 1275 | if (FIB6_SUBTREE(pn) == fn) { |
981 | BUG_TRAP(fn->fn_flags&RTN_ROOT); | 1276 | BUG_TRAP(fn->fn_flags&RTN_ROOT); |
982 | w->state = FWS_L; | 1277 | w->state = FWS_L; |
983 | continue; | 1278 | continue; |
@@ -999,7 +1294,7 @@ int fib6_walk_continue(struct fib6_walker_t *w) | |||
999 | } | 1294 | } |
1000 | } | 1295 | } |
1001 | 1296 | ||
1002 | int fib6_walk(struct fib6_walker_t *w) | 1297 | static int fib6_walk(struct fib6_walker_t *w) |
1003 | { | 1298 | { |
1004 | int res; | 1299 | int res; |
1005 | 1300 | ||
@@ -1023,7 +1318,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) | |||
1023 | res = c->func(rt, c->arg); | 1318 | res = c->func(rt, c->arg); |
1024 | if (res < 0) { | 1319 | if (res < 0) { |
1025 | w->leaf = rt; | 1320 | w->leaf = rt; |
1026 | res = fib6_del(rt, NULL, NULL, NULL); | 1321 | res = fib6_del(rt, NULL); |
1027 | if (res) { | 1322 | if (res) { |
1028 | #if RT6_DEBUG >= 2 | 1323 | #if RT6_DEBUG >= 2 |
1029 | printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); | 1324 | printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); |
@@ -1049,9 +1344,9 @@ static int fib6_clean_node(struct fib6_walker_t *w) | |||
1049 | * ignoring pure split nodes) will be scanned. | 1344 | * ignoring pure split nodes) will be scanned. |
1050 | */ | 1345 | */ |
1051 | 1346 | ||
1052 | void fib6_clean_tree(struct fib6_node *root, | 1347 | static void fib6_clean_tree(struct fib6_node *root, |
1053 | int (*func)(struct rt6_info *, void *arg), | 1348 | int (*func)(struct rt6_info *, void *arg), |
1054 | int prune, void *arg) | 1349 | int prune, void *arg) |
1055 | { | 1350 | { |
1056 | struct fib6_cleaner_t c; | 1351 | struct fib6_cleaner_t c; |
1057 | 1352 | ||
@@ -1064,6 +1359,25 @@ void fib6_clean_tree(struct fib6_node *root, | |||
1064 | fib6_walk(&c.w); | 1359 | fib6_walk(&c.w); |
1065 | } | 1360 | } |
1066 | 1361 | ||
1362 | void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), | ||
1363 | int prune, void *arg) | ||
1364 | { | ||
1365 | struct fib6_table *table; | ||
1366 | struct hlist_node *node; | ||
1367 | unsigned int h; | ||
1368 | |||
1369 | rcu_read_lock(); | ||
1370 | for (h = 0; h < FIB_TABLE_HASHSZ; h++) { | ||
1371 | hlist_for_each_entry_rcu(table, node, &fib_table_hash[h], | ||
1372 | tb6_hlist) { | ||
1373 | write_lock_bh(&table->tb6_lock); | ||
1374 | fib6_clean_tree(&table->tb6_root, func, prune, arg); | ||
1375 | write_unlock_bh(&table->tb6_lock); | ||
1376 | } | ||
1377 | } | ||
1378 | rcu_read_unlock(); | ||
1379 | } | ||
1380 | |||
1067 | static int fib6_prune_clone(struct rt6_info *rt, void *arg) | 1381 | static int fib6_prune_clone(struct rt6_info *rt, void *arg) |
1068 | { | 1382 | { |
1069 | if (rt->rt6i_flags & RTF_CACHE) { | 1383 | if (rt->rt6i_flags & RTF_CACHE) { |
@@ -1142,11 +1456,8 @@ void fib6_run_gc(unsigned long dummy) | |||
1142 | } | 1456 | } |
1143 | gc_args.more = 0; | 1457 | gc_args.more = 0; |
1144 | 1458 | ||
1145 | |||
1146 | write_lock_bh(&rt6_lock); | ||
1147 | ndisc_dst_gc(&gc_args.more); | 1459 | ndisc_dst_gc(&gc_args.more); |
1148 | fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); | 1460 | fib6_clean_all(fib6_age, 0, NULL); |
1149 | write_unlock_bh(&rt6_lock); | ||
1150 | 1461 | ||
1151 | if (gc_args.more) | 1462 | if (gc_args.more) |
1152 | mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); | 1463 | mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); |
@@ -1161,10 +1472,10 @@ void __init fib6_init(void) | |||
1161 | { | 1472 | { |
1162 | fib6_node_kmem = kmem_cache_create("fib6_nodes", | 1473 | fib6_node_kmem = kmem_cache_create("fib6_nodes", |
1163 | sizeof(struct fib6_node), | 1474 | sizeof(struct fib6_node), |
1164 | 0, SLAB_HWCACHE_ALIGN, | 1475 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
1165 | NULL, NULL); | 1476 | NULL, NULL); |
1166 | if (!fib6_node_kmem) | 1477 | |
1167 | panic("cannot create fib6_nodes cache"); | 1478 | fib6_tables_init(); |
1168 | } | 1479 | } |
1169 | 1480 | ||
1170 | void fib6_gc_cleanup(void) | 1481 | void fib6_gc_cleanup(void) |
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 25c2a9e03895..6b8e6d76a58b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c | |||
@@ -111,7 +111,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt | |||
111 | } | 111 | } |
112 | 112 | ||
113 | if (hdr->nexthdr == NEXTHDR_HOP) { | 113 | if (hdr->nexthdr == NEXTHDR_HOP) { |
114 | if (ipv6_parse_hopopts(skb) < 0) { | 114 | if (ipv6_parse_hopopts(&skb) < 0) { |
115 | IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); | 115 | IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); |
116 | return 0; | 116 | return 0; |
117 | } | 117 | } |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4fb47a252913..66716911962e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -308,6 +308,56 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) | |||
308 | return 0; | 308 | return 0; |
309 | } | 309 | } |
310 | 310 | ||
311 | static int ip6_forward_proxy_check(struct sk_buff *skb) | ||
312 | { | ||
313 | struct ipv6hdr *hdr = skb->nh.ipv6h; | ||
314 | u8 nexthdr = hdr->nexthdr; | ||
315 | int offset; | ||
316 | |||
317 | if (ipv6_ext_hdr(nexthdr)) { | ||
318 | offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); | ||
319 | if (offset < 0) | ||
320 | return 0; | ||
321 | } else | ||
322 | offset = sizeof(struct ipv6hdr); | ||
323 | |||
324 | if (nexthdr == IPPROTO_ICMPV6) { | ||
325 | struct icmp6hdr *icmp6; | ||
326 | |||
327 | if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) | ||
328 | return 0; | ||
329 | |||
330 | icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset); | ||
331 | |||
332 | switch (icmp6->icmp6_type) { | ||
333 | case NDISC_ROUTER_SOLICITATION: | ||
334 | case NDISC_ROUTER_ADVERTISEMENT: | ||
335 | case NDISC_NEIGHBOUR_SOLICITATION: | ||
336 | case NDISC_NEIGHBOUR_ADVERTISEMENT: | ||
337 | case NDISC_REDIRECT: | ||
338 | /* For reaction involving unicast neighbor discovery | ||
339 | * message destined to the proxied address, pass it to | ||
340 | * input function. | ||
341 | */ | ||
342 | return 1; | ||
343 | default: | ||
344 | break; | ||
345 | } | ||
346 | } | ||
347 | |||
348 | /* | ||
349 | * The proxying router can't forward traffic sent to a link-local | ||
350 | * address, so signal the sender and discard the packet. This | ||
351 | * behavior is clarified by the MIPv6 specification. | ||
352 | */ | ||
353 | if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { | ||
354 | dst_link_failure(skb); | ||
355 | return -1; | ||
356 | } | ||
357 | |||
358 | return 0; | ||
359 | } | ||
360 | |||
311 | static inline int ip6_forward_finish(struct sk_buff *skb) | 361 | static inline int ip6_forward_finish(struct sk_buff *skb) |
312 | { | 362 | { |
313 | return dst_output(skb); | 363 | return dst_output(skb); |
@@ -362,6 +412,18 @@ int ip6_forward(struct sk_buff *skb) | |||
362 | return -ETIMEDOUT; | 412 | return -ETIMEDOUT; |
363 | } | 413 | } |
364 | 414 | ||
415 | /* XXX: idev->cnf.proxy_ndp? */ | ||
416 | if (ipv6_devconf.proxy_ndp && | ||
417 | pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { | ||
418 | int proxied = ip6_forward_proxy_check(skb); | ||
419 | if (proxied > 0) | ||
420 | return ip6_input(skb); | ||
421 | else if (proxied < 0) { | ||
422 | IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); | ||
423 | goto drop; | ||
424 | } | ||
425 | } | ||
426 | |||
365 | if (!xfrm6_route_forward(skb)) { | 427 | if (!xfrm6_route_forward(skb)) { |
366 | IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); | 428 | IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); |
367 | goto drop; | 429 | goto drop; |
@@ -475,17 +537,25 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) | |||
475 | switch (**nexthdr) { | 537 | switch (**nexthdr) { |
476 | 538 | ||
477 | case NEXTHDR_HOP: | 539 | case NEXTHDR_HOP: |
540 | break; | ||
478 | case NEXTHDR_ROUTING: | 541 | case NEXTHDR_ROUTING: |
542 | found_rhdr = 1; | ||
543 | break; | ||
479 | case NEXTHDR_DEST: | 544 | case NEXTHDR_DEST: |
480 | if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1; | 545 | #ifdef CONFIG_IPV6_MIP6 |
481 | if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset; | 546 | if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) |
482 | offset += ipv6_optlen(exthdr); | 547 | break; |
483 | *nexthdr = &exthdr->nexthdr; | 548 | #endif |
484 | exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); | 549 | if (found_rhdr) |
550 | return offset; | ||
485 | break; | 551 | break; |
486 | default : | 552 | default : |
487 | return offset; | 553 | return offset; |
488 | } | 554 | } |
555 | |||
556 | offset += ipv6_optlen(exthdr); | ||
557 | *nexthdr = &exthdr->nexthdr; | ||
558 | exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); | ||
489 | } | 559 | } |
490 | 560 | ||
491 | return offset; | 561 | return offset; |
@@ -726,6 +796,14 @@ fail: | |||
726 | return err; | 796 | return err; |
727 | } | 797 | } |
728 | 798 | ||
799 | static inline int ip6_rt_check(struct rt6key *rt_key, | ||
800 | struct in6_addr *fl_addr, | ||
801 | struct in6_addr *addr_cache) | ||
802 | { | ||
803 | return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && | ||
804 | (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); | ||
805 | } | ||
806 | |||
729 | static struct dst_entry *ip6_sk_dst_check(struct sock *sk, | 807 | static struct dst_entry *ip6_sk_dst_check(struct sock *sk, |
730 | struct dst_entry *dst, | 808 | struct dst_entry *dst, |
731 | struct flowi *fl) | 809 | struct flowi *fl) |
@@ -741,8 +819,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, | |||
741 | * that we do not support routing by source, TOS, | 819 | * that we do not support routing by source, TOS, |
742 | * and MSG_DONTROUTE --ANK (980726) | 820 | * and MSG_DONTROUTE --ANK (980726) |
743 | * | 821 | * |
744 | * 1. If route was host route, check that | 822 | * 1. ip6_rt_check(): If route was host route, |
745 | * cached destination is current. | 823 | * check that cached destination is current. |
746 | * If it is network route, we still may | 824 | * If it is network route, we still may |
747 | * check its validity using saved pointer | 825 | * check its validity using saved pointer |
748 | * to the last used address: daddr_cache. | 826 | * to the last used address: daddr_cache. |
@@ -753,11 +831,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, | |||
753 | * sockets. | 831 | * sockets. |
754 | * 2. oif also should be the same. | 832 | * 2. oif also should be the same. |
755 | */ | 833 | */ |
756 | if (((rt->rt6i_dst.plen != 128 || | 834 | if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || |
757 | !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr)) | 835 | #ifdef CONFIG_IPV6_SUBTREES |
758 | && (np->daddr_cache == NULL || | 836 | ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || |
759 | !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache))) | 837 | #endif |
760 | || (fl->oif && fl->oif != dst->dev->ifindex)) { | 838 | (fl->oif && fl->oif != dst->dev->ifindex)) { |
761 | dst_release(dst); | 839 | dst_release(dst); |
762 | dst = NULL; | 840 | dst = NULL; |
763 | } | 841 | } |
@@ -866,7 +944,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, | |||
866 | /* initialize protocol header pointer */ | 944 | /* initialize protocol header pointer */ |
867 | skb->h.raw = skb->data + fragheaderlen; | 945 | skb->h.raw = skb->data + fragheaderlen; |
868 | 946 | ||
869 | skb->ip_summed = CHECKSUM_HW; | 947 | skb->ip_summed = CHECKSUM_PARTIAL; |
870 | skb->csum = 0; | 948 | skb->csum = 0; |
871 | sk->sk_sndmsg_off = 0; | 949 | sk->sk_sndmsg_off = 0; |
872 | } | 950 | } |
@@ -963,7 +1041,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, | |||
963 | 1041 | ||
964 | hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); | 1042 | hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); |
965 | 1043 | ||
966 | fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0); | 1044 | fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0); |
967 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); | 1045 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); |
968 | 1046 | ||
969 | if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { | 1047 | if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { |
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index a81e9e9d93bd..ad9c6e824e62 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c | |||
@@ -212,7 +212,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) | |||
212 | memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); | 212 | memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); |
213 | memcpy(&t->sel, &x->sel, sizeof(t->sel)); | 213 | memcpy(&t->sel, &x->sel, sizeof(t->sel)); |
214 | t->props.family = AF_INET6; | 214 | t->props.family = AF_INET6; |
215 | t->props.mode = 1; | 215 | t->props.mode = XFRM_MODE_TUNNEL; |
216 | memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); | 216 | memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); |
217 | 217 | ||
218 | if (xfrm_init_state(t)) | 218 | if (xfrm_init_state(t)) |
@@ -417,7 +417,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) | |||
417 | goto out; | 417 | goto out; |
418 | 418 | ||
419 | x->props.header_len = 0; | 419 | x->props.header_len = 0; |
420 | if (x->props.mode) | 420 | if (x->props.mode == XFRM_MODE_TUNNEL) |
421 | x->props.header_len += sizeof(struct ipv6hdr); | 421 | x->props.header_len += sizeof(struct ipv6hdr); |
422 | 422 | ||
423 | mutex_lock(&ipcomp6_resource_mutex); | 423 | mutex_lock(&ipcomp6_resource_mutex); |
@@ -429,7 +429,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) | |||
429 | goto error; | 429 | goto error; |
430 | mutex_unlock(&ipcomp6_resource_mutex); | 430 | mutex_unlock(&ipcomp6_resource_mutex); |
431 | 431 | ||
432 | if (x->props.mode) { | 432 | if (x->props.mode == XFRM_MODE_TUNNEL) { |
433 | err = ipcomp6_tunnel_attach(x); | 433 | err = ipcomp6_tunnel_attach(x); |
434 | if (err) | 434 | if (err) |
435 | goto error_tunnel; | 435 | goto error_tunnel; |
@@ -461,6 +461,7 @@ static struct xfrm_type ipcomp6_type = | |||
461 | .destructor = ipcomp6_destroy, | 461 | .destructor = ipcomp6_destroy, |
462 | .input = ipcomp6_input, | 462 | .input = ipcomp6_input, |
463 | .output = ipcomp6_output, | 463 | .output = ipcomp6_output, |
464 | .hdr_offset = xfrm6_find_1stfragopt, | ||
464 | }; | 465 | }; |
465 | 466 | ||
466 | static struct inet6_protocol ipcomp6_protocol = | 467 | static struct inet6_protocol ipcomp6_protocol = |
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a5eaaf693abf..4f3bb7fcc8b5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c | |||
@@ -407,8 +407,16 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, | |||
407 | /* routing header option needs extra check */ | 407 | /* routing header option needs extra check */ |
408 | if (optname == IPV6_RTHDR && opt->srcrt) { | 408 | if (optname == IPV6_RTHDR && opt->srcrt) { |
409 | struct ipv6_rt_hdr *rthdr = opt->srcrt; | 409 | struct ipv6_rt_hdr *rthdr = opt->srcrt; |
410 | if (rthdr->type) | 410 | switch (rthdr->type) { |
411 | case IPV6_SRCRT_TYPE_0: | ||
412 | #ifdef CONFIG_IPV6_MIP6 | ||
413 | case IPV6_SRCRT_TYPE_2: | ||
414 | #endif | ||
415 | break; | ||
416 | default: | ||
411 | goto sticky_done; | 417 | goto sticky_done; |
418 | } | ||
419 | |||
412 | if ((rthdr->hdrlen & 1) || | 420 | if ((rthdr->hdrlen & 1) || |
413 | (rthdr->hdrlen >> 1) != rthdr->segments_left) | 421 | (rthdr->hdrlen >> 1) != rthdr->segments_left) |
414 | goto sticky_done; | 422 | goto sticky_done; |
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index dd4d1ce77769..0e8e0676a033 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c | |||
@@ -14,7 +14,6 @@ EXPORT_SYMBOL(ndisc_mc_map); | |||
14 | EXPORT_SYMBOL(register_inet6addr_notifier); | 14 | EXPORT_SYMBOL(register_inet6addr_notifier); |
15 | EXPORT_SYMBOL(unregister_inet6addr_notifier); | 15 | EXPORT_SYMBOL(unregister_inet6addr_notifier); |
16 | EXPORT_SYMBOL(ip6_route_output); | 16 | EXPORT_SYMBOL(ip6_route_output); |
17 | EXPORT_SYMBOL(addrconf_lock); | ||
18 | EXPORT_SYMBOL(ipv6_setsockopt); | 17 | EXPORT_SYMBOL(ipv6_setsockopt); |
19 | EXPORT_SYMBOL(ipv6_getsockopt); | 18 | EXPORT_SYMBOL(ipv6_getsockopt); |
20 | EXPORT_SYMBOL(inet6_register_protosw); | 19 | EXPORT_SYMBOL(inet6_register_protosw); |
@@ -31,6 +30,8 @@ EXPORT_SYMBOL(ipv6_chk_addr); | |||
31 | EXPORT_SYMBOL(in6_dev_finish_destroy); | 30 | EXPORT_SYMBOL(in6_dev_finish_destroy); |
32 | #ifdef CONFIG_XFRM | 31 | #ifdef CONFIG_XFRM |
33 | EXPORT_SYMBOL(xfrm6_rcv); | 32 | EXPORT_SYMBOL(xfrm6_rcv); |
33 | EXPORT_SYMBOL(xfrm6_input_addr); | ||
34 | EXPORT_SYMBOL(xfrm6_find_1stfragopt); | ||
34 | #endif | 35 | #endif |
35 | EXPORT_SYMBOL(rt6_lookup); | 36 | EXPORT_SYMBOL(rt6_lookup); |
36 | EXPORT_SYMBOL(ipv6_push_nfrag_opts); | 37 | EXPORT_SYMBOL(ipv6_push_nfrag_opts); |
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 639eb20c9f1f..3b114e3fa2f8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c | |||
@@ -171,7 +171,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, | |||
171 | 171 | ||
172 | #define IPV6_MLD_MAX_MSF 64 | 172 | #define IPV6_MLD_MAX_MSF 64 |
173 | 173 | ||
174 | int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF; | 174 | int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; |
175 | 175 | ||
176 | /* | 176 | /* |
177 | * socket join on multicast group | 177 | * socket join on multicast group |
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c new file mode 100644 index 000000000000..99d116caecda --- /dev/null +++ b/net/ipv6/mip6.c | |||
@@ -0,0 +1,519 @@ | |||
1 | /* | ||
2 | * Copyright (C)2003-2006 Helsinki University of Technology | ||
3 | * Copyright (C)2003-2006 USAGI/WIDE Project | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | /* | ||
20 | * Authors: | ||
21 | * Noriaki TAKAMIYA @USAGI | ||
22 | * Masahide NAKAMURA @USAGI | ||
23 | */ | ||
24 | |||
25 | #include <linux/config.h> | ||
26 | #include <linux/module.h> | ||
27 | #include <linux/skbuff.h> | ||
28 | #include <linux/time.h> | ||
29 | #include <linux/ipv6.h> | ||
30 | #include <linux/icmpv6.h> | ||
31 | #include <net/sock.h> | ||
32 | #include <net/ipv6.h> | ||
33 | #include <net/ip6_checksum.h> | ||
34 | #include <net/xfrm.h> | ||
35 | #include <net/mip6.h> | ||
36 | |||
37 | static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr) | ||
38 | { | ||
39 | return x->coaddr; | ||
40 | } | ||
41 | |||
42 | static inline unsigned int calc_padlen(unsigned int len, unsigned int n) | ||
43 | { | ||
44 | return (n - len + 16) & 0x7; | ||
45 | } | ||
46 | |||
47 | static inline void *mip6_padn(__u8 *data, __u8 padlen) | ||
48 | { | ||
49 | if (!data) | ||
50 | return NULL; | ||
51 | if (padlen == 1) { | ||
52 | data[0] = MIP6_OPT_PAD_1; | ||
53 | } else if (padlen > 1) { | ||
54 | data[0] = MIP6_OPT_PAD_N; | ||
55 | data[1] = padlen - 2; | ||
56 | if (padlen > 2) | ||
57 | memset(data+2, 0, data[1]); | ||
58 | } | ||
59 | return data + padlen; | ||
60 | } | ||
61 | |||
62 | static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos) | ||
63 | { | ||
64 | icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); | ||
65 | } | ||
66 | |||
67 | static int mip6_mh_len(int type) | ||
68 | { | ||
69 | int len = 0; | ||
70 | |||
71 | switch (type) { | ||
72 | case IP6_MH_TYPE_BRR: | ||
73 | len = 0; | ||
74 | break; | ||
75 | case IP6_MH_TYPE_HOTI: | ||
76 | case IP6_MH_TYPE_COTI: | ||
77 | case IP6_MH_TYPE_BU: | ||
78 | case IP6_MH_TYPE_BACK: | ||
79 | len = 1; | ||
80 | break; | ||
81 | case IP6_MH_TYPE_HOT: | ||
82 | case IP6_MH_TYPE_COT: | ||
83 | case IP6_MH_TYPE_BERROR: | ||
84 | len = 2; | ||
85 | break; | ||
86 | } | ||
87 | return len; | ||
88 | } | ||
89 | |||
90 | int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) | ||
91 | { | ||
92 | struct ip6_mh *mh; | ||
93 | int mhlen; | ||
94 | |||
95 | if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) || | ||
96 | !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3))) | ||
97 | return -1; | ||
98 | |||
99 | mh = (struct ip6_mh *)skb->h.raw; | ||
100 | |||
101 | if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { | ||
102 | LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", | ||
103 | mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); | ||
104 | mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw); | ||
105 | return -1; | ||
106 | } | ||
107 | mhlen = (mh->ip6mh_hdrlen + 1) << 3; | ||
108 | |||
109 | if (skb->ip_summed == CHECKSUM_COMPLETE) { | ||
110 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
111 | if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, | ||
112 | &skb->nh.ipv6h->daddr, | ||
113 | mhlen, IPPROTO_MH, | ||
114 | skb->csum)) { | ||
115 | LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n"); | ||
116 | skb->ip_summed = CHECKSUM_NONE; | ||
117 | } | ||
118 | } | ||
119 | if (skb->ip_summed == CHECKSUM_NONE) { | ||
120 | if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, | ||
121 | &skb->nh.ipv6h->daddr, | ||
122 | mhlen, IPPROTO_MH, | ||
123 | skb_checksum(skb, 0, mhlen, 0))) { | ||
124 | LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed " | ||
125 | "[" NIP6_FMT " > " NIP6_FMT "]\n", | ||
126 | NIP6(skb->nh.ipv6h->saddr), | ||
127 | NIP6(skb->nh.ipv6h->daddr)); | ||
128 | return -1; | ||
129 | } | ||
130 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
131 | } | ||
132 | |||
133 | if (mh->ip6mh_proto != IPPROTO_NONE) { | ||
134 | LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", | ||
135 | mh->ip6mh_proto); | ||
136 | mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw); | ||
137 | return -1; | ||
138 | } | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | struct mip6_report_rate_limiter { | ||
144 | spinlock_t lock; | ||
145 | struct timeval stamp; | ||
146 | int iif; | ||
147 | struct in6_addr src; | ||
148 | struct in6_addr dst; | ||
149 | }; | ||
150 | |||
151 | static struct mip6_report_rate_limiter mip6_report_rl = { | ||
152 | .lock = SPIN_LOCK_UNLOCKED | ||
153 | }; | ||
154 | |||
155 | static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) | ||
156 | { | ||
157 | struct ipv6hdr *iph = skb->nh.ipv6h; | ||
158 | struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; | ||
159 | |||
160 | if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && | ||
161 | !ipv6_addr_any((struct in6_addr *)x->coaddr)) | ||
162 | return -ENOENT; | ||
163 | |||
164 | return destopt->nexthdr; | ||
165 | } | ||
166 | |||
167 | /* Destination Option Header is inserted. | ||
168 | * IP Header's src address is replaced with Home Address Option in | ||
169 | * Destination Option Header. | ||
170 | */ | ||
171 | static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) | ||
172 | { | ||
173 | struct ipv6hdr *iph; | ||
174 | struct ipv6_destopt_hdr *dstopt; | ||
175 | struct ipv6_destopt_hao *hao; | ||
176 | u8 nexthdr; | ||
177 | int len; | ||
178 | |||
179 | iph = (struct ipv6hdr *)skb->data; | ||
180 | iph->payload_len = htons(skb->len - sizeof(*iph)); | ||
181 | |||
182 | nexthdr = *skb->nh.raw; | ||
183 | *skb->nh.raw = IPPROTO_DSTOPTS; | ||
184 | |||
185 | dstopt = (struct ipv6_destopt_hdr *)skb->h.raw; | ||
186 | dstopt->nexthdr = nexthdr; | ||
187 | |||
188 | hao = mip6_padn((char *)(dstopt + 1), | ||
189 | calc_padlen(sizeof(*dstopt), 6)); | ||
190 | |||
191 | hao->type = IPV6_TLV_HAO; | ||
192 | hao->length = sizeof(*hao) - 2; | ||
193 | BUG_TRAP(hao->length == 16); | ||
194 | |||
195 | len = ((char *)hao - (char *)dstopt) + sizeof(*hao); | ||
196 | |||
197 | memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr)); | ||
198 | memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); | ||
199 | |||
200 | BUG_TRAP(len == x->props.header_len); | ||
201 | dstopt->hdrlen = (x->props.header_len >> 3) - 1; | ||
202 | |||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | static inline int mip6_report_rl_allow(struct timeval *stamp, | ||
207 | struct in6_addr *dst, | ||
208 | struct in6_addr *src, int iif) | ||
209 | { | ||
210 | int allow = 0; | ||
211 | |||
212 | spin_lock_bh(&mip6_report_rl.lock); | ||
213 | if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec || | ||
214 | mip6_report_rl.stamp.tv_usec != stamp->tv_usec || | ||
215 | mip6_report_rl.iif != iif || | ||
216 | !ipv6_addr_equal(&mip6_report_rl.src, src) || | ||
217 | !ipv6_addr_equal(&mip6_report_rl.dst, dst)) { | ||
218 | mip6_report_rl.stamp.tv_sec = stamp->tv_sec; | ||
219 | mip6_report_rl.stamp.tv_usec = stamp->tv_usec; | ||
220 | mip6_report_rl.iif = iif; | ||
221 | ipv6_addr_copy(&mip6_report_rl.src, src); | ||
222 | ipv6_addr_copy(&mip6_report_rl.dst, dst); | ||
223 | allow = 1; | ||
224 | } | ||
225 | spin_unlock_bh(&mip6_report_rl.lock); | ||
226 | return allow; | ||
227 | } | ||
228 | |||
229 | static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) | ||
230 | { | ||
231 | struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; | ||
232 | struct ipv6_destopt_hao *hao = NULL; | ||
233 | struct xfrm_selector sel; | ||
234 | int offset; | ||
235 | struct timeval stamp; | ||
236 | int err = 0; | ||
237 | |||
238 | if (unlikely(fl->proto == IPPROTO_MH && | ||
239 | fl->fl_mh_type <= IP6_MH_TYPE_MAX)) | ||
240 | goto out; | ||
241 | |||
242 | if (likely(opt->dsthao)) { | ||
243 | offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); | ||
244 | if (likely(offset >= 0)) | ||
245 | hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset); | ||
246 | } | ||
247 | |||
248 | skb_get_timestamp(skb, &stamp); | ||
249 | |||
250 | if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr, | ||
251 | hao ? &hao->addr : &skb->nh.ipv6h->saddr, | ||
252 | opt->iif)) | ||
253 | goto out; | ||
254 | |||
255 | memset(&sel, 0, sizeof(sel)); | ||
256 | memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr, | ||
257 | sizeof(sel.daddr)); | ||
258 | sel.prefixlen_d = 128; | ||
259 | memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr, | ||
260 | sizeof(sel.saddr)); | ||
261 | sel.prefixlen_s = 128; | ||
262 | sel.family = AF_INET6; | ||
263 | sel.proto = fl->proto; | ||
264 | sel.dport = xfrm_flowi_dport(fl); | ||
265 | if (sel.dport) | ||
266 | sel.dport_mask = ~((__u16)0); | ||
267 | sel.sport = xfrm_flowi_sport(fl); | ||
268 | if (sel.sport) | ||
269 | sel.sport_mask = ~((__u16)0); | ||
270 | sel.ifindex = fl->oif; | ||
271 | |||
272 | err = km_report(IPPROTO_DSTOPTS, &sel, | ||
273 | (hao ? (xfrm_address_t *)&hao->addr : NULL)); | ||
274 | |||
275 | out: | ||
276 | return err; | ||
277 | } | ||
278 | |||
279 | static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, | ||
280 | u8 **nexthdr) | ||
281 | { | ||
282 | u16 offset = sizeof(struct ipv6hdr); | ||
283 | struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); | ||
284 | unsigned int packet_len = skb->tail - skb->nh.raw; | ||
285 | int found_rhdr = 0; | ||
286 | |||
287 | *nexthdr = &skb->nh.ipv6h->nexthdr; | ||
288 | |||
289 | while (offset + 1 <= packet_len) { | ||
290 | |||
291 | switch (**nexthdr) { | ||
292 | case NEXTHDR_HOP: | ||
293 | break; | ||
294 | case NEXTHDR_ROUTING: | ||
295 | found_rhdr = 1; | ||
296 | break; | ||
297 | case NEXTHDR_DEST: | ||
298 | /* | ||
299 | * HAO MUST NOT appear more than once. | ||
300 | * XXX: It is better to try to find by the end of | ||
301 | * XXX: packet if HAO exists. | ||
302 | */ | ||
303 | if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { | ||
304 | LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n"); | ||
305 | return offset; | ||
306 | } | ||
307 | |||
308 | if (found_rhdr) | ||
309 | return offset; | ||
310 | |||
311 | break; | ||
312 | default: | ||
313 | return offset; | ||
314 | } | ||
315 | |||
316 | offset += ipv6_optlen(exthdr); | ||
317 | *nexthdr = &exthdr->nexthdr; | ||
318 | exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); | ||
319 | } | ||
320 | |||
321 | return offset; | ||
322 | } | ||
323 | |||
324 | static int mip6_destopt_init_state(struct xfrm_state *x) | ||
325 | { | ||
326 | if (x->id.spi) { | ||
327 | printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, | ||
328 | x->id.spi); | ||
329 | return -EINVAL; | ||
330 | } | ||
331 | if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { | ||
332 | printk(KERN_INFO "%s: state's mode is not %u: %u\n", | ||
333 | __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); | ||
334 | return -EINVAL; | ||
335 | } | ||
336 | |||
337 | x->props.header_len = sizeof(struct ipv6_destopt_hdr) + | ||
338 | calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) + | ||
339 | sizeof(struct ipv6_destopt_hao); | ||
340 | BUG_TRAP(x->props.header_len == 24); | ||
341 | |||
342 | return 0; | ||
343 | } | ||
344 | |||
/*
 * Do nothing about destroying since it has no specific operation for
 * destination options header unlike IPsec protocols: a MIPv6 state has
 * no keys or algorithm state attached, so there is nothing to free.
 */
static void mip6_destopt_destroy(struct xfrm_state *x)
{
}
352 | |||
/*
 * xfrm type describing the Mobile IPv6 Destination Options header
 * (carries the Home Address option).  Registered for IPPROTO_DSTOPTS
 * over AF_INET6 in mip6_init().
 * NOTE(review): XFRM_TYPE_NON_FRAGMENT presumably places this header in
 * the non-fragmentable part of the packet -- confirm against xfrm.h.
 */
static struct xfrm_type mip6_destopt_type =
{
	.description	= "MIP6DESTOPT",
	.owner		= THIS_MODULE,
	.proto		= IPPROTO_DSTOPTS,
	.flags		= XFRM_TYPE_NON_FRAGMENT,
	.init_state	= mip6_destopt_init_state,
	.destructor	= mip6_destopt_destroy,
	.input		= mip6_destopt_input,
	.output		= mip6_destopt_output,
	.reject		= mip6_destopt_reject,
	.hdr_offset	= mip6_destopt_offset,
	.local_addr	= mip6_xfrm_addr,
};
367 | |||
368 | static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) | ||
369 | { | ||
370 | struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; | ||
371 | |||
372 | if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && | ||
373 | !ipv6_addr_any((struct in6_addr *)x->coaddr)) | ||
374 | return -ENOENT; | ||
375 | |||
376 | return rt2->rt_hdr.nexthdr; | ||
377 | } | ||
378 | |||
/* Routing Header type 2 is inserted.
 * IP Header's dst address is replaced with Routing Header's Home Address.
 */
static int mip6_rthdr_output(struct sk_buff *skb) /* see below */
static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipv6hdr *iph;
	struct rt2_hdr *rt2;
	u8 nexthdr;

	/* Space for the RT2 header was already reserved (header_len);
	 * refresh the payload length to account for it. */
	iph = (struct ipv6hdr *)skb->data;
	iph->payload_len = htons(skb->len - sizeof(*iph));

	/* Splice the routing header into the next-header chain. */
	nexthdr = *skb->nh.raw;
	*skb->nh.raw = IPPROTO_ROUTING;

	rt2 = (struct rt2_hdr *)skb->h.raw;
	rt2->rt_hdr.nexthdr = nexthdr;
	/* hdrlen is in 8-octet units, not counting the first 8 octets. */
	rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
	rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
	rt2->rt_hdr.segments_left = 1;
	memset(&rt2->reserved, 0, sizeof(rt2->reserved));

	BUG_TRAP(rt2->rt_hdr.hdrlen == 2);

	/* Original destination (home address) moves into the routing
	 * header; the care-of address becomes the IP destination. */
	memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr));
	memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr));

	return 0;
}
408 | |||
/*
 * Find the offset at which the type 2 routing header should be
 * inserted, and point *nexthdr at the "next header" byte that must be
 * rewritten to splice it in.  Mirrors mip6_destopt_offset(), but also
 * stops in front of any routing header that is not type 0.
 */
static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
			     u8 **nexthdr)
{
	/* Offset of the first extension header (just past the fixed header). */
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
	unsigned int packet_len = skb->tail - skb->nh.raw;
	int found_rhdr = 0;

	*nexthdr = &skb->nh.ipv6h->nexthdr;

	/* Walk while at least the first byte of a header is within the buffer. */
	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {
		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			/* Insert before any routing header that is not
			 * type 0 (the type byte is at offset + 2). */
			if (offset + 3 <= packet_len) {
				struct ipv6_rt_hdr *rt;
				rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset);
				if (rt->type != 0)
					return offset;
			}
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
			/* A destopt carrying a HAO, or one following a
			 * routing header, marks the insertion point. */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				return offset;

			if (found_rhdr)
				return offset;

			break;
		default:
			/* Any other header ends the insertable region. */
			return offset;
		}

		/* Advance past this extension header. */
		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
	}

	return offset;
}
452 | |||
453 | static int mip6_rthdr_init_state(struct xfrm_state *x) | ||
454 | { | ||
455 | if (x->id.spi) { | ||
456 | printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, | ||
457 | x->id.spi); | ||
458 | return -EINVAL; | ||
459 | } | ||
460 | if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { | ||
461 | printk(KERN_INFO "%s: state's mode is not %u: %u\n", | ||
462 | __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); | ||
463 | return -EINVAL; | ||
464 | } | ||
465 | |||
466 | x->props.header_len = sizeof(struct rt2_hdr); | ||
467 | |||
468 | return 0; | ||
469 | } | ||
470 | |||
/*
 * Do nothing about destroying since it has no specific operation for routing
 * header type 2 unlike IPsec protocols: a MIPv6 state has no keys or
 * algorithm state attached, so there is nothing to free.
 */
static void mip6_rthdr_destroy(struct xfrm_state *x)
{
}
478 | |||
/*
 * xfrm type describing the Mobile IPv6 type 2 routing header.
 * Registered for IPPROTO_ROUTING over AF_INET6 in mip6_init().
 * NOTE(review): XFRM_TYPE_NON_FRAGMENT presumably places this header in
 * the non-fragmentable part of the packet -- confirm against xfrm.h.
 */
static struct xfrm_type mip6_rthdr_type =
{
	.description	= "MIP6RT",
	.owner		= THIS_MODULE,
	.proto		= IPPROTO_ROUTING,
	.flags		= XFRM_TYPE_NON_FRAGMENT,
	.init_state	= mip6_rthdr_init_state,
	.destructor	= mip6_rthdr_destroy,
	.input		= mip6_rthdr_input,
	.output		= mip6_rthdr_output,
	.hdr_offset	= mip6_rthdr_offset,
	.remote_addr	= mip6_xfrm_addr,
};
492 | |||
493 | int __init mip6_init(void) | ||
494 | { | ||
495 | printk(KERN_INFO "Mobile IPv6\n"); | ||
496 | |||
497 | if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) { | ||
498 | printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__); | ||
499 | goto mip6_destopt_xfrm_fail; | ||
500 | } | ||
501 | if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { | ||
502 | printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); | ||
503 | goto mip6_rthdr_xfrm_fail; | ||
504 | } | ||
505 | return 0; | ||
506 | |||
507 | mip6_rthdr_xfrm_fail: | ||
508 | xfrm_unregister_type(&mip6_destopt_type, AF_INET6); | ||
509 | mip6_destopt_xfrm_fail: | ||
510 | return -EAGAIN; | ||
511 | } | ||
512 | |||
513 | void __exit mip6_fini(void) | ||
514 | { | ||
515 | if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) | ||
516 | printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); | ||
517 | if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) | ||
518 | printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__); | ||
519 | } | ||
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b50055b9278d..0304b5fe8d6a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c | |||
@@ -62,6 +62,7 @@ | |||
62 | #include <linux/sysctl.h> | 62 | #include <linux/sysctl.h> |
63 | #endif | 63 | #endif |
64 | 64 | ||
65 | #include <linux/if_addr.h> | ||
65 | #include <linux/if_arp.h> | 66 | #include <linux/if_arp.h> |
66 | #include <linux/ipv6.h> | 67 | #include <linux/ipv6.h> |
67 | #include <linux/icmpv6.h> | 68 | #include <linux/icmpv6.h> |
@@ -411,7 +412,8 @@ static void pndisc_destructor(struct pneigh_entry *n) | |||
411 | */ | 412 | */ |
412 | 413 | ||
413 | static inline void ndisc_flow_init(struct flowi *fl, u8 type, | 414 | static inline void ndisc_flow_init(struct flowi *fl, u8 type, |
414 | struct in6_addr *saddr, struct in6_addr *daddr) | 415 | struct in6_addr *saddr, struct in6_addr *daddr, |
416 | int oif) | ||
415 | { | 417 | { |
416 | memset(fl, 0, sizeof(*fl)); | 418 | memset(fl, 0, sizeof(*fl)); |
417 | ipv6_addr_copy(&fl->fl6_src, saddr); | 419 | ipv6_addr_copy(&fl->fl6_src, saddr); |
@@ -419,6 +421,8 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type, | |||
419 | fl->proto = IPPROTO_ICMPV6; | 421 | fl->proto = IPPROTO_ICMPV6; |
420 | fl->fl_icmp_type = type; | 422 | fl->fl_icmp_type = type; |
421 | fl->fl_icmp_code = 0; | 423 | fl->fl_icmp_code = 0; |
424 | fl->oif = oif; | ||
425 | security_sk_classify_flow(ndisc_socket->sk, fl); | ||
422 | } | 426 | } |
423 | 427 | ||
424 | static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, | 428 | static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, |
@@ -450,7 +454,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, | |||
450 | src_addr = &tmpaddr; | 454 | src_addr = &tmpaddr; |
451 | } | 455 | } |
452 | 456 | ||
453 | ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr); | 457 | ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr, |
458 | dev->ifindex); | ||
454 | 459 | ||
455 | dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); | 460 | dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); |
456 | if (!dst) | 461 | if (!dst) |
@@ -491,7 +496,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, | |||
491 | msg->icmph.icmp6_unused = 0; | 496 | msg->icmph.icmp6_unused = 0; |
492 | msg->icmph.icmp6_router = router; | 497 | msg->icmph.icmp6_router = router; |
493 | msg->icmph.icmp6_solicited = solicited; | 498 | msg->icmph.icmp6_solicited = solicited; |
494 | msg->icmph.icmp6_override = !!override; | 499 | msg->icmph.icmp6_override = override; |
495 | 500 | ||
496 | /* Set the target address. */ | 501 | /* Set the target address. */ |
497 | ipv6_addr_copy(&msg->target, solicited_addr); | 502 | ipv6_addr_copy(&msg->target, solicited_addr); |
@@ -540,7 +545,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, | |||
540 | saddr = &addr_buf; | 545 | saddr = &addr_buf; |
541 | } | 546 | } |
542 | 547 | ||
543 | ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr); | 548 | ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr, |
549 | dev->ifindex); | ||
544 | 550 | ||
545 | dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); | 551 | dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); |
546 | if (!dst) | 552 | if (!dst) |
@@ -615,7 +621,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, | |||
615 | int len; | 621 | int len; |
616 | int err; | 622 | int err; |
617 | 623 | ||
618 | ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr); | 624 | ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr, |
625 | dev->ifindex); | ||
619 | 626 | ||
620 | dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output); | 627 | dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output); |
621 | if (!dst) | 628 | if (!dst) |
@@ -729,8 +736,10 @@ static void ndisc_recv_ns(struct sk_buff *skb) | |||
729 | struct inet6_ifaddr *ifp; | 736 | struct inet6_ifaddr *ifp; |
730 | struct inet6_dev *idev = NULL; | 737 | struct inet6_dev *idev = NULL; |
731 | struct neighbour *neigh; | 738 | struct neighbour *neigh; |
739 | struct pneigh_entry *pneigh = NULL; | ||
732 | int dad = ipv6_addr_any(saddr); | 740 | int dad = ipv6_addr_any(saddr); |
733 | int inc; | 741 | int inc; |
742 | int is_router; | ||
734 | 743 | ||
735 | if (ipv6_addr_is_multicast(&msg->target)) { | 744 | if (ipv6_addr_is_multicast(&msg->target)) { |
736 | ND_PRINTK2(KERN_WARNING | 745 | ND_PRINTK2(KERN_WARNING |
@@ -815,7 +824,9 @@ static void ndisc_recv_ns(struct sk_buff *skb) | |||
815 | 824 | ||
816 | if (ipv6_chk_acast_addr(dev, &msg->target) || | 825 | if (ipv6_chk_acast_addr(dev, &msg->target) || |
817 | (idev->cnf.forwarding && | 826 | (idev->cnf.forwarding && |
818 | pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { | 827 | (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && |
828 | (pneigh = pneigh_lookup(&nd_tbl, | ||
829 | &msg->target, dev, 0)) != NULL)) { | ||
819 | if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && | 830 | if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && |
820 | skb->pkt_type != PACKET_HOST && | 831 | skb->pkt_type != PACKET_HOST && |
821 | inc != 0 && | 832 | inc != 0 && |
@@ -836,12 +847,14 @@ static void ndisc_recv_ns(struct sk_buff *skb) | |||
836 | goto out; | 847 | goto out; |
837 | } | 848 | } |
838 | 849 | ||
850 | is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding); | ||
851 | |||
839 | if (dad) { | 852 | if (dad) { |
840 | struct in6_addr maddr; | 853 | struct in6_addr maddr; |
841 | 854 | ||
842 | ipv6_addr_all_nodes(&maddr); | 855 | ipv6_addr_all_nodes(&maddr); |
843 | ndisc_send_na(dev, NULL, &maddr, &msg->target, | 856 | ndisc_send_na(dev, NULL, &maddr, &msg->target, |
844 | idev->cnf.forwarding, 0, (ifp != NULL), 1); | 857 | is_router, 0, (ifp != NULL), 1); |
845 | goto out; | 858 | goto out; |
846 | } | 859 | } |
847 | 860 | ||
@@ -862,7 +875,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) | |||
862 | NEIGH_UPDATE_F_OVERRIDE); | 875 | NEIGH_UPDATE_F_OVERRIDE); |
863 | if (neigh || !dev->hard_header) { | 876 | if (neigh || !dev->hard_header) { |
864 | ndisc_send_na(dev, neigh, saddr, &msg->target, | 877 | ndisc_send_na(dev, neigh, saddr, &msg->target, |
865 | idev->cnf.forwarding, | 878 | is_router, |
866 | 1, (ifp != NULL && inc), inc); | 879 | 1, (ifp != NULL && inc), inc); |
867 | if (neigh) | 880 | if (neigh) |
868 | neigh_release(neigh); | 881 | neigh_release(neigh); |
@@ -945,6 +958,20 @@ static void ndisc_recv_na(struct sk_buff *skb) | |||
945 | if (neigh->nud_state & NUD_FAILED) | 958 | if (neigh->nud_state & NUD_FAILED) |
946 | goto out; | 959 | goto out; |
947 | 960 | ||
961 | /* | ||
962 | * Don't update the neighbor cache entry on a proxy NA from | ||
963 | * ourselves because either the proxied node is off link or it | ||
964 | * has already sent a NA to us. | ||
965 | */ | ||
966 | if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && | ||
967 | ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && | ||
968 | pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { | ||
969 | /* XXX: idev->cnf.proxy_ndp */ | ||
970 | WARN_ON(skb->dst != NULL && | ||
971 | ((struct rt6_info *)skb->dst)->rt6i_idev); | ||
972 | goto out; | ||
973 | } | ||
974 | |||
948 | neigh_update(neigh, lladdr, | 975 | neigh_update(neigh, lladdr, |
949 | msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, | 976 | msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, |
950 | NEIGH_UPDATE_F_WEAK_OVERRIDE| | 977 | NEIGH_UPDATE_F_WEAK_OVERRIDE| |
@@ -959,7 +986,7 @@ static void ndisc_recv_na(struct sk_buff *skb) | |||
959 | struct rt6_info *rt; | 986 | struct rt6_info *rt; |
960 | rt = rt6_get_dflt_router(saddr, dev); | 987 | rt = rt6_get_dflt_router(saddr, dev); |
961 | if (rt) | 988 | if (rt) |
962 | ip6_del_rt(rt, NULL, NULL, NULL); | 989 | ip6_del_rt(rt); |
963 | } | 990 | } |
964 | 991 | ||
965 | out: | 992 | out: |
@@ -1112,7 +1139,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) | |||
1112 | 1139 | ||
1113 | if (rt && lifetime == 0) { | 1140 | if (rt && lifetime == 0) { |
1114 | neigh_clone(neigh); | 1141 | neigh_clone(neigh); |
1115 | ip6_del_rt(rt, NULL, NULL, NULL); | 1142 | ip6_del_rt(rt); |
1116 | rt = NULL; | 1143 | rt = NULL; |
1117 | } | 1144 | } |
1118 | 1145 | ||
@@ -1344,7 +1371,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) | |||
1344 | 1371 | ||
1345 | neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); | 1372 | neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); |
1346 | if (neigh) { | 1373 | if (neigh) { |
1347 | rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr, | 1374 | rt6_redirect(dest, &skb->nh.ipv6h->daddr, |
1375 | &skb->nh.ipv6h->saddr, neigh, lladdr, | ||
1348 | on_link); | 1376 | on_link); |
1349 | neigh_release(neigh); | 1377 | neigh_release(neigh); |
1350 | } | 1378 | } |
@@ -1380,7 +1408,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, | |||
1380 | return; | 1408 | return; |
1381 | } | 1409 | } |
1382 | 1410 | ||
1383 | ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr); | 1411 | ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr, |
1412 | dev->ifindex); | ||
1384 | 1413 | ||
1385 | dst = ip6_route_output(NULL, &fl); | 1414 | dst = ip6_route_output(NULL, &fl); |
1386 | if (dst == NULL) | 1415 | if (dst == NULL) |
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 395a417ba955..580b1aba6722 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c | |||
@@ -87,7 +87,7 @@ unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, | |||
87 | unsigned int csum = 0; | 87 | unsigned int csum = 0; |
88 | 88 | ||
89 | switch (skb->ip_summed) { | 89 | switch (skb->ip_summed) { |
90 | case CHECKSUM_HW: | 90 | case CHECKSUM_COMPLETE: |
91 | if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN) | 91 | if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN) |
92 | break; | 92 | break; |
93 | if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, | 93 | if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, |
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index eeeb57d4c9c5..ac1dfebde175 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile | |||
@@ -5,7 +5,7 @@ | |||
5 | # Link order matters here. | 5 | # Link order matters here. |
6 | obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o | 6 | obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o |
7 | obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o | 7 | obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o |
8 | obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o | 8 | obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o |
9 | obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o | 9 | obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o |
10 | obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o | 10 | obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o |
11 | obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o | 11 | obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o |
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 968a14be0d05..9510c24ca8d2 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c | |||
@@ -56,15 +56,15 @@ struct ipq_queue_entry { | |||
56 | 56 | ||
57 | typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); | 57 | typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); |
58 | 58 | ||
59 | static unsigned char copy_mode = IPQ_COPY_NONE; | 59 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; |
60 | static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; | 60 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; |
61 | static DEFINE_RWLOCK(queue_lock); | 61 | static DEFINE_RWLOCK(queue_lock); |
62 | static int peer_pid; | 62 | static int peer_pid __read_mostly; |
63 | static unsigned int copy_range; | 63 | static unsigned int copy_range __read_mostly; |
64 | static unsigned int queue_total; | 64 | static unsigned int queue_total; |
65 | static unsigned int queue_dropped = 0; | 65 | static unsigned int queue_dropped = 0; |
66 | static unsigned int queue_user_dropped = 0; | 66 | static unsigned int queue_user_dropped = 0; |
67 | static struct sock *ipqnl; | 67 | static struct sock *ipqnl __read_mostly; |
68 | static LIST_HEAD(queue_list); | 68 | static LIST_HEAD(queue_list); |
69 | static DEFINE_MUTEX(ipqnl_mutex); | 69 | static DEFINE_MUTEX(ipqnl_mutex); |
70 | 70 | ||
@@ -206,9 +206,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) | |||
206 | break; | 206 | break; |
207 | 207 | ||
208 | case IPQ_COPY_PACKET: | 208 | case IPQ_COPY_PACKET: |
209 | if (entry->skb->ip_summed == CHECKSUM_HW && | 209 | if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || |
210 | (*errp = skb_checksum_help(entry->skb, | 210 | entry->skb->ip_summed == CHECKSUM_COMPLETE) && |
211 | entry->info->outdev == NULL))) { | 211 | (*errp = skb_checksum_help(entry->skb))) { |
212 | read_unlock_bh(&queue_lock); | 212 | read_unlock_bh(&queue_lock); |
213 | return NULL; | 213 | return NULL; |
214 | } | 214 | } |
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c9d6b23cd3f7..4ab368fa0b8f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c | |||
@@ -70,9 +70,6 @@ do { \ | |||
70 | #define IP_NF_ASSERT(x) | 70 | #define IP_NF_ASSERT(x) |
71 | #endif | 71 | #endif |
72 | 72 | ||
73 | |||
74 | #include <linux/netfilter_ipv4/listhelp.h> | ||
75 | |||
76 | #if 0 | 73 | #if 0 |
77 | /* All the better to debug you with... */ | 74 | /* All the better to debug you with... */ |
78 | #define static | 75 | #define static |
@@ -220,8 +217,7 @@ ip6t_error(struct sk_buff **pskb, | |||
220 | const struct net_device *out, | 217 | const struct net_device *out, |
221 | unsigned int hooknum, | 218 | unsigned int hooknum, |
222 | const struct xt_target *target, | 219 | const struct xt_target *target, |
223 | const void *targinfo, | 220 | const void *targinfo) |
224 | void *userinfo) | ||
225 | { | 221 | { |
226 | if (net_ratelimit()) | 222 | if (net_ratelimit()) |
227 | printk("ip6_tables: error: `%s'\n", (char *)targinfo); | 223 | printk("ip6_tables: error: `%s'\n", (char *)targinfo); |
@@ -258,8 +254,7 @@ ip6t_do_table(struct sk_buff **pskb, | |||
258 | unsigned int hook, | 254 | unsigned int hook, |
259 | const struct net_device *in, | 255 | const struct net_device *in, |
260 | const struct net_device *out, | 256 | const struct net_device *out, |
261 | struct xt_table *table, | 257 | struct xt_table *table) |
262 | void *userdata) | ||
263 | { | 258 | { |
264 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); | 259 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); |
265 | int offset = 0; | 260 | int offset = 0; |
@@ -349,8 +344,7 @@ ip6t_do_table(struct sk_buff **pskb, | |||
349 | in, out, | 344 | in, out, |
350 | hook, | 345 | hook, |
351 | t->u.kernel.target, | 346 | t->u.kernel.target, |
352 | t->data, | 347 | t->data); |
353 | userdata); | ||
354 | 348 | ||
355 | #ifdef CONFIG_NETFILTER_DEBUG | 349 | #ifdef CONFIG_NETFILTER_DEBUG |
356 | if (((struct ip6t_entry *)table_base)->comefrom | 350 | if (((struct ip6t_entry *)table_base)->comefrom |
@@ -507,8 +501,7 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i) | |||
507 | return 1; | 501 | return 1; |
508 | 502 | ||
509 | if (m->u.kernel.match->destroy) | 503 | if (m->u.kernel.match->destroy) |
510 | m->u.kernel.match->destroy(m->u.kernel.match, m->data, | 504 | m->u.kernel.match->destroy(m->u.kernel.match, m->data); |
511 | m->u.match_size - sizeof(*m)); | ||
512 | module_put(m->u.kernel.match->me); | 505 | module_put(m->u.kernel.match->me); |
513 | return 0; | 506 | return 0; |
514 | } | 507 | } |
@@ -561,7 +554,6 @@ check_match(struct ip6t_entry_match *m, | |||
561 | 554 | ||
562 | if (m->u.kernel.match->checkentry | 555 | if (m->u.kernel.match->checkentry |
563 | && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, | 556 | && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, |
564 | m->u.match_size - sizeof(*m), | ||
565 | hookmask)) { | 557 | hookmask)) { |
566 | duprintf("ip_tables: check failed for `%s'.\n", | 558 | duprintf("ip_tables: check failed for `%s'.\n", |
567 | m->u.kernel.match->name); | 559 | m->u.kernel.match->name); |
@@ -618,12 +610,10 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, | |||
618 | if (t->u.kernel.target == &ip6t_standard_target) { | 610 | if (t->u.kernel.target == &ip6t_standard_target) { |
619 | if (!standard_check(t, size)) { | 611 | if (!standard_check(t, size)) { |
620 | ret = -EINVAL; | 612 | ret = -EINVAL; |
621 | goto cleanup_matches; | 613 | goto err; |
622 | } | 614 | } |
623 | } else if (t->u.kernel.target->checkentry | 615 | } else if (t->u.kernel.target->checkentry |
624 | && !t->u.kernel.target->checkentry(name, e, target, t->data, | 616 | && !t->u.kernel.target->checkentry(name, e, target, t->data, |
625 | t->u.target_size | ||
626 | - sizeof(*t), | ||
627 | e->comefrom)) { | 617 | e->comefrom)) { |
628 | duprintf("ip_tables: check failed for `%s'.\n", | 618 | duprintf("ip_tables: check failed for `%s'.\n", |
629 | t->u.kernel.target->name); | 619 | t->u.kernel.target->name); |
@@ -695,8 +685,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) | |||
695 | IP6T_MATCH_ITERATE(e, cleanup_match, NULL); | 685 | IP6T_MATCH_ITERATE(e, cleanup_match, NULL); |
696 | t = ip6t_get_target(e); | 686 | t = ip6t_get_target(e); |
697 | if (t->u.kernel.target->destroy) | 687 | if (t->u.kernel.target->destroy) |
698 | t->u.kernel.target->destroy(t->u.kernel.target, t->data, | 688 | t->u.kernel.target->destroy(t->u.kernel.target, t->data); |
699 | t->u.target_size - sizeof(*t)); | ||
700 | module_put(t->u.kernel.target->me); | 689 | module_put(t->u.kernel.target->me); |
701 | return 0; | 690 | return 0; |
702 | } | 691 | } |
@@ -1352,7 +1341,6 @@ icmp6_checkentry(const char *tablename, | |||
1352 | const void *entry, | 1341 | const void *entry, |
1353 | const struct xt_match *match, | 1342 | const struct xt_match *match, |
1354 | void *matchinfo, | 1343 | void *matchinfo, |
1355 | unsigned int matchsize, | ||
1356 | unsigned int hook_mask) | 1344 | unsigned int hook_mask) |
1357 | { | 1345 | { |
1358 | const struct ip6t_icmp *icmpinfo = matchinfo; | 1346 | const struct ip6t_icmp *icmpinfo = matchinfo; |
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index b8eff8ee69b1..435750f664dd 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c | |||
@@ -22,11 +22,10 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, | |||
22 | const struct net_device *out, | 22 | const struct net_device *out, |
23 | unsigned int hooknum, | 23 | unsigned int hooknum, |
24 | const struct xt_target *target, | 24 | const struct xt_target *target, |
25 | const void *targinfo, void *userinfo) | 25 | const void *targinfo) |
26 | { | 26 | { |
27 | struct ipv6hdr *ip6h; | 27 | struct ipv6hdr *ip6h; |
28 | const struct ip6t_HL_info *info = targinfo; | 28 | const struct ip6t_HL_info *info = targinfo; |
29 | u_int16_t diffs[2]; | ||
30 | int new_hl; | 29 | int new_hl; |
31 | 30 | ||
32 | if (!skb_make_writable(pskb, (*pskb)->len)) | 31 | if (!skb_make_writable(pskb, (*pskb)->len)) |
@@ -53,11 +52,8 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, | |||
53 | break; | 52 | break; |
54 | } | 53 | } |
55 | 54 | ||
56 | if (new_hl != ip6h->hop_limit) { | 55 | if (new_hl != ip6h->hop_limit) |
57 | diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF; | ||
58 | ip6h->hop_limit = new_hl; | 56 | ip6h->hop_limit = new_hl; |
59 | diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8); | ||
60 | } | ||
61 | 57 | ||
62 | return IP6T_CONTINUE; | 58 | return IP6T_CONTINUE; |
63 | } | 59 | } |
@@ -66,7 +62,6 @@ static int ip6t_hl_checkentry(const char *tablename, | |||
66 | const void *entry, | 62 | const void *entry, |
67 | const struct xt_target *target, | 63 | const struct xt_target *target, |
68 | void *targinfo, | 64 | void *targinfo, |
69 | unsigned int targinfosize, | ||
70 | unsigned int hook_mask) | 65 | unsigned int hook_mask) |
71 | { | 66 | { |
72 | struct ip6t_HL_info *info = targinfo; | 67 | struct ip6t_HL_info *info = targinfo; |
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 73c6300109d6..0cf537d30185 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c | |||
@@ -427,8 +427,7 @@ ip6t_log_target(struct sk_buff **pskb, | |||
427 | const struct net_device *out, | 427 | const struct net_device *out, |
428 | unsigned int hooknum, | 428 | unsigned int hooknum, |
429 | const struct xt_target *target, | 429 | const struct xt_target *target, |
430 | const void *targinfo, | 430 | const void *targinfo) |
431 | void *userinfo) | ||
432 | { | 431 | { |
433 | const struct ip6t_log_info *loginfo = targinfo; | 432 | const struct ip6t_log_info *loginfo = targinfo; |
434 | struct nf_loginfo li; | 433 | struct nf_loginfo li; |
@@ -452,7 +451,6 @@ static int ip6t_log_checkentry(const char *tablename, | |||
452 | const void *entry, | 451 | const void *entry, |
453 | const struct xt_target *target, | 452 | const struct xt_target *target, |
454 | void *targinfo, | 453 | void *targinfo, |
455 | unsigned int targinfosize, | ||
456 | unsigned int hook_mask) | 454 | unsigned int hook_mask) |
457 | { | 455 | { |
458 | const struct ip6t_log_info *loginfo = targinfo; | 456 | const struct ip6t_log_info *loginfo = targinfo; |
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 8629ba195d2d..311eae82feb3 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c | |||
@@ -96,6 +96,7 @@ static void send_reset(struct sk_buff *oldskb) | |||
96 | ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); | 96 | ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); |
97 | fl.fl_ip_sport = otcph.dest; | 97 | fl.fl_ip_sport = otcph.dest; |
98 | fl.fl_ip_dport = otcph.source; | 98 | fl.fl_ip_dport = otcph.source; |
99 | security_skb_classify_flow(oldskb, &fl); | ||
99 | dst = ip6_route_output(NULL, &fl); | 100 | dst = ip6_route_output(NULL, &fl); |
100 | if (dst == NULL) | 101 | if (dst == NULL) |
101 | return; | 102 | return; |
@@ -179,8 +180,7 @@ static unsigned int reject6_target(struct sk_buff **pskb, | |||
179 | const struct net_device *out, | 180 | const struct net_device *out, |
180 | unsigned int hooknum, | 181 | unsigned int hooknum, |
181 | const struct xt_target *target, | 182 | const struct xt_target *target, |
182 | const void *targinfo, | 183 | const void *targinfo) |
183 | void *userinfo) | ||
184 | { | 184 | { |
185 | const struct ip6t_reject_info *reject = targinfo; | 185 | const struct ip6t_reject_info *reject = targinfo; |
186 | 186 | ||
@@ -223,7 +223,6 @@ static int check(const char *tablename, | |||
223 | const void *entry, | 223 | const void *entry, |
224 | const struct xt_target *target, | 224 | const struct xt_target *target, |
225 | void *targinfo, | 225 | void *targinfo, |
226 | unsigned int targinfosize, | ||
227 | unsigned int hook_mask) | 226 | unsigned int hook_mask) |
228 | { | 227 | { |
229 | const struct ip6t_reject_info *rejinfo = targinfo; | 228 | const struct ip6t_reject_info *rejinfo = targinfo; |
@@ -256,9 +255,7 @@ static struct ip6t_target ip6t_reject_reg = { | |||
256 | 255 | ||
257 | static int __init ip6t_reject_init(void) | 256 | static int __init ip6t_reject_init(void) |
258 | { | 257 | { |
259 | if (ip6t_register_target(&ip6t_reject_reg)) | 258 | return ip6t_register_target(&ip6t_reject_reg); |
260 | return -EINVAL; | ||
261 | return 0; | ||
262 | } | 259 | } |
263 | 260 | ||
264 | static void __exit ip6t_reject_fini(void) | 261 | static void __exit ip6t_reject_fini(void) |
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 2f7bb20c758b..ec1b1608156c 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c | |||
@@ -102,7 +102,6 @@ checkentry(const char *tablename, | |||
102 | const void *entry, | 102 | const void *entry, |
103 | const struct xt_match *match, | 103 | const struct xt_match *match, |
104 | void *matchinfo, | 104 | void *matchinfo, |
105 | unsigned int matchinfosize, | ||
106 | unsigned int hook_mask) | 105 | unsigned int hook_mask) |
107 | { | 106 | { |
108 | const struct ip6t_ah *ahinfo = matchinfo; | 107 | const struct ip6t_ah *ahinfo = matchinfo; |
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c deleted file mode 100644 index 9422413d0571..000000000000 --- a/net/ipv6/netfilter/ip6t_dst.c +++ /dev/null | |||
@@ -1,220 +0,0 @@ | |||
1 | /* Kernel module to match Hop-by-Hop and Destination parameters. */ | ||
2 | |||
3 | /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/skbuff.h> | ||
12 | #include <linux/ipv6.h> | ||
13 | #include <linux/types.h> | ||
14 | #include <net/checksum.h> | ||
15 | #include <net/ipv6.h> | ||
16 | |||
17 | #include <asm/byteorder.h> | ||
18 | |||
19 | #include <linux/netfilter_ipv6/ip6_tables.h> | ||
20 | #include <linux/netfilter_ipv6/ip6t_opts.h> | ||
21 | |||
22 | #define HOPBYHOP 0 | ||
23 | |||
24 | MODULE_LICENSE("GPL"); | ||
25 | #if HOPBYHOP | ||
26 | MODULE_DESCRIPTION("IPv6 HbH match"); | ||
27 | #else | ||
28 | MODULE_DESCRIPTION("IPv6 DST match"); | ||
29 | #endif | ||
30 | MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); | ||
31 | |||
32 | #if 0 | ||
33 | #define DEBUGP printk | ||
34 | #else | ||
35 | #define DEBUGP(format, args...) | ||
36 | #endif | ||
37 | |||
38 | /* | ||
39 | * (Type & 0xC0) >> 6 | ||
40 | * 0 -> ignorable | ||
41 | * 1 -> must drop the packet | ||
42 | * 2 -> send ICMP PARM PROB regardless and drop packet | ||
43 | * 3 -> Send ICMP if not a multicast address and drop packet | ||
44 | * (Type & 0x20) >> 5 | ||
45 | * 0 -> invariant | ||
46 | * 1 -> can change the routing | ||
47 | * (Type & 0x1F) Type | ||
48 | * 0 -> Pad1 (only 1 byte!) | ||
49 | * 1 -> PadN LENGTH info (total length = length + 2) | ||
50 | * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k ) | ||
51 | * 5 -> RTALERT 2 x x | ||
52 | */ | ||
53 | |||
54 | static int | ||
55 | match(const struct sk_buff *skb, | ||
56 | const struct net_device *in, | ||
57 | const struct net_device *out, | ||
58 | const struct xt_match *match, | ||
59 | const void *matchinfo, | ||
60 | int offset, | ||
61 | unsigned int protoff, | ||
62 | int *hotdrop) | ||
63 | { | ||
64 | struct ipv6_opt_hdr _optsh, *oh; | ||
65 | const struct ip6t_opts *optinfo = matchinfo; | ||
66 | unsigned int temp; | ||
67 | unsigned int ptr; | ||
68 | unsigned int hdrlen = 0; | ||
69 | unsigned int ret = 0; | ||
70 | u8 _opttype, *tp = NULL; | ||
71 | u8 _optlen, *lp = NULL; | ||
72 | unsigned int optlen; | ||
73 | |||
74 | #if HOPBYHOP | ||
75 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) | ||
76 | #else | ||
77 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) | ||
78 | #endif | ||
79 | return 0; | ||
80 | |||
81 | oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); | ||
82 | if (oh == NULL) { | ||
83 | *hotdrop = 1; | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | hdrlen = ipv6_optlen(oh); | ||
88 | if (skb->len - ptr < hdrlen) { | ||
89 | /* Packet smaller than it's length field */ | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); | ||
94 | |||
95 | DEBUGP("len %02X %04X %02X ", | ||
96 | optinfo->hdrlen, hdrlen, | ||
97 | (!(optinfo->flags & IP6T_OPTS_LEN) || | ||
98 | ((optinfo->hdrlen == hdrlen) ^ | ||
99 | !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); | ||
100 | |||
101 | ret = (oh != NULL) && | ||
102 | (!(optinfo->flags & IP6T_OPTS_LEN) || | ||
103 | ((optinfo->hdrlen == hdrlen) ^ | ||
104 | !!(optinfo->invflags & IP6T_OPTS_INV_LEN))); | ||
105 | |||
106 | ptr += 2; | ||
107 | hdrlen -= 2; | ||
108 | if (!(optinfo->flags & IP6T_OPTS_OPTS)) { | ||
109 | return ret; | ||
110 | } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { | ||
111 | DEBUGP("Not strict - not implemented"); | ||
112 | } else { | ||
113 | DEBUGP("Strict "); | ||
114 | DEBUGP("#%d ", optinfo->optsnr); | ||
115 | for (temp = 0; temp < optinfo->optsnr; temp++) { | ||
116 | /* type field exists ? */ | ||
117 | if (hdrlen < 1) | ||
118 | break; | ||
119 | tp = skb_header_pointer(skb, ptr, sizeof(_opttype), | ||
120 | &_opttype); | ||
121 | if (tp == NULL) | ||
122 | break; | ||
123 | |||
124 | /* Type check */ | ||
125 | if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) { | ||
126 | DEBUGP("Tbad %02X %02X\n", | ||
127 | *tp, | ||
128 | (optinfo->opts[temp] & 0xFF00) >> 8); | ||
129 | return 0; | ||
130 | } else { | ||
131 | DEBUGP("Tok "); | ||
132 | } | ||
133 | /* Length check */ | ||
134 | if (*tp) { | ||
135 | u16 spec_len; | ||
136 | |||
137 | /* length field exists ? */ | ||
138 | if (hdrlen < 2) | ||
139 | break; | ||
140 | lp = skb_header_pointer(skb, ptr + 1, | ||
141 | sizeof(_optlen), | ||
142 | &_optlen); | ||
143 | if (lp == NULL) | ||
144 | break; | ||
145 | spec_len = optinfo->opts[temp] & 0x00FF; | ||
146 | |||
147 | if (spec_len != 0x00FF && spec_len != *lp) { | ||
148 | DEBUGP("Lbad %02X %04X\n", *lp, | ||
149 | spec_len); | ||
150 | return 0; | ||
151 | } | ||
152 | DEBUGP("Lok "); | ||
153 | optlen = *lp + 2; | ||
154 | } else { | ||
155 | DEBUGP("Pad1\n"); | ||
156 | optlen = 1; | ||
157 | } | ||
158 | |||
159 | /* Step to the next */ | ||
160 | DEBUGP("len%04X \n", optlen); | ||
161 | |||
162 | if ((ptr > skb->len - optlen || hdrlen < optlen) && | ||
163 | (temp < optinfo->optsnr - 1)) { | ||
164 | DEBUGP("new pointer is too large! \n"); | ||
165 | break; | ||
166 | } | ||
167 | ptr += optlen; | ||
168 | hdrlen -= optlen; | ||
169 | } | ||
170 | if (temp == optinfo->optsnr) | ||
171 | return ret; | ||
172 | else | ||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | /* Called when user tries to insert an entry of this type. */ | ||
180 | static int | ||
181 | checkentry(const char *tablename, | ||
182 | const void *info, | ||
183 | const struct xt_match *match, | ||
184 | void *matchinfo, | ||
185 | unsigned int matchinfosize, | ||
186 | unsigned int hook_mask) | ||
187 | { | ||
188 | const struct ip6t_opts *optsinfo = matchinfo; | ||
189 | |||
190 | if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { | ||
191 | DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags); | ||
192 | return 0; | ||
193 | } | ||
194 | return 1; | ||
195 | } | ||
196 | |||
197 | static struct ip6t_match opts_match = { | ||
198 | #if HOPBYHOP | ||
199 | .name = "hbh", | ||
200 | #else | ||
201 | .name = "dst", | ||
202 | #endif | ||
203 | .match = match, | ||
204 | .matchsize = sizeof(struct ip6t_opts), | ||
205 | .checkentry = checkentry, | ||
206 | .me = THIS_MODULE, | ||
207 | }; | ||
208 | |||
209 | static int __init ip6t_dst_init(void) | ||
210 | { | ||
211 | return ip6t_register_match(&opts_match); | ||
212 | } | ||
213 | |||
214 | static void __exit ip6t_dst_fini(void) | ||
215 | { | ||
216 | ip6t_unregister_match(&opts_match); | ||
217 | } | ||
218 | |||
219 | module_init(ip6t_dst_init); | ||
220 | module_exit(ip6t_dst_fini); | ||
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 06768c84bd31..78d9c8b9e28a 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c | |||
@@ -119,7 +119,6 @@ checkentry(const char *tablename, | |||
119 | const void *ip, | 119 | const void *ip, |
120 | const struct xt_match *match, | 120 | const struct xt_match *match, |
121 | void *matchinfo, | 121 | void *matchinfo, |
122 | unsigned int matchinfosize, | ||
123 | unsigned int hook_mask) | 122 | unsigned int hook_mask) |
124 | { | 123 | { |
125 | const struct ip6t_frag *fraginfo = matchinfo; | 124 | const struct ip6t_frag *fraginfo = matchinfo; |
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 374f1be85c0d..d32a205e3af2 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c | |||
@@ -19,15 +19,10 @@ | |||
19 | #include <linux/netfilter_ipv6/ip6_tables.h> | 19 | #include <linux/netfilter_ipv6/ip6_tables.h> |
20 | #include <linux/netfilter_ipv6/ip6t_opts.h> | 20 | #include <linux/netfilter_ipv6/ip6t_opts.h> |
21 | 21 | ||
22 | #define HOPBYHOP 1 | ||
23 | |||
24 | MODULE_LICENSE("GPL"); | 22 | MODULE_LICENSE("GPL"); |
25 | #if HOPBYHOP | 23 | MODULE_DESCRIPTION("IPv6 opts match"); |
26 | MODULE_DESCRIPTION("IPv6 HbH match"); | ||
27 | #else | ||
28 | MODULE_DESCRIPTION("IPv6 DST match"); | ||
29 | #endif | ||
30 | MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); | 24 | MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); |
25 | MODULE_ALIAS("ip6t_dst"); | ||
31 | 26 | ||
32 | #if 0 | 27 | #if 0 |
33 | #define DEBUGP printk | 28 | #define DEBUGP printk |
@@ -71,11 +66,7 @@ match(const struct sk_buff *skb, | |||
71 | u8 _optlen, *lp = NULL; | 66 | u8 _optlen, *lp = NULL; |
72 | unsigned int optlen; | 67 | unsigned int optlen; |
73 | 68 | ||
74 | #if HOPBYHOP | 69 | if (ipv6_find_hdr(skb, &ptr, match->data, NULL) < 0) |
75 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) | ||
76 | #else | ||
77 | if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) | ||
78 | #endif | ||
79 | return 0; | 70 | return 0; |
80 | 71 | ||
81 | oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); | 72 | oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); |
@@ -182,7 +173,6 @@ checkentry(const char *tablename, | |||
182 | const void *entry, | 173 | const void *entry, |
183 | const struct xt_match *match, | 174 | const struct xt_match *match, |
184 | void *matchinfo, | 175 | void *matchinfo, |
185 | unsigned int matchinfosize, | ||
186 | unsigned int hook_mask) | 176 | unsigned int hook_mask) |
187 | { | 177 | { |
188 | const struct ip6t_opts *optsinfo = matchinfo; | 178 | const struct ip6t_opts *optsinfo = matchinfo; |
@@ -194,26 +184,35 @@ checkentry(const char *tablename, | |||
194 | return 1; | 184 | return 1; |
195 | } | 185 | } |
196 | 186 | ||
197 | static struct ip6t_match opts_match = { | 187 | static struct xt_match opts_match[] = { |
198 | #if HOPBYHOP | 188 | { |
199 | .name = "hbh", | 189 | .name = "hbh", |
200 | #else | 190 | .family = AF_INET6, |
201 | .name = "dst", | 191 | .match = match, |
202 | #endif | 192 | .matchsize = sizeof(struct ip6t_opts), |
203 | .match = match, | 193 | .checkentry = checkentry, |
204 | .matchsize = sizeof(struct ip6t_opts), | 194 | .me = THIS_MODULE, |
205 | .checkentry = checkentry, | 195 | .data = NEXTHDR_HOP, |
206 | .me = THIS_MODULE, | 196 | }, |
197 | { | ||
198 | .name = "dst", | ||
199 | .family = AF_INET6, | ||
200 | .match = match, | ||
201 | .matchsize = sizeof(struct ip6t_opts), | ||
202 | .checkentry = checkentry, | ||
203 | .me = THIS_MODULE, | ||
204 | .data = NEXTHDR_DEST, | ||
205 | }, | ||
207 | }; | 206 | }; |
208 | 207 | ||
209 | static int __init ip6t_hbh_init(void) | 208 | static int __init ip6t_hbh_init(void) |
210 | { | 209 | { |
211 | return ip6t_register_match(&opts_match); | 210 | return xt_register_matches(opts_match, ARRAY_SIZE(opts_match)); |
212 | } | 211 | } |
213 | 212 | ||
214 | static void __exit ip6t_hbh_fini(void) | 213 | static void __exit ip6t_hbh_fini(void) |
215 | { | 214 | { |
216 | ip6t_unregister_match(&opts_match); | 215 | xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match)); |
217 | } | 216 | } |
218 | 217 | ||
219 | module_init(ip6t_hbh_init); | 218 | module_init(ip6t_hbh_init); |
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 9375eeb1369f..3093c398002f 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c | |||
@@ -128,7 +128,6 @@ ipv6header_checkentry(const char *tablename, | |||
128 | const void *ip, | 128 | const void *ip, |
129 | const struct xt_match *match, | 129 | const struct xt_match *match, |
130 | void *matchinfo, | 130 | void *matchinfo, |
131 | unsigned int matchsize, | ||
132 | unsigned int hook_mask) | 131 | unsigned int hook_mask) |
133 | { | 132 | { |
134 | const struct ip6t_ipv6header_info *info = matchinfo; | 133 | const struct ip6t_ipv6header_info *info = matchinfo; |
diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c index 5d047990cd44..4eb9bbc4ebc3 100644 --- a/net/ipv6/netfilter/ip6t_owner.c +++ b/net/ipv6/netfilter/ip6t_owner.c | |||
@@ -57,7 +57,6 @@ checkentry(const char *tablename, | |||
57 | const void *ip, | 57 | const void *ip, |
58 | const struct xt_match *match, | 58 | const struct xt_match *match, |
59 | void *matchinfo, | 59 | void *matchinfo, |
60 | unsigned int matchsize, | ||
61 | unsigned int hook_mask) | 60 | unsigned int hook_mask) |
62 | { | 61 | { |
63 | const struct ip6t_owner_info *info = matchinfo; | 62 | const struct ip6t_owner_info *info = matchinfo; |
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index fbb0184a41d8..bcb2e168a5bc 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c | |||
@@ -197,7 +197,6 @@ checkentry(const char *tablename, | |||
197 | const void *entry, | 197 | const void *entry, |
198 | const struct xt_match *match, | 198 | const struct xt_match *match, |
199 | void *matchinfo, | 199 | void *matchinfo, |
200 | unsigned int matchinfosize, | ||
201 | unsigned int hook_mask) | 200 | unsigned int hook_mask) |
202 | { | 201 | { |
203 | const struct ip6t_rt *rtinfo = matchinfo; | 202 | const struct ip6t_rt *rtinfo = matchinfo; |
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 60976c0c58e8..2fc07c74decf 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c | |||
@@ -108,7 +108,7 @@ ip6t_hook(unsigned int hook, | |||
108 | const struct net_device *out, | 108 | const struct net_device *out, |
109 | int (*okfn)(struct sk_buff *)) | 109 | int (*okfn)(struct sk_buff *)) |
110 | { | 110 | { |
111 | return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); | 111 | return ip6t_do_table(pskb, hook, in, out, &packet_filter); |
112 | } | 112 | } |
113 | 113 | ||
114 | static unsigned int | 114 | static unsigned int |
@@ -128,7 +128,7 @@ ip6t_local_out_hook(unsigned int hook, | |||
128 | } | 128 | } |
129 | #endif | 129 | #endif |
130 | 130 | ||
131 | return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); | 131 | return ip6t_do_table(pskb, hook, in, out, &packet_filter); |
132 | } | 132 | } |
133 | 133 | ||
134 | static struct nf_hook_ops ip6t_ops[] = { | 134 | static struct nf_hook_ops ip6t_ops[] = { |
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 03a13eab1dae..386ea260e767 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c | |||
@@ -138,7 +138,7 @@ ip6t_route_hook(unsigned int hook, | |||
138 | const struct net_device *out, | 138 | const struct net_device *out, |
139 | int (*okfn)(struct sk_buff *)) | 139 | int (*okfn)(struct sk_buff *)) |
140 | { | 140 | { |
141 | return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); | 141 | return ip6t_do_table(pskb, hook, in, out, &packet_mangler); |
142 | } | 142 | } |
143 | 143 | ||
144 | static unsigned int | 144 | static unsigned int |
@@ -174,18 +174,14 @@ ip6t_local_hook(unsigned int hook, | |||
174 | /* flowlabel and prio (includes version, which shouldn't change either */ | 174 | /* flowlabel and prio (includes version, which shouldn't change either */ |
175 | flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h); | 175 | flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h); |
176 | 176 | ||
177 | ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); | 177 | ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler); |
178 | 178 | ||
179 | if (ret != NF_DROP && ret != NF_STOLEN | 179 | if (ret != NF_DROP && ret != NF_STOLEN |
180 | && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) | 180 | && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) |
181 | || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr)) | 181 | || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr)) |
182 | || (*pskb)->nfmark != nfmark | 182 | || (*pskb)->nfmark != nfmark |
183 | || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) { | 183 | || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) |
184 | 184 | return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; | |
185 | /* something which could affect routing has changed */ | ||
186 | |||
187 | DEBUGP("ip6table_mangle: we'd need to re-route a packet\n"); | ||
188 | } | ||
189 | 185 | ||
190 | return ret; | 186 | return ret; |
191 | } | 187 | } |
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 61a7c58e99f8..b4154da575c0 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c | |||
@@ -122,7 +122,7 @@ ip6t_hook(unsigned int hook, | |||
122 | const struct net_device *out, | 122 | const struct net_device *out, |
123 | int (*okfn)(struct sk_buff *)) | 123 | int (*okfn)(struct sk_buff *)) |
124 | { | 124 | { |
125 | return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL); | 125 | return ip6t_do_table(pskb, hook, in, out, &packet_raw); |
126 | } | 126 | } |
127 | 127 | ||
128 | static struct nf_hook_ops ip6t_ops[] = { | 128 | static struct nf_hook_ops ip6t_ops[] = { |
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c2ab38ff46af..e5e53fff9e38 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | |||
@@ -335,7 +335,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = { | |||
335 | /* From nf_conntrack_proto_icmpv6.c */ | 335 | /* From nf_conntrack_proto_icmpv6.c */ |
336 | extern unsigned int nf_ct_icmpv6_timeout; | 336 | extern unsigned int nf_ct_icmpv6_timeout; |
337 | 337 | ||
338 | /* From nf_conntrack_frag6.c */ | 338 | /* From nf_conntrack_reasm.c */ |
339 | extern unsigned int nf_ct_frag6_timeout; | 339 | extern unsigned int nf_ct_frag6_timeout; |
340 | extern unsigned int nf_ct_frag6_low_thresh; | 340 | extern unsigned int nf_ct_frag6_low_thresh; |
341 | extern unsigned int nf_ct_frag6_high_thresh; | 341 | extern unsigned int nf_ct_frag6_high_thresh; |
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index ef18a7b7014b..34d447208ffd 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | |||
@@ -33,7 +33,7 @@ | |||
33 | #include <net/netfilter/nf_conntrack_core.h> | 33 | #include <net/netfilter/nf_conntrack_core.h> |
34 | #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> | 34 | #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> |
35 | 35 | ||
36 | unsigned long nf_ct_icmpv6_timeout = 30*HZ; | 36 | unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; |
37 | 37 | ||
38 | #if 0 | 38 | #if 0 |
39 | #define DEBUGP printk | 39 | #define DEBUGP printk |
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 00d5583807f7..bf93c1ea6be9 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c | |||
@@ -54,9 +54,9 @@ | |||
54 | #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ | 54 | #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ |
55 | #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT | 55 | #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT |
56 | 56 | ||
57 | unsigned int nf_ct_frag6_high_thresh = 256*1024; | 57 | unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024; |
58 | unsigned int nf_ct_frag6_low_thresh = 192*1024; | 58 | unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024; |
59 | unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; | 59 | unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT; |
60 | 60 | ||
61 | struct nf_ct_frag6_skb_cb | 61 | struct nf_ct_frag6_skb_cb |
62 | { | 62 | { |
@@ -408,7 +408,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, | |||
408 | return -1; | 408 | return -1; |
409 | } | 409 | } |
410 | 410 | ||
411 | if (skb->ip_summed == CHECKSUM_HW) | 411 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
412 | skb->csum = csum_sub(skb->csum, | 412 | skb->csum = csum_sub(skb->csum, |
413 | csum_partial(skb->nh.raw, | 413 | csum_partial(skb->nh.raw, |
414 | (u8*)(fhdr + 1) - skb->nh.raw, | 414 | (u8*)(fhdr + 1) - skb->nh.raw, |
@@ -640,7 +640,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) | |||
640 | head->len += fp->len; | 640 | head->len += fp->len; |
641 | if (head->ip_summed != fp->ip_summed) | 641 | if (head->ip_summed != fp->ip_summed) |
642 | head->ip_summed = CHECKSUM_NONE; | 642 | head->ip_summed = CHECKSUM_NONE; |
643 | else if (head->ip_summed == CHECKSUM_HW) | 643 | else if (head->ip_summed == CHECKSUM_COMPLETE) |
644 | head->csum = csum_add(head->csum, fp->csum); | 644 | head->csum = csum_add(head->csum, fp->csum); |
645 | head->truesize += fp->truesize; | 645 | head->truesize += fp->truesize; |
646 | atomic_sub(fp->truesize, &nf_ct_frag6_mem); | 646 | atomic_sub(fp->truesize, &nf_ct_frag6_mem); |
@@ -652,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) | |||
652 | head->nh.ipv6h->payload_len = htons(payload_len); | 652 | head->nh.ipv6h->payload_len = htons(payload_len); |
653 | 653 | ||
654 | /* Yes, and fold redundant checksum back. 8) */ | 654 | /* Yes, and fold redundant checksum back. 8) */ |
655 | if (head->ip_summed == CHECKSUM_HW) | 655 | if (head->ip_summed == CHECKSUM_COMPLETE) |
656 | head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); | 656 | head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); |
657 | 657 | ||
658 | fq->fragments = NULL; | 658 | fq->fragments = NULL; |
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 15b862d8acab..d09329ca3267 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c | |||
@@ -50,6 +50,9 @@ | |||
50 | #include <net/udp.h> | 50 | #include <net/udp.h> |
51 | #include <net/inet_common.h> | 51 | #include <net/inet_common.h> |
52 | #include <net/tcp_states.h> | 52 | #include <net/tcp_states.h> |
53 | #ifdef CONFIG_IPV6_MIP6 | ||
54 | #include <net/mip6.h> | ||
55 | #endif | ||
53 | 56 | ||
54 | #include <net/rawv6.h> | 57 | #include <net/rawv6.h> |
55 | #include <net/xfrm.h> | 58 | #include <net/xfrm.h> |
@@ -169,8 +172,32 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) | |||
169 | sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); | 172 | sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); |
170 | 173 | ||
171 | while (sk) { | 174 | while (sk) { |
175 | int filtered; | ||
176 | |||
172 | delivered = 1; | 177 | delivered = 1; |
173 | if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { | 178 | switch (nexthdr) { |
179 | case IPPROTO_ICMPV6: | ||
180 | filtered = icmpv6_filter(sk, skb); | ||
181 | break; | ||
182 | #ifdef CONFIG_IPV6_MIP6 | ||
183 | case IPPROTO_MH: | ||
184 | /* XXX: To validate MH only once for each packet, | ||
185 | * this is placed here. It should be after checking | ||
186 | * xfrm policy, however it doesn't. The checking xfrm | ||
187 | * policy is placed in rawv6_rcv() because it is | ||
188 | * required for each socket. | ||
189 | */ | ||
190 | filtered = mip6_mh_filter(sk, skb); | ||
191 | break; | ||
192 | #endif | ||
193 | default: | ||
194 | filtered = 0; | ||
195 | break; | ||
196 | } | ||
197 | |||
198 | if (filtered < 0) | ||
199 | break; | ||
200 | if (filtered == 0) { | ||
174 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); | 201 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); |
175 | 202 | ||
176 | /* Not releasing hash table! */ | 203 | /* Not releasing hash table! */ |
@@ -334,7 +361,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) | |||
334 | if (!rp->checksum) | 361 | if (!rp->checksum) |
335 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 362 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
336 | 363 | ||
337 | if (skb->ip_summed == CHECKSUM_HW) { | 364 | if (skb->ip_summed == CHECKSUM_COMPLETE) { |
338 | skb_postpull_rcsum(skb, skb->nh.raw, | 365 | skb_postpull_rcsum(skb, skb->nh.raw, |
339 | skb->h.raw - skb->nh.raw); | 366 | skb->h.raw - skb->nh.raw); |
340 | if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr, | 367 | if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr, |
@@ -582,6 +609,9 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | |||
582 | struct iovec *iov; | 609 | struct iovec *iov; |
583 | u8 __user *type = NULL; | 610 | u8 __user *type = NULL; |
584 | u8 __user *code = NULL; | 611 | u8 __user *code = NULL; |
612 | #ifdef CONFIG_IPV6_MIP6 | ||
613 | u8 len = 0; | ||
614 | #endif | ||
585 | int probed = 0; | 615 | int probed = 0; |
586 | int i; | 616 | int i; |
587 | 617 | ||
@@ -613,6 +643,20 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | |||
613 | probed = 1; | 643 | probed = 1; |
614 | } | 644 | } |
615 | break; | 645 | break; |
646 | #ifdef CONFIG_IPV6_MIP6 | ||
647 | case IPPROTO_MH: | ||
648 | if (iov->iov_base && iov->iov_len < 1) | ||
649 | break; | ||
650 | /* check if type field is readable or not. */ | ||
651 | if (iov->iov_len > 2 - len) { | ||
652 | u8 __user *p = iov->iov_base; | ||
653 | get_user(fl->fl_mh_type, &p[2 - len]); | ||
654 | probed = 1; | ||
655 | } else | ||
656 | len += iov->iov_len; | ||
657 | |||
658 | break; | ||
659 | #endif | ||
616 | default: | 660 | default: |
617 | probed = 1; | 661 | probed = 1; |
618 | break; | 662 | break; |
@@ -759,6 +803,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, | |||
759 | 803 | ||
760 | if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) | 804 | if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) |
761 | fl.oif = np->mcast_oif; | 805 | fl.oif = np->mcast_oif; |
806 | security_sk_classify_flow(sk, &fl); | ||
762 | 807 | ||
763 | err = ip6_dst_lookup(sk, &dst, &fl); | 808 | err = ip6_dst_lookup(sk, &dst, &fl); |
764 | if (err) | 809 | if (err) |
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4e299c69e1c6..f39bbedd1327 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c | |||
@@ -53,10 +53,10 @@ | |||
53 | #include <net/ndisc.h> | 53 | #include <net/ndisc.h> |
54 | #include <net/addrconf.h> | 54 | #include <net/addrconf.h> |
55 | 55 | ||
56 | int sysctl_ip6frag_high_thresh = 256*1024; | 56 | int sysctl_ip6frag_high_thresh __read_mostly = 256*1024; |
57 | int sysctl_ip6frag_low_thresh = 192*1024; | 57 | int sysctl_ip6frag_low_thresh __read_mostly = 192*1024; |
58 | 58 | ||
59 | int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; | 59 | int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT; |
60 | 60 | ||
61 | struct ip6frag_skb_cb | 61 | struct ip6frag_skb_cb |
62 | { | 62 | { |
@@ -152,7 +152,7 @@ static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, | |||
152 | } | 152 | } |
153 | 153 | ||
154 | static struct timer_list ip6_frag_secret_timer; | 154 | static struct timer_list ip6_frag_secret_timer; |
155 | int sysctl_ip6frag_secret_interval = 10 * 60 * HZ; | 155 | int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ; |
156 | 156 | ||
157 | static void ip6_frag_secret_rebuild(unsigned long dummy) | 157 | static void ip6_frag_secret_rebuild(unsigned long dummy) |
158 | { | 158 | { |
@@ -433,7 +433,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, | |||
433 | return; | 433 | return; |
434 | } | 434 | } |
435 | 435 | ||
436 | if (skb->ip_summed == CHECKSUM_HW) | 436 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
437 | skb->csum = csum_sub(skb->csum, | 437 | skb->csum = csum_sub(skb->csum, |
438 | csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0)); | 438 | csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0)); |
439 | 439 | ||
@@ -647,7 +647,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, | |||
647 | head->len += fp->len; | 647 | head->len += fp->len; |
648 | if (head->ip_summed != fp->ip_summed) | 648 | if (head->ip_summed != fp->ip_summed) |
649 | head->ip_summed = CHECKSUM_NONE; | 649 | head->ip_summed = CHECKSUM_NONE; |
650 | else if (head->ip_summed == CHECKSUM_HW) | 650 | else if (head->ip_summed == CHECKSUM_COMPLETE) |
651 | head->csum = csum_add(head->csum, fp->csum); | 651 | head->csum = csum_add(head->csum, fp->csum); |
652 | head->truesize += fp->truesize; | 652 | head->truesize += fp->truesize; |
653 | atomic_sub(fp->truesize, &ip6_frag_mem); | 653 | atomic_sub(fp->truesize, &ip6_frag_mem); |
@@ -662,7 +662,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, | |||
662 | *skb_in = head; | 662 | *skb_in = head; |
663 | 663 | ||
664 | /* Yes, and fold redundant checksum back. 8) */ | 664 | /* Yes, and fold redundant checksum back. 8) */ |
665 | if (head->ip_summed == CHECKSUM_HW) | 665 | if (head->ip_summed == CHECKSUM_COMPLETE) |
666 | head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); | 666 | head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); |
667 | 667 | ||
668 | IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); | 668 | IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d9baca062d24..d6b4b4f48d18 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -22,6 +22,8 @@ | |||
22 | * routers in REACHABLE, STALE, DELAY or PROBE states). | 22 | * routers in REACHABLE, STALE, DELAY or PROBE states). |
23 | * - always select the same router if it is (probably) | 23 | * - always select the same router if it is (probably) |
24 | * reachable. otherwise, round-robin the list. | 24 | * reachable. otherwise, round-robin the list. |
25 | * Ville Nuorvala | ||
26 | * Fixed routing subtrees. | ||
25 | */ | 27 | */ |
26 | 28 | ||
27 | #include <linux/capability.h> | 29 | #include <linux/capability.h> |
@@ -35,7 +37,6 @@ | |||
35 | #include <linux/netdevice.h> | 37 | #include <linux/netdevice.h> |
36 | #include <linux/in6.h> | 38 | #include <linux/in6.h> |
37 | #include <linux/init.h> | 39 | #include <linux/init.h> |
38 | #include <linux/netlink.h> | ||
39 | #include <linux/if_arp.h> | 40 | #include <linux/if_arp.h> |
40 | 41 | ||
41 | #ifdef CONFIG_PROC_FS | 42 | #ifdef CONFIG_PROC_FS |
@@ -54,6 +55,7 @@ | |||
54 | #include <net/dst.h> | 55 | #include <net/dst.h> |
55 | #include <net/xfrm.h> | 56 | #include <net/xfrm.h> |
56 | #include <net/netevent.h> | 57 | #include <net/netevent.h> |
58 | #include <net/netlink.h> | ||
57 | 59 | ||
58 | #include <asm/uaccess.h> | 60 | #include <asm/uaccess.h> |
59 | 61 | ||
@@ -74,9 +76,6 @@ | |||
74 | 76 | ||
75 | #define CLONE_OFFLINK_ROUTE 0 | 77 | #define CLONE_OFFLINK_ROUTE 0 |
76 | 78 | ||
77 | #define RT6_SELECT_F_IFACE 0x1 | ||
78 | #define RT6_SELECT_F_REACHABLE 0x2 | ||
79 | |||
80 | static int ip6_rt_max_size = 4096; | 79 | static int ip6_rt_max_size = 4096; |
81 | static int ip6_rt_gc_min_interval = HZ / 2; | 80 | static int ip6_rt_gc_min_interval = HZ / 2; |
82 | static int ip6_rt_gc_timeout = 60*HZ; | 81 | static int ip6_rt_gc_timeout = 60*HZ; |
@@ -140,15 +139,49 @@ struct rt6_info ip6_null_entry = { | |||
140 | .rt6i_ref = ATOMIC_INIT(1), | 139 | .rt6i_ref = ATOMIC_INIT(1), |
141 | }; | 140 | }; |
142 | 141 | ||
143 | struct fib6_node ip6_routing_table = { | 142 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES |
144 | .leaf = &ip6_null_entry, | ||
145 | .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, | ||
146 | }; | ||
147 | 143 | ||
148 | /* Protects all the ip6 fib */ | 144 | struct rt6_info ip6_prohibit_entry = { |
145 | .u = { | ||
146 | .dst = { | ||
147 | .__refcnt = ATOMIC_INIT(1), | ||
148 | .__use = 1, | ||
149 | .dev = &loopback_dev, | ||
150 | .obsolete = -1, | ||
151 | .error = -EACCES, | ||
152 | .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, | ||
153 | .input = ip6_pkt_discard, | ||
154 | .output = ip6_pkt_discard_out, | ||
155 | .ops = &ip6_dst_ops, | ||
156 | .path = (struct dst_entry*)&ip6_prohibit_entry, | ||
157 | } | ||
158 | }, | ||
159 | .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), | ||
160 | .rt6i_metric = ~(u32) 0, | ||
161 | .rt6i_ref = ATOMIC_INIT(1), | ||
162 | }; | ||
149 | 163 | ||
150 | DEFINE_RWLOCK(rt6_lock); | 164 | struct rt6_info ip6_blk_hole_entry = { |
165 | .u = { | ||
166 | .dst = { | ||
167 | .__refcnt = ATOMIC_INIT(1), | ||
168 | .__use = 1, | ||
169 | .dev = &loopback_dev, | ||
170 | .obsolete = -1, | ||
171 | .error = -EINVAL, | ||
172 | .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, | ||
173 | .input = ip6_pkt_discard, | ||
174 | .output = ip6_pkt_discard_out, | ||
175 | .ops = &ip6_dst_ops, | ||
176 | .path = (struct dst_entry*)&ip6_blk_hole_entry, | ||
177 | } | ||
178 | }, | ||
179 | .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), | ||
180 | .rt6i_metric = ~(u32) 0, | ||
181 | .rt6i_ref = ATOMIC_INIT(1), | ||
182 | }; | ||
151 | 183 | ||
184 | #endif | ||
152 | 185 | ||
153 | /* allocate dst with ip6_dst_ops */ | 186 | /* allocate dst with ip6_dst_ops */ |
154 | static __inline__ struct rt6_info *ip6_dst_alloc(void) | 187 | static __inline__ struct rt6_info *ip6_dst_alloc(void) |
@@ -188,8 +221,14 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt) | |||
188 | time_after(jiffies, rt->rt6i_expires)); | 221 | time_after(jiffies, rt->rt6i_expires)); |
189 | } | 222 | } |
190 | 223 | ||
224 | static inline int rt6_need_strict(struct in6_addr *daddr) | ||
225 | { | ||
226 | return (ipv6_addr_type(daddr) & | ||
227 | (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); | ||
228 | } | ||
229 | |||
191 | /* | 230 | /* |
192 | * Route lookup. Any rt6_lock is implied. | 231 | * Route lookup. Any table->tb6_lock is implied. |
193 | */ | 232 | */ |
194 | 233 | ||
195 | static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, | 234 | static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, |
@@ -298,7 +337,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, | |||
298 | int m, n; | 337 | int m, n; |
299 | 338 | ||
300 | m = rt6_check_dev(rt, oif); | 339 | m = rt6_check_dev(rt, oif); |
301 | if (!m && (strict & RT6_SELECT_F_IFACE)) | 340 | if (!m && (strict & RT6_LOOKUP_F_IFACE)) |
302 | return -1; | 341 | return -1; |
303 | #ifdef CONFIG_IPV6_ROUTER_PREF | 342 | #ifdef CONFIG_IPV6_ROUTER_PREF |
304 | m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; | 343 | m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; |
@@ -306,7 +345,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, | |||
306 | n = rt6_check_neigh(rt); | 345 | n = rt6_check_neigh(rt); |
307 | if (n > 1) | 346 | if (n > 1) |
308 | m |= 16; | 347 | m |= 16; |
309 | else if (!n && strict & RT6_SELECT_F_REACHABLE) | 348 | else if (!n && strict & RT6_LOOKUP_F_REACHABLE) |
310 | return -1; | 349 | return -1; |
311 | return m; | 350 | return m; |
312 | } | 351 | } |
@@ -346,7 +385,7 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif, | |||
346 | } | 385 | } |
347 | 386 | ||
348 | if (!match && | 387 | if (!match && |
349 | (strict & RT6_SELECT_F_REACHABLE) && | 388 | (strict & RT6_LOOKUP_F_REACHABLE) && |
350 | last && last != rt0) { | 389 | last && last != rt0) { |
351 | /* no entries matched; do round-robin */ | 390 | /* no entries matched; do round-robin */ |
352 | static DEFINE_SPINLOCK(lock); | 391 | static DEFINE_SPINLOCK(lock); |
@@ -417,7 +456,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, | |||
417 | rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); | 456 | rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); |
418 | 457 | ||
419 | if (rt && !lifetime) { | 458 | if (rt && !lifetime) { |
420 | ip6_del_rt(rt, NULL, NULL, NULL); | 459 | ip6_del_rt(rt); |
421 | rt = NULL; | 460 | rt = NULL; |
422 | } | 461 | } |
423 | 462 | ||
@@ -441,44 +480,95 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, | |||
441 | } | 480 | } |
442 | #endif | 481 | #endif |
443 | 482 | ||
444 | struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, | 483 | #define BACKTRACK(saddr) \ |
445 | int oif, int strict) | 484 | do { \ |
485 | if (rt == &ip6_null_entry) { \ | ||
486 | struct fib6_node *pn; \ | ||
487 | while (fn) { \ | ||
488 | if (fn->fn_flags & RTN_TL_ROOT) \ | ||
489 | goto out; \ | ||
490 | pn = fn->parent; \ | ||
491 | if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ | ||
492 | fn = fib6_lookup(pn->subtree, NULL, saddr); \ | ||
493 | else \ | ||
494 | fn = pn; \ | ||
495 | if (fn->fn_flags & RTN_RTINFO) \ | ||
496 | goto restart; \ | ||
497 | } \ | ||
498 | } \ | ||
499 | } while(0) | ||
500 | |||
501 | static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, | ||
502 | struct flowi *fl, int flags) | ||
446 | { | 503 | { |
447 | struct fib6_node *fn; | 504 | struct fib6_node *fn; |
448 | struct rt6_info *rt; | 505 | struct rt6_info *rt; |
449 | 506 | ||
450 | read_lock_bh(&rt6_lock); | 507 | read_lock_bh(&table->tb6_lock); |
451 | fn = fib6_lookup(&ip6_routing_table, daddr, saddr); | 508 | fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); |
452 | rt = rt6_device_match(fn->leaf, oif, strict); | 509 | restart: |
510 | rt = fn->leaf; | ||
511 | rt = rt6_device_match(rt, fl->oif, flags); | ||
512 | BACKTRACK(&fl->fl6_src); | ||
513 | out: | ||
453 | dst_hold(&rt->u.dst); | 514 | dst_hold(&rt->u.dst); |
454 | rt->u.dst.__use++; | 515 | read_unlock_bh(&table->tb6_lock); |
455 | read_unlock_bh(&rt6_lock); | ||
456 | 516 | ||
457 | rt->u.dst.lastuse = jiffies; | 517 | rt->u.dst.lastuse = jiffies; |
458 | if (rt->u.dst.error == 0) | 518 | rt->u.dst.__use++; |
459 | return rt; | 519 | |
460 | dst_release(&rt->u.dst); | 520 | return rt; |
521 | |||
522 | } | ||
523 | |||
524 | struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, | ||
525 | int oif, int strict) | ||
526 | { | ||
527 | struct flowi fl = { | ||
528 | .oif = oif, | ||
529 | .nl_u = { | ||
530 | .ip6_u = { | ||
531 | .daddr = *daddr, | ||
532 | /* TODO: saddr */ | ||
533 | }, | ||
534 | }, | ||
535 | }; | ||
536 | struct dst_entry *dst; | ||
537 | int flags = strict ? RT6_LOOKUP_F_IFACE : 0; | ||
538 | |||
539 | dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); | ||
540 | if (dst->error == 0) | ||
541 | return (struct rt6_info *) dst; | ||
542 | |||
543 | dst_release(dst); | ||
544 | |||
461 | return NULL; | 545 | return NULL; |
462 | } | 546 | } |
463 | 547 | ||
464 | /* ip6_ins_rt is called with FREE rt6_lock. | 548 | /* ip6_ins_rt is called with FREE table->tb6_lock. |
465 | It takes new route entry, the addition fails by any reason the | 549 | It takes new route entry, the addition fails by any reason the |
466 | route is freed. In any case, if caller does not hold it, it may | 550 | route is freed. In any case, if caller does not hold it, it may |
467 | be destroyed. | 551 | be destroyed. |
468 | */ | 552 | */ |
469 | 553 | ||
470 | int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, | 554 | static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) |
471 | void *_rtattr, struct netlink_skb_parms *req) | ||
472 | { | 555 | { |
473 | int err; | 556 | int err; |
557 | struct fib6_table *table; | ||
474 | 558 | ||
475 | write_lock_bh(&rt6_lock); | 559 | table = rt->rt6i_table; |
476 | err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req); | 560 | write_lock_bh(&table->tb6_lock); |
477 | write_unlock_bh(&rt6_lock); | 561 | err = fib6_add(&table->tb6_root, rt, info); |
562 | write_unlock_bh(&table->tb6_lock); | ||
478 | 563 | ||
479 | return err; | 564 | return err; |
480 | } | 565 | } |
481 | 566 | ||
567 | int ip6_ins_rt(struct rt6_info *rt) | ||
568 | { | ||
569 | return __ip6_ins_rt(rt, NULL); | ||
570 | } | ||
571 | |||
482 | static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, | 572 | static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, |
483 | struct in6_addr *saddr) | 573 | struct in6_addr *saddr) |
484 | { | 574 | { |
@@ -532,51 +622,39 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d | |||
532 | return rt; | 622 | return rt; |
533 | } | 623 | } |
534 | 624 | ||
535 | #define BACKTRACK() \ | 625 | static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, |
536 | if (rt == &ip6_null_entry) { \ | 626 | struct flowi *fl, int flags) |
537 | while ((fn = fn->parent) != NULL) { \ | ||
538 | if (fn->fn_flags & RTN_ROOT) { \ | ||
539 | goto out; \ | ||
540 | } \ | ||
541 | if (fn->fn_flags & RTN_RTINFO) \ | ||
542 | goto restart; \ | ||
543 | } \ | ||
544 | } | ||
545 | |||
546 | |||
547 | void ip6_route_input(struct sk_buff *skb) | ||
548 | { | 627 | { |
549 | struct fib6_node *fn; | 628 | struct fib6_node *fn; |
550 | struct rt6_info *rt, *nrt; | 629 | struct rt6_info *rt, *nrt; |
551 | int strict; | 630 | int strict = 0; |
552 | int attempts = 3; | 631 | int attempts = 3; |
553 | int err; | 632 | int err; |
554 | int reachable = RT6_SELECT_F_REACHABLE; | 633 | int reachable = RT6_LOOKUP_F_REACHABLE; |
555 | 634 | ||
556 | strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; | 635 | strict |= flags & RT6_LOOKUP_F_IFACE; |
557 | 636 | ||
558 | relookup: | 637 | relookup: |
559 | read_lock_bh(&rt6_lock); | 638 | read_lock_bh(&table->tb6_lock); |
560 | 639 | ||
561 | restart_2: | 640 | restart_2: |
562 | fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, | 641 | fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); |
563 | &skb->nh.ipv6h->saddr); | ||
564 | 642 | ||
565 | restart: | 643 | restart: |
566 | rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable); | 644 | rt = rt6_select(&fn->leaf, fl->iif, strict | reachable); |
567 | BACKTRACK(); | 645 | BACKTRACK(&fl->fl6_src); |
568 | if (rt == &ip6_null_entry || | 646 | if (rt == &ip6_null_entry || |
569 | rt->rt6i_flags & RTF_CACHE) | 647 | rt->rt6i_flags & RTF_CACHE) |
570 | goto out; | 648 | goto out; |
571 | 649 | ||
572 | dst_hold(&rt->u.dst); | 650 | dst_hold(&rt->u.dst); |
573 | read_unlock_bh(&rt6_lock); | 651 | read_unlock_bh(&table->tb6_lock); |
574 | 652 | ||
575 | if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) | 653 | if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) |
576 | nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr); | 654 | nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); |
577 | else { | 655 | else { |
578 | #if CLONE_OFFLINK_ROUTE | 656 | #if CLONE_OFFLINK_ROUTE |
579 | nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr); | 657 | nrt = rt6_alloc_clone(rt, &fl->fl6_dst); |
580 | #else | 658 | #else |
581 | goto out2; | 659 | goto out2; |
582 | #endif | 660 | #endif |
@@ -587,7 +665,7 @@ restart: | |||
587 | 665 | ||
588 | dst_hold(&rt->u.dst); | 666 | dst_hold(&rt->u.dst); |
589 | if (nrt) { | 667 | if (nrt) { |
590 | err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb)); | 668 | err = ip6_ins_rt(nrt); |
591 | if (!err) | 669 | if (!err) |
592 | goto out2; | 670 | goto out2; |
593 | } | 671 | } |
@@ -596,7 +674,7 @@ restart: | |||
596 | goto out2; | 674 | goto out2; |
597 | 675 | ||
598 | /* | 676 | /* |
599 | * Race condition! In the gap, when rt6_lock was | 677 | * Race condition! In the gap, when table->tb6_lock was |
600 | * released someone could insert this route. Relookup. | 678 | * released someone could insert this route. Relookup. |
601 | */ | 679 | */ |
602 | dst_release(&rt->u.dst); | 680 | dst_release(&rt->u.dst); |
@@ -608,40 +686,63 @@ out: | |||
608 | goto restart_2; | 686 | goto restart_2; |
609 | } | 687 | } |
610 | dst_hold(&rt->u.dst); | 688 | dst_hold(&rt->u.dst); |
611 | read_unlock_bh(&rt6_lock); | 689 | read_unlock_bh(&table->tb6_lock); |
612 | out2: | 690 | out2: |
613 | rt->u.dst.lastuse = jiffies; | 691 | rt->u.dst.lastuse = jiffies; |
614 | rt->u.dst.__use++; | 692 | rt->u.dst.__use++; |
615 | skb->dst = (struct dst_entry *) rt; | 693 | |
616 | return; | 694 | return rt; |
617 | } | 695 | } |
618 | 696 | ||
619 | struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) | 697 | void ip6_route_input(struct sk_buff *skb) |
698 | { | ||
699 | struct ipv6hdr *iph = skb->nh.ipv6h; | ||
700 | struct flowi fl = { | ||
701 | .iif = skb->dev->ifindex, | ||
702 | .nl_u = { | ||
703 | .ip6_u = { | ||
704 | .daddr = iph->daddr, | ||
705 | .saddr = iph->saddr, | ||
706 | #ifdef CONFIG_IPV6_ROUTE_FWMARK | ||
707 | .fwmark = skb->nfmark, | ||
708 | #endif | ||
709 | .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, | ||
710 | }, | ||
711 | }, | ||
712 | .proto = iph->nexthdr, | ||
713 | }; | ||
714 | int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0; | ||
715 | |||
716 | skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); | ||
717 | } | ||
718 | |||
719 | static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, | ||
720 | struct flowi *fl, int flags) | ||
620 | { | 721 | { |
621 | struct fib6_node *fn; | 722 | struct fib6_node *fn; |
622 | struct rt6_info *rt, *nrt; | 723 | struct rt6_info *rt, *nrt; |
623 | int strict; | 724 | int strict = 0; |
624 | int attempts = 3; | 725 | int attempts = 3; |
625 | int err; | 726 | int err; |
626 | int reachable = RT6_SELECT_F_REACHABLE; | 727 | int reachable = RT6_LOOKUP_F_REACHABLE; |
627 | 728 | ||
628 | strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; | 729 | strict |= flags & RT6_LOOKUP_F_IFACE; |
629 | 730 | ||
630 | relookup: | 731 | relookup: |
631 | read_lock_bh(&rt6_lock); | 732 | read_lock_bh(&table->tb6_lock); |
632 | 733 | ||
633 | restart_2: | 734 | restart_2: |
634 | fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src); | 735 | fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); |
635 | 736 | ||
636 | restart: | 737 | restart: |
637 | rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); | 738 | rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); |
638 | BACKTRACK(); | 739 | BACKTRACK(&fl->fl6_src); |
639 | if (rt == &ip6_null_entry || | 740 | if (rt == &ip6_null_entry || |
640 | rt->rt6i_flags & RTF_CACHE) | 741 | rt->rt6i_flags & RTF_CACHE) |
641 | goto out; | 742 | goto out; |
642 | 743 | ||
643 | dst_hold(&rt->u.dst); | 744 | dst_hold(&rt->u.dst); |
644 | read_unlock_bh(&rt6_lock); | 745 | read_unlock_bh(&table->tb6_lock); |
645 | 746 | ||
646 | if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) | 747 | if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) |
647 | nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); | 748 | nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); |
@@ -658,7 +759,7 @@ restart: | |||
658 | 759 | ||
659 | dst_hold(&rt->u.dst); | 760 | dst_hold(&rt->u.dst); |
660 | if (nrt) { | 761 | if (nrt) { |
661 | err = ip6_ins_rt(nrt, NULL, NULL, NULL); | 762 | err = ip6_ins_rt(nrt); |
662 | if (!err) | 763 | if (!err) |
663 | goto out2; | 764 | goto out2; |
664 | } | 765 | } |
@@ -667,7 +768,7 @@ restart: | |||
667 | goto out2; | 768 | goto out2; |
668 | 769 | ||
669 | /* | 770 | /* |
670 | * Race condition! In the gap, when rt6_lock was | 771 | * Race condition! In the gap, when table->tb6_lock was |
671 | * released someone could insert this route. Relookup. | 772 | * released someone could insert this route. Relookup. |
672 | */ | 773 | */ |
673 | dst_release(&rt->u.dst); | 774 | dst_release(&rt->u.dst); |
@@ -679,11 +780,21 @@ out: | |||
679 | goto restart_2; | 780 | goto restart_2; |
680 | } | 781 | } |
681 | dst_hold(&rt->u.dst); | 782 | dst_hold(&rt->u.dst); |
682 | read_unlock_bh(&rt6_lock); | 783 | read_unlock_bh(&table->tb6_lock); |
683 | out2: | 784 | out2: |
684 | rt->u.dst.lastuse = jiffies; | 785 | rt->u.dst.lastuse = jiffies; |
685 | rt->u.dst.__use++; | 786 | rt->u.dst.__use++; |
686 | return &rt->u.dst; | 787 | return rt; |
788 | } | ||
789 | |||
790 | struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) | ||
791 | { | ||
792 | int flags = 0; | ||
793 | |||
794 | if (rt6_need_strict(&fl->fl6_dst)) | ||
795 | flags |= RT6_LOOKUP_F_IFACE; | ||
796 | |||
797 | return fib6_rule_lookup(fl, flags, ip6_pol_route_output); | ||
687 | } | 798 | } |
688 | 799 | ||
689 | 800 | ||
@@ -709,7 +820,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) | |||
709 | 820 | ||
710 | if (rt) { | 821 | if (rt) { |
711 | if (rt->rt6i_flags & RTF_CACHE) | 822 | if (rt->rt6i_flags & RTF_CACHE) |
712 | ip6_del_rt(rt, NULL, NULL, NULL); | 823 | ip6_del_rt(rt); |
713 | else | 824 | else |
714 | dst_release(dst); | 825 | dst_release(dst); |
715 | } | 826 | } |
@@ -747,8 +858,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
747 | } | 858 | } |
748 | } | 859 | } |
749 | 860 | ||
750 | /* Protected by rt6_lock. */ | ||
751 | static struct dst_entry *ndisc_dst_gc_list; | ||
752 | static int ipv6_get_mtu(struct net_device *dev); | 861 | static int ipv6_get_mtu(struct net_device *dev); |
753 | 862 | ||
754 | static inline unsigned int ipv6_advmss(unsigned int mtu) | 863 | static inline unsigned int ipv6_advmss(unsigned int mtu) |
@@ -769,6 +878,9 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) | |||
769 | return mtu; | 878 | return mtu; |
770 | } | 879 | } |
771 | 880 | ||
881 | static struct dst_entry *ndisc_dst_gc_list; | ||
882 | static DEFINE_SPINLOCK(ndisc_lock); | ||
883 | |||
772 | struct dst_entry *ndisc_dst_alloc(struct net_device *dev, | 884 | struct dst_entry *ndisc_dst_alloc(struct net_device *dev, |
773 | struct neighbour *neigh, | 885 | struct neighbour *neigh, |
774 | struct in6_addr *addr, | 886 | struct in6_addr *addr, |
@@ -809,10 +921,10 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, | |||
809 | rt->rt6i_dst.plen = 128; | 921 | rt->rt6i_dst.plen = 128; |
810 | #endif | 922 | #endif |
811 | 923 | ||
812 | write_lock_bh(&rt6_lock); | 924 | spin_lock_bh(&ndisc_lock); |
813 | rt->u.dst.next = ndisc_dst_gc_list; | 925 | rt->u.dst.next = ndisc_dst_gc_list; |
814 | ndisc_dst_gc_list = &rt->u.dst; | 926 | ndisc_dst_gc_list = &rt->u.dst; |
815 | write_unlock_bh(&rt6_lock); | 927 | spin_unlock_bh(&ndisc_lock); |
816 | 928 | ||
817 | fib6_force_start_gc(); | 929 | fib6_force_start_gc(); |
818 | 930 | ||
@@ -826,8 +938,11 @@ int ndisc_dst_gc(int *more) | |||
826 | int freed; | 938 | int freed; |
827 | 939 | ||
828 | next = NULL; | 940 | next = NULL; |
941 | freed = 0; | ||
942 | |||
943 | spin_lock_bh(&ndisc_lock); | ||
829 | pprev = &ndisc_dst_gc_list; | 944 | pprev = &ndisc_dst_gc_list; |
830 | freed = 0; | 945 | |
831 | while ((dst = *pprev) != NULL) { | 946 | while ((dst = *pprev) != NULL) { |
832 | if (!atomic_read(&dst->__refcnt)) { | 947 | if (!atomic_read(&dst->__refcnt)) { |
833 | *pprev = dst->next; | 948 | *pprev = dst->next; |
@@ -839,6 +954,8 @@ int ndisc_dst_gc(int *more) | |||
839 | } | 954 | } |
840 | } | 955 | } |
841 | 956 | ||
957 | spin_unlock_bh(&ndisc_lock); | ||
958 | |||
842 | return freed; | 959 | return freed; |
843 | } | 960 | } |
844 | 961 | ||
@@ -899,28 +1016,24 @@ int ipv6_get_hoplimit(struct net_device *dev) | |||
899 | * | 1016 | * |
900 | */ | 1017 | */ |
901 | 1018 | ||
902 | int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, | 1019 | int ip6_route_add(struct fib6_config *cfg) |
903 | void *_rtattr, struct netlink_skb_parms *req) | ||
904 | { | 1020 | { |
905 | int err; | 1021 | int err; |
906 | struct rtmsg *r; | ||
907 | struct rtattr **rta; | ||
908 | struct rt6_info *rt = NULL; | 1022 | struct rt6_info *rt = NULL; |
909 | struct net_device *dev = NULL; | 1023 | struct net_device *dev = NULL; |
910 | struct inet6_dev *idev = NULL; | 1024 | struct inet6_dev *idev = NULL; |
1025 | struct fib6_table *table; | ||
911 | int addr_type; | 1026 | int addr_type; |
912 | 1027 | ||
913 | rta = (struct rtattr **) _rtattr; | 1028 | if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) |
914 | |||
915 | if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) | ||
916 | return -EINVAL; | 1029 | return -EINVAL; |
917 | #ifndef CONFIG_IPV6_SUBTREES | 1030 | #ifndef CONFIG_IPV6_SUBTREES |
918 | if (rtmsg->rtmsg_src_len) | 1031 | if (cfg->fc_src_len) |
919 | return -EINVAL; | 1032 | return -EINVAL; |
920 | #endif | 1033 | #endif |
921 | if (rtmsg->rtmsg_ifindex) { | 1034 | if (cfg->fc_ifindex) { |
922 | err = -ENODEV; | 1035 | err = -ENODEV; |
923 | dev = dev_get_by_index(rtmsg->rtmsg_ifindex); | 1036 | dev = dev_get_by_index(cfg->fc_ifindex); |
924 | if (!dev) | 1037 | if (!dev) |
925 | goto out; | 1038 | goto out; |
926 | idev = in6_dev_get(dev); | 1039 | idev = in6_dev_get(dev); |
@@ -928,8 +1041,14 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, | |||
928 | goto out; | 1041 | goto out; |
929 | } | 1042 | } |
930 | 1043 | ||
931 | if (rtmsg->rtmsg_metric == 0) | 1044 | if (cfg->fc_metric == 0) |
932 | rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; | 1045 | cfg->fc_metric = IP6_RT_PRIO_USER; |
1046 | |||
1047 | table = fib6_new_table(cfg->fc_table); | ||
1048 | if (table == NULL) { | ||
1049 | err = -ENOBUFS; | ||
1050 | goto out; | ||
1051 | } | ||
933 | 1052 | ||
934 | rt = ip6_dst_alloc(); | 1053 | rt = ip6_dst_alloc(); |
935 | 1054 | ||
@@ -939,14 +1058,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, | |||
939 | } | 1058 | } |
940 | 1059 | ||
941 | rt->u.dst.obsolete = -1; | 1060 | rt->u.dst.obsolete = -1; |
942 | rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info); | 1061 | rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); |
943 | if (nlh && (r = NLMSG_DATA(nlh))) { | ||
944 | rt->rt6i_protocol = r->rtm_protocol; | ||
945 | } else { | ||
946 | rt->rt6i_protocol = RTPROT_BOOT; | ||
947 | } | ||
948 | 1062 | ||
949 | addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst); | 1063 | if (cfg->fc_protocol == RTPROT_UNSPEC) |
1064 | cfg->fc_protocol = RTPROT_BOOT; | ||
1065 | rt->rt6i_protocol = cfg->fc_protocol; | ||
1066 | |||
1067 | addr_type = ipv6_addr_type(&cfg->fc_dst); | ||
950 | 1068 | ||
951 | if (addr_type & IPV6_ADDR_MULTICAST) | 1069 | if (addr_type & IPV6_ADDR_MULTICAST) |
952 | rt->u.dst.input = ip6_mc_input; | 1070 | rt->u.dst.input = ip6_mc_input; |
@@ -955,24 +1073,22 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, | |||
955 | 1073 | ||
956 | rt->u.dst.output = ip6_output; | 1074 | rt->u.dst.output = ip6_output; |
957 | 1075 | ||
958 | ipv6_addr_prefix(&rt->rt6i_dst.addr, | 1076 | ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); |
959 | &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len); | 1077 | rt->rt6i_dst.plen = cfg->fc_dst_len; |
960 | rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len; | ||
961 | if (rt->rt6i_dst.plen == 128) | 1078 | if (rt->rt6i_dst.plen == 128) |
962 | rt->u.dst.flags = DST_HOST; | 1079 | rt->u.dst.flags = DST_HOST; |
963 | 1080 | ||
964 | #ifdef CONFIG_IPV6_SUBTREES | 1081 | #ifdef CONFIG_IPV6_SUBTREES |
965 | ipv6_addr_prefix(&rt->rt6i_src.addr, | 1082 | ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); |
966 | &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); | 1083 | rt->rt6i_src.plen = cfg->fc_src_len; |
967 | rt->rt6i_src.plen = rtmsg->rtmsg_src_len; | ||
968 | #endif | 1084 | #endif |
969 | 1085 | ||
970 | rt->rt6i_metric = rtmsg->rtmsg_metric; | 1086 | rt->rt6i_metric = cfg->fc_metric; |
971 | 1087 | ||
972 | /* We cannot add true routes via loopback here, | 1088 | /* We cannot add true routes via loopback here, |
973 | they would result in kernel looping; promote them to reject routes | 1089 | they would result in kernel looping; promote them to reject routes |
974 | */ | 1090 | */ |
975 | if ((rtmsg->rtmsg_flags&RTF_REJECT) || | 1091 | if ((cfg->fc_flags & RTF_REJECT) || |
976 | (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { | 1092 | (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { |
977 | /* hold loopback dev/idev if we haven't done so. */ | 1093 | /* hold loopback dev/idev if we haven't done so. */ |
978 | if (dev != &loopback_dev) { | 1094 | if (dev != &loopback_dev) { |
@@ -995,12 +1111,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, | |||
995 | goto install_route; | 1111 | goto install_route; |
996 | } | 1112 | } |
997 | 1113 | ||
998 | if (rtmsg->rtmsg_flags & RTF_GATEWAY) { | 1114 | if (cfg->fc_flags & RTF_GATEWAY) { |
999 | struct in6_addr *gw_addr; | 1115 | struct in6_addr *gw_addr; |
1000 | int gwa_type; | 1116 | int gwa_type; |
1001 | 1117 | ||
1002 | gw_addr = &rtmsg->rtmsg_gateway; | 1118 | gw_addr = &cfg->fc_gateway; |
1003 | ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway); | 1119 | ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); |
1004 | gwa_type = ipv6_addr_type(gw_addr); | 1120 | gwa_type = ipv6_addr_type(gw_addr); |
1005 | 1121 | ||
1006 | if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { | 1122 | if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { |
@@ -1017,7 +1133,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, | |||
1017 | if (!(gwa_type&IPV6_ADDR_UNICAST)) | 1133 | if (!(gwa_type&IPV6_ADDR_UNICAST)) |
1018 | goto out; | 1134 | goto out; |
1019 | 1135 | ||
1020 | grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1); | 1136 | grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); |
1021 | 1137 | ||
1022 | err = -EHOSTUNREACH; | 1138 | err = -EHOSTUNREACH; |
1023 | if (grt == NULL) | 1139 | if (grt == NULL) |
@@ -1049,7 +1165,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, | |||
1049 | if (dev == NULL) | 1165 | if (dev == NULL) |
1050 | goto out; | 1166 | goto out; |
1051 | 1167 | ||
1052 | if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) { | 1168 | if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { |
1053 | rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); | 1169 | rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); |
1054 | if (IS_ERR(rt->rt6i_nexthop)) { | 1170 | if (IS_ERR(rt->rt6i_nexthop)) { |
1055 | err = PTR_ERR(rt->rt6i_nexthop); | 1171 | err = PTR_ERR(rt->rt6i_nexthop); |
@@ -1058,24 +1174,24 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, | |||
1058 | } | 1174 | } |
1059 | } | 1175 | } |
1060 | 1176 | ||
1061 | rt->rt6i_flags = rtmsg->rtmsg_flags; | 1177 | rt->rt6i_flags = cfg->fc_flags; |
1062 | 1178 | ||
1063 | install_route: | 1179 | install_route: |
1064 | if (rta && rta[RTA_METRICS-1]) { | 1180 | if (cfg->fc_mx) { |
1065 | int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]); | 1181 | struct nlattr *nla; |
1066 | struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]); | 1182 | int remaining; |
1067 | 1183 | ||
1068 | while (RTA_OK(attr, attrlen)) { | 1184 | nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { |
1069 | unsigned flavor = attr->rta_type; | 1185 | int type = nla->nla_type; |
1070 | if (flavor) { | 1186 | |
1071 | if (flavor > RTAX_MAX) { | 1187 | if (type) { |
1188 | if (type > RTAX_MAX) { | ||
1072 | err = -EINVAL; | 1189 | err = -EINVAL; |
1073 | goto out; | 1190 | goto out; |
1074 | } | 1191 | } |
1075 | rt->u.dst.metrics[flavor-1] = | 1192 | |
1076 | *(u32 *)RTA_DATA(attr); | 1193 | rt->u.dst.metrics[type - 1] = nla_get_u32(nla); |
1077 | } | 1194 | } |
1078 | attr = RTA_NEXT(attr, attrlen); | ||
1079 | } | 1195 | } |
1080 | } | 1196 | } |
1081 | 1197 | ||
@@ -1087,7 +1203,8 @@ install_route: | |||
1087 | rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); | 1203 | rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); |
1088 | rt->u.dst.dev = dev; | 1204 | rt->u.dst.dev = dev; |
1089 | rt->rt6i_idev = idev; | 1205 | rt->rt6i_idev = idev; |
1090 | return ip6_ins_rt(rt, nlh, _rtattr, req); | 1206 | rt->rt6i_table = table; |
1207 | return __ip6_ins_rt(rt, &cfg->fc_nlinfo); | ||
1091 | 1208 | ||
1092 | out: | 1209 | out: |
1093 | if (dev) | 1210 | if (dev) |
@@ -1099,51 +1216,65 @@ out: | |||
1099 | return err; | 1216 | return err; |
1100 | } | 1217 | } |
1101 | 1218 | ||
1102 | int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) | 1219 | static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) |
1103 | { | 1220 | { |
1104 | int err; | 1221 | int err; |
1222 | struct fib6_table *table; | ||
1105 | 1223 | ||
1106 | write_lock_bh(&rt6_lock); | 1224 | if (rt == &ip6_null_entry) |
1225 | return -ENOENT; | ||
1107 | 1226 | ||
1108 | err = fib6_del(rt, nlh, _rtattr, req); | 1227 | table = rt->rt6i_table; |
1228 | write_lock_bh(&table->tb6_lock); | ||
1229 | |||
1230 | err = fib6_del(rt, info); | ||
1109 | dst_release(&rt->u.dst); | 1231 | dst_release(&rt->u.dst); |
1110 | 1232 | ||
1111 | write_unlock_bh(&rt6_lock); | 1233 | write_unlock_bh(&table->tb6_lock); |
1112 | 1234 | ||
1113 | return err; | 1235 | return err; |
1114 | } | 1236 | } |
1115 | 1237 | ||
1116 | static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) | 1238 | int ip6_del_rt(struct rt6_info *rt) |
1117 | { | 1239 | { |
1240 | return __ip6_del_rt(rt, NULL); | ||
1241 | } | ||
1242 | |||
1243 | static int ip6_route_del(struct fib6_config *cfg) | ||
1244 | { | ||
1245 | struct fib6_table *table; | ||
1118 | struct fib6_node *fn; | 1246 | struct fib6_node *fn; |
1119 | struct rt6_info *rt; | 1247 | struct rt6_info *rt; |
1120 | int err = -ESRCH; | 1248 | int err = -ESRCH; |
1121 | 1249 | ||
1122 | read_lock_bh(&rt6_lock); | 1250 | table = fib6_get_table(cfg->fc_table); |
1251 | if (table == NULL) | ||
1252 | return err; | ||
1123 | 1253 | ||
1124 | fn = fib6_locate(&ip6_routing_table, | 1254 | read_lock_bh(&table->tb6_lock); |
1125 | &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, | 1255 | |
1126 | &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); | 1256 | fn = fib6_locate(&table->tb6_root, |
1257 | &cfg->fc_dst, cfg->fc_dst_len, | ||
1258 | &cfg->fc_src, cfg->fc_src_len); | ||
1127 | 1259 | ||
1128 | if (fn) { | 1260 | if (fn) { |
1129 | for (rt = fn->leaf; rt; rt = rt->u.next) { | 1261 | for (rt = fn->leaf; rt; rt = rt->u.next) { |
1130 | if (rtmsg->rtmsg_ifindex && | 1262 | if (cfg->fc_ifindex && |
1131 | (rt->rt6i_dev == NULL || | 1263 | (rt->rt6i_dev == NULL || |
1132 | rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex)) | 1264 | rt->rt6i_dev->ifindex != cfg->fc_ifindex)) |
1133 | continue; | 1265 | continue; |
1134 | if (rtmsg->rtmsg_flags&RTF_GATEWAY && | 1266 | if (cfg->fc_flags & RTF_GATEWAY && |
1135 | !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway)) | 1267 | !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) |
1136 | continue; | 1268 | continue; |
1137 | if (rtmsg->rtmsg_metric && | 1269 | if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) |
1138 | rtmsg->rtmsg_metric != rt->rt6i_metric) | ||
1139 | continue; | 1270 | continue; |
1140 | dst_hold(&rt->u.dst); | 1271 | dst_hold(&rt->u.dst); |
1141 | read_unlock_bh(&rt6_lock); | 1272 | read_unlock_bh(&table->tb6_lock); |
1142 | 1273 | ||
1143 | return ip6_del_rt(rt, nlh, _rtattr, req); | 1274 | return __ip6_del_rt(rt, &cfg->fc_nlinfo); |
1144 | } | 1275 | } |
1145 | } | 1276 | } |
1146 | read_unlock_bh(&rt6_lock); | 1277 | read_unlock_bh(&table->tb6_lock); |
1147 | 1278 | ||
1148 | return err; | 1279 | return err; |
1149 | } | 1280 | } |
@@ -1151,13 +1282,18 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r | |||
1151 | /* | 1282 | /* |
1152 | * Handle redirects | 1283 | * Handle redirects |
1153 | */ | 1284 | */ |
1154 | void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, | 1285 | struct ip6rd_flowi { |
1155 | struct neighbour *neigh, u8 *lladdr, int on_link) | 1286 | struct flowi fl; |
1287 | struct in6_addr gateway; | ||
1288 | }; | ||
1289 | |||
1290 | static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, | ||
1291 | struct flowi *fl, | ||
1292 | int flags) | ||
1156 | { | 1293 | { |
1157 | struct rt6_info *rt, *nrt = NULL; | 1294 | struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; |
1158 | int strict; | 1295 | struct rt6_info *rt; |
1159 | struct fib6_node *fn; | 1296 | struct fib6_node *fn; |
1160 | struct netevent_redirect netevent; | ||
1161 | 1297 | ||
1162 | /* | 1298 | /* |
1163 | * Get the "current" route for this destination and | 1299 | * Get the "current" route for this destination and |
@@ -1169,10 +1305,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, | |||
1169 | * is a bit fuzzy and one might need to check all possible | 1305 | * is a bit fuzzy and one might need to check all possible |
1170 | * routes. | 1306 | * routes. |
1171 | */ | 1307 | */ |
1172 | strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL); | ||
1173 | 1308 | ||
1174 | read_lock_bh(&rt6_lock); | 1309 | read_lock_bh(&table->tb6_lock); |
1175 | fn = fib6_lookup(&ip6_routing_table, dest, NULL); | 1310 | fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); |
1176 | restart: | 1311 | restart: |
1177 | for (rt = fn->leaf; rt; rt = rt->u.next) { | 1312 | for (rt = fn->leaf; rt; rt = rt->u.next) { |
1178 | /* | 1313 | /* |
@@ -1187,29 +1322,60 @@ restart: | |||
1187 | continue; | 1322 | continue; |
1188 | if (!(rt->rt6i_flags & RTF_GATEWAY)) | 1323 | if (!(rt->rt6i_flags & RTF_GATEWAY)) |
1189 | continue; | 1324 | continue; |
1190 | if (neigh->dev != rt->rt6i_dev) | 1325 | if (fl->oif != rt->rt6i_dev->ifindex) |
1191 | continue; | 1326 | continue; |
1192 | if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) | 1327 | if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) |
1193 | continue; | 1328 | continue; |
1194 | break; | 1329 | break; |
1195 | } | 1330 | } |
1196 | if (rt) | ||
1197 | dst_hold(&rt->u.dst); | ||
1198 | else if (strict) { | ||
1199 | while ((fn = fn->parent) != NULL) { | ||
1200 | if (fn->fn_flags & RTN_ROOT) | ||
1201 | break; | ||
1202 | if (fn->fn_flags & RTN_RTINFO) | ||
1203 | goto restart; | ||
1204 | } | ||
1205 | } | ||
1206 | read_unlock_bh(&rt6_lock); | ||
1207 | 1331 | ||
1208 | if (!rt) { | 1332 | if (!rt) |
1333 | rt = &ip6_null_entry; | ||
1334 | BACKTRACK(&fl->fl6_src); | ||
1335 | out: | ||
1336 | dst_hold(&rt->u.dst); | ||
1337 | |||
1338 | read_unlock_bh(&table->tb6_lock); | ||
1339 | |||
1340 | return rt; | ||
1341 | }; | ||
1342 | |||
1343 | static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, | ||
1344 | struct in6_addr *src, | ||
1345 | struct in6_addr *gateway, | ||
1346 | struct net_device *dev) | ||
1347 | { | ||
1348 | struct ip6rd_flowi rdfl = { | ||
1349 | .fl = { | ||
1350 | .oif = dev->ifindex, | ||
1351 | .nl_u = { | ||
1352 | .ip6_u = { | ||
1353 | .daddr = *dest, | ||
1354 | .saddr = *src, | ||
1355 | }, | ||
1356 | }, | ||
1357 | }, | ||
1358 | .gateway = *gateway, | ||
1359 | }; | ||
1360 | int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0; | ||
1361 | |||
1362 | return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); | ||
1363 | } | ||
1364 | |||
1365 | void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, | ||
1366 | struct in6_addr *saddr, | ||
1367 | struct neighbour *neigh, u8 *lladdr, int on_link) | ||
1368 | { | ||
1369 | struct rt6_info *rt, *nrt = NULL; | ||
1370 | struct netevent_redirect netevent; | ||
1371 | |||
1372 | rt = ip6_route_redirect(dest, src, saddr, neigh->dev); | ||
1373 | |||
1374 | if (rt == &ip6_null_entry) { | ||
1209 | if (net_ratelimit()) | 1375 | if (net_ratelimit()) |
1210 | printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " | 1376 | printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " |
1211 | "for redirect target\n"); | 1377 | "for redirect target\n"); |
1212 | return; | 1378 | goto out; |
1213 | } | 1379 | } |
1214 | 1380 | ||
1215 | /* | 1381 | /* |
@@ -1252,7 +1418,7 @@ restart: | |||
1252 | nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); | 1418 | nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); |
1253 | nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); | 1419 | nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); |
1254 | 1420 | ||
1255 | if (ip6_ins_rt(nrt, NULL, NULL, NULL)) | 1421 | if (ip6_ins_rt(nrt)) |
1256 | goto out; | 1422 | goto out; |
1257 | 1423 | ||
1258 | netevent.old = &rt->u.dst; | 1424 | netevent.old = &rt->u.dst; |
@@ -1260,7 +1426,7 @@ restart: | |||
1260 | call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); | 1426 | call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); |
1261 | 1427 | ||
1262 | if (rt->rt6i_flags&RTF_CACHE) { | 1428 | if (rt->rt6i_flags&RTF_CACHE) { |
1263 | ip6_del_rt(rt, NULL, NULL, NULL); | 1429 | ip6_del_rt(rt); |
1264 | return; | 1430 | return; |
1265 | } | 1431 | } |
1266 | 1432 | ||
@@ -1342,7 +1508,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, | |||
1342 | dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); | 1508 | dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); |
1343 | nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; | 1509 | nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; |
1344 | 1510 | ||
1345 | ip6_ins_rt(nrt, NULL, NULL, NULL); | 1511 | ip6_ins_rt(nrt); |
1346 | } | 1512 | } |
1347 | out: | 1513 | out: |
1348 | dst_release(&rt->u.dst); | 1514 | dst_release(&rt->u.dst); |
@@ -1378,6 +1544,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) | |||
1378 | #ifdef CONFIG_IPV6_SUBTREES | 1544 | #ifdef CONFIG_IPV6_SUBTREES |
1379 | memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); | 1545 | memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); |
1380 | #endif | 1546 | #endif |
1547 | rt->rt6i_table = ort->rt6i_table; | ||
1381 | } | 1548 | } |
1382 | return rt; | 1549 | return rt; |
1383 | } | 1550 | } |
@@ -1388,9 +1555,14 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle | |||
1388 | { | 1555 | { |
1389 | struct fib6_node *fn; | 1556 | struct fib6_node *fn; |
1390 | struct rt6_info *rt = NULL; | 1557 | struct rt6_info *rt = NULL; |
1558 | struct fib6_table *table; | ||
1559 | |||
1560 | table = fib6_get_table(RT6_TABLE_INFO); | ||
1561 | if (table == NULL) | ||
1562 | return NULL; | ||
1391 | 1563 | ||
1392 | write_lock_bh(&rt6_lock); | 1564 | write_lock_bh(&table->tb6_lock); |
1393 | fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0); | 1565 | fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); |
1394 | if (!fn) | 1566 | if (!fn) |
1395 | goto out; | 1567 | goto out; |
1396 | 1568 | ||
@@ -1405,7 +1577,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle | |||
1405 | break; | 1577 | break; |
1406 | } | 1578 | } |
1407 | out: | 1579 | out: |
1408 | write_unlock_bh(&rt6_lock); | 1580 | write_unlock_bh(&table->tb6_lock); |
1409 | return rt; | 1581 | return rt; |
1410 | } | 1582 | } |
1411 | 1583 | ||
@@ -1413,21 +1585,23 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle | |||
1413 | struct in6_addr *gwaddr, int ifindex, | 1585 | struct in6_addr *gwaddr, int ifindex, |
1414 | unsigned pref) | 1586 | unsigned pref) |
1415 | { | 1587 | { |
1416 | struct in6_rtmsg rtmsg; | 1588 | struct fib6_config cfg = { |
1589 | .fc_table = RT6_TABLE_INFO, | ||
1590 | .fc_metric = 1024, | ||
1591 | .fc_ifindex = ifindex, | ||
1592 | .fc_dst_len = prefixlen, | ||
1593 | .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | | ||
1594 | RTF_UP | RTF_PREF(pref), | ||
1595 | }; | ||
1596 | |||
1597 | ipv6_addr_copy(&cfg.fc_dst, prefix); | ||
1598 | ipv6_addr_copy(&cfg.fc_gateway, gwaddr); | ||
1417 | 1599 | ||
1418 | memset(&rtmsg, 0, sizeof(rtmsg)); | ||
1419 | rtmsg.rtmsg_type = RTMSG_NEWROUTE; | ||
1420 | ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix); | ||
1421 | rtmsg.rtmsg_dst_len = prefixlen; | ||
1422 | ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); | ||
1423 | rtmsg.rtmsg_metric = 1024; | ||
1424 | rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref); | ||
1425 | /* We should treat it as a default route if prefix length is 0. */ | 1600 | /* We should treat it as a default route if prefix length is 0. */ |
1426 | if (!prefixlen) | 1601 | if (!prefixlen) |
1427 | rtmsg.rtmsg_flags |= RTF_DEFAULT; | 1602 | cfg.fc_flags |= RTF_DEFAULT; |
1428 | rtmsg.rtmsg_ifindex = ifindex; | ||
1429 | 1603 | ||
1430 | ip6_route_add(&rtmsg, NULL, NULL, NULL); | 1604 | ip6_route_add(&cfg); |
1431 | 1605 | ||
1432 | return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); | 1606 | return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); |
1433 | } | 1607 | } |
@@ -1436,12 +1610,14 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle | |||
1436 | struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) | 1610 | struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) |
1437 | { | 1611 | { |
1438 | struct rt6_info *rt; | 1612 | struct rt6_info *rt; |
1439 | struct fib6_node *fn; | 1613 | struct fib6_table *table; |
1440 | 1614 | ||
1441 | fn = &ip6_routing_table; | 1615 | table = fib6_get_table(RT6_TABLE_DFLT); |
1616 | if (table == NULL) | ||
1617 | return NULL; | ||
1442 | 1618 | ||
1443 | write_lock_bh(&rt6_lock); | 1619 | write_lock_bh(&table->tb6_lock); |
1444 | for (rt = fn->leaf; rt; rt=rt->u.next) { | 1620 | for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) { |
1445 | if (dev == rt->rt6i_dev && | 1621 | if (dev == rt->rt6i_dev && |
1446 | ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && | 1622 | ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && |
1447 | ipv6_addr_equal(&rt->rt6i_gateway, addr)) | 1623 | ipv6_addr_equal(&rt->rt6i_gateway, addr)) |
@@ -1449,7 +1625,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d | |||
1449 | } | 1625 | } |
1450 | if (rt) | 1626 | if (rt) |
1451 | dst_hold(&rt->u.dst); | 1627 | dst_hold(&rt->u.dst); |
1452 | write_unlock_bh(&rt6_lock); | 1628 | write_unlock_bh(&table->tb6_lock); |
1453 | return rt; | 1629 | return rt; |
1454 | } | 1630 | } |
1455 | 1631 | ||
@@ -1457,43 +1633,65 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, | |||
1457 | struct net_device *dev, | 1633 | struct net_device *dev, |
1458 | unsigned int pref) | 1634 | unsigned int pref) |
1459 | { | 1635 | { |
1460 | struct in6_rtmsg rtmsg; | 1636 | struct fib6_config cfg = { |
1637 | .fc_table = RT6_TABLE_DFLT, | ||
1638 | .fc_metric = 1024, | ||
1639 | .fc_ifindex = dev->ifindex, | ||
1640 | .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | | ||
1641 | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), | ||
1642 | }; | ||
1461 | 1643 | ||
1462 | memset(&rtmsg, 0, sizeof(struct in6_rtmsg)); | 1644 | ipv6_addr_copy(&cfg.fc_gateway, gwaddr); |
1463 | rtmsg.rtmsg_type = RTMSG_NEWROUTE; | ||
1464 | ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); | ||
1465 | rtmsg.rtmsg_metric = 1024; | ||
1466 | rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | | ||
1467 | RTF_PREF(pref); | ||
1468 | 1645 | ||
1469 | rtmsg.rtmsg_ifindex = dev->ifindex; | 1646 | ip6_route_add(&cfg); |
1470 | 1647 | ||
1471 | ip6_route_add(&rtmsg, NULL, NULL, NULL); | ||
1472 | return rt6_get_dflt_router(gwaddr, dev); | 1648 | return rt6_get_dflt_router(gwaddr, dev); |
1473 | } | 1649 | } |
1474 | 1650 | ||
1475 | void rt6_purge_dflt_routers(void) | 1651 | void rt6_purge_dflt_routers(void) |
1476 | { | 1652 | { |
1477 | struct rt6_info *rt; | 1653 | struct rt6_info *rt; |
1654 | struct fib6_table *table; | ||
1655 | |||
1656 | /* NOTE: Keep consistent with rt6_get_dflt_router */ | ||
1657 | table = fib6_get_table(RT6_TABLE_DFLT); | ||
1658 | if (table == NULL) | ||
1659 | return; | ||
1478 | 1660 | ||
1479 | restart: | 1661 | restart: |
1480 | read_lock_bh(&rt6_lock); | 1662 | read_lock_bh(&table->tb6_lock); |
1481 | for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) { | 1663 | for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) { |
1482 | if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { | 1664 | if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { |
1483 | dst_hold(&rt->u.dst); | 1665 | dst_hold(&rt->u.dst); |
1484 | 1666 | read_unlock_bh(&table->tb6_lock); | |
1485 | read_unlock_bh(&rt6_lock); | 1667 | ip6_del_rt(rt); |
1486 | |||
1487 | ip6_del_rt(rt, NULL, NULL, NULL); | ||
1488 | |||
1489 | goto restart; | 1668 | goto restart; |
1490 | } | 1669 | } |
1491 | } | 1670 | } |
1492 | read_unlock_bh(&rt6_lock); | 1671 | read_unlock_bh(&table->tb6_lock); |
1672 | } | ||
1673 | |||
1674 | static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, | ||
1675 | struct fib6_config *cfg) | ||
1676 | { | ||
1677 | memset(cfg, 0, sizeof(*cfg)); | ||
1678 | |||
1679 | cfg->fc_table = RT6_TABLE_MAIN; | ||
1680 | cfg->fc_ifindex = rtmsg->rtmsg_ifindex; | ||
1681 | cfg->fc_metric = rtmsg->rtmsg_metric; | ||
1682 | cfg->fc_expires = rtmsg->rtmsg_info; | ||
1683 | cfg->fc_dst_len = rtmsg->rtmsg_dst_len; | ||
1684 | cfg->fc_src_len = rtmsg->rtmsg_src_len; | ||
1685 | cfg->fc_flags = rtmsg->rtmsg_flags; | ||
1686 | |||
1687 | ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); | ||
1688 | ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); | ||
1689 | ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); | ||
1493 | } | 1690 | } |
1494 | 1691 | ||
1495 | int ipv6_route_ioctl(unsigned int cmd, void __user *arg) | 1692 | int ipv6_route_ioctl(unsigned int cmd, void __user *arg) |
1496 | { | 1693 | { |
1694 | struct fib6_config cfg; | ||
1497 | struct in6_rtmsg rtmsg; | 1695 | struct in6_rtmsg rtmsg; |
1498 | int err; | 1696 | int err; |
1499 | 1697 | ||
@@ -1506,14 +1704,16 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) | |||
1506 | sizeof(struct in6_rtmsg)); | 1704 | sizeof(struct in6_rtmsg)); |
1507 | if (err) | 1705 | if (err) |
1508 | return -EFAULT; | 1706 | return -EFAULT; |
1509 | 1707 | ||
1708 | rtmsg_to_fib6_config(&rtmsg, &cfg); | ||
1709 | |||
1510 | rtnl_lock(); | 1710 | rtnl_lock(); |
1511 | switch (cmd) { | 1711 | switch (cmd) { |
1512 | case SIOCADDRT: | 1712 | case SIOCADDRT: |
1513 | err = ip6_route_add(&rtmsg, NULL, NULL, NULL); | 1713 | err = ip6_route_add(&cfg); |
1514 | break; | 1714 | break; |
1515 | case SIOCDELRT: | 1715 | case SIOCDELRT: |
1516 | err = ip6_route_del(&rtmsg, NULL, NULL, NULL); | 1716 | err = ip6_route_del(&cfg); |
1517 | break; | 1717 | break; |
1518 | default: | 1718 | default: |
1519 | err = -EINVAL; | 1719 | err = -EINVAL; |
@@ -1587,6 +1787,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, | |||
1587 | 1787 | ||
1588 | ipv6_addr_copy(&rt->rt6i_dst.addr, addr); | 1788 | ipv6_addr_copy(&rt->rt6i_dst.addr, addr); |
1589 | rt->rt6i_dst.plen = 128; | 1789 | rt->rt6i_dst.plen = 128; |
1790 | rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); | ||
1590 | 1791 | ||
1591 | atomic_set(&rt->u.dst.__refcnt, 1); | 1792 | atomic_set(&rt->u.dst.__refcnt, 1); |
1592 | 1793 | ||
@@ -1605,9 +1806,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg) | |||
1605 | 1806 | ||
1606 | void rt6_ifdown(struct net_device *dev) | 1807 | void rt6_ifdown(struct net_device *dev) |
1607 | { | 1808 | { |
1608 | write_lock_bh(&rt6_lock); | 1809 | fib6_clean_all(fib6_ifdown, 0, dev); |
1609 | fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev); | ||
1610 | write_unlock_bh(&rt6_lock); | ||
1611 | } | 1810 | } |
1612 | 1811 | ||
1613 | struct rt6_mtu_change_arg | 1812 | struct rt6_mtu_change_arg |
@@ -1657,80 +1856,114 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) | |||
1657 | 1856 | ||
1658 | void rt6_mtu_change(struct net_device *dev, unsigned mtu) | 1857 | void rt6_mtu_change(struct net_device *dev, unsigned mtu) |
1659 | { | 1858 | { |
1660 | struct rt6_mtu_change_arg arg; | 1859 | struct rt6_mtu_change_arg arg = { |
1860 | .dev = dev, | ||
1861 | .mtu = mtu, | ||
1862 | }; | ||
1661 | 1863 | ||
1662 | arg.dev = dev; | 1864 | fib6_clean_all(rt6_mtu_change_route, 0, &arg); |
1663 | arg.mtu = mtu; | ||
1664 | read_lock_bh(&rt6_lock); | ||
1665 | fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg); | ||
1666 | read_unlock_bh(&rt6_lock); | ||
1667 | } | 1865 | } |
1668 | 1866 | ||
1669 | static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta, | 1867 | static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { |
1670 | struct in6_rtmsg *rtmsg) | 1868 | [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, |
1869 | [RTA_OIF] = { .type = NLA_U32 }, | ||
1870 | [RTA_IIF] = { .type = NLA_U32 }, | ||
1871 | [RTA_PRIORITY] = { .type = NLA_U32 }, | ||
1872 | [RTA_METRICS] = { .type = NLA_NESTED }, | ||
1873 | }; | ||
1874 | |||
1875 | static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, | ||
1876 | struct fib6_config *cfg) | ||
1671 | { | 1877 | { |
1672 | memset(rtmsg, 0, sizeof(*rtmsg)); | 1878 | struct rtmsg *rtm; |
1879 | struct nlattr *tb[RTA_MAX+1]; | ||
1880 | int err; | ||
1673 | 1881 | ||
1674 | rtmsg->rtmsg_dst_len = r->rtm_dst_len; | 1882 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); |
1675 | rtmsg->rtmsg_src_len = r->rtm_src_len; | 1883 | if (err < 0) |
1676 | rtmsg->rtmsg_flags = RTF_UP; | 1884 | goto errout; |
1677 | if (r->rtm_type == RTN_UNREACHABLE) | ||
1678 | rtmsg->rtmsg_flags |= RTF_REJECT; | ||
1679 | 1885 | ||
1680 | if (rta[RTA_GATEWAY-1]) { | 1886 | err = -EINVAL; |
1681 | if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16)) | 1887 | rtm = nlmsg_data(nlh); |
1682 | return -EINVAL; | 1888 | memset(cfg, 0, sizeof(*cfg)); |
1683 | memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16); | 1889 | |
1684 | rtmsg->rtmsg_flags |= RTF_GATEWAY; | 1890 | cfg->fc_table = rtm->rtm_table; |
1685 | } | 1891 | cfg->fc_dst_len = rtm->rtm_dst_len; |
1686 | if (rta[RTA_DST-1]) { | 1892 | cfg->fc_src_len = rtm->rtm_src_len; |
1687 | if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3)) | 1893 | cfg->fc_flags = RTF_UP; |
1688 | return -EINVAL; | 1894 | cfg->fc_protocol = rtm->rtm_protocol; |
1689 | memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3)); | 1895 | |
1896 | if (rtm->rtm_type == RTN_UNREACHABLE) | ||
1897 | cfg->fc_flags |= RTF_REJECT; | ||
1898 | |||
1899 | cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; | ||
1900 | cfg->fc_nlinfo.nlh = nlh; | ||
1901 | |||
1902 | if (tb[RTA_GATEWAY]) { | ||
1903 | nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); | ||
1904 | cfg->fc_flags |= RTF_GATEWAY; | ||
1690 | } | 1905 | } |
1691 | if (rta[RTA_SRC-1]) { | 1906 | |
1692 | if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3)) | 1907 | if (tb[RTA_DST]) { |
1693 | return -EINVAL; | 1908 | int plen = (rtm->rtm_dst_len + 7) >> 3; |
1694 | memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3)); | 1909 | |
1910 | if (nla_len(tb[RTA_DST]) < plen) | ||
1911 | goto errout; | ||
1912 | |||
1913 | nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); | ||
1695 | } | 1914 | } |
1696 | if (rta[RTA_OIF-1]) { | 1915 | |
1697 | if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int))) | 1916 | if (tb[RTA_SRC]) { |
1698 | return -EINVAL; | 1917 | int plen = (rtm->rtm_src_len + 7) >> 3; |
1699 | memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); | 1918 | |
1919 | if (nla_len(tb[RTA_SRC]) < plen) | ||
1920 | goto errout; | ||
1921 | |||
1922 | nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); | ||
1700 | } | 1923 | } |
1701 | if (rta[RTA_PRIORITY-1]) { | 1924 | |
1702 | if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4)) | 1925 | if (tb[RTA_OIF]) |
1703 | return -EINVAL; | 1926 | cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); |
1704 | memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4); | 1927 | |
1928 | if (tb[RTA_PRIORITY]) | ||
1929 | cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); | ||
1930 | |||
1931 | if (tb[RTA_METRICS]) { | ||
1932 | cfg->fc_mx = nla_data(tb[RTA_METRICS]); | ||
1933 | cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); | ||
1705 | } | 1934 | } |
1706 | return 0; | 1935 | |
1936 | if (tb[RTA_TABLE]) | ||
1937 | cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); | ||
1938 | |||
1939 | err = 0; | ||
1940 | errout: | ||
1941 | return err; | ||
1707 | } | 1942 | } |
1708 | 1943 | ||
1709 | int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 1944 | int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
1710 | { | 1945 | { |
1711 | struct rtmsg *r = NLMSG_DATA(nlh); | 1946 | struct fib6_config cfg; |
1712 | struct in6_rtmsg rtmsg; | 1947 | int err; |
1713 | 1948 | ||
1714 | if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) | 1949 | err = rtm_to_fib6_config(skb, nlh, &cfg); |
1715 | return -EINVAL; | 1950 | if (err < 0) |
1716 | return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb)); | 1951 | return err; |
1952 | |||
1953 | return ip6_route_del(&cfg); | ||
1717 | } | 1954 | } |
1718 | 1955 | ||
1719 | int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 1956 | int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
1720 | { | 1957 | { |
1721 | struct rtmsg *r = NLMSG_DATA(nlh); | 1958 | struct fib6_config cfg; |
1722 | struct in6_rtmsg rtmsg; | 1959 | int err; |
1723 | 1960 | ||
1724 | if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) | 1961 | err = rtm_to_fib6_config(skb, nlh, &cfg); |
1725 | return -EINVAL; | 1962 | if (err < 0) |
1726 | return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb)); | 1963 | return err; |
1727 | } | ||
1728 | 1964 | ||
1729 | struct rt6_rtnl_dump_arg | 1965 | return ip6_route_add(&cfg); |
1730 | { | 1966 | } |
1731 | struct sk_buff *skb; | ||
1732 | struct netlink_callback *cb; | ||
1733 | }; | ||
1734 | 1967 | ||
1735 | static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, | 1968 | static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, |
1736 | struct in6_addr *dst, struct in6_addr *src, | 1969 | struct in6_addr *dst, struct in6_addr *src, |
@@ -1738,9 +1971,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, | |||
1738 | int prefix, unsigned int flags) | 1971 | int prefix, unsigned int flags) |
1739 | { | 1972 | { |
1740 | struct rtmsg *rtm; | 1973 | struct rtmsg *rtm; |
1741 | struct nlmsghdr *nlh; | 1974 | struct nlmsghdr *nlh; |
1742 | unsigned char *b = skb->tail; | ||
1743 | struct rta_cacheinfo ci; | 1975 | struct rta_cacheinfo ci; |
1976 | u32 table; | ||
1744 | 1977 | ||
1745 | if (prefix) { /* user wants prefix routes only */ | 1978 | if (prefix) { /* user wants prefix routes only */ |
1746 | if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { | 1979 | if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { |
@@ -1749,13 +1982,21 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, | |||
1749 | } | 1982 | } |
1750 | } | 1983 | } |
1751 | 1984 | ||
1752 | nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags); | 1985 | nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); |
1753 | rtm = NLMSG_DATA(nlh); | 1986 | if (nlh == NULL) |
1987 | return -ENOBUFS; | ||
1988 | |||
1989 | rtm = nlmsg_data(nlh); | ||
1754 | rtm->rtm_family = AF_INET6; | 1990 | rtm->rtm_family = AF_INET6; |
1755 | rtm->rtm_dst_len = rt->rt6i_dst.plen; | 1991 | rtm->rtm_dst_len = rt->rt6i_dst.plen; |
1756 | rtm->rtm_src_len = rt->rt6i_src.plen; | 1992 | rtm->rtm_src_len = rt->rt6i_src.plen; |
1757 | rtm->rtm_tos = 0; | 1993 | rtm->rtm_tos = 0; |
1758 | rtm->rtm_table = RT_TABLE_MAIN; | 1994 | if (rt->rt6i_table) |
1995 | table = rt->rt6i_table->tb6_id; | ||
1996 | else | ||
1997 | table = RT6_TABLE_UNSPEC; | ||
1998 | rtm->rtm_table = table; | ||
1999 | NLA_PUT_U32(skb, RTA_TABLE, table); | ||
1759 | if (rt->rt6i_flags&RTF_REJECT) | 2000 | if (rt->rt6i_flags&RTF_REJECT) |
1760 | rtm->rtm_type = RTN_UNREACHABLE; | 2001 | rtm->rtm_type = RTN_UNREACHABLE; |
1761 | else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) | 2002 | else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) |
@@ -1776,31 +2017,35 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, | |||
1776 | rtm->rtm_flags |= RTM_F_CLONED; | 2017 | rtm->rtm_flags |= RTM_F_CLONED; |
1777 | 2018 | ||
1778 | if (dst) { | 2019 | if (dst) { |
1779 | RTA_PUT(skb, RTA_DST, 16, dst); | 2020 | NLA_PUT(skb, RTA_DST, 16, dst); |
1780 | rtm->rtm_dst_len = 128; | 2021 | rtm->rtm_dst_len = 128; |
1781 | } else if (rtm->rtm_dst_len) | 2022 | } else if (rtm->rtm_dst_len) |
1782 | RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); | 2023 | NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); |
1783 | #ifdef CONFIG_IPV6_SUBTREES | 2024 | #ifdef CONFIG_IPV6_SUBTREES |
1784 | if (src) { | 2025 | if (src) { |
1785 | RTA_PUT(skb, RTA_SRC, 16, src); | 2026 | NLA_PUT(skb, RTA_SRC, 16, src); |
1786 | rtm->rtm_src_len = 128; | 2027 | rtm->rtm_src_len = 128; |
1787 | } else if (rtm->rtm_src_len) | 2028 | } else if (rtm->rtm_src_len) |
1788 | RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); | 2029 | NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); |
1789 | #endif | 2030 | #endif |
1790 | if (iif) | 2031 | if (iif) |
1791 | RTA_PUT(skb, RTA_IIF, 4, &iif); | 2032 | NLA_PUT_U32(skb, RTA_IIF, iif); |
1792 | else if (dst) { | 2033 | else if (dst) { |
1793 | struct in6_addr saddr_buf; | 2034 | struct in6_addr saddr_buf; |
1794 | if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) | 2035 | if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) |
1795 | RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); | 2036 | NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); |
1796 | } | 2037 | } |
2038 | |||
1797 | if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) | 2039 | if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) |
1798 | goto rtattr_failure; | 2040 | goto nla_put_failure; |
2041 | |||
1799 | if (rt->u.dst.neighbour) | 2042 | if (rt->u.dst.neighbour) |
1800 | RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); | 2043 | NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); |
2044 | |||
1801 | if (rt->u.dst.dev) | 2045 | if (rt->u.dst.dev) |
1802 | RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex); | 2046 | NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); |
1803 | RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric); | 2047 | |
2048 | NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); | ||
1804 | ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); | 2049 | ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); |
1805 | if (rt->rt6i_expires) | 2050 | if (rt->rt6i_expires) |
1806 | ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies); | 2051 | ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies); |
@@ -1812,23 +2057,21 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, | |||
1812 | ci.rta_id = 0; | 2057 | ci.rta_id = 0; |
1813 | ci.rta_ts = 0; | 2058 | ci.rta_ts = 0; |
1814 | ci.rta_tsage = 0; | 2059 | ci.rta_tsage = 0; |
1815 | RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); | 2060 | NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); |
1816 | nlh->nlmsg_len = skb->tail - b; | ||
1817 | return skb->len; | ||
1818 | 2061 | ||
1819 | nlmsg_failure: | 2062 | return nlmsg_end(skb, nlh); |
1820 | rtattr_failure: | 2063 | |
1821 | skb_trim(skb, b - skb->data); | 2064 | nla_put_failure: |
1822 | return -1; | 2065 | return nlmsg_cancel(skb, nlh); |
1823 | } | 2066 | } |
1824 | 2067 | ||
1825 | static int rt6_dump_route(struct rt6_info *rt, void *p_arg) | 2068 | int rt6_dump_route(struct rt6_info *rt, void *p_arg) |
1826 | { | 2069 | { |
1827 | struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; | 2070 | struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; |
1828 | int prefix; | 2071 | int prefix; |
1829 | 2072 | ||
1830 | if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) { | 2073 | if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { |
1831 | struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh); | 2074 | struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); |
1832 | prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; | 2075 | prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; |
1833 | } else | 2076 | } else |
1834 | prefix = 0; | 2077 | prefix = 0; |
@@ -1838,189 +2081,108 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg) | |||
1838 | prefix, NLM_F_MULTI); | 2081 | prefix, NLM_F_MULTI); |
1839 | } | 2082 | } |
1840 | 2083 | ||
1841 | static int fib6_dump_node(struct fib6_walker_t *w) | 2084 | int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) |
1842 | { | 2085 | { |
1843 | int res; | 2086 | struct nlattr *tb[RTA_MAX+1]; |
1844 | struct rt6_info *rt; | 2087 | struct rt6_info *rt; |
2088 | struct sk_buff *skb; | ||
2089 | struct rtmsg *rtm; | ||
2090 | struct flowi fl; | ||
2091 | int err, iif = 0; | ||
1845 | 2092 | ||
1846 | for (rt = w->leaf; rt; rt = rt->u.next) { | 2093 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); |
1847 | res = rt6_dump_route(rt, w->args); | 2094 | if (err < 0) |
1848 | if (res < 0) { | 2095 | goto errout; |
1849 | /* Frame is full, suspend walking */ | ||
1850 | w->leaf = rt; | ||
1851 | return 1; | ||
1852 | } | ||
1853 | BUG_TRAP(res!=0); | ||
1854 | } | ||
1855 | w->leaf = NULL; | ||
1856 | return 0; | ||
1857 | } | ||
1858 | |||
1859 | static void fib6_dump_end(struct netlink_callback *cb) | ||
1860 | { | ||
1861 | struct fib6_walker_t *w = (void*)cb->args[0]; | ||
1862 | |||
1863 | if (w) { | ||
1864 | cb->args[0] = 0; | ||
1865 | fib6_walker_unlink(w); | ||
1866 | kfree(w); | ||
1867 | } | ||
1868 | cb->done = (void*)cb->args[1]; | ||
1869 | cb->args[1] = 0; | ||
1870 | } | ||
1871 | |||
1872 | static int fib6_dump_done(struct netlink_callback *cb) | ||
1873 | { | ||
1874 | fib6_dump_end(cb); | ||
1875 | return cb->done ? cb->done(cb) : 0; | ||
1876 | } | ||
1877 | |||
1878 | int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | ||
1879 | { | ||
1880 | struct rt6_rtnl_dump_arg arg; | ||
1881 | struct fib6_walker_t *w; | ||
1882 | int res; | ||
1883 | 2096 | ||
1884 | arg.skb = skb; | 2097 | err = -EINVAL; |
1885 | arg.cb = cb; | 2098 | memset(&fl, 0, sizeof(fl)); |
1886 | 2099 | ||
1887 | w = (void*)cb->args[0]; | 2100 | if (tb[RTA_SRC]) { |
1888 | if (w == NULL) { | 2101 | if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) |
1889 | /* New dump: | 2102 | goto errout; |
1890 | * | ||
1891 | * 1. hook callback destructor. | ||
1892 | */ | ||
1893 | cb->args[1] = (long)cb->done; | ||
1894 | cb->done = fib6_dump_done; | ||
1895 | 2103 | ||
1896 | /* | 2104 | ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); |
1897 | * 2. allocate and initialize walker. | ||
1898 | */ | ||
1899 | w = kzalloc(sizeof(*w), GFP_ATOMIC); | ||
1900 | if (w == NULL) | ||
1901 | return -ENOMEM; | ||
1902 | RT6_TRACE("dump<%p", w); | ||
1903 | w->root = &ip6_routing_table; | ||
1904 | w->func = fib6_dump_node; | ||
1905 | w->args = &arg; | ||
1906 | cb->args[0] = (long)w; | ||
1907 | read_lock_bh(&rt6_lock); | ||
1908 | res = fib6_walk(w); | ||
1909 | read_unlock_bh(&rt6_lock); | ||
1910 | } else { | ||
1911 | w->args = &arg; | ||
1912 | read_lock_bh(&rt6_lock); | ||
1913 | res = fib6_walk_continue(w); | ||
1914 | read_unlock_bh(&rt6_lock); | ||
1915 | } | 2105 | } |
1916 | #if RT6_DEBUG >= 3 | ||
1917 | if (res <= 0 && skb->len == 0) | ||
1918 | RT6_TRACE("%p>dump end\n", w); | ||
1919 | #endif | ||
1920 | res = res < 0 ? res : skb->len; | ||
1921 | /* res < 0 is an error. (really, impossible) | ||
1922 | res == 0 means that dump is complete, but skb still can contain data. | ||
1923 | res > 0 dump is not complete, but frame is full. | ||
1924 | */ | ||
1925 | /* Destroy walker, if dump of this table is complete. */ | ||
1926 | if (res <= 0) | ||
1927 | fib6_dump_end(cb); | ||
1928 | return res; | ||
1929 | } | ||
1930 | |||
1931 | int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | ||
1932 | { | ||
1933 | struct rtattr **rta = arg; | ||
1934 | int iif = 0; | ||
1935 | int err = -ENOBUFS; | ||
1936 | struct sk_buff *skb; | ||
1937 | struct flowi fl; | ||
1938 | struct rt6_info *rt; | ||
1939 | 2106 | ||
1940 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | 2107 | if (tb[RTA_DST]) { |
1941 | if (skb == NULL) | 2108 | if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) |
1942 | goto out; | 2109 | goto errout; |
1943 | 2110 | ||
1944 | /* Reserve room for dummy headers, this skb can pass | 2111 | ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); |
1945 | through good chunk of routing engine. | 2112 | } |
1946 | */ | ||
1947 | skb->mac.raw = skb->data; | ||
1948 | skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); | ||
1949 | 2113 | ||
1950 | memset(&fl, 0, sizeof(fl)); | 2114 | if (tb[RTA_IIF]) |
1951 | if (rta[RTA_SRC-1]) | 2115 | iif = nla_get_u32(tb[RTA_IIF]); |
1952 | ipv6_addr_copy(&fl.fl6_src, | ||
1953 | (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1])); | ||
1954 | if (rta[RTA_DST-1]) | ||
1955 | ipv6_addr_copy(&fl.fl6_dst, | ||
1956 | (struct in6_addr*)RTA_DATA(rta[RTA_DST-1])); | ||
1957 | 2116 | ||
1958 | if (rta[RTA_IIF-1]) | 2117 | if (tb[RTA_OIF]) |
1959 | memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); | 2118 | fl.oif = nla_get_u32(tb[RTA_OIF]); |
1960 | 2119 | ||
1961 | if (iif) { | 2120 | if (iif) { |
1962 | struct net_device *dev; | 2121 | struct net_device *dev; |
1963 | dev = __dev_get_by_index(iif); | 2122 | dev = __dev_get_by_index(iif); |
1964 | if (!dev) { | 2123 | if (!dev) { |
1965 | err = -ENODEV; | 2124 | err = -ENODEV; |
1966 | goto out_free; | 2125 | goto errout; |
1967 | } | 2126 | } |
1968 | } | 2127 | } |
1969 | 2128 | ||
1970 | fl.oif = 0; | 2129 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); |
1971 | if (rta[RTA_OIF-1]) | 2130 | if (skb == NULL) { |
1972 | memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); | 2131 | err = -ENOBUFS; |
2132 | goto errout; | ||
2133 | } | ||
1973 | 2134 | ||
1974 | rt = (struct rt6_info*)ip6_route_output(NULL, &fl); | 2135 | /* Reserve room for dummy headers, this skb can pass |
2136 | through good chunk of routing engine. | ||
2137 | */ | ||
2138 | skb->mac.raw = skb->data; | ||
2139 | skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); | ||
1975 | 2140 | ||
2141 | rt = (struct rt6_info*) ip6_route_output(NULL, &fl); | ||
1976 | skb->dst = &rt->u.dst; | 2142 | skb->dst = &rt->u.dst; |
1977 | 2143 | ||
1978 | NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; | 2144 | err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, |
1979 | err = rt6_fill_node(skb, rt, | ||
1980 | &fl.fl6_dst, &fl.fl6_src, | ||
1981 | iif, | ||
1982 | RTM_NEWROUTE, NETLINK_CB(in_skb).pid, | 2145 | RTM_NEWROUTE, NETLINK_CB(in_skb).pid, |
1983 | nlh->nlmsg_seq, 0, 0); | 2146 | nlh->nlmsg_seq, 0, 0); |
1984 | if (err < 0) { | 2147 | if (err < 0) { |
1985 | err = -EMSGSIZE; | 2148 | kfree_skb(skb); |
1986 | goto out_free; | 2149 | goto errout; |
1987 | } | 2150 | } |
1988 | 2151 | ||
1989 | err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | 2152 | err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); |
1990 | if (err > 0) | 2153 | errout: |
1991 | err = 0; | ||
1992 | out: | ||
1993 | return err; | 2154 | return err; |
1994 | out_free: | ||
1995 | kfree_skb(skb); | ||
1996 | goto out; | ||
1997 | } | 2155 | } |
1998 | 2156 | ||
1999 | void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, | 2157 | void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) |
2000 | struct netlink_skb_parms *req) | ||
2001 | { | 2158 | { |
2002 | struct sk_buff *skb; | 2159 | struct sk_buff *skb; |
2003 | int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); | 2160 | u32 pid = 0, seq = 0; |
2004 | u32 pid = current->pid; | 2161 | struct nlmsghdr *nlh = NULL; |
2005 | u32 seq = 0; | 2162 | int payload = sizeof(struct rtmsg) + 256; |
2006 | 2163 | int err = -ENOBUFS; | |
2007 | if (req) | 2164 | |
2008 | pid = req->pid; | 2165 | if (info) { |
2009 | if (nlh) | 2166 | pid = info->pid; |
2010 | seq = nlh->nlmsg_seq; | 2167 | nlh = info->nlh; |
2011 | 2168 | if (nlh) | |
2012 | skb = alloc_skb(size, gfp_any()); | 2169 | seq = nlh->nlmsg_seq; |
2013 | if (!skb) { | ||
2014 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS); | ||
2015 | return; | ||
2016 | } | 2170 | } |
2017 | if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { | 2171 | |
2172 | skb = nlmsg_new(nlmsg_total_size(payload), gfp_any()); | ||
2173 | if (skb == NULL) | ||
2174 | goto errout; | ||
2175 | |||
2176 | err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); | ||
2177 | if (err < 0) { | ||
2018 | kfree_skb(skb); | 2178 | kfree_skb(skb); |
2019 | netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL); | 2179 | goto errout; |
2020 | return; | ||
2021 | } | 2180 | } |
2022 | NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE; | 2181 | |
2023 | netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any()); | 2182 | err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); |
2183 | errout: | ||
2184 | if (err < 0) | ||
2185 | rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); | ||
2024 | } | 2186 | } |
2025 | 2187 | ||
2026 | /* | 2188 | /* |
@@ -2096,16 +2258,13 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) | |||
2096 | 2258 | ||
2097 | static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) | 2259 | static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) |
2098 | { | 2260 | { |
2099 | struct rt6_proc_arg arg; | 2261 | struct rt6_proc_arg arg = { |
2100 | arg.buffer = buffer; | 2262 | .buffer = buffer, |
2101 | arg.offset = offset; | 2263 | .offset = offset, |
2102 | arg.length = length; | 2264 | .length = length, |
2103 | arg.skip = 0; | 2265 | }; |
2104 | arg.len = 0; | ||
2105 | 2266 | ||
2106 | read_lock_bh(&rt6_lock); | 2267 | fib6_clean_all(rt6_info_route, 0, &arg); |
2107 | fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg); | ||
2108 | read_unlock_bh(&rt6_lock); | ||
2109 | 2268 | ||
2110 | *start = buffer; | 2269 | *start = buffer; |
2111 | if (offset) | 2270 | if (offset) |
@@ -2260,13 +2419,9 @@ void __init ip6_route_init(void) | |||
2260 | { | 2419 | { |
2261 | struct proc_dir_entry *p; | 2420 | struct proc_dir_entry *p; |
2262 | 2421 | ||
2263 | ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", | 2422 | ip6_dst_ops.kmem_cachep = |
2264 | sizeof(struct rt6_info), | 2423 | kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, |
2265 | 0, SLAB_HWCACHE_ALIGN, | 2424 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); |
2266 | NULL, NULL); | ||
2267 | if (!ip6_dst_ops.kmem_cachep) | ||
2268 | panic("cannot create ip6_dst_cache"); | ||
2269 | |||
2270 | fib6_init(); | 2425 | fib6_init(); |
2271 | #ifdef CONFIG_PROC_FS | 2426 | #ifdef CONFIG_PROC_FS |
2272 | p = proc_net_create("ipv6_route", 0, rt6_proc_info); | 2427 | p = proc_net_create("ipv6_route", 0, rt6_proc_info); |
@@ -2278,10 +2433,16 @@ void __init ip6_route_init(void) | |||
2278 | #ifdef CONFIG_XFRM | 2433 | #ifdef CONFIG_XFRM |
2279 | xfrm6_init(); | 2434 | xfrm6_init(); |
2280 | #endif | 2435 | #endif |
2436 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | ||
2437 | fib6_rules_init(); | ||
2438 | #endif | ||
2281 | } | 2439 | } |
2282 | 2440 | ||
2283 | void ip6_route_cleanup(void) | 2441 | void ip6_route_cleanup(void) |
2284 | { | 2442 | { |
2443 | #ifdef CONFIG_IPV6_MULTIPLE_TABLES | ||
2444 | fib6_rules_cleanup(); | ||
2445 | #endif | ||
2285 | #ifdef CONFIG_PROC_FS | 2446 | #ifdef CONFIG_PROC_FS |
2286 | proc_net_remove("ipv6_route"); | 2447 | proc_net_remove("ipv6_route"); |
2287 | proc_net_remove("rt6_stats"); | 2448 | proc_net_remove("rt6_stats"); |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 802a1a6b1037..2546fc9f0a78 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -251,6 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
251 | final_p = &final; | 251 | final_p = &final; |
252 | } | 252 | } |
253 | 253 | ||
254 | security_sk_classify_flow(sk, &fl); | ||
255 | |||
254 | err = ip6_dst_lookup(sk, &dst, &fl); | 256 | err = ip6_dst_lookup(sk, &dst, &fl); |
255 | if (err) | 257 | if (err) |
256 | goto failure; | 258 | goto failure; |
@@ -270,7 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
270 | inet->rcv_saddr = LOOPBACK4_IPV6; | 272 | inet->rcv_saddr = LOOPBACK4_IPV6; |
271 | 273 | ||
272 | sk->sk_gso_type = SKB_GSO_TCPV6; | 274 | sk->sk_gso_type = SKB_GSO_TCPV6; |
273 | __ip6_dst_store(sk, dst, NULL); | 275 | __ip6_dst_store(sk, dst, NULL, NULL); |
274 | 276 | ||
275 | icsk->icsk_ext_hdr_len = 0; | 277 | icsk->icsk_ext_hdr_len = 0; |
276 | if (np->opt) | 278 | if (np->opt) |
@@ -374,6 +376,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
374 | fl.oif = sk->sk_bound_dev_if; | 376 | fl.oif = sk->sk_bound_dev_if; |
375 | fl.fl_ip_dport = inet->dport; | 377 | fl.fl_ip_dport = inet->dport; |
376 | fl.fl_ip_sport = inet->sport; | 378 | fl.fl_ip_sport = inet->sport; |
379 | security_skb_classify_flow(skb, &fl); | ||
377 | 380 | ||
378 | if ((err = ip6_dst_lookup(sk, &dst, &fl))) { | 381 | if ((err = ip6_dst_lookup(sk, &dst, &fl))) { |
379 | sk->sk_err_soft = -err; | 382 | sk->sk_err_soft = -err; |
@@ -467,6 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, | |||
467 | fl.oif = treq->iif; | 470 | fl.oif = treq->iif; |
468 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; | 471 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; |
469 | fl.fl_ip_sport = inet_sk(sk)->sport; | 472 | fl.fl_ip_sport = inet_sk(sk)->sport; |
473 | security_req_classify_flow(req, &fl); | ||
470 | 474 | ||
471 | if (dst == NULL) { | 475 | if (dst == NULL) { |
472 | opt = np->opt; | 476 | opt = np->opt; |
@@ -541,7 +545,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) | |||
541 | struct ipv6_pinfo *np = inet6_sk(sk); | 545 | struct ipv6_pinfo *np = inet6_sk(sk); |
542 | struct tcphdr *th = skb->h.th; | 546 | struct tcphdr *th = skb->h.th; |
543 | 547 | ||
544 | if (skb->ip_summed == CHECKSUM_HW) { | 548 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
545 | th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); | 549 | th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); |
546 | skb->csum = offsetof(struct tcphdr, check); | 550 | skb->csum = offsetof(struct tcphdr, check); |
547 | } else { | 551 | } else { |
@@ -566,7 +570,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) | |||
566 | th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, | 570 | th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, |
567 | IPPROTO_TCP, 0); | 571 | IPPROTO_TCP, 0); |
568 | skb->csum = offsetof(struct tcphdr, check); | 572 | skb->csum = offsetof(struct tcphdr, check); |
569 | skb->ip_summed = CHECKSUM_HW; | 573 | skb->ip_summed = CHECKSUM_PARTIAL; |
570 | return 0; | 574 | return 0; |
571 | } | 575 | } |
572 | 576 | ||
@@ -625,6 +629,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) | |||
625 | fl.oif = inet6_iif(skb); | 629 | fl.oif = inet6_iif(skb); |
626 | fl.fl_ip_dport = t1->dest; | 630 | fl.fl_ip_dport = t1->dest; |
627 | fl.fl_ip_sport = t1->source; | 631 | fl.fl_ip_sport = t1->source; |
632 | security_skb_classify_flow(skb, &fl); | ||
628 | 633 | ||
629 | /* sk = NULL, but it is safe for now. RST socket required. */ | 634 | /* sk = NULL, but it is safe for now. RST socket required. */ |
630 | if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { | 635 | if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { |
@@ -691,6 +696,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 | |||
691 | fl.oif = inet6_iif(skb); | 696 | fl.oif = inet6_iif(skb); |
692 | fl.fl_ip_dport = t1->dest; | 697 | fl.fl_ip_dport = t1->dest; |
693 | fl.fl_ip_sport = t1->source; | 698 | fl.fl_ip_sport = t1->source; |
699 | security_skb_classify_flow(skb, &fl); | ||
694 | 700 | ||
695 | if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { | 701 | if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { |
696 | if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { | 702 | if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { |
@@ -820,6 +826,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
820 | 826 | ||
821 | tcp_rsk(req)->snt_isn = isn; | 827 | tcp_rsk(req)->snt_isn = isn; |
822 | 828 | ||
829 | security_inet_conn_request(sk, skb, req); | ||
830 | |||
823 | if (tcp_v6_send_synack(sk, req, NULL)) | 831 | if (tcp_v6_send_synack(sk, req, NULL)) |
824 | goto drop; | 832 | goto drop; |
825 | 833 | ||
@@ -923,6 +931,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
923 | fl.oif = sk->sk_bound_dev_if; | 931 | fl.oif = sk->sk_bound_dev_if; |
924 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; | 932 | fl.fl_ip_dport = inet_rsk(req)->rmt_port; |
925 | fl.fl_ip_sport = inet_sk(sk)->sport; | 933 | fl.fl_ip_sport = inet_sk(sk)->sport; |
934 | security_req_classify_flow(req, &fl); | ||
926 | 935 | ||
927 | if (ip6_dst_lookup(sk, &dst, &fl)) | 936 | if (ip6_dst_lookup(sk, &dst, &fl)) |
928 | goto out; | 937 | goto out; |
@@ -945,7 +954,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
945 | */ | 954 | */ |
946 | 955 | ||
947 | newsk->sk_gso_type = SKB_GSO_TCPV6; | 956 | newsk->sk_gso_type = SKB_GSO_TCPV6; |
948 | __ip6_dst_store(newsk, dst, NULL); | 957 | __ip6_dst_store(newsk, dst, NULL, NULL); |
949 | 958 | ||
950 | newtcp6sk = (struct tcp6_sock *)newsk; | 959 | newtcp6sk = (struct tcp6_sock *)newsk; |
951 | inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; | 960 | inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; |
@@ -1024,7 +1033,7 @@ out: | |||
1024 | 1033 | ||
1025 | static int tcp_v6_checksum_init(struct sk_buff *skb) | 1034 | static int tcp_v6_checksum_init(struct sk_buff *skb) |
1026 | { | 1035 | { |
1027 | if (skb->ip_summed == CHECKSUM_HW) { | 1036 | if (skb->ip_summed == CHECKSUM_COMPLETE) { |
1028 | if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, | 1037 | if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, |
1029 | &skb->nh.ipv6h->daddr,skb->csum)) { | 1038 | &skb->nh.ipv6h->daddr,skb->csum)) { |
1030 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1039 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
@@ -1066,7 +1075,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
1066 | if (skb->protocol == htons(ETH_P_IP)) | 1075 | if (skb->protocol == htons(ETH_P_IP)) |
1067 | return tcp_v4_do_rcv(sk, skb); | 1076 | return tcp_v4_do_rcv(sk, skb); |
1068 | 1077 | ||
1069 | if (sk_filter(sk, skb, 0)) | 1078 | if (sk_filter(sk, skb)) |
1070 | goto discard; | 1079 | goto discard; |
1071 | 1080 | ||
1072 | /* | 1081 | /* |
@@ -1223,7 +1232,7 @@ process: | |||
1223 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) | 1232 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) |
1224 | goto discard_and_relse; | 1233 | goto discard_and_relse; |
1225 | 1234 | ||
1226 | if (sk_filter(sk, skb, 0)) | 1235 | if (sk_filter(sk, skb)) |
1227 | goto discard_and_relse; | 1236 | goto discard_and_relse; |
1228 | 1237 | ||
1229 | skb->dev = NULL; | 1238 | skb->dev = NULL; |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3d54f246411e..9662561701d1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -61,81 +61,9 @@ | |||
61 | 61 | ||
62 | DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; | 62 | DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; |
63 | 63 | ||
64 | /* Grrr, addr_type already calculated by caller, but I don't want | 64 | static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) |
65 | * to add some silly "cookie" argument to this method just for that. | ||
66 | */ | ||
67 | static int udp_v6_get_port(struct sock *sk, unsigned short snum) | ||
68 | { | 65 | { |
69 | struct sock *sk2; | 66 | return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); |
70 | struct hlist_node *node; | ||
71 | |||
72 | write_lock_bh(&udp_hash_lock); | ||
73 | if (snum == 0) { | ||
74 | int best_size_so_far, best, result, i; | ||
75 | |||
76 | if (udp_port_rover > sysctl_local_port_range[1] || | ||
77 | udp_port_rover < sysctl_local_port_range[0]) | ||
78 | udp_port_rover = sysctl_local_port_range[0]; | ||
79 | best_size_so_far = 32767; | ||
80 | best = result = udp_port_rover; | ||
81 | for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { | ||
82 | int size; | ||
83 | struct hlist_head *list; | ||
84 | |||
85 | list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; | ||
86 | if (hlist_empty(list)) { | ||
87 | if (result > sysctl_local_port_range[1]) | ||
88 | result = sysctl_local_port_range[0] + | ||
89 | ((result - sysctl_local_port_range[0]) & | ||
90 | (UDP_HTABLE_SIZE - 1)); | ||
91 | goto gotit; | ||
92 | } | ||
93 | size = 0; | ||
94 | sk_for_each(sk2, node, list) | ||
95 | if (++size >= best_size_so_far) | ||
96 | goto next; | ||
97 | best_size_so_far = size; | ||
98 | best = result; | ||
99 | next:; | ||
100 | } | ||
101 | result = best; | ||
102 | for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { | ||
103 | if (result > sysctl_local_port_range[1]) | ||
104 | result = sysctl_local_port_range[0] | ||
105 | + ((result - sysctl_local_port_range[0]) & | ||
106 | (UDP_HTABLE_SIZE - 1)); | ||
107 | if (!udp_lport_inuse(result)) | ||
108 | break; | ||
109 | } | ||
110 | if (i >= (1 << 16) / UDP_HTABLE_SIZE) | ||
111 | goto fail; | ||
112 | gotit: | ||
113 | udp_port_rover = snum = result; | ||
114 | } else { | ||
115 | sk_for_each(sk2, node, | ||
116 | &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { | ||
117 | if (inet_sk(sk2)->num == snum && | ||
118 | sk2 != sk && | ||
119 | (!sk2->sk_bound_dev_if || | ||
120 | !sk->sk_bound_dev_if || | ||
121 | sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | ||
122 | (!sk2->sk_reuse || !sk->sk_reuse) && | ||
123 | ipv6_rcv_saddr_equal(sk, sk2)) | ||
124 | goto fail; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | inet_sk(sk)->num = snum; | ||
129 | if (sk_unhashed(sk)) { | ||
130 | sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]); | ||
131 | sock_prot_inc_use(sk->sk_prot); | ||
132 | } | ||
133 | write_unlock_bh(&udp_hash_lock); | ||
134 | return 0; | ||
135 | |||
136 | fail: | ||
137 | write_unlock_bh(&udp_hash_lock); | ||
138 | return 1; | ||
139 | } | 67 | } |
140 | 68 | ||
141 | static void udp_v6_hash(struct sock *sk) | 69 | static void udp_v6_hash(struct sock *sk) |
@@ -345,6 +273,8 @@ out: | |||
345 | 273 | ||
346 | static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | 274 | static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) |
347 | { | 275 | { |
276 | int rc; | ||
277 | |||
348 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { | 278 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { |
349 | kfree_skb(skb); | 279 | kfree_skb(skb); |
350 | return -1; | 280 | return -1; |
@@ -356,7 +286,10 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | |||
356 | return 0; | 286 | return 0; |
357 | } | 287 | } |
358 | 288 | ||
359 | if (sock_queue_rcv_skb(sk,skb)<0) { | 289 | if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { |
290 | /* Note that an ENOMEM error is charged twice */ | ||
291 | if (rc == -ENOMEM) | ||
292 | UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); | ||
360 | UDP6_INC_STATS_BH(UDP_MIB_INERRORS); | 293 | UDP6_INC_STATS_BH(UDP_MIB_INERRORS); |
361 | kfree_skb(skb); | 294 | kfree_skb(skb); |
362 | return 0; | 295 | return 0; |
@@ -475,7 +408,7 @@ static int udpv6_rcv(struct sk_buff **pskb) | |||
475 | uh = skb->h.uh; | 408 | uh = skb->h.uh; |
476 | } | 409 | } |
477 | 410 | ||
478 | if (skb->ip_summed == CHECKSUM_HW && | 411 | if (skb->ip_summed == CHECKSUM_COMPLETE && |
479 | !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) | 412 | !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) |
480 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 413 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
481 | 414 | ||
@@ -782,6 +715,8 @@ do_udp_sendmsg: | |||
782 | connected = 0; | 715 | connected = 0; |
783 | } | 716 | } |
784 | 717 | ||
718 | security_sk_classify_flow(sk, fl); | ||
719 | |||
785 | err = ip6_sk_dst_lookup(sk, &dst, fl); | 720 | err = ip6_sk_dst_lookup(sk, &dst, fl); |
786 | if (err) | 721 | if (err) |
787 | goto out; | 722 | goto out; |
@@ -840,7 +775,12 @@ do_append_data: | |||
840 | if (connected) { | 775 | if (connected) { |
841 | ip6_dst_store(sk, dst, | 776 | ip6_dst_store(sk, dst, |
842 | ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? | 777 | ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? |
843 | &np->daddr : NULL); | 778 | &np->daddr : NULL, |
779 | #ifdef CONFIG_IPV6_SUBTREES | ||
780 | ipv6_addr_equal(&fl->fl6_src, &np->saddr) ? | ||
781 | &np->saddr : | ||
782 | #endif | ||
783 | NULL); | ||
844 | } else { | 784 | } else { |
845 | dst_release(dst); | 785 | dst_release(dst); |
846 | } | 786 | } |
@@ -855,6 +795,16 @@ out: | |||
855 | UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); | 795 | UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); |
856 | return len; | 796 | return len; |
857 | } | 797 | } |
798 | /* | ||
799 | * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting | ||
800 | * ENOBUFS might not be good (it's not tunable per se), but otherwise | ||
801 | * we don't have a good statistic (IpOutDiscards but it can be too many | ||
802 | * things). We could add another new stat but at least for now that | ||
803 | * seems like overkill. | ||
804 | */ | ||
805 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | ||
806 | UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); | ||
807 | } | ||
858 | return err; | 808 | return err; |
859 | 809 | ||
860 | do_confirm: | 810 | do_confirm: |
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 0405d74ff910..a40a05789013 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c | |||
@@ -72,7 +72,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) | |||
72 | if (x->mode->input(x, skb)) | 72 | if (x->mode->input(x, skb)) |
73 | goto drop; | 73 | goto drop; |
74 | 74 | ||
75 | if (x->props.mode) { /* XXX */ | 75 | if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ |
76 | decaps = 1; | 76 | decaps = 1; |
77 | break; | 77 | break; |
78 | } | 78 | } |
@@ -138,3 +138,111 @@ int xfrm6_rcv(struct sk_buff **pskb) | |||
138 | { | 138 | { |
139 | return xfrm6_rcv_spi(*pskb, 0); | 139 | return xfrm6_rcv_spi(*pskb, 0); |
140 | } | 140 | } |
141 | |||
142 | int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, | ||
143 | xfrm_address_t *saddr, u8 proto) | ||
144 | { | ||
145 | struct xfrm_state *x = NULL; | ||
146 | int wildcard = 0; | ||
147 | struct in6_addr any; | ||
148 | xfrm_address_t *xany; | ||
149 | struct xfrm_state *xfrm_vec_one = NULL; | ||
150 | int nh = 0; | ||
151 | int i = 0; | ||
152 | |||
153 | ipv6_addr_set(&any, 0, 0, 0, 0); | ||
154 | xany = (xfrm_address_t *)&any; | ||
155 | |||
156 | for (i = 0; i < 3; i++) { | ||
157 | xfrm_address_t *dst, *src; | ||
158 | switch (i) { | ||
159 | case 0: | ||
160 | dst = daddr; | ||
161 | src = saddr; | ||
162 | break; | ||
163 | case 1: | ||
164 | /* lookup state with wild-card source address */ | ||
165 | wildcard = 1; | ||
166 | dst = daddr; | ||
167 | src = xany; | ||
168 | break; | ||
169 | case 2: | ||
170 | default: | ||
171 | /* lookup state with wild-card addresses */ | ||
172 | wildcard = 1; /* XXX */ | ||
173 | dst = xany; | ||
174 | src = xany; | ||
175 | break; | ||
176 | } | ||
177 | |||
178 | x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); | ||
179 | if (!x) | ||
180 | continue; | ||
181 | |||
182 | spin_lock(&x->lock); | ||
183 | |||
184 | if (wildcard) { | ||
185 | if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) { | ||
186 | spin_unlock(&x->lock); | ||
187 | xfrm_state_put(x); | ||
188 | x = NULL; | ||
189 | continue; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | if (unlikely(x->km.state != XFRM_STATE_VALID)) { | ||
194 | spin_unlock(&x->lock); | ||
195 | xfrm_state_put(x); | ||
196 | x = NULL; | ||
197 | continue; | ||
198 | } | ||
199 | if (xfrm_state_check_expire(x)) { | ||
200 | spin_unlock(&x->lock); | ||
201 | xfrm_state_put(x); | ||
202 | x = NULL; | ||
203 | continue; | ||
204 | } | ||
205 | |||
206 | nh = x->type->input(x, skb); | ||
207 | if (nh <= 0) { | ||
208 | spin_unlock(&x->lock); | ||
209 | xfrm_state_put(x); | ||
210 | x = NULL; | ||
211 | continue; | ||
212 | } | ||
213 | |||
214 | x->curlft.bytes += skb->len; | ||
215 | x->curlft.packets++; | ||
216 | |||
217 | spin_unlock(&x->lock); | ||
218 | |||
219 | xfrm_vec_one = x; | ||
220 | break; | ||
221 | } | ||
222 | |||
223 | if (!xfrm_vec_one) | ||
224 | goto drop; | ||
225 | |||
226 | /* Allocate new secpath or COW existing one. */ | ||
227 | if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { | ||
228 | struct sec_path *sp; | ||
229 | sp = secpath_dup(skb->sp); | ||
230 | if (!sp) | ||
231 | goto drop; | ||
232 | if (skb->sp) | ||
233 | secpath_put(skb->sp); | ||
234 | skb->sp = sp; | ||
235 | } | ||
236 | |||
237 | if (1 + skb->sp->len > XFRM_MAX_DEPTH) | ||
238 | goto drop; | ||
239 | |||
240 | skb->sp->xvec[skb->sp->len] = xfrm_vec_one; | ||
241 | skb->sp->len ++; | ||
242 | |||
243 | return 1; | ||
244 | drop: | ||
245 | if (xfrm_vec_one) | ||
246 | xfrm_state_put(xfrm_vec_one); | ||
247 | return -1; | ||
248 | } | ||
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c new file mode 100644 index 000000000000..6031c16d46ca --- /dev/null +++ b/net/ipv6/xfrm6_mode_ro.c | |||
@@ -0,0 +1,93 @@ | |||
1 | /* | ||
2 | * xfrm6_mode_ro.c - Route optimization mode for IPv6. | ||
3 | * | ||
4 | * Copyright (C)2003-2006 Helsinki University of Technology | ||
5 | * Copyright (C)2003-2006 USAGI/WIDE Project | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | /* | ||
22 | * Authors: | ||
23 | * Noriaki TAKAMIYA @USAGI | ||
24 | * Masahide NAKAMURA @USAGI | ||
25 | */ | ||
26 | |||
27 | #include <linux/init.h> | ||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/skbuff.h> | ||
31 | #include <linux/stringify.h> | ||
32 | #include <net/ipv6.h> | ||
33 | #include <net/xfrm.h> | ||
34 | |||
35 | /* Add route optimization header space. | ||
36 | * | ||
37 | * The IP header and mutable extension headers will be moved forward to make | ||
38 | * space for the route optimization header. | ||
39 | * | ||
40 | * On exit, skb->h will be set to the start of the encapsulation header to be | ||
41 | * filled in by x->type->output and skb->nh will be set to the nextheader field | ||
42 | * of the extension header directly preceding the encapsulation header, or in | ||
43 | * its absence, that of the top IP header. The value of skb->data will always | ||
44 | * point to the top IP header. | ||
45 | */ | ||
46 | static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) | ||
47 | { | ||
48 | struct ipv6hdr *iph; | ||
49 | u8 *prevhdr; | ||
50 | int hdr_len; | ||
51 | |||
52 | skb_push(skb, x->props.header_len); | ||
53 | iph = skb->nh.ipv6h; | ||
54 | |||
55 | hdr_len = x->type->hdr_offset(x, skb, &prevhdr); | ||
56 | skb->nh.raw = prevhdr - x->props.header_len; | ||
57 | skb->h.raw = skb->data + hdr_len; | ||
58 | memmove(skb->data, iph, hdr_len); | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * Do nothing about routing optimization header unlike IPsec. | ||
64 | */ | ||
65 | static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb) | ||
66 | { | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | static struct xfrm_mode xfrm6_ro_mode = { | ||
71 | .input = xfrm6_ro_input, | ||
72 | .output = xfrm6_ro_output, | ||
73 | .owner = THIS_MODULE, | ||
74 | .encap = XFRM_MODE_ROUTEOPTIMIZATION, | ||
75 | }; | ||
76 | |||
77 | static int __init xfrm6_ro_init(void) | ||
78 | { | ||
79 | return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6); | ||
80 | } | ||
81 | |||
82 | static void __exit xfrm6_ro_exit(void) | ||
83 | { | ||
84 | int err; | ||
85 | |||
86 | err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6); | ||
87 | BUG_ON(err); | ||
88 | } | ||
89 | |||
90 | module_init(xfrm6_ro_init); | ||
91 | module_exit(xfrm6_ro_exit); | ||
92 | MODULE_LICENSE("GPL"); | ||
93 | MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION); | ||
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 711d713e36d8..3a4b39b12bad 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c | |||
@@ -25,9 +25,8 @@ | |||
25 | * its absence, that of the top IP header. The value of skb->data will always | 25 | * its absence, that of the top IP header. The value of skb->data will always |
26 | * point to the top IP header. | 26 | * point to the top IP header. |
27 | */ | 27 | */ |
28 | static int xfrm6_transport_output(struct sk_buff *skb) | 28 | static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) |
29 | { | 29 | { |
30 | struct xfrm_state *x = skb->dst->xfrm; | ||
31 | struct ipv6hdr *iph; | 30 | struct ipv6hdr *iph; |
32 | u8 *prevhdr; | 31 | u8 *prevhdr; |
33 | int hdr_len; | 32 | int hdr_len; |
@@ -35,7 +34,7 @@ static int xfrm6_transport_output(struct sk_buff *skb) | |||
35 | skb_push(skb, x->props.header_len); | 34 | skb_push(skb, x->props.header_len); |
36 | iph = skb->nh.ipv6h; | 35 | iph = skb->nh.ipv6h; |
37 | 36 | ||
38 | hdr_len = ip6_find_1stfragopt(skb, &prevhdr); | 37 | hdr_len = x->type->hdr_offset(x, skb, &prevhdr); |
39 | skb->nh.raw = prevhdr - x->props.header_len; | 38 | skb->nh.raw = prevhdr - x->props.header_len; |
40 | skb->h.raw = skb->data + hdr_len; | 39 | skb->h.raw = skb->data + hdr_len; |
41 | memmove(skb->data, iph, hdr_len); | 40 | memmove(skb->data, iph, hdr_len); |
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 8af79be2edca..5e7d8a7d6414 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c | |||
@@ -37,10 +37,9 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) | |||
37 | * its absence, that of the top IP header. The value of skb->data will always | 37 | * its absence, that of the top IP header. The value of skb->data will always |
38 | * point to the top IP header. | 38 | * point to the top IP header. |
39 | */ | 39 | */ |
40 | static int xfrm6_tunnel_output(struct sk_buff *skb) | 40 | static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) |
41 | { | 41 | { |
42 | struct dst_entry *dst = skb->dst; | 42 | struct dst_entry *dst = skb->dst; |
43 | struct xfrm_state *x = dst->xfrm; | ||
44 | struct ipv6hdr *iph, *top_iph; | 43 | struct ipv6hdr *iph, *top_iph; |
45 | int dsfield; | 44 | int dsfield; |
46 | 45 | ||
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index c8c8b44a0f58..c260ea104c52 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c | |||
@@ -17,6 +17,12 @@ | |||
17 | #include <net/ipv6.h> | 17 | #include <net/ipv6.h> |
18 | #include <net/xfrm.h> | 18 | #include <net/xfrm.h> |
19 | 19 | ||
20 | int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, | ||
21 | u8 **prevhdr) | ||
22 | { | ||
23 | return ip6_find_1stfragopt(skb, prevhdr); | ||
24 | } | ||
25 | |||
20 | static int xfrm6_tunnel_check_size(struct sk_buff *skb) | 26 | static int xfrm6_tunnel_check_size(struct sk_buff *skb) |
21 | { | 27 | { |
22 | int mtu, ret = 0; | 28 | int mtu, ret = 0; |
@@ -41,13 +47,13 @@ static int xfrm6_output_one(struct sk_buff *skb) | |||
41 | struct xfrm_state *x = dst->xfrm; | 47 | struct xfrm_state *x = dst->xfrm; |
42 | int err; | 48 | int err; |
43 | 49 | ||
44 | if (skb->ip_summed == CHECKSUM_HW) { | 50 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
45 | err = skb_checksum_help(skb, 0); | 51 | err = skb_checksum_help(skb); |
46 | if (err) | 52 | if (err) |
47 | goto error_nolock; | 53 | goto error_nolock; |
48 | } | 54 | } |
49 | 55 | ||
50 | if (x->props.mode) { | 56 | if (x->props.mode == XFRM_MODE_TUNNEL) { |
51 | err = xfrm6_tunnel_check_size(skb); | 57 | err = xfrm6_tunnel_check_size(skb); |
52 | if (err) | 58 | if (err) |
53 | goto error_nolock; | 59 | goto error_nolock; |
@@ -59,7 +65,7 @@ static int xfrm6_output_one(struct sk_buff *skb) | |||
59 | if (err) | 65 | if (err) |
60 | goto error; | 66 | goto error; |
61 | 67 | ||
62 | err = x->mode->output(skb); | 68 | err = x->mode->output(x, skb); |
63 | if (err) | 69 | if (err) |
64 | goto error; | 70 | goto error; |
65 | 71 | ||
@@ -69,6 +75,8 @@ static int xfrm6_output_one(struct sk_buff *skb) | |||
69 | 75 | ||
70 | x->curlft.bytes += skb->len; | 76 | x->curlft.bytes += skb->len; |
71 | x->curlft.packets++; | 77 | x->curlft.packets++; |
78 | if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) | ||
79 | x->lastused = (u64)xtime.tv_sec; | ||
72 | 80 | ||
73 | spin_unlock_bh(&x->lock); | 81 | spin_unlock_bh(&x->lock); |
74 | 82 | ||
@@ -80,7 +88,7 @@ static int xfrm6_output_one(struct sk_buff *skb) | |||
80 | } | 88 | } |
81 | dst = skb->dst; | 89 | dst = skb->dst; |
82 | x = dst->xfrm; | 90 | x = dst->xfrm; |
83 | } while (x && !x->props.mode); | 91 | } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); |
84 | 92 | ||
85 | IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; | 93 | IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; |
86 | err = 0; | 94 | err = 0; |
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 73cd250aecbb..6a252e2134d1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c | |||
@@ -18,6 +18,9 @@ | |||
18 | #include <net/ip.h> | 18 | #include <net/ip.h> |
19 | #include <net/ipv6.h> | 19 | #include <net/ipv6.h> |
20 | #include <net/ip6_route.h> | 20 | #include <net/ip6_route.h> |
21 | #ifdef CONFIG_IPV6_MIP6 | ||
22 | #include <net/mip6.h> | ||
23 | #endif | ||
21 | 24 | ||
22 | static struct dst_ops xfrm6_dst_ops; | 25 | static struct dst_ops xfrm6_dst_ops; |
23 | static struct xfrm_policy_afinfo xfrm6_policy_afinfo; | 26 | static struct xfrm_policy_afinfo xfrm6_policy_afinfo; |
@@ -31,6 +34,26 @@ static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) | |||
31 | return err; | 34 | return err; |
32 | } | 35 | } |
33 | 36 | ||
37 | static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) | ||
38 | { | ||
39 | struct rt6_info *rt; | ||
40 | struct flowi fl_tunnel = { | ||
41 | .nl_u = { | ||
42 | .ip6_u = { | ||
43 | .daddr = *(struct in6_addr *)&daddr->a6, | ||
44 | }, | ||
45 | }, | ||
46 | }; | ||
47 | |||
48 | if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { | ||
49 | ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6, | ||
50 | (struct in6_addr *)&saddr->a6); | ||
51 | dst_release(&rt->u.dst); | ||
52 | return 0; | ||
53 | } | ||
54 | return -EHOSTUNREACH; | ||
55 | } | ||
56 | |||
34 | static struct dst_entry * | 57 | static struct dst_entry * |
35 | __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) | 58 | __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) |
36 | { | 59 | { |
@@ -50,7 +73,9 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) | |||
50 | xdst->u.rt6.rt6i_src.plen); | 73 | xdst->u.rt6.rt6i_src.plen); |
51 | if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && | 74 | if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && |
52 | ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && | 75 | ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && |
53 | xfrm_bundle_ok(xdst, fl, AF_INET6)) { | 76 | xfrm_bundle_ok(xdst, fl, AF_INET6, |
77 | (xdst->u.rt6.rt6i_dst.plen != 128 || | ||
78 | xdst->u.rt6.rt6i_src.plen != 128))) { | ||
54 | dst_clone(dst); | 79 | dst_clone(dst); |
55 | break; | 80 | break; |
56 | } | 81 | } |
@@ -59,6 +84,40 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) | |||
59 | return dst; | 84 | return dst; |
60 | } | 85 | } |
61 | 86 | ||
87 | static inline struct in6_addr* | ||
88 | __xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr) | ||
89 | { | ||
90 | return (x->type->remote_addr) ? | ||
91 | (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) : | ||
92 | (struct in6_addr*)&x->id.daddr; | ||
93 | } | ||
94 | |||
95 | static inline struct in6_addr* | ||
96 | __xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) | ||
97 | { | ||
98 | return (x->type->local_addr) ? | ||
99 | (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) : | ||
100 | (struct in6_addr*)&x->props.saddr; | ||
101 | } | ||
102 | |||
103 | static inline void | ||
104 | __xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x) | ||
105 | { | ||
106 | if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) | ||
107 | *nflen += x->props.header_len; | ||
108 | else | ||
109 | *len += x->props.header_len; | ||
110 | } | ||
111 | |||
112 | static inline void | ||
113 | __xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x) | ||
114 | { | ||
115 | if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) | ||
116 | *nflen -= x->props.header_len; | ||
117 | else | ||
118 | *len -= x->props.header_len; | ||
119 | } | ||
120 | |||
62 | /* Allocate chain of dst_entry's, attach known xfrm's, calculate | 121 | /* Allocate chain of dst_entry's, attach known xfrm's, calculate |
63 | * all the metrics... Shortly, bundle a bundle. | 122 | * all the metrics... Shortly, bundle a bundle. |
64 | */ | 123 | */ |
@@ -83,6 +142,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int | |||
83 | int i; | 142 | int i; |
84 | int err = 0; | 143 | int err = 0; |
85 | int header_len = 0; | 144 | int header_len = 0; |
145 | int nfheader_len = 0; | ||
86 | int trailer_len = 0; | 146 | int trailer_len = 0; |
87 | 147 | ||
88 | dst = dst_prev = NULL; | 148 | dst = dst_prev = NULL; |
@@ -109,17 +169,18 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int | |||
109 | 169 | ||
110 | xdst = (struct xfrm_dst *)dst1; | 170 | xdst = (struct xfrm_dst *)dst1; |
111 | xdst->route = &rt->u.dst; | 171 | xdst->route = &rt->u.dst; |
172 | xdst->genid = xfrm[i]->genid; | ||
112 | if (rt->rt6i_node) | 173 | if (rt->rt6i_node) |
113 | xdst->route_cookie = rt->rt6i_node->fn_sernum; | 174 | xdst->route_cookie = rt->rt6i_node->fn_sernum; |
114 | 175 | ||
115 | dst1->next = dst_prev; | 176 | dst1->next = dst_prev; |
116 | dst_prev = dst1; | 177 | dst_prev = dst1; |
117 | if (xfrm[i]->props.mode) { | 178 | if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { |
118 | remote = (struct in6_addr*)&xfrm[i]->id.daddr; | 179 | remote = __xfrm6_bundle_addr_remote(xfrm[i], remote); |
119 | local = (struct in6_addr*)&xfrm[i]->props.saddr; | 180 | local = __xfrm6_bundle_addr_local(xfrm[i], local); |
120 | tunnel = 1; | 181 | tunnel = 1; |
121 | } | 182 | } |
122 | header_len += xfrm[i]->props.header_len; | 183 | __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); |
123 | trailer_len += xfrm[i]->props.trailer_len; | 184 | trailer_len += xfrm[i]->props.trailer_len; |
124 | 185 | ||
125 | if (tunnel) { | 186 | if (tunnel) { |
@@ -154,6 +215,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int | |||
154 | dst_prev->flags |= DST_HOST; | 215 | dst_prev->flags |= DST_HOST; |
155 | dst_prev->lastuse = jiffies; | 216 | dst_prev->lastuse = jiffies; |
156 | dst_prev->header_len = header_len; | 217 | dst_prev->header_len = header_len; |
218 | dst_prev->nfheader_len = nfheader_len; | ||
157 | dst_prev->trailer_len = trailer_len; | 219 | dst_prev->trailer_len = trailer_len; |
158 | memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); | 220 | memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); |
159 | 221 | ||
@@ -172,7 +234,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int | |||
172 | x->u.rt6.rt6i_src = rt0->rt6i_src; | 234 | x->u.rt6.rt6i_src = rt0->rt6i_src; |
173 | x->u.rt6.rt6i_idev = rt0->rt6i_idev; | 235 | x->u.rt6.rt6i_idev = rt0->rt6i_idev; |
174 | in6_dev_hold(rt0->rt6i_idev); | 236 | in6_dev_hold(rt0->rt6i_idev); |
175 | header_len -= x->u.dst.xfrm->props.header_len; | 237 | __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm); |
176 | trailer_len -= x->u.dst.xfrm->props.trailer_len; | 238 | trailer_len -= x->u.dst.xfrm->props.trailer_len; |
177 | } | 239 | } |
178 | 240 | ||
@@ -232,6 +294,18 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) | |||
232 | fl->proto = nexthdr; | 294 | fl->proto = nexthdr; |
233 | return; | 295 | return; |
234 | 296 | ||
297 | #ifdef CONFIG_IPV6_MIP6 | ||
298 | case IPPROTO_MH: | ||
299 | if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) { | ||
300 | struct ip6_mh *mh; | ||
301 | mh = (struct ip6_mh *)exthdr; | ||
302 | |||
303 | fl->fl_mh_type = mh->ip6mh_type; | ||
304 | } | ||
305 | fl->proto = nexthdr; | ||
306 | return; | ||
307 | #endif | ||
308 | |||
235 | /* XXX Why are there these headers? */ | 309 | /* XXX Why are there these headers? */ |
236 | case IPPROTO_AH: | 310 | case IPPROTO_AH: |
237 | case IPPROTO_ESP: | 311 | case IPPROTO_ESP: |
@@ -308,6 +382,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { | |||
308 | .family = AF_INET6, | 382 | .family = AF_INET6, |
309 | .dst_ops = &xfrm6_dst_ops, | 383 | .dst_ops = &xfrm6_dst_ops, |
310 | .dst_lookup = xfrm6_dst_lookup, | 384 | .dst_lookup = xfrm6_dst_lookup, |
385 | .get_saddr = xfrm6_get_saddr, | ||
311 | .find_bundle = __xfrm6_find_bundle, | 386 | .find_bundle = __xfrm6_find_bundle, |
312 | .bundle_create = __xfrm6_bundle_create, | 387 | .bundle_create = __xfrm6_bundle_create, |
313 | .decode_session = _decode_session6, | 388 | .decode_session = _decode_session6, |
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b33296b3f6de..711bfafb2472 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c | |||
@@ -42,102 +42,135 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, | |||
42 | memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); | 42 | memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); |
43 | if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) | 43 | if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) |
44 | memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); | 44 | memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); |
45 | if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { | ||
46 | struct rt6_info *rt; | ||
47 | struct flowi fl_tunnel = { | ||
48 | .nl_u = { | ||
49 | .ip6_u = { | ||
50 | .daddr = *(struct in6_addr *)daddr, | ||
51 | } | ||
52 | } | ||
53 | }; | ||
54 | if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, | ||
55 | &fl_tunnel, AF_INET6)) { | ||
56 | ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr, | ||
57 | (struct in6_addr *)&x->props.saddr); | ||
58 | dst_release(&rt->u.dst); | ||
59 | } | ||
60 | } | ||
61 | x->props.mode = tmpl->mode; | 45 | x->props.mode = tmpl->mode; |
62 | x->props.reqid = tmpl->reqid; | 46 | x->props.reqid = tmpl->reqid; |
63 | x->props.family = AF_INET6; | 47 | x->props.family = AF_INET6; |
64 | } | 48 | } |
65 | 49 | ||
66 | static struct xfrm_state * | 50 | static int |
67 | __xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) | 51 | __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) |
68 | { | 52 | { |
69 | unsigned h = __xfrm6_spi_hash(daddr, spi, proto); | 53 | int i; |
70 | struct xfrm_state *x; | 54 | int j = 0; |
71 | 55 | ||
72 | list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) { | 56 | /* Rule 1: select IPsec transport except AH */ |
73 | if (x->props.family == AF_INET6 && | 57 | for (i = 0; i < n; i++) { |
74 | spi == x->id.spi && | 58 | if (src[i]->props.mode == XFRM_MODE_TRANSPORT && |
75 | ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && | 59 | src[i]->id.proto != IPPROTO_AH) { |
76 | proto == x->id.proto) { | 60 | dst[j++] = src[i]; |
77 | xfrm_state_hold(x); | 61 | src[i] = NULL; |
78 | return x; | 62 | } |
63 | } | ||
64 | if (j == n) | ||
65 | goto end; | ||
66 | |||
67 | /* Rule 2: select MIPv6 RO or inbound trigger */ | ||
68 | #ifdef CONFIG_IPV6_MIP6 | ||
69 | for (i = 0; i < n; i++) { | ||
70 | if (src[i] && | ||
71 | (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION || | ||
72 | src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) { | ||
73 | dst[j++] = src[i]; | ||
74 | src[i] = NULL; | ||
75 | } | ||
76 | } | ||
77 | if (j == n) | ||
78 | goto end; | ||
79 | #endif | ||
80 | |||
81 | /* Rule 3: select IPsec transport AH */ | ||
82 | for (i = 0; i < n; i++) { | ||
83 | if (src[i] && | ||
84 | src[i]->props.mode == XFRM_MODE_TRANSPORT && | ||
85 | src[i]->id.proto == IPPROTO_AH) { | ||
86 | dst[j++] = src[i]; | ||
87 | src[i] = NULL; | ||
79 | } | 88 | } |
80 | } | 89 | } |
81 | return NULL; | 90 | if (j == n) |
91 | goto end; | ||
92 | |||
93 | /* Rule 4: select IPsec tunnel */ | ||
94 | for (i = 0; i < n; i++) { | ||
95 | if (src[i] && | ||
96 | src[i]->props.mode == XFRM_MODE_TUNNEL) { | ||
97 | dst[j++] = src[i]; | ||
98 | src[i] = NULL; | ||
99 | } | ||
100 | } | ||
101 | if (likely(j == n)) | ||
102 | goto end; | ||
103 | |||
104 | /* Final rule */ | ||
105 | for (i = 0; i < n; i++) { | ||
106 | if (src[i]) { | ||
107 | dst[j++] = src[i]; | ||
108 | src[i] = NULL; | ||
109 | } | ||
110 | } | ||
111 | |||
112 | end: | ||
113 | return 0; | ||
82 | } | 114 | } |
83 | 115 | ||
84 | static struct xfrm_state * | 116 | static int |
85 | __xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, | 117 | __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) |
86 | xfrm_address_t *daddr, xfrm_address_t *saddr, | ||
87 | int create) | ||
88 | { | 118 | { |
89 | struct xfrm_state *x, *x0; | 119 | int i; |
90 | unsigned h = __xfrm6_dst_hash(daddr); | 120 | int j = 0; |
91 | 121 | ||
92 | x0 = NULL; | 122 | /* Rule 1: select IPsec transport */ |
93 | 123 | for (i = 0; i < n; i++) { | |
94 | list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) { | 124 | if (src[i]->mode == XFRM_MODE_TRANSPORT) { |
95 | if (x->props.family == AF_INET6 && | 125 | dst[j++] = src[i]; |
96 | ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && | 126 | src[i] = NULL; |
97 | mode == x->props.mode && | 127 | } |
98 | proto == x->id.proto && | ||
99 | ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && | ||
100 | reqid == x->props.reqid && | ||
101 | x->km.state == XFRM_STATE_ACQ && | ||
102 | !x->id.spi) { | ||
103 | x0 = x; | ||
104 | break; | ||
105 | } | ||
106 | } | 128 | } |
107 | if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { | 129 | if (j == n) |
108 | ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6, | 130 | goto end; |
109 | (struct in6_addr *)daddr); | 131 | |
110 | ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6, | 132 | /* Rule 2: select MIPv6 RO or inbound trigger */ |
111 | (struct in6_addr *)saddr); | 133 | #ifdef CONFIG_IPV6_MIP6 |
112 | x0->sel.prefixlen_d = 128; | 134 | for (i = 0; i < n; i++) { |
113 | x0->sel.prefixlen_s = 128; | 135 | if (src[i] && |
114 | ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6, | 136 | (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION || |
115 | (struct in6_addr *)saddr); | 137 | src[i]->mode == XFRM_MODE_IN_TRIGGER)) { |
116 | x0->km.state = XFRM_STATE_ACQ; | 138 | dst[j++] = src[i]; |
117 | ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6, | 139 | src[i] = NULL; |
118 | (struct in6_addr *)daddr); | 140 | } |
119 | x0->id.proto = proto; | ||
120 | x0->props.family = AF_INET6; | ||
121 | x0->props.mode = mode; | ||
122 | x0->props.reqid = reqid; | ||
123 | x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; | ||
124 | xfrm_state_hold(x0); | ||
125 | x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; | ||
126 | add_timer(&x0->timer); | ||
127 | xfrm_state_hold(x0); | ||
128 | list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h); | ||
129 | wake_up(&km_waitq); | ||
130 | } | 141 | } |
131 | if (x0) | 142 | if (j == n) |
132 | xfrm_state_hold(x0); | 143 | goto end; |
133 | return x0; | 144 | #endif |
145 | |||
146 | /* Rule 3: select IPsec tunnel */ | ||
147 | for (i = 0; i < n; i++) { | ||
148 | if (src[i] && | ||
149 | src[i]->mode == XFRM_MODE_TUNNEL) { | ||
150 | dst[j++] = src[i]; | ||
151 | src[i] = NULL; | ||
152 | } | ||
153 | } | ||
154 | if (likely(j == n)) | ||
155 | goto end; | ||
156 | |||
157 | /* Final rule */ | ||
158 | for (i = 0; i < n; i++) { | ||
159 | if (src[i]) { | ||
160 | dst[j++] = src[i]; | ||
161 | src[i] = NULL; | ||
162 | } | ||
163 | } | ||
164 | |||
165 | end: | ||
166 | return 0; | ||
134 | } | 167 | } |
135 | 168 | ||
136 | static struct xfrm_state_afinfo xfrm6_state_afinfo = { | 169 | static struct xfrm_state_afinfo xfrm6_state_afinfo = { |
137 | .family = AF_INET6, | 170 | .family = AF_INET6, |
138 | .init_tempsel = __xfrm6_init_tempsel, | 171 | .init_tempsel = __xfrm6_init_tempsel, |
139 | .state_lookup = __xfrm6_state_lookup, | 172 | .tmpl_sort = __xfrm6_tmpl_sort, |
140 | .find_acq = __xfrm6_find_acq, | 173 | .state_sort = __xfrm6_state_sort, |
141 | }; | 174 | }; |
142 | 175 | ||
143 | void __init xfrm6_state_init(void) | 176 | void __init xfrm6_state_init(void) |
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index c8f9369c2a87..59685ee8f700 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c | |||
@@ -307,7 +307,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
307 | 307 | ||
308 | static int xfrm6_tunnel_init_state(struct xfrm_state *x) | 308 | static int xfrm6_tunnel_init_state(struct xfrm_state *x) |
309 | { | 309 | { |
310 | if (!x->props.mode) | 310 | if (x->props.mode != XFRM_MODE_TUNNEL) |
311 | return -EINVAL; | 311 | return -EINVAL; |
312 | 312 | ||
313 | if (x->encap) | 313 | if (x->encap) |
diff --git a/net/key/af_key.c b/net/key/af_key.c index 3a95b2ee4690..83b443ddc72f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c | |||
@@ -1731,7 +1731,8 @@ static u32 gen_reqid(void) | |||
1731 | ++reqid; | 1731 | ++reqid; |
1732 | if (reqid == 0) | 1732 | if (reqid == 0) |
1733 | reqid = IPSEC_MANUAL_REQID_MAX+1; | 1733 | reqid = IPSEC_MANUAL_REQID_MAX+1; |
1734 | if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST) | 1734 | if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid, |
1735 | (void*)&reqid) != -EEXIST) | ||
1735 | return reqid; | 1736 | return reqid; |
1736 | } while (reqid != start); | 1737 | } while (reqid != start); |
1737 | return 0; | 1738 | return 0; |
@@ -1765,7 +1766,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) | |||
1765 | } | 1766 | } |
1766 | 1767 | ||
1767 | /* addresses present only in tunnel mode */ | 1768 | /* addresses present only in tunnel mode */ |
1768 | if (t->mode) { | 1769 | if (t->mode == XFRM_MODE_TUNNEL) { |
1769 | switch (xp->family) { | 1770 | switch (xp->family) { |
1770 | case AF_INET: | 1771 | case AF_INET: |
1771 | sin = (void*)(rq+1); | 1772 | sin = (void*)(rq+1); |
@@ -1997,7 +1998,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i | |||
1997 | int req_size; | 1998 | int req_size; |
1998 | 1999 | ||
1999 | req_size = sizeof(struct sadb_x_ipsecrequest); | 2000 | req_size = sizeof(struct sadb_x_ipsecrequest); |
2000 | if (t->mode) | 2001 | if (t->mode == XFRM_MODE_TUNNEL) |
2001 | req_size += 2*socklen; | 2002 | req_size += 2*socklen; |
2002 | else | 2003 | else |
2003 | size -= 2*socklen; | 2004 | size -= 2*socklen; |
@@ -2013,7 +2014,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i | |||
2013 | if (t->optional) | 2014 | if (t->optional) |
2014 | rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; | 2015 | rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; |
2015 | rq->sadb_x_ipsecrequest_reqid = t->reqid; | 2016 | rq->sadb_x_ipsecrequest_reqid = t->reqid; |
2016 | if (t->mode) { | 2017 | if (t->mode == XFRM_MODE_TUNNEL) { |
2017 | switch (xp->family) { | 2018 | switch (xp->family) { |
2018 | case AF_INET: | 2019 | case AF_INET: |
2019 | sin = (void*)(rq+1); | 2020 | sin = (void*)(rq+1); |
@@ -2268,7 +2269,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg | |||
2268 | return err; | 2269 | return err; |
2269 | } | 2270 | } |
2270 | 2271 | ||
2271 | xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); | 2272 | xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1, |
2273 | &sel, tmp.security, 1); | ||
2272 | security_xfrm_policy_free(&tmp); | 2274 | security_xfrm_policy_free(&tmp); |
2273 | if (xp == NULL) | 2275 | if (xp == NULL) |
2274 | return -ENOENT; | 2276 | return -ENOENT; |
@@ -2330,7 +2332,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h | |||
2330 | if (dir >= XFRM_POLICY_MAX) | 2332 | if (dir >= XFRM_POLICY_MAX) |
2331 | return -EINVAL; | 2333 | return -EINVAL; |
2332 | 2334 | ||
2333 | xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id, | 2335 | xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, |
2334 | hdr->sadb_msg_type == SADB_X_SPDDELETE2); | 2336 | hdr->sadb_msg_type == SADB_X_SPDDELETE2); |
2335 | if (xp == NULL) | 2337 | if (xp == NULL) |
2336 | return -ENOENT; | 2338 | return -ENOENT; |
@@ -2378,7 +2380,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg * | |||
2378 | { | 2380 | { |
2379 | struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; | 2381 | struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; |
2380 | 2382 | ||
2381 | return xfrm_policy_walk(dump_sp, &data); | 2383 | return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data); |
2382 | } | 2384 | } |
2383 | 2385 | ||
2384 | static int key_notify_policy_flush(struct km_event *c) | 2386 | static int key_notify_policy_flush(struct km_event *c) |
@@ -2405,7 +2407,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg | |||
2405 | { | 2407 | { |
2406 | struct km_event c; | 2408 | struct km_event c; |
2407 | 2409 | ||
2408 | xfrm_policy_flush(); | 2410 | xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN); |
2411 | c.data.type = XFRM_POLICY_TYPE_MAIN; | ||
2409 | c.event = XFRM_MSG_FLUSHPOLICY; | 2412 | c.event = XFRM_MSG_FLUSHPOLICY; |
2410 | c.pid = hdr->sadb_msg_pid; | 2413 | c.pid = hdr->sadb_msg_pid; |
2411 | c.seq = hdr->sadb_msg_seq; | 2414 | c.seq = hdr->sadb_msg_seq; |
@@ -2667,6 +2670,9 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) | |||
2667 | 2670 | ||
2668 | static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) | 2671 | static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) |
2669 | { | 2672 | { |
2673 | if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) | ||
2674 | return 0; | ||
2675 | |||
2670 | switch (c->event) { | 2676 | switch (c->event) { |
2671 | case XFRM_MSG_POLEXPIRE: | 2677 | case XFRM_MSG_POLEXPIRE: |
2672 | return key_notify_policy_expire(xp, c); | 2678 | return key_notify_policy_expire(xp, c); |
@@ -2675,6 +2681,8 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e | |||
2675 | case XFRM_MSG_UPDPOLICY: | 2681 | case XFRM_MSG_UPDPOLICY: |
2676 | return key_notify_policy(xp, dir, c); | 2682 | return key_notify_policy(xp, dir, c); |
2677 | case XFRM_MSG_FLUSHPOLICY: | 2683 | case XFRM_MSG_FLUSHPOLICY: |
2684 | if (c->data.type != XFRM_POLICY_TYPE_MAIN) | ||
2685 | break; | ||
2678 | return key_notify_policy_flush(c); | 2686 | return key_notify_policy_flush(c); |
2679 | default: | 2687 | default: |
2680 | printk("pfkey: Unknown policy event %d\n", c->event); | 2688 | printk("pfkey: Unknown policy event %d\n", c->event); |
@@ -2708,6 +2716,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct | |||
2708 | #endif | 2716 | #endif |
2709 | int sockaddr_size; | 2717 | int sockaddr_size; |
2710 | int size; | 2718 | int size; |
2719 | struct sadb_x_sec_ctx *sec_ctx; | ||
2720 | struct xfrm_sec_ctx *xfrm_ctx; | ||
2721 | int ctx_size = 0; | ||
2711 | 2722 | ||
2712 | sockaddr_size = pfkey_sockaddr_size(x->props.family); | 2723 | sockaddr_size = pfkey_sockaddr_size(x->props.family); |
2713 | if (!sockaddr_size) | 2724 | if (!sockaddr_size) |
@@ -2723,6 +2734,11 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct | |||
2723 | else if (x->id.proto == IPPROTO_ESP) | 2734 | else if (x->id.proto == IPPROTO_ESP) |
2724 | size += count_esp_combs(t); | 2735 | size += count_esp_combs(t); |
2725 | 2736 | ||
2737 | if ((xfrm_ctx = x->security)) { | ||
2738 | ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); | ||
2739 | size += sizeof(struct sadb_x_sec_ctx) + ctx_size; | ||
2740 | } | ||
2741 | |||
2726 | skb = alloc_skb(size + 16, GFP_ATOMIC); | 2742 | skb = alloc_skb(size + 16, GFP_ATOMIC); |
2727 | if (skb == NULL) | 2743 | if (skb == NULL) |
2728 | return -ENOMEM; | 2744 | return -ENOMEM; |
@@ -2818,17 +2834,31 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct | |||
2818 | else if (x->id.proto == IPPROTO_ESP) | 2834 | else if (x->id.proto == IPPROTO_ESP) |
2819 | dump_esp_combs(skb, t); | 2835 | dump_esp_combs(skb, t); |
2820 | 2836 | ||
2837 | /* security context */ | ||
2838 | if (xfrm_ctx) { | ||
2839 | sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, | ||
2840 | sizeof(struct sadb_x_sec_ctx) + ctx_size); | ||
2841 | sec_ctx->sadb_x_sec_len = | ||
2842 | (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); | ||
2843 | sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; | ||
2844 | sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; | ||
2845 | sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; | ||
2846 | sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; | ||
2847 | memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, | ||
2848 | xfrm_ctx->ctx_len); | ||
2849 | } | ||
2850 | |||
2821 | return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); | 2851 | return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); |
2822 | } | 2852 | } |
2823 | 2853 | ||
2824 | static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, | 2854 | static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, |
2825 | u8 *data, int len, int *dir) | 2855 | u8 *data, int len, int *dir) |
2826 | { | 2856 | { |
2827 | struct xfrm_policy *xp; | 2857 | struct xfrm_policy *xp; |
2828 | struct sadb_x_policy *pol = (struct sadb_x_policy*)data; | 2858 | struct sadb_x_policy *pol = (struct sadb_x_policy*)data; |
2829 | struct sadb_x_sec_ctx *sec_ctx; | 2859 | struct sadb_x_sec_ctx *sec_ctx; |
2830 | 2860 | ||
2831 | switch (family) { | 2861 | switch (sk->sk_family) { |
2832 | case AF_INET: | 2862 | case AF_INET: |
2833 | if (opt != IP_IPSEC_POLICY) { | 2863 | if (opt != IP_IPSEC_POLICY) { |
2834 | *dir = -EOPNOTSUPP; | 2864 | *dir = -EOPNOTSUPP; |
@@ -2869,7 +2899,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, | |||
2869 | xp->lft.hard_byte_limit = XFRM_INF; | 2899 | xp->lft.hard_byte_limit = XFRM_INF; |
2870 | xp->lft.soft_packet_limit = XFRM_INF; | 2900 | xp->lft.soft_packet_limit = XFRM_INF; |
2871 | xp->lft.hard_packet_limit = XFRM_INF; | 2901 | xp->lft.hard_packet_limit = XFRM_INF; |
2872 | xp->family = family; | 2902 | xp->family = sk->sk_family; |
2873 | 2903 | ||
2874 | xp->xfrm_nr = 0; | 2904 | xp->xfrm_nr = 0; |
2875 | if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && | 2905 | if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && |
@@ -2885,8 +2915,10 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, | |||
2885 | p += pol->sadb_x_policy_len*8; | 2915 | p += pol->sadb_x_policy_len*8; |
2886 | sec_ctx = (struct sadb_x_sec_ctx *)p; | 2916 | sec_ctx = (struct sadb_x_sec_ctx *)p; |
2887 | if (len < pol->sadb_x_policy_len*8 + | 2917 | if (len < pol->sadb_x_policy_len*8 + |
2888 | sec_ctx->sadb_x_sec_len) | 2918 | sec_ctx->sadb_x_sec_len) { |
2919 | *dir = -EINVAL; | ||
2889 | goto out; | 2920 | goto out; |
2921 | } | ||
2890 | if ((*dir = verify_sec_ctx_len(p))) | 2922 | if ((*dir = verify_sec_ctx_len(p))) |
2891 | goto out; | 2923 | goto out; |
2892 | uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); | 2924 | uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); |
@@ -2896,6 +2928,11 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, | |||
2896 | if (*dir) | 2928 | if (*dir) |
2897 | goto out; | 2929 | goto out; |
2898 | } | 2930 | } |
2931 | else { | ||
2932 | *dir = security_xfrm_sock_policy_alloc(xp, sk); | ||
2933 | if (*dir) | ||
2934 | goto out; | ||
2935 | } | ||
2899 | 2936 | ||
2900 | *dir = pol->sadb_x_policy_dir-1; | 2937 | *dir = pol->sadb_x_policy_dir-1; |
2901 | return xp; | 2938 | return xp; |
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index a9894ddfd72a..0a28d2c5c44f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig | |||
@@ -148,6 +148,18 @@ config NETFILTER_XT_TARGET_CONNMARK | |||
148 | <file:Documentation/modules.txt>. The module will be called | 148 | <file:Documentation/modules.txt>. The module will be called |
149 | ipt_CONNMARK.o. If unsure, say `N'. | 149 | ipt_CONNMARK.o. If unsure, say `N'. |
150 | 150 | ||
151 | config NETFILTER_XT_TARGET_DSCP | ||
152 | tristate '"DSCP" target support' | ||
153 | depends on NETFILTER_XTABLES | ||
154 | depends on IP_NF_MANGLE || IP6_NF_MANGLE | ||
155 | help | ||
156 | This option adds a `DSCP' target, which allows you to manipulate | ||
157 | the IPv4/IPv6 header DSCP field (differentiated services codepoint). | ||
158 | |||
159 | The DSCP field can have any value between 0x0 and 0x3f inclusive. | ||
160 | |||
161 | To compile it as a module, choose M here. If unsure, say N. | ||
162 | |||
151 | config NETFILTER_XT_TARGET_MARK | 163 | config NETFILTER_XT_TARGET_MARK |
152 | tristate '"MARK" target support' | 164 | tristate '"MARK" target support' |
153 | depends on NETFILTER_XTABLES | 165 | depends on NETFILTER_XTABLES |
@@ -263,6 +275,17 @@ config NETFILTER_XT_MATCH_DCCP | |||
263 | If you want to compile it as a module, say M here and read | 275 | If you want to compile it as a module, say M here and read |
264 | <file:Documentation/modules.txt>. If unsure, say `N'. | 276 | <file:Documentation/modules.txt>. If unsure, say `N'. |
265 | 277 | ||
278 | config NETFILTER_XT_MATCH_DSCP | ||
279 | tristate '"DSCP" match support' | ||
280 | depends on NETFILTER_XTABLES | ||
281 | help | ||
282 | This option adds a `DSCP' match, which allows you to match against | ||
283 | the IPv4/IPv6 header DSCP field (differentiated services codepoint). | ||
284 | |||
285 | The DSCP field can have any value between 0x0 and 0x3f inclusive. | ||
286 | |||
287 | To compile it as a module, choose M here. If unsure, say N. | ||
288 | |||
266 | config NETFILTER_XT_MATCH_ESP | 289 | config NETFILTER_XT_MATCH_ESP |
267 | tristate '"ESP" match support' | 290 | tristate '"ESP" match support' |
268 | depends on NETFILTER_XTABLES | 291 | depends on NETFILTER_XTABLES |
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6fa4b7580458..a74be492fd0a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile | |||
@@ -25,6 +25,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o | |||
25 | # targets | 25 | # targets |
26 | obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o | 26 | obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o |
27 | obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o | 27 | obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o |
28 | obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o | ||
28 | obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o | 29 | obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o |
29 | obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o | 30 | obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o |
30 | obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o | 31 | obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o |
@@ -37,6 +38,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o | |||
37 | obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o | 38 | obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o |
38 | obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o | 39 | obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o |
39 | obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o | 40 | obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o |
41 | obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o | ||
40 | obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o | 42 | obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o |
41 | obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o | 43 | obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o |
42 | obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o | 44 | obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o |
diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 5d29d5e23624..d80b935b3a92 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c | |||
@@ -182,7 +182,7 @@ next_hook: | |||
182 | ret = -EPERM; | 182 | ret = -EPERM; |
183 | } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { | 183 | } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { |
184 | NFDEBUG("nf_hook: Verdict = QUEUE.\n"); | 184 | NFDEBUG("nf_hook: Verdict = QUEUE.\n"); |
185 | if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, | 185 | if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn, |
186 | verdict >> NF_VERDICT_BITS)) | 186 | verdict >> NF_VERDICT_BITS)) |
187 | goto next_hook; | 187 | goto next_hook; |
188 | } | 188 | } |
@@ -222,6 +222,28 @@ copy_skb: | |||
222 | } | 222 | } |
223 | EXPORT_SYMBOL(skb_make_writable); | 223 | EXPORT_SYMBOL(skb_make_writable); |
224 | 224 | ||
225 | u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum) | ||
226 | { | ||
227 | u_int32_t diff[] = { oldval, newval }; | ||
228 | |||
229 | return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum)); | ||
230 | } | ||
231 | EXPORT_SYMBOL(nf_csum_update); | ||
232 | |||
233 | u_int16_t nf_proto_csum_update(struct sk_buff *skb, | ||
234 | u_int32_t oldval, u_int32_t newval, | ||
235 | u_int16_t csum, int pseudohdr) | ||
236 | { | ||
237 | if (skb->ip_summed != CHECKSUM_PARTIAL) { | ||
238 | csum = nf_csum_update(oldval, newval, csum); | ||
239 | if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) | ||
240 | skb->csum = nf_csum_update(oldval, newval, skb->csum); | ||
241 | } else if (pseudohdr) | ||
242 | csum = ~nf_csum_update(oldval, newval, ~csum); | ||
243 | |||
244 | return csum; | ||
245 | } | ||
246 | EXPORT_SYMBOL(nf_proto_csum_update); | ||
225 | 247 | ||
226 | /* This does not belong here, but locally generated errors need it if connection | 248 | /* This does not belong here, but locally generated errors need it if connection |
227 | tracking in use: without this, connection may not be in hash table, and hence | 249 | tracking in use: without this, connection may not be in hash table, and hence |
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8f2261965a68..093b3ddc513c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -57,7 +57,6 @@ | |||
57 | #include <net/netfilter/nf_conntrack_protocol.h> | 57 | #include <net/netfilter/nf_conntrack_protocol.h> |
58 | #include <net/netfilter/nf_conntrack_helper.h> | 58 | #include <net/netfilter/nf_conntrack_helper.h> |
59 | #include <net/netfilter/nf_conntrack_core.h> | 59 | #include <net/netfilter/nf_conntrack_core.h> |
60 | #include <linux/netfilter_ipv4/listhelp.h> | ||
61 | 60 | ||
62 | #define NF_CONNTRACK_VERSION "0.5.0" | 61 | #define NF_CONNTRACK_VERSION "0.5.0" |
63 | 62 | ||
@@ -74,17 +73,17 @@ atomic_t nf_conntrack_count = ATOMIC_INIT(0); | |||
74 | 73 | ||
75 | void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL; | 74 | void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL; |
76 | LIST_HEAD(nf_conntrack_expect_list); | 75 | LIST_HEAD(nf_conntrack_expect_list); |
77 | struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; | 76 | struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly; |
78 | struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX]; | 77 | struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly; |
79 | static LIST_HEAD(helpers); | 78 | static LIST_HEAD(helpers); |
80 | unsigned int nf_conntrack_htable_size = 0; | 79 | unsigned int nf_conntrack_htable_size __read_mostly = 0; |
81 | int nf_conntrack_max; | 80 | int nf_conntrack_max __read_mostly; |
82 | struct list_head *nf_conntrack_hash; | 81 | struct list_head *nf_conntrack_hash __read_mostly; |
83 | static kmem_cache_t *nf_conntrack_expect_cachep; | 82 | static kmem_cache_t *nf_conntrack_expect_cachep __read_mostly; |
84 | struct nf_conn nf_conntrack_untracked; | 83 | struct nf_conn nf_conntrack_untracked; |
85 | unsigned int nf_ct_log_invalid; | 84 | unsigned int nf_ct_log_invalid __read_mostly; |
86 | static LIST_HEAD(unconfirmed); | 85 | static LIST_HEAD(unconfirmed); |
87 | static int nf_conntrack_vmalloc; | 86 | static int nf_conntrack_vmalloc __read_mostly; |
88 | 87 | ||
89 | static unsigned int nf_conntrack_next_id; | 88 | static unsigned int nf_conntrack_next_id; |
90 | static unsigned int nf_conntrack_expect_next_id; | 89 | static unsigned int nf_conntrack_expect_next_id; |
@@ -539,15 +538,10 @@ void nf_ct_remove_expectations(struct nf_conn *ct) | |||
539 | static void | 538 | static void |
540 | clean_from_lists(struct nf_conn *ct) | 539 | clean_from_lists(struct nf_conn *ct) |
541 | { | 540 | { |
542 | unsigned int ho, hr; | ||
543 | |||
544 | DEBUGP("clean_from_lists(%p)\n", ct); | 541 | DEBUGP("clean_from_lists(%p)\n", ct); |
545 | ASSERT_WRITE_LOCK(&nf_conntrack_lock); | 542 | ASSERT_WRITE_LOCK(&nf_conntrack_lock); |
546 | 543 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | |
547 | ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 544 | list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); |
548 | hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); | ||
549 | LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); | ||
550 | LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); | ||
551 | 545 | ||
552 | /* Destroy all pending expectations */ | 546 | /* Destroy all pending expectations */ |
553 | nf_ct_remove_expectations(ct); | 547 | nf_ct_remove_expectations(ct); |
@@ -617,16 +611,6 @@ static void death_by_timeout(unsigned long ul_conntrack) | |||
617 | nf_ct_put(ct); | 611 | nf_ct_put(ct); |
618 | } | 612 | } |
619 | 613 | ||
620 | static inline int | ||
621 | conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, | ||
622 | const struct nf_conntrack_tuple *tuple, | ||
623 | const struct nf_conn *ignored_conntrack) | ||
624 | { | ||
625 | ASSERT_READ_LOCK(&nf_conntrack_lock); | ||
626 | return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack | ||
627 | && nf_ct_tuple_equal(tuple, &i->tuple); | ||
628 | } | ||
629 | |||
630 | struct nf_conntrack_tuple_hash * | 614 | struct nf_conntrack_tuple_hash * |
631 | __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, | 615 | __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, |
632 | const struct nf_conn *ignored_conntrack) | 616 | const struct nf_conn *ignored_conntrack) |
@@ -636,7 +620,8 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, | |||
636 | 620 | ||
637 | ASSERT_READ_LOCK(&nf_conntrack_lock); | 621 | ASSERT_READ_LOCK(&nf_conntrack_lock); |
638 | list_for_each_entry(h, &nf_conntrack_hash[hash], list) { | 622 | list_for_each_entry(h, &nf_conntrack_hash[hash], list) { |
639 | if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { | 623 | if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && |
624 | nf_ct_tuple_equal(tuple, &h->tuple)) { | ||
640 | NF_CT_STAT_INC(found); | 625 | NF_CT_STAT_INC(found); |
641 | return h; | 626 | return h; |
642 | } | 627 | } |
@@ -667,10 +652,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, | |||
667 | unsigned int repl_hash) | 652 | unsigned int repl_hash) |
668 | { | 653 | { |
669 | ct->id = ++nf_conntrack_next_id; | 654 | ct->id = ++nf_conntrack_next_id; |
670 | list_prepend(&nf_conntrack_hash[hash], | 655 | list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, |
671 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | 656 | &nf_conntrack_hash[hash]); |
672 | list_prepend(&nf_conntrack_hash[repl_hash], | 657 | list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, |
673 | &ct->tuplehash[IP_CT_DIR_REPLY].list); | 658 | &nf_conntrack_hash[repl_hash]); |
674 | } | 659 | } |
675 | 660 | ||
676 | void nf_conntrack_hash_insert(struct nf_conn *ct) | 661 | void nf_conntrack_hash_insert(struct nf_conn *ct) |
@@ -690,7 +675,9 @@ int | |||
690 | __nf_conntrack_confirm(struct sk_buff **pskb) | 675 | __nf_conntrack_confirm(struct sk_buff **pskb) |
691 | { | 676 | { |
692 | unsigned int hash, repl_hash; | 677 | unsigned int hash, repl_hash; |
678 | struct nf_conntrack_tuple_hash *h; | ||
693 | struct nf_conn *ct; | 679 | struct nf_conn *ct; |
680 | struct nf_conn_help *help; | ||
694 | enum ip_conntrack_info ctinfo; | 681 | enum ip_conntrack_info ctinfo; |
695 | 682 | ||
696 | ct = nf_ct_get(*pskb, &ctinfo); | 683 | ct = nf_ct_get(*pskb, &ctinfo); |
@@ -720,41 +707,41 @@ __nf_conntrack_confirm(struct sk_buff **pskb) | |||
720 | /* See if there's one in the list already, including reverse: | 707 | /* See if there's one in the list already, including reverse: |
721 | NAT could have grabbed it without realizing, since we're | 708 | NAT could have grabbed it without realizing, since we're |
722 | not in the hash. If there is, we lost race. */ | 709 | not in the hash. If there is, we lost race. */ |
723 | if (!LIST_FIND(&nf_conntrack_hash[hash], | 710 | list_for_each_entry(h, &nf_conntrack_hash[hash], list) |
724 | conntrack_tuple_cmp, | 711 | if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, |
725 | struct nf_conntrack_tuple_hash *, | 712 | &h->tuple)) |
726 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) | 713 | goto out; |
727 | && !LIST_FIND(&nf_conntrack_hash[repl_hash], | 714 | list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list) |
728 | conntrack_tuple_cmp, | 715 | if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, |
729 | struct nf_conntrack_tuple_hash *, | 716 | &h->tuple)) |
730 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { | 717 | goto out; |
731 | struct nf_conn_help *help; | ||
732 | /* Remove from unconfirmed list */ | ||
733 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); | ||
734 | 718 | ||
735 | __nf_conntrack_hash_insert(ct, hash, repl_hash); | 719 | /* Remove from unconfirmed list */ |
736 | /* Timer relative to confirmation time, not original | 720 | list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); |
737 | setting time, otherwise we'd get timer wrap in | 721 | |
738 | weird delay cases. */ | 722 | __nf_conntrack_hash_insert(ct, hash, repl_hash); |
739 | ct->timeout.expires += jiffies; | 723 | /* Timer relative to confirmation time, not original |
740 | add_timer(&ct->timeout); | 724 | setting time, otherwise we'd get timer wrap in |
741 | atomic_inc(&ct->ct_general.use); | 725 | weird delay cases. */ |
742 | set_bit(IPS_CONFIRMED_BIT, &ct->status); | 726 | ct->timeout.expires += jiffies; |
743 | NF_CT_STAT_INC(insert); | 727 | add_timer(&ct->timeout); |
744 | write_unlock_bh(&nf_conntrack_lock); | 728 | atomic_inc(&ct->ct_general.use); |
745 | help = nfct_help(ct); | 729 | set_bit(IPS_CONFIRMED_BIT, &ct->status); |
746 | if (help && help->helper) | 730 | NF_CT_STAT_INC(insert); |
747 | nf_conntrack_event_cache(IPCT_HELPER, *pskb); | 731 | write_unlock_bh(&nf_conntrack_lock); |
732 | help = nfct_help(ct); | ||
733 | if (help && help->helper) | ||
734 | nf_conntrack_event_cache(IPCT_HELPER, *pskb); | ||
748 | #ifdef CONFIG_NF_NAT_NEEDED | 735 | #ifdef CONFIG_NF_NAT_NEEDED |
749 | if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || | 736 | if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || |
750 | test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) | 737 | test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) |
751 | nf_conntrack_event_cache(IPCT_NATINFO, *pskb); | 738 | nf_conntrack_event_cache(IPCT_NATINFO, *pskb); |
752 | #endif | 739 | #endif |
753 | nf_conntrack_event_cache(master_ct(ct) ? | 740 | nf_conntrack_event_cache(master_ct(ct) ? |
754 | IPCT_RELATED : IPCT_NEW, *pskb); | 741 | IPCT_RELATED : IPCT_NEW, *pskb); |
755 | return NF_ACCEPT; | 742 | return NF_ACCEPT; |
756 | } | ||
757 | 743 | ||
744 | out: | ||
758 | NF_CT_STAT_INC(insert_failed); | 745 | NF_CT_STAT_INC(insert_failed); |
759 | write_unlock_bh(&nf_conntrack_lock); | 746 | write_unlock_bh(&nf_conntrack_lock); |
760 | return NF_DROP; | 747 | return NF_DROP; |
@@ -777,24 +764,21 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, | |||
777 | 764 | ||
778 | /* There's a small race here where we may free a just-assured | 765 | /* There's a small race here where we may free a just-assured |
779 | connection. Too bad: we're in trouble anyway. */ | 766 | connection. Too bad: we're in trouble anyway. */ |
780 | static inline int unreplied(const struct nf_conntrack_tuple_hash *i) | ||
781 | { | ||
782 | return !(test_bit(IPS_ASSURED_BIT, | ||
783 | &nf_ct_tuplehash_to_ctrack(i)->status)); | ||
784 | } | ||
785 | |||
786 | static int early_drop(struct list_head *chain) | 767 | static int early_drop(struct list_head *chain) |
787 | { | 768 | { |
788 | /* Traverse backwards: gives us oldest, which is roughly LRU */ | 769 | /* Traverse backwards: gives us oldest, which is roughly LRU */ |
789 | struct nf_conntrack_tuple_hash *h; | 770 | struct nf_conntrack_tuple_hash *h; |
790 | struct nf_conn *ct = NULL; | 771 | struct nf_conn *ct = NULL, *tmp; |
791 | int dropped = 0; | 772 | int dropped = 0; |
792 | 773 | ||
793 | read_lock_bh(&nf_conntrack_lock); | 774 | read_lock_bh(&nf_conntrack_lock); |
794 | h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *); | 775 | list_for_each_entry_reverse(h, chain, list) { |
795 | if (h) { | 776 | tmp = nf_ct_tuplehash_to_ctrack(h); |
796 | ct = nf_ct_tuplehash_to_ctrack(h); | 777 | if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { |
797 | atomic_inc(&ct->ct_general.use); | 778 | ct = tmp; |
779 | atomic_inc(&ct->ct_general.use); | ||
780 | break; | ||
781 | } | ||
798 | } | 782 | } |
799 | read_unlock_bh(&nf_conntrack_lock); | 783 | read_unlock_bh(&nf_conntrack_lock); |
800 | 784 | ||
@@ -810,18 +794,16 @@ static int early_drop(struct list_head *chain) | |||
810 | return dropped; | 794 | return dropped; |
811 | } | 795 | } |
812 | 796 | ||
813 | static inline int helper_cmp(const struct nf_conntrack_helper *i, | ||
814 | const struct nf_conntrack_tuple *rtuple) | ||
815 | { | ||
816 | return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); | ||
817 | } | ||
818 | |||
819 | static struct nf_conntrack_helper * | 797 | static struct nf_conntrack_helper * |
820 | __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) | 798 | __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) |
821 | { | 799 | { |
822 | return LIST_FIND(&helpers, helper_cmp, | 800 | struct nf_conntrack_helper *h; |
823 | struct nf_conntrack_helper *, | 801 | |
824 | tuple); | 802 | list_for_each_entry(h, &helpers, list) { |
803 | if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) | ||
804 | return h; | ||
805 | } | ||
806 | return NULL; | ||
825 | } | 807 | } |
826 | 808 | ||
827 | struct nf_conntrack_helper * | 809 | struct nf_conntrack_helper * |
@@ -866,11 +848,15 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, | |||
866 | nf_conntrack_hash_rnd_initted = 1; | 848 | nf_conntrack_hash_rnd_initted = 1; |
867 | } | 849 | } |
868 | 850 | ||
851 | /* We don't want any race condition at early drop stage */ | ||
852 | atomic_inc(&nf_conntrack_count); | ||
853 | |||
869 | if (nf_conntrack_max | 854 | if (nf_conntrack_max |
870 | && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) { | 855 | && atomic_read(&nf_conntrack_count) > nf_conntrack_max) { |
871 | unsigned int hash = hash_conntrack(orig); | 856 | unsigned int hash = hash_conntrack(orig); |
872 | /* Try dropping from this hash chain. */ | 857 | /* Try dropping from this hash chain. */ |
873 | if (!early_drop(&nf_conntrack_hash[hash])) { | 858 | if (!early_drop(&nf_conntrack_hash[hash])) { |
859 | atomic_dec(&nf_conntrack_count); | ||
874 | if (net_ratelimit()) | 860 | if (net_ratelimit()) |
875 | printk(KERN_WARNING | 861 | printk(KERN_WARNING |
876 | "nf_conntrack: table full, dropping" | 862 | "nf_conntrack: table full, dropping" |
@@ -921,10 +907,12 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, | |||
921 | init_timer(&conntrack->timeout); | 907 | init_timer(&conntrack->timeout); |
922 | conntrack->timeout.data = (unsigned long)conntrack; | 908 | conntrack->timeout.data = (unsigned long)conntrack; |
923 | conntrack->timeout.function = death_by_timeout; | 909 | conntrack->timeout.function = death_by_timeout; |
910 | read_unlock_bh(&nf_ct_cache_lock); | ||
924 | 911 | ||
925 | atomic_inc(&nf_conntrack_count); | 912 | return conntrack; |
926 | out: | 913 | out: |
927 | read_unlock_bh(&nf_ct_cache_lock); | 914 | read_unlock_bh(&nf_ct_cache_lock); |
915 | atomic_dec(&nf_conntrack_count); | ||
928 | return conntrack; | 916 | return conntrack; |
929 | } | 917 | } |
930 | 918 | ||
@@ -1323,7 +1311,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) | |||
1323 | return ret; | 1311 | return ret; |
1324 | } | 1312 | } |
1325 | write_lock_bh(&nf_conntrack_lock); | 1313 | write_lock_bh(&nf_conntrack_lock); |
1326 | list_prepend(&helpers, me); | 1314 | list_add(&me->list, &helpers); |
1327 | write_unlock_bh(&nf_conntrack_lock); | 1315 | write_unlock_bh(&nf_conntrack_lock); |
1328 | 1316 | ||
1329 | return 0; | 1317 | return 0; |
@@ -1342,8 +1330,8 @@ __nf_conntrack_helper_find_byname(const char *name) | |||
1342 | return NULL; | 1330 | return NULL; |
1343 | } | 1331 | } |
1344 | 1332 | ||
1345 | static inline int unhelp(struct nf_conntrack_tuple_hash *i, | 1333 | static inline void unhelp(struct nf_conntrack_tuple_hash *i, |
1346 | const struct nf_conntrack_helper *me) | 1334 | const struct nf_conntrack_helper *me) |
1347 | { | 1335 | { |
1348 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); | 1336 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); |
1349 | struct nf_conn_help *help = nfct_help(ct); | 1337 | struct nf_conn_help *help = nfct_help(ct); |
@@ -1352,17 +1340,17 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, | |||
1352 | nf_conntrack_event(IPCT_HELPER, ct); | 1340 | nf_conntrack_event(IPCT_HELPER, ct); |
1353 | help->helper = NULL; | 1341 | help->helper = NULL; |
1354 | } | 1342 | } |
1355 | return 0; | ||
1356 | } | 1343 | } |
1357 | 1344 | ||
1358 | void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) | 1345 | void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) |
1359 | { | 1346 | { |
1360 | unsigned int i; | 1347 | unsigned int i; |
1348 | struct nf_conntrack_tuple_hash *h; | ||
1361 | struct nf_conntrack_expect *exp, *tmp; | 1349 | struct nf_conntrack_expect *exp, *tmp; |
1362 | 1350 | ||
1363 | /* Need write lock here, to delete helper. */ | 1351 | /* Need write lock here, to delete helper. */ |
1364 | write_lock_bh(&nf_conntrack_lock); | 1352 | write_lock_bh(&nf_conntrack_lock); |
1365 | LIST_DELETE(&helpers, me); | 1353 | list_del(&me->list); |
1366 | 1354 | ||
1367 | /* Get rid of expectations */ | 1355 | /* Get rid of expectations */ |
1368 | list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { | 1356 | list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { |
@@ -1374,10 +1362,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) | |||
1374 | } | 1362 | } |
1375 | 1363 | ||
1376 | /* Get rid of expecteds, set helpers to NULL. */ | 1364 | /* Get rid of expecteds, set helpers to NULL. */ |
1377 | LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me); | 1365 | list_for_each_entry(h, &unconfirmed, list) |
1378 | for (i = 0; i < nf_conntrack_htable_size; i++) | 1366 | unhelp(h, me); |
1379 | LIST_FIND_W(&nf_conntrack_hash[i], unhelp, | 1367 | for (i = 0; i < nf_conntrack_htable_size; i++) { |
1380 | struct nf_conntrack_tuple_hash *, me); | 1368 | list_for_each_entry(h, &nf_conntrack_hash[i], list) |
1369 | unhelp(h, me); | ||
1370 | } | ||
1381 | write_unlock_bh(&nf_conntrack_lock); | 1371 | write_unlock_bh(&nf_conntrack_lock); |
1382 | 1372 | ||
1383 | /* Someone could be still looking at the helper in a bh. */ | 1373 | /* Someone could be still looking at the helper in a bh. */ |
@@ -1510,37 +1500,40 @@ do_iter(const struct nf_conntrack_tuple_hash *i, | |||
1510 | } | 1500 | } |
1511 | 1501 | ||
1512 | /* Bring out ya dead! */ | 1502 | /* Bring out ya dead! */ |
1513 | static struct nf_conntrack_tuple_hash * | 1503 | static struct nf_conn * |
1514 | get_next_corpse(int (*iter)(struct nf_conn *i, void *data), | 1504 | get_next_corpse(int (*iter)(struct nf_conn *i, void *data), |
1515 | void *data, unsigned int *bucket) | 1505 | void *data, unsigned int *bucket) |
1516 | { | 1506 | { |
1517 | struct nf_conntrack_tuple_hash *h = NULL; | 1507 | struct nf_conntrack_tuple_hash *h; |
1508 | struct nf_conn *ct; | ||
1518 | 1509 | ||
1519 | write_lock_bh(&nf_conntrack_lock); | 1510 | write_lock_bh(&nf_conntrack_lock); |
1520 | for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { | 1511 | for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { |
1521 | h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter, | 1512 | list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) { |
1522 | struct nf_conntrack_tuple_hash *, iter, data); | 1513 | ct = nf_ct_tuplehash_to_ctrack(h); |
1523 | if (h) | 1514 | if (iter(ct, data)) |
1524 | break; | 1515 | goto found; |
1516 | } | ||
1525 | } | 1517 | } |
1526 | if (!h) | 1518 | list_for_each_entry(h, &unconfirmed, list) { |
1527 | h = LIST_FIND_W(&unconfirmed, do_iter, | 1519 | ct = nf_ct_tuplehash_to_ctrack(h); |
1528 | struct nf_conntrack_tuple_hash *, iter, data); | 1520 | if (iter(ct, data)) |
1529 | if (h) | 1521 | goto found; |
1530 | atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); | 1522 | } |
1523 | return NULL; | ||
1524 | found: | ||
1525 | atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); | ||
1531 | write_unlock_bh(&nf_conntrack_lock); | 1526 | write_unlock_bh(&nf_conntrack_lock); |
1532 | 1527 | return ct; | |
1533 | return h; | ||
1534 | } | 1528 | } |
1535 | 1529 | ||
1536 | void | 1530 | void |
1537 | nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) | 1531 | nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) |
1538 | { | 1532 | { |
1539 | struct nf_conntrack_tuple_hash *h; | 1533 | struct nf_conn *ct; |
1540 | unsigned int bucket = 0; | 1534 | unsigned int bucket = 0; |
1541 | 1535 | ||
1542 | while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { | 1536 | while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { |
1543 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | ||
1544 | /* Time to push up daises... */ | 1537 | /* Time to push up daises... */ |
1545 | if (del_timer(&ct->timeout)) | 1538 | if (del_timer(&ct->timeout)) |
1546 | death_by_timeout((unsigned long)ct); | 1539 | death_by_timeout((unsigned long)ct); |
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 960972d225f9..0c17a5bd112b 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/ip.h> | 21 | #include <linux/ip.h> |
22 | #include <linux/ipv6.h> | 22 | #include <linux/ipv6.h> |
23 | #include <linux/ctype.h> | 23 | #include <linux/ctype.h> |
24 | #include <linux/inet.h> | ||
24 | #include <net/checksum.h> | 25 | #include <net/checksum.h> |
25 | #include <net/tcp.h> | 26 | #include <net/tcp.h> |
26 | 27 | ||
@@ -111,101 +112,14 @@ static struct ftp_search { | |||
111 | }, | 112 | }, |
112 | }; | 113 | }; |
113 | 114 | ||
114 | /* This code is based on inet_pton() in glibc-2.2.4 */ | ||
115 | static int | 115 | static int |
116 | get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term) | 116 | get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term) |
117 | { | 117 | { |
118 | static const char xdigits[] = "0123456789abcdef"; | 118 | const char *end; |
119 | u_int8_t tmp[16], *tp, *endp, *colonp; | 119 | int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), (u8 *)dst, term, &end); |
120 | int ch, saw_xdigit; | 120 | if (ret > 0) |
121 | u_int32_t val; | 121 | return (int)(end - src); |
122 | size_t clen = 0; | 122 | return 0; |
123 | |||
124 | tp = memset(tmp, '\0', sizeof(tmp)); | ||
125 | endp = tp + sizeof(tmp); | ||
126 | colonp = NULL; | ||
127 | |||
128 | /* Leading :: requires some special handling. */ | ||
129 | if (*src == ':'){ | ||
130 | if (*++src != ':') { | ||
131 | DEBUGP("invalid \":\" at the head of addr\n"); | ||
132 | return 0; | ||
133 | } | ||
134 | clen++; | ||
135 | } | ||
136 | |||
137 | saw_xdigit = 0; | ||
138 | val = 0; | ||
139 | while ((clen < dlen) && (*src != term)) { | ||
140 | const char *pch; | ||
141 | |||
142 | ch = tolower(*src++); | ||
143 | clen++; | ||
144 | |||
145 | pch = strchr(xdigits, ch); | ||
146 | if (pch != NULL) { | ||
147 | val <<= 4; | ||
148 | val |= (pch - xdigits); | ||
149 | if (val > 0xffff) | ||
150 | return 0; | ||
151 | |||
152 | saw_xdigit = 1; | ||
153 | continue; | ||
154 | } | ||
155 | if (ch != ':') { | ||
156 | DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch); | ||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | if (!saw_xdigit) { | ||
161 | if (colonp) { | ||
162 | DEBUGP("invalid location of \"::\".\n"); | ||
163 | return 0; | ||
164 | } | ||
165 | colonp = tp; | ||
166 | continue; | ||
167 | } else if (*src == term) { | ||
168 | DEBUGP("trancated IPv6 addr\n"); | ||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | if (tp + 2 > endp) | ||
173 | return 0; | ||
174 | *tp++ = (u_int8_t) (val >> 8) & 0xff; | ||
175 | *tp++ = (u_int8_t) val & 0xff; | ||
176 | |||
177 | saw_xdigit = 0; | ||
178 | val = 0; | ||
179 | continue; | ||
180 | } | ||
181 | if (saw_xdigit) { | ||
182 | if (tp + 2 > endp) | ||
183 | return 0; | ||
184 | *tp++ = (u_int8_t) (val >> 8) & 0xff; | ||
185 | *tp++ = (u_int8_t) val & 0xff; | ||
186 | } | ||
187 | if (colonp != NULL) { | ||
188 | /* | ||
189 | * Since some memmove()'s erroneously fail to handle | ||
190 | * overlapping regions, we'll do the shift by hand. | ||
191 | */ | ||
192 | const int n = tp - colonp; | ||
193 | int i; | ||
194 | |||
195 | if (tp == endp) | ||
196 | return 0; | ||
197 | |||
198 | for (i = 1; i <= n; i++) { | ||
199 | endp[- i] = colonp[n - i]; | ||
200 | colonp[n - i] = 0; | ||
201 | } | ||
202 | tp = endp; | ||
203 | } | ||
204 | if (tp != endp || (*src != term)) | ||
205 | return 0; | ||
206 | |||
207 | memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr)); | ||
208 | return clen; | ||
209 | } | 123 | } |
210 | 124 | ||
211 | static int try_number(const char *data, size_t dlen, u_int32_t array[], | 125 | static int try_number(const char *data, size_t dlen, u_int32_t array[], |
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6527d4e048d8..1721f7c78c77 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -339,11 +339,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, | |||
339 | /* dump everything */ | 339 | /* dump everything */ |
340 | events = ~0UL; | 340 | events = ~0UL; |
341 | group = NFNLGRP_CONNTRACK_NEW; | 341 | group = NFNLGRP_CONNTRACK_NEW; |
342 | } else if (events & (IPCT_STATUS | | 342 | } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { |
343 | IPCT_PROTOINFO | | ||
344 | IPCT_HELPER | | ||
345 | IPCT_HELPINFO | | ||
346 | IPCT_NATINFO)) { | ||
347 | type = IPCTNL_MSG_CT_NEW; | 343 | type = IPCTNL_MSG_CT_NEW; |
348 | group = NFNLGRP_CONNTRACK_UPDATE; | 344 | group = NFNLGRP_CONNTRACK_UPDATE; |
349 | } else | 345 | } else |
@@ -395,6 +391,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, | |||
395 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) | 391 | ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) |
396 | goto nfattr_failure; | 392 | goto nfattr_failure; |
397 | 393 | ||
394 | if (events & IPCT_MARK | ||
395 | && ctnetlink_dump_mark(skb, ct) < 0) | ||
396 | goto nfattr_failure; | ||
397 | |||
398 | nlh->nlmsg_len = skb->tail - b; | 398 | nlh->nlmsg_len = skb->tail - b; |
399 | nfnetlink_send(skb, 0, group, 0); | 399 | nfnetlink_send(skb, 0, group, 0); |
400 | return NOTIFY_DONE; | 400 | return NOTIFY_DONE; |
@@ -455,6 +455,11 @@ restart: | |||
455 | cb->args[1] = (unsigned long)ct; | 455 | cb->args[1] = (unsigned long)ct; |
456 | goto out; | 456 | goto out; |
457 | } | 457 | } |
458 | #ifdef CONFIG_NF_CT_ACCT | ||
459 | if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == | ||
460 | IPCTNL_MSG_CT_GET_CTRZERO) | ||
461 | memset(&ct->counters, 0, sizeof(ct->counters)); | ||
462 | #endif | ||
458 | } | 463 | } |
459 | if (cb->args[1]) { | 464 | if (cb->args[1]) { |
460 | cb->args[1] = 0; | 465 | cb->args[1] = 0; |
@@ -470,50 +475,6 @@ out: | |||
470 | return skb->len; | 475 | return skb->len; |
471 | } | 476 | } |
472 | 477 | ||
473 | #ifdef CONFIG_NF_CT_ACCT | ||
474 | static int | ||
475 | ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) | ||
476 | { | ||
477 | struct nf_conn *ct = NULL; | ||
478 | struct nf_conntrack_tuple_hash *h; | ||
479 | struct list_head *i; | ||
480 | u_int32_t *id = (u_int32_t *) &cb->args[1]; | ||
481 | struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); | ||
482 | u_int8_t l3proto = nfmsg->nfgen_family; | ||
483 | |||
484 | DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, | ||
485 | cb->args[0], *id); | ||
486 | |||
487 | write_lock_bh(&nf_conntrack_lock); | ||
488 | for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) { | ||
489 | list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { | ||
490 | h = (struct nf_conntrack_tuple_hash *) i; | ||
491 | if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) | ||
492 | continue; | ||
493 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
494 | if (l3proto && L3PROTO(ct) != l3proto) | ||
495 | continue; | ||
496 | if (ct->id <= *id) | ||
497 | continue; | ||
498 | if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, | ||
499 | cb->nlh->nlmsg_seq, | ||
500 | IPCTNL_MSG_CT_NEW, | ||
501 | 1, ct) < 0) | ||
502 | goto out; | ||
503 | *id = ct->id; | ||
504 | |||
505 | memset(&ct->counters, 0, sizeof(ct->counters)); | ||
506 | } | ||
507 | } | ||
508 | out: | ||
509 | write_unlock_bh(&nf_conntrack_lock); | ||
510 | |||
511 | DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); | ||
512 | |||
513 | return skb->len; | ||
514 | } | ||
515 | #endif | ||
516 | |||
517 | static inline int | 478 | static inline int |
518 | ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple) | 479 | ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple) |
519 | { | 480 | { |
@@ -788,22 +749,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, | |||
788 | if (nlh->nlmsg_flags & NLM_F_DUMP) { | 749 | if (nlh->nlmsg_flags & NLM_F_DUMP) { |
789 | u32 rlen; | 750 | u32 rlen; |
790 | 751 | ||
791 | if (NFNL_MSG_TYPE(nlh->nlmsg_type) == | 752 | #ifndef CONFIG_NF_CT_ACCT |
792 | IPCTNL_MSG_CT_GET_CTRZERO) { | 753 | if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) |
793 | #ifdef CONFIG_NF_CT_ACCT | ||
794 | if ((*errp = netlink_dump_start(ctnl, skb, nlh, | ||
795 | ctnetlink_dump_table_w, | ||
796 | ctnetlink_done)) != 0) | ||
797 | return -EINVAL; | ||
798 | #else | ||
799 | return -ENOTSUPP; | 754 | return -ENOTSUPP; |
800 | #endif | 755 | #endif |
801 | } else { | 756 | if ((*errp = netlink_dump_start(ctnl, skb, nlh, |
802 | if ((*errp = netlink_dump_start(ctnl, skb, nlh, | 757 | ctnetlink_dump_table, |
803 | ctnetlink_dump_table, | 758 | ctnetlink_done)) != 0) |
804 | ctnetlink_done)) != 0) | ||
805 | return -EINVAL; | 759 | return -EINVAL; |
806 | } | ||
807 | 760 | ||
808 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); | 761 | rlen = NLMSG_ALIGN(nlh->nlmsg_len); |
809 | if (rlen > skb->len) | 762 | if (rlen > skb->len) |
@@ -1274,6 +1227,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, | |||
1274 | } else | 1227 | } else |
1275 | return NOTIFY_DONE; | 1228 | return NOTIFY_DONE; |
1276 | 1229 | ||
1230 | if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) | ||
1231 | return NOTIFY_DONE; | ||
1232 | |||
1277 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); | 1233 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); |
1278 | if (!skb) | 1234 | if (!skb) |
1279 | return NOTIFY_DONE; | 1235 | return NOTIFY_DONE; |
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 46bc27e2756d..26408bb0955b 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c | |||
@@ -17,7 +17,7 @@ | |||
17 | #include <linux/netfilter.h> | 17 | #include <linux/netfilter.h> |
18 | #include <net/netfilter/nf_conntrack_protocol.h> | 18 | #include <net/netfilter/nf_conntrack_protocol.h> |
19 | 19 | ||
20 | unsigned int nf_ct_generic_timeout = 600*HZ; | 20 | unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; |
21 | 21 | ||
22 | static int generic_pkt_to_tuple(const struct sk_buff *skb, | 22 | static int generic_pkt_to_tuple(const struct sk_buff *skb, |
23 | unsigned int dataoff, | 23 | unsigned int dataoff, |
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 9bd8a7877fd5..af568777372b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c | |||
@@ -64,13 +64,13 @@ static const char *sctp_conntrack_names[] = { | |||
64 | #define HOURS * 60 MINS | 64 | #define HOURS * 60 MINS |
65 | #define DAYS * 24 HOURS | 65 | #define DAYS * 24 HOURS |
66 | 66 | ||
67 | static unsigned int nf_ct_sctp_timeout_closed = 10 SECS; | 67 | static unsigned int nf_ct_sctp_timeout_closed __read_mostly = 10 SECS; |
68 | static unsigned int nf_ct_sctp_timeout_cookie_wait = 3 SECS; | 68 | static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; |
69 | static unsigned int nf_ct_sctp_timeout_cookie_echoed = 3 SECS; | 69 | static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; |
70 | static unsigned int nf_ct_sctp_timeout_established = 5 DAYS; | 70 | static unsigned int nf_ct_sctp_timeout_established __read_mostly = 5 DAYS; |
71 | static unsigned int nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; | 71 | static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; |
72 | static unsigned int nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; | 72 | static unsigned int nf_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000; |
73 | static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; | 73 | static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; |
74 | 74 | ||
75 | static unsigned int * sctp_timeouts[] | 75 | static unsigned int * sctp_timeouts[] |
76 | = { NULL, /* SCTP_CONNTRACK_NONE */ | 76 | = { NULL, /* SCTP_CONNTRACK_NONE */ |
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index af8adcba23a7..238bbb5b72ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c | |||
@@ -57,19 +57,19 @@ static DEFINE_RWLOCK(tcp_lock); | |||
57 | /* "Be conservative in what you do, | 57 | /* "Be conservative in what you do, |
58 | be liberal in what you accept from others." | 58 | be liberal in what you accept from others." |
59 | If it's non-zero, we mark only out of window RST segments as INVALID. */ | 59 | If it's non-zero, we mark only out of window RST segments as INVALID. */ |
60 | int nf_ct_tcp_be_liberal = 0; | 60 | int nf_ct_tcp_be_liberal __read_mostly = 0; |
61 | 61 | ||
62 | /* When connection is picked up from the middle, how many packets are required | 62 | /* When connection is picked up from the middle, how many packets are required |
63 | to pass in each direction when we assume we are in sync - if any side uses | 63 | to pass in each direction when we assume we are in sync - if any side uses |
64 | window scaling, we lost the game. | 64 | window scaling, we lost the game. |
65 | If it is set to zero, we disable picking up already established | 65 | If it is set to zero, we disable picking up already established |
66 | connections. */ | 66 | connections. */ |
67 | int nf_ct_tcp_loose = 3; | 67 | int nf_ct_tcp_loose __read_mostly = 3; |
68 | 68 | ||
69 | /* Max number of the retransmitted packets without receiving an (acceptable) | 69 | /* Max number of the retransmitted packets without receiving an (acceptable) |
70 | ACK from the destination. If this number is reached, a shorter timer | 70 | ACK from the destination. If this number is reached, a shorter timer |
71 | will be started. */ | 71 | will be started. */ |
72 | int nf_ct_tcp_max_retrans = 3; | 72 | int nf_ct_tcp_max_retrans __read_mostly = 3; |
73 | 73 | ||
74 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more | 74 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more |
75 | closely. They're more complex. --RR */ | 75 | closely. They're more complex. --RR */ |
@@ -92,19 +92,19 @@ static const char *tcp_conntrack_names[] = { | |||
92 | #define HOURS * 60 MINS | 92 | #define HOURS * 60 MINS |
93 | #define DAYS * 24 HOURS | 93 | #define DAYS * 24 HOURS |
94 | 94 | ||
95 | unsigned int nf_ct_tcp_timeout_syn_sent = 2 MINS; | 95 | unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; |
96 | unsigned int nf_ct_tcp_timeout_syn_recv = 60 SECS; | 96 | unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; |
97 | unsigned int nf_ct_tcp_timeout_established = 5 DAYS; | 97 | unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS; |
98 | unsigned int nf_ct_tcp_timeout_fin_wait = 2 MINS; | 98 | unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; |
99 | unsigned int nf_ct_tcp_timeout_close_wait = 60 SECS; | 99 | unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; |
100 | unsigned int nf_ct_tcp_timeout_last_ack = 30 SECS; | 100 | unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; |
101 | unsigned int nf_ct_tcp_timeout_time_wait = 2 MINS; | 101 | unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; |
102 | unsigned int nf_ct_tcp_timeout_close = 10 SECS; | 102 | unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS; |
103 | 103 | ||
104 | /* RFC1122 says the R2 limit should be at least 100 seconds. | 104 | /* RFC1122 says the R2 limit should be at least 100 seconds. |
105 | Linux uses 15 packets as limit, which corresponds | 105 | Linux uses 15 packets as limit, which corresponds |
106 | to ~13-30min depending on RTO. */ | 106 | to ~13-30min depending on RTO. */ |
107 | unsigned int nf_ct_tcp_timeout_max_retrans = 5 MINS; | 107 | unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; |
108 | 108 | ||
109 | static unsigned int * tcp_timeouts[] | 109 | static unsigned int * tcp_timeouts[] |
110 | = { NULL, /* TCP_CONNTRACK_NONE */ | 110 | = { NULL, /* TCP_CONNTRACK_NONE */ |
@@ -688,13 +688,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, | |||
688 | if (state->last_dir == dir | 688 | if (state->last_dir == dir |
689 | && state->last_seq == seq | 689 | && state->last_seq == seq |
690 | && state->last_ack == ack | 690 | && state->last_ack == ack |
691 | && state->last_end == end) | 691 | && state->last_end == end |
692 | && state->last_win == win) | ||
692 | state->retrans++; | 693 | state->retrans++; |
693 | else { | 694 | else { |
694 | state->last_dir = dir; | 695 | state->last_dir = dir; |
695 | state->last_seq = seq; | 696 | state->last_seq = seq; |
696 | state->last_ack = ack; | 697 | state->last_ack = ack; |
697 | state->last_end = end; | 698 | state->last_end = end; |
699 | state->last_win = win; | ||
698 | state->retrans = 0; | 700 | state->retrans = 0; |
699 | } | 701 | } |
700 | } | 702 | } |
@@ -823,8 +825,7 @@ static int tcp_error(struct sk_buff *skb, | |||
823 | 825 | ||
824 | /* Checksum invalid? Ignore. | 826 | /* Checksum invalid? Ignore. |
825 | * We skip checking packets on the outgoing path | 827 | * We skip checking packets on the outgoing path |
826 | * because the semantic of CHECKSUM_HW is different there | 828 | * because the checksum is assumed to be correct. |
827 | * and moreover root might send raw packets. | ||
828 | */ | 829 | */ |
829 | /* FIXME: Source route IP option packets --RR */ | 830 | /* FIXME: Source route IP option packets --RR */ |
830 | if (nf_conntrack_checksum && | 831 | if (nf_conntrack_checksum && |
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index ae07ebe3ab37..d28981cf9af5 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c | |||
@@ -27,8 +27,8 @@ | |||
27 | #include <linux/netfilter_ipv6.h> | 27 | #include <linux/netfilter_ipv6.h> |
28 | #include <net/netfilter/nf_conntrack_protocol.h> | 28 | #include <net/netfilter/nf_conntrack_protocol.h> |
29 | 29 | ||
30 | unsigned int nf_ct_udp_timeout = 30*HZ; | 30 | unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; |
31 | unsigned int nf_ct_udp_timeout_stream = 180*HZ; | 31 | unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; |
32 | 32 | ||
33 | static int udp_pkt_to_tuple(const struct sk_buff *skb, | 33 | static int udp_pkt_to_tuple(const struct sk_buff *skb, |
34 | unsigned int dataoff, | 34 | unsigned int dataoff, |
@@ -131,8 +131,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, | |||
131 | 131 | ||
132 | /* Checksum invalid? Ignore. | 132 | /* Checksum invalid? Ignore. |
133 | * We skip checking packets on the outgoing path | 133 | * We skip checking packets on the outgoing path |
134 | * because the semantic of CHECKSUM_HW is different there | 134 | * because the checksum is assumed to be correct. |
135 | * and moreover root might send raw packets. | ||
136 | * FIXME: Source route IP option packets --RR */ | 135 | * FIXME: Source route IP option packets --RR */ |
137 | if (nf_conntrack_checksum && | 136 | if (nf_conntrack_checksum && |
138 | ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || | 137 | ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || |
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 4ef836699962..5954f6773810 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c | |||
@@ -37,7 +37,6 @@ | |||
37 | #include <net/netfilter/nf_conntrack_protocol.h> | 37 | #include <net/netfilter/nf_conntrack_protocol.h> |
38 | #include <net/netfilter/nf_conntrack_core.h> | 38 | #include <net/netfilter/nf_conntrack_core.h> |
39 | #include <net/netfilter/nf_conntrack_helper.h> | 39 | #include <net/netfilter/nf_conntrack_helper.h> |
40 | #include <linux/netfilter_ipv4/listhelp.h> | ||
41 | 40 | ||
42 | #if 0 | 41 | #if 0 |
43 | #define DEBUGP printk | 42 | #define DEBUGP printk |
@@ -428,7 +427,7 @@ static struct file_operations ct_cpu_seq_fops = { | |||
428 | 427 | ||
429 | /* Sysctl support */ | 428 | /* Sysctl support */ |
430 | 429 | ||
431 | int nf_conntrack_checksum = 1; | 430 | int nf_conntrack_checksum __read_mostly = 1; |
432 | 431 | ||
433 | #ifdef CONFIG_SYSCTL | 432 | #ifdef CONFIG_SYSCTL |
434 | 433 | ||
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 86e392bfe833..a981971ce1d5 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h | |||
@@ -23,7 +23,7 @@ extern unsigned int nf_iterate(struct list_head *head, | |||
23 | int hook_thresh); | 23 | int hook_thresh); |
24 | 24 | ||
25 | /* nf_queue.c */ | 25 | /* nf_queue.c */ |
26 | extern int nf_queue(struct sk_buff **skb, | 26 | extern int nf_queue(struct sk_buff *skb, |
27 | struct list_head *elem, | 27 | struct list_head *elem, |
28 | int pf, unsigned int hook, | 28 | int pf, unsigned int hook, |
29 | struct net_device *indev, | 29 | struct net_device *indev, |
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 662a869593bf..4d8936ed581d 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c | |||
@@ -74,13 +74,13 @@ EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); | |||
74 | * Any packet that leaves via this function must come back | 74 | * Any packet that leaves via this function must come back |
75 | * through nf_reinject(). | 75 | * through nf_reinject(). |
76 | */ | 76 | */ |
77 | int nf_queue(struct sk_buff **skb, | 77 | static int __nf_queue(struct sk_buff *skb, |
78 | struct list_head *elem, | 78 | struct list_head *elem, |
79 | int pf, unsigned int hook, | 79 | int pf, unsigned int hook, |
80 | struct net_device *indev, | 80 | struct net_device *indev, |
81 | struct net_device *outdev, | 81 | struct net_device *outdev, |
82 | int (*okfn)(struct sk_buff *), | 82 | int (*okfn)(struct sk_buff *), |
83 | unsigned int queuenum) | 83 | unsigned int queuenum) |
84 | { | 84 | { |
85 | int status; | 85 | int status; |
86 | struct nf_info *info; | 86 | struct nf_info *info; |
@@ -94,14 +94,14 @@ int nf_queue(struct sk_buff **skb, | |||
94 | read_lock(&queue_handler_lock); | 94 | read_lock(&queue_handler_lock); |
95 | if (!queue_handler[pf]) { | 95 | if (!queue_handler[pf]) { |
96 | read_unlock(&queue_handler_lock); | 96 | read_unlock(&queue_handler_lock); |
97 | kfree_skb(*skb); | 97 | kfree_skb(skb); |
98 | return 1; | 98 | return 1; |
99 | } | 99 | } |
100 | 100 | ||
101 | afinfo = nf_get_afinfo(pf); | 101 | afinfo = nf_get_afinfo(pf); |
102 | if (!afinfo) { | 102 | if (!afinfo) { |
103 | read_unlock(&queue_handler_lock); | 103 | read_unlock(&queue_handler_lock); |
104 | kfree_skb(*skb); | 104 | kfree_skb(skb); |
105 | return 1; | 105 | return 1; |
106 | } | 106 | } |
107 | 107 | ||
@@ -109,9 +109,9 @@ int nf_queue(struct sk_buff **skb, | |||
109 | if (!info) { | 109 | if (!info) { |
110 | if (net_ratelimit()) | 110 | if (net_ratelimit()) |
111 | printk(KERN_ERR "OOM queueing packet %p\n", | 111 | printk(KERN_ERR "OOM queueing packet %p\n", |
112 | *skb); | 112 | skb); |
113 | read_unlock(&queue_handler_lock); | 113 | read_unlock(&queue_handler_lock); |
114 | kfree_skb(*skb); | 114 | kfree_skb(skb); |
115 | return 1; | 115 | return 1; |
116 | } | 116 | } |
117 | 117 | ||
@@ -130,15 +130,15 @@ int nf_queue(struct sk_buff **skb, | |||
130 | if (outdev) dev_hold(outdev); | 130 | if (outdev) dev_hold(outdev); |
131 | 131 | ||
132 | #ifdef CONFIG_BRIDGE_NETFILTER | 132 | #ifdef CONFIG_BRIDGE_NETFILTER |
133 | if ((*skb)->nf_bridge) { | 133 | if (skb->nf_bridge) { |
134 | physindev = (*skb)->nf_bridge->physindev; | 134 | physindev = skb->nf_bridge->physindev; |
135 | if (physindev) dev_hold(physindev); | 135 | if (physindev) dev_hold(physindev); |
136 | physoutdev = (*skb)->nf_bridge->physoutdev; | 136 | physoutdev = skb->nf_bridge->physoutdev; |
137 | if (physoutdev) dev_hold(physoutdev); | 137 | if (physoutdev) dev_hold(physoutdev); |
138 | } | 138 | } |
139 | #endif | 139 | #endif |
140 | afinfo->saveroute(*skb, info); | 140 | afinfo->saveroute(skb, info); |
141 | status = queue_handler[pf]->outfn(*skb, info, queuenum, | 141 | status = queue_handler[pf]->outfn(skb, info, queuenum, |
142 | queue_handler[pf]->data); | 142 | queue_handler[pf]->data); |
143 | 143 | ||
144 | read_unlock(&queue_handler_lock); | 144 | read_unlock(&queue_handler_lock); |
@@ -153,7 +153,7 @@ int nf_queue(struct sk_buff **skb, | |||
153 | #endif | 153 | #endif |
154 | module_put(info->elem->owner); | 154 | module_put(info->elem->owner); |
155 | kfree(info); | 155 | kfree(info); |
156 | kfree_skb(*skb); | 156 | kfree_skb(skb); |
157 | 157 | ||
158 | return 1; | 158 | return 1; |
159 | } | 159 | } |
@@ -161,6 +161,46 @@ int nf_queue(struct sk_buff **skb, | |||
161 | return 1; | 161 | return 1; |
162 | } | 162 | } |
163 | 163 | ||
164 | int nf_queue(struct sk_buff *skb, | ||
165 | struct list_head *elem, | ||
166 | int pf, unsigned int hook, | ||
167 | struct net_device *indev, | ||
168 | struct net_device *outdev, | ||
169 | int (*okfn)(struct sk_buff *), | ||
170 | unsigned int queuenum) | ||
171 | { | ||
172 | struct sk_buff *segs; | ||
173 | |||
174 | if (!skb_is_gso(skb)) | ||
175 | return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, | ||
176 | queuenum); | ||
177 | |||
178 | switch (pf) { | ||
179 | case AF_INET: | ||
180 | skb->protocol = htons(ETH_P_IP); | ||
181 | break; | ||
182 | case AF_INET6: | ||
183 | skb->protocol = htons(ETH_P_IPV6); | ||
184 | break; | ||
185 | } | ||
186 | |||
187 | segs = skb_gso_segment(skb, 0); | ||
188 | kfree_skb(skb); | ||
189 | if (unlikely(IS_ERR(segs))) | ||
190 | return 1; | ||
191 | |||
192 | do { | ||
193 | struct sk_buff *nskb = segs->next; | ||
194 | |||
195 | segs->next = NULL; | ||
196 | if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, | ||
197 | queuenum)) | ||
198 | kfree_skb(segs); | ||
199 | segs = nskb; | ||
200 | } while (segs); | ||
201 | return 1; | ||
202 | } | ||
203 | |||
164 | void nf_reinject(struct sk_buff *skb, struct nf_info *info, | 204 | void nf_reinject(struct sk_buff *skb, struct nf_info *info, |
165 | unsigned int verdict) | 205 | unsigned int verdict) |
166 | { | 206 | { |
@@ -224,9 +264,9 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, | |||
224 | case NF_STOLEN: | 264 | case NF_STOLEN: |
225 | break; | 265 | break; |
226 | case NF_QUEUE: | 266 | case NF_QUEUE: |
227 | if (!nf_queue(&skb, elem, info->pf, info->hook, | 267 | if (!__nf_queue(skb, elem, info->pf, info->hook, |
228 | info->indev, info->outdev, info->okfn, | 268 | info->indev, info->outdev, info->okfn, |
229 | verdict >> NF_VERDICT_BITS)) | 269 | verdict >> NF_VERDICT_BITS)) |
230 | goto next_hook; | 270 | goto next_hook; |
231 | break; | 271 | break; |
232 | default: | 272 | default: |
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 49ef41e34c48..8eb2473d83e1 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c | |||
@@ -377,9 +377,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, | |||
377 | break; | 377 | break; |
378 | 378 | ||
379 | case NFQNL_COPY_PACKET: | 379 | case NFQNL_COPY_PACKET: |
380 | if (entskb->ip_summed == CHECKSUM_HW && | 380 | if ((entskb->ip_summed == CHECKSUM_PARTIAL || |
381 | (*errp = skb_checksum_help(entskb, | 381 | entskb->ip_summed == CHECKSUM_COMPLETE) && |
382 | outdev == NULL))) { | 382 | (*errp = skb_checksum_help(entskb))) { |
383 | spin_unlock_bh(&queue->lock); | 383 | spin_unlock_bh(&queue->lock); |
384 | return NULL; | 384 | return NULL; |
385 | } | 385 | } |
@@ -584,7 +584,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, | |||
584 | queue->queue_dropped++; | 584 | queue->queue_dropped++; |
585 | status = -ENOSPC; | 585 | status = -ENOSPC; |
586 | if (net_ratelimit()) | 586 | if (net_ratelimit()) |
587 | printk(KERN_WARNING "ip_queue: full at %d entries, " | 587 | printk(KERN_WARNING "nf_queue: full at %d entries, " |
588 | "dropping packets(s). Dropped: %d\n", | 588 | "dropping packets(s). Dropped: %d\n", |
589 | queue->queue_total, queue->queue_dropped); | 589 | queue->queue_total, queue->queue_dropped); |
590 | goto err_out_free_nskb; | 590 | goto err_out_free_nskb; |
@@ -635,7 +635,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) | |||
635 | diff, | 635 | diff, |
636 | GFP_ATOMIC); | 636 | GFP_ATOMIC); |
637 | if (newskb == NULL) { | 637 | if (newskb == NULL) { |
638 | printk(KERN_WARNING "ip_queue: OOM " | 638 | printk(KERN_WARNING "nf_queue: OOM " |
639 | "in mangle, dropping packet\n"); | 639 | "in mangle, dropping packet\n"); |
640 | return -ENOMEM; | 640 | return -ENOMEM; |
641 | } | 641 | } |
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 174e8f970095..58522fc65d33 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c | |||
@@ -81,12 +81,42 @@ xt_unregister_target(struct xt_target *target) | |||
81 | int af = target->family; | 81 | int af = target->family; |
82 | 82 | ||
83 | mutex_lock(&xt[af].mutex); | 83 | mutex_lock(&xt[af].mutex); |
84 | LIST_DELETE(&xt[af].target, target); | 84 | list_del(&target->list); |
85 | mutex_unlock(&xt[af].mutex); | 85 | mutex_unlock(&xt[af].mutex); |
86 | } | 86 | } |
87 | EXPORT_SYMBOL(xt_unregister_target); | 87 | EXPORT_SYMBOL(xt_unregister_target); |
88 | 88 | ||
89 | int | 89 | int |
90 | xt_register_targets(struct xt_target *target, unsigned int n) | ||
91 | { | ||
92 | unsigned int i; | ||
93 | int err = 0; | ||
94 | |||
95 | for (i = 0; i < n; i++) { | ||
96 | err = xt_register_target(&target[i]); | ||
97 | if (err) | ||
98 | goto err; | ||
99 | } | ||
100 | return err; | ||
101 | |||
102 | err: | ||
103 | if (i > 0) | ||
104 | xt_unregister_targets(target, i); | ||
105 | return err; | ||
106 | } | ||
107 | EXPORT_SYMBOL(xt_register_targets); | ||
108 | |||
109 | void | ||
110 | xt_unregister_targets(struct xt_target *target, unsigned int n) | ||
111 | { | ||
112 | unsigned int i; | ||
113 | |||
114 | for (i = 0; i < n; i++) | ||
115 | xt_unregister_target(&target[i]); | ||
116 | } | ||
117 | EXPORT_SYMBOL(xt_unregister_targets); | ||
118 | |||
119 | int | ||
90 | xt_register_match(struct xt_match *match) | 120 | xt_register_match(struct xt_match *match) |
91 | { | 121 | { |
92 | int ret, af = match->family; | 122 | int ret, af = match->family; |
@@ -108,11 +138,41 @@ xt_unregister_match(struct xt_match *match) | |||
108 | int af = match->family; | 138 | int af = match->family; |
109 | 139 | ||
110 | mutex_lock(&xt[af].mutex); | 140 | mutex_lock(&xt[af].mutex); |
111 | LIST_DELETE(&xt[af].match, match); | 141 | list_del(&match->list); |
112 | mutex_unlock(&xt[af].mutex); | 142 | mutex_unlock(&xt[af].mutex); |
113 | } | 143 | } |
114 | EXPORT_SYMBOL(xt_unregister_match); | 144 | EXPORT_SYMBOL(xt_unregister_match); |
115 | 145 | ||
146 | int | ||
147 | xt_register_matches(struct xt_match *match, unsigned int n) | ||
148 | { | ||
149 | unsigned int i; | ||
150 | int err = 0; | ||
151 | |||
152 | for (i = 0; i < n; i++) { | ||
153 | err = xt_register_match(&match[i]); | ||
154 | if (err) | ||
155 | goto err; | ||
156 | } | ||
157 | return err; | ||
158 | |||
159 | err: | ||
160 | if (i > 0) | ||
161 | xt_unregister_matches(match, i); | ||
162 | return err; | ||
163 | } | ||
164 | EXPORT_SYMBOL(xt_register_matches); | ||
165 | |||
166 | void | ||
167 | xt_unregister_matches(struct xt_match *match, unsigned int n) | ||
168 | { | ||
169 | unsigned int i; | ||
170 | |||
171 | for (i = 0; i < n; i++) | ||
172 | xt_unregister_match(&match[i]); | ||
173 | } | ||
174 | EXPORT_SYMBOL(xt_unregister_matches); | ||
175 | |||
116 | 176 | ||
117 | /* | 177 | /* |
118 | * These are weird, but module loading must not be done with mutex | 178 | * These are weird, but module loading must not be done with mutex |
@@ -273,52 +333,65 @@ int xt_check_match(const struct xt_match *match, unsigned short family, | |||
273 | EXPORT_SYMBOL_GPL(xt_check_match); | 333 | EXPORT_SYMBOL_GPL(xt_check_match); |
274 | 334 | ||
275 | #ifdef CONFIG_COMPAT | 335 | #ifdef CONFIG_COMPAT |
276 | int xt_compat_match(void *match, void **dstptr, int *size, int convert) | 336 | int xt_compat_match_offset(struct xt_match *match) |
277 | { | 337 | { |
278 | struct xt_match *m; | 338 | u_int16_t csize = match->compatsize ? : match->matchsize; |
279 | struct compat_xt_entry_match *pcompat_m; | 339 | return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize); |
280 | struct xt_entry_match *pm; | 340 | } |
281 | u_int16_t msize; | 341 | EXPORT_SYMBOL_GPL(xt_compat_match_offset); |
282 | int off, ret; | ||
283 | 342 | ||
284 | ret = 0; | 343 | void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, |
285 | m = ((struct xt_entry_match *)match)->u.kernel.match; | 344 | int *size) |
286 | off = XT_ALIGN(m->matchsize) - COMPAT_XT_ALIGN(m->matchsize); | 345 | { |
287 | switch (convert) { | 346 | struct xt_match *match = m->u.kernel.match; |
288 | case COMPAT_TO_USER: | 347 | struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; |
289 | pm = (struct xt_entry_match *)match; | 348 | int pad, off = xt_compat_match_offset(match); |
290 | msize = pm->u.user.match_size; | 349 | u_int16_t msize = cm->u.user.match_size; |
291 | if (copy_to_user(*dstptr, pm, msize)) { | 350 | |
292 | ret = -EFAULT; | 351 | m = *dstptr; |
293 | break; | 352 | memcpy(m, cm, sizeof(*cm)); |
294 | } | 353 | if (match->compat_from_user) |
295 | msize -= off; | 354 | match->compat_from_user(m->data, cm->data); |
296 | if (put_user(msize, (u_int16_t *)*dstptr)) | 355 | else |
297 | ret = -EFAULT; | 356 | memcpy(m->data, cm->data, msize - sizeof(*cm)); |
298 | *size -= off; | 357 | pad = XT_ALIGN(match->matchsize) - match->matchsize; |
299 | *dstptr += msize; | 358 | if (pad > 0) |
300 | break; | 359 | memset(m->data + match->matchsize, 0, pad); |
301 | case COMPAT_FROM_USER: | 360 | |
302 | pcompat_m = (struct compat_xt_entry_match *)match; | 361 | msize += off; |
303 | pm = (struct xt_entry_match *)*dstptr; | 362 | m->u.user.match_size = msize; |
304 | msize = pcompat_m->u.user.match_size; | 363 | |
305 | memcpy(pm, pcompat_m, msize); | 364 | *size += off; |
306 | msize += off; | 365 | *dstptr += msize; |
307 | pm->u.user.match_size = msize; | 366 | } |
308 | *size += off; | 367 | EXPORT_SYMBOL_GPL(xt_compat_match_from_user); |
309 | *dstptr += msize; | 368 | |
310 | break; | 369 | int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, |
311 | case COMPAT_CALC_SIZE: | 370 | int *size) |
312 | *size += off; | 371 | { |
313 | break; | 372 | struct xt_match *match = m->u.kernel.match; |
314 | default: | 373 | struct compat_xt_entry_match __user *cm = *dstptr; |
315 | ret = -ENOPROTOOPT; | 374 | int off = xt_compat_match_offset(match); |
316 | break; | 375 | u_int16_t msize = m->u.user.match_size - off; |
376 | |||
377 | if (copy_to_user(cm, m, sizeof(*cm)) || | ||
378 | put_user(msize, &cm->u.user.match_size)) | ||
379 | return -EFAULT; | ||
380 | |||
381 | if (match->compat_to_user) { | ||
382 | if (match->compat_to_user((void __user *)cm->data, m->data)) | ||
383 | return -EFAULT; | ||
384 | } else { | ||
385 | if (copy_to_user(cm->data, m->data, msize - sizeof(*cm))) | ||
386 | return -EFAULT; | ||
317 | } | 387 | } |
318 | return ret; | 388 | |
389 | *size -= off; | ||
390 | *dstptr += msize; | ||
391 | return 0; | ||
319 | } | 392 | } |
320 | EXPORT_SYMBOL_GPL(xt_compat_match); | 393 | EXPORT_SYMBOL_GPL(xt_compat_match_to_user); |
321 | #endif | 394 | #endif /* CONFIG_COMPAT */ |
322 | 395 | ||
323 | int xt_check_target(const struct xt_target *target, unsigned short family, | 396 | int xt_check_target(const struct xt_target *target, unsigned short family, |
324 | unsigned int size, const char *table, unsigned int hook_mask, | 397 | unsigned int size, const char *table, unsigned int hook_mask, |
@@ -350,51 +423,64 @@ int xt_check_target(const struct xt_target *target, unsigned short family, | |||
350 | EXPORT_SYMBOL_GPL(xt_check_target); | 423 | EXPORT_SYMBOL_GPL(xt_check_target); |
351 | 424 | ||
352 | #ifdef CONFIG_COMPAT | 425 | #ifdef CONFIG_COMPAT |
353 | int xt_compat_target(void *target, void **dstptr, int *size, int convert) | 426 | int xt_compat_target_offset(struct xt_target *target) |
354 | { | 427 | { |
355 | struct xt_target *t; | 428 | u_int16_t csize = target->compatsize ? : target->targetsize; |
356 | struct compat_xt_entry_target *pcompat; | 429 | return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize); |
357 | struct xt_entry_target *pt; | 430 | } |
358 | u_int16_t tsize; | 431 | EXPORT_SYMBOL_GPL(xt_compat_target_offset); |
359 | int off, ret; | ||
360 | 432 | ||
361 | ret = 0; | 433 | void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, |
362 | t = ((struct xt_entry_target *)target)->u.kernel.target; | 434 | int *size) |
363 | off = XT_ALIGN(t->targetsize) - COMPAT_XT_ALIGN(t->targetsize); | 435 | { |
364 | switch (convert) { | 436 | struct xt_target *target = t->u.kernel.target; |
365 | case COMPAT_TO_USER: | 437 | struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; |
366 | pt = (struct xt_entry_target *)target; | 438 | int pad, off = xt_compat_target_offset(target); |
367 | tsize = pt->u.user.target_size; | 439 | u_int16_t tsize = ct->u.user.target_size; |
368 | if (copy_to_user(*dstptr, pt, tsize)) { | 440 | |
369 | ret = -EFAULT; | 441 | t = *dstptr; |
370 | break; | 442 | memcpy(t, ct, sizeof(*ct)); |
371 | } | 443 | if (target->compat_from_user) |
372 | tsize -= off; | 444 | target->compat_from_user(t->data, ct->data); |
373 | if (put_user(tsize, (u_int16_t *)*dstptr)) | 445 | else |
374 | ret = -EFAULT; | 446 | memcpy(t->data, ct->data, tsize - sizeof(*ct)); |
375 | *size -= off; | 447 | pad = XT_ALIGN(target->targetsize) - target->targetsize; |
376 | *dstptr += tsize; | 448 | if (pad > 0) |
377 | break; | 449 | memset(t->data + target->targetsize, 0, pad); |
378 | case COMPAT_FROM_USER: | 450 | |
379 | pcompat = (struct compat_xt_entry_target *)target; | 451 | tsize += off; |
380 | pt = (struct xt_entry_target *)*dstptr; | 452 | t->u.user.target_size = tsize; |
381 | tsize = pcompat->u.user.target_size; | 453 | |
382 | memcpy(pt, pcompat, tsize); | 454 | *size += off; |
383 | tsize += off; | 455 | *dstptr += tsize; |
384 | pt->u.user.target_size = tsize; | 456 | } |
385 | *size += off; | 457 | EXPORT_SYMBOL_GPL(xt_compat_target_from_user); |
386 | *dstptr += tsize; | 458 | |
387 | break; | 459 | int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, |
388 | case COMPAT_CALC_SIZE: | 460 | int *size) |
389 | *size += off; | 461 | { |
390 | break; | 462 | struct xt_target *target = t->u.kernel.target; |
391 | default: | 463 | struct compat_xt_entry_target __user *ct = *dstptr; |
392 | ret = -ENOPROTOOPT; | 464 | int off = xt_compat_target_offset(target); |
393 | break; | 465 | u_int16_t tsize = t->u.user.target_size - off; |
466 | |||
467 | if (copy_to_user(ct, t, sizeof(*ct)) || | ||
468 | put_user(tsize, &ct->u.user.target_size)) | ||
469 | return -EFAULT; | ||
470 | |||
471 | if (target->compat_to_user) { | ||
472 | if (target->compat_to_user((void __user *)ct->data, t->data)) | ||
473 | return -EFAULT; | ||
474 | } else { | ||
475 | if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct))) | ||
476 | return -EFAULT; | ||
394 | } | 477 | } |
395 | return ret; | 478 | |
479 | *size -= off; | ||
480 | *dstptr += tsize; | ||
481 | return 0; | ||
396 | } | 482 | } |
397 | EXPORT_SYMBOL_GPL(xt_compat_target); | 483 | EXPORT_SYMBOL_GPL(xt_compat_target_to_user); |
398 | #endif | 484 | #endif |
399 | 485 | ||
400 | struct xt_table_info *xt_alloc_table_info(unsigned int size) | 486 | struct xt_table_info *xt_alloc_table_info(unsigned int size) |
@@ -515,15 +601,18 @@ int xt_register_table(struct xt_table *table, | |||
515 | { | 601 | { |
516 | int ret; | 602 | int ret; |
517 | struct xt_table_info *private; | 603 | struct xt_table_info *private; |
604 | struct xt_table *t; | ||
518 | 605 | ||
519 | ret = mutex_lock_interruptible(&xt[table->af].mutex); | 606 | ret = mutex_lock_interruptible(&xt[table->af].mutex); |
520 | if (ret != 0) | 607 | if (ret != 0) |
521 | return ret; | 608 | return ret; |
522 | 609 | ||
523 | /* Don't autoload: we'd eat our tail... */ | 610 | /* Don't autoload: we'd eat our tail... */ |
524 | if (list_named_find(&xt[table->af].tables, table->name)) { | 611 | list_for_each_entry(t, &xt[table->af].tables, list) { |
525 | ret = -EEXIST; | 612 | if (strcmp(t->name, table->name) == 0) { |
526 | goto unlock; | 613 | ret = -EEXIST; |
614 | goto unlock; | ||
615 | } | ||
527 | } | 616 | } |
528 | 617 | ||
529 | /* Simplifies replace_table code. */ | 618 | /* Simplifies replace_table code. */ |
@@ -538,7 +627,7 @@ int xt_register_table(struct xt_table *table, | |||
538 | /* save number of initial entries */ | 627 | /* save number of initial entries */ |
539 | private->initial_entries = private->number; | 628 | private->initial_entries = private->number; |
540 | 629 | ||
541 | list_prepend(&xt[table->af].tables, table); | 630 | list_add(&table->list, &xt[table->af].tables); |
542 | 631 | ||
543 | ret = 0; | 632 | ret = 0; |
544 | unlock: | 633 | unlock: |
@@ -553,7 +642,7 @@ void *xt_unregister_table(struct xt_table *table) | |||
553 | 642 | ||
554 | mutex_lock(&xt[table->af].mutex); | 643 | mutex_lock(&xt[table->af].mutex); |
555 | private = table->private; | 644 | private = table->private; |
556 | LIST_DELETE(&xt[table->af].tables, table); | 645 | list_del(&table->list); |
557 | mutex_unlock(&xt[table->af].mutex); | 646 | mutex_unlock(&xt[table->af].mutex); |
558 | 647 | ||
559 | return private; | 648 | return private; |
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index e54e57730012..50de965bb104 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c | |||
@@ -29,8 +29,7 @@ target(struct sk_buff **pskb, | |||
29 | const struct net_device *out, | 29 | const struct net_device *out, |
30 | unsigned int hooknum, | 30 | unsigned int hooknum, |
31 | const struct xt_target *target, | 31 | const struct xt_target *target, |
32 | const void *targinfo, | 32 | const void *targinfo) |
33 | void *userinfo) | ||
34 | { | 33 | { |
35 | const struct xt_classify_target_info *clinfo = targinfo; | 34 | const struct xt_classify_target_info *clinfo = targinfo; |
36 | 35 | ||
@@ -40,47 +39,41 @@ target(struct sk_buff **pskb, | |||
40 | return XT_CONTINUE; | 39 | return XT_CONTINUE; |
41 | } | 40 | } |
42 | 41 | ||
43 | static struct xt_target classify_reg = { | 42 | static struct xt_target xt_classify_target[] = { |
44 | .name = "CLASSIFY", | 43 | { |
45 | .target = target, | 44 | .family = AF_INET, |
46 | .targetsize = sizeof(struct xt_classify_target_info), | 45 | .name = "CLASSIFY", |
47 | .table = "mangle", | 46 | .target = target, |
48 | .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | | 47 | .targetsize = sizeof(struct xt_classify_target_info), |
49 | (1 << NF_IP_POST_ROUTING), | 48 | .table = "mangle", |
50 | .family = AF_INET, | 49 | .hooks = (1 << NF_IP_LOCAL_OUT) | |
51 | .me = THIS_MODULE, | 50 | (1 << NF_IP_FORWARD) | |
51 | (1 << NF_IP_POST_ROUTING), | ||
52 | .me = THIS_MODULE, | ||
53 | }, | ||
54 | { | ||
55 | .name = "CLASSIFY", | ||
56 | .family = AF_INET6, | ||
57 | .target = target, | ||
58 | .targetsize = sizeof(struct xt_classify_target_info), | ||
59 | .table = "mangle", | ||
60 | .hooks = (1 << NF_IP_LOCAL_OUT) | | ||
61 | (1 << NF_IP_FORWARD) | | ||
62 | (1 << NF_IP_POST_ROUTING), | ||
63 | .me = THIS_MODULE, | ||
64 | }, | ||
52 | }; | 65 | }; |
53 | static struct xt_target classify6_reg = { | ||
54 | .name = "CLASSIFY", | ||
55 | .target = target, | ||
56 | .targetsize = sizeof(struct xt_classify_target_info), | ||
57 | .table = "mangle", | ||
58 | .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | | ||
59 | (1 << NF_IP_POST_ROUTING), | ||
60 | .family = AF_INET6, | ||
61 | .me = THIS_MODULE, | ||
62 | }; | ||
63 | |||
64 | 66 | ||
65 | static int __init xt_classify_init(void) | 67 | static int __init xt_classify_init(void) |
66 | { | 68 | { |
67 | int ret; | 69 | return xt_register_targets(xt_classify_target, |
68 | 70 | ARRAY_SIZE(xt_classify_target)); | |
69 | ret = xt_register_target(&classify_reg); | ||
70 | if (ret) | ||
71 | return ret; | ||
72 | |||
73 | ret = xt_register_target(&classify6_reg); | ||
74 | if (ret) | ||
75 | xt_unregister_target(&classify_reg); | ||
76 | |||
77 | return ret; | ||
78 | } | 71 | } |
79 | 72 | ||
80 | static void __exit xt_classify_fini(void) | 73 | static void __exit xt_classify_fini(void) |
81 | { | 74 | { |
82 | xt_unregister_target(&classify_reg); | 75 | xt_unregister_targets(xt_classify_target, |
83 | xt_unregister_target(&classify6_reg); | 76 | ARRAY_SIZE(xt_classify_target)); |
84 | } | 77 | } |
85 | 78 | ||
86 | module_init(xt_classify_init); | 79 | module_init(xt_classify_init); |
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 60c375d36f01..c01524f817f0 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c | |||
@@ -38,8 +38,7 @@ target(struct sk_buff **pskb, | |||
38 | const struct net_device *out, | 38 | const struct net_device *out, |
39 | unsigned int hooknum, | 39 | unsigned int hooknum, |
40 | const struct xt_target *target, | 40 | const struct xt_target *target, |
41 | const void *targinfo, | 41 | const void *targinfo) |
42 | void *userinfo) | ||
43 | { | 42 | { |
44 | const struct xt_connmark_target_info *markinfo = targinfo; | 43 | const struct xt_connmark_target_info *markinfo = targinfo; |
45 | u_int32_t diff; | 44 | u_int32_t diff; |
@@ -49,24 +48,37 @@ target(struct sk_buff **pskb, | |||
49 | u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo); | 48 | u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo); |
50 | 49 | ||
51 | if (ctmark) { | 50 | if (ctmark) { |
52 | switch(markinfo->mode) { | 51 | switch(markinfo->mode) { |
53 | case XT_CONNMARK_SET: | 52 | case XT_CONNMARK_SET: |
54 | newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; | 53 | newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; |
55 | if (newmark != *ctmark) | 54 | if (newmark != *ctmark) { |
56 | *ctmark = newmark; | 55 | *ctmark = newmark; |
57 | break; | 56 | #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) |
58 | case XT_CONNMARK_SAVE: | 57 | ip_conntrack_event_cache(IPCT_MARK, *pskb); |
59 | newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); | 58 | #else |
60 | if (*ctmark != newmark) | 59 | nf_conntrack_event_cache(IPCT_MARK, *pskb); |
61 | *ctmark = newmark; | 60 | #endif |
62 | break; | 61 | } |
63 | case XT_CONNMARK_RESTORE: | 62 | break; |
64 | nfmark = (*pskb)->nfmark; | 63 | case XT_CONNMARK_SAVE: |
65 | diff = (*ctmark ^ nfmark) & markinfo->mask; | 64 | newmark = (*ctmark & ~markinfo->mask) | |
66 | if (diff != 0) | 65 | ((*pskb)->nfmark & markinfo->mask); |
67 | (*pskb)->nfmark = nfmark ^ diff; | 66 | if (*ctmark != newmark) { |
68 | break; | 67 | *ctmark = newmark; |
69 | } | 68 | #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) |
69 | ip_conntrack_event_cache(IPCT_MARK, *pskb); | ||
70 | #else | ||
71 | nf_conntrack_event_cache(IPCT_MARK, *pskb); | ||
72 | #endif | ||
73 | } | ||
74 | break; | ||
75 | case XT_CONNMARK_RESTORE: | ||
76 | nfmark = (*pskb)->nfmark; | ||
77 | diff = (*ctmark ^ nfmark) & markinfo->mask; | ||
78 | if (diff != 0) | ||
79 | (*pskb)->nfmark = nfmark ^ diff; | ||
80 | break; | ||
81 | } | ||
70 | } | 82 | } |
71 | 83 | ||
72 | return XT_CONTINUE; | 84 | return XT_CONTINUE; |
@@ -77,65 +89,91 @@ checkentry(const char *tablename, | |||
77 | const void *entry, | 89 | const void *entry, |
78 | const struct xt_target *target, | 90 | const struct xt_target *target, |
79 | void *targinfo, | 91 | void *targinfo, |
80 | unsigned int targinfosize, | ||
81 | unsigned int hook_mask) | 92 | unsigned int hook_mask) |
82 | { | 93 | { |
83 | struct xt_connmark_target_info *matchinfo = targinfo; | 94 | struct xt_connmark_target_info *matchinfo = targinfo; |
84 | 95 | ||
85 | if (matchinfo->mode == XT_CONNMARK_RESTORE) { | 96 | if (matchinfo->mode == XT_CONNMARK_RESTORE) { |
86 | if (strcmp(tablename, "mangle") != 0) { | 97 | if (strcmp(tablename, "mangle") != 0) { |
87 | printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename); | 98 | printk(KERN_WARNING "CONNMARK: restore can only be " |
88 | return 0; | 99 | "called from \"mangle\" table, not \"%s\"\n", |
89 | } | 100 | tablename); |
101 | return 0; | ||
102 | } | ||
90 | } | 103 | } |
91 | |||
92 | if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { | 104 | if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { |
93 | printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); | 105 | printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); |
94 | return 0; | 106 | return 0; |
95 | } | 107 | } |
96 | |||
97 | return 1; | 108 | return 1; |
98 | } | 109 | } |
99 | 110 | ||
100 | static struct xt_target connmark_reg = { | 111 | #ifdef CONFIG_COMPAT |
101 | .name = "CONNMARK", | 112 | struct compat_xt_connmark_target_info { |
102 | .target = target, | 113 | compat_ulong_t mark, mask; |
103 | .targetsize = sizeof(struct xt_connmark_target_info), | 114 | u_int8_t mode; |
104 | .checkentry = checkentry, | 115 | u_int8_t __pad1; |
105 | .family = AF_INET, | 116 | u_int16_t __pad2; |
106 | .me = THIS_MODULE | ||
107 | }; | 117 | }; |
108 | 118 | ||
109 | static struct xt_target connmark6_reg = { | 119 | static void compat_from_user(void *dst, void *src) |
110 | .name = "CONNMARK", | 120 | { |
111 | .target = target, | 121 | struct compat_xt_connmark_target_info *cm = src; |
112 | .targetsize = sizeof(struct xt_connmark_target_info), | 122 | struct xt_connmark_target_info m = { |
113 | .checkentry = checkentry, | 123 | .mark = cm->mark, |
114 | .family = AF_INET6, | 124 | .mask = cm->mask, |
115 | .me = THIS_MODULE | 125 | .mode = cm->mode, |
126 | }; | ||
127 | memcpy(dst, &m, sizeof(m)); | ||
128 | } | ||
129 | |||
130 | static int compat_to_user(void __user *dst, void *src) | ||
131 | { | ||
132 | struct xt_connmark_target_info *m = src; | ||
133 | struct compat_xt_connmark_target_info cm = { | ||
134 | .mark = m->mark, | ||
135 | .mask = m->mask, | ||
136 | .mode = m->mode, | ||
137 | }; | ||
138 | return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; | ||
139 | } | ||
140 | #endif /* CONFIG_COMPAT */ | ||
141 | |||
142 | static struct xt_target xt_connmark_target[] = { | ||
143 | { | ||
144 | .name = "CONNMARK", | ||
145 | .family = AF_INET, | ||
146 | .checkentry = checkentry, | ||
147 | .target = target, | ||
148 | .targetsize = sizeof(struct xt_connmark_target_info), | ||
149 | #ifdef CONFIG_COMPAT | ||
150 | .compatsize = sizeof(struct compat_xt_connmark_target_info), | ||
151 | .compat_from_user = compat_from_user, | ||
152 | .compat_to_user = compat_to_user, | ||
153 | #endif | ||
154 | .me = THIS_MODULE | ||
155 | }, | ||
156 | { | ||
157 | .name = "CONNMARK", | ||
158 | .family = AF_INET6, | ||
159 | .checkentry = checkentry, | ||
160 | .target = target, | ||
161 | .targetsize = sizeof(struct xt_connmark_target_info), | ||
162 | .me = THIS_MODULE | ||
163 | }, | ||
116 | }; | 164 | }; |
117 | 165 | ||
118 | static int __init xt_connmark_init(void) | 166 | static int __init xt_connmark_init(void) |
119 | { | 167 | { |
120 | int ret; | ||
121 | |||
122 | need_conntrack(); | 168 | need_conntrack(); |
123 | 169 | return xt_register_targets(xt_connmark_target, | |
124 | ret = xt_register_target(&connmark_reg); | 170 | ARRAY_SIZE(xt_connmark_target)); |
125 | if (ret) | ||
126 | return ret; | ||
127 | |||
128 | ret = xt_register_target(&connmark6_reg); | ||
129 | if (ret) | ||
130 | xt_unregister_target(&connmark_reg); | ||
131 | |||
132 | return ret; | ||
133 | } | 171 | } |
134 | 172 | ||
135 | static void __exit xt_connmark_fini(void) | 173 | static void __exit xt_connmark_fini(void) |
136 | { | 174 | { |
137 | xt_unregister_target(&connmark_reg); | 175 | xt_unregister_targets(xt_connmark_target, |
138 | xt_unregister_target(&connmark6_reg); | 176 | ARRAY_SIZE(xt_connmark_target)); |
139 | } | 177 | } |
140 | 178 | ||
141 | module_init(xt_connmark_init); | 179 | module_init(xt_connmark_init); |
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 8c011e020769..467386266674 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c | |||
@@ -66,7 +66,7 @@ static void secmark_restore(struct sk_buff *skb) | |||
66 | static unsigned int target(struct sk_buff **pskb, const struct net_device *in, | 66 | static unsigned int target(struct sk_buff **pskb, const struct net_device *in, |
67 | const struct net_device *out, unsigned int hooknum, | 67 | const struct net_device *out, unsigned int hooknum, |
68 | const struct xt_target *target, | 68 | const struct xt_target *target, |
69 | const void *targinfo, void *userinfo) | 69 | const void *targinfo) |
70 | { | 70 | { |
71 | struct sk_buff *skb = *pskb; | 71 | struct sk_buff *skb = *pskb; |
72 | const struct xt_connsecmark_target_info *info = targinfo; | 72 | const struct xt_connsecmark_target_info *info = targinfo; |
@@ -89,7 +89,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, | |||
89 | 89 | ||
90 | static int checkentry(const char *tablename, const void *entry, | 90 | static int checkentry(const char *tablename, const void *entry, |
91 | const struct xt_target *target, void *targinfo, | 91 | const struct xt_target *target, void *targinfo, |
92 | unsigned int targinfosize, unsigned int hook_mask) | 92 | unsigned int hook_mask) |
93 | { | 93 | { |
94 | struct xt_connsecmark_target_info *info = targinfo; | 94 | struct xt_connsecmark_target_info *info = targinfo; |
95 | 95 | ||
@@ -106,49 +106,38 @@ static int checkentry(const char *tablename, const void *entry, | |||
106 | return 1; | 106 | return 1; |
107 | } | 107 | } |
108 | 108 | ||
109 | static struct xt_target ipt_connsecmark_reg = { | 109 | static struct xt_target xt_connsecmark_target[] = { |
110 | .name = "CONNSECMARK", | 110 | { |
111 | .target = target, | 111 | .name = "CONNSECMARK", |
112 | .targetsize = sizeof(struct xt_connsecmark_target_info), | 112 | .family = AF_INET, |
113 | .table = "mangle", | 113 | .checkentry = checkentry, |
114 | .checkentry = checkentry, | 114 | .target = target, |
115 | .me = THIS_MODULE, | 115 | .targetsize = sizeof(struct xt_connsecmark_target_info), |
116 | .family = AF_INET, | 116 | .table = "mangle", |
117 | .revision = 0, | 117 | .me = THIS_MODULE, |
118 | }; | 118 | }, |
119 | 119 | { | |
120 | static struct xt_target ip6t_connsecmark_reg = { | 120 | .name = "CONNSECMARK", |
121 | .name = "CONNSECMARK", | 121 | .family = AF_INET6, |
122 | .target = target, | 122 | .checkentry = checkentry, |
123 | .targetsize = sizeof(struct xt_connsecmark_target_info), | 123 | .target = target, |
124 | .table = "mangle", | 124 | .targetsize = sizeof(struct xt_connsecmark_target_info), |
125 | .checkentry = checkentry, | 125 | .table = "mangle", |
126 | .me = THIS_MODULE, | 126 | .me = THIS_MODULE, |
127 | .family = AF_INET6, | 127 | }, |
128 | .revision = 0, | ||
129 | }; | 128 | }; |
130 | 129 | ||
131 | static int __init xt_connsecmark_init(void) | 130 | static int __init xt_connsecmark_init(void) |
132 | { | 131 | { |
133 | int err; | ||
134 | |||
135 | need_conntrack(); | 132 | need_conntrack(); |
136 | 133 | return xt_register_targets(xt_connsecmark_target, | |
137 | err = xt_register_target(&ipt_connsecmark_reg); | 134 | ARRAY_SIZE(xt_connsecmark_target)); |
138 | if (err) | ||
139 | return err; | ||
140 | |||
141 | err = xt_register_target(&ip6t_connsecmark_reg); | ||
142 | if (err) | ||
143 | xt_unregister_target(&ipt_connsecmark_reg); | ||
144 | |||
145 | return err; | ||
146 | } | 135 | } |
147 | 136 | ||
148 | static void __exit xt_connsecmark_fini(void) | 137 | static void __exit xt_connsecmark_fini(void) |
149 | { | 138 | { |
150 | xt_unregister_target(&ip6t_connsecmark_reg); | 139 | xt_unregister_targets(xt_connsecmark_target, |
151 | xt_unregister_target(&ipt_connsecmark_reg); | 140 | ARRAY_SIZE(xt_connsecmark_target)); |
152 | } | 141 | } |
153 | 142 | ||
154 | module_init(xt_connsecmark_init); | 143 | module_init(xt_connsecmark_init); |
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c new file mode 100644 index 000000000000..a7cc75aeb38d --- /dev/null +++ b/net/netfilter/xt_DSCP.c | |||
@@ -0,0 +1,118 @@ | |||
1 | /* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8 | ||
2 | * | ||
3 | * (C) 2002 by Harald Welte <laforge@netfilter.org> | ||
4 | * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | * See RFC2474 for a description of the DSCP field within the IP Header. | ||
11 | * | ||
12 | * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp | ||
13 | */ | ||
14 | |||
15 | #include <linux/module.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/ip.h> | ||
18 | #include <linux/ipv6.h> | ||
19 | #include <net/dsfield.h> | ||
20 | |||
21 | #include <linux/netfilter/x_tables.h> | ||
22 | #include <linux/netfilter/xt_DSCP.h> | ||
23 | |||
24 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
25 | MODULE_DESCRIPTION("x_tables DSCP modification module"); | ||
26 | MODULE_LICENSE("GPL"); | ||
27 | MODULE_ALIAS("ipt_DSCP"); | ||
28 | MODULE_ALIAS("ip6t_DSCP"); | ||
29 | |||
30 | static unsigned int target(struct sk_buff **pskb, | ||
31 | const struct net_device *in, | ||
32 | const struct net_device *out, | ||
33 | unsigned int hooknum, | ||
34 | const struct xt_target *target, | ||
35 | const void *targinfo) | ||
36 | { | ||
37 | const struct xt_DSCP_info *dinfo = targinfo; | ||
38 | u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT; | ||
39 | |||
40 | if (dscp != dinfo->dscp) { | ||
41 | if (!skb_make_writable(pskb, sizeof(struct iphdr))) | ||
42 | return NF_DROP; | ||
43 | |||
44 | ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK), | ||
45 | dinfo->dscp << XT_DSCP_SHIFT); | ||
46 | |||
47 | } | ||
48 | return XT_CONTINUE; | ||
49 | } | ||
50 | |||
51 | static unsigned int target6(struct sk_buff **pskb, | ||
52 | const struct net_device *in, | ||
53 | const struct net_device *out, | ||
54 | unsigned int hooknum, | ||
55 | const struct xt_target *target, | ||
56 | const void *targinfo) | ||
57 | { | ||
58 | const struct xt_DSCP_info *dinfo = targinfo; | ||
59 | u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT; | ||
60 | |||
61 | if (dscp != dinfo->dscp) { | ||
62 | if (!skb_make_writable(pskb, sizeof(struct ipv6hdr))) | ||
63 | return NF_DROP; | ||
64 | |||
65 | ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK), | ||
66 | dinfo->dscp << XT_DSCP_SHIFT); | ||
67 | } | ||
68 | return XT_CONTINUE; | ||
69 | } | ||
70 | |||
71 | static int checkentry(const char *tablename, | ||
72 | const void *e_void, | ||
73 | const struct xt_target *target, | ||
74 | void *targinfo, | ||
75 | unsigned int hook_mask) | ||
76 | { | ||
77 | const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; | ||
78 | |||
79 | if ((dscp > XT_DSCP_MAX)) { | ||
80 | printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); | ||
81 | return 0; | ||
82 | } | ||
83 | return 1; | ||
84 | } | ||
85 | |||
86 | static struct xt_target xt_dscp_target[] = { | ||
87 | { | ||
88 | .name = "DSCP", | ||
89 | .family = AF_INET, | ||
90 | .checkentry = checkentry, | ||
91 | .target = target, | ||
92 | .targetsize = sizeof(struct xt_DSCP_info), | ||
93 | .table = "mangle", | ||
94 | .me = THIS_MODULE, | ||
95 | }, | ||
96 | { | ||
97 | .name = "DSCP", | ||
98 | .family = AF_INET6, | ||
99 | .checkentry = checkentry, | ||
100 | .target = target6, | ||
101 | .targetsize = sizeof(struct xt_DSCP_info), | ||
102 | .table = "mangle", | ||
103 | .me = THIS_MODULE, | ||
104 | }, | ||
105 | }; | ||
106 | |||
107 | static int __init xt_dscp_target_init(void) | ||
108 | { | ||
109 | return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); | ||
110 | } | ||
111 | |||
112 | static void __exit xt_dscp_target_fini(void) | ||
113 | { | ||
114 | xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); | ||
115 | } | ||
116 | |||
117 | module_init(xt_dscp_target_init); | ||
118 | module_exit(xt_dscp_target_fini); | ||
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index ee9c34edc76c..c6e860a7114f 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c | |||
@@ -27,8 +27,7 @@ target_v0(struct sk_buff **pskb, | |||
27 | const struct net_device *out, | 27 | const struct net_device *out, |
28 | unsigned int hooknum, | 28 | unsigned int hooknum, |
29 | const struct xt_target *target, | 29 | const struct xt_target *target, |
30 | const void *targinfo, | 30 | const void *targinfo) |
31 | void *userinfo) | ||
32 | { | 31 | { |
33 | const struct xt_mark_target_info *markinfo = targinfo; | 32 | const struct xt_mark_target_info *markinfo = targinfo; |
34 | 33 | ||
@@ -44,8 +43,7 @@ target_v1(struct sk_buff **pskb, | |||
44 | const struct net_device *out, | 43 | const struct net_device *out, |
45 | unsigned int hooknum, | 44 | unsigned int hooknum, |
46 | const struct xt_target *target, | 45 | const struct xt_target *target, |
47 | const void *targinfo, | 46 | const void *targinfo) |
48 | void *userinfo) | ||
49 | { | 47 | { |
50 | const struct xt_mark_target_info_v1 *markinfo = targinfo; | 48 | const struct xt_mark_target_info_v1 *markinfo = targinfo; |
51 | int mark = 0; | 49 | int mark = 0; |
@@ -76,7 +74,6 @@ checkentry_v0(const char *tablename, | |||
76 | const void *entry, | 74 | const void *entry, |
77 | const struct xt_target *target, | 75 | const struct xt_target *target, |
78 | void *targinfo, | 76 | void *targinfo, |
79 | unsigned int targinfosize, | ||
80 | unsigned int hook_mask) | 77 | unsigned int hook_mask) |
81 | { | 78 | { |
82 | struct xt_mark_target_info *markinfo = targinfo; | 79 | struct xt_mark_target_info *markinfo = targinfo; |
@@ -93,7 +90,6 @@ checkentry_v1(const char *tablename, | |||
93 | const void *entry, | 90 | const void *entry, |
94 | const struct xt_target *target, | 91 | const struct xt_target *target, |
95 | void *targinfo, | 92 | void *targinfo, |
96 | unsigned int targinfosize, | ||
97 | unsigned int hook_mask) | 93 | unsigned int hook_mask) |
98 | { | 94 | { |
99 | struct xt_mark_target_info_v1 *markinfo = targinfo; | 95 | struct xt_mark_target_info_v1 *markinfo = targinfo; |
@@ -112,65 +108,81 @@ checkentry_v1(const char *tablename, | |||
112 | return 1; | 108 | return 1; |
113 | } | 109 | } |
114 | 110 | ||
115 | static struct xt_target ipt_mark_reg_v0 = { | 111 | #ifdef CONFIG_COMPAT |
116 | .name = "MARK", | 112 | struct compat_xt_mark_target_info_v1 { |
117 | .target = target_v0, | 113 | compat_ulong_t mark; |
118 | .targetsize = sizeof(struct xt_mark_target_info), | 114 | u_int8_t mode; |
119 | .table = "mangle", | 115 | u_int8_t __pad1; |
120 | .checkentry = checkentry_v0, | 116 | u_int16_t __pad2; |
121 | .me = THIS_MODULE, | ||
122 | .family = AF_INET, | ||
123 | .revision = 0, | ||
124 | }; | 117 | }; |
125 | 118 | ||
126 | static struct xt_target ipt_mark_reg_v1 = { | 119 | static void compat_from_user_v1(void *dst, void *src) |
127 | .name = "MARK", | 120 | { |
128 | .target = target_v1, | 121 | struct compat_xt_mark_target_info_v1 *cm = src; |
129 | .targetsize = sizeof(struct xt_mark_target_info_v1), | 122 | struct xt_mark_target_info_v1 m = { |
130 | .table = "mangle", | 123 | .mark = cm->mark, |
131 | .checkentry = checkentry_v1, | 124 | .mode = cm->mode, |
132 | .me = THIS_MODULE, | 125 | }; |
133 | .family = AF_INET, | 126 | memcpy(dst, &m, sizeof(m)); |
134 | .revision = 1, | 127 | } |
135 | }; | ||
136 | 128 | ||
137 | static struct xt_target ip6t_mark_reg_v0 = { | 129 | static int compat_to_user_v1(void __user *dst, void *src) |
138 | .name = "MARK", | 130 | { |
139 | .target = target_v0, | 131 | struct xt_mark_target_info_v1 *m = src; |
140 | .targetsize = sizeof(struct xt_mark_target_info), | 132 | struct compat_xt_mark_target_info_v1 cm = { |
141 | .table = "mangle", | 133 | .mark = m->mark, |
142 | .checkentry = checkentry_v0, | 134 | .mode = m->mode, |
143 | .me = THIS_MODULE, | 135 | }; |
144 | .family = AF_INET6, | 136 | return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; |
145 | .revision = 0, | 137 | } |
138 | #endif /* CONFIG_COMPAT */ | ||
139 | |||
140 | static struct xt_target xt_mark_target[] = { | ||
141 | { | ||
142 | .name = "MARK", | ||
143 | .family = AF_INET, | ||
144 | .revision = 0, | ||
145 | .checkentry = checkentry_v0, | ||
146 | .target = target_v0, | ||
147 | .targetsize = sizeof(struct xt_mark_target_info), | ||
148 | .table = "mangle", | ||
149 | .me = THIS_MODULE, | ||
150 | }, | ||
151 | { | ||
152 | .name = "MARK", | ||
153 | .family = AF_INET, | ||
154 | .revision = 1, | ||
155 | .checkentry = checkentry_v1, | ||
156 | .target = target_v1, | ||
157 | .targetsize = sizeof(struct xt_mark_target_info_v1), | ||
158 | #ifdef CONFIG_COMPAT | ||
159 | .compatsize = sizeof(struct compat_xt_mark_target_info_v1), | ||
160 | .compat_from_user = compat_from_user_v1, | ||
161 | .compat_to_user = compat_to_user_v1, | ||
162 | #endif | ||
163 | .table = "mangle", | ||
164 | .me = THIS_MODULE, | ||
165 | }, | ||
166 | { | ||
167 | .name = "MARK", | ||
168 | .family = AF_INET6, | ||
169 | .revision = 0, | ||
170 | .checkentry = checkentry_v0, | ||
171 | .target = target_v0, | ||
172 | .targetsize = sizeof(struct xt_mark_target_info), | ||
173 | .table = "mangle", | ||
174 | .me = THIS_MODULE, | ||
175 | }, | ||
146 | }; | 176 | }; |
147 | 177 | ||
148 | static int __init xt_mark_init(void) | 178 | static int __init xt_mark_init(void) |
149 | { | 179 | { |
150 | int err; | 180 | return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); |
151 | |||
152 | err = xt_register_target(&ipt_mark_reg_v0); | ||
153 | if (err) | ||
154 | return err; | ||
155 | |||
156 | err = xt_register_target(&ipt_mark_reg_v1); | ||
157 | if (err) | ||
158 | xt_unregister_target(&ipt_mark_reg_v0); | ||
159 | |||
160 | err = xt_register_target(&ip6t_mark_reg_v0); | ||
161 | if (err) { | ||
162 | xt_unregister_target(&ipt_mark_reg_v0); | ||
163 | xt_unregister_target(&ipt_mark_reg_v1); | ||
164 | } | ||
165 | |||
166 | return err; | ||
167 | } | 181 | } |
168 | 182 | ||
169 | static void __exit xt_mark_fini(void) | 183 | static void __exit xt_mark_fini(void) |
170 | { | 184 | { |
171 | xt_unregister_target(&ipt_mark_reg_v0); | 185 | xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); |
172 | xt_unregister_target(&ipt_mark_reg_v1); | ||
173 | xt_unregister_target(&ip6t_mark_reg_v0); | ||
174 | } | 186 | } |
175 | 187 | ||
176 | module_init(xt_mark_init); | 188 | module_init(xt_mark_init); |
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 86ccceb61fdd..db9b896e57c8 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c | |||
@@ -29,65 +29,46 @@ target(struct sk_buff **pskb, | |||
29 | const struct net_device *out, | 29 | const struct net_device *out, |
30 | unsigned int hooknum, | 30 | unsigned int hooknum, |
31 | const struct xt_target *target, | 31 | const struct xt_target *target, |
32 | const void *targinfo, | 32 | const void *targinfo) |
33 | void *userinfo) | ||
34 | { | 33 | { |
35 | const struct xt_NFQ_info *tinfo = targinfo; | 34 | const struct xt_NFQ_info *tinfo = targinfo; |
36 | 35 | ||
37 | return NF_QUEUE_NR(tinfo->queuenum); | 36 | return NF_QUEUE_NR(tinfo->queuenum); |
38 | } | 37 | } |
39 | 38 | ||
40 | static struct xt_target ipt_NFQ_reg = { | 39 | static struct xt_target xt_nfqueue_target[] = { |
41 | .name = "NFQUEUE", | 40 | { |
42 | .target = target, | 41 | .name = "NFQUEUE", |
43 | .targetsize = sizeof(struct xt_NFQ_info), | 42 | .family = AF_INET, |
44 | .family = AF_INET, | 43 | .target = target, |
45 | .me = THIS_MODULE, | 44 | .targetsize = sizeof(struct xt_NFQ_info), |
46 | }; | 45 | .me = THIS_MODULE, |
47 | 46 | }, | |
48 | static struct xt_target ip6t_NFQ_reg = { | 47 | { |
49 | .name = "NFQUEUE", | 48 | .name = "NFQUEUE", |
50 | .target = target, | 49 | .family = AF_INET6, |
51 | .targetsize = sizeof(struct xt_NFQ_info), | 50 | .target = target, |
52 | .family = AF_INET6, | 51 | .targetsize = sizeof(struct xt_NFQ_info), |
53 | .me = THIS_MODULE, | 52 | .me = THIS_MODULE, |
54 | }; | 53 | }, |
55 | 54 | { | |
56 | static struct xt_target arpt_NFQ_reg = { | 55 | .name = "NFQUEUE", |
57 | .name = "NFQUEUE", | 56 | .family = NF_ARP, |
58 | .target = target, | 57 | .target = target, |
59 | .targetsize = sizeof(struct xt_NFQ_info), | 58 | .targetsize = sizeof(struct xt_NFQ_info), |
60 | .family = NF_ARP, | 59 | .me = THIS_MODULE, |
61 | .me = THIS_MODULE, | 60 | }, |
62 | }; | 61 | }; |
63 | 62 | ||
64 | static int __init xt_nfqueue_init(void) | 63 | static int __init xt_nfqueue_init(void) |
65 | { | 64 | { |
66 | int ret; | 65 | return xt_register_targets(xt_nfqueue_target, |
67 | ret = xt_register_target(&ipt_NFQ_reg); | 66 | ARRAY_SIZE(xt_nfqueue_target)); |
68 | if (ret) | ||
69 | return ret; | ||
70 | ret = xt_register_target(&ip6t_NFQ_reg); | ||
71 | if (ret) | ||
72 | goto out_ip; | ||
73 | ret = xt_register_target(&arpt_NFQ_reg); | ||
74 | if (ret) | ||
75 | goto out_ip6; | ||
76 | |||
77 | return ret; | ||
78 | out_ip6: | ||
79 | xt_unregister_target(&ip6t_NFQ_reg); | ||
80 | out_ip: | ||
81 | xt_unregister_target(&ipt_NFQ_reg); | ||
82 | |||
83 | return ret; | ||
84 | } | 67 | } |
85 | 68 | ||
86 | static void __exit xt_nfqueue_fini(void) | 69 | static void __exit xt_nfqueue_fini(void) |
87 | { | 70 | { |
88 | xt_unregister_target(&arpt_NFQ_reg); | 71 | xt_register_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target)); |
89 | xt_unregister_target(&ip6t_NFQ_reg); | ||
90 | xt_unregister_target(&ipt_NFQ_reg); | ||
91 | } | 72 | } |
92 | 73 | ||
93 | module_init(xt_nfqueue_init); | 74 | module_init(xt_nfqueue_init); |
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index 98f4b5363ce8..6d00dcaed238 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c | |||
@@ -16,8 +16,7 @@ target(struct sk_buff **pskb, | |||
16 | const struct net_device *out, | 16 | const struct net_device *out, |
17 | unsigned int hooknum, | 17 | unsigned int hooknum, |
18 | const struct xt_target *target, | 18 | const struct xt_target *target, |
19 | const void *targinfo, | 19 | const void *targinfo) |
20 | void *userinfo) | ||
21 | { | 20 | { |
22 | /* Previously seen (loopback)? Ignore. */ | 21 | /* Previously seen (loopback)? Ignore. */ |
23 | if ((*pskb)->nfct != NULL) | 22 | if ((*pskb)->nfct != NULL) |
@@ -34,43 +33,32 @@ target(struct sk_buff **pskb, | |||
34 | return XT_CONTINUE; | 33 | return XT_CONTINUE; |
35 | } | 34 | } |
36 | 35 | ||
37 | static struct xt_target notrack_reg = { | 36 | static struct xt_target xt_notrack_target[] = { |
38 | .name = "NOTRACK", | 37 | { |
39 | .target = target, | 38 | .name = "NOTRACK", |
40 | .targetsize = 0, | 39 | .family = AF_INET, |
41 | .table = "raw", | 40 | .target = target, |
42 | .family = AF_INET, | 41 | .table = "raw", |
43 | .me = THIS_MODULE, | 42 | .me = THIS_MODULE, |
44 | }; | 43 | }, |
45 | 44 | { | |
46 | static struct xt_target notrack6_reg = { | 45 | .name = "NOTRACK", |
47 | .name = "NOTRACK", | 46 | .family = AF_INET6, |
48 | .target = target, | 47 | .target = target, |
49 | .targetsize = 0, | 48 | .table = "raw", |
50 | .table = "raw", | 49 | .me = THIS_MODULE, |
51 | .family = AF_INET6, | 50 | }, |
52 | .me = THIS_MODULE, | ||
53 | }; | 51 | }; |
54 | 52 | ||
55 | static int __init xt_notrack_init(void) | 53 | static int __init xt_notrack_init(void) |
56 | { | 54 | { |
57 | int ret; | 55 | return xt_register_targets(xt_notrack_target, |
58 | 56 | ARRAY_SIZE(xt_notrack_target)); | |
59 | ret = xt_register_target(¬rack_reg); | ||
60 | if (ret) | ||
61 | return ret; | ||
62 | |||
63 | ret = xt_register_target(¬rack6_reg); | ||
64 | if (ret) | ||
65 | xt_unregister_target(¬rack_reg); | ||
66 | |||
67 | return ret; | ||
68 | } | 57 | } |
69 | 58 | ||
70 | static void __exit xt_notrack_fini(void) | 59 | static void __exit xt_notrack_fini(void) |
71 | { | 60 | { |
72 | xt_unregister_target(¬rack6_reg); | 61 | xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target)); |
73 | xt_unregister_target(¬rack_reg); | ||
74 | } | 62 | } |
75 | 63 | ||
76 | module_init(xt_notrack_init); | 64 | module_init(xt_notrack_init); |
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index de9537ad9a7c..add752196290 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c | |||
@@ -31,7 +31,7 @@ static u8 mode; | |||
31 | static unsigned int target(struct sk_buff **pskb, const struct net_device *in, | 31 | static unsigned int target(struct sk_buff **pskb, const struct net_device *in, |
32 | const struct net_device *out, unsigned int hooknum, | 32 | const struct net_device *out, unsigned int hooknum, |
33 | const struct xt_target *target, | 33 | const struct xt_target *target, |
34 | const void *targinfo, void *userinfo) | 34 | const void *targinfo) |
35 | { | 35 | { |
36 | u32 secmark = 0; | 36 | u32 secmark = 0; |
37 | const struct xt_secmark_target_info *info = targinfo; | 37 | const struct xt_secmark_target_info *info = targinfo; |
@@ -85,7 +85,7 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) | |||
85 | 85 | ||
86 | static int checkentry(const char *tablename, const void *entry, | 86 | static int checkentry(const char *tablename, const void *entry, |
87 | const struct xt_target *target, void *targinfo, | 87 | const struct xt_target *target, void *targinfo, |
88 | unsigned int targinfosize, unsigned int hook_mask) | 88 | unsigned int hook_mask) |
89 | { | 89 | { |
90 | struct xt_secmark_target_info *info = targinfo; | 90 | struct xt_secmark_target_info *info = targinfo; |
91 | 91 | ||
@@ -111,47 +111,36 @@ static int checkentry(const char *tablename, const void *entry, | |||
111 | return 1; | 111 | return 1; |
112 | } | 112 | } |
113 | 113 | ||
114 | static struct xt_target ipt_secmark_reg = { | 114 | static struct xt_target xt_secmark_target[] = { |
115 | .name = "SECMARK", | 115 | { |
116 | .target = target, | 116 | .name = "SECMARK", |
117 | .targetsize = sizeof(struct xt_secmark_target_info), | 117 | .family = AF_INET, |
118 | .table = "mangle", | 118 | .checkentry = checkentry, |
119 | .checkentry = checkentry, | 119 | .target = target, |
120 | .me = THIS_MODULE, | 120 | .targetsize = sizeof(struct xt_secmark_target_info), |
121 | .family = AF_INET, | 121 | .table = "mangle", |
122 | .revision = 0, | 122 | .me = THIS_MODULE, |
123 | }; | 123 | }, |
124 | 124 | { | |
125 | static struct xt_target ip6t_secmark_reg = { | 125 | .name = "SECMARK", |
126 | .name = "SECMARK", | 126 | .family = AF_INET6, |
127 | .target = target, | 127 | .checkentry = checkentry, |
128 | .targetsize = sizeof(struct xt_secmark_target_info), | 128 | .target = target, |
129 | .table = "mangle", | 129 | .targetsize = sizeof(struct xt_secmark_target_info), |
130 | .checkentry = checkentry, | 130 | .table = "mangle", |
131 | .me = THIS_MODULE, | 131 | .me = THIS_MODULE, |
132 | .family = AF_INET6, | 132 | }, |
133 | .revision = 0, | ||
134 | }; | 133 | }; |
135 | 134 | ||
136 | static int __init xt_secmark_init(void) | 135 | static int __init xt_secmark_init(void) |
137 | { | 136 | { |
138 | int err; | 137 | return xt_register_targets(xt_secmark_target, |
139 | 138 | ARRAY_SIZE(xt_secmark_target)); | |
140 | err = xt_register_target(&ipt_secmark_reg); | ||
141 | if (err) | ||
142 | return err; | ||
143 | |||
144 | err = xt_register_target(&ip6t_secmark_reg); | ||
145 | if (err) | ||
146 | xt_unregister_target(&ipt_secmark_reg); | ||
147 | |||
148 | return err; | ||
149 | } | 139 | } |
150 | 140 | ||
151 | static void __exit xt_secmark_fini(void) | 141 | static void __exit xt_secmark_fini(void) |
152 | { | 142 | { |
153 | xt_unregister_target(&ip6t_secmark_reg); | 143 | xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target)); |
154 | xt_unregister_target(&ipt_secmark_reg); | ||
155 | } | 144 | } |
156 | 145 | ||
157 | module_init(xt_secmark_init); | 146 | module_init(xt_secmark_init); |
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index 197609cb06d7..7db492d65220 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c | |||
@@ -29,41 +29,32 @@ match(const struct sk_buff *skb, | |||
29 | return 1; | 29 | return 1; |
30 | } | 30 | } |
31 | 31 | ||
32 | static struct xt_match comment_match = { | 32 | static struct xt_match xt_comment_match[] = { |
33 | .name = "comment", | 33 | { |
34 | .match = match, | 34 | .name = "comment", |
35 | .matchsize = sizeof(struct xt_comment_info), | 35 | .family = AF_INET, |
36 | .family = AF_INET, | 36 | .match = match, |
37 | .me = THIS_MODULE | 37 | .matchsize = sizeof(struct xt_comment_info), |
38 | }; | 38 | .me = THIS_MODULE |
39 | 39 | }, | |
40 | static struct xt_match comment6_match = { | 40 | { |
41 | .name = "comment", | 41 | .name = "comment", |
42 | .match = match, | 42 | .family = AF_INET6, |
43 | .matchsize = sizeof(struct xt_comment_info), | 43 | .match = match, |
44 | .family = AF_INET6, | 44 | .matchsize = sizeof(struct xt_comment_info), |
45 | .me = THIS_MODULE | 45 | .me = THIS_MODULE |
46 | }, | ||
46 | }; | 47 | }; |
47 | 48 | ||
48 | static int __init xt_comment_init(void) | 49 | static int __init xt_comment_init(void) |
49 | { | 50 | { |
50 | int ret; | 51 | return xt_register_matches(xt_comment_match, |
51 | 52 | ARRAY_SIZE(xt_comment_match)); | |
52 | ret = xt_register_match(&comment_match); | ||
53 | if (ret) | ||
54 | return ret; | ||
55 | |||
56 | ret = xt_register_match(&comment6_match); | ||
57 | if (ret) | ||
58 | xt_unregister_match(&comment_match); | ||
59 | |||
60 | return ret; | ||
61 | } | 53 | } |
62 | 54 | ||
63 | static void __exit xt_comment_fini(void) | 55 | static void __exit xt_comment_fini(void) |
64 | { | 56 | { |
65 | xt_unregister_match(&comment_match); | 57 | xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match)); |
66 | xt_unregister_match(&comment6_match); | ||
67 | } | 58 | } |
68 | 59 | ||
69 | module_init(xt_comment_init); | 60 | module_init(xt_comment_init); |
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 1396fe2d07c1..dcc497ea8183 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c | |||
@@ -125,7 +125,6 @@ static int check(const char *tablename, | |||
125 | const void *ip, | 125 | const void *ip, |
126 | const struct xt_match *match, | 126 | const struct xt_match *match, |
127 | void *matchinfo, | 127 | void *matchinfo, |
128 | unsigned int matchsize, | ||
129 | unsigned int hook_mask) | 128 | unsigned int hook_mask) |
130 | { | 129 | { |
131 | const struct xt_connbytes_info *sinfo = matchinfo; | 130 | const struct xt_connbytes_info *sinfo = matchinfo; |
@@ -143,40 +142,35 @@ static int check(const char *tablename, | |||
143 | return 1; | 142 | return 1; |
144 | } | 143 | } |
145 | 144 | ||
146 | static struct xt_match connbytes_match = { | 145 | static struct xt_match xt_connbytes_match[] = { |
147 | .name = "connbytes", | 146 | { |
148 | .match = match, | 147 | .name = "connbytes", |
149 | .checkentry = check, | 148 | .family = AF_INET, |
150 | .matchsize = sizeof(struct xt_connbytes_info), | 149 | .checkentry = check, |
151 | .family = AF_INET, | 150 | .match = match, |
152 | .me = THIS_MODULE | 151 | .matchsize = sizeof(struct xt_connbytes_info), |
153 | }; | 152 | .me = THIS_MODULE |
154 | static struct xt_match connbytes6_match = { | 153 | }, |
155 | .name = "connbytes", | 154 | { |
156 | .match = match, | 155 | .name = "connbytes", |
157 | .checkentry = check, | 156 | .family = AF_INET6, |
158 | .matchsize = sizeof(struct xt_connbytes_info), | 157 | .checkentry = check, |
159 | .family = AF_INET6, | 158 | .match = match, |
160 | .me = THIS_MODULE | 159 | .matchsize = sizeof(struct xt_connbytes_info), |
160 | .me = THIS_MODULE | ||
161 | }, | ||
161 | }; | 162 | }; |
162 | 163 | ||
163 | static int __init xt_connbytes_init(void) | 164 | static int __init xt_connbytes_init(void) |
164 | { | 165 | { |
165 | int ret; | 166 | return xt_register_matches(xt_connbytes_match, |
166 | ret = xt_register_match(&connbytes_match); | 167 | ARRAY_SIZE(xt_connbytes_match)); |
167 | if (ret) | ||
168 | return ret; | ||
169 | |||
170 | ret = xt_register_match(&connbytes6_match); | ||
171 | if (ret) | ||
172 | xt_unregister_match(&connbytes_match); | ||
173 | return ret; | ||
174 | } | 168 | } |
175 | 169 | ||
176 | static void __exit xt_connbytes_fini(void) | 170 | static void __exit xt_connbytes_fini(void) |
177 | { | 171 | { |
178 | xt_unregister_match(&connbytes_match); | 172 | xt_unregister_matches(xt_connbytes_match, |
179 | xt_unregister_match(&connbytes6_match); | 173 | ARRAY_SIZE(xt_connbytes_match)); |
180 | } | 174 | } |
181 | 175 | ||
182 | module_init(xt_connbytes_init); | 176 | module_init(xt_connbytes_init); |
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 56324c8aff0a..92a5726ef237 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c | |||
@@ -55,7 +55,6 @@ checkentry(const char *tablename, | |||
55 | const void *ip, | 55 | const void *ip, |
56 | const struct xt_match *match, | 56 | const struct xt_match *match, |
57 | void *matchinfo, | 57 | void *matchinfo, |
58 | unsigned int matchsize, | ||
59 | unsigned int hook_mask) | 58 | unsigned int hook_mask) |
60 | { | 59 | { |
61 | struct xt_connmark_info *cm = matchinfo; | 60 | struct xt_connmark_info *cm = matchinfo; |
@@ -75,53 +74,80 @@ checkentry(const char *tablename, | |||
75 | } | 74 | } |
76 | 75 | ||
77 | static void | 76 | static void |
78 | destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) | 77 | destroy(const struct xt_match *match, void *matchinfo) |
79 | { | 78 | { |
80 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 79 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
81 | nf_ct_l3proto_module_put(match->family); | 80 | nf_ct_l3proto_module_put(match->family); |
82 | #endif | 81 | #endif |
83 | } | 82 | } |
84 | 83 | ||
85 | static struct xt_match connmark_match = { | 84 | #ifdef CONFIG_COMPAT |
86 | .name = "connmark", | 85 | struct compat_xt_connmark_info { |
87 | .match = match, | 86 | compat_ulong_t mark, mask; |
88 | .matchsize = sizeof(struct xt_connmark_info), | 87 | u_int8_t invert; |
89 | .checkentry = checkentry, | 88 | u_int8_t __pad1; |
90 | .destroy = destroy, | 89 | u_int16_t __pad2; |
91 | .family = AF_INET, | ||
92 | .me = THIS_MODULE | ||
93 | }; | 90 | }; |
94 | 91 | ||
95 | static struct xt_match connmark6_match = { | 92 | static void compat_from_user(void *dst, void *src) |
96 | .name = "connmark", | 93 | { |
97 | .match = match, | 94 | struct compat_xt_connmark_info *cm = src; |
98 | .matchsize = sizeof(struct xt_connmark_info), | 95 | struct xt_connmark_info m = { |
99 | .checkentry = checkentry, | 96 | .mark = cm->mark, |
100 | .destroy = destroy, | 97 | .mask = cm->mask, |
101 | .family = AF_INET6, | 98 | .invert = cm->invert, |
102 | .me = THIS_MODULE | 99 | }; |
100 | memcpy(dst, &m, sizeof(m)); | ||
101 | } | ||
102 | |||
103 | static int compat_to_user(void __user *dst, void *src) | ||
104 | { | ||
105 | struct xt_connmark_info *m = src; | ||
106 | struct compat_xt_connmark_info cm = { | ||
107 | .mark = m->mark, | ||
108 | .mask = m->mask, | ||
109 | .invert = m->invert, | ||
110 | }; | ||
111 | return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; | ||
112 | } | ||
113 | #endif /* CONFIG_COMPAT */ | ||
114 | |||
115 | static struct xt_match xt_connmark_match[] = { | ||
116 | { | ||
117 | .name = "connmark", | ||
118 | .family = AF_INET, | ||
119 | .checkentry = checkentry, | ||
120 | .match = match, | ||
121 | .destroy = destroy, | ||
122 | .matchsize = sizeof(struct xt_connmark_info), | ||
123 | #ifdef CONFIG_COMPAT | ||
124 | .compatsize = sizeof(struct compat_xt_connmark_info), | ||
125 | .compat_from_user = compat_from_user, | ||
126 | .compat_to_user = compat_to_user, | ||
127 | #endif | ||
128 | .me = THIS_MODULE | ||
129 | }, | ||
130 | { | ||
131 | .name = "connmark", | ||
132 | .family = AF_INET6, | ||
133 | .checkentry = checkentry, | ||
134 | .match = match, | ||
135 | .destroy = destroy, | ||
136 | .matchsize = sizeof(struct xt_connmark_info), | ||
137 | .me = THIS_MODULE | ||
138 | }, | ||
103 | }; | 139 | }; |
104 | 140 | ||
105 | static int __init xt_connmark_init(void) | 141 | static int __init xt_connmark_init(void) |
106 | { | 142 | { |
107 | int ret; | ||
108 | |||
109 | need_conntrack(); | 143 | need_conntrack(); |
110 | 144 | return xt_register_matches(xt_connmark_match, | |
111 | ret = xt_register_match(&connmark_match); | 145 | ARRAY_SIZE(xt_connmark_match)); |
112 | if (ret) | ||
113 | return ret; | ||
114 | |||
115 | ret = xt_register_match(&connmark6_match); | ||
116 | if (ret) | ||
117 | xt_unregister_match(&connmark_match); | ||
118 | return ret; | ||
119 | } | 146 | } |
120 | 147 | ||
121 | static void __exit xt_connmark_fini(void) | 148 | static void __exit xt_connmark_fini(void) |
122 | { | 149 | { |
123 | xt_unregister_match(&connmark6_match); | 150 | xt_register_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match)); |
124 | xt_unregister_match(&connmark_match); | ||
125 | } | 151 | } |
126 | 152 | ||
127 | module_init(xt_connmark_init); | 153 | module_init(xt_connmark_init); |
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 145489a4c3f2..0ea501a2fda5 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c | |||
@@ -45,7 +45,7 @@ match(const struct sk_buff *skb, | |||
45 | 45 | ||
46 | ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); | 46 | ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); |
47 | 47 | ||
48 | #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) | 48 | #define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg)) |
49 | 49 | ||
50 | if (ct == &ip_conntrack_untracked) | 50 | if (ct == &ip_conntrack_untracked) |
51 | statebit = XT_CONNTRACK_STATE_UNTRACKED; | 51 | statebit = XT_CONNTRACK_STATE_UNTRACKED; |
@@ -54,63 +54,72 @@ match(const struct sk_buff *skb, | |||
54 | else | 54 | else |
55 | statebit = XT_CONNTRACK_STATE_INVALID; | 55 | statebit = XT_CONNTRACK_STATE_INVALID; |
56 | 56 | ||
57 | if(sinfo->flags & XT_CONNTRACK_STATE) { | 57 | if (sinfo->flags & XT_CONNTRACK_STATE) { |
58 | if (ct) { | 58 | if (ct) { |
59 | if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip != | 59 | if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) |
60 | ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip) | ||
61 | statebit |= XT_CONNTRACK_STATE_SNAT; | 60 | statebit |= XT_CONNTRACK_STATE_SNAT; |
62 | 61 | if (test_bit(IPS_DST_NAT_BIT, &ct->status)) | |
63 | if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip != | ||
64 | ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip) | ||
65 | statebit |= XT_CONNTRACK_STATE_DNAT; | 62 | statebit |= XT_CONNTRACK_STATE_DNAT; |
66 | } | 63 | } |
67 | 64 | if (FWINV((statebit & sinfo->statemask) == 0, | |
68 | if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) | 65 | XT_CONNTRACK_STATE)) |
69 | return 0; | ||
70 | } | ||
71 | |||
72 | if(sinfo->flags & XT_CONNTRACK_PROTO) { | ||
73 | if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO)) | ||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | if(sinfo->flags & XT_CONNTRACK_ORIGSRC) { | ||
78 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC)) | ||
79 | return 0; | 66 | return 0; |
80 | } | 67 | } |
81 | 68 | ||
82 | if(sinfo->flags & XT_CONNTRACK_ORIGDST) { | 69 | if (ct == NULL) { |
83 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) | 70 | if (sinfo->flags & ~XT_CONNTRACK_STATE) |
84 | return 0; | 71 | return 0; |
72 | return 1; | ||
85 | } | 73 | } |
86 | 74 | ||
87 | if(sinfo->flags & XT_CONNTRACK_REPLSRC) { | 75 | if (sinfo->flags & XT_CONNTRACK_PROTO && |
88 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) | 76 | FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != |
89 | return 0; | 77 | sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, |
90 | } | 78 | XT_CONNTRACK_PROTO)) |
79 | return 0; | ||
80 | |||
81 | if (sinfo->flags & XT_CONNTRACK_ORIGSRC && | ||
82 | FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip & | ||
83 | sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != | ||
84 | sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, | ||
85 | XT_CONNTRACK_ORIGSRC)) | ||
86 | return 0; | ||
91 | 87 | ||
92 | if(sinfo->flags & XT_CONNTRACK_REPLDST) { | 88 | if (sinfo->flags & XT_CONNTRACK_ORIGDST && |
93 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) | 89 | FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip & |
94 | return 0; | 90 | sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != |
95 | } | 91 | sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, |
92 | XT_CONNTRACK_ORIGDST)) | ||
93 | return 0; | ||
96 | 94 | ||
97 | if(sinfo->flags & XT_CONNTRACK_STATUS) { | 95 | if (sinfo->flags & XT_CONNTRACK_REPLSRC && |
98 | if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) | 96 | FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip & |
99 | return 0; | 97 | sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != |
100 | } | 98 | sinfo->tuple[IP_CT_DIR_REPLY].src.ip, |
99 | XT_CONNTRACK_REPLSRC)) | ||
100 | return 0; | ||
101 | 101 | ||
102 | if(sinfo->flags & XT_CONNTRACK_EXPIRES) { | 102 | if (sinfo->flags & XT_CONNTRACK_REPLDST && |
103 | unsigned long expires; | 103 | FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip & |
104 | sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != | ||
105 | sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, | ||
106 | XT_CONNTRACK_REPLDST)) | ||
107 | return 0; | ||
104 | 108 | ||
105 | if(!ct) | 109 | if (sinfo->flags & XT_CONNTRACK_STATUS && |
106 | return 0; | 110 | FWINV((ct->status & sinfo->statusmask) == 0, |
111 | XT_CONNTRACK_STATUS)) | ||
112 | return 0; | ||
107 | 113 | ||
108 | expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; | 114 | if (sinfo->flags & XT_CONNTRACK_EXPIRES) { |
115 | unsigned long expires = timer_pending(&ct->timeout) ? | ||
116 | (ct->timeout.expires - jiffies)/HZ : 0; | ||
109 | 117 | ||
110 | if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) | 118 | if (FWINV(!(expires >= sinfo->expires_min && |
119 | expires <= sinfo->expires_max), | ||
120 | XT_CONNTRACK_EXPIRES)) | ||
111 | return 0; | 121 | return 0; |
112 | } | 122 | } |
113 | |||
114 | return 1; | 123 | return 1; |
115 | } | 124 | } |
116 | 125 | ||
@@ -141,63 +150,72 @@ match(const struct sk_buff *skb, | |||
141 | else | 150 | else |
142 | statebit = XT_CONNTRACK_STATE_INVALID; | 151 | statebit = XT_CONNTRACK_STATE_INVALID; |
143 | 152 | ||
144 | if(sinfo->flags & XT_CONNTRACK_STATE) { | 153 | if (sinfo->flags & XT_CONNTRACK_STATE) { |
145 | if (ct) { | 154 | if (ct) { |
146 | if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip != | 155 | if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) |
147 | ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip) | ||
148 | statebit |= XT_CONNTRACK_STATE_SNAT; | 156 | statebit |= XT_CONNTRACK_STATE_SNAT; |
149 | 157 | if (test_bit(IPS_DST_NAT_BIT, &ct->status)) | |
150 | if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip != | ||
151 | ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip) | ||
152 | statebit |= XT_CONNTRACK_STATE_DNAT; | 158 | statebit |= XT_CONNTRACK_STATE_DNAT; |
153 | } | 159 | } |
154 | 160 | if (FWINV((statebit & sinfo->statemask) == 0, | |
155 | if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) | 161 | XT_CONNTRACK_STATE)) |
156 | return 0; | ||
157 | } | ||
158 | |||
159 | if(sinfo->flags & XT_CONNTRACK_PROTO) { | ||
160 | if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO)) | ||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | if(sinfo->flags & XT_CONNTRACK_ORIGSRC) { | ||
165 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC)) | ||
166 | return 0; | 162 | return 0; |
167 | } | 163 | } |
168 | 164 | ||
169 | if(sinfo->flags & XT_CONNTRACK_ORIGDST) { | 165 | if (ct == NULL) { |
170 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) | 166 | if (sinfo->flags & ~XT_CONNTRACK_STATE) |
171 | return 0; | 167 | return 0; |
168 | return 1; | ||
172 | } | 169 | } |
173 | 170 | ||
174 | if(sinfo->flags & XT_CONNTRACK_REPLSRC) { | 171 | if (sinfo->flags & XT_CONNTRACK_PROTO && |
175 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) | 172 | FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != |
176 | return 0; | 173 | sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, |
177 | } | 174 | XT_CONNTRACK_PROTO)) |
175 | return 0; | ||
176 | |||
177 | if (sinfo->flags & XT_CONNTRACK_ORIGSRC && | ||
178 | FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip & | ||
179 | sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != | ||
180 | sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, | ||
181 | XT_CONNTRACK_ORIGSRC)) | ||
182 | return 0; | ||
178 | 183 | ||
179 | if(sinfo->flags & XT_CONNTRACK_REPLDST) { | 184 | if (sinfo->flags & XT_CONNTRACK_ORIGDST && |
180 | if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) | 185 | FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip & |
181 | return 0; | 186 | sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != |
182 | } | 187 | sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, |
188 | XT_CONNTRACK_ORIGDST)) | ||
189 | return 0; | ||
183 | 190 | ||
184 | if(sinfo->flags & XT_CONNTRACK_STATUS) { | 191 | if (sinfo->flags & XT_CONNTRACK_REPLSRC && |
185 | if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) | 192 | FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip & |
186 | return 0; | 193 | sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != |
187 | } | 194 | sinfo->tuple[IP_CT_DIR_REPLY].src.ip, |
195 | XT_CONNTRACK_REPLSRC)) | ||
196 | return 0; | ||
188 | 197 | ||
189 | if(sinfo->flags & XT_CONNTRACK_EXPIRES) { | 198 | if (sinfo->flags & XT_CONNTRACK_REPLDST && |
190 | unsigned long expires; | 199 | FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip & |
200 | sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != | ||
201 | sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, | ||
202 | XT_CONNTRACK_REPLDST)) | ||
203 | return 0; | ||
191 | 204 | ||
192 | if(!ct) | 205 | if (sinfo->flags & XT_CONNTRACK_STATUS && |
193 | return 0; | 206 | FWINV((ct->status & sinfo->statusmask) == 0, |
207 | XT_CONNTRACK_STATUS)) | ||
208 | return 0; | ||
194 | 209 | ||
195 | expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; | 210 | if(sinfo->flags & XT_CONNTRACK_EXPIRES) { |
211 | unsigned long expires = timer_pending(&ct->timeout) ? | ||
212 | (ct->timeout.expires - jiffies)/HZ : 0; | ||
196 | 213 | ||
197 | if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) | 214 | if (FWINV(!(expires >= sinfo->expires_min && |
215 | expires <= sinfo->expires_max), | ||
216 | XT_CONNTRACK_EXPIRES)) | ||
198 | return 0; | 217 | return 0; |
199 | } | 218 | } |
200 | |||
201 | return 1; | 219 | return 1; |
202 | } | 220 | } |
203 | 221 | ||
@@ -208,7 +226,6 @@ checkentry(const char *tablename, | |||
208 | const void *ip, | 226 | const void *ip, |
209 | const struct xt_match *match, | 227 | const struct xt_match *match, |
210 | void *matchinfo, | 228 | void *matchinfo, |
211 | unsigned int matchsize, | ||
212 | unsigned int hook_mask) | 229 | unsigned int hook_mask) |
213 | { | 230 | { |
214 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 231 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
@@ -221,8 +238,7 @@ checkentry(const char *tablename, | |||
221 | return 1; | 238 | return 1; |
222 | } | 239 | } |
223 | 240 | ||
224 | static void | 241 | static void destroy(const struct xt_match *match, void *matchinfo) |
225 | destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) | ||
226 | { | 242 | { |
227 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 243 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
228 | nf_ct_l3proto_module_put(match->family); | 244 | nf_ct_l3proto_module_put(match->family); |
@@ -241,11 +257,8 @@ static struct xt_match conntrack_match = { | |||
241 | 257 | ||
242 | static int __init xt_conntrack_init(void) | 258 | static int __init xt_conntrack_init(void) |
243 | { | 259 | { |
244 | int ret; | ||
245 | need_conntrack(); | 260 | need_conntrack(); |
246 | ret = xt_register_match(&conntrack_match); | 261 | return xt_register_match(&conntrack_match); |
247 | |||
248 | return ret; | ||
249 | } | 262 | } |
250 | 263 | ||
251 | static void __exit xt_conntrack_fini(void) | 264 | static void __exit xt_conntrack_fini(void) |
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 2e2f825dad4c..3e6cf430e518 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c | |||
@@ -131,7 +131,6 @@ checkentry(const char *tablename, | |||
131 | const void *inf, | 131 | const void *inf, |
132 | const struct xt_match *match, | 132 | const struct xt_match *match, |
133 | void *matchinfo, | 133 | void *matchinfo, |
134 | unsigned int matchsize, | ||
135 | unsigned int hook_mask) | 134 | unsigned int hook_mask) |
136 | { | 135 | { |
137 | const struct xt_dccp_info *info = matchinfo; | 136 | const struct xt_dccp_info *info = matchinfo; |
@@ -141,27 +140,26 @@ checkentry(const char *tablename, | |||
141 | && !(info->invflags & ~info->flags); | 140 | && !(info->invflags & ~info->flags); |
142 | } | 141 | } |
143 | 142 | ||
144 | static struct xt_match dccp_match = | 143 | static struct xt_match xt_dccp_match[] = { |
145 | { | 144 | { |
146 | .name = "dccp", | 145 | .name = "dccp", |
147 | .match = match, | 146 | .family = AF_INET, |
148 | .matchsize = sizeof(struct xt_dccp_info), | 147 | .checkentry = checkentry, |
149 | .proto = IPPROTO_DCCP, | 148 | .match = match, |
150 | .checkentry = checkentry, | 149 | .matchsize = sizeof(struct xt_dccp_info), |
151 | .family = AF_INET, | 150 | .proto = IPPROTO_DCCP, |
152 | .me = THIS_MODULE, | 151 | .me = THIS_MODULE, |
152 | }, | ||
153 | { | ||
154 | .name = "dccp", | ||
155 | .family = AF_INET6, | ||
156 | .checkentry = checkentry, | ||
157 | .match = match, | ||
158 | .matchsize = sizeof(struct xt_dccp_info), | ||
159 | .proto = IPPROTO_DCCP, | ||
160 | .me = THIS_MODULE, | ||
161 | }, | ||
153 | }; | 162 | }; |
154 | static struct xt_match dccp6_match = | ||
155 | { | ||
156 | .name = "dccp", | ||
157 | .match = match, | ||
158 | .matchsize = sizeof(struct xt_dccp_info), | ||
159 | .proto = IPPROTO_DCCP, | ||
160 | .checkentry = checkentry, | ||
161 | .family = AF_INET6, | ||
162 | .me = THIS_MODULE, | ||
163 | }; | ||
164 | |||
165 | 163 | ||
166 | static int __init xt_dccp_init(void) | 164 | static int __init xt_dccp_init(void) |
167 | { | 165 | { |
@@ -173,27 +171,19 @@ static int __init xt_dccp_init(void) | |||
173 | dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); | 171 | dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); |
174 | if (!dccp_optbuf) | 172 | if (!dccp_optbuf) |
175 | return -ENOMEM; | 173 | return -ENOMEM; |
176 | ret = xt_register_match(&dccp_match); | 174 | ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); |
177 | if (ret) | 175 | if (ret) |
178 | goto out_kfree; | 176 | goto out_kfree; |
179 | ret = xt_register_match(&dccp6_match); | ||
180 | if (ret) | ||
181 | goto out_unreg; | ||
182 | |||
183 | return ret; | 177 | return ret; |
184 | 178 | ||
185 | out_unreg: | ||
186 | xt_unregister_match(&dccp_match); | ||
187 | out_kfree: | 179 | out_kfree: |
188 | kfree(dccp_optbuf); | 180 | kfree(dccp_optbuf); |
189 | |||
190 | return ret; | 181 | return ret; |
191 | } | 182 | } |
192 | 183 | ||
193 | static void __exit xt_dccp_fini(void) | 184 | static void __exit xt_dccp_fini(void) |
194 | { | 185 | { |
195 | xt_unregister_match(&dccp6_match); | 186 | xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); |
196 | xt_unregister_match(&dccp_match); | ||
197 | kfree(dccp_optbuf); | 187 | kfree(dccp_optbuf); |
198 | } | 188 | } |
199 | 189 | ||
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c new file mode 100644 index 000000000000..26c7f4ad102a --- /dev/null +++ b/net/netfilter/xt_dscp.c | |||
@@ -0,0 +1,103 @@ | |||
1 | /* IP tables module for matching the value of the IPv4/IPv6 DSCP field | ||
2 | * | ||
3 | * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp | ||
4 | * | ||
5 | * (C) 2002 by Harald Welte <laforge@netfilter.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/skbuff.h> | ||
14 | #include <linux/ip.h> | ||
15 | #include <linux/ipv6.h> | ||
16 | #include <net/dsfield.h> | ||
17 | |||
18 | #include <linux/netfilter/xt_dscp.h> | ||
19 | #include <linux/netfilter/x_tables.h> | ||
20 | |||
21 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
22 | MODULE_DESCRIPTION("x_tables DSCP matching module"); | ||
23 | MODULE_LICENSE("GPL"); | ||
24 | MODULE_ALIAS("ipt_dscp"); | ||
25 | MODULE_ALIAS("ip6t_dscp"); | ||
26 | |||
27 | static int match(const struct sk_buff *skb, | ||
28 | const struct net_device *in, | ||
29 | const struct net_device *out, | ||
30 | const struct xt_match *match, | ||
31 | const void *matchinfo, | ||
32 | int offset, | ||
33 | unsigned int protoff, | ||
34 | int *hotdrop) | ||
35 | { | ||
36 | const struct xt_dscp_info *info = matchinfo; | ||
37 | u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT; | ||
38 | |||
39 | return (dscp == info->dscp) ^ !!info->invert; | ||
40 | } | ||
41 | |||
42 | static int match6(const struct sk_buff *skb, | ||
43 | const struct net_device *in, | ||
44 | const struct net_device *out, | ||
45 | const struct xt_match *match, | ||
46 | const void *matchinfo, | ||
47 | int offset, | ||
48 | unsigned int protoff, | ||
49 | int *hotdrop) | ||
50 | { | ||
51 | const struct xt_dscp_info *info = matchinfo; | ||
52 | u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT; | ||
53 | |||
54 | return (dscp == info->dscp) ^ !!info->invert; | ||
55 | } | ||
56 | |||
57 | static int checkentry(const char *tablename, | ||
58 | const void *info, | ||
59 | const struct xt_match *match, | ||
60 | void *matchinfo, | ||
61 | unsigned int hook_mask) | ||
62 | { | ||
63 | const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; | ||
64 | |||
65 | if (dscp > XT_DSCP_MAX) { | ||
66 | printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp); | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | return 1; | ||
71 | } | ||
72 | |||
73 | static struct xt_match xt_dscp_match[] = { | ||
74 | { | ||
75 | .name = "dscp", | ||
76 | .family = AF_INET, | ||
77 | .checkentry = checkentry, | ||
78 | .match = match, | ||
79 | .matchsize = sizeof(struct xt_dscp_info), | ||
80 | .me = THIS_MODULE, | ||
81 | }, | ||
82 | { | ||
83 | .name = "dscp", | ||
84 | .family = AF_INET6, | ||
85 | .checkentry = checkentry, | ||
86 | .match = match6, | ||
87 | .matchsize = sizeof(struct xt_dscp_info), | ||
88 | .me = THIS_MODULE, | ||
89 | }, | ||
90 | }; | ||
91 | |||
92 | static int __init xt_dscp_match_init(void) | ||
93 | { | ||
94 | return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); | ||
95 | } | ||
96 | |||
97 | static void __exit xt_dscp_match_fini(void) | ||
98 | { | ||
99 | xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); | ||
100 | } | ||
101 | |||
102 | module_init(xt_dscp_match_init); | ||
103 | module_exit(xt_dscp_match_fini); | ||
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index 9dad6281e0c1..7c95f149d942 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c | |||
@@ -79,7 +79,6 @@ checkentry(const char *tablename, | |||
79 | const void *ip_void, | 79 | const void *ip_void, |
80 | const struct xt_match *match, | 80 | const struct xt_match *match, |
81 | void *matchinfo, | 81 | void *matchinfo, |
82 | unsigned int matchinfosize, | ||
83 | unsigned int hook_mask) | 82 | unsigned int hook_mask) |
84 | { | 83 | { |
85 | const struct xt_esp *espinfo = matchinfo; | 84 | const struct xt_esp *espinfo = matchinfo; |
@@ -92,44 +91,35 @@ checkentry(const char *tablename, | |||
92 | return 1; | 91 | return 1; |
93 | } | 92 | } |
94 | 93 | ||
95 | static struct xt_match esp_match = { | 94 | static struct xt_match xt_esp_match[] = { |
96 | .name = "esp", | 95 | { |
97 | .family = AF_INET, | 96 | .name = "esp", |
98 | .proto = IPPROTO_ESP, | 97 | .family = AF_INET, |
99 | .match = &match, | 98 | .checkentry = checkentry, |
100 | .matchsize = sizeof(struct xt_esp), | 99 | .match = match, |
101 | .checkentry = &checkentry, | 100 | .matchsize = sizeof(struct xt_esp), |
102 | .me = THIS_MODULE, | 101 | .proto = IPPROTO_ESP, |
103 | }; | 102 | .me = THIS_MODULE, |
104 | 103 | }, | |
105 | static struct xt_match esp6_match = { | 104 | { |
106 | .name = "esp", | 105 | .name = "esp", |
107 | .family = AF_INET6, | 106 | .family = AF_INET6, |
108 | .proto = IPPROTO_ESP, | 107 | .checkentry = checkentry, |
109 | .match = &match, | 108 | .match = match, |
110 | .matchsize = sizeof(struct xt_esp), | 109 | .matchsize = sizeof(struct xt_esp), |
111 | .checkentry = &checkentry, | 110 | .proto = IPPROTO_ESP, |
112 | .me = THIS_MODULE, | 111 | .me = THIS_MODULE, |
112 | }, | ||
113 | }; | 113 | }; |
114 | 114 | ||
115 | static int __init xt_esp_init(void) | 115 | static int __init xt_esp_init(void) |
116 | { | 116 | { |
117 | int ret; | 117 | return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); |
118 | ret = xt_register_match(&esp_match); | ||
119 | if (ret) | ||
120 | return ret; | ||
121 | |||
122 | ret = xt_register_match(&esp6_match); | ||
123 | if (ret) | ||
124 | xt_unregister_match(&esp_match); | ||
125 | |||
126 | return ret; | ||
127 | } | 118 | } |
128 | 119 | ||
129 | static void __exit xt_esp_cleanup(void) | 120 | static void __exit xt_esp_cleanup(void) |
130 | { | 121 | { |
131 | xt_unregister_match(&esp_match); | 122 | xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); |
132 | xt_unregister_match(&esp6_match); | ||
133 | } | 123 | } |
134 | 124 | ||
135 | module_init(xt_esp_init); | 125 | module_init(xt_esp_init); |
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 799c2a43e3b9..5d7818b73e3a 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c | |||
@@ -139,7 +139,6 @@ static int check(const char *tablename, | |||
139 | const void *inf, | 139 | const void *inf, |
140 | const struct xt_match *match, | 140 | const struct xt_match *match, |
141 | void *matchinfo, | 141 | void *matchinfo, |
142 | unsigned int matchsize, | ||
143 | unsigned int hook_mask) | 142 | unsigned int hook_mask) |
144 | { | 143 | { |
145 | struct xt_helper_info *info = matchinfo; | 144 | struct xt_helper_info *info = matchinfo; |
@@ -156,52 +155,44 @@ static int check(const char *tablename, | |||
156 | } | 155 | } |
157 | 156 | ||
158 | static void | 157 | static void |
159 | destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) | 158 | destroy(const struct xt_match *match, void *matchinfo) |
160 | { | 159 | { |
161 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 160 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
162 | nf_ct_l3proto_module_put(match->family); | 161 | nf_ct_l3proto_module_put(match->family); |
163 | #endif | 162 | #endif |
164 | } | 163 | } |
165 | 164 | ||
166 | static struct xt_match helper_match = { | 165 | static struct xt_match xt_helper_match[] = { |
167 | .name = "helper", | 166 | { |
168 | .match = match, | 167 | .name = "helper", |
169 | .matchsize = sizeof(struct xt_helper_info), | 168 | .family = AF_INET, |
170 | .checkentry = check, | 169 | .checkentry = check, |
171 | .destroy = destroy, | 170 | .match = match, |
172 | .family = AF_INET, | 171 | .destroy = destroy, |
173 | .me = THIS_MODULE, | 172 | .matchsize = sizeof(struct xt_helper_info), |
174 | }; | 173 | .me = THIS_MODULE, |
175 | static struct xt_match helper6_match = { | 174 | }, |
176 | .name = "helper", | 175 | { |
177 | .match = match, | 176 | .name = "helper", |
178 | .matchsize = sizeof(struct xt_helper_info), | 177 | .family = AF_INET6, |
179 | .checkentry = check, | 178 | .checkentry = check, |
180 | .destroy = destroy, | 179 | .match = match, |
181 | .family = AF_INET6, | 180 | .destroy = destroy, |
182 | .me = THIS_MODULE, | 181 | .matchsize = sizeof(struct xt_helper_info), |
182 | .me = THIS_MODULE, | ||
183 | }, | ||
183 | }; | 184 | }; |
184 | 185 | ||
185 | static int __init xt_helper_init(void) | 186 | static int __init xt_helper_init(void) |
186 | { | 187 | { |
187 | int ret; | ||
188 | need_conntrack(); | 188 | need_conntrack(); |
189 | 189 | return xt_register_matches(xt_helper_match, | |
190 | ret = xt_register_match(&helper_match); | 190 | ARRAY_SIZE(xt_helper_match)); |
191 | if (ret < 0) | ||
192 | return ret; | ||
193 | |||
194 | ret = xt_register_match(&helper6_match); | ||
195 | if (ret < 0) | ||
196 | xt_unregister_match(&helper_match); | ||
197 | |||
198 | return ret; | ||
199 | } | 191 | } |
200 | 192 | ||
201 | static void __exit xt_helper_fini(void) | 193 | static void __exit xt_helper_fini(void) |
202 | { | 194 | { |
203 | xt_unregister_match(&helper_match); | 195 | xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match)); |
204 | xt_unregister_match(&helper6_match); | ||
205 | } | 196 | } |
206 | 197 | ||
207 | module_init(xt_helper_init); | 198 | module_init(xt_helper_init); |
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index 109132c9a146..67fd30d9f303 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c | |||
@@ -52,39 +52,32 @@ match6(const struct sk_buff *skb, | |||
52 | return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; | 52 | return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; |
53 | } | 53 | } |
54 | 54 | ||
55 | static struct xt_match length_match = { | 55 | static struct xt_match xt_length_match[] = { |
56 | .name = "length", | 56 | { |
57 | .match = match, | 57 | .name = "length", |
58 | .matchsize = sizeof(struct xt_length_info), | 58 | .family = AF_INET, |
59 | .family = AF_INET, | 59 | .match = match, |
60 | .me = THIS_MODULE, | 60 | .matchsize = sizeof(struct xt_length_info), |
61 | }; | 61 | .me = THIS_MODULE, |
62 | 62 | }, | |
63 | static struct xt_match length6_match = { | 63 | { |
64 | .name = "length", | 64 | .name = "length", |
65 | .match = match6, | 65 | .family = AF_INET6, |
66 | .matchsize = sizeof(struct xt_length_info), | 66 | .match = match6, |
67 | .family = AF_INET6, | 67 | .matchsize = sizeof(struct xt_length_info), |
68 | .me = THIS_MODULE, | 68 | .me = THIS_MODULE, |
69 | }, | ||
69 | }; | 70 | }; |
70 | 71 | ||
71 | static int __init xt_length_init(void) | 72 | static int __init xt_length_init(void) |
72 | { | 73 | { |
73 | int ret; | 74 | return xt_register_matches(xt_length_match, |
74 | ret = xt_register_match(&length_match); | 75 | ARRAY_SIZE(xt_length_match)); |
75 | if (ret) | ||
76 | return ret; | ||
77 | ret = xt_register_match(&length6_match); | ||
78 | if (ret) | ||
79 | xt_unregister_match(&length_match); | ||
80 | |||
81 | return ret; | ||
82 | } | 76 | } |
83 | 77 | ||
84 | static void __exit xt_length_fini(void) | 78 | static void __exit xt_length_fini(void) |
85 | { | 79 | { |
86 | xt_unregister_match(&length_match); | 80 | xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match)); |
87 | xt_unregister_match(&length6_match); | ||
88 | } | 81 | } |
89 | 82 | ||
90 | module_init(xt_length_init); | 83 | module_init(xt_length_init); |
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index ce7fdb7e4e07..fda7b7dec27d 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c | |||
@@ -110,7 +110,6 @@ ipt_limit_checkentry(const char *tablename, | |||
110 | const void *inf, | 110 | const void *inf, |
111 | const struct xt_match *match, | 111 | const struct xt_match *match, |
112 | void *matchinfo, | 112 | void *matchinfo, |
113 | unsigned int matchsize, | ||
114 | unsigned int hook_mask) | 113 | unsigned int hook_mask) |
115 | { | 114 | { |
116 | struct xt_rateinfo *r = matchinfo; | 115 | struct xt_rateinfo *r = matchinfo; |
@@ -123,55 +122,95 @@ ipt_limit_checkentry(const char *tablename, | |||
123 | return 0; | 122 | return 0; |
124 | } | 123 | } |
125 | 124 | ||
126 | /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * | ||
127 | 128. */ | ||
128 | r->prev = jiffies; | ||
129 | r->credit = user2credits(r->avg * r->burst); /* Credits full. */ | ||
130 | r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ | ||
131 | r->cost = user2credits(r->avg); | ||
132 | |||
133 | /* For SMP, we only want to use one set of counters. */ | 125 | /* For SMP, we only want to use one set of counters. */ |
134 | r->master = r; | 126 | r->master = r; |
135 | 127 | if (r->cost == 0) { | |
128 | /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * | ||
129 | 128. */ | ||
130 | r->prev = jiffies; | ||
131 | r->credit = user2credits(r->avg * r->burst); /* Credits full. */ | ||
132 | r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ | ||
133 | r->cost = user2credits(r->avg); | ||
134 | } | ||
136 | return 1; | 135 | return 1; |
137 | } | 136 | } |
138 | 137 | ||
139 | static struct xt_match ipt_limit_reg = { | 138 | #ifdef CONFIG_COMPAT |
140 | .name = "limit", | 139 | struct compat_xt_rateinfo { |
141 | .match = ipt_limit_match, | 140 | u_int32_t avg; |
142 | .matchsize = sizeof(struct xt_rateinfo), | 141 | u_int32_t burst; |
143 | .checkentry = ipt_limit_checkentry, | 142 | |
144 | .family = AF_INET, | 143 | compat_ulong_t prev; |
145 | .me = THIS_MODULE, | 144 | u_int32_t credit; |
145 | u_int32_t credit_cap, cost; | ||
146 | |||
147 | u_int32_t master; | ||
146 | }; | 148 | }; |
147 | static struct xt_match limit6_reg = { | 149 | |
148 | .name = "limit", | 150 | /* To keep the full "prev" timestamp, the upper 32 bits are stored in the |
149 | .match = ipt_limit_match, | 151 | * master pointer, which does not need to be preserved. */ |
150 | .matchsize = sizeof(struct xt_rateinfo), | 152 | static void compat_from_user(void *dst, void *src) |
151 | .checkentry = ipt_limit_checkentry, | 153 | { |
152 | .family = AF_INET6, | 154 | struct compat_xt_rateinfo *cm = src; |
153 | .me = THIS_MODULE, | 155 | struct xt_rateinfo m = { |
156 | .avg = cm->avg, | ||
157 | .burst = cm->burst, | ||
158 | .prev = cm->prev | (unsigned long)cm->master << 32, | ||
159 | .credit = cm->credit, | ||
160 | .credit_cap = cm->credit_cap, | ||
161 | .cost = cm->cost, | ||
162 | }; | ||
163 | memcpy(dst, &m, sizeof(m)); | ||
164 | } | ||
165 | |||
166 | static int compat_to_user(void __user *dst, void *src) | ||
167 | { | ||
168 | struct xt_rateinfo *m = src; | ||
169 | struct compat_xt_rateinfo cm = { | ||
170 | .avg = m->avg, | ||
171 | .burst = m->burst, | ||
172 | .prev = m->prev, | ||
173 | .credit = m->credit, | ||
174 | .credit_cap = m->credit_cap, | ||
175 | .cost = m->cost, | ||
176 | .master = m->prev >> 32, | ||
177 | }; | ||
178 | return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; | ||
179 | } | ||
180 | #endif /* CONFIG_COMPAT */ | ||
181 | |||
182 | static struct xt_match xt_limit_match[] = { | ||
183 | { | ||
184 | .name = "limit", | ||
185 | .family = AF_INET, | ||
186 | .checkentry = ipt_limit_checkentry, | ||
187 | .match = ipt_limit_match, | ||
188 | .matchsize = sizeof(struct xt_rateinfo), | ||
189 | #ifdef CONFIG_COMPAT | ||
190 | .compatsize = sizeof(struct compat_xt_rateinfo), | ||
191 | .compat_from_user = compat_from_user, | ||
192 | .compat_to_user = compat_to_user, | ||
193 | #endif | ||
194 | .me = THIS_MODULE, | ||
195 | }, | ||
196 | { | ||
197 | .name = "limit", | ||
198 | .family = AF_INET6, | ||
199 | .checkentry = ipt_limit_checkentry, | ||
200 | .match = ipt_limit_match, | ||
201 | .matchsize = sizeof(struct xt_rateinfo), | ||
202 | .me = THIS_MODULE, | ||
203 | }, | ||
154 | }; | 204 | }; |
155 | 205 | ||
156 | static int __init xt_limit_init(void) | 206 | static int __init xt_limit_init(void) |
157 | { | 207 | { |
158 | int ret; | 208 | return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); |
159 | |||
160 | ret = xt_register_match(&ipt_limit_reg); | ||
161 | if (ret) | ||
162 | return ret; | ||
163 | |||
164 | ret = xt_register_match(&limit6_reg); | ||
165 | if (ret) | ||
166 | xt_unregister_match(&ipt_limit_reg); | ||
167 | |||
168 | return ret; | ||
169 | } | 209 | } |
170 | 210 | ||
171 | static void __exit xt_limit_fini(void) | 211 | static void __exit xt_limit_fini(void) |
172 | { | 212 | { |
173 | xt_unregister_match(&ipt_limit_reg); | 213 | xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); |
174 | xt_unregister_match(&limit6_reg); | ||
175 | } | 214 | } |
176 | 215 | ||
177 | module_init(xt_limit_init); | 216 | module_init(xt_limit_init); |
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 356290ffe386..425fc21e31f5 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c | |||
@@ -43,43 +43,37 @@ match(const struct sk_buff *skb, | |||
43 | ^ info->invert)); | 43 | ^ info->invert)); |
44 | } | 44 | } |
45 | 45 | ||
46 | static struct xt_match mac_match = { | 46 | static struct xt_match xt_mac_match[] = { |
47 | .name = "mac", | 47 | { |
48 | .match = match, | 48 | .name = "mac", |
49 | .matchsize = sizeof(struct xt_mac_info), | 49 | .family = AF_INET, |
50 | .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | | 50 | .match = match, |
51 | (1 << NF_IP_FORWARD), | 51 | .matchsize = sizeof(struct xt_mac_info), |
52 | .family = AF_INET, | 52 | .hooks = (1 << NF_IP_PRE_ROUTING) | |
53 | .me = THIS_MODULE, | 53 | (1 << NF_IP_LOCAL_IN) | |
54 | }; | 54 | (1 << NF_IP_FORWARD), |
55 | static struct xt_match mac6_match = { | 55 | .me = THIS_MODULE, |
56 | .name = "mac", | 56 | }, |
57 | .match = match, | 57 | { |
58 | .matchsize = sizeof(struct xt_mac_info), | 58 | .name = "mac", |
59 | .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | | 59 | .family = AF_INET6, |
60 | (1 << NF_IP_FORWARD), | 60 | .match = match, |
61 | .family = AF_INET6, | 61 | .matchsize = sizeof(struct xt_mac_info), |
62 | .me = THIS_MODULE, | 62 | .hooks = (1 << NF_IP_PRE_ROUTING) | |
63 | (1 << NF_IP_LOCAL_IN) | | ||
64 | (1 << NF_IP_FORWARD), | ||
65 | .me = THIS_MODULE, | ||
66 | }, | ||
63 | }; | 67 | }; |
64 | 68 | ||
65 | static int __init xt_mac_init(void) | 69 | static int __init xt_mac_init(void) |
66 | { | 70 | { |
67 | int ret; | 71 | return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); |
68 | ret = xt_register_match(&mac_match); | ||
69 | if (ret) | ||
70 | return ret; | ||
71 | |||
72 | ret = xt_register_match(&mac6_match); | ||
73 | if (ret) | ||
74 | xt_unregister_match(&mac_match); | ||
75 | |||
76 | return ret; | ||
77 | } | 72 | } |
78 | 73 | ||
79 | static void __exit xt_mac_fini(void) | 74 | static void __exit xt_mac_fini(void) |
80 | { | 75 | { |
81 | xt_unregister_match(&mac_match); | 76 | xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); |
82 | xt_unregister_match(&mac6_match); | ||
83 | } | 77 | } |
84 | 78 | ||
85 | module_init(xt_mac_init); | 79 | module_init(xt_mac_init); |
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 876bc5797738..934dddfbcd23 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c | |||
@@ -39,7 +39,6 @@ checkentry(const char *tablename, | |||
39 | const void *entry, | 39 | const void *entry, |
40 | const struct xt_match *match, | 40 | const struct xt_match *match, |
41 | void *matchinfo, | 41 | void *matchinfo, |
42 | unsigned int matchsize, | ||
43 | unsigned int hook_mask) | 42 | unsigned int hook_mask) |
44 | { | 43 | { |
45 | const struct xt_mark_info *minfo = matchinfo; | 44 | const struct xt_mark_info *minfo = matchinfo; |
@@ -51,42 +50,69 @@ checkentry(const char *tablename, | |||
51 | return 1; | 50 | return 1; |
52 | } | 51 | } |
53 | 52 | ||
54 | static struct xt_match mark_match = { | 53 | #ifdef CONFIG_COMPAT |
55 | .name = "mark", | 54 | struct compat_xt_mark_info { |
56 | .match = match, | 55 | compat_ulong_t mark, mask; |
57 | .matchsize = sizeof(struct xt_mark_info), | 56 | u_int8_t invert; |
58 | .checkentry = checkentry, | 57 | u_int8_t __pad1; |
59 | .family = AF_INET, | 58 | u_int16_t __pad2; |
60 | .me = THIS_MODULE, | ||
61 | }; | 59 | }; |
62 | 60 | ||
63 | static struct xt_match mark6_match = { | 61 | static void compat_from_user(void *dst, void *src) |
64 | .name = "mark", | 62 | { |
65 | .match = match, | 63 | struct compat_xt_mark_info *cm = src; |
66 | .matchsize = sizeof(struct xt_mark_info), | 64 | struct xt_mark_info m = { |
67 | .checkentry = checkentry, | 65 | .mark = cm->mark, |
68 | .family = AF_INET6, | 66 | .mask = cm->mask, |
69 | .me = THIS_MODULE, | 67 | .invert = cm->invert, |
70 | }; | 68 | }; |
69 | memcpy(dst, &m, sizeof(m)); | ||
70 | } | ||
71 | 71 | ||
72 | static int __init xt_mark_init(void) | 72 | static int compat_to_user(void __user *dst, void *src) |
73 | { | 73 | { |
74 | int ret; | 74 | struct xt_mark_info *m = src; |
75 | ret = xt_register_match(&mark_match); | 75 | struct compat_xt_mark_info cm = { |
76 | if (ret) | 76 | .mark = m->mark, |
77 | return ret; | 77 | .mask = m->mask, |
78 | .invert = m->invert, | ||
79 | }; | ||
80 | return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; | ||
81 | } | ||
82 | #endif /* CONFIG_COMPAT */ | ||
78 | 83 | ||
79 | ret = xt_register_match(&mark6_match); | 84 | static struct xt_match xt_mark_match[] = { |
80 | if (ret) | 85 | { |
81 | xt_unregister_match(&mark_match); | 86 | .name = "mark", |
87 | .family = AF_INET, | ||
88 | .checkentry = checkentry, | ||
89 | .match = match, | ||
90 | .matchsize = sizeof(struct xt_mark_info), | ||
91 | #ifdef CONFIG_COMPAT | ||
92 | .compatsize = sizeof(struct compat_xt_mark_info), | ||
93 | .compat_from_user = compat_from_user, | ||
94 | .compat_to_user = compat_to_user, | ||
95 | #endif | ||
96 | .me = THIS_MODULE, | ||
97 | }, | ||
98 | { | ||
99 | .name = "mark", | ||
100 | .family = AF_INET6, | ||
101 | .checkentry = checkentry, | ||
102 | .match = match, | ||
103 | .matchsize = sizeof(struct xt_mark_info), | ||
104 | .me = THIS_MODULE, | ||
105 | }, | ||
106 | }; | ||
82 | 107 | ||
83 | return ret; | 108 | static int __init xt_mark_init(void) |
109 | { | ||
110 | return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); | ||
84 | } | 111 | } |
85 | 112 | ||
86 | static void __exit xt_mark_fini(void) | 113 | static void __exit xt_mark_fini(void) |
87 | { | 114 | { |
88 | xt_unregister_match(&mark_match); | 115 | xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); |
89 | xt_unregister_match(&mark6_match); | ||
90 | } | 116 | } |
91 | 117 | ||
92 | module_init(xt_mark_init); | 118 | module_init(xt_mark_init); |
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index 1ff0a25396e7..d3aefd380930 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c | |||
@@ -176,7 +176,6 @@ checkentry(const char *tablename, | |||
176 | const void *info, | 176 | const void *info, |
177 | const struct xt_match *match, | 177 | const struct xt_match *match, |
178 | void *matchinfo, | 178 | void *matchinfo, |
179 | unsigned int matchsize, | ||
180 | unsigned int hook_mask) | 179 | unsigned int hook_mask) |
181 | { | 180 | { |
182 | const struct ipt_ip *ip = info; | 181 | const struct ipt_ip *ip = info; |
@@ -191,7 +190,6 @@ checkentry_v1(const char *tablename, | |||
191 | const void *info, | 190 | const void *info, |
192 | const struct xt_match *match, | 191 | const struct xt_match *match, |
193 | void *matchinfo, | 192 | void *matchinfo, |
194 | unsigned int matchsize, | ||
195 | unsigned int hook_mask) | 193 | unsigned int hook_mask) |
196 | { | 194 | { |
197 | const struct ipt_ip *ip = info; | 195 | const struct ipt_ip *ip = info; |
@@ -206,7 +204,6 @@ checkentry6(const char *tablename, | |||
206 | const void *info, | 204 | const void *info, |
207 | const struct xt_match *match, | 205 | const struct xt_match *match, |
208 | void *matchinfo, | 206 | void *matchinfo, |
209 | unsigned int matchsize, | ||
210 | unsigned int hook_mask) | 207 | unsigned int hook_mask) |
211 | { | 208 | { |
212 | const struct ip6t_ip6 *ip = info; | 209 | const struct ip6t_ip6 *ip = info; |
@@ -221,7 +218,6 @@ checkentry6_v1(const char *tablename, | |||
221 | const void *info, | 218 | const void *info, |
222 | const struct xt_match *match, | 219 | const struct xt_match *match, |
223 | void *matchinfo, | 220 | void *matchinfo, |
224 | unsigned int matchsize, | ||
225 | unsigned int hook_mask) | 221 | unsigned int hook_mask) |
226 | { | 222 | { |
227 | const struct ip6t_ip6 *ip = info; | 223 | const struct ip6t_ip6 *ip = info; |
@@ -231,84 +227,55 @@ checkentry6_v1(const char *tablename, | |||
231 | multiinfo->count); | 227 | multiinfo->count); |
232 | } | 228 | } |
233 | 229 | ||
234 | static struct xt_match multiport_match = { | 230 | static struct xt_match xt_multiport_match[] = { |
235 | .name = "multiport", | 231 | { |
236 | .revision = 0, | 232 | .name = "multiport", |
237 | .matchsize = sizeof(struct xt_multiport), | 233 | .family = AF_INET, |
238 | .match = &match, | 234 | .revision = 0, |
239 | .checkentry = &checkentry, | 235 | .checkentry = checkentry, |
240 | .family = AF_INET, | 236 | .match = match, |
241 | .me = THIS_MODULE, | 237 | .matchsize = sizeof(struct xt_multiport), |
242 | }; | 238 | .me = THIS_MODULE, |
243 | 239 | }, | |
244 | static struct xt_match multiport_match_v1 = { | 240 | { |
245 | .name = "multiport", | 241 | .name = "multiport", |
246 | .revision = 1, | 242 | .family = AF_INET, |
247 | .matchsize = sizeof(struct xt_multiport_v1), | 243 | .revision = 1, |
248 | .match = &match_v1, | 244 | .checkentry = checkentry_v1, |
249 | .checkentry = &checkentry_v1, | 245 | .match = match_v1, |
250 | .family = AF_INET, | 246 | .matchsize = sizeof(struct xt_multiport_v1), |
251 | .me = THIS_MODULE, | 247 | .me = THIS_MODULE, |
252 | }; | 248 | }, |
253 | 249 | { | |
254 | static struct xt_match multiport6_match = { | 250 | .name = "multiport", |
255 | .name = "multiport", | 251 | .family = AF_INET6, |
256 | .revision = 0, | 252 | .revision = 0, |
257 | .matchsize = sizeof(struct xt_multiport), | 253 | .checkentry = checkentry6, |
258 | .match = &match, | 254 | .match = match, |
259 | .checkentry = &checkentry6, | 255 | .matchsize = sizeof(struct xt_multiport), |
260 | .family = AF_INET6, | 256 | .me = THIS_MODULE, |
261 | .me = THIS_MODULE, | 257 | }, |
262 | }; | 258 | { |
263 | 259 | .name = "multiport", | |
264 | static struct xt_match multiport6_match_v1 = { | 260 | .family = AF_INET6, |
265 | .name = "multiport", | 261 | .revision = 1, |
266 | .revision = 1, | 262 | .checkentry = checkentry6_v1, |
267 | .matchsize = sizeof(struct xt_multiport_v1), | 263 | .match = match_v1, |
268 | .match = &match_v1, | 264 | .matchsize = sizeof(struct xt_multiport_v1), |
269 | .checkentry = &checkentry6_v1, | 265 | .me = THIS_MODULE, |
270 | .family = AF_INET6, | 266 | }, |
271 | .me = THIS_MODULE, | ||
272 | }; | 267 | }; |
273 | 268 | ||
274 | static int __init xt_multiport_init(void) | 269 | static int __init xt_multiport_init(void) |
275 | { | 270 | { |
276 | int ret; | 271 | return xt_register_matches(xt_multiport_match, |
277 | 272 | ARRAY_SIZE(xt_multiport_match)); | |
278 | ret = xt_register_match(&multiport_match); | ||
279 | if (ret) | ||
280 | goto out; | ||
281 | |||
282 | ret = xt_register_match(&multiport_match_v1); | ||
283 | if (ret) | ||
284 | goto out_unreg_multi_v0; | ||
285 | |||
286 | ret = xt_register_match(&multiport6_match); | ||
287 | if (ret) | ||
288 | goto out_unreg_multi_v1; | ||
289 | |||
290 | ret = xt_register_match(&multiport6_match_v1); | ||
291 | if (ret) | ||
292 | goto out_unreg_multi6_v0; | ||
293 | |||
294 | return ret; | ||
295 | |||
296 | out_unreg_multi6_v0: | ||
297 | xt_unregister_match(&multiport6_match); | ||
298 | out_unreg_multi_v1: | ||
299 | xt_unregister_match(&multiport_match_v1); | ||
300 | out_unreg_multi_v0: | ||
301 | xt_unregister_match(&multiport_match); | ||
302 | out: | ||
303 | return ret; | ||
304 | } | 273 | } |
305 | 274 | ||
306 | static void __exit xt_multiport_fini(void) | 275 | static void __exit xt_multiport_fini(void) |
307 | { | 276 | { |
308 | xt_unregister_match(&multiport_match); | 277 | xt_unregister_matches(xt_multiport_match, |
309 | xt_unregister_match(&multiport_match_v1); | 278 | ARRAY_SIZE(xt_multiport_match)); |
310 | xt_unregister_match(&multiport6_match); | ||
311 | xt_unregister_match(&multiport6_match_v1); | ||
312 | } | 279 | } |
313 | 280 | ||
314 | module_init(xt_multiport_init); | 281 | module_init(xt_multiport_init); |
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 63a965467465..fd8f954cded5 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c | |||
@@ -106,7 +106,6 @@ checkentry(const char *tablename, | |||
106 | const void *ip, | 106 | const void *ip, |
107 | const struct xt_match *match, | 107 | const struct xt_match *match, |
108 | void *matchinfo, | 108 | void *matchinfo, |
109 | unsigned int matchsize, | ||
110 | unsigned int hook_mask) | 109 | unsigned int hook_mask) |
111 | { | 110 | { |
112 | const struct xt_physdev_info *info = matchinfo; | 111 | const struct xt_physdev_info *info = matchinfo; |
@@ -132,43 +131,34 @@ checkentry(const char *tablename, | |||
132 | return 1; | 131 | return 1; |
133 | } | 132 | } |
134 | 133 | ||
135 | static struct xt_match physdev_match = { | 134 | static struct xt_match xt_physdev_match[] = { |
136 | .name = "physdev", | 135 | { |
137 | .match = match, | 136 | .name = "physdev", |
138 | .matchsize = sizeof(struct xt_physdev_info), | 137 | .family = AF_INET, |
139 | .checkentry = checkentry, | 138 | .checkentry = checkentry, |
140 | .family = AF_INET, | 139 | .match = match, |
141 | .me = THIS_MODULE, | 140 | .matchsize = sizeof(struct xt_physdev_info), |
142 | }; | 141 | .me = THIS_MODULE, |
143 | 142 | }, | |
144 | static struct xt_match physdev6_match = { | 143 | { |
145 | .name = "physdev", | 144 | .name = "physdev", |
146 | .match = match, | 145 | .family = AF_INET6, |
147 | .matchsize = sizeof(struct xt_physdev_info), | 146 | .checkentry = checkentry, |
148 | .checkentry = checkentry, | 147 | .match = match, |
149 | .family = AF_INET6, | 148 | .matchsize = sizeof(struct xt_physdev_info), |
150 | .me = THIS_MODULE, | 149 | .me = THIS_MODULE, |
150 | }, | ||
151 | }; | 151 | }; |
152 | 152 | ||
153 | static int __init xt_physdev_init(void) | 153 | static int __init xt_physdev_init(void) |
154 | { | 154 | { |
155 | int ret; | 155 | return xt_register_matches(xt_physdev_match, |
156 | 156 | ARRAY_SIZE(xt_physdev_match)); | |
157 | ret = xt_register_match(&physdev_match); | ||
158 | if (ret < 0) | ||
159 | return ret; | ||
160 | |||
161 | ret = xt_register_match(&physdev6_match); | ||
162 | if (ret < 0) | ||
163 | xt_unregister_match(&physdev_match); | ||
164 | |||
165 | return ret; | ||
166 | } | 157 | } |
167 | 158 | ||
168 | static void __exit xt_physdev_fini(void) | 159 | static void __exit xt_physdev_fini(void) |
169 | { | 160 | { |
170 | xt_unregister_match(&physdev_match); | 161 | xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match)); |
171 | xt_unregister_match(&physdev6_match); | ||
172 | } | 162 | } |
173 | 163 | ||
174 | module_init(xt_physdev_init); | 164 | module_init(xt_physdev_init); |
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index d2f5320a80bf..16e7b0804287 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c | |||
@@ -43,40 +43,32 @@ static int match(const struct sk_buff *skb, | |||
43 | return (type == info->pkttype) ^ info->invert; | 43 | return (type == info->pkttype) ^ info->invert; |
44 | } | 44 | } |
45 | 45 | ||
46 | static struct xt_match pkttype_match = { | 46 | static struct xt_match xt_pkttype_match[] = { |
47 | .name = "pkttype", | 47 | { |
48 | .match = match, | 48 | .name = "pkttype", |
49 | .matchsize = sizeof(struct xt_pkttype_info), | 49 | .family = AF_INET, |
50 | .family = AF_INET, | 50 | .match = match, |
51 | .me = THIS_MODULE, | 51 | .matchsize = sizeof(struct xt_pkttype_info), |
52 | }; | 52 | .me = THIS_MODULE, |
53 | 53 | }, | |
54 | static struct xt_match pkttype6_match = { | 54 | { |
55 | .name = "pkttype", | 55 | .name = "pkttype", |
56 | .match = match, | 56 | .family = AF_INET6, |
57 | .matchsize = sizeof(struct xt_pkttype_info), | 57 | .match = match, |
58 | .family = AF_INET6, | 58 | .matchsize = sizeof(struct xt_pkttype_info), |
59 | .me = THIS_MODULE, | 59 | .me = THIS_MODULE, |
60 | }, | ||
60 | }; | 61 | }; |
61 | 62 | ||
62 | static int __init xt_pkttype_init(void) | 63 | static int __init xt_pkttype_init(void) |
63 | { | 64 | { |
64 | int ret; | 65 | return xt_register_matches(xt_pkttype_match, |
65 | ret = xt_register_match(&pkttype_match); | 66 | ARRAY_SIZE(xt_pkttype_match)); |
66 | if (ret) | ||
67 | return ret; | ||
68 | |||
69 | ret = xt_register_match(&pkttype6_match); | ||
70 | if (ret) | ||
71 | xt_unregister_match(&pkttype_match); | ||
72 | |||
73 | return ret; | ||
74 | } | 67 | } |
75 | 68 | ||
76 | static void __exit xt_pkttype_fini(void) | 69 | static void __exit xt_pkttype_fini(void) |
77 | { | 70 | { |
78 | xt_unregister_match(&pkttype_match); | 71 | xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match)); |
79 | xt_unregister_match(&pkttype6_match); | ||
80 | } | 72 | } |
81 | 73 | ||
82 | module_init(xt_pkttype_init); | 74 | module_init(xt_pkttype_init); |
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index ba1ca03abad3..46bde2b1e1e0 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c | |||
@@ -135,8 +135,7 @@ static int match(const struct sk_buff *skb, | |||
135 | 135 | ||
136 | static int checkentry(const char *tablename, const void *ip_void, | 136 | static int checkentry(const char *tablename, const void *ip_void, |
137 | const struct xt_match *match, | 137 | const struct xt_match *match, |
138 | void *matchinfo, unsigned int matchsize, | 138 | void *matchinfo, unsigned int hook_mask) |
139 | unsigned int hook_mask) | ||
140 | { | 139 | { |
141 | struct xt_policy_info *info = matchinfo; | 140 | struct xt_policy_info *info = matchinfo; |
142 | 141 | ||
@@ -165,43 +164,34 @@ static int checkentry(const char *tablename, const void *ip_void, | |||
165 | return 1; | 164 | return 1; |
166 | } | 165 | } |
167 | 166 | ||
168 | static struct xt_match policy_match = { | 167 | static struct xt_match xt_policy_match[] = { |
169 | .name = "policy", | 168 | { |
170 | .family = AF_INET, | 169 | .name = "policy", |
171 | .match = match, | 170 | .family = AF_INET, |
172 | .matchsize = sizeof(struct xt_policy_info), | 171 | .checkentry = checkentry, |
173 | .checkentry = checkentry, | 172 | .match = match, |
174 | .family = AF_INET, | 173 | .matchsize = sizeof(struct xt_policy_info), |
175 | .me = THIS_MODULE, | 174 | .me = THIS_MODULE, |
176 | }; | 175 | }, |
177 | 176 | { | |
178 | static struct xt_match policy6_match = { | 177 | .name = "policy", |
179 | .name = "policy", | 178 | .family = AF_INET6, |
180 | .family = AF_INET6, | 179 | .checkentry = checkentry, |
181 | .match = match, | 180 | .match = match, |
182 | .matchsize = sizeof(struct xt_policy_info), | 181 | .matchsize = sizeof(struct xt_policy_info), |
183 | .checkentry = checkentry, | 182 | .me = THIS_MODULE, |
184 | .family = AF_INET6, | 183 | }, |
185 | .me = THIS_MODULE, | ||
186 | }; | 184 | }; |
187 | 185 | ||
188 | static int __init init(void) | 186 | static int __init init(void) |
189 | { | 187 | { |
190 | int ret; | 188 | return xt_register_matches(xt_policy_match, |
191 | 189 | ARRAY_SIZE(xt_policy_match)); | |
192 | ret = xt_register_match(&policy_match); | ||
193 | if (ret) | ||
194 | return ret; | ||
195 | ret = xt_register_match(&policy6_match); | ||
196 | if (ret) | ||
197 | xt_unregister_match(&policy_match); | ||
198 | return ret; | ||
199 | } | 190 | } |
200 | 191 | ||
201 | static void __exit fini(void) | 192 | static void __exit fini(void) |
202 | { | 193 | { |
203 | xt_unregister_match(&policy6_match); | 194 | xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match)); |
204 | xt_unregister_match(&policy_match); | ||
205 | } | 195 | } |
206 | 196 | ||
207 | module_init(init); | 197 | module_init(init); |
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index be8d3c26b568..b75fa2c70e66 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c | |||
@@ -41,7 +41,7 @@ match(const struct sk_buff *skb, | |||
41 | static int | 41 | static int |
42 | checkentry(const char *tablename, const void *entry, | 42 | checkentry(const char *tablename, const void *entry, |
43 | const struct xt_match *match, void *matchinfo, | 43 | const struct xt_match *match, void *matchinfo, |
44 | unsigned int matchsize, unsigned int hook_mask) | 44 | unsigned int hook_mask) |
45 | { | 45 | { |
46 | struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; | 46 | struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; |
47 | 47 | ||
@@ -52,46 +52,33 @@ checkentry(const char *tablename, const void *entry, | |||
52 | return 1; | 52 | return 1; |
53 | } | 53 | } |
54 | 54 | ||
55 | static struct xt_match quota_match = { | 55 | static struct xt_match xt_quota_match[] = { |
56 | .name = "quota", | 56 | { |
57 | .family = AF_INET, | 57 | .name = "quota", |
58 | .match = match, | 58 | .family = AF_INET, |
59 | .matchsize = sizeof(struct xt_quota_info), | 59 | .checkentry = checkentry, |
60 | .checkentry = checkentry, | 60 | .match = match, |
61 | .me = THIS_MODULE | 61 | .matchsize = sizeof(struct xt_quota_info), |
62 | }; | 62 | .me = THIS_MODULE |
63 | 63 | }, | |
64 | static struct xt_match quota_match6 = { | 64 | { |
65 | .name = "quota", | 65 | .name = "quota", |
66 | .family = AF_INET6, | 66 | .family = AF_INET6, |
67 | .match = match, | 67 | .checkentry = checkentry, |
68 | .matchsize = sizeof(struct xt_quota_info), | 68 | .match = match, |
69 | .checkentry = checkentry, | 69 | .matchsize = sizeof(struct xt_quota_info), |
70 | .me = THIS_MODULE | 70 | .me = THIS_MODULE |
71 | }, | ||
71 | }; | 72 | }; |
72 | 73 | ||
73 | static int __init xt_quota_init(void) | 74 | static int __init xt_quota_init(void) |
74 | { | 75 | { |
75 | int ret; | 76 | return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match)); |
76 | |||
77 | ret = xt_register_match("a_match); | ||
78 | if (ret) | ||
79 | goto err1; | ||
80 | ret = xt_register_match("a_match6); | ||
81 | if (ret) | ||
82 | goto err2; | ||
83 | return ret; | ||
84 | |||
85 | err2: | ||
86 | xt_unregister_match("a_match); | ||
87 | err1: | ||
88 | return ret; | ||
89 | } | 77 | } |
90 | 78 | ||
91 | static void __exit xt_quota_fini(void) | 79 | static void __exit xt_quota_fini(void) |
92 | { | 80 | { |
93 | xt_unregister_match("a_match6); | 81 | xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match)); |
94 | xt_unregister_match("a_match); | ||
95 | } | 82 | } |
96 | 83 | ||
97 | module_init(xt_quota_init); | 84 | module_init(xt_quota_init); |
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 843383e01d41..7956acaaa24b 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c | |||
@@ -163,7 +163,6 @@ checkentry(const char *tablename, | |||
163 | const void *inf, | 163 | const void *inf, |
164 | const struct xt_match *match, | 164 | const struct xt_match *match, |
165 | void *matchinfo, | 165 | void *matchinfo, |
166 | unsigned int matchsize, | ||
167 | unsigned int hook_mask) | 166 | unsigned int hook_mask) |
168 | { | 167 | { |
169 | const struct xt_sctp_info *info = matchinfo; | 168 | const struct xt_sctp_info *info = matchinfo; |
@@ -178,44 +177,35 @@ checkentry(const char *tablename, | |||
178 | | SCTP_CHUNK_MATCH_ONLY))); | 177 | | SCTP_CHUNK_MATCH_ONLY))); |
179 | } | 178 | } |
180 | 179 | ||
181 | static struct xt_match sctp_match = { | 180 | static struct xt_match xt_sctp_match[] = { |
182 | .name = "sctp", | 181 | { |
183 | .match = match, | 182 | .name = "sctp", |
184 | .matchsize = sizeof(struct xt_sctp_info), | 183 | .family = AF_INET, |
185 | .proto = IPPROTO_SCTP, | 184 | .checkentry = checkentry, |
186 | .checkentry = checkentry, | 185 | .match = match, |
187 | .family = AF_INET, | 186 | .matchsize = sizeof(struct xt_sctp_info), |
188 | .me = THIS_MODULE | 187 | .proto = IPPROTO_SCTP, |
189 | }; | 188 | .me = THIS_MODULE |
190 | 189 | }, | |
191 | static struct xt_match sctp6_match = { | 190 | { |
192 | .name = "sctp", | 191 | .name = "sctp", |
193 | .match = match, | 192 | .family = AF_INET6, |
194 | .matchsize = sizeof(struct xt_sctp_info), | 193 | .checkentry = checkentry, |
195 | .proto = IPPROTO_SCTP, | 194 | .match = match, |
196 | .checkentry = checkentry, | 195 | .matchsize = sizeof(struct xt_sctp_info), |
197 | .family = AF_INET6, | 196 | .proto = IPPROTO_SCTP, |
198 | .me = THIS_MODULE | 197 | .me = THIS_MODULE |
198 | }, | ||
199 | }; | 199 | }; |
200 | 200 | ||
201 | static int __init xt_sctp_init(void) | 201 | static int __init xt_sctp_init(void) |
202 | { | 202 | { |
203 | int ret; | 203 | return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match)); |
204 | ret = xt_register_match(&sctp_match); | ||
205 | if (ret) | ||
206 | return ret; | ||
207 | |||
208 | ret = xt_register_match(&sctp6_match); | ||
209 | if (ret) | ||
210 | xt_unregister_match(&sctp_match); | ||
211 | |||
212 | return ret; | ||
213 | } | 204 | } |
214 | 205 | ||
215 | static void __exit xt_sctp_fini(void) | 206 | static void __exit xt_sctp_fini(void) |
216 | { | 207 | { |
217 | xt_unregister_match(&sctp6_match); | 208 | xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match)); |
218 | xt_unregister_match(&sctp_match); | ||
219 | } | 209 | } |
220 | 210 | ||
221 | module_init(xt_sctp_init); | 211 | module_init(xt_sctp_init); |
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index f9e304dc4504..d9010b16a1f9 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c | |||
@@ -48,7 +48,6 @@ static int check(const char *tablename, | |||
48 | const void *inf, | 48 | const void *inf, |
49 | const struct xt_match *match, | 49 | const struct xt_match *match, |
50 | void *matchinfo, | 50 | void *matchinfo, |
51 | unsigned int matchsize, | ||
52 | unsigned int hook_mask) | 51 | unsigned int hook_mask) |
53 | { | 52 | { |
54 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 53 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
@@ -62,54 +61,43 @@ static int check(const char *tablename, | |||
62 | } | 61 | } |
63 | 62 | ||
64 | static void | 63 | static void |
65 | destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) | 64 | destroy(const struct xt_match *match, void *matchinfo) |
66 | { | 65 | { |
67 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 66 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
68 | nf_ct_l3proto_module_put(match->family); | 67 | nf_ct_l3proto_module_put(match->family); |
69 | #endif | 68 | #endif |
70 | } | 69 | } |
71 | 70 | ||
72 | static struct xt_match state_match = { | 71 | static struct xt_match xt_state_match[] = { |
73 | .name = "state", | 72 | { |
74 | .match = match, | 73 | .name = "state", |
75 | .checkentry = check, | 74 | .family = AF_INET, |
76 | .destroy = destroy, | 75 | .checkentry = check, |
77 | .matchsize = sizeof(struct xt_state_info), | 76 | .match = match, |
78 | .family = AF_INET, | 77 | .destroy = destroy, |
79 | .me = THIS_MODULE, | 78 | .matchsize = sizeof(struct xt_state_info), |
80 | }; | 79 | .me = THIS_MODULE, |
81 | 80 | }, | |
82 | static struct xt_match state6_match = { | 81 | { |
83 | .name = "state", | 82 | .name = "state", |
84 | .match = match, | 83 | .family = AF_INET6, |
85 | .checkentry = check, | 84 | .checkentry = check, |
86 | .destroy = destroy, | 85 | .match = match, |
87 | .matchsize = sizeof(struct xt_state_info), | 86 | .destroy = destroy, |
88 | .family = AF_INET6, | 87 | .matchsize = sizeof(struct xt_state_info), |
89 | .me = THIS_MODULE, | 88 | .me = THIS_MODULE, |
89 | }, | ||
90 | }; | 90 | }; |
91 | 91 | ||
92 | static int __init xt_state_init(void) | 92 | static int __init xt_state_init(void) |
93 | { | 93 | { |
94 | int ret; | ||
95 | |||
96 | need_conntrack(); | 94 | need_conntrack(); |
97 | 95 | return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); | |
98 | ret = xt_register_match(&state_match); | ||
99 | if (ret < 0) | ||
100 | return ret; | ||
101 | |||
102 | ret = xt_register_match(&state6_match); | ||
103 | if (ret < 0) | ||
104 | xt_unregister_match(&state_match); | ||
105 | |||
106 | return ret; | ||
107 | } | 96 | } |
108 | 97 | ||
109 | static void __exit xt_state_fini(void) | 98 | static void __exit xt_state_fini(void) |
110 | { | 99 | { |
111 | xt_unregister_match(&state_match); | 100 | xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); |
112 | xt_unregister_match(&state6_match); | ||
113 | } | 101 | } |
114 | 102 | ||
115 | module_init(xt_state_init); | 103 | module_init(xt_state_init); |
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index de1037f58596..091a9f89f5d5 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c | |||
@@ -55,7 +55,7 @@ match(const struct sk_buff *skb, | |||
55 | static int | 55 | static int |
56 | checkentry(const char *tablename, const void *entry, | 56 | checkentry(const char *tablename, const void *entry, |
57 | const struct xt_match *match, void *matchinfo, | 57 | const struct xt_match *match, void *matchinfo, |
58 | unsigned int matchsize, unsigned int hook_mask) | 58 | unsigned int hook_mask) |
59 | { | 59 | { |
60 | struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; | 60 | struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; |
61 | 61 | ||
@@ -66,46 +66,35 @@ checkentry(const char *tablename, const void *entry, | |||
66 | return 1; | 66 | return 1; |
67 | } | 67 | } |
68 | 68 | ||
69 | static struct xt_match statistic_match = { | 69 | static struct xt_match xt_statistic_match[] = { |
70 | .name = "statistic", | 70 | { |
71 | .match = match, | 71 | .name = "statistic", |
72 | .matchsize = sizeof(struct xt_statistic_info), | 72 | .family = AF_INET, |
73 | .checkentry = checkentry, | 73 | .checkentry = checkentry, |
74 | .family = AF_INET, | 74 | .match = match, |
75 | .me = THIS_MODULE, | 75 | .matchsize = sizeof(struct xt_statistic_info), |
76 | }; | 76 | .me = THIS_MODULE, |
77 | 77 | }, | |
78 | static struct xt_match statistic_match6 = { | 78 | { |
79 | .name = "statistic", | 79 | .name = "statistic", |
80 | .match = match, | 80 | .family = AF_INET6, |
81 | .matchsize = sizeof(struct xt_statistic_info), | 81 | .checkentry = checkentry, |
82 | .checkentry = checkentry, | 82 | .match = match, |
83 | .family = AF_INET6, | 83 | .matchsize = sizeof(struct xt_statistic_info), |
84 | .me = THIS_MODULE, | 84 | .me = THIS_MODULE, |
85 | }, | ||
85 | }; | 86 | }; |
86 | 87 | ||
87 | static int __init xt_statistic_init(void) | 88 | static int __init xt_statistic_init(void) |
88 | { | 89 | { |
89 | int ret; | 90 | return xt_register_matches(xt_statistic_match, |
90 | 91 | ARRAY_SIZE(xt_statistic_match)); | |
91 | ret = xt_register_match(&statistic_match); | ||
92 | if (ret) | ||
93 | goto err1; | ||
94 | |||
95 | ret = xt_register_match(&statistic_match6); | ||
96 | if (ret) | ||
97 | goto err2; | ||
98 | return ret; | ||
99 | err2: | ||
100 | xt_unregister_match(&statistic_match); | ||
101 | err1: | ||
102 | return ret; | ||
103 | } | 92 | } |
104 | 93 | ||
105 | static void __exit xt_statistic_fini(void) | 94 | static void __exit xt_statistic_fini(void) |
106 | { | 95 | { |
107 | xt_unregister_match(&statistic_match6); | 96 | xt_unregister_matches(xt_statistic_match, |
108 | xt_unregister_match(&statistic_match); | 97 | ARRAY_SIZE(xt_statistic_match)); |
109 | } | 98 | } |
110 | 99 | ||
111 | module_init(xt_statistic_init); | 100 | module_init(xt_statistic_init); |
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 275330fcdaaa..4453252400aa 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c | |||
@@ -46,7 +46,6 @@ static int checkentry(const char *tablename, | |||
46 | const void *ip, | 46 | const void *ip, |
47 | const struct xt_match *match, | 47 | const struct xt_match *match, |
48 | void *matchinfo, | 48 | void *matchinfo, |
49 | unsigned int matchsize, | ||
50 | unsigned int hook_mask) | 49 | unsigned int hook_mask) |
51 | { | 50 | { |
52 | struct xt_string_info *conf = matchinfo; | 51 | struct xt_string_info *conf = matchinfo; |
@@ -69,49 +68,40 @@ static int checkentry(const char *tablename, | |||
69 | return 1; | 68 | return 1; |
70 | } | 69 | } |
71 | 70 | ||
72 | static void destroy(const struct xt_match *match, void *matchinfo, | 71 | static void destroy(const struct xt_match *match, void *matchinfo) |
73 | unsigned int matchsize) | ||
74 | { | 72 | { |
75 | textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); | 73 | textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); |
76 | } | 74 | } |
77 | 75 | ||
78 | static struct xt_match string_match = { | 76 | static struct xt_match xt_string_match[] = { |
79 | .name = "string", | 77 | { |
80 | .match = match, | 78 | .name = "string", |
81 | .matchsize = sizeof(struct xt_string_info), | 79 | .family = AF_INET, |
82 | .checkentry = checkentry, | 80 | .checkentry = checkentry, |
83 | .destroy = destroy, | 81 | .match = match, |
84 | .family = AF_INET, | 82 | .destroy = destroy, |
85 | .me = THIS_MODULE | 83 | .matchsize = sizeof(struct xt_string_info), |
86 | }; | 84 | .me = THIS_MODULE |
87 | static struct xt_match string6_match = { | 85 | }, |
88 | .name = "string", | 86 | { |
89 | .match = match, | 87 | .name = "string", |
90 | .matchsize = sizeof(struct xt_string_info), | 88 | .family = AF_INET6, |
91 | .checkentry = checkentry, | 89 | .checkentry = checkentry, |
92 | .destroy = destroy, | 90 | .match = match, |
93 | .family = AF_INET6, | 91 | .destroy = destroy, |
94 | .me = THIS_MODULE | 92 | .matchsize = sizeof(struct xt_string_info), |
93 | .me = THIS_MODULE | ||
94 | }, | ||
95 | }; | 95 | }; |
96 | 96 | ||
97 | static int __init xt_string_init(void) | 97 | static int __init xt_string_init(void) |
98 | { | 98 | { |
99 | int ret; | 99 | return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match)); |
100 | |||
101 | ret = xt_register_match(&string_match); | ||
102 | if (ret) | ||
103 | return ret; | ||
104 | ret = xt_register_match(&string6_match); | ||
105 | if (ret) | ||
106 | xt_unregister_match(&string_match); | ||
107 | |||
108 | return ret; | ||
109 | } | 100 | } |
110 | 101 | ||
111 | static void __exit xt_string_fini(void) | 102 | static void __exit xt_string_fini(void) |
112 | { | 103 | { |
113 | xt_unregister_match(&string_match); | 104 | xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match)); |
114 | xt_unregister_match(&string6_match); | ||
115 | } | 105 | } |
116 | 106 | ||
117 | module_init(xt_string_init); | 107 | module_init(xt_string_init); |
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index cf7d335cadcd..a3682fe2f192 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c | |||
@@ -18,21 +18,22 @@ | |||
18 | #include <linux/netfilter_ipv4/ip_tables.h> | 18 | #include <linux/netfilter_ipv4/ip_tables.h> |
19 | #include <linux/netfilter_ipv6/ip6_tables.h> | 19 | #include <linux/netfilter_ipv6/ip6_tables.h> |
20 | 20 | ||
21 | #define TH_SYN 0x02 | ||
22 | |||
23 | MODULE_LICENSE("GPL"); | 21 | MODULE_LICENSE("GPL"); |
24 | MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); | 22 | MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); |
25 | MODULE_DESCRIPTION("iptables TCP MSS match module"); | 23 | MODULE_DESCRIPTION("iptables TCP MSS match module"); |
26 | MODULE_ALIAS("ipt_tcpmss"); | 24 | MODULE_ALIAS("ipt_tcpmss"); |
27 | 25 | ||
28 | /* Returns 1 if the mss option is set and matched by the range, 0 otherwise */ | 26 | static int |
29 | static inline int | 27 | match(const struct sk_buff *skb, |
30 | mssoption_match(u_int16_t min, u_int16_t max, | 28 | const struct net_device *in, |
31 | const struct sk_buff *skb, | 29 | const struct net_device *out, |
32 | unsigned int protoff, | 30 | const struct xt_match *match, |
33 | int invert, | 31 | const void *matchinfo, |
34 | int *hotdrop) | 32 | int offset, |
33 | unsigned int protoff, | ||
34 | int *hotdrop) | ||
35 | { | 35 | { |
36 | const struct xt_tcpmss_match_info *info = matchinfo; | ||
36 | struct tcphdr _tcph, *th; | 37 | struct tcphdr _tcph, *th; |
37 | /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ | 38 | /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ |
38 | u8 _opt[15 * 4 - sizeof(_tcph)], *op; | 39 | u8 _opt[15 * 4 - sizeof(_tcph)], *op; |
@@ -64,72 +65,50 @@ mssoption_match(u_int16_t min, u_int16_t max, | |||
64 | 65 | ||
65 | mssval = (op[i+2] << 8) | op[i+3]; | 66 | mssval = (op[i+2] << 8) | op[i+3]; |
66 | 67 | ||
67 | return (mssval >= min && mssval <= max) ^ invert; | 68 | return (mssval >= info->mss_min && |
69 | mssval <= info->mss_max) ^ info->invert; | ||
68 | } | 70 | } |
69 | if (op[i] < 2) i++; | 71 | if (op[i] < 2) |
70 | else i += op[i+1]?:1; | 72 | i++; |
73 | else | ||
74 | i += op[i+1] ? : 1; | ||
71 | } | 75 | } |
72 | out: | 76 | out: |
73 | return invert; | 77 | return info->invert; |
74 | 78 | ||
75 | dropit: | 79 | dropit: |
76 | *hotdrop = 1; | 80 | *hotdrop = 1; |
77 | return 0; | 81 | return 0; |
78 | } | 82 | } |
79 | 83 | ||
80 | static int | 84 | static struct xt_match xt_tcpmss_match[] = { |
81 | match(const struct sk_buff *skb, | 85 | { |
82 | const struct net_device *in, | 86 | .name = "tcpmss", |
83 | const struct net_device *out, | 87 | .family = AF_INET, |
84 | const struct xt_match *match, | 88 | .match = match, |
85 | const void *matchinfo, | 89 | .matchsize = sizeof(struct xt_tcpmss_match_info), |
86 | int offset, | 90 | .proto = IPPROTO_TCP, |
87 | unsigned int protoff, | 91 | .me = THIS_MODULE, |
88 | int *hotdrop) | 92 | }, |
89 | { | 93 | { |
90 | const struct xt_tcpmss_match_info *info = matchinfo; | 94 | .name = "tcpmss", |
91 | 95 | .family = AF_INET6, | |
92 | return mssoption_match(info->mss_min, info->mss_max, skb, protoff, | 96 | .match = match, |
93 | info->invert, hotdrop); | 97 | .matchsize = sizeof(struct xt_tcpmss_match_info), |
94 | } | 98 | .proto = IPPROTO_TCP, |
95 | 99 | .me = THIS_MODULE, | |
96 | static struct xt_match tcpmss_match = { | 100 | }, |
97 | .name = "tcpmss", | ||
98 | .match = match, | ||
99 | .matchsize = sizeof(struct xt_tcpmss_match_info), | ||
100 | .proto = IPPROTO_TCP, | ||
101 | .family = AF_INET, | ||
102 | .me = THIS_MODULE, | ||
103 | }; | ||
104 | |||
105 | static struct xt_match tcpmss6_match = { | ||
106 | .name = "tcpmss", | ||
107 | .match = match, | ||
108 | .matchsize = sizeof(struct xt_tcpmss_match_info), | ||
109 | .proto = IPPROTO_TCP, | ||
110 | .family = AF_INET6, | ||
111 | .me = THIS_MODULE, | ||
112 | }; | 101 | }; |
113 | 102 | ||
114 | |||
115 | static int __init xt_tcpmss_init(void) | 103 | static int __init xt_tcpmss_init(void) |
116 | { | 104 | { |
117 | int ret; | 105 | return xt_register_matches(xt_tcpmss_match, |
118 | ret = xt_register_match(&tcpmss_match); | 106 | ARRAY_SIZE(xt_tcpmss_match)); |
119 | if (ret) | ||
120 | return ret; | ||
121 | |||
122 | ret = xt_register_match(&tcpmss6_match); | ||
123 | if (ret) | ||
124 | xt_unregister_match(&tcpmss_match); | ||
125 | |||
126 | return ret; | ||
127 | } | 107 | } |
128 | 108 | ||
129 | static void __exit xt_tcpmss_fini(void) | 109 | static void __exit xt_tcpmss_fini(void) |
130 | { | 110 | { |
131 | xt_unregister_match(&tcpmss6_match); | 111 | xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match)); |
132 | xt_unregister_match(&tcpmss_match); | ||
133 | } | 112 | } |
134 | 113 | ||
135 | module_init(xt_tcpmss_init); | 114 | module_init(xt_tcpmss_init); |
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index a9a63aa68936..e76a68e0bc66 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c | |||
@@ -141,7 +141,6 @@ tcp_checkentry(const char *tablename, | |||
141 | const void *info, | 141 | const void *info, |
142 | const struct xt_match *match, | 142 | const struct xt_match *match, |
143 | void *matchinfo, | 143 | void *matchinfo, |
144 | unsigned int matchsize, | ||
145 | unsigned int hook_mask) | 144 | unsigned int hook_mask) |
146 | { | 145 | { |
147 | const struct xt_tcp *tcpinfo = matchinfo; | 146 | const struct xt_tcp *tcpinfo = matchinfo; |
@@ -190,7 +189,6 @@ udp_checkentry(const char *tablename, | |||
190 | const void *info, | 189 | const void *info, |
191 | const struct xt_match *match, | 190 | const struct xt_match *match, |
192 | void *matchinfo, | 191 | void *matchinfo, |
193 | unsigned int matchsize, | ||
194 | unsigned int hook_mask) | 192 | unsigned int hook_mask) |
195 | { | 193 | { |
196 | const struct xt_tcp *udpinfo = matchinfo; | 194 | const struct xt_tcp *udpinfo = matchinfo; |
@@ -199,81 +197,54 @@ udp_checkentry(const char *tablename, | |||
199 | return !(udpinfo->invflags & ~XT_UDP_INV_MASK); | 197 | return !(udpinfo->invflags & ~XT_UDP_INV_MASK); |
200 | } | 198 | } |
201 | 199 | ||
202 | static struct xt_match tcp_matchstruct = { | 200 | static struct xt_match xt_tcpudp_match[] = { |
203 | .name = "tcp", | 201 | { |
204 | .match = tcp_match, | 202 | .name = "tcp", |
205 | .matchsize = sizeof(struct xt_tcp), | 203 | .family = AF_INET, |
206 | .proto = IPPROTO_TCP, | 204 | .checkentry = tcp_checkentry, |
207 | .family = AF_INET, | 205 | .match = tcp_match, |
208 | .checkentry = tcp_checkentry, | 206 | .matchsize = sizeof(struct xt_tcp), |
209 | .me = THIS_MODULE, | 207 | .proto = IPPROTO_TCP, |
210 | }; | 208 | .me = THIS_MODULE, |
211 | 209 | }, | |
212 | static struct xt_match tcp6_matchstruct = { | 210 | { |
213 | .name = "tcp", | 211 | .name = "tcp", |
214 | .match = tcp_match, | 212 | .family = AF_INET6, |
215 | .matchsize = sizeof(struct xt_tcp), | 213 | .checkentry = tcp_checkentry, |
216 | .proto = IPPROTO_TCP, | 214 | .match = tcp_match, |
217 | .family = AF_INET6, | 215 | .matchsize = sizeof(struct xt_tcp), |
218 | .checkentry = tcp_checkentry, | 216 | .proto = IPPROTO_TCP, |
219 | .me = THIS_MODULE, | 217 | .me = THIS_MODULE, |
220 | }; | 218 | }, |
221 | 219 | { | |
222 | static struct xt_match udp_matchstruct = { | 220 | .name = "udp", |
223 | .name = "udp", | 221 | .family = AF_INET, |
224 | .match = udp_match, | 222 | .checkentry = udp_checkentry, |
225 | .matchsize = sizeof(struct xt_udp), | 223 | .match = udp_match, |
226 | .proto = IPPROTO_UDP, | 224 | .matchsize = sizeof(struct xt_udp), |
227 | .family = AF_INET, | 225 | .proto = IPPROTO_UDP, |
228 | .checkentry = udp_checkentry, | 226 | .me = THIS_MODULE, |
229 | .me = THIS_MODULE, | 227 | }, |
230 | }; | 228 | { |
231 | static struct xt_match udp6_matchstruct = { | 229 | .name = "udp", |
232 | .name = "udp", | 230 | .family = AF_INET6, |
233 | .match = udp_match, | 231 | .checkentry = udp_checkentry, |
234 | .matchsize = sizeof(struct xt_udp), | 232 | .match = udp_match, |
235 | .proto = IPPROTO_UDP, | 233 | .matchsize = sizeof(struct xt_udp), |
236 | .family = AF_INET6, | 234 | .proto = IPPROTO_UDP, |
237 | .checkentry = udp_checkentry, | 235 | .me = THIS_MODULE, |
238 | .me = THIS_MODULE, | 236 | }, |
239 | }; | 237 | }; |
240 | 238 | ||
241 | static int __init xt_tcpudp_init(void) | 239 | static int __init xt_tcpudp_init(void) |
242 | { | 240 | { |
243 | int ret; | 241 | return xt_register_matches(xt_tcpudp_match, |
244 | ret = xt_register_match(&tcp_matchstruct); | 242 | ARRAY_SIZE(xt_tcpudp_match)); |
245 | if (ret) | ||
246 | return ret; | ||
247 | |||
248 | ret = xt_register_match(&tcp6_matchstruct); | ||
249 | if (ret) | ||
250 | goto out_unreg_tcp; | ||
251 | |||
252 | ret = xt_register_match(&udp_matchstruct); | ||
253 | if (ret) | ||
254 | goto out_unreg_tcp6; | ||
255 | |||
256 | ret = xt_register_match(&udp6_matchstruct); | ||
257 | if (ret) | ||
258 | goto out_unreg_udp; | ||
259 | |||
260 | return ret; | ||
261 | |||
262 | out_unreg_udp: | ||
263 | xt_unregister_match(&udp_matchstruct); | ||
264 | out_unreg_tcp6: | ||
265 | xt_unregister_match(&tcp6_matchstruct); | ||
266 | out_unreg_tcp: | ||
267 | xt_unregister_match(&tcp_matchstruct); | ||
268 | return ret; | ||
269 | } | 243 | } |
270 | 244 | ||
271 | static void __exit xt_tcpudp_fini(void) | 245 | static void __exit xt_tcpudp_fini(void) |
272 | { | 246 | { |
273 | xt_unregister_match(&udp6_matchstruct); | 247 | xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match)); |
274 | xt_unregister_match(&udp_matchstruct); | ||
275 | xt_unregister_match(&tcp6_matchstruct); | ||
276 | xt_unregister_match(&tcp_matchstruct); | ||
277 | } | 248 | } |
278 | 249 | ||
279 | module_init(xt_tcpudp_init); | 250 | module_init(xt_tcpudp_init); |
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig new file mode 100644 index 000000000000..fe23cb7f1e87 --- /dev/null +++ b/net/netlabel/Kconfig | |||
@@ -0,0 +1,14 @@ | |||
1 | # | ||
2 | # NetLabel configuration | ||
3 | # | ||
4 | |||
5 | config NETLABEL | ||
6 | bool "NetLabel subsystem support" | ||
7 | depends on NET && SECURITY | ||
8 | default n | ||
9 | ---help--- | ||
10 | NetLabel provides support for explicit network packet labeling | ||
11 | protocols such as CIPSO and RIPSO. For more information see | ||
12 | Documentation/netlabel. | ||
13 | |||
14 | If you are unsure, say N. | ||
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile new file mode 100644 index 000000000000..8af18c0a47d9 --- /dev/null +++ b/net/netlabel/Makefile | |||
@@ -0,0 +1,16 @@ | |||
1 | # | ||
2 | # Makefile for the NetLabel subsystem. | ||
3 | # | ||
4 | # Feb 9, 2006, Paul Moore <paul.moore@hp.com> | ||
5 | # | ||
6 | |||
7 | # base objects | ||
8 | obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o | ||
9 | |||
10 | # management objects | ||
11 | obj-y += netlabel_mgmt.o | ||
12 | |||
13 | # protocol modules | ||
14 | obj-y += netlabel_unlabeled.o | ||
15 | obj-y += netlabel_cipso_v4.o | ||
16 | |||
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c new file mode 100644 index 000000000000..a4f40adc447b --- /dev/null +++ b/net/netlabel/netlabel_cipso_v4.c | |||
@@ -0,0 +1,542 @@ | |||
1 | /* | ||
2 | * NetLabel CIPSO/IPv4 Support | ||
3 | * | ||
4 | * This file defines the CIPSO/IPv4 functions for the NetLabel system. The | ||
5 | * NetLabel system manages static and dynamic label mappings for network | ||
6 | * protocols such as CIPSO and RIPSO. | ||
7 | * | ||
8 | * Author: Paul Moore <paul.moore@hp.com> | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
23 | * the GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #include <linux/types.h> | ||
32 | #include <linux/socket.h> | ||
33 | #include <linux/string.h> | ||
34 | #include <linux/skbuff.h> | ||
35 | #include <net/sock.h> | ||
36 | #include <net/netlink.h> | ||
37 | #include <net/genetlink.h> | ||
38 | #include <net/netlabel.h> | ||
39 | #include <net/cipso_ipv4.h> | ||
40 | |||
41 | #include "netlabel_user.h" | ||
42 | #include "netlabel_cipso_v4.h" | ||
43 | |||
44 | /* NetLabel Generic NETLINK CIPSOv4 family */ | ||
45 | static struct genl_family netlbl_cipsov4_gnl_family = { | ||
46 | .id = GENL_ID_GENERATE, | ||
47 | .hdrsize = 0, | ||
48 | .name = NETLBL_NLTYPE_CIPSOV4_NAME, | ||
49 | .version = NETLBL_PROTO_VERSION, | ||
50 | .maxattr = 0, | ||
51 | }; | ||
52 | |||
53 | |||
54 | /* | ||
55 | * Helper Functions | ||
56 | */ | ||
57 | |||
58 | /** | ||
59 | * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition | ||
60 | * @entry: the entry's RCU field | ||
61 | * | ||
62 | * Description: | ||
63 | * This function is designed to be used as a callback to the call_rcu() | ||
64 | * function so that the memory allocated to the DOI definition can be released | ||
65 | * safely. | ||
66 | * | ||
67 | */ | ||
68 | static void netlbl_cipsov4_doi_free(struct rcu_head *entry) | ||
69 | { | ||
70 | struct cipso_v4_doi *ptr; | ||
71 | |||
72 | ptr = container_of(entry, struct cipso_v4_doi, rcu); | ||
73 | switch (ptr->type) { | ||
74 | case CIPSO_V4_MAP_STD: | ||
75 | kfree(ptr->map.std->lvl.cipso); | ||
76 | kfree(ptr->map.std->lvl.local); | ||
77 | kfree(ptr->map.std->cat.cipso); | ||
78 | kfree(ptr->map.std->cat.local); | ||
79 | break; | ||
80 | } | ||
81 | kfree(ptr); | ||
82 | } | ||
83 | |||
84 | |||
85 | /* | ||
86 | * NetLabel Command Handlers | ||
87 | */ | ||
88 | |||
89 | /** | ||
90 | * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition | ||
91 | * @doi: the DOI value | ||
92 | * @msg: the ADD message data | ||
93 | * @msg_size: the size of the ADD message buffer | ||
94 | * | ||
95 | * Description: | ||
96 | * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message | ||
97 | * and add it to the CIPSO V4 engine. Return zero on success and non-zero on | ||
98 | * error. | ||
99 | * | ||
100 | */ | ||
101 | static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) | ||
102 | { | ||
103 | int ret_val = -EINVAL; | ||
104 | int msg_len = msg_size; | ||
105 | u32 num_tags; | ||
106 | u32 num_lvls; | ||
107 | u32 num_cats; | ||
108 | struct cipso_v4_doi *doi_def = NULL; | ||
109 | u32 iter; | ||
110 | u32 tmp_val_a; | ||
111 | u32 tmp_val_b; | ||
112 | |||
113 | if (msg_len < NETLBL_LEN_U32) | ||
114 | goto add_std_failure; | ||
115 | num_tags = netlbl_getinc_u32(&msg, &msg_len); | ||
116 | if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) | ||
117 | goto add_std_failure; | ||
118 | |||
119 | doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); | ||
120 | if (doi_def == NULL) { | ||
121 | ret_val = -ENOMEM; | ||
122 | goto add_std_failure; | ||
123 | } | ||
124 | doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL); | ||
125 | if (doi_def->map.std == NULL) { | ||
126 | ret_val = -ENOMEM; | ||
127 | goto add_std_failure; | ||
128 | } | ||
129 | doi_def->type = CIPSO_V4_MAP_STD; | ||
130 | |||
131 | for (iter = 0; iter < num_tags; iter++) { | ||
132 | if (msg_len < NETLBL_LEN_U8) | ||
133 | goto add_std_failure; | ||
134 | doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len); | ||
135 | switch (doi_def->tags[iter]) { | ||
136 | case CIPSO_V4_TAG_RBITMAP: | ||
137 | break; | ||
138 | default: | ||
139 | goto add_std_failure; | ||
140 | } | ||
141 | } | ||
142 | if (iter < CIPSO_V4_TAG_MAXCNT) | ||
143 | doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; | ||
144 | |||
145 | if (msg_len < 6 * NETLBL_LEN_U32) | ||
146 | goto add_std_failure; | ||
147 | |||
148 | num_lvls = netlbl_getinc_u32(&msg, &msg_len); | ||
149 | if (num_lvls == 0) | ||
150 | goto add_std_failure; | ||
151 | doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len); | ||
152 | if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS) | ||
153 | goto add_std_failure; | ||
154 | doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size, | ||
155 | sizeof(u32), | ||
156 | GFP_KERNEL); | ||
157 | if (doi_def->map.std->lvl.local == NULL) { | ||
158 | ret_val = -ENOMEM; | ||
159 | goto add_std_failure; | ||
160 | } | ||
161 | doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len); | ||
162 | if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS) | ||
163 | goto add_std_failure; | ||
164 | doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size, | ||
165 | sizeof(u32), | ||
166 | GFP_KERNEL); | ||
167 | if (doi_def->map.std->lvl.cipso == NULL) { | ||
168 | ret_val = -ENOMEM; | ||
169 | goto add_std_failure; | ||
170 | } | ||
171 | |||
172 | num_cats = netlbl_getinc_u32(&msg, &msg_len); | ||
173 | doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len); | ||
174 | if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS) | ||
175 | goto add_std_failure; | ||
176 | doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size, | ||
177 | sizeof(u32), | ||
178 | GFP_KERNEL); | ||
179 | if (doi_def->map.std->cat.local == NULL) { | ||
180 | ret_val = -ENOMEM; | ||
181 | goto add_std_failure; | ||
182 | } | ||
183 | doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len); | ||
184 | if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS) | ||
185 | goto add_std_failure; | ||
186 | doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size, | ||
187 | sizeof(u32), | ||
188 | GFP_KERNEL); | ||
189 | if (doi_def->map.std->cat.cipso == NULL) { | ||
190 | ret_val = -ENOMEM; | ||
191 | goto add_std_failure; | ||
192 | } | ||
193 | |||
194 | if (msg_len < | ||
195 | num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) + | ||
196 | num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16)) | ||
197 | goto add_std_failure; | ||
198 | |||
199 | for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++) | ||
200 | doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL; | ||
201 | for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++) | ||
202 | doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL; | ||
203 | for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++) | ||
204 | doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT; | ||
205 | for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++) | ||
206 | doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT; | ||
207 | |||
208 | for (iter = 0; iter < num_lvls; iter++) { | ||
209 | tmp_val_a = netlbl_getinc_u32(&msg, &msg_len); | ||
210 | tmp_val_b = netlbl_getinc_u8(&msg, &msg_len); | ||
211 | |||
212 | if (tmp_val_a >= doi_def->map.std->lvl.local_size || | ||
213 | tmp_val_b >= doi_def->map.std->lvl.cipso_size) | ||
214 | goto add_std_failure; | ||
215 | |||
216 | doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a; | ||
217 | doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b; | ||
218 | } | ||
219 | |||
220 | for (iter = 0; iter < num_cats; iter++) { | ||
221 | tmp_val_a = netlbl_getinc_u32(&msg, &msg_len); | ||
222 | tmp_val_b = netlbl_getinc_u16(&msg, &msg_len); | ||
223 | |||
224 | if (tmp_val_a >= doi_def->map.std->cat.local_size || | ||
225 | tmp_val_b >= doi_def->map.std->cat.cipso_size) | ||
226 | goto add_std_failure; | ||
227 | |||
228 | doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a; | ||
229 | doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b; | ||
230 | } | ||
231 | |||
232 | doi_def->doi = doi; | ||
233 | ret_val = cipso_v4_doi_add(doi_def); | ||
234 | if (ret_val != 0) | ||
235 | goto add_std_failure; | ||
236 | return 0; | ||
237 | |||
238 | add_std_failure: | ||
239 | if (doi_def) | ||
240 | netlbl_cipsov4_doi_free(&doi_def->rcu); | ||
241 | return ret_val; | ||
242 | } | ||
243 | |||
244 | /** | ||
245 | * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition | ||
246 | * @doi: the DOI value | ||
247 | * @msg: the ADD message data | ||
248 | * @msg_size: the size of the ADD message buffer | ||
249 | * | ||
250 | * Description: | ||
251 | * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message | ||
252 | * and add it to the CIPSO V4 engine. Return zero on success and non-zero on | ||
253 | * error. | ||
254 | * | ||
255 | */ | ||
256 | static int netlbl_cipsov4_add_pass(u32 doi, | ||
257 | struct nlattr *msg, | ||
258 | size_t msg_size) | ||
259 | { | ||
260 | int ret_val = -EINVAL; | ||
261 | int msg_len = msg_size; | ||
262 | u32 num_tags; | ||
263 | struct cipso_v4_doi *doi_def = NULL; | ||
264 | u32 iter; | ||
265 | |||
266 | if (msg_len < NETLBL_LEN_U32) | ||
267 | goto add_pass_failure; | ||
268 | num_tags = netlbl_getinc_u32(&msg, &msg_len); | ||
269 | if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) | ||
270 | goto add_pass_failure; | ||
271 | |||
272 | doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); | ||
273 | if (doi_def == NULL) { | ||
274 | ret_val = -ENOMEM; | ||
275 | goto add_pass_failure; | ||
276 | } | ||
277 | doi_def->type = CIPSO_V4_MAP_PASS; | ||
278 | |||
279 | for (iter = 0; iter < num_tags; iter++) { | ||
280 | if (msg_len < NETLBL_LEN_U8) | ||
281 | goto add_pass_failure; | ||
282 | doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len); | ||
283 | switch (doi_def->tags[iter]) { | ||
284 | case CIPSO_V4_TAG_RBITMAP: | ||
285 | break; | ||
286 | default: | ||
287 | goto add_pass_failure; | ||
288 | } | ||
289 | } | ||
290 | if (iter < CIPSO_V4_TAG_MAXCNT) | ||
291 | doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; | ||
292 | |||
293 | doi_def->doi = doi; | ||
294 | ret_val = cipso_v4_doi_add(doi_def); | ||
295 | if (ret_val != 0) | ||
296 | goto add_pass_failure; | ||
297 | return 0; | ||
298 | |||
299 | add_pass_failure: | ||
300 | if (doi_def) | ||
301 | netlbl_cipsov4_doi_free(&doi_def->rcu); | ||
302 | return ret_val; | ||
303 | } | ||
304 | |||
305 | /** | ||
306 | * netlbl_cipsov4_add - Handle an ADD message | ||
307 | * @skb: the NETLINK buffer | ||
308 | * @info: the Generic NETLINK info block | ||
309 | * | ||
310 | * Description: | ||
311 | * Create a new DOI definition based on the given ADD message and add it to the | ||
312 | * CIPSO V4 engine. Returns zero on success, negative values on failure. | ||
313 | * | ||
314 | */ | ||
315 | static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) | ||
316 | |||
317 | { | ||
318 | int ret_val = -EINVAL; | ||
319 | u32 doi; | ||
320 | u32 map_type; | ||
321 | int msg_len = netlbl_netlink_payload_len(skb); | ||
322 | struct nlattr *msg = netlbl_netlink_payload_data(skb); | ||
323 | |||
324 | ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); | ||
325 | if (ret_val != 0) | ||
326 | goto add_return; | ||
327 | |||
328 | if (msg_len < 2 * NETLBL_LEN_U32) | ||
329 | goto add_return; | ||
330 | |||
331 | doi = netlbl_getinc_u32(&msg, &msg_len); | ||
332 | map_type = netlbl_getinc_u32(&msg, &msg_len); | ||
333 | switch (map_type) { | ||
334 | case CIPSO_V4_MAP_STD: | ||
335 | ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len); | ||
336 | break; | ||
337 | case CIPSO_V4_MAP_PASS: | ||
338 | ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len); | ||
339 | break; | ||
340 | } | ||
341 | |||
342 | add_return: | ||
343 | netlbl_netlink_send_ack(info, | ||
344 | netlbl_cipsov4_gnl_family.id, | ||
345 | NLBL_CIPSOV4_C_ACK, | ||
346 | -ret_val); | ||
347 | return ret_val; | ||
348 | } | ||
349 | |||
350 | /** | ||
351 | * netlbl_cipsov4_list - Handle a LIST message | ||
352 | * @skb: the NETLINK buffer | ||
353 | * @info: the Generic NETLINK info block | ||
354 | * | ||
355 | * Description: | ||
356 | * Process a user generated LIST message and respond accordingly. Returns | ||
357 | * zero on success and negative values on error. | ||
358 | * | ||
359 | */ | ||
360 | static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) | ||
361 | { | ||
362 | int ret_val = -EINVAL; | ||
363 | u32 doi; | ||
364 | struct nlattr *msg = netlbl_netlink_payload_data(skb); | ||
365 | struct sk_buff *ans_skb; | ||
366 | |||
367 | if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) | ||
368 | goto list_failure; | ||
369 | |||
370 | doi = nla_get_u32(msg); | ||
371 | ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN)); | ||
372 | if (ans_skb == NULL) { | ||
373 | ret_val = -ENOMEM; | ||
374 | goto list_failure; | ||
375 | } | ||
376 | netlbl_netlink_hdr_push(ans_skb, | ||
377 | info->snd_pid, | ||
378 | 0, | ||
379 | netlbl_cipsov4_gnl_family.id, | ||
380 | NLBL_CIPSOV4_C_LIST); | ||
381 | |||
382 | ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); | ||
383 | if (ret_val != 0) | ||
384 | goto list_failure; | ||
385 | |||
386 | return 0; | ||
387 | |||
388 | list_failure: | ||
389 | netlbl_netlink_send_ack(info, | ||
390 | netlbl_cipsov4_gnl_family.id, | ||
391 | NLBL_CIPSOV4_C_ACK, | ||
392 | -ret_val); | ||
393 | return ret_val; | ||
394 | } | ||
395 | |||
396 | /** | ||
397 | * netlbl_cipsov4_listall - Handle a LISTALL message | ||
398 | * @skb: the NETLINK buffer | ||
399 | * @info: the Generic NETLINK info block | ||
400 | * | ||
401 | * Description: | ||
402 | * Process a user generated LISTALL message and respond accordingly. Returns | ||
403 | * zero on success and negative values on error. | ||
404 | * | ||
405 | */ | ||
406 | static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info) | ||
407 | { | ||
408 | int ret_val = -EINVAL; | ||
409 | struct sk_buff *ans_skb; | ||
410 | |||
411 | ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN)); | ||
412 | if (ans_skb == NULL) { | ||
413 | ret_val = -ENOMEM; | ||
414 | goto listall_failure; | ||
415 | } | ||
416 | netlbl_netlink_hdr_push(ans_skb, | ||
417 | info->snd_pid, | ||
418 | 0, | ||
419 | netlbl_cipsov4_gnl_family.id, | ||
420 | NLBL_CIPSOV4_C_LISTALL); | ||
421 | |||
422 | ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); | ||
423 | if (ret_val != 0) | ||
424 | goto listall_failure; | ||
425 | |||
426 | return 0; | ||
427 | |||
428 | listall_failure: | ||
429 | netlbl_netlink_send_ack(info, | ||
430 | netlbl_cipsov4_gnl_family.id, | ||
431 | NLBL_CIPSOV4_C_ACK, | ||
432 | -ret_val); | ||
433 | return ret_val; | ||
434 | } | ||
435 | |||
436 | /** | ||
437 | * netlbl_cipsov4_remove - Handle a REMOVE message | ||
438 | * @skb: the NETLINK buffer | ||
439 | * @info: the Generic NETLINK info block | ||
440 | * | ||
441 | * Description: | ||
442 | * Process a user generated REMOVE message and respond accordingly. Returns | ||
443 | * zero on success, negative values on failure. | ||
444 | * | ||
445 | */ | ||
446 | static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) | ||
447 | { | ||
448 | int ret_val; | ||
449 | u32 doi; | ||
450 | struct nlattr *msg = netlbl_netlink_payload_data(skb); | ||
451 | |||
452 | ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); | ||
453 | if (ret_val != 0) | ||
454 | goto remove_return; | ||
455 | |||
456 | if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) { | ||
457 | ret_val = -EINVAL; | ||
458 | goto remove_return; | ||
459 | } | ||
460 | |||
461 | doi = nla_get_u32(msg); | ||
462 | ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free); | ||
463 | |||
464 | remove_return: | ||
465 | netlbl_netlink_send_ack(info, | ||
466 | netlbl_cipsov4_gnl_family.id, | ||
467 | NLBL_CIPSOV4_C_ACK, | ||
468 | -ret_val); | ||
469 | return ret_val; | ||
470 | } | ||
471 | |||
/*
 * NetLabel Generic NETLINK Command Definitions
 */

/* ADD: create a new CIPSOv4 DOI definition.  Privilege is checked inside the
 * handler via netlbl_netlink_cap_check(), not via the .flags field. */
static struct genl_ops netlbl_cipsov4_genl_c_add = {
	.cmd = NLBL_CIPSOV4_C_ADD,
	.flags = 0,
	.doit = netlbl_cipsov4_add,
	.dumpit = NULL,
};

/* REMOVE: delete an existing DOI definition (privilege checked in handler) */
static struct genl_ops netlbl_cipsov4_genl_c_remove = {
	.cmd = NLBL_CIPSOV4_C_REMOVE,
	.flags = 0,
	.doit = netlbl_cipsov4_remove,
	.dumpit = NULL,
};

/* LIST: report the details of a single DOI definition */
static struct genl_ops netlbl_cipsov4_genl_c_list = {
	.cmd = NLBL_CIPSOV4_C_LIST,
	.flags = 0,
	.doit = netlbl_cipsov4_list,
	.dumpit = NULL,
};

/* LISTALL: report every valid DOI on the system */
static struct genl_ops netlbl_cipsov4_genl_c_listall = {
	.cmd = NLBL_CIPSOV4_C_LISTALL,
	.flags = 0,
	.doit = netlbl_cipsov4_listall,
	.dumpit = NULL,
};
503 | |||
504 | /* | ||
505 | * NetLabel Generic NETLINK Protocol Functions | ||
506 | */ | ||
507 | |||
508 | /** | ||
509 | * netlbl_cipsov4_genl_init - Register the CIPSOv4 NetLabel component | ||
510 | * | ||
511 | * Description: | ||
512 | * Register the CIPSOv4 packet NetLabel component with the Generic NETLINK | ||
513 | * mechanism. Returns zero on success, negative values on failure. | ||
514 | * | ||
515 | */ | ||
516 | int netlbl_cipsov4_genl_init(void) | ||
517 | { | ||
518 | int ret_val; | ||
519 | |||
520 | ret_val = genl_register_family(&netlbl_cipsov4_gnl_family); | ||
521 | if (ret_val != 0) | ||
522 | return ret_val; | ||
523 | |||
524 | ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, | ||
525 | &netlbl_cipsov4_genl_c_add); | ||
526 | if (ret_val != 0) | ||
527 | return ret_val; | ||
528 | ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, | ||
529 | &netlbl_cipsov4_genl_c_remove); | ||
530 | if (ret_val != 0) | ||
531 | return ret_val; | ||
532 | ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, | ||
533 | &netlbl_cipsov4_genl_c_list); | ||
534 | if (ret_val != 0) | ||
535 | return ret_val; | ||
536 | ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, | ||
537 | &netlbl_cipsov4_genl_c_listall); | ||
538 | if (ret_val != 0) | ||
539 | return ret_val; | ||
540 | |||
541 | return 0; | ||
542 | } | ||
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h new file mode 100644 index 000000000000..4c6ff4b93004 --- /dev/null +++ b/net/netlabel/netlabel_cipso_v4.h | |||
@@ -0,0 +1,217 @@ | |||
1 | /* | ||
2 | * NetLabel CIPSO/IPv4 Support | ||
3 | * | ||
4 | * This file defines the CIPSO/IPv4 functions for the NetLabel system. The | ||
5 | * NetLabel system manages static and dynamic label mappings for network | ||
6 | * protocols such as CIPSO and RIPSO. | ||
7 | * | ||
8 | * Author: Paul Moore <paul.moore@hp.com> | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
23 | * the GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #ifndef _NETLABEL_CIPSO_V4 | ||
32 | #define _NETLABEL_CIPSO_V4 | ||
33 | |||
34 | #include <net/netlabel.h> | ||
35 | |||
36 | /* | ||
37 | * The following NetLabel payloads are supported by the CIPSO subsystem, all | ||
 * of which are preceded by the nlmsghdr struct.
39 | * | ||
40 | * o ACK: | ||
41 | * Sent by the kernel in response to an applications message, applications | ||
42 | * should never send this message. | ||
43 | * | ||
44 | * +----------------------+-----------------------+ | ||
45 | * | seq number (32 bits) | return code (32 bits) | | ||
46 | * +----------------------+-----------------------+ | ||
47 | * | ||
48 | * seq number: the sequence number of the original message, taken from the | ||
49 | * nlmsghdr structure | ||
50 | * return code: return value, based on errno values | ||
51 | * | ||
52 | * o ADD: | ||
53 | * Sent by an application to add a new DOI mapping table, after completion | ||
54 | * of the task the kernel should ACK this message. | ||
55 | * | ||
56 | * +---------------+--------------------+---------------------+ | ||
57 | * | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ... | ||
58 | * +---------------+--------------------+---------------------+ | ||
59 | * | ||
60 | * +-----------------+ | ||
61 | * | tag #X (8 bits) | ... repeated | ||
62 | * +-----------------+ | ||
63 | * | ||
64 | * +-------------- ---- --- -- - | ||
65 | * | mapping data | ||
66 | * +-------------- ---- --- -- - | ||
67 | * | ||
68 | * DOI: the DOI value | ||
69 | * map type: the mapping table type (defined in the cipso_ipv4.h header | ||
70 | * as CIPSO_V4_MAP_*) | ||
71 | * tag count: the number of tags, must be greater than zero | ||
72 | * tag: the CIPSO tag for the DOI, tags listed first are given | ||
 * higher priority when sending packets
74 | * mapping data: specific to the map type (see below) | ||
75 | * | ||
76 | * CIPSO_V4_MAP_STD | ||
77 | * | ||
78 | * +------------------+-----------------------+----------------------+ | ||
79 | * | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ... | ||
80 | * +------------------+-----------------------+----------------------+ | ||
81 | * | ||
82 | * +----------------------+---------------------+---------------------+ | ||
83 | * | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ... | ||
84 | * +----------------------+---------------------+---------------------+ | ||
85 | * | ||
86 | * +--------------------------+-------------------------+ | ||
87 | * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated | ||
88 | * +--------------------------+-------------------------+ | ||
89 | * | ||
90 | * +-----------------------------+-----------------------------+ | ||
91 | * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated | ||
92 | * +-----------------------------+-----------------------------+ | ||
93 | * | ||
94 | * levels: the number of level mappings | ||
95 | * max l level: the highest local level | ||
96 | * max r level: the highest remote/CIPSO level | ||
97 | * categories: the number of category mappings | ||
98 | * max l cat: the highest local category | ||
99 | * max r cat: the highest remote/CIPSO category | ||
100 | * local level: the local part of a level mapping | ||
101 | * CIPSO level: the remote/CIPSO part of a level mapping | ||
102 | * local category: the local part of a category mapping | ||
103 | * CIPSO category: the remote/CIPSO part of a category mapping | ||
104 | * | ||
105 | * CIPSO_V4_MAP_PASS | ||
106 | * | ||
107 | * No mapping data is needed for this map type. | ||
108 | * | ||
109 | * o REMOVE: | ||
110 | * Sent by an application to remove a specific DOI mapping table from the | ||
111 | * CIPSO V4 system. The kernel should ACK this message. | ||
112 | * | ||
113 | * +---------------+ | ||
114 | * | DOI (32 bits) | | ||
115 | * +---------------+ | ||
116 | * | ||
117 | * DOI: the DOI value | ||
118 | * | ||
119 | * o LIST: | ||
120 | * Sent by an application to list the details of a DOI definition. The | ||
121 | * kernel should send an ACK on error or a response as indicated below. The | ||
122 | * application generated message format is shown below. | ||
123 | * | ||
124 | * +---------------+ | ||
125 | * | DOI (32 bits) | | ||
126 | * +---------------+ | ||
127 | * | ||
128 | * DOI: the DOI value | ||
129 | * | ||
130 | * The valid response message format depends on the type of the DOI mapping, | ||
131 | * the known formats are shown below. | ||
132 | * | ||
133 | * +--------------------+ | ||
134 | * | map type (32 bits) | ... | ||
135 | * +--------------------+ | ||
136 | * | ||
137 | * map type: the DOI mapping table type (defined in the cipso_ipv4.h | ||
138 | * header as CIPSO_V4_MAP_*) | ||
139 | * | ||
140 | * (map type == CIPSO_V4_MAP_STD) | ||
141 | * | ||
142 | * +----------------+------------------+----------------------+ | ||
143 | * | tags (32 bits) | levels (32 bits) | categories (32 bits) | ... | ||
144 | * +----------------+------------------+----------------------+ | ||
145 | * | ||
146 | * +-----------------+ | ||
147 | * | tag #X (8 bits) | ... repeated | ||
148 | * +-----------------+ | ||
149 | * | ||
150 | * +--------------------------+-------------------------+ | ||
151 | * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated | ||
152 | * +--------------------------+-------------------------+ | ||
153 | * | ||
154 | * +-----------------------------+-----------------------------+ | ||
155 | * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated | ||
156 | * +-----------------------------+-----------------------------+ | ||
157 | * | ||
158 | * tags: the number of CIPSO tag types | ||
159 | * levels: the number of level mappings | ||
160 | * categories: the number of category mappings | ||
161 | * tag: the tag number, tags listed first are given higher | ||
162 | * priority when sending packets | ||
163 | * local level: the local part of a level mapping | ||
164 | * CIPSO level: the remote/CIPSO part of a level mapping | ||
165 | * local category: the local part of a category mapping | ||
166 | * CIPSO category: the remote/CIPSO part of a category mapping | ||
167 | * | ||
168 | * (map type == CIPSO_V4_MAP_PASS) | ||
169 | * | ||
170 | * +----------------+ | ||
171 | * | tags (32 bits) | ... | ||
172 | * +----------------+ | ||
173 | * | ||
174 | * +-----------------+ | ||
175 | * | tag #X (8 bits) | ... repeated | ||
176 | * +-----------------+ | ||
177 | * | ||
178 | * tags: the number of CIPSO tag types | ||
179 | * tag: the tag number, tags listed first are given higher | ||
180 | * priority when sending packets | ||
181 | * | ||
182 | * o LISTALL: | ||
183 | * This message is sent by an application to list the valid DOIs on the | ||
184 | * system. There is no payload and the kernel should respond with an ACK | ||
185 | * or the following message. | ||
186 | * | ||
187 | * +---------------------+------------------+-----------------------+ | ||
188 | * | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) | | ||
189 | * +---------------------+------------------+-----------------------+ | ||
190 | * | ||
191 | * +-----------------------+ | ||
192 | * | map type #X (32 bits) | ... | ||
193 | * +-----------------------+ | ||
194 | * | ||
195 | * DOI count: the number of DOIs | ||
196 | * DOI: the DOI value | ||
197 | * map type: the DOI mapping table type (defined in the cipso_ipv4.h | ||
198 | * header as CIPSO_V4_MAP_*) | ||
199 | * | ||
200 | */ | ||
201 | |||
/* NetLabel CIPSOv4 commands */
enum {
	NLBL_CIPSOV4_C_UNSPEC,
	/* kernel -> application acknowledgment carrying a return code */
	NLBL_CIPSOV4_C_ACK,
	/* add a new DOI definition (see the ADD payload format above) */
	NLBL_CIPSOV4_C_ADD,
	/* remove an existing DOI definition */
	NLBL_CIPSOV4_C_REMOVE,
	/* list the details of a single DOI definition */
	NLBL_CIPSOV4_C_LIST,
	/* list all valid DOIs on the system */
	NLBL_CIPSOV4_C_LISTALL,
	__NLBL_CIPSOV4_C_MAX,
};
#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1)

/* NetLabel protocol functions */
/* registers the CIPSOv4 component with Generic NETLINK; called once at init */
int netlbl_cipsov4_genl_init(void);

#endif
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c new file mode 100644 index 000000000000..0489a1378101 --- /dev/null +++ b/net/netlabel/netlabel_domainhash.c | |||
@@ -0,0 +1,513 @@ | |||
1 | /* | ||
2 | * NetLabel Domain Hash Table | ||
3 | * | ||
4 | * This file manages the domain hash table that NetLabel uses to determine | ||
5 | * which network labeling protocol to use for a given domain. The NetLabel | ||
6 | * system manages static and dynamic label mappings for network protocols such | ||
7 | * as CIPSO and RIPSO. | ||
8 | * | ||
9 | * Author: Paul Moore <paul.moore@hp.com> | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or modify | ||
17 | * it under the terms of the GNU General Public License as published by | ||
18 | * the Free Software Foundation; either version 2 of the License, or | ||
19 | * (at your option) any later version. | ||
20 | * | ||
21 | * This program is distributed in the hope that it will be useful, | ||
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
24 | * the GNU General Public License for more details. | ||
25 | * | ||
26 | * You should have received a copy of the GNU General Public License | ||
27 | * along with this program; if not, write to the Free Software | ||
28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
29 | * | ||
30 | */ | ||
31 | |||
32 | #include <linux/types.h> | ||
33 | #include <linux/rcupdate.h> | ||
34 | #include <linux/list.h> | ||
35 | #include <linux/skbuff.h> | ||
36 | #include <linux/spinlock.h> | ||
37 | #include <linux/string.h> | ||
38 | #include <net/netlabel.h> | ||
39 | #include <net/cipso_ipv4.h> | ||
40 | #include <asm/bug.h> | ||
41 | |||
42 | #include "netlabel_mgmt.h" | ||
43 | #include "netlabel_domainhash.h" | ||
44 | |||
45 | struct netlbl_domhsh_tbl { | ||
46 | struct list_head *tbl; | ||
47 | u32 size; | ||
48 | }; | ||
49 | |||
50 | /* Domain hash table */ | ||
51 | /* XXX - updates should be so rare that having one spinlock for the entire | ||
52 | * hash table should be okay */ | ||
53 | static DEFINE_SPINLOCK(netlbl_domhsh_lock); | ||
54 | static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; | ||
55 | |||
56 | /* Default domain mapping */ | ||
57 | static DEFINE_SPINLOCK(netlbl_domhsh_def_lock); | ||
58 | static struct netlbl_dom_map *netlbl_domhsh_def = NULL; | ||
59 | |||
60 | /* | ||
61 | * Domain Hash Table Helper Functions | ||
62 | */ | ||
63 | |||
64 | /** | ||
65 | * netlbl_domhsh_free_entry - Frees a domain hash table entry | ||
66 | * @entry: the entry's RCU field | ||
67 | * | ||
68 | * Description: | ||
69 | * This function is designed to be used as a callback to the call_rcu() | ||
70 | * function so that the memory allocated to a hash table entry can be released | ||
71 | * safely. | ||
72 | * | ||
73 | */ | ||
74 | static void netlbl_domhsh_free_entry(struct rcu_head *entry) | ||
75 | { | ||
76 | struct netlbl_dom_map *ptr; | ||
77 | |||
78 | ptr = container_of(entry, struct netlbl_dom_map, rcu); | ||
79 | kfree(ptr->domain); | ||
80 | kfree(ptr); | ||
81 | } | ||
82 | |||
/**
 * netlbl_domhsh_hash - Hashing function for the domain hash table
 * @key: the domain name to hash
 *
 * Description:
 * This is the hashing function for the domain hash table, it returns the
 * correct bucket number for the domain.  The caller is responsible for
 * calling the rcu_read_[un]lock() functions.
 *
 */
static u32 netlbl_domhsh_hash(const char *key)
{
	u32 iter;
	u32 val;
	u32 len;

	/* This is taken (with slight modification) from
	 * security/selinux/ss/symtab.c:symhash() */

	for (iter = 0, val = 0, len = strlen(key); iter < len; iter++)
		val = (val << 4 | (val >> (8 * sizeof(u32) - 4))) ^ key[iter];
	/* the table size is a power of two, so masking yields the bucket */
	return val & (rcu_dereference(netlbl_domhsh)->size - 1);
}
106 | |||
107 | /** | ||
108 | * netlbl_domhsh_search - Search for a domain entry | ||
109 | * @domain: the domain | ||
110 | * @def: return default if no match is found | ||
111 | * | ||
112 | * Description: | ||
113 | * Searches the domain hash table and returns a pointer to the hash table | ||
114 | * entry if found, otherwise NULL is returned. If @def is non-zero and a | ||
115 | * match is not found in the domain hash table the default mapping is returned | ||
116 | * if it exists. The caller is responsibile for the rcu hash table locks | ||
117 | * (i.e. the caller much call rcu_read_[un]lock()). | ||
118 | * | ||
119 | */ | ||
120 | static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def) | ||
121 | { | ||
122 | u32 bkt; | ||
123 | struct netlbl_dom_map *iter; | ||
124 | |||
125 | if (domain != NULL) { | ||
126 | bkt = netlbl_domhsh_hash(domain); | ||
127 | list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list) | ||
128 | if (iter->valid && strcmp(iter->domain, domain) == 0) | ||
129 | return iter; | ||
130 | } | ||
131 | |||
132 | if (def != 0) { | ||
133 | iter = rcu_dereference(netlbl_domhsh_def); | ||
134 | if (iter != NULL && iter->valid) | ||
135 | return iter; | ||
136 | } | ||
137 | |||
138 | return NULL; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Domain Hash Table Functions | ||
143 | */ | ||
144 | |||
145 | /** | ||
146 | * netlbl_domhsh_init - Init for the domain hash | ||
147 | * @size: the number of bits to use for the hash buckets | ||
148 | * | ||
149 | * Description: | ||
150 | * Initializes the domain hash table, should be called only by | ||
151 | * netlbl_user_init() during initialization. Returns zero on success, non-zero | ||
152 | * values on error. | ||
153 | * | ||
154 | */ | ||
155 | int netlbl_domhsh_init(u32 size) | ||
156 | { | ||
157 | u32 iter; | ||
158 | struct netlbl_domhsh_tbl *hsh_tbl; | ||
159 | |||
160 | if (size == 0) | ||
161 | return -EINVAL; | ||
162 | |||
163 | hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL); | ||
164 | if (hsh_tbl == NULL) | ||
165 | return -ENOMEM; | ||
166 | hsh_tbl->size = 1 << size; | ||
167 | hsh_tbl->tbl = kcalloc(hsh_tbl->size, | ||
168 | sizeof(struct list_head), | ||
169 | GFP_KERNEL); | ||
170 | if (hsh_tbl->tbl == NULL) { | ||
171 | kfree(hsh_tbl); | ||
172 | return -ENOMEM; | ||
173 | } | ||
174 | for (iter = 0; iter < hsh_tbl->size; iter++) | ||
175 | INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); | ||
176 | |||
177 | rcu_read_lock(); | ||
178 | spin_lock(&netlbl_domhsh_lock); | ||
179 | rcu_assign_pointer(netlbl_domhsh, hsh_tbl); | ||
180 | spin_unlock(&netlbl_domhsh_lock); | ||
181 | rcu_read_unlock(); | ||
182 | |||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | /** | ||
187 | * netlbl_domhsh_add - Adds a entry to the domain hash table | ||
188 | * @entry: the entry to add | ||
189 | * | ||
190 | * Description: | ||
191 | * Adds a new entry to the domain hash table and handles any updates to the | ||
192 | * lower level protocol handler (i.e. CIPSO). Returns zero on success, | ||
193 | * negative on failure. | ||
194 | * | ||
195 | */ | ||
196 | int netlbl_domhsh_add(struct netlbl_dom_map *entry) | ||
197 | { | ||
198 | int ret_val; | ||
199 | u32 bkt; | ||
200 | |||
201 | switch (entry->type) { | ||
202 | case NETLBL_NLTYPE_UNLABELED: | ||
203 | ret_val = 0; | ||
204 | break; | ||
205 | case NETLBL_NLTYPE_CIPSOV4: | ||
206 | ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4, | ||
207 | entry->domain); | ||
208 | break; | ||
209 | default: | ||
210 | return -EINVAL; | ||
211 | } | ||
212 | if (ret_val != 0) | ||
213 | return ret_val; | ||
214 | |||
215 | entry->valid = 1; | ||
216 | INIT_RCU_HEAD(&entry->rcu); | ||
217 | |||
218 | ret_val = 0; | ||
219 | rcu_read_lock(); | ||
220 | if (entry->domain != NULL) { | ||
221 | bkt = netlbl_domhsh_hash(entry->domain); | ||
222 | spin_lock(&netlbl_domhsh_lock); | ||
223 | if (netlbl_domhsh_search(entry->domain, 0) == NULL) | ||
224 | list_add_tail_rcu(&entry->list, | ||
225 | &netlbl_domhsh->tbl[bkt]); | ||
226 | else | ||
227 | ret_val = -EEXIST; | ||
228 | spin_unlock(&netlbl_domhsh_lock); | ||
229 | } else if (entry->domain == NULL) { | ||
230 | INIT_LIST_HEAD(&entry->list); | ||
231 | spin_lock(&netlbl_domhsh_def_lock); | ||
232 | if (rcu_dereference(netlbl_domhsh_def) == NULL) | ||
233 | rcu_assign_pointer(netlbl_domhsh_def, entry); | ||
234 | else | ||
235 | ret_val = -EEXIST; | ||
236 | spin_unlock(&netlbl_domhsh_def_lock); | ||
237 | } else | ||
238 | ret_val = -EINVAL; | ||
239 | rcu_read_unlock(); | ||
240 | |||
241 | if (ret_val != 0) { | ||
242 | switch (entry->type) { | ||
243 | case NETLBL_NLTYPE_CIPSOV4: | ||
244 | if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, | ||
245 | entry->domain) != 0) | ||
246 | BUG(); | ||
247 | break; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | return ret_val; | ||
252 | } | ||
253 | |||
/**
 * netlbl_domhsh_add_default - Adds the default entry to the domain hash table
 * @entry: the entry to add
 *
 * Description:
 * Adds a new default entry to the domain hash table and handles any updates
 * to the lower level protocol handler (i.e. CIPSO).  Returns zero on success,
 * negative on failure.
 *
 */
int netlbl_domhsh_add_default(struct netlbl_dom_map *entry)
{
	/* netlbl_domhsh_add() treats a NULL entry->domain as the default
	 * mapping, so no special handling is needed here */
	return netlbl_domhsh_add(entry);
}
268 | |||
/**
 * netlbl_domhsh_remove - Removes an entry from the domain hash table
 * @domain: the domain to remove
 *
 * Description:
 * Removes an entry from the domain hash table and handles any updates to the
 * lower level protocol handler (i.e. CIPSO).  A NULL @domain selects the
 * default mapping.  Returns zero on success, negative on failure.
 *
 */
int netlbl_domhsh_remove(const char *domain)
{
	int ret_val = -ENOENT;
	struct netlbl_dom_map *entry;

	rcu_read_lock();
	/* a NULL domain means "remove the default mapping" */
	if (domain != NULL)
		entry = netlbl_domhsh_search(domain, 0);
	else
		entry = netlbl_domhsh_search(domain, 1);
	if (entry == NULL)
		goto remove_return;
	switch (entry->type) {
	case NETLBL_NLTYPE_UNLABELED:
		break;
	case NETLBL_NLTYPE_CIPSOV4:
		/* detach the CIPSO DOI mapping first; on failure the hash
		 * table entry is left in place.
		 * NOTE(review): this runs before the entry->valid re-check
		 * below, so a concurrent removal could leave the CIPSO side
		 * detached while this call returns -ENOENT -- confirm this
		 * ordering is intended */
		ret_val = cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
						     entry->domain);
		if (ret_val != 0)
			goto remove_return;
		break;
	}
	ret_val = 0;
	if (entry != rcu_dereference(netlbl_domhsh_def)) {
		/* hashed entry: mark invalid and unlink under the table lock;
		 * valid is re-checked in case another remover got here first */
		spin_lock(&netlbl_domhsh_lock);
		if (entry->valid) {
			entry->valid = 0;
			list_del_rcu(&entry->list);
		} else
			ret_val = -ENOENT;
		spin_unlock(&netlbl_domhsh_lock);
	} else {
		/* default entry: clear the pointer under its own lock */
		spin_lock(&netlbl_domhsh_def_lock);
		if (entry->valid) {
			entry->valid = 0;
			rcu_assign_pointer(netlbl_domhsh_def, NULL);
		} else
			ret_val = -ENOENT;
		spin_unlock(&netlbl_domhsh_def_lock);
	}
	/* defer freeing until all RCU readers are done with the entry */
	if (ret_val == 0)
		call_rcu(&entry->rcu, netlbl_domhsh_free_entry);

remove_return:
	rcu_read_unlock();
	return ret_val;
}
326 | |||
/**
 * netlbl_domhsh_remove_default - Removes the default entry from the table
 *
 * Description:
 * Removes/resets the default entry for the domain hash table and handles any
 * updates to the lower level protocol handler (i.e. CIPSO).  Returns zero on
 * success, non-zero on failure.
 *
 */
int netlbl_domhsh_remove_default(void)
{
	/* a NULL domain selects the default mapping in netlbl_domhsh_remove() */
	return netlbl_domhsh_remove(NULL);
}
340 | |||
/**
 * netlbl_domhsh_getentry - Get an entry from the domain hash table
 * @domain: the domain name to search for
 *
 * Description:
 * Search the domain hash table for an entry matching @domain and return a
 * pointer to it, or NULL when nothing matches.  The caller is responsible
 * for surrounding both the call and any use of the returned entry with
 * rcu_read_lock()/rcu_read_unlock().
 *
 */
struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain)
{
	/* def flag == 1: the default entry is considered in the search */
	return netlbl_domhsh_search(domain, 1);
}
355 | |||
/**
 * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff
 * @headroom: the amount of headroom to reserve at the front of the sk_buff
 *
 * Description:
 * Dump the domain hash table into a buffer suitable for returning to an
 * application in response to a NetLabel management DOMAIN message.  This
 * function may fail if another process is growing the hash table at the same
 * time.  The returned sk_buff has room at the front of the sk_buff for
 * @headroom bytes.  See netlabel.h for the DOMAIN message format.  Returns a
 * pointer to a sk_buff on success, NULL on error.
 *
 */
struct sk_buff *netlbl_domhsh_dump(size_t headroom)
{
	struct sk_buff *skb = NULL;
	ssize_t buf_len;
	u32 bkt_iter;
	u32 dom_cnt = 0;
	struct netlbl_domhsh_tbl *hsh_tbl;
	struct netlbl_dom_map *list_iter;
	ssize_t tmp_len;

	/* First pass: size the buffer - a u32 entry count plus, per entry,
	 * a u32 map type, the domain string attribute, and for CIPSOv4
	 * mappings two additional u32 values (type + DOI). */
	buf_len = NETLBL_LEN_U32;
	rcu_read_lock();
	hsh_tbl = rcu_dereference(netlbl_domhsh);
	for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
		list_for_each_entry_rcu(list_iter,
					&hsh_tbl->tbl[bkt_iter], list) {
			buf_len += NETLBL_LEN_U32 +
				nla_total_size(strlen(list_iter->domain) + 1);
			switch (list_iter->type) {
			case NETLBL_NLTYPE_UNLABELED:
				break;
			case NETLBL_NLTYPE_CIPSOV4:
				buf_len += 2 * NETLBL_LEN_U32;
				break;
			}
			dom_cnt++;
		}

	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
	if (skb == NULL)
		goto dump_failure;

	/* Second pass: emit the entries, decrementing buf_len as a running
	 * space check in case the table grew between the two passes (in
	 * which case we bail out rather than overrun the buffer). */
	if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0)
		goto dump_failure;
	buf_len -= NETLBL_LEN_U32;
	hsh_tbl = rcu_dereference(netlbl_domhsh);
	for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++)
		list_for_each_entry_rcu(list_iter,
					&hsh_tbl->tbl[bkt_iter], list) {
			tmp_len = nla_total_size(strlen(list_iter->domain) +
						 1);
			if (buf_len < NETLBL_LEN_U32 + tmp_len)
				goto dump_failure;
			if (nla_put_string(skb,
					   NLA_STRING,
					   list_iter->domain) != 0)
				goto dump_failure;
			if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0)
				goto dump_failure;
			buf_len -= NETLBL_LEN_U32 + tmp_len;
			switch (list_iter->type) {
			case NETLBL_NLTYPE_UNLABELED:
				break;
			case NETLBL_NLTYPE_CIPSOV4:
				if (buf_len < 2 * NETLBL_LEN_U32)
					goto dump_failure;
				if (nla_put_u32(skb,
						NLA_U32,
						list_iter->type_def.cipsov4->type) != 0)
					goto dump_failure;
				if (nla_put_u32(skb,
						NLA_U32,
						list_iter->type_def.cipsov4->doi) != 0)
					goto dump_failure;
				buf_len -= 2 * NETLBL_LEN_U32;
				break;
			}
		}
	rcu_read_unlock();

	return skb;

dump_failure:
	rcu_read_unlock();
	/* kfree_skb() is a no-op when the allocation itself failed */
	kfree_skb(skb);
	return NULL;
}
445 | |||
/**
 * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff
 * @headroom: the amount of headroom to reserve at the front of the sk_buff
 *
 * Description:
 * Dump the default domain mapping into a buffer suitable for returning to an
 * application in response to a NetLabel management DEFDOMAIN message.  This
 * function may fail if another process is changing the default domain mapping
 * at the same time.  The returned sk_buff has room at the front of the
 * skb_buff for @headroom bytes.  See netlabel.h for the DEFDOMAIN message
 * format.  Returns a pointer to a sk_buff on success, NULL on error.
 *
 */
struct sk_buff *netlbl_domhsh_dump_default(size_t headroom)
{
	struct sk_buff *skb;
	ssize_t buf_len;
	struct netlbl_dom_map *entry;

	/* Size the buffer from the current default entry: a u32 map type,
	 * plus two more u32 values (type + DOI) for a CIPSOv4 mapping. */
	buf_len = NETLBL_LEN_U32;
	rcu_read_lock();
	entry = rcu_dereference(netlbl_domhsh_def);
	if (entry != NULL)
		switch (entry->type) {
		case NETLBL_NLTYPE_UNLABELED:
			break;
		case NETLBL_NLTYPE_CIPSOV4:
			buf_len += 2 * NETLBL_LEN_U32;
			break;
		}

	skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC);
	if (skb == NULL)
		goto dump_default_failure;

	/* If the default entry changed while the buffer was being
	 * allocated the computed size may be wrong - fail instead. */
	if (entry != rcu_dereference(netlbl_domhsh_def))
		goto dump_default_failure;
	if (entry != NULL) {
		if (nla_put_u32(skb, NLA_U32, entry->type) != 0)
			goto dump_default_failure;
		buf_len -= NETLBL_LEN_U32;
		switch (entry->type) {
		case NETLBL_NLTYPE_UNLABELED:
			break;
		case NETLBL_NLTYPE_CIPSOV4:
			if (buf_len < 2 * NETLBL_LEN_U32)
				goto dump_default_failure;
			if (nla_put_u32(skb,
					NLA_U32,
					entry->type_def.cipsov4->type) != 0)
				goto dump_default_failure;
			if (nla_put_u32(skb,
					NLA_U32,
					entry->type_def.cipsov4->doi) != 0)
				goto dump_default_failure;
			buf_len -= 2 * NETLBL_LEN_U32;
			break;
		}
	} else
		/* no default mapping is configured */
		nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE);
	rcu_read_unlock();

	return skb;

dump_default_failure:
	rcu_read_unlock();
	kfree_skb(skb);
	return NULL;
}
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h new file mode 100644 index 000000000000..99a2287de246 --- /dev/null +++ b/net/netlabel/netlabel_domainhash.h | |||
@@ -0,0 +1,67 @@ | |||
1 | /* | ||
2 | * NetLabel Domain Hash Table | ||
3 | * | ||
4 | * This file manages the domain hash table that NetLabel uses to determine | ||
5 | * which network labeling protocol to use for a given domain. The NetLabel | ||
6 | * system manages static and dynamic label mappings for network protocols such | ||
7 | * as CIPSO and RIPSO. | ||
8 | * | ||
9 | * Author: Paul Moore <paul.moore@hp.com> | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or modify | ||
17 | * it under the terms of the GNU General Public License as published by | ||
18 | * the Free Software Foundation; either version 2 of the License, or | ||
19 | * (at your option) any later version. | ||
20 | * | ||
21 | * This program is distributed in the hope that it will be useful, | ||
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
24 | * the GNU General Public License for more details. | ||
25 | * | ||
26 | * You should have received a copy of the GNU General Public License | ||
27 | * along with this program; if not, write to the Free Software | ||
28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
29 | * | ||
30 | */ | ||
31 | |||
32 | #ifndef _NETLABEL_DOMAINHASH_H | ||
33 | #define _NETLABEL_DOMAINHASH_H | ||
34 | |||
35 | #include <linux/types.h> | ||
36 | #include <linux/rcupdate.h> | ||
37 | #include <linux/list.h> | ||
38 | |||
39 | /* Domain hash table size */ | ||
40 | /* XXX - currently this number is an uneducated guess */ | ||
41 | #define NETLBL_DOMHSH_BITSIZE 7 | ||
42 | |||
43 | /* Domain mapping definition struct */ | ||
44 | struct netlbl_dom_map { | ||
45 | char *domain; | ||
46 | u32 type; | ||
47 | union { | ||
48 | struct cipso_v4_doi *cipsov4; | ||
49 | } type_def; | ||
50 | |||
51 | u32 valid; | ||
52 | struct list_head list; | ||
53 | struct rcu_head rcu; | ||
54 | }; | ||
55 | |||
56 | /* init function */ | ||
57 | int netlbl_domhsh_init(u32 size); | ||
58 | |||
59 | /* Manipulate the domain hash table */ | ||
60 | int netlbl_domhsh_add(struct netlbl_dom_map *entry); | ||
61 | int netlbl_domhsh_add_default(struct netlbl_dom_map *entry); | ||
62 | int netlbl_domhsh_remove_default(void); | ||
63 | struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); | ||
64 | struct sk_buff *netlbl_domhsh_dump(size_t headroom); | ||
65 | struct sk_buff *netlbl_domhsh_dump_default(size_t headroom); | ||
66 | |||
67 | #endif | ||
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c new file mode 100644 index 000000000000..0fd8aaafe23f --- /dev/null +++ b/net/netlabel/netlabel_kapi.c | |||
@@ -0,0 +1,231 @@ | |||
1 | /* | ||
2 | * NetLabel Kernel API | ||
3 | * | ||
4 | * This file defines the kernel API for the NetLabel system. The NetLabel | ||
5 | * system manages static and dynamic label mappings for network protocols such | ||
6 | * as CIPSO and RIPSO. | ||
7 | * | ||
8 | * Author: Paul Moore <paul.moore@hp.com> | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
23 | * the GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #include <linux/init.h> | ||
32 | #include <linux/types.h> | ||
33 | #include <net/ip.h> | ||
34 | #include <net/netlabel.h> | ||
35 | #include <net/cipso_ipv4.h> | ||
36 | #include <asm/bug.h> | ||
37 | |||
38 | #include "netlabel_domainhash.h" | ||
39 | #include "netlabel_unlabeled.h" | ||
40 | #include "netlabel_user.h" | ||
41 | |||
42 | /* | ||
43 | * LSM Functions | ||
44 | */ | ||
45 | |||
46 | /** | ||
47 | * netlbl_socket_setattr - Label a socket using the correct protocol | ||
48 | * @sock: the socket to label | ||
49 | * @secattr: the security attributes | ||
50 | * | ||
51 | * Description: | ||
52 | * Attach the correct label to the given socket using the security attributes | ||
53 | * specified in @secattr. This function requires exclusive access to | ||
54 | * @sock->sk, which means it either needs to be in the process of being | ||
55 | * created or locked via lock_sock(sock->sk). Returns zero on success, | ||
56 | * negative values on failure. | ||
57 | * | ||
58 | */ | ||
59 | int netlbl_socket_setattr(const struct socket *sock, | ||
60 | const struct netlbl_lsm_secattr *secattr) | ||
61 | { | ||
62 | int ret_val = -ENOENT; | ||
63 | struct netlbl_dom_map *dom_entry; | ||
64 | |||
65 | rcu_read_lock(); | ||
66 | dom_entry = netlbl_domhsh_getentry(secattr->domain); | ||
67 | if (dom_entry == NULL) | ||
68 | goto socket_setattr_return; | ||
69 | switch (dom_entry->type) { | ||
70 | case NETLBL_NLTYPE_CIPSOV4: | ||
71 | ret_val = cipso_v4_socket_setattr(sock, | ||
72 | dom_entry->type_def.cipsov4, | ||
73 | secattr); | ||
74 | break; | ||
75 | case NETLBL_NLTYPE_UNLABELED: | ||
76 | ret_val = 0; | ||
77 | break; | ||
78 | default: | ||
79 | ret_val = -ENOENT; | ||
80 | } | ||
81 | |||
82 | socket_setattr_return: | ||
83 | rcu_read_unlock(); | ||
84 | return ret_val; | ||
85 | } | ||
86 | |||
/**
 * netlbl_socket_getattr - Determine the security attributes of a socket
 * @sock: the socket
 * @secattr: the security attributes
 *
 * Description:
 * Examine @sock for any NetLabel style labeling; if some is found, parse the
 * socket label and fill in @secattr.  Returns zero on success, negative
 * values on failure.
 *
 */
int netlbl_socket_getattr(const struct socket *sock,
			  struct netlbl_lsm_secattr *secattr)
{
	/* try CIPSO first, then fall back to the unlabeled handler */
	if (cipso_v4_socket_getattr(sock, secattr) == 0)
		return 0;

	return netlbl_unlabel_getattr(secattr);
}
110 | |||
/**
 * netlbl_skbuff_getattr - Determine the security attributes of a packet
 * @skb: the packet
 * @secattr: the security attributes
 *
 * Description:
 * Examine @skb for a recognized form of packet labeling; if one is present,
 * parse the packet label and fill in @secattr.  Returns zero on success,
 * negative values on failure.
 *
 */
int netlbl_skbuff_getattr(const struct sk_buff *skb,
			  struct netlbl_lsm_secattr *secattr)
{
	/* try CIPSO first, then fall back to the unlabeled handler */
	if (cipso_v4_skbuff_getattr(skb, secattr) == 0)
		return 0;

	return netlbl_unlabel_getattr(secattr);
}
134 | |||
/**
 * netlbl_skbuff_err - Handle a LSM error on a sk_buff
 * @skb: the packet
 * @error: the error code
 *
 * Description:
 * Deal with a LSM problem when handling the packet in @skb, typically this is
 * a permission denied problem (-EACCES).  The correct action is determined
 * according to the packet's labeling protocol.
 *
 */
void netlbl_skbuff_err(struct sk_buff *skb, int error)
{
	/* only CIPSOv4-labeled packets have an error action defined */
	if (!CIPSO_V4_OPTEXIST(skb))
		return;

	cipso_v4_error(skb, error, 0);
}
151 | |||
/**
 * netlbl_cache_invalidate - Invalidate all of the NetLabel protocol caches
 *
 * Description:
 * Invalidate the label mapping cache of every NetLabel protocol that
 * maintains one.
 *
 */
void netlbl_cache_invalidate(void)
{
	/* CIPSOv4 is currently the only protocol with a mapping cache */
	cipso_v4_cache_invalidate();
}
165 | |||
166 | /** | ||
167 | * netlbl_cache_add - Add an entry to a NetLabel protocol cache | ||
168 | * @skb: the packet | ||
169 | * @secattr: the packet's security attributes | ||
170 | * | ||
171 | * Description: | ||
172 | * Add the LSM security attributes for the given packet to the underlying | ||
173 | * NetLabel protocol's label mapping cache. Returns zero on success, negative | ||
174 | * values on error. | ||
175 | * | ||
176 | */ | ||
177 | int netlbl_cache_add(const struct sk_buff *skb, | ||
178 | const struct netlbl_lsm_secattr *secattr) | ||
179 | { | ||
180 | if (secattr->cache.data == NULL) | ||
181 | return -ENOMSG; | ||
182 | |||
183 | if (CIPSO_V4_OPTEXIST(skb)) | ||
184 | return cipso_v4_cache_add(skb, secattr); | ||
185 | |||
186 | return -ENOMSG; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Setup Functions | ||
191 | */ | ||
192 | |||
/**
 * netlbl_init - Initialize NetLabel
 *
 * Description:
 * Perform the required NetLabel initialization before first use: set up the
 * domain hash table, register the NETLINK interfaces and install the
 * "allow unlabeled traffic" default configuration.  Any failure is treated
 * as fatal (panic) rather than returned, since a half-initialized NetLabel
 * could leave the system's labeling policy inconsistent.
 *
 */
static int __init netlbl_init(void)
{
	int ret_val;

	printk(KERN_INFO "NetLabel: Initializing\n");
	printk(KERN_INFO "NetLabel: domain hash size = %u\n",
	       (1 << NETLBL_DOMHSH_BITSIZE));
	printk(KERN_INFO "NetLabel: protocols ="
	       " UNLABELED"
	       " CIPSOv4"
	       "\n");

	ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE);
	if (ret_val != 0)
		goto init_failure;

	ret_val = netlbl_netlink_init();
	if (ret_val != 0)
		goto init_failure;

	/* default configuration: pass unlabeled traffic through */
	ret_val = netlbl_unlabel_defconf();
	if (ret_val != 0)
		goto init_failure;
	printk(KERN_INFO "NetLabel: unlabeled traffic allowed by default\n");

	return 0;

init_failure:
	panic("NetLabel: failed to initialize properly (%d)\n", ret_val);
}

subsys_initcall(netlbl_init);
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c new file mode 100644 index 000000000000..85bc11a1fc46 --- /dev/null +++ b/net/netlabel/netlabel_mgmt.c | |||
@@ -0,0 +1,624 @@ | |||
1 | /* | ||
2 | * NetLabel Management Support | ||
3 | * | ||
4 | * This file defines the management functions for the NetLabel system. The | ||
5 | * NetLabel system manages static and dynamic label mappings for network | ||
6 | * protocols such as CIPSO and RIPSO. | ||
7 | * | ||
8 | * Author: Paul Moore <paul.moore@hp.com> | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
23 | * the GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #include <linux/types.h> | ||
32 | #include <linux/socket.h> | ||
33 | #include <linux/string.h> | ||
34 | #include <linux/skbuff.h> | ||
35 | #include <net/sock.h> | ||
36 | #include <net/netlink.h> | ||
37 | #include <net/genetlink.h> | ||
38 | #include <net/netlabel.h> | ||
39 | #include <net/cipso_ipv4.h> | ||
40 | |||
41 | #include "netlabel_domainhash.h" | ||
42 | #include "netlabel_user.h" | ||
43 | #include "netlabel_mgmt.h" | ||
44 | |||
/* NetLabel Generic NETLINK management family (the previous comment said
 * "CIPSOv4 family" - a copy/paste error; this is the MGMT family) */
static struct genl_family netlbl_mgmt_gnl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = 0,
	.name = NETLBL_NLTYPE_MGMT_NAME,
	.version = NETLBL_PROTO_VERSION,
	.maxattr = 0,
};
53 | |||
54 | |||
55 | /* | ||
56 | * NetLabel Command Handlers | ||
57 | */ | ||
58 | |||
/**
 * netlbl_mgmt_add - Handle an ADD message
 * @skb: the NETLINK buffer
 * @info: the Generic NETLINK info block
 *
 * Description:
 * Process a user generated ADD message and add the domains from the message
 * to the hash table.  See netlabel.h for a description of the message format.
 * Message layout: a u32 entry count, then for each entry a NLA string with
 * the domain name, a u32 mapping type, and (for CIPSOv4 only) a u32 DOI.
 * Returns zero on success, negative values on failure.
 *
 */
static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
{
	int ret_val = -EINVAL;
	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
	int msg_len = netlbl_netlink_payload_len(skb);
	u32 count;
	struct netlbl_dom_map *entry = NULL;
	u32 iter;
	u32 tmp_val;
	int tmp_size;

	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
	if (ret_val != 0)
		goto add_failure;

	if (msg_len < NETLBL_LEN_U32)
		goto add_failure;
	count = netlbl_getinc_u32(&msg_ptr, &msg_len);

	/* entry is reset to NULL on every iteration so that add_failure
	 * only frees an entry that has not yet been handed to the hash
	 * table.  NOTE(review): this assumes netlbl_domhsh_add() does not
	 * keep a reference to @entry when it fails - confirm. */
	for (iter = 0; iter < count && msg_len > 0; iter++, entry = NULL) {
		if (msg_len <= 0) {
			ret_val = -EINVAL;
			goto add_failure;
		}
		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
		if (entry == NULL) {
			ret_val = -ENOMEM;
			goto add_failure;
		}
		tmp_size = nla_len(msg_ptr);
		if (tmp_size <= 0 || tmp_size > msg_len) {
			ret_val = -EINVAL;
			goto add_failure;
		}
		entry->domain = kmalloc(tmp_size, GFP_KERNEL);
		if (entry->domain == NULL) {
			ret_val = -ENOMEM;
			goto add_failure;
		}
		/* force NUL termination of the copied domain name */
		nla_strlcpy(entry->domain, msg_ptr, tmp_size);
		entry->domain[tmp_size - 1] = '\0';
		msg_ptr = nla_next(msg_ptr, &msg_len);

		if (msg_len < NETLBL_LEN_U32) {
			ret_val = -EINVAL;
			goto add_failure;
		}
		tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
		entry->type = tmp_val;
		switch (tmp_val) {
		case NETLBL_NLTYPE_UNLABELED:
			ret_val = netlbl_domhsh_add(entry);
			break;
		case NETLBL_NLTYPE_CIPSOV4:
			if (msg_len < NETLBL_LEN_U32) {
				ret_val = -EINVAL;
				goto add_failure;
			}
			tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
			/* We should be holding a rcu_read_lock() here
			 * while we hold the result but since the entry
			 * will always be deleted when the CIPSO DOI
			 * is deleted we aren't going to keep the lock. */
			rcu_read_lock();
			entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
			if (entry->type_def.cipsov4 == NULL) {
				rcu_read_unlock();
				ret_val = -EINVAL;
				goto add_failure;
			}
			ret_val = netlbl_domhsh_add(entry);
			rcu_read_unlock();
			break;
		default:
			ret_val = -EINVAL;
		}
		if (ret_val != 0)
			goto add_failure;
	}

	netlbl_netlink_send_ack(info,
				netlbl_mgmt_gnl_family.id,
				NLBL_MGMT_C_ACK,
				NETLBL_E_OK);
	return 0;

add_failure:
	/* kfree(NULL) is a no-op so a partially built entry is fine here */
	if (entry)
		kfree(entry->domain);
	kfree(entry);
	netlbl_netlink_send_ack(info,
				netlbl_mgmt_gnl_family.id,
				NLBL_MGMT_C_ACK,
				-ret_val);
	return ret_val;
}
166 | |||
/**
 * netlbl_mgmt_remove - Handle a REMOVE message
 * @skb: the NETLINK buffer
 * @info: the Generic NETLINK info block
 *
 * Description:
 * Process a user generated REMOVE message and remove the specified domain
 * mappings.  Message layout: a u32 entry count followed by one NLA string
 * attribute per domain to remove; each string must already be NUL
 * terminated.  Returns zero on success, negative values on failure.
 *
 */
static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
{
	int ret_val = -EINVAL;
	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
	int msg_len = netlbl_netlink_payload_len(skb);
	u32 count;
	u32 iter;
	int tmp_size;
	unsigned char *domain;

	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
	if (ret_val != 0)
		goto remove_return;

	if (msg_len < NETLBL_LEN_U32)
		goto remove_return;
	count = netlbl_getinc_u32(&msg_ptr, &msg_len);

	for (iter = 0; iter < count && msg_len > 0; iter++) {
		if (msg_len <= 0) {
			ret_val = -EINVAL;
			goto remove_return;
		}
		tmp_size = nla_len(msg_ptr);
		domain = nla_data(msg_ptr);
		/* reject attributes whose payload is not a valid, NUL
		 * terminated string contained inside the message */
		if (tmp_size <= 0 || tmp_size > msg_len ||
		    domain[tmp_size - 1] != '\0') {
			ret_val = -EINVAL;
			goto remove_return;
		}
		ret_val = netlbl_domhsh_remove(domain);
		if (ret_val != 0)
			goto remove_return;
		msg_ptr = nla_next(msg_ptr, &msg_len);
	}

	ret_val = 0;

remove_return:
	/* the ACK carries the error as a positive value (0 == success) */
	netlbl_netlink_send_ack(info,
				netlbl_mgmt_gnl_family.id,
				NLBL_MGMT_C_ACK,
				-ret_val);
	return ret_val;
}
222 | |||
/**
 * netlbl_mgmt_list - Handle a LIST message
 * @skb: the NETLINK buffer
 * @info: the Generic NETLINK info block
 *
 * Description:
 * Process a user generated LIST message and dumps the domain hash table in a
 * form suitable for use in a kernel generated LIST message.  Returns zero on
 * success, negative values on failure.
 *
 */
static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info)
{
	int ret_val = -ENOMEM;
	struct sk_buff *ans_skb;

	/* reserve headroom for the NETLINK + Generic NETLINK headers that
	 * netlbl_netlink_hdr_push() prepends below */
	ans_skb = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN));
	if (ans_skb == NULL)
		goto list_failure;
	netlbl_netlink_hdr_push(ans_skb,
				info->snd_pid,
				0,
				netlbl_mgmt_gnl_family.id,
				NLBL_MGMT_C_LIST);

	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
	if (ret_val != 0)
		goto list_failure;

	return 0;

list_failure:
	/* NOTE(review): this path assumes netlbl_netlink_snd() consumes
	 * ans_skb even on failure, otherwise the buffer leaks here -
	 * confirm the send helper's ownership semantics */
	netlbl_netlink_send_ack(info,
				netlbl_mgmt_gnl_family.id,
				NLBL_MGMT_C_ACK,
				-ret_val);
	return ret_val;
}
261 | |||
/**
 * netlbl_mgmt_adddef - Handle an ADDDEF message
 * @skb: the NETLINK buffer
 * @info: the Generic NETLINK info block
 *
 * Description:
 * Process a user generated ADDDEF message and respond accordingly.  Message
 * layout: a u32 mapping type followed, for CIPSOv4 only, by a u32 DOI.
 * Returns zero on success, negative values on failure.
 *
 */
static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
{
	int ret_val = -EINVAL;
	struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb);
	int msg_len = netlbl_netlink_payload_len(skb);
	struct netlbl_dom_map *entry = NULL;
	u32 tmp_val;

	ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN);
	if (ret_val != 0)
		goto adddef_failure;

	if (msg_len < NETLBL_LEN_U32)
		goto adddef_failure;
	tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (entry == NULL) {
		ret_val = -ENOMEM;
		goto adddef_failure;
	}

	entry->type = tmp_val;
	switch (entry->type) {
	case NETLBL_NLTYPE_UNLABELED:
		ret_val = netlbl_domhsh_add_default(entry);
		break;
	case NETLBL_NLTYPE_CIPSOV4:
		if (msg_len < NETLBL_LEN_U32) {
			ret_val = -EINVAL;
			goto adddef_failure;
		}
		tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len);
		/* We should be holding a rcu_read_lock here while we
		 * hold the result but since the entry will always be
		 * deleted when the CIPSO DOI is deleted we are going
		 * to skip the lock. */
		rcu_read_lock();
		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
		if (entry->type_def.cipsov4 == NULL) {
			rcu_read_unlock();
			ret_val = -EINVAL;
			goto adddef_failure;
		}
		ret_val = netlbl_domhsh_add_default(entry);
		rcu_read_unlock();
		break;
	default:
		ret_val = -EINVAL;
	}
	if (ret_val != 0)
		goto adddef_failure;

	netlbl_netlink_send_ack(info,
				netlbl_mgmt_gnl_family.id,
				NLBL_MGMT_C_ACK,
				NETLBL_E_OK);
	return 0;

adddef_failure:
	/* NOTE(review): assumes netlbl_domhsh_add_default() does not keep
	 * a reference to @entry when it fails - confirm.  kfree(NULL) is
	 * a no-op for the pre-allocation failures. */
	kfree(entry);
	netlbl_netlink_send_ack(info,
				netlbl_mgmt_gnl_family.id,
				NLBL_MGMT_C_ACK,
				-ret_val);
	return ret_val;
}
339 | |||
340 | /** | ||
341 | * netlbl_mgmt_removedef - Handle a REMOVEDEF message | ||
342 | * @skb: the NETLINK buffer | ||
343 | * @info: the Generic NETLINK info block | ||
344 | * | ||
345 | * Description: | ||
346 | * Process a user generated REMOVEDEF message and remove the default domain | ||
347 | * mapping. Returns zero on success, negative values on failure. | ||
348 | * | ||
349 | */ | ||
350 | static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) | ||
351 | { | ||
352 | int ret_val; | ||
353 | |||
354 | ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); | ||
355 | if (ret_val != 0) | ||
356 | goto removedef_return; | ||
357 | |||
358 | ret_val = netlbl_domhsh_remove_default(); | ||
359 | |||
360 | removedef_return: | ||
361 | netlbl_netlink_send_ack(info, | ||
362 | netlbl_mgmt_gnl_family.id, | ||
363 | NLBL_MGMT_C_ACK, | ||
364 | -ret_val); | ||
365 | return ret_val; | ||
366 | } | ||
367 | |||
/**
 * netlbl_mgmt_listdef - Handle a LISTDEF message
 * @skb: the NETLINK buffer
 * @info: the Generic NETLINK info block
 *
 * Description:
 * Process a user generated LISTDEF message and dumps the default domain
 * mapping in a form suitable for use in a kernel generated LISTDEF message.
 * Returns zero on success, negative values on failure.
 *
 */
static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info)
{
	int ret_val = -ENOMEM;
	struct sk_buff *ans_skb;

	/* reserve headroom for the NETLINK + Generic NETLINK headers that
	 * netlbl_netlink_hdr_push() prepends below */
	ans_skb = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN));
	if (ans_skb == NULL)
		goto listdef_failure;
	netlbl_netlink_hdr_push(ans_skb,
				info->snd_pid,
				0,
				netlbl_mgmt_gnl_family.id,
				NLBL_MGMT_C_LISTDEF);

	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
	if (ret_val != 0)
		goto listdef_failure;

	return 0;

listdef_failure:
	/* NOTE(review): this path assumes netlbl_netlink_snd() consumes
	 * ans_skb even on failure, otherwise the buffer leaks here -
	 * confirm the send helper's ownership semantics */
	netlbl_netlink_send_ack(info,
				netlbl_mgmt_gnl_family.id,
				NLBL_MGMT_C_ACK,
				-ret_val);
	return ret_val;
}
406 | |||
/**
 * netlbl_mgmt_modules - Handle a MODULES message
 * @skb: the NETLINK buffer
 * @info: the Generic NETLINK info block
 *
 * Description:
 * Process a user generated MODULES message and respond with a list of the
 * supported labeling protocol modules (currently hard coded: UNLABELED and
 * CIPSOv4).  Returns zero on success, negative values on failure.
 *
 */
static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info)
{
	int ret_val = -ENOMEM;
	size_t data_size;
	u32 mod_count;
	struct sk_buff *ans_skb = NULL;

	/* unlabeled + cipsov4 */
	mod_count = 2;

	/* genetlink header + u32 module count + one u32 per module */
	data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32;
	ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL);
	if (ans_skb == NULL)
		goto modules_failure;

	if (netlbl_netlink_hdr_put(ans_skb,
				   info->snd_pid,
				   0,
				   netlbl_mgmt_gnl_family.id,
				   NLBL_MGMT_C_MODULES) == NULL)
		goto modules_failure;

	ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count);
	if (ret_val != 0)
		goto modules_failure;
	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED);
	if (ret_val != 0)
		goto modules_failure;
	ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4);
	if (ret_val != 0)
		goto modules_failure;

	ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid);
	if (ret_val != 0)
		goto modules_failure;

	return 0;

modules_failure:
	/* NOTE(review): if netlbl_netlink_snd() consumes ans_skb on
	 * failure (as netlink_unicast() does) this kfree_skb() would be a
	 * double free on the snd error path; netlbl_mgmt_list() and
	 * netlbl_mgmt_listdef() do not free after a failed snd - confirm
	 * the send helper's ownership semantics and make these paths
	 * consistent */
	kfree_skb(ans_skb);
	netlbl_netlink_send_ack(info,
				netlbl_mgmt_gnl_family.id,
				NLBL_MGMT_C_ACK,
				-ret_val);
	return ret_val;
}
462 | |||
463 | /** | ||
464 | * netlbl_mgmt_version - Handle a VERSION message | ||
465 | * @skb: the NETLINK buffer | ||
466 | * @info: the Generic NETLINK info block | ||
467 | * | ||
468 | * Description: | ||
469 | * Process a user generated VERSION message and respond accordingly. Returns | ||
470 | * zero on success, negative values on failure. | ||
471 | * | ||
472 | */ | ||
473 | static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) | ||
474 | { | ||
475 | int ret_val = -ENOMEM; | ||
476 | struct sk_buff *ans_skb = NULL; | ||
477 | |||
478 | ans_skb = netlbl_netlink_alloc_skb(0, | ||
479 | GENL_HDRLEN + NETLBL_LEN_U32, | ||
480 | GFP_KERNEL); | ||
481 | if (ans_skb == NULL) | ||
482 | goto version_failure; | ||
483 | if (netlbl_netlink_hdr_put(ans_skb, | ||
484 | info->snd_pid, | ||
485 | 0, | ||
486 | netlbl_mgmt_gnl_family.id, | ||
487 | NLBL_MGMT_C_VERSION) == NULL) | ||
488 | goto version_failure; | ||
489 | |||
490 | ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION); | ||
491 | if (ret_val != 0) | ||
492 | goto version_failure; | ||
493 | |||
494 | ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); | ||
495 | if (ret_val != 0) | ||
496 | goto version_failure; | ||
497 | |||
498 | return 0; | ||
499 | |||
500 | version_failure: | ||
501 | kfree_skb(ans_skb); | ||
502 | netlbl_netlink_send_ack(info, | ||
503 | netlbl_mgmt_gnl_family.id, | ||
504 | NLBL_MGMT_C_ACK, | ||
505 | -ret_val); | ||
506 | return ret_val; | ||
507 | } | ||
508 | |||
509 | |||
/*
 * NetLabel Generic NETLINK Command Definitions
 *
 * One genl_ops per management command; every command is a plain request
 * handled via .doit, none of them support NETLINK dump (.dumpit) semantics.
 */

/* ADD: add one or more domain mappings (netlbl_mgmt_add) */
static struct genl_ops netlbl_mgmt_genl_c_add = {
	.cmd = NLBL_MGMT_C_ADD,
	.flags = 0,
	.doit = netlbl_mgmt_add,
	.dumpit = NULL,
};

/* REMOVE: remove one or more domain mappings (netlbl_mgmt_remove) */
static struct genl_ops netlbl_mgmt_genl_c_remove = {
	.cmd = NLBL_MGMT_C_REMOVE,
	.flags = 0,
	.doit = netlbl_mgmt_remove,
	.dumpit = NULL,
};

/* LIST: dump the configured domain mappings (netlbl_mgmt_list) */
static struct genl_ops netlbl_mgmt_genl_c_list = {
	.cmd = NLBL_MGMT_C_LIST,
	.flags = 0,
	.doit = netlbl_mgmt_list,
	.dumpit = NULL,
};

/* ADDDEF: set the default domain mapping (netlbl_mgmt_adddef) */
static struct genl_ops netlbl_mgmt_genl_c_adddef = {
	.cmd = NLBL_MGMT_C_ADDDEF,
	.flags = 0,
	.doit = netlbl_mgmt_adddef,
	.dumpit = NULL,
};

/* REMOVEDEF: remove the default domain mapping (netlbl_mgmt_removedef) */
static struct genl_ops netlbl_mgmt_genl_c_removedef = {
	.cmd = NLBL_MGMT_C_REMOVEDEF,
	.flags = 0,
	.doit = netlbl_mgmt_removedef,
	.dumpit = NULL,
};

/* LISTDEF: dump the default domain mapping (netlbl_mgmt_listdef) */
static struct genl_ops netlbl_mgmt_genl_c_listdef = {
	.cmd = NLBL_MGMT_C_LISTDEF,
	.flags = 0,
	.doit = netlbl_mgmt_listdef,
	.dumpit = NULL,
};

/* MODULES: list the built-in NetLabel protocol modules (netlbl_mgmt_modules) */
static struct genl_ops netlbl_mgmt_genl_c_modules = {
	.cmd = NLBL_MGMT_C_MODULES,
	.flags = 0,
	.doit = netlbl_mgmt_modules,
	.dumpit = NULL,
};

/* VERSION: report the NetLabel protocol version (netlbl_mgmt_version) */
static struct genl_ops netlbl_mgmt_genl_c_version = {
	.cmd = NLBL_MGMT_C_VERSION,
	.flags = 0,
	.doit = netlbl_mgmt_version,
	.dumpit = NULL,
};
569 | |||
570 | /* | ||
571 | * NetLabel Generic NETLINK Protocol Functions | ||
572 | */ | ||
573 | |||
574 | /** | ||
575 | * netlbl_mgmt_genl_init - Register the NetLabel management component | ||
576 | * | ||
577 | * Description: | ||
578 | * Register the NetLabel management component with the Generic NETLINK | ||
579 | * mechanism. Returns zero on success, negative values on failure. | ||
580 | * | ||
581 | */ | ||
582 | int netlbl_mgmt_genl_init(void) | ||
583 | { | ||
584 | int ret_val; | ||
585 | |||
586 | ret_val = genl_register_family(&netlbl_mgmt_gnl_family); | ||
587 | if (ret_val != 0) | ||
588 | return ret_val; | ||
589 | |||
590 | ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, | ||
591 | &netlbl_mgmt_genl_c_add); | ||
592 | if (ret_val != 0) | ||
593 | return ret_val; | ||
594 | ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, | ||
595 | &netlbl_mgmt_genl_c_remove); | ||
596 | if (ret_val != 0) | ||
597 | return ret_val; | ||
598 | ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, | ||
599 | &netlbl_mgmt_genl_c_list); | ||
600 | if (ret_val != 0) | ||
601 | return ret_val; | ||
602 | ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, | ||
603 | &netlbl_mgmt_genl_c_adddef); | ||
604 | if (ret_val != 0) | ||
605 | return ret_val; | ||
606 | ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, | ||
607 | &netlbl_mgmt_genl_c_removedef); | ||
608 | if (ret_val != 0) | ||
609 | return ret_val; | ||
610 | ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, | ||
611 | &netlbl_mgmt_genl_c_listdef); | ||
612 | if (ret_val != 0) | ||
613 | return ret_val; | ||
614 | ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, | ||
615 | &netlbl_mgmt_genl_c_modules); | ||
616 | if (ret_val != 0) | ||
617 | return ret_val; | ||
618 | ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, | ||
619 | &netlbl_mgmt_genl_c_version); | ||
620 | if (ret_val != 0) | ||
621 | return ret_val; | ||
622 | |||
623 | return 0; | ||
624 | } | ||
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h new file mode 100644 index 000000000000..fd6c6acbfa08 --- /dev/null +++ b/net/netlabel/netlabel_mgmt.h | |||
@@ -0,0 +1,246 @@ | |||
1 | /* | ||
2 | * NetLabel Management Support | ||
3 | * | ||
4 | * This file defines the management functions for the NetLabel system. The | ||
5 | * NetLabel system manages static and dynamic label mappings for network | ||
6 | * protocols such as CIPSO and RIPSO. | ||
7 | * | ||
8 | * Author: Paul Moore <paul.moore@hp.com> | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
23 | * the GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #ifndef _NETLABEL_MGMT_H | ||
32 | #define _NETLABEL_MGMT_H | ||
33 | |||
34 | #include <net/netlabel.h> | ||
35 | |||
36 | /* | ||
37 | * The following NetLabel payloads are supported by the management interface, | ||
 * all of which are preceded by the nlmsghdr struct.
39 | * | ||
40 | * o ACK: | ||
 *   Sent by the kernel in response to an application's message, applications
42 | * should never send this message. | ||
43 | * | ||
44 | * +----------------------+-----------------------+ | ||
45 | * | seq number (32 bits) | return code (32 bits) | | ||
46 | * +----------------------+-----------------------+ | ||
47 | * | ||
48 | * seq number: the sequence number of the original message, taken from the | ||
49 | * nlmsghdr structure | ||
50 | * return code: return value, based on errno values | ||
51 | * | ||
52 | * o ADD: | ||
53 | * Sent by an application to add a domain mapping to the NetLabel system. | ||
54 | * The kernel should respond with an ACK. | ||
55 | * | ||
56 | * +-------------------+ | ||
57 | * | domains (32 bits) | ... | ||
58 | * +-------------------+ | ||
59 | * | ||
60 | * domains: the number of domains in the message | ||
61 | * | ||
62 | * +--------------------------+-------------------------+ | ||
63 | * | domain string (variable) | protocol type (32 bits) | ... | ||
64 | * +--------------------------+-------------------------+ | ||
65 | * | ||
66 | * +-------------- ---- --- -- - | ||
67 | * | mapping data ... repeated | ||
68 | * +-------------- ---- --- -- - | ||
69 | * | ||
70 | * domain string: the domain string, NULL terminated | ||
71 | * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) | ||
72 | * mapping data: specific to the map type (see below) | ||
73 | * | ||
74 | * NETLBL_NLTYPE_UNLABELED | ||
75 | * | ||
76 | * No mapping data for this protocol type. | ||
77 | * | ||
78 | * NETLBL_NLTYPE_CIPSOV4 | ||
79 | * | ||
80 | * +---------------+ | ||
81 | * | doi (32 bits) | | ||
82 | * +---------------+ | ||
83 | * | ||
84 | * doi: the CIPSO DOI value | ||
85 | * | ||
86 | * o REMOVE: | ||
87 | * Sent by an application to remove a domain mapping from the NetLabel | ||
88 | * system. The kernel should ACK this message. | ||
89 | * | ||
90 | * +-------------------+ | ||
91 | * | domains (32 bits) | ... | ||
92 | * +-------------------+ | ||
93 | * | ||
94 | * domains: the number of domains in the message | ||
95 | * | ||
96 | * +--------------------------+ | ||
97 | * | domain string (variable) | ... | ||
98 | * +--------------------------+ | ||
99 | * | ||
100 | * domain string: the domain string, NULL terminated | ||
101 | * | ||
102 | * o LIST: | ||
103 | * This message can be sent either from an application or by the kernel in | ||
104 | * response to an application generated LIST message. When sent by an | ||
105 | * application there is no payload. The kernel should respond to a LIST | ||
106 | * message either with a LIST message on success or an ACK message on | ||
107 | * failure. | ||
108 | * | ||
109 | * +-------------------+ | ||
110 | * | domains (32 bits) | ... | ||
111 | * +-------------------+ | ||
112 | * | ||
113 | * domains: the number of domains in the message | ||
114 | * | ||
115 | * +--------------------------+ | ||
116 | * | domain string (variable) | ... | ||
117 | * +--------------------------+ | ||
118 | * | ||
119 | * +-------------------------+-------------- ---- --- -- - | ||
120 | * | protocol type (32 bits) | mapping data ... repeated | ||
121 | * +-------------------------+-------------- ---- --- -- - | ||
122 | * | ||
123 | * domain string: the domain string, NULL terminated | ||
124 | * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) | ||
125 | * mapping data: specific to the map type (see below) | ||
126 | * | ||
127 | * NETLBL_NLTYPE_UNLABELED | ||
128 | * | ||
129 | * No mapping data for this protocol type. | ||
130 | * | ||
131 | * NETLBL_NLTYPE_CIPSOV4 | ||
132 | * | ||
133 | * +----------------+---------------+ | ||
134 | * | type (32 bits) | doi (32 bits) | | ||
135 | * +----------------+---------------+ | ||
136 | * | ||
137 | * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header | ||
138 | * as CIPSO_V4_MAP_*) | ||
139 | * doi: the CIPSO DOI value | ||
140 | * | ||
141 | * o ADDDEF: | ||
142 | * Sent by an application to set the default domain mapping for the NetLabel | ||
143 | * system. The kernel should respond with an ACK. | ||
144 | * | ||
145 | * +-------------------------+-------------- ---- --- -- - | ||
146 | * | protocol type (32 bits) | mapping data ... repeated | ||
147 | * +-------------------------+-------------- ---- --- -- - | ||
148 | * | ||
149 | * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) | ||
150 | * mapping data: specific to the map type (see below) | ||
151 | * | ||
152 | * NETLBL_NLTYPE_UNLABELED | ||
153 | * | ||
154 | * No mapping data for this protocol type. | ||
155 | * | ||
156 | * NETLBL_NLTYPE_CIPSOV4 | ||
157 | * | ||
158 | * +---------------+ | ||
159 | * | doi (32 bits) | | ||
160 | * +---------------+ | ||
161 | * | ||
162 | * doi: the CIPSO DOI value | ||
163 | * | ||
164 | * o REMOVEDEF: | ||
165 | * Sent by an application to remove the default domain mapping from the | ||
166 | * NetLabel system, there is no payload. The kernel should ACK this message. | ||
167 | * | ||
168 | * o LISTDEF: | ||
169 | * This message can be sent either from an application or by the kernel in | ||
170 | * response to an application generated LISTDEF message. When sent by an | ||
171 | * application there is no payload. The kernel should respond to a | ||
172 | * LISTDEF message either with a LISTDEF message on success or an ACK message | ||
173 | * on failure. | ||
174 | * | ||
175 | * +-------------------------+-------------- ---- --- -- - | ||
176 | * | protocol type (32 bits) | mapping data ... repeated | ||
177 | * +-------------------------+-------------- ---- --- -- - | ||
178 | * | ||
179 | * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) | ||
180 | * mapping data: specific to the map type (see below) | ||
181 | * | ||
182 | * NETLBL_NLTYPE_UNLABELED | ||
183 | * | ||
184 | * No mapping data for this protocol type. | ||
185 | * | ||
186 | * NETLBL_NLTYPE_CIPSOV4 | ||
187 | * | ||
188 | * +----------------+---------------+ | ||
189 | * | type (32 bits) | doi (32 bits) | | ||
190 | * +----------------+---------------+ | ||
191 | * | ||
192 | * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header | ||
193 | * as CIPSO_V4_MAP_*) | ||
194 | * doi: the CIPSO DOI value | ||
195 | * | ||
196 | * o MODULES: | ||
197 | * Sent by an application to request a list of configured NetLabel modules | ||
198 | * in the kernel. When sent by an application there is no payload. | ||
199 | * | ||
200 | * +-------------------+ | ||
201 | * | modules (32 bits) | ... | ||
202 | * +-------------------+ | ||
203 | * | ||
204 | * modules: the number of modules in the message, if this is an application | ||
205 | * generated message and the value is zero then return a list of | ||
206 | * the configured modules | ||
207 | * | ||
208 | * +------------------+ | ||
209 | * | module (32 bits) | ... repeated | ||
210 | * +------------------+ | ||
211 | * | ||
212 | * module: the module number as defined by NETLBL_NLTYPE_* | ||
213 | * | ||
214 | * o VERSION: | ||
215 | * Sent by an application to request the NetLabel version string. When sent | ||
216 | * by an application there is no payload. This message type is also used by | ||
217 | * the kernel to respond to an VERSION request. | ||
218 | * | ||
219 | * +-------------------+ | ||
220 | * | version (32 bits) | | ||
221 | * +-------------------+ | ||
222 | * | ||
223 | * version: the protocol version number | ||
224 | * | ||
225 | */ | ||
226 | |||
/* NetLabel Management commands (Generic NETLINK command ids; payloads are
 * described in the large comment block above) */
enum {
	NLBL_MGMT_C_UNSPEC,	/* unused/reserved, must stay first */
	NLBL_MGMT_C_ACK,	/* kernel -> app: result of a request */
	NLBL_MGMT_C_ADD,	/* add one or more domain mappings */
	NLBL_MGMT_C_REMOVE,	/* remove one or more domain mappings */
	NLBL_MGMT_C_LIST,	/* list the configured domain mappings */
	NLBL_MGMT_C_ADDDEF,	/* set the default domain mapping */
	NLBL_MGMT_C_REMOVEDEF,	/* remove the default domain mapping */
	NLBL_MGMT_C_LISTDEF,	/* list the default domain mapping */
	NLBL_MGMT_C_MODULES,	/* list the configured NetLabel modules */
	NLBL_MGMT_C_VERSION,	/* query the NetLabel protocol version */
	__NLBL_MGMT_C_MAX,
};
#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1)
242 | |||
243 | /* NetLabel protocol functions */ | ||
244 | int netlbl_mgmt_genl_init(void); | ||
245 | |||
246 | #endif | ||
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c new file mode 100644 index 000000000000..785f4960e0d3 --- /dev/null +++ b/net/netlabel/netlabel_unlabeled.c | |||
@@ -0,0 +1,253 @@ | |||
1 | /* | ||
2 | * NetLabel Unlabeled Support | ||
3 | * | ||
4 | * This file defines functions for dealing with unlabeled packets for the | ||
5 | * NetLabel system. The NetLabel system manages static and dynamic label | ||
6 | * mappings for network protocols such as CIPSO and RIPSO. | ||
7 | * | ||
8 | * Author: Paul Moore <paul.moore@hp.com> | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
23 | * the GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #include <linux/types.h> | ||
32 | #include <linux/rcupdate.h> | ||
33 | #include <linux/list.h> | ||
34 | #include <linux/spinlock.h> | ||
35 | #include <linux/socket.h> | ||
36 | #include <linux/string.h> | ||
37 | #include <linux/skbuff.h> | ||
38 | #include <net/sock.h> | ||
39 | #include <net/netlink.h> | ||
40 | #include <net/genetlink.h> | ||
41 | |||
42 | #include <net/netlabel.h> | ||
43 | #include <asm/bug.h> | ||
44 | |||
45 | #include "netlabel_user.h" | ||
46 | #include "netlabel_domainhash.h" | ||
47 | #include "netlabel_unlabeled.h" | ||
48 | |||
49 | /* Accept unlabeled packets flag */ | ||
50 | static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0); | ||
51 | |||
/* NetLabel Generic NETLINK Unlabeled family
 * (the original comment said "CIPSOv4 family" -- copy-paste error) */
static struct genl_family netlbl_unlabel_gnl_family = {
	.id = GENL_ID_GENERATE,	/* let Generic NETLINK assign the family id */
	.hdrsize = 0,
	.name = NETLBL_NLTYPE_UNLABELED_NAME,
	.version = NETLBL_PROTO_VERSION,
	.maxattr = 0,
};
60 | |||
61 | |||
62 | /* | ||
63 | * NetLabel Command Handlers | ||
64 | */ | ||
65 | |||
66 | /** | ||
67 | * netlbl_unlabel_accept - Handle an ACCEPT message | ||
68 | * @skb: the NETLINK buffer | ||
69 | * @info: the Generic NETLINK info block | ||
70 | * | ||
71 | * Description: | ||
72 | * Process a user generated ACCEPT message and set the accept flag accordingly. | ||
73 | * Returns zero on success, negative values on failure. | ||
74 | * | ||
75 | */ | ||
76 | static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) | ||
77 | { | ||
78 | int ret_val; | ||
79 | struct nlattr *data = netlbl_netlink_payload_data(skb); | ||
80 | u32 value; | ||
81 | |||
82 | ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); | ||
83 | if (ret_val != 0) | ||
84 | return ret_val; | ||
85 | |||
86 | if (netlbl_netlink_payload_len(skb) == NETLBL_LEN_U32) { | ||
87 | value = nla_get_u32(data); | ||
88 | if (value == 1 || value == 0) { | ||
89 | atomic_set(&netlabel_unlabel_accept_flg, value); | ||
90 | netlbl_netlink_send_ack(info, | ||
91 | netlbl_unlabel_gnl_family.id, | ||
92 | NLBL_UNLABEL_C_ACK, | ||
93 | NETLBL_E_OK); | ||
94 | return 0; | ||
95 | } | ||
96 | } | ||
97 | |||
98 | netlbl_netlink_send_ack(info, | ||
99 | netlbl_unlabel_gnl_family.id, | ||
100 | NLBL_UNLABEL_C_ACK, | ||
101 | EINVAL); | ||
102 | return -EINVAL; | ||
103 | } | ||
104 | |||
105 | /** | ||
106 | * netlbl_unlabel_list - Handle a LIST message | ||
107 | * @skb: the NETLINK buffer | ||
108 | * @info: the Generic NETLINK info block | ||
109 | * | ||
110 | * Description: | ||
111 | * Process a user generated LIST message and respond with the current status. | ||
112 | * Returns zero on success, negative values on failure. | ||
113 | * | ||
114 | */ | ||
115 | static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) | ||
116 | { | ||
117 | int ret_val = -ENOMEM; | ||
118 | struct sk_buff *ans_skb; | ||
119 | |||
120 | ans_skb = netlbl_netlink_alloc_skb(0, | ||
121 | GENL_HDRLEN + NETLBL_LEN_U32, | ||
122 | GFP_KERNEL); | ||
123 | if (ans_skb == NULL) | ||
124 | goto list_failure; | ||
125 | |||
126 | if (netlbl_netlink_hdr_put(ans_skb, | ||
127 | info->snd_pid, | ||
128 | 0, | ||
129 | netlbl_unlabel_gnl_family.id, | ||
130 | NLBL_UNLABEL_C_LIST) == NULL) | ||
131 | goto list_failure; | ||
132 | |||
133 | ret_val = nla_put_u32(ans_skb, | ||
134 | NLA_U32, | ||
135 | atomic_read(&netlabel_unlabel_accept_flg)); | ||
136 | if (ret_val != 0) | ||
137 | goto list_failure; | ||
138 | |||
139 | ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); | ||
140 | if (ret_val != 0) | ||
141 | goto list_failure; | ||
142 | |||
143 | return 0; | ||
144 | |||
145 | list_failure: | ||
146 | netlbl_netlink_send_ack(info, | ||
147 | netlbl_unlabel_gnl_family.id, | ||
148 | NLBL_UNLABEL_C_ACK, | ||
149 | -ret_val); | ||
150 | return ret_val; | ||
151 | } | ||
152 | |||
153 | |||
/*
 * NetLabel Generic NETLINK Command Definitions
 *
 * Both commands are plain requests handled via .doit; neither supports
 * NETLINK dump (.dumpit) semantics.
 */

/* ACCEPT: set the "accept unlabeled packets" flag (netlbl_unlabel_accept) */
static struct genl_ops netlbl_unlabel_genl_c_accept = {
	.cmd = NLBL_UNLABEL_C_ACCEPT,
	.flags = 0,
	.doit = netlbl_unlabel_accept,
	.dumpit = NULL,
};

/* LIST: report the current accept flag value (netlbl_unlabel_list) */
static struct genl_ops netlbl_unlabel_genl_c_list = {
	.cmd = NLBL_UNLABEL_C_LIST,
	.flags = 0,
	.doit = netlbl_unlabel_list,
	.dumpit = NULL,
};
171 | |||
172 | |||
173 | /* | ||
174 | * NetLabel Generic NETLINK Protocol Functions | ||
175 | */ | ||
176 | |||
177 | /** | ||
178 | * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component | ||
179 | * | ||
180 | * Description: | ||
181 | * Register the unlabeled packet NetLabel component with the Generic NETLINK | ||
182 | * mechanism. Returns zero on success, negative values on failure. | ||
183 | * | ||
184 | */ | ||
185 | int netlbl_unlabel_genl_init(void) | ||
186 | { | ||
187 | int ret_val; | ||
188 | |||
189 | ret_val = genl_register_family(&netlbl_unlabel_gnl_family); | ||
190 | if (ret_val != 0) | ||
191 | return ret_val; | ||
192 | |||
193 | ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, | ||
194 | &netlbl_unlabel_genl_c_accept); | ||
195 | if (ret_val != 0) | ||
196 | return ret_val; | ||
197 | |||
198 | ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, | ||
199 | &netlbl_unlabel_genl_c_list); | ||
200 | if (ret_val != 0) | ||
201 | return ret_val; | ||
202 | |||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * NetLabel KAPI Hooks | ||
208 | */ | ||
209 | |||
210 | /** | ||
211 | * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet | ||
212 | * @secattr: the security attributes | ||
213 | * | ||
214 | * Description: | ||
215 | * Determine the security attributes, if any, for an unlabled packet and return | ||
216 | * them in @secattr. Returns zero on success and negative values on failure. | ||
217 | * | ||
218 | */ | ||
219 | int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr) | ||
220 | { | ||
221 | if (atomic_read(&netlabel_unlabel_accept_flg) == 1) { | ||
222 | memset(secattr, 0, sizeof(*secattr)); | ||
223 | return 0; | ||
224 | } | ||
225 | |||
226 | return -ENOMSG; | ||
227 | } | ||
228 | |||
229 | /** | ||
230 | * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets | ||
231 | * | ||
232 | * Description: | ||
233 | * Set the default NetLabel configuration to allow incoming unlabeled packets | ||
234 | * and to send unlabeled network traffic by default. | ||
235 | * | ||
236 | */ | ||
237 | int netlbl_unlabel_defconf(void) | ||
238 | { | ||
239 | int ret_val; | ||
240 | struct netlbl_dom_map *entry; | ||
241 | |||
242 | entry = kzalloc(sizeof(*entry), GFP_KERNEL); | ||
243 | if (entry == NULL) | ||
244 | return -ENOMEM; | ||
245 | entry->type = NETLBL_NLTYPE_UNLABELED; | ||
246 | ret_val = netlbl_domhsh_add_default(entry); | ||
247 | if (ret_val != 0) | ||
248 | return ret_val; | ||
249 | |||
250 | atomic_set(&netlabel_unlabel_accept_flg, 1); | ||
251 | |||
252 | return 0; | ||
253 | } | ||
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h new file mode 100644 index 000000000000..f300e54e14b6 --- /dev/null +++ b/net/netlabel/netlabel_unlabeled.h | |||
@@ -0,0 +1,98 @@ | |||
1 | /* | ||
2 | * NetLabel Unlabeled Support | ||
3 | * | ||
4 | * This file defines functions for dealing with unlabeled packets for the | ||
5 | * NetLabel system. The NetLabel system manages static and dynamic label | ||
6 | * mappings for network protocols such as CIPSO and RIPSO. | ||
7 | * | ||
8 | * Author: Paul Moore <paul.moore@hp.com> | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
23 | * the GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #ifndef _NETLABEL_UNLABELED_H | ||
32 | #define _NETLABEL_UNLABELED_H | ||
33 | |||
34 | #include <net/netlabel.h> | ||
35 | |||
36 | /* | ||
37 | * The following NetLabel payloads are supported by the Unlabeled subsystem. | ||
38 | * | ||
39 | * o ACK: | ||
 *   Sent by the kernel in response to an application's message, applications
41 | * should never send this message. | ||
42 | * | ||
43 | * +----------------------+-----------------------+ | ||
44 | * | seq number (32 bits) | return code (32 bits) | | ||
45 | * +----------------------+-----------------------+ | ||
46 | * | ||
47 | * seq number: the sequence number of the original message, taken from the | ||
48 | * nlmsghdr structure | ||
49 | * return code: return value, based on errno values | ||
50 | * | ||
51 | * o ACCEPT | ||
52 | * This message is sent from an application to specify if the kernel should | ||
 *   allow unlabeled packets to pass if they do not match any of the static
54 | * mappings defined in the unlabeled module. | ||
55 | * | ||
56 | * +-----------------+ | ||
57 | * | allow (32 bits) | | ||
58 | * +-----------------+ | ||
59 | * | ||
60 | * allow: if true (1) then allow the packets to pass, if false (0) then | ||
61 | * reject the packets | ||
62 | * | ||
63 | * o LIST | ||
64 | * This message can be sent either from an application or by the kernel in | ||
65 | * response to an application generated LIST message. When sent by an | ||
66 | * application there is no payload. The kernel should respond to a LIST | ||
67 | * message either with a LIST message on success or an ACK message on | ||
68 | * failure. | ||
69 | * | ||
70 | * +-----------------------+ | ||
71 | * | accept flag (32 bits) | | ||
72 | * +-----------------------+ | ||
73 | * | ||
74 | * accept flag: if true (1) then unlabeled packets are allowed to pass, | ||
75 | * if false (0) then unlabeled packets are rejected | ||
76 | * | ||
77 | */ | ||
78 | |||
/* NetLabel Unlabeled commands (Generic NETLINK command ids; payloads are
 * described in the comment block above) */
enum {
	NLBL_UNLABEL_C_UNSPEC,	/* unused/reserved, must stay first */
	NLBL_UNLABEL_C_ACK,	/* kernel -> app: result of a request */
	NLBL_UNLABEL_C_ACCEPT,	/* set the "accept unlabeled packets" flag */
	NLBL_UNLABEL_C_LIST,	/* query the current accept flag value */
	__NLBL_UNLABEL_C_MAX,
};
#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)
88 | |||
89 | /* NetLabel protocol functions */ | ||
90 | int netlbl_unlabel_genl_init(void); | ||
91 | |||
92 | /* Process Unlabeled incoming network packets */ | ||
93 | int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr); | ||
94 | |||
95 | /* Set the default configuration to allow Unlabeled packets */ | ||
96 | int netlbl_unlabel_defconf(void); | ||
97 | |||
98 | #endif | ||
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c new file mode 100644 index 000000000000..73cbe66e42ff --- /dev/null +++ b/net/netlabel/netlabel_user.c | |||
@@ -0,0 +1,158 @@ | |||
1 | /* | ||
2 | * NetLabel NETLINK Interface | ||
3 | * | ||
4 | * This file defines the NETLINK interface for the NetLabel system. The | ||
5 | * NetLabel system manages static and dynamic label mappings for network | ||
6 | * protocols such as CIPSO and RIPSO. | ||
7 | * | ||
8 | * Author: Paul Moore <paul.moore@hp.com> | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
23 | * the GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #include <linux/init.h> | ||
32 | #include <linux/types.h> | ||
33 | #include <linux/list.h> | ||
34 | #include <linux/socket.h> | ||
35 | #include <net/sock.h> | ||
36 | #include <net/netlink.h> | ||
37 | #include <net/genetlink.h> | ||
38 | #include <net/netlabel.h> | ||
39 | #include <asm/bug.h> | ||
40 | |||
41 | #include "netlabel_mgmt.h" | ||
42 | #include "netlabel_unlabeled.h" | ||
43 | #include "netlabel_cipso_v4.h" | ||
44 | #include "netlabel_user.h" | ||
45 | |||
46 | /* | ||
47 | * NetLabel NETLINK Setup Functions | ||
48 | */ | ||
49 | |||
50 | /** | ||
51 | * netlbl_netlink_init - Initialize the NETLINK communication channel | ||
52 | * | ||
53 | * Description: | ||
54 | * Call out to the NetLabel components so they can register their families and | ||
55 | * commands with the Generic NETLINK mechanism. Returns zero on success and | ||
56 | * non-zero on failure. | ||
57 | * | ||
58 | */ | ||
59 | int netlbl_netlink_init(void) | ||
60 | { | ||
61 | int ret_val; | ||
62 | |||
63 | ret_val = netlbl_mgmt_genl_init(); | ||
64 | if (ret_val != 0) | ||
65 | return ret_val; | ||
66 | |||
67 | ret_val = netlbl_cipsov4_genl_init(); | ||
68 | if (ret_val != 0) | ||
69 | return ret_val; | ||
70 | |||
71 | ret_val = netlbl_unlabel_genl_init(); | ||
72 | if (ret_val != 0) | ||
73 | return ret_val; | ||
74 | |||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * NetLabel Common Protocol Functions | ||
80 | */ | ||
81 | |||
82 | /** | ||
83 | * netlbl_netlink_send_ack - Send an ACK message | ||
84 | * @info: the generic NETLINK information | ||
85 | * @genl_family: the generic NETLINK family ID value | ||
86 | * @ack_cmd: the generic NETLINK family ACK command value | ||
87 | * @ret_code: return code to use | ||
88 | * | ||
89 | * Description: | ||
90 | * This function sends an ACK message to the sender of the NETLINK message | ||
91 | * specified by @info. | ||
92 | * | ||
93 | */ | ||
94 | void netlbl_netlink_send_ack(const struct genl_info *info, | ||
95 | u32 genl_family, | ||
96 | u8 ack_cmd, | ||
97 | u32 ret_code) | ||
98 | { | ||
99 | size_t data_size; | ||
100 | struct sk_buff *skb; | ||
101 | |||
102 | data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32; | ||
103 | skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); | ||
104 | if (skb == NULL) | ||
105 | return; | ||
106 | |||
107 | if (netlbl_netlink_hdr_put(skb, | ||
108 | info->snd_pid, | ||
109 | 0, | ||
110 | genl_family, | ||
111 | ack_cmd) == NULL) | ||
112 | goto send_ack_failure; | ||
113 | |||
114 | if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0) | ||
115 | goto send_ack_failure; | ||
116 | if (nla_put_u32(skb, NLA_U32, ret_code) != 0) | ||
117 | goto send_ack_failure; | ||
118 | |||
119 | netlbl_netlink_snd(skb, info->snd_pid); | ||
120 | return; | ||
121 | |||
122 | send_ack_failure: | ||
123 | kfree_skb(skb); | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * NETLINK I/O Functions | ||
128 | */ | ||
129 | |||
130 | /** | ||
131 | * netlbl_netlink_snd - Send a NetLabel message | ||
132 | * @skb: NetLabel message | ||
133 | * @pid: destination PID | ||
134 | * | ||
135 | * Description: | ||
136 | * Sends a unicast NetLabel message over the NETLINK socket. | ||
137 | * | ||
138 | */ | ||
139 | int netlbl_netlink_snd(struct sk_buff *skb, u32 pid) | ||
140 | { | ||
141 | return genlmsg_unicast(skb, pid); | ||
142 | } | ||
143 | |||
144 | /** | ||
145 | * netlbl_netlink_snd_multicast - Send a multicast NetLabel message | ||
146 | * @skb: NetLabel message | ||
147 | * @pid: sending PID | ||
148 | * @group: multicast group id | ||
149 | * | ||
150 | * Description: | ||
151 | * Sends a multicast NetLabel message over the NETLINK socket to all members | ||
152 | * of @group except @pid. | ||
153 | * | ||
154 | */ | ||
155 | int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group) | ||
156 | { | ||
157 | return genlmsg_multicast(skb, pid, group, GFP_KERNEL); | ||
158 | } | ||
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h new file mode 100644 index 000000000000..385a6c7488c6 --- /dev/null +++ b/net/netlabel/netlabel_user.h | |||
@@ -0,0 +1,215 @@ | |||
1 | /* | ||
2 | * NetLabel NETLINK Interface | ||
3 | * | ||
4 | * This file defines the NETLINK interface for the NetLabel system. The | ||
5 | * NetLabel system manages static and dynamic label mappings for network | ||
6 | * protocols such as CIPSO and RIPSO. | ||
7 | * | ||
8 | * Author: Paul Moore <paul.moore@hp.com> | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
23 | * the GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #ifndef _NETLABEL_USER_H | ||
32 | #define _NETLABEL_USER_H | ||
33 | |||
34 | #include <linux/types.h> | ||
35 | #include <linux/skbuff.h> | ||
36 | #include <linux/capability.h> | ||
37 | #include <net/netlink.h> | ||
38 | #include <net/genetlink.h> | ||
39 | #include <net/netlabel.h> | ||
40 | |||
41 | /* NetLabel NETLINK helper functions */ | ||
42 | |||
43 | /** | ||
44 | * netlbl_netlink_cap_check - Check the NETLINK msg capabilities | ||
45 | * @skb: the NETLINK buffer | ||
46 | * @req_cap: the required capability | ||
47 | * | ||
48 | * Description: | ||
49 | * Check the NETLINK buffer's capabilities against the required capabilities. | ||
50 | * Returns zero on success, negative values on failure. | ||
51 | * | ||
52 | */ | ||
53 | static inline int netlbl_netlink_cap_check(const struct sk_buff *skb, | ||
54 | kernel_cap_t req_cap) | ||
55 | { | ||
56 | if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap)) | ||
57 | return 0; | ||
58 | return -EPERM; | ||
59 | } | ||
60 | |||
61 | /** | ||
62 | * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on | ||
63 | * @nla: the attribute | ||
64 | * @rem_len: remaining length | ||
65 | * | ||
66 | * Description: | ||
67 | * Return a u8 value pointed to by @nla and advance it to the next attribute. | ||
68 | * | ||
69 | */ | ||
70 | static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len) | ||
71 | { | ||
72 | u8 val = nla_get_u8(*nla); | ||
73 | *nla = nla_next(*nla, rem_len); | ||
74 | return val; | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on | ||
79 | * @nla: the attribute | ||
80 | * @rem_len: remaining length | ||
81 | * | ||
82 | * Description: | ||
83 | * Return a u16 value pointed to by @nla and advance it to the next attribute. | ||
84 | * | ||
85 | */ | ||
86 | static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len) | ||
87 | { | ||
88 | u16 val = nla_get_u16(*nla); | ||
89 | *nla = nla_next(*nla, rem_len); | ||
90 | return val; | ||
91 | } | ||
92 | |||
93 | /** | ||
94 | * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on | ||
95 | * @nla: the attribute | ||
96 | * @rem_len: remaining length | ||
97 | * | ||
98 | * Description: | ||
99 | * Return a u32 value pointed to by @nla and advance it to the next attribute. | ||
100 | * | ||
101 | */ | ||
102 | static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len) | ||
103 | { | ||
104 | u32 val = nla_get_u32(*nla); | ||
105 | *nla = nla_next(*nla, rem_len); | ||
106 | return val; | ||
107 | } | ||
108 | |||
109 | /** | ||
110 | * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff | ||
111 | * @skb: the packet | ||
112 | * @pid: the PID of the recipient | ||
113 | * @seq: the sequence number | ||
114 | * @type: the generic NETLINK message family type | ||
115 | * @cmd: command | ||
116 | * | ||
117 | * Description: | ||
118 | * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr | ||
119 | * struct to the packet. Returns a pointer to the start of the payload buffer | ||
120 | * on success or NULL on failure. | ||
121 | * | ||
122 | */ | ||
123 | static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, | ||
124 | u32 pid, | ||
125 | u32 seq, | ||
126 | int type, | ||
127 | u8 cmd) | ||
128 | { | ||
129 | return genlmsg_put(skb, | ||
130 | pid, | ||
131 | seq, | ||
132 | type, | ||
133 | 0, | ||
134 | 0, | ||
135 | cmd, | ||
136 | NETLBL_PROTO_VERSION); | ||
137 | } | ||
138 | |||
139 | /** | ||
140 | * netlbl_netlink_hdr_push - Write the NETLINK buffers into a sk_buff | ||
141 | * @skb: the packet | ||
142 | * @pid: the PID of the recipient | ||
143 | * @seq: the sequence number | ||
144 | * @type: the generic NETLINK message family type | ||
145 | * @cmd: command | ||
146 | * | ||
147 | * Description: | ||
148 | * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr | ||
149 | * struct to the packet. | ||
150 | * | ||
151 | */ | ||
152 | static inline void netlbl_netlink_hdr_push(struct sk_buff *skb, | ||
153 | u32 pid, | ||
154 | u32 seq, | ||
155 | int type, | ||
156 | u8 cmd) | ||
157 | |||
158 | { | ||
159 | struct nlmsghdr *nlh; | ||
160 | struct genlmsghdr *hdr; | ||
161 | |||
162 | nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN)); | ||
163 | nlh->nlmsg_type = type; | ||
164 | nlh->nlmsg_len = skb->len; | ||
165 | nlh->nlmsg_flags = 0; | ||
166 | nlh->nlmsg_pid = pid; | ||
167 | nlh->nlmsg_seq = seq; | ||
168 | |||
169 | hdr = nlmsg_data(nlh); | ||
170 | hdr->cmd = cmd; | ||
171 | hdr->version = NETLBL_PROTO_VERSION; | ||
172 | hdr->reserved = 0; | ||
173 | } | ||
174 | |||
175 | /** | ||
176 | * netlbl_netlink_payload_len - Return the length of the payload | ||
177 | * @skb: the NETLINK buffer | ||
178 | * | ||
179 | * Description: | ||
180 | * This function returns the length of the NetLabel payload. | ||
181 | * | ||
182 | */ | ||
183 | static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb) | ||
184 | { | ||
185 | return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN; | ||
186 | } | ||
187 | |||
188 | /** | ||
189 | * netlbl_netlink_payload_data - Returns a pointer to the start of the payload | ||
190 | * @skb: the NETLINK buffer | ||
191 | * | ||
192 | * Description: | ||
193 | * This function returns a pointer to the start of the NetLabel payload. | ||
194 | * | ||
195 | */ | ||
196 | static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb) | ||
197 | { | ||
198 | return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) + | ||
199 | GENL_HDRLEN; | ||
200 | } | ||
201 | |||
202 | /* NetLabel common protocol functions */ | ||
203 | |||
204 | void netlbl_netlink_send_ack(const struct genl_info *info, | ||
205 | u32 genl_family, | ||
206 | u8 ack_cmd, | ||
207 | u32 ret_code); | ||
208 | |||
209 | /* NetLabel NETLINK I/O functions */ | ||
210 | |||
211 | int netlbl_netlink_init(void); | ||
212 | int netlbl_netlink_snd(struct sk_buff *skb, u32 pid); | ||
213 | int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group); | ||
214 | |||
215 | #endif | ||
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8b85036ba8e3..d56e0d21f919 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -1147,7 +1147,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
1147 | if (len > sk->sk_sndbuf - 32) | 1147 | if (len > sk->sk_sndbuf - 32) |
1148 | goto out; | 1148 | goto out; |
1149 | err = -ENOBUFS; | 1149 | err = -ENOBUFS; |
1150 | skb = alloc_skb(len, GFP_KERNEL); | 1150 | skb = nlmsg_new(len, GFP_KERNEL); |
1151 | if (skb==NULL) | 1151 | if (skb==NULL) |
1152 | goto out; | 1152 | goto out; |
1153 | 1153 | ||
@@ -1341,19 +1341,18 @@ static int netlink_dump(struct sock *sk) | |||
1341 | struct netlink_callback *cb; | 1341 | struct netlink_callback *cb; |
1342 | struct sk_buff *skb; | 1342 | struct sk_buff *skb; |
1343 | struct nlmsghdr *nlh; | 1343 | struct nlmsghdr *nlh; |
1344 | int len; | 1344 | int len, err = -ENOBUFS; |
1345 | 1345 | ||
1346 | skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); | 1346 | skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); |
1347 | if (!skb) | 1347 | if (!skb) |
1348 | return -ENOBUFS; | 1348 | goto errout; |
1349 | 1349 | ||
1350 | spin_lock(&nlk->cb_lock); | 1350 | spin_lock(&nlk->cb_lock); |
1351 | 1351 | ||
1352 | cb = nlk->cb; | 1352 | cb = nlk->cb; |
1353 | if (cb == NULL) { | 1353 | if (cb == NULL) { |
1354 | spin_unlock(&nlk->cb_lock); | 1354 | err = -EINVAL; |
1355 | kfree_skb(skb); | 1355 | goto errout_skb; |
1356 | return -EINVAL; | ||
1357 | } | 1356 | } |
1358 | 1357 | ||
1359 | len = cb->dump(skb, cb); | 1358 | len = cb->dump(skb, cb); |
@@ -1365,8 +1364,12 @@ static int netlink_dump(struct sock *sk) | |||
1365 | return 0; | 1364 | return 0; |
1366 | } | 1365 | } |
1367 | 1366 | ||
1368 | nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); | 1367 | nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); |
1369 | memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); | 1368 | if (!nlh) |
1369 | goto errout_skb; | ||
1370 | |||
1371 | memcpy(nlmsg_data(nlh), &len, sizeof(len)); | ||
1372 | |||
1370 | skb_queue_tail(&sk->sk_receive_queue, skb); | 1373 | skb_queue_tail(&sk->sk_receive_queue, skb); |
1371 | sk->sk_data_ready(sk, skb->len); | 1374 | sk->sk_data_ready(sk, skb->len); |
1372 | 1375 | ||
@@ -1378,8 +1381,11 @@ static int netlink_dump(struct sock *sk) | |||
1378 | netlink_destroy_callback(cb); | 1381 | netlink_destroy_callback(cb); |
1379 | return 0; | 1382 | return 0; |
1380 | 1383 | ||
1381 | nlmsg_failure: | 1384 | errout_skb: |
1382 | return -ENOBUFS; | 1385 | spin_unlock(&nlk->cb_lock); |
1386 | kfree_skb(skb); | ||
1387 | errout: | ||
1388 | return err; | ||
1383 | } | 1389 | } |
1384 | 1390 | ||
1385 | int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, | 1391 | int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, |
@@ -1431,11 +1437,11 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) | |||
1431 | int size; | 1437 | int size; |
1432 | 1438 | ||
1433 | if (err == 0) | 1439 | if (err == 0) |
1434 | size = NLMSG_SPACE(sizeof(struct nlmsgerr)); | 1440 | size = nlmsg_total_size(sizeof(*errmsg)); |
1435 | else | 1441 | else |
1436 | size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len)); | 1442 | size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh)); |
1437 | 1443 | ||
1438 | skb = alloc_skb(size, GFP_KERNEL); | 1444 | skb = nlmsg_new(size, GFP_KERNEL); |
1439 | if (!skb) { | 1445 | if (!skb) { |
1440 | struct sock *sk; | 1446 | struct sock *sk; |
1441 | 1447 | ||
@@ -1451,16 +1457,15 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) | |||
1451 | 1457 | ||
1452 | rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, | 1458 | rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, |
1453 | NLMSG_ERROR, sizeof(struct nlmsgerr), 0); | 1459 | NLMSG_ERROR, sizeof(struct nlmsgerr), 0); |
1454 | errmsg = NLMSG_DATA(rep); | 1460 | errmsg = nlmsg_data(rep); |
1455 | errmsg->error = err; | 1461 | errmsg->error = err; |
1456 | memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); | 1462 | memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); |
1457 | netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); | 1463 | netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); |
1458 | } | 1464 | } |
1459 | 1465 | ||
1460 | static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, | 1466 | static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, |
1461 | struct nlmsghdr *, int *)) | 1467 | struct nlmsghdr *, int *)) |
1462 | { | 1468 | { |
1463 | unsigned int total_len; | ||
1464 | struct nlmsghdr *nlh; | 1469 | struct nlmsghdr *nlh; |
1465 | int err; | 1470 | int err; |
1466 | 1471 | ||
@@ -1470,8 +1475,6 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, | |||
1470 | if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) | 1475 | if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) |
1471 | return 0; | 1476 | return 0; |
1472 | 1477 | ||
1473 | total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len); | ||
1474 | |||
1475 | if (cb(skb, nlh, &err) < 0) { | 1478 | if (cb(skb, nlh, &err) < 0) { |
1476 | /* Not an error, but we have to interrupt processing | 1479 | /* Not an error, but we have to interrupt processing |
1477 | * here. Note: that in this case we do not pull | 1480 | * here. Note: that in this case we do not pull |
@@ -1483,7 +1486,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, | |||
1483 | } else if (nlh->nlmsg_flags & NLM_F_ACK) | 1486 | } else if (nlh->nlmsg_flags & NLM_F_ACK) |
1484 | netlink_ack(skb, nlh, 0); | 1487 | netlink_ack(skb, nlh, 0); |
1485 | 1488 | ||
1486 | skb_pull(skb, total_len); | 1489 | netlink_queue_skip(nlh, skb); |
1487 | } | 1490 | } |
1488 | 1491 | ||
1489 | return 0; | 1492 | return 0; |
@@ -1546,6 +1549,38 @@ void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) | |||
1546 | skb_pull(skb, msglen); | 1549 | skb_pull(skb, msglen); |
1547 | } | 1550 | } |
1548 | 1551 | ||
1552 | /** | ||
1553 | * nlmsg_notify - send a notification netlink message | ||
1554 | * @sk: netlink socket to use | ||
1555 | * @skb: notification message | ||
1556 | * @pid: destination netlink pid for reports or 0 | ||
1557 | * @group: destination multicast group or 0 | ||
1558 | * @report: 1 to report back, 0 to disable | ||
1559 | * @flags: allocation flags | ||
1560 | */ | ||
1561 | int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, | ||
1562 | unsigned int group, int report, gfp_t flags) | ||
1563 | { | ||
1564 | int err = 0; | ||
1565 | |||
1566 | if (group) { | ||
1567 | int exclude_pid = 0; | ||
1568 | |||
1569 | if (report) { | ||
1570 | atomic_inc(&skb->users); | ||
1571 | exclude_pid = pid; | ||
1572 | } | ||
1573 | |||
1574 | /* errors reported via destination sk->sk_err */ | ||
1575 | nlmsg_multicast(sk, skb, exclude_pid, group, flags); | ||
1576 | } | ||
1577 | |||
1578 | if (report) | ||
1579 | err = nlmsg_unicast(sk, skb, pid); | ||
1580 | |||
1581 | return err; | ||
1582 | } | ||
1583 | |||
1549 | #ifdef CONFIG_PROC_FS | 1584 | #ifdef CONFIG_PROC_FS |
1550 | struct nl_seq_iter { | 1585 | struct nl_seq_iter { |
1551 | int link; | 1586 | int link; |
@@ -1727,8 +1762,6 @@ static struct net_proto_family netlink_family_ops = { | |||
1727 | .owner = THIS_MODULE, /* for consistency 8) */ | 1762 | .owner = THIS_MODULE, /* for consistency 8) */ |
1728 | }; | 1763 | }; |
1729 | 1764 | ||
1730 | extern void netlink_skb_parms_too_large(void); | ||
1731 | |||
1732 | static int __init netlink_proto_init(void) | 1765 | static int __init netlink_proto_init(void) |
1733 | { | 1766 | { |
1734 | struct sk_buff *dummy_skb; | 1767 | struct sk_buff *dummy_skb; |
@@ -1740,8 +1773,7 @@ static int __init netlink_proto_init(void) | |||
1740 | if (err != 0) | 1773 | if (err != 0) |
1741 | goto out; | 1774 | goto out; |
1742 | 1775 | ||
1743 | if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)) | 1776 | BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)); |
1744 | netlink_skb_parms_too_large(); | ||
1745 | 1777 | ||
1746 | nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); | 1778 | nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); |
1747 | if (!nl_table) | 1779 | if (!nl_table) |
@@ -1799,4 +1831,4 @@ EXPORT_SYMBOL(netlink_set_err); | |||
1799 | EXPORT_SYMBOL(netlink_set_nonroot); | 1831 | EXPORT_SYMBOL(netlink_set_nonroot); |
1800 | EXPORT_SYMBOL(netlink_unicast); | 1832 | EXPORT_SYMBOL(netlink_unicast); |
1801 | EXPORT_SYMBOL(netlink_unregister_notifier); | 1833 | EXPORT_SYMBOL(netlink_unregister_notifier); |
1802 | 1834 | EXPORT_SYMBOL(nlmsg_notify); | |
diff --git a/net/netlink/attr.c b/net/netlink/attr.c index dddbd15135a8..004139557e09 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c | |||
@@ -20,7 +20,6 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { | |||
20 | [NLA_U16] = sizeof(u16), | 20 | [NLA_U16] = sizeof(u16), |
21 | [NLA_U32] = sizeof(u32), | 21 | [NLA_U32] = sizeof(u32), |
22 | [NLA_U64] = sizeof(u64), | 22 | [NLA_U64] = sizeof(u64), |
23 | [NLA_STRING] = 1, | ||
24 | [NLA_NESTED] = NLA_HDRLEN, | 23 | [NLA_NESTED] = NLA_HDRLEN, |
25 | }; | 24 | }; |
26 | 25 | ||
@@ -28,7 +27,7 @@ static int validate_nla(struct nlattr *nla, int maxtype, | |||
28 | struct nla_policy *policy) | 27 | struct nla_policy *policy) |
29 | { | 28 | { |
30 | struct nla_policy *pt; | 29 | struct nla_policy *pt; |
31 | int minlen = 0; | 30 | int minlen = 0, attrlen = nla_len(nla); |
32 | 31 | ||
33 | if (nla->nla_type <= 0 || nla->nla_type > maxtype) | 32 | if (nla->nla_type <= 0 || nla->nla_type > maxtype) |
34 | return 0; | 33 | return 0; |
@@ -37,16 +36,46 @@ static int validate_nla(struct nlattr *nla, int maxtype, | |||
37 | 36 | ||
38 | BUG_ON(pt->type > NLA_TYPE_MAX); | 37 | BUG_ON(pt->type > NLA_TYPE_MAX); |
39 | 38 | ||
40 | if (pt->minlen) | 39 | switch (pt->type) { |
41 | minlen = pt->minlen; | 40 | case NLA_FLAG: |
42 | else if (pt->type != NLA_UNSPEC) | 41 | if (attrlen > 0) |
43 | minlen = nla_attr_minlen[pt->type]; | 42 | return -ERANGE; |
43 | break; | ||
44 | 44 | ||
45 | if (pt->type == NLA_FLAG && nla_len(nla) > 0) | 45 | case NLA_NUL_STRING: |
46 | return -ERANGE; | 46 | if (pt->len) |
47 | minlen = min_t(int, attrlen, pt->len + 1); | ||
48 | else | ||
49 | minlen = attrlen; | ||
47 | 50 | ||
48 | if (nla_len(nla) < minlen) | 51 | if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) |
49 | return -ERANGE; | 52 | return -EINVAL; |
53 | /* fall through */ | ||
54 | |||
55 | case NLA_STRING: | ||
56 | if (attrlen < 1) | ||
57 | return -ERANGE; | ||
58 | |||
59 | if (pt->len) { | ||
60 | char *buf = nla_data(nla); | ||
61 | |||
62 | if (buf[attrlen - 1] == '\0') | ||
63 | attrlen--; | ||
64 | |||
65 | if (attrlen > pt->len) | ||
66 | return -ERANGE; | ||
67 | } | ||
68 | break; | ||
69 | |||
70 | default: | ||
71 | if (pt->len) | ||
72 | minlen = pt->len; | ||
73 | else if (pt->type != NLA_UNSPEC) | ||
74 | minlen = nla_attr_minlen[pt->type]; | ||
75 | |||
76 | if (attrlen < minlen) | ||
77 | return -ERANGE; | ||
78 | } | ||
50 | 79 | ||
51 | return 0; | 80 | return 0; |
52 | } | 81 | } |
@@ -255,6 +284,26 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) | |||
255 | } | 284 | } |
256 | 285 | ||
257 | /** | 286 | /** |
287 | * __nla_reserve_nohdr - reserve room for attribute without header | ||
288 | * @skb: socket buffer to reserve room on | ||
289 | * @attrlen: length of attribute payload | ||
290 | * | ||
291 | * Reserves room for attribute payload without a header. | ||
292 | * | ||
293 | * The caller is responsible to ensure that the skb provides enough | ||
294 | * tailroom for the payload. | ||
295 | */ | ||
296 | void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) | ||
297 | { | ||
298 | void *start; | ||
299 | |||
300 | start = skb_put(skb, NLA_ALIGN(attrlen)); | ||
301 | memset(start, 0, NLA_ALIGN(attrlen)); | ||
302 | |||
303 | return start; | ||
304 | } | ||
305 | |||
306 | /** | ||
258 | * nla_reserve - reserve room for attribute on the skb | 307 | * nla_reserve - reserve room for attribute on the skb |
259 | * @skb: socket buffer to reserve room on | 308 | * @skb: socket buffer to reserve room on |
260 | * @attrtype: attribute type | 309 | * @attrtype: attribute type |
@@ -275,6 +324,24 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) | |||
275 | } | 324 | } |
276 | 325 | ||
277 | /** | 326 | /** |
327 | * nla_reserve_nohdr - reserve room for attribute without header | ||
328 | * @skb: socket buffer to reserve room on | ||
329 | * @attrlen: length of attribute payload | ||
330 | * | ||
331 | * Reserves room for attribute payload without a header. | ||
332 | * | ||
333 | * Returns NULL if the tailroom of the skb is insufficient to store | ||
334 | * the attribute payload. | ||
335 | */ | ||
336 | void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) | ||
337 | { | ||
338 | if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) | ||
339 | return NULL; | ||
340 | |||
341 | return __nla_reserve_nohdr(skb, attrlen); | ||
342 | } | ||
343 | |||
344 | /** | ||
278 | * __nla_put - Add a netlink attribute to a socket buffer | 345 | * __nla_put - Add a netlink attribute to a socket buffer |
279 | * @skb: socket buffer to add attribute to | 346 | * @skb: socket buffer to add attribute to |
280 | * @attrtype: attribute type | 347 | * @attrtype: attribute type |
@@ -293,6 +360,22 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, | |||
293 | memcpy(nla_data(nla), data, attrlen); | 360 | memcpy(nla_data(nla), data, attrlen); |
294 | } | 361 | } |
295 | 362 | ||
363 | /** | ||
364 | * __nla_put_nohdr - Add a netlink attribute without header | ||
365 | * @skb: socket buffer to add attribute to | ||
366 | * @attrlen: length of attribute payload | ||
367 | * @data: head of attribute payload | ||
368 | * | ||
369 | * The caller is responsible to ensure that the skb provides enough | ||
370 | * tailroom for the attribute payload. | ||
371 | */ | ||
372 | void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) | ||
373 | { | ||
374 | void *start; | ||
375 | |||
376 | start = __nla_reserve_nohdr(skb, attrlen); | ||
377 | memcpy(start, data, attrlen); | ||
378 | } | ||
296 | 379 | ||
297 | /** | 380 | /** |
298 | * nla_put - Add a netlink attribute to a socket buffer | 381 | * nla_put - Add a netlink attribute to a socket buffer |
@@ -313,15 +396,36 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) | |||
313 | return 0; | 396 | return 0; |
314 | } | 397 | } |
315 | 398 | ||
399 | /** | ||
400 | * nla_put_nohdr - Add a netlink attribute without header | ||
401 | * @skb: socket buffer to add attribute to | ||
402 | * @attrlen: length of attribute payload | ||
403 | * @data: head of attribute payload | ||
404 | * | ||
405 | * Returns -1 if the tailroom of the skb is insufficient to store | ||
406 | * the attribute payload. | ||
407 | */ | ||
408 | int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) | ||
409 | { | ||
410 | if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) | ||
411 | return -1; | ||
412 | |||
413 | __nla_put_nohdr(skb, attrlen, data); | ||
414 | return 0; | ||
415 | } | ||
316 | 416 | ||
317 | EXPORT_SYMBOL(nla_validate); | 417 | EXPORT_SYMBOL(nla_validate); |
318 | EXPORT_SYMBOL(nla_parse); | 418 | EXPORT_SYMBOL(nla_parse); |
319 | EXPORT_SYMBOL(nla_find); | 419 | EXPORT_SYMBOL(nla_find); |
320 | EXPORT_SYMBOL(nla_strlcpy); | 420 | EXPORT_SYMBOL(nla_strlcpy); |
321 | EXPORT_SYMBOL(__nla_reserve); | 421 | EXPORT_SYMBOL(__nla_reserve); |
422 | EXPORT_SYMBOL(__nla_reserve_nohdr); | ||
322 | EXPORT_SYMBOL(nla_reserve); | 423 | EXPORT_SYMBOL(nla_reserve); |
424 | EXPORT_SYMBOL(nla_reserve_nohdr); | ||
323 | EXPORT_SYMBOL(__nla_put); | 425 | EXPORT_SYMBOL(__nla_put); |
426 | EXPORT_SYMBOL(__nla_put_nohdr); | ||
324 | EXPORT_SYMBOL(nla_put); | 427 | EXPORT_SYMBOL(nla_put); |
428 | EXPORT_SYMBOL(nla_put_nohdr); | ||
325 | EXPORT_SYMBOL(nla_memcpy); | 429 | EXPORT_SYMBOL(nla_memcpy); |
326 | EXPORT_SYMBOL(nla_memcmp); | 430 | EXPORT_SYMBOL(nla_memcmp); |
327 | EXPORT_SYMBOL(nla_strcmp); | 431 | EXPORT_SYMBOL(nla_strcmp); |
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a298f77cc3e3..49bc2db7982b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c | |||
@@ -387,7 +387,10 @@ static void genl_rcv(struct sock *sk, int len) | |||
387 | static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, | 387 | static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, |
388 | u32 flags, struct sk_buff *skb, u8 cmd) | 388 | u32 flags, struct sk_buff *skb, u8 cmd) |
389 | { | 389 | { |
390 | struct nlattr *nla_ops; | ||
391 | struct genl_ops *ops; | ||
390 | void *hdr; | 392 | void *hdr; |
393 | int idx = 1; | ||
391 | 394 | ||
392 | hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, | 395 | hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, |
393 | family->version); | 396 | family->version); |
@@ -396,6 +399,37 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, | |||
396 | 399 | ||
397 | NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); | 400 | NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); |
398 | NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); | 401 | NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); |
402 | NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version); | ||
403 | NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize); | ||
404 | NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr); | ||
405 | |||
406 | nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); | ||
407 | if (nla_ops == NULL) | ||
408 | goto nla_put_failure; | ||
409 | |||
410 | list_for_each_entry(ops, &family->ops_list, ops_list) { | ||
411 | struct nlattr *nest; | ||
412 | |||
413 | nest = nla_nest_start(skb, idx++); | ||
414 | if (nest == NULL) | ||
415 | goto nla_put_failure; | ||
416 | |||
417 | NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); | ||
418 | NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); | ||
419 | |||
420 | if (ops->policy) | ||
421 | NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY); | ||
422 | |||
423 | if (ops->doit) | ||
424 | NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT); | ||
425 | |||
426 | if (ops->dumpit) | ||
427 | NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT); | ||
428 | |||
429 | nla_nest_end(skb, nest); | ||
430 | } | ||
431 | |||
432 | nla_nest_end(skb, nla_ops); | ||
399 | 433 | ||
400 | return genlmsg_end(skb, hdr); | 434 | return genlmsg_end(skb, hdr); |
401 | 435 | ||
@@ -411,6 +445,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) | |||
411 | int chains_to_skip = cb->args[0]; | 445 | int chains_to_skip = cb->args[0]; |
412 | int fams_to_skip = cb->args[1]; | 446 | int fams_to_skip = cb->args[1]; |
413 | 447 | ||
448 | if (chains_to_skip != 0) | ||
449 | genl_lock(); | ||
450 | |||
414 | for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { | 451 | for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { |
415 | if (i < chains_to_skip) | 452 | if (i < chains_to_skip) |
416 | continue; | 453 | continue; |
@@ -428,6 +465,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) | |||
428 | } | 465 | } |
429 | 466 | ||
430 | errout: | 467 | errout: |
468 | if (chains_to_skip != 0) | ||
469 | genl_unlock(); | ||
470 | |||
431 | cb->args[0] = i; | 471 | cb->args[0] = i; |
432 | cb->args[1] = n; | 472 | cb->args[1] = n; |
433 | 473 | ||
@@ -440,7 +480,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, | |||
440 | struct sk_buff *skb; | 480 | struct sk_buff *skb; |
441 | int err; | 481 | int err; |
442 | 482 | ||
443 | skb = nlmsg_new(NLMSG_GOODSIZE); | 483 | skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); |
444 | if (skb == NULL) | 484 | if (skb == NULL) |
445 | return ERR_PTR(-ENOBUFS); | 485 | return ERR_PTR(-ENOBUFS); |
446 | 486 | ||
@@ -455,7 +495,8 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, | |||
455 | 495 | ||
456 | static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = { | 496 | static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = { |
457 | [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, | 497 | [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, |
458 | [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING }, | 498 | [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, |
499 | .len = GENL_NAMSIZ - 1 }, | ||
459 | }; | 500 | }; |
460 | 501 | ||
461 | static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) | 502 | static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) |
@@ -470,12 +511,9 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) | |||
470 | } | 511 | } |
471 | 512 | ||
472 | if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { | 513 | if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { |
473 | char name[GENL_NAMSIZ]; | 514 | char *name; |
474 | |||
475 | if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME], | ||
476 | GENL_NAMSIZ) >= GENL_NAMSIZ) | ||
477 | goto errout; | ||
478 | 515 | ||
516 | name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]); | ||
479 | res = genl_family_find_byname(name); | 517 | res = genl_family_find_byname(name); |
480 | } | 518 | } |
481 | 519 | ||
@@ -510,7 +548,7 @@ static int genl_ctrl_event(int event, void *data) | |||
510 | if (IS_ERR(msg)) | 548 | if (IS_ERR(msg)) |
511 | return PTR_ERR(msg); | 549 | return PTR_ERR(msg); |
512 | 550 | ||
513 | genlmsg_multicast(msg, 0, GENL_ID_CTRL); | 551 | genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); |
514 | break; | 552 | break; |
515 | } | 553 | } |
516 | 554 | ||
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4172a5235916..f4ccb90e6739 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -427,21 +427,24 @@ out_unlock: | |||
427 | } | 427 | } |
428 | #endif | 428 | #endif |
429 | 429 | ||
430 | static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res) | 430 | static inline int run_filter(struct sk_buff *skb, struct sock *sk, |
431 | unsigned *snaplen) | ||
431 | { | 432 | { |
432 | struct sk_filter *filter; | 433 | struct sk_filter *filter; |
434 | int err = 0; | ||
433 | 435 | ||
434 | bh_lock_sock(sk); | 436 | rcu_read_lock_bh(); |
435 | filter = sk->sk_filter; | 437 | filter = rcu_dereference(sk->sk_filter); |
436 | /* | 438 | if (filter != NULL) { |
437 | * Our caller already checked that filter != NULL but we need to | 439 | err = sk_run_filter(skb, filter->insns, filter->len); |
438 | * verify that under bh_lock_sock() to be safe | 440 | if (!err) |
439 | */ | 441 | err = -EPERM; |
440 | if (likely(filter != NULL)) | 442 | else if (*snaplen > err) |
441 | res = sk_run_filter(skb, filter->insns, filter->len); | 443 | *snaplen = err; |
442 | bh_unlock_sock(sk); | 444 | } |
445 | rcu_read_unlock_bh(); | ||
443 | 446 | ||
444 | return res; | 447 | return err; |
445 | } | 448 | } |
446 | 449 | ||
447 | /* | 450 | /* |
@@ -491,13 +494,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet | |||
491 | 494 | ||
492 | snaplen = skb->len; | 495 | snaplen = skb->len; |
493 | 496 | ||
494 | if (sk->sk_filter) { | 497 | if (run_filter(skb, sk, &snaplen) < 0) |
495 | unsigned res = run_filter(skb, sk, snaplen); | 498 | goto drop_n_restore; |
496 | if (res == 0) | ||
497 | goto drop_n_restore; | ||
498 | if (snaplen > res) | ||
499 | snaplen = res; | ||
500 | } | ||
501 | 499 | ||
502 | if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= | 500 | if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= |
503 | (unsigned)sk->sk_rcvbuf) | 501 | (unsigned)sk->sk_rcvbuf) |
@@ -586,20 +584,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe | |||
586 | else if (skb->pkt_type == PACKET_OUTGOING) { | 584 | else if (skb->pkt_type == PACKET_OUTGOING) { |
587 | /* Special case: outgoing packets have ll header at head */ | 585 | /* Special case: outgoing packets have ll header at head */ |
588 | skb_pull(skb, skb->nh.raw - skb->data); | 586 | skb_pull(skb, skb->nh.raw - skb->data); |
589 | if (skb->ip_summed == CHECKSUM_HW) | 587 | if (skb->ip_summed == CHECKSUM_PARTIAL) |
590 | status |= TP_STATUS_CSUMNOTREADY; | 588 | status |= TP_STATUS_CSUMNOTREADY; |
591 | } | 589 | } |
592 | } | 590 | } |
593 | 591 | ||
594 | snaplen = skb->len; | 592 | snaplen = skb->len; |
595 | 593 | ||
596 | if (sk->sk_filter) { | 594 | if (run_filter(skb, sk, &snaplen) < 0) |
597 | unsigned res = run_filter(skb, sk, snaplen); | 595 | goto drop_n_restore; |
598 | if (res == 0) | ||
599 | goto drop_n_restore; | ||
600 | if (snaplen > res) | ||
601 | snaplen = res; | ||
602 | } | ||
603 | 596 | ||
604 | if (sk->sk_type == SOCK_DGRAM) { | 597 | if (sk->sk_type == SOCK_DGRAM) { |
605 | macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; | 598 | macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; |
diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a2587b52e531..835070e9169c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c | |||
@@ -33,16 +33,230 @@ | |||
33 | #include <net/sch_generic.h> | 33 | #include <net/sch_generic.h> |
34 | #include <net/act_api.h> | 34 | #include <net/act_api.h> |
35 | 35 | ||
36 | #if 0 /* control */ | 36 | void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) |
37 | #define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args) | 37 | { |
38 | #else | 38 | unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); |
39 | #define DPRINTK(format, args...) | 39 | struct tcf_common **p1p; |
40 | |||
41 | for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) { | ||
42 | if (*p1p == p) { | ||
43 | write_lock_bh(hinfo->lock); | ||
44 | *p1p = p->tcfc_next; | ||
45 | write_unlock_bh(hinfo->lock); | ||
46 | #ifdef CONFIG_NET_ESTIMATOR | ||
47 | gen_kill_estimator(&p->tcfc_bstats, | ||
48 | &p->tcfc_rate_est); | ||
40 | #endif | 49 | #endif |
41 | #if 0 /* data */ | 50 | kfree(p); |
42 | #define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args) | 51 | return; |
43 | #else | 52 | } |
44 | #define D2PRINTK(format, args...) | 53 | } |
54 | BUG_TRAP(0); | ||
55 | } | ||
56 | EXPORT_SYMBOL(tcf_hash_destroy); | ||
57 | |||
58 | int tcf_hash_release(struct tcf_common *p, int bind, | ||
59 | struct tcf_hashinfo *hinfo) | ||
60 | { | ||
61 | int ret = 0; | ||
62 | |||
63 | if (p) { | ||
64 | if (bind) | ||
65 | p->tcfc_bindcnt--; | ||
66 | |||
67 | p->tcfc_refcnt--; | ||
68 | if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) { | ||
69 | tcf_hash_destroy(p, hinfo); | ||
70 | ret = 1; | ||
71 | } | ||
72 | } | ||
73 | return ret; | ||
74 | } | ||
75 | EXPORT_SYMBOL(tcf_hash_release); | ||
76 | |||
77 | static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, | ||
78 | struct tc_action *a, struct tcf_hashinfo *hinfo) | ||
79 | { | ||
80 | struct tcf_common *p; | ||
81 | int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; | ||
82 | struct rtattr *r ; | ||
83 | |||
84 | read_lock(hinfo->lock); | ||
85 | |||
86 | s_i = cb->args[0]; | ||
87 | |||
88 | for (i = 0; i < (hinfo->hmask + 1); i++) { | ||
89 | p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; | ||
90 | |||
91 | for (; p; p = p->tcfc_next) { | ||
92 | index++; | ||
93 | if (index < s_i) | ||
94 | continue; | ||
95 | a->priv = p; | ||
96 | a->order = n_i; | ||
97 | r = (struct rtattr*) skb->tail; | ||
98 | RTA_PUT(skb, a->order, 0, NULL); | ||
99 | err = tcf_action_dump_1(skb, a, 0, 0); | ||
100 | if (err < 0) { | ||
101 | index--; | ||
102 | skb_trim(skb, (u8*)r - skb->data); | ||
103 | goto done; | ||
104 | } | ||
105 | r->rta_len = skb->tail - (u8*)r; | ||
106 | n_i++; | ||
107 | if (n_i >= TCA_ACT_MAX_PRIO) | ||
108 | goto done; | ||
109 | } | ||
110 | } | ||
111 | done: | ||
112 | read_unlock(hinfo->lock); | ||
113 | if (n_i) | ||
114 | cb->args[0] += n_i; | ||
115 | return n_i; | ||
116 | |||
117 | rtattr_failure: | ||
118 | skb_trim(skb, (u8*)r - skb->data); | ||
119 | goto done; | ||
120 | } | ||
121 | |||
122 | static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, | ||
123 | struct tcf_hashinfo *hinfo) | ||
124 | { | ||
125 | struct tcf_common *p, *s_p; | ||
126 | struct rtattr *r ; | ||
127 | int i= 0, n_i = 0; | ||
128 | |||
129 | r = (struct rtattr*) skb->tail; | ||
130 | RTA_PUT(skb, a->order, 0, NULL); | ||
131 | RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); | ||
132 | for (i = 0; i < (hinfo->hmask + 1); i++) { | ||
133 | p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; | ||
134 | |||
135 | while (p != NULL) { | ||
136 | s_p = p->tcfc_next; | ||
137 | if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) | ||
138 | module_put(a->ops->owner); | ||
139 | n_i++; | ||
140 | p = s_p; | ||
141 | } | ||
142 | } | ||
143 | RTA_PUT(skb, TCA_FCNT, 4, &n_i); | ||
144 | r->rta_len = skb->tail - (u8*)r; | ||
145 | |||
146 | return n_i; | ||
147 | rtattr_failure: | ||
148 | skb_trim(skb, (u8*)r - skb->data); | ||
149 | return -EINVAL; | ||
150 | } | ||
151 | |||
152 | int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, | ||
153 | int type, struct tc_action *a) | ||
154 | { | ||
155 | struct tcf_hashinfo *hinfo = a->ops->hinfo; | ||
156 | |||
157 | if (type == RTM_DELACTION) { | ||
158 | return tcf_del_walker(skb, a, hinfo); | ||
159 | } else if (type == RTM_GETACTION) { | ||
160 | return tcf_dump_walker(skb, cb, a, hinfo); | ||
161 | } else { | ||
162 | printk("tcf_generic_walker: unknown action %d\n", type); | ||
163 | return -EINVAL; | ||
164 | } | ||
165 | } | ||
166 | EXPORT_SYMBOL(tcf_generic_walker); | ||
167 | |||
168 | struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) | ||
169 | { | ||
170 | struct tcf_common *p; | ||
171 | |||
172 | read_lock(hinfo->lock); | ||
173 | for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; | ||
174 | p = p->tcfc_next) { | ||
175 | if (p->tcfc_index == index) | ||
176 | break; | ||
177 | } | ||
178 | read_unlock(hinfo->lock); | ||
179 | |||
180 | return p; | ||
181 | } | ||
182 | EXPORT_SYMBOL(tcf_hash_lookup); | ||
183 | |||
184 | u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo) | ||
185 | { | ||
186 | u32 val = *idx_gen; | ||
187 | |||
188 | do { | ||
189 | if (++val == 0) | ||
190 | val = 1; | ||
191 | } while (tcf_hash_lookup(val, hinfo)); | ||
192 | |||
193 | return (*idx_gen = val); | ||
194 | } | ||
195 | EXPORT_SYMBOL(tcf_hash_new_index); | ||
196 | |||
197 | int tcf_hash_search(struct tc_action *a, u32 index) | ||
198 | { | ||
199 | struct tcf_hashinfo *hinfo = a->ops->hinfo; | ||
200 | struct tcf_common *p = tcf_hash_lookup(index, hinfo); | ||
201 | |||
202 | if (p) { | ||
203 | a->priv = p; | ||
204 | return 1; | ||
205 | } | ||
206 | return 0; | ||
207 | } | ||
208 | EXPORT_SYMBOL(tcf_hash_search); | ||
209 | |||
210 | struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, | ||
211 | struct tcf_hashinfo *hinfo) | ||
212 | { | ||
213 | struct tcf_common *p = NULL; | ||
214 | if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { | ||
215 | if (bind) { | ||
216 | p->tcfc_bindcnt++; | ||
217 | p->tcfc_refcnt++; | ||
218 | } | ||
219 | a->priv = p; | ||
220 | } | ||
221 | return p; | ||
222 | } | ||
223 | EXPORT_SYMBOL(tcf_hash_check); | ||
224 | |||
225 | struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) | ||
226 | { | ||
227 | struct tcf_common *p = kzalloc(size, GFP_KERNEL); | ||
228 | |||
229 | if (unlikely(!p)) | ||
230 | return p; | ||
231 | p->tcfc_refcnt = 1; | ||
232 | if (bind) | ||
233 | p->tcfc_bindcnt = 1; | ||
234 | |||
235 | spin_lock_init(&p->tcfc_lock); | ||
236 | p->tcfc_stats_lock = &p->tcfc_lock; | ||
237 | p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); | ||
238 | p->tcfc_tm.install = jiffies; | ||
239 | p->tcfc_tm.lastuse = jiffies; | ||
240 | #ifdef CONFIG_NET_ESTIMATOR | ||
241 | if (est) | ||
242 | gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, | ||
243 | p->tcfc_stats_lock, est); | ||
45 | #endif | 244 | #endif |
245 | a->priv = (void *) p; | ||
246 | return p; | ||
247 | } | ||
248 | EXPORT_SYMBOL(tcf_hash_create); | ||
249 | |||
250 | void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo) | ||
251 | { | ||
252 | unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); | ||
253 | |||
254 | write_lock_bh(hinfo->lock); | ||
255 | p->tcfc_next = hinfo->htab[h]; | ||
256 | hinfo->htab[h] = p; | ||
257 | write_unlock_bh(hinfo->lock); | ||
258 | } | ||
259 | EXPORT_SYMBOL(tcf_hash_insert); | ||
46 | 260 | ||
47 | static struct tc_action_ops *act_base = NULL; | 261 | static struct tc_action_ops *act_base = NULL; |
48 | static DEFINE_RWLOCK(act_mod_lock); | 262 | static DEFINE_RWLOCK(act_mod_lock); |
@@ -155,9 +369,6 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act, | |||
155 | 369 | ||
156 | if (skb->tc_verd & TC_NCLS) { | 370 | if (skb->tc_verd & TC_NCLS) { |
157 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); | 371 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); |
158 | D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n", | ||
159 | skb, skb->input_dev ? skb->input_dev->name : "xxx", | ||
160 | skb->dev->name); | ||
161 | ret = TC_ACT_OK; | 372 | ret = TC_ACT_OK; |
162 | goto exec_done; | 373 | goto exec_done; |
163 | } | 374 | } |
@@ -187,8 +398,6 @@ void tcf_action_destroy(struct tc_action *act, int bind) | |||
187 | 398 | ||
188 | for (a = act; a; a = act) { | 399 | for (a = act; a; a = act) { |
189 | if (a->ops && a->ops->cleanup) { | 400 | if (a->ops && a->ops->cleanup) { |
190 | DPRINTK("tcf_action_destroy destroying %p next %p\n", | ||
191 | a, a->next); | ||
192 | if (a->ops->cleanup(a, bind) == ACT_P_DELETED) | 401 | if (a->ops->cleanup(a, bind) == ACT_P_DELETED) |
193 | module_put(a->ops->owner); | 402 | module_put(a->ops->owner); |
194 | act = act->next; | 403 | act = act->next; |
@@ -331,7 +540,6 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, | |||
331 | if (*err != ACT_P_CREATED) | 540 | if (*err != ACT_P_CREATED) |
332 | module_put(a_o->owner); | 541 | module_put(a_o->owner); |
333 | a->ops = a_o; | 542 | a->ops = a_o; |
334 | DPRINTK("tcf_action_init_1: successfull %s\n", act_name); | ||
335 | 543 | ||
336 | *err = 0; | 544 | *err = 0; |
337 | return a; | 545 | return a; |
@@ -392,12 +600,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, | |||
392 | if (compat_mode) { | 600 | if (compat_mode) { |
393 | if (a->type == TCA_OLD_COMPAT) | 601 | if (a->type == TCA_OLD_COMPAT) |
394 | err = gnet_stats_start_copy_compat(skb, 0, | 602 | err = gnet_stats_start_copy_compat(skb, 0, |
395 | TCA_STATS, TCA_XSTATS, h->stats_lock, &d); | 603 | TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d); |
396 | else | 604 | else |
397 | return 0; | 605 | return 0; |
398 | } else | 606 | } else |
399 | err = gnet_stats_start_copy(skb, TCA_ACT_STATS, | 607 | err = gnet_stats_start_copy(skb, TCA_ACT_STATS, |
400 | h->stats_lock, &d); | 608 | h->tcf_stats_lock, &d); |
401 | 609 | ||
402 | if (err < 0) | 610 | if (err < 0) |
403 | goto errout; | 611 | goto errout; |
@@ -406,11 +614,11 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, | |||
406 | if (a->ops->get_stats(skb, a) < 0) | 614 | if (a->ops->get_stats(skb, a) < 0) |
407 | goto errout; | 615 | goto errout; |
408 | 616 | ||
409 | if (gnet_stats_copy_basic(&d, &h->bstats) < 0 || | 617 | if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || |
410 | #ifdef CONFIG_NET_ESTIMATOR | 618 | #ifdef CONFIG_NET_ESTIMATOR |
411 | gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 || | 619 | gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || |
412 | #endif | 620 | #endif |
413 | gnet_stats_copy_queue(&d, &h->qstats) < 0) | 621 | gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) |
414 | goto errout; | 622 | goto errout; |
415 | 623 | ||
416 | if (gnet_stats_finish_copy(&d) < 0) | 624 | if (gnet_stats_finish_copy(&d) < 0) |
@@ -459,7 +667,6 @@ static int | |||
459 | act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) | 667 | act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) |
460 | { | 668 | { |
461 | struct sk_buff *skb; | 669 | struct sk_buff *skb; |
462 | int err = 0; | ||
463 | 670 | ||
464 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | 671 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); |
465 | if (!skb) | 672 | if (!skb) |
@@ -468,10 +675,8 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) | |||
468 | kfree_skb(skb); | 675 | kfree_skb(skb); |
469 | return -EINVAL; | 676 | return -EINVAL; |
470 | } | 677 | } |
471 | err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); | 678 | |
472 | if (err > 0) | 679 | return rtnl_unicast(skb, pid); |
473 | err = 0; | ||
474 | return err; | ||
475 | } | 680 | } |
476 | 681 | ||
477 | static struct tc_action * | 682 | static struct tc_action * |
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e75a147ad60f..6cff56696a81 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c | |||
@@ -34,48 +34,43 @@ | |||
34 | #include <linux/tc_act/tc_gact.h> | 34 | #include <linux/tc_act/tc_gact.h> |
35 | #include <net/tc_act/tc_gact.h> | 35 | #include <net/tc_act/tc_gact.h> |
36 | 36 | ||
37 | /* use generic hash table */ | 37 | #define GACT_TAB_MASK 15 |
38 | #define MY_TAB_SIZE 16 | 38 | static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1]; |
39 | #define MY_TAB_MASK 15 | 39 | static u32 gact_idx_gen; |
40 | |||
41 | static u32 idx_gen; | ||
42 | static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE]; | ||
43 | static DEFINE_RWLOCK(gact_lock); | 40 | static DEFINE_RWLOCK(gact_lock); |
44 | 41 | ||
45 | /* ovewrride the defaults */ | 42 | static struct tcf_hashinfo gact_hash_info = { |
46 | #define tcf_st tcf_gact | 43 | .htab = tcf_gact_ht, |
47 | #define tc_st tc_gact | 44 | .hmask = GACT_TAB_MASK, |
48 | #define tcf_t_lock gact_lock | 45 | .lock = &gact_lock, |
49 | #define tcf_ht tcf_gact_ht | 46 | }; |
50 | |||
51 | #define CONFIG_NET_ACT_INIT 1 | ||
52 | #include <net/pkt_act.h> | ||
53 | 47 | ||
54 | #ifdef CONFIG_GACT_PROB | 48 | #ifdef CONFIG_GACT_PROB |
55 | static int gact_net_rand(struct tcf_gact *p) | 49 | static int gact_net_rand(struct tcf_gact *gact) |
56 | { | 50 | { |
57 | if (net_random()%p->pval) | 51 | if (net_random() % gact->tcfg_pval) |
58 | return p->action; | 52 | return gact->tcf_action; |
59 | return p->paction; | 53 | return gact->tcfg_paction; |
60 | } | 54 | } |
61 | 55 | ||
62 | static int gact_determ(struct tcf_gact *p) | 56 | static int gact_determ(struct tcf_gact *gact) |
63 | { | 57 | { |
64 | if (p->bstats.packets%p->pval) | 58 | if (gact->tcf_bstats.packets % gact->tcfg_pval) |
65 | return p->action; | 59 | return gact->tcf_action; |
66 | return p->paction; | 60 | return gact->tcfg_paction; |
67 | } | 61 | } |
68 | 62 | ||
69 | typedef int (*g_rand)(struct tcf_gact *p); | 63 | typedef int (*g_rand)(struct tcf_gact *gact); |
70 | static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; | 64 | static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; |
71 | #endif | 65 | #endif /* CONFIG_GACT_PROB */ |
72 | 66 | ||
73 | static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, | 67 | static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, |
74 | struct tc_action *a, int ovr, int bind) | 68 | struct tc_action *a, int ovr, int bind) |
75 | { | 69 | { |
76 | struct rtattr *tb[TCA_GACT_MAX]; | 70 | struct rtattr *tb[TCA_GACT_MAX]; |
77 | struct tc_gact *parm; | 71 | struct tc_gact *parm; |
78 | struct tcf_gact *p; | 72 | struct tcf_gact *gact; |
73 | struct tcf_common *pc; | ||
79 | int ret = 0; | 74 | int ret = 0; |
80 | 75 | ||
81 | if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0) | 76 | if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0) |
@@ -94,105 +89,106 @@ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, | |||
94 | return -EOPNOTSUPP; | 89 | return -EOPNOTSUPP; |
95 | #endif | 90 | #endif |
96 | 91 | ||
97 | p = tcf_hash_check(parm->index, a, ovr, bind); | 92 | pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); |
98 | if (p == NULL) { | 93 | if (!pc) { |
99 | p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); | 94 | pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), |
100 | if (p == NULL) | 95 | bind, &gact_idx_gen, &gact_hash_info); |
96 | if (unlikely(!pc)) | ||
101 | return -ENOMEM; | 97 | return -ENOMEM; |
102 | ret = ACT_P_CREATED; | 98 | ret = ACT_P_CREATED; |
103 | } else { | 99 | } else { |
104 | if (!ovr) { | 100 | if (!ovr) { |
105 | tcf_hash_release(p, bind); | 101 | tcf_hash_release(pc, bind, &gact_hash_info); |
106 | return -EEXIST; | 102 | return -EEXIST; |
107 | } | 103 | } |
108 | } | 104 | } |
109 | 105 | ||
110 | spin_lock_bh(&p->lock); | 106 | gact = to_gact(pc); |
111 | p->action = parm->action; | 107 | |
108 | spin_lock_bh(&gact->tcf_lock); | ||
109 | gact->tcf_action = parm->action; | ||
112 | #ifdef CONFIG_GACT_PROB | 110 | #ifdef CONFIG_GACT_PROB |
113 | if (tb[TCA_GACT_PROB-1] != NULL) { | 111 | if (tb[TCA_GACT_PROB-1] != NULL) { |
114 | struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]); | 112 | struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]); |
115 | p->paction = p_parm->paction; | 113 | gact->tcfg_paction = p_parm->paction; |
116 | p->pval = p_parm->pval; | 114 | gact->tcfg_pval = p_parm->pval; |
117 | p->ptype = p_parm->ptype; | 115 | gact->tcfg_ptype = p_parm->ptype; |
118 | } | 116 | } |
119 | #endif | 117 | #endif |
120 | spin_unlock_bh(&p->lock); | 118 | spin_unlock_bh(&gact->tcf_lock); |
121 | if (ret == ACT_P_CREATED) | 119 | if (ret == ACT_P_CREATED) |
122 | tcf_hash_insert(p); | 120 | tcf_hash_insert(pc, &gact_hash_info); |
123 | return ret; | 121 | return ret; |
124 | } | 122 | } |
125 | 123 | ||
126 | static int | 124 | static int tcf_gact_cleanup(struct tc_action *a, int bind) |
127 | tcf_gact_cleanup(struct tc_action *a, int bind) | ||
128 | { | 125 | { |
129 | struct tcf_gact *p = PRIV(a, gact); | 126 | struct tcf_gact *gact = a->priv; |
130 | 127 | ||
131 | if (p != NULL) | 128 | if (gact) |
132 | return tcf_hash_release(p, bind); | 129 | return tcf_hash_release(&gact->common, bind, &gact_hash_info); |
133 | return 0; | 130 | return 0; |
134 | } | 131 | } |
135 | 132 | ||
136 | static int | 133 | static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) |
137 | tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) | ||
138 | { | 134 | { |
139 | struct tcf_gact *p = PRIV(a, gact); | 135 | struct tcf_gact *gact = a->priv; |
140 | int action = TC_ACT_SHOT; | 136 | int action = TC_ACT_SHOT; |
141 | 137 | ||
142 | spin_lock(&p->lock); | 138 | spin_lock(&gact->tcf_lock); |
143 | #ifdef CONFIG_GACT_PROB | 139 | #ifdef CONFIG_GACT_PROB |
144 | if (p->ptype && gact_rand[p->ptype] != NULL) | 140 | if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) |
145 | action = gact_rand[p->ptype](p); | 141 | action = gact_rand[gact->tcfg_ptype](gact); |
146 | else | 142 | else |
147 | action = p->action; | 143 | action = gact->tcf_action; |
148 | #else | 144 | #else |
149 | action = p->action; | 145 | action = gact->tcf_action; |
150 | #endif | 146 | #endif |
151 | p->bstats.bytes += skb->len; | 147 | gact->tcf_bstats.bytes += skb->len; |
152 | p->bstats.packets++; | 148 | gact->tcf_bstats.packets++; |
153 | if (action == TC_ACT_SHOT) | 149 | if (action == TC_ACT_SHOT) |
154 | p->qstats.drops++; | 150 | gact->tcf_qstats.drops++; |
155 | p->tm.lastuse = jiffies; | 151 | gact->tcf_tm.lastuse = jiffies; |
156 | spin_unlock(&p->lock); | 152 | spin_unlock(&gact->tcf_lock); |
157 | 153 | ||
158 | return action; | 154 | return action; |
159 | } | 155 | } |
160 | 156 | ||
161 | static int | 157 | static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) |
162 | tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) | ||
163 | { | 158 | { |
164 | unsigned char *b = skb->tail; | 159 | unsigned char *b = skb->tail; |
165 | struct tc_gact opt; | 160 | struct tc_gact opt; |
166 | struct tcf_gact *p = PRIV(a, gact); | 161 | struct tcf_gact *gact = a->priv; |
167 | struct tcf_t t; | 162 | struct tcf_t t; |
168 | 163 | ||
169 | opt.index = p->index; | 164 | opt.index = gact->tcf_index; |
170 | opt.refcnt = p->refcnt - ref; | 165 | opt.refcnt = gact->tcf_refcnt - ref; |
171 | opt.bindcnt = p->bindcnt - bind; | 166 | opt.bindcnt = gact->tcf_bindcnt - bind; |
172 | opt.action = p->action; | 167 | opt.action = gact->tcf_action; |
173 | RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); | 168 | RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); |
174 | #ifdef CONFIG_GACT_PROB | 169 | #ifdef CONFIG_GACT_PROB |
175 | if (p->ptype) { | 170 | if (gact->tcfg_ptype) { |
176 | struct tc_gact_p p_opt; | 171 | struct tc_gact_p p_opt; |
177 | p_opt.paction = p->paction; | 172 | p_opt.paction = gact->tcfg_paction; |
178 | p_opt.pval = p->pval; | 173 | p_opt.pval = gact->tcfg_pval; |
179 | p_opt.ptype = p->ptype; | 174 | p_opt.ptype = gact->tcfg_ptype; |
180 | RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); | 175 | RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); |
181 | } | 176 | } |
182 | #endif | 177 | #endif |
183 | t.install = jiffies_to_clock_t(jiffies - p->tm.install); | 178 | t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); |
184 | t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); | 179 | t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); |
185 | t.expires = jiffies_to_clock_t(p->tm.expires); | 180 | t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); |
186 | RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t); | 181 | RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t); |
187 | return skb->len; | 182 | return skb->len; |
188 | 183 | ||
189 | rtattr_failure: | 184 | rtattr_failure: |
190 | skb_trim(skb, b - skb->data); | 185 | skb_trim(skb, b - skb->data); |
191 | return -1; | 186 | return -1; |
192 | } | 187 | } |
193 | 188 | ||
194 | static struct tc_action_ops act_gact_ops = { | 189 | static struct tc_action_ops act_gact_ops = { |
195 | .kind = "gact", | 190 | .kind = "gact", |
191 | .hinfo = &gact_hash_info, | ||
196 | .type = TCA_ACT_GACT, | 192 | .type = TCA_ACT_GACT, |
197 | .capab = TCA_CAP_NONE, | 193 | .capab = TCA_CAP_NONE, |
198 | .owner = THIS_MODULE, | 194 | .owner = THIS_MODULE, |
@@ -208,8 +204,7 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); | |||
208 | MODULE_DESCRIPTION("Generic Classifier actions"); | 204 | MODULE_DESCRIPTION("Generic Classifier actions"); |
209 | MODULE_LICENSE("GPL"); | 205 | MODULE_LICENSE("GPL"); |
210 | 206 | ||
211 | static int __init | 207 | static int __init gact_init_module(void) |
212 | gact_init_module(void) | ||
213 | { | 208 | { |
214 | #ifdef CONFIG_GACT_PROB | 209 | #ifdef CONFIG_GACT_PROB |
215 | printk("GACT probability on\n"); | 210 | printk("GACT probability on\n"); |
@@ -219,8 +214,7 @@ gact_init_module(void) | |||
219 | return tcf_register_action(&act_gact_ops); | 214 | return tcf_register_action(&act_gact_ops); |
220 | } | 215 | } |
221 | 216 | ||
222 | static void __exit | 217 | static void __exit gact_cleanup_module(void) |
223 | gact_cleanup_module(void) | ||
224 | { | 218 | { |
225 | tcf_unregister_action(&act_gact_ops); | 219 | tcf_unregister_action(&act_gact_ops); |
226 | } | 220 | } |
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d799e01248c4..d8c9310da6e5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c | |||
@@ -38,25 +38,19 @@ | |||
38 | 38 | ||
39 | #include <linux/netfilter_ipv4/ip_tables.h> | 39 | #include <linux/netfilter_ipv4/ip_tables.h> |
40 | 40 | ||
41 | /* use generic hash table */ | ||
42 | #define MY_TAB_SIZE 16 | ||
43 | #define MY_TAB_MASK 15 | ||
44 | 41 | ||
45 | static u32 idx_gen; | 42 | #define IPT_TAB_MASK 15 |
46 | static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE]; | 43 | static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1]; |
47 | /* ipt hash table lock */ | 44 | static u32 ipt_idx_gen; |
48 | static DEFINE_RWLOCK(ipt_lock); | 45 | static DEFINE_RWLOCK(ipt_lock); |
49 | 46 | ||
50 | /* ovewrride the defaults */ | 47 | static struct tcf_hashinfo ipt_hash_info = { |
51 | #define tcf_st tcf_ipt | 48 | .htab = tcf_ipt_ht, |
52 | #define tcf_t_lock ipt_lock | 49 | .hmask = IPT_TAB_MASK, |
53 | #define tcf_ht tcf_ipt_ht | 50 | .lock = &ipt_lock, |
54 | 51 | }; | |
55 | #define CONFIG_NET_ACT_INIT | ||
56 | #include <net/pkt_act.h> | ||
57 | 52 | ||
58 | static int | 53 | static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) |
59 | ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) | ||
60 | { | 54 | { |
61 | struct ipt_target *target; | 55 | struct ipt_target *target; |
62 | int ret = 0; | 56 | int ret = 0; |
@@ -65,7 +59,6 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) | |||
65 | if (!target) | 59 | if (!target) |
66 | return -ENOENT; | 60 | return -ENOENT; |
67 | 61 | ||
68 | DPRINTK("ipt_init_target: found %s\n", target->name); | ||
69 | t->u.kernel.target = target; | 62 | t->u.kernel.target = target; |
70 | 63 | ||
71 | ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), | 64 | ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), |
@@ -76,10 +69,7 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) | |||
76 | if (t->u.kernel.target->checkentry | 69 | if (t->u.kernel.target->checkentry |
77 | && !t->u.kernel.target->checkentry(table, NULL, | 70 | && !t->u.kernel.target->checkentry(table, NULL, |
78 | t->u.kernel.target, t->data, | 71 | t->u.kernel.target, t->data, |
79 | t->u.target_size - sizeof(*t), | ||
80 | hook)) { | 72 | hook)) { |
81 | DPRINTK("ipt_init_target: check failed for `%s'.\n", | ||
82 | t->u.kernel.target->name); | ||
83 | module_put(t->u.kernel.target->me); | 73 | module_put(t->u.kernel.target->me); |
84 | ret = -EINVAL; | 74 | ret = -EINVAL; |
85 | } | 75 | } |
@@ -87,40 +77,37 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) | |||
87 | return ret; | 77 | return ret; |
88 | } | 78 | } |
89 | 79 | ||
90 | static void | 80 | static void ipt_destroy_target(struct ipt_entry_target *t) |
91 | ipt_destroy_target(struct ipt_entry_target *t) | ||
92 | { | 81 | { |
93 | if (t->u.kernel.target->destroy) | 82 | if (t->u.kernel.target->destroy) |
94 | t->u.kernel.target->destroy(t->u.kernel.target, t->data, | 83 | t->u.kernel.target->destroy(t->u.kernel.target, t->data); |
95 | t->u.target_size - sizeof(*t)); | ||
96 | module_put(t->u.kernel.target->me); | 84 | module_put(t->u.kernel.target->me); |
97 | } | 85 | } |
98 | 86 | ||
99 | static int | 87 | static int tcf_ipt_release(struct tcf_ipt *ipt, int bind) |
100 | tcf_ipt_release(struct tcf_ipt *p, int bind) | ||
101 | { | 88 | { |
102 | int ret = 0; | 89 | int ret = 0; |
103 | if (p) { | 90 | if (ipt) { |
104 | if (bind) | 91 | if (bind) |
105 | p->bindcnt--; | 92 | ipt->tcf_bindcnt--; |
106 | p->refcnt--; | 93 | ipt->tcf_refcnt--; |
107 | if (p->bindcnt <= 0 && p->refcnt <= 0) { | 94 | if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) { |
108 | ipt_destroy_target(p->t); | 95 | ipt_destroy_target(ipt->tcfi_t); |
109 | kfree(p->tname); | 96 | kfree(ipt->tcfi_tname); |
110 | kfree(p->t); | 97 | kfree(ipt->tcfi_t); |
111 | tcf_hash_destroy(p); | 98 | tcf_hash_destroy(&ipt->common, &ipt_hash_info); |
112 | ret = ACT_P_DELETED; | 99 | ret = ACT_P_DELETED; |
113 | } | 100 | } |
114 | } | 101 | } |
115 | return ret; | 102 | return ret; |
116 | } | 103 | } |
117 | 104 | ||
118 | static int | 105 | static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est, |
119 | tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, | 106 | struct tc_action *a, int ovr, int bind) |
120 | int ovr, int bind) | ||
121 | { | 107 | { |
122 | struct rtattr *tb[TCA_IPT_MAX]; | 108 | struct rtattr *tb[TCA_IPT_MAX]; |
123 | struct tcf_ipt *p; | 109 | struct tcf_ipt *ipt; |
110 | struct tcf_common *pc; | ||
124 | struct ipt_entry_target *td, *t; | 111 | struct ipt_entry_target *td, *t; |
125 | char *tname; | 112 | char *tname; |
126 | int ret = 0, err; | 113 | int ret = 0, err; |
@@ -144,49 +131,51 @@ tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, | |||
144 | RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32)) | 131 | RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32)) |
145 | index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]); | 132 | index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]); |
146 | 133 | ||
147 | p = tcf_hash_check(index, a, ovr, bind); | 134 | pc = tcf_hash_check(index, a, bind, &ipt_hash_info); |
148 | if (p == NULL) { | 135 | if (!pc) { |
149 | p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind); | 136 | pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, |
150 | if (p == NULL) | 137 | &ipt_idx_gen, &ipt_hash_info); |
138 | if (unlikely(!pc)) | ||
151 | return -ENOMEM; | 139 | return -ENOMEM; |
152 | ret = ACT_P_CREATED; | 140 | ret = ACT_P_CREATED; |
153 | } else { | 141 | } else { |
154 | if (!ovr) { | 142 | if (!ovr) { |
155 | tcf_ipt_release(p, bind); | 143 | tcf_ipt_release(to_ipt(pc), bind); |
156 | return -EEXIST; | 144 | return -EEXIST; |
157 | } | 145 | } |
158 | } | 146 | } |
147 | ipt = to_ipt(pc); | ||
159 | 148 | ||
160 | hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]); | 149 | hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]); |
161 | 150 | ||
162 | err = -ENOMEM; | 151 | err = -ENOMEM; |
163 | tname = kmalloc(IFNAMSIZ, GFP_KERNEL); | 152 | tname = kmalloc(IFNAMSIZ, GFP_KERNEL); |
164 | if (tname == NULL) | 153 | if (unlikely(!tname)) |
165 | goto err1; | 154 | goto err1; |
166 | if (tb[TCA_IPT_TABLE - 1] == NULL || | 155 | if (tb[TCA_IPT_TABLE - 1] == NULL || |
167 | rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ) | 156 | rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ) |
168 | strcpy(tname, "mangle"); | 157 | strcpy(tname, "mangle"); |
169 | 158 | ||
170 | t = kmalloc(td->u.target_size, GFP_KERNEL); | 159 | t = kmalloc(td->u.target_size, GFP_KERNEL); |
171 | if (t == NULL) | 160 | if (unlikely(!t)) |
172 | goto err2; | 161 | goto err2; |
173 | memcpy(t, td, td->u.target_size); | 162 | memcpy(t, td, td->u.target_size); |
174 | 163 | ||
175 | if ((err = ipt_init_target(t, tname, hook)) < 0) | 164 | if ((err = ipt_init_target(t, tname, hook)) < 0) |
176 | goto err3; | 165 | goto err3; |
177 | 166 | ||
178 | spin_lock_bh(&p->lock); | 167 | spin_lock_bh(&ipt->tcf_lock); |
179 | if (ret != ACT_P_CREATED) { | 168 | if (ret != ACT_P_CREATED) { |
180 | ipt_destroy_target(p->t); | 169 | ipt_destroy_target(ipt->tcfi_t); |
181 | kfree(p->tname); | 170 | kfree(ipt->tcfi_tname); |
182 | kfree(p->t); | 171 | kfree(ipt->tcfi_t); |
183 | } | 172 | } |
184 | p->tname = tname; | 173 | ipt->tcfi_tname = tname; |
185 | p->t = t; | 174 | ipt->tcfi_t = t; |
186 | p->hook = hook; | 175 | ipt->tcfi_hook = hook; |
187 | spin_unlock_bh(&p->lock); | 176 | spin_unlock_bh(&ipt->tcf_lock); |
188 | if (ret == ACT_P_CREATED) | 177 | if (ret == ACT_P_CREATED) |
189 | tcf_hash_insert(p); | 178 | tcf_hash_insert(pc, &ipt_hash_info); |
190 | return ret; | 179 | return ret; |
191 | 180 | ||
192 | err3: | 181 | err3: |
@@ -194,33 +183,32 @@ err3: | |||
194 | err2: | 183 | err2: |
195 | kfree(tname); | 184 | kfree(tname); |
196 | err1: | 185 | err1: |
197 | kfree(p); | 186 | kfree(pc); |
198 | return err; | 187 | return err; |
199 | } | 188 | } |
200 | 189 | ||
201 | static int | 190 | static int tcf_ipt_cleanup(struct tc_action *a, int bind) |
202 | tcf_ipt_cleanup(struct tc_action *a, int bind) | ||
203 | { | 191 | { |
204 | struct tcf_ipt *p = PRIV(a, ipt); | 192 | struct tcf_ipt *ipt = a->priv; |
205 | return tcf_ipt_release(p, bind); | 193 | return tcf_ipt_release(ipt, bind); |
206 | } | 194 | } |
207 | 195 | ||
208 | static int | 196 | static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, |
209 | tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) | 197 | struct tcf_result *res) |
210 | { | 198 | { |
211 | int ret = 0, result = 0; | 199 | int ret = 0, result = 0; |
212 | struct tcf_ipt *p = PRIV(a, ipt); | 200 | struct tcf_ipt *ipt = a->priv; |
213 | 201 | ||
214 | if (skb_cloned(skb)) { | 202 | if (skb_cloned(skb)) { |
215 | if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) | 203 | if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) |
216 | return TC_ACT_UNSPEC; | 204 | return TC_ACT_UNSPEC; |
217 | } | 205 | } |
218 | 206 | ||
219 | spin_lock(&p->lock); | 207 | spin_lock(&ipt->tcf_lock); |
220 | 208 | ||
221 | p->tm.lastuse = jiffies; | 209 | ipt->tcf_tm.lastuse = jiffies; |
222 | p->bstats.bytes += skb->len; | 210 | ipt->tcf_bstats.bytes += skb->len; |
223 | p->bstats.packets++; | 211 | ipt->tcf_bstats.packets++; |
224 | 212 | ||
225 | /* yes, we have to worry about both in and out dev | 213 | /* yes, we have to worry about both in and out dev |
226 | worry later - danger - this API seems to have changed | 214 | worry later - danger - this API seems to have changed |
@@ -229,16 +217,17 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) | |||
229 | /* iptables targets take a double skb pointer in case the skb | 217 | /* iptables targets take a double skb pointer in case the skb |
230 | * needs to be replaced. We don't own the skb, so this must not | 218 | * needs to be replaced. We don't own the skb, so this must not |
231 | * happen. The pskb_expand_head above should make sure of this */ | 219 | * happen. The pskb_expand_head above should make sure of this */ |
232 | ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, p->hook, | 220 | ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, |
233 | p->t->u.kernel.target, p->t->data, | 221 | ipt->tcfi_hook, |
234 | NULL); | 222 | ipt->tcfi_t->u.kernel.target, |
223 | ipt->tcfi_t->data); | ||
235 | switch (ret) { | 224 | switch (ret) { |
236 | case NF_ACCEPT: | 225 | case NF_ACCEPT: |
237 | result = TC_ACT_OK; | 226 | result = TC_ACT_OK; |
238 | break; | 227 | break; |
239 | case NF_DROP: | 228 | case NF_DROP: |
240 | result = TC_ACT_SHOT; | 229 | result = TC_ACT_SHOT; |
241 | p->qstats.drops++; | 230 | ipt->tcf_qstats.drops++; |
242 | break; | 231 | break; |
243 | case IPT_CONTINUE: | 232 | case IPT_CONTINUE: |
244 | result = TC_ACT_PIPE; | 233 | result = TC_ACT_PIPE; |
@@ -249,53 +238,46 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) | |||
249 | result = TC_POLICE_OK; | 238 | result = TC_POLICE_OK; |
250 | break; | 239 | break; |
251 | } | 240 | } |
252 | spin_unlock(&p->lock); | 241 | spin_unlock(&ipt->tcf_lock); |
253 | return result; | 242 | return result; |
254 | 243 | ||
255 | } | 244 | } |
256 | 245 | ||
257 | static int | 246 | static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) |
258 | tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) | ||
259 | { | 247 | { |
248 | unsigned char *b = skb->tail; | ||
249 | struct tcf_ipt *ipt = a->priv; | ||
260 | struct ipt_entry_target *t; | 250 | struct ipt_entry_target *t; |
261 | struct tcf_t tm; | 251 | struct tcf_t tm; |
262 | struct tc_cnt c; | 252 | struct tc_cnt c; |
263 | unsigned char *b = skb->tail; | ||
264 | struct tcf_ipt *p = PRIV(a, ipt); | ||
265 | 253 | ||
266 | /* for simple targets kernel size == user size | 254 | /* for simple targets kernel size == user size |
267 | ** user name = target name | 255 | ** user name = target name |
268 | ** for foolproof you need to not assume this | 256 | ** for foolproof you need to not assume this |
269 | */ | 257 | */ |
270 | 258 | ||
271 | t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC); | 259 | t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); |
272 | if (t == NULL) | 260 | if (unlikely(!t)) |
273 | goto rtattr_failure; | 261 | goto rtattr_failure; |
274 | 262 | ||
275 | c.bindcnt = p->bindcnt - bind; | 263 | c.bindcnt = ipt->tcf_bindcnt - bind; |
276 | c.refcnt = p->refcnt - ref; | 264 | c.refcnt = ipt->tcf_refcnt - ref; |
277 | memcpy(t, p->t, p->t->u.user.target_size); | 265 | memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size); |
278 | strcpy(t->u.user.name, p->t->u.kernel.target->name); | 266 | strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); |
279 | 267 | ||
280 | DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname, | 268 | RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t); |
281 | strlen(p->tname)); | 269 | RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index); |
282 | DPRINTK("\tdump target name %s size %d size user %d " | 270 | RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook); |
283 | "data[0] %x data[1] %x\n", p->t->u.kernel.target->name, | ||
284 | p->t->u.target_size, p->t->u.user.target_size, | ||
285 | p->t->data[0], p->t->data[1]); | ||
286 | RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t); | ||
287 | RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index); | ||
288 | RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook); | ||
289 | RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c); | 271 | RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c); |
290 | RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname); | 272 | RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname); |
291 | tm.install = jiffies_to_clock_t(jiffies - p->tm.install); | 273 | tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); |
292 | tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); | 274 | tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); |
293 | tm.expires = jiffies_to_clock_t(p->tm.expires); | 275 | tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); |
294 | RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm); | 276 | RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm); |
295 | kfree(t); | 277 | kfree(t); |
296 | return skb->len; | 278 | return skb->len; |
297 | 279 | ||
298 | rtattr_failure: | 280 | rtattr_failure: |
299 | skb_trim(skb, b - skb->data); | 281 | skb_trim(skb, b - skb->data); |
300 | kfree(t); | 282 | kfree(t); |
301 | return -1; | 283 | return -1; |
@@ -303,6 +285,7 @@ tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) | |||
303 | 285 | ||
304 | static struct tc_action_ops act_ipt_ops = { | 286 | static struct tc_action_ops act_ipt_ops = { |
305 | .kind = "ipt", | 287 | .kind = "ipt", |
288 | .hinfo = &ipt_hash_info, | ||
306 | .type = TCA_ACT_IPT, | 289 | .type = TCA_ACT_IPT, |
307 | .capab = TCA_CAP_NONE, | 290 | .capab = TCA_CAP_NONE, |
308 | .owner = THIS_MODULE, | 291 | .owner = THIS_MODULE, |
@@ -318,14 +301,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); | |||
318 | MODULE_DESCRIPTION("Iptables target actions"); | 301 | MODULE_DESCRIPTION("Iptables target actions"); |
319 | MODULE_LICENSE("GPL"); | 302 | MODULE_LICENSE("GPL"); |
320 | 303 | ||
321 | static int __init | 304 | static int __init ipt_init_module(void) |
322 | ipt_init_module(void) | ||
323 | { | 305 | { |
324 | return tcf_register_action(&act_ipt_ops); | 306 | return tcf_register_action(&act_ipt_ops); |
325 | } | 307 | } |
326 | 308 | ||
327 | static void __exit | 309 | static void __exit ipt_cleanup_module(void) |
328 | ipt_cleanup_module(void) | ||
329 | { | 310 | { |
330 | tcf_unregister_action(&act_ipt_ops); | 311 | tcf_unregister_action(&act_ipt_ops); |
331 | } | 312 | } |
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index fc562047ecc5..483897271f15 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c | |||
@@ -39,46 +39,39 @@ | |||
39 | #include <linux/etherdevice.h> | 39 | #include <linux/etherdevice.h> |
40 | #include <linux/if_arp.h> | 40 | #include <linux/if_arp.h> |
41 | 41 | ||
42 | 42 | #define MIRRED_TAB_MASK 7 | |
43 | /* use generic hash table */ | 43 | static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; |
44 | #define MY_TAB_SIZE 8 | 44 | static u32 mirred_idx_gen; |
45 | #define MY_TAB_MASK (MY_TAB_SIZE - 1) | ||
46 | static u32 idx_gen; | ||
47 | static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE]; | ||
48 | static DEFINE_RWLOCK(mirred_lock); | 45 | static DEFINE_RWLOCK(mirred_lock); |
49 | 46 | ||
50 | /* ovewrride the defaults */ | 47 | static struct tcf_hashinfo mirred_hash_info = { |
51 | #define tcf_st tcf_mirred | 48 | .htab = tcf_mirred_ht, |
52 | #define tc_st tc_mirred | 49 | .hmask = MIRRED_TAB_MASK, |
53 | #define tcf_t_lock mirred_lock | 50 | .lock = &mirred_lock, |
54 | #define tcf_ht tcf_mirred_ht | 51 | }; |
55 | |||
56 | #define CONFIG_NET_ACT_INIT 1 | ||
57 | #include <net/pkt_act.h> | ||
58 | 52 | ||
59 | static inline int | 53 | static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) |
60 | tcf_mirred_release(struct tcf_mirred *p, int bind) | ||
61 | { | 54 | { |
62 | if (p) { | 55 | if (m) { |
63 | if (bind) | 56 | if (bind) |
64 | p->bindcnt--; | 57 | m->tcf_bindcnt--; |
65 | p->refcnt--; | 58 | m->tcf_refcnt--; |
66 | if(!p->bindcnt && p->refcnt <= 0) { | 59 | if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { |
67 | dev_put(p->dev); | 60 | dev_put(m->tcfm_dev); |
68 | tcf_hash_destroy(p); | 61 | tcf_hash_destroy(&m->common, &mirred_hash_info); |
69 | return 1; | 62 | return 1; |
70 | } | 63 | } |
71 | } | 64 | } |
72 | return 0; | 65 | return 0; |
73 | } | 66 | } |
74 | 67 | ||
75 | static int | 68 | static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, |
76 | tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, | 69 | struct tc_action *a, int ovr, int bind) |
77 | int ovr, int bind) | ||
78 | { | 70 | { |
79 | struct rtattr *tb[TCA_MIRRED_MAX]; | 71 | struct rtattr *tb[TCA_MIRRED_MAX]; |
80 | struct tc_mirred *parm; | 72 | struct tc_mirred *parm; |
81 | struct tcf_mirred *p; | 73 | struct tcf_mirred *m; |
74 | struct tcf_common *pc; | ||
82 | struct net_device *dev = NULL; | 75 | struct net_device *dev = NULL; |
83 | int ret = 0; | 76 | int ret = 0; |
84 | int ok_push = 0; | 77 | int ok_push = 0; |
@@ -110,64 +103,62 @@ tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, | |||
110 | } | 103 | } |
111 | } | 104 | } |
112 | 105 | ||
113 | p = tcf_hash_check(parm->index, a, ovr, bind); | 106 | pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info); |
114 | if (p == NULL) { | 107 | if (!pc) { |
115 | if (!parm->ifindex) | 108 | if (!parm->ifindex) |
116 | return -EINVAL; | 109 | return -EINVAL; |
117 | p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); | 110 | pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, |
118 | if (p == NULL) | 111 | &mirred_idx_gen, &mirred_hash_info); |
112 | if (unlikely(!pc)) | ||
119 | return -ENOMEM; | 113 | return -ENOMEM; |
120 | ret = ACT_P_CREATED; | 114 | ret = ACT_P_CREATED; |
121 | } else { | 115 | } else { |
122 | if (!ovr) { | 116 | if (!ovr) { |
123 | tcf_mirred_release(p, bind); | 117 | tcf_mirred_release(to_mirred(pc), bind); |
124 | return -EEXIST; | 118 | return -EEXIST; |
125 | } | 119 | } |
126 | } | 120 | } |
121 | m = to_mirred(pc); | ||
127 | 122 | ||
128 | spin_lock_bh(&p->lock); | 123 | spin_lock_bh(&m->tcf_lock); |
129 | p->action = parm->action; | 124 | m->tcf_action = parm->action; |
130 | p->eaction = parm->eaction; | 125 | m->tcfm_eaction = parm->eaction; |
131 | if (parm->ifindex) { | 126 | if (parm->ifindex) { |
132 | p->ifindex = parm->ifindex; | 127 | m->tcfm_ifindex = parm->ifindex; |
133 | if (ret != ACT_P_CREATED) | 128 | if (ret != ACT_P_CREATED) |
134 | dev_put(p->dev); | 129 | dev_put(m->tcfm_dev); |
135 | p->dev = dev; | 130 | m->tcfm_dev = dev; |
136 | dev_hold(dev); | 131 | dev_hold(dev); |
137 | p->ok_push = ok_push; | 132 | m->tcfm_ok_push = ok_push; |
138 | } | 133 | } |
139 | spin_unlock_bh(&p->lock); | 134 | spin_unlock_bh(&m->tcf_lock); |
140 | if (ret == ACT_P_CREATED) | 135 | if (ret == ACT_P_CREATED) |
141 | tcf_hash_insert(p); | 136 | tcf_hash_insert(pc, &mirred_hash_info); |
142 | 137 | ||
143 | DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s " | ||
144 | "ifindex %d\n", parm->index, parm->action, parm->eaction, | ||
145 | dev->name, parm->ifindex); | ||
146 | return ret; | 138 | return ret; |
147 | } | 139 | } |
148 | 140 | ||
149 | static int | 141 | static int tcf_mirred_cleanup(struct tc_action *a, int bind) |
150 | tcf_mirred_cleanup(struct tc_action *a, int bind) | ||
151 | { | 142 | { |
152 | struct tcf_mirred *p = PRIV(a, mirred); | 143 | struct tcf_mirred *m = a->priv; |
153 | 144 | ||
154 | if (p != NULL) | 145 | if (m) |
155 | return tcf_mirred_release(p, bind); | 146 | return tcf_mirred_release(m, bind); |
156 | return 0; | 147 | return 0; |
157 | } | 148 | } |
158 | 149 | ||
159 | static int | 150 | static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, |
160 | tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) | 151 | struct tcf_result *res) |
161 | { | 152 | { |
162 | struct tcf_mirred *p = PRIV(a, mirred); | 153 | struct tcf_mirred *m = a->priv; |
163 | struct net_device *dev; | 154 | struct net_device *dev; |
164 | struct sk_buff *skb2 = NULL; | 155 | struct sk_buff *skb2 = NULL; |
165 | u32 at = G_TC_AT(skb->tc_verd); | 156 | u32 at = G_TC_AT(skb->tc_verd); |
166 | 157 | ||
167 | spin_lock(&p->lock); | 158 | spin_lock(&m->tcf_lock); |
168 | 159 | ||
169 | dev = p->dev; | 160 | dev = m->tcfm_dev; |
170 | p->tm.lastuse = jiffies; | 161 | m->tcf_tm.lastuse = jiffies; |
171 | 162 | ||
172 | if (!(dev->flags&IFF_UP) ) { | 163 | if (!(dev->flags&IFF_UP) ) { |
173 | if (net_ratelimit()) | 164 | if (net_ratelimit()) |
@@ -176,10 +167,10 @@ tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) | |||
176 | bad_mirred: | 167 | bad_mirred: |
177 | if (skb2 != NULL) | 168 | if (skb2 != NULL) |
178 | kfree_skb(skb2); | 169 | kfree_skb(skb2); |
179 | p->qstats.overlimits++; | 170 | m->tcf_qstats.overlimits++; |
180 | p->bstats.bytes += skb->len; | 171 | m->tcf_bstats.bytes += skb->len; |
181 | p->bstats.packets++; | 172 | m->tcf_bstats.packets++; |
182 | spin_unlock(&p->lock); | 173 | spin_unlock(&m->tcf_lock); |
183 | /* should we be asking for packet to be dropped? | 174 | /* should we be asking for packet to be dropped? |
184 | * may make sense for redirect case only | 175 | * may make sense for redirect case only |
185 | */ | 176 | */ |
@@ -189,59 +180,59 @@ bad_mirred: | |||
189 | skb2 = skb_clone(skb, GFP_ATOMIC); | 180 | skb2 = skb_clone(skb, GFP_ATOMIC); |
190 | if (skb2 == NULL) | 181 | if (skb2 == NULL) |
191 | goto bad_mirred; | 182 | goto bad_mirred; |
192 | if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) { | 183 | if (m->tcfm_eaction != TCA_EGRESS_MIRROR && |
184 | m->tcfm_eaction != TCA_EGRESS_REDIR) { | ||
193 | if (net_ratelimit()) | 185 | if (net_ratelimit()) |
194 | printk("tcf_mirred unknown action %d\n", p->eaction); | 186 | printk("tcf_mirred unknown action %d\n", |
187 | m->tcfm_eaction); | ||
195 | goto bad_mirred; | 188 | goto bad_mirred; |
196 | } | 189 | } |
197 | 190 | ||
198 | p->bstats.bytes += skb2->len; | 191 | m->tcf_bstats.bytes += skb2->len; |
199 | p->bstats.packets++; | 192 | m->tcf_bstats.packets++; |
200 | if (!(at & AT_EGRESS)) | 193 | if (!(at & AT_EGRESS)) |
201 | if (p->ok_push) | 194 | if (m->tcfm_ok_push) |
202 | skb_push(skb2, skb2->dev->hard_header_len); | 195 | skb_push(skb2, skb2->dev->hard_header_len); |
203 | 196 | ||
204 | /* mirror is always swallowed */ | 197 | /* mirror is always swallowed */ |
205 | if (p->eaction != TCA_EGRESS_MIRROR) | 198 | if (m->tcfm_eaction != TCA_EGRESS_MIRROR) |
206 | skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); | 199 | skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); |
207 | 200 | ||
208 | skb2->dev = dev; | 201 | skb2->dev = dev; |
209 | skb2->input_dev = skb->dev; | 202 | skb2->input_dev = skb->dev; |
210 | dev_queue_xmit(skb2); | 203 | dev_queue_xmit(skb2); |
211 | spin_unlock(&p->lock); | 204 | spin_unlock(&m->tcf_lock); |
212 | return p->action; | 205 | return m->tcf_action; |
213 | } | 206 | } |
214 | 207 | ||
215 | static int | 208 | static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) |
216 | tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) | ||
217 | { | 209 | { |
218 | unsigned char *b = skb->tail; | 210 | unsigned char *b = skb->tail; |
211 | struct tcf_mirred *m = a->priv; | ||
219 | struct tc_mirred opt; | 212 | struct tc_mirred opt; |
220 | struct tcf_mirred *p = PRIV(a, mirred); | ||
221 | struct tcf_t t; | 213 | struct tcf_t t; |
222 | 214 | ||
223 | opt.index = p->index; | 215 | opt.index = m->tcf_index; |
224 | opt.action = p->action; | 216 | opt.action = m->tcf_action; |
225 | opt.refcnt = p->refcnt - ref; | 217 | opt.refcnt = m->tcf_refcnt - ref; |
226 | opt.bindcnt = p->bindcnt - bind; | 218 | opt.bindcnt = m->tcf_bindcnt - bind; |
227 | opt.eaction = p->eaction; | 219 | opt.eaction = m->tcfm_eaction; |
228 | opt.ifindex = p->ifindex; | 220 | opt.ifindex = m->tcfm_ifindex; |
229 | DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n", | ||
230 | p->index, p->action, p->eaction, p->ifindex); | ||
231 | RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); | 221 | RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); |
232 | t.install = jiffies_to_clock_t(jiffies - p->tm.install); | 222 | t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); |
233 | t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); | 223 | t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); |
234 | t.expires = jiffies_to_clock_t(p->tm.expires); | 224 | t.expires = jiffies_to_clock_t(m->tcf_tm.expires); |
235 | RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t); | 225 | RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t); |
236 | return skb->len; | 226 | return skb->len; |
237 | 227 | ||
238 | rtattr_failure: | 228 | rtattr_failure: |
239 | skb_trim(skb, b - skb->data); | 229 | skb_trim(skb, b - skb->data); |
240 | return -1; | 230 | return -1; |
241 | } | 231 | } |
242 | 232 | ||
243 | static struct tc_action_ops act_mirred_ops = { | 233 | static struct tc_action_ops act_mirred_ops = { |
244 | .kind = "mirred", | 234 | .kind = "mirred", |
235 | .hinfo = &mirred_hash_info, | ||
245 | .type = TCA_ACT_MIRRED, | 236 | .type = TCA_ACT_MIRRED, |
246 | .capab = TCA_CAP_NONE, | 237 | .capab = TCA_CAP_NONE, |
247 | .owner = THIS_MODULE, | 238 | .owner = THIS_MODULE, |
@@ -257,15 +248,13 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002)"); | |||
257 | MODULE_DESCRIPTION("Device Mirror/redirect actions"); | 248 | MODULE_DESCRIPTION("Device Mirror/redirect actions"); |
258 | MODULE_LICENSE("GPL"); | 249 | MODULE_LICENSE("GPL"); |
259 | 250 | ||
260 | static int __init | 251 | static int __init mirred_init_module(void) |
261 | mirred_init_module(void) | ||
262 | { | 252 | { |
263 | printk("Mirror/redirect action on\n"); | 253 | printk("Mirror/redirect action on\n"); |
264 | return tcf_register_action(&act_mirred_ops); | 254 | return tcf_register_action(&act_mirred_ops); |
265 | } | 255 | } |
266 | 256 | ||
267 | static void __exit | 257 | static void __exit mirred_cleanup_module(void) |
268 | mirred_cleanup_module(void) | ||
269 | { | 258 | { |
270 | tcf_unregister_action(&act_mirred_ops); | 259 | tcf_unregister_action(&act_mirred_ops); |
271 | } | 260 | } |
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index f257475e0e0c..8ac65c219b98 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c | |||
@@ -33,32 +33,25 @@ | |||
33 | #include <linux/tc_act/tc_pedit.h> | 33 | #include <linux/tc_act/tc_pedit.h> |
34 | #include <net/tc_act/tc_pedit.h> | 34 | #include <net/tc_act/tc_pedit.h> |
35 | 35 | ||
36 | 36 | #define PEDIT_TAB_MASK 15 | |
37 | #define PEDIT_DEB 1 | 37 | static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1]; |
38 | 38 | static u32 pedit_idx_gen; | |
39 | /* use generic hash table */ | ||
40 | #define MY_TAB_SIZE 16 | ||
41 | #define MY_TAB_MASK 15 | ||
42 | static u32 idx_gen; | ||
43 | static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE]; | ||
44 | static DEFINE_RWLOCK(pedit_lock); | 39 | static DEFINE_RWLOCK(pedit_lock); |
45 | 40 | ||
46 | #define tcf_st tcf_pedit | 41 | static struct tcf_hashinfo pedit_hash_info = { |
47 | #define tc_st tc_pedit | 42 | .htab = tcf_pedit_ht, |
48 | #define tcf_t_lock pedit_lock | 43 | .hmask = PEDIT_TAB_MASK, |
49 | #define tcf_ht tcf_pedit_ht | 44 | .lock = &pedit_lock, |
50 | 45 | }; | |
51 | #define CONFIG_NET_ACT_INIT 1 | ||
52 | #include <net/pkt_act.h> | ||
53 | 46 | ||
54 | static int | 47 | static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est, |
55 | tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, | 48 | struct tc_action *a, int ovr, int bind) |
56 | int ovr, int bind) | ||
57 | { | 49 | { |
58 | struct rtattr *tb[TCA_PEDIT_MAX]; | 50 | struct rtattr *tb[TCA_PEDIT_MAX]; |
59 | struct tc_pedit *parm; | 51 | struct tc_pedit *parm; |
60 | int ret = 0; | 52 | int ret = 0; |
61 | struct tcf_pedit *p; | 53 | struct tcf_pedit *p; |
54 | struct tcf_common *pc; | ||
62 | struct tc_pedit_key *keys = NULL; | 55 | struct tc_pedit_key *keys = NULL; |
63 | int ksize; | 56 | int ksize; |
64 | 57 | ||
@@ -73,54 +66,56 @@ tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, | |||
73 | if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize) | 66 | if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize) |
74 | return -EINVAL; | 67 | return -EINVAL; |
75 | 68 | ||
76 | p = tcf_hash_check(parm->index, a, ovr, bind); | 69 | pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info); |
77 | if (p == NULL) { | 70 | if (!pc) { |
78 | if (!parm->nkeys) | 71 | if (!parm->nkeys) |
79 | return -EINVAL; | 72 | return -EINVAL; |
80 | p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); | 73 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, |
81 | if (p == NULL) | 74 | &pedit_idx_gen, &pedit_hash_info); |
75 | if (unlikely(!pc)) | ||
82 | return -ENOMEM; | 76 | return -ENOMEM; |
77 | p = to_pedit(pc); | ||
83 | keys = kmalloc(ksize, GFP_KERNEL); | 78 | keys = kmalloc(ksize, GFP_KERNEL); |
84 | if (keys == NULL) { | 79 | if (keys == NULL) { |
85 | kfree(p); | 80 | kfree(pc); |
86 | return -ENOMEM; | 81 | return -ENOMEM; |
87 | } | 82 | } |
88 | ret = ACT_P_CREATED; | 83 | ret = ACT_P_CREATED; |
89 | } else { | 84 | } else { |
85 | p = to_pedit(pc); | ||
90 | if (!ovr) { | 86 | if (!ovr) { |
91 | tcf_hash_release(p, bind); | 87 | tcf_hash_release(pc, bind, &pedit_hash_info); |
92 | return -EEXIST; | 88 | return -EEXIST; |
93 | } | 89 | } |
94 | if (p->nkeys && p->nkeys != parm->nkeys) { | 90 | if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { |
95 | keys = kmalloc(ksize, GFP_KERNEL); | 91 | keys = kmalloc(ksize, GFP_KERNEL); |
96 | if (keys == NULL) | 92 | if (keys == NULL) |
97 | return -ENOMEM; | 93 | return -ENOMEM; |
98 | } | 94 | } |
99 | } | 95 | } |
100 | 96 | ||
101 | spin_lock_bh(&p->lock); | 97 | spin_lock_bh(&p->tcf_lock); |
102 | p->flags = parm->flags; | 98 | p->tcfp_flags = parm->flags; |
103 | p->action = parm->action; | 99 | p->tcf_action = parm->action; |
104 | if (keys) { | 100 | if (keys) { |
105 | kfree(p->keys); | 101 | kfree(p->tcfp_keys); |
106 | p->keys = keys; | 102 | p->tcfp_keys = keys; |
107 | p->nkeys = parm->nkeys; | 103 | p->tcfp_nkeys = parm->nkeys; |
108 | } | 104 | } |
109 | memcpy(p->keys, parm->keys, ksize); | 105 | memcpy(p->tcfp_keys, parm->keys, ksize); |
110 | spin_unlock_bh(&p->lock); | 106 | spin_unlock_bh(&p->tcf_lock); |
111 | if (ret == ACT_P_CREATED) | 107 | if (ret == ACT_P_CREATED) |
112 | tcf_hash_insert(p); | 108 | tcf_hash_insert(pc, &pedit_hash_info); |
113 | return ret; | 109 | return ret; |
114 | } | 110 | } |
115 | 111 | ||
116 | static int | 112 | static int tcf_pedit_cleanup(struct tc_action *a, int bind) |
117 | tcf_pedit_cleanup(struct tc_action *a, int bind) | ||
118 | { | 113 | { |
119 | struct tcf_pedit *p = PRIV(a, pedit); | 114 | struct tcf_pedit *p = a->priv; |
120 | 115 | ||
121 | if (p != NULL) { | 116 | if (p) { |
122 | struct tc_pedit_key *keys = p->keys; | 117 | struct tc_pedit_key *keys = p->tcfp_keys; |
123 | if (tcf_hash_release(p, bind)) { | 118 | if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) { |
124 | kfree(keys); | 119 | kfree(keys); |
125 | return 1; | 120 | return 1; |
126 | } | 121 | } |
@@ -128,30 +123,30 @@ tcf_pedit_cleanup(struct tc_action *a, int bind) | |||
128 | return 0; | 123 | return 0; |
129 | } | 124 | } |
130 | 125 | ||
131 | static int | 126 | static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, |
132 | tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) | 127 | struct tcf_result *res) |
133 | { | 128 | { |
134 | struct tcf_pedit *p = PRIV(a, pedit); | 129 | struct tcf_pedit *p = a->priv; |
135 | int i, munged = 0; | 130 | int i, munged = 0; |
136 | u8 *pptr; | 131 | u8 *pptr; |
137 | 132 | ||
138 | if (!(skb->tc_verd & TC_OK2MUNGE)) { | 133 | if (!(skb->tc_verd & TC_OK2MUNGE)) { |
139 | /* should we set skb->cloned? */ | 134 | /* should we set skb->cloned? */ |
140 | if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { | 135 | if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { |
141 | return p->action; | 136 | return p->tcf_action; |
142 | } | 137 | } |
143 | } | 138 | } |
144 | 139 | ||
145 | pptr = skb->nh.raw; | 140 | pptr = skb->nh.raw; |
146 | 141 | ||
147 | spin_lock(&p->lock); | 142 | spin_lock(&p->tcf_lock); |
148 | 143 | ||
149 | p->tm.lastuse = jiffies; | 144 | p->tcf_tm.lastuse = jiffies; |
150 | 145 | ||
151 | if (p->nkeys > 0) { | 146 | if (p->tcfp_nkeys > 0) { |
152 | struct tc_pedit_key *tkey = p->keys; | 147 | struct tc_pedit_key *tkey = p->tcfp_keys; |
153 | 148 | ||
154 | for (i = p->nkeys; i > 0; i--, tkey++) { | 149 | for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { |
155 | u32 *ptr; | 150 | u32 *ptr; |
156 | int offset = tkey->off; | 151 | int offset = tkey->off; |
157 | 152 | ||
@@ -169,7 +164,8 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) | |||
169 | printk("offset must be on 32 bit boundaries\n"); | 164 | printk("offset must be on 32 bit boundaries\n"); |
170 | goto bad; | 165 | goto bad; |
171 | } | 166 | } |
172 | if (skb->len < 0 || (offset > 0 && offset > skb->len)) { | 167 | if (skb->len < 0 || |
168 | (offset > 0 && offset > skb->len)) { | ||
173 | printk("offset %d cant exceed pkt length %d\n", | 169 | printk("offset %d cant exceed pkt length %d\n", |
174 | offset, skb->len); | 170 | offset, skb->len); |
175 | goto bad; | 171 | goto bad; |
@@ -185,63 +181,47 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) | |||
185 | skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); | 181 | skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); |
186 | goto done; | 182 | goto done; |
187 | } else { | 183 | } else { |
188 | printk("pedit BUG: index %d\n",p->index); | 184 | printk("pedit BUG: index %d\n", p->tcf_index); |
189 | } | 185 | } |
190 | 186 | ||
191 | bad: | 187 | bad: |
192 | p->qstats.overlimits++; | 188 | p->tcf_qstats.overlimits++; |
193 | done: | 189 | done: |
194 | p->bstats.bytes += skb->len; | 190 | p->tcf_bstats.bytes += skb->len; |
195 | p->bstats.packets++; | 191 | p->tcf_bstats.packets++; |
196 | spin_unlock(&p->lock); | 192 | spin_unlock(&p->tcf_lock); |
197 | return p->action; | 193 | return p->tcf_action; |
198 | } | 194 | } |
199 | 195 | ||
200 | static int | 196 | static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, |
201 | tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref) | 197 | int bind, int ref) |
202 | { | 198 | { |
203 | unsigned char *b = skb->tail; | 199 | unsigned char *b = skb->tail; |
200 | struct tcf_pedit *p = a->priv; | ||
204 | struct tc_pedit *opt; | 201 | struct tc_pedit *opt; |
205 | struct tcf_pedit *p = PRIV(a, pedit); | ||
206 | struct tcf_t t; | 202 | struct tcf_t t; |
207 | int s; | 203 | int s; |
208 | 204 | ||
209 | s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key); | 205 | s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key); |
210 | 206 | ||
211 | /* netlink spinlocks held above us - must use ATOMIC */ | 207 | /* netlink spinlocks held above us - must use ATOMIC */ |
212 | opt = kzalloc(s, GFP_ATOMIC); | 208 | opt = kzalloc(s, GFP_ATOMIC); |
213 | if (opt == NULL) | 209 | if (unlikely(!opt)) |
214 | return -ENOBUFS; | 210 | return -ENOBUFS; |
215 | 211 | ||
216 | memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key)); | 212 | memcpy(opt->keys, p->tcfp_keys, |
217 | opt->index = p->index; | 213 | p->tcfp_nkeys * sizeof(struct tc_pedit_key)); |
218 | opt->nkeys = p->nkeys; | 214 | opt->index = p->tcf_index; |
219 | opt->flags = p->flags; | 215 | opt->nkeys = p->tcfp_nkeys; |
220 | opt->action = p->action; | 216 | opt->flags = p->tcfp_flags; |
221 | opt->refcnt = p->refcnt - ref; | 217 | opt->action = p->tcf_action; |
222 | opt->bindcnt = p->bindcnt - bind; | 218 | opt->refcnt = p->tcf_refcnt - ref; |
223 | 219 | opt->bindcnt = p->tcf_bindcnt - bind; | |
224 | |||
225 | #ifdef PEDIT_DEB | ||
226 | { | ||
227 | /* Debug - get rid of later */ | ||
228 | int i; | ||
229 | struct tc_pedit_key *key = opt->keys; | ||
230 | |||
231 | for (i=0; i<opt->nkeys; i++, key++) { | ||
232 | printk( "\n key #%d",i); | ||
233 | printk( " at %d: val %08x mask %08x", | ||
234 | (unsigned int)key->off, | ||
235 | (unsigned int)key->val, | ||
236 | (unsigned int)key->mask); | ||
237 | } | ||
238 | } | ||
239 | #endif | ||
240 | 220 | ||
241 | RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt); | 221 | RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt); |
242 | t.install = jiffies_to_clock_t(jiffies - p->tm.install); | 222 | t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); |
243 | t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); | 223 | t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); |
244 | t.expires = jiffies_to_clock_t(p->tm.expires); | 224 | t.expires = jiffies_to_clock_t(p->tcf_tm.expires); |
245 | RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); | 225 | RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); |
246 | kfree(opt); | 226 | kfree(opt); |
247 | return skb->len; | 227 | return skb->len; |
@@ -252,9 +232,9 @@ rtattr_failure: | |||
252 | return -1; | 232 | return -1; |
253 | } | 233 | } |
254 | 234 | ||
255 | static | 235 | static struct tc_action_ops act_pedit_ops = { |
256 | struct tc_action_ops act_pedit_ops = { | ||
257 | .kind = "pedit", | 236 | .kind = "pedit", |
237 | .hinfo = &pedit_hash_info, | ||
258 | .type = TCA_ACT_PEDIT, | 238 | .type = TCA_ACT_PEDIT, |
259 | .capab = TCA_CAP_NONE, | 239 | .capab = TCA_CAP_NONE, |
260 | .owner = THIS_MODULE, | 240 | .owner = THIS_MODULE, |
@@ -270,14 +250,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); | |||
270 | MODULE_DESCRIPTION("Generic Packet Editor actions"); | 250 | MODULE_DESCRIPTION("Generic Packet Editor actions"); |
271 | MODULE_LICENSE("GPL"); | 251 | MODULE_LICENSE("GPL"); |
272 | 252 | ||
273 | static int __init | 253 | static int __init pedit_init_module(void) |
274 | pedit_init_module(void) | ||
275 | { | 254 | { |
276 | return tcf_register_action(&act_pedit_ops); | 255 | return tcf_register_action(&act_pedit_ops); |
277 | } | 256 | } |
278 | 257 | ||
279 | static void __exit | 258 | static void __exit pedit_cleanup_module(void) |
280 | pedit_cleanup_module(void) | ||
281 | { | 259 | { |
282 | tcf_unregister_action(&act_pedit_ops); | 260 | tcf_unregister_action(&act_pedit_ops); |
283 | } | 261 | } |
diff --git a/net/sched/act_police.c b/net/sched/act_police.c index da905d7b4b40..fed47b658837 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c | |||
@@ -32,43 +32,27 @@ | |||
32 | #include <net/sock.h> | 32 | #include <net/sock.h> |
33 | #include <net/act_api.h> | 33 | #include <net/act_api.h> |
34 | 34 | ||
35 | #define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log]) | 35 | #define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) |
36 | #define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log]) | 36 | #define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) |
37 | #define PRIV(a) ((struct tcf_police *) (a)->priv) | ||
38 | |||
39 | /* use generic hash table */ | ||
40 | #define MY_TAB_SIZE 16 | ||
41 | #define MY_TAB_MASK 15 | ||
42 | static u32 idx_gen; | ||
43 | static struct tcf_police *tcf_police_ht[MY_TAB_SIZE]; | ||
44 | /* Policer hash table lock */ | ||
45 | static DEFINE_RWLOCK(police_lock); | ||
46 | |||
47 | /* Each policer is serialized by its individual spinlock */ | ||
48 | 37 | ||
49 | static __inline__ unsigned tcf_police_hash(u32 index) | 38 | #define POL_TAB_MASK 15 |
50 | { | 39 | static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; |
51 | return index&0xF; | 40 | static u32 police_idx_gen; |
52 | } | 41 | static DEFINE_RWLOCK(police_lock); |
53 | 42 | ||
54 | static __inline__ struct tcf_police * tcf_police_lookup(u32 index) | 43 | static struct tcf_hashinfo police_hash_info = { |
55 | { | 44 | .htab = tcf_police_ht, |
56 | struct tcf_police *p; | 45 | .hmask = POL_TAB_MASK, |
46 | .lock = &police_lock, | ||
47 | }; | ||
57 | 48 | ||
58 | read_lock(&police_lock); | 49 | /* Each policer is serialized by its individual spinlock */ |
59 | for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) { | ||
60 | if (p->index == index) | ||
61 | break; | ||
62 | } | ||
63 | read_unlock(&police_lock); | ||
64 | return p; | ||
65 | } | ||
66 | 50 | ||
67 | #ifdef CONFIG_NET_CLS_ACT | 51 | #ifdef CONFIG_NET_CLS_ACT |
68 | static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, | 52 | static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, |
69 | int type, struct tc_action *a) | 53 | int type, struct tc_action *a) |
70 | { | 54 | { |
71 | struct tcf_police *p; | 55 | struct tcf_common *p; |
72 | int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; | 56 | int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; |
73 | struct rtattr *r; | 57 | struct rtattr *r; |
74 | 58 | ||
@@ -76,10 +60,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c | |||
76 | 60 | ||
77 | s_i = cb->args[0]; | 61 | s_i = cb->args[0]; |
78 | 62 | ||
79 | for (i = 0; i < MY_TAB_SIZE; i++) { | 63 | for (i = 0; i < (POL_TAB_MASK + 1); i++) { |
80 | p = tcf_police_ht[tcf_police_hash(i)]; | 64 | p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; |
81 | 65 | ||
82 | for (; p; p = p->next) { | 66 | for (; p; p = p->tcfc_next) { |
83 | index++; | 67 | index++; |
84 | if (index < s_i) | 68 | if (index < s_i) |
85 | continue; | 69 | continue; |
@@ -110,48 +94,26 @@ rtattr_failure: | |||
110 | skb_trim(skb, (u8*)r - skb->data); | 94 | skb_trim(skb, (u8*)r - skb->data); |
111 | goto done; | 95 | goto done; |
112 | } | 96 | } |
113 | |||
114 | static inline int | ||
115 | tcf_act_police_hash_search(struct tc_action *a, u32 index) | ||
116 | { | ||
117 | struct tcf_police *p = tcf_police_lookup(index); | ||
118 | |||
119 | if (p != NULL) { | ||
120 | a->priv = p; | ||
121 | return 1; | ||
122 | } else { | ||
123 | return 0; | ||
124 | } | ||
125 | } | ||
126 | #endif | 97 | #endif |
127 | 98 | ||
128 | static inline u32 tcf_police_new_index(void) | ||
129 | { | ||
130 | do { | ||
131 | if (++idx_gen == 0) | ||
132 | idx_gen = 1; | ||
133 | } while (tcf_police_lookup(idx_gen)); | ||
134 | |||
135 | return idx_gen; | ||
136 | } | ||
137 | |||
138 | void tcf_police_destroy(struct tcf_police *p) | 99 | void tcf_police_destroy(struct tcf_police *p) |
139 | { | 100 | { |
140 | unsigned h = tcf_police_hash(p->index); | 101 | unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); |
141 | struct tcf_police **p1p; | 102 | struct tcf_common **p1p; |
142 | 103 | ||
143 | for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { | 104 | for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { |
144 | if (*p1p == p) { | 105 | if (*p1p == &p->common) { |
145 | write_lock_bh(&police_lock); | 106 | write_lock_bh(&police_lock); |
146 | *p1p = p->next; | 107 | *p1p = p->tcf_next; |
147 | write_unlock_bh(&police_lock); | 108 | write_unlock_bh(&police_lock); |
148 | #ifdef CONFIG_NET_ESTIMATOR | 109 | #ifdef CONFIG_NET_ESTIMATOR |
149 | gen_kill_estimator(&p->bstats, &p->rate_est); | 110 | gen_kill_estimator(&p->tcf_bstats, |
111 | &p->tcf_rate_est); | ||
150 | #endif | 112 | #endif |
151 | if (p->R_tab) | 113 | if (p->tcfp_R_tab) |
152 | qdisc_put_rtab(p->R_tab); | 114 | qdisc_put_rtab(p->tcfp_R_tab); |
153 | if (p->P_tab) | 115 | if (p->tcfp_P_tab) |
154 | qdisc_put_rtab(p->P_tab); | 116 | qdisc_put_rtab(p->tcfp_P_tab); |
155 | kfree(p); | 117 | kfree(p); |
156 | return; | 118 | return; |
157 | } | 119 | } |
@@ -167,7 +129,7 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, | |||
167 | int ret = 0, err; | 129 | int ret = 0, err; |
168 | struct rtattr *tb[TCA_POLICE_MAX]; | 130 | struct rtattr *tb[TCA_POLICE_MAX]; |
169 | struct tc_police *parm; | 131 | struct tc_police *parm; |
170 | struct tcf_police *p; | 132 | struct tcf_police *police; |
171 | struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; | 133 | struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; |
172 | 134 | ||
173 | if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) | 135 | if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) |
@@ -185,27 +147,32 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, | |||
185 | RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) | 147 | RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) |
186 | return -EINVAL; | 148 | return -EINVAL; |
187 | 149 | ||
188 | if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { | 150 | if (parm->index) { |
189 | a->priv = p; | 151 | struct tcf_common *pc; |
190 | if (bind) { | 152 | |
191 | p->bindcnt += 1; | 153 | pc = tcf_hash_lookup(parm->index, &police_hash_info); |
192 | p->refcnt += 1; | 154 | if (pc != NULL) { |
155 | a->priv = pc; | ||
156 | police = to_police(pc); | ||
157 | if (bind) { | ||
158 | police->tcf_bindcnt += 1; | ||
159 | police->tcf_refcnt += 1; | ||
160 | } | ||
161 | if (ovr) | ||
162 | goto override; | ||
163 | return ret; | ||
193 | } | 164 | } |
194 | if (ovr) | ||
195 | goto override; | ||
196 | return ret; | ||
197 | } | 165 | } |
198 | 166 | ||
199 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 167 | police = kzalloc(sizeof(*police), GFP_KERNEL); |
200 | if (p == NULL) | 168 | if (police == NULL) |
201 | return -ENOMEM; | 169 | return -ENOMEM; |
202 | |||
203 | ret = ACT_P_CREATED; | 170 | ret = ACT_P_CREATED; |
204 | p->refcnt = 1; | 171 | police->tcf_refcnt = 1; |
205 | spin_lock_init(&p->lock); | 172 | spin_lock_init(&police->tcf_lock); |
206 | p->stats_lock = &p->lock; | 173 | police->tcf_stats_lock = &police->tcf_lock; |
207 | if (bind) | 174 | if (bind) |
208 | p->bindcnt = 1; | 175 | police->tcf_bindcnt = 1; |
209 | override: | 176 | override: |
210 | if (parm->rate.rate) { | 177 | if (parm->rate.rate) { |
211 | err = -ENOMEM; | 178 | err = -ENOMEM; |
@@ -215,67 +182,71 @@ override: | |||
215 | if (parm->peakrate.rate) { | 182 | if (parm->peakrate.rate) { |
216 | P_tab = qdisc_get_rtab(&parm->peakrate, | 183 | P_tab = qdisc_get_rtab(&parm->peakrate, |
217 | tb[TCA_POLICE_PEAKRATE-1]); | 184 | tb[TCA_POLICE_PEAKRATE-1]); |
218 | if (p->P_tab == NULL) { | 185 | if (P_tab == NULL) { |
219 | qdisc_put_rtab(R_tab); | 186 | qdisc_put_rtab(R_tab); |
220 | goto failure; | 187 | goto failure; |
221 | } | 188 | } |
222 | } | 189 | } |
223 | } | 190 | } |
224 | /* No failure allowed after this point */ | 191 | /* No failure allowed after this point */ |
225 | spin_lock_bh(&p->lock); | 192 | spin_lock_bh(&police->tcf_lock); |
226 | if (R_tab != NULL) { | 193 | if (R_tab != NULL) { |
227 | qdisc_put_rtab(p->R_tab); | 194 | qdisc_put_rtab(police->tcfp_R_tab); |
228 | p->R_tab = R_tab; | 195 | police->tcfp_R_tab = R_tab; |
229 | } | 196 | } |
230 | if (P_tab != NULL) { | 197 | if (P_tab != NULL) { |
231 | qdisc_put_rtab(p->P_tab); | 198 | qdisc_put_rtab(police->tcfp_P_tab); |
232 | p->P_tab = P_tab; | 199 | police->tcfp_P_tab = P_tab; |
233 | } | 200 | } |
234 | 201 | ||
235 | if (tb[TCA_POLICE_RESULT-1]) | 202 | if (tb[TCA_POLICE_RESULT-1]) |
236 | p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); | 203 | police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); |
237 | p->toks = p->burst = parm->burst; | 204 | police->tcfp_toks = police->tcfp_burst = parm->burst; |
238 | p->mtu = parm->mtu; | 205 | police->tcfp_mtu = parm->mtu; |
239 | if (p->mtu == 0) { | 206 | if (police->tcfp_mtu == 0) { |
240 | p->mtu = ~0; | 207 | police->tcfp_mtu = ~0; |
241 | if (p->R_tab) | 208 | if (police->tcfp_R_tab) |
242 | p->mtu = 255<<p->R_tab->rate.cell_log; | 209 | police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; |
243 | } | 210 | } |
244 | if (p->P_tab) | 211 | if (police->tcfp_P_tab) |
245 | p->ptoks = L2T_P(p, p->mtu); | 212 | police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); |
246 | p->action = parm->action; | 213 | police->tcf_action = parm->action; |
247 | 214 | ||
248 | #ifdef CONFIG_NET_ESTIMATOR | 215 | #ifdef CONFIG_NET_ESTIMATOR |
249 | if (tb[TCA_POLICE_AVRATE-1]) | 216 | if (tb[TCA_POLICE_AVRATE-1]) |
250 | p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); | 217 | police->tcfp_ewma_rate = |
218 | *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); | ||
251 | if (est) | 219 | if (est) |
252 | gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); | 220 | gen_replace_estimator(&police->tcf_bstats, |
221 | &police->tcf_rate_est, | ||
222 | police->tcf_stats_lock, est); | ||
253 | #endif | 223 | #endif |
254 | 224 | ||
255 | spin_unlock_bh(&p->lock); | 225 | spin_unlock_bh(&police->tcf_lock); |
256 | if (ret != ACT_P_CREATED) | 226 | if (ret != ACT_P_CREATED) |
257 | return ret; | 227 | return ret; |
258 | 228 | ||
259 | PSCHED_GET_TIME(p->t_c); | 229 | PSCHED_GET_TIME(police->tcfp_t_c); |
260 | p->index = parm->index ? : tcf_police_new_index(); | 230 | police->tcf_index = parm->index ? parm->index : |
261 | h = tcf_police_hash(p->index); | 231 | tcf_hash_new_index(&police_idx_gen, &police_hash_info); |
232 | h = tcf_hash(police->tcf_index, POL_TAB_MASK); | ||
262 | write_lock_bh(&police_lock); | 233 | write_lock_bh(&police_lock); |
263 | p->next = tcf_police_ht[h]; | 234 | police->tcf_next = tcf_police_ht[h]; |
264 | tcf_police_ht[h] = p; | 235 | tcf_police_ht[h] = &police->common; |
265 | write_unlock_bh(&police_lock); | 236 | write_unlock_bh(&police_lock); |
266 | 237 | ||
267 | a->priv = p; | 238 | a->priv = police; |
268 | return ret; | 239 | return ret; |
269 | 240 | ||
270 | failure: | 241 | failure: |
271 | if (ret == ACT_P_CREATED) | 242 | if (ret == ACT_P_CREATED) |
272 | kfree(p); | 243 | kfree(police); |
273 | return err; | 244 | return err; |
274 | } | 245 | } |
275 | 246 | ||
276 | static int tcf_act_police_cleanup(struct tc_action *a, int bind) | 247 | static int tcf_act_police_cleanup(struct tc_action *a, int bind) |
277 | { | 248 | { |
278 | struct tcf_police *p = PRIV(a); | 249 | struct tcf_police *p = a->priv; |
279 | 250 | ||
280 | if (p != NULL) | 251 | if (p != NULL) |
281 | return tcf_police_release(p, bind); | 252 | return tcf_police_release(p, bind); |
@@ -285,86 +256,87 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind) | |||
285 | static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, | 256 | static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, |
286 | struct tcf_result *res) | 257 | struct tcf_result *res) |
287 | { | 258 | { |
259 | struct tcf_police *police = a->priv; | ||
288 | psched_time_t now; | 260 | psched_time_t now; |
289 | struct tcf_police *p = PRIV(a); | ||
290 | long toks; | 261 | long toks; |
291 | long ptoks = 0; | 262 | long ptoks = 0; |
292 | 263 | ||
293 | spin_lock(&p->lock); | 264 | spin_lock(&police->tcf_lock); |
294 | 265 | ||
295 | p->bstats.bytes += skb->len; | 266 | police->tcf_bstats.bytes += skb->len; |
296 | p->bstats.packets++; | 267 | police->tcf_bstats.packets++; |
297 | 268 | ||
298 | #ifdef CONFIG_NET_ESTIMATOR | 269 | #ifdef CONFIG_NET_ESTIMATOR |
299 | if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { | 270 | if (police->tcfp_ewma_rate && |
300 | p->qstats.overlimits++; | 271 | police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { |
301 | spin_unlock(&p->lock); | 272 | police->tcf_qstats.overlimits++; |
302 | return p->action; | 273 | spin_unlock(&police->tcf_lock); |
274 | return police->tcf_action; | ||
303 | } | 275 | } |
304 | #endif | 276 | #endif |
305 | 277 | ||
306 | if (skb->len <= p->mtu) { | 278 | if (skb->len <= police->tcfp_mtu) { |
307 | if (p->R_tab == NULL) { | 279 | if (police->tcfp_R_tab == NULL) { |
308 | spin_unlock(&p->lock); | 280 | spin_unlock(&police->tcf_lock); |
309 | return p->result; | 281 | return police->tcfp_result; |
310 | } | 282 | } |
311 | 283 | ||
312 | PSCHED_GET_TIME(now); | 284 | PSCHED_GET_TIME(now); |
313 | 285 | ||
314 | toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); | 286 | toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, |
315 | 287 | police->tcfp_burst); | |
316 | if (p->P_tab) { | 288 | if (police->tcfp_P_tab) { |
317 | ptoks = toks + p->ptoks; | 289 | ptoks = toks + police->tcfp_ptoks; |
318 | if (ptoks > (long)L2T_P(p, p->mtu)) | 290 | if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) |
319 | ptoks = (long)L2T_P(p, p->mtu); | 291 | ptoks = (long)L2T_P(police, police->tcfp_mtu); |
320 | ptoks -= L2T_P(p, skb->len); | 292 | ptoks -= L2T_P(police, skb->len); |
321 | } | 293 | } |
322 | toks += p->toks; | 294 | toks += police->tcfp_toks; |
323 | if (toks > (long)p->burst) | 295 | if (toks > (long)police->tcfp_burst) |
324 | toks = p->burst; | 296 | toks = police->tcfp_burst; |
325 | toks -= L2T(p, skb->len); | 297 | toks -= L2T(police, skb->len); |
326 | |||
327 | if ((toks|ptoks) >= 0) { | 298 | if ((toks|ptoks) >= 0) { |
328 | p->t_c = now; | 299 | police->tcfp_t_c = now; |
329 | p->toks = toks; | 300 | police->tcfp_toks = toks; |
330 | p->ptoks = ptoks; | 301 | police->tcfp_ptoks = ptoks; |
331 | spin_unlock(&p->lock); | 302 | spin_unlock(&police->tcf_lock); |
332 | return p->result; | 303 | return police->tcfp_result; |
333 | } | 304 | } |
334 | } | 305 | } |
335 | 306 | ||
336 | p->qstats.overlimits++; | 307 | police->tcf_qstats.overlimits++; |
337 | spin_unlock(&p->lock); | 308 | spin_unlock(&police->tcf_lock); |
338 | return p->action; | 309 | return police->tcf_action; |
339 | } | 310 | } |
340 | 311 | ||
341 | static int | 312 | static int |
342 | tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) | 313 | tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) |
343 | { | 314 | { |
344 | unsigned char *b = skb->tail; | 315 | unsigned char *b = skb->tail; |
316 | struct tcf_police *police = a->priv; | ||
345 | struct tc_police opt; | 317 | struct tc_police opt; |
346 | struct tcf_police *p = PRIV(a); | 318 | |
347 | 319 | opt.index = police->tcf_index; | |
348 | opt.index = p->index; | 320 | opt.action = police->tcf_action; |
349 | opt.action = p->action; | 321 | opt.mtu = police->tcfp_mtu; |
350 | opt.mtu = p->mtu; | 322 | opt.burst = police->tcfp_burst; |
351 | opt.burst = p->burst; | 323 | opt.refcnt = police->tcf_refcnt - ref; |
352 | opt.refcnt = p->refcnt - ref; | 324 | opt.bindcnt = police->tcf_bindcnt - bind; |
353 | opt.bindcnt = p->bindcnt - bind; | 325 | if (police->tcfp_R_tab) |
354 | if (p->R_tab) | 326 | opt.rate = police->tcfp_R_tab->rate; |
355 | opt.rate = p->R_tab->rate; | ||
356 | else | 327 | else |
357 | memset(&opt.rate, 0, sizeof(opt.rate)); | 328 | memset(&opt.rate, 0, sizeof(opt.rate)); |
358 | if (p->P_tab) | 329 | if (police->tcfp_P_tab) |
359 | opt.peakrate = p->P_tab->rate; | 330 | opt.peakrate = police->tcfp_P_tab->rate; |
360 | else | 331 | else |
361 | memset(&opt.peakrate, 0, sizeof(opt.peakrate)); | 332 | memset(&opt.peakrate, 0, sizeof(opt.peakrate)); |
362 | RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); | 333 | RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); |
363 | if (p->result) | 334 | if (police->tcfp_result) |
364 | RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); | 335 | RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), |
336 | &police->tcfp_result); | ||
365 | #ifdef CONFIG_NET_ESTIMATOR | 337 | #ifdef CONFIG_NET_ESTIMATOR |
366 | if (p->ewma_rate) | 338 | if (police->tcfp_ewma_rate) |
367 | RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); | 339 | RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); |
368 | #endif | 340 | #endif |
369 | return skb->len; | 341 | return skb->len; |
370 | 342 | ||
@@ -379,13 +351,14 @@ MODULE_LICENSE("GPL"); | |||
379 | 351 | ||
380 | static struct tc_action_ops act_police_ops = { | 352 | static struct tc_action_ops act_police_ops = { |
381 | .kind = "police", | 353 | .kind = "police", |
354 | .hinfo = &police_hash_info, | ||
382 | .type = TCA_ID_POLICE, | 355 | .type = TCA_ID_POLICE, |
383 | .capab = TCA_CAP_NONE, | 356 | .capab = TCA_CAP_NONE, |
384 | .owner = THIS_MODULE, | 357 | .owner = THIS_MODULE, |
385 | .act = tcf_act_police, | 358 | .act = tcf_act_police, |
386 | .dump = tcf_act_police_dump, | 359 | .dump = tcf_act_police_dump, |
387 | .cleanup = tcf_act_police_cleanup, | 360 | .cleanup = tcf_act_police_cleanup, |
388 | .lookup = tcf_act_police_hash_search, | 361 | .lookup = tcf_hash_search, |
389 | .init = tcf_act_police_locate, | 362 | .init = tcf_act_police_locate, |
390 | .walk = tcf_act_police_walker | 363 | .walk = tcf_act_police_walker |
391 | }; | 364 | }; |
@@ -407,10 +380,39 @@ module_exit(police_cleanup_module); | |||
407 | 380 | ||
408 | #else /* CONFIG_NET_CLS_ACT */ | 381 | #else /* CONFIG_NET_CLS_ACT */ |
409 | 382 | ||
410 | struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) | 383 | static struct tcf_common *tcf_police_lookup(u32 index) |
411 | { | 384 | { |
412 | unsigned h; | 385 | struct tcf_hashinfo *hinfo = &police_hash_info; |
413 | struct tcf_police *p; | 386 | struct tcf_common *p; |
387 | |||
388 | read_lock(hinfo->lock); | ||
389 | for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; | ||
390 | p = p->tcfc_next) { | ||
391 | if (p->tcfc_index == index) | ||
392 | break; | ||
393 | } | ||
394 | read_unlock(hinfo->lock); | ||
395 | |||
396 | return p; | ||
397 | } | ||
398 | |||
399 | static u32 tcf_police_new_index(void) | ||
400 | { | ||
401 | u32 *idx_gen = &police_idx_gen; | ||
402 | u32 val = *idx_gen; | ||
403 | |||
404 | do { | ||
405 | if (++val == 0) | ||
406 | val = 1; | ||
407 | } while (tcf_police_lookup(val)); | ||
408 | |||
409 | return (*idx_gen = val); | ||
410 | } | ||
411 | |||
412 | struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) | ||
413 | { | ||
414 | unsigned int h; | ||
415 | struct tcf_police *police; | ||
414 | struct rtattr *tb[TCA_POLICE_MAX]; | 416 | struct rtattr *tb[TCA_POLICE_MAX]; |
415 | struct tc_police *parm; | 417 | struct tc_police *parm; |
416 | 418 | ||
@@ -423,149 +425,158 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) | |||
423 | 425 | ||
424 | parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); | 426 | parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); |
425 | 427 | ||
426 | if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { | 428 | if (parm->index) { |
427 | p->refcnt++; | 429 | struct tcf_common *pc; |
428 | return p; | ||
429 | } | ||
430 | 430 | ||
431 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 431 | pc = tcf_police_lookup(parm->index); |
432 | if (p == NULL) | 432 | if (pc) { |
433 | police = to_police(pc); | ||
434 | police->tcf_refcnt++; | ||
435 | return police; | ||
436 | } | ||
437 | } | ||
438 | police = kzalloc(sizeof(*police), GFP_KERNEL); | ||
439 | if (unlikely(!police)) | ||
433 | return NULL; | 440 | return NULL; |
434 | 441 | ||
435 | p->refcnt = 1; | 442 | police->tcf_refcnt = 1; |
436 | spin_lock_init(&p->lock); | 443 | spin_lock_init(&police->tcf_lock); |
437 | p->stats_lock = &p->lock; | 444 | police->tcf_stats_lock = &police->tcf_lock; |
438 | if (parm->rate.rate) { | 445 | if (parm->rate.rate) { |
439 | p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); | 446 | police->tcfp_R_tab = |
440 | if (p->R_tab == NULL) | 447 | qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); |
448 | if (police->tcfp_R_tab == NULL) | ||
441 | goto failure; | 449 | goto failure; |
442 | if (parm->peakrate.rate) { | 450 | if (parm->peakrate.rate) { |
443 | p->P_tab = qdisc_get_rtab(&parm->peakrate, | 451 | police->tcfp_P_tab = |
444 | tb[TCA_POLICE_PEAKRATE-1]); | 452 | qdisc_get_rtab(&parm->peakrate, |
445 | if (p->P_tab == NULL) | 453 | tb[TCA_POLICE_PEAKRATE-1]); |
454 | if (police->tcfp_P_tab == NULL) | ||
446 | goto failure; | 455 | goto failure; |
447 | } | 456 | } |
448 | } | 457 | } |
449 | if (tb[TCA_POLICE_RESULT-1]) { | 458 | if (tb[TCA_POLICE_RESULT-1]) { |
450 | if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) | 459 | if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) |
451 | goto failure; | 460 | goto failure; |
452 | p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); | 461 | police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); |
453 | } | 462 | } |
454 | #ifdef CONFIG_NET_ESTIMATOR | 463 | #ifdef CONFIG_NET_ESTIMATOR |
455 | if (tb[TCA_POLICE_AVRATE-1]) { | 464 | if (tb[TCA_POLICE_AVRATE-1]) { |
456 | if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) | 465 | if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) |
457 | goto failure; | 466 | goto failure; |
458 | p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); | 467 | police->tcfp_ewma_rate = |
468 | *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); | ||
459 | } | 469 | } |
460 | #endif | 470 | #endif |
461 | p->toks = p->burst = parm->burst; | 471 | police->tcfp_toks = police->tcfp_burst = parm->burst; |
462 | p->mtu = parm->mtu; | 472 | police->tcfp_mtu = parm->mtu; |
463 | if (p->mtu == 0) { | 473 | if (police->tcfp_mtu == 0) { |
464 | p->mtu = ~0; | 474 | police->tcfp_mtu = ~0; |
465 | if (p->R_tab) | 475 | if (police->tcfp_R_tab) |
466 | p->mtu = 255<<p->R_tab->rate.cell_log; | 476 | police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; |
467 | } | 477 | } |
468 | if (p->P_tab) | 478 | if (police->tcfp_P_tab) |
469 | p->ptoks = L2T_P(p, p->mtu); | 479 | police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); |
470 | PSCHED_GET_TIME(p->t_c); | 480 | PSCHED_GET_TIME(police->tcfp_t_c); |
471 | p->index = parm->index ? : tcf_police_new_index(); | 481 | police->tcf_index = parm->index ? parm->index : |
472 | p->action = parm->action; | 482 | tcf_police_new_index(); |
483 | police->tcf_action = parm->action; | ||
473 | #ifdef CONFIG_NET_ESTIMATOR | 484 | #ifdef CONFIG_NET_ESTIMATOR |
474 | if (est) | 485 | if (est) |
475 | gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); | 486 | gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, |
487 | police->tcf_stats_lock, est); | ||
476 | #endif | 488 | #endif |
477 | h = tcf_police_hash(p->index); | 489 | h = tcf_hash(police->tcf_index, POL_TAB_MASK); |
478 | write_lock_bh(&police_lock); | 490 | write_lock_bh(&police_lock); |
479 | p->next = tcf_police_ht[h]; | 491 | police->tcf_next = tcf_police_ht[h]; |
480 | tcf_police_ht[h] = p; | 492 | tcf_police_ht[h] = &police->common; |
481 | write_unlock_bh(&police_lock); | 493 | write_unlock_bh(&police_lock); |
482 | return p; | 494 | return police; |
483 | 495 | ||
484 | failure: | 496 | failure: |
485 | if (p->R_tab) | 497 | if (police->tcfp_R_tab) |
486 | qdisc_put_rtab(p->R_tab); | 498 | qdisc_put_rtab(police->tcfp_R_tab); |
487 | kfree(p); | 499 | kfree(police); |
488 | return NULL; | 500 | return NULL; |
489 | } | 501 | } |
490 | 502 | ||
491 | int tcf_police(struct sk_buff *skb, struct tcf_police *p) | 503 | int tcf_police(struct sk_buff *skb, struct tcf_police *police) |
492 | { | 504 | { |
493 | psched_time_t now; | 505 | psched_time_t now; |
494 | long toks; | 506 | long toks; |
495 | long ptoks = 0; | 507 | long ptoks = 0; |
496 | 508 | ||
497 | spin_lock(&p->lock); | 509 | spin_lock(&police->tcf_lock); |
498 | 510 | ||
499 | p->bstats.bytes += skb->len; | 511 | police->tcf_bstats.bytes += skb->len; |
500 | p->bstats.packets++; | 512 | police->tcf_bstats.packets++; |
501 | 513 | ||
502 | #ifdef CONFIG_NET_ESTIMATOR | 514 | #ifdef CONFIG_NET_ESTIMATOR |
503 | if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { | 515 | if (police->tcfp_ewma_rate && |
504 | p->qstats.overlimits++; | 516 | police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { |
505 | spin_unlock(&p->lock); | 517 | police->tcf_qstats.overlimits++; |
506 | return p->action; | 518 | spin_unlock(&police->tcf_lock); |
519 | return police->tcf_action; | ||
507 | } | 520 | } |
508 | #endif | 521 | #endif |
509 | 522 | if (skb->len <= police->tcfp_mtu) { | |
510 | if (skb->len <= p->mtu) { | 523 | if (police->tcfp_R_tab == NULL) { |
511 | if (p->R_tab == NULL) { | 524 | spin_unlock(&police->tcf_lock); |
512 | spin_unlock(&p->lock); | 525 | return police->tcfp_result; |
513 | return p->result; | ||
514 | } | 526 | } |
515 | 527 | ||
516 | PSCHED_GET_TIME(now); | 528 | PSCHED_GET_TIME(now); |
517 | 529 | toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, | |
518 | toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); | 530 | police->tcfp_burst); |
519 | 531 | if (police->tcfp_P_tab) { | |
520 | if (p->P_tab) { | 532 | ptoks = toks + police->tcfp_ptoks; |
521 | ptoks = toks + p->ptoks; | 533 | if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) |
522 | if (ptoks > (long)L2T_P(p, p->mtu)) | 534 | ptoks = (long)L2T_P(police, police->tcfp_mtu); |
523 | ptoks = (long)L2T_P(p, p->mtu); | 535 | ptoks -= L2T_P(police, skb->len); |
524 | ptoks -= L2T_P(p, skb->len); | ||
525 | } | 536 | } |
526 | toks += p->toks; | 537 | toks += police->tcfp_toks; |
527 | if (toks > (long)p->burst) | 538 | if (toks > (long)police->tcfp_burst) |
528 | toks = p->burst; | 539 | toks = police->tcfp_burst; |
529 | toks -= L2T(p, skb->len); | 540 | toks -= L2T(police, skb->len); |
530 | |||
531 | if ((toks|ptoks) >= 0) { | 541 | if ((toks|ptoks) >= 0) { |
532 | p->t_c = now; | 542 | police->tcfp_t_c = now; |
533 | p->toks = toks; | 543 | police->tcfp_toks = toks; |
534 | p->ptoks = ptoks; | 544 | police->tcfp_ptoks = ptoks; |
535 | spin_unlock(&p->lock); | 545 | spin_unlock(&police->tcf_lock); |
536 | return p->result; | 546 | return police->tcfp_result; |
537 | } | 547 | } |
538 | } | 548 | } |
539 | 549 | ||
540 | p->qstats.overlimits++; | 550 | police->tcf_qstats.overlimits++; |
541 | spin_unlock(&p->lock); | 551 | spin_unlock(&police->tcf_lock); |
542 | return p->action; | 552 | return police->tcf_action; |
543 | } | 553 | } |
544 | EXPORT_SYMBOL(tcf_police); | 554 | EXPORT_SYMBOL(tcf_police); |
545 | 555 | ||
546 | int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) | 556 | int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) |
547 | { | 557 | { |
548 | unsigned char *b = skb->tail; | 558 | unsigned char *b = skb->tail; |
549 | struct tc_police opt; | 559 | struct tc_police opt; |
550 | 560 | ||
551 | opt.index = p->index; | 561 | opt.index = police->tcf_index; |
552 | opt.action = p->action; | 562 | opt.action = police->tcf_action; |
553 | opt.mtu = p->mtu; | 563 | opt.mtu = police->tcfp_mtu; |
554 | opt.burst = p->burst; | 564 | opt.burst = police->tcfp_burst; |
555 | if (p->R_tab) | 565 | if (police->tcfp_R_tab) |
556 | opt.rate = p->R_tab->rate; | 566 | opt.rate = police->tcfp_R_tab->rate; |
557 | else | 567 | else |
558 | memset(&opt.rate, 0, sizeof(opt.rate)); | 568 | memset(&opt.rate, 0, sizeof(opt.rate)); |
559 | if (p->P_tab) | 569 | if (police->tcfp_P_tab) |
560 | opt.peakrate = p->P_tab->rate; | 570 | opt.peakrate = police->tcfp_P_tab->rate; |
561 | else | 571 | else |
562 | memset(&opt.peakrate, 0, sizeof(opt.peakrate)); | 572 | memset(&opt.peakrate, 0, sizeof(opt.peakrate)); |
563 | RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); | 573 | RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); |
564 | if (p->result) | 574 | if (police->tcfp_result) |
565 | RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); | 575 | RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), |
576 | &police->tcfp_result); | ||
566 | #ifdef CONFIG_NET_ESTIMATOR | 577 | #ifdef CONFIG_NET_ESTIMATOR |
567 | if (p->ewma_rate) | 578 | if (police->tcfp_ewma_rate) |
568 | RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); | 579 | RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); |
569 | #endif | 580 | #endif |
570 | return skb->len; | 581 | return skb->len; |
571 | 582 | ||
@@ -574,19 +585,20 @@ rtattr_failure: | |||
574 | return -1; | 585 | return -1; |
575 | } | 586 | } |
576 | 587 | ||
577 | int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p) | 588 | int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) |
578 | { | 589 | { |
579 | struct gnet_dump d; | 590 | struct gnet_dump d; |
580 | 591 | ||
581 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, | 592 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, |
582 | TCA_XSTATS, p->stats_lock, &d) < 0) | 593 | TCA_XSTATS, police->tcf_stats_lock, |
594 | &d) < 0) | ||
583 | goto errout; | 595 | goto errout; |
584 | 596 | ||
585 | if (gnet_stats_copy_basic(&d, &p->bstats) < 0 || | 597 | if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || |
586 | #ifdef CONFIG_NET_ESTIMATOR | 598 | #ifdef CONFIG_NET_ESTIMATOR |
587 | gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 || | 599 | gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || |
588 | #endif | 600 | #endif |
589 | gnet_stats_copy_queue(&d, &p->qstats) < 0) | 601 | gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) |
590 | goto errout; | 602 | goto errout; |
591 | 603 | ||
592 | if (gnet_stats_finish_copy(&d) < 0) | 604 | if (gnet_stats_finish_copy(&d) < 0) |
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 17105c82537f..901571a67707 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c | |||
@@ -20,54 +20,175 @@ | |||
20 | 20 | ||
21 | #define TCA_ACT_SIMP 22 | 21 | #define TCA_ACT_SIMP 22 |
22 | 22 | ||
23 | /* XXX: Hide all these common elements under some macro | ||
24 | * probably | ||
25 | */ | ||
26 | #include <linux/tc_act/tc_defact.h> | 23 | #include <linux/tc_act/tc_defact.h> |
27 | #include <net/tc_act/tc_defact.h> | 24 | #include <net/tc_act/tc_defact.h> |
28 | 25 | ||
29 | /* use generic hash table with 8 buckets */ | 26 | #define SIMP_TAB_MASK 7 |
30 | #define MY_TAB_SIZE 8 | 27 | static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1]; |
31 | #define MY_TAB_MASK (MY_TAB_SIZE - 1) | 28 | static u32 simp_idx_gen; |
32 | static u32 idx_gen; | ||
33 | static struct tcf_defact *tcf_simp_ht[MY_TAB_SIZE]; | ||
34 | static DEFINE_RWLOCK(simp_lock); | 29 | static DEFINE_RWLOCK(simp_lock); |
35 | 30 | ||
36 | /* override the defaults */ | 31 | static struct tcf_hashinfo simp_hash_info = { |
37 | #define tcf_st tcf_defact | 32 | .htab = tcf_simp_ht, |
38 | #define tc_st tc_defact | 33 | .hmask = SIMP_TAB_MASK, |
39 | #define tcf_t_lock simp_lock | 34 | .lock = &simp_lock, |
40 | #define tcf_ht tcf_simp_ht | 35 | }; |
41 | |||
42 | #define CONFIG_NET_ACT_INIT 1 | ||
43 | #include <net/pkt_act.h> | ||
44 | #include <net/act_generic.h> | ||
45 | 36 | ||
46 | static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) | 37 | static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) |
47 | { | 38 | { |
48 | struct tcf_defact *p = PRIV(a, defact); | 39 | struct tcf_defact *d = a->priv; |
49 | 40 | ||
50 | spin_lock(&p->lock); | 41 | spin_lock(&d->tcf_lock); |
51 | p->tm.lastuse = jiffies; | 42 | d->tcf_tm.lastuse = jiffies; |
52 | p->bstats.bytes += skb->len; | 43 | d->tcf_bstats.bytes += skb->len; |
53 | p->bstats.packets++; | 44 | d->tcf_bstats.packets++; |
54 | 45 | ||
55 | /* print policy string followed by _ then packet count | 46 | /* print policy string followed by _ then packet count |
56 | * Example if this was the 3rd packet and the string was "hello" | 47 | * Example if this was the 3rd packet and the string was "hello" |
57 | * then it would look like "hello_3" (without quotes) | 48 | * then it would look like "hello_3" (without quotes) |
58 | **/ | 49 | **/ |
59 | printk("simple: %s_%d\n", (char *)p->defdata, p->bstats.packets); | 50 | printk("simple: %s_%d\n", |
60 | spin_unlock(&p->lock); | 51 | (char *)d->tcfd_defdata, d->tcf_bstats.packets); |
61 | return p->action; | 52 | spin_unlock(&d->tcf_lock); |
53 | return d->tcf_action; | ||
54 | } | ||
55 | |||
56 | static int tcf_simp_release(struct tcf_defact *d, int bind) | ||
57 | { | ||
58 | int ret = 0; | ||
59 | if (d) { | ||
60 | if (bind) | ||
61 | d->tcf_bindcnt--; | ||
62 | d->tcf_refcnt--; | ||
63 | if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) { | ||
64 | kfree(d->tcfd_defdata); | ||
65 | tcf_hash_destroy(&d->common, &simp_hash_info); | ||
66 | ret = 1; | ||
67 | } | ||
68 | } | ||
69 | return ret; | ||
70 | } | ||
71 | |||
72 | static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) | ||
73 | { | ||
74 | d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL); | ||
75 | if (unlikely(!d->tcfd_defdata)) | ||
76 | return -ENOMEM; | ||
77 | d->tcfd_datalen = datalen; | ||
78 | memcpy(d->tcfd_defdata, defdata, datalen); | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) | ||
83 | { | ||
84 | kfree(d->tcfd_defdata); | ||
85 | return alloc_defdata(d, datalen, defdata); | ||
86 | } | ||
87 | |||
88 | static int tcf_simp_init(struct rtattr *rta, struct rtattr *est, | ||
89 | struct tc_action *a, int ovr, int bind) | ||
90 | { | ||
91 | struct rtattr *tb[TCA_DEF_MAX]; | ||
92 | struct tc_defact *parm; | ||
93 | struct tcf_defact *d; | ||
94 | struct tcf_common *pc; | ||
95 | void *defdata; | ||
96 | u32 datalen = 0; | ||
97 | int ret = 0; | ||
98 | |||
99 | if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) | ||
100 | return -EINVAL; | ||
101 | |||
102 | if (tb[TCA_DEF_PARMS - 1] == NULL || | ||
103 | RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) | ||
104 | return -EINVAL; | ||
105 | |||
106 | parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); | ||
107 | defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); | ||
108 | if (defdata == NULL) | ||
109 | return -EINVAL; | ||
110 | |||
111 | datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); | ||
112 | if (datalen <= 0) | ||
113 | return -EINVAL; | ||
114 | |||
115 | pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); | ||
116 | if (!pc) { | ||
117 | pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, | ||
118 | &simp_idx_gen, &simp_hash_info); | ||
119 | if (unlikely(!pc)) | ||
120 | return -ENOMEM; | ||
121 | |||
122 | d = to_defact(pc); | ||
123 | ret = alloc_defdata(d, datalen, defdata); | ||
124 | if (ret < 0) { | ||
125 | kfree(pc); | ||
126 | return ret; | ||
127 | } | ||
128 | ret = ACT_P_CREATED; | ||
129 | } else { | ||
130 | d = to_defact(pc); | ||
131 | if (!ovr) { | ||
132 | tcf_simp_release(d, bind); | ||
133 | return -EEXIST; | ||
134 | } | ||
135 | realloc_defdata(d, datalen, defdata); | ||
136 | } | ||
137 | |||
138 | spin_lock_bh(&d->tcf_lock); | ||
139 | d->tcf_action = parm->action; | ||
140 | spin_unlock_bh(&d->tcf_lock); | ||
141 | |||
142 | if (ret == ACT_P_CREATED) | ||
143 | tcf_hash_insert(pc, &simp_hash_info); | ||
144 | return ret; | ||
145 | } | ||
146 | |||
147 | static inline int tcf_simp_cleanup(struct tc_action *a, int bind) | ||
148 | { | ||
149 | struct tcf_defact *d = a->priv; | ||
150 | |||
151 | if (d) | ||
152 | return tcf_simp_release(d, bind); | ||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, | ||
157 | int bind, int ref) | ||
158 | { | ||
159 | unsigned char *b = skb->tail; | ||
160 | struct tcf_defact *d = a->priv; | ||
161 | struct tc_defact opt; | ||
162 | struct tcf_t t; | ||
163 | |||
164 | opt.index = d->tcf_index; | ||
165 | opt.refcnt = d->tcf_refcnt - ref; | ||
166 | opt.bindcnt = d->tcf_bindcnt - bind; | ||
167 | opt.action = d->tcf_action; | ||
168 | RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); | ||
169 | RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); | ||
170 | t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); | ||
171 | t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); | ||
172 | t.expires = jiffies_to_clock_t(d->tcf_tm.expires); | ||
173 | RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); | ||
174 | return skb->len; | ||
175 | |||
176 | rtattr_failure: | ||
177 | skb_trim(skb, b - skb->data); | ||
178 | return -1; | ||
62 | } | 179 | } |
63 | 180 | ||
64 | static struct tc_action_ops act_simp_ops = { | 181 | static struct tc_action_ops act_simp_ops = { |
65 | .kind = "simple", | 182 | .kind = "simple", |
66 | .type = TCA_ACT_SIMP, | 183 | .hinfo = &simp_hash_info, |
67 | .capab = TCA_CAP_NONE, | 184 | .type = TCA_ACT_SIMP, |
68 | .owner = THIS_MODULE, | 185 | .capab = TCA_CAP_NONE, |
69 | .act = tcf_simp, | 186 | .owner = THIS_MODULE, |
70 | tca_use_default_ops | 187 | .act = tcf_simp, |
188 | .dump = tcf_simp_dump, | ||
189 | .cleanup = tcf_simp_cleanup, | ||
190 | .init = tcf_simp_init, | ||
191 | .walk = tcf_generic_walker, | ||
71 | }; | 192 | }; |
72 | 193 | ||
73 | MODULE_AUTHOR("Jamal Hadi Salim(2005)"); | 194 | MODULE_AUTHOR("Jamal Hadi Salim(2005)"); |
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index e6973d9b686d..e54acc6bcccd 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c | |||
@@ -50,6 +50,7 @@ | |||
50 | struct fw_head | 50 | struct fw_head |
51 | { | 51 | { |
52 | struct fw_filter *ht[HTSIZE]; | 52 | struct fw_filter *ht[HTSIZE]; |
53 | u32 mask; | ||
53 | }; | 54 | }; |
54 | 55 | ||
55 | struct fw_filter | 56 | struct fw_filter |
@@ -101,7 +102,7 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
101 | struct fw_filter *f; | 102 | struct fw_filter *f; |
102 | int r; | 103 | int r; |
103 | #ifdef CONFIG_NETFILTER | 104 | #ifdef CONFIG_NETFILTER |
104 | u32 id = skb->nfmark; | 105 | u32 id = skb->nfmark & head->mask; |
105 | #else | 106 | #else |
106 | u32 id = 0; | 107 | u32 id = 0; |
107 | #endif | 108 | #endif |
@@ -209,7 +210,9 @@ static int | |||
209 | fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, | 210 | fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, |
210 | struct rtattr **tb, struct rtattr **tca, unsigned long base) | 211 | struct rtattr **tb, struct rtattr **tca, unsigned long base) |
211 | { | 212 | { |
213 | struct fw_head *head = (struct fw_head *)tp->root; | ||
212 | struct tcf_exts e; | 214 | struct tcf_exts e; |
215 | u32 mask; | ||
213 | int err; | 216 | int err; |
214 | 217 | ||
215 | err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map); | 218 | err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &fw_ext_map); |
@@ -232,6 +235,15 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, | |||
232 | } | 235 | } |
233 | #endif /* CONFIG_NET_CLS_IND */ | 236 | #endif /* CONFIG_NET_CLS_IND */ |
234 | 237 | ||
238 | if (tb[TCA_FW_MASK-1]) { | ||
239 | if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) | ||
240 | goto errout; | ||
241 | mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); | ||
242 | if (mask != head->mask) | ||
243 | goto errout; | ||
244 | } else if (head->mask != 0xFFFFFFFF) | ||
245 | goto errout; | ||
246 | |||
235 | tcf_exts_change(tp, &f->exts, &e); | 247 | tcf_exts_change(tp, &f->exts, &e); |
236 | 248 | ||
237 | return 0; | 249 | return 0; |
@@ -267,9 +279,17 @@ static int fw_change(struct tcf_proto *tp, unsigned long base, | |||
267 | return -EINVAL; | 279 | return -EINVAL; |
268 | 280 | ||
269 | if (head == NULL) { | 281 | if (head == NULL) { |
282 | u32 mask = 0xFFFFFFFF; | ||
283 | if (tb[TCA_FW_MASK-1]) { | ||
284 | if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) | ||
285 | return -EINVAL; | ||
286 | mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); | ||
287 | } | ||
288 | |||
270 | head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); | 289 | head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); |
271 | if (head == NULL) | 290 | if (head == NULL) |
272 | return -ENOBUFS; | 291 | return -ENOBUFS; |
292 | head->mask = mask; | ||
273 | 293 | ||
274 | tcf_tree_lock(tp); | 294 | tcf_tree_lock(tp); |
275 | tp->root = head; | 295 | tp->root = head; |
@@ -330,6 +350,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) | |||
330 | static int fw_dump(struct tcf_proto *tp, unsigned long fh, | 350 | static int fw_dump(struct tcf_proto *tp, unsigned long fh, |
331 | struct sk_buff *skb, struct tcmsg *t) | 351 | struct sk_buff *skb, struct tcmsg *t) |
332 | { | 352 | { |
353 | struct fw_head *head = (struct fw_head *)tp->root; | ||
333 | struct fw_filter *f = (struct fw_filter*)fh; | 354 | struct fw_filter *f = (struct fw_filter*)fh; |
334 | unsigned char *b = skb->tail; | 355 | unsigned char *b = skb->tail; |
335 | struct rtattr *rta; | 356 | struct rtattr *rta; |
@@ -351,6 +372,8 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, | |||
351 | if (strlen(f->indev)) | 372 | if (strlen(f->indev)) |
352 | RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev); | 373 | RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev); |
353 | #endif /* CONFIG_NET_CLS_IND */ | 374 | #endif /* CONFIG_NET_CLS_IND */ |
375 | if (head->mask != 0xFFFFFFFF) | ||
376 | RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask); | ||
354 | 377 | ||
355 | if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) | 378 | if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) |
356 | goto rtattr_failure; | 379 | goto rtattr_failure; |
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 880a3394a51f..bb3ddd4784b1 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* vim: ts=8 sw=8 | 1 | /* |
2 | * net/sched/sch_htb.c Hierarchical token bucket, feed tree version | 2 | * net/sched/sch_htb.c Hierarchical token bucket, feed tree version |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
@@ -68,218 +68,165 @@ | |||
68 | one less than their parent. | 68 | one less than their parent. |
69 | */ | 69 | */ |
70 | 70 | ||
71 | #define HTB_HSIZE 16 /* classid hash size */ | 71 | #define HTB_HSIZE 16 /* classid hash size */ |
72 | #define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ | 72 | #define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ |
73 | #undef HTB_DEBUG /* compile debugging support (activated by tc tool) */ | 73 | #define HTB_RATECM 1 /* whether to use rate computer */ |
74 | #define HTB_RATECM 1 /* whether to use rate computer */ | 74 | #define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ |
75 | #define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */ | 75 | #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ |
76 | #define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock) | ||
77 | #define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock) | ||
78 | #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ | ||
79 | 76 | ||
80 | #if HTB_VER >> 16 != TC_HTB_PROTOVER | 77 | #if HTB_VER >> 16 != TC_HTB_PROTOVER |
81 | #error "Mismatched sch_htb.c and pkt_sch.h" | 78 | #error "Mismatched sch_htb.c and pkt_sch.h" |
82 | #endif | 79 | #endif |
83 | 80 | ||
84 | /* debugging support; S is subsystem, these are defined: | ||
85 | 0 - netlink messages | ||
86 | 1 - enqueue | ||
87 | 2 - drop & requeue | ||
88 | 3 - dequeue main | ||
89 | 4 - dequeue one prio DRR part | ||
90 | 5 - dequeue class accounting | ||
91 | 6 - class overlimit status computation | ||
92 | 7 - hint tree | ||
93 | 8 - event queue | ||
94 | 10 - rate estimator | ||
95 | 11 - classifier | ||
96 | 12 - fast dequeue cache | ||
97 | |||
98 | L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full | ||
99 | q->debug uint32 contains 16 2-bit fields one for subsystem starting | ||
100 | from LSB | ||
101 | */ | ||
102 | #ifdef HTB_DEBUG | ||
103 | #define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L) | ||
104 | #define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \ | ||
105 | printk(KERN_DEBUG FMT,##ARG) | ||
106 | #define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC) | ||
107 | #define HTB_PASSQ q, | ||
108 | #define HTB_ARGQ struct htb_sched *q, | ||
109 | #define static | ||
110 | #undef __inline__ | ||
111 | #define __inline__ | ||
112 | #undef inline | ||
113 | #define inline | ||
114 | #define HTB_CMAGIC 0xFEFAFEF1 | ||
115 | #define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \ | ||
116 | if ((N)->rb_color == -1) break; \ | ||
117 | rb_erase(N,R); \ | ||
118 | (N)->rb_color = -1; } while (0) | ||
119 | #else | ||
120 | #define HTB_DBG_COND(S,L) (0) | ||
121 | #define HTB_DBG(S,L,FMT,ARG...) | ||
122 | #define HTB_PASSQ | ||
123 | #define HTB_ARGQ | ||
124 | #define HTB_CHCL(cl) | ||
125 | #define htb_safe_rb_erase(N,R) rb_erase(N,R) | ||
126 | #endif | ||
127 | |||
128 | |||
129 | /* used internaly to keep status of single class */ | 81 | /* used internaly to keep status of single class */ |
130 | enum htb_cmode { | 82 | enum htb_cmode { |
131 | HTB_CANT_SEND, /* class can't send and can't borrow */ | 83 | HTB_CANT_SEND, /* class can't send and can't borrow */ |
132 | HTB_MAY_BORROW, /* class can't send but may borrow */ | 84 | HTB_MAY_BORROW, /* class can't send but may borrow */ |
133 | HTB_CAN_SEND /* class can send */ | 85 | HTB_CAN_SEND /* class can send */ |
134 | }; | 86 | }; |
135 | 87 | ||
136 | /* interior & leaf nodes; props specific to leaves are marked L: */ | 88 | /* interior & leaf nodes; props specific to leaves are marked L: */ |
137 | struct htb_class | 89 | struct htb_class { |
138 | { | 90 | /* general class parameters */ |
139 | #ifdef HTB_DEBUG | 91 | u32 classid; |
140 | unsigned magic; | 92 | struct gnet_stats_basic bstats; |
141 | #endif | 93 | struct gnet_stats_queue qstats; |
142 | /* general class parameters */ | 94 | struct gnet_stats_rate_est rate_est; |
143 | u32 classid; | 95 | struct tc_htb_xstats xstats; /* our special stats */ |
144 | struct gnet_stats_basic bstats; | 96 | int refcnt; /* usage count of this class */ |
145 | struct gnet_stats_queue qstats; | ||
146 | struct gnet_stats_rate_est rate_est; | ||
147 | struct tc_htb_xstats xstats;/* our special stats */ | ||
148 | int refcnt; /* usage count of this class */ | ||
149 | 97 | ||
150 | #ifdef HTB_RATECM | 98 | #ifdef HTB_RATECM |
151 | /* rate measurement counters */ | 99 | /* rate measurement counters */ |
152 | unsigned long rate_bytes,sum_bytes; | 100 | unsigned long rate_bytes, sum_bytes; |
153 | unsigned long rate_packets,sum_packets; | 101 | unsigned long rate_packets, sum_packets; |
154 | #endif | 102 | #endif |
155 | 103 | ||
156 | /* topology */ | 104 | /* topology */ |
157 | int level; /* our level (see above) */ | 105 | int level; /* our level (see above) */ |
158 | struct htb_class *parent; /* parent class */ | 106 | struct htb_class *parent; /* parent class */ |
159 | struct list_head hlist; /* classid hash list item */ | 107 | struct hlist_node hlist; /* classid hash list item */ |
160 | struct list_head sibling; /* sibling list item */ | 108 | struct list_head sibling; /* sibling list item */ |
161 | struct list_head children; /* children list */ | 109 | struct list_head children; /* children list */ |
162 | 110 | ||
163 | union { | 111 | union { |
164 | struct htb_class_leaf { | 112 | struct htb_class_leaf { |
165 | struct Qdisc *q; | 113 | struct Qdisc *q; |
166 | int prio; | 114 | int prio; |
167 | int aprio; | 115 | int aprio; |
168 | int quantum; | 116 | int quantum; |
169 | int deficit[TC_HTB_MAXDEPTH]; | 117 | int deficit[TC_HTB_MAXDEPTH]; |
170 | struct list_head drop_list; | 118 | struct list_head drop_list; |
171 | } leaf; | 119 | } leaf; |
172 | struct htb_class_inner { | 120 | struct htb_class_inner { |
173 | struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ | 121 | struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ |
174 | struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ | 122 | struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ |
175 | /* When class changes from state 1->2 and disconnects from | 123 | /* When class changes from state 1->2 and disconnects from |
176 | parent's feed then we lost ptr value and start from the | 124 | parent's feed then we lost ptr value and start from the |
177 | first child again. Here we store classid of the | 125 | first child again. Here we store classid of the |
178 | last valid ptr (used when ptr is NULL). */ | 126 | last valid ptr (used when ptr is NULL). */ |
179 | u32 last_ptr_id[TC_HTB_NUMPRIO]; | 127 | u32 last_ptr_id[TC_HTB_NUMPRIO]; |
180 | } inner; | 128 | } inner; |
181 | } un; | 129 | } un; |
182 | struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ | 130 | struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ |
183 | struct rb_node pq_node; /* node for event queue */ | 131 | struct rb_node pq_node; /* node for event queue */ |
184 | unsigned long pq_key; /* the same type as jiffies global */ | 132 | unsigned long pq_key; /* the same type as jiffies global */ |
185 | 133 | ||
186 | int prio_activity; /* for which prios are we active */ | 134 | int prio_activity; /* for which prios are we active */ |
187 | enum htb_cmode cmode; /* current mode of the class */ | 135 | enum htb_cmode cmode; /* current mode of the class */ |
188 | 136 | ||
189 | /* class attached filters */ | 137 | /* class attached filters */ |
190 | struct tcf_proto *filter_list; | 138 | struct tcf_proto *filter_list; |
191 | int filter_cnt; | 139 | int filter_cnt; |
192 | 140 | ||
193 | int warned; /* only one warning about non work conserving .. */ | 141 | int warned; /* only one warning about non work conserving .. */ |
194 | 142 | ||
195 | /* token bucket parameters */ | 143 | /* token bucket parameters */ |
196 | struct qdisc_rate_table *rate; /* rate table of the class itself */ | 144 | struct qdisc_rate_table *rate; /* rate table of the class itself */ |
197 | struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ | 145 | struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ |
198 | long buffer,cbuffer; /* token bucket depth/rate */ | 146 | long buffer, cbuffer; /* token bucket depth/rate */ |
199 | psched_tdiff_t mbuffer; /* max wait time */ | 147 | psched_tdiff_t mbuffer; /* max wait time */ |
200 | long tokens,ctokens; /* current number of tokens */ | 148 | long tokens, ctokens; /* current number of tokens */ |
201 | psched_time_t t_c; /* checkpoint time */ | 149 | psched_time_t t_c; /* checkpoint time */ |
202 | }; | 150 | }; |
203 | 151 | ||
204 | /* TODO: maybe compute rate when size is too large .. or drop ? */ | 152 | /* TODO: maybe compute rate when size is too large .. or drop ? */ |
205 | static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate, | 153 | static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, |
206 | int size) | 154 | int size) |
207 | { | 155 | { |
208 | int slot = size >> rate->rate.cell_log; | 156 | int slot = size >> rate->rate.cell_log; |
209 | if (slot > 255) { | 157 | if (slot > 255) { |
210 | cl->xstats.giants++; | 158 | cl->xstats.giants++; |
211 | slot = 255; | 159 | slot = 255; |
212 | } | 160 | } |
213 | return rate->data[slot]; | 161 | return rate->data[slot]; |
214 | } | 162 | } |
215 | 163 | ||
216 | struct htb_sched | 164 | struct htb_sched { |
217 | { | 165 | struct list_head root; /* root classes list */ |
218 | struct list_head root; /* root classes list */ | 166 | struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */ |
219 | struct list_head hash[HTB_HSIZE]; /* hashed by classid */ | 167 | struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ |
220 | struct list_head drops[TC_HTB_NUMPRIO]; /* active leaves (for drops) */ | 168 | |
221 | 169 | /* self list - roots of self generating tree */ | |
222 | /* self list - roots of self generating tree */ | 170 | struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; |
223 | struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; | 171 | int row_mask[TC_HTB_MAXDEPTH]; |
224 | int row_mask[TC_HTB_MAXDEPTH]; | 172 | struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; |
225 | struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; | 173 | u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; |
226 | u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; | 174 | |
227 | 175 | /* self wait list - roots of wait PQs per row */ | |
228 | /* self wait list - roots of wait PQs per row */ | 176 | struct rb_root wait_pq[TC_HTB_MAXDEPTH]; |
229 | struct rb_root wait_pq[TC_HTB_MAXDEPTH]; | 177 | |
230 | 178 | /* time of nearest event per level (row) */ | |
231 | /* time of nearest event per level (row) */ | 179 | unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; |
232 | unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; | 180 | |
233 | 181 | /* cached value of jiffies in dequeue */ | |
234 | /* cached value of jiffies in dequeue */ | 182 | unsigned long jiffies; |
235 | unsigned long jiffies; | 183 | |
236 | 184 | /* whether we hit non-work conserving class during this dequeue; we use */ | |
237 | /* whether we hit non-work conserving class during this dequeue; we use */ | 185 | int nwc_hit; /* this to disable mindelay complaint in dequeue */ |
238 | int nwc_hit; /* this to disable mindelay complaint in dequeue */ | 186 | |
239 | 187 | int defcls; /* class where unclassified flows go to */ | |
240 | int defcls; /* class where unclassified flows go to */ | 188 | |
241 | u32 debug; /* subsystem debug levels */ | 189 | /* filters for qdisc itself */ |
242 | 190 | struct tcf_proto *filter_list; | |
243 | /* filters for qdisc itself */ | 191 | int filter_cnt; |
244 | struct tcf_proto *filter_list; | 192 | |
245 | int filter_cnt; | 193 | int rate2quantum; /* quant = rate / rate2quantum */ |
246 | 194 | psched_time_t now; /* cached dequeue time */ | |
247 | int rate2quantum; /* quant = rate / rate2quantum */ | 195 | struct timer_list timer; /* send delay timer */ |
248 | psched_time_t now; /* cached dequeue time */ | ||
249 | struct timer_list timer; /* send delay timer */ | ||
250 | #ifdef HTB_RATECM | 196 | #ifdef HTB_RATECM |
251 | struct timer_list rttim; /* rate computer timer */ | 197 | struct timer_list rttim; /* rate computer timer */ |
252 | int recmp_bucket; /* which hash bucket to recompute next */ | 198 | int recmp_bucket; /* which hash bucket to recompute next */ |
253 | #endif | 199 | #endif |
254 | |||
255 | /* non shaped skbs; let them go directly thru */ | ||
256 | struct sk_buff_head direct_queue; | ||
257 | int direct_qlen; /* max qlen of above */ | ||
258 | 200 | ||
259 | long direct_pkts; | 201 | /* non shaped skbs; let them go directly thru */ |
202 | struct sk_buff_head direct_queue; | ||
203 | int direct_qlen; /* max qlen of above */ | ||
204 | |||
205 | long direct_pkts; | ||
260 | }; | 206 | }; |
261 | 207 | ||
262 | /* compute hash of size HTB_HSIZE for given handle */ | 208 | /* compute hash of size HTB_HSIZE for given handle */ |
263 | static __inline__ int htb_hash(u32 h) | 209 | static inline int htb_hash(u32 h) |
264 | { | 210 | { |
265 | #if HTB_HSIZE != 16 | 211 | #if HTB_HSIZE != 16 |
266 | #error "Declare new hash for your HTB_HSIZE" | 212 | #error "Declare new hash for your HTB_HSIZE" |
267 | #endif | 213 | #endif |
268 | h ^= h>>8; /* stolen from cbq_hash */ | 214 | h ^= h >> 8; /* stolen from cbq_hash */ |
269 | h ^= h>>4; | 215 | h ^= h >> 4; |
270 | return h & 0xf; | 216 | return h & 0xf; |
271 | } | 217 | } |
272 | 218 | ||
273 | /* find class in global hash table using given handle */ | 219 | /* find class in global hash table using given handle */ |
274 | static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch) | 220 | static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) |
275 | { | 221 | { |
276 | struct htb_sched *q = qdisc_priv(sch); | 222 | struct htb_sched *q = qdisc_priv(sch); |
277 | struct list_head *p; | 223 | struct hlist_node *p; |
278 | if (TC_H_MAJ(handle) != sch->handle) | 224 | struct htb_class *cl; |
225 | |||
226 | if (TC_H_MAJ(handle) != sch->handle) | ||
279 | return NULL; | 227 | return NULL; |
280 | 228 | ||
281 | list_for_each (p,q->hash+htb_hash(handle)) { | 229 | hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) { |
282 | struct htb_class *cl = list_entry(p,struct htb_class,hlist); | ||
283 | if (cl->classid == handle) | 230 | if (cl->classid == handle) |
284 | return cl; | 231 | return cl; |
285 | } | 232 | } |
@@ -304,7 +251,8 @@ static inline u32 htb_classid(struct htb_class *cl) | |||
304 | return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC; | 251 | return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC; |
305 | } | 252 | } |
306 | 253 | ||
307 | static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) | 254 | static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, |
255 | int *qerr) | ||
308 | { | 256 | { |
309 | struct htb_sched *q = qdisc_priv(sch); | 257 | struct htb_sched *q = qdisc_priv(sch); |
310 | struct htb_class *cl; | 258 | struct htb_class *cl; |
@@ -316,8 +264,8 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in | |||
316 | note that nfmark can be used too by attaching filter fw with no | 264 | note that nfmark can be used too by attaching filter fw with no |
317 | rules in it */ | 265 | rules in it */ |
318 | if (skb->priority == sch->handle) | 266 | if (skb->priority == sch->handle) |
319 | return HTB_DIRECT; /* X:0 (direct flow) selected */ | 267 | return HTB_DIRECT; /* X:0 (direct flow) selected */ |
320 | if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) | 268 | if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) |
321 | return cl; | 269 | return cl; |
322 | 270 | ||
323 | *qerr = NET_XMIT_BYPASS; | 271 | *qerr = NET_XMIT_BYPASS; |
@@ -326,7 +274,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in | |||
326 | #ifdef CONFIG_NET_CLS_ACT | 274 | #ifdef CONFIG_NET_CLS_ACT |
327 | switch (result) { | 275 | switch (result) { |
328 | case TC_ACT_QUEUED: | 276 | case TC_ACT_QUEUED: |
329 | case TC_ACT_STOLEN: | 277 | case TC_ACT_STOLEN: |
330 | *qerr = NET_XMIT_SUCCESS; | 278 | *qerr = NET_XMIT_SUCCESS; |
331 | case TC_ACT_SHOT: | 279 | case TC_ACT_SHOT: |
332 | return NULL; | 280 | return NULL; |
@@ -335,97 +283,44 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in | |||
335 | if (result == TC_POLICE_SHOT) | 283 | if (result == TC_POLICE_SHOT) |
336 | return HTB_DIRECT; | 284 | return HTB_DIRECT; |
337 | #endif | 285 | #endif |
338 | if ((cl = (void*)res.class) == NULL) { | 286 | if ((cl = (void *)res.class) == NULL) { |
339 | if (res.classid == sch->handle) | 287 | if (res.classid == sch->handle) |
340 | return HTB_DIRECT; /* X:0 (direct flow) */ | 288 | return HTB_DIRECT; /* X:0 (direct flow) */ |
341 | if ((cl = htb_find(res.classid,sch)) == NULL) | 289 | if ((cl = htb_find(res.classid, sch)) == NULL) |
342 | break; /* filter selected invalid classid */ | 290 | break; /* filter selected invalid classid */ |
343 | } | 291 | } |
344 | if (!cl->level) | 292 | if (!cl->level) |
345 | return cl; /* we hit leaf; return it */ | 293 | return cl; /* we hit leaf; return it */ |
346 | 294 | ||
347 | /* we have got inner class; apply inner filter chain */ | 295 | /* we have got inner class; apply inner filter chain */ |
348 | tcf = cl->filter_list; | 296 | tcf = cl->filter_list; |
349 | } | 297 | } |
350 | /* classification failed; try to use default class */ | 298 | /* classification failed; try to use default class */ |
351 | cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch); | 299 | cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); |
352 | if (!cl || cl->level) | 300 | if (!cl || cl->level) |
353 | return HTB_DIRECT; /* bad default .. this is safe bet */ | 301 | return HTB_DIRECT; /* bad default .. this is safe bet */ |
354 | return cl; | 302 | return cl; |
355 | } | 303 | } |
356 | 304 | ||
357 | #ifdef HTB_DEBUG | ||
358 | static void htb_next_rb_node(struct rb_node **n); | ||
359 | #define HTB_DUMTREE(root,memb) if(root) { \ | ||
360 | struct rb_node *n = (root)->rb_node; \ | ||
361 | while (n->rb_left) n = n->rb_left; \ | ||
362 | while (n) { \ | ||
363 | struct htb_class *cl = rb_entry(n, struct htb_class, memb); \ | ||
364 | printk(" %x",cl->classid); htb_next_rb_node (&n); \ | ||
365 | } } | ||
366 | |||
367 | static void htb_debug_dump (struct htb_sched *q) | ||
368 | { | ||
369 | int i,p; | ||
370 | printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies); | ||
371 | /* rows */ | ||
372 | for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) { | ||
373 | printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]); | ||
374 | for (p=0;p<TC_HTB_NUMPRIO;p++) { | ||
375 | if (!q->row[i][p].rb_node) continue; | ||
376 | printk(" p%d:",p); | ||
377 | HTB_DUMTREE(q->row[i]+p,node[p]); | ||
378 | } | ||
379 | printk("\n"); | ||
380 | } | ||
381 | /* classes */ | ||
382 | for (i = 0; i < HTB_HSIZE; i++) { | ||
383 | struct list_head *l; | ||
384 | list_for_each (l,q->hash+i) { | ||
385 | struct htb_class *cl = list_entry(l,struct htb_class,hlist); | ||
386 | long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); | ||
387 | printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d " | ||
388 | "pa=%x f:", | ||
389 | cl->classid,cl->cmode,cl->tokens,cl->ctokens, | ||
390 | cl->pq_node.rb_color==-1?0:cl->pq_key,diff, | ||
391 | cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity); | ||
392 | if (cl->level) | ||
393 | for (p=0;p<TC_HTB_NUMPRIO;p++) { | ||
394 | if (!cl->un.inner.feed[p].rb_node) continue; | ||
395 | printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0); | ||
396 | HTB_DUMTREE(cl->un.inner.feed+p,node[p]); | ||
397 | } | ||
398 | printk("\n"); | ||
399 | } | ||
400 | } | ||
401 | } | ||
402 | #endif | ||
403 | /** | 305 | /** |
404 | * htb_add_to_id_tree - adds class to the round robin list | 306 | * htb_add_to_id_tree - adds class to the round robin list |
405 | * | 307 | * |
406 | * Routine adds class to the list (actually tree) sorted by classid. | 308 | * Routine adds class to the list (actually tree) sorted by classid. |
407 | * Make sure that class is not already on such list for given prio. | 309 | * Make sure that class is not already on such list for given prio. |
408 | */ | 310 | */ |
409 | static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root, | 311 | static void htb_add_to_id_tree(struct rb_root *root, |
410 | struct htb_class *cl,int prio) | 312 | struct htb_class *cl, int prio) |
411 | { | 313 | { |
412 | struct rb_node **p = &root->rb_node, *parent = NULL; | 314 | struct rb_node **p = &root->rb_node, *parent = NULL; |
413 | HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio); | 315 | |
414 | #ifdef HTB_DEBUG | ||
415 | if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; } | ||
416 | HTB_CHCL(cl); | ||
417 | if (*p) { | ||
418 | struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]); | ||
419 | HTB_CHCL(x); | ||
420 | } | ||
421 | #endif | ||
422 | while (*p) { | 316 | while (*p) { |
423 | struct htb_class *c; parent = *p; | 317 | struct htb_class *c; |
318 | parent = *p; | ||
424 | c = rb_entry(parent, struct htb_class, node[prio]); | 319 | c = rb_entry(parent, struct htb_class, node[prio]); |
425 | HTB_CHCL(c); | 320 | |
426 | if (cl->classid > c->classid) | 321 | if (cl->classid > c->classid) |
427 | p = &parent->rb_right; | 322 | p = &parent->rb_right; |
428 | else | 323 | else |
429 | p = &parent->rb_left; | 324 | p = &parent->rb_left; |
430 | } | 325 | } |
431 | rb_link_node(&cl->node[prio], parent, p); | 326 | rb_link_node(&cl->node[prio], parent, p); |
@@ -439,17 +334,11 @@ static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root, | |||
439 | * change its mode in cl->pq_key microseconds. Make sure that class is not | 334 | * change its mode in cl->pq_key microseconds. Make sure that class is not |
440 | * already in the queue. | 335 | * already in the queue. |
441 | */ | 336 | */ |
442 | static void htb_add_to_wait_tree (struct htb_sched *q, | 337 | static void htb_add_to_wait_tree(struct htb_sched *q, |
443 | struct htb_class *cl,long delay,int debug_hint) | 338 | struct htb_class *cl, long delay) |
444 | { | 339 | { |
445 | struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; | 340 | struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; |
446 | HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key); | 341 | |
447 | #ifdef HTB_DEBUG | ||
448 | if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; } | ||
449 | HTB_CHCL(cl); | ||
450 | if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit()) | ||
451 | printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint); | ||
452 | #endif | ||
453 | cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay); | 342 | cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay); |
454 | if (cl->pq_key == q->jiffies) | 343 | if (cl->pq_key == q->jiffies) |
455 | cl->pq_key++; | 344 | cl->pq_key++; |
@@ -457,13 +346,14 @@ static void htb_add_to_wait_tree (struct htb_sched *q, | |||
457 | /* update the nearest event cache */ | 346 | /* update the nearest event cache */ |
458 | if (time_after(q->near_ev_cache[cl->level], cl->pq_key)) | 347 | if (time_after(q->near_ev_cache[cl->level], cl->pq_key)) |
459 | q->near_ev_cache[cl->level] = cl->pq_key; | 348 | q->near_ev_cache[cl->level] = cl->pq_key; |
460 | 349 | ||
461 | while (*p) { | 350 | while (*p) { |
462 | struct htb_class *c; parent = *p; | 351 | struct htb_class *c; |
352 | parent = *p; | ||
463 | c = rb_entry(parent, struct htb_class, pq_node); | 353 | c = rb_entry(parent, struct htb_class, pq_node); |
464 | if (time_after_eq(cl->pq_key, c->pq_key)) | 354 | if (time_after_eq(cl->pq_key, c->pq_key)) |
465 | p = &parent->rb_right; | 355 | p = &parent->rb_right; |
466 | else | 356 | else |
467 | p = &parent->rb_left; | 357 | p = &parent->rb_left; |
468 | } | 358 | } |
469 | rb_link_node(&cl->pq_node, parent, p); | 359 | rb_link_node(&cl->pq_node, parent, p); |
@@ -476,7 +366,7 @@ static void htb_add_to_wait_tree (struct htb_sched *q, | |||
476 | * When we are past last key we return NULL. | 366 | * When we are past last key we return NULL. |
477 | * Average complexity is 2 steps per call. | 367 | * Average complexity is 2 steps per call. |
478 | */ | 368 | */ |
479 | static void htb_next_rb_node(struct rb_node **n) | 369 | static inline void htb_next_rb_node(struct rb_node **n) |
480 | { | 370 | { |
481 | *n = rb_next(*n); | 371 | *n = rb_next(*n); |
482 | } | 372 | } |
@@ -487,42 +377,51 @@ static void htb_next_rb_node(struct rb_node **n) | |||
487 | * The class is added to row at priorities marked in mask. | 377 | * The class is added to row at priorities marked in mask. |
488 | * It does nothing if mask == 0. | 378 | * It does nothing if mask == 0. |
489 | */ | 379 | */ |
490 | static inline void htb_add_class_to_row(struct htb_sched *q, | 380 | static inline void htb_add_class_to_row(struct htb_sched *q, |
491 | struct htb_class *cl,int mask) | 381 | struct htb_class *cl, int mask) |
492 | { | 382 | { |
493 | HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n", | ||
494 | cl->classid,mask,q->row_mask[cl->level]); | ||
495 | HTB_CHCL(cl); | ||
496 | q->row_mask[cl->level] |= mask; | 383 | q->row_mask[cl->level] |= mask; |
497 | while (mask) { | 384 | while (mask) { |
498 | int prio = ffz(~mask); | 385 | int prio = ffz(~mask); |
499 | mask &= ~(1 << prio); | 386 | mask &= ~(1 << prio); |
500 | htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio); | 387 | htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio); |
388 | } | ||
389 | } | ||
390 | |||
391 | /* If this triggers, it is a bug in this code, but it need not be fatal */ | ||
392 | static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root) | ||
393 | { | ||
394 | if (RB_EMPTY_NODE(rb)) { | ||
395 | WARN_ON(1); | ||
396 | } else { | ||
397 | rb_erase(rb, root); | ||
398 | RB_CLEAR_NODE(rb); | ||
501 | } | 399 | } |
502 | } | 400 | } |
503 | 401 | ||
402 | |||
504 | /** | 403 | /** |
505 | * htb_remove_class_from_row - removes class from its row | 404 | * htb_remove_class_from_row - removes class from its row |
506 | * | 405 | * |
507 | * The class is removed from row at priorities marked in mask. | 406 | * The class is removed from row at priorities marked in mask. |
508 | * It does nothing if mask == 0. | 407 | * It does nothing if mask == 0. |
509 | */ | 408 | */ |
510 | static __inline__ void htb_remove_class_from_row(struct htb_sched *q, | 409 | static inline void htb_remove_class_from_row(struct htb_sched *q, |
511 | struct htb_class *cl,int mask) | 410 | struct htb_class *cl, int mask) |
512 | { | 411 | { |
513 | int m = 0; | 412 | int m = 0; |
514 | HTB_CHCL(cl); | 413 | |
515 | while (mask) { | 414 | while (mask) { |
516 | int prio = ffz(~mask); | 415 | int prio = ffz(~mask); |
416 | |||
517 | mask &= ~(1 << prio); | 417 | mask &= ~(1 << prio); |
518 | if (q->ptr[cl->level][prio] == cl->node+prio) | 418 | if (q->ptr[cl->level][prio] == cl->node + prio) |
519 | htb_next_rb_node(q->ptr[cl->level]+prio); | 419 | htb_next_rb_node(q->ptr[cl->level] + prio); |
520 | htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio); | 420 | |
521 | if (!q->row[cl->level][prio].rb_node) | 421 | htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio); |
422 | if (!q->row[cl->level][prio].rb_node) | ||
522 | m |= 1 << prio; | 423 | m |= 1 << prio; |
523 | } | 424 | } |
524 | HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n", | ||
525 | cl->classid,mask,q->row_mask[cl->level],m); | ||
526 | q->row_mask[cl->level] &= ~m; | 425 | q->row_mask[cl->level] &= ~m; |
527 | } | 426 | } |
528 | 427 | ||
@@ -533,34 +432,31 @@ static __inline__ void htb_remove_class_from_row(struct htb_sched *q, | |||
533 | * for priorities it is participating on. cl->cmode must be new | 432 | * for priorities it is participating on. cl->cmode must be new |
534 | * (activated) mode. It does nothing if cl->prio_activity == 0. | 433 | * (activated) mode. It does nothing if cl->prio_activity == 0. |
535 | */ | 434 | */ |
536 | static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl) | 435 | static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) |
537 | { | 436 | { |
538 | struct htb_class *p = cl->parent; | 437 | struct htb_class *p = cl->parent; |
539 | long m,mask = cl->prio_activity; | 438 | long m, mask = cl->prio_activity; |
540 | HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode); | ||
541 | HTB_CHCL(cl); | ||
542 | 439 | ||
543 | while (cl->cmode == HTB_MAY_BORROW && p && mask) { | 440 | while (cl->cmode == HTB_MAY_BORROW && p && mask) { |
544 | HTB_CHCL(p); | 441 | m = mask; |
545 | m = mask; while (m) { | 442 | while (m) { |
546 | int prio = ffz(~m); | 443 | int prio = ffz(~m); |
547 | m &= ~(1 << prio); | 444 | m &= ~(1 << prio); |
548 | 445 | ||
549 | if (p->un.inner.feed[prio].rb_node) | 446 | if (p->un.inner.feed[prio].rb_node) |
550 | /* parent already has its feed in use so that | 447 | /* parent already has its feed in use so that |
551 | reset bit in mask as parent is already ok */ | 448 | reset bit in mask as parent is already ok */ |
552 | mask &= ~(1 << prio); | 449 | mask &= ~(1 << prio); |
553 | 450 | ||
554 | htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio); | 451 | htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); |
555 | } | 452 | } |
556 | HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n", | ||
557 | p->classid,p->prio_activity,mask,p->cmode); | ||
558 | p->prio_activity |= mask; | 453 | p->prio_activity |= mask; |
559 | cl = p; p = cl->parent; | 454 | cl = p; |
560 | HTB_CHCL(cl); | 455 | p = cl->parent; |
456 | |||
561 | } | 457 | } |
562 | if (cl->cmode == HTB_CAN_SEND && mask) | 458 | if (cl->cmode == HTB_CAN_SEND && mask) |
563 | htb_add_class_to_row(q,cl,mask); | 459 | htb_add_class_to_row(q, cl, mask); |
564 | } | 460 | } |
565 | 461 | ||
566 | /** | 462 | /** |
@@ -573,39 +469,52 @@ static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl) | |||
573 | static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) | 469 | static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) |
574 | { | 470 | { |
575 | struct htb_class *p = cl->parent; | 471 | struct htb_class *p = cl->parent; |
576 | long m,mask = cl->prio_activity; | 472 | long m, mask = cl->prio_activity; |
577 | HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode); | ||
578 | HTB_CHCL(cl); | ||
579 | 473 | ||
580 | while (cl->cmode == HTB_MAY_BORROW && p && mask) { | 474 | while (cl->cmode == HTB_MAY_BORROW && p && mask) { |
581 | m = mask; mask = 0; | 475 | m = mask; |
476 | mask = 0; | ||
582 | while (m) { | 477 | while (m) { |
583 | int prio = ffz(~m); | 478 | int prio = ffz(~m); |
584 | m &= ~(1 << prio); | 479 | m &= ~(1 << prio); |
585 | 480 | ||
586 | if (p->un.inner.ptr[prio] == cl->node+prio) { | 481 | if (p->un.inner.ptr[prio] == cl->node + prio) { |
587 | /* we are removing child which is pointed to from | 482 | /* we are removing child which is pointed to from |
588 | parent feed - forget the pointer but remember | 483 | parent feed - forget the pointer but remember |
589 | classid */ | 484 | classid */ |
590 | p->un.inner.last_ptr_id[prio] = cl->classid; | 485 | p->un.inner.last_ptr_id[prio] = cl->classid; |
591 | p->un.inner.ptr[prio] = NULL; | 486 | p->un.inner.ptr[prio] = NULL; |
592 | } | 487 | } |
593 | 488 | ||
594 | htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio); | 489 | htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio); |
595 | 490 | ||
596 | if (!p->un.inner.feed[prio].rb_node) | 491 | if (!p->un.inner.feed[prio].rb_node) |
597 | mask |= 1 << prio; | 492 | mask |= 1 << prio; |
598 | } | 493 | } |
599 | HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n", | 494 | |
600 | p->classid,p->prio_activity,mask,p->cmode); | ||
601 | p->prio_activity &= ~mask; | 495 | p->prio_activity &= ~mask; |
602 | cl = p; p = cl->parent; | 496 | cl = p; |
603 | HTB_CHCL(cl); | 497 | p = cl->parent; |
498 | |||
604 | } | 499 | } |
605 | if (cl->cmode == HTB_CAN_SEND && mask) | 500 | if (cl->cmode == HTB_CAN_SEND && mask) |
606 | htb_remove_class_from_row(q,cl,mask); | 501 | htb_remove_class_from_row(q, cl, mask); |
607 | } | 502 | } |
608 | 503 | ||
504 | #if HTB_HYSTERESIS | ||
505 | static inline long htb_lowater(const struct htb_class *cl) | ||
506 | { | ||
507 | return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; | ||
508 | } | ||
509 | static inline long htb_hiwater(const struct htb_class *cl) | ||
510 | { | ||
511 | return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; | ||
512 | } | ||
513 | #else | ||
514 | #define htb_lowater(cl) (0) | ||
515 | #define htb_hiwater(cl) (0) | ||
516 | #endif | ||
517 | |||
609 | /** | 518 | /** |
610 | * htb_class_mode - computes and returns current class mode | 519 | * htb_class_mode - computes and returns current class mode |
611 | * | 520 | * |
@@ -617,28 +526,21 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) | |||
617 | * 0 .. -cl->{c,}buffer range. It is meant to limit number of | 526 | * 0 .. -cl->{c,}buffer range. It is meant to limit number of |
618 | * mode transitions per time unit. The speed gain is about 1/6. | 527 | * mode transitions per time unit. The speed gain is about 1/6. |
619 | */ | 528 | */ |
620 | static __inline__ enum htb_cmode | 529 | static inline enum htb_cmode |
621 | htb_class_mode(struct htb_class *cl,long *diff) | 530 | htb_class_mode(struct htb_class *cl, long *diff) |
622 | { | 531 | { |
623 | long toks; | 532 | long toks; |
624 | 533 | ||
625 | if ((toks = (cl->ctokens + *diff)) < ( | 534 | if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) { |
626 | #if HTB_HYSTERESIS | 535 | *diff = -toks; |
627 | cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : | 536 | return HTB_CANT_SEND; |
628 | #endif | 537 | } |
629 | 0)) { | 538 | |
630 | *diff = -toks; | 539 | if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl)) |
631 | return HTB_CANT_SEND; | 540 | return HTB_CAN_SEND; |
632 | } | ||
633 | if ((toks = (cl->tokens + *diff)) >= ( | ||
634 | #if HTB_HYSTERESIS | ||
635 | cl->cmode == HTB_CAN_SEND ? -cl->buffer : | ||
636 | #endif | ||
637 | 0)) | ||
638 | return HTB_CAN_SEND; | ||
639 | 541 | ||
640 | *diff = -toks; | 542 | *diff = -toks; |
641 | return HTB_MAY_BORROW; | 543 | return HTB_MAY_BORROW; |
642 | } | 544 | } |
643 | 545 | ||
644 | /** | 546 | /** |
@@ -650,24 +552,21 @@ htb_class_mode(struct htb_class *cl,long *diff) | |||
650 | * be different from old one and cl->pq_key has to be valid if changing | 552 | * be different from old one and cl->pq_key has to be valid if changing |
651 | * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree). | 553 | * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree). |
652 | */ | 554 | */ |
653 | static void | 555 | static void |
654 | htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) | 556 | htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) |
655 | { | 557 | { |
656 | enum htb_cmode new_mode = htb_class_mode(cl,diff); | 558 | enum htb_cmode new_mode = htb_class_mode(cl, diff); |
657 | |||
658 | HTB_CHCL(cl); | ||
659 | HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid); | ||
660 | 559 | ||
661 | if (new_mode == cl->cmode) | 560 | if (new_mode == cl->cmode) |
662 | return; | 561 | return; |
663 | 562 | ||
664 | if (cl->prio_activity) { /* not necessary: speed optimization */ | 563 | if (cl->prio_activity) { /* not necessary: speed optimization */ |
665 | if (cl->cmode != HTB_CANT_SEND) | 564 | if (cl->cmode != HTB_CANT_SEND) |
666 | htb_deactivate_prios(q,cl); | 565 | htb_deactivate_prios(q, cl); |
667 | cl->cmode = new_mode; | 566 | cl->cmode = new_mode; |
668 | if (new_mode != HTB_CANT_SEND) | 567 | if (new_mode != HTB_CANT_SEND) |
669 | htb_activate_prios(q,cl); | 568 | htb_activate_prios(q, cl); |
670 | } else | 569 | } else |
671 | cl->cmode = new_mode; | 570 | cl->cmode = new_mode; |
672 | } | 571 | } |
673 | 572 | ||
@@ -678,14 +577,15 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) | |||
678 | * for the prio. It can be called on already active leaf safely. | 577 | * for the prio. It can be called on already active leaf safely. |
679 | * It also adds leaf into droplist. | 578 | * It also adds leaf into droplist. |
680 | */ | 579 | */ |
681 | static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl) | 580 | static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) |
682 | { | 581 | { |
683 | BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen); | 582 | BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen); |
684 | HTB_CHCL(cl); | 583 | |
685 | if (!cl->prio_activity) { | 584 | if (!cl->prio_activity) { |
686 | cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); | 585 | cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); |
687 | htb_activate_prios(q,cl); | 586 | htb_activate_prios(q, cl); |
688 | list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio); | 587 | list_add_tail(&cl->un.leaf.drop_list, |
588 | q->drops + cl->un.leaf.aprio); | ||
689 | } | 589 | } |
690 | } | 590 | } |
691 | 591 | ||
@@ -695,120 +595,120 @@ static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl) | |||
695 | * Make sure that leaf is active. In the other words it can't be called | 595 | * Make sure that leaf is active. In the other words it can't be called |
696 | * with non-active leaf. It also removes class from the drop list. | 596 | * with non-active leaf. It also removes class from the drop list. |
697 | */ | 597 | */ |
698 | static __inline__ void | 598 | static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) |
699 | htb_deactivate(struct htb_sched *q,struct htb_class *cl) | ||
700 | { | 599 | { |
701 | BUG_TRAP(cl->prio_activity); | 600 | BUG_TRAP(cl->prio_activity); |
702 | HTB_CHCL(cl); | 601 | |
703 | htb_deactivate_prios(q,cl); | 602 | htb_deactivate_prios(q, cl); |
704 | cl->prio_activity = 0; | 603 | cl->prio_activity = 0; |
705 | list_del_init(&cl->un.leaf.drop_list); | 604 | list_del_init(&cl->un.leaf.drop_list); |
706 | } | 605 | } |
707 | 606 | ||
708 | static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) | 607 | static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
709 | { | 608 | { |
710 | int ret; | 609 | int ret; |
711 | struct htb_sched *q = qdisc_priv(sch); | 610 | struct htb_sched *q = qdisc_priv(sch); |
712 | struct htb_class *cl = htb_classify(skb,sch,&ret); | 611 | struct htb_class *cl = htb_classify(skb, sch, &ret); |
713 | 612 | ||
714 | if (cl == HTB_DIRECT) { | 613 | if (cl == HTB_DIRECT) { |
715 | /* enqueue to helper queue */ | 614 | /* enqueue to helper queue */ |
716 | if (q->direct_queue.qlen < q->direct_qlen) { | 615 | if (q->direct_queue.qlen < q->direct_qlen) { |
717 | __skb_queue_tail(&q->direct_queue, skb); | 616 | __skb_queue_tail(&q->direct_queue, skb); |
718 | q->direct_pkts++; | 617 | q->direct_pkts++; |
719 | } else { | 618 | } else { |
720 | kfree_skb(skb); | 619 | kfree_skb(skb); |
721 | sch->qstats.drops++; | 620 | sch->qstats.drops++; |
722 | return NET_XMIT_DROP; | 621 | return NET_XMIT_DROP; |
723 | } | 622 | } |
724 | #ifdef CONFIG_NET_CLS_ACT | 623 | #ifdef CONFIG_NET_CLS_ACT |
725 | } else if (!cl) { | 624 | } else if (!cl) { |
726 | if (ret == NET_XMIT_BYPASS) | 625 | if (ret == NET_XMIT_BYPASS) |
727 | sch->qstats.drops++; | 626 | sch->qstats.drops++; |
728 | kfree_skb (skb); | 627 | kfree_skb(skb); |
729 | return ret; | 628 | return ret; |
730 | #endif | 629 | #endif |
731 | } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { | 630 | } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != |
732 | sch->qstats.drops++; | 631 | NET_XMIT_SUCCESS) { |
733 | cl->qstats.drops++; | 632 | sch->qstats.drops++; |
734 | return NET_XMIT_DROP; | 633 | cl->qstats.drops++; |
735 | } else { | 634 | return NET_XMIT_DROP; |
736 | cl->bstats.packets++; cl->bstats.bytes += skb->len; | 635 | } else { |
737 | htb_activate (q,cl); | 636 | cl->bstats.packets++; |
738 | } | 637 | cl->bstats.bytes += skb->len; |
739 | 638 | htb_activate(q, cl); | |
740 | sch->q.qlen++; | 639 | } |
741 | sch->bstats.packets++; sch->bstats.bytes += skb->len; | 640 | |
742 | HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); | 641 | sch->q.qlen++; |
743 | return NET_XMIT_SUCCESS; | 642 | sch->bstats.packets++; |
643 | sch->bstats.bytes += skb->len; | ||
644 | return NET_XMIT_SUCCESS; | ||
744 | } | 645 | } |
745 | 646 | ||
746 | /* TODO: requeuing packet charges it to policers again !! */ | 647 | /* TODO: requeuing packet charges it to policers again !! */ |
747 | static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) | 648 | static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) |
748 | { | 649 | { |
749 | struct htb_sched *q = qdisc_priv(sch); | 650 | struct htb_sched *q = qdisc_priv(sch); |
750 | int ret = NET_XMIT_SUCCESS; | 651 | int ret = NET_XMIT_SUCCESS; |
751 | struct htb_class *cl = htb_classify(skb,sch, &ret); | 652 | struct htb_class *cl = htb_classify(skb, sch, &ret); |
752 | struct sk_buff *tskb; | 653 | struct sk_buff *tskb; |
753 | 654 | ||
754 | if (cl == HTB_DIRECT || !cl) { | 655 | if (cl == HTB_DIRECT || !cl) { |
755 | /* enqueue to helper queue */ | 656 | /* enqueue to helper queue */ |
756 | if (q->direct_queue.qlen < q->direct_qlen && cl) { | 657 | if (q->direct_queue.qlen < q->direct_qlen && cl) { |
757 | __skb_queue_head(&q->direct_queue, skb); | 658 | __skb_queue_head(&q->direct_queue, skb); |
758 | } else { | 659 | } else { |
759 | __skb_queue_head(&q->direct_queue, skb); | 660 | __skb_queue_head(&q->direct_queue, skb); |
760 | tskb = __skb_dequeue_tail(&q->direct_queue); | 661 | tskb = __skb_dequeue_tail(&q->direct_queue); |
761 | kfree_skb (tskb); | 662 | kfree_skb(tskb); |
762 | sch->qstats.drops++; | 663 | sch->qstats.drops++; |
763 | return NET_XMIT_CN; | 664 | return NET_XMIT_CN; |
764 | } | 665 | } |
765 | } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { | 666 | } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != |
766 | sch->qstats.drops++; | 667 | NET_XMIT_SUCCESS) { |
767 | cl->qstats.drops++; | 668 | sch->qstats.drops++; |
768 | return NET_XMIT_DROP; | 669 | cl->qstats.drops++; |
769 | } else | 670 | return NET_XMIT_DROP; |
770 | htb_activate (q,cl); | 671 | } else |
771 | 672 | htb_activate(q, cl); | |
772 | sch->q.qlen++; | 673 | |
773 | sch->qstats.requeues++; | 674 | sch->q.qlen++; |
774 | HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); | 675 | sch->qstats.requeues++; |
775 | return NET_XMIT_SUCCESS; | 676 | return NET_XMIT_SUCCESS; |
776 | } | 677 | } |
777 | 678 | ||
778 | static void htb_timer(unsigned long arg) | 679 | static void htb_timer(unsigned long arg) |
779 | { | 680 | { |
780 | struct Qdisc *sch = (struct Qdisc*)arg; | 681 | struct Qdisc *sch = (struct Qdisc *)arg; |
781 | sch->flags &= ~TCQ_F_THROTTLED; | 682 | sch->flags &= ~TCQ_F_THROTTLED; |
782 | wmb(); | 683 | wmb(); |
783 | netif_schedule(sch->dev); | 684 | netif_schedule(sch->dev); |
784 | } | 685 | } |
785 | 686 | ||
786 | #ifdef HTB_RATECM | 687 | #ifdef HTB_RATECM |
787 | #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 | 688 | #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 |
788 | static void htb_rate_timer(unsigned long arg) | 689 | static void htb_rate_timer(unsigned long arg) |
789 | { | 690 | { |
790 | struct Qdisc *sch = (struct Qdisc*)arg; | 691 | struct Qdisc *sch = (struct Qdisc *)arg; |
791 | struct htb_sched *q = qdisc_priv(sch); | 692 | struct htb_sched *q = qdisc_priv(sch); |
792 | struct list_head *p; | 693 | struct hlist_node *p; |
694 | struct htb_class *cl; | ||
695 | |||
793 | 696 | ||
794 | /* lock queue so that we can muck with it */ | 697 | /* lock queue so that we can muck with it */ |
795 | HTB_QLOCK(sch); | 698 | spin_lock_bh(&sch->dev->queue_lock); |
796 | HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies); | ||
797 | 699 | ||
798 | q->rttim.expires = jiffies + HZ; | 700 | q->rttim.expires = jiffies + HZ; |
799 | add_timer(&q->rttim); | 701 | add_timer(&q->rttim); |
800 | 702 | ||
801 | /* scan and recompute one bucket at time */ | 703 | /* scan and recompute one bucket at time */ |
802 | if (++q->recmp_bucket >= HTB_HSIZE) | 704 | if (++q->recmp_bucket >= HTB_HSIZE) |
803 | q->recmp_bucket = 0; | 705 | q->recmp_bucket = 0; |
804 | list_for_each (p,q->hash+q->recmp_bucket) { | 706 | |
805 | struct htb_class *cl = list_entry(p,struct htb_class,hlist); | 707 | hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) { |
806 | HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n", | 708 | RT_GEN(cl->sum_bytes, cl->rate_bytes); |
807 | cl->classid,cl->sum_bytes,cl->sum_packets); | 709 | RT_GEN(cl->sum_packets, cl->rate_packets); |
808 | RT_GEN (cl->sum_bytes,cl->rate_bytes); | ||
809 | RT_GEN (cl->sum_packets,cl->rate_packets); | ||
810 | } | 710 | } |
811 | HTB_QUNLOCK(sch); | 711 | spin_unlock_bh(&sch->dev->queue_lock); |
812 | } | 712 | } |
813 | #endif | 713 | #endif |
814 | 714 | ||
@@ -823,12 +723,11 @@ static void htb_rate_timer(unsigned long arg) | |||
823 | * CAN_SEND) because we can use more precise clock that event queue here. | 723 | * CAN_SEND) because we can use more precise clock that event queue here. |
824 | * In such case we remove class from event queue first. | 724 | * In such case we remove class from event queue first. |
825 | */ | 725 | */ |
826 | static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, | 726 | static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, |
827 | int level,int bytes) | 727 | int level, int bytes) |
828 | { | 728 | { |
829 | long toks,diff; | 729 | long toks, diff; |
830 | enum htb_cmode old_mode; | 730 | enum htb_cmode old_mode; |
831 | HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes); | ||
832 | 731 | ||
833 | #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \ | 732 | #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \ |
834 | if (toks > cl->B) toks = cl->B; \ | 733 | if (toks > cl->B) toks = cl->B; \ |
@@ -837,47 +736,31 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, | |||
837 | cl->T = toks | 736 | cl->T = toks |
838 | 737 | ||
839 | while (cl) { | 738 | while (cl) { |
840 | HTB_CHCL(cl); | 739 | diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); |
841 | diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); | ||
842 | #ifdef HTB_DEBUG | ||
843 | if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) { | ||
844 | if (net_ratelimit()) | ||
845 | printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n", | ||
846 | cl->classid, diff, | ||
847 | #ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY | ||
848 | q->now.tv_sec * 1000000ULL + q->now.tv_usec, | ||
849 | cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec, | ||
850 | #else | ||
851 | (unsigned long long) q->now, | ||
852 | (unsigned long long) cl->t_c, | ||
853 | #endif | ||
854 | q->jiffies); | ||
855 | diff = 1000; | ||
856 | } | ||
857 | #endif | ||
858 | if (cl->level >= level) { | 740 | if (cl->level >= level) { |
859 | if (cl->level == level) cl->xstats.lends++; | 741 | if (cl->level == level) |
860 | HTB_ACCNT (tokens,buffer,rate); | 742 | cl->xstats.lends++; |
743 | HTB_ACCNT(tokens, buffer, rate); | ||
861 | } else { | 744 | } else { |
862 | cl->xstats.borrows++; | 745 | cl->xstats.borrows++; |
863 | cl->tokens += diff; /* we moved t_c; update tokens */ | 746 | cl->tokens += diff; /* we moved t_c; update tokens */ |
864 | } | 747 | } |
865 | HTB_ACCNT (ctokens,cbuffer,ceil); | 748 | HTB_ACCNT(ctokens, cbuffer, ceil); |
866 | cl->t_c = q->now; | 749 | cl->t_c = q->now; |
867 | HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens); | ||
868 | 750 | ||
869 | old_mode = cl->cmode; diff = 0; | 751 | old_mode = cl->cmode; |
870 | htb_change_class_mode(q,cl,&diff); | 752 | diff = 0; |
753 | htb_change_class_mode(q, cl, &diff); | ||
871 | if (old_mode != cl->cmode) { | 754 | if (old_mode != cl->cmode) { |
872 | if (old_mode != HTB_CAN_SEND) | 755 | if (old_mode != HTB_CAN_SEND) |
873 | htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level); | 756 | htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); |
874 | if (cl->cmode != HTB_CAN_SEND) | 757 | if (cl->cmode != HTB_CAN_SEND) |
875 | htb_add_to_wait_tree (q,cl,diff,1); | 758 | htb_add_to_wait_tree(q, cl, diff); |
876 | } | 759 | } |
877 | |||
878 | #ifdef HTB_RATECM | 760 | #ifdef HTB_RATECM |
879 | /* update rate counters */ | 761 | /* update rate counters */ |
880 | cl->sum_bytes += bytes; cl->sum_packets++; | 762 | cl->sum_bytes += bytes; |
763 | cl->sum_packets++; | ||
881 | #endif | 764 | #endif |
882 | 765 | ||
883 | /* update byte stats except for leaves which are already updated */ | 766 | /* update byte stats except for leaves which are already updated */ |
@@ -896,60 +779,46 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, | |||
896 | * next pending event (0 for no event in pq). | 779 | * next pending event (0 for no event in pq). |
897 | * Note: Aplied are events whose have cl->pq_key <= jiffies. | 780 | * Note: Aplied are events whose have cl->pq_key <= jiffies. |
898 | */ | 781 | */ |
899 | static long htb_do_events(struct htb_sched *q,int level) | 782 | static long htb_do_events(struct htb_sched *q, int level) |
900 | { | 783 | { |
901 | int i; | 784 | int i; |
902 | HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n", | 785 | |
903 | level,q->wait_pq[level].rb_node,q->row_mask[level]); | ||
904 | for (i = 0; i < 500; i++) { | 786 | for (i = 0; i < 500; i++) { |
905 | struct htb_class *cl; | 787 | struct htb_class *cl; |
906 | long diff; | 788 | long diff; |
907 | struct rb_node *p = q->wait_pq[level].rb_node; | 789 | struct rb_node *p = q->wait_pq[level].rb_node; |
908 | if (!p) return 0; | 790 | if (!p) |
909 | while (p->rb_left) p = p->rb_left; | 791 | return 0; |
792 | while (p->rb_left) | ||
793 | p = p->rb_left; | ||
910 | 794 | ||
911 | cl = rb_entry(p, struct htb_class, pq_node); | 795 | cl = rb_entry(p, struct htb_class, pq_node); |
912 | if (time_after(cl->pq_key, q->jiffies)) { | 796 | if (time_after(cl->pq_key, q->jiffies)) { |
913 | HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies); | ||
914 | return cl->pq_key - q->jiffies; | 797 | return cl->pq_key - q->jiffies; |
915 | } | 798 | } |
916 | htb_safe_rb_erase(p,q->wait_pq+level); | 799 | htb_safe_rb_erase(p, q->wait_pq + level); |
917 | diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); | 800 | diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); |
918 | #ifdef HTB_DEBUG | 801 | htb_change_class_mode(q, cl, &diff); |
919 | if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) { | ||
920 | if (net_ratelimit()) | ||
921 | printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n", | ||
922 | cl->classid, diff, | ||
923 | #ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY | ||
924 | q->now.tv_sec * 1000000ULL + q->now.tv_usec, | ||
925 | cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec, | ||
926 | #else | ||
927 | (unsigned long long) q->now, | ||
928 | (unsigned long long) cl->t_c, | ||
929 | #endif | ||
930 | q->jiffies); | ||
931 | diff = 1000; | ||
932 | } | ||
933 | #endif | ||
934 | htb_change_class_mode(q,cl,&diff); | ||
935 | if (cl->cmode != HTB_CAN_SEND) | 802 | if (cl->cmode != HTB_CAN_SEND) |
936 | htb_add_to_wait_tree (q,cl,diff,2); | 803 | htb_add_to_wait_tree(q, cl, diff); |
937 | } | 804 | } |
938 | if (net_ratelimit()) | 805 | if (net_ratelimit()) |
939 | printk(KERN_WARNING "htb: too many events !\n"); | 806 | printk(KERN_WARNING "htb: too many events !\n"); |
940 | return HZ/10; | 807 | return HZ / 10; |
941 | } | 808 | } |
942 | 809 | ||
943 | /* Returns class->node+prio from id-tree where classe's id is >= id. NULL | 810 | /* Returns class->node+prio from id-tree where classe's id is >= id. NULL |
944 | is no such one exists. */ | 811 | is no such one exists. */ |
945 | static struct rb_node * | 812 | static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, |
946 | htb_id_find_next_upper(int prio,struct rb_node *n,u32 id) | 813 | u32 id) |
947 | { | 814 | { |
948 | struct rb_node *r = NULL; | 815 | struct rb_node *r = NULL; |
949 | while (n) { | 816 | while (n) { |
950 | struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]); | 817 | struct htb_class *cl = |
951 | if (id == cl->classid) return n; | 818 | rb_entry(n, struct htb_class, node[prio]); |
952 | 819 | if (id == cl->classid) | |
820 | return n; | ||
821 | |||
953 | if (id > cl->classid) { | 822 | if (id > cl->classid) { |
954 | n = n->rb_right; | 823 | n = n->rb_right; |
955 | } else { | 824 | } else { |
@@ -965,49 +834,49 @@ htb_id_find_next_upper(int prio,struct rb_node *n,u32 id) | |||
965 | * | 834 | * |
966 | * Find leaf where current feed pointers points to. | 835 | * Find leaf where current feed pointers points to. |
967 | */ | 836 | */ |
968 | static struct htb_class * | 837 | static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, |
969 | htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid) | 838 | struct rb_node **pptr, u32 * pid) |
970 | { | 839 | { |
971 | int i; | 840 | int i; |
972 | struct { | 841 | struct { |
973 | struct rb_node *root; | 842 | struct rb_node *root; |
974 | struct rb_node **pptr; | 843 | struct rb_node **pptr; |
975 | u32 *pid; | 844 | u32 *pid; |
976 | } stk[TC_HTB_MAXDEPTH],*sp = stk; | 845 | } stk[TC_HTB_MAXDEPTH], *sp = stk; |
977 | 846 | ||
978 | BUG_TRAP(tree->rb_node); | 847 | BUG_TRAP(tree->rb_node); |
979 | sp->root = tree->rb_node; | 848 | sp->root = tree->rb_node; |
980 | sp->pptr = pptr; | 849 | sp->pptr = pptr; |
981 | sp->pid = pid; | 850 | sp->pid = pid; |
982 | 851 | ||
983 | for (i = 0; i < 65535; i++) { | 852 | for (i = 0; i < 65535; i++) { |
984 | HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid); | 853 | if (!*sp->pptr && *sp->pid) { |
985 | |||
986 | if (!*sp->pptr && *sp->pid) { | ||
987 | /* ptr was invalidated but id is valid - try to recover | 854 | /* ptr was invalidated but id is valid - try to recover |
988 | the original or next ptr */ | 855 | the original or next ptr */ |
989 | *sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid); | 856 | *sp->pptr = |
857 | htb_id_find_next_upper(prio, sp->root, *sp->pid); | ||
990 | } | 858 | } |
991 | *sp->pid = 0; /* ptr is valid now so that remove this hint as it | 859 | *sp->pid = 0; /* ptr is valid now so that remove this hint as it |
992 | can become out of date quickly */ | 860 | can become out of date quickly */ |
993 | if (!*sp->pptr) { /* we are at right end; rewind & go up */ | 861 | if (!*sp->pptr) { /* we are at right end; rewind & go up */ |
994 | *sp->pptr = sp->root; | 862 | *sp->pptr = sp->root; |
995 | while ((*sp->pptr)->rb_left) | 863 | while ((*sp->pptr)->rb_left) |
996 | *sp->pptr = (*sp->pptr)->rb_left; | 864 | *sp->pptr = (*sp->pptr)->rb_left; |
997 | if (sp > stk) { | 865 | if (sp > stk) { |
998 | sp--; | 866 | sp--; |
999 | BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL; | 867 | BUG_TRAP(*sp->pptr); |
1000 | htb_next_rb_node (sp->pptr); | 868 | if (!*sp->pptr) |
869 | return NULL; | ||
870 | htb_next_rb_node(sp->pptr); | ||
1001 | } | 871 | } |
1002 | } else { | 872 | } else { |
1003 | struct htb_class *cl; | 873 | struct htb_class *cl; |
1004 | cl = rb_entry(*sp->pptr,struct htb_class,node[prio]); | 874 | cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); |
1005 | HTB_CHCL(cl); | 875 | if (!cl->level) |
1006 | if (!cl->level) | ||
1007 | return cl; | 876 | return cl; |
1008 | (++sp)->root = cl->un.inner.feed[prio].rb_node; | 877 | (++sp)->root = cl->un.inner.feed[prio].rb_node; |
1009 | sp->pptr = cl->un.inner.ptr+prio; | 878 | sp->pptr = cl->un.inner.ptr + prio; |
1010 | sp->pid = cl->un.inner.last_ptr_id+prio; | 879 | sp->pid = cl->un.inner.last_ptr_id + prio; |
1011 | } | 880 | } |
1012 | } | 881 | } |
1013 | BUG_TRAP(0); | 882 | BUG_TRAP(0); |
@@ -1016,21 +885,21 @@ htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 | |||
1016 | 885 | ||
1017 | /* dequeues packet at given priority and level; call only if | 886 | /* dequeues packet at given priority and level; call only if |
1018 | you are sure that there is active class at prio/level */ | 887 | you are sure that there is active class at prio/level */ |
1019 | static struct sk_buff * | 888 | static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, |
1020 | htb_dequeue_tree(struct htb_sched *q,int prio,int level) | 889 | int level) |
1021 | { | 890 | { |
1022 | struct sk_buff *skb = NULL; | 891 | struct sk_buff *skb = NULL; |
1023 | struct htb_class *cl,*start; | 892 | struct htb_class *cl, *start; |
1024 | /* look initial class up in the row */ | 893 | /* look initial class up in the row */ |
1025 | start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio, | 894 | start = cl = htb_lookup_leaf(q->row[level] + prio, prio, |
1026 | q->ptr[level]+prio,q->last_ptr_id[level]+prio); | 895 | q->ptr[level] + prio, |
1027 | 896 | q->last_ptr_id[level] + prio); | |
897 | |||
1028 | do { | 898 | do { |
1029 | next: | 899 | next: |
1030 | BUG_TRAP(cl); | 900 | BUG_TRAP(cl); |
1031 | if (!cl) return NULL; | 901 | if (!cl) |
1032 | HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n", | 902 | return NULL; |
1033 | prio,level,cl->classid,cl->un.leaf.deficit[level]); | ||
1034 | 903 | ||
1035 | /* class can be empty - it is unlikely but can be true if leaf | 904 | /* class can be empty - it is unlikely but can be true if leaf |
1036 | qdisc drops packets in enqueue routine or if someone used | 905 | qdisc drops packets in enqueue routine or if someone used |
@@ -1038,64 +907,69 @@ next: | |||
1038 | simply deactivate and skip such class */ | 907 | simply deactivate and skip such class */ |
1039 | if (unlikely(cl->un.leaf.q->q.qlen == 0)) { | 908 | if (unlikely(cl->un.leaf.q->q.qlen == 0)) { |
1040 | struct htb_class *next; | 909 | struct htb_class *next; |
1041 | htb_deactivate(q,cl); | 910 | htb_deactivate(q, cl); |
1042 | 911 | ||
1043 | /* row/level might become empty */ | 912 | /* row/level might become empty */ |
1044 | if ((q->row_mask[level] & (1 << prio)) == 0) | 913 | if ((q->row_mask[level] & (1 << prio)) == 0) |
1045 | return NULL; | 914 | return NULL; |
1046 | 915 | ||
1047 | next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio, | 916 | next = htb_lookup_leaf(q->row[level] + prio, |
1048 | prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio); | 917 | prio, q->ptr[level] + prio, |
918 | q->last_ptr_id[level] + prio); | ||
1049 | 919 | ||
1050 | if (cl == start) /* fix start if we just deleted it */ | 920 | if (cl == start) /* fix start if we just deleted it */ |
1051 | start = next; | 921 | start = next; |
1052 | cl = next; | 922 | cl = next; |
1053 | goto next; | 923 | goto next; |
1054 | } | 924 | } |
1055 | 925 | ||
1056 | if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) | 926 | skb = cl->un.leaf.q->dequeue(cl->un.leaf.q); |
927 | if (likely(skb != NULL)) | ||
1057 | break; | 928 | break; |
1058 | if (!cl->warned) { | 929 | if (!cl->warned) { |
1059 | printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid); | 930 | printk(KERN_WARNING |
931 | "htb: class %X isn't work conserving ?!\n", | ||
932 | cl->classid); | ||
1060 | cl->warned = 1; | 933 | cl->warned = 1; |
1061 | } | 934 | } |
1062 | q->nwc_hit++; | 935 | q->nwc_hit++; |
1063 | htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); | 936 | htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> |
1064 | cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio, | 937 | ptr[0]) + prio); |
1065 | q->last_ptr_id[level]+prio); | 938 | cl = htb_lookup_leaf(q->row[level] + prio, prio, |
939 | q->ptr[level] + prio, | ||
940 | q->last_ptr_id[level] + prio); | ||
1066 | 941 | ||
1067 | } while (cl != start); | 942 | } while (cl != start); |
1068 | 943 | ||
1069 | if (likely(skb != NULL)) { | 944 | if (likely(skb != NULL)) { |
1070 | if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { | 945 | if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { |
1071 | HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n", | ||
1072 | level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum); | ||
1073 | cl->un.leaf.deficit[level] += cl->un.leaf.quantum; | 946 | cl->un.leaf.deficit[level] += cl->un.leaf.quantum; |
1074 | htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); | 947 | htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> |
948 | ptr[0]) + prio); | ||
1075 | } | 949 | } |
1076 | /* this used to be after charge_class but this constelation | 950 | /* this used to be after charge_class but this constelation |
1077 | gives us slightly better performance */ | 951 | gives us slightly better performance */ |
1078 | if (!cl->un.leaf.q->q.qlen) | 952 | if (!cl->un.leaf.q->q.qlen) |
1079 | htb_deactivate (q,cl); | 953 | htb_deactivate(q, cl); |
1080 | htb_charge_class (q,cl,level,skb->len); | 954 | htb_charge_class(q, cl, level, skb->len); |
1081 | } | 955 | } |
1082 | return skb; | 956 | return skb; |
1083 | } | 957 | } |
1084 | 958 | ||
1085 | static void htb_delay_by(struct Qdisc *sch,long delay) | 959 | static void htb_delay_by(struct Qdisc *sch, long delay) |
1086 | { | 960 | { |
1087 | struct htb_sched *q = qdisc_priv(sch); | 961 | struct htb_sched *q = qdisc_priv(sch); |
1088 | if (delay <= 0) delay = 1; | 962 | if (delay <= 0) |
1089 | if (unlikely(delay > 5*HZ)) { | 963 | delay = 1; |
964 | if (unlikely(delay > 5 * HZ)) { | ||
1090 | if (net_ratelimit()) | 965 | if (net_ratelimit()) |
1091 | printk(KERN_INFO "HTB delay %ld > 5sec\n", delay); | 966 | printk(KERN_INFO "HTB delay %ld > 5sec\n", delay); |
1092 | delay = 5*HZ; | 967 | delay = 5 * HZ; |
1093 | } | 968 | } |
1094 | /* why don't use jiffies here ? because expires can be in past */ | 969 | /* why don't use jiffies here ? because expires can be in past */ |
1095 | mod_timer(&q->timer, q->jiffies + delay); | 970 | mod_timer(&q->timer, q->jiffies + delay); |
1096 | sch->flags |= TCQ_F_THROTTLED; | 971 | sch->flags |= TCQ_F_THROTTLED; |
1097 | sch->qstats.overlimits++; | 972 | sch->qstats.overlimits++; |
1098 | HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay); | ||
1099 | } | 973 | } |
1100 | 974 | ||
1101 | static struct sk_buff *htb_dequeue(struct Qdisc *sch) | 975 | static struct sk_buff *htb_dequeue(struct Qdisc *sch) |
@@ -1104,22 +978,19 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) | |||
1104 | struct htb_sched *q = qdisc_priv(sch); | 978 | struct htb_sched *q = qdisc_priv(sch); |
1105 | int level; | 979 | int level; |
1106 | long min_delay; | 980 | long min_delay; |
1107 | #ifdef HTB_DEBUG | ||
1108 | int evs_used = 0; | ||
1109 | #endif | ||
1110 | 981 | ||
1111 | q->jiffies = jiffies; | 982 | q->jiffies = jiffies; |
1112 | HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue), | ||
1113 | sch->q.qlen); | ||
1114 | 983 | ||
1115 | /* try to dequeue direct packets as high prio (!) to minimize cpu work */ | 984 | /* try to dequeue direct packets as high prio (!) to minimize cpu work */ |
1116 | if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) { | 985 | skb = __skb_dequeue(&q->direct_queue); |
986 | if (skb != NULL) { | ||
1117 | sch->flags &= ~TCQ_F_THROTTLED; | 987 | sch->flags &= ~TCQ_F_THROTTLED; |
1118 | sch->q.qlen--; | 988 | sch->q.qlen--; |
1119 | return skb; | 989 | return skb; |
1120 | } | 990 | } |
1121 | 991 | ||
1122 | if (!sch->q.qlen) goto fin; | 992 | if (!sch->q.qlen) |
993 | goto fin; | ||
1123 | PSCHED_GET_TIME(q->now); | 994 | PSCHED_GET_TIME(q->now); |
1124 | 995 | ||
1125 | min_delay = LONG_MAX; | 996 | min_delay = LONG_MAX; |
@@ -1129,21 +1000,19 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) | |||
1129 | int m; | 1000 | int m; |
1130 | long delay; | 1001 | long delay; |
1131 | if (time_after_eq(q->jiffies, q->near_ev_cache[level])) { | 1002 | if (time_after_eq(q->jiffies, q->near_ev_cache[level])) { |
1132 | delay = htb_do_events(q,level); | 1003 | delay = htb_do_events(q, level); |
1133 | q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ); | 1004 | q->near_ev_cache[level] = |
1134 | #ifdef HTB_DEBUG | 1005 | q->jiffies + (delay ? delay : HZ); |
1135 | evs_used++; | ||
1136 | #endif | ||
1137 | } else | 1006 | } else |
1138 | delay = q->near_ev_cache[level] - q->jiffies; | 1007 | delay = q->near_ev_cache[level] - q->jiffies; |
1139 | 1008 | ||
1140 | if (delay && min_delay > delay) | 1009 | if (delay && min_delay > delay) |
1141 | min_delay = delay; | 1010 | min_delay = delay; |
1142 | m = ~q->row_mask[level]; | 1011 | m = ~q->row_mask[level]; |
1143 | while (m != (int)(-1)) { | 1012 | while (m != (int)(-1)) { |
1144 | int prio = ffz (m); | 1013 | int prio = ffz(m); |
1145 | m |= 1 << prio; | 1014 | m |= 1 << prio; |
1146 | skb = htb_dequeue_tree(q,prio,level); | 1015 | skb = htb_dequeue_tree(q, prio, level); |
1147 | if (likely(skb != NULL)) { | 1016 | if (likely(skb != NULL)) { |
1148 | sch->q.qlen--; | 1017 | sch->q.qlen--; |
1149 | sch->flags &= ~TCQ_F_THROTTLED; | 1018 | sch->flags &= ~TCQ_F_THROTTLED; |
@@ -1151,40 +1020,28 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) | |||
1151 | } | 1020 | } |
1152 | } | 1021 | } |
1153 | } | 1022 | } |
1154 | #ifdef HTB_DEBUG | 1023 | htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay); |
1155 | if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) { | ||
1156 | if (min_delay == LONG_MAX) { | ||
1157 | printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n", | ||
1158 | evs_used,q->jiffies,jiffies); | ||
1159 | htb_debug_dump(q); | ||
1160 | } else | ||
1161 | printk(KERN_WARNING "HTB: mindelay=%ld, some class has " | ||
1162 | "too small rate\n",min_delay); | ||
1163 | } | ||
1164 | #endif | ||
1165 | htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay); | ||
1166 | fin: | 1024 | fin: |
1167 | HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb); | ||
1168 | return skb; | 1025 | return skb; |
1169 | } | 1026 | } |
1170 | 1027 | ||
1171 | /* try to drop from each class (by prio) until one succeed */ | 1028 | /* try to drop from each class (by prio) until one succeed */ |
1172 | static unsigned int htb_drop(struct Qdisc* sch) | 1029 | static unsigned int htb_drop(struct Qdisc *sch) |
1173 | { | 1030 | { |
1174 | struct htb_sched *q = qdisc_priv(sch); | 1031 | struct htb_sched *q = qdisc_priv(sch); |
1175 | int prio; | 1032 | int prio; |
1176 | 1033 | ||
1177 | for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) { | 1034 | for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) { |
1178 | struct list_head *p; | 1035 | struct list_head *p; |
1179 | list_for_each (p,q->drops+prio) { | 1036 | list_for_each(p, q->drops + prio) { |
1180 | struct htb_class *cl = list_entry(p, struct htb_class, | 1037 | struct htb_class *cl = list_entry(p, struct htb_class, |
1181 | un.leaf.drop_list); | 1038 | un.leaf.drop_list); |
1182 | unsigned int len; | 1039 | unsigned int len; |
1183 | if (cl->un.leaf.q->ops->drop && | 1040 | if (cl->un.leaf.q->ops->drop && |
1184 | (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { | 1041 | (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { |
1185 | sch->q.qlen--; | 1042 | sch->q.qlen--; |
1186 | if (!cl->un.leaf.q->q.qlen) | 1043 | if (!cl->un.leaf.q->q.qlen) |
1187 | htb_deactivate (q,cl); | 1044 | htb_deactivate(q, cl); |
1188 | return len; | 1045 | return len; |
1189 | } | 1046 | } |
1190 | } | 1047 | } |
@@ -1194,29 +1051,25 @@ static unsigned int htb_drop(struct Qdisc* sch) | |||
1194 | 1051 | ||
1195 | /* reset all classes */ | 1052 | /* reset all classes */ |
1196 | /* always caled under BH & queue lock */ | 1053 | /* always caled under BH & queue lock */ |
1197 | static void htb_reset(struct Qdisc* sch) | 1054 | static void htb_reset(struct Qdisc *sch) |
1198 | { | 1055 | { |
1199 | struct htb_sched *q = qdisc_priv(sch); | 1056 | struct htb_sched *q = qdisc_priv(sch); |
1200 | int i; | 1057 | int i; |
1201 | HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle); | ||
1202 | 1058 | ||
1203 | for (i = 0; i < HTB_HSIZE; i++) { | 1059 | for (i = 0; i < HTB_HSIZE; i++) { |
1204 | struct list_head *p; | 1060 | struct hlist_node *p; |
1205 | list_for_each (p,q->hash+i) { | 1061 | struct htb_class *cl; |
1206 | struct htb_class *cl = list_entry(p,struct htb_class,hlist); | 1062 | |
1063 | hlist_for_each_entry(cl, p, q->hash + i, hlist) { | ||
1207 | if (cl->level) | 1064 | if (cl->level) |
1208 | memset(&cl->un.inner,0,sizeof(cl->un.inner)); | 1065 | memset(&cl->un.inner, 0, sizeof(cl->un.inner)); |
1209 | else { | 1066 | else { |
1210 | if (cl->un.leaf.q) | 1067 | if (cl->un.leaf.q) |
1211 | qdisc_reset(cl->un.leaf.q); | 1068 | qdisc_reset(cl->un.leaf.q); |
1212 | INIT_LIST_HEAD(&cl->un.leaf.drop_list); | 1069 | INIT_LIST_HEAD(&cl->un.leaf.drop_list); |
1213 | } | 1070 | } |
1214 | cl->prio_activity = 0; | 1071 | cl->prio_activity = 0; |
1215 | cl->cmode = HTB_CAN_SEND; | 1072 | cl->cmode = HTB_CAN_SEND; |
1216 | #ifdef HTB_DEBUG | ||
1217 | cl->pq_node.rb_color = -1; | ||
1218 | memset(cl->node,255,sizeof(cl->node)); | ||
1219 | #endif | ||
1220 | 1073 | ||
1221 | } | 1074 | } |
1222 | } | 1075 | } |
@@ -1224,12 +1077,12 @@ static void htb_reset(struct Qdisc* sch) | |||
1224 | del_timer(&q->timer); | 1077 | del_timer(&q->timer); |
1225 | __skb_queue_purge(&q->direct_queue); | 1078 | __skb_queue_purge(&q->direct_queue); |
1226 | sch->q.qlen = 0; | 1079 | sch->q.qlen = 0; |
1227 | memset(q->row,0,sizeof(q->row)); | 1080 | memset(q->row, 0, sizeof(q->row)); |
1228 | memset(q->row_mask,0,sizeof(q->row_mask)); | 1081 | memset(q->row_mask, 0, sizeof(q->row_mask)); |
1229 | memset(q->wait_pq,0,sizeof(q->wait_pq)); | 1082 | memset(q->wait_pq, 0, sizeof(q->wait_pq)); |
1230 | memset(q->ptr,0,sizeof(q->ptr)); | 1083 | memset(q->ptr, 0, sizeof(q->ptr)); |
1231 | for (i = 0; i < TC_HTB_NUMPRIO; i++) | 1084 | for (i = 0; i < TC_HTB_NUMPRIO; i++) |
1232 | INIT_LIST_HEAD(q->drops+i); | 1085 | INIT_LIST_HEAD(q->drops + i); |
1233 | } | 1086 | } |
1234 | 1087 | ||
1235 | static int htb_init(struct Qdisc *sch, struct rtattr *opt) | 1088 | static int htb_init(struct Qdisc *sch, struct rtattr *opt) |
@@ -1238,36 +1091,31 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) | |||
1238 | struct rtattr *tb[TCA_HTB_INIT]; | 1091 | struct rtattr *tb[TCA_HTB_INIT]; |
1239 | struct tc_htb_glob *gopt; | 1092 | struct tc_htb_glob *gopt; |
1240 | int i; | 1093 | int i; |
1241 | #ifdef HTB_DEBUG | ||
1242 | printk(KERN_INFO "HTB init, kernel part version %d.%d\n", | ||
1243 | HTB_VER >> 16,HTB_VER & 0xffff); | ||
1244 | #endif | ||
1245 | if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) || | 1094 | if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) || |
1246 | tb[TCA_HTB_INIT-1] == NULL || | 1095 | tb[TCA_HTB_INIT - 1] == NULL || |
1247 | RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) { | 1096 | RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) { |
1248 | printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); | 1097 | printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); |
1249 | return -EINVAL; | 1098 | return -EINVAL; |
1250 | } | 1099 | } |
1251 | gopt = RTA_DATA(tb[TCA_HTB_INIT-1]); | 1100 | gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]); |
1252 | if (gopt->version != HTB_VER >> 16) { | 1101 | if (gopt->version != HTB_VER >> 16) { |
1253 | printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n", | 1102 | printk(KERN_ERR |
1254 | HTB_VER >> 16,HTB_VER & 0xffff,gopt->version); | 1103 | "HTB: need tc/htb version %d (minor is %d), you have %d\n", |
1104 | HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); | ||
1255 | return -EINVAL; | 1105 | return -EINVAL; |
1256 | } | 1106 | } |
1257 | q->debug = gopt->debug; | ||
1258 | HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum); | ||
1259 | 1107 | ||
1260 | INIT_LIST_HEAD(&q->root); | 1108 | INIT_LIST_HEAD(&q->root); |
1261 | for (i = 0; i < HTB_HSIZE; i++) | 1109 | for (i = 0; i < HTB_HSIZE; i++) |
1262 | INIT_LIST_HEAD(q->hash+i); | 1110 | INIT_HLIST_HEAD(q->hash + i); |
1263 | for (i = 0; i < TC_HTB_NUMPRIO; i++) | 1111 | for (i = 0; i < TC_HTB_NUMPRIO; i++) |
1264 | INIT_LIST_HEAD(q->drops+i); | 1112 | INIT_LIST_HEAD(q->drops + i); |
1265 | 1113 | ||
1266 | init_timer(&q->timer); | 1114 | init_timer(&q->timer); |
1267 | skb_queue_head_init(&q->direct_queue); | 1115 | skb_queue_head_init(&q->direct_queue); |
1268 | 1116 | ||
1269 | q->direct_qlen = sch->dev->tx_queue_len; | 1117 | q->direct_qlen = sch->dev->tx_queue_len; |
1270 | if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ | 1118 | if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ |
1271 | q->direct_qlen = 2; | 1119 | q->direct_qlen = 2; |
1272 | q->timer.function = htb_timer; | 1120 | q->timer.function = htb_timer; |
1273 | q->timer.data = (unsigned long)sch; | 1121 | q->timer.data = (unsigned long)sch; |
@@ -1289,80 +1137,72 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) | |||
1289 | static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) | 1137 | static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) |
1290 | { | 1138 | { |
1291 | struct htb_sched *q = qdisc_priv(sch); | 1139 | struct htb_sched *q = qdisc_priv(sch); |
1292 | unsigned char *b = skb->tail; | 1140 | unsigned char *b = skb->tail; |
1293 | struct rtattr *rta; | 1141 | struct rtattr *rta; |
1294 | struct tc_htb_glob gopt; | 1142 | struct tc_htb_glob gopt; |
1295 | HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle); | 1143 | spin_lock_bh(&sch->dev->queue_lock); |
1296 | HTB_QLOCK(sch); | ||
1297 | gopt.direct_pkts = q->direct_pkts; | 1144 | gopt.direct_pkts = q->direct_pkts; |
1298 | 1145 | ||
1299 | #ifdef HTB_DEBUG | ||
1300 | if (HTB_DBG_COND(0,2)) | ||
1301 | htb_debug_dump(q); | ||
1302 | #endif | ||
1303 | gopt.version = HTB_VER; | 1146 | gopt.version = HTB_VER; |
1304 | gopt.rate2quantum = q->rate2quantum; | 1147 | gopt.rate2quantum = q->rate2quantum; |
1305 | gopt.defcls = q->defcls; | 1148 | gopt.defcls = q->defcls; |
1306 | gopt.debug = q->debug; | 1149 | gopt.debug = 0; |
1307 | rta = (struct rtattr*)b; | 1150 | rta = (struct rtattr *)b; |
1308 | RTA_PUT(skb, TCA_OPTIONS, 0, NULL); | 1151 | RTA_PUT(skb, TCA_OPTIONS, 0, NULL); |
1309 | RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); | 1152 | RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); |
1310 | rta->rta_len = skb->tail - b; | 1153 | rta->rta_len = skb->tail - b; |
1311 | HTB_QUNLOCK(sch); | 1154 | spin_unlock_bh(&sch->dev->queue_lock); |
1312 | return skb->len; | 1155 | return skb->len; |
1313 | rtattr_failure: | 1156 | rtattr_failure: |
1314 | HTB_QUNLOCK(sch); | 1157 | spin_unlock_bh(&sch->dev->queue_lock); |
1315 | skb_trim(skb, skb->tail - skb->data); | 1158 | skb_trim(skb, skb->tail - skb->data); |
1316 | return -1; | 1159 | return -1; |
1317 | } | 1160 | } |
1318 | 1161 | ||
1319 | static int htb_dump_class(struct Qdisc *sch, unsigned long arg, | 1162 | static int htb_dump_class(struct Qdisc *sch, unsigned long arg, |
1320 | struct sk_buff *skb, struct tcmsg *tcm) | 1163 | struct sk_buff *skb, struct tcmsg *tcm) |
1321 | { | 1164 | { |
1322 | #ifdef HTB_DEBUG | 1165 | struct htb_class *cl = (struct htb_class *)arg; |
1323 | struct htb_sched *q = qdisc_priv(sch); | 1166 | unsigned char *b = skb->tail; |
1324 | #endif | ||
1325 | struct htb_class *cl = (struct htb_class*)arg; | ||
1326 | unsigned char *b = skb->tail; | ||
1327 | struct rtattr *rta; | 1167 | struct rtattr *rta; |
1328 | struct tc_htb_opt opt; | 1168 | struct tc_htb_opt opt; |
1329 | 1169 | ||
1330 | HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid); | 1170 | spin_lock_bh(&sch->dev->queue_lock); |
1331 | |||
1332 | HTB_QLOCK(sch); | ||
1333 | tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT; | 1171 | tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT; |
1334 | tcm->tcm_handle = cl->classid; | 1172 | tcm->tcm_handle = cl->classid; |
1335 | if (!cl->level && cl->un.leaf.q) | 1173 | if (!cl->level && cl->un.leaf.q) |
1336 | tcm->tcm_info = cl->un.leaf.q->handle; | 1174 | tcm->tcm_info = cl->un.leaf.q->handle; |
1337 | 1175 | ||
1338 | rta = (struct rtattr*)b; | 1176 | rta = (struct rtattr *)b; |
1339 | RTA_PUT(skb, TCA_OPTIONS, 0, NULL); | 1177 | RTA_PUT(skb, TCA_OPTIONS, 0, NULL); |
1340 | 1178 | ||
1341 | memset (&opt,0,sizeof(opt)); | 1179 | memset(&opt, 0, sizeof(opt)); |
1342 | 1180 | ||
1343 | opt.rate = cl->rate->rate; opt.buffer = cl->buffer; | 1181 | opt.rate = cl->rate->rate; |
1344 | opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer; | 1182 | opt.buffer = cl->buffer; |
1345 | opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio; | 1183 | opt.ceil = cl->ceil->rate; |
1346 | opt.level = cl->level; | 1184 | opt.cbuffer = cl->cbuffer; |
1185 | opt.quantum = cl->un.leaf.quantum; | ||
1186 | opt.prio = cl->un.leaf.prio; | ||
1187 | opt.level = cl->level; | ||
1347 | RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); | 1188 | RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); |
1348 | rta->rta_len = skb->tail - b; | 1189 | rta->rta_len = skb->tail - b; |
1349 | HTB_QUNLOCK(sch); | 1190 | spin_unlock_bh(&sch->dev->queue_lock); |
1350 | return skb->len; | 1191 | return skb->len; |
1351 | rtattr_failure: | 1192 | rtattr_failure: |
1352 | HTB_QUNLOCK(sch); | 1193 | spin_unlock_bh(&sch->dev->queue_lock); |
1353 | skb_trim(skb, b - skb->data); | 1194 | skb_trim(skb, b - skb->data); |
1354 | return -1; | 1195 | return -1; |
1355 | } | 1196 | } |
1356 | 1197 | ||
1357 | static int | 1198 | static int |
1358 | htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, | 1199 | htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) |
1359 | struct gnet_dump *d) | ||
1360 | { | 1200 | { |
1361 | struct htb_class *cl = (struct htb_class*)arg; | 1201 | struct htb_class *cl = (struct htb_class *)arg; |
1362 | 1202 | ||
1363 | #ifdef HTB_RATECM | 1203 | #ifdef HTB_RATECM |
1364 | cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE); | 1204 | cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE); |
1365 | cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE); | 1205 | cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE); |
1366 | #endif | 1206 | #endif |
1367 | 1207 | ||
1368 | if (!cl->level && cl->un.leaf.q) | 1208 | if (!cl->level && cl->un.leaf.q) |
@@ -1379,21 +1219,22 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, | |||
1379 | } | 1219 | } |
1380 | 1220 | ||
1381 | static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | 1221 | static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, |
1382 | struct Qdisc **old) | 1222 | struct Qdisc **old) |
1383 | { | 1223 | { |
1384 | struct htb_class *cl = (struct htb_class*)arg; | 1224 | struct htb_class *cl = (struct htb_class *)arg; |
1385 | 1225 | ||
1386 | if (cl && !cl->level) { | 1226 | if (cl && !cl->level) { |
1387 | if (new == NULL && (new = qdisc_create_dflt(sch->dev, | 1227 | if (new == NULL && (new = qdisc_create_dflt(sch->dev, |
1388 | &pfifo_qdisc_ops)) == NULL) | 1228 | &pfifo_qdisc_ops)) |
1389 | return -ENOBUFS; | 1229 | == NULL) |
1230 | return -ENOBUFS; | ||
1390 | sch_tree_lock(sch); | 1231 | sch_tree_lock(sch); |
1391 | if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { | 1232 | if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { |
1392 | if (cl->prio_activity) | 1233 | if (cl->prio_activity) |
1393 | htb_deactivate (qdisc_priv(sch),cl); | 1234 | htb_deactivate(qdisc_priv(sch), cl); |
1394 | 1235 | ||
1395 | /* TODO: is it correct ? Why CBQ doesn't do it ? */ | 1236 | /* TODO: is it correct ? Why CBQ doesn't do it ? */ |
1396 | sch->q.qlen -= (*old)->q.qlen; | 1237 | sch->q.qlen -= (*old)->q.qlen; |
1397 | qdisc_reset(*old); | 1238 | qdisc_reset(*old); |
1398 | } | 1239 | } |
1399 | sch_tree_unlock(sch); | 1240 | sch_tree_unlock(sch); |
@@ -1402,20 +1243,16 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | |||
1402 | return -ENOENT; | 1243 | return -ENOENT; |
1403 | } | 1244 | } |
1404 | 1245 | ||
1405 | static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg) | 1246 | static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg) |
1406 | { | 1247 | { |
1407 | struct htb_class *cl = (struct htb_class*)arg; | 1248 | struct htb_class *cl = (struct htb_class *)arg; |
1408 | return (cl && !cl->level) ? cl->un.leaf.q : NULL; | 1249 | return (cl && !cl->level) ? cl->un.leaf.q : NULL; |
1409 | } | 1250 | } |
1410 | 1251 | ||
1411 | static unsigned long htb_get(struct Qdisc *sch, u32 classid) | 1252 | static unsigned long htb_get(struct Qdisc *sch, u32 classid) |
1412 | { | 1253 | { |
1413 | #ifdef HTB_DEBUG | 1254 | struct htb_class *cl = htb_find(classid, sch); |
1414 | struct htb_sched *q = qdisc_priv(sch); | 1255 | if (cl) |
1415 | #endif | ||
1416 | struct htb_class *cl = htb_find(classid,sch); | ||
1417 | HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0); | ||
1418 | if (cl) | ||
1419 | cl->refcnt++; | 1256 | cl->refcnt++; |
1420 | return (unsigned long)cl; | 1257 | return (unsigned long)cl; |
1421 | } | 1258 | } |
@@ -1430,10 +1267,9 @@ static void htb_destroy_filters(struct tcf_proto **fl) | |||
1430 | } | 1267 | } |
1431 | } | 1268 | } |
1432 | 1269 | ||
1433 | static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) | 1270 | static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) |
1434 | { | 1271 | { |
1435 | struct htb_sched *q = qdisc_priv(sch); | 1272 | struct htb_sched *q = qdisc_priv(sch); |
1436 | HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0); | ||
1437 | if (!cl->level) { | 1273 | if (!cl->level) { |
1438 | BUG_TRAP(cl->un.leaf.q); | 1274 | BUG_TRAP(cl->un.leaf.q); |
1439 | sch->q.qlen -= cl->un.leaf.q->q.qlen; | 1275 | sch->q.qlen -= cl->un.leaf.q->q.qlen; |
@@ -1441,45 +1277,45 @@ static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) | |||
1441 | } | 1277 | } |
1442 | qdisc_put_rtab(cl->rate); | 1278 | qdisc_put_rtab(cl->rate); |
1443 | qdisc_put_rtab(cl->ceil); | 1279 | qdisc_put_rtab(cl->ceil); |
1444 | 1280 | ||
1445 | htb_destroy_filters (&cl->filter_list); | 1281 | htb_destroy_filters(&cl->filter_list); |
1446 | 1282 | ||
1447 | while (!list_empty(&cl->children)) | 1283 | while (!list_empty(&cl->children)) |
1448 | htb_destroy_class (sch,list_entry(cl->children.next, | 1284 | htb_destroy_class(sch, list_entry(cl->children.next, |
1449 | struct htb_class,sibling)); | 1285 | struct htb_class, sibling)); |
1450 | 1286 | ||
1451 | /* note: this delete may happen twice (see htb_delete) */ | 1287 | /* note: this delete may happen twice (see htb_delete) */ |
1452 | list_del(&cl->hlist); | 1288 | if (!hlist_unhashed(&cl->hlist)) |
1289 | hlist_del(&cl->hlist); | ||
1453 | list_del(&cl->sibling); | 1290 | list_del(&cl->sibling); |
1454 | 1291 | ||
1455 | if (cl->prio_activity) | 1292 | if (cl->prio_activity) |
1456 | htb_deactivate (q,cl); | 1293 | htb_deactivate(q, cl); |
1457 | 1294 | ||
1458 | if (cl->cmode != HTB_CAN_SEND) | 1295 | if (cl->cmode != HTB_CAN_SEND) |
1459 | htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level); | 1296 | htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); |
1460 | 1297 | ||
1461 | kfree(cl); | 1298 | kfree(cl); |
1462 | } | 1299 | } |
1463 | 1300 | ||
1464 | /* always caled under BH & queue lock */ | 1301 | /* always caled under BH & queue lock */ |
1465 | static void htb_destroy(struct Qdisc* sch) | 1302 | static void htb_destroy(struct Qdisc *sch) |
1466 | { | 1303 | { |
1467 | struct htb_sched *q = qdisc_priv(sch); | 1304 | struct htb_sched *q = qdisc_priv(sch); |
1468 | HTB_DBG(0,1,"htb_destroy q=%p\n",q); | ||
1469 | 1305 | ||
1470 | del_timer_sync (&q->timer); | 1306 | del_timer_sync(&q->timer); |
1471 | #ifdef HTB_RATECM | 1307 | #ifdef HTB_RATECM |
1472 | del_timer_sync (&q->rttim); | 1308 | del_timer_sync(&q->rttim); |
1473 | #endif | 1309 | #endif |
1474 | /* This line used to be after htb_destroy_class call below | 1310 | /* This line used to be after htb_destroy_class call below |
1475 | and surprisingly it worked in 2.4. But it must precede it | 1311 | and surprisingly it worked in 2.4. But it must precede it |
1476 | because filter need its target class alive to be able to call | 1312 | because filter need its target class alive to be able to call |
1477 | unbind_filter on it (without Oops). */ | 1313 | unbind_filter on it (without Oops). */ |
1478 | htb_destroy_filters(&q->filter_list); | 1314 | htb_destroy_filters(&q->filter_list); |
1479 | 1315 | ||
1480 | while (!list_empty(&q->root)) | 1316 | while (!list_empty(&q->root)) |
1481 | htb_destroy_class (sch,list_entry(q->root.next, | 1317 | htb_destroy_class(sch, list_entry(q->root.next, |
1482 | struct htb_class,sibling)); | 1318 | struct htb_class, sibling)); |
1483 | 1319 | ||
1484 | __skb_queue_purge(&q->direct_queue); | 1320 | __skb_queue_purge(&q->direct_queue); |
1485 | } | 1321 | } |
@@ -1487,24 +1323,25 @@ static void htb_destroy(struct Qdisc* sch) | |||
1487 | static int htb_delete(struct Qdisc *sch, unsigned long arg) | 1323 | static int htb_delete(struct Qdisc *sch, unsigned long arg) |
1488 | { | 1324 | { |
1489 | struct htb_sched *q = qdisc_priv(sch); | 1325 | struct htb_sched *q = qdisc_priv(sch); |
1490 | struct htb_class *cl = (struct htb_class*)arg; | 1326 | struct htb_class *cl = (struct htb_class *)arg; |
1491 | HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0); | ||
1492 | 1327 | ||
1493 | // TODO: why don't allow to delete subtree ? references ? does | 1328 | // TODO: why don't allow to delete subtree ? references ? does |
1494 | // tc subsys quarantee us that in htb_destroy it holds no class | 1329 | // tc subsys quarantee us that in htb_destroy it holds no class |
1495 | // refs so that we can remove children safely there ? | 1330 | // refs so that we can remove children safely there ? |
1496 | if (!list_empty(&cl->children) || cl->filter_cnt) | 1331 | if (!list_empty(&cl->children) || cl->filter_cnt) |
1497 | return -EBUSY; | 1332 | return -EBUSY; |
1498 | 1333 | ||
1499 | sch_tree_lock(sch); | 1334 | sch_tree_lock(sch); |
1500 | 1335 | ||
1501 | /* delete from hash and active; remainder in destroy_class */ | 1336 | /* delete from hash and active; remainder in destroy_class */ |
1502 | list_del_init(&cl->hlist); | 1337 | if (!hlist_unhashed(&cl->hlist)) |
1338 | hlist_del(&cl->hlist); | ||
1339 | |||
1503 | if (cl->prio_activity) | 1340 | if (cl->prio_activity) |
1504 | htb_deactivate (q,cl); | 1341 | htb_deactivate(q, cl); |
1505 | 1342 | ||
1506 | if (--cl->refcnt == 0) | 1343 | if (--cl->refcnt == 0) |
1507 | htb_destroy_class(sch,cl); | 1344 | htb_destroy_class(sch, cl); |
1508 | 1345 | ||
1509 | sch_tree_unlock(sch); | 1346 | sch_tree_unlock(sch); |
1510 | return 0; | 1347 | return 0; |
@@ -1512,45 +1349,46 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) | |||
1512 | 1349 | ||
1513 | static void htb_put(struct Qdisc *sch, unsigned long arg) | 1350 | static void htb_put(struct Qdisc *sch, unsigned long arg) |
1514 | { | 1351 | { |
1515 | #ifdef HTB_DEBUG | 1352 | struct htb_class *cl = (struct htb_class *)arg; |
1516 | struct htb_sched *q = qdisc_priv(sch); | ||
1517 | #endif | ||
1518 | struct htb_class *cl = (struct htb_class*)arg; | ||
1519 | HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0); | ||
1520 | 1353 | ||
1521 | if (--cl->refcnt == 0) | 1354 | if (--cl->refcnt == 0) |
1522 | htb_destroy_class(sch,cl); | 1355 | htb_destroy_class(sch, cl); |
1523 | } | 1356 | } |
1524 | 1357 | ||
1525 | static int htb_change_class(struct Qdisc *sch, u32 classid, | 1358 | static int htb_change_class(struct Qdisc *sch, u32 classid, |
1526 | u32 parentid, struct rtattr **tca, unsigned long *arg) | 1359 | u32 parentid, struct rtattr **tca, |
1360 | unsigned long *arg) | ||
1527 | { | 1361 | { |
1528 | int err = -EINVAL; | 1362 | int err = -EINVAL; |
1529 | struct htb_sched *q = qdisc_priv(sch); | 1363 | struct htb_sched *q = qdisc_priv(sch); |
1530 | struct htb_class *cl = (struct htb_class*)*arg,*parent; | 1364 | struct htb_class *cl = (struct htb_class *)*arg, *parent; |
1531 | struct rtattr *opt = tca[TCA_OPTIONS-1]; | 1365 | struct rtattr *opt = tca[TCA_OPTIONS - 1]; |
1532 | struct qdisc_rate_table *rtab = NULL, *ctab = NULL; | 1366 | struct qdisc_rate_table *rtab = NULL, *ctab = NULL; |
1533 | struct rtattr *tb[TCA_HTB_RTAB]; | 1367 | struct rtattr *tb[TCA_HTB_RTAB]; |
1534 | struct tc_htb_opt *hopt; | 1368 | struct tc_htb_opt *hopt; |
1535 | 1369 | ||
1536 | /* extract all subattrs from opt attr */ | 1370 | /* extract all subattrs from opt attr */ |
1537 | if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) || | 1371 | if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) || |
1538 | tb[TCA_HTB_PARMS-1] == NULL || | 1372 | tb[TCA_HTB_PARMS - 1] == NULL || |
1539 | RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt)) | 1373 | RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt)) |
1540 | goto failure; | 1374 | goto failure; |
1541 | |||
1542 | parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch); | ||
1543 | 1375 | ||
1544 | hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]); | 1376 | parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); |
1545 | HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum); | 1377 | |
1546 | rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]); | 1378 | hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]); |
1547 | ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]); | 1379 | |
1548 | if (!rtab || !ctab) goto failure; | 1380 | rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]); |
1381 | ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]); | ||
1382 | if (!rtab || !ctab) | ||
1383 | goto failure; | ||
1549 | 1384 | ||
1550 | if (!cl) { /* new class */ | 1385 | if (!cl) { /* new class */ |
1551 | struct Qdisc *new_q; | 1386 | struct Qdisc *new_q; |
1387 | int prio; | ||
1388 | |||
1552 | /* check for valid classid */ | 1389 | /* check for valid classid */ |
1553 | if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch)) | 1390 | if (!classid || TC_H_MAJ(classid ^ sch->handle) |
1391 | || htb_find(classid, sch)) | ||
1554 | goto failure; | 1392 | goto failure; |
1555 | 1393 | ||
1556 | /* check maximal depth */ | 1394 | /* check maximal depth */ |
@@ -1561,15 +1399,16 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | |||
1561 | err = -ENOBUFS; | 1399 | err = -ENOBUFS; |
1562 | if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) | 1400 | if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) |
1563 | goto failure; | 1401 | goto failure; |
1564 | 1402 | ||
1565 | cl->refcnt = 1; | 1403 | cl->refcnt = 1; |
1566 | INIT_LIST_HEAD(&cl->sibling); | 1404 | INIT_LIST_HEAD(&cl->sibling); |
1567 | INIT_LIST_HEAD(&cl->hlist); | 1405 | INIT_HLIST_NODE(&cl->hlist); |
1568 | INIT_LIST_HEAD(&cl->children); | 1406 | INIT_LIST_HEAD(&cl->children); |
1569 | INIT_LIST_HEAD(&cl->un.leaf.drop_list); | 1407 | INIT_LIST_HEAD(&cl->un.leaf.drop_list); |
1570 | #ifdef HTB_DEBUG | 1408 | RB_CLEAR_NODE(&cl->pq_node); |
1571 | cl->magic = HTB_CMAGIC; | 1409 | |
1572 | #endif | 1410 | for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) |
1411 | RB_CLEAR_NODE(&cl->node[prio]); | ||
1573 | 1412 | ||
1574 | /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) | 1413 | /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) |
1575 | so that can't be used inside of sch_tree_lock | 1414 | so that can't be used inside of sch_tree_lock |
@@ -1579,53 +1418,53 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | |||
1579 | if (parent && !parent->level) { | 1418 | if (parent && !parent->level) { |
1580 | /* turn parent into inner node */ | 1419 | /* turn parent into inner node */ |
1581 | sch->q.qlen -= parent->un.leaf.q->q.qlen; | 1420 | sch->q.qlen -= parent->un.leaf.q->q.qlen; |
1582 | qdisc_destroy (parent->un.leaf.q); | 1421 | qdisc_destroy(parent->un.leaf.q); |
1583 | if (parent->prio_activity) | 1422 | if (parent->prio_activity) |
1584 | htb_deactivate (q,parent); | 1423 | htb_deactivate(q, parent); |
1585 | 1424 | ||
1586 | /* remove from evt list because of level change */ | 1425 | /* remove from evt list because of level change */ |
1587 | if (parent->cmode != HTB_CAN_SEND) { | 1426 | if (parent->cmode != HTB_CAN_SEND) { |
1588 | htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/); | 1427 | htb_safe_rb_erase(&parent->pq_node, q->wait_pq); |
1589 | parent->cmode = HTB_CAN_SEND; | 1428 | parent->cmode = HTB_CAN_SEND; |
1590 | } | 1429 | } |
1591 | parent->level = (parent->parent ? parent->parent->level | 1430 | parent->level = (parent->parent ? parent->parent->level |
1592 | : TC_HTB_MAXDEPTH) - 1; | 1431 | : TC_HTB_MAXDEPTH) - 1; |
1593 | memset (&parent->un.inner,0,sizeof(parent->un.inner)); | 1432 | memset(&parent->un.inner, 0, sizeof(parent->un.inner)); |
1594 | } | 1433 | } |
1595 | /* leaf (we) needs elementary qdisc */ | 1434 | /* leaf (we) needs elementary qdisc */ |
1596 | cl->un.leaf.q = new_q ? new_q : &noop_qdisc; | 1435 | cl->un.leaf.q = new_q ? new_q : &noop_qdisc; |
1597 | 1436 | ||
1598 | cl->classid = classid; cl->parent = parent; | 1437 | cl->classid = classid; |
1438 | cl->parent = parent; | ||
1599 | 1439 | ||
1600 | /* set class to be in HTB_CAN_SEND state */ | 1440 | /* set class to be in HTB_CAN_SEND state */ |
1601 | cl->tokens = hopt->buffer; | 1441 | cl->tokens = hopt->buffer; |
1602 | cl->ctokens = hopt->cbuffer; | 1442 | cl->ctokens = hopt->cbuffer; |
1603 | cl->mbuffer = PSCHED_JIFFIE2US(HZ*60); /* 1min */ | 1443 | cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */ |
1604 | PSCHED_GET_TIME(cl->t_c); | 1444 | PSCHED_GET_TIME(cl->t_c); |
1605 | cl->cmode = HTB_CAN_SEND; | 1445 | cl->cmode = HTB_CAN_SEND; |
1606 | 1446 | ||
1607 | /* attach to the hash list and parent's family */ | 1447 | /* attach to the hash list and parent's family */ |
1608 | list_add_tail(&cl->hlist, q->hash+htb_hash(classid)); | 1448 | hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); |
1609 | list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); | 1449 | list_add_tail(&cl->sibling, |
1610 | #ifdef HTB_DEBUG | 1450 | parent ? &parent->children : &q->root); |
1611 | { | 1451 | } else |
1612 | int i; | 1452 | sch_tree_lock(sch); |
1613 | for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1; | ||
1614 | cl->pq_node.rb_color = -1; | ||
1615 | } | ||
1616 | #endif | ||
1617 | } else sch_tree_lock(sch); | ||
1618 | 1453 | ||
1619 | /* it used to be a nasty bug here, we have to check that node | 1454 | /* it used to be a nasty bug here, we have to check that node |
1620 | is really leaf before changing cl->un.leaf ! */ | 1455 | is really leaf before changing cl->un.leaf ! */ |
1621 | if (!cl->level) { | 1456 | if (!cl->level) { |
1622 | cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum; | 1457 | cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum; |
1623 | if (!hopt->quantum && cl->un.leaf.quantum < 1000) { | 1458 | if (!hopt->quantum && cl->un.leaf.quantum < 1000) { |
1624 | printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid); | 1459 | printk(KERN_WARNING |
1460 | "HTB: quantum of class %X is small. Consider r2q change.\n", | ||
1461 | cl->classid); | ||
1625 | cl->un.leaf.quantum = 1000; | 1462 | cl->un.leaf.quantum = 1000; |
1626 | } | 1463 | } |
1627 | if (!hopt->quantum && cl->un.leaf.quantum > 200000) { | 1464 | if (!hopt->quantum && cl->un.leaf.quantum > 200000) { |
1628 | printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid); | 1465 | printk(KERN_WARNING |
1466 | "HTB: quantum of class %X is big. Consider r2q change.\n", | ||
1467 | cl->classid); | ||
1629 | cl->un.leaf.quantum = 200000; | 1468 | cl->un.leaf.quantum = 200000; |
1630 | } | 1469 | } |
1631 | if (hopt->quantum) | 1470 | if (hopt->quantum) |
@@ -1636,16 +1475,22 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | |||
1636 | 1475 | ||
1637 | cl->buffer = hopt->buffer; | 1476 | cl->buffer = hopt->buffer; |
1638 | cl->cbuffer = hopt->cbuffer; | 1477 | cl->cbuffer = hopt->cbuffer; |
1639 | if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab; | 1478 | if (cl->rate) |
1640 | if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab; | 1479 | qdisc_put_rtab(cl->rate); |
1480 | cl->rate = rtab; | ||
1481 | if (cl->ceil) | ||
1482 | qdisc_put_rtab(cl->ceil); | ||
1483 | cl->ceil = ctab; | ||
1641 | sch_tree_unlock(sch); | 1484 | sch_tree_unlock(sch); |
1642 | 1485 | ||
1643 | *arg = (unsigned long)cl; | 1486 | *arg = (unsigned long)cl; |
1644 | return 0; | 1487 | return 0; |
1645 | 1488 | ||
1646 | failure: | 1489 | failure: |
1647 | if (rtab) qdisc_put_rtab(rtab); | 1490 | if (rtab) |
1648 | if (ctab) qdisc_put_rtab(ctab); | 1491 | qdisc_put_rtab(rtab); |
1492 | if (ctab) | ||
1493 | qdisc_put_rtab(ctab); | ||
1649 | return err; | 1494 | return err; |
1650 | } | 1495 | } |
1651 | 1496 | ||
@@ -1654,28 +1499,28 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg) | |||
1654 | struct htb_sched *q = qdisc_priv(sch); | 1499 | struct htb_sched *q = qdisc_priv(sch); |
1655 | struct htb_class *cl = (struct htb_class *)arg; | 1500 | struct htb_class *cl = (struct htb_class *)arg; |
1656 | struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list; | 1501 | struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list; |
1657 | HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl); | 1502 | |
1658 | return fl; | 1503 | return fl; |
1659 | } | 1504 | } |
1660 | 1505 | ||
1661 | static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, | 1506 | static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, |
1662 | u32 classid) | 1507 | u32 classid) |
1663 | { | 1508 | { |
1664 | struct htb_sched *q = qdisc_priv(sch); | 1509 | struct htb_sched *q = qdisc_priv(sch); |
1665 | struct htb_class *cl = htb_find (classid,sch); | 1510 | struct htb_class *cl = htb_find(classid, sch); |
1666 | HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt); | 1511 | |
1667 | /*if (cl && !cl->level) return 0; | 1512 | /*if (cl && !cl->level) return 0; |
1668 | The line above used to be there to prevent attaching filters to | 1513 | The line above used to be there to prevent attaching filters to |
1669 | leaves. But at least tc_index filter uses this just to get class | 1514 | leaves. But at least tc_index filter uses this just to get class |
1670 | for other reasons so that we have to allow for it. | 1515 | for other reasons so that we have to allow for it. |
1671 | ---- | 1516 | ---- |
1672 | 19.6.2002 As Werner explained it is ok - bind filter is just | 1517 | 19.6.2002 As Werner explained it is ok - bind filter is just |
1673 | another way to "lock" the class - unlike "get" this lock can | 1518 | another way to "lock" the class - unlike "get" this lock can |
1674 | be broken by class during destroy IIUC. | 1519 | be broken by class during destroy IIUC. |
1675 | */ | 1520 | */ |
1676 | if (cl) | 1521 | if (cl) |
1677 | cl->filter_cnt++; | 1522 | cl->filter_cnt++; |
1678 | else | 1523 | else |
1679 | q->filter_cnt++; | 1524 | q->filter_cnt++; |
1680 | return (unsigned long)cl; | 1525 | return (unsigned long)cl; |
1681 | } | 1526 | } |
@@ -1684,10 +1529,10 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) | |||
1684 | { | 1529 | { |
1685 | struct htb_sched *q = qdisc_priv(sch); | 1530 | struct htb_sched *q = qdisc_priv(sch); |
1686 | struct htb_class *cl = (struct htb_class *)arg; | 1531 | struct htb_class *cl = (struct htb_class *)arg; |
1687 | HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt); | 1532 | |
1688 | if (cl) | 1533 | if (cl) |
1689 | cl->filter_cnt--; | 1534 | cl->filter_cnt--; |
1690 | else | 1535 | else |
1691 | q->filter_cnt--; | 1536 | q->filter_cnt--; |
1692 | } | 1537 | } |
1693 | 1538 | ||
@@ -1700,9 +1545,10 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
1700 | return; | 1545 | return; |
1701 | 1546 | ||
1702 | for (i = 0; i < HTB_HSIZE; i++) { | 1547 | for (i = 0; i < HTB_HSIZE; i++) { |
1703 | struct list_head *p; | 1548 | struct hlist_node *p; |
1704 | list_for_each (p,q->hash+i) { | 1549 | struct htb_class *cl; |
1705 | struct htb_class *cl = list_entry(p,struct htb_class,hlist); | 1550 | |
1551 | hlist_for_each_entry(cl, p, q->hash + i, hlist) { | ||
1706 | if (arg->count < arg->skip) { | 1552 | if (arg->count < arg->skip) { |
1707 | arg->count++; | 1553 | arg->count++; |
1708 | continue; | 1554 | continue; |
@@ -1750,12 +1596,13 @@ static struct Qdisc_ops htb_qdisc_ops = { | |||
1750 | 1596 | ||
1751 | static int __init htb_module_init(void) | 1597 | static int __init htb_module_init(void) |
1752 | { | 1598 | { |
1753 | return register_qdisc(&htb_qdisc_ops); | 1599 | return register_qdisc(&htb_qdisc_ops); |
1754 | } | 1600 | } |
1755 | static void __exit htb_module_exit(void) | 1601 | static void __exit htb_module_exit(void) |
1756 | { | 1602 | { |
1757 | unregister_qdisc(&htb_qdisc_ops); | 1603 | unregister_qdisc(&htb_qdisc_ops); |
1758 | } | 1604 | } |
1605 | |||
1759 | module_init(htb_module_init) | 1606 | module_init(htb_module_init) |
1760 | module_exit(htb_module_exit) | 1607 | module_exit(htb_module_exit) |
1761 | MODULE_LICENSE("GPL"); | 1608 | MODULE_LICENSE("GPL"); |
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index a08ec4c7c55d..45939bafbdf8 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c | |||
@@ -192,8 +192,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
192 | */ | 192 | */ |
193 | if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { | 193 | if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { |
194 | if (!(skb = skb_unshare(skb, GFP_ATOMIC)) | 194 | if (!(skb = skb_unshare(skb, GFP_ATOMIC)) |
195 | || (skb->ip_summed == CHECKSUM_HW | 195 | || (skb->ip_summed == CHECKSUM_PARTIAL |
196 | && skb_checksum_help(skb, 0))) { | 196 | && skb_checksum_help(skb))) { |
197 | sch->qstats.drops++; | 197 | sch->qstats.drops++; |
198 | return NET_XMIT_DROP; | 198 | return NET_XMIT_DROP; |
199 | } | 199 | } |
diff --git a/net/sctp/input.c b/net/sctp/input.c index 42b66e74bbb5..03f65de75d88 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c | |||
@@ -228,7 +228,7 @@ int sctp_rcv(struct sk_buff *skb) | |||
228 | goto discard_release; | 228 | goto discard_release; |
229 | nf_reset(skb); | 229 | nf_reset(skb); |
230 | 230 | ||
231 | if (sk_filter(sk, skb, 1)) | 231 | if (sk_filter(sk, skb)) |
232 | goto discard_release; | 232 | goto discard_release; |
233 | 233 | ||
234 | /* Create an SCTP packet structure. */ | 234 | /* Create an SCTP packet structure. */ |
@@ -255,10 +255,13 @@ int sctp_rcv(struct sk_buff *skb) | |||
255 | */ | 255 | */ |
256 | sctp_bh_lock_sock(sk); | 256 | sctp_bh_lock_sock(sk); |
257 | 257 | ||
258 | if (sock_owned_by_user(sk)) | 258 | if (sock_owned_by_user(sk)) { |
259 | SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); | ||
259 | sctp_add_backlog(sk, skb); | 260 | sctp_add_backlog(sk, skb); |
260 | else | 261 | } else { |
262 | SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); | ||
261 | sctp_inq_push(&chunk->rcvr->inqueue, chunk); | 263 | sctp_inq_push(&chunk->rcvr->inqueue, chunk); |
264 | } | ||
262 | 265 | ||
263 | sctp_bh_unlock_sock(sk); | 266 | sctp_bh_unlock_sock(sk); |
264 | 267 | ||
@@ -271,6 +274,7 @@ int sctp_rcv(struct sk_buff *skb) | |||
271 | return 0; | 274 | return 0; |
272 | 275 | ||
273 | discard_it: | 276 | discard_it: |
277 | SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS); | ||
274 | kfree_skb(skb); | 278 | kfree_skb(skb); |
275 | return 0; | 279 | return 0; |
276 | 280 | ||
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index cf0c767d43ae..cf6deed7e849 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c | |||
@@ -87,7 +87,7 @@ void sctp_inq_free(struct sctp_inq *queue) | |||
87 | /* Put a new packet in an SCTP inqueue. | 87 | /* Put a new packet in an SCTP inqueue. |
88 | * We assume that packet->sctp_hdr is set and in host byte order. | 88 | * We assume that packet->sctp_hdr is set and in host byte order. |
89 | */ | 89 | */ |
90 | void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) | 90 | void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) |
91 | { | 91 | { |
92 | /* Directly call the packet handling routine. */ | 92 | /* Directly call the packet handling routine. */ |
93 | 93 | ||
@@ -96,7 +96,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) | |||
96 | * Eventually, we should clean up inqueue to not rely | 96 | * Eventually, we should clean up inqueue to not rely |
97 | * on the BH related data structures. | 97 | * on the BH related data structures. |
98 | */ | 98 | */ |
99 | list_add_tail(&packet->list, &q->in_chunk_list); | 99 | list_add_tail(&chunk->list, &q->in_chunk_list); |
100 | q->immediate.func(q->immediate.data); | 100 | q->immediate.func(q->immediate.data); |
101 | } | 101 | } |
102 | 102 | ||
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 99c0cefc04e0..249e5033c1a8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c | |||
@@ -78,7 +78,6 @@ | |||
78 | 78 | ||
79 | #include <asm/uaccess.h> | 79 | #include <asm/uaccess.h> |
80 | 80 | ||
81 | extern int sctp_inetaddr_event(struct notifier_block *, unsigned long, void *); | ||
82 | static struct notifier_block sctp_inet6addr_notifier = { | 81 | static struct notifier_block sctp_inet6addr_notifier = { |
83 | .notifier_call = sctp_inetaddr_event, | 82 | .notifier_call = sctp_inetaddr_event, |
84 | }; | 83 | }; |
@@ -322,9 +321,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, | |||
322 | struct inet6_ifaddr *ifp; | 321 | struct inet6_ifaddr *ifp; |
323 | struct sctp_sockaddr_entry *addr; | 322 | struct sctp_sockaddr_entry *addr; |
324 | 323 | ||
325 | read_lock(&addrconf_lock); | 324 | rcu_read_lock(); |
326 | if ((in6_dev = __in6_dev_get(dev)) == NULL) { | 325 | if ((in6_dev = __in6_dev_get(dev)) == NULL) { |
327 | read_unlock(&addrconf_lock); | 326 | rcu_read_unlock(); |
328 | return; | 327 | return; |
329 | } | 328 | } |
330 | 329 | ||
@@ -343,7 +342,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, | |||
343 | } | 342 | } |
344 | 343 | ||
345 | read_unlock(&in6_dev->lock); | 344 | read_unlock(&in6_dev->lock); |
346 | read_unlock(&addrconf_lock); | 345 | rcu_read_unlock(); |
347 | } | 346 | } |
348 | 347 | ||
349 | /* Initialize a sockaddr_storage from in incoming skb. */ | 348 | /* Initialize a sockaddr_storage from in incoming skb. */ |
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 30b710c54e64..37074a39ecbb 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c | |||
@@ -467,6 +467,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, | |||
467 | 467 | ||
468 | switch(reason) { | 468 | switch(reason) { |
469 | case SCTP_RTXR_T3_RTX: | 469 | case SCTP_RTXR_T3_RTX: |
470 | SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS); | ||
470 | sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); | 471 | sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); |
471 | /* Update the retran path if the T3-rtx timer has expired for | 472 | /* Update the retran path if the T3-rtx timer has expired for |
472 | * the current retran path. | 473 | * the current retran path. |
@@ -475,12 +476,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, | |||
475 | sctp_assoc_update_retran_path(transport->asoc); | 476 | sctp_assoc_update_retran_path(transport->asoc); |
476 | break; | 477 | break; |
477 | case SCTP_RTXR_FAST_RTX: | 478 | case SCTP_RTXR_FAST_RTX: |
479 | SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); | ||
478 | sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); | 480 | sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); |
479 | fast_retransmit = 1; | 481 | fast_retransmit = 1; |
480 | break; | 482 | break; |
481 | case SCTP_RTXR_PMTUD: | 483 | case SCTP_RTXR_PMTUD: |
482 | default: | 484 | SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); |
483 | break; | 485 | break; |
486 | default: | ||
487 | BUG(); | ||
484 | } | 488 | } |
485 | 489 | ||
486 | sctp_retransmit_mark(q, transport, fast_retransmit); | 490 | sctp_retransmit_mark(q, transport, fast_retransmit); |
diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 5b3b0e0ae7e5..a356d8d310a9 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c | |||
@@ -57,6 +57,21 @@ static struct snmp_mib sctp_snmp_list[] = { | |||
57 | SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), | 57 | SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), |
58 | SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), | 58 | SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), |
59 | SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), | 59 | SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), |
60 | SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS), | ||
61 | SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS), | ||
62 | SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS), | ||
63 | SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS), | ||
64 | SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS), | ||
65 | SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS), | ||
66 | SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS), | ||
67 | SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS), | ||
68 | SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS), | ||
69 | SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS), | ||
70 | SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS), | ||
71 | SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ), | ||
72 | SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG), | ||
73 | SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS), | ||
74 | SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS), | ||
60 | SNMP_MIB_SENTINEL | 75 | SNMP_MIB_SENTINEL |
61 | }; | 76 | }; |
62 | 77 | ||
@@ -328,8 +343,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) | |||
328 | "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ", | 343 | "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ", |
329 | assoc, sk, sctp_sk(sk)->type, sk->sk_state, | 344 | assoc, sk, sctp_sk(sk)->type, sk->sk_state, |
330 | assoc->state, hash, assoc->assoc_id, | 345 | assoc->state, hash, assoc->assoc_id, |
331 | (sk->sk_rcvbuf - assoc->rwnd), | ||
332 | assoc->sndbuf_used, | 346 | assoc->sndbuf_used, |
347 | (sk->sk_rcvbuf - assoc->rwnd), | ||
333 | sock_i_uid(sk), sock_i_ino(sk), | 348 | sock_i_uid(sk), sock_i_ino(sk), |
334 | epb->bind_addr.port, | 349 | epb->bind_addr.port, |
335 | assoc->peer.port); | 350 | assoc->peer.port); |
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1ab03a27a76e..fac7674438a4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c | |||
@@ -61,7 +61,7 @@ | |||
61 | #include <net/inet_ecn.h> | 61 | #include <net/inet_ecn.h> |
62 | 62 | ||
63 | /* Global data structures. */ | 63 | /* Global data structures. */ |
64 | struct sctp_globals sctp_globals; | 64 | struct sctp_globals sctp_globals __read_mostly; |
65 | struct proc_dir_entry *proc_net_sctp; | 65 | struct proc_dir_entry *proc_net_sctp; |
66 | DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; | 66 | DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; |
67 | 67 | ||
@@ -82,13 +82,6 @@ static struct sctp_af *sctp_af_v6_specific; | |||
82 | kmem_cache_t *sctp_chunk_cachep __read_mostly; | 82 | kmem_cache_t *sctp_chunk_cachep __read_mostly; |
83 | kmem_cache_t *sctp_bucket_cachep __read_mostly; | 83 | kmem_cache_t *sctp_bucket_cachep __read_mostly; |
84 | 84 | ||
85 | extern int sctp_snmp_proc_init(void); | ||
86 | extern int sctp_snmp_proc_exit(void); | ||
87 | extern int sctp_eps_proc_init(void); | ||
88 | extern int sctp_eps_proc_exit(void); | ||
89 | extern int sctp_assocs_proc_init(void); | ||
90 | extern int sctp_assocs_proc_exit(void); | ||
91 | |||
92 | /* Return the address of the control sock. */ | 85 | /* Return the address of the control sock. */ |
93 | struct sock *sctp_get_ctl_sock(void) | 86 | struct sock *sctp_get_ctl_sock(void) |
94 | { | 87 | { |
@@ -1049,7 +1042,7 @@ SCTP_STATIC __init int sctp_init(void) | |||
1049 | sctp_rto_beta = SCTP_RTO_BETA; | 1042 | sctp_rto_beta = SCTP_RTO_BETA; |
1050 | 1043 | ||
1051 | /* Valid.Cookie.Life - 60 seconds */ | 1044 | /* Valid.Cookie.Life - 60 seconds */ |
1052 | sctp_valid_cookie_life = 60 * HZ; | 1045 | sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; |
1053 | 1046 | ||
1054 | /* Whether Cookie Preservative is enabled(1) or not(0) */ | 1047 | /* Whether Cookie Preservative is enabled(1) or not(0) */ |
1055 | sctp_cookie_preserve_enable = 1; | 1048 | sctp_cookie_preserve_enable = 1; |
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5b5ae7958322..1c42fe983a5b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c | |||
@@ -187,10 +187,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, | |||
187 | */ | 187 | */ |
188 | ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP, | 188 | ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP, |
189 | 0, 0, 0, GFP_ATOMIC); | 189 | 0, 0, 0, GFP_ATOMIC); |
190 | if (!ev) | 190 | if (ev) |
191 | goto nomem; | 191 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, |
192 | 192 | SCTP_ULPEVENT(ev)); | |
193 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); | ||
194 | 193 | ||
195 | /* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint | 194 | /* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint |
196 | * will verify that it is in SHUTDOWN-ACK-SENT state, if it is | 195 | * will verify that it is in SHUTDOWN-ACK-SENT state, if it is |
@@ -215,9 +214,6 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, | |||
215 | sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); | 214 | sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); |
216 | 215 | ||
217 | return SCTP_DISPOSITION_DELETE_TCB; | 216 | return SCTP_DISPOSITION_DELETE_TCB; |
218 | |||
219 | nomem: | ||
220 | return SCTP_DISPOSITION_NOMEM; | ||
221 | } | 217 | } |
222 | 218 | ||
223 | /* | 219 | /* |
@@ -347,8 +343,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, | |||
347 | GFP_ATOMIC)) | 343 | GFP_ATOMIC)) |
348 | goto nomem_init; | 344 | goto nomem_init; |
349 | 345 | ||
350 | sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); | ||
351 | |||
352 | /* B) "Z" shall respond immediately with an INIT ACK chunk. */ | 346 | /* B) "Z" shall respond immediately with an INIT ACK chunk. */ |
353 | 347 | ||
354 | /* If there are errors need to be reported for unknown parameters, | 348 | /* If there are errors need to be reported for unknown parameters, |
@@ -360,11 +354,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, | |||
360 | sizeof(sctp_chunkhdr_t); | 354 | sizeof(sctp_chunkhdr_t); |
361 | 355 | ||
362 | if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0) | 356 | if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0) |
363 | goto nomem_ack; | 357 | goto nomem_init; |
364 | 358 | ||
365 | repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); | 359 | repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); |
366 | if (!repl) | 360 | if (!repl) |
367 | goto nomem_ack; | 361 | goto nomem_init; |
368 | 362 | ||
369 | /* If there are errors need to be reported for unknown parameters, | 363 | /* If there are errors need to be reported for unknown parameters, |
370 | * include them in the outgoing INIT ACK as "Unrecognized parameter" | 364 | * include them in the outgoing INIT ACK as "Unrecognized parameter" |
@@ -388,6 +382,8 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, | |||
388 | sctp_chunk_free(err_chunk); | 382 | sctp_chunk_free(err_chunk); |
389 | } | 383 | } |
390 | 384 | ||
385 | sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); | ||
386 | |||
391 | sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); | 387 | sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); |
392 | 388 | ||
393 | /* | 389 | /* |
@@ -400,12 +396,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, | |||
400 | 396 | ||
401 | return SCTP_DISPOSITION_DELETE_TCB; | 397 | return SCTP_DISPOSITION_DELETE_TCB; |
402 | 398 | ||
403 | nomem_ack: | ||
404 | if (err_chunk) | ||
405 | sctp_chunk_free(err_chunk); | ||
406 | nomem_init: | 399 | nomem_init: |
407 | sctp_association_free(new_asoc); | 400 | sctp_association_free(new_asoc); |
408 | nomem: | 401 | nomem: |
402 | if (err_chunk) | ||
403 | sctp_chunk_free(err_chunk); | ||
409 | return SCTP_DISPOSITION_NOMEM; | 404 | return SCTP_DISPOSITION_NOMEM; |
410 | } | 405 | } |
411 | 406 | ||
@@ -600,7 +595,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, | |||
600 | struct sctp_association *new_asoc; | 595 | struct sctp_association *new_asoc; |
601 | sctp_init_chunk_t *peer_init; | 596 | sctp_init_chunk_t *peer_init; |
602 | struct sctp_chunk *repl; | 597 | struct sctp_chunk *repl; |
603 | struct sctp_ulpevent *ev; | 598 | struct sctp_ulpevent *ev, *ai_ev = NULL; |
604 | int error = 0; | 599 | int error = 0; |
605 | struct sctp_chunk *err_chk_p; | 600 | struct sctp_chunk *err_chk_p; |
606 | 601 | ||
@@ -659,20 +654,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, | |||
659 | }; | 654 | }; |
660 | } | 655 | } |
661 | 656 | ||
662 | sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); | ||
663 | sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, | ||
664 | SCTP_STATE(SCTP_STATE_ESTABLISHED)); | ||
665 | SCTP_INC_STATS(SCTP_MIB_CURRESTAB); | ||
666 | SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); | ||
667 | sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); | ||
668 | 657 | ||
669 | if (new_asoc->autoclose) | 658 | /* Delay state machine commands until later. |
670 | sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, | 659 | * |
671 | SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); | 660 | * Re-build the bind address for the association is done in |
672 | |||
673 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); | ||
674 | |||
675 | /* Re-build the bind address for the association is done in | ||
676 | * the sctp_unpack_cookie() already. | 661 | * the sctp_unpack_cookie() already. |
677 | */ | 662 | */ |
678 | /* This is a brand-new association, so these are not yet side | 663 | /* This is a brand-new association, so these are not yet side |
@@ -687,9 +672,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, | |||
687 | 672 | ||
688 | repl = sctp_make_cookie_ack(new_asoc, chunk); | 673 | repl = sctp_make_cookie_ack(new_asoc, chunk); |
689 | if (!repl) | 674 | if (!repl) |
690 | goto nomem_repl; | 675 | goto nomem_init; |
691 | |||
692 | sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); | ||
693 | 676 | ||
694 | /* RFC 2960 5.1 Normal Establishment of an Association | 677 | /* RFC 2960 5.1 Normal Establishment of an Association |
695 | * | 678 | * |
@@ -704,28 +687,53 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, | |||
704 | if (!ev) | 687 | if (!ev) |
705 | goto nomem_ev; | 688 | goto nomem_ev; |
706 | 689 | ||
707 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); | ||
708 | |||
709 | /* Sockets API Draft Section 5.3.1.6 | 690 | /* Sockets API Draft Section 5.3.1.6 |
710 | * When a peer sends a Adaption Layer Indication parameter , SCTP | 691 | * When a peer sends a Adaption Layer Indication parameter , SCTP |
711 | * delivers this notification to inform the application that of the | 692 | * delivers this notification to inform the application that of the |
712 | * peers requested adaption layer. | 693 | * peers requested adaption layer. |
713 | */ | 694 | */ |
714 | if (new_asoc->peer.adaption_ind) { | 695 | if (new_asoc->peer.adaption_ind) { |
715 | ev = sctp_ulpevent_make_adaption_indication(new_asoc, | 696 | ai_ev = sctp_ulpevent_make_adaption_indication(new_asoc, |
716 | GFP_ATOMIC); | 697 | GFP_ATOMIC); |
717 | if (!ev) | 698 | if (!ai_ev) |
718 | goto nomem_ev; | 699 | goto nomem_aiev; |
700 | } | ||
701 | |||
702 | /* Add all the state machine commands now since we've created | ||
703 | * everything. This way we don't introduce memory corruptions | ||
704 | * during side-effect processing and correclty count established | ||
705 | * associations. | ||
706 | */ | ||
707 | sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); | ||
708 | sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, | ||
709 | SCTP_STATE(SCTP_STATE_ESTABLISHED)); | ||
710 | SCTP_INC_STATS(SCTP_MIB_CURRESTAB); | ||
711 | SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); | ||
712 | sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); | ||
713 | |||
714 | if (new_asoc->autoclose) | ||
715 | sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, | ||
716 | SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); | ||
719 | 717 | ||
718 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); | ||
719 | |||
720 | /* This will send the COOKIE ACK */ | ||
721 | sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); | ||
722 | |||
723 | /* Queue the ASSOC_CHANGE event */ | ||
724 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); | ||
725 | |||
726 | /* Send up the Adaptation Layer Indication event */ | ||
727 | if (ai_ev) | ||
720 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, | 728 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, |
721 | SCTP_ULPEVENT(ev)); | 729 | SCTP_ULPEVENT(ai_ev)); |
722 | } | ||
723 | 730 | ||
724 | return SCTP_DISPOSITION_CONSUME; | 731 | return SCTP_DISPOSITION_CONSUME; |
725 | 732 | ||
733 | nomem_aiev: | ||
734 | sctp_ulpevent_free(ev); | ||
726 | nomem_ev: | 735 | nomem_ev: |
727 | sctp_chunk_free(repl); | 736 | sctp_chunk_free(repl); |
728 | nomem_repl: | ||
729 | nomem_init: | 737 | nomem_init: |
730 | sctp_association_free(new_asoc); | 738 | sctp_association_free(new_asoc); |
731 | nomem: | 739 | nomem: |
@@ -1360,10 +1368,8 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( | |||
1360 | if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, | 1368 | if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, |
1361 | sctp_source(chunk), | 1369 | sctp_source(chunk), |
1362 | (sctp_init_chunk_t *)chunk->chunk_hdr, | 1370 | (sctp_init_chunk_t *)chunk->chunk_hdr, |
1363 | GFP_ATOMIC)) { | 1371 | GFP_ATOMIC)) |
1364 | retval = SCTP_DISPOSITION_NOMEM; | 1372 | goto nomem; |
1365 | goto nomem_init; | ||
1366 | } | ||
1367 | 1373 | ||
1368 | /* Make sure no new addresses are being added during the | 1374 | /* Make sure no new addresses are being added during the |
1369 | * restart. Do not do this check for COOKIE-WAIT state, | 1375 | * restart. Do not do this check for COOKIE-WAIT state, |
@@ -1374,7 +1380,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( | |||
1374 | if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, | 1380 | if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, |
1375 | commands)) { | 1381 | commands)) { |
1376 | retval = SCTP_DISPOSITION_CONSUME; | 1382 | retval = SCTP_DISPOSITION_CONSUME; |
1377 | goto cleanup_asoc; | 1383 | goto nomem_retval; |
1378 | } | 1384 | } |
1379 | } | 1385 | } |
1380 | 1386 | ||
@@ -1430,17 +1436,17 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( | |||
1430 | sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); | 1436 | sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); |
1431 | retval = SCTP_DISPOSITION_CONSUME; | 1437 | retval = SCTP_DISPOSITION_CONSUME; |
1432 | 1438 | ||
1439 | return retval; | ||
1440 | |||
1441 | nomem: | ||
1442 | retval = SCTP_DISPOSITION_NOMEM; | ||
1443 | nomem_retval: | ||
1444 | if (new_asoc) | ||
1445 | sctp_association_free(new_asoc); | ||
1433 | cleanup: | 1446 | cleanup: |
1434 | if (err_chunk) | 1447 | if (err_chunk) |
1435 | sctp_chunk_free(err_chunk); | 1448 | sctp_chunk_free(err_chunk); |
1436 | return retval; | 1449 | return retval; |
1437 | nomem: | ||
1438 | retval = SCTP_DISPOSITION_NOMEM; | ||
1439 | goto cleanup; | ||
1440 | nomem_init: | ||
1441 | cleanup_asoc: | ||
1442 | sctp_association_free(new_asoc); | ||
1443 | goto cleanup; | ||
1444 | } | 1450 | } |
1445 | 1451 | ||
1446 | /* | 1452 | /* |
@@ -1611,15 +1617,10 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, | |||
1611 | */ | 1617 | */ |
1612 | sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); | 1618 | sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); |
1613 | 1619 | ||
1614 | /* Update the content of current association. */ | ||
1615 | sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); | ||
1616 | |||
1617 | repl = sctp_make_cookie_ack(new_asoc, chunk); | 1620 | repl = sctp_make_cookie_ack(new_asoc, chunk); |
1618 | if (!repl) | 1621 | if (!repl) |
1619 | goto nomem; | 1622 | goto nomem; |
1620 | 1623 | ||
1621 | sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); | ||
1622 | |||
1623 | /* Report association restart to upper layer. */ | 1624 | /* Report association restart to upper layer. */ |
1624 | ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0, | 1625 | ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0, |
1625 | new_asoc->c.sinit_num_ostreams, | 1626 | new_asoc->c.sinit_num_ostreams, |
@@ -1628,6 +1629,9 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, | |||
1628 | if (!ev) | 1629 | if (!ev) |
1629 | goto nomem_ev; | 1630 | goto nomem_ev; |
1630 | 1631 | ||
1632 | /* Update the content of current association. */ | ||
1633 | sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); | ||
1634 | sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); | ||
1631 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); | 1635 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); |
1632 | return SCTP_DISPOSITION_CONSUME; | 1636 | return SCTP_DISPOSITION_CONSUME; |
1633 | 1637 | ||
@@ -1751,7 +1755,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, | |||
1751 | sctp_cmd_seq_t *commands, | 1755 | sctp_cmd_seq_t *commands, |
1752 | struct sctp_association *new_asoc) | 1756 | struct sctp_association *new_asoc) |
1753 | { | 1757 | { |
1754 | struct sctp_ulpevent *ev = NULL; | 1758 | struct sctp_ulpevent *ev = NULL, *ai_ev = NULL; |
1755 | struct sctp_chunk *repl; | 1759 | struct sctp_chunk *repl; |
1756 | 1760 | ||
1757 | /* Clarification from Implementor's Guide: | 1761 | /* Clarification from Implementor's Guide: |
@@ -1778,29 +1782,25 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, | |||
1778 | * SCTP user upon reception of a valid COOKIE | 1782 | * SCTP user upon reception of a valid COOKIE |
1779 | * ECHO chunk. | 1783 | * ECHO chunk. |
1780 | */ | 1784 | */ |
1781 | ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, | 1785 | ev = sctp_ulpevent_make_assoc_change(asoc, 0, |
1782 | SCTP_COMM_UP, 0, | 1786 | SCTP_COMM_UP, 0, |
1783 | new_asoc->c.sinit_num_ostreams, | 1787 | asoc->c.sinit_num_ostreams, |
1784 | new_asoc->c.sinit_max_instreams, | 1788 | asoc->c.sinit_max_instreams, |
1785 | GFP_ATOMIC); | 1789 | GFP_ATOMIC); |
1786 | if (!ev) | 1790 | if (!ev) |
1787 | goto nomem; | 1791 | goto nomem; |
1788 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, | ||
1789 | SCTP_ULPEVENT(ev)); | ||
1790 | 1792 | ||
1791 | /* Sockets API Draft Section 5.3.1.6 | 1793 | /* Sockets API Draft Section 5.3.1.6 |
1792 | * When a peer sends a Adaption Layer Indication parameter, | 1794 | * When a peer sends a Adaption Layer Indication parameter, |
1793 | * SCTP delivers this notification to inform the application | 1795 | * SCTP delivers this notification to inform the application |
1794 | * that of the peers requested adaption layer. | 1796 | * that of the peers requested adaption layer. |
1795 | */ | 1797 | */ |
1796 | if (new_asoc->peer.adaption_ind) { | 1798 | if (asoc->peer.adaption_ind) { |
1797 | ev = sctp_ulpevent_make_adaption_indication(new_asoc, | 1799 | ai_ev = sctp_ulpevent_make_adaption_indication(asoc, |
1798 | GFP_ATOMIC); | 1800 | GFP_ATOMIC); |
1799 | if (!ev) | 1801 | if (!ai_ev) |
1800 | goto nomem; | 1802 | goto nomem; |
1801 | 1803 | ||
1802 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, | ||
1803 | SCTP_ULPEVENT(ev)); | ||
1804 | } | 1804 | } |
1805 | } | 1805 | } |
1806 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); | 1806 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); |
@@ -1809,12 +1809,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, | |||
1809 | if (!repl) | 1809 | if (!repl) |
1810 | goto nomem; | 1810 | goto nomem; |
1811 | 1811 | ||
1812 | if (ev) | ||
1813 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, | ||
1814 | SCTP_ULPEVENT(ev)); | ||
1815 | if (ai_ev) | ||
1816 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, | ||
1817 | SCTP_ULPEVENT(ai_ev)); | ||
1818 | |||
1812 | sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); | 1819 | sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); |
1813 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); | 1820 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); |
1814 | 1821 | ||
1815 | return SCTP_DISPOSITION_CONSUME; | 1822 | return SCTP_DISPOSITION_CONSUME; |
1816 | 1823 | ||
1817 | nomem: | 1824 | nomem: |
1825 | if (ai_ev) | ||
1826 | sctp_ulpevent_free(ai_ev); | ||
1818 | if (ev) | 1827 | if (ev) |
1819 | sctp_ulpevent_free(ev); | 1828 | sctp_ulpevent_free(ev); |
1820 | return SCTP_DISPOSITION_NOMEM; | 1829 | return SCTP_DISPOSITION_NOMEM; |
@@ -2663,9 +2672,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, | |||
2663 | break; | 2672 | break; |
2664 | case SCTP_IERROR_HIGH_TSN: | 2673 | case SCTP_IERROR_HIGH_TSN: |
2665 | case SCTP_IERROR_BAD_STREAM: | 2674 | case SCTP_IERROR_BAD_STREAM: |
2675 | SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); | ||
2666 | goto discard_noforce; | 2676 | goto discard_noforce; |
2667 | case SCTP_IERROR_DUP_TSN: | 2677 | case SCTP_IERROR_DUP_TSN: |
2668 | case SCTP_IERROR_IGNORE_TSN: | 2678 | case SCTP_IERROR_IGNORE_TSN: |
2679 | SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); | ||
2669 | goto discard_force; | 2680 | goto discard_force; |
2670 | case SCTP_IERROR_NO_DATA: | 2681 | case SCTP_IERROR_NO_DATA: |
2671 | goto consume; | 2682 | goto consume; |
@@ -3017,7 +3028,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, | |||
3017 | if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) | 3028 | if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) |
3018 | return sctp_sf_violation_chunklen(ep, asoc, type, arg, | 3029 | return sctp_sf_violation_chunklen(ep, asoc, type, arg, |
3019 | commands); | 3030 | commands); |
3020 | |||
3021 | /* 10.2 H) SHUTDOWN COMPLETE notification | 3031 | /* 10.2 H) SHUTDOWN COMPLETE notification |
3022 | * | 3032 | * |
3023 | * When SCTP completes the shutdown procedures (section 9.2) this | 3033 | * When SCTP completes the shutdown procedures (section 9.2) this |
@@ -3028,6 +3038,14 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, | |||
3028 | if (!ev) | 3038 | if (!ev) |
3029 | goto nomem; | 3039 | goto nomem; |
3030 | 3040 | ||
3041 | /* ...send a SHUTDOWN COMPLETE chunk to its peer, */ | ||
3042 | reply = sctp_make_shutdown_complete(asoc, chunk); | ||
3043 | if (!reply) | ||
3044 | goto nomem_chunk; | ||
3045 | |||
3046 | /* Do all the commands now (after allocation), so that we | ||
3047 | * have consistent state if memory allocation failes | ||
3048 | */ | ||
3031 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); | 3049 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); |
3032 | 3050 | ||
3033 | /* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall | 3051 | /* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall |
@@ -3039,11 +3057,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, | |||
3039 | sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, | 3057 | sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, |
3040 | SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); | 3058 | SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); |
3041 | 3059 | ||
3042 | /* ...send a SHUTDOWN COMPLETE chunk to its peer, */ | ||
3043 | reply = sctp_make_shutdown_complete(asoc, chunk); | ||
3044 | if (!reply) | ||
3045 | goto nomem; | ||
3046 | |||
3047 | sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, | 3060 | sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, |
3048 | SCTP_STATE(SCTP_STATE_CLOSED)); | 3061 | SCTP_STATE(SCTP_STATE_CLOSED)); |
3049 | SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); | 3062 | SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); |
@@ -3054,6 +3067,8 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, | |||
3054 | sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); | 3067 | sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); |
3055 | return SCTP_DISPOSITION_DELETE_TCB; | 3068 | return SCTP_DISPOSITION_DELETE_TCB; |
3056 | 3069 | ||
3070 | nomem_chunk: | ||
3071 | sctp_ulpevent_free(ev); | ||
3057 | nomem: | 3072 | nomem: |
3058 | return SCTP_DISPOSITION_NOMEM; | 3073 | return SCTP_DISPOSITION_NOMEM; |
3059 | } | 3074 | } |
@@ -3652,6 +3667,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, | |||
3652 | void *arg, | 3667 | void *arg, |
3653 | sctp_cmd_seq_t *commands) | 3668 | sctp_cmd_seq_t *commands) |
3654 | { | 3669 | { |
3670 | SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS); | ||
3655 | sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); | 3671 | sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); |
3656 | 3672 | ||
3657 | return SCTP_DISPOSITION_CONSUME; | 3673 | return SCTP_DISPOSITION_CONSUME; |
@@ -4548,6 +4564,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, | |||
4548 | { | 4564 | { |
4549 | struct sctp_transport *transport = arg; | 4565 | struct sctp_transport *transport = arg; |
4550 | 4566 | ||
4567 | SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); | ||
4568 | |||
4551 | if (asoc->overall_error_count >= asoc->max_retrans) { | 4569 | if (asoc->overall_error_count >= asoc->max_retrans) { |
4552 | sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, | 4570 | sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, |
4553 | SCTP_ERROR(ETIMEDOUT)); | 4571 | SCTP_ERROR(ETIMEDOUT)); |
@@ -4616,6 +4634,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, | |||
4616 | void *arg, | 4634 | void *arg, |
4617 | sctp_cmd_seq_t *commands) | 4635 | sctp_cmd_seq_t *commands) |
4618 | { | 4636 | { |
4637 | SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS); | ||
4619 | sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); | 4638 | sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); |
4620 | return SCTP_DISPOSITION_CONSUME; | 4639 | return SCTP_DISPOSITION_CONSUME; |
4621 | } | 4640 | } |
@@ -4650,6 +4669,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, | |||
4650 | int attempts = asoc->init_err_counter + 1; | 4669 | int attempts = asoc->init_err_counter + 1; |
4651 | 4670 | ||
4652 | SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); | 4671 | SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); |
4672 | SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS); | ||
4653 | 4673 | ||
4654 | if (attempts <= asoc->max_init_attempts) { | 4674 | if (attempts <= asoc->max_init_attempts) { |
4655 | bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; | 4675 | bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; |
@@ -4709,6 +4729,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep | |||
4709 | int attempts = asoc->init_err_counter + 1; | 4729 | int attempts = asoc->init_err_counter + 1; |
4710 | 4730 | ||
4711 | SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); | 4731 | SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); |
4732 | SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS); | ||
4712 | 4733 | ||
4713 | if (attempts <= asoc->max_init_attempts) { | 4734 | if (attempts <= asoc->max_init_attempts) { |
4714 | repl = sctp_make_cookie_echo(asoc, NULL); | 4735 | repl = sctp_make_cookie_echo(asoc, NULL); |
@@ -4753,6 +4774,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, | |||
4753 | struct sctp_chunk *reply = NULL; | 4774 | struct sctp_chunk *reply = NULL; |
4754 | 4775 | ||
4755 | SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); | 4776 | SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); |
4777 | SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); | ||
4778 | |||
4756 | if (asoc->overall_error_count >= asoc->max_retrans) { | 4779 | if (asoc->overall_error_count >= asoc->max_retrans) { |
4757 | sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, | 4780 | sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, |
4758 | SCTP_ERROR(ETIMEDOUT)); | 4781 | SCTP_ERROR(ETIMEDOUT)); |
@@ -4814,6 +4837,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( | |||
4814 | struct sctp_chunk *chunk = asoc->addip_last_asconf; | 4837 | struct sctp_chunk *chunk = asoc->addip_last_asconf; |
4815 | struct sctp_transport *transport = chunk->transport; | 4838 | struct sctp_transport *transport = chunk->transport; |
4816 | 4839 | ||
4840 | SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS); | ||
4841 | |||
4817 | /* ADDIP 4.1 B1) Increment the error counters and perform path failure | 4842 | /* ADDIP 4.1 B1) Increment the error counters and perform path failure |
4818 | * detection on the appropriate destination address as defined in | 4843 | * detection on the appropriate destination address as defined in |
4819 | * RFC2960 [5] section 8.1 and 8.2. | 4844 | * RFC2960 [5] section 8.1 and 8.2. |
@@ -4880,6 +4905,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, | |||
4880 | struct sctp_chunk *reply = NULL; | 4905 | struct sctp_chunk *reply = NULL; |
4881 | 4906 | ||
4882 | SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); | 4907 | SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); |
4908 | SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); | ||
4883 | 4909 | ||
4884 | reply = sctp_make_abort(asoc, NULL, 0); | 4910 | reply = sctp_make_abort(asoc, NULL, 0); |
4885 | if (!reply) | 4911 | if (!reply) |
@@ -4910,6 +4936,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( | |||
4910 | { | 4936 | { |
4911 | int disposition; | 4937 | int disposition; |
4912 | 4938 | ||
4939 | SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS); | ||
4940 | |||
4913 | /* From 9.2 Shutdown of an Association | 4941 | /* From 9.2 Shutdown of an Association |
4914 | * Upon receipt of the SHUTDOWN primitive from its upper | 4942 | * Upon receipt of the SHUTDOWN primitive from its upper |
4915 | * layer, the endpoint enters SHUTDOWN-PENDING state and | 4943 | * layer, the endpoint enters SHUTDOWN-PENDING state and |
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 85caf7963886..79c3e072cf28 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c | |||
@@ -2081,13 +2081,13 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, | |||
2081 | * SPP_SACKDELAY_ENABLE, setting both will have undefined | 2081 | * SPP_SACKDELAY_ENABLE, setting both will have undefined |
2082 | * results. | 2082 | * results. |
2083 | */ | 2083 | */ |
2084 | int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, | 2084 | static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, |
2085 | struct sctp_transport *trans, | 2085 | struct sctp_transport *trans, |
2086 | struct sctp_association *asoc, | 2086 | struct sctp_association *asoc, |
2087 | struct sctp_sock *sp, | 2087 | struct sctp_sock *sp, |
2088 | int hb_change, | 2088 | int hb_change, |
2089 | int pmtud_change, | 2089 | int pmtud_change, |
2090 | int sackdelay_change) | 2090 | int sackdelay_change) |
2091 | { | 2091 | { |
2092 | int error; | 2092 | int error; |
2093 | 2093 | ||
@@ -2970,7 +2970,7 @@ SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) | |||
2970 | goto out; | 2970 | goto out; |
2971 | } | 2971 | } |
2972 | 2972 | ||
2973 | timeo = sock_rcvtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK); | 2973 | timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); |
2974 | 2974 | ||
2975 | error = sctp_wait_for_accept(sk, timeo); | 2975 | error = sctp_wait_for_accept(sk, timeo); |
2976 | if (error) | 2976 | if (error) |
@@ -3045,14 +3045,14 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) | |||
3045 | sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; | 3045 | sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; |
3046 | sp->initmsg.sinit_max_instreams = sctp_max_instreams; | 3046 | sp->initmsg.sinit_max_instreams = sctp_max_instreams; |
3047 | sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; | 3047 | sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; |
3048 | sp->initmsg.sinit_max_init_timeo = jiffies_to_msecs(sctp_rto_max); | 3048 | sp->initmsg.sinit_max_init_timeo = sctp_rto_max; |
3049 | 3049 | ||
3050 | /* Initialize default RTO related parameters. These parameters can | 3050 | /* Initialize default RTO related parameters. These parameters can |
3051 | * be modified for with the SCTP_RTOINFO socket option. | 3051 | * be modified for with the SCTP_RTOINFO socket option. |
3052 | */ | 3052 | */ |
3053 | sp->rtoinfo.srto_initial = jiffies_to_msecs(sctp_rto_initial); | 3053 | sp->rtoinfo.srto_initial = sctp_rto_initial; |
3054 | sp->rtoinfo.srto_max = jiffies_to_msecs(sctp_rto_max); | 3054 | sp->rtoinfo.srto_max = sctp_rto_max; |
3055 | sp->rtoinfo.srto_min = jiffies_to_msecs(sctp_rto_min); | 3055 | sp->rtoinfo.srto_min = sctp_rto_min; |
3056 | 3056 | ||
3057 | /* Initialize default association related parameters. These parameters | 3057 | /* Initialize default association related parameters. These parameters |
3058 | * can be modified with the SCTP_ASSOCINFO socket option. | 3058 | * can be modified with the SCTP_ASSOCINFO socket option. |
@@ -3061,8 +3061,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) | |||
3061 | sp->assocparams.sasoc_number_peer_destinations = 0; | 3061 | sp->assocparams.sasoc_number_peer_destinations = 0; |
3062 | sp->assocparams.sasoc_peer_rwnd = 0; | 3062 | sp->assocparams.sasoc_peer_rwnd = 0; |
3063 | sp->assocparams.sasoc_local_rwnd = 0; | 3063 | sp->assocparams.sasoc_local_rwnd = 0; |
3064 | sp->assocparams.sasoc_cookie_life = | 3064 | sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life; |
3065 | jiffies_to_msecs(sctp_valid_cookie_life); | ||
3066 | 3065 | ||
3067 | /* Initialize default event subscriptions. By default, all the | 3066 | /* Initialize default event subscriptions. By default, all the |
3068 | * options are off. | 3067 | * options are off. |
@@ -3072,10 +3071,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) | |||
3072 | /* Default Peer Address Parameters. These defaults can | 3071 | /* Default Peer Address Parameters. These defaults can |
3073 | * be modified via SCTP_PEER_ADDR_PARAMS | 3072 | * be modified via SCTP_PEER_ADDR_PARAMS |
3074 | */ | 3073 | */ |
3075 | sp->hbinterval = jiffies_to_msecs(sctp_hb_interval); | 3074 | sp->hbinterval = sctp_hb_interval; |
3076 | sp->pathmaxrxt = sctp_max_retrans_path; | 3075 | sp->pathmaxrxt = sctp_max_retrans_path; |
3077 | sp->pathmtu = 0; // allow default discovery | 3076 | sp->pathmtu = 0; // allow default discovery |
3078 | sp->sackdelay = jiffies_to_msecs(sctp_sack_timeout); | 3077 | sp->sackdelay = sctp_sack_timeout; |
3079 | sp->param_flags = SPP_HB_ENABLE | | 3078 | sp->param_flags = SPP_HB_ENABLE | |
3080 | SPP_PMTUD_ENABLE | | 3079 | SPP_PMTUD_ENABLE | |
3081 | SPP_SACKDELAY_ENABLE; | 3080 | SPP_SACKDELAY_ENABLE; |
@@ -5619,6 +5618,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, | |||
5619 | /* Copy the bind_addr list from the original endpoint to the new | 5618 | /* Copy the bind_addr list from the original endpoint to the new |
5620 | * endpoint so that we can handle restarts properly | 5619 | * endpoint so that we can handle restarts properly |
5621 | */ | 5620 | */ |
5621 | if (PF_INET6 == assoc->base.sk->sk_family) | ||
5622 | flags = SCTP_ADDR6_ALLOWED; | ||
5622 | if (assoc->peer.ipv4_address) | 5623 | if (assoc->peer.ipv4_address) |
5623 | flags |= SCTP_ADDR4_PEERSUPP; | 5624 | flags |= SCTP_ADDR4_PEERSUPP; |
5624 | if (assoc->peer.ipv6_address) | 5625 | if (assoc->peer.ipv6_address) |
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index dc6f3ff32358..633cd178654b 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c | |||
@@ -45,9 +45,10 @@ | |||
45 | #include <net/sctp/sctp.h> | 45 | #include <net/sctp/sctp.h> |
46 | #include <linux/sysctl.h> | 46 | #include <linux/sysctl.h> |
47 | 47 | ||
48 | static ctl_handler sctp_sysctl_jiffies_ms; | 48 | static int zero = 0; |
49 | static long rto_timer_min = 1; | 49 | static int one = 1; |
50 | static long rto_timer_max = 86400000; /* One day */ | 50 | static int timer_max = 86400000; /* ms in one day */ |
51 | static int int_max = INT_MAX; | ||
51 | static long sack_timer_min = 1; | 52 | static long sack_timer_min = 1; |
52 | static long sack_timer_max = 500; | 53 | static long sack_timer_max = 500; |
53 | 54 | ||
@@ -56,45 +57,45 @@ static ctl_table sctp_table[] = { | |||
56 | .ctl_name = NET_SCTP_RTO_INITIAL, | 57 | .ctl_name = NET_SCTP_RTO_INITIAL, |
57 | .procname = "rto_initial", | 58 | .procname = "rto_initial", |
58 | .data = &sctp_rto_initial, | 59 | .data = &sctp_rto_initial, |
59 | .maxlen = sizeof(long), | 60 | .maxlen = sizeof(unsigned int), |
60 | .mode = 0644, | 61 | .mode = 0644, |
61 | .proc_handler = &proc_doulongvec_ms_jiffies_minmax, | 62 | .proc_handler = &proc_dointvec_minmax, |
62 | .strategy = &sctp_sysctl_jiffies_ms, | 63 | .strategy = &sysctl_intvec, |
63 | .extra1 = &rto_timer_min, | 64 | .extra1 = &one, |
64 | .extra2 = &rto_timer_max | 65 | .extra2 = &timer_max |
65 | }, | 66 | }, |
66 | { | 67 | { |
67 | .ctl_name = NET_SCTP_RTO_MIN, | 68 | .ctl_name = NET_SCTP_RTO_MIN, |
68 | .procname = "rto_min", | 69 | .procname = "rto_min", |
69 | .data = &sctp_rto_min, | 70 | .data = &sctp_rto_min, |
70 | .maxlen = sizeof(long), | 71 | .maxlen = sizeof(unsigned int), |
71 | .mode = 0644, | 72 | .mode = 0644, |
72 | .proc_handler = &proc_doulongvec_ms_jiffies_minmax, | 73 | .proc_handler = &proc_dointvec_minmax, |
73 | .strategy = &sctp_sysctl_jiffies_ms, | 74 | .strategy = &sysctl_intvec, |
74 | .extra1 = &rto_timer_min, | 75 | .extra1 = &one, |
75 | .extra2 = &rto_timer_max | 76 | .extra2 = &timer_max |
76 | }, | 77 | }, |
77 | { | 78 | { |
78 | .ctl_name = NET_SCTP_RTO_MAX, | 79 | .ctl_name = NET_SCTP_RTO_MAX, |
79 | .procname = "rto_max", | 80 | .procname = "rto_max", |
80 | .data = &sctp_rto_max, | 81 | .data = &sctp_rto_max, |
81 | .maxlen = sizeof(long), | 82 | .maxlen = sizeof(unsigned int), |
82 | .mode = 0644, | 83 | .mode = 0644, |
83 | .proc_handler = &proc_doulongvec_ms_jiffies_minmax, | 84 | .proc_handler = &proc_dointvec_minmax, |
84 | .strategy = &sctp_sysctl_jiffies_ms, | 85 | .strategy = &sysctl_intvec, |
85 | .extra1 = &rto_timer_min, | 86 | .extra1 = &one, |
86 | .extra2 = &rto_timer_max | 87 | .extra2 = &timer_max |
87 | }, | 88 | }, |
88 | { | 89 | { |
89 | .ctl_name = NET_SCTP_VALID_COOKIE_LIFE, | 90 | .ctl_name = NET_SCTP_VALID_COOKIE_LIFE, |
90 | .procname = "valid_cookie_life", | 91 | .procname = "valid_cookie_life", |
91 | .data = &sctp_valid_cookie_life, | 92 | .data = &sctp_valid_cookie_life, |
92 | .maxlen = sizeof(long), | 93 | .maxlen = sizeof(unsigned int), |
93 | .mode = 0644, | 94 | .mode = 0644, |
94 | .proc_handler = &proc_doulongvec_ms_jiffies_minmax, | 95 | .proc_handler = &proc_dointvec_minmax, |
95 | .strategy = &sctp_sysctl_jiffies_ms, | 96 | .strategy = &sysctl_intvec, |
96 | .extra1 = &rto_timer_min, | 97 | .extra1 = &one, |
97 | .extra2 = &rto_timer_max | 98 | .extra2 = &timer_max |
98 | }, | 99 | }, |
99 | { | 100 | { |
100 | .ctl_name = NET_SCTP_MAX_BURST, | 101 | .ctl_name = NET_SCTP_MAX_BURST, |
@@ -102,7 +103,10 @@ static ctl_table sctp_table[] = { | |||
102 | .data = &sctp_max_burst, | 103 | .data = &sctp_max_burst, |
103 | .maxlen = sizeof(int), | 104 | .maxlen = sizeof(int), |
104 | .mode = 0644, | 105 | .mode = 0644, |
105 | .proc_handler = &proc_dointvec | 106 | .proc_handler = &proc_dointvec_minmax, |
107 | .strategy = &sysctl_intvec, | ||
108 | .extra1 = &zero, | ||
109 | .extra2 = &int_max | ||
106 | }, | 110 | }, |
107 | { | 111 | { |
108 | .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS, | 112 | .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS, |
@@ -110,7 +114,10 @@ static ctl_table sctp_table[] = { | |||
110 | .data = &sctp_max_retrans_association, | 114 | .data = &sctp_max_retrans_association, |
111 | .maxlen = sizeof(int), | 115 | .maxlen = sizeof(int), |
112 | .mode = 0644, | 116 | .mode = 0644, |
113 | .proc_handler = &proc_dointvec | 117 | .proc_handler = &proc_dointvec_minmax, |
118 | .strategy = &sysctl_intvec, | ||
119 | .extra1 = &one, | ||
120 | .extra2 = &int_max | ||
114 | }, | 121 | }, |
115 | { | 122 | { |
116 | .ctl_name = NET_SCTP_SNDBUF_POLICY, | 123 | .ctl_name = NET_SCTP_SNDBUF_POLICY, |
@@ -118,7 +125,8 @@ static ctl_table sctp_table[] = { | |||
118 | .data = &sctp_sndbuf_policy, | 125 | .data = &sctp_sndbuf_policy, |
119 | .maxlen = sizeof(int), | 126 | .maxlen = sizeof(int), |
120 | .mode = 0644, | 127 | .mode = 0644, |
121 | .proc_handler = &proc_dointvec | 128 | .proc_handler = &proc_dointvec, |
129 | .strategy = &sysctl_intvec | ||
122 | }, | 130 | }, |
123 | { | 131 | { |
124 | .ctl_name = NET_SCTP_RCVBUF_POLICY, | 132 | .ctl_name = NET_SCTP_RCVBUF_POLICY, |
@@ -126,7 +134,8 @@ static ctl_table sctp_table[] = { | |||
126 | .data = &sctp_rcvbuf_policy, | 134 | .data = &sctp_rcvbuf_policy, |
127 | .maxlen = sizeof(int), | 135 | .maxlen = sizeof(int), |
128 | .mode = 0644, | 136 | .mode = 0644, |
129 | .proc_handler = &proc_dointvec | 137 | .proc_handler = &proc_dointvec, |
138 | .strategy = &sysctl_intvec | ||
130 | }, | 139 | }, |
131 | { | 140 | { |
132 | .ctl_name = NET_SCTP_PATH_MAX_RETRANS, | 141 | .ctl_name = NET_SCTP_PATH_MAX_RETRANS, |
@@ -134,7 +143,10 @@ static ctl_table sctp_table[] = { | |||
134 | .data = &sctp_max_retrans_path, | 143 | .data = &sctp_max_retrans_path, |
135 | .maxlen = sizeof(int), | 144 | .maxlen = sizeof(int), |
136 | .mode = 0644, | 145 | .mode = 0644, |
137 | .proc_handler = &proc_dointvec | 146 | .proc_handler = &proc_dointvec_minmax, |
147 | .strategy = &sysctl_intvec, | ||
148 | .extra1 = &one, | ||
149 | .extra2 = &int_max | ||
138 | }, | 150 | }, |
139 | { | 151 | { |
140 | .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS, | 152 | .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS, |
@@ -142,18 +154,21 @@ static ctl_table sctp_table[] = { | |||
142 | .data = &sctp_max_retrans_init, | 154 | .data = &sctp_max_retrans_init, |
143 | .maxlen = sizeof(int), | 155 | .maxlen = sizeof(int), |
144 | .mode = 0644, | 156 | .mode = 0644, |
145 | .proc_handler = &proc_dointvec | 157 | .proc_handler = &proc_dointvec_minmax, |
158 | .strategy = &sysctl_intvec, | ||
159 | .extra1 = &one, | ||
160 | .extra2 = &int_max | ||
146 | }, | 161 | }, |
147 | { | 162 | { |
148 | .ctl_name = NET_SCTP_HB_INTERVAL, | 163 | .ctl_name = NET_SCTP_HB_INTERVAL, |
149 | .procname = "hb_interval", | 164 | .procname = "hb_interval", |
150 | .data = &sctp_hb_interval, | 165 | .data = &sctp_hb_interval, |
151 | .maxlen = sizeof(long), | 166 | .maxlen = sizeof(unsigned int), |
152 | .mode = 0644, | 167 | .mode = 0644, |
153 | .proc_handler = &proc_doulongvec_ms_jiffies_minmax, | 168 | .proc_handler = &proc_dointvec_minmax, |
154 | .strategy = &sctp_sysctl_jiffies_ms, | 169 | .strategy = &sysctl_intvec, |
155 | .extra1 = &rto_timer_min, | 170 | .extra1 = &one, |
156 | .extra2 = &rto_timer_max | 171 | .extra2 = &timer_max |
157 | }, | 172 | }, |
158 | { | 173 | { |
159 | .ctl_name = NET_SCTP_PRESERVE_ENABLE, | 174 | .ctl_name = NET_SCTP_PRESERVE_ENABLE, |
@@ -161,23 +176,26 @@ static ctl_table sctp_table[] = { | |||
161 | .data = &sctp_cookie_preserve_enable, | 176 | .data = &sctp_cookie_preserve_enable, |
162 | .maxlen = sizeof(int), | 177 | .maxlen = sizeof(int), |
163 | .mode = 0644, | 178 | .mode = 0644, |
164 | .proc_handler = &proc_dointvec | 179 | .proc_handler = &proc_dointvec, |
180 | .strategy = &sysctl_intvec | ||
165 | }, | 181 | }, |
166 | { | 182 | { |
167 | .ctl_name = NET_SCTP_RTO_ALPHA, | 183 | .ctl_name = NET_SCTP_RTO_ALPHA, |
168 | .procname = "rto_alpha_exp_divisor", | 184 | .procname = "rto_alpha_exp_divisor", |
169 | .data = &sctp_rto_alpha, | 185 | .data = &sctp_rto_alpha, |
170 | .maxlen = sizeof(int), | 186 | .maxlen = sizeof(int), |
171 | .mode = 0644, | 187 | .mode = 0444, |
172 | .proc_handler = &proc_dointvec | 188 | .proc_handler = &proc_dointvec, |
189 | .strategy = &sysctl_intvec | ||
173 | }, | 190 | }, |
174 | { | 191 | { |
175 | .ctl_name = NET_SCTP_RTO_BETA, | 192 | .ctl_name = NET_SCTP_RTO_BETA, |
176 | .procname = "rto_beta_exp_divisor", | 193 | .procname = "rto_beta_exp_divisor", |
177 | .data = &sctp_rto_beta, | 194 | .data = &sctp_rto_beta, |
178 | .maxlen = sizeof(int), | 195 | .maxlen = sizeof(int), |
179 | .mode = 0644, | 196 | .mode = 0444, |
180 | .proc_handler = &proc_dointvec | 197 | .proc_handler = &proc_dointvec, |
198 | .strategy = &sysctl_intvec | ||
181 | }, | 199 | }, |
182 | { | 200 | { |
183 | .ctl_name = NET_SCTP_ADDIP_ENABLE, | 201 | .ctl_name = NET_SCTP_ADDIP_ENABLE, |
@@ -185,7 +203,8 @@ static ctl_table sctp_table[] = { | |||
185 | .data = &sctp_addip_enable, | 203 | .data = &sctp_addip_enable, |
186 | .maxlen = sizeof(int), | 204 | .maxlen = sizeof(int), |
187 | .mode = 0644, | 205 | .mode = 0644, |
188 | .proc_handler = &proc_dointvec | 206 | .proc_handler = &proc_dointvec, |
207 | .strategy = &sysctl_intvec | ||
189 | }, | 208 | }, |
190 | { | 209 | { |
191 | .ctl_name = NET_SCTP_PRSCTP_ENABLE, | 210 | .ctl_name = NET_SCTP_PRSCTP_ENABLE, |
@@ -193,7 +212,8 @@ static ctl_table sctp_table[] = { | |||
193 | .data = &sctp_prsctp_enable, | 212 | .data = &sctp_prsctp_enable, |
194 | .maxlen = sizeof(int), | 213 | .maxlen = sizeof(int), |
195 | .mode = 0644, | 214 | .mode = 0644, |
196 | .proc_handler = &proc_dointvec | 215 | .proc_handler = &proc_dointvec, |
216 | .strategy = &sysctl_intvec | ||
197 | }, | 217 | }, |
198 | { | 218 | { |
199 | .ctl_name = NET_SCTP_SACK_TIMEOUT, | 219 | .ctl_name = NET_SCTP_SACK_TIMEOUT, |
@@ -201,8 +221,8 @@ static ctl_table sctp_table[] = { | |||
201 | .data = &sctp_sack_timeout, | 221 | .data = &sctp_sack_timeout, |
202 | .maxlen = sizeof(long), | 222 | .maxlen = sizeof(long), |
203 | .mode = 0644, | 223 | .mode = 0644, |
204 | .proc_handler = &proc_doulongvec_ms_jiffies_minmax, | 224 | .proc_handler = &proc_dointvec_minmax, |
205 | .strategy = &sctp_sysctl_jiffies_ms, | 225 | .strategy = &sysctl_intvec, |
206 | .extra1 = &sack_timer_min, | 226 | .extra1 = &sack_timer_min, |
207 | .extra2 = &sack_timer_max, | 227 | .extra2 = &sack_timer_max, |
208 | }, | 228 | }, |
@@ -242,37 +262,3 @@ void sctp_sysctl_unregister(void) | |||
242 | { | 262 | { |
243 | unregister_sysctl_table(sctp_sysctl_header); | 263 | unregister_sysctl_table(sctp_sysctl_header); |
244 | } | 264 | } |
245 | |||
246 | /* Strategy function to convert jiffies to milliseconds. */ | ||
247 | static int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen, | ||
248 | void __user *oldval, size_t __user *oldlenp, | ||
249 | void __user *newval, size_t newlen, void **context) { | ||
250 | |||
251 | if (oldval) { | ||
252 | size_t olen; | ||
253 | |||
254 | if (oldlenp) { | ||
255 | if (get_user(olen, oldlenp)) | ||
256 | return -EFAULT; | ||
257 | |||
258 | if (olen != sizeof (int)) | ||
259 | return -EINVAL; | ||
260 | } | ||
261 | if (put_user((*(int *)(table->data) * 1000) / HZ, | ||
262 | (int __user *)oldval) || | ||
263 | (oldlenp && put_user(sizeof (int), oldlenp))) | ||
264 | return -EFAULT; | ||
265 | } | ||
266 | if (newval && newlen) { | ||
267 | int new; | ||
268 | |||
269 | if (newlen != sizeof (int)) | ||
270 | return -EINVAL; | ||
271 | |||
272 | if (get_user(new, (int __user *)newval)) | ||
273 | return -EFAULT; | ||
274 | |||
275 | *(int *)(table->data) = (new * HZ) / 1000; | ||
276 | } | ||
277 | return 1; | ||
278 | } | ||
diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 2763aa93de1a..3e5936a5f671 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c | |||
@@ -75,7 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, | |||
75 | * parameter 'RTO.Initial'. | 75 | * parameter 'RTO.Initial'. |
76 | */ | 76 | */ |
77 | peer->rtt = 0; | 77 | peer->rtt = 0; |
78 | peer->rto = sctp_rto_initial; | 78 | peer->rto = msecs_to_jiffies(sctp_rto_initial); |
79 | peer->rttvar = 0; | 79 | peer->rttvar = 0; |
80 | peer->srtt = 0; | 80 | peer->srtt = 0; |
81 | peer->rto_pending = 0; | 81 | peer->rto_pending = 0; |
diff --git a/net/socket.c b/net/socket.c index 6d261bf206fc..1bc4167e0da8 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -42,7 +42,7 @@ | |||
42 | * Andi Kleen : Some small cleanups, optimizations, | 42 | * Andi Kleen : Some small cleanups, optimizations, |
43 | * and fixed a copy_from_user() bug. | 43 | * and fixed a copy_from_user() bug. |
44 | * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) | 44 | * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) |
45 | * Tigran Aivazian : Made listen(2) backlog sanity checks | 45 | * Tigran Aivazian : Made listen(2) backlog sanity checks |
46 | * protocol-independent | 46 | * protocol-independent |
47 | * | 47 | * |
48 | * | 48 | * |
@@ -53,17 +53,17 @@ | |||
53 | * | 53 | * |
54 | * | 54 | * |
55 | * This module is effectively the top level interface to the BSD socket | 55 | * This module is effectively the top level interface to the BSD socket |
56 | * paradigm. | 56 | * paradigm. |
57 | * | 57 | * |
58 | * Based upon Swansea University Computer Society NET3.039 | 58 | * Based upon Swansea University Computer Society NET3.039 |
59 | */ | 59 | */ |
60 | 60 | ||
61 | #include <linux/mm.h> | 61 | #include <linux/mm.h> |
62 | #include <linux/smp_lock.h> | ||
63 | #include <linux/socket.h> | 62 | #include <linux/socket.h> |
64 | #include <linux/file.h> | 63 | #include <linux/file.h> |
65 | #include <linux/net.h> | 64 | #include <linux/net.h> |
66 | #include <linux/interrupt.h> | 65 | #include <linux/interrupt.h> |
66 | #include <linux/rcupdate.h> | ||
67 | #include <linux/netdevice.h> | 67 | #include <linux/netdevice.h> |
68 | #include <linux/proc_fs.h> | 68 | #include <linux/proc_fs.h> |
69 | #include <linux/seq_file.h> | 69 | #include <linux/seq_file.h> |
@@ -96,25 +96,24 @@ | |||
96 | 96 | ||
97 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare); | 97 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare); |
98 | static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, | 98 | static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, |
99 | size_t size, loff_t pos); | 99 | size_t size, loff_t pos); |
100 | static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf, | 100 | static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf, |
101 | size_t size, loff_t pos); | 101 | size_t size, loff_t pos); |
102 | static int sock_mmap(struct file *file, struct vm_area_struct * vma); | 102 | static int sock_mmap(struct file *file, struct vm_area_struct *vma); |
103 | 103 | ||
104 | static int sock_close(struct inode *inode, struct file *file); | 104 | static int sock_close(struct inode *inode, struct file *file); |
105 | static unsigned int sock_poll(struct file *file, | 105 | static unsigned int sock_poll(struct file *file, |
106 | struct poll_table_struct *wait); | 106 | struct poll_table_struct *wait); |
107 | static long sock_ioctl(struct file *file, | 107 | static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
108 | unsigned int cmd, unsigned long arg); | ||
109 | #ifdef CONFIG_COMPAT | 108 | #ifdef CONFIG_COMPAT |
110 | static long compat_sock_ioctl(struct file *file, | 109 | static long compat_sock_ioctl(struct file *file, |
111 | unsigned int cmd, unsigned long arg); | 110 | unsigned int cmd, unsigned long arg); |
112 | #endif | 111 | #endif |
113 | static int sock_fasync(int fd, struct file *filp, int on); | 112 | static int sock_fasync(int fd, struct file *filp, int on); |
114 | static ssize_t sock_readv(struct file *file, const struct iovec *vector, | 113 | static ssize_t sock_readv(struct file *file, const struct iovec *vector, |
115 | unsigned long count, loff_t *ppos); | 114 | unsigned long count, loff_t *ppos); |
116 | static ssize_t sock_writev(struct file *file, const struct iovec *vector, | 115 | static ssize_t sock_writev(struct file *file, const struct iovec *vector, |
117 | unsigned long count, loff_t *ppos); | 116 | unsigned long count, loff_t *ppos); |
118 | static ssize_t sock_sendpage(struct file *file, struct page *page, | 117 | static ssize_t sock_sendpage(struct file *file, struct page *page, |
119 | int offset, size_t size, loff_t *ppos, int more); | 118 | int offset, size_t size, loff_t *ppos, int more); |
120 | 119 | ||
@@ -147,52 +146,8 @@ static struct file_operations socket_file_ops = { | |||
147 | * The protocol list. Each protocol is registered in here. | 146 | * The protocol list. Each protocol is registered in here. |
148 | */ | 147 | */ |
149 | 148 | ||
150 | static struct net_proto_family *net_families[NPROTO]; | ||
151 | |||
152 | #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) | ||
153 | static atomic_t net_family_lockct = ATOMIC_INIT(0); | ||
154 | static DEFINE_SPINLOCK(net_family_lock); | 149 | static DEFINE_SPINLOCK(net_family_lock); |
155 | 150 | static const struct net_proto_family *net_families[NPROTO] __read_mostly; | |
156 | /* The strategy is: modifications net_family vector are short, do not | ||
157 | sleep and veeery rare, but read access should be free of any exclusive | ||
158 | locks. | ||
159 | */ | ||
160 | |||
161 | static void net_family_write_lock(void) | ||
162 | { | ||
163 | spin_lock(&net_family_lock); | ||
164 | while (atomic_read(&net_family_lockct) != 0) { | ||
165 | spin_unlock(&net_family_lock); | ||
166 | |||
167 | yield(); | ||
168 | |||
169 | spin_lock(&net_family_lock); | ||
170 | } | ||
171 | } | ||
172 | |||
173 | static __inline__ void net_family_write_unlock(void) | ||
174 | { | ||
175 | spin_unlock(&net_family_lock); | ||
176 | } | ||
177 | |||
178 | static __inline__ void net_family_read_lock(void) | ||
179 | { | ||
180 | atomic_inc(&net_family_lockct); | ||
181 | spin_unlock_wait(&net_family_lock); | ||
182 | } | ||
183 | |||
184 | static __inline__ void net_family_read_unlock(void) | ||
185 | { | ||
186 | atomic_dec(&net_family_lockct); | ||
187 | } | ||
188 | |||
189 | #else | ||
190 | #define net_family_write_lock() do { } while(0) | ||
191 | #define net_family_write_unlock() do { } while(0) | ||
192 | #define net_family_read_lock() do { } while(0) | ||
193 | #define net_family_read_unlock() do { } while(0) | ||
194 | #endif | ||
195 | |||
196 | 151 | ||
197 | /* | 152 | /* |
198 | * Statistics counters of the socket lists | 153 | * Statistics counters of the socket lists |
@@ -201,19 +156,20 @@ static __inline__ void net_family_read_unlock(void) | |||
201 | static DEFINE_PER_CPU(int, sockets_in_use) = 0; | 156 | static DEFINE_PER_CPU(int, sockets_in_use) = 0; |
202 | 157 | ||
203 | /* | 158 | /* |
204 | * Support routines. Move socket addresses back and forth across the kernel/user | 159 | * Support routines. |
205 | * divide and look after the messy bits. | 160 | * Move socket addresses back and forth across the kernel/user |
161 | * divide and look after the messy bits. | ||
206 | */ | 162 | */ |
207 | 163 | ||
208 | #define MAX_SOCK_ADDR 128 /* 108 for Unix domain - | 164 | #define MAX_SOCK_ADDR 128 /* 108 for Unix domain - |
209 | 16 for IP, 16 for IPX, | 165 | 16 for IP, 16 for IPX, |
210 | 24 for IPv6, | 166 | 24 for IPv6, |
211 | about 80 for AX.25 | 167 | about 80 for AX.25 |
212 | must be at least one bigger than | 168 | must be at least one bigger than |
213 | the AF_UNIX size (see net/unix/af_unix.c | 169 | the AF_UNIX size (see net/unix/af_unix.c |
214 | :unix_mkname()). | 170 | :unix_mkname()). |
215 | */ | 171 | */ |
216 | 172 | ||
217 | /** | 173 | /** |
218 | * move_addr_to_kernel - copy a socket address into kernel space | 174 | * move_addr_to_kernel - copy a socket address into kernel space |
219 | * @uaddr: Address in user space | 175 | * @uaddr: Address in user space |
@@ -227,11 +183,11 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0; | |||
227 | 183 | ||
228 | int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) | 184 | int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) |
229 | { | 185 | { |
230 | if(ulen<0||ulen>MAX_SOCK_ADDR) | 186 | if (ulen < 0 || ulen > MAX_SOCK_ADDR) |
231 | return -EINVAL; | 187 | return -EINVAL; |
232 | if(ulen==0) | 188 | if (ulen == 0) |
233 | return 0; | 189 | return 0; |
234 | if(copy_from_user(kaddr,uaddr,ulen)) | 190 | if (copy_from_user(kaddr, uaddr, ulen)) |
235 | return -EFAULT; | 191 | return -EFAULT; |
236 | return audit_sockaddr(ulen, kaddr); | 192 | return audit_sockaddr(ulen, kaddr); |
237 | } | 193 | } |
@@ -252,51 +208,52 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) | |||
252 | * length of the data is written over the length limit the user | 208 | * length of the data is written over the length limit the user |
253 | * specified. Zero is returned for a success. | 209 | * specified. Zero is returned for a success. |
254 | */ | 210 | */ |
255 | 211 | ||
256 | int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen) | 212 | int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, |
213 | int __user *ulen) | ||
257 | { | 214 | { |
258 | int err; | 215 | int err; |
259 | int len; | 216 | int len; |
260 | 217 | ||
261 | if((err=get_user(len, ulen))) | 218 | err = get_user(len, ulen); |
219 | if (err) | ||
262 | return err; | 220 | return err; |
263 | if(len>klen) | 221 | if (len > klen) |
264 | len=klen; | 222 | len = klen; |
265 | if(len<0 || len> MAX_SOCK_ADDR) | 223 | if (len < 0 || len > MAX_SOCK_ADDR) |
266 | return -EINVAL; | 224 | return -EINVAL; |
267 | if(len) | 225 | if (len) { |
268 | { | ||
269 | if (audit_sockaddr(klen, kaddr)) | 226 | if (audit_sockaddr(klen, kaddr)) |
270 | return -ENOMEM; | 227 | return -ENOMEM; |
271 | if(copy_to_user(uaddr,kaddr,len)) | 228 | if (copy_to_user(uaddr, kaddr, len)) |
272 | return -EFAULT; | 229 | return -EFAULT; |
273 | } | 230 | } |
274 | /* | 231 | /* |
275 | * "fromlen shall refer to the value before truncation.." | 232 | * "fromlen shall refer to the value before truncation.." |
276 | * 1003.1g | 233 | * 1003.1g |
277 | */ | 234 | */ |
278 | return __put_user(klen, ulen); | 235 | return __put_user(klen, ulen); |
279 | } | 236 | } |
280 | 237 | ||
281 | #define SOCKFS_MAGIC 0x534F434B | 238 | #define SOCKFS_MAGIC 0x534F434B |
282 | 239 | ||
283 | static kmem_cache_t * sock_inode_cachep __read_mostly; | 240 | static kmem_cache_t *sock_inode_cachep __read_mostly; |
284 | 241 | ||
285 | static struct inode *sock_alloc_inode(struct super_block *sb) | 242 | static struct inode *sock_alloc_inode(struct super_block *sb) |
286 | { | 243 | { |
287 | struct socket_alloc *ei; | 244 | struct socket_alloc *ei; |
288 | ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); | 245 | |
246 | ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); | ||
289 | if (!ei) | 247 | if (!ei) |
290 | return NULL; | 248 | return NULL; |
291 | init_waitqueue_head(&ei->socket.wait); | 249 | init_waitqueue_head(&ei->socket.wait); |
292 | 250 | ||
293 | ei->socket.fasync_list = NULL; | 251 | ei->socket.fasync_list = NULL; |
294 | ei->socket.state = SS_UNCONNECTED; | 252 | ei->socket.state = SS_UNCONNECTED; |
295 | ei->socket.flags = 0; | 253 | ei->socket.flags = 0; |
296 | ei->socket.ops = NULL; | 254 | ei->socket.ops = NULL; |
297 | ei->socket.sk = NULL; | 255 | ei->socket.sk = NULL; |
298 | ei->socket.file = NULL; | 256 | ei->socket.file = NULL; |
299 | ei->socket.flags = 0; | ||
300 | 257 | ||
301 | return &ei->vfs_inode; | 258 | return &ei->vfs_inode; |
302 | } | 259 | } |
@@ -307,22 +264,25 @@ static void sock_destroy_inode(struct inode *inode) | |||
307 | container_of(inode, struct socket_alloc, vfs_inode)); | 264 | container_of(inode, struct socket_alloc, vfs_inode)); |
308 | } | 265 | } |
309 | 266 | ||
310 | static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) | 267 | static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) |
311 | { | 268 | { |
312 | struct socket_alloc *ei = (struct socket_alloc *) foo; | 269 | struct socket_alloc *ei = (struct socket_alloc *)foo; |
313 | 270 | ||
314 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | 271 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) |
315 | SLAB_CTOR_CONSTRUCTOR) | 272 | == SLAB_CTOR_CONSTRUCTOR) |
316 | inode_init_once(&ei->vfs_inode); | 273 | inode_init_once(&ei->vfs_inode); |
317 | } | 274 | } |
318 | 275 | ||
319 | static int init_inodecache(void) | 276 | static int init_inodecache(void) |
320 | { | 277 | { |
321 | sock_inode_cachep = kmem_cache_create("sock_inode_cache", | 278 | sock_inode_cachep = kmem_cache_create("sock_inode_cache", |
322 | sizeof(struct socket_alloc), | 279 | sizeof(struct socket_alloc), |
323 | 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | 280 | 0, |
324 | SLAB_MEM_SPREAD), | 281 | (SLAB_HWCACHE_ALIGN | |
325 | init_once, NULL); | 282 | SLAB_RECLAIM_ACCOUNT | |
283 | SLAB_MEM_SPREAD), | ||
284 | init_once, | ||
285 | NULL); | ||
326 | if (sock_inode_cachep == NULL) | 286 | if (sock_inode_cachep == NULL) |
327 | return -ENOMEM; | 287 | return -ENOMEM; |
328 | return 0; | 288 | return 0; |
@@ -335,7 +295,8 @@ static struct super_operations sockfs_ops = { | |||
335 | }; | 295 | }; |
336 | 296 | ||
337 | static int sockfs_get_sb(struct file_system_type *fs_type, | 297 | static int sockfs_get_sb(struct file_system_type *fs_type, |
338 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 298 | int flags, const char *dev_name, void *data, |
299 | struct vfsmount *mnt) | ||
339 | { | 300 | { |
340 | return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, | 301 | return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, |
341 | mnt); | 302 | mnt); |
@@ -348,12 +309,13 @@ static struct file_system_type sock_fs_type = { | |||
348 | .get_sb = sockfs_get_sb, | 309 | .get_sb = sockfs_get_sb, |
349 | .kill_sb = kill_anon_super, | 310 | .kill_sb = kill_anon_super, |
350 | }; | 311 | }; |
312 | |||
351 | static int sockfs_delete_dentry(struct dentry *dentry) | 313 | static int sockfs_delete_dentry(struct dentry *dentry) |
352 | { | 314 | { |
353 | return 1; | 315 | return 1; |
354 | } | 316 | } |
355 | static struct dentry_operations sockfs_dentry_operations = { | 317 | static struct dentry_operations sockfs_dentry_operations = { |
356 | .d_delete = sockfs_delete_dentry, | 318 | .d_delete = sockfs_delete_dentry, |
357 | }; | 319 | }; |
358 | 320 | ||
359 | /* | 321 | /* |
@@ -477,10 +439,12 @@ struct socket *sockfd_lookup(int fd, int *err) | |||
477 | struct file *file; | 439 | struct file *file; |
478 | struct socket *sock; | 440 | struct socket *sock; |
479 | 441 | ||
480 | if (!(file = fget(fd))) { | 442 | file = fget(fd); |
443 | if (!file) { | ||
481 | *err = -EBADF; | 444 | *err = -EBADF; |
482 | return NULL; | 445 | return NULL; |
483 | } | 446 | } |
447 | |||
484 | sock = sock_from_file(file, err); | 448 | sock = sock_from_file(file, err); |
485 | if (!sock) | 449 | if (!sock) |
486 | fput(file); | 450 | fput(file); |
@@ -505,7 +469,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) | |||
505 | 469 | ||
506 | /** | 470 | /** |
507 | * sock_alloc - allocate a socket | 471 | * sock_alloc - allocate a socket |
508 | * | 472 | * |
509 | * Allocate a new inode and socket object. The two are bound together | 473 | * Allocate a new inode and socket object. The two are bound together |
510 | * and initialised. The socket is then returned. If we are out of inodes | 474 | * and initialised. The socket is then returned. If we are out of inodes |
511 | * NULL is returned. | 475 | * NULL is returned. |
@@ -513,8 +477,8 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) | |||
513 | 477 | ||
514 | static struct socket *sock_alloc(void) | 478 | static struct socket *sock_alloc(void) |
515 | { | 479 | { |
516 | struct inode * inode; | 480 | struct inode *inode; |
517 | struct socket * sock; | 481 | struct socket *sock; |
518 | 482 | ||
519 | inode = new_inode(sock_mnt->mnt_sb); | 483 | inode = new_inode(sock_mnt->mnt_sb); |
520 | if (!inode) | 484 | if (!inode) |
@@ -522,7 +486,7 @@ static struct socket *sock_alloc(void) | |||
522 | 486 | ||
523 | sock = SOCKET_I(inode); | 487 | sock = SOCKET_I(inode); |
524 | 488 | ||
525 | inode->i_mode = S_IFSOCK|S_IRWXUGO; | 489 | inode->i_mode = S_IFSOCK | S_IRWXUGO; |
526 | inode->i_uid = current->fsuid; | 490 | inode->i_uid = current->fsuid; |
527 | inode->i_gid = current->fsgid; | 491 | inode->i_gid = current->fsgid; |
528 | 492 | ||
@@ -536,7 +500,7 @@ static struct socket *sock_alloc(void) | |||
536 | * a back door. Remember to keep it shut otherwise you'll let the | 500 | * a back door. Remember to keep it shut otherwise you'll let the |
537 | * creepy crawlies in. | 501 | * creepy crawlies in. |
538 | */ | 502 | */ |
539 | 503 | ||
540 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare) | 504 | static int sock_no_open(struct inode *irrelevant, struct file *dontcare) |
541 | { | 505 | { |
542 | return -ENXIO; | 506 | return -ENXIO; |
@@ -553,9 +517,9 @@ const struct file_operations bad_sock_fops = { | |||
553 | * | 517 | * |
554 | * The socket is released from the protocol stack if it has a release | 518 | * The socket is released from the protocol stack if it has a release |
555 | * callback, and the inode is then released if the socket is bound to | 519 | * callback, and the inode is then released if the socket is bound to |
556 | * an inode not a file. | 520 | * an inode not a file. |
557 | */ | 521 | */ |
558 | 522 | ||
559 | void sock_release(struct socket *sock) | 523 | void sock_release(struct socket *sock) |
560 | { | 524 | { |
561 | if (sock->ops) { | 525 | if (sock->ops) { |
@@ -575,10 +539,10 @@ void sock_release(struct socket *sock) | |||
575 | iput(SOCK_INODE(sock)); | 539 | iput(SOCK_INODE(sock)); |
576 | return; | 540 | return; |
577 | } | 541 | } |
578 | sock->file=NULL; | 542 | sock->file = NULL; |
579 | } | 543 | } |
580 | 544 | ||
581 | static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, | 545 | static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, |
582 | struct msghdr *msg, size_t size) | 546 | struct msghdr *msg, size_t size) |
583 | { | 547 | { |
584 | struct sock_iocb *si = kiocb_to_siocb(iocb); | 548 | struct sock_iocb *si = kiocb_to_siocb(iocb); |
@@ -621,14 +585,14 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, | |||
621 | * the following is safe, since for compiler definitions of kvec and | 585 | * the following is safe, since for compiler definitions of kvec and |
622 | * iovec are identical, yielding the same in-core layout and alignment | 586 | * iovec are identical, yielding the same in-core layout and alignment |
623 | */ | 587 | */ |
624 | msg->msg_iov = (struct iovec *)vec, | 588 | msg->msg_iov = (struct iovec *)vec; |
625 | msg->msg_iovlen = num; | 589 | msg->msg_iovlen = num; |
626 | result = sock_sendmsg(sock, msg, size); | 590 | result = sock_sendmsg(sock, msg, size); |
627 | set_fs(oldfs); | 591 | set_fs(oldfs); |
628 | return result; | 592 | return result; |
629 | } | 593 | } |
630 | 594 | ||
631 | static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, | 595 | static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, |
632 | struct msghdr *msg, size_t size, int flags) | 596 | struct msghdr *msg, size_t size, int flags) |
633 | { | 597 | { |
634 | int err; | 598 | int err; |
@@ -647,14 +611,14 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
647 | return sock->ops->recvmsg(iocb, sock, msg, size, flags); | 611 | return sock->ops->recvmsg(iocb, sock, msg, size, flags); |
648 | } | 612 | } |
649 | 613 | ||
650 | int sock_recvmsg(struct socket *sock, struct msghdr *msg, | 614 | int sock_recvmsg(struct socket *sock, struct msghdr *msg, |
651 | size_t size, int flags) | 615 | size_t size, int flags) |
652 | { | 616 | { |
653 | struct kiocb iocb; | 617 | struct kiocb iocb; |
654 | struct sock_iocb siocb; | 618 | struct sock_iocb siocb; |
655 | int ret; | 619 | int ret; |
656 | 620 | ||
657 | init_sync_kiocb(&iocb, NULL); | 621 | init_sync_kiocb(&iocb, NULL); |
658 | iocb.private = &siocb; | 622 | iocb.private = &siocb; |
659 | ret = __sock_recvmsg(&iocb, sock, msg, size, flags); | 623 | ret = __sock_recvmsg(&iocb, sock, msg, size, flags); |
660 | if (-EIOCBQUEUED == ret) | 624 | if (-EIOCBQUEUED == ret) |
@@ -662,9 +626,8 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, | |||
662 | return ret; | 626 | return ret; |
663 | } | 627 | } |
664 | 628 | ||
665 | int kernel_recvmsg(struct socket *sock, struct msghdr *msg, | 629 | int kernel_recvmsg(struct socket *sock, struct msghdr *msg, |
666 | struct kvec *vec, size_t num, | 630 | struct kvec *vec, size_t num, size_t size, int flags) |
667 | size_t size, int flags) | ||
668 | { | 631 | { |
669 | mm_segment_t oldfs = get_fs(); | 632 | mm_segment_t oldfs = get_fs(); |
670 | int result; | 633 | int result; |
@@ -674,8 +637,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, | |||
674 | * the following is safe, since for compiler definitions of kvec and | 637 | * the following is safe, since for compiler definitions of kvec and |
675 | * iovec are identical, yielding the same in-core layout and alignment | 638 | * iovec are identical, yielding the same in-core layout and alignment |
676 | */ | 639 | */ |
677 | msg->msg_iov = (struct iovec *)vec, | 640 | msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; |
678 | msg->msg_iovlen = num; | ||
679 | result = sock_recvmsg(sock, msg, size, flags); | 641 | result = sock_recvmsg(sock, msg, size, flags); |
680 | set_fs(oldfs); | 642 | set_fs(oldfs); |
681 | return result; | 643 | return result; |
@@ -702,7 +664,8 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, | |||
702 | } | 664 | } |
703 | 665 | ||
704 | static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, | 666 | static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, |
705 | char __user *ubuf, size_t size, struct sock_iocb *siocb) | 667 | char __user *ubuf, size_t size, |
668 | struct sock_iocb *siocb) | ||
706 | { | 669 | { |
707 | if (!is_sync_kiocb(iocb)) { | 670 | if (!is_sync_kiocb(iocb)) { |
708 | siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); | 671 | siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); |
@@ -720,20 +683,21 @@ static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, | |||
720 | } | 683 | } |
721 | 684 | ||
722 | static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, | 685 | static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, |
723 | struct file *file, struct iovec *iov, unsigned long nr_segs) | 686 | struct file *file, struct iovec *iov, |
687 | unsigned long nr_segs) | ||
724 | { | 688 | { |
725 | struct socket *sock = file->private_data; | 689 | struct socket *sock = file->private_data; |
726 | size_t size = 0; | 690 | size_t size = 0; |
727 | int i; | 691 | int i; |
728 | 692 | ||
729 | for (i = 0 ; i < nr_segs ; i++) | 693 | for (i = 0; i < nr_segs; i++) |
730 | size += iov[i].iov_len; | 694 | size += iov[i].iov_len; |
731 | 695 | ||
732 | msg->msg_name = NULL; | 696 | msg->msg_name = NULL; |
733 | msg->msg_namelen = 0; | 697 | msg->msg_namelen = 0; |
734 | msg->msg_control = NULL; | 698 | msg->msg_control = NULL; |
735 | msg->msg_controllen = 0; | 699 | msg->msg_controllen = 0; |
736 | msg->msg_iov = (struct iovec *) iov; | 700 | msg->msg_iov = (struct iovec *)iov; |
737 | msg->msg_iovlen = nr_segs; | 701 | msg->msg_iovlen = nr_segs; |
738 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; | 702 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; |
739 | 703 | ||
@@ -748,7 +712,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov, | |||
748 | struct msghdr msg; | 712 | struct msghdr msg; |
749 | int ret; | 713 | int ret; |
750 | 714 | ||
751 | init_sync_kiocb(&iocb, NULL); | 715 | init_sync_kiocb(&iocb, NULL); |
752 | iocb.private = &siocb; | 716 | iocb.private = &siocb; |
753 | 717 | ||
754 | ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); | 718 | ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); |
@@ -758,7 +722,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov, | |||
758 | } | 722 | } |
759 | 723 | ||
760 | static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, | 724 | static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, |
761 | size_t count, loff_t pos) | 725 | size_t count, loff_t pos) |
762 | { | 726 | { |
763 | struct sock_iocb siocb, *x; | 727 | struct sock_iocb siocb, *x; |
764 | 728 | ||
@@ -771,24 +735,25 @@ static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, | |||
771 | if (!x) | 735 | if (!x) |
772 | return -ENOMEM; | 736 | return -ENOMEM; |
773 | return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, | 737 | return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, |
774 | &x->async_iov, 1); | 738 | &x->async_iov, 1); |
775 | } | 739 | } |
776 | 740 | ||
777 | static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, | 741 | static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, |
778 | struct file *file, struct iovec *iov, unsigned long nr_segs) | 742 | struct file *file, struct iovec *iov, |
743 | unsigned long nr_segs) | ||
779 | { | 744 | { |
780 | struct socket *sock = file->private_data; | 745 | struct socket *sock = file->private_data; |
781 | size_t size = 0; | 746 | size_t size = 0; |
782 | int i; | 747 | int i; |
783 | 748 | ||
784 | for (i = 0 ; i < nr_segs ; i++) | 749 | for (i = 0; i < nr_segs; i++) |
785 | size += iov[i].iov_len; | 750 | size += iov[i].iov_len; |
786 | 751 | ||
787 | msg->msg_name = NULL; | 752 | msg->msg_name = NULL; |
788 | msg->msg_namelen = 0; | 753 | msg->msg_namelen = 0; |
789 | msg->msg_control = NULL; | 754 | msg->msg_control = NULL; |
790 | msg->msg_controllen = 0; | 755 | msg->msg_controllen = 0; |
791 | msg->msg_iov = (struct iovec *) iov; | 756 | msg->msg_iov = (struct iovec *)iov; |
792 | msg->msg_iovlen = nr_segs; | 757 | msg->msg_iovlen = nr_segs; |
793 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; | 758 | msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; |
794 | if (sock->type == SOCK_SEQPACKET) | 759 | if (sock->type == SOCK_SEQPACKET) |
@@ -815,7 +780,7 @@ static ssize_t sock_writev(struct file *file, const struct iovec *iov, | |||
815 | } | 780 | } |
816 | 781 | ||
817 | static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, | 782 | static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, |
818 | size_t count, loff_t pos) | 783 | size_t count, loff_t pos) |
819 | { | 784 | { |
820 | struct sock_iocb siocb, *x; | 785 | struct sock_iocb siocb, *x; |
821 | 786 | ||
@@ -829,46 +794,48 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, | |||
829 | return -ENOMEM; | 794 | return -ENOMEM; |
830 | 795 | ||
831 | return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, | 796 | return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, |
832 | &x->async_iov, 1); | 797 | &x->async_iov, 1); |
833 | } | 798 | } |
834 | 799 | ||
835 | |||
836 | /* | 800 | /* |
837 | * Atomic setting of ioctl hooks to avoid race | 801 | * Atomic setting of ioctl hooks to avoid race |
838 | * with module unload. | 802 | * with module unload. |
839 | */ | 803 | */ |
840 | 804 | ||
841 | static DEFINE_MUTEX(br_ioctl_mutex); | 805 | static DEFINE_MUTEX(br_ioctl_mutex); |
842 | static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL; | 806 | static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; |
843 | 807 | ||
844 | void brioctl_set(int (*hook)(unsigned int, void __user *)) | 808 | void brioctl_set(int (*hook) (unsigned int, void __user *)) |
845 | { | 809 | { |
846 | mutex_lock(&br_ioctl_mutex); | 810 | mutex_lock(&br_ioctl_mutex); |
847 | br_ioctl_hook = hook; | 811 | br_ioctl_hook = hook; |
848 | mutex_unlock(&br_ioctl_mutex); | 812 | mutex_unlock(&br_ioctl_mutex); |
849 | } | 813 | } |
814 | |||
850 | EXPORT_SYMBOL(brioctl_set); | 815 | EXPORT_SYMBOL(brioctl_set); |
851 | 816 | ||
852 | static DEFINE_MUTEX(vlan_ioctl_mutex); | 817 | static DEFINE_MUTEX(vlan_ioctl_mutex); |
853 | static int (*vlan_ioctl_hook)(void __user *arg); | 818 | static int (*vlan_ioctl_hook) (void __user *arg); |
854 | 819 | ||
855 | void vlan_ioctl_set(int (*hook)(void __user *)) | 820 | void vlan_ioctl_set(int (*hook) (void __user *)) |
856 | { | 821 | { |
857 | mutex_lock(&vlan_ioctl_mutex); | 822 | mutex_lock(&vlan_ioctl_mutex); |
858 | vlan_ioctl_hook = hook; | 823 | vlan_ioctl_hook = hook; |
859 | mutex_unlock(&vlan_ioctl_mutex); | 824 | mutex_unlock(&vlan_ioctl_mutex); |
860 | } | 825 | } |
826 | |||
861 | EXPORT_SYMBOL(vlan_ioctl_set); | 827 | EXPORT_SYMBOL(vlan_ioctl_set); |
862 | 828 | ||
863 | static DEFINE_MUTEX(dlci_ioctl_mutex); | 829 | static DEFINE_MUTEX(dlci_ioctl_mutex); |
864 | static int (*dlci_ioctl_hook)(unsigned int, void __user *); | 830 | static int (*dlci_ioctl_hook) (unsigned int, void __user *); |
865 | 831 | ||
866 | void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)) | 832 | void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) |
867 | { | 833 | { |
868 | mutex_lock(&dlci_ioctl_mutex); | 834 | mutex_lock(&dlci_ioctl_mutex); |
869 | dlci_ioctl_hook = hook; | 835 | dlci_ioctl_hook = hook; |
870 | mutex_unlock(&dlci_ioctl_mutex); | 836 | mutex_unlock(&dlci_ioctl_mutex); |
871 | } | 837 | } |
838 | |||
872 | EXPORT_SYMBOL(dlci_ioctl_set); | 839 | EXPORT_SYMBOL(dlci_ioctl_set); |
873 | 840 | ||
874 | /* | 841 | /* |
@@ -890,8 +857,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
890 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { | 857 | if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { |
891 | err = dev_ioctl(cmd, argp); | 858 | err = dev_ioctl(cmd, argp); |
892 | } else | 859 | } else |
893 | #endif /* CONFIG_WIRELESS_EXT */ | 860 | #endif /* CONFIG_WIRELESS_EXT */ |
894 | switch (cmd) { | 861 | switch (cmd) { |
895 | case FIOSETOWN: | 862 | case FIOSETOWN: |
896 | case SIOCSPGRP: | 863 | case SIOCSPGRP: |
897 | err = -EFAULT; | 864 | err = -EFAULT; |
@@ -901,7 +868,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
901 | break; | 868 | break; |
902 | case FIOGETOWN: | 869 | case FIOGETOWN: |
903 | case SIOCGPGRP: | 870 | case SIOCGPGRP: |
904 | err = put_user(sock->file->f_owner.pid, (int __user *)argp); | 871 | err = put_user(sock->file->f_owner.pid, |
872 | (int __user *)argp); | ||
905 | break; | 873 | break; |
906 | case SIOCGIFBR: | 874 | case SIOCGIFBR: |
907 | case SIOCSIFBR: | 875 | case SIOCSIFBR: |
@@ -912,7 +880,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
912 | request_module("bridge"); | 880 | request_module("bridge"); |
913 | 881 | ||
914 | mutex_lock(&br_ioctl_mutex); | 882 | mutex_lock(&br_ioctl_mutex); |
915 | if (br_ioctl_hook) | 883 | if (br_ioctl_hook) |
916 | err = br_ioctl_hook(cmd, argp); | 884 | err = br_ioctl_hook(cmd, argp); |
917 | mutex_unlock(&br_ioctl_mutex); | 885 | mutex_unlock(&br_ioctl_mutex); |
918 | break; | 886 | break; |
@@ -929,7 +897,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
929 | break; | 897 | break; |
930 | case SIOCGIFDIVERT: | 898 | case SIOCGIFDIVERT: |
931 | case SIOCSIFDIVERT: | 899 | case SIOCSIFDIVERT: |
932 | /* Convert this to call through a hook */ | 900 | /* Convert this to call through a hook */ |
933 | err = divert_ioctl(cmd, argp); | 901 | err = divert_ioctl(cmd, argp); |
934 | break; | 902 | break; |
935 | case SIOCADDDLCI: | 903 | case SIOCADDDLCI: |
@@ -954,7 +922,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
954 | if (err == -ENOIOCTLCMD) | 922 | if (err == -ENOIOCTLCMD) |
955 | err = dev_ioctl(cmd, argp); | 923 | err = dev_ioctl(cmd, argp); |
956 | break; | 924 | break; |
957 | } | 925 | } |
958 | return err; | 926 | return err; |
959 | } | 927 | } |
960 | 928 | ||
@@ -962,7 +930,7 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res) | |||
962 | { | 930 | { |
963 | int err; | 931 | int err; |
964 | struct socket *sock = NULL; | 932 | struct socket *sock = NULL; |
965 | 933 | ||
966 | err = security_socket_create(family, type, protocol, 1); | 934 | err = security_socket_create(family, type, protocol, 1); |
967 | if (err) | 935 | if (err) |
968 | goto out; | 936 | goto out; |
@@ -973,26 +941,33 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res) | |||
973 | goto out; | 941 | goto out; |
974 | } | 942 | } |
975 | 943 | ||
976 | security_socket_post_create(sock, family, type, protocol, 1); | ||
977 | sock->type = type; | 944 | sock->type = type; |
945 | err = security_socket_post_create(sock, family, type, protocol, 1); | ||
946 | if (err) | ||
947 | goto out_release; | ||
948 | |||
978 | out: | 949 | out: |
979 | *res = sock; | 950 | *res = sock; |
980 | return err; | 951 | return err; |
952 | out_release: | ||
953 | sock_release(sock); | ||
954 | sock = NULL; | ||
955 | goto out; | ||
981 | } | 956 | } |
982 | 957 | ||
983 | /* No kernel lock held - perfect */ | 958 | /* No kernel lock held - perfect */ |
984 | static unsigned int sock_poll(struct file *file, poll_table * wait) | 959 | static unsigned int sock_poll(struct file *file, poll_table *wait) |
985 | { | 960 | { |
986 | struct socket *sock; | 961 | struct socket *sock; |
987 | 962 | ||
988 | /* | 963 | /* |
989 | * We can't return errors to poll, so it's either yes or no. | 964 | * We can't return errors to poll, so it's either yes or no. |
990 | */ | 965 | */ |
991 | sock = file->private_data; | 966 | sock = file->private_data; |
992 | return sock->ops->poll(file, sock, wait); | 967 | return sock->ops->poll(file, sock, wait); |
993 | } | 968 | } |
994 | 969 | ||
995 | static int sock_mmap(struct file * file, struct vm_area_struct * vma) | 970 | static int sock_mmap(struct file *file, struct vm_area_struct *vma) |
996 | { | 971 | { |
997 | struct socket *sock = file->private_data; | 972 | struct socket *sock = file->private_data; |
998 | 973 | ||
@@ -1002,12 +977,11 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma) | |||
1002 | static int sock_close(struct inode *inode, struct file *filp) | 977 | static int sock_close(struct inode *inode, struct file *filp) |
1003 | { | 978 | { |
1004 | /* | 979 | /* |
1005 | * It was possible the inode is NULL we were | 980 | * It was possible the inode is NULL we were |
1006 | * closing an unfinished socket. | 981 | * closing an unfinished socket. |
1007 | */ | 982 | */ |
1008 | 983 | ||
1009 | if (!inode) | 984 | if (!inode) { |
1010 | { | ||
1011 | printk(KERN_DEBUG "sock_close: NULL inode\n"); | 985 | printk(KERN_DEBUG "sock_close: NULL inode\n"); |
1012 | return 0; | 986 | return 0; |
1013 | } | 987 | } |
@@ -1033,57 +1007,52 @@ static int sock_close(struct inode *inode, struct file *filp) | |||
1033 | 1007 | ||
1034 | static int sock_fasync(int fd, struct file *filp, int on) | 1008 | static int sock_fasync(int fd, struct file *filp, int on) |
1035 | { | 1009 | { |
1036 | struct fasync_struct *fa, *fna=NULL, **prev; | 1010 | struct fasync_struct *fa, *fna = NULL, **prev; |
1037 | struct socket *sock; | 1011 | struct socket *sock; |
1038 | struct sock *sk; | 1012 | struct sock *sk; |
1039 | 1013 | ||
1040 | if (on) | 1014 | if (on) { |
1041 | { | ||
1042 | fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); | 1015 | fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); |
1043 | if(fna==NULL) | 1016 | if (fna == NULL) |
1044 | return -ENOMEM; | 1017 | return -ENOMEM; |
1045 | } | 1018 | } |
1046 | 1019 | ||
1047 | sock = filp->private_data; | 1020 | sock = filp->private_data; |
1048 | 1021 | ||
1049 | if ((sk=sock->sk) == NULL) { | 1022 | sk = sock->sk; |
1023 | if (sk == NULL) { | ||
1050 | kfree(fna); | 1024 | kfree(fna); |
1051 | return -EINVAL; | 1025 | return -EINVAL; |
1052 | } | 1026 | } |
1053 | 1027 | ||
1054 | lock_sock(sk); | 1028 | lock_sock(sk); |
1055 | 1029 | ||
1056 | prev=&(sock->fasync_list); | 1030 | prev = &(sock->fasync_list); |
1057 | 1031 | ||
1058 | for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev) | 1032 | for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) |
1059 | if (fa->fa_file==filp) | 1033 | if (fa->fa_file == filp) |
1060 | break; | 1034 | break; |
1061 | 1035 | ||
1062 | if(on) | 1036 | if (on) { |
1063 | { | 1037 | if (fa != NULL) { |
1064 | if(fa!=NULL) | ||
1065 | { | ||
1066 | write_lock_bh(&sk->sk_callback_lock); | 1038 | write_lock_bh(&sk->sk_callback_lock); |
1067 | fa->fa_fd=fd; | 1039 | fa->fa_fd = fd; |
1068 | write_unlock_bh(&sk->sk_callback_lock); | 1040 | write_unlock_bh(&sk->sk_callback_lock); |
1069 | 1041 | ||
1070 | kfree(fna); | 1042 | kfree(fna); |
1071 | goto out; | 1043 | goto out; |
1072 | } | 1044 | } |
1073 | fna->fa_file=filp; | 1045 | fna->fa_file = filp; |
1074 | fna->fa_fd=fd; | 1046 | fna->fa_fd = fd; |
1075 | fna->magic=FASYNC_MAGIC; | 1047 | fna->magic = FASYNC_MAGIC; |
1076 | fna->fa_next=sock->fasync_list; | 1048 | fna->fa_next = sock->fasync_list; |
1077 | write_lock_bh(&sk->sk_callback_lock); | 1049 | write_lock_bh(&sk->sk_callback_lock); |
1078 | sock->fasync_list=fna; | 1050 | sock->fasync_list = fna; |
1079 | write_unlock_bh(&sk->sk_callback_lock); | 1051 | write_unlock_bh(&sk->sk_callback_lock); |
1080 | } | 1052 | } else { |
1081 | else | 1053 | if (fa != NULL) { |
1082 | { | ||
1083 | if (fa!=NULL) | ||
1084 | { | ||
1085 | write_lock_bh(&sk->sk_callback_lock); | 1054 | write_lock_bh(&sk->sk_callback_lock); |
1086 | *prev=fa->fa_next; | 1055 | *prev = fa->fa_next; |
1087 | write_unlock_bh(&sk->sk_callback_lock); | 1056 | write_unlock_bh(&sk->sk_callback_lock); |
1088 | kfree(fa); | 1057 | kfree(fa); |
1089 | } | 1058 | } |
@@ -1100,10 +1069,9 @@ int sock_wake_async(struct socket *sock, int how, int band) | |||
1100 | { | 1069 | { |
1101 | if (!sock || !sock->fasync_list) | 1070 | if (!sock || !sock->fasync_list) |
1102 | return -1; | 1071 | return -1; |
1103 | switch (how) | 1072 | switch (how) { |
1104 | { | ||
1105 | case 1: | 1073 | case 1: |
1106 | 1074 | ||
1107 | if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) | 1075 | if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) |
1108 | break; | 1076 | break; |
1109 | goto call_kill; | 1077 | goto call_kill; |
@@ -1112,7 +1080,7 @@ int sock_wake_async(struct socket *sock, int how, int band) | |||
1112 | break; | 1080 | break; |
1113 | /* fall through */ | 1081 | /* fall through */ |
1114 | case 0: | 1082 | case 0: |
1115 | call_kill: | 1083 | call_kill: |
1116 | __kill_fasync(sock->fasync_list, SIGIO, band); | 1084 | __kill_fasync(sock->fasync_list, SIGIO, band); |
1117 | break; | 1085 | break; |
1118 | case 3: | 1086 | case 3: |
@@ -1121,13 +1089,15 @@ int sock_wake_async(struct socket *sock, int how, int band) | |||
1121 | return 0; | 1089 | return 0; |
1122 | } | 1090 | } |
1123 | 1091 | ||
1124 | static int __sock_create(int family, int type, int protocol, struct socket **res, int kern) | 1092 | static int __sock_create(int family, int type, int protocol, |
1093 | struct socket **res, int kern) | ||
1125 | { | 1094 | { |
1126 | int err; | 1095 | int err; |
1127 | struct socket *sock; | 1096 | struct socket *sock; |
1097 | const struct net_proto_family *pf; | ||
1128 | 1098 | ||
1129 | /* | 1099 | /* |
1130 | * Check protocol is in range | 1100 | * Check protocol is in range |
1131 | */ | 1101 | */ |
1132 | if (family < 0 || family >= NPROTO) | 1102 | if (family < 0 || family >= NPROTO) |
1133 | return -EAFNOSUPPORT; | 1103 | return -EAFNOSUPPORT; |
@@ -1140,10 +1110,11 @@ static int __sock_create(int family, int type, int protocol, struct socket **res | |||
1140 | deadlock in module load. | 1110 | deadlock in module load. |
1141 | */ | 1111 | */ |
1142 | if (family == PF_INET && type == SOCK_PACKET) { | 1112 | if (family == PF_INET && type == SOCK_PACKET) { |
1143 | static int warned; | 1113 | static int warned; |
1144 | if (!warned) { | 1114 | if (!warned) { |
1145 | warned = 1; | 1115 | warned = 1; |
1146 | printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm); | 1116 | printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", |
1117 | current->comm); | ||
1147 | } | 1118 | } |
1148 | family = PF_PACKET; | 1119 | family = PF_PACKET; |
1149 | } | 1120 | } |
@@ -1151,79 +1122,84 @@ static int __sock_create(int family, int type, int protocol, struct socket **res | |||
1151 | err = security_socket_create(family, type, protocol, kern); | 1122 | err = security_socket_create(family, type, protocol, kern); |
1152 | if (err) | 1123 | if (err) |
1153 | return err; | 1124 | return err; |
1154 | 1125 | ||
1126 | /* | ||
1127 | * Allocate the socket and allow the family to set things up. if | ||
1128 | * the protocol is 0, the family is instructed to select an appropriate | ||
1129 | * default. | ||
1130 | */ | ||
1131 | sock = sock_alloc(); | ||
1132 | if (!sock) { | ||
1133 | if (net_ratelimit()) | ||
1134 | printk(KERN_WARNING "socket: no more sockets\n"); | ||
1135 | return -ENFILE; /* Not exactly a match, but its the | ||
1136 | closest posix thing */ | ||
1137 | } | ||
1138 | |||
1139 | sock->type = type; | ||
1140 | |||
1155 | #if defined(CONFIG_KMOD) | 1141 | #if defined(CONFIG_KMOD) |
1156 | /* Attempt to load a protocol module if the find failed. | 1142 | /* Attempt to load a protocol module if the find failed. |
1157 | * | 1143 | * |
1158 | * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user | 1144 | * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user |
1159 | * requested real, full-featured networking support upon configuration. | 1145 | * requested real, full-featured networking support upon configuration. |
1160 | * Otherwise module support will break! | 1146 | * Otherwise module support will break! |
1161 | */ | 1147 | */ |
1162 | if (net_families[family]==NULL) | 1148 | if (net_families[family] == NULL) |
1163 | { | 1149 | request_module("net-pf-%d", family); |
1164 | request_module("net-pf-%d",family); | ||
1165 | } | ||
1166 | #endif | 1150 | #endif |
1167 | 1151 | ||
1168 | net_family_read_lock(); | 1152 | rcu_read_lock(); |
1169 | if (net_families[family] == NULL) { | 1153 | pf = rcu_dereference(net_families[family]); |
1170 | err = -EAFNOSUPPORT; | 1154 | err = -EAFNOSUPPORT; |
1171 | goto out; | 1155 | if (!pf) |
1172 | } | 1156 | goto out_release; |
1173 | |||
1174 | /* | ||
1175 | * Allocate the socket and allow the family to set things up. if | ||
1176 | * the protocol is 0, the family is instructed to select an appropriate | ||
1177 | * default. | ||
1178 | */ | ||
1179 | |||
1180 | if (!(sock = sock_alloc())) { | ||
1181 | if (net_ratelimit()) | ||
1182 | printk(KERN_WARNING "socket: no more sockets\n"); | ||
1183 | err = -ENFILE; /* Not exactly a match, but its the | ||
1184 | closest posix thing */ | ||
1185 | goto out; | ||
1186 | } | ||
1187 | |||
1188 | sock->type = type; | ||
1189 | 1157 | ||
1190 | /* | 1158 | /* |
1191 | * We will call the ->create function, that possibly is in a loadable | 1159 | * We will call the ->create function, that possibly is in a loadable |
1192 | * module, so we have to bump that loadable module refcnt first. | 1160 | * module, so we have to bump that loadable module refcnt first. |
1193 | */ | 1161 | */ |
1194 | err = -EAFNOSUPPORT; | 1162 | if (!try_module_get(pf->owner)) |
1195 | if (!try_module_get(net_families[family]->owner)) | ||
1196 | goto out_release; | 1163 | goto out_release; |
1197 | 1164 | ||
1198 | if ((err = net_families[family]->create(sock, protocol)) < 0) { | 1165 | /* Now protected by module ref count */ |
1199 | sock->ops = NULL; | 1166 | rcu_read_unlock(); |
1167 | |||
1168 | err = pf->create(sock, protocol); | ||
1169 | if (err < 0) | ||
1200 | goto out_module_put; | 1170 | goto out_module_put; |
1201 | } | ||
1202 | 1171 | ||
1203 | /* | 1172 | /* |
1204 | * Now to bump the refcnt of the [loadable] module that owns this | 1173 | * Now to bump the refcnt of the [loadable] module that owns this |
1205 | * socket at sock_release time we decrement its refcnt. | 1174 | * socket at sock_release time we decrement its refcnt. |
1206 | */ | 1175 | */ |
1207 | if (!try_module_get(sock->ops->owner)) { | 1176 | if (!try_module_get(sock->ops->owner)) |
1208 | sock->ops = NULL; | 1177 | goto out_module_busy; |
1209 | goto out_module_put; | 1178 | |
1210 | } | ||
1211 | /* | 1179 | /* |
1212 | * Now that we're done with the ->create function, the [loadable] | 1180 | * Now that we're done with the ->create function, the [loadable] |
1213 | * module can have its refcnt decremented | 1181 | * module can have its refcnt decremented |
1214 | */ | 1182 | */ |
1215 | module_put(net_families[family]->owner); | 1183 | module_put(pf->owner); |
1184 | err = security_socket_post_create(sock, family, type, protocol, kern); | ||
1185 | if (err) | ||
1186 | goto out_release; | ||
1216 | *res = sock; | 1187 | *res = sock; |
1217 | security_socket_post_create(sock, family, type, protocol, kern); | ||
1218 | 1188 | ||
1219 | out: | 1189 | return 0; |
1220 | net_family_read_unlock(); | 1190 | |
1221 | return err; | 1191 | out_module_busy: |
1192 | err = -EAFNOSUPPORT; | ||
1222 | out_module_put: | 1193 | out_module_put: |
1223 | module_put(net_families[family]->owner); | 1194 | sock->ops = NULL; |
1224 | out_release: | 1195 | module_put(pf->owner); |
1196 | out_sock_release: | ||
1225 | sock_release(sock); | 1197 | sock_release(sock); |
1226 | goto out; | 1198 | return err; |
1199 | |||
1200 | out_release: | ||
1201 | rcu_read_unlock(); | ||
1202 | goto out_sock_release; | ||
1227 | } | 1203 | } |
1228 | 1204 | ||
1229 | int sock_create(int family, int type, int protocol, struct socket **res) | 1205 | int sock_create(int family, int type, int protocol, struct socket **res) |
@@ -1262,7 +1238,8 @@ out_release: | |||
1262 | * Create a pair of connected sockets. | 1238 | * Create a pair of connected sockets. |
1263 | */ | 1239 | */ |
1264 | 1240 | ||
1265 | asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec) | 1241 | asmlinkage long sys_socketpair(int family, int type, int protocol, |
1242 | int __user *usockvec) | ||
1266 | { | 1243 | { |
1267 | struct socket *sock1, *sock2; | 1244 | struct socket *sock1, *sock2; |
1268 | int fd1, fd2, err; | 1245 | int fd1, fd2, err; |
@@ -1281,7 +1258,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u | |||
1281 | goto out_release_1; | 1258 | goto out_release_1; |
1282 | 1259 | ||
1283 | err = sock1->ops->socketpair(sock1, sock2); | 1260 | err = sock1->ops->socketpair(sock1, sock2); |
1284 | if (err < 0) | 1261 | if (err < 0) |
1285 | goto out_release_both; | 1262 | goto out_release_both; |
1286 | 1263 | ||
1287 | fd1 = fd2 = -1; | 1264 | fd1 = fd2 = -1; |
@@ -1300,7 +1277,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u | |||
1300 | * Not kernel problem. | 1277 | * Not kernel problem. |
1301 | */ | 1278 | */ |
1302 | 1279 | ||
1303 | err = put_user(fd1, &usockvec[0]); | 1280 | err = put_user(fd1, &usockvec[0]); |
1304 | if (!err) | 1281 | if (!err) |
1305 | err = put_user(fd2, &usockvec[1]); | 1282 | err = put_user(fd2, &usockvec[1]); |
1306 | if (!err) | 1283 | if (!err) |
@@ -1311,19 +1288,18 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u | |||
1311 | return err; | 1288 | return err; |
1312 | 1289 | ||
1313 | out_close_1: | 1290 | out_close_1: |
1314 | sock_release(sock2); | 1291 | sock_release(sock2); |
1315 | sys_close(fd1); | 1292 | sys_close(fd1); |
1316 | return err; | 1293 | return err; |
1317 | 1294 | ||
1318 | out_release_both: | 1295 | out_release_both: |
1319 | sock_release(sock2); | 1296 | sock_release(sock2); |
1320 | out_release_1: | 1297 | out_release_1: |
1321 | sock_release(sock1); | 1298 | sock_release(sock1); |
1322 | out: | 1299 | out: |
1323 | return err; | 1300 | return err; |
1324 | } | 1301 | } |
1325 | 1302 | ||
1326 | |||
1327 | /* | 1303 | /* |
1328 | * Bind a name to a socket. Nothing much to do here since it's | 1304 | * Bind a name to a socket. Nothing much to do here since it's |
1329 | * the protocol's responsibility to handle the local address. | 1305 | * the protocol's responsibility to handle the local address. |
@@ -1338,35 +1314,39 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) | |||
1338 | char address[MAX_SOCK_ADDR]; | 1314 | char address[MAX_SOCK_ADDR]; |
1339 | int err, fput_needed; | 1315 | int err, fput_needed; |
1340 | 1316 | ||
1341 | if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) | 1317 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1342 | { | 1318 | if(sock) { |
1343 | if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) { | 1319 | err = move_addr_to_kernel(umyaddr, addrlen, address); |
1344 | err = security_socket_bind(sock, (struct sockaddr *)address, addrlen); | 1320 | if (err >= 0) { |
1321 | err = security_socket_bind(sock, | ||
1322 | (struct sockaddr *)address, | ||
1323 | addrlen); | ||
1345 | if (!err) | 1324 | if (!err) |
1346 | err = sock->ops->bind(sock, | 1325 | err = sock->ops->bind(sock, |
1347 | (struct sockaddr *)address, addrlen); | 1326 | (struct sockaddr *) |
1327 | address, addrlen); | ||
1348 | } | 1328 | } |
1349 | fput_light(sock->file, fput_needed); | 1329 | fput_light(sock->file, fput_needed); |
1350 | } | 1330 | } |
1351 | return err; | 1331 | return err; |
1352 | } | 1332 | } |
1353 | 1333 | ||
1354 | |||
1355 | /* | 1334 | /* |
1356 | * Perform a listen. Basically, we allow the protocol to do anything | 1335 | * Perform a listen. Basically, we allow the protocol to do anything |
1357 | * necessary for a listen, and if that works, we mark the socket as | 1336 | * necessary for a listen, and if that works, we mark the socket as |
1358 | * ready for listening. | 1337 | * ready for listening. |
1359 | */ | 1338 | */ |
1360 | 1339 | ||
1361 | int sysctl_somaxconn = SOMAXCONN; | 1340 | int sysctl_somaxconn __read_mostly = SOMAXCONN; |
1362 | 1341 | ||
1363 | asmlinkage long sys_listen(int fd, int backlog) | 1342 | asmlinkage long sys_listen(int fd, int backlog) |
1364 | { | 1343 | { |
1365 | struct socket *sock; | 1344 | struct socket *sock; |
1366 | int err, fput_needed; | 1345 | int err, fput_needed; |
1367 | 1346 | ||
1368 | if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { | 1347 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1369 | if ((unsigned) backlog > sysctl_somaxconn) | 1348 | if (sock) { |
1349 | if ((unsigned)backlog > sysctl_somaxconn) | ||
1370 | backlog = sysctl_somaxconn; | 1350 | backlog = sysctl_somaxconn; |
1371 | 1351 | ||
1372 | err = security_socket_listen(sock, backlog); | 1352 | err = security_socket_listen(sock, backlog); |
@@ -1378,7 +1358,6 @@ asmlinkage long sys_listen(int fd, int backlog) | |||
1378 | return err; | 1358 | return err; |
1379 | } | 1359 | } |
1380 | 1360 | ||
1381 | |||
1382 | /* | 1361 | /* |
1383 | * For accept, we attempt to create a new socket, set up the link | 1362 | * For accept, we attempt to create a new socket, set up the link |
1384 | * with the client, wake up the client, then return the new | 1363 | * with the client, wake up the client, then return the new |
@@ -1391,7 +1370,8 @@ asmlinkage long sys_listen(int fd, int backlog) | |||
1391 | * clean when we restucture accept also. | 1370 | * clean when we restucture accept also. |
1392 | */ | 1371 | */ |
1393 | 1372 | ||
1394 | asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) | 1373 | asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, |
1374 | int __user *upeer_addrlen) | ||
1395 | { | 1375 | { |
1396 | struct socket *sock, *newsock; | 1376 | struct socket *sock, *newsock; |
1397 | struct file *newfile; | 1377 | struct file *newfile; |
@@ -1403,7 +1383,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _ | |||
1403 | goto out; | 1383 | goto out; |
1404 | 1384 | ||
1405 | err = -ENFILE; | 1385 | err = -ENFILE; |
1406 | if (!(newsock = sock_alloc())) | 1386 | if (!(newsock = sock_alloc())) |
1407 | goto out_put; | 1387 | goto out_put; |
1408 | 1388 | ||
1409 | newsock->type = sock->type; | 1389 | newsock->type = sock->type; |
@@ -1435,11 +1415,13 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int _ | |||
1435 | goto out_fd; | 1415 | goto out_fd; |
1436 | 1416 | ||
1437 | if (upeer_sockaddr) { | 1417 | if (upeer_sockaddr) { |
1438 | if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) { | 1418 | if (newsock->ops->getname(newsock, (struct sockaddr *)address, |
1419 | &len, 2) < 0) { | ||
1439 | err = -ECONNABORTED; | 1420 | err = -ECONNABORTED; |
1440 | goto out_fd; | 1421 | goto out_fd; |
1441 | } | 1422 | } |
1442 | err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen); | 1423 | err = move_addr_to_user(address, len, upeer_sockaddr, |
1424 | upeer_addrlen); | ||
1443 | if (err < 0) | 1425 | if (err < 0) |
1444 | goto out_fd; | 1426 | goto out_fd; |
1445 | } | 1427 | } |
@@ -1461,7 +1443,6 @@ out_fd: | |||
1461 | goto out_put; | 1443 | goto out_put; |
1462 | } | 1444 | } |
1463 | 1445 | ||
1464 | |||
1465 | /* | 1446 | /* |
1466 | * Attempt to connect to a socket with the server address. The address | 1447 | * Attempt to connect to a socket with the server address. The address |
1467 | * is in user space so we verify it is OK and move it to kernel space. | 1448 | * is in user space so we verify it is OK and move it to kernel space. |
@@ -1474,7 +1455,8 @@ out_fd: | |||
1474 | * include the -EINPROGRESS status for such sockets. | 1455 | * include the -EINPROGRESS status for such sockets. |
1475 | */ | 1456 | */ |
1476 | 1457 | ||
1477 | asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) | 1458 | asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, |
1459 | int addrlen) | ||
1478 | { | 1460 | { |
1479 | struct socket *sock; | 1461 | struct socket *sock; |
1480 | char address[MAX_SOCK_ADDR]; | 1462 | char address[MAX_SOCK_ADDR]; |
@@ -1487,11 +1469,12 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrl | |||
1487 | if (err < 0) | 1469 | if (err < 0) |
1488 | goto out_put; | 1470 | goto out_put; |
1489 | 1471 | ||
1490 | err = security_socket_connect(sock, (struct sockaddr *)address, addrlen); | 1472 | err = |
1473 | security_socket_connect(sock, (struct sockaddr *)address, addrlen); | ||
1491 | if (err) | 1474 | if (err) |
1492 | goto out_put; | 1475 | goto out_put; |
1493 | 1476 | ||
1494 | err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen, | 1477 | err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, |
1495 | sock->file->f_flags); | 1478 | sock->file->f_flags); |
1496 | out_put: | 1479 | out_put: |
1497 | fput_light(sock->file, fput_needed); | 1480 | fput_light(sock->file, fput_needed); |
@@ -1504,12 +1487,13 @@ out: | |||
1504 | * name to user space. | 1487 | * name to user space. |
1505 | */ | 1488 | */ |
1506 | 1489 | ||
1507 | asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) | 1490 | asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, |
1491 | int __user *usockaddr_len) | ||
1508 | { | 1492 | { |
1509 | struct socket *sock; | 1493 | struct socket *sock; |
1510 | char address[MAX_SOCK_ADDR]; | 1494 | char address[MAX_SOCK_ADDR]; |
1511 | int len, err, fput_needed; | 1495 | int len, err, fput_needed; |
1512 | 1496 | ||
1513 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1497 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1514 | if (!sock) | 1498 | if (!sock) |
1515 | goto out; | 1499 | goto out; |
@@ -1534,22 +1518,27 @@ out: | |||
1534 | * name to user space. | 1518 | * name to user space. |
1535 | */ | 1519 | */ |
1536 | 1520 | ||
1537 | asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) | 1521 | asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, |
1522 | int __user *usockaddr_len) | ||
1538 | { | 1523 | { |
1539 | struct socket *sock; | 1524 | struct socket *sock; |
1540 | char address[MAX_SOCK_ADDR]; | 1525 | char address[MAX_SOCK_ADDR]; |
1541 | int len, err, fput_needed; | 1526 | int len, err, fput_needed; |
1542 | 1527 | ||
1543 | if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { | 1528 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1529 | if (sock != NULL) { | ||
1544 | err = security_socket_getpeername(sock); | 1530 | err = security_socket_getpeername(sock); |
1545 | if (err) { | 1531 | if (err) { |
1546 | fput_light(sock->file, fput_needed); | 1532 | fput_light(sock->file, fput_needed); |
1547 | return err; | 1533 | return err; |
1548 | } | 1534 | } |
1549 | 1535 | ||
1550 | err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1); | 1536 | err = |
1537 | sock->ops->getname(sock, (struct sockaddr *)address, &len, | ||
1538 | 1); | ||
1551 | if (!err) | 1539 | if (!err) |
1552 | err=move_addr_to_user(address,len, usockaddr, usockaddr_len); | 1540 | err = move_addr_to_user(address, len, usockaddr, |
1541 | usockaddr_len); | ||
1553 | fput_light(sock->file, fput_needed); | 1542 | fput_light(sock->file, fput_needed); |
1554 | } | 1543 | } |
1555 | return err; | 1544 | return err; |
@@ -1561,8 +1550,9 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int _ | |||
1561 | * the protocol. | 1550 | * the protocol. |
1562 | */ | 1551 | */ |
1563 | 1552 | ||
1564 | asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags, | 1553 | asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, |
1565 | struct sockaddr __user *addr, int addr_len) | 1554 | unsigned flags, struct sockaddr __user *addr, |
1555 | int addr_len) | ||
1566 | { | 1556 | { |
1567 | struct socket *sock; | 1557 | struct socket *sock; |
1568 | char address[MAX_SOCK_ADDR]; | 1558 | char address[MAX_SOCK_ADDR]; |
@@ -1579,54 +1569,55 @@ asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flag | |||
1579 | sock = sock_from_file(sock_file, &err); | 1569 | sock = sock_from_file(sock_file, &err); |
1580 | if (!sock) | 1570 | if (!sock) |
1581 | goto out_put; | 1571 | goto out_put; |
1582 | iov.iov_base=buff; | 1572 | iov.iov_base = buff; |
1583 | iov.iov_len=len; | 1573 | iov.iov_len = len; |
1584 | msg.msg_name=NULL; | 1574 | msg.msg_name = NULL; |
1585 | msg.msg_iov=&iov; | 1575 | msg.msg_iov = &iov; |
1586 | msg.msg_iovlen=1; | 1576 | msg.msg_iovlen = 1; |
1587 | msg.msg_control=NULL; | 1577 | msg.msg_control = NULL; |
1588 | msg.msg_controllen=0; | 1578 | msg.msg_controllen = 0; |
1589 | msg.msg_namelen=0; | 1579 | msg.msg_namelen = 0; |
1590 | if (addr) { | 1580 | if (addr) { |
1591 | err = move_addr_to_kernel(addr, addr_len, address); | 1581 | err = move_addr_to_kernel(addr, addr_len, address); |
1592 | if (err < 0) | 1582 | if (err < 0) |
1593 | goto out_put; | 1583 | goto out_put; |
1594 | msg.msg_name=address; | 1584 | msg.msg_name = address; |
1595 | msg.msg_namelen=addr_len; | 1585 | msg.msg_namelen = addr_len; |
1596 | } | 1586 | } |
1597 | if (sock->file->f_flags & O_NONBLOCK) | 1587 | if (sock->file->f_flags & O_NONBLOCK) |
1598 | flags |= MSG_DONTWAIT; | 1588 | flags |= MSG_DONTWAIT; |
1599 | msg.msg_flags = flags; | 1589 | msg.msg_flags = flags; |
1600 | err = sock_sendmsg(sock, &msg, len); | 1590 | err = sock_sendmsg(sock, &msg, len); |
1601 | 1591 | ||
1602 | out_put: | 1592 | out_put: |
1603 | fput_light(sock_file, fput_needed); | 1593 | fput_light(sock_file, fput_needed); |
1604 | return err; | 1594 | return err; |
1605 | } | 1595 | } |
1606 | 1596 | ||
1607 | /* | 1597 | /* |
1608 | * Send a datagram down a socket. | 1598 | * Send a datagram down a socket. |
1609 | */ | 1599 | */ |
1610 | 1600 | ||
1611 | asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags) | 1601 | asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) |
1612 | { | 1602 | { |
1613 | return sys_sendto(fd, buff, len, flags, NULL, 0); | 1603 | return sys_sendto(fd, buff, len, flags, NULL, 0); |
1614 | } | 1604 | } |
1615 | 1605 | ||
1616 | /* | 1606 | /* |
1617 | * Receive a frame from the socket and optionally record the address of the | 1607 | * Receive a frame from the socket and optionally record the address of the |
1618 | * sender. We verify the buffers are writable and if needed move the | 1608 | * sender. We verify the buffers are writable and if needed move the |
1619 | * sender address from kernel to user space. | 1609 | * sender address from kernel to user space. |
1620 | */ | 1610 | */ |
1621 | 1611 | ||
1622 | asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags, | 1612 | asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, |
1623 | struct sockaddr __user *addr, int __user *addr_len) | 1613 | unsigned flags, struct sockaddr __user *addr, |
1614 | int __user *addr_len) | ||
1624 | { | 1615 | { |
1625 | struct socket *sock; | 1616 | struct socket *sock; |
1626 | struct iovec iov; | 1617 | struct iovec iov; |
1627 | struct msghdr msg; | 1618 | struct msghdr msg; |
1628 | char address[MAX_SOCK_ADDR]; | 1619 | char address[MAX_SOCK_ADDR]; |
1629 | int err,err2; | 1620 | int err, err2; |
1630 | struct file *sock_file; | 1621 | struct file *sock_file; |
1631 | int fput_needed; | 1622 | int fput_needed; |
1632 | 1623 | ||
@@ -1638,23 +1629,22 @@ asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned f | |||
1638 | if (!sock) | 1629 | if (!sock) |
1639 | goto out; | 1630 | goto out; |
1640 | 1631 | ||
1641 | msg.msg_control=NULL; | 1632 | msg.msg_control = NULL; |
1642 | msg.msg_controllen=0; | 1633 | msg.msg_controllen = 0; |
1643 | msg.msg_iovlen=1; | 1634 | msg.msg_iovlen = 1; |
1644 | msg.msg_iov=&iov; | 1635 | msg.msg_iov = &iov; |
1645 | iov.iov_len=size; | 1636 | iov.iov_len = size; |
1646 | iov.iov_base=ubuf; | 1637 | iov.iov_base = ubuf; |
1647 | msg.msg_name=address; | 1638 | msg.msg_name = address; |
1648 | msg.msg_namelen=MAX_SOCK_ADDR; | 1639 | msg.msg_namelen = MAX_SOCK_ADDR; |
1649 | if (sock->file->f_flags & O_NONBLOCK) | 1640 | if (sock->file->f_flags & O_NONBLOCK) |
1650 | flags |= MSG_DONTWAIT; | 1641 | flags |= MSG_DONTWAIT; |
1651 | err=sock_recvmsg(sock, &msg, size, flags); | 1642 | err = sock_recvmsg(sock, &msg, size, flags); |
1652 | 1643 | ||
1653 | if(err >= 0 && addr != NULL) | 1644 | if (err >= 0 && addr != NULL) { |
1654 | { | 1645 | err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); |
1655 | err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len); | 1646 | if (err2 < 0) |
1656 | if(err2<0) | 1647 | err = err2; |
1657 | err=err2; | ||
1658 | } | 1648 | } |
1659 | out: | 1649 | out: |
1660 | fput_light(sock_file, fput_needed); | 1650 | fput_light(sock_file, fput_needed); |
@@ -1662,10 +1652,11 @@ out: | |||
1662 | } | 1652 | } |
1663 | 1653 | ||
1664 | /* | 1654 | /* |
1665 | * Receive a datagram from a socket. | 1655 | * Receive a datagram from a socket. |
1666 | */ | 1656 | */ |
1667 | 1657 | ||
1668 | asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags) | 1658 | asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, |
1659 | unsigned flags) | ||
1669 | { | 1660 | { |
1670 | return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); | 1661 | return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); |
1671 | } | 1662 | } |
@@ -1675,24 +1666,29 @@ asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags | |||
1675 | * to pass the user mode parameter for the protocols to sort out. | 1666 | * to pass the user mode parameter for the protocols to sort out. |
1676 | */ | 1667 | */ |
1677 | 1668 | ||
1678 | asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen) | 1669 | asmlinkage long sys_setsockopt(int fd, int level, int optname, |
1670 | char __user *optval, int optlen) | ||
1679 | { | 1671 | { |
1680 | int err, fput_needed; | 1672 | int err, fput_needed; |
1681 | struct socket *sock; | 1673 | struct socket *sock; |
1682 | 1674 | ||
1683 | if (optlen < 0) | 1675 | if (optlen < 0) |
1684 | return -EINVAL; | 1676 | return -EINVAL; |
1685 | 1677 | ||
1686 | if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) | 1678 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1687 | { | 1679 | if (sock != NULL) { |
1688 | err = security_socket_setsockopt(sock,level,optname); | 1680 | err = security_socket_setsockopt(sock, level, optname); |
1689 | if (err) | 1681 | if (err) |
1690 | goto out_put; | 1682 | goto out_put; |
1691 | 1683 | ||
1692 | if (level == SOL_SOCKET) | 1684 | if (level == SOL_SOCKET) |
1693 | err=sock_setsockopt(sock,level,optname,optval,optlen); | 1685 | err = |
1686 | sock_setsockopt(sock, level, optname, optval, | ||
1687 | optlen); | ||
1694 | else | 1688 | else |
1695 | err=sock->ops->setsockopt(sock, level, optname, optval, optlen); | 1689 | err = |
1690 | sock->ops->setsockopt(sock, level, optname, optval, | ||
1691 | optlen); | ||
1696 | out_put: | 1692 | out_put: |
1697 | fput_light(sock->file, fput_needed); | 1693 | fput_light(sock->file, fput_needed); |
1698 | } | 1694 | } |
@@ -1704,27 +1700,32 @@ out_put: | |||
1704 | * to pass a user mode parameter for the protocols to sort out. | 1700 | * to pass a user mode parameter for the protocols to sort out. |
1705 | */ | 1701 | */ |
1706 | 1702 | ||
1707 | asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) | 1703 | asmlinkage long sys_getsockopt(int fd, int level, int optname, |
1704 | char __user *optval, int __user *optlen) | ||
1708 | { | 1705 | { |
1709 | int err, fput_needed; | 1706 | int err, fput_needed; |
1710 | struct socket *sock; | 1707 | struct socket *sock; |
1711 | 1708 | ||
1712 | if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { | 1709 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1710 | if (sock != NULL) { | ||
1713 | err = security_socket_getsockopt(sock, level, optname); | 1711 | err = security_socket_getsockopt(sock, level, optname); |
1714 | if (err) | 1712 | if (err) |
1715 | goto out_put; | 1713 | goto out_put; |
1716 | 1714 | ||
1717 | if (level == SOL_SOCKET) | 1715 | if (level == SOL_SOCKET) |
1718 | err=sock_getsockopt(sock,level,optname,optval,optlen); | 1716 | err = |
1717 | sock_getsockopt(sock, level, optname, optval, | ||
1718 | optlen); | ||
1719 | else | 1719 | else |
1720 | err=sock->ops->getsockopt(sock, level, optname, optval, optlen); | 1720 | err = |
1721 | sock->ops->getsockopt(sock, level, optname, optval, | ||
1722 | optlen); | ||
1721 | out_put: | 1723 | out_put: |
1722 | fput_light(sock->file, fput_needed); | 1724 | fput_light(sock->file, fput_needed); |
1723 | } | 1725 | } |
1724 | return err; | 1726 | return err; |
1725 | } | 1727 | } |
1726 | 1728 | ||
1727 | |||
1728 | /* | 1729 | /* |
1729 | * Shutdown a socket. | 1730 | * Shutdown a socket. |
1730 | */ | 1731 | */ |
@@ -1734,8 +1735,8 @@ asmlinkage long sys_shutdown(int fd, int how) | |||
1734 | int err, fput_needed; | 1735 | int err, fput_needed; |
1735 | struct socket *sock; | 1736 | struct socket *sock; |
1736 | 1737 | ||
1737 | if ((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) | 1738 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1738 | { | 1739 | if (sock != NULL) { |
1739 | err = security_socket_shutdown(sock, how); | 1740 | err = security_socket_shutdown(sock, how); |
1740 | if (!err) | 1741 | if (!err) |
1741 | err = sock->ops->shutdown(sock, how); | 1742 | err = sock->ops->shutdown(sock, how); |
@@ -1744,41 +1745,42 @@ asmlinkage long sys_shutdown(int fd, int how) | |||
1744 | return err; | 1745 | return err; |
1745 | } | 1746 | } |
1746 | 1747 | ||
1747 | /* A couple of helpful macros for getting the address of the 32/64 bit | 1748 | /* A couple of helpful macros for getting the address of the 32/64 bit |
1748 | * fields which are the same type (int / unsigned) on our platforms. | 1749 | * fields which are the same type (int / unsigned) on our platforms. |
1749 | */ | 1750 | */ |
1750 | #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) | 1751 | #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) |
1751 | #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) | 1752 | #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) |
1752 | #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) | 1753 | #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) |
1753 | 1754 | ||
1754 | |||
1755 | /* | 1755 | /* |
1756 | * BSD sendmsg interface | 1756 | * BSD sendmsg interface |
1757 | */ | 1757 | */ |
1758 | 1758 | ||
1759 | asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) | 1759 | asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) |
1760 | { | 1760 | { |
1761 | struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; | 1761 | struct compat_msghdr __user *msg_compat = |
1762 | (struct compat_msghdr __user *)msg; | ||
1762 | struct socket *sock; | 1763 | struct socket *sock; |
1763 | char address[MAX_SOCK_ADDR]; | 1764 | char address[MAX_SOCK_ADDR]; |
1764 | struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; | 1765 | struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; |
1765 | unsigned char ctl[sizeof(struct cmsghdr) + 20] | 1766 | unsigned char ctl[sizeof(struct cmsghdr) + 20] |
1766 | __attribute__ ((aligned (sizeof(__kernel_size_t)))); | 1767 | __attribute__ ((aligned(sizeof(__kernel_size_t)))); |
1767 | /* 20 is size of ipv6_pktinfo */ | 1768 | /* 20 is size of ipv6_pktinfo */ |
1768 | unsigned char *ctl_buf = ctl; | 1769 | unsigned char *ctl_buf = ctl; |
1769 | struct msghdr msg_sys; | 1770 | struct msghdr msg_sys; |
1770 | int err, ctl_len, iov_size, total_len; | 1771 | int err, ctl_len, iov_size, total_len; |
1771 | int fput_needed; | 1772 | int fput_needed; |
1772 | 1773 | ||
1773 | err = -EFAULT; | 1774 | err = -EFAULT; |
1774 | if (MSG_CMSG_COMPAT & flags) { | 1775 | if (MSG_CMSG_COMPAT & flags) { |
1775 | if (get_compat_msghdr(&msg_sys, msg_compat)) | 1776 | if (get_compat_msghdr(&msg_sys, msg_compat)) |
1776 | return -EFAULT; | 1777 | return -EFAULT; |
1777 | } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) | 1778 | } |
1779 | else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) | ||
1778 | return -EFAULT; | 1780 | return -EFAULT; |
1779 | 1781 | ||
1780 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1782 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1781 | if (!sock) | 1783 | if (!sock) |
1782 | goto out; | 1784 | goto out; |
1783 | 1785 | ||
1784 | /* do not move before msg_sys is valid */ | 1786 | /* do not move before msg_sys is valid */ |
@@ -1786,7 +1788,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) | |||
1786 | if (msg_sys.msg_iovlen > UIO_MAXIOV) | 1788 | if (msg_sys.msg_iovlen > UIO_MAXIOV) |
1787 | goto out_put; | 1789 | goto out_put; |
1788 | 1790 | ||
1789 | /* Check whether to allocate the iovec area*/ | 1791 | /* Check whether to allocate the iovec area */ |
1790 | err = -ENOMEM; | 1792 | err = -ENOMEM; |
1791 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); | 1793 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); |
1792 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { | 1794 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { |
@@ -1800,7 +1802,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) | |||
1800 | err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); | 1802 | err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); |
1801 | } else | 1803 | } else |
1802 | err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); | 1804 | err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); |
1803 | if (err < 0) | 1805 | if (err < 0) |
1804 | goto out_freeiov; | 1806 | goto out_freeiov; |
1805 | total_len = err; | 1807 | total_len = err; |
1806 | 1808 | ||
@@ -1808,18 +1810,19 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) | |||
1808 | 1810 | ||
1809 | if (msg_sys.msg_controllen > INT_MAX) | 1811 | if (msg_sys.msg_controllen > INT_MAX) |
1810 | goto out_freeiov; | 1812 | goto out_freeiov; |
1811 | ctl_len = msg_sys.msg_controllen; | 1813 | ctl_len = msg_sys.msg_controllen; |
1812 | if ((MSG_CMSG_COMPAT & flags) && ctl_len) { | 1814 | if ((MSG_CMSG_COMPAT & flags) && ctl_len) { |
1813 | err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl)); | 1815 | err = |
1816 | cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, | ||
1817 | sizeof(ctl)); | ||
1814 | if (err) | 1818 | if (err) |
1815 | goto out_freeiov; | 1819 | goto out_freeiov; |
1816 | ctl_buf = msg_sys.msg_control; | 1820 | ctl_buf = msg_sys.msg_control; |
1817 | ctl_len = msg_sys.msg_controllen; | 1821 | ctl_len = msg_sys.msg_controllen; |
1818 | } else if (ctl_len) { | 1822 | } else if (ctl_len) { |
1819 | if (ctl_len > sizeof(ctl)) | 1823 | if (ctl_len > sizeof(ctl)) { |
1820 | { | ||
1821 | ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); | 1824 | ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); |
1822 | if (ctl_buf == NULL) | 1825 | if (ctl_buf == NULL) |
1823 | goto out_freeiov; | 1826 | goto out_freeiov; |
1824 | } | 1827 | } |
1825 | err = -EFAULT; | 1828 | err = -EFAULT; |
@@ -1828,7 +1831,8 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) | |||
1828 | * Afterwards, it will be a kernel pointer. Thus the compiler-assisted | 1831 | * Afterwards, it will be a kernel pointer. Thus the compiler-assisted |
1829 | * checking falls down on this. | 1832 | * checking falls down on this. |
1830 | */ | 1833 | */ |
1831 | if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len)) | 1834 | if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, |
1835 | ctl_len)) | ||
1832 | goto out_freectl; | 1836 | goto out_freectl; |
1833 | msg_sys.msg_control = ctl_buf; | 1837 | msg_sys.msg_control = ctl_buf; |
1834 | } | 1838 | } |
@@ -1839,14 +1843,14 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) | |||
1839 | err = sock_sendmsg(sock, &msg_sys, total_len); | 1843 | err = sock_sendmsg(sock, &msg_sys, total_len); |
1840 | 1844 | ||
1841 | out_freectl: | 1845 | out_freectl: |
1842 | if (ctl_buf != ctl) | 1846 | if (ctl_buf != ctl) |
1843 | sock_kfree_s(sock->sk, ctl_buf, ctl_len); | 1847 | sock_kfree_s(sock->sk, ctl_buf, ctl_len); |
1844 | out_freeiov: | 1848 | out_freeiov: |
1845 | if (iov != iovstack) | 1849 | if (iov != iovstack) |
1846 | sock_kfree_s(sock->sk, iov, iov_size); | 1850 | sock_kfree_s(sock->sk, iov, iov_size); |
1847 | out_put: | 1851 | out_put: |
1848 | fput_light(sock->file, fput_needed); | 1852 | fput_light(sock->file, fput_needed); |
1849 | out: | 1853 | out: |
1850 | return err; | 1854 | return err; |
1851 | } | 1855 | } |
1852 | 1856 | ||
@@ -1854,12 +1858,14 @@ out: | |||
1854 | * BSD recvmsg interface | 1858 | * BSD recvmsg interface |
1855 | */ | 1859 | */ |
1856 | 1860 | ||
1857 | asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags) | 1861 | asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, |
1862 | unsigned int flags) | ||
1858 | { | 1863 | { |
1859 | struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; | 1864 | struct compat_msghdr __user *msg_compat = |
1865 | (struct compat_msghdr __user *)msg; | ||
1860 | struct socket *sock; | 1866 | struct socket *sock; |
1861 | struct iovec iovstack[UIO_FASTIOV]; | 1867 | struct iovec iovstack[UIO_FASTIOV]; |
1862 | struct iovec *iov=iovstack; | 1868 | struct iovec *iov = iovstack; |
1863 | struct msghdr msg_sys; | 1869 | struct msghdr msg_sys; |
1864 | unsigned long cmsg_ptr; | 1870 | unsigned long cmsg_ptr; |
1865 | int err, iov_size, total_len, len; | 1871 | int err, iov_size, total_len, len; |
@@ -1871,13 +1877,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag | |||
1871 | /* user mode address pointers */ | 1877 | /* user mode address pointers */ |
1872 | struct sockaddr __user *uaddr; | 1878 | struct sockaddr __user *uaddr; |
1873 | int __user *uaddr_len; | 1879 | int __user *uaddr_len; |
1874 | 1880 | ||
1875 | if (MSG_CMSG_COMPAT & flags) { | 1881 | if (MSG_CMSG_COMPAT & flags) { |
1876 | if (get_compat_msghdr(&msg_sys, msg_compat)) | 1882 | if (get_compat_msghdr(&msg_sys, msg_compat)) |
1877 | return -EFAULT; | 1883 | return -EFAULT; |
1878 | } else | 1884 | } |
1879 | if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr))) | 1885 | else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) |
1880 | return -EFAULT; | 1886 | return -EFAULT; |
1881 | 1887 | ||
1882 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | 1888 | sock = sockfd_lookup_light(fd, &err, &fput_needed); |
1883 | if (!sock) | 1889 | if (!sock) |
@@ -1886,8 +1892,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag | |||
1886 | err = -EMSGSIZE; | 1892 | err = -EMSGSIZE; |
1887 | if (msg_sys.msg_iovlen > UIO_MAXIOV) | 1893 | if (msg_sys.msg_iovlen > UIO_MAXIOV) |
1888 | goto out_put; | 1894 | goto out_put; |
1889 | 1895 | ||
1890 | /* Check whether to allocate the iovec area*/ | 1896 | /* Check whether to allocate the iovec area */ |
1891 | err = -ENOMEM; | 1897 | err = -ENOMEM; |
1892 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); | 1898 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); |
1893 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { | 1899 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { |
@@ -1897,11 +1903,11 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag | |||
1897 | } | 1903 | } |
1898 | 1904 | ||
1899 | /* | 1905 | /* |
1900 | * Save the user-mode address (verify_iovec will change the | 1906 | * Save the user-mode address (verify_iovec will change the |
1901 | * kernel msghdr to use the kernel address space) | 1907 | * kernel msghdr to use the kernel address space) |
1902 | */ | 1908 | */ |
1903 | 1909 | ||
1904 | uaddr = (void __user *) msg_sys.msg_name; | 1910 | uaddr = (void __user *)msg_sys.msg_name; |
1905 | uaddr_len = COMPAT_NAMELEN(msg); | 1911 | uaddr_len = COMPAT_NAMELEN(msg); |
1906 | if (MSG_CMSG_COMPAT & flags) { | 1912 | if (MSG_CMSG_COMPAT & flags) { |
1907 | err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); | 1913 | err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); |
@@ -1909,13 +1915,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag | |||
1909 | err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); | 1915 | err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); |
1910 | if (err < 0) | 1916 | if (err < 0) |
1911 | goto out_freeiov; | 1917 | goto out_freeiov; |
1912 | total_len=err; | 1918 | total_len = err; |
1913 | 1919 | ||
1914 | cmsg_ptr = (unsigned long)msg_sys.msg_control; | 1920 | cmsg_ptr = (unsigned long)msg_sys.msg_control; |
1915 | msg_sys.msg_flags = 0; | 1921 | msg_sys.msg_flags = 0; |
1916 | if (MSG_CMSG_COMPAT & flags) | 1922 | if (MSG_CMSG_COMPAT & flags) |
1917 | msg_sys.msg_flags = MSG_CMSG_COMPAT; | 1923 | msg_sys.msg_flags = MSG_CMSG_COMPAT; |
1918 | 1924 | ||
1919 | if (sock->file->f_flags & O_NONBLOCK) | 1925 | if (sock->file->f_flags & O_NONBLOCK) |
1920 | flags |= MSG_DONTWAIT; | 1926 | flags |= MSG_DONTWAIT; |
1921 | err = sock_recvmsg(sock, &msg_sys, total_len, flags); | 1927 | err = sock_recvmsg(sock, &msg_sys, total_len, flags); |
@@ -1924,7 +1930,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag | |||
1924 | len = err; | 1930 | len = err; |
1925 | 1931 | ||
1926 | if (uaddr != NULL) { | 1932 | if (uaddr != NULL) { |
1927 | err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len); | 1933 | err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, |
1934 | uaddr_len); | ||
1928 | if (err < 0) | 1935 | if (err < 0) |
1929 | goto out_freeiov; | 1936 | goto out_freeiov; |
1930 | } | 1937 | } |
@@ -1933,10 +1940,10 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag | |||
1933 | if (err) | 1940 | if (err) |
1934 | goto out_freeiov; | 1941 | goto out_freeiov; |
1935 | if (MSG_CMSG_COMPAT & flags) | 1942 | if (MSG_CMSG_COMPAT & flags) |
1936 | err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, | 1943 | err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, |
1937 | &msg_compat->msg_controllen); | 1944 | &msg_compat->msg_controllen); |
1938 | else | 1945 | else |
1939 | err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, | 1946 | err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, |
1940 | &msg->msg_controllen); | 1947 | &msg->msg_controllen); |
1941 | if (err) | 1948 | if (err) |
1942 | goto out_freeiov; | 1949 | goto out_freeiov; |
@@ -1955,163 +1962,187 @@ out: | |||
1955 | 1962 | ||
1956 | /* Argument list sizes for sys_socketcall */ | 1963 | /* Argument list sizes for sys_socketcall */ |
1957 | #define AL(x) ((x) * sizeof(unsigned long)) | 1964 | #define AL(x) ((x) * sizeof(unsigned long)) |
1958 | static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), | 1965 | static const unsigned char nargs[18]={ |
1959 | AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), | 1966 | AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), |
1960 | AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; | 1967 | AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), |
1968 | AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) | ||
1969 | }; | ||
1970 | |||
1961 | #undef AL | 1971 | #undef AL |
1962 | 1972 | ||
1963 | /* | 1973 | /* |
1964 | * System call vectors. | 1974 | * System call vectors. |
1965 | * | 1975 | * |
1966 | * Argument checking cleaned up. Saved 20% in size. | 1976 | * Argument checking cleaned up. Saved 20% in size. |
1967 | * This function doesn't need to set the kernel lock because | 1977 | * This function doesn't need to set the kernel lock because |
1968 | * it is set by the callees. | 1978 | * it is set by the callees. |
1969 | */ | 1979 | */ |
1970 | 1980 | ||
1971 | asmlinkage long sys_socketcall(int call, unsigned long __user *args) | 1981 | asmlinkage long sys_socketcall(int call, unsigned long __user *args) |
1972 | { | 1982 | { |
1973 | unsigned long a[6]; | 1983 | unsigned long a[6]; |
1974 | unsigned long a0,a1; | 1984 | unsigned long a0, a1; |
1975 | int err; | 1985 | int err; |
1976 | 1986 | ||
1977 | if(call<1||call>SYS_RECVMSG) | 1987 | if (call < 1 || call > SYS_RECVMSG) |
1978 | return -EINVAL; | 1988 | return -EINVAL; |
1979 | 1989 | ||
1980 | /* copy_from_user should be SMP safe. */ | 1990 | /* copy_from_user should be SMP safe. */ |
1981 | if (copy_from_user(a, args, nargs[call])) | 1991 | if (copy_from_user(a, args, nargs[call])) |
1982 | return -EFAULT; | 1992 | return -EFAULT; |
1983 | 1993 | ||
1984 | err = audit_socketcall(nargs[call]/sizeof(unsigned long), a); | 1994 | err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); |
1985 | if (err) | 1995 | if (err) |
1986 | return err; | 1996 | return err; |
1987 | 1997 | ||
1988 | a0=a[0]; | 1998 | a0 = a[0]; |
1989 | a1=a[1]; | 1999 | a1 = a[1]; |
1990 | 2000 | ||
1991 | switch(call) | 2001 | switch (call) { |
1992 | { | 2002 | case SYS_SOCKET: |
1993 | case SYS_SOCKET: | 2003 | err = sys_socket(a0, a1, a[2]); |
1994 | err = sys_socket(a0,a1,a[2]); | 2004 | break; |
1995 | break; | 2005 | case SYS_BIND: |
1996 | case SYS_BIND: | 2006 | err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); |
1997 | err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]); | 2007 | break; |
1998 | break; | 2008 | case SYS_CONNECT: |
1999 | case SYS_CONNECT: | 2009 | err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); |
2000 | err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); | 2010 | break; |
2001 | break; | 2011 | case SYS_LISTEN: |
2002 | case SYS_LISTEN: | 2012 | err = sys_listen(a0, a1); |
2003 | err = sys_listen(a0,a1); | 2013 | break; |
2004 | break; | 2014 | case SYS_ACCEPT: |
2005 | case SYS_ACCEPT: | 2015 | err = |
2006 | err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]); | 2016 | sys_accept(a0, (struct sockaddr __user *)a1, |
2007 | break; | 2017 | (int __user *)a[2]); |
2008 | case SYS_GETSOCKNAME: | 2018 | break; |
2009 | err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]); | 2019 | case SYS_GETSOCKNAME: |
2010 | break; | 2020 | err = |
2011 | case SYS_GETPEERNAME: | 2021 | sys_getsockname(a0, (struct sockaddr __user *)a1, |
2012 | err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]); | 2022 | (int __user *)a[2]); |
2013 | break; | 2023 | break; |
2014 | case SYS_SOCKETPAIR: | 2024 | case SYS_GETPEERNAME: |
2015 | err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]); | 2025 | err = |
2016 | break; | 2026 | sys_getpeername(a0, (struct sockaddr __user *)a1, |
2017 | case SYS_SEND: | 2027 | (int __user *)a[2]); |
2018 | err = sys_send(a0, (void __user *)a1, a[2], a[3]); | 2028 | break; |
2019 | break; | 2029 | case SYS_SOCKETPAIR: |
2020 | case SYS_SENDTO: | 2030 | err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); |
2021 | err = sys_sendto(a0,(void __user *)a1, a[2], a[3], | 2031 | break; |
2022 | (struct sockaddr __user *)a[4], a[5]); | 2032 | case SYS_SEND: |
2023 | break; | 2033 | err = sys_send(a0, (void __user *)a1, a[2], a[3]); |
2024 | case SYS_RECV: | 2034 | break; |
2025 | err = sys_recv(a0, (void __user *)a1, a[2], a[3]); | 2035 | case SYS_SENDTO: |
2026 | break; | 2036 | err = sys_sendto(a0, (void __user *)a1, a[2], a[3], |
2027 | case SYS_RECVFROM: | 2037 | (struct sockaddr __user *)a[4], a[5]); |
2028 | err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], | 2038 | break; |
2029 | (struct sockaddr __user *)a[4], (int __user *)a[5]); | 2039 | case SYS_RECV: |
2030 | break; | 2040 | err = sys_recv(a0, (void __user *)a1, a[2], a[3]); |
2031 | case SYS_SHUTDOWN: | 2041 | break; |
2032 | err = sys_shutdown(a0,a1); | 2042 | case SYS_RECVFROM: |
2033 | break; | 2043 | err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], |
2034 | case SYS_SETSOCKOPT: | 2044 | (struct sockaddr __user *)a[4], |
2035 | err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); | 2045 | (int __user *)a[5]); |
2036 | break; | 2046 | break; |
2037 | case SYS_GETSOCKOPT: | 2047 | case SYS_SHUTDOWN: |
2038 | err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]); | 2048 | err = sys_shutdown(a0, a1); |
2039 | break; | 2049 | break; |
2040 | case SYS_SENDMSG: | 2050 | case SYS_SETSOCKOPT: |
2041 | err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]); | 2051 | err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); |
2042 | break; | 2052 | break; |
2043 | case SYS_RECVMSG: | 2053 | case SYS_GETSOCKOPT: |
2044 | err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]); | 2054 | err = |
2045 | break; | 2055 | sys_getsockopt(a0, a1, a[2], (char __user *)a[3], |
2046 | default: | 2056 | (int __user *)a[4]); |
2047 | err = -EINVAL; | 2057 | break; |
2048 | break; | 2058 | case SYS_SENDMSG: |
2059 | err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); | ||
2060 | break; | ||
2061 | case SYS_RECVMSG: | ||
2062 | err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); | ||
2063 | break; | ||
2064 | default: | ||
2065 | err = -EINVAL; | ||
2066 | break; | ||
2049 | } | 2067 | } |
2050 | return err; | 2068 | return err; |
2051 | } | 2069 | } |
2052 | 2070 | ||
2053 | #endif /* __ARCH_WANT_SYS_SOCKETCALL */ | 2071 | #endif /* __ARCH_WANT_SYS_SOCKETCALL */ |
2054 | 2072 | ||
2055 | /* | 2073 | /** |
2074 | * sock_register - add a socket protocol handler | ||
2075 | * @ops: description of protocol | ||
2076 | * | ||
2056 | * This function is called by a protocol handler that wants to | 2077 | * This function is called by a protocol handler that wants to |
2057 | * advertise its address family, and have it linked into the | 2078 | * advertise its address family, and have it linked into the |
2058 | * SOCKET module. | 2079 | * socket interface. The value ops->family coresponds to the |
2080 | * socket system call protocol family. | ||
2059 | */ | 2081 | */ |
2060 | 2082 | int sock_register(const struct net_proto_family *ops) | |
2061 | int sock_register(struct net_proto_family *ops) | ||
2062 | { | 2083 | { |
2063 | int err; | 2084 | int err; |
2064 | 2085 | ||
2065 | if (ops->family >= NPROTO) { | 2086 | if (ops->family >= NPROTO) { |
2066 | printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); | 2087 | printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, |
2088 | NPROTO); | ||
2067 | return -ENOBUFS; | 2089 | return -ENOBUFS; |
2068 | } | 2090 | } |
2069 | net_family_write_lock(); | 2091 | |
2070 | err = -EEXIST; | 2092 | spin_lock(&net_family_lock); |
2071 | if (net_families[ops->family] == NULL) { | 2093 | if (net_families[ops->family]) |
2072 | net_families[ops->family]=ops; | 2094 | err = -EEXIST; |
2095 | else { | ||
2096 | net_families[ops->family] = ops; | ||
2073 | err = 0; | 2097 | err = 0; |
2074 | } | 2098 | } |
2075 | net_family_write_unlock(); | 2099 | spin_unlock(&net_family_lock); |
2076 | printk(KERN_INFO "NET: Registered protocol family %d\n", | 2100 | |
2077 | ops->family); | 2101 | printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); |
2078 | return err; | 2102 | return err; |
2079 | } | 2103 | } |
2080 | 2104 | ||
2081 | /* | 2105 | /** |
2106 | * sock_unregister - remove a protocol handler | ||
2107 | * @family: protocol family to remove | ||
2108 | * | ||
2082 | * This function is called by a protocol handler that wants to | 2109 | * This function is called by a protocol handler that wants to |
2083 | * remove its address family, and have it unlinked from the | 2110 | * remove its address family, and have it unlinked from the |
2084 | * SOCKET module. | 2111 | * new socket creation. |
2112 | * | ||
2113 | * If protocol handler is a module, then it can use module reference | ||
2114 | * counts to protect against new references. If protocol handler is not | ||
2115 | * a module then it needs to provide its own protection in | ||
2116 | * the ops->create routine. | ||
2085 | */ | 2117 | */ |
2086 | 2118 | void sock_unregister(int family) | |
2087 | int sock_unregister(int family) | ||
2088 | { | 2119 | { |
2089 | if (family < 0 || family >= NPROTO) | 2120 | BUG_ON(family < 0 || family >= NPROTO); |
2090 | return -1; | ||
2091 | 2121 | ||
2092 | net_family_write_lock(); | 2122 | spin_lock(&net_family_lock); |
2093 | net_families[family]=NULL; | 2123 | net_families[family] = NULL; |
2094 | net_family_write_unlock(); | 2124 | spin_unlock(&net_family_lock); |
2095 | printk(KERN_INFO "NET: Unregistered protocol family %d\n", | 2125 | |
2096 | family); | 2126 | synchronize_rcu(); |
2097 | return 0; | 2127 | |
2128 | printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); | ||
2098 | } | 2129 | } |
2099 | 2130 | ||
2100 | static int __init sock_init(void) | 2131 | static int __init sock_init(void) |
2101 | { | 2132 | { |
2102 | /* | 2133 | /* |
2103 | * Initialize sock SLAB cache. | 2134 | * Initialize sock SLAB cache. |
2104 | */ | 2135 | */ |
2105 | 2136 | ||
2106 | sk_init(); | 2137 | sk_init(); |
2107 | 2138 | ||
2108 | /* | 2139 | /* |
2109 | * Initialize skbuff SLAB cache | 2140 | * Initialize skbuff SLAB cache |
2110 | */ | 2141 | */ |
2111 | skb_init(); | 2142 | skb_init(); |
2112 | 2143 | ||
2113 | /* | 2144 | /* |
2114 | * Initialize the protocols module. | 2145 | * Initialize the protocols module. |
2115 | */ | 2146 | */ |
2116 | 2147 | ||
2117 | init_inodecache(); | 2148 | init_inodecache(); |
@@ -2137,7 +2168,7 @@ void socket_seq_show(struct seq_file *seq) | |||
2137 | int counter = 0; | 2168 | int counter = 0; |
2138 | 2169 | ||
2139 | for_each_possible_cpu(cpu) | 2170 | for_each_possible_cpu(cpu) |
2140 | counter += per_cpu(sockets_in_use, cpu); | 2171 | counter += per_cpu(sockets_in_use, cpu); |
2141 | 2172 | ||
2142 | /* It can be negative, by the way. 8) */ | 2173 | /* It can be negative, by the way. 8) */ |
2143 | if (counter < 0) | 2174 | if (counter < 0) |
@@ -2145,11 +2176,11 @@ void socket_seq_show(struct seq_file *seq) | |||
2145 | 2176 | ||
2146 | seq_printf(seq, "sockets: used %d\n", counter); | 2177 | seq_printf(seq, "sockets: used %d\n", counter); |
2147 | } | 2178 | } |
2148 | #endif /* CONFIG_PROC_FS */ | 2179 | #endif /* CONFIG_PROC_FS */ |
2149 | 2180 | ||
2150 | #ifdef CONFIG_COMPAT | 2181 | #ifdef CONFIG_COMPAT |
2151 | static long compat_sock_ioctl(struct file *file, unsigned cmd, | 2182 | static long compat_sock_ioctl(struct file *file, unsigned cmd, |
2152 | unsigned long arg) | 2183 | unsigned long arg) |
2153 | { | 2184 | { |
2154 | struct socket *sock = file->private_data; | 2185 | struct socket *sock = file->private_data; |
2155 | int ret = -ENOIOCTLCMD; | 2186 | int ret = -ENOIOCTLCMD; |
@@ -2161,6 +2192,109 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd, | |||
2161 | } | 2192 | } |
2162 | #endif | 2193 | #endif |
2163 | 2194 | ||
2195 | int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) | ||
2196 | { | ||
2197 | return sock->ops->bind(sock, addr, addrlen); | ||
2198 | } | ||
2199 | |||
2200 | int kernel_listen(struct socket *sock, int backlog) | ||
2201 | { | ||
2202 | return sock->ops->listen(sock, backlog); | ||
2203 | } | ||
2204 | |||
2205 | int kernel_accept(struct socket *sock, struct socket **newsock, int flags) | ||
2206 | { | ||
2207 | struct sock *sk = sock->sk; | ||
2208 | int err; | ||
2209 | |||
2210 | err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, | ||
2211 | newsock); | ||
2212 | if (err < 0) | ||
2213 | goto done; | ||
2214 | |||
2215 | err = sock->ops->accept(sock, *newsock, flags); | ||
2216 | if (err < 0) { | ||
2217 | sock_release(*newsock); | ||
2218 | goto done; | ||
2219 | } | ||
2220 | |||
2221 | (*newsock)->ops = sock->ops; | ||
2222 | |||
2223 | done: | ||
2224 | return err; | ||
2225 | } | ||
2226 | |||
2227 | int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, | ||
2228 | int flags) | ||
2229 | { | ||
2230 | return sock->ops->connect(sock, addr, addrlen, flags); | ||
2231 | } | ||
2232 | |||
2233 | int kernel_getsockname(struct socket *sock, struct sockaddr *addr, | ||
2234 | int *addrlen) | ||
2235 | { | ||
2236 | return sock->ops->getname(sock, addr, addrlen, 0); | ||
2237 | } | ||
2238 | |||
2239 | int kernel_getpeername(struct socket *sock, struct sockaddr *addr, | ||
2240 | int *addrlen) | ||
2241 | { | ||
2242 | return sock->ops->getname(sock, addr, addrlen, 1); | ||
2243 | } | ||
2244 | |||
2245 | int kernel_getsockopt(struct socket *sock, int level, int optname, | ||
2246 | char *optval, int *optlen) | ||
2247 | { | ||
2248 | mm_segment_t oldfs = get_fs(); | ||
2249 | int err; | ||
2250 | |||
2251 | set_fs(KERNEL_DS); | ||
2252 | if (level == SOL_SOCKET) | ||
2253 | err = sock_getsockopt(sock, level, optname, optval, optlen); | ||
2254 | else | ||
2255 | err = sock->ops->getsockopt(sock, level, optname, optval, | ||
2256 | optlen); | ||
2257 | set_fs(oldfs); | ||
2258 | return err; | ||
2259 | } | ||
2260 | |||
2261 | int kernel_setsockopt(struct socket *sock, int level, int optname, | ||
2262 | char *optval, int optlen) | ||
2263 | { | ||
2264 | mm_segment_t oldfs = get_fs(); | ||
2265 | int err; | ||
2266 | |||
2267 | set_fs(KERNEL_DS); | ||
2268 | if (level == SOL_SOCKET) | ||
2269 | err = sock_setsockopt(sock, level, optname, optval, optlen); | ||
2270 | else | ||
2271 | err = sock->ops->setsockopt(sock, level, optname, optval, | ||
2272 | optlen); | ||
2273 | set_fs(oldfs); | ||
2274 | return err; | ||
2275 | } | ||
2276 | |||
2277 | int kernel_sendpage(struct socket *sock, struct page *page, int offset, | ||
2278 | size_t size, int flags) | ||
2279 | { | ||
2280 | if (sock->ops->sendpage) | ||
2281 | return sock->ops->sendpage(sock, page, offset, size, flags); | ||
2282 | |||
2283 | return sock_no_sendpage(sock, page, offset, size, flags); | ||
2284 | } | ||
2285 | |||
2286 | int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) | ||
2287 | { | ||
2288 | mm_segment_t oldfs = get_fs(); | ||
2289 | int err; | ||
2290 | |||
2291 | set_fs(KERNEL_DS); | ||
2292 | err = sock->ops->ioctl(sock, cmd, arg); | ||
2293 | set_fs(oldfs); | ||
2294 | |||
2295 | return err; | ||
2296 | } | ||
2297 | |||
2164 | /* ABI emulation layers need these two */ | 2298 | /* ABI emulation layers need these two */ |
2165 | EXPORT_SYMBOL(move_addr_to_kernel); | 2299 | EXPORT_SYMBOL(move_addr_to_kernel); |
2166 | EXPORT_SYMBOL(move_addr_to_user); | 2300 | EXPORT_SYMBOL(move_addr_to_user); |
@@ -2177,3 +2311,13 @@ EXPORT_SYMBOL(sock_wake_async); | |||
2177 | EXPORT_SYMBOL(sockfd_lookup); | 2311 | EXPORT_SYMBOL(sockfd_lookup); |
2178 | EXPORT_SYMBOL(kernel_sendmsg); | 2312 | EXPORT_SYMBOL(kernel_sendmsg); |
2179 | EXPORT_SYMBOL(kernel_recvmsg); | 2313 | EXPORT_SYMBOL(kernel_recvmsg); |
2314 | EXPORT_SYMBOL(kernel_bind); | ||
2315 | EXPORT_SYMBOL(kernel_listen); | ||
2316 | EXPORT_SYMBOL(kernel_accept); | ||
2317 | EXPORT_SYMBOL(kernel_connect); | ||
2318 | EXPORT_SYMBOL(kernel_getsockname); | ||
2319 | EXPORT_SYMBOL(kernel_getpeername); | ||
2320 | EXPORT_SYMBOL(kernel_getsockopt); | ||
2321 | EXPORT_SYMBOL(kernel_setsockopt); | ||
2322 | EXPORT_SYMBOL(kernel_sendpage); | ||
2323 | EXPORT_SYMBOL(kernel_sock_ioctl); | ||
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index ef1cf5b476c8..6eed3e166ba3 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c | |||
@@ -88,7 +88,6 @@ struct gss_auth { | |||
88 | struct list_head upcalls; | 88 | struct list_head upcalls; |
89 | struct rpc_clnt *client; | 89 | struct rpc_clnt *client; |
90 | struct dentry *dentry; | 90 | struct dentry *dentry; |
91 | char path[48]; | ||
92 | spinlock_t lock; | 91 | spinlock_t lock; |
93 | }; | 92 | }; |
94 | 93 | ||
@@ -690,10 +689,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) | |||
690 | if (err) | 689 | if (err) |
691 | goto err_put_mech; | 690 | goto err_put_mech; |
692 | 691 | ||
693 | snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", | 692 | gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, |
694 | clnt->cl_pathname, | 693 | clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); |
695 | gss_auth->mech->gm_name); | ||
696 | gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); | ||
697 | if (IS_ERR(gss_auth->dentry)) { | 694 | if (IS_ERR(gss_auth->dentry)) { |
698 | err = PTR_ERR(gss_auth->dentry); | 695 | err = PTR_ERR(gss_auth->dentry); |
699 | goto err_put_mech; | 696 | goto err_put_mech; |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3e19d321067a..084a0ad5c64e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -97,17 +97,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) | |||
97 | } | 97 | } |
98 | } | 98 | } |
99 | 99 | ||
100 | /* | 100 | static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor) |
101 | * Create an RPC client | ||
102 | * FIXME: This should also take a flags argument (as in task->tk_flags). | ||
103 | * It's called (among others) from pmap_create_client, which may in | ||
104 | * turn be called by an async task. In this case, rpciod should not be | ||
105 | * made to sleep too long. | ||
106 | */ | ||
107 | struct rpc_clnt * | ||
108 | rpc_new_client(struct rpc_xprt *xprt, char *servname, | ||
109 | struct rpc_program *program, u32 vers, | ||
110 | rpc_authflavor_t flavor) | ||
111 | { | 101 | { |
112 | struct rpc_version *version; | 102 | struct rpc_version *version; |
113 | struct rpc_clnt *clnt = NULL; | 103 | struct rpc_clnt *clnt = NULL; |
@@ -147,16 +137,12 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, | |||
147 | clnt->cl_procinfo = version->procs; | 137 | clnt->cl_procinfo = version->procs; |
148 | clnt->cl_maxproc = version->nrprocs; | 138 | clnt->cl_maxproc = version->nrprocs; |
149 | clnt->cl_protname = program->name; | 139 | clnt->cl_protname = program->name; |
150 | clnt->cl_pmap = &clnt->cl_pmap_default; | ||
151 | clnt->cl_port = xprt->addr.sin_port; | ||
152 | clnt->cl_prog = program->number; | 140 | clnt->cl_prog = program->number; |
153 | clnt->cl_vers = version->number; | 141 | clnt->cl_vers = version->number; |
154 | clnt->cl_prot = xprt->prot; | ||
155 | clnt->cl_stats = program->stats; | 142 | clnt->cl_stats = program->stats; |
156 | clnt->cl_metrics = rpc_alloc_iostats(clnt); | 143 | clnt->cl_metrics = rpc_alloc_iostats(clnt); |
157 | rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); | ||
158 | 144 | ||
159 | if (!clnt->cl_port) | 145 | if (!xprt_bound(clnt->cl_xprt)) |
160 | clnt->cl_autobind = 1; | 146 | clnt->cl_autobind = 1; |
161 | 147 | ||
162 | clnt->cl_rtt = &clnt->cl_rtt_default; | 148 | clnt->cl_rtt = &clnt->cl_rtt_default; |
@@ -191,40 +177,71 @@ out_no_path: | |||
191 | kfree(clnt->cl_server); | 177 | kfree(clnt->cl_server); |
192 | kfree(clnt); | 178 | kfree(clnt); |
193 | out_err: | 179 | out_err: |
194 | xprt_destroy(xprt); | 180 | xprt_put(xprt); |
195 | out_no_xprt: | 181 | out_no_xprt: |
196 | return ERR_PTR(err); | 182 | return ERR_PTR(err); |
197 | } | 183 | } |
198 | 184 | ||
199 | /** | 185 | /* |
200 | * Create an RPC client | 186 | * rpc_create - create an RPC client and transport with one call |
201 | * @xprt - pointer to xprt struct | 187 | * @args: rpc_clnt create argument structure |
202 | * @servname - name of server | ||
203 | * @info - rpc_program | ||
204 | * @version - rpc_program version | ||
205 | * @authflavor - rpc_auth flavour to use | ||
206 | * | 188 | * |
207 | * Creates an RPC client structure, then pings the server in order to | 189 | * Creates and initializes an RPC transport and an RPC client. |
208 | * determine if it is up, and if it supports this program and version. | ||
209 | * | 190 | * |
210 | * This function should never be called by asynchronous tasks such as | 191 | * It can ping the server in order to determine if it is up, and to see if |
211 | * the portmapper. | 192 | * it supports this program and version. RPC_CLNT_CREATE_NOPING disables |
193 | * this behavior so asynchronous tasks can also use rpc_create. | ||
212 | */ | 194 | */ |
213 | struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, | 195 | struct rpc_clnt *rpc_create(struct rpc_create_args *args) |
214 | struct rpc_program *info, u32 version, rpc_authflavor_t authflavor) | ||
215 | { | 196 | { |
197 | struct rpc_xprt *xprt; | ||
216 | struct rpc_clnt *clnt; | 198 | struct rpc_clnt *clnt; |
217 | int err; | 199 | |
218 | 200 | xprt = xprt_create_transport(args->protocol, args->address, | |
219 | clnt = rpc_new_client(xprt, servname, info, version, authflavor); | 201 | args->addrsize, args->timeout); |
202 | if (IS_ERR(xprt)) | ||
203 | return (struct rpc_clnt *)xprt; | ||
204 | |||
205 | /* | ||
206 | * By default, kernel RPC client connects from a reserved port. | ||
207 | * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, | ||
208 | * but it is always enabled for rpciod, which handles the connect | ||
209 | * operation. | ||
210 | */ | ||
211 | xprt->resvport = 1; | ||
212 | if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) | ||
213 | xprt->resvport = 0; | ||
214 | |||
215 | dprintk("RPC: creating %s client for %s (xprt %p)\n", | ||
216 | args->program->name, args->servername, xprt); | ||
217 | |||
218 | clnt = rpc_new_client(xprt, args->servername, args->program, | ||
219 | args->version, args->authflavor); | ||
220 | if (IS_ERR(clnt)) | 220 | if (IS_ERR(clnt)) |
221 | return clnt; | 221 | return clnt; |
222 | err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); | 222 | |
223 | if (err == 0) | 223 | if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { |
224 | return clnt; | 224 | int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); |
225 | rpc_shutdown_client(clnt); | 225 | if (err != 0) { |
226 | return ERR_PTR(err); | 226 | rpc_shutdown_client(clnt); |
227 | return ERR_PTR(err); | ||
228 | } | ||
229 | } | ||
230 | |||
231 | clnt->cl_softrtry = 1; | ||
232 | if (args->flags & RPC_CLNT_CREATE_HARDRTRY) | ||
233 | clnt->cl_softrtry = 0; | ||
234 | |||
235 | if (args->flags & RPC_CLNT_CREATE_INTR) | ||
236 | clnt->cl_intr = 1; | ||
237 | if (args->flags & RPC_CLNT_CREATE_AUTOBIND) | ||
238 | clnt->cl_autobind = 1; | ||
239 | if (args->flags & RPC_CLNT_CREATE_ONESHOT) | ||
240 | clnt->cl_oneshot = 1; | ||
241 | |||
242 | return clnt; | ||
227 | } | 243 | } |
244 | EXPORT_SYMBOL_GPL(rpc_create); | ||
228 | 245 | ||
229 | /* | 246 | /* |
230 | * This function clones the RPC client structure. It allows us to share the | 247 | * This function clones the RPC client structure. It allows us to share the |
@@ -244,8 +261,7 @@ rpc_clone_client(struct rpc_clnt *clnt) | |||
244 | atomic_set(&new->cl_users, 0); | 261 | atomic_set(&new->cl_users, 0); |
245 | new->cl_parent = clnt; | 262 | new->cl_parent = clnt; |
246 | atomic_inc(&clnt->cl_count); | 263 | atomic_inc(&clnt->cl_count); |
247 | /* Duplicate portmapper */ | 264 | new->cl_xprt = xprt_get(clnt->cl_xprt); |
248 | rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait"); | ||
249 | /* Turn off autobind on clones */ | 265 | /* Turn off autobind on clones */ |
250 | new->cl_autobind = 0; | 266 | new->cl_autobind = 0; |
251 | new->cl_oneshot = 0; | 267 | new->cl_oneshot = 0; |
@@ -255,8 +271,7 @@ rpc_clone_client(struct rpc_clnt *clnt) | |||
255 | rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); | 271 | rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); |
256 | if (new->cl_auth) | 272 | if (new->cl_auth) |
257 | atomic_inc(&new->cl_auth->au_count); | 273 | atomic_inc(&new->cl_auth->au_count); |
258 | new->cl_pmap = &new->cl_pmap_default; | 274 | new->cl_metrics = rpc_alloc_iostats(clnt); |
259 | new->cl_metrics = rpc_alloc_iostats(clnt); | ||
260 | return new; | 275 | return new; |
261 | out_no_clnt: | 276 | out_no_clnt: |
262 | printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); | 277 | printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); |
@@ -323,15 +338,12 @@ rpc_destroy_client(struct rpc_clnt *clnt) | |||
323 | rpc_rmdir(clnt->cl_dentry); | 338 | rpc_rmdir(clnt->cl_dentry); |
324 | rpc_put_mount(); | 339 | rpc_put_mount(); |
325 | } | 340 | } |
326 | if (clnt->cl_xprt) { | ||
327 | xprt_destroy(clnt->cl_xprt); | ||
328 | clnt->cl_xprt = NULL; | ||
329 | } | ||
330 | if (clnt->cl_server != clnt->cl_inline_name) | 341 | if (clnt->cl_server != clnt->cl_inline_name) |
331 | kfree(clnt->cl_server); | 342 | kfree(clnt->cl_server); |
332 | out_free: | 343 | out_free: |
333 | rpc_free_iostats(clnt->cl_metrics); | 344 | rpc_free_iostats(clnt->cl_metrics); |
334 | clnt->cl_metrics = NULL; | 345 | clnt->cl_metrics = NULL; |
346 | xprt_put(clnt->cl_xprt); | ||
335 | kfree(clnt); | 347 | kfree(clnt); |
336 | return 0; | 348 | return 0; |
337 | } | 349 | } |
@@ -540,6 +552,40 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) | |||
540 | task->tk_action = rpc_exit_task; | 552 | task->tk_action = rpc_exit_task; |
541 | } | 553 | } |
542 | 554 | ||
555 | /** | ||
556 | * rpc_peeraddr - extract remote peer address from clnt's xprt | ||
557 | * @clnt: RPC client structure | ||
558 | * @buf: target buffer | ||
559 | * @size: length of target buffer | ||
560 | * | ||
561 | * Returns the number of bytes that are actually in the stored address. | ||
562 | */ | ||
563 | size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize) | ||
564 | { | ||
565 | size_t bytes; | ||
566 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
567 | |||
568 | bytes = sizeof(xprt->addr); | ||
569 | if (bytes > bufsize) | ||
570 | bytes = bufsize; | ||
571 | memcpy(buf, &clnt->cl_xprt->addr, bytes); | ||
572 | return xprt->addrlen; | ||
573 | } | ||
574 | EXPORT_SYMBOL_GPL(rpc_peeraddr); | ||
575 | |||
576 | /** | ||
577 | * rpc_peeraddr2str - return remote peer address in printable format | ||
578 | * @clnt: RPC client structure | ||
579 | * @format: address format | ||
580 | * | ||
581 | */ | ||
582 | char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) | ||
583 | { | ||
584 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
585 | return xprt->ops->print_addr(xprt, format); | ||
586 | } | ||
587 | EXPORT_SYMBOL_GPL(rpc_peeraddr2str); | ||
588 | |||
543 | void | 589 | void |
544 | rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) | 590 | rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) |
545 | { | 591 | { |
@@ -560,7 +606,7 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) | |||
560 | { | 606 | { |
561 | return clnt->cl_xprt->max_payload; | 607 | return clnt->cl_xprt->max_payload; |
562 | } | 608 | } |
563 | EXPORT_SYMBOL(rpc_max_payload); | 609 | EXPORT_SYMBOL_GPL(rpc_max_payload); |
564 | 610 | ||
565 | /** | 611 | /** |
566 | * rpc_force_rebind - force transport to check that remote port is unchanged | 612 | * rpc_force_rebind - force transport to check that remote port is unchanged |
@@ -570,9 +616,9 @@ EXPORT_SYMBOL(rpc_max_payload); | |||
570 | void rpc_force_rebind(struct rpc_clnt *clnt) | 616 | void rpc_force_rebind(struct rpc_clnt *clnt) |
571 | { | 617 | { |
572 | if (clnt->cl_autobind) | 618 | if (clnt->cl_autobind) |
573 | clnt->cl_port = 0; | 619 | xprt_clear_bound(clnt->cl_xprt); |
574 | } | 620 | } |
575 | EXPORT_SYMBOL(rpc_force_rebind); | 621 | EXPORT_SYMBOL_GPL(rpc_force_rebind); |
576 | 622 | ||
577 | /* | 623 | /* |
578 | * Restart an (async) RPC call. Usually called from within the | 624 | * Restart an (async) RPC call. Usually called from within the |
@@ -781,16 +827,16 @@ call_encode(struct rpc_task *task) | |||
781 | static void | 827 | static void |
782 | call_bind(struct rpc_task *task) | 828 | call_bind(struct rpc_task *task) |
783 | { | 829 | { |
784 | struct rpc_clnt *clnt = task->tk_client; | 830 | struct rpc_xprt *xprt = task->tk_xprt; |
785 | 831 | ||
786 | dprintk("RPC: %4d call_bind (status %d)\n", | 832 | dprintk("RPC: %4d call_bind (status %d)\n", |
787 | task->tk_pid, task->tk_status); | 833 | task->tk_pid, task->tk_status); |
788 | 834 | ||
789 | task->tk_action = call_connect; | 835 | task->tk_action = call_connect; |
790 | if (!clnt->cl_port) { | 836 | if (!xprt_bound(xprt)) { |
791 | task->tk_action = call_bind_status; | 837 | task->tk_action = call_bind_status; |
792 | task->tk_timeout = task->tk_xprt->bind_timeout; | 838 | task->tk_timeout = xprt->bind_timeout; |
793 | rpc_getport(task, clnt); | 839 | xprt->ops->rpcbind(task); |
794 | } | 840 | } |
795 | } | 841 | } |
796 | 842 | ||
@@ -815,15 +861,11 @@ call_bind_status(struct rpc_task *task) | |||
815 | dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", | 861 | dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", |
816 | task->tk_pid); | 862 | task->tk_pid); |
817 | rpc_delay(task, 3*HZ); | 863 | rpc_delay(task, 3*HZ); |
818 | goto retry_bind; | 864 | goto retry_timeout; |
819 | case -ETIMEDOUT: | 865 | case -ETIMEDOUT: |
820 | dprintk("RPC: %4d rpcbind request timed out\n", | 866 | dprintk("RPC: %4d rpcbind request timed out\n", |
821 | task->tk_pid); | 867 | task->tk_pid); |
822 | if (RPC_IS_SOFT(task)) { | 868 | goto retry_timeout; |
823 | status = -EIO; | ||
824 | break; | ||
825 | } | ||
826 | goto retry_bind; | ||
827 | case -EPFNOSUPPORT: | 869 | case -EPFNOSUPPORT: |
828 | dprintk("RPC: %4d remote rpcbind service unavailable\n", | 870 | dprintk("RPC: %4d remote rpcbind service unavailable\n", |
829 | task->tk_pid); | 871 | task->tk_pid); |
@@ -836,16 +878,13 @@ call_bind_status(struct rpc_task *task) | |||
836 | dprintk("RPC: %4d unrecognized rpcbind error (%d)\n", | 878 | dprintk("RPC: %4d unrecognized rpcbind error (%d)\n", |
837 | task->tk_pid, -task->tk_status); | 879 | task->tk_pid, -task->tk_status); |
838 | status = -EIO; | 880 | status = -EIO; |
839 | break; | ||
840 | } | 881 | } |
841 | 882 | ||
842 | rpc_exit(task, status); | 883 | rpc_exit(task, status); |
843 | return; | 884 | return; |
844 | 885 | ||
845 | retry_bind: | 886 | retry_timeout: |
846 | task->tk_status = 0; | 887 | task->tk_action = call_timeout; |
847 | task->tk_action = call_bind; | ||
848 | return; | ||
849 | } | 888 | } |
850 | 889 | ||
851 | /* | 890 | /* |
@@ -893,14 +932,16 @@ call_connect_status(struct rpc_task *task) | |||
893 | 932 | ||
894 | switch (status) { | 933 | switch (status) { |
895 | case -ENOTCONN: | 934 | case -ENOTCONN: |
896 | case -ETIMEDOUT: | ||
897 | case -EAGAIN: | 935 | case -EAGAIN: |
898 | task->tk_action = call_bind; | 936 | task->tk_action = call_bind; |
899 | break; | 937 | if (!RPC_IS_SOFT(task)) |
900 | default: | 938 | return; |
901 | rpc_exit(task, -EIO); | 939 | /* if soft mounted, test if we've timed out */ |
902 | break; | 940 | case -ETIMEDOUT: |
941 | task->tk_action = call_timeout; | ||
942 | return; | ||
903 | } | 943 | } |
944 | rpc_exit(task, -EIO); | ||
904 | } | 945 | } |
905 | 946 | ||
906 | /* | 947 | /* |
@@ -982,6 +1023,14 @@ call_status(struct rpc_task *task) | |||
982 | 1023 | ||
983 | task->tk_status = 0; | 1024 | task->tk_status = 0; |
984 | switch(status) { | 1025 | switch(status) { |
1026 | case -EHOSTDOWN: | ||
1027 | case -EHOSTUNREACH: | ||
1028 | case -ENETUNREACH: | ||
1029 | /* | ||
1030 | * Delay any retries for 3 seconds, then handle as if it | ||
1031 | * were a timeout. | ||
1032 | */ | ||
1033 | rpc_delay(task, 3*HZ); | ||
985 | case -ETIMEDOUT: | 1034 | case -ETIMEDOUT: |
986 | task->tk_action = call_timeout; | 1035 | task->tk_action = call_timeout; |
987 | break; | 1036 | break; |
@@ -1001,7 +1050,6 @@ call_status(struct rpc_task *task) | |||
1001 | printk("%s: RPC call returned error %d\n", | 1050 | printk("%s: RPC call returned error %d\n", |
1002 | clnt->cl_protname, -status); | 1051 | clnt->cl_protname, -status); |
1003 | rpc_exit(task, status); | 1052 | rpc_exit(task, status); |
1004 | break; | ||
1005 | } | 1053 | } |
1006 | } | 1054 | } |
1007 | 1055 | ||
@@ -1069,10 +1117,10 @@ call_decode(struct rpc_task *task) | |||
1069 | clnt->cl_stats->rpcretrans++; | 1117 | clnt->cl_stats->rpcretrans++; |
1070 | goto out_retry; | 1118 | goto out_retry; |
1071 | } | 1119 | } |
1072 | printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n", | 1120 | dprintk("%s: too small RPC reply size (%d bytes)\n", |
1073 | clnt->cl_protname, task->tk_status); | 1121 | clnt->cl_protname, task->tk_status); |
1074 | rpc_exit(task, -EIO); | 1122 | task->tk_action = call_timeout; |
1075 | return; | 1123 | goto out_retry; |
1076 | } | 1124 | } |
1077 | 1125 | ||
1078 | /* | 1126 | /* |
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 623180f224c9..c04609d3476a 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c | |||
@@ -1,7 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * linux/net/sunrpc/pmap.c | 2 | * linux/net/sunrpc/pmap_clnt.c |
3 | * | 3 | * |
4 | * Portmapper client. | 4 | * In-kernel RPC portmapper client. |
5 | * | ||
6 | * Portmapper supports version 2 of the rpcbind protocol (RFC 1833). | ||
5 | * | 7 | * |
6 | * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> | 8 | * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> |
7 | */ | 9 | */ |
@@ -13,7 +15,6 @@ | |||
13 | #include <linux/uio.h> | 15 | #include <linux/uio.h> |
14 | #include <linux/in.h> | 16 | #include <linux/in.h> |
15 | #include <linux/sunrpc/clnt.h> | 17 | #include <linux/sunrpc/clnt.h> |
16 | #include <linux/sunrpc/xprt.h> | ||
17 | #include <linux/sunrpc/sched.h> | 18 | #include <linux/sunrpc/sched.h> |
18 | 19 | ||
19 | #ifdef RPC_DEBUG | 20 | #ifdef RPC_DEBUG |
@@ -24,80 +25,141 @@ | |||
24 | #define PMAP_UNSET 2 | 25 | #define PMAP_UNSET 2 |
25 | #define PMAP_GETPORT 3 | 26 | #define PMAP_GETPORT 3 |
26 | 27 | ||
28 | struct portmap_args { | ||
29 | u32 pm_prog; | ||
30 | u32 pm_vers; | ||
31 | u32 pm_prot; | ||
32 | unsigned short pm_port; | ||
33 | struct rpc_xprt * pm_xprt; | ||
34 | }; | ||
35 | |||
27 | static struct rpc_procinfo pmap_procedures[]; | 36 | static struct rpc_procinfo pmap_procedures[]; |
28 | static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); | 37 | static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); |
29 | static void pmap_getport_done(struct rpc_task *); | 38 | static void pmap_getport_done(struct rpc_task *, void *); |
30 | static struct rpc_program pmap_program; | 39 | static struct rpc_program pmap_program; |
31 | static DEFINE_SPINLOCK(pmap_lock); | ||
32 | 40 | ||
33 | /* | 41 | static void pmap_getport_prepare(struct rpc_task *task, void *calldata) |
34 | * Obtain the port for a given RPC service on a given host. This one can | ||
35 | * be called for an ongoing RPC request. | ||
36 | */ | ||
37 | void | ||
38 | rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) | ||
39 | { | 42 | { |
40 | struct rpc_portmap *map = clnt->cl_pmap; | 43 | struct portmap_args *map = calldata; |
41 | struct sockaddr_in *sap = &clnt->cl_xprt->addr; | ||
42 | struct rpc_message msg = { | 44 | struct rpc_message msg = { |
43 | .rpc_proc = &pmap_procedures[PMAP_GETPORT], | 45 | .rpc_proc = &pmap_procedures[PMAP_GETPORT], |
44 | .rpc_argp = map, | 46 | .rpc_argp = map, |
45 | .rpc_resp = &clnt->cl_port, | 47 | .rpc_resp = &map->pm_port, |
46 | .rpc_cred = NULL | ||
47 | }; | 48 | }; |
49 | |||
50 | rpc_call_setup(task, &msg, 0); | ||
51 | } | ||
52 | |||
53 | static inline struct portmap_args *pmap_map_alloc(void) | ||
54 | { | ||
55 | return kmalloc(sizeof(struct portmap_args), GFP_NOFS); | ||
56 | } | ||
57 | |||
58 | static inline void pmap_map_free(struct portmap_args *map) | ||
59 | { | ||
60 | kfree(map); | ||
61 | } | ||
62 | |||
63 | static void pmap_map_release(void *data) | ||
64 | { | ||
65 | pmap_map_free(data); | ||
66 | } | ||
67 | |||
68 | static const struct rpc_call_ops pmap_getport_ops = { | ||
69 | .rpc_call_prepare = pmap_getport_prepare, | ||
70 | .rpc_call_done = pmap_getport_done, | ||
71 | .rpc_release = pmap_map_release, | ||
72 | }; | ||
73 | |||
74 | static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status) | ||
75 | { | ||
76 | xprt_clear_binding(xprt); | ||
77 | rpc_wake_up_status(&xprt->binding, status); | ||
78 | } | ||
79 | |||
80 | /** | ||
81 | * rpc_getport - obtain the port for a given RPC service on a given host | ||
82 | * @task: task that is waiting for portmapper request | ||
83 | * | ||
84 | * This one can be called for an ongoing RPC request, and can be used in | ||
85 | * an async (rpciod) context. | ||
86 | */ | ||
87 | void rpc_getport(struct rpc_task *task) | ||
88 | { | ||
89 | struct rpc_clnt *clnt = task->tk_client; | ||
90 | struct rpc_xprt *xprt = task->tk_xprt; | ||
91 | struct sockaddr_in addr; | ||
92 | struct portmap_args *map; | ||
48 | struct rpc_clnt *pmap_clnt; | 93 | struct rpc_clnt *pmap_clnt; |
49 | struct rpc_task *child; | 94 | struct rpc_task *child; |
95 | int status; | ||
50 | 96 | ||
51 | dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n", | 97 | dprintk("RPC: %4d rpc_getport(%s, %u, %u, %d)\n", |
52 | task->tk_pid, clnt->cl_server, | 98 | task->tk_pid, clnt->cl_server, |
53 | map->pm_prog, map->pm_vers, map->pm_prot); | 99 | clnt->cl_prog, clnt->cl_vers, xprt->prot); |
54 | 100 | ||
55 | /* Autobind on cloned rpc clients is discouraged */ | 101 | /* Autobind on cloned rpc clients is discouraged */ |
56 | BUG_ON(clnt->cl_parent != clnt); | 102 | BUG_ON(clnt->cl_parent != clnt); |
57 | 103 | ||
58 | spin_lock(&pmap_lock); | 104 | if (xprt_test_and_set_binding(xprt)) { |
59 | if (map->pm_binding) { | 105 | task->tk_status = -EACCES; /* tell caller to check again */ |
60 | rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL); | 106 | rpc_sleep_on(&xprt->binding, task, NULL, NULL); |
61 | spin_unlock(&pmap_lock); | ||
62 | return; | 107 | return; |
63 | } | 108 | } |
64 | map->pm_binding = 1; | ||
65 | spin_unlock(&pmap_lock); | ||
66 | 109 | ||
67 | pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); | 110 | /* Someone else may have bound if we slept */ |
68 | if (IS_ERR(pmap_clnt)) { | 111 | status = 0; |
69 | task->tk_status = PTR_ERR(pmap_clnt); | 112 | if (xprt_bound(xprt)) |
113 | goto bailout_nofree; | ||
114 | |||
115 | status = -ENOMEM; | ||
116 | map = pmap_map_alloc(); | ||
117 | if (!map) | ||
118 | goto bailout_nofree; | ||
119 | map->pm_prog = clnt->cl_prog; | ||
120 | map->pm_vers = clnt->cl_vers; | ||
121 | map->pm_prot = xprt->prot; | ||
122 | map->pm_port = 0; | ||
123 | map->pm_xprt = xprt_get(xprt); | ||
124 | |||
125 | rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr)); | ||
126 | pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0); | ||
127 | status = PTR_ERR(pmap_clnt); | ||
128 | if (IS_ERR(pmap_clnt)) | ||
70 | goto bailout; | 129 | goto bailout; |
71 | } | ||
72 | task->tk_status = 0; | ||
73 | 130 | ||
74 | /* | 131 | status = -EIO; |
75 | * Note: rpc_new_child will release client after a failure. | 132 | child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); |
76 | */ | 133 | if (IS_ERR(child)) |
77 | if (!(child = rpc_new_child(pmap_clnt, task))) | ||
78 | goto bailout; | 134 | goto bailout; |
135 | rpc_release_task(child); | ||
79 | 136 | ||
80 | /* Setup the call info struct */ | 137 | rpc_sleep_on(&xprt->binding, task, NULL, NULL); |
81 | rpc_call_setup(child, &msg, 0); | ||
82 | 138 | ||
83 | /* ... and run the child task */ | ||
84 | task->tk_xprt->stat.bind_count++; | 139 | task->tk_xprt->stat.bind_count++; |
85 | rpc_run_child(task, child, pmap_getport_done); | ||
86 | return; | 140 | return; |
87 | 141 | ||
88 | bailout: | 142 | bailout: |
89 | spin_lock(&pmap_lock); | 143 | pmap_map_free(map); |
90 | map->pm_binding = 0; | 144 | xprt_put(xprt); |
91 | rpc_wake_up(&map->pm_bindwait); | 145 | bailout_nofree: |
92 | spin_unlock(&pmap_lock); | 146 | task->tk_status = status; |
93 | rpc_exit(task, -EIO); | 147 | pmap_wake_portmap_waiters(xprt, status); |
94 | } | 148 | } |
95 | 149 | ||
96 | #ifdef CONFIG_ROOT_NFS | 150 | #ifdef CONFIG_ROOT_NFS |
97 | int | 151 | /** |
98 | rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) | 152 | * rpc_getport_external - obtain the port for a given RPC service on a given host |
153 | * @sin: address of remote peer | ||
154 | * @prog: RPC program number to bind | ||
155 | * @vers: RPC version number to bind | ||
156 | * @prot: transport protocol to use to make this request | ||
157 | * | ||
158 | * This one is called from outside the RPC client in a synchronous task context. | ||
159 | */ | ||
160 | int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) | ||
99 | { | 161 | { |
100 | struct rpc_portmap map = { | 162 | struct portmap_args map = { |
101 | .pm_prog = prog, | 163 | .pm_prog = prog, |
102 | .pm_vers = vers, | 164 | .pm_vers = vers, |
103 | .pm_prot = prot, | 165 | .pm_prot = prot, |
@@ -112,7 +174,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) | |||
112 | char hostname[32]; | 174 | char hostname[32]; |
113 | int status; | 175 | int status; |
114 | 176 | ||
115 | dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n", | 177 | dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", |
116 | NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); | 178 | NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); |
117 | 179 | ||
118 | sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); | 180 | sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); |
@@ -132,45 +194,53 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) | |||
132 | } | 194 | } |
133 | #endif | 195 | #endif |
134 | 196 | ||
135 | static void | 197 | /* |
136 | pmap_getport_done(struct rpc_task *task) | 198 | * Portmapper child task invokes this callback via tk_exit. |
199 | */ | ||
200 | static void pmap_getport_done(struct rpc_task *child, void *data) | ||
137 | { | 201 | { |
138 | struct rpc_clnt *clnt = task->tk_client; | 202 | struct portmap_args *map = data; |
139 | struct rpc_xprt *xprt = task->tk_xprt; | 203 | struct rpc_xprt *xprt = map->pm_xprt; |
140 | struct rpc_portmap *map = clnt->cl_pmap; | 204 | int status = child->tk_status; |
141 | 205 | ||
142 | dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", | 206 | if (status < 0) { |
143 | task->tk_pid, task->tk_status, clnt->cl_port); | 207 | /* Portmapper not available */ |
144 | 208 | xprt->ops->set_port(xprt, 0); | |
145 | xprt->ops->set_port(xprt, 0); | 209 | } else if (map->pm_port == 0) { |
146 | if (task->tk_status < 0) { | 210 | /* Requested RPC service wasn't registered */ |
147 | /* Make the calling task exit with an error */ | 211 | xprt->ops->set_port(xprt, 0); |
148 | task->tk_action = rpc_exit_task; | 212 | status = -EACCES; |
149 | } else if (clnt->cl_port == 0) { | ||
150 | /* Program not registered */ | ||
151 | rpc_exit(task, -EACCES); | ||
152 | } else { | 213 | } else { |
153 | xprt->ops->set_port(xprt, clnt->cl_port); | 214 | /* Succeeded */ |
154 | clnt->cl_port = htons(clnt->cl_port); | 215 | xprt->ops->set_port(xprt, map->pm_port); |
216 | xprt_set_bound(xprt); | ||
217 | status = 0; | ||
155 | } | 218 | } |
156 | spin_lock(&pmap_lock); | 219 | |
157 | map->pm_binding = 0; | 220 | dprintk("RPC: %4d pmap_getport_done(status %d, port %u)\n", |
158 | rpc_wake_up(&map->pm_bindwait); | 221 | child->tk_pid, status, map->pm_port); |
159 | spin_unlock(&pmap_lock); | 222 | |
223 | pmap_wake_portmap_waiters(xprt, status); | ||
224 | xprt_put(xprt); | ||
160 | } | 225 | } |
161 | 226 | ||
162 | /* | 227 | /** |
163 | * Set or unset a port registration with the local portmapper. | 228 | * rpc_register - set or unset a port registration with the local portmapper |
229 | * @prog: RPC program number to bind | ||
230 | * @vers: RPC version number to bind | ||
231 | * @prot: transport protocol to use to make this request | ||
232 | * @port: port value to register | ||
233 | * @okay: result code | ||
234 | * | ||
164 | * port == 0 means unregister, port != 0 means register. | 235 | * port == 0 means unregister, port != 0 means register. |
165 | */ | 236 | */ |
166 | int | 237 | int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) |
167 | rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | ||
168 | { | 238 | { |
169 | struct sockaddr_in sin = { | 239 | struct sockaddr_in sin = { |
170 | .sin_family = AF_INET, | 240 | .sin_family = AF_INET, |
171 | .sin_addr.s_addr = htonl(INADDR_LOOPBACK), | 241 | .sin_addr.s_addr = htonl(INADDR_LOOPBACK), |
172 | }; | 242 | }; |
173 | struct rpc_portmap map = { | 243 | struct portmap_args map = { |
174 | .pm_prog = prog, | 244 | .pm_prog = prog, |
175 | .pm_vers = vers, | 245 | .pm_vers = vers, |
176 | .pm_prot = prot, | 246 | .pm_prot = prot, |
@@ -184,7 +254,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
184 | struct rpc_clnt *pmap_clnt; | 254 | struct rpc_clnt *pmap_clnt; |
185 | int error = 0; | 255 | int error = 0; |
186 | 256 | ||
187 | dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n", | 257 | dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n", |
188 | prog, vers, prot, port); | 258 | prog, vers, prot, port); |
189 | 259 | ||
190 | pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); | 260 | pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); |
@@ -207,38 +277,32 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
207 | return error; | 277 | return error; |
208 | } | 278 | } |
209 | 279 | ||
210 | static struct rpc_clnt * | 280 | static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) |
211 | pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) | ||
212 | { | 281 | { |
213 | struct rpc_xprt *xprt; | 282 | struct rpc_create_args args = { |
214 | struct rpc_clnt *clnt; | 283 | .protocol = proto, |
215 | 284 | .address = (struct sockaddr *)srvaddr, | |
216 | /* printk("pmap: create xprt\n"); */ | 285 | .addrsize = sizeof(*srvaddr), |
217 | xprt = xprt_create_proto(proto, srvaddr, NULL); | 286 | .servername = hostname, |
218 | if (IS_ERR(xprt)) | 287 | .program = &pmap_program, |
219 | return (struct rpc_clnt *)xprt; | 288 | .version = RPC_PMAP_VERSION, |
220 | xprt->ops->set_port(xprt, RPC_PMAP_PORT); | 289 | .authflavor = RPC_AUTH_UNIX, |
290 | .flags = (RPC_CLNT_CREATE_ONESHOT | | ||
291 | RPC_CLNT_CREATE_NOPING), | ||
292 | }; | ||
293 | |||
294 | srvaddr->sin_port = htons(RPC_PMAP_PORT); | ||
221 | if (!privileged) | 295 | if (!privileged) |
222 | xprt->resvport = 0; | 296 | args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; |
223 | 297 | return rpc_create(&args); | |
224 | /* printk("pmap: create clnt\n"); */ | ||
225 | clnt = rpc_new_client(xprt, hostname, | ||
226 | &pmap_program, RPC_PMAP_VERSION, | ||
227 | RPC_AUTH_UNIX); | ||
228 | if (!IS_ERR(clnt)) { | ||
229 | clnt->cl_softrtry = 1; | ||
230 | clnt->cl_oneshot = 1; | ||
231 | } | ||
232 | return clnt; | ||
233 | } | 298 | } |
234 | 299 | ||
235 | /* | 300 | /* |
236 | * XDR encode/decode functions for PMAP | 301 | * XDR encode/decode functions for PMAP |
237 | */ | 302 | */ |
238 | static int | 303 | static int xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map) |
239 | xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map) | ||
240 | { | 304 | { |
241 | dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n", | 305 | dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n", |
242 | map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port); | 306 | map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port); |
243 | *p++ = htonl(map->pm_prog); | 307 | *p++ = htonl(map->pm_prog); |
244 | *p++ = htonl(map->pm_vers); | 308 | *p++ = htonl(map->pm_vers); |
@@ -249,15 +313,13 @@ xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map) | |||
249 | return 0; | 313 | return 0; |
250 | } | 314 | } |
251 | 315 | ||
252 | static int | 316 | static int xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp) |
253 | xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp) | ||
254 | { | 317 | { |
255 | *portp = (unsigned short) ntohl(*p++); | 318 | *portp = (unsigned short) ntohl(*p++); |
256 | return 0; | 319 | return 0; |
257 | } | 320 | } |
258 | 321 | ||
259 | static int | 322 | static int xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp) |
260 | xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp) | ||
261 | { | 323 | { |
262 | *boolp = (unsigned int) ntohl(*p++); | 324 | *boolp = (unsigned int) ntohl(*p++); |
263 | return 0; | 325 | return 0; |
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 0b1a1ac8a4bc..dfa504fe383f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c | |||
@@ -327,10 +327,8 @@ rpc_show_info(struct seq_file *m, void *v) | |||
327 | seq_printf(m, "RPC server: %s\n", clnt->cl_server); | 327 | seq_printf(m, "RPC server: %s\n", clnt->cl_server); |
328 | seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, | 328 | seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, |
329 | clnt->cl_prog, clnt->cl_vers); | 329 | clnt->cl_prog, clnt->cl_vers); |
330 | seq_printf(m, "address: %u.%u.%u.%u\n", | 330 | seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); |
331 | NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr)); | 331 | seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); |
332 | seq_printf(m, "protocol: %s\n", | ||
333 | clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); | ||
334 | return 0; | 332 | return 0; |
335 | } | 333 | } |
336 | 334 | ||
@@ -623,17 +621,13 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) | |||
623 | } | 621 | } |
624 | 622 | ||
625 | static struct dentry * | 623 | static struct dentry * |
626 | rpc_lookup_negative(char *path, struct nameidata *nd) | 624 | rpc_lookup_create(struct dentry *parent, const char *name, int len) |
627 | { | 625 | { |
626 | struct inode *dir = parent->d_inode; | ||
628 | struct dentry *dentry; | 627 | struct dentry *dentry; |
629 | struct inode *dir; | ||
630 | int error; | ||
631 | 628 | ||
632 | if ((error = rpc_lookup_parent(path, nd)) != 0) | ||
633 | return ERR_PTR(error); | ||
634 | dir = nd->dentry->d_inode; | ||
635 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 629 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); |
636 | dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len); | 630 | dentry = lookup_one_len(name, parent, len); |
637 | if (IS_ERR(dentry)) | 631 | if (IS_ERR(dentry)) |
638 | goto out_err; | 632 | goto out_err; |
639 | if (dentry->d_inode) { | 633 | if (dentry->d_inode) { |
@@ -644,7 +638,20 @@ rpc_lookup_negative(char *path, struct nameidata *nd) | |||
644 | return dentry; | 638 | return dentry; |
645 | out_err: | 639 | out_err: |
646 | mutex_unlock(&dir->i_mutex); | 640 | mutex_unlock(&dir->i_mutex); |
647 | rpc_release_path(nd); | 641 | return dentry; |
642 | } | ||
643 | |||
644 | static struct dentry * | ||
645 | rpc_lookup_negative(char *path, struct nameidata *nd) | ||
646 | { | ||
647 | struct dentry *dentry; | ||
648 | int error; | ||
649 | |||
650 | if ((error = rpc_lookup_parent(path, nd)) != 0) | ||
651 | return ERR_PTR(error); | ||
652 | dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len); | ||
653 | if (IS_ERR(dentry)) | ||
654 | rpc_release_path(nd); | ||
648 | return dentry; | 655 | return dentry; |
649 | } | 656 | } |
650 | 657 | ||
@@ -703,18 +710,17 @@ rpc_rmdir(struct dentry *dentry) | |||
703 | } | 710 | } |
704 | 711 | ||
705 | struct dentry * | 712 | struct dentry * |
706 | rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) | 713 | rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) |
707 | { | 714 | { |
708 | struct nameidata nd; | ||
709 | struct dentry *dentry; | 715 | struct dentry *dentry; |
710 | struct inode *dir, *inode; | 716 | struct inode *dir, *inode; |
711 | struct rpc_inode *rpci; | 717 | struct rpc_inode *rpci; |
712 | 718 | ||
713 | dentry = rpc_lookup_negative(path, &nd); | 719 | dentry = rpc_lookup_create(parent, name, strlen(name)); |
714 | if (IS_ERR(dentry)) | 720 | if (IS_ERR(dentry)) |
715 | return dentry; | 721 | return dentry; |
716 | dir = nd.dentry->d_inode; | 722 | dir = parent->d_inode; |
717 | inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR); | 723 | inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); |
718 | if (!inode) | 724 | if (!inode) |
719 | goto err_dput; | 725 | goto err_dput; |
720 | inode->i_ino = iunique(dir->i_sb, 100); | 726 | inode->i_ino = iunique(dir->i_sb, 100); |
@@ -728,13 +734,13 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) | |||
728 | dget(dentry); | 734 | dget(dentry); |
729 | out: | 735 | out: |
730 | mutex_unlock(&dir->i_mutex); | 736 | mutex_unlock(&dir->i_mutex); |
731 | rpc_release_path(&nd); | ||
732 | return dentry; | 737 | return dentry; |
733 | err_dput: | 738 | err_dput: |
734 | dput(dentry); | 739 | dput(dentry); |
735 | dentry = ERR_PTR(-ENOMEM); | 740 | dentry = ERR_PTR(-ENOMEM); |
736 | printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n", | 741 | printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n", |
737 | __FILE__, __FUNCTION__, path, -ENOMEM); | 742 | __FILE__, __FUNCTION__, parent->d_name.name, name, |
743 | -ENOMEM); | ||
738 | goto out; | 744 | goto out; |
739 | } | 745 | } |
740 | 746 | ||
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 5c3eee768504..6390461a9756 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/mutex.h> | 21 | #include <linux/mutex.h> |
22 | 22 | ||
23 | #include <linux/sunrpc/clnt.h> | 23 | #include <linux/sunrpc/clnt.h> |
24 | #include <linux/sunrpc/xprt.h> | ||
25 | 24 | ||
26 | #ifdef RPC_DEBUG | 25 | #ifdef RPC_DEBUG |
27 | #define RPCDBG_FACILITY RPCDBG_SCHED | 26 | #define RPCDBG_FACILITY RPCDBG_SCHED |
@@ -45,12 +44,6 @@ static void rpciod_killall(void); | |||
45 | static void rpc_async_schedule(void *); | 44 | static void rpc_async_schedule(void *); |
46 | 45 | ||
47 | /* | 46 | /* |
48 | * RPC tasks that create another task (e.g. for contacting the portmapper) | ||
49 | * will wait on this queue for their child's completion | ||
50 | */ | ||
51 | static RPC_WAITQ(childq, "childq"); | ||
52 | |||
53 | /* | ||
54 | * RPC tasks sit here while waiting for conditions to improve. | 47 | * RPC tasks sit here while waiting for conditions to improve. |
55 | */ | 48 | */ |
56 | static RPC_WAITQ(delay_queue, "delayq"); | 49 | static RPC_WAITQ(delay_queue, "delayq"); |
@@ -324,16 +317,6 @@ static void rpc_make_runnable(struct rpc_task *task) | |||
324 | } | 317 | } |
325 | 318 | ||
326 | /* | 319 | /* |
327 | * Place a newly initialized task on the workqueue. | ||
328 | */ | ||
329 | static inline void | ||
330 | rpc_schedule_run(struct rpc_task *task) | ||
331 | { | ||
332 | rpc_set_active(task); | ||
333 | rpc_make_runnable(task); | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * Prepare for sleeping on a wait queue. | 320 | * Prepare for sleeping on a wait queue. |
338 | * By always appending tasks to the list we ensure FIFO behavior. | 321 | * By always appending tasks to the list we ensure FIFO behavior. |
339 | * NB: An RPC task will only receive interrupt-driven events as long | 322 | * NB: An RPC task will only receive interrupt-driven events as long |
@@ -559,24 +542,20 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) | |||
559 | spin_unlock_bh(&queue->lock); | 542 | spin_unlock_bh(&queue->lock); |
560 | } | 543 | } |
561 | 544 | ||
545 | static void __rpc_atrun(struct rpc_task *task) | ||
546 | { | ||
547 | rpc_wake_up_task(task); | ||
548 | } | ||
549 | |||
562 | /* | 550 | /* |
563 | * Run a task at a later time | 551 | * Run a task at a later time |
564 | */ | 552 | */ |
565 | static void __rpc_atrun(struct rpc_task *); | 553 | void rpc_delay(struct rpc_task *task, unsigned long delay) |
566 | void | ||
567 | rpc_delay(struct rpc_task *task, unsigned long delay) | ||
568 | { | 554 | { |
569 | task->tk_timeout = delay; | 555 | task->tk_timeout = delay; |
570 | rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); | 556 | rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); |
571 | } | 557 | } |
572 | 558 | ||
573 | static void | ||
574 | __rpc_atrun(struct rpc_task *task) | ||
575 | { | ||
576 | task->tk_status = 0; | ||
577 | rpc_wake_up_task(task); | ||
578 | } | ||
579 | |||
580 | /* | 559 | /* |
581 | * Helper to call task->tk_ops->rpc_call_prepare | 560 | * Helper to call task->tk_ops->rpc_call_prepare |
582 | */ | 561 | */ |
@@ -933,72 +912,6 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, | |||
933 | } | 912 | } |
934 | EXPORT_SYMBOL(rpc_run_task); | 913 | EXPORT_SYMBOL(rpc_run_task); |
935 | 914 | ||
936 | /** | ||
937 | * rpc_find_parent - find the parent of a child task. | ||
938 | * @child: child task | ||
939 | * @parent: parent task | ||
940 | * | ||
941 | * Checks that the parent task is still sleeping on the | ||
942 | * queue 'childq'. If so returns a pointer to the parent. | ||
943 | * Upon failure returns NULL. | ||
944 | * | ||
945 | * Caller must hold childq.lock | ||
946 | */ | ||
947 | static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent) | ||
948 | { | ||
949 | struct rpc_task *task; | ||
950 | struct list_head *le; | ||
951 | |||
952 | task_for_each(task, le, &childq.tasks[0]) | ||
953 | if (task == parent) | ||
954 | return parent; | ||
955 | |||
956 | return NULL; | ||
957 | } | ||
958 | |||
959 | static void rpc_child_exit(struct rpc_task *child, void *calldata) | ||
960 | { | ||
961 | struct rpc_task *parent; | ||
962 | |||
963 | spin_lock_bh(&childq.lock); | ||
964 | if ((parent = rpc_find_parent(child, calldata)) != NULL) { | ||
965 | parent->tk_status = child->tk_status; | ||
966 | __rpc_wake_up_task(parent); | ||
967 | } | ||
968 | spin_unlock_bh(&childq.lock); | ||
969 | } | ||
970 | |||
971 | static const struct rpc_call_ops rpc_child_ops = { | ||
972 | .rpc_call_done = rpc_child_exit, | ||
973 | }; | ||
974 | |||
975 | /* | ||
976 | * Note: rpc_new_task releases the client after a failure. | ||
977 | */ | ||
978 | struct rpc_task * | ||
979 | rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) | ||
980 | { | ||
981 | struct rpc_task *task; | ||
982 | |||
983 | task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent); | ||
984 | if (!task) | ||
985 | goto fail; | ||
986 | return task; | ||
987 | |||
988 | fail: | ||
989 | parent->tk_status = -ENOMEM; | ||
990 | return NULL; | ||
991 | } | ||
992 | |||
993 | void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) | ||
994 | { | ||
995 | spin_lock_bh(&childq.lock); | ||
996 | /* N.B. Is it possible for the child to have already finished? */ | ||
997 | __rpc_sleep_on(&childq, task, func, NULL); | ||
998 | rpc_schedule_run(child); | ||
999 | spin_unlock_bh(&childq.lock); | ||
1000 | } | ||
1001 | |||
1002 | /* | 915 | /* |
1003 | * Kill all tasks for the given client. | 916 | * Kill all tasks for the given client. |
1004 | * XXX: kill their descendants as well? | 917 | * XXX: kill their descendants as well? |
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index eb330d4f66d6..6f17527b9e69 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c | |||
@@ -168,7 +168,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) | |||
168 | return -1; | 168 | return -1; |
169 | if ((unsigned short)csum_fold(desc.csum)) | 169 | if ((unsigned short)csum_fold(desc.csum)) |
170 | return -1; | 170 | return -1; |
171 | if (unlikely(skb->ip_summed == CHECKSUM_HW)) | 171 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) |
172 | netdev_rx_csum_fault(skb->dev); | 172 | netdev_rx_csum_fault(skb->dev); |
173 | return 0; | 173 | return 0; |
174 | no_checksum: | 174 | no_checksum: |
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f38f939ce95f..26c0531d7e25 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
@@ -36,8 +36,6 @@ EXPORT_SYMBOL(rpc_wake_up_status); | |||
36 | EXPORT_SYMBOL(rpc_release_task); | 36 | EXPORT_SYMBOL(rpc_release_task); |
37 | 37 | ||
38 | /* RPC client functions */ | 38 | /* RPC client functions */ |
39 | EXPORT_SYMBOL(rpc_create_client); | ||
40 | EXPORT_SYMBOL(rpc_new_client); | ||
41 | EXPORT_SYMBOL(rpc_clone_client); | 39 | EXPORT_SYMBOL(rpc_clone_client); |
42 | EXPORT_SYMBOL(rpc_bind_new_program); | 40 | EXPORT_SYMBOL(rpc_bind_new_program); |
43 | EXPORT_SYMBOL(rpc_destroy_client); | 41 | EXPORT_SYMBOL(rpc_destroy_client); |
@@ -57,7 +55,6 @@ EXPORT_SYMBOL(rpc_queue_upcall); | |||
57 | EXPORT_SYMBOL(rpc_mkpipe); | 55 | EXPORT_SYMBOL(rpc_mkpipe); |
58 | 56 | ||
59 | /* Client transport */ | 57 | /* Client transport */ |
60 | EXPORT_SYMBOL(xprt_create_proto); | ||
61 | EXPORT_SYMBOL(xprt_set_timeout); | 58 | EXPORT_SYMBOL(xprt_set_timeout); |
62 | 59 | ||
63 | /* Client credential cache */ | 60 | /* Client credential cache */ |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d9a95732df46..953aff89bcac 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -388,7 +388,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | |||
388 | /* send head */ | 388 | /* send head */ |
389 | if (slen == xdr->head[0].iov_len) | 389 | if (slen == xdr->head[0].iov_len) |
390 | flags = 0; | 390 | flags = 0; |
391 | len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); | 391 | len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); |
392 | if (len != xdr->head[0].iov_len) | 392 | if (len != xdr->head[0].iov_len) |
393 | goto out; | 393 | goto out; |
394 | slen -= xdr->head[0].iov_len; | 394 | slen -= xdr->head[0].iov_len; |
@@ -400,7 +400,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | |||
400 | while (pglen > 0) { | 400 | while (pglen > 0) { |
401 | if (slen == size) | 401 | if (slen == size) |
402 | flags = 0; | 402 | flags = 0; |
403 | result = sock->ops->sendpage(sock, *ppage, base, size, flags); | 403 | result = kernel_sendpage(sock, *ppage, base, size, flags); |
404 | if (result > 0) | 404 | if (result > 0) |
405 | len += result; | 405 | len += result; |
406 | if (result != size) | 406 | if (result != size) |
@@ -413,7 +413,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | |||
413 | } | 413 | } |
414 | /* send tail */ | 414 | /* send tail */ |
415 | if (xdr->tail[0].iov_len) { | 415 | if (xdr->tail[0].iov_len) { |
416 | result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], | 416 | result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], |
417 | ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), | 417 | ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), |
418 | xdr->tail[0].iov_len, 0); | 418 | xdr->tail[0].iov_len, 0); |
419 | 419 | ||
@@ -434,13 +434,10 @@ out: | |||
434 | static int | 434 | static int |
435 | svc_recv_available(struct svc_sock *svsk) | 435 | svc_recv_available(struct svc_sock *svsk) |
436 | { | 436 | { |
437 | mm_segment_t oldfs; | ||
438 | struct socket *sock = svsk->sk_sock; | 437 | struct socket *sock = svsk->sk_sock; |
439 | int avail, err; | 438 | int avail, err; |
440 | 439 | ||
441 | oldfs = get_fs(); set_fs(KERNEL_DS); | 440 | err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); |
442 | err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); | ||
443 | set_fs(oldfs); | ||
444 | 441 | ||
445 | return (err >= 0)? avail : err; | 442 | return (err >= 0)? avail : err; |
446 | } | 443 | } |
@@ -472,7 +469,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) | |||
472 | * at accept time. FIXME | 469 | * at accept time. FIXME |
473 | */ | 470 | */ |
474 | alen = sizeof(rqstp->rq_addr); | 471 | alen = sizeof(rqstp->rq_addr); |
475 | sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1); | 472 | kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen); |
476 | 473 | ||
477 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", | 474 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", |
478 | rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); | 475 | rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); |
@@ -758,7 +755,6 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
758 | struct svc_serv *serv = svsk->sk_server; | 755 | struct svc_serv *serv = svsk->sk_server; |
759 | struct socket *sock = svsk->sk_sock; | 756 | struct socket *sock = svsk->sk_sock; |
760 | struct socket *newsock; | 757 | struct socket *newsock; |
761 | const struct proto_ops *ops; | ||
762 | struct svc_sock *newsvsk; | 758 | struct svc_sock *newsvsk; |
763 | int err, slen; | 759 | int err, slen; |
764 | 760 | ||
@@ -766,29 +762,23 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
766 | if (!sock) | 762 | if (!sock) |
767 | return; | 763 | return; |
768 | 764 | ||
769 | err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock); | 765 | clear_bit(SK_CONN, &svsk->sk_flags); |
770 | if (err) { | 766 | err = kernel_accept(sock, &newsock, O_NONBLOCK); |
767 | if (err < 0) { | ||
771 | if (err == -ENOMEM) | 768 | if (err == -ENOMEM) |
772 | printk(KERN_WARNING "%s: no more sockets!\n", | 769 | printk(KERN_WARNING "%s: no more sockets!\n", |
773 | serv->sv_name); | 770 | serv->sv_name); |
774 | return; | 771 | else if (err != -EAGAIN && net_ratelimit()) |
775 | } | ||
776 | |||
777 | dprintk("svc: tcp_accept %p allocated\n", newsock); | ||
778 | newsock->ops = ops = sock->ops; | ||
779 | |||
780 | clear_bit(SK_CONN, &svsk->sk_flags); | ||
781 | if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) { | ||
782 | if (err != -EAGAIN && net_ratelimit()) | ||
783 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", | 772 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", |
784 | serv->sv_name, -err); | 773 | serv->sv_name, -err); |
785 | goto failed; /* aborted connection or whatever */ | 774 | return; |
786 | } | 775 | } |
776 | |||
787 | set_bit(SK_CONN, &svsk->sk_flags); | 777 | set_bit(SK_CONN, &svsk->sk_flags); |
788 | svc_sock_enqueue(svsk); | 778 | svc_sock_enqueue(svsk); |
789 | 779 | ||
790 | slen = sizeof(sin); | 780 | slen = sizeof(sin); |
791 | err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1); | 781 | err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen); |
792 | if (err < 0) { | 782 | if (err < 0) { |
793 | if (net_ratelimit()) | 783 | if (net_ratelimit()) |
794 | printk(KERN_WARNING "%s: peername failed (err %d)!\n", | 784 | printk(KERN_WARNING "%s: peername failed (err %d)!\n", |
@@ -1406,14 +1396,14 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) | |||
1406 | if (sin != NULL) { | 1396 | if (sin != NULL) { |
1407 | if (type == SOCK_STREAM) | 1397 | if (type == SOCK_STREAM) |
1408 | sock->sk->sk_reuse = 1; /* allow address reuse */ | 1398 | sock->sk->sk_reuse = 1; /* allow address reuse */ |
1409 | error = sock->ops->bind(sock, (struct sockaddr *) sin, | 1399 | error = kernel_bind(sock, (struct sockaddr *) sin, |
1410 | sizeof(*sin)); | 1400 | sizeof(*sin)); |
1411 | if (error < 0) | 1401 | if (error < 0) |
1412 | goto bummer; | 1402 | goto bummer; |
1413 | } | 1403 | } |
1414 | 1404 | ||
1415 | if (protocol == IPPROTO_TCP) { | 1405 | if (protocol == IPPROTO_TCP) { |
1416 | if ((error = sock->ops->listen(sock, 64)) < 0) | 1406 | if ((error = kernel_listen(sock, 64)) < 0) |
1417 | goto bummer; | 1407 | goto bummer; |
1418 | } | 1408 | } |
1419 | 1409 | ||
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index bcbdf6430d5c..8142fdb8a930 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c | |||
@@ -19,8 +19,6 @@ | |||
19 | #include <linux/unistd.h> | 19 | #include <linux/unistd.h> |
20 | 20 | ||
21 | #include <linux/sunrpc/clnt.h> | 21 | #include <linux/sunrpc/clnt.h> |
22 | #include <linux/sunrpc/xprt.h> | ||
23 | #include <linux/sunrpc/timer.h> | ||
24 | 22 | ||
25 | #define RPC_RTO_MAX (60*HZ) | 23 | #define RPC_RTO_MAX (60*HZ) |
26 | #define RPC_RTO_INIT (HZ/5) | 24 | #define RPC_RTO_INIT (HZ/5) |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e8c2bc4977f3..1f786f68729d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -534,7 +534,7 @@ void xprt_connect(struct rpc_task *task) | |||
534 | dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, | 534 | dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, |
535 | xprt, (xprt_connected(xprt) ? "is" : "is not")); | 535 | xprt, (xprt_connected(xprt) ? "is" : "is not")); |
536 | 536 | ||
537 | if (!xprt->addr.sin_port) { | 537 | if (!xprt_bound(xprt)) { |
538 | task->tk_status = -EIO; | 538 | task->tk_status = -EIO; |
539 | return; | 539 | return; |
540 | } | 540 | } |
@@ -585,13 +585,6 @@ static void xprt_connect_status(struct rpc_task *task) | |||
585 | task->tk_pid, -task->tk_status, task->tk_client->cl_server); | 585 | task->tk_pid, -task->tk_status, task->tk_client->cl_server); |
586 | xprt_release_write(xprt, task); | 586 | xprt_release_write(xprt, task); |
587 | task->tk_status = -EIO; | 587 | task->tk_status = -EIO; |
588 | return; | ||
589 | } | ||
590 | |||
591 | /* if soft mounted, just cause this RPC to fail */ | ||
592 | if (RPC_IS_SOFT(task)) { | ||
593 | xprt_release_write(xprt, task); | ||
594 | task->tk_status = -EIO; | ||
595 | } | 588 | } |
596 | } | 589 | } |
597 | 590 | ||
@@ -829,6 +822,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) | |||
829 | req->rq_bufsize = 0; | 822 | req->rq_bufsize = 0; |
830 | req->rq_xid = xprt_alloc_xid(xprt); | 823 | req->rq_xid = xprt_alloc_xid(xprt); |
831 | req->rq_release_snd_buf = NULL; | 824 | req->rq_release_snd_buf = NULL; |
825 | xprt_reset_majortimeo(req); | ||
832 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, | 826 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, |
833 | req, ntohl(req->rq_xid)); | 827 | req, ntohl(req->rq_xid)); |
834 | } | 828 | } |
@@ -887,16 +881,32 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i | |||
887 | to->to_exponential = 0; | 881 | to->to_exponential = 0; |
888 | } | 882 | } |
889 | 883 | ||
890 | static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) | 884 | /** |
885 | * xprt_create_transport - create an RPC transport | ||
886 | * @proto: requested transport protocol | ||
887 | * @ap: remote peer address | ||
888 | * @size: length of address | ||
889 | * @to: timeout parameters | ||
890 | * | ||
891 | */ | ||
892 | struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to) | ||
891 | { | 893 | { |
892 | int result; | 894 | int result; |
893 | struct rpc_xprt *xprt; | 895 | struct rpc_xprt *xprt; |
894 | struct rpc_rqst *req; | 896 | struct rpc_rqst *req; |
895 | 897 | ||
896 | if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) | 898 | if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) { |
899 | dprintk("RPC: xprt_create_transport: no memory\n"); | ||
897 | return ERR_PTR(-ENOMEM); | 900 | return ERR_PTR(-ENOMEM); |
898 | 901 | } | |
899 | xprt->addr = *ap; | 902 | if (size <= sizeof(xprt->addr)) { |
903 | memcpy(&xprt->addr, ap, size); | ||
904 | xprt->addrlen = size; | ||
905 | } else { | ||
906 | kfree(xprt); | ||
907 | dprintk("RPC: xprt_create_transport: address too large\n"); | ||
908 | return ERR_PTR(-EBADF); | ||
909 | } | ||
900 | 910 | ||
901 | switch (proto) { | 911 | switch (proto) { |
902 | case IPPROTO_UDP: | 912 | case IPPROTO_UDP: |
@@ -908,14 +918,15 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc | |||
908 | default: | 918 | default: |
909 | printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", | 919 | printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", |
910 | proto); | 920 | proto); |
911 | result = -EIO; | 921 | return ERR_PTR(-EIO); |
912 | break; | ||
913 | } | 922 | } |
914 | if (result) { | 923 | if (result) { |
915 | kfree(xprt); | 924 | kfree(xprt); |
925 | dprintk("RPC: xprt_create_transport: failed, %d\n", result); | ||
916 | return ERR_PTR(result); | 926 | return ERR_PTR(result); |
917 | } | 927 | } |
918 | 928 | ||
929 | kref_init(&xprt->kref); | ||
919 | spin_lock_init(&xprt->transport_lock); | 930 | spin_lock_init(&xprt->transport_lock); |
920 | spin_lock_init(&xprt->reserve_lock); | 931 | spin_lock_init(&xprt->reserve_lock); |
921 | 932 | ||
@@ -928,6 +939,7 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc | |||
928 | xprt->last_used = jiffies; | 939 | xprt->last_used = jiffies; |
929 | xprt->cwnd = RPC_INITCWND; | 940 | xprt->cwnd = RPC_INITCWND; |
930 | 941 | ||
942 | rpc_init_wait_queue(&xprt->binding, "xprt_binding"); | ||
931 | rpc_init_wait_queue(&xprt->pending, "xprt_pending"); | 943 | rpc_init_wait_queue(&xprt->pending, "xprt_pending"); |
932 | rpc_init_wait_queue(&xprt->sending, "xprt_sending"); | 944 | rpc_init_wait_queue(&xprt->sending, "xprt_sending"); |
933 | rpc_init_wait_queue(&xprt->resend, "xprt_resend"); | 945 | rpc_init_wait_queue(&xprt->resend, "xprt_resend"); |
@@ -941,41 +953,43 @@ static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc | |||
941 | 953 | ||
942 | dprintk("RPC: created transport %p with %u slots\n", xprt, | 954 | dprintk("RPC: created transport %p with %u slots\n", xprt, |
943 | xprt->max_reqs); | 955 | xprt->max_reqs); |
944 | |||
945 | return xprt; | ||
946 | } | ||
947 | 956 | ||
948 | /** | ||
949 | * xprt_create_proto - create an RPC client transport | ||
950 | * @proto: requested transport protocol | ||
951 | * @sap: remote peer's address | ||
952 | * @to: timeout parameters for new transport | ||
953 | * | ||
954 | */ | ||
955 | struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) | ||
956 | { | ||
957 | struct rpc_xprt *xprt; | ||
958 | |||
959 | xprt = xprt_setup(proto, sap, to); | ||
960 | if (IS_ERR(xprt)) | ||
961 | dprintk("RPC: xprt_create_proto failed\n"); | ||
962 | else | ||
963 | dprintk("RPC: xprt_create_proto created xprt %p\n", xprt); | ||
964 | return xprt; | 957 | return xprt; |
965 | } | 958 | } |
966 | 959 | ||
967 | /** | 960 | /** |
968 | * xprt_destroy - destroy an RPC transport, killing off all requests. | 961 | * xprt_destroy - destroy an RPC transport, killing off all requests. |
969 | * @xprt: transport to destroy | 962 | * @kref: kref for the transport to destroy |
970 | * | 963 | * |
971 | */ | 964 | */ |
972 | int xprt_destroy(struct rpc_xprt *xprt) | 965 | static void xprt_destroy(struct kref *kref) |
973 | { | 966 | { |
967 | struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref); | ||
968 | |||
974 | dprintk("RPC: destroying transport %p\n", xprt); | 969 | dprintk("RPC: destroying transport %p\n", xprt); |
975 | xprt->shutdown = 1; | 970 | xprt->shutdown = 1; |
976 | del_timer_sync(&xprt->timer); | 971 | del_timer_sync(&xprt->timer); |
977 | xprt->ops->destroy(xprt); | 972 | xprt->ops->destroy(xprt); |
978 | kfree(xprt); | 973 | kfree(xprt); |
974 | } | ||
979 | 975 | ||
980 | return 0; | 976 | /** |
977 | * xprt_put - release a reference to an RPC transport. | ||
978 | * @xprt: pointer to the transport | ||
979 | * | ||
980 | */ | ||
981 | void xprt_put(struct rpc_xprt *xprt) | ||
982 | { | ||
983 | kref_put(&xprt->kref, xprt_destroy); | ||
984 | } | ||
985 | |||
986 | /** | ||
987 | * xprt_get - return a reference to an RPC transport. | ||
988 | * @xprt: pointer to the transport | ||
989 | * | ||
990 | */ | ||
991 | struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) | ||
992 | { | ||
993 | kref_get(&xprt->kref); | ||
994 | return xprt; | ||
981 | } | 995 | } |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 441bd53f5eca..9b62923a9c06 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -125,6 +125,47 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) | |||
125 | } | 125 | } |
126 | #endif | 126 | #endif |
127 | 127 | ||
128 | static void xs_format_peer_addresses(struct rpc_xprt *xprt) | ||
129 | { | ||
130 | struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; | ||
131 | char *buf; | ||
132 | |||
133 | buf = kzalloc(20, GFP_KERNEL); | ||
134 | if (buf) { | ||
135 | snprintf(buf, 20, "%u.%u.%u.%u", | ||
136 | NIPQUAD(addr->sin_addr.s_addr)); | ||
137 | } | ||
138 | xprt->address_strings[RPC_DISPLAY_ADDR] = buf; | ||
139 | |||
140 | buf = kzalloc(8, GFP_KERNEL); | ||
141 | if (buf) { | ||
142 | snprintf(buf, 8, "%u", | ||
143 | ntohs(addr->sin_port)); | ||
144 | } | ||
145 | xprt->address_strings[RPC_DISPLAY_PORT] = buf; | ||
146 | |||
147 | if (xprt->prot == IPPROTO_UDP) | ||
148 | xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; | ||
149 | else | ||
150 | xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; | ||
151 | |||
152 | buf = kzalloc(48, GFP_KERNEL); | ||
153 | if (buf) { | ||
154 | snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", | ||
155 | NIPQUAD(addr->sin_addr.s_addr), | ||
156 | ntohs(addr->sin_port), | ||
157 | xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); | ||
158 | } | ||
159 | xprt->address_strings[RPC_DISPLAY_ALL] = buf; | ||
160 | } | ||
161 | |||
162 | static void xs_free_peer_addresses(struct rpc_xprt *xprt) | ||
163 | { | ||
164 | kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); | ||
165 | kfree(xprt->address_strings[RPC_DISPLAY_PORT]); | ||
166 | kfree(xprt->address_strings[RPC_DISPLAY_ALL]); | ||
167 | } | ||
168 | |||
128 | #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) | 169 | #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) |
129 | 170 | ||
130 | static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) | 171 | static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) |
@@ -174,7 +215,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a | |||
174 | struct page **ppage = xdr->pages; | 215 | struct page **ppage = xdr->pages; |
175 | unsigned int len, pglen = xdr->page_len; | 216 | unsigned int len, pglen = xdr->page_len; |
176 | int err, ret = 0; | 217 | int err, ret = 0; |
177 | ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); | ||
178 | 218 | ||
179 | if (unlikely(!sock)) | 219 | if (unlikely(!sock)) |
180 | return -ENOTCONN; | 220 | return -ENOTCONN; |
@@ -207,7 +247,6 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a | |||
207 | base &= ~PAGE_CACHE_MASK; | 247 | base &= ~PAGE_CACHE_MASK; |
208 | } | 248 | } |
209 | 249 | ||
210 | sendpage = sock->ops->sendpage ? : sock_no_sendpage; | ||
211 | do { | 250 | do { |
212 | int flags = XS_SENDMSG_FLAGS; | 251 | int flags = XS_SENDMSG_FLAGS; |
213 | 252 | ||
@@ -220,10 +259,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a | |||
220 | if (pglen != len || xdr->tail[0].iov_len != 0) | 259 | if (pglen != len || xdr->tail[0].iov_len != 0) |
221 | flags |= MSG_MORE; | 260 | flags |= MSG_MORE; |
222 | 261 | ||
223 | /* Hmm... We might be dealing with highmem pages */ | 262 | err = kernel_sendpage(sock, *ppage, base, len, flags); |
224 | if (PageHighMem(*ppage)) | ||
225 | sendpage = sock_no_sendpage; | ||
226 | err = sendpage(sock, *ppage, base, len, flags); | ||
227 | if (ret == 0) | 263 | if (ret == 0) |
228 | ret = err; | 264 | ret = err; |
229 | else if (err > 0) | 265 | else if (err > 0) |
@@ -300,7 +336,7 @@ static int xs_udp_send_request(struct rpc_task *task) | |||
300 | 336 | ||
301 | req->rq_xtime = jiffies; | 337 | req->rq_xtime = jiffies; |
302 | status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, | 338 | status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, |
303 | sizeof(xprt->addr), xdr, req->rq_bytes_sent); | 339 | xprt->addrlen, xdr, req->rq_bytes_sent); |
304 | 340 | ||
305 | dprintk("RPC: xs_udp_send_request(%u) = %d\n", | 341 | dprintk("RPC: xs_udp_send_request(%u) = %d\n", |
306 | xdr->len - req->rq_bytes_sent, status); | 342 | xdr->len - req->rq_bytes_sent, status); |
@@ -490,6 +526,7 @@ static void xs_destroy(struct rpc_xprt *xprt) | |||
490 | 526 | ||
491 | xprt_disconnect(xprt); | 527 | xprt_disconnect(xprt); |
492 | xs_close(xprt); | 528 | xs_close(xprt); |
529 | xs_free_peer_addresses(xprt); | ||
493 | kfree(xprt->slot); | 530 | kfree(xprt->slot); |
494 | } | 531 | } |
495 | 532 | ||
@@ -965,6 +1002,19 @@ static unsigned short xs_get_random_port(void) | |||
965 | } | 1002 | } |
966 | 1003 | ||
967 | /** | 1004 | /** |
1005 | * xs_print_peer_address - format an IPv4 address for printing | ||
1006 | * @xprt: generic transport | ||
1007 | * @format: flags field indicating which parts of the address to render | ||
1008 | */ | ||
1009 | static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format) | ||
1010 | { | ||
1011 | if (xprt->address_strings[format] != NULL) | ||
1012 | return xprt->address_strings[format]; | ||
1013 | else | ||
1014 | return "unprintable"; | ||
1015 | } | ||
1016 | |||
1017 | /** | ||
968 | * xs_set_port - reset the port number in the remote endpoint address | 1018 | * xs_set_port - reset the port number in the remote endpoint address |
969 | * @xprt: generic transport | 1019 | * @xprt: generic transport |
970 | * @port: new port number | 1020 | * @port: new port number |
@@ -972,8 +1022,11 @@ static unsigned short xs_get_random_port(void) | |||
972 | */ | 1022 | */ |
973 | static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) | 1023 | static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) |
974 | { | 1024 | { |
1025 | struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; | ||
1026 | |||
975 | dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); | 1027 | dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); |
976 | xprt->addr.sin_port = htons(port); | 1028 | |
1029 | sap->sin_port = htons(port); | ||
977 | } | 1030 | } |
978 | 1031 | ||
979 | static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) | 1032 | static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) |
@@ -986,7 +1039,7 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) | |||
986 | 1039 | ||
987 | do { | 1040 | do { |
988 | myaddr.sin_port = htons(port); | 1041 | myaddr.sin_port = htons(port); |
989 | err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, | 1042 | err = kernel_bind(sock, (struct sockaddr *) &myaddr, |
990 | sizeof(myaddr)); | 1043 | sizeof(myaddr)); |
991 | if (err == 0) { | 1044 | if (err == 0) { |
992 | xprt->port = port; | 1045 | xprt->port = port; |
@@ -1016,11 +1069,9 @@ static void xs_udp_connect_worker(void *args) | |||
1016 | struct socket *sock = xprt->sock; | 1069 | struct socket *sock = xprt->sock; |
1017 | int err, status = -EIO; | 1070 | int err, status = -EIO; |
1018 | 1071 | ||
1019 | if (xprt->shutdown || xprt->addr.sin_port == 0) | 1072 | if (xprt->shutdown || !xprt_bound(xprt)) |
1020 | goto out; | 1073 | goto out; |
1021 | 1074 | ||
1022 | dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); | ||
1023 | |||
1024 | /* Start by resetting any existing state */ | 1075 | /* Start by resetting any existing state */ |
1025 | xs_close(xprt); | 1076 | xs_close(xprt); |
1026 | 1077 | ||
@@ -1034,6 +1085,9 @@ static void xs_udp_connect_worker(void *args) | |||
1034 | goto out; | 1085 | goto out; |
1035 | } | 1086 | } |
1036 | 1087 | ||
1088 | dprintk("RPC: worker connecting xprt %p to address: %s\n", | ||
1089 | xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); | ||
1090 | |||
1037 | if (!xprt->inet) { | 1091 | if (!xprt->inet) { |
1038 | struct sock *sk = sock->sk; | 1092 | struct sock *sk = sock->sk; |
1039 | 1093 | ||
@@ -1081,7 +1135,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) | |||
1081 | */ | 1135 | */ |
1082 | memset(&any, 0, sizeof(any)); | 1136 | memset(&any, 0, sizeof(any)); |
1083 | any.sa_family = AF_UNSPEC; | 1137 | any.sa_family = AF_UNSPEC; |
1084 | result = sock->ops->connect(sock, &any, sizeof(any), 0); | 1138 | result = kernel_connect(sock, &any, sizeof(any), 0); |
1085 | if (result) | 1139 | if (result) |
1086 | dprintk("RPC: AF_UNSPEC connect return code %d\n", | 1140 | dprintk("RPC: AF_UNSPEC connect return code %d\n", |
1087 | result); | 1141 | result); |
@@ -1099,11 +1153,9 @@ static void xs_tcp_connect_worker(void *args) | |||
1099 | struct socket *sock = xprt->sock; | 1153 | struct socket *sock = xprt->sock; |
1100 | int err, status = -EIO; | 1154 | int err, status = -EIO; |
1101 | 1155 | ||
1102 | if (xprt->shutdown || xprt->addr.sin_port == 0) | 1156 | if (xprt->shutdown || !xprt_bound(xprt)) |
1103 | goto out; | 1157 | goto out; |
1104 | 1158 | ||
1105 | dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); | ||
1106 | |||
1107 | if (!xprt->sock) { | 1159 | if (!xprt->sock) { |
1108 | /* start from scratch */ | 1160 | /* start from scratch */ |
1109 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | 1161 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { |
@@ -1119,6 +1171,9 @@ static void xs_tcp_connect_worker(void *args) | |||
1119 | /* "close" the socket, preserving the local port */ | 1171 | /* "close" the socket, preserving the local port */ |
1120 | xs_tcp_reuse_connection(xprt); | 1172 | xs_tcp_reuse_connection(xprt); |
1121 | 1173 | ||
1174 | dprintk("RPC: worker connecting xprt %p to address: %s\n", | ||
1175 | xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); | ||
1176 | |||
1122 | if (!xprt->inet) { | 1177 | if (!xprt->inet) { |
1123 | struct sock *sk = sock->sk; | 1178 | struct sock *sk = sock->sk; |
1124 | 1179 | ||
@@ -1151,8 +1206,8 @@ static void xs_tcp_connect_worker(void *args) | |||
1151 | /* Tell the socket layer to start connecting... */ | 1206 | /* Tell the socket layer to start connecting... */ |
1152 | xprt->stat.connect_count++; | 1207 | xprt->stat.connect_count++; |
1153 | xprt->stat.connect_start = jiffies; | 1208 | xprt->stat.connect_start = jiffies; |
1154 | status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, | 1209 | status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, |
1155 | sizeof(xprt->addr), O_NONBLOCK); | 1210 | xprt->addrlen, O_NONBLOCK); |
1156 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", | 1211 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", |
1157 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); | 1212 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); |
1158 | if (status < 0) { | 1213 | if (status < 0) { |
@@ -1260,8 +1315,10 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | |||
1260 | 1315 | ||
1261 | static struct rpc_xprt_ops xs_udp_ops = { | 1316 | static struct rpc_xprt_ops xs_udp_ops = { |
1262 | .set_buffer_size = xs_udp_set_buffer_size, | 1317 | .set_buffer_size = xs_udp_set_buffer_size, |
1318 | .print_addr = xs_print_peer_address, | ||
1263 | .reserve_xprt = xprt_reserve_xprt_cong, | 1319 | .reserve_xprt = xprt_reserve_xprt_cong, |
1264 | .release_xprt = xprt_release_xprt_cong, | 1320 | .release_xprt = xprt_release_xprt_cong, |
1321 | .rpcbind = rpc_getport, | ||
1265 | .set_port = xs_set_port, | 1322 | .set_port = xs_set_port, |
1266 | .connect = xs_connect, | 1323 | .connect = xs_connect, |
1267 | .buf_alloc = rpc_malloc, | 1324 | .buf_alloc = rpc_malloc, |
@@ -1276,8 +1333,10 @@ static struct rpc_xprt_ops xs_udp_ops = { | |||
1276 | }; | 1333 | }; |
1277 | 1334 | ||
1278 | static struct rpc_xprt_ops xs_tcp_ops = { | 1335 | static struct rpc_xprt_ops xs_tcp_ops = { |
1336 | .print_addr = xs_print_peer_address, | ||
1279 | .reserve_xprt = xprt_reserve_xprt, | 1337 | .reserve_xprt = xprt_reserve_xprt, |
1280 | .release_xprt = xs_tcp_release_xprt, | 1338 | .release_xprt = xs_tcp_release_xprt, |
1339 | .rpcbind = rpc_getport, | ||
1281 | .set_port = xs_set_port, | 1340 | .set_port = xs_set_port, |
1282 | .connect = xs_connect, | 1341 | .connect = xs_connect, |
1283 | .buf_alloc = rpc_malloc, | 1342 | .buf_alloc = rpc_malloc, |
@@ -1298,8 +1357,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { | |||
1298 | int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) | 1357 | int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) |
1299 | { | 1358 | { |
1300 | size_t slot_table_size; | 1359 | size_t slot_table_size; |
1301 | 1360 | struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; | |
1302 | dprintk("RPC: setting up udp-ipv4 transport...\n"); | ||
1303 | 1361 | ||
1304 | xprt->max_reqs = xprt_udp_slot_table_entries; | 1362 | xprt->max_reqs = xprt_udp_slot_table_entries; |
1305 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); | 1363 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); |
@@ -1307,10 +1365,12 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) | |||
1307 | if (xprt->slot == NULL) | 1365 | if (xprt->slot == NULL) |
1308 | return -ENOMEM; | 1366 | return -ENOMEM; |
1309 | 1367 | ||
1310 | xprt->prot = IPPROTO_UDP; | 1368 | if (ntohs(addr->sin_port != 0)) |
1369 | xprt_set_bound(xprt); | ||
1311 | xprt->port = xs_get_random_port(); | 1370 | xprt->port = xs_get_random_port(); |
1371 | |||
1372 | xprt->prot = IPPROTO_UDP; | ||
1312 | xprt->tsh_size = 0; | 1373 | xprt->tsh_size = 0; |
1313 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1314 | /* XXX: header size can vary due to auth type, IPv6, etc. */ | 1374 | /* XXX: header size can vary due to auth type, IPv6, etc. */ |
1315 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | 1375 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); |
1316 | 1376 | ||
@@ -1327,6 +1387,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) | |||
1327 | else | 1387 | else |
1328 | xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); | 1388 | xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); |
1329 | 1389 | ||
1390 | xs_format_peer_addresses(xprt); | ||
1391 | dprintk("RPC: set up transport to address %s\n", | ||
1392 | xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); | ||
1393 | |||
1330 | return 0; | 1394 | return 0; |
1331 | } | 1395 | } |
1332 | 1396 | ||
@@ -1339,8 +1403,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) | |||
1339 | int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) | 1403 | int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) |
1340 | { | 1404 | { |
1341 | size_t slot_table_size; | 1405 | size_t slot_table_size; |
1342 | 1406 | struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; | |
1343 | dprintk("RPC: setting up tcp-ipv4 transport...\n"); | ||
1344 | 1407 | ||
1345 | xprt->max_reqs = xprt_tcp_slot_table_entries; | 1408 | xprt->max_reqs = xprt_tcp_slot_table_entries; |
1346 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); | 1409 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); |
@@ -1348,10 +1411,12 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) | |||
1348 | if (xprt->slot == NULL) | 1411 | if (xprt->slot == NULL) |
1349 | return -ENOMEM; | 1412 | return -ENOMEM; |
1350 | 1413 | ||
1351 | xprt->prot = IPPROTO_TCP; | 1414 | if (ntohs(addr->sin_port) != 0) |
1415 | xprt_set_bound(xprt); | ||
1352 | xprt->port = xs_get_random_port(); | 1416 | xprt->port = xs_get_random_port(); |
1417 | |||
1418 | xprt->prot = IPPROTO_TCP; | ||
1353 | xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); | 1419 | xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); |
1354 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1355 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; | 1420 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
1356 | 1421 | ||
1357 | INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); | 1422 | INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); |
@@ -1367,5 +1432,9 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) | |||
1367 | else | 1432 | else |
1368 | xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); | 1433 | xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); |
1369 | 1434 | ||
1435 | xs_format_peer_addresses(xprt); | ||
1436 | dprintk("RPC: set up transport to address %s\n", | ||
1437 | xs_print_peer_address(xprt, RPC_DISPLAY_ALL)); | ||
1438 | |||
1370 | return 0; | 1439 | return 0; |
1371 | } | 1440 | } |
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index de6ec519272e..b43a27828df5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -117,7 +117,7 @@ | |||
117 | #include <net/checksum.h> | 117 | #include <net/checksum.h> |
118 | #include <linux/security.h> | 118 | #include <linux/security.h> |
119 | 119 | ||
120 | int sysctl_unix_max_dgram_qlen = 10; | 120 | int sysctl_unix_max_dgram_qlen __read_mostly = 10; |
121 | 121 | ||
122 | struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; | 122 | struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; |
123 | DEFINE_SPINLOCK(unix_table_lock); | 123 | DEFINE_SPINLOCK(unix_table_lock); |
@@ -2060,10 +2060,7 @@ static int __init af_unix_init(void) | |||
2060 | int rc = -1; | 2060 | int rc = -1; |
2061 | struct sk_buff *dummy_skb; | 2061 | struct sk_buff *dummy_skb; |
2062 | 2062 | ||
2063 | if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) { | 2063 | BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)); |
2064 | printk(KERN_CRIT "%s: panic\n", __FUNCTION__); | ||
2065 | goto out; | ||
2066 | } | ||
2067 | 2064 | ||
2068 | rc = proto_register(&unix_proto, 1); | 2065 | rc = proto_register(&unix_proto, 1); |
2069 | if (rc != 0) { | 2066 | if (rc != 0) { |
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 0c1c04322baf..0faab6332586 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig | |||
@@ -6,14 +6,24 @@ config XFRM | |||
6 | depends on NET | 6 | depends on NET |
7 | 7 | ||
8 | config XFRM_USER | 8 | config XFRM_USER |
9 | tristate "IPsec user configuration interface" | 9 | tristate "Transformation user configuration interface" |
10 | depends on INET && XFRM | 10 | depends on INET && XFRM |
11 | ---help--- | 11 | ---help--- |
12 | Support for IPsec user configuration interface used | 12 | Support for Transformation(XFRM) user configuration interface |
13 | by native Linux tools. | 13 | like IPsec used by native Linux tools. |
14 | 14 | ||
15 | If unsure, say Y. | 15 | If unsure, say Y. |
16 | 16 | ||
17 | config XFRM_SUB_POLICY | ||
18 | bool "Transformation sub policy support (EXPERIMENTAL)" | ||
19 | depends on XFRM && EXPERIMENTAL | ||
20 | ---help--- | ||
21 | Support sub policy for developers. By using sub policy with main | ||
22 | one, two policies can be applied to the same packet at once. | ||
23 | Policy which lives shorter time in kernel should be a sub. | ||
24 | |||
25 | If unsure, say N. | ||
26 | |||
17 | config NET_KEY | 27 | config NET_KEY |
18 | tristate "PF_KEY sockets" | 28 | tristate "PF_KEY sockets" |
19 | select XFRM | 29 | select XFRM |
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 693aac1aa833..de3c1a625a46 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile | |||
@@ -2,6 +2,7 @@ | |||
2 | # Makefile for the XFRM subsystem. | 2 | # Makefile for the XFRM subsystem. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o | 5 | obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ |
6 | xfrm_input.o xfrm_algo.o | ||
6 | obj-$(CONFIG_XFRM_USER) += xfrm_user.o | 7 | obj-$(CONFIG_XFRM_USER) += xfrm_user.o |
7 | 8 | ||
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c new file mode 100644 index 000000000000..37643bb8768a --- /dev/null +++ b/net/xfrm/xfrm_hash.c | |||
@@ -0,0 +1,41 @@ | |||
1 | /* xfrm_hash.c: Common hash table code. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/mm.h> | ||
8 | #include <linux/bootmem.h> | ||
9 | #include <linux/vmalloc.h> | ||
10 | #include <linux/slab.h> | ||
11 | #include <linux/xfrm.h> | ||
12 | |||
13 | #include "xfrm_hash.h" | ||
14 | |||
15 | struct hlist_head *xfrm_hash_alloc(unsigned int sz) | ||
16 | { | ||
17 | struct hlist_head *n; | ||
18 | |||
19 | if (sz <= PAGE_SIZE) | ||
20 | n = kmalloc(sz, GFP_KERNEL); | ||
21 | else if (hashdist) | ||
22 | n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); | ||
23 | else | ||
24 | n = (struct hlist_head *) | ||
25 | __get_free_pages(GFP_KERNEL, get_order(sz)); | ||
26 | |||
27 | if (n) | ||
28 | memset(n, 0, sz); | ||
29 | |||
30 | return n; | ||
31 | } | ||
32 | |||
33 | void xfrm_hash_free(struct hlist_head *n, unsigned int sz) | ||
34 | { | ||
35 | if (sz <= PAGE_SIZE) | ||
36 | kfree(n); | ||
37 | else if (hashdist) | ||
38 | vfree(n); | ||
39 | else | ||
40 | free_pages((unsigned long)n, get_order(sz)); | ||
41 | } | ||
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h new file mode 100644 index 000000000000..d3abb0b7dc62 --- /dev/null +++ b/net/xfrm/xfrm_hash.h | |||
@@ -0,0 +1,128 @@ | |||
1 | #ifndef _XFRM_HASH_H | ||
2 | #define _XFRM_HASH_H | ||
3 | |||
4 | #include <linux/xfrm.h> | ||
5 | #include <linux/socket.h> | ||
6 | |||
7 | static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) | ||
8 | { | ||
9 | return ntohl(addr->a4); | ||
10 | } | ||
11 | |||
12 | static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) | ||
13 | { | ||
14 | return ntohl(addr->a6[2] ^ addr->a6[3]); | ||
15 | } | ||
16 | |||
17 | static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) | ||
18 | { | ||
19 | return ntohl(daddr->a4 ^ saddr->a4); | ||
20 | } | ||
21 | |||
22 | static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) | ||
23 | { | ||
24 | return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ | ||
25 | saddr->a6[2] ^ saddr->a6[3]); | ||
26 | } | ||
27 | |||
28 | static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, | ||
29 | u32 reqid, unsigned short family, | ||
30 | unsigned int hmask) | ||
31 | { | ||
32 | unsigned int h = family ^ reqid; | ||
33 | switch (family) { | ||
34 | case AF_INET: | ||
35 | h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); | ||
36 | break; | ||
37 | case AF_INET6: | ||
38 | h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); | ||
39 | break; | ||
40 | } | ||
41 | return (h ^ (h >> 16)) & hmask; | ||
42 | } | ||
43 | |||
44 | static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr, | ||
45 | unsigned short family, | ||
46 | unsigned int hmask) | ||
47 | { | ||
48 | unsigned int h = family; | ||
49 | switch (family) { | ||
50 | case AF_INET: | ||
51 | h ^= __xfrm4_addr_hash(saddr); | ||
52 | break; | ||
53 | case AF_INET6: | ||
54 | h ^= __xfrm6_addr_hash(saddr); | ||
55 | break; | ||
56 | }; | ||
57 | return (h ^ (h >> 16)) & hmask; | ||
58 | } | ||
59 | |||
60 | static inline unsigned int | ||
61 | __xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family, | ||
62 | unsigned int hmask) | ||
63 | { | ||
64 | unsigned int h = spi ^ proto; | ||
65 | switch (family) { | ||
66 | case AF_INET: | ||
67 | h ^= __xfrm4_addr_hash(daddr); | ||
68 | break; | ||
69 | case AF_INET6: | ||
70 | h ^= __xfrm6_addr_hash(daddr); | ||
71 | break; | ||
72 | } | ||
73 | return (h ^ (h >> 10) ^ (h >> 20)) & hmask; | ||
74 | } | ||
75 | |||
76 | static inline unsigned int __idx_hash(u32 index, unsigned int hmask) | ||
77 | { | ||
78 | return (index ^ (index >> 8)) & hmask; | ||
79 | } | ||
80 | |||
81 | static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) | ||
82 | { | ||
83 | xfrm_address_t *daddr = &sel->daddr; | ||
84 | xfrm_address_t *saddr = &sel->saddr; | ||
85 | unsigned int h = 0; | ||
86 | |||
87 | switch (family) { | ||
88 | case AF_INET: | ||
89 | if (sel->prefixlen_d != 32 || | ||
90 | sel->prefixlen_s != 32) | ||
91 | return hmask + 1; | ||
92 | |||
93 | h = __xfrm4_daddr_saddr_hash(daddr, saddr); | ||
94 | break; | ||
95 | |||
96 | case AF_INET6: | ||
97 | if (sel->prefixlen_d != 128 || | ||
98 | sel->prefixlen_s != 128) | ||
99 | return hmask + 1; | ||
100 | |||
101 | h = __xfrm6_daddr_saddr_hash(daddr, saddr); | ||
102 | break; | ||
103 | }; | ||
104 | h ^= (h >> 16); | ||
105 | return h & hmask; | ||
106 | } | ||
107 | |||
108 | static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) | ||
109 | { | ||
110 | unsigned int h = 0; | ||
111 | |||
112 | switch (family) { | ||
113 | case AF_INET: | ||
114 | h = __xfrm4_daddr_saddr_hash(daddr, saddr); | ||
115 | break; | ||
116 | |||
117 | case AF_INET6: | ||
118 | h = __xfrm6_daddr_saddr_hash(daddr, saddr); | ||
119 | break; | ||
120 | }; | ||
121 | h ^= (h >> 16); | ||
122 | return h & hmask; | ||
123 | } | ||
124 | |||
125 | extern struct hlist_head *xfrm_hash_alloc(unsigned int sz); | ||
126 | extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz); | ||
127 | |||
128 | #endif /* _XFRM_HASH_H */ | ||
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 891a6090cc09..dfc90bb1cf1f 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c | |||
@@ -82,8 +82,6 @@ void __init xfrm_input_init(void) | |||
82 | { | 82 | { |
83 | secpath_cachep = kmem_cache_create("secpath_cache", | 83 | secpath_cachep = kmem_cache_create("secpath_cache", |
84 | sizeof(struct sec_path), | 84 | sizeof(struct sec_path), |
85 | 0, SLAB_HWCACHE_ALIGN, | 85 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
86 | NULL, NULL); | 86 | NULL, NULL); |
87 | if (!secpath_cachep) | ||
88 | panic("XFRM: failed to allocate secpath_cache\n"); | ||
89 | } | 87 | } |
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3da67ca2c3ce..b6e2e79d7261 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c | |||
@@ -22,16 +22,19 @@ | |||
22 | #include <linux/netdevice.h> | 22 | #include <linux/netdevice.h> |
23 | #include <linux/netfilter.h> | 23 | #include <linux/netfilter.h> |
24 | #include <linux/module.h> | 24 | #include <linux/module.h> |
25 | #include <linux/cache.h> | ||
25 | #include <net/xfrm.h> | 26 | #include <net/xfrm.h> |
26 | #include <net/ip.h> | 27 | #include <net/ip.h> |
27 | 28 | ||
29 | #include "xfrm_hash.h" | ||
30 | |||
28 | DEFINE_MUTEX(xfrm_cfg_mutex); | 31 | DEFINE_MUTEX(xfrm_cfg_mutex); |
29 | EXPORT_SYMBOL(xfrm_cfg_mutex); | 32 | EXPORT_SYMBOL(xfrm_cfg_mutex); |
30 | 33 | ||
31 | static DEFINE_RWLOCK(xfrm_policy_lock); | 34 | static DEFINE_RWLOCK(xfrm_policy_lock); |
32 | 35 | ||
33 | struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; | 36 | unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; |
34 | EXPORT_SYMBOL(xfrm_policy_list); | 37 | EXPORT_SYMBOL(xfrm_policy_count); |
35 | 38 | ||
36 | static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); | 39 | static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); |
37 | static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; | 40 | static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; |
@@ -39,8 +42,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; | |||
39 | static kmem_cache_t *xfrm_dst_cache __read_mostly; | 42 | static kmem_cache_t *xfrm_dst_cache __read_mostly; |
40 | 43 | ||
41 | static struct work_struct xfrm_policy_gc_work; | 44 | static struct work_struct xfrm_policy_gc_work; |
42 | static struct list_head xfrm_policy_gc_list = | 45 | static HLIST_HEAD(xfrm_policy_gc_list); |
43 | LIST_HEAD_INIT(xfrm_policy_gc_list); | ||
44 | static DEFINE_SPINLOCK(xfrm_policy_gc_lock); | 46 | static DEFINE_SPINLOCK(xfrm_policy_gc_lock); |
45 | 47 | ||
46 | static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); | 48 | static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); |
@@ -310,8 +312,10 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) | |||
310 | policy = kzalloc(sizeof(struct xfrm_policy), gfp); | 312 | policy = kzalloc(sizeof(struct xfrm_policy), gfp); |
311 | 313 | ||
312 | if (policy) { | 314 | if (policy) { |
313 | atomic_set(&policy->refcnt, 1); | 315 | INIT_HLIST_NODE(&policy->bydst); |
316 | INIT_HLIST_NODE(&policy->byidx); | ||
314 | rwlock_init(&policy->lock); | 317 | rwlock_init(&policy->lock); |
318 | atomic_set(&policy->refcnt, 1); | ||
315 | init_timer(&policy->timer); | 319 | init_timer(&policy->timer); |
316 | policy->timer.data = (unsigned long)policy; | 320 | policy->timer.data = (unsigned long)policy; |
317 | policy->timer.function = xfrm_policy_timer; | 321 | policy->timer.function = xfrm_policy_timer; |
@@ -357,17 +361,16 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy) | |||
357 | static void xfrm_policy_gc_task(void *data) | 361 | static void xfrm_policy_gc_task(void *data) |
358 | { | 362 | { |
359 | struct xfrm_policy *policy; | 363 | struct xfrm_policy *policy; |
360 | struct list_head *entry, *tmp; | 364 | struct hlist_node *entry, *tmp; |
361 | struct list_head gc_list = LIST_HEAD_INIT(gc_list); | 365 | struct hlist_head gc_list; |
362 | 366 | ||
363 | spin_lock_bh(&xfrm_policy_gc_lock); | 367 | spin_lock_bh(&xfrm_policy_gc_lock); |
364 | list_splice_init(&xfrm_policy_gc_list, &gc_list); | 368 | gc_list.first = xfrm_policy_gc_list.first; |
369 | INIT_HLIST_HEAD(&xfrm_policy_gc_list); | ||
365 | spin_unlock_bh(&xfrm_policy_gc_lock); | 370 | spin_unlock_bh(&xfrm_policy_gc_lock); |
366 | 371 | ||
367 | list_for_each_safe(entry, tmp, &gc_list) { | 372 | hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) |
368 | policy = list_entry(entry, struct xfrm_policy, list); | ||
369 | xfrm_policy_gc_kill(policy); | 373 | xfrm_policy_gc_kill(policy); |
370 | } | ||
371 | } | 374 | } |
372 | 375 | ||
373 | /* Rule must be locked. Release descentant resources, announce | 376 | /* Rule must be locked. Release descentant resources, announce |
@@ -389,70 +392,275 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) | |||
389 | } | 392 | } |
390 | 393 | ||
391 | spin_lock(&xfrm_policy_gc_lock); | 394 | spin_lock(&xfrm_policy_gc_lock); |
392 | list_add(&policy->list, &xfrm_policy_gc_list); | 395 | hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); |
393 | spin_unlock(&xfrm_policy_gc_lock); | 396 | spin_unlock(&xfrm_policy_gc_lock); |
394 | 397 | ||
395 | schedule_work(&xfrm_policy_gc_work); | 398 | schedule_work(&xfrm_policy_gc_work); |
396 | } | 399 | } |
397 | 400 | ||
401 | struct xfrm_policy_hash { | ||
402 | struct hlist_head *table; | ||
403 | unsigned int hmask; | ||
404 | }; | ||
405 | |||
406 | static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; | ||
407 | static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; | ||
408 | static struct hlist_head *xfrm_policy_byidx __read_mostly; | ||
409 | static unsigned int xfrm_idx_hmask __read_mostly; | ||
410 | static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; | ||
411 | |||
412 | static inline unsigned int idx_hash(u32 index) | ||
413 | { | ||
414 | return __idx_hash(index, xfrm_idx_hmask); | ||
415 | } | ||
416 | |||
417 | static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) | ||
418 | { | ||
419 | unsigned int hmask = xfrm_policy_bydst[dir].hmask; | ||
420 | unsigned int hash = __sel_hash(sel, family, hmask); | ||
421 | |||
422 | return (hash == hmask + 1 ? | ||
423 | &xfrm_policy_inexact[dir] : | ||
424 | xfrm_policy_bydst[dir].table + hash); | ||
425 | } | ||
426 | |||
427 | static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) | ||
428 | { | ||
429 | unsigned int hmask = xfrm_policy_bydst[dir].hmask; | ||
430 | unsigned int hash = __addr_hash(daddr, saddr, family, hmask); | ||
431 | |||
432 | return xfrm_policy_bydst[dir].table + hash; | ||
433 | } | ||
434 | |||
435 | static void xfrm_dst_hash_transfer(struct hlist_head *list, | ||
436 | struct hlist_head *ndsttable, | ||
437 | unsigned int nhashmask) | ||
438 | { | ||
439 | struct hlist_node *entry, *tmp; | ||
440 | struct xfrm_policy *pol; | ||
441 | |||
442 | hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { | ||
443 | unsigned int h; | ||
444 | |||
445 | h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, | ||
446 | pol->family, nhashmask); | ||
447 | hlist_add_head(&pol->bydst, ndsttable+h); | ||
448 | } | ||
449 | } | ||
450 | |||
451 | static void xfrm_idx_hash_transfer(struct hlist_head *list, | ||
452 | struct hlist_head *nidxtable, | ||
453 | unsigned int nhashmask) | ||
454 | { | ||
455 | struct hlist_node *entry, *tmp; | ||
456 | struct xfrm_policy *pol; | ||
457 | |||
458 | hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { | ||
459 | unsigned int h; | ||
460 | |||
461 | h = __idx_hash(pol->index, nhashmask); | ||
462 | hlist_add_head(&pol->byidx, nidxtable+h); | ||
463 | } | ||
464 | } | ||
465 | |||
466 | static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) | ||
467 | { | ||
468 | return ((old_hmask + 1) << 1) - 1; | ||
469 | } | ||
470 | |||
471 | static void xfrm_bydst_resize(int dir) | ||
472 | { | ||
473 | unsigned int hmask = xfrm_policy_bydst[dir].hmask; | ||
474 | unsigned int nhashmask = xfrm_new_hash_mask(hmask); | ||
475 | unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); | ||
476 | struct hlist_head *odst = xfrm_policy_bydst[dir].table; | ||
477 | struct hlist_head *ndst = xfrm_hash_alloc(nsize); | ||
478 | int i; | ||
479 | |||
480 | if (!ndst) | ||
481 | return; | ||
482 | |||
483 | write_lock_bh(&xfrm_policy_lock); | ||
484 | |||
485 | for (i = hmask; i >= 0; i--) | ||
486 | xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); | ||
487 | |||
488 | xfrm_policy_bydst[dir].table = ndst; | ||
489 | xfrm_policy_bydst[dir].hmask = nhashmask; | ||
490 | |||
491 | write_unlock_bh(&xfrm_policy_lock); | ||
492 | |||
493 | xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); | ||
494 | } | ||
495 | |||
496 | static void xfrm_byidx_resize(int total) | ||
497 | { | ||
498 | unsigned int hmask = xfrm_idx_hmask; | ||
499 | unsigned int nhashmask = xfrm_new_hash_mask(hmask); | ||
500 | unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); | ||
501 | struct hlist_head *oidx = xfrm_policy_byidx; | ||
502 | struct hlist_head *nidx = xfrm_hash_alloc(nsize); | ||
503 | int i; | ||
504 | |||
505 | if (!nidx) | ||
506 | return; | ||
507 | |||
508 | write_lock_bh(&xfrm_policy_lock); | ||
509 | |||
510 | for (i = hmask; i >= 0; i--) | ||
511 | xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); | ||
512 | |||
513 | xfrm_policy_byidx = nidx; | ||
514 | xfrm_idx_hmask = nhashmask; | ||
515 | |||
516 | write_unlock_bh(&xfrm_policy_lock); | ||
517 | |||
518 | xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); | ||
519 | } | ||
520 | |||
521 | static inline int xfrm_bydst_should_resize(int dir, int *total) | ||
522 | { | ||
523 | unsigned int cnt = xfrm_policy_count[dir]; | ||
524 | unsigned int hmask = xfrm_policy_bydst[dir].hmask; | ||
525 | |||
526 | if (total) | ||
527 | *total += cnt; | ||
528 | |||
529 | if ((hmask + 1) < xfrm_policy_hashmax && | ||
530 | cnt > hmask) | ||
531 | return 1; | ||
532 | |||
533 | return 0; | ||
534 | } | ||
535 | |||
536 | static inline int xfrm_byidx_should_resize(int total) | ||
537 | { | ||
538 | unsigned int hmask = xfrm_idx_hmask; | ||
539 | |||
540 | if ((hmask + 1) < xfrm_policy_hashmax && | ||
541 | total > hmask) | ||
542 | return 1; | ||
543 | |||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | static DEFINE_MUTEX(hash_resize_mutex); | ||
548 | |||
549 | static void xfrm_hash_resize(void *__unused) | ||
550 | { | ||
551 | int dir, total; | ||
552 | |||
553 | mutex_lock(&hash_resize_mutex); | ||
554 | |||
555 | total = 0; | ||
556 | for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { | ||
557 | if (xfrm_bydst_should_resize(dir, &total)) | ||
558 | xfrm_bydst_resize(dir); | ||
559 | } | ||
560 | if (xfrm_byidx_should_resize(total)) | ||
561 | xfrm_byidx_resize(total); | ||
562 | |||
563 | mutex_unlock(&hash_resize_mutex); | ||
564 | } | ||
565 | |||
566 | static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); | ||
567 | |||
398 | /* Generate new index... KAME seems to generate them ordered by cost | 568 | /* Generate new index... KAME seems to generate them ordered by cost |
399 | * of an absolute inpredictability of ordering of rules. This will not pass. */ | 569 | * of an absolute inpredictability of ordering of rules. This will not pass. */ |
400 | static u32 xfrm_gen_index(int dir) | 570 | static u32 xfrm_gen_index(u8 type, int dir) |
401 | { | 571 | { |
402 | u32 idx; | ||
403 | struct xfrm_policy *p; | ||
404 | static u32 idx_generator; | 572 | static u32 idx_generator; |
405 | 573 | ||
406 | for (;;) { | 574 | for (;;) { |
575 | struct hlist_node *entry; | ||
576 | struct hlist_head *list; | ||
577 | struct xfrm_policy *p; | ||
578 | u32 idx; | ||
579 | int found; | ||
580 | |||
407 | idx = (idx_generator | dir); | 581 | idx = (idx_generator | dir); |
408 | idx_generator += 8; | 582 | idx_generator += 8; |
409 | if (idx == 0) | 583 | if (idx == 0) |
410 | idx = 8; | 584 | idx = 8; |
411 | for (p = xfrm_policy_list[dir]; p; p = p->next) { | 585 | list = xfrm_policy_byidx + idx_hash(idx); |
412 | if (p->index == idx) | 586 | found = 0; |
587 | hlist_for_each_entry(p, entry, list, byidx) { | ||
588 | if (p->index == idx) { | ||
589 | found = 1; | ||
413 | break; | 590 | break; |
591 | } | ||
414 | } | 592 | } |
415 | if (!p) | 593 | if (!found) |
416 | return idx; | 594 | return idx; |
417 | } | 595 | } |
418 | } | 596 | } |
419 | 597 | ||
598 | static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) | ||
599 | { | ||
600 | u32 *p1 = (u32 *) s1; | ||
601 | u32 *p2 = (u32 *) s2; | ||
602 | int len = sizeof(struct xfrm_selector) / sizeof(u32); | ||
603 | int i; | ||
604 | |||
605 | for (i = 0; i < len; i++) { | ||
606 | if (p1[i] != p2[i]) | ||
607 | return 1; | ||
608 | } | ||
609 | |||
610 | return 0; | ||
611 | } | ||
612 | |||
420 | int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) | 613 | int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) |
421 | { | 614 | { |
422 | struct xfrm_policy *pol, **p; | 615 | struct xfrm_policy *pol; |
423 | struct xfrm_policy *delpol = NULL; | 616 | struct xfrm_policy *delpol; |
424 | struct xfrm_policy **newpos = NULL; | 617 | struct hlist_head *chain; |
618 | struct hlist_node *entry, *newpos, *last; | ||
425 | struct dst_entry *gc_list; | 619 | struct dst_entry *gc_list; |
426 | 620 | ||
427 | write_lock_bh(&xfrm_policy_lock); | 621 | write_lock_bh(&xfrm_policy_lock); |
428 | for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { | 622 | chain = policy_hash_bysel(&policy->selector, policy->family, dir); |
429 | if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && | 623 | delpol = NULL; |
624 | newpos = NULL; | ||
625 | last = NULL; | ||
626 | hlist_for_each_entry(pol, entry, chain, bydst) { | ||
627 | if (!delpol && | ||
628 | pol->type == policy->type && | ||
629 | !selector_cmp(&pol->selector, &policy->selector) && | ||
430 | xfrm_sec_ctx_match(pol->security, policy->security)) { | 630 | xfrm_sec_ctx_match(pol->security, policy->security)) { |
431 | if (excl) { | 631 | if (excl) { |
432 | write_unlock_bh(&xfrm_policy_lock); | 632 | write_unlock_bh(&xfrm_policy_lock); |
433 | return -EEXIST; | 633 | return -EEXIST; |
434 | } | 634 | } |
435 | *p = pol->next; | ||
436 | delpol = pol; | 635 | delpol = pol; |
437 | if (policy->priority > pol->priority) | 636 | if (policy->priority > pol->priority) |
438 | continue; | 637 | continue; |
439 | } else if (policy->priority >= pol->priority) { | 638 | } else if (policy->priority >= pol->priority) { |
440 | p = &pol->next; | 639 | last = &pol->bydst; |
441 | continue; | 640 | continue; |
442 | } | 641 | } |
443 | if (!newpos) | 642 | if (!newpos) |
444 | newpos = p; | 643 | newpos = &pol->bydst; |
445 | if (delpol) | 644 | if (delpol) |
446 | break; | 645 | break; |
447 | p = &pol->next; | 646 | last = &pol->bydst; |
448 | } | 647 | } |
648 | if (!newpos) | ||
649 | newpos = last; | ||
449 | if (newpos) | 650 | if (newpos) |
450 | p = newpos; | 651 | hlist_add_after(newpos, &policy->bydst); |
652 | else | ||
653 | hlist_add_head(&policy->bydst, chain); | ||
451 | xfrm_pol_hold(policy); | 654 | xfrm_pol_hold(policy); |
452 | policy->next = *p; | 655 | xfrm_policy_count[dir]++; |
453 | *p = policy; | ||
454 | atomic_inc(&flow_cache_genid); | 656 | atomic_inc(&flow_cache_genid); |
455 | policy->index = delpol ? delpol->index : xfrm_gen_index(dir); | 657 | if (delpol) { |
658 | hlist_del(&delpol->bydst); | ||
659 | hlist_del(&delpol->byidx); | ||
660 | xfrm_policy_count[dir]--; | ||
661 | } | ||
662 | policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); | ||
663 | hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); | ||
456 | policy->curlft.add_time = (unsigned long)xtime.tv_sec; | 664 | policy->curlft.add_time = (unsigned long)xtime.tv_sec; |
457 | policy->curlft.use_time = 0; | 665 | policy->curlft.use_time = 0; |
458 | if (!mod_timer(&policy->timer, jiffies + HZ)) | 666 | if (!mod_timer(&policy->timer, jiffies + HZ)) |
@@ -461,10 +669,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) | |||
461 | 669 | ||
462 | if (delpol) | 670 | if (delpol) |
463 | xfrm_policy_kill(delpol); | 671 | xfrm_policy_kill(delpol); |
672 | else if (xfrm_bydst_should_resize(dir, NULL)) | ||
673 | schedule_work(&xfrm_hash_work); | ||
464 | 674 | ||
465 | read_lock_bh(&xfrm_policy_lock); | 675 | read_lock_bh(&xfrm_policy_lock); |
466 | gc_list = NULL; | 676 | gc_list = NULL; |
467 | for (policy = policy->next; policy; policy = policy->next) { | 677 | entry = &policy->bydst; |
678 | hlist_for_each_entry_continue(policy, entry, bydst) { | ||
468 | struct dst_entry *dst; | 679 | struct dst_entry *dst; |
469 | 680 | ||
470 | write_lock(&policy->lock); | 681 | write_lock(&policy->lock); |
@@ -493,87 +704,146 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) | |||
493 | } | 704 | } |
494 | EXPORT_SYMBOL(xfrm_policy_insert); | 705 | EXPORT_SYMBOL(xfrm_policy_insert); |
495 | 706 | ||
496 | struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, | 707 | struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, |
708 | struct xfrm_selector *sel, | ||
497 | struct xfrm_sec_ctx *ctx, int delete) | 709 | struct xfrm_sec_ctx *ctx, int delete) |
498 | { | 710 | { |
499 | struct xfrm_policy *pol, **p; | 711 | struct xfrm_policy *pol, *ret; |
712 | struct hlist_head *chain; | ||
713 | struct hlist_node *entry; | ||
500 | 714 | ||
501 | write_lock_bh(&xfrm_policy_lock); | 715 | write_lock_bh(&xfrm_policy_lock); |
502 | for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { | 716 | chain = policy_hash_bysel(sel, sel->family, dir); |
503 | if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && | 717 | ret = NULL; |
504 | (xfrm_sec_ctx_match(ctx, pol->security))) { | 718 | hlist_for_each_entry(pol, entry, chain, bydst) { |
719 | if (pol->type == type && | ||
720 | !selector_cmp(sel, &pol->selector) && | ||
721 | xfrm_sec_ctx_match(ctx, pol->security)) { | ||
505 | xfrm_pol_hold(pol); | 722 | xfrm_pol_hold(pol); |
506 | if (delete) | 723 | if (delete) { |
507 | *p = pol->next; | 724 | hlist_del(&pol->bydst); |
725 | hlist_del(&pol->byidx); | ||
726 | xfrm_policy_count[dir]--; | ||
727 | } | ||
728 | ret = pol; | ||
508 | break; | 729 | break; |
509 | } | 730 | } |
510 | } | 731 | } |
511 | write_unlock_bh(&xfrm_policy_lock); | 732 | write_unlock_bh(&xfrm_policy_lock); |
512 | 733 | ||
513 | if (pol && delete) { | 734 | if (ret && delete) { |
514 | atomic_inc(&flow_cache_genid); | 735 | atomic_inc(&flow_cache_genid); |
515 | xfrm_policy_kill(pol); | 736 | xfrm_policy_kill(ret); |
516 | } | 737 | } |
517 | return pol; | 738 | return ret; |
518 | } | 739 | } |
519 | EXPORT_SYMBOL(xfrm_policy_bysel_ctx); | 740 | EXPORT_SYMBOL(xfrm_policy_bysel_ctx); |
520 | 741 | ||
521 | struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) | 742 | struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) |
522 | { | 743 | { |
523 | struct xfrm_policy *pol, **p; | 744 | struct xfrm_policy *pol, *ret; |
745 | struct hlist_head *chain; | ||
746 | struct hlist_node *entry; | ||
524 | 747 | ||
525 | write_lock_bh(&xfrm_policy_lock); | 748 | write_lock_bh(&xfrm_policy_lock); |
526 | for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { | 749 | chain = xfrm_policy_byidx + idx_hash(id); |
527 | if (pol->index == id) { | 750 | ret = NULL; |
751 | hlist_for_each_entry(pol, entry, chain, byidx) { | ||
752 | if (pol->type == type && pol->index == id) { | ||
528 | xfrm_pol_hold(pol); | 753 | xfrm_pol_hold(pol); |
529 | if (delete) | 754 | if (delete) { |
530 | *p = pol->next; | 755 | hlist_del(&pol->bydst); |
756 | hlist_del(&pol->byidx); | ||
757 | xfrm_policy_count[dir]--; | ||
758 | } | ||
759 | ret = pol; | ||
531 | break; | 760 | break; |
532 | } | 761 | } |
533 | } | 762 | } |
534 | write_unlock_bh(&xfrm_policy_lock); | 763 | write_unlock_bh(&xfrm_policy_lock); |
535 | 764 | ||
536 | if (pol && delete) { | 765 | if (ret && delete) { |
537 | atomic_inc(&flow_cache_genid); | 766 | atomic_inc(&flow_cache_genid); |
538 | xfrm_policy_kill(pol); | 767 | xfrm_policy_kill(ret); |
539 | } | 768 | } |
540 | return pol; | 769 | return ret; |
541 | } | 770 | } |
542 | EXPORT_SYMBOL(xfrm_policy_byid); | 771 | EXPORT_SYMBOL(xfrm_policy_byid); |
543 | 772 | ||
544 | void xfrm_policy_flush(void) | 773 | void xfrm_policy_flush(u8 type) |
545 | { | 774 | { |
546 | struct xfrm_policy *xp; | ||
547 | int dir; | 775 | int dir; |
548 | 776 | ||
549 | write_lock_bh(&xfrm_policy_lock); | 777 | write_lock_bh(&xfrm_policy_lock); |
550 | for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { | 778 | for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { |
551 | while ((xp = xfrm_policy_list[dir]) != NULL) { | 779 | struct xfrm_policy *pol; |
552 | xfrm_policy_list[dir] = xp->next; | 780 | struct hlist_node *entry; |
781 | int i; | ||
782 | |||
783 | again1: | ||
784 | hlist_for_each_entry(pol, entry, | ||
785 | &xfrm_policy_inexact[dir], bydst) { | ||
786 | if (pol->type != type) | ||
787 | continue; | ||
788 | hlist_del(&pol->bydst); | ||
789 | hlist_del(&pol->byidx); | ||
553 | write_unlock_bh(&xfrm_policy_lock); | 790 | write_unlock_bh(&xfrm_policy_lock); |
554 | 791 | ||
555 | xfrm_policy_kill(xp); | 792 | xfrm_policy_kill(pol); |
556 | 793 | ||
557 | write_lock_bh(&xfrm_policy_lock); | 794 | write_lock_bh(&xfrm_policy_lock); |
795 | goto again1; | ||
558 | } | 796 | } |
797 | |||
798 | for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { | ||
799 | again2: | ||
800 | hlist_for_each_entry(pol, entry, | ||
801 | xfrm_policy_bydst[dir].table + i, | ||
802 | bydst) { | ||
803 | if (pol->type != type) | ||
804 | continue; | ||
805 | hlist_del(&pol->bydst); | ||
806 | hlist_del(&pol->byidx); | ||
807 | write_unlock_bh(&xfrm_policy_lock); | ||
808 | |||
809 | xfrm_policy_kill(pol); | ||
810 | |||
811 | write_lock_bh(&xfrm_policy_lock); | ||
812 | goto again2; | ||
813 | } | ||
814 | } | ||
815 | |||
816 | xfrm_policy_count[dir] = 0; | ||
559 | } | 817 | } |
560 | atomic_inc(&flow_cache_genid); | 818 | atomic_inc(&flow_cache_genid); |
561 | write_unlock_bh(&xfrm_policy_lock); | 819 | write_unlock_bh(&xfrm_policy_lock); |
562 | } | 820 | } |
563 | EXPORT_SYMBOL(xfrm_policy_flush); | 821 | EXPORT_SYMBOL(xfrm_policy_flush); |
564 | 822 | ||
565 | int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), | 823 | int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), |
566 | void *data) | 824 | void *data) |
567 | { | 825 | { |
568 | struct xfrm_policy *xp; | 826 | struct xfrm_policy *pol; |
569 | int dir; | 827 | struct hlist_node *entry; |
570 | int count = 0; | 828 | int dir, count, error; |
571 | int error = 0; | ||
572 | 829 | ||
573 | read_lock_bh(&xfrm_policy_lock); | 830 | read_lock_bh(&xfrm_policy_lock); |
831 | count = 0; | ||
574 | for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { | 832 | for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { |
575 | for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) | 833 | struct hlist_head *table = xfrm_policy_bydst[dir].table; |
576 | count++; | 834 | int i; |
835 | |||
836 | hlist_for_each_entry(pol, entry, | ||
837 | &xfrm_policy_inexact[dir], bydst) { | ||
838 | if (pol->type == type) | ||
839 | count++; | ||
840 | } | ||
841 | for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { | ||
842 | hlist_for_each_entry(pol, entry, table + i, bydst) { | ||
843 | if (pol->type == type) | ||
844 | count++; | ||
845 | } | ||
846 | } | ||
577 | } | 847 | } |
578 | 848 | ||
579 | if (count == 0) { | 849 | if (count == 0) { |
@@ -582,13 +852,28 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), | |||
582 | } | 852 | } |
583 | 853 | ||
584 | for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { | 854 | for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { |
585 | for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { | 855 | struct hlist_head *table = xfrm_policy_bydst[dir].table; |
586 | error = func(xp, dir%XFRM_POLICY_MAX, --count, data); | 856 | int i; |
857 | |||
858 | hlist_for_each_entry(pol, entry, | ||
859 | &xfrm_policy_inexact[dir], bydst) { | ||
860 | if (pol->type != type) | ||
861 | continue; | ||
862 | error = func(pol, dir % XFRM_POLICY_MAX, --count, data); | ||
587 | if (error) | 863 | if (error) |
588 | goto out; | 864 | goto out; |
589 | } | 865 | } |
866 | for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { | ||
867 | hlist_for_each_entry(pol, entry, table + i, bydst) { | ||
868 | if (pol->type != type) | ||
869 | continue; | ||
870 | error = func(pol, dir % XFRM_POLICY_MAX, --count, data); | ||
871 | if (error) | ||
872 | goto out; | ||
873 | } | ||
874 | } | ||
590 | } | 875 | } |
591 | 876 | error = 0; | |
592 | out: | 877 | out: |
593 | read_unlock_bh(&xfrm_policy_lock); | 878 | read_unlock_bh(&xfrm_policy_lock); |
594 | return error; | 879 | return error; |
@@ -597,29 +882,79 @@ EXPORT_SYMBOL(xfrm_policy_walk); | |||
597 | 882 | ||
598 | /* Find policy to apply to this flow. */ | 883 | /* Find policy to apply to this flow. */ |
599 | 884 | ||
600 | static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, | 885 | static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, |
601 | void **objp, atomic_t **obj_refp) | 886 | u8 type, u16 family, int dir) |
602 | { | 887 | { |
603 | struct xfrm_policy *pol; | 888 | struct xfrm_selector *sel = &pol->selector; |
889 | int match; | ||
604 | 890 | ||
605 | read_lock_bh(&xfrm_policy_lock); | 891 | if (pol->family != family || |
606 | for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { | 892 | pol->type != type) |
607 | struct xfrm_selector *sel = &pol->selector; | 893 | return 0; |
608 | int match; | ||
609 | 894 | ||
610 | if (pol->family != family) | 895 | match = xfrm_selector_match(sel, fl, family); |
611 | continue; | 896 | if (match) { |
897 | if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) | ||
898 | return 1; | ||
899 | } | ||
612 | 900 | ||
613 | match = xfrm_selector_match(sel, fl, family); | 901 | return 0; |
902 | } | ||
614 | 903 | ||
615 | if (match) { | 904 | static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, |
616 | if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { | 905 | u16 family, u8 dir) |
617 | xfrm_pol_hold(pol); | 906 | { |
618 | break; | 907 | struct xfrm_policy *pol, *ret; |
619 | } | 908 | xfrm_address_t *daddr, *saddr; |
909 | struct hlist_node *entry; | ||
910 | struct hlist_head *chain; | ||
911 | u32 priority = ~0U; | ||
912 | |||
913 | daddr = xfrm_flowi_daddr(fl, family); | ||
914 | saddr = xfrm_flowi_saddr(fl, family); | ||
915 | if (unlikely(!daddr || !saddr)) | ||
916 | return NULL; | ||
917 | |||
918 | read_lock_bh(&xfrm_policy_lock); | ||
919 | chain = policy_hash_direct(daddr, saddr, family, dir); | ||
920 | ret = NULL; | ||
921 | hlist_for_each_entry(pol, entry, chain, bydst) { | ||
922 | if (xfrm_policy_match(pol, fl, type, family, dir)) { | ||
923 | ret = pol; | ||
924 | priority = ret->priority; | ||
925 | break; | ||
926 | } | ||
927 | } | ||
928 | chain = &xfrm_policy_inexact[dir]; | ||
929 | hlist_for_each_entry(pol, entry, chain, bydst) { | ||
930 | if (xfrm_policy_match(pol, fl, type, family, dir) && | ||
931 | pol->priority < priority) { | ||
932 | ret = pol; | ||
933 | break; | ||
620 | } | 934 | } |
621 | } | 935 | } |
936 | if (ret) | ||
937 | xfrm_pol_hold(ret); | ||
622 | read_unlock_bh(&xfrm_policy_lock); | 938 | read_unlock_bh(&xfrm_policy_lock); |
939 | |||
940 | return ret; | ||
941 | } | ||
942 | |||
943 | static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, | ||
944 | void **objp, atomic_t **obj_refp) | ||
945 | { | ||
946 | struct xfrm_policy *pol; | ||
947 | |||
948 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
949 | pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); | ||
950 | if (pol) | ||
951 | goto end; | ||
952 | #endif | ||
953 | pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); | ||
954 | |||
955 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
956 | end: | ||
957 | #endif | ||
623 | if ((*objp = (void *) pol) != NULL) | 958 | if ((*objp = (void *) pol) != NULL) |
624 | *obj_refp = &pol->refcnt; | 959 | *obj_refp = &pol->refcnt; |
625 | } | 960 | } |
@@ -641,7 +976,7 @@ static inline int policy_to_flow_dir(int dir) | |||
641 | }; | 976 | }; |
642 | } | 977 | } |
643 | 978 | ||
644 | static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) | 979 | static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) |
645 | { | 980 | { |
646 | struct xfrm_policy *pol; | 981 | struct xfrm_policy *pol; |
647 | 982 | ||
@@ -652,7 +987,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc | |||
652 | int err = 0; | 987 | int err = 0; |
653 | 988 | ||
654 | if (match) | 989 | if (match) |
655 | err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); | 990 | err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir)); |
656 | 991 | ||
657 | if (match && !err) | 992 | if (match && !err) |
658 | xfrm_pol_hold(pol); | 993 | xfrm_pol_hold(pol); |
@@ -665,24 +1000,29 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc | |||
665 | 1000 | ||
666 | static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) | 1001 | static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) |
667 | { | 1002 | { |
668 | pol->next = xfrm_policy_list[dir]; | 1003 | struct hlist_head *chain = policy_hash_bysel(&pol->selector, |
669 | xfrm_policy_list[dir] = pol; | 1004 | pol->family, dir); |
1005 | |||
1006 | hlist_add_head(&pol->bydst, chain); | ||
1007 | hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); | ||
1008 | xfrm_policy_count[dir]++; | ||
670 | xfrm_pol_hold(pol); | 1009 | xfrm_pol_hold(pol); |
1010 | |||
1011 | if (xfrm_bydst_should_resize(dir, NULL)) | ||
1012 | schedule_work(&xfrm_hash_work); | ||
671 | } | 1013 | } |
672 | 1014 | ||
673 | static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, | 1015 | static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, |
674 | int dir) | 1016 | int dir) |
675 | { | 1017 | { |
676 | struct xfrm_policy **polp; | 1018 | if (hlist_unhashed(&pol->bydst)) |
1019 | return NULL; | ||
677 | 1020 | ||
678 | for (polp = &xfrm_policy_list[dir]; | 1021 | hlist_del(&pol->bydst); |
679 | *polp != NULL; polp = &(*polp)->next) { | 1022 | hlist_del(&pol->byidx); |
680 | if (*polp == pol) { | 1023 | xfrm_policy_count[dir]--; |
681 | *polp = pol->next; | 1024 | |
682 | return pol; | 1025 | return pol; |
683 | } | ||
684 | } | ||
685 | return NULL; | ||
686 | } | 1026 | } |
687 | 1027 | ||
688 | int xfrm_policy_delete(struct xfrm_policy *pol, int dir) | 1028 | int xfrm_policy_delete(struct xfrm_policy *pol, int dir) |
@@ -704,12 +1044,17 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) | |||
704 | { | 1044 | { |
705 | struct xfrm_policy *old_pol; | 1045 | struct xfrm_policy *old_pol; |
706 | 1046 | ||
1047 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
1048 | if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) | ||
1049 | return -EINVAL; | ||
1050 | #endif | ||
1051 | |||
707 | write_lock_bh(&xfrm_policy_lock); | 1052 | write_lock_bh(&xfrm_policy_lock); |
708 | old_pol = sk->sk_policy[dir]; | 1053 | old_pol = sk->sk_policy[dir]; |
709 | sk->sk_policy[dir] = pol; | 1054 | sk->sk_policy[dir] = pol; |
710 | if (pol) { | 1055 | if (pol) { |
711 | pol->curlft.add_time = (unsigned long)xtime.tv_sec; | 1056 | pol->curlft.add_time = (unsigned long)xtime.tv_sec; |
712 | pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); | 1057 | pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); |
713 | __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); | 1058 | __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); |
714 | } | 1059 | } |
715 | if (old_pol) | 1060 | if (old_pol) |
@@ -738,6 +1083,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) | |||
738 | newp->flags = old->flags; | 1083 | newp->flags = old->flags; |
739 | newp->xfrm_nr = old->xfrm_nr; | 1084 | newp->xfrm_nr = old->xfrm_nr; |
740 | newp->index = old->index; | 1085 | newp->index = old->index; |
1086 | newp->type = old->type; | ||
741 | memcpy(newp->xfrm_vec, old->xfrm_vec, | 1087 | memcpy(newp->xfrm_vec, old->xfrm_vec, |
742 | newp->xfrm_nr*sizeof(struct xfrm_tmpl)); | 1088 | newp->xfrm_nr*sizeof(struct xfrm_tmpl)); |
743 | write_lock_bh(&xfrm_policy_lock); | 1089 | write_lock_bh(&xfrm_policy_lock); |
@@ -761,17 +1107,32 @@ int __xfrm_sk_clone_policy(struct sock *sk) | |||
761 | return 0; | 1107 | return 0; |
762 | } | 1108 | } |
763 | 1109 | ||
1110 | static int | ||
1111 | xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, | ||
1112 | unsigned short family) | ||
1113 | { | ||
1114 | int err; | ||
1115 | struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); | ||
1116 | |||
1117 | if (unlikely(afinfo == NULL)) | ||
1118 | return -EINVAL; | ||
1119 | err = afinfo->get_saddr(local, remote); | ||
1120 | xfrm_policy_put_afinfo(afinfo); | ||
1121 | return err; | ||
1122 | } | ||
1123 | |||
764 | /* Resolve list of templates for the flow, given policy. */ | 1124 | /* Resolve list of templates for the flow, given policy. */ |
765 | 1125 | ||
766 | static int | 1126 | static int |
767 | xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, | 1127 | xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, |
768 | struct xfrm_state **xfrm, | 1128 | struct xfrm_state **xfrm, |
769 | unsigned short family) | 1129 | unsigned short family) |
770 | { | 1130 | { |
771 | int nx; | 1131 | int nx; |
772 | int i, error; | 1132 | int i, error; |
773 | xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); | 1133 | xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); |
774 | xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); | 1134 | xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); |
1135 | xfrm_address_t tmp; | ||
775 | 1136 | ||
776 | for (nx=0, i = 0; i < policy->xfrm_nr; i++) { | 1137 | for (nx=0, i = 0; i < policy->xfrm_nr; i++) { |
777 | struct xfrm_state *x; | 1138 | struct xfrm_state *x; |
@@ -779,9 +1140,15 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, | |||
779 | xfrm_address_t *local = saddr; | 1140 | xfrm_address_t *local = saddr; |
780 | struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; | 1141 | struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; |
781 | 1142 | ||
782 | if (tmpl->mode) { | 1143 | if (tmpl->mode == XFRM_MODE_TUNNEL) { |
783 | remote = &tmpl->id.daddr; | 1144 | remote = &tmpl->id.daddr; |
784 | local = &tmpl->saddr; | 1145 | local = &tmpl->saddr; |
1146 | if (xfrm_addr_any(local, family)) { | ||
1147 | error = xfrm_get_saddr(&tmp, remote, family); | ||
1148 | if (error) | ||
1149 | goto fail; | ||
1150 | local = &tmp; | ||
1151 | } | ||
785 | } | 1152 | } |
786 | 1153 | ||
787 | x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); | 1154 | x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); |
@@ -809,6 +1176,45 @@ fail: | |||
809 | return error; | 1176 | return error; |
810 | } | 1177 | } |
811 | 1178 | ||
1179 | static int | ||
1180 | xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, | ||
1181 | struct xfrm_state **xfrm, | ||
1182 | unsigned short family) | ||
1183 | { | ||
1184 | struct xfrm_state *tp[XFRM_MAX_DEPTH]; | ||
1185 | struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; | ||
1186 | int cnx = 0; | ||
1187 | int error; | ||
1188 | int ret; | ||
1189 | int i; | ||
1190 | |||
1191 | for (i = 0; i < npols; i++) { | ||
1192 | if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { | ||
1193 | error = -ENOBUFS; | ||
1194 | goto fail; | ||
1195 | } | ||
1196 | |||
1197 | ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); | ||
1198 | if (ret < 0) { | ||
1199 | error = ret; | ||
1200 | goto fail; | ||
1201 | } else | ||
1202 | cnx += ret; | ||
1203 | } | ||
1204 | |||
1205 | /* found states are sorted for outbound processing */ | ||
1206 | if (npols > 1) | ||
1207 | xfrm_state_sort(xfrm, tpp, cnx, family); | ||
1208 | |||
1209 | return cnx; | ||
1210 | |||
1211 | fail: | ||
1212 | for (cnx--; cnx>=0; cnx--) | ||
1213 | xfrm_state_put(tpp[cnx]); | ||
1214 | return error; | ||
1215 | |||
1216 | } | ||
1217 | |||
812 | /* Check that the bundle accepts the flow and its components are | 1218 | /* Check that the bundle accepts the flow and its components are |
813 | * still valid. | 1219 | * still valid. |
814 | */ | 1220 | */ |
@@ -855,6 +1261,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, | |||
855 | struct sock *sk, int flags) | 1261 | struct sock *sk, int flags) |
856 | { | 1262 | { |
857 | struct xfrm_policy *policy; | 1263 | struct xfrm_policy *policy; |
1264 | struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; | ||
1265 | int npols; | ||
1266 | int pol_dead; | ||
1267 | int xfrm_nr; | ||
1268 | int pi; | ||
858 | struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; | 1269 | struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; |
859 | struct dst_entry *dst, *dst_orig = *dst_p; | 1270 | struct dst_entry *dst, *dst_orig = *dst_p; |
860 | int nx = 0; | 1271 | int nx = 0; |
@@ -862,19 +1273,26 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, | |||
862 | u32 genid; | 1273 | u32 genid; |
863 | u16 family; | 1274 | u16 family; |
864 | u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); | 1275 | u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); |
865 | u32 sk_sid = security_sk_sid(sk, fl, dir); | 1276 | |
866 | restart: | 1277 | restart: |
867 | genid = atomic_read(&flow_cache_genid); | 1278 | genid = atomic_read(&flow_cache_genid); |
868 | policy = NULL; | 1279 | policy = NULL; |
1280 | for (pi = 0; pi < ARRAY_SIZE(pols); pi++) | ||
1281 | pols[pi] = NULL; | ||
1282 | npols = 0; | ||
1283 | pol_dead = 0; | ||
1284 | xfrm_nr = 0; | ||
1285 | |||
869 | if (sk && sk->sk_policy[1]) | 1286 | if (sk && sk->sk_policy[1]) |
870 | policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); | 1287 | policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); |
871 | 1288 | ||
872 | if (!policy) { | 1289 | if (!policy) { |
873 | /* To accelerate a bit... */ | 1290 | /* To accelerate a bit... */ |
874 | if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) | 1291 | if ((dst_orig->flags & DST_NOXFRM) || |
1292 | !xfrm_policy_count[XFRM_POLICY_OUT]) | ||
875 | return 0; | 1293 | return 0; |
876 | 1294 | ||
877 | policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family, | 1295 | policy = flow_cache_lookup(fl, dst_orig->ops->family, |
878 | dir, xfrm_policy_lookup); | 1296 | dir, xfrm_policy_lookup); |
879 | } | 1297 | } |
880 | 1298 | ||
@@ -883,6 +1301,9 @@ restart: | |||
883 | 1301 | ||
884 | family = dst_orig->ops->family; | 1302 | family = dst_orig->ops->family; |
885 | policy->curlft.use_time = (unsigned long)xtime.tv_sec; | 1303 | policy->curlft.use_time = (unsigned long)xtime.tv_sec; |
1304 | pols[0] = policy; | ||
1305 | npols ++; | ||
1306 | xfrm_nr += pols[0]->xfrm_nr; | ||
886 | 1307 | ||
887 | switch (policy->action) { | 1308 | switch (policy->action) { |
888 | case XFRM_POLICY_BLOCK: | 1309 | case XFRM_POLICY_BLOCK: |
@@ -891,11 +1312,13 @@ restart: | |||
891 | goto error; | 1312 | goto error; |
892 | 1313 | ||
893 | case XFRM_POLICY_ALLOW: | 1314 | case XFRM_POLICY_ALLOW: |
1315 | #ifndef CONFIG_XFRM_SUB_POLICY | ||
894 | if (policy->xfrm_nr == 0) { | 1316 | if (policy->xfrm_nr == 0) { |
895 | /* Flow passes not transformed. */ | 1317 | /* Flow passes not transformed. */ |
896 | xfrm_pol_put(policy); | 1318 | xfrm_pol_put(policy); |
897 | return 0; | 1319 | return 0; |
898 | } | 1320 | } |
1321 | #endif | ||
899 | 1322 | ||
900 | /* Try to find matching bundle. | 1323 | /* Try to find matching bundle. |
901 | * | 1324 | * |
@@ -911,7 +1334,36 @@ restart: | |||
911 | if (dst) | 1334 | if (dst) |
912 | break; | 1335 | break; |
913 | 1336 | ||
914 | nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); | 1337 | #ifdef CONFIG_XFRM_SUB_POLICY |
1338 | if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { | ||
1339 | pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, | ||
1340 | fl, family, | ||
1341 | XFRM_POLICY_OUT); | ||
1342 | if (pols[1]) { | ||
1343 | if (pols[1]->action == XFRM_POLICY_BLOCK) { | ||
1344 | err = -EPERM; | ||
1345 | goto error; | ||
1346 | } | ||
1347 | npols ++; | ||
1348 | xfrm_nr += pols[1]->xfrm_nr; | ||
1349 | } | ||
1350 | } | ||
1351 | |||
1352 | /* | ||
1353 | * Because neither flowi nor bundle information knows about | ||
1354 | * transformation template size. On more than one policy usage | ||
1355 | * we can realize whether all of them is bypass or not after | ||
1356 | * they are searched. See above not-transformed bypass | ||
1357 | * is surrounded by non-sub policy configuration, too. | ||
1358 | */ | ||
1359 | if (xfrm_nr == 0) { | ||
1360 | /* Flow passes not transformed. */ | ||
1361 | xfrm_pols_put(pols, npols); | ||
1362 | return 0; | ||
1363 | } | ||
1364 | |||
1365 | #endif | ||
1366 | nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); | ||
915 | 1367 | ||
916 | if (unlikely(nx<0)) { | 1368 | if (unlikely(nx<0)) { |
917 | err = nx; | 1369 | err = nx; |
@@ -924,7 +1376,7 @@ restart: | |||
924 | set_current_state(TASK_RUNNING); | 1376 | set_current_state(TASK_RUNNING); |
925 | remove_wait_queue(&km_waitq, &wait); | 1377 | remove_wait_queue(&km_waitq, &wait); |
926 | 1378 | ||
927 | nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); | 1379 | nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); |
928 | 1380 | ||
929 | if (nx == -EAGAIN && signal_pending(current)) { | 1381 | if (nx == -EAGAIN && signal_pending(current)) { |
930 | err = -ERESTART; | 1382 | err = -ERESTART; |
@@ -932,7 +1384,7 @@ restart: | |||
932 | } | 1384 | } |
933 | if (nx == -EAGAIN || | 1385 | if (nx == -EAGAIN || |
934 | genid != atomic_read(&flow_cache_genid)) { | 1386 | genid != atomic_read(&flow_cache_genid)) { |
935 | xfrm_pol_put(policy); | 1387 | xfrm_pols_put(pols, npols); |
936 | goto restart; | 1388 | goto restart; |
937 | } | 1389 | } |
938 | err = nx; | 1390 | err = nx; |
@@ -942,7 +1394,7 @@ restart: | |||
942 | } | 1394 | } |
943 | if (nx == 0) { | 1395 | if (nx == 0) { |
944 | /* Flow passes not transformed. */ | 1396 | /* Flow passes not transformed. */ |
945 | xfrm_pol_put(policy); | 1397 | xfrm_pols_put(pols, npols); |
946 | return 0; | 1398 | return 0; |
947 | } | 1399 | } |
948 | 1400 | ||
@@ -956,8 +1408,14 @@ restart: | |||
956 | goto error; | 1408 | goto error; |
957 | } | 1409 | } |
958 | 1410 | ||
1411 | for (pi = 0; pi < npols; pi++) { | ||
1412 | read_lock_bh(&pols[pi]->lock); | ||
1413 | pol_dead |= pols[pi]->dead; | ||
1414 | read_unlock_bh(&pols[pi]->lock); | ||
1415 | } | ||
1416 | |||
959 | write_lock_bh(&policy->lock); | 1417 | write_lock_bh(&policy->lock); |
960 | if (unlikely(policy->dead || stale_bundle(dst))) { | 1418 | if (unlikely(pol_dead || stale_bundle(dst))) { |
961 | /* Wow! While we worked on resolving, this | 1419 | /* Wow! While we worked on resolving, this |
962 | * policy has gone. Retry. It is not paranoia, | 1420 | * policy has gone. Retry. It is not paranoia, |
963 | * we just cannot enlist new bundle to dead object. | 1421 | * we just cannot enlist new bundle to dead object. |
@@ -977,17 +1435,34 @@ restart: | |||
977 | } | 1435 | } |
978 | *dst_p = dst; | 1436 | *dst_p = dst; |
979 | dst_release(dst_orig); | 1437 | dst_release(dst_orig); |
980 | xfrm_pol_put(policy); | 1438 | xfrm_pols_put(pols, npols); |
981 | return 0; | 1439 | return 0; |
982 | 1440 | ||
983 | error: | 1441 | error: |
984 | dst_release(dst_orig); | 1442 | dst_release(dst_orig); |
985 | xfrm_pol_put(policy); | 1443 | xfrm_pols_put(pols, npols); |
986 | *dst_p = NULL; | 1444 | *dst_p = NULL; |
987 | return err; | 1445 | return err; |
988 | } | 1446 | } |
989 | EXPORT_SYMBOL(xfrm_lookup); | 1447 | EXPORT_SYMBOL(xfrm_lookup); |
990 | 1448 | ||
1449 | static inline int | ||
1450 | xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) | ||
1451 | { | ||
1452 | struct xfrm_state *x; | ||
1453 | int err; | ||
1454 | |||
1455 | if (!skb->sp || idx < 0 || idx >= skb->sp->len) | ||
1456 | return 0; | ||
1457 | x = skb->sp->xvec[idx]; | ||
1458 | if (!x->type->reject) | ||
1459 | return 0; | ||
1460 | xfrm_state_hold(x); | ||
1461 | err = x->type->reject(x, skb, fl); | ||
1462 | xfrm_state_put(x); | ||
1463 | return err; | ||
1464 | } | ||
1465 | |||
991 | /* When skb is transformed back to its "native" form, we have to | 1466 | /* When skb is transformed back to its "native" form, we have to |
992 | * check policy restrictions. At the moment we make this in maximally | 1467 | * check policy restrictions. At the moment we make this in maximally |
993 | * stupid way. Shame on me. :-) Of course, connected sockets must | 1468 | * stupid way. Shame on me. :-) Of course, connected sockets must |
@@ -1004,10 +1479,19 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, | |||
1004 | (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && | 1479 | (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && |
1005 | (x->props.reqid == tmpl->reqid || !tmpl->reqid) && | 1480 | (x->props.reqid == tmpl->reqid || !tmpl->reqid) && |
1006 | x->props.mode == tmpl->mode && | 1481 | x->props.mode == tmpl->mode && |
1007 | (tmpl->aalgos & (1<<x->props.aalgo)) && | 1482 | ((tmpl->aalgos & (1<<x->props.aalgo)) || |
1008 | !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); | 1483 | !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && |
1484 | !(x->props.mode != XFRM_MODE_TRANSPORT && | ||
1485 | xfrm_state_addr_cmp(tmpl, x, family)); | ||
1009 | } | 1486 | } |
1010 | 1487 | ||
1488 | /* | ||
1489 | * 0 or more than 0 is returned when validation is succeeded (either bypass | ||
1490 | * because of optional transport mode, or next index of the mathced secpath | ||
1491 | * state with the template. | ||
1492 | * -1 is returned when no matching template is found. | ||
1493 | * Otherwise "-2 - errored_index" is returned. | ||
1494 | */ | ||
1011 | static inline int | 1495 | static inline int |
1012 | xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, | 1496 | xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, |
1013 | unsigned short family) | 1497 | unsigned short family) |
@@ -1015,15 +1499,18 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, | |||
1015 | int idx = start; | 1499 | int idx = start; |
1016 | 1500 | ||
1017 | if (tmpl->optional) { | 1501 | if (tmpl->optional) { |
1018 | if (!tmpl->mode) | 1502 | if (tmpl->mode == XFRM_MODE_TRANSPORT) |
1019 | return start; | 1503 | return start; |
1020 | } else | 1504 | } else |
1021 | start = -1; | 1505 | start = -1; |
1022 | for (; idx < sp->len; idx++) { | 1506 | for (; idx < sp->len; idx++) { |
1023 | if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) | 1507 | if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) |
1024 | return ++idx; | 1508 | return ++idx; |
1025 | if (sp->xvec[idx]->props.mode) | 1509 | if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { |
1510 | if (start == -1) | ||
1511 | start = -2-idx; | ||
1026 | break; | 1512 | break; |
1513 | } | ||
1027 | } | 1514 | } |
1028 | return start; | 1515 | return start; |
1029 | } | 1516 | } |
@@ -1032,21 +1519,25 @@ int | |||
1032 | xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) | 1519 | xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) |
1033 | { | 1520 | { |
1034 | struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); | 1521 | struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); |
1522 | int err; | ||
1035 | 1523 | ||
1036 | if (unlikely(afinfo == NULL)) | 1524 | if (unlikely(afinfo == NULL)) |
1037 | return -EAFNOSUPPORT; | 1525 | return -EAFNOSUPPORT; |
1038 | 1526 | ||
1039 | afinfo->decode_session(skb, fl); | 1527 | afinfo->decode_session(skb, fl); |
1528 | err = security_xfrm_decode_session(skb, &fl->secid); | ||
1040 | xfrm_policy_put_afinfo(afinfo); | 1529 | xfrm_policy_put_afinfo(afinfo); |
1041 | return 0; | 1530 | return err; |
1042 | } | 1531 | } |
1043 | EXPORT_SYMBOL(xfrm_decode_session); | 1532 | EXPORT_SYMBOL(xfrm_decode_session); |
1044 | 1533 | ||
1045 | static inline int secpath_has_tunnel(struct sec_path *sp, int k) | 1534 | static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) |
1046 | { | 1535 | { |
1047 | for (; k < sp->len; k++) { | 1536 | for (; k < sp->len; k++) { |
1048 | if (sp->xvec[k]->props.mode) | 1537 | if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { |
1538 | *idxp = k; | ||
1049 | return 1; | 1539 | return 1; |
1540 | } | ||
1050 | } | 1541 | } |
1051 | 1542 | ||
1052 | return 0; | 1543 | return 0; |
@@ -1056,16 +1547,18 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, | |||
1056 | unsigned short family) | 1547 | unsigned short family) |
1057 | { | 1548 | { |
1058 | struct xfrm_policy *pol; | 1549 | struct xfrm_policy *pol; |
1550 | struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; | ||
1551 | int npols = 0; | ||
1552 | int xfrm_nr; | ||
1553 | int pi; | ||
1059 | struct flowi fl; | 1554 | struct flowi fl; |
1060 | u8 fl_dir = policy_to_flow_dir(dir); | 1555 | u8 fl_dir = policy_to_flow_dir(dir); |
1061 | u32 sk_sid; | 1556 | int xerr_idx = -1; |
1062 | 1557 | ||
1063 | if (xfrm_decode_session(skb, &fl, family) < 0) | 1558 | if (xfrm_decode_session(skb, &fl, family) < 0) |
1064 | return 0; | 1559 | return 0; |
1065 | nf_nat_decode_session(skb, &fl, family); | 1560 | nf_nat_decode_session(skb, &fl, family); |
1066 | 1561 | ||
1067 | sk_sid = security_sk_sid(sk, &fl, fl_dir); | ||
1068 | |||
1069 | /* First, check used SA against their selectors. */ | 1562 | /* First, check used SA against their selectors. */ |
1070 | if (skb->sp) { | 1563 | if (skb->sp) { |
1071 | int i; | 1564 | int i; |
@@ -1079,46 +1572,90 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, | |||
1079 | 1572 | ||
1080 | pol = NULL; | 1573 | pol = NULL; |
1081 | if (sk && sk->sk_policy[dir]) | 1574 | if (sk && sk->sk_policy[dir]) |
1082 | pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); | 1575 | pol = xfrm_sk_policy_lookup(sk, dir, &fl); |
1083 | 1576 | ||
1084 | if (!pol) | 1577 | if (!pol) |
1085 | pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, | 1578 | pol = flow_cache_lookup(&fl, family, fl_dir, |
1086 | xfrm_policy_lookup); | 1579 | xfrm_policy_lookup); |
1087 | 1580 | ||
1088 | if (!pol) | 1581 | if (!pol) { |
1089 | return !skb->sp || !secpath_has_tunnel(skb->sp, 0); | 1582 | if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { |
1583 | xfrm_secpath_reject(xerr_idx, skb, &fl); | ||
1584 | return 0; | ||
1585 | } | ||
1586 | return 1; | ||
1587 | } | ||
1090 | 1588 | ||
1091 | pol->curlft.use_time = (unsigned long)xtime.tv_sec; | 1589 | pol->curlft.use_time = (unsigned long)xtime.tv_sec; |
1092 | 1590 | ||
1591 | pols[0] = pol; | ||
1592 | npols ++; | ||
1593 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
1594 | if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { | ||
1595 | pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, | ||
1596 | &fl, family, | ||
1597 | XFRM_POLICY_IN); | ||
1598 | if (pols[1]) { | ||
1599 | pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; | ||
1600 | npols ++; | ||
1601 | } | ||
1602 | } | ||
1603 | #endif | ||
1604 | |||
1093 | if (pol->action == XFRM_POLICY_ALLOW) { | 1605 | if (pol->action == XFRM_POLICY_ALLOW) { |
1094 | struct sec_path *sp; | 1606 | struct sec_path *sp; |
1095 | static struct sec_path dummy; | 1607 | static struct sec_path dummy; |
1608 | struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; | ||
1609 | struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; | ||
1610 | struct xfrm_tmpl **tpp = tp; | ||
1611 | int ti = 0; | ||
1096 | int i, k; | 1612 | int i, k; |
1097 | 1613 | ||
1098 | if ((sp = skb->sp) == NULL) | 1614 | if ((sp = skb->sp) == NULL) |
1099 | sp = &dummy; | 1615 | sp = &dummy; |
1100 | 1616 | ||
1617 | for (pi = 0; pi < npols; pi++) { | ||
1618 | if (pols[pi] != pol && | ||
1619 | pols[pi]->action != XFRM_POLICY_ALLOW) | ||
1620 | goto reject; | ||
1621 | if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) | ||
1622 | goto reject_error; | ||
1623 | for (i = 0; i < pols[pi]->xfrm_nr; i++) | ||
1624 | tpp[ti++] = &pols[pi]->xfrm_vec[i]; | ||
1625 | } | ||
1626 | xfrm_nr = ti; | ||
1627 | if (npols > 1) { | ||
1628 | xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); | ||
1629 | tpp = stp; | ||
1630 | } | ||
1631 | |||
1101 | /* For each tunnel xfrm, find the first matching tmpl. | 1632 | /* For each tunnel xfrm, find the first matching tmpl. |
1102 | * For each tmpl before that, find corresponding xfrm. | 1633 | * For each tmpl before that, find corresponding xfrm. |
1103 | * Order is _important_. Later we will implement | 1634 | * Order is _important_. Later we will implement |
1104 | * some barriers, but at the moment barriers | 1635 | * some barriers, but at the moment barriers |
1105 | * are implied between each two transformations. | 1636 | * are implied between each two transformations. |
1106 | */ | 1637 | */ |
1107 | for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { | 1638 | for (i = xfrm_nr-1, k = 0; i >= 0; i--) { |
1108 | k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); | 1639 | k = xfrm_policy_ok(tpp[i], sp, k, family); |
1109 | if (k < 0) | 1640 | if (k < 0) { |
1641 | if (k < -1) | ||
1642 | /* "-2 - errored_index" returned */ | ||
1643 | xerr_idx = -(2+k); | ||
1110 | goto reject; | 1644 | goto reject; |
1645 | } | ||
1111 | } | 1646 | } |
1112 | 1647 | ||
1113 | if (secpath_has_tunnel(sp, k)) | 1648 | if (secpath_has_nontransport(sp, k, &xerr_idx)) |
1114 | goto reject; | 1649 | goto reject; |
1115 | 1650 | ||
1116 | xfrm_pol_put(pol); | 1651 | xfrm_pols_put(pols, npols); |
1117 | return 1; | 1652 | return 1; |
1118 | } | 1653 | } |
1119 | 1654 | ||
1120 | reject: | 1655 | reject: |
1121 | xfrm_pol_put(pol); | 1656 | xfrm_secpath_reject(xerr_idx, skb, &fl); |
1657 | reject_error: | ||
1658 | xfrm_pols_put(pols, npols); | ||
1122 | return 0; | 1659 | return 0; |
1123 | } | 1660 | } |
1124 | EXPORT_SYMBOL(__xfrm_policy_check); | 1661 | EXPORT_SYMBOL(__xfrm_policy_check); |
@@ -1166,7 +1703,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) | |||
1166 | 1703 | ||
1167 | static int stale_bundle(struct dst_entry *dst) | 1704 | static int stale_bundle(struct dst_entry *dst) |
1168 | { | 1705 | { |
1169 | return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC); | 1706 | return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); |
1170 | } | 1707 | } |
1171 | 1708 | ||
1172 | void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) | 1709 | void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) |
@@ -1196,33 +1733,50 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) | |||
1196 | return dst; | 1733 | return dst; |
1197 | } | 1734 | } |
1198 | 1735 | ||
1736 | static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) | ||
1737 | { | ||
1738 | struct dst_entry *dst, **dstp; | ||
1739 | |||
1740 | write_lock(&pol->lock); | ||
1741 | dstp = &pol->bundles; | ||
1742 | while ((dst=*dstp) != NULL) { | ||
1743 | if (func(dst)) { | ||
1744 | *dstp = dst->next; | ||
1745 | dst->next = *gc_list_p; | ||
1746 | *gc_list_p = dst; | ||
1747 | } else { | ||
1748 | dstp = &dst->next; | ||
1749 | } | ||
1750 | } | ||
1751 | write_unlock(&pol->lock); | ||
1752 | } | ||
1753 | |||
1199 | static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) | 1754 | static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) |
1200 | { | 1755 | { |
1201 | int i; | 1756 | struct dst_entry *gc_list = NULL; |
1202 | struct xfrm_policy *pol; | 1757 | int dir; |
1203 | struct dst_entry *dst, **dstp, *gc_list = NULL; | ||
1204 | 1758 | ||
1205 | read_lock_bh(&xfrm_policy_lock); | 1759 | read_lock_bh(&xfrm_policy_lock); |
1206 | for (i=0; i<2*XFRM_POLICY_MAX; i++) { | 1760 | for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { |
1207 | for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { | 1761 | struct xfrm_policy *pol; |
1208 | write_lock(&pol->lock); | 1762 | struct hlist_node *entry; |
1209 | dstp = &pol->bundles; | 1763 | struct hlist_head *table; |
1210 | while ((dst=*dstp) != NULL) { | 1764 | int i; |
1211 | if (func(dst)) { | 1765 | |
1212 | *dstp = dst->next; | 1766 | hlist_for_each_entry(pol, entry, |
1213 | dst->next = gc_list; | 1767 | &xfrm_policy_inexact[dir], bydst) |
1214 | gc_list = dst; | 1768 | prune_one_bundle(pol, func, &gc_list); |
1215 | } else { | 1769 | |
1216 | dstp = &dst->next; | 1770 | table = xfrm_policy_bydst[dir].table; |
1217 | } | 1771 | for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { |
1218 | } | 1772 | hlist_for_each_entry(pol, entry, table + i, bydst) |
1219 | write_unlock(&pol->lock); | 1773 | prune_one_bundle(pol, func, &gc_list); |
1220 | } | 1774 | } |
1221 | } | 1775 | } |
1222 | read_unlock_bh(&xfrm_policy_lock); | 1776 | read_unlock_bh(&xfrm_policy_lock); |
1223 | 1777 | ||
1224 | while (gc_list) { | 1778 | while (gc_list) { |
1225 | dst = gc_list; | 1779 | struct dst_entry *dst = gc_list; |
1226 | gc_list = dst->next; | 1780 | gc_list = dst->next; |
1227 | dst_free(dst); | 1781 | dst_free(dst); |
1228 | } | 1782 | } |
@@ -1238,22 +1792,12 @@ static void __xfrm_garbage_collect(void) | |||
1238 | xfrm_prune_bundles(unused_bundle); | 1792 | xfrm_prune_bundles(unused_bundle); |
1239 | } | 1793 | } |
1240 | 1794 | ||
1241 | int xfrm_flush_bundles(void) | 1795 | static int xfrm_flush_bundles(void) |
1242 | { | 1796 | { |
1243 | xfrm_prune_bundles(stale_bundle); | 1797 | xfrm_prune_bundles(stale_bundle); |
1244 | return 0; | 1798 | return 0; |
1245 | } | 1799 | } |
1246 | 1800 | ||
1247 | static int always_true(struct dst_entry *dst) | ||
1248 | { | ||
1249 | return 1; | ||
1250 | } | ||
1251 | |||
1252 | void xfrm_flush_all_bundles(void) | ||
1253 | { | ||
1254 | xfrm_prune_bundles(always_true); | ||
1255 | } | ||
1256 | |||
1257 | void xfrm_init_pmtu(struct dst_entry *dst) | 1801 | void xfrm_init_pmtu(struct dst_entry *dst) |
1258 | { | 1802 | { |
1259 | do { | 1803 | do { |
@@ -1281,7 +1825,7 @@ EXPORT_SYMBOL(xfrm_init_pmtu); | |||
1281 | * still valid. | 1825 | * still valid. |
1282 | */ | 1826 | */ |
1283 | 1827 | ||
1284 | int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) | 1828 | int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict) |
1285 | { | 1829 | { |
1286 | struct dst_entry *dst = &first->u.dst; | 1830 | struct dst_entry *dst = &first->u.dst; |
1287 | struct xfrm_dst *last; | 1831 | struct xfrm_dst *last; |
@@ -1298,8 +1842,16 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) | |||
1298 | 1842 | ||
1299 | if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) | 1843 | if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) |
1300 | return 0; | 1844 | return 0; |
1845 | if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm)) | ||
1846 | return 0; | ||
1301 | if (dst->xfrm->km.state != XFRM_STATE_VALID) | 1847 | if (dst->xfrm->km.state != XFRM_STATE_VALID) |
1302 | return 0; | 1848 | return 0; |
1849 | if (xdst->genid != dst->xfrm->genid) | ||
1850 | return 0; | ||
1851 | |||
1852 | if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && | ||
1853 | !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) | ||
1854 | return 0; | ||
1303 | 1855 | ||
1304 | mtu = dst_mtu(dst->child); | 1856 | mtu = dst_mtu(dst->child); |
1305 | if (xdst->child_mtu_cached != mtu) { | 1857 | if (xdst->child_mtu_cached != mtu) { |
@@ -1448,12 +2000,33 @@ static struct notifier_block xfrm_dev_notifier = { | |||
1448 | 2000 | ||
1449 | static void __init xfrm_policy_init(void) | 2001 | static void __init xfrm_policy_init(void) |
1450 | { | 2002 | { |
2003 | unsigned int hmask, sz; | ||
2004 | int dir; | ||
2005 | |||
1451 | xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", | 2006 | xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", |
1452 | sizeof(struct xfrm_dst), | 2007 | sizeof(struct xfrm_dst), |
1453 | 0, SLAB_HWCACHE_ALIGN, | 2008 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, |
1454 | NULL, NULL); | 2009 | NULL, NULL); |
1455 | if (!xfrm_dst_cache) | 2010 | |
1456 | panic("XFRM: failed to allocate xfrm_dst_cache\n"); | 2011 | hmask = 8 - 1; |
2012 | sz = (hmask+1) * sizeof(struct hlist_head); | ||
2013 | |||
2014 | xfrm_policy_byidx = xfrm_hash_alloc(sz); | ||
2015 | xfrm_idx_hmask = hmask; | ||
2016 | if (!xfrm_policy_byidx) | ||
2017 | panic("XFRM: failed to allocate byidx hash\n"); | ||
2018 | |||
2019 | for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { | ||
2020 | struct xfrm_policy_hash *htab; | ||
2021 | |||
2022 | INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); | ||
2023 | |||
2024 | htab = &xfrm_policy_bydst[dir]; | ||
2025 | htab->table = xfrm_hash_alloc(sz); | ||
2026 | htab->hmask = hmask; | ||
2027 | if (!htab->table) | ||
2028 | panic("XFRM: failed to allocate bydst hash\n"); | ||
2029 | } | ||
1457 | 2030 | ||
1458 | INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); | 2031 | INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); |
1459 | register_netdevice_notifier(&xfrm_dev_notifier); | 2032 | register_netdevice_notifier(&xfrm_dev_notifier); |
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0021aad5db43..9f63edd39346 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c | |||
@@ -18,8 +18,11 @@ | |||
18 | #include <linux/pfkeyv2.h> | 18 | #include <linux/pfkeyv2.h> |
19 | #include <linux/ipsec.h> | 19 | #include <linux/ipsec.h> |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/cache.h> | ||
21 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
22 | 23 | ||
24 | #include "xfrm_hash.h" | ||
25 | |||
23 | struct sock *xfrm_nl; | 26 | struct sock *xfrm_nl; |
24 | EXPORT_SYMBOL(xfrm_nl); | 27 | EXPORT_SYMBOL(xfrm_nl); |
25 | 28 | ||
@@ -32,7 +35,7 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); | |||
32 | /* Each xfrm_state may be linked to two tables: | 35 | /* Each xfrm_state may be linked to two tables: |
33 | 36 | ||
34 | 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) | 37 | 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) |
35 | 2. Hash table by daddr to find what SAs exist for given | 38 | 2. Hash table by (daddr,family,reqid) to find what SAs exist for given |
36 | destination/tunnel endpoint. (output) | 39 | destination/tunnel endpoint. (output) |
37 | */ | 40 | */ |
38 | 41 | ||
@@ -44,8 +47,123 @@ static DEFINE_SPINLOCK(xfrm_state_lock); | |||
44 | * Main use is finding SA after policy selected tunnel or transport mode. | 47 | * Main use is finding SA after policy selected tunnel or transport mode. |
45 | * Also, it can be used by ah/esp icmp error handler to find offending SA. | 48 | * Also, it can be used by ah/esp icmp error handler to find offending SA. |
46 | */ | 49 | */ |
47 | static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; | 50 | static struct hlist_head *xfrm_state_bydst __read_mostly; |
48 | static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; | 51 | static struct hlist_head *xfrm_state_bysrc __read_mostly; |
52 | static struct hlist_head *xfrm_state_byspi __read_mostly; | ||
53 | static unsigned int xfrm_state_hmask __read_mostly; | ||
54 | static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; | ||
55 | static unsigned int xfrm_state_num; | ||
56 | static unsigned int xfrm_state_genid; | ||
57 | |||
58 | static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, | ||
59 | xfrm_address_t *saddr, | ||
60 | u32 reqid, | ||
61 | unsigned short family) | ||
62 | { | ||
63 | return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); | ||
64 | } | ||
65 | |||
66 | static inline unsigned int xfrm_src_hash(xfrm_address_t *addr, | ||
67 | unsigned short family) | ||
68 | { | ||
69 | return __xfrm_src_hash(addr, family, xfrm_state_hmask); | ||
70 | } | ||
71 | |||
72 | static inline unsigned int | ||
73 | xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) | ||
74 | { | ||
75 | return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); | ||
76 | } | ||
77 | |||
78 | static void xfrm_hash_transfer(struct hlist_head *list, | ||
79 | struct hlist_head *ndsttable, | ||
80 | struct hlist_head *nsrctable, | ||
81 | struct hlist_head *nspitable, | ||
82 | unsigned int nhashmask) | ||
83 | { | ||
84 | struct hlist_node *entry, *tmp; | ||
85 | struct xfrm_state *x; | ||
86 | |||
87 | hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { | ||
88 | unsigned int h; | ||
89 | |||
90 | h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, | ||
91 | x->props.reqid, x->props.family, | ||
92 | nhashmask); | ||
93 | hlist_add_head(&x->bydst, ndsttable+h); | ||
94 | |||
95 | h = __xfrm_src_hash(&x->props.saddr, x->props.family, | ||
96 | nhashmask); | ||
97 | hlist_add_head(&x->bysrc, nsrctable+h); | ||
98 | |||
99 | h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, | ||
100 | x->props.family, nhashmask); | ||
101 | hlist_add_head(&x->byspi, nspitable+h); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | static unsigned long xfrm_hash_new_size(void) | ||
106 | { | ||
107 | return ((xfrm_state_hmask + 1) << 1) * | ||
108 | sizeof(struct hlist_head); | ||
109 | } | ||
110 | |||
111 | static DEFINE_MUTEX(hash_resize_mutex); | ||
112 | |||
113 | static void xfrm_hash_resize(void *__unused) | ||
114 | { | ||
115 | struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; | ||
116 | unsigned long nsize, osize; | ||
117 | unsigned int nhashmask, ohashmask; | ||
118 | int i; | ||
119 | |||
120 | mutex_lock(&hash_resize_mutex); | ||
121 | |||
122 | nsize = xfrm_hash_new_size(); | ||
123 | ndst = xfrm_hash_alloc(nsize); | ||
124 | if (!ndst) | ||
125 | goto out_unlock; | ||
126 | nsrc = xfrm_hash_alloc(nsize); | ||
127 | if (!nsrc) { | ||
128 | xfrm_hash_free(ndst, nsize); | ||
129 | goto out_unlock; | ||
130 | } | ||
131 | nspi = xfrm_hash_alloc(nsize); | ||
132 | if (!nspi) { | ||
133 | xfrm_hash_free(ndst, nsize); | ||
134 | xfrm_hash_free(nsrc, nsize); | ||
135 | goto out_unlock; | ||
136 | } | ||
137 | |||
138 | spin_lock_bh(&xfrm_state_lock); | ||
139 | |||
140 | nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; | ||
141 | for (i = xfrm_state_hmask; i >= 0; i--) | ||
142 | xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, | ||
143 | nhashmask); | ||
144 | |||
145 | odst = xfrm_state_bydst; | ||
146 | osrc = xfrm_state_bysrc; | ||
147 | ospi = xfrm_state_byspi; | ||
148 | ohashmask = xfrm_state_hmask; | ||
149 | |||
150 | xfrm_state_bydst = ndst; | ||
151 | xfrm_state_bysrc = nsrc; | ||
152 | xfrm_state_byspi = nspi; | ||
153 | xfrm_state_hmask = nhashmask; | ||
154 | |||
155 | spin_unlock_bh(&xfrm_state_lock); | ||
156 | |||
157 | osize = (ohashmask + 1) * sizeof(struct hlist_head); | ||
158 | xfrm_hash_free(odst, osize); | ||
159 | xfrm_hash_free(osrc, osize); | ||
160 | xfrm_hash_free(ospi, osize); | ||
161 | |||
162 | out_unlock: | ||
163 | mutex_unlock(&hash_resize_mutex); | ||
164 | } | ||
165 | |||
166 | static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); | ||
49 | 167 | ||
50 | DECLARE_WAIT_QUEUE_HEAD(km_waitq); | 168 | DECLARE_WAIT_QUEUE_HEAD(km_waitq); |
51 | EXPORT_SYMBOL(km_waitq); | 169 | EXPORT_SYMBOL(km_waitq); |
@@ -54,11 +172,9 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); | |||
54 | static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; | 172 | static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; |
55 | 173 | ||
56 | static struct work_struct xfrm_state_gc_work; | 174 | static struct work_struct xfrm_state_gc_work; |
57 | static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list); | 175 | static HLIST_HEAD(xfrm_state_gc_list); |
58 | static DEFINE_SPINLOCK(xfrm_state_gc_lock); | 176 | static DEFINE_SPINLOCK(xfrm_state_gc_lock); |
59 | 177 | ||
60 | static int xfrm_state_gc_flush_bundles; | ||
61 | |||
62 | int __xfrm_state_delete(struct xfrm_state *x); | 178 | int __xfrm_state_delete(struct xfrm_state *x); |
63 | 179 | ||
64 | static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); | 180 | static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); |
@@ -69,14 +185,13 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid); | |||
69 | 185 | ||
70 | static void xfrm_state_gc_destroy(struct xfrm_state *x) | 186 | static void xfrm_state_gc_destroy(struct xfrm_state *x) |
71 | { | 187 | { |
72 | if (del_timer(&x->timer)) | 188 | del_timer_sync(&x->timer); |
73 | BUG(); | 189 | del_timer_sync(&x->rtimer); |
74 | if (del_timer(&x->rtimer)) | ||
75 | BUG(); | ||
76 | kfree(x->aalg); | 190 | kfree(x->aalg); |
77 | kfree(x->ealg); | 191 | kfree(x->ealg); |
78 | kfree(x->calg); | 192 | kfree(x->calg); |
79 | kfree(x->encap); | 193 | kfree(x->encap); |
194 | kfree(x->coaddr); | ||
80 | if (x->mode) | 195 | if (x->mode) |
81 | xfrm_put_mode(x->mode); | 196 | xfrm_put_mode(x->mode); |
82 | if (x->type) { | 197 | if (x->type) { |
@@ -90,22 +205,17 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) | |||
90 | static void xfrm_state_gc_task(void *data) | 205 | static void xfrm_state_gc_task(void *data) |
91 | { | 206 | { |
92 | struct xfrm_state *x; | 207 | struct xfrm_state *x; |
93 | struct list_head *entry, *tmp; | 208 | struct hlist_node *entry, *tmp; |
94 | struct list_head gc_list = LIST_HEAD_INIT(gc_list); | 209 | struct hlist_head gc_list; |
95 | |||
96 | if (xfrm_state_gc_flush_bundles) { | ||
97 | xfrm_state_gc_flush_bundles = 0; | ||
98 | xfrm_flush_bundles(); | ||
99 | } | ||
100 | 210 | ||
101 | spin_lock_bh(&xfrm_state_gc_lock); | 211 | spin_lock_bh(&xfrm_state_gc_lock); |
102 | list_splice_init(&xfrm_state_gc_list, &gc_list); | 212 | gc_list.first = xfrm_state_gc_list.first; |
213 | INIT_HLIST_HEAD(&xfrm_state_gc_list); | ||
103 | spin_unlock_bh(&xfrm_state_gc_lock); | 214 | spin_unlock_bh(&xfrm_state_gc_lock); |
104 | 215 | ||
105 | list_for_each_safe(entry, tmp, &gc_list) { | 216 | hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) |
106 | x = list_entry(entry, struct xfrm_state, bydst); | ||
107 | xfrm_state_gc_destroy(x); | 217 | xfrm_state_gc_destroy(x); |
108 | } | 218 | |
109 | wake_up(&km_waitq); | 219 | wake_up(&km_waitq); |
110 | } | 220 | } |
111 | 221 | ||
@@ -168,9 +278,9 @@ static void xfrm_timer_handler(unsigned long data) | |||
168 | if (warn) | 278 | if (warn) |
169 | km_state_expired(x, 0, 0); | 279 | km_state_expired(x, 0, 0); |
170 | resched: | 280 | resched: |
171 | if (next != LONG_MAX && | 281 | if (next != LONG_MAX) |
172 | !mod_timer(&x->timer, jiffies + make_jiffies(next))) | 282 | mod_timer(&x->timer, jiffies + make_jiffies(next)); |
173 | xfrm_state_hold(x); | 283 | |
174 | goto out; | 284 | goto out; |
175 | 285 | ||
176 | expired: | 286 | expired: |
@@ -185,7 +295,6 @@ expired: | |||
185 | 295 | ||
186 | out: | 296 | out: |
187 | spin_unlock(&x->lock); | 297 | spin_unlock(&x->lock); |
188 | xfrm_state_put(x); | ||
189 | } | 298 | } |
190 | 299 | ||
191 | static void xfrm_replay_timer_handler(unsigned long data); | 300 | static void xfrm_replay_timer_handler(unsigned long data); |
@@ -199,8 +308,9 @@ struct xfrm_state *xfrm_state_alloc(void) | |||
199 | if (x) { | 308 | if (x) { |
200 | atomic_set(&x->refcnt, 1); | 309 | atomic_set(&x->refcnt, 1); |
201 | atomic_set(&x->tunnel_users, 0); | 310 | atomic_set(&x->tunnel_users, 0); |
202 | INIT_LIST_HEAD(&x->bydst); | 311 | INIT_HLIST_NODE(&x->bydst); |
203 | INIT_LIST_HEAD(&x->byspi); | 312 | INIT_HLIST_NODE(&x->bysrc); |
313 | INIT_HLIST_NODE(&x->byspi); | ||
204 | init_timer(&x->timer); | 314 | init_timer(&x->timer); |
205 | x->timer.function = xfrm_timer_handler; | 315 | x->timer.function = xfrm_timer_handler; |
206 | x->timer.data = (unsigned long)x; | 316 | x->timer.data = (unsigned long)x; |
@@ -225,7 +335,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) | |||
225 | BUG_TRAP(x->km.state == XFRM_STATE_DEAD); | 335 | BUG_TRAP(x->km.state == XFRM_STATE_DEAD); |
226 | 336 | ||
227 | spin_lock_bh(&xfrm_state_gc_lock); | 337 | spin_lock_bh(&xfrm_state_gc_lock); |
228 | list_add(&x->bydst, &xfrm_state_gc_list); | 338 | hlist_add_head(&x->bydst, &xfrm_state_gc_list); |
229 | spin_unlock_bh(&xfrm_state_gc_lock); | 339 | spin_unlock_bh(&xfrm_state_gc_lock); |
230 | schedule_work(&xfrm_state_gc_work); | 340 | schedule_work(&xfrm_state_gc_work); |
231 | } | 341 | } |
@@ -238,27 +348,12 @@ int __xfrm_state_delete(struct xfrm_state *x) | |||
238 | if (x->km.state != XFRM_STATE_DEAD) { | 348 | if (x->km.state != XFRM_STATE_DEAD) { |
239 | x->km.state = XFRM_STATE_DEAD; | 349 | x->km.state = XFRM_STATE_DEAD; |
240 | spin_lock(&xfrm_state_lock); | 350 | spin_lock(&xfrm_state_lock); |
241 | list_del(&x->bydst); | 351 | hlist_del(&x->bydst); |
242 | __xfrm_state_put(x); | 352 | hlist_del(&x->bysrc); |
243 | if (x->id.spi) { | 353 | if (x->id.spi) |
244 | list_del(&x->byspi); | 354 | hlist_del(&x->byspi); |
245 | __xfrm_state_put(x); | 355 | xfrm_state_num--; |
246 | } | ||
247 | spin_unlock(&xfrm_state_lock); | 356 | spin_unlock(&xfrm_state_lock); |
248 | if (del_timer(&x->timer)) | ||
249 | __xfrm_state_put(x); | ||
250 | if (del_timer(&x->rtimer)) | ||
251 | __xfrm_state_put(x); | ||
252 | |||
253 | /* The number two in this test is the reference | ||
254 | * mentioned in the comment below plus the reference | ||
255 | * our caller holds. A larger value means that | ||
256 | * there are DSTs attached to this xfrm_state. | ||
257 | */ | ||
258 | if (atomic_read(&x->refcnt) > 2) { | ||
259 | xfrm_state_gc_flush_bundles = 1; | ||
260 | schedule_work(&xfrm_state_gc_work); | ||
261 | } | ||
262 | 357 | ||
263 | /* All xfrm_state objects are created by xfrm_state_alloc. | 358 | /* All xfrm_state objects are created by xfrm_state_alloc. |
264 | * The xfrm_state_alloc call gives a reference, and that | 359 | * The xfrm_state_alloc call gives a reference, and that |
@@ -287,14 +382,15 @@ EXPORT_SYMBOL(xfrm_state_delete); | |||
287 | void xfrm_state_flush(u8 proto) | 382 | void xfrm_state_flush(u8 proto) |
288 | { | 383 | { |
289 | int i; | 384 | int i; |
290 | struct xfrm_state *x; | ||
291 | 385 | ||
292 | spin_lock_bh(&xfrm_state_lock); | 386 | spin_lock_bh(&xfrm_state_lock); |
293 | for (i = 0; i < XFRM_DST_HSIZE; i++) { | 387 | for (i = 0; i <= xfrm_state_hmask; i++) { |
388 | struct hlist_node *entry; | ||
389 | struct xfrm_state *x; | ||
294 | restart: | 390 | restart: |
295 | list_for_each_entry(x, xfrm_state_bydst+i, bydst) { | 391 | hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { |
296 | if (!xfrm_state_kern(x) && | 392 | if (!xfrm_state_kern(x) && |
297 | (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) { | 393 | xfrm_id_proto_match(x->id.proto, proto)) { |
298 | xfrm_state_hold(x); | 394 | xfrm_state_hold(x); |
299 | spin_unlock_bh(&xfrm_state_lock); | 395 | spin_unlock_bh(&xfrm_state_lock); |
300 | 396 | ||
@@ -325,29 +421,103 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, | |||
325 | return 0; | 421 | return 0; |
326 | } | 422 | } |
327 | 423 | ||
424 | static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) | ||
425 | { | ||
426 | unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); | ||
427 | struct xfrm_state *x; | ||
428 | struct hlist_node *entry; | ||
429 | |||
430 | hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { | ||
431 | if (x->props.family != family || | ||
432 | x->id.spi != spi || | ||
433 | x->id.proto != proto) | ||
434 | continue; | ||
435 | |||
436 | switch (family) { | ||
437 | case AF_INET: | ||
438 | if (x->id.daddr.a4 != daddr->a4) | ||
439 | continue; | ||
440 | break; | ||
441 | case AF_INET6: | ||
442 | if (!ipv6_addr_equal((struct in6_addr *)daddr, | ||
443 | (struct in6_addr *) | ||
444 | x->id.daddr.a6)) | ||
445 | continue; | ||
446 | break; | ||
447 | }; | ||
448 | |||
449 | xfrm_state_hold(x); | ||
450 | return x; | ||
451 | } | ||
452 | |||
453 | return NULL; | ||
454 | } | ||
455 | |||
456 | static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) | ||
457 | { | ||
458 | unsigned int h = xfrm_src_hash(saddr, family); | ||
459 | struct xfrm_state *x; | ||
460 | struct hlist_node *entry; | ||
461 | |||
462 | hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { | ||
463 | if (x->props.family != family || | ||
464 | x->id.proto != proto) | ||
465 | continue; | ||
466 | |||
467 | switch (family) { | ||
468 | case AF_INET: | ||
469 | if (x->id.daddr.a4 != daddr->a4 || | ||
470 | x->props.saddr.a4 != saddr->a4) | ||
471 | continue; | ||
472 | break; | ||
473 | case AF_INET6: | ||
474 | if (!ipv6_addr_equal((struct in6_addr *)daddr, | ||
475 | (struct in6_addr *) | ||
476 | x->id.daddr.a6) || | ||
477 | !ipv6_addr_equal((struct in6_addr *)saddr, | ||
478 | (struct in6_addr *) | ||
479 | x->props.saddr.a6)) | ||
480 | continue; | ||
481 | break; | ||
482 | }; | ||
483 | |||
484 | xfrm_state_hold(x); | ||
485 | return x; | ||
486 | } | ||
487 | |||
488 | return NULL; | ||
489 | } | ||
490 | |||
491 | static inline struct xfrm_state * | ||
492 | __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) | ||
493 | { | ||
494 | if (use_spi) | ||
495 | return __xfrm_state_lookup(&x->id.daddr, x->id.spi, | ||
496 | x->id.proto, family); | ||
497 | else | ||
498 | return __xfrm_state_lookup_byaddr(&x->id.daddr, | ||
499 | &x->props.saddr, | ||
500 | x->id.proto, family); | ||
501 | } | ||
502 | |||
328 | struct xfrm_state * | 503 | struct xfrm_state * |
329 | xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, | 504 | xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, |
330 | struct flowi *fl, struct xfrm_tmpl *tmpl, | 505 | struct flowi *fl, struct xfrm_tmpl *tmpl, |
331 | struct xfrm_policy *pol, int *err, | 506 | struct xfrm_policy *pol, int *err, |
332 | unsigned short family) | 507 | unsigned short family) |
333 | { | 508 | { |
334 | unsigned h = xfrm_dst_hash(daddr, family); | 509 | unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); |
510 | struct hlist_node *entry; | ||
335 | struct xfrm_state *x, *x0; | 511 | struct xfrm_state *x, *x0; |
336 | int acquire_in_progress = 0; | 512 | int acquire_in_progress = 0; |
337 | int error = 0; | 513 | int error = 0; |
338 | struct xfrm_state *best = NULL; | 514 | struct xfrm_state *best = NULL; |
339 | struct xfrm_state_afinfo *afinfo; | ||
340 | 515 | ||
341 | afinfo = xfrm_state_get_afinfo(family); | ||
342 | if (afinfo == NULL) { | ||
343 | *err = -EAFNOSUPPORT; | ||
344 | return NULL; | ||
345 | } | ||
346 | |||
347 | spin_lock_bh(&xfrm_state_lock); | 516 | spin_lock_bh(&xfrm_state_lock); |
348 | list_for_each_entry(x, xfrm_state_bydst+h, bydst) { | 517 | hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { |
349 | if (x->props.family == family && | 518 | if (x->props.family == family && |
350 | x->props.reqid == tmpl->reqid && | 519 | x->props.reqid == tmpl->reqid && |
520 | !(x->props.flags & XFRM_STATE_WILDRECV) && | ||
351 | xfrm_state_addr_check(x, daddr, saddr, family) && | 521 | xfrm_state_addr_check(x, daddr, saddr, family) && |
352 | tmpl->mode == x->props.mode && | 522 | tmpl->mode == x->props.mode && |
353 | tmpl->id.proto == x->id.proto && | 523 | tmpl->id.proto == x->id.proto && |
@@ -367,7 +537,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, | |||
367 | */ | 537 | */ |
368 | if (x->km.state == XFRM_STATE_VALID) { | 538 | if (x->km.state == XFRM_STATE_VALID) { |
369 | if (!xfrm_selector_match(&x->sel, fl, family) || | 539 | if (!xfrm_selector_match(&x->sel, fl, family) || |
370 | !xfrm_sec_ctx_match(pol->security, x->security)) | 540 | !security_xfrm_state_pol_flow_match(x, pol, fl)) |
371 | continue; | 541 | continue; |
372 | if (!best || | 542 | if (!best || |
373 | best->km.dying > x->km.dying || | 543 | best->km.dying > x->km.dying || |
@@ -379,7 +549,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, | |||
379 | } else if (x->km.state == XFRM_STATE_ERROR || | 549 | } else if (x->km.state == XFRM_STATE_ERROR || |
380 | x->km.state == XFRM_STATE_EXPIRED) { | 550 | x->km.state == XFRM_STATE_EXPIRED) { |
381 | if (xfrm_selector_match(&x->sel, fl, family) && | 551 | if (xfrm_selector_match(&x->sel, fl, family) && |
382 | xfrm_sec_ctx_match(pol->security, x->security)) | 552 | security_xfrm_state_pol_flow_match(x, pol, fl)) |
383 | error = -ESRCH; | 553 | error = -ESRCH; |
384 | } | 554 | } |
385 | } | 555 | } |
@@ -388,8 +558,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, | |||
388 | x = best; | 558 | x = best; |
389 | if (!x && !error && !acquire_in_progress) { | 559 | if (!x && !error && !acquire_in_progress) { |
390 | if (tmpl->id.spi && | 560 | if (tmpl->id.spi && |
391 | (x0 = afinfo->state_lookup(daddr, tmpl->id.spi, | 561 | (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, |
392 | tmpl->id.proto)) != NULL) { | 562 | tmpl->id.proto, family)) != NULL) { |
393 | xfrm_state_put(x0); | 563 | xfrm_state_put(x0); |
394 | error = -EEXIST; | 564 | error = -EEXIST; |
395 | goto out; | 565 | goto out; |
@@ -403,17 +573,24 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, | |||
403 | * to current session. */ | 573 | * to current session. */ |
404 | xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); | 574 | xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); |
405 | 575 | ||
576 | error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); | ||
577 | if (error) { | ||
578 | x->km.state = XFRM_STATE_DEAD; | ||
579 | xfrm_state_put(x); | ||
580 | x = NULL; | ||
581 | goto out; | ||
582 | } | ||
583 | |||
406 | if (km_query(x, tmpl, pol) == 0) { | 584 | if (km_query(x, tmpl, pol) == 0) { |
407 | x->km.state = XFRM_STATE_ACQ; | 585 | x->km.state = XFRM_STATE_ACQ; |
408 | list_add_tail(&x->bydst, xfrm_state_bydst+h); | 586 | hlist_add_head(&x->bydst, xfrm_state_bydst+h); |
409 | xfrm_state_hold(x); | 587 | h = xfrm_src_hash(saddr, family); |
588 | hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); | ||
410 | if (x->id.spi) { | 589 | if (x->id.spi) { |
411 | h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); | 590 | h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); |
412 | list_add(&x->byspi, xfrm_state_byspi+h); | 591 | hlist_add_head(&x->byspi, xfrm_state_byspi+h); |
413 | xfrm_state_hold(x); | ||
414 | } | 592 | } |
415 | x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; | 593 | x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; |
416 | xfrm_state_hold(x); | ||
417 | x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; | 594 | x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; |
418 | add_timer(&x->timer); | 595 | add_timer(&x->timer); |
419 | } else { | 596 | } else { |
@@ -429,59 +606,167 @@ out: | |||
429 | else | 606 | else |
430 | *err = acquire_in_progress ? -EAGAIN : error; | 607 | *err = acquire_in_progress ? -EAGAIN : error; |
431 | spin_unlock_bh(&xfrm_state_lock); | 608 | spin_unlock_bh(&xfrm_state_lock); |
432 | xfrm_state_put_afinfo(afinfo); | ||
433 | return x; | 609 | return x; |
434 | } | 610 | } |
435 | 611 | ||
436 | static void __xfrm_state_insert(struct xfrm_state *x) | 612 | static void __xfrm_state_insert(struct xfrm_state *x) |
437 | { | 613 | { |
438 | unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); | 614 | unsigned int h; |
439 | 615 | ||
440 | list_add(&x->bydst, xfrm_state_bydst+h); | 616 | x->genid = ++xfrm_state_genid; |
441 | xfrm_state_hold(x); | ||
442 | 617 | ||
443 | h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); | 618 | h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, |
619 | x->props.reqid, x->props.family); | ||
620 | hlist_add_head(&x->bydst, xfrm_state_bydst+h); | ||
444 | 621 | ||
445 | list_add(&x->byspi, xfrm_state_byspi+h); | 622 | h = xfrm_src_hash(&x->props.saddr, x->props.family); |
446 | xfrm_state_hold(x); | 623 | hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); |
447 | 624 | ||
448 | if (!mod_timer(&x->timer, jiffies + HZ)) | 625 | if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { |
449 | xfrm_state_hold(x); | 626 | h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, |
627 | x->props.family); | ||
450 | 628 | ||
451 | if (x->replay_maxage && | 629 | hlist_add_head(&x->byspi, xfrm_state_byspi+h); |
452 | !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) | 630 | } |
453 | xfrm_state_hold(x); | 631 | |
632 | mod_timer(&x->timer, jiffies + HZ); | ||
633 | if (x->replay_maxage) | ||
634 | mod_timer(&x->rtimer, jiffies + x->replay_maxage); | ||
454 | 635 | ||
455 | wake_up(&km_waitq); | 636 | wake_up(&km_waitq); |
637 | |||
638 | xfrm_state_num++; | ||
639 | |||
640 | if (x->bydst.next != NULL && | ||
641 | (xfrm_state_hmask + 1) < xfrm_state_hashmax && | ||
642 | xfrm_state_num > xfrm_state_hmask) | ||
643 | schedule_work(&xfrm_hash_work); | ||
644 | } | ||
645 | |||
646 | /* xfrm_state_lock is held */ | ||
647 | static void __xfrm_state_bump_genids(struct xfrm_state *xnew) | ||
648 | { | ||
649 | unsigned short family = xnew->props.family; | ||
650 | u32 reqid = xnew->props.reqid; | ||
651 | struct xfrm_state *x; | ||
652 | struct hlist_node *entry; | ||
653 | unsigned int h; | ||
654 | |||
655 | h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family); | ||
656 | hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { | ||
657 | if (x->props.family == family && | ||
658 | x->props.reqid == reqid && | ||
659 | !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && | ||
660 | !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) | ||
661 | x->genid = xfrm_state_genid; | ||
662 | } | ||
456 | } | 663 | } |
457 | 664 | ||
458 | void xfrm_state_insert(struct xfrm_state *x) | 665 | void xfrm_state_insert(struct xfrm_state *x) |
459 | { | 666 | { |
460 | spin_lock_bh(&xfrm_state_lock); | 667 | spin_lock_bh(&xfrm_state_lock); |
668 | __xfrm_state_bump_genids(x); | ||
461 | __xfrm_state_insert(x); | 669 | __xfrm_state_insert(x); |
462 | spin_unlock_bh(&xfrm_state_lock); | 670 | spin_unlock_bh(&xfrm_state_lock); |
463 | |||
464 | xfrm_flush_all_bundles(); | ||
465 | } | 671 | } |
466 | EXPORT_SYMBOL(xfrm_state_insert); | 672 | EXPORT_SYMBOL(xfrm_state_insert); |
467 | 673 | ||
674 | /* xfrm_state_lock is held */ | ||
675 | static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) | ||
676 | { | ||
677 | unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); | ||
678 | struct hlist_node *entry; | ||
679 | struct xfrm_state *x; | ||
680 | |||
681 | hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { | ||
682 | if (x->props.reqid != reqid || | ||
683 | x->props.mode != mode || | ||
684 | x->props.family != family || | ||
685 | x->km.state != XFRM_STATE_ACQ || | ||
686 | x->id.spi != 0) | ||
687 | continue; | ||
688 | |||
689 | switch (family) { | ||
690 | case AF_INET: | ||
691 | if (x->id.daddr.a4 != daddr->a4 || | ||
692 | x->props.saddr.a4 != saddr->a4) | ||
693 | continue; | ||
694 | break; | ||
695 | case AF_INET6: | ||
696 | if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, | ||
697 | (struct in6_addr *)daddr) || | ||
698 | !ipv6_addr_equal((struct in6_addr *) | ||
699 | x->props.saddr.a6, | ||
700 | (struct in6_addr *)saddr)) | ||
701 | continue; | ||
702 | break; | ||
703 | }; | ||
704 | |||
705 | xfrm_state_hold(x); | ||
706 | return x; | ||
707 | } | ||
708 | |||
709 | if (!create) | ||
710 | return NULL; | ||
711 | |||
712 | x = xfrm_state_alloc(); | ||
713 | if (likely(x)) { | ||
714 | switch (family) { | ||
715 | case AF_INET: | ||
716 | x->sel.daddr.a4 = daddr->a4; | ||
717 | x->sel.saddr.a4 = saddr->a4; | ||
718 | x->sel.prefixlen_d = 32; | ||
719 | x->sel.prefixlen_s = 32; | ||
720 | x->props.saddr.a4 = saddr->a4; | ||
721 | x->id.daddr.a4 = daddr->a4; | ||
722 | break; | ||
723 | |||
724 | case AF_INET6: | ||
725 | ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6, | ||
726 | (struct in6_addr *)daddr); | ||
727 | ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6, | ||
728 | (struct in6_addr *)saddr); | ||
729 | x->sel.prefixlen_d = 128; | ||
730 | x->sel.prefixlen_s = 128; | ||
731 | ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6, | ||
732 | (struct in6_addr *)saddr); | ||
733 | ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, | ||
734 | (struct in6_addr *)daddr); | ||
735 | break; | ||
736 | }; | ||
737 | |||
738 | x->km.state = XFRM_STATE_ACQ; | ||
739 | x->id.proto = proto; | ||
740 | x->props.family = family; | ||
741 | x->props.mode = mode; | ||
742 | x->props.reqid = reqid; | ||
743 | x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; | ||
744 | xfrm_state_hold(x); | ||
745 | x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; | ||
746 | add_timer(&x->timer); | ||
747 | hlist_add_head(&x->bydst, xfrm_state_bydst+h); | ||
748 | h = xfrm_src_hash(saddr, family); | ||
749 | hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); | ||
750 | wake_up(&km_waitq); | ||
751 | } | ||
752 | |||
753 | return x; | ||
754 | } | ||
755 | |||
468 | static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); | 756 | static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); |
469 | 757 | ||
470 | int xfrm_state_add(struct xfrm_state *x) | 758 | int xfrm_state_add(struct xfrm_state *x) |
471 | { | 759 | { |
472 | struct xfrm_state_afinfo *afinfo; | ||
473 | struct xfrm_state *x1; | 760 | struct xfrm_state *x1; |
474 | int family; | 761 | int family; |
475 | int err; | 762 | int err; |
763 | int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); | ||
476 | 764 | ||
477 | family = x->props.family; | 765 | family = x->props.family; |
478 | afinfo = xfrm_state_get_afinfo(family); | ||
479 | if (unlikely(afinfo == NULL)) | ||
480 | return -EAFNOSUPPORT; | ||
481 | 766 | ||
482 | spin_lock_bh(&xfrm_state_lock); | 767 | spin_lock_bh(&xfrm_state_lock); |
483 | 768 | ||
484 | x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); | 769 | x1 = __xfrm_state_locate(x, use_spi, family); |
485 | if (x1) { | 770 | if (x1) { |
486 | xfrm_state_put(x1); | 771 | xfrm_state_put(x1); |
487 | x1 = NULL; | 772 | x1 = NULL; |
@@ -489,7 +774,7 @@ int xfrm_state_add(struct xfrm_state *x) | |||
489 | goto out; | 774 | goto out; |
490 | } | 775 | } |
491 | 776 | ||
492 | if (x->km.seq) { | 777 | if (use_spi && x->km.seq) { |
493 | x1 = __xfrm_find_acq_byseq(x->km.seq); | 778 | x1 = __xfrm_find_acq_byseq(x->km.seq); |
494 | if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { | 779 | if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { |
495 | xfrm_state_put(x1); | 780 | xfrm_state_put(x1); |
@@ -497,20 +782,17 @@ int xfrm_state_add(struct xfrm_state *x) | |||
497 | } | 782 | } |
498 | } | 783 | } |
499 | 784 | ||
500 | if (!x1) | 785 | if (use_spi && !x1) |
501 | x1 = afinfo->find_acq( | 786 | x1 = __find_acq_core(family, x->props.mode, x->props.reqid, |
502 | x->props.mode, x->props.reqid, x->id.proto, | 787 | x->id.proto, |
503 | &x->id.daddr, &x->props.saddr, 0); | 788 | &x->id.daddr, &x->props.saddr, 0); |
504 | 789 | ||
790 | __xfrm_state_bump_genids(x); | ||
505 | __xfrm_state_insert(x); | 791 | __xfrm_state_insert(x); |
506 | err = 0; | 792 | err = 0; |
507 | 793 | ||
508 | out: | 794 | out: |
509 | spin_unlock_bh(&xfrm_state_lock); | 795 | spin_unlock_bh(&xfrm_state_lock); |
510 | xfrm_state_put_afinfo(afinfo); | ||
511 | |||
512 | if (!err) | ||
513 | xfrm_flush_all_bundles(); | ||
514 | 796 | ||
515 | if (x1) { | 797 | if (x1) { |
516 | xfrm_state_delete(x1); | 798 | xfrm_state_delete(x1); |
@@ -523,16 +805,12 @@ EXPORT_SYMBOL(xfrm_state_add); | |||
523 | 805 | ||
524 | int xfrm_state_update(struct xfrm_state *x) | 806 | int xfrm_state_update(struct xfrm_state *x) |
525 | { | 807 | { |
526 | struct xfrm_state_afinfo *afinfo; | ||
527 | struct xfrm_state *x1; | 808 | struct xfrm_state *x1; |
528 | int err; | 809 | int err; |
529 | 810 | int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); | |
530 | afinfo = xfrm_state_get_afinfo(x->props.family); | ||
531 | if (unlikely(afinfo == NULL)) | ||
532 | return -EAFNOSUPPORT; | ||
533 | 811 | ||
534 | spin_lock_bh(&xfrm_state_lock); | 812 | spin_lock_bh(&xfrm_state_lock); |
535 | x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); | 813 | x1 = __xfrm_state_locate(x, use_spi, x->props.family); |
536 | 814 | ||
537 | err = -ESRCH; | 815 | err = -ESRCH; |
538 | if (!x1) | 816 | if (!x1) |
@@ -552,7 +830,6 @@ int xfrm_state_update(struct xfrm_state *x) | |||
552 | 830 | ||
553 | out: | 831 | out: |
554 | spin_unlock_bh(&xfrm_state_lock); | 832 | spin_unlock_bh(&xfrm_state_lock); |
555 | xfrm_state_put_afinfo(afinfo); | ||
556 | 833 | ||
557 | if (err) | 834 | if (err) |
558 | return err; | 835 | return err; |
@@ -568,11 +845,15 @@ out: | |||
568 | if (likely(x1->km.state == XFRM_STATE_VALID)) { | 845 | if (likely(x1->km.state == XFRM_STATE_VALID)) { |
569 | if (x->encap && x1->encap) | 846 | if (x->encap && x1->encap) |
570 | memcpy(x1->encap, x->encap, sizeof(*x1->encap)); | 847 | memcpy(x1->encap, x->encap, sizeof(*x1->encap)); |
848 | if (x->coaddr && x1->coaddr) { | ||
849 | memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); | ||
850 | } | ||
851 | if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel))) | ||
852 | memcpy(&x1->sel, &x->sel, sizeof(x1->sel)); | ||
571 | memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); | 853 | memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); |
572 | x1->km.dying = 0; | 854 | x1->km.dying = 0; |
573 | 855 | ||
574 | if (!mod_timer(&x1->timer, jiffies + HZ)) | 856 | mod_timer(&x1->timer, jiffies + HZ); |
575 | xfrm_state_hold(x1); | ||
576 | if (x1->curlft.use_time) | 857 | if (x1->curlft.use_time) |
577 | xfrm_state_check_expire(x1); | 858 | xfrm_state_check_expire(x1); |
578 | 859 | ||
@@ -597,8 +878,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) | |||
597 | if (x->curlft.bytes >= x->lft.hard_byte_limit || | 878 | if (x->curlft.bytes >= x->lft.hard_byte_limit || |
598 | x->curlft.packets >= x->lft.hard_packet_limit) { | 879 | x->curlft.packets >= x->lft.hard_packet_limit) { |
599 | x->km.state = XFRM_STATE_EXPIRED; | 880 | x->km.state = XFRM_STATE_EXPIRED; |
600 | if (!mod_timer(&x->timer, jiffies)) | 881 | mod_timer(&x->timer, jiffies); |
601 | xfrm_state_hold(x); | ||
602 | return -EINVAL; | 882 | return -EINVAL; |
603 | } | 883 | } |
604 | 884 | ||
@@ -640,46 +920,93 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, | |||
640 | unsigned short family) | 920 | unsigned short family) |
641 | { | 921 | { |
642 | struct xfrm_state *x; | 922 | struct xfrm_state *x; |
643 | struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); | ||
644 | if (!afinfo) | ||
645 | return NULL; | ||
646 | 923 | ||
647 | spin_lock_bh(&xfrm_state_lock); | 924 | spin_lock_bh(&xfrm_state_lock); |
648 | x = afinfo->state_lookup(daddr, spi, proto); | 925 | x = __xfrm_state_lookup(daddr, spi, proto, family); |
649 | spin_unlock_bh(&xfrm_state_lock); | 926 | spin_unlock_bh(&xfrm_state_lock); |
650 | xfrm_state_put_afinfo(afinfo); | ||
651 | return x; | 927 | return x; |
652 | } | 928 | } |
653 | EXPORT_SYMBOL(xfrm_state_lookup); | 929 | EXPORT_SYMBOL(xfrm_state_lookup); |
654 | 930 | ||
655 | struct xfrm_state * | 931 | struct xfrm_state * |
932 | xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, | ||
933 | u8 proto, unsigned short family) | ||
934 | { | ||
935 | struct xfrm_state *x; | ||
936 | |||
937 | spin_lock_bh(&xfrm_state_lock); | ||
938 | x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); | ||
939 | spin_unlock_bh(&xfrm_state_lock); | ||
940 | return x; | ||
941 | } | ||
942 | EXPORT_SYMBOL(xfrm_state_lookup_byaddr); | ||
943 | |||
944 | struct xfrm_state * | ||
656 | xfrm_find_acq(u8 mode, u32 reqid, u8 proto, | 945 | xfrm_find_acq(u8 mode, u32 reqid, u8 proto, |
657 | xfrm_address_t *daddr, xfrm_address_t *saddr, | 946 | xfrm_address_t *daddr, xfrm_address_t *saddr, |
658 | int create, unsigned short family) | 947 | int create, unsigned short family) |
659 | { | 948 | { |
660 | struct xfrm_state *x; | 949 | struct xfrm_state *x; |
950 | |||
951 | spin_lock_bh(&xfrm_state_lock); | ||
952 | x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); | ||
953 | spin_unlock_bh(&xfrm_state_lock); | ||
954 | |||
955 | return x; | ||
956 | } | ||
957 | EXPORT_SYMBOL(xfrm_find_acq); | ||
958 | |||
959 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
960 | int | ||
961 | xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, | ||
962 | unsigned short family) | ||
963 | { | ||
964 | int err = 0; | ||
661 | struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); | 965 | struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); |
662 | if (!afinfo) | 966 | if (!afinfo) |
663 | return NULL; | 967 | return -EAFNOSUPPORT; |
664 | 968 | ||
665 | spin_lock_bh(&xfrm_state_lock); | 969 | spin_lock_bh(&xfrm_state_lock); |
666 | x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); | 970 | if (afinfo->tmpl_sort) |
971 | err = afinfo->tmpl_sort(dst, src, n); | ||
667 | spin_unlock_bh(&xfrm_state_lock); | 972 | spin_unlock_bh(&xfrm_state_lock); |
668 | xfrm_state_put_afinfo(afinfo); | 973 | xfrm_state_put_afinfo(afinfo); |
669 | return x; | 974 | return err; |
670 | } | 975 | } |
671 | EXPORT_SYMBOL(xfrm_find_acq); | 976 | EXPORT_SYMBOL(xfrm_tmpl_sort); |
977 | |||
978 | int | ||
979 | xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, | ||
980 | unsigned short family) | ||
981 | { | ||
982 | int err = 0; | ||
983 | struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); | ||
984 | if (!afinfo) | ||
985 | return -EAFNOSUPPORT; | ||
986 | |||
987 | spin_lock_bh(&xfrm_state_lock); | ||
988 | if (afinfo->state_sort) | ||
989 | err = afinfo->state_sort(dst, src, n); | ||
990 | spin_unlock_bh(&xfrm_state_lock); | ||
991 | xfrm_state_put_afinfo(afinfo); | ||
992 | return err; | ||
993 | } | ||
994 | EXPORT_SYMBOL(xfrm_state_sort); | ||
995 | #endif | ||
672 | 996 | ||
673 | /* Silly enough, but I'm lazy to build resolution list */ | 997 | /* Silly enough, but I'm lazy to build resolution list */ |
674 | 998 | ||
675 | static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) | 999 | static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) |
676 | { | 1000 | { |
677 | int i; | 1001 | int i; |
678 | struct xfrm_state *x; | ||
679 | 1002 | ||
680 | for (i = 0; i < XFRM_DST_HSIZE; i++) { | 1003 | for (i = 0; i <= xfrm_state_hmask; i++) { |
681 | list_for_each_entry(x, xfrm_state_bydst+i, bydst) { | 1004 | struct hlist_node *entry; |
682 | if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { | 1005 | struct xfrm_state *x; |
1006 | |||
1007 | hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { | ||
1008 | if (x->km.seq == seq && | ||
1009 | x->km.state == XFRM_STATE_ACQ) { | ||
683 | xfrm_state_hold(x); | 1010 | xfrm_state_hold(x); |
684 | return x; | 1011 | return x; |
685 | } | 1012 | } |
@@ -715,7 +1042,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); | |||
715 | void | 1042 | void |
716 | xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) | 1043 | xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) |
717 | { | 1044 | { |
718 | u32 h; | 1045 | unsigned int h; |
719 | struct xfrm_state *x0; | 1046 | struct xfrm_state *x0; |
720 | 1047 | ||
721 | if (x->id.spi) | 1048 | if (x->id.spi) |
@@ -745,8 +1072,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) | |||
745 | if (x->id.spi) { | 1072 | if (x->id.spi) { |
746 | spin_lock_bh(&xfrm_state_lock); | 1073 | spin_lock_bh(&xfrm_state_lock); |
747 | h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); | 1074 | h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); |
748 | list_add(&x->byspi, xfrm_state_byspi+h); | 1075 | hlist_add_head(&x->byspi, xfrm_state_byspi+h); |
749 | xfrm_state_hold(x); | ||
750 | spin_unlock_bh(&xfrm_state_lock); | 1076 | spin_unlock_bh(&xfrm_state_lock); |
751 | wake_up(&km_waitq); | 1077 | wake_up(&km_waitq); |
752 | } | 1078 | } |
@@ -758,13 +1084,14 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), | |||
758 | { | 1084 | { |
759 | int i; | 1085 | int i; |
760 | struct xfrm_state *x; | 1086 | struct xfrm_state *x; |
1087 | struct hlist_node *entry; | ||
761 | int count = 0; | 1088 | int count = 0; |
762 | int err = 0; | 1089 | int err = 0; |
763 | 1090 | ||
764 | spin_lock_bh(&xfrm_state_lock); | 1091 | spin_lock_bh(&xfrm_state_lock); |
765 | for (i = 0; i < XFRM_DST_HSIZE; i++) { | 1092 | for (i = 0; i <= xfrm_state_hmask; i++) { |
766 | list_for_each_entry(x, xfrm_state_bydst+i, bydst) { | 1093 | hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { |
767 | if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) | 1094 | if (xfrm_id_proto_match(x->id.proto, proto)) |
768 | count++; | 1095 | count++; |
769 | } | 1096 | } |
770 | } | 1097 | } |
@@ -773,9 +1100,9 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), | |||
773 | goto out; | 1100 | goto out; |
774 | } | 1101 | } |
775 | 1102 | ||
776 | for (i = 0; i < XFRM_DST_HSIZE; i++) { | 1103 | for (i = 0; i <= xfrm_state_hmask; i++) { |
777 | list_for_each_entry(x, xfrm_state_bydst+i, bydst) { | 1104 | hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { |
778 | if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) | 1105 | if (!xfrm_id_proto_match(x->id.proto, proto)) |
779 | continue; | 1106 | continue; |
780 | err = func(x, --count, data); | 1107 | err = func(x, --count, data); |
781 | if (err) | 1108 | if (err) |
@@ -832,10 +1159,8 @@ void xfrm_replay_notify(struct xfrm_state *x, int event) | |||
832 | km_state_notify(x, &c); | 1159 | km_state_notify(x, &c); |
833 | 1160 | ||
834 | if (x->replay_maxage && | 1161 | if (x->replay_maxage && |
835 | !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) { | 1162 | !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) |
836 | xfrm_state_hold(x); | ||
837 | x->xflags &= ~XFRM_TIME_DEFER; | 1163 | x->xflags &= ~XFRM_TIME_DEFER; |
838 | } | ||
839 | } | 1164 | } |
840 | EXPORT_SYMBOL(xfrm_replay_notify); | 1165 | EXPORT_SYMBOL(xfrm_replay_notify); |
841 | 1166 | ||
@@ -853,7 +1178,6 @@ static void xfrm_replay_timer_handler(unsigned long data) | |||
853 | } | 1178 | } |
854 | 1179 | ||
855 | spin_unlock(&x->lock); | 1180 | spin_unlock(&x->lock); |
856 | xfrm_state_put(x); | ||
857 | } | 1181 | } |
858 | 1182 | ||
859 | int xfrm_replay_check(struct xfrm_state *x, u32 seq) | 1183 | int xfrm_replay_check(struct xfrm_state *x, u32 seq) |
@@ -997,6 +1321,25 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) | |||
997 | } | 1321 | } |
998 | EXPORT_SYMBOL(km_policy_expired); | 1322 | EXPORT_SYMBOL(km_policy_expired); |
999 | 1323 | ||
1324 | int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) | ||
1325 | { | ||
1326 | int err = -EINVAL; | ||
1327 | int ret; | ||
1328 | struct xfrm_mgr *km; | ||
1329 | |||
1330 | read_lock(&xfrm_km_lock); | ||
1331 | list_for_each_entry(km, &xfrm_km_list, list) { | ||
1332 | if (km->report) { | ||
1333 | ret = km->report(proto, sel, addr); | ||
1334 | if (!ret) | ||
1335 | err = ret; | ||
1336 | } | ||
1337 | } | ||
1338 | read_unlock(&xfrm_km_lock); | ||
1339 | return err; | ||
1340 | } | ||
1341 | EXPORT_SYMBOL(km_report); | ||
1342 | |||
1000 | int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) | 1343 | int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) |
1001 | { | 1344 | { |
1002 | int err; | 1345 | int err; |
@@ -1018,7 +1361,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen | |||
1018 | err = -EINVAL; | 1361 | err = -EINVAL; |
1019 | read_lock(&xfrm_km_lock); | 1362 | read_lock(&xfrm_km_lock); |
1020 | list_for_each_entry(km, &xfrm_km_list, list) { | 1363 | list_for_each_entry(km, &xfrm_km_list, list) { |
1021 | pol = km->compile_policy(sk->sk_family, optname, data, | 1364 | pol = km->compile_policy(sk, optname, data, |
1022 | optlen, &err); | 1365 | optlen, &err); |
1023 | if (err >= 0) | 1366 | if (err >= 0) |
1024 | break; | 1367 | break; |
@@ -1065,11 +1408,8 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) | |||
1065 | write_lock_bh(&xfrm_state_afinfo_lock); | 1408 | write_lock_bh(&xfrm_state_afinfo_lock); |
1066 | if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) | 1409 | if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) |
1067 | err = -ENOBUFS; | 1410 | err = -ENOBUFS; |
1068 | else { | 1411 | else |
1069 | afinfo->state_bydst = xfrm_state_bydst; | ||
1070 | afinfo->state_byspi = xfrm_state_byspi; | ||
1071 | xfrm_state_afinfo[afinfo->family] = afinfo; | 1412 | xfrm_state_afinfo[afinfo->family] = afinfo; |
1072 | } | ||
1073 | write_unlock_bh(&xfrm_state_afinfo_lock); | 1413 | write_unlock_bh(&xfrm_state_afinfo_lock); |
1074 | return err; | 1414 | return err; |
1075 | } | 1415 | } |
@@ -1086,11 +1426,8 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) | |||
1086 | if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { | 1426 | if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { |
1087 | if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) | 1427 | if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) |
1088 | err = -EINVAL; | 1428 | err = -EINVAL; |
1089 | else { | 1429 | else |
1090 | xfrm_state_afinfo[afinfo->family] = NULL; | 1430 | xfrm_state_afinfo[afinfo->family] = NULL; |
1091 | afinfo->state_byspi = NULL; | ||
1092 | afinfo->state_bydst = NULL; | ||
1093 | } | ||
1094 | } | 1431 | } |
1095 | write_unlock_bh(&xfrm_state_afinfo_lock); | 1432 | write_unlock_bh(&xfrm_state_afinfo_lock); |
1096 | return err; | 1433 | return err; |
@@ -1206,12 +1543,17 @@ EXPORT_SYMBOL(xfrm_init_state); | |||
1206 | 1543 | ||
1207 | void __init xfrm_state_init(void) | 1544 | void __init xfrm_state_init(void) |
1208 | { | 1545 | { |
1209 | int i; | 1546 | unsigned int sz; |
1547 | |||
1548 | sz = sizeof(struct hlist_head) * 8; | ||
1549 | |||
1550 | xfrm_state_bydst = xfrm_hash_alloc(sz); | ||
1551 | xfrm_state_bysrc = xfrm_hash_alloc(sz); | ||
1552 | xfrm_state_byspi = xfrm_hash_alloc(sz); | ||
1553 | if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) | ||
1554 | panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); | ||
1555 | xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); | ||
1210 | 1556 | ||
1211 | for (i=0; i<XFRM_DST_HSIZE; i++) { | ||
1212 | INIT_LIST_HEAD(&xfrm_state_bydst[i]); | ||
1213 | INIT_LIST_HEAD(&xfrm_state_byspi[i]); | ||
1214 | } | ||
1215 | INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL); | 1557 | INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL); |
1216 | } | 1558 | } |
1217 | 1559 | ||
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index fa79ddc4239e..c59a78d2923a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c | |||
@@ -28,6 +28,9 @@ | |||
28 | #include <net/xfrm.h> | 28 | #include <net/xfrm.h> |
29 | #include <net/netlink.h> | 29 | #include <net/netlink.h> |
30 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
31 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
32 | #include <linux/in6.h> | ||
33 | #endif | ||
31 | 34 | ||
32 | static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) | 35 | static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) |
33 | { | 36 | { |
@@ -87,6 +90,22 @@ static int verify_encap_tmpl(struct rtattr **xfrma) | |||
87 | return 0; | 90 | return 0; |
88 | } | 91 | } |
89 | 92 | ||
93 | static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type, | ||
94 | xfrm_address_t **addrp) | ||
95 | { | ||
96 | struct rtattr *rt = xfrma[type - 1]; | ||
97 | |||
98 | if (!rt) | ||
99 | return 0; | ||
100 | |||
101 | if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp)) | ||
102 | return -EINVAL; | ||
103 | |||
104 | if (addrp) | ||
105 | *addrp = RTA_DATA(rt); | ||
106 | |||
107 | return 0; | ||
108 | } | ||
90 | 109 | ||
91 | static inline int verify_sec_ctx_len(struct rtattr **xfrma) | 110 | static inline int verify_sec_ctx_len(struct rtattr **xfrma) |
92 | { | 111 | { |
@@ -157,6 +176,19 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, | |||
157 | goto out; | 176 | goto out; |
158 | break; | 177 | break; |
159 | 178 | ||
179 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
180 | case IPPROTO_DSTOPTS: | ||
181 | case IPPROTO_ROUTING: | ||
182 | if (xfrma[XFRMA_ALG_COMP-1] || | ||
183 | xfrma[XFRMA_ALG_AUTH-1] || | ||
184 | xfrma[XFRMA_ALG_CRYPT-1] || | ||
185 | xfrma[XFRMA_ENCAP-1] || | ||
186 | xfrma[XFRMA_SEC_CTX-1] || | ||
187 | !xfrma[XFRMA_COADDR-1]) | ||
188 | goto out; | ||
189 | break; | ||
190 | #endif | ||
191 | |||
160 | default: | 192 | default: |
161 | goto out; | 193 | goto out; |
162 | }; | 194 | }; |
@@ -171,11 +203,14 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, | |||
171 | goto out; | 203 | goto out; |
172 | if ((err = verify_sec_ctx_len(xfrma))) | 204 | if ((err = verify_sec_ctx_len(xfrma))) |
173 | goto out; | 205 | goto out; |
206 | if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL))) | ||
207 | goto out; | ||
174 | 208 | ||
175 | err = -EINVAL; | 209 | err = -EINVAL; |
176 | switch (p->mode) { | 210 | switch (p->mode) { |
177 | case 0: | 211 | case XFRM_MODE_TRANSPORT: |
178 | case 1: | 212 | case XFRM_MODE_TUNNEL: |
213 | case XFRM_MODE_ROUTEOPTIMIZATION: | ||
179 | break; | 214 | break; |
180 | 215 | ||
181 | default: | 216 | default: |
@@ -260,6 +295,24 @@ static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) | |||
260 | return security_xfrm_state_alloc(x, uctx); | 295 | return security_xfrm_state_alloc(x, uctx); |
261 | } | 296 | } |
262 | 297 | ||
298 | static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) | ||
299 | { | ||
300 | struct rtattr *rta = u_arg; | ||
301 | xfrm_address_t *p, *uaddrp; | ||
302 | |||
303 | if (!rta) | ||
304 | return 0; | ||
305 | |||
306 | uaddrp = RTA_DATA(rta); | ||
307 | p = kmalloc(sizeof(*p), GFP_KERNEL); | ||
308 | if (!p) | ||
309 | return -ENOMEM; | ||
310 | |||
311 | memcpy(p, uaddrp, sizeof(*p)); | ||
312 | *addrpp = p; | ||
313 | return 0; | ||
314 | } | ||
315 | |||
263 | static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) | 316 | static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) |
264 | { | 317 | { |
265 | memcpy(&x->id, &p->id, sizeof(x->id)); | 318 | memcpy(&x->id, &p->id, sizeof(x->id)); |
@@ -349,7 +402,8 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, | |||
349 | goto error; | 402 | goto error; |
350 | if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) | 403 | if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) |
351 | goto error; | 404 | goto error; |
352 | 405 | if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1]))) | |
406 | goto error; | ||
353 | err = xfrm_init_state(x); | 407 | err = xfrm_init_state(x); |
354 | if (err) | 408 | if (err) |
355 | goto error; | 409 | goto error; |
@@ -418,16 +472,48 @@ out: | |||
418 | return err; | 472 | return err; |
419 | } | 473 | } |
420 | 474 | ||
475 | static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, | ||
476 | struct rtattr **xfrma, | ||
477 | int *errp) | ||
478 | { | ||
479 | struct xfrm_state *x = NULL; | ||
480 | int err; | ||
481 | |||
482 | if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { | ||
483 | err = -ESRCH; | ||
484 | x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); | ||
485 | } else { | ||
486 | xfrm_address_t *saddr = NULL; | ||
487 | |||
488 | err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr); | ||
489 | if (err) | ||
490 | goto out; | ||
491 | |||
492 | if (!saddr) { | ||
493 | err = -EINVAL; | ||
494 | goto out; | ||
495 | } | ||
496 | |||
497 | x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, | ||
498 | p->family); | ||
499 | } | ||
500 | |||
501 | out: | ||
502 | if (!x && errp) | ||
503 | *errp = err; | ||
504 | return x; | ||
505 | } | ||
506 | |||
421 | static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) | 507 | static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) |
422 | { | 508 | { |
423 | struct xfrm_state *x; | 509 | struct xfrm_state *x; |
424 | int err; | 510 | int err = -ESRCH; |
425 | struct km_event c; | 511 | struct km_event c; |
426 | struct xfrm_usersa_id *p = NLMSG_DATA(nlh); | 512 | struct xfrm_usersa_id *p = NLMSG_DATA(nlh); |
427 | 513 | ||
428 | x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); | 514 | x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); |
429 | if (x == NULL) | 515 | if (x == NULL) |
430 | return -ESRCH; | 516 | return err; |
431 | 517 | ||
432 | if ((err = security_xfrm_state_delete(x)) != 0) | 518 | if ((err = security_xfrm_state_delete(x)) != 0) |
433 | goto out; | 519 | goto out; |
@@ -521,6 +607,13 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) | |||
521 | uctx->ctx_len = x->security->ctx_len; | 607 | uctx->ctx_len = x->security->ctx_len; |
522 | memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); | 608 | memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); |
523 | } | 609 | } |
610 | |||
611 | if (x->coaddr) | ||
612 | RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); | ||
613 | |||
614 | if (x->lastused) | ||
615 | RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); | ||
616 | |||
524 | nlh->nlmsg_len = skb->tail - b; | 617 | nlh->nlmsg_len = skb->tail - b; |
525 | out: | 618 | out: |
526 | sp->this_idx++; | 619 | sp->this_idx++; |
@@ -542,7 +635,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) | |||
542 | info.nlmsg_flags = NLM_F_MULTI; | 635 | info.nlmsg_flags = NLM_F_MULTI; |
543 | info.this_idx = 0; | 636 | info.this_idx = 0; |
544 | info.start_idx = cb->args[0]; | 637 | info.start_idx = cb->args[0]; |
545 | (void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info); | 638 | (void) xfrm_state_walk(0, dump_one_state, &info); |
546 | cb->args[0] = info.this_idx; | 639 | cb->args[0] = info.this_idx; |
547 | 640 | ||
548 | return skb->len; | 641 | return skb->len; |
@@ -578,10 +671,9 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) | |||
578 | struct xfrm_usersa_id *p = NLMSG_DATA(nlh); | 671 | struct xfrm_usersa_id *p = NLMSG_DATA(nlh); |
579 | struct xfrm_state *x; | 672 | struct xfrm_state *x; |
580 | struct sk_buff *resp_skb; | 673 | struct sk_buff *resp_skb; |
581 | int err; | 674 | int err = -ESRCH; |
582 | 675 | ||
583 | x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); | 676 | x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); |
584 | err = -ESRCH; | ||
585 | if (x == NULL) | 677 | if (x == NULL) |
586 | goto out_noput; | 678 | goto out_noput; |
587 | 679 | ||
@@ -694,6 +786,22 @@ static int verify_policy_dir(__u8 dir) | |||
694 | return 0; | 786 | return 0; |
695 | } | 787 | } |
696 | 788 | ||
789 | static int verify_policy_type(__u8 type) | ||
790 | { | ||
791 | switch (type) { | ||
792 | case XFRM_POLICY_TYPE_MAIN: | ||
793 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
794 | case XFRM_POLICY_TYPE_SUB: | ||
795 | #endif | ||
796 | break; | ||
797 | |||
798 | default: | ||
799 | return -EINVAL; | ||
800 | }; | ||
801 | |||
802 | return 0; | ||
803 | } | ||
804 | |||
697 | static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) | 805 | static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) |
698 | { | 806 | { |
699 | switch (p->share) { | 807 | switch (p->share) { |
@@ -787,6 +895,29 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) | |||
787 | return 0; | 895 | return 0; |
788 | } | 896 | } |
789 | 897 | ||
898 | static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma) | ||
899 | { | ||
900 | struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1]; | ||
901 | struct xfrm_userpolicy_type *upt; | ||
902 | __u8 type = XFRM_POLICY_TYPE_MAIN; | ||
903 | int err; | ||
904 | |||
905 | if (rt) { | ||
906 | if (rt->rta_len < sizeof(*upt)) | ||
907 | return -EINVAL; | ||
908 | |||
909 | upt = RTA_DATA(rt); | ||
910 | type = upt->type; | ||
911 | } | ||
912 | |||
913 | err = verify_policy_type(type); | ||
914 | if (err) | ||
915 | return err; | ||
916 | |||
917 | *tp = type; | ||
918 | return 0; | ||
919 | } | ||
920 | |||
790 | static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) | 921 | static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) |
791 | { | 922 | { |
792 | xp->priority = p->priority; | 923 | xp->priority = p->priority; |
@@ -825,16 +956,20 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, | |||
825 | 956 | ||
826 | copy_from_user_policy(xp, p); | 957 | copy_from_user_policy(xp, p); |
827 | 958 | ||
959 | err = copy_from_user_policy_type(&xp->type, xfrma); | ||
960 | if (err) | ||
961 | goto error; | ||
962 | |||
828 | if (!(err = copy_from_user_tmpl(xp, xfrma))) | 963 | if (!(err = copy_from_user_tmpl(xp, xfrma))) |
829 | err = copy_from_user_sec_ctx(xp, xfrma); | 964 | err = copy_from_user_sec_ctx(xp, xfrma); |
830 | 965 | if (err) | |
831 | if (err) { | 966 | goto error; |
832 | *errp = err; | ||
833 | kfree(xp); | ||
834 | xp = NULL; | ||
835 | } | ||
836 | 967 | ||
837 | return xp; | 968 | return xp; |
969 | error: | ||
970 | *errp = err; | ||
971 | kfree(xp); | ||
972 | return NULL; | ||
838 | } | 973 | } |
839 | 974 | ||
840 | static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) | 975 | static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) |
@@ -911,27 +1046,63 @@ rtattr_failure: | |||
911 | return -1; | 1046 | return -1; |
912 | } | 1047 | } |
913 | 1048 | ||
914 | static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) | 1049 | static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) |
915 | { | 1050 | { |
916 | if (xp->security) { | 1051 | int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len; |
917 | int ctx_size = sizeof(struct xfrm_sec_ctx) + | 1052 | struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); |
918 | xp->security->ctx_len; | 1053 | struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); |
919 | struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); | 1054 | |
920 | struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); | 1055 | uctx->exttype = XFRMA_SEC_CTX; |
1056 | uctx->len = ctx_size; | ||
1057 | uctx->ctx_doi = s->ctx_doi; | ||
1058 | uctx->ctx_alg = s->ctx_alg; | ||
1059 | uctx->ctx_len = s->ctx_len; | ||
1060 | memcpy(uctx + 1, s->ctx_str, s->ctx_len); | ||
1061 | return 0; | ||
921 | 1062 | ||
922 | uctx->exttype = XFRMA_SEC_CTX; | 1063 | rtattr_failure: |
923 | uctx->len = ctx_size; | 1064 | return -1; |
924 | uctx->ctx_doi = xp->security->ctx_doi; | 1065 | } |
925 | uctx->ctx_alg = xp->security->ctx_alg; | 1066 | |
926 | uctx->ctx_len = xp->security->ctx_len; | 1067 | static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb) |
927 | memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len); | 1068 | { |
1069 | if (x->security) { | ||
1070 | return copy_sec_ctx(x->security, skb); | ||
928 | } | 1071 | } |
929 | return 0; | 1072 | return 0; |
1073 | } | ||
930 | 1074 | ||
931 | rtattr_failure: | 1075 | static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) |
1076 | { | ||
1077 | if (xp->security) { | ||
1078 | return copy_sec_ctx(xp->security, skb); | ||
1079 | } | ||
1080 | return 0; | ||
1081 | } | ||
1082 | |||
1083 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
1084 | static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) | ||
1085 | { | ||
1086 | struct xfrm_userpolicy_type upt; | ||
1087 | |||
1088 | memset(&upt, 0, sizeof(upt)); | ||
1089 | upt.type = xp->type; | ||
1090 | |||
1091 | RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); | ||
1092 | |||
1093 | return 0; | ||
1094 | |||
1095 | rtattr_failure: | ||
932 | return -1; | 1096 | return -1; |
933 | } | 1097 | } |
934 | 1098 | ||
1099 | #else | ||
1100 | static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) | ||
1101 | { | ||
1102 | return 0; | ||
1103 | } | ||
1104 | #endif | ||
1105 | |||
935 | static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) | 1106 | static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) |
936 | { | 1107 | { |
937 | struct xfrm_dump_info *sp = ptr; | 1108 | struct xfrm_dump_info *sp = ptr; |
@@ -955,6 +1126,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr | |||
955 | goto nlmsg_failure; | 1126 | goto nlmsg_failure; |
956 | if (copy_to_user_sec_ctx(xp, skb)) | 1127 | if (copy_to_user_sec_ctx(xp, skb)) |
957 | goto nlmsg_failure; | 1128 | goto nlmsg_failure; |
1129 | if (copy_to_user_policy_type(xp, skb) < 0) | ||
1130 | goto nlmsg_failure; | ||
958 | 1131 | ||
959 | nlh->nlmsg_len = skb->tail - b; | 1132 | nlh->nlmsg_len = skb->tail - b; |
960 | out: | 1133 | out: |
@@ -976,7 +1149,10 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) | |||
976 | info.nlmsg_flags = NLM_F_MULTI; | 1149 | info.nlmsg_flags = NLM_F_MULTI; |
977 | info.this_idx = 0; | 1150 | info.this_idx = 0; |
978 | info.start_idx = cb->args[0]; | 1151 | info.start_idx = cb->args[0]; |
979 | (void) xfrm_policy_walk(dump_one_policy, &info); | 1152 | (void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info); |
1153 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
1154 | (void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info); | ||
1155 | #endif | ||
980 | cb->args[0] = info.this_idx; | 1156 | cb->args[0] = info.this_idx; |
981 | 1157 | ||
982 | return skb->len; | 1158 | return skb->len; |
@@ -1012,6 +1188,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr | |||
1012 | { | 1188 | { |
1013 | struct xfrm_policy *xp; | 1189 | struct xfrm_policy *xp; |
1014 | struct xfrm_userpolicy_id *p; | 1190 | struct xfrm_userpolicy_id *p; |
1191 | __u8 type = XFRM_POLICY_TYPE_MAIN; | ||
1015 | int err; | 1192 | int err; |
1016 | struct km_event c; | 1193 | struct km_event c; |
1017 | int delete; | 1194 | int delete; |
@@ -1019,12 +1196,16 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr | |||
1019 | p = NLMSG_DATA(nlh); | 1196 | p = NLMSG_DATA(nlh); |
1020 | delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; | 1197 | delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; |
1021 | 1198 | ||
1199 | err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); | ||
1200 | if (err) | ||
1201 | return err; | ||
1202 | |||
1022 | err = verify_policy_dir(p->dir); | 1203 | err = verify_policy_dir(p->dir); |
1023 | if (err) | 1204 | if (err) |
1024 | return err; | 1205 | return err; |
1025 | 1206 | ||
1026 | if (p->index) | 1207 | if (p->index) |
1027 | xp = xfrm_policy_byid(p->dir, p->index, delete); | 1208 | xp = xfrm_policy_byid(type, p->dir, p->index, delete); |
1028 | else { | 1209 | else { |
1029 | struct rtattr **rtattrs = (struct rtattr **)xfrma; | 1210 | struct rtattr **rtattrs = (struct rtattr **)xfrma; |
1030 | struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; | 1211 | struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; |
@@ -1041,7 +1222,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr | |||
1041 | if ((err = security_xfrm_policy_alloc(&tmp, uctx))) | 1222 | if ((err = security_xfrm_policy_alloc(&tmp, uctx))) |
1042 | return err; | 1223 | return err; |
1043 | } | 1224 | } |
1044 | xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete); | 1225 | xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete); |
1045 | security_xfrm_policy_free(&tmp); | 1226 | security_xfrm_policy_free(&tmp); |
1046 | } | 1227 | } |
1047 | if (xp == NULL) | 1228 | if (xp == NULL) |
@@ -1224,9 +1405,16 @@ out: | |||
1224 | 1405 | ||
1225 | static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) | 1406 | static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) |
1226 | { | 1407 | { |
1227 | struct km_event c; | 1408 | struct km_event c; |
1409 | __u8 type = XFRM_POLICY_TYPE_MAIN; | ||
1410 | int err; | ||
1411 | |||
1412 | err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); | ||
1413 | if (err) | ||
1414 | return err; | ||
1228 | 1415 | ||
1229 | xfrm_policy_flush(); | 1416 | xfrm_policy_flush(type); |
1417 | c.data.type = type; | ||
1230 | c.event = nlh->nlmsg_type; | 1418 | c.event = nlh->nlmsg_type; |
1231 | c.seq = nlh->nlmsg_seq; | 1419 | c.seq = nlh->nlmsg_seq; |
1232 | c.pid = nlh->nlmsg_pid; | 1420 | c.pid = nlh->nlmsg_pid; |
@@ -1239,10 +1427,15 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * | |||
1239 | struct xfrm_policy *xp; | 1427 | struct xfrm_policy *xp; |
1240 | struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); | 1428 | struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); |
1241 | struct xfrm_userpolicy_info *p = &up->pol; | 1429 | struct xfrm_userpolicy_info *p = &up->pol; |
1430 | __u8 type = XFRM_POLICY_TYPE_MAIN; | ||
1242 | int err = -ENOENT; | 1431 | int err = -ENOENT; |
1243 | 1432 | ||
1433 | err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); | ||
1434 | if (err) | ||
1435 | return err; | ||
1436 | |||
1244 | if (p->index) | 1437 | if (p->index) |
1245 | xp = xfrm_policy_byid(p->dir, p->index, 0); | 1438 | xp = xfrm_policy_byid(type, p->dir, p->index, 0); |
1246 | else { | 1439 | else { |
1247 | struct rtattr **rtattrs = (struct rtattr **)xfrma; | 1440 | struct rtattr **rtattrs = (struct rtattr **)xfrma; |
1248 | struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; | 1441 | struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; |
@@ -1259,7 +1452,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * | |||
1259 | if ((err = security_xfrm_policy_alloc(&tmp, uctx))) | 1452 | if ((err = security_xfrm_policy_alloc(&tmp, uctx))) |
1260 | return err; | 1453 | return err; |
1261 | } | 1454 | } |
1262 | xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, 0); | 1455 | xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, 0); |
1263 | security_xfrm_policy_free(&tmp); | 1456 | security_xfrm_policy_free(&tmp); |
1264 | } | 1457 | } |
1265 | 1458 | ||
@@ -1386,6 +1579,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { | |||
1386 | [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), | 1579 | [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), |
1387 | [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), | 1580 | [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), |
1388 | [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), | 1581 | [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), |
1582 | [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), | ||
1389 | }; | 1583 | }; |
1390 | 1584 | ||
1391 | #undef XMSGSIZE | 1585 | #undef XMSGSIZE |
@@ -1710,7 +1904,9 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, | |||
1710 | 1904 | ||
1711 | if (copy_to_user_tmpl(xp, skb) < 0) | 1905 | if (copy_to_user_tmpl(xp, skb) < 0) |
1712 | goto nlmsg_failure; | 1906 | goto nlmsg_failure; |
1713 | if (copy_to_user_sec_ctx(xp, skb)) | 1907 | if (copy_to_user_state_sec_ctx(x, skb)) |
1908 | goto nlmsg_failure; | ||
1909 | if (copy_to_user_policy_type(xp, skb) < 0) | ||
1714 | goto nlmsg_failure; | 1910 | goto nlmsg_failure; |
1715 | 1911 | ||
1716 | nlh->nlmsg_len = skb->tail - b; | 1912 | nlh->nlmsg_len = skb->tail - b; |
@@ -1744,7 +1940,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, | |||
1744 | /* User gives us xfrm_user_policy_info followed by an array of 0 | 1940 | /* User gives us xfrm_user_policy_info followed by an array of 0 |
1745 | * or more templates. | 1941 | * or more templates. |
1746 | */ | 1942 | */ |
1747 | static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, | 1943 | static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, |
1748 | u8 *data, int len, int *dir) | 1944 | u8 *data, int len, int *dir) |
1749 | { | 1945 | { |
1750 | struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; | 1946 | struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; |
@@ -1752,7 +1948,7 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, | |||
1752 | struct xfrm_policy *xp; | 1948 | struct xfrm_policy *xp; |
1753 | int nr; | 1949 | int nr; |
1754 | 1950 | ||
1755 | switch (family) { | 1951 | switch (sk->sk_family) { |
1756 | case AF_INET: | 1952 | case AF_INET: |
1757 | if (opt != IP_XFRM_POLICY) { | 1953 | if (opt != IP_XFRM_POLICY) { |
1758 | *dir = -EOPNOTSUPP; | 1954 | *dir = -EOPNOTSUPP; |
@@ -1792,8 +1988,18 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, | |||
1792 | } | 1988 | } |
1793 | 1989 | ||
1794 | copy_from_user_policy(xp, p); | 1990 | copy_from_user_policy(xp, p); |
1991 | xp->type = XFRM_POLICY_TYPE_MAIN; | ||
1795 | copy_templates(xp, ut, nr); | 1992 | copy_templates(xp, ut, nr); |
1796 | 1993 | ||
1994 | if (!xp->security) { | ||
1995 | int err = security_xfrm_sock_policy_alloc(xp, sk); | ||
1996 | if (err) { | ||
1997 | kfree(xp); | ||
1998 | *dir = err; | ||
1999 | return NULL; | ||
2000 | } | ||
2001 | } | ||
2002 | |||
1797 | *dir = p->dir; | 2003 | *dir = p->dir; |
1798 | 2004 | ||
1799 | return xp; | 2005 | return xp; |
@@ -1816,6 +2022,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, | |||
1816 | goto nlmsg_failure; | 2022 | goto nlmsg_failure; |
1817 | if (copy_to_user_sec_ctx(xp, skb)) | 2023 | if (copy_to_user_sec_ctx(xp, skb)) |
1818 | goto nlmsg_failure; | 2024 | goto nlmsg_failure; |
2025 | if (copy_to_user_policy_type(xp, skb) < 0) | ||
2026 | goto nlmsg_failure; | ||
1819 | upe->hard = !!hard; | 2027 | upe->hard = !!hard; |
1820 | 2028 | ||
1821 | nlh->nlmsg_len = skb->tail - b; | 2029 | nlh->nlmsg_len = skb->tail - b; |
@@ -1887,6 +2095,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * | |||
1887 | copy_to_user_policy(xp, p, dir); | 2095 | copy_to_user_policy(xp, p, dir); |
1888 | if (copy_to_user_tmpl(xp, skb) < 0) | 2096 | if (copy_to_user_tmpl(xp, skb) < 0) |
1889 | goto nlmsg_failure; | 2097 | goto nlmsg_failure; |
2098 | if (copy_to_user_policy_type(xp, skb) < 0) | ||
2099 | goto nlmsg_failure; | ||
1890 | 2100 | ||
1891 | nlh->nlmsg_len = skb->tail - b; | 2101 | nlh->nlmsg_len = skb->tail - b; |
1892 | 2102 | ||
@@ -1904,6 +2114,9 @@ static int xfrm_notify_policy_flush(struct km_event *c) | |||
1904 | struct nlmsghdr *nlh; | 2114 | struct nlmsghdr *nlh; |
1905 | struct sk_buff *skb; | 2115 | struct sk_buff *skb; |
1906 | unsigned char *b; | 2116 | unsigned char *b; |
2117 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
2118 | struct xfrm_userpolicy_type upt; | ||
2119 | #endif | ||
1907 | int len = NLMSG_LENGTH(0); | 2120 | int len = NLMSG_LENGTH(0); |
1908 | 2121 | ||
1909 | skb = alloc_skb(len, GFP_ATOMIC); | 2122 | skb = alloc_skb(len, GFP_ATOMIC); |
@@ -1913,6 +2126,13 @@ static int xfrm_notify_policy_flush(struct km_event *c) | |||
1913 | 2126 | ||
1914 | 2127 | ||
1915 | nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); | 2128 | nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); |
2129 | nlh->nlmsg_flags = 0; | ||
2130 | |||
2131 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
2132 | memset(&upt, 0, sizeof(upt)); | ||
2133 | upt.type = c->data.type; | ||
2134 | RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); | ||
2135 | #endif | ||
1916 | 2136 | ||
1917 | nlh->nlmsg_len = skb->tail - b; | 2137 | nlh->nlmsg_len = skb->tail - b; |
1918 | 2138 | ||
@@ -1920,6 +2140,9 @@ static int xfrm_notify_policy_flush(struct km_event *c) | |||
1920 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); | 2140 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); |
1921 | 2141 | ||
1922 | nlmsg_failure: | 2142 | nlmsg_failure: |
2143 | #ifdef CONFIG_XFRM_SUB_POLICY | ||
2144 | rtattr_failure: | ||
2145 | #endif | ||
1923 | kfree_skb(skb); | 2146 | kfree_skb(skb); |
1924 | return -1; | 2147 | return -1; |
1925 | } | 2148 | } |
@@ -1944,19 +2167,64 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev | |||
1944 | 2167 | ||
1945 | } | 2168 | } |
1946 | 2169 | ||
2170 | static int build_report(struct sk_buff *skb, u8 proto, | ||
2171 | struct xfrm_selector *sel, xfrm_address_t *addr) | ||
2172 | { | ||
2173 | struct xfrm_user_report *ur; | ||
2174 | struct nlmsghdr *nlh; | ||
2175 | unsigned char *b = skb->tail; | ||
2176 | |||
2177 | nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); | ||
2178 | ur = NLMSG_DATA(nlh); | ||
2179 | nlh->nlmsg_flags = 0; | ||
2180 | |||
2181 | ur->proto = proto; | ||
2182 | memcpy(&ur->sel, sel, sizeof(ur->sel)); | ||
2183 | |||
2184 | if (addr) | ||
2185 | RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); | ||
2186 | |||
2187 | nlh->nlmsg_len = skb->tail - b; | ||
2188 | return skb->len; | ||
2189 | |||
2190 | nlmsg_failure: | ||
2191 | rtattr_failure: | ||
2192 | skb_trim(skb, b - skb->data); | ||
2193 | return -1; | ||
2194 | } | ||
2195 | |||
2196 | static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, | ||
2197 | xfrm_address_t *addr) | ||
2198 | { | ||
2199 | struct sk_buff *skb; | ||
2200 | size_t len; | ||
2201 | |||
2202 | len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report))); | ||
2203 | skb = alloc_skb(len, GFP_ATOMIC); | ||
2204 | if (skb == NULL) | ||
2205 | return -ENOMEM; | ||
2206 | |||
2207 | if (build_report(skb, proto, sel, addr) < 0) | ||
2208 | BUG(); | ||
2209 | |||
2210 | NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT; | ||
2211 | return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); | ||
2212 | } | ||
2213 | |||
1947 | static struct xfrm_mgr netlink_mgr = { | 2214 | static struct xfrm_mgr netlink_mgr = { |
1948 | .id = "netlink", | 2215 | .id = "netlink", |
1949 | .notify = xfrm_send_state_notify, | 2216 | .notify = xfrm_send_state_notify, |
1950 | .acquire = xfrm_send_acquire, | 2217 | .acquire = xfrm_send_acquire, |
1951 | .compile_policy = xfrm_compile_policy, | 2218 | .compile_policy = xfrm_compile_policy, |
1952 | .notify_policy = xfrm_send_policy_notify, | 2219 | .notify_policy = xfrm_send_policy_notify, |
2220 | .report = xfrm_send_report, | ||
1953 | }; | 2221 | }; |
1954 | 2222 | ||
1955 | static int __init xfrm_user_init(void) | 2223 | static int __init xfrm_user_init(void) |
1956 | { | 2224 | { |
1957 | struct sock *nlsk; | 2225 | struct sock *nlsk; |
1958 | 2226 | ||
1959 | printk(KERN_INFO "Initializing IPsec netlink socket\n"); | 2227 | printk(KERN_INFO "Initializing XFRM netlink socket\n"); |
1960 | 2228 | ||
1961 | nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, | 2229 | nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, |
1962 | xfrm_netlink_rcv, THIS_MODULE); | 2230 | xfrm_netlink_rcv, THIS_MODULE); |