Diffstat (limited to 'net')
 38 files changed, 894 insertions(+), 1079 deletions(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 81987df536..d219435d08 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -213,6 +213,10 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 {
 	int i, err, fraglen, end = 0;
 	struct sk_buff *next = skb_shinfo(skb)->frag_list;
+
+	if (!len)
+		return 0;
+
 next_skb:
 	fraglen = skb_headlen(skb);
 	i = -1;
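The guard above short-circuits zero-length requests before the fragment walk begins; as the hunk suggests, a zero-byte read could otherwise fall through the copy loop and report a fault instead of success. A minimal sketch of the pattern in standalone C (the struct and helper are illustrative, not the kernel's types):

/* Sketch: bail out before walking fragments when there is nothing to
 * copy. Without the early return, a zero-length request can run off
 * the end of the fragment chain and return an error instead of 0. */
#include <stddef.h>
#include <string.h>

struct fragment { const char *data; size_t len; struct fragment *next; };

static int copy_from_frags(struct fragment *f, char *dst, size_t len)
{
	if (!len)			/* the added early return */
		return 0;
	for (; f != NULL; f = f->next) {
		size_t chunk = f->len < len ? f->len : len;
		memcpy(dst, f->data, chunk);
		dst += chunk;
		len -= chunk;
		if (!len)
			return 0;
	}
	return -1;			/* ran out of data: error path */
}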
diff --git a/net/core/stream.c b/net/core/stream.c
index ac9edfdf87..15bfd03e80 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -52,8 +52,9 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
 {
 	struct task_struct *tsk = current;
 	DEFINE_WAIT(wait);
+	int done;
 
-	while (1) {
+	do {
 		if (sk->sk_err)
 			return sock_error(sk);
 		if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
@@ -65,13 +66,12 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
 
 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 		sk->sk_write_pending++;
-		if (sk_wait_event(sk, timeo_p,
-				  !((1 << sk->sk_state) &
-				    ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))))
-			break;
+		done = sk_wait_event(sk, timeo_p,
+				     !((1 << sk->sk_state) &
+				       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
 		finish_wait(sk->sk_sleep, &wait);
 		sk->sk_write_pending--;
-	}
+	} while (!done);
 	return 0;
 }
 
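Reading the hunk, the point of the `while (1)` to `do { } while (!done)` rewrite is pairing: the old loop could `break` between `prepare_to_wait()` and `finish_wait()`, leaving the wait entry queued and `sk_write_pending` incremented. Remembering the wait result in `done` lets the cleanup always run before the loop decides to exit. A schematic of the two shapes in plain C (`enqueue`/`dequeue`/`wait_done` are illustrative stand-ins, not kernel APIs):

/* Schematic of the control-flow fix; enqueue()/dequeue() stand in for
 * prepare_to_wait()/finish_wait(). */
#include <stdbool.h>

static void enqueue(void) { }			/* like prepare_to_wait() */
static void dequeue(void) { }			/* like finish_wait() */
static bool wait_done(void) { return true; }	/* like sk_wait_event() */

int wait_until_done_buggy(void)
{
	for (;;) {
		enqueue();
		if (wait_done())
			break;		/* bug: exits without dequeue() */
		dequeue();
	}
	return 0;
}

int wait_until_done_fixed(void)
{
	int done;
	do {
		enqueue();
		done = wait_done();	/* remember the result...        */
		dequeue();		/* ...but always clean up first  */
	} while (!done);
	return 0;
}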
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 6298cf58ff..4b9bc81ae1 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -31,8 +31,6 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
 	.lhash_lock	= RW_LOCK_UNLOCKED,
 	.lhash_users	= ATOMIC_INIT(0),
 	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
-	.portalloc_lock	= SPIN_LOCK_UNLOCKED,
-	.port_rover	= 1024 - 1,
 };
 
 EXPORT_SYMBOL_GPL(dccp_hashinfo);
@@ -125,36 +123,15 @@ static int dccp_v4_hash_connect(struct sock *sk)
 	int ret;
 
 	if (snum == 0) {
-		int rover;
 		int low = sysctl_local_port_range[0];
 		int high = sysctl_local_port_range[1];
 		int remaining = (high - low) + 1;
+		int rover = net_random() % (high - low) + low;
 		struct hlist_node *node;
 		struct inet_timewait_sock *tw = NULL;
 
 		local_bh_disable();
-
-		/* TODO. Actually it is not so bad idea to remove
-		 * dccp_hashinfo.portalloc_lock before next submission to
-		 * Linus.
-		 * As soon as we touch this place at all it is time to think.
-		 *
-		 * Now it protects single _advisory_ variable
-		 * dccp_hashinfo.port_rover, hence it is mostly useless.
-		 * Code will work nicely if we just delete it, but
-		 * I am afraid in contented case it will work not better or
-		 * even worse: another cpu just will hit the same bucket
-		 * and spin there.
-		 * So some cpu salt could remove both contention and
-		 * memory pingpong. Any ideas how to do this in a nice way?
-		 */
-		spin_lock(&dccp_hashinfo.portalloc_lock);
-		rover = dccp_hashinfo.port_rover;
-
 		do {
-			rover++;
-			if ((rover < low) || (rover > high))
-				rover = low;
 			head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
 						    dccp_hashinfo.bhash_size)];
 			spin_lock(&head->lock);
@@ -187,9 +164,9 @@ static int dccp_v4_hash_connect(struct sock *sk)
 
 		next_port:
 			spin_unlock(&head->lock);
+			if (++rover > high)
+				rover = low;
 		} while (--remaining > 0);
-		dccp_hashinfo.port_rover = rover;
-		spin_unlock(&dccp_hashinfo.portalloc_lock);
 
 		local_bh_enable();
 
@@ -197,9 +174,6 @@ static int dccp_v4_hash_connect(struct sock *sk)
 
 ok:
 		/* All locks still held and bhs disabled */
-		dccp_hashinfo.port_rover = rover;
-		spin_unlock(&dccp_hashinfo.portalloc_lock);
-
 		inet_bind_hash(sk, tb, rover);
 		if (sk_unhashed(sk)) {
 			inet_sk(sk)->sport = htons(rover);
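This hunk, together with the matching ones in net/ipv4/inet_connection_sock.c and net/ipv6/tcp_ipv6.c below, replaces the globally shared, spinlock-protected `port_rover` with a per-call random starting point: each connect picks a random port in the local range and probes linearly with wraparound, so the lock and the shared rover can be deleted. A self-contained sketch of the allocation strategy in plain C (`rand()` stands in for `net_random()`, and `port_in_use()` is a stub for the bind-hash probe):

/* Sketch of randomized ephemeral-port probing: a random start point
 * replaces a shared rover + lock; each occupied port advances the
 * probe linearly, wrapping, until the range is exhausted. */
#include <stdlib.h>

static int port_in_use(int port)	/* stub for the bind-hash lookup */
{
	return port % 7 == 0;		/* arbitrary demo occupancy */
}

int pick_local_port(int low, int high)
{
	int remaining = (high - low) + 1;
	int rover = rand() % (high - low) + low;   /* mirrors the patch */

	do {
		if (!port_in_use(rover))
			return rover;		/* found a free port */
		if (++rover > high)		/* wrap to bottom of range */
			rover = low;
	} while (--remaining > 0);

	return -1;				/* local port range exhausted */
}

Note that `net_random() % (high - low) + low` never produces `high` as the starting point, but since the loop probes all `remaining` ports with wraparound, `high` is still reachable.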
diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c
index f3b6aa3be6..20cc580a07 100644
--- a/net/ieee80211/ieee80211_crypt.c
+++ b/net/ieee80211/ieee80211_crypt.c
@@ -12,7 +12,6 @@
  */
 
 #include <linux/config.h>
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
index 05a853c130..4702217285 100644
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/config.h>
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 2e34f29b79..e0988320ef 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/config.h>
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 7c08ed2f26..073aebdf0f 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/config.h>
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c
index c4b54ef8f6..610cc5cbc2 100644
--- a/net/ieee80211/ieee80211_geo.c
+++ b/net/ieee80211/ieee80211_geo.c
@@ -38,7 +38,6 @@
 #include <linux/slab.h>
 #include <linux/tcp.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <linux/wireless.h>
 #include <linux/etherdevice.h>
 #include <asm/uaccess.h>
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index f66d792cd2..321287bc88 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -45,7 +45,6 @@
 #include <linux/slab.h>
 #include <linux/tcp.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <linux/wireless.h>
 #include <linux/etherdevice.h>
 #include <asm/uaccess.h>
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index ce694cf5c1..6ad88218f5 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -28,7 +28,6 @@
 #include <linux/slab.h>
 #include <linux/tcp.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <linux/wireless.h>
 #include <linux/etherdevice.h>
 #include <asm/uaccess.h>
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 95ccbadbf5..445f206e65 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -38,7 +38,6 @@
 #include <linux/slab.h>
 #include <linux/tcp.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <linux/wireless.h>
 #include <linux/etherdevice.h>
 #include <asm/uaccess.h>
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8b6d3939e1..c6247fc840 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1908,8 +1908,11 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 			sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
 			goto done;
 		}
-	} else
+	} else {
 		newpsl = NULL;
+		(void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
+				     msf->imsf_fmode, 0, NULL, 0);
+	}
 	psl = pmc->sflist;
 	if (psl) {
 		(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 94468a76c5..3fe021f1a5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -78,17 +78,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
 		int low = sysctl_local_port_range[0];
 		int high = sysctl_local_port_range[1];
 		int remaining = (high - low) + 1;
-		int rover;
+		int rover = net_random() % (high - low) + low;
 
-		spin_lock(&hashinfo->portalloc_lock);
-		if (hashinfo->port_rover < low)
-			rover = low;
-		else
-			rover = hashinfo->port_rover;
 		do {
-			rover++;
-			if (rover > high)
-				rover = low;
 			head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
 			spin_lock(&head->lock);
 			inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -97,9 +89,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
 				break;
 		next:
 			spin_unlock(&head->lock);
+			if (++rover > high)
+				rover = low;
 		} while (--remaining > 0);
-		hashinfo->port_rover = rover;
-		spin_unlock(&hashinfo->portalloc_lock);
 
 		/* Exhausted local port range during search? It is not
 		 * possible for us to be holding one of the bind hash
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 926a668464..4108a5e12b 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -270,14 +270,10 @@ exp_gre(struct ip_conntrack *master,
 	exp_orig->expectfn = pptp_expectfn;
 	exp_orig->flags = 0;
 
-	exp_orig->dir = IP_CT_DIR_ORIGINAL;
-
 	/* both expectations are identical apart from tuple */
 	memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
 	memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
 
-	exp_reply->dir = !exp_orig->dir;
-
 	if (ip_nat_pptp_hook_exp_gre)
 		ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
 	else {
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 166e6069f1..82a65043a8 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -815,7 +815,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 				      IPCTNL_MSG_CT_NEW, 1, ct);
 	ip_conntrack_put(ct);
 	if (err <= 0)
-		goto out;
+		goto free;
 
 	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
 	if (err < 0)
@@ -824,9 +824,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	DEBUGP("leaving\n");
 	return 0;
 
+free:
+	kfree_skb(skb2);
 out:
-	if (skb2)
-		kfree_skb(skb2);
 	return -1;
 }
 
@@ -1322,21 +1322,16 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
 				      1, exp);
 	if (err <= 0)
-		goto out;
+		goto free;
 
 	ip_conntrack_expect_put(exp);
 
-	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
-	if (err < 0)
-		goto free;
-
-	return err;
+	return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
 
+free:
+	kfree_skb(skb2);
 out:
 	ip_conntrack_expect_put(exp);
-free:
-	if (skb2)
-		kfree_skb(skb2);
 	return err;
 }
 
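The relabeling above straightens the error unwind: on every path that reaches `free`, `skb2` has already been allocated, so the `if (skb2)` test is dead and the labels can be ordered so each exit frees exactly what was acquired. The general kernel idiom, as a standalone sketch (allocators are illustrative stand-ins):

/* Sketch of ordered-goto error unwinding: acquire resources in order,
 * place unwind labels in reverse order, jump to the deepest label whose
 * resources are live. */
#include <stdlib.h>

struct res { int dummy; };
static struct res *alloc_a(void) { return malloc(sizeof(struct res)); }
static struct res *alloc_b(void) { return malloc(sizeof(struct res)); }

int do_work(void)
{
	int err = -1;
	struct res *a, *b;

	a = alloc_a();
	if (!a)
		goto out;		/* nothing to undo yet */
	b = alloc_b();
	if (!b)
		goto free_a;		/* only 'a' needs freeing */

	/* ... work with a and b ... */
	err = 0;
	free(b);
free_a:
	free(a);
out:
	return err;
}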
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index c5e3abd246..762f4d9393 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -66,10 +66,8 @@ ip_nat_proto_find_get(u_int8_t protonum)
 	 * removed until we've grabbed the reference */
 	preempt_disable();
 	p = __ip_nat_proto_find(protonum);
-	if (p) {
-		if (!try_module_get(p->me))
-			p = &ip_nat_unknown_protocol;
-	}
+	if (!try_module_get(p->me))
+		p = &ip_nat_unknown_protocol;
 	preempt_enable();
 
 	return p;
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 3cdd0684d3..ee6ab74ad3 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -216,6 +216,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 	expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
 	expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
 	expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+	expect_orig->dir = IP_CT_DIR_ORIGINAL;
 	inv_t.src.ip = reply_t->src.ip;
 	inv_t.dst.ip = reply_t->dst.ip;
 	inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
@@ -233,6 +234,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
 	expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
 	expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
 	expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+	expect_reply->dir = IP_CT_DIR_REPLY;
 	inv_t.src.ip = orig_t->src.ip;
 	inv_t.dst.ip = orig_t->dst.ip;
 	inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 7c12854016..f7cad7cf1a 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -139,8 +139,8 @@ gre_manip_pkt(struct sk_buff **pskb,
 		break;
 	case GRE_VERSION_PPTP:
 		DEBUGP("call_id -> 0x%04x\n",
-		       ntohl(tuple->dst.u.gre.key));
-		pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key));
+		       ntohs(tuple->dst.u.gre.key));
+		pgreh->call_id = tuple->dst.u.gre.key;
 		break;
 	default:
 		DEBUGP("can't nat unknown GRE version\n");
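Reading the new code, the GRE key appears to be kept in network byte order already: the old `htons(ntohl(...))` both used the wrong width (a 32-bit swap on a 16-bit value) and double-converted, while the fix assigns the key straight to the on-wire `call_id` field and swaps only for the debug printout. A compact standalone illustration of this bug class:

/* Illustration: a 16-bit field stored in network byte order must be
 * copied to the wire as-is and converted only for display. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint16_t key_net = htons(0x1234);	/* stored in network order */
	uint16_t call_id;

	/* wrong: 32-bit ntohl on a 16-bit value, then a second swap;
	 * on a little-endian host this yields 0x0000, not 0x1234 */
	call_id = htons(ntohl(key_net));
	printf("buggy: 0x%04x\n", call_id);

	/* right: already network order, copy through; swap to print */
	call_id = key_net;
	printf("fixed: 0x%04x (host view 0x%04x)\n", call_id, ntohs(key_net));
	return 0;
}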
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index 99bbef56f8..f0099a646a 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -62,7 +62,7 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range)
 
 struct ip_nat_protocol ip_nat_unknown_protocol = {
 	.name = "unknown",
-	.me = THIS_MODULE,
+	/* .me isn't set: getting a ref to this cannot fail. */
 	.manip_pkt = unknown_manip_pkt,
 	.in_range = unknown_in_range,
 	.unique_tuple = unknown_unique_tuple,
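Taken together with the ip_nat_core.c hunk above, the invariant these two changes encode seems to be: the lookup never returns NULL (unregistered protocols resolve to `ip_nat_unknown_protocol`), and because that fallback is built in, its `.me` owner stays unset; `try_module_get(NULL)` always succeeds for built-in code, so grabbing a reference on the fallback cannot fail. The invariant in miniature (illustrative types, not the kernel's):

/* Miniature of the refcount invariant: a NULL owner means "built in,
 * cannot be unloaded", so taking a reference trivially succeeds. */
#include <stdbool.h>
#include <stddef.h>

struct module { int refcount; };

static bool try_get(struct module *m)
{
	if (m == NULL)
		return true;	/* built-in: like try_module_get(NULL) */
	m->refcount++;		/* real code does this atomically, and a
				 * module mid-unload may refuse the ref */
	return true;
}

struct nat_proto { const char *name; struct module *owner; };

static struct nat_proto unknown_proto = { "unknown", NULL }; /* .me unset */

struct nat_proto *find_get(struct nat_proto *p)
{
	if (!try_get(p->owner))
		p = &unknown_proto;	/* fallback ref can't fail */
	return p;
}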
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 1346380213..05d66ab594 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -109,6 +109,7 @@ static struct ipt_target ipt_connmark_reg = {
 
 static int __init init(void)
 {
+	need_ip_conntrack();
 	return ipt_register_target(&ipt_connmark_reg);
 }
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3f0013a95..72b7c22e1e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2112,7 +2112,6 @@ void __init tcp_init(void)
 		sysctl_tcp_max_orphans >>= (3 - order);
 		sysctl_max_syn_backlog = 128;
 	}
-	tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
 
 	sysctl_tcp_mem[0] = 768 << order;
 	sysctl_tcp_mem[1] = 1024 << order;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 6d80e063c1..ae35e06090 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -27,7 +27,7 @@
  */
 
 static int fast_convergence = 1;
-static int max_increment = 32;
+static int max_increment = 16;
 static int low_window = 14;
 static int beta = 819;		/* = 819/1024 (BICTCP_BETA_SCALE) */
 static int low_utilization_threshold = 153;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c85819d847..49d67cd75e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -93,8 +93,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
 	.lhash_lock	= RW_LOCK_UNLOCKED,
 	.lhash_users	= ATOMIC_INIT(0),
 	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
-	.portalloc_lock	= SPIN_LOCK_UNLOCKED,
-	.port_rover	= 1024 - 1,
 };
 
 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 41edc14851..2c5f57299d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2163,7 +2163,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	/* Step 5: netlink notification of this interface */
 	idev->tstamp = jiffies;
-	inet6_ifinfo_notify(RTM_NEWLINK, idev);
+	inet6_ifinfo_notify(RTM_DELLINK, idev);
 
 	/* Shot the device (if unregistered) */
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 966b2372aa..f15e04ad02 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -545,8 +545,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 			sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
 			goto done;
 		}
-	} else
+	} else {
 		newpsl = NULL;
+		(void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
+	}
 	psl = pmc->sflist;
 	if (psl) {
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode,
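This hunk and the igmp.c hunk above appear to fix the same gap: when the new filter carries an empty source list, `newpsl` stays NULL but the `add_src` call must still run so the group's filter mode (include/exclude) is switched before the old source list is deleted. A structural sketch in standalone C (all names illustrative):

/* Structural sketch: push the new filter mode down even when the new
 * source list is empty, before tearing down the old list. */
#include <stddef.h>

enum fmode { MODE_INCLUDE, MODE_EXCLUDE };
struct src_list { int count; };

static void apply_src(enum fmode mode, struct src_list *l) { (void)mode; (void)l; }
static void remove_src(struct src_list *l) { (void)l; }

void set_filter(struct src_list **cur, struct src_list *newl, enum fmode mode)
{
	if (newl && newl->count > 0) {
		apply_src(mode, newl);	/* mode + sources in one call */
	} else {
		newl = NULL;
		apply_src(mode, NULL);	/* the added call: mode-only update */
	}
	if (*cur)
		remove_src(*cur);	/* now safe to drop the old list */
	*cur = newl;
}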
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d693cb988b..d746d3b27e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -114,16 +114,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
 		int low = sysctl_local_port_range[0];
 		int high = sysctl_local_port_range[1];
 		int remaining = (high - low) + 1;
-		int rover;
+		int rover = net_random() % (high - low) + low;
 
-		spin_lock(&tcp_hashinfo.portalloc_lock);
-		if (tcp_hashinfo.port_rover < low)
-			rover = low;
-		else
-			rover = tcp_hashinfo.port_rover;
-		do {	rover++;
-			if (rover > high)
-				rover = low;
+		do {
 			head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
 			spin_lock(&head->lock);
 			inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -132,9 +125,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
 				break;
 		next:
 			spin_unlock(&head->lock);
+			if (++rover > high)
+				rover = low;
 		} while (--remaining > 0);
-		tcp_hashinfo.port_rover = rover;
-		spin_unlock(&tcp_hashinfo.portalloc_lock);
 
 		/* Exhausted local port range during search? It is not
 		 * possible for us to be holding one of the bind hash
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index d10d552d9c..d3a4f30a7f 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -117,7 +117,7 @@ int nf_queue(struct sk_buff **skb,
 
 	/* QUEUE == DROP if noone is waiting, to be safe. */
 	read_lock(&queue_handler_lock);
-	if (!queue_handler[pf]->outfn) {
+	if (!queue_handler[pf] || !queue_handler[pf]->outfn) {
 		read_unlock(&queue_handler_lock);
 		kfree_skb(*skb);
 		return 1;
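The guard above closes a NULL dereference: with no queue handler registered for the protocol family, `queue_handler[pf]` is NULL and the old code dereferenced it to check `->outfn`. The fix tests the pointer before the member, relying on `||` short-circuit evaluation. In miniature:

/* Miniature of the short-circuit fix: test the pointer before its member. */
#include <stdio.h>
#include <stddef.h>

struct handler { int (*outfn)(void); };

static int route_packet(struct handler *h)
{
	/* old form crashed when h == NULL: if (!h->outfn) ...        */
	if (h == NULL || h->outfn == NULL)	/* fixed: NULL-safe order */
		return 1;			/* drop: nobody is waiting */
	return h->outfn();
}

int main(void)
{
	printf("%d\n", route_packet(NULL));	/* safely drops: prints 1 */
	return 0;
}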
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index efcd10f996..d194676f36 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -146,11 +146,10 @@ instance_create(u_int16_t group_num, int pid)
 		goto out_unlock;
 	}
 
-	inst = kmalloc(sizeof(*inst), GFP_ATOMIC);
+	inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
 	if (!inst)
 		goto out_unlock;
 
-	memset(inst, 0, sizeof(*inst));
 	INIT_HLIST_NODE(&inst->hlist);
 	inst->lock = SPIN_LOCK_UNLOCKED;
 	/* needs to be two, since we _put() after creation */
@@ -962,10 +961,9 @@ static int nful_open(struct inode *inode, struct file *file)
 	struct iter_state *is;
 	int ret;
 
-	is = kmalloc(sizeof(*is), GFP_KERNEL);
+	is = kzalloc(sizeof(*is), GFP_KERNEL);
 	if (!is)
 		return -ENOMEM;
-	memset(is, 0, sizeof(*is));
 	ret = seq_open(file, &nful_seq_ops);
 	if (ret < 0)
 		goto out_free;
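The kzalloc conversions here and in nfnetlink_queue.c below fold the allocate-then-memset pair into one call: `kzalloc(size, flags)` behaves like `kmalloc()` followed by `memset(ptr, 0, size)` on success. A userspace analogue of the transformation, with `calloc()` playing the role of `kzalloc()`:

/* Userspace analogue of the kmalloc+memset -> kzalloc rewrite. */
#include <stdlib.h>
#include <string.h>

struct instance { int id; void *list; };

struct instance *create_before(void)
{
	struct instance *inst = malloc(sizeof(*inst));	/* like kmalloc */
	if (!inst)
		return NULL;
	memset(inst, 0, sizeof(*inst));			/* separate zeroing */
	return inst;
}

struct instance *create_after(void)
{
	/* zeroed in one step, like kzalloc */
	return calloc(1, sizeof(struct instance));
}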
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index eaa44c4956..f065a6c949 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -136,11 +136,10 @@ instance_create(u_int16_t queue_num, int pid)
 		goto out_unlock;
 	}
 
-	inst = kmalloc(sizeof(*inst), GFP_ATOMIC);
+	inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
 	if (!inst)
 		goto out_unlock;
 
-	memset(inst, 0, sizeof(*inst));
 	inst->queue_num = queue_num;
 	inst->peer_pid = pid;
 	inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
@@ -1036,10 +1035,9 @@ static int nfqnl_open(struct inode *inode, struct file *file)
 	struct iter_state *is;
 	int ret;
 
-	is = kmalloc(sizeof(*is), GFP_KERNEL);
+	is = kzalloc(sizeof(*is), GFP_KERNEL);
 	if (!is)
 		return -ENOMEM;
-	memset(is, 0, sizeof(*is));
 	ret = seq_open(file, &nfqnl_seq_ops);
 	if (ret < 0)
 		goto out_free;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 81510da317..7f34e7fd76 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig | |||
@@ -2,13 +2,15 @@ | |||
2 | # Traffic control configuration. | 2 | # Traffic control configuration. |
3 | # | 3 | # |
4 | 4 | ||
5 | menuconfig NET_SCHED | 5 | menu "QoS and/or fair queueing" |
6 | |||
7 | config NET_SCHED | ||
6 | bool "QoS and/or fair queueing" | 8 | bool "QoS and/or fair queueing" |
7 | ---help--- | 9 | ---help--- |
8 | When the kernel has several packets to send out over a network | 10 | When the kernel has several packets to send out over a network |
9 | device, it has to decide which ones to send first, which ones to | 11 | device, it has to decide which ones to send first, which ones to |
10 | delay, and which ones to drop. This is the job of the packet | 12 | delay, and which ones to drop. This is the job of the queueing |
11 | scheduler, and several different algorithms for how to do this | 13 | disciplines, several different algorithms for how to do this |
12 | "fairly" have been proposed. | 14 | "fairly" have been proposed. |
13 | 15 | ||
14 | If you say N here, you will get the standard packet scheduler, which | 16 | If you say N here, you will get the standard packet scheduler, which |
@@ -23,13 +25,13 @@ menuconfig NET_SCHED | |||
23 | To administer these schedulers, you'll need the user-level utilities | 25 | To administer these schedulers, you'll need the user-level utilities |
24 | from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>. | 26 | from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>. |
25 | That package also contains some documentation; for more, check out | 27 | That package also contains some documentation; for more, check out |
26 | <http://snafu.freedom.org/linux2.2/iproute-notes.html>. | 28 | <http://linux-net.osdl.org/index.php/Iproute2>. |
27 | 29 | ||
28 | This Quality of Service (QoS) support will enable you to use | 30 | This Quality of Service (QoS) support will enable you to use |
29 | Differentiated Services (diffserv) and Resource Reservation Protocol | 31 | Differentiated Services (diffserv) and Resource Reservation Protocol |
30 | (RSVP) on your Linux router if you also say Y to "QoS support", | 32 | (RSVP) on your Linux router if you also say Y to the corresponding |
31 | "Packet classifier API" and to some classifiers below. Documentation | 33 | classifiers below. Documentation and software is at |
32 | and software is at <http://diffserv.sourceforge.net/>. | 34 | <http://diffserv.sourceforge.net/>. |
33 | 35 | ||
34 | If you say Y here and to "/proc file system" below, you will be able | 36 | If you say Y here and to "/proc file system" below, you will be able |
35 | to read status information about packet schedulers from the file | 37 | to read status information about packet schedulers from the file |
@@ -42,7 +44,7 @@ choice | |||
42 | prompt "Packet scheduler clock source" | 44 | prompt "Packet scheduler clock source" |
43 | depends on NET_SCHED | 45 | depends on NET_SCHED |
44 | default NET_SCH_CLK_JIFFIES | 46 | default NET_SCH_CLK_JIFFIES |
45 | help | 47 | ---help--- |
46 | Packet schedulers need a monotonic clock that increments at a static | 48 | Packet schedulers need a monotonic clock that increments at a static |
47 | rate. The kernel provides several suitable interfaces, each with | 49 | rate. The kernel provides several suitable interfaces, each with |
48 | different properties: | 50 | different properties: |
@@ -56,7 +58,7 @@ choice | |||
56 | 58 | ||
57 | config NET_SCH_CLK_JIFFIES | 59 | config NET_SCH_CLK_JIFFIES |
58 | bool "Timer interrupt" | 60 | bool "Timer interrupt" |
59 | help | 61 | ---help--- |
60 | Say Y here if you want to use the timer interrupt (jiffies) as clock | 62 | Say Y here if you want to use the timer interrupt (jiffies) as clock |
61 | source. This clock source is fast, synchronized on all processors and | 63 | source. This clock source is fast, synchronized on all processors and |
62 | handles cpu clock frequency changes, but its resolution is too low | 64 | handles cpu clock frequency changes, but its resolution is too low |
@@ -64,7 +66,7 @@ config NET_SCH_CLK_JIFFIES | |||
64 | 66 | ||
65 | config NET_SCH_CLK_GETTIMEOFDAY | 67 | config NET_SCH_CLK_GETTIMEOFDAY |
66 | bool "gettimeofday" | 68 | bool "gettimeofday" |
67 | help | 69 | ---help--- |
68 | Say Y here if you want to use gettimeofday as clock source. This clock | 70 | Say Y here if you want to use gettimeofday as clock source. This clock |
69 | source has high resolution, is synchronized on all processors and | 71 | source has high resolution, is synchronized on all processors and |
70 | handles cpu clock frequency changes, but it is slow. | 72 | handles cpu clock frequency changes, but it is slow. |
@@ -77,7 +79,7 @@ config NET_SCH_CLK_GETTIMEOFDAY | |||
77 | config NET_SCH_CLK_CPU | 79 | config NET_SCH_CLK_CPU |
78 | bool "CPU cycle counter" | 80 | bool "CPU cycle counter" |
79 | depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64 | 81 | depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64 |
80 | help | 82 | ---help--- |
81 | Say Y here if you want to use the CPU's cycle counter as clock source. | 83 | Say Y here if you want to use the CPU's cycle counter as clock source. |
82 | This is a cheap and high resolution clock source, but on some | 84 | This is a cheap and high resolution clock source, but on some |
83 | architectures it is not synchronized on all processors and doesn't | 85 | architectures it is not synchronized on all processors and doesn't |
@@ -95,134 +97,129 @@ config NET_SCH_CLK_CPU | |||
95 | 97 | ||
96 | endchoice | 98 | endchoice |
97 | 99 | ||
100 | comment "Queueing/Scheduling" | ||
101 | depends on NET_SCHED | ||
102 | |||
98 | config NET_SCH_CBQ | 103 | config NET_SCH_CBQ |
99 | tristate "CBQ packet scheduler" | 104 | tristate "Class Based Queueing (CBQ)" |
100 | depends on NET_SCHED | 105 | depends on NET_SCHED |
101 | ---help--- | 106 | ---help--- |
102 | Say Y here if you want to use the Class-Based Queueing (CBQ) packet | 107 | Say Y here if you want to use the Class-Based Queueing (CBQ) packet |
103 | scheduling algorithm for some of your network devices. This | 108 | scheduling algorithm. This algorithm classifies the waiting packets |
104 | algorithm classifies the waiting packets into a tree-like hierarchy | 109 | into a tree-like hierarchy of classes; the leaves of this tree are |
105 | of classes; the leaves of this tree are in turn scheduled by | 110 | in turn scheduled by separate algorithms. |
106 | separate algorithms (called "disciplines" in this context). | ||
107 | 111 | ||
108 | See the top of <file:net/sched/sch_cbq.c> for references about the | 112 | See the top of <file:net/sched/sch_cbq.c> for more details. |
109 | CBQ algorithm. | ||
110 | 113 | ||
111 | CBQ is a commonly used scheduler, so if you're unsure, you should | 114 | CBQ is a commonly used scheduler, so if you're unsure, you should |
112 | say Y here. Then say Y to all the queueing algorithms below that you | 115 | say Y here. Then say Y to all the queueing algorithms below that you |
113 | want to use as CBQ disciplines. Then say Y to "Packet classifier | 116 | want to use as leaf disciplines. |
114 | API" and say Y to all the classifiers you want to use; a classifier | ||
115 | is a routine that allows you to sort your outgoing traffic into | ||
116 | classes based on a certain criterion. | ||
117 | 117 | ||
118 | To compile this code as a module, choose M here: the | 118 | To compile this code as a module, choose M here: the |
119 | module will be called sch_cbq. | 119 | module will be called sch_cbq. |
120 | 120 | ||
121 | config NET_SCH_HTB | 121 | config NET_SCH_HTB |
122 | tristate "HTB packet scheduler" | 122 | tristate "Hierarchical Token Bucket (HTB)" |
123 | depends on NET_SCHED | 123 | depends on NET_SCHED |
124 | ---help--- | 124 | ---help--- |
125 | Say Y here if you want to use the Hierarchical Token Buckets (HTB) | 125 | Say Y here if you want to use the Hierarchical Token Buckets (HTB) |
126 | packet scheduling algorithm for some of your network devices. See | 126 | packet scheduling algorithm. See |
127 | <http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and | 127 | <http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and |
128 | in-depth articles. | 128 | in-depth articles. |
129 | 129 | ||
130 | HTB is very similar to the CBQ regarding its goals however is has | 130 | HTB is very similar to CBQ regarding its goals however is has |
131 | different properties and different algorithm. | 131 | different properties and different algorithm. |
132 | 132 | ||
133 | To compile this code as a module, choose M here: the | 133 | To compile this code as a module, choose M here: the |
134 | module will be called sch_htb. | 134 | module will be called sch_htb. |
135 | 135 | ||
136 | config NET_SCH_HFSC | 136 | config NET_SCH_HFSC |
137 | tristate "HFSC packet scheduler" | 137 | tristate "Hierarchical Fair Service Curve (HFSC)" |
138 | depends on NET_SCHED | 138 | depends on NET_SCHED |
139 | ---help--- | 139 | ---help--- |
140 | Say Y here if you want to use the Hierarchical Fair Service Curve | 140 | Say Y here if you want to use the Hierarchical Fair Service Curve |
141 | (HFSC) packet scheduling algorithm for some of your network devices. | 141 | (HFSC) packet scheduling algorithm. |
142 | 142 | ||
143 | To compile this code as a module, choose M here: the | 143 | To compile this code as a module, choose M here: the |
144 | module will be called sch_hfsc. | 144 | module will be called sch_hfsc. |
145 | 145 | ||
146 | #tristate ' H-PFQ packet scheduler' CONFIG_NET_SCH_HPFQ | ||
147 | config NET_SCH_ATM | 146 | config NET_SCH_ATM |
148 | tristate "ATM pseudo-scheduler" | 147 | tristate "ATM Virtual Circuits (ATM)" |
149 | depends on NET_SCHED && ATM | 148 | depends on NET_SCHED && ATM |
150 | ---help--- | 149 | ---help--- |
151 | Say Y here if you want to use the ATM pseudo-scheduler. This | 150 | Say Y here if you want to use the ATM pseudo-scheduler. This |
152 | provides a framework for invoking classifiers (aka "filters"), which | 151 | provides a framework for invoking classifiers, which in turn |
153 | in turn select classes of this queuing discipline. Each class maps | 152 | select classes of this queuing discipline. Each class maps |
154 | the flow(s) it is handling to a given virtual circuit (see the top of | 153 | the flow(s) it is handling to a given virtual circuit. |
155 | <file:net/sched/sch_atm.c>). | 154 | |
155 | See the top of <file:net/sched/sch_atm.c>) for more details. | ||
156 | 156 | ||
157 | To compile this code as a module, choose M here: the | 157 | To compile this code as a module, choose M here: the |
158 | module will be called sch_atm. | 158 | module will be called sch_atm. |
159 | 159 | ||
160 | config NET_SCH_PRIO | 160 | config NET_SCH_PRIO |
161 | tristate "The simplest PRIO pseudoscheduler" | 161 | tristate "Multi Band Priority Queueing (PRIO)" |
162 | depends on NET_SCHED | 162 | depends on NET_SCHED |
163 | help | 163 | ---help--- |
164 | Say Y here if you want to use an n-band priority queue packet | 164 | Say Y here if you want to use an n-band priority queue packet |
165 | "scheduler" for some of your network devices or as a leaf discipline | 165 | scheduler. |
166 | for the CBQ scheduling algorithm. If unsure, say Y. | ||
167 | 166 | ||
168 | To compile this code as a module, choose M here: the | 167 | To compile this code as a module, choose M here: the |
169 | module will be called sch_prio. | 168 | module will be called sch_prio. |
170 | 169 | ||
171 | config NET_SCH_RED | 170 | config NET_SCH_RED |
172 | tristate "RED queue" | 171 | tristate "Random Early Detection (RED)" |
173 | depends on NET_SCHED | 172 | depends on NET_SCHED |
174 | help | 173 | ---help--- |
175 | Say Y here if you want to use the Random Early Detection (RED) | 174 | Say Y here if you want to use the Random Early Detection (RED) |
176 | packet scheduling algorithm for some of your network devices (see | 175 | packet scheduling algorithm. |
177 | the top of <file:net/sched/sch_red.c> for details and references | 176 | |
178 | about the algorithm). | 177 | See the top of <file:net/sched/sch_red.c> for more details. |
179 | 178 | ||
180 | To compile this code as a module, choose M here: the | 179 | To compile this code as a module, choose M here: the |
181 | module will be called sch_red. | 180 | module will be called sch_red. |
182 | 181 | ||
183 | config NET_SCH_SFQ | 182 | config NET_SCH_SFQ |
184 | tristate "SFQ queue" | 183 | tristate "Stochastic Fairness Queueing (SFQ)" |
185 | depends on NET_SCHED | 184 | depends on NET_SCHED |
186 | ---help--- | 185 | ---help--- |
187 | Say Y here if you want to use the Stochastic Fairness Queueing (SFQ) | 186 | Say Y here if you want to use the Stochastic Fairness Queueing (SFQ) |
188 | packet scheduling algorithm for some of your network devices or as a | 187 | packet scheduling algorithm . |
189 | leaf discipline for the CBQ scheduling algorithm (see the top of | 188 | |
190 | <file:net/sched/sch_sfq.c> for details and references about the SFQ | 189 | See the top of <file:net/sched/sch_sfq.c> for more details. |
191 | algorithm). | ||
192 | 190 | ||
193 | To compile this code as a module, choose M here: the | 191 | To compile this code as a module, choose M here: the |
194 | module will be called sch_sfq. | 192 | module will be called sch_sfq. |
195 | 193 | ||
196 | config NET_SCH_TEQL | 194 | config NET_SCH_TEQL |
197 | tristate "TEQL queue" | 195 | tristate "True Link Equalizer (TEQL)" |
198 | depends on NET_SCHED | 196 | depends on NET_SCHED |
199 | ---help--- | 197 | ---help--- |
200 | Say Y here if you want to use the True Link Equalizer (TLE) packet | 198 | Say Y here if you want to use the True Link Equalizer (TLE) packet |
201 | scheduling algorithm for some of your network devices or as a leaf | 199 | scheduling algorithm. This queueing discipline allows the combination |
202 | discipline for the CBQ scheduling algorithm. This queueing | 200 | of several physical devices into one virtual device. |
203 | discipline allows the combination of several physical devices into | 201 | |
204 | one virtual device. (see the top of <file:net/sched/sch_teql.c> for | 202 | See the top of <file:net/sched/sch_teql.c> for more details. |
205 | details). | ||
206 | 203 | ||
207 | To compile this code as a module, choose M here: the | 204 | To compile this code as a module, choose M here: the |
208 | module will be called sch_teql. | 205 | module will be called sch_teql. |
209 | 206 | ||
210 | config NET_SCH_TBF | 207 | config NET_SCH_TBF |
211 | tristate "TBF queue" | 208 | tristate "Token Bucket Filter (TBF)" |
212 | depends on NET_SCHED | 209 | depends on NET_SCHED |
213 | help | 210 | ---help--- |
214 | Say Y here if you want to use the Simple Token Bucket Filter (TBF) | 211 | Say Y here if you want to use the Token Bucket Filter (TBF) packet |
215 | packet scheduling algorithm for some of your network devices or as a | 212 | scheduling algorithm. |
216 | leaf discipline for the CBQ scheduling algorithm (see the top of | 213 | |
217 | <file:net/sched/sch_tbf.c> for a description of the TBF algorithm). | 214 | See the top of <file:net/sched/sch_tbf.c> for more details. |
218 | 215 | ||
219 | To compile this code as a module, choose M here: the | 216 | To compile this code as a module, choose M here: the |
220 | module will be called sch_tbf. | 217 | module will be called sch_tbf. |
221 | 218 | ||
222 | config NET_SCH_GRED | 219 | config NET_SCH_GRED |
223 | tristate "GRED queue" | 220 | tristate "Generic Random Early Detection (GRED)" |
224 | depends on NET_SCHED | 221 | depends on NET_SCHED |
225 | help | 222 | ---help--- |
226 | Say Y here if you want to use the Generic Random Early Detection | 223 | Say Y here if you want to use the Generic Random Early Detection |
227 | (GRED) packet scheduling algorithm for some of your network devices | 224 | (GRED) packet scheduling algorithm for some of your network devices |
228 | (see the top of <file:net/sched/sch_red.c> for details and | 225 | (see the top of <file:net/sched/sch_red.c> for details and |
@@ -232,9 +229,9 @@ config NET_SCH_GRED | |||
232 | module will be called sch_gred. | 229 | module will be called sch_gred. |
233 | 230 | ||
234 | config NET_SCH_DSMARK | 231 | config NET_SCH_DSMARK |
235 | tristate "Diffserv field marker" | 232 | tristate "Differentiated Services marker (DSMARK)" |
236 | depends on NET_SCHED | 233 | depends on NET_SCHED |
237 | help | 234 | ---help--- |
238 | Say Y if you want to schedule packets according to the | 235 | Say Y if you want to schedule packets according to the |
239 | Differentiated Services architecture proposed in RFC 2475. | 236 | Differentiated Services architecture proposed in RFC 2475. |
240 | Technical information on this method, with pointers to associated | 237 | Technical information on this method, with pointers to associated |
@@ -244,9 +241,9 @@ config NET_SCH_DSMARK | |||
244 | module will be called sch_dsmark. | 241 | module will be called sch_dsmark. |
245 | 242 | ||
246 | config NET_SCH_NETEM | 243 | config NET_SCH_NETEM |
247 | tristate "Network emulator" | 244 | tristate "Network emulator (NETEM)" |
248 | depends on NET_SCHED | 245 | depends on NET_SCHED |
249 | help | 246 | ---help--- |
250 | Say Y if you want to emulate network delay, loss, and packet | 247 | Say Y if you want to emulate network delay, loss, and packet |
251 | re-ordering. This is often useful to simulate networks when | 248 | re-ordering. This is often useful to simulate networks when |
252 | testing applications or protocols. | 249 | testing applications or protocols. |
@@ -259,58 +256,23 @@ config NET_SCH_NETEM | |||
259 | config NET_SCH_INGRESS | 256 | config NET_SCH_INGRESS |
260 | tristate "Ingress Qdisc" | 257 | tristate "Ingress Qdisc" |
261 | depends on NET_SCHED | 258 | depends on NET_SCHED |
262 | help | 259 | ---help--- |
263 | If you say Y here, you will be able to police incoming bandwidth | 260 | Say Y here if you want to use classifiers for incoming packets. |
264 | and drop packets when this bandwidth exceeds your desired rate. | ||
265 | If unsure, say Y. | 261 | If unsure, say Y. |
266 | 262 | ||
267 | To compile this code as a module, choose M here: the | 263 | To compile this code as a module, choose M here: the |
268 | module will be called sch_ingress. | 264 | module will be called sch_ingress. |
269 | 265 | ||
270 | config NET_QOS | 266 | comment "Classification" |
271 | bool "QoS support" | ||
272 | depends on NET_SCHED | 267 | depends on NET_SCHED |
273 | ---help--- | ||
274 | Say Y here if you want to include Quality Of Service scheduling | ||
275 | features, which means that you will be able to request certain | ||
276 | rate-of-flow limits for your network devices. | ||
277 | |||
278 | This Quality of Service (QoS) support will enable you to use | ||
279 | Differentiated Services (diffserv) and Resource Reservation Protocol | ||
280 | (RSVP) on your Linux router if you also say Y to "Packet classifier | ||
281 | API" and to some classifiers below. Documentation and software is at | ||
282 | <http://diffserv.sourceforge.net/>. | ||
283 | |||
284 | Note that the answer to this question won't directly affect the | ||
285 | kernel: saying N will just cause the configurator to skip all | ||
286 | the questions about QoS support. | ||
287 | |||
288 | config NET_ESTIMATOR | ||
289 | bool "Rate estimator" | ||
290 | depends on NET_QOS | ||
291 | help | ||
292 | In order for Quality of Service scheduling to work, the current | ||
293 | rate-of-flow for a network device has to be estimated; if you say Y | ||
294 | here, the kernel will do just that. | ||
295 | 268 | ||
296 | config NET_CLS | 269 | config NET_CLS |
297 | bool "Packet classifier API" | 270 | boolean |
298 | depends on NET_SCHED | ||
299 | ---help--- | ||
300 | The CBQ scheduling algorithm requires that network packets which are | ||
301 | scheduled to be sent out over a network device be classified | ||
302 | according to some criterion. If you say Y here, you will get a | ||
303 | choice of several different packet classifiers with the following | ||
304 | questions. | ||
305 | |||
306 | This will enable you to use Differentiated Services (diffserv) and | ||
307 | Resource Reservation Protocol (RSVP) on your Linux router. | ||
308 | Documentation and software is at | ||
309 | <http://diffserv.sourceforge.net/>. | ||
310 | 271 | ||
311 | config NET_CLS_BASIC | 272 | config NET_CLS_BASIC |
312 | tristate "Basic classifier" | 273 | tristate "Elementary classification (BASIC)" |
313 | depends on NET_CLS | 274 | depends NET_SCHED |
275 | select NET_CLS | ||
314 | ---help--- | 276 | ---help--- |
315 | Say Y here if you want to be able to classify packets using | 277 | Say Y here if you want to be able to classify packets using |
316 | only extended matches and actions. | 278 | only extended matches and actions. |
@@ -319,24 +281,25 @@ config NET_CLS_BASIC | |||
319 | module will be called cls_basic. | 281 | module will be called cls_basic. |
320 | 282 | ||
321 | config NET_CLS_TCINDEX | 283 | config NET_CLS_TCINDEX |
322 | tristate "TC index classifier" | 284 | tristate "Traffic-Control Index (TCINDEX)" |
323 | depends on NET_CLS | 285 | depends NET_SCHED |
324 | help | 286 | select NET_CLS |
325 | If you say Y here, you will be able to classify outgoing packets | 287 | ---help--- |
326 | according to the tc_index field of the skb. You will want this | 288 | Say Y here if you want to be able to classify packets based on |
327 | feature if you want to implement Differentiated Services using | 289 | traffic control indices. You will want this feature if you want |
328 | sch_dsmark. If unsure, say Y. | 290 | to implement Differentiated Services together with DSMARK. |
329 | 291 | ||
330 | To compile this code as a module, choose M here: the | 292 | To compile this code as a module, choose M here: the |
331 | module will be called cls_tcindex. | 293 | module will be called cls_tcindex. |
332 | 294 | ||
333 | config NET_CLS_ROUTE4 | 295 | config NET_CLS_ROUTE4 |
334 | tristate "Routing table based classifier" | 296 | tristate "Routing decision (ROUTE)" |
335 | depends on NET_CLS | 297 | depends NET_SCHED |
336 | select NET_CLS_ROUTE | 298 | select NET_CLS_ROUTE |
337 | help | 299 | select NET_CLS |
338 | If you say Y here, you will be able to classify outgoing packets | 300 | ---help--- |
339 | according to the route table entry they matched. If unsure, say Y. | 301 | If you say Y here, you will be able to classify packets |
302 | according to the route table entry they matched. | ||
340 | 303 | ||
341 | To compile this code as a module, choose M here: the | 304 | To compile this code as a module, choose M here: the |
342 | module will be called cls_route. | 305 | module will be called cls_route. |
@@ -346,58 +309,45 @@ config NET_CLS_ROUTE | |||
346 | default n | 309 | default n |
347 | 310 | ||
348 | config NET_CLS_FW | 311 | config NET_CLS_FW |
349 | tristate "Firewall based classifier" | 312 | tristate "Netfilter mark (FW)" |
350 | depends on NET_CLS | 313 | depends NET_SCHED |
351 | help | 314 | select NET_CLS |
352 | If you say Y here, you will be able to classify outgoing packets | 315 | ---help--- |
353 | according to firewall criteria you specified. | 316 | If you say Y here, you will be able to classify packets |
317 | according to netfilter/firewall marks. | ||
354 | 318 | ||
355 | To compile this code as a module, choose M here: the | 319 | To compile this code as a module, choose M here: the |
356 | module will be called cls_fw. | 320 | module will be called cls_fw. |
357 | 321 | ||
358 | config NET_CLS_U32 | 322 | config NET_CLS_U32 |
359 | tristate "U32 classifier" | 323 | tristate "Universal 32bit comparisons w/ hashing (U32)" |
360 | depends on NET_CLS | 324 | depends NET_SCHED |
361 | help | 325 | select NET_CLS |
362 | If you say Y here, you will be able to classify outgoing packets | 326 | ---help--- |
363 | according to their destination address. If unsure, say Y. | 327 | Say Y here to be able to classify packetes using a universal |
328 | 32bit pieces based comparison scheme. | ||
364 | 329 | ||
365 | To compile this code as a module, choose M here: the | 330 | To compile this code as a module, choose M here: the |
366 | module will be called cls_u32. | 331 | module will be called cls_u32. |
367 | 332 | ||
368 | config CLS_U32_PERF | 333 | config CLS_U32_PERF |
369 | bool "U32 classifier performance counters" | 334 | bool "Performance counters support" |
370 | depends on NET_CLS_U32 | 335 | depends on NET_CLS_U32 |
371 | help | 336 | ---help--- |
372 | gathers stats that could be used to tune u32 classifier performance. | 337 | Say Y here to make u32 gather additional statistics useful for |
373 | Requires a new iproute2 | 338 | fine tuning u32 classifiers. |
374 | You MUST NOT turn this on if you dont have an update iproute2. | ||
375 | |||
376 | config NET_CLS_IND | ||
377 | bool "classify input device (slows things u32/fw) " | ||
378 | depends on NET_CLS_U32 || NET_CLS_FW | ||
379 | help | ||
380 | This option will be killed eventually when a | ||
381 | metadata action appears because it slows things a little | ||
382 | Available only for u32 and fw classifiers. | ||
383 | Requires a new iproute2 | ||
384 | You MUST NOT turn this on if you dont have an update iproute2. | ||
385 | 339 | ||
386 | config CLS_U32_MARK | 340 | config CLS_U32_MARK |
387 | bool "Use nfmark as a key in U32 classifier" | 341 | bool "Netfilter marks support" |
388 | depends on NET_CLS_U32 && NETFILTER | 342 | depends on NET_CLS_U32 && NETFILTER |
389 | help | 343 | ---help--- |
390 | This allows you to match mark in a u32 filter. | 344 | Say Y here to be able to use netfilter marks as u32 key. |
391 | Example: | ||
392 | tc filter add dev eth0 protocol ip parent 1:0 prio 5 u32 \ | ||
393 | match mark 0x0090 0xffff \ | ||
394 | match ip dst 4.4.4.4 \ | ||
395 | flowid 1:90 | ||
396 | You must use a new iproute2 to use this feature. | ||
397 | 345 | ||
398 | config NET_CLS_RSVP | 346 | config NET_CLS_RSVP |
399 | tristate "Special RSVP classifier" | 347 | tristate "IPv4 Resource Reservation Protocol (RSVP)" |
400 | depends on NET_CLS && NET_QOS | 348 | depends on NET_SCHED |
349 | select NET_CLS | ||
350 | select NET_ESTIMATOR | ||
401 | ---help--- | 351 | ---help--- |
402 | The Resource Reservation Protocol (RSVP) permits end systems to | 352 | The Resource Reservation Protocol (RSVP) permits end systems to |
403 | request a minimum and maximum data flow rate for a connection; this | 353 | request a minimum and maximum data flow rate for a connection; this |
@@ -410,31 +360,33 @@ config NET_CLS_RSVP | |||
410 | module will be called cls_rsvp. | 360 | module will be called cls_rsvp. |
411 | 361 | ||
412 | config NET_CLS_RSVP6 | 362 | config NET_CLS_RSVP6 |
413 | tristate "Special RSVP classifier for IPv6" | 363 | tristate "IPv6 Resource Reservation Protocol (RSVP6)" |
414 | depends on NET_CLS && NET_QOS | 364 | depends on NET_SCHED |
365 | select NET_CLS | ||
366 | select NET_ESTIMATOR | ||
415 | ---help--- | 367 | ---help--- |
416 | The Resource Reservation Protocol (RSVP) permits end systems to | 368 | The Resource Reservation Protocol (RSVP) permits end systems to |
417 | request a minimum and maximum data flow rate for a connection; this | 369 | request a minimum and maximum data flow rate for a connection; this |
418 | is important for real time data such as streaming sound or video. | 370 | is important for real time data such as streaming sound or video. |
419 | 371 | ||
420 | Say Y here if you want to be able to classify outgoing packets based | 372 | Say Y here if you want to be able to classify outgoing packets based |
421 | on their RSVP requests and you are using the new Internet Protocol | 373 | on their RSVP requests and you are using IPv6. |
422 | IPv6 as opposed to the older and more common IPv4. | ||
423 | 374 | ||
424 | To compile this code as a module, choose M here: the | 375 | To compile this code as a module, choose M here: the |
425 | module will be called cls_rsvp6. | 376 | module will be called cls_rsvp6. |
426 | 377 | ||
427 | config NET_EMATCH | 378 | config NET_EMATCH |
428 | bool "Extended Matches" | 379 | bool "Extended Matches" |
429 | depends on NET_CLS | 380 | depends on NET_SCHED |
381 | select NET_CLS | ||
430 | ---help--- | 382 | ---help--- |
431 | Say Y here if you want to use extended matches on top of classifiers | 383 | Say Y here if you want to use extended matches on top of classifiers |
432 | and select the extended matches below. | 384 | and select the extended matches below. |
433 | 385 | ||
434 | Extended matches are small classification helpers not worth writing | 386 | Extended matches are small classification helpers not worth writing |
435 | a separate classifier. | 387 | a separate classifier for. |
436 | 388 | ||
437 | You must have a recent version of the iproute2 tools in order to use | 389 | A recent version of the iproute2 package is required to use |
438 | extended matches. | 390 | extended matches. |
439 | 391 | ||
440 | config NET_EMATCH_STACK | 392 | config NET_EMATCH_STACK |
@@ -468,7 +420,7 @@ config NET_EMATCH_NBYTE | |||
468 | module will be called em_nbyte. | 420 | module will be called em_nbyte. |
469 | 421 | ||
470 | config NET_EMATCH_U32 | 422 | config NET_EMATCH_U32 |
471 | tristate "U32 hashing key" | 423 | tristate "U32 key" |
472 | depends on NET_EMATCH | 424 | depends on NET_EMATCH |
473 | ---help--- | 425 | ---help--- |
474 | Say Y here if you want to be able to classify packets using | 426 | Say Y here if you want to be able to classify packets using |
@@ -496,76 +448,120 @@ config NET_EMATCH_TEXT | |||
496 | select TEXTSEARCH_BM | 448 | select TEXTSEARCH_BM |
497 | select TEXTSEARCH_FSM | 449 | select TEXTSEARCH_FSM |
498 | ---help--- | 450 | ---help--- |
499 | Say Y here if you want to be ablt to classify packets based on | 451 | Say Y here if you want to be able to classify packets based on |
500 | textsearch comparisons. | 452 | textsearch comparisons. |
501 | 453 | ||
502 | To compile this code as a module, choose M here: the | 454 | To compile this code as a module, choose M here: the |
503 | module will be called em_text. | 455 | module will be called em_text. |
504 | 456 | ||
505 | config NET_CLS_ACT | 457 | config NET_CLS_ACT |
506 | bool "Packet ACTION" | 458 | bool "Actions" |
507 | depends on EXPERIMENTAL && NET_CLS && NET_QOS | 459 | depends on EXPERIMENTAL && NET_SCHED |
460 | select NET_ESTIMATOR | ||
508 | ---help--- | 461 | ---help--- |
509 | This option requires you have a new iproute2. It enables | 462 | Say Y here if you want to use traffic control actions. Actions |
510 | tc extensions which can be used with tc classifiers. | 463 | get attached to classifiers and are invoked after a successful |
511 | You MUST NOT turn this on if you dont have an update iproute2. | 464 | classification. They are used to overwrite the classification |
465 | result, instantly drop or redirect packets, etc. | ||
466 | |||
467 | A recent version of the iproute2 package is required to use | ||
468 | actions. | ||
512 | 469 | ||
513 | config NET_ACT_POLICE | 470 | config NET_ACT_POLICE |
514 | tristate "Policing Actions" | 471 | tristate "Traffic Policing" |
515 | depends on NET_CLS_ACT | 472 | depends on NET_CLS_ACT |
516 | ---help--- | 473 | ---help--- |
517 | If you are using a newer iproute2 select this one, otherwise use one | 474 | Say Y here if you want to do traffic policing, i.e. strict |
518 | below to select a policer. | 475 | bandwidth limiting. This action replaces the existing policing |
519 | You MUST NOT turn this on if you dont have an update iproute2. | 476 | module. |
477 | |||
478 | To compile this code as a module, choose M here: the | ||
479 | module will be called police. | ||
520 | 480 | ||
521 | config NET_ACT_GACT | 481 | config NET_ACT_GACT |
522 | tristate "generic Actions" | 482 | tristate "Generic actions" |
523 | depends on NET_CLS_ACT | 483 | depends on NET_CLS_ACT |
524 | ---help--- | 484 | ---help--- |
525 | You must have new iproute2 to use this feature. | 485 | Say Y here to take generic actions such as dropping and |
526 | This adds simple filtering actions like drop, accept etc. | 486 | accepting packets. |
487 | |||
488 | To compile this code as a module, choose M here: the | ||
489 | module will be called gact. | ||
527 | 490 | ||
528 | config GACT_PROB | 491 | config GACT_PROB |
529 | bool "generic Actions probability" | 492 | bool "Probability support" |
530 | depends on NET_ACT_GACT | 493 | depends on NET_ACT_GACT |
531 | ---help--- | 494 | ---help--- |
532 | Allows generic actions to be randomly or deterministically used. | 495 | Say Y here to use the generic action randomly or deterministically. |
533 | 496 | ||
534 | config NET_ACT_MIRRED | 497 | config NET_ACT_MIRRED |
535 | tristate "Packet In/Egress redirecton/mirror Actions" | 498 | tristate "Redirecting and Mirroring" |
536 | depends on NET_CLS_ACT | 499 | depends on NET_CLS_ACT |
537 | ---help--- | 500 | ---help--- |
538 | requires new iproute2 | 501 | Say Y here to allow packets to be mirrored or redirected to |
539 | This allows packets to be mirrored or redirected to netdevices | 502 | other devices. |
503 | |||
504 | To compile this code as a module, choose M here: the | ||
505 | module will be called mirred. | ||
540 | 506 | ||
541 | config NET_ACT_IPT | 507 | config NET_ACT_IPT |
542 | tristate "iptables Actions" | 508 | tristate "IPtables targets" |
543 | depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES | 509 | depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES |
544 | ---help--- | 510 | ---help--- |
545 | requires new iproute2 | 511 | Say Y here to be able to invoke iptables targets after successful |
546 | This allows iptables targets to be used by tc filters | 512 | classification. |
513 | |||
514 | To compile this code as a module, choose M here: the | ||
515 | module will be called ipt. | ||
547 | 516 | ||
548 | config NET_ACT_PEDIT | 517 | config NET_ACT_PEDIT |
549 | tristate "Generic Packet Editor Actions" | 518 | tristate "Packet Editing" |
550 | depends on NET_CLS_ACT | 519 | depends on NET_CLS_ACT |
551 | ---help--- | 520 | ---help--- |
552 | requires new iproute2 | 521 | Say Y here if you want to mangle the content of packets. |
553 | This allows for packets to be generically edited | ||
554 | 522 | ||
555 | config NET_CLS_POLICE | 523 | To compile this code as a module, choose M here: the |
556 | bool "Traffic policing (needed for in/egress)" | 524 | module will be called pedit. |
557 | depends on NET_CLS && NET_QOS && NET_CLS_ACT!=y | ||
558 | help | ||
559 | Say Y to support traffic policing (bandwidth limits). Needed for | ||
560 | ingress and egress rate limiting. | ||
561 | 525 | ||
562 | config NET_ACT_SIMP | 526 | config NET_ACT_SIMP |
563 | tristate "Simple action" | 527 | tristate "Simple Example (Debug)" |
564 | depends on NET_CLS_ACT | 528 | depends on NET_CLS_ACT |
565 | ---help--- | 529 | ---help--- |
566 | You must have new iproute2 to use this feature. | 530 | Say Y here to add a simple action for demonstration purposes. |
567 | This adds a very simple action for demonstration purposes | 531 | It is meant as an example and for debugging purposes. It will |
568 | The idea is to give action authors a basic example to look at. | 532 | print a configured policy string followed by the packet count |
569 | All this action will do is print on the console the configured | 533 | to the console for every packet that passes by. |
570 | policy string followed by _ then packet count. | 534 | |
535 | If unsure, say N. | ||
536 | |||
537 | To compile this code as a module, choose M here: the | ||
538 | module will be called simple. | ||
539 | |||
540 | config NET_CLS_POLICE | ||
541 | bool "Traffic Policing (obsolete)" | ||
542 | depends on NET_SCHED && NET_CLS_ACT!=y | ||
543 | select NET_ESTIMATOR | ||
544 | ---help--- | ||
545 | Say Y here if you want to do traffic policing, i.e. strict | ||
546 | bandwidth limiting. This option is obsoleted by the traffic | ||
547 | policer implemented as an action; it stays here for compatibility | ||
548 | reasons. | ||
549 | |||
550 | config NET_CLS_IND | ||
551 | bool "Incoming device classification" | ||
552 | depends on NET_SCHED && (NET_CLS_U32 || NET_CLS_FW) | ||
553 | ---help--- | ||
554 | Say Y here to extend the u32 and fw classifiers to support | ||
555 | classification based on the incoming device. This option is | ||
556 | likely to disappear in favour of the metadata ematch. | ||
557 | |||
558 | config NET_ESTIMATOR | ||
559 | bool "Rate estimator" | ||
560 | depends on NET_SCHED | ||
561 | ---help--- | ||
562 | Say Y here to allow using rate estimators to estimate the current | ||
563 | rate-of-flow for network devices, queues, etc. This module is | ||
564 | automatically selected if needed but can be selected manually for | ||
565 | statistical purposes. | ||
571 | 566 | ||
567 | endmenu | ||
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 25c171c327..29a2dd9f30 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c | |||
@@ -15,247 +15,281 @@ | |||
15 | * from Ren Liu | 15 | * from Ren Liu |
16 | * - More error checks | 16 | * - More error checks |
17 | * | 17 | * |
18 | * | 18 | * For all the glorious comments look at include/net/red.h |
19 | * | ||
20 | * For all the glorious comments look at Alexey's sch_red.c | ||
21 | */ | 19 | */ |
22 | 20 | ||
23 | #include <linux/config.h> | 21 | #include <linux/config.h> |
24 | #include <linux/module.h> | 22 | #include <linux/module.h> |
25 | #include <asm/uaccess.h> | ||
26 | #include <asm/system.h> | ||
27 | #include <linux/bitops.h> | ||
28 | #include <linux/types.h> | 23 | #include <linux/types.h> |
29 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
30 | #include <linux/sched.h> | ||
31 | #include <linux/string.h> | ||
32 | #include <linux/mm.h> | ||
33 | #include <linux/socket.h> | ||
34 | #include <linux/sockios.h> | ||
35 | #include <linux/in.h> | ||
36 | #include <linux/errno.h> | ||
37 | #include <linux/interrupt.h> | ||
38 | #include <linux/if_ether.h> | ||
39 | #include <linux/inet.h> | ||
40 | #include <linux/netdevice.h> | 25 | #include <linux/netdevice.h> |
41 | #include <linux/etherdevice.h> | ||
42 | #include <linux/notifier.h> | ||
43 | #include <net/ip.h> | ||
44 | #include <net/route.h> | ||
45 | #include <linux/skbuff.h> | 26 | #include <linux/skbuff.h> |
46 | #include <net/sock.h> | ||
47 | #include <net/pkt_sched.h> | 27 | #include <net/pkt_sched.h> |
28 | #include <net/red.h> | ||
48 | 29 | ||
49 | #if 1 /* control */ | 30 | #define GRED_DEF_PRIO (MAX_DPs / 2) |
50 | #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) | 31 | #define GRED_VQ_MASK (MAX_DPs - 1) |
51 | #else | ||
52 | #define DPRINTK(format,args...) | ||
53 | #endif | ||
54 | |||
55 | #if 0 /* data */ | ||
56 | #define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args) | ||
57 | #else | ||
58 | #define D2PRINTK(format,args...) | ||
59 | #endif | ||
60 | 32 | ||
61 | struct gred_sched_data; | 33 | struct gred_sched_data; |
62 | struct gred_sched; | 34 | struct gred_sched; |
63 | 35 | ||
64 | struct gred_sched_data | 36 | struct gred_sched_data |
65 | { | 37 | { |
66 | /* Parameters */ | ||
67 | u32 limit; /* HARD maximal queue length */ | 38 | u32 limit; /* HARD maximal queue length */ |
68 | u32 qth_min; /* Min average length threshold: A scaled */ | ||
69 | u32 qth_max; /* Max average length threshold: A scaled */ | ||
70 | u32 DP; /* the drop parameters */ | 39 | u32 DP; /* the drop parameters */ |
71 | char Wlog; /* log(W) */ | ||
72 | char Plog; /* random number bits */ | ||
73 | u32 Scell_max; | ||
74 | u32 Rmask; | ||
75 | u32 bytesin; /* bytes seen on virtualQ so far*/ | 40 | u32 bytesin; /* bytes seen on virtualQ so far*/ |
76 | u32 packetsin; /* packets seen on virtualQ so far*/ | 41 | u32 packetsin; /* packets seen on virtualQ so far*/ |
77 | u32 backlog; /* bytes on the virtualQ */ | 42 | u32 backlog; /* bytes on the virtualQ */ |
78 | u32 forced; /* packets dropped for exceeding limits */ | 43 | u8 prio; /* the prio of this vq */ |
79 | u32 early; /* packets dropped as a warning */ | 44 | |
80 | u32 other; /* packets dropped by invoking drop() */ | 45 | struct red_parms parms; |
81 | u32 pdrop; /* packets dropped because we exceeded physical queue limits */ | 46 | struct red_stats stats; |
82 | char Scell_log; | 47 | }; |
83 | u8 Stab[256]; | 48 | |
84 | u8 prio; /* the prio of this vq */ | 49 | enum { |
85 | 50 | GRED_WRED_MODE = 1, | |
86 | /* Variables */ | 51 | GRED_RIO_MODE, |
87 | unsigned long qave; /* Average queue length: A scaled */ | ||
88 | int qcount; /* Packets since last random number generation */ | ||
89 | u32 qR; /* Cached random number */ | ||
90 | |||
91 | psched_time_t qidlestart; /* Start of idle period */ | ||
92 | }; | 52 | }; |
93 | 53 | ||
94 | struct gred_sched | 54 | struct gred_sched |
95 | { | 55 | { |
96 | struct gred_sched_data *tab[MAX_DPs]; | 56 | struct gred_sched_data *tab[MAX_DPs]; |
97 | u32 DPs; | 57 | unsigned long flags; |
98 | u32 def; | 58 | u32 red_flags; |
99 | u8 initd; | 59 | u32 DPs; |
100 | u8 grio; | 60 | u32 def; |
101 | u8 eqp; | 61 | struct red_parms wred_set; |
102 | }; | 62 | }; |
103 | 63 | ||
104 | static int | 64 | static inline int gred_wred_mode(struct gred_sched *table) |
105 | gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) | ||
106 | { | 65 | { |
107 | psched_time_t now; | 66 | return test_bit(GRED_WRED_MODE, &table->flags); |
108 | struct gred_sched_data *q=NULL; | 67 | } |
109 | struct gred_sched *t= qdisc_priv(sch); | 68 | |
110 | unsigned long qave=0; | 69 | static inline void gred_enable_wred_mode(struct gred_sched *table) |
111 | int i=0; | 70 | { |
71 | __set_bit(GRED_WRED_MODE, &table->flags); | ||
72 | } | ||
73 | |||
74 | static inline void gred_disable_wred_mode(struct gred_sched *table) | ||
75 | { | ||
76 | __clear_bit(GRED_WRED_MODE, &table->flags); | ||
77 | } | ||
78 | |||
79 | static inline int gred_rio_mode(struct gred_sched *table) | ||
80 | { | ||
81 | return test_bit(GRED_RIO_MODE, &table->flags); | ||
82 | } | ||
83 | |||
84 | static inline void gred_enable_rio_mode(struct gred_sched *table) | ||
85 | { | ||
86 | __set_bit(GRED_RIO_MODE, &table->flags); | ||
87 | } | ||
88 | |||
89 | static inline void gred_disable_rio_mode(struct gred_sched *table) | ||
90 | { | ||
91 | __clear_bit(GRED_RIO_MODE, &table->flags); | ||
92 | } | ||
93 | |||
94 | static inline int gred_wred_mode_check(struct Qdisc *sch) | ||
95 | { | ||
96 | struct gred_sched *table = qdisc_priv(sch); | ||
97 | int i; | ||
112 | 98 | ||
113 | if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) { | 99 | /* Really ugly O(n^2) but shouldn't be needed too frequently. */ |
114 | D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n"); | 100 | for (i = 0; i < table->DPs; i++) { |
115 | goto do_enqueue; | 101 | struct gred_sched_data *q = table->tab[i]; |
102 | int n; | ||
103 | |||
104 | if (q == NULL) | ||
105 | continue; | ||
106 | |||
107 | for (n = 0; n < table->DPs; n++) | ||
108 | if (table->tab[n] && table->tab[n] != q && | ||
109 | table->tab[n]->prio == q->prio) | ||
110 | return 1; | ||
116 | } | 111 | } |
117 | 112 | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | static inline unsigned int gred_backlog(struct gred_sched *table, | ||
117 | struct gred_sched_data *q, | ||
118 | struct Qdisc *sch) | ||
119 | { | ||
120 | if (gred_wred_mode(table)) | ||
121 | return sch->qstats.backlog; | ||
122 | else | ||
123 | return q->backlog; | ||
124 | } | ||
125 | |||
126 | static inline u16 tc_index_to_dp(struct sk_buff *skb) | ||
127 | { | ||
128 | return skb->tc_index & GRED_VQ_MASK; | ||
129 | } | ||
130 | |||
131 | static inline void gred_load_wred_set(struct gred_sched *table, | ||
132 | struct gred_sched_data *q) | ||
133 | { | ||
134 | q->parms.qavg = table->wred_set.qavg; | ||
135 | q->parms.qidlestart = table->wred_set.qidlestart; | ||
136 | } | ||
137 | |||
138 | static inline void gred_store_wred_set(struct gred_sched *table, | ||
139 | struct gred_sched_data *q) | ||
140 | { | ||
141 | table->wred_set.qavg = q->parms.qavg; | ||
142 | } | ||
143 | |||
144 | static inline int gred_use_ecn(struct gred_sched *t) | ||
145 | { | ||
146 | return t->red_flags & TC_RED_ECN; | ||
147 | } | ||
118 | 148 | ||
119 | if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) { | 149 | static inline int gred_use_harddrop(struct gred_sched *t) |
120 | printk("GRED: setting to default (%d)\n ",t->def); | 150 | { |
121 | if (!(q=t->tab[t->def])) { | 151 | return t->red_flags & TC_RED_HARDDROP; |
122 | DPRINTK("GRED: setting to default FAILED! dropping!! " | 152 | } |
123 | "(%d)\n ", t->def); | 153 | |
124 | goto drop; | 154 | static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) |
155 | { | ||
156 | struct gred_sched_data *q=NULL; | ||
157 | struct gred_sched *t= qdisc_priv(sch); | ||
158 | unsigned long qavg = 0; | ||
159 | u16 dp = tc_index_to_dp(skb); | ||
160 | |||
161 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | ||
162 | dp = t->def; | ||
163 | |||
164 | if ((q = t->tab[dp]) == NULL) { | ||
165 | /* Pass through packets not assigned to a DP | ||
166 | * if no default DP has been configured. This | ||
167 | * allows for DP flows to be left untouched. | ||
168 | */ | ||
169 | if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) | ||
170 | return qdisc_enqueue_tail(skb, sch); | ||
171 | else | ||
172 | goto drop; | ||
125 | } | 173 | } |
174 | |||
126 | /* fix tc_index? --could be controversial but needed for | 175 | /* fix tc_index? --could be controversial but needed for |
127 | requeueing */ | 176 | requeueing */ |
128 | skb->tc_index=(skb->tc_index&0xfffffff0) | t->def; | 177 | skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; |
129 | } | 178 | } |
130 | 179 | ||
131 | D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d " | 180 | /* sum up all the qaves of prios <= to ours to get the new qave */ |
132 | "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog, | 181 | if (!gred_wred_mode(t) && gred_rio_mode(t)) { |
133 | sch->qstats.backlog); | 182 | int i; |
134 | /* sum up all the qaves of prios <= to ours to get the new qave*/ | 183 | |
135 | if (!t->eqp && t->grio) { | 184 | for (i = 0; i < t->DPs; i++) { |
136 | for (i=0;i<t->DPs;i++) { | 185 | if (t->tab[i] && t->tab[i]->prio < q->prio && |
137 | if ((!t->tab[i]) || (i==q->DP)) | 186 | !red_is_idling(&t->tab[i]->parms)) |
138 | continue; | 187 | qavg +=t->tab[i]->parms.qavg; |
139 | |||
140 | if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart))) | ||
141 | qave +=t->tab[i]->qave; | ||
142 | } | 188 | } |
143 | 189 | ||
144 | } | 190 | } |
145 | 191 | ||
146 | q->packetsin++; | 192 | q->packetsin++; |
147 | q->bytesin+=skb->len; | 193 | q->bytesin += skb->len; |
148 | 194 | ||
149 | if (t->eqp && t->grio) { | 195 | if (gred_wred_mode(t)) |
150 | qave=0; | 196 | gred_load_wred_set(t, q); |
151 | q->qave=t->tab[t->def]->qave; | ||
152 | q->qidlestart=t->tab[t->def]->qidlestart; | ||
153 | } | ||
154 | 197 | ||
155 | if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { | 198 | q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch)); |
156 | long us_idle; | ||
157 | PSCHED_GET_TIME(now); | ||
158 | us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); | ||
159 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
160 | 199 | ||
161 | q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF]; | 200 | if (red_is_idling(&q->parms)) |
162 | } else { | 201 | red_end_of_idle_period(&q->parms); |
163 | if (t->eqp) { | ||
164 | q->qave += sch->qstats.backlog - (q->qave >> q->Wlog); | ||
165 | } else { | ||
166 | q->qave += q->backlog - (q->qave >> q->Wlog); | ||
167 | } | ||
168 | 202 | ||
169 | } | 203 | if (gred_wred_mode(t)) |
170 | 204 | gred_store_wred_set(t, q); | |
171 | |||
172 | if (t->eqp && t->grio) | ||
173 | t->tab[t->def]->qave=q->qave; | ||
174 | |||
175 | if ((q->qave+qave) < q->qth_min) { | ||
176 | q->qcount = -1; | ||
177 | enqueue: | ||
178 | if (q->backlog + skb->len <= q->limit) { | ||
179 | q->backlog += skb->len; | ||
180 | do_enqueue: | ||
181 | __skb_queue_tail(&sch->q, skb); | ||
182 | sch->qstats.backlog += skb->len; | ||
183 | sch->bstats.bytes += skb->len; | ||
184 | sch->bstats.packets++; | ||
185 | return 0; | ||
186 | } else { | ||
187 | q->pdrop++; | ||
188 | } | ||
189 | 205 | ||
190 | drop: | 206 | switch (red_action(&q->parms, q->parms.qavg + qavg)) { |
191 | kfree_skb(skb); | 207 | case RED_DONT_MARK: |
192 | sch->qstats.drops++; | 208 | break; |
193 | return NET_XMIT_DROP; | 209 | |
194 | } | 210 | case RED_PROB_MARK: |
195 | if ((q->qave+qave) >= q->qth_max) { | 211 | sch->qstats.overlimits++; |
196 | q->qcount = -1; | 212 | if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { |
197 | sch->qstats.overlimits++; | 213 | q->stats.prob_drop++; |
198 | q->forced++; | 214 | goto congestion_drop; |
199 | goto drop; | 215 | } |
216 | |||
217 | q->stats.prob_mark++; | ||
218 | break; | ||
219 | |||
220 | case RED_HARD_MARK: | ||
221 | sch->qstats.overlimits++; | ||
222 | if (gred_use_harddrop(t) || !gred_use_ecn(t) || | ||
223 | !INET_ECN_set_ce(skb)) { | ||
224 | q->stats.forced_drop++; | ||
225 | goto congestion_drop; | ||
226 | } | ||
227 | q->stats.forced_mark++; | ||
228 | break; | ||
200 | } | 229 | } |
201 | if (++q->qcount) { | 230 | |
202 | if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR) | 231 | if (q->backlog + skb->len <= q->limit) { |
203 | goto enqueue; | 232 | q->backlog += skb->len; |
204 | q->qcount = 0; | 233 | return qdisc_enqueue_tail(skb, sch); |
205 | q->qR = net_random()&q->Rmask; | ||
206 | sch->qstats.overlimits++; | ||
207 | q->early++; | ||
208 | goto drop; | ||
209 | } | 234 | } |
210 | q->qR = net_random()&q->Rmask; | 235 | |
211 | goto enqueue; | 236 | q->stats.pdrop++; |
237 | drop: | ||
238 | return qdisc_drop(skb, sch); | ||
239 | |||
240 | congestion_drop: | ||
241 | qdisc_drop(skb, sch); | ||
242 | return NET_XMIT_CN; | ||
212 | } | 243 | } |
213 | 244 | ||
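In RIO mode, gred_enqueue() above judges a packet against its own VQ's average plus the averages of every non-idling VQ with a lower prio value before calling red_action(). A standalone sketch of that aggregation (plain structs, field and function names are my own; not kernel code):

    #include <stdio.h>

    struct vq {
    	int present;		/* slot in tab[] is allocated */
    	int idling;		/* stands in for red_is_idling() */
    	unsigned int prio;	/* lower value = higher priority */
    	unsigned long qavg;	/* scaled average queue length */
    };

    /* Sum the averages of all active, strictly higher-priority VQs into
     * our own, mirroring the loop in gred_enqueue() (illustration only). */
    static unsigned long rio_aggregate_qavg(const struct vq *tab, int ndps,
    					const struct vq *q)
    {
    	unsigned long qavg = q->qavg;
    	int i;

    	for (i = 0; i < ndps; i++)
    		if (tab[i].present && tab[i].prio < q->prio && !tab[i].idling)
    			qavg += tab[i].qavg;

    	return qavg;
    }

    int main(void)
    {
    	struct vq tab[3] = {
    		{ 1, 0, 1, 400 },	/* higher priority, active */
    		{ 1, 1, 1, 900 },	/* higher priority but idling */
    		{ 1, 0, 2, 300 },	/* the VQ being enqueued to */
    	};

    	/* prints 700 = 300 + 400; the idling VQ's average is excluded */
    	printf("aggregate qavg = %lu\n", rio_aggregate_qavg(tab, 3, &tab[2]));
    	return 0;
    }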
214 | static int | 245 | static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) |
215 | gred_requeue(struct sk_buff *skb, struct Qdisc* sch) | ||
216 | { | 246 | { |
247 | struct gred_sched *t = qdisc_priv(sch); | ||
217 | struct gred_sched_data *q; | 248 | struct gred_sched_data *q; |
218 | struct gred_sched *t= qdisc_priv(sch); | 249 | u16 dp = tc_index_to_dp(skb); |
219 | q= t->tab[(skb->tc_index&0xf)]; | 250 | |
220 | /* error checking here -- probably unnecessary */ | 251 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
221 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 252 | if (net_ratelimit()) |
222 | 253 | printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x " | |
223 | __skb_queue_head(&sch->q, skb); | 254 | "for requeue, screwing up backlog.\n", |
224 | sch->qstats.backlog += skb->len; | 255 | tc_index_to_dp(skb)); |
225 | sch->qstats.requeues++; | 256 | } else { |
226 | q->backlog += skb->len; | 257 | if (red_is_idling(&q->parms)) |
227 | return 0; | 258 | red_end_of_idle_period(&q->parms); |
259 | q->backlog += skb->len; | ||
260 | } | ||
261 | |||
262 | return qdisc_requeue(skb, sch); | ||
228 | } | 263 | } |
229 | 264 | ||
230 | static struct sk_buff * | 265 | static struct sk_buff *gred_dequeue(struct Qdisc* sch) |
231 | gred_dequeue(struct Qdisc* sch) | ||
232 | { | 266 | { |
233 | struct sk_buff *skb; | 267 | struct sk_buff *skb; |
234 | struct gred_sched_data *q; | 268 | struct gred_sched *t = qdisc_priv(sch); |
235 | struct gred_sched *t= qdisc_priv(sch); | 269 | |
270 | skb = qdisc_dequeue_head(sch); | ||
236 | 271 | ||
237 | skb = __skb_dequeue(&sch->q); | ||
238 | if (skb) { | 272 | if (skb) { |
239 | sch->qstats.backlog -= skb->len; | 273 | struct gred_sched_data *q; |
240 | q= t->tab[(skb->tc_index&0xf)]; | 274 | u16 dp = tc_index_to_dp(skb); |
241 | if (q) { | 275 | |
242 | q->backlog -= skb->len; | 276 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
243 | if (!q->backlog && !t->eqp) | 277 | if (net_ratelimit()) |
244 | PSCHED_GET_TIME(q->qidlestart); | 278 | printk(KERN_WARNING "GRED: Unable to relocate " |
279 | "VQ 0x%x after dequeue, screwing up " | ||
280 | "backlog.\n", tc_index_to_dp(skb)); | ||
245 | } else { | 281 | } else { |
246 | D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); | 282 | q->backlog -= skb->len; |
283 | |||
284 | if (!q->backlog && !gred_wred_mode(t)) | ||
285 | red_start_of_idle_period(&q->parms); | ||
247 | } | 286 | } |
287 | |||
248 | return skb; | 288 | return skb; |
249 | } | 289 | } |
250 | 290 | ||
251 | if (t->eqp) { | 291 | if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) |
252 | q= t->tab[t->def]; | 292 | red_start_of_idle_period(&t->wred_set); |
253 | if (!q) | ||
254 | D2PRINTK("no default VQ set: Results will be " | ||
255 | "screwed up\n"); | ||
256 | else | ||
257 | PSCHED_GET_TIME(q->qidlestart); | ||
258 | } | ||
259 | 293 | ||
260 | return NULL; | 294 | return NULL; |
261 | } | 295 | } |
@@ -263,36 +297,34 @@ gred_dequeue(struct Qdisc* sch) | |||
263 | static unsigned int gred_drop(struct Qdisc* sch) | 297 | static unsigned int gred_drop(struct Qdisc* sch) |
264 | { | 298 | { |
265 | struct sk_buff *skb; | 299 | struct sk_buff *skb; |
300 | struct gred_sched *t = qdisc_priv(sch); | ||
266 | 301 | ||
267 | struct gred_sched_data *q; | 302 | skb = qdisc_dequeue_tail(sch); |
268 | struct gred_sched *t= qdisc_priv(sch); | ||
269 | |||
270 | skb = __skb_dequeue_tail(&sch->q); | ||
271 | if (skb) { | 303 | if (skb) { |
272 | unsigned int len = skb->len; | 304 | unsigned int len = skb->len; |
273 | sch->qstats.backlog -= len; | 305 | struct gred_sched_data *q; |
274 | sch->qstats.drops++; | 306 | u16 dp = tc_index_to_dp(skb); |
275 | q= t->tab[(skb->tc_index&0xf)]; | 307 | |
276 | if (q) { | 308 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
277 | q->backlog -= len; | 309 | if (net_ratelimit()) |
278 | q->other++; | 310 | printk(KERN_WARNING "GRED: Unable to relocate " |
279 | if (!q->backlog && !t->eqp) | 311 | "VQ 0x%x while dropping, screwing up " |
280 | PSCHED_GET_TIME(q->qidlestart); | 312 | "backlog.\n", tc_index_to_dp(skb)); |
281 | } else { | 313 | } else { |
282 | D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); | 314 | q->backlog -= len; |
315 | q->stats.other++; | ||
316 | |||
317 | if (!q->backlog && !gred_wred_mode(t)) | ||
318 | red_start_of_idle_period(&q->parms); | ||
283 | } | 319 | } |
284 | 320 | ||
285 | kfree_skb(skb); | 321 | qdisc_drop(skb, sch); |
286 | return len; | 322 | return len; |
287 | } | 323 | } |
288 | 324 | ||
289 | q=t->tab[t->def]; | 325 | if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) |
290 | if (!q) { | 326 | red_start_of_idle_period(&t->wred_set); |
291 | D2PRINTK("no default VQ set: Results might be screwed up\n"); | ||
292 | return 0; | ||
293 | } | ||
294 | 327 | ||
295 | PSCHED_GET_TIME(q->qidlestart); | ||
296 | return 0; | 328 | return 0; |
297 | 329 | ||
298 | } | 330 | } |
@@ -300,293 +332,241 @@ static unsigned int gred_drop(struct Qdisc* sch) | |||
300 | static void gred_reset(struct Qdisc* sch) | 332 | static void gred_reset(struct Qdisc* sch) |
301 | { | 333 | { |
302 | int i; | 334 | int i; |
303 | struct gred_sched_data *q; | 335 | struct gred_sched *t = qdisc_priv(sch); |
304 | struct gred_sched *t= qdisc_priv(sch); | 336 | |
337 | qdisc_reset_queue(sch); | ||
305 | 338 | ||
306 | __skb_queue_purge(&sch->q); | 339 | for (i = 0; i < t->DPs; i++) { |
340 | struct gred_sched_data *q = t->tab[i]; | ||
307 | 341 | ||
308 | sch->qstats.backlog = 0; | 342 | if (!q) |
343 | continue; | ||
309 | 344 | ||
310 | for (i=0;i<t->DPs;i++) { | 345 | red_restart(&q->parms); |
311 | q= t->tab[i]; | ||
312 | if (!q) | ||
313 | continue; | ||
314 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
315 | q->qave = 0; | ||
316 | q->qcount = -1; | ||
317 | q->backlog = 0; | 346 | q->backlog = 0; |
318 | q->other=0; | ||
319 | q->forced=0; | ||
320 | q->pdrop=0; | ||
321 | q->early=0; | ||
322 | } | 347 | } |
323 | } | 348 | } |
324 | 349 | ||
325 | static int gred_change(struct Qdisc *sch, struct rtattr *opt) | 350 | static inline void gred_destroy_vq(struct gred_sched_data *q) |
351 | { | ||
352 | kfree(q); | ||
353 | } | ||
354 | |||
355 | static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps) | ||
326 | { | 356 | { |
327 | struct gred_sched *table = qdisc_priv(sch); | 357 | struct gred_sched *table = qdisc_priv(sch); |
328 | struct gred_sched_data *q; | ||
329 | struct tc_gred_qopt *ctl; | ||
330 | struct tc_gred_sopt *sopt; | 358 | struct tc_gred_sopt *sopt; |
331 | struct rtattr *tb[TCA_GRED_STAB]; | ||
332 | struct rtattr *tb2[TCA_GRED_DPS]; | ||
333 | int i; | 359 | int i; |
334 | 360 | ||
335 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt)) | 361 | if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt)) |
336 | return -EINVAL; | 362 | return -EINVAL; |
337 | 363 | ||
338 | if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) { | 364 | sopt = RTA_DATA(dps); |
339 | rtattr_parse_nested(tb2, TCA_GRED_DPS, opt); | 365 | |
366 | if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs) | ||
367 | return -EINVAL; | ||
340 | 368 | ||
341 | if (tb2[TCA_GRED_DPS-1] == 0) | 369 | sch_tree_lock(sch); |
342 | return -EINVAL; | 370 | table->DPs = sopt->DPs; |
371 | table->def = sopt->def_DP; | ||
372 | table->red_flags = sopt->flags; | ||
373 | |||
374 | /* | ||
375 | * Every entry point to GRED is synchronized with the above code | ||
376 | * and the DP is checked against DPs, i.e. shadowed VQs can no | ||
377 | * longer be found so we can unlock right here. | ||
378 | */ | ||
379 | sch_tree_unlock(sch); | ||
380 | |||
381 | if (sopt->grio) { | ||
382 | gred_enable_rio_mode(table); | ||
383 | gred_disable_wred_mode(table); | ||
384 | if (gred_wred_mode_check(sch)) | ||
385 | gred_enable_wred_mode(table); | ||
386 | } else { | ||
387 | gred_disable_rio_mode(table); | ||
388 | gred_disable_wred_mode(table); | ||
389 | } | ||
343 | 390 | ||
344 | sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); | 391 | for (i = table->DPs; i < MAX_DPs; i++) { |
345 | table->DPs=sopt->DPs; | 392 | if (table->tab[i]) { |
346 | table->def=sopt->def_DP; | 393 | printk(KERN_WARNING "GRED: Warning: Destroying " |
347 | table->grio=sopt->grio; | 394 | "shadowed VQ 0x%x\n", i); |
348 | table->initd=0; | 395 | gred_destroy_vq(table->tab[i]); |
349 | /* probably need to clear all the table DP entries as well */ | 396 | table->tab[i] = NULL; |
350 | return 0; | 397 | } |
351 | } | 398 | } |
352 | 399 | ||
400 | return 0; | ||
401 | } | ||
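For illustration, a hypothetical parameter block that would pass the checks in gred_change_table_def() above. The struct and flag names come from the patch itself; the values are my own example, not from the source. MAX_DPs is 16 here (the old code masked tc_index with 0xf), so DPs must be at most 16 and def_DP below DPs:

    /* Example values only: four virtual queues, VQ 0 as the fallback
     * for unclassified packets, RIO priority mode enabled. */
    struct tc_gred_sopt sopt = {
    	.DPs	= 4,	/* number of virtual queues in tab[] */
    	.def_DP	= 0,	/* must be < DPs */
    	.grio	= 1,	/* enable priority (RIO) mode */
    	.flags	= 0,	/* could carry TC_RED_ECN / TC_RED_HARDDROP */
    };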
353 | 402 | ||
354 | if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 || | 403 | static inline int gred_change_vq(struct Qdisc *sch, int dp, |
355 | RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || | 404 | struct tc_gred_qopt *ctl, int prio, u8 *stab) |
356 | RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256) | 405 | { |
357 | return -EINVAL; | 406 | struct gred_sched *table = qdisc_priv(sch); |
407 | struct gred_sched_data *q; | ||
358 | 408 | ||
359 | ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); | 409 | if (table->tab[dp] == NULL) { |
360 | if (ctl->DP > MAX_DPs-1 ) { | 410 | table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL); |
361 | /* misbehaving is punished! Put in the default drop probability */ | 411 | if (table->tab[dp] == NULL) |
362 | DPRINTK("\nGRED: DP %u not in the proper range fixed. New DP " | ||
363 | "set to default at %d\n",ctl->DP,table->def); | ||
364 | ctl->DP=table->def; | ||
365 | } | ||
366 | |||
367 | if (table->tab[ctl->DP] == NULL) { | ||
368 | table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data), | ||
369 | GFP_KERNEL); | ||
370 | if (NULL == table->tab[ctl->DP]) | ||
371 | return -ENOMEM; | 412 | return -ENOMEM; |
372 | memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data))); | 413 | memset(table->tab[dp], 0, sizeof(*q)); |
373 | } | ||
374 | q= table->tab[ctl->DP]; | ||
375 | |||
376 | if (table->grio) { | ||
377 | if (ctl->prio <=0) { | ||
378 | if (table->def && table->tab[table->def]) { | ||
379 | DPRINTK("\nGRED: DP %u does not have a prio" | ||
380 | "setting default to %d\n",ctl->DP, | ||
381 | table->tab[table->def]->prio); | ||
382 | q->prio=table->tab[table->def]->prio; | ||
383 | } else { | ||
384 | DPRINTK("\nGRED: DP %u does not have a prio" | ||
385 | " setting default to 8\n",ctl->DP); | ||
386 | q->prio=8; | ||
387 | } | ||
388 | } else { | ||
389 | q->prio=ctl->prio; | ||
390 | } | ||
391 | } else { | ||
392 | q->prio=8; | ||
393 | } | 414 | } |
394 | 415 | ||
395 | 416 | q = table->tab[dp]; | |
396 | q->DP=ctl->DP; | 417 | q->DP = dp; |
397 | q->Wlog = ctl->Wlog; | 418 | q->prio = prio; |
398 | q->Plog = ctl->Plog; | ||
399 | q->limit = ctl->limit; | 419 | q->limit = ctl->limit; |
400 | q->Scell_log = ctl->Scell_log; | ||
401 | q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; | ||
402 | q->Scell_max = (255<<q->Scell_log); | ||
403 | q->qth_min = ctl->qth_min<<ctl->Wlog; | ||
404 | q->qth_max = ctl->qth_max<<ctl->Wlog; | ||
405 | q->qave=0; | ||
406 | q->backlog=0; | ||
407 | q->qcount = -1; | ||
408 | q->other=0; | ||
409 | q->forced=0; | ||
410 | q->pdrop=0; | ||
411 | q->early=0; | ||
412 | |||
413 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
414 | memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); | ||
415 | |||
416 | if ( table->initd && table->grio) { | ||
417 | /* this looks ugly but it's not in the fast path */ | ||
418 | for (i=0;i<table->DPs;i++) { | ||
419 | if ((!table->tab[i]) || (i==q->DP) ) | ||
420 | continue; | ||
421 | if (table->tab[i]->prio == q->prio ){ | ||
422 | /* WRED mode detected */ | ||
423 | table->eqp=1; | ||
424 | break; | ||
425 | } | ||
426 | } | ||
427 | } | ||
428 | 420 | ||
429 | if (!table->initd) { | 421 | if (q->backlog == 0) |
430 | table->initd=1; | 422 | red_end_of_idle_period(&q->parms); |
431 | /* | ||
432 | the first entry also goes into the default until | ||
433 | over-written | ||
434 | */ | ||
435 | |||
436 | if (table->tab[table->def] == NULL) { | ||
437 | table->tab[table->def]= | ||
438 | kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL); | ||
439 | if (NULL == table->tab[table->def]) | ||
440 | return -ENOMEM; | ||
441 | |||
442 | memset(table->tab[table->def], 0, | ||
443 | (sizeof(struct gred_sched_data))); | ||
444 | } | ||
445 | q= table->tab[table->def]; | ||
446 | q->DP=table->def; | ||
447 | q->Wlog = ctl->Wlog; | ||
448 | q->Plog = ctl->Plog; | ||
449 | q->limit = ctl->limit; | ||
450 | q->Scell_log = ctl->Scell_log; | ||
451 | q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; | ||
452 | q->Scell_max = (255<<q->Scell_log); | ||
453 | q->qth_min = ctl->qth_min<<ctl->Wlog; | ||
454 | q->qth_max = ctl->qth_max<<ctl->Wlog; | ||
455 | |||
456 | if (table->grio) | ||
457 | q->prio=table->tab[ctl->DP]->prio; | ||
458 | else | ||
459 | q->prio=8; | ||
460 | |||
461 | q->qcount = -1; | ||
462 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
463 | memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); | ||
464 | } | ||
465 | return 0; | ||
466 | 423 | ||
424 | red_set_parms(&q->parms, | ||
425 | ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, | ||
426 | ctl->Scell_log, stab); | ||
427 | |||
428 | return 0; | ||
467 | } | 429 | } |
468 | 430 | ||
469 | static int gred_init(struct Qdisc *sch, struct rtattr *opt) | 431 | static int gred_change(struct Qdisc *sch, struct rtattr *opt) |
470 | { | 432 | { |
471 | struct gred_sched *table = qdisc_priv(sch); | 433 | struct gred_sched *table = qdisc_priv(sch); |
472 | struct tc_gred_sopt *sopt; | 434 | struct tc_gred_qopt *ctl; |
473 | struct rtattr *tb[TCA_GRED_STAB]; | 435 | struct rtattr *tb[TCA_GRED_MAX]; |
474 | struct rtattr *tb2[TCA_GRED_DPS]; | 436 | int err = -EINVAL, prio = GRED_DEF_PRIO; |
437 | u8 *stab; | ||
475 | 438 | ||
476 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt)) | 439 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) |
477 | return -EINVAL; | 440 | return -EINVAL; |
478 | 441 | ||
479 | if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) { | 442 | if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL) |
480 | rtattr_parse_nested(tb2, TCA_GRED_DPS, opt); | 443 | return gred_change_table_def(sch, opt); |
444 | |||
445 | if (tb[TCA_GRED_PARMS-1] == NULL || | ||
446 | RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || | ||
447 | tb[TCA_GRED_STAB-1] == NULL || | ||
448 | RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256) | ||
449 | return -EINVAL; | ||
450 | |||
451 | ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); | ||
452 | stab = RTA_DATA(tb[TCA_GRED_STAB-1]); | ||
453 | |||
454 | if (ctl->DP >= table->DPs) | ||
455 | goto errout; | ||
481 | 456 | ||
482 | if (tb2[TCA_GRED_DPS-1] == 0) | 457 | if (gred_rio_mode(table)) { |
483 | return -EINVAL; | 458 | if (ctl->prio == 0) { |
459 | int def_prio = GRED_DEF_PRIO; | ||
484 | 460 | ||
485 | sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); | 461 | if (table->tab[table->def]) |
486 | table->DPs=sopt->DPs; | 462 | def_prio = table->tab[table->def]->prio; |
487 | table->def=sopt->def_DP; | 463 | |
488 | table->grio=sopt->grio; | 464 | printk(KERN_DEBUG "GRED: DP %u does not have a prio, " |
489 | table->initd=0; | 465 | "setting default to %d\n", ctl->DP, def_prio); |
490 | return 0; | 466 | |
467 | prio = def_prio; | ||
468 | } else | ||
469 | prio = ctl->prio; | ||
470 | } | ||
471 | |||
472 | sch_tree_lock(sch); | ||
473 | |||
474 | err = gred_change_vq(sch, ctl->DP, ctl, prio, stab); | ||
475 | if (err < 0) | ||
476 | goto errout_locked; | ||
477 | |||
478 | if (gred_rio_mode(table)) { | ||
479 | gred_disable_wred_mode(table); | ||
480 | if (gred_wred_mode_check(sch)) | ||
481 | gred_enable_wred_mode(table); | ||
491 | } | 482 | } |
492 | 483 | ||
493 | DPRINTK("\n GRED_INIT error!\n"); | 484 | err = 0; |
494 | return -EINVAL; | 485 | |
486 | errout_locked: | ||
487 | sch_tree_unlock(sch); | ||
488 | errout: | ||
489 | return err; | ||
495 | } | 490 | } |
496 | 491 | ||
497 | static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) | 492 | static int gred_init(struct Qdisc *sch, struct rtattr *opt) |
498 | { | 493 | { |
499 | unsigned long qave; | 494 | struct rtattr *tb[TCA_GRED_MAX]; |
500 | struct rtattr *rta; | ||
501 | struct tc_gred_qopt *opt = NULL ; | ||
502 | struct tc_gred_qopt *dst; | ||
503 | struct gred_sched *table = qdisc_priv(sch); | ||
504 | struct gred_sched_data *q; | ||
505 | int i; | ||
506 | unsigned char *b = skb->tail; | ||
507 | 495 | ||
508 | rta = (struct rtattr*)b; | 496 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) |
509 | RTA_PUT(skb, TCA_OPTIONS, 0, NULL); | 497 | return -EINVAL; |
510 | 498 | ||
511 | opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL); | 499 | if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1]) |
500 | return -EINVAL; | ||
512 | 501 | ||
513 | if (opt == NULL) { | 502 | return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]); |
514 | DPRINTK("gred_dump:failed to malloc for %Zd\n", | 503 | } |
515 | sizeof(struct tc_gred_qopt)*MAX_DPs); | ||
516 | goto rtattr_failure; | ||
517 | } | ||
518 | 504 | ||
519 | memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs); | 505 | static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) |
506 | { | ||
507 | struct gred_sched *table = qdisc_priv(sch); | ||
508 | struct rtattr *parms, *opts = NULL; | ||
509 | int i; | ||
510 | struct tc_gred_sopt sopt = { | ||
511 | .DPs = table->DPs, | ||
512 | .def_DP = table->def, | ||
513 | .grio = gred_rio_mode(table), | ||
514 | .flags = table->red_flags, | ||
515 | }; | ||
520 | 516 | ||
521 | if (!table->initd) { | 517 | opts = RTA_NEST(skb, TCA_OPTIONS); |
522 | DPRINTK("NO GRED Queues setup!\n"); | 518 | RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt); |
523 | } | 519 | parms = RTA_NEST(skb, TCA_GRED_PARMS); |
520 | |||
521 | for (i = 0; i < MAX_DPs; i++) { | ||
522 | struct gred_sched_data *q = table->tab[i]; | ||
523 | struct tc_gred_qopt opt; | ||
524 | 524 | ||
525 | for (i=0;i<MAX_DPs;i++) { | 525 | memset(&opt, 0, sizeof(opt)); |
526 | dst= &opt[i]; | ||
527 | q= table->tab[i]; | ||
528 | 526 | ||
529 | if (!q) { | 527 | if (!q) { |
530 | /* hack -- fix at some point with proper message | 528 | /* hack -- fix at some point with proper message |
531 | This is how we indicate to tc that there is no VQ | 529 | This is how we indicate to tc that there is no VQ |
532 | at this DP */ | 530 | at this DP */ |
533 | 531 | ||
534 | dst->DP=MAX_DPs+i; | 532 | opt.DP = MAX_DPs + i; |
535 | continue; | 533 | goto append_opt; |
536 | } | 534 | } |
537 | 535 | ||
538 | dst->limit=q->limit; | 536 | opt.limit = q->limit; |
539 | dst->qth_min=q->qth_min>>q->Wlog; | 537 | opt.DP = q->DP; |
540 | dst->qth_max=q->qth_max>>q->Wlog; | 538 | opt.backlog = q->backlog; |
541 | dst->DP=q->DP; | 539 | opt.prio = q->prio; |
542 | dst->backlog=q->backlog; | 540 | opt.qth_min = q->parms.qth_min >> q->parms.Wlog; |
543 | if (q->qave) { | 541 | opt.qth_max = q->parms.qth_max >> q->parms.Wlog; |
544 | if (table->eqp && table->grio) { | 542 | opt.Wlog = q->parms.Wlog; |
545 | q->qidlestart=table->tab[table->def]->qidlestart; | 543 | opt.Plog = q->parms.Plog; |
546 | q->qave=table->tab[table->def]->qave; | 544 | opt.Scell_log = q->parms.Scell_log; |
547 | } | 545 | opt.other = q->stats.other; |
548 | if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { | 546 | opt.early = q->stats.prob_drop; |
549 | long idle; | 547 | opt.forced = q->stats.forced_drop; |
550 | psched_time_t now; | 548 | opt.pdrop = q->stats.pdrop; |
551 | PSCHED_GET_TIME(now); | 549 | opt.packets = q->packetsin; |
552 | idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); | 550 | opt.bytesin = q->bytesin; |
553 | qave = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF]; | 551 | |
554 | dst->qave = qave >> q->Wlog; | 552 | if (gred_wred_mode(table)) { |
555 | 553 | q->parms.qidlestart = | |
556 | } else { | 554 | table->tab[table->def]->parms.qidlestart; |
557 | dst->qave = q->qave >> q->Wlog; | 555 | q->parms.qavg = table->tab[table->def]->parms.qavg; |
558 | } | ||
559 | } else { | ||
560 | dst->qave = 0; | ||
561 | } | 556 | } |
562 | 557 | ||
563 | 558 | opt.qave = red_calc_qavg(&q->parms, q->parms.qavg); | |
564 | dst->Wlog = q->Wlog; | 559 | |
565 | dst->Plog = q->Plog; | 560 | append_opt: |
566 | dst->Scell_log = q->Scell_log; | 561 | RTA_APPEND(skb, sizeof(opt), &opt); |
567 | dst->other = q->other; | ||
568 | dst->forced = q->forced; | ||
569 | dst->early = q->early; | ||
570 | dst->pdrop = q->pdrop; | ||
571 | dst->prio = q->prio; | ||
572 | dst->packets=q->packetsin; | ||
573 | dst->bytesin=q->bytesin; | ||
574 | } | 562 | } |
575 | 563 | ||
576 | RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt); | 564 | RTA_NEST_END(skb, parms); |
577 | rta->rta_len = skb->tail - b; | ||
578 | 565 | ||
579 | kfree(opt); | 566 | return RTA_NEST_END(skb, opts); |
580 | return skb->len; | ||
581 | 567 | ||
582 | rtattr_failure: | 568 | rtattr_failure: |
583 | if (opt) | 569 | return RTA_NEST_CANCEL(skb, opts); |
584 | kfree(opt); | ||
585 | DPRINTK("gred_dump: FAILURE!!!!\n"); | ||
586 | |||
587 | /* also free the opt struct here */ | ||
588 | skb_trim(skb, b - skb->data); | ||
589 | return -1; | ||
590 | } | 570 | } |
591 | 571 | ||
592 | static void gred_destroy(struct Qdisc *sch) | 572 | static void gred_destroy(struct Qdisc *sch) |
@@ -594,15 +574,13 @@ static void gred_destroy(struct Qdisc *sch) | |||
594 | struct gred_sched *table = qdisc_priv(sch); | 574 | struct gred_sched *table = qdisc_priv(sch); |
595 | int i; | 575 | int i; |
596 | 576 | ||
597 | for (i = 0;i < table->DPs; i++) { | 577 | for (i = 0; i < table->DPs; i++) { |
598 | if (table->tab[i]) | 578 | if (table->tab[i]) |
599 | kfree(table->tab[i]); | 579 | gred_destroy_vq(table->tab[i]); |
600 | } | 580 | } |
601 | } | 581 | } |
602 | 582 | ||
603 | static struct Qdisc_ops gred_qdisc_ops = { | 583 | static struct Qdisc_ops gred_qdisc_ops = { |
604 | .next = NULL, | ||
605 | .cl_ops = NULL, | ||
606 | .id = "gred", | 584 | .id = "gred", |
607 | .priv_size = sizeof(struct gred_sched), | 585 | .priv_size = sizeof(struct gred_sched), |
608 | .enqueue = gred_enqueue, | 586 | .enqueue = gred_enqueue, |
@@ -621,10 +599,13 @@ static int __init gred_module_init(void) | |||
621 | { | 599 | { |
622 | return register_qdisc(&gred_qdisc_ops); | 600 | return register_qdisc(&gred_qdisc_ops); |
623 | } | 601 | } |
624 | static void __exit gred_module_exit(void) | 602 | |
603 | static void __exit gred_module_exit(void) | ||
625 | { | 604 | { |
626 | unregister_qdisc(&gred_qdisc_ops); | 605 | unregister_qdisc(&gred_qdisc_ops); |
627 | } | 606 | } |
607 | |||
628 | module_init(gred_module_init) | 608 | module_init(gred_module_init) |
629 | module_exit(gred_module_exit) | 609 | module_exit(gred_module_exit) |
610 | |||
630 | MODULE_LICENSE("GPL"); | 611 | MODULE_LICENSE("GPL"); |
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bb9bf8d500..cdc8d28379 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c | |||
@@ -25,6 +25,8 @@ | |||
25 | 25 | ||
26 | #include <net/pkt_sched.h> | 26 | #include <net/pkt_sched.h> |
27 | 27 | ||
28 | #define VERSION "1.1" | ||
29 | |||
28 | /* Network Emulation Queuing algorithm. | 30 | /* Network Emulation Queuing algorithm. |
29 | ==================================== | 31 | ==================================== |
30 | 32 | ||
@@ -185,10 +187,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
185 | || q->counter < q->gap /* inside last reordering gap */ | 187 | || q->counter < q->gap /* inside last reordering gap */ |
186 | || q->reorder < get_crandom(&q->reorder_cor)) { | 188 | || q->reorder < get_crandom(&q->reorder_cor)) { |
187 | psched_time_t now; | 189 | psched_time_t now; |
190 | psched_tdiff_t delay; | ||
191 | |||
192 | delay = tabledist(q->latency, q->jitter, | ||
193 | &q->delay_cor, q->delay_dist); | ||
194 | |||
188 | PSCHED_GET_TIME(now); | 195 | PSCHED_GET_TIME(now); |
189 | PSCHED_TADD2(now, tabledist(q->latency, q->jitter, | 196 | PSCHED_TADD2(now, delay, cb->time_to_send); |
190 | &q->delay_cor, q->delay_dist), | ||
191 | cb->time_to_send); | ||
192 | ++q->counter; | 197 | ++q->counter; |
193 | ret = q->qdisc->enqueue(skb, q->qdisc); | 198 | ret = q->qdisc->enqueue(skb, q->qdisc); |
194 | } else { | 199 | } else { |
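The hunk above computes a packet's departure time as now plus a sampled delay. A simplified userspace stand-in for that computation (uniform jitter instead of the correlated, table-driven distribution tabledist() actually implements; names and values are mine):

    #include <stdio.h>
    #include <stdlib.h>

    /* Illustration only: real netem draws from a configurable delay
     * distribution via tabledist(); here the delay is just latency plus
     * uniform jitter in [-jitter_us, +jitter_us]. */
    static long sample_delay_us(long latency_us, long jitter_us)
    {
    	long j = 0;

    	if (jitter_us)
    		j = (random() % (2 * jitter_us + 1)) - jitter_us;

    	return latency_us + j;
    }

    int main(void)
    {
    	long now_us = 0;
    	int i;

    	srandom(1);
    	for (i = 0; i < 3; i++)	/* time_to_send = now + delay, as above */
    		printf("time_to_send = %ld us\n",
    		       now_us + sample_delay_us(100000, 10000));
    	return 0;
    }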
@@ -248,24 +253,31 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) | |||
248 | const struct netem_skb_cb *cb | 253 | const struct netem_skb_cb *cb |
249 | = (const struct netem_skb_cb *)skb->cb; | 254 | = (const struct netem_skb_cb *)skb->cb; |
250 | psched_time_t now; | 255 | psched_time_t now; |
251 | long delay; | ||
252 | 256 | ||
253 | /* if more time remaining? */ | 257 | /* if more time remaining? */ |
254 | PSCHED_GET_TIME(now); | 258 | PSCHED_GET_TIME(now); |
255 | delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); | 259 | |
256 | pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); | 260 | if (PSCHED_TLESS(cb->time_to_send, now)) { |
257 | if (delay <= 0) { | ||
258 | pr_debug("netem_dequeue: return skb=%p\n", skb); | 261 | pr_debug("netem_dequeue: return skb=%p\n", skb); |
259 | sch->q.qlen--; | 262 | sch->q.qlen--; |
260 | sch->flags &= ~TCQ_F_THROTTLED; | 263 | sch->flags &= ~TCQ_F_THROTTLED; |
261 | return skb; | 264 | return skb; |
262 | } | 265 | } else { |
266 | psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now); | ||
267 | |||
268 | if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { | ||
269 | sch->qstats.drops++; | ||
263 | 270 | ||
264 | mod_timer(&q->timer, jiffies + delay); | 271 | /* After this qlen is confused */ |
265 | sch->flags |= TCQ_F_THROTTLED; | 272 | printk(KERN_ERR "netem: queue discpline %s could not requeue\n", |
273 | q->qdisc->ops->id); | ||
266 | 274 | ||
267 | if (q->qdisc->ops->requeue(skb, q->qdisc) != 0) | 275 | sch->q.qlen--; |
268 | sch->qstats.drops++; | 276 | } |
277 | |||
278 | mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay)); | ||
279 | sch->flags |= TCQ_F_THROTTLED; | ||
280 | } | ||
269 | } | 281 | } |
270 | 282 | ||
271 | return NULL; | 283 | return NULL; |
@@ -290,11 +302,16 @@ static void netem_reset(struct Qdisc *sch) | |||
290 | del_timer_sync(&q->timer); | 302 | del_timer_sync(&q->timer); |
291 | } | 303 | } |
292 | 304 | ||
305 | /* Pass size change message down to embedded FIFO */ | ||
293 | static int set_fifo_limit(struct Qdisc *q, int limit) | 306 | static int set_fifo_limit(struct Qdisc *q, int limit) |
294 | { | 307 | { |
295 | struct rtattr *rta; | 308 | struct rtattr *rta; |
296 | int ret = -ENOMEM; | 309 | int ret = -ENOMEM; |
297 | 310 | ||
311 | /* Hack to avoid sending change message to non-FIFO */ | ||
312 | if (strncmp(q->ops->id + 1, "fifo", 4) != 0) | ||
313 | return 0; | ||
314 | |||
298 | rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); | 315 | rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); |
299 | if (rta) { | 316 | if (rta) { |
300 | rta->rta_type = RTM_NEWQDISC; | 317 | rta->rta_type = RTM_NEWQDISC; |
@@ -426,6 +443,84 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) | |||
426 | return 0; | 443 | return 0; |
427 | } | 444 | } |
428 | 445 | ||
446 | /* | ||
447 | * Special case version of FIFO queue for use by netem. | ||
448 | * It queues in order based on the timestamps in skbs | ||
449 | */ | ||
450 | struct fifo_sched_data { | ||
451 | u32 limit; | ||
452 | }; | ||
453 | |||
454 | static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) | ||
455 | { | ||
456 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
457 | struct sk_buff_head *list = &sch->q; | ||
458 | const struct netem_skb_cb *ncb | ||
459 | = (const struct netem_skb_cb *)nskb->cb; | ||
460 | struct sk_buff *skb; | ||
461 | |||
462 | if (likely(skb_queue_len(list) < q->limit)) { | ||
463 | skb_queue_reverse_walk(list, skb) { | ||
464 | const struct netem_skb_cb *cb | ||
465 | = (const struct netem_skb_cb *)skb->cb; | ||
466 | |||
467 | if (PSCHED_TLESS(cb->time_to_send, ncb->time_to_send)) | ||
468 | break; | ||
469 | } | ||
470 | |||
471 | __skb_queue_after(list, skb, nskb); | ||
472 | |||
473 | sch->qstats.backlog += nskb->len; | ||
474 | sch->bstats.bytes += nskb->len; | ||
475 | sch->bstats.packets++; | ||
476 | |||
477 | return NET_XMIT_SUCCESS; | ||
478 | } | ||
479 | |||
480 | return qdisc_drop(nskb, sch); | ||
481 | } | ||
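tfifo_enqueue() above keeps the queue sorted by time_to_send by walking from the tail, which is cheap when packets arrive in near-sorted order. A minimal userspace model of the same insertion policy (an array instead of an skb list; my own code, written to match the strictly-less PSCHED_TLESS() test above, which places a packet after the last strictly-earlier entry):

    #include <stdio.h>

    #define QLEN_MAX 16

    static long q[QLEN_MAX];	/* stand-in for the sorted skb list */
    static int qlen;

    /* Shift back every entry whose send time is not strictly earlier,
     * so the new packet lands after the last strictly-earlier one --
     * the same slot the reverse walk above selects. */
    static void tfifo_like_insert(long t)
    {
    	int i = qlen;

    	if (qlen >= QLEN_MAX)	/* model of the q->limit drop */
    		return;

    	while (i > 0 && q[i - 1] >= t) {
    		q[i] = q[i - 1];
    		i--;
    	}
    	q[i] = t;
    	qlen++;
    }

    int main(void)
    {
    	long samples[] = { 30, 10, 20, 15 };
    	int i;

    	for (i = 0; i < 4; i++)
    		tfifo_like_insert(samples[i]);

    	for (i = 0; i < qlen; i++)
    		printf("%ld ", q[i]);	/* prints: 10 15 20 30 */
    	printf("\n");
    	return 0;
    }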
482 | |||
483 | static int tfifo_init(struct Qdisc *sch, struct rtattr *opt) | ||
484 | { | ||
485 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
486 | |||
487 | if (opt) { | ||
488 | struct tc_fifo_qopt *ctl = RTA_DATA(opt); | ||
489 | if (RTA_PAYLOAD(opt) < sizeof(*ctl)) | ||
490 | return -EINVAL; | ||
491 | |||
492 | q->limit = ctl->limit; | ||
493 | } else | ||
494 | q->limit = max_t(u32, sch->dev->tx_queue_len, 1); | ||
495 | |||
496 | return 0; | ||
497 | } | ||
498 | |||
499 | static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb) | ||
500 | { | ||
501 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
502 | struct tc_fifo_qopt opt = { .limit = q->limit }; | ||
503 | |||
504 | RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | ||
505 | return skb->len; | ||
506 | |||
507 | rtattr_failure: | ||
508 | return -1; | ||
509 | } | ||
510 | |||
511 | static struct Qdisc_ops tfifo_qdisc_ops = { | ||
512 | .id = "tfifo", | ||
513 | .priv_size = sizeof(struct fifo_sched_data), | ||
514 | .enqueue = tfifo_enqueue, | ||
515 | .dequeue = qdisc_dequeue_head, | ||
516 | .requeue = qdisc_requeue, | ||
517 | .drop = qdisc_queue_drop, | ||
518 | .init = tfifo_init, | ||
519 | .reset = qdisc_reset_queue, | ||
520 | .change = tfifo_init, | ||
521 | .dump = tfifo_dump, | ||
522 | }; | ||
523 | |||
429 | static int netem_init(struct Qdisc *sch, struct rtattr *opt) | 524 | static int netem_init(struct Qdisc *sch, struct rtattr *opt) |
430 | { | 525 | { |
431 | struct netem_sched_data *q = qdisc_priv(sch); | 526 | struct netem_sched_data *q = qdisc_priv(sch); |
@@ -438,7 +533,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) | |||
438 | q->timer.function = netem_watchdog; | 533 | q->timer.function = netem_watchdog; |
439 | q->timer.data = (unsigned long) sch; | 534 | q->timer.data = (unsigned long) sch; |
440 | 535 | ||
441 | q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); | 536 | q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops); |
442 | if (!q->qdisc) { | 537 | if (!q->qdisc) { |
443 | pr_debug("netem: qdisc create failed\n"); | 538 | pr_debug("netem: qdisc create failed\n"); |
444 | return -ENOMEM; | 539 | return -ENOMEM; |
@@ -601,6 +696,7 @@ static struct Qdisc_ops netem_qdisc_ops = { | |||
601 | 696 | ||
602 | static int __init netem_module_init(void) | 697 | static int __init netem_module_init(void) |
603 | { | 698 | { |
699 | pr_info("netem: version " VERSION "\n"); | ||
604 | return register_qdisc(&netem_qdisc_ops); | 700 | return register_qdisc(&netem_qdisc_ops); |
605 | } | 701 | } |
606 | static void __exit netem_module_exit(void) | 702 | static void __exit netem_module_exit(void) |
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 7845d045ee..dccfa44c2d 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c | |||
@@ -9,76 +9,23 @@ | |||
9 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | 9 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
10 | * | 10 | * |
11 | * Changes: | 11 | * Changes: |
12 | * J Hadi Salim <hadi@nortel.com> 980914: computation fixes | 12 | * J Hadi Salim 980914: computation fixes |
13 | * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly. | 13 | * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly. |
14 | * J Hadi Salim <hadi@nortelnetworks.com> 980816: ECN support | 14 | * J Hadi Salim 980816: ECN support |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/config.h> | 17 | #include <linux/config.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <asm/uaccess.h> | ||
20 | #include <asm/system.h> | ||
21 | #include <linux/bitops.h> | ||
22 | #include <linux/types.h> | 19 | #include <linux/types.h> |
23 | #include <linux/kernel.h> | 20 | #include <linux/kernel.h> |
24 | #include <linux/sched.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/mm.h> | ||
27 | #include <linux/socket.h> | ||
28 | #include <linux/sockios.h> | ||
29 | #include <linux/in.h> | ||
30 | #include <linux/errno.h> | ||
31 | #include <linux/interrupt.h> | ||
32 | #include <linux/if_ether.h> | ||
33 | #include <linux/inet.h> | ||
34 | #include <linux/netdevice.h> | 21 | #include <linux/netdevice.h> |
35 | #include <linux/etherdevice.h> | ||
36 | #include <linux/notifier.h> | ||
37 | #include <net/ip.h> | ||
38 | #include <net/route.h> | ||
39 | #include <linux/skbuff.h> | 22 | #include <linux/skbuff.h> |
40 | #include <net/sock.h> | ||
41 | #include <net/pkt_sched.h> | 23 | #include <net/pkt_sched.h> |
42 | #include <net/inet_ecn.h> | 24 | #include <net/inet_ecn.h> |
43 | #include <net/dsfield.h> | 25 | #include <net/red.h> |
44 | 26 | ||
45 | 27 | ||
46 | /* Random Early Detection (RED) algorithm. | 28 | /* Parameters, settable by user: |
47 | ======================================= | ||
48 | |||
49 | Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways | ||
50 | for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking. | ||
51 | |||
52 | This file codes a "divisionless" version of the RED algorithm | ||
53 | as written down in Fig.17 of the paper. | ||
54 | |||
55 | Short description. | ||
56 | ------------------ | ||
57 | |||
58 | When a new packet arrives we calculate the average queue length: | ||
59 | |||
60 | avg = (1-W)*avg + W*current_queue_len, | ||
61 | |||
62 | W is the filter time constant (chosen as 2^(-Wlog)); it controls | ||
63 | the inertia of the algorithm. To allow larger bursts, W should be | ||
64 | decreased. | ||
65 | |||
66 | if (avg > th_max) -> packet marked (dropped). | ||
67 | if (avg < th_min) -> packet passes. | ||
68 | if (th_min < avg < th_max) we calculate probability: | ||
69 | |||
70 | Pb = max_P * (avg - th_min)/(th_max-th_min) | ||
71 | |||
72 | and mark (drop) packet with this probability. | ||
73 | Pb changes from 0 (at avg==th_min) to max_P (avg==th_max). | ||
74 | max_P should be small (not 1); usually 0.01..0.02 is a good value. | ||
75 | |||
76 | max_P is chosen as a number such that max_P/(th_max-th_min) | ||
77 | is a negative power of two, so that the arithmetic contains | ||
78 | only shifts. | ||
79 | |||
80 | |||
81 | Parameters, settable by user: | ||
82 | ----------------------------- | 29 | ----------------------------- |
83 | 30 | ||
84 | limit - bytes (must be > qth_max + burst) | 31 | limit - bytes (must be > qth_max + burst) |
@@ -89,243 +36,93 @@ Short description. | |||
89 | arbitrarily high (well, less than ram size) | 36 | arbitrarily high (well, less than ram size) |
90 | Really, this limit will never be reached | 37 | Really, this limit will never be reached |
91 | if RED works correctly. | 38 | if RED works correctly. |
92 | |||
93 | qth_min - bytes (should be < qth_max/2) | ||
94 | qth_max - bytes (should be at least 2*qth_min and less than limit) | ||
95 | Wlog - bits (<32) log(1/W). | ||
96 | Plog - bits (<32) | ||
97 | |||
98 | Plog is related to max_P by formula: | ||
99 | |||
100 | max_P = (qth_max-qth_min)/2^Plog; | ||
101 | |||
102 | E.g. if qth_max=128K and qth_min=32K, then Plog=22 | ||
103 | corresponds to max_P=0.02 | ||
104 | |||
105 | Scell_log | ||
106 | Stab | ||
107 | |||
108 | Lookup table for log((1-W)^(t/t_ave)). | ||
109 | |||
110 | |||
111 | NOTES: | ||
112 | |||
113 | Upper bound on W. | ||
114 | ----------------- | ||
115 | |||
116 | If you want to allow bursts of L packets of size S, | ||
117 | you should choose W: | ||
118 | |||
119 | L + 1 - th_min/S < (1-(1-W)^L)/W | ||
120 | |||
121 | th_min/S = 32 th_min/S = 4 | ||
122 | |||
123 | log(W) L | ||
124 | -1 33 | ||
125 | -2 35 | ||
126 | -3 39 | ||
127 | -4 46 | ||
128 | -5 57 | ||
129 | -6 75 | ||
130 | -7 101 | ||
131 | -8 135 | ||
132 | -9 190 | ||
133 | etc. | ||
134 | */ | 39 | */ |
135 | 40 | ||
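Since the long derivation above is being deleted, a compact restatement may help. Expressed against the old red_sched_data fields (qave with Wlog fractional bits, qR uniform in 0..Rmask), the average update and the divisionless mark test come down to a handful of operations; this is a condensed sketch of the removed enqueue logic further below, not of the exact bodies now in net/red.h:

static int red_pass_probabilistically(struct red_sched_data *q, u32 backlog)
{
	/* EWMA update: avg = (1-W)*avg + W*backlog with W = 2^(-Wlog).
	 * qave carries Wlog fractional bits, so no division is needed. */
	q->qave += backlog - (q->qave >> q->Wlog);

	/* Between the thresholds the packet is marked with probability
	 *	Pb = max_P * (qave - qth_min) / (qth_max - qth_min),
	 * where max_P = (qth_max - qth_min) / 2^Plog.  With qR uniform
	 * in 0..2^Plog-1 and qcount packets since the last mark, the
	 * pass condition Pb < rnd/qcount reduces to one multiply and
	 * one shift: */
	if (((q->qave - q->qth_min) >> q->Wlog) * ++q->qcount < q->qR)
		return 1;			/* let the packet through */

	q->qcount = 0;
	q->qR = net_random() & q->Rmask;	/* draw a fresh random value */
	return 0;				/* mark or drop this one */
}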
136 | struct red_sched_data | 41 | struct red_sched_data |
137 | { | 42 | { |
138 | /* Parameters */ | 43 | u32 limit; /* HARD maximal queue length */ |
139 | u32 limit; /* HARD maximal queue length */ | 44 | unsigned char flags; |
140 | u32 qth_min; /* Min average length threshold: A scaled */ | 45 | struct red_parms parms; |
141 | u32 qth_max; /* Max average length threshold: A scaled */ | 46 | struct red_stats stats; |
142 | u32 Rmask; | ||
143 | u32 Scell_max; | ||
144 | unsigned char flags; | ||
145 | char Wlog; /* log(W) */ | ||
146 | char Plog; /* random number bits */ | ||
147 | char Scell_log; | ||
148 | u8 Stab[256]; | ||
149 | |||
150 | /* Variables */ | ||
151 | unsigned long qave; /* Average queue length: A scaled */ | ||
152 | int qcount; /* Packets since last random number generation */ | ||
153 | u32 qR; /* Cached random number */ | ||
154 | |||
155 | psched_time_t qidlestart; /* Start of idle period */ | ||
156 | struct tc_red_xstats st; | ||
157 | }; | 47 | }; |
158 | 48 | ||
159 | static int red_ecn_mark(struct sk_buff *skb) | 49 | static inline int red_use_ecn(struct red_sched_data *q) |
160 | { | 50 | { |
161 | if (skb->nh.raw + 20 > skb->tail) | 51 | return q->flags & TC_RED_ECN; |
162 | return 0; | ||
163 | |||
164 | switch (skb->protocol) { | ||
165 | case __constant_htons(ETH_P_IP): | ||
166 | if (INET_ECN_is_not_ect(skb->nh.iph->tos)) | ||
167 | return 0; | ||
168 | IP_ECN_set_ce(skb->nh.iph); | ||
169 | return 1; | ||
170 | case __constant_htons(ETH_P_IPV6): | ||
171 | if (INET_ECN_is_not_ect(ipv6_get_dsfield(skb->nh.ipv6h))) | ||
172 | return 0; | ||
173 | IP6_ECN_set_ce(skb->nh.ipv6h); | ||
174 | return 1; | ||
175 | default: | ||
176 | return 0; | ||
177 | } | ||
178 | } | 52 | } |
179 | 53 | ||
180 | static int | 54 | static inline int red_use_harddrop(struct red_sched_data *q) |
181 | red_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 55 | { |
56 | return q->flags & TC_RED_HARDDROP; | ||
57 | } | ||
58 | |||
59 | static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) | ||
182 | { | 60 | { |
183 | struct red_sched_data *q = qdisc_priv(sch); | 61 | struct red_sched_data *q = qdisc_priv(sch); |
184 | 62 | ||
185 | psched_time_t now; | 63 | q->parms.qavg = red_calc_qavg(&q->parms, sch->qstats.backlog); |
186 | 64 | ||
187 | if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { | 65 | if (red_is_idling(&q->parms)) |
188 | long us_idle; | 66 | red_end_of_idle_period(&q->parms); |
189 | int shift; | ||
190 | 67 | ||
191 | PSCHED_GET_TIME(now); | 68 | switch (red_action(&q->parms, q->parms.qavg)) { |
192 | us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); | 69 | case RED_DONT_MARK: |
193 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 70 | break; |
194 | 71 | ||
195 | /* | 72 | case RED_PROB_MARK: |
196 | The problem: ideally, average queue length recalculation should | 73 | sch->qstats.overlimits++; |
197 | be done over constant clock intervals. This is too expensive, so | 74 | if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { |
198 | the calculation is driven by outgoing packets. | 75 | q->stats.prob_drop++; |
199 | When the queue is idle we have to model this clock by hand. | 76 | goto congestion_drop; |
200 | 77 | } | |
201 | SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth) | ||
202 | dummy packets as a burst after idle time, i.e. | ||
203 | |||
204 | q->qave *= (1-W)^m | ||
205 | |||
206 | This is an apparently overcomplicated solution (e.g. we have to precompute | ||
207 | a table to make this calculation in reasonable time). | ||
208 | I believe that a simpler model may be used here, | ||
209 | but it is a field for experiments. | ||
210 | */ | ||
211 | shift = q->Stab[us_idle>>q->Scell_log]; | ||
212 | |||
213 | if (shift) { | ||
214 | q->qave >>= shift; | ||
215 | } else { | ||
216 | /* Approximate initial part of exponent | ||
217 | with linear function: | ||
218 | (1-W)^m ~= 1-mW + ... | ||
219 | |||
220 | It seems to be the best solution to the | ||
221 | problem of too coarse exponent tabulation. | ||
222 | */ | ||
223 | |||
224 | us_idle = (q->qave * us_idle)>>q->Scell_log; | ||
225 | if (us_idle < q->qave/2) | ||
226 | q->qave -= us_idle; | ||
227 | else | ||
228 | q->qave >>= 1; | ||
229 | } | ||
230 | } else { | ||
231 | q->qave += sch->qstats.backlog - (q->qave >> q->Wlog); | ||
232 | /* NOTE: | ||
233 | q->qave is a fixed-point number with the point at Wlog. | ||
234 | The formula above is equivalent to the floating point | ||
235 | version: | ||
236 | |||
237 | qave = qave*(1-W) + sch->qstats.backlog*W; | ||
238 | --ANK (980924) | ||
239 | */ | ||
240 | } | ||
241 | 78 | ||
242 | if (q->qave < q->qth_min) { | 79 | q->stats.prob_mark++; |
243 | q->qcount = -1; | 80 | break; |
244 | enqueue: | 81 | |
245 | if (sch->qstats.backlog + skb->len <= q->limit) { | 82 | case RED_HARD_MARK: |
246 | __skb_queue_tail(&sch->q, skb); | 83 | sch->qstats.overlimits++; |
247 | sch->qstats.backlog += skb->len; | 84 | if (red_use_harddrop(q) || !red_use_ecn(q) || |
248 | sch->bstats.bytes += skb->len; | 85 | !INET_ECN_set_ce(skb)) { |
249 | sch->bstats.packets++; | 86 | q->stats.forced_drop++; |
250 | return NET_XMIT_SUCCESS; | 87 | goto congestion_drop; |
251 | } else { | 88 | } |
252 | q->st.pdrop++; | ||
253 | } | ||
254 | kfree_skb(skb); | ||
255 | sch->qstats.drops++; | ||
256 | return NET_XMIT_DROP; | ||
257 | } | ||
258 | if (q->qave >= q->qth_max) { | ||
259 | q->qcount = -1; | ||
260 | sch->qstats.overlimits++; | ||
261 | mark: | ||
262 | if (!(q->flags&TC_RED_ECN) || !red_ecn_mark(skb)) { | ||
263 | q->st.early++; | ||
264 | goto drop; | ||
265 | } | ||
266 | q->st.marked++; | ||
267 | goto enqueue; | ||
268 | } | ||
269 | 89 | ||
270 | if (++q->qcount) { | 90 | q->stats.forced_mark++; |
271 | /* The formula used below causes questions. | 91 | break; |
272 | |||
273 | OK. qR is a random number in the interval 0..Rmask | ||
274 | i.e. 0..(2^Plog). If we used floating point | ||
275 | arithmetic, it would be: (2^Plog)*rnd_num, | ||
276 | where rnd_num is less than 1. | ||
277 | |||
278 | Taking into account that qave has a fixed | ||
279 | point at Wlog, and Plog is related to max_P by | ||
280 | max_P = (qth_max-qth_min)/2^Plog; two lines | ||
281 | below have the following floating point equivalent: | ||
282 | |||
283 | max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount | ||
284 | |||
285 | Any questions? --ANK (980924) | ||
286 | */ | ||
287 | if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR) | ||
288 | goto enqueue; | ||
289 | q->qcount = 0; | ||
290 | q->qR = net_random()&q->Rmask; | ||
291 | sch->qstats.overlimits++; | ||
292 | goto mark; | ||
293 | } | 92 | } |
294 | q->qR = net_random()&q->Rmask; | ||
295 | goto enqueue; | ||
296 | 93 | ||
297 | drop: | 94 | if (sch->qstats.backlog + skb->len <= q->limit) |
298 | kfree_skb(skb); | 95 | return qdisc_enqueue_tail(skb, sch); |
299 | sch->qstats.drops++; | 96 | |
97 | q->stats.pdrop++; | ||
98 | return qdisc_drop(skb, sch); | ||
99 | |||
100 | congestion_drop: | ||
101 | qdisc_drop(skb, sch); | ||
300 | return NET_XMIT_CN; | 102 | return NET_XMIT_CN; |
301 | } | 103 | } |
302 | 104 | ||
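The red_action() call above is one of the helpers this patch moves into include/net/red.h, so its body is not visible in this diff. Its decision structure can be read off the removed code; the reconstruction below is an approximation for orientation only, with red_mark_probability() assumed to implement the qcount/qR test sketched earlier:

static inline int red_action_sketch(struct red_parms *p, unsigned long qavg)
{
	if (qavg < p->qth_min)
		return RED_DONT_MARK;	/* average below the min threshold */

	if (qavg >= p->qth_max)
		return RED_HARD_MARK;	/* average beyond the max threshold */

	/* In between: mark with probability proportional to how far
	 * qavg sits between the two thresholds. */
	return red_mark_probability(p, qavg) ? RED_PROB_MARK : RED_DONT_MARK;
}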
303 | static int | 105 | static int red_requeue(struct sk_buff *skb, struct Qdisc* sch) |
304 | red_requeue(struct sk_buff *skb, struct Qdisc* sch) | ||
305 | { | 106 | { |
306 | struct red_sched_data *q = qdisc_priv(sch); | 107 | struct red_sched_data *q = qdisc_priv(sch); |
307 | 108 | ||
308 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 109 | if (red_is_idling(&q->parms)) |
110 | red_end_of_idle_period(&q->parms); | ||
309 | 111 | ||
310 | __skb_queue_head(&sch->q, skb); | 112 | return qdisc_requeue(skb, sch); |
311 | sch->qstats.backlog += skb->len; | ||
312 | sch->qstats.requeues++; | ||
313 | return 0; | ||
314 | } | 113 | } |
315 | 114 | ||
316 | static struct sk_buff * | 115 | static struct sk_buff * red_dequeue(struct Qdisc* sch) |
317 | red_dequeue(struct Qdisc* sch) | ||
318 | { | 116 | { |
319 | struct sk_buff *skb; | 117 | struct sk_buff *skb; |
320 | struct red_sched_data *q = qdisc_priv(sch); | 118 | struct red_sched_data *q = qdisc_priv(sch); |
321 | 119 | ||
322 | skb = __skb_dequeue(&sch->q); | 120 | skb = qdisc_dequeue_head(sch); |
323 | if (skb) { | 121 | |
324 | sch->qstats.backlog -= skb->len; | 122 | if (skb == NULL && !red_is_idling(&q->parms)) |
325 | return skb; | 123 | red_start_of_idle_period(&q->parms); |
326 | } | 124 | |
327 | PSCHED_GET_TIME(q->qidlestart); | 125 | return skb; |
328 | return NULL; | ||
329 | } | 126 | } |
330 | 127 | ||
331 | static unsigned int red_drop(struct Qdisc* sch) | 128 | static unsigned int red_drop(struct Qdisc* sch) |
@@ -333,16 +130,17 @@ static unsigned int red_drop(struct Qdisc* sch) | |||
333 | struct sk_buff *skb; | 130 | struct sk_buff *skb; |
334 | struct red_sched_data *q = qdisc_priv(sch); | 131 | struct red_sched_data *q = qdisc_priv(sch); |
335 | 132 | ||
336 | skb = __skb_dequeue_tail(&sch->q); | 133 | skb = qdisc_dequeue_tail(sch); |
337 | if (skb) { | 134 | if (skb) { |
338 | unsigned int len = skb->len; | 135 | unsigned int len = skb->len; |
339 | sch->qstats.backlog -= len; | 136 | q->stats.other++; |
340 | sch->qstats.drops++; | 137 | qdisc_drop(skb, sch); |
341 | q->st.other++; | ||
342 | kfree_skb(skb); | ||
343 | return len; | 138 | return len; |
344 | } | 139 | } |
345 | PSCHED_GET_TIME(q->qidlestart); | 140 | |
141 | if (!red_is_idling(&q->parms)) | ||
142 | red_start_of_idle_period(&q->parms); | ||
143 | |||
346 | return 0; | 144 | return 0; |
347 | } | 145 | } |
348 | 146 | ||
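The idle-period helpers used in the last two hunks replace the open-coded accesses to q->qidlestart in the old code. Their shape follows directly from the lines they replace, with the timestamp now living in struct red_parms; the exact bodies are in the new include/net/red.h, so read this as an inference:

static inline void red_start_of_idle_period(struct red_parms *p)
{
	PSCHED_GET_TIME(p->qidlestart);		/* queue just went empty */
}

static inline int red_is_idling(struct red_parms *p)
{
	return !PSCHED_IS_PASTPERFECT(p->qidlestart);
}

static inline void red_end_of_idle_period(struct red_parms *p)
{
	PSCHED_SET_PASTPERFECT(p->qidlestart);	/* traffic is flowing again */
}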
@@ -350,43 +148,38 @@ static void red_reset(struct Qdisc* sch) | |||
350 | { | 148 | { |
351 | struct red_sched_data *q = qdisc_priv(sch); | 149 | struct red_sched_data *q = qdisc_priv(sch); |
352 | 150 | ||
353 | __skb_queue_purge(&sch->q); | 151 | qdisc_reset_queue(sch); |
354 | sch->qstats.backlog = 0; | 152 | red_restart(&q->parms); |
355 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
356 | q->qave = 0; | ||
357 | q->qcount = -1; | ||
358 | } | 153 | } |
359 | 154 | ||
360 | static int red_change(struct Qdisc *sch, struct rtattr *opt) | 155 | static int red_change(struct Qdisc *sch, struct rtattr *opt) |
361 | { | 156 | { |
362 | struct red_sched_data *q = qdisc_priv(sch); | 157 | struct red_sched_data *q = qdisc_priv(sch); |
363 | struct rtattr *tb[TCA_RED_STAB]; | 158 | struct rtattr *tb[TCA_RED_MAX]; |
364 | struct tc_red_qopt *ctl; | 159 | struct tc_red_qopt *ctl; |
365 | 160 | ||
366 | if (opt == NULL || | 161 | if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt)) |
367 | rtattr_parse_nested(tb, TCA_RED_STAB, opt) || | 162 | return -EINVAL; |
368 | tb[TCA_RED_PARMS-1] == 0 || tb[TCA_RED_STAB-1] == 0 || | 163 | |
164 | if (tb[TCA_RED_PARMS-1] == NULL || | ||
369 | RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || | 165 | RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || |
370 | RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < 256) | 166 | tb[TCA_RED_STAB-1] == NULL || |
167 | RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE) | ||
371 | return -EINVAL; | 168 | return -EINVAL; |
372 | 169 | ||
373 | ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); | 170 | ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); |
374 | 171 | ||
375 | sch_tree_lock(sch); | 172 | sch_tree_lock(sch); |
376 | q->flags = ctl->flags; | 173 | q->flags = ctl->flags; |
377 | q->Wlog = ctl->Wlog; | ||
378 | q->Plog = ctl->Plog; | ||
379 | q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; | ||
380 | q->Scell_log = ctl->Scell_log; | ||
381 | q->Scell_max = (255<<q->Scell_log); | ||
382 | q->qth_min = ctl->qth_min<<ctl->Wlog; | ||
383 | q->qth_max = ctl->qth_max<<ctl->Wlog; | ||
384 | q->limit = ctl->limit; | 174 | q->limit = ctl->limit; |
385 | memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256); | ||
386 | 175 | ||
387 | q->qcount = -1; | 176 | red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, |
177 | ctl->Plog, ctl->Scell_log, | ||
178 | RTA_DATA(tb[TCA_RED_STAB-1])); | ||
179 | |||
388 | if (skb_queue_empty(&sch->q)) | 180 | if (skb_queue_empty(&sch->q)) |
389 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 181 | red_end_of_idle_period(&q->parms); |
182 | |||
390 | sch_tree_unlock(sch); | 183 | sch_tree_unlock(sch); |
391 | return 0; | 184 | return 0; |
392 | } | 185 | } |
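red_set_parms() absorbs the per-field assignments deleted in this hunk. Reconstructed from those removed lines, it scales the thresholds into qave's fixed-point units and caches the derived masks; the field names follow the old red_sched_data and the body is inferred, not quoted from the new header:

static inline void red_set_parms(struct red_parms *p,
				 u32 qth_min, u32 qth_max, u8 Wlog,
				 u8 Plog, u8 Scell_log, u8 *stab)
{
	/* Thresholds are compared against qave, which carries Wlog
	 * fractional bits, so pre-scale them once here. */
	p->qth_min	= qth_min << Wlog;
	p->qth_max	= qth_max << Wlog;
	p->Wlog		= Wlog;
	p->Plog		= Plog;
	p->Rmask	= Plog < 32 ? ((1 << Plog) - 1) : ~0UL;
	p->Scell_log	= Scell_log;
	p->Scell_max	= 255 << Scell_log;	/* bound on the idle-time delta */
	memcpy(p->Stab, stab, RED_STAB_SIZE);	/* idle decay lookup table */

	p->qcount	= -1;			/* restart the mark counter */
}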
@@ -399,39 +192,39 @@ static int red_init(struct Qdisc* sch, struct rtattr *opt) | |||
399 | static int red_dump(struct Qdisc *sch, struct sk_buff *skb) | 192 | static int red_dump(struct Qdisc *sch, struct sk_buff *skb) |
400 | { | 193 | { |
401 | struct red_sched_data *q = qdisc_priv(sch); | 194 | struct red_sched_data *q = qdisc_priv(sch); |
402 | unsigned char *b = skb->tail; | 195 | struct rtattr *opts = NULL; |
403 | struct rtattr *rta; | 196 | struct tc_red_qopt opt = { |
404 | struct tc_red_qopt opt; | 197 | .limit = q->limit, |
405 | 198 | .flags = q->flags, | |
406 | rta = (struct rtattr*)b; | 199 | .qth_min = q->parms.qth_min >> q->parms.Wlog, |
407 | RTA_PUT(skb, TCA_OPTIONS, 0, NULL); | 200 | .qth_max = q->parms.qth_max >> q->parms.Wlog, |
408 | opt.limit = q->limit; | 201 | .Wlog = q->parms.Wlog, |
409 | opt.qth_min = q->qth_min>>q->Wlog; | 202 | .Plog = q->parms.Plog, |
410 | opt.qth_max = q->qth_max>>q->Wlog; | 203 | .Scell_log = q->parms.Scell_log, |
411 | opt.Wlog = q->Wlog; | 204 | }; |
412 | opt.Plog = q->Plog; | 205 | |
413 | opt.Scell_log = q->Scell_log; | 206 | opts = RTA_NEST(skb, TCA_OPTIONS); |
414 | opt.flags = q->flags; | ||
415 | RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); | 207 | RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); |
416 | rta->rta_len = skb->tail - b; | 208 | return RTA_NEST_END(skb, opts); |
417 | |||
418 | return skb->len; | ||
419 | 209 | ||
420 | rtattr_failure: | 210 | rtattr_failure: |
421 | skb_trim(skb, b - skb->data); | 211 | return RTA_NEST_CANCEL(skb, opts); |
422 | return -1; | ||
423 | } | 212 | } |
424 | 213 | ||
425 | static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | 214 | static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) |
426 | { | 215 | { |
427 | struct red_sched_data *q = qdisc_priv(sch); | 216 | struct red_sched_data *q = qdisc_priv(sch); |
428 | 217 | struct tc_red_xstats st = { | |
429 | return gnet_stats_copy_app(d, &q->st, sizeof(q->st)); | 218 | .early = q->stats.prob_drop + q->stats.forced_drop, |
219 | .pdrop = q->stats.pdrop, | ||
220 | .other = q->stats.other, | ||
221 | .marked = q->stats.prob_mark + q->stats.forced_mark, | ||
222 | }; | ||
223 | |||
224 | return gnet_stats_copy_app(d, &st, sizeof(st)); | ||
430 | } | 225 | } |
431 | 226 | ||
432 | static struct Qdisc_ops red_qdisc_ops = { | 227 | static struct Qdisc_ops red_qdisc_ops = { |
433 | .next = NULL, | ||
434 | .cl_ops = NULL, | ||
435 | .id = "red", | 228 | .id = "red", |
436 | .priv_size = sizeof(struct red_sched_data), | 229 | .priv_size = sizeof(struct red_sched_data), |
437 | .enqueue = red_enqueue, | 230 | .enqueue = red_enqueue, |
@@ -450,10 +243,13 @@ static int __init red_module_init(void) | |||
450 | { | 243 | { |
451 | return register_qdisc(&red_qdisc_ops); | 244 | return register_qdisc(&red_qdisc_ops); |
452 | } | 245 | } |
453 | static void __exit red_module_exit(void) | 246 | |
247 | static void __exit red_module_exit(void) | ||
454 | { | 248 | { |
455 | unregister_qdisc(&red_qdisc_ops); | 249 | unregister_qdisc(&red_qdisc_ops); |
456 | } | 250 | } |
251 | |||
457 | module_init(red_module_init) | 252 | module_init(red_module_init) |
458 | module_exit(red_module_exit) | 253 | module_exit(red_module_exit) |
254 | |||
459 | MODULE_LICENSE("GPL"); | 255 | MODULE_LICENSE("GPL"); |
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a415d99c39..8c7756036e 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c | |||
@@ -299,11 +299,10 @@ put_rpccred(struct rpc_cred *cred) | |||
299 | void | 299 | void |
300 | rpcauth_unbindcred(struct rpc_task *task) | 300 | rpcauth_unbindcred(struct rpc_task *task) |
301 | { | 301 | { |
302 | struct rpc_auth *auth = task->tk_auth; | ||
303 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | 302 | struct rpc_cred *cred = task->tk_msg.rpc_cred; |
304 | 303 | ||
305 | dprintk("RPC: %4d releasing %s cred %p\n", | 304 | dprintk("RPC: %4d releasing %s cred %p\n", |
306 | task->tk_pid, auth->au_ops->au_name, cred); | 305 | task->tk_pid, task->tk_auth->au_ops->au_name, cred); |
307 | 306 | ||
308 | put_rpccred(cred); | 307 | put_rpccred(cred); |
309 | task->tk_msg.rpc_cred = NULL; | 308 | task->tk_msg.rpc_cred = NULL; |
@@ -312,22 +311,22 @@ rpcauth_unbindcred(struct rpc_task *task) | |||
312 | u32 * | 311 | u32 * |
313 | rpcauth_marshcred(struct rpc_task *task, u32 *p) | 312 | rpcauth_marshcred(struct rpc_task *task, u32 *p) |
314 | { | 313 | { |
315 | struct rpc_auth *auth = task->tk_auth; | ||
316 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | 314 | struct rpc_cred *cred = task->tk_msg.rpc_cred; |
317 | 315 | ||
318 | dprintk("RPC: %4d marshaling %s cred %p\n", | 316 | dprintk("RPC: %4d marshaling %s cred %p\n", |
319 | task->tk_pid, auth->au_ops->au_name, cred); | 317 | task->tk_pid, task->tk_auth->au_ops->au_name, cred); |
318 | |||
320 | return cred->cr_ops->crmarshal(task, p); | 319 | return cred->cr_ops->crmarshal(task, p); |
321 | } | 320 | } |
322 | 321 | ||
323 | u32 * | 322 | u32 * |
324 | rpcauth_checkverf(struct rpc_task *task, u32 *p) | 323 | rpcauth_checkverf(struct rpc_task *task, u32 *p) |
325 | { | 324 | { |
326 | struct rpc_auth *auth = task->tk_auth; | ||
327 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | 325 | struct rpc_cred *cred = task->tk_msg.rpc_cred; |
328 | 326 | ||
329 | dprintk("RPC: %4d validating %s cred %p\n", | 327 | dprintk("RPC: %4d validating %s cred %p\n", |
330 | task->tk_pid, auth->au_ops->au_name, cred); | 328 | task->tk_pid, task->tk_auth->au_ops->au_name, cred); |
329 | |||
331 | return cred->cr_ops->crvalidate(task, p); | 330 | return cred->cr_ops->crvalidate(task, p); |
332 | } | 331 | } |
333 | 332 | ||
@@ -363,12 +362,12 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, | |||
363 | int | 362 | int |
364 | rpcauth_refreshcred(struct rpc_task *task) | 363 | rpcauth_refreshcred(struct rpc_task *task) |
365 | { | 364 | { |
366 | struct rpc_auth *auth = task->tk_auth; | ||
367 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | 365 | struct rpc_cred *cred = task->tk_msg.rpc_cred; |
368 | int err; | 366 | int err; |
369 | 367 | ||
370 | dprintk("RPC: %4d refreshing %s cred %p\n", | 368 | dprintk("RPC: %4d refreshing %s cred %p\n", |
371 | task->tk_pid, auth->au_ops->au_name, cred); | 369 | task->tk_pid, task->tk_auth->au_ops->au_name, cred); |
370 | |||
372 | err = cred->cr_ops->crrefresh(task); | 371 | err = cred->cr_ops->crrefresh(task); |
373 | if (err < 0) | 372 | if (err < 0) |
374 | task->tk_status = err; | 373 | task->tk_status = err; |
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 2387e7b823..a03d4b600c 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
@@ -63,8 +63,6 @@ EXPORT_SYMBOL(rpc_mkpipe); | |||
63 | /* Client transport */ | 63 | /* Client transport */ |
64 | EXPORT_SYMBOL(xprt_create_proto); | 64 | EXPORT_SYMBOL(xprt_create_proto); |
65 | EXPORT_SYMBOL(xprt_set_timeout); | 65 | EXPORT_SYMBOL(xprt_set_timeout); |
66 | EXPORT_SYMBOL(xprt_udp_slot_table_entries); | ||
67 | EXPORT_SYMBOL(xprt_tcp_slot_table_entries); | ||
68 | 66 | ||
69 | /* Client credential cache */ | 67 | /* Client credential cache */ |
70 | EXPORT_SYMBOL(rpcauth_register); | 68 | EXPORT_SYMBOL(rpcauth_register); |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e9bd91265f..5a220b2bb3 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -313,6 +313,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) | |||
313 | rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */ | 313 | rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */ |
314 | 314 | ||
315 | progp = serv->sv_program; | 315 | progp = serv->sv_program; |
316 | |||
317 | for (progp = serv->sv_program; progp; progp = progp->pg_next) | ||
318 | if (prog == progp->pg_prog) | ||
319 | break; | ||
320 | |||
316 | /* | 321 | /* |
317 | * Decode auth data, and add verifier to reply buffer. | 322 | * Decode auth data, and add verifier to reply buffer. |
318 | * We do this before anything else in order to get a decent | 323 | * We do this before anything else in order to get a decent |
@@ -320,7 +325,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) | |||
320 | */ | 325 | */ |
321 | auth_res = svc_authenticate(rqstp, &auth_stat); | 326 | auth_res = svc_authenticate(rqstp, &auth_stat); |
322 | /* Also give the program a chance to reject this call: */ | 327 | /* Also give the program a chance to reject this call: */ |
323 | if (auth_res == SVC_OK) { | 328 | if (auth_res == SVC_OK && progp) { |
324 | auth_stat = rpc_autherr_badcred; | 329 | auth_stat = rpc_autherr_badcred; |
325 | auth_res = progp->pg_authenticate(rqstp); | 330 | auth_res = progp->pg_authenticate(rqstp); |
326 | } | 331 | } |
@@ -340,10 +345,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) | |||
340 | case SVC_COMPLETE: | 345 | case SVC_COMPLETE: |
341 | goto sendit; | 346 | goto sendit; |
342 | } | 347 | } |
343 | 348 | ||
344 | for (progp = serv->sv_program; progp; progp = progp->pg_next) | ||
345 | if (prog == progp->pg_prog) | ||
346 | break; | ||
347 | if (progp == NULL) | 349 | if (progp == NULL) |
348 | goto err_bad_prog; | 350 | goto err_bad_prog; |
349 | 351 | ||
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index d0c9f460e4..1065904841 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c | |||
@@ -119,13 +119,6 @@ done: | |||
119 | return 0; | 119 | return 0; |
120 | } | 120 | } |
121 | 121 | ||
122 | unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
123 | unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
124 | unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; | ||
125 | EXPORT_SYMBOL(xprt_min_resvport); | ||
126 | unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; | ||
127 | EXPORT_SYMBOL(xprt_max_resvport); | ||
128 | |||
129 | 122 | ||
130 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; | 123 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; |
131 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; | 124 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2e1529217e..0a51fd46a8 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -36,6 +36,15 @@ | |||
36 | #include <net/tcp.h> | 36 | #include <net/tcp.h> |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * xprtsock tunables | ||
40 | */ | ||
41 | unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
42 | unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
43 | |||
44 | unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; | ||
45 | unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; | ||
46 | |||
47 | /* | ||
39 | * How many times to try sending a request on a socket before waiting | 48 | * How many times to try sending a request on a socket before waiting |
40 | * for the socket buffer to clear. | 49 | * for the socket buffer to clear. |
41 | */ | 50 | */ |