Diffstat (limited to 'net')
-rw-r--r--net/core/datagram.c4
-rw-r--r--net/core/stream.c12
-rw-r--r--net/dccp/ipv4.c32
-rw-r--r--net/ieee80211/ieee80211_crypt.c1
-rw-r--r--net/ieee80211/ieee80211_crypt_ccmp.c1
-rw-r--r--net/ieee80211/ieee80211_crypt_tkip.c1
-rw-r--r--net/ieee80211/ieee80211_crypt_wep.c1
-rw-r--r--net/ieee80211/ieee80211_geo.c1
-rw-r--r--net/ieee80211/ieee80211_module.c1
-rw-r--r--net/ieee80211/ieee80211_rx.c1
-rw-r--r--net/ieee80211/ieee80211_tx.c1
-rw-r--r--net/ipv4/igmp.c5
-rw-r--r--net/ipv4/inet_connection_sock.c14
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c19
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c6
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c2
-rw-r--r--net/ipv4/netfilter/ipt_CONNMARK.c1
-rw-r--r--net/ipv4/tcp.c1
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/mcast.c4
-rw-r--r--net/ipv6/tcp_ipv6.c15
-rw-r--r--net/netfilter/nf_queue.c2
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c6
-rw-r--r--net/sched/Kconfig394
-rw-r--r--net/sched/sch_gred.c841
-rw-r--r--net/sched/sch_netem.c122
-rw-r--r--net/sched/sch_red.c418
-rw-r--r--net/sunrpc/auth.c15
-rw-r--r--net/sunrpc/sunrpc_syms.c2
-rw-r--r--net/sunrpc/svc.c12
-rw-r--r--net/sunrpc/sysctl.c7
-rw-r--r--net/sunrpc/xprtsock.c9
38 files changed, 894 insertions, 1079 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 81987df536..d219435d08 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -213,6 +213,10 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
213{ 213{
214 int i, err, fraglen, end = 0; 214 int i, err, fraglen, end = 0;
215 struct sk_buff *next = skb_shinfo(skb)->frag_list; 215 struct sk_buff *next = skb_shinfo(skb)->frag_list;
216
217 if (!len)
218 return 0;
219
216next_skb: 220next_skb:
217 fraglen = skb_headlen(skb); 221 fraglen = skb_headlen(skb);
218 i = -1; 222 i = -1;
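
A minimal userspace sketch of the zero-length guard added above, with illustrative names rather than the real skb API: a request to copy nothing succeeds immediately, before any fragment walking starts.

#include <string.h>

/* illustrative stand-in for skb_copy_datagram_iovec() */
int copy_from_fragments(const char *frag, int offset, char *to, int len)
{
    if (!len)                           /* nothing to copy: succeed early */
        return 0;

    memcpy(to, frag + offset, len);     /* stands in for the fragment walk */
    return 0;
}
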
diff --git a/net/core/stream.c b/net/core/stream.c
index ac9edfdf87..15bfd03e80 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -52,8 +52,9 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
52{ 52{
53 struct task_struct *tsk = current; 53 struct task_struct *tsk = current;
54 DEFINE_WAIT(wait); 54 DEFINE_WAIT(wait);
55 int done;
55 56
56 while (1) { 57 do {
57 if (sk->sk_err) 58 if (sk->sk_err)
58 return sock_error(sk); 59 return sock_error(sk);
59 if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) 60 if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
@@ -65,13 +66,12 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
65 66
66 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 67 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
67 sk->sk_write_pending++; 68 sk->sk_write_pending++;
68 if (sk_wait_event(sk, timeo_p, 69 done = sk_wait_event(sk, timeo_p,
69 !((1 << sk->sk_state) & 70 !((1 << sk->sk_state) &
70 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))) 71 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
71 break;
72 finish_wait(sk->sk_sleep, &wait); 72 finish_wait(sk->sk_sleep, &wait);
73 sk->sk_write_pending--; 73 sk->sk_write_pending--;
74 } 74 } while (!done);
75 return 0; 75 return 0;
76} 76}
77 77
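
For readability of the hunk above, a hedged userspace sketch of the control-flow change: the open-coded while (1)/break loop becomes a do/while driven by the wait predicate's result, so the prepare/finish bookkeeping runs in matched pairs around every wait. condition_met() is a hypothetical stand-in for sk_wait_event().

#include <stdbool.h>
#include <stdio.h>

/* hypothetical stand-in for sk_wait_event(): true on the 3rd try */
static bool condition_met(int *attempts)
{
    return ++(*attempts) >= 3;
}

int wait_for_connect(void)
{
    int attempts = 0;
    bool done;

    do {
        printf("prepare_to_wait\n");    /* bookkeeping before the wait */
        done = condition_met(&attempts);
        printf("finish_wait\n");        /* now paired on every iteration */
    } while (!done);

    return 0;
}

int main(void)
{
    return wait_for_connect();
}
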
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 6298cf58ff..4b9bc81ae1 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -31,8 +31,6 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
31 .lhash_lock = RW_LOCK_UNLOCKED, 31 .lhash_lock = RW_LOCK_UNLOCKED,
32 .lhash_users = ATOMIC_INIT(0), 32 .lhash_users = ATOMIC_INIT(0),
33 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), 33 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
34 .portalloc_lock = SPIN_LOCK_UNLOCKED,
35 .port_rover = 1024 - 1,
36}; 34};
37 35
38EXPORT_SYMBOL_GPL(dccp_hashinfo); 36EXPORT_SYMBOL_GPL(dccp_hashinfo);
@@ -125,36 +123,15 @@ static int dccp_v4_hash_connect(struct sock *sk)
125 int ret; 123 int ret;
126 124
127 if (snum == 0) { 125 if (snum == 0) {
128 int rover;
129 int low = sysctl_local_port_range[0]; 126 int low = sysctl_local_port_range[0];
130 int high = sysctl_local_port_range[1]; 127 int high = sysctl_local_port_range[1];
131 int remaining = (high - low) + 1; 128 int remaining = (high - low) + 1;
129 int rover = net_random() % (high - low) + low;
132 struct hlist_node *node; 130 struct hlist_node *node;
133 struct inet_timewait_sock *tw = NULL; 131 struct inet_timewait_sock *tw = NULL;
134 132
135 local_bh_disable(); 133 local_bh_disable();
136
137 /* TODO. Actually it is not so bad idea to remove
138 * dccp_hashinfo.portalloc_lock before next submission to
139 * Linus.
140 * As soon as we touch this place at all it is time to think.
141 *
142 * Now it protects single _advisory_ variable
143 * dccp_hashinfo.port_rover, hence it is mostly useless.
144 * Code will work nicely if we just delete it, but
145 * I am afraid in contented case it will work not better or
146 * even worse: another cpu just will hit the same bucket
147 * and spin there.
148 * So some cpu salt could remove both contention and
149 * memory pingpong. Any ideas how to do this in a nice way?
150 */
151 spin_lock(&dccp_hashinfo.portalloc_lock);
152 rover = dccp_hashinfo.port_rover;
153
154 do { 134 do {
155 rover++;
156 if ((rover < low) || (rover > high))
157 rover = low;
158 head = &dccp_hashinfo.bhash[inet_bhashfn(rover, 135 head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
159 dccp_hashinfo.bhash_size)]; 136 dccp_hashinfo.bhash_size)];
160 spin_lock(&head->lock); 137 spin_lock(&head->lock);
@@ -187,9 +164,9 @@ static int dccp_v4_hash_connect(struct sock *sk)
187 164
188 next_port: 165 next_port:
189 spin_unlock(&head->lock); 166 spin_unlock(&head->lock);
167 if (++rover > high)
168 rover = low;
190 } while (--remaining > 0); 169 } while (--remaining > 0);
191 dccp_hashinfo.port_rover = rover;
192 spin_unlock(&dccp_hashinfo.portalloc_lock);
193 170
194 local_bh_enable(); 171 local_bh_enable();
195 172
@@ -197,9 +174,6 @@ static int dccp_v4_hash_connect(struct sock *sk)
197 174
198ok: 175ok:
199 /* All locks still held and bhs disabled */ 176 /* All locks still held and bhs disabled */
200 dccp_hashinfo.port_rover = rover;
201 spin_unlock(&dccp_hashinfo.portalloc_lock);
202
203 inet_bind_hash(sk, tb, rover); 177 inet_bind_hash(sk, tb, rover);
204 if (sk_unhashed(sk)) { 178 if (sk_unhashed(sk)) {
205 inet_sk(sk)->sport = htons(rover); 179 inet_sk(sk)->sport = htons(rover);
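
The hunks above drop the shared port_rover/portalloc_lock pair in favour of a per-call random starting point, answering the removed TODO comment. A standalone sketch of the new search, under stated assumptions: rand() stands in for net_random(), and a toy predicate replaces the bind-hash bucket walk.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* toy predicate; the kernel walks the bind-hash bucket instead */
static int port_in_use(int port)
{
    return port % 7 == 0;
}

int pick_local_port(int low, int high)
{
    int remaining = (high - low) + 1;
    int rover = rand() % (high - low) + low;   /* random start */

    do {
        if (!port_in_use(rover))
            return rover;          /* the "ok:" path in the diff */
        if (++rover > high)
            rover = low;           /* wrap around, as in the new code */
    } while (--remaining > 0);

    return -1;                     /* local port range exhausted */
}

int main(void)
{
    srand(time(NULL));
    printf("picked port %d\n", pick_local_port(1024, 4999));
    return 0;
}

The identical rewrite is applied to inet_csk_get_port() and tcp_v6_get_port() later in this patch.
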
diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c
index f3b6aa3be6..20cc580a07 100644
--- a/net/ieee80211/ieee80211_crypt.c
+++ b/net/ieee80211/ieee80211_crypt.c
@@ -12,7 +12,6 @@
12 */ 12 */
13 13
14#include <linux/config.h> 14#include <linux/config.h>
15#include <linux/version.h>
16#include <linux/module.h> 15#include <linux/module.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/slab.h> 17#include <linux/slab.h>
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
index 05a853c130..4702217285 100644
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -10,7 +10,6 @@
10 */ 10 */
11 11
12#include <linux/config.h> 12#include <linux/config.h>
13#include <linux/version.h>
14#include <linux/module.h> 13#include <linux/module.h>
15#include <linux/init.h> 14#include <linux/init.h>
16#include <linux/slab.h> 15#include <linux/slab.h>
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index 2e34f29b79..e0988320ef 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -10,7 +10,6 @@
10 */ 10 */
11 11
12#include <linux/config.h> 12#include <linux/config.h>
13#include <linux/version.h>
14#include <linux/module.h> 13#include <linux/module.h>
15#include <linux/init.h> 14#include <linux/init.h>
16#include <linux/slab.h> 15#include <linux/slab.h>
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index 7c08ed2f26..073aebdf0f 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -10,7 +10,6 @@
10 */ 10 */
11 11
12#include <linux/config.h> 12#include <linux/config.h>
13#include <linux/version.h>
14#include <linux/module.h> 13#include <linux/module.h>
15#include <linux/init.h> 14#include <linux/init.h>
16#include <linux/slab.h> 15#include <linux/slab.h>
diff --git a/net/ieee80211/ieee80211_geo.c b/net/ieee80211/ieee80211_geo.c
index c4b54ef8f6..610cc5cbc2 100644
--- a/net/ieee80211/ieee80211_geo.c
+++ b/net/ieee80211/ieee80211_geo.c
@@ -38,7 +38,6 @@
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/tcp.h> 39#include <linux/tcp.h>
40#include <linux/types.h> 40#include <linux/types.h>
41#include <linux/version.h>
42#include <linux/wireless.h> 41#include <linux/wireless.h>
43#include <linux/etherdevice.h> 42#include <linux/etherdevice.h>
44#include <asm/uaccess.h> 43#include <asm/uaccess.h>
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index f66d792cd2..321287bc88 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -45,7 +45,6 @@
45#include <linux/slab.h> 45#include <linux/slab.h>
46#include <linux/tcp.h> 46#include <linux/tcp.h>
47#include <linux/types.h> 47#include <linux/types.h>
48#include <linux/version.h>
49#include <linux/wireless.h> 48#include <linux/wireless.h>
50#include <linux/etherdevice.h> 49#include <linux/etherdevice.h>
51#include <asm/uaccess.h> 50#include <asm/uaccess.h>
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index ce694cf5c1..6ad88218f5 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -28,7 +28,6 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/tcp.h> 29#include <linux/tcp.h>
30#include <linux/types.h> 30#include <linux/types.h>
31#include <linux/version.h>
32#include <linux/wireless.h> 31#include <linux/wireless.h>
33#include <linux/etherdevice.h> 32#include <linux/etherdevice.h>
34#include <asm/uaccess.h> 33#include <asm/uaccess.h>
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 95ccbadbf5..445f206e65 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -38,7 +38,6 @@
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/tcp.h> 39#include <linux/tcp.h>
40#include <linux/types.h> 40#include <linux/types.h>
41#include <linux/version.h>
42#include <linux/wireless.h> 41#include <linux/wireless.h>
43#include <linux/etherdevice.h> 42#include <linux/etherdevice.h>
44#include <asm/uaccess.h> 43#include <asm/uaccess.h>
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8b6d3939e1..c6247fc840 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1908,8 +1908,11 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1908 sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max)); 1908 sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
1909 goto done; 1909 goto done;
1910 } 1910 }
1911 } else 1911 } else {
1912 newpsl = NULL; 1912 newpsl = NULL;
1913 (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
1914 msf->imsf_fmode, 0, NULL, 0);
1915 }
1913 psl = pmc->sflist; 1916 psl = pmc->sflist;
1914 if (psl) { 1917 if (psl) {
1915 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 1918 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 94468a76c5..3fe021f1a5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -78,17 +78,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
78 int low = sysctl_local_port_range[0]; 78 int low = sysctl_local_port_range[0];
79 int high = sysctl_local_port_range[1]; 79 int high = sysctl_local_port_range[1];
80 int remaining = (high - low) + 1; 80 int remaining = (high - low) + 1;
81 int rover; 81 int rover = net_random() % (high - low) + low;
82 82
83 spin_lock(&hashinfo->portalloc_lock);
84 if (hashinfo->port_rover < low)
85 rover = low;
86 else
87 rover = hashinfo->port_rover;
88 do { 83 do {
89 rover++;
90 if (rover > high)
91 rover = low;
92 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; 84 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
93 spin_lock(&head->lock); 85 spin_lock(&head->lock);
94 inet_bind_bucket_for_each(tb, node, &head->chain) 86 inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -97,9 +89,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
97 break; 89 break;
98 next: 90 next:
99 spin_unlock(&head->lock); 91 spin_unlock(&head->lock);
92 if (++rover > high)
93 rover = low;
100 } while (--remaining > 0); 94 } while (--remaining > 0);
101 hashinfo->port_rover = rover;
102 spin_unlock(&hashinfo->portalloc_lock);
103 95
104 /* Exhausted local port range during search? It is not 96 /* Exhausted local port range during search? It is not
105 * possible for us to be holding one of the bind hash 97 * possible for us to be holding one of the bind hash
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 926a668464..4108a5e12b 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -270,14 +270,10 @@ exp_gre(struct ip_conntrack *master,
270 exp_orig->expectfn = pptp_expectfn; 270 exp_orig->expectfn = pptp_expectfn;
271 exp_orig->flags = 0; 271 exp_orig->flags = 0;
272 272
273 exp_orig->dir = IP_CT_DIR_ORIGINAL;
274
275 /* both expectations are identical apart from tuple */ 273 /* both expectations are identical apart from tuple */
276 memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); 274 memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
277 memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); 275 memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
278 276
279 exp_reply->dir = !exp_orig->dir;
280
281 if (ip_nat_pptp_hook_exp_gre) 277 if (ip_nat_pptp_hook_exp_gre)
282 ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); 278 ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
283 else { 279 else {
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 166e6069f1..82a65043a8 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -815,7 +815,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
815 IPCTNL_MSG_CT_NEW, 1, ct); 815 IPCTNL_MSG_CT_NEW, 1, ct);
816 ip_conntrack_put(ct); 816 ip_conntrack_put(ct);
817 if (err <= 0) 817 if (err <= 0)
818 goto out; 818 goto free;
819 819
820 err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); 820 err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
821 if (err < 0) 821 if (err < 0)
@@ -824,9 +824,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
824 DEBUGP("leaving\n"); 824 DEBUGP("leaving\n");
825 return 0; 825 return 0;
826 826
827free:
828 kfree_skb(skb2);
827out: 829out:
828 if (skb2)
829 kfree_skb(skb2);
830 return -1; 830 return -1;
831} 831}
832 832
@@ -1322,21 +1322,16 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
1322 nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, 1322 nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
1323 1, exp); 1323 1, exp);
1324 if (err <= 0) 1324 if (err <= 0)
1325 goto out; 1325 goto free;
1326 1326
1327 ip_conntrack_expect_put(exp); 1327 ip_conntrack_expect_put(exp);
1328 1328
1329 err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); 1329 return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
1330 if (err < 0)
1331 goto free;
1332
1333 return err;
1334 1330
1331free:
1332 kfree_skb(skb2);
1335out: 1333out:
1336 ip_conntrack_expect_put(exp); 1334 ip_conntrack_expect_put(exp);
1337free:
1338 if (skb2)
1339 kfree_skb(skb2);
1340 return err; 1335 return err;
1341} 1336}
1342 1337
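
Both hunks above reorder the error labels so the free path falls through into the common exit, and drop the NULL check now that the label is reached only after a successful allocation. A compilable sketch of the pattern with illustrative names (struct buf, put_ref(); not the conntrack API):

#include <stdlib.h>

struct buf { char data[64]; };

static void put_ref(void) { /* e.g. ip_conntrack_expect_put() */ }

int build_and_send(int fill_fails)
{
    int err = -1;
    struct buf *skb2 = malloc(sizeof(*skb2));

    if (!skb2)
        goto out;           /* nothing allocated: skip the free */
    if (fill_fails)
        goto free;          /* allocated but unusable: free it */

    free(skb2);             /* stands in for the consuming send */
    put_ref();
    return 0;

free:
    free(skb2);             /* reached only when skb2 is valid */
out:                        /* "free:" falls through to here */
    put_ref();
    return err;
}
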
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index c5e3abd246..762f4d9393 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -66,10 +66,8 @@ ip_nat_proto_find_get(u_int8_t protonum)
66 * removed until we've grabbed the reference */ 66 * removed until we've grabbed the reference */
67 preempt_disable(); 67 preempt_disable();
68 p = __ip_nat_proto_find(protonum); 68 p = __ip_nat_proto_find(protonum);
69 if (p) { 69 if (!try_module_get(p->me))
70 if (!try_module_get(p->me)) 70 p = &ip_nat_unknown_protocol;
71 p = &ip_nat_unknown_protocol;
72 }
73 preempt_enable(); 71 preempt_enable();
74 72
75 return p; 73 return p;
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 3cdd0684d3..ee6ab74ad3 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -216,6 +216,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
216 expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id); 216 expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
217 expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); 217 expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
218 expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); 218 expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
219 expect_orig->dir = IP_CT_DIR_ORIGINAL;
219 inv_t.src.ip = reply_t->src.ip; 220 inv_t.src.ip = reply_t->src.ip;
220 inv_t.dst.ip = reply_t->dst.ip; 221 inv_t.dst.ip = reply_t->dst.ip;
221 inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); 222 inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
@@ -233,6 +234,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
233 expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); 234 expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
234 expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); 235 expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
235 expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); 236 expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
237 expect_reply->dir = IP_CT_DIR_REPLY;
236 inv_t.src.ip = orig_t->src.ip; 238 inv_t.src.ip = orig_t->src.ip;
237 inv_t.dst.ip = orig_t->dst.ip; 239 inv_t.dst.ip = orig_t->dst.ip;
238 inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); 240 inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 7c12854016..f7cad7cf1a 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -139,8 +139,8 @@ gre_manip_pkt(struct sk_buff **pskb,
139 break; 139 break;
140 case GRE_VERSION_PPTP: 140 case GRE_VERSION_PPTP:
141 DEBUGP("call_id -> 0x%04x\n", 141 DEBUGP("call_id -> 0x%04x\n",
142 ntohl(tuple->dst.u.gre.key)); 142 ntohs(tuple->dst.u.gre.key));
143 pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key)); 143 pgreh->call_id = tuple->dst.u.gre.key;
144 break; 144 break;
145 default: 145 default:
146 DEBUGP("can't nat unknown GRE version\n"); 146 DEBUGP("can't nat unknown GRE version\n");
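
The fix above matters because the tuple's GRE key is stored as a 16-bit value already in network byte order; the old htons(ntohl(...)) round trip swapped 32 bits of a 16-bit quantity. A standalone demonstration with an illustrative value (on a little-endian host the mangled result comes out as 0):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t key = htons(0x1234);          /* 16-bit, network order */

    uint16_t fixed   = key;                /* new code: copy verbatim */
    uint16_t mangled = htons(ntohl(key));  /* old code: wrong width */

    printf("fixed 0x%04x, mangled 0x%04x\n",
           ntohs(fixed), ntohs(mangled));
    return 0;
}
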
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index 99bbef56f8..f0099a646a 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -62,7 +62,7 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range)
62 62
63struct ip_nat_protocol ip_nat_unknown_protocol = { 63struct ip_nat_protocol ip_nat_unknown_protocol = {
64 .name = "unknown", 64 .name = "unknown",
65 .me = THIS_MODULE, 65 /* .me isn't set: getting a ref to this cannot fail. */
66 .manip_pkt = unknown_manip_pkt, 66 .manip_pkt = unknown_manip_pkt,
67 .in_range = unknown_in_range, 67 .in_range = unknown_in_range,
68 .unique_tuple = unknown_unique_tuple, 68 .unique_tuple = unknown_unique_tuple,
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 1346380213..05d66ab594 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -109,6 +109,7 @@ static struct ipt_target ipt_connmark_reg = {
109 109
110static int __init init(void) 110static int __init init(void)
111{ 111{
112 need_ip_conntrack();
112 return ipt_register_target(&ipt_connmark_reg); 113 return ipt_register_target(&ipt_connmark_reg);
113} 114}
114 115
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3f0013a95..72b7c22e1e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2112,7 +2112,6 @@ void __init tcp_init(void)
2112 sysctl_tcp_max_orphans >>= (3 - order); 2112 sysctl_tcp_max_orphans >>= (3 - order);
2113 sysctl_max_syn_backlog = 128; 2113 sysctl_max_syn_backlog = 128;
2114 } 2114 }
2115 tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
2116 2115
2117 sysctl_tcp_mem[0] = 768 << order; 2116 sysctl_tcp_mem[0] = 768 << order;
2118 sysctl_tcp_mem[1] = 1024 << order; 2117 sysctl_tcp_mem[1] = 1024 << order;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 6d80e063c1..ae35e06090 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -27,7 +27,7 @@
27 */ 27 */
28 28
29static int fast_convergence = 1; 29static int fast_convergence = 1;
30static int max_increment = 32; 30static int max_increment = 16;
31static int low_window = 14; 31static int low_window = 14;
32static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ 32static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
33static int low_utilization_threshold = 153; 33static int low_utilization_threshold = 153;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c85819d847..49d67cd75e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -93,8 +93,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
93 .lhash_lock = RW_LOCK_UNLOCKED, 93 .lhash_lock = RW_LOCK_UNLOCKED,
94 .lhash_users = ATOMIC_INIT(0), 94 .lhash_users = ATOMIC_INIT(0),
95 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), 95 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
96 .portalloc_lock = SPIN_LOCK_UNLOCKED,
97 .port_rover = 1024 - 1,
98}; 96};
99 97
100static int tcp_v4_get_port(struct sock *sk, unsigned short snum) 98static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 41edc14851..2c5f57299d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2163,7 +2163,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2163 2163
2164 /* Step 5: netlink notification of this interface */ 2164 /* Step 5: netlink notification of this interface */
2165 idev->tstamp = jiffies; 2165 idev->tstamp = jiffies;
2166 inet6_ifinfo_notify(RTM_NEWLINK, idev); 2166 inet6_ifinfo_notify(RTM_DELLINK, idev);
2167 2167
2168 /* Shot the device (if unregistered) */ 2168 /* Shot the device (if unregistered) */
2169 2169
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 966b2372aa..f15e04ad02 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -545,8 +545,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
545 sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max)); 545 sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
546 goto done; 546 goto done;
547 } 547 }
548 } else 548 } else {
549 newpsl = NULL; 549 newpsl = NULL;
550 (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
551 }
550 psl = pmc->sflist; 552 psl = pmc->sflist;
551 if (psl) { 553 if (psl) {
552 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 554 (void) ip6_mc_del_src(idev, group, pmc->sfmode,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d693cb988b..d746d3b27e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -114,16 +114,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
114 int low = sysctl_local_port_range[0]; 114 int low = sysctl_local_port_range[0];
115 int high = sysctl_local_port_range[1]; 115 int high = sysctl_local_port_range[1];
116 int remaining = (high - low) + 1; 116 int remaining = (high - low) + 1;
117 int rover; 117 int rover = net_random() % (high - low) + low;
118 118
119 spin_lock(&tcp_hashinfo.portalloc_lock); 119 do {
120 if (tcp_hashinfo.port_rover < low)
121 rover = low;
122 else
123 rover = tcp_hashinfo.port_rover;
124 do { rover++;
125 if (rover > high)
126 rover = low;
127 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; 120 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
128 spin_lock(&head->lock); 121 spin_lock(&head->lock);
129 inet_bind_bucket_for_each(tb, node, &head->chain) 122 inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -132,9 +125,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
132 break; 125 break;
133 next: 126 next:
134 spin_unlock(&head->lock); 127 spin_unlock(&head->lock);
128 if (++rover > high)
129 rover = low;
135 } while (--remaining > 0); 130 } while (--remaining > 0);
136 tcp_hashinfo.port_rover = rover;
137 spin_unlock(&tcp_hashinfo.portalloc_lock);
138 131
139 /* Exhausted local port range during search? It is not 132 /* Exhausted local port range during search? It is not
140 * possible for us to be holding one of the bind hash 133 * possible for us to be holding one of the bind hash
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index d10d552d9c..d3a4f30a7f 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -117,7 +117,7 @@ int nf_queue(struct sk_buff **skb,
117 117
118 /* QUEUE == DROP if noone is waiting, to be safe. */ 118 /* QUEUE == DROP if noone is waiting, to be safe. */
119 read_lock(&queue_handler_lock); 119 read_lock(&queue_handler_lock);
120 if (!queue_handler[pf]->outfn) { 120 if (!queue_handler[pf] || !queue_handler[pf]->outfn) {
121 read_unlock(&queue_handler_lock); 121 read_unlock(&queue_handler_lock);
122 kfree_skb(*skb); 122 kfree_skb(*skb);
123 return 1; 123 return 1;
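
A minimal sketch of the guard added above, with hypothetical names: && short-circuits, so a NULL table slot is never dereferenced when no handler is registered for the protocol family.

struct handler {
    int (*outfn)(void);
};

static struct handler *handlers[32];    /* illustrative handler table */

static int can_enqueue(int pf)
{
    /* before the fix, handlers[pf]->outfn oopsed on a NULL slot */
    return handlers[pf] && handlers[pf]->outfn;
}
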
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index efcd10f996..d194676f36 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -146,11 +146,10 @@ instance_create(u_int16_t group_num, int pid)
146 goto out_unlock; 146 goto out_unlock;
147 } 147 }
148 148
149 inst = kmalloc(sizeof(*inst), GFP_ATOMIC); 149 inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
150 if (!inst) 150 if (!inst)
151 goto out_unlock; 151 goto out_unlock;
152 152
153 memset(inst, 0, sizeof(*inst));
154 INIT_HLIST_NODE(&inst->hlist); 153 INIT_HLIST_NODE(&inst->hlist);
155 inst->lock = SPIN_LOCK_UNLOCKED; 154 inst->lock = SPIN_LOCK_UNLOCKED;
156 /* needs to be two, since we _put() after creation */ 155 /* needs to be two, since we _put() after creation */
@@ -962,10 +961,9 @@ static int nful_open(struct inode *inode, struct file *file)
962 struct iter_state *is; 961 struct iter_state *is;
963 int ret; 962 int ret;
964 963
965 is = kmalloc(sizeof(*is), GFP_KERNEL); 964 is = kzalloc(sizeof(*is), GFP_KERNEL);
966 if (!is) 965 if (!is)
967 return -ENOMEM; 966 return -ENOMEM;
968 memset(is, 0, sizeof(*is));
969 ret = seq_open(file, &nful_seq_ops); 967 ret = seq_open(file, &nful_seq_ops);
970 if (ret < 0) 968 if (ret < 0)
971 goto out_free; 969 goto out_free;
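
kzalloc() is the kernel's zeroing allocator, so each kmalloc() + memset() pair above collapses into one call with the same GFP flags. A userspace analogue of the before/after shapes, with calloc() playing the kzalloc() role:

#include <stdlib.h>
#include <string.h>

struct instance { int id; char name[32]; };

/* before: allocate, then clear by hand */
static struct instance *alloc_then_clear(void)
{
    struct instance *inst = malloc(sizeof(*inst));

    if (inst)
        memset(inst, 0, sizeof(*inst));
    return inst;
}

/* after: a single zeroing allocation, like kzalloc() */
static struct instance *alloc_zeroed(void)
{
    return calloc(1, sizeof(struct instance));
}
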
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index eaa44c4956..f065a6c949 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -136,11 +136,10 @@ instance_create(u_int16_t queue_num, int pid)
136 goto out_unlock; 136 goto out_unlock;
137 } 137 }
138 138
139 inst = kmalloc(sizeof(*inst), GFP_ATOMIC); 139 inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
140 if (!inst) 140 if (!inst)
141 goto out_unlock; 141 goto out_unlock;
142 142
143 memset(inst, 0, sizeof(*inst));
144 inst->queue_num = queue_num; 143 inst->queue_num = queue_num;
145 inst->peer_pid = pid; 144 inst->peer_pid = pid;
146 inst->queue_maxlen = NFQNL_QMAX_DEFAULT; 145 inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
@@ -1036,10 +1035,9 @@ static int nfqnl_open(struct inode *inode, struct file *file)
1036 struct iter_state *is; 1035 struct iter_state *is;
1037 int ret; 1036 int ret;
1038 1037
1039 is = kmalloc(sizeof(*is), GFP_KERNEL); 1038 is = kzalloc(sizeof(*is), GFP_KERNEL);
1040 if (!is) 1039 if (!is)
1041 return -ENOMEM; 1040 return -ENOMEM;
1042 memset(is, 0, sizeof(*is));
1043 ret = seq_open(file, &nfqnl_seq_ops); 1041 ret = seq_open(file, &nfqnl_seq_ops);
1044 if (ret < 0) 1042 if (ret < 0)
1045 goto out_free; 1043 goto out_free;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 81510da317..7f34e7fd76 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -2,13 +2,15 @@
2# Traffic control configuration. 2# Traffic control configuration.
3# 3#
4 4
5menuconfig NET_SCHED 5menu "QoS and/or fair queueing"
6
7config NET_SCHED
6 bool "QoS and/or fair queueing" 8 bool "QoS and/or fair queueing"
7 ---help--- 9 ---help---
8 When the kernel has several packets to send out over a network 10 When the kernel has several packets to send out over a network
9 device, it has to decide which ones to send first, which ones to 11 device, it has to decide which ones to send first, which ones to
10 delay, and which ones to drop. This is the job of the packet 12 delay, and which ones to drop. This is the job of the queueing
11 scheduler, and several different algorithms for how to do this 13 disciplines; several different algorithms for how to do this
12 "fairly" have been proposed. 14 "fairly" have been proposed.
13 15
14 If you say N here, you will get the standard packet scheduler, which 16 If you say N here, you will get the standard packet scheduler, which
@@ -23,13 +25,13 @@ menuconfig NET_SCHED
23 To administer these schedulers, you'll need the user-level utilities 25 To administer these schedulers, you'll need the user-level utilities
24 from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>. 26 from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>.
25 That package also contains some documentation; for more, check out 27 That package also contains some documentation; for more, check out
26 <http://snafu.freedom.org/linux2.2/iproute-notes.html>. 28 <http://linux-net.osdl.org/index.php/Iproute2>.
27 29
28 This Quality of Service (QoS) support will enable you to use 30 This Quality of Service (QoS) support will enable you to use
29 Differentiated Services (diffserv) and Resource Reservation Protocol 31 Differentiated Services (diffserv) and Resource Reservation Protocol
30 (RSVP) on your Linux router if you also say Y to "QoS support", 32 (RSVP) on your Linux router if you also say Y to the corresponding
31 "Packet classifier API" and to some classifiers below. Documentation 33 classifiers below. Documentation and software is at
32 and software is at <http://diffserv.sourceforge.net/>. 34 <http://diffserv.sourceforge.net/>.
33 35
34 If you say Y here and to "/proc file system" below, you will be able 36 If you say Y here and to "/proc file system" below, you will be able
35 to read status information about packet schedulers from the file 37 to read status information about packet schedulers from the file
@@ -42,7 +44,7 @@ choice
42 prompt "Packet scheduler clock source" 44 prompt "Packet scheduler clock source"
43 depends on NET_SCHED 45 depends on NET_SCHED
44 default NET_SCH_CLK_JIFFIES 46 default NET_SCH_CLK_JIFFIES
45 help 47 ---help---
46 Packet schedulers need a monotonic clock that increments at a static 48 Packet schedulers need a monotonic clock that increments at a static
47 rate. The kernel provides several suitable interfaces, each with 49 rate. The kernel provides several suitable interfaces, each with
48 different properties: 50 different properties:
@@ -56,7 +58,7 @@ choice
56 58
57config NET_SCH_CLK_JIFFIES 59config NET_SCH_CLK_JIFFIES
58 bool "Timer interrupt" 60 bool "Timer interrupt"
59 help 61 ---help---
60 Say Y here if you want to use the timer interrupt (jiffies) as clock 62 Say Y here if you want to use the timer interrupt (jiffies) as clock
61 source. This clock source is fast, synchronized on all processors and 63 source. This clock source is fast, synchronized on all processors and
62 handles cpu clock frequency changes, but its resolution is too low 64 handles cpu clock frequency changes, but its resolution is too low
@@ -64,7 +66,7 @@ config NET_SCH_CLK_JIFFIES
64 66
65config NET_SCH_CLK_GETTIMEOFDAY 67config NET_SCH_CLK_GETTIMEOFDAY
66 bool "gettimeofday" 68 bool "gettimeofday"
67 help 69 ---help---
68 Say Y here if you want to use gettimeofday as clock source. This clock 70 Say Y here if you want to use gettimeofday as clock source. This clock
69 source has high resolution, is synchronized on all processors and 71 source has high resolution, is synchronized on all processors and
70 handles cpu clock frequency changes, but it is slow. 72 handles cpu clock frequency changes, but it is slow.
@@ -77,7 +79,7 @@ config NET_SCH_CLK_GETTIMEOFDAY
77config NET_SCH_CLK_CPU 79config NET_SCH_CLK_CPU
78 bool "CPU cycle counter" 80 bool "CPU cycle counter"
79 depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64 81 depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64
80 help 82 ---help---
81 Say Y here if you want to use the CPU's cycle counter as clock source. 83 Say Y here if you want to use the CPU's cycle counter as clock source.
82 This is a cheap and high resolution clock source, but on some 84 This is a cheap and high resolution clock source, but on some
83 architectures it is not synchronized on all processors and doesn't 85 architectures it is not synchronized on all processors and doesn't
@@ -95,134 +97,129 @@ config NET_SCH_CLK_CPU
95 97
96endchoice 98endchoice
97 99
100comment "Queueing/Scheduling"
101 depends on NET_SCHED
102
98config NET_SCH_CBQ 103config NET_SCH_CBQ
99 tristate "CBQ packet scheduler" 104 tristate "Class Based Queueing (CBQ)"
100 depends on NET_SCHED 105 depends on NET_SCHED
101 ---help--- 106 ---help---
102 Say Y here if you want to use the Class-Based Queueing (CBQ) packet 107 Say Y here if you want to use the Class-Based Queueing (CBQ) packet
103 scheduling algorithm for some of your network devices. This 108 scheduling algorithm. This algorithm classifies the waiting packets
104 algorithm classifies the waiting packets into a tree-like hierarchy 109 into a tree-like hierarchy of classes; the leaves of this tree are
105 of classes; the leaves of this tree are in turn scheduled by 110 in turn scheduled by separate algorithms.
106 separate algorithms (called "disciplines" in this context).
107 111
108 See the top of <file:net/sched/sch_cbq.c> for references about the 112 See the top of <file:net/sched/sch_cbq.c> for more details.
109 CBQ algorithm.
110 113
111 CBQ is a commonly used scheduler, so if you're unsure, you should 114 CBQ is a commonly used scheduler, so if you're unsure, you should
112 say Y here. Then say Y to all the queueing algorithms below that you 115 say Y here. Then say Y to all the queueing algorithms below that you
113 want to use as CBQ disciplines. Then say Y to "Packet classifier 116 want to use as leaf disciplines.
114 API" and say Y to all the classifiers you want to use; a classifier
115 is a routine that allows you to sort your outgoing traffic into
116 classes based on a certain criterion.
117 117
118 To compile this code as a module, choose M here: the 118 To compile this code as a module, choose M here: the
119 module will be called sch_cbq. 119 module will be called sch_cbq.
120 120
121config NET_SCH_HTB 121config NET_SCH_HTB
122 tristate "HTB packet scheduler" 122 tristate "Hierarchical Token Bucket (HTB)"
123 depends on NET_SCHED 123 depends on NET_SCHED
124 ---help--- 124 ---help---
125 Say Y here if you want to use the Hierarchical Token Buckets (HTB) 125 Say Y here if you want to use the Hierarchical Token Buckets (HTB)
126 packet scheduling algorithm for some of your network devices. See 126 packet scheduling algorithm. See
127 <http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and 127 <http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and
128 in-depth articles. 128 in-depth articles.
129 129
130 HTB is very similar to the CBQ regarding its goals however is has 130 HTB is very similar to CBQ regarding its goals, however it has
131 different properties and different algorithm. 131 different properties and a different algorithm.
132 132
133 To compile this code as a module, choose M here: the 133 To compile this code as a module, choose M here: the
134 module will be called sch_htb. 134 module will be called sch_htb.
135 135
136config NET_SCH_HFSC 136config NET_SCH_HFSC
137 tristate "HFSC packet scheduler" 137 tristate "Hierarchical Fair Service Curve (HFSC)"
138 depends on NET_SCHED 138 depends on NET_SCHED
139 ---help--- 139 ---help---
140 Say Y here if you want to use the Hierarchical Fair Service Curve 140 Say Y here if you want to use the Hierarchical Fair Service Curve
141 (HFSC) packet scheduling algorithm for some of your network devices. 141 (HFSC) packet scheduling algorithm.
142 142
143 To compile this code as a module, choose M here: the 143 To compile this code as a module, choose M here: the
144 module will be called sch_hfsc. 144 module will be called sch_hfsc.
145 145
146#tristate ' H-PFQ packet scheduler' CONFIG_NET_SCH_HPFQ
147config NET_SCH_ATM 146config NET_SCH_ATM
148 tristate "ATM pseudo-scheduler" 147 tristate "ATM Virtual Circuits (ATM)"
149 depends on NET_SCHED && ATM 148 depends on NET_SCHED && ATM
150 ---help--- 149 ---help---
151 Say Y here if you want to use the ATM pseudo-scheduler. This 150 Say Y here if you want to use the ATM pseudo-scheduler. This
152 provides a framework for invoking classifiers (aka "filters"), which 151 provides a framework for invoking classifiers, which in turn
153 in turn select classes of this queuing discipline. Each class maps 152 select classes of this queuing discipline. Each class maps
154 the flow(s) it is handling to a given virtual circuit (see the top of 153 the flow(s) it is handling to a given virtual circuit.
155 <file:net/sched/sch_atm.c>). 154
155 See the top of <file:net/sched/sch_atm.c>) for more details.
156 156
157 To compile this code as a module, choose M here: the 157 To compile this code as a module, choose M here: the
158 module will be called sch_atm. 158 module will be called sch_atm.
159 159
160config NET_SCH_PRIO 160config NET_SCH_PRIO
161 tristate "The simplest PRIO pseudoscheduler" 161 tristate "Multi Band Priority Queueing (PRIO)"
162 depends on NET_SCHED 162 depends on NET_SCHED
163 help 163 ---help---
164 Say Y here if you want to use an n-band priority queue packet 164 Say Y here if you want to use an n-band priority queue packet
165 "scheduler" for some of your network devices or as a leaf discipline 165 scheduler.
166 for the CBQ scheduling algorithm. If unsure, say Y.
167 166
168 To compile this code as a module, choose M here: the 167 To compile this code as a module, choose M here: the
169 module will be called sch_prio. 168 module will be called sch_prio.
170 169
171config NET_SCH_RED 170config NET_SCH_RED
172 tristate "RED queue" 171 tristate "Random Early Detection (RED)"
173 depends on NET_SCHED 172 depends on NET_SCHED
174 help 173 ---help---
175 Say Y here if you want to use the Random Early Detection (RED) 174 Say Y here if you want to use the Random Early Detection (RED)
176 packet scheduling algorithm for some of your network devices (see 175 packet scheduling algorithm.
177 the top of <file:net/sched/sch_red.c> for details and references 176
178 about the algorithm). 177 See the top of <file:net/sched/sch_red.c> for more details.
179 178
180 To compile this code as a module, choose M here: the 179 To compile this code as a module, choose M here: the
181 module will be called sch_red. 180 module will be called sch_red.
182 181
183config NET_SCH_SFQ 182config NET_SCH_SFQ
184 tristate "SFQ queue" 183 tristate "Stochastic Fairness Queueing (SFQ)"
185 depends on NET_SCHED 184 depends on NET_SCHED
186 ---help--- 185 ---help---
187 Say Y here if you want to use the Stochastic Fairness Queueing (SFQ) 186 Say Y here if you want to use the Stochastic Fairness Queueing (SFQ)
188 packet scheduling algorithm for some of your network devices or as a 187 packet scheduling algorithm.
189 leaf discipline for the CBQ scheduling algorithm (see the top of 188
190 <file:net/sched/sch_sfq.c> for details and references about the SFQ 189 See the top of <file:net/sched/sch_sfq.c> for more details.
191 algorithm).
192 190
193 To compile this code as a module, choose M here: the 191 To compile this code as a module, choose M here: the
194 module will be called sch_sfq. 192 module will be called sch_sfq.
195 193
196config NET_SCH_TEQL 194config NET_SCH_TEQL
197 tristate "TEQL queue" 195 tristate "True Link Equalizer (TEQL)"
198 depends on NET_SCHED 196 depends on NET_SCHED
199 ---help--- 197 ---help---
200 Say Y here if you want to use the True Link Equalizer (TLE) packet 198 Say Y here if you want to use the True Link Equalizer (TLE) packet
201 scheduling algorithm for some of your network devices or as a leaf 199 scheduling algorithm. This queueing discipline allows the combination
202 discipline for the CBQ scheduling algorithm. This queueing 200 of several physical devices into one virtual device.
203 discipline allows the combination of several physical devices into 201
204 one virtual device. (see the top of <file:net/sched/sch_teql.c> for 202 See the top of <file:net/sched/sch_teql.c> for more details.
205 details).
206 203
207 To compile this code as a module, choose M here: the 204 To compile this code as a module, choose M here: the
208 module will be called sch_teql. 205 module will be called sch_teql.
209 206
210config NET_SCH_TBF 207config NET_SCH_TBF
211 tristate "TBF queue" 208 tristate "Token Bucket Filter (TBF)"
212 depends on NET_SCHED 209 depends on NET_SCHED
213 help 210 ---help---
214 Say Y here if you want to use the Simple Token Bucket Filter (TBF) 211 Say Y here if you want to use the Token Bucket Filter (TBF) packet
215 packet scheduling algorithm for some of your network devices or as a 212 scheduling algorithm.
216 leaf discipline for the CBQ scheduling algorithm (see the top of 213
217 <file:net/sched/sch_tbf.c> for a description of the TBF algorithm). 214 See the top of <file:net/sched/sch_tbf.c> for more details.
218 215
219 To compile this code as a module, choose M here: the 216 To compile this code as a module, choose M here: the
220 module will be called sch_tbf. 217 module will be called sch_tbf.
221 218
222config NET_SCH_GRED 219config NET_SCH_GRED
223 tristate "GRED queue" 220 tristate "Generic Random Early Detection (GRED)"
224 depends on NET_SCHED 221 depends on NET_SCHED
225 help 222 ---help---
226 Say Y here if you want to use the Generic Random Early Detection 223 Say Y here if you want to use the Generic Random Early Detection
227 (GRED) packet scheduling algorithm for some of your network devices 224 (GRED) packet scheduling algorithm for some of your network devices
228 (see the top of <file:net/sched/sch_red.c> for details and 225 (see the top of <file:net/sched/sch_red.c> for details and
@@ -232,9 +229,9 @@ config NET_SCH_GRED
232 module will be called sch_gred. 229 module will be called sch_gred.
233 230
234config NET_SCH_DSMARK 231config NET_SCH_DSMARK
235 tristate "Diffserv field marker" 232 tristate "Differentiated Services marker (DSMARK)"
236 depends on NET_SCHED 233 depends on NET_SCHED
237 help 234 ---help---
238 Say Y if you want to schedule packets according to the 235 Say Y if you want to schedule packets according to the
239 Differentiated Services architecture proposed in RFC 2475. 236 Differentiated Services architecture proposed in RFC 2475.
240 Technical information on this method, with pointers to associated 237 Technical information on this method, with pointers to associated
@@ -244,9 +241,9 @@ config NET_SCH_DSMARK
244 module will be called sch_dsmark. 241 module will be called sch_dsmark.
245 242
246config NET_SCH_NETEM 243config NET_SCH_NETEM
247 tristate "Network emulator" 244 tristate "Network emulator (NETEM)"
248 depends on NET_SCHED 245 depends on NET_SCHED
249 help 246 ---help---
250 Say Y if you want to emulate network delay, loss, and packet 247 Say Y if you want to emulate network delay, loss, and packet
251 re-ordering. This is often useful to simulate networks when 248 re-ordering. This is often useful to simulate networks when
252 testing applications or protocols. 249 testing applications or protocols.
@@ -259,58 +256,23 @@ config NET_SCH_NETEM
259config NET_SCH_INGRESS 256config NET_SCH_INGRESS
260 tristate "Ingress Qdisc" 257 tristate "Ingress Qdisc"
261 depends on NET_SCHED 258 depends on NET_SCHED
262 help 259 ---help---
263 If you say Y here, you will be able to police incoming bandwidth 260 Say Y here if you want to use classifiers for incoming packets.
264 and drop packets when this bandwidth exceeds your desired rate.
265 If unsure, say Y. 261 If unsure, say Y.
266 262
267 To compile this code as a module, choose M here: the 263 To compile this code as a module, choose M here: the
268 module will be called sch_ingress. 264 module will be called sch_ingress.
269 265
270config NET_QOS 266comment "Classification"
271 bool "QoS support"
272 depends on NET_SCHED 267 depends on NET_SCHED
273 ---help---
274 Say Y here if you want to include Quality Of Service scheduling
275 features, which means that you will be able to request certain
276 rate-of-flow limits for your network devices.
277
278 This Quality of Service (QoS) support will enable you to use
279 Differentiated Services (diffserv) and Resource Reservation Protocol
280 (RSVP) on your Linux router if you also say Y to "Packet classifier
281 API" and to some classifiers below. Documentation and software is at
282 <http://diffserv.sourceforge.net/>.
283
284 Note that the answer to this question won't directly affect the
285 kernel: saying N will just cause the configurator to skip all
286 the questions about QoS support.
287
288config NET_ESTIMATOR
289 bool "Rate estimator"
290 depends on NET_QOS
291 help
292 In order for Quality of Service scheduling to work, the current
293 rate-of-flow for a network device has to be estimated; if you say Y
294 here, the kernel will do just that.
295 268
296config NET_CLS 269config NET_CLS
297 bool "Packet classifier API" 270 boolean
298 depends on NET_SCHED
299 ---help---
300 The CBQ scheduling algorithm requires that network packets which are
301 scheduled to be sent out over a network device be classified
302 according to some criterion. If you say Y here, you will get a
303 choice of several different packet classifiers with the following
304 questions.
305
306 This will enable you to use Differentiated Services (diffserv) and
307 Resource Reservation Protocol (RSVP) on your Linux router.
308 Documentation and software is at
309 <http://diffserv.sourceforge.net/>.
310 271
311config NET_CLS_BASIC 272config NET_CLS_BASIC
312 tristate "Basic classifier" 273 tristate "Elementary classification (BASIC)"
313 depends on NET_CLS 274 depends NET_SCHED
275 select NET_CLS
314 ---help--- 276 ---help---
315 Say Y here if you want to be able to classify packets using 277 Say Y here if you want to be able to classify packets using
316 only extended matches and actions. 278 only extended matches and actions.
@@ -319,24 +281,25 @@ config NET_CLS_BASIC
319 module will be called cls_basic. 281 module will be called cls_basic.
320 282
321config NET_CLS_TCINDEX 283config NET_CLS_TCINDEX
322 tristate "TC index classifier" 284 tristate "Traffic-Control Index (TCINDEX)"
323 depends on NET_CLS 285 depends NET_SCHED
324 help 286 select NET_CLS
325 If you say Y here, you will be able to classify outgoing packets 287 ---help---
326 according to the tc_index field of the skb. You will want this 288 Say Y here if you want to be able to classify packets based on
327 feature if you want to implement Differentiated Services using 289 traffic control indices. You will want this feature if you want
328 sch_dsmark. If unsure, say Y. 290 to implement Differentiated Services together with DSMARK.
329 291
330 To compile this code as a module, choose M here: the 292 To compile this code as a module, choose M here: the
331 module will be called cls_tcindex. 293 module will be called cls_tcindex.
332 294
333config NET_CLS_ROUTE4 295config NET_CLS_ROUTE4
334 tristate "Routing table based classifier" 296 tristate "Routing decision (ROUTE)"
335 depends on NET_CLS 297 depends NET_SCHED
336 select NET_CLS_ROUTE 298 select NET_CLS_ROUTE
337 help 299 select NET_CLS
338 If you say Y here, you will be able to classify outgoing packets 300 ---help---
339 according to the route table entry they matched. If unsure, say Y. 301 If you say Y here, you will be able to classify packets
302 according to the route table entry they matched.
340 303
341 To compile this code as a module, choose M here: the 304 To compile this code as a module, choose M here: the
342 module will be called cls_route. 305 module will be called cls_route.
@@ -346,58 +309,45 @@ config NET_CLS_ROUTE
346 default n 309 default n
347 310
348config NET_CLS_FW 311config NET_CLS_FW
349 tristate "Firewall based classifier" 312 tristate "Netfilter mark (FW)"
350 depends on NET_CLS 313 depends NET_SCHED
351 help 314 select NET_CLS
352 If you say Y here, you will be able to classify outgoing packets 315 ---help---
353 according to firewall criteria you specified. 316 If you say Y here, you will be able to classify packets
317 according to netfilter/firewall marks.
354 318
355 To compile this code as a module, choose M here: the 319 To compile this code as a module, choose M here: the
356 module will be called cls_fw. 320 module will be called cls_fw.
357 321
358config NET_CLS_U32 322config NET_CLS_U32
359 tristate "U32 classifier" 323 tristate "Universal 32bit comparisons w/ hashing (U32)"
360 depends on NET_CLS 324 depends NET_SCHED
361 help 325 select NET_CLS
362 If you say Y here, you will be able to classify outgoing packets 326 ---help---
363 according to their destination address. If unsure, say Y. 327 Say Y here to be able to classify packets using a universal
328 32bit piece-based comparison scheme.
364 329
365 To compile this code as a module, choose M here: the 330 To compile this code as a module, choose M here: the
366 module will be called cls_u32. 331 module will be called cls_u32.
367 332
368config CLS_U32_PERF 333config CLS_U32_PERF
369 bool "U32 classifier performance counters" 334 bool "Performance counters support"
370 depends on NET_CLS_U32 335 depends on NET_CLS_U32
371 help 336 ---help---
372 gathers stats that could be used to tune u32 classifier performance. 337 Say Y here to make u32 gather additional statistics useful for
373 Requires a new iproute2 338 fine tuning u32 classifiers.
374 You MUST NOT turn this on if you dont have an update iproute2.
375
376config NET_CLS_IND
377 bool "classify input device (slows things u32/fw) "
378 depends on NET_CLS_U32 || NET_CLS_FW
379 help
380 This option will be killed eventually when a
381 metadata action appears because it slows things a little
382 Available only for u32 and fw classifiers.
383 Requires a new iproute2
384 You MUST NOT turn this on if you dont have an update iproute2.
385 339
386config CLS_U32_MARK 340config CLS_U32_MARK
387 bool "Use nfmark as a key in U32 classifier" 341 bool "Netfilter marks support"
388 depends on NET_CLS_U32 && NETFILTER 342 depends on NET_CLS_U32 && NETFILTER
389 help 343 ---help---
390 This allows you to match mark in a u32 filter. 344 Say Y here to be able to use netfilter marks as u32 key.
391 Example:
392 tc filter add dev eth0 protocol ip parent 1:0 prio 5 u32 \
393 match mark 0x0090 0xffff \
394 match ip dst 4.4.4.4 \
395 flowid 1:90
396 You must use a new iproute2 to use this feature.
397 345
398config NET_CLS_RSVP 346config NET_CLS_RSVP
399 tristate "Special RSVP classifier" 347 tristate "IPv4 Resource Reservation Protocol (RSVP)"
400 depends on NET_CLS && NET_QOS 348 depends on NET_SCHED
349 select NET_CLS
350 select NET_ESTIMATOR
401 ---help--- 351 ---help---
402 The Resource Reservation Protocol (RSVP) permits end systems to 352 The Resource Reservation Protocol (RSVP) permits end systems to
403 request a minimum and maximum data flow rate for a connection; this 353 request a minimum and maximum data flow rate for a connection; this
@@ -410,31 +360,33 @@ config NET_CLS_RSVP
410 module will be called cls_rsvp. 360 module will be called cls_rsvp.
411 361
412config NET_CLS_RSVP6 362config NET_CLS_RSVP6
413 tristate "Special RSVP classifier for IPv6" 363 tristate "IPv6 Resource Reservation Protocol (RSVP6)"
414 depends on NET_CLS && NET_QOS 364 depends on NET_SCHED
365 select NET_CLS
366 select NET_ESTIMATOR
415 ---help--- 367 ---help---
416 The Resource Reservation Protocol (RSVP) permits end systems to 368 The Resource Reservation Protocol (RSVP) permits end systems to
417 request a minimum and maximum data flow rate for a connection; this 369 request a minimum and maximum data flow rate for a connection; this
418 is important for real time data such as streaming sound or video. 370 is important for real time data such as streaming sound or video.
419 371
420 Say Y here if you want to be able to classify outgoing packets based 372 Say Y here if you want to be able to classify outgoing packets based
421 on their RSVP requests and you are using the new Internet Protocol 373 on their RSVP requests and you are using IPv6.
422 IPv6 as opposed to the older and more common IPv4.
423 374
424 To compile this code as a module, choose M here: the 375 To compile this code as a module, choose M here: the
425 module will be called cls_rsvp6. 376 module will be called cls_rsvp6.
426 377
427config NET_EMATCH 378config NET_EMATCH
428 bool "Extended Matches" 379 bool "Extended Matches"
429 depends on NET_CLS 380 depends NET_SCHED
381 select NET_CLS
430 ---help--- 382 ---help---
431 Say Y here if you want to use extended matches on top of classifiers 383 Say Y here if you want to use extended matches on top of classifiers
432 and select the extended matches below. 384 and select the extended matches below.
433 385
434 Extended matches are small classification helpers not worth writing 386 Extended matches are small classification helpers not worth writing
435 a separate classifier. 387 a separate classifier for.
436 388
437 You must have a recent version of the iproute2 tools in order to use 389 A recent version of the iproute2 package is required to use
438 extended matches. 390 extended matches.
439 391
440config NET_EMATCH_STACK 392config NET_EMATCH_STACK
@@ -468,7 +420,7 @@ config NET_EMATCH_NBYTE
468 module will be called em_nbyte. 420 module will be called em_nbyte.
469 421
470config NET_EMATCH_U32 422config NET_EMATCH_U32
471 tristate "U32 hashing key" 423 tristate "U32 key"
472 depends on NET_EMATCH 424 depends on NET_EMATCH
473 ---help--- 425 ---help---
474 Say Y here if you want to be able to classify packets using 426 Say Y here if you want to be able to classify packets using
@@ -496,76 +448,120 @@ config NET_EMATCH_TEXT
496 select TEXTSEARCH_BM 448 select TEXTSEARCH_BM
497 select TEXTSEARCH_FSM 449 select TEXTSEARCH_FSM
498 ---help--- 450 ---help---
499 Say Y here if you want to be ablt to classify packets based on 451 Say Y here if you want to be able to classify packets based on
500 textsearch comparisons. 452 textsearch comparisons.
501 453
502 To compile this code as a module, choose M here: the 454 To compile this code as a module, choose M here: the
503 module will be called em_text. 455 module will be called em_text.
504 456
505config NET_CLS_ACT 457config NET_CLS_ACT
506 bool "Packet ACTION" 458 bool "Actions"
507 depends on EXPERIMENTAL && NET_CLS && NET_QOS 459 depends on EXPERIMENTAL && NET_SCHED
460 select NET_ESTIMATOR
508 ---help--- 461 ---help---
509 This option requires you have a new iproute2. It enables 462 Say Y here if you want to use traffic control actions. Actions
510 tc extensions which can be used with tc classifiers. 463 get attached to classifiers and are invoked after a successful
511 You MUST NOT turn this on if you dont have an update iproute2. 464 classification. They are used to overwrite the classification
465 result, instantly drop or redirect packets, etc.
466
467 A recent version of the iproute2 package is required to use
468 actions.
512 469
513config NET_ACT_POLICE 470config NET_ACT_POLICE
514 tristate "Policing Actions" 471 tristate "Traffic Policing"
515 depends on NET_CLS_ACT 472 depends on NET_CLS_ACT
516 ---help--- 473 ---help---
517 If you are using a newer iproute2 select this one, otherwise use one 474 Say Y here if you want to do traffic policing, i.e. strict
518 below to select a policer. 475 bandwidth limiting. This action replaces the existing policing
519 You MUST NOT turn this on if you dont have an update iproute2. 476 module.
477
478 To compile this code as a module, choose M here: the
479 module will be called police.
520 480
521config NET_ACT_GACT 481config NET_ACT_GACT
522 tristate "generic Actions" 482 tristate "Generic actions"
523 depends on NET_CLS_ACT 483 depends on NET_CLS_ACT
524 ---help--- 484 ---help---
525 You must have new iproute2 to use this feature. 485 Say Y here to take generic actions such as dropping and
526 This adds simple filtering actions like drop, accept etc. 486 accepting packets.
487
488 To compile this code as a module, choose M here: the
489 module will be called gact.
527 490
528config GACT_PROB 491config GACT_PROB
529 bool "generic Actions probability" 492 bool "Probability support"
530 depends on NET_ACT_GACT 493 depends on NET_ACT_GACT
531 ---help--- 494 ---help---
532 Allows generic actions to be randomly or deterministically used. 495 Say Y here to use the generic action randomly or deterministically.
533 496
534config NET_ACT_MIRRED 497config NET_ACT_MIRRED
535 tristate "Packet In/Egress redirecton/mirror Actions" 498 tristate "Redirecting and Mirroring"
536 depends on NET_CLS_ACT 499 depends on NET_CLS_ACT
537 ---help--- 500 ---help---
538 requires new iproute2 501 Say Y here to allow packets to be mirrored or redirected to
539 This allows packets to be mirrored or redirected to netdevices 502 other devices.
503
504 To compile this code as a module, choose M here: the
505 module will be called mirred.
540 506
541config NET_ACT_IPT 507config NET_ACT_IPT
542 tristate "iptables Actions" 508 tristate "IPtables targets"
543 depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES 509 depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
544 ---help--- 510 ---help---
545 requires new iproute2 511 Say Y here to be able to invoke iptables targets after successful
546 This allows iptables targets to be used by tc filters 512 classification.
513
514 To compile this code as a module, choose M here: the
515 module will be called ipt.
547 516
548config NET_ACT_PEDIT 517config NET_ACT_PEDIT
549 tristate "Generic Packet Editor Actions" 518 tristate "Packet Editing"
550 depends on NET_CLS_ACT 519 depends on NET_CLS_ACT
551 ---help--- 520 ---help---
552 requires new iproute2 521 Say Y here if you want to mangle the content of packets.
553 This allows for packets to be generically edited
554 522
555config NET_CLS_POLICE 523 To compile this code as a module, choose M here: the
556 bool "Traffic policing (needed for in/egress)" 524 module will be called pedit.
557 depends on NET_CLS && NET_QOS && NET_CLS_ACT!=y
558 help
559 Say Y to support traffic policing (bandwidth limits). Needed for
560 ingress and egress rate limiting.
561 525
562config NET_ACT_SIMP 526config NET_ACT_SIMP
563 tristate "Simple action" 527 tristate "Simple Example (Debug)"
564 depends on NET_CLS_ACT 528 depends on NET_CLS_ACT
565 ---help--- 529 ---help---
566 You must have new iproute2 to use this feature. 530 Say Y here to add a simple action for demonstration purposes.
567 This adds a very simple action for demonstration purposes 531 It is meant as an example and for debugging purposes. It will
568 The idea is to give action authors a basic example to look at. 532 print a configured policy string followed by the packet count
569 All this action will do is print on the console the configured 533 to the console for every packet that passes by.
570 policy string followed by _ then packet count. 534
535 If unsure, say N.
536
537 To compile this code as a module, choose M here: the
538 module will be called simple.
539
540config NET_CLS_POLICE
541 bool "Traffic Policing (obsolete)"
542 depends on NET_SCHED && NET_CLS_ACT!=y
543 select NET_ESTIMATOR
544 ---help---
545 Say Y here if you want to do traffic policing, i.e. strict
546 bandwidth limiting. This option is obsoleted by the traffic
547 policer implemented as an action; it stays here for compatibility
548 reasons.
549
550config NET_CLS_IND
551 bool "Incoming device classification"
552 depends on NET_SCHED && (NET_CLS_U32 || NET_CLS_FW)
553 ---help---
554 Say Y here to extend the u32 and fw classifier to support
555 classification based on the incoming device. This option is
556 likely to disappear in favour of the metadata ematch.
557
558config NET_ESTIMATOR
559 bool "Rate estimator"
560 depends on NET_SCHED
561 ---help---
562 Say Y here to allow using rate estimators to estimate the current
563 rate-of-flow for network devices, queues, etc. This module is
564 automatically selected if needed but can be selected manually for
565 statistical purposes.
571 566
567endmenu
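As a concrete usage example for the action options above (syntax recalled from memory and dependent on the iproute2 version, so treat it as a hedged sketch rather than the canonical form), a recent tc can attach a generic drop action to a match-all u32 filter roughly like:

tc filter add dev eth0 parent 1: protocol ip u32 match u32 0 0 action drop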
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 25c171c327..29a2dd9f30 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -15,247 +15,281 @@
15 * from Ren Liu 15 * from Ren Liu
16 * - More error checks 16 * - More error checks
17 * 17 *
18 * 18 * For all the glorious comments look at include/net/red.h
19 *
20 * For all the glorious comments look at Alexey's sch_red.c
21 */ 19 */
22 20
23#include <linux/config.h> 21#include <linux/config.h>
24#include <linux/module.h> 22#include <linux/module.h>
25#include <asm/uaccess.h>
26#include <asm/system.h>
27#include <linux/bitops.h>
28#include <linux/types.h> 23#include <linux/types.h>
29#include <linux/kernel.h> 24#include <linux/kernel.h>
30#include <linux/sched.h>
31#include <linux/string.h>
32#include <linux/mm.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/in.h>
36#include <linux/errno.h>
37#include <linux/interrupt.h>
38#include <linux/if_ether.h>
39#include <linux/inet.h>
40#include <linux/netdevice.h> 25#include <linux/netdevice.h>
41#include <linux/etherdevice.h>
42#include <linux/notifier.h>
43#include <net/ip.h>
44#include <net/route.h>
45#include <linux/skbuff.h> 26#include <linux/skbuff.h>
46#include <net/sock.h>
47#include <net/pkt_sched.h> 27#include <net/pkt_sched.h>
28#include <net/red.h>
48 29
49#if 1 /* control */ 30#define GRED_DEF_PRIO (MAX_DPs / 2)
50#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) 31#define GRED_VQ_MASK (MAX_DPs - 1)
51#else
52#define DPRINTK(format,args...)
53#endif
54
55#if 0 /* data */
56#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
57#else
58#define D2PRINTK(format,args...)
59#endif
60 32
61struct gred_sched_data; 33struct gred_sched_data;
62struct gred_sched; 34struct gred_sched;
63 35
64struct gred_sched_data 36struct gred_sched_data
65{ 37{
66/* Parameters */
67 u32 limit; /* HARD maximal queue length */ 38 u32 limit; /* HARD maximal queue length */
68 u32 qth_min; /* Min average length threshold: A scaled */
69 u32 qth_max; /* Max average length threshold: A scaled */
70 u32 DP; /* the drop parameters */ 39 u32 DP; /* the drop parameters */
71 char Wlog; /* log(W) */
72 char Plog; /* random number bits */
73 u32 Scell_max;
74 u32 Rmask;
75 u32 bytesin; /* bytes seen on virtualQ so far*/ 40 u32 bytesin; /* bytes seen on virtualQ so far*/
76 u32 packetsin; /* packets seen on virtualQ so far*/ 41 u32 packetsin; /* packets seen on virtualQ so far*/
77 u32 backlog; /* bytes on the virtualQ */ 42 u32 backlog; /* bytes on the virtualQ */
78 u32 forced; /* packets dropped for exceeding limits */ 43 u8 prio; /* the prio of this vq */
79 u32 early; /* packets dropped as a warning */ 44
80 u32 other; /* packets dropped by invoking drop() */ 45 struct red_parms parms;
81 u32 pdrop; /* packets dropped because we exceeded physical queue limits */ 46 struct red_stats stats;
82 char Scell_log; 47};
83 u8 Stab[256]; 48
84 u8 prio; /* the prio of this vq */ 49enum {
85 50 GRED_WRED_MODE = 1,
86/* Variables */ 51 GRED_RIO_MODE,
87 unsigned long qave; /* Average queue length: A scaled */
88 int qcount; /* Packets since last random number generation */
89 u32 qR; /* Cached random number */
90
91 psched_time_t qidlestart; /* Start of idle period */
92}; 52};
93 53
94struct gred_sched 54struct gred_sched
95{ 55{
96 struct gred_sched_data *tab[MAX_DPs]; 56 struct gred_sched_data *tab[MAX_DPs];
97 u32 DPs; 57 unsigned long flags;
98 u32 def; 58 u32 red_flags;
99 u8 initd; 59 u32 DPs;
100 u8 grio; 60 u32 def;
101 u8 eqp; 61 struct red_parms wred_set;
102}; 62};
103 63
104static int 64static inline int gred_wred_mode(struct gred_sched *table)
105gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
106{ 65{
107 psched_time_t now; 66 return test_bit(GRED_WRED_MODE, &table->flags);
108 struct gred_sched_data *q=NULL; 67}
109 struct gred_sched *t= qdisc_priv(sch); 68
110 unsigned long qave=0; 69static inline void gred_enable_wred_mode(struct gred_sched *table)
111 int i=0; 70{
71 __set_bit(GRED_WRED_MODE, &table->flags);
72}
73
74static inline void gred_disable_wred_mode(struct gred_sched *table)
75{
76 __clear_bit(GRED_WRED_MODE, &table->flags);
77}
78
79static inline int gred_rio_mode(struct gred_sched *table)
80{
81 return test_bit(GRED_RIO_MODE, &table->flags);
82}
83
84static inline void gred_enable_rio_mode(struct gred_sched *table)
85{
86 __set_bit(GRED_RIO_MODE, &table->flags);
87}
88
89static inline void gred_disable_rio_mode(struct gred_sched *table)
90{
91 __clear_bit(GRED_RIO_MODE, &table->flags);
92}
93
94static inline int gred_wred_mode_check(struct Qdisc *sch)
95{
96 struct gred_sched *table = qdisc_priv(sch);
97 int i;
112 98
113 if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) { 99 /* Really ugly O(n^2) but shouldn't be needed too frequently. */
114 D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n"); 100 for (i = 0; i < table->DPs; i++) {
115 goto do_enqueue; 101 struct gred_sched_data *q = table->tab[i];
102 int n;
103
104 if (q == NULL)
105 continue;
106
107 for (n = 0; n < table->DPs; n++)
108 if (table->tab[n] && table->tab[n] != q &&
109 table->tab[n]->prio == q->prio)
110 return 1;
116 } 111 }
117 112
113 return 0;
114}
115
116static inline unsigned int gred_backlog(struct gred_sched *table,
117 struct gred_sched_data *q,
118 struct Qdisc *sch)
119{
120 if (gred_wred_mode(table))
121 return sch->qstats.backlog;
122 else
123 return q->backlog;
124}
125
126static inline u16 tc_index_to_dp(struct sk_buff *skb)
127{
128 return skb->tc_index & GRED_VQ_MASK;
129}
130
131static inline void gred_load_wred_set(struct gred_sched *table,
132 struct gred_sched_data *q)
133{
134 q->parms.qavg = table->wred_set.qavg;
135 q->parms.qidlestart = table->wred_set.qidlestart;
136}
137
138static inline void gred_store_wred_set(struct gred_sched *table,
139 struct gred_sched_data *q)
140{
141 table->wred_set.qavg = q->parms.qavg;
142}
143
144static inline int gred_use_ecn(struct gred_sched *t)
145{
146 return t->red_flags & TC_RED_ECN;
147}
118 148
119 if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) { 149static inline int gred_use_harddrop(struct gred_sched *t)
120 printk("GRED: setting to default (%d)\n ",t->def); 150{
121 if (!(q=t->tab[t->def])) { 151 return t->red_flags & TC_RED_HARDDROP;
122 DPRINTK("GRED: setting to default FAILED! dropping!! " 152}
123 "(%d)\n ", t->def); 153
124 goto drop; 154static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
155{
156 struct gred_sched_data *q=NULL;
157 struct gred_sched *t= qdisc_priv(sch);
158 unsigned long qavg = 0;
159 u16 dp = tc_index_to_dp(skb);
160
161 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
162 dp = t->def;
163
164 if ((q = t->tab[dp]) == NULL) {
165 /* Pass through packets not assigned to a DP
166 * if no default DP has been configured. This
167 * allows for DP flows to be left untouched.
168 */
169 if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len)
170 return qdisc_enqueue_tail(skb, sch);
171 else
172 goto drop;
125 } 173 }
174
126 /* fix tc_index? --could be controversial but needed for 175 /* fix tc_index? --could be controversial but needed for
127 requeueing */ 176 requeueing */
128 skb->tc_index=(skb->tc_index&0xfffffff0) | t->def; 177 skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
129 } 178 }
130 179
131 D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d " 180 /* sum up all the qaves of prios <= to ours to get the new qave */
132 "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog, 181 if (!gred_wred_mode(t) && gred_rio_mode(t)) {
133 sch->qstats.backlog); 182 int i;
134 /* sum up all the qaves of prios <= to ours to get the new qave*/ 183
135 if (!t->eqp && t->grio) { 184 for (i = 0; i < t->DPs; i++) {
136 for (i=0;i<t->DPs;i++) { 185 if (t->tab[i] && t->tab[i]->prio < q->prio &&
137 if ((!t->tab[i]) || (i==q->DP)) 186 !red_is_idling(&t->tab[i]->parms))
138 continue; 187 qavg += t->tab[i]->parms.qavg;
139
140 if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
141 qave +=t->tab[i]->qave;
142 } 188 }
143 189
144 } 190 }
145 191
146 q->packetsin++; 192 q->packetsin++;
147 q->bytesin+=skb->len; 193 q->bytesin += skb->len;
148 194
149 if (t->eqp && t->grio) { 195 if (gred_wred_mode(t))
150 qave=0; 196 gred_load_wred_set(t, q);
151 q->qave=t->tab[t->def]->qave;
152 q->qidlestart=t->tab[t->def]->qidlestart;
153 }
154 197
155 if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { 198 q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch));
156 long us_idle;
157 PSCHED_GET_TIME(now);
158 us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
159 PSCHED_SET_PASTPERFECT(q->qidlestart);
160 199
161 q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF]; 200 if (red_is_idling(&q->parms))
162 } else { 201 red_end_of_idle_period(&q->parms);
163 if (t->eqp) {
164 q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
165 } else {
166 q->qave += q->backlog - (q->qave >> q->Wlog);
167 }
168 202
169 } 203 if (gred_wred_mode(t))
170 204 gred_store_wred_set(t, q);
171
172 if (t->eqp && t->grio)
173 t->tab[t->def]->qave=q->qave;
174
175 if ((q->qave+qave) < q->qth_min) {
176 q->qcount = -1;
177enqueue:
178 if (q->backlog + skb->len <= q->limit) {
179 q->backlog += skb->len;
180do_enqueue:
181 __skb_queue_tail(&sch->q, skb);
182 sch->qstats.backlog += skb->len;
183 sch->bstats.bytes += skb->len;
184 sch->bstats.packets++;
185 return 0;
186 } else {
187 q->pdrop++;
188 }
189 205
190drop: 206 switch (red_action(&q->parms, q->parms.qavg + qavg)) {
191 kfree_skb(skb); 207 case RED_DONT_MARK:
192 sch->qstats.drops++; 208 break;
193 return NET_XMIT_DROP; 209
194 } 210 case RED_PROB_MARK:
195 if ((q->qave+qave) >= q->qth_max) { 211 sch->qstats.overlimits++;
196 q->qcount = -1; 212 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
197 sch->qstats.overlimits++; 213 q->stats.prob_drop++;
198 q->forced++; 214 goto congestion_drop;
199 goto drop; 215 }
216
217 q->stats.prob_mark++;
218 break;
219
220 case RED_HARD_MARK:
221 sch->qstats.overlimits++;
222 if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
223 !INET_ECN_set_ce(skb)) {
224 q->stats.forced_drop++;
225 goto congestion_drop;
226 }
227 q->stats.forced_mark++;
228 break;
200 } 229 }
201 if (++q->qcount) { 230
202 if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR) 231 if (q->backlog + skb->len <= q->limit) {
203 goto enqueue; 232 q->backlog += skb->len;
204 q->qcount = 0; 233 return qdisc_enqueue_tail(skb, sch);
205 q->qR = net_random()&q->Rmask;
206 sch->qstats.overlimits++;
207 q->early++;
208 goto drop;
209 } 234 }
210 q->qR = net_random()&q->Rmask; 235
211 goto enqueue; 236 q->stats.pdrop++;
237drop:
238 return qdisc_drop(skb, sch);
239
240congestion_drop:
241 qdisc_drop(skb, sch);
242 return NET_XMIT_CN;
212} 243}
213 244
214static int 245static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
215gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
216{ 246{
247 struct gred_sched *t = qdisc_priv(sch);
217 struct gred_sched_data *q; 248 struct gred_sched_data *q;
218 struct gred_sched *t= qdisc_priv(sch); 249 u16 dp = tc_index_to_dp(skb);
219 q= t->tab[(skb->tc_index&0xf)]; 250
220/* error checking here -- probably unnecessary */ 251 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
221 PSCHED_SET_PASTPERFECT(q->qidlestart); 252 if (net_ratelimit())
222 253 printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x "
223 __skb_queue_head(&sch->q, skb); 254 "for requeue, screwing up backlog.\n",
224 sch->qstats.backlog += skb->len; 255 tc_index_to_dp(skb));
225 sch->qstats.requeues++; 256 } else {
226 q->backlog += skb->len; 257 if (red_is_idling(&q->parms))
227 return 0; 258 red_end_of_idle_period(&q->parms);
259 q->backlog += skb->len;
260 }
261
262 return qdisc_requeue(skb, sch);
228} 263}
229 264
230static struct sk_buff * 265static struct sk_buff *gred_dequeue(struct Qdisc* sch)
231gred_dequeue(struct Qdisc* sch)
232{ 266{
233 struct sk_buff *skb; 267 struct sk_buff *skb;
234 struct gred_sched_data *q; 268 struct gred_sched *t = qdisc_priv(sch);
235 struct gred_sched *t= qdisc_priv(sch); 269
270 skb = qdisc_dequeue_head(sch);
236 271
237 skb = __skb_dequeue(&sch->q);
238 if (skb) { 272 if (skb) {
239 sch->qstats.backlog -= skb->len; 273 struct gred_sched_data *q;
240 q= t->tab[(skb->tc_index&0xf)]; 274 u16 dp = tc_index_to_dp(skb);
241 if (q) { 275
242 q->backlog -= skb->len; 276 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
243 if (!q->backlog && !t->eqp) 277 if (net_ratelimit())
244 PSCHED_GET_TIME(q->qidlestart); 278 printk(KERN_WARNING "GRED: Unable to relocate "
279 "VQ 0x%x after dequeue, screwing up "
280 "backlog.\n", tc_index_to_dp(skb));
245 } else { 281 } else {
246 D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 282 q->backlog -= skb->len;
283
284 if (!q->backlog && !gred_wred_mode(t))
285 red_start_of_idle_period(&q->parms);
247 } 286 }
287
248 return skb; 288 return skb;
249 } 289 }
250 290
251 if (t->eqp) { 291 if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
252 q= t->tab[t->def]; 292 red_start_of_idle_period(&t->wred_set);
253 if (!q)
254 D2PRINTK("no default VQ set: Results will be "
255 "screwed up\n");
256 else
257 PSCHED_GET_TIME(q->qidlestart);
258 }
259 293
260 return NULL; 294 return NULL;
261} 295}
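A minimal standalone sketch (not part of the patch) of the new tc_index_to_dp() mapping used throughout the hunk above: GRED_VQ_MASK is MAX_DPs - 1, so with MAX_DPs of 16 the low four bits of skb->tc_index select the virtual queue, matching the old open-coded `skb->tc_index & 0xf`.

#include <stdio.h>

#define MAX_DPs      16              /* default table size at this time */
#define GRED_VQ_MASK (MAX_DPs - 1)

/* Same mapping as the kernel helper, lifted into userspace. */
static unsigned int tc_index_to_dp(unsigned int tc_index)
{
        return tc_index & GRED_VQ_MASK;
}

int main(void)
{
        printf("dp = %u\n", tc_index_to_dp(0x23));   /* prints dp = 3 */
        return 0;
}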
@@ -263,36 +297,34 @@ gred_dequeue(struct Qdisc* sch)
263static unsigned int gred_drop(struct Qdisc* sch) 297static unsigned int gred_drop(struct Qdisc* sch)
264{ 298{
265 struct sk_buff *skb; 299 struct sk_buff *skb;
300 struct gred_sched *t = qdisc_priv(sch);
266 301
267 struct gred_sched_data *q; 302 skb = qdisc_dequeue_tail(sch);
268 struct gred_sched *t= qdisc_priv(sch);
269
270 skb = __skb_dequeue_tail(&sch->q);
271 if (skb) { 303 if (skb) {
272 unsigned int len = skb->len; 304 unsigned int len = skb->len;
273 sch->qstats.backlog -= len; 305 struct gred_sched_data *q;
274 sch->qstats.drops++; 306 u16 dp = tc_index_to_dp(skb);
275 q= t->tab[(skb->tc_index&0xf)]; 307
276 if (q) { 308 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
277 q->backlog -= len; 309 if (net_ratelimit())
278 q->other++; 310 printk(KERN_WARNING "GRED: Unable to relocate "
279 if (!q->backlog && !t->eqp) 311 "VQ 0x%x while dropping, screwing up "
280 PSCHED_GET_TIME(q->qidlestart); 312 "backlog.\n", tc_index_to_dp(skb));
281 } else { 313 } else {
282 D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 314 q->backlog -= len;
315 q->stats.other++;
316
317 if (!q->backlog && !gred_wred_mode(t))
318 red_start_of_idle_period(&q->parms);
283 } 319 }
284 320
285 kfree_skb(skb); 321 qdisc_drop(skb, sch);
286 return len; 322 return len;
287 } 323 }
288 324
289 q=t->tab[t->def]; 325 if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
290 if (!q) { 326 red_start_of_idle_period(&t->wred_set);
291 D2PRINTK("no default VQ set: Results might be screwed up\n");
292 return 0;
293 }
294 327
295 PSCHED_GET_TIME(q->qidlestart);
296 return 0; 328 return 0;
297 329
298} 330}
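The mark-or-drop ladder in gred_enqueue() above (and red_enqueue() further below) follows one rule: on a probabilistic or hard mark, try ECN marking first and only drop when ECN is disabled, the packet cannot be marked, or harddrop forces it. A hypothetical userspace distillation, with verdict names mirroring the values returned by red_action() in the new include/net/red.h:

#include <stdio.h>

enum red_verdict { RED_DONT_MARK, RED_PROB_MARK, RED_HARD_MARK };

/* Returns 1 when the packet may be queued (possibly ECN-marked),
 * 0 when it must be congestion-dropped. */
static int survives(enum red_verdict v, int use_ecn, int use_harddrop,
                    int ecn_mark_ok)
{
        switch (v) {
        case RED_DONT_MARK:
                return 1;
        case RED_PROB_MARK:
                return use_ecn && ecn_mark_ok;
        case RED_HARD_MARK:
                return !use_harddrop && use_ecn && ecn_mark_ok;
        }
        return 0;
}

int main(void)
{
        printf("%d\n", survives(RED_PROB_MARK, 1, 0, 1));  /* 1: marked */
        printf("%d\n", survives(RED_HARD_MARK, 1, 1, 1));  /* 0: harddrop */
        return 0;
}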
@@ -300,293 +332,241 @@ static unsigned int gred_drop(struct Qdisc* sch)
300static void gred_reset(struct Qdisc* sch) 332static void gred_reset(struct Qdisc* sch)
301{ 333{
302 int i; 334 int i;
303 struct gred_sched_data *q; 335 struct gred_sched *t = qdisc_priv(sch);
304 struct gred_sched *t= qdisc_priv(sch); 336
337 qdisc_reset_queue(sch);
305 338
306 __skb_queue_purge(&sch->q); 339 for (i = 0; i < t->DPs; i++) {
340 struct gred_sched_data *q = t->tab[i];
307 341
308 sch->qstats.backlog = 0; 342 if (!q)
343 continue;
309 344
310 for (i=0;i<t->DPs;i++) { 345 red_restart(&q->parms);
311 q= t->tab[i];
312 if (!q)
313 continue;
314 PSCHED_SET_PASTPERFECT(q->qidlestart);
315 q->qave = 0;
316 q->qcount = -1;
317 q->backlog = 0; 346 q->backlog = 0;
318 q->other=0;
319 q->forced=0;
320 q->pdrop=0;
321 q->early=0;
322 } 347 }
323} 348}
324 349
325static int gred_change(struct Qdisc *sch, struct rtattr *opt) 350static inline void gred_destroy_vq(struct gred_sched_data *q)
351{
352 kfree(q);
353}
354
355static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps)
326{ 356{
327 struct gred_sched *table = qdisc_priv(sch); 357 struct gred_sched *table = qdisc_priv(sch);
328 struct gred_sched_data *q;
329 struct tc_gred_qopt *ctl;
330 struct tc_gred_sopt *sopt; 358 struct tc_gred_sopt *sopt;
331 struct rtattr *tb[TCA_GRED_STAB];
332 struct rtattr *tb2[TCA_GRED_DPS];
333 int i; 359 int i;
334 360
335 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt)) 361 if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt))
336 return -EINVAL; 362 return -EINVAL;
337 363
338 if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) { 364 sopt = RTA_DATA(dps);
339 rtattr_parse_nested(tb2, TCA_GRED_DPS, opt); 365
366 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
367 return -EINVAL;
340 368
341 if (tb2[TCA_GRED_DPS-1] == 0) 369 sch_tree_lock(sch);
342 return -EINVAL; 370 table->DPs = sopt->DPs;
371 table->def = sopt->def_DP;
372 table->red_flags = sopt->flags;
373
374 /*
375 * Every entry point to GRED is synchronized with the above code
376 * and the DP is checked against DPs, i.e. shadowed VQs can no
377 * longer be found so we can unlock right here.
378 */
379 sch_tree_unlock(sch);
380
381 if (sopt->grio) {
382 gred_enable_rio_mode(table);
383 gred_disable_wred_mode(table);
384 if (gred_wred_mode_check(sch))
385 gred_enable_wred_mode(table);
386 } else {
387 gred_disable_rio_mode(table);
388 gred_disable_wred_mode(table);
389 }
343 390
344 sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); 391 for (i = table->DPs; i < MAX_DPs; i++) {
345 table->DPs=sopt->DPs; 392 if (table->tab[i]) {
346 table->def=sopt->def_DP; 393 printk(KERN_WARNING "GRED: Warning: Destroying "
347 table->grio=sopt->grio; 394 "shadowed VQ 0x%x\n", i);
348 table->initd=0; 395 gred_destroy_vq(table->tab[i]);
349 /* probably need to clear all the table DP entries as well */ 396 table->tab[i] = NULL;
350 return 0; 397 }
351 } 398 }
352 399
400 return 0;
401}
353 402
354 if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 || 403static inline int gred_change_vq(struct Qdisc *sch, int dp,
355 RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || 404 struct tc_gred_qopt *ctl, int prio, u8 *stab)
356 RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256) 405{
357 return -EINVAL; 406 struct gred_sched *table = qdisc_priv(sch);
407 struct gred_sched_data *q;
358 408
359 ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); 409 if (table->tab[dp] == NULL) {
360 if (ctl->DP > MAX_DPs-1 ) { 410 table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL);
361 /* misbehaving is punished! Put in the default drop probability */ 411 if (table->tab[dp] == NULL)
362 DPRINTK("\nGRED: DP %u not in the proper range fixed. New DP "
363 "set to default at %d\n",ctl->DP,table->def);
364 ctl->DP=table->def;
365 }
366
367 if (table->tab[ctl->DP] == NULL) {
368 table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data),
369 GFP_KERNEL);
370 if (NULL == table->tab[ctl->DP])
371 return -ENOMEM; 412 return -ENOMEM;
372 memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data))); 413 memset(table->tab[dp], 0, sizeof(*q));
373 }
374 q= table->tab[ctl->DP];
375
376 if (table->grio) {
377 if (ctl->prio <=0) {
378 if (table->def && table->tab[table->def]) {
379 DPRINTK("\nGRED: DP %u does not have a prio"
380 "setting default to %d\n",ctl->DP,
381 table->tab[table->def]->prio);
382 q->prio=table->tab[table->def]->prio;
383 } else {
384 DPRINTK("\nGRED: DP %u does not have a prio"
385 " setting default to 8\n",ctl->DP);
386 q->prio=8;
387 }
388 } else {
389 q->prio=ctl->prio;
390 }
391 } else {
392 q->prio=8;
393 } 414 }
394 415
395 416 q = table->tab[dp];
396 q->DP=ctl->DP; 417 q->DP = dp;
397 q->Wlog = ctl->Wlog; 418 q->prio = prio;
398 q->Plog = ctl->Plog;
399 q->limit = ctl->limit; 419 q->limit = ctl->limit;
400 q->Scell_log = ctl->Scell_log;
401 q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
402 q->Scell_max = (255<<q->Scell_log);
403 q->qth_min = ctl->qth_min<<ctl->Wlog;
404 q->qth_max = ctl->qth_max<<ctl->Wlog;
405 q->qave=0;
406 q->backlog=0;
407 q->qcount = -1;
408 q->other=0;
409 q->forced=0;
410 q->pdrop=0;
411 q->early=0;
412
413 PSCHED_SET_PASTPERFECT(q->qidlestart);
414 memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
415
416 if ( table->initd && table->grio) {
417 /* this looks ugly but it's not in the fast path */
418 for (i=0;i<table->DPs;i++) {
419 if ((!table->tab[i]) || (i==q->DP) )
420 continue;
421 if (table->tab[i]->prio == q->prio ){
422 /* WRED mode detected */
423 table->eqp=1;
424 break;
425 }
426 }
427 }
428 420
429 if (!table->initd) { 421 if (q->backlog == 0)
430 table->initd=1; 422 red_end_of_idle_period(&q->parms);
431 /*
432 the first entry also goes into the default until
433 over-written
434 */
435
436 if (table->tab[table->def] == NULL) {
437 table->tab[table->def]=
438 kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL);
439 if (NULL == table->tab[table->def])
440 return -ENOMEM;
441
442 memset(table->tab[table->def], 0,
443 (sizeof(struct gred_sched_data)));
444 }
445 q= table->tab[table->def];
446 q->DP=table->def;
447 q->Wlog = ctl->Wlog;
448 q->Plog = ctl->Plog;
449 q->limit = ctl->limit;
450 q->Scell_log = ctl->Scell_log;
451 q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
452 q->Scell_max = (255<<q->Scell_log);
453 q->qth_min = ctl->qth_min<<ctl->Wlog;
454 q->qth_max = ctl->qth_max<<ctl->Wlog;
455
456 if (table->grio)
457 q->prio=table->tab[ctl->DP]->prio;
458 else
459 q->prio=8;
460
461 q->qcount = -1;
462 PSCHED_SET_PASTPERFECT(q->qidlestart);
463 memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
464 }
465 return 0;
466 423
424 red_set_parms(&q->parms,
425 ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
426 ctl->Scell_log, stab);
427
428 return 0;
467} 429}
468 430
469static int gred_init(struct Qdisc *sch, struct rtattr *opt) 431static int gred_change(struct Qdisc *sch, struct rtattr *opt)
470{ 432{
471 struct gred_sched *table = qdisc_priv(sch); 433 struct gred_sched *table = qdisc_priv(sch);
472 struct tc_gred_sopt *sopt; 434 struct tc_gred_qopt *ctl;
473 struct rtattr *tb[TCA_GRED_STAB]; 435 struct rtattr *tb[TCA_GRED_MAX];
474 struct rtattr *tb2[TCA_GRED_DPS]; 436 int err = -EINVAL, prio = GRED_DEF_PRIO;
437 u8 *stab;
475 438
476 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt)) 439 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt))
477 return -EINVAL; 440 return -EINVAL;
478 441
479 if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) { 442 if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL)
480 rtattr_parse_nested(tb2, TCA_GRED_DPS, opt); 443 return gred_change_table_def(sch, opt);
444
445 if (tb[TCA_GRED_PARMS-1] == NULL ||
446 RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
447 tb[TCA_GRED_STAB-1] == NULL ||
448 RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
449 return -EINVAL;
450
451 ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
452 stab = RTA_DATA(tb[TCA_GRED_STAB-1]);
453
454 if (ctl->DP >= table->DPs)
455 goto errout;
481 456
482 if (tb2[TCA_GRED_DPS-1] == 0) 457 if (gred_rio_mode(table)) {
483 return -EINVAL; 458 if (ctl->prio == 0) {
459 int def_prio = GRED_DEF_PRIO;
484 460
485 sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); 461 if (table->tab[table->def])
486 table->DPs=sopt->DPs; 462 def_prio = table->tab[table->def]->prio;
487 table->def=sopt->def_DP; 463
488 table->grio=sopt->grio; 464 printk(KERN_DEBUG "GRED: DP %u does not have a prio "
489 table->initd=0; 465 "setting default to %d\n", ctl->DP, def_prio);
490 return 0; 466
467 prio = def_prio;
468 } else
469 prio = ctl->prio;
470 }
471
472 sch_tree_lock(sch);
473
474 err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
475 if (err < 0)
476 goto errout_locked;
477
478 if (gred_rio_mode(table)) {
479 gred_disable_wred_mode(table);
480 if (gred_wred_mode_check(sch))
481 gred_enable_wred_mode(table);
491 } 482 }
492 483
493 DPRINTK("\n GRED_INIT error!\n"); 484 err = 0;
494 return -EINVAL; 485
486errout_locked:
487 sch_tree_unlock(sch);
488errout:
489 return err;
495} 490}
496 491
497static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) 492static int gred_init(struct Qdisc *sch, struct rtattr *opt)
498{ 493{
499 unsigned long qave; 494 struct rtattr *tb[TCA_GRED_MAX];
500 struct rtattr *rta;
501 struct tc_gred_qopt *opt = NULL ;
502 struct tc_gred_qopt *dst;
503 struct gred_sched *table = qdisc_priv(sch);
504 struct gred_sched_data *q;
505 int i;
506 unsigned char *b = skb->tail;
507 495
508 rta = (struct rtattr*)b; 496 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt))
509 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 497 return -EINVAL;
510 498
511 opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL); 499 if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1])
500 return -EINVAL;
512 501
513 if (opt == NULL) { 502 return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]);
514 DPRINTK("gred_dump:failed to malloc for %Zd\n", 503}
515 sizeof(struct tc_gred_qopt)*MAX_DPs);
516 goto rtattr_failure;
517 }
518 504
519 memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs); 505static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
506{
507 struct gred_sched *table = qdisc_priv(sch);
508 struct rtattr *parms, *opts = NULL;
509 int i;
510 struct tc_gred_sopt sopt = {
511 .DPs = table->DPs,
512 .def_DP = table->def,
513 .grio = gred_rio_mode(table),
514 .flags = table->red_flags,
515 };
520 516
521 if (!table->initd) { 517 opts = RTA_NEST(skb, TCA_OPTIONS);
522 DPRINTK("NO GRED Queues setup!\n"); 518 RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
523 } 519 parms = RTA_NEST(skb, TCA_GRED_PARMS);
520
521 for (i = 0; i < MAX_DPs; i++) {
522 struct gred_sched_data *q = table->tab[i];
523 struct tc_gred_qopt opt;
524 524
525 for (i=0;i<MAX_DPs;i++) { 525 memset(&opt, 0, sizeof(opt));
526 dst= &opt[i];
527 q= table->tab[i];
528 526
529 if (!q) { 527 if (!q) {
530 /* hack -- fix at some point with proper message 528 /* hack -- fix at some point with proper message
531 This is how we indicate to tc that there is no VQ 529 This is how we indicate to tc that there is no VQ
532 at this DP */ 530 at this DP */
533 531
534 dst->DP=MAX_DPs+i; 532 opt.DP = MAX_DPs + i;
535 continue; 533 goto append_opt;
536 } 534 }
537 535
538 dst->limit=q->limit; 536 opt.limit = q->limit;
539 dst->qth_min=q->qth_min>>q->Wlog; 537 opt.DP = q->DP;
540 dst->qth_max=q->qth_max>>q->Wlog; 538 opt.backlog = q->backlog;
541 dst->DP=q->DP; 539 opt.prio = q->prio;
542 dst->backlog=q->backlog; 540 opt.qth_min = q->parms.qth_min >> q->parms.Wlog;
543 if (q->qave) { 541 opt.qth_max = q->parms.qth_max >> q->parms.Wlog;
544 if (table->eqp && table->grio) { 542 opt.Wlog = q->parms.Wlog;
545 q->qidlestart=table->tab[table->def]->qidlestart; 543 opt.Plog = q->parms.Plog;
546 q->qave=table->tab[table->def]->qave; 544 opt.Scell_log = q->parms.Scell_log;
547 } 545 opt.other = q->stats.other;
548 if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { 546 opt.early = q->stats.prob_drop;
549 long idle; 547 opt.forced = q->stats.forced_drop;
550 psched_time_t now; 548 opt.pdrop = q->stats.pdrop;
551 PSCHED_GET_TIME(now); 549 opt.packets = q->packetsin;
552 idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); 550 opt.bytesin = q->bytesin;
553 qave = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF]; 551
554 dst->qave = qave >> q->Wlog; 552 if (gred_wred_mode(table)) {
555 553 q->parms.qidlestart =
556 } else { 554 table->tab[table->def]->parms.qidlestart;
557 dst->qave = q->qave >> q->Wlog; 555 q->parms.qavg = table->tab[table->def]->parms.qavg;
558 }
559 } else {
560 dst->qave = 0;
561 } 556 }
562 557
563 558 opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);
564 dst->Wlog = q->Wlog; 559
565 dst->Plog = q->Plog; 560append_opt:
566 dst->Scell_log = q->Scell_log; 561 RTA_APPEND(skb, sizeof(opt), &opt);
567 dst->other = q->other;
568 dst->forced = q->forced;
569 dst->early = q->early;
570 dst->pdrop = q->pdrop;
571 dst->prio = q->prio;
572 dst->packets=q->packetsin;
573 dst->bytesin=q->bytesin;
574 } 562 }
575 563
576 RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt); 564 RTA_NEST_END(skb, parms);
577 rta->rta_len = skb->tail - b;
578 565
579 kfree(opt); 566 return RTA_NEST_END(skb, opts);
580 return skb->len;
581 567
582rtattr_failure: 568rtattr_failure:
583 if (opt) 569 return RTA_NEST_CANCEL(skb, opts);
584 kfree(opt);
585 DPRINTK("gred_dump: FAILURE!!!!\n");
586
587/* also free the opt struct here */
588 skb_trim(skb, b - skb->data);
589 return -1;
590} 570}
591 571
592static void gred_destroy(struct Qdisc *sch) 572static void gred_destroy(struct Qdisc *sch)
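A brief worked example of the Wlog scaling visible in the dump above: red_set_parms() keeps qth_min and qth_max pre-shifted so they compare directly against the scaled average, so a qth_min of 15000 bytes with Wlog = 9 is stored as 15000 << 9 = 7680000, and gred_dump() shifts it back down before reporting. (Numbers are illustrative.)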
@@ -594,15 +574,13 @@ static void gred_destroy(struct Qdisc *sch)
594 struct gred_sched *table = qdisc_priv(sch); 574 struct gred_sched *table = qdisc_priv(sch);
595 int i; 575 int i;
596 576
597 for (i = 0;i < table->DPs; i++) { 577 for (i = 0; i < table->DPs; i++) {
598 if (table->tab[i]) 578 if (table->tab[i])
599 kfree(table->tab[i]); 579 gred_destroy_vq(table->tab[i]);
600 } 580 }
601} 581}
602 582
603static struct Qdisc_ops gred_qdisc_ops = { 583static struct Qdisc_ops gred_qdisc_ops = {
604 .next = NULL,
605 .cl_ops = NULL,
606 .id = "gred", 584 .id = "gred",
607 .priv_size = sizeof(struct gred_sched), 585 .priv_size = sizeof(struct gred_sched),
608 .enqueue = gred_enqueue, 586 .enqueue = gred_enqueue,
@@ -621,10 +599,13 @@ static int __init gred_module_init(void)
621{ 599{
622 return register_qdisc(&gred_qdisc_ops); 600 return register_qdisc(&gred_qdisc_ops);
623} 601}
624static void __exit gred_module_exit(void) 602
603static void __exit gred_module_exit(void)
625{ 604{
626 unregister_qdisc(&gred_qdisc_ops); 605 unregister_qdisc(&gred_qdisc_ops);
627} 606}
607
628module_init(gred_module_init) 608module_init(gred_module_init)
629module_exit(gred_module_exit) 609module_exit(gred_module_exit)
610
630MODULE_LICENSE("GPL"); 611MODULE_LICENSE("GPL");
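A hypothetical userspace view of the table-definition message that gred_change_table_def() parses: a struct tc_gred_sopt nested under TCA_GRED_DPS. The struct below is a simplified local mirror of the fields actually read above, not the real header layout, and the values are made up.

#include <stdio.h>

struct tc_gred_sopt {                /* simplified mirror, illustration only */
        unsigned int  DPs;           /* number of virtual queues */
        unsigned int  def_DP;        /* default virtual queue */
        unsigned char grio;          /* enable RIO (priority) mode */
        unsigned int  flags;         /* e.g. TC_RED_ECN */
};

#define MAX_DPs 16

/* Same sanity check as the kernel: 1 <= DPs <= MAX_DPs, def_DP valid. */
static int sopt_valid(const struct tc_gred_sopt *s)
{
        return s->DPs != 0 && s->DPs <= MAX_DPs && s->def_DP < s->DPs;
}

int main(void)
{
        struct tc_gred_sopt sopt = { .DPs = 4, .def_DP = 0, .grio = 1 };
        printf("valid = %d\n", sopt_valid(&sopt));   /* prints valid = 1 */
        return 0;
}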
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index bb9bf8d500..cdc8d28379 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -25,6 +25,8 @@
25 25
26#include <net/pkt_sched.h> 26#include <net/pkt_sched.h>
27 27
28#define VERSION "1.1"
29
28/* Network Emulation Queuing algorithm. 30/* Network Emulation Queuing algorithm.
29 ==================================== 31 ====================================
30 32
@@ -185,10 +187,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
185 || q->counter < q->gap /* inside last reordering gap */ 187 || q->counter < q->gap /* inside last reordering gap */
186 || q->reorder < get_crandom(&q->reorder_cor)) { 188 || q->reorder < get_crandom(&q->reorder_cor)) {
187 psched_time_t now; 189 psched_time_t now;
190 psched_tdiff_t delay;
191
192 delay = tabledist(q->latency, q->jitter,
193 &q->delay_cor, q->delay_dist);
194
188 PSCHED_GET_TIME(now); 195 PSCHED_GET_TIME(now);
189 PSCHED_TADD2(now, tabledist(q->latency, q->jitter, 196 PSCHED_TADD2(now, delay, cb->time_to_send);
190 &q->delay_cor, q->delay_dist),
191 cb->time_to_send);
192 ++q->counter; 197 ++q->counter;
193 ret = q->qdisc->enqueue(skb, q->qdisc); 198 ret = q->qdisc->enqueue(skb, q->qdisc);
194 } else { 199 } else {
@@ -248,24 +253,31 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
248 const struct netem_skb_cb *cb 253 const struct netem_skb_cb *cb
249 = (const struct netem_skb_cb *)skb->cb; 254 = (const struct netem_skb_cb *)skb->cb;
250 psched_time_t now; 255 psched_time_t now;
251 long delay;
252 256
253 /* is there more time remaining? */ 257 /* is there more time remaining? */
254 PSCHED_GET_TIME(now); 258 PSCHED_GET_TIME(now);
255 delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); 259
256 pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); 260 if (PSCHED_TLESS(cb->time_to_send, now)) {
257 if (delay <= 0) {
258 pr_debug("netem_dequeue: return skb=%p\n", skb); 261 pr_debug("netem_dequeue: return skb=%p\n", skb);
259 sch->q.qlen--; 262 sch->q.qlen--;
260 sch->flags &= ~TCQ_F_THROTTLED; 263 sch->flags &= ~TCQ_F_THROTTLED;
261 return skb; 264 return skb;
262 } 265 } else {
266 psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);
267
268 if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
269 sch->qstats.drops++;
263 270
264 mod_timer(&q->timer, jiffies + delay); 271 /* After this qlen is confused */
265 sch->flags |= TCQ_F_THROTTLED; 272 printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
273 q->qdisc->ops->id);
266 274
267 if (q->qdisc->ops->requeue(skb, q->qdisc) != 0) 275 sch->q.qlen--;
268 sch->qstats.drops++; 276 }
277
278 mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
279 sch->flags |= TCQ_F_THROTTLED;
280 }
269 } 281 }
270 282
271 return NULL; 283 return NULL;
@@ -290,11 +302,16 @@ static void netem_reset(struct Qdisc *sch)
290 del_timer_sync(&q->timer); 302 del_timer_sync(&q->timer);
291} 303}
292 304
305/* Pass size change message down to embedded FIFO */
293static int set_fifo_limit(struct Qdisc *q, int limit) 306static int set_fifo_limit(struct Qdisc *q, int limit)
294{ 307{
295 struct rtattr *rta; 308 struct rtattr *rta;
296 int ret = -ENOMEM; 309 int ret = -ENOMEM;
297 310
311 /* Hack to avoid sending change message to non-FIFO */
312 if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
313 return 0;
314
298 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); 315 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
299 if (rta) { 316 if (rta) {
300 rta->rta_type = RTM_NEWQDISC; 317 rta->rta_type = RTM_NEWQDISC;
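The `q->ops->id + 1` comparison added above skips the first letter so that "pfifo", "bfifo" and the new "tfifo" all compare equal to "fifo", while anything else is left alone. A quick standalone check (not from the patch):

#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *ids[] = { "pfifo", "bfifo", "tfifo", "netem" };

        /* Mirrors the set_fifo_limit() test on each qdisc id. */
        for (int i = 0; i < 4; i++)
                printf("%s -> %s\n", ids[i],
                       strncmp(ids[i] + 1, "fifo", 4) == 0 ? "fifo" : "other");
        return 0;
}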
@@ -426,6 +443,84 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
426 return 0; 443 return 0;
427} 444}
428 445
446/*
447 * Special case version of FIFO queue for use by netem.
448 * It queues in order based on timestamps in skb's
449 */
450struct fifo_sched_data {
451 u32 limit;
452};
453
454static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
455{
456 struct fifo_sched_data *q = qdisc_priv(sch);
457 struct sk_buff_head *list = &sch->q;
458 const struct netem_skb_cb *ncb
459 = (const struct netem_skb_cb *)nskb->cb;
460 struct sk_buff *skb;
461
462 if (likely(skb_queue_len(list) < q->limit)) {
463 skb_queue_reverse_walk(list, skb) {
464 const struct netem_skb_cb *cb
465 = (const struct netem_skb_cb *)skb->cb;
466
467 if (PSCHED_TLESS(cb->time_to_send, ncb->time_to_send))
468 break;
469 }
470
471 __skb_queue_after(list, skb, nskb);
472
473 sch->qstats.backlog += nskb->len;
474 sch->bstats.bytes += nskb->len;
475 sch->bstats.packets++;
476
477 return NET_XMIT_SUCCESS;
478 }
479
480 return qdisc_drop(nskb, sch);
481}
482
483static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
484{
485 struct fifo_sched_data *q = qdisc_priv(sch);
486
487 if (opt) {
488 struct tc_fifo_qopt *ctl = RTA_DATA(opt);
489 if (RTA_PAYLOAD(opt) < sizeof(*ctl))
490 return -EINVAL;
491
492 q->limit = ctl->limit;
493 } else
494 q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
495
496 return 0;
497}
498
499static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
500{
501 struct fifo_sched_data *q = qdisc_priv(sch);
502 struct tc_fifo_qopt opt = { .limit = q->limit };
503
504 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
505 return skb->len;
506
507rtattr_failure:
508 return -1;
509}
510
511static struct Qdisc_ops tfifo_qdisc_ops = {
512 .id = "tfifo",
513 .priv_size = sizeof(struct fifo_sched_data),
514 .enqueue = tfifo_enqueue,
515 .dequeue = qdisc_dequeue_head,
516 .requeue = qdisc_requeue,
517 .drop = qdisc_queue_drop,
518 .init = tfifo_init,
519 .reset = qdisc_reset_queue,
520 .change = tfifo_init,
521 .dump = tfifo_dump,
522};
523
429static int netem_init(struct Qdisc *sch, struct rtattr *opt) 524static int netem_init(struct Qdisc *sch, struct rtattr *opt)
430{ 525{
431 struct netem_sched_data *q = qdisc_priv(sch); 526 struct netem_sched_data *q = qdisc_priv(sch);
@@ -438,7 +533,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
438 q->timer.function = netem_watchdog; 533 q->timer.function = netem_watchdog;
439 q->timer.data = (unsigned long) sch; 534 q->timer.data = (unsigned long) sch;
440 535
441 q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); 536 q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops);
442 if (!q->qdisc) { 537 if (!q->qdisc) {
443 pr_debug("netem: qdisc create failed\n"); 538 pr_debug("netem: qdisc create failed\n");
444 return -ENOMEM; 539 return -ENOMEM;
@@ -601,6 +696,7 @@ static struct Qdisc_ops netem_qdisc_ops = {
601 696
602static int __init netem_module_init(void) 697static int __init netem_module_init(void)
603{ 698{
699 pr_info("netem: version " VERSION "\n");
604 return register_qdisc(&netem_qdisc_ops); 700 return register_qdisc(&netem_qdisc_ops);
605} 701}
606static void __exit netem_module_exit(void) 702static void __exit netem_module_exit(void)
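The ordering rule behind tfifo_enqueue() above: walk the queue from the tail and insert the new packet after the first element with a strictly earlier send time, so the queue stays sorted by time_to_send even when netem hands it deliberately reordered packets. A minimal array-based userspace analogue with made-up timestamps:

#include <stdio.h>

int main(void)
{
        long q[8] = { 10, 20, 40 };          /* send times, kept sorted */
        int len = 3;
        long t = 30;                         /* new packet's send time */
        int pos = len;

        while (pos > 0 && q[pos - 1] >= t)   /* walk from the tail */
                pos--;
        for (int i = len; i > pos; i--)      /* shift and insert */
                q[i] = q[i - 1];
        q[pos] = t;
        len++;

        for (int i = 0; i < len; i++)
                printf("%ld ", q[i]);        /* prints: 10 20 30 40 */
        printf("\n");
        return 0;
}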
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7845d045ee..dccfa44c2d 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -9,76 +9,23 @@
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * 10 *
11 * Changes: 11 * Changes:
12 * J Hadi Salim <hadi@nortel.com> 980914: computation fixes 12 * J Hadi Salim 980914: computation fixes
13 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly. 13 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14 * J Hadi Salim <hadi@nortelnetworks.com> 980816: ECN support 14 * J Hadi Salim 980816: ECN support
15 */ 15 */
16 16
17#include <linux/config.h> 17#include <linux/config.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
22#include <linux/types.h> 19#include <linux/types.h>
23#include <linux/kernel.h> 20#include <linux/kernel.h>
24#include <linux/sched.h>
25#include <linux/string.h>
26#include <linux/mm.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/in.h>
30#include <linux/errno.h>
31#include <linux/interrupt.h>
32#include <linux/if_ether.h>
33#include <linux/inet.h>
34#include <linux/netdevice.h> 21#include <linux/netdevice.h>
35#include <linux/etherdevice.h>
36#include <linux/notifier.h>
37#include <net/ip.h>
38#include <net/route.h>
39#include <linux/skbuff.h> 22#include <linux/skbuff.h>
40#include <net/sock.h>
41#include <net/pkt_sched.h> 23#include <net/pkt_sched.h>
42#include <net/inet_ecn.h> 24#include <net/inet_ecn.h>
43#include <net/dsfield.h> 25#include <net/red.h>
44 26
45 27
46/* Random Early Detection (RED) algorithm. 28/* Parameters, settable by user:
47 =======================================
48
49 Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
50 for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
51
52 This file codes a "divisionless" version of RED algorithm
53 as written down in Fig.17 of the paper.
54
55Short description.
56------------------
57
58 When a new packet arrives we calculate the average queue length:
59
60 avg = (1-W)*avg + W*current_queue_len,
61
62 W is the filter time constant (chosen as 2^(-Wlog)), it controls
63 the inertia of the algorithm. To allow larger bursts, W should be
64 decreased.
65
66 if (avg > th_max) -> packet marked (dropped).
67 if (avg < th_min) -> packet passes.
68 if (th_min < avg < th_max) we calculate probability:
69
70 Pb = max_P * (avg - th_min)/(th_max-th_min)
71
72 and mark (drop) packet with this probability.
73 Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
74 max_P should be small (not 1), usually 0.01..0.02 is good value.
75
76 max_P is chosen as a number, so that max_P/(th_max-th_min)
77 is a negative power of two in order arithmetics to contain
78 only shifts.
79
80
81 Parameters, settable by user:
82 ----------------------------- 29 -----------------------------
83 30
84 limit - bytes (must be > qth_max + burst) 31 limit - bytes (must be > qth_max + burst)
@@ -89,243 +36,93 @@ Short description.
89 arbitrarily high (well, less than ram size) 36 arbitrarily high (well, less than ram size)
90 Really, this limit will never be reached 37 Really, this limit will never be reached
91 if RED works correctly. 38 if RED works correctly.
92
93 qth_min - bytes (should be < qth_max/2)
94 qth_max - bytes (should be at least 2*qth_min and less limit)
95 Wlog - bits (<32) log(1/W).
96 Plog - bits (<32)
97
98 Plog is related to max_P by formula:
99
100 max_P = (qth_max-qth_min)/2^Plog;
101
102 F.e. if qth_max=128K and qth_min=32K, then Plog=22
103 corresponds to max_P=0.02
104
105 Scell_log
106 Stab
107
108 Lookup table for log((1-W)^(t/t_ave).
109
110
111NOTES:
112
113Upper bound on W.
114-----------------
115
116 If you want to allow bursts of L packets of size S,
117 you should choose W:
118
119 L + 1 - th_min/S < (1-(1-W)^L)/W
120
121 th_min/S = 32 th_min/S = 4
122
123 log(W) L
124 -1 33
125 -2 35
126 -3 39
127 -4 46
128 -5 57
129 -6 75
130 -7 101
131 -8 135
132 -9 190
133 etc.
134 */ 39 */
135 40
136struct red_sched_data 41struct red_sched_data
137{ 42{
138/* Parameters */ 43 u32 limit; /* HARD maximal queue length */
139 u32 limit; /* HARD maximal queue length */ 44 unsigned char flags;
140 u32 qth_min; /* Min average length threshold: A scaled */ 45 struct red_parms parms;
141 u32 qth_max; /* Max average length threshold: A scaled */ 46 struct red_stats stats;
142 u32 Rmask;
143 u32 Scell_max;
144 unsigned char flags;
145 char Wlog; /* log(W) */
146 char Plog; /* random number bits */
147 char Scell_log;
148 u8 Stab[256];
149
150/* Variables */
151 unsigned long qave; /* Average queue length: A scaled */
152 int qcount; /* Packets since last random number generation */
153 u32 qR; /* Cached random number */
154
155 psched_time_t qidlestart; /* Start of idle period */
156 struct tc_red_xstats st;
157}; 47};
158 48
159static int red_ecn_mark(struct sk_buff *skb) 49static inline int red_use_ecn(struct red_sched_data *q)
160{ 50{
161 if (skb->nh.raw + 20 > skb->tail) 51 return q->flags & TC_RED_ECN;
162 return 0;
163
164 switch (skb->protocol) {
165 case __constant_htons(ETH_P_IP):
166 if (INET_ECN_is_not_ect(skb->nh.iph->tos))
167 return 0;
168 IP_ECN_set_ce(skb->nh.iph);
169 return 1;
170 case __constant_htons(ETH_P_IPV6):
171 if (INET_ECN_is_not_ect(ipv6_get_dsfield(skb->nh.ipv6h)))
172 return 0;
173 IP6_ECN_set_ce(skb->nh.ipv6h);
174 return 1;
175 default:
176 return 0;
177 }
178} 52}
179 53
180static int 54static inline int red_use_harddrop(struct red_sched_data *q)
181red_enqueue(struct sk_buff *skb, struct Qdisc* sch) 55{
56 return q->flags & TC_RED_HARDDROP;
57}
58
59static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
182{ 60{
183 struct red_sched_data *q = qdisc_priv(sch); 61 struct red_sched_data *q = qdisc_priv(sch);
184 62
185 psched_time_t now; 63 q->parms.qavg = red_calc_qavg(&q->parms, sch->qstats.backlog);
186 64
187 if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { 65 if (red_is_idling(&q->parms))
188 long us_idle; 66 red_end_of_idle_period(&q->parms);
189 int shift;
190 67
191 PSCHED_GET_TIME(now); 68 switch (red_action(&q->parms, q->parms.qavg)) {
192 us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); 69 case RED_DONT_MARK:
193 PSCHED_SET_PASTPERFECT(q->qidlestart); 70 break;
194 71
195/* 72 case RED_PROB_MARK:
196 The problem: ideally, average length queue recalcultion should 73 sch->qstats.overlimits++;
197 be done over constant clock intervals. This is too expensive, so that 74 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
198 the calculation is driven by outgoing packets. 75 q->stats.prob_drop++;
199 When the queue is idle we have to model this clock by hand. 76 goto congestion_drop;
200 77 }
201 SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth)
202 dummy packets as a burst after idle time, i.e.
203
204 q->qave *= (1-W)^m
205
206 This is an apparently overcomplicated solution (f.e. we have to precompute
207 a table to make this calculation in reasonable time)
208 I believe that a simpler model may be used here,
209 but it is field for experiments.
210*/
211 shift = q->Stab[us_idle>>q->Scell_log];
212
213 if (shift) {
214 q->qave >>= shift;
215 } else {
216 /* Approximate initial part of exponent
217 with linear function:
218 (1-W)^m ~= 1-mW + ...
219
220 Seems, it is the best solution to
221 problem of too coarce exponent tabulation.
222 */
223
224 us_idle = (q->qave * us_idle)>>q->Scell_log;
225 if (us_idle < q->qave/2)
226 q->qave -= us_idle;
227 else
228 q->qave >>= 1;
229 }
230 } else {
231 q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
232 /* NOTE:
233 q->qave is fixed point number with point at Wlog.
234 The formulae above is equvalent to floating point
235 version:
236
237 qave = qave*(1-W) + sch->qstats.backlog*W;
238 --ANK (980924)
239 */
240 }
241 78
242 if (q->qave < q->qth_min) { 79 q->stats.prob_mark++;
243 q->qcount = -1; 80 break;
244enqueue: 81
245 if (sch->qstats.backlog + skb->len <= q->limit) { 82 case RED_HARD_MARK:
246 __skb_queue_tail(&sch->q, skb); 83 sch->qstats.overlimits++;
247 sch->qstats.backlog += skb->len; 84 if (red_use_harddrop(q) || !red_use_ecn(q) ||
248 sch->bstats.bytes += skb->len; 85 !INET_ECN_set_ce(skb)) {
249 sch->bstats.packets++; 86 q->stats.forced_drop++;
250 return NET_XMIT_SUCCESS; 87 goto congestion_drop;
251 } else { 88 }
252 q->st.pdrop++;
253 }
254 kfree_skb(skb);
255 sch->qstats.drops++;
256 return NET_XMIT_DROP;
257 }
258 if (q->qave >= q->qth_max) {
259 q->qcount = -1;
260 sch->qstats.overlimits++;
261mark:
262 if (!(q->flags&TC_RED_ECN) || !red_ecn_mark(skb)) {
263 q->st.early++;
264 goto drop;
265 }
266 q->st.marked++;
267 goto enqueue;
268 }
269 89
270 if (++q->qcount) { 90 q->stats.forced_mark++;
271 /* The formula used below causes questions. 91 break;
272
273 OK. qR is random number in the interval 0..Rmask
274 i.e. 0..(2^Plog). If we used floating point
275 arithmetics, it would be: (2^Plog)*rnd_num,
276 where rnd_num is less 1.
277
278 Taking into account, that qave have fixed
279 point at Wlog, and Plog is related to max_P by
280 max_P = (qth_max-qth_min)/2^Plog; two lines
281 below have the following floating point equivalent:
282
283 max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount
284
285 Any questions? --ANK (980924)
286 */
287 if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
288 goto enqueue;
289 q->qcount = 0;
290 q->qR = net_random()&q->Rmask;
291 sch->qstats.overlimits++;
292 goto mark;
293 } 92 }
294 q->qR = net_random()&q->Rmask;
295 goto enqueue;
296 93
297drop: 94 if (sch->qstats.backlog + skb->len <= q->limit)
298 kfree_skb(skb); 95 return qdisc_enqueue_tail(skb, sch);
299 sch->qstats.drops++; 96
97 q->stats.pdrop++;
98 return qdisc_drop(skb, sch);
99
100congestion_drop:
101 qdisc_drop(skb, sch);
300 return NET_XMIT_CN; 102 return NET_XMIT_CN;
301} 103}
302 104
303static int 105static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
304red_requeue(struct sk_buff *skb, struct Qdisc* sch)
305{ 106{
306 struct red_sched_data *q = qdisc_priv(sch); 107 struct red_sched_data *q = qdisc_priv(sch);
307 108
308 PSCHED_SET_PASTPERFECT(q->qidlestart); 109 if (red_is_idling(&q->parms))
110 red_end_of_idle_period(&q->parms);
309 111
310 __skb_queue_head(&sch->q, skb); 112 return qdisc_requeue(skb, sch);
311 sch->qstats.backlog += skb->len;
312 sch->qstats.requeues++;
313 return 0;
314} 113}
315 114
316static struct sk_buff * 115static struct sk_buff * red_dequeue(struct Qdisc* sch)
317red_dequeue(struct Qdisc* sch)
318{ 116{
319 struct sk_buff *skb; 117 struct sk_buff *skb;
320 struct red_sched_data *q = qdisc_priv(sch); 118 struct red_sched_data *q = qdisc_priv(sch);
321 119
322 skb = __skb_dequeue(&sch->q); 120 skb = qdisc_dequeue_head(sch);
323 if (skb) { 121
324 sch->qstats.backlog -= skb->len; 122 if (skb == NULL && !red_is_idling(&q->parms))
325 return skb; 123 red_start_of_idle_period(&q->parms);
326 } 124
327 PSCHED_GET_TIME(q->qidlestart); 125 return skb;
328 return NULL;
329} 126}
330 127
331static unsigned int red_drop(struct Qdisc* sch) 128static unsigned int red_drop(struct Qdisc* sch)
@@ -333,16 +130,17 @@ static unsigned int red_drop(struct Qdisc* sch)
333 struct sk_buff *skb; 130 struct sk_buff *skb;
334 struct red_sched_data *q = qdisc_priv(sch); 131 struct red_sched_data *q = qdisc_priv(sch);
335 132
336 skb = __skb_dequeue_tail(&sch->q); 133 skb = qdisc_dequeue_tail(sch);
337 if (skb) { 134 if (skb) {
338 unsigned int len = skb->len; 135 unsigned int len = skb->len;
339 sch->qstats.backlog -= len; 136 q->stats.other++;
340 sch->qstats.drops++; 137 qdisc_drop(skb, sch);
341 q->st.other++;
342 kfree_skb(skb);
343 return len; 138 return len;
344 } 139 }
345 PSCHED_GET_TIME(q->qidlestart); 140
141 if (!red_is_idling(&q->parms))
142 red_start_of_idle_period(&q->parms);
143
346 return 0; 144 return 0;
347} 145}
348 146
@@ -350,43 +148,38 @@ static void red_reset(struct Qdisc* sch)
350{ 148{
351 struct red_sched_data *q = qdisc_priv(sch); 149 struct red_sched_data *q = qdisc_priv(sch);
352 150
353 __skb_queue_purge(&sch->q); 151 qdisc_reset_queue(sch);
354 sch->qstats.backlog = 0; 152 red_restart(&q->parms);
355 PSCHED_SET_PASTPERFECT(q->qidlestart);
356 q->qave = 0;
357 q->qcount = -1;
358} 153}
359 154
360static int red_change(struct Qdisc *sch, struct rtattr *opt) 155static int red_change(struct Qdisc *sch, struct rtattr *opt)
361{ 156{
362 struct red_sched_data *q = qdisc_priv(sch); 157 struct red_sched_data *q = qdisc_priv(sch);
363 struct rtattr *tb[TCA_RED_STAB]; 158 struct rtattr *tb[TCA_RED_MAX];
364 struct tc_red_qopt *ctl; 159 struct tc_red_qopt *ctl;
365 160
366 if (opt == NULL || 161 if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt))
367 rtattr_parse_nested(tb, TCA_RED_STAB, opt) || 162 return -EINVAL;
368 tb[TCA_RED_PARMS-1] == 0 || tb[TCA_RED_STAB-1] == 0 || 163
164 if (tb[TCA_RED_PARMS-1] == NULL ||
369 RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || 165 RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
370 RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < 256) 166 tb[TCA_RED_STAB-1] == NULL ||
167 RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE)
371 return -EINVAL; 168 return -EINVAL;
372 169
373 ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); 170 ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);
374 171
375 sch_tree_lock(sch); 172 sch_tree_lock(sch);
376 q->flags = ctl->flags; 173 q->flags = ctl->flags;
377 q->Wlog = ctl->Wlog;
378 q->Plog = ctl->Plog;
379 q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
380 q->Scell_log = ctl->Scell_log;
381 q->Scell_max = (255<<q->Scell_log);
382 q->qth_min = ctl->qth_min<<ctl->Wlog;
383 q->qth_max = ctl->qth_max<<ctl->Wlog;
384 q->limit = ctl->limit; 174 q->limit = ctl->limit;
385 memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256);
386 175
387 q->qcount = -1; 176 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
177 ctl->Plog, ctl->Scell_log,
178 RTA_DATA(tb[TCA_RED_STAB-1]));
179
388 if (skb_queue_empty(&sch->q)) 180 if (skb_queue_empty(&sch->q))
389 PSCHED_SET_PASTPERFECT(q->qidlestart); 181 red_end_of_idle_period(&q->parms);
182
390 sch_tree_unlock(sch); 183 sch_tree_unlock(sch);
391 return 0; 184 return 0;
392} 185}
@@ -399,39 +192,39 @@ static int red_init(struct Qdisc* sch, struct rtattr *opt)
 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
-	unsigned char *b = skb->tail;
-	struct rtattr *rta;
-	struct tc_red_qopt opt;
-
-	rta = (struct rtattr*)b;
-	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-	opt.limit = q->limit;
-	opt.qth_min = q->qth_min>>q->Wlog;
-	opt.qth_max = q->qth_max>>q->Wlog;
-	opt.Wlog = q->Wlog;
-	opt.Plog = q->Plog;
-	opt.Scell_log = q->Scell_log;
-	opt.flags = q->flags;
+	struct rtattr *opts = NULL;
+	struct tc_red_qopt opt = {
+		.limit		= q->limit,
+		.flags		= q->flags,
+		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
+		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
+		.Wlog		= q->parms.Wlog,
+		.Plog		= q->parms.Plog,
+		.Scell_log	= q->parms.Scell_log,
+	};
+
+	opts = RTA_NEST(skb, TCA_OPTIONS);
 	RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
-	rta->rta_len = skb->tail - b;
-
-	return skb->len;
+	return RTA_NEST_END(skb, opts);
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
+	return RTA_NEST_CANCEL(skb, opts);
 }
 
 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
-
-	return gnet_stats_copy_app(d, &q->st, sizeof(q->st));
+	struct tc_red_xstats st = {
+		.early	= q->stats.prob_drop + q->stats.forced_drop,
+		.pdrop	= q->stats.pdrop,
+		.other	= q->stats.other,
+		.marked	= q->stats.prob_mark + q->stats.forced_mark,
+	};
+
+	return gnet_stats_copy_app(d, &st, sizeof(st));
 }
 
 static struct Qdisc_ops red_qdisc_ops = {
-	.next		= NULL,
-	.cl_ops		= NULL,
 	.id		= "red",
 	.priv_size	= sizeof(struct red_sched_data),
 	.enqueue	= red_enqueue,
@@ -450,10 +243,13 @@ static int __init red_module_init(void)
 {
 	return register_qdisc(&red_qdisc_ops);
 }
-static void __exit red_module_exit(void)
+
+static void __exit red_module_exit(void)
 {
 	unregister_qdisc(&red_qdisc_ops);
 }
+
 module_init(red_module_init)
 module_exit(red_module_exit)
+
 MODULE_LICENSE("GPL");
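The sch_red.c hunks above replace the qdisc's open-coded EWMA and marking state with the shared RED library (the q->parms field and the red_* helpers) and rewrite red_dump() to use the RTA_NEST()/RTA_NEST_END()/RTA_NEST_CANCEL() netlink macros. A minimal user-space sketch of the idle-period bookkeeping those helpers encapsulate follows; the struct and clock below are illustrative mocks, not the kernel's include/net/red.h, and the real red_end_of_idle_period() also ages the averaged queue length by the time spent idle.

/* Mock sketch -- types and clock are stand-ins, not kernel API. */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct mock_red_parms {
	bool   idling;          /* inside an idle period? */
	time_t qidlestart;      /* when the queue went empty */
};

static bool red_is_idling(const struct mock_red_parms *p)
{
	return p->idling;
}

static void red_start_of_idle_period(struct mock_red_parms *p)
{
	p->idling = true;
	p->qidlestart = time(NULL);
}

static void red_end_of_idle_period(struct mock_red_parms *p)
{
	/* The real helper first decays the average queue length by
	 * the time spent idle; the mock only clears the flag. */
	p->idling = false;
}

int main(void)
{
	struct mock_red_parms parms = { .idling = false };

	/* Queue ran dry, as at the end of red_drop()/red_dequeue(). */
	if (!red_is_idling(&parms))
		red_start_of_idle_period(&parms);

	/* Traffic state reset, as in red_change() on an empty queue. */
	if (red_is_idling(&parms))
		red_end_of_idle_period(&parms);

	printf("idling=%d\n", red_is_idling(&parms));
	return 0;
}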
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index a415d99c39..8c7756036e 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -299,11 +299,10 @@ put_rpccred(struct rpc_cred *cred)
 void
 rpcauth_unbindcred(struct rpc_task *task)
 {
-	struct rpc_auth *auth = task->tk_auth;
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 
 	dprintk("RPC: %4d releasing %s cred %p\n",
-		task->tk_pid, auth->au_ops->au_name, cred);
+		task->tk_pid, task->tk_auth->au_ops->au_name, cred);
 
 	put_rpccred(cred);
 	task->tk_msg.rpc_cred = NULL;
@@ -312,22 +311,22 @@ rpcauth_unbindcred(struct rpc_task *task)
 u32 *
 rpcauth_marshcred(struct rpc_task *task, u32 *p)
 {
-	struct rpc_auth *auth = task->tk_auth;
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 
 	dprintk("RPC: %4d marshaling %s cred %p\n",
-		task->tk_pid, auth->au_ops->au_name, cred);
+		task->tk_pid, task->tk_auth->au_ops->au_name, cred);
+
 	return cred->cr_ops->crmarshal(task, p);
 }
 
 u32 *
 rpcauth_checkverf(struct rpc_task *task, u32 *p)
 {
-	struct rpc_auth *auth = task->tk_auth;
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 
 	dprintk("RPC: %4d validating %s cred %p\n",
-		task->tk_pid, auth->au_ops->au_name, cred);
+		task->tk_pid, task->tk_auth->au_ops->au_name, cred);
+
 	return cred->cr_ops->crvalidate(task, p);
 }
 
@@ -363,12 +362,12 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 int
 rpcauth_refreshcred(struct rpc_task *task)
 {
-	struct rpc_auth *auth = task->tk_auth;
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 	int err;
 
 	dprintk("RPC: %4d refreshing %s cred %p\n",
-		task->tk_pid, auth->au_ops->au_name, cred);
+		task->tk_pid, task->tk_auth->au_ops->au_name, cred);
+
 	err = cred->cr_ops->crrefresh(task);
 	if (err < 0)
 		task->tk_status = err;
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 2387e7b823..a03d4b600c 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -63,8 +63,6 @@ EXPORT_SYMBOL(rpc_mkpipe);
 /* Client transport */
 EXPORT_SYMBOL(xprt_create_proto);
 EXPORT_SYMBOL(xprt_set_timeout);
-EXPORT_SYMBOL(xprt_udp_slot_table_entries);
-EXPORT_SYMBOL(xprt_tcp_slot_table_entries);
 
 /* Client credential cache */
 EXPORT_SYMBOL(rpcauth_register);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e9bd91265f..5a220b2bb3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -313,6 +313,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
 	rqstp->rq_proc = proc = ntohl(svc_getu32(argv));	/* procedure number */
 
 	progp = serv->sv_program;
+
+	for (progp = serv->sv_program; progp; progp = progp->pg_next)
+		if (prog == progp->pg_prog)
+			break;
+
 	/*
 	 * Decode auth data, and add verifier to reply buffer.
 	 * We do this before anything else in order to get a decent
@@ -320,7 +325,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
 	 */
 	auth_res = svc_authenticate(rqstp, &auth_stat);
 	/* Also give the program a chance to reject this call: */
-	if (auth_res == SVC_OK) {
+	if (auth_res == SVC_OK && progp) {
 		auth_stat = rpc_autherr_badcred;
 		auth_res = progp->pg_authenticate(rqstp);
 	}
@@ -340,10 +345,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
 	case SVC_COMPLETE:
 		goto sendit;
 	}
 
-	for (progp = serv->sv_program; progp; progp = progp->pg_next)
-		if (prog == progp->pg_prog)
-			break;
 	if (progp == NULL)
 		goto err_bad_prog;
 
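The svc.c hunks hoist the RPC program lookup ahead of authentication, so pg_authenticate() is only reached with a resolved progp (hence the added "&& progp" guard). A stand-alone sketch of that ordering, with trimmed stand-ins for svc_program and the request plumbing:

/* Mock sketch -- structures are stand-ins for svc_program et al. */
#include <stdio.h>

struct mock_program {
	unsigned int pg_prog;
	const char *pg_name;
	struct mock_program *pg_next;
	int (*pg_authenticate)(const struct mock_program *);
};

static int always_ok(const struct mock_program *p)
{
	printf("authenticating against %s\n", p->pg_name);
	return 1;
}

int main(void)
{
	struct mock_program nfsd = { 100003, "nfsd", NULL, always_ok };
	struct mock_program mountd = { 100005, "mountd", &nfsd, always_ok };
	unsigned int prog = 100003;
	struct mock_program *progp;

	/* Resolve the program first, as the patch does, so the
	 * per-program authenticate hook is never called through NULL. */
	for (progp = &mountd; progp; progp = progp->pg_next)
		if (prog == progp->pg_prog)
			break;

	if (progp)
		progp->pg_authenticate(progp);
	else
		fprintf(stderr, "program %u unavailable\n", prog);
	return 0;
}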
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index d0c9f460e4..1065904841 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -119,13 +119,6 @@ done:
 	return 0;
 }
 
-unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
-unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
-unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
-EXPORT_SYMBOL(xprt_min_resvport);
-unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
-EXPORT_SYMBOL(xprt_max_resvport);
-
 
 static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
 static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2e1529217e..0a51fd46a8 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -36,6 +36,15 @@
 #include <net/tcp.h>
 
 /*
+ * xprtsock tunables
+ */
+unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
+unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
+
+unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
+unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
+
+/*
  * How many times to try sending a request on a socket before waiting
  * for the socket buffer to clear.
  */
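Taken together, the last three hunks move the transport tunables out of sysctl.c into xprtsock.c and drop their EXPORT_SYMBOL lines, leaving one definition point next to the code that consumes the values. A two-file sketch of the resulting linkage pattern; the file names and the literal 16 (standing in for RPC_DEF_SLOT_TABLE) are illustrative assumptions:

/* tunables.c -- the single definition point, as xprtsock.c now is. */
unsigned int xprt_udp_slot_table_entries = 16;	/* RPC_DEF_SLOT_TABLE */
unsigned int xprt_tcp_slot_table_entries = 16;	/* RPC_DEF_SLOT_TABLE */

/* consumer.c -- every other file only declares the symbols. */
#include <stdio.h>

extern unsigned int xprt_udp_slot_table_entries;
extern unsigned int xprt_tcp_slot_table_entries;

int main(void)
{
	printf("udp=%u tcp=%u\n", xprt_udp_slot_table_entries,
	       xprt_tcp_slot_table_entries);
	return 0;
}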