| author | Arnaldo Carvalho de Melo <acme@mandriva.com> | 2005-11-05 19:30:29 -0500 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@mandriva.com> | 2005-11-05 19:30:29 -0500 |
| commit | 2d43f1128a4282fbe8442f40b4cbbac05d8f10aa (patch) | |
| tree | 788e5a970c3efb090b73cef0de32aae25444b734 | |
| parent | 6df716340da3a6fdd33d73d7ed4c6f7590ca1c42 (diff) | |
| parent | bdc450a0bb1d48144ced1f899cc8366ec8e85024 (diff) | |
Merge branch 'red' of 84.73.165.173:/home/tgr/repos/net-2.6
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | include/linux/pkt_sched.h | 50 |
| -rw-r--r-- | include/net/inet_ecn.h | 28 |
| -rw-r--r-- | include/net/red.h | 325 |
| -rw-r--r-- | net/sched/sch_gred.c | 841 |
| -rw-r--r-- | net/sched/sch_red.c | 418 |
5 files changed, 891 insertions, 771 deletions
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 60ffcb9c5791..e87b233615b3 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
| @@ -93,6 +93,7 @@ struct tc_fifo_qopt | |||
| 93 | /* PRIO section */ | 93 | /* PRIO section */ |
| 94 | 94 | ||
| 95 | #define TCQ_PRIO_BANDS 16 | 95 | #define TCQ_PRIO_BANDS 16 |
| 96 | #define TCQ_MIN_PRIO_BANDS 2 | ||
| 96 | 97 | ||
| 97 | struct tc_prio_qopt | 98 | struct tc_prio_qopt |
| 98 | { | 99 | { |
| @@ -169,6 +170,7 @@ struct tc_red_qopt | |||
| 169 | unsigned char Scell_log; /* cell size for idle damping */ | 170 | unsigned char Scell_log; /* cell size for idle damping */ |
| 170 | unsigned char flags; | 171 | unsigned char flags; |
| 171 | #define TC_RED_ECN 1 | 172 | #define TC_RED_ECN 1 |
| 173 | #define TC_RED_HARDDROP 2 | ||
| 172 | }; | 174 | }; |
| 173 | 175 | ||
| 174 | struct tc_red_xstats | 176 | struct tc_red_xstats |
| @@ -194,38 +196,34 @@ enum | |||
| 194 | 196 | ||
| 195 | #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) | 197 | #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) |
| 196 | 198 | ||
| 197 | #define TCA_SET_OFF TCA_GRED_PARMS | ||
| 198 | struct tc_gred_qopt | 199 | struct tc_gred_qopt |
| 199 | { | 200 | { |
| 200 | __u32 limit; /* HARD maximal queue length (bytes) | 201 | __u32 limit; /* HARD maximal queue length (bytes) */ |
| 201 | */ | 202 | __u32 qth_min; /* Min average length threshold (bytes) */ |
| 202 | __u32 qth_min; /* Min average length threshold (bytes) | 203 | __u32 qth_max; /* Max average length threshold (bytes) */ |
| 203 | */ | 204 | __u32 DP; /* upto 2^32 DPs */ |
| 204 | __u32 qth_max; /* Max average length threshold (bytes) | 205 | __u32 backlog; |
| 205 | */ | 206 | __u32 qave; |
| 206 | __u32 DP; /* upto 2^32 DPs */ | 207 | __u32 forced; |
| 207 | __u32 backlog; | 208 | __u32 early; |
| 208 | __u32 qave; | 209 | __u32 other; |
| 209 | __u32 forced; | 210 | __u32 pdrop; |
| 210 | __u32 early; | 211 | __u8 Wlog; /* log(W) */ |
| 211 | __u32 other; | 212 | __u8 Plog; /* log(P_max/(qth_max-qth_min)) */ |
| 212 | __u32 pdrop; | 213 | __u8 Scell_log; /* cell size for idle damping */ |
| 213 | 214 | __u8 prio; /* prio of this VQ */ | |
| 214 | unsigned char Wlog; /* log(W) */ | 215 | __u32 packets; |
| 215 | unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ | 216 | __u32 bytesin; |
| 216 | unsigned char Scell_log; /* cell size for idle damping */ | ||
| 217 | __u8 prio; /* prio of this VQ */ | ||
| 218 | __u32 packets; | ||
| 219 | __u32 bytesin; | ||
| 220 | }; | 217 | }; |
| 218 | |||
| 221 | /* gred setup */ | 219 | /* gred setup */ |
| 222 | struct tc_gred_sopt | 220 | struct tc_gred_sopt |
| 223 | { | 221 | { |
| 224 | __u32 DPs; | 222 | __u32 DPs; |
| 225 | __u32 def_DP; | 223 | __u32 def_DP; |
| 226 | __u8 grio; | 224 | __u8 grio; |
| 227 | __u8 pad1; | 225 | __u8 flags; |
| 228 | __u16 pad2; | 226 | __u16 pad1; |
| 229 | }; | 227 | }; |
| 230 | 228 | ||
| 231 | /* HTB section */ | 229 | /* HTB section */ |
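The pkt_sched.h hunk above turns the old `pad1` byte of `struct tc_gred_sopt` into a `flags` field and adds `TC_RED_HARDDROP`. As a hypothetical illustration (not part of the patch), a userspace tool could now request ECN marking for GRED by filling the structure it carries in the `TCA_GRED_DPS` attribute as sketched below; building and sending the netlink message itself is omitted:

```c
/* Hypothetical userspace sketch: fill the updated tc_gred_sopt so the
 * former pad byte now carries RED flags. Values are examples only. */
#include <linux/pkt_sched.h>
#include <string.h>

static void fill_gred_sopt(struct tc_gred_sopt *sopt)
{
	memset(sopt, 0, sizeof(*sopt));
	sopt->DPs    = 4;           /* number of virtual queues (DPs) */
	sopt->def_DP = 0;           /* default DP for unclassified packets */
	sopt->grio   = 1;           /* enable RIO (priority) mode */
	sopt->flags  = TC_RED_ECN;  /* mark with ECN instead of dropping */
}
```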
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index f87845e2e965..b0c47e2eccf1 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
| @@ -2,6 +2,7 @@ | |||
| 2 | #define _INET_ECN_H_ | 2 | #define _INET_ECN_H_ |
| 3 | 3 | ||
| 4 | #include <linux/ip.h> | 4 | #include <linux/ip.h> |
| 5 | #include <linux/skbuff.h> | ||
| 5 | #include <net/dsfield.h> | 6 | #include <net/dsfield.h> |
| 6 | 7 | ||
| 7 | enum { | 8 | enum { |
| @@ -48,7 +49,7 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) | |||
| 48 | (label) |= __constant_htons(INET_ECN_ECT_0 << 4); \ | 49 | (label) |= __constant_htons(INET_ECN_ECT_0 << 4); \ |
| 49 | } while (0) | 50 | } while (0) |
| 50 | 51 | ||
| 51 | static inline void IP_ECN_set_ce(struct iphdr *iph) | 52 | static inline int IP_ECN_set_ce(struct iphdr *iph) |
| 52 | { | 53 | { |
| 53 | u32 check = iph->check; | 54 | u32 check = iph->check; |
| 54 | u32 ecn = (iph->tos + 1) & INET_ECN_MASK; | 55 | u32 ecn = (iph->tos + 1) & INET_ECN_MASK; |
| @@ -61,7 +62,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph) | |||
| 61 | * INET_ECN_CE => 00 | 62 | * INET_ECN_CE => 00 |
| 62 | */ | 63 | */ |
| 63 | if (!(ecn & 2)) | 64 | if (!(ecn & 2)) |
| 64 | return; | 65 | return !ecn; |
| 65 | 66 | ||
| 66 | /* | 67 | /* |
| 67 | * The following gives us: | 68 | * The following gives us: |
| @@ -72,6 +73,7 @@ static inline void IP_ECN_set_ce(struct iphdr *iph) | |||
| 72 | 73 | ||
| 73 | iph->check = check + (check>=0xFFFF); | 74 | iph->check = check + (check>=0xFFFF); |
| 74 | iph->tos |= INET_ECN_CE; | 75 | iph->tos |= INET_ECN_CE; |
| 76 | return 1; | ||
| 75 | } | 77 | } |
| 76 | 78 | ||
| 77 | static inline void IP_ECN_clear(struct iphdr *iph) | 79 | static inline void IP_ECN_clear(struct iphdr *iph) |
| @@ -87,11 +89,12 @@ static inline void ipv4_copy_dscp(struct iphdr *outer, struct iphdr *inner) | |||
| 87 | 89 | ||
| 88 | struct ipv6hdr; | 90 | struct ipv6hdr; |
| 89 | 91 | ||
| 90 | static inline void IP6_ECN_set_ce(struct ipv6hdr *iph) | 92 | static inline int IP6_ECN_set_ce(struct ipv6hdr *iph) |
| 91 | { | 93 | { |
| 92 | if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) | 94 | if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) |
| 93 | return; | 95 | return 0; |
| 94 | *(u32*)iph |= htonl(INET_ECN_CE << 20); | 96 | *(u32*)iph |= htonl(INET_ECN_CE << 20); |
| 97 | return 1; | ||
| 95 | } | 98 | } |
| 96 | 99 | ||
| 97 | static inline void IP6_ECN_clear(struct ipv6hdr *iph) | 100 | static inline void IP6_ECN_clear(struct ipv6hdr *iph) |
| @@ -105,4 +108,21 @@ static inline void ipv6_copy_dscp(struct ipv6hdr *outer, struct ipv6hdr *inner) | |||
| 105 | ipv6_change_dsfield(inner, INET_ECN_MASK, dscp); | 108 | ipv6_change_dsfield(inner, INET_ECN_MASK, dscp); |
| 106 | } | 109 | } |
| 107 | 110 | ||
| 111 | static inline int INET_ECN_set_ce(struct sk_buff *skb) | ||
| 112 | { | ||
| 113 | switch (skb->protocol) { | ||
| 114 | case __constant_htons(ETH_P_IP): | ||
| 115 | if (skb->nh.raw + sizeof(struct iphdr) <= skb->tail) | ||
| 116 | return IP_ECN_set_ce(skb->nh.iph); | ||
| 117 | break; | ||
| 118 | |||
| 119 | case __constant_htons(ETH_P_IPV6): | ||
| 120 | if (skb->nh.raw + sizeof(struct ipv6hdr) <= skb->tail) | ||
| 121 | return IP6_ECN_set_ce(skb->nh.ipv6h); | ||
| 122 | break; | ||
| 123 | } | ||
| 124 | |||
| 125 | return 0; | ||
| 126 | } | ||
| 127 | |||
| 108 | #endif | 128 | #endif |
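The inet_ecn.h hunk makes `IP_ECN_set_ce()`/`IP6_ECN_set_ce()` report whether the packet was actually marked and adds the protocol-dispatching `INET_ECN_set_ce()` helper. A minimal sketch of how a queueing discipline can use the new return value (the helper name `try_ecn_mark()` and the `ecn_enabled` flag are illustrative, not from the patch):

```c
#include <net/inet_ecn.h>

/* Returns nonzero if the packet could be marked with CE, zero if the
 * caller should fall back to dropping it. This mirrors the
 * "!use_ecn || !INET_ECN_set_ce(skb)" pattern used by sch_red/sch_gred
 * in this patch; ecn_enabled stands for a qdisc flag such as TC_RED_ECN. */
static inline int try_ecn_mark(struct sk_buff *skb, int ecn_enabled)
{
	return ecn_enabled && INET_ECN_set_ce(skb);
}
```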
diff --git a/include/net/red.h b/include/net/red.h
new file mode 100644
index 000000000000..2ed4358e3295
--- /dev/null
+++ b/include/net/red.h
| @@ -0,0 +1,325 @@ | |||
| 1 | #ifndef __NET_SCHED_RED_H | ||
| 2 | #define __NET_SCHED_RED_H | ||
| 3 | |||
| 4 | #include <linux/config.h> | ||
| 5 | #include <linux/types.h> | ||
| 6 | #include <net/pkt_sched.h> | ||
| 7 | #include <net/inet_ecn.h> | ||
| 8 | #include <net/dsfield.h> | ||
| 9 | |||
| 10 | /* Random Early Detection (RED) algorithm. | ||
| 11 | ======================================= | ||
| 12 | |||
| 13 | Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways | ||
| 14 | for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking. | ||
| 15 | |||
| 16 | This file codes a "divisionless" version of RED algorithm | ||
| 17 | as written down in Fig.17 of the paper. | ||
| 18 | |||
| 19 | Short description. | ||
| 20 | ------------------ | ||
| 21 | |||
| 22 | When a new packet arrives we calculate the average queue length: | ||
| 23 | |||
| 24 | avg = (1-W)*avg + W*current_queue_len, | ||
| 25 | |||
| 26 | W is the filter time constant (chosen as 2^(-Wlog)), it controls | ||
| 27 | the inertia of the algorithm. To allow larger bursts, W should be | ||
| 28 | decreased. | ||
| 29 | |||
| 30 | if (avg > th_max) -> packet marked (dropped). | ||
| 31 | if (avg < th_min) -> packet passes. | ||
| 32 | if (th_min < avg < th_max) we calculate probability: | ||
| 33 | |||
| 34 | Pb = max_P * (avg - th_min)/(th_max-th_min) | ||
| 35 | |||
| 36 | and mark (drop) packet with this probability. | ||
| 37 | Pb changes from 0 (at avg==th_min) to max_P (avg==th_max). | ||
| 38 | max_P should be small (not 1), usually 0.01..0.02 is good value. | ||
| 39 | |||
| 40 | max_P is chosen as a number, so that max_P/(th_max-th_min) | ||
| 41 | is a negative power of two in order arithmetics to contain | ||
| 42 | only shifts. | ||
| 43 | |||
| 44 | |||
| 45 | Parameters, settable by user: | ||
| 46 | ----------------------------- | ||
| 47 | |||
| 48 | qth_min - bytes (should be < qth_max/2) | ||
| 49 | qth_max - bytes (should be at least 2*qth_min and less limit) | ||
| 50 | Wlog - bits (<32) log(1/W). | ||
| 51 | Plog - bits (<32) | ||
| 52 | |||
| 53 | Plog is related to max_P by formula: | ||
| 54 | |||
| 55 | max_P = (qth_max-qth_min)/2^Plog; | ||
| 56 | |||
| 57 | F.e. if qth_max=128K and qth_min=32K, then Plog=22 | ||
| 58 | corresponds to max_P=0.02 | ||
| 59 | |||
| 60 | Scell_log | ||
| 61 | Stab | ||
| 62 | |||
| 63 | Lookup table for log((1-W)^(t/t_ave). | ||
| 64 | |||
| 65 | |||
| 66 | NOTES: | ||
| 67 | |||
| 68 | Upper bound on W. | ||
| 69 | ----------------- | ||
| 70 | |||
| 71 | If you want to allow bursts of L packets of size S, | ||
| 72 | you should choose W: | ||
| 73 | |||
| 74 | L + 1 - th_min/S < (1-(1-W)^L)/W | ||
| 75 | |||
| 76 | th_min/S = 32 th_min/S = 4 | ||
| 77 | |||
| 78 | log(W) L | ||
| 79 | -1 33 | ||
| 80 | -2 35 | ||
| 81 | -3 39 | ||
| 82 | -4 46 | ||
| 83 | -5 57 | ||
| 84 | -6 75 | ||
| 85 | -7 101 | ||
| 86 | -8 135 | ||
| 87 | -9 190 | ||
| 88 | etc. | ||
| 89 | */ | ||
| 90 | |||
| 91 | #define RED_STAB_SIZE 256 | ||
| 92 | #define RED_STAB_MASK (RED_STAB_SIZE - 1) | ||
| 93 | |||
| 94 | struct red_stats | ||
| 95 | { | ||
| 96 | u32 prob_drop; /* Early probability drops */ | ||
| 97 | u32 prob_mark; /* Early probability marks */ | ||
| 98 | u32 forced_drop; /* Forced drops, qavg > max_thresh */ | ||
| 99 | u32 forced_mark; /* Forced marks, qavg > max_thresh */ | ||
| 100 | u32 pdrop; /* Drops due to queue limits */ | ||
| 101 | u32 other; /* Drops due to drop() calls */ | ||
| 102 | u32 backlog; | ||
| 103 | }; | ||
| 104 | |||
| 105 | struct red_parms | ||
| 106 | { | ||
| 107 | /* Parameters */ | ||
| 108 | u32 qth_min; /* Min avg length threshold: A scaled */ | ||
| 109 | u32 qth_max; /* Max avg length threshold: A scaled */ | ||
| 110 | u32 Scell_max; | ||
| 111 | u32 Rmask; /* Cached random mask, see red_rmask */ | ||
| 112 | u8 Scell_log; | ||
| 113 | u8 Wlog; /* log(W) */ | ||
| 114 | u8 Plog; /* random number bits */ | ||
| 115 | u8 Stab[RED_STAB_SIZE]; | ||
| 116 | |||
| 117 | /* Variables */ | ||
| 118 | int qcount; /* Number of packets since last random | ||
| 119 | number generation */ | ||
| 120 | u32 qR; /* Cached random number */ | ||
| 121 | |||
| 122 | unsigned long qavg; /* Average queue length: A scaled */ | ||
| 123 | psched_time_t qidlestart; /* Start of current idle period */ | ||
| 124 | }; | ||
| 125 | |||
| 126 | static inline u32 red_rmask(u8 Plog) | ||
| 127 | { | ||
| 128 | return Plog < 32 ? ((1 << Plog) - 1) : ~0UL; | ||
| 129 | } | ||
| 130 | |||
| 131 | static inline void red_set_parms(struct red_parms *p, | ||
| 132 | u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog, | ||
| 133 | u8 Scell_log, u8 *stab) | ||
| 134 | { | ||
| 135 | /* Reset average queue length, the value is strictly bound | ||
| 136 | * to the parameters below, reseting hurts a bit but leaving | ||
| 137 | * it might result in an unreasonable qavg for a while. --TGR | ||
| 138 | */ | ||
| 139 | p->qavg = 0; | ||
| 140 | |||
| 141 | p->qcount = -1; | ||
| 142 | p->qth_min = qth_min << Wlog; | ||
| 143 | p->qth_max = qth_max << Wlog; | ||
| 144 | p->Wlog = Wlog; | ||
| 145 | p->Plog = Plog; | ||
| 146 | p->Rmask = red_rmask(Plog); | ||
| 147 | p->Scell_log = Scell_log; | ||
| 148 | p->Scell_max = (255 << Scell_log); | ||
| 149 | |||
| 150 | memcpy(p->Stab, stab, sizeof(p->Stab)); | ||
| 151 | } | ||
| 152 | |||
| 153 | static inline int red_is_idling(struct red_parms *p) | ||
| 154 | { | ||
| 155 | return !PSCHED_IS_PASTPERFECT(p->qidlestart); | ||
| 156 | } | ||
| 157 | |||
| 158 | static inline void red_start_of_idle_period(struct red_parms *p) | ||
| 159 | { | ||
| 160 | PSCHED_GET_TIME(p->qidlestart); | ||
| 161 | } | ||
| 162 | |||
| 163 | static inline void red_end_of_idle_period(struct red_parms *p) | ||
| 164 | { | ||
| 165 | PSCHED_SET_PASTPERFECT(p->qidlestart); | ||
| 166 | } | ||
| 167 | |||
| 168 | static inline void red_restart(struct red_parms *p) | ||
| 169 | { | ||
| 170 | red_end_of_idle_period(p); | ||
| 171 | p->qavg = 0; | ||
| 172 | p->qcount = -1; | ||
| 173 | } | ||
| 174 | |||
| 175 | static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p) | ||
| 176 | { | ||
| 177 | psched_time_t now; | ||
| 178 | long us_idle; | ||
| 179 | int shift; | ||
| 180 | |||
| 181 | PSCHED_GET_TIME(now); | ||
| 182 | us_idle = PSCHED_TDIFF_SAFE(now, p->qidlestart, p->Scell_max); | ||
| 183 | |||
| 184 | /* | ||
| 185 | * The problem: ideally, average length queue recalcultion should | ||
| 186 | * be done over constant clock intervals. This is too expensive, so | ||
| 187 | * that the calculation is driven by outgoing packets. | ||
| 188 | * When the queue is idle we have to model this clock by hand. | ||
| 189 | * | ||
| 190 | * SF+VJ proposed to "generate": | ||
| 191 | * | ||
| 192 | * m = idletime / (average_pkt_size / bandwidth) | ||
| 193 | * | ||
| 194 | * dummy packets as a burst after idle time, i.e. | ||
| 195 | * | ||
| 196 | * p->qavg *= (1-W)^m | ||
| 197 | * | ||
| 198 | * This is an apparently overcomplicated solution (f.e. we have to | ||
| 199 | * precompute a table to make this calculation in reasonable time) | ||
| 200 | * I believe that a simpler model may be used here, | ||
| 201 | * but it is field for experiments. | ||
| 202 | */ | ||
| 203 | |||
| 204 | shift = p->Stab[(us_idle >> p->Scell_log) & RED_STAB_MASK]; | ||
| 205 | |||
| 206 | if (shift) | ||
| 207 | return p->qavg >> shift; | ||
| 208 | else { | ||
| 209 | /* Approximate initial part of exponent with linear function: | ||
| 210 | * | ||
| 211 | * (1-W)^m ~= 1-mW + ... | ||
| 212 | * | ||
| 213 | * Seems, it is the best solution to | ||
| 214 | * problem of too coarse exponent tabulation. | ||
| 215 | */ | ||
| 216 | us_idle = (p->qavg * us_idle) >> p->Scell_log; | ||
| 217 | |||
| 218 | if (us_idle < (p->qavg >> 1)) | ||
| 219 | return p->qavg - us_idle; | ||
| 220 | else | ||
| 221 | return p->qavg >> 1; | ||
| 222 | } | ||
| 223 | } | ||
| 224 | |||
| 225 | static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p, | ||
| 226 | unsigned int backlog) | ||
| 227 | { | ||
| 228 | /* | ||
| 229 | * NOTE: p->qavg is fixed point number with point at Wlog. | ||
| 230 | * The formula below is equvalent to floating point | ||
| 231 | * version: | ||
| 232 | * | ||
| 233 | * qavg = qavg*(1-W) + backlog*W; | ||
| 234 | * | ||
| 235 | * --ANK (980924) | ||
| 236 | */ | ||
| 237 | return p->qavg + (backlog - (p->qavg >> p->Wlog)); | ||
| 238 | } | ||
| 239 | |||
| 240 | static inline unsigned long red_calc_qavg(struct red_parms *p, | ||
| 241 | unsigned int backlog) | ||
| 242 | { | ||
| 243 | if (!red_is_idling(p)) | ||
| 244 | return red_calc_qavg_no_idle_time(p, backlog); | ||
| 245 | else | ||
| 246 | return red_calc_qavg_from_idle_time(p); | ||
| 247 | } | ||
| 248 | |||
| 249 | static inline u32 red_random(struct red_parms *p) | ||
| 250 | { | ||
| 251 | return net_random() & p->Rmask; | ||
| 252 | } | ||
| 253 | |||
| 254 | static inline int red_mark_probability(struct red_parms *p, unsigned long qavg) | ||
| 255 | { | ||
| 256 | /* The formula used below causes questions. | ||
| 257 | |||
| 258 | OK. qR is random number in the interval 0..Rmask | ||
| 259 | i.e. 0..(2^Plog). If we used floating point | ||
| 260 | arithmetics, it would be: (2^Plog)*rnd_num, | ||
| 261 | where rnd_num is less 1. | ||
| 262 | |||
| 263 | Taking into account, that qavg have fixed | ||
| 264 | point at Wlog, and Plog is related to max_P by | ||
| 265 | max_P = (qth_max-qth_min)/2^Plog; two lines | ||
| 266 | below have the following floating point equivalent: | ||
| 267 | |||
| 268 | max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount | ||
| 269 | |||
| 270 | Any questions? --ANK (980924) | ||
| 271 | */ | ||
| 272 | return !(((qavg - p->qth_min) >> p->Wlog) * p->qcount < p->qR); | ||
| 273 | } | ||
| 274 | |||
| 275 | enum { | ||
| 276 | RED_BELOW_MIN_THRESH, | ||
| 277 | RED_BETWEEN_TRESH, | ||
| 278 | RED_ABOVE_MAX_TRESH, | ||
| 279 | }; | ||
| 280 | |||
| 281 | static inline int red_cmp_thresh(struct red_parms *p, unsigned long qavg) | ||
| 282 | { | ||
| 283 | if (qavg < p->qth_min) | ||
| 284 | return RED_BELOW_MIN_THRESH; | ||
| 285 | else if (qavg >= p->qth_max) | ||
| 286 | return RED_ABOVE_MAX_TRESH; | ||
| 287 | else | ||
| 288 | return RED_BETWEEN_TRESH; | ||
| 289 | } | ||
| 290 | |||
| 291 | enum { | ||
| 292 | RED_DONT_MARK, | ||
| 293 | RED_PROB_MARK, | ||
| 294 | RED_HARD_MARK, | ||
| 295 | }; | ||
| 296 | |||
| 297 | static inline int red_action(struct red_parms *p, unsigned long qavg) | ||
| 298 | { | ||
| 299 | switch (red_cmp_thresh(p, qavg)) { | ||
| 300 | case RED_BELOW_MIN_THRESH: | ||
| 301 | p->qcount = -1; | ||
| 302 | return RED_DONT_MARK; | ||
| 303 | |||
| 304 | case RED_BETWEEN_TRESH: | ||
| 305 | if (++p->qcount) { | ||
| 306 | if (red_mark_probability(p, qavg)) { | ||
| 307 | p->qcount = 0; | ||
| 308 | p->qR = red_random(p); | ||
| 309 | return RED_PROB_MARK; | ||
| 310 | } | ||
| 311 | } else | ||
| 312 | p->qR = red_random(p); | ||
| 313 | |||
| 314 | return RED_DONT_MARK; | ||
| 315 | |||
| 316 | case RED_ABOVE_MAX_TRESH: | ||
| 317 | p->qcount = -1; | ||
| 318 | return RED_HARD_MARK; | ||
| 319 | } | ||
| 320 | |||
| 321 | BUG(); | ||
| 322 | return RED_DONT_MARK; | ||
| 323 | } | ||
| 324 | |||
| 325 | #endif | ||
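The comment block at the top of the new red.h ties Plog to max_P via max_P = (qth_max - qth_min) / 2^Plog and describes the shift-only average update. The following standalone userspace sketch (an assumption, not part of the patch) reproduces the qth_max=128K / qth_min=32K / Plog=22 example from that comment and the fixed-point update used by `red_calc_qavg_no_idle_time()`:

```c
/* Standalone illustration of the RED parameter relationships documented
 * in include/net/red.h; values are the example ones from the comment. */
#include <stdio.h>

int main(void)
{
	unsigned int qth_min = 32 * 1024, qth_max = 128 * 1024;
	unsigned int Wlog = 8, Plog = 22;
	unsigned long qavg = 0;        /* fixed point, scaled by 2^Wlog */
	unsigned int backlog = 60 * 1024;
	int i;

	/* max_P = (qth_max - qth_min) / 2^Plog ~= 0.023, i.e. roughly
	 * the recommended 0.02 from the comment above. */
	printf("max_P = %f\n", (double)(qth_max - qth_min) / (1UL << Plog));

	/* qavg = qavg*(1-W) + backlog*W with W = 2^-Wlog, shifts only */
	for (i = 0; i < 5; i++) {
		qavg += backlog - (qavg >> Wlog);
		printf("step %d: avg = %lu bytes\n", i, qavg >> Wlog);
	}
	return 0;
}
```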
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 25c171c32715..29a2dd9f3029 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
| @@ -15,247 +15,281 @@ | |||
| 15 | * from Ren Liu | 15 | * from Ren Liu |
| 16 | * - More error checks | 16 | * - More error checks |
| 17 | * | 17 | * |
| 18 | * | 18 | * For all the glorious comments look at include/net/red.h |
| 19 | * | ||
| 20 | * For all the glorious comments look at Alexey's sch_red.c | ||
| 21 | */ | 19 | */ |
| 22 | 20 | ||
| 23 | #include <linux/config.h> | 21 | #include <linux/config.h> |
| 24 | #include <linux/module.h> | 22 | #include <linux/module.h> |
| 25 | #include <asm/uaccess.h> | ||
| 26 | #include <asm/system.h> | ||
| 27 | #include <linux/bitops.h> | ||
| 28 | #include <linux/types.h> | 23 | #include <linux/types.h> |
| 29 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
| 30 | #include <linux/sched.h> | ||
| 31 | #include <linux/string.h> | ||
| 32 | #include <linux/mm.h> | ||
| 33 | #include <linux/socket.h> | ||
| 34 | #include <linux/sockios.h> | ||
| 35 | #include <linux/in.h> | ||
| 36 | #include <linux/errno.h> | ||
| 37 | #include <linux/interrupt.h> | ||
| 38 | #include <linux/if_ether.h> | ||
| 39 | #include <linux/inet.h> | ||
| 40 | #include <linux/netdevice.h> | 25 | #include <linux/netdevice.h> |
| 41 | #include <linux/etherdevice.h> | ||
| 42 | #include <linux/notifier.h> | ||
| 43 | #include <net/ip.h> | ||
| 44 | #include <net/route.h> | ||
| 45 | #include <linux/skbuff.h> | 26 | #include <linux/skbuff.h> |
| 46 | #include <net/sock.h> | ||
| 47 | #include <net/pkt_sched.h> | 27 | #include <net/pkt_sched.h> |
| 28 | #include <net/red.h> | ||
| 48 | 29 | ||
| 49 | #if 1 /* control */ | 30 | #define GRED_DEF_PRIO (MAX_DPs / 2) |
| 50 | #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) | 31 | #define GRED_VQ_MASK (MAX_DPs - 1) |
| 51 | #else | ||
| 52 | #define DPRINTK(format,args...) | ||
| 53 | #endif | ||
| 54 | |||
| 55 | #if 0 /* data */ | ||
| 56 | #define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args) | ||
| 57 | #else | ||
| 58 | #define D2PRINTK(format,args...) | ||
| 59 | #endif | ||
| 60 | 32 | ||
| 61 | struct gred_sched_data; | 33 | struct gred_sched_data; |
| 62 | struct gred_sched; | 34 | struct gred_sched; |
| 63 | 35 | ||
| 64 | struct gred_sched_data | 36 | struct gred_sched_data |
| 65 | { | 37 | { |
| 66 | /* Parameters */ | ||
| 67 | u32 limit; /* HARD maximal queue length */ | 38 | u32 limit; /* HARD maximal queue length */ |
| 68 | u32 qth_min; /* Min average length threshold: A scaled */ | ||
| 69 | u32 qth_max; /* Max average length threshold: A scaled */ | ||
| 70 | u32 DP; /* the drop pramaters */ | 39 | u32 DP; /* the drop pramaters */ |
| 71 | char Wlog; /* log(W) */ | ||
| 72 | char Plog; /* random number bits */ | ||
| 73 | u32 Scell_max; | ||
| 74 | u32 Rmask; | ||
| 75 | u32 bytesin; /* bytes seen on virtualQ so far*/ | 40 | u32 bytesin; /* bytes seen on virtualQ so far*/ |
| 76 | u32 packetsin; /* packets seen on virtualQ so far*/ | 41 | u32 packetsin; /* packets seen on virtualQ so far*/ |
| 77 | u32 backlog; /* bytes on the virtualQ */ | 42 | u32 backlog; /* bytes on the virtualQ */ |
| 78 | u32 forced; /* packets dropped for exceeding limits */ | 43 | u8 prio; /* the prio of this vq */ |
| 79 | u32 early; /* packets dropped as a warning */ | 44 | |
| 80 | u32 other; /* packets dropped by invoking drop() */ | 45 | struct red_parms parms; |
| 81 | u32 pdrop; /* packets dropped because we exceeded physical queue limits */ | 46 | struct red_stats stats; |
| 82 | char Scell_log; | 47 | }; |
| 83 | u8 Stab[256]; | 48 | |
| 84 | u8 prio; /* the prio of this vq */ | 49 | enum { |
| 85 | 50 | GRED_WRED_MODE = 1, | |
| 86 | /* Variables */ | 51 | GRED_RIO_MODE, |
| 87 | unsigned long qave; /* Average queue length: A scaled */ | ||
| 88 | int qcount; /* Packets since last random number generation */ | ||
| 89 | u32 qR; /* Cached random number */ | ||
| 90 | |||
| 91 | psched_time_t qidlestart; /* Start of idle period */ | ||
| 92 | }; | 52 | }; |
| 93 | 53 | ||
| 94 | struct gred_sched | 54 | struct gred_sched |
| 95 | { | 55 | { |
| 96 | struct gred_sched_data *tab[MAX_DPs]; | 56 | struct gred_sched_data *tab[MAX_DPs]; |
| 97 | u32 DPs; | 57 | unsigned long flags; |
| 98 | u32 def; | 58 | u32 red_flags; |
| 99 | u8 initd; | 59 | u32 DPs; |
| 100 | u8 grio; | 60 | u32 def; |
| 101 | u8 eqp; | 61 | struct red_parms wred_set; |
| 102 | }; | 62 | }; |
| 103 | 63 | ||
| 104 | static int | 64 | static inline int gred_wred_mode(struct gred_sched *table) |
| 105 | gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) | ||
| 106 | { | 65 | { |
| 107 | psched_time_t now; | 66 | return test_bit(GRED_WRED_MODE, &table->flags); |
| 108 | struct gred_sched_data *q=NULL; | 67 | } |
| 109 | struct gred_sched *t= qdisc_priv(sch); | 68 | |
| 110 | unsigned long qave=0; | 69 | static inline void gred_enable_wred_mode(struct gred_sched *table) |
| 111 | int i=0; | 70 | { |
| 71 | __set_bit(GRED_WRED_MODE, &table->flags); | ||
| 72 | } | ||
| 73 | |||
| 74 | static inline void gred_disable_wred_mode(struct gred_sched *table) | ||
| 75 | { | ||
| 76 | __clear_bit(GRED_WRED_MODE, &table->flags); | ||
| 77 | } | ||
| 78 | |||
| 79 | static inline int gred_rio_mode(struct gred_sched *table) | ||
| 80 | { | ||
| 81 | return test_bit(GRED_RIO_MODE, &table->flags); | ||
| 82 | } | ||
| 83 | |||
| 84 | static inline void gred_enable_rio_mode(struct gred_sched *table) | ||
| 85 | { | ||
| 86 | __set_bit(GRED_RIO_MODE, &table->flags); | ||
| 87 | } | ||
| 88 | |||
| 89 | static inline void gred_disable_rio_mode(struct gred_sched *table) | ||
| 90 | { | ||
| 91 | __clear_bit(GRED_RIO_MODE, &table->flags); | ||
| 92 | } | ||
| 93 | |||
| 94 | static inline int gred_wred_mode_check(struct Qdisc *sch) | ||
| 95 | { | ||
| 96 | struct gred_sched *table = qdisc_priv(sch); | ||
| 97 | int i; | ||
| 112 | 98 | ||
| 113 | if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) { | 99 | /* Really ugly O(n^2) but shouldn't be necessary too frequent. */ |
| 114 | D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n"); | 100 | for (i = 0; i < table->DPs; i++) { |
| 115 | goto do_enqueue; | 101 | struct gred_sched_data *q = table->tab[i]; |
| 102 | int n; | ||
| 103 | |||
| 104 | if (q == NULL) | ||
| 105 | continue; | ||
| 106 | |||
| 107 | for (n = 0; n < table->DPs; n++) | ||
| 108 | if (table->tab[n] && table->tab[n] != q && | ||
| 109 | table->tab[n]->prio == q->prio) | ||
| 110 | return 1; | ||
| 116 | } | 111 | } |
| 117 | 112 | ||
| 113 | return 0; | ||
| 114 | } | ||
| 115 | |||
| 116 | static inline unsigned int gred_backlog(struct gred_sched *table, | ||
| 117 | struct gred_sched_data *q, | ||
| 118 | struct Qdisc *sch) | ||
| 119 | { | ||
| 120 | if (gred_wred_mode(table)) | ||
| 121 | return sch->qstats.backlog; | ||
| 122 | else | ||
| 123 | return q->backlog; | ||
| 124 | } | ||
| 125 | |||
| 126 | static inline u16 tc_index_to_dp(struct sk_buff *skb) | ||
| 127 | { | ||
| 128 | return skb->tc_index & GRED_VQ_MASK; | ||
| 129 | } | ||
| 130 | |||
| 131 | static inline void gred_load_wred_set(struct gred_sched *table, | ||
| 132 | struct gred_sched_data *q) | ||
| 133 | { | ||
| 134 | q->parms.qavg = table->wred_set.qavg; | ||
| 135 | q->parms.qidlestart = table->wred_set.qidlestart; | ||
| 136 | } | ||
| 137 | |||
| 138 | static inline void gred_store_wred_set(struct gred_sched *table, | ||
| 139 | struct gred_sched_data *q) | ||
| 140 | { | ||
| 141 | table->wred_set.qavg = q->parms.qavg; | ||
| 142 | } | ||
| 143 | |||
| 144 | static inline int gred_use_ecn(struct gred_sched *t) | ||
| 145 | { | ||
| 146 | return t->red_flags & TC_RED_ECN; | ||
| 147 | } | ||
| 118 | 148 | ||
| 119 | if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) { | 149 | static inline int gred_use_harddrop(struct gred_sched *t) |
| 120 | printk("GRED: setting to default (%d)\n ",t->def); | 150 | { |
| 121 | if (!(q=t->tab[t->def])) { | 151 | return t->red_flags & TC_RED_HARDDROP; |
| 122 | DPRINTK("GRED: setting to default FAILED! dropping!! " | 152 | } |
| 123 | "(%d)\n ", t->def); | 153 | |
| 124 | goto drop; | 154 | static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) |
| 155 | { | ||
| 156 | struct gred_sched_data *q=NULL; | ||
| 157 | struct gred_sched *t= qdisc_priv(sch); | ||
| 158 | unsigned long qavg = 0; | ||
| 159 | u16 dp = tc_index_to_dp(skb); | ||
| 160 | |||
| 161 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | ||
| 162 | dp = t->def; | ||
| 163 | |||
| 164 | if ((q = t->tab[dp]) == NULL) { | ||
| 165 | /* Pass through packets not assigned to a DP | ||
| 166 | * if no default DP has been configured. This | ||
| 167 | * allows for DP flows to be left untouched. | ||
| 168 | */ | ||
| 169 | if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) | ||
| 170 | return qdisc_enqueue_tail(skb, sch); | ||
| 171 | else | ||
| 172 | goto drop; | ||
| 125 | } | 173 | } |
| 174 | |||
| 126 | /* fix tc_index? --could be controvesial but needed for | 175 | /* fix tc_index? --could be controvesial but needed for |
| 127 | requeueing */ | 176 | requeueing */ |
| 128 | skb->tc_index=(skb->tc_index&0xfffffff0) | t->def; | 177 | skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; |
| 129 | } | 178 | } |
| 130 | 179 | ||
| 131 | D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d " | 180 | /* sum up all the qaves of prios <= to ours to get the new qave */ |
| 132 | "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog, | 181 | if (!gred_wred_mode(t) && gred_rio_mode(t)) { |
| 133 | sch->qstats.backlog); | 182 | int i; |
| 134 | /* sum up all the qaves of prios <= to ours to get the new qave*/ | 183 | |
| 135 | if (!t->eqp && t->grio) { | 184 | for (i = 0; i < t->DPs; i++) { |
| 136 | for (i=0;i<t->DPs;i++) { | 185 | if (t->tab[i] && t->tab[i]->prio < q->prio && |
| 137 | if ((!t->tab[i]) || (i==q->DP)) | 186 | !red_is_idling(&t->tab[i]->parms)) |
| 138 | continue; | 187 | qavg +=t->tab[i]->parms.qavg; |
| 139 | |||
| 140 | if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart))) | ||
| 141 | qave +=t->tab[i]->qave; | ||
| 142 | } | 188 | } |
| 143 | 189 | ||
| 144 | } | 190 | } |
| 145 | 191 | ||
| 146 | q->packetsin++; | 192 | q->packetsin++; |
| 147 | q->bytesin+=skb->len; | 193 | q->bytesin += skb->len; |
| 148 | 194 | ||
| 149 | if (t->eqp && t->grio) { | 195 | if (gred_wred_mode(t)) |
| 150 | qave=0; | 196 | gred_load_wred_set(t, q); |
| 151 | q->qave=t->tab[t->def]->qave; | ||
| 152 | q->qidlestart=t->tab[t->def]->qidlestart; | ||
| 153 | } | ||
| 154 | 197 | ||
| 155 | if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { | 198 | q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch)); |
| 156 | long us_idle; | ||
| 157 | PSCHED_GET_TIME(now); | ||
| 158 | us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); | ||
| 159 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
| 160 | 199 | ||
| 161 | q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF]; | 200 | if (red_is_idling(&q->parms)) |
| 162 | } else { | 201 | red_end_of_idle_period(&q->parms); |
| 163 | if (t->eqp) { | ||
| 164 | q->qave += sch->qstats.backlog - (q->qave >> q->Wlog); | ||
| 165 | } else { | ||
| 166 | q->qave += q->backlog - (q->qave >> q->Wlog); | ||
| 167 | } | ||
| 168 | 202 | ||
| 169 | } | 203 | if (gred_wred_mode(t)) |
| 170 | 204 | gred_store_wred_set(t, q); | |
| 171 | |||
| 172 | if (t->eqp && t->grio) | ||
| 173 | t->tab[t->def]->qave=q->qave; | ||
| 174 | |||
| 175 | if ((q->qave+qave) < q->qth_min) { | ||
| 176 | q->qcount = -1; | ||
| 177 | enqueue: | ||
| 178 | if (q->backlog + skb->len <= q->limit) { | ||
| 179 | q->backlog += skb->len; | ||
| 180 | do_enqueue: | ||
| 181 | __skb_queue_tail(&sch->q, skb); | ||
| 182 | sch->qstats.backlog += skb->len; | ||
| 183 | sch->bstats.bytes += skb->len; | ||
| 184 | sch->bstats.packets++; | ||
| 185 | return 0; | ||
| 186 | } else { | ||
| 187 | q->pdrop++; | ||
| 188 | } | ||
| 189 | 205 | ||
| 190 | drop: | 206 | switch (red_action(&q->parms, q->parms.qavg + qavg)) { |
| 191 | kfree_skb(skb); | 207 | case RED_DONT_MARK: |
| 192 | sch->qstats.drops++; | 208 | break; |
| 193 | return NET_XMIT_DROP; | 209 | |
| 194 | } | 210 | case RED_PROB_MARK: |
| 195 | if ((q->qave+qave) >= q->qth_max) { | 211 | sch->qstats.overlimits++; |
| 196 | q->qcount = -1; | 212 | if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { |
| 197 | sch->qstats.overlimits++; | 213 | q->stats.prob_drop++; |
| 198 | q->forced++; | 214 | goto congestion_drop; |
| 199 | goto drop; | 215 | } |
| 216 | |||
| 217 | q->stats.prob_mark++; | ||
| 218 | break; | ||
| 219 | |||
| 220 | case RED_HARD_MARK: | ||
| 221 | sch->qstats.overlimits++; | ||
| 222 | if (gred_use_harddrop(t) || !gred_use_ecn(t) || | ||
| 223 | !INET_ECN_set_ce(skb)) { | ||
| 224 | q->stats.forced_drop++; | ||
| 225 | goto congestion_drop; | ||
| 226 | } | ||
| 227 | q->stats.forced_mark++; | ||
| 228 | break; | ||
| 200 | } | 229 | } |
| 201 | if (++q->qcount) { | 230 | |
| 202 | if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR) | 231 | if (q->backlog + skb->len <= q->limit) { |
| 203 | goto enqueue; | 232 | q->backlog += skb->len; |
| 204 | q->qcount = 0; | 233 | return qdisc_enqueue_tail(skb, sch); |
| 205 | q->qR = net_random()&q->Rmask; | ||
| 206 | sch->qstats.overlimits++; | ||
| 207 | q->early++; | ||
| 208 | goto drop; | ||
| 209 | } | 234 | } |
| 210 | q->qR = net_random()&q->Rmask; | 235 | |
| 211 | goto enqueue; | 236 | q->stats.pdrop++; |
| 237 | drop: | ||
| 238 | return qdisc_drop(skb, sch); | ||
| 239 | |||
| 240 | congestion_drop: | ||
| 241 | qdisc_drop(skb, sch); | ||
| 242 | return NET_XMIT_CN; | ||
| 212 | } | 243 | } |
| 213 | 244 | ||
| 214 | static int | 245 | static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) |
| 215 | gred_requeue(struct sk_buff *skb, struct Qdisc* sch) | ||
| 216 | { | 246 | { |
| 247 | struct gred_sched *t = qdisc_priv(sch); | ||
| 217 | struct gred_sched_data *q; | 248 | struct gred_sched_data *q; |
| 218 | struct gred_sched *t= qdisc_priv(sch); | 249 | u16 dp = tc_index_to_dp(skb); |
| 219 | q= t->tab[(skb->tc_index&0xf)]; | 250 | |
| 220 | /* error checking here -- probably unnecessary */ | 251 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
| 221 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 252 | if (net_ratelimit()) |
| 222 | 253 | printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x " | |
| 223 | __skb_queue_head(&sch->q, skb); | 254 | "for requeue, screwing up backlog.\n", |
| 224 | sch->qstats.backlog += skb->len; | 255 | tc_index_to_dp(skb)); |
| 225 | sch->qstats.requeues++; | 256 | } else { |
| 226 | q->backlog += skb->len; | 257 | if (red_is_idling(&q->parms)) |
| 227 | return 0; | 258 | red_end_of_idle_period(&q->parms); |
| 259 | q->backlog += skb->len; | ||
| 260 | } | ||
| 261 | |||
| 262 | return qdisc_requeue(skb, sch); | ||
| 228 | } | 263 | } |
| 229 | 264 | ||
| 230 | static struct sk_buff * | 265 | static struct sk_buff *gred_dequeue(struct Qdisc* sch) |
| 231 | gred_dequeue(struct Qdisc* sch) | ||
| 232 | { | 266 | { |
| 233 | struct sk_buff *skb; | 267 | struct sk_buff *skb; |
| 234 | struct gred_sched_data *q; | 268 | struct gred_sched *t = qdisc_priv(sch); |
| 235 | struct gred_sched *t= qdisc_priv(sch); | 269 | |
| 270 | skb = qdisc_dequeue_head(sch); | ||
| 236 | 271 | ||
| 237 | skb = __skb_dequeue(&sch->q); | ||
| 238 | if (skb) { | 272 | if (skb) { |
| 239 | sch->qstats.backlog -= skb->len; | 273 | struct gred_sched_data *q; |
| 240 | q= t->tab[(skb->tc_index&0xf)]; | 274 | u16 dp = tc_index_to_dp(skb); |
| 241 | if (q) { | 275 | |
| 242 | q->backlog -= skb->len; | 276 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
| 243 | if (!q->backlog && !t->eqp) | 277 | if (net_ratelimit()) |
| 244 | PSCHED_GET_TIME(q->qidlestart); | 278 | printk(KERN_WARNING "GRED: Unable to relocate " |
| 279 | "VQ 0x%x after dequeue, screwing up " | ||
| 280 | "backlog.\n", tc_index_to_dp(skb)); | ||
| 245 | } else { | 281 | } else { |
| 246 | D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); | 282 | q->backlog -= skb->len; |
| 283 | |||
| 284 | if (!q->backlog && !gred_wred_mode(t)) | ||
| 285 | red_start_of_idle_period(&q->parms); | ||
| 247 | } | 286 | } |
| 287 | |||
| 248 | return skb; | 288 | return skb; |
| 249 | } | 289 | } |
| 250 | 290 | ||
| 251 | if (t->eqp) { | 291 | if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) |
| 252 | q= t->tab[t->def]; | 292 | red_start_of_idle_period(&t->wred_set); |
| 253 | if (!q) | ||
| 254 | D2PRINTK("no default VQ set: Results will be " | ||
| 255 | "screwed up\n"); | ||
| 256 | else | ||
| 257 | PSCHED_GET_TIME(q->qidlestart); | ||
| 258 | } | ||
| 259 | 293 | ||
| 260 | return NULL; | 294 | return NULL; |
| 261 | } | 295 | } |
| @@ -263,36 +297,34 @@ gred_dequeue(struct Qdisc* sch) | |||
| 263 | static unsigned int gred_drop(struct Qdisc* sch) | 297 | static unsigned int gred_drop(struct Qdisc* sch) |
| 264 | { | 298 | { |
| 265 | struct sk_buff *skb; | 299 | struct sk_buff *skb; |
| 300 | struct gred_sched *t = qdisc_priv(sch); | ||
| 266 | 301 | ||
| 267 | struct gred_sched_data *q; | 302 | skb = qdisc_dequeue_tail(sch); |
| 268 | struct gred_sched *t= qdisc_priv(sch); | ||
| 269 | |||
| 270 | skb = __skb_dequeue_tail(&sch->q); | ||
| 271 | if (skb) { | 303 | if (skb) { |
| 272 | unsigned int len = skb->len; | 304 | unsigned int len = skb->len; |
| 273 | sch->qstats.backlog -= len; | 305 | struct gred_sched_data *q; |
| 274 | sch->qstats.drops++; | 306 | u16 dp = tc_index_to_dp(skb); |
| 275 | q= t->tab[(skb->tc_index&0xf)]; | 307 | |
| 276 | if (q) { | 308 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
| 277 | q->backlog -= len; | 309 | if (net_ratelimit()) |
| 278 | q->other++; | 310 | printk(KERN_WARNING "GRED: Unable to relocate " |
| 279 | if (!q->backlog && !t->eqp) | 311 | "VQ 0x%x while dropping, screwing up " |
| 280 | PSCHED_GET_TIME(q->qidlestart); | 312 | "backlog.\n", tc_index_to_dp(skb)); |
| 281 | } else { | 313 | } else { |
| 282 | D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); | 314 | q->backlog -= len; |
| 315 | q->stats.other++; | ||
| 316 | |||
| 317 | if (!q->backlog && !gred_wred_mode(t)) | ||
| 318 | red_start_of_idle_period(&q->parms); | ||
| 283 | } | 319 | } |
| 284 | 320 | ||
| 285 | kfree_skb(skb); | 321 | qdisc_drop(skb, sch); |
| 286 | return len; | 322 | return len; |
| 287 | } | 323 | } |
| 288 | 324 | ||
| 289 | q=t->tab[t->def]; | 325 | if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) |
| 290 | if (!q) { | 326 | red_start_of_idle_period(&t->wred_set); |
| 291 | D2PRINTK("no default VQ set: Results might be screwed up\n"); | ||
| 292 | return 0; | ||
| 293 | } | ||
| 294 | 327 | ||
| 295 | PSCHED_GET_TIME(q->qidlestart); | ||
| 296 | return 0; | 328 | return 0; |
| 297 | 329 | ||
| 298 | } | 330 | } |
| @@ -300,293 +332,241 @@ static unsigned int gred_drop(struct Qdisc* sch) | |||
| 300 | static void gred_reset(struct Qdisc* sch) | 332 | static void gred_reset(struct Qdisc* sch) |
| 301 | { | 333 | { |
| 302 | int i; | 334 | int i; |
| 303 | struct gred_sched_data *q; | 335 | struct gred_sched *t = qdisc_priv(sch); |
| 304 | struct gred_sched *t= qdisc_priv(sch); | 336 | |
| 337 | qdisc_reset_queue(sch); | ||
| 305 | 338 | ||
| 306 | __skb_queue_purge(&sch->q); | 339 | for (i = 0; i < t->DPs; i++) { |
| 340 | struct gred_sched_data *q = t->tab[i]; | ||
| 307 | 341 | ||
| 308 | sch->qstats.backlog = 0; | 342 | if (!q) |
| 343 | continue; | ||
| 309 | 344 | ||
| 310 | for (i=0;i<t->DPs;i++) { | 345 | red_restart(&q->parms); |
| 311 | q= t->tab[i]; | ||
| 312 | if (!q) | ||
| 313 | continue; | ||
| 314 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
| 315 | q->qave = 0; | ||
| 316 | q->qcount = -1; | ||
| 317 | q->backlog = 0; | 346 | q->backlog = 0; |
| 318 | q->other=0; | ||
| 319 | q->forced=0; | ||
| 320 | q->pdrop=0; | ||
| 321 | q->early=0; | ||
| 322 | } | 347 | } |
| 323 | } | 348 | } |
| 324 | 349 | ||
| 325 | static int gred_change(struct Qdisc *sch, struct rtattr *opt) | 350 | static inline void gred_destroy_vq(struct gred_sched_data *q) |
| 351 | { | ||
| 352 | kfree(q); | ||
| 353 | } | ||
| 354 | |||
| 355 | static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps) | ||
| 326 | { | 356 | { |
| 327 | struct gred_sched *table = qdisc_priv(sch); | 357 | struct gred_sched *table = qdisc_priv(sch); |
| 328 | struct gred_sched_data *q; | ||
| 329 | struct tc_gred_qopt *ctl; | ||
| 330 | struct tc_gred_sopt *sopt; | 358 | struct tc_gred_sopt *sopt; |
| 331 | struct rtattr *tb[TCA_GRED_STAB]; | ||
| 332 | struct rtattr *tb2[TCA_GRED_DPS]; | ||
| 333 | int i; | 359 | int i; |
| 334 | 360 | ||
| 335 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt)) | 361 | if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt)) |
| 336 | return -EINVAL; | 362 | return -EINVAL; |
| 337 | 363 | ||
| 338 | if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) { | 364 | sopt = RTA_DATA(dps); |
| 339 | rtattr_parse_nested(tb2, TCA_GRED_DPS, opt); | 365 | |
| 366 | if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs) | ||
| 367 | return -EINVAL; | ||
| 340 | 368 | ||
| 341 | if (tb2[TCA_GRED_DPS-1] == 0) | 369 | sch_tree_lock(sch); |
| 342 | return -EINVAL; | 370 | table->DPs = sopt->DPs; |
| 371 | table->def = sopt->def_DP; | ||
| 372 | table->red_flags = sopt->flags; | ||
| 373 | |||
| 374 | /* | ||
| 375 | * Every entry point to GRED is synchronized with the above code | ||
| 376 | * and the DP is checked against DPs, i.e. shadowed VQs can no | ||
| 377 | * longer be found so we can unlock right here. | ||
| 378 | */ | ||
| 379 | sch_tree_unlock(sch); | ||
| 380 | |||
| 381 | if (sopt->grio) { | ||
| 382 | gred_enable_rio_mode(table); | ||
| 383 | gred_disable_wred_mode(table); | ||
| 384 | if (gred_wred_mode_check(sch)) | ||
| 385 | gred_enable_wred_mode(table); | ||
| 386 | } else { | ||
| 387 | gred_disable_rio_mode(table); | ||
| 388 | gred_disable_wred_mode(table); | ||
| 389 | } | ||
| 343 | 390 | ||
| 344 | sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); | 391 | for (i = table->DPs; i < MAX_DPs; i++) { |
| 345 | table->DPs=sopt->DPs; | 392 | if (table->tab[i]) { |
| 346 | table->def=sopt->def_DP; | 393 | printk(KERN_WARNING "GRED: Warning: Destroying " |
| 347 | table->grio=sopt->grio; | 394 | "shadowed VQ 0x%x\n", i); |
| 348 | table->initd=0; | 395 | gred_destroy_vq(table->tab[i]); |
| 349 | /* probably need to clear all the table DP entries as well */ | 396 | table->tab[i] = NULL; |
| 350 | return 0; | 397 | } |
| 351 | } | 398 | } |
| 352 | 399 | ||
| 400 | return 0; | ||
| 401 | } | ||
| 353 | 402 | ||
| 354 | if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 || | 403 | static inline int gred_change_vq(struct Qdisc *sch, int dp, |
| 355 | RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || | 404 | struct tc_gred_qopt *ctl, int prio, u8 *stab) |
| 356 | RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256) | 405 | { |
| 357 | return -EINVAL; | 406 | struct gred_sched *table = qdisc_priv(sch); |
| 407 | struct gred_sched_data *q; | ||
| 358 | 408 | ||
| 359 | ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); | 409 | if (table->tab[dp] == NULL) { |
| 360 | if (ctl->DP > MAX_DPs-1 ) { | 410 | table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL); |
| 361 | /* misbehaving is punished! Put in the default drop probability */ | 411 | if (table->tab[dp] == NULL) |
| 362 | DPRINTK("\nGRED: DP %u not in the proper range fixed. New DP " | ||
| 363 | "set to default at %d\n",ctl->DP,table->def); | ||
| 364 | ctl->DP=table->def; | ||
| 365 | } | ||
| 366 | |||
| 367 | if (table->tab[ctl->DP] == NULL) { | ||
| 368 | table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data), | ||
| 369 | GFP_KERNEL); | ||
| 370 | if (NULL == table->tab[ctl->DP]) | ||
| 371 | return -ENOMEM; | 412 | return -ENOMEM; |
| 372 | memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data))); | 413 | memset(table->tab[dp], 0, sizeof(*q)); |
| 373 | } | ||
| 374 | q= table->tab[ctl->DP]; | ||
| 375 | |||
| 376 | if (table->grio) { | ||
| 377 | if (ctl->prio <=0) { | ||
| 378 | if (table->def && table->tab[table->def]) { | ||
| 379 | DPRINTK("\nGRED: DP %u does not have a prio" | ||
| 380 | "setting default to %d\n",ctl->DP, | ||
| 381 | table->tab[table->def]->prio); | ||
| 382 | q->prio=table->tab[table->def]->prio; | ||
| 383 | } else { | ||
| 384 | DPRINTK("\nGRED: DP %u does not have a prio" | ||
| 385 | " setting default to 8\n",ctl->DP); | ||
| 386 | q->prio=8; | ||
| 387 | } | ||
| 388 | } else { | ||
| 389 | q->prio=ctl->prio; | ||
| 390 | } | ||
| 391 | } else { | ||
| 392 | q->prio=8; | ||
| 393 | } | 414 | } |
| 394 | 415 | ||
| 395 | 416 | q = table->tab[dp]; | |
| 396 | q->DP=ctl->DP; | 417 | q->DP = dp; |
| 397 | q->Wlog = ctl->Wlog; | 418 | q->prio = prio; |
| 398 | q->Plog = ctl->Plog; | ||
| 399 | q->limit = ctl->limit; | 419 | q->limit = ctl->limit; |
| 400 | q->Scell_log = ctl->Scell_log; | ||
| 401 | q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; | ||
| 402 | q->Scell_max = (255<<q->Scell_log); | ||
| 403 | q->qth_min = ctl->qth_min<<ctl->Wlog; | ||
| 404 | q->qth_max = ctl->qth_max<<ctl->Wlog; | ||
| 405 | q->qave=0; | ||
| 406 | q->backlog=0; | ||
| 407 | q->qcount = -1; | ||
| 408 | q->other=0; | ||
| 409 | q->forced=0; | ||
| 410 | q->pdrop=0; | ||
| 411 | q->early=0; | ||
| 412 | |||
| 413 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
| 414 | memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); | ||
| 415 | |||
| 416 | if ( table->initd && table->grio) { | ||
| 417 | /* this looks ugly but it's not in the fast path */ | ||
| 418 | for (i=0;i<table->DPs;i++) { | ||
| 419 | if ((!table->tab[i]) || (i==q->DP) ) | ||
| 420 | continue; | ||
| 421 | if (table->tab[i]->prio == q->prio ){ | ||
| 422 | /* WRED mode detected */ | ||
| 423 | table->eqp=1; | ||
| 424 | break; | ||
| 425 | } | ||
| 426 | } | ||
| 427 | } | ||
| 428 | 420 | ||
| 429 | if (!table->initd) { | 421 | if (q->backlog == 0) |
| 430 | table->initd=1; | 422 | red_end_of_idle_period(&q->parms); |
| 431 | /* | ||
| 432 | the first entry also goes into the default until | ||
| 433 | over-written | ||
| 434 | */ | ||
| 435 | |||
| 436 | if (table->tab[table->def] == NULL) { | ||
| 437 | table->tab[table->def]= | ||
| 438 | kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL); | ||
| 439 | if (NULL == table->tab[table->def]) | ||
| 440 | return -ENOMEM; | ||
| 441 | |||
| 442 | memset(table->tab[table->def], 0, | ||
| 443 | (sizeof(struct gred_sched_data))); | ||
| 444 | } | ||
| 445 | q= table->tab[table->def]; | ||
| 446 | q->DP=table->def; | ||
| 447 | q->Wlog = ctl->Wlog; | ||
| 448 | q->Plog = ctl->Plog; | ||
| 449 | q->limit = ctl->limit; | ||
| 450 | q->Scell_log = ctl->Scell_log; | ||
| 451 | q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; | ||
| 452 | q->Scell_max = (255<<q->Scell_log); | ||
| 453 | q->qth_min = ctl->qth_min<<ctl->Wlog; | ||
| 454 | q->qth_max = ctl->qth_max<<ctl->Wlog; | ||
| 455 | |||
| 456 | if (table->grio) | ||
| 457 | q->prio=table->tab[ctl->DP]->prio; | ||
| 458 | else | ||
| 459 | q->prio=8; | ||
| 460 | |||
| 461 | q->qcount = -1; | ||
| 462 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
| 463 | memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); | ||
| 464 | } | ||
| 465 | return 0; | ||
| 466 | 423 | ||
| 424 | red_set_parms(&q->parms, | ||
| 425 | ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, | ||
| 426 | ctl->Scell_log, stab); | ||
| 427 | |||
| 428 | return 0; | ||
| 467 | } | 429 | } |
| 468 | 430 | ||
| 469 | static int gred_init(struct Qdisc *sch, struct rtattr *opt) | 431 | static int gred_change(struct Qdisc *sch, struct rtattr *opt) |
| 470 | { | 432 | { |
| 471 | struct gred_sched *table = qdisc_priv(sch); | 433 | struct gred_sched *table = qdisc_priv(sch); |
| 472 | struct tc_gred_sopt *sopt; | 434 | struct tc_gred_qopt *ctl; |
| 473 | struct rtattr *tb[TCA_GRED_STAB]; | 435 | struct rtattr *tb[TCA_GRED_MAX]; |
| 474 | struct rtattr *tb2[TCA_GRED_DPS]; | 436 | int err = -EINVAL, prio = GRED_DEF_PRIO; |
| 437 | u8 *stab; | ||
| 475 | 438 | ||
| 476 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt)) | 439 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) |
| 477 | return -EINVAL; | 440 | return -EINVAL; |
| 478 | 441 | ||
| 479 | if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) { | 442 | if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL) |
| 480 | rtattr_parse_nested(tb2, TCA_GRED_DPS, opt); | 443 | return gred_change_table_def(sch, opt); |
| 444 | |||
| 445 | if (tb[TCA_GRED_PARMS-1] == NULL || | ||
| 446 | RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || | ||
| 447 | tb[TCA_GRED_STAB-1] == NULL || | ||
| 448 | RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256) | ||
| 449 | return -EINVAL; | ||
| 450 | |||
| 451 | ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); | ||
| 452 | stab = RTA_DATA(tb[TCA_GRED_STAB-1]); | ||
| 453 | |||
| 454 | if (ctl->DP >= table->DPs) | ||
| 455 | goto errout; | ||
| 481 | 456 | ||
| 482 | if (tb2[TCA_GRED_DPS-1] == 0) | 457 | if (gred_rio_mode(table)) { |
| 483 | return -EINVAL; | 458 | if (ctl->prio == 0) { |
| 459 | int def_prio = GRED_DEF_PRIO; | ||
| 484 | 460 | ||
| 485 | sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); | 461 | if (table->tab[table->def]) |
| 486 | table->DPs=sopt->DPs; | 462 | def_prio = table->tab[table->def]->prio; |
| 487 | table->def=sopt->def_DP; | 463 | |
| 488 | table->grio=sopt->grio; | 464 | printk(KERN_DEBUG "GRED: DP %u does not have a prio " |
| 489 | table->initd=0; | 465 | "setting default to %d\n", ctl->DP, def_prio); |
| 490 | return 0; | 466 | |
| 467 | prio = def_prio; | ||
| 468 | } else | ||
| 469 | prio = ctl->prio; | ||
| 470 | } | ||
| 471 | |||
| 472 | sch_tree_lock(sch); | ||
| 473 | |||
| 474 | err = gred_change_vq(sch, ctl->DP, ctl, prio, stab); | ||
| 475 | if (err < 0) | ||
| 476 | goto errout_locked; | ||
| 477 | |||
| 478 | if (gred_rio_mode(table)) { | ||
| 479 | gred_disable_wred_mode(table); | ||
| 480 | if (gred_wred_mode_check(sch)) | ||
| 481 | gred_enable_wred_mode(table); | ||
| 491 | } | 482 | } |
| 492 | 483 | ||
| 493 | DPRINTK("\n GRED_INIT error!\n"); | 484 | err = 0; |
| 494 | return -EINVAL; | 485 | |
| 486 | errout_locked: | ||
| 487 | sch_tree_unlock(sch); | ||
| 488 | errout: | ||
| 489 | return err; | ||
| 495 | } | 490 | } |
| 496 | 491 | ||
| 497 | static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) | 492 | static int gred_init(struct Qdisc *sch, struct rtattr *opt) |
| 498 | { | 493 | { |
| 499 | unsigned long qave; | 494 | struct rtattr *tb[TCA_GRED_MAX]; |
| 500 | struct rtattr *rta; | ||
| 501 | struct tc_gred_qopt *opt = NULL ; | ||
| 502 | struct tc_gred_qopt *dst; | ||
| 503 | struct gred_sched *table = qdisc_priv(sch); | ||
| 504 | struct gred_sched_data *q; | ||
| 505 | int i; | ||
| 506 | unsigned char *b = skb->tail; | ||
| 507 | 495 | ||
| 508 | rta = (struct rtattr*)b; | 496 | if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) |
| 509 | RTA_PUT(skb, TCA_OPTIONS, 0, NULL); | 497 | return -EINVAL; |
| 510 | 498 | ||
| 511 | opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL); | 499 | if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1]) |
| 500 | return -EINVAL; | ||
| 512 | 501 | ||
| 513 | if (opt == NULL) { | 502 | return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]); |
| 514 | DPRINTK("gred_dump:failed to malloc for %Zd\n", | 503 | } |
| 515 | sizeof(struct tc_gred_qopt)*MAX_DPs); | ||
| 516 | goto rtattr_failure; | ||
| 517 | } | ||
| 518 | 504 | ||
| 519 | memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs); | 505 | static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) |
| 506 | { | ||
| 507 | struct gred_sched *table = qdisc_priv(sch); | ||
| 508 | struct rtattr *parms, *opts = NULL; | ||
| 509 | int i; | ||
| 510 | struct tc_gred_sopt sopt = { | ||
| 511 | .DPs = table->DPs, | ||
| 512 | .def_DP = table->def, | ||
| 513 | .grio = gred_rio_mode(table), | ||
| 514 | .flags = table->red_flags, | ||
| 515 | }; | ||
| 520 | 516 | ||
| 521 | if (!table->initd) { | 517 | opts = RTA_NEST(skb, TCA_OPTIONS); |
| 522 | DPRINTK("NO GRED Queues setup!\n"); | 518 | RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt); |
| 523 | } | 519 | parms = RTA_NEST(skb, TCA_GRED_PARMS); |
| 520 | |||
| 521 | for (i = 0; i < MAX_DPs; i++) { | ||
| 522 | struct gred_sched_data *q = table->tab[i]; | ||
| 523 | struct tc_gred_qopt opt; | ||
| 524 | 524 | ||
| 525 | for (i=0;i<MAX_DPs;i++) { | 525 | memset(&opt, 0, sizeof(opt)); |
| 526 | dst= &opt[i]; | ||
| 527 | q= table->tab[i]; | ||
| 528 | 526 | ||
| 529 | if (!q) { | 527 | if (!q) { |
| 530 | /* hack -- fix at some point with proper message | 528 | /* hack -- fix at some point with proper message |
| 531 | This is how we indicate to tc that there is no VQ | 529 | This is how we indicate to tc that there is no VQ |
| 532 | at this DP */ | 530 | at this DP */ |
| 533 | 531 | ||
| 534 | dst->DP=MAX_DPs+i; | 532 | opt.DP = MAX_DPs + i; |
| 535 | continue; | 533 | goto append_opt; |
| 536 | } | 534 | } |
| 537 | 535 | ||
| 538 | dst->limit=q->limit; | 536 | opt.limit = q->limit; |
| 539 | dst->qth_min=q->qth_min>>q->Wlog; | 537 | opt.DP = q->DP; |
| 540 | dst->qth_max=q->qth_max>>q->Wlog; | 538 | opt.backlog = q->backlog; |
| 541 | dst->DP=q->DP; | 539 | opt.prio = q->prio; |
| 542 | dst->backlog=q->backlog; | 540 | opt.qth_min = q->parms.qth_min >> q->parms.Wlog; |
| 543 | if (q->qave) { | 541 | opt.qth_max = q->parms.qth_max >> q->parms.Wlog; |
| 544 | if (table->eqp && table->grio) { | 542 | opt.Wlog = q->parms.Wlog; |
| 545 | q->qidlestart=table->tab[table->def]->qidlestart; | 543 | opt.Plog = q->parms.Plog; |
| 546 | q->qave=table->tab[table->def]->qave; | 544 | opt.Scell_log = q->parms.Scell_log; |
| 547 | } | 545 | opt.other = q->stats.other; |
| 548 | if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { | 546 | opt.early = q->stats.prob_drop; |
| 549 | long idle; | 547 | opt.forced = q->stats.forced_drop; |
| 550 | psched_time_t now; | 548 | opt.pdrop = q->stats.pdrop; |
| 551 | PSCHED_GET_TIME(now); | 549 | opt.packets = q->packetsin; |
| 552 | idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); | 550 | opt.bytesin = q->bytesin; |
| 553 | qave = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF]; | 551 | |
| 554 | dst->qave = qave >> q->Wlog; | 552 | if (gred_wred_mode(table)) { |
| 555 | 553 | q->parms.qidlestart = | |
| 556 | } else { | 554 | table->tab[table->def]->parms.qidlestart; |
| 557 | dst->qave = q->qave >> q->Wlog; | 555 | q->parms.qavg = table->tab[table->def]->parms.qavg; |
| 558 | } | ||
| 559 | } else { | ||
| 560 | dst->qave = 0; | ||
| 561 | } | 556 | } |
| 562 | 557 | ||
| 563 | 558 | opt.qave = red_calc_qavg(&q->parms, q->parms.qavg); | |
| 564 | dst->Wlog = q->Wlog; | 559 | |
| 565 | dst->Plog = q->Plog; | 560 | append_opt: |
| 566 | dst->Scell_log = q->Scell_log; | 561 | RTA_APPEND(skb, sizeof(opt), &opt); |
| 567 | dst->other = q->other; | ||
| 568 | dst->forced = q->forced; | ||
| 569 | dst->early = q->early; | ||
| 570 | dst->pdrop = q->pdrop; | ||
| 571 | dst->prio = q->prio; | ||
| 572 | dst->packets=q->packetsin; | ||
| 573 | dst->bytesin=q->bytesin; | ||
| 574 | } | 562 | } |
| 575 | 563 | ||
| 576 | RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt); | 564 | RTA_NEST_END(skb, parms); |
| 577 | rta->rta_len = skb->tail - b; | ||
| 578 | 565 | ||
| 579 | kfree(opt); | 566 | return RTA_NEST_END(skb, opts); |
| 580 | return skb->len; | ||
| 581 | 567 | ||
| 582 | rtattr_failure: | 568 | rtattr_failure: |
| 583 | if (opt) | 569 | return RTA_NEST_CANCEL(skb, opts); |
| 584 | kfree(opt); | ||
| 585 | DPRINTK("gred_dump: FAILURE!!!!\n"); | ||
| 586 | |||
| 587 | /* also free the opt struct here */ | ||
| 588 | skb_trim(skb, b - skb->data); | ||
| 589 | return -1; | ||
| 590 | } | 570 | } |
| 591 | 571 | ||
| 592 | static void gred_destroy(struct Qdisc *sch) | 572 | static void gred_destroy(struct Qdisc *sch) |
| @@ -594,15 +574,13 @@ static void gred_destroy(struct Qdisc *sch) | |||
| 594 | struct gred_sched *table = qdisc_priv(sch); | 574 | struct gred_sched *table = qdisc_priv(sch); |
| 595 | int i; | 575 | int i; |
| 596 | 576 | ||
| 597 | for (i = 0;i < table->DPs; i++) { | 577 | for (i = 0; i < table->DPs; i++) { |
| 598 | if (table->tab[i]) | 578 | if (table->tab[i]) |
| 599 | kfree(table->tab[i]); | 579 | gred_destroy_vq(table->tab[i]); |
| 600 | } | 580 | } |
| 601 | } | 581 | } |
| 602 | 582 | ||
| 603 | static struct Qdisc_ops gred_qdisc_ops = { | 583 | static struct Qdisc_ops gred_qdisc_ops = { |
| 604 | .next = NULL, | ||
| 605 | .cl_ops = NULL, | ||
| 606 | .id = "gred", | 584 | .id = "gred", |
| 607 | .priv_size = sizeof(struct gred_sched), | 585 | .priv_size = sizeof(struct gred_sched), |
| 608 | .enqueue = gred_enqueue, | 586 | .enqueue = gred_enqueue, |
| @@ -621,10 +599,13 @@ static int __init gred_module_init(void) | |||
| 621 | { | 599 | { |
| 622 | return register_qdisc(&gred_qdisc_ops); | 600 | return register_qdisc(&gred_qdisc_ops); |
| 623 | } | 601 | } |
| 624 | static void __exit gred_module_exit(void) | 602 | |
| 603 | static void __exit gred_module_exit(void) | ||
| 625 | { | 604 | { |
| 626 | unregister_qdisc(&gred_qdisc_ops); | 605 | unregister_qdisc(&gred_qdisc_ops); |
| 627 | } | 606 | } |
| 607 | |||
| 628 | module_init(gred_module_init) | 608 | module_init(gred_module_init) |
| 629 | module_exit(gred_module_exit) | 609 | module_exit(gred_module_exit) |
| 610 | |||
| 630 | MODULE_LICENSE("GPL"); | 611 | MODULE_LICENSE("GPL"); |
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7845d045eec4..dccfa44c2d71 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
| @@ -9,76 +9,23 @@ | |||
| 9 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | 9 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
| 10 | * | 10 | * |
| 11 | * Changes: | 11 | * Changes: |
| 12 | * J Hadi Salim <hadi@nortel.com> 980914: computation fixes | 12 | * J Hadi Salim 980914: computation fixes |
| 13 | * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly. | 13 | * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly. |
| 14 | * J Hadi Salim <hadi@nortelnetworks.com> 980816: ECN support | 14 | * J Hadi Salim 980816: ECN support |
| 15 | */ | 15 | */ |
| 16 | 16 | ||
| 17 | #include <linux/config.h> | 17 | #include <linux/config.h> |
| 18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
| 19 | #include <asm/uaccess.h> | ||
| 20 | #include <asm/system.h> | ||
| 21 | #include <linux/bitops.h> | ||
| 22 | #include <linux/types.h> | 19 | #include <linux/types.h> |
| 23 | #include <linux/kernel.h> | 20 | #include <linux/kernel.h> |
| 24 | #include <linux/sched.h> | ||
| 25 | #include <linux/string.h> | ||
| 26 | #include <linux/mm.h> | ||
| 27 | #include <linux/socket.h> | ||
| 28 | #include <linux/sockios.h> | ||
| 29 | #include <linux/in.h> | ||
| 30 | #include <linux/errno.h> | ||
| 31 | #include <linux/interrupt.h> | ||
| 32 | #include <linux/if_ether.h> | ||
| 33 | #include <linux/inet.h> | ||
| 34 | #include <linux/netdevice.h> | 21 | #include <linux/netdevice.h> |
| 35 | #include <linux/etherdevice.h> | ||
| 36 | #include <linux/notifier.h> | ||
| 37 | #include <net/ip.h> | ||
| 38 | #include <net/route.h> | ||
| 39 | #include <linux/skbuff.h> | 22 | #include <linux/skbuff.h> |
| 40 | #include <net/sock.h> | ||
| 41 | #include <net/pkt_sched.h> | 23 | #include <net/pkt_sched.h> |
| 42 | #include <net/inet_ecn.h> | 24 | #include <net/inet_ecn.h> |
| 43 | #include <net/dsfield.h> | 25 | #include <net/red.h> |
| 44 | 26 | ||
| 45 | 27 | ||
| 46 | /* Random Early Detection (RED) algorithm. | 28 | /* Parameters, settable by user: |
| 47 | ======================================= | ||
| 48 | |||
| 49 | Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways | ||
| 50 | for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking. | ||
| 51 | |||
| 52 | This file codes a "divisionless" version of RED algorithm | ||
| 53 | as written down in Fig.17 of the paper. | ||
| 54 | |||
| 55 | Short description. | ||
| 56 | ------------------ | ||
| 57 | |||
| 58 | When a new packet arrives we calculate the average queue length: | ||
| 59 | |||
| 60 | avg = (1-W)*avg + W*current_queue_len, | ||
| 61 | |||
| 62 | W is the filter time constant (chosen as 2^(-Wlog)), it controls | ||
| 63 | the inertia of the algorithm. To allow larger bursts, W should be | ||
| 64 | decreased. | ||
| 65 | |||
| 66 | if (avg > th_max) -> packet marked (dropped). | ||
| 67 | if (avg < th_min) -> packet passes. | ||
| 68 | if (th_min < avg < th_max) we calculate probability: | ||
| 69 | |||
| 70 | Pb = max_P * (avg - th_min)/(th_max-th_min) | ||
| 71 | |||
| 72 | and mark (drop) packet with this probability. | ||
| 73 | Pb changes from 0 (at avg==th_min) to max_P (avg==th_max). | ||
| 74 | max_P should be small (not 1), usually 0.01..0.02 is good value. | ||
| 75 | |||
| 76 | max_P is chosen as a number, so that max_P/(th_max-th_min) | ||
| 77 | is a negative power of two in order arithmetics to contain | ||
| 78 | only shifts. | ||
| 79 | |||
| 80 | |||
| 81 | Parameters, settable by user: | ||
| 82 | ----------------------------- | 29 | ----------------------------- |
| 83 | 30 | ||
| 84 | limit - bytes (must be > qth_max + burst) | 31 | limit - bytes (must be > qth_max + burst) |
| @@ -89,243 +36,93 @@ Short description. | |||
| 89 | arbitrarily high (well, less than ram size) | 36 | arbitrarily high (well, less than ram size) |
| 90 | Really, this limit will never be reached | 37 | Really, this limit will never be reached |
| 91 | if RED works correctly. | 38 | if RED works correctly. |
| 92 | |||
| 93 | qth_min - bytes (should be < qth_max/2) | ||
| 94 | qth_max - bytes (should be at least 2*qth_min and less limit) | ||
| 95 | Wlog - bits (<32) log(1/W). | ||
| 96 | Plog - bits (<32) | ||
| 97 | |||
| 98 | Plog is related to max_P by formula: | ||
| 99 | |||
| 100 | max_P = (qth_max-qth_min)/2^Plog; | ||
| 101 | |||
| 102 | F.e. if qth_max=128K and qth_min=32K, then Plog=22 | ||
| 103 | corresponds to max_P=0.02 | ||
| 104 | |||
| 105 | Scell_log | ||
| 106 | Stab | ||
| 107 | |||
| 108 | Lookup table for log((1-W)^(t/t_ave). | ||
| 109 | |||
| 110 | |||
| 111 | NOTES: | ||
| 112 | |||
| 113 | Upper bound on W. | ||
| 114 | ----------------- | ||
| 115 | |||
| 116 | If you want to allow bursts of L packets of size S, | ||
| 117 | you should choose W: | ||
| 118 | |||
| 119 | L + 1 - th_min/S < (1-(1-W)^L)/W | ||
| 120 | |||
| 121 | th_min/S = 32 th_min/S = 4 | ||
| 122 | |||
| 123 | log(W) L | ||
| 124 | -1 33 | ||
| 125 | -2 35 | ||
| 126 | -3 39 | ||
| 127 | -4 46 | ||
| 128 | -5 57 | ||
| 129 | -6 75 | ||
| 130 | -7 101 | ||
| 131 | -8 135 | ||
| 132 | -9 190 | ||
| 133 | etc. | ||
| 134 | */ | 39 | */ |
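The Plog example quoted in the removed comment is easy to sanity-check. A tiny user-space calculation with the figures given there (qth_min = 32K, qth_max = 128K, Plog = 22); the variable names are only for the illustration:

```c
#include <stdio.h>

int main(void)
{
	unsigned int qth_min = 32 << 10;	/* 32K bytes  */
	unsigned int qth_max = 128 << 10;	/* 128K bytes */
	unsigned int Plog = 22;

	/* max_P = (qth_max - qth_min) / 2^Plog, per the removed comment */
	double max_P = (double)(qth_max - qth_min) / (double)(1u << Plog);
	printf("max_P = %.4f\n", max_P);	/* prints 0.0234, i.e. the "0.02" quoted */
	return 0;
}
```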
| 135 | 40 | ||
| 136 | struct red_sched_data | 41 | struct red_sched_data |
| 137 | { | 42 | { |
| 138 | /* Parameters */ | 43 | u32 limit; /* HARD maximal queue length */ |
| 139 | u32 limit; /* HARD maximal queue length */ | 44 | unsigned char flags; |
| 140 | u32 qth_min; /* Min average length threshold: A scaled */ | 45 | struct red_parms parms; |
| 141 | u32 qth_max; /* Max average length threshold: A scaled */ | 46 | struct red_stats stats; |
| 142 | u32 Rmask; | ||
| 143 | u32 Scell_max; | ||
| 144 | unsigned char flags; | ||
| 145 | char Wlog; /* log(W) */ | ||
| 146 | char Plog; /* random number bits */ | ||
| 147 | char Scell_log; | ||
| 148 | u8 Stab[256]; | ||
| 149 | |||
| 150 | /* Variables */ | ||
| 151 | unsigned long qave; /* Average queue length: A scaled */ | ||
| 152 | int qcount; /* Packets since last random number generation */ | ||
| 153 | u32 qR; /* Cached random number */ | ||
| 154 | |||
| 155 | psched_time_t qidlestart; /* Start of idle period */ | ||
| 156 | struct tc_red_xstats st; | ||
| 157 | }; | 47 | }; |
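Everything that used to sit directly in red_sched_data, the scaled thresholds, the Stab[] decay table, and the running qavg/qcount/qR/qidlestart state, now lives behind struct red_parms and struct red_stats from &lt;net/red.h&gt;. A rough sketch of that state, reconstructed from the fields deleted above rather than copied from the new header, so names and widths are only indicative:

```c
/* Indicative only -- mirrors the members removed from red_sched_data above. */
struct red_parms_sketch {
	/* parameters, filled in by red_set_parms() */
	u32		qth_min;	/* min threshold, scaled by Wlog	*/
	u32		qth_max;	/* max threshold, scaled by Wlog	*/
	u32		Rmask;		/* (1 << Plog) - 1			*/
	u32		Scell_max;
	u8		Wlog;		/* log(W), the EWMA weight		*/
	u8		Plog;		/* random number bits			*/
	u8		Scell_log;
	u8		Stab[256];	/* idle-time decay lookup table		*/

	/* variables */
	unsigned long	qavg;		/* average queue length, scaled		*/
	int		qcount;		/* packets since last random draw	*/
	u32		qR;		/* cached random number			*/
	psched_time_t	qidlestart;	/* start of the current idle period	*/
};
```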
| 158 | 48 | ||
| 159 | static int red_ecn_mark(struct sk_buff *skb) | 49 | static inline int red_use_ecn(struct red_sched_data *q) |
| 160 | { | 50 | { |
| 161 | if (skb->nh.raw + 20 > skb->tail) | 51 | return q->flags & TC_RED_ECN; |
| 162 | return 0; | ||
| 163 | |||
| 164 | switch (skb->protocol) { | ||
| 165 | case __constant_htons(ETH_P_IP): | ||
| 166 | if (INET_ECN_is_not_ect(skb->nh.iph->tos)) | ||
| 167 | return 0; | ||
| 168 | IP_ECN_set_ce(skb->nh.iph); | ||
| 169 | return 1; | ||
| 170 | case __constant_htons(ETH_P_IPV6): | ||
| 171 | if (INET_ECN_is_not_ect(ipv6_get_dsfield(skb->nh.ipv6h))) | ||
| 172 | return 0; | ||
| 173 | IP6_ECN_set_ce(skb->nh.ipv6h); | ||
| 174 | return 1; | ||
| 175 | default: | ||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | } | 52 | } |
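red_ecn_mark() can be dropped here because its per-protocol logic is consolidated behind INET_ECN_set_ce() (the inet_ecn.h part of this merge). A sketch of what that helper has to do, reconstructed from the function deleted above; treat it as an approximation, not the header's actual body:

```c
/* Approximation based on the removed red_ecn_mark(); bounds checking elided. */
static inline int inet_ecn_set_ce_sketch(struct sk_buff *skb)
{
	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP):
		if (INET_ECN_is_not_ect(skb->nh.iph->tos))
			return 0;	/* not ECN-capable: caller falls back to dropping */
		IP_ECN_set_ce(skb->nh.iph);
		return 1;
	case __constant_htons(ETH_P_IPV6):
		if (INET_ECN_is_not_ect(ipv6_get_dsfield(skb->nh.ipv6h)))
			return 0;
		IP6_ECN_set_ce(skb->nh.ipv6h);
		return 1;
	default:
		return 0;
	}
}
```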
| 179 | 53 | ||
| 180 | static int | 54 | static inline int red_use_harddrop(struct red_sched_data *q) |
| 181 | red_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 55 | { |
| 56 | return q->flags & TC_RED_HARDDROP; | ||
| 57 | } | ||
| 58 | |||
| 59 | static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) | ||
| 182 | { | 60 | { |
| 183 | struct red_sched_data *q = qdisc_priv(sch); | 61 | struct red_sched_data *q = qdisc_priv(sch); |
| 184 | 62 | ||
| 185 | psched_time_t now; | 63 | q->parms.qavg = red_calc_qavg(&q->parms, sch->qstats.backlog); |
| 186 | 64 | ||
| 187 | if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { | 65 | if (red_is_idling(&q->parms)) |
| 188 | long us_idle; | 66 | red_end_of_idle_period(&q->parms); |
| 189 | int shift; | ||
| 190 | 67 | ||
| 191 | PSCHED_GET_TIME(now); | 68 | switch (red_action(&q->parms, q->parms.qavg)) { |
| 192 | us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); | 69 | case RED_DONT_MARK: |
| 193 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 70 | break; |
| 194 | 71 | ||
| 195 | /* | 72 | case RED_PROB_MARK: |
| 196 | The problem: ideally, average length queue recalcultion should | 73 | sch->qstats.overlimits++; |
| 197 | be done over constant clock intervals. This is too expensive, so that | 74 | if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { |
| 198 | the calculation is driven by outgoing packets. | 75 | q->stats.prob_drop++; |
| 199 | When the queue is idle we have to model this clock by hand. | 76 | goto congestion_drop; |
| 200 | 77 | } | |
| 201 | SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth) | ||
| 202 | dummy packets as a burst after idle time, i.e. | ||
| 203 | |||
| 204 | q->qave *= (1-W)^m | ||
| 205 | |||
| 206 | This is an apparently overcomplicated solution (f.e. we have to precompute | ||
| 207 | a table to make this calculation in reasonable time) | ||
| 208 | I believe that a simpler model may be used here, | ||
| 209 | but it is field for experiments. | ||
| 210 | */ | ||
| 211 | shift = q->Stab[us_idle>>q->Scell_log]; | ||
| 212 | |||
| 213 | if (shift) { | ||
| 214 | q->qave >>= shift; | ||
| 215 | } else { | ||
| 216 | /* Approximate initial part of exponent | ||
| 217 | with linear function: | ||
| 218 | (1-W)^m ~= 1-mW + ... | ||
| 219 | |||
| 220 | Seems, it is the best solution to | ||
| 221 | problem of too coarce exponent tabulation. | ||
| 222 | */ | ||
| 223 | |||
| 224 | us_idle = (q->qave * us_idle)>>q->Scell_log; | ||
| 225 | if (us_idle < q->qave/2) | ||
| 226 | q->qave -= us_idle; | ||
| 227 | else | ||
| 228 | q->qave >>= 1; | ||
| 229 | } | ||
| 230 | } else { | ||
| 231 | q->qave += sch->qstats.backlog - (q->qave >> q->Wlog); | ||
| 232 | /* NOTE: | ||
| 233 | q->qave is fixed point number with point at Wlog. | ||
| 234 | The formulae above is equvalent to floating point | ||
| 235 | version: | ||
| 236 | |||
| 237 | qave = qave*(1-W) + sch->qstats.backlog*W; | ||
| 238 | --ANK (980924) | ||
| 239 | */ | ||
| 240 | } | ||
| 241 | 78 | ||
| 242 | if (q->qave < q->qth_min) { | 79 | q->stats.prob_mark++; |
| 243 | q->qcount = -1; | 80 | break; |
| 244 | enqueue: | 81 | |
| 245 | if (sch->qstats.backlog + skb->len <= q->limit) { | 82 | case RED_HARD_MARK: |
| 246 | __skb_queue_tail(&sch->q, skb); | 83 | sch->qstats.overlimits++; |
| 247 | sch->qstats.backlog += skb->len; | 84 | if (red_use_harddrop(q) || !red_use_ecn(q) || |
| 248 | sch->bstats.bytes += skb->len; | 85 | !INET_ECN_set_ce(skb)) { |
| 249 | sch->bstats.packets++; | 86 | q->stats.forced_drop++; |
| 250 | return NET_XMIT_SUCCESS; | 87 | goto congestion_drop; |
| 251 | } else { | 88 | } |
| 252 | q->st.pdrop++; | ||
| 253 | } | ||
| 254 | kfree_skb(skb); | ||
| 255 | sch->qstats.drops++; | ||
| 256 | return NET_XMIT_DROP; | ||
| 257 | } | ||
| 258 | if (q->qave >= q->qth_max) { | ||
| 259 | q->qcount = -1; | ||
| 260 | sch->qstats.overlimits++; | ||
| 261 | mark: | ||
| 262 | if (!(q->flags&TC_RED_ECN) || !red_ecn_mark(skb)) { | ||
| 263 | q->st.early++; | ||
| 264 | goto drop; | ||
| 265 | } | ||
| 266 | q->st.marked++; | ||
| 267 | goto enqueue; | ||
| 268 | } | ||
| 269 | 89 | ||
| 270 | if (++q->qcount) { | 90 | q->stats.forced_mark++; |
| 271 | /* The formula used below causes questions. | 91 | break; |
| 272 | |||
| 273 | OK. qR is random number in the interval 0..Rmask | ||
| 274 | i.e. 0..(2^Plog). If we used floating point | ||
| 275 | arithmetics, it would be: (2^Plog)*rnd_num, | ||
| 276 | where rnd_num is less 1. | ||
| 277 | |||
| 278 | Taking into account, that qave have fixed | ||
| 279 | point at Wlog, and Plog is related to max_P by | ||
| 280 | max_P = (qth_max-qth_min)/2^Plog; two lines | ||
| 281 | below have the following floating point equivalent: | ||
| 282 | |||
| 283 | max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount | ||
| 284 | |||
| 285 | Any questions? --ANK (980924) | ||
| 286 | */ | ||
| 287 | if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR) | ||
| 288 | goto enqueue; | ||
| 289 | q->qcount = 0; | ||
| 290 | q->qR = net_random()&q->Rmask; | ||
| 291 | sch->qstats.overlimits++; | ||
| 292 | goto mark; | ||
| 293 | } | 92 | } |
| 294 | q->qR = net_random()&q->Rmask; | ||
| 295 | goto enqueue; | ||
| 296 | 93 | ||
| 297 | drop: | 94 | if (sch->qstats.backlog + skb->len <= q->limit) |
| 298 | kfree_skb(skb); | 95 | return qdisc_enqueue_tail(skb, sch); |
| 299 | sch->qstats.drops++; | 96 | |
| 97 | q->stats.pdrop++; | ||
| 98 | return qdisc_drop(skb, sch); | ||
| 99 | |||
| 100 | congestion_drop: | ||
| 101 | qdisc_drop(skb, sch); | ||
| 300 | return NET_XMIT_CN; | 102 | return NET_XMIT_CN; |
| 301 | } | 103 | } |
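The rewritten enqueue path delegates the whole threshold/probability decision to red_action() and only handles the three outcomes. Those cases map onto the logic the deleted code above carried inline; the following is a paraphrase reconstructed from that removed code, not the actual net/red.h implementation:

```c
/* Paraphrase of the marking decision, following the code removed above. */
static inline int red_action_sketch(struct red_parms *p, unsigned long qavg)
{
	if (qavg < p->qth_min) {
		p->qcount = -1;
		return RED_DONT_MARK;		/* below min threshold: always pass */
	}

	if (qavg >= p->qth_max) {
		p->qcount = -1;
		return RED_HARD_MARK;		/* beyond max threshold: always mark/drop */
	}

	/* Between the thresholds: divisionless Bernoulli trial. qavg is fixed
	 * point at Wlog and qR is uniform in [0, Rmask], so this effectively
	 * compares max_P * (qavg - qth_min)/(qth_max - qth_min) with rnd/qcount. */
	if (++p->qcount) {
		if (((qavg - p->qth_min) >> p->Wlog) * p->qcount < p->qR)
			return RED_DONT_MARK;
		p->qcount = 0;
		p->qR = net_random() & p->Rmask;
		return RED_PROB_MARK;
	}

	p->qR = net_random() & p->Rmask;	/* first packet above qth_min */
	return RED_DONT_MARK;
}
```

What changes in sch_red.c itself is only the reaction: ECN-capable packets get CE-marked unless TC_RED_HARDDROP forces a drop, and the drop/mark counters are now split by cause.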
| 302 | 104 | ||
| 303 | static int | 105 | static int red_requeue(struct sk_buff *skb, struct Qdisc* sch) |
| 304 | red_requeue(struct sk_buff *skb, struct Qdisc* sch) | ||
| 305 | { | 106 | { |
| 306 | struct red_sched_data *q = qdisc_priv(sch); | 107 | struct red_sched_data *q = qdisc_priv(sch); |
| 307 | 108 | ||
| 308 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 109 | if (red_is_idling(&q->parms)) |
| 110 | red_end_of_idle_period(&q->parms); | ||
| 309 | 111 | ||
| 310 | __skb_queue_head(&sch->q, skb); | 112 | return qdisc_requeue(skb, sch); |
| 311 | sch->qstats.backlog += skb->len; | ||
| 312 | sch->qstats.requeues++; | ||
| 313 | return 0; | ||
| 314 | } | 113 | } |
| 315 | 114 | ||
| 316 | static struct sk_buff * | 115 | static struct sk_buff * red_dequeue(struct Qdisc* sch) |
| 317 | red_dequeue(struct Qdisc* sch) | ||
| 318 | { | 116 | { |
| 319 | struct sk_buff *skb; | 117 | struct sk_buff *skb; |
| 320 | struct red_sched_data *q = qdisc_priv(sch); | 118 | struct red_sched_data *q = qdisc_priv(sch); |
| 321 | 119 | ||
| 322 | skb = __skb_dequeue(&sch->q); | 120 | skb = qdisc_dequeue_head(sch); |
| 323 | if (skb) { | 121 | |
| 324 | sch->qstats.backlog -= skb->len; | 122 | if (skb == NULL && !red_is_idling(&q->parms)) |
| 325 | return skb; | 123 | red_start_of_idle_period(&q->parms); |
| 326 | } | 124 | |
| 327 | PSCHED_GET_TIME(q->qidlestart); | 125 | return skb; |
| 328 | return NULL; | ||
| 329 | } | 126 | } |
| 330 | 127 | ||
| 331 | static unsigned int red_drop(struct Qdisc* sch) | 128 | static unsigned int red_drop(struct Qdisc* sch) |
| @@ -333,16 +130,17 @@ static unsigned int red_drop(struct Qdisc* sch) | |||
| 333 | struct sk_buff *skb; | 130 | struct sk_buff *skb; |
| 334 | struct red_sched_data *q = qdisc_priv(sch); | 131 | struct red_sched_data *q = qdisc_priv(sch); |
| 335 | 132 | ||
| 336 | skb = __skb_dequeue_tail(&sch->q); | 133 | skb = qdisc_dequeue_tail(sch); |
| 337 | if (skb) { | 134 | if (skb) { |
| 338 | unsigned int len = skb->len; | 135 | unsigned int len = skb->len; |
| 339 | sch->qstats.backlog -= len; | 136 | q->stats.other++; |
| 340 | sch->qstats.drops++; | 137 | qdisc_drop(skb, sch); |
| 341 | q->st.other++; | ||
| 342 | kfree_skb(skb); | ||
| 343 | return len; | 138 | return len; |
| 344 | } | 139 | } |
| 345 | PSCHED_GET_TIME(q->qidlestart); | 140 | |
| 141 | if (!red_is_idling(&q->parms)) | ||
| 142 | red_start_of_idle_period(&q->parms); | ||
| 143 | |||
| 346 | return 0; | 144 | return 0; |
| 347 | } | 145 | } |
| 348 | 146 | ||
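Dequeue and drop now flag the start of an idle period instead of stamping q->qidlestart directly, and enqueue/requeue close it again. What the end-of-idle bookkeeping has to compute is exactly the Stab-driven decay the old enqueue code performed inline; a sketch reconstructed from that removed code (the real helper lives in net/red.h):

```c
/* Decay qavg over an idle period, following the removed inline code. */
static inline unsigned long red_qavg_after_idle_sketch(struct red_parms *p)
{
	psched_time_t now;
	long us_idle;
	int shift;

	PSCHED_GET_TIME(now);
	/* Clamped to Scell_max = 255 << Scell_log, keeping the Stab index in range. */
	us_idle = PSCHED_TDIFF_SAFE(now, p->qidlestart, p->Scell_max);

	shift = p->Stab[us_idle >> p->Scell_log];	/* tabulated (1-W)^m as a shift */
	if (shift)
		return p->qavg >> shift;

	/* Small idle times: linear approximation (1-W)^m ~= 1 - m*W,
	 * as described in the removed comment. */
	us_idle = (p->qavg * us_idle) >> p->Scell_log;
	if (us_idle < p->qavg / 2)
		return p->qavg - us_idle;
	return p->qavg >> 1;
}
```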
| @@ -350,43 +148,38 @@ static void red_reset(struct Qdisc* sch) | |||
| 350 | { | 148 | { |
| 351 | struct red_sched_data *q = qdisc_priv(sch); | 149 | struct red_sched_data *q = qdisc_priv(sch); |
| 352 | 150 | ||
| 353 | __skb_queue_purge(&sch->q); | 151 | qdisc_reset_queue(sch); |
| 354 | sch->qstats.backlog = 0; | 152 | red_restart(&q->parms); |
| 355 | PSCHED_SET_PASTPERFECT(q->qidlestart); | ||
| 356 | q->qave = 0; | ||
| 357 | q->qcount = -1; | ||
| 358 | } | 153 | } |
| 359 | 154 | ||
| 360 | static int red_change(struct Qdisc *sch, struct rtattr *opt) | 155 | static int red_change(struct Qdisc *sch, struct rtattr *opt) |
| 361 | { | 156 | { |
| 362 | struct red_sched_data *q = qdisc_priv(sch); | 157 | struct red_sched_data *q = qdisc_priv(sch); |
| 363 | struct rtattr *tb[TCA_RED_STAB]; | 158 | struct rtattr *tb[TCA_RED_MAX]; |
| 364 | struct tc_red_qopt *ctl; | 159 | struct tc_red_qopt *ctl; |
| 365 | 160 | ||
| 366 | if (opt == NULL || | 161 | if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt)) |
| 367 | rtattr_parse_nested(tb, TCA_RED_STAB, opt) || | 162 | return -EINVAL; |
| 368 | tb[TCA_RED_PARMS-1] == 0 || tb[TCA_RED_STAB-1] == 0 || | 163 | |
| 164 | if (tb[TCA_RED_PARMS-1] == NULL || | ||
| 369 | RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || | 165 | RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) || |
| 370 | RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < 256) | 166 | tb[TCA_RED_STAB-1] == NULL || |
| 167 | RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE) | ||
| 371 | return -EINVAL; | 168 | return -EINVAL; |
| 372 | 169 | ||
| 373 | ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); | 170 | ctl = RTA_DATA(tb[TCA_RED_PARMS-1]); |
| 374 | 171 | ||
| 375 | sch_tree_lock(sch); | 172 | sch_tree_lock(sch); |
| 376 | q->flags = ctl->flags; | 173 | q->flags = ctl->flags; |
| 377 | q->Wlog = ctl->Wlog; | ||
| 378 | q->Plog = ctl->Plog; | ||
| 379 | q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; | ||
| 380 | q->Scell_log = ctl->Scell_log; | ||
| 381 | q->Scell_max = (255<<q->Scell_log); | ||
| 382 | q->qth_min = ctl->qth_min<<ctl->Wlog; | ||
| 383 | q->qth_max = ctl->qth_max<<ctl->Wlog; | ||
| 384 | q->limit = ctl->limit; | 174 | q->limit = ctl->limit; |
| 385 | memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256); | ||
| 386 | 175 | ||
| 387 | q->qcount = -1; | 176 | red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, |
| 177 | ctl->Plog, ctl->Scell_log, | ||
| 178 | RTA_DATA(tb[TCA_RED_STAB-1])); | ||
| 179 | |||
| 388 | if (skb_queue_empty(&sch->q)) | 180 | if (skb_queue_empty(&sch->q)) |
| 389 | PSCHED_SET_PASTPERFECT(q->qidlestart); | 181 | red_end_of_idle_period(&q->parms); |
| 182 | |||
| 390 | sch_tree_unlock(sch); | 183 | sch_tree_unlock(sch); |
| 391 | return 0; | 184 | return 0; |
| 392 | } | 185 | } |
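With the per-field copies gone, red_change() only validates the two attributes and hands the whole configuration to red_set_parms() in one call, using the signature visible in the new code. A sketch of how a configuration is applied after this change; apply_red_config() and the stab argument name are hypothetical, and the netlink parsing is elided:

```c
/* Sketch: apply a parsed RED configuration to the qdisc private data. */
static int apply_red_config(struct red_sched_data *q, struct tc_red_qopt *ctl,
			    u8 *stab /* 256-byte Stab table from TCA_RED_STAB */)
{
	q->flags = ctl->flags;		/* TC_RED_ECN / TC_RED_HARDDROP		*/
	q->limit = ctl->limit;		/* hard byte limit, checked at enqueue	*/

	/* One call replaces the Wlog/Plog/Rmask/threshold arithmetic that
	 * red_change() used to do by hand. */
	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max,
		      ctl->Wlog, ctl->Plog, ctl->Scell_log, stab);
	return 0;
}
```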
| @@ -399,39 +192,39 @@ static int red_init(struct Qdisc* sch, struct rtattr *opt) | |||
| 399 | static int red_dump(struct Qdisc *sch, struct sk_buff *skb) | 192 | static int red_dump(struct Qdisc *sch, struct sk_buff *skb) |
| 400 | { | 193 | { |
| 401 | struct red_sched_data *q = qdisc_priv(sch); | 194 | struct red_sched_data *q = qdisc_priv(sch); |
| 402 | unsigned char *b = skb->tail; | 195 | struct rtattr *opts = NULL; |
| 403 | struct rtattr *rta; | 196 | struct tc_red_qopt opt = { |
| 404 | struct tc_red_qopt opt; | 197 | .limit = q->limit, |
| 405 | 198 | .flags = q->flags, | |
| 406 | rta = (struct rtattr*)b; | 199 | .qth_min = q->parms.qth_min >> q->parms.Wlog, |
| 407 | RTA_PUT(skb, TCA_OPTIONS, 0, NULL); | 200 | .qth_max = q->parms.qth_max >> q->parms.Wlog, |
| 408 | opt.limit = q->limit; | 201 | .Wlog = q->parms.Wlog, |
| 409 | opt.qth_min = q->qth_min>>q->Wlog; | 202 | .Plog = q->parms.Plog, |
| 410 | opt.qth_max = q->qth_max>>q->Wlog; | 203 | .Scell_log = q->parms.Scell_log, |
| 411 | opt.Wlog = q->Wlog; | 204 | }; |
| 412 | opt.Plog = q->Plog; | 205 | |
| 413 | opt.Scell_log = q->Scell_log; | 206 | opts = RTA_NEST(skb, TCA_OPTIONS); |
| 414 | opt.flags = q->flags; | ||
| 415 | RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); | 207 | RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt); |
| 416 | rta->rta_len = skb->tail - b; | 208 | return RTA_NEST_END(skb, opts); |
| 417 | |||
| 418 | return skb->len; | ||
| 419 | 209 | ||
| 420 | rtattr_failure: | 210 | rtattr_failure: |
| 421 | skb_trim(skb, b - skb->data); | 211 | return RTA_NEST_CANCEL(skb, opts); |
| 422 | return -1; | ||
| 423 | } | 212 | } |
| 424 | 213 | ||
| 425 | static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | 214 | static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) |
| 426 | { | 215 | { |
| 427 | struct red_sched_data *q = qdisc_priv(sch); | 216 | struct red_sched_data *q = qdisc_priv(sch); |
| 428 | 217 | struct tc_red_xstats st = { | |
| 429 | return gnet_stats_copy_app(d, &q->st, sizeof(q->st)); | 218 | .early = q->stats.prob_drop + q->stats.forced_drop, |
| 219 | .pdrop = q->stats.pdrop, | ||
| 220 | .other = q->stats.other, | ||
| 221 | .marked = q->stats.prob_mark + q->stats.forced_mark, | ||
| 222 | }; | ||
| 223 | |||
| 224 | return gnet_stats_copy_app(d, &st, sizeof(st)); | ||
| 430 | } | 225 | } |
| 431 | 226 | ||
| 432 | static struct Qdisc_ops red_qdisc_ops = { | 227 | static struct Qdisc_ops red_qdisc_ops = { |
| 433 | .next = NULL, | ||
| 434 | .cl_ops = NULL, | ||
| 435 | .id = "red", | 228 | .id = "red", |
| 436 | .priv_size = sizeof(struct red_sched_data), | 229 | .priv_size = sizeof(struct red_sched_data), |
| 437 | .enqueue = red_enqueue, | 230 | .enqueue = red_enqueue, |
| @@ -450,10 +243,13 @@ static int __init red_module_init(void) | |||
| 450 | { | 243 | { |
| 451 | return register_qdisc(&red_qdisc_ops); | 244 | return register_qdisc(&red_qdisc_ops); |
| 452 | } | 245 | } |
| 453 | static void __exit red_module_exit(void) | 246 | |
| 247 | static void __exit red_module_exit(void) | ||
| 454 | { | 248 | { |
| 455 | unregister_qdisc(&red_qdisc_ops); | 249 | unregister_qdisc(&red_qdisc_ops); |
| 456 | } | 250 | } |
| 251 | |||
| 457 | module_init(red_module_init) | 252 | module_init(red_module_init) |
| 458 | module_exit(red_module_exit) | 253 | module_exit(red_module_exit) |
| 254 | |||
| 459 | MODULE_LICENSE("GPL"); | 255 | MODULE_LICENSE("GPL"); |
