diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/core/gen_estimator.c |
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'net/core/gen_estimator.c')
-rw-r--r-- | net/core/gen_estimator.c | 250 |
1 files changed, 250 insertions, 0 deletions
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c new file mode 100644 index 000000000000..b07c029e8219 --- /dev/null +++ b/net/core/gen_estimator.c | |||
@@ -0,0 +1,250 @@ | |||
1 | /* | ||
2 | * net/sched/gen_estimator.c Simple rate estimator. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | ||
10 | * | ||
11 | * Changes: | ||
12 | * Jamal Hadi Salim - moved it to net/core and reshulfed | ||
13 | * names to make it usable in general net subsystem. | ||
14 | */ | ||
15 | |||
16 | #include <asm/uaccess.h> | ||
17 | #include <asm/system.h> | ||
18 | #include <asm/bitops.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/jiffies.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/mm.h> | ||
25 | #include <linux/socket.h> | ||
26 | #include <linux/sockios.h> | ||
27 | #include <linux/in.h> | ||
28 | #include <linux/errno.h> | ||
29 | #include <linux/interrupt.h> | ||
30 | #include <linux/netdevice.h> | ||
31 | #include <linux/skbuff.h> | ||
32 | #include <linux/rtnetlink.h> | ||
33 | #include <linux/init.h> | ||
34 | #include <net/sock.h> | ||
35 | #include <net/gen_stats.h> | ||
36 | |||
37 | /* | ||
38 | This code is NOT intended to be used for statistics collection, | ||
39 | its purpose is to provide a base for statistical multiplexing | ||
40 | for controlled load service. | ||
41 | If you need only statistics, run a user level daemon which | ||
42 | periodically reads byte counters. | ||
43 | |||
44 | Unfortunately, rate estimation is not a very easy task. | ||
45 | F.e. I did not find a simple way to estimate the current peak rate | ||
46 | and even failed to formulate the problem 8)8) | ||
47 | |||
48 | So I preferred not to built an estimator into the scheduler, | ||
49 | but run this task separately. | ||
50 | Ideally, it should be kernel thread(s), but for now it runs | ||
51 | from timers, which puts apparent top bounds on the number of rated | ||
52 | flows, has minimal overhead on small, but is enough | ||
53 | to handle controlled load service, sets of aggregates. | ||
54 | |||
55 | We measure rate over A=(1<<interval) seconds and evaluate EWMA: | ||
56 | |||
57 | avrate = avrate*(1-W) + rate*W | ||
58 | |||
59 | where W is chosen as negative power of 2: W = 2^(-ewma_log) | ||
60 | |||
61 | The resulting time constant is: | ||
62 | |||
63 | T = A/(-ln(1-W)) | ||
64 | |||
65 | |||
66 | NOTES. | ||
67 | |||
68 | * The stored value for avbps is scaled by 2^5, so that maximal | ||
69 | rate is ~1Gbit, avpps is scaled by 2^10. | ||
70 | |||
71 | * Minimal interval is HZ/4=250msec (it is the greatest common divisor | ||
72 | for HZ=100 and HZ=1024 8)), maximal interval | ||
73 | is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals | ||
74 | are too expensive, longer ones can be implemented | ||
75 | at user level painlessly. | ||
76 | */ | ||
77 | |||
78 | #define EST_MAX_INTERVAL 5 | ||
79 | |||
80 | struct gen_estimator | ||
81 | { | ||
82 | struct gen_estimator *next; | ||
83 | struct gnet_stats_basic *bstats; | ||
84 | struct gnet_stats_rate_est *rate_est; | ||
85 | spinlock_t *stats_lock; | ||
86 | unsigned interval; | ||
87 | int ewma_log; | ||
88 | u64 last_bytes; | ||
89 | u32 last_packets; | ||
90 | u32 avpps; | ||
91 | u32 avbps; | ||
92 | }; | ||
93 | |||
94 | struct gen_estimator_head | ||
95 | { | ||
96 | struct timer_list timer; | ||
97 | struct gen_estimator *list; | ||
98 | }; | ||
99 | |||
100 | static struct gen_estimator_head elist[EST_MAX_INTERVAL+1]; | ||
101 | |||
102 | /* Estimator array lock */ | ||
103 | static DEFINE_RWLOCK(est_lock); | ||
104 | |||
105 | static void est_timer(unsigned long arg) | ||
106 | { | ||
107 | int idx = (int)arg; | ||
108 | struct gen_estimator *e; | ||
109 | |||
110 | read_lock(&est_lock); | ||
111 | for (e = elist[idx].list; e; e = e->next) { | ||
112 | u64 nbytes; | ||
113 | u32 npackets; | ||
114 | u32 rate; | ||
115 | |||
116 | spin_lock(e->stats_lock); | ||
117 | nbytes = e->bstats->bytes; | ||
118 | npackets = e->bstats->packets; | ||
119 | rate = (nbytes - e->last_bytes)<<(7 - idx); | ||
120 | e->last_bytes = nbytes; | ||
121 | e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; | ||
122 | e->rate_est->bps = (e->avbps+0xF)>>5; | ||
123 | |||
124 | rate = (npackets - e->last_packets)<<(12 - idx); | ||
125 | e->last_packets = npackets; | ||
126 | e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; | ||
127 | e->rate_est->pps = (e->avpps+0x1FF)>>10; | ||
128 | spin_unlock(e->stats_lock); | ||
129 | } | ||
130 | |||
131 | mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); | ||
132 | read_unlock(&est_lock); | ||
133 | } | ||
134 | |||
135 | /** | ||
136 | * gen_new_estimator - create a new rate estimator | ||
137 | * @bstats: basic statistics | ||
138 | * @rate_est: rate estimator statistics | ||
139 | * @stats_lock: statistics lock | ||
140 | * @opt: rate estimator configuration TLV | ||
141 | * | ||
142 | * Creates a new rate estimator with &bstats as source and &rate_est | ||
143 | * as destination. A new timer with the interval specified in the | ||
144 | * configuration TLV is created. Upon each interval, the latest statistics | ||
145 | * will be read from &bstats and the estimated rate will be stored in | ||
146 | * &rate_est with the statistics lock grabed during this period. | ||
147 | * | ||
148 | * Returns 0 on success or a negative error code. | ||
149 | */ | ||
150 | int gen_new_estimator(struct gnet_stats_basic *bstats, | ||
151 | struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt) | ||
152 | { | ||
153 | struct gen_estimator *est; | ||
154 | struct gnet_estimator *parm = RTA_DATA(opt); | ||
155 | |||
156 | if (RTA_PAYLOAD(opt) < sizeof(*parm)) | ||
157 | return -EINVAL; | ||
158 | |||
159 | if (parm->interval < -2 || parm->interval > 3) | ||
160 | return -EINVAL; | ||
161 | |||
162 | est = kmalloc(sizeof(*est), GFP_KERNEL); | ||
163 | if (est == NULL) | ||
164 | return -ENOBUFS; | ||
165 | |||
166 | memset(est, 0, sizeof(*est)); | ||
167 | est->interval = parm->interval + 2; | ||
168 | est->bstats = bstats; | ||
169 | est->rate_est = rate_est; | ||
170 | est->stats_lock = stats_lock; | ||
171 | est->ewma_log = parm->ewma_log; | ||
172 | est->last_bytes = bstats->bytes; | ||
173 | est->avbps = rate_est->bps<<5; | ||
174 | est->last_packets = bstats->packets; | ||
175 | est->avpps = rate_est->pps<<10; | ||
176 | |||
177 | est->next = elist[est->interval].list; | ||
178 | if (est->next == NULL) { | ||
179 | init_timer(&elist[est->interval].timer); | ||
180 | elist[est->interval].timer.data = est->interval; | ||
181 | elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); | ||
182 | elist[est->interval].timer.function = est_timer; | ||
183 | add_timer(&elist[est->interval].timer); | ||
184 | } | ||
185 | write_lock_bh(&est_lock); | ||
186 | elist[est->interval].list = est; | ||
187 | write_unlock_bh(&est_lock); | ||
188 | return 0; | ||
189 | } | ||
190 | |||
191 | /** | ||
192 | * gen_kill_estimator - remove a rate estimator | ||
193 | * @bstats: basic statistics | ||
194 | * @rate_est: rate estimator statistics | ||
195 | * | ||
196 | * Removes the rate estimator specified by &bstats and &rate_est | ||
197 | * and deletes the timer. | ||
198 | */ | ||
199 | void gen_kill_estimator(struct gnet_stats_basic *bstats, | ||
200 | struct gnet_stats_rate_est *rate_est) | ||
201 | { | ||
202 | int idx; | ||
203 | struct gen_estimator *est, **pest; | ||
204 | |||
205 | for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { | ||
206 | int killed = 0; | ||
207 | pest = &elist[idx].list; | ||
208 | while ((est=*pest) != NULL) { | ||
209 | if (est->rate_est != rate_est || est->bstats != bstats) { | ||
210 | pest = &est->next; | ||
211 | continue; | ||
212 | } | ||
213 | |||
214 | write_lock_bh(&est_lock); | ||
215 | *pest = est->next; | ||
216 | write_unlock_bh(&est_lock); | ||
217 | |||
218 | kfree(est); | ||
219 | killed++; | ||
220 | } | ||
221 | if (killed && elist[idx].list == NULL) | ||
222 | del_timer(&elist[idx].timer); | ||
223 | } | ||
224 | } | ||
225 | |||
226 | /** | ||
227 | * gen_replace_estimator - replace rate estimator configruation | ||
228 | * @bstats: basic statistics | ||
229 | * @rate_est: rate estimator statistics | ||
230 | * @stats_lock: statistics lock | ||
231 | * @opt: rate estimator configuration TLV | ||
232 | * | ||
233 | * Replaces the configuration of a rate estimator by calling | ||
234 | * gen_kill_estimator() and gen_new_estimator(). | ||
235 | * | ||
236 | * Returns 0 on success or a negative error code. | ||
237 | */ | ||
238 | int | ||
239 | gen_replace_estimator(struct gnet_stats_basic *bstats, | ||
240 | struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, | ||
241 | struct rtattr *opt) | ||
242 | { | ||
243 | gen_kill_estimator(bstats, rate_est); | ||
244 | return gen_new_estimator(bstats, rate_est, stats_lock, opt); | ||
245 | } | ||
246 | |||
247 | |||
248 | EXPORT_SYMBOL(gen_kill_estimator); | ||
249 | EXPORT_SYMBOL(gen_new_estimator); | ||
250 | EXPORT_SYMBOL(gen_replace_estimator); | ||