aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Patrick McHardy <kaber@trash.net> 2007-03-16 04:18:42 -0400
committer David S. Miller <davem@sunset.davemloft.net> 2007-04-26 01:26:04 -0400
commit 641b9e0e8b7f96425da6ce98f3361e3af0baee29 (patch)
tree 2315fed3b4fd9df52a52464b9b1ce1561d403a87
parent ddc7b8e32b22fe8b45d306b7d99472d4b560add6 (diff)
[NET_SCHED]: Use ktime as clocksource
Get rid of the manual clock source selection mess and use ktime. Also use a scalar representation, which allows cleaning up pkt_sched.h a bit more and results in fewer ktime_to_ns() calls in most cases.

The PSCHED_US2JIFFIE/PSCHED_JIFFIE2US macros are implemented quite inefficiently by this patch; following patches will convert all qdiscs to hrtimers and get rid of them entirely.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/pkt_sched.h 169
-rw-r--r-- kernel/hrtimer.c 1
-rw-r--r-- net/sched/Kconfig 56
-rw-r--r-- net/sched/sch_api.c 77
-rw-r--r-- net/sched/sch_hfsc.c 31
5 files changed, 19 insertions, 315 deletions
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index f6afee73235d..1c12afd113d6 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -2,6 +2,7 @@
2#define __NET_PKT_SCHED_H 2#define __NET_PKT_SCHED_H
3 3
4#include <linux/jiffies.h> 4#include <linux/jiffies.h>
5#include <linux/ktime.h>
5#include <net/sch_generic.h> 6#include <net/sch_generic.h>
6 7
7struct qdisc_walker 8struct qdisc_walker
@@ -37,176 +38,32 @@ static inline void *qdisc_priv(struct Qdisc *q)
37 The things are not so bad, because we may use artificial 38 The things are not so bad, because we may use artificial
38 clock evaluated by integration of network data flow 39 clock evaluated by integration of network data flow
39 in the most critical places. 40 in the most critical places.
40
41 Note: we do not use fastgettimeofday.
42 The reason is that, when it is not the same thing as
43 gettimeofday, it returns invalid timestamp, which is
44 not updated, when net_bh is active.
45 */ 41 */
46 42
47/* General note about internal clock.
48
49 Any clock source returns time intervals, measured in units
50 close to 1usec. With source CONFIG_NET_SCH_CLK_GETTIMEOFDAY it is precisely
51 microseconds, otherwise something close but different chosen to minimize
52 arithmetic cost. Ratio usec/internal units in form numerator/denominator
53 may be read from /proc/net/psched.
54 */
55
56
57#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
58
59typedef struct timeval psched_time_t;
60typedef long psched_tdiff_t;
61
62#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
63#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(usecs)
64#define PSCHED_JIFFIE2US(delay) jiffies_to_usecs(delay)
65
66#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
67
68typedef u64 psched_time_t; 43typedef u64 psched_time_t;
69typedef long psched_tdiff_t; 44typedef long psched_tdiff_t;
70 45
71#ifdef CONFIG_NET_SCH_CLK_JIFFIES 46/* Avoid doing 64 bit divide by 1000 */
72 47#define PSCHED_US2NS(x) ((s64)(x) << 10)
73#if HZ < 96 48#define PSCHED_NS2US(x) ((x) >> 10)
74#define PSCHED_JSCALE 14
75#elif HZ >= 96 && HZ < 192
76#define PSCHED_JSCALE 13
77#elif HZ >= 192 && HZ < 384
78#define PSCHED_JSCALE 12
79#elif HZ >= 384 && HZ < 768
80#define PSCHED_JSCALE 11
81#elif HZ >= 768
82#define PSCHED_JSCALE 10
83#endif
84
85#define PSCHED_GET_TIME(stamp) ((stamp) = (get_jiffies_64()<<PSCHED_JSCALE))
86#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE)
87#define PSCHED_JIFFIE2US(delay) ((delay)<<PSCHED_JSCALE)
88
89#endif /* CONFIG_NET_SCH_CLK_JIFFIES */
90#ifdef CONFIG_NET_SCH_CLK_CPU
91#include <asm/timex.h>
92
93extern psched_tdiff_t psched_clock_per_hz;
94extern int psched_clock_scale;
95extern psched_time_t psched_time_base;
96extern cycles_t psched_time_mark;
97
98#define PSCHED_GET_TIME(stamp) \
99do { \
100 cycles_t cur = get_cycles(); \
101 if (sizeof(cycles_t) == sizeof(u32)) { \
102 if (cur <= psched_time_mark) \
103 psched_time_base += 0x100000000ULL; \
104 psched_time_mark = cur; \
105 (stamp) = (psched_time_base + cur)>>psched_clock_scale; \
106 } else { \
107 (stamp) = cur>>psched_clock_scale; \
108 } \
109} while (0)
110#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
111#define PSCHED_JIFFIE2US(delay) ((delay)*psched_clock_per_hz)
112
113#endif /* CONFIG_NET_SCH_CLK_CPU */
114
115#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
116
117#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
118#define PSCHED_TDIFF(tv1, tv2) \
119({ \
120 int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
121 int __delta = (tv1).tv_usec - (tv2).tv_usec; \
122 if (__delta_sec) { \
123 switch (__delta_sec) { \
124 default: \
125 __delta = 0; \
126 case 2: \
127 __delta += USEC_PER_SEC; \
128 case 1: \
129 __delta += USEC_PER_SEC; \
130 } \
131 } \
132 __delta; \
133})
134
135static inline int
136psched_tod_diff(int delta_sec, int bound)
137{
138 int delta;
139
140 if (bound <= USEC_PER_SEC || delta_sec > (0x7FFFFFFF/USEC_PER_SEC)-1)
141 return bound;
142 delta = delta_sec * USEC_PER_SEC;
143 if (delta > bound || delta < 0)
144 delta = bound;
145 return delta;
146}
147
148#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
149({ \
150 int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
151 int __delta = (tv1).tv_usec - (tv2).tv_usec; \
152 switch (__delta_sec) { \
153 default: \
154 __delta = psched_tod_diff(__delta_sec, bound); break; \
155 case 2: \
156 __delta += USEC_PER_SEC; \
157 case 1: \
158 __delta += USEC_PER_SEC; \
159 case 0: \
160 if (__delta > bound || __delta < 0) \
161 __delta = bound; \
162 } \
163 __delta; \
164})
165
166#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \
167 (tv1).tv_sec <= (tv2).tv_sec) || \
168 (tv1).tv_sec < (tv2).tv_sec)
169
170#define PSCHED_TADD2(tv, delta, tv_res) \
171({ \
172 int __delta = (tv).tv_usec + (delta); \
173 (tv_res).tv_sec = (tv).tv_sec; \
174 while (__delta >= USEC_PER_SEC) { (tv_res).tv_sec++; __delta -= USEC_PER_SEC; } \
175 (tv_res).tv_usec = __delta; \
176})
177
178#define PSCHED_TADD(tv, delta) \
179({ \
180 (tv).tv_usec += (delta); \
181 while ((tv).tv_usec >= USEC_PER_SEC) { (tv).tv_sec++; \
182 (tv).tv_usec -= USEC_PER_SEC; } \
183})
184
185/* Set/check that time is in the "past perfect";
186 it depends on concrete representation of system time
187 */
188
189#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0)
190#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0)
191 49
192#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; }) 50#define PSCHED_TICKS_PER_SEC PSCHED_NS2US(NSEC_PER_SEC)
51#define PSCHED_GET_TIME(stamp) \
52 ((stamp) = PSCHED_NS2US(ktime_to_ns(ktime_get())))
193 53
194#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ 54#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(PSCHED_US2NS((usecs)) / NSEC_PER_USEC)
55#define PSCHED_JIFFIE2US(delay) PSCHED_NS2US(jiffies_to_usecs((delay)) * NSEC_PER_USEC)
195 56
196#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2)) 57#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
197#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \ 58#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
198 min_t(long long, (tv1) - (tv2), bound) 59 min_t(long long, (tv1) - (tv2), bound)
199 60#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2))
200
201#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2))
202#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta)) 61#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta))
203#define PSCHED_TADD(tv, delta) ((tv) += (delta)) 62#define PSCHED_TADD(tv, delta) ((tv) += (delta))
204#define PSCHED_SET_PASTPERFECT(t) ((t) = 0) 63#define PSCHED_SET_PASTPERFECT(t) ((t) = 0)
205#define PSCHED_IS_PASTPERFECT(t) ((t) == 0) 64#define PSCHED_IS_PASTPERFECT(t) ((t) == 0)
206#define PSCHED_AUDIT_TDIFF(t) 65#define PSCHED_AUDIT_TDIFF(t)
207 66
208#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
209
210extern struct Qdisc_ops pfifo_qdisc_ops; 67extern struct Qdisc_ops pfifo_qdisc_ops;
211extern struct Qdisc_ops bfifo_qdisc_ops; 68extern struct Qdisc_ops bfifo_qdisc_ops;
212 69
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index b74860aaf5f1..f5cfde8c9025 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -59,6 +59,7 @@ ktime_t ktime_get(void)
59 59
60 return timespec_to_ktime(now); 60 return timespec_to_ktime(now);
61} 61}
62EXPORT_SYMBOL_GPL(ktime_get);
62 63
63/** 64/**
64 * ktime_get_real - get the real (wall-) time in ktime_t format 65 * ktime_get_real - get the real (wall-) time in ktime_t format
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f4544dd86476..475df8449be9 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -46,62 +46,6 @@ config NET_SCH_FIFO
46 46
47if NET_SCHED 47if NET_SCHED
48 48
49choice
50 prompt "Packet scheduler clock source"
51 default NET_SCH_CLK_GETTIMEOFDAY
52 ---help---
53 Packet schedulers need a monotonic clock that increments at a static
54 rate. The kernel provides several suitable interfaces, each with
55 different properties:
56
57 - high resolution (us or better)
58 - fast to read (minimal locking, no i/o access)
59 - synchronized on all processors
60 - handles cpu clock frequency changes
61
62 but nothing provides all of the above.
63
64config NET_SCH_CLK_JIFFIES
65 bool "Timer interrupt"
66 ---help---
67 Say Y here if you want to use the timer interrupt (jiffies) as clock
68 source. This clock source is fast, synchronized on all processors and
69 handles cpu clock frequency changes, but its resolution is too low
70 for accurate shaping except at very low speed.
71
72config NET_SCH_CLK_GETTIMEOFDAY
73 bool "gettimeofday"
74 ---help---
75 Say Y here if you want to use gettimeofday as clock source. This clock
76 source has high resolution, is synchronized on all processors and
77 handles cpu clock frequency changes, but it is slow.
78
79 Choose this if you need a high resolution clock source but can't use
80 the CPU's cycle counter.
81
82# don't allow on SMP x86 because they can have unsynchronized TSCs.
83# gettimeofday is a good alternative
84config NET_SCH_CLK_CPU
85 bool "CPU cycle counter"
86 depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64
87 ---help---
88 Say Y here if you want to use the CPU's cycle counter as clock source.
89 This is a cheap and high resolution clock source, but on some
90 architectures it is not synchronized on all processors and doesn't
91 handle cpu clock frequency changes.
92
93 The useable cycle counters are:
94
95 x86/x86_64 - Timestamp Counter
96 alpha - Cycle Counter
97 sparc64 - %ticks register
98 ppc64 - Time base
99 ia64 - Interval Time Counter
100
101 Choose this if your CPU's cycle counter is working properly.
102
103endchoice
104
105comment "Queueing/Scheduling" 49comment "Queueing/Scheduling"
106 50
107config NET_SCH_CBQ 51config NET_SCH_CBQ
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 4a927a5e1fa6..d71bf79eb80b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1175,15 +1175,12 @@ reclassify:
1175 return -1; 1175 return -1;
1176} 1176}
1177 1177
1178static int psched_us_per_tick = 1;
1179static int psched_tick_per_us = 1;
1180
1181#ifdef CONFIG_PROC_FS 1178#ifdef CONFIG_PROC_FS
1182static int psched_show(struct seq_file *seq, void *v) 1179static int psched_show(struct seq_file *seq, void *v)
1183{ 1180{
1184 seq_printf(seq, "%08x %08x %08x %08x\n", 1181 seq_printf(seq, "%08x %08x %08x %08x\n",
1185 psched_tick_per_us, psched_us_per_tick, 1182 (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
1186 1000000, HZ); 1183 1000000, HZ);
1187 1184
1188 return 0; 1185 return 0;
1189} 1186}
@@ -1202,80 +1199,10 @@ static const struct file_operations psched_fops = {
1202}; 1199};
1203#endif 1200#endif
1204 1201
1205#ifdef CONFIG_NET_SCH_CLK_CPU
1206psched_tdiff_t psched_clock_per_hz;
1207int psched_clock_scale;
1208EXPORT_SYMBOL(psched_clock_per_hz);
1209EXPORT_SYMBOL(psched_clock_scale);
1210
1211psched_time_t psched_time_base;
1212cycles_t psched_time_mark;
1213EXPORT_SYMBOL(psched_time_mark);
1214EXPORT_SYMBOL(psched_time_base);
1215
1216/*
1217 * Periodically adjust psched_time_base to avoid overflow
1218 * with 32-bit get_cycles(). Safe up to 4GHz CPU.
1219 */
1220static void psched_tick(unsigned long);
1221static DEFINE_TIMER(psched_timer, psched_tick, 0, 0);
1222
1223static void psched_tick(unsigned long dummy)
1224{
1225 if (sizeof(cycles_t) == sizeof(u32)) {
1226 psched_time_t dummy_stamp;
1227 PSCHED_GET_TIME(dummy_stamp);
1228 psched_timer.expires = jiffies + 1*HZ;
1229 add_timer(&psched_timer);
1230 }
1231}
1232
1233int __init psched_calibrate_clock(void)
1234{
1235 psched_time_t stamp, stamp1;
1236 struct timeval tv, tv1;
1237 psched_tdiff_t delay;
1238 long rdelay;
1239 unsigned long stop;
1240
1241 psched_tick(0);
1242 stop = jiffies + HZ/10;
1243 PSCHED_GET_TIME(stamp);
1244 do_gettimeofday(&tv);
1245 while (time_before(jiffies, stop)) {
1246 barrier();
1247 cpu_relax();
1248 }
1249 PSCHED_GET_TIME(stamp1);
1250 do_gettimeofday(&tv1);
1251
1252 delay = PSCHED_TDIFF(stamp1, stamp);
1253 rdelay = tv1.tv_usec - tv.tv_usec;
1254 rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
1255 if (rdelay > delay)
1256 return -1;
1257 delay /= rdelay;
1258 psched_tick_per_us = delay;
1259 while ((delay>>=1) != 0)
1260 psched_clock_scale++;
1261 psched_us_per_tick = 1<<psched_clock_scale;
1262 psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
1263 return 0;
1264}
1265#endif
1266
1267static int __init pktsched_init(void) 1202static int __init pktsched_init(void)
1268{ 1203{
1269 struct rtnetlink_link *link_p; 1204 struct rtnetlink_link *link_p;
1270 1205
1271#ifdef CONFIG_NET_SCH_CLK_CPU
1272 if (psched_calibrate_clock() < 0)
1273 return -1;
1274#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
1275 psched_tick_per_us = HZ<<PSCHED_JSCALE;
1276 psched_us_per_tick = 1000000;
1277#endif
1278
1279 link_p = rtnetlink_links[PF_UNSPEC]; 1206 link_p = rtnetlink_links[PF_UNSPEC];
1280 1207
1281 /* Setup rtnetlink links. It is made here to avoid 1208 /* Setup rtnetlink links. It is made here to avoid
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 407c6fb1ba14..f85cfba647f8 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -195,20 +195,6 @@ struct hfsc_sched
195 struct timer_list wd_timer; /* watchdog timer */ 195 struct timer_list wd_timer; /* watchdog timer */
196}; 196};
197 197
198/*
199 * macros
200 */
201#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
202#include <linux/time.h>
203#undef PSCHED_GET_TIME
204#define PSCHED_GET_TIME(stamp) \
205do { \
206 struct timeval tv; \
207 do_gettimeofday(&tv); \
208 (stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec; \
209} while (0)
210#endif
211
212#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ 198#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */
213 199
214 200
@@ -394,28 +380,17 @@ cftree_update(struct hfsc_class *cl)
394 * ism: (psched_us/byte) << ISM_SHIFT 380 * ism: (psched_us/byte) << ISM_SHIFT
395 * dx: psched_us 381 * dx: psched_us
396 * 382 *
397 * Clock source resolution (CONFIG_NET_SCH_CLK_*) 383 * The clock source resolution with ktime is 1.024us.
398 * JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
399 * CPU: resolution is between 0.5us and 1us.
400 * GETTIMEOFDAY: resolution is exactly 1us.
401 * 384 *
402 * sm and ism are scaled in order to keep effective digits. 385 * sm and ism are scaled in order to keep effective digits.
403 * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective 386 * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
404 * digits in decimal using the following table. 387 * digits in decimal using the following table.
405 * 388 *
406 * Note: We can afford the additional accuracy (altq hfsc keeps at most
407 * 3 effective digits) thanks to the fact that linux clock is bounded
408 * much more tightly.
409 *
410 * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps 389 * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
411 * ------------+------------------------------------------------------- 390 * ------------+-------------------------------------------------------
412 * bytes/0.5us 6.25e-3 62.5e-3 625e-3 6250e-3 62500e-3 391 * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3
413 * bytes/us 12.5e-3 125e-3 1250e-3 12500e-3 125000e-3
414 * bytes/1.27us 15.875e-3 158.75e-3 1587.5e-3 15875e-3 158750e-3
415 * 392 *
416 * 0.5us/byte 160 16 1.6 0.16 0.016 393 * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125
417 * us/byte 80 8 0.8 0.08 0.008
418 * 1.27us/byte 63 6.3 0.63 0.063 0.0063
419 */ 394 */
420#define SM_SHIFT 20 395#define SM_SHIFT 20
421#define ISM_SHIFT 18 396#define ISM_SHIFT 18