author | Jeff Moyer <jmoyer@redhat.com> | 2016-06-08 10:55:34 -0400 |
committer | Jens Axboe <axboe@fb.com> | 2016-06-08 10:55:34 -0400 |
commit | 9a7f38c42c2b92391d9dabaf9f51df7cfe5608e4 (patch) | |
tree | be1306256203e951448399175af96a53175cef9a /block | |
parent | 28a8f0d317bf225ff15008f5dd66ae16242dd843 (diff) |
cfq-iosched: Convert from jiffies to nanoseconds
Convert all time-keeping in CFQ IO scheduler from jiffies to nanoseconds
so that we can later make the intervals more fine-grained than jiffies.
One jiffy is several milliseconds, and even for today's rotating disks
that is a noticeable amount of time; at that granularity we leave the
disk unnecessarily idle.
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
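The conversion itself is mechanical: tunables and timestamps that were kept in jiffies become u64 nanosecond values, and time_before()/time_after() checks become plain 64-bit comparisons against ktime_get_ns(). The sketch below is illustrative only (standalone userspace C, not code from the patch; the HZ value and variable names are assumptions for the example). It also shows why the old HZ-based expressions could truncate to zero on low-HZ configs, which is the guard removed from cfq_init() at the end of this diff.

```c
/*
 * Illustrative sketch only -- not code from this patch. Standalone
 * userspace C; HZ and NSEC_PER_SEC are defined here for the example,
 * whereas the kernel gets them from <linux/jiffies.h> and <linux/ktime.h>.
 */
#include <stdint.h>
#include <stdio.h>

#define HZ           100            /* example: a common low-HZ config */
#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	/* Old style: tunable in jiffies. With HZ=100 a jiffy is 10 ms and
	 * HZ / 125 truncates to 0, which is why cfq_init() had to force
	 * cfq_slice_idle up to 1 jiffy on such configs. */
	unsigned long slice_idle_jiffies = HZ / 125;

	/* New style: the same tunable kept as u64 nanoseconds is exact
	 * (8 ms) regardless of HZ, and can later be made finer-grained. */
	uint64_t slice_idle_ns = NSEC_PER_SEC / 125;

	/* Deadline checks move from time_before(jiffies, end) to a plain
	 * comparison against a ktime_get_ns()-style monotonic clock. */
	uint64_t now = 0;                    /* stand-in for ktime_get_ns() */
	uint64_t slice_end = now + slice_idle_ns;
	int slice_used = !(now < slice_end); /* cfq_slice_used() analogue */

	printf("jiffies tunable: %lu, ns tunable: %llu, slice_used: %d\n",
	       slice_idle_jiffies, (unsigned long long)slice_idle_ns,
	       slice_used);
	return 0;
}
```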
Diffstat (limited to 'block')
-rw-r--r-- | block/cfq-iosched.c | 273 |
1 file changed, 136 insertions(+), 137 deletions(-)
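A second, less obvious part of the change shows up in the hunks below: once these quantities are u64, open-coded divisions have to go through div_u64()/div64_u64(), because a plain `/` on a 64-bit value would emit libgcc helper calls (__udivdi3 and friends) that 32-bit kernels do not link against. The following is a hedged userspace approximation of those helpers, purely to show the semantics used in cfq_prio_slice() and cfq_scaled_cfqq_slice(); the constants are example values, not taken from a running system.

```c
/*
 * Userspace approximation of div_u64()/div64_u64() from the kernel's
 * include/linux/math64.h, only to illustrate their semantics; the real
 * 32-bit kernel implementations avoid libgcc's 64-bit division helpers.
 */
#include <assert.h>
#include <stdint.h>

static inline uint64_t div_u64(uint64_t dividend, uint32_t divisor)
{
	return dividend / divisor;          /* 64-by-32 division */
}

static inline uint64_t div64_u64(uint64_t dividend, uint64_t divisor)
{
	return dividend / divisor;          /* 64-by-64 division */
}

int main(void)
{
	uint64_t sync_slice = 1000000000ULL / 10;  /* cfq_slice_sync: 100 ms in ns */

	/* cfq_prio_slice(): base_slice / CFQ_SLICE_SCALE used to be a plain
	 * '/' on an int; on a u64 it becomes div_u64(). */
	uint64_t slice = div_u64(sync_slice, 5);
	assert(slice == 20000000ULL);              /* 20 ms */

	/* cfq_scaled_cfqq_slice(): base_low_slice * slice / sync_slice has
	 * two u64 operands, hence div64_u64(). */
	uint64_t base_low_slice = 2 * (1000000000ULL / 125);  /* 2 * slice_idle */
	uint64_t low_slice = div64_u64(base_low_slice * slice, sync_slice);
	assert(low_slice == 3200000ULL);           /* 3.2 ms */
	return 0;
}
```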
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b1154861f4c9..9c2e82c1ea88 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -10,7 +10,7 @@ | |||
10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
11 | #include <linux/blkdev.h> | 11 | #include <linux/blkdev.h> |
12 | #include <linux/elevator.h> | 12 | #include <linux/elevator.h> |
13 | #include <linux/jiffies.h> | 13 | #include <linux/ktime.h> |
14 | #include <linux/rbtree.h> | 14 | #include <linux/rbtree.h> |
15 | #include <linux/ioprio.h> | 15 | #include <linux/ioprio.h> |
16 | #include <linux/blktrace_api.h> | 16 | #include <linux/blktrace_api.h> |
@@ -22,28 +22,28 @@ | |||
22 | */ | 22 | */ |
23 | /* max queue in one round of service */ | 23 | /* max queue in one round of service */ |
24 | static const int cfq_quantum = 8; | 24 | static const int cfq_quantum = 8; |
25 | static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; | 25 | static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 }; |
26 | /* maximum backwards seek, in KiB */ | 26 | /* maximum backwards seek, in KiB */ |
27 | static const int cfq_back_max = 16 * 1024; | 27 | static const int cfq_back_max = 16 * 1024; |
28 | /* penalty of a backwards seek */ | 28 | /* penalty of a backwards seek */ |
29 | static const int cfq_back_penalty = 2; | 29 | static const int cfq_back_penalty = 2; |
30 | static const int cfq_slice_sync = HZ / 10; | 30 | static const u64 cfq_slice_sync = NSEC_PER_SEC / 10; |
31 | static int cfq_slice_async = HZ / 25; | 31 | static u64 cfq_slice_async = NSEC_PER_SEC / 25; |
32 | static const int cfq_slice_async_rq = 2; | 32 | static const int cfq_slice_async_rq = 2; |
33 | static int cfq_slice_idle = HZ / 125; | 33 | static u64 cfq_slice_idle = NSEC_PER_SEC / 125; |
34 | static int cfq_group_idle = HZ / 125; | 34 | static u64 cfq_group_idle = NSEC_PER_SEC / 125; |
35 | static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ | 35 | static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */ |
36 | static const int cfq_hist_divisor = 4; | 36 | static const int cfq_hist_divisor = 4; |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * offset from end of service tree | 39 | * offset from end of service tree |
40 | */ | 40 | */ |
41 | #define CFQ_IDLE_DELAY (HZ / 5) | 41 | #define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5) |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * below this threshold, we consider thinktime immediate | 44 | * below this threshold, we consider thinktime immediate |
45 | */ | 45 | */ |
46 | #define CFQ_MIN_TT (2) | 46 | #define CFQ_MIN_TT (2 * NSEC_PER_SEC / HZ) |
47 | 47 | ||
48 | #define CFQ_SLICE_SCALE (5) | 48 | #define CFQ_SLICE_SCALE (5) |
49 | #define CFQ_HW_QUEUE_MIN (5) | 49 | #define CFQ_HW_QUEUE_MIN (5) |
@@ -73,11 +73,11 @@ static struct kmem_cache *cfq_pool; | |||
73 | #define CFQ_WEIGHT_LEGACY_MAX 1000 | 73 | #define CFQ_WEIGHT_LEGACY_MAX 1000 |
74 | 74 | ||
75 | struct cfq_ttime { | 75 | struct cfq_ttime { |
76 | unsigned long last_end_request; | 76 | u64 last_end_request; |
77 | 77 | ||
78 | unsigned long ttime_total; | 78 | u64 ttime_total; |
79 | u64 ttime_mean; | ||
79 | unsigned long ttime_samples; | 80 | unsigned long ttime_samples; |
80 | unsigned long ttime_mean; | ||
81 | }; | 81 | }; |
82 | 82 | ||
83 | /* | 83 | /* |
@@ -94,7 +94,7 @@ struct cfq_rb_root { | |||
94 | struct cfq_ttime ttime; | 94 | struct cfq_ttime ttime; |
95 | }; | 95 | }; |
96 | #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ | 96 | #define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ |
97 | .ttime = {.last_end_request = jiffies,},} | 97 | .ttime = {.last_end_request = ktime_get_ns(),},} |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * Per process-grouping structure | 100 | * Per process-grouping structure |
@@ -109,7 +109,7 @@ struct cfq_queue { | |||
109 | /* service_tree member */ | 109 | /* service_tree member */ |
110 | struct rb_node rb_node; | 110 | struct rb_node rb_node; |
111 | /* service_tree key */ | 111 | /* service_tree key */ |
112 | unsigned long rb_key; | 112 | u64 rb_key; |
113 | /* prio tree member */ | 113 | /* prio tree member */ |
114 | struct rb_node p_node; | 114 | struct rb_node p_node; |
115 | /* prio tree root we belong to, if any */ | 115 | /* prio tree root we belong to, if any */ |
@@ -126,13 +126,13 @@ struct cfq_queue { | |||
126 | struct list_head fifo; | 126 | struct list_head fifo; |
127 | 127 | ||
128 | /* time when queue got scheduled in to dispatch first request. */ | 128 | /* time when queue got scheduled in to dispatch first request. */ |
129 | unsigned long dispatch_start; | 129 | u64 dispatch_start; |
130 | unsigned int allocated_slice; | 130 | u64 allocated_slice; |
131 | unsigned int slice_dispatch; | 131 | u64 slice_dispatch; |
132 | /* time when first request from queue completed and slice started. */ | 132 | /* time when first request from queue completed and slice started. */ |
133 | unsigned long slice_start; | 133 | u64 slice_start; |
134 | unsigned long slice_end; | 134 | u64 slice_end; |
135 | long slice_resid; | 135 | u64 slice_resid; |
136 | 136 | ||
137 | /* pending priority requests */ | 137 | /* pending priority requests */ |
138 | int prio_pending; | 138 | int prio_pending; |
@@ -290,7 +290,7 @@ struct cfq_group { | |||
290 | struct cfq_rb_root service_trees[2][3]; | 290 | struct cfq_rb_root service_trees[2][3]; |
291 | struct cfq_rb_root service_tree_idle; | 291 | struct cfq_rb_root service_tree_idle; |
292 | 292 | ||
293 | unsigned long saved_wl_slice; | 293 | u64 saved_wl_slice; |
294 | enum wl_type_t saved_wl_type; | 294 | enum wl_type_t saved_wl_type; |
295 | enum wl_class_t saved_wl_class; | 295 | enum wl_class_t saved_wl_class; |
296 | 296 | ||
@@ -329,7 +329,7 @@ struct cfq_data { | |||
329 | */ | 329 | */ |
330 | enum wl_class_t serving_wl_class; | 330 | enum wl_class_t serving_wl_class; |
331 | enum wl_type_t serving_wl_type; | 331 | enum wl_type_t serving_wl_type; |
332 | unsigned long workload_expires; | 332 | u64 workload_expires; |
333 | struct cfq_group *serving_group; | 333 | struct cfq_group *serving_group; |
334 | 334 | ||
335 | /* | 335 | /* |
@@ -374,22 +374,22 @@ struct cfq_data { | |||
374 | * tunables, see top of file | 374 | * tunables, see top of file |
375 | */ | 375 | */ |
376 | unsigned int cfq_quantum; | 376 | unsigned int cfq_quantum; |
377 | unsigned int cfq_fifo_expire[2]; | ||
378 | unsigned int cfq_back_penalty; | 377 | unsigned int cfq_back_penalty; |
379 | unsigned int cfq_back_max; | 378 | unsigned int cfq_back_max; |
380 | unsigned int cfq_slice[2]; | ||
381 | unsigned int cfq_slice_async_rq; | 379 | unsigned int cfq_slice_async_rq; |
382 | unsigned int cfq_slice_idle; | ||
383 | unsigned int cfq_group_idle; | ||
384 | unsigned int cfq_latency; | 380 | unsigned int cfq_latency; |
385 | unsigned int cfq_target_latency; | 381 | u64 cfq_fifo_expire[2]; |
382 | u64 cfq_slice[2]; | ||
383 | u64 cfq_slice_idle; | ||
384 | u64 cfq_group_idle; | ||
385 | u64 cfq_target_latency; | ||
386 | 386 | ||
387 | /* | 387 | /* |
388 | * Fallback dummy cfqq for extreme OOM conditions | 388 | * Fallback dummy cfqq for extreme OOM conditions |
389 | */ | 389 | */ |
390 | struct cfq_queue oom_cfqq; | 390 | struct cfq_queue oom_cfqq; |
391 | 391 | ||
392 | unsigned long last_delayed_sync; | 392 | u64 last_delayed_sync; |
393 | }; | 393 | }; |
394 | 394 | ||
395 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); | 395 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); |
@@ -676,7 +676,7 @@ static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, | |||
676 | } | 676 | } |
677 | 677 | ||
678 | static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, | 678 | static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, |
679 | unsigned long time, unsigned long unaccounted_time) | 679 | uint64_t time, unsigned long unaccounted_time) |
680 | { | 680 | { |
681 | blkg_stat_add(&cfqg->stats.time, time); | 681 | blkg_stat_add(&cfqg->stats.time, time); |
682 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 682 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
@@ -788,7 +788,7 @@ static inline void cfqg_put(struct cfq_group *cfqg) { } | |||
788 | static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, | 788 | static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, |
789 | struct cfq_group *curr_cfqg, int op, int op_flags) { } | 789 | struct cfq_group *curr_cfqg, int op, int op_flags) { } |
790 | static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, | 790 | static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, |
791 | unsigned long time, unsigned long unaccounted_time) { } | 791 | uint64_t time, unsigned long unaccounted_time) { } |
792 | static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op, | 792 | static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op, |
793 | int op_flags) { } | 793 | int op_flags) { } |
794 | static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op, | 794 | static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op, |
@@ -815,7 +815,7 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, | |||
815 | static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd, | 815 | static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd, |
816 | struct cfq_ttime *ttime, bool group_idle) | 816 | struct cfq_ttime *ttime, bool group_idle) |
817 | { | 817 | { |
818 | unsigned long slice; | 818 | u64 slice; |
819 | if (!sample_valid(ttime->ttime_samples)) | 819 | if (!sample_valid(ttime->ttime_samples)) |
820 | return false; | 820 | return false; |
821 | if (group_idle) | 821 | if (group_idle) |
@@ -938,17 +938,18 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) | |||
938 | * if a queue is marked sync and has sync io queued. A sync queue with async | 938 | * if a queue is marked sync and has sync io queued. A sync queue with async |
939 | * io only, should not get full sync slice length. | 939 | * io only, should not get full sync slice length. |
940 | */ | 940 | */ |
941 | static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, | 941 | static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync, |
942 | unsigned short prio) | 942 | unsigned short prio) |
943 | { | 943 | { |
944 | const int base_slice = cfqd->cfq_slice[sync]; | 944 | u64 base_slice = cfqd->cfq_slice[sync]; |
945 | u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE); | ||
945 | 946 | ||
946 | WARN_ON(prio >= IOPRIO_BE_NR); | 947 | WARN_ON(prio >= IOPRIO_BE_NR); |
947 | 948 | ||
948 | return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); | 949 | return base_slice + (slice * (4 - prio)); |
949 | } | 950 | } |
950 | 951 | ||
951 | static inline int | 952 | static inline u64 |
952 | cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 953 | cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
953 | { | 954 | { |
954 | return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); | 955 | return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); |
@@ -966,15 +967,14 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
966 | * | 967 | * |
967 | * The result is also in fixed point w/ CFQ_SERVICE_SHIFT. | 968 | * The result is also in fixed point w/ CFQ_SERVICE_SHIFT. |
968 | */ | 969 | */ |
969 | static inline u64 cfqg_scale_charge(unsigned long charge, | 970 | static inline u64 cfqg_scale_charge(u64 charge, |
970 | unsigned int vfraction) | 971 | unsigned int vfraction) |
971 | { | 972 | { |
972 | u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */ | 973 | u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */ |
973 | 974 | ||
974 | /* charge / vfraction */ | 975 | /* charge / vfraction */ |
975 | c <<= CFQ_SERVICE_SHIFT; | 976 | c <<= CFQ_SERVICE_SHIFT; |
976 | do_div(c, vfraction); | 977 | return div_u64(c, vfraction); |
977 | return c; | ||
978 | } | 978 | } |
979 | 979 | ||
980 | static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) | 980 | static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) |
@@ -1027,16 +1027,16 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd, | |||
1027 | return cfqg->busy_queues_avg[rt]; | 1027 | return cfqg->busy_queues_avg[rt]; |
1028 | } | 1028 | } |
1029 | 1029 | ||
1030 | static inline unsigned | 1030 | static inline u64 |
1031 | cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) | 1031 | cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) |
1032 | { | 1032 | { |
1033 | return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT; | 1033 | return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT; |
1034 | } | 1034 | } |
1035 | 1035 | ||
1036 | static inline unsigned | 1036 | static inline u64 |
1037 | cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 1037 | cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
1038 | { | 1038 | { |
1039 | unsigned slice = cfq_prio_to_slice(cfqd, cfqq); | 1039 | u64 slice = cfq_prio_to_slice(cfqd, cfqq); |
1040 | if (cfqd->cfq_latency) { | 1040 | if (cfqd->cfq_latency) { |
1041 | /* | 1041 | /* |
1042 | * interested queues (we consider only the ones with the same | 1042 | * interested queues (we consider only the ones with the same |
@@ -1044,20 +1044,22 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1044 | */ | 1044 | */ |
1045 | unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, | 1045 | unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, |
1046 | cfq_class_rt(cfqq)); | 1046 | cfq_class_rt(cfqq)); |
1047 | unsigned sync_slice = cfqd->cfq_slice[1]; | 1047 | u64 sync_slice = cfqd->cfq_slice[1]; |
1048 | unsigned expect_latency = sync_slice * iq; | 1048 | u64 expect_latency = sync_slice * iq; |
1049 | unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg); | 1049 | u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg); |
1050 | 1050 | ||
1051 | if (expect_latency > group_slice) { | 1051 | if (expect_latency > group_slice) { |
1052 | unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; | 1052 | u64 base_low_slice = 2 * cfqd->cfq_slice_idle; |
1053 | u64 low_slice; | ||
1054 | |||
1053 | /* scale low_slice according to IO priority | 1055 | /* scale low_slice according to IO priority |
1054 | * and sync vs async */ | 1056 | * and sync vs async */ |
1055 | unsigned low_slice = | 1057 | low_slice = div64_u64(base_low_slice*slice, sync_slice); |
1056 | min(slice, base_low_slice * slice / sync_slice); | 1058 | low_slice = min(slice, low_slice); |
1057 | /* the adapted slice value is scaled to fit all iqs | 1059 | /* the adapted slice value is scaled to fit all iqs |
1058 | * into the target latency */ | 1060 | * into the target latency */ |
1059 | slice = max(slice * group_slice / expect_latency, | 1061 | slice = div64_u64(slice*group_slice, expect_latency); |
1060 | low_slice); | 1062 | slice = max(slice, low_slice); |
1061 | } | 1063 | } |
1062 | } | 1064 | } |
1063 | return slice; | 1065 | return slice; |
@@ -1066,12 +1068,13 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1066 | static inline void | 1068 | static inline void |
1067 | cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 1069 | cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
1068 | { | 1070 | { |
1069 | unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq); | 1071 | u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq); |
1072 | u64 now = ktime_get_ns(); | ||
1070 | 1073 | ||
1071 | cfqq->slice_start = jiffies; | 1074 | cfqq->slice_start = now; |
1072 | cfqq->slice_end = jiffies + slice; | 1075 | cfqq->slice_end = now + slice; |
1073 | cfqq->allocated_slice = slice; | 1076 | cfqq->allocated_slice = slice; |
1074 | cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); | 1077 | cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now); |
1075 | } | 1078 | } |
1076 | 1079 | ||
1077 | /* | 1080 | /* |
@@ -1083,7 +1086,7 @@ static inline bool cfq_slice_used(struct cfq_queue *cfqq) | |||
1083 | { | 1086 | { |
1084 | if (cfq_cfqq_slice_new(cfqq)) | 1087 | if (cfq_cfqq_slice_new(cfqq)) |
1085 | return false; | 1088 | return false; |
1086 | if (time_before(jiffies, cfqq->slice_end)) | 1089 | if (ktime_get_ns() < cfqq->slice_end) |
1087 | return false; | 1090 | return false; |
1088 | 1091 | ||
1089 | return true; | 1092 | return true; |
@@ -1249,8 +1252,8 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1249 | return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); | 1252 | return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); |
1250 | } | 1253 | } |
1251 | 1254 | ||
1252 | static unsigned long cfq_slice_offset(struct cfq_data *cfqd, | 1255 | static u64 cfq_slice_offset(struct cfq_data *cfqd, |
1253 | struct cfq_queue *cfqq) | 1256 | struct cfq_queue *cfqq) |
1254 | { | 1257 | { |
1255 | /* | 1258 | /* |
1256 | * just an approximation, should be ok. | 1259 | * just an approximation, should be ok. |
@@ -1443,31 +1446,31 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
1443 | cfqg_stats_update_dequeue(cfqg); | 1446 | cfqg_stats_update_dequeue(cfqg); |
1444 | } | 1447 | } |
1445 | 1448 | ||
1446 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, | 1449 | static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq, |
1447 | unsigned int *unaccounted_time) | 1450 | u64 *unaccounted_time) |
1448 | { | 1451 | { |
1449 | unsigned int slice_used; | 1452 | u64 slice_used; |
1453 | u64 now = ktime_get_ns(); | ||
1450 | 1454 | ||
1451 | /* | 1455 | /* |
1452 | * Queue got expired before even a single request completed or | 1456 | * Queue got expired before even a single request completed or |
1453 | * got expired immediately after first request completion. | 1457 | * got expired immediately after first request completion. |
1454 | */ | 1458 | */ |
1455 | if (!cfqq->slice_start || cfqq->slice_start == jiffies) { | 1459 | if (!cfqq->slice_start || cfqq->slice_start == now) { |
1456 | /* | 1460 | /* |
1457 | * Also charge the seek time incurred to the group, otherwise | 1461 | * Also charge the seek time incurred to the group, otherwise |
1458 | * if there are mutiple queues in the group, each can dispatch | 1462 | * if there are mutiple queues in the group, each can dispatch |
1459 | * a single request on seeky media and cause lots of seek time | 1463 | * a single request on seeky media and cause lots of seek time |
1460 | * and group will never know it. | 1464 | * and group will never know it. |
1461 | */ | 1465 | */ |
1462 | slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start), | 1466 | slice_used = max_t(u64, (now - cfqq->dispatch_start), 1); |
1463 | 1); | ||
1464 | } else { | 1467 | } else { |
1465 | slice_used = jiffies - cfqq->slice_start; | 1468 | slice_used = now - cfqq->slice_start; |
1466 | if (slice_used > cfqq->allocated_slice) { | 1469 | if (slice_used > cfqq->allocated_slice) { |
1467 | *unaccounted_time = slice_used - cfqq->allocated_slice; | 1470 | *unaccounted_time = slice_used - cfqq->allocated_slice; |
1468 | slice_used = cfqq->allocated_slice; | 1471 | slice_used = cfqq->allocated_slice; |
1469 | } | 1472 | } |
1470 | if (time_after(cfqq->slice_start, cfqq->dispatch_start)) | 1473 | if (cfqq->slice_start > cfqq->dispatch_start) |
1471 | *unaccounted_time += cfqq->slice_start - | 1474 | *unaccounted_time += cfqq->slice_start - |
1472 | cfqq->dispatch_start; | 1475 | cfqq->dispatch_start; |
1473 | } | 1476 | } |
@@ -1479,10 +1482,11 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
1479 | struct cfq_queue *cfqq) | 1482 | struct cfq_queue *cfqq) |
1480 | { | 1483 | { |
1481 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 1484 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
1482 | unsigned int used_sl, charge, unaccounted_sl = 0; | 1485 | u64 used_sl, charge, unaccounted_sl = 0; |
1483 | int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) | 1486 | int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) |
1484 | - cfqg->service_tree_idle.count; | 1487 | - cfqg->service_tree_idle.count; |
1485 | unsigned int vfr; | 1488 | unsigned int vfr; |
1489 | u64 now = ktime_get_ns(); | ||
1486 | 1490 | ||
1487 | BUG_ON(nr_sync < 0); | 1491 | BUG_ON(nr_sync < 0); |
1488 | used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); | 1492 | used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); |
@@ -1504,9 +1508,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
1504 | cfq_group_service_tree_add(st, cfqg); | 1508 | cfq_group_service_tree_add(st, cfqg); |
1505 | 1509 | ||
1506 | /* This group is being expired. Save the context */ | 1510 | /* This group is being expired. Save the context */ |
1507 | if (time_after(cfqd->workload_expires, jiffies)) { | 1511 | if (cfqd->workload_expires > now) { |
1508 | cfqg->saved_wl_slice = cfqd->workload_expires | 1512 | cfqg->saved_wl_slice = cfqd->workload_expires - now; |
1509 | - jiffies; | ||
1510 | cfqg->saved_wl_type = cfqd->serving_wl_type; | 1513 | cfqg->saved_wl_type = cfqd->serving_wl_type; |
1511 | cfqg->saved_wl_class = cfqd->serving_wl_class; | 1514 | cfqg->saved_wl_class = cfqd->serving_wl_class; |
1512 | } else | 1515 | } else |
@@ -1515,7 +1518,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
1515 | cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, | 1518 | cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, |
1516 | st->min_vdisktime); | 1519 | st->min_vdisktime); |
1517 | cfq_log_cfqq(cfqq->cfqd, cfqq, | 1520 | cfq_log_cfqq(cfqq->cfqd, cfqq, |
1518 | "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", | 1521 | "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu", |
1519 | used_sl, cfqq->slice_dispatch, charge, | 1522 | used_sl, cfqq->slice_dispatch, charge, |
1520 | iops_mode(cfqd), cfqq->nr_sectors); | 1523 | iops_mode(cfqd), cfqq->nr_sectors); |
1521 | cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl); | 1524 | cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl); |
@@ -1538,7 +1541,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg) | |||
1538 | *st = CFQ_RB_ROOT; | 1541 | *st = CFQ_RB_ROOT; |
1539 | RB_CLEAR_NODE(&cfqg->rb_node); | 1542 | RB_CLEAR_NODE(&cfqg->rb_node); |
1540 | 1543 | ||
1541 | cfqg->ttime.last_end_request = jiffies; | 1544 | cfqg->ttime.last_end_request = ktime_get_ns(); |
1542 | } | 1545 | } |
1543 | 1546 | ||
1544 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 1547 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
@@ -2221,10 +2224,11 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
2221 | { | 2224 | { |
2222 | struct rb_node **p, *parent; | 2225 | struct rb_node **p, *parent; |
2223 | struct cfq_queue *__cfqq; | 2226 | struct cfq_queue *__cfqq; |
2224 | unsigned long rb_key; | 2227 | u64 rb_key; |
2225 | struct cfq_rb_root *st; | 2228 | struct cfq_rb_root *st; |
2226 | int left; | 2229 | int left; |
2227 | int new_cfqq = 1; | 2230 | int new_cfqq = 1; |
2231 | u64 now = ktime_get_ns(); | ||
2228 | 2232 | ||
2229 | st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); | 2233 | st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); |
2230 | if (cfq_class_idle(cfqq)) { | 2234 | if (cfq_class_idle(cfqq)) { |
@@ -2234,7 +2238,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
2234 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); | 2238 | __cfqq = rb_entry(parent, struct cfq_queue, rb_node); |
2235 | rb_key += __cfqq->rb_key; | 2239 | rb_key += __cfqq->rb_key; |
2236 | } else | 2240 | } else |
2237 | rb_key += jiffies; | 2241 | rb_key += now; |
2238 | } else if (!add_front) { | 2242 | } else if (!add_front) { |
2239 | /* | 2243 | /* |
2240 | * Get our rb key offset. Subtract any residual slice | 2244 | * Get our rb key offset. Subtract any residual slice |
@@ -2242,13 +2246,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
2242 | * count indicates slice overrun, and this should position | 2246 | * count indicates slice overrun, and this should position |
2243 | * the next service time further away in the tree. | 2247 | * the next service time further away in the tree. |
2244 | */ | 2248 | */ |
2245 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; | 2249 | rb_key = cfq_slice_offset(cfqd, cfqq) + now; |
2246 | rb_key -= cfqq->slice_resid; | 2250 | rb_key -= cfqq->slice_resid; |
2247 | cfqq->slice_resid = 0; | 2251 | cfqq->slice_resid = 0; |
2248 | } else { | 2252 | } else { |
2249 | rb_key = -HZ; | 2253 | rb_key = -NSEC_PER_SEC; |
2250 | __cfqq = cfq_rb_first(st); | 2254 | __cfqq = cfq_rb_first(st); |
2251 | rb_key += __cfqq ? __cfqq->rb_key : jiffies; | 2255 | rb_key += __cfqq ? __cfqq->rb_key : now; |
2252 | } | 2256 | } |
2253 | 2257 | ||
2254 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) { | 2258 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) { |
@@ -2274,7 +2278,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
2274 | /* | 2278 | /* |
2275 | * sort by key, that represents service time. | 2279 | * sort by key, that represents service time. |
2276 | */ | 2280 | */ |
2277 | if (time_before(rb_key, __cfqq->rb_key)) | 2281 | if (rb_key < __cfqq->rb_key) |
2278 | p = &parent->rb_left; | 2282 | p = &parent->rb_left; |
2279 | else { | 2283 | else { |
2280 | p = &parent->rb_right; | 2284 | p = &parent->rb_right; |
@@ -2574,7 +2578,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, | |||
2574 | * reposition in fifo if next is older than rq | 2578 | * reposition in fifo if next is older than rq |
2575 | */ | 2579 | */ |
2576 | if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && | 2580 | if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && |
2577 | time_before(next->fifo_time, rq->fifo_time) && | 2581 | next->fifo_time < rq->fifo_time && |
2578 | cfqq == RQ_CFQQ(next)) { | 2582 | cfqq == RQ_CFQQ(next)) { |
2579 | list_move(&rq->queuelist, &next->queuelist); | 2583 | list_move(&rq->queuelist, &next->queuelist); |
2580 | rq->fifo_time = next->fifo_time; | 2584 | rq->fifo_time = next->fifo_time; |
@@ -2635,7 +2639,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, | |||
2635 | cfqd->serving_wl_class, cfqd->serving_wl_type); | 2639 | cfqd->serving_wl_class, cfqd->serving_wl_type); |
2636 | cfqg_stats_update_avg_queue_size(cfqq->cfqg); | 2640 | cfqg_stats_update_avg_queue_size(cfqq->cfqg); |
2637 | cfqq->slice_start = 0; | 2641 | cfqq->slice_start = 0; |
2638 | cfqq->dispatch_start = jiffies; | 2642 | cfqq->dispatch_start = ktime_get_ns(); |
2639 | cfqq->allocated_slice = 0; | 2643 | cfqq->allocated_slice = 0; |
2640 | cfqq->slice_end = 0; | 2644 | cfqq->slice_end = 0; |
2641 | cfqq->slice_dispatch = 0; | 2645 | cfqq->slice_dispatch = 0; |
@@ -2684,8 +2688,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
2684 | if (cfq_cfqq_slice_new(cfqq)) | 2688 | if (cfq_cfqq_slice_new(cfqq)) |
2685 | cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); | 2689 | cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); |
2686 | else | 2690 | else |
2687 | cfqq->slice_resid = cfqq->slice_end - jiffies; | 2691 | cfqq->slice_resid = cfqq->slice_end - ktime_get_ns(); |
2688 | cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); | 2692 | cfq_log_cfqq(cfqd, cfqq, "resid=%llu", cfqq->slice_resid); |
2689 | } | 2693 | } |
2690 | 2694 | ||
2691 | cfq_group_served(cfqd, cfqq->cfqg, cfqq); | 2695 | cfq_group_served(cfqd, cfqq->cfqg, cfqq); |
@@ -2919,7 +2923,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
2919 | struct cfq_queue *cfqq = cfqd->active_queue; | 2923 | struct cfq_queue *cfqq = cfqd->active_queue; |
2920 | struct cfq_rb_root *st = cfqq->service_tree; | 2924 | struct cfq_rb_root *st = cfqq->service_tree; |
2921 | struct cfq_io_cq *cic; | 2925 | struct cfq_io_cq *cic; |
2922 | unsigned long sl, group_idle = 0; | 2926 | u64 sl, group_idle = 0; |
2927 | u64 now = ktime_get_ns(); | ||
2923 | 2928 | ||
2924 | /* | 2929 | /* |
2925 | * SSD device without seek penalty, disable idling. But only do so | 2930 | * SSD device without seek penalty, disable idling. But only do so |
@@ -2962,8 +2967,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
2962 | * time slice. | 2967 | * time slice. |
2963 | */ | 2968 | */ |
2964 | if (sample_valid(cic->ttime.ttime_samples) && | 2969 | if (sample_valid(cic->ttime.ttime_samples) && |
2965 | (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) { | 2970 | (cfqq->slice_end - now < cic->ttime.ttime_mean)) { |
2966 | cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", | 2971 | cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu", |
2967 | cic->ttime.ttime_mean); | 2972 | cic->ttime.ttime_mean); |
2968 | return; | 2973 | return; |
2969 | } | 2974 | } |
@@ -2984,9 +2989,9 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
2984 | else | 2989 | else |
2985 | sl = cfqd->cfq_slice_idle; | 2990 | sl = cfqd->cfq_slice_idle; |
2986 | 2991 | ||
2987 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); | 2992 | mod_timer(&cfqd->idle_slice_timer, now + sl); |
2988 | cfqg_stats_set_start_idle_time(cfqq->cfqg); | 2993 | cfqg_stats_set_start_idle_time(cfqq->cfqg); |
2989 | cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, | 2994 | cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl, |
2990 | group_idle ? 1 : 0); | 2995 | group_idle ? 1 : 0); |
2991 | } | 2996 | } |
2992 | 2997 | ||
@@ -3026,7 +3031,7 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq) | |||
3026 | return NULL; | 3031 | return NULL; |
3027 | 3032 | ||
3028 | rq = rq_entry_fifo(cfqq->fifo.next); | 3033 | rq = rq_entry_fifo(cfqq->fifo.next); |
3029 | if (time_before(jiffies, rq->fifo_time)) | 3034 | if (ktime_get_ns() < rq->fifo_time) |
3030 | rq = NULL; | 3035 | rq = NULL; |
3031 | 3036 | ||
3032 | cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); | 3037 | cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); |
@@ -3104,14 +3109,14 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd, | |||
3104 | struct cfq_queue *queue; | 3109 | struct cfq_queue *queue; |
3105 | int i; | 3110 | int i; |
3106 | bool key_valid = false; | 3111 | bool key_valid = false; |
3107 | unsigned long lowest_key = 0; | 3112 | u64 lowest_key = 0; |
3108 | enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; | 3113 | enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; |
3109 | 3114 | ||
3110 | for (i = 0; i <= SYNC_WORKLOAD; ++i) { | 3115 | for (i = 0; i <= SYNC_WORKLOAD; ++i) { |
3111 | /* select the one with lowest rb_key */ | 3116 | /* select the one with lowest rb_key */ |
3112 | queue = cfq_rb_first(st_for(cfqg, wl_class, i)); | 3117 | queue = cfq_rb_first(st_for(cfqg, wl_class, i)); |
3113 | if (queue && | 3118 | if (queue && |
3114 | (!key_valid || time_before(queue->rb_key, lowest_key))) { | 3119 | (!key_valid || queue->rb_key < lowest_key)) { |
3115 | lowest_key = queue->rb_key; | 3120 | lowest_key = queue->rb_key; |
3116 | cur_best = i; | 3121 | cur_best = i; |
3117 | key_valid = true; | 3122 | key_valid = true; |
@@ -3124,11 +3129,12 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd, | |||
3124 | static void | 3129 | static void |
3125 | choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) | 3130 | choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) |
3126 | { | 3131 | { |
3127 | unsigned slice; | 3132 | u64 slice; |
3128 | unsigned count; | 3133 | unsigned count; |
3129 | struct cfq_rb_root *st; | 3134 | struct cfq_rb_root *st; |
3130 | unsigned group_slice; | 3135 | u64 group_slice; |
3131 | enum wl_class_t original_class = cfqd->serving_wl_class; | 3136 | enum wl_class_t original_class = cfqd->serving_wl_class; |
3137 | u64 now = ktime_get_ns(); | ||
3132 | 3138 | ||
3133 | /* Choose next priority. RT > BE > IDLE */ | 3139 | /* Choose next priority. RT > BE > IDLE */ |
3134 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) | 3140 | if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) |
@@ -3137,7 +3143,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
3137 | cfqd->serving_wl_class = BE_WORKLOAD; | 3143 | cfqd->serving_wl_class = BE_WORKLOAD; |
3138 | else { | 3144 | else { |
3139 | cfqd->serving_wl_class = IDLE_WORKLOAD; | 3145 | cfqd->serving_wl_class = IDLE_WORKLOAD; |
3140 | cfqd->workload_expires = jiffies + 1; | 3146 | cfqd->workload_expires = now + jiffies_to_nsecs(1); |
3141 | return; | 3147 | return; |
3142 | } | 3148 | } |
3143 | 3149 | ||
@@ -3155,7 +3161,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
3155 | /* | 3161 | /* |
3156 | * check workload expiration, and that we still have other queues ready | 3162 | * check workload expiration, and that we still have other queues ready |
3157 | */ | 3163 | */ |
3158 | if (count && !time_after(jiffies, cfqd->workload_expires)) | 3164 | if (count && !(now > cfqd->workload_expires)) |
3159 | return; | 3165 | return; |
3160 | 3166 | ||
3161 | new_workload: | 3167 | new_workload: |
@@ -3172,13 +3178,13 @@ new_workload: | |||
3172 | */ | 3178 | */ |
3173 | group_slice = cfq_group_slice(cfqd, cfqg); | 3179 | group_slice = cfq_group_slice(cfqd, cfqg); |
3174 | 3180 | ||
3175 | slice = group_slice * count / | 3181 | slice = div_u64(group_slice * count, |
3176 | max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class], | 3182 | max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class], |
3177 | cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd, | 3183 | cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd, |
3178 | cfqg)); | 3184 | cfqg))); |
3179 | 3185 | ||
3180 | if (cfqd->serving_wl_type == ASYNC_WORKLOAD) { | 3186 | if (cfqd->serving_wl_type == ASYNC_WORKLOAD) { |
3181 | unsigned int tmp; | 3187 | u64 tmp; |
3182 | 3188 | ||
3183 | /* | 3189 | /* |
3184 | * Async queues are currently system wide. Just taking | 3190 | * Async queues are currently system wide. Just taking |
@@ -3189,19 +3195,19 @@ new_workload: | |||
3189 | */ | 3195 | */ |
3190 | tmp = cfqd->cfq_target_latency * | 3196 | tmp = cfqd->cfq_target_latency * |
3191 | cfqg_busy_async_queues(cfqd, cfqg); | 3197 | cfqg_busy_async_queues(cfqd, cfqg); |
3192 | tmp = tmp/cfqd->busy_queues; | 3198 | tmp = div_u64(tmp, cfqd->busy_queues); |
3193 | slice = min_t(unsigned, slice, tmp); | 3199 | slice = min_t(u64, slice, tmp); |
3194 | 3200 | ||
3195 | /* async workload slice is scaled down according to | 3201 | /* async workload slice is scaled down according to |
3196 | * the sync/async slice ratio. */ | 3202 | * the sync/async slice ratio. */ |
3197 | slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; | 3203 | slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]); |
3198 | } else | 3204 | } else |
3199 | /* sync workload slice is at least 2 * cfq_slice_idle */ | 3205 | /* sync workload slice is at least 2 * cfq_slice_idle */ |
3200 | slice = max(slice, 2 * cfqd->cfq_slice_idle); | 3206 | slice = max(slice, 2 * cfqd->cfq_slice_idle); |
3201 | 3207 | ||
3202 | slice = max_t(unsigned, slice, CFQ_MIN_TT); | 3208 | slice = max_t(u64, slice, CFQ_MIN_TT); |
3203 | cfq_log(cfqd, "workload slice:%d", slice); | 3209 | cfq_log(cfqd, "workload slice:%llu", slice); |
3204 | cfqd->workload_expires = jiffies + slice; | 3210 | cfqd->workload_expires = now + slice; |
3205 | } | 3211 | } |
3206 | 3212 | ||
3207 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) | 3213 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) |
@@ -3219,16 +3225,17 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) | |||
3219 | static void cfq_choose_cfqg(struct cfq_data *cfqd) | 3225 | static void cfq_choose_cfqg(struct cfq_data *cfqd) |
3220 | { | 3226 | { |
3221 | struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); | 3227 | struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); |
3228 | u64 now = ktime_get_ns(); | ||
3222 | 3229 | ||
3223 | cfqd->serving_group = cfqg; | 3230 | cfqd->serving_group = cfqg; |
3224 | 3231 | ||
3225 | /* Restore the workload type data */ | 3232 | /* Restore the workload type data */ |
3226 | if (cfqg->saved_wl_slice) { | 3233 | if (cfqg->saved_wl_slice) { |
3227 | cfqd->workload_expires = jiffies + cfqg->saved_wl_slice; | 3234 | cfqd->workload_expires = now + cfqg->saved_wl_slice; |
3228 | cfqd->serving_wl_type = cfqg->saved_wl_type; | 3235 | cfqd->serving_wl_type = cfqg->saved_wl_type; |
3229 | cfqd->serving_wl_class = cfqg->saved_wl_class; | 3236 | cfqd->serving_wl_class = cfqg->saved_wl_class; |
3230 | } else | 3237 | } else |
3231 | cfqd->workload_expires = jiffies - 1; | 3238 | cfqd->workload_expires = now - 1; |
3232 | 3239 | ||
3233 | choose_wl_class_and_type(cfqd, cfqg); | 3240 | choose_wl_class_and_type(cfqd, cfqg); |
3234 | } | 3241 | } |
@@ -3240,6 +3247,7 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd) | |||
3240 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | 3247 | static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) |
3241 | { | 3248 | { |
3242 | struct cfq_queue *cfqq, *new_cfqq = NULL; | 3249 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
3250 | u64 now = ktime_get_ns(); | ||
3243 | 3251 | ||
3244 | cfqq = cfqd->active_queue; | 3252 | cfqq = cfqd->active_queue; |
3245 | if (!cfqq) | 3253 | if (!cfqq) |
@@ -3311,7 +3319,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) | |||
3311 | **/ | 3319 | **/ |
3312 | if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && | 3320 | if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && |
3313 | (cfq_cfqq_slice_new(cfqq) || | 3321 | (cfq_cfqq_slice_new(cfqq) || |
3314 | (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { | 3322 | (cfqq->slice_end - now > now - cfqq->slice_start))) { |
3315 | cfq_clear_cfqq_deep(cfqq); | 3323 | cfq_clear_cfqq_deep(cfqq); |
3316 | cfq_clear_cfqq_idle_window(cfqq); | 3324 | cfq_clear_cfqq_idle_window(cfqq); |
3317 | } | 3325 | } |
@@ -3389,11 +3397,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) | |||
3389 | static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, | 3397 | static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, |
3390 | struct cfq_queue *cfqq) | 3398 | struct cfq_queue *cfqq) |
3391 | { | 3399 | { |
3400 | u64 now = ktime_get_ns(); | ||
3401 | |||
3392 | /* the queue hasn't finished any request, can't estimate */ | 3402 | /* the queue hasn't finished any request, can't estimate */ |
3393 | if (cfq_cfqq_slice_new(cfqq)) | 3403 | if (cfq_cfqq_slice_new(cfqq)) |
3394 | return true; | 3404 | return true; |
3395 | if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, | 3405 | if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end) |
3396 | cfqq->slice_end)) | ||
3397 | return true; | 3406 | return true; |
3398 | 3407 | ||
3399 | return false; | 3408 | return false; |
@@ -3468,10 +3477,10 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
3468 | * based on the last sync IO we serviced | 3477 | * based on the last sync IO we serviced |
3469 | */ | 3478 | */ |
3470 | if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { | 3479 | if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { |
3471 | unsigned long last_sync = jiffies - cfqd->last_delayed_sync; | 3480 | u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync; |
3472 | unsigned int depth; | 3481 | unsigned int depth; |
3473 | 3482 | ||
3474 | depth = last_sync / cfqd->cfq_slice[1]; | 3483 | depth = div64_u64(last_sync, cfqd->cfq_slice[1]); |
3475 | if (!depth && !cfqq->dispatched) | 3484 | if (!depth && !cfqq->dispatched) |
3476 | depth = 1; | 3485 | depth = 1; |
3477 | if (depth < max_dispatch) | 3486 | if (depth < max_dispatch) |
@@ -3554,7 +3563,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) | |||
3554 | if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && | 3563 | if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && |
3555 | cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || | 3564 | cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || |
3556 | cfq_class_idle(cfqq))) { | 3565 | cfq_class_idle(cfqq))) { |
3557 | cfqq->slice_end = jiffies + 1; | 3566 | cfqq->slice_end = ktime_get_ns() + 1; |
3558 | cfq_slice_expired(cfqd, 0); | 3567 | cfq_slice_expired(cfqd, 0); |
3559 | } | 3568 | } |
3560 | 3569 | ||
@@ -3632,7 +3641,7 @@ static void cfq_init_icq(struct io_cq *icq) | |||
3632 | { | 3641 | { |
3633 | struct cfq_io_cq *cic = icq_to_cic(icq); | 3642 | struct cfq_io_cq *cic = icq_to_cic(icq); |
3634 | 3643 | ||
3635 | cic->ttime.last_end_request = jiffies; | 3644 | cic->ttime.last_end_request = ktime_get_ns(); |
3636 | } | 3645 | } |
3637 | 3646 | ||
3638 | static void cfq_exit_icq(struct io_cq *icq) | 3647 | static void cfq_exit_icq(struct io_cq *icq) |
@@ -3853,14 +3862,15 @@ out: | |||
3853 | } | 3862 | } |
3854 | 3863 | ||
3855 | static void | 3864 | static void |
3856 | __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) | 3865 | __cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle) |
3857 | { | 3866 | { |
3858 | unsigned long elapsed = jiffies - ttime->last_end_request; | 3867 | u64 elapsed = ktime_get_ns() - ttime->last_end_request; |
3859 | elapsed = min(elapsed, 2UL * slice_idle); | 3868 | elapsed = min(elapsed, 2UL * slice_idle); |
3860 | 3869 | ||
3861 | ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; | 3870 | ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; |
3862 | ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8; | 3871 | ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8); |
3863 | ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples; | 3872 | ttime->ttime_mean = div64_ul(ttime->ttime_total + 128, |
3873 | ttime->ttime_samples); | ||
3864 | } | 3874 | } |
3865 | 3875 | ||
3866 | static void | 3876 | static void |
@@ -4113,7 +4123,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
4113 | cfq_log_cfqq(cfqd, cfqq, "insert_request"); | 4123 | cfq_log_cfqq(cfqd, cfqq, "insert_request"); |
4114 | cfq_init_prio_data(cfqq, RQ_CIC(rq)); | 4124 | cfq_init_prio_data(cfqq, RQ_CIC(rq)); |
4115 | 4125 | ||
4116 | rq->fifo_time = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; | 4126 | rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; |
4117 | list_add_tail(&rq->queuelist, &cfqq->fifo); | 4127 | list_add_tail(&rq->queuelist, &cfqq->fifo); |
4118 | cfq_add_rq_rb(rq); | 4128 | cfq_add_rq_rb(rq); |
4119 | cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq), | 4129 | cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq), |
@@ -4161,6 +4171,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd) | |||
4161 | static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 4171 | static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
4162 | { | 4172 | { |
4163 | struct cfq_io_cq *cic = cfqd->active_cic; | 4173 | struct cfq_io_cq *cic = cfqd->active_cic; |
4174 | u64 now = ktime_get_ns(); | ||
4164 | 4175 | ||
4165 | /* If the queue already has requests, don't wait */ | 4176 | /* If the queue already has requests, don't wait */ |
4166 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) | 4177 | if (!RB_EMPTY_ROOT(&cfqq->sort_list)) |
@@ -4179,7 +4190,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
4179 | 4190 | ||
4180 | /* if slice left is less than think time, wait busy */ | 4191 | /* if slice left is less than think time, wait busy */ |
4181 | if (cic && sample_valid(cic->ttime.ttime_samples) | 4192 | if (cic && sample_valid(cic->ttime.ttime_samples) |
4182 | && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) | 4193 | && (cfqq->slice_end - now < cic->ttime.ttime_mean)) |
4183 | return true; | 4194 | return true; |
4184 | 4195 | ||
4185 | /* | 4196 | /* |
@@ -4189,7 +4200,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
4189 | * case where think time is less than a jiffy, mark the queue wait | 4200 | * case where think time is less than a jiffy, mark the queue wait |
4190 | * busy if only 1 jiffy is left in the slice. | 4201 | * busy if only 1 jiffy is left in the slice. |
4191 | */ | 4202 | */ |
4192 | if (cfqq->slice_end - jiffies == 1) | 4203 | if (cfqq->slice_end - now <= jiffies_to_nsecs(1)) |
4193 | return true; | 4204 | return true; |
4194 | 4205 | ||
4195 | return false; | 4206 | return false; |
@@ -4200,9 +4211,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
4200 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 4211 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
4201 | struct cfq_data *cfqd = cfqq->cfqd; | 4212 | struct cfq_data *cfqd = cfqq->cfqd; |
4202 | const int sync = rq_is_sync(rq); | 4213 | const int sync = rq_is_sync(rq); |
4203 | unsigned long now; | 4214 | u64 now = ktime_get_ns(); |
4204 | 4215 | ||
4205 | now = jiffies; | ||
4206 | cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", | 4216 | cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", |
4207 | !!(rq->cmd_flags & REQ_NOIDLE)); | 4217 | !!(rq->cmd_flags & REQ_NOIDLE)); |
4208 | 4218 | ||
@@ -4231,7 +4241,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
4231 | cfqq_type(cfqq)); | 4241 | cfqq_type(cfqq)); |
4232 | 4242 | ||
4233 | st->ttime.last_end_request = now; | 4243 | st->ttime.last_end_request = now; |
4234 | if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) | 4244 | if (!(rq->start_time + cfqd->cfq_fifo_expire[1] > now)) |
4235 | cfqd->last_delayed_sync = now; | 4245 | cfqd->last_delayed_sync = now; |
4236 | } | 4246 | } |
4237 | 4247 | ||
@@ -4256,10 +4266,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
4256 | * the queue. | 4266 | * the queue. |
4257 | */ | 4267 | */ |
4258 | if (cfq_should_wait_busy(cfqd, cfqq)) { | 4268 | if (cfq_should_wait_busy(cfqd, cfqq)) { |
4259 | unsigned long extend_sl = cfqd->cfq_slice_idle; | 4269 | u64 extend_sl = cfqd->cfq_slice_idle; |
4260 | if (!cfqd->cfq_slice_idle) | 4270 | if (!cfqd->cfq_slice_idle) |
4261 | extend_sl = cfqd->cfq_group_idle; | 4271 | extend_sl = cfqd->cfq_group_idle; |
4262 | cfqq->slice_end = jiffies + extend_sl; | 4272 | cfqq->slice_end = now + extend_sl; |
4263 | cfq_mark_cfqq_wait_busy(cfqq); | 4273 | cfq_mark_cfqq_wait_busy(cfqq); |
4264 | cfq_log_cfqq(cfqd, cfqq, "will busy wait"); | 4274 | cfq_log_cfqq(cfqd, cfqq, "will busy wait"); |
4265 | } | 4275 | } |
@@ -4618,7 +4628,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e) | |||
4618 | * we optimistically start assuming sync ops weren't delayed in last | 4628 | * we optimistically start assuming sync ops weren't delayed in last |
4619 | * second, in order to have larger depth for async operations. | 4629 | * second, in order to have larger depth for async operations. |
4620 | */ | 4630 | */ |
4621 | cfqd->last_delayed_sync = jiffies - HZ; | 4631 | cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC; |
4622 | return 0; | 4632 | return 0; |
4623 | 4633 | ||
4624 | out_free: | 4634 | out_free: |
@@ -4661,9 +4671,9 @@ cfq_var_store(unsigned int *var, const char *page, size_t count) | |||
4661 | static ssize_t __FUNC(struct elevator_queue *e, char *page) \ | 4671 | static ssize_t __FUNC(struct elevator_queue *e, char *page) \ |
4662 | { \ | 4672 | { \ |
4663 | struct cfq_data *cfqd = e->elevator_data; \ | 4673 | struct cfq_data *cfqd = e->elevator_data; \ |
4664 | unsigned int __data = __VAR; \ | 4674 | u64 __data = __VAR; \ |
4665 | if (__CONV) \ | 4675 | if (__CONV) \ |
4666 | __data = jiffies_to_msecs(__data); \ | 4676 | __data = div_u64(__data, NSEC_PER_MSEC); \ |
4667 | return cfq_var_show(__data, (page)); \ | 4677 | return cfq_var_show(__data, (page)); \ |
4668 | } | 4678 | } |
4669 | SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); | 4679 | SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); |
@@ -4691,7 +4701,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) | |||
4691 | else if (__data > (MAX)) \ | 4701 | else if (__data > (MAX)) \ |
4692 | __data = (MAX); \ | 4702 | __data = (MAX); \ |
4693 | if (__CONV) \ | 4703 | if (__CONV) \ |
4694 | *(__PTR) = msecs_to_jiffies(__data); \ | 4704 | *(__PTR) = (u64)__data * NSEC_PER_MSEC; \ |
4695 | else \ | 4705 | else \ |
4696 | *(__PTR) = __data; \ | 4706 | *(__PTR) = __data; \ |
4697 | return ret; \ | 4707 | return ret; \ |
@@ -4785,18 +4795,7 @@ static int __init cfq_init(void) | |||
4785 | { | 4795 | { |
4786 | int ret; | 4796 | int ret; |
4787 | 4797 | ||
4788 | /* | ||
4789 | * could be 0 on HZ < 1000 setups | ||
4790 | */ | ||
4791 | if (!cfq_slice_async) | ||
4792 | cfq_slice_async = 1; | ||
4793 | if (!cfq_slice_idle) | ||
4794 | cfq_slice_idle = 1; | ||
4795 | |||
4796 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 4798 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
4797 | if (!cfq_group_idle) | ||
4798 | cfq_group_idle = 1; | ||
4799 | |||
4800 | ret = blkcg_policy_register(&blkcg_policy_cfq); | 4799 | ret = blkcg_policy_register(&blkcg_policy_cfq); |
4801 | if (ret) | 4800 | if (ret) |
4802 | return ret; | 4801 | return ret; |