author		Jeff Moyer <jmoyer@redhat.com>	2016-06-08 10:55:34 -0400
committer	Jens Axboe <axboe@fb.com>	2016-06-08 10:55:34 -0400
commit		9a7f38c42c2b92391d9dabaf9f51df7cfe5608e4 (patch)
tree		be1306256203e951448399175af96a53175cef9a /block
parent		28a8f0d317bf225ff15008f5dd66ae16242dd843 (diff)
cfq-iosched: Convert from jiffies to nanoseconds
Convert all time-keeping in the CFQ IO scheduler from jiffies to nanoseconds so that we can later make the intervals more fine-grained than jiffies. One jiffy is several milliseconds, and even for today's rotating disks that is a noticeable amount of time, and thus we leave the disk unnecessarily idle.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
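For illustration only (not part of the patch): a minimal, stand-alone user-space sketch of why jiffy granularity is too coarse for short idle windows. The HZ value (250) and the constant definitions below are assumptions made for this example, not code from this commit.

/* Hypothetical example: a 2 ms idle window expressed in jiffies rounds
 * down to zero ticks at an assumed HZ of 250, while a nanosecond clock
 * keeps the full resolution. */
#include <stdio.h>
#include <stdint.h>

#define HZ            250ULL          /* assumed kernel tick rate */
#define NSEC_PER_SEC  1000000000ULL
#define NSEC_PER_MSEC 1000000ULL

int main(void)
{
	uint64_t idle_ns = 2 * NSEC_PER_MSEC;            /* 2 ms idle window */
	uint64_t idle_jiffies = idle_ns * HZ / NSEC_PER_SEC;

	/* prints "2 ms = 0 jiffies (HZ=250), 2000000 ns" */
	printf("2 ms = %llu jiffies (HZ=%llu), %llu ns\n",
	       (unsigned long long)idle_jiffies,
	       (unsigned long long)HZ,
	       (unsigned long long)idle_ns);
	return 0;
}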
Diffstat (limited to 'block')
-rw-r--r--	block/cfq-iosched.c	273
1 file changed, 136 insertions, 137 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b1154861f4c9..9c2e82c1ea88 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -10,7 +10,7 @@
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/blkdev.h> 11#include <linux/blkdev.h>
12#include <linux/elevator.h> 12#include <linux/elevator.h>
13#include <linux/jiffies.h> 13#include <linux/ktime.h>
14#include <linux/rbtree.h> 14#include <linux/rbtree.h>
15#include <linux/ioprio.h> 15#include <linux/ioprio.h>
16#include <linux/blktrace_api.h> 16#include <linux/blktrace_api.h>
@@ -22,28 +22,28 @@
22 */ 22 */
23/* max queue in one round of service */ 23/* max queue in one round of service */
24static const int cfq_quantum = 8; 24static const int cfq_quantum = 8;
25static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 25static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
26/* maximum backwards seek, in KiB */ 26/* maximum backwards seek, in KiB */
27static const int cfq_back_max = 16 * 1024; 27static const int cfq_back_max = 16 * 1024;
28/* penalty of a backwards seek */ 28/* penalty of a backwards seek */
29static const int cfq_back_penalty = 2; 29static const int cfq_back_penalty = 2;
30static const int cfq_slice_sync = HZ / 10; 30static const u64 cfq_slice_sync = NSEC_PER_SEC / 10;
31static int cfq_slice_async = HZ / 25; 31static u64 cfq_slice_async = NSEC_PER_SEC / 25;
32static const int cfq_slice_async_rq = 2; 32static const int cfq_slice_async_rq = 2;
33static int cfq_slice_idle = HZ / 125; 33static u64 cfq_slice_idle = NSEC_PER_SEC / 125;
34static int cfq_group_idle = HZ / 125; 34static u64 cfq_group_idle = NSEC_PER_SEC / 125;
35static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ 35static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
36static const int cfq_hist_divisor = 4; 36static const int cfq_hist_divisor = 4;
37 37
38/* 38/*
39 * offset from end of service tree 39 * offset from end of service tree
40 */ 40 */
41#define CFQ_IDLE_DELAY (HZ / 5) 41#define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5)
42 42
43/* 43/*
44 * below this threshold, we consider thinktime immediate 44 * below this threshold, we consider thinktime immediate
45 */ 45 */
46#define CFQ_MIN_TT (2) 46#define CFQ_MIN_TT (2 * NSEC_PER_SEC / HZ)
47 47
48#define CFQ_SLICE_SCALE (5) 48#define CFQ_SLICE_SCALE (5)
49#define CFQ_HW_QUEUE_MIN (5) 49#define CFQ_HW_QUEUE_MIN (5)
@@ -73,11 +73,11 @@ static struct kmem_cache *cfq_pool;
73#define CFQ_WEIGHT_LEGACY_MAX 1000 73#define CFQ_WEIGHT_LEGACY_MAX 1000
74 74
75struct cfq_ttime { 75struct cfq_ttime {
76 unsigned long last_end_request; 76 u64 last_end_request;
77 77
78 unsigned long ttime_total; 78 u64 ttime_total;
79 u64 ttime_mean;
79 unsigned long ttime_samples; 80 unsigned long ttime_samples;
80 unsigned long ttime_mean;
81}; 81};
82 82
83/* 83/*
@@ -94,7 +94,7 @@ struct cfq_rb_root {
94 struct cfq_ttime ttime; 94 struct cfq_ttime ttime;
95}; 95};
96#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ 96#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
97 .ttime = {.last_end_request = jiffies,},} 97 .ttime = {.last_end_request = ktime_get_ns(),},}
98 98
99/* 99/*
100 * Per process-grouping structure 100 * Per process-grouping structure
@@ -109,7 +109,7 @@ struct cfq_queue {
109 /* service_tree member */ 109 /* service_tree member */
110 struct rb_node rb_node; 110 struct rb_node rb_node;
111 /* service_tree key */ 111 /* service_tree key */
112 unsigned long rb_key; 112 u64 rb_key;
113 /* prio tree member */ 113 /* prio tree member */
114 struct rb_node p_node; 114 struct rb_node p_node;
115 /* prio tree root we belong to, if any */ 115 /* prio tree root we belong to, if any */
@@ -126,13 +126,13 @@ struct cfq_queue {
126 struct list_head fifo; 126 struct list_head fifo;
127 127
128 /* time when queue got scheduled in to dispatch first request. */ 128 /* time when queue got scheduled in to dispatch first request. */
129 unsigned long dispatch_start; 129 u64 dispatch_start;
130 unsigned int allocated_slice; 130 u64 allocated_slice;
131 unsigned int slice_dispatch; 131 u64 slice_dispatch;
132 /* time when first request from queue completed and slice started. */ 132 /* time when first request from queue completed and slice started. */
133 unsigned long slice_start; 133 u64 slice_start;
134 unsigned long slice_end; 134 u64 slice_end;
135 long slice_resid; 135 u64 slice_resid;
136 136
137 /* pending priority requests */ 137 /* pending priority requests */
138 int prio_pending; 138 int prio_pending;
@@ -290,7 +290,7 @@ struct cfq_group {
290 struct cfq_rb_root service_trees[2][3]; 290 struct cfq_rb_root service_trees[2][3];
291 struct cfq_rb_root service_tree_idle; 291 struct cfq_rb_root service_tree_idle;
292 292
293 unsigned long saved_wl_slice; 293 u64 saved_wl_slice;
294 enum wl_type_t saved_wl_type; 294 enum wl_type_t saved_wl_type;
295 enum wl_class_t saved_wl_class; 295 enum wl_class_t saved_wl_class;
296 296
@@ -329,7 +329,7 @@ struct cfq_data {
329 */ 329 */
330 enum wl_class_t serving_wl_class; 330 enum wl_class_t serving_wl_class;
331 enum wl_type_t serving_wl_type; 331 enum wl_type_t serving_wl_type;
332 unsigned long workload_expires; 332 u64 workload_expires;
333 struct cfq_group *serving_group; 333 struct cfq_group *serving_group;
334 334
335 /* 335 /*
@@ -374,22 +374,22 @@ struct cfq_data {
374 * tunables, see top of file 374 * tunables, see top of file
375 */ 375 */
376 unsigned int cfq_quantum; 376 unsigned int cfq_quantum;
377 unsigned int cfq_fifo_expire[2];
378 unsigned int cfq_back_penalty; 377 unsigned int cfq_back_penalty;
379 unsigned int cfq_back_max; 378 unsigned int cfq_back_max;
380 unsigned int cfq_slice[2];
381 unsigned int cfq_slice_async_rq; 379 unsigned int cfq_slice_async_rq;
382 unsigned int cfq_slice_idle;
383 unsigned int cfq_group_idle;
384 unsigned int cfq_latency; 380 unsigned int cfq_latency;
385 unsigned int cfq_target_latency; 381 u64 cfq_fifo_expire[2];
382 u64 cfq_slice[2];
383 u64 cfq_slice_idle;
384 u64 cfq_group_idle;
385 u64 cfq_target_latency;
386 386
387 /* 387 /*
388 * Fallback dummy cfqq for extreme OOM conditions 388 * Fallback dummy cfqq for extreme OOM conditions
389 */ 389 */
390 struct cfq_queue oom_cfqq; 390 struct cfq_queue oom_cfqq;
391 391
392 unsigned long last_delayed_sync; 392 u64 last_delayed_sync;
393}; 393};
394 394
395static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); 395static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -676,7 +676,7 @@ static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
676} 676}
677 677
678static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, 678static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
679 unsigned long time, unsigned long unaccounted_time) 679 uint64_t time, unsigned long unaccounted_time)
680{ 680{
681 blkg_stat_add(&cfqg->stats.time, time); 681 blkg_stat_add(&cfqg->stats.time, time);
682#ifdef CONFIG_DEBUG_BLK_CGROUP 682#ifdef CONFIG_DEBUG_BLK_CGROUP
@@ -788,7 +788,7 @@ static inline void cfqg_put(struct cfq_group *cfqg) { }
788static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, 788static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
789 struct cfq_group *curr_cfqg, int op, int op_flags) { } 789 struct cfq_group *curr_cfqg, int op, int op_flags) { }
790static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, 790static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
791 unsigned long time, unsigned long unaccounted_time) { } 791 uint64_t time, unsigned long unaccounted_time) { }
792static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op, 792static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
793 int op_flags) { } 793 int op_flags) { }
794static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op, 794static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
@@ -815,7 +815,7 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
815static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd, 815static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
816 struct cfq_ttime *ttime, bool group_idle) 816 struct cfq_ttime *ttime, bool group_idle)
817{ 817{
818 unsigned long slice; 818 u64 slice;
819 if (!sample_valid(ttime->ttime_samples)) 819 if (!sample_valid(ttime->ttime_samples))
820 return false; 820 return false;
821 if (group_idle) 821 if (group_idle)
@@ -938,17 +938,18 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
938 * if a queue is marked sync and has sync io queued. A sync queue with async 938 * if a queue is marked sync and has sync io queued. A sync queue with async
939 * io only, should not get full sync slice length. 939 * io only, should not get full sync slice length.
940 */ 940 */
941static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, 941static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync,
942 unsigned short prio) 942 unsigned short prio)
943{ 943{
944 const int base_slice = cfqd->cfq_slice[sync]; 944 u64 base_slice = cfqd->cfq_slice[sync];
945 u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE);
945 946
946 WARN_ON(prio >= IOPRIO_BE_NR); 947 WARN_ON(prio >= IOPRIO_BE_NR);
947 948
948 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); 949 return base_slice + (slice * (4 - prio));
949} 950}
950 951
951static inline int 952static inline u64
952cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 953cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
953{ 954{
954 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); 955 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
@@ -966,15 +967,14 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
966 * 967 *
967 * The result is also in fixed point w/ CFQ_SERVICE_SHIFT. 968 * The result is also in fixed point w/ CFQ_SERVICE_SHIFT.
968 */ 969 */
969static inline u64 cfqg_scale_charge(unsigned long charge, 970static inline u64 cfqg_scale_charge(u64 charge,
970 unsigned int vfraction) 971 unsigned int vfraction)
971{ 972{
972 u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */ 973 u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */
973 974
974 /* charge / vfraction */ 975 /* charge / vfraction */
975 c <<= CFQ_SERVICE_SHIFT; 976 c <<= CFQ_SERVICE_SHIFT;
976 do_div(c, vfraction); 977 return div_u64(c, vfraction);
977 return c;
978} 978}
979 979
980static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) 980static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
@@ -1027,16 +1027,16 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
1027 return cfqg->busy_queues_avg[rt]; 1027 return cfqg->busy_queues_avg[rt];
1028} 1028}
1029 1029
1030static inline unsigned 1030static inline u64
1031cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) 1031cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
1032{ 1032{
1033 return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT; 1033 return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT;
1034} 1034}
1035 1035
1036static inline unsigned 1036static inline u64
1037cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1037cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1038{ 1038{
1039 unsigned slice = cfq_prio_to_slice(cfqd, cfqq); 1039 u64 slice = cfq_prio_to_slice(cfqd, cfqq);
1040 if (cfqd->cfq_latency) { 1040 if (cfqd->cfq_latency) {
1041 /* 1041 /*
1042 * interested queues (we consider only the ones with the same 1042 * interested queues (we consider only the ones with the same
@@ -1044,20 +1044,22 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1044 */ 1044 */
1045 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, 1045 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
1046 cfq_class_rt(cfqq)); 1046 cfq_class_rt(cfqq));
1047 unsigned sync_slice = cfqd->cfq_slice[1]; 1047 u64 sync_slice = cfqd->cfq_slice[1];
1048 unsigned expect_latency = sync_slice * iq; 1048 u64 expect_latency = sync_slice * iq;
1049 unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg); 1049 u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
1050 1050
1051 if (expect_latency > group_slice) { 1051 if (expect_latency > group_slice) {
1052 unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; 1052 u64 base_low_slice = 2 * cfqd->cfq_slice_idle;
1053 u64 low_slice;
1054
1053 /* scale low_slice according to IO priority 1055 /* scale low_slice according to IO priority
1054 * and sync vs async */ 1056 * and sync vs async */
1055 unsigned low_slice = 1057 low_slice = div64_u64(base_low_slice*slice, sync_slice);
1056 min(slice, base_low_slice * slice / sync_slice); 1058 low_slice = min(slice, low_slice);
1057 /* the adapted slice value is scaled to fit all iqs 1059 /* the adapted slice value is scaled to fit all iqs
1058 * into the target latency */ 1060 * into the target latency */
1059 slice = max(slice * group_slice / expect_latency, 1061 slice = div64_u64(slice*group_slice, expect_latency);
1060 low_slice); 1062 slice = max(slice, low_slice);
1061 } 1063 }
1062 } 1064 }
1063 return slice; 1065 return slice;
@@ -1066,12 +1068,13 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1066static inline void 1068static inline void
1067cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1069cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1068{ 1070{
1069 unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq); 1071 u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
1072 u64 now = ktime_get_ns();
1070 1073
1071 cfqq->slice_start = jiffies; 1074 cfqq->slice_start = now;
1072 cfqq->slice_end = jiffies + slice; 1075 cfqq->slice_end = now + slice;
1073 cfqq->allocated_slice = slice; 1076 cfqq->allocated_slice = slice;
1074 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); 1077 cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now);
1075} 1078}
1076 1079
1077/* 1080/*
@@ -1083,7 +1086,7 @@ static inline bool cfq_slice_used(struct cfq_queue *cfqq)
1083{ 1086{
1084 if (cfq_cfqq_slice_new(cfqq)) 1087 if (cfq_cfqq_slice_new(cfqq))
1085 return false; 1088 return false;
1086 if (time_before(jiffies, cfqq->slice_end)) 1089 if (ktime_get_ns() < cfqq->slice_end)
1087 return false; 1090 return false;
1088 1091
1089 return true; 1092 return true;
@@ -1249,8 +1252,8 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1249 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); 1252 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
1250} 1253}
1251 1254
1252static unsigned long cfq_slice_offset(struct cfq_data *cfqd, 1255static u64 cfq_slice_offset(struct cfq_data *cfqd,
1253 struct cfq_queue *cfqq) 1256 struct cfq_queue *cfqq)
1254{ 1257{
1255 /* 1258 /*
1256 * just an approximation, should be ok. 1259 * just an approximation, should be ok.
@@ -1443,31 +1446,31 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
1443 cfqg_stats_update_dequeue(cfqg); 1446 cfqg_stats_update_dequeue(cfqg);
1444} 1447}
1445 1448
1446static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, 1449static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
1447 unsigned int *unaccounted_time) 1450 u64 *unaccounted_time)
1448{ 1451{
1449 unsigned int slice_used; 1452 u64 slice_used;
1453 u64 now = ktime_get_ns();
1450 1454
1451 /* 1455 /*
1452 * Queue got expired before even a single request completed or 1456 * Queue got expired before even a single request completed or
1453 * got expired immediately after first request completion. 1457 * got expired immediately after first request completion.
1454 */ 1458 */
1455 if (!cfqq->slice_start || cfqq->slice_start == jiffies) { 1459 if (!cfqq->slice_start || cfqq->slice_start == now) {
1456 /* 1460 /*
1457 * Also charge the seek time incurred to the group, otherwise 1461 * Also charge the seek time incurred to the group, otherwise
1458 * if there are mutiple queues in the group, each can dispatch 1462 * if there are mutiple queues in the group, each can dispatch
1459 * a single request on seeky media and cause lots of seek time 1463 * a single request on seeky media and cause lots of seek time
1460 * and group will never know it. 1464 * and group will never know it.
1461 */ 1465 */
1462 slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start), 1466 slice_used = max_t(u64, (now - cfqq->dispatch_start), 1);
1463 1);
1464 } else { 1467 } else {
1465 slice_used = jiffies - cfqq->slice_start; 1468 slice_used = now - cfqq->slice_start;
1466 if (slice_used > cfqq->allocated_slice) { 1469 if (slice_used > cfqq->allocated_slice) {
1467 *unaccounted_time = slice_used - cfqq->allocated_slice; 1470 *unaccounted_time = slice_used - cfqq->allocated_slice;
1468 slice_used = cfqq->allocated_slice; 1471 slice_used = cfqq->allocated_slice;
1469 } 1472 }
1470 if (time_after(cfqq->slice_start, cfqq->dispatch_start)) 1473 if (cfqq->slice_start > cfqq->dispatch_start)
1471 *unaccounted_time += cfqq->slice_start - 1474 *unaccounted_time += cfqq->slice_start -
1472 cfqq->dispatch_start; 1475 cfqq->dispatch_start;
1473 } 1476 }
@@ -1479,10 +1482,11 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
1479 struct cfq_queue *cfqq) 1482 struct cfq_queue *cfqq)
1480{ 1483{
1481 struct cfq_rb_root *st = &cfqd->grp_service_tree; 1484 struct cfq_rb_root *st = &cfqd->grp_service_tree;
1482 unsigned int used_sl, charge, unaccounted_sl = 0; 1485 u64 used_sl, charge, unaccounted_sl = 0;
1483 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) 1486 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
1484 - cfqg->service_tree_idle.count; 1487 - cfqg->service_tree_idle.count;
1485 unsigned int vfr; 1488 unsigned int vfr;
1489 u64 now = ktime_get_ns();
1486 1490
1487 BUG_ON(nr_sync < 0); 1491 BUG_ON(nr_sync < 0);
1488 used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); 1492 used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
@@ -1504,9 +1508,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
1504 cfq_group_service_tree_add(st, cfqg); 1508 cfq_group_service_tree_add(st, cfqg);
1505 1509
1506 /* This group is being expired. Save the context */ 1510 /* This group is being expired. Save the context */
1507 if (time_after(cfqd->workload_expires, jiffies)) { 1511 if (cfqd->workload_expires > now) {
1508 cfqg->saved_wl_slice = cfqd->workload_expires 1512 cfqg->saved_wl_slice = cfqd->workload_expires - now;
1509 - jiffies;
1510 cfqg->saved_wl_type = cfqd->serving_wl_type; 1513 cfqg->saved_wl_type = cfqd->serving_wl_type;
1511 cfqg->saved_wl_class = cfqd->serving_wl_class; 1514 cfqg->saved_wl_class = cfqd->serving_wl_class;
1512 } else 1515 } else
@@ -1515,7 +1518,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
1515 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, 1518 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
1516 st->min_vdisktime); 1519 st->min_vdisktime);
1517 cfq_log_cfqq(cfqq->cfqd, cfqq, 1520 cfq_log_cfqq(cfqq->cfqd, cfqq,
1518 "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", 1521 "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu",
1519 used_sl, cfqq->slice_dispatch, charge, 1522 used_sl, cfqq->slice_dispatch, charge,
1520 iops_mode(cfqd), cfqq->nr_sectors); 1523 iops_mode(cfqd), cfqq->nr_sectors);
1521 cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl); 1524 cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl);
@@ -1538,7 +1541,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg)
1538 *st = CFQ_RB_ROOT; 1541 *st = CFQ_RB_ROOT;
1539 RB_CLEAR_NODE(&cfqg->rb_node); 1542 RB_CLEAR_NODE(&cfqg->rb_node);
1540 1543
1541 cfqg->ttime.last_end_request = jiffies; 1544 cfqg->ttime.last_end_request = ktime_get_ns();
1542} 1545}
1543 1546
1544#ifdef CONFIG_CFQ_GROUP_IOSCHED 1547#ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -2221,10 +2224,11 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2221{ 2224{
2222 struct rb_node **p, *parent; 2225 struct rb_node **p, *parent;
2223 struct cfq_queue *__cfqq; 2226 struct cfq_queue *__cfqq;
2224 unsigned long rb_key; 2227 u64 rb_key;
2225 struct cfq_rb_root *st; 2228 struct cfq_rb_root *st;
2226 int left; 2229 int left;
2227 int new_cfqq = 1; 2230 int new_cfqq = 1;
2231 u64 now = ktime_get_ns();
2228 2232
2229 st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); 2233 st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq));
2230 if (cfq_class_idle(cfqq)) { 2234 if (cfq_class_idle(cfqq)) {
@@ -2234,7 +2238,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2234 __cfqq = rb_entry(parent, struct cfq_queue, rb_node); 2238 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
2235 rb_key += __cfqq->rb_key; 2239 rb_key += __cfqq->rb_key;
2236 } else 2240 } else
2237 rb_key += jiffies; 2241 rb_key += now;
2238 } else if (!add_front) { 2242 } else if (!add_front) {
2239 /* 2243 /*
2240 * Get our rb key offset. Subtract any residual slice 2244 * Get our rb key offset. Subtract any residual slice
@@ -2242,13 +2246,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2242 * count indicates slice overrun, and this should position 2246 * count indicates slice overrun, and this should position
2243 * the next service time further away in the tree. 2247 * the next service time further away in the tree.
2244 */ 2248 */
2245 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; 2249 rb_key = cfq_slice_offset(cfqd, cfqq) + now;
2246 rb_key -= cfqq->slice_resid; 2250 rb_key -= cfqq->slice_resid;
2247 cfqq->slice_resid = 0; 2251 cfqq->slice_resid = 0;
2248 } else { 2252 } else {
2249 rb_key = -HZ; 2253 rb_key = -NSEC_PER_SEC;
2250 __cfqq = cfq_rb_first(st); 2254 __cfqq = cfq_rb_first(st);
2251 rb_key += __cfqq ? __cfqq->rb_key : jiffies; 2255 rb_key += __cfqq ? __cfqq->rb_key : now;
2252 } 2256 }
2253 2257
2254 if (!RB_EMPTY_NODE(&cfqq->rb_node)) { 2258 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
@@ -2274,7 +2278,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2274 /* 2278 /*
2275 * sort by key, that represents service time. 2279 * sort by key, that represents service time.
2276 */ 2280 */
2277 if (time_before(rb_key, __cfqq->rb_key)) 2281 if (rb_key < __cfqq->rb_key)
2278 p = &parent->rb_left; 2282 p = &parent->rb_left;
2279 else { 2283 else {
2280 p = &parent->rb_right; 2284 p = &parent->rb_right;
@@ -2574,7 +2578,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
2574 * reposition in fifo if next is older than rq 2578 * reposition in fifo if next is older than rq
2575 */ 2579 */
2576 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 2580 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
2577 time_before(next->fifo_time, rq->fifo_time) && 2581 next->fifo_time < rq->fifo_time &&
2578 cfqq == RQ_CFQQ(next)) { 2582 cfqq == RQ_CFQQ(next)) {
2579 list_move(&rq->queuelist, &next->queuelist); 2583 list_move(&rq->queuelist, &next->queuelist);
2580 rq->fifo_time = next->fifo_time; 2584 rq->fifo_time = next->fifo_time;
@@ -2635,7 +2639,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
2635 cfqd->serving_wl_class, cfqd->serving_wl_type); 2639 cfqd->serving_wl_class, cfqd->serving_wl_type);
2636 cfqg_stats_update_avg_queue_size(cfqq->cfqg); 2640 cfqg_stats_update_avg_queue_size(cfqq->cfqg);
2637 cfqq->slice_start = 0; 2641 cfqq->slice_start = 0;
2638 cfqq->dispatch_start = jiffies; 2642 cfqq->dispatch_start = ktime_get_ns();
2639 cfqq->allocated_slice = 0; 2643 cfqq->allocated_slice = 0;
2640 cfqq->slice_end = 0; 2644 cfqq->slice_end = 0;
2641 cfqq->slice_dispatch = 0; 2645 cfqq->slice_dispatch = 0;
@@ -2684,8 +2688,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2684 if (cfq_cfqq_slice_new(cfqq)) 2688 if (cfq_cfqq_slice_new(cfqq))
2685 cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); 2689 cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
2686 else 2690 else
2687 cfqq->slice_resid = cfqq->slice_end - jiffies; 2691 cfqq->slice_resid = cfqq->slice_end - ktime_get_ns();
2688 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); 2692 cfq_log_cfqq(cfqd, cfqq, "resid=%llu", cfqq->slice_resid);
2689 } 2693 }
2690 2694
2691 cfq_group_served(cfqd, cfqq->cfqg, cfqq); 2695 cfq_group_served(cfqd, cfqq->cfqg, cfqq);
@@ -2919,7 +2923,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2919 struct cfq_queue *cfqq = cfqd->active_queue; 2923 struct cfq_queue *cfqq = cfqd->active_queue;
2920 struct cfq_rb_root *st = cfqq->service_tree; 2924 struct cfq_rb_root *st = cfqq->service_tree;
2921 struct cfq_io_cq *cic; 2925 struct cfq_io_cq *cic;
2922 unsigned long sl, group_idle = 0; 2926 u64 sl, group_idle = 0;
2927 u64 now = ktime_get_ns();
2923 2928
2924 /* 2929 /*
2925 * SSD device without seek penalty, disable idling. But only do so 2930 * SSD device without seek penalty, disable idling. But only do so
@@ -2962,8 +2967,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2962 * time slice. 2967 * time slice.
2963 */ 2968 */
2964 if (sample_valid(cic->ttime.ttime_samples) && 2969 if (sample_valid(cic->ttime.ttime_samples) &&
2965 (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) { 2970 (cfqq->slice_end - now < cic->ttime.ttime_mean)) {
2966 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", 2971 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu",
2967 cic->ttime.ttime_mean); 2972 cic->ttime.ttime_mean);
2968 return; 2973 return;
2969 } 2974 }
@@ -2984,9 +2989,9 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2984 else 2989 else
2985 sl = cfqd->cfq_slice_idle; 2990 sl = cfqd->cfq_slice_idle;
2986 2991
2987 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 2992 mod_timer(&cfqd->idle_slice_timer, now + sl);
2988 cfqg_stats_set_start_idle_time(cfqq->cfqg); 2993 cfqg_stats_set_start_idle_time(cfqq->cfqg);
2989 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, 2994 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl,
2990 group_idle ? 1 : 0); 2995 group_idle ? 1 : 0);
2991} 2996}
2992 2997
@@ -3026,7 +3031,7 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
3026 return NULL; 3031 return NULL;
3027 3032
3028 rq = rq_entry_fifo(cfqq->fifo.next); 3033 rq = rq_entry_fifo(cfqq->fifo.next);
3029 if (time_before(jiffies, rq->fifo_time)) 3034 if (ktime_get_ns() < rq->fifo_time)
3030 rq = NULL; 3035 rq = NULL;
3031 3036
3032 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); 3037 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
@@ -3104,14 +3109,14 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
3104 struct cfq_queue *queue; 3109 struct cfq_queue *queue;
3105 int i; 3110 int i;
3106 bool key_valid = false; 3111 bool key_valid = false;
3107 unsigned long lowest_key = 0; 3112 u64 lowest_key = 0;
3108 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; 3113 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
3109 3114
3110 for (i = 0; i <= SYNC_WORKLOAD; ++i) { 3115 for (i = 0; i <= SYNC_WORKLOAD; ++i) {
3111 /* select the one with lowest rb_key */ 3116 /* select the one with lowest rb_key */
3112 queue = cfq_rb_first(st_for(cfqg, wl_class, i)); 3117 queue = cfq_rb_first(st_for(cfqg, wl_class, i));
3113 if (queue && 3118 if (queue &&
3114 (!key_valid || time_before(queue->rb_key, lowest_key))) { 3119 (!key_valid || queue->rb_key < lowest_key)) {
3115 lowest_key = queue->rb_key; 3120 lowest_key = queue->rb_key;
3116 cur_best = i; 3121 cur_best = i;
3117 key_valid = true; 3122 key_valid = true;
@@ -3124,11 +3129,12 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
3124static void 3129static void
3125choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) 3130choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
3126{ 3131{
3127 unsigned slice; 3132 u64 slice;
3128 unsigned count; 3133 unsigned count;
3129 struct cfq_rb_root *st; 3134 struct cfq_rb_root *st;
3130 unsigned group_slice; 3135 u64 group_slice;
3131 enum wl_class_t original_class = cfqd->serving_wl_class; 3136 enum wl_class_t original_class = cfqd->serving_wl_class;
3137 u64 now = ktime_get_ns();
3132 3138
3133 /* Choose next priority. RT > BE > IDLE */ 3139 /* Choose next priority. RT > BE > IDLE */
3134 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) 3140 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -3137,7 +3143,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
3137 cfqd->serving_wl_class = BE_WORKLOAD; 3143 cfqd->serving_wl_class = BE_WORKLOAD;
3138 else { 3144 else {
3139 cfqd->serving_wl_class = IDLE_WORKLOAD; 3145 cfqd->serving_wl_class = IDLE_WORKLOAD;
3140 cfqd->workload_expires = jiffies + 1; 3146 cfqd->workload_expires = now + jiffies_to_nsecs(1);
3141 return; 3147 return;
3142 } 3148 }
3143 3149
@@ -3155,7 +3161,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
3155 /* 3161 /*
3156 * check workload expiration, and that we still have other queues ready 3162 * check workload expiration, and that we still have other queues ready
3157 */ 3163 */
3158 if (count && !time_after(jiffies, cfqd->workload_expires)) 3164 if (count && !(now > cfqd->workload_expires))
3159 return; 3165 return;
3160 3166
3161new_workload: 3167new_workload:
@@ -3172,13 +3178,13 @@ new_workload:
3172 */ 3178 */
3173 group_slice = cfq_group_slice(cfqd, cfqg); 3179 group_slice = cfq_group_slice(cfqd, cfqg);
3174 3180
3175 slice = group_slice * count / 3181 slice = div_u64(group_slice * count,
3176 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class], 3182 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class],
3177 cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd, 3183 cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd,
3178 cfqg)); 3184 cfqg)));
3179 3185
3180 if (cfqd->serving_wl_type == ASYNC_WORKLOAD) { 3186 if (cfqd->serving_wl_type == ASYNC_WORKLOAD) {
3181 unsigned int tmp; 3187 u64 tmp;
3182 3188
3183 /* 3189 /*
3184 * Async queues are currently system wide. Just taking 3190 * Async queues are currently system wide. Just taking
@@ -3189,19 +3195,19 @@ new_workload:
3189 */ 3195 */
3190 tmp = cfqd->cfq_target_latency * 3196 tmp = cfqd->cfq_target_latency *
3191 cfqg_busy_async_queues(cfqd, cfqg); 3197 cfqg_busy_async_queues(cfqd, cfqg);
3192 tmp = tmp/cfqd->busy_queues; 3198 tmp = div_u64(tmp, cfqd->busy_queues);
3193 slice = min_t(unsigned, slice, tmp); 3199 slice = min_t(u64, slice, tmp);
3194 3200
3195 /* async workload slice is scaled down according to 3201 /* async workload slice is scaled down according to
3196 * the sync/async slice ratio. */ 3202 * the sync/async slice ratio. */
3197 slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; 3203 slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]);
3198 } else 3204 } else
3199 /* sync workload slice is at least 2 * cfq_slice_idle */ 3205 /* sync workload slice is at least 2 * cfq_slice_idle */
3200 slice = max(slice, 2 * cfqd->cfq_slice_idle); 3206 slice = max(slice, 2 * cfqd->cfq_slice_idle);
3201 3207
3202 slice = max_t(unsigned, slice, CFQ_MIN_TT); 3208 slice = max_t(u64, slice, CFQ_MIN_TT);
3203 cfq_log(cfqd, "workload slice:%d", slice); 3209 cfq_log(cfqd, "workload slice:%llu", slice);
3204 cfqd->workload_expires = jiffies + slice; 3210 cfqd->workload_expires = now + slice;
3205} 3211}
3206 3212
3207static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) 3213static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
@@ -3219,16 +3225,17 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
3219static void cfq_choose_cfqg(struct cfq_data *cfqd) 3225static void cfq_choose_cfqg(struct cfq_data *cfqd)
3220{ 3226{
3221 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); 3227 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
3228 u64 now = ktime_get_ns();
3222 3229
3223 cfqd->serving_group = cfqg; 3230 cfqd->serving_group = cfqg;
3224 3231
3225 /* Restore the workload type data */ 3232 /* Restore the workload type data */
3226 if (cfqg->saved_wl_slice) { 3233 if (cfqg->saved_wl_slice) {
3227 cfqd->workload_expires = jiffies + cfqg->saved_wl_slice; 3234 cfqd->workload_expires = now + cfqg->saved_wl_slice;
3228 cfqd->serving_wl_type = cfqg->saved_wl_type; 3235 cfqd->serving_wl_type = cfqg->saved_wl_type;
3229 cfqd->serving_wl_class = cfqg->saved_wl_class; 3236 cfqd->serving_wl_class = cfqg->saved_wl_class;
3230 } else 3237 } else
3231 cfqd->workload_expires = jiffies - 1; 3238 cfqd->workload_expires = now - 1;
3232 3239
3233 choose_wl_class_and_type(cfqd, cfqg); 3240 choose_wl_class_and_type(cfqd, cfqg);
3234} 3241}
@@ -3240,6 +3247,7 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd)
3240static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 3247static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
3241{ 3248{
3242 struct cfq_queue *cfqq, *new_cfqq = NULL; 3249 struct cfq_queue *cfqq, *new_cfqq = NULL;
3250 u64 now = ktime_get_ns();
3243 3251
3244 cfqq = cfqd->active_queue; 3252 cfqq = cfqd->active_queue;
3245 if (!cfqq) 3253 if (!cfqq)
@@ -3311,7 +3319,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
3311 **/ 3319 **/
3312 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && 3320 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
3313 (cfq_cfqq_slice_new(cfqq) || 3321 (cfq_cfqq_slice_new(cfqq) ||
3314 (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { 3322 (cfqq->slice_end - now > now - cfqq->slice_start))) {
3315 cfq_clear_cfqq_deep(cfqq); 3323 cfq_clear_cfqq_deep(cfqq);
3316 cfq_clear_cfqq_idle_window(cfqq); 3324 cfq_clear_cfqq_idle_window(cfqq);
3317 } 3325 }
@@ -3389,11 +3397,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
3389static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, 3397static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
3390 struct cfq_queue *cfqq) 3398 struct cfq_queue *cfqq)
3391{ 3399{
3400 u64 now = ktime_get_ns();
3401
3392 /* the queue hasn't finished any request, can't estimate */ 3402 /* the queue hasn't finished any request, can't estimate */
3393 if (cfq_cfqq_slice_new(cfqq)) 3403 if (cfq_cfqq_slice_new(cfqq))
3394 return true; 3404 return true;
3395 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, 3405 if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end)
3396 cfqq->slice_end))
3397 return true; 3406 return true;
3398 3407
3399 return false; 3408 return false;
@@ -3468,10 +3477,10 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3468 * based on the last sync IO we serviced 3477 * based on the last sync IO we serviced
3469 */ 3478 */
3470 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { 3479 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
3471 unsigned long last_sync = jiffies - cfqd->last_delayed_sync; 3480 u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync;
3472 unsigned int depth; 3481 unsigned int depth;
3473 3482
3474 depth = last_sync / cfqd->cfq_slice[1]; 3483 depth = div64_u64(last_sync, cfqd->cfq_slice[1]);
3475 if (!depth && !cfqq->dispatched) 3484 if (!depth && !cfqq->dispatched)
3476 depth = 1; 3485 depth = 1;
3477 if (depth < max_dispatch) 3486 if (depth < max_dispatch)
@@ -3554,7 +3563,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
3554 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && 3563 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
3555 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || 3564 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
3556 cfq_class_idle(cfqq))) { 3565 cfq_class_idle(cfqq))) {
3557 cfqq->slice_end = jiffies + 1; 3566 cfqq->slice_end = ktime_get_ns() + 1;
3558 cfq_slice_expired(cfqd, 0); 3567 cfq_slice_expired(cfqd, 0);
3559 } 3568 }
3560 3569
@@ -3632,7 +3641,7 @@ static void cfq_init_icq(struct io_cq *icq)
3632{ 3641{
3633 struct cfq_io_cq *cic = icq_to_cic(icq); 3642 struct cfq_io_cq *cic = icq_to_cic(icq);
3634 3643
3635 cic->ttime.last_end_request = jiffies; 3644 cic->ttime.last_end_request = ktime_get_ns();
3636} 3645}
3637 3646
3638static void cfq_exit_icq(struct io_cq *icq) 3647static void cfq_exit_icq(struct io_cq *icq)
@@ -3853,14 +3862,15 @@ out:
3853} 3862}
3854 3863
3855static void 3864static void
3856__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) 3865__cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle)
3857{ 3866{
3858 unsigned long elapsed = jiffies - ttime->last_end_request; 3867 u64 elapsed = ktime_get_ns() - ttime->last_end_request;
3859 elapsed = min(elapsed, 2UL * slice_idle); 3868 elapsed = min(elapsed, 2UL * slice_idle);
3860 3869
3861 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; 3870 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
3862 ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8; 3871 ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8);
3863 ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples; 3872 ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
3873 ttime->ttime_samples);
3864} 3874}
3865 3875
3866static void 3876static void
@@ -4113,7 +4123,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
4113 cfq_log_cfqq(cfqd, cfqq, "insert_request"); 4123 cfq_log_cfqq(cfqd, cfqq, "insert_request");
4114 cfq_init_prio_data(cfqq, RQ_CIC(rq)); 4124 cfq_init_prio_data(cfqq, RQ_CIC(rq));
4115 4125
4116 rq->fifo_time = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; 4126 rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
4117 list_add_tail(&rq->queuelist, &cfqq->fifo); 4127 list_add_tail(&rq->queuelist, &cfqq->fifo);
4118 cfq_add_rq_rb(rq); 4128 cfq_add_rq_rb(rq);
4119 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq), 4129 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq),
@@ -4161,6 +4171,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
4161static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) 4171static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4162{ 4172{
4163 struct cfq_io_cq *cic = cfqd->active_cic; 4173 struct cfq_io_cq *cic = cfqd->active_cic;
4174 u64 now = ktime_get_ns();
4164 4175
4165 /* If the queue already has requests, don't wait */ 4176 /* If the queue already has requests, don't wait */
4166 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 4177 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -4179,7 +4190,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4179 4190
4180 /* if slice left is less than think time, wait busy */ 4191 /* if slice left is less than think time, wait busy */
4181 if (cic && sample_valid(cic->ttime.ttime_samples) 4192 if (cic && sample_valid(cic->ttime.ttime_samples)
4182 && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) 4193 && (cfqq->slice_end - now < cic->ttime.ttime_mean))
4183 return true; 4194 return true;
4184 4195
4185 /* 4196 /*
@@ -4189,7 +4200,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4189 * case where think time is less than a jiffy, mark the queue wait 4200 * case where think time is less than a jiffy, mark the queue wait
4190 * busy if only 1 jiffy is left in the slice. 4201 * busy if only 1 jiffy is left in the slice.
4191 */ 4202 */
4192 if (cfqq->slice_end - jiffies == 1) 4203 if (cfqq->slice_end - now <= jiffies_to_nsecs(1))
4193 return true; 4204 return true;
4194 4205
4195 return false; 4206 return false;
@@ -4200,9 +4211,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4200 struct cfq_queue *cfqq = RQ_CFQQ(rq); 4211 struct cfq_queue *cfqq = RQ_CFQQ(rq);
4201 struct cfq_data *cfqd = cfqq->cfqd; 4212 struct cfq_data *cfqd = cfqq->cfqd;
4202 const int sync = rq_is_sync(rq); 4213 const int sync = rq_is_sync(rq);
4203 unsigned long now; 4214 u64 now = ktime_get_ns();
4204 4215
4205 now = jiffies;
4206 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", 4216 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
4207 !!(rq->cmd_flags & REQ_NOIDLE)); 4217 !!(rq->cmd_flags & REQ_NOIDLE));
4208 4218
@@ -4231,7 +4241,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4231 cfqq_type(cfqq)); 4241 cfqq_type(cfqq));
4232 4242
4233 st->ttime.last_end_request = now; 4243 st->ttime.last_end_request = now;
4234 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) 4244 if (!(rq->start_time + cfqd->cfq_fifo_expire[1] > now))
4235 cfqd->last_delayed_sync = now; 4245 cfqd->last_delayed_sync = now;
4236 } 4246 }
4237 4247
@@ -4256,10 +4266,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4256 * the queue. 4266 * the queue.
4257 */ 4267 */
4258 if (cfq_should_wait_busy(cfqd, cfqq)) { 4268 if (cfq_should_wait_busy(cfqd, cfqq)) {
4259 unsigned long extend_sl = cfqd->cfq_slice_idle; 4269 u64 extend_sl = cfqd->cfq_slice_idle;
4260 if (!cfqd->cfq_slice_idle) 4270 if (!cfqd->cfq_slice_idle)
4261 extend_sl = cfqd->cfq_group_idle; 4271 extend_sl = cfqd->cfq_group_idle;
4262 cfqq->slice_end = jiffies + extend_sl; 4272 cfqq->slice_end = now + extend_sl;
4263 cfq_mark_cfqq_wait_busy(cfqq); 4273 cfq_mark_cfqq_wait_busy(cfqq);
4264 cfq_log_cfqq(cfqd, cfqq, "will busy wait"); 4274 cfq_log_cfqq(cfqd, cfqq, "will busy wait");
4265 } 4275 }
@@ -4618,7 +4628,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
4618 * we optimistically start assuming sync ops weren't delayed in last 4628 * we optimistically start assuming sync ops weren't delayed in last
4619 * second, in order to have larger depth for async operations. 4629 * second, in order to have larger depth for async operations.
4620 */ 4630 */
4621 cfqd->last_delayed_sync = jiffies - HZ; 4631 cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC;
4622 return 0; 4632 return 0;
4623 4633
4624out_free: 4634out_free:
@@ -4661,9 +4671,9 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
4661static ssize_t __FUNC(struct elevator_queue *e, char *page) \ 4671static ssize_t __FUNC(struct elevator_queue *e, char *page) \
4662{ \ 4672{ \
4663 struct cfq_data *cfqd = e->elevator_data; \ 4673 struct cfq_data *cfqd = e->elevator_data; \
4664 unsigned int __data = __VAR; \ 4674 u64 __data = __VAR; \
4665 if (__CONV) \ 4675 if (__CONV) \
4666 __data = jiffies_to_msecs(__data); \ 4676 __data = div_u64(__data, NSEC_PER_MSEC); \
4667 return cfq_var_show(__data, (page)); \ 4677 return cfq_var_show(__data, (page)); \
4668} 4678}
4669SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 4679SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
@@ -4691,7 +4701,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
4691 else if (__data > (MAX)) \ 4701 else if (__data > (MAX)) \
4692 __data = (MAX); \ 4702 __data = (MAX); \
4693 if (__CONV) \ 4703 if (__CONV) \
4694 *(__PTR) = msecs_to_jiffies(__data); \ 4704 *(__PTR) = (u64)__data * NSEC_PER_MSEC; \
4695 else \ 4705 else \
4696 *(__PTR) = __data; \ 4706 *(__PTR) = __data; \
4697 return ret; \ 4707 return ret; \
@@ -4785,18 +4795,7 @@ static int __init cfq_init(void)
4785{ 4795{
4786 int ret; 4796 int ret;
4787 4797
4788 /*
4789 * could be 0 on HZ < 1000 setups
4790 */
4791 if (!cfq_slice_async)
4792 cfq_slice_async = 1;
4793 if (!cfq_slice_idle)
4794 cfq_slice_idle = 1;
4795
4796#ifdef CONFIG_CFQ_GROUP_IOSCHED 4798#ifdef CONFIG_CFQ_GROUP_IOSCHED
4797 if (!cfq_group_idle)
4798 cfq_group_idle = 1;
4799
4800 ret = blkcg_policy_register(&blkcg_policy_cfq); 4799 ret = blkcg_policy_register(&blkcg_policy_cfq);
4801 if (ret) 4800 if (ret)
4802 return ret; 4801 return ret;