author     Linus Torvalds <torvalds@linux-foundation.org>  2011-07-25 13:33:36 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-07-25 13:33:36 -0400
commit     096a705bbc080a4041636d07514560da8d78acbe (patch)
tree       38c3c01225709ffa53419083ea6332f8a72610de /block
parent     fea80311a939a746533a6d7e7c3183729d6a3faf (diff)
parent     5757a6d76cdf6dda2a492c09b985c015e86779b1 (diff)
Merge branch 'for-3.1/core' of git://git.kernel.dk/linux-block
* 'for-3.1/core' of git://git.kernel.dk/linux-block: (24 commits)
  block: strict rq_affinity
  backing-dev: use synchronize_rcu_expedited instead of synchronize_rcu
  block: fix patch import error in max_discard_sectors check
  block: reorder request_queue to remove 64 bit alignment padding
  CFQ: add think time check for group
  CFQ: add think time check for service tree
  CFQ: move think time check variables to a separate struct
  fixlet: Remove fs_excl from struct task.
  cfq: Remove special treatment for metadata rqs.
  block: document blk_plug list access
  block: avoid building too big plug list
  compat_ioctl: fix make headers_check regression
  block: eliminate potential for infinite loop in blkdev_issue_discard
  compat_ioctl: fix warning caused by qemu
  block: flush MEDIA_CHANGE from drivers on close(2)
  blk-throttle: Make total_nr_queued unsigned
  block: Add __attribute__((format(printf...) and fix fallout
  fs/partitions/check.c: make local symbols static
  block:remove some spare spaces in genhd.c
  block:fix the comment error in blkdev.h
  ...
Diffstat (limited to 'block')
-rw-r--r--  block/blk-core.c           11
-rw-r--r--  block/blk-ioc.c            40
-rw-r--r--  block/blk-lib.c             5
-rw-r--r--  block/blk-softirq.c        11
-rw-r--r--  block/blk-sysfs.c          13
-rw-r--r--  block/blk-throttle.c        8
-rw-r--r--  block/cfq-iosched.c       152
-rw-r--r--  block/compat_ioctl.c       14
-rw-r--r--  block/deadline-iosched.c    4
-rw-r--r--  block/elevator.c            7
-rw-r--r--  block/genhd.c              28
11 files changed, 145 insertions(+), 148 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 1d49e1c7c905..f8cb09951830 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1282,10 +1282,8 @@ get_rq:
 	init_request_from_bio(req, bio);
 
 	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
-	    bio_flagged(bio, BIO_CPU_AFFINE)) {
-		req->cpu = blk_cpu_to_group(get_cpu());
-		put_cpu();
-	}
+	    bio_flagged(bio, BIO_CPU_AFFINE))
+		req->cpu = smp_processor_id();
 
 	plug = current->plug;
 	if (plug) {
@@ -1305,7 +1303,10 @@ get_rq:
 			plug->should_sort = 1;
 		}
 		list_add_tail(&req->queuelist, &plug->list);
+		plug->count++;
 		drive_stat_acct(req, 1);
+		if (plug->count >= BLK_MAX_REQUEST_COUNT)
+			blk_flush_plug_list(plug, false);
 	} else {
 		spin_lock_irq(q->queue_lock);
 		add_acct_request(q, req, where);
@@ -2629,6 +2630,7 @@ void blk_start_plug(struct blk_plug *plug)
 	INIT_LIST_HEAD(&plug->list);
 	INIT_LIST_HEAD(&plug->cb_list);
 	plug->should_sort = 0;
+	plug->count = 0;
 
 	/*
 	 * If this is a nested plug, don't actually assign it. It will be
@@ -2712,6 +2714,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		return;
 
 	list_splice_init(&plug->list, &list);
+	plug->count = 0;
 
 	if (plug->should_sort) {
 		list_sort(NULL, &list, plug_rq_cmp);
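
The blk-core.c hunks above cap how large an on-stack plug list can grow: every plugged request bumps plug->count, and once it reaches BLK_MAX_REQUEST_COUNT the plug is flushed early. A minimal sketch of how a submitter typically scopes a plug around a batch; the helper name and the way the bios are obtained are assumptions for illustration, not part of this merge:

	#include <linux/fs.h>
	#include <linux/blkdev.h>

	/*
	 * Illustrative helper: submit a pre-built batch of bios under one plug.
	 * With the change above, requests accumulate on plug->list only up to
	 * BLK_MAX_REQUEST_COUNT; past that the plug auto-flushes, so a long
	 * batch no longer builds an unbounded list before blk_finish_plug().
	 */
	static void submit_bio_batch(int rw, struct bio **bios, unsigned int nr)
	{
		struct blk_plug plug;
		unsigned int i;

		blk_start_plug(&plug);
		for (i = 0; i < nr; i++)
			submit_bio(rw, bios[i]);
		blk_finish_plug(&plug);	/* flushes whatever is still plugged */
	}
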
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 342eae9b0d3c..6f9bbd978653 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -82,26 +82,26 @@ void exit_io_context(struct task_struct *task)
 
 struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 {
-	struct io_context *ret;
+	struct io_context *ioc;
 
-	ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
-	if (ret) {
-		atomic_long_set(&ret->refcount, 1);
-		atomic_set(&ret->nr_tasks, 1);
-		spin_lock_init(&ret->lock);
-		ret->ioprio_changed = 0;
-		ret->ioprio = 0;
-		ret->last_waited = 0; /* doesn't matter... */
-		ret->nr_batch_requests = 0; /* because this is 0 */
-		INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
-		INIT_HLIST_HEAD(&ret->cic_list);
-		ret->ioc_data = NULL;
+	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
+	if (ioc) {
+		atomic_long_set(&ioc->refcount, 1);
+		atomic_set(&ioc->nr_tasks, 1);
+		spin_lock_init(&ioc->lock);
+		ioc->ioprio_changed = 0;
+		ioc->ioprio = 0;
+		ioc->last_waited = 0; /* doesn't matter... */
+		ioc->nr_batch_requests = 0; /* because this is 0 */
+		INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
+		INIT_HLIST_HEAD(&ioc->cic_list);
+		ioc->ioc_data = NULL;
 #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
-		ret->cgroup_changed = 0;
+		ioc->cgroup_changed = 0;
 #endif
 	}
 
-	return ret;
+	return ioc;
 }
 
 /*
@@ -139,19 +139,19 @@ struct io_context *current_io_context(gfp_t gfp_flags, int node)
  */
struct io_context *get_io_context(gfp_t gfp_flags, int node)
 {
-	struct io_context *ret = NULL;
+	struct io_context *ioc = NULL;
 
 	/*
 	 * Check for unlikely race with exiting task. ioc ref count is
 	 * zero when ioc is being detached.
 	 */
 	do {
-		ret = current_io_context(gfp_flags, node);
-		if (unlikely(!ret))
+		ioc = current_io_context(gfp_flags, node);
+		if (unlikely(!ioc))
 			break;
-	} while (!atomic_long_inc_not_zero(&ret->refcount));
+	} while (!atomic_long_inc_not_zero(&ioc->refcount));
 
-	return ret;
+	return ioc;
 }
 EXPORT_SYMBOL(get_io_context);
 
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 78e627e2581d..2b461b496a78 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -59,7 +59,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	 * granularity
 	 */
 	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-	if (q->limits.discard_granularity) {
+	if (unlikely(!max_discard_sectors)) {
+		/* Avoid infinite loop below. Being cautious never hurts. */
+		return -EOPNOTSUPP;
+	} else if (q->limits.discard_granularity) {
 		unsigned int disc_sects = q->limits.discard_granularity >> 9;
 
 		max_discard_sectors &= ~(disc_sects - 1);
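
The guard above matters because blkdev_issue_discard() loops, splitting the range into max_discard_sectors-sized chunks; if that limit is 0 the remaining sector count never shrinks and the loop never terminates. A hedged caller-side sketch; the helper name and error policy are assumptions for illustration:

	#include <linux/blkdev.h>

	/*
	 * Illustrative caller: discard a range and treat lack of discard
	 * support as non-fatal.  After this merge, a queue advertising
	 * max_discard_sectors == 0 returns -EOPNOTSUPP here instead of
	 * looping forever.
	 */
	static int discard_range(struct block_device *bdev, sector_t start,
				 sector_t nr_sects)
	{
		int ret = blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL, 0);

		if (ret == -EOPNOTSUPP)
			return 0;	/* device cannot discard; not fatal */
		return ret;
	}
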
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ee9c21602228..475fab809a80 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -103,22 +103,25 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
 
 void __blk_complete_request(struct request *req)
 {
+	int ccpu, cpu, group_cpu = NR_CPUS;
 	struct request_queue *q = req->q;
 	unsigned long flags;
-	int ccpu, cpu, group_cpu;
 
 	BUG_ON(!q->softirq_done_fn);
 
 	local_irq_save(flags);
 	cpu = smp_processor_id();
-	group_cpu = blk_cpu_to_group(cpu);
 
 	/*
 	 * Select completion CPU
 	 */
-	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
 		ccpu = req->cpu;
-	else
+		if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
+			ccpu = blk_cpu_to_group(ccpu);
+			group_cpu = blk_cpu_to_group(cpu);
+		}
+	} else
 		ccpu = cpu;
 
 	if (ccpu == cpu || ccpu == group_cpu) {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d935bd859c87..0ee17b5e7fb6 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -244,8 +244,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
 static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
 {
 	bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+	bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
 
-	return queue_var_show(set, page);
+	return queue_var_show(set << force, page);
 }
 
 static ssize_t
@@ -257,10 +258,14 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 
 	ret = queue_var_store(&val, page, count);
 	spin_lock_irq(q->queue_lock);
-	if (val)
+	if (val) {
 		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
-	else
-		queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+		if (val == 2)
+			queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+	} else {
+		queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+		queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+	}
 	spin_unlock_irq(q->queue_lock);
 #endif
 	return ret;
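
Together with the blk-softirq.c hunk above, this makes /sys/block/<dev>/queue/rq_affinity accept three values: 0 (off), 1 (complete on a CPU in the submitter's group, the previous behaviour) and 2 (QUEUE_FLAG_SAME_FORCE: complete on the exact submitting CPU). A small user-space sketch of selecting the strict mode; the device name "sda" is an assumption:

	#include <stdio.h>

	/*
	 * Illustrative only: enable strict completion affinity (value 2) for
	 * one device.  "sda" is a placeholder for any request-based device.
	 */
	int main(void)
	{
		FILE *f = fopen("/sys/block/sda/queue/rq_affinity", "w");

		if (!f) {
			perror("rq_affinity");
			return 1;
		}
		fputs("2\n", f);	/* 0 = off, 1 = group affinity, 2 = strict */
		return fclose(f) ? 1 : 0;
	}
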
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 3689f833afdc..f6a794120505 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -142,9 +142,9 @@ static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg)
 	return NULL;
 }
 
-static inline int total_nr_queued(struct throtl_data *td)
+static inline unsigned int total_nr_queued(struct throtl_data *td)
 {
-	return (td->nr_queued[0] + td->nr_queued[1]);
+	return td->nr_queued[0] + td->nr_queued[1];
 }
 
 static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
@@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q)
 
 	bio_list_init(&bio_list_on_stack);
 
-	throtl_log(td, "dispatch nr_queued=%d read=%u write=%u",
+	throtl_log(td, "dispatch nr_queued=%u read=%u write=%u",
 			total_nr_queued(td), td->nr_queued[READ],
 			td->nr_queued[WRITE]);
 
@@ -970,7 +970,7 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
 	struct delayed_work *dwork = &td->throtl_work;
 
 	/* schedule work if limits changed even if no bio is queued */
-	if (total_nr_queued(td) > 0 || td->limits_changed) {
+	if (total_nr_queued(td) || td->limits_changed) {
 		/*
 		 * We might have a work scheduled to be executed in future.
 		 * Cancel that and schedule a new one.
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ae21919f15e1..1f96ad6254f1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,9 +87,10 @@ struct cfq_rb_root {
 	unsigned count;
 	unsigned total_weight;
 	u64 min_vdisktime;
+	struct cfq_ttime ttime;
 };
-#define CFQ_RB_ROOT	(struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
-			.count = 0, .min_vdisktime = 0, }
+#define CFQ_RB_ROOT	(struct cfq_rb_root) { .rb = RB_ROOT, \
+			.ttime = {.last_end_request = jiffies,},}
 
 /*
  * Per process-grouping structure
@@ -129,14 +130,12 @@ struct cfq_queue {
 	unsigned long slice_end;
 	long slice_resid;
 
-	/* pending metadata requests */
-	int meta_pending;
 	/* number of requests that are on the dispatch list or inside driver */
 	int dispatched;
 
 	/* io prio of this group */
 	unsigned short ioprio, org_ioprio;
-	unsigned short ioprio_class, org_ioprio_class;
+	unsigned short ioprio_class;
 
 	pid_t pid;
 
@@ -212,6 +211,7 @@ struct cfq_group {
 #endif
 	/* number of requests that are on the dispatch list or inside driver */
 	int dispatched;
+	struct cfq_ttime ttime;
 };
 
 /*
@@ -393,6 +393,18 @@ CFQ_CFQQ_FNS(wait_busy);
 			j++, st = i < IDLE_WORKLOAD ? \
 			&cfqg->service_trees[i][j]: NULL) \
 
+static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
+	struct cfq_ttime *ttime, bool group_idle)
+{
+	unsigned long slice;
+	if (!sample_valid(ttime->ttime_samples))
+		return false;
+	if (group_idle)
+		slice = cfqd->cfq_group_idle;
+	else
+		slice = cfqd->cfq_slice_idle;
+	return ttime->ttime_mean > slice;
+}
 
 static inline bool iops_mode(struct cfq_data *cfqd)
 {
@@ -670,9 +682,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
 	if (rq_is_sync(rq1) != rq_is_sync(rq2))
 		return rq_is_sync(rq1) ? rq1 : rq2;
 
-	if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
-		return rq1->cmd_flags & REQ_META ? rq1 : rq2;
-
 	s1 = blk_rq_pos(rq1);
 	s2 = blk_rq_pos(rq2);
 
@@ -1005,8 +1014,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
 	return NULL;
 }
 
-void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
+static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
 					unsigned int weight)
 {
 	struct cfq_group *cfqg = cfqg_of_blkg(blkg);
 	cfqg->new_weight = weight;
@@ -1059,6 +1068,8 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
 		*st = CFQ_RB_ROOT;
 	RB_CLEAR_NODE(&cfqg->rb_node);
 
+	cfqg->ttime.last_end_request = jiffies;
+
 	/*
 	 * Take the initial reference that will be released on destroy
 	 * This can be thought of a joint reference by cgroup and
@@ -1235,7 +1246,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
 * it should not be NULL as even if elevator was exiting, cgroup deltion
 * path got to it first.
 */
-void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
+static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
 {
 	unsigned long flags;
 	struct cfq_data *cfqd = key;
@@ -1502,16 +1513,11 @@ static void cfq_add_rq_rb(struct request *rq)
 {
 	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 	struct cfq_data *cfqd = cfqq->cfqd;
-	struct request *__alias, *prev;
+	struct request *prev;
 
 	cfqq->queued[rq_is_sync(rq)]++;
 
-	/*
-	 * looks a little odd, but the first insert might return an alias.
-	 * if that happens, put the alias on the dispatch list
-	 */
-	while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
-		cfq_dispatch_insert(cfqd->queue, __alias);
+	elv_rb_add(&cfqq->sort_list, rq);
 
 	if (!cfq_cfqq_on_rr(cfqq))
 		cfq_add_cfqq_rr(cfqd, cfqq);
@@ -1598,10 +1604,6 @@ static void cfq_remove_request(struct request *rq)
 	cfqq->cfqd->rq_queued--;
 	cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
 					rq_data_dir(rq), rq_is_sync(rq));
-	if (rq->cmd_flags & REQ_META) {
-		WARN_ON(!cfqq->meta_pending);
-		cfqq->meta_pending--;
-	}
 }
 
 static int cfq_merge(struct request_queue *q, struct request **req,
@@ -1969,7 +1971,8 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 * Otherwise, we do only if they are the last ones
 	 * in their service tree.
 	 */
-	if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
+	if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) &&
+	   !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false))
 		return true;
 	cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
 			service_tree->count);
@@ -2022,10 +2025,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	 * slice, then don't idle. This avoids overrunning the allotted
 	 * time slice.
 	 */
-	if (sample_valid(cic->ttime_samples) &&
-	    (cfqq->slice_end - jiffies < cic->ttime_mean)) {
+	if (sample_valid(cic->ttime.ttime_samples) &&
+	    (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
 		cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
-			     cic->ttime_mean);
+			     cic->ttime.ttime_mean);
 		return;
 	}
 
@@ -2381,8 +2384,9 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 	 * this group, wait for requests to complete.
 	 */
check_group_idle:
-	if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1
-	    && cfqq->cfqg->dispatched) {
+	if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
+	    cfqq->cfqg->dispatched &&
+	    !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
 		cfqq = NULL;
 		goto keep_queue;
 	}
@@ -2833,7 +2837,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 	cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
 							cfqd->queue->node);
 	if (cic) {
-		cic->last_end_request = jiffies;
+		cic->ttime.last_end_request = jiffies;
 		INIT_LIST_HEAD(&cic->queue_list);
 		INIT_HLIST_NODE(&cic->cic_list);
 		cic->dtor = cfq_free_io_context;
@@ -2883,7 +2887,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 	 * elevate the priority of this queue
 	 */
 	cfqq->org_ioprio = cfqq->ioprio;
-	cfqq->org_ioprio_class = cfqq->ioprio_class;
 	cfq_clear_cfqq_prio_changed(cfqq);
 }
 
@@ -3221,14 +3224,28 @@ err:
 }
 
 static void
-cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
+__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
 {
-	unsigned long elapsed = jiffies - cic->last_end_request;
-	unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);
+	unsigned long elapsed = jiffies - ttime->last_end_request;
+	elapsed = min(elapsed, 2UL * slice_idle);
 
-	cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
-	cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
-	cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
+	ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
+	ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
+	ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
+}
+
+static void
+cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+	struct cfq_io_context *cic)
+{
+	if (cfq_cfqq_sync(cfqq)) {
+		__cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
+		__cfq_update_io_thinktime(&cfqq->service_tree->ttime,
+			cfqd->cfq_slice_idle);
+	}
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+	__cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
+#endif
 }
 
 static void
@@ -3277,8 +3294,8 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
 		 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
 		enable_idle = 0;
-	else if (sample_valid(cic->ttime_samples)) {
-		if (cic->ttime_mean > cfqd->cfq_slice_idle)
+	else if (sample_valid(cic->ttime.ttime_samples)) {
+		if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
 			enable_idle = 0;
 		else
 			enable_idle = 1;
@@ -3340,13 +3357,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 		return true;
 
 	/*
-	 * So both queues are sync. Let the new request get disk time if
-	 * it's a metadata request and the current queue is doing regular IO.
-	 */
-	if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
-		return true;
-
-	/*
 	 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
 	 */
 	if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
@@ -3410,10 +3420,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_io_context *cic = RQ_CIC(rq);
 
 	cfqd->rq_queued++;
-	if (rq->cmd_flags & REQ_META)
-		cfqq->meta_pending++;
 
-	cfq_update_io_thinktime(cfqd, cic);
+	cfq_update_io_thinktime(cfqd, cfqq, cic);
 	cfq_update_io_seektime(cfqd, cfqq, rq);
 	cfq_update_idle_window(cfqd, cfqq, cic);
 
@@ -3520,12 +3528,16 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	if (cfqq->cfqg->nr_cfqq > 1)
 		return false;
 
+	/* the only queue in the group, but think time is big */
+	if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
+		return false;
+
 	if (cfq_slice_used(cfqq))
 		return true;
 
 	/* if slice left is less than think time, wait busy */
-	if (cic && sample_valid(cic->ttime_samples)
-	    && (cfqq->slice_end - jiffies < cic->ttime_mean))
+	if (cic && sample_valid(cic->ttime.ttime_samples)
+	    && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
 		return true;
 
 	/*
@@ -3566,11 +3578,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
 	if (sync) {
-		RQ_CIC(rq)->last_end_request = now;
+		struct cfq_rb_root *service_tree;
+
+		RQ_CIC(rq)->ttime.last_end_request = now;
+
+		if (cfq_cfqq_on_rr(cfqq))
+			service_tree = cfqq->service_tree;
+		else
+			service_tree = service_tree_for(cfqq->cfqg,
+				cfqq_prio(cfqq), cfqq_type(cfqq));
+		service_tree->ttime.last_end_request = now;
 		if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
 			cfqd->last_delayed_sync = now;
 	}
 
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+	cfqq->cfqg->ttime.last_end_request = now;
+#endif
+
 	/*
 	 * If this is the active queue, check if it needs to be expired,
 	 * or if we want to idle in case it has no pending requests.
@@ -3616,30 +3641,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 		cfq_schedule_dispatch(cfqd);
 }
 
-/*
- * we temporarily boost lower priority queues if they are holding fs exclusive
- * resources. they are boosted to normal prio (CLASS_BE/4)
- */
-static void cfq_prio_boost(struct cfq_queue *cfqq)
-{
-	if (has_fs_excl()) {
-		/*
-		 * boost idle prio on transactions that would lock out other
-		 * users of the filesystem
-		 */
-		if (cfq_class_idle(cfqq))
-			cfqq->ioprio_class = IOPRIO_CLASS_BE;
-		if (cfqq->ioprio > IOPRIO_NORM)
-			cfqq->ioprio = IOPRIO_NORM;
-	} else {
-		/*
-		 * unboost the queue (if needed)
-		 */
-		cfqq->ioprio_class = cfqq->org_ioprio_class;
-		cfqq->ioprio = cfqq->org_ioprio;
-	}
-}
-
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 {
 	if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -3670,7 +3671,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
 	cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
 	if (cfqq) {
 		cfq_init_prio_data(cfqq, cic->ioc);
-		cfq_prio_boost(cfqq);
 
 		return __cfq_may_queue(cfqq);
 	}
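
The cfq_ttime bookkeeping introduced above is an exponentially weighted running mean: each new think-time sample is folded in with weight 1/8 against the accumulated history, using a fixed-point scale of 256 (the 7/8 arithmetic in __cfq_update_io_thinktime). A stand-alone model of that update, with plain C types in place of the kernel structs and made-up sample values, purely for illustration:

	#include <stdio.h>

	/*
	 * Stand-alone model of the 7/8 decaying average used by
	 * __cfq_update_io_thinktime(); field names mirror struct cfq_ttime,
	 * but the jiffies handling is simplified.
	 */
	struct ttime_model {
		unsigned long samples;	/* fixed-point sample count (x256) */
		unsigned long total;	/* fixed-point total think time */
		unsigned long mean;	/* rounded mean think time */
	};

	static void ttime_update(struct ttime_model *t, unsigned long elapsed)
	{
		t->samples = (7 * t->samples + 256) / 8;
		t->total = (7 * t->total + 256 * elapsed) / 8;
		t->mean = (t->total + 128) / t->samples;
	}

	int main(void)
	{
		struct ttime_model t = { 0, 0, 0 };
		unsigned long samples[] = { 4, 8, 8, 100, 8 };	/* made-up values */
		unsigned long i;

		for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
			ttime_update(&t, samples[i]);
			printf("sample %lu -> mean %lu\n", samples[i], t.mean);
		}
		return 0;
	}

One spike barely moves the mean, which is why cfq_io_thinktime_big() only disables idling for queues or groups whose think time is consistently larger than the idle slice.
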
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index cc3eb78e333a..7b725020823c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -208,19 +208,6 @@ static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode,
 #define BLKBSZSET_32		_IOW(0x12, 113, int)
 #define BLKGETSIZE64_32	_IOR(0x12, 114, int)
 
-struct compat_floppy_struct {
-	compat_uint_t	size;
-	compat_uint_t	sect;
-	compat_uint_t	head;
-	compat_uint_t	track;
-	compat_uint_t	stretch;
-	unsigned char	gap;
-	unsigned char	rate;
-	unsigned char	spec1;
-	unsigned char	fmt_gap;
-	const compat_caddr_t name;
-};
-
 struct compat_floppy_drive_params {
 	char		cmos;
 	compat_ulong_t	max_dtr;
@@ -288,7 +275,6 @@ struct compat_floppy_write_errors {
 
 #define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct)
 #define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct)
-#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
 #define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params)
 #define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params)
 #define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct)
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 5139c0ea1864..c644137d9cd6 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -77,10 +77,8 @@ static void
 deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
 {
 	struct rb_root *root = deadline_rb_root(dd, rq);
-	struct request *__alias;
 
-	while (unlikely(__alias = elv_rb_add(root, rq)))
-		deadline_move_request(dd, __alias);
+	elv_rb_add(root, rq);
 }
 
 static inline void
diff --git a/block/elevator.c b/block/elevator.c
index b0b38ce0dcb6..a3b64bc71d88 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -353,7 +353,7 @@ static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
-struct request *elv_rb_add(struct rb_root *root, struct request *rq)
+void elv_rb_add(struct rb_root *root, struct request *rq)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -365,15 +365,12 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
 
 		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
 			p = &(*p)->rb_left;
-		else if (blk_rq_pos(rq) > blk_rq_pos(__rq))
+		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
 			p = &(*p)->rb_right;
-		else
-			return __rq;
 	}
 
 	rb_link_node(&rq->rb_node, parent, p);
 	rb_insert_color(&rq->rb_node, root);
-	return NULL;
 }
 EXPORT_SYMBOL(elv_rb_add);
 
diff --git a/block/genhd.c b/block/genhd.c
index 6024b82e3209..5cb51c55f6d8 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -602,7 +602,7 @@ void add_disk(struct gendisk *disk)
 	disk->major = MAJOR(devt);
 	disk->first_minor = MINOR(devt);
 
-	/* Register BDI before referencing it from bdev */ 
+	/* Register BDI before referencing it from bdev */
 	bdi = &disk->queue->backing_dev_info;
 	bdi_register_dev(bdi, disk_devt(disk));
 
@@ -1140,7 +1140,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 		   "wsect wuse running use aveq"
 		   "\n\n");
 	*/
-	
+
 	disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
 	while ((hd = disk_part_iter_next(&piter))) {
 		cpu = part_stat_lock();
@@ -1164,7 +1164,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			);
 	}
 	disk_part_iter_exit(&piter);
-	
+
 	return 0;
 }
 
@@ -1492,30 +1492,32 @@ void disk_unblock_events(struct gendisk *disk)
 }
 
 /**
- * disk_check_events - schedule immediate event checking
- * @disk: disk to check events for
+ * disk_flush_events - schedule immediate event checking and flushing
+ * @disk: disk to check and flush events for
+ * @mask: events to flush
 *
- * Schedule immediate event checking on @disk if not blocked.
+ * Schedule immediate event checking on @disk if not blocked. Events in
+ * @mask are scheduled to be cleared from the driver. Note that this
+ * doesn't clear the events from @disk->ev.
 *
 * CONTEXT:
- * Don't care. Safe to call from irq context.
+ * If @mask is non-zero must be called with bdev->bd_mutex held.
 */
-void disk_check_events(struct gendisk *disk)
+void disk_flush_events(struct gendisk *disk, unsigned int mask)
 {
 	struct disk_events *ev = disk->ev;
-	unsigned long flags;
 
 	if (!ev)
 		return;
 
-	spin_lock_irqsave(&ev->lock, flags);
+	spin_lock_irq(&ev->lock);
+	ev->clearing |= mask;
 	if (!ev->block) {
 		cancel_delayed_work(&ev->dwork);
 		queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
 	}
-	spin_unlock_irqrestore(&ev->lock, flags);
+	spin_unlock_irq(&ev->lock);
 }
-EXPORT_SYMBOL_GPL(disk_check_events);
 
 /**
 * disk_clear_events - synchronously check, clear and return pending events
@@ -1705,7 +1707,7 @@ static int disk_events_set_dfl_poll_msecs(const char *val,
 	mutex_lock(&disk_events_mutex);
 
 	list_for_each_entry(ev, &disk_events, node)
-		disk_check_events(ev->disk);
+		disk_flush_events(ev->disk, 0);
 
 	mutex_unlock(&disk_events_mutex);
 