author		Linus Torvalds <torvalds@linux-foundation.org>	2011-07-25 13:33:36 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-25 13:33:36 -0400
commit		096a705bbc080a4041636d07514560da8d78acbe (patch)
tree		38c3c01225709ffa53419083ea6332f8a72610de
parent		fea80311a939a746533a6d7e7c3183729d6a3faf (diff)
parent		5757a6d76cdf6dda2a492c09b985c015e86779b1 (diff)
Merge branch 'for-3.1/core' of git://git.kernel.dk/linux-block
* 'for-3.1/core' of git://git.kernel.dk/linux-block: (24 commits)
  block: strict rq_affinity
  backing-dev: use synchronize_rcu_expedited instead of synchronize_rcu
  block: fix patch import error in max_discard_sectors check
  block: reorder request_queue to remove 64 bit alignment padding
  CFQ: add think time check for group
  CFQ: add think time check for service tree
  CFQ: move think time check variables to a separate struct
  fixlet: Remove fs_excl from struct task.
  cfq: Remove special treatment for metadata rqs.
  block: document blk_plug list access
  block: avoid building too big plug list
  compat_ioctl: fix make headers_check regression
  block: eliminate potential for infinite loop in blkdev_issue_discard
  compat_ioctl: fix warning caused by qemu
  block: flush MEDIA_CHANGE from drivers on close(2)
  blk-throttle: Make total_nr_queued unsigned
  block: Add __attribute__((format(printf...) and fix fallout
  fs/partitions/check.c: make local symbols static
  block:remove some spare spaces in genhd.c
  block:fix the comment error in blkdev.h
  ...
-rw-r--r-- | Documentation/block/queue-sysfs.txt | 10
-rw-r--r-- | block/blk-core.c | 11
-rw-r--r-- | block/blk-ioc.c | 40
-rw-r--r-- | block/blk-lib.c | 5
-rw-r--r-- | block/blk-softirq.c | 11
-rw-r--r-- | block/blk-sysfs.c | 13
-rw-r--r-- | block/blk-throttle.c | 8
-rw-r--r-- | block/cfq-iosched.c | 152
-rw-r--r-- | block/compat_ioctl.c | 14
-rw-r--r-- | block/deadline-iosched.c | 4
-rw-r--r-- | block/elevator.c | 7
-rw-r--r-- | block/genhd.c | 28
-rw-r--r-- | fs/block_dev.c | 23
-rw-r--r-- | fs/compat_ioctl.c | 5
-rw-r--r-- | fs/partitions/check.c | 12
-rw-r--r-- | fs/reiserfs/journal.c | 13
-rw-r--r-- | fs/super.c | 4
-rw-r--r-- | include/linux/blkdev.h | 27
-rw-r--r-- | include/linux/elevator.h | 2
-rw-r--r-- | include/linux/fd.h | 22
-rw-r--r-- | include/linux/fs.h | 4
-rw-r--r-- | include/linux/genhd.h | 2
-rw-r--r-- | include/linux/init_task.h | 1
-rw-r--r-- | include/linux/iocontext.h | 14
-rw-r--r-- | include/linux/sched.h | 1
-rw-r--r-- | kernel/exit.c | 1
-rw-r--r-- | kernel/fork.c | 1
-rw-r--r-- | mm/backing-dev.c | 2
28 files changed, 229 insertions(+), 208 deletions(-)
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index f65274081c8d..d8147b336c35 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -45,9 +45,13 @@ device.
 
 rq_affinity (RW)
 ----------------
-If this option is enabled, the block layer will migrate request completions
-to the CPU that originally submitted the request. For some workloads
-this provides a significant reduction in CPU cycles due to caching effects.
+If this option is '1', the block layer will migrate request completions to the
+cpu "group" that originally submitted the request. For some workloads this
+provides a significant reduction in CPU cycles due to caching effects.
+
+For storage configurations that need to maximize distribution of completion
+processing setting this option to '2' forces the completion to run on the
+requesting cpu (bypassing the "group" aggregation logic).
 
 scheduler (RW)
 --------------
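For illustration only (not part of the patch): the tunable documented above is an ordinary sysfs attribute, so userspace can flip it with a plain write. The device name "sda" below is just a placeholder.

#include <stdio.h>

/* Force completions onto the submitting CPU by writing '2' to the
 * rq_affinity attribute described above ("sda" is a placeholder). */
int main(void)
{
	FILE *f = fopen("/sys/block/sda/queue/rq_affinity", "w");

	if (!f) {
		perror("rq_affinity");
		return 1;
	}
	fputs("2\n", f);	/* 0 = off, 1 = same CPU group, 2 = same CPU */
	fclose(f);
	return 0;
}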
diff --git a/block/blk-core.c b/block/blk-core.c
index 1d49e1c7c905..f8cb09951830 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1282,10 +1282,8 @@ get_rq:
1282 init_request_from_bio(req, bio); 1282 init_request_from_bio(req, bio);
1283 1283
1284 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1284 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1285 bio_flagged(bio, BIO_CPU_AFFINE)) { 1285 bio_flagged(bio, BIO_CPU_AFFINE))
1286 req->cpu = blk_cpu_to_group(get_cpu()); 1286 req->cpu = smp_processor_id();
1287 put_cpu();
1288 }
1289 1287
1290 plug = current->plug; 1288 plug = current->plug;
1291 if (plug) { 1289 if (plug) {
@@ -1305,7 +1303,10 @@ get_rq:
1305 plug->should_sort = 1; 1303 plug->should_sort = 1;
1306 } 1304 }
1307 list_add_tail(&req->queuelist, &plug->list); 1305 list_add_tail(&req->queuelist, &plug->list);
1306 plug->count++;
1308 drive_stat_acct(req, 1); 1307 drive_stat_acct(req, 1);
1308 if (plug->count >= BLK_MAX_REQUEST_COUNT)
1309 blk_flush_plug_list(plug, false);
1309 } else { 1310 } else {
1310 spin_lock_irq(q->queue_lock); 1311 spin_lock_irq(q->queue_lock);
1311 add_acct_request(q, req, where); 1312 add_acct_request(q, req, where);
@@ -2629,6 +2630,7 @@ void blk_start_plug(struct blk_plug *plug)
2629 INIT_LIST_HEAD(&plug->list); 2630 INIT_LIST_HEAD(&plug->list);
2630 INIT_LIST_HEAD(&plug->cb_list); 2631 INIT_LIST_HEAD(&plug->cb_list);
2631 plug->should_sort = 0; 2632 plug->should_sort = 0;
2633 plug->count = 0;
2632 2634
2633 /* 2635 /*
2634 * If this is a nested plug, don't actually assign it. It will be 2636 * If this is a nested plug, don't actually assign it. It will be
@@ -2712,6 +2714,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
2712 return; 2714 return;
2713 2715
2714 list_splice_init(&plug->list, &list); 2716 list_splice_init(&plug->list, &list);
2717 plug->count = 0;
2715 2718
2716 if (plug->should_sort) { 2719 if (plug->should_sort) {
2717 list_sort(NULL, &list, plug_rq_cmp); 2720 list_sort(NULL, &list, plug_rq_cmp);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 342eae9b0d3c..6f9bbd978653 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -82,26 +82,26 @@ void exit_io_context(struct task_struct *task)
82 82
83struct io_context *alloc_io_context(gfp_t gfp_flags, int node) 83struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
84{ 84{
85 struct io_context *ret; 85 struct io_context *ioc;
86 86
87 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); 87 ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
88 if (ret) { 88 if (ioc) {
89 atomic_long_set(&ret->refcount, 1); 89 atomic_long_set(&ioc->refcount, 1);
90 atomic_set(&ret->nr_tasks, 1); 90 atomic_set(&ioc->nr_tasks, 1);
91 spin_lock_init(&ret->lock); 91 spin_lock_init(&ioc->lock);
92 ret->ioprio_changed = 0; 92 ioc->ioprio_changed = 0;
93 ret->ioprio = 0; 93 ioc->ioprio = 0;
94 ret->last_waited = 0; /* doesn't matter... */ 94 ioc->last_waited = 0; /* doesn't matter... */
95 ret->nr_batch_requests = 0; /* because this is 0 */ 95 ioc->nr_batch_requests = 0; /* because this is 0 */
96 INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); 96 INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
97 INIT_HLIST_HEAD(&ret->cic_list); 97 INIT_HLIST_HEAD(&ioc->cic_list);
98 ret->ioc_data = NULL; 98 ioc->ioc_data = NULL;
99#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) 99#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
100 ret->cgroup_changed = 0; 100 ioc->cgroup_changed = 0;
101#endif 101#endif
102 } 102 }
103 103
104 return ret; 104 return ioc;
105} 105}
106 106
107/* 107/*
@@ -139,19 +139,19 @@ struct io_context *current_io_context(gfp_t gfp_flags, int node)
139 */ 139 */
140struct io_context *get_io_context(gfp_t gfp_flags, int node) 140struct io_context *get_io_context(gfp_t gfp_flags, int node)
141{ 141{
142 struct io_context *ret = NULL; 142 struct io_context *ioc = NULL;
143 143
144 /* 144 /*
145 * Check for unlikely race with exiting task. ioc ref count is 145 * Check for unlikely race with exiting task. ioc ref count is
146 * zero when ioc is being detached. 146 * zero when ioc is being detached.
147 */ 147 */
148 do { 148 do {
149 ret = current_io_context(gfp_flags, node); 149 ioc = current_io_context(gfp_flags, node);
150 if (unlikely(!ret)) 150 if (unlikely(!ioc))
151 break; 151 break;
152 } while (!atomic_long_inc_not_zero(&ret->refcount)); 152 } while (!atomic_long_inc_not_zero(&ioc->refcount));
153 153
154 return ret; 154 return ioc;
155} 155}
156EXPORT_SYMBOL(get_io_context); 156EXPORT_SYMBOL(get_io_context);
157 157
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 78e627e2581d..2b461b496a78 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -59,7 +59,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	 * granularity
 	 */
 	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-	if (q->limits.discard_granularity) {
+	if (unlikely(!max_discard_sectors)) {
+		/* Avoid infinite loop below. Being cautious never hurts. */
+		return -EOPNOTSUPP;
+	} else if (q->limits.discard_granularity) {
 		unsigned int disc_sects = q->limits.discard_granularity >> 9;
 
 		max_discard_sectors &= ~(disc_sects - 1);
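The mask in the hunk above rounds max_discard_sectors down to a whole multiple of the discard granularity. The standalone sketch below replays that arithmetic, under the usual assumption that the granularity expressed in 512-byte sectors is a power of two (otherwise the mask trick would not produce an exact multiple); the 1 MiB granularity is a made-up example value.

#include <stdio.h>

int main(void)
{
	unsigned int max_discard_sectors = 0xffffffffu >> 9;	/* UINT_MAX >> 9 */
	unsigned int granularity = 1u << 20;			/* e.g. 1 MiB */
	unsigned int disc_sects = granularity >> 9;		/* 2048 sectors */

	max_discard_sectors &= ~(disc_sects - 1);		/* round down to a multiple */
	printf("aligned max_discard_sectors = %u\n", max_discard_sectors);
	return 0;
}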
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ee9c21602228..475fab809a80 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -103,22 +103,25 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
103 103
104void __blk_complete_request(struct request *req) 104void __blk_complete_request(struct request *req)
105{ 105{
106 int ccpu, cpu, group_cpu = NR_CPUS;
106 struct request_queue *q = req->q; 107 struct request_queue *q = req->q;
107 unsigned long flags; 108 unsigned long flags;
108 int ccpu, cpu, group_cpu;
109 109
110 BUG_ON(!q->softirq_done_fn); 110 BUG_ON(!q->softirq_done_fn);
111 111
112 local_irq_save(flags); 112 local_irq_save(flags);
113 cpu = smp_processor_id(); 113 cpu = smp_processor_id();
114 group_cpu = blk_cpu_to_group(cpu);
115 114
116 /* 115 /*
117 * Select completion CPU 116 * Select completion CPU
118 */ 117 */
119 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) 118 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
120 ccpu = req->cpu; 119 ccpu = req->cpu;
121 else 120 if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
121 ccpu = blk_cpu_to_group(ccpu);
122 group_cpu = blk_cpu_to_group(cpu);
123 }
124 } else
122 ccpu = cpu; 125 ccpu = cpu;
123 126
124 if (ccpu == cpu || ccpu == group_cpu) { 127 if (ccpu == cpu || ccpu == group_cpu) {
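Stripped of the queue-flag plumbing, the completion-CPU choice added above amounts to the sketch below. This is an illustration only: cpu_group() is a stand-in for the kernel's blk_cpu_to_group() with a made-up two-CPUs-per-group mapping.

#include <stdio.h>

/* Stand-in for blk_cpu_to_group(): pretend two CPUs share each group. */
static int cpu_group(int cpu)
{
	return cpu / 2;
}

/* Sketch of the completion-CPU choice made in __blk_complete_request() above. */
static int pick_completion_cpu(int req_cpu, int this_cpu,
			       int same_comp, int same_force)
{
	if (same_comp && req_cpu != -1) {
		if (same_force)
			return req_cpu;			/* rq_affinity == 2: exact CPU */
		if (cpu_group(req_cpu) == cpu_group(this_cpu))
			return this_cpu;		/* rq_affinity == 1: same group, run here */
		return req_cpu;				/* different group: IPI the submitter */
	}
	return this_cpu;				/* affinity off: complete locally */
}

int main(void)
{
	printf("%d\n", pick_completion_cpu(0, 3, 1, 0));	/* 0: different group */
	printf("%d\n", pick_completion_cpu(2, 3, 1, 0));	/* 3: same group, local */
	printf("%d\n", pick_completion_cpu(2, 3, 1, 1));	/* 2: forced to submitter */
	return 0;
}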
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d935bd859c87..0ee17b5e7fb6 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -244,8 +244,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
244static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page) 244static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
245{ 245{
246 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags); 246 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
247 bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
247 248
248 return queue_var_show(set, page); 249 return queue_var_show(set << force, page);
249} 250}
250 251
251static ssize_t 252static ssize_t
@@ -257,10 +258,14 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
257 258
258 ret = queue_var_store(&val, page, count); 259 ret = queue_var_store(&val, page, count);
259 spin_lock_irq(q->queue_lock); 260 spin_lock_irq(q->queue_lock);
260 if (val) 261 if (val) {
261 queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 262 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
262 else 263 if (val == 2)
263 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); 264 queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
265 } else {
266 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
267 queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
268 }
264 spin_unlock_irq(q->queue_lock); 269 spin_unlock_irq(q->queue_lock);
265#endif 270#endif
266 return ret; 271 return ret;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 3689f833afdc..f6a794120505 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -142,9 +142,9 @@ static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg)
142 return NULL; 142 return NULL;
143} 143}
144 144
145static inline int total_nr_queued(struct throtl_data *td) 145static inline unsigned int total_nr_queued(struct throtl_data *td)
146{ 146{
147 return (td->nr_queued[0] + td->nr_queued[1]); 147 return td->nr_queued[0] + td->nr_queued[1];
148} 148}
149 149
150static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg) 150static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
@@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q)
927 927
928 bio_list_init(&bio_list_on_stack); 928 bio_list_init(&bio_list_on_stack);
929 929
930 throtl_log(td, "dispatch nr_queued=%d read=%u write=%u", 930 throtl_log(td, "dispatch nr_queued=%u read=%u write=%u",
931 total_nr_queued(td), td->nr_queued[READ], 931 total_nr_queued(td), td->nr_queued[READ],
932 td->nr_queued[WRITE]); 932 td->nr_queued[WRITE]);
933 933
@@ -970,7 +970,7 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
970 struct delayed_work *dwork = &td->throtl_work; 970 struct delayed_work *dwork = &td->throtl_work;
971 971
972 /* schedule work if limits changed even if no bio is queued */ 972 /* schedule work if limits changed even if no bio is queued */
973 if (total_nr_queued(td) > 0 || td->limits_changed) { 973 if (total_nr_queued(td) || td->limits_changed) {
974 /* 974 /*
975 * We might have a work scheduled to be executed in future. 975 * We might have a work scheduled to be executed in future.
976 * Cancel that and schedule a new one. 976 * Cancel that and schedule a new one.
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ae21919f15e1..1f96ad6254f1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,9 +87,10 @@ struct cfq_rb_root {
87 unsigned count; 87 unsigned count;
88 unsigned total_weight; 88 unsigned total_weight;
89 u64 min_vdisktime; 89 u64 min_vdisktime;
90 struct cfq_ttime ttime;
90}; 91};
91#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ 92#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
92 .count = 0, .min_vdisktime = 0, } 93 .ttime = {.last_end_request = jiffies,},}
93 94
94/* 95/*
95 * Per process-grouping structure 96 * Per process-grouping structure
@@ -129,14 +130,12 @@ struct cfq_queue {
129 unsigned long slice_end; 130 unsigned long slice_end;
130 long slice_resid; 131 long slice_resid;
131 132
132 /* pending metadata requests */
133 int meta_pending;
134 /* number of requests that are on the dispatch list or inside driver */ 133 /* number of requests that are on the dispatch list or inside driver */
135 int dispatched; 134 int dispatched;
136 135
137 /* io prio of this group */ 136 /* io prio of this group */
138 unsigned short ioprio, org_ioprio; 137 unsigned short ioprio, org_ioprio;
139 unsigned short ioprio_class, org_ioprio_class; 138 unsigned short ioprio_class;
140 139
141 pid_t pid; 140 pid_t pid;
142 141
@@ -212,6 +211,7 @@ struct cfq_group {
212#endif 211#endif
213 /* number of requests that are on the dispatch list or inside driver */ 212 /* number of requests that are on the dispatch list or inside driver */
214 int dispatched; 213 int dispatched;
214 struct cfq_ttime ttime;
215}; 215};
216 216
217/* 217/*
@@ -393,6 +393,18 @@ CFQ_CFQQ_FNS(wait_busy);
393 j++, st = i < IDLE_WORKLOAD ? \ 393 j++, st = i < IDLE_WORKLOAD ? \
394 &cfqg->service_trees[i][j]: NULL) \ 394 &cfqg->service_trees[i][j]: NULL) \
395 395
396static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
397 struct cfq_ttime *ttime, bool group_idle)
398{
399 unsigned long slice;
400 if (!sample_valid(ttime->ttime_samples))
401 return false;
402 if (group_idle)
403 slice = cfqd->cfq_group_idle;
404 else
405 slice = cfqd->cfq_slice_idle;
406 return ttime->ttime_mean > slice;
407}
396 408
397static inline bool iops_mode(struct cfq_data *cfqd) 409static inline bool iops_mode(struct cfq_data *cfqd)
398{ 410{
@@ -670,9 +682,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
670 if (rq_is_sync(rq1) != rq_is_sync(rq2)) 682 if (rq_is_sync(rq1) != rq_is_sync(rq2))
671 return rq_is_sync(rq1) ? rq1 : rq2; 683 return rq_is_sync(rq1) ? rq1 : rq2;
672 684
673 if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
674 return rq1->cmd_flags & REQ_META ? rq1 : rq2;
675
676 s1 = blk_rq_pos(rq1); 685 s1 = blk_rq_pos(rq1);
677 s2 = blk_rq_pos(rq2); 686 s2 = blk_rq_pos(rq2);
678 687
@@ -1005,8 +1014,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
1005 return NULL; 1014 return NULL;
1006} 1015}
1007 1016
1008void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, 1017static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
1009 unsigned int weight) 1018 unsigned int weight)
1010{ 1019{
1011 struct cfq_group *cfqg = cfqg_of_blkg(blkg); 1020 struct cfq_group *cfqg = cfqg_of_blkg(blkg);
1012 cfqg->new_weight = weight; 1021 cfqg->new_weight = weight;
@@ -1059,6 +1068,8 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
1059 *st = CFQ_RB_ROOT; 1068 *st = CFQ_RB_ROOT;
1060 RB_CLEAR_NODE(&cfqg->rb_node); 1069 RB_CLEAR_NODE(&cfqg->rb_node);
1061 1070
1071 cfqg->ttime.last_end_request = jiffies;
1072
1062 /* 1073 /*
1063 * Take the initial reference that will be released on destroy 1074 * Take the initial reference that will be released on destroy
1064 * This can be thought of a joint reference by cgroup and 1075 * This can be thought of a joint reference by cgroup and
@@ -1235,7 +1246,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
1235 * it should not be NULL as even if elevator was exiting, cgroup deltion 1246 * it should not be NULL as even if elevator was exiting, cgroup deltion
1236 * path got to it first. 1247 * path got to it first.
1237 */ 1248 */
1238void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg) 1249static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
1239{ 1250{
1240 unsigned long flags; 1251 unsigned long flags;
1241 struct cfq_data *cfqd = key; 1252 struct cfq_data *cfqd = key;
@@ -1502,16 +1513,11 @@ static void cfq_add_rq_rb(struct request *rq)
1502{ 1513{
1503 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1514 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1504 struct cfq_data *cfqd = cfqq->cfqd; 1515 struct cfq_data *cfqd = cfqq->cfqd;
1505 struct request *__alias, *prev; 1516 struct request *prev;
1506 1517
1507 cfqq->queued[rq_is_sync(rq)]++; 1518 cfqq->queued[rq_is_sync(rq)]++;
1508 1519
1509 /* 1520 elv_rb_add(&cfqq->sort_list, rq);
1510 * looks a little odd, but the first insert might return an alias.
1511 * if that happens, put the alias on the dispatch list
1512 */
1513 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
1514 cfq_dispatch_insert(cfqd->queue, __alias);
1515 1521
1516 if (!cfq_cfqq_on_rr(cfqq)) 1522 if (!cfq_cfqq_on_rr(cfqq))
1517 cfq_add_cfqq_rr(cfqd, cfqq); 1523 cfq_add_cfqq_rr(cfqd, cfqq);
@@ -1598,10 +1604,6 @@ static void cfq_remove_request(struct request *rq)
1598 cfqq->cfqd->rq_queued--; 1604 cfqq->cfqd->rq_queued--;
1599 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, 1605 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
1600 rq_data_dir(rq), rq_is_sync(rq)); 1606 rq_data_dir(rq), rq_is_sync(rq));
1601 if (rq->cmd_flags & REQ_META) {
1602 WARN_ON(!cfqq->meta_pending);
1603 cfqq->meta_pending--;
1604 }
1605} 1607}
1606 1608
1607static int cfq_merge(struct request_queue *q, struct request **req, 1609static int cfq_merge(struct request_queue *q, struct request **req,
@@ -1969,7 +1971,8 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1969 * Otherwise, we do only if they are the last ones 1971 * Otherwise, we do only if they are the last ones
1970 * in their service tree. 1972 * in their service tree.
1971 */ 1973 */
1972 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) 1974 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) &&
1975 !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false))
1973 return true; 1976 return true;
1974 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", 1977 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
1975 service_tree->count); 1978 service_tree->count);
@@ -2022,10 +2025,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2022 * slice, then don't idle. This avoids overrunning the allotted 2025 * slice, then don't idle. This avoids overrunning the allotted
2023 * time slice. 2026 * time slice.
2024 */ 2027 */
2025 if (sample_valid(cic->ttime_samples) && 2028 if (sample_valid(cic->ttime.ttime_samples) &&
2026 (cfqq->slice_end - jiffies < cic->ttime_mean)) { 2029 (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
2027 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", 2030 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
2028 cic->ttime_mean); 2031 cic->ttime.ttime_mean);
2029 return; 2032 return;
2030 } 2033 }
2031 2034
@@ -2381,8 +2384,9 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
2381 * this group, wait for requests to complete. 2384 * this group, wait for requests to complete.
2382 */ 2385 */
2383check_group_idle: 2386check_group_idle:
2384 if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 2387 if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
2385 && cfqq->cfqg->dispatched) { 2388 cfqq->cfqg->dispatched &&
2389 !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
2386 cfqq = NULL; 2390 cfqq = NULL;
2387 goto keep_queue; 2391 goto keep_queue;
2388 } 2392 }
@@ -2833,7 +2837,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
2833 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO, 2837 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
2834 cfqd->queue->node); 2838 cfqd->queue->node);
2835 if (cic) { 2839 if (cic) {
2836 cic->last_end_request = jiffies; 2840 cic->ttime.last_end_request = jiffies;
2837 INIT_LIST_HEAD(&cic->queue_list); 2841 INIT_LIST_HEAD(&cic->queue_list);
2838 INIT_HLIST_NODE(&cic->cic_list); 2842 INIT_HLIST_NODE(&cic->cic_list);
2839 cic->dtor = cfq_free_io_context; 2843 cic->dtor = cfq_free_io_context;
@@ -2883,7 +2887,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
2883 * elevate the priority of this queue 2887 * elevate the priority of this queue
2884 */ 2888 */
2885 cfqq->org_ioprio = cfqq->ioprio; 2889 cfqq->org_ioprio = cfqq->ioprio;
2886 cfqq->org_ioprio_class = cfqq->ioprio_class;
2887 cfq_clear_cfqq_prio_changed(cfqq); 2890 cfq_clear_cfqq_prio_changed(cfqq);
2888} 2891}
2889 2892
@@ -3221,14 +3224,28 @@ err:
3221} 3224}
3222 3225
3223static void 3226static void
3224cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 3227__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
3225{ 3228{
3226 unsigned long elapsed = jiffies - cic->last_end_request; 3229 unsigned long elapsed = jiffies - ttime->last_end_request;
3227 unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 3230 elapsed = min(elapsed, 2UL * slice_idle);
3228 3231
3229 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 3232 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
3230 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 3233 ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
3231 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 3234 ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
3235}
3236
3237static void
3238cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3239 struct cfq_io_context *cic)
3240{
3241 if (cfq_cfqq_sync(cfqq)) {
3242 __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
3243 __cfq_update_io_thinktime(&cfqq->service_tree->ttime,
3244 cfqd->cfq_slice_idle);
3245 }
3246#ifdef CONFIG_CFQ_GROUP_IOSCHED
3247 __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
3248#endif
3232} 3249}
3233 3250
3234static void 3251static void
@@ -3277,8 +3294,8 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3277 else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || 3294 else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
3278 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) 3295 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
3279 enable_idle = 0; 3296 enable_idle = 0;
3280 else if (sample_valid(cic->ttime_samples)) { 3297 else if (sample_valid(cic->ttime.ttime_samples)) {
3281 if (cic->ttime_mean > cfqd->cfq_slice_idle) 3298 if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
3282 enable_idle = 0; 3299 enable_idle = 0;
3283 else 3300 else
3284 enable_idle = 1; 3301 enable_idle = 1;
@@ -3340,13 +3357,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3340 return true; 3357 return true;
3341 3358
3342 /* 3359 /*
3343 * So both queues are sync. Let the new request get disk time if
3344 * it's a metadata request and the current queue is doing regular IO.
3345 */
3346 if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
3347 return true;
3348
3349 /*
3350 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. 3360 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
3351 */ 3361 */
3352 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) 3362 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
@@ -3410,10 +3420,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3410 struct cfq_io_context *cic = RQ_CIC(rq); 3420 struct cfq_io_context *cic = RQ_CIC(rq);
3411 3421
3412 cfqd->rq_queued++; 3422 cfqd->rq_queued++;
3413 if (rq->cmd_flags & REQ_META)
3414 cfqq->meta_pending++;
3415 3423
3416 cfq_update_io_thinktime(cfqd, cic); 3424 cfq_update_io_thinktime(cfqd, cfqq, cic);
3417 cfq_update_io_seektime(cfqd, cfqq, rq); 3425 cfq_update_io_seektime(cfqd, cfqq, rq);
3418 cfq_update_idle_window(cfqd, cfqq, cic); 3426 cfq_update_idle_window(cfqd, cfqq, cic);
3419 3427
@@ -3520,12 +3528,16 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3520 if (cfqq->cfqg->nr_cfqq > 1) 3528 if (cfqq->cfqg->nr_cfqq > 1)
3521 return false; 3529 return false;
3522 3530
3531 /* the only queue in the group, but think time is big */
3532 if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
3533 return false;
3534
3523 if (cfq_slice_used(cfqq)) 3535 if (cfq_slice_used(cfqq))
3524 return true; 3536 return true;
3525 3537
3526 /* if slice left is less than think time, wait busy */ 3538 /* if slice left is less than think time, wait busy */
3527 if (cic && sample_valid(cic->ttime_samples) 3539 if (cic && sample_valid(cic->ttime.ttime_samples)
3528 && (cfqq->slice_end - jiffies < cic->ttime_mean)) 3540 && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
3529 return true; 3541 return true;
3530 3542
3531 /* 3543 /*
@@ -3566,11 +3578,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
3566 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; 3578 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
3567 3579
3568 if (sync) { 3580 if (sync) {
3569 RQ_CIC(rq)->last_end_request = now; 3581 struct cfq_rb_root *service_tree;
3582
3583 RQ_CIC(rq)->ttime.last_end_request = now;
3584
3585 if (cfq_cfqq_on_rr(cfqq))
3586 service_tree = cfqq->service_tree;
3587 else
3588 service_tree = service_tree_for(cfqq->cfqg,
3589 cfqq_prio(cfqq), cfqq_type(cfqq));
3590 service_tree->ttime.last_end_request = now;
3570 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) 3591 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
3571 cfqd->last_delayed_sync = now; 3592 cfqd->last_delayed_sync = now;
3572 } 3593 }
3573 3594
3595#ifdef CONFIG_CFQ_GROUP_IOSCHED
3596 cfqq->cfqg->ttime.last_end_request = now;
3597#endif
3598
3574 /* 3599 /*
3575 * If this is the active queue, check if it needs to be expired, 3600 * If this is the active queue, check if it needs to be expired,
3576 * or if we want to idle in case it has no pending requests. 3601 * or if we want to idle in case it has no pending requests.
@@ -3616,30 +3641,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
3616 cfq_schedule_dispatch(cfqd); 3641 cfq_schedule_dispatch(cfqd);
3617} 3642}
3618 3643
3619/*
3620 * we temporarily boost lower priority queues if they are holding fs exclusive
3621 * resources. they are boosted to normal prio (CLASS_BE/4)
3622 */
3623static void cfq_prio_boost(struct cfq_queue *cfqq)
3624{
3625 if (has_fs_excl()) {
3626 /*
3627 * boost idle prio on transactions that would lock out other
3628 * users of the filesystem
3629 */
3630 if (cfq_class_idle(cfqq))
3631 cfqq->ioprio_class = IOPRIO_CLASS_BE;
3632 if (cfqq->ioprio > IOPRIO_NORM)
3633 cfqq->ioprio = IOPRIO_NORM;
3634 } else {
3635 /*
3636 * unboost the queue (if needed)
3637 */
3638 cfqq->ioprio_class = cfqq->org_ioprio_class;
3639 cfqq->ioprio = cfqq->org_ioprio;
3640 }
3641}
3642
3643static inline int __cfq_may_queue(struct cfq_queue *cfqq) 3644static inline int __cfq_may_queue(struct cfq_queue *cfqq)
3644{ 3645{
3645 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { 3646 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -3670,7 +3671,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
3670 cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); 3671 cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
3671 if (cfqq) { 3672 if (cfqq) {
3672 cfq_init_prio_data(cfqq, cic->ioc); 3673 cfq_init_prio_data(cfqq, cic->ioc);
3673 cfq_prio_boost(cfqq);
3674 3674
3675 return __cfq_may_queue(cfqq); 3675 return __cfq_may_queue(cfqq);
3676 } 3676 }
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index cc3eb78e333a..7b725020823c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -208,19 +208,6 @@ static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode,
208#define BLKBSZSET_32 _IOW(0x12, 113, int) 208#define BLKBSZSET_32 _IOW(0x12, 113, int)
209#define BLKGETSIZE64_32 _IOR(0x12, 114, int) 209#define BLKGETSIZE64_32 _IOR(0x12, 114, int)
210 210
211struct compat_floppy_struct {
212 compat_uint_t size;
213 compat_uint_t sect;
214 compat_uint_t head;
215 compat_uint_t track;
216 compat_uint_t stretch;
217 unsigned char gap;
218 unsigned char rate;
219 unsigned char spec1;
220 unsigned char fmt_gap;
221 const compat_caddr_t name;
222};
223
224struct compat_floppy_drive_params { 211struct compat_floppy_drive_params {
225 char cmos; 212 char cmos;
226 compat_ulong_t max_dtr; 213 compat_ulong_t max_dtr;
@@ -288,7 +275,6 @@ struct compat_floppy_write_errors {
288 275
289#define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct) 276#define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct)
290#define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct) 277#define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct)
291#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
292#define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params) 278#define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params)
293#define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params) 279#define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params)
294#define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct) 280#define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct)
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 5139c0ea1864..c644137d9cd6 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -77,10 +77,8 @@ static void
77deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) 77deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
78{ 78{
79 struct rb_root *root = deadline_rb_root(dd, rq); 79 struct rb_root *root = deadline_rb_root(dd, rq);
80 struct request *__alias;
81 80
82 while (unlikely(__alias = elv_rb_add(root, rq))) 81 elv_rb_add(root, rq);
83 deadline_move_request(dd, __alias);
84} 82}
85 83
86static inline void 84static inline void
diff --git a/block/elevator.c b/block/elevator.c
index b0b38ce0dcb6..a3b64bc71d88 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -353,7 +353,7 @@ static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
353 * RB-tree support functions for inserting/lookup/removal of requests 353 * RB-tree support functions for inserting/lookup/removal of requests
354 * in a sorted RB tree. 354 * in a sorted RB tree.
355 */ 355 */
356struct request *elv_rb_add(struct rb_root *root, struct request *rq) 356void elv_rb_add(struct rb_root *root, struct request *rq)
357{ 357{
358 struct rb_node **p = &root->rb_node; 358 struct rb_node **p = &root->rb_node;
359 struct rb_node *parent = NULL; 359 struct rb_node *parent = NULL;
@@ -365,15 +365,12 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
365 365
366 if (blk_rq_pos(rq) < blk_rq_pos(__rq)) 366 if (blk_rq_pos(rq) < blk_rq_pos(__rq))
367 p = &(*p)->rb_left; 367 p = &(*p)->rb_left;
368 else if (blk_rq_pos(rq) > blk_rq_pos(__rq)) 368 else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
369 p = &(*p)->rb_right; 369 p = &(*p)->rb_right;
370 else
371 return __rq;
372 } 370 }
373 371
374 rb_link_node(&rq->rb_node, parent, p); 372 rb_link_node(&rq->rb_node, parent, p);
375 rb_insert_color(&rq->rb_node, root); 373 rb_insert_color(&rq->rb_node, root);
376 return NULL;
377} 374}
378EXPORT_SYMBOL(elv_rb_add); 375EXPORT_SYMBOL(elv_rb_add);
379 376
diff --git a/block/genhd.c b/block/genhd.c
index 6024b82e3209..5cb51c55f6d8 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -602,7 +602,7 @@ void add_disk(struct gendisk *disk)
602 disk->major = MAJOR(devt); 602 disk->major = MAJOR(devt);
603 disk->first_minor = MINOR(devt); 603 disk->first_minor = MINOR(devt);
604 604
605 /* Register BDI before referencing it from bdev */ 605 /* Register BDI before referencing it from bdev */
606 bdi = &disk->queue->backing_dev_info; 606 bdi = &disk->queue->backing_dev_info;
607 bdi_register_dev(bdi, disk_devt(disk)); 607 bdi_register_dev(bdi, disk_devt(disk));
608 608
@@ -1140,7 +1140,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1140 "wsect wuse running use aveq" 1140 "wsect wuse running use aveq"
1141 "\n\n"); 1141 "\n\n");
1142 */ 1142 */
1143 1143
1144 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); 1144 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
1145 while ((hd = disk_part_iter_next(&piter))) { 1145 while ((hd = disk_part_iter_next(&piter))) {
1146 cpu = part_stat_lock(); 1146 cpu = part_stat_lock();
@@ -1164,7 +1164,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1164 ); 1164 );
1165 } 1165 }
1166 disk_part_iter_exit(&piter); 1166 disk_part_iter_exit(&piter);
1167 1167
1168 return 0; 1168 return 0;
1169} 1169}
1170 1170
@@ -1492,30 +1492,32 @@ void disk_unblock_events(struct gendisk *disk)
 }
 
 /**
- * disk_check_events - schedule immediate event checking
- * @disk: disk to check events for
+ * disk_flush_events - schedule immediate event checking and flushing
+ * @disk: disk to check and flush events for
+ * @mask: events to flush
  *
- * Schedule immediate event checking on @disk if not blocked.
+ * Schedule immediate event checking on @disk if not blocked. Events in
+ * @mask are scheduled to be cleared from the driver. Note that this
+ * doesn't clear the events from @disk->ev.
  *
  * CONTEXT:
- * Don't care. Safe to call from irq context.
+ * If @mask is non-zero must be called with bdev->bd_mutex held.
  */
-void disk_check_events(struct gendisk *disk)
+void disk_flush_events(struct gendisk *disk, unsigned int mask)
 {
 	struct disk_events *ev = disk->ev;
-	unsigned long flags;
 
 	if (!ev)
 		return;
 
-	spin_lock_irqsave(&ev->lock, flags);
+	spin_lock_irq(&ev->lock);
+	ev->clearing |= mask;
 	if (!ev->block) {
 		cancel_delayed_work(&ev->dwork);
 		queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
 	}
-	spin_unlock_irqrestore(&ev->lock, flags);
+	spin_unlock_irq(&ev->lock);
 }
-EXPORT_SYMBOL_GPL(disk_check_events);
 
 /**
  * disk_clear_events - synchronously check, clear and return pending events
@@ -1705,7 +1707,7 @@ static int disk_events_set_dfl_poll_msecs(const char *val,
1705 mutex_lock(&disk_events_mutex); 1707 mutex_lock(&disk_events_mutex);
1706 1708
1707 list_for_each_entry(ev, &disk_events, node) 1709 list_for_each_entry(ev, &disk_events, node)
1708 disk_check_events(ev->disk); 1710 disk_flush_events(ev->disk, 0);
1709 1711
1710 mutex_unlock(&disk_events_mutex); 1712 mutex_unlock(&disk_events_mutex);
1711 1713
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9fb0b15331d3..c62fb84944d5 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1448,6 +1448,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1448 1448
1449int blkdev_put(struct block_device *bdev, fmode_t mode) 1449int blkdev_put(struct block_device *bdev, fmode_t mode)
1450{ 1450{
1451 mutex_lock(&bdev->bd_mutex);
1452
1451 if (mode & FMODE_EXCL) { 1453 if (mode & FMODE_EXCL) {
1452 bool bdev_free; 1454 bool bdev_free;
1453 1455
@@ -1456,7 +1458,6 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
1456 * are protected with bdev_lock. bd_mutex is to 1458 * are protected with bdev_lock. bd_mutex is to
1457 * synchronize disk_holder unlinking. 1459 * synchronize disk_holder unlinking.
1458 */ 1460 */
1459 mutex_lock(&bdev->bd_mutex);
1460 spin_lock(&bdev_lock); 1461 spin_lock(&bdev_lock);
1461 1462
1462 WARN_ON_ONCE(--bdev->bd_holders < 0); 1463 WARN_ON_ONCE(--bdev->bd_holders < 0);
@@ -1474,17 +1475,21 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
1474 * If this was the last claim, remove holder link and 1475 * If this was the last claim, remove holder link and
1475 * unblock evpoll if it was a write holder. 1476 * unblock evpoll if it was a write holder.
1476 */ 1477 */
1477 if (bdev_free) { 1478 if (bdev_free && bdev->bd_write_holder) {
1478 if (bdev->bd_write_holder) { 1479 disk_unblock_events(bdev->bd_disk);
1479 disk_unblock_events(bdev->bd_disk); 1480 bdev->bd_write_holder = false;
1480 disk_check_events(bdev->bd_disk);
1481 bdev->bd_write_holder = false;
1482 }
1483 } 1481 }
1484
1485 mutex_unlock(&bdev->bd_mutex);
1486 } 1482 }
1487 1483
1484 /*
1485 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
1486 * event. This is to ensure detection of media removal commanded
1487 * from userland - e.g. eject(1).
1488 */
1489 disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
1490
1491 mutex_unlock(&bdev->bd_mutex);
1492
1488 return __blkdev_put(bdev, mode, 0); 1493 return __blkdev_put(bdev, mode, 0);
1489} 1494}
1490EXPORT_SYMBOL(blkdev_put); 1495EXPORT_SYMBOL(blkdev_put);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 61abb638b4bf..8be086e9abe4 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -68,6 +68,8 @@
68 68
69#ifdef CONFIG_BLOCK 69#ifdef CONFIG_BLOCK
70#include <linux/loop.h> 70#include <linux/loop.h>
71#include <linux/cdrom.h>
72#include <linux/fd.h>
71#include <scsi/scsi.h> 73#include <scsi/scsi.h>
72#include <scsi/scsi_ioctl.h> 74#include <scsi/scsi_ioctl.h>
73#include <scsi/sg.h> 75#include <scsi/sg.h>
@@ -944,6 +946,9 @@ COMPATIBLE_IOCTL(FIOQSIZE)
944IGNORE_IOCTL(LOOP_CLR_FD) 946IGNORE_IOCTL(LOOP_CLR_FD)
945/* md calls this on random blockdevs */ 947/* md calls this on random blockdevs */
946IGNORE_IOCTL(RAID_VERSION) 948IGNORE_IOCTL(RAID_VERSION)
949/* qemu/qemu-img might call these two on plain files for probing */
950IGNORE_IOCTL(CDROM_DRIVE_STATUS)
951IGNORE_IOCTL(FDGETPRM32)
947/* SG stuff */ 952/* SG stuff */
948COMPATIBLE_IOCTL(SG_SET_TIMEOUT) 953COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
949COMPATIBLE_IOCTL(SG_GET_TIMEOUT) 954COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..e3c63d1c5e13 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -237,22 +237,22 @@ ssize_t part_size_show(struct device *dev,
237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); 237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
238} 238}
239 239
240ssize_t part_ro_show(struct device *dev, 240static ssize_t part_ro_show(struct device *dev,
241 struct device_attribute *attr, char *buf) 241 struct device_attribute *attr, char *buf)
242{ 242{
243 struct hd_struct *p = dev_to_part(dev); 243 struct hd_struct *p = dev_to_part(dev);
244 return sprintf(buf, "%d\n", p->policy ? 1 : 0); 244 return sprintf(buf, "%d\n", p->policy ? 1 : 0);
245} 245}
246 246
247ssize_t part_alignment_offset_show(struct device *dev, 247static ssize_t part_alignment_offset_show(struct device *dev,
248 struct device_attribute *attr, char *buf) 248 struct device_attribute *attr, char *buf)
249{ 249{
250 struct hd_struct *p = dev_to_part(dev); 250 struct hd_struct *p = dev_to_part(dev);
251 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); 251 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
252} 252}
253 253
254ssize_t part_discard_alignment_show(struct device *dev, 254static ssize_t part_discard_alignment_show(struct device *dev,
255 struct device_attribute *attr, char *buf) 255 struct device_attribute *attr, char *buf)
256{ 256{
257 struct hd_struct *p = dev_to_part(dev); 257 struct hd_struct *p = dev_to_part(dev);
258 return sprintf(buf, "%u\n", p->discard_alignment); 258 return sprintf(buf, "%u\n", p->discard_alignment);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c5e82ece7c6c..a159ba5a35e7 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -678,23 +678,19 @@ struct buffer_chunk {
678static void write_chunk(struct buffer_chunk *chunk) 678static void write_chunk(struct buffer_chunk *chunk)
679{ 679{
680 int i; 680 int i;
681 get_fs_excl();
682 for (i = 0; i < chunk->nr; i++) { 681 for (i = 0; i < chunk->nr; i++) {
683 submit_logged_buffer(chunk->bh[i]); 682 submit_logged_buffer(chunk->bh[i]);
684 } 683 }
685 chunk->nr = 0; 684 chunk->nr = 0;
686 put_fs_excl();
687} 685}
688 686
689static void write_ordered_chunk(struct buffer_chunk *chunk) 687static void write_ordered_chunk(struct buffer_chunk *chunk)
690{ 688{
691 int i; 689 int i;
692 get_fs_excl();
693 for (i = 0; i < chunk->nr; i++) { 690 for (i = 0; i < chunk->nr; i++) {
694 submit_ordered_buffer(chunk->bh[i]); 691 submit_ordered_buffer(chunk->bh[i]);
695 } 692 }
696 chunk->nr = 0; 693 chunk->nr = 0;
697 put_fs_excl();
698} 694}
699 695
700static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, 696static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
@@ -986,8 +982,6 @@ static int flush_commit_list(struct super_block *s,
986 return 0; 982 return 0;
987 } 983 }
988 984
989 get_fs_excl();
990
991 /* before we can put our commit blocks on disk, we have to make sure everyone older than 985 /* before we can put our commit blocks on disk, we have to make sure everyone older than
992 ** us is on disk too 986 ** us is on disk too
993 */ 987 */
@@ -1145,7 +1139,6 @@ static int flush_commit_list(struct super_block *s,
1145 if (retval) 1139 if (retval)
1146 reiserfs_abort(s, retval, "Journal write error in %s", 1140 reiserfs_abort(s, retval, "Journal write error in %s",
1147 __func__); 1141 __func__);
1148 put_fs_excl();
1149 return retval; 1142 return retval;
1150} 1143}
1151 1144
@@ -1374,8 +1367,6 @@ static int flush_journal_list(struct super_block *s,
1374 return 0; 1367 return 0;
1375 } 1368 }
1376 1369
1377 get_fs_excl();
1378
1379 /* if all the work is already done, get out of here */ 1370 /* if all the work is already done, get out of here */
1380 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1371 if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
1381 atomic_read(&(jl->j_commit_left)) <= 0) { 1372 atomic_read(&(jl->j_commit_left)) <= 0) {
@@ -1597,7 +1588,6 @@ static int flush_journal_list(struct super_block *s,
1597 put_journal_list(s, jl); 1588 put_journal_list(s, jl);
1598 if (flushall) 1589 if (flushall)
1599 mutex_unlock(&journal->j_flush_mutex); 1590 mutex_unlock(&journal->j_flush_mutex);
1600 put_fs_excl();
1601 return err; 1591 return err;
1602} 1592}
1603 1593
@@ -3108,7 +3098,6 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3108 th->t_trans_id = journal->j_trans_id; 3098 th->t_trans_id = journal->j_trans_id;
3109 unlock_journal(sb); 3099 unlock_journal(sb);
3110 INIT_LIST_HEAD(&th->t_list); 3100 INIT_LIST_HEAD(&th->t_list);
3111 get_fs_excl();
3112 return 0; 3101 return 0;
3113 3102
3114 out_fail: 3103 out_fail:
@@ -3964,7 +3953,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3964 flush = flags & FLUSH_ALL; 3953 flush = flags & FLUSH_ALL;
3965 wait_on_commit = flags & WAIT; 3954 wait_on_commit = flags & WAIT;
3966 3955
3967 put_fs_excl();
3968 current->journal_info = th->t_handle_save; 3956 current->journal_info = th->t_handle_save;
3969 reiserfs_check_lock_depth(sb, "journal end"); 3957 reiserfs_check_lock_depth(sb, "journal end");
3970 if (journal->j_len == 0) { 3958 if (journal->j_len == 0) {
@@ -4316,4 +4304,3 @@ void reiserfs_abort_journal(struct super_block *sb, int errno)
4316 dump_stack(); 4304 dump_stack();
4317#endif 4305#endif
4318} 4306}
4319
diff --git a/fs/super.c b/fs/super.c
index 7943f04cb3a9..3f56a269a4f4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -351,13 +351,11 @@ bool grab_super_passive(struct super_block *sb)
351 */ 351 */
352void lock_super(struct super_block * sb) 352void lock_super(struct super_block * sb)
353{ 353{
354 get_fs_excl();
355 mutex_lock(&sb->s_lock); 354 mutex_lock(&sb->s_lock);
356} 355}
357 356
358void unlock_super(struct super_block * sb) 357void unlock_super(struct super_block * sb)
359{ 358{
360 put_fs_excl();
361 mutex_unlock(&sb->s_lock); 359 mutex_unlock(&sb->s_lock);
362} 360}
363 361
@@ -385,7 +383,6 @@ void generic_shutdown_super(struct super_block *sb)
385 if (sb->s_root) { 383 if (sb->s_root) {
386 shrink_dcache_for_umount(sb); 384 shrink_dcache_for_umount(sb);
387 sync_filesystem(sb); 385 sync_filesystem(sb);
388 get_fs_excl();
389 sb->s_flags &= ~MS_ACTIVE; 386 sb->s_flags &= ~MS_ACTIVE;
390 387
391 fsnotify_unmount_inodes(&sb->s_inodes); 388 fsnotify_unmount_inodes(&sb->s_inodes);
@@ -400,7 +397,6 @@ void generic_shutdown_super(struct super_block *sb)
400 "Self-destruct in 5 seconds. Have a nice day...\n", 397 "Self-destruct in 5 seconds. Have a nice day...\n",
401 sb->s_id); 398 sb->s_id);
402 } 399 }
403 put_fs_excl();
404 } 400 }
405 spin_lock(&sb_lock); 401 spin_lock(&sb_lock);
406 /* should be initialized for __put_super_and_need_restart() */ 402 /* should be initialized for __put_super_and_need_restart() */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1a23722e8878..0e67c45b3bc9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -73,7 +73,7 @@ enum rq_cmd_type_bits {
73 73
74/* 74/*
75 * try to put the fields that are referenced together in the same cacheline. 75 * try to put the fields that are referenced together in the same cacheline.
76 * if you modify this structure, be sure to check block/blk-core.c:rq_init() 76 * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init()
77 * as well! 77 * as well!
78 */ 78 */
79struct request { 79struct request {
@@ -260,8 +260,7 @@ struct queue_limits {
260 unsigned char discard_zeroes_data; 260 unsigned char discard_zeroes_data;
261}; 261};
262 262
263struct request_queue 263struct request_queue {
264{
265 /* 264 /*
266 * Together with queue_head for cacheline sharing 265 * Together with queue_head for cacheline sharing
267 */ 266 */
@@ -304,14 +303,14 @@ struct request_queue
304 void *queuedata; 303 void *queuedata;
305 304
306 /* 305 /*
307 * queue needs bounce pages for pages above this limit 306 * various queue flags, see QUEUE_* below
308 */ 307 */
309 gfp_t bounce_gfp; 308 unsigned long queue_flags;
310 309
311 /* 310 /*
312 * various queue flags, see QUEUE_* below 311 * queue needs bounce pages for pages above this limit
313 */ 312 */
314 unsigned long queue_flags; 313 gfp_t bounce_gfp;
315 314
316 /* 315 /*
317 * protects queue structures from reentrancy. ->__queue_lock should 316 * protects queue structures from reentrancy. ->__queue_lock should
@@ -334,8 +333,8 @@ struct request_queue
334 unsigned int nr_congestion_off; 333 unsigned int nr_congestion_off;
335 unsigned int nr_batching; 334 unsigned int nr_batching;
336 335
337 void *dma_drain_buffer;
338 unsigned int dma_drain_size; 336 unsigned int dma_drain_size;
337 void *dma_drain_buffer;
339 unsigned int dma_pad_mask; 338 unsigned int dma_pad_mask;
340 unsigned int dma_alignment; 339 unsigned int dma_alignment;
341 340
@@ -393,7 +392,7 @@ struct request_queue
393#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */ 392#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */
394#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ 393#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */
395#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ 394#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */
396#define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */ 395#define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */
397#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ 396#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */
398#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ 397#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */
399#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ 398#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */
@@ -403,6 +402,7 @@ struct request_queue
403#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ 402#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */
404#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ 403#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */
405#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ 404#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */
405#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
406 406
407#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 407#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
408 (1 << QUEUE_FLAG_STACKABLE) | \ 408 (1 << QUEUE_FLAG_STACKABLE) | \
@@ -857,12 +857,21 @@ struct request_queue *blk_alloc_queue(gfp_t);
 struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
 
+/*
+ * Note: Code in between changing the blk_plug list/cb_list or element of such
+ * lists is preemptable, but such code can't do sleep (or be very careful),
+ * otherwise data is corrupted. For details, please check schedule() where
+ * blk_schedule_flush_plug() is called.
+ */
 struct blk_plug {
 	unsigned long magic;
 	struct list_head list;
 	struct list_head cb_list;
 	unsigned int should_sort;
+	unsigned int count;
 };
+#define BLK_MAX_REQUEST_COUNT 16
+
 struct blk_plug_cb {
 	struct list_head list;
 	void (*callback)(struct blk_plug_cb *);
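For context on the new count field and BLK_MAX_REQUEST_COUNT: a submitter batches requests roughly as in the fragment below. This is an in-kernel style sketch only, assuming the 2011-era submit_bio(rw, bio) signature; bios[] and nr are hypothetical. Once 16 requests accumulate on plug->list, the blk-core.c hunk earlier flushes the plug automatically instead of letting the list grow without bound.

/* Sketch only: batch a set of reads under a plug. */
struct blk_plug plug;
int i;

blk_start_plug(&plug);
for (i = 0; i < nr; i++)
	submit_bio(READ, bios[i]);	/* requests queue up on plug.list */
blk_finish_plug(&plug);			/* flushes whatever is still plugged */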
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 21a8ebf2dc3a..d800d5142184 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -146,7 +146,7 @@ extern struct request *elv_rb_latter_request(struct request_queue *, struct requ
146/* 146/*
147 * rb support functions. 147 * rb support functions.
148 */ 148 */
149extern struct request *elv_rb_add(struct rb_root *, struct request *); 149extern void elv_rb_add(struct rb_root *, struct request *);
150extern void elv_rb_del(struct rb_root *, struct request *); 150extern void elv_rb_del(struct rb_root *, struct request *);
151extern struct request *elv_rb_find(struct rb_root *, sector_t); 151extern struct request *elv_rb_find(struct rb_root *, sector_t);
152 152
diff --git a/include/linux/fd.h b/include/linux/fd.h
index f5d194af07a8..72202b1b9a6a 100644
--- a/include/linux/fd.h
+++ b/include/linux/fd.h
@@ -377,4 +377,26 @@ struct floppy_raw_cmd {
377#define FDEJECT _IO(2, 0x5a) 377#define FDEJECT _IO(2, 0x5a)
378/* eject the disk */ 378/* eject the disk */
379 379
380
381#ifdef __KERNEL__
382#ifdef CONFIG_COMPAT
383#include <linux/compat.h>
384
385struct compat_floppy_struct {
386 compat_uint_t size;
387 compat_uint_t sect;
388 compat_uint_t head;
389 compat_uint_t track;
390 compat_uint_t stretch;
391 unsigned char gap;
392 unsigned char rate;
393 unsigned char spec1;
394 unsigned char fmt_gap;
395 const compat_caddr_t name;
396};
397
398#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
399#endif
400#endif
401
380#endif 402#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b224dc468a23..0c35d6e767d9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1469,10 +1469,6 @@ enum {
1469#define vfs_check_frozen(sb, level) \ 1469#define vfs_check_frozen(sb, level) \
1470 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) 1470 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
1471 1471
1472#define get_fs_excl() atomic_inc(&current->fs_excl)
1473#define put_fs_excl() atomic_dec(&current->fs_excl)
1474#define has_fs_excl() atomic_read(&current->fs_excl)
1475
1476/* 1472/*
1477 * until VFS tracks user namespaces for inodes, just make all files 1473 * until VFS tracks user namespaces for inodes, just make all files
1478 * belong to init_user_ns 1474 * belong to init_user_ns
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 300d7582006e..02fa4697a0e5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -420,7 +420,7 @@ static inline int get_disk_ro(struct gendisk *disk)
420 420
421extern void disk_block_events(struct gendisk *disk); 421extern void disk_block_events(struct gendisk *disk);
422extern void disk_unblock_events(struct gendisk *disk); 422extern void disk_unblock_events(struct gendisk *disk);
423extern void disk_check_events(struct gendisk *disk); 423extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
424extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); 424extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
425 425
426/* drivers/char/random.c */ 426/* drivers/char/random.c */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 580f70c02391..d14e058aaeed 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -176,7 +176,6 @@ extern struct cred init_cred;
176 .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ 176 .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
177 .journal_info = NULL, \ 177 .journal_info = NULL, \
178 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ 178 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
179 .fs_excl = ATOMIC_INIT(0), \
180 .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ 179 .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
181 .timer_slack_ns = 50000, /* 50 usec default slack */ \ 180 .timer_slack_ns = 50000, /* 50 usec default slack */ \
182 .pids = { \ 181 .pids = { \
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index b2eee896dcbc..5037a0ad2312 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -5,6 +5,14 @@
 #include <linux/rcupdate.h>
 
 struct cfq_queue;
+struct cfq_ttime {
+	unsigned long last_end_request;
+
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+};
+
 struct cfq_io_context {
 	void *key;
 
@@ -12,11 +20,7 @@ struct cfq_io_context {
 
 	struct io_context *ioc;
 
-	unsigned long last_end_request;
-
-	unsigned long ttime_total;
-	unsigned long ttime_samples;
-	unsigned long ttime_mean;
+	struct cfq_ttime ttime;
 
 	struct list_head queue_list;
 	struct hlist_node cic_list;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ed766add9b23..20b03bf94748 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1512,7 +1512,6 @@ struct task_struct {
1512 short il_next; 1512 short il_next;
1513 short pref_node_fork; 1513 short pref_node_fork;
1514#endif 1514#endif
1515 atomic_t fs_excl; /* holding fs exclusive resources */
1516 struct rcu_head rcu; 1515 struct rcu_head rcu;
1517 1516
1518 /* 1517 /*
diff --git a/kernel/exit.c b/kernel/exit.c
index 73bb192a3d32..12ea415c6435 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -898,7 +898,6 @@ NORET_TYPE void do_exit(long code)
898 898
899 profile_task_exit(tsk); 899 profile_task_exit(tsk);
900 900
901 WARN_ON(atomic_read(&tsk->fs_excl));
902 WARN_ON(blk_needs_flush_plug(tsk)); 901 WARN_ON(blk_needs_flush_plug(tsk));
903 902
904 if (unlikely(in_interrupt())) 903 if (unlikely(in_interrupt()))
diff --git a/kernel/fork.c b/kernel/fork.c
index aeae5b11b62e..17bf7c8d6511 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -290,7 +290,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
290 290
291 /* One for us, one for whoever does the "release_task()" (usually parent) */ 291 /* One for us, one for whoever does the "release_task()" (usually parent) */
292 atomic_set(&tsk->usage,2); 292 atomic_set(&tsk->usage,2);
293 atomic_set(&tsk->fs_excl, 0);
294#ifdef CONFIG_BLK_DEV_IO_TRACE 293#ifdef CONFIG_BLK_DEV_IO_TRACE
295 tsk->btrace_seq = 0; 294 tsk->btrace_seq = 0;
296#endif 295#endif
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f032e6e1e09a..2ef0dc9e7f39 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -505,7 +505,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
505 list_del_rcu(&bdi->bdi_list); 505 list_del_rcu(&bdi->bdi_list);
506 spin_unlock_bh(&bdi_lock); 506 spin_unlock_bh(&bdi_lock);
507 507
508 synchronize_rcu(); 508 synchronize_rcu_expedited();
509} 509}
510 510
511int bdi_register(struct backing_dev_info *bdi, struct device *parent, 511int bdi_register(struct backing_dev_info *bdi, struct device *parent,