Diffstat:
 -rw-r--r--  Documentation/block/queue-sysfs.txt |  10
 -rw-r--r--  block/blk-core.c                     |  11
 -rw-r--r--  block/blk-ioc.c                      |  40
 -rw-r--r--  block/blk-lib.c                      |   5
 -rw-r--r--  block/blk-softirq.c                  |  11
 -rw-r--r--  block/blk-sysfs.c                    |  13
 -rw-r--r--  block/blk-throttle.c                 |   8
 -rw-r--r--  block/cfq-iosched.c                  | 152
 -rw-r--r--  block/compat_ioctl.c                 |  14
 -rw-r--r--  block/deadline-iosched.c             |   4
 -rw-r--r--  block/elevator.c                     |   7
 -rw-r--r--  block/genhd.c                        |  28
 -rw-r--r--  fs/block_dev.c                       |  23
 -rw-r--r--  fs/compat_ioctl.c                    |   5
 -rw-r--r--  fs/partitions/check.c                |  12
 -rw-r--r--  fs/reiserfs/journal.c                |  13
 -rw-r--r--  fs/super.c                           |   4
 -rw-r--r--  include/linux/blkdev.h               |  27
 -rw-r--r--  include/linux/elevator.h             |   2
 -rw-r--r--  include/linux/fd.h                   |  22
 -rw-r--r--  include/linux/fs.h                   |   4
 -rw-r--r--  include/linux/genhd.h                |   2
 -rw-r--r--  include/linux/init_task.h            |   1
 -rw-r--r--  include/linux/iocontext.h            |  14
 -rw-r--r--  include/linux/sched.h                |   1
 -rw-r--r--  kernel/exit.c                        |   1
 -rw-r--r--  kernel/fork.c                        |   1
 -rw-r--r--  mm/backing-dev.c                     |   2
 28 files changed, 229 insertions, 208 deletions
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index f65274081c8d..d8147b336c35 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -45,9 +45,13 @@ device.
45 45
46rq_affinity (RW) 46rq_affinity (RW)
47---------------- 47----------------
48If this option is enabled, the block layer will migrate request completions 48If this option is '1', the block layer will migrate request completions to the
49to the CPU that originally submitted the request. For some workloads 49cpu "group" that originally submitted the request. For some workloads this
50this provides a significant reduction in CPU cycles due to caching effects. 50provides a significant reduction in CPU cycles due to caching effects.
51
52For storage configurations that need to maximize distribution of completion
53processing setting this option to '2' forces the completion to run on the
54requesting cpu (bypassing the "group" aggregation logic).
51 55
52scheduler (RW) 56scheduler (RW)
53-------------- 57--------------
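
The updated documentation above describes three rq_affinity settings: '0' leaves completions on whichever CPU handled them, '1' steers them to the submitting CPU's "group", and '2' forces them onto the submitting CPU itself. As a minimal usage sketch (not part of this patch; the device path "sda" is only an example), a userspace tool could select the forced mode like this:

	/* Sketch: set rq_affinity=2 for one device. "sda" is a placeholder. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/block/sda/queue/rq_affinity", "w");

		if (!f)
			return 1;
		fputs("2\n", f);	/* 0 = off, 1 = submitter's group, 2 = submitting CPU */
		return fclose(f) ? 1 : 0;
	}

Reading the file back yields 0, 1 or 2 depending on which of QUEUE_FLAG_SAME_COMP and QUEUE_FLAG_SAME_FORCE are set, as shown in the blk-sysfs.c hunk further down.
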
diff --git a/block/blk-core.c b/block/blk-core.c
index 1d49e1c7c905..f8cb09951830 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1282,10 +1282,8 @@ get_rq:
1282 init_request_from_bio(req, bio); 1282 init_request_from_bio(req, bio);
1283 1283
1284 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1284 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1285 bio_flagged(bio, BIO_CPU_AFFINE)) { 1285 bio_flagged(bio, BIO_CPU_AFFINE))
1286 req->cpu = blk_cpu_to_group(get_cpu()); 1286 req->cpu = smp_processor_id();
1287 put_cpu();
1288 }
1289 1287
1290 plug = current->plug; 1288 plug = current->plug;
1291 if (plug) { 1289 if (plug) {
@@ -1305,7 +1303,10 @@ get_rq:
1305 plug->should_sort = 1; 1303 plug->should_sort = 1;
1306 } 1304 }
1307 list_add_tail(&req->queuelist, &plug->list); 1305 list_add_tail(&req->queuelist, &plug->list);
1306 plug->count++;
1308 drive_stat_acct(req, 1); 1307 drive_stat_acct(req, 1);
1308 if (plug->count >= BLK_MAX_REQUEST_COUNT)
1309 blk_flush_plug_list(plug, false);
1309 } else { 1310 } else {
1310 spin_lock_irq(q->queue_lock); 1311 spin_lock_irq(q->queue_lock);
1311 add_acct_request(q, req, where); 1312 add_acct_request(q, req, where);
@@ -2629,6 +2630,7 @@ void blk_start_plug(struct blk_plug *plug)
2629 INIT_LIST_HEAD(&plug->list); 2630 INIT_LIST_HEAD(&plug->list);
2630 INIT_LIST_HEAD(&plug->cb_list); 2631 INIT_LIST_HEAD(&plug->cb_list);
2631 plug->should_sort = 0; 2632 plug->should_sort = 0;
2633 plug->count = 0;
2632 2634
2633 /* 2635 /*
2634 * If this is a nested plug, don't actually assign it. It will be 2636 * If this is a nested plug, don't actually assign it. It will be
@@ -2712,6 +2714,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
2712 return; 2714 return;
2713 2715
2714 list_splice_init(&plug->list, &list); 2716 list_splice_init(&plug->list, &list);
2717 plug->count = 0;
2715 2718
2716 if (plug->should_sort) { 2719 if (plug->should_sort) {
2717 list_sort(NULL, &list, plug_rq_cmp); 2720 list_sort(NULL, &list, plug_rq_cmp);
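
The new plug->count field together with BLK_MAX_REQUEST_COUNT (16, defined in blkdev.h below) caps how many requests may accumulate on an on-stack plug list before it is flushed, so a single task cannot build up an arbitrarily long private list. A minimal sketch of the pattern this applies to (have_more_work() and next_bio() are hypothetical stand-ins for the caller's own I/O generation):

	/* Sketch only: the plug list now auto-flushes once 16 requests are queued. */
	static void submit_batch(void)
	{
		struct blk_plug plug;

		blk_start_plug(&plug);			/* plug->count = 0 */
		while (have_more_work())
			submit_bio(WRITE, next_bio());	/* each queued request bumps
							 * plug->count; on reaching
							 * BLK_MAX_REQUEST_COUNT the
							 * list is flushed early     */
		blk_finish_plug(&plug);			/* flushes whatever remains */
	}
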
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 342eae9b0d3c..6f9bbd978653 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -82,26 +82,26 @@ void exit_io_context(struct task_struct *task)
82 82
83struct io_context *alloc_io_context(gfp_t gfp_flags, int node) 83struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
84{ 84{
85 struct io_context *ret; 85 struct io_context *ioc;
86 86
87 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); 87 ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
88 if (ret) { 88 if (ioc) {
89 atomic_long_set(&ret->refcount, 1); 89 atomic_long_set(&ioc->refcount, 1);
90 atomic_set(&ret->nr_tasks, 1); 90 atomic_set(&ioc->nr_tasks, 1);
91 spin_lock_init(&ret->lock); 91 spin_lock_init(&ioc->lock);
92 ret->ioprio_changed = 0; 92 ioc->ioprio_changed = 0;
93 ret->ioprio = 0; 93 ioc->ioprio = 0;
94 ret->last_waited = 0; /* doesn't matter... */ 94 ioc->last_waited = 0; /* doesn't matter... */
95 ret->nr_batch_requests = 0; /* because this is 0 */ 95 ioc->nr_batch_requests = 0; /* because this is 0 */
96 INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); 96 INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
97 INIT_HLIST_HEAD(&ret->cic_list); 97 INIT_HLIST_HEAD(&ioc->cic_list);
98 ret->ioc_data = NULL; 98 ioc->ioc_data = NULL;
99#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) 99#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
100 ret->cgroup_changed = 0; 100 ioc->cgroup_changed = 0;
101#endif 101#endif
102 } 102 }
103 103
104 return ret; 104 return ioc;
105} 105}
106 106
107/* 107/*
@@ -139,19 +139,19 @@ struct io_context *current_io_context(gfp_t gfp_flags, int node)
139 */ 139 */
140struct io_context *get_io_context(gfp_t gfp_flags, int node) 140struct io_context *get_io_context(gfp_t gfp_flags, int node)
141{ 141{
142 struct io_context *ret = NULL; 142 struct io_context *ioc = NULL;
143 143
144 /* 144 /*
145 * Check for unlikely race with exiting task. ioc ref count is 145 * Check for unlikely race with exiting task. ioc ref count is
146 * zero when ioc is being detached. 146 * zero when ioc is being detached.
147 */ 147 */
148 do { 148 do {
149 ret = current_io_context(gfp_flags, node); 149 ioc = current_io_context(gfp_flags, node);
150 if (unlikely(!ret)) 150 if (unlikely(!ioc))
151 break; 151 break;
152 } while (!atomic_long_inc_not_zero(&ret->refcount)); 152 } while (!atomic_long_inc_not_zero(&ioc->refcount));
153 153
154 return ret; 154 return ioc;
155} 155}
156EXPORT_SYMBOL(get_io_context); 156EXPORT_SYMBOL(get_io_context);
157 157
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 78e627e2581d..2b461b496a78 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -59,7 +59,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
59 * granularity 59 * granularity
60 */ 60 */
61 max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); 61 max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
62 if (q->limits.discard_granularity) { 62 if (unlikely(!max_discard_sectors)) {
63 /* Avoid infinite loop below. Being cautious never hurts. */
64 return -EOPNOTSUPP;
65 } else if (q->limits.discard_granularity) {
63 unsigned int disc_sects = q->limits.discard_granularity >> 9; 66 unsigned int disc_sects = q->limits.discard_granularity >> 9;
64 67
65 max_discard_sectors &= ~(disc_sects - 1); 68 max_discard_sectors &= ~(disc_sects - 1);
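
The new early return guards against q->limits.max_discard_sectors being zero. The reason is visible in the shape of the request-splitting loop further down in blkdev_issue_discard() (simplified sketch below, not the literal kernel code): with a zero cap the remaining sector count never shrinks, so the loop would spin forever.

	/* Simplified shape of the discard splitting loop, for illustration only. */
	while (nr_sects) {
		sector_t this_count = min_t(sector_t, nr_sects, max_discard_sectors);

		/* ...allocate and submit a discard bio covering this_count sectors... */
		nr_sects -= this_count;		/* with max_discard_sectors == 0 this
						 * subtracts nothing: infinite loop   */
		sector += this_count;
	}
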
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ee9c21602228..475fab809a80 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -103,22 +103,25 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
103 103
104void __blk_complete_request(struct request *req) 104void __blk_complete_request(struct request *req)
105{ 105{
106 int ccpu, cpu, group_cpu = NR_CPUS;
106 struct request_queue *q = req->q; 107 struct request_queue *q = req->q;
107 unsigned long flags; 108 unsigned long flags;
108 int ccpu, cpu, group_cpu;
109 109
110 BUG_ON(!q->softirq_done_fn); 110 BUG_ON(!q->softirq_done_fn);
111 111
112 local_irq_save(flags); 112 local_irq_save(flags);
113 cpu = smp_processor_id(); 113 cpu = smp_processor_id();
114 group_cpu = blk_cpu_to_group(cpu);
115 114
116 /* 115 /*
117 * Select completion CPU 116 * Select completion CPU
118 */ 117 */
119 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) 118 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
120 ccpu = req->cpu; 119 ccpu = req->cpu;
121 else 120 if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
121 ccpu = blk_cpu_to_group(ccpu);
122 group_cpu = blk_cpu_to_group(cpu);
123 }
124 } else
122 ccpu = cpu; 125 ccpu = cpu;
123 126
124 if (ccpu == cpu || ccpu == group_cpu) { 127 if (ccpu == cpu || ccpu == group_cpu) {
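
With QUEUE_FLAG_SAME_FORCE in place, the completion-CPU choice above becomes a three-way policy. A condensed restatement (a sketch for clarity, not the kernel function itself):

	/* Sketch: which CPU ends up running the softirq completion. */
	static int completion_target(int submit_cpu, int irq_cpu,
				     bool same_comp, bool same_force)
	{
		if (!same_comp || submit_cpu == -1)
			return irq_cpu;			/* rq_affinity = 0 */
		if (same_force)
			return submit_cpu;		/* rq_affinity = 2 */
		/* rq_affinity = 1: any CPU in the submitter's group is acceptable */
		if (blk_cpu_to_group(submit_cpu) == blk_cpu_to_group(irq_cpu))
			return irq_cpu;
		return blk_cpu_to_group(submit_cpu);
	}
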
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d935bd859c87..0ee17b5e7fb6 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -244,8 +244,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
244static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page) 244static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
245{ 245{
246 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags); 246 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
247 bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
247 248
248 return queue_var_show(set, page); 249 return queue_var_show(set << force, page);
249} 250}
250 251
251static ssize_t 252static ssize_t
@@ -257,10 +258,14 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
257 258
258 ret = queue_var_store(&val, page, count); 259 ret = queue_var_store(&val, page, count);
259 spin_lock_irq(q->queue_lock); 260 spin_lock_irq(q->queue_lock);
260 if (val) 261 if (val) {
261 queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 262 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
262 else 263 if (val == 2)
263 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); 264 queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
265 } else {
266 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
267 queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
268 }
264 spin_unlock_irq(q->queue_lock); 269 spin_unlock_irq(q->queue_lock);
265#endif 270#endif
266 return ret; 271 return ret;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 3689f833afdc..f6a794120505 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -142,9 +142,9 @@ static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg)
142 return NULL; 142 return NULL;
143} 143}
144 144
145static inline int total_nr_queued(struct throtl_data *td) 145static inline unsigned int total_nr_queued(struct throtl_data *td)
146{ 146{
147 return (td->nr_queued[0] + td->nr_queued[1]); 147 return td->nr_queued[0] + td->nr_queued[1];
148} 148}
149 149
150static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg) 150static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
@@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q)
927 927
928 bio_list_init(&bio_list_on_stack); 928 bio_list_init(&bio_list_on_stack);
929 929
930 throtl_log(td, "dispatch nr_queued=%d read=%u write=%u", 930 throtl_log(td, "dispatch nr_queued=%u read=%u write=%u",
931 total_nr_queued(td), td->nr_queued[READ], 931 total_nr_queued(td), td->nr_queued[READ],
932 td->nr_queued[WRITE]); 932 td->nr_queued[WRITE]);
933 933
@@ -970,7 +970,7 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
970 struct delayed_work *dwork = &td->throtl_work; 970 struct delayed_work *dwork = &td->throtl_work;
971 971
972 /* schedule work if limits changed even if no bio is queued */ 972 /* schedule work if limits changed even if no bio is queued */
973 if (total_nr_queued(td) > 0 || td->limits_changed) { 973 if (total_nr_queued(td) || td->limits_changed) {
974 /* 974 /*
975 * We might have a work scheduled to be executed in future. 975 * We might have a work scheduled to be executed in future.
976 * Cancel that and schedule a new one. 976 * Cancel that and schedule a new one.
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ae21919f15e1..1f96ad6254f1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,9 +87,10 @@ struct cfq_rb_root {
87 unsigned count; 87 unsigned count;
88 unsigned total_weight; 88 unsigned total_weight;
89 u64 min_vdisktime; 89 u64 min_vdisktime;
90 struct cfq_ttime ttime;
90}; 91};
91#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ 92#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
92 .count = 0, .min_vdisktime = 0, } 93 .ttime = {.last_end_request = jiffies,},}
93 94
94/* 95/*
95 * Per process-grouping structure 96 * Per process-grouping structure
@@ -129,14 +130,12 @@ struct cfq_queue {
129 unsigned long slice_end; 130 unsigned long slice_end;
130 long slice_resid; 131 long slice_resid;
131 132
132 /* pending metadata requests */
133 int meta_pending;
134 /* number of requests that are on the dispatch list or inside driver */ 133 /* number of requests that are on the dispatch list or inside driver */
135 int dispatched; 134 int dispatched;
136 135
137 /* io prio of this group */ 136 /* io prio of this group */
138 unsigned short ioprio, org_ioprio; 137 unsigned short ioprio, org_ioprio;
139 unsigned short ioprio_class, org_ioprio_class; 138 unsigned short ioprio_class;
140 139
141 pid_t pid; 140 pid_t pid;
142 141
@@ -212,6 +211,7 @@ struct cfq_group {
212#endif 211#endif
213 /* number of requests that are on the dispatch list or inside driver */ 212 /* number of requests that are on the dispatch list or inside driver */
214 int dispatched; 213 int dispatched;
214 struct cfq_ttime ttime;
215}; 215};
216 216
217/* 217/*
@@ -393,6 +393,18 @@ CFQ_CFQQ_FNS(wait_busy);
393 j++, st = i < IDLE_WORKLOAD ? \ 393 j++, st = i < IDLE_WORKLOAD ? \
394 &cfqg->service_trees[i][j]: NULL) \ 394 &cfqg->service_trees[i][j]: NULL) \
395 395
396static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
397 struct cfq_ttime *ttime, bool group_idle)
398{
399 unsigned long slice;
400 if (!sample_valid(ttime->ttime_samples))
401 return false;
402 if (group_idle)
403 slice = cfqd->cfq_group_idle;
404 else
405 slice = cfqd->cfq_slice_idle;
406 return ttime->ttime_mean > slice;
407}
396 408
397static inline bool iops_mode(struct cfq_data *cfqd) 409static inline bool iops_mode(struct cfq_data *cfqd)
398{ 410{
@@ -670,9 +682,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
670 if (rq_is_sync(rq1) != rq_is_sync(rq2)) 682 if (rq_is_sync(rq1) != rq_is_sync(rq2))
671 return rq_is_sync(rq1) ? rq1 : rq2; 683 return rq_is_sync(rq1) ? rq1 : rq2;
672 684
673 if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
674 return rq1->cmd_flags & REQ_META ? rq1 : rq2;
675
676 s1 = blk_rq_pos(rq1); 685 s1 = blk_rq_pos(rq1);
677 s2 = blk_rq_pos(rq2); 686 s2 = blk_rq_pos(rq2);
678 687
@@ -1005,8 +1014,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
1005 return NULL; 1014 return NULL;
1006} 1015}
1007 1016
1008void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, 1017static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
1009 unsigned int weight) 1018 unsigned int weight)
1010{ 1019{
1011 struct cfq_group *cfqg = cfqg_of_blkg(blkg); 1020 struct cfq_group *cfqg = cfqg_of_blkg(blkg);
1012 cfqg->new_weight = weight; 1021 cfqg->new_weight = weight;
@@ -1059,6 +1068,8 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
1059 *st = CFQ_RB_ROOT; 1068 *st = CFQ_RB_ROOT;
1060 RB_CLEAR_NODE(&cfqg->rb_node); 1069 RB_CLEAR_NODE(&cfqg->rb_node);
1061 1070
1071 cfqg->ttime.last_end_request = jiffies;
1072
1062 /* 1073 /*
1063 * Take the initial reference that will be released on destroy 1074 * Take the initial reference that will be released on destroy
1064 * This can be thought of a joint reference by cgroup and 1075 * This can be thought of a joint reference by cgroup and
@@ -1235,7 +1246,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
1235 * it should not be NULL as even if elevator was exiting, cgroup deltion 1246 * it should not be NULL as even if elevator was exiting, cgroup deltion
1236 * path got to it first. 1247 * path got to it first.
1237 */ 1248 */
1238void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg) 1249static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
1239{ 1250{
1240 unsigned long flags; 1251 unsigned long flags;
1241 struct cfq_data *cfqd = key; 1252 struct cfq_data *cfqd = key;
@@ -1502,16 +1513,11 @@ static void cfq_add_rq_rb(struct request *rq)
1502{ 1513{
1503 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1514 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1504 struct cfq_data *cfqd = cfqq->cfqd; 1515 struct cfq_data *cfqd = cfqq->cfqd;
1505 struct request *__alias, *prev; 1516 struct request *prev;
1506 1517
1507 cfqq->queued[rq_is_sync(rq)]++; 1518 cfqq->queued[rq_is_sync(rq)]++;
1508 1519
1509 /* 1520 elv_rb_add(&cfqq->sort_list, rq);
1510 * looks a little odd, but the first insert might return an alias.
1511 * if that happens, put the alias on the dispatch list
1512 */
1513 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
1514 cfq_dispatch_insert(cfqd->queue, __alias);
1515 1521
1516 if (!cfq_cfqq_on_rr(cfqq)) 1522 if (!cfq_cfqq_on_rr(cfqq))
1517 cfq_add_cfqq_rr(cfqd, cfqq); 1523 cfq_add_cfqq_rr(cfqd, cfqq);
@@ -1598,10 +1604,6 @@ static void cfq_remove_request(struct request *rq)
1598 cfqq->cfqd->rq_queued--; 1604 cfqq->cfqd->rq_queued--;
1599 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, 1605 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
1600 rq_data_dir(rq), rq_is_sync(rq)); 1606 rq_data_dir(rq), rq_is_sync(rq));
1601 if (rq->cmd_flags & REQ_META) {
1602 WARN_ON(!cfqq->meta_pending);
1603 cfqq->meta_pending--;
1604 }
1605} 1607}
1606 1608
1607static int cfq_merge(struct request_queue *q, struct request **req, 1609static int cfq_merge(struct request_queue *q, struct request **req,
@@ -1969,7 +1971,8 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1969 * Otherwise, we do only if they are the last ones 1971 * Otherwise, we do only if they are the last ones
1970 * in their service tree. 1972 * in their service tree.
1971 */ 1973 */
1972 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) 1974 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) &&
1975 !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false))
1973 return true; 1976 return true;
1974 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", 1977 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
1975 service_tree->count); 1978 service_tree->count);
@@ -2022,10 +2025,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2022 * slice, then don't idle. This avoids overrunning the allotted 2025 * slice, then don't idle. This avoids overrunning the allotted
2023 * time slice. 2026 * time slice.
2024 */ 2027 */
2025 if (sample_valid(cic->ttime_samples) && 2028 if (sample_valid(cic->ttime.ttime_samples) &&
2026 (cfqq->slice_end - jiffies < cic->ttime_mean)) { 2029 (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
2027 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", 2030 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
2028 cic->ttime_mean); 2031 cic->ttime.ttime_mean);
2029 return; 2032 return;
2030 } 2033 }
2031 2034
@@ -2381,8 +2384,9 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
2381 * this group, wait for requests to complete. 2384 * this group, wait for requests to complete.
2382 */ 2385 */
2383check_group_idle: 2386check_group_idle:
2384 if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 2387 if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
2385 && cfqq->cfqg->dispatched) { 2388 cfqq->cfqg->dispatched &&
2389 !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
2386 cfqq = NULL; 2390 cfqq = NULL;
2387 goto keep_queue; 2391 goto keep_queue;
2388 } 2392 }
@@ -2833,7 +2837,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
2833 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO, 2837 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
2834 cfqd->queue->node); 2838 cfqd->queue->node);
2835 if (cic) { 2839 if (cic) {
2836 cic->last_end_request = jiffies; 2840 cic->ttime.last_end_request = jiffies;
2837 INIT_LIST_HEAD(&cic->queue_list); 2841 INIT_LIST_HEAD(&cic->queue_list);
2838 INIT_HLIST_NODE(&cic->cic_list); 2842 INIT_HLIST_NODE(&cic->cic_list);
2839 cic->dtor = cfq_free_io_context; 2843 cic->dtor = cfq_free_io_context;
@@ -2883,7 +2887,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
2883 * elevate the priority of this queue 2887 * elevate the priority of this queue
2884 */ 2888 */
2885 cfqq->org_ioprio = cfqq->ioprio; 2889 cfqq->org_ioprio = cfqq->ioprio;
2886 cfqq->org_ioprio_class = cfqq->ioprio_class;
2887 cfq_clear_cfqq_prio_changed(cfqq); 2890 cfq_clear_cfqq_prio_changed(cfqq);
2888} 2891}
2889 2892
@@ -3221,14 +3224,28 @@ err:
3221} 3224}
3222 3225
3223static void 3226static void
3224cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 3227__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
3225{ 3228{
3226 unsigned long elapsed = jiffies - cic->last_end_request; 3229 unsigned long elapsed = jiffies - ttime->last_end_request;
3227 unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 3230 elapsed = min(elapsed, 2UL * slice_idle);
3228 3231
3229 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 3232 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
3230 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 3233 ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
3231 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 3234 ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
3235}
3236
3237static void
3238cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3239 struct cfq_io_context *cic)
3240{
3241 if (cfq_cfqq_sync(cfqq)) {
3242 __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
3243 __cfq_update_io_thinktime(&cfqq->service_tree->ttime,
3244 cfqd->cfq_slice_idle);
3245 }
3246#ifdef CONFIG_CFQ_GROUP_IOSCHED
3247 __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
3248#endif
3232} 3249}
3233 3250
3234static void 3251static void
@@ -3277,8 +3294,8 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3277 else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || 3294 else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
3278 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) 3295 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
3279 enable_idle = 0; 3296 enable_idle = 0;
3280 else if (sample_valid(cic->ttime_samples)) { 3297 else if (sample_valid(cic->ttime.ttime_samples)) {
3281 if (cic->ttime_mean > cfqd->cfq_slice_idle) 3298 if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
3282 enable_idle = 0; 3299 enable_idle = 0;
3283 else 3300 else
3284 enable_idle = 1; 3301 enable_idle = 1;
@@ -3340,13 +3357,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3340 return true; 3357 return true;
3341 3358
3342 /* 3359 /*
3343 * So both queues are sync. Let the new request get disk time if
3344 * it's a metadata request and the current queue is doing regular IO.
3345 */
3346 if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
3347 return true;
3348
3349 /*
3350 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. 3360 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
3351 */ 3361 */
3352 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) 3362 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
@@ -3410,10 +3420,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3410 struct cfq_io_context *cic = RQ_CIC(rq); 3420 struct cfq_io_context *cic = RQ_CIC(rq);
3411 3421
3412 cfqd->rq_queued++; 3422 cfqd->rq_queued++;
3413 if (rq->cmd_flags & REQ_META)
3414 cfqq->meta_pending++;
3415 3423
3416 cfq_update_io_thinktime(cfqd, cic); 3424 cfq_update_io_thinktime(cfqd, cfqq, cic);
3417 cfq_update_io_seektime(cfqd, cfqq, rq); 3425 cfq_update_io_seektime(cfqd, cfqq, rq);
3418 cfq_update_idle_window(cfqd, cfqq, cic); 3426 cfq_update_idle_window(cfqd, cfqq, cic);
3419 3427
@@ -3520,12 +3528,16 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3520 if (cfqq->cfqg->nr_cfqq > 1) 3528 if (cfqq->cfqg->nr_cfqq > 1)
3521 return false; 3529 return false;
3522 3530
3531 /* the only queue in the group, but think time is big */
3532 if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
3533 return false;
3534
3523 if (cfq_slice_used(cfqq)) 3535 if (cfq_slice_used(cfqq))
3524 return true; 3536 return true;
3525 3537
3526 /* if slice left is less than think time, wait busy */ 3538 /* if slice left is less than think time, wait busy */
3527 if (cic && sample_valid(cic->ttime_samples) 3539 if (cic && sample_valid(cic->ttime.ttime_samples)
3528 && (cfqq->slice_end - jiffies < cic->ttime_mean)) 3540 && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
3529 return true; 3541 return true;
3530 3542
3531 /* 3543 /*
@@ -3566,11 +3578,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
3566 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; 3578 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
3567 3579
3568 if (sync) { 3580 if (sync) {
3569 RQ_CIC(rq)->last_end_request = now; 3581 struct cfq_rb_root *service_tree;
3582
3583 RQ_CIC(rq)->ttime.last_end_request = now;
3584
3585 if (cfq_cfqq_on_rr(cfqq))
3586 service_tree = cfqq->service_tree;
3587 else
3588 service_tree = service_tree_for(cfqq->cfqg,
3589 cfqq_prio(cfqq), cfqq_type(cfqq));
3590 service_tree->ttime.last_end_request = now;
3570 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) 3591 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
3571 cfqd->last_delayed_sync = now; 3592 cfqd->last_delayed_sync = now;
3572 } 3593 }
3573 3594
3595#ifdef CONFIG_CFQ_GROUP_IOSCHED
3596 cfqq->cfqg->ttime.last_end_request = now;
3597#endif
3598
3574 /* 3599 /*
3575 * If this is the active queue, check if it needs to be expired, 3600 * If this is the active queue, check if it needs to be expired,
3576 * or if we want to idle in case it has no pending requests. 3601 * or if we want to idle in case it has no pending requests.
@@ -3616,30 +3641,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
3616 cfq_schedule_dispatch(cfqd); 3641 cfq_schedule_dispatch(cfqd);
3617} 3642}
3618 3643
3619/*
3620 * we temporarily boost lower priority queues if they are holding fs exclusive
3621 * resources. they are boosted to normal prio (CLASS_BE/4)
3622 */
3623static void cfq_prio_boost(struct cfq_queue *cfqq)
3624{
3625 if (has_fs_excl()) {
3626 /*
3627 * boost idle prio on transactions that would lock out other
3628 * users of the filesystem
3629 */
3630 if (cfq_class_idle(cfqq))
3631 cfqq->ioprio_class = IOPRIO_CLASS_BE;
3632 if (cfqq->ioprio > IOPRIO_NORM)
3633 cfqq->ioprio = IOPRIO_NORM;
3634 } else {
3635 /*
3636 * unboost the queue (if needed)
3637 */
3638 cfqq->ioprio_class = cfqq->org_ioprio_class;
3639 cfqq->ioprio = cfqq->org_ioprio;
3640 }
3641}
3642
3643static inline int __cfq_may_queue(struct cfq_queue *cfqq) 3644static inline int __cfq_may_queue(struct cfq_queue *cfqq)
3644{ 3645{
3645 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { 3646 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -3670,7 +3671,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
3670 cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); 3671 cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
3671 if (cfqq) { 3672 if (cfqq) {
3672 cfq_init_prio_data(cfqq, cic->ioc); 3673 cfq_init_prio_data(cfqq, cic->ioc);
3673 cfq_prio_boost(cfqq);
3674 3674
3675 return __cfq_may_queue(cfqq); 3675 return __cfq_may_queue(cfqq);
3676 } 3676 }
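
Most of the cfq-iosched.c changes hang off the new struct cfq_ttime (see include/linux/iocontext.h below): the per-cic think-time statistics are generalized so the same exponentially weighted average can also be kept per service tree and per cfq_group, and cfq_io_thinktime_big() compares the resulting mean against the idle slice to decide whether idling is worthwhile. A standalone restatement of the fixed-point EWMA (scale 256, decay 7/8), mirroring __cfq_update_io_thinktime() above but written as an independent sketch:

	/* Sketch of the cfq_ttime update; values are in jiffies. */
	struct ttime_sketch {
		unsigned long total;	/* scaled sum of samples    */
		unsigned long samples;	/* scaled sample count      */
		unsigned long mean;	/* total / samples, rounded */
	};

	static void ttime_sketch_update(struct ttime_sketch *t,
					unsigned long elapsed,
					unsigned long slice_idle)
	{
		if (elapsed > 2 * slice_idle)
			elapsed = 2 * slice_idle;		/* clamp outliers */
		t->samples = (7 * t->samples + 256) / 8;	/* one more sample, scaled by 256 */
		t->total   = (7 * t->total + 256 * elapsed) / 8;
		t->mean    = (t->total + 128) / t->samples;
	}

Starting from zero, samples converges toward 256 and mean toward the recent average gap between a request completing and the next one arriving, which is exactly the quantity cfq_io_thinktime_big() compares against cfq_slice_idle or cfq_group_idle.
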
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index cc3eb78e333a..7b725020823c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -208,19 +208,6 @@ static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode,
208#define BLKBSZSET_32 _IOW(0x12, 113, int) 208#define BLKBSZSET_32 _IOW(0x12, 113, int)
209#define BLKGETSIZE64_32 _IOR(0x12, 114, int) 209#define BLKGETSIZE64_32 _IOR(0x12, 114, int)
210 210
211struct compat_floppy_struct {
212 compat_uint_t size;
213 compat_uint_t sect;
214 compat_uint_t head;
215 compat_uint_t track;
216 compat_uint_t stretch;
217 unsigned char gap;
218 unsigned char rate;
219 unsigned char spec1;
220 unsigned char fmt_gap;
221 const compat_caddr_t name;
222};
223
224struct compat_floppy_drive_params { 211struct compat_floppy_drive_params {
225 char cmos; 212 char cmos;
226 compat_ulong_t max_dtr; 213 compat_ulong_t max_dtr;
@@ -288,7 +275,6 @@ struct compat_floppy_write_errors {
288 275
289#define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct) 276#define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct)
290#define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct) 277#define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct)
291#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
292#define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params) 278#define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params)
293#define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params) 279#define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params)
294#define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct) 280#define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct)
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 5139c0ea1864..c644137d9cd6 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -77,10 +77,8 @@ static void
77deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) 77deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
78{ 78{
79 struct rb_root *root = deadline_rb_root(dd, rq); 79 struct rb_root *root = deadline_rb_root(dd, rq);
80 struct request *__alias;
81 80
82 while (unlikely(__alias = elv_rb_add(root, rq))) 81 elv_rb_add(root, rq);
83 deadline_move_request(dd, __alias);
84} 82}
85 83
86static inline void 84static inline void
diff --git a/block/elevator.c b/block/elevator.c
index b0b38ce0dcb6..a3b64bc71d88 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -353,7 +353,7 @@ static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
353 * RB-tree support functions for inserting/lookup/removal of requests 353 * RB-tree support functions for inserting/lookup/removal of requests
354 * in a sorted RB tree. 354 * in a sorted RB tree.
355 */ 355 */
356struct request *elv_rb_add(struct rb_root *root, struct request *rq) 356void elv_rb_add(struct rb_root *root, struct request *rq)
357{ 357{
358 struct rb_node **p = &root->rb_node; 358 struct rb_node **p = &root->rb_node;
359 struct rb_node *parent = NULL; 359 struct rb_node *parent = NULL;
@@ -365,15 +365,12 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
365 365
366 if (blk_rq_pos(rq) < blk_rq_pos(__rq)) 366 if (blk_rq_pos(rq) < blk_rq_pos(__rq))
367 p = &(*p)->rb_left; 367 p = &(*p)->rb_left;
368 else if (blk_rq_pos(rq) > blk_rq_pos(__rq)) 368 else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
369 p = &(*p)->rb_right; 369 p = &(*p)->rb_right;
370 else
371 return __rq;
372 } 370 }
373 371
374 rb_link_node(&rq->rb_node, parent, p); 372 rb_link_node(&rq->rb_node, parent, p);
375 rb_insert_color(&rq->rb_node, root); 373 rb_insert_color(&rq->rb_node, root);
376 return NULL;
377} 374}
378EXPORT_SYMBOL(elv_rb_add); 375EXPORT_SYMBOL(elv_rb_add);
379 376
diff --git a/block/genhd.c b/block/genhd.c
index 6024b82e3209..5cb51c55f6d8 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -602,7 +602,7 @@ void add_disk(struct gendisk *disk)
602 disk->major = MAJOR(devt); 602 disk->major = MAJOR(devt);
603 disk->first_minor = MINOR(devt); 603 disk->first_minor = MINOR(devt);
604 604
605 /* Register BDI before referencing it from bdev */ 605 /* Register BDI before referencing it from bdev */
606 bdi = &disk->queue->backing_dev_info; 606 bdi = &disk->queue->backing_dev_info;
607 bdi_register_dev(bdi, disk_devt(disk)); 607 bdi_register_dev(bdi, disk_devt(disk));
608 608
@@ -1140,7 +1140,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1140 "wsect wuse running use aveq" 1140 "wsect wuse running use aveq"
1141 "\n\n"); 1141 "\n\n");
1142 */ 1142 */
1143 1143
1144 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); 1144 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
1145 while ((hd = disk_part_iter_next(&piter))) { 1145 while ((hd = disk_part_iter_next(&piter))) {
1146 cpu = part_stat_lock(); 1146 cpu = part_stat_lock();
@@ -1164,7 +1164,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1164 ); 1164 );
1165 } 1165 }
1166 disk_part_iter_exit(&piter); 1166 disk_part_iter_exit(&piter);
1167 1167
1168 return 0; 1168 return 0;
1169} 1169}
1170 1170
@@ -1492,30 +1492,32 @@ void disk_unblock_events(struct gendisk *disk)
1492} 1492}
1493 1493
1494/** 1494/**
1495 * disk_check_events - schedule immediate event checking 1495 * disk_flush_events - schedule immediate event checking and flushing
1496 * @disk: disk to check events for 1496 * @disk: disk to check and flush events for
1497 * @mask: events to flush
1497 * 1498 *
1498 * Schedule immediate event checking on @disk if not blocked. 1499 * Schedule immediate event checking on @disk if not blocked. Events in
1500 * @mask are scheduled to be cleared from the driver. Note that this
1501 * doesn't clear the events from @disk->ev.
1499 * 1502 *
1500 * CONTEXT: 1503 * CONTEXT:
1501 * Don't care. Safe to call from irq context. 1504 * If @mask is non-zero must be called with bdev->bd_mutex held.
1502 */ 1505 */
1503void disk_check_events(struct gendisk *disk) 1506void disk_flush_events(struct gendisk *disk, unsigned int mask)
1504{ 1507{
1505 struct disk_events *ev = disk->ev; 1508 struct disk_events *ev = disk->ev;
1506 unsigned long flags;
1507 1509
1508 if (!ev) 1510 if (!ev)
1509 return; 1511 return;
1510 1512
1511 spin_lock_irqsave(&ev->lock, flags); 1513 spin_lock_irq(&ev->lock);
1514 ev->clearing |= mask;
1512 if (!ev->block) { 1515 if (!ev->block) {
1513 cancel_delayed_work(&ev->dwork); 1516 cancel_delayed_work(&ev->dwork);
1514 queue_delayed_work(system_nrt_wq, &ev->dwork, 0); 1517 queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
1515 } 1518 }
1516 spin_unlock_irqrestore(&ev->lock, flags); 1519 spin_unlock_irq(&ev->lock);
1517} 1520}
1518EXPORT_SYMBOL_GPL(disk_check_events);
1519 1521
1520/** 1522/**
1521 * disk_clear_events - synchronously check, clear and return pending events 1523 * disk_clear_events - synchronously check, clear and return pending events
@@ -1705,7 +1707,7 @@ static int disk_events_set_dfl_poll_msecs(const char *val,
1705 mutex_lock(&disk_events_mutex); 1707 mutex_lock(&disk_events_mutex);
1706 1708
1707 list_for_each_entry(ev, &disk_events, node) 1709 list_for_each_entry(ev, &disk_events, node)
1708 disk_check_events(ev->disk); 1710 disk_flush_events(ev->disk, 0);
1709 1711
1710 mutex_unlock(&disk_events_mutex); 1712 mutex_unlock(&disk_events_mutex);
1711 1713
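
disk_flush_events() replaces the exported disk_check_events(): it schedules an immediate check and additionally tells the driver which events in @mask to clear, without consuming them from disk->ev the way disk_clear_events() does. A minimal caller sketch (mirroring the fs/block_dev.c hunk below; bdev is assumed to be an open struct block_device *):

	/* Sketch: request a media-change re-check when dropping a reference. */
	mutex_lock(&bdev->bd_mutex);		/* required when the mask is non-zero */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
	mutex_unlock(&bdev->bd_mutex);
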
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9fb0b15331d3..c62fb84944d5 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1448,6 +1448,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1448 1448
1449int blkdev_put(struct block_device *bdev, fmode_t mode) 1449int blkdev_put(struct block_device *bdev, fmode_t mode)
1450{ 1450{
1451 mutex_lock(&bdev->bd_mutex);
1452
1451 if (mode & FMODE_EXCL) { 1453 if (mode & FMODE_EXCL) {
1452 bool bdev_free; 1454 bool bdev_free;
1453 1455
@@ -1456,7 +1458,6 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
1456 * are protected with bdev_lock. bd_mutex is to 1458 * are protected with bdev_lock. bd_mutex is to
1457 * synchronize disk_holder unlinking. 1459 * synchronize disk_holder unlinking.
1458 */ 1460 */
1459 mutex_lock(&bdev->bd_mutex);
1460 spin_lock(&bdev_lock); 1461 spin_lock(&bdev_lock);
1461 1462
1462 WARN_ON_ONCE(--bdev->bd_holders < 0); 1463 WARN_ON_ONCE(--bdev->bd_holders < 0);
@@ -1474,17 +1475,21 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
1474 * If this was the last claim, remove holder link and 1475 * If this was the last claim, remove holder link and
1475 * unblock evpoll if it was a write holder. 1476 * unblock evpoll if it was a write holder.
1476 */ 1477 */
1477 if (bdev_free) { 1478 if (bdev_free && bdev->bd_write_holder) {
1478 if (bdev->bd_write_holder) { 1479 disk_unblock_events(bdev->bd_disk);
1479 disk_unblock_events(bdev->bd_disk); 1480 bdev->bd_write_holder = false;
1480 disk_check_events(bdev->bd_disk);
1481 bdev->bd_write_holder = false;
1482 }
1483 } 1481 }
1484
1485 mutex_unlock(&bdev->bd_mutex);
1486 } 1482 }
1487 1483
1484 /*
1485 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
1486 * event. This is to ensure detection of media removal commanded
1487 * from userland - e.g. eject(1).
1488 */
1489 disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
1490
1491 mutex_unlock(&bdev->bd_mutex);
1492
1488 return __blkdev_put(bdev, mode, 0); 1493 return __blkdev_put(bdev, mode, 0);
1489} 1494}
1490EXPORT_SYMBOL(blkdev_put); 1495EXPORT_SYMBOL(blkdev_put);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 61abb638b4bf..8be086e9abe4 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -68,6 +68,8 @@
68 68
69#ifdef CONFIG_BLOCK 69#ifdef CONFIG_BLOCK
70#include <linux/loop.h> 70#include <linux/loop.h>
71#include <linux/cdrom.h>
72#include <linux/fd.h>
71#include <scsi/scsi.h> 73#include <scsi/scsi.h>
72#include <scsi/scsi_ioctl.h> 74#include <scsi/scsi_ioctl.h>
73#include <scsi/sg.h> 75#include <scsi/sg.h>
@@ -944,6 +946,9 @@ COMPATIBLE_IOCTL(FIOQSIZE)
944IGNORE_IOCTL(LOOP_CLR_FD) 946IGNORE_IOCTL(LOOP_CLR_FD)
945/* md calls this on random blockdevs */ 947/* md calls this on random blockdevs */
946IGNORE_IOCTL(RAID_VERSION) 948IGNORE_IOCTL(RAID_VERSION)
949/* qemu/qemu-img might call these two on plain files for probing */
950IGNORE_IOCTL(CDROM_DRIVE_STATUS)
951IGNORE_IOCTL(FDGETPRM32)
947/* SG stuff */ 952/* SG stuff */
948COMPATIBLE_IOCTL(SG_SET_TIMEOUT) 953COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
949COMPATIBLE_IOCTL(SG_GET_TIMEOUT) 954COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..e3c63d1c5e13 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -237,22 +237,22 @@ ssize_t part_size_show(struct device *dev,
237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); 237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
238} 238}
239 239
240ssize_t part_ro_show(struct device *dev, 240static ssize_t part_ro_show(struct device *dev,
241 struct device_attribute *attr, char *buf) 241 struct device_attribute *attr, char *buf)
242{ 242{
243 struct hd_struct *p = dev_to_part(dev); 243 struct hd_struct *p = dev_to_part(dev);
244 return sprintf(buf, "%d\n", p->policy ? 1 : 0); 244 return sprintf(buf, "%d\n", p->policy ? 1 : 0);
245} 245}
246 246
247ssize_t part_alignment_offset_show(struct device *dev, 247static ssize_t part_alignment_offset_show(struct device *dev,
248 struct device_attribute *attr, char *buf) 248 struct device_attribute *attr, char *buf)
249{ 249{
250 struct hd_struct *p = dev_to_part(dev); 250 struct hd_struct *p = dev_to_part(dev);
251 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); 251 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
252} 252}
253 253
254ssize_t part_discard_alignment_show(struct device *dev, 254static ssize_t part_discard_alignment_show(struct device *dev,
255 struct device_attribute *attr, char *buf) 255 struct device_attribute *attr, char *buf)
256{ 256{
257 struct hd_struct *p = dev_to_part(dev); 257 struct hd_struct *p = dev_to_part(dev);
258 return sprintf(buf, "%u\n", p->discard_alignment); 258 return sprintf(buf, "%u\n", p->discard_alignment);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c5e82ece7c6c..a159ba5a35e7 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -678,23 +678,19 @@ struct buffer_chunk {
678static void write_chunk(struct buffer_chunk *chunk) 678static void write_chunk(struct buffer_chunk *chunk)
679{ 679{
680 int i; 680 int i;
681 get_fs_excl();
682 for (i = 0; i < chunk->nr; i++) { 681 for (i = 0; i < chunk->nr; i++) {
683 submit_logged_buffer(chunk->bh[i]); 682 submit_logged_buffer(chunk->bh[i]);
684 } 683 }
685 chunk->nr = 0; 684 chunk->nr = 0;
686 put_fs_excl();
687} 685}
688 686
689static void write_ordered_chunk(struct buffer_chunk *chunk) 687static void write_ordered_chunk(struct buffer_chunk *chunk)
690{ 688{
691 int i; 689 int i;
692 get_fs_excl();
693 for (i = 0; i < chunk->nr; i++) { 690 for (i = 0; i < chunk->nr; i++) {
694 submit_ordered_buffer(chunk->bh[i]); 691 submit_ordered_buffer(chunk->bh[i]);
695 } 692 }
696 chunk->nr = 0; 693 chunk->nr = 0;
697 put_fs_excl();
698} 694}
699 695
700static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, 696static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
@@ -986,8 +982,6 @@ static int flush_commit_list(struct super_block *s,
986 return 0; 982 return 0;
987 } 983 }
988 984
989 get_fs_excl();
990
991 /* before we can put our commit blocks on disk, we have to make sure everyone older than 985 /* before we can put our commit blocks on disk, we have to make sure everyone older than
992 ** us is on disk too 986 ** us is on disk too
993 */ 987 */
@@ -1145,7 +1139,6 @@ static int flush_commit_list(struct super_block *s,
1145 if (retval) 1139 if (retval)
1146 reiserfs_abort(s, retval, "Journal write error in %s", 1140 reiserfs_abort(s, retval, "Journal write error in %s",
1147 __func__); 1141 __func__);
1148 put_fs_excl();
1149 return retval; 1142 return retval;
1150} 1143}
1151 1144
@@ -1374,8 +1367,6 @@ static int flush_journal_list(struct super_block *s,
1374 return 0; 1367 return 0;
1375 } 1368 }
1376 1369
1377 get_fs_excl();
1378
1379 /* if all the work is already done, get out of here */ 1370 /* if all the work is already done, get out of here */
1380 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1371 if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
1381 atomic_read(&(jl->j_commit_left)) <= 0) { 1372 atomic_read(&(jl->j_commit_left)) <= 0) {
@@ -1597,7 +1588,6 @@ static int flush_journal_list(struct super_block *s,
1597 put_journal_list(s, jl); 1588 put_journal_list(s, jl);
1598 if (flushall) 1589 if (flushall)
1599 mutex_unlock(&journal->j_flush_mutex); 1590 mutex_unlock(&journal->j_flush_mutex);
1600 put_fs_excl();
1601 return err; 1591 return err;
1602} 1592}
1603 1593
@@ -3108,7 +3098,6 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3108 th->t_trans_id = journal->j_trans_id; 3098 th->t_trans_id = journal->j_trans_id;
3109 unlock_journal(sb); 3099 unlock_journal(sb);
3110 INIT_LIST_HEAD(&th->t_list); 3100 INIT_LIST_HEAD(&th->t_list);
3111 get_fs_excl();
3112 return 0; 3101 return 0;
3113 3102
3114 out_fail: 3103 out_fail:
@@ -3964,7 +3953,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3964 flush = flags & FLUSH_ALL; 3953 flush = flags & FLUSH_ALL;
3965 wait_on_commit = flags & WAIT; 3954 wait_on_commit = flags & WAIT;
3966 3955
3967 put_fs_excl();
3968 current->journal_info = th->t_handle_save; 3956 current->journal_info = th->t_handle_save;
3969 reiserfs_check_lock_depth(sb, "journal end"); 3957 reiserfs_check_lock_depth(sb, "journal end");
3970 if (journal->j_len == 0) { 3958 if (journal->j_len == 0) {
@@ -4316,4 +4304,3 @@ void reiserfs_abort_journal(struct super_block *sb, int errno)
4316 dump_stack(); 4304 dump_stack();
4317#endif 4305#endif
4318} 4306}
4319
diff --git a/fs/super.c b/fs/super.c
index 7943f04cb3a9..3f56a269a4f4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -351,13 +351,11 @@ bool grab_super_passive(struct super_block *sb)
351 */ 351 */
352void lock_super(struct super_block * sb) 352void lock_super(struct super_block * sb)
353{ 353{
354 get_fs_excl();
355 mutex_lock(&sb->s_lock); 354 mutex_lock(&sb->s_lock);
356} 355}
357 356
358void unlock_super(struct super_block * sb) 357void unlock_super(struct super_block * sb)
359{ 358{
360 put_fs_excl();
361 mutex_unlock(&sb->s_lock); 359 mutex_unlock(&sb->s_lock);
362} 360}
363 361
@@ -385,7 +383,6 @@ void generic_shutdown_super(struct super_block *sb)
385 if (sb->s_root) { 383 if (sb->s_root) {
386 shrink_dcache_for_umount(sb); 384 shrink_dcache_for_umount(sb);
387 sync_filesystem(sb); 385 sync_filesystem(sb);
388 get_fs_excl();
389 sb->s_flags &= ~MS_ACTIVE; 386 sb->s_flags &= ~MS_ACTIVE;
390 387
391 fsnotify_unmount_inodes(&sb->s_inodes); 388 fsnotify_unmount_inodes(&sb->s_inodes);
@@ -400,7 +397,6 @@ void generic_shutdown_super(struct super_block *sb)
400 "Self-destruct in 5 seconds. Have a nice day...\n", 397 "Self-destruct in 5 seconds. Have a nice day...\n",
401 sb->s_id); 398 sb->s_id);
402 } 399 }
403 put_fs_excl();
404 } 400 }
405 spin_lock(&sb_lock); 401 spin_lock(&sb_lock);
406 /* should be initialized for __put_super_and_need_restart() */ 402 /* should be initialized for __put_super_and_need_restart() */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1a23722e8878..0e67c45b3bc9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -73,7 +73,7 @@ enum rq_cmd_type_bits {
73 73
74/* 74/*
75 * try to put the fields that are referenced together in the same cacheline. 75 * try to put the fields that are referenced together in the same cacheline.
76 * if you modify this structure, be sure to check block/blk-core.c:rq_init() 76 * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init()
77 * as well! 77 * as well!
78 */ 78 */
79struct request { 79struct request {
@@ -260,8 +260,7 @@ struct queue_limits {
260 unsigned char discard_zeroes_data; 260 unsigned char discard_zeroes_data;
261}; 261};
262 262
263struct request_queue 263struct request_queue {
264{
265 /* 264 /*
266 * Together with queue_head for cacheline sharing 265 * Together with queue_head for cacheline sharing
267 */ 266 */
@@ -304,14 +303,14 @@ struct request_queue
304 void *queuedata; 303 void *queuedata;
305 304
306 /* 305 /*
307 * queue needs bounce pages for pages above this limit 306 * various queue flags, see QUEUE_* below
308 */ 307 */
309 gfp_t bounce_gfp; 308 unsigned long queue_flags;
310 309
311 /* 310 /*
312 * various queue flags, see QUEUE_* below 311 * queue needs bounce pages for pages above this limit
313 */ 312 */
314 unsigned long queue_flags; 313 gfp_t bounce_gfp;
315 314
316 /* 315 /*
317 * protects queue structures from reentrancy. ->__queue_lock should 316 * protects queue structures from reentrancy. ->__queue_lock should
@@ -334,8 +333,8 @@ struct request_queue
334 unsigned int nr_congestion_off; 333 unsigned int nr_congestion_off;
335 unsigned int nr_batching; 334 unsigned int nr_batching;
336 335
337 void *dma_drain_buffer;
338 unsigned int dma_drain_size; 336 unsigned int dma_drain_size;
337 void *dma_drain_buffer;
339 unsigned int dma_pad_mask; 338 unsigned int dma_pad_mask;
340 unsigned int dma_alignment; 339 unsigned int dma_alignment;
341 340
@@ -393,7 +392,7 @@ struct request_queue
393#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */ 392#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */
394#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ 393#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */
395#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ 394#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */
396#define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */ 395#define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */
397#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ 396#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */
398#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ 397#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */
399#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ 398#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */
@@ -403,6 +402,7 @@ struct request_queue
403#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ 402#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */
404#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ 403#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */
405#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ 404#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */
405#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
406 406
407#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 407#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
408 (1 << QUEUE_FLAG_STACKABLE) | \ 408 (1 << QUEUE_FLAG_STACKABLE) | \
@@ -857,12 +857,21 @@ struct request_queue *blk_alloc_queue(gfp_t);
857struct request_queue *blk_alloc_queue_node(gfp_t, int); 857struct request_queue *blk_alloc_queue_node(gfp_t, int);
858extern void blk_put_queue(struct request_queue *); 858extern void blk_put_queue(struct request_queue *);
859 859
860/*
861 * Note: Code in between changing the blk_plug list/cb_list or element of such
862 * lists is preemptable, but such code can't do sleep (or be very careful),
863 * otherwise data is corrupted. For details, please check schedule() where
864 * blk_schedule_flush_plug() is called.
865 */
860struct blk_plug { 866struct blk_plug {
861 unsigned long magic; 867 unsigned long magic;
862 struct list_head list; 868 struct list_head list;
863 struct list_head cb_list; 869 struct list_head cb_list;
864 unsigned int should_sort; 870 unsigned int should_sort;
871 unsigned int count;
865}; 872};
873#define BLK_MAX_REQUEST_COUNT 16
874
866struct blk_plug_cb { 875struct blk_plug_cb {
867 struct list_head list; 876 struct list_head list;
868 void (*callback)(struct blk_plug_cb *); 877 void (*callback)(struct blk_plug_cb *);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 21a8ebf2dc3a..d800d5142184 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -146,7 +146,7 @@ extern struct request *elv_rb_latter_request(struct request_queue *, struct requ
146/* 146/*
147 * rb support functions. 147 * rb support functions.
148 */ 148 */
149extern struct request *elv_rb_add(struct rb_root *, struct request *); 149extern void elv_rb_add(struct rb_root *, struct request *);
150extern void elv_rb_del(struct rb_root *, struct request *); 150extern void elv_rb_del(struct rb_root *, struct request *);
151extern struct request *elv_rb_find(struct rb_root *, sector_t); 151extern struct request *elv_rb_find(struct rb_root *, sector_t);
152 152
diff --git a/include/linux/fd.h b/include/linux/fd.h
index f5d194af07a8..72202b1b9a6a 100644
--- a/include/linux/fd.h
+++ b/include/linux/fd.h
@@ -377,4 +377,26 @@ struct floppy_raw_cmd {
377#define FDEJECT _IO(2, 0x5a) 377#define FDEJECT _IO(2, 0x5a)
378/* eject the disk */ 378/* eject the disk */
379 379
380
381#ifdef __KERNEL__
382#ifdef CONFIG_COMPAT
383#include <linux/compat.h>
384
385struct compat_floppy_struct {
386 compat_uint_t size;
387 compat_uint_t sect;
388 compat_uint_t head;
389 compat_uint_t track;
390 compat_uint_t stretch;
391 unsigned char gap;
392 unsigned char rate;
393 unsigned char spec1;
394 unsigned char fmt_gap;
395 const compat_caddr_t name;
396};
397
398#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
399#endif
400#endif
401
380#endif 402#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b224dc468a23..0c35d6e767d9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1469,10 +1469,6 @@ enum {
1469#define vfs_check_frozen(sb, level) \ 1469#define vfs_check_frozen(sb, level) \
1470 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) 1470 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
1471 1471
1472#define get_fs_excl() atomic_inc(&current->fs_excl)
1473#define put_fs_excl() atomic_dec(&current->fs_excl)
1474#define has_fs_excl() atomic_read(&current->fs_excl)
1475
1476/* 1472/*
1477 * until VFS tracks user namespaces for inodes, just make all files 1473 * until VFS tracks user namespaces for inodes, just make all files
1478 * belong to init_user_ns 1474 * belong to init_user_ns
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 300d7582006e..02fa4697a0e5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -420,7 +420,7 @@ static inline int get_disk_ro(struct gendisk *disk)
420 420
421extern void disk_block_events(struct gendisk *disk); 421extern void disk_block_events(struct gendisk *disk);
422extern void disk_unblock_events(struct gendisk *disk); 422extern void disk_unblock_events(struct gendisk *disk);
423extern void disk_check_events(struct gendisk *disk); 423extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
424extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); 424extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
425 425
426/* drivers/char/random.c */ 426/* drivers/char/random.c */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 580f70c02391..d14e058aaeed 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -176,7 +176,6 @@ extern struct cred init_cred;
176 .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ 176 .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
177 .journal_info = NULL, \ 177 .journal_info = NULL, \
178 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ 178 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
179 .fs_excl = ATOMIC_INIT(0), \
180 .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ 179 .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
181 .timer_slack_ns = 50000, /* 50 usec default slack */ \ 180 .timer_slack_ns = 50000, /* 50 usec default slack */ \
182 .pids = { \ 181 .pids = { \
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index b2eee896dcbc..5037a0ad2312 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -5,6 +5,14 @@
5#include <linux/rcupdate.h> 5#include <linux/rcupdate.h>
6 6
7struct cfq_queue; 7struct cfq_queue;
8struct cfq_ttime {
9 unsigned long last_end_request;
10
11 unsigned long ttime_total;
12 unsigned long ttime_samples;
13 unsigned long ttime_mean;
14};
15
8struct cfq_io_context { 16struct cfq_io_context {
9 void *key; 17 void *key;
10 18
@@ -12,11 +20,7 @@ struct cfq_io_context {
12 20
13 struct io_context *ioc; 21 struct io_context *ioc;
14 22
15 unsigned long last_end_request; 23 struct cfq_ttime ttime;
16
17 unsigned long ttime_total;
18 unsigned long ttime_samples;
19 unsigned long ttime_mean;
20 24
21 struct list_head queue_list; 25 struct list_head queue_list;
22 struct hlist_node cic_list; 26 struct hlist_node cic_list;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ed766add9b23..20b03bf94748 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1512,7 +1512,6 @@ struct task_struct {
1512 short il_next; 1512 short il_next;
1513 short pref_node_fork; 1513 short pref_node_fork;
1514#endif 1514#endif
1515 atomic_t fs_excl; /* holding fs exclusive resources */
1516 struct rcu_head rcu; 1515 struct rcu_head rcu;
1517 1516
1518 /* 1517 /*
diff --git a/kernel/exit.c b/kernel/exit.c
index 73bb192a3d32..12ea415c6435 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -898,7 +898,6 @@ NORET_TYPE void do_exit(long code)
898 898
899 profile_task_exit(tsk); 899 profile_task_exit(tsk);
900 900
901 WARN_ON(atomic_read(&tsk->fs_excl));
902 WARN_ON(blk_needs_flush_plug(tsk)); 901 WARN_ON(blk_needs_flush_plug(tsk));
903 902
904 if (unlikely(in_interrupt())) 903 if (unlikely(in_interrupt()))
diff --git a/kernel/fork.c b/kernel/fork.c
index aeae5b11b62e..17bf7c8d6511 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -290,7 +290,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
290 290
291 /* One for us, one for whoever does the "release_task()" (usually parent) */ 291 /* One for us, one for whoever does the "release_task()" (usually parent) */
292 atomic_set(&tsk->usage,2); 292 atomic_set(&tsk->usage,2);
293 atomic_set(&tsk->fs_excl, 0);
294#ifdef CONFIG_BLK_DEV_IO_TRACE 293#ifdef CONFIG_BLK_DEV_IO_TRACE
295 tsk->btrace_seq = 0; 294 tsk->btrace_seq = 0;
296#endif 295#endif
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f032e6e1e09a..2ef0dc9e7f39 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -505,7 +505,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
505 list_del_rcu(&bdi->bdi_list); 505 list_del_rcu(&bdi->bdi_list);
506 spin_unlock_bh(&bdi_lock); 506 spin_unlock_bh(&bdi_lock);
507 507
508 synchronize_rcu(); 508 synchronize_rcu_expedited();
509} 509}
510 510
511int bdi_register(struct backing_dev_info *bdi, struct device *parent, 511int bdi_register(struct backing_dev_info *bdi, struct device *parent,