Diffstat (limited to 'block')
-rw-r--r--  block/blk-cgroup.c         |   2
-rw-r--r--  block/blk-core.c           |  33
-rw-r--r--  block/blk-ioc.c            | 228
-rw-r--r--  block/blk-merge.c          |  37
-rw-r--r--  block/blk-softirq.c        |  16
-rw-r--r--  block/blk.h                |  18
-rw-r--r--  block/bsg.c                |   3
-rw-r--r--  block/cfq-iosched.c        |  36
-rw-r--r--  block/elevator.c           |  55
-rw-r--r--  block/genhd.c              |  42
-rw-r--r--  block/partition-generic.c  |  48
-rw-r--r--  block/partitions/ldm.c     |  11
12 files changed, 258 insertions, 271 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1359d637831f..ea84a23d5e68 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1653,7 +1653,7 @@ static void blkiocg_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
1653 | ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); | 1653 | ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); |
1654 | if (ioc) { | 1654 | if (ioc) { |
1655 | ioc_cgroup_changed(ioc); | 1655 | ioc_cgroup_changed(ioc); |
1656 | put_io_context(ioc, NULL); | 1656 | put_io_context(ioc); |
1657 | } | 1657 | } |
1658 | } | 1658 | } |
1659 | } | 1659 | } |
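
The only change to blk-cgroup.c is the new put_io_context() signature: the request_queue hint argument is gone because releasing an io_context no longer tears down icqs inline (see blk-ioc.c below). A minimal sketch of the caller-side pattern, assuming a hypothetical example_attach() wrapper; get_task_io_context(), ioc_cgroup_changed() and put_io_context() are the real calls from the hunk above.

static void example_attach(struct task_struct *task)
{
	struct io_context *ioc;

	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
	if (ioc) {
		ioc_cgroup_changed(ioc);	/* mark the task's icqs ICQ_CGROUP_CHANGED */
		put_io_context(ioc);		/* no queue hint; real teardown may be punted to a workqueue */
	}
}
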
diff --git a/block/blk-core.c b/block/blk-core.c
index e6c05a97ee2b..3a78b00edd71 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -642,7 +642,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) | |||
642 | if (rq->cmd_flags & REQ_ELVPRIV) { | 642 | if (rq->cmd_flags & REQ_ELVPRIV) { |
643 | elv_put_request(q, rq); | 643 | elv_put_request(q, rq); |
644 | if (rq->elv.icq) | 644 | if (rq->elv.icq) |
645 | put_io_context(rq->elv.icq->ioc, q); | 645 | put_io_context(rq->elv.icq->ioc); |
646 | } | 646 | } |
647 | 647 | ||
648 | mempool_free(rq, q->rq.rq_pool); | 648 | mempool_free(rq, q->rq.rq_pool); |
@@ -872,13 +872,15 @@ retry: | |||
872 | spin_unlock_irq(q->queue_lock); | 872 | spin_unlock_irq(q->queue_lock); |
873 | 873 | ||
874 | /* create icq if missing */ | 874 | /* create icq if missing */ |
875 | if (unlikely(et->icq_cache && !icq)) | 875 | if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) { |
876 | icq = ioc_create_icq(q, gfp_mask); | 876 | icq = ioc_create_icq(q, gfp_mask); |
877 | if (!icq) | ||
878 | goto fail_icq; | ||
879 | } | ||
877 | 880 | ||
878 | /* rqs are guaranteed to have icq on elv_set_request() if requested */ | 881 | rq = blk_alloc_request(q, icq, rw_flags, gfp_mask); |
879 | if (likely(!et->icq_cache || icq)) | ||
880 | rq = blk_alloc_request(q, icq, rw_flags, gfp_mask); | ||
881 | 882 | ||
883 | fail_icq: | ||
882 | if (unlikely(!rq)) { | 884 | if (unlikely(!rq)) { |
883 | /* | 885 | /* |
884 | * Allocation failed presumably due to memory. Undo anything | 886 | * Allocation failed presumably due to memory. Undo anything |
@@ -1210,7 +1212,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, | |||
1210 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | 1212 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
1211 | 1213 | ||
1212 | drive_stat_acct(req, 0); | 1214 | drive_stat_acct(req, 0); |
1213 | elv_bio_merged(q, req, bio); | ||
1214 | return true; | 1215 | return true; |
1215 | } | 1216 | } |
1216 | 1217 | ||
@@ -1241,7 +1242,6 @@ static bool bio_attempt_front_merge(struct request_queue *q, | |||
1241 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | 1242 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
1242 | 1243 | ||
1243 | drive_stat_acct(req, 0); | 1244 | drive_stat_acct(req, 0); |
1244 | elv_bio_merged(q, req, bio); | ||
1245 | return true; | 1245 | return true; |
1246 | } | 1246 | } |
1247 | 1247 | ||
@@ -1255,13 +1255,12 @@ static bool bio_attempt_front_merge(struct request_queue *q, | |||
1255 | * on %current's plugged list. Returns %true if merge was successful, | 1255 | * on %current's plugged list. Returns %true if merge was successful, |
1256 | * otherwise %false. | 1256 | * otherwise %false. |
1257 | * | 1257 | * |
1258 | * This function is called without @q->queue_lock; however, elevator is | 1258 | * Plugging coalesces IOs from the same issuer for the same purpose without |
1259 | * accessed iff there already are requests on the plugged list which in | 1259 | * going through @q->queue_lock. As such it's more of an issuing mechanism |
1260 | * turn guarantees validity of the elevator. | 1260 | * than scheduling, and the request, while may have elvpriv data, is not |
1261 | * | 1261 | * added on the elevator at this point. In addition, we don't have |
1262 | * Note that, on successful merge, elevator operation | 1262 | * reliable access to the elevator outside queue lock. Only check basic |
1263 | * elevator_bio_merged_fn() will be called without queue lock. Elevator | 1263 | * merging parameters without querying the elevator. |
1264 | * must be ready for this. | ||
1265 | */ | 1264 | */ |
1266 | static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, | 1265 | static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, |
1267 | unsigned int *request_count) | 1266 | unsigned int *request_count) |
@@ -1280,10 +1279,10 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, | |||
1280 | 1279 | ||
1281 | (*request_count)++; | 1280 | (*request_count)++; |
1282 | 1281 | ||
1283 | if (rq->q != q) | 1282 | if (rq->q != q || !blk_rq_merge_ok(rq, bio)) |
1284 | continue; | 1283 | continue; |
1285 | 1284 | ||
1286 | el_ret = elv_try_merge(rq, bio); | 1285 | el_ret = blk_try_merge(rq, bio); |
1287 | if (el_ret == ELEVATOR_BACK_MERGE) { | 1286 | if (el_ret == ELEVATOR_BACK_MERGE) { |
1288 | ret = bio_attempt_back_merge(q, rq, bio); | 1287 | ret = bio_attempt_back_merge(q, rq, bio); |
1289 | if (ret) | 1288 | if (ret) |
@@ -1345,12 +1344,14 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) | |||
1345 | el_ret = elv_merge(q, &req, bio); | 1344 | el_ret = elv_merge(q, &req, bio); |
1346 | if (el_ret == ELEVATOR_BACK_MERGE) { | 1345 | if (el_ret == ELEVATOR_BACK_MERGE) { |
1347 | if (bio_attempt_back_merge(q, req, bio)) { | 1346 | if (bio_attempt_back_merge(q, req, bio)) { |
1347 | elv_bio_merged(q, req, bio); | ||
1348 | if (!attempt_back_merge(q, req)) | 1348 | if (!attempt_back_merge(q, req)) |
1349 | elv_merged_request(q, req, el_ret); | 1349 | elv_merged_request(q, req, el_ret); |
1350 | goto out_unlock; | 1350 | goto out_unlock; |
1351 | } | 1351 | } |
1352 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { | 1352 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { |
1353 | if (bio_attempt_front_merge(q, req, bio)) { | 1353 | if (bio_attempt_front_merge(q, req, bio)) { |
1354 | elv_bio_merged(q, req, bio); | ||
1354 | if (!attempt_front_merge(q, req)) | 1355 | if (!attempt_front_merge(q, req)) |
1355 | elv_merged_request(q, req, el_ret); | 1356 | elv_merged_request(q, req, el_ret); |
1356 | goto out_unlock; | 1357 | goto out_unlock; |
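
Two things change in blk-core.c. First, get_request() only allocates an icq when the request will actually carry elevator private data (REQ_ELVPRIV), with a fail_icq label for the allocation-failure path. Second, attempt_plug_merge() runs without q->queue_lock and can no longer call into the elevator at all: it uses the new lock-free helpers blk_rq_merge_ok() and blk_try_merge(), and elv_bio_merged() is now invoked from blk_queue_bio() after the merge succeeds under queue_lock. A sketch of the per-candidate plug-merge check, assuming a hypothetical helper name; the calls themselves are the ones used in the hunks above.

static bool plug_merge_one(struct request_queue *q, struct request *rq,
			   struct bio *bio)
{
	/* no queue_lock here: only generic, elevator-free checks */
	if (rq->q != q || !blk_rq_merge_ok(rq, bio))
		return false;

	switch (blk_try_merge(rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		return bio_attempt_back_merge(q, rq, bio);
	case ELEVATOR_FRONT_MERGE:
		return bio_attempt_front_merge(q, rq, bio);
	default:
		return false;	/* not contiguous with this request */
	}
}
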
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 27a06e00eaec..fb95dd2f889a 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -29,21 +29,6 @@ void get_io_context(struct io_context *ioc) | |||
29 | } | 29 | } |
30 | EXPORT_SYMBOL(get_io_context); | 30 | EXPORT_SYMBOL(get_io_context); |
31 | 31 | ||
32 | /* | ||
33 | * Releasing ioc may nest into another put_io_context() leading to nested | ||
34 | * fast path release. As the ioc's can't be the same, this is okay but | ||
35 | * makes lockdep whine. Keep track of nesting and use it as subclass. | ||
36 | */ | ||
37 | #ifdef CONFIG_LOCKDEP | ||
38 | #define ioc_release_depth(q) ((q) ? (q)->ioc_release_depth : 0) | ||
39 | #define ioc_release_depth_inc(q) (q)->ioc_release_depth++ | ||
40 | #define ioc_release_depth_dec(q) (q)->ioc_release_depth-- | ||
41 | #else | ||
42 | #define ioc_release_depth(q) 0 | ||
43 | #define ioc_release_depth_inc(q) do { } while (0) | ||
44 | #define ioc_release_depth_dec(q) do { } while (0) | ||
45 | #endif | ||
46 | |||
47 | static void icq_free_icq_rcu(struct rcu_head *head) | 32 | static void icq_free_icq_rcu(struct rcu_head *head) |
48 | { | 33 | { |
49 | struct io_cq *icq = container_of(head, struct io_cq, __rcu_head); | 34 | struct io_cq *icq = container_of(head, struct io_cq, __rcu_head); |
@@ -51,11 +36,23 @@ static void icq_free_icq_rcu(struct rcu_head *head) | |||
51 | kmem_cache_free(icq->__rcu_icq_cache, icq); | 36 | kmem_cache_free(icq->__rcu_icq_cache, icq); |
52 | } | 37 | } |
53 | 38 | ||
54 | /* | 39 | /* Exit an icq. Called with both ioc and q locked. */ |
55 | * Exit and free an icq. Called with both ioc and q locked. | ||
56 | */ | ||
57 | static void ioc_exit_icq(struct io_cq *icq) | 40 | static void ioc_exit_icq(struct io_cq *icq) |
58 | { | 41 | { |
42 | struct elevator_type *et = icq->q->elevator->type; | ||
43 | |||
44 | if (icq->flags & ICQ_EXITED) | ||
45 | return; | ||
46 | |||
47 | if (et->ops.elevator_exit_icq_fn) | ||
48 | et->ops.elevator_exit_icq_fn(icq); | ||
49 | |||
50 | icq->flags |= ICQ_EXITED; | ||
51 | } | ||
52 | |||
53 | /* Release an icq. Called with both ioc and q locked. */ | ||
54 | static void ioc_destroy_icq(struct io_cq *icq) | ||
55 | { | ||
59 | struct io_context *ioc = icq->ioc; | 56 | struct io_context *ioc = icq->ioc; |
60 | struct request_queue *q = icq->q; | 57 | struct request_queue *q = icq->q; |
61 | struct elevator_type *et = q->elevator->type; | 58 | struct elevator_type *et = q->elevator->type; |
@@ -75,11 +72,7 @@ static void ioc_exit_icq(struct io_cq *icq) | |||
75 | if (rcu_dereference_raw(ioc->icq_hint) == icq) | 72 | if (rcu_dereference_raw(ioc->icq_hint) == icq) |
76 | rcu_assign_pointer(ioc->icq_hint, NULL); | 73 | rcu_assign_pointer(ioc->icq_hint, NULL); |
77 | 74 | ||
78 | if (et->ops.elevator_exit_icq_fn) { | 75 | ioc_exit_icq(icq); |
79 | ioc_release_depth_inc(q); | ||
80 | et->ops.elevator_exit_icq_fn(icq); | ||
81 | ioc_release_depth_dec(q); | ||
82 | } | ||
83 | 76 | ||
84 | /* | 77 | /* |
85 | * @icq->q might have gone away by the time RCU callback runs | 78 | * @icq->q might have gone away by the time RCU callback runs |
@@ -97,51 +90,32 @@ static void ioc_release_fn(struct work_struct *work) | |||
97 | { | 90 | { |
98 | struct io_context *ioc = container_of(work, struct io_context, | 91 | struct io_context *ioc = container_of(work, struct io_context, |
99 | release_work); | 92 | release_work); |
100 | struct request_queue *last_q = NULL; | 93 | unsigned long flags; |
101 | 94 | ||
102 | spin_lock_irq(&ioc->lock); | 95 | /* |
96 | * Exiting icq may call into put_io_context() through elevator | ||
97 | * which will trigger lockdep warning. The ioc's are guaranteed to | ||
98 | * be different, use a different locking subclass here. Use | ||
99 | * irqsave variant as there's no spin_lock_irq_nested(). | ||
100 | */ | ||
101 | spin_lock_irqsave_nested(&ioc->lock, flags, 1); | ||
103 | 102 | ||
104 | while (!hlist_empty(&ioc->icq_list)) { | 103 | while (!hlist_empty(&ioc->icq_list)) { |
105 | struct io_cq *icq = hlist_entry(ioc->icq_list.first, | 104 | struct io_cq *icq = hlist_entry(ioc->icq_list.first, |
106 | struct io_cq, ioc_node); | 105 | struct io_cq, ioc_node); |
107 | struct request_queue *this_q = icq->q; | 106 | struct request_queue *q = icq->q; |
108 | 107 | ||
109 | if (this_q != last_q) { | 108 | if (spin_trylock(q->queue_lock)) { |
110 | /* | 109 | ioc_destroy_icq(icq); |
111 | * Need to switch to @this_q. Once we release | 110 | spin_unlock(q->queue_lock); |
112 | * @ioc->lock, it can go away along with @cic. | 111 | } else { |
113 | * Hold on to it. | 112 | spin_unlock_irqrestore(&ioc->lock, flags); |
114 | */ | 113 | cpu_relax(); |
115 | __blk_get_queue(this_q); | 114 | spin_lock_irqsave_nested(&ioc->lock, flags, 1); |
116 | |||
117 | /* | ||
118 | * blk_put_queue() might sleep thanks to kobject | ||
119 | * idiocy. Always release both locks, put and | ||
120 | * restart. | ||
121 | */ | ||
122 | if (last_q) { | ||
123 | spin_unlock(last_q->queue_lock); | ||
124 | spin_unlock_irq(&ioc->lock); | ||
125 | blk_put_queue(last_q); | ||
126 | } else { | ||
127 | spin_unlock_irq(&ioc->lock); | ||
128 | } | ||
129 | |||
130 | last_q = this_q; | ||
131 | spin_lock_irq(this_q->queue_lock); | ||
132 | spin_lock(&ioc->lock); | ||
133 | continue; | ||
134 | } | 115 | } |
135 | ioc_exit_icq(icq); | ||
136 | } | 116 | } |
137 | 117 | ||
138 | if (last_q) { | 118 | spin_unlock_irqrestore(&ioc->lock, flags); |
139 | spin_unlock(last_q->queue_lock); | ||
140 | spin_unlock_irq(&ioc->lock); | ||
141 | blk_put_queue(last_q); | ||
142 | } else { | ||
143 | spin_unlock_irq(&ioc->lock); | ||
144 | } | ||
145 | 119 | ||
146 | kmem_cache_free(iocontext_cachep, ioc); | 120 | kmem_cache_free(iocontext_cachep, ioc); |
147 | } | 121 | } |
@@ -149,79 +123,35 @@ static void ioc_release_fn(struct work_struct *work) | |||
149 | /** | 123 | /** |
150 | * put_io_context - put a reference of io_context | 124 | * put_io_context - put a reference of io_context |
151 | * @ioc: io_context to put | 125 | * @ioc: io_context to put |
152 | * @locked_q: request_queue the caller is holding queue_lock of (hint) | ||
153 | * | 126 | * |
154 | * Decrement reference count of @ioc and release it if the count reaches | 127 | * Decrement reference count of @ioc and release it if the count reaches |
155 | * zero. If the caller is holding queue_lock of a queue, it can indicate | 128 | * zero. |
156 | * that with @locked_q. This is an optimization hint and the caller is | ||
157 | * allowed to pass in %NULL even when it's holding a queue_lock. | ||
158 | */ | 129 | */ |
159 | void put_io_context(struct io_context *ioc, struct request_queue *locked_q) | 130 | void put_io_context(struct io_context *ioc) |
160 | { | 131 | { |
161 | struct request_queue *last_q = locked_q; | ||
162 | unsigned long flags; | 132 | unsigned long flags; |
133 | bool free_ioc = false; | ||
163 | 134 | ||
164 | if (ioc == NULL) | 135 | if (ioc == NULL) |
165 | return; | 136 | return; |
166 | 137 | ||
167 | BUG_ON(atomic_long_read(&ioc->refcount) <= 0); | 138 | BUG_ON(atomic_long_read(&ioc->refcount) <= 0); |
168 | if (locked_q) | ||
169 | lockdep_assert_held(locked_q->queue_lock); | ||
170 | |||
171 | if (!atomic_long_dec_and_test(&ioc->refcount)) | ||
172 | return; | ||
173 | 139 | ||
174 | /* | 140 | /* |
175 | * Destroy @ioc. This is a bit messy because icq's are chained | 141 | * Releasing ioc requires reverse order double locking and we may |
176 | * from both ioc and queue, and ioc->lock nests inside queue_lock. | 142 | * already be holding a queue_lock. Do it asynchronously from wq. |
177 | * The inner ioc->lock should be held to walk our icq_list and then | ||
178 | * for each icq the outer matching queue_lock should be grabbed. | ||
179 | * ie. We need to do reverse-order double lock dancing. | ||
180 | * | ||
181 | * Another twist is that we are often called with one of the | ||
182 | * matching queue_locks held as indicated by @locked_q, which | ||
183 | * prevents performing double-lock dance for other queues. | ||
184 | * | ||
185 | * So, we do it in two stages. The fast path uses the queue_lock | ||
186 | * the caller is holding and, if other queues need to be accessed, | ||
187 | * uses trylock to avoid introducing locking dependency. This can | ||
188 | * handle most cases, especially if @ioc was performing IO on only | ||
189 | * single device. | ||
190 | * | ||
191 | * If trylock doesn't cut it, we defer to @ioc->release_work which | ||
192 | * can do all the double-locking dancing. | ||
193 | */ | 143 | */ |
194 | spin_lock_irqsave_nested(&ioc->lock, flags, | 144 | if (atomic_long_dec_and_test(&ioc->refcount)) { |
195 | ioc_release_depth(locked_q)); | 145 | spin_lock_irqsave(&ioc->lock, flags); |
196 | 146 | if (!hlist_empty(&ioc->icq_list)) | |
197 | while (!hlist_empty(&ioc->icq_list)) { | 147 | schedule_work(&ioc->release_work); |
198 | struct io_cq *icq = hlist_entry(ioc->icq_list.first, | 148 | else |
199 | struct io_cq, ioc_node); | 149 | free_ioc = true; |
200 | struct request_queue *this_q = icq->q; | 150 | spin_unlock_irqrestore(&ioc->lock, flags); |
201 | |||
202 | if (this_q != last_q) { | ||
203 | if (last_q && last_q != locked_q) | ||
204 | spin_unlock(last_q->queue_lock); | ||
205 | last_q = NULL; | ||
206 | |||
207 | if (!spin_trylock(this_q->queue_lock)) | ||
208 | break; | ||
209 | last_q = this_q; | ||
210 | continue; | ||
211 | } | ||
212 | ioc_exit_icq(icq); | ||
213 | } | 151 | } |
214 | 152 | ||
215 | if (last_q && last_q != locked_q) | 153 | if (free_ioc) |
216 | spin_unlock(last_q->queue_lock); | ||
217 | |||
218 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
219 | |||
220 | /* if no icq is left, we're done; otherwise, kick release_work */ | ||
221 | if (hlist_empty(&ioc->icq_list)) | ||
222 | kmem_cache_free(iocontext_cachep, ioc); | 154 | kmem_cache_free(iocontext_cachep, ioc); |
223 | else | ||
224 | schedule_work(&ioc->release_work); | ||
225 | } | 155 | } |
226 | EXPORT_SYMBOL(put_io_context); | 156 | EXPORT_SYMBOL(put_io_context); |
227 | 157 | ||
@@ -229,14 +159,42 @@ EXPORT_SYMBOL(put_io_context); | |||
229 | void exit_io_context(struct task_struct *task) | 159 | void exit_io_context(struct task_struct *task) |
230 | { | 160 | { |
231 | struct io_context *ioc; | 161 | struct io_context *ioc; |
162 | struct io_cq *icq; | ||
163 | struct hlist_node *n; | ||
164 | unsigned long flags; | ||
232 | 165 | ||
233 | task_lock(task); | 166 | task_lock(task); |
234 | ioc = task->io_context; | 167 | ioc = task->io_context; |
235 | task->io_context = NULL; | 168 | task->io_context = NULL; |
236 | task_unlock(task); | 169 | task_unlock(task); |
237 | 170 | ||
238 | atomic_dec(&ioc->nr_tasks); | 171 | if (!atomic_dec_and_test(&ioc->nr_tasks)) { |
239 | put_io_context(ioc, NULL); | 172 | put_io_context(ioc); |
173 | return; | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * Need ioc lock to walk icq_list and q lock to exit icq. Perform | ||
178 | * reverse double locking. Read comment in ioc_release_fn() for | ||
179 | * explanation on the nested locking annotation. | ||
180 | */ | ||
181 | retry: | ||
182 | spin_lock_irqsave_nested(&ioc->lock, flags, 1); | ||
183 | hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) { | ||
184 | if (icq->flags & ICQ_EXITED) | ||
185 | continue; | ||
186 | if (spin_trylock(icq->q->queue_lock)) { | ||
187 | ioc_exit_icq(icq); | ||
188 | spin_unlock(icq->q->queue_lock); | ||
189 | } else { | ||
190 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
191 | cpu_relax(); | ||
192 | goto retry; | ||
193 | } | ||
194 | } | ||
195 | spin_unlock_irqrestore(&ioc->lock, flags); | ||
196 | |||
197 | put_io_context(ioc); | ||
240 | } | 198 | } |
241 | 199 | ||
242 | /** | 200 | /** |
@@ -255,7 +213,7 @@ void ioc_clear_queue(struct request_queue *q) | |||
255 | struct io_context *ioc = icq->ioc; | 213 | struct io_context *ioc = icq->ioc; |
256 | 214 | ||
257 | spin_lock(&ioc->lock); | 215 | spin_lock(&ioc->lock); |
258 | ioc_exit_icq(icq); | 216 | ioc_destroy_icq(icq); |
259 | spin_unlock(&ioc->lock); | 217 | spin_unlock(&ioc->lock); |
260 | } | 218 | } |
261 | } | 219 | } |
@@ -424,13 +382,13 @@ struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask) | |||
424 | return icq; | 382 | return icq; |
425 | } | 383 | } |
426 | 384 | ||
427 | void ioc_set_changed(struct io_context *ioc, int which) | 385 | void ioc_set_icq_flags(struct io_context *ioc, unsigned int flags) |
428 | { | 386 | { |
429 | struct io_cq *icq; | 387 | struct io_cq *icq; |
430 | struct hlist_node *n; | 388 | struct hlist_node *n; |
431 | 389 | ||
432 | hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) | 390 | hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) |
433 | set_bit(which, &icq->changed); | 391 | icq->flags |= flags; |
434 | } | 392 | } |
435 | 393 | ||
436 | /** | 394 | /** |
@@ -448,7 +406,7 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio) | |||
448 | 406 | ||
449 | spin_lock_irqsave(&ioc->lock, flags); | 407 | spin_lock_irqsave(&ioc->lock, flags); |
450 | ioc->ioprio = ioprio; | 408 | ioc->ioprio = ioprio; |
451 | ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED); | 409 | ioc_set_icq_flags(ioc, ICQ_IOPRIO_CHANGED); |
452 | spin_unlock_irqrestore(&ioc->lock, flags); | 410 | spin_unlock_irqrestore(&ioc->lock, flags); |
453 | } | 411 | } |
454 | 412 | ||
@@ -465,11 +423,33 @@ void ioc_cgroup_changed(struct io_context *ioc) | |||
465 | unsigned long flags; | 423 | unsigned long flags; |
466 | 424 | ||
467 | spin_lock_irqsave(&ioc->lock, flags); | 425 | spin_lock_irqsave(&ioc->lock, flags); |
468 | ioc_set_changed(ioc, ICQ_CGROUP_CHANGED); | 426 | ioc_set_icq_flags(ioc, ICQ_CGROUP_CHANGED); |
469 | spin_unlock_irqrestore(&ioc->lock, flags); | 427 | spin_unlock_irqrestore(&ioc->lock, flags); |
470 | } | 428 | } |
471 | EXPORT_SYMBOL(ioc_cgroup_changed); | 429 | EXPORT_SYMBOL(ioc_cgroup_changed); |
472 | 430 | ||
431 | /** | ||
432 | * icq_get_changed - fetch and clear icq changed mask | ||
433 | * @icq: icq of interest | ||
434 | * | ||
435 | * Fetch and clear ICQ_*_CHANGED bits from @icq. Grabs and releases | ||
436 | * @icq->ioc->lock. | ||
437 | */ | ||
438 | unsigned icq_get_changed(struct io_cq *icq) | ||
439 | { | ||
440 | unsigned int changed = 0; | ||
441 | unsigned long flags; | ||
442 | |||
443 | if (unlikely(icq->flags & ICQ_CHANGED_MASK)) { | ||
444 | spin_lock_irqsave(&icq->ioc->lock, flags); | ||
445 | changed = icq->flags & ICQ_CHANGED_MASK; | ||
446 | icq->flags &= ~ICQ_CHANGED_MASK; | ||
447 | spin_unlock_irqrestore(&icq->ioc->lock, flags); | ||
448 | } | ||
449 | return changed; | ||
450 | } | ||
451 | EXPORT_SYMBOL(icq_get_changed); | ||
452 | |||
473 | static int __init blk_ioc_init(void) | 453 | static int __init blk_ioc_init(void) |
474 | { | 454 | { |
475 | iocontext_cachep = kmem_cache_create("blkdev_ioc", | 455 | iocontext_cachep = kmem_cache_create("blkdev_ioc", |
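
This is the core of the series. ioc_exit_icq() is split from ioc_destroy_icq(): the former only invokes the elevator's exit_icq hook and sets ICQ_EXITED (so it runs at most once), while the latter additionally unlinks the icq from the ioc and the queue and frees it via RCU. put_io_context() no longer performs the trylock dance itself; if any icqs remain it schedules release_work, and both ioc_release_fn() and exit_io_context() use the same reverse-order double-locking pattern. A sketch of that pattern, assuming a hypothetical wrapper name; the locking calls and the lockdep subclass are exactly those used above.

/*
 * ioc->lock nests inside queue_lock, but walking ioc->icq_list needs
 * ioc->lock first, so each icq's queue_lock may only be trylocked.
 * On contention, drop everything and retry.  Subclass 1 is used because
 * the elevator exit hook may itself call put_io_context() on another ioc.
 */
static void release_all_icqs(struct io_context *ioc)
{
	unsigned long flags;
retry:
	spin_lock_irqsave_nested(&ioc->lock, flags, 1);
	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);

		if (spin_trylock(icq->q->queue_lock)) {
			ioc_destroy_icq(icq);	/* unlinks icq, so the loop makes progress */
			spin_unlock(icq->q->queue_lock);
		} else {
			spin_unlock_irqrestore(&ioc->lock, flags);
			cpu_relax();
			goto retry;
		}
	}
	spin_unlock_irqrestore(&ioc->lock, flags);
}
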
diff --git a/block/blk-merge.c b/block/blk-merge.c
index cfcc37cb222b..160035f54882 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -471,3 +471,40 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, | |||
471 | { | 471 | { |
472 | return attempt_merge(q, rq, next); | 472 | return attempt_merge(q, rq, next); |
473 | } | 473 | } |
474 | |||
475 | bool blk_rq_merge_ok(struct request *rq, struct bio *bio) | ||
476 | { | ||
477 | if (!rq_mergeable(rq)) | ||
478 | return false; | ||
479 | |||
480 | /* don't merge file system requests and discard requests */ | ||
481 | if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) | ||
482 | return false; | ||
483 | |||
484 | /* don't merge discard requests and secure discard requests */ | ||
485 | if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE)) | ||
486 | return false; | ||
487 | |||
488 | /* different data direction or already started, don't merge */ | ||
489 | if (bio_data_dir(bio) != rq_data_dir(rq)) | ||
490 | return false; | ||
491 | |||
492 | /* must be same device and not a special request */ | ||
493 | if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) | ||
494 | return false; | ||
495 | |||
496 | /* only merge integrity protected bio into ditto rq */ | ||
497 | if (bio_integrity(bio) != blk_integrity_rq(rq)) | ||
498 | return false; | ||
499 | |||
500 | return true; | ||
501 | } | ||
502 | |||
503 | int blk_try_merge(struct request *rq, struct bio *bio) | ||
504 | { | ||
505 | if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector) | ||
506 | return ELEVATOR_BACK_MERGE; | ||
507 | else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector) | ||
508 | return ELEVATOR_FRONT_MERGE; | ||
509 | return ELEVATOR_NO_MERGE; | ||
510 | } | ||
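
blk_rq_merge_ok() takes over the generic, elevator-independent checks that used to live in elv_rq_merge_ok(), and blk_try_merge() classifies a mergeable bio as a front or back merge purely by sector arithmetic, so both can be called without queue_lock. A worked example of the arithmetic with illustrative numbers (not taken from the patch): for a request covering sectors [100, 108) and an 8-sector bio, bi_sector == 108 gives blk_rq_pos(rq) + blk_rq_sectors(rq) == 108, i.e. ELEVATOR_BACK_MERGE; bi_sector == 92 gives blk_rq_pos(rq) - bio_sectors(bio) == 92, i.e. ELEVATOR_FRONT_MERGE; anything else is ELEVATOR_NO_MERGE. Callers are expected to use the two helpers together, roughly:

	if (blk_rq_merge_ok(rq, bio)) {
		switch (blk_try_merge(rq, bio)) {
		case ELEVATOR_BACK_MERGE:	/* append bio to rq */
			break;
		case ELEVATOR_FRONT_MERGE:	/* prepend bio to rq */
			break;
		default:			/* leave rq alone */
			break;
		}
	}
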
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 1366a89d8e66..467c8de88642 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/blkdev.h> | 8 | #include <linux/blkdev.h> |
9 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
10 | #include <linux/cpu.h> | 10 | #include <linux/cpu.h> |
11 | #include <linux/sched.h> | ||
11 | 12 | ||
12 | #include "blk.h" | 13 | #include "blk.h" |
13 | 14 | ||
@@ -103,9 +104,10 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = { | |||
103 | 104 | ||
104 | void __blk_complete_request(struct request *req) | 105 | void __blk_complete_request(struct request *req) |
105 | { | 106 | { |
106 | int ccpu, cpu, group_cpu = NR_CPUS; | 107 | int ccpu, cpu; |
107 | struct request_queue *q = req->q; | 108 | struct request_queue *q = req->q; |
108 | unsigned long flags; | 109 | unsigned long flags; |
110 | bool shared = false; | ||
109 | 111 | ||
110 | BUG_ON(!q->softirq_done_fn); | 112 | BUG_ON(!q->softirq_done_fn); |
111 | 113 | ||
@@ -117,22 +119,20 @@ void __blk_complete_request(struct request *req) | |||
117 | */ | 119 | */ |
118 | if (req->cpu != -1) { | 120 | if (req->cpu != -1) { |
119 | ccpu = req->cpu; | 121 | ccpu = req->cpu; |
120 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) { | 122 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) |
121 | ccpu = blk_cpu_to_group(ccpu); | 123 | shared = cpus_share_cache(cpu, ccpu); |
122 | group_cpu = blk_cpu_to_group(cpu); | ||
123 | } | ||
124 | } else | 124 | } else |
125 | ccpu = cpu; | 125 | ccpu = cpu; |
126 | 126 | ||
127 | /* | 127 | /* |
128 | * If current CPU and requested CPU are in the same group, running | 128 | * If current CPU and requested CPU share a cache, run the softirq on |
129 | * softirq in current CPU. One might concern this is just like | 129 | * the current CPU. One might concern this is just like |
130 | * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is | 130 | * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is |
131 | * running in interrupt handler, and currently I/O controller doesn't | 131 | * running in interrupt handler, and currently I/O controller doesn't |
132 | * support multiple interrupts, so current CPU is unique actually. This | 132 | * support multiple interrupts, so current CPU is unique actually. This |
133 | * avoids IPI sending from current CPU to the first CPU of a group. | 133 | * avoids IPI sending from current CPU to the first CPU of a group. |
134 | */ | 134 | */ |
135 | if (ccpu == cpu || ccpu == group_cpu) { | 135 | if (ccpu == cpu || shared) { |
136 | struct list_head *list; | 136 | struct list_head *list; |
137 | do_local: | 137 | do_local: |
138 | list = &__get_cpu_var(blk_cpu_done); | 138 | list = &__get_cpu_var(blk_cpu_done); |
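
__blk_complete_request() stops grouping CPUs with the block-local blk_cpu_to_group() (removed from blk.h below) and instead asks the scheduler whether the submitting and completing CPUs share a last-level cache via cpus_share_cache(). Unless QUEUE_FLAG_SAME_FORCE is set, sharing a cache is enough to run the completion softirq locally and skip the IPI. A sketch of the decision, assuming a hypothetical helper; cpu is the CPU handling the interrupt and req->cpu is where the request was submitted, as in the hunk above.

static bool complete_locally(struct request_queue *q, struct request *req,
			     int cpu)
{
	int ccpu = (req->cpu != -1) ? req->cpu : cpu;
	bool shared = false;

	if (req->cpu != -1 && !test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
		shared = cpus_share_cache(cpu, ccpu);

	/* true: queue the softirq on this CPU; false: IPI ccpu */
	return ccpu == cpu || shared;
}
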
diff --git a/block/blk.h b/block/blk.h
index 7efd772336de..d45be871329e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -137,6 +137,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, | |||
137 | struct request *next); | 137 | struct request *next); |
138 | void blk_recalc_rq_segments(struct request *rq); | 138 | void blk_recalc_rq_segments(struct request *rq); |
139 | void blk_rq_set_mixed_merge(struct request *rq); | 139 | void blk_rq_set_mixed_merge(struct request *rq); |
140 | bool blk_rq_merge_ok(struct request *rq, struct bio *bio); | ||
141 | int blk_try_merge(struct request *rq, struct bio *bio); | ||
140 | 142 | ||
141 | void blk_queue_congestion_threshold(struct request_queue *q); | 143 | void blk_queue_congestion_threshold(struct request_queue *q); |
142 | 144 | ||
@@ -164,22 +166,6 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) | |||
164 | return q->nr_congestion_off; | 166 | return q->nr_congestion_off; |
165 | } | 167 | } |
166 | 168 | ||
167 | static inline int blk_cpu_to_group(int cpu) | ||
168 | { | ||
169 | int group = NR_CPUS; | ||
170 | #ifdef CONFIG_SCHED_MC | ||
171 | const struct cpumask *mask = cpu_coregroup_mask(cpu); | ||
172 | group = cpumask_first(mask); | ||
173 | #elif defined(CONFIG_SCHED_SMT) | ||
174 | group = cpumask_first(topology_thread_cpumask(cpu)); | ||
175 | #else | ||
176 | return cpu; | ||
177 | #endif | ||
178 | if (likely(group < NR_CPUS)) | ||
179 | return group; | ||
180 | return cpu; | ||
181 | } | ||
182 | |||
183 | /* | 169 | /* |
184 | * Contribute to IO statistics IFF: | 170 | * Contribute to IO statistics IFF: |
185 | * | 171 | * |
diff --git a/block/bsg.c b/block/bsg.c
index 4cf703fd98bb..ff64ae3bacee 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -983,7 +983,8 @@ void bsg_unregister_queue(struct request_queue *q) | |||
983 | 983 | ||
984 | mutex_lock(&bsg_mutex); | 984 | mutex_lock(&bsg_mutex); |
985 | idr_remove(&bsg_minor_idr, bcd->minor); | 985 | idr_remove(&bsg_minor_idr, bcd->minor); |
986 | sysfs_remove_link(&q->kobj, "bsg"); | 986 | if (q->kobj.sd) |
987 | sysfs_remove_link(&q->kobj, "bsg"); | ||
987 | device_unregister(bcd->class_dev); | 988 | device_unregister(bcd->class_dev); |
988 | bcd->class_dev = NULL; | 989 | bcd->class_dev = NULL; |
989 | kref_put(&bcd->ref, bsg_kref_release_function); | 990 | kref_put(&bcd->ref, bsg_kref_release_function); |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ee55019066a1..457295253566 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1699,18 +1699,11 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
1699 | 1699 | ||
1700 | /* | 1700 | /* |
1701 | * Lookup the cfqq that this bio will be queued with and allow | 1701 | * Lookup the cfqq that this bio will be queued with and allow |
1702 | * merge only if rq is queued there. This function can be called | 1702 | * merge only if rq is queued there. |
1703 | * from plug merge without queue_lock. In such cases, ioc of @rq | ||
1704 | * and %current are guaranteed to be equal. Avoid lookup which | ||
1705 | * requires queue_lock by using @rq's cic. | ||
1706 | */ | 1703 | */ |
1707 | if (current->io_context == RQ_CIC(rq)->icq.ioc) { | 1704 | cic = cfq_cic_lookup(cfqd, current->io_context); |
1708 | cic = RQ_CIC(rq); | 1705 | if (!cic) |
1709 | } else { | 1706 | return false; |
1710 | cic = cfq_cic_lookup(cfqd, current->io_context); | ||
1711 | if (!cic) | ||
1712 | return false; | ||
1713 | } | ||
1714 | 1707 | ||
1715 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); | 1708 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); |
1716 | return cfqq == RQ_CFQQ(rq); | 1709 | return cfqq == RQ_CFQQ(rq); |
@@ -1794,7 +1787,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1794 | cfqd->active_queue = NULL; | 1787 | cfqd->active_queue = NULL; |
1795 | 1788 | ||
1796 | if (cfqd->active_cic) { | 1789 | if (cfqd->active_cic) { |
1797 | put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue); | 1790 | put_io_context(cfqd->active_cic->icq.ioc); |
1798 | cfqd->active_cic = NULL; | 1791 | cfqd->active_cic = NULL; |
1799 | } | 1792 | } |
1800 | } | 1793 | } |
@@ -3117,17 +3110,18 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | |||
3117 | */ | 3110 | */ |
3118 | static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 3111 | static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
3119 | { | 3112 | { |
3113 | enum wl_type_t old_type = cfqq_type(cfqd->active_queue); | ||
3114 | |||
3120 | cfq_log_cfqq(cfqd, cfqq, "preempt"); | 3115 | cfq_log_cfqq(cfqd, cfqq, "preempt"); |
3116 | cfq_slice_expired(cfqd, 1); | ||
3121 | 3117 | ||
3122 | /* | 3118 | /* |
3123 | * workload type is changed, don't save slice, otherwise preempt | 3119 | * workload type is changed, don't save slice, otherwise preempt |
3124 | * doesn't happen | 3120 | * doesn't happen |
3125 | */ | 3121 | */ |
3126 | if (cfqq_type(cfqd->active_queue) != cfqq_type(cfqq)) | 3122 | if (old_type != cfqq_type(cfqq)) |
3127 | cfqq->cfqg->saved_workload_slice = 0; | 3123 | cfqq->cfqg->saved_workload_slice = 0; |
3128 | 3124 | ||
3129 | cfq_slice_expired(cfqd, 1); | ||
3130 | |||
3131 | /* | 3125 | /* |
3132 | * Put the new queue at the front of the of the current list, | 3126 | * Put the new queue at the front of the of the current list, |
3133 | * so we know that it will be selected next. | 3127 | * so we know that it will be selected next. |
@@ -3476,20 +3470,20 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | |||
3476 | const int rw = rq_data_dir(rq); | 3470 | const int rw = rq_data_dir(rq); |
3477 | const bool is_sync = rq_is_sync(rq); | 3471 | const bool is_sync = rq_is_sync(rq); |
3478 | struct cfq_queue *cfqq; | 3472 | struct cfq_queue *cfqq; |
3473 | unsigned int changed; | ||
3479 | 3474 | ||
3480 | might_sleep_if(gfp_mask & __GFP_WAIT); | 3475 | might_sleep_if(gfp_mask & __GFP_WAIT); |
3481 | 3476 | ||
3482 | spin_lock_irq(q->queue_lock); | 3477 | spin_lock_irq(q->queue_lock); |
3483 | 3478 | ||
3484 | /* handle changed notifications */ | 3479 | /* handle changed notifications */ |
3485 | if (unlikely(cic->icq.changed)) { | 3480 | changed = icq_get_changed(&cic->icq); |
3486 | if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed)) | 3481 | if (unlikely(changed & ICQ_IOPRIO_CHANGED)) |
3487 | changed_ioprio(cic); | 3482 | changed_ioprio(cic); |
3488 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | 3483 | #ifdef CONFIG_CFQ_GROUP_IOSCHED |
3489 | if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed)) | 3484 | if (unlikely(changed & ICQ_CGROUP_CHANGED)) |
3490 | changed_cgroup(cic); | 3485 | changed_cgroup(cic); |
3491 | #endif | 3486 | #endif |
3492 | } | ||
3493 | 3487 | ||
3494 | new_queue: | 3488 | new_queue: |
3495 | cfqq = cic_to_cfqq(cic, is_sync); | 3489 | cfqq = cic_to_cfqq(cic, is_sync); |
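
Three adjustments in cfq-iosched.c: cfq_allow_merge() drops the lock-free shortcut through RQ_CIC(rq), since plug merging no longer calls into the elevator; cfq_preempt_queue() samples the active queue's workload type before cfq_slice_expired() clears cfqd->active_queue; and cfq_set_request() consumes change notifications through the new icq_get_changed() helper instead of test_and_clear_bit() on the old atomic icq->changed word. The consumer pattern, restated contiguously from the hunk above (cic, changed_ioprio() and changed_cgroup() are cfq's own names):

	unsigned int changed = icq_get_changed(&cic->icq);

	if (unlikely(changed & ICQ_IOPRIO_CHANGED))
		changed_ioprio(cic);
#ifdef CONFIG_CFQ_GROUP_IOSCHED
	if (unlikely(changed & ICQ_CGROUP_CHANGED))
		changed_cgroup(cic);
#endif
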
diff --git a/block/elevator.c b/block/elevator.c
index 91e18f8af9be..f016855a46b0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -70,39 +70,9 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) | |||
70 | /* | 70 | /* |
71 | * can we safely merge with this request? | 71 | * can we safely merge with this request? |
72 | */ | 72 | */ |
73 | int elv_rq_merge_ok(struct request *rq, struct bio *bio) | 73 | bool elv_rq_merge_ok(struct request *rq, struct bio *bio) |
74 | { | 74 | { |
75 | if (!rq_mergeable(rq)) | 75 | if (!blk_rq_merge_ok(rq, bio)) |
76 | return 0; | ||
77 | |||
78 | /* | ||
79 | * Don't merge file system requests and discard requests | ||
80 | */ | ||
81 | if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) | ||
82 | return 0; | ||
83 | |||
84 | /* | ||
85 | * Don't merge discard requests and secure discard requests | ||
86 | */ | ||
87 | if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE)) | ||
88 | return 0; | ||
89 | |||
90 | /* | ||
91 | * different data direction or already started, don't merge | ||
92 | */ | ||
93 | if (bio_data_dir(bio) != rq_data_dir(rq)) | ||
94 | return 0; | ||
95 | |||
96 | /* | ||
97 | * must be same device and not a special request | ||
98 | */ | ||
99 | if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) | ||
100 | return 0; | ||
101 | |||
102 | /* | ||
103 | * only merge integrity protected bio into ditto rq | ||
104 | */ | ||
105 | if (bio_integrity(bio) != blk_integrity_rq(rq)) | ||
106 | return 0; | 76 | return 0; |
107 | 77 | ||
108 | if (!elv_iosched_allow_merge(rq, bio)) | 78 | if (!elv_iosched_allow_merge(rq, bio)) |
@@ -112,23 +82,6 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) | |||
112 | } | 82 | } |
113 | EXPORT_SYMBOL(elv_rq_merge_ok); | 83 | EXPORT_SYMBOL(elv_rq_merge_ok); |
114 | 84 | ||
115 | int elv_try_merge(struct request *__rq, struct bio *bio) | ||
116 | { | ||
117 | int ret = ELEVATOR_NO_MERGE; | ||
118 | |||
119 | /* | ||
120 | * we can merge and sequence is ok, check if it's possible | ||
121 | */ | ||
122 | if (elv_rq_merge_ok(__rq, bio)) { | ||
123 | if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector) | ||
124 | ret = ELEVATOR_BACK_MERGE; | ||
125 | else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector) | ||
126 | ret = ELEVATOR_FRONT_MERGE; | ||
127 | } | ||
128 | |||
129 | return ret; | ||
130 | } | ||
131 | |||
132 | static struct elevator_type *elevator_find(const char *name) | 85 | static struct elevator_type *elevator_find(const char *name) |
133 | { | 86 | { |
134 | struct elevator_type *e; | 87 | struct elevator_type *e; |
@@ -478,8 +431,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) | |||
478 | /* | 431 | /* |
479 | * First try one-hit cache. | 432 | * First try one-hit cache. |
480 | */ | 433 | */ |
481 | if (q->last_merge) { | 434 | if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) { |
482 | ret = elv_try_merge(q->last_merge, bio); | 435 | ret = blk_try_merge(q->last_merge, bio); |
483 | if (ret != ELEVATOR_NO_MERGE) { | 436 | if (ret != ELEVATOR_NO_MERGE) { |
484 | *req = q->last_merge; | 437 | *req = q->last_merge; |
485 | return ret; | 438 | return ret; |
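
elv_rq_merge_ok() becomes a thin wrapper: the generic checks now live in blk_rq_merge_ok() and only the per-elevator allow_merge hook remains, while elv_try_merge() is deleted outright. elv_merge()'s one-hit cache therefore asks "is this mergeable at all?" first and only then classifies the merge; restating the hunk above as straight-line code:

	if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
		ret = blk_try_merge(q->last_merge, bio);
		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}
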
diff --git a/block/genhd.c b/block/genhd.c
index 23b4f7063322..df9816ede75b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -35,6 +35,7 @@ static DEFINE_IDR(ext_devt_idr); | |||
35 | 35 | ||
36 | static struct device_type disk_type; | 36 | static struct device_type disk_type; |
37 | 37 | ||
38 | static void disk_alloc_events(struct gendisk *disk); | ||
38 | static void disk_add_events(struct gendisk *disk); | 39 | static void disk_add_events(struct gendisk *disk); |
39 | static void disk_del_events(struct gendisk *disk); | 40 | static void disk_del_events(struct gendisk *disk); |
40 | static void disk_release_events(struct gendisk *disk); | 41 | static void disk_release_events(struct gendisk *disk); |
@@ -601,6 +602,8 @@ void add_disk(struct gendisk *disk) | |||
601 | disk->major = MAJOR(devt); | 602 | disk->major = MAJOR(devt); |
602 | disk->first_minor = MINOR(devt); | 603 | disk->first_minor = MINOR(devt); |
603 | 604 | ||
605 | disk_alloc_events(disk); | ||
606 | |||
604 | /* Register BDI before referencing it from bdev */ | 607 | /* Register BDI before referencing it from bdev */ |
605 | bdi = &disk->queue->backing_dev_info; | 608 | bdi = &disk->queue->backing_dev_info; |
606 | bdi_register_dev(bdi, disk_devt(disk)); | 609 | bdi_register_dev(bdi, disk_devt(disk)); |
@@ -1475,9 +1478,9 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now) | |||
1475 | intv = disk_events_poll_jiffies(disk); | 1478 | intv = disk_events_poll_jiffies(disk); |
1476 | set_timer_slack(&ev->dwork.timer, intv / 4); | 1479 | set_timer_slack(&ev->dwork.timer, intv / 4); |
1477 | if (check_now) | 1480 | if (check_now) |
1478 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | 1481 | queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); |
1479 | else if (intv) | 1482 | else if (intv) |
1480 | queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | 1483 | queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv); |
1481 | out_unlock: | 1484 | out_unlock: |
1482 | spin_unlock_irqrestore(&ev->lock, flags); | 1485 | spin_unlock_irqrestore(&ev->lock, flags); |
1483 | } | 1486 | } |
@@ -1521,7 +1524,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) | |||
1521 | ev->clearing |= mask; | 1524 | ev->clearing |= mask; |
1522 | if (!ev->block) { | 1525 | if (!ev->block) { |
1523 | cancel_delayed_work(&ev->dwork); | 1526 | cancel_delayed_work(&ev->dwork); |
1524 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | 1527 | queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); |
1525 | } | 1528 | } |
1526 | spin_unlock_irq(&ev->lock); | 1529 | spin_unlock_irq(&ev->lock); |
1527 | } | 1530 | } |
@@ -1558,7 +1561,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) | |||
1558 | 1561 | ||
1559 | /* uncondtionally schedule event check and wait for it to finish */ | 1562 | /* uncondtionally schedule event check and wait for it to finish */ |
1560 | disk_block_events(disk); | 1563 | disk_block_events(disk); |
1561 | queue_delayed_work(system_nrt_wq, &ev->dwork, 0); | 1564 | queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, 0); |
1562 | flush_delayed_work(&ev->dwork); | 1565 | flush_delayed_work(&ev->dwork); |
1563 | __disk_unblock_events(disk, false); | 1566 | __disk_unblock_events(disk, false); |
1564 | 1567 | ||
@@ -1595,7 +1598,7 @@ static void disk_events_workfn(struct work_struct *work) | |||
1595 | 1598 | ||
1596 | intv = disk_events_poll_jiffies(disk); | 1599 | intv = disk_events_poll_jiffies(disk); |
1597 | if (!ev->block && intv) | 1600 | if (!ev->block && intv) |
1598 | queue_delayed_work(system_nrt_wq, &ev->dwork, intv); | 1601 | queue_delayed_work(system_nrt_freezable_wq, &ev->dwork, intv); |
1599 | 1602 | ||
1600 | spin_unlock_irq(&ev->lock); | 1603 | spin_unlock_irq(&ev->lock); |
1601 | 1604 | ||
@@ -1733,9 +1736,9 @@ module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops, | |||
1733 | &disk_events_dfl_poll_msecs, 0644); | 1736 | &disk_events_dfl_poll_msecs, 0644); |
1734 | 1737 | ||
1735 | /* | 1738 | /* |
1736 | * disk_{add|del|release}_events - initialize and destroy disk_events. | 1739 | * disk_{alloc|add|del|release}_events - initialize and destroy disk_events. |
1737 | */ | 1740 | */ |
1738 | static void disk_add_events(struct gendisk *disk) | 1741 | static void disk_alloc_events(struct gendisk *disk) |
1739 | { | 1742 | { |
1740 | struct disk_events *ev; | 1743 | struct disk_events *ev; |
1741 | 1744 | ||
@@ -1748,16 +1751,6 @@ static void disk_add_events(struct gendisk *disk) | |||
1748 | return; | 1751 | return; |
1749 | } | 1752 | } |
1750 | 1753 | ||
1751 | if (sysfs_create_files(&disk_to_dev(disk)->kobj, | ||
1752 | disk_events_attrs) < 0) { | ||
1753 | pr_warn("%s: failed to create sysfs files for events\n", | ||
1754 | disk->disk_name); | ||
1755 | kfree(ev); | ||
1756 | return; | ||
1757 | } | ||
1758 | |||
1759 | disk->ev = ev; | ||
1760 | |||
1761 | INIT_LIST_HEAD(&ev->node); | 1754 | INIT_LIST_HEAD(&ev->node); |
1762 | ev->disk = disk; | 1755 | ev->disk = disk; |
1763 | spin_lock_init(&ev->lock); | 1756 | spin_lock_init(&ev->lock); |
@@ -1766,8 +1759,21 @@ static void disk_add_events(struct gendisk *disk) | |||
1766 | ev->poll_msecs = -1; | 1759 | ev->poll_msecs = -1; |
1767 | INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); | 1760 | INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); |
1768 | 1761 | ||
1762 | disk->ev = ev; | ||
1763 | } | ||
1764 | |||
1765 | static void disk_add_events(struct gendisk *disk) | ||
1766 | { | ||
1767 | if (!disk->ev) | ||
1768 | return; | ||
1769 | |||
1770 | /* FIXME: error handling */ | ||
1771 | if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0) | ||
1772 | pr_warn("%s: failed to create sysfs files for events\n", | ||
1773 | disk->disk_name); | ||
1774 | |||
1769 | mutex_lock(&disk_events_mutex); | 1775 | mutex_lock(&disk_events_mutex); |
1770 | list_add_tail(&ev->node, &disk_events); | 1776 | list_add_tail(&disk->ev->node, &disk_events); |
1771 | mutex_unlock(&disk_events_mutex); | 1777 | mutex_unlock(&disk_events_mutex); |
1772 | 1778 | ||
1773 | /* | 1779 | /* |
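
Disk event setup is split in two: disk_alloc_events() allocates and initializes the disk_events structure early in add_disk(), before the BDI and the device are registered, and disk_add_events() later only creates the sysfs attributes and links the already-allocated structure onto the global disk_events list. Event work is also queued on system_nrt_freezable_wq so polling is frozen across suspend. A sketch of the resulting ordering inside add_disk(), with the unrelated registration steps elided; only the two disk_*_events() calls and bdi_register_dev() come from this patch.

void add_disk_ordering_sketch(struct gendisk *disk)
{
	disk_alloc_events(disk);	/* allocate and init disk->ev only */

	/* ... bdi_register_dev(), device registration, sysfs setup ... */

	disk_add_events(disk);		/* sysfs files + add to the global disk_events list */
}
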
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d06ec1c829c2..6df5d6928a44 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -389,17 +389,11 @@ static bool disk_unlock_native_capacity(struct gendisk *disk) | |||
389 | } | 389 | } |
390 | } | 390 | } |
391 | 391 | ||
392 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | 392 | static int drop_partitions(struct gendisk *disk, struct block_device *bdev) |
393 | { | 393 | { |
394 | struct parsed_partitions *state = NULL; | ||
395 | struct disk_part_iter piter; | 394 | struct disk_part_iter piter; |
396 | struct hd_struct *part; | 395 | struct hd_struct *part; |
397 | int p, highest, res; | 396 | int res; |
398 | rescan: | ||
399 | if (state && !IS_ERR(state)) { | ||
400 | kfree(state); | ||
401 | state = NULL; | ||
402 | } | ||
403 | 397 | ||
404 | if (bdev->bd_part_count) | 398 | if (bdev->bd_part_count) |
405 | return -EBUSY; | 399 | return -EBUSY; |
@@ -412,6 +406,24 @@ rescan: | |||
412 | delete_partition(disk, part->partno); | 406 | delete_partition(disk, part->partno); |
413 | disk_part_iter_exit(&piter); | 407 | disk_part_iter_exit(&piter); |
414 | 408 | ||
409 | return 0; | ||
410 | } | ||
411 | |||
412 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | ||
413 | { | ||
414 | struct parsed_partitions *state = NULL; | ||
415 | struct hd_struct *part; | ||
416 | int p, highest, res; | ||
417 | rescan: | ||
418 | if (state && !IS_ERR(state)) { | ||
419 | kfree(state); | ||
420 | state = NULL; | ||
421 | } | ||
422 | |||
423 | res = drop_partitions(disk, bdev); | ||
424 | if (res) | ||
425 | return res; | ||
426 | |||
415 | if (disk->fops->revalidate_disk) | 427 | if (disk->fops->revalidate_disk) |
416 | disk->fops->revalidate_disk(disk); | 428 | disk->fops->revalidate_disk(disk); |
417 | check_disk_size_change(disk, bdev); | 429 | check_disk_size_change(disk, bdev); |
@@ -515,6 +527,26 @@ rescan: | |||
515 | return 0; | 527 | return 0; |
516 | } | 528 | } |
517 | 529 | ||
530 | int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) | ||
531 | { | ||
532 | int res; | ||
533 | |||
534 | if (!bdev->bd_invalidated) | ||
535 | return 0; | ||
536 | |||
537 | res = drop_partitions(disk, bdev); | ||
538 | if (res) | ||
539 | return res; | ||
540 | |||
541 | set_capacity(disk, 0); | ||
542 | check_disk_size_change(disk, bdev); | ||
543 | bdev->bd_invalidated = 0; | ||
544 | /* tell userspace that the media / partition table may have changed */ | ||
545 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); | ||
546 | |||
547 | return 0; | ||
548 | } | ||
549 | |||
518 | unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) | 550 | unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) |
519 | { | 551 | { |
520 | struct address_space *mapping = bdev->bd_inode->i_mapping; | 552 | struct address_space *mapping = bdev->bd_inode->i_mapping; |
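
The loop that deletes existing partitions is factored into drop_partitions(), shared by rescan_partitions() and the new invalidate_partitions(). The latter handles the case where the partition table cannot be reread: it drops the stale partitions, zeroes the capacity, clears bd_invalidated and still emits a KOBJ_CHANGE uevent so userspace learns the table is gone. A sketch of the intended caller-side split; the surrounding function and the table_readable flag are hypothetical, only the two helpers come from this patch.

static int revalidate_sketch(struct gendisk *disk, struct block_device *bdev,
			     bool table_readable)
{
	if (table_readable)
		return rescan_partitions(disk, bdev);	/* reread and re-add partitions */

	/* table unreadable: drop partitions, set capacity to 0, notify userspace */
	return invalidate_partitions(disk, bdev);
}
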
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index bd8ae788f689..e507cfbd044e 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -2,7 +2,7 @@ | |||
2 | * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) | 2 | * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) |
3 | * | 3 | * |
4 | * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> | 4 | * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> |
5 | * Copyright (c) 2001-2007 Anton Altaparmakov | 5 | * Copyright (c) 2001-2012 Anton Altaparmakov |
6 | * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> | 6 | * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> |
7 | * | 7 | * |
8 | * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads | 8 | * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads |
@@ -1341,20 +1341,17 @@ found: | |||
1341 | ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num); | 1341 | ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num); |
1342 | return false; | 1342 | return false; |
1343 | } | 1343 | } |
1344 | |||
1345 | if (f->map & (1 << rec)) { | 1344 | if (f->map & (1 << rec)) { |
1346 | ldm_error ("Duplicate VBLK, part %d.", rec); | 1345 | ldm_error ("Duplicate VBLK, part %d.", rec); |
1347 | f->map &= 0x7F; /* Mark the group as broken */ | 1346 | f->map &= 0x7F; /* Mark the group as broken */ |
1348 | return false; | 1347 | return false; |
1349 | } | 1348 | } |
1350 | |||
1351 | f->map |= (1 << rec); | 1349 | f->map |= (1 << rec); |
1352 | 1350 | if (!rec) | |
1351 | memcpy(f->data, data, VBLK_SIZE_HEAD); | ||
1353 | data += VBLK_SIZE_HEAD; | 1352 | data += VBLK_SIZE_HEAD; |
1354 | size -= VBLK_SIZE_HEAD; | 1353 | size -= VBLK_SIZE_HEAD; |
1355 | 1354 | memcpy(f->data + VBLK_SIZE_HEAD + rec * size, data, size); | |
1356 | memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size); | ||
1357 | |||
1358 | return true; | 1355 | return true; |
1359 | } | 1356 | } |
1360 | 1357 | ||
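
The ldm_frag_add() change fixes reassembly of multi-fragment VBLKs: the header is now copied into f->data only from fragment 0, and each fragment's payload is placed at VBLK_SIZE_HEAD + rec * size after the header has been stripped from size, whereas the old offset rec * (size - VBLK_SIZE_HEAD) subtracted the header length a second time. A worked example with illustrative numbers (not the real LDM constants): assume VBLK_SIZE_HEAD == 16 and a 512-byte fragment, so the per-fragment payload is size = 496 after "size -= VBLK_SIZE_HEAD".

/*
 * New layout of f->data for three fragments (rec 0..2):
 *
 *   [0    ..   16)  header, copied once from fragment 0
 *   [16   ..  512)  payload of rec 0:  offset 16 + 0 * 496
 *   [512  .. 1008)  payload of rec 1:  offset 16 + 1 * 496
 *   [1008 .. 1504)  payload of rec 2:  offset 16 + 2 * 496
 *
 * The old arithmetic used a stride of 496 - 16 = 480 while still copying
 * 496 bytes, so consecutive payloads overlapped by 16 bytes and the
 * header was never stored at the start of the buffer.
 */
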