Diffstat (limited to 'block')
-rw-r--r-- | block/as-iosched.c   |  10 |
-rw-r--r-- | block/blk-barrier.c  |  45 |
-rw-r--r-- | block/blk-core.c     |   9 |
-rw-r--r-- | block/blk-settings.c |  34 |
-rw-r--r-- | block/blk-sysfs.c    |  11 |
-rw-r--r-- | block/blk-tag.c      |   2 |
-rw-r--r-- | block/bsg.c          |   4 |
-rw-r--r-- | block/cfq-iosched.c  | 252 |
-rw-r--r-- | block/compat_ioctl.c |  13 |
-rw-r--r-- | block/elevator.c     |   4 |
-rw-r--r-- | block/genhd.c        |  10 |
-rw-r--r-- | block/ioctl.c        |  17 |
12 files changed, 258 insertions, 153 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 7a12cf6ee1d3..ce8ba57c6557 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -146,7 +146,7 @@ enum arq_state { | |||
146 | #define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2) | 146 | #define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2) |
147 | #define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state) | 147 | #define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state) |
148 | 148 | ||
149 | static DEFINE_PER_CPU(unsigned long, ioc_count); | 149 | static DEFINE_PER_CPU(unsigned long, as_ioc_count); |
150 | static struct completion *ioc_gone; | 150 | static struct completion *ioc_gone; |
151 | static DEFINE_SPINLOCK(ioc_gone_lock); | 151 | static DEFINE_SPINLOCK(ioc_gone_lock); |
152 | 152 | ||
@@ -161,7 +161,7 @@ static void as_antic_stop(struct as_data *ad); | |||
161 | static void free_as_io_context(struct as_io_context *aic) | 161 | static void free_as_io_context(struct as_io_context *aic) |
162 | { | 162 | { |
163 | kfree(aic); | 163 | kfree(aic); |
164 | elv_ioc_count_dec(ioc_count); | 164 | elv_ioc_count_dec(as_ioc_count); |
165 | if (ioc_gone) { | 165 | if (ioc_gone) { |
166 | /* | 166 | /* |
167 | * AS scheduler is exiting, grab exit lock and check | 167 | * AS scheduler is exiting, grab exit lock and check |
@@ -169,7 +169,7 @@ static void free_as_io_context(struct as_io_context *aic) | |||
169 | * complete ioc_gone and set it back to NULL. | 169 | * complete ioc_gone and set it back to NULL. |
170 | */ | 170 | */ |
171 | spin_lock(&ioc_gone_lock); | 171 | spin_lock(&ioc_gone_lock); |
172 | if (ioc_gone && !elv_ioc_count_read(ioc_count)) { | 172 | if (ioc_gone && !elv_ioc_count_read(as_ioc_count)) { |
173 | complete(ioc_gone); | 173 | complete(ioc_gone); |
174 | ioc_gone = NULL; | 174 | ioc_gone = NULL; |
175 | } | 175 | } |
@@ -211,7 +211,7 @@ static struct as_io_context *alloc_as_io_context(void) | |||
211 | ret->seek_total = 0; | 211 | ret->seek_total = 0; |
212 | ret->seek_samples = 0; | 212 | ret->seek_samples = 0; |
213 | ret->seek_mean = 0; | 213 | ret->seek_mean = 0; |
214 | elv_ioc_count_inc(ioc_count); | 214 | elv_ioc_count_inc(as_ioc_count); |
215 | } | 215 | } |
216 | 216 | ||
217 | return ret; | 217 | return ret; |
@@ -1507,7 +1507,7 @@ static void __exit as_exit(void) | |||
1507 | ioc_gone = &all_gone; | 1507 | ioc_gone = &all_gone; |
1508 | /* ioc_gone's update must be visible before reading ioc_count */ | 1508 | /* ioc_gone's update must be visible before reading ioc_count */ |
1509 | smp_wmb(); | 1509 | smp_wmb(); |
1510 | if (elv_ioc_count_read(ioc_count)) | 1510 | if (elv_ioc_count_read(as_ioc_count)) |
1511 | wait_for_completion(&all_gone); | 1511 | wait_for_completion(&all_gone); |
1512 | synchronize_rcu(); | 1512 | synchronize_rcu(); |
1513 | } | 1513 | } |
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6593ab39cfe9..8873b9b439ff 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err) | |||
350 | 350 | ||
351 | if (bio->bi_private) | 351 | if (bio->bi_private) |
352 | complete(bio->bi_private); | 352 | complete(bio->bi_private); |
353 | __free_page(bio_page(bio)); | ||
353 | 354 | ||
354 | bio_put(bio); | 355 | bio_put(bio); |
355 | } | 356 | } |
@@ -372,30 +373,50 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
372 | struct request_queue *q = bdev_get_queue(bdev); | 373 | struct request_queue *q = bdev_get_queue(bdev); |
373 | int type = flags & DISCARD_FL_BARRIER ? | 374 | int type = flags & DISCARD_FL_BARRIER ? |
374 | DISCARD_BARRIER : DISCARD_NOBARRIER; | 375 | DISCARD_BARRIER : DISCARD_NOBARRIER; |
376 | struct bio *bio; | ||
377 | struct page *page; | ||
375 | int ret = 0; | 378 | int ret = 0; |
376 | 379 | ||
377 | if (!q) | 380 | if (!q) |
378 | return -ENXIO; | 381 | return -ENXIO; |
379 | 382 | ||
380 | if (!q->prepare_discard_fn) | 383 | if (!blk_queue_discard(q)) |
381 | return -EOPNOTSUPP; | 384 | return -EOPNOTSUPP; |
382 | 385 | ||
383 | while (nr_sects && !ret) { | 386 | while (nr_sects && !ret) { |
384 | struct bio *bio = bio_alloc(gfp_mask, 0); | 387 | unsigned int sector_size = q->limits.logical_block_size; |
385 | if (!bio) | 388 | unsigned int max_discard_sectors = |
386 | return -ENOMEM; | 389 | min(q->limits.max_discard_sectors, UINT_MAX >> 9); |
387 | 390 | ||
391 | bio = bio_alloc(gfp_mask, 1); | ||
392 | if (!bio) | ||
393 | goto out; | ||
394 | bio->bi_sector = sector; | ||
388 | bio->bi_end_io = blkdev_discard_end_io; | 395 | bio->bi_end_io = blkdev_discard_end_io; |
389 | bio->bi_bdev = bdev; | 396 | bio->bi_bdev = bdev; |
390 | if (flags & DISCARD_FL_WAIT) | 397 | if (flags & DISCARD_FL_WAIT) |
391 | bio->bi_private = &wait; | 398 | bio->bi_private = &wait; |
392 | 399 | ||
393 | bio->bi_sector = sector; | 400 | /* |
401 | * Add a zeroed one-sector payload as that's what | ||
402 | * our current implementations need. If we'll ever need | ||
403 | * more the interface will need revisiting. | ||
404 | */ | ||
405 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
406 | if (!page) | ||
407 | goto out_free_bio; | ||
408 | if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) | ||
409 | goto out_free_page; | ||
394 | 410 | ||
395 | if (nr_sects > queue_max_hw_sectors(q)) { | 411 | /* |
396 | bio->bi_size = queue_max_hw_sectors(q) << 9; | 412 | * And override the bio size - the way discard works we |
397 | nr_sects -= queue_max_hw_sectors(q); | 413 | * touch many more blocks on disk than the actual payload |
398 | sector += queue_max_hw_sectors(q); | 414 | * length. |
415 | */ | ||
416 | if (nr_sects > max_discard_sectors) { | ||
417 | bio->bi_size = max_discard_sectors << 9; | ||
418 | nr_sects -= max_discard_sectors; | ||
419 | sector += max_discard_sectors; | ||
399 | } else { | 420 | } else { |
400 | bio->bi_size = nr_sects << 9; | 421 | bio->bi_size = nr_sects << 9; |
401 | nr_sects = 0; | 422 | nr_sects = 0; |
@@ -414,5 +435,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
414 | bio_put(bio); | 435 | bio_put(bio); |
415 | } | 436 | } |
416 | return ret; | 437 | return ret; |
438 | out_free_page: | ||
439 | __free_page(page); | ||
440 | out_free_bio: | ||
441 | bio_put(bio); | ||
442 | out: | ||
443 | return -ENOMEM; | ||
417 | } | 444 | } |
418 | EXPORT_SYMBOL(blkdev_issue_discard); | 445 | EXPORT_SYMBOL(blkdev_issue_discard); |
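The blk-barrier.c rework moves discard onto the normal I/O path: each discard bio now carries a zeroed one-sector payload page (freed again in blkdev_discard_end_io) and the range is split at the queue's max_discard_sectors rather than max_hw_sectors. A hedged caller-side sketch, not taken from this patch; the function name is hypothetical and error handling is minimal:

int example_discard_extent(struct block_device *bdev, sector_t start,
			   sector_t nr_sects)
{
	/*
	 * DISCARD_FL_WAIT blocks until every discard bio in the range has
	 * completed; the helper allocates and frees the payload pages and
	 * splits the range internally.
	 */
	return blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
				    DISCARD_FL_WAIT);
}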
diff --git a/block/blk-core.c b/block/blk-core.c
index 8135228e4b29..ac0fa10f8fa5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@ | |||
34 | #include "blk.h" | 34 | #include "blk.h" |
35 | 35 | ||
36 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); | 36 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); |
37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); | ||
37 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); | 38 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); |
38 | 39 | ||
39 | static int __make_request(struct request_queue *q, struct bio *bio); | 40 | static int __make_request(struct request_queue *q, struct bio *bio); |
@@ -1029,7 +1030,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part, | |||
1029 | if (now == part->stamp) | 1030 | if (now == part->stamp) |
1030 | return; | 1031 | return; |
1031 | 1032 | ||
1032 | if (part->in_flight) { | 1033 | if (part_in_flight(part)) { |
1033 | __part_stat_add(cpu, part, time_in_queue, | 1034 | __part_stat_add(cpu, part, time_in_queue, |
1034 | part_in_flight(part) * (now - part->stamp)); | 1035 | part_in_flight(part) * (now - part->stamp)); |
1035 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); | 1036 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); |
@@ -1124,7 +1125,6 @@ void init_request_from_bio(struct request *req, struct bio *bio) | |||
1124 | req->cmd_flags |= REQ_DISCARD; | 1125 | req->cmd_flags |= REQ_DISCARD; |
1125 | if (bio_rw_flagged(bio, BIO_RW_BARRIER)) | 1126 | if (bio_rw_flagged(bio, BIO_RW_BARRIER)) |
1126 | req->cmd_flags |= REQ_SOFTBARRIER; | 1127 | req->cmd_flags |= REQ_SOFTBARRIER; |
1127 | req->q->prepare_discard_fn(req->q, req); | ||
1128 | } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) | 1128 | } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) |
1129 | req->cmd_flags |= REQ_HARDBARRIER; | 1129 | req->cmd_flags |= REQ_HARDBARRIER; |
1130 | 1130 | ||
@@ -1437,7 +1437,8 @@ static inline void __generic_make_request(struct bio *bio) | |||
1437 | goto end_io; | 1437 | goto end_io; |
1438 | } | 1438 | } |
1439 | 1439 | ||
1440 | if (unlikely(nr_sectors > queue_max_hw_sectors(q))) { | 1440 | if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) && |
1441 | nr_sectors > queue_max_hw_sectors(q))) { | ||
1441 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", | 1442 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", |
1442 | bdevname(bio->bi_bdev, b), | 1443 | bdevname(bio->bi_bdev, b), |
1443 | bio_sectors(bio), | 1444 | bio_sectors(bio), |
@@ -1470,7 +1471,7 @@ static inline void __generic_make_request(struct bio *bio) | |||
1470 | goto end_io; | 1471 | goto end_io; |
1471 | 1472 | ||
1472 | if (bio_rw_flagged(bio, BIO_RW_DISCARD) && | 1473 | if (bio_rw_flagged(bio, BIO_RW_DISCARD) && |
1473 | !q->prepare_discard_fn) { | 1474 | !blk_queue_discard(q)) { |
1474 | err = -EOPNOTSUPP; | 1475 | err = -EOPNOTSUPP; |
1475 | goto end_io; | 1476 | goto end_io; |
1476 | } | 1477 | } |
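Two blk-core.c details follow from the payload change: init_request_from_bio() no longer calls a per-queue prepare_discard_fn (support is now signalled by the queue flag tested via blk_queue_discard()), and the "bio too big" check is skipped for discard bios because their bi_size describes the range being discarded, not transferred data. An illustrative sketch of that check, using only helpers that appear elsewhere in this diff:

static bool example_bio_too_big(struct request_queue *q, struct bio *bio)
{
	/* a discard's bi_size is virtual, not a data payload */
	if (bio_rw_flagged(bio, BIO_RW_DISCARD))
		return false;

	return bio_sectors(bio) > queue_max_hw_sectors(q);
}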
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 83413ff83739..66d4aa8799b7 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -34,23 +34,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) | |||
34 | EXPORT_SYMBOL(blk_queue_prep_rq); | 34 | EXPORT_SYMBOL(blk_queue_prep_rq); |
35 | 35 | ||
36 | /** | 36 | /** |
37 | * blk_queue_set_discard - set a discard_sectors function for queue | ||
38 | * @q: queue | ||
39 | * @dfn: prepare_discard function | ||
40 | * | ||
41 | * It's possible for a queue to register a discard callback which is used | ||
42 | * to transform a discard request into the appropriate type for the | ||
43 | * hardware. If none is registered, then discard requests are failed | ||
44 | * with %EOPNOTSUPP. | ||
45 | * | ||
46 | */ | ||
47 | void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) | ||
48 | { | ||
49 | q->prepare_discard_fn = dfn; | ||
50 | } | ||
51 | EXPORT_SYMBOL(blk_queue_set_discard); | ||
52 | |||
53 | /** | ||
54 | * blk_queue_merge_bvec - set a merge_bvec function for queue | 37 | * blk_queue_merge_bvec - set a merge_bvec function for queue |
55 | * @q: queue | 38 | * @q: queue |
56 | * @mbfn: merge_bvec_fn | 39 | * @mbfn: merge_bvec_fn |
@@ -111,7 +94,9 @@ void blk_set_default_limits(struct queue_limits *lim) | |||
111 | lim->max_hw_segments = MAX_HW_SEGMENTS; | 94 | lim->max_hw_segments = MAX_HW_SEGMENTS; |
112 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; | 95 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; |
113 | lim->max_segment_size = MAX_SEGMENT_SIZE; | 96 | lim->max_segment_size = MAX_SEGMENT_SIZE; |
114 | lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS; | 97 | lim->max_sectors = BLK_DEF_MAX_SECTORS; |
98 | lim->max_hw_sectors = INT_MAX; | ||
99 | lim->max_discard_sectors = SAFE_MAX_SECTORS; | ||
115 | lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; | 100 | lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; |
116 | lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); | 101 | lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); |
117 | lim->alignment_offset = 0; | 102 | lim->alignment_offset = 0; |
@@ -164,6 +149,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) | |||
164 | q->unplug_timer.data = (unsigned long)q; | 149 | q->unplug_timer.data = (unsigned long)q; |
165 | 150 | ||
166 | blk_set_default_limits(&q->limits); | 151 | blk_set_default_limits(&q->limits); |
152 | blk_queue_max_sectors(q, SAFE_MAX_SECTORS); | ||
167 | 153 | ||
168 | /* | 154 | /* |
169 | * If the caller didn't supply a lock, fall back to our embedded | 155 | * If the caller didn't supply a lock, fall back to our embedded |
@@ -254,6 +240,18 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors) | |||
254 | EXPORT_SYMBOL(blk_queue_max_hw_sectors); | 240 | EXPORT_SYMBOL(blk_queue_max_hw_sectors); |
255 | 241 | ||
256 | /** | 242 | /** |
243 | * blk_queue_max_discard_sectors - set max sectors for a single discard | ||
244 | * @q: the request queue for the device | ||
245 | * @max_discard_sectors: maximum number of sectors to discard | ||
246 | **/ | ||
247 | void blk_queue_max_discard_sectors(struct request_queue *q, | ||
248 | unsigned int max_discard_sectors) | ||
249 | { | ||
250 | q->limits.max_discard_sectors = max_discard_sectors; | ||
251 | } | ||
252 | EXPORT_SYMBOL(blk_queue_max_discard_sectors); | ||
253 | |||
254 | /** | ||
257 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue | 255 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue |
258 | * @q: the request queue for the device | 256 | * @q: the request queue for the device |
259 | * @max_segments: max number of segments | 257 | * @max_segments: max number of segments |
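blk_queue_set_discard() is gone; a driver now advertises discard support by setting the discard queue flag and publishing a limit with the new blk_queue_max_discard_sectors(). A hedged driver-side sketch; the flag-setting call and the chosen limit are illustrative and not part of this patch:

static void example_enable_discard(struct request_queue *q)
{
	/* largest single discard we want to be asked for, in 512 B sectors */
	blk_queue_max_discard_sectors(q, UINT_MAX >> 9);

	/* after this, blk_queue_discard(q) returns true for the queue */
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}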
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b78c9c3e2670..8a6d81afb284 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -452,6 +452,7 @@ int blk_register_queue(struct gendisk *disk) | |||
452 | if (ret) { | 452 | if (ret) { |
453 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | 453 | kobject_uevent(&q->kobj, KOBJ_REMOVE); |
454 | kobject_del(&q->kobj); | 454 | kobject_del(&q->kobj); |
455 | blk_trace_remove_sysfs(disk_to_dev(disk)); | ||
455 | return ret; | 456 | return ret; |
456 | } | 457 | } |
457 | 458 | ||
@@ -465,11 +466,11 @@ void blk_unregister_queue(struct gendisk *disk) | |||
465 | if (WARN_ON(!q)) | 466 | if (WARN_ON(!q)) |
466 | return; | 467 | return; |
467 | 468 | ||
468 | if (q->request_fn) { | 469 | if (q->request_fn) |
469 | elv_unregister_queue(q); | 470 | elv_unregister_queue(q); |
470 | 471 | ||
471 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | 472 | kobject_uevent(&q->kobj, KOBJ_REMOVE); |
472 | kobject_del(&q->kobj); | 473 | kobject_del(&q->kobj); |
473 | kobject_put(&disk_to_dev(disk)->kobj); | 474 | blk_trace_remove_sysfs(disk_to_dev(disk)); |
474 | } | 475 | kobject_put(&disk_to_dev(disk)->kobj); |
475 | } | 476 | } |
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 2e5cfeb59333..6b0f52c20964 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -359,7 +359,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) | |||
359 | max_depth -= 2; | 359 | max_depth -= 2; |
360 | if (!max_depth) | 360 | if (!max_depth) |
361 | max_depth = 1; | 361 | max_depth = 1; |
362 | if (q->in_flight[0] > max_depth) | 362 | if (q->in_flight[BLK_RW_ASYNC] > max_depth) |
363 | return 1; | 363 | return 1; |
364 | } | 364 | } |
365 | 365 | ||
diff --git a/block/bsg.c b/block/bsg.c
index 5f184bb3ff9e..0676301f16d0 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -1062,7 +1062,7 @@ EXPORT_SYMBOL_GPL(bsg_register_queue); | |||
1062 | 1062 | ||
1063 | static struct cdev bsg_cdev; | 1063 | static struct cdev bsg_cdev; |
1064 | 1064 | ||
1065 | static char *bsg_nodename(struct device *dev) | 1065 | static char *bsg_devnode(struct device *dev, mode_t *mode) |
1066 | { | 1066 | { |
1067 | return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev)); | 1067 | return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev)); |
1068 | } | 1068 | } |
@@ -1087,7 +1087,7 @@ static int __init bsg_init(void) | |||
1087 | ret = PTR_ERR(bsg_class); | 1087 | ret = PTR_ERR(bsg_class); |
1088 | goto destroy_kmemcache; | 1088 | goto destroy_kmemcache; |
1089 | } | 1089 | } |
1090 | bsg_class->nodename = bsg_nodename; | 1090 | bsg_class->devnode = bsg_devnode; |
1091 | 1091 | ||
1092 | ret = alloc_chrdev_region(&devid, 0, BSG_MAX_DEVS, "bsg"); | 1092 | ret = alloc_chrdev_region(&devid, 0, BSG_MAX_DEVS, "bsg"); |
1093 | if (ret) | 1093 | if (ret) |
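bsg only follows a driver-core API rename here: the ->nodename() hook became ->devnode() and gained a mode_t pointer so the callback can also suggest permissions for the node that udev/devtmpfs creates. A sketch of such a callback; the NULL check and the 0660 mode are illustrative, not something this patch sets:

static char *example_devnode(struct device *dev, mode_t *mode)
{
	if (mode)
		*mode = 0660;		/* suggested node permissions */
	return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev));
}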
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 0e3814b662af..069a61017c02 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -48,7 +48,7 @@ static int cfq_slice_idle = HZ / 125; | |||
48 | static struct kmem_cache *cfq_pool; | 48 | static struct kmem_cache *cfq_pool; |
49 | static struct kmem_cache *cfq_ioc_pool; | 49 | static struct kmem_cache *cfq_ioc_pool; |
50 | 50 | ||
51 | static DEFINE_PER_CPU(unsigned long, ioc_count); | 51 | static DEFINE_PER_CPU(unsigned long, cfq_ioc_count); |
52 | static struct completion *ioc_gone; | 52 | static struct completion *ioc_gone; |
53 | static DEFINE_SPINLOCK(ioc_gone_lock); | 53 | static DEFINE_SPINLOCK(ioc_gone_lock); |
54 | 54 | ||
@@ -173,6 +173,7 @@ struct cfq_data { | |||
173 | unsigned int cfq_slice[2]; | 173 | unsigned int cfq_slice[2]; |
174 | unsigned int cfq_slice_async_rq; | 174 | unsigned int cfq_slice_async_rq; |
175 | unsigned int cfq_slice_idle; | 175 | unsigned int cfq_slice_idle; |
176 | unsigned int cfq_latency; | ||
176 | 177 | ||
177 | struct list_head cic_list; | 178 | struct list_head cic_list; |
178 | 179 | ||
@@ -180,6 +181,8 @@ struct cfq_data { | |||
180 | * Fallback dummy cfqq for extreme OOM conditions | 181 | * Fallback dummy cfqq for extreme OOM conditions |
181 | */ | 182 | */ |
182 | struct cfq_queue oom_cfqq; | 183 | struct cfq_queue oom_cfqq; |
184 | |||
185 | unsigned long last_end_sync_rq; | ||
183 | }; | 186 | }; |
184 | 187 | ||
185 | enum cfqq_state_flags { | 188 | enum cfqq_state_flags { |
@@ -227,7 +230,7 @@ CFQ_CFQQ_FNS(coop); | |||
227 | blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) | 230 | blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) |
228 | 231 | ||
229 | static void cfq_dispatch_insert(struct request_queue *, struct request *); | 232 | static void cfq_dispatch_insert(struct request_queue *, struct request *); |
230 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, | 233 | static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool, |
231 | struct io_context *, gfp_t); | 234 | struct io_context *, gfp_t); |
232 | static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, | 235 | static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, |
233 | struct io_context *); | 236 | struct io_context *); |
@@ -238,27 +241,24 @@ static inline int rq_in_driver(struct cfq_data *cfqd) | |||
238 | } | 241 | } |
239 | 242 | ||
240 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, | 243 | static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, |
241 | int is_sync) | 244 | bool is_sync) |
242 | { | 245 | { |
243 | return cic->cfqq[!!is_sync]; | 246 | return cic->cfqq[is_sync]; |
244 | } | 247 | } |
245 | 248 | ||
246 | static inline void cic_set_cfqq(struct cfq_io_context *cic, | 249 | static inline void cic_set_cfqq(struct cfq_io_context *cic, |
247 | struct cfq_queue *cfqq, int is_sync) | 250 | struct cfq_queue *cfqq, bool is_sync) |
248 | { | 251 | { |
249 | cic->cfqq[!!is_sync] = cfqq; | 252 | cic->cfqq[is_sync] = cfqq; |
250 | } | 253 | } |
251 | 254 | ||
252 | /* | 255 | /* |
253 | * We regard a request as SYNC, if it's either a read or has the SYNC bit | 256 | * We regard a request as SYNC, if it's either a read or has the SYNC bit |
254 | * set (in which case it could also be direct WRITE). | 257 | * set (in which case it could also be direct WRITE). |
255 | */ | 258 | */ |
256 | static inline int cfq_bio_sync(struct bio *bio) | 259 | static inline bool cfq_bio_sync(struct bio *bio) |
257 | { | 260 | { |
258 | if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO)) | 261 | return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO); |
259 | return 1; | ||
260 | |||
261 | return 0; | ||
262 | } | 262 | } |
263 | 263 | ||
264 | /* | 264 | /* |
@@ -285,7 +285,7 @@ static int cfq_queue_empty(struct request_queue *q) | |||
285 | * if a queue is marked sync and has sync io queued. A sync queue with async | 285 | * if a queue is marked sync and has sync io queued. A sync queue with async |
286 | * io only, should not get full sync slice length. | 286 | * io only, should not get full sync slice length. |
287 | */ | 287 | */ |
288 | static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync, | 288 | static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, |
289 | unsigned short prio) | 289 | unsigned short prio) |
290 | { | 290 | { |
291 | const int base_slice = cfqd->cfq_slice[sync]; | 291 | const int base_slice = cfqd->cfq_slice[sync]; |
@@ -313,7 +313,7 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
313 | * isn't valid until the first request from the dispatch is activated | 313 | * isn't valid until the first request from the dispatch is activated |
314 | * and the slice time set. | 314 | * and the slice time set. |
315 | */ | 315 | */ |
316 | static inline int cfq_slice_used(struct cfq_queue *cfqq) | 316 | static inline bool cfq_slice_used(struct cfq_queue *cfqq) |
317 | { | 317 | { |
318 | if (cfq_cfqq_slice_new(cfqq)) | 318 | if (cfq_cfqq_slice_new(cfqq)) |
319 | return 0; | 319 | return 0; |
@@ -488,7 +488,7 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, | |||
488 | * we will service the queues. | 488 | * we will service the queues. |
489 | */ | 489 | */ |
490 | static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 490 | static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
491 | int add_front) | 491 | bool add_front) |
492 | { | 492 | { |
493 | struct rb_node **p, *parent; | 493 | struct rb_node **p, *parent; |
494 | struct cfq_queue *__cfqq; | 494 | struct cfq_queue *__cfqq; |
@@ -504,11 +504,20 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
504 | } else | 504 | } else |
505 | rb_key += jiffies; | 505 | rb_key += jiffies; |
506 | } else if (!add_front) { | 506 | } else if (!add_front) { |
507 | /* | ||
508 | * Get our rb key offset. Subtract any residual slice | ||
509 | * value carried from last service. A negative resid | ||
510 | * count indicates slice overrun, and this should position | ||
511 | * the next service time further away in the tree. | ||
512 | */ | ||
507 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; | 513 | rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; |
508 | rb_key += cfqq->slice_resid; | 514 | rb_key -= cfqq->slice_resid; |
509 | cfqq->slice_resid = 0; | 515 | cfqq->slice_resid = 0; |
510 | } else | 516 | } else { |
511 | rb_key = 0; | 517 | rb_key = -HZ; |
518 | __cfqq = cfq_rb_first(&cfqd->service_tree); | ||
519 | rb_key += __cfqq ? __cfqq->rb_key : jiffies; | ||
520 | } | ||
512 | 521 | ||
513 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) { | 522 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) { |
514 | /* | 523 | /* |
@@ -542,7 +551,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
542 | n = &(*p)->rb_left; | 551 | n = &(*p)->rb_left; |
543 | else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq)) | 552 | else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq)) |
544 | n = &(*p)->rb_right; | 553 | n = &(*p)->rb_right; |
545 | else if (rb_key < __cfqq->rb_key) | 554 | else if (time_before(rb_key, __cfqq->rb_key)) |
546 | n = &(*p)->rb_left; | 555 | n = &(*p)->rb_left; |
547 | else | 556 | else |
548 | n = &(*p)->rb_right; | 557 | n = &(*p)->rb_right; |
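Two subtle changes sit in the service-tree hunk above: the leftover slice_resid is now subtracted from the rb_key, so a queue that overran its slice (negative resid) is pushed further out while unused slice pulls it earlier, and the key comparison switches to time_before() because rb_key is jiffies-based and must stay ordered across a jiffies wraparound. A small illustration, assuming <linux/jiffies.h>:

static unsigned long example_rb_key(unsigned long base, long slice_resid)
{
	/* positive resid (unused slice) -> earlier; negative -> later */
	return base - slice_resid;
}

static inline bool example_key_earlier(unsigned long a, unsigned long b)
{
	return time_before(a, b);	/* '<' that survives the jiffies wrap */
}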
@@ -822,8 +831,10 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, | |||
822 | * reposition in fifo if next is older than rq | 831 | * reposition in fifo if next is older than rq |
823 | */ | 832 | */ |
824 | if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && | 833 | if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && |
825 | time_before(next->start_time, rq->start_time)) | 834 | time_before(rq_fifo_time(next), rq_fifo_time(rq))) { |
826 | list_move(&rq->queuelist, &next->queuelist); | 835 | list_move(&rq->queuelist, &next->queuelist); |
836 | rq_set_fifo_time(rq, rq_fifo_time(next)); | ||
837 | } | ||
827 | 838 | ||
828 | cfq_remove_request(next); | 839 | cfq_remove_request(next); |
829 | } | 840 | } |
@@ -839,7 +850,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
839 | * Disallow merge of a sync bio into an async request. | 850 | * Disallow merge of a sync bio into an async request. |
840 | */ | 851 | */ |
841 | if (cfq_bio_sync(bio) && !rq_is_sync(rq)) | 852 | if (cfq_bio_sync(bio) && !rq_is_sync(rq)) |
842 | return 0; | 853 | return false; |
843 | 854 | ||
844 | /* | 855 | /* |
845 | * Lookup the cfqq that this bio will be queued with. Allow | 856 | * Lookup the cfqq that this bio will be queued with. Allow |
@@ -847,13 +858,10 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, | |||
847 | */ | 858 | */ |
848 | cic = cfq_cic_lookup(cfqd, current->io_context); | 859 | cic = cfq_cic_lookup(cfqd, current->io_context); |
849 | if (!cic) | 860 | if (!cic) |
850 | return 0; | 861 | return false; |
851 | 862 | ||
852 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); | 863 | cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); |
853 | if (cfqq == RQ_CFQQ(rq)) | 864 | return cfqq == RQ_CFQQ(rq); |
854 | return 1; | ||
855 | |||
856 | return 0; | ||
857 | } | 865 | } |
858 | 866 | ||
859 | static void __cfq_set_active_queue(struct cfq_data *cfqd, | 867 | static void __cfq_set_active_queue(struct cfq_data *cfqd, |
@@ -881,7 +889,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, | |||
881 | */ | 889 | */ |
882 | static void | 890 | static void |
883 | __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 891 | __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
884 | int timed_out) | 892 | bool timed_out) |
885 | { | 893 | { |
886 | cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); | 894 | cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); |
887 | 895 | ||
@@ -909,7 +917,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
909 | } | 917 | } |
910 | } | 918 | } |
911 | 919 | ||
912 | static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out) | 920 | static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) |
913 | { | 921 | { |
914 | struct cfq_queue *cfqq = cfqd->active_queue; | 922 | struct cfq_queue *cfqq = cfqd->active_queue; |
915 | 923 | ||
@@ -1021,7 +1029,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, | |||
1021 | */ | 1029 | */ |
1022 | static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, | 1030 | static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, |
1023 | struct cfq_queue *cur_cfqq, | 1031 | struct cfq_queue *cur_cfqq, |
1024 | int probe) | 1032 | bool probe) |
1025 | { | 1033 | { |
1026 | struct cfq_queue *cfqq; | 1034 | struct cfq_queue *cfqq; |
1027 | 1035 | ||
@@ -1085,6 +1093,15 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
1085 | if (!cic || !atomic_read(&cic->ioc->nr_tasks)) | 1093 | if (!cic || !atomic_read(&cic->ioc->nr_tasks)) |
1086 | return; | 1094 | return; |
1087 | 1095 | ||
1096 | /* | ||
1097 | * If our average think time is larger than the remaining time | ||
1098 | * slice, then don't idle. This avoids overrunning the allotted | ||
1099 | * time slice. | ||
1100 | */ | ||
1101 | if (sample_valid(cic->ttime_samples) && | ||
1102 | (cfqq->slice_end - jiffies < cic->ttime_mean)) | ||
1103 | return; | ||
1104 | |||
1088 | cfq_mark_cfqq_wait_request(cfqq); | 1105 | cfq_mark_cfqq_wait_request(cfqq); |
1089 | 1106 | ||
1090 | /* | 1107 | /* |
@@ -1124,9 +1141,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) | |||
1124 | */ | 1141 | */ |
1125 | static struct request *cfq_check_fifo(struct cfq_queue *cfqq) | 1142 | static struct request *cfq_check_fifo(struct cfq_queue *cfqq) |
1126 | { | 1143 | { |
1127 | struct cfq_data *cfqd = cfqq->cfqd; | 1144 | struct request *rq = NULL; |
1128 | struct request *rq; | ||
1129 | int fifo; | ||
1130 | 1145 | ||
1131 | if (cfq_cfqq_fifo_expire(cfqq)) | 1146 | if (cfq_cfqq_fifo_expire(cfqq)) |
1132 | return NULL; | 1147 | return NULL; |
@@ -1136,13 +1151,11 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq) | |||
1136 | if (list_empty(&cfqq->fifo)) | 1151 | if (list_empty(&cfqq->fifo)) |
1137 | return NULL; | 1152 | return NULL; |
1138 | 1153 | ||
1139 | fifo = cfq_cfqq_sync(cfqq); | ||
1140 | rq = rq_entry_fifo(cfqq->fifo.next); | 1154 | rq = rq_entry_fifo(cfqq->fifo.next); |
1141 | 1155 | if (time_before(jiffies, rq_fifo_time(rq))) | |
1142 | if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) | ||
1143 | rq = NULL; | 1156 | rq = NULL; |
1144 | 1157 | ||
1145 | cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq); | 1158 | cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); |
1146 | return rq; | 1159 | return rq; |
1147 | } | 1160 | } |
1148 | 1161 | ||
@@ -1243,16 +1256,83 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) | |||
1243 | return dispatched; | 1256 | return dispatched; |
1244 | } | 1257 | } |
1245 | 1258 | ||
1259 | static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | ||
1260 | { | ||
1261 | unsigned int max_dispatch; | ||
1262 | |||
1263 | /* | ||
1264 | * Drain async requests before we start sync IO | ||
1265 | */ | ||
1266 | if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC]) | ||
1267 | return false; | ||
1268 | |||
1269 | /* | ||
1270 | * If this is an async queue and we have sync IO in flight, let it wait | ||
1271 | */ | ||
1272 | if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) | ||
1273 | return false; | ||
1274 | |||
1275 | max_dispatch = cfqd->cfq_quantum; | ||
1276 | if (cfq_class_idle(cfqq)) | ||
1277 | max_dispatch = 1; | ||
1278 | |||
1279 | /* | ||
1280 | * Does this cfqq already have too much IO in flight? | ||
1281 | */ | ||
1282 | if (cfqq->dispatched >= max_dispatch) { | ||
1283 | /* | ||
1284 | * idle queue must always only have a single IO in flight | ||
1285 | */ | ||
1286 | if (cfq_class_idle(cfqq)) | ||
1287 | return false; | ||
1288 | |||
1289 | /* | ||
1290 | * We have other queues, don't allow more IO from this one | ||
1291 | */ | ||
1292 | if (cfqd->busy_queues > 1) | ||
1293 | return false; | ||
1294 | |||
1295 | /* | ||
1296 | * Sole queue user, allow bigger slice | ||
1297 | */ | ||
1298 | max_dispatch *= 4; | ||
1299 | } | ||
1300 | |||
1301 | /* | ||
1302 | * Async queues must wait a bit before being allowed dispatch. | ||
1303 | * We also ramp up the dispatch depth gradually for async IO, | ||
1304 | * based on the last sync IO we serviced | ||
1305 | */ | ||
1306 | if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { | ||
1307 | unsigned long last_sync = jiffies - cfqd->last_end_sync_rq; | ||
1308 | unsigned int depth; | ||
1309 | |||
1310 | depth = last_sync / cfqd->cfq_slice[1]; | ||
1311 | if (!depth && !cfqq->dispatched) | ||
1312 | depth = 1; | ||
1313 | if (depth < max_dispatch) | ||
1314 | max_dispatch = depth; | ||
1315 | } | ||
1316 | |||
1317 | /* | ||
1318 | * If we're below the current max, allow a dispatch | ||
1319 | */ | ||
1320 | return cfqq->dispatched < max_dispatch; | ||
1321 | } | ||
1322 | |||
1246 | /* | 1323 | /* |
1247 | * Dispatch a request from cfqq, moving them to the request queue | 1324 | * Dispatch a request from cfqq, moving them to the request queue |
1248 | * dispatch list. | 1325 | * dispatch list. |
1249 | */ | 1326 | */ |
1250 | static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 1327 | static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
1251 | { | 1328 | { |
1252 | struct request *rq; | 1329 | struct request *rq; |
1253 | 1330 | ||
1254 | BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); | 1331 | BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); |
1255 | 1332 | ||
1333 | if (!cfq_may_dispatch(cfqd, cfqq)) | ||
1334 | return false; | ||
1335 | |||
1256 | /* | 1336 | /* |
1257 | * follow expired path, else get first next available | 1337 | * follow expired path, else get first next available |
1258 | */ | 1338 | */ |
@@ -1271,6 +1351,8 @@ static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1271 | atomic_long_inc(&cic->ioc->refcount); | 1351 | atomic_long_inc(&cic->ioc->refcount); |
1272 | cfqd->active_cic = cic; | 1352 | cfqd->active_cic = cic; |
1273 | } | 1353 | } |
1354 | |||
1355 | return true; | ||
1274 | } | 1356 | } |
1275 | 1357 | ||
1276 | /* | 1358 | /* |
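The dispatch checks move into the new cfq_may_dispatch() helper, and with low_latency enabled async queues are throttled by how recently sync I/O completed: the allowed async depth is the quiet time since the last sync completion divided by the sync slice, so async writeback only ramps up while readers are idle. A minimal sketch of that calculation, illustrative rather than the patch's code verbatim:

static unsigned int example_async_depth(unsigned long last_end_sync_rq,
					unsigned int slice_sync,
					unsigned int dispatched,
					unsigned int max_dispatch)
{
	unsigned long quiet = jiffies - last_end_sync_rq;
	unsigned int depth = quiet / slice_sync;

	if (!depth && !dispatched)
		depth = 1;		/* never starve async completely */

	return min(depth, max_dispatch);
}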
@@ -1281,7 +1363,6 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) | |||
1281 | { | 1363 | { |
1282 | struct cfq_data *cfqd = q->elevator->elevator_data; | 1364 | struct cfq_data *cfqd = q->elevator->elevator_data; |
1283 | struct cfq_queue *cfqq; | 1365 | struct cfq_queue *cfqq; |
1284 | unsigned int max_dispatch; | ||
1285 | 1366 | ||
1286 | if (!cfqd->busy_queues) | 1367 | if (!cfqd->busy_queues) |
1287 | return 0; | 1368 | return 0; |
@@ -1294,48 +1375,11 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) | |||
1294 | return 0; | 1375 | return 0; |
1295 | 1376 | ||
1296 | /* | 1377 | /* |
1297 | * Drain async requests before we start sync IO | 1378 | * Dispatch a request from this cfqq, if it is allowed |
1298 | */ | 1379 | */ |
1299 | if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC]) | 1380 | if (!cfq_dispatch_request(cfqd, cfqq)) |
1300 | return 0; | 1381 | return 0; |
1301 | 1382 | ||
1302 | /* | ||
1303 | * If this is an async queue and we have sync IO in flight, let it wait | ||
1304 | */ | ||
1305 | if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) | ||
1306 | return 0; | ||
1307 | |||
1308 | max_dispatch = cfqd->cfq_quantum; | ||
1309 | if (cfq_class_idle(cfqq)) | ||
1310 | max_dispatch = 1; | ||
1311 | |||
1312 | /* | ||
1313 | * Does this cfqq already have too much IO in flight? | ||
1314 | */ | ||
1315 | if (cfqq->dispatched >= max_dispatch) { | ||
1316 | /* | ||
1317 | * idle queue must always only have a single IO in flight | ||
1318 | */ | ||
1319 | if (cfq_class_idle(cfqq)) | ||
1320 | return 0; | ||
1321 | |||
1322 | /* | ||
1323 | * We have other queues, don't allow more IO from this one | ||
1324 | */ | ||
1325 | if (cfqd->busy_queues > 1) | ||
1326 | return 0; | ||
1327 | |||
1328 | /* | ||
1329 | * we are the only queue, allow up to 4 times of 'quantum' | ||
1330 | */ | ||
1331 | if (cfqq->dispatched >= 4 * max_dispatch) | ||
1332 | return 0; | ||
1333 | } | ||
1334 | |||
1335 | /* | ||
1336 | * Dispatch a request from this cfqq | ||
1337 | */ | ||
1338 | cfq_dispatch_request(cfqd, cfqq); | ||
1339 | cfqq->slice_dispatch++; | 1383 | cfqq->slice_dispatch++; |
1340 | cfq_clear_cfqq_must_dispatch(cfqq); | 1384 | cfq_clear_cfqq_must_dispatch(cfqq); |
1341 | 1385 | ||
@@ -1415,7 +1459,7 @@ static void cfq_cic_free_rcu(struct rcu_head *head) | |||
1415 | cic = container_of(head, struct cfq_io_context, rcu_head); | 1459 | cic = container_of(head, struct cfq_io_context, rcu_head); |
1416 | 1460 | ||
1417 | kmem_cache_free(cfq_ioc_pool, cic); | 1461 | kmem_cache_free(cfq_ioc_pool, cic); |
1418 | elv_ioc_count_dec(ioc_count); | 1462 | elv_ioc_count_dec(cfq_ioc_count); |
1419 | 1463 | ||
1420 | if (ioc_gone) { | 1464 | if (ioc_gone) { |
1421 | /* | 1465 | /* |
@@ -1424,7 +1468,7 @@ static void cfq_cic_free_rcu(struct rcu_head *head) | |||
1424 | * complete ioc_gone and set it back to NULL | 1468 | * complete ioc_gone and set it back to NULL |
1425 | */ | 1469 | */ |
1426 | spin_lock(&ioc_gone_lock); | 1470 | spin_lock(&ioc_gone_lock); |
1427 | if (ioc_gone && !elv_ioc_count_read(ioc_count)) { | 1471 | if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) { |
1428 | complete(ioc_gone); | 1472 | complete(ioc_gone); |
1429 | ioc_gone = NULL; | 1473 | ioc_gone = NULL; |
1430 | } | 1474 | } |
@@ -1550,7 +1594,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1550 | INIT_HLIST_NODE(&cic->cic_list); | 1594 | INIT_HLIST_NODE(&cic->cic_list); |
1551 | cic->dtor = cfq_free_io_context; | 1595 | cic->dtor = cfq_free_io_context; |
1552 | cic->exit = cfq_exit_io_context; | 1596 | cic->exit = cfq_exit_io_context; |
1553 | elv_ioc_count_inc(ioc_count); | 1597 | elv_ioc_count_inc(cfq_ioc_count); |
1554 | } | 1598 | } |
1555 | 1599 | ||
1556 | return cic; | 1600 | return cic; |
@@ -1635,7 +1679,7 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc) | |||
1635 | } | 1679 | } |
1636 | 1680 | ||
1637 | static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 1681 | static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
1638 | pid_t pid, int is_sync) | 1682 | pid_t pid, bool is_sync) |
1639 | { | 1683 | { |
1640 | RB_CLEAR_NODE(&cfqq->rb_node); | 1684 | RB_CLEAR_NODE(&cfqq->rb_node); |
1641 | RB_CLEAR_NODE(&cfqq->p_node); | 1685 | RB_CLEAR_NODE(&cfqq->p_node); |
@@ -1655,7 +1699,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1655 | } | 1699 | } |
1656 | 1700 | ||
1657 | static struct cfq_queue * | 1701 | static struct cfq_queue * |
1658 | cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync, | 1702 | cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, |
1659 | struct io_context *ioc, gfp_t gfp_mask) | 1703 | struct io_context *ioc, gfp_t gfp_mask) |
1660 | { | 1704 | { |
1661 | struct cfq_queue *cfqq, *new_cfqq = NULL; | 1705 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
@@ -1719,7 +1763,7 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio) | |||
1719 | } | 1763 | } |
1720 | 1764 | ||
1721 | static struct cfq_queue * | 1765 | static struct cfq_queue * |
1722 | cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, | 1766 | cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc, |
1723 | gfp_t gfp_mask) | 1767 | gfp_t gfp_mask) |
1724 | { | 1768 | { |
1725 | const int ioprio = task_ioprio(ioc); | 1769 | const int ioprio = task_ioprio(ioc); |
@@ -1951,10 +1995,13 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1951 | enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); | 1995 | enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); |
1952 | 1996 | ||
1953 | if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || | 1997 | if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || |
1954 | (cfqd->hw_tag && CIC_SEEKY(cic))) | 1998 | (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic))) |
1955 | enable_idle = 0; | 1999 | enable_idle = 0; |
1956 | else if (sample_valid(cic->ttime_samples)) { | 2000 | else if (sample_valid(cic->ttime_samples)) { |
1957 | if (cic->ttime_mean > cfqd->cfq_slice_idle) | 2001 | unsigned int slice_idle = cfqd->cfq_slice_idle; |
2002 | if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic)) | ||
2003 | slice_idle = msecs_to_jiffies(CFQ_MIN_TT); | ||
2004 | if (cic->ttime_mean > slice_idle) | ||
1958 | enable_idle = 0; | 2005 | enable_idle = 0; |
1959 | else | 2006 | else |
1960 | enable_idle = 1; | 2007 | enable_idle = 1; |
@@ -1973,7 +2020,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1973 | * Check if new_cfqq should preempt the currently active queue. Return 0 for | 2020 | * Check if new_cfqq should preempt the currently active queue. Return 0 for |
1974 | * no or if we aren't sure, a 1 will cause a preempt. | 2021 | * no or if we aren't sure, a 1 will cause a preempt. |
1975 | */ | 2022 | */ |
1976 | static int | 2023 | static bool |
1977 | cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | 2024 | cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, |
1978 | struct request *rq) | 2025 | struct request *rq) |
1979 | { | 2026 | { |
@@ -1981,48 +2028,48 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, | |||
1981 | 2028 | ||
1982 | cfqq = cfqd->active_queue; | 2029 | cfqq = cfqd->active_queue; |
1983 | if (!cfqq) | 2030 | if (!cfqq) |
1984 | return 0; | 2031 | return false; |
1985 | 2032 | ||
1986 | if (cfq_slice_used(cfqq)) | 2033 | if (cfq_slice_used(cfqq)) |
1987 | return 1; | 2034 | return true; |
1988 | 2035 | ||
1989 | if (cfq_class_idle(new_cfqq)) | 2036 | if (cfq_class_idle(new_cfqq)) |
1990 | return 0; | 2037 | return false; |
1991 | 2038 | ||
1992 | if (cfq_class_idle(cfqq)) | 2039 | if (cfq_class_idle(cfqq)) |
1993 | return 1; | 2040 | return true; |
1994 | 2041 | ||
1995 | /* | 2042 | /* |
1996 | * if the new request is sync, but the currently running queue is | 2043 | * if the new request is sync, but the currently running queue is |
1997 | * not, let the sync request have priority. | 2044 | * not, let the sync request have priority. |
1998 | */ | 2045 | */ |
1999 | if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) | 2046 | if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) |
2000 | return 1; | 2047 | return true; |
2001 | 2048 | ||
2002 | /* | 2049 | /* |
2003 | * So both queues are sync. Let the new request get disk time if | 2050 | * So both queues are sync. Let the new request get disk time if |
2004 | * it's a metadata request and the current queue is doing regular IO. | 2051 | * it's a metadata request and the current queue is doing regular IO. |
2005 | */ | 2052 | */ |
2006 | if (rq_is_meta(rq) && !cfqq->meta_pending) | 2053 | if (rq_is_meta(rq) && !cfqq->meta_pending) |
2007 | return 1; | 2054 | return false; |
2008 | 2055 | ||
2009 | /* | 2056 | /* |
2010 | * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. | 2057 | * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. |
2011 | */ | 2058 | */ |
2012 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) | 2059 | if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) |
2013 | return 1; | 2060 | return true; |
2014 | 2061 | ||
2015 | if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) | 2062 | if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) |
2016 | return 0; | 2063 | return false; |
2017 | 2064 | ||
2018 | /* | 2065 | /* |
2019 | * if this request is as-good as one we would expect from the | 2066 | * if this request is as-good as one we would expect from the |
2020 | * current cfqq, let it preempt | 2067 | * current cfqq, let it preempt |
2021 | */ | 2068 | */ |
2022 | if (cfq_rq_close(cfqd, rq)) | 2069 | if (cfq_rq_close(cfqd, rq)) |
2023 | return 1; | 2070 | return true; |
2024 | 2071 | ||
2025 | return 0; | 2072 | return false; |
2026 | } | 2073 | } |
2027 | 2074 | ||
2028 | /* | 2075 | /* |
@@ -2107,6 +2154,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
2107 | 2154 | ||
2108 | cfq_add_rq_rb(rq); | 2155 | cfq_add_rq_rb(rq); |
2109 | 2156 | ||
2157 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); | ||
2110 | list_add_tail(&rq->queuelist, &cfqq->fifo); | 2158 | list_add_tail(&rq->queuelist, &cfqq->fifo); |
2111 | 2159 | ||
2112 | cfq_rq_enqueued(cfqd, cfqq, rq); | 2160 | cfq_rq_enqueued(cfqd, cfqq, rq); |
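Fifo expiry is now stamped on the request itself at insert time with rq_set_fifo_time() (and inherited from the earlier request on a merge), so cfq_check_fifo() just compares the stored deadline against jiffies instead of recomputing start_time plus cfq_fifo_expire[]. Sketch of the stamping, assuming the rq_fifo_time helpers from <linux/elevator.h>:

static void example_stamp_fifo(struct cfq_data *cfqd, struct request *rq)
{
	unsigned long expire = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)];

	rq_set_fifo_time(rq, expire);

	/* cfq_check_fifo(): fresh while time_before(jiffies, rq_fifo_time(rq)) */
}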
@@ -2157,8 +2205,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
2157 | if (cfq_cfqq_sync(cfqq)) | 2205 | if (cfq_cfqq_sync(cfqq)) |
2158 | cfqd->sync_flight--; | 2206 | cfqd->sync_flight--; |
2159 | 2207 | ||
2160 | if (sync) | 2208 | if (sync) { |
2161 | RQ_CIC(rq)->last_end_request = now; | 2209 | RQ_CIC(rq)->last_end_request = now; |
2210 | cfqd->last_end_sync_rq = now; | ||
2211 | } | ||
2162 | 2212 | ||
2163 | /* | 2213 | /* |
2164 | * If this is the active queue, check if it needs to be expired, | 2214 | * If this is the active queue, check if it needs to be expired, |
@@ -2284,7 +2334,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | |||
2284 | struct cfq_data *cfqd = q->elevator->elevator_data; | 2334 | struct cfq_data *cfqd = q->elevator->elevator_data; |
2285 | struct cfq_io_context *cic; | 2335 | struct cfq_io_context *cic; |
2286 | const int rw = rq_data_dir(rq); | 2336 | const int rw = rq_data_dir(rq); |
2287 | const int is_sync = rq_is_sync(rq); | 2337 | const bool is_sync = rq_is_sync(rq); |
2288 | struct cfq_queue *cfqq; | 2338 | struct cfq_queue *cfqq; |
2289 | unsigned long flags; | 2339 | unsigned long flags; |
2290 | 2340 | ||
@@ -2480,8 +2530,9 @@ static void *cfq_init_queue(struct request_queue *q) | |||
2480 | cfqd->cfq_slice[1] = cfq_slice_sync; | 2530 | cfqd->cfq_slice[1] = cfq_slice_sync; |
2481 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; | 2531 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; |
2482 | cfqd->cfq_slice_idle = cfq_slice_idle; | 2532 | cfqd->cfq_slice_idle = cfq_slice_idle; |
2533 | cfqd->cfq_latency = 1; | ||
2483 | cfqd->hw_tag = 1; | 2534 | cfqd->hw_tag = 1; |
2484 | 2535 | cfqd->last_end_sync_rq = jiffies; | |
2485 | return cfqd; | 2536 | return cfqd; |
2486 | } | 2537 | } |
2487 | 2538 | ||
@@ -2549,6 +2600,7 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); | |||
2549 | SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); | 2600 | SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); |
2550 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); | 2601 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); |
2551 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); | 2602 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); |
2603 | SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); | ||
2552 | #undef SHOW_FUNCTION | 2604 | #undef SHOW_FUNCTION |
2553 | 2605 | ||
2554 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ | 2606 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ |
@@ -2580,6 +2632,7 @@ STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); | |||
2580 | STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); | 2632 | STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); |
2581 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, | 2633 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, |
2582 | UINT_MAX, 0); | 2634 | UINT_MAX, 0); |
2635 | STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); | ||
2583 | #undef STORE_FUNCTION | 2636 | #undef STORE_FUNCTION |
2584 | 2637 | ||
2585 | #define CFQ_ATTR(name) \ | 2638 | #define CFQ_ATTR(name) \ |
@@ -2595,6 +2648,7 @@ static struct elv_fs_entry cfq_attrs[] = { | |||
2595 | CFQ_ATTR(slice_async), | 2648 | CFQ_ATTR(slice_async), |
2596 | CFQ_ATTR(slice_async_rq), | 2649 | CFQ_ATTR(slice_async_rq), |
2597 | CFQ_ATTR(slice_idle), | 2650 | CFQ_ATTR(slice_idle), |
2651 | CFQ_ATTR(low_latency), | ||
2598 | __ATTR_NULL | 2652 | __ATTR_NULL |
2599 | }; | 2653 | }; |
2600 | 2654 | ||
@@ -2654,7 +2708,7 @@ static void __exit cfq_exit(void) | |||
2654 | * this also protects us from entering cfq_slab_kill() with | 2708 | * this also protects us from entering cfq_slab_kill() with |
2655 | * pending RCU callbacks | 2709 | * pending RCU callbacks |
2656 | */ | 2710 | */ |
2657 | if (elv_ioc_count_read(ioc_count)) | 2711 | if (elv_ioc_count_read(cfq_ioc_count)) |
2658 | wait_for_completion(&all_gone); | 2712 | wait_for_completion(&all_gone); |
2659 | cfq_slab_kill(); | 2713 | cfq_slab_kill(); |
2660 | } | 2714 | } |
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7865a34e0faa..9bd086c1a4d5 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -21,6 +21,11 @@ static int compat_put_int(unsigned long arg, int val) | |||
21 | return put_user(val, (compat_int_t __user *)compat_ptr(arg)); | 21 | return put_user(val, (compat_int_t __user *)compat_ptr(arg)); |
22 | } | 22 | } |
23 | 23 | ||
24 | static int compat_put_uint(unsigned long arg, unsigned int val) | ||
25 | { | ||
26 | return put_user(val, (compat_uint_t __user *)compat_ptr(arg)); | ||
27 | } | ||
28 | |||
24 | static int compat_put_long(unsigned long arg, long val) | 29 | static int compat_put_long(unsigned long arg, long val) |
25 | { | 30 | { |
26 | return put_user(val, (compat_long_t __user *)compat_ptr(arg)); | 31 | return put_user(val, (compat_long_t __user *)compat_ptr(arg)); |
@@ -734,6 +739,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
734 | switch (cmd) { | 739 | switch (cmd) { |
735 | case HDIO_GETGEO: | 740 | case HDIO_GETGEO: |
736 | return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); | 741 | return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); |
742 | case BLKPBSZGET: | ||
743 | return compat_put_uint(arg, bdev_physical_block_size(bdev)); | ||
744 | case BLKIOMIN: | ||
745 | return compat_put_uint(arg, bdev_io_min(bdev)); | ||
746 | case BLKIOOPT: | ||
747 | return compat_put_uint(arg, bdev_io_opt(bdev)); | ||
748 | case BLKALIGNOFF: | ||
749 | return compat_put_int(arg, bdev_alignment_offset(bdev)); | ||
737 | case BLKFLSBUF: | 750 | case BLKFLSBUF: |
738 | case BLKROSET: | 751 | case BLKROSET: |
739 | case BLKDISCARD: | 752 | case BLKDISCARD: |
diff --git a/block/elevator.c b/block/elevator.c
index 1975b619c86d..a847046c6e53 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1059,9 +1059,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, | |||
1059 | return count; | 1059 | return count; |
1060 | 1060 | ||
1061 | strlcpy(elevator_name, name, sizeof(elevator_name)); | 1061 | strlcpy(elevator_name, name, sizeof(elevator_name)); |
1062 | strstrip(elevator_name); | 1062 | e = elevator_get(strstrip(elevator_name)); |
1063 | |||
1064 | e = elevator_get(elevator_name); | ||
1065 | if (!e) { | 1063 | if (!e) { |
1066 | printk(KERN_ERR "elevator: type %s not found\n", elevator_name); | 1064 | printk(KERN_ERR "elevator: type %s not found\n", elevator_name); |
1067 | return -EINVAL; | 1065 | return -EINVAL; |
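The elevator.c change is about strstrip()'s return value: it trims trailing whitespace in place but skips leading whitespace only through the pointer it returns, so the old code, which discarded that result, could pass a name with leading spaces to elevator_get(). A hypothetical illustration; elevator_get() is static to elevator.c, so this is a sketch rather than usable code:

static struct elevator_type *example_lookup(char elevator_name[ELV_NAME_MAX])
{
	/* " cfq\n" -> "cfq": use the returned pointer, not the buffer */
	return elevator_get(strstrip(elevator_name));
}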
diff --git a/block/genhd.c b/block/genhd.c
index 5b76bf55d05c..517e4332cb37 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -903,7 +903,7 @@ static struct attribute_group disk_attr_group = { | |||
903 | .attrs = disk_attrs, | 903 | .attrs = disk_attrs, |
904 | }; | 904 | }; |
905 | 905 | ||
906 | static struct attribute_group *disk_attr_groups[] = { | 906 | static const struct attribute_group *disk_attr_groups[] = { |
907 | &disk_attr_group, | 907 | &disk_attr_group, |
908 | NULL | 908 | NULL |
909 | }; | 909 | }; |
@@ -998,12 +998,12 @@ struct class block_class = { | |||
998 | .name = "block", | 998 | .name = "block", |
999 | }; | 999 | }; |
1000 | 1000 | ||
1001 | static char *block_nodename(struct device *dev) | 1001 | static char *block_devnode(struct device *dev, mode_t *mode) |
1002 | { | 1002 | { |
1003 | struct gendisk *disk = dev_to_disk(dev); | 1003 | struct gendisk *disk = dev_to_disk(dev); |
1004 | 1004 | ||
1005 | if (disk->nodename) | 1005 | if (disk->devnode) |
1006 | return disk->nodename(disk); | 1006 | return disk->devnode(disk, mode); |
1007 | return NULL; | 1007 | return NULL; |
1008 | } | 1008 | } |
1009 | 1009 | ||
@@ -1011,7 +1011,7 @@ static struct device_type disk_type = { | |||
1011 | .name = "disk", | 1011 | .name = "disk", |
1012 | .groups = disk_attr_groups, | 1012 | .groups = disk_attr_groups, |
1013 | .release = disk_release, | 1013 | .release = disk_release, |
1014 | .nodename = block_nodename, | 1014 | .devnode = block_devnode, |
1015 | }; | 1015 | }; |
1016 | 1016 | ||
1017 | #ifdef CONFIG_PROC_FS | 1017 | #ifdef CONFIG_PROC_FS |
diff --git a/block/ioctl.c b/block/ioctl.c
index d3e6b5827a34..1f4d1de12b09 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -138,6 +138,11 @@ static int put_int(unsigned long arg, int val) | |||
138 | return put_user(val, (int __user *)arg); | 138 | return put_user(val, (int __user *)arg); |
139 | } | 139 | } |
140 | 140 | ||
141 | static int put_uint(unsigned long arg, unsigned int val) | ||
142 | { | ||
143 | return put_user(val, (unsigned int __user *)arg); | ||
144 | } | ||
145 | |||
141 | static int put_long(unsigned long arg, long val) | 146 | static int put_long(unsigned long arg, long val) |
142 | { | 147 | { |
143 | return put_user(val, (long __user *)arg); | 148 | return put_user(val, (long __user *)arg); |
@@ -263,10 +268,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, | |||
263 | return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); | 268 | return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); |
264 | case BLKROGET: | 269 | case BLKROGET: |
265 | return put_int(arg, bdev_read_only(bdev) != 0); | 270 | return put_int(arg, bdev_read_only(bdev) != 0); |
266 | case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */ | 271 | case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ |
267 | return put_int(arg, block_size(bdev)); | 272 | return put_int(arg, block_size(bdev)); |
268 | case BLKSSZGET: /* get block device hardware sector size */ | 273 | case BLKSSZGET: /* get block device logical block size */ |
269 | return put_int(arg, bdev_logical_block_size(bdev)); | 274 | return put_int(arg, bdev_logical_block_size(bdev)); |
275 | case BLKPBSZGET: /* get block device physical block size */ | ||
276 | return put_uint(arg, bdev_physical_block_size(bdev)); | ||
277 | case BLKIOMIN: | ||
278 | return put_uint(arg, bdev_io_min(bdev)); | ||
279 | case BLKIOOPT: | ||
280 | return put_uint(arg, bdev_io_opt(bdev)); | ||
281 | case BLKALIGNOFF: | ||
282 | return put_int(arg, bdev_alignment_offset(bdev)); | ||
270 | case BLKSECTGET: | 283 | case BLKSECTGET: |
271 | return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); | 284 | return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); |
272 | case BLKRASET: | 285 | case BLKRASET: |
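The new BLKPBSZGET, BLKIOMIN, BLKIOOPT and BLKALIGNOFF ioctls (plus their compat counterparts above) expose the queue's I/O topology to user space. A hedged user-space sketch of querying them; note that the first three return an unsigned int while the alignment offset is a signed int:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	unsigned int pbs, io_min, io_opt;
	int align;
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;

	if (ioctl(fd, BLKPBSZGET, &pbs) || ioctl(fd, BLKIOMIN, &io_min) ||
	    ioctl(fd, BLKIOOPT, &io_opt) || ioctl(fd, BLKALIGNOFF, &align))
		return 1;

	printf("physical block %u, io_min %u, io_opt %u, alignment %d\n",
	       pbs, io_min, io_opt, align);
	return 0;
}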