-rw-r--r--  MAINTAINERS  1
-rw-r--r--  block/bio-integrity.c  4
-rw-r--r--  block/bio.c  77
-rw-r--r--  block/blk-cgroup.c  92
-rw-r--r--  block/blk-cgroup.h  40
-rw-r--r--  block/blk-core.c  136
-rw-r--r--  block/blk-exec.c  10
-rw-r--r--  block/blk-merge.c  3
-rw-r--r--  block/blk-mq-tag.c  38
-rw-r--r--  block/blk-mq-tag.h  1
-rw-r--r--  block/blk-mq.c  160
-rw-r--r--  block/blk.h  5
-rw-r--r--  block/bounce.c  3
-rw-r--r--  block/cfq-iosched.c  125
-rw-r--r--  block/elevator.c  2
-rw-r--r--  block/ioctl.c  37
-rw-r--r--  drivers/block/nbd.c  50
-rw-r--r--  drivers/block/paride/pd.c  4
-rw-r--r--  drivers/block/sx8.c  4
-rw-r--r--  drivers/block/virtio_blk.c  6
-rw-r--r--  drivers/ide/ide-atapi.c  10
-rw-r--r--  drivers/ide/ide-cd.c  10
-rw-r--r--  drivers/ide/ide-cd_ioctl.c  2
-rw-r--r--  drivers/ide/ide-devsets.c  2
-rw-r--r--  drivers/ide/ide-eh.c  4
-rw-r--r--  drivers/ide/ide-floppy.c  8
-rw-r--r--  drivers/ide/ide-io.c  12
-rw-r--r--  drivers/ide/ide-ioctls.c  2
-rw-r--r--  drivers/ide/ide-park.c  4
-rw-r--r--  drivers/ide/ide-pm.c  56
-rw-r--r--  drivers/ide/ide-tape.c  6
-rw-r--r--  drivers/ide/ide-taskfile.c  2
-rw-r--r--  drivers/md/bcache/io.c  2
-rw-r--r--  drivers/md/bcache/request.c  2
-rw-r--r--  drivers/md/dm-cache-target.c  6
-rw-r--r--  drivers/md/dm-raid1.c  2
-rw-r--r--  drivers/md/dm-snap.c  1
-rw-r--r--  drivers/md/dm-table.c  25
-rw-r--r--  drivers/md/dm-thin.c  9
-rw-r--r--  drivers/md/dm-verity.c  2
-rw-r--r--  drivers/md/dm.c  171
-rw-r--r--  drivers/md/dm.h  5
-rw-r--r--  fs/btrfs/disk-io.c  13
-rw-r--r--  fs/btrfs/extent_io.c  2
-rw-r--r--  fs/btrfs/volumes.c  18
-rw-r--r--  fs/btrfs/volumes.h  2
-rw-r--r--  fs/buffer.c  13
-rw-r--r--  fs/ext4/page-io.c  1
-rw-r--r--  fs/nilfs2/segbuf.c  12
-rw-r--r--  fs/xfs/xfs_aops.c  1
-rw-r--r--  include/linux/bio.h  17
-rw-r--r--  include/linux/blk-mq.h  4
-rw-r--r--  include/linux/blk_types.h  25
-rw-r--r--  include/linux/blkdev.h  45
-rw-r--r--  include/linux/elevator.h  2
-rw-r--r--  include/linux/fs.h  3
-rw-r--r--  include/linux/ide.h  27
-rw-r--r--  include/linux/swap.h  1
-rw-r--r--  include/uapi/linux/nbd.h  2
-rw-r--r--  kernel/power/Makefile  3
-rw-r--r--  kernel/power/block_io.c  103
-rw-r--r--  kernel/power/power.h  9
-rw-r--r--  kernel/power/swap.c  159
-rw-r--r--  mm/page_io.c  2
64 files changed, 852 insertions, 753 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 798dc538529c..d42970b10a22 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2075,6 +2075,7 @@ M: Jens Axboe <axboe@kernel.dk>
2075T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git 2075T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
2076S: Maintained 2076S: Maintained
2077F: block/ 2077F: block/
2078F: kernel/trace/blktrace.c
2078 2079
2079BLOCK2MTD DRIVER 2080BLOCK2MTD DRIVER
2080M: Joern Engel <joern@lazybastard.org> 2081M: Joern Engel <joern@lazybastard.org>
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 5cbd5d9ea61d..0436c21db7f2 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -361,7 +361,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
361 361
362 /* Restore original bio completion handler */ 362 /* Restore original bio completion handler */
363 bio->bi_end_io = bip->bip_end_io; 363 bio->bi_end_io = bip->bip_end_io;
364 bio_endio_nodec(bio, error); 364 bio_endio(bio, error);
365} 365}
366 366
367/** 367/**
@@ -388,7 +388,7 @@ void bio_integrity_endio(struct bio *bio, int error)
388 */ 388 */
389 if (error) { 389 if (error) {
390 bio->bi_end_io = bip->bip_end_io; 390 bio->bi_end_io = bip->bip_end_io;
391 bio_endio_nodec(bio, error); 391 bio_endio(bio, error);
392 392
393 return; 393 return;
394 } 394 }
diff --git a/block/bio.c b/block/bio.c
index f66a4eae16ee..259197d97de1 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -270,8 +270,8 @@ void bio_init(struct bio *bio)
270{ 270{
271 memset(bio, 0, sizeof(*bio)); 271 memset(bio, 0, sizeof(*bio));
272 bio->bi_flags = 1 << BIO_UPTODATE; 272 bio->bi_flags = 1 << BIO_UPTODATE;
273 atomic_set(&bio->bi_remaining, 1); 273 atomic_set(&bio->__bi_remaining, 1);
274 atomic_set(&bio->bi_cnt, 1); 274 atomic_set(&bio->__bi_cnt, 1);
275} 275}
276EXPORT_SYMBOL(bio_init); 276EXPORT_SYMBOL(bio_init);
277 277
@@ -292,8 +292,8 @@ void bio_reset(struct bio *bio)
292 __bio_free(bio); 292 __bio_free(bio);
293 293
294 memset(bio, 0, BIO_RESET_BYTES); 294 memset(bio, 0, BIO_RESET_BYTES);
295 bio->bi_flags = flags|(1 << BIO_UPTODATE); 295 bio->bi_flags = flags | (1 << BIO_UPTODATE);
296 atomic_set(&bio->bi_remaining, 1); 296 atomic_set(&bio->__bi_remaining, 1);
297} 297}
298EXPORT_SYMBOL(bio_reset); 298EXPORT_SYMBOL(bio_reset);
299 299
@@ -303,6 +303,17 @@ static void bio_chain_endio(struct bio *bio, int error)
303 bio_put(bio); 303 bio_put(bio);
304} 304}
305 305
306/*
307 * Increment chain count for the bio. Make sure the CHAIN flag update
308 * is visible before the raised count.
309 */
310static inline void bio_inc_remaining(struct bio *bio)
311{
312 bio->bi_flags |= (1 << BIO_CHAIN);
313 smp_mb__before_atomic();
314 atomic_inc(&bio->__bi_remaining);
315}
316
306/** 317/**
307 * bio_chain - chain bio completions 318 * bio_chain - chain bio completions
308 * @bio: the target bio 319 * @bio: the target bio
@@ -320,7 +331,7 @@ void bio_chain(struct bio *bio, struct bio *parent)
320 331
321 bio->bi_private = parent; 332 bio->bi_private = parent;
322 bio->bi_end_io = bio_chain_endio; 333 bio->bi_end_io = bio_chain_endio;
323 atomic_inc(&parent->bi_remaining); 334 bio_inc_remaining(parent);
324} 335}
325EXPORT_SYMBOL(bio_chain); 336EXPORT_SYMBOL(bio_chain);
326 337
@@ -524,13 +535,17 @@ EXPORT_SYMBOL(zero_fill_bio);
524 **/ 535 **/
525void bio_put(struct bio *bio) 536void bio_put(struct bio *bio)
526{ 537{
527 BIO_BUG_ON(!atomic_read(&bio->bi_cnt)); 538 if (!bio_flagged(bio, BIO_REFFED))
528
529 /*
530 * last put frees it
531 */
532 if (atomic_dec_and_test(&bio->bi_cnt))
533 bio_free(bio); 539 bio_free(bio);
540 else {
541 BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
542
543 /*
544 * last put frees it
545 */
546 if (atomic_dec_and_test(&bio->__bi_cnt))
547 bio_free(bio);
548 }
534} 549}
535EXPORT_SYMBOL(bio_put); 550EXPORT_SYMBOL(bio_put);
536 551
@@ -1741,6 +1756,25 @@ void bio_flush_dcache_pages(struct bio *bi)
1741EXPORT_SYMBOL(bio_flush_dcache_pages); 1756EXPORT_SYMBOL(bio_flush_dcache_pages);
1742#endif 1757#endif
1743 1758
1759static inline bool bio_remaining_done(struct bio *bio)
1760{
1761 /*
1762 * If we're not chaining, then ->__bi_remaining is always 1 and
1763 * we always end io on the first invocation.
1764 */
1765 if (!bio_flagged(bio, BIO_CHAIN))
1766 return true;
1767
1768 BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
1769
1770 if (atomic_dec_and_test(&bio->__bi_remaining)) {
1771 clear_bit(BIO_CHAIN, &bio->bi_flags);
1772 return true;
1773 }
1774
1775 return false;
1776}
1777
1744/** 1778/**
1745 * bio_endio - end I/O on a bio 1779 * bio_endio - end I/O on a bio
1746 * @bio: bio 1780 * @bio: bio
@@ -1758,15 +1792,13 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
1758void bio_endio(struct bio *bio, int error) 1792void bio_endio(struct bio *bio, int error)
1759{ 1793{
1760 while (bio) { 1794 while (bio) {
1761 BUG_ON(atomic_read(&bio->bi_remaining) <= 0);
1762
1763 if (error) 1795 if (error)
1764 clear_bit(BIO_UPTODATE, &bio->bi_flags); 1796 clear_bit(BIO_UPTODATE, &bio->bi_flags);
1765 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1797 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1766 error = -EIO; 1798 error = -EIO;
1767 1799
1768 if (!atomic_dec_and_test(&bio->bi_remaining)) 1800 if (unlikely(!bio_remaining_done(bio)))
1769 return; 1801 break;
1770 1802
1771 /* 1803 /*
1772 * Need to have a real endio function for chained bios, 1804 * Need to have a real endio function for chained bios,
@@ -1790,21 +1822,6 @@ void bio_endio(struct bio *bio, int error)
1790EXPORT_SYMBOL(bio_endio); 1822EXPORT_SYMBOL(bio_endio);
1791 1823
1792/** 1824/**
1793 * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining
1794 * @bio: bio
1795 * @error: error, if any
1796 *
1797 * For code that has saved and restored bi_end_io; thing hard before using this
1798 * function, probably you should've cloned the entire bio.
1799 **/
1800void bio_endio_nodec(struct bio *bio, int error)
1801{
1802 atomic_inc(&bio->bi_remaining);
1803 bio_endio(bio, error);
1804}
1805EXPORT_SYMBOL(bio_endio_nodec);
1806
1807/**
1808 * bio_split - split a bio 1825 * bio_split - split a bio
1809 * @bio: bio to split 1826 * @bio: bio to split
1810 * @sectors: number of sectors to split from the front of @bio 1827 * @sectors: number of sectors to split from the front of @bio
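
With bio_endio_nodec() gone, the remaining count lives in the private __bi_remaining field and bio_endio() only consults it once BIO_CHAIN has been set by bio_inc_remaining(). Callers are unaffected: bio_chain() is still the interface for tying a child bio's completion to its parent. The fragment below is a minimal, hypothetical sketch of the usual split-and-chain pattern (the function name, size limit and bio_set are placeholders, not part of this patch):

#include <linux/bio.h>
#include <linux/blkdev.h>

/*
 * Hypothetical helper: split the front of an oversized bio into a new bio
 * and chain it to the original, so the original's ->bi_end_io runs only
 * after both pieces have completed.  Real code would loop or recurse if
 * the remainder is still too large.
 */
static void my_submit_split(struct bio *bio, unsigned int max_sectors,
                            struct bio_set *bs)
{
        if (bio_sectors(bio) > max_sectors) {
                struct bio *split;

                split = bio_split(bio, max_sectors, GFP_NOIO, bs);
                bio_chain(split, bio);          /* split is the child, bio the parent */
                generic_make_request(split);    /* issue the front piece */
        }
        generic_make_request(bio);              /* issue the (advanced) remainder */
}

Because bio_chain() raises the parent's remaining count, the parent's completion handler fires exactly once, after both the split piece and the remainder have ended.
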
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 0ac817b750db..6e43fa355e71 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -9,6 +9,10 @@
9 * 9 *
10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> 10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
11 * Nauman Rafique <nauman@google.com> 11 * Nauman Rafique <nauman@google.com>
12 *
13 * For policy-specific per-blkcg data:
14 * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
15 * Arianna Avanzini <avanzini.arianna@gmail.com>
12 */ 16 */
13#include <linux/ioprio.h> 17#include <linux/ioprio.h>
14#include <linux/kdev_t.h> 18#include <linux/kdev_t.h>
@@ -26,8 +30,7 @@
26 30
27static DEFINE_MUTEX(blkcg_pol_mutex); 31static DEFINE_MUTEX(blkcg_pol_mutex);
28 32
29struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, 33struct blkcg blkcg_root;
30 .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
31EXPORT_SYMBOL_GPL(blkcg_root); 34EXPORT_SYMBOL_GPL(blkcg_root);
32 35
33static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; 36static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
@@ -823,6 +826,8 @@ static struct cgroup_subsys_state *
823blkcg_css_alloc(struct cgroup_subsys_state *parent_css) 826blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
824{ 827{
825 struct blkcg *blkcg; 828 struct blkcg *blkcg;
829 struct cgroup_subsys_state *ret;
830 int i;
826 831
827 if (!parent_css) { 832 if (!parent_css) {
828 blkcg = &blkcg_root; 833 blkcg = &blkcg_root;
@@ -830,17 +835,49 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
830 } 835 }
831 836
832 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); 837 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
833 if (!blkcg) 838 if (!blkcg) {
834 return ERR_PTR(-ENOMEM); 839 ret = ERR_PTR(-ENOMEM);
840 goto free_blkcg;
841 }
842
843 for (i = 0; i < BLKCG_MAX_POLS ; i++) {
844 struct blkcg_policy *pol = blkcg_policy[i];
845 struct blkcg_policy_data *cpd;
846
847 /*
848 * If the policy hasn't been attached yet, wait for it
849 * to be attached before doing anything else. Otherwise,
850 * check if the policy requires any specific per-cgroup
851 * data: if it does, allocate and initialize it.
852 */
853 if (!pol || !pol->cpd_size)
854 continue;
855
856 BUG_ON(blkcg->pd[i]);
857 cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
858 if (!cpd) {
859 ret = ERR_PTR(-ENOMEM);
860 goto free_pd_blkcg;
861 }
862 blkcg->pd[i] = cpd;
863 cpd->plid = i;
864 pol->cpd_init_fn(blkcg);
865 }
835 866
836 blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
837 blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
838done: 867done:
839 spin_lock_init(&blkcg->lock); 868 spin_lock_init(&blkcg->lock);
840 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); 869 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
841 INIT_HLIST_HEAD(&blkcg->blkg_list); 870 INIT_HLIST_HEAD(&blkcg->blkg_list);
842 871
843 return &blkcg->css; 872 return &blkcg->css;
873
874free_pd_blkcg:
875 for (i--; i >= 0; i--)
876 kfree(blkcg->pd[i]);
877
878free_blkcg:
879 kfree(blkcg);
880 return ret;
844} 881}
845 882
846/** 883/**
@@ -958,8 +995,10 @@ int blkcg_activate_policy(struct request_queue *q,
958 const struct blkcg_policy *pol) 995 const struct blkcg_policy *pol)
959{ 996{
960 LIST_HEAD(pds); 997 LIST_HEAD(pds);
998 LIST_HEAD(cpds);
961 struct blkcg_gq *blkg, *new_blkg; 999 struct blkcg_gq *blkg, *new_blkg;
962 struct blkg_policy_data *pd, *n; 1000 struct blkg_policy_data *pd, *nd;
1001 struct blkcg_policy_data *cpd, *cnd;
963 int cnt = 0, ret; 1002 int cnt = 0, ret;
964 bool preloaded; 1003 bool preloaded;
965 1004
@@ -1003,7 +1042,10 @@ int blkcg_activate_policy(struct request_queue *q,
1003 1042
1004 spin_unlock_irq(q->queue_lock); 1043 spin_unlock_irq(q->queue_lock);
1005 1044
1006 /* allocate policy_data for all existing blkgs */ 1045 /*
1046 * Allocate per-blkg and per-blkcg policy data
1047 * for all existing blkgs.
1048 */
1007 while (cnt--) { 1049 while (cnt--) {
1008 pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); 1050 pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
1009 if (!pd) { 1051 if (!pd) {
@@ -1011,26 +1053,50 @@ int blkcg_activate_policy(struct request_queue *q,
1011 goto out_free; 1053 goto out_free;
1012 } 1054 }
1013 list_add_tail(&pd->alloc_node, &pds); 1055 list_add_tail(&pd->alloc_node, &pds);
1056
1057 if (!pol->cpd_size)
1058 continue;
1059 cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node);
1060 if (!cpd) {
1061 ret = -ENOMEM;
1062 goto out_free;
1063 }
1064 list_add_tail(&cpd->alloc_node, &cpds);
1014 } 1065 }
1015 1066
1016 /* 1067 /*
1017 * Install the allocated pds. With @q bypassing, no new blkg 1068 * Install the allocated pds and cpds. With @q bypassing, no new blkg
1018 * should have been created while the queue lock was dropped. 1069 * should have been created while the queue lock was dropped.
1019 */ 1070 */
1020 spin_lock_irq(q->queue_lock); 1071 spin_lock_irq(q->queue_lock);
1021 1072
1022 list_for_each_entry(blkg, &q->blkg_list, q_node) { 1073 list_for_each_entry(blkg, &q->blkg_list, q_node) {
1023 if (WARN_ON(list_empty(&pds))) { 1074 if (WARN_ON(list_empty(&pds)) ||
1075 WARN_ON(pol->cpd_size && list_empty(&cpds))) {
1024 /* umm... this shouldn't happen, just abort */ 1076 /* umm... this shouldn't happen, just abort */
1025 ret = -ENOMEM; 1077 ret = -ENOMEM;
1026 goto out_unlock; 1078 goto out_unlock;
1027 } 1079 }
1080 cpd = list_first_entry(&cpds, struct blkcg_policy_data,
1081 alloc_node);
1082 list_del_init(&cpd->alloc_node);
1028 pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); 1083 pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
1029 list_del_init(&pd->alloc_node); 1084 list_del_init(&pd->alloc_node);
1030 1085
1031 /* grab blkcg lock too while installing @pd on @blkg */ 1086 /* grab blkcg lock too while installing @pd on @blkg */
1032 spin_lock(&blkg->blkcg->lock); 1087 spin_lock(&blkg->blkcg->lock);
1033 1088
1089 if (!pol->cpd_size)
1090 goto no_cpd;
1091 if (!blkg->blkcg->pd[pol->plid]) {
1092 /* Per-policy per-blkcg data */
1093 blkg->blkcg->pd[pol->plid] = cpd;
1094 cpd->plid = pol->plid;
1095 pol->cpd_init_fn(blkg->blkcg);
1096 } else { /* must free it as it has already been extracted */
1097 kfree(cpd);
1098 }
1099no_cpd:
1034 blkg->pd[pol->plid] = pd; 1100 blkg->pd[pol->plid] = pd;
1035 pd->blkg = blkg; 1101 pd->blkg = blkg;
1036 pd->plid = pol->plid; 1102 pd->plid = pol->plid;
@@ -1045,8 +1111,10 @@ out_unlock:
1045 spin_unlock_irq(q->queue_lock); 1111 spin_unlock_irq(q->queue_lock);
1046out_free: 1112out_free:
1047 blk_queue_bypass_end(q); 1113 blk_queue_bypass_end(q);
1048 list_for_each_entry_safe(pd, n, &pds, alloc_node) 1114 list_for_each_entry_safe(pd, nd, &pds, alloc_node)
1049 kfree(pd); 1115 kfree(pd);
1116 list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node)
1117 kfree(cpd);
1050 return ret; 1118 return ret;
1051} 1119}
1052EXPORT_SYMBOL_GPL(blkcg_activate_policy); 1120EXPORT_SYMBOL_GPL(blkcg_activate_policy);
@@ -1087,6 +1155,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
1087 1155
1088 kfree(blkg->pd[pol->plid]); 1156 kfree(blkg->pd[pol->plid]);
1089 blkg->pd[pol->plid] = NULL; 1157 blkg->pd[pol->plid] = NULL;
1158 kfree(blkg->blkcg->pd[pol->plid]);
1159 blkg->blkcg->pd[pol->plid] = NULL;
1090 1160
1091 spin_unlock(&blkg->blkcg->lock); 1161 spin_unlock(&blkg->blkcg->lock);
1092 } 1162 }
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index c567865b5f1d..74296a78bba1 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -23,11 +23,6 @@
23/* Max limits for throttle policy */ 23/* Max limits for throttle policy */
24#define THROTL_IOPS_MAX UINT_MAX 24#define THROTL_IOPS_MAX UINT_MAX
25 25
26/* CFQ specific, out here for blkcg->cfq_weight */
27#define CFQ_WEIGHT_MIN 10
28#define CFQ_WEIGHT_MAX 1000
29#define CFQ_WEIGHT_DEFAULT 500
30
31#ifdef CONFIG_BLK_CGROUP 26#ifdef CONFIG_BLK_CGROUP
32 27
33enum blkg_rwstat_type { 28enum blkg_rwstat_type {
@@ -50,9 +45,7 @@ struct blkcg {
50 struct blkcg_gq *blkg_hint; 45 struct blkcg_gq *blkg_hint;
51 struct hlist_head blkg_list; 46 struct hlist_head blkg_list;
52 47
53 /* TODO: per-policy storage in blkcg */ 48 struct blkcg_policy_data *pd[BLKCG_MAX_POLS];
54 unsigned int cfq_weight; /* belongs to cfq */
55 unsigned int cfq_leaf_weight;
56}; 49};
57 50
58struct blkg_stat { 51struct blkg_stat {
@@ -87,6 +80,24 @@ struct blkg_policy_data {
87 struct list_head alloc_node; 80 struct list_head alloc_node;
88}; 81};
89 82
83/*
84 * Policies that need to keep per-blkcg data which is independent
85 * from any request_queue associated to it must specify its size
86 * with the cpd_size field of the blkcg_policy structure and
87 * embed a blkcg_policy_data in it. blkcg core allocates
88 * policy-specific per-blkcg structures lazily the first time
89 * they are actually needed, so it handles them together with
90 * blkgs. cpd_init() is invoked to let each policy handle
91 * per-blkcg data.
92 */
93struct blkcg_policy_data {
94 /* the policy id this per-policy data belongs to */
95 int plid;
96
97 /* used during policy activation */
98 struct list_head alloc_node;
99};
100
90/* association between a blk cgroup and a request queue */ 101/* association between a blk cgroup and a request queue */
91struct blkcg_gq { 102struct blkcg_gq {
92 /* Pointer to the associated request_queue */ 103 /* Pointer to the associated request_queue */
@@ -112,6 +123,7 @@ struct blkcg_gq {
112 struct rcu_head rcu_head; 123 struct rcu_head rcu_head;
113}; 124};
114 125
126typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg);
115typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); 127typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg);
116typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); 128typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg);
117typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); 129typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg);
@@ -122,10 +134,13 @@ struct blkcg_policy {
122 int plid; 134 int plid;
123 /* policy specific private data size */ 135 /* policy specific private data size */
124 size_t pd_size; 136 size_t pd_size;
137 /* policy specific per-blkcg data size */
138 size_t cpd_size;
125 /* cgroup files for the policy */ 139 /* cgroup files for the policy */
126 struct cftype *cftypes; 140 struct cftype *cftypes;
127 141
128 /* operations */ 142 /* operations */
143 blkcg_pol_init_cpd_fn *cpd_init_fn;
129 blkcg_pol_init_pd_fn *pd_init_fn; 144 blkcg_pol_init_pd_fn *pd_init_fn;
130 blkcg_pol_online_pd_fn *pd_online_fn; 145 blkcg_pol_online_pd_fn *pd_online_fn;
131 blkcg_pol_offline_pd_fn *pd_offline_fn; 146 blkcg_pol_offline_pd_fn *pd_offline_fn;
@@ -218,6 +233,12 @@ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
218 return blkg ? blkg->pd[pol->plid] : NULL; 233 return blkg ? blkg->pd[pol->plid] : NULL;
219} 234}
220 235
236static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
237 struct blkcg_policy *pol)
238{
239 return blkcg ? blkcg->pd[pol->plid] : NULL;
240}
241
221/** 242/**
222 * pdata_to_blkg - get blkg associated with policy private data 243 * pdata_to_blkg - get blkg associated with policy private data
223 * @pd: policy private data of interest 244 * @pd: policy private data of interest
@@ -564,6 +585,9 @@ struct blkcg;
564struct blkg_policy_data { 585struct blkg_policy_data {
565}; 586};
566 587
588struct blkcg_policy_data {
589};
590
567struct blkcg_gq { 591struct blkcg_gq {
568}; 592};
569 593
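
The new cpd_size and cpd_init_fn hooks let a policy keep data that belongs to the cgroup itself, independent of any request_queue, by embedding struct blkcg_policy_data as the first member of its own structure. The CFQ conversion further down in this patch is the real user; the fragment below is only a condensed, hypothetical illustration of the pattern (all example_* names are made up):

#include "blk-cgroup.h"         /* in-tree policies only; not a public header */

/* per-(cgroup, queue) policy data, exactly as before */
struct example_group {
        struct blkg_policy_data pd;             /* must be the first member */
        unsigned int weight;
};

/* new: per-cgroup policy data, shared by all queues */
struct example_group_data {
        struct blkcg_policy_data cpd;           /* must be the first member */
        unsigned int weight;
};

static struct blkcg_policy example_policy;

static void example_cpd_init(const struct blkcg *blkcg)
{
        struct example_group_data *gd =
                container_of(blkcg->pd[example_policy.plid],
                             struct example_group_data, cpd);

        gd->weight = 500;                       /* illustrative default */
}

static struct blkcg_policy example_policy = {
        .pd_size        = sizeof(struct example_group),
        .cpd_size       = sizeof(struct example_group_data),
        .cpd_init_fn    = example_cpd_init,
        /* .pd_init_fn and the other hooks are unchanged by this patch */
};

blkcg core allocates the cpd_size bytes itself, lazily and together with the blkgs, and calls cpd_init_fn once the structure has been installed in blkcg->pd[], so the policy never allocates or frees this memory directly.
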
diff --git a/block/blk-core.c b/block/blk-core.c
index 03b5f8d77f37..f6ab750060fe 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -117,7 +117,7 @@ EXPORT_SYMBOL(blk_rq_init);
117static void req_bio_endio(struct request *rq, struct bio *bio, 117static void req_bio_endio(struct request *rq, struct bio *bio,
118 unsigned int nbytes, int error) 118 unsigned int nbytes, int error)
119{ 119{
120 if (error) 120 if (error && !(rq->cmd_flags & REQ_CLONE))
121 clear_bit(BIO_UPTODATE, &bio->bi_flags); 121 clear_bit(BIO_UPTODATE, &bio->bi_flags);
122 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 122 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
123 error = -EIO; 123 error = -EIO;
@@ -128,7 +128,8 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
128 bio_advance(bio, nbytes); 128 bio_advance(bio, nbytes);
129 129
130 /* don't actually finish bio if it's part of flush sequence */ 130 /* don't actually finish bio if it's part of flush sequence */
131 if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) 131 if (bio->bi_iter.bi_size == 0 &&
132 !(rq->cmd_flags & (REQ_FLUSH_SEQ|REQ_CLONE)))
132 bio_endio(bio, error); 133 bio_endio(bio, error);
133} 134}
134 135
@@ -285,6 +286,7 @@ inline void __blk_run_queue_uncond(struct request_queue *q)
285 q->request_fn(q); 286 q->request_fn(q);
286 q->request_fn_active--; 287 q->request_fn_active--;
287} 288}
289EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
288 290
289/** 291/**
290 * __blk_run_queue - run a single device queue 292 * __blk_run_queue - run a single device queue
@@ -1525,7 +1527,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
1525 * Caller must ensure !blk_queue_nomerges(q) beforehand. 1527 * Caller must ensure !blk_queue_nomerges(q) beforehand.
1526 */ 1528 */
1527bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, 1529bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1528 unsigned int *request_count) 1530 unsigned int *request_count,
1531 struct request **same_queue_rq)
1529{ 1532{
1530 struct blk_plug *plug; 1533 struct blk_plug *plug;
1531 struct request *rq; 1534 struct request *rq;
@@ -1545,8 +1548,16 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1545 list_for_each_entry_reverse(rq, plug_list, queuelist) { 1548 list_for_each_entry_reverse(rq, plug_list, queuelist) {
1546 int el_ret; 1549 int el_ret;
1547 1550
1548 if (rq->q == q) 1551 if (rq->q == q) {
1549 (*request_count)++; 1552 (*request_count)++;
1553 /*
1554 * Only blk-mq multiple hardware queues case checks the
1555 * rq in the same queue, there should be only one such
1556 * rq in a queue
1557 **/
1558 if (same_queue_rq)
1559 *same_queue_rq = rq;
1560 }
1550 1561
1551 if (rq->q != q || !blk_rq_merge_ok(rq, bio)) 1562 if (rq->q != q || !blk_rq_merge_ok(rq, bio))
1552 continue; 1563 continue;
@@ -1611,7 +1622,7 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
1611 * any locks. 1622 * any locks.
1612 */ 1623 */
1613 if (!blk_queue_nomerges(q) && 1624 if (!blk_queue_nomerges(q) &&
1614 blk_attempt_plug_merge(q, bio, &request_count)) 1625 blk_attempt_plug_merge(q, bio, &request_count, NULL))
1615 return; 1626 return;
1616 1627
1617 spin_lock_irq(q->queue_lock); 1628 spin_lock_irq(q->queue_lock);
@@ -1718,8 +1729,6 @@ static void handle_bad_sector(struct bio *bio)
1718 bio->bi_rw, 1729 bio->bi_rw,
1719 (unsigned long long)bio_end_sector(bio), 1730 (unsigned long long)bio_end_sector(bio),
1720 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); 1731 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
1721
1722 set_bit(BIO_EOF, &bio->bi_flags);
1723} 1732}
1724 1733
1725#ifdef CONFIG_FAIL_MAKE_REQUEST 1734#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -2904,95 +2913,22 @@ int blk_lld_busy(struct request_queue *q)
2904} 2913}
2905EXPORT_SYMBOL_GPL(blk_lld_busy); 2914EXPORT_SYMBOL_GPL(blk_lld_busy);
2906 2915
2907/** 2916void blk_rq_prep_clone(struct request *dst, struct request *src)
2908 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
2909 * @rq: the clone request to be cleaned up
2910 *
2911 * Description:
2912 * Free all bios in @rq for a cloned request.
2913 */
2914void blk_rq_unprep_clone(struct request *rq)
2915{
2916 struct bio *bio;
2917
2918 while ((bio = rq->bio) != NULL) {
2919 rq->bio = bio->bi_next;
2920
2921 bio_put(bio);
2922 }
2923}
2924EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2925
2926/*
2927 * Copy attributes of the original request to the clone request.
2928 * The actual data parts (e.g. ->cmd, ->sense) are not copied.
2929 */
2930static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2931{ 2917{
2932 dst->cpu = src->cpu; 2918 dst->cpu = src->cpu;
2933 dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; 2919 dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK);
2920 dst->cmd_flags |= REQ_NOMERGE | REQ_CLONE;
2934 dst->cmd_type = src->cmd_type; 2921 dst->cmd_type = src->cmd_type;
2935 dst->__sector = blk_rq_pos(src); 2922 dst->__sector = blk_rq_pos(src);
2936 dst->__data_len = blk_rq_bytes(src); 2923 dst->__data_len = blk_rq_bytes(src);
2937 dst->nr_phys_segments = src->nr_phys_segments; 2924 dst->nr_phys_segments = src->nr_phys_segments;
2938 dst->ioprio = src->ioprio; 2925 dst->ioprio = src->ioprio;
2939 dst->extra_len = src->extra_len; 2926 dst->extra_len = src->extra_len;
2940} 2927 dst->bio = src->bio;
2941 2928 dst->biotail = src->biotail;
2942/** 2929 dst->cmd = src->cmd;
2943 * blk_rq_prep_clone - Helper function to setup clone request 2930 dst->cmd_len = src->cmd_len;
2944 * @rq: the request to be setup 2931 dst->sense = src->sense;
2945 * @rq_src: original request to be cloned
2946 * @bs: bio_set that bios for clone are allocated from
2947 * @gfp_mask: memory allocation mask for bio
2948 * @bio_ctr: setup function to be called for each clone bio.
2949 * Returns %0 for success, non %0 for failure.
2950 * @data: private data to be passed to @bio_ctr
2951 *
2952 * Description:
2953 * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
2954 * The actual data parts of @rq_src (e.g. ->cmd, ->sense)
2955 * are not copied, and copying such parts is the caller's responsibility.
2956 * Also, pages which the original bios are pointing to are not copied
2957 * and the cloned bios just point same pages.
2958 * So cloned bios must be completed before original bios, which means
2959 * the caller must complete @rq before @rq_src.
2960 */
2961int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2962 struct bio_set *bs, gfp_t gfp_mask,
2963 int (*bio_ctr)(struct bio *, struct bio *, void *),
2964 void *data)
2965{
2966 struct bio *bio, *bio_src;
2967
2968 if (!bs)
2969 bs = fs_bio_set;
2970
2971 __rq_for_each_bio(bio_src, rq_src) {
2972 bio = bio_clone_fast(bio_src, gfp_mask, bs);
2973 if (!bio)
2974 goto free_and_out;
2975
2976 if (bio_ctr && bio_ctr(bio, bio_src, data))
2977 goto free_and_out;
2978
2979 if (rq->bio) {
2980 rq->biotail->bi_next = bio;
2981 rq->biotail = bio;
2982 } else
2983 rq->bio = rq->biotail = bio;
2984 }
2985
2986 __blk_rq_prep_clone(rq, rq_src);
2987
2988 return 0;
2989
2990free_and_out:
2991 if (bio)
2992 bio_put(bio);
2993 blk_rq_unprep_clone(rq);
2994
2995 return -ENOMEM;
2996} 2932}
2997EXPORT_SYMBOL_GPL(blk_rq_prep_clone); 2933EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
2998 2934
@@ -3034,21 +2970,20 @@ void blk_start_plug(struct blk_plug *plug)
3034{ 2970{
3035 struct task_struct *tsk = current; 2971 struct task_struct *tsk = current;
3036 2972
2973 /*
2974 * If this is a nested plug, don't actually assign it.
2975 */
2976 if (tsk->plug)
2977 return;
2978
3037 INIT_LIST_HEAD(&plug->list); 2979 INIT_LIST_HEAD(&plug->list);
3038 INIT_LIST_HEAD(&plug->mq_list); 2980 INIT_LIST_HEAD(&plug->mq_list);
3039 INIT_LIST_HEAD(&plug->cb_list); 2981 INIT_LIST_HEAD(&plug->cb_list);
3040
3041 /* 2982 /*
3042 * If this is a nested plug, don't actually assign it. It will be 2983 * Store ordering should not be needed here, since a potential
3043 * flushed on its own. 2984 * preempt will imply a full memory barrier
3044 */ 2985 */
3045 if (!tsk->plug) { 2986 tsk->plug = plug;
3046 /*
3047 * Store ordering should not be needed here, since a potential
3048 * preempt will imply a full memory barrier
3049 */
3050 tsk->plug = plug;
3051 }
3052} 2987}
3053EXPORT_SYMBOL(blk_start_plug); 2988EXPORT_SYMBOL(blk_start_plug);
3054 2989
@@ -3195,10 +3130,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3195 3130
3196void blk_finish_plug(struct blk_plug *plug) 3131void blk_finish_plug(struct blk_plug *plug)
3197{ 3132{
3133 if (plug != current->plug)
3134 return;
3198 blk_flush_plug_list(plug, false); 3135 blk_flush_plug_list(plug, false);
3199 3136
3200 if (plug == current->plug) 3137 current->plug = NULL;
3201 current->plug = NULL;
3202} 3138}
3203EXPORT_SYMBOL(blk_finish_plug); 3139EXPORT_SYMBOL(blk_finish_plug);
3204 3140
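
On the caller side, plugging is unchanged by the blk_start_plug()/blk_finish_plug() rework: a nested on-stack plug is now simply ignored instead of being installed, and only the outermost blk_finish_plug() flushes the queued requests. A minimal, hypothetical sketch (struct my_ctx and submit_my_bios() are placeholders):

#include <linux/blkdev.h>

static void my_submit_batch(struct my_ctx *ctx)
{
        struct blk_plug plug;

        blk_start_plug(&plug);          /* no-op if this task already has a plug */
        submit_my_bios(ctx);            /* submitted I/O collects on the plug lists */
        blk_finish_plug(&plug);         /* flushes only if this is the outermost plug */
}

Such a helper can therefore be called both directly and from a context that already started a plug, without any extra bookkeeping.
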
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 9924725fa50d..3fec8a29d0fa 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -53,7 +53,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
53 rq_end_io_fn *done) 53 rq_end_io_fn *done)
54{ 54{
55 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 55 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
56 bool is_pm_resume;
57 56
58 WARN_ON(irqs_disabled()); 57 WARN_ON(irqs_disabled());
59 WARN_ON(rq->cmd_type == REQ_TYPE_FS); 58 WARN_ON(rq->cmd_type == REQ_TYPE_FS);
@@ -70,12 +69,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
70 return; 69 return;
71 } 70 }
72 71
73 /*
74 * need to check this before __blk_run_queue(), because rq can
75 * be freed before that returns.
76 */
77 is_pm_resume = rq->cmd_type == REQ_TYPE_PM_RESUME;
78
79 spin_lock_irq(q->queue_lock); 72 spin_lock_irq(q->queue_lock);
80 73
81 if (unlikely(blk_queue_dying(q))) { 74 if (unlikely(blk_queue_dying(q))) {
@@ -88,9 +81,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
88 81
89 __elv_add_request(q, rq, where); 82 __elv_add_request(q, rq, where);
90 __blk_run_queue(q); 83 __blk_run_queue(q);
91 /* the queue is stopped so it won't be run */
92 if (is_pm_resume)
93 __blk_run_queue_uncond(q);
94 spin_unlock_irq(q->queue_lock); 84 spin_unlock_irq(q->queue_lock);
95} 85}
96EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); 86EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index fd3fee81c23c..30a0d9f89017 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -589,7 +589,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
589 !blk_write_same_mergeable(rq->bio, bio)) 589 !blk_write_same_mergeable(rq->bio, bio))
590 return false; 590 return false;
591 591
592 if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) { 592 /* Only check gaps if the bio carries data */
593 if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) && bio_has_data(bio)) {
593 struct bio_vec *bprev; 594 struct bio_vec *bprev;
594 595
595 bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1]; 596 bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1];
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index be3290cc0644..9b6e28830b82 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -438,6 +438,39 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx,
438 } 438 }
439} 439}
440 440
441static void bt_tags_for_each(struct blk_mq_tags *tags,
442 struct blk_mq_bitmap_tags *bt, unsigned int off,
443 busy_tag_iter_fn *fn, void *data, bool reserved)
444{
445 struct request *rq;
446 int bit, i;
447
448 if (!tags->rqs)
449 return;
450 for (i = 0; i < bt->map_nr; i++) {
451 struct blk_align_bitmap *bm = &bt->map[i];
452
453 for (bit = find_first_bit(&bm->word, bm->depth);
454 bit < bm->depth;
455 bit = find_next_bit(&bm->word, bm->depth, bit + 1)) {
456 rq = blk_mq_tag_to_rq(tags, off + bit);
457 fn(rq, data, reserved);
458 }
459
460 off += (1 << bt->bits_per_word);
461 }
462}
463
464void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
465 void *priv)
466{
467 if (tags->nr_reserved_tags)
468 bt_tags_for_each(tags, &tags->breserved_tags, 0, fn, priv, true);
469 bt_tags_for_each(tags, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv,
470 false);
471}
472EXPORT_SYMBOL(blk_mq_all_tag_busy_iter);
473
441void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, 474void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
442 void *priv) 475 void *priv)
443{ 476{
@@ -580,6 +613,11 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
580 if (!tags) 613 if (!tags)
581 return NULL; 614 return NULL;
582 615
616 if (!zalloc_cpumask_var(&tags->cpumask, GFP_KERNEL)) {
617 kfree(tags);
618 return NULL;
619 }
620
583 tags->nr_tags = total_tags; 621 tags->nr_tags = total_tags;
584 tags->nr_reserved_tags = reserved_tags; 622 tags->nr_reserved_tags = reserved_tags;
585 623
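
blk_mq_all_tag_busy_iter() walks every tag currently allocated in one blk_mq_tags map, reserved tags first, and invokes the callback on the corresponding request; together with the new per-tag-map cpumask it gives drivers a way to inspect outstanding requests without poking at block-layer internals. Below is a hypothetical sketch that merely counts in-flight requests across a tag set (names are illustrative; a real user, e.g. one failing requests after a controller died, would do more per request):

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Called once for each request that currently owns a tag. */
static void my_count_busy(struct request *rq, void *data, bool reserved)
{
        unsigned int *count = data;

        (*count)++;
}

static unsigned int my_count_in_flight(struct blk_mq_tag_set *set)
{
        unsigned int count = 0;
        int i;

        for (i = 0; i < set->nr_hw_queues; i++)
                if (set->tags[i])
                        blk_mq_all_tag_busy_iter(set->tags[i],
                                                 my_count_busy, &count);
        return count;
}
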
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 90767b370308..75893a34237d 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -44,6 +44,7 @@ struct blk_mq_tags {
44 struct list_head page_list; 44 struct list_head page_list;
45 45
46 int alloc_policy; 46 int alloc_policy;
47 cpumask_var_t cpumask;
47}; 48};
48 49
49 50
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 594eea04266e..f53779692c77 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -89,7 +89,8 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
89 return -EBUSY; 89 return -EBUSY;
90 90
91 ret = wait_event_interruptible(q->mq_freeze_wq, 91 ret = wait_event_interruptible(q->mq_freeze_wq,
92 !q->mq_freeze_depth || blk_queue_dying(q)); 92 !atomic_read(&q->mq_freeze_depth) ||
93 blk_queue_dying(q));
93 if (blk_queue_dying(q)) 94 if (blk_queue_dying(q))
94 return -ENODEV; 95 return -ENODEV;
95 if (ret) 96 if (ret)
@@ -112,13 +113,10 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
112 113
113void blk_mq_freeze_queue_start(struct request_queue *q) 114void blk_mq_freeze_queue_start(struct request_queue *q)
114{ 115{
115 bool freeze; 116 int freeze_depth;
116 117
117 spin_lock_irq(q->queue_lock); 118 freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
118 freeze = !q->mq_freeze_depth++; 119 if (freeze_depth == 1) {
119 spin_unlock_irq(q->queue_lock);
120
121 if (freeze) {
122 percpu_ref_kill(&q->mq_usage_counter); 120 percpu_ref_kill(&q->mq_usage_counter);
123 blk_mq_run_hw_queues(q, false); 121 blk_mq_run_hw_queues(q, false);
124 } 122 }
@@ -143,13 +141,11 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
143 141
144void blk_mq_unfreeze_queue(struct request_queue *q) 142void blk_mq_unfreeze_queue(struct request_queue *q)
145{ 143{
146 bool wake; 144 int freeze_depth;
147 145
148 spin_lock_irq(q->queue_lock); 146 freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
149 wake = !--q->mq_freeze_depth; 147 WARN_ON_ONCE(freeze_depth < 0);
150 WARN_ON_ONCE(q->mq_freeze_depth < 0); 148 if (!freeze_depth) {
151 spin_unlock_irq(q->queue_lock);
152 if (wake) {
153 percpu_ref_reinit(&q->mq_usage_counter); 149 percpu_ref_reinit(&q->mq_usage_counter);
154 wake_up_all(&q->mq_freeze_wq); 150 wake_up_all(&q->mq_freeze_wq);
155 } 151 }
@@ -1237,6 +1233,38 @@ static struct request *blk_mq_map_request(struct request_queue *q,
1237 return rq; 1233 return rq;
1238} 1234}
1239 1235
1236static int blk_mq_direct_issue_request(struct request *rq)
1237{
1238 int ret;
1239 struct request_queue *q = rq->q;
1240 struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q,
1241 rq->mq_ctx->cpu);
1242 struct blk_mq_queue_data bd = {
1243 .rq = rq,
1244 .list = NULL,
1245 .last = 1
1246 };
1247
1248 /*
1249 * For OK queue, we are done. For error, kill it. Any other
1250 * error (busy), just add it to our list as we previously
1251 * would have done
1252 */
1253 ret = q->mq_ops->queue_rq(hctx, &bd);
1254 if (ret == BLK_MQ_RQ_QUEUE_OK)
1255 return 0;
1256 else {
1257 __blk_mq_requeue_request(rq);
1258
1259 if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
1260 rq->errors = -EIO;
1261 blk_mq_end_request(rq, rq->errors);
1262 return 0;
1263 }
1264 return -1;
1265 }
1266}
1267
1240/* 1268/*
1241 * Multiple hardware queue variant. This will not use per-process plugs, 1269 * Multiple hardware queue variant. This will not use per-process plugs,
1242 * but will attempt to bypass the hctx queueing if we can go straight to 1270 * but will attempt to bypass the hctx queueing if we can go straight to
@@ -1248,6 +1276,9 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
1248 const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); 1276 const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
1249 struct blk_map_ctx data; 1277 struct blk_map_ctx data;
1250 struct request *rq; 1278 struct request *rq;
1279 unsigned int request_count = 0;
1280 struct blk_plug *plug;
1281 struct request *same_queue_rq = NULL;
1251 1282
1252 blk_queue_bounce(q, &bio); 1283 blk_queue_bounce(q, &bio);
1253 1284
@@ -1256,6 +1287,10 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
1256 return; 1287 return;
1257 } 1288 }
1258 1289
1290 if (!is_flush_fua && !blk_queue_nomerges(q) &&
1291 blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
1292 return;
1293
1259 rq = blk_mq_map_request(q, bio, &data); 1294 rq = blk_mq_map_request(q, bio, &data);
1260 if (unlikely(!rq)) 1295 if (unlikely(!rq))
1261 return; 1296 return;
@@ -1266,38 +1301,42 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
1266 goto run_queue; 1301 goto run_queue;
1267 } 1302 }
1268 1303
1304 plug = current->plug;
1269 /* 1305 /*
1270 * If the driver supports defer issued based on 'last', then 1306 * If the driver supports defer issued based on 'last', then
1271 * queue it up like normal since we can potentially save some 1307 * queue it up like normal since we can potentially save some
1272 * CPU this way. 1308 * CPU this way.
1273 */ 1309 */
1274 if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) { 1310 if (((plug && !blk_queue_nomerges(q)) || is_sync) &&
1275 struct blk_mq_queue_data bd = { 1311 !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
1276 .rq = rq, 1312 struct request *old_rq = NULL;
1277 .list = NULL,
1278 .last = 1
1279 };
1280 int ret;
1281 1313
1282 blk_mq_bio_to_request(rq, bio); 1314 blk_mq_bio_to_request(rq, bio);
1283 1315
1284 /* 1316 /*
1285 * For OK queue, we are done. For error, kill it. Any other 1317 * we do limited pluging. If bio can be merged, do merge.
1286 * error (busy), just add it to our list as we previously 1318 * Otherwise the existing request in the plug list will be
1287 * would have done 1319 * issued. So the plug list will have one request at most
1288 */ 1320 */
1289 ret = q->mq_ops->queue_rq(data.hctx, &bd); 1321 if (plug) {
1290 if (ret == BLK_MQ_RQ_QUEUE_OK) 1322 /*
1291 goto done; 1323 * The plug list might get flushed before this. If that
1292 else { 1324 * happens, same_queue_rq is invalid and plug list is empty
1293 __blk_mq_requeue_request(rq); 1325 **/
1294 1326 if (same_queue_rq && !list_empty(&plug->mq_list)) {
1295 if (ret == BLK_MQ_RQ_QUEUE_ERROR) { 1327 old_rq = same_queue_rq;
1296 rq->errors = -EIO; 1328 list_del_init(&old_rq->queuelist);
1297 blk_mq_end_request(rq, rq->errors);
1298 goto done;
1299 } 1329 }
1300 } 1330 list_add_tail(&rq->queuelist, &plug->mq_list);
1331 } else /* is_sync */
1332 old_rq = rq;
1333 blk_mq_put_ctx(data.ctx);
1334 if (!old_rq)
1335 return;
1336 if (!blk_mq_direct_issue_request(old_rq))
1337 return;
1338 blk_mq_insert_request(old_rq, false, true, true);
1339 return;
1301 } 1340 }
1302 1341
1303 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { 1342 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1310,7 +1349,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
1310run_queue: 1349run_queue:
1311 blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); 1350 blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
1312 } 1351 }
1313done:
1314 blk_mq_put_ctx(data.ctx); 1352 blk_mq_put_ctx(data.ctx);
1315} 1353}
1316 1354
@@ -1322,16 +1360,11 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
1322{ 1360{
1323 const int is_sync = rw_is_sync(bio->bi_rw); 1361 const int is_sync = rw_is_sync(bio->bi_rw);
1324 const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); 1362 const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
1325 unsigned int use_plug, request_count = 0; 1363 struct blk_plug *plug;
1364 unsigned int request_count = 0;
1326 struct blk_map_ctx data; 1365 struct blk_map_ctx data;
1327 struct request *rq; 1366 struct request *rq;
1328 1367
1329 /*
1330 * If we have multiple hardware queues, just go directly to
1331 * one of those for sync IO.
1332 */
1333 use_plug = !is_flush_fua && !is_sync;
1334
1335 blk_queue_bounce(q, &bio); 1368 blk_queue_bounce(q, &bio);
1336 1369
1337 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 1370 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
@@ -1339,8 +1372,8 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
1339 return; 1372 return;
1340 } 1373 }
1341 1374
1342 if (use_plug && !blk_queue_nomerges(q) && 1375 if (!is_flush_fua && !blk_queue_nomerges(q) &&
1343 blk_attempt_plug_merge(q, bio, &request_count)) 1376 blk_attempt_plug_merge(q, bio, &request_count, NULL))
1344 return; 1377 return;
1345 1378
1346 rq = blk_mq_map_request(q, bio, &data); 1379 rq = blk_mq_map_request(q, bio, &data);
@@ -1358,21 +1391,18 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
1358 * utilize that to temporarily store requests until the task is 1391 * utilize that to temporarily store requests until the task is
1359 * either done or scheduled away. 1392 * either done or scheduled away.
1360 */ 1393 */
1361 if (use_plug) { 1394 plug = current->plug;
1362 struct blk_plug *plug = current->plug; 1395 if (plug) {
1363 1396 blk_mq_bio_to_request(rq, bio);
1364 if (plug) { 1397 if (list_empty(&plug->mq_list))
1365 blk_mq_bio_to_request(rq, bio); 1398 trace_block_plug(q);
1366 if (list_empty(&plug->mq_list)) 1399 else if (request_count >= BLK_MAX_REQUEST_COUNT) {
1367 trace_block_plug(q); 1400 blk_flush_plug_list(plug, false);
1368 else if (request_count >= BLK_MAX_REQUEST_COUNT) { 1401 trace_block_plug(q);
1369 blk_flush_plug_list(plug, false);
1370 trace_block_plug(q);
1371 }
1372 list_add_tail(&rq->queuelist, &plug->mq_list);
1373 blk_mq_put_ctx(data.ctx);
1374 return;
1375 } 1402 }
1403 list_add_tail(&rq->queuelist, &plug->mq_list);
1404 blk_mq_put_ctx(data.ctx);
1405 return;
1376 } 1406 }
1377 1407
1378 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { 1408 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1508,7 +1538,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1508 i++; 1538 i++;
1509 } 1539 }
1510 } 1540 }
1511
1512 return tags; 1541 return tags;
1513 1542
1514fail: 1543fail:
@@ -1792,6 +1821,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
1792 1821
1793 hctx = q->mq_ops->map_queue(q, i); 1822 hctx = q->mq_ops->map_queue(q, i);
1794 cpumask_set_cpu(i, hctx->cpumask); 1823 cpumask_set_cpu(i, hctx->cpumask);
1824 cpumask_set_cpu(i, hctx->tags->cpumask);
1795 ctx->index_hw = hctx->nr_ctx; 1825 ctx->index_hw = hctx->nr_ctx;
1796 hctx->ctxs[hctx->nr_ctx++] = ctx; 1826 hctx->ctxs[hctx->nr_ctx++] = ctx;
1797 } 1827 }
@@ -2056,7 +2086,7 @@ void blk_mq_free_queue(struct request_queue *q)
2056/* Basically redo blk_mq_init_queue with queue frozen */ 2086/* Basically redo blk_mq_init_queue with queue frozen */
2057static void blk_mq_queue_reinit(struct request_queue *q) 2087static void blk_mq_queue_reinit(struct request_queue *q)
2058{ 2088{
2059 WARN_ON_ONCE(!q->mq_freeze_depth); 2089 WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
2060 2090
2061 blk_mq_sysfs_unregister(q); 2091 blk_mq_sysfs_unregister(q);
2062 2092
@@ -2173,6 +2203,12 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
2173 return 0; 2203 return 0;
2174} 2204}
2175 2205
2206struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags)
2207{
2208 return tags->cpumask;
2209}
2210EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask);
2211
2176/* 2212/*
2177 * Alloc a tag set to be associated with one or more request queues. 2213 * Alloc a tag set to be associated with one or more request queues.
2178 * May fail with EINVAL for various error conditions. May adjust the 2214 * May fail with EINVAL for various error conditions. May adjust the
@@ -2234,8 +2270,10 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
2234 int i; 2270 int i;
2235 2271
2236 for (i = 0; i < set->nr_hw_queues; i++) { 2272 for (i = 0; i < set->nr_hw_queues; i++) {
2237 if (set->tags[i]) 2273 if (set->tags[i]) {
2238 blk_mq_free_rq_map(set, set->tags[i], i); 2274 blk_mq_free_rq_map(set, set->tags[i], i);
2275 free_cpumask_var(set->tags[i]->cpumask);
2276 }
2239 } 2277 }
2240 2278
2241 kfree(set->tags); 2279 kfree(set->tags);
diff --git a/block/blk.h b/block/blk.h
index 43b036185712..026d9594142b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -78,7 +78,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
78bool bio_attempt_back_merge(struct request_queue *q, struct request *req, 78bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
79 struct bio *bio); 79 struct bio *bio);
80bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, 80bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
81 unsigned int *request_count); 81 unsigned int *request_count,
82 struct request **same_queue_rq);
82 83
83void blk_account_io_start(struct request *req, bool new_io); 84void blk_account_io_start(struct request *req, bool new_io);
84void blk_account_io_completion(struct request *req, unsigned int bytes); 85void blk_account_io_completion(struct request *req, unsigned int bytes);
@@ -193,8 +194,6 @@ int blk_try_merge(struct request *rq, struct bio *bio);
193 194
194void blk_queue_congestion_threshold(struct request_queue *q); 195void blk_queue_congestion_threshold(struct request_queue *q);
195 196
196void __blk_run_queue_uncond(struct request_queue *q);
197
198int blk_dev_init(void); 197int blk_dev_init(void);
199 198
200 199
diff --git a/block/bounce.c b/block/bounce.c
index ed9dd8067120..3ab0bce1c947 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -128,9 +128,6 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
128 struct bio_vec *bvec, *org_vec; 128 struct bio_vec *bvec, *org_vec;
129 int i; 129 int i;
130 130
131 if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
132 set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
133
134 /* 131 /*
135 * free up bounce indirect pages used 132 * free up bounce indirect pages used
136 */ 133 */
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5da8e6e9ab4b..d8ad45ccd8fa 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -67,6 +67,11 @@ static struct kmem_cache *cfq_pool;
67#define sample_valid(samples) ((samples) > 80) 67#define sample_valid(samples) ((samples) > 80)
68#define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node) 68#define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)
69 69
70/* blkio-related constants */
71#define CFQ_WEIGHT_MIN 10
72#define CFQ_WEIGHT_MAX 1000
73#define CFQ_WEIGHT_DEFAULT 500
74
70struct cfq_ttime { 75struct cfq_ttime {
71 unsigned long last_end_request; 76 unsigned long last_end_request;
72 77
@@ -212,6 +217,15 @@ struct cfqg_stats {
212#endif /* CONFIG_CFQ_GROUP_IOSCHED */ 217#endif /* CONFIG_CFQ_GROUP_IOSCHED */
213}; 218};
214 219
220/* Per-cgroup data */
221struct cfq_group_data {
222 /* must be the first member */
223 struct blkcg_policy_data pd;
224
225 unsigned int weight;
226 unsigned int leaf_weight;
227};
228
215/* This is per cgroup per device grouping structure */ 229/* This is per cgroup per device grouping structure */
216struct cfq_group { 230struct cfq_group {
217 /* must be the first member */ 231 /* must be the first member */
@@ -446,16 +460,6 @@ CFQ_CFQQ_FNS(deep);
446CFQ_CFQQ_FNS(wait_busy); 460CFQ_CFQQ_FNS(wait_busy);
447#undef CFQ_CFQQ_FNS 461#undef CFQ_CFQQ_FNS
448 462
449static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
450{
451 return pd ? container_of(pd, struct cfq_group, pd) : NULL;
452}
453
454static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
455{
456 return pd_to_blkg(&cfqg->pd);
457}
458
459#if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) 463#if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
460 464
461/* cfqg stats flags */ 465/* cfqg stats flags */
@@ -600,6 +604,22 @@ static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { }
600 604
601#ifdef CONFIG_CFQ_GROUP_IOSCHED 605#ifdef CONFIG_CFQ_GROUP_IOSCHED
602 606
607static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
608{
609 return pd ? container_of(pd, struct cfq_group, pd) : NULL;
610}
611
612static struct cfq_group_data
613*cpd_to_cfqgd(struct blkcg_policy_data *cpd)
614{
615 return cpd ? container_of(cpd, struct cfq_group_data, pd) : NULL;
616}
617
618static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
619{
620 return pd_to_blkg(&cfqg->pd);
621}
622
603static struct blkcg_policy blkcg_policy_cfq; 623static struct blkcg_policy blkcg_policy_cfq;
604 624
605static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) 625static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
@@ -607,6 +627,11 @@ static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
607 return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); 627 return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
608} 628}
609 629
630static struct cfq_group_data *blkcg_to_cfqgd(struct blkcg *blkcg)
631{
632 return cpd_to_cfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_cfq));
633}
634
610static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) 635static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
611{ 636{
612 struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent; 637 struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent;
@@ -1544,13 +1569,28 @@ static void cfqg_stats_init(struct cfqg_stats *stats)
1544#endif 1569#endif
1545} 1570}
1546 1571
1572static void cfq_cpd_init(const struct blkcg *blkcg)
1573{
1574 struct cfq_group_data *cgd =
1575 cpd_to_cfqgd(blkcg->pd[blkcg_policy_cfq.plid]);
1576
1577 if (blkcg == &blkcg_root) {
1578 cgd->weight = 2 * CFQ_WEIGHT_DEFAULT;
1579 cgd->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT;
1580 } else {
1581 cgd->weight = CFQ_WEIGHT_DEFAULT;
1582 cgd->leaf_weight = CFQ_WEIGHT_DEFAULT;
1583 }
1584}
1585
1547static void cfq_pd_init(struct blkcg_gq *blkg) 1586static void cfq_pd_init(struct blkcg_gq *blkg)
1548{ 1587{
1549 struct cfq_group *cfqg = blkg_to_cfqg(blkg); 1588 struct cfq_group *cfqg = blkg_to_cfqg(blkg);
1589 struct cfq_group_data *cgd = blkcg_to_cfqgd(blkg->blkcg);
1550 1590
1551 cfq_init_cfqg_base(cfqg); 1591 cfq_init_cfqg_base(cfqg);
1552 cfqg->weight = blkg->blkcg->cfq_weight; 1592 cfqg->weight = cgd->weight;
1553 cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight; 1593 cfqg->leaf_weight = cgd->leaf_weight;
1554 cfqg_stats_init(&cfqg->stats); 1594 cfqg_stats_init(&cfqg->stats);
1555 cfqg_stats_init(&cfqg->dead_stats); 1595 cfqg_stats_init(&cfqg->dead_stats);
1556} 1596}
@@ -1673,13 +1713,27 @@ static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v)
1673 1713
1674static int cfq_print_weight(struct seq_file *sf, void *v) 1714static int cfq_print_weight(struct seq_file *sf, void *v)
1675{ 1715{
1676 seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight); 1716 struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
1717 struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
1718 unsigned int val = 0;
1719
1720 if (cgd)
1721 val = cgd->weight;
1722
1723 seq_printf(sf, "%u\n", val);
1677 return 0; 1724 return 0;
1678} 1725}
1679 1726
1680static int cfq_print_leaf_weight(struct seq_file *sf, void *v) 1727static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
1681{ 1728{
1682 seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight); 1729 struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
1730 struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
1731 unsigned int val = 0;
1732
1733 if (cgd)
1734 val = cgd->leaf_weight;
1735
1736 seq_printf(sf, "%u\n", val);
1683 return 0; 1737 return 0;
1684} 1738}
1685 1739
@@ -1690,6 +1744,7 @@ static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
1690 struct blkcg *blkcg = css_to_blkcg(of_css(of)); 1744 struct blkcg *blkcg = css_to_blkcg(of_css(of));
1691 struct blkg_conf_ctx ctx; 1745 struct blkg_conf_ctx ctx;
1692 struct cfq_group *cfqg; 1746 struct cfq_group *cfqg;
1747 struct cfq_group_data *cfqgd;
1693 int ret; 1748 int ret;
1694 1749
1695 ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx); 1750 ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx);
@@ -1698,17 +1753,22 @@ static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
1698 1753
1699 ret = -EINVAL; 1754 ret = -EINVAL;
1700 cfqg = blkg_to_cfqg(ctx.blkg); 1755 cfqg = blkg_to_cfqg(ctx.blkg);
1756 cfqgd = blkcg_to_cfqgd(blkcg);
1757 if (!cfqg || !cfqgd)
1758 goto err;
1759
1701 if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) { 1760 if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) {
1702 if (!is_leaf_weight) { 1761 if (!is_leaf_weight) {
1703 cfqg->dev_weight = ctx.v; 1762 cfqg->dev_weight = ctx.v;
1704 cfqg->new_weight = ctx.v ?: blkcg->cfq_weight; 1763 cfqg->new_weight = ctx.v ?: cfqgd->weight;
1705 } else { 1764 } else {
1706 cfqg->dev_leaf_weight = ctx.v; 1765 cfqg->dev_leaf_weight = ctx.v;
1707 cfqg->new_leaf_weight = ctx.v ?: blkcg->cfq_leaf_weight; 1766 cfqg->new_leaf_weight = ctx.v ?: cfqgd->leaf_weight;
1708 } 1767 }
1709 ret = 0; 1768 ret = 0;
1710 } 1769 }
1711 1770
1771err:
1712 blkg_conf_finish(&ctx); 1772 blkg_conf_finish(&ctx);
1713 return ret ?: nbytes; 1773 return ret ?: nbytes;
1714} 1774}
@@ -1730,16 +1790,23 @@ static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
1730{ 1790{
1731 struct blkcg *blkcg = css_to_blkcg(css); 1791 struct blkcg *blkcg = css_to_blkcg(css);
1732 struct blkcg_gq *blkg; 1792 struct blkcg_gq *blkg;
1793 struct cfq_group_data *cfqgd;
1794 int ret = 0;
1733 1795
1734 if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX) 1796 if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX)
1735 return -EINVAL; 1797 return -EINVAL;
1736 1798
1737 spin_lock_irq(&blkcg->lock); 1799 spin_lock_irq(&blkcg->lock);
1800 cfqgd = blkcg_to_cfqgd(blkcg);
1801 if (!cfqgd) {
1802 ret = -EINVAL;
1803 goto out;
1804 }
1738 1805
1739 if (!is_leaf_weight) 1806 if (!is_leaf_weight)
1740 blkcg->cfq_weight = val; 1807 cfqgd->weight = val;
1741 else 1808 else
1742 blkcg->cfq_leaf_weight = val; 1809 cfqgd->leaf_weight = val;
1743 1810
1744 hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { 1811 hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
1745 struct cfq_group *cfqg = blkg_to_cfqg(blkg); 1812 struct cfq_group *cfqg = blkg_to_cfqg(blkg);
@@ -1749,15 +1816,16 @@ static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
1749 1816
1750 if (!is_leaf_weight) { 1817 if (!is_leaf_weight) {
1751 if (!cfqg->dev_weight) 1818 if (!cfqg->dev_weight)
1752 cfqg->new_weight = blkcg->cfq_weight; 1819 cfqg->new_weight = cfqgd->weight;
1753 } else { 1820 } else {
1754 if (!cfqg->dev_leaf_weight) 1821 if (!cfqg->dev_leaf_weight)
1755 cfqg->new_leaf_weight = blkcg->cfq_leaf_weight; 1822 cfqg->new_leaf_weight = cfqgd->leaf_weight;
1756 } 1823 }
1757 } 1824 }
1758 1825
1826out:
1759 spin_unlock_irq(&blkcg->lock); 1827 spin_unlock_irq(&blkcg->lock);
1760 return 0; 1828 return ret;
1761} 1829}
1762 1830
1763static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft, 1831static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -4477,6 +4545,18 @@ out_free:
4477 return ret; 4545 return ret;
4478} 4546}
4479 4547
4548static void cfq_registered_queue(struct request_queue *q)
4549{
4550 struct elevator_queue *e = q->elevator;
4551 struct cfq_data *cfqd = e->elevator_data;
4552
4553 /*
4554 * Default to IOPS mode with no idling for SSDs
4555 */
4556 if (blk_queue_nonrot(q))
4557 cfqd->cfq_slice_idle = 0;
4558}
4559
4480/* 4560/*
4481 * sysfs parts below --> 4561 * sysfs parts below -->
4482 */ 4562 */
@@ -4592,6 +4672,7 @@ static struct elevator_type iosched_cfq = {
4592 .elevator_may_queue_fn = cfq_may_queue, 4672 .elevator_may_queue_fn = cfq_may_queue,
4593 .elevator_init_fn = cfq_init_queue, 4673 .elevator_init_fn = cfq_init_queue,
4594 .elevator_exit_fn = cfq_exit_queue, 4674 .elevator_exit_fn = cfq_exit_queue,
4675 .elevator_registered_fn = cfq_registered_queue,
4595 }, 4676 },
4596 .icq_size = sizeof(struct cfq_io_cq), 4677 .icq_size = sizeof(struct cfq_io_cq),
4597 .icq_align = __alignof__(struct cfq_io_cq), 4678 .icq_align = __alignof__(struct cfq_io_cq),
@@ -4603,8 +4684,10 @@ static struct elevator_type iosched_cfq = {
4603#ifdef CONFIG_CFQ_GROUP_IOSCHED 4684#ifdef CONFIG_CFQ_GROUP_IOSCHED
4604static struct blkcg_policy blkcg_policy_cfq = { 4685static struct blkcg_policy blkcg_policy_cfq = {
4605 .pd_size = sizeof(struct cfq_group), 4686 .pd_size = sizeof(struct cfq_group),
4687 .cpd_size = sizeof(struct cfq_group_data),
4606 .cftypes = cfq_blkcg_files, 4688 .cftypes = cfq_blkcg_files,
4607 4689
4690 .cpd_init_fn = cfq_cpd_init,
4608 .pd_init_fn = cfq_pd_init, 4691 .pd_init_fn = cfq_pd_init,
4609 .pd_offline_fn = cfq_pd_offline, 4692 .pd_offline_fn = cfq_pd_offline,
4610 .pd_reset_stats_fn = cfq_pd_reset_stats, 4693 .pd_reset_stats_fn = cfq_pd_reset_stats,
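
The cfq hunks above move the per-cgroup default weights out of struct blkcg and into policy-private data reached through blkcg_to_cfqgd(), allocated via the new .cpd_size/.cpd_init_fn fields. The struct itself is defined outside this excerpt; the sketch below shows only the members these hunks actually use (weight, leaf_weight), and the embedded blkcg_policy_data member is an assumption about how the .cpd_size allocation is laid out.

/*
 * Hedged sketch, not the verbatim kernel definition: per-blkcg policy
 * data for cfq as implied by the hunks above.
 */
struct cfq_group_data {
	/* assumed: generic per-policy data sized via .cpd_size comes first */
	struct blkcg_policy_data pd;

	unsigned int weight;		/* default weight for child groups */
	unsigned int leaf_weight;	/* default weight for leaf groups */
};
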
diff --git a/block/elevator.c b/block/elevator.c
index 8985038f398c..942579d04128 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -806,6 +806,8 @@ int elv_register_queue(struct request_queue *q)
806 } 806 }
807 kobject_uevent(&e->kobj, KOBJ_ADD); 807 kobject_uevent(&e->kobj, KOBJ_ADD);
808 e->registered = 1; 808 e->registered = 1;
809 if (e->type->ops.elevator_registered_fn)
810 e->type->ops.elevator_registered_fn(q);
809 } 811 }
810 return error; 812 return error;
811} 813}
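
The elevator.c hunk adds an optional elevator_registered_fn hook that runs once elv_register_queue() has created the scheduler's sysfs objects; cfq uses it above to switch to IOPS mode (cfq_slice_idle = 0) on non-rotational queues. A minimal sketch of another scheduler wiring the same hook follows; everything except the ops field name is purely illustrative.

#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/module.h>

/* Hypothetical scheduler: tune itself once registration is complete. */
static void myiosched_registered_queue(struct request_queue *q)
{
	if (blk_queue_nonrot(q))
		pr_info("myiosched: non-rotational queue, disabling idling\n");
}

static struct elevator_type iosched_my = {
	.ops = {
		/* mandatory init/exit/dispatch hooks omitted in this sketch */
		.elevator_registered_fn	= myiosched_registered_queue,
	},
	.elevator_name	= "myiosched",
	.elevator_owner	= THIS_MODULE,
};
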
diff --git a/block/ioctl.c b/block/ioctl.c
index 7d8befde2aca..8061eba42887 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -150,21 +150,48 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
150 } 150 }
151} 151}
152 152
153static int blkdev_reread_part(struct block_device *bdev) 153/*
154 * This is an exported API for the block driver, and will not
155 * acquire bd_mutex. This API should be used in case that
156 * caller has held bd_mutex already.
157 */
158int __blkdev_reread_part(struct block_device *bdev)
154{ 159{
155 struct gendisk *disk = bdev->bd_disk; 160 struct gendisk *disk = bdev->bd_disk;
156 int res;
157 161
158 if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) 162 if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
159 return -EINVAL; 163 return -EINVAL;
160 if (!capable(CAP_SYS_ADMIN)) 164 if (!capable(CAP_SYS_ADMIN))
161 return -EACCES; 165 return -EACCES;
162 if (!mutex_trylock(&bdev->bd_mutex)) 166
163 return -EBUSY; 167 lockdep_assert_held(&bdev->bd_mutex);
164 res = rescan_partitions(disk, bdev); 168
169 return rescan_partitions(disk, bdev);
170}
171EXPORT_SYMBOL(__blkdev_reread_part);
172
173/*
174 * This is an exported API for the block driver, and will
175 * try to acquire bd_mutex. If bd_mutex has been held already
176 * in current context, please call __blkdev_reread_part().
177 *
178 * Make sure the held locks in current context aren't required
179 * in open()/close() handler and I/O path for avoiding ABBA deadlock:
180 * - bd_mutex is held before calling block driver's open/close
181 * handler
182 * - reading partition table may submit I/O to the block device
183 */
184int blkdev_reread_part(struct block_device *bdev)
185{
186 int res;
187
188 mutex_lock(&bdev->bd_mutex);
189 res = __blkdev_reread_part(bdev);
165 mutex_unlock(&bdev->bd_mutex); 190 mutex_unlock(&bdev->bd_mutex);
191
166 return res; 192 return res;
167} 193}
194EXPORT_SYMBOL(blkdev_reread_part);
168 195
169static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, 196static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
170 uint64_t len, int secure) 197 uint64_t len, int secure)
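
blkdev_reread_part() now comes in two flavours: the plain version takes bd_mutex itself, while __blkdev_reread_part() only asserts it and is meant for callers that already hold the mutex, avoiding the ABBA deadlock described in the comment above. A hedged usage sketch (the driver function names are invented; the two exported helpers are the ones added above):

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/genhd.h>

/* Capacity changed while not holding bd_mutex: use the locking variant. */
static int mydrv_resize(struct block_device *bdev, sector_t new_sectors)
{
	set_capacity(bdev->bd_disk, new_sectors);
	return blkdev_reread_part(bdev);
}

/*
 * Called from a path that already holds bd_mutex: the __ variant only
 * lockdep-asserts the mutex instead of acquiring it again.
 */
static int mydrv_rescan_locked(struct block_device *bdev)
{
	return __blkdev_reread_part(bdev);
}
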
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 39e5f7fae3ef..83a7ba4a3eec 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -230,29 +230,40 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
230 int result, flags; 230 int result, flags;
231 struct nbd_request request; 231 struct nbd_request request;
232 unsigned long size = blk_rq_bytes(req); 232 unsigned long size = blk_rq_bytes(req);
233 u32 type;
234
235 if (req->cmd_type == REQ_TYPE_DRV_PRIV)
236 type = NBD_CMD_DISC;
237 else if (req->cmd_flags & REQ_DISCARD)
238 type = NBD_CMD_TRIM;
239 else if (req->cmd_flags & REQ_FLUSH)
240 type = NBD_CMD_FLUSH;
241 else if (rq_data_dir(req) == WRITE)
242 type = NBD_CMD_WRITE;
243 else
244 type = NBD_CMD_READ;
233 245
234 memset(&request, 0, sizeof(request)); 246 memset(&request, 0, sizeof(request));
235 request.magic = htonl(NBD_REQUEST_MAGIC); 247 request.magic = htonl(NBD_REQUEST_MAGIC);
236 request.type = htonl(nbd_cmd(req)); 248 request.type = htonl(type);
237 249 if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) {
238 if (nbd_cmd(req) != NBD_CMD_FLUSH && nbd_cmd(req) != NBD_CMD_DISC) {
239 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); 250 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
240 request.len = htonl(size); 251 request.len = htonl(size);
241 } 252 }
242 memcpy(request.handle, &req, sizeof(req)); 253 memcpy(request.handle, &req, sizeof(req));
243 254
244 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", 255 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
245 req, nbdcmd_to_ascii(nbd_cmd(req)), 256 req, nbdcmd_to_ascii(type),
246 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); 257 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
247 result = sock_xmit(nbd, 1, &request, sizeof(request), 258 result = sock_xmit(nbd, 1, &request, sizeof(request),
248 (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); 259 (type == NBD_CMD_WRITE) ? MSG_MORE : 0);
249 if (result <= 0) { 260 if (result <= 0) {
250 dev_err(disk_to_dev(nbd->disk), 261 dev_err(disk_to_dev(nbd->disk),
251 "Send control failed (result %d)\n", result); 262 "Send control failed (result %d)\n", result);
252 return -EIO; 263 return -EIO;
253 } 264 }
254 265
255 if (nbd_cmd(req) == NBD_CMD_WRITE) { 266 if (type == NBD_CMD_WRITE) {
256 struct req_iterator iter; 267 struct req_iterator iter;
257 struct bio_vec bvec; 268 struct bio_vec bvec;
258 /* 269 /*
@@ -352,7 +363,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
352 } 363 }
353 364
354 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); 365 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
355 if (nbd_cmd(req) == NBD_CMD_READ) { 366 if (rq_data_dir(req) != WRITE) {
356 struct req_iterator iter; 367 struct req_iterator iter;
357 struct bio_vec bvec; 368 struct bio_vec bvec;
358 369
@@ -452,23 +463,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
452 if (req->cmd_type != REQ_TYPE_FS) 463 if (req->cmd_type != REQ_TYPE_FS)
453 goto error_out; 464 goto error_out;
454 465
455 nbd_cmd(req) = NBD_CMD_READ; 466 if (rq_data_dir(req) == WRITE &&
456 if (rq_data_dir(req) == WRITE) { 467 (nbd->flags & NBD_FLAG_READ_ONLY)) {
457 if ((req->cmd_flags & REQ_DISCARD)) { 468 dev_err(disk_to_dev(nbd->disk),
458 WARN_ON(!(nbd->flags & NBD_FLAG_SEND_TRIM)); 469 "Write on read-only\n");
459 nbd_cmd(req) = NBD_CMD_TRIM; 470 goto error_out;
460 } else
461 nbd_cmd(req) = NBD_CMD_WRITE;
462 if (nbd->flags & NBD_FLAG_READ_ONLY) {
463 dev_err(disk_to_dev(nbd->disk),
464 "Write on read-only\n");
465 goto error_out;
466 }
467 }
468
469 if (req->cmd_flags & REQ_FLUSH) {
470 BUG_ON(unlikely(blk_rq_sectors(req)));
471 nbd_cmd(req) = NBD_CMD_FLUSH;
472 } 471 }
473 472
474 req->errors = 0; 473 req->errors = 0;
@@ -592,8 +591,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
592 fsync_bdev(bdev); 591 fsync_bdev(bdev);
593 mutex_lock(&nbd->tx_lock); 592 mutex_lock(&nbd->tx_lock);
594 blk_rq_init(NULL, &sreq); 593 blk_rq_init(NULL, &sreq);
595 sreq.cmd_type = REQ_TYPE_SPECIAL; 594 sreq.cmd_type = REQ_TYPE_DRV_PRIV;
596 nbd_cmd(&sreq) = NBD_CMD_DISC;
597 595
598 /* Check again after getting mutex back. */ 596 /* Check again after getting mutex back. */
599 if (!nbd->sock) 597 if (!nbd->sock)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index d48715b287e6..dbb4da1cdca8 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -442,7 +442,7 @@ static char *pd_buf; /* buffer for request in progress */
442 442
443static enum action do_pd_io_start(void) 443static enum action do_pd_io_start(void)
444{ 444{
445 if (pd_req->cmd_type == REQ_TYPE_SPECIAL) { 445 if (pd_req->cmd_type == REQ_TYPE_DRV_PRIV) {
446 phase = pd_special; 446 phase = pd_special;
447 return pd_special(); 447 return pd_special();
448 } 448 }
@@ -725,7 +725,7 @@ static int pd_special_command(struct pd_unit *disk,
725 if (IS_ERR(rq)) 725 if (IS_ERR(rq))
726 return PTR_ERR(rq); 726 return PTR_ERR(rq);
727 727
728 rq->cmd_type = REQ_TYPE_SPECIAL; 728 rq->cmd_type = REQ_TYPE_DRV_PRIV;
729 rq->special = func; 729 rq->special = func;
730 730
731 err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); 731 err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 5d552857de41..59c91d49b14b 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -620,7 +620,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
620 spin_unlock_irq(&host->lock); 620 spin_unlock_irq(&host->lock);
621 621
622 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); 622 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
623 crq->rq->cmd_type = REQ_TYPE_SPECIAL; 623 crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
624 crq->rq->special = crq; 624 crq->rq->special = crq;
625 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); 625 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
626 626
@@ -661,7 +661,7 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
661 crq->msg_bucket = (u32) rc; 661 crq->msg_bucket = (u32) rc;
662 662
663 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); 663 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
664 crq->rq->cmd_type = REQ_TYPE_SPECIAL; 664 crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
665 crq->rq->special = crq; 665 crq->rq->special = crq;
666 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); 666 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
667 667
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5ea2f0bbbc7c..d4d05f064d39 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -124,7 +124,7 @@ static inline void virtblk_request_done(struct request *req)
124 req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); 124 req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
125 req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); 125 req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
126 req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); 126 req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
127 } else if (req->cmd_type == REQ_TYPE_SPECIAL) { 127 } else if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
128 req->errors = (error != 0); 128 req->errors = (error != 0);
129 } 129 }
130 130
@@ -188,7 +188,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
188 vbr->out_hdr.sector = 0; 188 vbr->out_hdr.sector = 0;
189 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); 189 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
190 break; 190 break;
191 case REQ_TYPE_SPECIAL: 191 case REQ_TYPE_DRV_PRIV:
192 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); 192 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
193 vbr->out_hdr.sector = 0; 193 vbr->out_hdr.sector = 0;
194 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); 194 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
@@ -251,7 +251,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
251 return PTR_ERR(req); 251 return PTR_ERR(req);
252 } 252 }
253 253
254 req->cmd_type = REQ_TYPE_SPECIAL; 254 req->cmd_type = REQ_TYPE_DRV_PRIV;
255 err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); 255 err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
256 blk_put_request(req); 256 blk_put_request(req);
257 257
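
The pd.c, sx8.c and virtio_blk.c hunks are part of the same mechanical rename: REQ_TYPE_SPECIAL becomes REQ_TYPE_DRV_PRIV, making it explicit that the request carries a driver-private command. The pattern those call sites share looks roughly like the sketch below; the function and parameter names are invented, the block-layer calls are the ones used in the hunks.

#include <linux/blkdev.h>

static int mydrv_private_cmd(struct request_queue *q, struct gendisk *disk)
{
	struct request *rq;
	int err;

	rq = blk_get_request(q, READ, __GFP_WAIT);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	rq->cmd_type = REQ_TYPE_DRV_PRIV;	/* formerly REQ_TYPE_SPECIAL */
	rq->cmd_flags |= REQ_QUIET;

	err = blk_execute_rq(q, disk, rq, 0);	/* synchronous execution */
	blk_put_request(rq);
	return err;
}
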
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index fac3d9da2e07..1362ad80a76c 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -93,7 +93,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
93 int error; 93 int error;
94 94
95 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 95 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
96 rq->cmd_type = REQ_TYPE_SPECIAL; 96 rq->cmd_type = REQ_TYPE_DRV_PRIV;
97 rq->special = (char *)pc; 97 rq->special = (char *)pc;
98 98
99 if (buf && bufflen) { 99 if (buf && bufflen) {
@@ -191,7 +191,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
191 191
192 BUG_ON(sense_len > sizeof(*sense)); 192 BUG_ON(sense_len > sizeof(*sense));
193 193
194 if (rq->cmd_type == REQ_TYPE_SENSE || drive->sense_rq_armed) 194 if (rq->cmd_type == REQ_TYPE_ATA_SENSE || drive->sense_rq_armed)
195 return; 195 return;
196 196
197 memset(sense, 0, sizeof(*sense)); 197 memset(sense, 0, sizeof(*sense));
@@ -210,7 +210,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
210 sense_rq->rq_disk = rq->rq_disk; 210 sense_rq->rq_disk = rq->rq_disk;
211 sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; 211 sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
212 sense_rq->cmd[4] = cmd_len; 212 sense_rq->cmd[4] = cmd_len;
213 sense_rq->cmd_type = REQ_TYPE_SENSE; 213 sense_rq->cmd_type = REQ_TYPE_ATA_SENSE;
214 sense_rq->cmd_flags |= REQ_PREEMPT; 214 sense_rq->cmd_flags |= REQ_PREEMPT;
215 215
216 if (drive->media == ide_tape) 216 if (drive->media == ide_tape)
@@ -310,7 +310,7 @@ int ide_cd_get_xferlen(struct request *rq)
310 switch (rq->cmd_type) { 310 switch (rq->cmd_type) {
311 case REQ_TYPE_FS: 311 case REQ_TYPE_FS:
312 return 32768; 312 return 32768;
313 case REQ_TYPE_SENSE: 313 case REQ_TYPE_ATA_SENSE:
314 case REQ_TYPE_BLOCK_PC: 314 case REQ_TYPE_BLOCK_PC:
315 case REQ_TYPE_ATA_PC: 315 case REQ_TYPE_ATA_PC:
316 return blk_rq_bytes(rq); 316 return blk_rq_bytes(rq);
@@ -477,7 +477,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
477 if (uptodate == 0) 477 if (uptodate == 0)
478 drive->failed_pc = NULL; 478 drive->failed_pc = NULL;
479 479
480 if (rq->cmd_type == REQ_TYPE_SPECIAL) { 480 if (rq->cmd_type == REQ_TYPE_DRV_PRIV) {
481 rq->errors = 0; 481 rq->errors = 0;
482 error = 0; 482 error = 0;
483 } else { 483 } else {
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 0b510bafd90e..64a6b827b3dd 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -210,7 +210,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
210static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) 210static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
211{ 211{
212 /* 212 /*
213 * For REQ_TYPE_SENSE, "rq->special" points to the original 213 * For REQ_TYPE_ATA_SENSE, "rq->special" points to the original
214 * failed request. Also, the sense data should be read 214 * failed request. Also, the sense data should be read
215 * directly from rq which might be different from the original 215 * directly from rq which might be different from the original
216 * sense buffer if it got copied during mapping. 216 * sense buffer if it got copied during mapping.
@@ -285,7 +285,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
285 "stat 0x%x", 285 "stat 0x%x",
286 rq->cmd[0], rq->cmd_type, err, stat); 286 rq->cmd[0], rq->cmd_type, err, stat);
287 287
288 if (rq->cmd_type == REQ_TYPE_SENSE) { 288 if (rq->cmd_type == REQ_TYPE_ATA_SENSE) {
289 /* 289 /*
290 * We got an error trying to get sense info from the drive 290 * We got an error trying to get sense info from the drive
291 * (probably while trying to recover from a former error). 291 * (probably while trying to recover from a former error).
@@ -526,7 +526,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
526 ide_expiry_t *expiry = NULL; 526 ide_expiry_t *expiry = NULL;
527 int dma_error = 0, dma, thislen, uptodate = 0; 527 int dma_error = 0, dma, thislen, uptodate = 0;
528 int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0; 528 int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0;
529 int sense = (rq->cmd_type == REQ_TYPE_SENSE); 529 int sense = (rq->cmd_type == REQ_TYPE_ATA_SENSE);
530 unsigned int timeout; 530 unsigned int timeout;
531 u16 len; 531 u16 len;
532 u8 ireason, stat; 532 u8 ireason, stat;
@@ -791,7 +791,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
791 if (cdrom_start_rw(drive, rq) == ide_stopped) 791 if (cdrom_start_rw(drive, rq) == ide_stopped)
792 goto out_end; 792 goto out_end;
793 break; 793 break;
794 case REQ_TYPE_SENSE: 794 case REQ_TYPE_ATA_SENSE:
795 case REQ_TYPE_BLOCK_PC: 795 case REQ_TYPE_BLOCK_PC:
796 case REQ_TYPE_ATA_PC: 796 case REQ_TYPE_ATA_PC:
797 if (!rq->timeout) 797 if (!rq->timeout)
@@ -799,7 +799,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
799 799
800 cdrom_do_block_pc(drive, rq); 800 cdrom_do_block_pc(drive, rq);
801 break; 801 break;
802 case REQ_TYPE_SPECIAL: 802 case REQ_TYPE_DRV_PRIV:
803 /* right now this can only be a reset... */ 803 /* right now this can only be a reset... */
804 uptodate = 1; 804 uptodate = 1;
805 goto out_end; 805 goto out_end;
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 02caa7dd51c8..066e39036518 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,7 +304,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
304 int ret; 304 int ret;
305 305
306 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 306 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
307 rq->cmd_type = REQ_TYPE_SPECIAL; 307 rq->cmd_type = REQ_TYPE_DRV_PRIV;
308 rq->cmd_flags = REQ_QUIET; 308 rq->cmd_flags = REQ_QUIET;
309 ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); 309 ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
310 blk_put_request(rq); 310 blk_put_request(rq);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 9e98122f646e..b05a74d78ef5 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,7 +166,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
166 return setting->set(drive, arg); 166 return setting->set(drive, arg);
167 167
168 rq = blk_get_request(q, READ, __GFP_WAIT); 168 rq = blk_get_request(q, READ, __GFP_WAIT);
169 rq->cmd_type = REQ_TYPE_SPECIAL; 169 rq->cmd_type = REQ_TYPE_DRV_PRIV;
170 rq->cmd_len = 5; 170 rq->cmd_len = 5;
171 rq->cmd[0] = REQ_DEVSET_EXEC; 171 rq->cmd[0] = REQ_DEVSET_EXEC;
172 *(int *)&rq->cmd[1] = arg; 172 *(int *)&rq->cmd[1] = arg;
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 32970664c275..d6da011299f5 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -129,7 +129,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
129 129
130 if (cmd) 130 if (cmd)
131 ide_complete_cmd(drive, cmd, stat, err); 131 ide_complete_cmd(drive, cmd, stat, err);
132 } else if (blk_pm_request(rq)) { 132 } else if (ata_pm_request(rq)) {
133 rq->errors = 1; 133 rq->errors = 1;
134 ide_complete_pm_rq(drive, rq); 134 ide_complete_pm_rq(drive, rq);
135 return ide_stopped; 135 return ide_stopped;
@@ -147,7 +147,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
147{ 147{
148 struct request *rq = drive->hwif->rq; 148 struct request *rq = drive->hwif->rq;
149 149
150 if (rq && rq->cmd_type == REQ_TYPE_SPECIAL && 150 if (rq && rq->cmd_type == REQ_TYPE_DRV_PRIV &&
151 rq->cmd[0] == REQ_DRIVE_RESET) { 151 rq->cmd[0] == REQ_DRIVE_RESET) {
152 if (err <= 0 && rq->errors == 0) 152 if (err <= 0 && rq->errors == 0)
153 rq->errors = -EIO; 153 rq->errors = -EIO;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 8c6363cdd208..2fb5350c5410 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -97,7 +97,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
97 "Aborting request!\n"); 97 "Aborting request!\n");
98 } 98 }
99 99
100 if (rq->cmd_type == REQ_TYPE_SPECIAL) 100 if (rq->cmd_type == REQ_TYPE_DRV_PRIV)
101 rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL; 101 rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
102 102
103 return uptodate; 103 return uptodate;
@@ -246,7 +246,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
246 } else 246 } else
247 printk(KERN_ERR PFX "%s: I/O error\n", drive->name); 247 printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
248 248
249 if (rq->cmd_type == REQ_TYPE_SPECIAL) { 249 if (rq->cmd_type == REQ_TYPE_DRV_PRIV) {
250 rq->errors = 0; 250 rq->errors = 0;
251 ide_complete_rq(drive, 0, blk_rq_bytes(rq)); 251 ide_complete_rq(drive, 0, blk_rq_bytes(rq));
252 return ide_stopped; 252 return ide_stopped;
@@ -265,8 +265,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
265 pc = &floppy->queued_pc; 265 pc = &floppy->queued_pc;
266 idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); 266 idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
267 break; 267 break;
268 case REQ_TYPE_SPECIAL: 268 case REQ_TYPE_DRV_PRIV:
269 case REQ_TYPE_SENSE: 269 case REQ_TYPE_ATA_SENSE:
270 pc = (struct ide_atapi_pc *)rq->special; 270 pc = (struct ide_atapi_pc *)rq->special;
271 break; 271 break;
272 case REQ_TYPE_BLOCK_PC: 272 case REQ_TYPE_BLOCK_PC:
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 177db6d5b2f5..669ea1e45795 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -135,7 +135,7 @@ EXPORT_SYMBOL(ide_complete_rq);
135 135
136void ide_kill_rq(ide_drive_t *drive, struct request *rq) 136void ide_kill_rq(ide_drive_t *drive, struct request *rq)
137{ 137{
138 u8 drv_req = (rq->cmd_type == REQ_TYPE_SPECIAL) && rq->rq_disk; 138 u8 drv_req = (rq->cmd_type == REQ_TYPE_DRV_PRIV) && rq->rq_disk;
139 u8 media = drive->media; 139 u8 media = drive->media;
140 140
141 drive->failed_pc = NULL; 141 drive->failed_pc = NULL;
@@ -320,7 +320,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
320 goto kill_rq; 320 goto kill_rq;
321 } 321 }
322 322
323 if (blk_pm_request(rq)) 323 if (ata_pm_request(rq))
324 ide_check_pm_state(drive, rq); 324 ide_check_pm_state(drive, rq);
325 325
326 drive->hwif->tp_ops->dev_select(drive); 326 drive->hwif->tp_ops->dev_select(drive);
@@ -342,8 +342,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
342 342
343 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) 343 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
344 return execute_drive_cmd(drive, rq); 344 return execute_drive_cmd(drive, rq);
345 else if (blk_pm_request(rq)) { 345 else if (ata_pm_request(rq)) {
346 struct request_pm_state *pm = rq->special; 346 struct ide_pm_state *pm = rq->special;
347#ifdef DEBUG_PM 347#ifdef DEBUG_PM
348 printk("%s: start_power_step(step: %d)\n", 348 printk("%s: start_power_step(step: %d)\n",
349 drive->name, pm->pm_step); 349 drive->name, pm->pm_step);
@@ -353,7 +353,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
353 pm->pm_step == IDE_PM_COMPLETED) 353 pm->pm_step == IDE_PM_COMPLETED)
354 ide_complete_pm_rq(drive, rq); 354 ide_complete_pm_rq(drive, rq);
355 return startstop; 355 return startstop;
356 } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_SPECIAL) 356 } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_DRV_PRIV)
357 /* 357 /*
358 * TODO: Once all ULDs have been modified to 358 * TODO: Once all ULDs have been modified to
359 * check for specific op codes rather than 359 * check for specific op codes rather than
@@ -538,7 +538,7 @@ repeat:
538 * state machine. 538 * state machine.
539 */ 539 */
540 if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && 540 if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
541 blk_pm_request(rq) == 0 && 541 ata_pm_request(rq) == 0 &&
542 (rq->cmd_flags & REQ_PREEMPT) == 0) { 542 (rq->cmd_flags & REQ_PREEMPT) == 0) {
543 /* there should be no pending command at this point */ 543 /* there should be no pending command at this point */
544 ide_unlock_port(hwif); 544 ide_unlock_port(hwif);
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 6233fa2cb8a9..aa2e9b77b20d 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -222,7 +222,7 @@ static int generic_drive_reset(ide_drive_t *drive)
222 int ret = 0; 222 int ret = 0;
223 223
224 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 224 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
225 rq->cmd_type = REQ_TYPE_SPECIAL; 225 rq->cmd_type = REQ_TYPE_DRV_PRIV;
226 rq->cmd_len = 1; 226 rq->cmd_len = 1;
227 rq->cmd[0] = REQ_DRIVE_RESET; 227 rq->cmd[0] = REQ_DRIVE_RESET;
228 if (blk_execute_rq(drive->queue, NULL, rq, 1)) 228 if (blk_execute_rq(drive->queue, NULL, rq, 1))
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index ca958604cda2..c80868520488 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -34,7 +34,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
34 rq = blk_get_request(q, READ, __GFP_WAIT); 34 rq = blk_get_request(q, READ, __GFP_WAIT);
35 rq->cmd[0] = REQ_PARK_HEADS; 35 rq->cmd[0] = REQ_PARK_HEADS;
36 rq->cmd_len = 1; 36 rq->cmd_len = 1;
37 rq->cmd_type = REQ_TYPE_SPECIAL; 37 rq->cmd_type = REQ_TYPE_DRV_PRIV;
38 rq->special = &timeout; 38 rq->special = &timeout;
39 rc = blk_execute_rq(q, NULL, rq, 1); 39 rc = blk_execute_rq(q, NULL, rq, 1);
40 blk_put_request(rq); 40 blk_put_request(rq);
@@ -51,7 +51,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
51 51
52 rq->cmd[0] = REQ_UNPARK_HEADS; 52 rq->cmd[0] = REQ_UNPARK_HEADS;
53 rq->cmd_len = 1; 53 rq->cmd_len = 1;
54 rq->cmd_type = REQ_TYPE_SPECIAL; 54 rq->cmd_type = REQ_TYPE_DRV_PRIV;
55 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); 55 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
56 56
57out: 57out:
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 8d1e32d7cd97..081e43458d50 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -8,7 +8,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
8 ide_drive_t *pair = ide_get_pair_dev(drive); 8 ide_drive_t *pair = ide_get_pair_dev(drive);
9 ide_hwif_t *hwif = drive->hwif; 9 ide_hwif_t *hwif = drive->hwif;
10 struct request *rq; 10 struct request *rq;
11 struct request_pm_state rqpm; 11 struct ide_pm_state rqpm;
12 int ret; 12 int ret;
13 13
14 if (ide_port_acpi(hwif)) { 14 if (ide_port_acpi(hwif)) {
@@ -19,7 +19,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
19 19
20 memset(&rqpm, 0, sizeof(rqpm)); 20 memset(&rqpm, 0, sizeof(rqpm));
21 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 21 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
22 rq->cmd_type = REQ_TYPE_PM_SUSPEND; 22 rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND;
23 rq->special = &rqpm; 23 rq->special = &rqpm;
24 rqpm.pm_step = IDE_PM_START_SUSPEND; 24 rqpm.pm_step = IDE_PM_START_SUSPEND;
25 if (mesg.event == PM_EVENT_PRETHAW) 25 if (mesg.event == PM_EVENT_PRETHAW)
@@ -38,13 +38,43 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
38 return ret; 38 return ret;
39} 39}
40 40
41static void ide_end_sync_rq(struct request *rq, int error)
42{
43 complete(rq->end_io_data);
44}
45
46static int ide_pm_execute_rq(struct request *rq)
47{
48 struct request_queue *q = rq->q;
49 DECLARE_COMPLETION_ONSTACK(wait);
50
51 rq->end_io_data = &wait;
52 rq->end_io = ide_end_sync_rq;
53
54 spin_lock_irq(q->queue_lock);
55 if (unlikely(blk_queue_dying(q))) {
56 rq->cmd_flags |= REQ_QUIET;
57 rq->errors = -ENXIO;
58 __blk_end_request_all(rq, rq->errors);
59 spin_unlock_irq(q->queue_lock);
60 return -ENXIO;
61 }
62 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
63 __blk_run_queue_uncond(q);
64 spin_unlock_irq(q->queue_lock);
65
66 wait_for_completion_io(&wait);
67
68 return rq->errors ? -EIO : 0;
69}
70
41int generic_ide_resume(struct device *dev) 71int generic_ide_resume(struct device *dev)
42{ 72{
43 ide_drive_t *drive = to_ide_device(dev); 73 ide_drive_t *drive = to_ide_device(dev);
44 ide_drive_t *pair = ide_get_pair_dev(drive); 74 ide_drive_t *pair = ide_get_pair_dev(drive);
45 ide_hwif_t *hwif = drive->hwif; 75 ide_hwif_t *hwif = drive->hwif;
46 struct request *rq; 76 struct request *rq;
47 struct request_pm_state rqpm; 77 struct ide_pm_state rqpm;
48 int err; 78 int err;
49 79
50 if (ide_port_acpi(hwif)) { 80 if (ide_port_acpi(hwif)) {
@@ -59,13 +89,13 @@ int generic_ide_resume(struct device *dev)
59 89
60 memset(&rqpm, 0, sizeof(rqpm)); 90 memset(&rqpm, 0, sizeof(rqpm));
61 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 91 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
62 rq->cmd_type = REQ_TYPE_PM_RESUME; 92 rq->cmd_type = REQ_TYPE_ATA_PM_RESUME;
63 rq->cmd_flags |= REQ_PREEMPT; 93 rq->cmd_flags |= REQ_PREEMPT;
64 rq->special = &rqpm; 94 rq->special = &rqpm;
65 rqpm.pm_step = IDE_PM_START_RESUME; 95 rqpm.pm_step = IDE_PM_START_RESUME;
66 rqpm.pm_state = PM_EVENT_ON; 96 rqpm.pm_state = PM_EVENT_ON;
67 97
68 err = blk_execute_rq(drive->queue, NULL, rq, 1); 98 err = ide_pm_execute_rq(rq);
69 blk_put_request(rq); 99 blk_put_request(rq);
70 100
71 if (err == 0 && dev->driver) { 101 if (err == 0 && dev->driver) {
@@ -80,7 +110,7 @@ int generic_ide_resume(struct device *dev)
80 110
81void ide_complete_power_step(ide_drive_t *drive, struct request *rq) 111void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
82{ 112{
83 struct request_pm_state *pm = rq->special; 113 struct ide_pm_state *pm = rq->special;
84 114
85#ifdef DEBUG_PM 115#ifdef DEBUG_PM
86 printk(KERN_INFO "%s: complete_power_step(step: %d)\n", 116 printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
@@ -110,7 +140,7 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
110 140
111ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) 141ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
112{ 142{
113 struct request_pm_state *pm = rq->special; 143 struct ide_pm_state *pm = rq->special;
114 struct ide_cmd cmd = { }; 144 struct ide_cmd cmd = { };
115 145
116 switch (pm->pm_step) { 146 switch (pm->pm_step) {
@@ -182,7 +212,7 @@ out_do_tf:
182void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) 212void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
183{ 213{
184 struct request_queue *q = drive->queue; 214 struct request_queue *q = drive->queue;
185 struct request_pm_state *pm = rq->special; 215 struct ide_pm_state *pm = rq->special;
186 unsigned long flags; 216 unsigned long flags;
187 217
188 ide_complete_power_step(drive, rq); 218 ide_complete_power_step(drive, rq);
@@ -191,10 +221,10 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
191 221
192#ifdef DEBUG_PM 222#ifdef DEBUG_PM
193 printk("%s: completing PM request, %s\n", drive->name, 223 printk("%s: completing PM request, %s\n", drive->name,
194 (rq->cmd_type == REQ_TYPE_PM_SUSPEND) ? "suspend" : "resume"); 224 (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND) ? "suspend" : "resume");
195#endif 225#endif
196 spin_lock_irqsave(q->queue_lock, flags); 226 spin_lock_irqsave(q->queue_lock, flags);
197 if (rq->cmd_type == REQ_TYPE_PM_SUSPEND) 227 if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND)
198 blk_stop_queue(q); 228 blk_stop_queue(q);
199 else 229 else
200 drive->dev_flags &= ~IDE_DFLAG_BLOCKED; 230 drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
@@ -208,13 +238,13 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
208 238
209void ide_check_pm_state(ide_drive_t *drive, struct request *rq) 239void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
210{ 240{
211 struct request_pm_state *pm = rq->special; 241 struct ide_pm_state *pm = rq->special;
212 242
213 if (rq->cmd_type == REQ_TYPE_PM_SUSPEND && 243 if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND &&
214 pm->pm_step == IDE_PM_START_SUSPEND) 244 pm->pm_step == IDE_PM_START_SUSPEND)
215 /* Mark drive blocked when starting the suspend sequence. */ 245 /* Mark drive blocked when starting the suspend sequence. */
216 drive->dev_flags |= IDE_DFLAG_BLOCKED; 246 drive->dev_flags |= IDE_DFLAG_BLOCKED;
217 else if (rq->cmd_type == REQ_TYPE_PM_RESUME && 247 else if (rq->cmd_type == REQ_TYPE_ATA_PM_RESUME &&
218 pm->pm_step == IDE_PM_START_RESUME) { 248 pm->pm_step == IDE_PM_START_RESUME) {
219 /* 249 /*
220 * The first thing we do on wakeup is to wait for BSY bit to 250 * The first thing we do on wakeup is to wait for BSY bit to
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 6eb738ca6d2f..f5d51d1d09ee 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -576,8 +576,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
576 rq->cmd[0], (unsigned long long)blk_rq_pos(rq), 576 rq->cmd[0], (unsigned long long)blk_rq_pos(rq),
577 blk_rq_sectors(rq)); 577 blk_rq_sectors(rq));
578 578
579 BUG_ON(!(rq->cmd_type == REQ_TYPE_SPECIAL || 579 BUG_ON(!(rq->cmd_type == REQ_TYPE_DRV_PRIV ||
580 rq->cmd_type == REQ_TYPE_SENSE)); 580 rq->cmd_type == REQ_TYPE_ATA_SENSE));
581 581
582 /* Retry a failed packet command */ 582 /* Retry a failed packet command */
583 if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) { 583 if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) {
@@ -853,7 +853,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
853 BUG_ON(size < 0 || size % tape->blk_size); 853 BUG_ON(size < 0 || size % tape->blk_size);
854 854
855 rq = blk_get_request(drive->queue, READ, __GFP_WAIT); 855 rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
856 rq->cmd_type = REQ_TYPE_SPECIAL; 856 rq->cmd_type = REQ_TYPE_DRV_PRIV;
857 rq->cmd[13] = cmd; 857 rq->cmd[13] = cmd;
858 rq->rq_disk = tape->disk; 858 rq->rq_disk = tape->disk;
859 rq->__sector = tape->first_frame; 859 rq->__sector = tape->first_frame;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index dabb88b1cbec..0979e126fff1 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -186,7 +186,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive)
186 tf->command == ATA_CMD_CHK_POWER) { 186 tf->command == ATA_CMD_CHK_POWER) {
187 struct request *rq = hwif->rq; 187 struct request *rq = hwif->rq;
188 188
189 if (blk_pm_request(rq)) 189 if (ata_pm_request(rq))
190 ide_complete_pm_rq(drive, rq); 190 ide_complete_pm_rq(drive, rq);
191 else 191 else
192 ide_finish_cmd(drive, cmd, stat); 192 ide_finish_cmd(drive, cmd, stat);
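
Across the IDE hunks the power-management request types gain an ATA_ prefix (REQ_TYPE_ATA_PM_SUSPEND/RESUME) and the generic blk_pm_request() test is replaced by an IDE-local ata_pm_request(). The helper's definition lives in include/linux/ide.h, outside this excerpt; judging from the call sites it is presumably equivalent to:

/* Assumed shape of the new helper, inferred from its callers above. */
#define ata_pm_request(rq)	\
	((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \
	 (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME)
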
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index fa028fa82df4..cb64e64a4789 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -55,7 +55,7 @@ static void bch_bio_submit_split_done(struct closure *cl)
55 55
56 s->bio->bi_end_io = s->bi_end_io; 56 s->bio->bi_end_io = s->bi_end_io;
57 s->bio->bi_private = s->bi_private; 57 s->bio->bi_private = s->bi_private;
58 bio_endio_nodec(s->bio, 0); 58 bio_endio(s->bio, 0);
59 59
60 closure_debug_destroy(&s->cl); 60 closure_debug_destroy(&s->cl);
61 mempool_free(s, s->p->bio_split_hook); 61 mempool_free(s, s->p->bio_split_hook);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index ab43faddb447..1616f668a4cb 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -619,7 +619,7 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio)
619 bio->bi_end_io = request_endio; 619 bio->bi_end_io = request_endio;
620 bio->bi_private = &s->cl; 620 bio->bi_private = &s->cl;
621 621
622 atomic_set(&bio->bi_cnt, 3); 622 bio_cnt_set(bio, 3);
623} 623}
624 624
625static void search_free(struct closure *cl) 625static void search_free(struct closure *cl)
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 7755af351867..41b2594a80c6 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -86,12 +86,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
86{ 86{
87 bio->bi_end_io = h->bi_end_io; 87 bio->bi_end_io = h->bi_end_io;
88 bio->bi_private = h->bi_private; 88 bio->bi_private = h->bi_private;
89
90 /*
91 * Must bump bi_remaining to allow bio to complete with
92 * restored bi_end_io.
93 */
94 atomic_inc(&bio->bi_remaining);
95} 89}
96 90
97/*----------------------------------------------------------------*/ 91/*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 089d62751f7f..743fa9bbae9e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1254,8 +1254,6 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
1254 dm_bio_restore(bd, bio); 1254 dm_bio_restore(bd, bio);
1255 bio_record->details.bi_bdev = NULL; 1255 bio_record->details.bi_bdev = NULL;
1256 1256
1257 atomic_inc(&bio->bi_remaining);
1258
1259 queue_bio(ms, bio, rw); 1257 queue_bio(ms, bio, rw);
1260 return DM_ENDIO_INCOMPLETE; 1258 return DM_ENDIO_INCOMPLETE;
1261 } 1259 }
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index f83a0f3fc365..7c82d3ccce87 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1478,7 +1478,6 @@ out:
1478 if (full_bio) { 1478 if (full_bio) {
1479 full_bio->bi_end_io = pe->full_bio_end_io; 1479 full_bio->bi_end_io = pe->full_bio_end_io;
1480 full_bio->bi_private = pe->full_bio_private; 1480 full_bio->bi_private = pe->full_bio_private;
1481 atomic_inc(&full_bio->bi_remaining);
1482 } 1481 }
1483 increment_pending_exceptions_done_count(); 1482 increment_pending_exceptions_done_count();
1484 1483
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 16ba55ad7089..a5f94125ad01 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -942,21 +942,28 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
942{ 942{
943 unsigned type = dm_table_get_type(t); 943 unsigned type = dm_table_get_type(t);
944 unsigned per_bio_data_size = 0; 944 unsigned per_bio_data_size = 0;
945 struct dm_target *tgt;
946 unsigned i; 945 unsigned i;
947 946
948 if (unlikely(type == DM_TYPE_NONE)) { 947 switch (type) {
948 case DM_TYPE_BIO_BASED:
949 for (i = 0; i < t->num_targets; i++) {
950 struct dm_target *tgt = t->targets + i;
951
952 per_bio_data_size = max(per_bio_data_size,
953 tgt->per_bio_data_size);
954 }
955 t->mempools = dm_alloc_bio_mempools(t->integrity_supported,
956 per_bio_data_size);
957 break;
958 case DM_TYPE_REQUEST_BASED:
959 case DM_TYPE_MQ_REQUEST_BASED:
960 t->mempools = dm_alloc_rq_mempools(md, type);
961 break;
962 default:
949 DMWARN("no table type is set, can't allocate mempools"); 963 DMWARN("no table type is set, can't allocate mempools");
950 return -EINVAL; 964 return -EINVAL;
951 } 965 }
952 966
953 if (type == DM_TYPE_BIO_BASED)
954 for (i = 0; i < t->num_targets; i++) {
955 tgt = t->targets + i;
956 per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size);
957 }
958
959 t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size);
960 if (!t->mempools) 967 if (!t->mempools)
961 return -ENOMEM; 968 return -ENOMEM;
962 969
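
dm_table_alloc_md_mempools() now dispatches on the table type: bio-based tables get dm_alloc_bio_mempools(), sized by the largest per_bio_data_size across targets, while request-based and blk-mq request-based tables get dm_alloc_rq_mempools(). Both fill the same dm_md_mempools container; its definition is not part of this excerpt, but the members the dm.c hunks below touch are roughly:

#include <linux/mempool.h>
#include <linux/bio.h>

/* Hedged sketch of the container both allocators populate. */
struct dm_md_mempools {
	mempool_t *io_pool;	/* dm_io (bio-based) or dm_rq_target_io (request-based) */
	mempool_t *rq_pool;	/* clone requests; old-style request-based tables only */
	struct bio_set *bs;	/* bio clones; bio-based tables only */
};
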
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 921aafd12aee..e852602c0091 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -793,10 +793,9 @@ static void inc_remap_and_issue_cell(struct thin_c *tc,
793 793
794static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) 794static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
795{ 795{
796 if (m->bio) { 796 if (m->bio)
797 m->bio->bi_end_io = m->saved_bi_end_io; 797 m->bio->bi_end_io = m->saved_bi_end_io;
798 atomic_inc(&m->bio->bi_remaining); 798
799 }
800 cell_error(m->tc->pool, m->cell); 799 cell_error(m->tc->pool, m->cell);
801 list_del(&m->list); 800 list_del(&m->list);
802 mempool_free(m, m->tc->pool->mapping_pool); 801 mempool_free(m, m->tc->pool->mapping_pool);
@@ -810,10 +809,8 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
810 int r; 809 int r;
811 810
812 bio = m->bio; 811 bio = m->bio;
813 if (bio) { 812 if (bio)
814 bio->bi_end_io = m->saved_bi_end_io; 813 bio->bi_end_io = m->saved_bi_end_io;
815 atomic_inc(&bio->bi_remaining);
816 }
817 814
818 if (m->err) { 815 if (m->err) {
819 cell_error(pool, m->cell); 816 cell_error(pool, m->cell);
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 66616db33e6f..bb9c6a00e4b0 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -459,7 +459,7 @@ static void verity_finish_io(struct dm_verity_io *io, int error)
459 bio->bi_end_io = io->orig_bi_end_io; 459 bio->bi_end_io = io->orig_bi_end_io;
460 bio->bi_private = io->orig_bi_private; 460 bio->bi_private = io->orig_bi_private;
461 461
462 bio_endio_nodec(bio, error); 462 bio_endio(bio, error);
463} 463}
464 464
465static void verity_work(struct work_struct *w) 465static void verity_work(struct work_struct *w)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2caf492890d6..4d6f089a0e9e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -990,57 +990,6 @@ static void clone_endio(struct bio *bio, int error)
990 dec_pending(io, error); 990 dec_pending(io, error);
991} 991}
992 992
993/*
994 * Partial completion handling for request-based dm
995 */
996static void end_clone_bio(struct bio *clone, int error)
997{
998 struct dm_rq_clone_bio_info *info =
999 container_of(clone, struct dm_rq_clone_bio_info, clone);
1000 struct dm_rq_target_io *tio = info->tio;
1001 struct bio *bio = info->orig;
1002 unsigned int nr_bytes = info->orig->bi_iter.bi_size;
1003
1004 bio_put(clone);
1005
1006 if (tio->error)
1007 /*
1008 * An error has already been detected on the request.
1009 * Once error occurred, just let clone->end_io() handle
1010 * the remainder.
1011 */
1012 return;
1013 else if (error) {
1014 /*
1015 * Don't notice the error to the upper layer yet.
1016 * The error handling decision is made by the target driver,
1017 * when the request is completed.
1018 */
1019 tio->error = error;
1020 return;
1021 }
1022
1023 /*
1024 * I/O for the bio successfully completed.
1025 * Notice the data completion to the upper layer.
1026 */
1027
1028 /*
1029 * bios are processed from the head of the list.
1030 * So the completing bio should always be rq->bio.
1031 * If it's not, something wrong is happening.
1032 */
1033 if (tio->orig->bio != bio)
1034 DMERR("bio completion is going in the middle of the request");
1035
1036 /*
1037 * Update the original request.
1038 * Do not use blk_end_request() here, because it may complete
1039 * the original request before the clone, and break the ordering.
1040 */
1041 blk_update_request(tio->orig, 0, nr_bytes);
1042}
1043
1044static struct dm_rq_target_io *tio_from_request(struct request *rq) 993static struct dm_rq_target_io *tio_from_request(struct request *rq)
1045{ 994{
1046 return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); 995 return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
@@ -1087,8 +1036,6 @@ static void free_rq_clone(struct request *clone)
1087 struct dm_rq_target_io *tio = clone->end_io_data; 1036 struct dm_rq_target_io *tio = clone->end_io_data;
1088 struct mapped_device *md = tio->md; 1037 struct mapped_device *md = tio->md;
1089 1038
1090 blk_rq_unprep_clone(clone);
1091
1092 if (md->type == DM_TYPE_MQ_REQUEST_BASED) 1039 if (md->type == DM_TYPE_MQ_REQUEST_BASED)
1093 /* stacked on blk-mq queue(s) */ 1040 /* stacked on blk-mq queue(s) */
1094 tio->ti->type->release_clone_rq(clone); 1041 tio->ti->type->release_clone_rq(clone);
@@ -1827,39 +1774,13 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
1827 dm_complete_request(rq, r); 1774 dm_complete_request(rq, r);
1828} 1775}
1829 1776
1830static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, 1777static void setup_clone(struct request *clone, struct request *rq,
1831 void *data) 1778 struct dm_rq_target_io *tio)
1832{ 1779{
1833 struct dm_rq_target_io *tio = data; 1780 blk_rq_prep_clone(clone, rq);
1834 struct dm_rq_clone_bio_info *info =
1835 container_of(bio, struct dm_rq_clone_bio_info, clone);
1836
1837 info->orig = bio_orig;
1838 info->tio = tio;
1839 bio->bi_end_io = end_clone_bio;
1840
1841 return 0;
1842}
1843
1844static int setup_clone(struct request *clone, struct request *rq,
1845 struct dm_rq_target_io *tio, gfp_t gfp_mask)
1846{
1847 int r;
1848
1849 r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
1850 dm_rq_bio_constructor, tio);
1851 if (r)
1852 return r;
1853
1854 clone->cmd = rq->cmd;
1855 clone->cmd_len = rq->cmd_len;
1856 clone->sense = rq->sense;
1857 clone->end_io = end_clone_request; 1781 clone->end_io = end_clone_request;
1858 clone->end_io_data = tio; 1782 clone->end_io_data = tio;
1859
1860 tio->clone = clone; 1783 tio->clone = clone;
1861
1862 return 0;
1863} 1784}
1864 1785
1865static struct request *clone_rq(struct request *rq, struct mapped_device *md, 1786static struct request *clone_rq(struct request *rq, struct mapped_device *md,
@@ -1880,12 +1801,7 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
1880 clone = tio->clone; 1801 clone = tio->clone;
1881 1802
1882 blk_rq_init(NULL, clone); 1803 blk_rq_init(NULL, clone);
1883 if (setup_clone(clone, rq, tio, gfp_mask)) { 1804 setup_clone(clone, rq, tio);
1884 /* -ENOMEM */
1885 if (alloc_clone)
1886 free_clone_request(md, clone);
1887 return NULL;
1888 }
1889 1805
1890 return clone; 1806 return clone;
1891} 1807}
@@ -1979,11 +1895,7 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
1979 } 1895 }
1980 if (r != DM_MAPIO_REMAPPED) 1896 if (r != DM_MAPIO_REMAPPED)
1981 return r; 1897 return r;
1982 if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { 1898 setup_clone(clone, rq, tio);
1983 /* -ENOMEM */
1984 ti->type->release_clone_rq(clone);
1985 return DM_MAPIO_REQUEUE;
1986 }
1987 } 1899 }
1988 1900
1989 switch (r) { 1901 switch (r) {
@@ -2437,8 +2349,6 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
2437 goto out; 2349 goto out;
2438 } 2350 }
2439 2351
2440 BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
2441
2442 md->io_pool = p->io_pool; 2352 md->io_pool = p->io_pool;
2443 p->io_pool = NULL; 2353 p->io_pool = NULL;
2444 md->rq_pool = p->rq_pool; 2354 md->rq_pool = p->rq_pool;
@@ -3544,48 +3454,23 @@ int dm_noflush_suspending(struct dm_target *ti)
3544} 3454}
3545EXPORT_SYMBOL_GPL(dm_noflush_suspending); 3455EXPORT_SYMBOL_GPL(dm_noflush_suspending);
3546 3456
3547struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, 3457struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity,
3548 unsigned integrity, unsigned per_bio_data_size) 3458 unsigned per_bio_data_size)
3549{ 3459{
3550 struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); 3460 struct dm_md_mempools *pools;
3551 struct kmem_cache *cachep = NULL; 3461 unsigned int pool_size = dm_get_reserved_bio_based_ios();
3552 unsigned int pool_size = 0;
3553 unsigned int front_pad; 3462 unsigned int front_pad;
3554 3463
3464 pools = kzalloc(sizeof(*pools), GFP_KERNEL);
3555 if (!pools) 3465 if (!pools)
3556 return NULL; 3466 return NULL;
3557 3467
3558 type = filter_md_type(type, md); 3468 front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) +
3469 offsetof(struct dm_target_io, clone);
3559 3470
3560 switch (type) { 3471 pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache);
3561 case DM_TYPE_BIO_BASED: 3472 if (!pools->io_pool)
3562 cachep = _io_cache; 3473 goto out;
3563 pool_size = dm_get_reserved_bio_based_ios();
3564 front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
3565 break;
3566 case DM_TYPE_REQUEST_BASED:
3567 cachep = _rq_tio_cache;
3568 pool_size = dm_get_reserved_rq_based_ios();
3569 pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
3570 if (!pools->rq_pool)
3571 goto out;
3572 /* fall through to setup remaining rq-based pools */
3573 case DM_TYPE_MQ_REQUEST_BASED:
3574 if (!pool_size)
3575 pool_size = dm_get_reserved_rq_based_ios();
3576 front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
3577 /* per_bio_data_size is not used. See __bind_mempools(). */
3578 WARN_ON(per_bio_data_size != 0);
3579 break;
3580 default:
3581 BUG();
3582 }
3583
3584 if (cachep) {
3585 pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
3586 if (!pools->io_pool)
3587 goto out;
3588 }
3589 3474
3590 pools->bs = bioset_create_nobvec(pool_size, front_pad); 3475 pools->bs = bioset_create_nobvec(pool_size, front_pad);
3591 if (!pools->bs) 3476 if (!pools->bs)
@@ -3595,10 +3480,34 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
3595 goto out; 3480 goto out;
3596 3481
3597 return pools; 3482 return pools;
3598
3599out: 3483out:
3600 dm_free_md_mempools(pools); 3484 dm_free_md_mempools(pools);
3485 return NULL;
3486}
3487
3488struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md,
3489 unsigned type)
3490{
3491 unsigned int pool_size = dm_get_reserved_rq_based_ios();
3492 struct dm_md_mempools *pools;
3493
3494 pools = kzalloc(sizeof(*pools), GFP_KERNEL);
3495 if (!pools)
3496 return NULL;
3497
3498 if (filter_md_type(type, md) == DM_TYPE_REQUEST_BASED) {
3499 pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
3500 if (!pools->rq_pool)
3501 goto out;
3502 }
3601 3503
3504 pools->io_pool = mempool_create_slab_pool(pool_size, _rq_tio_cache);
3505 if (!pools->io_pool)
3506 goto out;
3507
3508 return pools;
3509out:
3510 dm_free_md_mempools(pools);
3602 return NULL; 3511 return NULL;
3603} 3512}
3604 3513
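
With end_clone_bio() and the bio constructor gone, setup_clone() relies on the simplified blk_rq_prep_clone(clone, rq) used above, which copies the command fields and takes the original request's bios by reference, so it can no longer fail; that is why the -ENOMEM fallbacks in clone_rq() and map_request() disappear. A hedged sketch of the resulting clone setup outside dm (names invented, calls as used above):

#include <linux/blkdev.h>

static void mydrv_setup_clone(struct request *clone, struct request *rq,
			      rq_end_io_fn *done, void *done_data)
{
	blk_rq_init(NULL, clone);	/* reset the preallocated clone */
	blk_rq_prep_clone(clone, rq);	/* copy cmd/flags, reference rq's bios */

	clone->end_io = done;		/* completion routed back to the caller */
	clone->end_io_data = done_data;
}
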
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 6123c2bf9150..e6e66d087b26 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -222,8 +222,9 @@ void dm_kcopyd_exit(void);
222/* 222/*
223 * Mempool operations 223 * Mempool operations
224 */ 224 */
225struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, 225struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity,
226 unsigned integrity, unsigned per_bio_data_size); 226 unsigned per_bio_data_size);
227struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, unsigned type);
227void dm_free_md_mempools(struct dm_md_mempools *pools); 228void dm_free_md_mempools(struct dm_md_mempools *pools);
228 229
229/* 230/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2ef9a4b72d06..0bccf18dc1dc 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1745,7 +1745,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
1745 bio->bi_private = end_io_wq->private; 1745 bio->bi_private = end_io_wq->private;
1746 bio->bi_end_io = end_io_wq->end_io; 1746 bio->bi_end_io = end_io_wq->end_io;
1747 kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); 1747 kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
1748 bio_endio_nodec(bio, error); 1748 bio_endio(bio, error);
1749} 1749}
1750 1750
1751static int cleaner_kthread(void *arg) 1751static int cleaner_kthread(void *arg)
@@ -3269,11 +3269,8 @@ static int write_dev_supers(struct btrfs_device *device,
3269 */ 3269 */
3270static void btrfs_end_empty_barrier(struct bio *bio, int err) 3270static void btrfs_end_empty_barrier(struct bio *bio, int err)
3271{ 3271{
3272 if (err) { 3272 if (err)
3273 if (err == -EOPNOTSUPP)
3274 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
3275 clear_bit(BIO_UPTODATE, &bio->bi_flags); 3273 clear_bit(BIO_UPTODATE, &bio->bi_flags);
3276 }
3277 if (bio->bi_private) 3274 if (bio->bi_private)
3278 complete(bio->bi_private); 3275 complete(bio->bi_private);
3279 bio_put(bio); 3276 bio_put(bio);
@@ -3301,11 +3298,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3301 3298
3302 wait_for_completion(&device->flush_wait); 3299 wait_for_completion(&device->flush_wait);
3303 3300
3304 if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 3301 if (!bio_flagged(bio, BIO_UPTODATE)) {
3305 printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
3306 rcu_str_deref(device->name));
3307 device->nobarriers = 1;
3308 } else if (!bio_flagged(bio, BIO_UPTODATE)) {
3309 ret = -EIO; 3302 ret = -EIO;
3310 btrfs_dev_stat_inc_and_print(device, 3303 btrfs_dev_stat_inc_and_print(device,
3311 BTRFS_DEV_STAT_FLUSH_ERRS); 3304 BTRFS_DEV_STAT_FLUSH_ERRS);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c32d226bfecc..c374e1e71e5f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2767,8 +2767,6 @@ static int __must_check submit_one_bio(int rw, struct bio *bio,
2767 else 2767 else
2768 btrfsic_submit_bio(rw, bio); 2768 btrfsic_submit_bio(rw, bio);
2769 2769
2770 if (bio_flagged(bio, BIO_EOPNOTSUPP))
2771 ret = -EOPNOTSUPP;
2772 bio_put(bio); 2770 bio_put(bio);
2773 return ret; 2771 return ret;
2774} 2772}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 174f5e1e00ab..53af23f2c087 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -345,7 +345,7 @@ loop_lock:
345 waitqueue_active(&fs_info->async_submit_wait)) 345 waitqueue_active(&fs_info->async_submit_wait))
346 wake_up(&fs_info->async_submit_wait); 346 wake_up(&fs_info->async_submit_wait);
347 347
348 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 348 BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
349 349
350 /* 350 /*
351 * if we're doing the sync list, record that our 351 * if we're doing the sync list, record that our
@@ -5586,10 +5586,10 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5586 5586
5587static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) 5587static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err)
5588{ 5588{
5589 if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED)) 5589 bio->bi_private = bbio->private;
5590 bio_endio_nodec(bio, err); 5590 bio->bi_end_io = bbio->end_io;
5591 else 5591 bio_endio(bio, err);
5592 bio_endio(bio, err); 5592
5593 btrfs_put_bbio(bbio); 5593 btrfs_put_bbio(bbio);
5594} 5594}
5595 5595
@@ -5633,8 +5633,6 @@ static void btrfs_end_bio(struct bio *bio, int err)
5633 bio = bbio->orig_bio; 5633 bio = bbio->orig_bio;
5634 } 5634 }
5635 5635
5636 bio->bi_private = bbio->private;
5637 bio->bi_end_io = bbio->end_io;
5638 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5636 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
5639 /* only send an error to the higher layers if it is 5637 /* only send an error to the higher layers if it is
5640 * beyond the tolerance of the btrfs bio 5638 * beyond the tolerance of the btrfs bio
@@ -5816,8 +5814,6 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
5816 /* Shoud be the original bio. */ 5814 /* Shoud be the original bio. */
5817 WARN_ON(bio != bbio->orig_bio); 5815 WARN_ON(bio != bbio->orig_bio);
5818 5816
5819 bio->bi_private = bbio->private;
5820 bio->bi_end_io = bbio->end_io;
5821 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5817 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
5822 bio->bi_iter.bi_sector = logical >> 9; 5818 bio->bi_iter.bi_sector = logical >> 9;
5823 5819
@@ -5898,10 +5894,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5898 if (dev_nr < total_devs - 1) { 5894 if (dev_nr < total_devs - 1) {
5899 bio = btrfs_bio_clone(first_bio, GFP_NOFS); 5895 bio = btrfs_bio_clone(first_bio, GFP_NOFS);
5900 BUG_ON(!bio); /* -ENOMEM */ 5896 BUG_ON(!bio); /* -ENOMEM */
5901 } else { 5897 } else
5902 bio = first_bio; 5898 bio = first_bio;
5903 bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED;
5904 }
5905 5899
5906 submit_stripe_bio(root, bbio, bio, 5900 submit_stripe_bio(root, bbio, bio,
5907 bbio->stripes[dev_nr].physical, dev_nr, rw, 5901 bbio->stripes[dev_nr].physical, dev_nr, rw,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ebc31331a837..cedae0356558 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -292,8 +292,6 @@ struct btrfs_bio_stripe {
292struct btrfs_bio; 292struct btrfs_bio;
293typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); 293typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
294 294
295#define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0)
296
297struct btrfs_bio { 295struct btrfs_bio {
298 atomic_t refs; 296 atomic_t refs;
299 atomic_t stripes_pending; 297 atomic_t stripes_pending;
diff --git a/fs/buffer.c b/fs/buffer.c
index c7a5602d01ee..f96173ad62d9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2938,10 +2938,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
2938{ 2938{
2939 struct buffer_head *bh = bio->bi_private; 2939 struct buffer_head *bh = bio->bi_private;
2940 2940
2941 if (err == -EOPNOTSUPP) {
2942 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2943 }
2944
2945 if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) 2941 if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
2946 set_bit(BH_Quiet, &bh->b_state); 2942 set_bit(BH_Quiet, &bh->b_state);
2947 2943
@@ -3000,7 +2996,6 @@ void guard_bio_eod(int rw, struct bio *bio)
3000int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags) 2996int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
3001{ 2997{
3002 struct bio *bio; 2998 struct bio *bio;
3003 int ret = 0;
3004 2999
3005 BUG_ON(!buffer_locked(bh)); 3000 BUG_ON(!buffer_locked(bh));
3006 BUG_ON(!buffer_mapped(bh)); 3001 BUG_ON(!buffer_mapped(bh));
@@ -3041,14 +3036,8 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
3041 if (buffer_prio(bh)) 3036 if (buffer_prio(bh))
3042 rw |= REQ_PRIO; 3037 rw |= REQ_PRIO;
3043 3038
3044 bio_get(bio);
3045 submit_bio(rw, bio); 3039 submit_bio(rw, bio);
3046 3040 return 0;
3047 if (bio_flagged(bio, BIO_EOPNOTSUPP))
3048 ret = -EOPNOTSUPP;
3049
3050 bio_put(bio);
3051 return ret;
3052} 3041}
3053EXPORT_SYMBOL_GPL(_submit_bh); 3042EXPORT_SYMBOL_GPL(_submit_bh);
3054 3043
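With BIO_EOPNOTSUPP gone, _submit_bh() no longer pins the bio across submit_bio() just to sniff a flag afterwards; any failure, including -EOPNOTSUPP, arrives through the err argument of the completion handler. A minimal sketch of the simplified submit path under that assumption; my_end_io and my_submit are illustrative names:

static void my_end_io(struct bio *bio, int err)
{
	if (err)
		pr_err("my_submit: I/O failed with %d\n", err);
	bio_put(bio);
}

static void my_submit(int rw, struct bio *bio)
{
	bio->bi_end_io = my_end_io;
	submit_bio(rw, bio);	/* fire and forget, no post-submit flag check */
}
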
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 79636e21d3a2..5602450f03f6 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -359,7 +359,6 @@ void ext4_io_submit(struct ext4_io_submit *io)
359 if (bio) { 359 if (bio) {
360 bio_get(io->io_bio); 360 bio_get(io->io_bio);
361 submit_bio(io->io_op, io->io_bio); 361 submit_bio(io->io_op, io->io_bio);
362 BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
363 bio_put(io->io_bio); 362 bio_put(io->io_bio);
364 } 363 }
365 io->io_bio = NULL; 364 io->io_bio = NULL;
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index dc3a9efdaab8..42468e5ab3e7 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -343,11 +343,6 @@ static void nilfs_end_bio_write(struct bio *bio, int err)
343 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 343 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
344 struct nilfs_segment_buffer *segbuf = bio->bi_private; 344 struct nilfs_segment_buffer *segbuf = bio->bi_private;
345 345
346 if (err == -EOPNOTSUPP) {
347 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
348 /* to be detected by nilfs_segbuf_submit_bio() */
349 }
350
351 if (!uptodate) 346 if (!uptodate)
352 atomic_inc(&segbuf->sb_err); 347 atomic_inc(&segbuf->sb_err);
353 348
@@ -374,15 +369,8 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
374 369
375 bio->bi_end_io = nilfs_end_bio_write; 370 bio->bi_end_io = nilfs_end_bio_write;
376 bio->bi_private = segbuf; 371 bio->bi_private = segbuf;
377 bio_get(bio);
378 submit_bio(mode, bio); 372 submit_bio(mode, bio);
379 segbuf->sb_nbio++; 373 segbuf->sb_nbio++;
380 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
381 bio_put(bio);
382 err = -EOPNOTSUPP;
383 goto failed;
384 }
385 bio_put(bio);
386 374
387 wi->bio = NULL; 375 wi->bio = NULL;
388 wi->rest_blocks -= wi->end - wi->start; 376 wi->rest_blocks -= wi->end - wi->start;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a56960dd1684..095f94c2d8b5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -356,7 +356,6 @@ xfs_end_bio(
356{ 356{
357 xfs_ioend_t *ioend = bio->bi_private; 357 xfs_ioend_t *ioend = bio->bi_private;
358 358
359 ASSERT(atomic_read(&bio->bi_cnt) >= 1);
360 ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; 359 ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
361 360
362 /* Toss bio and pass work off to an xfsdatad thread */ 361 /* Toss bio and pass work off to an xfsdatad thread */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index da3a127c9958..f0291cf64cc5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -290,7 +290,21 @@ static inline unsigned bio_segments(struct bio *bio)
290 * returns. and then bio would be freed memory when if (bio->bi_flags ...) 290 * returns. and then bio would be freed memory when if (bio->bi_flags ...)
291 * runs 291 * runs
292 */ 292 */
293#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) 293static inline void bio_get(struct bio *bio)
294{
295 bio->bi_flags |= (1 << BIO_REFFED);
296 smp_mb__before_atomic();
297 atomic_inc(&bio->__bi_cnt);
298}
299
300static inline void bio_cnt_set(struct bio *bio, unsigned int count)
301{
302 if (count != 1) {
303 bio->bi_flags |= (1 << BIO_REFFED);
304 smp_mb__before_atomic();
305 }
306 atomic_set(&bio->__bi_cnt, count);
307}
294 308
295enum bip_flags { 309enum bip_flags {
296 BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ 310 BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */
@@ -413,7 +427,6 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
413} 427}
414 428
415extern void bio_endio(struct bio *, int); 429extern void bio_endio(struct bio *, int);
416extern void bio_endio_nodec(struct bio *, int);
417struct request_queue; 430struct request_queue;
418extern int bio_phys_segments(struct request_queue *, struct bio *); 431extern int bio_phys_segments(struct request_queue *, struct bio *);
419 432
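bio_get() is now an inline that marks the bio BIO_REFFED before bumping __bi_cnt, so bio_put() only pays for the atomic decrement on bios that were actually referenced; the double-underscore fields are not meant to be touched directly. Caller-visible usage is unchanged, as in this sketch of the keep-the-bio-around pattern described in the comment above (my_submit_sync is an illustrative name):

static int my_submit_sync(int rw, struct bio *bio)
{
	int err;

	bio_get(bio);			/* sets BIO_REFFED, bumps __bi_cnt */
	err = submit_bio_wait(rw, bio);
	if (!err && !test_bit(BIO_UPTODATE, &bio->bi_flags))
		err = -EIO;		/* bio still valid thanks to our reference */
	bio_put(bio);
	return err;
}
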
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2056a99b92f8..37d1602c4f7a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -96,6 +96,7 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int,
96 96
97typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, 97typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
98 bool); 98 bool);
99typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
99 100
100struct blk_mq_ops { 101struct blk_mq_ops {
101 /* 102 /*
@@ -182,6 +183,7 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
182struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 183struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
183 gfp_t gfp, bool reserved); 184 gfp_t gfp, bool reserved);
184struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); 185struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
186struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
185 187
186enum { 188enum {
187 BLK_MQ_UNIQUE_TAG_BITS = 16, 189 BLK_MQ_UNIQUE_TAG_BITS = 16,
@@ -224,6 +226,8 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async);
224void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); 226void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
225void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, 227void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
226 void *priv); 228 void *priv);
229void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
230 void *priv);
227void blk_mq_freeze_queue(struct request_queue *q); 231void blk_mq_freeze_queue(struct request_queue *q);
228void blk_mq_unfreeze_queue(struct request_queue *q); 232void blk_mq_unfreeze_queue(struct request_queue *q);
229void blk_mq_freeze_queue_start(struct request_queue *q); 233void blk_mq_freeze_queue_start(struct request_queue *q);
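blk_mq_all_tag_busy_iter() walks every in-flight request on a tag set rather than a single hardware queue, using the new busy_tag_iter_fn signature (no hctx argument). A minimal sketch of a driver counting busy requests; my_count_busy and my_count_inflight are illustrative names:

static void my_count_busy(struct request *rq, void *priv, bool reserved)
{
	unsigned int *busy = priv;

	(*busy)++;
}

static unsigned int my_count_inflight(struct blk_mq_tags *tags)
{
	unsigned int busy = 0;

	blk_mq_all_tag_busy_iter(tags, my_count_busy, &busy);
	return busy;
}
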
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index b7299febc4b4..6ab9d12d1f17 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -65,7 +65,7 @@ struct bio {
65 unsigned int bi_seg_front_size; 65 unsigned int bi_seg_front_size;
66 unsigned int bi_seg_back_size; 66 unsigned int bi_seg_back_size;
67 67
68 atomic_t bi_remaining; 68 atomic_t __bi_remaining;
69 69
70 bio_end_io_t *bi_end_io; 70 bio_end_io_t *bi_end_io;
71 71
@@ -92,7 +92,7 @@ struct bio {
92 92
93 unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ 93 unsigned short bi_max_vecs; /* max bvl_vecs we can hold */
94 94
95 atomic_t bi_cnt; /* pin count */ 95 atomic_t __bi_cnt; /* pin count */
96 96
97 struct bio_vec *bi_io_vec; /* the actual vec list */ 97 struct bio_vec *bi_io_vec; /* the actual vec list */
98 98
@@ -112,16 +112,15 @@ struct bio {
112 * bio flags 112 * bio flags
113 */ 113 */
114#define BIO_UPTODATE 0 /* ok after I/O completion */ 114#define BIO_UPTODATE 0 /* ok after I/O completion */
115#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ 115#define BIO_SEG_VALID 1 /* bi_phys_segments valid */
116#define BIO_EOF 2 /* out-out-bounds error */ 116#define BIO_CLONED 2 /* doesn't own data */
117#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ 117#define BIO_BOUNCED 3 /* bio is a bounce bio */
118#define BIO_CLONED 4 /* doesn't own data */ 118#define BIO_USER_MAPPED 4 /* contains user pages */
119#define BIO_BOUNCED 5 /* bio is a bounce bio */ 119#define BIO_NULL_MAPPED 5 /* contains invalid user pages */
120#define BIO_USER_MAPPED 6 /* contains user pages */ 120#define BIO_QUIET 6 /* Make BIO Quiet */
121#define BIO_EOPNOTSUPP 7 /* not supported */ 121#define BIO_SNAP_STABLE 7 /* bio data must be snapshotted during write */
122#define BIO_NULL_MAPPED 8 /* contains invalid user pages */ 122#define BIO_CHAIN 8 /* chained bio, ->bi_remaining in effect */
123#define BIO_QUIET 9 /* Make BIO Quiet */ 123#define BIO_REFFED 9 /* bio has elevated ->bi_cnt */
124#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */
125 124
126/* 125/*
127 * Flags starting here get preserved by bio_reset() - this includes 126 * Flags starting here get preserved by bio_reset() - this includes
@@ -193,6 +192,7 @@ enum rq_flag_bits {
193 __REQ_HASHED, /* on IO scheduler merge hash */ 192 __REQ_HASHED, /* on IO scheduler merge hash */
194 __REQ_MQ_INFLIGHT, /* track inflight for MQ */ 193 __REQ_MQ_INFLIGHT, /* track inflight for MQ */
195 __REQ_NO_TIMEOUT, /* requests may never expire */ 194 __REQ_NO_TIMEOUT, /* requests may never expire */
195 __REQ_CLONE, /* cloned bios */
196 __REQ_NR_BITS, /* stops here */ 196 __REQ_NR_BITS, /* stops here */
197}; 197};
198 198
@@ -247,5 +247,6 @@ enum rq_flag_bits {
247#define REQ_HASHED (1ULL << __REQ_HASHED) 247#define REQ_HASHED (1ULL << __REQ_HASHED)
248#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) 248#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT)
249#define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) 249#define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT)
250#define REQ_CLONE (1ULL << __REQ_CLONE)
250 251
251#endif /* __LINUX_BLK_TYPES_H */ 252#endif /* __LINUX_BLK_TYPES_H */
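BIO_RW_BLOCK, BIO_EOF and BIO_EOPNOTSUPP are gone and the remaining flag values have shifted, while BIO_CHAIN and BIO_REFFED record whether __bi_remaining and __bi_cnt are actually in use; code should keep testing the symbolic names only. As a sketch, the chaining pattern that sets BIO_CHAIN on the parent (my_split_off is an illustrative name, assuming the child was cloned or split from the parent beforehand):

static void my_split_off(int rw, struct bio *parent, struct bio *child)
{
	bio_chain(child, parent);	/* parent gets BIO_CHAIN, __bi_remaining++ */
	submit_bio(rw, child);
	submit_bio(rw, parent);		/* completes only after the child does */
}
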
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5d93a6645e88..a6ae5f9bee49 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -30,7 +30,6 @@ struct scsi_ioctl_command;
30 30
31struct request_queue; 31struct request_queue;
32struct elevator_queue; 32struct elevator_queue;
33struct request_pm_state;
34struct blk_trace; 33struct blk_trace;
35struct request; 34struct request;
36struct sg_io_hdr; 35struct sg_io_hdr;
@@ -75,18 +74,7 @@ struct request_list {
75enum rq_cmd_type_bits { 74enum rq_cmd_type_bits {
76 REQ_TYPE_FS = 1, /* fs request */ 75 REQ_TYPE_FS = 1, /* fs request */
77 REQ_TYPE_BLOCK_PC, /* scsi command */ 76 REQ_TYPE_BLOCK_PC, /* scsi command */
78 REQ_TYPE_SENSE, /* sense request */ 77 REQ_TYPE_DRV_PRIV, /* driver defined types from here */
79 REQ_TYPE_PM_SUSPEND, /* suspend request */
80 REQ_TYPE_PM_RESUME, /* resume request */
81 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
82 REQ_TYPE_SPECIAL, /* driver defined type */
83 /*
84 * for ATA/ATAPI devices. this really doesn't belong here, ide should
85 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
86 * private REQ_LB opcodes to differentiate what type of request this is
87 */
88 REQ_TYPE_ATA_TASKFILE,
89 REQ_TYPE_ATA_PC,
90}; 78};
91 79
92#define BLK_MAX_CDB 16 80#define BLK_MAX_CDB 16
@@ -108,7 +96,7 @@ struct request {
108 struct blk_mq_ctx *mq_ctx; 96 struct blk_mq_ctx *mq_ctx;
109 97
110 u64 cmd_flags; 98 u64 cmd_flags;
111 enum rq_cmd_type_bits cmd_type; 99 unsigned cmd_type;
112 unsigned long atomic_flags; 100 unsigned long atomic_flags;
113 101
114 int cpu; 102 int cpu;
@@ -216,19 +204,6 @@ static inline unsigned short req_get_ioprio(struct request *req)
216 return req->ioprio; 204 return req->ioprio;
217} 205}
218 206
219/*
220 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
221 * requests. Some step values could eventually be made generic.
222 */
223struct request_pm_state
224{
225 /* PM state machine step value, currently driver specific */
226 int pm_step;
227 /* requested PM state value (S1, S2, S3, S4, ...) */
228 u32 pm_state;
229 void* data; /* for driver use */
230};
231
232#include <linux/elevator.h> 207#include <linux/elevator.h>
233 208
234struct blk_queue_ctx; 209struct blk_queue_ctx;
@@ -469,7 +444,7 @@ struct request_queue {
469 struct mutex sysfs_lock; 444 struct mutex sysfs_lock;
470 445
471 int bypass_depth; 446 int bypass_depth;
472 int mq_freeze_depth; 447 atomic_t mq_freeze_depth;
473 448
474#if defined(CONFIG_BLK_DEV_BSG) 449#if defined(CONFIG_BLK_DEV_BSG)
475 bsg_job_fn *bsg_job_fn; 450 bsg_job_fn *bsg_job_fn;
@@ -610,10 +585,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
610 (((rq)->cmd_flags & REQ_STARTED) && \ 585 (((rq)->cmd_flags & REQ_STARTED) && \
611 ((rq)->cmd_type == REQ_TYPE_FS)) 586 ((rq)->cmd_type == REQ_TYPE_FS))
612 587
613#define blk_pm_request(rq) \
614 ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
615 (rq)->cmd_type == REQ_TYPE_PM_RESUME)
616
617#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) 588#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
618#define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 589#define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
619/* rq->queuelist of dequeued request must be list_empty() */ 590/* rq->queuelist of dequeued request must be list_empty() */
@@ -804,11 +775,7 @@ extern void blk_add_request_payload(struct request *rq, struct page *page,
804 unsigned int len); 775 unsigned int len);
805extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); 776extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
806extern int blk_lld_busy(struct request_queue *q); 777extern int blk_lld_busy(struct request_queue *q);
807extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 778extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src);
808 struct bio_set *bs, gfp_t gfp_mask,
809 int (*bio_ctr)(struct bio *, struct bio *, void *),
810 void *data);
811extern void blk_rq_unprep_clone(struct request *rq);
812extern int blk_insert_cloned_request(struct request_queue *q, 779extern int blk_insert_cloned_request(struct request_queue *q,
813 struct request *rq); 780 struct request *rq);
814extern void blk_delay_queue(struct request_queue *, unsigned long); 781extern void blk_delay_queue(struct request_queue *, unsigned long);
@@ -845,6 +812,7 @@ extern void blk_stop_queue(struct request_queue *q);
845extern void blk_sync_queue(struct request_queue *q); 812extern void blk_sync_queue(struct request_queue *q);
846extern void __blk_stop_queue(struct request_queue *q); 813extern void __blk_stop_queue(struct request_queue *q);
847extern void __blk_run_queue(struct request_queue *q); 814extern void __blk_run_queue(struct request_queue *q);
815extern void __blk_run_queue_uncond(struct request_queue *q);
848extern void blk_run_queue(struct request_queue *); 816extern void blk_run_queue(struct request_queue *);
849extern void blk_run_queue_async(struct request_queue *q); 817extern void blk_run_queue_async(struct request_queue *q);
850extern int blk_rq_map_user(struct request_queue *, struct request *, 818extern int blk_rq_map_user(struct request_queue *, struct request *,
@@ -933,7 +901,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
933 if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) 901 if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
934 return q->limits.max_hw_sectors; 902 return q->limits.max_hw_sectors;
935 903
936 if (!q->limits.chunk_sectors) 904 if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD))
937 return blk_queue_get_max_sectors(q, rq->cmd_flags); 905 return blk_queue_get_max_sectors(q, rq->cmd_flags);
938 906
939 return min(blk_max_size_offset(q, blk_rq_pos(rq)), 907 return min(blk_max_size_offset(q, blk_rq_pos(rq)),
@@ -1054,6 +1022,7 @@ bool __must_check blk_get_queue(struct request_queue *);
1054struct request_queue *blk_alloc_queue(gfp_t); 1022struct request_queue *blk_alloc_queue(gfp_t);
1055struct request_queue *blk_alloc_queue_node(gfp_t, int); 1023struct request_queue *blk_alloc_queue_node(gfp_t, int);
1056extern void blk_put_queue(struct request_queue *); 1024extern void blk_put_queue(struct request_queue *);
1025extern void blk_set_queue_dying(struct request_queue *);
1057 1026
1058/* 1027/*
1059 * block layer runtime pm functions 1028 * block layer runtime pm functions
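blk_rq_prep_clone() can no longer fail and takes only the two requests: instead of allocating cloned bios through a bio_set, it copies the relevant fields and points the clone at the original's bio list, flagging it REQ_CLONE, so blk_rq_unprep_clone() disappears as well. A minimal sketch of the resulting call sequence for dispatching a clone to another queue; my_clone_and_insert is an illustrative name:

static int my_clone_and_insert(struct request_queue *q,
			       struct request *clone, struct request *orig)
{
	blk_rq_prep_clone(clone, orig);		/* shares orig's bios, marks REQ_CLONE */
	return blk_insert_cloned_request(q, clone);
}
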
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 45a91474487d..638b324f0291 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -39,6 +39,7 @@ typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct reques
39typedef int (elevator_init_fn) (struct request_queue *, 39typedef int (elevator_init_fn) (struct request_queue *,
40 struct elevator_type *e); 40 struct elevator_type *e);
41typedef void (elevator_exit_fn) (struct elevator_queue *); 41typedef void (elevator_exit_fn) (struct elevator_queue *);
42typedef void (elevator_registered_fn) (struct request_queue *);
42 43
43struct elevator_ops 44struct elevator_ops
44{ 45{
@@ -68,6 +69,7 @@ struct elevator_ops
68 69
69 elevator_init_fn *elevator_init_fn; 70 elevator_init_fn *elevator_init_fn;
70 elevator_exit_fn *elevator_exit_fn; 71 elevator_exit_fn *elevator_exit_fn;
72 elevator_registered_fn *elevator_registered_fn;
71}; 73};
72 74
73#define ELV_NAME_MAX (16) 75#define ELV_NAME_MAX (16)
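elevator_registered_fn gives an I/O scheduler a callback once it has been registered on a request queue. A sketch of wiring it up; my_registered and my_iosched are illustrative names and the other mandatory elevator_ops hooks are omitted:

static void my_registered(struct request_queue *q)
{
	pr_info("my-iosched: now active on a queue\n");
}

static struct elevator_type my_iosched = {
	.ops = {
		.elevator_registered_fn	= my_registered,
	},
	.elevator_name	= "my-iosched",
	.elevator_owner	= THIS_MODULE,
};
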
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b577e801b4af..5db7b1379d17 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2280,6 +2280,9 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
2280extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, 2280extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
2281 void *holder); 2281 void *holder);
2282extern void blkdev_put(struct block_device *bdev, fmode_t mode); 2282extern void blkdev_put(struct block_device *bdev, fmode_t mode);
2283extern int __blkdev_reread_part(struct block_device *bdev);
2284extern int blkdev_reread_part(struct block_device *bdev);
2285
2283#ifdef CONFIG_SYSFS 2286#ifdef CONFIG_SYSFS
2284extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); 2287extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
2285extern void bd_unlink_disk_holder(struct block_device *bdev, 2288extern void bd_unlink_disk_holder(struct block_device *bdev,
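blkdev_reread_part() and __blkdev_reread_part() are now exported so drivers such as loop and nbd can trigger a partition rescan themselves; presumably the plain variant takes bd_mutex itself while the __ variant is for callers already holding it. A hypothetical sketch of a driver rescanning after a capacity change (my_update_capacity is an illustrative name):

static int my_update_capacity(struct gendisk *disk, struct block_device *bdev,
			      sector_t new_sectors)
{
	set_capacity(disk, new_sectors);
	return blkdev_reread_part(bdev);	/* rescan the partition table */
}
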
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 93b5ca754b5b..a633898f36ac 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -39,6 +39,19 @@
39 39
40struct device; 40struct device;
41 41
42/* IDE-specific values for req->cmd_type */
43enum ata_cmd_type_bits {
44 REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1,
45 REQ_TYPE_ATA_PC,
46 REQ_TYPE_ATA_SENSE, /* sense request */
47 REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */
48 REQ_TYPE_ATA_PM_RESUME, /* resume request */
49};
50
51#define ata_pm_request(rq) \
52 ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \
53 (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME)
54
42/* Error codes returned in rq->errors to the higher part of the driver. */ 55/* Error codes returned in rq->errors to the higher part of the driver. */
43enum { 56enum {
44 IDE_DRV_ERROR_GENERAL = 101, 57 IDE_DRV_ERROR_GENERAL = 101,
@@ -1314,6 +1327,19 @@ struct ide_port_info {
1314 u8 udma_mask; 1327 u8 udma_mask;
1315}; 1328};
1316 1329
1330/*
1331 * State information carried for REQ_TYPE_ATA_PM_SUSPEND and REQ_TYPE_ATA_PM_RESUME
1332 * requests.
1333 */
1334struct ide_pm_state {
1335 /* PM state machine step value, currently driver specific */
1336 int pm_step;
1337 /* requested PM state value (S1, S2, S3, S4, ...) */
1338 u32 pm_state;
1339 void* data; /* for driver use */
1340};
1341
1342
1317int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *); 1343int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *);
1318int ide_pci_init_two(struct pci_dev *, struct pci_dev *, 1344int ide_pci_init_two(struct pci_dev *, struct pci_dev *,
1319 const struct ide_port_info *, void *); 1345 const struct ide_port_info *, void *);
@@ -1551,4 +1577,5 @@ static inline void ide_set_drivedata(ide_drive_t *drive, void *data)
1551#define ide_host_for_each_port(i, port, host) \ 1577#define ide_host_for_each_port(i, port, host) \
1552 for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++) 1578 for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++)
1553 1579
1580
1554#endif /* _IDE_H */ 1581#endif /* _IDE_H */
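The PM, sense and taskfile request types are now IDE-private values layered on REQ_TYPE_DRV_PRIV, with ata_pm_request() replacing the old blk_pm_request() test. A sketch of how a dispatch path can tell them apart (my_dispatch is an illustrative name):

static void my_dispatch(ide_drive_t *drive, struct request *rq)
{
	if (ata_pm_request(rq)) {
		/* REQ_TYPE_ATA_PM_SUSPEND or REQ_TYPE_ATA_PM_RESUME */
		return;
	}

	switch (rq->cmd_type) {
	case REQ_TYPE_ATA_TASKFILE:
	case REQ_TYPE_ATA_PC:
	case REQ_TYPE_ATA_SENSE:
		/* driver-private commands carried in rq->special / rq->cmd[] */
		break;
	default:
		/* REQ_TYPE_FS and friends go through the normal block paths */
		break;
	}
}
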
diff --git a/include/linux/swap.h b/include/linux/swap.h
index cee108cbe2d5..38874729dc5f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -377,7 +377,6 @@ extern void end_swap_bio_write(struct bio *bio, int err);
377extern int __swap_writepage(struct page *page, struct writeback_control *wbc, 377extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
378 void (*end_write_func)(struct bio *, int)); 378 void (*end_write_func)(struct bio *, int));
379extern int swap_set_page_dirty(struct page *page); 379extern int swap_set_page_dirty(struct page *page);
380extern void end_swap_bio_read(struct bio *bio, int err);
381 380
382int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, 381int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
383 unsigned long nr_pages, sector_t start_block); 382 unsigned long nr_pages, sector_t start_block);
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h
index 4f52549b23ff..e08e413d5f71 100644
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -44,8 +44,6 @@ enum {
44/* there is a gap here to match userspace */ 44/* there is a gap here to match userspace */
45#define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ 45#define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */
46 46
47#define nbd_cmd(req) ((req)->cmd[0])
48
49/* userspace doesn't need the nbd_device structure */ 47/* userspace doesn't need the nbd_device structure */
50 48
51/* These are sent over the network in the request/reply magic fields */ 49/* These are sent over the network in the request/reply magic fields */
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bff11ef..cb880a14cc39 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,8 +7,7 @@ obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
7obj-$(CONFIG_FREEZER) += process.o 7obj-$(CONFIG_FREEZER) += process.o
8obj-$(CONFIG_SUSPEND) += suspend.o 8obj-$(CONFIG_SUSPEND) += suspend.o
9obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o 9obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
10obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ 10obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o
11 block_io.o
12obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o 11obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o
13obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o 12obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
14 13
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
deleted file mode 100644
index 9a58bc258810..000000000000
--- a/kernel/power/block_io.c
+++ /dev/null
@@ -1,103 +0,0 @@
1/*
2 * This file provides functions for block I/O operations on swap/file.
3 *
4 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
5 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
6 *
7 * This file is released under the GPLv2.
8 */
9
10#include <linux/bio.h>
11#include <linux/kernel.h>
12#include <linux/pagemap.h>
13#include <linux/swap.h>
14
15#include "power.h"
16
17/**
18 * submit - submit BIO request.
19 * @rw: READ or WRITE.
20 * @off physical offset of page.
21 * @page: page we're reading or writing.
22 * @bio_chain: list of pending biod (for async reading)
23 *
24 * Straight from the textbook - allocate and initialize the bio.
25 * If we're reading, make sure the page is marked as dirty.
26 * Then submit it and, if @bio_chain == NULL, wait.
27 */
28static int submit(int rw, struct block_device *bdev, sector_t sector,
29 struct page *page, struct bio **bio_chain)
30{
31 const int bio_rw = rw | REQ_SYNC;
32 struct bio *bio;
33
34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
35 bio->bi_iter.bi_sector = sector;
36 bio->bi_bdev = bdev;
37 bio->bi_end_io = end_swap_bio_read;
38
39 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
40 printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
41 (unsigned long long)sector);
42 bio_put(bio);
43 return -EFAULT;
44 }
45
46 lock_page(page);
47 bio_get(bio);
48
49 if (bio_chain == NULL) {
50 submit_bio(bio_rw, bio);
51 wait_on_page_locked(page);
52 if (rw == READ)
53 bio_set_pages_dirty(bio);
54 bio_put(bio);
55 } else {
56 if (rw == READ)
57 get_page(page); /* These pages are freed later */
58 bio->bi_private = *bio_chain;
59 *bio_chain = bio;
60 submit_bio(bio_rw, bio);
61 }
62 return 0;
63}
64
65int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
66{
67 return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
68 virt_to_page(addr), bio_chain);
69}
70
71int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
72{
73 return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
74 virt_to_page(addr), bio_chain);
75}
76
77int hib_wait_on_bio_chain(struct bio **bio_chain)
78{
79 struct bio *bio;
80 struct bio *next_bio;
81 int ret = 0;
82
83 if (bio_chain == NULL)
84 return 0;
85
86 bio = *bio_chain;
87 if (bio == NULL)
88 return 0;
89 while (bio) {
90 struct page *page;
91
92 next_bio = bio->bi_private;
93 page = bio->bi_io_vec[0].bv_page;
94 wait_on_page_locked(page);
95 if (!PageUptodate(page) || PageError(page))
96 ret = -EIO;
97 put_page(page);
98 bio_put(bio);
99 bio = next_bio;
100 }
101 *bio_chain = NULL;
102 return ret;
103}
diff --git a/kernel/power/power.h b/kernel/power/power.h
index ce9b8328a689..caadb566e82b 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -163,15 +163,6 @@ extern void swsusp_close(fmode_t);
163extern int swsusp_unmark(void); 163extern int swsusp_unmark(void);
164#endif 164#endif
165 165
166/* kernel/power/block_io.c */
167extern struct block_device *hib_resume_bdev;
168
169extern int hib_bio_read_page(pgoff_t page_off, void *addr,
170 struct bio **bio_chain);
171extern int hib_bio_write_page(pgoff_t page_off, void *addr,
172 struct bio **bio_chain);
173extern int hib_wait_on_bio_chain(struct bio **bio_chain);
174
175struct timeval; 166struct timeval;
176/* kernel/power/swsusp.c */ 167/* kernel/power/swsusp.c */
177extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *); 168extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 570aff817543..2f30ca91e4fa 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -212,7 +212,84 @@ int swsusp_swap_in_use(void)
212 */ 212 */
213 213
214static unsigned short root_swap = 0xffff; 214static unsigned short root_swap = 0xffff;
215struct block_device *hib_resume_bdev; 215static struct block_device *hib_resume_bdev;
216
217struct hib_bio_batch {
218 atomic_t count;
219 wait_queue_head_t wait;
220 int error;
221};
222
223static void hib_init_batch(struct hib_bio_batch *hb)
224{
225 atomic_set(&hb->count, 0);
226 init_waitqueue_head(&hb->wait);
227 hb->error = 0;
228}
229
230static void hib_end_io(struct bio *bio, int error)
231{
232 struct hib_bio_batch *hb = bio->bi_private;
233 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
234 struct page *page = bio->bi_io_vec[0].bv_page;
235
236 if (!uptodate || error) {
237 printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
238 imajor(bio->bi_bdev->bd_inode),
239 iminor(bio->bi_bdev->bd_inode),
240 (unsigned long long)bio->bi_iter.bi_sector);
241
242 if (!error)
243 error = -EIO;
244 }
245
246 if (bio_data_dir(bio) == WRITE)
247 put_page(page);
248
249 if (error && !hb->error)
250 hb->error = error;
251 if (atomic_dec_and_test(&hb->count))
252 wake_up(&hb->wait);
253
254 bio_put(bio);
255}
256
257static int hib_submit_io(int rw, pgoff_t page_off, void *addr,
258 struct hib_bio_batch *hb)
259{
260 struct page *page = virt_to_page(addr);
261 struct bio *bio;
262 int error = 0;
263
264 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
265 bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
266 bio->bi_bdev = hib_resume_bdev;
267
268 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
269 printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
270 (unsigned long long)bio->bi_iter.bi_sector);
271 bio_put(bio);
272 return -EFAULT;
273 }
274
275 if (hb) {
276 bio->bi_end_io = hib_end_io;
277 bio->bi_private = hb;
278 atomic_inc(&hb->count);
279 submit_bio(rw, bio);
280 } else {
281 error = submit_bio_wait(rw, bio);
282 bio_put(bio);
283 }
284
285 return error;
286}
287
288static int hib_wait_io(struct hib_bio_batch *hb)
289{
290 wait_event(hb->wait, atomic_read(&hb->count) == 0);
291 return hb->error;
292}
216 293
217/* 294/*
218 * Saving part 295 * Saving part
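hib_bio_batch replaces the hand-rolled bio chain: each asynchronous hib_submit_io() bumps hb->count, hib_end_io() records the first error and wakes the waiter on the last completion, and hib_wait_io() returns that error. A condensed usage sketch mirroring what load_image() below does; my_read_pages is an illustrative name:

static int my_read_pages(void **pages, unsigned int nr, pgoff_t first_page)
{
	struct hib_bio_batch hb;
	unsigned int i;
	int ret = 0, err2;

	hib_init_batch(&hb);
	for (i = 0; i < nr && !ret; i++)
		ret = hib_submit_io(READ_SYNC, first_page + i, pages[i], &hb);
	err2 = hib_wait_io(&hb);	/* first error seen by any completion */
	return ret ? ret : err2;
}
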
@@ -222,7 +299,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
222{ 299{
223 int error; 300 int error;
224 301
225 hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); 302 hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL);
226 if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || 303 if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
227 !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { 304 !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
228 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); 305 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
@@ -231,7 +308,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
231 swsusp_header->flags = flags; 308 swsusp_header->flags = flags;
232 if (flags & SF_CRC32_MODE) 309 if (flags & SF_CRC32_MODE)
233 swsusp_header->crc32 = handle->crc32; 310 swsusp_header->crc32 = handle->crc32;
234 error = hib_bio_write_page(swsusp_resume_block, 311 error = hib_submit_io(WRITE_SYNC, swsusp_resume_block,
235 swsusp_header, NULL); 312 swsusp_header, NULL);
236 } else { 313 } else {
237 printk(KERN_ERR "PM: Swap header not found!\n"); 314 printk(KERN_ERR "PM: Swap header not found!\n");
@@ -271,10 +348,10 @@ static int swsusp_swap_check(void)
271 * write_page - Write one page to given swap location. 348 * write_page - Write one page to given swap location.
272 * @buf: Address we're writing. 349 * @buf: Address we're writing.
273 * @offset: Offset of the swap page we're writing to. 350 * @offset: Offset of the swap page we're writing to.
274 * @bio_chain: Link the next write BIO here 351 * @hb: bio completion batch
275 */ 352 */
276 353
277static int write_page(void *buf, sector_t offset, struct bio **bio_chain) 354static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
278{ 355{
279 void *src; 356 void *src;
280 int ret; 357 int ret;
@@ -282,13 +359,13 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
282 if (!offset) 359 if (!offset)
283 return -ENOSPC; 360 return -ENOSPC;
284 361
285 if (bio_chain) { 362 if (hb) {
286 src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | 363 src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
287 __GFP_NORETRY); 364 __GFP_NORETRY);
288 if (src) { 365 if (src) {
289 copy_page(src, buf); 366 copy_page(src, buf);
290 } else { 367 } else {
291 ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ 368 ret = hib_wait_io(hb); /* Free pages */
292 if (ret) 369 if (ret)
293 return ret; 370 return ret;
294 src = (void *)__get_free_page(__GFP_WAIT | 371 src = (void *)__get_free_page(__GFP_WAIT |
@@ -298,14 +375,14 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
298 copy_page(src, buf); 375 copy_page(src, buf);
299 } else { 376 } else {
300 WARN_ON_ONCE(1); 377 WARN_ON_ONCE(1);
301 bio_chain = NULL; /* Go synchronous */ 378 hb = NULL; /* Go synchronous */
302 src = buf; 379 src = buf;
303 } 380 }
304 } 381 }
305 } else { 382 } else {
306 src = buf; 383 src = buf;
307 } 384 }
308 return hib_bio_write_page(offset, src, bio_chain); 385 return hib_submit_io(WRITE_SYNC, offset, src, hb);
309} 386}
310 387
311static void release_swap_writer(struct swap_map_handle *handle) 388static void release_swap_writer(struct swap_map_handle *handle)
@@ -348,7 +425,7 @@ err_close:
348} 425}
349 426
350static int swap_write_page(struct swap_map_handle *handle, void *buf, 427static int swap_write_page(struct swap_map_handle *handle, void *buf,
351 struct bio **bio_chain) 428 struct hib_bio_batch *hb)
352{ 429{
353 int error = 0; 430 int error = 0;
354 sector_t offset; 431 sector_t offset;
@@ -356,7 +433,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
356 if (!handle->cur) 433 if (!handle->cur)
357 return -EINVAL; 434 return -EINVAL;
358 offset = alloc_swapdev_block(root_swap); 435 offset = alloc_swapdev_block(root_swap);
359 error = write_page(buf, offset, bio_chain); 436 error = write_page(buf, offset, hb);
360 if (error) 437 if (error)
361 return error; 438 return error;
362 handle->cur->entries[handle->k++] = offset; 439 handle->cur->entries[handle->k++] = offset;
@@ -365,15 +442,15 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
365 if (!offset) 442 if (!offset)
366 return -ENOSPC; 443 return -ENOSPC;
367 handle->cur->next_swap = offset; 444 handle->cur->next_swap = offset;
368 error = write_page(handle->cur, handle->cur_swap, bio_chain); 445 error = write_page(handle->cur, handle->cur_swap, hb);
369 if (error) 446 if (error)
370 goto out; 447 goto out;
371 clear_page(handle->cur); 448 clear_page(handle->cur);
372 handle->cur_swap = offset; 449 handle->cur_swap = offset;
373 handle->k = 0; 450 handle->k = 0;
374 451
375 if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { 452 if (hb && low_free_pages() <= handle->reqd_free_pages) {
376 error = hib_wait_on_bio_chain(bio_chain); 453 error = hib_wait_io(hb);
377 if (error) 454 if (error)
378 goto out; 455 goto out;
379 /* 456 /*
@@ -445,23 +522,24 @@ static int save_image(struct swap_map_handle *handle,
445 int ret; 522 int ret;
446 int nr_pages; 523 int nr_pages;
447 int err2; 524 int err2;
448 struct bio *bio; 525 struct hib_bio_batch hb;
449 ktime_t start; 526 ktime_t start;
450 ktime_t stop; 527 ktime_t stop;
451 528
529 hib_init_batch(&hb);
530
452 printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n", 531 printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n",
453 nr_to_write); 532 nr_to_write);
454 m = nr_to_write / 10; 533 m = nr_to_write / 10;
455 if (!m) 534 if (!m)
456 m = 1; 535 m = 1;
457 nr_pages = 0; 536 nr_pages = 0;
458 bio = NULL;
459 start = ktime_get(); 537 start = ktime_get();
460 while (1) { 538 while (1) {
461 ret = snapshot_read_next(snapshot); 539 ret = snapshot_read_next(snapshot);
462 if (ret <= 0) 540 if (ret <= 0)
463 break; 541 break;
464 ret = swap_write_page(handle, data_of(*snapshot), &bio); 542 ret = swap_write_page(handle, data_of(*snapshot), &hb);
465 if (ret) 543 if (ret)
466 break; 544 break;
467 if (!(nr_pages % m)) 545 if (!(nr_pages % m))
@@ -469,7 +547,7 @@ static int save_image(struct swap_map_handle *handle,
469 nr_pages / m * 10); 547 nr_pages / m * 10);
470 nr_pages++; 548 nr_pages++;
471 } 549 }
472 err2 = hib_wait_on_bio_chain(&bio); 550 err2 = hib_wait_io(&hb);
473 stop = ktime_get(); 551 stop = ktime_get();
474 if (!ret) 552 if (!ret)
475 ret = err2; 553 ret = err2;
@@ -580,7 +658,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
580 int ret = 0; 658 int ret = 0;
581 int nr_pages; 659 int nr_pages;
582 int err2; 660 int err2;
583 struct bio *bio; 661 struct hib_bio_batch hb;
584 ktime_t start; 662 ktime_t start;
585 ktime_t stop; 663 ktime_t stop;
586 size_t off; 664 size_t off;
@@ -589,6 +667,8 @@ static int save_image_lzo(struct swap_map_handle *handle,
589 struct cmp_data *data = NULL; 667 struct cmp_data *data = NULL;
590 struct crc_data *crc = NULL; 668 struct crc_data *crc = NULL;
591 669
670 hib_init_batch(&hb);
671
592 /* 672 /*
593 * We'll limit the number of threads for compression to limit memory 673 * We'll limit the number of threads for compression to limit memory
594 * footprint. 674 * footprint.
@@ -674,7 +754,6 @@ static int save_image_lzo(struct swap_map_handle *handle,
674 if (!m) 754 if (!m)
675 m = 1; 755 m = 1;
676 nr_pages = 0; 756 nr_pages = 0;
677 bio = NULL;
678 start = ktime_get(); 757 start = ktime_get();
679 for (;;) { 758 for (;;) {
680 for (thr = 0; thr < nr_threads; thr++) { 759 for (thr = 0; thr < nr_threads; thr++) {
@@ -748,7 +827,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
748 off += PAGE_SIZE) { 827 off += PAGE_SIZE) {
749 memcpy(page, data[thr].cmp + off, PAGE_SIZE); 828 memcpy(page, data[thr].cmp + off, PAGE_SIZE);
750 829
751 ret = swap_write_page(handle, page, &bio); 830 ret = swap_write_page(handle, page, &hb);
752 if (ret) 831 if (ret)
753 goto out_finish; 832 goto out_finish;
754 } 833 }
@@ -759,7 +838,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
759 } 838 }
760 839
761out_finish: 840out_finish:
762 err2 = hib_wait_on_bio_chain(&bio); 841 err2 = hib_wait_io(&hb);
763 stop = ktime_get(); 842 stop = ktime_get();
764 if (!ret) 843 if (!ret)
765 ret = err2; 844 ret = err2;
@@ -906,7 +985,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
906 return -ENOMEM; 985 return -ENOMEM;
907 } 986 }
908 987
909 error = hib_bio_read_page(offset, tmp->map, NULL); 988 error = hib_submit_io(READ_SYNC, offset, tmp->map, NULL);
910 if (error) { 989 if (error) {
911 release_swap_reader(handle); 990 release_swap_reader(handle);
912 return error; 991 return error;
@@ -919,7 +998,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
919} 998}
920 999
921static int swap_read_page(struct swap_map_handle *handle, void *buf, 1000static int swap_read_page(struct swap_map_handle *handle, void *buf,
922 struct bio **bio_chain) 1001 struct hib_bio_batch *hb)
923{ 1002{
924 sector_t offset; 1003 sector_t offset;
925 int error; 1004 int error;
@@ -930,7 +1009,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
930 offset = handle->cur->entries[handle->k]; 1009 offset = handle->cur->entries[handle->k];
931 if (!offset) 1010 if (!offset)
932 return -EFAULT; 1011 return -EFAULT;
933 error = hib_bio_read_page(offset, buf, bio_chain); 1012 error = hib_submit_io(READ_SYNC, offset, buf, hb);
934 if (error) 1013 if (error)
935 return error; 1014 return error;
936 if (++handle->k >= MAP_PAGE_ENTRIES) { 1015 if (++handle->k >= MAP_PAGE_ENTRIES) {
@@ -968,27 +1047,28 @@ static int load_image(struct swap_map_handle *handle,
968 int ret = 0; 1047 int ret = 0;
969 ktime_t start; 1048 ktime_t start;
970 ktime_t stop; 1049 ktime_t stop;
971 struct bio *bio; 1050 struct hib_bio_batch hb;
972 int err2; 1051 int err2;
973 unsigned nr_pages; 1052 unsigned nr_pages;
974 1053
1054 hib_init_batch(&hb);
1055
975 printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n", 1056 printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n",
976 nr_to_read); 1057 nr_to_read);
977 m = nr_to_read / 10; 1058 m = nr_to_read / 10;
978 if (!m) 1059 if (!m)
979 m = 1; 1060 m = 1;
980 nr_pages = 0; 1061 nr_pages = 0;
981 bio = NULL;
982 start = ktime_get(); 1062 start = ktime_get();
983 for ( ; ; ) { 1063 for ( ; ; ) {
984 ret = snapshot_write_next(snapshot); 1064 ret = snapshot_write_next(snapshot);
985 if (ret <= 0) 1065 if (ret <= 0)
986 break; 1066 break;
987 ret = swap_read_page(handle, data_of(*snapshot), &bio); 1067 ret = swap_read_page(handle, data_of(*snapshot), &hb);
988 if (ret) 1068 if (ret)
989 break; 1069 break;
990 if (snapshot->sync_read) 1070 if (snapshot->sync_read)
991 ret = hib_wait_on_bio_chain(&bio); 1071 ret = hib_wait_io(&hb);
992 if (ret) 1072 if (ret)
993 break; 1073 break;
994 if (!(nr_pages % m)) 1074 if (!(nr_pages % m))
@@ -996,7 +1076,7 @@ static int load_image(struct swap_map_handle *handle,
996 nr_pages / m * 10); 1076 nr_pages / m * 10);
997 nr_pages++; 1077 nr_pages++;
998 } 1078 }
999 err2 = hib_wait_on_bio_chain(&bio); 1079 err2 = hib_wait_io(&hb);
1000 stop = ktime_get(); 1080 stop = ktime_get();
1001 if (!ret) 1081 if (!ret)
1002 ret = err2; 1082 ret = err2;
@@ -1067,7 +1147,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
1067 unsigned int m; 1147 unsigned int m;
1068 int ret = 0; 1148 int ret = 0;
1069 int eof = 0; 1149 int eof = 0;
1070 struct bio *bio; 1150 struct hib_bio_batch hb;
1071 ktime_t start; 1151 ktime_t start;
1072 ktime_t stop; 1152 ktime_t stop;
1073 unsigned nr_pages; 1153 unsigned nr_pages;
@@ -1080,6 +1160,8 @@ static int load_image_lzo(struct swap_map_handle *handle,
1080 struct dec_data *data = NULL; 1160 struct dec_data *data = NULL;
1081 struct crc_data *crc = NULL; 1161 struct crc_data *crc = NULL;
1082 1162
1163 hib_init_batch(&hb);
1164
1083 /* 1165 /*
1084 * We'll limit the number of threads for decompression to limit memory 1166 * We'll limit the number of threads for decompression to limit memory
1085 * footprint. 1167 * footprint.
@@ -1190,7 +1272,6 @@ static int load_image_lzo(struct swap_map_handle *handle,
1190 if (!m) 1272 if (!m)
1191 m = 1; 1273 m = 1;
1192 nr_pages = 0; 1274 nr_pages = 0;
1193 bio = NULL;
1194 start = ktime_get(); 1275 start = ktime_get();
1195 1276
1196 ret = snapshot_write_next(snapshot); 1277 ret = snapshot_write_next(snapshot);
@@ -1199,7 +1280,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
1199 1280
1200 for(;;) { 1281 for(;;) {
1201 for (i = 0; !eof && i < want; i++) { 1282 for (i = 0; !eof && i < want; i++) {
1202 ret = swap_read_page(handle, page[ring], &bio); 1283 ret = swap_read_page(handle, page[ring], &hb);
1203 if (ret) { 1284 if (ret) {
1204 /* 1285 /*
1205 * On real read error, finish. On end of data, 1286 * On real read error, finish. On end of data,
@@ -1226,7 +1307,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
1226 if (!asked) 1307 if (!asked)
1227 break; 1308 break;
1228 1309
1229 ret = hib_wait_on_bio_chain(&bio); 1310 ret = hib_wait_io(&hb);
1230 if (ret) 1311 if (ret)
1231 goto out_finish; 1312 goto out_finish;
1232 have += asked; 1313 have += asked;
@@ -1281,7 +1362,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
1281 * Wait for more data while we are decompressing. 1362 * Wait for more data while we are decompressing.
1282 */ 1363 */
1283 if (have < LZO_CMP_PAGES && asked) { 1364 if (have < LZO_CMP_PAGES && asked) {
1284 ret = hib_wait_on_bio_chain(&bio); 1365 ret = hib_wait_io(&hb);
1285 if (ret) 1366 if (ret)
1286 goto out_finish; 1367 goto out_finish;
1287 have += asked; 1368 have += asked;
@@ -1430,7 +1511,7 @@ int swsusp_check(void)
1430 if (!IS_ERR(hib_resume_bdev)) { 1511 if (!IS_ERR(hib_resume_bdev)) {
1431 set_blocksize(hib_resume_bdev, PAGE_SIZE); 1512 set_blocksize(hib_resume_bdev, PAGE_SIZE);
1432 clear_page(swsusp_header); 1513 clear_page(swsusp_header);
1433 error = hib_bio_read_page(swsusp_resume_block, 1514 error = hib_submit_io(READ_SYNC, swsusp_resume_block,
1434 swsusp_header, NULL); 1515 swsusp_header, NULL);
1435 if (error) 1516 if (error)
1436 goto put; 1517 goto put;
@@ -1438,7 +1519,7 @@ int swsusp_check(void)
1438 if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { 1519 if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
1439 memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); 1520 memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
1440 /* Reset swap signature now */ 1521 /* Reset swap signature now */
1441 error = hib_bio_write_page(swsusp_resume_block, 1522 error = hib_submit_io(WRITE_SYNC, swsusp_resume_block,
1442 swsusp_header, NULL); 1523 swsusp_header, NULL);
1443 } else { 1524 } else {
1444 error = -EINVAL; 1525 error = -EINVAL;
@@ -1482,10 +1563,10 @@ int swsusp_unmark(void)
1482{ 1563{
1483 int error; 1564 int error;
1484 1565
1485 hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); 1566 hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL);
1486 if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { 1567 if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) {
1487 memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); 1568 memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10);
1488 error = hib_bio_write_page(swsusp_resume_block, 1569 error = hib_submit_io(WRITE_SYNC, swsusp_resume_block,
1489 swsusp_header, NULL); 1570 swsusp_header, NULL);
1490 } else { 1571 } else {
1491 printk(KERN_ERR "PM: Cannot find swsusp signature!\n"); 1572 printk(KERN_ERR "PM: Cannot find swsusp signature!\n");
diff --git a/mm/page_io.c b/mm/page_io.c
index 6424869e275e..520baa4b04d7 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -69,7 +69,7 @@ void end_swap_bio_write(struct bio *bio, int err)
69 bio_put(bio); 69 bio_put(bio);
70} 70}
71 71
72void end_swap_bio_read(struct bio *bio, int err) 72static void end_swap_bio_read(struct bio *bio, int err)
73{ 73{
74 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 74 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
75 struct page *page = bio->bi_io_vec[0].bv_page; 75 struct page *page = bio->bi_io_vec[0].bv_page;