Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bcache/Kconfig      |   1
-rw-r--r--  drivers/md/bcache/bcache.h     |   2
-rw-r--r--  drivers/md/bcache/stats.c      |  34
-rw-r--r--  drivers/md/bcache/super.c      | 185
-rw-r--r--  drivers/md/bcache/writeback.c  |   2
-rw-r--r--  drivers/md/md.c                |   2
-rw-r--r--  drivers/md/raid1.c             |  38
-rw-r--r--  drivers/md/raid10.c            |  29
-rw-r--r--  drivers/md/raid5.c             |   6
9 files changed, 149 insertions(+), 150 deletions(-)
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index 05c220d05e23..f950c9d29f3e 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -1,7 +1,6 @@
1 1
2config BCACHE 2config BCACHE
3 tristate "Block device as cache" 3 tristate "Block device as cache"
4 select CLOSURES
5 ---help--- 4 ---help---
6 Allows a block device to be used as cache for other devices; uses 5 Allows a block device to be used as cache for other devices; uses
7 a btree for indexing and the layout is optimized for SSDs. 6 a btree for indexing and the layout is optimized for SSDs.
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 340146d7c17f..d3e15b42a4ab 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -1241,7 +1241,7 @@ void bch_cache_set_stop(struct cache_set *);
1241struct cache_set *bch_cache_set_alloc(struct cache_sb *); 1241struct cache_set *bch_cache_set_alloc(struct cache_sb *);
1242void bch_btree_cache_free(struct cache_set *); 1242void bch_btree_cache_free(struct cache_set *);
1243int bch_btree_cache_alloc(struct cache_set *); 1243int bch_btree_cache_alloc(struct cache_set *);
1244void bch_writeback_init_cached_dev(struct cached_dev *); 1244void bch_cached_dev_writeback_init(struct cached_dev *);
1245void bch_moving_init_cache_set(struct cache_set *); 1245void bch_moving_init_cache_set(struct cache_set *);
1246 1246
1247void bch_cache_allocator_exit(struct cache *ca); 1247void bch_cache_allocator_exit(struct cache *ca);
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index 64e679449c2a..b8730e714d69 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -93,24 +93,6 @@ static struct attribute *bch_stats_files[] = {
93}; 93};
94static KTYPE(bch_stats); 94static KTYPE(bch_stats);
95 95
96static void scale_accounting(unsigned long data);
97
98void bch_cache_accounting_init(struct cache_accounting *acc,
99 struct closure *parent)
100{
101 kobject_init(&acc->total.kobj, &bch_stats_ktype);
102 kobject_init(&acc->five_minute.kobj, &bch_stats_ktype);
103 kobject_init(&acc->hour.kobj, &bch_stats_ktype);
104 kobject_init(&acc->day.kobj, &bch_stats_ktype);
105
106 closure_init(&acc->cl, parent);
107 init_timer(&acc->timer);
108 acc->timer.expires = jiffies + accounting_delay;
109 acc->timer.data = (unsigned long) acc;
110 acc->timer.function = scale_accounting;
111 add_timer(&acc->timer);
112}
113
114int bch_cache_accounting_add_kobjs(struct cache_accounting *acc, 96int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
115 struct kobject *parent) 97 struct kobject *parent)
116{ 98{
@@ -244,3 +226,19 @@ void bch_mark_sectors_bypassed(struct search *s, int sectors)
244 atomic_add(sectors, &dc->accounting.collector.sectors_bypassed); 226 atomic_add(sectors, &dc->accounting.collector.sectors_bypassed);
245 atomic_add(sectors, &s->op.c->accounting.collector.sectors_bypassed); 227 atomic_add(sectors, &s->op.c->accounting.collector.sectors_bypassed);
246} 228}
229
230void bch_cache_accounting_init(struct cache_accounting *acc,
231 struct closure *parent)
232{
233 kobject_init(&acc->total.kobj, &bch_stats_ktype);
234 kobject_init(&acc->five_minute.kobj, &bch_stats_ktype);
235 kobject_init(&acc->hour.kobj, &bch_stats_ktype);
236 kobject_init(&acc->day.kobj, &bch_stats_ktype);
237
238 closure_init(&acc->cl, parent);
239 init_timer(&acc->timer);
240 acc->timer.expires = jiffies + accounting_delay;
241 acc->timer.data = (unsigned long) acc;
242 acc->timer.function = scale_accounting;
243 add_timer(&acc->timer);
244}
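
The relocated bch_cache_accounting_init() keeps its behaviour; only its position in the file changes. For context, its single caller is cached_dev_init() in super.c (see the hunks below), which parents the accounting closure on the device closure so both are torn down together. A minimal usage sketch, with the lines taken from that hunk:

        closure_init(&dc->disk.cl, NULL);
        set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
        ...
        bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);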
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index c8046bc4aa57..f88e2b653a3f 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -634,11 +634,10 @@ static int open_dev(struct block_device *b, fmode_t mode)
634 return 0; 634 return 0;
635} 635}
636 636
637static int release_dev(struct gendisk *b, fmode_t mode) 637static void release_dev(struct gendisk *b, fmode_t mode)
638{ 638{
639 struct bcache_device *d = b->private_data; 639 struct bcache_device *d = b->private_data;
640 closure_put(&d->cl); 640 closure_put(&d->cl);
641 return 0;
642} 641}
643 642
644static int ioctl_dev(struct block_device *b, fmode_t mode, 643static int ioctl_dev(struct block_device *b, fmode_t mode,
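
release_dev() loses its return value to match the block layer, where block_device_operations.release() became void in this kernel series. A sketch of the ops table these callbacks plug into (the member list is assumed from the surrounding super.c; it is not part of this hunk):

static const struct block_device_operations bcache_ops = {
        .open           = open_dev,
        .release        = release_dev,  /* now void (struct gendisk *, fmode_t) */
        .ioctl          = ioctl_dev,
        .owner          = THIS_MODULE,
};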
@@ -732,8 +731,7 @@ static void bcache_device_free(struct bcache_device *d)
732 731
733 if (d->c) 732 if (d->c)
734 bcache_device_detach(d); 733 bcache_device_detach(d);
735 734 if (d->disk && d->disk->flags & GENHD_FL_UP)
736 if (d->disk)
737 del_gendisk(d->disk); 735 del_gendisk(d->disk);
738 if (d->disk && d->disk->queue) 736 if (d->disk && d->disk->queue)
739 blk_cleanup_queue(d->disk->queue); 737 blk_cleanup_queue(d->disk->queue);
@@ -756,12 +754,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
756 if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || 754 if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
757 !(d->unaligned_bvec = mempool_create_kmalloc_pool(1, 755 !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
758 sizeof(struct bio_vec) * BIO_MAX_PAGES)) || 756 sizeof(struct bio_vec) * BIO_MAX_PAGES)) ||
759 bio_split_pool_init(&d->bio_split_hook)) 757 bio_split_pool_init(&d->bio_split_hook) ||
760 758 !(d->disk = alloc_disk(1)) ||
761 return -ENOMEM; 759 !(q = blk_alloc_queue(GFP_KERNEL)))
762
763 d->disk = alloc_disk(1);
764 if (!d->disk)
765 return -ENOMEM; 760 return -ENOMEM;
766 761
767 snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor); 762 snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
@@ -771,10 +766,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
771 d->disk->fops = &bcache_ops; 766 d->disk->fops = &bcache_ops;
772 d->disk->private_data = d; 767 d->disk->private_data = d;
773 768
774 q = blk_alloc_queue(GFP_KERNEL);
775 if (!q)
776 return -ENOMEM;
777
778 blk_queue_make_request(q, NULL); 769 blk_queue_make_request(q, NULL);
779 d->disk->queue = q; 770 d->disk->queue = q;
780 q->queuedata = d; 771 q->queuedata = d;
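
Folding alloc_disk() and blk_alloc_queue() into the single chained condition means bcache_device_init() can fail with a bare -ENOMEM while the device is only partially set up. That is safe only because the teardown path checks each member before freeing it, as the bcache_device_free() hunk above shows; roughly:

        if (d->c)
                bcache_device_detach(d);
        if (d->disk && d->disk->flags & GENHD_FL_UP)
                del_gendisk(d->disk);
        if (d->disk && d->disk->queue)
                blk_cleanup_queue(d->disk->queue);
        /* ... remaining members are released with the same NULL checks */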
@@ -999,14 +990,17 @@ static void cached_dev_free(struct closure *cl)
999 990
1000 mutex_lock(&bch_register_lock); 991 mutex_lock(&bch_register_lock);
1001 992
1002 bd_unlink_disk_holder(dc->bdev, dc->disk.disk); 993 if (atomic_read(&dc->running))
994 bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
1003 bcache_device_free(&dc->disk); 995 bcache_device_free(&dc->disk);
1004 list_del(&dc->list); 996 list_del(&dc->list);
1005 997
1006 mutex_unlock(&bch_register_lock); 998 mutex_unlock(&bch_register_lock);
1007 999
1008 if (!IS_ERR_OR_NULL(dc->bdev)) { 1000 if (!IS_ERR_OR_NULL(dc->bdev)) {
1009 blk_sync_queue(bdev_get_queue(dc->bdev)); 1001 if (dc->bdev->bd_disk)
1002 blk_sync_queue(bdev_get_queue(dc->bdev));
1003
1010 blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 1004 blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1011 } 1005 }
1012 1006
@@ -1028,73 +1022,67 @@ static void cached_dev_flush(struct closure *cl)
1028 1022
1029static int cached_dev_init(struct cached_dev *dc, unsigned block_size) 1023static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
1030{ 1024{
1031 int err; 1025 int ret;
1032 struct io *io; 1026 struct io *io;
1033 1027 struct request_queue *q = bdev_get_queue(dc->bdev);
1034 closure_init(&dc->disk.cl, NULL);
1035 set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
1036 1028
1037 __module_get(THIS_MODULE); 1029 __module_get(THIS_MODULE);
1038 INIT_LIST_HEAD(&dc->list); 1030 INIT_LIST_HEAD(&dc->list);
1031 closure_init(&dc->disk.cl, NULL);
1032 set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
1039 kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype); 1033 kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
1040
1041 bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
1042
1043 err = bcache_device_init(&dc->disk, block_size);
1044 if (err)
1045 goto err;
1046
1047 spin_lock_init(&dc->io_lock);
1048 closure_init_unlocked(&dc->sb_write);
1049 INIT_WORK(&dc->detach, cached_dev_detach_finish); 1034 INIT_WORK(&dc->detach, cached_dev_detach_finish);
1035 closure_init_unlocked(&dc->sb_write);
1036 INIT_LIST_HEAD(&dc->io_lru);
1037 spin_lock_init(&dc->io_lock);
1038 bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
1050 1039
1051 dc->sequential_merge = true; 1040 dc->sequential_merge = true;
1052 dc->sequential_cutoff = 4 << 20; 1041 dc->sequential_cutoff = 4 << 20;
1053 1042
1054 INIT_LIST_HEAD(&dc->io_lru);
1055 dc->sb_bio.bi_max_vecs = 1;
1056 dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs;
1057
1058 for (io = dc->io; io < dc->io + RECENT_IO; io++) { 1043 for (io = dc->io; io < dc->io + RECENT_IO; io++) {
1059 list_add(&io->lru, &dc->io_lru); 1044 list_add(&io->lru, &dc->io_lru);
1060 hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); 1045 hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
1061 } 1046 }
1062 1047
1063 bch_writeback_init_cached_dev(dc); 1048 ret = bcache_device_init(&dc->disk, block_size);
1049 if (ret)
1050 return ret;
1051
1052 set_capacity(dc->disk.disk,
1053 dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
1054
1055 dc->disk.disk->queue->backing_dev_info.ra_pages =
1056 max(dc->disk.disk->queue->backing_dev_info.ra_pages,
1057 q->backing_dev_info.ra_pages);
1058
1059 bch_cached_dev_request_init(dc);
1060 bch_cached_dev_writeback_init(dc);
1064 return 0; 1061 return 0;
1065err:
1066 bcache_device_stop(&dc->disk);
1067 return err;
1068} 1062}
1069 1063
1070/* Cached device - bcache superblock */ 1064/* Cached device - bcache superblock */
1071 1065
1072static const char *register_bdev(struct cache_sb *sb, struct page *sb_page, 1066static void register_bdev(struct cache_sb *sb, struct page *sb_page,
1073 struct block_device *bdev, 1067 struct block_device *bdev,
1074 struct cached_dev *dc) 1068 struct cached_dev *dc)
1075{ 1069{
1076 char name[BDEVNAME_SIZE]; 1070 char name[BDEVNAME_SIZE];
1077 const char *err = "cannot allocate memory"; 1071 const char *err = "cannot allocate memory";
1078 struct gendisk *g;
1079 struct cache_set *c; 1072 struct cache_set *c;
1080 1073
1081 if (!dc || cached_dev_init(dc, sb->block_size << 9) != 0)
1082 return err;
1083
1084 memcpy(&dc->sb, sb, sizeof(struct cache_sb)); 1074 memcpy(&dc->sb, sb, sizeof(struct cache_sb));
1085 dc->sb_bio.bi_io_vec[0].bv_page = sb_page;
1086 dc->bdev = bdev; 1075 dc->bdev = bdev;
1087 dc->bdev->bd_holder = dc; 1076 dc->bdev->bd_holder = dc;
1088 1077
1089 g = dc->disk.disk; 1078 bio_init(&dc->sb_bio);
1090 1079 dc->sb_bio.bi_max_vecs = 1;
1091 set_capacity(g, dc->bdev->bd_part->nr_sects - dc->sb.data_offset); 1080 dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs;
1092 1081 dc->sb_bio.bi_io_vec[0].bv_page = sb_page;
1093 g->queue->backing_dev_info.ra_pages = 1082 get_page(sb_page);
1094 max(g->queue->backing_dev_info.ra_pages,
1095 bdev->bd_queue->backing_dev_info.ra_pages);
1096 1083
1097 bch_cached_dev_request_init(dc); 1084 if (cached_dev_init(dc, sb->block_size << 9))
1085 goto err;
1098 1086
1099 err = "error creating kobject"; 1087 err = "error creating kobject";
1100 if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj, 1088 if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj,
@@ -1103,6 +1091,8 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
1103 if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) 1091 if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
1104 goto err; 1092 goto err;
1105 1093
1094 pr_info("registered backing device %s", bdevname(bdev, name));
1095
1106 list_add(&dc->list, &uncached_devices); 1096 list_add(&dc->list, &uncached_devices);
1107 list_for_each_entry(c, &bch_cache_sets, list) 1097 list_for_each_entry(c, &bch_cache_sets, list)
1108 bch_cached_dev_attach(dc, c); 1098 bch_cached_dev_attach(dc, c);
@@ -1111,15 +1101,10 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
1111 BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) 1101 BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
1112 bch_cached_dev_run(dc); 1102 bch_cached_dev_run(dc);
1113 1103
1114 return NULL; 1104 return;
1115err: 1105err:
1116 kobject_put(&dc->disk.kobj);
1117 pr_notice("error opening %s: %s", bdevname(bdev, name), err); 1106 pr_notice("error opening %s: %s", bdevname(bdev, name), err);
1118 /* 1107 bcache_device_stop(&dc->disk);
1119 * Return NULL instead of an error because kobject_put() cleans
1120 * everything up
1121 */
1122 return NULL;
1123} 1108}
1124 1109
1125/* Flash only volumes */ 1110/* Flash only volumes */
@@ -1717,20 +1702,11 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
1717 size_t free; 1702 size_t free;
1718 struct bucket *b; 1703 struct bucket *b;
1719 1704
1720 if (!ca)
1721 return -ENOMEM;
1722
1723 __module_get(THIS_MODULE); 1705 __module_get(THIS_MODULE);
1724 kobject_init(&ca->kobj, &bch_cache_ktype); 1706 kobject_init(&ca->kobj, &bch_cache_ktype);
1725 1707
1726 memcpy(&ca->sb, sb, sizeof(struct cache_sb));
1727
1728 INIT_LIST_HEAD(&ca->discards); 1708 INIT_LIST_HEAD(&ca->discards);
1729 1709
1730 bio_init(&ca->sb_bio);
1731 ca->sb_bio.bi_max_vecs = 1;
1732 ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs;
1733
1734 bio_init(&ca->journal.bio); 1710 bio_init(&ca->journal.bio);
1735 ca->journal.bio.bi_max_vecs = 8; 1711 ca->journal.bio.bi_max_vecs = 8;
1736 ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs; 1712 ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs;
@@ -1742,18 +1718,17 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
1742 !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || 1718 !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
1743 !init_fifo(&ca->unused, free << 2, GFP_KERNEL) || 1719 !init_fifo(&ca->unused, free << 2, GFP_KERNEL) ||
1744 !init_heap(&ca->heap, free << 3, GFP_KERNEL) || 1720 !init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
1745 !(ca->buckets = vmalloc(sizeof(struct bucket) * 1721 !(ca->buckets = vzalloc(sizeof(struct bucket) *
1746 ca->sb.nbuckets)) || 1722 ca->sb.nbuckets)) ||
1747 !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * 1723 !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
1748 2, GFP_KERNEL)) || 1724 2, GFP_KERNEL)) ||
1749 !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || 1725 !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) ||
1750 !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) || 1726 !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) ||
1751 bio_split_pool_init(&ca->bio_split_hook)) 1727 bio_split_pool_init(&ca->bio_split_hook))
1752 goto err; 1728 return -ENOMEM;
1753 1729
1754 ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); 1730 ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca);
1755 1731
1756 memset(ca->buckets, 0, ca->sb.nbuckets * sizeof(struct bucket));
1757 for_each_bucket(b, ca) 1732 for_each_bucket(b, ca)
1758 atomic_set(&b->pin, 0); 1733 atomic_set(&b->pin, 0);
1759 1734
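
The vmalloc() to vzalloc() switch is what lets the explicit memset() of ca->buckets be dropped a few lines below: vzalloc() returns memory that is already zeroed. The two forms are equivalent:

        /* before: allocate, then zero by hand */
        ca->buckets = vmalloc(sizeof(struct bucket) * ca->sb.nbuckets);
        memset(ca->buckets, 0, ca->sb.nbuckets * sizeof(struct bucket));

        /* after: vzalloc() zeroes the allocation itself */
        ca->buckets = vzalloc(sizeof(struct bucket) * ca->sb.nbuckets);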
@@ -1766,22 +1741,28 @@ err:
1766 return -ENOMEM; 1741 return -ENOMEM;
1767} 1742}
1768 1743
1769static const char *register_cache(struct cache_sb *sb, struct page *sb_page, 1744static void register_cache(struct cache_sb *sb, struct page *sb_page,
1770 struct block_device *bdev, struct cache *ca) 1745 struct block_device *bdev, struct cache *ca)
1771{ 1746{
1772 char name[BDEVNAME_SIZE]; 1747 char name[BDEVNAME_SIZE];
1773 const char *err = "cannot allocate memory"; 1748 const char *err = "cannot allocate memory";
1774 1749
1775 if (cache_alloc(sb, ca) != 0) 1750 memcpy(&ca->sb, sb, sizeof(struct cache_sb));
1776 return err;
1777
1778 ca->sb_bio.bi_io_vec[0].bv_page = sb_page;
1779 ca->bdev = bdev; 1751 ca->bdev = bdev;
1780 ca->bdev->bd_holder = ca; 1752 ca->bdev->bd_holder = ca;
1781 1753
1754 bio_init(&ca->sb_bio);
1755 ca->sb_bio.bi_max_vecs = 1;
1756 ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs;
1757 ca->sb_bio.bi_io_vec[0].bv_page = sb_page;
1758 get_page(sb_page);
1759
1782 if (blk_queue_discard(bdev_get_queue(ca->bdev))) 1760 if (blk_queue_discard(bdev_get_queue(ca->bdev)))
1783 ca->discard = CACHE_DISCARD(&ca->sb); 1761 ca->discard = CACHE_DISCARD(&ca->sb);
1784 1762
1763 if (cache_alloc(sb, ca) != 0)
1764 goto err;
1765
1785 err = "error creating kobject"; 1766 err = "error creating kobject";
1786 if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) 1767 if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache"))
1787 goto err; 1768 goto err;
@@ -1791,15 +1772,10 @@ static const char *register_cache(struct cache_sb *sb, struct page *sb_page,
1791 goto err; 1772 goto err;
1792 1773
1793 pr_info("registered cache device %s", bdevname(bdev, name)); 1774 pr_info("registered cache device %s", bdevname(bdev, name));
1794 1775 return;
1795 return NULL;
1796err: 1776err:
1777 pr_notice("error opening %s: %s", bdevname(bdev, name), err);
1797 kobject_put(&ca->kobj); 1778 kobject_put(&ca->kobj);
1798 pr_info("error opening %s: %s", bdevname(bdev, name), err);
1799 /* Return NULL instead of an error because kobject_put() cleans
1800 * everything up
1801 */
1802 return NULL;
1803} 1779}
1804 1780
1805/* Global interfaces/init */ 1781/* Global interfaces/init */
@@ -1833,12 +1809,15 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
1833 bdev = blkdev_get_by_path(strim(path), 1809 bdev = blkdev_get_by_path(strim(path),
1834 FMODE_READ|FMODE_WRITE|FMODE_EXCL, 1810 FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1835 sb); 1811 sb);
1836 if (bdev == ERR_PTR(-EBUSY)) 1812 if (IS_ERR(bdev)) {
1837 err = "device busy"; 1813 if (bdev == ERR_PTR(-EBUSY))
1838 1814 err = "device busy";
1839 if (IS_ERR(bdev) ||
1840 set_blocksize(bdev, 4096))
1841 goto err; 1815 goto err;
1816 }
1817
1818 err = "failed to set blocksize";
1819 if (set_blocksize(bdev, 4096))
1820 goto err_close;
1842 1821
1843 err = read_super(sb, bdev, &sb_page); 1822 err = read_super(sb, bdev, &sb_page);
1844 if (err) 1823 if (err)
@@ -1846,33 +1825,33 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
1846 1825
1847 if (SB_IS_BDEV(sb)) { 1826 if (SB_IS_BDEV(sb)) {
1848 struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); 1827 struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
1828 if (!dc)
1829 goto err_close;
1849 1830
1850 err = register_bdev(sb, sb_page, bdev, dc); 1831 register_bdev(sb, sb_page, bdev, dc);
1851 } else { 1832 } else {
1852 struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); 1833 struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
1834 if (!ca)
1835 goto err_close;
1853 1836
1854 err = register_cache(sb, sb_page, bdev, ca); 1837 register_cache(sb, sb_page, bdev, ca);
1855 } 1838 }
1856 1839out:
1857 if (err) { 1840 if (sb_page)
1858 /* register_(bdev|cache) will only return an error if they
1859 * didn't get far enough to create the kobject - if they did,
1860 * the kobject destructor will do this cleanup.
1861 */
1862 put_page(sb_page); 1841 put_page(sb_page);
1863err_close:
1864 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1865err:
1866 if (attr != &ksysfs_register_quiet)
1867 pr_info("error opening %s: %s", path, err);
1868 ret = -EINVAL;
1869 }
1870
1871 kfree(sb); 1842 kfree(sb);
1872 kfree(path); 1843 kfree(path);
1873 mutex_unlock(&bch_register_lock); 1844 mutex_unlock(&bch_register_lock);
1874 module_put(THIS_MODULE); 1845 module_put(THIS_MODULE);
1875 return ret; 1846 return ret;
1847
1848err_close:
1849 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1850err:
1851 if (attr != &ksysfs_register_quiet)
1852 pr_info("error opening %s: %s", path, err);
1853 ret = -EINVAL;
1854 goto out;
1876} 1855}
1877 1856
1878static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) 1857static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
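
With the restructured exit path, register_bcache() now drops its reference to the superblock page unconditionally at out:, which is why register_bdev() and register_cache() each take their own reference for the copy they keep in sb_bio. The balance, reconstructed from the hunks above:

        /* register_bdev()/register_cache(): pin the page referenced by sb_bio */
        dc->sb_bio.bi_io_vec[0].bv_page = sb_page;
        get_page(sb_page);

        /* register_bcache(): release the caller's reference on every path */
out:
        if (sb_page)
                put_page(sb_page);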
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 93e7e31a4bd3..2714ed3991d1 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -375,7 +375,7 @@ err:
375 refill_dirty(cl); 375 refill_dirty(cl);
376} 376}
377 377
378void bch_writeback_init_cached_dev(struct cached_dev *dc) 378void bch_cached_dev_writeback_init(struct cached_dev *dc)
379{ 379{
380 closure_init_unlocked(&dc->writeback); 380 closure_init_unlocked(&dc->writeback);
381 init_rwsem(&dc->writeback_lock); 381 init_rwsem(&dc->writeback_lock);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 681d1099a2d5..9b82377a833b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5268,8 +5268,8 @@ static void md_clean(struct mddev *mddev)
5268 5268
5269static void __md_stop_writes(struct mddev *mddev) 5269static void __md_stop_writes(struct mddev *mddev)
5270{ 5270{
5271 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5271 if (mddev->sync_thread) { 5272 if (mddev->sync_thread) {
5272 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5273 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 5273 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5274 md_reap_sync_thread(mddev); 5274 md_reap_sync_thread(mddev);
5275 } 5275 }
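
Hoisting MD_RECOVERY_FROZEN out of the conditional freezes recovery even when no sync thread is running at the moment writes are stopped; presumably this keeps md_check_recovery() from starting a new resync in that window. The resulting function opening:

static void __md_stop_writes(struct mddev *mddev)
{
        set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        if (mddev->sync_thread) {
                set_bit(MD_RECOVERY_INTR, &mddev->recovery);
                md_reap_sync_thread(mddev);
        }
        ...
}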
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 55951182af73..6e17f8181c4b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -417,7 +417,17 @@ static void raid1_end_write_request(struct bio *bio, int error)
417 417
418 r1_bio->bios[mirror] = NULL; 418 r1_bio->bios[mirror] = NULL;
419 to_put = bio; 419 to_put = bio;
420 set_bit(R1BIO_Uptodate, &r1_bio->state); 420 /*
421 * Do not set R1BIO_Uptodate if the current device is
422 * rebuilding or Faulty. This is because we cannot use
423 * such device for properly reading the data back (we could
424 * potentially use it, if the current write would have felt
425 * before rdev->recovery_offset, but for simplicity we don't
426 * check this here.
427 */
428 if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
429 !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
430 set_bit(R1BIO_Uptodate, &r1_bio->state);
421 431
422 /* Maybe we can clear some bad blocks. */ 432 /* Maybe we can clear some bad blocks. */
423 if (is_badblock(conf->mirrors[mirror].rdev, 433 if (is_badblock(conf->mirrors[mirror].rdev,
@@ -870,17 +880,17 @@ static void allow_barrier(struct r1conf *conf)
870 wake_up(&conf->wait_barrier); 880 wake_up(&conf->wait_barrier);
871} 881}
872 882
873static void freeze_array(struct r1conf *conf) 883static void freeze_array(struct r1conf *conf, int extra)
874{ 884{
875 /* stop syncio and normal IO and wait for everything to 885 /* stop syncio and normal IO and wait for everything to
876 * go quite. 886 * go quite.
877 * We increment barrier and nr_waiting, and then 887 * We increment barrier and nr_waiting, and then
878 * wait until nr_pending match nr_queued+1 888 * wait until nr_pending match nr_queued+extra
879 * This is called in the context of one normal IO request 889 * This is called in the context of one normal IO request
880 * that has failed. Thus any sync request that might be pending 890 * that has failed. Thus any sync request that might be pending
881 * will be blocked by nr_pending, and we need to wait for 891 * will be blocked by nr_pending, and we need to wait for
882 * pending IO requests to complete or be queued for re-try. 892 * pending IO requests to complete or be queued for re-try.
883 * Thus the number queued (nr_queued) plus this request (1) 893 * Thus the number queued (nr_queued) plus this request (extra)
884 * must match the number of pending IOs (nr_pending) before 894 * must match the number of pending IOs (nr_pending) before
885 * we continue. 895 * we continue.
886 */ 896 */
@@ -888,7 +898,7 @@ static void freeze_array(struct r1conf *conf)
888 conf->barrier++; 898 conf->barrier++;
889 conf->nr_waiting++; 899 conf->nr_waiting++;
890 wait_event_lock_irq_cmd(conf->wait_barrier, 900 wait_event_lock_irq_cmd(conf->wait_barrier,
891 conf->nr_pending == conf->nr_queued+1, 901 conf->nr_pending == conf->nr_queued+extra,
892 conf->resync_lock, 902 conf->resync_lock,
893 flush_pending_writes(conf)); 903 flush_pending_writes(conf));
894 spin_unlock_irq(&conf->resync_lock); 904 spin_unlock_irq(&conf->resync_lock);
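
The new extra argument states how many requests the caller itself still holds pending. From the call sites changed in this patch: handle_read_error() runs on behalf of one failed request that is still counted in nr_pending, so it passes 1; raid1_add_disk(), raid1_remove_disk() and raid1_reshape() own no request of their own and pass 0. A usage sketch built from those call sites:

        /* quiesce from a failed request's context: that request is pending */
        freeze_array(conf, 1);
        fix_read_error(conf, r1_bio->read_disk, r1_bio->sector, r1_bio->sectors);
        unfreeze_array(conf);

        /* quiesce from management paths that hold no request themselves */
        freeze_array(conf, 0);
        /* ... swap in the replacement rdev, resize pools, etc. ... */
        unfreeze_array(conf);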
@@ -1544,8 +1554,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1544 * we wait for all outstanding requests to complete. 1554 * we wait for all outstanding requests to complete.
1545 */ 1555 */
1546 synchronize_sched(); 1556 synchronize_sched();
1547 raise_barrier(conf); 1557 freeze_array(conf, 0);
1548 lower_barrier(conf); 1558 unfreeze_array(conf);
1549 clear_bit(Unmerged, &rdev->flags); 1559 clear_bit(Unmerged, &rdev->flags);
1550 } 1560 }
1551 md_integrity_add_rdev(rdev, mddev); 1561 md_integrity_add_rdev(rdev, mddev);
@@ -1595,11 +1605,11 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
1595 */ 1605 */
1596 struct md_rdev *repl = 1606 struct md_rdev *repl =
1597 conf->mirrors[conf->raid_disks + number].rdev; 1607 conf->mirrors[conf->raid_disks + number].rdev;
1598 raise_barrier(conf); 1608 freeze_array(conf, 0);
1599 clear_bit(Replacement, &repl->flags); 1609 clear_bit(Replacement, &repl->flags);
1600 p->rdev = repl; 1610 p->rdev = repl;
1601 conf->mirrors[conf->raid_disks + number].rdev = NULL; 1611 conf->mirrors[conf->raid_disks + number].rdev = NULL;
1602 lower_barrier(conf); 1612 unfreeze_array(conf);
1603 clear_bit(WantReplacement, &rdev->flags); 1613 clear_bit(WantReplacement, &rdev->flags);
1604 } else 1614 } else
1605 clear_bit(WantReplacement, &rdev->flags); 1615 clear_bit(WantReplacement, &rdev->flags);
@@ -2195,7 +2205,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
2195 * frozen 2205 * frozen
2196 */ 2206 */
2197 if (mddev->ro == 0) { 2207 if (mddev->ro == 0) {
2198 freeze_array(conf); 2208 freeze_array(conf, 1);
2199 fix_read_error(conf, r1_bio->read_disk, 2209 fix_read_error(conf, r1_bio->read_disk,
2200 r1_bio->sector, r1_bio->sectors); 2210 r1_bio->sector, r1_bio->sectors);
2201 unfreeze_array(conf); 2211 unfreeze_array(conf);
@@ -2780,8 +2790,8 @@ static int run(struct mddev *mddev)
2780 return PTR_ERR(conf); 2790 return PTR_ERR(conf);
2781 2791
2782 if (mddev->queue) 2792 if (mddev->queue)
2783 blk_queue_max_write_same_sectors(mddev->queue, 2793 blk_queue_max_write_same_sectors(mddev->queue, 0);
2784 mddev->chunk_sectors); 2794
2785 rdev_for_each(rdev, mddev) { 2795 rdev_for_each(rdev, mddev) {
2786 if (!mddev->gendisk) 2796 if (!mddev->gendisk)
2787 continue; 2797 continue;
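
Passing 0 instead of mddev->chunk_sectors does not just lower the limit, it marks the queue as not supporting WRITE SAME at all, so upper layers fall back to regular writes instead of issuing REQ_WRITE_SAME bios, presumably because there is no recovery strategy yet if a member device fails them. The two settings side by side:

        /* before: advertise WRITE SAME up to one chunk */
        blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);

        /* after: 0 means "not supported"; callers fall back to plain writes */
        blk_queue_max_write_same_sectors(mddev->queue, 0);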
@@ -2963,7 +2973,7 @@ static int raid1_reshape(struct mddev *mddev)
2963 return -ENOMEM; 2973 return -ENOMEM;
2964 } 2974 }
2965 2975
2966 raise_barrier(conf); 2976 freeze_array(conf, 0);
2967 2977
2968 /* ok, everything is stopped */ 2978 /* ok, everything is stopped */
2969 oldpool = conf->r1bio_pool; 2979 oldpool = conf->r1bio_pool;
@@ -2994,7 +3004,7 @@ static int raid1_reshape(struct mddev *mddev)
2994 conf->raid_disks = mddev->raid_disks = raid_disks; 3004 conf->raid_disks = mddev->raid_disks = raid_disks;
2995 mddev->delta_disks = 0; 3005 mddev->delta_disks = 0;
2996 3006
2997 lower_barrier(conf); 3007 unfreeze_array(conf);
2998 3008
2999 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 3009 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3000 md_wakeup_thread(mddev->thread); 3010 md_wakeup_thread(mddev->thread);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 59d4daa5f4c7..6ddae2501b9a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -490,7 +490,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
490 sector_t first_bad; 490 sector_t first_bad;
491 int bad_sectors; 491 int bad_sectors;
492 492
493 set_bit(R10BIO_Uptodate, &r10_bio->state); 493 /*
494 * Do not set R10BIO_Uptodate if the current device is
495 * rebuilding or Faulty. This is because we cannot use
496 * such device for properly reading the data back (we could
497 * potentially use it, if the current write would have felt
498 * before rdev->recovery_offset, but for simplicity we don't
499 * check this here.
500 */
501 if (test_bit(In_sync, &rdev->flags) &&
502 !test_bit(Faulty, &rdev->flags))
503 set_bit(R10BIO_Uptodate, &r10_bio->state);
494 504
495 /* Maybe we can clear some bad blocks. */ 505 /* Maybe we can clear some bad blocks. */
496 if (is_badblock(rdev, 506 if (is_badblock(rdev,
@@ -1055,17 +1065,17 @@ static void allow_barrier(struct r10conf *conf)
1055 wake_up(&conf->wait_barrier); 1065 wake_up(&conf->wait_barrier);
1056} 1066}
1057 1067
1058static void freeze_array(struct r10conf *conf) 1068static void freeze_array(struct r10conf *conf, int extra)
1059{ 1069{
1060 /* stop syncio and normal IO and wait for everything to 1070 /* stop syncio and normal IO and wait for everything to
1061 * go quiet. 1071 * go quiet.
1062 * We increment barrier and nr_waiting, and then 1072 * We increment barrier and nr_waiting, and then
1063 * wait until nr_pending match nr_queued+1 1073 * wait until nr_pending match nr_queued+extra
1064 * This is called in the context of one normal IO request 1074 * This is called in the context of one normal IO request
1065 * that has failed. Thus any sync request that might be pending 1075 * that has failed. Thus any sync request that might be pending
1066 * will be blocked by nr_pending, and we need to wait for 1076 * will be blocked by nr_pending, and we need to wait for
1067 * pending IO requests to complete or be queued for re-try. 1077 * pending IO requests to complete or be queued for re-try.
1068 * Thus the number queued (nr_queued) plus this request (1) 1078 * Thus the number queued (nr_queued) plus this request (extra)
1069 * must match the number of pending IOs (nr_pending) before 1079 * must match the number of pending IOs (nr_pending) before
1070 * we continue. 1080 * we continue.
1071 */ 1081 */
@@ -1073,7 +1083,7 @@ static void freeze_array(struct r10conf *conf)
1073 conf->barrier++; 1083 conf->barrier++;
1074 conf->nr_waiting++; 1084 conf->nr_waiting++;
1075 wait_event_lock_irq_cmd(conf->wait_barrier, 1085 wait_event_lock_irq_cmd(conf->wait_barrier,
1076 conf->nr_pending == conf->nr_queued+1, 1086 conf->nr_pending == conf->nr_queued+extra,
1077 conf->resync_lock, 1087 conf->resync_lock,
1078 flush_pending_writes(conf)); 1088 flush_pending_writes(conf));
1079 1089
@@ -1837,8 +1847,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1837 * we wait for all outstanding requests to complete. 1847 * we wait for all outstanding requests to complete.
1838 */ 1848 */
1839 synchronize_sched(); 1849 synchronize_sched();
1840 raise_barrier(conf, 0); 1850 freeze_array(conf, 0);
1841 lower_barrier(conf); 1851 unfreeze_array(conf);
1842 clear_bit(Unmerged, &rdev->flags); 1852 clear_bit(Unmerged, &rdev->flags);
1843 } 1853 }
1844 md_integrity_add_rdev(rdev, mddev); 1854 md_integrity_add_rdev(rdev, mddev);
@@ -2612,7 +2622,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
2612 r10_bio->devs[slot].bio = NULL; 2622 r10_bio->devs[slot].bio = NULL;
2613 2623
2614 if (mddev->ro == 0) { 2624 if (mddev->ro == 0) {
2615 freeze_array(conf); 2625 freeze_array(conf, 1);
2616 fix_read_error(conf, mddev, r10_bio); 2626 fix_read_error(conf, mddev, r10_bio);
2617 unfreeze_array(conf); 2627 unfreeze_array(conf);
2618 } else 2628 } else
@@ -3609,8 +3619,7 @@ static int run(struct mddev *mddev)
3609 if (mddev->queue) { 3619 if (mddev->queue) {
3610 blk_queue_max_discard_sectors(mddev->queue, 3620 blk_queue_max_discard_sectors(mddev->queue,
3611 mddev->chunk_sectors); 3621 mddev->chunk_sectors);
3612 blk_queue_max_write_same_sectors(mddev->queue, 3622 blk_queue_max_write_same_sectors(mddev->queue, 0);
3613 mddev->chunk_sectors);
3614 blk_queue_io_min(mddev->queue, chunk_size); 3623 blk_queue_io_min(mddev->queue, chunk_size);
3615 if (conf->geo.raid_disks % conf->geo.near_copies) 3624 if (conf->geo.raid_disks % conf->geo.near_copies)
3616 blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); 3625 blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9359828ffe26..05e4a105b9c7 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -664,6 +664,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
664 if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) 664 if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
665 bi->bi_rw |= REQ_FLUSH; 665 bi->bi_rw |= REQ_FLUSH;
666 666
667 bi->bi_vcnt = 1;
667 bi->bi_io_vec[0].bv_len = STRIPE_SIZE; 668 bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
668 bi->bi_io_vec[0].bv_offset = 0; 669 bi->bi_io_vec[0].bv_offset = 0;
669 bi->bi_size = STRIPE_SIZE; 670 bi->bi_size = STRIPE_SIZE;
@@ -701,6 +702,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
701 else 702 else
702 rbi->bi_sector = (sh->sector 703 rbi->bi_sector = (sh->sector
703 + rrdev->data_offset); 704 + rrdev->data_offset);
705 rbi->bi_vcnt = 1;
704 rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; 706 rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
705 rbi->bi_io_vec[0].bv_offset = 0; 707 rbi->bi_io_vec[0].bv_offset = 0;
706 rbi->bi_size = STRIPE_SIZE; 708 rbi->bi_size = STRIPE_SIZE;
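
The stripe head's embedded bios are reused for every request, so each submission re-populates the whole single-segment description; the added assignments keep bi_vcnt consistent with the one bio_vec that is filled in (the assumption being that code walking bi_io_vec relies on bi_vcnt matching it). The full set now written for each leg:

        bi->bi_vcnt = 1;
        bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
        bi->bi_io_vec[0].bv_offset = 0;
        bi->bi_size = STRIPE_SIZE;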
@@ -5464,7 +5466,7 @@ static int run(struct mddev *mddev)
5464 if (mddev->major_version == 0 && 5466 if (mddev->major_version == 0 &&
5465 mddev->minor_version > 90) 5467 mddev->minor_version > 90)
5466 rdev->recovery_offset = reshape_offset; 5468 rdev->recovery_offset = reshape_offset;
5467 5469
5468 if (rdev->recovery_offset < reshape_offset) { 5470 if (rdev->recovery_offset < reshape_offset) {
5469 /* We need to check old and new layout */ 5471 /* We need to check old and new layout */
5470 if (!only_parity(rdev->raid_disk, 5472 if (!only_parity(rdev->raid_disk,
@@ -5587,6 +5589,8 @@ static int run(struct mddev *mddev)
5587 */ 5589 */
5588 mddev->queue->limits.discard_zeroes_data = 0; 5590 mddev->queue->limits.discard_zeroes_data = 0;
5589 5591
5592 blk_queue_max_write_same_sectors(mddev->queue, 0);
5593
5590 rdev_for_each(rdev, mddev) { 5594 rdev_for_each(rdev, mddev) {
5591 disk_stack_limits(mddev->gendisk, rdev->bdev, 5595 disk_stack_limits(mddev->gendisk, rdev->bdev,
5592 rdev->data_offset << 9); 5596 rdev->data_offset << 9);