path: root/drivers/block
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/Kconfig                    5
-rw-r--r--  drivers/block/Makefile                   2
-rw-r--r--  drivers/block/aoe/aoe.h                 10
-rw-r--r--  drivers/block/aoe/aoecmd.c             153
-rw-r--r--  drivers/block/brd.c                     16
-rw-r--r--  drivers/block/cciss.c                    4
-rw-r--r--  drivers/block/drbd/drbd_actlog.c         2
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c         2
-rw-r--r--  drivers/block/drbd/drbd_main.c          27
-rw-r--r--  drivers/block/drbd/drbd_receiver.c      19
-rw-r--r--  drivers/block/drbd/drbd_req.c            6
-rw-r--r--  drivers/block/drbd/drbd_req.h            2
-rw-r--r--  drivers/block/drbd/drbd_worker.c         8
-rw-r--r--  drivers/block/floppy.c                  52
-rw-r--r--  drivers/block/loop.c                    29
-rw-r--r--  drivers/block/mg_disk.c                  2
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c      270
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.h       14
-rw-r--r--  drivers/block/nbd.c                     14
-rw-r--r--  drivers/block/null_blk.c                15
-rw-r--r--  drivers/block/nvme-core.c              142
-rw-r--r--  drivers/block/paride/pg.c                2
-rw-r--r--  drivers/block/pktcdvd.c                186
-rw-r--r--  drivers/block/ps3disk.c                 17
-rw-r--r--  drivers/block/ps3vram.c                 12
-rw-r--r--  drivers/block/rbd.c                    394
-rw-r--r--  drivers/block/rsxx/dev.c                 6
-rw-r--r--  drivers/block/rsxx/dma.c                15
-rw-r--r--  drivers/block/sx8.c                     16
-rw-r--r--  drivers/block/umem.c                    53
-rw-r--r--  drivers/block/xen-blkback/blkback.c      2
-rw-r--r--  drivers/block/xen-blkfront.c             2
-rw-r--r--  drivers/block/z2ram.c                    7
-rw-r--r--  drivers/block/zram/Kconfig              24
-rw-r--r--  drivers/block/zram/Makefile              3
-rw-r--r--  drivers/block/zram/zram_drv.c          958
-rw-r--r--  drivers/block/zram/zram_drv.h          109
37 files changed, 1854 insertions, 746 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 86b9f37d102e..014a1cfc41c5 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -108,6 +108,8 @@ source "drivers/block/paride/Kconfig"
 
 source "drivers/block/mtip32xx/Kconfig"
 
+source "drivers/block/zram/Kconfig"
+
 config BLK_CPQ_DA
 	tristate "Compaq SMART2 support"
 	depends on PCI && VIRT_TO_BUS && 0
@@ -368,7 +370,8 @@ config BLK_DEV_RAM
 	  For details, read <file:Documentation/blockdev/ramdisk.txt>.
 
 	  To compile this driver as a module, choose M here: the
-	  module will be called rd.
+	  module will be called brd. An alias "rd" has been defined
+	  for historical reasons.
 
 	  Most normal users won't need the RAM disk functionality, and can
 	  thus say N here.
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 8cc98cd0d4a8..02b688d1438d 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -31,7 +31,6 @@ obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
 obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
 obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
 
-obj-$(CONFIG_VIODASD) += viodasd.o
 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
 obj-$(CONFIG_BLK_DEV_HD) += hd.o
 
@@ -43,6 +42,7 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
 
 obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
 obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
+obj-$(CONFIG_ZRAM) += zram/
 
 nvme-y := nvme-core.o nvme-scsi.o
 skd-y := skd_main.o
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 14a9d1912318..9220f8e833d0 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -100,11 +100,8 @@ enum {
 
 struct buf {
 	ulong nframesout;
-	ulong resid;
-	ulong bv_resid;
-	sector_t sector;
 	struct bio *bio;
-	struct bio_vec *bv;
+	struct bvec_iter iter;
 	struct request *rq;
 };
 
@@ -120,13 +117,10 @@ struct frame {
 	ulong waited;
 	ulong waited_total;
 	struct aoetgt *t;		/* parent target I belong to */
-	sector_t lba;
 	struct sk_buff *skb;		/* command skb freed on module exit */
 	struct sk_buff *r_skb;		/* response skb for async processing */
 	struct buf *buf;
-	struct bio_vec *bv;
-	ulong bcnt;
-	ulong bv_off;
+	struct bvec_iter iter;
 	char flags;
 };
 
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index d2515435e23f..8184451b57c0 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -196,8 +196,7 @@ aoe_freetframe(struct frame *f)
196 196
197 t = f->t; 197 t = f->t;
198 f->buf = NULL; 198 f->buf = NULL;
199 f->lba = 0; 199 memset(&f->iter, 0, sizeof(f->iter));
200 f->bv = NULL;
201 f->r_skb = NULL; 200 f->r_skb = NULL;
202 f->flags = 0; 201 f->flags = 0;
203 list_add(&f->head, &t->ffree); 202 list_add(&f->head, &t->ffree);
@@ -295,21 +294,14 @@ newframe(struct aoedev *d)
295} 294}
296 295
297static void 296static void
298skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt) 297skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
299{ 298{
300 int frag = 0; 299 int frag = 0;
301 ulong fcnt; 300 struct bio_vec bv;
302loop: 301
303 fcnt = bv->bv_len - (off - bv->bv_offset); 302 __bio_for_each_segment(bv, bio, iter, iter)
304 if (fcnt > cnt) 303 skb_fill_page_desc(skb, frag++, bv.bv_page,
305 fcnt = cnt; 304 bv.bv_offset, bv.bv_len);
306 skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt);
307 cnt -= fcnt;
308 if (cnt <= 0)
309 return;
310 bv++;
311 off = bv->bv_offset;
312 goto loop;
313} 305}
314 306
315static void 307static void
@@ -346,12 +338,10 @@ ata_rw_frameinit(struct frame *f)
346 t->nout++; 338 t->nout++;
347 f->waited = 0; 339 f->waited = 0;
348 f->waited_total = 0; 340 f->waited_total = 0;
349 if (f->buf)
350 f->lba = f->buf->sector;
351 341
352 /* set up ata header */ 342 /* set up ata header */
353 ah->scnt = f->bcnt >> 9; 343 ah->scnt = f->iter.bi_size >> 9;
354 put_lba(ah, f->lba); 344 put_lba(ah, f->iter.bi_sector);
355 if (t->d->flags & DEVFL_EXT) { 345 if (t->d->flags & DEVFL_EXT) {
356 ah->aflags |= AOEAFL_EXT; 346 ah->aflags |= AOEAFL_EXT;
357 } else { 347 } else {
@@ -360,11 +350,11 @@ ata_rw_frameinit(struct frame *f)
360 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ 350 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
361 } 351 }
362 if (f->buf && bio_data_dir(f->buf->bio) == WRITE) { 352 if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
363 skb_fillup(skb, f->bv, f->bv_off, f->bcnt); 353 skb_fillup(skb, f->buf->bio, f->iter);
364 ah->aflags |= AOEAFL_WRITE; 354 ah->aflags |= AOEAFL_WRITE;
365 skb->len += f->bcnt; 355 skb->len += f->iter.bi_size;
366 skb->data_len = f->bcnt; 356 skb->data_len = f->iter.bi_size;
367 skb->truesize += f->bcnt; 357 skb->truesize += f->iter.bi_size;
368 t->wpkts++; 358 t->wpkts++;
369 } else { 359 } else {
370 t->rpkts++; 360 t->rpkts++;
@@ -382,7 +372,6 @@ aoecmd_ata_rw(struct aoedev *d)
382 struct buf *buf; 372 struct buf *buf;
383 struct sk_buff *skb; 373 struct sk_buff *skb;
384 struct sk_buff_head queue; 374 struct sk_buff_head queue;
385 ulong bcnt, fbcnt;
386 375
387 buf = nextbuf(d); 376 buf = nextbuf(d);
388 if (buf == NULL) 377 if (buf == NULL)
@@ -390,39 +379,22 @@ aoecmd_ata_rw(struct aoedev *d)
390 f = newframe(d); 379 f = newframe(d);
391 if (f == NULL) 380 if (f == NULL)
392 return 0; 381 return 0;
393 bcnt = d->maxbcnt;
394 if (bcnt == 0)
395 bcnt = DEFAULTBCNT;
396 if (bcnt > buf->resid)
397 bcnt = buf->resid;
398 fbcnt = bcnt;
399 f->bv = buf->bv;
400 f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid);
401 do {
402 if (fbcnt < buf->bv_resid) {
403 buf->bv_resid -= fbcnt;
404 buf->resid -= fbcnt;
405 break;
406 }
407 fbcnt -= buf->bv_resid;
408 buf->resid -= buf->bv_resid;
409 if (buf->resid == 0) {
410 d->ip.buf = NULL;
411 break;
412 }
413 buf->bv++;
414 buf->bv_resid = buf->bv->bv_len;
415 WARN_ON(buf->bv_resid == 0);
416 } while (fbcnt);
417 382
418 /* initialize the headers & frame */ 383 /* initialize the headers & frame */
419 f->buf = buf; 384 f->buf = buf;
420 f->bcnt = bcnt; 385 f->iter = buf->iter;
421 ata_rw_frameinit(f); 386 f->iter.bi_size = min_t(unsigned long,
387 d->maxbcnt ?: DEFAULTBCNT,
388 f->iter.bi_size);
389 bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);
390
391 if (!buf->iter.bi_size)
392 d->ip.buf = NULL;
422 393
423 /* mark all tracking fields and load out */ 394 /* mark all tracking fields and load out */
424 buf->nframesout += 1; 395 buf->nframesout += 1;
425 buf->sector += bcnt >> 9; 396
397 ata_rw_frameinit(f);
426 398
427 skb = skb_clone(f->skb, GFP_ATOMIC); 399 skb = skb_clone(f->skb, GFP_ATOMIC);
428 if (skb) { 400 if (skb) {
@@ -613,10 +585,7 @@ reassign_frame(struct frame *f)
613 skb = nf->skb; 585 skb = nf->skb;
614 nf->skb = f->skb; 586 nf->skb = f->skb;
615 nf->buf = f->buf; 587 nf->buf = f->buf;
616 nf->bcnt = f->bcnt; 588 nf->iter = f->iter;
617 nf->lba = f->lba;
618 nf->bv = f->bv;
619 nf->bv_off = f->bv_off;
620 nf->waited = 0; 589 nf->waited = 0;
621 nf->waited_total = f->waited_total; 590 nf->waited_total = f->waited_total;
622 nf->sent = f->sent; 591 nf->sent = f->sent;
@@ -648,19 +617,19 @@ probe(struct aoetgt *t)
648 } 617 }
649 f->flags |= FFL_PROBE; 618 f->flags |= FFL_PROBE;
650 ifrotate(t); 619 ifrotate(t);
651 f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT; 620 f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
652 ata_rw_frameinit(f); 621 ata_rw_frameinit(f);
653 skb = f->skb; 622 skb = f->skb;
654 for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) { 623 for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
655 if (n < PAGE_SIZE) 624 if (n < PAGE_SIZE)
656 m = n; 625 m = n;
657 else 626 else
658 m = PAGE_SIZE; 627 m = PAGE_SIZE;
659 skb_fill_page_desc(skb, frag, empty_page, 0, m); 628 skb_fill_page_desc(skb, frag, empty_page, 0, m);
660 } 629 }
661 skb->len += f->bcnt; 630 skb->len += f->iter.bi_size;
662 skb->data_len = f->bcnt; 631 skb->data_len = f->iter.bi_size;
663 skb->truesize += f->bcnt; 632 skb->truesize += f->iter.bi_size;
664 633
665 skb = skb_clone(f->skb, GFP_ATOMIC); 634 skb = skb_clone(f->skb, GFP_ATOMIC);
666 if (skb) { 635 if (skb) {
@@ -897,15 +866,15 @@ rqbiocnt(struct request *r)
897static void 866static void
898bio_pageinc(struct bio *bio) 867bio_pageinc(struct bio *bio)
899{ 868{
900 struct bio_vec *bv; 869 struct bio_vec bv;
901 struct page *page; 870 struct page *page;
902 int i; 871 struct bvec_iter iter;
903 872
904 bio_for_each_segment(bv, bio, i) { 873 bio_for_each_segment(bv, bio, iter) {
905 /* Non-zero page count for non-head members of 874 /* Non-zero page count for non-head members of
906 * compound pages is no longer allowed by the kernel. 875 * compound pages is no longer allowed by the kernel.
907 */ 876 */
908 page = compound_trans_head(bv->bv_page); 877 page = compound_trans_head(bv.bv_page);
909 atomic_inc(&page->_count); 878 atomic_inc(&page->_count);
910 } 879 }
911} 880}
@@ -913,12 +882,12 @@ bio_pageinc(struct bio *bio)
913static void 882static void
914bio_pagedec(struct bio *bio) 883bio_pagedec(struct bio *bio)
915{ 884{
916 struct bio_vec *bv;
917 struct page *page; 885 struct page *page;
918 int i; 886 struct bio_vec bv;
887 struct bvec_iter iter;
919 888
920 bio_for_each_segment(bv, bio, i) { 889 bio_for_each_segment(bv, bio, iter) {
921 page = compound_trans_head(bv->bv_page); 890 page = compound_trans_head(bv.bv_page);
922 atomic_dec(&page->_count); 891 atomic_dec(&page->_count);
923 } 892 }
924} 893}
@@ -929,12 +898,8 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio)
929 memset(buf, 0, sizeof(*buf)); 898 memset(buf, 0, sizeof(*buf));
930 buf->rq = rq; 899 buf->rq = rq;
931 buf->bio = bio; 900 buf->bio = bio;
932 buf->resid = bio->bi_size; 901 buf->iter = bio->bi_iter;
933 buf->sector = bio->bi_sector;
934 bio_pageinc(bio); 902 bio_pageinc(bio);
935 buf->bv = bio_iovec(bio);
936 buf->bv_resid = buf->bv->bv_len;
937 WARN_ON(buf->bv_resid == 0);
938} 903}
939 904
940static struct buf * 905static struct buf *
@@ -1119,24 +1084,18 @@ gettgt(struct aoedev *d, char *addr)
1119} 1084}
1120 1085
1121static void 1086static void
1122bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, long cnt) 1087bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
1123{ 1088{
1124 ulong fcnt;
1125 char *p;
1126 int soff = 0; 1089 int soff = 0;
1127loop: 1090 struct bio_vec bv;
1128 fcnt = bv->bv_len - (off - bv->bv_offset); 1091
1129 if (fcnt > cnt) 1092 iter.bi_size = cnt;
1130 fcnt = cnt; 1093
1131 p = page_address(bv->bv_page) + off; 1094 __bio_for_each_segment(bv, bio, iter, iter) {
1132 skb_copy_bits(skb, soff, p, fcnt); 1095 char *p = page_address(bv.bv_page) + bv.bv_offset;
1133 soff += fcnt; 1096 skb_copy_bits(skb, soff, p, bv.bv_len);
1134 cnt -= fcnt; 1097 soff += bv.bv_len;
1135 if (cnt <= 0) 1098 }
1136 return;
1137 bv++;
1138 off = bv->bv_offset;
1139 goto loop;
1140} 1099}
1141 1100
1142void 1101void
@@ -1152,7 +1111,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
1152 do { 1111 do {
1153 bio = rq->bio; 1112 bio = rq->bio;
1154 bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags); 1113 bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags);
1155 } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_size)); 1114 } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
1156 1115
1157 /* cf. http://lkml.org/lkml/2006/10/31/28 */ 1116 /* cf. http://lkml.org/lkml/2006/10/31/28 */
1158 if (!fastfail) 1117 if (!fastfail)
@@ -1229,7 +1188,15 @@ noskb: if (buf)
1229 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); 1188 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1230 break; 1189 break;
1231 } 1190 }
1232 bvcpy(f->bv, f->bv_off, skb, n); 1191 if (n > f->iter.bi_size) {
1192 pr_err_ratelimited("%s e%ld.%d. bytes=%ld need=%u\n",
1193 "aoe: too-large data size in read from",
1194 (long) d->aoemajor, d->aoeminor,
1195 n, f->iter.bi_size);
1196 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1197 break;
1198 }
1199 bvcpy(skb, f->buf->bio, f->iter, n);
1233 case ATA_CMD_PIO_WRITE: 1200 case ATA_CMD_PIO_WRITE:
1234 case ATA_CMD_PIO_WRITE_EXT: 1201 case ATA_CMD_PIO_WRITE_EXT:
1235 spin_lock_irq(&d->lock); 1202 spin_lock_irq(&d->lock);
@@ -1272,7 +1239,7 @@ out:
1272 1239
1273 aoe_freetframe(f); 1240 aoe_freetframe(f);
1274 1241
1275 if (buf && --buf->nframesout == 0 && buf->resid == 0) 1242 if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
1276 aoe_end_buf(d, buf); 1243 aoe_end_buf(d, buf);
1277 1244
1278 spin_unlock_irq(&d->lock); 1245 spin_unlock_irq(&d->lock);
@@ -1727,7 +1694,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
1727{ 1694{
1728 if (buf == NULL) 1695 if (buf == NULL)
1729 return; 1696 return;
1730 buf->resid = 0; 1697 buf->iter.bi_size = 0;
1731 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); 1698 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1732 if (buf->nframesout == 0) 1699 if (buf->nframesout == 0)
1733 aoe_end_buf(d, buf); 1700 aoe_end_buf(d, buf);
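Aside (not part of the patch): the aoe changes above show the pattern for drivers that split one bio across several device commands: take a private copy of the bio's bvec_iter, clamp the copy's bi_size to the per-command limit, then bio_advance_iter() the shared cursor, exactly as aoecmd_ata_rw() now does with buf->iter and f->iter. A minimal sketch of that bookkeeping; the names my_frame, MY_MAX_BYTES, my_fill_frame() and my_frame_copy_out() are hypothetical stand-ins for the driver's own.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>

#define MY_MAX_BYTES	8192			/* hypothetical per-command limit */

struct my_frame {
	struct bio	*bio;
	struct bvec_iter iter;			/* sector, size and index of this chunk */
};

static void my_fill_frame(struct my_frame *f, struct bio *bio,
			  struct bvec_iter *pos)
{
	f->bio	= bio;
	f->iter	= *pos;				/* snapshot the current position */
	f->iter.bi_size = min_t(unsigned int, MY_MAX_BYTES, f->iter.bi_size);

	/* advance the shared cursor past the chunk this frame now owns */
	bio_advance_iter(bio, pos, f->iter.bi_size);
}

static void my_frame_copy_out(struct my_frame *f, char *dst)
{
	struct bio_vec bv;
	struct bvec_iter iter;

	/* walk only this frame's chunk, starting from its private iterator */
	__bio_for_each_segment(bv, f->bio, iter, f->iter) {
		memcpy(dst, page_address(bv.bv_page) + bv.bv_offset, bv.bv_len);
		dst += bv.bv_len;
	}
}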
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index d91f1a56e861..e73b85cf0756 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -328,18 +328,18 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
 	struct block_device *bdev = bio->bi_bdev;
 	struct brd_device *brd = bdev->bd_disk->private_data;
 	int rw;
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
 	sector_t sector;
-	int i;
+	struct bvec_iter iter;
 	int err = -EIO;
 
-	sector = bio->bi_sector;
+	sector = bio->bi_iter.bi_sector;
 	if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
 		goto out;
 
 	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
 		err = 0;
-		discard_from_brd(brd, sector, bio->bi_size);
+		discard_from_brd(brd, sector, bio->bi_iter.bi_size);
 		goto out;
 	}
 
@@ -347,10 +347,10 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
 	if (rw == READA)
 		rw = READ;
 
-	bio_for_each_segment(bvec, bio, i) {
-		unsigned int len = bvec->bv_len;
-		err = brd_do_bvec(brd, bvec->bv_page, len,
-			bvec->bv_offset, rw, sector);
+	bio_for_each_segment(bvec, bio, iter) {
+		unsigned int len = bvec.bv_len;
+		err = brd_do_bvec(brd, bvec.bv_page, len,
+			bvec.bv_offset, rw, sector);
 		if (err)
 			break;
 		sector += len >> SECTOR_SHIFT;
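Aside (not part of the patch): the brd, drbd and similar hunks all follow the same mechanical conversion: the segment becomes a struct bio_vec held by value, the integer index becomes a struct bvec_iter, and the starting sector moves into bio->bi_iter. A minimal make_request-style sketch of the post-conversion loop; my_make_request() and my_do_bvec() are hypothetical stand-ins for a driver's entry point and per-segment helper (brd_do_bvec() plays that role above).

#include <linux/bio.h>
#include <linux/blkdev.h>

/* hypothetical per-segment helper, in the role of brd_do_bvec() */
static int my_do_bvec(struct page *page, unsigned int len,
		      unsigned int off, int rw, sector_t sector)
{
	return 0;	/* a real driver would copy to/from its backing store here */
}

static void my_make_request(struct request_queue *q, struct bio *bio)
{
	struct bio_vec bvec;				/* by value, not a pointer */
	struct bvec_iter iter;				/* replaces the old int index */
	sector_t sector = bio->bi_iter.bi_sector;	/* was bio->bi_sector */
	int rw = bio_data_dir(bio);
	int err = 0;

	bio_for_each_segment(bvec, bio, iter) {
		err = my_do_bvec(bvec.bv_page, bvec.bv_len,
				 bvec.bv_offset, rw, sector);
		if (err)
			break;
		sector += bvec.bv_len >> 9;		/* 512-byte sectors */
	}

	bio_endio(bio, err);
}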
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index b35fc4f5237c..036e8ab86c71 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -5004,7 +5004,7 @@ reinit_after_soft_reset:
 
 	i = alloc_cciss_hba(pdev);
 	if (i < 0)
-		return -1;
+		return -ENOMEM;
 
 	h = hba[i];
 	h->pdev = pdev;
@@ -5205,7 +5205,7 @@ clean_no_release_regions:
 	 */
 	pci_set_drvdata(pdev, NULL);
 	free_hba(h);
-	return -1;
+	return -ENODEV;
 }
 
 static void cciss_shutdown(struct pci_dev *pdev)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 28c73ca320a8..a9b13f2cc420 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -159,7 +159,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 
 	bio = bio_alloc_drbd(GFP_NOIO);
 	bio->bi_bdev = bdev->md_bdev;
-	bio->bi_sector = sector;
+	bio->bi_iter.bi_sector = sector;
 	err = -EIO;
 	if (bio_add_page(bio, page, size, 0) != size)
 		goto out;
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index b12c11ec4bd2..597f111df67b 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1028,7 +1028,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
 	} else
 		page = b->bm_pages[page_nr];
 	bio->bi_bdev = mdev->ldev->md_bdev;
-	bio->bi_sector = on_disk_sector;
+	bio->bi_iter.bi_sector = on_disk_sector;
 	/* bio_add_page of a single page to an empty bio will always succeed,
 	 * according to api. Do we want to assert that? */
 	bio_add_page(bio, page, len, 0);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9e3818b1bc83..929468e1512a 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1537,15 +1537,17 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
 
 static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
 {
-	struct bio_vec *bvec;
-	int i;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+
 	/* hint all but last page with MSG_MORE */
-	bio_for_each_segment(bvec, bio, i) {
+	bio_for_each_segment(bvec, bio, iter) {
 		int err;
 
-		err = _drbd_no_send_page(mdev, bvec->bv_page,
-					 bvec->bv_offset, bvec->bv_len,
-					 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
+		err = _drbd_no_send_page(mdev, bvec.bv_page,
+					 bvec.bv_offset, bvec.bv_len,
+					 bio_iter_last(bvec, iter)
+					 ? 0 : MSG_MORE);
 		if (err)
 			return err;
 	}
@@ -1554,15 +1556,16 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
 
 static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
 {
-	struct bio_vec *bvec;
-	int i;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+
 	/* hint all but last page with MSG_MORE */
-	bio_for_each_segment(bvec, bio, i) {
+	bio_for_each_segment(bvec, bio, iter) {
 		int err;
 
-		err = _drbd_send_page(mdev, bvec->bv_page,
-				      bvec->bv_offset, bvec->bv_len,
-				      i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
+		err = _drbd_send_page(mdev, bvec.bv_page,
+				      bvec.bv_offset, bvec.bv_len,
+				      bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
 		if (err)
 			return err;
 	}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6fa6673b36b3..d073305ffd5e 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1333,7 +1333,7 @@ next_bio:
 		goto fail;
 	}
 	/* > peer_req->i.sector, unless this is the first bio */
-	bio->bi_sector = sector;
+	bio->bi_iter.bi_sector = sector;
 	bio->bi_bdev = mdev->ldev->backing_bdev;
 	bio->bi_rw = rw;
 	bio->bi_private = peer_req;
@@ -1353,7 +1353,7 @@ next_bio:
 			dev_err(DEV,
 				"bio_add_page failed for len=%u, "
 				"bi_vcnt=0 (bi_sector=%llu)\n",
-				len, (unsigned long long)bio->bi_sector);
+				len, (uint64_t)bio->bi_iter.bi_sector);
 			err = -ENOSPC;
 			goto fail;
 		}
@@ -1595,9 +1595,10 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
 static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
 			   sector_t sector, int data_size)
 {
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 	struct bio *bio;
-	int dgs, err, i, expect;
+	int dgs, err, expect;
 	void *dig_in = mdev->tconn->int_dig_in;
 	void *dig_vv = mdev->tconn->int_dig_vv;
 
@@ -1615,13 +1616,13 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
 	mdev->recv_cnt += data_size>>9;
 
 	bio = req->master_bio;
-	D_ASSERT(sector == bio->bi_sector);
+	D_ASSERT(sector == bio->bi_iter.bi_sector);
 
-	bio_for_each_segment(bvec, bio, i) {
-		void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
-		expect = min_t(int, data_size, bvec->bv_len);
+	bio_for_each_segment(bvec, bio, iter) {
+		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
+		expect = min_t(int, data_size, bvec.bv_len);
 		err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
-		kunmap(bvec->bv_page);
+		kunmap(bvec.bv_page);
 		if (err)
 			return err;
 		data_size -= expect;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index fec7bef44994..104a040f24de 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -77,8 +77,8 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
 	req->epoch = 0;
 
 	drbd_clear_interval(&req->i);
-	req->i.sector = bio_src->bi_sector;
-	req->i.size = bio_src->bi_size;
+	req->i.sector = bio_src->bi_iter.bi_sector;
+	req->i.size = bio_src->bi_iter.bi_size;
 	req->i.local = true;
 	req->i.waiting = false;
 
@@ -1280,7 +1280,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
 	/*
 	 * what we "blindly" assume:
 	 */
-	D_ASSERT(IS_ALIGNED(bio->bi_size, 512));
+	D_ASSERT(IS_ALIGNED(bio->bi_iter.bi_size, 512));
 
 	inc_ap_bio(mdev);
 	__drbd_make_request(mdev, bio, start_time);
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 978cb1addc98..28e15d91197a 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -269,7 +269,7 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi
 
 /* Short lived temporary struct on the stack.
  * We could squirrel the error to be returned into
- * bio->bi_size, or similar. But that would be too ugly. */
+ * bio->bi_iter.bi_size, or similar. But that would be too ugly. */
 struct bio_and_error {
 	struct bio *bio;
 	int error;
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 891c0ecaa292..84d3175d493a 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -313,8 +313,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
 {
 	struct hash_desc desc;
 	struct scatterlist sg;
-	struct bio_vec *bvec;
-	int i;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 
 	desc.tfm = tfm;
 	desc.flags = 0;
@@ -322,8 +322,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
 	sg_init_table(&sg, 1);
 	crypto_hash_init(&desc);
 
-	bio_for_each_segment(bvec, bio, i) {
-		sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
+	bio_for_each_segment(bvec, bio, iter) {
+		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
 		crypto_hash_update(&desc, &sg, sg.length);
 	}
 	crypto_hash_final(&desc, digest);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 000abe2f105c..2023043ce7c0 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2351,7 +2351,7 @@ static void rw_interrupt(void)
2351/* Compute maximal contiguous buffer size. */ 2351/* Compute maximal contiguous buffer size. */
2352static int buffer_chain_size(void) 2352static int buffer_chain_size(void)
2353{ 2353{
2354 struct bio_vec *bv; 2354 struct bio_vec bv;
2355 int size; 2355 int size;
2356 struct req_iterator iter; 2356 struct req_iterator iter;
2357 char *base; 2357 char *base;
@@ -2360,10 +2360,10 @@ static int buffer_chain_size(void)
2360 size = 0; 2360 size = 0;
2361 2361
2362 rq_for_each_segment(bv, current_req, iter) { 2362 rq_for_each_segment(bv, current_req, iter) {
2363 if (page_address(bv->bv_page) + bv->bv_offset != base + size) 2363 if (page_address(bv.bv_page) + bv.bv_offset != base + size)
2364 break; 2364 break;
2365 2365
2366 size += bv->bv_len; 2366 size += bv.bv_len;
2367 } 2367 }
2368 2368
2369 return size >> 9; 2369 return size >> 9;
@@ -2389,7 +2389,7 @@ static int transfer_size(int ssize, int max_sector, int max_size)
2389static void copy_buffer(int ssize, int max_sector, int max_sector_2) 2389static void copy_buffer(int ssize, int max_sector, int max_sector_2)
2390{ 2390{
2391 int remaining; /* number of transferred 512-byte sectors */ 2391 int remaining; /* number of transferred 512-byte sectors */
2392 struct bio_vec *bv; 2392 struct bio_vec bv;
2393 char *buffer; 2393 char *buffer;
2394 char *dma_buffer; 2394 char *dma_buffer;
2395 int size; 2395 int size;
@@ -2427,10 +2427,10 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
2427 if (!remaining) 2427 if (!remaining)
2428 break; 2428 break;
2429 2429
2430 size = bv->bv_len; 2430 size = bv.bv_len;
2431 SUPBOUND(size, remaining); 2431 SUPBOUND(size, remaining);
2432 2432
2433 buffer = page_address(bv->bv_page) + bv->bv_offset; 2433 buffer = page_address(bv.bv_page) + bv.bv_offset;
2434 if (dma_buffer + size > 2434 if (dma_buffer + size >
2435 floppy_track_buffer + (max_buffer_sectors << 10) || 2435 floppy_track_buffer + (max_buffer_sectors << 10) ||
2436 dma_buffer < floppy_track_buffer) { 2436 dma_buffer < floppy_track_buffer) {
@@ -3691,9 +3691,12 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
3691 if (!(mode & FMODE_NDELAY)) { 3691 if (!(mode & FMODE_NDELAY)) {
3692 if (mode & (FMODE_READ|FMODE_WRITE)) { 3692 if (mode & (FMODE_READ|FMODE_WRITE)) {
3693 UDRS->last_checked = 0; 3693 UDRS->last_checked = 0;
3694 clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
3694 check_disk_change(bdev); 3695 check_disk_change(bdev);
3695 if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) 3696 if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
3696 goto out; 3697 goto out;
3698 if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
3699 goto out;
3697 } 3700 }
3698 res = -EROFS; 3701 res = -EROFS;
3699 if ((mode & FMODE_WRITE) && 3702 if ((mode & FMODE_WRITE) &&
@@ -3746,17 +3749,29 @@ static unsigned int floppy_check_events(struct gendisk *disk,
3746 * a disk in the drive, and whether that disk is writable. 3749 * a disk in the drive, and whether that disk is writable.
3747 */ 3750 */
3748 3751
3749static void floppy_rb0_complete(struct bio *bio, int err) 3752struct rb0_cbdata {
3753 int drive;
3754 struct completion complete;
3755};
3756
3757static void floppy_rb0_cb(struct bio *bio, int err)
3750{ 3758{
3751 complete((struct completion *)bio->bi_private); 3759 struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
3760 int drive = cbdata->drive;
3761
3762 if (err) {
3763 pr_info("floppy: error %d while reading block 0", err);
3764 set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
3765 }
3766 complete(&cbdata->complete);
3752} 3767}
3753 3768
3754static int __floppy_read_block_0(struct block_device *bdev) 3769static int __floppy_read_block_0(struct block_device *bdev, int drive)
3755{ 3770{
3756 struct bio bio; 3771 struct bio bio;
3757 struct bio_vec bio_vec; 3772 struct bio_vec bio_vec;
3758 struct completion complete;
3759 struct page *page; 3773 struct page *page;
3774 struct rb0_cbdata cbdata;
3760 size_t size; 3775 size_t size;
3761 3776
3762 page = alloc_page(GFP_NOIO); 3777 page = alloc_page(GFP_NOIO);
@@ -3769,23 +3784,26 @@ static int __floppy_read_block_0(struct block_device *bdev)
3769 if (!size) 3784 if (!size)
3770 size = 1024; 3785 size = 1024;
3771 3786
3787 cbdata.drive = drive;
3788
3772 bio_init(&bio); 3789 bio_init(&bio);
3773 bio.bi_io_vec = &bio_vec; 3790 bio.bi_io_vec = &bio_vec;
3774 bio_vec.bv_page = page; 3791 bio_vec.bv_page = page;
3775 bio_vec.bv_len = size; 3792 bio_vec.bv_len = size;
3776 bio_vec.bv_offset = 0; 3793 bio_vec.bv_offset = 0;
3777 bio.bi_vcnt = 1; 3794 bio.bi_vcnt = 1;
3778 bio.bi_size = size; 3795 bio.bi_iter.bi_size = size;
3779 bio.bi_bdev = bdev; 3796 bio.bi_bdev = bdev;
3780 bio.bi_sector = 0; 3797 bio.bi_iter.bi_sector = 0;
3781 bio.bi_flags = (1 << BIO_QUIET); 3798 bio.bi_flags = (1 << BIO_QUIET);
3782 init_completion(&complete); 3799 bio.bi_private = &cbdata;
3783 bio.bi_private = &complete; 3800 bio.bi_end_io = floppy_rb0_cb;
3784 bio.bi_end_io = floppy_rb0_complete;
3785 3801
3786 submit_bio(READ, &bio); 3802 submit_bio(READ, &bio);
3787 process_fd_request(); 3803 process_fd_request();
3788 wait_for_completion(&complete); 3804
3805 init_completion(&cbdata.complete);
3806 wait_for_completion(&cbdata.complete);
3789 3807
3790 __free_page(page); 3808 __free_page(page);
3791 3809
@@ -3827,7 +3845,7 @@ static int floppy_revalidate(struct gendisk *disk)
3827 UDRS->generation++; 3845 UDRS->generation++;
3828 if (drive_no_geom(drive)) { 3846 if (drive_no_geom(drive)) {
3829 /* auto-sensing */ 3847 /* auto-sensing */
3830 res = __floppy_read_block_0(opened_bdev[drive]); 3848 res = __floppy_read_block_0(opened_bdev[drive], drive);
3831 } else { 3849 } else {
3832 if (cf) 3850 if (cf)
3833 poll_drive(false, FD_RAW_NEED_DISK); 3851 poll_drive(false, FD_RAW_NEED_DISK);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c8dac7305244..66e8c3b94ef3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -288,9 +288,10 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
 {
 	int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
 			struct page *page);
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 	struct page *page = NULL;
-	int i, ret = 0;
+	int ret = 0;
 
 	if (lo->transfer != transfer_none) {
 		page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
@@ -302,11 +303,11 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
 		do_lo_send = do_lo_send_direct_write;
 	}
 
-	bio_for_each_segment(bvec, bio, i) {
-		ret = do_lo_send(lo, bvec, pos, page);
+	bio_for_each_segment(bvec, bio, iter) {
+		ret = do_lo_send(lo, &bvec, pos, page);
 		if (ret < 0)
 			break;
-		pos += bvec->bv_len;
+		pos += bvec.bv_len;
 	}
 	if (page) {
 		kunmap(page);
@@ -392,20 +393,20 @@ do_lo_receive(struct loop_device *lo,
 static int
 lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 {
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 	ssize_t s;
-	int i;
 
-	bio_for_each_segment(bvec, bio, i) {
-		s = do_lo_receive(lo, bvec, bsize, pos);
+	bio_for_each_segment(bvec, bio, iter) {
+		s = do_lo_receive(lo, &bvec, bsize, pos);
 		if (s < 0)
 			return s;
 
-		if (s != bvec->bv_len) {
+		if (s != bvec.bv_len) {
 			zero_fill_bio(bio);
 			break;
 		}
-		pos += bvec->bv_len;
+		pos += bvec.bv_len;
 	}
 	return 0;
 }
@@ -415,7 +416,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 	loff_t pos;
 	int ret;
 
-	pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
+	pos = ((loff_t) bio->bi_iter.bi_sector << 9) + lo->lo_offset;
 
 	if (bio_rw(bio) == WRITE) {
 		struct file *file = lo->lo_backing_file;
@@ -444,7 +445,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 			goto out;
 		}
 		ret = file->f_op->fallocate(file, mode, pos,
-					    bio->bi_size);
+					    bio->bi_iter.bi_size);
 		if (unlikely(ret && ret != -EINVAL &&
 			     ret != -EOPNOTSUPP))
 			ret = -EIO;
@@ -798,7 +799,7 @@ static void loop_config_discard(struct loop_device *lo)
 
 	/*
 	 * We use punch hole to reclaim the free space used by the
-	 * image a.k.a. discard. However we do support discard if
+	 * image a.k.a. discard. However we do not support discard if
 	 * encryption is enabled, because it may give an attacker
 	 * useful information.
 	 */
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 7bc363f1ee82..eb59b1241366 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -915,7 +915,7 @@ static int mg_probe(struct platform_device *plat_dev)
 
 	/* disk reset */
 	if (prv_data->dev_attr == MG_STORAGE_DEV) {
-		/* If POR seq. not yet finised, wait */
+		/* If POR seq. not yet finished, wait */
 		err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT);
 		if (err)
 			goto probe_err_3b;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 050c71267f14..516026954be6 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -41,10 +41,31 @@
41#include "mtip32xx.h" 41#include "mtip32xx.h"
42 42
43#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32) 43#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
44#define HW_CMD_TBL_SZ (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16)) 44
45#define HW_CMD_TBL_AR_SZ (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS) 45/* DMA region containing RX Fis, Identify, RLE10, and SMART buffers */
46#define HW_PORT_PRIV_DMA_SZ \ 46#define AHCI_RX_FIS_SZ 0x100
47 (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ) 47#define AHCI_RX_FIS_OFFSET 0x0
48#define AHCI_IDFY_SZ ATA_SECT_SIZE
49#define AHCI_IDFY_OFFSET 0x400
50#define AHCI_SECTBUF_SZ ATA_SECT_SIZE
51#define AHCI_SECTBUF_OFFSET 0x800
52#define AHCI_SMARTBUF_SZ ATA_SECT_SIZE
53#define AHCI_SMARTBUF_OFFSET 0xC00
54/* 0x100 + 0x200 + 0x200 + 0x200 is smaller than 4k but we pad it out */
55#define BLOCK_DMA_ALLOC_SZ 4096
56
57/* DMA region containing command table (should be 8192 bytes) */
58#define AHCI_CMD_SLOT_SZ sizeof(struct mtip_cmd_hdr)
59#define AHCI_CMD_TBL_SZ (MTIP_MAX_COMMAND_SLOTS * AHCI_CMD_SLOT_SZ)
60#define AHCI_CMD_TBL_OFFSET 0x0
61
62/* DMA region per command (contains header and SGL) */
63#define AHCI_CMD_TBL_HDR_SZ 0x80
64#define AHCI_CMD_TBL_HDR_OFFSET 0x0
65#define AHCI_CMD_TBL_SGL_SZ (MTIP_MAX_SG * sizeof(struct mtip_cmd_sg))
66#define AHCI_CMD_TBL_SGL_OFFSET AHCI_CMD_TBL_HDR_SZ
67#define CMD_DMA_ALLOC_SZ (AHCI_CMD_TBL_SGL_SZ + AHCI_CMD_TBL_HDR_SZ)
68
48 69
49#define HOST_CAP_NZDMA (1 << 19) 70#define HOST_CAP_NZDMA (1 << 19)
50#define HOST_HSORG 0xFC 71#define HOST_HSORG 0xFC
@@ -899,8 +920,9 @@ static void mtip_handle_tfe(struct driver_data *dd)
899 fail_reason = "thermal shutdown"; 920 fail_reason = "thermal shutdown";
900 } 921 }
901 if (buf[288] == 0xBF) { 922 if (buf[288] == 0xBF) {
923 set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
902 dev_info(&dd->pdev->dev, 924 dev_info(&dd->pdev->dev,
903 "Drive indicates rebuild has failed.\n"); 925 "Drive indicates rebuild has failed. Secure erase required.\n");
904 fail_all_ncq_cmds = 1; 926 fail_all_ncq_cmds = 1;
905 fail_reason = "rebuild failed"; 927 fail_reason = "rebuild failed";
906 } 928 }
@@ -1566,6 +1588,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
1566 } 1588 }
1567#endif 1589#endif
1568 1590
1591 /* Check security locked state */
1592 if (port->identify[128] & 0x4)
1593 set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1594 else
1595 clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1596
1569#ifdef MTIP_TRIM /* Disabling TRIM support temporarily */ 1597#ifdef MTIP_TRIM /* Disabling TRIM support temporarily */
1570 /* Demux ID.DRAT & ID.RZAT to determine trim support */ 1598 /* Demux ID.DRAT & ID.RZAT to determine trim support */
1571 if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5)) 1599 if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5))
@@ -1887,6 +1915,10 @@ static void mtip_dump_identify(struct mtip_port *port)
1887 strlcpy(cbuf, (char *)(port->identify+27), 41); 1915 strlcpy(cbuf, (char *)(port->identify+27), 41);
1888 dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf); 1916 dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
1889 1917
1918 dev_info(&port->dd->pdev->dev, "Security: %04x %s\n",
1919 port->identify[128],
1920 port->identify[128] & 0x4 ? "(LOCKED)" : "");
1921
1890 if (mtip_hw_get_capacity(port->dd, &sectors)) 1922 if (mtip_hw_get_capacity(port->dd, &sectors))
1891 dev_info(&port->dd->pdev->dev, 1923 dev_info(&port->dd->pdev->dev,
1892 "Capacity: %llu sectors (%llu MB)\n", 1924 "Capacity: %llu sectors (%llu MB)\n",
@@ -3313,6 +3345,118 @@ st_out:
3313} 3345}
3314 3346
3315/* 3347/*
3348 * DMA region teardown
3349 *
3350 * @dd Pointer to driver_data structure
3351 *
3352 * return value
3353 * None
3354 */
3355static void mtip_dma_free(struct driver_data *dd)
3356{
3357 int i;
3358 struct mtip_port *port = dd->port;
3359
3360 if (port->block1)
3361 dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3362 port->block1, port->block1_dma);
3363
3364 if (port->command_list) {
3365 dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
3366 port->command_list, port->command_list_dma);
3367 }
3368
3369 for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
3370 if (port->commands[i].command)
3371 dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
3372 port->commands[i].command,
3373 port->commands[i].command_dma);
3374 }
3375}
3376
3377/*
3378 * DMA region setup
3379 *
3380 * @dd Pointer to driver_data structure
3381 *
3382 * return value
3383 * -ENOMEM Not enough free DMA region space to initialize driver
3384 */
3385static int mtip_dma_alloc(struct driver_data *dd)
3386{
3387 struct mtip_port *port = dd->port;
3388 int i, rv = 0;
3389 u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
3390
3391 /* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
3392 port->block1 =
3393 dmam_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3394 &port->block1_dma, GFP_KERNEL);
3395 if (!port->block1)
3396 return -ENOMEM;
3397 memset(port->block1, 0, BLOCK_DMA_ALLOC_SZ);
3398
3399 /* Allocate dma memory for command list */
3400 port->command_list =
3401 dmam_alloc_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
3402 &port->command_list_dma, GFP_KERNEL);
3403 if (!port->command_list) {
3404 dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
3405 port->block1, port->block1_dma);
3406 port->block1 = NULL;
3407 port->block1_dma = 0;
3408 return -ENOMEM;
3409 }
3410 memset(port->command_list, 0, AHCI_CMD_TBL_SZ);
3411
3412 /* Setup all pointers into first DMA region */
3413 port->rxfis = port->block1 + AHCI_RX_FIS_OFFSET;
3414 port->rxfis_dma = port->block1_dma + AHCI_RX_FIS_OFFSET;
3415 port->identify = port->block1 + AHCI_IDFY_OFFSET;
3416 port->identify_dma = port->block1_dma + AHCI_IDFY_OFFSET;
3417 port->log_buf = port->block1 + AHCI_SECTBUF_OFFSET;
3418 port->log_buf_dma = port->block1_dma + AHCI_SECTBUF_OFFSET;
3419 port->smart_buf = port->block1 + AHCI_SMARTBUF_OFFSET;
3420 port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET;
3421
3422 /* Setup per command SGL DMA region */
3423
3424 /* Point the command headers at the command tables */
3425 for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
3426 port->commands[i].command =
3427 dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
3428 &port->commands[i].command_dma, GFP_KERNEL);
3429 if (!port->commands[i].command) {
3430 rv = -ENOMEM;
3431 mtip_dma_free(dd);
3432 return rv;
3433 }
3434 memset(port->commands[i].command, 0, CMD_DMA_ALLOC_SZ);
3435
3436 port->commands[i].command_header = port->command_list +
3437 (sizeof(struct mtip_cmd_hdr) * i);
3438 port->commands[i].command_header_dma =
3439 dd->port->command_list_dma +
3440 (sizeof(struct mtip_cmd_hdr) * i);
3441
3442 if (host_cap_64)
3443 port->commands[i].command_header->ctbau =
3444 __force_bit2int cpu_to_le32(
3445 (port->commands[i].command_dma >> 16) >> 16);
3446
3447 port->commands[i].command_header->ctba =
3448 __force_bit2int cpu_to_le32(
3449 port->commands[i].command_dma & 0xFFFFFFFF);
3450
3451 sg_init_table(port->commands[i].sg, MTIP_MAX_SG);
3452
3453 /* Mark command as currently inactive */
3454 atomic_set(&dd->port->commands[i].active, 0);
3455 }
3456 return 0;
3457}
3458
3459/*
3316 * Called once for each card. 3460 * Called once for each card.
3317 * 3461 *
3318 * @dd Pointer to the driver data structure. 3462 * @dd Pointer to the driver data structure.
@@ -3370,83 +3514,10 @@ static int mtip_hw_init(struct driver_data *dd)
3370 dd->port->mmio = dd->mmio + PORT_OFFSET; 3514 dd->port->mmio = dd->mmio + PORT_OFFSET;
3371 dd->port->dd = dd; 3515 dd->port->dd = dd;
3372 3516
3373 /* Allocate memory for the command list. */ 3517 /* DMA allocations */
3374 dd->port->command_list = 3518 rv = mtip_dma_alloc(dd);
3375 dmam_alloc_coherent(&dd->pdev->dev, 3519 if (rv < 0)
3376 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3377 &dd->port->command_list_dma,
3378 GFP_KERNEL);
3379 if (!dd->port->command_list) {
3380 dev_err(&dd->pdev->dev,
3381 "Memory allocation: command list\n");
3382 rv = -ENOMEM;
3383 goto out1; 3520 goto out1;
3384 }
3385
3386 /* Clear the memory we have allocated. */
3387 memset(dd->port->command_list,
3388 0,
3389 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4));
3390
3391 /* Setup the addresse of the RX FIS. */
3392 dd->port->rxfis = dd->port->command_list + HW_CMD_SLOT_SZ;
3393 dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
3394
3395 /* Setup the address of the command tables. */
3396 dd->port->command_table = dd->port->rxfis + AHCI_RX_FIS_SZ;
3397 dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
3398
3399 /* Setup the address of the identify data. */
3400 dd->port->identify = dd->port->command_table +
3401 HW_CMD_TBL_AR_SZ;
3402 dd->port->identify_dma = dd->port->command_tbl_dma +
3403 HW_CMD_TBL_AR_SZ;
3404
3405 /* Setup the address of the sector buffer - for some non-ncq cmds */
3406 dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
3407 dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
3408
3409 /* Setup the address of the log buf - for read log command */
3410 dd->port->log_buf = (void *)dd->port->sector_buffer + ATA_SECT_SIZE;
3411 dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE;
3412
3413 /* Setup the address of the smart buf - for smart read data command */
3414 dd->port->smart_buf = (void *)dd->port->log_buf + ATA_SECT_SIZE;
3415 dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE;
3416
3417
3418 /* Point the command headers at the command tables. */
3419 for (i = 0; i < num_command_slots; i++) {
3420 dd->port->commands[i].command_header =
3421 dd->port->command_list +
3422 (sizeof(struct mtip_cmd_hdr) * i);
3423 dd->port->commands[i].command_header_dma =
3424 dd->port->command_list_dma +
3425 (sizeof(struct mtip_cmd_hdr) * i);
3426
3427 dd->port->commands[i].command =
3428 dd->port->command_table + (HW_CMD_TBL_SZ * i);
3429 dd->port->commands[i].command_dma =
3430 dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
3431
3432 if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
3433 dd->port->commands[i].command_header->ctbau =
3434 __force_bit2int cpu_to_le32(
3435 (dd->port->commands[i].command_dma >> 16) >> 16);
3436 dd->port->commands[i].command_header->ctba =
3437 __force_bit2int cpu_to_le32(
3438 dd->port->commands[i].command_dma & 0xFFFFFFFF);
3439
3440 /*
3441 * If this is not done, a bug is reported by the stock
3442 * FC11 i386. Due to the fact that it has lots of kernel
3443 * debugging enabled.
3444 */
3445 sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
3446
3447 /* Mark all commands as currently inactive.*/
3448 atomic_set(&dd->port->commands[i].active, 0);
3449 }
3450 3521
3451 /* Setup the pointers to the extended s_active and CI registers. */ 3522 /* Setup the pointers to the extended s_active and CI registers. */
3452 for (i = 0; i < dd->slot_groups; i++) { 3523 for (i = 0; i < dd->slot_groups; i++) {
@@ -3594,12 +3665,8 @@ out3:
3594 3665
3595out2: 3666out2:
3596 mtip_deinit_port(dd->port); 3667 mtip_deinit_port(dd->port);
3668 mtip_dma_free(dd);
3597 3669
3598 /* Free the command/command header memory. */
3599 dmam_free_coherent(&dd->pdev->dev,
3600 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3601 dd->port->command_list,
3602 dd->port->command_list_dma);
3603out1: 3670out1:
3604 /* Free the memory allocated for the for structure. */ 3671 /* Free the memory allocated for the for structure. */
3605 kfree(dd->port); 3672 kfree(dd->port);
@@ -3622,7 +3689,8 @@ static int mtip_hw_exit(struct driver_data *dd)
3622 * saves its state. 3689 * saves its state.
3623 */ 3690 */
3624 if (!dd->sr) { 3691 if (!dd->sr) {
3625 if (!test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) 3692 if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) &&
3693 !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
3626 if (mtip_standby_immediate(dd->port)) 3694 if (mtip_standby_immediate(dd->port))
3627 dev_warn(&dd->pdev->dev, 3695 dev_warn(&dd->pdev->dev,
3628 "STANDBY IMMEDIATE failed\n"); 3696 "STANDBY IMMEDIATE failed\n");
@@ -3641,11 +3709,9 @@ static int mtip_hw_exit(struct driver_data *dd)
3641 irq_set_affinity_hint(dd->pdev->irq, NULL); 3709 irq_set_affinity_hint(dd->pdev->irq, NULL);
3642 devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); 3710 devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
3643 3711
3644 /* Free the command/command header memory. */ 3712 /* Free dma regions */
3645 dmam_free_coherent(&dd->pdev->dev, 3713 mtip_dma_free(dd);
3646 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), 3714
3647 dd->port->command_list,
3648 dd->port->command_list_dma);
3649 /* Free the memory allocated for the for structure. */ 3715 /* Free the memory allocated for the for structure. */
3650 kfree(dd->port); 3716 kfree(dd->port);
3651 dd->port = NULL; 3717 dd->port = NULL;
@@ -3962,8 +4028,9 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
3962{ 4028{
3963 struct driver_data *dd = queue->queuedata; 4029 struct driver_data *dd = queue->queuedata;
3964 struct scatterlist *sg; 4030 struct scatterlist *sg;
3965 struct bio_vec *bvec; 4031 struct bio_vec bvec;
3966 int i, nents = 0; 4032 struct bvec_iter iter;
4033 int nents = 0;
3967 int tag = 0, unaligned = 0; 4034 int tag = 0, unaligned = 0;
3968 4035
3969 if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { 4036 if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
@@ -3993,7 +4060,7 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
3993 } 4060 }
3994 4061
3995 if (unlikely(bio->bi_rw & REQ_DISCARD)) { 4062 if (unlikely(bio->bi_rw & REQ_DISCARD)) {
3996 bio_endio(bio, mtip_send_trim(dd, bio->bi_sector, 4063 bio_endio(bio, mtip_send_trim(dd, bio->bi_iter.bi_sector,
3997 bio_sectors(bio))); 4064 bio_sectors(bio)));
3998 return; 4065 return;
3999 } 4066 }
@@ -4006,7 +4073,8 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
4006 4073
4007 if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 && 4074 if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 &&
4008 dd->unal_qdepth) { 4075 dd->unal_qdepth) {
4009 if (bio->bi_sector % 8 != 0) /* Unaligned on 4k boundaries */ 4076 if (bio->bi_iter.bi_sector % 8 != 0)
4077 /* Unaligned on 4k boundaries */
4010 unaligned = 1; 4078 unaligned = 1;
4011 else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */ 4079 else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */
4012 unaligned = 1; 4080 unaligned = 1;
@@ -4025,17 +4093,17 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
4025 } 4093 }
4026 4094
4027 /* Create the scatter list for this bio. */ 4095 /* Create the scatter list for this bio. */
4028 bio_for_each_segment(bvec, bio, i) { 4096 bio_for_each_segment(bvec, bio, iter) {
4029 sg_set_page(&sg[nents], 4097 sg_set_page(&sg[nents],
4030 bvec->bv_page, 4098 bvec.bv_page,
4031 bvec->bv_len, 4099 bvec.bv_len,
4032 bvec->bv_offset); 4100 bvec.bv_offset);
4033 nents++; 4101 nents++;
4034 } 4102 }
4035 4103
4036 /* Issue the read/write. */ 4104 /* Issue the read/write. */
4037 mtip_hw_submit_io(dd, 4105 mtip_hw_submit_io(dd,
4038 bio->bi_sector, 4106 bio->bi_iter.bi_sector,
4039 bio_sectors(bio), 4107 bio_sectors(bio),
4040 nents, 4108 nents,
4041 tag, 4109 tag,
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 9be7a1582ad3..b52e9a6d6aad 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -69,7 +69,7 @@
  * Maximum number of scatter gather entries
  * a single command may have.
  */
-#define MTIP_MAX_SG		128
+#define MTIP_MAX_SG		504
 
 /*
  * Maximum number of slot groups (Command Issue & s_active registers)
@@ -92,7 +92,7 @@
 
 /* Driver name and version strings */
 #define MTIP_DRV_NAME		"mtip32xx"
-#define MTIP_DRV_VERSION	"1.2.6os3"
+#define MTIP_DRV_VERSION	"1.3.0"
 
 /* Maximum number of minor device numbers per device. */
 #define MTIP_MAX_MINORS		16
@@ -391,15 +391,13 @@ struct mtip_port {
 	 */
 	dma_addr_t rxfis_dma;
 	/*
-	 * Pointer to the beginning of the command table memory as used
-	 * by the driver.
+	 * Pointer to the DMA region for RX Fis, Identify, RLE10, and SMART
 	 */
-	void *command_table;
+	void *block1;
 	/*
-	 * Pointer to the beginning of the command table memory as used
-	 * by the DMA.
+	 * DMA address of region for RX Fis, Identify, RLE10, and SMART
 	 */
-	dma_addr_t command_tbl_dma;
+	dma_addr_t block1_dma;
 	/*
 	 * Pointer to the beginning of the identify data memory as used
 	 * by the driver.
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 2dc3b5153f0d..55298db36b2d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -271,18 +271,18 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
271 271
272 if (nbd_cmd(req) == NBD_CMD_WRITE) { 272 if (nbd_cmd(req) == NBD_CMD_WRITE) {
273 struct req_iterator iter; 273 struct req_iterator iter;
274 struct bio_vec *bvec; 274 struct bio_vec bvec;
275 /* 275 /*
276 * we are really probing at internals to determine 276 * we are really probing at internals to determine
277 * whether to set MSG_MORE or not... 277 * whether to set MSG_MORE or not...
278 */ 278 */
279 rq_for_each_segment(bvec, req, iter) { 279 rq_for_each_segment(bvec, req, iter) {
280 flags = 0; 280 flags = 0;
281 if (!rq_iter_last(req, iter)) 281 if (!rq_iter_last(bvec, iter))
282 flags = MSG_MORE; 282 flags = MSG_MORE;
283 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", 283 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
284 nbd->disk->disk_name, req, bvec->bv_len); 284 nbd->disk->disk_name, req, bvec.bv_len);
285 result = sock_send_bvec(nbd, bvec, flags); 285 result = sock_send_bvec(nbd, &bvec, flags);
286 if (result <= 0) { 286 if (result <= 0) {
287 dev_err(disk_to_dev(nbd->disk), 287 dev_err(disk_to_dev(nbd->disk),
288 "Send data failed (result %d)\n", 288 "Send data failed (result %d)\n",
@@ -378,10 +378,10 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
378 nbd->disk->disk_name, req); 378 nbd->disk->disk_name, req);
379 if (nbd_cmd(req) == NBD_CMD_READ) { 379 if (nbd_cmd(req) == NBD_CMD_READ) {
380 struct req_iterator iter; 380 struct req_iterator iter;
381 struct bio_vec *bvec; 381 struct bio_vec bvec;
382 382
383 rq_for_each_segment(bvec, req, iter) { 383 rq_for_each_segment(bvec, req, iter) {
384 result = sock_recv_bvec(nbd, bvec); 384 result = sock_recv_bvec(nbd, &bvec);
385 if (result <= 0) { 385 if (result <= 0) {
386 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", 386 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
387 result); 387 result);
@@ -389,7 +389,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
389 return req; 389 return req;
390 } 390 }
391 dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", 391 dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
392 nbd->disk->disk_name, req, bvec->bv_len); 392 nbd->disk->disk_name, req, bvec.bv_len);
393 } 393 }
394 } 394 }
395 return req; 395 return req;
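
The nbd conversion above is part of the immutable-biovec work: rq_for_each_segment() now yields a struct bio_vec by value instead of a pointer, and rq_iter_last() takes the bvec and the iterator rather than the request. A minimal sketch of the new iteration pattern, assuming the 3.14-era request iterators; send_segment() is a made-up stand-in for a per-segment send such as sock_send_bvec():

    #include <linux/blkdev.h>
    #include <linux/bio.h>
    #include <linux/socket.h>

    /* Hypothetical per-segment send; a real driver would push bv_len bytes. */
    static int send_segment(struct bio_vec *bvec, int flags)
    {
            return bvec->bv_len;
    }

    static int send_request_data(struct request *req)
    {
            struct req_iterator iter;
            struct bio_vec bvec;    /* held by value with the new iterators */
            int result;

            rq_for_each_segment(bvec, req, iter) {
                    /* MSG_MORE on every segment except the last one */
                    int flags = rq_iter_last(bvec, iter) ? 0 : MSG_MORE;

                    result = send_segment(&bvec, flags);
                    if (result <= 0)
                            return result;
            }
            return 0;
    }
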
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index a2e69d26266d..3107282a9741 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -425,10 +425,7 @@ static void null_del_dev(struct nullb *nullb)
425 list_del_init(&nullb->list); 425 list_del_init(&nullb->list);
426 426
427 del_gendisk(nullb->disk); 427 del_gendisk(nullb->disk);
428 if (queue_mode == NULL_Q_MQ) 428 blk_cleanup_queue(nullb->q);
429 blk_mq_free_queue(nullb->q);
430 else
431 blk_cleanup_queue(nullb->q);
432 put_disk(nullb->disk); 429 put_disk(nullb->disk);
433 kfree(nullb); 430 kfree(nullb);
434} 431}
@@ -578,10 +575,7 @@ static int null_add_dev(void)
578 disk = nullb->disk = alloc_disk_node(1, home_node); 575 disk = nullb->disk = alloc_disk_node(1, home_node);
579 if (!disk) { 576 if (!disk) {
580queue_fail: 577queue_fail:
581 if (queue_mode == NULL_Q_MQ) 578 blk_cleanup_queue(nullb->q);
582 blk_mq_free_queue(nullb->q);
583 else
584 blk_cleanup_queue(nullb->q);
585 cleanup_queues(nullb); 579 cleanup_queues(nullb);
586err: 580err:
587 kfree(nullb); 581 kfree(nullb);
@@ -622,6 +616,11 @@ static int __init null_init(void)
622 irqmode = NULL_IRQ_NONE; 616 irqmode = NULL_IRQ_NONE;
623 } 617 }
624#endif 618#endif
619 if (bs > PAGE_SIZE) {
620 pr_warn("null_blk: invalid block size\n");
621 pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
622 bs = PAGE_SIZE;
623 }
625 624
626 if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { 625 if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
627 if (submit_queues < nr_online_nodes) { 626 if (submit_queues < nr_online_nodes) {
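
Two small null_blk changes above: blk_cleanup_queue() now covers both the blk-mq and legacy queues, so the NULL_Q_MQ special case goes away, and an over-large block-size parameter is clamped at init time instead of producing a broken device. A minimal sketch of that clamp-on-load pattern, with module and parameter names assumed:

    #include <linux/module.h>
    #include <linux/mm.h>           /* PAGE_SIZE */

    static int bs = 512;
    module_param(bs, int, S_IRUGO);
    MODULE_PARM_DESC(bs, "Block size (in bytes)");

    static int __init example_init(void)
    {
            /* Clamp rather than fail: anything larger than a page is unusable. */
            if (bs > PAGE_SIZE) {
                    pr_warn("example: invalid block size %d, using %lu\n",
                            bs, PAGE_SIZE);
                    bs = PAGE_SIZE;
            }
            return 0;
    }
    module_init(example_init);

    MODULE_LICENSE("GPL");
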
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 26d03fa0bf26..1f14ac403945 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -441,104 +441,19 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
441 return total_len; 441 return total_len;
442} 442}
443 443
444struct nvme_bio_pair {
445 struct bio b1, b2, *parent;
446 struct bio_vec *bv1, *bv2;
447 int err;
448 atomic_t cnt;
449};
450
451static void nvme_bio_pair_endio(struct bio *bio, int err)
452{
453 struct nvme_bio_pair *bp = bio->bi_private;
454
455 if (err)
456 bp->err = err;
457
458 if (atomic_dec_and_test(&bp->cnt)) {
459 bio_endio(bp->parent, bp->err);
460 kfree(bp->bv1);
461 kfree(bp->bv2);
462 kfree(bp);
463 }
464}
465
466static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
467 int len, int offset)
468{
469 struct nvme_bio_pair *bp;
470
471 BUG_ON(len > bio->bi_size);
472 BUG_ON(idx > bio->bi_vcnt);
473
474 bp = kmalloc(sizeof(*bp), GFP_ATOMIC);
475 if (!bp)
476 return NULL;
477 bp->err = 0;
478
479 bp->b1 = *bio;
480 bp->b2 = *bio;
481
482 bp->b1.bi_size = len;
483 bp->b2.bi_size -= len;
484 bp->b1.bi_vcnt = idx;
485 bp->b2.bi_idx = idx;
486 bp->b2.bi_sector += len >> 9;
487
488 if (offset) {
489 bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
490 GFP_ATOMIC);
491 if (!bp->bv1)
492 goto split_fail_1;
493
494 bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
495 GFP_ATOMIC);
496 if (!bp->bv2)
497 goto split_fail_2;
498
499 memcpy(bp->bv1, bio->bi_io_vec,
500 bio->bi_max_vecs * sizeof(struct bio_vec));
501 memcpy(bp->bv2, bio->bi_io_vec,
502 bio->bi_max_vecs * sizeof(struct bio_vec));
503
504 bp->b1.bi_io_vec = bp->bv1;
505 bp->b2.bi_io_vec = bp->bv2;
506 bp->b2.bi_io_vec[idx].bv_offset += offset;
507 bp->b2.bi_io_vec[idx].bv_len -= offset;
508 bp->b1.bi_io_vec[idx].bv_len = offset;
509 bp->b1.bi_vcnt++;
510 } else
511 bp->bv1 = bp->bv2 = NULL;
512
513 bp->b1.bi_private = bp;
514 bp->b2.bi_private = bp;
515
516 bp->b1.bi_end_io = nvme_bio_pair_endio;
517 bp->b2.bi_end_io = nvme_bio_pair_endio;
518
519 bp->parent = bio;
520 atomic_set(&bp->cnt, 2);
521
522 return bp;
523
524 split_fail_2:
525 kfree(bp->bv1);
526 split_fail_1:
527 kfree(bp);
528 return NULL;
529}
530
531static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq, 444static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
532 int idx, int len, int offset) 445 int len)
533{ 446{
534 struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset); 447 struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL);
535 if (!bp) 448 if (!split)
536 return -ENOMEM; 449 return -ENOMEM;
537 450
451 bio_chain(split, bio);
452
538 if (bio_list_empty(&nvmeq->sq_cong)) 453 if (bio_list_empty(&nvmeq->sq_cong))
539 add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); 454 add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
540 bio_list_add(&nvmeq->sq_cong, &bp->b1); 455 bio_list_add(&nvmeq->sq_cong, split);
541 bio_list_add(&nvmeq->sq_cong, &bp->b2); 456 bio_list_add(&nvmeq->sq_cong, bio);
542 457
543 return 0; 458 return 0;
544} 459}
@@ -550,41 +465,44 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
550static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod, 465static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
551 struct bio *bio, enum dma_data_direction dma_dir, int psegs) 466 struct bio *bio, enum dma_data_direction dma_dir, int psegs)
552{ 467{
553 struct bio_vec *bvec, *bvprv = NULL; 468 struct bio_vec bvec, bvprv;
469 struct bvec_iter iter;
554 struct scatterlist *sg = NULL; 470 struct scatterlist *sg = NULL;
555 int i, length = 0, nsegs = 0, split_len = bio->bi_size; 471 int length = 0, nsegs = 0, split_len = bio->bi_iter.bi_size;
472 int first = 1;
556 473
557 if (nvmeq->dev->stripe_size) 474 if (nvmeq->dev->stripe_size)
558 split_len = nvmeq->dev->stripe_size - 475 split_len = nvmeq->dev->stripe_size -
559 ((bio->bi_sector << 9) & (nvmeq->dev->stripe_size - 1)); 476 ((bio->bi_iter.bi_sector << 9) &
477 (nvmeq->dev->stripe_size - 1));
560 478
561 sg_init_table(iod->sg, psegs); 479 sg_init_table(iod->sg, psegs);
562 bio_for_each_segment(bvec, bio, i) { 480 bio_for_each_segment(bvec, bio, iter) {
563 if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) { 481 if (!first && BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec)) {
564 sg->length += bvec->bv_len; 482 sg->length += bvec.bv_len;
565 } else { 483 } else {
566 if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec)) 484 if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
567 return nvme_split_and_submit(bio, nvmeq, i, 485 return nvme_split_and_submit(bio, nvmeq,
568 length, 0); 486 length);
569 487
570 sg = sg ? sg + 1 : iod->sg; 488 sg = sg ? sg + 1 : iod->sg;
571 sg_set_page(sg, bvec->bv_page, bvec->bv_len, 489 sg_set_page(sg, bvec.bv_page,
572 bvec->bv_offset); 490 bvec.bv_len, bvec.bv_offset);
573 nsegs++; 491 nsegs++;
574 } 492 }
575 493
576 if (split_len - length < bvec->bv_len) 494 if (split_len - length < bvec.bv_len)
577 return nvme_split_and_submit(bio, nvmeq, i, split_len, 495 return nvme_split_and_submit(bio, nvmeq, split_len);
578 split_len - length); 496 length += bvec.bv_len;
579 length += bvec->bv_len;
580 bvprv = bvec; 497 bvprv = bvec;
498 first = 0;
581 } 499 }
582 iod->nents = nsegs; 500 iod->nents = nsegs;
583 sg_mark_end(sg); 501 sg_mark_end(sg);
584 if (dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir) == 0) 502 if (dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir) == 0)
585 return -ENOMEM; 503 return -ENOMEM;
586 504
587 BUG_ON(length != bio->bi_size); 505 BUG_ON(length != bio->bi_iter.bi_size);
588 return length; 506 return length;
589} 507}
590 508
@@ -608,8 +526,8 @@ static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
608 iod->npages = 0; 526 iod->npages = 0;
609 527
610 range->cattr = cpu_to_le32(0); 528 range->cattr = cpu_to_le32(0);
611 range->nlb = cpu_to_le32(bio->bi_size >> ns->lba_shift); 529 range->nlb = cpu_to_le32(bio->bi_iter.bi_size >> ns->lba_shift);
612 range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_sector)); 530 range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
613 531
614 memset(cmnd, 0, sizeof(*cmnd)); 532 memset(cmnd, 0, sizeof(*cmnd));
615 cmnd->dsm.opcode = nvme_cmd_dsm; 533 cmnd->dsm.opcode = nvme_cmd_dsm;
@@ -674,7 +592,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
674 } 592 }
675 593
676 result = -ENOMEM; 594 result = -ENOMEM;
677 iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC); 595 iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
678 if (!iod) 596 if (!iod)
679 goto nomem; 597 goto nomem;
680 iod->private = bio; 598 iod->private = bio;
@@ -723,7 +641,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
723 cmnd->rw.nsid = cpu_to_le32(ns->ns_id); 641 cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
724 length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length, 642 length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length,
725 GFP_ATOMIC); 643 GFP_ATOMIC);
726 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_sector)); 644 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
727 cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1); 645 cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
728 cmnd->rw.control = cpu_to_le16(control); 646 cmnd->rw.control = cpu_to_le16(control);
729 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); 647 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
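
The nvme hunks retire the driver's private nvme_bio_pair splitting in favour of the generic helpers that arrived with immutable biovecs: bio_split() clips the first few sectors of a bio into a new bio, bio_chain() makes the parent's completion wait on the split half, and nvme_map_bio() now walks segments with a by-value bio_vec and a bvec_iter. A minimal sketch of the split-and-requeue idea, assuming the 3.14-era bio API; requeue_bio() is a made-up stand-in for putting a bio back on the driver's submit list:

    #include <linux/blkdev.h>
    #include <linux/bio.h>

    static void requeue_bio(struct bio *bio)
    {
            /*
             * Stand-in: the real driver appends to its sq_cong list and lets
             * its own resubmit path pick the bio up; pushing it back through
             * the block layer is the generic equivalent.
             */
            generic_make_request(bio);
    }

    /* Split off the first split_len bytes of @bio and queue both halves. */
    static int split_and_requeue(struct bio *bio, unsigned int split_len)
    {
            struct bio *split = bio_split(bio, split_len >> 9, GFP_ATOMIC, NULL);

            if (!split)
                    return -ENOMEM;

            /* The parent completes only once the split part has completed. */
            bio_chain(split, bio);

            requeue_bio(split);
            requeue_bio(bio);
            return 0;
    }
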
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index 4a27b1de5fcb..2ce3dfd7e6b9 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -581,7 +581,7 @@ static ssize_t pg_write(struct file *filp, const char __user *buf, size_t count,
581 581
582 if (hdr.magic != PG_MAGIC) 582 if (hdr.magic != PG_MAGIC)
583 return -EINVAL; 583 return -EINVAL;
584 if (hdr.dlen > PG_MAX_DATA) 584 if (hdr.dlen < 0 || hdr.dlen > PG_MAX_DATA)
585 return -EINVAL; 585 return -EINVAL;
586 if ((count - hs) > PG_MAX_DATA) 586 if ((count - hs) > PG_MAX_DATA)
587 return -EINVAL; 587 return -EINVAL;
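
The pg.c change closes a signedness hole: hdr.dlen is a plain int copied from userspace, so a negative value passed the old upper-bound test and could later be used as a length. The check in isolation, with the struct reduced to the fields that matter and names assumed:

    #include <linux/errno.h>

    /* Illustration only; field names assumed. */
    struct example_hdr {
            int magic;
            int dlen;       /* signed, copied straight from userspace */
    };

    static int check_hdr(const struct example_hdr *hdr, int max_data)
    {
            /* Reject negative lengths as well as oversized ones. */
            if (hdr->dlen < 0 || hdr->dlen > max_data)
                    return -EINVAL;
            return 0;
    }
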
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ff8668c5efb1..a2af73db187b 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -651,7 +651,7 @@ static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s
651 651
652 for (;;) { 652 for (;;) {
653 tmp = rb_entry(n, struct pkt_rb_node, rb_node); 653 tmp = rb_entry(n, struct pkt_rb_node, rb_node);
654 if (s <= tmp->bio->bi_sector) 654 if (s <= tmp->bio->bi_iter.bi_sector)
655 next = n->rb_left; 655 next = n->rb_left;
656 else 656 else
657 next = n->rb_right; 657 next = n->rb_right;
@@ -660,12 +660,12 @@ static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s
660 n = next; 660 n = next;
661 } 661 }
662 662
663 if (s > tmp->bio->bi_sector) { 663 if (s > tmp->bio->bi_iter.bi_sector) {
664 tmp = pkt_rbtree_next(tmp); 664 tmp = pkt_rbtree_next(tmp);
665 if (!tmp) 665 if (!tmp)
666 return NULL; 666 return NULL;
667 } 667 }
668 BUG_ON(s > tmp->bio->bi_sector); 668 BUG_ON(s > tmp->bio->bi_iter.bi_sector);
669 return tmp; 669 return tmp;
670} 670}
671 671
@@ -676,13 +676,13 @@ static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *nod
676{ 676{
677 struct rb_node **p = &pd->bio_queue.rb_node; 677 struct rb_node **p = &pd->bio_queue.rb_node;
678 struct rb_node *parent = NULL; 678 struct rb_node *parent = NULL;
679 sector_t s = node->bio->bi_sector; 679 sector_t s = node->bio->bi_iter.bi_sector;
680 struct pkt_rb_node *tmp; 680 struct pkt_rb_node *tmp;
681 681
682 while (*p) { 682 while (*p) {
683 parent = *p; 683 parent = *p;
684 tmp = rb_entry(parent, struct pkt_rb_node, rb_node); 684 tmp = rb_entry(parent, struct pkt_rb_node, rb_node);
685 if (s < tmp->bio->bi_sector) 685 if (s < tmp->bio->bi_iter.bi_sector)
686 p = &(*p)->rb_left; 686 p = &(*p)->rb_left;
687 else 687 else
688 p = &(*p)->rb_right; 688 p = &(*p)->rb_right;
@@ -706,7 +706,9 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
706 WRITE : READ, __GFP_WAIT); 706 WRITE : READ, __GFP_WAIT);
707 707
708 if (cgc->buflen) { 708 if (cgc->buflen) {
709 if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT)) 709 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
710 __GFP_WAIT);
711 if (ret)
710 goto out; 712 goto out;
711 } 713 }
712 714
@@ -857,7 +859,8 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
857 spin_lock(&pd->iosched.lock); 859 spin_lock(&pd->iosched.lock);
858 bio = bio_list_peek(&pd->iosched.write_queue); 860 bio = bio_list_peek(&pd->iosched.write_queue);
859 spin_unlock(&pd->iosched.lock); 861 spin_unlock(&pd->iosched.lock);
860 if (bio && (bio->bi_sector == pd->iosched.last_write)) 862 if (bio && (bio->bi_iter.bi_sector ==
863 pd->iosched.last_write))
861 need_write_seek = 0; 864 need_write_seek = 0;
862 if (need_write_seek && reads_queued) { 865 if (need_write_seek && reads_queued) {
863 if (atomic_read(&pd->cdrw.pending_bios) > 0) { 866 if (atomic_read(&pd->cdrw.pending_bios) > 0) {
@@ -888,7 +891,8 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
888 continue; 891 continue;
889 892
890 if (bio_data_dir(bio) == READ) 893 if (bio_data_dir(bio) == READ)
891 pd->iosched.successive_reads += bio->bi_size >> 10; 894 pd->iosched.successive_reads +=
895 bio->bi_iter.bi_size >> 10;
892 else { 896 else {
893 pd->iosched.successive_reads = 0; 897 pd->iosched.successive_reads = 0;
894 pd->iosched.last_write = bio_end_sector(bio); 898 pd->iosched.last_write = bio_end_sector(bio);
@@ -978,7 +982,7 @@ static void pkt_end_io_read(struct bio *bio, int err)
978 982
979 pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n", 983 pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
980 bio, (unsigned long long)pkt->sector, 984 bio, (unsigned long long)pkt->sector,
981 (unsigned long long)bio->bi_sector, err); 985 (unsigned long long)bio->bi_iter.bi_sector, err);
982 986
983 if (err) 987 if (err)
984 atomic_inc(&pkt->io_errors); 988 atomic_inc(&pkt->io_errors);
@@ -1026,8 +1030,9 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
1026 memset(written, 0, sizeof(written)); 1030 memset(written, 0, sizeof(written));
1027 spin_lock(&pkt->lock); 1031 spin_lock(&pkt->lock);
1028 bio_list_for_each(bio, &pkt->orig_bios) { 1032 bio_list_for_each(bio, &pkt->orig_bios) {
1029 int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9); 1033 int first_frame = (bio->bi_iter.bi_sector - pkt->sector) /
1030 int num_frames = bio->bi_size / CD_FRAMESIZE; 1034 (CD_FRAMESIZE >> 9);
1035 int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE;
1031 pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9); 1036 pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9);
1032 BUG_ON(first_frame < 0); 1037 BUG_ON(first_frame < 0);
1033 BUG_ON(first_frame + num_frames > pkt->frames); 1038 BUG_ON(first_frame + num_frames > pkt->frames);
@@ -1053,7 +1058,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
1053 1058
1054 bio = pkt->r_bios[f]; 1059 bio = pkt->r_bios[f];
1055 bio_reset(bio); 1060 bio_reset(bio);
1056 bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); 1061 bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
1057 bio->bi_bdev = pd->bdev; 1062 bio->bi_bdev = pd->bdev;
1058 bio->bi_end_io = pkt_end_io_read; 1063 bio->bi_end_io = pkt_end_io_read;
1059 bio->bi_private = pkt; 1064 bio->bi_private = pkt;
@@ -1150,8 +1155,8 @@ static int pkt_start_recovery(struct packet_data *pkt)
1150 bio_reset(pkt->bio); 1155 bio_reset(pkt->bio);
1151 pkt->bio->bi_bdev = pd->bdev; 1156 pkt->bio->bi_bdev = pd->bdev;
1152 pkt->bio->bi_rw = REQ_WRITE; 1157 pkt->bio->bi_rw = REQ_WRITE;
1153 pkt->bio->bi_sector = new_sector; 1158 pkt->bio->bi_iter.bi_sector = new_sector;
1154 pkt->bio->bi_size = pkt->frames * CD_FRAMESIZE; 1159 pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE;
1155 pkt->bio->bi_vcnt = pkt->frames; 1160 pkt->bio->bi_vcnt = pkt->frames;
1156 1161
1157 pkt->bio->bi_end_io = pkt_end_io_packet_write; 1162 pkt->bio->bi_end_io = pkt_end_io_packet_write;
@@ -1213,7 +1218,7 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
1213 node = first_node; 1218 node = first_node;
1214 while (node) { 1219 while (node) {
1215 bio = node->bio; 1220 bio = node->bio;
1216 zone = get_zone(bio->bi_sector, pd); 1221 zone = get_zone(bio->bi_iter.bi_sector, pd);
1217 list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) { 1222 list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
1218 if (p->sector == zone) { 1223 if (p->sector == zone) {
1219 bio = NULL; 1224 bio = NULL;
@@ -1252,14 +1257,14 @@ try_next_bio:
1252 pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone); 1257 pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
1253 while ((node = pkt_rbtree_find(pd, zone)) != NULL) { 1258 while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
1254 bio = node->bio; 1259 bio = node->bio;
1255 pkt_dbg(2, pd, "found zone=%llx\n", 1260 pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long)
1256 (unsigned long long)get_zone(bio->bi_sector, pd)); 1261 get_zone(bio->bi_iter.bi_sector, pd));
1257 if (get_zone(bio->bi_sector, pd) != zone) 1262 if (get_zone(bio->bi_iter.bi_sector, pd) != zone)
1258 break; 1263 break;
1259 pkt_rbtree_erase(pd, node); 1264 pkt_rbtree_erase(pd, node);
1260 spin_lock(&pkt->lock); 1265 spin_lock(&pkt->lock);
1261 bio_list_add(&pkt->orig_bios, bio); 1266 bio_list_add(&pkt->orig_bios, bio);
1262 pkt->write_size += bio->bi_size / CD_FRAMESIZE; 1267 pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE;
1263 spin_unlock(&pkt->lock); 1268 spin_unlock(&pkt->lock);
1264 } 1269 }
1265 /* check write congestion marks, and if bio_queue_size is 1270 /* check write congestion marks, and if bio_queue_size is
@@ -1293,7 +1298,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
1293 struct bio_vec *bvec = pkt->w_bio->bi_io_vec; 1298 struct bio_vec *bvec = pkt->w_bio->bi_io_vec;
1294 1299
1295 bio_reset(pkt->w_bio); 1300 bio_reset(pkt->w_bio);
1296 pkt->w_bio->bi_sector = pkt->sector; 1301 pkt->w_bio->bi_iter.bi_sector = pkt->sector;
1297 pkt->w_bio->bi_bdev = pd->bdev; 1302 pkt->w_bio->bi_bdev = pd->bdev;
1298 pkt->w_bio->bi_end_io = pkt_end_io_packet_write; 1303 pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
1299 pkt->w_bio->bi_private = pkt; 1304 pkt->w_bio->bi_private = pkt;
@@ -2335,75 +2340,29 @@ static void pkt_end_io_read_cloned(struct bio *bio, int err)
2335 pkt_bio_finished(pd); 2340 pkt_bio_finished(pd);
2336} 2341}
2337 2342
2338static void pkt_make_request(struct request_queue *q, struct bio *bio) 2343static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
2339{ 2344{
2340 struct pktcdvd_device *pd; 2345 struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
2341 char b[BDEVNAME_SIZE]; 2346 struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
2347
2348 psd->pd = pd;
2349 psd->bio = bio;
2350 cloned_bio->bi_bdev = pd->bdev;
2351 cloned_bio->bi_private = psd;
2352 cloned_bio->bi_end_io = pkt_end_io_read_cloned;
2353 pd->stats.secs_r += bio_sectors(bio);
2354 pkt_queue_bio(pd, cloned_bio);
2355}
2356
2357static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
2358{
2359 struct pktcdvd_device *pd = q->queuedata;
2342 sector_t zone; 2360 sector_t zone;
2343 struct packet_data *pkt; 2361 struct packet_data *pkt;
2344 int was_empty, blocked_bio; 2362 int was_empty, blocked_bio;
2345 struct pkt_rb_node *node; 2363 struct pkt_rb_node *node;
2346 2364
2347 pd = q->queuedata; 2365 zone = get_zone(bio->bi_iter.bi_sector, pd);
2348 if (!pd) {
2349 pr_err("%s incorrect request queue\n",
2350 bdevname(bio->bi_bdev, b));
2351 goto end_io;
2352 }
2353
2354 /*
2355 * Clone READ bios so we can have our own bi_end_io callback.
2356 */
2357 if (bio_data_dir(bio) == READ) {
2358 struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
2359 struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
2360
2361 psd->pd = pd;
2362 psd->bio = bio;
2363 cloned_bio->bi_bdev = pd->bdev;
2364 cloned_bio->bi_private = psd;
2365 cloned_bio->bi_end_io = pkt_end_io_read_cloned;
2366 pd->stats.secs_r += bio_sectors(bio);
2367 pkt_queue_bio(pd, cloned_bio);
2368 return;
2369 }
2370
2371 if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
2372 pkt_notice(pd, "WRITE for ro device (%llu)\n",
2373 (unsigned long long)bio->bi_sector);
2374 goto end_io;
2375 }
2376
2377 if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) {
2378 pkt_err(pd, "wrong bio size\n");
2379 goto end_io;
2380 }
2381
2382 blk_queue_bounce(q, &bio);
2383
2384 zone = get_zone(bio->bi_sector, pd);
2385 pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
2386 (unsigned long long)bio->bi_sector,
2387 (unsigned long long)bio_end_sector(bio));
2388
2389 /* Check if we have to split the bio */
2390 {
2391 struct bio_pair *bp;
2392 sector_t last_zone;
2393 int first_sectors;
2394
2395 last_zone = get_zone(bio_end_sector(bio) - 1, pd);
2396 if (last_zone != zone) {
2397 BUG_ON(last_zone != zone + pd->settings.size);
2398 first_sectors = last_zone - bio->bi_sector;
2399 bp = bio_split(bio, first_sectors);
2400 BUG_ON(!bp);
2401 pkt_make_request(q, &bp->bio1);
2402 pkt_make_request(q, &bp->bio2);
2403 bio_pair_release(bp);
2404 return;
2405 }
2406 }
2407 2366
2408 /* 2367 /*
2409 * If we find a matching packet in state WAITING or READ_WAIT, we can 2368 * If we find a matching packet in state WAITING or READ_WAIT, we can
@@ -2417,7 +2376,8 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
2417 if ((pkt->state == PACKET_WAITING_STATE) || 2376 if ((pkt->state == PACKET_WAITING_STATE) ||
2418 (pkt->state == PACKET_READ_WAIT_STATE)) { 2377 (pkt->state == PACKET_READ_WAIT_STATE)) {
2419 bio_list_add(&pkt->orig_bios, bio); 2378 bio_list_add(&pkt->orig_bios, bio);
2420 pkt->write_size += bio->bi_size / CD_FRAMESIZE; 2379 pkt->write_size +=
2380 bio->bi_iter.bi_size / CD_FRAMESIZE;
2421 if ((pkt->write_size >= pkt->frames) && 2381 if ((pkt->write_size >= pkt->frames) &&
2422 (pkt->state == PACKET_WAITING_STATE)) { 2382 (pkt->state == PACKET_WAITING_STATE)) {
2423 atomic_inc(&pkt->run_sm); 2383 atomic_inc(&pkt->run_sm);
@@ -2476,6 +2436,64 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
2476 */ 2436 */
2477 wake_up(&pd->wqueue); 2437 wake_up(&pd->wqueue);
2478 } 2438 }
2439}
2440
2441static void pkt_make_request(struct request_queue *q, struct bio *bio)
2442{
2443 struct pktcdvd_device *pd;
2444 char b[BDEVNAME_SIZE];
2445 struct bio *split;
2446
2447 pd = q->queuedata;
2448 if (!pd) {
2449 pr_err("%s incorrect request queue\n",
2450 bdevname(bio->bi_bdev, b));
2451 goto end_io;
2452 }
2453
2454 pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
2455 (unsigned long long)bio->bi_iter.bi_sector,
2456 (unsigned long long)bio_end_sector(bio));
2457
2458 /*
2459 * Clone READ bios so we can have our own bi_end_io callback.
2460 */
2461 if (bio_data_dir(bio) == READ) {
2462 pkt_make_request_read(pd, bio);
2463 return;
2464 }
2465
2466 if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
2467 pkt_notice(pd, "WRITE for ro device (%llu)\n",
2468 (unsigned long long)bio->bi_iter.bi_sector);
2469 goto end_io;
2470 }
2471
2472 if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
2473 pkt_err(pd, "wrong bio size\n");
2474 goto end_io;
2475 }
2476
2477 blk_queue_bounce(q, &bio);
2478
2479 do {
2480 sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
2481 sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
2482
2483 if (last_zone != zone) {
2484 BUG_ON(last_zone != zone + pd->settings.size);
2485
2486 split = bio_split(bio, last_zone -
2487 bio->bi_iter.bi_sector,
2488 GFP_NOIO, fs_bio_set);
2489 bio_chain(split, bio);
2490 } else {
2491 split = bio;
2492 }
2493
2494 pkt_make_request_write(q, split);
2495 } while (split != bio);
2496
2479 return; 2497 return;
2480end_io: 2498end_io:
2481 bio_io_error(bio); 2499 bio_io_error(bio);
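
The pktcdvd rework splits pkt_make_request() into a read path and a write path. Reads are cloned so the driver gets its own completion callback and finishes the original bio from there; writes that cross a zone boundary are carved up with bio_split()/bio_chain() in a loop instead of the old bio_pair code. A minimal sketch of the clone-a-read pattern, assuming the 3.14-era two-argument bi_end_io; struct stacked, submit_read() and the direct resubmission are stand-ins for packet_stacked_data, the driver's read path and its internal queue:

    #include <linux/blkdev.h>
    #include <linux/bio.h>
    #include <linux/slab.h>

    /* Made-up per-clone context, standing in for struct packet_stacked_data. */
    struct stacked {
            struct bio *orig;
    };

    static void clone_end_io(struct bio *clone, int err)
    {
            struct stacked *s = clone->bi_private;

            bio_put(clone);
            bio_endio(s->orig, err);        /* complete the caller's bio */
            kfree(s);
    }

    static int submit_read(struct block_device *lower_bdev, struct bio *bio)
    {
            struct bio *clone = bio_clone(bio, GFP_NOIO);
            struct stacked *s = kmalloc(sizeof(*s), GFP_NOIO);

            if (!clone || !s) {
                    if (clone)
                            bio_put(clone);
                    kfree(s);
                    return -ENOMEM;
            }

            s->orig = bio;
            clone->bi_bdev = lower_bdev;
            clone->bi_private = s;
            clone->bi_end_io = clone_end_io;

            /* The real driver queues the clone to its own I/O thread instead. */
            generic_make_request(clone);
            return 0;
    }
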
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index d754a88d7585..c120d70d3fb3 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -94,26 +94,25 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev,
94{ 94{
95 unsigned int offset = 0; 95 unsigned int offset = 0;
96 struct req_iterator iter; 96 struct req_iterator iter;
97 struct bio_vec *bvec; 97 struct bio_vec bvec;
98 unsigned int i = 0; 98 unsigned int i = 0;
99 size_t size; 99 size_t size;
100 void *buf; 100 void *buf;
101 101
102 rq_for_each_segment(bvec, req, iter) { 102 rq_for_each_segment(bvec, req, iter) {
103 unsigned long flags; 103 unsigned long flags;
104 dev_dbg(&dev->sbd.core, 104 dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u sectors from %lu\n",
105 "%s:%u: bio %u: %u segs %u sectors from %lu\n", 105 __func__, __LINE__, i, bio_sectors(iter.bio),
106 __func__, __LINE__, i, bio_segments(iter.bio), 106 iter.bio->bi_iter.bi_sector);
107 bio_sectors(iter.bio), iter.bio->bi_sector);
108 107
109 size = bvec->bv_len; 108 size = bvec.bv_len;
110 buf = bvec_kmap_irq(bvec, &flags); 109 buf = bvec_kmap_irq(&bvec, &flags);
111 if (gather) 110 if (gather)
112 memcpy(dev->bounce_buf+offset, buf, size); 111 memcpy(dev->bounce_buf+offset, buf, size);
113 else 112 else
114 memcpy(buf, dev->bounce_buf+offset, size); 113 memcpy(buf, dev->bounce_buf+offset, size);
115 offset += size; 114 offset += size;
116 flush_kernel_dcache_page(bvec->bv_page); 115 flush_kernel_dcache_page(bvec.bv_page);
117 bvec_kunmap_irq(buf, &flags); 116 bvec_kunmap_irq(buf, &flags);
118 i++; 117 i++;
119 } 118 }
@@ -130,7 +129,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
130 129
131#ifdef DEBUG 130#ifdef DEBUG
132 unsigned int n = 0; 131 unsigned int n = 0;
133 struct bio_vec *bv; 132 struct bio_vec bv;
134 struct req_iterator iter; 133 struct req_iterator iter;
135 134
136 rq_for_each_segment(bv, req, iter) 135 rq_for_each_segment(bv, req, iter)
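
The ps3disk hunk shows the same by-value iteration applied to a bounce-buffer copy: bvec_kmap_irq() and flush_kernel_dcache_page() now take the address of the stack-held bio_vec. A minimal sketch of the gather direction only (request data copied into a driver bounce buffer); the helper name is assumed:

    #include <linux/blkdev.h>
    #include <linux/bio.h>
    #include <linux/string.h>

    /* Copy a request's data into a driver bounce buffer ("gather"). */
    static void gather_to_bounce(struct request *req, char *bounce)
    {
            struct req_iterator iter;
            struct bio_vec bvec;
            unsigned int offset = 0;

            rq_for_each_segment(bvec, req, iter) {
                    unsigned long flags;
                    /* Maps the segment's page; highmem- and atomic-safe. */
                    char *buf = bvec_kmap_irq(&bvec, &flags);

                    memcpy(bounce + offset, buf, bvec.bv_len);
                    offset += bvec.bv_len;
                    bvec_kunmap_irq(buf, &flags);
            }
    }
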
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 06a2e53e5f37..ef45cfb98fd2 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -553,16 +553,16 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
553 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); 553 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
554 int write = bio_data_dir(bio) == WRITE; 554 int write = bio_data_dir(bio) == WRITE;
555 const char *op = write ? "write" : "read"; 555 const char *op = write ? "write" : "read";
556 loff_t offset = bio->bi_sector << 9; 556 loff_t offset = bio->bi_iter.bi_sector << 9;
557 int error = 0; 557 int error = 0;
558 struct bio_vec *bvec; 558 struct bio_vec bvec;
559 unsigned int i; 559 struct bvec_iter iter;
560 struct bio *next; 560 struct bio *next;
561 561
562 bio_for_each_segment(bvec, bio, i) { 562 bio_for_each_segment(bvec, bio, iter) {
563 /* PS3 is ppc64, so we don't handle highmem */ 563 /* PS3 is ppc64, so we don't handle highmem */
564 char *ptr = page_address(bvec->bv_page) + bvec->bv_offset; 564 char *ptr = page_address(bvec.bv_page) + bvec.bv_offset;
565 size_t len = bvec->bv_len, retlen; 565 size_t len = bvec.bv_len, retlen;
566 566
567 dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op, 567 dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op,
568 len, offset); 568 len, offset);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index cb1db2979d3d..b365e0dfccb6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -41,6 +41,7 @@
41#include <linux/fs.h> 41#include <linux/fs.h>
42#include <linux/blkdev.h> 42#include <linux/blkdev.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/idr.h>
44 45
45#include "rbd_types.h" 46#include "rbd_types.h"
46 47
@@ -89,9 +90,9 @@ static int atomic_dec_return_safe(atomic_t *v)
89} 90}
90 91
91#define RBD_DRV_NAME "rbd" 92#define RBD_DRV_NAME "rbd"
92#define RBD_DRV_NAME_LONG "rbd (rados block device)"
93 93
94#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 94#define RBD_MINORS_PER_MAJOR 256
95#define RBD_SINGLE_MAJOR_PART_SHIFT 4
95 96
96#define RBD_SNAP_DEV_NAME_PREFIX "snap_" 97#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
97#define RBD_MAX_SNAP_NAME_LEN \ 98#define RBD_MAX_SNAP_NAME_LEN \
@@ -323,6 +324,7 @@ struct rbd_device {
323 int dev_id; /* blkdev unique id */ 324 int dev_id; /* blkdev unique id */
324 325
325 int major; /* blkdev assigned major */ 326 int major; /* blkdev assigned major */
327 int minor;
326 struct gendisk *disk; /* blkdev's gendisk and rq */ 328 struct gendisk *disk; /* blkdev's gendisk and rq */
327 329
328 u32 image_format; /* Either 1 or 2 */ 330 u32 image_format; /* Either 1 or 2 */
@@ -386,6 +388,17 @@ static struct kmem_cache *rbd_img_request_cache;
386static struct kmem_cache *rbd_obj_request_cache; 388static struct kmem_cache *rbd_obj_request_cache;
387static struct kmem_cache *rbd_segment_name_cache; 389static struct kmem_cache *rbd_segment_name_cache;
388 390
391static int rbd_major;
392static DEFINE_IDA(rbd_dev_id_ida);
393
394/*
395 * Default to false for now, as single-major requires >= 0.75 version of
396 * userspace rbd utility.
397 */
398static bool single_major = false;
399module_param(single_major, bool, S_IRUGO);
400MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: false)");
401
389static int rbd_img_request_submit(struct rbd_img_request *img_request); 402static int rbd_img_request_submit(struct rbd_img_request *img_request);
390 403
391static void rbd_dev_device_release(struct device *dev); 404static void rbd_dev_device_release(struct device *dev);
@@ -394,18 +407,52 @@ static ssize_t rbd_add(struct bus_type *bus, const char *buf,
394 size_t count); 407 size_t count);
395static ssize_t rbd_remove(struct bus_type *bus, const char *buf, 408static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
396 size_t count); 409 size_t count);
410static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
411 size_t count);
412static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
413 size_t count);
397static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping); 414static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
398static void rbd_spec_put(struct rbd_spec *spec); 415static void rbd_spec_put(struct rbd_spec *spec);
399 416
417static int rbd_dev_id_to_minor(int dev_id)
418{
419 return dev_id << RBD_SINGLE_MAJOR_PART_SHIFT;
420}
421
422static int minor_to_rbd_dev_id(int minor)
423{
424 return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
425}
426
400static BUS_ATTR(add, S_IWUSR, NULL, rbd_add); 427static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
401static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove); 428static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
429static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
430static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
402 431
403static struct attribute *rbd_bus_attrs[] = { 432static struct attribute *rbd_bus_attrs[] = {
404 &bus_attr_add.attr, 433 &bus_attr_add.attr,
405 &bus_attr_remove.attr, 434 &bus_attr_remove.attr,
435 &bus_attr_add_single_major.attr,
436 &bus_attr_remove_single_major.attr,
406 NULL, 437 NULL,
407}; 438};
408ATTRIBUTE_GROUPS(rbd_bus); 439
440static umode_t rbd_bus_is_visible(struct kobject *kobj,
441 struct attribute *attr, int index)
442{
443 if (!single_major &&
444 (attr == &bus_attr_add_single_major.attr ||
445 attr == &bus_attr_remove_single_major.attr))
446 return 0;
447
448 return attr->mode;
449}
450
451static const struct attribute_group rbd_bus_group = {
452 .attrs = rbd_bus_attrs,
453 .is_visible = rbd_bus_is_visible,
454};
455__ATTRIBUTE_GROUPS(rbd_bus);
409 456
410static struct bus_type rbd_bus_type = { 457static struct bus_type rbd_bus_type = {
411 .name = "rbd", 458 .name = "rbd",
@@ -1041,9 +1088,9 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
1041 name_format = "%s.%012llx"; 1088 name_format = "%s.%012llx";
1042 if (rbd_dev->image_format == 2) 1089 if (rbd_dev->image_format == 2)
1043 name_format = "%s.%016llx"; 1090 name_format = "%s.%016llx";
1044 ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, name_format, 1091 ret = snprintf(name, CEPH_MAX_OID_NAME_LEN + 1, name_format,
1045 rbd_dev->header.object_prefix, segment); 1092 rbd_dev->header.object_prefix, segment);
1046 if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) { 1093 if (ret < 0 || ret > CEPH_MAX_OID_NAME_LEN) {
1047 pr_err("error formatting segment name for #%llu (%d)\n", 1094 pr_err("error formatting segment name for #%llu (%d)\n",
1048 segment, ret); 1095 segment, ret);
1049 kfree(name); 1096 kfree(name);
@@ -1109,23 +1156,23 @@ static void bio_chain_put(struct bio *chain)
1109 */ 1156 */
1110static void zero_bio_chain(struct bio *chain, int start_ofs) 1157static void zero_bio_chain(struct bio *chain, int start_ofs)
1111{ 1158{
1112 struct bio_vec *bv; 1159 struct bio_vec bv;
1160 struct bvec_iter iter;
1113 unsigned long flags; 1161 unsigned long flags;
1114 void *buf; 1162 void *buf;
1115 int i;
1116 int pos = 0; 1163 int pos = 0;
1117 1164
1118 while (chain) { 1165 while (chain) {
1119 bio_for_each_segment(bv, chain, i) { 1166 bio_for_each_segment(bv, chain, iter) {
1120 if (pos + bv->bv_len > start_ofs) { 1167 if (pos + bv.bv_len > start_ofs) {
1121 int remainder = max(start_ofs - pos, 0); 1168 int remainder = max(start_ofs - pos, 0);
1122 buf = bvec_kmap_irq(bv, &flags); 1169 buf = bvec_kmap_irq(&bv, &flags);
1123 memset(buf + remainder, 0, 1170 memset(buf + remainder, 0,
1124 bv->bv_len - remainder); 1171 bv.bv_len - remainder);
1125 flush_dcache_page(bv->bv_page); 1172 flush_dcache_page(bv.bv_page);
1126 bvec_kunmap_irq(buf, &flags); 1173 bvec_kunmap_irq(buf, &flags);
1127 } 1174 }
1128 pos += bv->bv_len; 1175 pos += bv.bv_len;
1129 } 1176 }
1130 1177
1131 chain = chain->bi_next; 1178 chain = chain->bi_next;
@@ -1173,74 +1220,14 @@ static struct bio *bio_clone_range(struct bio *bio_src,
1173 unsigned int len, 1220 unsigned int len,
1174 gfp_t gfpmask) 1221 gfp_t gfpmask)
1175{ 1222{
1176 struct bio_vec *bv;
1177 unsigned int resid;
1178 unsigned short idx;
1179 unsigned int voff;
1180 unsigned short end_idx;
1181 unsigned short vcnt;
1182 struct bio *bio; 1223 struct bio *bio;
1183 1224
1184 /* Handle the easy case for the caller */ 1225 bio = bio_clone(bio_src, gfpmask);
1185
1186 if (!offset && len == bio_src->bi_size)
1187 return bio_clone(bio_src, gfpmask);
1188
1189 if (WARN_ON_ONCE(!len))
1190 return NULL;
1191 if (WARN_ON_ONCE(len > bio_src->bi_size))
1192 return NULL;
1193 if (WARN_ON_ONCE(offset > bio_src->bi_size - len))
1194 return NULL;
1195
1196 /* Find first affected segment... */
1197
1198 resid = offset;
1199 bio_for_each_segment(bv, bio_src, idx) {
1200 if (resid < bv->bv_len)
1201 break;
1202 resid -= bv->bv_len;
1203 }
1204 voff = resid;
1205
1206 /* ...and the last affected segment */
1207
1208 resid += len;
1209 __bio_for_each_segment(bv, bio_src, end_idx, idx) {
1210 if (resid <= bv->bv_len)
1211 break;
1212 resid -= bv->bv_len;
1213 }
1214 vcnt = end_idx - idx + 1;
1215
1216 /* Build the clone */
1217
1218 bio = bio_alloc(gfpmask, (unsigned int) vcnt);
1219 if (!bio) 1226 if (!bio)
1220 return NULL; /* ENOMEM */ 1227 return NULL; /* ENOMEM */
1221 1228
1222 bio->bi_bdev = bio_src->bi_bdev; 1229 bio_advance(bio, offset);
1223 bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT); 1230 bio->bi_iter.bi_size = len;
1224 bio->bi_rw = bio_src->bi_rw;
1225 bio->bi_flags |= 1 << BIO_CLONED;
1226
1227 /*
1228 * Copy over our part of the bio_vec, then update the first
1229 * and last (or only) entries.
1230 */
1231 memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx],
1232 vcnt * sizeof (struct bio_vec));
1233 bio->bi_io_vec[0].bv_offset += voff;
1234 if (vcnt > 1) {
1235 bio->bi_io_vec[0].bv_len -= voff;
1236 bio->bi_io_vec[vcnt - 1].bv_len = resid;
1237 } else {
1238 bio->bi_io_vec[0].bv_len = len;
1239 }
1240
1241 bio->bi_vcnt = vcnt;
1242 bio->bi_size = len;
1243 bio->bi_idx = 0;
1244 1231
1245 return bio; 1232 return bio;
1246} 1233}
@@ -1271,7 +1258,7 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
1271 1258
1272 /* Build up a chain of clone bios up to the limit */ 1259 /* Build up a chain of clone bios up to the limit */
1273 1260
1274 if (!bi || off >= bi->bi_size || !len) 1261 if (!bi || off >= bi->bi_iter.bi_size || !len)
1275 return NULL; /* Nothing to clone */ 1262 return NULL; /* Nothing to clone */
1276 1263
1277 end = &chain; 1264 end = &chain;
@@ -1283,7 +1270,7 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
1283 rbd_warn(NULL, "bio_chain exhausted with %u left", len); 1270 rbd_warn(NULL, "bio_chain exhausted with %u left", len);
1284 goto out_err; /* EINVAL; ran out of bio's */ 1271 goto out_err; /* EINVAL; ran out of bio's */
1285 } 1272 }
1286 bi_size = min_t(unsigned int, bi->bi_size - off, len); 1273 bi_size = min_t(unsigned int, bi->bi_iter.bi_size - off, len);
1287 bio = bio_clone_range(bi, off, bi_size, gfpmask); 1274 bio = bio_clone_range(bi, off, bi_size, gfpmask);
1288 if (!bio) 1275 if (!bio)
1289 goto out_err; /* ENOMEM */ 1276 goto out_err; /* ENOMEM */
@@ -1292,7 +1279,7 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
1292 end = &bio->bi_next; 1279 end = &bio->bi_next;
1293 1280
1294 off += bi_size; 1281 off += bi_size;
1295 if (off == bi->bi_size) { 1282 if (off == bi->bi_iter.bi_size) {
1296 bi = bi->bi_next; 1283 bi = bi->bi_next;
1297 off = 0; 1284 off = 0;
1298 } 1285 }
@@ -1761,11 +1748,8 @@ static struct ceph_osd_request *rbd_osd_req_create(
1761 osd_req->r_callback = rbd_osd_req_callback; 1748 osd_req->r_callback = rbd_osd_req_callback;
1762 osd_req->r_priv = obj_request; 1749 osd_req->r_priv = obj_request;
1763 1750
1764 osd_req->r_oid_len = strlen(obj_request->object_name); 1751 osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
1765 rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid)); 1752 ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name);
1766 memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len);
1767
1768 osd_req->r_file_layout = rbd_dev->layout; /* struct */
1769 1753
1770 return osd_req; 1754 return osd_req;
1771} 1755}
@@ -1802,11 +1786,8 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
1802 osd_req->r_callback = rbd_osd_req_callback; 1786 osd_req->r_callback = rbd_osd_req_callback;
1803 osd_req->r_priv = obj_request; 1787 osd_req->r_priv = obj_request;
1804 1788
1805 osd_req->r_oid_len = strlen(obj_request->object_name); 1789 osd_req->r_base_oloc.pool = ceph_file_layout_pg_pool(rbd_dev->layout);
1806 rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid)); 1790 ceph_oid_set_name(&osd_req->r_base_oid, obj_request->object_name);
1807 memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len);
1808
1809 osd_req->r_file_layout = rbd_dev->layout; /* struct */
1810 1791
1811 return osd_req; 1792 return osd_req;
1812} 1793}
@@ -2186,7 +2167,8 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2186 2167
2187 if (type == OBJ_REQUEST_BIO) { 2168 if (type == OBJ_REQUEST_BIO) {
2188 bio_list = data_desc; 2169 bio_list = data_desc;
2189 rbd_assert(img_offset == bio_list->bi_sector << SECTOR_SHIFT); 2170 rbd_assert(img_offset ==
2171 bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
2190 } else { 2172 } else {
2191 rbd_assert(type == OBJ_REQUEST_PAGES); 2173 rbd_assert(type == OBJ_REQUEST_PAGES);
2192 pages = data_desc; 2174 pages = data_desc;
@@ -2866,7 +2848,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
2866 * Request sync osd watch/unwatch. The value of "start" determines 2848 * Request sync osd watch/unwatch. The value of "start" determines
2867 * whether a watch request is being initiated or torn down. 2849 * whether a watch request is being initiated or torn down.
2868 */ 2850 */
2869static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start) 2851static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
2870{ 2852{
2871 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 2853 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
2872 struct rbd_obj_request *obj_request; 2854 struct rbd_obj_request *obj_request;
@@ -2941,6 +2923,22 @@ out_cancel:
2941 return ret; 2923 return ret;
2942} 2924}
2943 2925
2926static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
2927{
2928 return __rbd_dev_header_watch_sync(rbd_dev, true);
2929}
2930
2931static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
2932{
2933 int ret;
2934
2935 ret = __rbd_dev_header_watch_sync(rbd_dev, false);
2936 if (ret) {
2937 rbd_warn(rbd_dev, "unable to tear down watch request: %d\n",
2938 ret);
2939 }
2940}
2941
2944/* 2942/*
2945 * Synchronous osd object method call. Returns the number of bytes 2943 * Synchronous osd object method call. Returns the number of bytes
2946 * returned in the outbound buffer, or a negative error code. 2944 * returned in the outbound buffer, or a negative error code.
@@ -3388,14 +3386,18 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
3388 u64 segment_size; 3386 u64 segment_size;
3389 3387
3390 /* create gendisk info */ 3388 /* create gendisk info */
3391 disk = alloc_disk(RBD_MINORS_PER_MAJOR); 3389 disk = alloc_disk(single_major ?
3390 (1 << RBD_SINGLE_MAJOR_PART_SHIFT) :
3391 RBD_MINORS_PER_MAJOR);
3392 if (!disk) 3392 if (!disk)
3393 return -ENOMEM; 3393 return -ENOMEM;
3394 3394
3395 snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", 3395 snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
3396 rbd_dev->dev_id); 3396 rbd_dev->dev_id);
3397 disk->major = rbd_dev->major; 3397 disk->major = rbd_dev->major;
3398 disk->first_minor = 0; 3398 disk->first_minor = rbd_dev->minor;
3399 if (single_major)
3400 disk->flags |= GENHD_FL_EXT_DEVT;
3399 disk->fops = &rbd_bd_ops; 3401 disk->fops = &rbd_bd_ops;
3400 disk->private_data = rbd_dev; 3402 disk->private_data = rbd_dev;
3401 3403
@@ -3467,7 +3469,14 @@ static ssize_t rbd_major_show(struct device *dev,
3467 return sprintf(buf, "%d\n", rbd_dev->major); 3469 return sprintf(buf, "%d\n", rbd_dev->major);
3468 3470
3469 return sprintf(buf, "(none)\n"); 3471 return sprintf(buf, "(none)\n");
3472}
3473
3474static ssize_t rbd_minor_show(struct device *dev,
3475 struct device_attribute *attr, char *buf)
3476{
3477 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
3470 3478
3479 return sprintf(buf, "%d\n", rbd_dev->minor);
3471} 3480}
3472 3481
3473static ssize_t rbd_client_id_show(struct device *dev, 3482static ssize_t rbd_client_id_show(struct device *dev,
@@ -3589,6 +3598,7 @@ static ssize_t rbd_image_refresh(struct device *dev,
3589static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); 3598static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
3590static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL); 3599static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
3591static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); 3600static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
3601static DEVICE_ATTR(minor, S_IRUGO, rbd_minor_show, NULL);
3592static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); 3602static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
3593static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); 3603static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
3594static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); 3604static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
@@ -3602,6 +3612,7 @@ static struct attribute *rbd_attrs[] = {
3602 &dev_attr_size.attr, 3612 &dev_attr_size.attr,
3603 &dev_attr_features.attr, 3613 &dev_attr_features.attr,
3604 &dev_attr_major.attr, 3614 &dev_attr_major.attr,
3615 &dev_attr_minor.attr,
3605 &dev_attr_client_id.attr, 3616 &dev_attr_client_id.attr,
3606 &dev_attr_pool.attr, 3617 &dev_attr_pool.attr,
3607 &dev_attr_pool_id.attr, 3618 &dev_attr_pool_id.attr,
@@ -4372,21 +4383,29 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
4372 device_unregister(&rbd_dev->dev); 4383 device_unregister(&rbd_dev->dev);
4373} 4384}
4374 4385
4375static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0);
4376
4377/* 4386/*
4378 * Get a unique rbd identifier for the given new rbd_dev, and add 4387 * Get a unique rbd identifier for the given new rbd_dev, and add
4379 * the rbd_dev to the global list. The minimum rbd id is 1. 4388 * the rbd_dev to the global list.
4380 */ 4389 */
4381static void rbd_dev_id_get(struct rbd_device *rbd_dev) 4390static int rbd_dev_id_get(struct rbd_device *rbd_dev)
4382{ 4391{
4383 rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max); 4392 int new_dev_id;
4393
4394 new_dev_id = ida_simple_get(&rbd_dev_id_ida,
4395 0, minor_to_rbd_dev_id(1 << MINORBITS),
4396 GFP_KERNEL);
4397 if (new_dev_id < 0)
4398 return new_dev_id;
4399
4400 rbd_dev->dev_id = new_dev_id;
4384 4401
4385 spin_lock(&rbd_dev_list_lock); 4402 spin_lock(&rbd_dev_list_lock);
4386 list_add_tail(&rbd_dev->node, &rbd_dev_list); 4403 list_add_tail(&rbd_dev->node, &rbd_dev_list);
4387 spin_unlock(&rbd_dev_list_lock); 4404 spin_unlock(&rbd_dev_list_lock);
4388 dout("rbd_dev %p given dev id %llu\n", rbd_dev, 4405
4389 (unsigned long long) rbd_dev->dev_id); 4406 dout("rbd_dev %p given dev id %d\n", rbd_dev, rbd_dev->dev_id);
4407
4408 return 0;
4390} 4409}
4391 4410
4392/* 4411/*
@@ -4395,49 +4414,13 @@ static void rbd_dev_id_get(struct rbd_device *rbd_dev)
4395 */ 4414 */
4396static void rbd_dev_id_put(struct rbd_device *rbd_dev) 4415static void rbd_dev_id_put(struct rbd_device *rbd_dev)
4397{ 4416{
4398 struct list_head *tmp;
4399 int rbd_id = rbd_dev->dev_id;
4400 int max_id;
4401
4402 rbd_assert(rbd_id > 0);
4403
4404 dout("rbd_dev %p released dev id %llu\n", rbd_dev,
4405 (unsigned long long) rbd_dev->dev_id);
4406 spin_lock(&rbd_dev_list_lock); 4417 spin_lock(&rbd_dev_list_lock);
4407 list_del_init(&rbd_dev->node); 4418 list_del_init(&rbd_dev->node);
4408
4409 /*
4410 * If the id being "put" is not the current maximum, there
4411 * is nothing special we need to do.
4412 */
4413 if (rbd_id != atomic64_read(&rbd_dev_id_max)) {
4414 spin_unlock(&rbd_dev_list_lock);
4415 return;
4416 }
4417
4418 /*
4419 * We need to update the current maximum id. Search the
4420 * list to find out what it is. We're more likely to find
4421 * the maximum at the end, so search the list backward.
4422 */
4423 max_id = 0;
4424 list_for_each_prev(tmp, &rbd_dev_list) {
4425 struct rbd_device *rbd_dev;
4426
4427 rbd_dev = list_entry(tmp, struct rbd_device, node);
4428 if (rbd_dev->dev_id > max_id)
4429 max_id = rbd_dev->dev_id;
4430 }
4431 spin_unlock(&rbd_dev_list_lock); 4419 spin_unlock(&rbd_dev_list_lock);
4432 4420
4433 /* 4421 ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
4434 * The max id could have been updated by rbd_dev_id_get(), in 4422
4435 * which case it now accurately reflects the new maximum. 4423 dout("rbd_dev %p released dev id %d\n", rbd_dev, rbd_dev->dev_id);
4436 * Be careful not to overwrite the maximum value in that
4437 * case.
4438 */
4439 atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id);
4440 dout(" max dev id has been reset\n");
4441} 4424}
4442 4425
4443/* 4426/*
@@ -4860,20 +4843,29 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
4860{ 4843{
4861 int ret; 4844 int ret;
4862 4845
4863 /* generate unique id: find highest unique id, add one */ 4846 /* Get an id and fill in device name. */
4864 rbd_dev_id_get(rbd_dev); 4847
4848 ret = rbd_dev_id_get(rbd_dev);
4849 if (ret)
4850 return ret;
4865 4851
4866 /* Fill in the device name, now that we have its id. */
4867 BUILD_BUG_ON(DEV_NAME_LEN 4852 BUILD_BUG_ON(DEV_NAME_LEN
4868 < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); 4853 < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
4869 sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); 4854 sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
4870 4855
4871 /* Get our block major device number. */ 4856 /* Record our major and minor device numbers. */
4872 4857
4873 ret = register_blkdev(0, rbd_dev->name); 4858 if (!single_major) {
4874 if (ret < 0) 4859 ret = register_blkdev(0, rbd_dev->name);
4875 goto err_out_id; 4860 if (ret < 0)
4876 rbd_dev->major = ret; 4861 goto err_out_id;
4862
4863 rbd_dev->major = ret;
4864 rbd_dev->minor = 0;
4865 } else {
4866 rbd_dev->major = rbd_major;
4867 rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id);
4868 }
4877 4869
4878 /* Set up the blkdev mapping. */ 4870 /* Set up the blkdev mapping. */
4879 4871
@@ -4905,7 +4897,8 @@ err_out_mapping:
4905err_out_disk: 4897err_out_disk:
4906 rbd_free_disk(rbd_dev); 4898 rbd_free_disk(rbd_dev);
4907err_out_blkdev: 4899err_out_blkdev:
4908 unregister_blkdev(rbd_dev->major, rbd_dev->name); 4900 if (!single_major)
4901 unregister_blkdev(rbd_dev->major, rbd_dev->name);
4909err_out_id: 4902err_out_id:
4910 rbd_dev_id_put(rbd_dev); 4903 rbd_dev_id_put(rbd_dev);
4911 rbd_dev_mapping_clear(rbd_dev); 4904 rbd_dev_mapping_clear(rbd_dev);
@@ -4961,7 +4954,6 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
4961static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) 4954static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
4962{ 4955{
4963 int ret; 4956 int ret;
4964 int tmp;
4965 4957
4966 /* 4958 /*
4967 * Get the id from the image id object. Unless there's an 4959 * Get the id from the image id object. Unless there's an
@@ -4980,7 +4972,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
4980 goto err_out_format; 4972 goto err_out_format;
4981 4973
4982 if (mapping) { 4974 if (mapping) {
4983 ret = rbd_dev_header_watch_sync(rbd_dev, true); 4975 ret = rbd_dev_header_watch_sync(rbd_dev);
4984 if (ret) 4976 if (ret)
4985 goto out_header_name; 4977 goto out_header_name;
4986 } 4978 }
@@ -5007,12 +4999,8 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
5007err_out_probe: 4999err_out_probe:
5008 rbd_dev_unprobe(rbd_dev); 5000 rbd_dev_unprobe(rbd_dev);
5009err_out_watch: 5001err_out_watch:
5010 if (mapping) { 5002 if (mapping)
5011 tmp = rbd_dev_header_watch_sync(rbd_dev, false); 5003 rbd_dev_header_unwatch_sync(rbd_dev);
5012 if (tmp)
5013 rbd_warn(rbd_dev, "unable to tear down "
5014 "watch request (%d)\n", tmp);
5015 }
5016out_header_name: 5004out_header_name:
5017 kfree(rbd_dev->header_name); 5005 kfree(rbd_dev->header_name);
5018 rbd_dev->header_name = NULL; 5006 rbd_dev->header_name = NULL;
@@ -5026,9 +5014,9 @@ err_out_format:
5026 return ret; 5014 return ret;
5027} 5015}
5028 5016
5029static ssize_t rbd_add(struct bus_type *bus, 5017static ssize_t do_rbd_add(struct bus_type *bus,
5030 const char *buf, 5018 const char *buf,
5031 size_t count) 5019 size_t count)
5032{ 5020{
5033 struct rbd_device *rbd_dev = NULL; 5021 struct rbd_device *rbd_dev = NULL;
5034 struct ceph_options *ceph_opts = NULL; 5022 struct ceph_options *ceph_opts = NULL;
@@ -5090,6 +5078,12 @@ static ssize_t rbd_add(struct bus_type *bus,
5090 5078
5091 rc = rbd_dev_device_setup(rbd_dev); 5079 rc = rbd_dev_device_setup(rbd_dev);
5092 if (rc) { 5080 if (rc) {
5081 /*
5082 * rbd_dev_header_unwatch_sync() can't be moved into
5083 * rbd_dev_image_release() without refactoring, see
5084 * commit 1f3ef78861ac.
5085 */
5086 rbd_dev_header_unwatch_sync(rbd_dev);
5093 rbd_dev_image_release(rbd_dev); 5087 rbd_dev_image_release(rbd_dev);
5094 goto err_out_module; 5088 goto err_out_module;
5095 } 5089 }
@@ -5110,6 +5104,23 @@ err_out_module:
5110 return (ssize_t)rc; 5104 return (ssize_t)rc;
5111} 5105}
5112 5106
5107static ssize_t rbd_add(struct bus_type *bus,
5108 const char *buf,
5109 size_t count)
5110{
5111 if (single_major)
5112 return -EINVAL;
5113
5114 return do_rbd_add(bus, buf, count);
5115}
5116
5117static ssize_t rbd_add_single_major(struct bus_type *bus,
5118 const char *buf,
5119 size_t count)
5120{
5121 return do_rbd_add(bus, buf, count);
5122}
5123
5113static void rbd_dev_device_release(struct device *dev) 5124static void rbd_dev_device_release(struct device *dev)
5114{ 5125{
5115 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 5126 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
@@ -5117,8 +5128,8 @@ static void rbd_dev_device_release(struct device *dev)
5117 rbd_free_disk(rbd_dev); 5128 rbd_free_disk(rbd_dev);
5118 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 5129 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
5119 rbd_dev_mapping_clear(rbd_dev); 5130 rbd_dev_mapping_clear(rbd_dev);
5120 unregister_blkdev(rbd_dev->major, rbd_dev->name); 5131 if (!single_major)
5121 rbd_dev->major = 0; 5132 unregister_blkdev(rbd_dev->major, rbd_dev->name);
5122 rbd_dev_id_put(rbd_dev); 5133 rbd_dev_id_put(rbd_dev);
5123 rbd_dev_mapping_clear(rbd_dev); 5134 rbd_dev_mapping_clear(rbd_dev);
5124} 5135}
@@ -5149,9 +5160,9 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
5149 } 5160 }
5150} 5161}
5151 5162
5152static ssize_t rbd_remove(struct bus_type *bus, 5163static ssize_t do_rbd_remove(struct bus_type *bus,
5153 const char *buf, 5164 const char *buf,
5154 size_t count) 5165 size_t count)
5155{ 5166{
5156 struct rbd_device *rbd_dev = NULL; 5167 struct rbd_device *rbd_dev = NULL;
5157 struct list_head *tmp; 5168 struct list_head *tmp;
@@ -5191,16 +5202,14 @@ static ssize_t rbd_remove(struct bus_type *bus,
5191 if (ret < 0 || already) 5202 if (ret < 0 || already)
5192 return ret; 5203 return ret;
5193 5204
5194 ret = rbd_dev_header_watch_sync(rbd_dev, false); 5205 rbd_dev_header_unwatch_sync(rbd_dev);
5195 if (ret)
5196 rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
5197
5198 /* 5206 /*
5199 * flush remaining watch callbacks - these must be complete 5207 * flush remaining watch callbacks - these must be complete
5200 * before the osd_client is shutdown 5208 * before the osd_client is shutdown
5201 */ 5209 */
5202 dout("%s: flushing notifies", __func__); 5210 dout("%s: flushing notifies", __func__);
5203 ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); 5211 ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
5212
5204 /* 5213 /*
5205 * Don't free anything from rbd_dev->disk until after all 5214 * Don't free anything from rbd_dev->disk until after all
5206 * notifies are completely processed. Otherwise 5215 * notifies are completely processed. Otherwise
@@ -5214,6 +5223,23 @@ static ssize_t rbd_remove(struct bus_type *bus,
5214 return count; 5223 return count;
5215} 5224}
5216 5225
5226static ssize_t rbd_remove(struct bus_type *bus,
5227 const char *buf,
5228 size_t count)
5229{
5230 if (single_major)
5231 return -EINVAL;
5232
5233 return do_rbd_remove(bus, buf, count);
5234}
5235
5236static ssize_t rbd_remove_single_major(struct bus_type *bus,
5237 const char *buf,
5238 size_t count)
5239{
5240 return do_rbd_remove(bus, buf, count);
5241}
5242
5217/* 5243/*
5218 * create control files in sysfs 5244 * create control files in sysfs
5219 * /sys/bus/rbd/... 5245 * /sys/bus/rbd/...
@@ -5259,7 +5285,7 @@ static int rbd_slab_init(void)
5259 5285
5260 rbd_assert(!rbd_segment_name_cache); 5286 rbd_assert(!rbd_segment_name_cache);
5261 rbd_segment_name_cache = kmem_cache_create("rbd_segment_name", 5287 rbd_segment_name_cache = kmem_cache_create("rbd_segment_name",
5262 MAX_OBJ_NAME_SIZE + 1, 1, 0, NULL); 5288 CEPH_MAX_OID_NAME_LEN + 1, 1, 0, NULL);
5263 if (rbd_segment_name_cache) 5289 if (rbd_segment_name_cache)
5264 return 0; 5290 return 0;
5265out_err: 5291out_err:
@@ -5295,24 +5321,45 @@ static int __init rbd_init(void)
5295 5321
5296 if (!libceph_compatible(NULL)) { 5322 if (!libceph_compatible(NULL)) {
5297 rbd_warn(NULL, "libceph incompatibility (quitting)"); 5323 rbd_warn(NULL, "libceph incompatibility (quitting)");
5298
5299 return -EINVAL; 5324 return -EINVAL;
5300 } 5325 }
5326
5301 rc = rbd_slab_init(); 5327 rc = rbd_slab_init();
5302 if (rc) 5328 if (rc)
5303 return rc; 5329 return rc;
5330
5331 if (single_major) {
5332 rbd_major = register_blkdev(0, RBD_DRV_NAME);
5333 if (rbd_major < 0) {
5334 rc = rbd_major;
5335 goto err_out_slab;
5336 }
5337 }
5338
5304 rc = rbd_sysfs_init(); 5339 rc = rbd_sysfs_init();
5305 if (rc) 5340 if (rc)
5306 rbd_slab_exit(); 5341 goto err_out_blkdev;
5342
5343 if (single_major)
5344 pr_info("loaded (major %d)\n", rbd_major);
5307 else 5345 else
5308 pr_info("loaded " RBD_DRV_NAME_LONG "\n"); 5346 pr_info("loaded\n");
5347
5348 return 0;
5309 5349
5350err_out_blkdev:
5351 if (single_major)
5352 unregister_blkdev(rbd_major, RBD_DRV_NAME);
5353err_out_slab:
5354 rbd_slab_exit();
5310 return rc; 5355 return rc;
5311} 5356}
5312 5357
5313static void __exit rbd_exit(void) 5358static void __exit rbd_exit(void)
5314{ 5359{
5315 rbd_sysfs_cleanup(); 5360 rbd_sysfs_cleanup();
5361 if (single_major)
5362 unregister_blkdev(rbd_major, RBD_DRV_NAME);
5316 rbd_slab_exit(); 5363 rbd_slab_exit();
5317} 5364}
5318 5365
@@ -5322,9 +5369,8 @@ module_exit(rbd_exit);
5322MODULE_AUTHOR("Alex Elder <elder@inktank.com>"); 5369MODULE_AUTHOR("Alex Elder <elder@inktank.com>");
5323MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 5370MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
5324MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 5371MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
5325MODULE_DESCRIPTION("rados block device");
5326
5327/* following authorship retained from original osdblk.c */ 5372/* following authorship retained from original osdblk.c */
5328MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); 5373MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
5329 5374
5375MODULE_DESCRIPTION("RADOS Block Device (RBD) driver");
5330MODULE_LICENSE("GPL"); 5376MODULE_LICENSE("GPL");
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 2284f5d3a54a..2839d37e5af7 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -174,7 +174,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
174 if (!card) 174 if (!card)
175 goto req_err; 175 goto req_err;
176 176
177 if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk)) 177 if (bio_end_sector(bio) > get_capacity(card->gendisk))
178 goto req_err; 178 goto req_err;
179 179
180 if (unlikely(card->halt)) { 180 if (unlikely(card->halt)) {
@@ -187,7 +187,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
187 goto req_err; 187 goto req_err;
188 } 188 }
189 189
190 if (bio->bi_size == 0) { 190 if (bio->bi_iter.bi_size == 0) {
191 dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); 191 dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
192 goto req_err; 192 goto req_err;
193 } 193 }
@@ -208,7 +208,7 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
208 208
209 dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n", 209 dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
210 bio_data_dir(bio) ? 'W' : 'R', bio_meta, 210 bio_data_dir(bio) ? 'W' : 'R', bio_meta,
211 (u64)bio->bi_sector << 9, bio->bi_size); 211 (u64)bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
212 212
213 st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas, 213 st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas,
214 bio_dma_done_cb, bio_meta); 214 bio_dma_done_cb, bio_meta);
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index fc88ba3e1bd2..cf8cd293abb5 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -684,7 +684,8 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
684 void *cb_data) 684 void *cb_data)
685{ 685{
686 struct list_head dma_list[RSXX_MAX_TARGETS]; 686 struct list_head dma_list[RSXX_MAX_TARGETS];
687 struct bio_vec *bvec; 687 struct bio_vec bvec;
688 struct bvec_iter iter;
688 unsigned long long addr8; 689 unsigned long long addr8;
689 unsigned int laddr; 690 unsigned int laddr;
690 unsigned int bv_len; 691 unsigned int bv_len;
@@ -696,7 +697,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
696 int st; 697 int st;
697 int i; 698 int i;
698 699
699 addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */ 700 addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
700 atomic_set(n_dmas, 0); 701 atomic_set(n_dmas, 0);
701 702
702 for (i = 0; i < card->n_targets; i++) { 703 for (i = 0; i < card->n_targets; i++) {
@@ -705,7 +706,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
705 } 706 }
706 707
707 if (bio->bi_rw & REQ_DISCARD) { 708 if (bio->bi_rw & REQ_DISCARD) {
708 bv_len = bio->bi_size; 709 bv_len = bio->bi_iter.bi_size;
709 710
710 while (bv_len > 0) { 711 while (bv_len > 0) {
711 tgt = rsxx_get_dma_tgt(card, addr8); 712 tgt = rsxx_get_dma_tgt(card, addr8);
@@ -722,9 +723,9 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
722 bv_len -= RSXX_HW_BLK_SIZE; 723 bv_len -= RSXX_HW_BLK_SIZE;
723 } 724 }
724 } else { 725 } else {
725 bio_for_each_segment(bvec, bio, i) { 726 bio_for_each_segment(bvec, bio, iter) {
726 bv_len = bvec->bv_len; 727 bv_len = bvec.bv_len;
727 bv_off = bvec->bv_offset; 728 bv_off = bvec.bv_offset;
728 729
729 while (bv_len > 0) { 730 while (bv_len > 0) {
730 tgt = rsxx_get_dma_tgt(card, addr8); 731 tgt = rsxx_get_dma_tgt(card, addr8);
@@ -736,7 +737,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
736 st = rsxx_queue_dma(card, &dma_list[tgt], 737 st = rsxx_queue_dma(card, &dma_list[tgt],
737 bio_data_dir(bio), 738 bio_data_dir(bio),
738 dma_off, dma_len, 739 dma_off, dma_len,
739 laddr, bvec->bv_page, 740 laddr, bvec.bv_page,
740 bv_off, cb, cb_data); 741 bv_off, cb, cb_data);
741 if (st) 742 if (st)
742 goto bvec_err; 743 goto bvec_err;
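
The rsxx changes above (and the umem and xen ones that follow) are instances of the 3.14 immutable-biovec conversion: a driver no longer reads bi_sector/bi_size/bi_idx off the bio directly, but takes the position from bio->bi_iter and walks segments by value through a separate struct bvec_iter. A minimal sketch of the new walk, assuming only the in-tree <linux/bio.h> helpers visible in these hunks (the function name is illustrative):

#include <linux/bio.h>
#include <linux/kernel.h>

/* Count the payload of a bio the post-3.14 way: each segment is copied
 * into a struct bio_vec by value and the cursor lives in a separate
 * struct bvec_iter, so nothing in the bio itself is modified. */
static unsigned int demo_bio_bytes(struct bio *bio)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	unsigned int bytes = 0;

	/* start position and remaining size now sit under bio->bi_iter */
	pr_debug("bio at sector %llu, %u bytes (%u sectors)\n",
		 (unsigned long long)bio->bi_iter.bi_sector,
		 bio->bi_iter.bi_size, bio_sectors(bio));

	bio_for_each_segment(bvec, bio, iter)
		bytes += bvec.bv_len;	/* bvec.bv_page / bv_offset / bv_len */

	return bytes;
}

bio_end_sector(bio), used in the rsxx bounds check, is simply bi_iter.bi_sector plus bio_sectors(bio) under the same scheme.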
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 3fb6ab4c8b4e..d5e2d12b9d9e 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1744,20 +1744,6 @@ static void carm_remove_one (struct pci_dev *pdev)
1744 kfree(host); 1744 kfree(host);
1745 pci_release_regions(pdev); 1745 pci_release_regions(pdev);
1746 pci_disable_device(pdev); 1746 pci_disable_device(pdev);
1747 pci_set_drvdata(pdev, NULL);
1748} 1747}
1749 1748
1750static int __init carm_init(void) 1749module_pci_driver(carm_driver);
1751{
1752 return pci_register_driver(&carm_driver);
1753}
1754
1755static void __exit carm_exit(void)
1756{
1757 pci_unregister_driver(&carm_driver);
1758}
1759
1760module_init(carm_init);
1761module_exit(carm_exit);
1762
1763
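
The sx8 cleanup relies on module_pci_driver(), which generates the module_init()/module_exit() pair that simply call pci_register_driver() and pci_unregister_driver(); the explicit pci_set_drvdata(pdev, NULL) can also go because the driver core clears drvdata when a device is unbound. A minimal sketch of the macro in use (driver name, PCI IDs and callbacks here are hypothetical):

#include <linux/module.h>
#include <linux/pci.h>

static const struct pci_device_id demo_pci_tbl[] = {
	{ PCI_DEVICE(0x1234, 0x5678) },		/* hypothetical vendor/device */
	{ }					/* terminating entry */
};
MODULE_DEVICE_TABLE(pci, demo_pci_tbl);

static int demo_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	/* enable the device, map BARs, register the disk, ... */
	return 0;
}

static void demo_remove(struct pci_dev *pdev)
{
	/* tear down whatever probe set up; no pci_set_drvdata(pdev, NULL) needed */
}

static struct pci_driver demo_driver = {
	.name		= "demo",
	.id_table	= demo_pci_tbl,
	.probe		= demo_probe,
	.remove		= demo_remove,
};

/* expands to module_init/module_exit calling pci_(un)register_driver() */
module_pci_driver(demo_driver);
MODULE_LICENSE("GPL");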
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index ad70868f8a96..4cf81b5bf0f7 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -108,8 +108,7 @@ struct cardinfo {
108 * have been written 108 * have been written
109 */ 109 */
110 struct bio *bio, *currentbio, **biotail; 110 struct bio *bio, *currentbio, **biotail;
111 int current_idx; 111 struct bvec_iter current_iter;
112 sector_t current_sector;
113 112
114 struct request_queue *queue; 113 struct request_queue *queue;
115 114
@@ -118,7 +117,7 @@ struct cardinfo {
118 struct mm_dma_desc *desc; 117 struct mm_dma_desc *desc;
119 int cnt, headcnt; 118 int cnt, headcnt;
120 struct bio *bio, **biotail; 119 struct bio *bio, **biotail;
121 int idx; 120 struct bvec_iter iter;
122 } mm_pages[2]; 121 } mm_pages[2];
123#define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc)) 122#define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc))
124 123
@@ -344,16 +343,13 @@ static int add_bio(struct cardinfo *card)
344 dma_addr_t dma_handle; 343 dma_addr_t dma_handle;
345 int offset; 344 int offset;
346 struct bio *bio; 345 struct bio *bio;
347 struct bio_vec *vec; 346 struct bio_vec vec;
348 int idx;
349 int rw; 347 int rw;
350 int len;
351 348
352 bio = card->currentbio; 349 bio = card->currentbio;
353 if (!bio && card->bio) { 350 if (!bio && card->bio) {
354 card->currentbio = card->bio; 351 card->currentbio = card->bio;
355 card->current_idx = card->bio->bi_idx; 352 card->current_iter = card->bio->bi_iter;
356 card->current_sector = card->bio->bi_sector;
357 card->bio = card->bio->bi_next; 353 card->bio = card->bio->bi_next;
358 if (card->bio == NULL) 354 if (card->bio == NULL)
359 card->biotail = &card->bio; 355 card->biotail = &card->bio;
@@ -362,18 +358,17 @@ static int add_bio(struct cardinfo *card)
362 } 358 }
363 if (!bio) 359 if (!bio)
364 return 0; 360 return 0;
365 idx = card->current_idx;
366 361
367 rw = bio_rw(bio); 362 rw = bio_rw(bio);
368 if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE) 363 if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE)
369 return 0; 364 return 0;
370 365
371 vec = bio_iovec_idx(bio, idx); 366 vec = bio_iter_iovec(bio, card->current_iter);
372 len = vec->bv_len; 367
373 dma_handle = pci_map_page(card->dev, 368 dma_handle = pci_map_page(card->dev,
374 vec->bv_page, 369 vec.bv_page,
375 vec->bv_offset, 370 vec.bv_offset,
376 len, 371 vec.bv_len,
377 (rw == READ) ? 372 (rw == READ) ?
378 PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); 373 PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
379 374
@@ -381,7 +376,7 @@ static int add_bio(struct cardinfo *card)
381 desc = &p->desc[p->cnt]; 376 desc = &p->desc[p->cnt];
382 p->cnt++; 377 p->cnt++;
383 if (p->bio == NULL) 378 if (p->bio == NULL)
384 p->idx = idx; 379 p->iter = card->current_iter;
385 if ((p->biotail) != &bio->bi_next) { 380 if ((p->biotail) != &bio->bi_next) {
386 *(p->biotail) = bio; 381 *(p->biotail) = bio;
387 p->biotail = &(bio->bi_next); 382 p->biotail = &(bio->bi_next);
@@ -391,8 +386,8 @@ static int add_bio(struct cardinfo *card)
391 desc->data_dma_handle = dma_handle; 386 desc->data_dma_handle = dma_handle;
392 387
393 desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle); 388 desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle);
394 desc->local_addr = cpu_to_le64(card->current_sector << 9); 389 desc->local_addr = cpu_to_le64(card->current_iter.bi_sector << 9);
395 desc->transfer_size = cpu_to_le32(len); 390 desc->transfer_size = cpu_to_le32(vec.bv_len);
396 offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc)); 391 offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc));
397 desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset)); 392 desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset));
398 desc->zero1 = desc->zero2 = 0; 393 desc->zero1 = desc->zero2 = 0;
@@ -407,10 +402,9 @@ static int add_bio(struct cardinfo *card)
407 desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ); 402 desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ);
408 desc->sem_control_bits = desc->control_bits; 403 desc->sem_control_bits = desc->control_bits;
409 404
410 card->current_sector += (len >> 9); 405
411 idx++; 406 bio_advance_iter(bio, &card->current_iter, vec.bv_len);
412 card->current_idx = idx; 407 if (!card->current_iter.bi_size)
413 if (idx >= bio->bi_vcnt)
414 card->currentbio = NULL; 408 card->currentbio = NULL;
415 409
416 return 1; 410 return 1;
@@ -439,23 +433,25 @@ static void process_page(unsigned long data)
439 struct mm_dma_desc *desc = &page->desc[page->headcnt]; 433 struct mm_dma_desc *desc = &page->desc[page->headcnt];
440 int control = le32_to_cpu(desc->sem_control_bits); 434 int control = le32_to_cpu(desc->sem_control_bits);
441 int last = 0; 435 int last = 0;
442 int idx; 436 struct bio_vec vec;
443 437
444 if (!(control & DMASCR_DMA_COMPLETE)) { 438 if (!(control & DMASCR_DMA_COMPLETE)) {
445 control = dma_status; 439 control = dma_status;
446 last = 1; 440 last = 1;
447 } 441 }
442
448 page->headcnt++; 443 page->headcnt++;
449 idx = page->idx; 444 vec = bio_iter_iovec(bio, page->iter);
450 page->idx++; 445 bio_advance_iter(bio, &page->iter, vec.bv_len);
451 if (page->idx >= bio->bi_vcnt) { 446
447 if (!page->iter.bi_size) {
452 page->bio = bio->bi_next; 448 page->bio = bio->bi_next;
453 if (page->bio) 449 if (page->bio)
454 page->idx = page->bio->bi_idx; 450 page->iter = page->bio->bi_iter;
455 } 451 }
456 452
457 pci_unmap_page(card->dev, desc->data_dma_handle, 453 pci_unmap_page(card->dev, desc->data_dma_handle,
458 bio_iovec_idx(bio, idx)->bv_len, 454 vec.bv_len,
459 (control & DMASCR_TRANSFER_READ) ? 455 (control & DMASCR_TRANSFER_READ) ?
460 PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); 456 PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
461 if (control & DMASCR_HARD_ERROR) { 457 if (control & DMASCR_HARD_ERROR) {
@@ -532,7 +528,8 @@ static void mm_make_request(struct request_queue *q, struct bio *bio)
532{ 528{
533 struct cardinfo *card = q->queuedata; 529 struct cardinfo *card = q->queuedata;
534 pr_debug("mm_make_request %llu %u\n", 530 pr_debug("mm_make_request %llu %u\n",
535 (unsigned long long)bio->bi_sector, bio->bi_size); 531 (unsigned long long)bio->bi_iter.bi_sector,
532 bio->bi_iter.bi_size);
536 533
537 spin_lock_irq(&card->lock); 534 spin_lock_irq(&card->lock);
538 *card->biotail = bio; 535 *card->biotail = bio;
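
umem uses the same bvec_iter API but keeps private cursors (card->current_iter and mm_pages[].iter) because it consumes one segment per DMA descriptor instead of looping over the bio in one place: pull the current segment with bio_iter_iovec(), queue it, then advance the saved iterator by the bytes consumed; iter.bi_size reaching zero marks the bio as finished. A rough sketch of that stepping pattern (the helper name is illustrative):

#include <linux/bio.h>
#include <linux/types.h>

/* Queue a single segment of @bio described by the caller-held cursor
 * @iter, then advance the cursor. Returns true while segments remain. */
static bool demo_queue_one_segment(struct bio *bio, struct bvec_iter *iter)
{
	struct bio_vec vec = bio_iter_iovec(bio, *iter);

	/*
	 * Map vec.bv_page + vec.bv_offset for vec.bv_len bytes and fill
	 * in one hardware descriptor here, as add_bio() does above.
	 */

	bio_advance_iter(bio, iter, vec.bv_len);

	return iter->bi_size != 0;
}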
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 875025f299b6..da18046d0e07 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1254,7 +1254,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
1254 bio->bi_bdev = preq.bdev; 1254 bio->bi_bdev = preq.bdev;
1255 bio->bi_private = pending_req; 1255 bio->bi_private = pending_req;
1256 bio->bi_end_io = end_block_io_op; 1256 bio->bi_end_io = end_block_io_op;
1257 bio->bi_sector = preq.sector_number; 1257 bio->bi_iter.bi_sector = preq.sector_number;
1258 } 1258 }
1259 1259
1260 preq.sector_number += seg[i].nsec; 1260 preq.sector_number += seg[i].nsec;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index f9c43f91f03e..8dcfb54f1603 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1547,7 +1547,7 @@ static int blkif_recover(struct blkfront_info *info)
1547 for (i = 0; i < pending; i++) { 1547 for (i = 0; i < pending; i++) {
1548 offset = (i * segs * PAGE_SIZE) >> 9; 1548 offset = (i * segs * PAGE_SIZE) >> 9;
1549 size = min((unsigned int)(segs * PAGE_SIZE) >> 9, 1549 size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
1550 (unsigned int)(bio->bi_size >> 9) - offset); 1550 (unsigned int)bio_sectors(bio) - offset);
1551 cloned_bio = bio_clone(bio, GFP_NOIO); 1551 cloned_bio = bio_clone(bio, GFP_NOIO);
1552 BUG_ON(cloned_bio == NULL); 1552 BUG_ON(cloned_bio == NULL);
1553 bio_trim(cloned_bio, offset, size); 1553 bio_trim(cloned_bio, offset, size);
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 5a95baf4b104..27de5046708a 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -43,9 +43,6 @@
43#include <linux/zorro.h> 43#include <linux/zorro.h>
44 44
45 45
46extern int m68k_realnum_memory;
47extern struct mem_info m68k_memory[NUM_MEMINFO];
48
49#define Z2MINOR_COMBINED (0) 46#define Z2MINOR_COMBINED (0)
50#define Z2MINOR_Z2ONLY (1) 47#define Z2MINOR_Z2ONLY (1)
51#define Z2MINOR_CHIPONLY (2) 48#define Z2MINOR_CHIPONLY (2)
@@ -116,8 +113,8 @@ get_z2ram( void )
116 if ( test_bit( i, zorro_unused_z2ram ) ) 113 if ( test_bit( i, zorro_unused_z2ram ) )
117 { 114 {
118 z2_count++; 115 z2_count++;
119 z2ram_map[ z2ram_size++ ] = 116 z2ram_map[z2ram_size++] = (unsigned long)ZTWO_VADDR(Z2RAM_START) +
120 ZTWO_VADDR( Z2RAM_START ) + ( i << Z2RAM_CHUNKSHIFT ); 117 (i << Z2RAM_CHUNKSHIFT);
121 clear_bit( i, zorro_unused_z2ram ); 118 clear_bit( i, zorro_unused_z2ram );
122 } 119 }
123 } 120 }
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
new file mode 100644
index 000000000000..3450be850399
--- /dev/null
+++ b/drivers/block/zram/Kconfig
@@ -0,0 +1,24 @@
1config ZRAM
2 tristate "Compressed RAM block device support"
3 depends on BLOCK && SYSFS && ZSMALLOC
4 select LZO_COMPRESS
5 select LZO_DECOMPRESS
6 default n
7 help
8 Creates virtual block devices called /dev/zramX (X = 0, 1, ...).
9 Pages written to these disks are compressed and stored in memory
10 itself. These disks allow very fast I/O and compression provides
 11	  substantial memory savings.
12
13 It has several use cases, for example: /tmp storage, use as swap
14 disks and maybe many more.
15
16 See zram.txt for more information.
17
18config ZRAM_DEBUG
19 bool "Compressed RAM block device debug support"
20 depends on ZRAM
21 default n
22 help
23 This option adds additional debugging code to the compressed
24 RAM block device driver.
diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile
new file mode 100644
index 000000000000..cb0f9ced6a93
--- /dev/null
+++ b/drivers/block/zram/Makefile
@@ -0,0 +1,3 @@
1zram-y := zram_drv.o
2
3obj-$(CONFIG_ZRAM) += zram.o
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
new file mode 100644
index 000000000000..011e55d820b1
--- /dev/null
+++ b/drivers/block/zram/zram_drv.c
@@ -0,0 +1,958 @@
1/*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
14
15#define KMSG_COMPONENT "zram"
16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18#ifdef CONFIG_ZRAM_DEBUG
19#define DEBUG
20#endif
21
22#include <linux/module.h>
23#include <linux/kernel.h>
24#include <linux/bio.h>
25#include <linux/bitops.h>
26#include <linux/blkdev.h>
27#include <linux/buffer_head.h>
28#include <linux/device.h>
29#include <linux/genhd.h>
30#include <linux/highmem.h>
31#include <linux/slab.h>
32#include <linux/lzo.h>
33#include <linux/string.h>
34#include <linux/vmalloc.h>
35
36#include "zram_drv.h"
37
38/* Globals */
39static int zram_major;
40static struct zram *zram_devices;
41
42/* Module params (documentation at end) */
43static unsigned int num_devices = 1;
44
45static inline struct zram *dev_to_zram(struct device *dev)
46{
47 return (struct zram *)dev_to_disk(dev)->private_data;
48}
49
50static ssize_t disksize_show(struct device *dev,
51 struct device_attribute *attr, char *buf)
52{
53 struct zram *zram = dev_to_zram(dev);
54
55 return sprintf(buf, "%llu\n", zram->disksize);
56}
57
58static ssize_t initstate_show(struct device *dev,
59 struct device_attribute *attr, char *buf)
60{
61 struct zram *zram = dev_to_zram(dev);
62
63 return sprintf(buf, "%u\n", zram->init_done);
64}
65
66static ssize_t num_reads_show(struct device *dev,
67 struct device_attribute *attr, char *buf)
68{
69 struct zram *zram = dev_to_zram(dev);
70
71 return sprintf(buf, "%llu\n",
72 (u64)atomic64_read(&zram->stats.num_reads));
73}
74
75static ssize_t num_writes_show(struct device *dev,
76 struct device_attribute *attr, char *buf)
77{
78 struct zram *zram = dev_to_zram(dev);
79
80 return sprintf(buf, "%llu\n",
81 (u64)atomic64_read(&zram->stats.num_writes));
82}
83
84static ssize_t invalid_io_show(struct device *dev,
85 struct device_attribute *attr, char *buf)
86{
87 struct zram *zram = dev_to_zram(dev);
88
89 return sprintf(buf, "%llu\n",
90 (u64)atomic64_read(&zram->stats.invalid_io));
91}
92
93static ssize_t notify_free_show(struct device *dev,
94 struct device_attribute *attr, char *buf)
95{
96 struct zram *zram = dev_to_zram(dev);
97
98 return sprintf(buf, "%llu\n",
99 (u64)atomic64_read(&zram->stats.notify_free));
100}
101
102static ssize_t zero_pages_show(struct device *dev,
103 struct device_attribute *attr, char *buf)
104{
105 struct zram *zram = dev_to_zram(dev);
106
107 return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero));
108}
109
110static ssize_t orig_data_size_show(struct device *dev,
111 struct device_attribute *attr, char *buf)
112{
113 struct zram *zram = dev_to_zram(dev);
114
115 return sprintf(buf, "%llu\n",
116 (u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
117}
118
119static ssize_t compr_data_size_show(struct device *dev,
120 struct device_attribute *attr, char *buf)
121{
122 struct zram *zram = dev_to_zram(dev);
123
124 return sprintf(buf, "%llu\n",
125 (u64)atomic64_read(&zram->stats.compr_size));
126}
127
128static ssize_t mem_used_total_show(struct device *dev,
129 struct device_attribute *attr, char *buf)
130{
131 u64 val = 0;
132 struct zram *zram = dev_to_zram(dev);
133 struct zram_meta *meta = zram->meta;
134
135 down_read(&zram->init_lock);
136 if (zram->init_done)
137 val = zs_get_total_size_bytes(meta->mem_pool);
138 up_read(&zram->init_lock);
139
140 return sprintf(buf, "%llu\n", val);
141}
142
 143/* flag operations need meta->tb_lock */
144static int zram_test_flag(struct zram_meta *meta, u32 index,
145 enum zram_pageflags flag)
146{
147 return meta->table[index].flags & BIT(flag);
148}
149
150static void zram_set_flag(struct zram_meta *meta, u32 index,
151 enum zram_pageflags flag)
152{
153 meta->table[index].flags |= BIT(flag);
154}
155
156static void zram_clear_flag(struct zram_meta *meta, u32 index,
157 enum zram_pageflags flag)
158{
159 meta->table[index].flags &= ~BIT(flag);
160}
161
162static inline int is_partial_io(struct bio_vec *bvec)
163{
164 return bvec->bv_len != PAGE_SIZE;
165}
166
167/*
168 * Check if request is within bounds and aligned on zram logical blocks.
169 */
170static inline int valid_io_request(struct zram *zram, struct bio *bio)
171{
172 u64 start, end, bound;
173
174 /* unaligned request */
175 if (unlikely(bio->bi_iter.bi_sector &
176 (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
177 return 0;
178 if (unlikely(bio->bi_iter.bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
179 return 0;
180
181 start = bio->bi_iter.bi_sector;
182 end = start + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
183 bound = zram->disksize >> SECTOR_SHIFT;
 184	/* out of range */
185 if (unlikely(start >= bound || end > bound || start > end))
186 return 0;
187
188 /* I/O request is valid */
189 return 1;
190}
191
192static void zram_meta_free(struct zram_meta *meta)
193{
194 zs_destroy_pool(meta->mem_pool);
195 kfree(meta->compress_workmem);
196 free_pages((unsigned long)meta->compress_buffer, 1);
197 vfree(meta->table);
198 kfree(meta);
199}
200
201static struct zram_meta *zram_meta_alloc(u64 disksize)
202{
203 size_t num_pages;
204 struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
205 if (!meta)
206 goto out;
207
208 meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
209 if (!meta->compress_workmem)
210 goto free_meta;
211
212 meta->compress_buffer =
213 (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
214 if (!meta->compress_buffer) {
215 pr_err("Error allocating compressor buffer space\n");
216 goto free_workmem;
217 }
218
219 num_pages = disksize >> PAGE_SHIFT;
220 meta->table = vzalloc(num_pages * sizeof(*meta->table));
221 if (!meta->table) {
222 pr_err("Error allocating zram address table\n");
223 goto free_buffer;
224 }
225
226 meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
227 if (!meta->mem_pool) {
228 pr_err("Error creating memory pool\n");
229 goto free_table;
230 }
231
232 rwlock_init(&meta->tb_lock);
233 mutex_init(&meta->buffer_lock);
234 return meta;
235
236free_table:
237 vfree(meta->table);
238free_buffer:
239 free_pages((unsigned long)meta->compress_buffer, 1);
240free_workmem:
241 kfree(meta->compress_workmem);
242free_meta:
243 kfree(meta);
244 meta = NULL;
245out:
246 return meta;
247}
248
249static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
250{
251 if (*offset + bvec->bv_len >= PAGE_SIZE)
252 (*index)++;
253 *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
254}
255
256static int page_zero_filled(void *ptr)
257{
258 unsigned int pos;
259 unsigned long *page;
260
261 page = (unsigned long *)ptr;
262
263 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
264 if (page[pos])
265 return 0;
266 }
267
268 return 1;
269}
270
271static void handle_zero_page(struct bio_vec *bvec)
272{
273 struct page *page = bvec->bv_page;
274 void *user_mem;
275
276 user_mem = kmap_atomic(page);
277 if (is_partial_io(bvec))
278 memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
279 else
280 clear_page(user_mem);
281 kunmap_atomic(user_mem);
282
283 flush_dcache_page(page);
284}
285
286/* NOTE: caller should hold meta->tb_lock with write-side */
287static void zram_free_page(struct zram *zram, size_t index)
288{
289 struct zram_meta *meta = zram->meta;
290 unsigned long handle = meta->table[index].handle;
291 u16 size = meta->table[index].size;
292
293 if (unlikely(!handle)) {
294 /*
295 * No memory is allocated for zero filled pages.
296 * Simply clear zero page flag.
297 */
298 if (zram_test_flag(meta, index, ZRAM_ZERO)) {
299 zram_clear_flag(meta, index, ZRAM_ZERO);
300 atomic_dec(&zram->stats.pages_zero);
301 }
302 return;
303 }
304
305 if (unlikely(size > max_zpage_size))
306 atomic_dec(&zram->stats.bad_compress);
307
308 zs_free(meta->mem_pool, handle);
309
310 if (size <= PAGE_SIZE / 2)
311 atomic_dec(&zram->stats.good_compress);
312
313 atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
314 atomic_dec(&zram->stats.pages_stored);
315
316 meta->table[index].handle = 0;
317 meta->table[index].size = 0;
318}
319
320static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
321{
322 int ret = LZO_E_OK;
323 size_t clen = PAGE_SIZE;
324 unsigned char *cmem;
325 struct zram_meta *meta = zram->meta;
326 unsigned long handle;
327 u16 size;
328
329 read_lock(&meta->tb_lock);
330 handle = meta->table[index].handle;
331 size = meta->table[index].size;
332
333 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
334 read_unlock(&meta->tb_lock);
335 clear_page(mem);
336 return 0;
337 }
338
339 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
340 if (size == PAGE_SIZE)
341 copy_page(mem, cmem);
342 else
343 ret = lzo1x_decompress_safe(cmem, size, mem, &clen);
344 zs_unmap_object(meta->mem_pool, handle);
345 read_unlock(&meta->tb_lock);
346
347 /* Should NEVER happen. Return bio error if it does. */
348 if (unlikely(ret != LZO_E_OK)) {
349 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
350 atomic64_inc(&zram->stats.failed_reads);
351 return ret;
352 }
353
354 return 0;
355}
356
357static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
358 u32 index, int offset, struct bio *bio)
359{
360 int ret;
361 struct page *page;
362 unsigned char *user_mem, *uncmem = NULL;
363 struct zram_meta *meta = zram->meta;
364 page = bvec->bv_page;
365
366 read_lock(&meta->tb_lock);
367 if (unlikely(!meta->table[index].handle) ||
368 zram_test_flag(meta, index, ZRAM_ZERO)) {
369 read_unlock(&meta->tb_lock);
370 handle_zero_page(bvec);
371 return 0;
372 }
373 read_unlock(&meta->tb_lock);
374
375 if (is_partial_io(bvec))
376 /* Use a temporary buffer to decompress the page */
377 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
378
379 user_mem = kmap_atomic(page);
380 if (!is_partial_io(bvec))
381 uncmem = user_mem;
382
383 if (!uncmem) {
384 pr_info("Unable to allocate temp memory\n");
385 ret = -ENOMEM;
386 goto out_cleanup;
387 }
388
389 ret = zram_decompress_page(zram, uncmem, index);
390 /* Should NEVER happen. Return bio error if it does. */
391 if (unlikely(ret != LZO_E_OK))
392 goto out_cleanup;
393
394 if (is_partial_io(bvec))
395 memcpy(user_mem + bvec->bv_offset, uncmem + offset,
396 bvec->bv_len);
397
398 flush_dcache_page(page);
399 ret = 0;
400out_cleanup:
401 kunmap_atomic(user_mem);
402 if (is_partial_io(bvec))
403 kfree(uncmem);
404 return ret;
405}
406
407static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
408 int offset)
409{
410 int ret = 0;
411 size_t clen;
412 unsigned long handle;
413 struct page *page;
414 unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
415 struct zram_meta *meta = zram->meta;
416 bool locked = false;
417
418 page = bvec->bv_page;
419 src = meta->compress_buffer;
420
421 if (is_partial_io(bvec)) {
422 /*
423 * This is a partial IO. We need to read the full page
 424		 * before writing the changes.
425 */
426 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
427 if (!uncmem) {
428 ret = -ENOMEM;
429 goto out;
430 }
431 ret = zram_decompress_page(zram, uncmem, index);
432 if (ret)
433 goto out;
434 }
435
436 mutex_lock(&meta->buffer_lock);
437 locked = true;
438 user_mem = kmap_atomic(page);
439
440 if (is_partial_io(bvec)) {
441 memcpy(uncmem + offset, user_mem + bvec->bv_offset,
442 bvec->bv_len);
443 kunmap_atomic(user_mem);
444 user_mem = NULL;
445 } else {
446 uncmem = user_mem;
447 }
448
449 if (page_zero_filled(uncmem)) {
450 kunmap_atomic(user_mem);
451 /* Free memory associated with this sector now. */
452 write_lock(&zram->meta->tb_lock);
453 zram_free_page(zram, index);
454 zram_set_flag(meta, index, ZRAM_ZERO);
455 write_unlock(&zram->meta->tb_lock);
456
457 atomic_inc(&zram->stats.pages_zero);
458 ret = 0;
459 goto out;
460 }
461
462 ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
463 meta->compress_workmem);
464 if (!is_partial_io(bvec)) {
465 kunmap_atomic(user_mem);
466 user_mem = NULL;
467 uncmem = NULL;
468 }
469
470 if (unlikely(ret != LZO_E_OK)) {
471 pr_err("Compression failed! err=%d\n", ret);
472 goto out;
473 }
474
475 if (unlikely(clen > max_zpage_size)) {
476 atomic_inc(&zram->stats.bad_compress);
477 clen = PAGE_SIZE;
478 src = NULL;
479 if (is_partial_io(bvec))
480 src = uncmem;
481 }
482
483 handle = zs_malloc(meta->mem_pool, clen);
484 if (!handle) {
485 pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
486 index, clen);
487 ret = -ENOMEM;
488 goto out;
489 }
490 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
491
492 if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
493 src = kmap_atomic(page);
494 copy_page(cmem, src);
495 kunmap_atomic(src);
496 } else {
497 memcpy(cmem, src, clen);
498 }
499
500 zs_unmap_object(meta->mem_pool, handle);
501
502 /*
503 * Free memory associated with this sector
504 * before overwriting unused sectors.
505 */
506 write_lock(&zram->meta->tb_lock);
507 zram_free_page(zram, index);
508
509 meta->table[index].handle = handle;
510 meta->table[index].size = clen;
511 write_unlock(&zram->meta->tb_lock);
512
513 /* Update stats */
514 atomic64_add(clen, &zram->stats.compr_size);
515 atomic_inc(&zram->stats.pages_stored);
516 if (clen <= PAGE_SIZE / 2)
517 atomic_inc(&zram->stats.good_compress);
518
519out:
520 if (locked)
521 mutex_unlock(&meta->buffer_lock);
522 if (is_partial_io(bvec))
523 kfree(uncmem);
524
525 if (ret)
526 atomic64_inc(&zram->stats.failed_writes);
527 return ret;
528}
529
530static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
531 int offset, struct bio *bio, int rw)
532{
533 int ret;
534
535 if (rw == READ)
536 ret = zram_bvec_read(zram, bvec, index, offset, bio);
537 else
538 ret = zram_bvec_write(zram, bvec, index, offset);
539
540 return ret;
541}
542
543static void zram_reset_device(struct zram *zram, bool reset_capacity)
544{
545 size_t index;
546 struct zram_meta *meta;
547
548 down_write(&zram->init_lock);
549 if (!zram->init_done) {
550 up_write(&zram->init_lock);
551 return;
552 }
553
554 meta = zram->meta;
555 zram->init_done = 0;
556
557 /* Free all pages that are still in this zram device */
558 for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
559 unsigned long handle = meta->table[index].handle;
560 if (!handle)
561 continue;
562
563 zs_free(meta->mem_pool, handle);
564 }
565
566 zram_meta_free(zram->meta);
567 zram->meta = NULL;
568 /* Reset stats */
569 memset(&zram->stats, 0, sizeof(zram->stats));
570
571 zram->disksize = 0;
572 if (reset_capacity)
573 set_capacity(zram->disk, 0);
574 up_write(&zram->init_lock);
575}
576
577static void zram_init_device(struct zram *zram, struct zram_meta *meta)
578{
579 if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) {
580 pr_info(
 581		"There is little point creating a zram greater than "
582 "twice the size of memory since we expect a 2:1 compression "
583 "ratio. Note that zram uses about 0.1%% of the size of "
584 "the disk when not in use so a huge zram is "
585 "wasteful.\n"
586 "\tMemory Size: %lu kB\n"
587 "\tSize you selected: %llu kB\n"
588 "Continuing anyway ...\n",
589 (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10
590 );
591 }
592
 593	/* zram devices sort of resemble non-rotational disks */
594 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
595
596 zram->meta = meta;
597 zram->init_done = 1;
598
599 pr_debug("Initialization done!\n");
600}
601
602static ssize_t disksize_store(struct device *dev,
603 struct device_attribute *attr, const char *buf, size_t len)
604{
605 u64 disksize;
606 struct zram_meta *meta;
607 struct zram *zram = dev_to_zram(dev);
608
609 disksize = memparse(buf, NULL);
610 if (!disksize)
611 return -EINVAL;
612
613 disksize = PAGE_ALIGN(disksize);
614 meta = zram_meta_alloc(disksize);
615 down_write(&zram->init_lock);
616 if (zram->init_done) {
617 up_write(&zram->init_lock);
618 zram_meta_free(meta);
619 pr_info("Cannot change disksize for initialized device\n");
620 return -EBUSY;
621 }
622
623 zram->disksize = disksize;
624 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
625 zram_init_device(zram, meta);
626 up_write(&zram->init_lock);
627
628 return len;
629}
630
631static ssize_t reset_store(struct device *dev,
632 struct device_attribute *attr, const char *buf, size_t len)
633{
634 int ret;
635 unsigned short do_reset;
636 struct zram *zram;
637 struct block_device *bdev;
638
639 zram = dev_to_zram(dev);
640 bdev = bdget_disk(zram->disk, 0);
641
642 if (!bdev)
643 return -ENOMEM;
644
645 /* Do not reset an active device! */
646 if (bdev->bd_holders) {
647 ret = -EBUSY;
648 goto out;
649 }
650
651 ret = kstrtou16(buf, 10, &do_reset);
652 if (ret)
653 goto out;
654
655 if (!do_reset) {
656 ret = -EINVAL;
657 goto out;
658 }
659
660 /* Make sure all pending I/O is finished */
661 fsync_bdev(bdev);
662 bdput(bdev);
663
664 zram_reset_device(zram, true);
665 return len;
666
667out:
668 bdput(bdev);
669 return ret;
670}
671
672static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
673{
674 int offset;
675 u32 index;
676 struct bio_vec bvec;
677 struct bvec_iter iter;
678
679 switch (rw) {
680 case READ:
681 atomic64_inc(&zram->stats.num_reads);
682 break;
683 case WRITE:
684 atomic64_inc(&zram->stats.num_writes);
685 break;
686 }
687
688 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
689 offset = (bio->bi_iter.bi_sector &
690 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
691
692 bio_for_each_segment(bvec, bio, iter) {
693 int max_transfer_size = PAGE_SIZE - offset;
694
695 if (bvec.bv_len > max_transfer_size) {
696 /*
 697			 * zram_bvec_rw() can only operate on a single
698 * zram page. Split the bio vector.
699 */
700 struct bio_vec bv;
701
702 bv.bv_page = bvec.bv_page;
703 bv.bv_len = max_transfer_size;
704 bv.bv_offset = bvec.bv_offset;
705
706 if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0)
707 goto out;
708
709 bv.bv_len = bvec.bv_len - max_transfer_size;
710 bv.bv_offset += max_transfer_size;
711 if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0)
712 goto out;
713 } else
714 if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw)
715 < 0)
716 goto out;
717
718 update_position(&index, &offset, &bvec);
719 }
720
721 set_bit(BIO_UPTODATE, &bio->bi_flags);
722 bio_endio(bio, 0);
723 return;
724
725out:
726 bio_io_error(bio);
727}
728
729/*
730 * Handler function for all zram I/O requests.
731 */
732static void zram_make_request(struct request_queue *queue, struct bio *bio)
733{
734 struct zram *zram = queue->queuedata;
735
736 down_read(&zram->init_lock);
737 if (unlikely(!zram->init_done))
738 goto error;
739
740 if (!valid_io_request(zram, bio)) {
741 atomic64_inc(&zram->stats.invalid_io);
742 goto error;
743 }
744
745 __zram_make_request(zram, bio, bio_data_dir(bio));
746 up_read(&zram->init_lock);
747
748 return;
749
750error:
751 up_read(&zram->init_lock);
752 bio_io_error(bio);
753}
754
755static void zram_slot_free_notify(struct block_device *bdev,
756 unsigned long index)
757{
758 struct zram *zram;
759 struct zram_meta *meta;
760
761 zram = bdev->bd_disk->private_data;
762 meta = zram->meta;
763
764 write_lock(&meta->tb_lock);
765 zram_free_page(zram, index);
766 write_unlock(&meta->tb_lock);
767 atomic64_inc(&zram->stats.notify_free);
768}
769
770static const struct block_device_operations zram_devops = {
771 .swap_slot_free_notify = zram_slot_free_notify,
772 .owner = THIS_MODULE
773};
774
775static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
776 disksize_show, disksize_store);
777static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
778static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
779static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
780static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
781static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
782static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
783static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
784static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
785static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
786static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
787
788static struct attribute *zram_disk_attrs[] = {
789 &dev_attr_disksize.attr,
790 &dev_attr_initstate.attr,
791 &dev_attr_reset.attr,
792 &dev_attr_num_reads.attr,
793 &dev_attr_num_writes.attr,
794 &dev_attr_invalid_io.attr,
795 &dev_attr_notify_free.attr,
796 &dev_attr_zero_pages.attr,
797 &dev_attr_orig_data_size.attr,
798 &dev_attr_compr_data_size.attr,
799 &dev_attr_mem_used_total.attr,
800 NULL,
801};
802
803static struct attribute_group zram_disk_attr_group = {
804 .attrs = zram_disk_attrs,
805};
806
807static int create_device(struct zram *zram, int device_id)
808{
809 int ret = -ENOMEM;
810
811 init_rwsem(&zram->init_lock);
812
813 zram->queue = blk_alloc_queue(GFP_KERNEL);
814 if (!zram->queue) {
815 pr_err("Error allocating disk queue for device %d\n",
816 device_id);
817 goto out;
818 }
819
820 blk_queue_make_request(zram->queue, zram_make_request);
821 zram->queue->queuedata = zram;
822
823 /* gendisk structure */
824 zram->disk = alloc_disk(1);
825 if (!zram->disk) {
826 pr_warn("Error allocating disk structure for device %d\n",
827 device_id);
828 goto out_free_queue;
829 }
830
831 zram->disk->major = zram_major;
832 zram->disk->first_minor = device_id;
833 zram->disk->fops = &zram_devops;
834 zram->disk->queue = zram->queue;
835 zram->disk->private_data = zram;
836 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
837
 838	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
839 set_capacity(zram->disk, 0);
840
841 /*
 842	 * Ensure that we always get PAGE_SIZE-aligned
 843	 * and n*PAGE_SIZE-sized I/O requests.
844 */
845 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
846 blk_queue_logical_block_size(zram->disk->queue,
847 ZRAM_LOGICAL_BLOCK_SIZE);
848 blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
849 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
850
851 add_disk(zram->disk);
852
853 ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
854 &zram_disk_attr_group);
855 if (ret < 0) {
856 pr_warn("Error creating sysfs group");
857 goto out_free_disk;
858 }
859
860 zram->init_done = 0;
861 return 0;
862
863out_free_disk:
864 del_gendisk(zram->disk);
865 put_disk(zram->disk);
866out_free_queue:
867 blk_cleanup_queue(zram->queue);
868out:
869 return ret;
870}
871
872static void destroy_device(struct zram *zram)
873{
874 sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
875 &zram_disk_attr_group);
876
877 del_gendisk(zram->disk);
878 put_disk(zram->disk);
879
880 blk_cleanup_queue(zram->queue);
881}
882
883static int __init zram_init(void)
884{
885 int ret, dev_id;
886
887 if (num_devices > max_num_devices) {
888 pr_warn("Invalid value for num_devices: %u\n",
889 num_devices);
890 ret = -EINVAL;
891 goto out;
892 }
893
894 zram_major = register_blkdev(0, "zram");
895 if (zram_major <= 0) {
896 pr_warn("Unable to get major number\n");
897 ret = -EBUSY;
898 goto out;
899 }
900
901 /* Allocate the device array and initialize each one */
902 zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
903 if (!zram_devices) {
904 ret = -ENOMEM;
905 goto unregister;
906 }
907
908 for (dev_id = 0; dev_id < num_devices; dev_id++) {
909 ret = create_device(&zram_devices[dev_id], dev_id);
910 if (ret)
911 goto free_devices;
912 }
913
914 pr_info("Created %u device(s) ...\n", num_devices);
915
916 return 0;
917
918free_devices:
919 while (dev_id)
920 destroy_device(&zram_devices[--dev_id]);
921 kfree(zram_devices);
922unregister:
923 unregister_blkdev(zram_major, "zram");
924out:
925 return ret;
926}
927
928static void __exit zram_exit(void)
929{
930 int i;
931 struct zram *zram;
932
933 for (i = 0; i < num_devices; i++) {
934 zram = &zram_devices[i];
935
936 destroy_device(zram);
937 /*
938 * Shouldn't access zram->disk after destroy_device
939 * because destroy_device already released zram->disk.
940 */
941 zram_reset_device(zram, false);
942 }
943
944 unregister_blkdev(zram_major, "zram");
945
946 kfree(zram_devices);
947 pr_debug("Cleanup done!\n");
948}
949
950module_init(zram_init);
951module_exit(zram_exit);
952
953module_param(num_devices, uint, 0);
954MODULE_PARM_DESC(num_devices, "Number of zram devices");
955
956MODULE_LICENSE("Dual BSD/GPL");
957MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
958MODULE_DESCRIPTION("Compressed RAM Block Device");
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
new file mode 100644
index 000000000000..ad8aa35bae00
--- /dev/null
+++ b/drivers/block/zram/zram_drv.h
@@ -0,0 +1,109 @@
1/*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
14
15#ifndef _ZRAM_DRV_H_
16#define _ZRAM_DRV_H_
17
18#include <linux/spinlock.h>
19#include <linux/mutex.h>
20#include <linux/zsmalloc.h>
21
22/*
23 * Some arbitrary value. This is just to catch
 24 * invalid values for the num_devices module parameter.
25 */
26static const unsigned max_num_devices = 32;
27
28/*-- Configurable parameters */
29
30/*
31 * Pages that compress to size greater than this are stored
32 * uncompressed in memory.
33 */
34static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
35
36/*
37 * NOTE: max_zpage_size must be less than or equal to:
38 * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would
39 * always return failure.
40 */
41
42/*-- End of configurable params */
43
44#define SECTOR_SHIFT 9
45#define SECTOR_SIZE (1 << SECTOR_SHIFT)
46#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
47#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
48#define ZRAM_LOGICAL_BLOCK_SHIFT 12
49#define ZRAM_LOGICAL_BLOCK_SIZE (1 << ZRAM_LOGICAL_BLOCK_SHIFT)
50#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
51 (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
52
53/* Flags for zram pages (table[page_no].flags) */
54enum zram_pageflags {
55 /* Page consists entirely of zeros */
56 ZRAM_ZERO,
57
58 __NR_ZRAM_PAGEFLAGS,
59};
60
61/*-- Data structures */
62
63/* Allocated for each disk page */
64struct table {
65 unsigned long handle;
66 u16 size; /* object size (excluding header) */
67 u8 count; /* object ref count (not yet used) */
68 u8 flags;
69} __aligned(4);
70
71struct zram_stats {
72 atomic64_t compr_size; /* compressed size of pages stored */
73 atomic64_t num_reads; /* failed + successful */
74 atomic64_t num_writes; /* --do-- */
75 atomic64_t failed_reads; /* should NEVER! happen */
76 atomic64_t failed_writes; /* can happen when memory is too low */
77 atomic64_t invalid_io; /* non-page-aligned I/O requests */
78 atomic64_t notify_free; /* no. of swap slot free notifications */
79 atomic_t pages_zero; /* no. of zero filled pages */
80 atomic_t pages_stored; /* no. of pages currently stored */
 81	atomic_t good_compress;	/* no. of pages with compression ratio<=50% */
 82	atomic_t bad_compress;	/* no. of pages with compression ratio>=75% */
83};
84
85struct zram_meta {
86 rwlock_t tb_lock; /* protect table */
87 void *compress_workmem;
88 void *compress_buffer;
89 struct table *table;
90 struct zs_pool *mem_pool;
91 struct mutex buffer_lock; /* protect compress buffers */
92};
93
94struct zram {
95 struct zram_meta *meta;
96 struct request_queue *queue;
97 struct gendisk *disk;
98 int init_done;
99 /* Prevent concurrent execution of device init, reset and R/W request */
100 struct rw_semaphore init_lock;
101 /*
 102	 * This is the limit on the amount of *uncompressed* data
103 * we can store in a disk.
104 */
105 u64 disksize; /* bytes */
106
107 struct zram_stats stats;
108};
109#endif
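
For orientation, the (index, offset) arithmetic that __zram_make_request() builds from these macros works out as follows with 4 KiB pages (PAGE_SHIFT = 12 is an assumption here, giving SECTORS_PER_PAGE_SHIFT = 3, SECTORS_PER_PAGE = 8, and a logical block equal to one page). The helper below is purely illustrative and relies on the definitions above:

#include <linux/types.h>

/*
 * bi_sector = 24  ->  index = 24 >> 3 = 3, offset = (24 & 7) << 9 = 0.
 * After a 1024-byte segment, update_position() leaves index = 3 and
 * offset = 1024; another 3072 bytes wraps offset back to 0 and bumps
 * index to 4. valid_io_request() only admits bios that start on a
 * ZRAM_LOGICAL_BLOCK boundary and whose size is a multiple of
 * ZRAM_LOGICAL_BLOCK_SIZE, but individual segments inside such a bio
 * can still be smaller than a page, which is what the is_partial_io()
 * paths in zram_drv.c handle.
 */
static inline void demo_sector_to_page(sector_t sector, u32 *index, int *offset)
{
	*index  = sector >> SECTORS_PER_PAGE_SHIFT;
	*offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
}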