Diffstat (limited to 'drivers/block')
32 files changed, 1258 insertions, 377 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index dd96a935fba0..ba5145d384d8 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -347,9 +347,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
                 goto out;
         }
 
-        rw = bio_rw(bio);
-        if (rw == READA)
-                rw = READ;
+        rw = bio_data_dir(bio);
 
         bio_for_each_segment(bvec, bio, iter) {
                 unsigned int len = bvec.bv_len;
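
Note: the READA special case disappears because bio_data_dir() only ever reports READ or WRITE; in this kernel series read-ahead is a hint flag on the bio rather than a third data direction, so the driver can treat it like any other read. A minimal sketch of the idea (the REQ_RAHEAD flag named here is an assumption of this note, not something shown in the hunk above):

        int rw = bio_data_dir(bio);               /* READ or WRITE, nothing else */
        bool readahead = bio->bi_rw & REQ_RAHEAD; /* optional hint; serviced exactly like a READ */
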
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 63c2064689f8..db9d6bb6352d 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1951,7 +1951,6 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
         if (cciss_create_ld_sysfs_entry(h, drv_index))
                 goto cleanup_queue;
         disk->private_data = h->drv[drv_index];
-        disk->driverfs_dev = &h->drv[drv_index]->dev;
 
         /* Set up queue information */
         blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
@@ -1973,7 +1972,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
         /* allows the interrupt handler to start the queue */
         wmb();
         h->drv[drv_index]->queue = disk->queue;
-        add_disk(disk);
+        device_add_disk(&h->drv[drv_index]->dev, disk);
         return 0;
 
 cleanup_queue:
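
Note: device_add_disk() folds the old two-step registration (assign disk->driverfs_dev, then add_disk()) into one call that takes the parent device explicitly, which is what the hunk above does for the cciss logical drive. The generic before/after pattern, as a sketch:

        /* before */
        disk->driverfs_dev = parent;     /* parent: the struct device the gendisk hangs off in sysfs */
        add_disk(disk);

        /* after */
        device_add_disk(parent, disk);
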
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index d524973f94b3..0a1aaf8c24c4 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -258,7 +258,7 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval
         unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
         unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
 
-        D_ASSERT(device, (unsigned)(last - first) <= 1);
+        D_ASSERT(device, first <= last);
         D_ASSERT(device, atomic_read(&device->local_cnt) > 0);
 
         /* FIXME figure out a fast path for bios crossing AL extent boundaries */
@@ -341,6 +341,8 @@ static int __al_write_transaction(struct drbd_device *device, struct al_transact
 
         i = 0;
 
+        drbd_bm_reset_al_hints(device);
+
         /* Even though no one can start to change this list
          * once we set the LC_LOCKED -- from drbd_al_begin_io(),
          * lc_try_lock_for_transaction() --, someone may still
@@ -770,10 +772,18 @@ static bool lazy_bitmap_update_due(struct drbd_device *device)
 
 static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
 {
-        if (rs_done)
-                set_bit(RS_DONE, &device->flags);
-                /* and also set RS_PROGRESS below */
-        else if (!lazy_bitmap_update_due(device))
+        if (rs_done) {
+                struct drbd_connection *connection = first_peer_device(device)->connection;
+                if (connection->agreed_pro_version <= 95 ||
+                    is_sync_target_state(device->state.conn))
+                        set_bit(RS_DONE, &device->flags);
+                        /* and also set RS_PROGRESS below */
+
+                /* Else: rather wait for explicit notification via receive_state,
+                 * to avoid uuids-rotated-too-fast causing full resync
+                 * in next handshake, in case the replication link breaks
+                 * at the most unfortunate time... */
+        } else if (!lazy_bitmap_update_due(device))
                 return;
 
         drbd_device_post_work(device, RS_PROGRESS);
@@ -832,6 +842,13 @@ static int update_sync_bits(struct drbd_device *device,
         return count;
 }
 
+static bool plausible_request_size(int size)
+{
+        return size > 0
+                && size <= DRBD_MAX_BATCH_BIO_SIZE
+                && IS_ALIGNED(size, 512);
+}
+
 /* clear the bit corresponding to the piece of storage in question:
  * size byte of data starting from sector. Only clear a bits of the affected
  * one ore more _aligned_ BM_BLOCK_SIZE blocks.
@@ -851,7 +868,7 @@ int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
         if ((mode == SET_OUT_OF_SYNC) && size == 0)
                 return 0;
 
-        if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
+        if (!plausible_request_size(size)) {
                 drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
                         drbd_change_sync_fname[mode],
                         (unsigned long long)sector, size);
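
Note: plausible_request_size() centralizes the sanity check that __drbd_change_sync() used to open-code, and the accepted upper bound grows from one AL extent (the old DRBD_MAX_DISCARD_SIZE) to DRBD_MAX_BATCH_BIO_SIZE, i.e. half of what one activity-log transaction can activate. With the usual DRBD constants (AL_EXTENT_SIZE = 4 MiB and AL_UPDATES_PER_TRANSACTION = 64 -- values assumed here, they are not part of this diff) that works out to:

        AL_UPDATES_PER_TRANSACTION / 2 * AL_EXTENT_SIZE  =  64 / 2 * 4 MiB  =  128 MiB
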
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index e5d89f623b90..ab62b81c2ca7 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -96,6 +96,13 @@ struct drbd_bitmap {
         struct page **bm_pages;
         spinlock_t bm_lock;
 
+        /* exclusively to be used by __al_write_transaction(),
+         * drbd_bm_mark_for_writeout() and
+         * and drbd_bm_write_hinted() -> bm_rw() called from there.
+         */
+        unsigned int n_bitmap_hints;
+        unsigned int al_bitmap_hints[AL_UPDATES_PER_TRANSACTION];
+
         /* see LIMITATIONS: above */
 
         unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */
@@ -242,6 +249,11 @@ static void bm_set_page_need_writeout(struct page *page)
         set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
 }
 
+void drbd_bm_reset_al_hints(struct drbd_device *device)
+{
+        device->bitmap->n_bitmap_hints = 0;
+}
+
 /**
  * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
  * @device: DRBD device.
@@ -253,6 +265,7 @@ static void bm_set_page_need_writeout(struct page *page)
  */
 void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr)
 {
+        struct drbd_bitmap *b = device->bitmap;
         struct page *page;
         if (page_nr >= device->bitmap->bm_number_of_pages) {
                 drbd_warn(device, "BAD: page_nr: %u, number_of_pages: %u\n",
@@ -260,7 +273,9 @@ void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr)
                 return;
         }
         page = device->bitmap->bm_pages[page_nr];
-        set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
+        BUG_ON(b->n_bitmap_hints >= ARRAY_SIZE(b->al_bitmap_hints));
+        if (!test_and_set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)))
+                b->al_bitmap_hints[b->n_bitmap_hints++] = page_nr;
 }
 
 static int bm_test_page_unchanged(struct page *page)
@@ -427,8 +442,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
 }
 
 /*
- * called on driver init only. TODO call when a device is created.
- * allocates the drbd_bitmap, and stores it in device->bitmap.
+ * allocates the drbd_bitmap and stores it in device->bitmap.
  */
 int drbd_bm_init(struct drbd_device *device)
 {
@@ -633,7 +647,8 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
         unsigned long bits, words, owords, obits;
         unsigned long want, have, onpages; /* number of pages */
         struct page **npages, **opages = NULL;
-        int err = 0, growing;
+        int err = 0;
+        bool growing;
 
         if (!expect(b))
                 return -ENOMEM;
@@ -1030,7 +1045,7 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
 {
         struct drbd_bm_aio_ctx *ctx;
         struct drbd_bitmap *b = device->bitmap;
-        int num_pages, i, count = 0;
+        unsigned int num_pages, i, count = 0;
         unsigned long now;
         char ppb[10];
         int err = 0;
@@ -1078,16 +1093,37 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
         now = jiffies;
 
         /* let the layers below us try to merge these bios... */
-        for (i = 0; i < num_pages; i++) {
-                /* ignore completely unchanged pages */
-                if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
-                        break;
-                if (!(flags & BM_AIO_READ)) {
-                        if ((flags & BM_AIO_WRITE_HINTED) &&
-                            !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
-                                    &page_private(b->bm_pages[i])))
-                                continue;
 
+        if (flags & BM_AIO_READ) {
+                for (i = 0; i < num_pages; i++) {
+                        atomic_inc(&ctx->in_flight);
+                        bm_page_io_async(ctx, i);
+                        ++count;
+                        cond_resched();
+                }
+        } else if (flags & BM_AIO_WRITE_HINTED) {
+                /* ASSERT: BM_AIO_WRITE_ALL_PAGES is not set. */
+                unsigned int hint;
+                for (hint = 0; hint < b->n_bitmap_hints; hint++) {
+                        i = b->al_bitmap_hints[hint];
+                        if (i >= num_pages) /* == -1U: no hint here. */
+                                continue;
+                        /* Several AL-extents may point to the same page. */
+                        if (!test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
+                            &page_private(b->bm_pages[i])))
+                                continue;
+                        /* Has it even changed? */
+                        if (bm_test_page_unchanged(b->bm_pages[i]))
+                                continue;
+                        atomic_inc(&ctx->in_flight);
+                        bm_page_io_async(ctx, i);
+                        ++count;
+                }
+        } else {
+                for (i = 0; i < num_pages; i++) {
+                        /* ignore completely unchanged pages */
+                        if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
+                                break;
                         if (!(flags & BM_AIO_WRITE_ALL_PAGES) &&
                             bm_test_page_unchanged(b->bm_pages[i])) {
                                 dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
@@ -1100,11 +1136,11 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
                                 dynamic_drbd_dbg(device, "skipped bm lazy write for idx %u\n", i);
                                 continue;
                         }
+                        atomic_inc(&ctx->in_flight);
+                        bm_page_io_async(ctx, i);
+                        ++count;
+                        cond_resched();
                 }
-                atomic_inc(&ctx->in_flight);
-                bm_page_io_async(ctx, i);
-                ++count;
-                cond_resched();
         }
 
         /*
@@ -1121,10 +1157,14 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
         kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
 
         /* summary for global bitmap IO */
-        if (flags == 0)
-                drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n",
-                         (flags & BM_AIO_READ) ? "READ" : "WRITE",
-                         count, jiffies - now);
+        if (flags == 0) {
+                unsigned int ms = jiffies_to_msecs(jiffies - now);
+                if (ms > 5) {
+                        drbd_info(device, "bitmap %s of %u pages took %u ms\n",
+                                  (flags & BM_AIO_READ) ? "READ" : "WRITE",
+                                  count, ms);
+                }
+        }
 
         if (ctx->error) {
                 drbd_alert(device, "we had at least one MD IO ERROR during bitmap IO\n");
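
Note: the new n_bitmap_hints/al_bitmap_hints[] pair turns a BM_AIO_WRITE_HINTED pass from a scan over every bitmap page into a walk over just the pages the current AL transaction marked. Reduced to its core, the record/consume pattern looks roughly like this (names follow the code above; locking, the bounds check and the "unchanged page" filter are omitted):

        /* record -- drbd_bm_mark_for_writeout(): remember each hinted page at most once */
        if (!test_and_set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)))
                b->al_bitmap_hints[b->n_bitmap_hints++] = page_nr;

        /* consume -- bm_rw() with BM_AIO_WRITE_HINTED: visit only the recorded pages */
        for (hint = 0; hint < b->n_bitmap_hints; hint++) {
                i = b->al_bitmap_hints[hint];
                if (test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, &page_private(b->bm_pages[i])))
                        bm_page_io_async(ctx, i);
        }
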
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index 4de95bbff486..be91a8d7c22a 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -237,14 +237,9 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
         seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
         seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
 
-        if (f & EE_IS_TRIM) {
-                seq_putc(m, sep);
-                sep = '|';
-                if (f & EE_IS_TRIM_USE_ZEROOUT)
-                        seq_puts(m, "zero-out");
-                else
-                        seq_puts(m, "trim");
-        }
+        if (f & EE_IS_TRIM)
+                __seq_print_rq_state_bit(m, f & EE_IS_TRIM_USE_ZEROOUT, &sep, "zero-out", "trim");
+        seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
         seq_putc(m, '\n');
 }
 
@@ -908,7 +903,7 @@ static int drbd_version_open(struct inode *inode, struct file *file)
         return single_open(file, drbd_version_show, NULL);
 }
 
-static struct file_operations drbd_version_fops = {
+static const struct file_operations drbd_version_fops = {
         .owner = THIS_MODULE,
         .open = drbd_version_open,
         .llseek = seq_lseek,
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index a64c645b4184..7b54354976a5 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -468,9 +468,15 @@ enum {
         /* this is/was a write request */
         __EE_WRITE,
 
+        /* this is/was a write same request */
+        __EE_WRITE_SAME,
+
         /* this originates from application on peer
          * (not some resync or verify or other DRBD internal request) */
         __EE_APPLICATION,
+
+        /* If it contains only 0 bytes, send back P_RS_DEALLOCATED */
+        __EE_RS_THIN_REQ,
 };
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
@@ -484,7 +490,9 @@ enum {
 #define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE)
 #define EE_SUBMITTED (1<<__EE_SUBMITTED)
 #define EE_WRITE (1<<__EE_WRITE)
+#define EE_WRITE_SAME (1<<__EE_WRITE_SAME)
 #define EE_APPLICATION (1<<__EE_APPLICATION)
+#define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ)
 
 /* flag bits per device */
 enum {
@@ -1123,6 +1131,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
 extern int drbd_send_bitmap(struct drbd_device *device);
 extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
 extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
+extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *);
 extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
 extern void drbd_device_cleanup(struct drbd_device *device);
 void drbd_print_uuids(struct drbd_device *device, const char *text);
@@ -1342,11 +1351,11 @@ struct bm_extent {
 #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */
 #define DRBD_MAX_BIO_SIZE_P95 (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */
 
-/* For now, don't allow more than one activity log extent worth of data
- * to be discarded in one go. We may need to rework drbd_al_begin_io()
- * to allow for even larger discard ranges */
-#define DRBD_MAX_DISCARD_SIZE AL_EXTENT_SIZE
-#define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9)
+/* For now, don't allow more than half of what we can "activate" in one
+ * activity log transaction to be discarded in one go. We may need to rework
+ * drbd_al_begin_io() to allow for even larger discard ranges */
+#define DRBD_MAX_BATCH_BIO_SIZE (AL_UPDATES_PER_TRANSACTION/2*AL_EXTENT_SIZE)
+#define DRBD_MAX_BBIO_SECTORS (DRBD_MAX_BATCH_BIO_SIZE >> 9)
 
 extern int drbd_bm_init(struct drbd_device *device);
 extern int drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits);
@@ -1369,6 +1378,7 @@ extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr);
 extern int drbd_bm_read(struct drbd_device *device) __must_hold(local);
 extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr);
 extern int drbd_bm_write(struct drbd_device *device) __must_hold(local);
+extern void drbd_bm_reset_al_hints(struct drbd_device *device) __must_hold(local);
 extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local);
 extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local);
 extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local);
@@ -1483,12 +1493,14 @@ enum determine_dev_size {
 extern enum determine_dev_size
 drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_device *);
-extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev);
+extern void drbd_reconsider_queue_parameters(struct drbd_device *device,
+                        struct drbd_backing_dev *bdev, struct o_qlim *o);
 extern enum drbd_state_rv drbd_set_role(struct drbd_device *device,
                                         enum drbd_role new_role,
                                         int force);
 extern bool conn_try_outdate_peer(struct drbd_connection *connection);
 extern void conn_try_outdate_peer_async(struct drbd_connection *connection);
+extern enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd);
 extern int drbd_khelper(struct drbd_device *device, char *cmd);
 
 /* drbd_worker.c */
@@ -1548,6 +1560,8 @@ extern void start_resync_timer_fn(unsigned long data);
 extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
 
 /* drbd_receiver.c */
+extern int drbd_issue_discard_or_zero_out(struct drbd_device *device,
+                sector_t start, unsigned int nr_sectors, bool discard);
 extern int drbd_receiver(struct drbd_thread *thi);
 extern int drbd_ack_receiver(struct drbd_thread *thi);
 extern void drbd_send_ping_wf(struct work_struct *ws);
@@ -1561,7 +1575,7 @@ extern int drbd_submit_peer_request(struct drbd_device *,
 extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
 extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
                                                      sector_t, unsigned int,
-                                                     bool,
+                                                     unsigned int,
                                                      gfp_t) __must_hold(local);
 extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *,
                                  int);
@@ -1635,8 +1649,6 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
 /* drbd_proc.c */
 extern struct proc_dir_entry *drbd_proc;
 extern const struct file_operations drbd_proc_fops;
-extern const char *drbd_conn_str(enum drbd_conns s);
-extern const char *drbd_role_str(enum drbd_role s);
 
 /* drbd_actlog.c */
 extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
@@ -2095,13 +2107,22 @@ static inline void _sub_unacked(struct drbd_device *device, int n, const char *f
         ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
 }
 
+static inline bool is_sync_target_state(enum drbd_conns connection_state)
+{
+        return connection_state == C_SYNC_TARGET ||
+               connection_state == C_PAUSED_SYNC_T;
+}
+
+static inline bool is_sync_source_state(enum drbd_conns connection_state)
+{
+        return connection_state == C_SYNC_SOURCE ||
+               connection_state == C_PAUSED_SYNC_S;
+}
+
 static inline bool is_sync_state(enum drbd_conns connection_state)
 {
-        return
-           (connection_state == C_SYNC_SOURCE
-        ||  connection_state == C_SYNC_TARGET
-        ||  connection_state == C_PAUSED_SYNC_S
-        ||  connection_state == C_PAUSED_SYNC_T);
+        return is_sync_source_state(connection_state) ||
+               is_sync_target_state(connection_state);
 }
 
 /**
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
index f210543f05f4..23c5a94428d2 100644
--- a/drivers/block/drbd/drbd_interval.h
+++ b/drivers/block/drbd/drbd_interval.h
@@ -6,13 +6,13 @@
 
 struct drbd_interval {
         struct rb_node rb;
         sector_t sector;        /* start sector of the interval */
         unsigned int size;      /* size in bytes */
         sector_t end;           /* highest interval end in subtree */
-        int local:1             /* local or remote request? */;
-        int waiting:1;          /* someone is waiting for this to complete */
-        int completed:1;        /* this has been completed already;
+        unsigned int local:1    /* local or remote request? */;
+        unsigned int waiting:1; /* someone is waiting for completion */
+        unsigned int completed:1; /* this has been completed already;
                                  * ignore for conflict detection */
 };
 
 static inline void drbd_clear_interval(struct drbd_interval *i)
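
Note: switching the one-bit fields from int to unsigned int matters because a plain int bitfield of width 1 is, with the usual compiler behaviour, signed and can only hold 0 and -1, so comparing it against 1 gives a surprising result. A standalone illustration of the pitfall, not DRBD code:

        struct s { int          flag:1; };   /* typically signed: possible values are 0 and -1 */
        struct u { unsigned int flag:1; };   /* possible values are 0 and 1 */

        struct s a = { .flag = 1 };          /* reads back as -1 */
        struct u b = { .flag = 1 };          /* reads back as  1 */
        /* (a.flag == 1) is false, (b.flag == 1) is true */
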
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 2b37744db0fa..0501ae0c517b 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -31,7 +31,7 @@
 #include <linux/module.h>
 #include <linux/jiffies.h>
 #include <linux/drbd.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/types.h>
 #include <net/sock.h>
 #include <linux/ctype.h>
@@ -920,6 +920,31 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
         }
 }
 
+/* communicated if (agreed_features & DRBD_FF_WSAME) */
+void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct request_queue *q)
+{
+        if (q) {
+                p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
+                p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
+                p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q));
+                p->qlim->io_min = cpu_to_be32(queue_io_min(q));
+                p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
+                p->qlim->discard_enabled = blk_queue_discard(q);
+                p->qlim->discard_zeroes_data = queue_discard_zeroes_data(q);
+                p->qlim->write_same_capable = !!q->limits.max_write_same_sectors;
+        } else {
+                q = device->rq_queue;
+                p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
+                p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q));
+                p->qlim->alignment_offset = 0;
+                p->qlim->io_min = cpu_to_be32(queue_io_min(q));
+                p->qlim->io_opt = cpu_to_be32(queue_io_opt(q));
+                p->qlim->discard_enabled = 0;
+                p->qlim->discard_zeroes_data = 0;
+                p->qlim->write_same_capable = 0;
+        }
+}
+
 int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags)
 {
         struct drbd_device *device = peer_device->device;
@@ -928,29 +953,37 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
         sector_t d_size, u_size;
         int q_order_type;
         unsigned int max_bio_size;
+        unsigned int packet_size;
+
+        sock = &peer_device->connection->data;
+        p = drbd_prepare_command(peer_device, sock);
+        if (!p)
+                return -EIO;
 
+        packet_size = sizeof(*p);
+        if (peer_device->connection->agreed_features & DRBD_FF_WSAME)
+                packet_size += sizeof(p->qlim[0]);
+
+        memset(p, 0, packet_size);
         if (get_ldev_if_state(device, D_NEGOTIATING)) {
-                D_ASSERT(device, device->ldev->backing_bdev);
+                struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
                 d_size = drbd_get_max_capacity(device->ldev);
                 rcu_read_lock();
                 u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
                 rcu_read_unlock();
                 q_order_type = drbd_queue_order_type(device);
-                max_bio_size = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
+                max_bio_size = queue_max_hw_sectors(q) << 9;
                 max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE);
+                assign_p_sizes_qlim(device, p, q);
                 put_ldev(device);
         } else {
                 d_size = 0;
                 u_size = 0;
                 q_order_type = QUEUE_ORDERED_NONE;
                 max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
+                assign_p_sizes_qlim(device, p, NULL);
         }
 
-        sock = &peer_device->connection->data;
-        p = drbd_prepare_command(peer_device, sock);
-        if (!p)
-                return -EIO;
-
         if (peer_device->connection->agreed_pro_version <= 94)
                 max_bio_size = min(max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
         else if (peer_device->connection->agreed_pro_version < 100)
@@ -962,7 +995,8 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
         p->max_bio_size = cpu_to_be32(max_bio_size);
         p->queue_order_type = cpu_to_be16(q_order_type);
         p->dds_flags = cpu_to_be16(flags);
-        return drbd_send_command(peer_device, sock, P_SIZES, sizeof(*p), NULL, 0);
+
+        return drbd_send_command(peer_device, sock, P_SIZES, packet_size, NULL, 0);
 }
 
 /**
@@ -1377,6 +1411,22 @@ int drbd_send_ack_ex(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
                               cpu_to_be64(block_id));
 }
 
+int drbd_send_rs_deallocated(struct drbd_peer_device *peer_device,
+                             struct drbd_peer_request *peer_req)
+{
+        struct drbd_socket *sock;
+        struct p_block_desc *p;
+
+        sock = &peer_device->connection->data;
+        p = drbd_prepare_command(peer_device, sock);
+        if (!p)
+                return -EIO;
+        p->sector = cpu_to_be64(peer_req->i.sector);
+        p->blksize = cpu_to_be32(peer_req->i.size);
+        p->pad = 0;
+        return drbd_send_command(peer_device, sock, P_RS_DEALLOCATED, sizeof(*p), NULL, 0);
+}
+
 int drbd_send_drequest(struct drbd_peer_device *peer_device, int cmd,
                        sector_t sector, int size, u64 block_id)
 {
@@ -1561,6 +1611,9 @@ static int _drbd_send_bio(struct drbd_peer_device *peer_device, struct bio *bio)
                                  ? 0 : MSG_MORE);
                 if (err)
                         return err;
+                /* REQ_OP_WRITE_SAME has only one segment */
+                if (bio_op(bio) == REQ_OP_WRITE_SAME)
+                        break;
         }
         return 0;
 }
@@ -1579,6 +1632,9 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b
                                bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
                 if (err)
                         return err;
+                /* REQ_OP_WRITE_SAME has only one segment */
+                if (bio_op(bio) == REQ_OP_WRITE_SAME)
+                        break;
         }
         return 0;
 }
@@ -1610,6 +1666,7 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
                 return (bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
                        (bio->bi_rw & REQ_FUA ? DP_FUA : 0) |
                        (bio->bi_rw & REQ_PREFLUSH ? DP_FLUSH : 0) |
+                       (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
                        (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0);
         else
                 return bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
@@ -1623,6 +1680,8 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
         struct drbd_device *device = peer_device->device;
         struct drbd_socket *sock;
         struct p_data *p;
+        struct p_wsame *wsame = NULL;
+        void *digest_out;
         unsigned int dp_flags = 0;
         int digest_size;
         int err;
@@ -1658,12 +1717,29 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
                 err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
                 goto out;
         }
+        if (dp_flags & DP_WSAME) {
+                /* this will only work if DRBD_FF_WSAME is set AND the
+                 * handshake agreed that all nodes and backend devices are
+                 * WRITE_SAME capable and agree on logical_block_size */
+                wsame = (struct p_wsame*)p;
+                digest_out = wsame + 1;
+                wsame->size = cpu_to_be32(req->i.size);
+        } else
+                digest_out = p + 1;
 
         /* our digest is still only over the payload.
          * TRIM does not carry any payload. */
         if (digest_size)
-                drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1);
-        err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + digest_size, NULL, req->i.size);
+                drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, digest_out);
+        if (wsame) {
+                err =
+                    __send_command(peer_device->connection, device->vnr, sock, P_WSAME,
+                                   sizeof(*wsame) + digest_size, NULL,
+                                   bio_iovec(req->master_bio).bv_len);
+        } else
+                err =
+                    __send_command(peer_device->connection, device->vnr, sock, P_DATA,
+                                   sizeof(*p) + digest_size, NULL, req->i.size);
         if (!err) {
                 /* For protocol A, we have to memcpy the payload into
                  * socket buffers, as we may complete right away
@@ -3507,7 +3583,12 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
         struct bm_io_work *work = &device->bm_io_work;
         int rv = -EIO;
 
-        D_ASSERT(device, atomic_read(&device->ap_bio_cnt) == 0);
+        if (work->flags != BM_LOCKED_CHANGE_ALLOWED) {
+                int cnt = atomic_read(&device->ap_bio_cnt);
+                if (cnt)
+                        drbd_err(device, "FIXME: ap_bio_cnt %d, expected 0; queued for '%s'\n",
+                                        cnt, work->why);
+        }
 
         if (get_ldev(device)) {
                 drbd_bm_lock(device, work->why, work->flags);
@@ -3587,18 +3668,20 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
 int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *),
                 char *why, enum bm_flag flags)
 {
+        /* Only suspend io, if some operation is supposed to be locked out */
+        const bool do_suspend_io = flags & (BM_DONT_CLEAR|BM_DONT_SET|BM_DONT_TEST);
         int rv;
 
         D_ASSERT(device, current != first_peer_device(device)->connection->worker.task);
 
-        if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+        if (do_suspend_io)
                 drbd_suspend_io(device);
 
         drbd_bm_lock(device, why, flags);
         rv = io_fn(device);
         drbd_bm_unlock(device);
 
-        if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+        if (do_suspend_io)
                 drbd_resume_io(device);
 
         return rv;
@@ -3637,6 +3720,8 @@ const char *cmdname(enum drbd_packet cmd)
          * one PRO_VERSION */
         static const char *cmdnames[] = {
                 [P_DATA] = "Data",
+                [P_WSAME] = "WriteSame",
+                [P_TRIM] = "Trim",
                 [P_DATA_REPLY] = "DataReply",
                 [P_RS_DATA_REPLY] = "RSDataReply",
                 [P_BARRIER] = "Barrier",
@@ -3681,6 +3766,8 @@ const char *cmdname(enum drbd_packet cmd)
                 [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
                 [P_RETRY_WRITE] = "retry_write",
                 [P_PROTOCOL_UPDATE] = "protocol_update",
+                [P_RS_THIN_REQ] = "rs_thin_req",
+                [P_RS_DEALLOCATED] = "rs_deallocated",
 
         /* enum drbd_packet, but not commands - obsoleted flags:
          *      P_MAY_IGNORE
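
Note: the reshuffling in drbd_send_sizes() exists because the P_SIZES packet may now carry a trailing queue-limits block: the command buffer has to be prepared and sized before the fields are filled in, and the extra bytes are only sent when the handshake agreed on DRBD_FF_WSAME. Condensed from the hunk above, the sizing logic is simply:

        packet_size = sizeof(*p);                     /* classic P_SIZES payload */
        if (peer_device->connection->agreed_features & DRBD_FF_WSAME)
                packet_size += sizeof(p->qlim[0]);    /* plus the o_qlim block filled by assign_p_sizes_qlim() */
        memset(p, 0, packet_size);
        ...
        drbd_send_command(peer_device, sock, P_SIZES, packet_size, NULL, 0);
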
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 0bac9c8246bc..f35db29cac76 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -343,7 +343,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
                         (char[20]) { }, /* address family */
                         (char[60]) { }, /* address */
                         NULL };
-        char mb[12];
+        char mb[14];
         char *argv[] = {usermode_helper, cmd, mb, NULL };
         struct drbd_connection *connection = first_peer_device(device)->connection;
         struct sib_info sib;
@@ -352,7 +352,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
         if (current == connection->worker.task)
                 set_bit(CALLBACK_PENDING, &connection->flags);
 
-        snprintf(mb, 12, "minor-%d", device_to_minor(device));
+        snprintf(mb, 14, "minor-%d", device_to_minor(device));
         setup_khelper_env(connection, envp);
 
         /* The helper may take some time.
@@ -387,7 +387,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
         return ret;
 }
 
-static int conn_khelper(struct drbd_connection *connection, char *cmd)
+enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
 {
         char *envp[] = { "HOME=/",
                         "TERM=linux",
@@ -442,19 +442,17 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connec
         }
         rcu_read_unlock();
 
-        if (fp == FP_NOT_AVAIL) {
-                /* IO Suspending works on the whole resource.
-                   Do it only for one device. */
-                vnr = 0;
-                peer_device = idr_get_next(&connection->peer_devices, &vnr);
-                drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
-        }
-
         return fp;
 }
 
+static bool resource_is_supended(struct drbd_resource *resource)
+{
+        return resource->susp || resource->susp_fen || resource->susp_nod;
+}
+
 bool conn_try_outdate_peer(struct drbd_connection *connection)
 {
+        struct drbd_resource * const resource = connection->resource;
         unsigned int connect_cnt;
         union drbd_state mask = { };
         union drbd_state val = { };
@@ -462,21 +460,41 @@ bool conn_try_outdate_peer(struct drbd_connection *connection)
         char *ex_to_string;
         int r;
 
-        spin_lock_irq(&connection->resource->req_lock);
+        spin_lock_irq(&resource->req_lock);
         if (connection->cstate >= C_WF_REPORT_PARAMS) {
                 drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
-                spin_unlock_irq(&connection->resource->req_lock);
+                spin_unlock_irq(&resource->req_lock);
                 return false;
         }
 
         connect_cnt = connection->connect_cnt;
-        spin_unlock_irq(&connection->resource->req_lock);
+        spin_unlock_irq(&resource->req_lock);
 
         fp = highest_fencing_policy(connection);
         switch (fp) {
         case FP_NOT_AVAIL:
                 drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
-                goto out;
+                spin_lock_irq(&resource->req_lock);
+                if (connection->cstate < C_WF_REPORT_PARAMS) {
+                        _conn_request_state(connection,
+                                        (union drbd_state) { { .susp_fen = 1 } },
+                                        (union drbd_state) { { .susp_fen = 0 } },
+                                        CS_VERBOSE | CS_HARD | CS_DC_SUSP);
+                        /* We are no longer suspended due to the fencing policy.
+                         * We may still be suspended due to the on-no-data-accessible policy.
+                         * If that was OND_IO_ERROR, fail pending requests. */
+                        if (!resource_is_supended(resource))
+                                _tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
+                }
+                /* Else: in case we raced with a connection handshake,
+                 * let the handshake figure out if we maybe can RESEND,
+                 * and do not resume/fail pending requests here.
+                 * Worst case is we stay suspended for now, which may be
+                 * resolved by either re-establishing the replication link, or
+                 * the next link failure, or eventually the administrator. */
+                spin_unlock_irq(&resource->req_lock);
+                return false;
+
         case FP_DONT_CARE:
                 return true;
         default: ;
@@ -485,17 +503,17 @@ bool conn_try_outdate_peer(struct drbd_connection *connection)
         r = conn_khelper(connection, "fence-peer");
 
         switch ((r>>8) & 0xff) {
-        case 3: /* peer is inconsistent */
+        case P_INCONSISTENT: /* peer is inconsistent */
                 ex_to_string = "peer is inconsistent or worse";
                 mask.pdsk = D_MASK;
                 val.pdsk = D_INCONSISTENT;
                 break;
-        case 4: /* peer got outdated, or was already outdated */
+        case P_OUTDATED: /* peer got outdated, or was already outdated */
                 ex_to_string = "peer was fenced";
                 mask.pdsk = D_MASK;
                 val.pdsk = D_OUTDATED;
                 break;
-        case 5: /* peer was down */
+        case P_DOWN: /* peer was down */
                 if (conn_highest_disk(connection) == D_UP_TO_DATE) {
                         /* we will(have) create(d) a new UUID anyways... */
                         ex_to_string = "peer is unreachable, assumed to be dead";
@@ -505,7 +523,7 @@ bool conn_try_outdate_peer(struct drbd_connection *connection)
                         ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
                 }
                 break;
-        case 6: /* Peer is primary, voluntarily outdate myself.
+        case P_PRIMARY: /* Peer is primary, voluntarily outdate myself.
                  * This is useful when an unconnected R_SECONDARY is asked to
                  * become R_PRIMARY, but finds the other peer being active. */
                 ex_to_string = "peer is active";
@@ -513,7 +531,9 @@ bool conn_try_outdate_peer(struct drbd_connection *connection)
                 mask.disk = D_MASK;
                 val.disk = D_OUTDATED;
                 break;
-        case 7:
+        case P_FENCING:
+                /* THINK: do we need to handle this
+                 * like case 4, or more like case 5? */
                 if (fp != FP_STONITH)
                         drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
                 ex_to_string = "peer was stonithed";
@@ -529,13 +549,11 @@ bool conn_try_outdate_peer(struct drbd_connection *connection)
         drbd_info(connection, "fence-peer helper returned %d (%s)\n",
                   (r>>8) & 0xff, ex_to_string);
 
- out:
-
         /* Not using
            conn_request_state(connection, mask, val, CS_VERBOSE);
            here, because we might were able to re-establish the connection in the
            meantime. */
-        spin_lock_irq(&connection->resource->req_lock);
+        spin_lock_irq(&resource->req_lock);
         if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
                 if (connection->connect_cnt != connect_cnt)
                         /* In case the connection was established and droped
@@ -544,7 +562,7 @@ bool conn_try_outdate_peer(struct drbd_connection *connection)
                 else
                         _conn_request_state(connection, mask, val, CS_VERBOSE);
         }
-        spin_unlock_irq(&connection->resource->req_lock);
+        spin_unlock_irq(&resource->req_lock);
 
         return conn_highest_pdsk(connection) <= D_OUTDATED;
 }
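
Note: the fence-peer helper still reports its verdict through the same numeric exit codes; the switch above only gains symbolic names for them. Read off the case labels being replaced, the mapping is (a sketch of the enum, with the values implied by the old code):

        enum drbd_peer_state {
                P_INCONSISTENT = 3,   /* peer is inconsistent */
                P_OUTDATED     = 4,   /* peer got outdated, or was already outdated */
                P_DOWN         = 5,   /* peer was down / unreachable */
                P_PRIMARY      = 6,   /* peer is primary, voluntarily outdate myself */
                P_FENCING      = 7,   /* peer was stonithed */
        };
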
@@ -1154,51 +1172,160 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) | |||
1154 | return 0; | 1172 | return 0; |
1155 | } | 1173 | } |
1156 | 1174 | ||
1175 | static void blk_queue_discard_granularity(struct request_queue *q, unsigned int granularity) | ||
1176 | { | ||
1177 | q->limits.discard_granularity = granularity; | ||
1178 | } | ||
1179 | |||
1180 | static unsigned int drbd_max_discard_sectors(struct drbd_connection *connection) | ||
1181 | { | ||
1182 | /* when we introduced REQ_WRITE_SAME support, we also bumped | ||
1183 | * our maximum supported batch bio size used for discards. */ | ||
1184 | if (connection->agreed_features & DRBD_FF_WSAME) | ||
1185 | return DRBD_MAX_BBIO_SECTORS; | ||
1186 | /* before, with DRBD <= 8.4.6, we only allowed up to one AL_EXTENT_SIZE. */ | ||
1187 | return AL_EXTENT_SIZE >> 9; | ||
1188 | } | ||
1189 | |||
1190 | static void decide_on_discard_support(struct drbd_device *device, | ||
1191 | struct request_queue *q, | ||
1192 | struct request_queue *b, | ||
1193 | bool discard_zeroes_if_aligned) | ||
1194 | { | ||
1195 | /* q = drbd device queue (device->rq_queue) | ||
1196 | * b = backing device queue (device->ldev->backing_bdev->bd_disk->queue), | ||
1197 | * or NULL if diskless | ||
1198 | */ | ||
1199 | struct drbd_connection *connection = first_peer_device(device)->connection; | ||
1200 | bool can_do = b ? blk_queue_discard(b) : true; | ||
1201 | |||
1202 | if (can_do && b && !b->limits.discard_zeroes_data && !discard_zeroes_if_aligned) { | ||
1203 | can_do = false; | ||
1204 | drbd_info(device, "discard_zeroes_data=0 and discard_zeroes_if_aligned=no: disabling discards\n"); | ||
1205 | } | ||
1206 | if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_TRIM)) { | ||
1207 | can_do = false; | ||
1208 | drbd_info(connection, "peer DRBD too old, does not support TRIM: disabling discards\n"); | ||
1209 | } | ||
1210 | if (can_do) { | ||
1211 | /* We don't care for the granularity, really. | ||
1212 | * Stacking limits below should fix it for the local | ||
1213 | * device. Whether or not it is a suitable granularity | ||
1214 | * on the remote device is not our problem, really. If | ||
1215 | * you care, you need to use devices with similar | ||
1216 | * topology on all peers. */ | ||
1217 | blk_queue_discard_granularity(q, 512); | ||
1218 | q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); | ||
1219 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); | ||
1220 | } else { | ||
1221 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); | ||
1222 | blk_queue_discard_granularity(q, 0); | ||
1223 | q->limits.max_discard_sectors = 0; | ||
1224 | } | ||
1225 | } | ||
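Read as a decision table, the function above only leaves discards enabled when every check passes; a condensed paraphrase of its branches (editor's summary of the code above, no additional logic):

/* b == NULL (diskless)                                    -> assume discards are fine
 * b given, but !blk_queue_discard(b)                      -> disable
 * b given, !discard_zeroes_data, override not configured  -> disable
 * connected peer without DRBD_FF_TRIM                     -> disable
 * otherwise: granularity 512, max from drbd_max_discard_sectors(), QUEUE_FLAG_DISCARD set
 */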
1226 | |||
1227 | static void fixup_discard_if_not_supported(struct request_queue *q) | ||
1228 | { | ||
1229 | /* To avoid confusion, if this queue does not support discard, clear | ||
1230 | * max_discard_sectors, which is what lsblk -D reports to the user. | ||
1231 | * Older kernels got this wrong in "stack limits". | ||
1232 | */ | ||
1233 | if (!blk_queue_discard(q)) { | ||
1234 | blk_queue_max_discard_sectors(q, 0); | ||
1235 | blk_queue_discard_granularity(q, 0); | ||
1236 | } | ||
1237 | } | ||
1238 | |||
1239 | static void decide_on_write_same_support(struct drbd_device *device, | ||
1240 | struct request_queue *q, | ||
1241 | struct request_queue *b, struct o_qlim *o) | ||
1242 | { | ||
1243 | struct drbd_peer_device *peer_device = first_peer_device(device); | ||
1244 | struct drbd_connection *connection = peer_device->connection; | ||
1245 | bool can_do = b ? b->limits.max_write_same_sectors : true; | ||
1246 | |||
1247 | if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) { | ||
1248 | can_do = false; | ||
1249 | drbd_info(peer_device, "peer does not support WRITE_SAME\n"); | ||
1250 | } | ||
1251 | |||
1252 | if (o) { | ||
1253 | /* logical block size; queue_logical_block_size(NULL) is 512 */ | ||
1254 | unsigned int peer_lbs = be32_to_cpu(o->logical_block_size); | ||
1255 | unsigned int me_lbs_b = queue_logical_block_size(b); | ||
1256 | unsigned int me_lbs = queue_logical_block_size(q); | ||
1257 | |||
1258 | if (me_lbs_b != me_lbs) { | ||
1259 | drbd_warn(device, | ||
1260 | "logical block size of local backend does not match (drbd:%u, backend:%u); was this a late attach?\n", | ||
1261 | me_lbs, me_lbs_b); | ||
1262 | /* rather disable write same than trigger some BUG_ON later in the scsi layer. */ | ||
1263 | can_do = false; | ||
1264 | } | ||
1265 | if (me_lbs_b != peer_lbs) { | ||
1266 | drbd_warn(peer_device, "logical block sizes do not match (me:%u, peer:%u); this may cause problems.\n", | ||
1267 | me_lbs, peer_lbs); | ||
1268 | if (can_do) { | ||
1269 | drbd_dbg(peer_device, "logical block size mismatch: WRITE_SAME disabled.\n"); | ||
1270 | can_do = false; | ||
1271 | } | ||
1272 | me_lbs = max(me_lbs, me_lbs_b); | ||
1273 | /* We cannot change the logical block size of an in-use queue. | ||
1274 | * We can only hope that access happens to be properly aligned. | ||
1275 | * If not, the peer will likely produce an IO error, and detach. */ | ||
1276 | if (peer_lbs > me_lbs) { | ||
1277 | if (device->state.role != R_PRIMARY) { | ||
1278 | blk_queue_logical_block_size(q, peer_lbs); | ||
1279 | drbd_warn(peer_device, "logical block size set to %u\n", peer_lbs); | ||
1280 | } else { | ||
1281 | drbd_warn(peer_device, | ||
1282 | "current Primary must NOT adjust logical block size (%u -> %u); hope for the best.\n", | ||
1283 | me_lbs, peer_lbs); | ||
1284 | } | ||
1285 | } | ||
1286 | } | ||
1287 | if (can_do && !o->write_same_capable) { | ||
1288 | /* If we introduce an open-coded write-same loop on the receiving side, | ||
1289 | * the peer would present itself as "capable". */ | ||
1290 | drbd_dbg(peer_device, "WRITE_SAME disabled (peer device not capable)\n"); | ||
1291 | can_do = false; | ||
1292 | } | ||
1293 | } | ||
1294 | |||
1295 | blk_queue_max_write_same_sectors(q, can_do ? DRBD_MAX_BBIO_SECTORS : 0); | ||
1296 | } | ||
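As a concrete walk-through of the logical-block-size checks above (hypothetical sizes, not from the patch): with the DRBD queue and the local backend both at 512 bytes and the peer reporting 4096, the mismatch warning fires, WRITE_SAME is disabled, and only a non-Primary node adopts the peer's larger size:

/* hypothetical values */
me_lbs = 512, me_lbs_b = 512, peer_lbs = 4096;
/* me_lbs_b != peer_lbs -> warn, can_do = false                        */
/* peer_lbs > me_lbs    -> Secondary: blk_queue_logical_block_size(q, 4096)
 *                         Primary:   keep 512, warn and hope for the best */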
1297 | |||
1157 | static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, | 1298 | static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, |
1158 | unsigned int max_bio_size) | 1299 | unsigned int max_bio_size, struct o_qlim *o) |
1159 | { | 1300 | { |
1160 | struct request_queue * const q = device->rq_queue; | 1301 | struct request_queue * const q = device->rq_queue; |
1161 | unsigned int max_hw_sectors = max_bio_size >> 9; | 1302 | unsigned int max_hw_sectors = max_bio_size >> 9; |
1162 | unsigned int max_segments = 0; | 1303 | unsigned int max_segments = 0; |
1163 | struct request_queue *b = NULL; | 1304 | struct request_queue *b = NULL; |
1305 | struct disk_conf *dc; | ||
1306 | bool discard_zeroes_if_aligned = true; | ||
1164 | 1307 | ||
1165 | if (bdev) { | 1308 | if (bdev) { |
1166 | b = bdev->backing_bdev->bd_disk->queue; | 1309 | b = bdev->backing_bdev->bd_disk->queue; |
1167 | 1310 | ||
1168 | max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); | 1311 | max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); |
1169 | rcu_read_lock(); | 1312 | rcu_read_lock(); |
1170 | max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; | 1313 | dc = rcu_dereference(device->ldev->disk_conf); |
1314 | max_segments = dc->max_bio_bvecs; | ||
1315 | discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned; | ||
1171 | rcu_read_unlock(); | 1316 | rcu_read_unlock(); |
1172 | 1317 | ||
1173 | blk_set_stacking_limits(&q->limits); | 1318 | blk_set_stacking_limits(&q->limits); |
1174 | blk_queue_max_write_same_sectors(q, 0); | ||
1175 | } | 1319 | } |
1176 | 1320 | ||
1177 | blk_queue_logical_block_size(q, 512); | ||
1178 | blk_queue_max_hw_sectors(q, max_hw_sectors); | 1321 | blk_queue_max_hw_sectors(q, max_hw_sectors); |
1179 | /* This is the workaround for "bio would need to, but cannot, be split" */ | 1322 | /* This is the workaround for "bio would need to, but cannot, be split" */ |
1180 | blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); | 1323 | blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); |
1181 | blk_queue_segment_boundary(q, PAGE_SIZE-1); | 1324 | blk_queue_segment_boundary(q, PAGE_SIZE-1); |
1325 | decide_on_discard_support(device, q, b, discard_zeroes_if_aligned); | ||
1326 | decide_on_write_same_support(device, q, b, o); | ||
1182 | 1327 | ||
1183 | if (b) { | 1328 | if (b) { |
1184 | struct drbd_connection *connection = first_peer_device(device)->connection; | ||
1185 | |||
1186 | blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS); | ||
1187 | |||
1188 | if (blk_queue_discard(b) && | ||
1189 | (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) { | ||
1190 | /* We don't care, stacking below should fix it for the local device. | ||
1191 | * Whether or not it is a suitable granularity on the remote device | ||
1192 | * is not our problem, really. If you care, you need to | ||
1193 | * use devices with similar topology on all peers. */ | ||
1194 | q->limits.discard_granularity = 512; | ||
1195 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); | ||
1196 | } else { | ||
1197 | blk_queue_max_discard_sectors(q, 0); | ||
1198 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); | ||
1199 | q->limits.discard_granularity = 0; | ||
1200 | } | ||
1201 | |||
1202 | blk_queue_stack_limits(q, b); | 1329 | blk_queue_stack_limits(q, b); |
1203 | 1330 | ||
1204 | if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { | 1331 | if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { |
@@ -1208,15 +1335,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi | |||
1208 | q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; | 1335 | q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; |
1209 | } | 1336 | } |
1210 | } | 1337 | } |
1211 | /* To avoid confusion, if this queue does not support discard, clear | 1338 | fixup_discard_if_not_supported(q); |
1212 | * max_discard_sectors, which is what lsblk -D reports to the user. */ | ||
1213 | if (!blk_queue_discard(q)) { | ||
1214 | blk_queue_max_discard_sectors(q, 0); | ||
1215 | q->limits.discard_granularity = 0; | ||
1216 | } | ||
1217 | } | 1339 | } |
1218 | 1340 | ||
1219 | void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev) | 1341 | void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) |
1220 | { | 1342 | { |
1221 | unsigned int now, new, local, peer; | 1343 | unsigned int now, new, local, peer; |
1222 | 1344 | ||
@@ -1259,7 +1381,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backin | |||
1259 | if (new != now) | 1381 | if (new != now) |
1260 | drbd_info(device, "max BIO size = %u\n", new); | 1382 | drbd_info(device, "max BIO size = %u\n", new); |
1261 | 1383 | ||
1262 | drbd_setup_queue_param(device, bdev, new); | 1384 | drbd_setup_queue_param(device, bdev, new, o); |
1263 | } | 1385 | } |
1264 | 1386 | ||
1265 | /* Starts the worker thread */ | 1387 | /* Starts the worker thread */ |
@@ -1348,6 +1470,43 @@ static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b) | |||
1348 | a->disk_drain != b->disk_drain; | 1470 | a->disk_drain != b->disk_drain; |
1349 | } | 1471 | } |
1350 | 1472 | ||
1473 | static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *disk_conf, | ||
1474 | struct drbd_backing_dev *nbc) | ||
1475 | { | ||
1476 | struct request_queue * const q = nbc->backing_bdev->bd_disk->queue; | ||
1477 | |||
1478 | if (disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) | ||
1479 | disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; | ||
1480 | if (disk_conf->al_extents > drbd_al_extents_max(nbc)) | ||
1481 | disk_conf->al_extents = drbd_al_extents_max(nbc); | ||
1482 | |||
1483 | if (!blk_queue_discard(q) | ||
1484 | || (!q->limits.discard_zeroes_data && !disk_conf->discard_zeroes_if_aligned)) { | ||
1485 | if (disk_conf->rs_discard_granularity) { | ||
1486 | disk_conf->rs_discard_granularity = 0; /* disable feature */ | ||
1487 | drbd_info(device, "rs_discard_granularity feature disabled\n"); | ||
1488 | } | ||
1489 | } | ||
1490 | |||
1491 | if (disk_conf->rs_discard_granularity) { | ||
1492 | int orig_value = disk_conf->rs_discard_granularity; | ||
1493 | int remainder; | ||
1494 | |||
1495 | if (q->limits.discard_granularity > disk_conf->rs_discard_granularity) | ||
1496 | disk_conf->rs_discard_granularity = q->limits.discard_granularity; | ||
1497 | |||
1498 | remainder = disk_conf->rs_discard_granularity % q->limits.discard_granularity; | ||
1499 | disk_conf->rs_discard_granularity += remainder; | ||
1500 | |||
1501 | if (disk_conf->rs_discard_granularity > q->limits.max_discard_sectors << 9) | ||
1502 | disk_conf->rs_discard_granularity = q->limits.max_discard_sectors << 9; | ||
1503 | |||
1504 | if (disk_conf->rs_discard_granularity != orig_value) | ||
1505 | drbd_info(device, "rs_discard_granularity changed to %d\n", | ||
1506 | disk_conf->rs_discard_granularity); | ||
1507 | } | ||
1508 | } | ||
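A worked example of the rs_discard_granularity adjustment above, with hypothetical numbers; note that, as written, adding the remainder only lands on an exact multiple of the backend granularity when the configured value was already aligned:

/* backend discard_granularity = 65536 (64 KiB), configured rs_discard_granularity = 100000 */
/* raise to at least 65536          -> stays 100000                          */
/* remainder = 100000 % 65536       -> 34464                                 */
/* rs_discard_granularity += 34464  -> 134464, then capped at
 *                                     q->limits.max_discard_sectors << 9    */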
1509 | |||
1351 | int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) | 1510 | int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) |
1352 | { | 1511 | { |
1353 | struct drbd_config_context adm_ctx; | 1512 | struct drbd_config_context adm_ctx; |
@@ -1395,10 +1554,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) | |||
1395 | if (!expect(new_disk_conf->resync_rate >= 1)) | 1554 | if (!expect(new_disk_conf->resync_rate >= 1)) |
1396 | new_disk_conf->resync_rate = 1; | 1555 | new_disk_conf->resync_rate = 1; |
1397 | 1556 | ||
1398 | if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) | 1557 | sanitize_disk_conf(device, new_disk_conf, device->ldev); |
1399 | new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; | ||
1400 | if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev)) | ||
1401 | new_disk_conf->al_extents = drbd_al_extents_max(device->ldev); | ||
1402 | 1558 | ||
1403 | if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) | 1559 | if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) |
1404 | new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; | 1560 | new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; |
@@ -1457,6 +1613,9 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) | |||
1457 | if (write_ordering_changed(old_disk_conf, new_disk_conf)) | 1613 | if (write_ordering_changed(old_disk_conf, new_disk_conf)) |
1458 | drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH); | 1614 | drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH); |
1459 | 1615 | ||
1616 | if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned) | ||
1617 | drbd_reconsider_queue_parameters(device, device->ldev, NULL); | ||
1618 | |||
1460 | drbd_md_sync(device); | 1619 | drbd_md_sync(device); |
1461 | 1620 | ||
1462 | if (device->state.conn >= C_CONNECTED) { | 1621 | if (device->state.conn >= C_CONNECTED) { |
@@ -1693,10 +1852,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1693 | if (retcode != NO_ERROR) | 1852 | if (retcode != NO_ERROR) |
1694 | goto fail; | 1853 | goto fail; |
1695 | 1854 | ||
1696 | if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN) | 1855 | sanitize_disk_conf(device, new_disk_conf, nbc); |
1697 | new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN; | ||
1698 | if (new_disk_conf->al_extents > drbd_al_extents_max(nbc)) | ||
1699 | new_disk_conf->al_extents = drbd_al_extents_max(nbc); | ||
1700 | 1856 | ||
1701 | if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { | 1857 | if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { |
1702 | drbd_err(device, "max capacity %llu smaller than disk size %llu\n", | 1858 | drbd_err(device, "max capacity %llu smaller than disk size %llu\n", |
@@ -1838,7 +1994,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1838 | device->read_cnt = 0; | 1994 | device->read_cnt = 0; |
1839 | device->writ_cnt = 0; | 1995 | device->writ_cnt = 0; |
1840 | 1996 | ||
1841 | drbd_reconsider_max_bio_size(device, device->ldev); | 1997 | drbd_reconsider_queue_parameters(device, device->ldev, NULL); |
1842 | 1998 | ||
1843 | /* If I am currently not R_PRIMARY, | 1999 | /* If I am currently not R_PRIMARY, |
1844 | * but meta data primary indicator is set, | 2000 | * but meta data primary indicator is set, |
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 6537b25db9c1..be2b93fd2c11 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c | |||
@@ -25,7 +25,7 @@ | |||
25 | 25 | ||
26 | #include <linux/module.h> | 26 | #include <linux/module.h> |
27 | 27 | ||
28 | #include <asm/uaccess.h> | 28 | #include <linux/uaccess.h> |
29 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
30 | #include <linux/file.h> | 30 | #include <linux/file.h> |
31 | #include <linux/proc_fs.h> | 31 | #include <linux/proc_fs.h> |
@@ -122,18 +122,18 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
122 | 122 | ||
123 | x = res/50; | 123 | x = res/50; |
124 | y = 20-x; | 124 | y = 20-x; |
125 | seq_printf(seq, "\t["); | 125 | seq_puts(seq, "\t["); |
126 | for (i = 1; i < x; i++) | 126 | for (i = 1; i < x; i++) |
127 | seq_printf(seq, "="); | 127 | seq_putc(seq, '='); |
128 | seq_printf(seq, ">"); | 128 | seq_putc(seq, '>'); |
129 | for (i = 0; i < y; i++) | 129 | for (i = 0; i < y; i++) |
130 | seq_printf(seq, "."); | 130 | seq_printf(seq, "."); |
131 | seq_printf(seq, "] "); | 131 | seq_puts(seq, "] "); |
132 | 132 | ||
133 | if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T) | 133 | if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T) |
134 | seq_printf(seq, "verified:"); | 134 | seq_puts(seq, "verified:"); |
135 | else | 135 | else |
136 | seq_printf(seq, "sync'ed:"); | 136 | seq_puts(seq, "sync'ed:"); |
137 | seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); | 137 | seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); |
138 | 138 | ||
139 | /* if more than a few GB, display in MB */ | 139 | /* if more than a few GB, display in MB */ |
@@ -146,7 +146,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
146 | (unsigned long) Bit2KB(rs_left), | 146 | (unsigned long) Bit2KB(rs_left), |
147 | (unsigned long) Bit2KB(rs_total)); | 147 | (unsigned long) Bit2KB(rs_total)); |
148 | 148 | ||
149 | seq_printf(seq, "\n\t"); | 149 | seq_puts(seq, "\n\t"); |
150 | 150 | ||
151 | /* see drivers/md/md.c | 151 | /* see drivers/md/md.c |
152 | * We do not want to overflow, so the order of operands and | 152 | * We do not want to overflow, so the order of operands and |
@@ -175,9 +175,9 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
175 | rt / 3600, (rt % 3600) / 60, rt % 60); | 175 | rt / 3600, (rt % 3600) / 60, rt % 60); |
176 | 176 | ||
177 | dbdt = Bit2KB(db/dt); | 177 | dbdt = Bit2KB(db/dt); |
178 | seq_printf(seq, " speed: "); | 178 | seq_puts(seq, " speed: "); |
179 | seq_printf_with_thousands_grouping(seq, dbdt); | 179 | seq_printf_with_thousands_grouping(seq, dbdt); |
180 | seq_printf(seq, " ("); | 180 | seq_puts(seq, " ("); |
181 | /* ------------------------- ~3s average ------------------------ */ | 181 | /* ------------------------- ~3s average ------------------------ */ |
182 | if (proc_details >= 1) { | 182 | if (proc_details >= 1) { |
183 | /* this is what drbd_rs_should_slow_down() uses */ | 183 | /* this is what drbd_rs_should_slow_down() uses */ |
@@ -188,7 +188,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
188 | db = device->rs_mark_left[i] - rs_left; | 188 | db = device->rs_mark_left[i] - rs_left; |
189 | dbdt = Bit2KB(db/dt); | 189 | dbdt = Bit2KB(db/dt); |
190 | seq_printf_with_thousands_grouping(seq, dbdt); | 190 | seq_printf_with_thousands_grouping(seq, dbdt); |
191 | seq_printf(seq, " -- "); | 191 | seq_puts(seq, " -- "); |
192 | } | 192 | } |
193 | 193 | ||
194 | /* --------------------- long term average ---------------------- */ | 194 | /* --------------------- long term average ---------------------- */ |
@@ -200,11 +200,11 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
200 | db = rs_total - rs_left; | 200 | db = rs_total - rs_left; |
201 | dbdt = Bit2KB(db/dt); | 201 | dbdt = Bit2KB(db/dt); |
202 | seq_printf_with_thousands_grouping(seq, dbdt); | 202 | seq_printf_with_thousands_grouping(seq, dbdt); |
203 | seq_printf(seq, ")"); | 203 | seq_putc(seq, ')'); |
204 | 204 | ||
205 | if (state.conn == C_SYNC_TARGET || | 205 | if (state.conn == C_SYNC_TARGET || |
206 | state.conn == C_VERIFY_S) { | 206 | state.conn == C_VERIFY_S) { |
207 | seq_printf(seq, " want: "); | 207 | seq_puts(seq, " want: "); |
208 | seq_printf_with_thousands_grouping(seq, device->c_sync_rate); | 208 | seq_printf_with_thousands_grouping(seq, device->c_sync_rate); |
209 | } | 209 | } |
210 | seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); | 210 | seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); |
@@ -231,7 +231,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
231 | (unsigned long long)bm_bits * BM_SECT_PER_BIT); | 231 | (unsigned long long)bm_bits * BM_SECT_PER_BIT); |
232 | if (stop_sector != 0 && stop_sector != ULLONG_MAX) | 232 | if (stop_sector != 0 && stop_sector != ULLONG_MAX) |
233 | seq_printf(seq, " stop sector: %llu", stop_sector); | 233 | seq_printf(seq, " stop sector: %llu", stop_sector); |
234 | seq_printf(seq, "\n"); | 234 | seq_putc(seq, '\n'); |
235 | } | 235 | } |
236 | } | 236 | } |
237 | 237 | ||
@@ -276,7 +276,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
276 | rcu_read_lock(); | 276 | rcu_read_lock(); |
277 | idr_for_each_entry(&drbd_devices, device, i) { | 277 | idr_for_each_entry(&drbd_devices, device, i) { |
278 | if (prev_i != i - 1) | 278 | if (prev_i != i - 1) |
279 | seq_printf(seq, "\n"); | 279 | seq_putc(seq, '\n'); |
280 | prev_i = i; | 280 | prev_i = i; |
281 | 281 | ||
282 | state = device->state; | 282 | state = device->state; |
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h index 129f8c76c9b1..4d296800f706 100644 --- a/drivers/block/drbd/drbd_protocol.h +++ b/drivers/block/drbd/drbd_protocol.h | |||
@@ -60,6 +60,15 @@ enum drbd_packet { | |||
60 | * which is why I chose TRIM here, to disambiguate. */ | 60 | * which is why I chose TRIM here, to disambiguate. */ |
61 | P_TRIM = 0x31, | 61 | P_TRIM = 0x31, |
62 | 62 | ||
63 | /* Only use these two if both support FF_THIN_RESYNC */ | ||
64 | P_RS_THIN_REQ = 0x32, /* Request a block for resync or reply P_RS_DEALLOCATED */ | ||
65 | P_RS_DEALLOCATED = 0x33, /* Contains only zeros on sync source node */ | ||
66 | |||
67 | /* REQ_WRITE_SAME. | ||
68 | * On a receiving side without REQ_WRITE_SAME, | ||
69 | * we may fall back to an opencoded loop instead. */ | ||
70 | P_WSAME = 0x34, | ||
71 | |||
63 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ | 72 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ |
64 | P_MAX_OPT_CMD = 0x101, | 73 | P_MAX_OPT_CMD = 0x101, |
65 | 74 | ||
@@ -106,8 +115,11 @@ struct p_header100 { | |||
106 | u32 pad; | 115 | u32 pad; |
107 | } __packed; | 116 | } __packed; |
108 | 117 | ||
109 | /* these defines must not be changed without changing the protocol version */ | 118 | /* These defines must not be changed without changing the protocol version. |
110 | #define DP_HARDBARRIER 1 /* depricated */ | 119 | * New defines may only be introduced together with protocol version bump or |
120 | * new protocol feature flags. | ||
121 | */ | ||
122 | #define DP_HARDBARRIER 1 /* no longer used */ | ||
111 | #define DP_RW_SYNC 2 /* equals REQ_SYNC */ | 123 | #define DP_RW_SYNC 2 /* equals REQ_SYNC */ |
112 | #define DP_MAY_SET_IN_SYNC 4 | 124 | #define DP_MAY_SET_IN_SYNC 4 |
113 | #define DP_UNPLUG 8 /* not used anymore */ | 125 | #define DP_UNPLUG 8 /* not used anymore */ |
@@ -116,6 +128,7 @@ struct p_header100 { | |||
116 | #define DP_DISCARD 64 /* equals REQ_DISCARD */ | 128 | #define DP_DISCARD 64 /* equals REQ_DISCARD */ |
117 | #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ | 129 | #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ |
118 | #define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ | 130 | #define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ |
131 | #define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */ | ||
119 | 132 | ||
120 | struct p_data { | 133 | struct p_data { |
121 | u64 sector; /* 64 bits sector number */ | 134 | u64 sector; /* 64 bits sector number */ |
@@ -129,6 +142,11 @@ struct p_trim { | |||
129 | u32 size; /* == bio->bi_size */ | 142 | u32 size; /* == bio->bi_size */ |
130 | } __packed; | 143 | } __packed; |
131 | 144 | ||
145 | struct p_wsame { | ||
146 | struct p_data p_data; | ||
147 | u32 size; /* == bio->bi_size */ | ||
148 | } __packed; | ||
149 | |||
132 | /* | 150 | /* |
133 | * commands which share a struct: | 151 | * commands which share a struct: |
134 | * p_block_ack: | 152 | * p_block_ack: |
@@ -160,7 +178,23 @@ struct p_block_req { | |||
160 | * ReportParams | 178 | * ReportParams |
161 | */ | 179 | */ |
162 | 180 | ||
163 | #define FF_TRIM 1 | 181 | /* supports TRIM/DISCARD on the "wire" protocol */ |
182 | #define DRBD_FF_TRIM 1 | ||
183 | |||
184 | /* Detect all-zeros during resync, and rather TRIM/UNMAP/DISCARD those blocks | ||
185 | * instead of fully allocate a supposedly thin volume on initial resync */ | ||
186 | #define DRBD_FF_THIN_RESYNC 2 | ||
187 | |||
188 | /* supports REQ_WRITE_SAME on the "wire" protocol. | ||
189 | * Note: this flag is overloaded, | ||
190 | * its presence also | ||
191 | * - indicates support for 128 MiB "batch bios", | ||
192 | * max discard size of 128 MiB | ||
193 | * instead of 4 MiB before that. | ||
194 | * - indicates that we exchange additional settings in p_sizes | ||
195 | * drbd_send_sizes()/receive_sizes() | ||
196 | */ | ||
197 | #define DRBD_FF_WSAME 4 | ||
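For orientation, the "agreed_features & DRBD_FF_*" tests used throughout this patch imply that each side advertises its mask and only the intersection is acted on; a minimal sketch of that handshake step (assumed shape for illustration, the actual negotiation code is not part of this hunk):

/* sketch, assuming the peer's advertised mask arrives as p->feature_flags */
connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
if (connection->agreed_features & DRBD_FF_WSAME)
	; /* may use P_WSAME and the trailing struct o_qlim in P_SIZES */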
164 | 198 | ||
165 | struct p_connection_features { | 199 | struct p_connection_features { |
166 | u32 protocol_min; | 200 | u32 protocol_min; |
@@ -235,6 +269,40 @@ struct p_rs_uuid { | |||
235 | u64 uuid; | 269 | u64 uuid; |
236 | } __packed; | 270 | } __packed; |
237 | 271 | ||
272 | /* optional queue_limits if (agreed_features & DRBD_FF_WSAME) | ||
273 | * see also struct queue_limits, as of late 2015 */ | ||
274 | struct o_qlim { | ||
275 | /* we don't need it yet, but we may as well communicate it now */ | ||
276 | u32 physical_block_size; | ||
277 | |||
278 | /* so the original in struct queue_limits is unsigned short, | ||
279 | * but I'd have to put in padding anyways. */ | ||
280 | u32 logical_block_size; | ||
281 | |||
282 | /* One incoming bio becomes one DRBD request, | ||
283 | * which may be translated to several bios on the receiving side. | ||
284 | * We don't need to communicate chunk/boundary/segment ... limits. | ||
285 | */ | ||
286 | |||
287 | /* various IO hints may be useful with "diskless client" setups */ | ||
288 | u32 alignment_offset; | ||
289 | u32 io_min; | ||
290 | u32 io_opt; | ||
291 | |||
292 | /* We may need to communicate integrity stuff at some point, | ||
293 | * but let's not get ahead of ourselves. */ | ||
294 | |||
295 | /* Backend discard capabilities. | ||
296 | * Receiving side uses "blkdev_issue_discard()", no need to communicate | ||
297 | * more specifics. If the backend cannot do discards, the DRBD peer | ||
298 | * may fall back to blkdev_issue_zeroout(). | ||
299 | */ | ||
300 | u8 discard_enabled; | ||
301 | u8 discard_zeroes_data; | ||
302 | u8 write_same_capable; | ||
303 | u8 _pad; | ||
304 | } __packed; | ||
305 | |||
238 | struct p_sizes { | 306 | struct p_sizes { |
239 | u64 d_size; /* size of disk */ | 307 | u64 d_size; /* size of disk */ |
240 | u64 u_size; /* user requested size */ | 308 | u64 u_size; /* user requested size */ |
@@ -242,6 +310,9 @@ struct p_sizes { | |||
242 | u32 max_bio_size; /* Maximal size of a BIO */ | 310 | u32 max_bio_size; /* Maximal size of a BIO */ |
243 | u16 queue_order_type; /* not yet implemented in DRBD*/ | 311 | u16 queue_order_type; /* not yet implemented in DRBD*/ |
244 | u16 dds_flags; /* use enum dds_flags here. */ | 312 | u16 dds_flags; /* use enum dds_flags here. */ |
313 | |||
314 | /* optional queue_limits if (agreed_features & DRBD_FF_WSAME) */ | ||
315 | struct o_qlim qlim[0]; | ||
245 | } __packed; | 316 | } __packed; |
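Because qlim[0] is a zero-length trailing member, the extra limits travel at the end of the P_SIZES payload only when DRBD_FF_WSAME was agreed; a minimal sizing sketch (illustrative only, not the patch's actual send path):

size_t payload = sizeof(struct p_sizes);
if (connection->agreed_features & DRBD_FF_WSAME)
	payload += sizeof(struct o_qlim);   /* fields filled in from the local queue_limits */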
246 | 317 | ||
247 | struct p_state { | 318 | struct p_state { |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1ee002352ea2..df45713dfbe8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -25,7 +25,7 @@ | |||
25 | 25 | ||
26 | #include <linux/module.h> | 26 | #include <linux/module.h> |
27 | 27 | ||
28 | #include <asm/uaccess.h> | 28 | #include <linux/uaccess.h> |
29 | #include <net/sock.h> | 29 | #include <net/sock.h> |
30 | 30 | ||
31 | #include <linux/drbd.h> | 31 | #include <linux/drbd.h> |
@@ -48,7 +48,7 @@ | |||
48 | #include "drbd_req.h" | 48 | #include "drbd_req.h" |
49 | #include "drbd_vli.h" | 49 | #include "drbd_vli.h" |
50 | 50 | ||
51 | #define PRO_FEATURES (FF_TRIM) | 51 | #define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME) |
52 | 52 | ||
53 | struct packet_info { | 53 | struct packet_info { |
54 | enum drbd_packet cmd; | 54 | enum drbd_packet cmd; |
@@ -361,14 +361,17 @@ You must not have the req_lock: | |||
361 | drbd_wait_ee_list_empty() | 361 | drbd_wait_ee_list_empty() |
362 | */ | 362 | */ |
363 | 363 | ||
364 | /* normal: payload_size == request size (bi_size) | ||
365 | * w_same: payload_size == logical_block_size | ||
366 | * trim: payload_size == 0 */ | ||
364 | struct drbd_peer_request * | 367 | struct drbd_peer_request * |
365 | drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | 368 | drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, |
366 | unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local) | 369 | unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local) |
367 | { | 370 | { |
368 | struct drbd_device *device = peer_device->device; | 371 | struct drbd_device *device = peer_device->device; |
369 | struct drbd_peer_request *peer_req; | 372 | struct drbd_peer_request *peer_req; |
370 | struct page *page = NULL; | 373 | struct page *page = NULL; |
371 | unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; | 374 | unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT; |
372 | 375 | ||
373 | if (drbd_insert_fault(device, DRBD_FAULT_AL_EE)) | 376 | if (drbd_insert_fault(device, DRBD_FAULT_AL_EE)) |
374 | return NULL; | 377 | return NULL; |
@@ -380,7 +383,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto | |||
380 | return NULL; | 383 | return NULL; |
381 | } | 384 | } |
382 | 385 | ||
383 | if (has_payload && data_size) { | 386 | if (nr_pages) { |
384 | page = drbd_alloc_pages(peer_device, nr_pages, | 387 | page = drbd_alloc_pages(peer_device, nr_pages, |
385 | gfpflags_allow_blocking(gfp_mask)); | 388 | gfpflags_allow_blocking(gfp_mask)); |
386 | if (!page) | 389 | if (!page) |
@@ -390,7 +393,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto | |||
390 | memset(peer_req, 0, sizeof(*peer_req)); | 393 | memset(peer_req, 0, sizeof(*peer_req)); |
391 | INIT_LIST_HEAD(&peer_req->w.list); | 394 | INIT_LIST_HEAD(&peer_req->w.list); |
392 | drbd_clear_interval(&peer_req->i); | 395 | drbd_clear_interval(&peer_req->i); |
393 | peer_req->i.size = data_size; | 396 | peer_req->i.size = request_size; |
394 | peer_req->i.sector = sector; | 397 | peer_req->i.sector = sector; |
395 | peer_req->submit_jif = jiffies; | 398 | peer_req->submit_jif = jiffies; |
396 | peer_req->peer_device = peer_device; | 399 | peer_req->peer_device = peer_device; |
@@ -1204,13 +1207,84 @@ static int drbd_recv_header(struct drbd_connection *connection, struct packet_in | |||
1204 | return err; | 1207 | return err; |
1205 | } | 1208 | } |
1206 | 1209 | ||
1207 | static void drbd_flush(struct drbd_connection *connection) | 1210 | /* This is blkdev_issue_flush, but asynchronous. |
1211 | * We want to submit to all component volumes in parallel, | ||
1212 | * then wait for all completions. | ||
1213 | */ | ||
1214 | struct issue_flush_context { | ||
1215 | atomic_t pending; | ||
1216 | int error; | ||
1217 | struct completion done; | ||
1218 | }; | ||
1219 | struct one_flush_context { | ||
1220 | struct drbd_device *device; | ||
1221 | struct issue_flush_context *ctx; | ||
1222 | }; | ||
1223 | |||
1224 | void one_flush_endio(struct bio *bio) | ||
1208 | { | 1225 | { |
1209 | int rv; | 1226 | struct one_flush_context *octx = bio->bi_private; |
1210 | struct drbd_peer_device *peer_device; | 1227 | struct drbd_device *device = octx->device; |
1211 | int vnr; | 1228 | struct issue_flush_context *ctx = octx->ctx; |
1212 | 1229 | ||
1230 | if (bio->bi_error) { | ||
1231 | ctx->error = bio->bi_error; | ||
1232 | drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error); | ||
1233 | } | ||
1234 | kfree(octx); | ||
1235 | bio_put(bio); | ||
1236 | |||
1237 | clear_bit(FLUSH_PENDING, &device->flags); | ||
1238 | put_ldev(device); | ||
1239 | kref_put(&device->kref, drbd_destroy_device); | ||
1240 | |||
1241 | if (atomic_dec_and_test(&ctx->pending)) | ||
1242 | complete(&ctx->done); | ||
1243 | } | ||
1244 | |||
1245 | static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx) | ||
1246 | { | ||
1247 | struct bio *bio = bio_alloc(GFP_NOIO, 0); | ||
1248 | struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO); | ||
1249 | if (!bio || !octx) { | ||
1250 | drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n"); | ||
1251 | /* FIXME: what else can I do now? disconnecting or detaching | ||
1252 | * really does not help to improve the state of the world, either. | ||
1253 | */ | ||
1254 | kfree(octx); | ||
1255 | if (bio) | ||
1256 | bio_put(bio); | ||
1257 | |||
1258 | ctx->error = -ENOMEM; | ||
1259 | put_ldev(device); | ||
1260 | kref_put(&device->kref, drbd_destroy_device); | ||
1261 | return; | ||
1262 | } | ||
1263 | |||
1264 | octx->device = device; | ||
1265 | octx->ctx = ctx; | ||
1266 | bio->bi_bdev = device->ldev->backing_bdev; | ||
1267 | bio->bi_private = octx; | ||
1268 | bio->bi_end_io = one_flush_endio; | ||
1269 | bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH); | ||
1270 | |||
1271 | device->flush_jif = jiffies; | ||
1272 | set_bit(FLUSH_PENDING, &device->flags); | ||
1273 | atomic_inc(&ctx->pending); | ||
1274 | submit_bio(bio); | ||
1275 | } | ||
1276 | |||
1277 | static void drbd_flush(struct drbd_connection *connection) | ||
1278 | { | ||
1213 | if (connection->resource->write_ordering >= WO_BDEV_FLUSH) { | 1279 | if (connection->resource->write_ordering >= WO_BDEV_FLUSH) { |
1280 | struct drbd_peer_device *peer_device; | ||
1281 | struct issue_flush_context ctx; | ||
1282 | int vnr; | ||
1283 | |||
1284 | atomic_set(&ctx.pending, 1); | ||
1285 | ctx.error = 0; | ||
1286 | init_completion(&ctx.done); | ||
1287 | |||
1214 | rcu_read_lock(); | 1288 | rcu_read_lock(); |
1215 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | 1289 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { |
1216 | struct drbd_device *device = peer_device->device; | 1290 | struct drbd_device *device = peer_device->device; |
@@ -1220,31 +1294,24 @@ static void drbd_flush(struct drbd_connection *connection) | |||
1220 | kref_get(&device->kref); | 1294 | kref_get(&device->kref); |
1221 | rcu_read_unlock(); | 1295 | rcu_read_unlock(); |
1222 | 1296 | ||
1223 | /* Right now, we have only this one synchronous code path | 1297 | submit_one_flush(device, &ctx); |
1224 | * for flushes between request epochs. | ||
1225 | * We may want to make those asynchronous, | ||
1226 | * or at least parallelize the flushes to the volume devices. | ||
1227 | */ | ||
1228 | device->flush_jif = jiffies; | ||
1229 | set_bit(FLUSH_PENDING, &device->flags); | ||
1230 | rv = blkdev_issue_flush(device->ldev->backing_bdev, | ||
1231 | GFP_NOIO, NULL); | ||
1232 | clear_bit(FLUSH_PENDING, &device->flags); | ||
1233 | if (rv) { | ||
1234 | drbd_info(device, "local disk flush failed with status %d\n", rv); | ||
1235 | /* would rather check on EOPNOTSUPP, but that is not reliable. | ||
1236 | * don't try again for ANY return value != 0 | ||
1237 | * if (rv == -EOPNOTSUPP) */ | ||
1238 | drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO); | ||
1239 | } | ||
1240 | put_ldev(device); | ||
1241 | kref_put(&device->kref, drbd_destroy_device); | ||
1242 | 1298 | ||
1243 | rcu_read_lock(); | 1299 | rcu_read_lock(); |
1244 | if (rv) | ||
1245 | break; | ||
1246 | } | 1300 | } |
1247 | rcu_read_unlock(); | 1301 | rcu_read_unlock(); |
1302 | |||
1303 | /* Do we want to add a timeout, | ||
1304 | * if disk-timeout is set? */ | ||
1305 | if (!atomic_dec_and_test(&ctx.pending)) | ||
1306 | wait_for_completion(&ctx.done); | ||
1307 | |||
1308 | if (ctx.error) { | ||
1309 | /* would rather check on EOPNOTSUPP, but that is not reliable. | ||
1310 | * don't try again for ANY return value != 0 | ||
1311 | * if (rv == -EOPNOTSUPP) */ | ||
1312 | /* Any error is already reported by bio_endio callback. */ | ||
1313 | drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO); | ||
1314 | } | ||
1248 | } | 1315 | } |
1249 | } | 1316 | } |
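The counter protocol above starts pending at 1 so that completions racing with submission cannot reach zero, and signal done, before the last flush has been submitted; only the submitter's final decrement or the last endio completes the context. A self-contained user-space sketch of the same pattern (C11 atomics, illustrative only, not kernel code):

#include <stdatomic.h>
#include <stdio.h>

struct issue_ctx { atomic_int pending; int error; };

static void complete_one(struct issue_ctx *ctx, int err)
{
	if (err)
		ctx->error = err;
	if (atomic_fetch_sub(&ctx->pending, 1) == 1)
		printf("all done, error=%d\n", ctx->error);  /* complete(&ctx->done) in the kernel code */
}

int main(void)
{
	struct issue_ctx ctx = { 1, 0 };             /* 1 == the submitter's own reference */
	for (int i = 0; i < 3; i++) {
		atomic_fetch_add(&ctx.pending, 1);   /* submit_one_flush()                 */
		complete_one(&ctx, 0);               /* endio; here it fires immediately   */
	}
	complete_one(&ctx, 0);                       /* drop the initial reference         */
	return 0;
}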
1250 | 1317 | ||
@@ -1379,6 +1446,120 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin | |||
1379 | drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); | 1446 | drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); |
1380 | } | 1447 | } |
1381 | 1448 | ||
1449 | /* | ||
1450 | * We *may* ignore the discard-zeroes-data setting, if so configured. | ||
1451 | * | ||
1452 | * Assumption is that "discard_zeroes_data=0" is set only because the backend | ||
1453 | * may ignore partial unaligned discards. | ||
1454 | * | ||
1455 | * LVM/DM thin as of at least | ||
1456 | * LVM version: 2.02.115(2)-RHEL7 (2015-01-28) | ||
1457 | * Library version: 1.02.93-RHEL7 (2015-01-28) | ||
1458 | * Driver version: 4.29.0 | ||
1459 | * still behaves this way. | ||
1460 | * | ||
1461 | * For unaligned (wrt. alignment and granularity) or too small discards, | ||
1462 | * we zero-out the initial (and/or) trailing unaligned partial chunks, | ||
1463 | * but discard all the aligned full chunks. | ||
1464 | * | ||
1465 | * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1". | ||
1466 | */ | ||
1467 | int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard) | ||
1468 | { | ||
1469 | struct block_device *bdev = device->ldev->backing_bdev; | ||
1470 | struct request_queue *q = bdev_get_queue(bdev); | ||
1471 | sector_t tmp, nr; | ||
1472 | unsigned int max_discard_sectors, granularity; | ||
1473 | int alignment; | ||
1474 | int err = 0; | ||
1475 | |||
1476 | if (!discard) | ||
1477 | goto zero_out; | ||
1478 | |||
1479 | /* Zero-sector (unknown) and one-sector granularities are the same. */ | ||
1480 | granularity = max(q->limits.discard_granularity >> 9, 1U); | ||
1481 | alignment = (bdev_discard_alignment(bdev) >> 9) % granularity; | ||
1482 | |||
1483 | max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22)); | ||
1484 | max_discard_sectors -= max_discard_sectors % granularity; | ||
1485 | if (unlikely(!max_discard_sectors)) | ||
1486 | goto zero_out; | ||
1487 | |||
1488 | if (nr_sectors < granularity) | ||
1489 | goto zero_out; | ||
1490 | |||
1491 | tmp = start; | ||
1492 | if (sector_div(tmp, granularity) != alignment) { | ||
1493 | if (nr_sectors < 2*granularity) | ||
1494 | goto zero_out; | ||
1495 | /* start + gran - (start + gran - align) % gran */ | ||
1496 | tmp = start + granularity - alignment; | ||
1497 | tmp = start + granularity - sector_div(tmp, granularity); | ||
1498 | |||
1499 | nr = tmp - start; | ||
1500 | err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0); | ||
1501 | nr_sectors -= nr; | ||
1502 | start = tmp; | ||
1503 | } | ||
1504 | while (nr_sectors >= granularity) { | ||
1505 | nr = min_t(sector_t, nr_sectors, max_discard_sectors); | ||
1506 | err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0); | ||
1507 | nr_sectors -= nr; | ||
1508 | start += nr; | ||
1509 | } | ||
1510 | zero_out: | ||
1511 | if (nr_sectors) { | ||
1512 | err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0); | ||
1513 | } | ||
1514 | return err != 0; | ||
1515 | } | ||
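A worked example of the head/body/tail split performed above, with hypothetical numbers: granularity 2048 sectors (1 MiB), alignment 0, max_discard_sectors 2048 (after rounding down to the granularity), start = 1000, nr_sectors = 5000:

/* head : blkdev_issue_zeroout()  sectors 1000..2047  (1048, up to the first aligned boundary) */
/* body : blkdev_issue_discard()  sectors 2048..4095  (2048, one full granule)                 */
/* tail : blkdev_issue_zeroout()  sectors 4096..5999  (1904, less than one granule remains)    */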
1516 | |||
1517 | static bool can_do_reliable_discards(struct drbd_device *device) | ||
1518 | { | ||
1519 | struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); | ||
1520 | struct disk_conf *dc; | ||
1521 | bool can_do; | ||
1522 | |||
1523 | if (!blk_queue_discard(q)) | ||
1524 | return false; | ||
1525 | |||
1526 | if (q->limits.discard_zeroes_data) | ||
1527 | return true; | ||
1528 | |||
1529 | rcu_read_lock(); | ||
1530 | dc = rcu_dereference(device->ldev->disk_conf); | ||
1531 | can_do = dc->discard_zeroes_if_aligned; | ||
1532 | rcu_read_unlock(); | ||
1533 | return can_do; | ||
1534 | } | ||
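Condensed, the helper above answers "may I trust discards to read back as zeroes?"; a paraphrase of its three cases (editor's summary, no new logic):

/* !blk_queue_discard(q)           -> false (backend cannot discard at all)
 * q->limits.discard_zeroes_data   -> true  (backend guarantees zeroes)
 * otherwise                       -> whatever discard_zeroes_if_aligned is configured to */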
1535 | |||
1536 | static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req) | ||
1537 | { | ||
1538 | /* If the backend cannot discard, or does not guarantee | ||
1539 | * read-back zeroes in discarded ranges, we fall back to | ||
1540 | * zero-out, unless the configuration specifically requested | ||
1541 | * otherwise. */ | ||
1542 | if (!can_do_reliable_discards(device)) | ||
1543 | peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; | ||
1544 | |||
1545 | if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector, | ||
1546 | peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT))) | ||
1547 | peer_req->flags |= EE_WAS_ERROR; | ||
1548 | drbd_endio_write_sec_final(peer_req); | ||
1549 | } | ||
1550 | |||
1551 | static void drbd_issue_peer_wsame(struct drbd_device *device, | ||
1552 | struct drbd_peer_request *peer_req) | ||
1553 | { | ||
1554 | struct block_device *bdev = device->ldev->backing_bdev; | ||
1555 | sector_t s = peer_req->i.sector; | ||
1556 | sector_t nr = peer_req->i.size >> 9; | ||
1557 | if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages)) | ||
1558 | peer_req->flags |= EE_WAS_ERROR; | ||
1559 | drbd_endio_write_sec_final(peer_req); | ||
1560 | } | ||
1561 | |||
1562 | |||
1382 | /** | 1563 | /** |
1383 | * drbd_submit_peer_request() | 1564 | * drbd_submit_peer_request() |
1384 | * @device: DRBD device. | 1565 | * @device: DRBD device. |
@@ -1410,7 +1591,13 @@ int drbd_submit_peer_request(struct drbd_device *device, | |||
1410 | unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; | 1591 | unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; |
1411 | int err = -ENOMEM; | 1592 | int err = -ENOMEM; |
1412 | 1593 | ||
1413 | if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) { | 1594 | /* TRIM/DISCARD: for now, always use the helper function |
1595 | * blkdev_issue_zeroout(..., discard=true). | ||
1596 | * It's synchronous, but it does the right thing wrt. bio splitting. | ||
1597 | * Correctness first, performance later. Next step is to code an | ||
1598 | * asynchronous variant of the same. | ||
1599 | */ | ||
1600 | if (peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) { | ||
1414 | /* wait for all pending IO completions, before we start | 1601 | /* wait for all pending IO completions, before we start |
1415 | * zeroing things out. */ | 1602 | * zeroing things out. */ |
1416 | conn_wait_active_ee_empty(peer_req->peer_device->connection); | 1603 | conn_wait_active_ee_empty(peer_req->peer_device->connection); |
@@ -1418,22 +1605,22 @@ int drbd_submit_peer_request(struct drbd_device *device, | |||
1418 | * so we can find it to present it in debugfs */ | 1605 | * so we can find it to present it in debugfs */ |
1419 | peer_req->submit_jif = jiffies; | 1606 | peer_req->submit_jif = jiffies; |
1420 | peer_req->flags |= EE_SUBMITTED; | 1607 | peer_req->flags |= EE_SUBMITTED; |
1421 | spin_lock_irq(&device->resource->req_lock); | 1608 | |
1422 | list_add_tail(&peer_req->w.list, &device->active_ee); | 1609 | /* If this was a resync request from receive_rs_deallocated(), |
1423 | spin_unlock_irq(&device->resource->req_lock); | 1610 | * it is already on the sync_ee list */ |
1424 | if (blkdev_issue_zeroout(device->ldev->backing_bdev, | 1611 | if (list_empty(&peer_req->w.list)) { |
1425 | sector, data_size >> 9, GFP_NOIO, false)) | 1612 | spin_lock_irq(&device->resource->req_lock); |
1426 | peer_req->flags |= EE_WAS_ERROR; | 1613 | list_add_tail(&peer_req->w.list, &device->active_ee); |
1427 | drbd_endio_write_sec_final(peer_req); | 1614 | spin_unlock_irq(&device->resource->req_lock); |
1615 | } | ||
1616 | |||
1617 | if (peer_req->flags & EE_IS_TRIM) | ||
1618 | drbd_issue_peer_discard(device, peer_req); | ||
1619 | else /* EE_WRITE_SAME */ | ||
1620 | drbd_issue_peer_wsame(device, peer_req); | ||
1428 | return 0; | 1621 | return 0; |
1429 | } | 1622 | } |
1430 | 1623 | ||
1431 | /* Discards don't have any payload. | ||
1432 | * But the scsi layer still expects a bio_vec it can use internally, | ||
1433 | * see sd_setup_discard_cmnd() and blk_add_request_payload(). */ | ||
1434 | if (peer_req->flags & EE_IS_TRIM) | ||
1435 | nr_pages = 1; | ||
1436 | |||
1437 | /* In most cases, we will only need one bio. But in case the lower | 1624 | /* In most cases, we will only need one bio. But in case the lower |
1438 | * level restrictions happen to be different at this offset on this | 1625 | * level restrictions happen to be different at this offset on this |
1439 | * side than those of the sending peer, we may need to submit the | 1626 | * side than those of the sending peer, we may need to submit the |
@@ -1459,11 +1646,6 @@ next_bio: | |||
1459 | bios = bio; | 1646 | bios = bio; |
1460 | ++n_bios; | 1647 | ++n_bios; |
1461 | 1648 | ||
1462 | if (op == REQ_OP_DISCARD) { | ||
1463 | bio->bi_iter.bi_size = data_size; | ||
1464 | goto submit; | ||
1465 | } | ||
1466 | |||
1467 | page_chain_for_each(page) { | 1649 | page_chain_for_each(page) { |
1468 | unsigned len = min_t(unsigned, data_size, PAGE_SIZE); | 1650 | unsigned len = min_t(unsigned, data_size, PAGE_SIZE); |
1469 | if (!bio_add_page(bio, page, len, 0)) { | 1651 | if (!bio_add_page(bio, page, len, 0)) { |
@@ -1485,7 +1667,6 @@ next_bio: | |||
1485 | --nr_pages; | 1667 | --nr_pages; |
1486 | } | 1668 | } |
1487 | D_ASSERT(device, data_size == 0); | 1669 | D_ASSERT(device, data_size == 0); |
1488 | submit: | ||
1489 | D_ASSERT(device, page == NULL); | 1670 | D_ASSERT(device, page == NULL); |
1490 | 1671 | ||
1491 | atomic_set(&peer_req->pending_bios, n_bios); | 1672 | atomic_set(&peer_req->pending_bios, n_bios); |
@@ -1609,8 +1790,26 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf | |||
1609 | return 0; | 1790 | return 0; |
1610 | } | 1791 | } |
1611 | 1792 | ||
1793 | /* quick wrapper in case payload size != request_size (write same) */ | ||
1794 | static void drbd_csum_ee_size(struct crypto_ahash *h, | ||
1795 | struct drbd_peer_request *r, void *d, | ||
1796 | unsigned int payload_size) | ||
1797 | { | ||
1798 | unsigned int tmp = r->i.size; | ||
1799 | r->i.size = payload_size; | ||
1800 | drbd_csum_ee(h, r, d); | ||
1801 | r->i.size = tmp; | ||
1802 | } | ||
1803 | |||
1612 | /* used from receive_RSDataReply (recv_resync_read) | 1804 | /* used from receive_RSDataReply (recv_resync_read) |
1613 | * and from receive_Data */ | 1805 | * and from receive_Data. |
1806 | * data_size: actual payload ("data in") | ||
1807 | * for normal writes that is bi_size. | ||
1808 | * for discards, that is zero. | ||
1809 | * for write same, it is logical_block_size. | ||
1810 | * both trim and write same have the bi_size ("data len to be affected") | ||
1811 | * as extra argument in the packet header. | ||
1812 | */ | ||
1614 | static struct drbd_peer_request * | 1813 | static struct drbd_peer_request * |
1615 | read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | 1814 | read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, |
1616 | struct packet_info *pi) __must_hold(local) | 1815 | struct packet_info *pi) __must_hold(local) |
@@ -1625,6 +1824,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | |||
1625 | void *dig_vv = peer_device->connection->int_dig_vv; | 1824 | void *dig_vv = peer_device->connection->int_dig_vv; |
1626 | unsigned long *data; | 1825 | unsigned long *data; |
1627 | struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL; | 1826 | struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL; |
1827 | struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL; | ||
1628 | 1828 | ||
1629 | digest_size = 0; | 1829 | digest_size = 0; |
1630 | if (!trim && peer_device->connection->peer_integrity_tfm) { | 1830 | if (!trim && peer_device->connection->peer_integrity_tfm) { |
@@ -1639,38 +1839,60 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | |||
1639 | data_size -= digest_size; | 1839 | data_size -= digest_size; |
1640 | } | 1840 | } |
1641 | 1841 | ||
1842 | /* assume request_size == data_size, but special case trim and wsame. */ | ||
1843 | ds = data_size; | ||
1642 | if (trim) { | 1844 | if (trim) { |
1643 | D_ASSERT(peer_device, data_size == 0); | 1845 | if (!expect(data_size == 0)) |
1644 | data_size = be32_to_cpu(trim->size); | 1846 | return NULL; |
1847 | ds = be32_to_cpu(trim->size); | ||
1848 | } else if (wsame) { | ||
1849 | if (data_size != queue_logical_block_size(device->rq_queue)) { | ||
1850 | drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n", | ||
1851 | data_size, queue_logical_block_size(device->rq_queue)); | ||
1852 | return NULL; | ||
1853 | } | ||
1854 | if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) { | ||
1855 | drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n", | ||
1856 | data_size, bdev_logical_block_size(device->ldev->backing_bdev)); | ||
1857 | return NULL; | ||
1858 | } | ||
1859 | ds = be32_to_cpu(wsame->size); | ||
1645 | } | 1860 | } |
1646 | 1861 | ||
1647 | if (!expect(IS_ALIGNED(data_size, 512))) | 1862 | if (!expect(IS_ALIGNED(ds, 512))) |
1648 | return NULL; | 1863 | return NULL; |
1649 | /* prepare for larger trim requests. */ | 1864 | if (trim || wsame) { |
1650 | if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE)) | 1865 | if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9))) |
1866 | return NULL; | ||
1867 | } else if (!expect(ds <= DRBD_MAX_BIO_SIZE)) | ||
1651 | return NULL; | 1868 | return NULL; |
1652 | 1869 | ||
1653 | /* even though we trust our peer, | 1870 | /* even though we trust our peer, |
1654 | * we sometimes have to double check. */ | 1871 | * we sometimes have to double check. */ |
1655 | if (sector + (data_size>>9) > capacity) { | 1872 | if (sector + (ds>>9) > capacity) { |
1656 | drbd_err(device, "request from peer beyond end of local disk: " | 1873 | drbd_err(device, "request from peer beyond end of local disk: " |
1657 | "capacity: %llus < sector: %llus + size: %u\n", | 1874 | "capacity: %llus < sector: %llus + size: %u\n", |
1658 | (unsigned long long)capacity, | 1875 | (unsigned long long)capacity, |
1659 | (unsigned long long)sector, data_size); | 1876 | (unsigned long long)sector, ds); |
1660 | return NULL; | 1877 | return NULL; |
1661 | } | 1878 | } |
1662 | 1879 | ||
1663 | /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD | 1880 | /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD |
1664 | * "criss-cross" setup, that might cause write-out on some other DRBD, | 1881 | * "criss-cross" setup, that might cause write-out on some other DRBD, |
1665 | * which in turn might block on the other node at this very place. */ | 1882 | * which in turn might block on the other node at this very place. */ |
1666 | peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO); | 1883 | peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO); |
1667 | if (!peer_req) | 1884 | if (!peer_req) |
1668 | return NULL; | 1885 | return NULL; |
1669 | 1886 | ||
1670 | peer_req->flags |= EE_WRITE; | 1887 | peer_req->flags |= EE_WRITE; |
1671 | if (trim) | 1888 | if (trim) { |
1889 | peer_req->flags |= EE_IS_TRIM; | ||
1672 | return peer_req; | 1890 | return peer_req; |
1891 | } | ||
1892 | if (wsame) | ||
1893 | peer_req->flags |= EE_WRITE_SAME; | ||
1673 | 1894 | ||
1895 | /* receive payload size bytes into page chain */ | ||
1674 | ds = data_size; | 1896 | ds = data_size; |
1675 | page = peer_req->pages; | 1897 | page = peer_req->pages; |
1676 | page_chain_for_each(page) { | 1898 | page_chain_for_each(page) { |
@@ -1690,7 +1912,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | |||
1690 | } | 1912 | } |
1691 | 1913 | ||
1692 | if (digest_size) { | 1914 | if (digest_size) { |
1693 | drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv); | 1915 | drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size); |
1694 | if (memcmp(dig_in, dig_vv, digest_size)) { | 1916 | if (memcmp(dig_in, dig_vv, digest_size)) { |
1695 | drbd_err(device, "Digest integrity check FAILED: %llus +%u\n", | 1917 | drbd_err(device, "Digest integrity check FAILED: %llus +%u\n", |
1696 | (unsigned long long)sector, data_size); | 1918 | (unsigned long long)sector, data_size); |
@@ -2067,13 +2289,13 @@ static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) | |||
2067 | static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req) | 2289 | static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req) |
2068 | { | 2290 | { |
2069 | struct drbd_peer_request *rs_req; | 2291 | struct drbd_peer_request *rs_req; |
2070 | bool rv = 0; | 2292 | bool rv = false; |
2071 | 2293 | ||
2072 | spin_lock_irq(&device->resource->req_lock); | 2294 | spin_lock_irq(&device->resource->req_lock); |
2073 | list_for_each_entry(rs_req, &device->sync_ee, w.list) { | 2295 | list_for_each_entry(rs_req, &device->sync_ee, w.list) { |
2074 | if (overlaps(peer_req->i.sector, peer_req->i.size, | 2296 | if (overlaps(peer_req->i.sector, peer_req->i.size, |
2075 | rs_req->i.sector, rs_req->i.size)) { | 2297 | rs_req->i.sector, rs_req->i.size)) { |
2076 | rv = 1; | 2298 | rv = true; |
2077 | break; | 2299 | break; |
2078 | } | 2300 | } |
2079 | } | 2301 | } |
@@ -2354,10 +2576,6 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2354 | op = wire_flags_to_bio_op(dp_flags); | 2576 | op = wire_flags_to_bio_op(dp_flags); |
2355 | op_flags = wire_flags_to_bio_flags(dp_flags); | 2577 | op_flags = wire_flags_to_bio_flags(dp_flags); |
2356 | if (pi->cmd == P_TRIM) { | 2578 | if (pi->cmd == P_TRIM) { |
2357 | struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); | ||
2358 | peer_req->flags |= EE_IS_TRIM; | ||
2359 | if (!blk_queue_discard(q)) | ||
2360 | peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; | ||
2361 | D_ASSERT(peer_device, peer_req->i.size > 0); | 2579 | D_ASSERT(peer_device, peer_req->i.size > 0); |
2362 | D_ASSERT(peer_device, op == REQ_OP_DISCARD); | 2580 | D_ASSERT(peer_device, op == REQ_OP_DISCARD); |
2363 | D_ASSERT(peer_device, peer_req->pages == NULL); | 2581 | D_ASSERT(peer_device, peer_req->pages == NULL); |
@@ -2424,11 +2642,11 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2424 | update_peer_seq(peer_device, peer_seq); | 2642 | update_peer_seq(peer_device, peer_seq); |
2425 | spin_lock_irq(&device->resource->req_lock); | 2643 | spin_lock_irq(&device->resource->req_lock); |
2426 | } | 2644 | } |
2427 | /* if we use the zeroout fallback code, we process synchronously | 2645 | /* TRIM and WRITE_SAME are processed synchronously; |
2428 | * and we wait for all pending requests, respectively wait for | 2646 | * we wait for all pending requests and for |
2429 | * active_ee to become empty in drbd_submit_peer_request(); | 2647 | * active_ee to become empty in drbd_submit_peer_request(); |
2430 | * better not add ourselves here. */ | 2648 | * better not add ourselves here. */ |
2431 | if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) | 2649 | if ((peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) == 0) |
2432 | list_add_tail(&peer_req->w.list, &device->active_ee); | 2650 | list_add_tail(&peer_req->w.list, &device->active_ee); |
2433 | spin_unlock_irq(&device->resource->req_lock); | 2651 | spin_unlock_irq(&device->resource->req_lock); |
2434 | 2652 | ||
@@ -2460,7 +2678,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2460 | } | 2678 | } |
2461 | 2679 | ||
2462 | out_interrupted: | 2680 | out_interrupted: |
2463 | drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP); | 2681 | drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP); |
2464 | put_ldev(device); | 2682 | put_ldev(device); |
2465 | drbd_free_peer_req(device, peer_req); | 2683 | drbd_free_peer_req(device, peer_req); |
2466 | return err; | 2684 | return err; |
@@ -2585,6 +2803,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet | |||
2585 | case P_DATA_REQUEST: | 2803 | case P_DATA_REQUEST: |
2586 | drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p); | 2804 | drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p); |
2587 | break; | 2805 | break; |
2806 | case P_RS_THIN_REQ: | ||
2588 | case P_RS_DATA_REQUEST: | 2807 | case P_RS_DATA_REQUEST: |
2589 | case P_CSUM_RS_REQUEST: | 2808 | case P_CSUM_RS_REQUEST: |
2590 | case P_OV_REQUEST: | 2809 | case P_OV_REQUEST: |
@@ -2610,7 +2829,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet | |||
2610 | * "criss-cross" setup, that might cause write-out on some other DRBD, | 2829 | * "criss-cross" setup, that might cause write-out on some other DRBD, |
2611 | * which in turn might block on the other node at this very place. */ | 2830 | * which in turn might block on the other node at this very place. */ |
2612 | peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, | 2831 | peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, |
2613 | true /* has real payload */, GFP_NOIO); | 2832 | size, GFP_NOIO); |
2614 | if (!peer_req) { | 2833 | if (!peer_req) { |
2615 | put_ldev(device); | 2834 | put_ldev(device); |
2616 | return -ENOMEM; | 2835 | return -ENOMEM; |
@@ -2624,6 +2843,12 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet | |||
2624 | peer_req->flags |= EE_APPLICATION; | 2843 | peer_req->flags |= EE_APPLICATION; |
2625 | goto submit; | 2844 | goto submit; |
2626 | 2845 | ||
2846 | case P_RS_THIN_REQ: | ||
2847 | /* If at some point in the future we have a smart way to | ||
2848 | find out if this data block is completely deallocated, | ||
2849 | then we would do something smarter here than reading | ||
2850 | the block... */ | ||
2851 | peer_req->flags |= EE_RS_THIN_REQ; | ||
2627 | case P_RS_DATA_REQUEST: | 2852 | case P_RS_DATA_REQUEST: |
2628 | peer_req->w.cb = w_e_end_rsdata_req; | 2853 | peer_req->w.cb = w_e_end_rsdata_req; |
2629 | fault_type = DRBD_FAULT_RS_RD; | 2854 | fault_type = DRBD_FAULT_RS_RD; |
@@ -2969,7 +3194,8 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, | |||
2969 | -1091 requires proto 91 | 3194 | -1091 requires proto 91 |
2970 | -1096 requires proto 96 | 3195 | -1096 requires proto 96 |
2971 | */ | 3196 | */ |
2972 | static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local) | 3197 | |
3198 | static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local) | ||
2973 | { | 3199 | { |
2974 | struct drbd_peer_device *const peer_device = first_peer_device(device); | 3200 | struct drbd_peer_device *const peer_device = first_peer_device(device); |
2975 | struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; | 3201 | struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; |
@@ -3049,8 +3275,39 @@ static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __m | |||
3049 | * next bit (weight 2) is set when peer was primary */ | 3275 | * next bit (weight 2) is set when peer was primary */ |
3050 | *rule_nr = 40; | 3276 | *rule_nr = 40; |
3051 | 3277 | ||
3278 | /* Neither has the "crashed primary" flag set, | ||
3279 | * only a replication link hiccup. | ||
3280 | if (rct == 0) | ||
3281 | return 0; | ||
3282 | |||
3283 | /* Current UUID equal and no bitmap uuid; does not necessarily | ||
3284 | * mean this was a "simultaneous hard crash", maybe IO was | ||
3285 | * frozen, so no UUID-bump happened. | ||
3286 | * This is a protocol change, overload DRBD_FF_WSAME as flag | ||
3287 | * for "new-enough" peer DRBD version. */ | ||
3288 | if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) { | ||
3289 | *rule_nr = 41; | ||
3290 | if (!(connection->agreed_features & DRBD_FF_WSAME)) { | ||
3291 | drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n"); | ||
3292 | return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8)); | ||
3293 | } | ||
3294 | if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) { | ||
3295 | /* At least one has the "crashed primary" bit set, | ||
3296 | * both are primary now, but neither has rotated its UUIDs? | ||
3297 | * "Can not happen." */ | ||
3298 | drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n"); | ||
3299 | return -100; | ||
3300 | } | ||
3301 | if (device->state.role == R_PRIMARY) | ||
3302 | return 1; | ||
3303 | return -1; | ||
3304 | } | ||
3305 | |||
3306 | /* Both are secondary. | ||
3307 | * Really looks like recovery from simultaneous hard crash. | ||
3308 | * Check which had been primary before, and arbitrate. */ | ||
3052 | switch (rct) { | 3309 | switch (rct) { |
3053 | case 0: /* !self_pri && !peer_pri */ return 0; | 3310 | case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */ |
3054 | case 1: /* self_pri && !peer_pri */ return 1; | 3311 | case 1: /* self_pri && !peer_pri */ return 1; |
3055 | case 2: /* !self_pri && peer_pri */ return -1; | 3312 | case 2: /* !self_pri && peer_pri */ return -1; |
3056 | case 3: /* self_pri && peer_pri */ | 3313 | case 3: /* self_pri && peer_pri */ |
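The new rule 41 above handles equal current UUIDs with no bitmap UUID while at least one node is primary. A condensed model of just that branch, assuming (as the surrounding handshake code suggests) that a positive result makes the local node the sync source and a negative one the sync target:

/* Not the in-tree function; entered only when self or peer is primary. */
static int rule41_arbitrate(int self_primary, int peer_primary, int peer_has_ff_wsame)
{
        if (!peer_has_ff_wsame)
                return -0x10001;        /* stand-in for the encoded "need newer peer" result */
        if (self_primary && peer_primary)
                return -100;            /* unrotated UUIDs on two primaries: unresolvable */
        return self_primary ? 1 : -1;   /* the current primary keeps the authoritative data */
}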
@@ -3177,7 +3434,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, | |||
3177 | drbd_uuid_dump(device, "peer", device->p_uuid, | 3434 | drbd_uuid_dump(device, "peer", device->p_uuid, |
3178 | device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); | 3435 | device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); |
3179 | 3436 | ||
3180 | hg = drbd_uuid_compare(device, &rule_nr); | 3437 | hg = drbd_uuid_compare(device, peer_role, &rule_nr); |
3181 | spin_unlock_irq(&device->ldev->md.uuid_lock); | 3438 | spin_unlock_irq(&device->ldev->md.uuid_lock); |
3182 | 3439 | ||
3183 | drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr); | 3440 | drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr); |
@@ -3186,6 +3443,15 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, | |||
3186 | drbd_alert(device, "Unrelated data, aborting!\n"); | 3443 | drbd_alert(device, "Unrelated data, aborting!\n"); |
3187 | return C_MASK; | 3444 | return C_MASK; |
3188 | } | 3445 | } |
3446 | if (hg < -0x10000) { | ||
3447 | int proto, fflags; | ||
3448 | hg = -hg; | ||
3449 | proto = hg & 0xff; | ||
3450 | fflags = (hg >> 8) & 0xff; | ||
3451 | drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n", | ||
3452 | proto, fflags); | ||
3453 | return C_MASK; | ||
3454 | } | ||
3189 | if (hg < -1000) { | 3455 | if (hg < -1000) { |
3190 | drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000); | 3456 | drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000); |
3191 | return C_MASK; | 3457 | return C_MASK; |
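For rule 41 against an old peer, the handshake result packs both the required protocol version and the required feature flags into a single negative value, which the check above unpacks once hg < -0x10000. A standalone illustration of that encoding, with PRO_VERSION_MAX and the DRBD_FF_WSAME bit assumed for the demo:

#include <stdio.h>

int main(void)
{
        const int pro_version_max = 101;        /* assumed value */
        const int ff_wsame = 0x4;               /* assumed feature bit */
        int hg = -(0x10000 | pro_version_max | (ff_wsame << 8));

        if (hg < -0x10000) {
                int v = -hg;
                printf("need at least protocol %d and feature flags 0x%x\n",
                       v & 0xff, (v >> 8) & 0xff);
        }
        return 0;
}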
@@ -3415,7 +3681,8 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in | |||
3415 | */ | 3681 | */ |
3416 | 3682 | ||
3417 | peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC); | 3683 | peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC); |
3418 | if (!peer_integrity_tfm) { | 3684 | if (IS_ERR(peer_integrity_tfm)) { |
3685 | peer_integrity_tfm = NULL; | ||
3419 | drbd_err(connection, "peer data-integrity-alg %s not supported\n", | 3686 | drbd_err(connection, "peer data-integrity-alg %s not supported\n", |
3420 | integrity_alg); | 3687 | integrity_alg); |
3421 | goto disconnect; | 3688 | goto disconnect; |
@@ -3766,6 +4033,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3766 | struct drbd_peer_device *peer_device; | 4033 | struct drbd_peer_device *peer_device; |
3767 | struct drbd_device *device; | 4034 | struct drbd_device *device; |
3768 | struct p_sizes *p = pi->data; | 4035 | struct p_sizes *p = pi->data; |
4036 | struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL; | ||
3769 | enum determine_dev_size dd = DS_UNCHANGED; | 4037 | enum determine_dev_size dd = DS_UNCHANGED; |
3770 | sector_t p_size, p_usize, p_csize, my_usize; | 4038 | sector_t p_size, p_usize, p_csize, my_usize; |
3771 | int ldsc = 0; /* local disk size changed */ | 4039 | int ldsc = 0; /* local disk size changed */ |
@@ -3785,6 +4053,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3785 | device->p_size = p_size; | 4053 | device->p_size = p_size; |
3786 | 4054 | ||
3787 | if (get_ldev(device)) { | 4055 | if (get_ldev(device)) { |
4056 | sector_t new_size, cur_size; | ||
3788 | rcu_read_lock(); | 4057 | rcu_read_lock(); |
3789 | my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; | 4058 | my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; |
3790 | rcu_read_unlock(); | 4059 | rcu_read_unlock(); |
@@ -3801,11 +4070,13 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3801 | 4070 | ||
3802 | /* Never shrink a device with usable data during connect. | 4071 | /* Never shrink a device with usable data during connect. |
3803 | But allow online shrinking if we are connected. */ | 4072 | But allow online shrinking if we are connected. */ |
3804 | if (drbd_new_dev_size(device, device->ldev, p_usize, 0) < | 4073 | new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0); |
3805 | drbd_get_capacity(device->this_bdev) && | 4074 | cur_size = drbd_get_capacity(device->this_bdev); |
4075 | if (new_size < cur_size && | ||
3806 | device->state.disk >= D_OUTDATED && | 4076 | device->state.disk >= D_OUTDATED && |
3807 | device->state.conn < C_CONNECTED) { | 4077 | device->state.conn < C_CONNECTED) { |
3808 | drbd_err(device, "The peer's disk size is too small!\n"); | 4078 | drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n", |
4079 | (unsigned long long)new_size, (unsigned long long)cur_size); | ||
3809 | conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); | 4080 | conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); |
3810 | put_ldev(device); | 4081 | put_ldev(device); |
3811 | return -EIO; | 4082 | return -EIO; |
@@ -3839,14 +4110,14 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3839 | } | 4110 | } |
3840 | 4111 | ||
3841 | device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); | 4112 | device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); |
3842 | /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). | 4113 | /* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size(). |
3843 | In case we cleared the QUEUE_FLAG_DISCARD from our queue in | 4114 | In case we cleared the QUEUE_FLAG_DISCARD from our queue in |
3844 | drbd_reconsider_max_bio_size(), we can be sure that after | 4115 | drbd_reconsider_queue_parameters(), we can be sure that after |
3845 | drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */ | 4116 | drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */ |
3846 | 4117 | ||
3847 | ddsf = be16_to_cpu(p->dds_flags); | 4118 | ddsf = be16_to_cpu(p->dds_flags); |
3848 | if (get_ldev(device)) { | 4119 | if (get_ldev(device)) { |
3849 | drbd_reconsider_max_bio_size(device, device->ldev); | 4120 | drbd_reconsider_queue_parameters(device, device->ldev, o); |
3850 | dd = drbd_determine_dev_size(device, ddsf, NULL); | 4121 | dd = drbd_determine_dev_size(device, ddsf, NULL); |
3851 | put_ldev(device); | 4122 | put_ldev(device); |
3852 | if (dd == DS_ERROR) | 4123 | if (dd == DS_ERROR) |
@@ -3866,7 +4137,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3866 | * However, if he sends a zero current size, | 4137 | * However, if he sends a zero current size, |
3867 | * take his (user-capped or) backing disk size anyways. | 4138 | * take his (user-capped or) backing disk size anyways. |
3868 | */ | 4139 | */ |
3869 | drbd_reconsider_max_bio_size(device, NULL); | 4140 | drbd_reconsider_queue_parameters(device, NULL, o); |
3870 | drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); | 4141 | drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); |
3871 | } | 4142 | } |
3872 | 4143 | ||
@@ -4599,9 +4870,75 @@ static int receive_out_of_sync(struct drbd_connection *connection, struct packet | |||
4599 | return 0; | 4870 | return 0; |
4600 | } | 4871 | } |
4601 | 4872 | ||
4873 | static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi) | ||
4874 | { | ||
4875 | struct drbd_peer_device *peer_device; | ||
4876 | struct p_block_desc *p = pi->data; | ||
4877 | struct drbd_device *device; | ||
4878 | sector_t sector; | ||
4879 | int size, err = 0; | ||
4880 | |||
4881 | peer_device = conn_peer_device(connection, pi->vnr); | ||
4882 | if (!peer_device) | ||
4883 | return -EIO; | ||
4884 | device = peer_device->device; | ||
4885 | |||
4886 | sector = be64_to_cpu(p->sector); | ||
4887 | size = be32_to_cpu(p->blksize); | ||
4888 | |||
4889 | dec_rs_pending(device); | ||
4890 | |||
4891 | if (get_ldev(device)) { | ||
4892 | struct drbd_peer_request *peer_req; | ||
4893 | const int op = REQ_OP_DISCARD; | ||
4894 | |||
4895 | peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, | ||
4896 | size, 0, GFP_NOIO); | ||
4897 | if (!peer_req) { | ||
4898 | put_ldev(device); | ||
4899 | return -ENOMEM; | ||
4900 | } | ||
4901 | |||
4902 | peer_req->w.cb = e_end_resync_block; | ||
4903 | peer_req->submit_jif = jiffies; | ||
4904 | peer_req->flags |= EE_IS_TRIM; | ||
4905 | |||
4906 | spin_lock_irq(&device->resource->req_lock); | ||
4907 | list_add_tail(&peer_req->w.list, &device->sync_ee); | ||
4908 | spin_unlock_irq(&device->resource->req_lock); | ||
4909 | |||
4910 | atomic_add(pi->size >> 9, &device->rs_sect_ev); | ||
4911 | err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR); | ||
4912 | |||
4913 | if (err) { | ||
4914 | spin_lock_irq(&device->resource->req_lock); | ||
4915 | list_del(&peer_req->w.list); | ||
4916 | spin_unlock_irq(&device->resource->req_lock); | ||
4917 | |||
4918 | drbd_free_peer_req(device, peer_req); | ||
4919 | put_ldev(device); | ||
4920 | err = 0; | ||
4921 | goto fail; | ||
4922 | } | ||
4923 | |||
4924 | inc_unacked(device); | ||
4925 | |||
4926 | /* No put_ldev() here. Gets called in drbd_endio_write_sec_final(), | ||
4927 | as well as drbd_rs_complete_io() */ | ||
4928 | } else { | ||
4929 | fail: | ||
4930 | drbd_rs_complete_io(device, sector); | ||
4931 | drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER); | ||
4932 | } | ||
4933 | |||
4934 | atomic_add(size >> 9, &device->rs_sect_in); | ||
4935 | |||
4936 | return err; | ||
4937 | } | ||
4938 | |||
4602 | struct data_cmd { | 4939 | struct data_cmd { |
4603 | int expect_payload; | 4940 | int expect_payload; |
4604 | size_t pkt_size; | 4941 | unsigned int pkt_size; |
4605 | int (*fn)(struct drbd_connection *, struct packet_info *); | 4942 | int (*fn)(struct drbd_connection *, struct packet_info *); |
4606 | }; | 4943 | }; |
4607 | 4944 | ||
@@ -4626,11 +4963,14 @@ static struct data_cmd drbd_cmd_handler[] = { | |||
4626 | [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, | 4963 | [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, |
4627 | [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, | 4964 | [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, |
4628 | [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, | 4965 | [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, |
4966 | [P_RS_THIN_REQ] = { 0, sizeof(struct p_block_req), receive_DataRequest }, | ||
4629 | [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, | 4967 | [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, |
4630 | [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, | 4968 | [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, |
4631 | [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, | 4969 | [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, |
4632 | [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, | 4970 | [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, |
4633 | [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data }, | 4971 | [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data }, |
4972 | [P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated }, | ||
4973 | [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data }, | ||
4634 | }; | 4974 | }; |
4635 | 4975 | ||
4636 | static void drbdd(struct drbd_connection *connection) | 4976 | static void drbdd(struct drbd_connection *connection) |
@@ -4640,7 +4980,7 @@ static void drbdd(struct drbd_connection *connection) | |||
4640 | int err; | 4980 | int err; |
4641 | 4981 | ||
4642 | while (get_t_state(&connection->receiver) == RUNNING) { | 4982 | while (get_t_state(&connection->receiver) == RUNNING) { |
4643 | struct data_cmd *cmd; | 4983 | struct data_cmd const *cmd; |
4644 | 4984 | ||
4645 | drbd_thread_current_set_cpu(&connection->receiver); | 4985 | drbd_thread_current_set_cpu(&connection->receiver); |
4646 | update_receiver_timing_details(connection, drbd_recv_header); | 4986 | update_receiver_timing_details(connection, drbd_recv_header); |
@@ -4655,11 +4995,18 @@ static void drbdd(struct drbd_connection *connection) | |||
4655 | } | 4995 | } |
4656 | 4996 | ||
4657 | shs = cmd->pkt_size; | 4997 | shs = cmd->pkt_size; |
4998 | if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME) | ||
4999 | shs += sizeof(struct o_qlim); | ||
4658 | if (pi.size > shs && !cmd->expect_payload) { | 5000 | if (pi.size > shs && !cmd->expect_payload) { |
4659 | drbd_err(connection, "No payload expected %s l:%d\n", | 5001 | drbd_err(connection, "No payload expected %s l:%d\n", |
4660 | cmdname(pi.cmd), pi.size); | 5002 | cmdname(pi.cmd), pi.size); |
4661 | goto err_out; | 5003 | goto err_out; |
4662 | } | 5004 | } |
5005 | if (pi.size < shs) { | ||
5006 | drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n", | ||
5007 | cmdname(pi.cmd), (int)shs, pi.size); | ||
5008 | goto err_out; | ||
5009 | } | ||
4663 | 5010 | ||
4664 | if (shs) { | 5011 | if (shs) { |
4665 | update_receiver_timing_details(connection, drbd_recv_all_warn); | 5012 | update_receiver_timing_details(connection, drbd_recv_all_warn); |
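Because a WSAME-capable peer sends P_SIZES with a trailing struct o_qlim, the expected header size is adjusted before both the no-payload-expected check and the new too-small check above. A toy model of that computation; struct sizes and the feature bit are illustrative, not the real wire layout:

#include <stdio.h>

struct p_sizes_demo { unsigned char fixed[32]; };       /* made-up size */
struct o_qlim_demo  { unsigned char qlim[16]; };        /* made-up size */

static unsigned int expected_size(unsigned int agreed_features)
{
        unsigned int shs = sizeof(struct p_sizes_demo);

        if (agreed_features & 0x4)      /* assumed DRBD_FF_WSAME bit */
                shs += sizeof(struct o_qlim_demo);
        return shs;
}

int main(void)
{
        printf("legacy peer: %u bytes, WSAME-capable peer: %u bytes\n",
               expected_size(0), expected_size(0x4));
        return 0;
}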
@@ -4795,9 +5142,11 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device) | |||
4795 | 5142 | ||
4796 | drbd_md_sync(device); | 5143 | drbd_md_sync(device); |
4797 | 5144 | ||
4798 | /* serialize with bitmap writeout triggered by the state change, | 5145 | if (get_ldev(device)) { |
4799 | * if any. */ | 5146 | drbd_bitmap_io(device, &drbd_bm_write_copy_pages, |
4800 | wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); | 5147 | "write from disconnected", BM_LOCKED_CHANGE_ALLOWED); |
5148 | put_ldev(device); | ||
5149 | } | ||
4801 | 5150 | ||
4802 | /* tcp_close and release of sendpage pages can be deferred. I don't | 5151 | /* tcp_close and release of sendpage pages can be deferred. I don't |
4803 | * want to use SO_LINGER, because apparently it can be deferred for | 5152 | * want to use SO_LINGER, because apparently it can be deferred for |
@@ -4904,8 +5253,12 @@ static int drbd_do_features(struct drbd_connection *connection) | |||
4904 | drbd_info(connection, "Handshake successful: " | 5253 | drbd_info(connection, "Handshake successful: " |
4905 | "Agreed network protocol version %d\n", connection->agreed_pro_version); | 5254 | "Agreed network protocol version %d\n", connection->agreed_pro_version); |
4906 | 5255 | ||
4907 | drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n", | 5256 | drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s.\n", |
4908 | connection->agreed_features & FF_TRIM ? " " : " not "); | 5257 | connection->agreed_features, |
5258 | connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "", | ||
5259 | connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "", | ||
5260 | connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : | ||
5261 | connection->agreed_features ? "" : " none"); | ||
4909 | 5262 | ||
4910 | return 1; | 5263 | return 1; |
4911 | 5264 | ||
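The handshake log now prints the whole agreed feature bitmask plus one name per known flag, and the final ternary appends " none" only when no flag at all was negotiated. A userspace rendering of the same expression, with the flag values assumed as TRIM=1, THIN_RESYNC=2, WSAME=4:

#include <stdio.h>

#define FF_TRIM         0x1     /* assumed values for the demo */
#define FF_THIN_RESYNC  0x2
#define FF_WSAME        0x4

static void print_features(unsigned int agreed_features)
{
        printf("Feature flags enabled on protocol level: 0x%x%s%s%s.\n",
               agreed_features,
               agreed_features & FF_TRIM ? " TRIM" : "",
               agreed_features & FF_THIN_RESYNC ? " THIN_RESYNC" : "",
               agreed_features & FF_WSAME ? " WRITE_SAME" :
               agreed_features ? "" : " none");
}

int main(void)
{
        print_features(FF_TRIM | FF_THIN_RESYNC);       /* peer without WRITE_SAME */
        print_features(0);                              /* legacy peer: " none" */
        return 0;
}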
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index eef6e9575b4e..66b8e4bb74d8 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -47,8 +47,7 @@ static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *r | |||
47 | &device->vdisk->part0, req->start_jif); | 47 | &device->vdisk->part0, req->start_jif); |
48 | } | 48 | } |
49 | 49 | ||
50 | static struct drbd_request *drbd_req_new(struct drbd_device *device, | 50 | static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio *bio_src) |
51 | struct bio *bio_src) | ||
52 | { | 51 | { |
53 | struct drbd_request *req; | 52 | struct drbd_request *req; |
54 | 53 | ||
@@ -58,10 +57,12 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, | |||
58 | memset(req, 0, sizeof(*req)); | 57 | memset(req, 0, sizeof(*req)); |
59 | 58 | ||
60 | drbd_req_make_private_bio(req, bio_src); | 59 | drbd_req_make_private_bio(req, bio_src); |
61 | req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0; | 60 | req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0) |
62 | req->device = device; | 61 | | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0) |
63 | req->master_bio = bio_src; | 62 | | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0); |
64 | req->epoch = 0; | 63 | req->device = device; |
64 | req->master_bio = bio_src; | ||
65 | req->epoch = 0; | ||
65 | 66 | ||
66 | drbd_clear_interval(&req->i); | 67 | drbd_clear_interval(&req->i); |
67 | req->i.sector = bio_src->bi_iter.bi_sector; | 68 | req->i.sector = bio_src->bi_iter.bi_sector; |
@@ -218,7 +219,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) | |||
218 | { | 219 | { |
219 | const unsigned s = req->rq_state; | 220 | const unsigned s = req->rq_state; |
220 | struct drbd_device *device = req->device; | 221 | struct drbd_device *device = req->device; |
221 | int rw; | ||
222 | int error, ok; | 222 | int error, ok; |
223 | 223 | ||
224 | /* we must not complete the master bio, while it is | 224 | /* we must not complete the master bio, while it is |
@@ -242,8 +242,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) | |||
242 | return; | 242 | return; |
243 | } | 243 | } |
244 | 244 | ||
245 | rw = bio_rw(req->master_bio); | ||
246 | |||
247 | /* | 245 | /* |
248 | * figure out whether to report success or failure. | 246 | * figure out whether to report success or failure. |
249 | * | 247 | * |
@@ -267,7 +265,7 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) | |||
267 | * epoch number. If they match, increase the current_tle_nr, | 265 | * epoch number. If they match, increase the current_tle_nr, |
268 | * and reset the transfer log epoch write_cnt. | 266 | * and reset the transfer log epoch write_cnt. |
269 | */ | 267 | */ |
270 | if (rw == WRITE && | 268 | if (op_is_write(bio_op(req->master_bio)) && |
271 | req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr)) | 269 | req->epoch == atomic_read(&first_peer_device(device)->connection->current_tle_nr)) |
272 | start_new_tl_epoch(first_peer_device(device)->connection); | 270 | start_new_tl_epoch(first_peer_device(device)->connection); |
273 | 271 | ||
@@ -284,11 +282,14 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) | |||
284 | * because no path was available, in which case | 282 | * because no path was available, in which case |
285 | * it was not even added to the transfer_log. | 283 | * it was not even added to the transfer_log. |
286 | * | 284 | * |
287 | * READA may fail, and will not be retried. | 285 | * read-ahead may fail, and will not be retried. |
288 | * | 286 | * |
289 | * WRITE should have used all available paths already. | 287 | * WRITE should have used all available paths already. |
290 | */ | 288 | */ |
291 | if (!ok && rw == READ && !list_empty(&req->tl_requests)) | 289 | if (!ok && |
290 | bio_op(req->master_bio) == REQ_OP_READ && | ||
291 | !(req->master_bio->bi_rw & REQ_RAHEAD) && | ||
292 | !list_empty(&req->tl_requests)) | ||
292 | req->rq_state |= RQ_POSTPONED; | 293 | req->rq_state |= RQ_POSTPONED; |
293 | 294 | ||
294 | if (!(req->rq_state & RQ_POSTPONED)) { | 295 | if (!(req->rq_state & RQ_POSTPONED)) { |
@@ -644,7 +645,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
644 | __drbd_chk_io_error(device, DRBD_READ_ERROR); | 645 | __drbd_chk_io_error(device, DRBD_READ_ERROR); |
645 | /* fall through. */ | 646 | /* fall through. */ |
646 | case READ_AHEAD_COMPLETED_WITH_ERROR: | 647 | case READ_AHEAD_COMPLETED_WITH_ERROR: |
647 | /* it is legal to fail READA, no __drbd_chk_io_error in that case. */ | 648 | /* it is legal to fail read-ahead, no __drbd_chk_io_error in that case. */ |
648 | mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); | 649 | mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); |
649 | break; | 650 | break; |
650 | 651 | ||
@@ -656,7 +657,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
656 | break; | 657 | break; |
657 | 658 | ||
658 | case QUEUE_FOR_NET_READ: | 659 | case QUEUE_FOR_NET_READ: |
659 | /* READ or READA, and | 660 | /* READ, and |
660 | * no local disk, | 661 | * no local disk, |
661 | * or target area marked as invalid, | 662 | * or target area marked as invalid, |
662 | * or just got an io-error. */ | 663 | * or just got an io-error. */ |
@@ -977,16 +978,20 @@ static void complete_conflicting_writes(struct drbd_request *req) | |||
977 | sector_t sector = req->i.sector; | 978 | sector_t sector = req->i.sector; |
978 | int size = req->i.size; | 979 | int size = req->i.size; |
979 | 980 | ||
980 | i = drbd_find_overlap(&device->write_requests, sector, size); | ||
981 | if (!i) | ||
982 | return; | ||
983 | |||
984 | for (;;) { | 981 | for (;;) { |
985 | prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE); | 982 | drbd_for_each_overlap(i, &device->write_requests, sector, size) { |
986 | i = drbd_find_overlap(&device->write_requests, sector, size); | 983 | /* Ignore, if already completed to upper layers. */ |
987 | if (!i) | 984 | if (i->completed) |
985 | continue; | ||
986 | /* Handle the first found overlap. After the schedule | ||
987 | * we have to restart the tree walk. */ | ||
988 | break; | ||
989 | } | ||
990 | if (!i) /* if any */ | ||
988 | break; | 991 | break; |
992 | |||
989 | /* Indicate to wake up device->misc_wait on progress. */ | 993 | /* Indicate to wake up device->misc_wait on progress. */ |
994 | prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
990 | i->waiting = true; | 995 | i->waiting = true; |
991 | spin_unlock_irq(&device->resource->req_lock); | 996 | spin_unlock_irq(&device->resource->req_lock); |
992 | schedule(); | 997 | schedule(); |
@@ -995,7 +1000,7 @@ static void complete_conflicting_writes(struct drbd_request *req) | |||
995 | finish_wait(&device->misc_wait, &wait); | 1000 | finish_wait(&device->misc_wait, &wait); |
996 | } | 1001 | } |
997 | 1002 | ||
998 | /* called within req_lock and rcu_read_lock() */ | 1003 | /* called within req_lock */ |
999 | static void maybe_pull_ahead(struct drbd_device *device) | 1004 | static void maybe_pull_ahead(struct drbd_device *device) |
1000 | { | 1005 | { |
1001 | struct drbd_connection *connection = first_peer_device(device)->connection; | 1006 | struct drbd_connection *connection = first_peer_device(device)->connection; |
@@ -1152,12 +1157,29 @@ static int drbd_process_write_request(struct drbd_request *req) | |||
1152 | return remote; | 1157 | return remote; |
1153 | } | 1158 | } |
1154 | 1159 | ||
1160 | static void drbd_process_discard_req(struct drbd_request *req) | ||
1161 | { | ||
1162 | int err = drbd_issue_discard_or_zero_out(req->device, | ||
1163 | req->i.sector, req->i.size >> 9, true); | ||
1164 | |||
1165 | if (err) | ||
1166 | req->private_bio->bi_error = -EIO; | ||
1167 | bio_endio(req->private_bio); | ||
1168 | } | ||
1169 | |||
1155 | static void | 1170 | static void |
1156 | drbd_submit_req_private_bio(struct drbd_request *req) | 1171 | drbd_submit_req_private_bio(struct drbd_request *req) |
1157 | { | 1172 | { |
1158 | struct drbd_device *device = req->device; | 1173 | struct drbd_device *device = req->device; |
1159 | struct bio *bio = req->private_bio; | 1174 | struct bio *bio = req->private_bio; |
1160 | const int rw = bio_rw(bio); | 1175 | unsigned int type; |
1176 | |||
1177 | if (bio_op(bio) != REQ_OP_READ) | ||
1178 | type = DRBD_FAULT_DT_WR; | ||
1179 | else if (bio->bi_rw & REQ_RAHEAD) | ||
1180 | type = DRBD_FAULT_DT_RA; | ||
1181 | else | ||
1182 | type = DRBD_FAULT_DT_RD; | ||
1161 | 1183 | ||
1162 | bio->bi_bdev = device->ldev->backing_bdev; | 1184 | bio->bi_bdev = device->ldev->backing_bdev; |
1163 | 1185 | ||
@@ -1167,11 +1189,10 @@ drbd_submit_req_private_bio(struct drbd_request *req) | |||
1167 | * stable storage, and this is a WRITE, we may not even submit | 1189 | * stable storage, and this is a WRITE, we may not even submit |
1168 | * this bio. */ | 1190 | * this bio. */ |
1169 | if (get_ldev(device)) { | 1191 | if (get_ldev(device)) { |
1170 | if (drbd_insert_fault(device, | 1192 | if (drbd_insert_fault(device, type)) |
1171 | rw == WRITE ? DRBD_FAULT_DT_WR | ||
1172 | : rw == READ ? DRBD_FAULT_DT_RD | ||
1173 | : DRBD_FAULT_DT_RA)) | ||
1174 | bio_io_error(bio); | 1193 | bio_io_error(bio); |
1194 | else if (bio_op(bio) == REQ_OP_DISCARD) | ||
1195 | drbd_process_discard_req(req); | ||
1175 | else | 1196 | else |
1176 | generic_make_request(bio); | 1197 | generic_make_request(bio); |
1177 | put_ldev(device); | 1198 | put_ldev(device); |
@@ -1223,24 +1244,45 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long | |||
1223 | /* Update disk stats */ | 1244 | /* Update disk stats */ |
1224 | _drbd_start_io_acct(device, req); | 1245 | _drbd_start_io_acct(device, req); |
1225 | 1246 | ||
1247 | /* process discards always from our submitter thread */ | ||
1248 | if (bio_op(bio) == REQ_OP_DISCARD) | ||
1249 | goto queue_for_submitter_thread; | ||
1250 | |||
1226 | if (rw == WRITE && req->private_bio && req->i.size | 1251 | if (rw == WRITE && req->private_bio && req->i.size |
1227 | && !test_bit(AL_SUSPENDED, &device->flags)) { | 1252 | && !test_bit(AL_SUSPENDED, &device->flags)) { |
1228 | if (!drbd_al_begin_io_fastpath(device, &req->i)) { | 1253 | if (!drbd_al_begin_io_fastpath(device, &req->i)) |
1229 | atomic_inc(&device->ap_actlog_cnt); | 1254 | goto queue_for_submitter_thread; |
1230 | drbd_queue_write(device, req); | ||
1231 | return NULL; | ||
1232 | } | ||
1233 | req->rq_state |= RQ_IN_ACT_LOG; | 1255 | req->rq_state |= RQ_IN_ACT_LOG; |
1234 | req->in_actlog_jif = jiffies; | 1256 | req->in_actlog_jif = jiffies; |
1235 | } | 1257 | } |
1236 | |||
1237 | return req; | 1258 | return req; |
1259 | |||
1260 | queue_for_submitter_thread: | ||
1261 | atomic_inc(&device->ap_actlog_cnt); | ||
1262 | drbd_queue_write(device, req); | ||
1263 | return NULL; | ||
1264 | } | ||
1265 | |||
1266 | /* Require at least one path to current data. | ||
1267 | * We don't want to allow writes on C_STANDALONE D_INCONSISTENT: | ||
1268 | * We would not allow to read what was written, | ||
1269 | * we would not have bumped the data generation uuids, | ||
1270 | * we would cause data divergence for all the wrong reasons. | ||
1271 | * | ||
1272 | * If we don't see at least one D_UP_TO_DATE, we will fail this request, | ||
1273 | * which either returns EIO, or, if OND_SUSPEND_IO is set, suspends IO, | ||
1274 | * and queues for retry later. | ||
1275 | */ | ||
1276 | static bool may_do_writes(struct drbd_device *device) | ||
1277 | { | ||
1278 | const union drbd_dev_state s = device->state; | ||
1279 | return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE; | ||
1238 | } | 1280 | } |
1239 | 1281 | ||
1240 | static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) | 1282 | static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) |
1241 | { | 1283 | { |
1242 | struct drbd_resource *resource = device->resource; | 1284 | struct drbd_resource *resource = device->resource; |
1243 | const int rw = bio_rw(req->master_bio); | 1285 | const int rw = bio_data_dir(req->master_bio); |
1244 | struct bio_and_error m = { NULL, }; | 1286 | struct bio_and_error m = { NULL, }; |
1245 | bool no_remote = false; | 1287 | bool no_remote = false; |
1246 | bool submit_private_bio = false; | 1288 | bool submit_private_bio = false; |
@@ -1270,7 +1312,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request | |||
1270 | goto out; | 1312 | goto out; |
1271 | } | 1313 | } |
1272 | 1314 | ||
1273 | /* We fail READ/READA early, if we can not serve it. | 1315 | /* We fail READ early, if we can not serve it. |
1274 | * We must do this before req is registered on any lists. | 1316 | * We must do this before req is registered on any lists. |
1275 | * Otherwise, drbd_req_complete() will queue failed READ for retry. */ | 1317 | * Otherwise, drbd_req_complete() will queue failed READ for retry. */ |
1276 | if (rw != WRITE) { | 1318 | if (rw != WRITE) { |
@@ -1291,6 +1333,12 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request | |||
1291 | } | 1333 | } |
1292 | 1334 | ||
1293 | if (rw == WRITE) { | 1335 | if (rw == WRITE) { |
1336 | if (req->private_bio && !may_do_writes(device)) { | ||
1337 | bio_put(req->private_bio); | ||
1338 | req->private_bio = NULL; | ||
1339 | put_ldev(device); | ||
1340 | goto nodata; | ||
1341 | } | ||
1294 | if (!drbd_process_write_request(req)) | 1342 | if (!drbd_process_write_request(req)) |
1295 | no_remote = true; | 1343 | no_remote = true; |
1296 | } else { | 1344 | } else { |
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index bb2ef78165e5..eb49e7f2da91 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h | |||
@@ -206,6 +206,8 @@ enum drbd_req_state_bits { | |||
206 | 206 | ||
207 | /* Set when this is a write, clear for a read */ | 207 | /* Set when this is a write, clear for a read */ |
208 | __RQ_WRITE, | 208 | __RQ_WRITE, |
209 | __RQ_WSAME, | ||
210 | __RQ_UNMAP, | ||
209 | 211 | ||
210 | /* Should call drbd_al_complete_io() for this request... */ | 212 | /* Should call drbd_al_complete_io() for this request... */ |
211 | __RQ_IN_ACT_LOG, | 213 | __RQ_IN_ACT_LOG, |
@@ -241,10 +243,11 @@ enum drbd_req_state_bits { | |||
241 | #define RQ_NET_OK (1UL << __RQ_NET_OK) | 243 | #define RQ_NET_OK (1UL << __RQ_NET_OK) |
242 | #define RQ_NET_SIS (1UL << __RQ_NET_SIS) | 244 | #define RQ_NET_SIS (1UL << __RQ_NET_SIS) |
243 | 245 | ||
244 | /* 0x1f8 */ | ||
245 | #define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK) | 246 | #define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK) |
246 | 247 | ||
247 | #define RQ_WRITE (1UL << __RQ_WRITE) | 248 | #define RQ_WRITE (1UL << __RQ_WRITE) |
249 | #define RQ_WSAME (1UL << __RQ_WSAME) | ||
250 | #define RQ_UNMAP (1UL << __RQ_UNMAP) | ||
248 | #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) | 251 | #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) |
249 | #define RQ_POSTPONED (1UL << __RQ_POSTPONED) | 252 | #define RQ_POSTPONED (1UL << __RQ_POSTPONED) |
250 | #define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP) | 253 | #define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP) |
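drbd_req_new() in the hunk further up derives the new RQ_WSAME and RQ_UNMAP bits directly from the bio operation. A toy version of that mapping with placeholder flag values; the real bits are the ones defined in drbd_req.h above:

enum { DEMO_RQ_WRITE = 0x1, DEMO_RQ_WSAME = 0x2, DEMO_RQ_UNMAP = 0x4 };
enum demo_op { DEMO_READ, DEMO_WRITE, DEMO_DISCARD, DEMO_WRITE_SAME };

/* Writes, write-same and discards each contribute their own bit; discard
 * and write-same count as writes, matching bio_data_dir() in the original. */
static unsigned long initial_rq_state(enum demo_op op)
{
        return (op != DEMO_READ       ? DEMO_RQ_WRITE : 0)
             | (op == DEMO_WRITE_SAME ? DEMO_RQ_WSAME : 0)
             | (op == DEMO_DISCARD    ? DEMO_RQ_UNMAP : 0);
}

int main(void)
{
        return initial_rq_state(DEMO_DISCARD) == (DEMO_RQ_WRITE | DEMO_RQ_UNMAP) ? 0 : 1;
}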
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 5a7ef7873b67..eea0c4aec978 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c | |||
@@ -814,7 +814,7 @@ is_valid_state(struct drbd_device *device, union drbd_state ns) | |||
814 | } | 814 | } |
815 | 815 | ||
816 | if (rv <= 0) | 816 | if (rv <= 0) |
817 | /* already found a reason to abort */; | 817 | goto out; /* already found a reason to abort */ |
818 | else if (ns.role == R_SECONDARY && device->open_cnt) | 818 | else if (ns.role == R_SECONDARY && device->open_cnt) |
819 | rv = SS_DEVICE_IN_USE; | 819 | rv = SS_DEVICE_IN_USE; |
820 | 820 | ||
@@ -862,6 +862,7 @@ is_valid_state(struct drbd_device *device, union drbd_state ns) | |||
862 | else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) | 862 | else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) |
863 | rv = SS_CONNECTED_OUTDATES; | 863 | rv = SS_CONNECTED_OUTDATES; |
864 | 864 | ||
865 | out: | ||
865 | rcu_read_unlock(); | 866 | rcu_read_unlock(); |
866 | 867 | ||
867 | return rv; | 868 | return rv; |
@@ -906,6 +907,15 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_c | |||
906 | (ns.conn >= C_CONNECTED && os.conn == C_WF_REPORT_PARAMS))) | 907 | (ns.conn >= C_CONNECTED && os.conn == C_WF_REPORT_PARAMS))) |
907 | rv = SS_IN_TRANSIENT_STATE; | 908 | rv = SS_IN_TRANSIENT_STATE; |
908 | 909 | ||
910 | /* Do not promote during resync handshake triggered by "force primary". | ||
911 | * This is a hack. It should really be rejected by the peer during the | ||
912 | * cluster wide state change request. */ | ||
913 | if (os.role != R_PRIMARY && ns.role == R_PRIMARY | ||
914 | && ns.pdsk == D_UP_TO_DATE | ||
915 | && ns.disk != D_UP_TO_DATE && ns.disk != D_DISKLESS | ||
916 | && (ns.conn <= C_WF_SYNC_UUID || ns.conn != os.conn)) | ||
917 | rv = SS_IN_TRANSIENT_STATE; | ||
918 | |||
909 | if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) | 919 | if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) |
910 | rv = SS_NEED_CONNECTION; | 920 | rv = SS_NEED_CONNECTION; |
911 | 921 | ||
@@ -1628,6 +1638,26 @@ static void broadcast_state_change(struct drbd_state_change *state_change) | |||
1628 | #undef REMEMBER_STATE_CHANGE | 1638 | #undef REMEMBER_STATE_CHANGE |
1629 | } | 1639 | } |
1630 | 1640 | ||
1641 | /* takes old and new peer disk state */ | ||
1642 | static bool lost_contact_to_peer_data(enum drbd_disk_state os, enum drbd_disk_state ns) | ||
1643 | { | ||
1644 | if ((os >= D_INCONSISTENT && os != D_UNKNOWN && os != D_OUTDATED) | ||
1645 | && (ns < D_INCONSISTENT || ns == D_UNKNOWN || ns == D_OUTDATED)) | ||
1646 | return true; | ||
1647 | |||
1648 | /* Scenario, starting with normal operation | ||
1649 | * Connected Primary/Secondary UpToDate/UpToDate | ||
1650 | * NetworkFailure Primary/Unknown UpToDate/DUnknown (frozen) | ||
1651 | * ... | ||
1652 | * Connected Primary/Secondary UpToDate/Diskless (resumed; needs to bump uuid!) | ||
1653 | */ | ||
1654 | if (os == D_UNKNOWN | ||
1655 | && (ns == D_DISKLESS || ns == D_FAILED || ns == D_OUTDATED)) | ||
1656 | return true; | ||
1657 | |||
1658 | return false; | ||
1659 | } | ||
1660 | |||
1631 | /** | 1661 | /** |
1632 | * after_state_ch() - Perform after state change actions that may sleep | 1662 | * after_state_ch() - Perform after state change actions that may sleep |
1633 | * @device: DRBD device. | 1663 | * @device: DRBD device. |
@@ -1675,7 +1705,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1675 | what = RESEND; | 1705 | what = RESEND; |
1676 | 1706 | ||
1677 | if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && | 1707 | if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && |
1678 | conn_lowest_disk(connection) > D_NEGOTIATING) | 1708 | conn_lowest_disk(connection) == D_UP_TO_DATE) |
1679 | what = RESTART_FROZEN_DISK_IO; | 1709 | what = RESTART_FROZEN_DISK_IO; |
1680 | 1710 | ||
1681 | if (resource->susp_nod && what != NOTHING) { | 1711 | if (resource->susp_nod && what != NOTHING) { |
@@ -1699,6 +1729,13 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1699 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) | 1729 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) |
1700 | clear_bit(NEW_CUR_UUID, &peer_device->device->flags); | 1730 | clear_bit(NEW_CUR_UUID, &peer_device->device->flags); |
1701 | rcu_read_unlock(); | 1731 | rcu_read_unlock(); |
1732 | |||
1733 | /* We should actively create a new uuid, _before_ | ||
1734 | * we resume/resent, if the peer is diskless | ||
1735 | * (recovery from a multiple error scenario). | ||
1736 | * Currently, this happens with a slight delay | ||
1737 | * below when checking lost_contact_to_peer_data() ... | ||
1738 | */ | ||
1702 | _tl_restart(connection, RESEND); | 1739 | _tl_restart(connection, RESEND); |
1703 | _conn_request_state(connection, | 1740 | _conn_request_state(connection, |
1704 | (union drbd_state) { { .susp_fen = 1 } }, | 1741 | (union drbd_state) { { .susp_fen = 1 } }, |
@@ -1742,12 +1779,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1742 | BM_LOCKED_TEST_ALLOWED); | 1779 | BM_LOCKED_TEST_ALLOWED); |
1743 | 1780 | ||
1744 | /* Lost contact to peer's copy of the data */ | 1781 | /* Lost contact to peer's copy of the data */ |
1745 | if ((os.pdsk >= D_INCONSISTENT && | 1782 | if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) { |
1746 | os.pdsk != D_UNKNOWN && | ||
1747 | os.pdsk != D_OUTDATED) | ||
1748 | && (ns.pdsk < D_INCONSISTENT || | ||
1749 | ns.pdsk == D_UNKNOWN || | ||
1750 | ns.pdsk == D_OUTDATED)) { | ||
1751 | if (get_ldev(device)) { | 1783 | if (get_ldev(device)) { |
1752 | if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && | 1784 | if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && |
1753 | device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { | 1785 | device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { |
@@ -1934,12 +1966,17 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1934 | 1966 | ||
1935 | /* This triggers bitmap writeout of potentially still unwritten pages | 1967 | /* This triggers bitmap writeout of potentially still unwritten pages |
1936 | * if the resync finished cleanly, or aborted because of peer disk | 1968 | * if the resync finished cleanly, or aborted because of peer disk |
1937 | * failure, or because of connection loss. | 1969 | * failure, or on transition from resync back to AHEAD/BEHIND. |
1970 | * | ||
1971 | * Connection loss is handled in drbd_disconnected() by the receiver. | ||
1972 | * | ||
1938 | * For resync aborted because of local disk failure, we cannot do | 1973 | * For resync aborted because of local disk failure, we cannot do |
1939 | * any bitmap writeout anymore. | 1974 | * any bitmap writeout anymore. |
1975 | * | ||
1940 | * No harm done if some bits change during this phase. | 1976 | * No harm done if some bits change during this phase. |
1941 | */ | 1977 | */ |
1942 | if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(device)) { | 1978 | if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) && |
1979 | (ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) { | ||
1943 | drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL, | 1980 | drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL, |
1944 | "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); | 1981 | "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); |
1945 | put_ldev(device); | 1982 | put_ldev(device); |
@@ -2160,9 +2197,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union | |||
2160 | ns.disk = os.disk; | 2197 | ns.disk = os.disk; |
2161 | 2198 | ||
2162 | rv = _drbd_set_state(device, ns, flags, NULL); | 2199 | rv = _drbd_set_state(device, ns, flags, NULL); |
2163 | if (rv < SS_SUCCESS) | 2200 | BUG_ON(rv < SS_SUCCESS); |
2164 | BUG(); | ||
2165 | |||
2166 | ns.i = device->state.i; | 2201 | ns.i = device->state.i; |
2167 | ns_max.role = max_role(ns.role, ns_max.role); | 2202 | ns_max.role = max_role(ns.role, ns_max.role); |
2168 | ns_max.peer = max_role(ns.peer, ns_max.peer); | 2203 | ns_max.peer = max_role(ns.peer, ns_max.peer); |
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index bd989536f888..6c9d5d4a8a75 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h | |||
@@ -140,7 +140,7 @@ extern void drbd_resume_al(struct drbd_device *device); | |||
140 | extern bool conn_all_vols_unconf(struct drbd_connection *connection); | 140 | extern bool conn_all_vols_unconf(struct drbd_connection *connection); |
141 | 141 | ||
142 | /** | 142 | /** |
143 | * drbd_request_state() - Reqest a state change | 143 | * drbd_request_state() - Request a state change |
144 | * @device: DRBD device. | 144 | * @device: DRBD device. |
145 | * @mask: mask of state bits to change. | 145 | * @mask: mask of state bits to change. |
146 | * @val: value of new state bits. | 146 | * @val: value of new state bits. |
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 80b0f63c7075..0eeab14776e9 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c | |||
@@ -26,7 +26,7 @@ | |||
26 | #include <linux/drbd.h> | 26 | #include <linux/drbd.h> |
27 | #include "drbd_strings.h" | 27 | #include "drbd_strings.h" |
28 | 28 | ||
29 | static const char *drbd_conn_s_names[] = { | 29 | static const char * const drbd_conn_s_names[] = { |
30 | [C_STANDALONE] = "StandAlone", | 30 | [C_STANDALONE] = "StandAlone", |
31 | [C_DISCONNECTING] = "Disconnecting", | 31 | [C_DISCONNECTING] = "Disconnecting", |
32 | [C_UNCONNECTED] = "Unconnected", | 32 | [C_UNCONNECTED] = "Unconnected", |
@@ -53,13 +53,13 @@ static const char *drbd_conn_s_names[] = { | |||
53 | [C_BEHIND] = "Behind", | 53 | [C_BEHIND] = "Behind", |
54 | }; | 54 | }; |
55 | 55 | ||
56 | static const char *drbd_role_s_names[] = { | 56 | static const char * const drbd_role_s_names[] = { |
57 | [R_PRIMARY] = "Primary", | 57 | [R_PRIMARY] = "Primary", |
58 | [R_SECONDARY] = "Secondary", | 58 | [R_SECONDARY] = "Secondary", |
59 | [R_UNKNOWN] = "Unknown" | 59 | [R_UNKNOWN] = "Unknown" |
60 | }; | 60 | }; |
61 | 61 | ||
62 | static const char *drbd_disk_s_names[] = { | 62 | static const char * const drbd_disk_s_names[] = { |
63 | [D_DISKLESS] = "Diskless", | 63 | [D_DISKLESS] = "Diskless", |
64 | [D_ATTACHING] = "Attaching", | 64 | [D_ATTACHING] = "Attaching", |
65 | [D_FAILED] = "Failed", | 65 | [D_FAILED] = "Failed", |
@@ -71,7 +71,7 @@ static const char *drbd_disk_s_names[] = { | |||
71 | [D_UP_TO_DATE] = "UpToDate", | 71 | [D_UP_TO_DATE] = "UpToDate", |
72 | }; | 72 | }; |
73 | 73 | ||
74 | static const char *drbd_state_sw_errors[] = { | 74 | static const char * const drbd_state_sw_errors[] = { |
75 | [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config", | 75 | [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config", |
76 | [-SS_NO_UP_TO_DATE_DISK] = "Need access to UpToDate data", | 76 | [-SS_NO_UP_TO_DATE_DISK] = "Need access to UpToDate data", |
77 | [-SS_NO_LOCAL_DISK] = "Can not resync without local disk", | 77 | [-SS_NO_LOCAL_DISK] = "Can not resync without local disk", |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 51fab978eb61..35dbb3dca47e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -173,8 +173,8 @@ void drbd_peer_request_endio(struct bio *bio) | |||
173 | { | 173 | { |
174 | struct drbd_peer_request *peer_req = bio->bi_private; | 174 | struct drbd_peer_request *peer_req = bio->bi_private; |
175 | struct drbd_device *device = peer_req->peer_device->device; | 175 | struct drbd_device *device = peer_req->peer_device->device; |
176 | int is_write = bio_data_dir(bio) == WRITE; | 176 | bool is_write = bio_data_dir(bio) == WRITE; |
177 | int is_discard = !!(bio_op(bio) == REQ_OP_DISCARD); | 177 | bool is_discard = !!(bio_op(bio) == REQ_OP_DISCARD); |
178 | 178 | ||
179 | if (bio->bi_error && __ratelimit(&drbd_ratelimit_state)) | 179 | if (bio->bi_error && __ratelimit(&drbd_ratelimit_state)) |
180 | drbd_warn(device, "%s: error=%d s=%llus\n", | 180 | drbd_warn(device, "%s: error=%d s=%llus\n", |
@@ -248,18 +248,26 @@ void drbd_request_endio(struct bio *bio) | |||
248 | 248 | ||
249 | /* to avoid recursion in __req_mod */ | 249 | /* to avoid recursion in __req_mod */ |
250 | if (unlikely(bio->bi_error)) { | 250 | if (unlikely(bio->bi_error)) { |
251 | if (bio_op(bio) == REQ_OP_DISCARD) | 251 | switch (bio_op(bio)) { |
252 | what = (bio->bi_error == -EOPNOTSUPP) | 252 | case REQ_OP_DISCARD: |
253 | ? DISCARD_COMPLETED_NOTSUPP | 253 | if (bio->bi_error == -EOPNOTSUPP) |
254 | : DISCARD_COMPLETED_WITH_ERROR; | 254 | what = DISCARD_COMPLETED_NOTSUPP; |
255 | else | 255 | else |
256 | what = (bio_data_dir(bio) == WRITE) | 256 | what = DISCARD_COMPLETED_WITH_ERROR; |
257 | ? WRITE_COMPLETED_WITH_ERROR | 257 | break; |
258 | : (bio_rw(bio) == READ) | 258 | case REQ_OP_READ: |
259 | ? READ_COMPLETED_WITH_ERROR | 259 | if (bio->bi_rw & REQ_RAHEAD) |
260 | : READ_AHEAD_COMPLETED_WITH_ERROR; | 260 | what = READ_AHEAD_COMPLETED_WITH_ERROR; |
261 | } else | 261 | else |
262 | what = READ_COMPLETED_WITH_ERROR; | ||
263 | break; | ||
264 | default: | ||
265 | what = WRITE_COMPLETED_WITH_ERROR; | ||
266 | break; | ||
267 | } | ||
268 | } else { | ||
262 | what = COMPLETED_OK; | 269 | what = COMPLETED_OK; |
270 | } | ||
263 | 271 | ||
264 | bio_put(req->private_bio); | 272 | bio_put(req->private_bio); |
265 | req->private_bio = ERR_PTR(bio->bi_error); | 273 | req->private_bio = ERR_PTR(bio->bi_error); |
@@ -320,6 +328,10 @@ void drbd_csum_bio(struct crypto_ahash *tfm, struct bio *bio, void *digest) | |||
320 | sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); | 328 | sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); |
321 | ahash_request_set_crypt(req, &sg, NULL, sg.length); | 329 | ahash_request_set_crypt(req, &sg, NULL, sg.length); |
322 | crypto_ahash_update(req); | 330 | crypto_ahash_update(req); |
331 | /* REQ_OP_WRITE_SAME has only one segment, | ||
332 | * checksum the payload only once. */ | ||
333 | if (bio_op(bio) == REQ_OP_WRITE_SAME) | ||
334 | break; | ||
323 | } | 335 | } |
324 | ahash_request_set_crypt(req, NULL, digest, 0); | 336 | ahash_request_set_crypt(req, NULL, digest, 0); |
325 | crypto_ahash_final(req); | 337 | crypto_ahash_final(req); |
@@ -387,7 +399,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, | |||
387 | /* GFP_TRY, because if there is no memory available right now, this may | 399 | /* GFP_TRY, because if there is no memory available right now, this may |
388 | * be rescheduled for later. It is "only" background resync, after all. */ | 400 | * be rescheduled for later. It is "only" background resync, after all. */ |
389 | peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, | 401 | peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, |
390 | size, true /* has real payload */, GFP_TRY); | 402 | size, size, GFP_TRY); |
391 | if (!peer_req) | 403 | if (!peer_req) |
392 | goto defer; | 404 | goto defer; |
393 | 405 | ||
@@ -583,6 +595,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel) | |||
583 | int number, rollback_i, size; | 595 | int number, rollback_i, size; |
584 | int align, requeue = 0; | 596 | int align, requeue = 0; |
585 | int i = 0; | 597 | int i = 0; |
598 | int discard_granularity = 0; | ||
586 | 599 | ||
587 | if (unlikely(cancel)) | 600 | if (unlikely(cancel)) |
588 | return 0; | 601 | return 0; |
@@ -602,6 +615,12 @@ static int make_resync_request(struct drbd_device *const device, int cancel) | |||
602 | return 0; | 615 | return 0; |
603 | } | 616 | } |
604 | 617 | ||
618 | if (connection->agreed_features & DRBD_FF_THIN_RESYNC) { | ||
619 | rcu_read_lock(); | ||
620 | discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity; | ||
621 | rcu_read_unlock(); | ||
622 | } | ||
623 | |||
605 | max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; | 624 | max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; |
606 | number = drbd_rs_number_requests(device); | 625 | number = drbd_rs_number_requests(device); |
607 | if (number <= 0) | 626 | if (number <= 0) |
@@ -666,6 +685,9 @@ next_sector: | |||
666 | if (sector & ((1<<(align+3))-1)) | 685 | if (sector & ((1<<(align+3))-1)) |
667 | break; | 686 | break; |
668 | 687 | ||
688 | if (discard_granularity && size == discard_granularity) | ||
689 | break; | ||
690 | |||
669 | /* do not cross extent boundaries */ | 691 | /* do not cross extent boundaries */ |
670 | if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0) | 692 | if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0) |
671 | break; | 693 | break; |
@@ -712,7 +734,8 @@ next_sector: | |||
712 | int err; | 734 | int err; |
713 | 735 | ||
714 | inc_rs_pending(device); | 736 | inc_rs_pending(device); |
715 | err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST, | 737 | err = drbd_send_drequest(peer_device, |
738 | size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST, | ||
716 | sector, size, ID_SYNCER); | 739 | sector, size, ID_SYNCER); |
717 | if (err) { | 740 | if (err) { |
718 | drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); | 741 | drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); |
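When the peer supports THIN_RESYNC and rs_discard_granularity is configured, the request-building loop above stops growing a resync request once it exactly matches that granularity, and such requests go out as P_RS_THIN_REQ instead of P_RS_DATA_REQUEST. A small demo of just that final selection; the 64 KiB granularity is an example setting, not a default:

#include <stdio.h>

int main(void)
{
        const unsigned int discard_granularity = 64 * 1024;    /* example setting */
        const unsigned int sizes[] = { 4096, 32768, 65536 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("%6u bytes -> %s\n", sizes[i],
                       sizes[i] == discard_granularity ?
                       "P_RS_THIN_REQ" : "P_RS_DATA_REQUEST");
        return 0;
}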
@@ -829,6 +852,7 @@ static void ping_peer(struct drbd_device *device) | |||
829 | 852 | ||
830 | int drbd_resync_finished(struct drbd_device *device) | 853 | int drbd_resync_finished(struct drbd_device *device) |
831 | { | 854 | { |
855 | struct drbd_connection *connection = first_peer_device(device)->connection; | ||
832 | unsigned long db, dt, dbdt; | 856 | unsigned long db, dt, dbdt; |
833 | unsigned long n_oos; | 857 | unsigned long n_oos; |
834 | union drbd_state os, ns; | 858 | union drbd_state os, ns; |
@@ -850,8 +874,7 @@ int drbd_resync_finished(struct drbd_device *device) | |||
850 | if (dw) { | 874 | if (dw) { |
851 | dw->w.cb = w_resync_finished; | 875 | dw->w.cb = w_resync_finished; |
852 | dw->device = device; | 876 | dw->device = device; |
853 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | 877 | drbd_queue_work(&connection->sender_work, &dw->w); |
854 | &dw->w); | ||
855 | return 1; | 878 | return 1; |
856 | } | 879 | } |
857 | drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n"); | 880 | drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n"); |
@@ -964,6 +987,30 @@ int drbd_resync_finished(struct drbd_device *device) | |||
964 | _drbd_set_state(device, ns, CS_VERBOSE, NULL); | 987 | _drbd_set_state(device, ns, CS_VERBOSE, NULL); |
965 | out_unlock: | 988 | out_unlock: |
966 | spin_unlock_irq(&device->resource->req_lock); | 989 | spin_unlock_irq(&device->resource->req_lock); |
990 | |||
991 | /* If we have been sync source, and have an effective fencing-policy, | ||
992 | * once *all* volumes are back in sync, call "unfence". */ | ||
993 | if (os.conn == C_SYNC_SOURCE) { | ||
994 | enum drbd_disk_state disk_state = D_MASK; | ||
995 | enum drbd_disk_state pdsk_state = D_MASK; | ||
996 | enum drbd_fencing_p fp = FP_DONT_CARE; | ||
997 | |||
998 | rcu_read_lock(); | ||
999 | fp = rcu_dereference(device->ldev->disk_conf)->fencing; | ||
1000 | if (fp != FP_DONT_CARE) { | ||
1001 | struct drbd_peer_device *peer_device; | ||
1002 | int vnr; | ||
1003 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | ||
1004 | struct drbd_device *device = peer_device->device; | ||
1005 | disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk); | ||
1006 | pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk); | ||
1007 | } | ||
1008 | } | ||
1009 | rcu_read_unlock(); | ||
1010 | if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE) | ||
1011 | conn_khelper(connection, "unfence-peer"); | ||
1012 | } | ||
1013 | |||
967 | put_ldev(device); | 1014 | put_ldev(device); |
968 | out: | 1015 | out: |
969 | device->rs_total = 0; | 1016 | device->rs_total = 0; |
@@ -1000,7 +1047,6 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_ | |||
1000 | 1047 | ||
1001 | /** | 1048 | /** |
1002 | * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST | 1049 | * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST |
1003 | * @device: DRBD device. | ||
1004 | * @w: work object. | 1050 | * @w: work object. |
1005 | * @cancel: The connection will be closed anyways | 1051 | * @cancel: The connection will be closed anyways |
1006 | */ | 1052 | */ |
@@ -1036,6 +1082,30 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) | |||
1036 | return err; | 1082 | return err; |
1037 | } | 1083 | } |
1038 | 1084 | ||
1085 | static bool all_zero(struct drbd_peer_request *peer_req) | ||
1086 | { | ||
1087 | struct page *page = peer_req->pages; | ||
1088 | unsigned int len = peer_req->i.size; | ||
1089 | |||
1090 | page_chain_for_each(page) { | ||
1091 | unsigned int l = min_t(unsigned int, len, PAGE_SIZE); | ||
1092 | unsigned int i, words = l / sizeof(long); | ||
1093 | unsigned long *d; | ||
1094 | |||
1095 | d = kmap_atomic(page); | ||
1096 | for (i = 0; i < words; i++) { | ||
1097 | if (d[i]) { | ||
1098 | kunmap_atomic(d); | ||
1099 | return false; | ||
1100 | } | ||
1101 | } | ||
1102 | kunmap_atomic(d); | ||
1103 | len -= l; | ||
1104 | } | ||
1105 | |||
1106 | return true; | ||
1107 | } | ||
1108 | |||
1039 | /** | 1109 | /** |
1040 | * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST | 1110 | * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST |
1041 | * @w: work object. | 1111 | * @w: work object. |
@@ -1064,7 +1134,10 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) | |||
1064 | } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { | 1134 | } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { |
1065 | if (likely(device->state.pdsk >= D_INCONSISTENT)) { | 1135 | if (likely(device->state.pdsk >= D_INCONSISTENT)) { |
1066 | inc_rs_pending(device); | 1136 | inc_rs_pending(device); |
1067 | err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); | 1137 | if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req)) |
1138 | err = drbd_send_rs_deallocated(peer_device, peer_req); | ||
1139 | else | ||
1140 | err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req); | ||
1068 | } else { | 1141 | } else { |
1069 | if (__ratelimit(&drbd_ratelimit_state)) | 1142 | if (__ratelimit(&drbd_ratelimit_state)) |
1070 | drbd_err(device, "Not sending RSDataReply, " | 1143 | drbd_err(device, "Not sending RSDataReply, " |
@@ -1634,7 +1707,7 @@ static bool use_checksum_based_resync(struct drbd_connection *connection, struct | |||
1634 | rcu_read_unlock(); | 1707 | rcu_read_unlock(); |
1635 | return connection->agreed_pro_version >= 89 && /* supported? */ | 1708 | return connection->agreed_pro_version >= 89 && /* supported? */ |
1636 | connection->csums_tfm && /* configured? */ | 1709 | connection->csums_tfm && /* configured? */ |
1637 | (csums_after_crash_only == 0 /* use for each resync? */ | 1710 | (csums_after_crash_only == false /* use for each resync? */ |
1638 | || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ | 1711 | || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ |
1639 | } | 1712 | } |
1640 | 1713 | ||
@@ -1769,7 +1842,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1769 | device->bm_resync_fo = 0; | 1842 | device->bm_resync_fo = 0; |
1770 | device->use_csums = use_checksum_based_resync(connection, device); | 1843 | device->use_csums = use_checksum_based_resync(connection, device); |
1771 | } else { | 1844 | } else { |
1772 | device->use_csums = 0; | 1845 | device->use_csums = false; |
1773 | } | 1846 | } |
1774 | 1847 | ||
1775 | /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid | 1848 | /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid |
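The new all_zero() helper above lets the sync source answer a thin-provisioning resync request (EE_RS_THIN_REQ) with a "deallocated" reply via drbd_send_rs_deallocated() instead of shipping a block of literal zeroes. Its core is a word-at-a-time scan of the payload; flattened onto a plain buffer (no page chain, hypothetical name), the check amounts to the following sketch:

/* Hypothetical, flattened version of the check all_zero() performs: scan
 * one machine word at a time and bail out on the first non-zero word.
 * The real helper walks peer_req->pages as a page chain, kmap_atomic()s
 * each page, and (like this sketch) only inspects whole words. */
static bool buf_all_zero(const unsigned long *buf, unsigned int len)
{
	unsigned int i, words = len / sizeof(long);

	for (i = 0; i < words; i++)
		if (buf[i])
			return false;
	return true;
}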
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index f9bfecd733a8..c557057fe8ae 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c | |||
@@ -4350,8 +4350,7 @@ static int __init do_floppy_init(void) | |||
4350 | /* to be cleaned up... */ | 4350 | /* to be cleaned up... */ |
4351 | disks[drive]->private_data = (void *)(long)drive; | 4351 | disks[drive]->private_data = (void *)(long)drive; |
4352 | disks[drive]->flags |= GENHD_FL_REMOVABLE; | 4352 | disks[drive]->flags |= GENHD_FL_REMOVABLE; |
4353 | disks[drive]->driverfs_dev = &floppy_device[drive].dev; | 4353 | device_add_disk(&floppy_device[drive].dev, disks[drive]); |
4354 | add_disk(disks[drive]); | ||
4355 | } | 4354 | } |
4356 | 4355 | ||
4357 | return 0; | 4356 | return 0; |
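The floppy conversion above is one instance of the pattern applied throughout this series: instead of stashing the parent in gendisk->driverfs_dev and calling add_disk() separately, drivers hand the parent device to device_add_disk() at registration time. A minimal sketch of the before/after shape (names are placeholders); the mtip32xx, ps3disk, ps3vram, rsxx, skd, sunvdc, virtio_blk and xen-blkfront hunks below follow the same pattern:

/* Sketch of the conversion pattern (placeholder names).
 *
 * Old style, removed in this series:
 *     disk->driverfs_dev = parent;
 *     add_disk(disk);
 *
 * New style: the parent is supplied when the disk is registered. */
static void example_register_disk(struct device *parent, struct gendisk *disk)
{
	device_add_disk(parent, disk);
}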
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 364d491d4bdd..075377eee0c0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -1765,6 +1765,7 @@ static int loop_add(struct loop_device **l, int i) | |||
1765 | */ | 1765 | */ |
1766 | queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); | 1766 | queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); |
1767 | 1767 | ||
1768 | err = -ENOMEM; | ||
1768 | disk = lo->lo_disk = alloc_disk(1 << part_shift); | 1769 | disk = lo->lo_disk = alloc_disk(1 << part_shift); |
1769 | if (!disk) | 1770 | if (!disk) |
1770 | goto out_free_queue; | 1771 | goto out_free_queue; |
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 145ce2aa2e78..e937fcf71769 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c | |||
@@ -687,15 +687,13 @@ static unsigned int mg_issue_req(struct request *req, | |||
687 | unsigned int sect_num, | 687 | unsigned int sect_num, |
688 | unsigned int sect_cnt) | 688 | unsigned int sect_cnt) |
689 | { | 689 | { |
690 | switch (rq_data_dir(req)) { | 690 | if (rq_data_dir(req) == READ) { |
691 | case READ: | ||
692 | if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) | 691 | if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) |
693 | != MG_ERR_NONE) { | 692 | != MG_ERR_NONE) { |
694 | mg_bad_rw_intr(host); | 693 | mg_bad_rw_intr(host); |
695 | return host->error; | 694 | return host->error; |
696 | } | 695 | } |
697 | break; | 696 | } else { |
698 | case WRITE: | ||
699 | /* TODO : handler */ | 697 | /* TODO : handler */ |
700 | outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); | 698 | outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); |
701 | if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr) | 699 | if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr) |
@@ -714,7 +712,6 @@ static unsigned int mg_issue_req(struct request *req, | |||
714 | mod_timer(&host->timer, jiffies + 3 * HZ); | 712 | mod_timer(&host->timer, jiffies + 3 * HZ); |
715 | outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + | 713 | outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + |
716 | MG_REG_COMMAND); | 714 | MG_REG_COMMAND); |
717 | break; | ||
718 | } | 715 | } |
719 | return MG_ERR_NONE; | 716 | return MG_ERR_NONE; |
720 | } | 717 | } |
@@ -1018,7 +1015,7 @@ probe_err_7: | |||
1018 | probe_err_6: | 1015 | probe_err_6: |
1019 | blk_cleanup_queue(host->breq); | 1016 | blk_cleanup_queue(host->breq); |
1020 | probe_err_5: | 1017 | probe_err_5: |
1021 | unregister_blkdev(MG_DISK_MAJ, MG_DISK_NAME); | 1018 | unregister_blkdev(host->major, MG_DISK_NAME); |
1022 | probe_err_4: | 1019 | probe_err_4: |
1023 | if (!prv_data->use_polling) | 1020 | if (!prv_data->use_polling) |
1024 | free_irq(host->irq, host); | 1021 | free_irq(host->irq, host); |
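Besides folding the two-way switch on rq_data_dir() into an if/else, the mg_disk error path above now unregisters the major actually held in host->major (which may have been allocated dynamically) rather than the MG_DISK_MAJ constant. The general idiom, sketched with hypothetical names (struct example_host, example_setup):

/* Sketch: register_blkdev() called with major 0 hands back a dynamically
 * allocated major; any teardown path must unregister that same value,
 * not a compile-time constant. */
static int example_probe(struct example_host *host)
{
	host->major = register_blkdev(0, "example");
	if (host->major < 0)
		return host->major;

	if (example_setup(host)) {
		unregister_blkdev(host->major, "example");
		return -ENODEV;
	}
	return 0;
}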
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 8e3e708cb9ee..2aca98e8e427 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c | |||
@@ -3956,7 +3956,6 @@ static int mtip_block_initialize(struct driver_data *dd) | |||
3956 | if (rv) | 3956 | if (rv) |
3957 | goto disk_index_error; | 3957 | goto disk_index_error; |
3958 | 3958 | ||
3959 | dd->disk->driverfs_dev = &dd->pdev->dev; | ||
3960 | dd->disk->major = dd->major; | 3959 | dd->disk->major = dd->major; |
3961 | dd->disk->first_minor = index * MTIP_MAX_MINORS; | 3960 | dd->disk->first_minor = index * MTIP_MAX_MINORS; |
3962 | dd->disk->minors = MTIP_MAX_MINORS; | 3961 | dd->disk->minors = MTIP_MAX_MINORS; |
@@ -4008,7 +4007,7 @@ skip_create_disk: | |||
4008 | 4007 | ||
4009 | /* | 4008 | /* |
4010 | * if rebuild pending, start the service thread, and delay the block | 4009 | * if rebuild pending, start the service thread, and delay the block |
4011 | * queue creation and add_disk() | 4010 | * queue creation and device_add_disk() |
4012 | */ | 4011 | */ |
4013 | if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) | 4012 | if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) |
4014 | goto start_service_thread; | 4013 | goto start_service_thread; |
@@ -4042,7 +4041,7 @@ skip_create_disk: | |||
4042 | set_capacity(dd->disk, capacity); | 4041 | set_capacity(dd->disk, capacity); |
4043 | 4042 | ||
4044 | /* Enable the block device and add it to /dev */ | 4043 | /* Enable the block device and add it to /dev */ |
4045 | add_disk(dd->disk); | 4044 | device_add_disk(&dd->pdev->dev, dd->disk); |
4046 | 4045 | ||
4047 | dd->bdev = bdget_disk(dd->disk, 0); | 4046 | dd->bdev = bdget_disk(dd->disk, 0); |
4048 | /* | 4047 | /* |
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index cab97593ba54..75a7f88d6717 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c | |||
@@ -448,7 +448,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) | |||
448 | struct request *rq; | 448 | struct request *rq; |
449 | struct bio *bio = rqd->bio; | 449 | struct bio *bio = rqd->bio; |
450 | 450 | ||
451 | rq = blk_mq_alloc_request(q, bio_rw(bio), 0); | 451 | rq = blk_mq_alloc_request(q, bio_data_dir(bio), 0); |
452 | if (IS_ERR(rq)) | 452 | if (IS_ERR(rq)) |
453 | return -ENOMEM; | 453 | return -ENOMEM; |
454 | 454 | ||
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index acb44529c05e..76f33c84ce3d 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c | |||
@@ -487,7 +487,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) | |||
487 | gendisk->fops = &ps3disk_fops; | 487 | gendisk->fops = &ps3disk_fops; |
488 | gendisk->queue = queue; | 488 | gendisk->queue = queue; |
489 | gendisk->private_data = dev; | 489 | gendisk->private_data = dev; |
490 | gendisk->driverfs_dev = &dev->sbd.core; | ||
491 | snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME, | 490 | snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME, |
492 | devidx+'a'); | 491 | devidx+'a'); |
493 | priv->blocking_factor = dev->blk_size >> 9; | 492 | priv->blocking_factor = dev->blk_size >> 9; |
@@ -499,7 +498,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) | |||
499 | gendisk->disk_name, priv->model, priv->raw_capacity >> 11, | 498 | gendisk->disk_name, priv->model, priv->raw_capacity >> 11, |
500 | get_capacity(gendisk) >> 11); | 499 | get_capacity(gendisk) >> 11); |
501 | 500 | ||
502 | add_disk(gendisk); | 501 | device_add_disk(&dev->sbd.core, gendisk); |
503 | return 0; | 502 | return 0; |
504 | 503 | ||
505 | fail_cleanup_queue: | 504 | fail_cleanup_queue: |
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 56847fcda086..456b4fe21559 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c | |||
@@ -773,14 +773,13 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) | |||
773 | gendisk->fops = &ps3vram_fops; | 773 | gendisk->fops = &ps3vram_fops; |
774 | gendisk->queue = queue; | 774 | gendisk->queue = queue; |
775 | gendisk->private_data = dev; | 775 | gendisk->private_data = dev; |
776 | gendisk->driverfs_dev = &dev->core; | ||
777 | strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name)); | 776 | strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name)); |
778 | set_capacity(gendisk, priv->size >> 9); | 777 | set_capacity(gendisk, priv->size >> 9); |
779 | 778 | ||
780 | dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n", | 779 | dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n", |
781 | gendisk->disk_name, get_capacity(gendisk) >> 11); | 780 | gendisk->disk_name, get_capacity(gendisk) >> 11); |
782 | 781 | ||
783 | add_disk(gendisk); | 782 | device_add_disk(&dev->core, gendisk); |
784 | return 0; | 783 | return 0; |
785 | 784 | ||
786 | fail_cleanup_queue: | 785 | fail_cleanup_queue: |
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index e1b8b7061d2f..f81d70b39d10 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c | |||
@@ -230,8 +230,7 @@ int rsxx_attach_dev(struct rsxx_cardinfo *card) | |||
230 | set_capacity(card->gendisk, card->size8 >> 9); | 230 | set_capacity(card->gendisk, card->size8 >> 9); |
231 | else | 231 | else |
232 | set_capacity(card->gendisk, 0); | 232 | set_capacity(card->gendisk, 0); |
233 | add_disk(card->gendisk); | 233 | device_add_disk(CARD_TO_DEV(card), card->gendisk); |
234 | |||
235 | card->bdev_attached = 1; | 234 | card->bdev_attached = 1; |
236 | } | 235 | } |
237 | 236 | ||
@@ -308,7 +307,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) | |||
308 | 307 | ||
309 | snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name), | 308 | snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name), |
310 | "rsxx%d", card->disk_id); | 309 | "rsxx%d", card->disk_id); |
311 | card->gendisk->driverfs_dev = &card->dev->dev; | ||
312 | card->gendisk->major = card->major; | 310 | card->gendisk->major = card->major; |
313 | card->gendisk->first_minor = 0; | 311 | card->gendisk->first_minor = 0; |
314 | card->gendisk->fops = &rsxx_fops; | 312 | card->gendisk->fops = &rsxx_fops; |
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 5c07a23e2ada..3822eae102db 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c | |||
@@ -4690,10 +4690,10 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo) | |||
4690 | return -EIO; | 4690 | return -EIO; |
4691 | } | 4691 | } |
4692 | 4692 | ||
4693 | static int skd_bdev_attach(struct skd_device *skdev) | 4693 | static int skd_bdev_attach(struct device *parent, struct skd_device *skdev) |
4694 | { | 4694 | { |
4695 | pr_debug("%s:%s:%d add_disk\n", skdev->name, __func__, __LINE__); | 4695 | pr_debug("%s:%s:%d add_disk\n", skdev->name, __func__, __LINE__); |
4696 | add_disk(skdev->disk); | 4696 | device_add_disk(parent, skdev->disk); |
4697 | return 0; | 4697 | return 0; |
4698 | } | 4698 | } |
4699 | 4699 | ||
@@ -4812,8 +4812,6 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) | |||
4812 | 4812 | ||
4813 | pci_set_drvdata(pdev, skdev); | 4813 | pci_set_drvdata(pdev, skdev); |
4814 | 4814 | ||
4815 | skdev->disk->driverfs_dev = &pdev->dev; | ||
4816 | |||
4817 | for (i = 0; i < SKD_MAX_BARS; i++) { | 4815 | for (i = 0; i < SKD_MAX_BARS; i++) { |
4818 | skdev->mem_phys[i] = pci_resource_start(pdev, i); | 4816 | skdev->mem_phys[i] = pci_resource_start(pdev, i); |
4819 | skdev->mem_size[i] = (u32)pci_resource_len(pdev, i); | 4817 | skdev->mem_size[i] = (u32)pci_resource_len(pdev, i); |
@@ -4851,7 +4849,7 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) | |||
4851 | (SKD_START_WAIT_SECONDS * HZ)); | 4849 | (SKD_START_WAIT_SECONDS * HZ)); |
4852 | if (skdev->gendisk_on > 0) { | 4850 | if (skdev->gendisk_on > 0) { |
4853 | /* device came on-line after reset */ | 4851 | /* device came on-line after reset */ |
4854 | skd_bdev_attach(skdev); | 4852 | skd_bdev_attach(&pdev->dev, skdev); |
4855 | rc = 0; | 4853 | rc = 0; |
4856 | } else { | 4854 | } else { |
4857 | /* we timed out, something is wrong with the device, | 4855 | /* we timed out, something is wrong with the device, |
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 4b911ed96ea3..cab157331c4e 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c | |||
@@ -804,7 +804,6 @@ static int probe_disk(struct vdc_port *port) | |||
804 | g->fops = &vdc_fops; | 804 | g->fops = &vdc_fops; |
805 | g->queue = q; | 805 | g->queue = q; |
806 | g->private_data = port; | 806 | g->private_data = port; |
807 | g->driverfs_dev = &port->vio.vdev->dev; | ||
808 | 807 | ||
809 | set_capacity(g, port->vdisk_size); | 808 | set_capacity(g, port->vdisk_size); |
810 | 809 | ||
@@ -835,7 +834,7 @@ static int probe_disk(struct vdc_port *port) | |||
835 | port->vdisk_size, (port->vdisk_size >> (20 - 9)), | 834 | port->vdisk_size, (port->vdisk_size >> (20 - 9)), |
836 | port->vio.ver.major, port->vio.ver.minor); | 835 | port->vio.ver.major, port->vio.ver.minor); |
837 | 836 | ||
838 | add_disk(g); | 837 | device_add_disk(&port->vio.vdev->dev, g); |
839 | 838 | ||
840 | return 0; | 839 | return 0; |
841 | } | 840 | } |
diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 4b3ba74e9d22..d0a3e6d4515f 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c | |||
@@ -344,7 +344,6 @@ static int add_bio(struct cardinfo *card) | |||
344 | int offset; | 344 | int offset; |
345 | struct bio *bio; | 345 | struct bio *bio; |
346 | struct bio_vec vec; | 346 | struct bio_vec vec; |
347 | int rw; | ||
348 | 347 | ||
349 | bio = card->currentbio; | 348 | bio = card->currentbio; |
350 | if (!bio && card->bio) { | 349 | if (!bio && card->bio) { |
@@ -359,7 +358,6 @@ static int add_bio(struct cardinfo *card) | |||
359 | if (!bio) | 358 | if (!bio) |
360 | return 0; | 359 | return 0; |
361 | 360 | ||
362 | rw = bio_rw(bio); | ||
363 | if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE) | 361 | if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE) |
364 | return 0; | 362 | return 0; |
365 | 363 | ||
@@ -369,7 +367,7 @@ static int add_bio(struct cardinfo *card) | |||
369 | vec.bv_page, | 367 | vec.bv_page, |
370 | vec.bv_offset, | 368 | vec.bv_offset, |
371 | vec.bv_len, | 369 | vec.bv_len, |
372 | (rw == READ) ? | 370 | bio_op(bio) == REQ_OP_READ ? |
373 | PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); | 371 | PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); |
374 | 372 | ||
375 | p = &card->mm_pages[card->Ready]; | 373 | p = &card->mm_pages[card->Ready]; |
@@ -398,7 +396,7 @@ static int add_bio(struct cardinfo *card) | |||
398 | DMASCR_CHAIN_EN | | 396 | DMASCR_CHAIN_EN | |
399 | DMASCR_SEM_EN | | 397 | DMASCR_SEM_EN | |
400 | pci_cmds); | 398 | pci_cmds); |
401 | if (rw == WRITE) | 399 | if (bio_op(bio) == REQ_OP_WRITE) |
402 | desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ); | 400 | desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ); |
403 | desc->sem_control_bits = desc->control_bits; | 401 | desc->sem_control_bits = desc->control_bits; |
404 | 402 | ||
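The umem hunks above, like the earlier null_blk change, drop the removed bio_rw()/READA handling and derive the direction from the bio itself: bio_data_dir() for the bulk data direction, bio_op() when the specific operation matters. A minimal sketch of the mapping-direction decision (hypothetical helper name):

/* Sketch: pick the PCI DMA direction from the bio operation, as the
 * umem change above now does, instead of a cached bio_rw() value. */
static int example_dma_dir(struct bio *bio)
{
	return bio_op(bio) == REQ_OP_READ ?
		PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
}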
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 18e4069dd24b..1523e05c46fc 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -236,25 +236,22 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
236 | static int virtblk_get_id(struct gendisk *disk, char *id_str) | 236 | static int virtblk_get_id(struct gendisk *disk, char *id_str) |
237 | { | 237 | { |
238 | struct virtio_blk *vblk = disk->private_data; | 238 | struct virtio_blk *vblk = disk->private_data; |
239 | struct request_queue *q = vblk->disk->queue; | ||
239 | struct request *req; | 240 | struct request *req; |
240 | struct bio *bio; | ||
241 | int err; | 241 | int err; |
242 | 242 | ||
243 | bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES, | 243 | req = blk_get_request(q, READ, GFP_KERNEL); |
244 | GFP_KERNEL); | 244 | if (IS_ERR(req)) |
245 | if (IS_ERR(bio)) | ||
246 | return PTR_ERR(bio); | ||
247 | |||
248 | req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL); | ||
249 | if (IS_ERR(req)) { | ||
250 | bio_put(bio); | ||
251 | return PTR_ERR(req); | 245 | return PTR_ERR(req); |
252 | } | ||
253 | |||
254 | req->cmd_type = REQ_TYPE_DRV_PRIV; | 246 | req->cmd_type = REQ_TYPE_DRV_PRIV; |
247 | |||
248 | err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); | ||
249 | if (err) | ||
250 | goto out; | ||
251 | |||
255 | err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); | 252 | err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); |
253 | out: | ||
256 | blk_put_request(req); | 254 | blk_put_request(req); |
257 | |||
258 | return err; | 255 | return err; |
259 | } | 256 | } |
260 | 257 | ||
@@ -656,7 +653,6 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
656 | vblk->disk->first_minor = index_to_minor(index); | 653 | vblk->disk->first_minor = index_to_minor(index); |
657 | vblk->disk->private_data = vblk; | 654 | vblk->disk->private_data = vblk; |
658 | vblk->disk->fops = &virtblk_fops; | 655 | vblk->disk->fops = &virtblk_fops; |
659 | vblk->disk->driverfs_dev = &vdev->dev; | ||
660 | vblk->disk->flags |= GENHD_FL_EXT_DEVT; | 656 | vblk->disk->flags |= GENHD_FL_EXT_DEVT; |
661 | vblk->index = index; | 657 | vblk->index = index; |
662 | 658 | ||
@@ -733,7 +729,7 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
733 | 729 | ||
734 | virtio_device_ready(vdev); | 730 | virtio_device_ready(vdev); |
735 | 731 | ||
736 | add_disk(vblk->disk); | 732 | device_add_disk(&vdev->dev, vblk->disk); |
737 | err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); | 733 | err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); |
738 | if (err) | 734 | if (err) |
739 | goto out_del_disk; | 735 | goto out_del_disk; |
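The virtblk_get_id() rewrite above replaces the bio_map_kern() plus blk_make_request() pair with the request-first pattern: allocate a request, map a kernel buffer into it, execute it, and put it. A minimal sketch of that pattern (function and buffer names are placeholders):

/* Sketch: issue a driver-private request that carries a kernel buffer. */
static int example_exec_kern_buf(struct request_queue *q, struct gendisk *disk,
				 void *buf, unsigned int len)
{
	struct request *req;
	int err;

	req = blk_get_request(q, READ, GFP_KERNEL);
	if (IS_ERR(req))
		return PTR_ERR(req);
	req->cmd_type = REQ_TYPE_DRV_PRIV;

	err = blk_rq_map_kern(q, req, buf, len, GFP_KERNEL);
	if (!err)
		err = blk_execute_rq(q, disk, req, false);

	blk_put_request(req);
	return err;
}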
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 3355f1cdd4e5..2994cfa44c8a 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c | |||
@@ -480,7 +480,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, | |||
480 | if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags)) | 480 | if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags)) |
481 | vbd->flush_support = true; | 481 | vbd->flush_support = true; |
482 | 482 | ||
483 | if (q && blk_queue_secdiscard(q)) | 483 | if (q && blk_queue_secure_erase(q)) |
484 | vbd->discard_secure = true; | 484 | vbd->discard_secure = true; |
485 | 485 | ||
486 | pr_debug("Successful creation of handle=%04x (dom=%u)\n", | 486 | pr_debug("Successful creation of handle=%04x (dom=%u)\n", |
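The xen-blkback check above switches from blk_queue_secdiscard() to blk_queue_secure_erase(), and the xen-blkfront hunks that follow make the matching change on the request side: secure erase is no longer a REQ_SECURE flag on a discard but a request operation of its own, advertised with QUEUE_FLAG_SECERASE. A minimal sketch of how a driver now classifies and advertises it (placeholder names):

/* Sketch: secure erase is recognized via req_op(), not a cmd_flags bit. */
static bool example_is_discard_like(struct request *req)
{
	return req_op(req) == REQ_OP_DISCARD ||
	       req_op(req) == REQ_OP_SECURE_ERASE;
}

/* Sketch: advertising secure-erase support on a queue. */
static void example_setup_secerase(struct request_queue *q, bool supported)
{
	if (supported)
		queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, q);
}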
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index da05d3f9bad2..0b6682a33e3b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -548,7 +548,7 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf | |||
548 | ring_req->u.discard.nr_sectors = blk_rq_sectors(req); | 548 | ring_req->u.discard.nr_sectors = blk_rq_sectors(req); |
549 | ring_req->u.discard.id = id; | 549 | ring_req->u.discard.id = id; |
550 | ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); | 550 | ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); |
551 | if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) | 551 | if (req_op(req) == REQ_OP_SECURE_ERASE && info->feature_secdiscard) |
552 | ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; | 552 | ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; |
553 | else | 553 | else |
554 | ring_req->u.discard.flag = 0; | 554 | ring_req->u.discard.flag = 0; |
@@ -844,7 +844,7 @@ static int blkif_queue_request(struct request *req, struct blkfront_ring_info *r | |||
844 | return 1; | 844 | return 1; |
845 | 845 | ||
846 | if (unlikely(req_op(req) == REQ_OP_DISCARD || | 846 | if (unlikely(req_op(req) == REQ_OP_DISCARD || |
847 | req->cmd_flags & REQ_SECURE)) | 847 | req_op(req) == REQ_OP_SECURE_ERASE)) |
848 | return blkif_queue_discard_req(req, rinfo); | 848 | return blkif_queue_discard_req(req, rinfo); |
849 | else | 849 | else |
850 | return blkif_queue_rw_req(req, rinfo); | 850 | return blkif_queue_rw_req(req, rinfo); |
@@ -952,7 +952,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, | |||
952 | rq->limits.discard_granularity = info->discard_granularity; | 952 | rq->limits.discard_granularity = info->discard_granularity; |
953 | rq->limits.discard_alignment = info->discard_alignment; | 953 | rq->limits.discard_alignment = info->discard_alignment; |
954 | if (info->feature_secdiscard) | 954 | if (info->feature_secdiscard) |
955 | queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq); | 955 | queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); |
956 | } | 956 | } |
957 | 957 | ||
958 | /* Hard sector size and max sectors impersonate the equiv. hardware. */ | 958 | /* Hard sector size and max sectors impersonate the equiv. hardware. */ |
@@ -1134,7 +1134,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, | |||
1134 | gd->first_minor = minor; | 1134 | gd->first_minor = minor; |
1135 | gd->fops = &xlvbd_block_fops; | 1135 | gd->fops = &xlvbd_block_fops; |
1136 | gd->private_data = info; | 1136 | gd->private_data = info; |
1137 | gd->driverfs_dev = &(info->xbdev->dev); | ||
1138 | set_capacity(gd, capacity); | 1137 | set_capacity(gd, capacity); |
1139 | 1138 | ||
1140 | if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size, | 1139 | if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size, |
@@ -1592,7 +1591,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
1592 | info->feature_discard = 0; | 1591 | info->feature_discard = 0; |
1593 | info->feature_secdiscard = 0; | 1592 | info->feature_secdiscard = 0; |
1594 | queue_flag_clear(QUEUE_FLAG_DISCARD, rq); | 1593 | queue_flag_clear(QUEUE_FLAG_DISCARD, rq); |
1595 | queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); | 1594 | queue_flag_clear(QUEUE_FLAG_SECERASE, rq); |
1596 | } | 1595 | } |
1597 | blk_mq_complete_request(req, error); | 1596 | blk_mq_complete_request(req, error); |
1598 | break; | 1597 | break; |
@@ -2106,11 +2105,14 @@ static int blkfront_resume(struct xenbus_device *dev) | |||
2106 | */ | 2105 | */ |
2107 | if (req_op(shadow[i].request) == REQ_OP_FLUSH || | 2106 | if (req_op(shadow[i].request) == REQ_OP_FLUSH || |
2108 | req_op(shadow[i].request) == REQ_OP_DISCARD || | 2107 | req_op(shadow[i].request) == REQ_OP_DISCARD || |
2109 | shadow[j].request->cmd_flags & (REQ_FUA | REQ_SECURE)) { | 2108 | req_op(shadow[i].request) == REQ_OP_SECURE_ERASE || |
2110 | 2109 | shadow[j].request->cmd_flags & REQ_FUA) { | |
2111 | /* | 2110 | /* |
2112 | * Flush operations don't contain bios, so | 2111 | * Flush operations don't contain bios, so |
2113 | * we need to requeue the whole request | 2112 | * we need to requeue the whole request |
2113 | * | ||
2114 | * XXX: but this doesn't make any sense for a | ||
2115 | * write with the FUA flag set.. | ||
2114 | */ | 2116 | */ |
2115 | list_add(&shadow[j].request->queuelist, &info->requests); | 2117 | list_add(&shadow[j].request->queuelist, &info->requests); |
2116 | continue; | 2118 | continue; |
@@ -2445,7 +2447,7 @@ static void blkfront_connect(struct blkfront_info *info) | |||
2445 | for (i = 0; i < info->nr_rings; i++) | 2447 | for (i = 0; i < info->nr_rings; i++) |
2446 | kick_pending_request_queues(&info->rinfo[i]); | 2448 | kick_pending_request_queues(&info->rinfo[i]); |
2447 | 2449 | ||
2448 | add_disk(info->gd); | 2450 | device_add_disk(&info->xbdev->dev, info->gd); |
2449 | 2451 | ||
2450 | info->is_ready = 1; | 2452 | info->is_ready = 1; |
2451 | } | 2453 | } |