Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/drbd/Makefile        |   1
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c   | 518
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c   | 150
-rw-r--r-- | drivers/block/drbd/drbd_debugfs.c  | 958
-rw-r--r-- | drivers/block/drbd/drbd_debugfs.h  |  39
-rw-r--r-- | drivers/block/drbd/drbd_int.h      | 383
-rw-r--r-- | drivers/block/drbd/drbd_interval.h |   4
-rw-r--r-- | drivers/block/drbd/drbd_main.c     | 302
-rw-r--r-- | drivers/block/drbd/drbd_nl.c       | 110
-rw-r--r-- | drivers/block/drbd/drbd_proc.c     | 125
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 316
-rw-r--r-- | drivers/block/drbd/drbd_req.c      | 527
-rw-r--r-- | drivers/block/drbd/drbd_req.h      |   1
-rw-r--r-- | drivers/block/drbd/drbd_state.c    |  90
-rw-r--r-- | drivers/block/drbd/drbd_worker.c   | 348
-rw-r--r-- | drivers/block/virtio_blk.c         | 104
16 files changed, 2758 insertions, 1218 deletions
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index 8b450338075e..4464e353c1e8 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -3,5 +3,6 @@ drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o | |||
3 | drbd-y += drbd_main.o drbd_strings.o drbd_nl.o | 3 | drbd-y += drbd_main.o drbd_strings.o drbd_nl.o |
4 | drbd-y += drbd_interval.o drbd_state.o | 4 | drbd-y += drbd_interval.o drbd_state.o |
5 | drbd-y += drbd_nla.o | 5 | drbd-y += drbd_nla.o |
6 | drbd-$(CONFIG_DEBUG_FS) += drbd_debugfs.o | ||
6 | 7 | ||
7 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o | 8 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o |
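drbd_debugfs.o is linked into drbd.ko only when CONFIG_DEBUG_FS is enabled. The usual way such an optional object stays transparent to the rest of a driver is a small header with inline no-op fallbacks when the option is off; the sketch below only illustrates that pattern with made-up names, it is not the real drbd_debugfs.h (which appears in the diffstat but not in this section):

/* Illustration of the CONFIG_DEBUG_FS stub pattern; function names are
 * hypothetical, the real drbd_debugfs.h is not part of this section. */
#ifdef CONFIG_DEBUG_FS
int drbd_debugfs_example_init(void);
void drbd_debugfs_example_cleanup(void);
#else
static inline int drbd_debugfs_example_init(void) { return 0; }
static inline void drbd_debugfs_example_cleanup(void) { }
#endif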
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 05a1780ffa85..d26a3fa63688 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -92,34 +92,26 @@ struct __packed al_transaction_on_disk { | |||
92 | __be32 context[AL_CONTEXT_PER_TRANSACTION]; | 92 | __be32 context[AL_CONTEXT_PER_TRANSACTION]; |
93 | }; | 93 | }; |
94 | 94 | ||
95 | struct update_odbm_work { | 95 | void *drbd_md_get_buffer(struct drbd_device *device, const char *intent) |
96 | struct drbd_work w; | ||
97 | struct drbd_device *device; | ||
98 | unsigned int enr; | ||
99 | }; | ||
100 | |||
101 | struct update_al_work { | ||
102 | struct drbd_work w; | ||
103 | struct drbd_device *device; | ||
104 | struct completion event; | ||
105 | int err; | ||
106 | }; | ||
107 | |||
108 | |||
109 | void *drbd_md_get_buffer(struct drbd_device *device) | ||
110 | { | 96 | { |
111 | int r; | 97 | int r; |
112 | 98 | ||
113 | wait_event(device->misc_wait, | 99 | wait_event(device->misc_wait, |
114 | (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 || | 100 | (r = atomic_cmpxchg(&device->md_io.in_use, 0, 1)) == 0 || |
115 | device->state.disk <= D_FAILED); | 101 | device->state.disk <= D_FAILED); |
116 | 102 | ||
117 | return r ? NULL : page_address(device->md_io_page); | 103 | if (r) |
104 | return NULL; | ||
105 | |||
106 | device->md_io.current_use = intent; | ||
107 | device->md_io.start_jif = jiffies; | ||
108 | device->md_io.submit_jif = device->md_io.start_jif - 1; | ||
109 | return page_address(device->md_io.page); | ||
118 | } | 110 | } |
119 | 111 | ||
120 | void drbd_md_put_buffer(struct drbd_device *device) | 112 | void drbd_md_put_buffer(struct drbd_device *device) |
121 | { | 113 | { |
122 | if (atomic_dec_and_test(&device->md_io_in_use)) | 114 | if (atomic_dec_and_test(&device->md_io.in_use)) |
123 | wake_up(&device->misc_wait); | 115 | wake_up(&device->misc_wait); |
124 | } | 116 | } |
125 | 117 | ||
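The hunk above keeps the old ownership rule for the single preallocated meta-data page: whoever wins the 0 -> 1 transition of md_io.in_use owns the buffer until drbd_md_put_buffer() drops it, and everyone else sleeps on misc_wait. New here is only the bookkeeping of who holds it and since when (current_use, start_jif, submit_jif), which the debugfs code added by this patch can report. A stripped-down sketch of the pattern, with illustrative names and without DRBD's D_FAILED bail-out:

/* Sketch of the exclusive buffer hand-out used by drbd_md_get_buffer() /
 * drbd_md_put_buffer(); names are illustrative, not DRBD API. */
struct shared_md_buf {
	atomic_t in_use;		/* 0 = free, 1 = owned */
	wait_queue_head_t wait;
	void *page_addr;		/* page_address() of the one md page */
};

static void *shared_md_buf_get(struct shared_md_buf *b)
{
	int r;

	/* sleep until we win the 0 -> 1 transition */
	wait_event(b->wait, (r = atomic_cmpxchg(&b->in_use, 0, 1)) == 0);
	return b->page_addr;
}

static void shared_md_buf_put(struct shared_md_buf *b)
{
	if (atomic_dec_and_test(&b->in_use))
		wake_up(&b->wait);
}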
@@ -145,10 +137,11 @@ void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_b | |||
145 | 137 | ||
146 | static int _drbd_md_sync_page_io(struct drbd_device *device, | 138 | static int _drbd_md_sync_page_io(struct drbd_device *device, |
147 | struct drbd_backing_dev *bdev, | 139 | struct drbd_backing_dev *bdev, |
148 | struct page *page, sector_t sector, | 140 | sector_t sector, int rw) |
149 | int rw, int size) | ||
150 | { | 141 | { |
151 | struct bio *bio; | 142 | struct bio *bio; |
143 | /* we do all our meta data IO in aligned 4k blocks. */ | ||
144 | const int size = 4096; | ||
152 | int err; | 145 | int err; |
153 | 146 | ||
154 | device->md_io.done = 0; | 147 | device->md_io.done = 0; |
@@ -156,15 +149,15 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, | |||
156 | 149 | ||
157 | if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags)) | 150 | if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags)) |
158 | rw |= REQ_FUA | REQ_FLUSH; | 151 | rw |= REQ_FUA | REQ_FLUSH; |
159 | rw |= REQ_SYNC; | 152 | rw |= REQ_SYNC | REQ_NOIDLE; |
160 | 153 | ||
161 | bio = bio_alloc_drbd(GFP_NOIO); | 154 | bio = bio_alloc_drbd(GFP_NOIO); |
162 | bio->bi_bdev = bdev->md_bdev; | 155 | bio->bi_bdev = bdev->md_bdev; |
163 | bio->bi_iter.bi_sector = sector; | 156 | bio->bi_iter.bi_sector = sector; |
164 | err = -EIO; | 157 | err = -EIO; |
165 | if (bio_add_page(bio, page, size, 0) != size) | 158 | if (bio_add_page(bio, device->md_io.page, size, 0) != size) |
166 | goto out; | 159 | goto out; |
167 | bio->bi_private = &device->md_io; | 160 | bio->bi_private = device; |
168 | bio->bi_end_io = drbd_md_io_complete; | 161 | bio->bi_end_io = drbd_md_io_complete; |
169 | bio->bi_rw = rw; | 162 | bio->bi_rw = rw; |
170 | 163 | ||
@@ -179,7 +172,8 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, | |||
179 | } | 172 | } |
180 | 173 | ||
181 | bio_get(bio); /* one bio_put() is in the completion handler */ | 174 | bio_get(bio); /* one bio_put() is in the completion handler */ |
182 | atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ | 175 | atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */ |
176 | device->md_io.submit_jif = jiffies; | ||
183 | if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) | 177 | if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) |
184 | bio_endio(bio, -EIO); | 178 | bio_endio(bio, -EIO); |
185 | else | 179 | else |
@@ -197,9 +191,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd | |||
197 | sector_t sector, int rw) | 191 | sector_t sector, int rw) |
198 | { | 192 | { |
199 | int err; | 193 | int err; |
200 | struct page *iop = device->md_io_page; | 194 | D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1); |
201 | |||
202 | D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1); | ||
203 | 195 | ||
204 | BUG_ON(!bdev->md_bdev); | 196 | BUG_ON(!bdev->md_bdev); |
205 | 197 | ||
@@ -214,8 +206,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd | |||
214 | current->comm, current->pid, __func__, | 206 | current->comm, current->pid, __func__, |
215 | (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); | 207 | (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); |
216 | 208 | ||
217 | /* we do all our meta data IO in aligned 4k blocks. */ | 209 | err = _drbd_md_sync_page_io(device, bdev, sector, rw); |
218 | err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096); | ||
219 | if (err) { | 210 | if (err) { |
220 | drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", | 211 | drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", |
221 | (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err); | 212 | (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err); |
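Callers are expected to bracket drbd_md_sync_page_io() with the buffer get/put above, which is why it can assert md_io.in_use == 1 and no longer takes a page argument. Condensed from al_write_transaction() further down in this patch (error handling trimmed):

buffer = drbd_md_get_buffer(device, __func__);	/* also records the intent */
if (!buffer)
	return -ENODEV;
/* ... fill the 4k meta-data page behind "buffer" ... */
if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE))
	err = -EIO;
drbd_md_put_buffer(device);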
@@ -297,26 +288,12 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval * | |||
297 | return need_transaction; | 288 | return need_transaction; |
298 | } | 289 | } |
299 | 290 | ||
300 | static int al_write_transaction(struct drbd_device *device, bool delegate); | 291 | static int al_write_transaction(struct drbd_device *device); |
301 | |||
302 | /* When called through generic_make_request(), we must delegate | ||
303 | * activity log I/O to the worker thread: a further request | ||
304 | * submitted via generic_make_request() within the same task | ||
305 | * would be queued on current->bio_list, and would only start | ||
306 | * after this function returns (see generic_make_request()). | ||
307 | * | ||
308 | * However, if we *are* the worker, we must not delegate to ourselves. | ||
309 | */ | ||
310 | 292 | ||
311 | /* | 293 | void drbd_al_begin_io_commit(struct drbd_device *device) |
312 | * @delegate: delegate activity log I/O to the worker thread | ||
313 | */ | ||
314 | void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate) | ||
315 | { | 294 | { |
316 | bool locked = false; | 295 | bool locked = false; |
317 | 296 | ||
318 | BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task); | ||
319 | |||
320 | /* Serialize multiple transactions. | 297 | /* Serialize multiple transactions. |
321 | * This uses test_and_set_bit, memory barrier is implicit. | 298 | * This uses test_and_set_bit, memory barrier is implicit. |
322 | */ | 299 | */ |
@@ -335,7 +312,7 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate) | |||
335 | rcu_read_unlock(); | 312 | rcu_read_unlock(); |
336 | 313 | ||
337 | if (write_al_updates) | 314 | if (write_al_updates) |
338 | al_write_transaction(device, delegate); | 315 | al_write_transaction(device); |
339 | spin_lock_irq(&device->al_lock); | 316 | spin_lock_irq(&device->al_lock); |
340 | /* FIXME | 317 | /* FIXME |
341 | if (err) | 318 | if (err) |
@@ -352,12 +329,10 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate) | |||
352 | /* | 329 | /* |
353 | * @delegate: delegate activity log I/O to the worker thread | 330 | * @delegate: delegate activity log I/O to the worker thread |
354 | */ | 331 | */ |
355 | void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate) | 332 | void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i) |
356 | { | 333 | { |
357 | BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task); | ||
358 | |||
359 | if (drbd_al_begin_io_prepare(device, i)) | 334 | if (drbd_al_begin_io_prepare(device, i)) |
360 | drbd_al_begin_io_commit(device, delegate); | 335 | drbd_al_begin_io_commit(device); |
361 | } | 336 | } |
362 | 337 | ||
363 | int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i) | 338 | int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i) |
@@ -380,8 +355,19 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval * | |||
380 | /* We want all necessary updates for a given request within the same transaction | 355 | /* We want all necessary updates for a given request within the same transaction |
381 | * We could first check how many updates are *actually* needed, | 356 | * We could first check how many updates are *actually* needed, |
382 | * and use that instead of the worst-case nr_al_extents */ | 357 | * and use that instead of the worst-case nr_al_extents */ |
383 | if (available_update_slots < nr_al_extents) | 358 | if (available_update_slots < nr_al_extents) { |
384 | return -EWOULDBLOCK; | 359 | /* Too many activity log extents are currently "hot". |
360 | * | ||
361 | * If we have accumulated pending changes already, | ||
362 | * we made progress. | ||
363 | * | ||
364 | * If we cannot get even a single pending change through, | ||
365 | * stop the fast path until we made some progress, | ||
366 | * or requests to "cold" extents could be starved. */ | ||
367 | if (!al->pending_changes) | ||
368 | __set_bit(__LC_STARVING, &device->act_log->flags); | ||
369 | return -ENOBUFS; | ||
370 | } | ||
385 | 371 | ||
386 | /* Is resync active in this area? */ | 372 | /* Is resync active in this area? */ |
387 | for (enr = first; enr <= last; enr++) { | 373 | for (enr = first; enr <= last; enr++) { |
@@ -452,15 +438,6 @@ static unsigned int al_extent_to_bm_page(unsigned int al_enr) | |||
452 | (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); | 438 | (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); |
453 | } | 439 | } |
454 | 440 | ||
455 | static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) | ||
456 | { | ||
457 | return rs_enr >> | ||
458 | /* bit to page */ | ||
459 | ((PAGE_SHIFT + 3) - | ||
460 | /* resync extent number to bit */ | ||
461 | (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); | ||
462 | } | ||
463 | |||
464 | static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) | 441 | static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) |
465 | { | 442 | { |
466 | const unsigned int stripes = device->ldev->md.al_stripes; | 443 | const unsigned int stripes = device->ldev->md.al_stripes; |
@@ -479,8 +456,7 @@ static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) | |||
479 | return device->ldev->md.md_offset + device->ldev->md.al_offset + t; | 456 | return device->ldev->md.md_offset + device->ldev->md.al_offset + t; |
480 | } | 457 | } |
481 | 458 | ||
482 | static int | 459 | int al_write_transaction(struct drbd_device *device) |
483 | _al_write_transaction(struct drbd_device *device) | ||
484 | { | 460 | { |
485 | struct al_transaction_on_disk *buffer; | 461 | struct al_transaction_on_disk *buffer; |
486 | struct lc_element *e; | 462 | struct lc_element *e; |
@@ -505,7 +481,8 @@ _al_write_transaction(struct drbd_device *device) | |||
505 | return -EIO; | 481 | return -EIO; |
506 | } | 482 | } |
507 | 483 | ||
508 | buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */ | 484 | /* protects md_io_buffer, al_tr_cycle, ... */ |
485 | buffer = drbd_md_get_buffer(device, __func__); | ||
509 | if (!buffer) { | 486 | if (!buffer) { |
510 | drbd_err(device, "disk failed while waiting for md_io buffer\n"); | 487 | drbd_err(device, "disk failed while waiting for md_io buffer\n"); |
511 | put_ldev(device); | 488 | put_ldev(device); |
@@ -590,38 +567,6 @@ _al_write_transaction(struct drbd_device *device) | |||
590 | return err; | 567 | return err; |
591 | } | 568 | } |
592 | 569 | ||
593 | |||
594 | static int w_al_write_transaction(struct drbd_work *w, int unused) | ||
595 | { | ||
596 | struct update_al_work *aw = container_of(w, struct update_al_work, w); | ||
597 | struct drbd_device *device = aw->device; | ||
598 | int err; | ||
599 | |||
600 | err = _al_write_transaction(device); | ||
601 | aw->err = err; | ||
602 | complete(&aw->event); | ||
603 | |||
604 | return err != -EIO ? err : 0; | ||
605 | } | ||
606 | |||
607 | /* Calls from worker context (see w_restart_disk_io()) need to write the | ||
608 | transaction directly. Others came through generic_make_request(), | ||
609 | those need to delegate it to the worker. */ | ||
610 | static int al_write_transaction(struct drbd_device *device, bool delegate) | ||
611 | { | ||
612 | if (delegate) { | ||
613 | struct update_al_work al_work; | ||
614 | init_completion(&al_work.event); | ||
615 | al_work.w.cb = w_al_write_transaction; | ||
616 | al_work.device = device; | ||
617 | drbd_queue_work_front(&first_peer_device(device)->connection->sender_work, | ||
618 | &al_work.w); | ||
619 | wait_for_completion(&al_work.event); | ||
620 | return al_work.err; | ||
621 | } else | ||
622 | return _al_write_transaction(device); | ||
623 | } | ||
624 | |||
625 | static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext) | 570 | static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext) |
626 | { | 571 | { |
627 | int rv; | 572 | int rv; |
@@ -682,72 +627,56 @@ int drbd_initialize_al(struct drbd_device *device, void *buffer) | |||
682 | return 0; | 627 | return 0; |
683 | } | 628 | } |
684 | 629 | ||
685 | static int w_update_odbm(struct drbd_work *w, int unused) | 630 | static const char *drbd_change_sync_fname[] = { |
686 | { | 631 | [RECORD_RS_FAILED] = "drbd_rs_failed_io", |
687 | struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); | 632 | [SET_IN_SYNC] = "drbd_set_in_sync", |
688 | struct drbd_device *device = udw->device; | 633 | [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync" |
689 | struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; | 634 | }; |
690 | |||
691 | if (!get_ldev(device)) { | ||
692 | if (__ratelimit(&drbd_ratelimit_state)) | ||
693 | drbd_warn(device, "Can not update on disk bitmap, local IO disabled.\n"); | ||
694 | kfree(udw); | ||
695 | return 0; | ||
696 | } | ||
697 | |||
698 | drbd_bm_write_page(device, rs_extent_to_bm_page(udw->enr)); | ||
699 | put_ldev(device); | ||
700 | |||
701 | kfree(udw); | ||
702 | |||
703 | if (drbd_bm_total_weight(device) <= device->rs_failed) { | ||
704 | switch (device->state.conn) { | ||
705 | case C_SYNC_SOURCE: case C_SYNC_TARGET: | ||
706 | case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T: | ||
707 | drbd_resync_finished(device); | ||
708 | default: | ||
709 | /* nothing to do */ | ||
710 | break; | ||
711 | } | ||
712 | } | ||
713 | drbd_bcast_event(device, &sib); | ||
714 | |||
715 | return 0; | ||
716 | } | ||
717 | |||
718 | 635 | ||
719 | /* ATTENTION. The AL's extents are 4MB each, while the extents in the | 636 | /* ATTENTION. The AL's extents are 4MB each, while the extents in the |
720 | * resync LRU-cache are 16MB each. | 637 | * resync LRU-cache are 16MB each. |
721 | * The caller of this function has to hold an get_ldev() reference. | 638 | * The caller of this function has to hold an get_ldev() reference. |
722 | * | 639 | * |
640 | * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success), | ||
641 | * potentially pulling in (and recounting the corresponding bits) | ||
642 | * this resync extent into the resync extent lru cache. | ||
643 | * | ||
644 | * Returns whether all bits have been cleared for this resync extent, | ||
645 | * precisely: (rs_left <= rs_failed) | ||
646 | * | ||
723 | * TODO will be obsoleted once we have a caching lru of the on disk bitmap | 647 | * TODO will be obsoleted once we have a caching lru of the on disk bitmap |
724 | */ | 648 | */ |
725 | static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector, | 649 | static bool update_rs_extent(struct drbd_device *device, |
726 | int count, int success) | 650 | unsigned int enr, int count, |
651 | enum update_sync_bits_mode mode) | ||
727 | { | 652 | { |
728 | struct lc_element *e; | 653 | struct lc_element *e; |
729 | struct update_odbm_work *udw; | ||
730 | |||
731 | unsigned int enr; | ||
732 | 654 | ||
733 | D_ASSERT(device, atomic_read(&device->local_cnt)); | 655 | D_ASSERT(device, atomic_read(&device->local_cnt)); |
734 | 656 | ||
735 | /* I simply assume that a sector/size pair never crosses | 657 | /* When setting out-of-sync bits, |
736 | * a 16 MB extent border. (Currently this is true...) */ | 658 | * we don't need it cached (lc_find). |
737 | enr = BM_SECT_TO_EXT(sector); | 659 | * But if it is present in the cache, |
738 | 660 | * we should update the cached bit count. | |
739 | e = lc_get(device->resync, enr); | 661 | * Otherwise, that extent should be in the resync extent lru cache |
662 | * already -- or we want to pull it in if necessary -- (lc_get), | ||
663 | * then update and check rs_left and rs_failed. */ | ||
664 | if (mode == SET_OUT_OF_SYNC) | ||
665 | e = lc_find(device->resync, enr); | ||
666 | else | ||
667 | e = lc_get(device->resync, enr); | ||
740 | if (e) { | 668 | if (e) { |
741 | struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); | 669 | struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); |
742 | if (ext->lce.lc_number == enr) { | 670 | if (ext->lce.lc_number == enr) { |
743 | if (success) | 671 | if (mode == SET_IN_SYNC) |
744 | ext->rs_left -= count; | 672 | ext->rs_left -= count; |
673 | else if (mode == SET_OUT_OF_SYNC) | ||
674 | ext->rs_left += count; | ||
745 | else | 675 | else |
746 | ext->rs_failed += count; | 676 | ext->rs_failed += count; |
747 | if (ext->rs_left < ext->rs_failed) { | 677 | if (ext->rs_left < ext->rs_failed) { |
748 | drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d " | 678 | drbd_warn(device, "BAD! enr=%u rs_left=%d " |
749 | "rs_failed=%d count=%d cstate=%s\n", | 679 | "rs_failed=%d count=%d cstate=%s\n", |
750 | (unsigned long long)sector, | ||
751 | ext->lce.lc_number, ext->rs_left, | 680 | ext->lce.lc_number, ext->rs_left, |
752 | ext->rs_failed, count, | 681 | ext->rs_failed, count, |
753 | drbd_conn_str(device->state.conn)); | 682 | drbd_conn_str(device->state.conn)); |
@@ -781,34 +710,27 @@ static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t secto | |||
781 | ext->lce.lc_number, ext->rs_failed); | 710 | ext->lce.lc_number, ext->rs_failed); |
782 | } | 711 | } |
783 | ext->rs_left = rs_left; | 712 | ext->rs_left = rs_left; |
784 | ext->rs_failed = success ? 0 : count; | 713 | ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0; |
785 | /* we don't keep a persistent log of the resync lru, | 714 | /* we don't keep a persistent log of the resync lru, |
786 | * we can commit any change right away. */ | 715 | * we can commit any change right away. */ |
787 | lc_committed(device->resync); | 716 | lc_committed(device->resync); |
788 | } | 717 | } |
789 | lc_put(device->resync, &ext->lce); | 718 | if (mode != SET_OUT_OF_SYNC) |
719 | lc_put(device->resync, &ext->lce); | ||
790 | /* no race, we are within the al_lock! */ | 720 | /* no race, we are within the al_lock! */ |
791 | 721 | ||
792 | if (ext->rs_left == ext->rs_failed) { | 722 | if (ext->rs_left <= ext->rs_failed) { |
793 | ext->rs_failed = 0; | 723 | ext->rs_failed = 0; |
794 | 724 | return true; | |
795 | udw = kmalloc(sizeof(*udw), GFP_ATOMIC); | ||
796 | if (udw) { | ||
797 | udw->enr = ext->lce.lc_number; | ||
798 | udw->w.cb = w_update_odbm; | ||
799 | udw->device = device; | ||
800 | drbd_queue_work_front(&first_peer_device(device)->connection->sender_work, | ||
801 | &udw->w); | ||
802 | } else { | ||
803 | drbd_warn(device, "Could not kmalloc an udw\n"); | ||
804 | } | ||
805 | } | 725 | } |
806 | } else { | 726 | } else if (mode != SET_OUT_OF_SYNC) { |
727 | /* be quiet if lc_find() did not find it. */ | ||
807 | drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n", | 728 | drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n", |
808 | device->resync_locked, | 729 | device->resync_locked, |
809 | device->resync->nr_elements, | 730 | device->resync->nr_elements, |
810 | device->resync->flags); | 731 | device->resync->flags); |
811 | } | 732 | } |
733 | return false; | ||
812 | } | 734 | } |
813 | 735 | ||
814 | void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) | 736 | void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) |
@@ -827,105 +749,105 @@ void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go | |||
827 | } | 749 | } |
828 | } | 750 | } |
829 | 751 | ||
830 | /* clear the bit corresponding to the piece of storage in question: | 752 | /* It is called lazy update, so don't do write-out too often. */ |
831 | * size byte of data starting from sector. Only clear a bits of the affected | 753 | static bool lazy_bitmap_update_due(struct drbd_device *device) |
832 | * one ore more _aligned_ BM_BLOCK_SIZE blocks. | ||
833 | * | ||
834 | * called by worker on C_SYNC_TARGET and receiver on SyncSource. | ||
835 | * | ||
836 | */ | ||
837 | void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size, | ||
838 | const char *file, const unsigned int line) | ||
839 | { | 754 | { |
840 | /* Is called from worker and receiver context _only_ */ | 755 | return time_after(jiffies, device->rs_last_bcast + 2*HZ); |
841 | unsigned long sbnr, ebnr, lbnr; | 756 | } |
842 | unsigned long count = 0; | ||
843 | sector_t esector, nr_sectors; | ||
844 | int wake_up = 0; | ||
845 | unsigned long flags; | ||
846 | 757 | ||
847 | if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { | 758 | static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done) |
848 | drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", | 759 | { |
849 | (unsigned long long)sector, size); | 760 | if (rs_done) |
761 | set_bit(RS_DONE, &device->flags); | ||
762 | /* and also set RS_PROGRESS below */ | ||
763 | else if (!lazy_bitmap_update_due(device)) | ||
850 | return; | 764 | return; |
851 | } | ||
852 | |||
853 | if (!get_ldev(device)) | ||
854 | return; /* no disk, no metadata, no bitmap to clear bits in */ | ||
855 | |||
856 | nr_sectors = drbd_get_capacity(device->this_bdev); | ||
857 | esector = sector + (size >> 9) - 1; | ||
858 | |||
859 | if (!expect(sector < nr_sectors)) | ||
860 | goto out; | ||
861 | if (!expect(esector < nr_sectors)) | ||
862 | esector = nr_sectors - 1; | ||
863 | |||
864 | lbnr = BM_SECT_TO_BIT(nr_sectors-1); | ||
865 | |||
866 | /* we clear it (in sync). | ||
867 | * round up start sector, round down end sector. we make sure we only | ||
868 | * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */ | ||
869 | if (unlikely(esector < BM_SECT_PER_BIT-1)) | ||
870 | goto out; | ||
871 | if (unlikely(esector == (nr_sectors-1))) | ||
872 | ebnr = lbnr; | ||
873 | else | ||
874 | ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); | ||
875 | sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); | ||
876 | 765 | ||
877 | if (sbnr > ebnr) | 766 | drbd_device_post_work(device, RS_PROGRESS); |
878 | goto out; | 767 | } |
879 | 768 | ||
769 | static int update_sync_bits(struct drbd_device *device, | ||
770 | unsigned long sbnr, unsigned long ebnr, | ||
771 | enum update_sync_bits_mode mode) | ||
772 | { | ||
880 | /* | 773 | /* |
881 | * ok, (capacity & 7) != 0 sometimes, but who cares... | 774 | * We keep a count of set bits per resync-extent in the ->rs_left |
882 | * we count rs_{total,left} in bits, not sectors. | 775 | * caching member, so we need to loop and work within the resync extent |
776 | * alignment. Typically this loop will execute exactly once. | ||
883 | */ | 777 | */ |
884 | count = drbd_bm_clear_bits(device, sbnr, ebnr); | 778 | unsigned long flags; |
885 | if (count) { | 779 | unsigned long count = 0; |
886 | drbd_advance_rs_marks(device, drbd_bm_total_weight(device)); | 780 | unsigned int cleared = 0; |
887 | spin_lock_irqsave(&device->al_lock, flags); | 781 | while (sbnr <= ebnr) { |
888 | drbd_try_clear_on_disk_bm(device, sector, count, true); | 782 | /* set temporary boundary bit number to last bit number within |
889 | spin_unlock_irqrestore(&device->al_lock, flags); | 783 | * the resync extent of the current start bit number, |
890 | 784 | * but cap at provided end bit number */ | |
891 | /* just wake_up unconditional now, various lc_chaged(), | 785 | unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK); |
892 | * lc_put() in drbd_try_clear_on_disk_bm(). */ | 786 | unsigned long c; |
893 | wake_up = 1; | 787 | |
788 | if (mode == RECORD_RS_FAILED) | ||
789 | /* Only called from drbd_rs_failed_io(), bits | ||
790 | * supposedly still set. Recount, maybe some | ||
791 | * of the bits have been successfully cleared | ||
792 | * by application IO meanwhile. | ||
793 | */ | ||
794 | c = drbd_bm_count_bits(device, sbnr, tbnr); | ||
795 | else if (mode == SET_IN_SYNC) | ||
796 | c = drbd_bm_clear_bits(device, sbnr, tbnr); | ||
797 | else /* if (mode == SET_OUT_OF_SYNC) */ | ||
798 | c = drbd_bm_set_bits(device, sbnr, tbnr); | ||
799 | |||
800 | if (c) { | ||
801 | spin_lock_irqsave(&device->al_lock, flags); | ||
802 | cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode); | ||
803 | spin_unlock_irqrestore(&device->al_lock, flags); | ||
804 | count += c; | ||
805 | } | ||
806 | sbnr = tbnr + 1; | ||
894 | } | 807 | } |
895 | out: | 808 | if (count) { |
896 | put_ldev(device); | 809 | if (mode == SET_IN_SYNC) { |
897 | if (wake_up) | 810 | unsigned long still_to_go = drbd_bm_total_weight(device); |
811 | bool rs_is_done = (still_to_go <= device->rs_failed); | ||
812 | drbd_advance_rs_marks(device, still_to_go); | ||
813 | if (cleared || rs_is_done) | ||
814 | maybe_schedule_on_disk_bitmap_update(device, rs_is_done); | ||
815 | } else if (mode == RECORD_RS_FAILED) | ||
816 | device->rs_failed += count; | ||
898 | wake_up(&device->al_wait); | 817 | wake_up(&device->al_wait); |
818 | } | ||
819 | return count; | ||
899 | } | 820 | } |
900 | 821 | ||
901 | /* | 822 | /* clear the bit corresponding to the piece of storage in question: |
902 | * this is intended to set one request worth of data out of sync. | 823 | * size byte of data starting from sector. Only clear a bits of the affected |
903 | * affects at least 1 bit, | 824 | * one ore more _aligned_ BM_BLOCK_SIZE blocks. |
904 | * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits. | 825 | * |
826 | * called by worker on C_SYNC_TARGET and receiver on SyncSource. | ||
905 | * | 827 | * |
906 | * called by tl_clear and drbd_send_dblock (==drbd_make_request). | ||
907 | * so this can be _any_ process. | ||
908 | */ | 828 | */ |
909 | int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size, | 829 | int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, |
910 | const char *file, const unsigned int line) | 830 | enum update_sync_bits_mode mode, |
831 | const char *file, const unsigned int line) | ||
911 | { | 832 | { |
912 | unsigned long sbnr, ebnr, flags; | 833 | /* Is called from worker and receiver context _only_ */ |
834 | unsigned long sbnr, ebnr, lbnr; | ||
835 | unsigned long count = 0; | ||
913 | sector_t esector, nr_sectors; | 836 | sector_t esector, nr_sectors; |
914 | unsigned int enr, count = 0; | ||
915 | struct lc_element *e; | ||
916 | 837 | ||
917 | /* this should be an empty REQ_FLUSH */ | 838 | /* This would be an empty REQ_FLUSH, be silent. */ |
918 | if (size == 0) | 839 | if ((mode == SET_OUT_OF_SYNC) && size == 0) |
919 | return 0; | 840 | return 0; |
920 | 841 | ||
921 | if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { | 842 | if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { |
922 | drbd_err(device, "sector: %llus, size: %d\n", | 843 | drbd_err(device, "%s: sector=%llus size=%d nonsense!\n", |
923 | (unsigned long long)sector, size); | 844 | drbd_change_sync_fname[mode], |
845 | (unsigned long long)sector, size); | ||
924 | return 0; | 846 | return 0; |
925 | } | 847 | } |
926 | 848 | ||
927 | if (!get_ldev(device)) | 849 | if (!get_ldev(device)) |
928 | return 0; /* no disk, no metadata, no bitmap to set bits in */ | 850 | return 0; /* no disk, no metadata, no bitmap to manipulate bits in */ |
929 | 851 | ||
930 | nr_sectors = drbd_get_capacity(device->this_bdev); | 852 | nr_sectors = drbd_get_capacity(device->this_bdev); |
931 | esector = sector + (size >> 9) - 1; | 853 | esector = sector + (size >> 9) - 1; |
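The new update_sync_bits() above never crosses a resync-extent boundary in one step: tbnr is capped at the last bit of the extent containing sbnr, so each pass updates exactly one extent's cached rs_left/rs_failed counter via update_rs_extent(). A worked trace, assuming the usual 4 KiB bitmap block and 16 MiB resync extent (4096 bits per extent, BM_BLOCKS_PER_BM_EXT_MASK == 0xfff):

/*
 *   sbnr = 4000, ebnr = 9000
 *   pass 1: tbnr = min(9000, 4000 | 0xfff) = 4095  -> bits 4000..4095 (extent 0)
 *   pass 2: tbnr = min(9000, 4096 | 0xfff) = 8191  -> bits 4096..8191 (extent 1)
 *   pass 3: tbnr = min(9000, 8192 | 0xfff) = 9000  -> bits 8192..9000 (extent 2)
 */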
@@ -935,25 +857,28 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size | |||
935 | if (!expect(esector < nr_sectors)) | 857 | if (!expect(esector < nr_sectors)) |
936 | esector = nr_sectors - 1; | 858 | esector = nr_sectors - 1; |
937 | 859 | ||
938 | /* we set it out of sync, | 860 | lbnr = BM_SECT_TO_BIT(nr_sectors-1); |
939 | * we do not need to round anything here */ | ||
940 | sbnr = BM_SECT_TO_BIT(sector); | ||
941 | ebnr = BM_SECT_TO_BIT(esector); | ||
942 | |||
943 | /* ok, (capacity & 7) != 0 sometimes, but who cares... | ||
944 | * we count rs_{total,left} in bits, not sectors. */ | ||
945 | spin_lock_irqsave(&device->al_lock, flags); | ||
946 | count = drbd_bm_set_bits(device, sbnr, ebnr); | ||
947 | 861 | ||
948 | enr = BM_SECT_TO_EXT(sector); | 862 | if (mode == SET_IN_SYNC) { |
949 | e = lc_find(device->resync, enr); | 863 | /* Round up start sector, round down end sector. We make sure |
950 | if (e) | 864 | * we only clear full, aligned, BM_BLOCK_SIZE blocks. */ |
951 | lc_entry(e, struct bm_extent, lce)->rs_left += count; | 865 | if (unlikely(esector < BM_SECT_PER_BIT-1)) |
952 | spin_unlock_irqrestore(&device->al_lock, flags); | 866 | goto out; |
867 | if (unlikely(esector == (nr_sectors-1))) | ||
868 | ebnr = lbnr; | ||
869 | else | ||
870 | ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); | ||
871 | sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); | ||
872 | } else { | ||
873 | /* We set it out of sync, or record resync failure. | ||
874 | * Should not round anything here. */ | ||
875 | sbnr = BM_SECT_TO_BIT(sector); | ||
876 | ebnr = BM_SECT_TO_BIT(esector); | ||
877 | } | ||
953 | 878 | ||
879 | count = update_sync_bits(device, sbnr, ebnr, mode); | ||
954 | out: | 880 | out: |
955 | put_ldev(device); | 881 | put_ldev(device); |
956 | |||
957 | return count; | 882 | return count; |
958 | } | 883 | } |
959 | 884 | ||
@@ -1075,6 +1000,15 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector) | |||
1075 | struct lc_element *e; | 1000 | struct lc_element *e; |
1076 | struct bm_extent *bm_ext; | 1001 | struct bm_extent *bm_ext; |
1077 | int i; | 1002 | int i; |
1003 | bool throttle = drbd_rs_should_slow_down(device, sector, true); | ||
1004 | |||
1005 | /* If we need to throttle, a half-locked (only marked BME_NO_WRITES, | ||
1006 | * not yet BME_LOCKED) extent needs to be kicked out explicitly if we | ||
1007 | * need to throttle. There is at most one such half-locked extent, | ||
1008 | * which is remembered in resync_wenr. */ | ||
1009 | |||
1010 | if (throttle && device->resync_wenr != enr) | ||
1011 | return -EAGAIN; | ||
1078 | 1012 | ||
1079 | spin_lock_irq(&device->al_lock); | 1013 | spin_lock_irq(&device->al_lock); |
1080 | if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) { | 1014 | if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) { |
@@ -1098,8 +1032,10 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector) | |||
1098 | D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); | 1032 | D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); |
1099 | clear_bit(BME_NO_WRITES, &bm_ext->flags); | 1033 | clear_bit(BME_NO_WRITES, &bm_ext->flags); |
1100 | device->resync_wenr = LC_FREE; | 1034 | device->resync_wenr = LC_FREE; |
1101 | if (lc_put(device->resync, &bm_ext->lce) == 0) | 1035 | if (lc_put(device->resync, &bm_ext->lce) == 0) { |
1036 | bm_ext->flags = 0; | ||
1102 | device->resync_locked--; | 1037 | device->resync_locked--; |
1038 | } | ||
1103 | wake_up(&device->al_wait); | 1039 | wake_up(&device->al_wait); |
1104 | } else { | 1040 | } else { |
1105 | drbd_alert(device, "LOGIC BUG\n"); | 1041 | drbd_alert(device, "LOGIC BUG\n"); |
@@ -1161,8 +1097,20 @@ proceed: | |||
1161 | return 0; | 1097 | return 0; |
1162 | 1098 | ||
1163 | try_again: | 1099 | try_again: |
1164 | if (bm_ext) | 1100 | if (bm_ext) { |
1165 | device->resync_wenr = enr; | 1101 | if (throttle) { |
1102 | D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags)); | ||
1103 | D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); | ||
1104 | clear_bit(BME_NO_WRITES, &bm_ext->flags); | ||
1105 | device->resync_wenr = LC_FREE; | ||
1106 | if (lc_put(device->resync, &bm_ext->lce) == 0) { | ||
1107 | bm_ext->flags = 0; | ||
1108 | device->resync_locked--; | ||
1109 | } | ||
1110 | wake_up(&device->al_wait); | ||
1111 | } else | ||
1112 | device->resync_wenr = enr; | ||
1113 | } | ||
1166 | spin_unlock_irq(&device->al_lock); | 1114 | spin_unlock_irq(&device->al_lock); |
1167 | return -EAGAIN; | 1115 | return -EAGAIN; |
1168 | } | 1116 | } |
@@ -1270,69 +1218,3 @@ int drbd_rs_del_all(struct drbd_device *device) | |||
1270 | 1218 | ||
1271 | return 0; | 1219 | return 0; |
1272 | } | 1220 | } |
1273 | |||
1274 | /** | ||
1275 | * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks | ||
1276 | * @device: DRBD device. | ||
1277 | * @sector: The sector number. | ||
1278 | * @size: Size of failed IO operation, in byte. | ||
1279 | */ | ||
1280 | void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size) | ||
1281 | { | ||
1282 | /* Is called from worker and receiver context _only_ */ | ||
1283 | unsigned long sbnr, ebnr, lbnr; | ||
1284 | unsigned long count; | ||
1285 | sector_t esector, nr_sectors; | ||
1286 | int wake_up = 0; | ||
1287 | |||
1288 | if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { | ||
1289 | drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", | ||
1290 | (unsigned long long)sector, size); | ||
1291 | return; | ||
1292 | } | ||
1293 | nr_sectors = drbd_get_capacity(device->this_bdev); | ||
1294 | esector = sector + (size >> 9) - 1; | ||
1295 | |||
1296 | if (!expect(sector < nr_sectors)) | ||
1297 | return; | ||
1298 | if (!expect(esector < nr_sectors)) | ||
1299 | esector = nr_sectors - 1; | ||
1300 | |||
1301 | lbnr = BM_SECT_TO_BIT(nr_sectors-1); | ||
1302 | |||
1303 | /* | ||
1304 | * round up start sector, round down end sector. we make sure we only | ||
1305 | * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */ | ||
1306 | if (unlikely(esector < BM_SECT_PER_BIT-1)) | ||
1307 | return; | ||
1308 | if (unlikely(esector == (nr_sectors-1))) | ||
1309 | ebnr = lbnr; | ||
1310 | else | ||
1311 | ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); | ||
1312 | sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); | ||
1313 | |||
1314 | if (sbnr > ebnr) | ||
1315 | return; | ||
1316 | |||
1317 | /* | ||
1318 | * ok, (capacity & 7) != 0 sometimes, but who cares... | ||
1319 | * we count rs_{total,left} in bits, not sectors. | ||
1320 | */ | ||
1321 | spin_lock_irq(&device->al_lock); | ||
1322 | count = drbd_bm_count_bits(device, sbnr, ebnr); | ||
1323 | if (count) { | ||
1324 | device->rs_failed += count; | ||
1325 | |||
1326 | if (get_ldev(device)) { | ||
1327 | drbd_try_clear_on_disk_bm(device, sector, count, false); | ||
1328 | put_ldev(device); | ||
1329 | } | ||
1330 | |||
1331 | /* just wake_up unconditional now, various lc_chaged(), | ||
1332 | * lc_put() in drbd_try_clear_on_disk_bm(). */ | ||
1333 | wake_up = 1; | ||
1334 | } | ||
1335 | spin_unlock_irq(&device->al_lock); | ||
1336 | if (wake_up) | ||
1337 | wake_up(&device->al_wait); | ||
1338 | } | ||
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 1aa29f8fdfe1..426c97aef900 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -22,6 +22,8 @@ | |||
22 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | 22 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
26 | |||
25 | #include <linux/bitops.h> | 27 | #include <linux/bitops.h> |
26 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
27 | #include <linux/string.h> | 29 | #include <linux/string.h> |
@@ -353,9 +355,8 @@ static void bm_free_pages(struct page **pages, unsigned long number) | |||
353 | 355 | ||
354 | for (i = 0; i < number; i++) { | 356 | for (i = 0; i < number; i++) { |
355 | if (!pages[i]) { | 357 | if (!pages[i]) { |
356 | printk(KERN_ALERT "drbd: bm_free_pages tried to free " | 358 | pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n", |
357 | "a NULL pointer; i=%lu n=%lu\n", | 359 | i, number); |
358 | i, number); | ||
359 | continue; | 360 | continue; |
360 | } | 361 | } |
361 | __free_page(pages[i]); | 362 | __free_page(pages[i]); |
@@ -592,7 +593,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) | |||
592 | end = offset + len; | 593 | end = offset + len; |
593 | 594 | ||
594 | if (end > b->bm_words) { | 595 | if (end > b->bm_words) { |
595 | printk(KERN_ALERT "drbd: bm_memset end > bm_words\n"); | 596 | pr_alert("bm_memset end > bm_words\n"); |
596 | return; | 597 | return; |
597 | } | 598 | } |
598 | 599 | ||
@@ -602,7 +603,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) | |||
602 | p_addr = bm_map_pidx(b, idx); | 603 | p_addr = bm_map_pidx(b, idx); |
603 | bm = p_addr + MLPP(offset); | 604 | bm = p_addr + MLPP(offset); |
604 | if (bm+do_now > p_addr + LWPP) { | 605 | if (bm+do_now > p_addr + LWPP) { |
605 | printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", | 606 | pr_alert("BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", |
606 | p_addr, bm, (int)do_now); | 607 | p_addr, bm, (int)do_now); |
607 | } else | 608 | } else |
608 | memset(bm, c, do_now * sizeof(long)); | 609 | memset(bm, c, do_now * sizeof(long)); |
@@ -927,22 +928,14 @@ void drbd_bm_clear_all(struct drbd_device *device) | |||
927 | spin_unlock_irq(&b->bm_lock); | 928 | spin_unlock_irq(&b->bm_lock); |
928 | } | 929 | } |
929 | 930 | ||
930 | struct bm_aio_ctx { | 931 | static void drbd_bm_aio_ctx_destroy(struct kref *kref) |
931 | struct drbd_device *device; | ||
932 | atomic_t in_flight; | ||
933 | unsigned int done; | ||
934 | unsigned flags; | ||
935 | #define BM_AIO_COPY_PAGES 1 | ||
936 | #define BM_AIO_WRITE_HINTED 2 | ||
937 | #define BM_WRITE_ALL_PAGES 4 | ||
938 | int error; | ||
939 | struct kref kref; | ||
940 | }; | ||
941 | |||
942 | static void bm_aio_ctx_destroy(struct kref *kref) | ||
943 | { | 932 | { |
944 | struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); | 933 | struct drbd_bm_aio_ctx *ctx = container_of(kref, struct drbd_bm_aio_ctx, kref); |
934 | unsigned long flags; | ||
945 | 935 | ||
936 | spin_lock_irqsave(&ctx->device->resource->req_lock, flags); | ||
937 | list_del(&ctx->list); | ||
938 | spin_unlock_irqrestore(&ctx->device->resource->req_lock, flags); | ||
946 | put_ldev(ctx->device); | 939 | put_ldev(ctx->device); |
947 | kfree(ctx); | 940 | kfree(ctx); |
948 | } | 941 | } |
@@ -950,7 +943,7 @@ static void bm_aio_ctx_destroy(struct kref *kref) | |||
950 | /* bv_page may be a copy, or may be the original */ | 943 | /* bv_page may be a copy, or may be the original */ |
951 | static void bm_async_io_complete(struct bio *bio, int error) | 944 | static void bm_async_io_complete(struct bio *bio, int error) |
952 | { | 945 | { |
953 | struct bm_aio_ctx *ctx = bio->bi_private; | 946 | struct drbd_bm_aio_ctx *ctx = bio->bi_private; |
954 | struct drbd_device *device = ctx->device; | 947 | struct drbd_device *device = ctx->device; |
955 | struct drbd_bitmap *b = device->bitmap; | 948 | struct drbd_bitmap *b = device->bitmap; |
956 | unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); | 949 | unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); |
@@ -993,17 +986,18 @@ static void bm_async_io_complete(struct bio *bio, int error) | |||
993 | if (atomic_dec_and_test(&ctx->in_flight)) { | 986 | if (atomic_dec_and_test(&ctx->in_flight)) { |
994 | ctx->done = 1; | 987 | ctx->done = 1; |
995 | wake_up(&device->misc_wait); | 988 | wake_up(&device->misc_wait); |
996 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | 989 | kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy); |
997 | } | 990 | } |
998 | } | 991 | } |
999 | 992 | ||
1000 | static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) | 993 | static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local) |
1001 | { | 994 | { |
1002 | struct bio *bio = bio_alloc_drbd(GFP_NOIO); | 995 | struct bio *bio = bio_alloc_drbd(GFP_NOIO); |
1003 | struct drbd_device *device = ctx->device; | 996 | struct drbd_device *device = ctx->device; |
1004 | struct drbd_bitmap *b = device->bitmap; | 997 | struct drbd_bitmap *b = device->bitmap; |
1005 | struct page *page; | 998 | struct page *page; |
1006 | unsigned int len; | 999 | unsigned int len; |
1000 | unsigned int rw = (ctx->flags & BM_AIO_READ) ? READ : WRITE; | ||
1007 | 1001 | ||
1008 | sector_t on_disk_sector = | 1002 | sector_t on_disk_sector = |
1009 | device->ldev->md.md_offset + device->ldev->md.bm_offset; | 1003 | device->ldev->md.md_offset + device->ldev->md.bm_offset; |
@@ -1049,9 +1043,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must | |||
1049 | /* | 1043 | /* |
1050 | * bm_rw: read/write the whole bitmap from/to its on disk location. | 1044 | * bm_rw: read/write the whole bitmap from/to its on disk location. |
1051 | */ | 1045 | */ |
1052 | static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) | 1046 | static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local) |
1053 | { | 1047 | { |
1054 | struct bm_aio_ctx *ctx; | 1048 | struct drbd_bm_aio_ctx *ctx; |
1055 | struct drbd_bitmap *b = device->bitmap; | 1049 | struct drbd_bitmap *b = device->bitmap; |
1056 | int num_pages, i, count = 0; | 1050 | int num_pages, i, count = 0; |
1057 | unsigned long now; | 1051 | unsigned long now; |
@@ -1067,12 +1061,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la | |||
1067 | * as we submit copies of pages anyways. | 1061 | * as we submit copies of pages anyways. |
1068 | */ | 1062 | */ |
1069 | 1063 | ||
1070 | ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); | 1064 | ctx = kmalloc(sizeof(struct drbd_bm_aio_ctx), GFP_NOIO); |
1071 | if (!ctx) | 1065 | if (!ctx) |
1072 | return -ENOMEM; | 1066 | return -ENOMEM; |
1073 | 1067 | ||
1074 | *ctx = (struct bm_aio_ctx) { | 1068 | *ctx = (struct drbd_bm_aio_ctx) { |
1075 | .device = device, | 1069 | .device = device, |
1070 | .start_jif = jiffies, | ||
1076 | .in_flight = ATOMIC_INIT(1), | 1071 | .in_flight = ATOMIC_INIT(1), |
1077 | .done = 0, | 1072 | .done = 0, |
1078 | .flags = flags, | 1073 | .flags = flags, |
@@ -1080,15 +1075,21 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la | |||
1080 | .kref = { ATOMIC_INIT(2) }, | 1075 | .kref = { ATOMIC_INIT(2) }, |
1081 | }; | 1076 | }; |
1082 | 1077 | ||
1083 | if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ | 1078 | if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in drbd_bm_aio_ctx_destroy() */ |
1084 | drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); | 1079 | drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); |
1085 | kfree(ctx); | 1080 | kfree(ctx); |
1086 | return -ENODEV; | 1081 | return -ENODEV; |
1087 | } | 1082 | } |
1083 | /* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from | ||
1084 | drbd_adm_attach(), after device->ldev was assigned. */ | ||
1088 | 1085 | ||
1089 | if (!ctx->flags) | 1086 | if (0 == (ctx->flags & ~BM_AIO_READ)) |
1090 | WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); | 1087 | WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); |
1091 | 1088 | ||
1089 | spin_lock_irq(&device->resource->req_lock); | ||
1090 | list_add_tail(&ctx->list, &device->pending_bitmap_io); | ||
1091 | spin_unlock_irq(&device->resource->req_lock); | ||
1092 | |||
1092 | num_pages = b->bm_number_of_pages; | 1093 | num_pages = b->bm_number_of_pages; |
1093 | 1094 | ||
1094 | now = jiffies; | 1095 | now = jiffies; |
@@ -1098,13 +1099,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la | |||
1098 | /* ignore completely unchanged pages */ | 1099 | /* ignore completely unchanged pages */ |
1099 | if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) | 1100 | if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) |
1100 | break; | 1101 | break; |
1101 | if (rw & WRITE) { | 1102 | if (!(flags & BM_AIO_READ)) { |
1102 | if ((flags & BM_AIO_WRITE_HINTED) && | 1103 | if ((flags & BM_AIO_WRITE_HINTED) && |
1103 | !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, | 1104 | !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, |
1104 | &page_private(b->bm_pages[i]))) | 1105 | &page_private(b->bm_pages[i]))) |
1105 | continue; | 1106 | continue; |
1106 | 1107 | ||
1107 | if (!(flags & BM_WRITE_ALL_PAGES) && | 1108 | if (!(flags & BM_AIO_WRITE_ALL_PAGES) && |
1108 | bm_test_page_unchanged(b->bm_pages[i])) { | 1109 | bm_test_page_unchanged(b->bm_pages[i])) { |
1109 | dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i); | 1110 | dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i); |
1110 | continue; | 1111 | continue; |
@@ -1118,7 +1119,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la | |||
1118 | } | 1119 | } |
1119 | } | 1120 | } |
1120 | atomic_inc(&ctx->in_flight); | 1121 | atomic_inc(&ctx->in_flight); |
1121 | bm_page_io_async(ctx, i, rw); | 1122 | bm_page_io_async(ctx, i); |
1122 | ++count; | 1123 | ++count; |
1123 | cond_resched(); | 1124 | cond_resched(); |
1124 | } | 1125 | } |
@@ -1134,12 +1135,12 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la | |||
1134 | if (!atomic_dec_and_test(&ctx->in_flight)) | 1135 | if (!atomic_dec_and_test(&ctx->in_flight)) |
1135 | wait_until_done_or_force_detached(device, device->ldev, &ctx->done); | 1136 | wait_until_done_or_force_detached(device, device->ldev, &ctx->done); |
1136 | else | 1137 | else |
1137 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | 1138 | kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy); |
1138 | 1139 | ||
1139 | /* summary for global bitmap IO */ | 1140 | /* summary for global bitmap IO */ |
1140 | if (flags == 0) | 1141 | if (flags == 0) |
1141 | drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n", | 1142 | drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n", |
1142 | rw == WRITE ? "WRITE" : "READ", | 1143 | (flags & BM_AIO_READ) ? "READ" : "WRITE", |
1143 | count, jiffies - now); | 1144 | count, jiffies - now); |
1144 | 1145 | ||
1145 | if (ctx->error) { | 1146 | if (ctx->error) { |
@@ -1152,20 +1153,18 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la | |||
1152 | err = -EIO; /* Disk timeout/force-detach during IO... */ | 1153 | err = -EIO; /* Disk timeout/force-detach during IO... */ |
1153 | 1154 | ||
1154 | now = jiffies; | 1155 | now = jiffies; |
1155 | if (rw == WRITE) { | 1156 | if (flags & BM_AIO_READ) { |
1156 | drbd_md_flush(device); | ||
1157 | } else /* rw == READ */ { | ||
1158 | b->bm_set = bm_count_bits(b); | 1157 | b->bm_set = bm_count_bits(b); |
1159 | drbd_info(device, "recounting of set bits took additional %lu jiffies\n", | 1158 | drbd_info(device, "recounting of set bits took additional %lu jiffies\n", |
1160 | jiffies - now); | 1159 | jiffies - now); |
1161 | } | 1160 | } |
1162 | now = b->bm_set; | 1161 | now = b->bm_set; |
1163 | 1162 | ||
1164 | if (flags == 0) | 1163 | if ((flags & ~BM_AIO_READ) == 0) |
1165 | drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", | 1164 | drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", |
1166 | ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); | 1165 | ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); |
1167 | 1166 | ||
1168 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | 1167 | kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy); |
1169 | return err; | 1168 | return err; |
1170 | } | 1169 | } |
1171 | 1170 | ||
@@ -1175,7 +1174,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la | |||
1175 | */ | 1174 | */ |
1176 | int drbd_bm_read(struct drbd_device *device) __must_hold(local) | 1175 | int drbd_bm_read(struct drbd_device *device) __must_hold(local) |
1177 | { | 1176 | { |
1178 | return bm_rw(device, READ, 0, 0); | 1177 | return bm_rw(device, BM_AIO_READ, 0); |
1179 | } | 1178 | } |
1180 | 1179 | ||
1181 | /** | 1180 | /** |
@@ -1186,7 +1185,7 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local) | |||
1186 | */ | 1185 | */ |
1187 | int drbd_bm_write(struct drbd_device *device) __must_hold(local) | 1186 | int drbd_bm_write(struct drbd_device *device) __must_hold(local) |
1188 | { | 1187 | { |
1189 | return bm_rw(device, WRITE, 0, 0); | 1188 | return bm_rw(device, 0, 0); |
1190 | } | 1189 | } |
1191 | 1190 | ||
1192 | /** | 1191 | /** |
@@ -1197,7 +1196,17 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local) | |||
1197 | */ | 1196 | */ |
1198 | int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) | 1197 | int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) |
1199 | { | 1198 | { |
1200 | return bm_rw(device, WRITE, BM_WRITE_ALL_PAGES, 0); | 1199 | return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0); |
1200 | } | ||
1201 | |||
1202 | /** | ||
1203 | * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed. | ||
1204 | * @device: DRBD device. | ||
1205 | * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages | ||
1206 | */ | ||
1207 | int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local) | ||
1208 | { | ||
1209 | return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx); | ||
1201 | } | 1210 | } |
1202 | 1211 | ||
1203 | /** | 1212 | /** |
@@ -1213,7 +1222,7 @@ int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) | |||
1213 | */ | 1222 | */ |
1214 | int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) | 1223 | int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) |
1215 | { | 1224 | { |
1216 | return bm_rw(device, WRITE, BM_AIO_COPY_PAGES, 0); | 1225 | return bm_rw(device, BM_AIO_COPY_PAGES, 0); |
1217 | } | 1226 | } |
1218 | 1227 | ||
1219 | /** | 1228 | /** |
@@ -1222,62 +1231,7 @@ int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) | |||
1222 | */ | 1231 | */ |
1223 | int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local) | 1232 | int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local) |
1224 | { | 1233 | { |
1225 | return bm_rw(device, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); | 1234 | return bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); |
1226 | } | ||
1227 | |||
1228 | /** | ||
1229 | * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap | ||
1230 | * @device: DRBD device. | ||
1231 | * @idx: bitmap page index | ||
1232 | * | ||
1233 | * We don't want to special case on logical_block_size of the backend device, | ||
1234 | * so we submit PAGE_SIZE aligned pieces. | ||
1235 | * Note that on "most" systems, PAGE_SIZE is 4k. | ||
1236 | * | ||
1237 | * In case this becomes an issue on systems with larger PAGE_SIZE, | ||
1238 | * we may want to change this again to write 4k aligned 4k pieces. | ||
1239 | */ | ||
1240 | int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local) | ||
1241 | { | ||
1242 | struct bm_aio_ctx *ctx; | ||
1243 | int err; | ||
1244 | |||
1245 | if (bm_test_page_unchanged(device->bitmap->bm_pages[idx])) { | ||
1246 | dynamic_drbd_dbg(device, "skipped bm page write for idx %u\n", idx); | ||
1247 | return 0; | ||
1248 | } | ||
1249 | |||
1250 | ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); | ||
1251 | if (!ctx) | ||
1252 | return -ENOMEM; | ||
1253 | |||
1254 | *ctx = (struct bm_aio_ctx) { | ||
1255 | .device = device, | ||
1256 | .in_flight = ATOMIC_INIT(1), | ||
1257 | .done = 0, | ||
1258 | .flags = BM_AIO_COPY_PAGES, | ||
1259 | .error = 0, | ||
1260 | .kref = { ATOMIC_INIT(2) }, | ||
1261 | }; | ||
1262 | |||
1263 | if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ | ||
1264 | drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); | ||
1265 | kfree(ctx); | ||
1266 | return -ENODEV; | ||
1267 | } | ||
1268 | |||
1269 | bm_page_io_async(ctx, idx, WRITE_SYNC); | ||
1270 | wait_until_done_or_force_detached(device, device->ldev, &ctx->done); | ||
1271 | |||
1272 | if (ctx->error) | ||
1273 | drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); | ||
1274 | /* that causes us to detach, so the in memory bitmap will be | ||
1275 | * gone in a moment as well. */ | ||
1276 | |||
1277 | device->bm_writ_cnt++; | ||
1278 | err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; | ||
1279 | kref_put(&ctx->kref, &bm_aio_ctx_destroy); | ||
1280 | return err; | ||
1281 | } | 1235 | } |
1282 | 1236 | ||
1283 | /* NOTE | 1237 | /* NOTE |
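With the rw argument gone from bm_rw(), the direction and behaviour of a whole-bitmap I/O are now encoded entirely in the flags; drbd_bm_write_page() disappears because its only caller, w_update_odbm(), was removed in the drbd_actlog.c hunks above. A quick reference, collected from the hunks in this file:

/*
 *   drbd_bm_read()              bm_rw(device, BM_AIO_READ, 0)
 *   drbd_bm_write()             bm_rw(device, 0, 0)
 *   drbd_bm_write_all()         bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0)
 *   drbd_bm_write_lazy(idx)     bm_rw(device, BM_AIO_COPY_PAGES, idx)
 *   drbd_bm_write_copy_pages()  bm_rw(device, BM_AIO_COPY_PAGES, 0)
 *   drbd_bm_write_hinted()      bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0)
 */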
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
new file mode 100644
index 000000000000..5c20b18540b8
--- /dev/null
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -0,0 +1,958 @@ | |||
1 | #define pr_fmt(fmt) "drbd debugfs: " fmt | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/debugfs.h> | ||
5 | #include <linux/seq_file.h> | ||
6 | #include <linux/stat.h> | ||
7 | #include <linux/jiffies.h> | ||
8 | #include <linux/list.h> | ||
9 | |||
10 | #include "drbd_int.h" | ||
11 | #include "drbd_req.h" | ||
12 | #include "drbd_debugfs.h" | ||
13 | |||
14 | |||
15 | /********************************************************************** | ||
16 | * Whenever you change the file format, remember to bump the version. * | ||
17 | **********************************************************************/ | ||
18 | |||
19 | static struct dentry *drbd_debugfs_root; | ||
20 | static struct dentry *drbd_debugfs_version; | ||
21 | static struct dentry *drbd_debugfs_resources; | ||
22 | static struct dentry *drbd_debugfs_minors; | ||
23 | |||
24 | static void seq_print_age_or_dash(struct seq_file *m, bool valid, unsigned long dt) | ||
25 | { | ||
26 | if (valid) | ||
27 | seq_printf(m, "\t%d", jiffies_to_msecs(dt)); | ||
28 | else | ||
29 | seq_printf(m, "\t-"); | ||
30 | } | ||
31 | |||
32 | static void __seq_print_rq_state_bit(struct seq_file *m, | ||
33 | bool is_set, char *sep, const char *set_name, const char *unset_name) | ||
34 | { | ||
35 | if (is_set && set_name) { | ||
36 | seq_putc(m, *sep); | ||
37 | seq_puts(m, set_name); | ||
38 | *sep = '|'; | ||
39 | } else if (!is_set && unset_name) { | ||
40 | seq_putc(m, *sep); | ||
41 | seq_puts(m, unset_name); | ||
42 | *sep = '|'; | ||
43 | } | ||
44 | } | ||
45 | |||
46 | static void seq_print_rq_state_bit(struct seq_file *m, | ||
47 | bool is_set, char *sep, const char *set_name) | ||
48 | { | ||
49 | __seq_print_rq_state_bit(m, is_set, sep, set_name, NULL); | ||
50 | } | ||
51 | |||
52 | /* pretty print enum drbd_req_state_bits req->rq_state */ | ||
53 | static void seq_print_request_state(struct seq_file *m, struct drbd_request *req) | ||
54 | { | ||
55 | unsigned int s = req->rq_state; | ||
56 | char sep = ' '; | ||
57 | seq_printf(m, "\t0x%08x", s); | ||
58 | seq_printf(m, "\tmaster: %s", req->master_bio ? "pending" : "completed"); | ||
59 | |||
60 | /* RQ_WRITE ignored, already reported */ | ||
61 | seq_puts(m, "\tlocal:"); | ||
62 | seq_print_rq_state_bit(m, s & RQ_IN_ACT_LOG, &sep, "in-AL"); | ||
63 | seq_print_rq_state_bit(m, s & RQ_POSTPONED, &sep, "postponed"); | ||
64 | seq_print_rq_state_bit(m, s & RQ_COMPLETION_SUSP, &sep, "suspended"); | ||
65 | sep = ' '; | ||
66 | seq_print_rq_state_bit(m, s & RQ_LOCAL_PENDING, &sep, "pending"); | ||
67 | seq_print_rq_state_bit(m, s & RQ_LOCAL_COMPLETED, &sep, "completed"); | ||
68 | seq_print_rq_state_bit(m, s & RQ_LOCAL_ABORTED, &sep, "aborted"); | ||
69 | seq_print_rq_state_bit(m, s & RQ_LOCAL_OK, &sep, "ok"); | ||
70 | if (sep == ' ') | ||
71 | seq_puts(m, " -"); | ||
72 | |||
73 | /* for_each_connection ... */ | ||
74 | seq_printf(m, "\tnet:"); | ||
75 | sep = ' '; | ||
76 | seq_print_rq_state_bit(m, s & RQ_NET_PENDING, &sep, "pending"); | ||
77 | seq_print_rq_state_bit(m, s & RQ_NET_QUEUED, &sep, "queued"); | ||
78 | seq_print_rq_state_bit(m, s & RQ_NET_SENT, &sep, "sent"); | ||
79 | seq_print_rq_state_bit(m, s & RQ_NET_DONE, &sep, "done"); | ||
80 | seq_print_rq_state_bit(m, s & RQ_NET_SIS, &sep, "sis"); | ||
81 | seq_print_rq_state_bit(m, s & RQ_NET_OK, &sep, "ok"); | ||
82 | if (sep == ' ') | ||
83 | seq_puts(m, " -"); | ||
84 | |||
85 | seq_printf(m, " :"); | ||
86 | sep = ' '; | ||
87 | seq_print_rq_state_bit(m, s & RQ_EXP_RECEIVE_ACK, &sep, "B"); | ||
88 | seq_print_rq_state_bit(m, s & RQ_EXP_WRITE_ACK, &sep, "C"); | ||
89 | seq_print_rq_state_bit(m, s & RQ_EXP_BARR_ACK, &sep, "barr"); | ||
90 | if (sep == ' ') | ||
91 | seq_puts(m, " -"); | ||
92 | seq_printf(m, "\n"); | ||
93 | } | ||
94 | |||
95 | static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, unsigned long now) | ||
96 | { | ||
97 | /* change anything here, fixup header below! */ | ||
98 | unsigned int s = req->rq_state; | ||
99 | |||
100 | #define RQ_HDR_1 "epoch\tsector\tsize\trw" | ||
101 | seq_printf(m, "0x%x\t%llu\t%u\t%s", | ||
102 | req->epoch, | ||
103 | (unsigned long long)req->i.sector, req->i.size >> 9, | ||
104 | (s & RQ_WRITE) ? "W" : "R"); | ||
105 | |||
106 | #define RQ_HDR_2 "\tstart\tin AL\tsubmit" | ||
107 | seq_printf(m, "\t%d", jiffies_to_msecs(now - req->start_jif)); | ||
108 | seq_print_age_or_dash(m, s & RQ_IN_ACT_LOG, now - req->in_actlog_jif); | ||
109 | seq_print_age_or_dash(m, s & RQ_LOCAL_PENDING, now - req->pre_submit_jif); | ||
110 | |||
111 | #define RQ_HDR_3 "\tsent\tacked\tdone" | ||
112 | seq_print_age_or_dash(m, s & RQ_NET_SENT, now - req->pre_send_jif); | ||
113 | seq_print_age_or_dash(m, (s & RQ_NET_SENT) && !(s & RQ_NET_PENDING), now - req->acked_jif); | ||
114 | seq_print_age_or_dash(m, s & RQ_NET_DONE, now - req->net_done_jif); | ||
115 | |||
116 | #define RQ_HDR_4 "\tstate\n" | ||
117 | seq_print_request_state(m, req); | ||
118 | } | ||
119 | #define RQ_HDR RQ_HDR_1 RQ_HDR_2 RQ_HDR_3 RQ_HDR_4 | ||
120 | |||
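The RQ_HDR_* fragments are kept textually next to the seq_printf() calls that emit the matching columns, and rely on C's adjacent string literal concatenation to form the full header. A minimal stand-alone sketch of that pattern (names here are illustrative, not from the driver):

	#include <stdio.h>

	#define HDR_1 "sector\tsize"	/* columns of the first printf below */
	#define HDR_2 "\tage\n"		/* columns of the second printf below */
	#define HDR   HDR_1 HDR_2	/* adjacent literals merge at compile time */

	int main(void)
	{
		printf(HDR);				/* "sector\tsize\tage\n" */
		printf("%llu\t%u", 4096ULL, 8u);	/* matches HDR_1 */
		printf("\t%u\n", 150u);			/* matches HDR_2 */
		return 0;
	}

Changing a column then means fixing the adjacent HDR_* fragment, which is what the "change anything here, fixup header below!" comment above asks for.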
121 | static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, unsigned long now) | ||
122 | { | ||
123 | seq_printf(m, "%u\t%u\t", req->device->minor, req->device->vnr); | ||
124 | seq_print_one_request(m, req, now); | ||
125 | } | ||
126 | |||
127 | static void seq_print_resource_pending_meta_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now) | ||
128 | { | ||
129 | struct drbd_device *device; | ||
130 | unsigned int i; | ||
131 | |||
132 | seq_puts(m, "minor\tvnr\tstart\tsubmit\tintent\n"); | ||
133 | rcu_read_lock(); | ||
134 | idr_for_each_entry(&resource->devices, device, i) { | ||
135 | struct drbd_md_io tmp; | ||
136 | /* In theory this is racy, | ||
137 | * in the sense that there could have been a | ||
138 | * drbd_md_put_buffer(); drbd_md_get_buffer(); | ||
139 | * between accessing these members here. */ | ||
140 | tmp = device->md_io; | ||
141 | if (atomic_read(&tmp.in_use)) { | ||
142 | seq_printf(m, "%u\t%u\t%d\t", | ||
143 | device->minor, device->vnr, | ||
144 | jiffies_to_msecs(now - tmp.start_jif)); | ||
145 | if (time_before(tmp.submit_jif, tmp.start_jif)) | ||
146 | seq_puts(m, "-\t"); | ||
147 | else | ||
148 | seq_printf(m, "%d\t", jiffies_to_msecs(now - tmp.submit_jif)); | ||
149 | seq_printf(m, "%s\n", tmp.current_use); | ||
150 | } | ||
151 | } | ||
152 | rcu_read_unlock(); | ||
153 | } | ||
154 | |||
155 | static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *resource, unsigned long now) | ||
156 | { | ||
157 | struct drbd_device *device; | ||
158 | unsigned int i; | ||
159 | |||
160 | seq_puts(m, "minor\tvnr\tage\t#waiting\n"); | ||
161 | rcu_read_lock(); | ||
162 | idr_for_each_entry(&resource->devices, device, i) { | ||
163 | unsigned long jif; | ||
164 | struct drbd_request *req; | ||
165 | int n = atomic_read(&device->ap_actlog_cnt); | ||
166 | if (n) { | ||
167 | spin_lock_irq(&device->resource->req_lock); | ||
168 | req = list_first_entry_or_null(&device->pending_master_completion[1], | ||
169 | struct drbd_request, req_pending_master_completion); | ||
170 | /* if the oldest request does not wait for the activity log | ||
171 | * it is not interesting for us here */ | ||
172 | if (req && !(req->rq_state & RQ_IN_ACT_LOG)) | ||
173 | jif = req->start_jif; | ||
174 | else | ||
175 | req = NULL; | ||
176 | spin_unlock_irq(&device->resource->req_lock); | ||
177 | } | ||
178 | if (n) { | ||
179 | seq_printf(m, "%u\t%u\t", device->minor, device->vnr); | ||
180 | if (req) | ||
181 | seq_printf(m, "%u\t", jiffies_to_msecs(now - jif)); | ||
182 | else | ||
183 | seq_puts(m, "-\t"); | ||
184 | seq_printf(m, "%u\n", n); | ||
185 | } | ||
186 | } | ||
187 | rcu_read_unlock(); | ||
188 | } | ||
189 | |||
190 | static void seq_print_device_bitmap_io(struct seq_file *m, struct drbd_device *device, unsigned long now) | ||
191 | { | ||
192 | struct drbd_bm_aio_ctx *ctx; | ||
193 | unsigned long start_jif; | ||
194 | unsigned int in_flight; | ||
195 | unsigned int flags; | ||
196 | spin_lock_irq(&device->resource->req_lock); | ||
197 | ctx = list_first_entry_or_null(&device->pending_bitmap_io, struct drbd_bm_aio_ctx, list); | ||
198 | if (ctx && ctx->done) | ||
199 | ctx = NULL; | ||
200 | if (ctx) { | ||
201 | start_jif = ctx->start_jif; | ||
202 | in_flight = atomic_read(&ctx->in_flight); | ||
203 | flags = ctx->flags; | ||
204 | } | ||
205 | spin_unlock_irq(&device->resource->req_lock); | ||
206 | if (ctx) { | ||
207 | seq_printf(m, "%u\t%u\t%c\t%u\t%u\n", | ||
208 | device->minor, device->vnr, | ||
209 | (flags & BM_AIO_READ) ? 'R' : 'W', | ||
210 | jiffies_to_msecs(now - start_jif), | ||
211 | in_flight); | ||
212 | } | ||
213 | } | ||
214 | |||
215 | static void seq_print_resource_pending_bitmap_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now) | ||
216 | { | ||
217 | struct drbd_device *device; | ||
218 | unsigned int i; | ||
219 | |||
220 | seq_puts(m, "minor\tvnr\trw\tage\t#in-flight\n"); | ||
221 | rcu_read_lock(); | ||
222 | idr_for_each_entry(&resource->devices, device, i) { | ||
223 | seq_print_device_bitmap_io(m, device, now); | ||
224 | } | ||
225 | rcu_read_unlock(); | ||
226 | } | ||
227 | |||
228 | /* pretty print enum peer_req->flags */ | ||
229 | static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_request *peer_req) | ||
230 | { | ||
231 | unsigned long f = peer_req->flags; | ||
232 | char sep = ' '; | ||
233 | |||
234 | __seq_print_rq_state_bit(m, f & EE_SUBMITTED, &sep, "submitted", "preparing"); | ||
235 | __seq_print_rq_state_bit(m, f & EE_APPLICATION, &sep, "application", "internal"); | ||
236 | seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL"); | ||
237 | seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C"); | ||
238 | seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync"); | ||
239 | |||
240 | if (f & EE_IS_TRIM) { | ||
241 | seq_putc(m, sep); | ||
242 | sep = '|'; | ||
243 | if (f & EE_IS_TRIM_USE_ZEROOUT) | ||
244 | seq_puts(m, "zero-out"); | ||
245 | else | ||
246 | seq_puts(m, "trim"); | ||
247 | } | ||
248 | seq_putc(m, '\n'); | ||
249 | } | ||
250 | |||
251 | static void seq_print_peer_request(struct seq_file *m, | ||
252 | struct drbd_device *device, struct list_head *lh, | ||
253 | unsigned long now) | ||
254 | { | ||
255 | bool reported_preparing = false; | ||
256 | struct drbd_peer_request *peer_req; | ||
257 | list_for_each_entry(peer_req, lh, w.list) { | ||
258 | if (reported_preparing && !(peer_req->flags & EE_SUBMITTED)) | ||
259 | continue; | ||
260 | |||
261 | if (device) | ||
262 | seq_printf(m, "%u\t%u\t", device->minor, device->vnr); | ||
263 | |||
264 | seq_printf(m, "%llu\t%u\t%c\t%u\t", | ||
265 | (unsigned long long)peer_req->i.sector, peer_req->i.size >> 9, | ||
266 | (peer_req->flags & EE_WRITE) ? 'W' : 'R', | ||
267 | jiffies_to_msecs(now - peer_req->submit_jif)); | ||
268 | seq_print_peer_request_flags(m, peer_req); | ||
269 | if (peer_req->flags & EE_SUBMITTED) | ||
270 | break; | ||
271 | else | ||
272 | reported_preparing = true; | ||
273 | } | ||
274 | } | ||
275 | |||
276 | static void seq_print_device_peer_requests(struct seq_file *m, | ||
277 | struct drbd_device *device, unsigned long now) | ||
278 | { | ||
279 | seq_puts(m, "minor\tvnr\tsector\tsize\trw\tage\tflags\n"); | ||
280 | spin_lock_irq(&device->resource->req_lock); | ||
281 | seq_print_peer_request(m, device, &device->active_ee, now); | ||
282 | seq_print_peer_request(m, device, &device->read_ee, now); | ||
283 | seq_print_peer_request(m, device, &device->sync_ee, now); | ||
284 | spin_unlock_irq(&device->resource->req_lock); | ||
285 | if (test_bit(FLUSH_PENDING, &device->flags)) { | ||
286 | seq_printf(m, "%u\t%u\t-\t-\tF\t%u\tflush\n", | ||
287 | device->minor, device->vnr, | ||
288 | jiffies_to_msecs(now - device->flush_jif)); | ||
289 | } | ||
290 | } | ||
291 | |||
292 | static void seq_print_resource_pending_peer_requests(struct seq_file *m, | ||
293 | struct drbd_resource *resource, unsigned long now) | ||
294 | { | ||
295 | struct drbd_device *device; | ||
296 | unsigned int i; | ||
297 | |||
298 | rcu_read_lock(); | ||
299 | idr_for_each_entry(&resource->devices, device, i) { | ||
300 | seq_print_device_peer_requests(m, device, now); | ||
301 | } | ||
302 | rcu_read_unlock(); | ||
303 | } | ||
304 | |||
305 | static void seq_print_resource_transfer_log_summary(struct seq_file *m, | ||
306 | struct drbd_resource *resource, | ||
307 | struct drbd_connection *connection, | ||
308 | unsigned long now) | ||
309 | { | ||
310 | struct drbd_request *req; | ||
311 | unsigned int count = 0; | ||
312 | unsigned int show_state = 0; | ||
313 | |||
314 | seq_puts(m, "n\tdevice\tvnr\t" RQ_HDR); | ||
315 | spin_lock_irq(&resource->req_lock); | ||
316 | list_for_each_entry(req, &connection->transfer_log, tl_requests) { | ||
317 | unsigned int tmp = 0; | ||
318 | unsigned int s; | ||
319 | ++count; | ||
320 | |||
321 | /* don't disable irq "forever" */ | ||
322 | if (!(count & 0x1ff)) { | ||
323 | struct drbd_request *req_next; | ||
324 | kref_get(&req->kref); | ||
325 | spin_unlock_irq(&resource->req_lock); | ||
326 | cond_resched(); | ||
327 | spin_lock_irq(&resource->req_lock); | ||
328 | req_next = list_next_entry(req, tl_requests); | ||
329 | if (kref_put(&req->kref, drbd_req_destroy)) | ||
330 | req = req_next; | ||
331 | if (&req->tl_requests == &connection->transfer_log) | ||
332 | break; | ||
333 | } | ||
334 | |||
335 | s = req->rq_state; | ||
336 | |||
337 | /* This is meant to summarize timing issues, to be able to tell | ||
338 | * local disk problems from network problems. | ||
339 | * Skip a request if we have already shown an even older request | ||
340 | * with similar aspects. */ | ||
341 | if (req->master_bio == NULL) | ||
342 | tmp |= 1; | ||
343 | if ((s & RQ_LOCAL_MASK) && (s & RQ_LOCAL_PENDING)) | ||
344 | tmp |= 2; | ||
345 | if (s & RQ_NET_MASK) { | ||
346 | if (!(s & RQ_NET_SENT)) | ||
347 | tmp |= 4; | ||
348 | if (s & RQ_NET_PENDING) | ||
349 | tmp |= 8; | ||
350 | if (!(s & RQ_NET_DONE)) | ||
351 | tmp |= 16; | ||
352 | } | ||
353 | if ((tmp & show_state) == tmp) | ||
354 | continue; | ||
355 | show_state |= tmp; | ||
356 | seq_printf(m, "%u\t", count); | ||
357 | seq_print_minor_vnr_req(m, req, now); | ||
358 | if (show_state == 0x1f) | ||
359 | break; | ||
360 | } | ||
361 | spin_unlock_irq(&resource->req_lock); | ||
362 | } | ||
363 | |||
364 | /* TODO: transfer_log and friends should be moved to resource */ | ||
365 | static int in_flight_summary_show(struct seq_file *m, void *pos) | ||
366 | { | ||
367 | struct drbd_resource *resource = m->private; | ||
368 | struct drbd_connection *connection; | ||
369 | unsigned long jif = jiffies; | ||
370 | |||
371 | connection = first_connection(resource); | ||
372 | /* This does not happen, actually. | ||
373 | * But be robust and prepare for future code changes. */ | ||
374 | if (!connection || !kref_get_unless_zero(&connection->kref)) | ||
375 | return -ESTALE; | ||
376 | |||
377 | /* BUMP me if you change the file format/content/presentation */ | ||
378 | seq_printf(m, "v: %u\n\n", 0); | ||
379 | |||
380 | seq_puts(m, "oldest bitmap IO\n"); | ||
381 | seq_print_resource_pending_bitmap_io(m, resource, jif); | ||
382 | seq_putc(m, '\n'); | ||
383 | |||
384 | seq_puts(m, "meta data IO\n"); | ||
385 | seq_print_resource_pending_meta_io(m, resource, jif); | ||
386 | seq_putc(m, '\n'); | ||
387 | |||
388 | seq_puts(m, "socket buffer stats\n"); | ||
389 | /* for each connection ... once we have more than one */ | ||
390 | rcu_read_lock(); | ||
391 | if (connection->data.socket) { | ||
392 | /* open coded SIOCINQ, the "relevant" part */ | ||
393 | struct tcp_sock *tp = tcp_sk(connection->data.socket->sk); | ||
394 | int answ = tp->rcv_nxt - tp->copied_seq; | ||
395 | seq_printf(m, "unread receive buffer: %u Byte\n", answ); | ||
396 | /* open coded SIOCOUTQ, the "relevant" part */ | ||
397 | answ = tp->write_seq - tp->snd_una; | ||
398 | seq_printf(m, "unacked send buffer: %u Byte\n", answ); | ||
399 | } | ||
400 | rcu_read_unlock(); | ||
401 | seq_putc(m, '\n'); | ||
402 | |||
403 | seq_puts(m, "oldest peer requests\n"); | ||
404 | seq_print_resource_pending_peer_requests(m, resource, jif); | ||
405 | seq_putc(m, '\n'); | ||
406 | |||
407 | seq_puts(m, "application requests waiting for activity log\n"); | ||
408 | seq_print_waiting_for_AL(m, resource, jif); | ||
409 | seq_putc(m, '\n'); | ||
410 | |||
411 | seq_puts(m, "oldest application requests\n"); | ||
412 | seq_print_resource_transfer_log_summary(m, resource, connection, jif); | ||
413 | seq_putc(m, '\n'); | ||
414 | |||
415 | jif = jiffies - jif; | ||
416 | if (jif) | ||
417 | seq_printf(m, "generated in %d ms\n", jiffies_to_msecs(jif)); | ||
418 | kref_put(&connection->kref, drbd_destroy_connection); | ||
419 | return 0; | ||
420 | } | ||
421 | |||
422 | /* simple_positive(file->f_dentry) respectively debugfs_positive(), | ||
423 | * but neither is "reachable" from here. | ||
424 | * So we have our own inline version of it below. :-( */ | ||
425 | static inline int debugfs_positive(struct dentry *dentry) | ||
426 | { | ||
427 | return dentry->d_inode && !d_unhashed(dentry); | ||
428 | } | ||
429 | |||
430 | /* make sure at *open* time that the respective object won't go away. */ | ||
431 | static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *), | ||
432 | void *data, struct kref *kref, | ||
433 | void (*release)(struct kref *)) | ||
434 | { | ||
435 | struct dentry *parent; | ||
436 | int ret = -ESTALE; | ||
437 | |||
438 | /* Are we still linked, | ||
439 | * or has debugfs_remove() already been called? */ | ||
440 | parent = file->f_dentry->d_parent; | ||
441 | /* not sure if this can happen: */ | ||
442 | if (!parent || !parent->d_inode) | ||
443 | goto out; | ||
444 | /* serialize with d_delete() */ | ||
445 | mutex_lock(&parent->d_inode->i_mutex); | ||
446 | /* Make sure the object is still alive */ | ||
447 | if (debugfs_positive(file->f_dentry) | ||
448 | && kref_get_unless_zero(kref)) | ||
449 | ret = 0; | ||
450 | mutex_unlock(&parent->d_inode->i_mutex); | ||
451 | if (!ret) { | ||
452 | ret = single_open(file, show, data); | ||
453 | if (ret) | ||
454 | kref_put(kref, release); | ||
455 | } | ||
456 | out: | ||
457 | return ret; | ||
458 | } | ||
459 | |||
460 | static int in_flight_summary_open(struct inode *inode, struct file *file) | ||
461 | { | ||
462 | struct drbd_resource *resource = inode->i_private; | ||
463 | return drbd_single_open(file, in_flight_summary_show, resource, | ||
464 | &resource->kref, drbd_destroy_resource); | ||
465 | } | ||
466 | |||
467 | static int in_flight_summary_release(struct inode *inode, struct file *file) | ||
468 | { | ||
469 | struct drbd_resource *resource = inode->i_private; | ||
470 | kref_put(&resource->kref, drbd_destroy_resource); | ||
471 | return single_release(inode, file); | ||
472 | } | ||
473 | |||
474 | static const struct file_operations in_flight_summary_fops = { | ||
475 | .owner = THIS_MODULE, | ||
476 | .open = in_flight_summary_open, | ||
477 | .read = seq_read, | ||
478 | .llseek = seq_lseek, | ||
479 | .release = in_flight_summary_release, | ||
480 | }; | ||
481 | |||
482 | void drbd_debugfs_resource_add(struct drbd_resource *resource) | ||
483 | { | ||
484 | struct dentry *dentry; | ||
485 | if (!drbd_debugfs_resources) | ||
486 | return; | ||
487 | |||
488 | dentry = debugfs_create_dir(resource->name, drbd_debugfs_resources); | ||
489 | if (IS_ERR_OR_NULL(dentry)) | ||
490 | goto fail; | ||
491 | resource->debugfs_res = dentry; | ||
492 | |||
493 | dentry = debugfs_create_dir("volumes", resource->debugfs_res); | ||
494 | if (IS_ERR_OR_NULL(dentry)) | ||
495 | goto fail; | ||
496 | resource->debugfs_res_volumes = dentry; | ||
497 | |||
498 | dentry = debugfs_create_dir("connections", resource->debugfs_res); | ||
499 | if (IS_ERR_OR_NULL(dentry)) | ||
500 | goto fail; | ||
501 | resource->debugfs_res_connections = dentry; | ||
502 | |||
503 | dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP, | ||
504 | resource->debugfs_res, resource, | ||
505 | &in_flight_summary_fops); | ||
506 | if (IS_ERR_OR_NULL(dentry)) | ||
507 | goto fail; | ||
508 | resource->debugfs_res_in_flight_summary = dentry; | ||
509 | return; | ||
510 | |||
511 | fail: | ||
512 | drbd_debugfs_resource_cleanup(resource); | ||
513 | drbd_err(resource, "failed to create debugfs dentry\n"); | ||
514 | } | ||
515 | |||
516 | static void drbd_debugfs_remove(struct dentry **dp) | ||
517 | { | ||
518 | debugfs_remove(*dp); | ||
519 | *dp = NULL; | ||
520 | } | ||
521 | |||
522 | void drbd_debugfs_resource_cleanup(struct drbd_resource *resource) | ||
523 | { | ||
524 | /* it is ok to call debugfs_remove(NULL) */ | ||
525 | drbd_debugfs_remove(&resource->debugfs_res_in_flight_summary); | ||
526 | drbd_debugfs_remove(&resource->debugfs_res_connections); | ||
527 | drbd_debugfs_remove(&resource->debugfs_res_volumes); | ||
528 | drbd_debugfs_remove(&resource->debugfs_res); | ||
529 | } | ||
530 | |||
531 | static void seq_print_one_timing_detail(struct seq_file *m, | ||
532 | const struct drbd_thread_timing_details *tdp, | ||
533 | unsigned long now) | ||
534 | { | ||
535 | struct drbd_thread_timing_details td; | ||
536 | /* No locking... | ||
537 | * use temporary assignment to get at consistent data. */ | ||
538 | do { | ||
539 | td = *tdp; | ||
540 | } while (td.cb_nr != tdp->cb_nr); | ||
541 | if (!td.cb_addr) | ||
542 | return; | ||
543 | seq_printf(m, "%u\t%d\t%s:%u\t%ps\n", | ||
544 | td.cb_nr, | ||
545 | jiffies_to_msecs(now - td.start_jif), | ||
546 | td.caller_fn, td.line, | ||
547 | td.cb_addr); | ||
548 | } | ||
549 | |||
550 | static void seq_print_timing_details(struct seq_file *m, | ||
551 | const char *title, | ||
552 | unsigned int cb_nr, struct drbd_thread_timing_details *tdp, unsigned long now) | ||
553 | { | ||
554 | unsigned int start_idx; | ||
555 | unsigned int i; | ||
556 | |||
557 | seq_printf(m, "%s\n", title); | ||
558 | /* If not much is going on, this will result in natural ordering. | ||
559 | * If it is very busy, we will possibly skip events, or even see wrap | ||
560 | * arounds, which could only be avoided with locking. | ||
561 | */ | ||
562 | start_idx = cb_nr % DRBD_THREAD_DETAILS_HIST; | ||
563 | for (i = start_idx; i < DRBD_THREAD_DETAILS_HIST; i++) | ||
564 | seq_print_one_timing_detail(m, tdp+i, now); | ||
565 | for (i = 0; i < start_idx; i++) | ||
566 | seq_print_one_timing_detail(m, tdp+i, now); | ||
567 | } | ||
568 | |||
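The snapshot loop above gets away without a lock because the writer only ever appends: it fills one ring slot and bumps the counter, so a reader can detect a torn copy by re-checking cb_nr, and skips slots whose cb_addr is still NULL. The writer, __update_timing_details(), is only declared in this patch (see the drbd_int.h hunk further down); a hedged sketch of such a writer, under those assumptions:

	/* sketch only, not the definition shipped in this patch */
	void __update_timing_details(struct drbd_thread_timing_details *tdp,
			unsigned int *cb_nr, void *cb,
			const char *fn, const unsigned int line)
	{
		unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
		struct drbd_thread_timing_details *td = tdp + i;

		td->start_jif = jiffies;
		td->cb_addr = cb;	/* non-NULL marks the slot as valid */
		td->caller_fn = fn;
		td->line = line;
		td->cb_nr = *cb_nr;	/* reader re-checks this to spot a race */

		++(*cb_nr);		/* advance; oldest slot is overwritten next */
	}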
569 | static int callback_history_show(struct seq_file *m, void *ignored) | ||
570 | { | ||
571 | struct drbd_connection *connection = m->private; | ||
572 | unsigned long jif = jiffies; | ||
573 | |||
574 | /* BUMP me if you change the file format/content/presentation */ | ||
575 | seq_printf(m, "v: %u\n\n", 0); | ||
576 | |||
577 | seq_puts(m, "n\tage\tcallsite\tfn\n"); | ||
578 | seq_print_timing_details(m, "worker", connection->w_cb_nr, connection->w_timing_details, jif); | ||
579 | seq_print_timing_details(m, "receiver", connection->r_cb_nr, connection->r_timing_details, jif); | ||
580 | return 0; | ||
581 | } | ||
582 | |||
583 | static int callback_history_open(struct inode *inode, struct file *file) | ||
584 | { | ||
585 | struct drbd_connection *connection = inode->i_private; | ||
586 | return drbd_single_open(file, callback_history_show, connection, | ||
587 | &connection->kref, drbd_destroy_connection); | ||
588 | } | ||
589 | |||
590 | static int callback_history_release(struct inode *inode, struct file *file) | ||
591 | { | ||
592 | struct drbd_connection *connection = inode->i_private; | ||
593 | kref_put(&connection->kref, drbd_destroy_connection); | ||
594 | return single_release(inode, file); | ||
595 | } | ||
596 | |||
597 | static const struct file_operations connection_callback_history_fops = { | ||
598 | .owner = THIS_MODULE, | ||
599 | .open = callback_history_open, | ||
600 | .read = seq_read, | ||
601 | .llseek = seq_lseek, | ||
602 | .release = callback_history_release, | ||
603 | }; | ||
604 | |||
605 | static int connection_oldest_requests_show(struct seq_file *m, void *ignored) | ||
606 | { | ||
607 | struct drbd_connection *connection = m->private; | ||
608 | unsigned long now = jiffies; | ||
609 | struct drbd_request *r1, *r2; | ||
610 | |||
611 | /* BUMP me if you change the file format/content/presentation */ | ||
612 | seq_printf(m, "v: %u\n\n", 0); | ||
613 | |||
614 | spin_lock_irq(&connection->resource->req_lock); | ||
615 | r1 = connection->req_next; | ||
616 | if (r1) | ||
617 | seq_print_minor_vnr_req(m, r1, now); | ||
618 | r2 = connection->req_ack_pending; | ||
619 | if (r2 && r2 != r1) { | ||
620 | r1 = r2; | ||
621 | seq_print_minor_vnr_req(m, r1, now); | ||
622 | } | ||
623 | r2 = connection->req_not_net_done; | ||
624 | if (r2 && r2 != r1) | ||
625 | seq_print_minor_vnr_req(m, r2, now); | ||
626 | spin_unlock_irq(&connection->resource->req_lock); | ||
627 | return 0; | ||
628 | } | ||
629 | |||
630 | static int connection_oldest_requests_open(struct inode *inode, struct file *file) | ||
631 | { | ||
632 | struct drbd_connection *connection = inode->i_private; | ||
633 | return drbd_single_open(file, connection_oldest_requests_show, connection, | ||
634 | &connection->kref, drbd_destroy_connection); | ||
635 | } | ||
636 | |||
637 | static int connection_oldest_requests_release(struct inode *inode, struct file *file) | ||
638 | { | ||
639 | struct drbd_connection *connection = inode->i_private; | ||
640 | kref_put(&connection->kref, drbd_destroy_connection); | ||
641 | return single_release(inode, file); | ||
642 | } | ||
643 | |||
644 | static const struct file_operations connection_oldest_requests_fops = { | ||
645 | .owner = THIS_MODULE, | ||
646 | .open = connection_oldest_requests_open, | ||
647 | .read = seq_read, | ||
648 | .llseek = seq_lseek, | ||
649 | .release = connection_oldest_requests_release, | ||
650 | }; | ||
651 | |||
652 | void drbd_debugfs_connection_add(struct drbd_connection *connection) | ||
653 | { | ||
654 | struct dentry *conns_dir = connection->resource->debugfs_res_connections; | ||
655 | struct dentry *dentry; | ||
656 | if (!conns_dir) | ||
657 | return; | ||
658 | |||
659 | /* Once we enable multiple peers, | ||
660 | * these connections will have descriptive names. | ||
661 | * For now, it is just the one connection to the (only) "peer". */ | ||
662 | dentry = debugfs_create_dir("peer", conns_dir); | ||
663 | if (IS_ERR_OR_NULL(dentry)) | ||
664 | goto fail; | ||
665 | connection->debugfs_conn = dentry; | ||
666 | |||
667 | dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP, | ||
668 | connection->debugfs_conn, connection, | ||
669 | &connection_callback_history_fops); | ||
670 | if (IS_ERR_OR_NULL(dentry)) | ||
671 | goto fail; | ||
672 | connection->debugfs_conn_callback_history = dentry; | ||
673 | |||
674 | dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP, | ||
675 | connection->debugfs_conn, connection, | ||
676 | &connection_oldest_requests_fops); | ||
677 | if (IS_ERR_OR_NULL(dentry)) | ||
678 | goto fail; | ||
679 | connection->debugfs_conn_oldest_requests = dentry; | ||
680 | return; | ||
681 | |||
682 | fail: | ||
683 | drbd_debugfs_connection_cleanup(connection); | ||
684 | drbd_err(connection, "failed to create debugfs dentry\n"); | ||
685 | } | ||
686 | |||
687 | void drbd_debugfs_connection_cleanup(struct drbd_connection *connection) | ||
688 | { | ||
689 | drbd_debugfs_remove(&connection->debugfs_conn_callback_history); | ||
690 | drbd_debugfs_remove(&connection->debugfs_conn_oldest_requests); | ||
691 | drbd_debugfs_remove(&connection->debugfs_conn); | ||
692 | } | ||
693 | |||
694 | static void resync_dump_detail(struct seq_file *m, struct lc_element *e) | ||
695 | { | ||
696 | struct bm_extent *bme = lc_entry(e, struct bm_extent, lce); | ||
697 | |||
698 | seq_printf(m, "%5d %s %s %s\n", bme->rs_left, | ||
699 | test_bit(BME_NO_WRITES, &bme->flags) ? "NO_WRITES" : "---------", | ||
700 | test_bit(BME_LOCKED, &bme->flags) ? "LOCKED" : "------", | ||
701 | test_bit(BME_PRIORITY, &bme->flags) ? "PRIORITY" : "--------" | ||
702 | ); | ||
703 | } | ||
704 | |||
705 | static int device_resync_extents_show(struct seq_file *m, void *ignored) | ||
706 | { | ||
707 | struct drbd_device *device = m->private; | ||
708 | |||
709 | /* BUMP me if you change the file format/content/presentation */ | ||
710 | seq_printf(m, "v: %u\n\n", 0); | ||
711 | |||
712 | if (get_ldev_if_state(device, D_FAILED)) { | ||
713 | lc_seq_printf_stats(m, device->resync); | ||
714 | lc_seq_dump_details(m, device->resync, "rs_left flags", resync_dump_detail); | ||
715 | put_ldev(device); | ||
716 | } | ||
717 | return 0; | ||
718 | } | ||
719 | |||
720 | static int device_act_log_extents_show(struct seq_file *m, void *ignored) | ||
721 | { | ||
722 | struct drbd_device *device = m->private; | ||
723 | |||
724 | /* BUMP me if you change the file format/content/presentation */ | ||
725 | seq_printf(m, "v: %u\n\n", 0); | ||
726 | |||
727 | if (get_ldev_if_state(device, D_FAILED)) { | ||
728 | lc_seq_printf_stats(m, device->act_log); | ||
729 | lc_seq_dump_details(m, device->act_log, "", NULL); | ||
730 | put_ldev(device); | ||
731 | } | ||
732 | return 0; | ||
733 | } | ||
734 | |||
735 | static int device_oldest_requests_show(struct seq_file *m, void *ignored) | ||
736 | { | ||
737 | struct drbd_device *device = m->private; | ||
738 | struct drbd_resource *resource = device->resource; | ||
739 | unsigned long now = jiffies; | ||
740 | struct drbd_request *r1, *r2; | ||
741 | int i; | ||
742 | |||
743 | /* BUMP me if you change the file format/content/presentation */ | ||
744 | seq_printf(m, "v: %u\n\n", 0); | ||
745 | |||
746 | seq_puts(m, RQ_HDR); | ||
747 | spin_lock_irq(&resource->req_lock); | ||
748 | /* WRITE, then READ */ | ||
749 | for (i = 1; i >= 0; --i) { | ||
750 | r1 = list_first_entry_or_null(&device->pending_master_completion[i], | ||
751 | struct drbd_request, req_pending_master_completion); | ||
752 | r2 = list_first_entry_or_null(&device->pending_completion[i], | ||
753 | struct drbd_request, req_pending_local); | ||
754 | if (r1) | ||
755 | seq_print_one_request(m, r1, now); | ||
756 | if (r2 && r2 != r1) | ||
757 | seq_print_one_request(m, r2, now); | ||
758 | } | ||
759 | spin_unlock_irq(&resource->req_lock); | ||
760 | return 0; | ||
761 | } | ||
762 | |||
763 | static int device_data_gen_id_show(struct seq_file *m, void *ignored) | ||
764 | { | ||
765 | struct drbd_device *device = m->private; | ||
766 | struct drbd_md *md; | ||
767 | enum drbd_uuid_index idx; | ||
768 | |||
769 | if (!get_ldev_if_state(device, D_FAILED)) | ||
770 | return -ENODEV; | ||
771 | |||
772 | md = &device->ldev->md; | ||
773 | spin_lock_irq(&md->uuid_lock); | ||
774 | for (idx = UI_CURRENT; idx <= UI_HISTORY_END; idx++) { | ||
775 | seq_printf(m, "0x%016llX\n", md->uuid[idx]); | ||
776 | } | ||
777 | spin_unlock_irq(&md->uuid_lock); | ||
778 | put_ldev(device); | ||
779 | return 0; | ||
780 | } | ||
781 | |||
782 | #define drbd_debugfs_device_attr(name) \ | ||
783 | static int device_ ## name ## _open(struct inode *inode, struct file *file) \ | ||
784 | { \ | ||
785 | struct drbd_device *device = inode->i_private; \ | ||
786 | return drbd_single_open(file, device_ ## name ## _show, device, \ | ||
787 | &device->kref, drbd_destroy_device); \ | ||
788 | } \ | ||
789 | static int device_ ## name ## _release(struct inode *inode, struct file *file) \ | ||
790 | { \ | ||
791 | struct drbd_device *device = inode->i_private; \ | ||
792 | kref_put(&device->kref, drbd_destroy_device); \ | ||
793 | return single_release(inode, file); \ | ||
794 | } \ | ||
795 | static const struct file_operations device_ ## name ## _fops = { \ | ||
796 | .owner = THIS_MODULE, \ | ||
797 | .open = device_ ## name ## _open, \ | ||
798 | .read = seq_read, \ | ||
799 | .llseek = seq_lseek, \ | ||
800 | .release = device_ ## name ## _release, \ | ||
801 | }; | ||
802 | |||
803 | drbd_debugfs_device_attr(oldest_requests) | ||
804 | drbd_debugfs_device_attr(act_log_extents) | ||
805 | drbd_debugfs_device_attr(resync_extents) | ||
806 | drbd_debugfs_device_attr(data_gen_id) | ||
807 | |||
808 | void drbd_debugfs_device_add(struct drbd_device *device) | ||
809 | { | ||
810 | struct dentry *vols_dir = device->resource->debugfs_res_volumes; | ||
811 | char minor_buf[8]; /* MINORMASK, MINORBITS == 20; */ | ||
812 | char vnr_buf[8]; /* volume number vnr is only 16 bit anyway; */ | ||
813 | char *slink_name = NULL; | ||
814 | |||
815 | struct dentry *dentry; | ||
816 | if (!vols_dir || !drbd_debugfs_minors) | ||
817 | return; | ||
818 | |||
819 | snprintf(vnr_buf, sizeof(vnr_buf), "%u", device->vnr); | ||
820 | dentry = debugfs_create_dir(vnr_buf, vols_dir); | ||
821 | if (IS_ERR_OR_NULL(dentry)) | ||
822 | goto fail; | ||
823 | device->debugfs_vol = dentry; | ||
824 | |||
825 | snprintf(minor_buf, sizeof(minor_buf), "%u", device->minor); | ||
826 | slink_name = kasprintf(GFP_KERNEL, "../resources/%s/volumes/%u", | ||
827 | device->resource->name, device->vnr); | ||
828 | if (!slink_name) | ||
829 | goto fail; | ||
830 | dentry = debugfs_create_symlink(minor_buf, drbd_debugfs_minors, slink_name); | ||
831 | kfree(slink_name); | ||
832 | slink_name = NULL; | ||
833 | if (IS_ERR_OR_NULL(dentry)) | ||
834 | goto fail; | ||
835 | device->debugfs_minor = dentry; | ||
836 | |||
837 | #define DCF(name) do { \ | ||
838 | dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP, \ | ||
839 | device->debugfs_vol, device, \ | ||
840 | &device_ ## name ## _fops); \ | ||
841 | if (IS_ERR_OR_NULL(dentry)) \ | ||
842 | goto fail; \ | ||
843 | device->debugfs_vol_ ## name = dentry; \ | ||
844 | } while (0) | ||
845 | |||
846 | DCF(oldest_requests); | ||
847 | DCF(act_log_extents); | ||
848 | DCF(resync_extents); | ||
849 | DCF(data_gen_id); | ||
850 | #undef DCF | ||
851 | return; | ||
852 | |||
853 | fail: | ||
854 | drbd_debugfs_device_cleanup(device); | ||
855 | drbd_err(device, "failed to create debugfs entries\n"); | ||
856 | } | ||
857 | |||
858 | void drbd_debugfs_device_cleanup(struct drbd_device *device) | ||
859 | { | ||
860 | drbd_debugfs_remove(&device->debugfs_minor); | ||
861 | drbd_debugfs_remove(&device->debugfs_vol_oldest_requests); | ||
862 | drbd_debugfs_remove(&device->debugfs_vol_act_log_extents); | ||
863 | drbd_debugfs_remove(&device->debugfs_vol_resync_extents); | ||
864 | drbd_debugfs_remove(&device->debugfs_vol_data_gen_id); | ||
865 | drbd_debugfs_remove(&device->debugfs_vol); | ||
866 | } | ||
867 | |||
868 | void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device) | ||
869 | { | ||
870 | struct dentry *conn_dir = peer_device->connection->debugfs_conn; | ||
871 | struct dentry *dentry; | ||
872 | char vnr_buf[8]; | ||
873 | |||
874 | if (!conn_dir) | ||
875 | return; | ||
876 | |||
877 | snprintf(vnr_buf, sizeof(vnr_buf), "%u", peer_device->device->vnr); | ||
878 | dentry = debugfs_create_dir(vnr_buf, conn_dir); | ||
879 | if (IS_ERR_OR_NULL(dentry)) | ||
880 | goto fail; | ||
881 | peer_device->debugfs_peer_dev = dentry; | ||
882 | return; | ||
883 | |||
884 | fail: | ||
885 | drbd_debugfs_peer_device_cleanup(peer_device); | ||
886 | drbd_err(peer_device, "failed to create debugfs entries\n"); | ||
887 | } | ||
888 | |||
889 | void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device) | ||
890 | { | ||
891 | drbd_debugfs_remove(&peer_device->debugfs_peer_dev); | ||
892 | } | ||
893 | |||
894 | static int drbd_version_show(struct seq_file *m, void *ignored) | ||
895 | { | ||
896 | seq_printf(m, "# %s\n", drbd_buildtag()); | ||
897 | seq_printf(m, "VERSION=%s\n", REL_VERSION); | ||
898 | seq_printf(m, "API_VERSION=%u\n", API_VERSION); | ||
899 | seq_printf(m, "PRO_VERSION_MIN=%u\n", PRO_VERSION_MIN); | ||
900 | seq_printf(m, "PRO_VERSION_MAX=%u\n", PRO_VERSION_MAX); | ||
901 | return 0; | ||
902 | } | ||
903 | |||
904 | static int drbd_version_open(struct inode *inode, struct file *file) | ||
905 | { | ||
906 | return single_open(file, drbd_version_show, NULL); | ||
907 | } | ||
908 | |||
909 | static struct file_operations drbd_version_fops = { | ||
910 | .owner = THIS_MODULE, | ||
911 | .open = drbd_version_open, | ||
912 | .llseek = seq_lseek, | ||
913 | .read = seq_read, | ||
914 | .release = single_release, | ||
915 | }; | ||
916 | |||
917 | /* not __exit, may be indirectly called | ||
918 | * from the module-load-failure path as well. */ | ||
919 | void drbd_debugfs_cleanup(void) | ||
920 | { | ||
921 | drbd_debugfs_remove(&drbd_debugfs_resources); | ||
922 | drbd_debugfs_remove(&drbd_debugfs_minors); | ||
923 | drbd_debugfs_remove(&drbd_debugfs_version); | ||
924 | drbd_debugfs_remove(&drbd_debugfs_root); | ||
925 | } | ||
926 | |||
927 | int __init drbd_debugfs_init(void) | ||
928 | { | ||
929 | struct dentry *dentry; | ||
930 | |||
931 | dentry = debugfs_create_dir("drbd", NULL); | ||
932 | if (IS_ERR_OR_NULL(dentry)) | ||
933 | goto fail; | ||
934 | drbd_debugfs_root = dentry; | ||
935 | |||
936 | dentry = debugfs_create_file("version", 0444, drbd_debugfs_root, NULL, &drbd_version_fops); | ||
937 | if (IS_ERR_OR_NULL(dentry)) | ||
938 | goto fail; | ||
939 | drbd_debugfs_version = dentry; | ||
940 | |||
941 | dentry = debugfs_create_dir("resources", drbd_debugfs_root); | ||
942 | if (IS_ERR_OR_NULL(dentry)) | ||
943 | goto fail; | ||
944 | drbd_debugfs_resources = dentry; | ||
945 | |||
946 | dentry = debugfs_create_dir("minors", drbd_debugfs_root); | ||
947 | if (IS_ERR_OR_NULL(dentry)) | ||
948 | goto fail; | ||
949 | drbd_debugfs_minors = dentry; | ||
950 | return 0; | ||
951 | |||
952 | fail: | ||
953 | drbd_debugfs_cleanup(); | ||
954 | if (dentry) | ||
955 | return PTR_ERR(dentry); | ||
956 | else | ||
957 | return -EINVAL; | ||
958 | } | ||
diff --git a/drivers/block/drbd/drbd_debugfs.h b/drivers/block/drbd/drbd_debugfs.h new file mode 100644 index 000000000000..8bee21340dce --- /dev/null +++ b/drivers/block/drbd/drbd_debugfs.h | |||
@@ -0,0 +1,39 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/debugfs.h> | ||
4 | |||
5 | #include "drbd_int.h" | ||
6 | |||
7 | #ifdef CONFIG_DEBUG_FS | ||
8 | int __init drbd_debugfs_init(void); | ||
9 | void drbd_debugfs_cleanup(void); | ||
10 | |||
11 | void drbd_debugfs_resource_add(struct drbd_resource *resource); | ||
12 | void drbd_debugfs_resource_cleanup(struct drbd_resource *resource); | ||
13 | |||
14 | void drbd_debugfs_connection_add(struct drbd_connection *connection); | ||
15 | void drbd_debugfs_connection_cleanup(struct drbd_connection *connection); | ||
16 | |||
17 | void drbd_debugfs_device_add(struct drbd_device *device); | ||
18 | void drbd_debugfs_device_cleanup(struct drbd_device *device); | ||
19 | |||
20 | void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device); | ||
21 | void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device); | ||
22 | #else | ||
23 | |||
24 | static inline int __init drbd_debugfs_init(void) { return -ENODEV; } | ||
25 | static inline void drbd_debugfs_cleanup(void) { } | ||
26 | |||
27 | static inline void drbd_debugfs_resource_add(struct drbd_resource *resource) { } | ||
28 | static inline void drbd_debugfs_resource_cleanup(struct drbd_resource *resource) { } | ||
29 | |||
30 | static inline void drbd_debugfs_connection_add(struct drbd_connection *connection) { } | ||
31 | static inline void drbd_debugfs_connection_cleanup(struct drbd_connection *connection) { } | ||
32 | |||
33 | static inline void drbd_debugfs_device_add(struct drbd_device *device) { } | ||
34 | static inline void drbd_debugfs_device_cleanup(struct drbd_device *device) { } | ||
35 | |||
36 | static inline void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device) { } | ||
37 | static inline void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device) { } | ||
38 | |||
39 | #endif | ||
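With the CONFIG_DEBUG_FS stubs above, call sites can invoke the debugfs hooks unconditionally; when debugfs is not built in, the empty inlines compile away and the Makefile simply does not build drbd_debugfs.o. A minimal sketch of an assumed call site (the function name is illustrative, not part of this patch):

	/* sketch only */
	static void example_device_setup(struct drbd_device *device)
	{
		/* ... ordinary device initialization ... */

		/* becomes a no-op stub without CONFIG_DEBUG_FS,
		 * so no #ifdef is needed at the call site */
		drbd_debugfs_device_add(device);
	}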
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a76ceb344d64..1a000016ccdf 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -317,7 +317,63 @@ struct drbd_request { | |||
317 | 317 | ||
318 | struct list_head tl_requests; /* ring list in the transfer log */ | 318 | struct list_head tl_requests; /* ring list in the transfer log */ |
319 | struct bio *master_bio; /* master bio pointer */ | 319 | struct bio *master_bio; /* master bio pointer */ |
320 | unsigned long start_time; | 320 | |
321 | /* see struct drbd_device */ | ||
322 | struct list_head req_pending_master_completion; | ||
323 | struct list_head req_pending_local; | ||
324 | |||
325 | /* for generic IO accounting */ | ||
326 | unsigned long start_jif; | ||
327 | |||
328 | /* for DRBD internal statistics */ | ||
329 | |||
330 | /* Minimal set of time stamps to determine if we wait for activity log | ||
331 | * transactions, local disk or peer. 32 bit "jiffies" are good enough, | ||
332 | * we don't expect a DRBD request to be stalled for several months. | ||
333 | */ | ||
334 | |||
335 | /* before actual request processing */ | ||
336 | unsigned long in_actlog_jif; | ||
337 | |||
338 | /* local disk */ | ||
339 | unsigned long pre_submit_jif; | ||
340 | |||
341 | /* per connection */ | ||
342 | unsigned long pre_send_jif; | ||
343 | unsigned long acked_jif; | ||
344 | unsigned long net_done_jif; | ||
345 | |||
346 | /* Possibly even more detail to track each phase: | ||
347 | * master_completion_jif | ||
348 | * how long did it take to complete the master bio | ||
349 | * (application visible latency) | ||
350 | * allocated_jif | ||
351 | * how long the master bio was blocked until we finally allocated | ||
352 | * a tracking struct | ||
353 | * in_actlog_jif | ||
354 | * how long did we wait for activity log transactions | ||
355 | * | ||
356 | * net_queued_jif | ||
357 | * when did we finally queue it for sending | ||
358 | * pre_send_jif | ||
359 | * when did we start sending it | ||
360 | * post_send_jif | ||
361 | * how long did we block in the network stack trying to send it | ||
362 | * acked_jif | ||
363 | * when did we receive (or fake, in protocol A) a remote ACK | ||
364 | * net_done_jif | ||
365 | * when did we receive final acknowledgement (P_BARRIER_ACK), | ||
366 | * or decide, e.g. on connection loss, that we no longer expect | ||
367 | * anything from this peer for this request. | ||
368 | * | ||
369 | * pre_submit_jif | ||
370 | * post_submit_jif | ||
371 | * when did we start submitting to the lower level device, | ||
372 | * and how long did we block in that submit function | ||
373 | * local_completion_jif | ||
374 | * how long did it take the lower level device to complete this request | ||
375 | */ | ||
376 | |||
321 | 377 | ||
322 | /* once it hits 0, we may complete the master_bio */ | 378 | /* once it hits 0, we may complete the master_bio */ |
323 | atomic_t completion_ref; | 379 | atomic_t completion_ref; |
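Each of the new jiffies fields is meant to be sampled right before the phase it names, and later turned into an age with jiffies_to_msecs(), as the debugfs code earlier in this commit does. A hedged sketch of the idea (helper names are illustrative, this is not the actual request-path code):

	/* sketch only */
	static void example_mark_pre_send(struct drbd_request *req)
	{
		req->pre_send_jif = jiffies;	/* sample right before sending */
	}

	static void example_print_send_age(struct seq_file *m, struct drbd_request *req)
	{
		/* unsigned 32-bit jiffies arithmetic tolerates wrap-around;
		 * per the comment above, requests are not expected to stall for months */
		seq_printf(m, "\t%u", jiffies_to_msecs(jiffies - req->pre_send_jif));
	}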
@@ -366,6 +422,7 @@ struct drbd_peer_request { | |||
366 | struct drbd_interval i; | 422 | struct drbd_interval i; |
367 | /* see comments on ee flag bits below */ | 423 | /* see comments on ee flag bits below */ |
368 | unsigned long flags; | 424 | unsigned long flags; |
425 | unsigned long submit_jif; | ||
369 | union { | 426 | union { |
370 | u64 block_id; | 427 | u64 block_id; |
371 | struct digest_info *digest; | 428 | struct digest_info *digest; |
@@ -408,6 +465,17 @@ enum { | |||
408 | 465 | ||
409 | /* Is set when net_conf had two_primaries set while creating this peer_req */ | 466 | /* Is set when net_conf had two_primaries set while creating this peer_req */ |
410 | __EE_IN_INTERVAL_TREE, | 467 | __EE_IN_INTERVAL_TREE, |
468 | |||
469 | /* for debugfs: */ | ||
470 | /* has this been submitted, or does it still wait for something else? */ | ||
471 | __EE_SUBMITTED, | ||
472 | |||
473 | /* this is/was a write request */ | ||
474 | __EE_WRITE, | ||
475 | |||
476 | /* this originates from application on peer | ||
477 | * (not some resync or verify or other DRBD internal request) */ | ||
478 | __EE_APPLICATION, | ||
411 | }; | 479 | }; |
412 | #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) | 480 | #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) |
413 | #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) | 481 | #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) |
@@ -419,6 +487,9 @@ enum { | |||
419 | #define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) | 487 | #define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) |
420 | #define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK) | 488 | #define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK) |
421 | #define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) | 489 | #define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) |
490 | #define EE_SUBMITTED (1<<__EE_SUBMITTED) | ||
491 | #define EE_WRITE (1<<__EE_WRITE) | ||
492 | #define EE_APPLICATION (1<<__EE_APPLICATION) | ||
422 | 493 | ||
423 | /* flag bits per device */ | 494 | /* flag bits per device */ |
424 | enum { | 495 | enum { |
@@ -433,11 +504,11 @@ enum { | |||
433 | CONSIDER_RESYNC, | 504 | CONSIDER_RESYNC, |
434 | 505 | ||
435 | MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ | 506 | MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ |
507 | |||
436 | SUSPEND_IO, /* suspend application io */ | 508 | SUSPEND_IO, /* suspend application io */ |
437 | BITMAP_IO, /* suspend application io; | 509 | BITMAP_IO, /* suspend application io; |
438 | once no more io in flight, start bitmap io */ | 510 | once no more io in flight, start bitmap io */ |
439 | BITMAP_IO_QUEUED, /* Started bitmap IO */ | 511 | BITMAP_IO_QUEUED, /* Started bitmap IO */ |
440 | GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ | ||
441 | WAS_IO_ERROR, /* Local disk failed, returned IO error */ | 512 | WAS_IO_ERROR, /* Local disk failed, returned IO error */ |
442 | WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */ | 513 | WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */ |
443 | FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ | 514 | FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ |
@@ -450,6 +521,20 @@ enum { | |||
450 | B_RS_H_DONE, /* Before resync handler done (already executed) */ | 521 | B_RS_H_DONE, /* Before resync handler done (already executed) */ |
451 | DISCARD_MY_DATA, /* discard_my_data flag per volume */ | 522 | DISCARD_MY_DATA, /* discard_my_data flag per volume */ |
452 | READ_BALANCE_RR, | 523 | READ_BALANCE_RR, |
524 | |||
525 | FLUSH_PENDING, /* if set, device->flush_jif is when we submitted that flush | ||
526 | * from drbd_flush_after_epoch() */ | ||
527 | |||
528 | /* cleared only after backing device related structures have been destroyed. */ | ||
529 | GOING_DISKLESS, /* Disk is being detached, because of io-error, or admin request. */ | ||
530 | |||
531 | /* to be used in drbd_device_post_work() */ | ||
532 | GO_DISKLESS, /* tell worker to schedule cleanup before detach */ | ||
533 | DESTROY_DISK, /* tell worker to close backing devices and destroy related structures. */ | ||
534 | MD_SYNC, /* tell worker to call drbd_md_sync() */ | ||
535 | RS_START, /* tell worker to start resync/OV */ | ||
536 | RS_PROGRESS, /* tell worker that resync made significant progress */ | ||
537 | RS_DONE, /* tell worker that resync is done */ | ||
453 | }; | 538 | }; |
454 | 539 | ||
455 | struct drbd_bitmap; /* opaque for drbd_device */ | 540 | struct drbd_bitmap; /* opaque for drbd_device */ |
@@ -531,6 +616,11 @@ struct drbd_backing_dev { | |||
531 | }; | 616 | }; |
532 | 617 | ||
533 | struct drbd_md_io { | 618 | struct drbd_md_io { |
619 | struct page *page; | ||
620 | unsigned long start_jif; /* last call to drbd_md_get_buffer */ | ||
621 | unsigned long submit_jif; /* last _drbd_md_sync_page_io() submit */ | ||
622 | const char *current_use; | ||
623 | atomic_t in_use; | ||
534 | unsigned int done; | 624 | unsigned int done; |
535 | int error; | 625 | int error; |
536 | }; | 626 | }; |
@@ -577,10 +667,18 @@ enum { | |||
577 | * and potentially deadlock on, this drbd worker. | 667 | * and potentially deadlock on, this drbd worker. |
578 | */ | 668 | */ |
579 | DISCONNECT_SENT, | 669 | DISCONNECT_SENT, |
670 | |||
671 | DEVICE_WORK_PENDING, /* tell worker that some device has pending work */ | ||
580 | }; | 672 | }; |
581 | 673 | ||
582 | struct drbd_resource { | 674 | struct drbd_resource { |
583 | char *name; | 675 | char *name; |
676 | #ifdef CONFIG_DEBUG_FS | ||
677 | struct dentry *debugfs_res; | ||
678 | struct dentry *debugfs_res_volumes; | ||
679 | struct dentry *debugfs_res_connections; | ||
680 | struct dentry *debugfs_res_in_flight_summary; | ||
681 | #endif | ||
584 | struct kref kref; | 682 | struct kref kref; |
585 | struct idr devices; /* volume number to device mapping */ | 683 | struct idr devices; /* volume number to device mapping */ |
586 | struct list_head connections; | 684 | struct list_head connections; |
@@ -594,12 +692,28 @@ struct drbd_resource { | |||
594 | unsigned susp_nod:1; /* IO suspended because no data */ | 692 | unsigned susp_nod:1; /* IO suspended because no data */ |
595 | unsigned susp_fen:1; /* IO suspended because fence peer handler runs */ | 693 | unsigned susp_fen:1; /* IO suspended because fence peer handler runs */ |
596 | 694 | ||
695 | enum write_ordering_e write_ordering; | ||
696 | |||
597 | cpumask_var_t cpu_mask; | 697 | cpumask_var_t cpu_mask; |
598 | }; | 698 | }; |
599 | 699 | ||
700 | struct drbd_thread_timing_details | ||
701 | { | ||
702 | unsigned long start_jif; | ||
703 | void *cb_addr; | ||
704 | const char *caller_fn; | ||
705 | unsigned int line; | ||
706 | unsigned int cb_nr; | ||
707 | }; | ||
708 | |||
600 | struct drbd_connection { | 709 | struct drbd_connection { |
601 | struct list_head connections; | 710 | struct list_head connections; |
602 | struct drbd_resource *resource; | 711 | struct drbd_resource *resource; |
712 | #ifdef CONFIG_DEBUG_FS | ||
713 | struct dentry *debugfs_conn; | ||
714 | struct dentry *debugfs_conn_callback_history; | ||
715 | struct dentry *debugfs_conn_oldest_requests; | ||
716 | #endif | ||
603 | struct kref kref; | 717 | struct kref kref; |
604 | struct idr peer_devices; /* volume number to peer device mapping */ | 718 | struct idr peer_devices; /* volume number to peer device mapping */ |
605 | enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ | 719 | enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ |
@@ -636,7 +750,6 @@ struct drbd_connection { | |||
636 | struct drbd_epoch *current_epoch; | 750 | struct drbd_epoch *current_epoch; |
637 | spinlock_t epoch_lock; | 751 | spinlock_t epoch_lock; |
638 | unsigned int epochs; | 752 | unsigned int epochs; |
639 | enum write_ordering_e write_ordering; | ||
640 | atomic_t current_tle_nr; /* transfer log epoch number */ | 753 | atomic_t current_tle_nr; /* transfer log epoch number */ |
641 | unsigned current_tle_writes; /* writes seen within this tl epoch */ | 754 | unsigned current_tle_writes; /* writes seen within this tl epoch */ |
642 | 755 | ||
@@ -645,9 +758,22 @@ struct drbd_connection { | |||
645 | struct drbd_thread worker; | 758 | struct drbd_thread worker; |
646 | struct drbd_thread asender; | 759 | struct drbd_thread asender; |
647 | 760 | ||
761 | /* cached pointers, | ||
762 | * so we can look up the oldest pending requests more quickly. | ||
763 | * protected by resource->req_lock */ | ||
764 | struct drbd_request *req_next; /* DRBD 9: todo.req_next */ | ||
765 | struct drbd_request *req_ack_pending; | ||
766 | struct drbd_request *req_not_net_done; | ||
767 | |||
648 | /* sender side */ | 768 | /* sender side */ |
649 | struct drbd_work_queue sender_work; | 769 | struct drbd_work_queue sender_work; |
650 | 770 | ||
771 | #define DRBD_THREAD_DETAILS_HIST 16 | ||
772 | unsigned int w_cb_nr; /* keeps counting up */ | ||
773 | unsigned int r_cb_nr; /* keeps counting up */ | ||
774 | struct drbd_thread_timing_details w_timing_details[DRBD_THREAD_DETAILS_HIST]; | ||
775 | struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST]; | ||
776 | |||
651 | struct { | 777 | struct { |
652 | /* whether this sender thread | 778 | /* whether this sender thread |
653 | * has processed a single write yet. */ | 779 | * has processed a single write yet. */ |
@@ -663,11 +789,22 @@ struct drbd_connection { | |||
663 | } send; | 789 | } send; |
664 | }; | 790 | }; |
665 | 791 | ||
792 | void __update_timing_details( | ||
793 | struct drbd_thread_timing_details *tdp, | ||
794 | unsigned int *cb_nr, | ||
795 | void *cb, | ||
796 | const char *fn, const unsigned int line); | ||
797 | |||
798 | #define update_worker_timing_details(c, cb) \ | ||
799 | __update_timing_details(c->w_timing_details, &c->w_cb_nr, cb, __func__ , __LINE__ ) | ||
800 | #define update_receiver_timing_details(c, cb) \ | ||
801 | __update_timing_details(c->r_timing_details, &c->r_cb_nr, cb, __func__ , __LINE__ ) | ||
802 | |||
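The two macros expand to __update_timing_details() with __func__ and __LINE__ filled in, so the worker and receiver threads can tag each callback just before running it; the result is what callback_history in debugfs prints. A hedged usage sketch (the loop below is illustrative, not the drbd_worker.c code; it assumes the usual struct drbd_work layout with a list head and an int (*cb)(struct drbd_work *, int) callback):

	/* sketch only */
	static void example_worker_loop(struct drbd_connection *connection,
					struct list_head *work_list)
	{
		struct drbd_work *w, *t;

		list_for_each_entry_safe(w, t, work_list, list) {
			/* record which callback runs next; shows up in
			 * debugfs .../connections/peer/callback_history */
			update_worker_timing_details(connection, w->cb);
			w->cb(w, 0);
		}
	}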
666 | struct submit_worker { | 803 | struct submit_worker { |
667 | struct workqueue_struct *wq; | 804 | struct workqueue_struct *wq; |
668 | struct work_struct worker; | 805 | struct work_struct worker; |
669 | 806 | ||
670 | spinlock_t lock; | 807 | /* protected by ..->resource->req_lock */ |
671 | struct list_head writes; | 808 | struct list_head writes; |
672 | }; | 809 | }; |
673 | 810 | ||
@@ -675,12 +812,29 @@ struct drbd_peer_device { | |||
675 | struct list_head peer_devices; | 812 | struct list_head peer_devices; |
676 | struct drbd_device *device; | 813 | struct drbd_device *device; |
677 | struct drbd_connection *connection; | 814 | struct drbd_connection *connection; |
815 | #ifdef CONFIG_DEBUG_FS | ||
816 | struct dentry *debugfs_peer_dev; | ||
817 | #endif | ||
678 | }; | 818 | }; |
679 | 819 | ||
680 | struct drbd_device { | 820 | struct drbd_device { |
681 | struct drbd_resource *resource; | 821 | struct drbd_resource *resource; |
682 | struct list_head peer_devices; | 822 | struct list_head peer_devices; |
683 | int vnr; /* volume number within the connection */ | 823 | struct list_head pending_bitmap_io; |
824 | |||
825 | unsigned long flush_jif; | ||
826 | #ifdef CONFIG_DEBUG_FS | ||
827 | struct dentry *debugfs_minor; | ||
828 | struct dentry *debugfs_vol; | ||
829 | struct dentry *debugfs_vol_oldest_requests; | ||
830 | struct dentry *debugfs_vol_act_log_extents; | ||
831 | struct dentry *debugfs_vol_resync_extents; | ||
832 | struct dentry *debugfs_vol_data_gen_id; | ||
833 | #endif | ||
834 | |||
835 | unsigned int vnr; /* volume number within the connection */ | ||
836 | unsigned int minor; /* device minor number */ | ||
837 | |||
684 | struct kref kref; | 838 | struct kref kref; |
685 | 839 | ||
686 | /* things that are stored as / read from meta data on disk */ | 840 | /* things that are stored as / read from meta data on disk */ |
@@ -697,19 +851,10 @@ struct drbd_device { | |||
697 | unsigned long last_reattach_jif; | 851 | unsigned long last_reattach_jif; |
698 | struct drbd_work resync_work; | 852 | struct drbd_work resync_work; |
699 | struct drbd_work unplug_work; | 853 | struct drbd_work unplug_work; |
700 | struct drbd_work go_diskless; | ||
701 | struct drbd_work md_sync_work; | ||
702 | struct drbd_work start_resync_work; | ||
703 | struct timer_list resync_timer; | 854 | struct timer_list resync_timer; |
704 | struct timer_list md_sync_timer; | 855 | struct timer_list md_sync_timer; |
705 | struct timer_list start_resync_timer; | 856 | struct timer_list start_resync_timer; |
706 | struct timer_list request_timer; | 857 | struct timer_list request_timer; |
707 | #ifdef DRBD_DEBUG_MD_SYNC | ||
708 | struct { | ||
709 | unsigned int line; | ||
710 | const char* func; | ||
711 | } last_md_mark_dirty; | ||
712 | #endif | ||
713 | 858 | ||
714 | /* Used after attach while negotiating new disk state. */ | 859 | /* Used after attach while negotiating new disk state. */ |
715 | union drbd_state new_state_tmp; | 860 | union drbd_state new_state_tmp; |
@@ -724,6 +869,7 @@ struct drbd_device { | |||
724 | unsigned int al_writ_cnt; | 869 | unsigned int al_writ_cnt; |
725 | unsigned int bm_writ_cnt; | 870 | unsigned int bm_writ_cnt; |
726 | atomic_t ap_bio_cnt; /* Requests we need to complete */ | 871 | atomic_t ap_bio_cnt; /* Requests we need to complete */ |
872 | atomic_t ap_actlog_cnt; /* Requests waiting for activity log */ | ||
727 | atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ | 873 | atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ |
728 | atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ | 874 | atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ |
729 | atomic_t unacked_cnt; /* Need to send replies for */ | 875 | atomic_t unacked_cnt; /* Need to send replies for */ |
@@ -733,6 +879,13 @@ struct drbd_device { | |||
733 | struct rb_root read_requests; | 879 | struct rb_root read_requests; |
734 | struct rb_root write_requests; | 880 | struct rb_root write_requests; |
735 | 881 | ||
882 | /* for statistics and timeouts */ | ||
883 | /* [0] read, [1] write */ | ||
884 | struct list_head pending_master_completion[2]; | ||
885 | struct list_head pending_completion[2]; | ||
886 | |||
887 | /* use checksums for *this* resync */ | ||
888 | bool use_csums; | ||
736 | /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ | 889 | /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ |
737 | unsigned long rs_total; | 890 | unsigned long rs_total; |
738 | /* number of resync blocks that failed in this run */ | 891 | /* number of resync blocks that failed in this run */ |
@@ -788,9 +941,7 @@ struct drbd_device { | |||
788 | atomic_t pp_in_use; /* allocated from page pool */ | 941 | atomic_t pp_in_use; /* allocated from page pool */ |
789 | atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ | 942 | atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ |
790 | wait_queue_head_t ee_wait; | 943 | wait_queue_head_t ee_wait; |
791 | struct page *md_io_page; /* one page buffer for md_io */ | ||
792 | struct drbd_md_io md_io; | 944 | struct drbd_md_io md_io; |
793 | atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */ | ||
794 | spinlock_t al_lock; | 945 | spinlock_t al_lock; |
795 | wait_queue_head_t al_wait; | 946 | wait_queue_head_t al_wait; |
796 | struct lru_cache *act_log; /* activity log */ | 947 | struct lru_cache *act_log; /* activity log */ |
@@ -800,7 +951,6 @@ struct drbd_device { | |||
800 | atomic_t packet_seq; | 951 | atomic_t packet_seq; |
801 | unsigned int peer_seq; | 952 | unsigned int peer_seq; |
802 | spinlock_t peer_seq_lock; | 953 | spinlock_t peer_seq_lock; |
803 | unsigned int minor; | ||
804 | unsigned long comm_bm_set; /* communicated number of set bits. */ | 954 | unsigned long comm_bm_set; /* communicated number of set bits. */ |
805 | struct bm_io_work bm_io_work; | 955 | struct bm_io_work bm_io_work; |
806 | u64 ed_uuid; /* UUID of the exposed data */ | 956 | u64 ed_uuid; /* UUID of the exposed data */ |
@@ -824,6 +974,21 @@ struct drbd_device { | |||
824 | struct submit_worker submit; | 974 | struct submit_worker submit; |
825 | }; | 975 | }; |
826 | 976 | ||
977 | struct drbd_bm_aio_ctx { | ||
978 | struct drbd_device *device; | ||
979 | struct list_head list; /* on device->pending_bitmap_io */ | ||
980 | unsigned long start_jif; | ||
981 | atomic_t in_flight; | ||
982 | unsigned int done; | ||
983 | unsigned flags; | ||
984 | #define BM_AIO_COPY_PAGES 1 | ||
985 | #define BM_AIO_WRITE_HINTED 2 | ||
986 | #define BM_AIO_WRITE_ALL_PAGES 4 | ||
987 | #define BM_AIO_READ 8 | ||
988 | int error; | ||
989 | struct kref kref; | ||
990 | }; | ||
991 | |||
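The BM_AIO_* values are single bits, so a submitter can OR them into ctx->flags and the completion path can test each one independently. A minimal sketch of that pattern, using only the flag values defined above (the helper below is illustrative, not part of this patch):

	/* illustrative helper, not from this patch */
	static bool bm_aio_ctx_is_write(const struct drbd_bm_aio_ctx *ctx)
	{
		/* BM_AIO_READ selects a read; all other combinations are write-out */
		return !(ctx->flags & BM_AIO_READ);
	}

	/* e.g. a hinted write-out that must not touch the live bitmap pages: */
	ctx->flags = BM_AIO_COPY_PAGES | BM_AIO_WRITE_HINTED;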
827 | struct drbd_config_context { | 992 | struct drbd_config_context { |
828 | /* assigned from drbd_genlmsghdr */ | 993 | /* assigned from drbd_genlmsghdr */ |
829 | unsigned int minor; | 994 | unsigned int minor; |
@@ -949,7 +1114,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int | |||
949 | extern int drbd_send_bitmap(struct drbd_device *device); | 1114 | extern int drbd_send_bitmap(struct drbd_device *device); |
950 | extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); | 1115 | extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); |
951 | extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); | 1116 | extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); |
952 | extern void drbd_free_bc(struct drbd_backing_dev *ldev); | 1117 | extern void drbd_free_ldev(struct drbd_backing_dev *ldev); |
953 | extern void drbd_device_cleanup(struct drbd_device *device); | 1118 | extern void drbd_device_cleanup(struct drbd_device *device); |
954 | void drbd_print_uuids(struct drbd_device *device, const char *text); | 1119 | void drbd_print_uuids(struct drbd_device *device, const char *text); |
955 | 1120 | ||
@@ -966,13 +1131,7 @@ extern void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must | |||
966 | extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local); | 1131 | extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local); |
967 | extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local); | 1132 | extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local); |
968 | extern int drbd_md_test_flag(struct drbd_backing_dev *, int); | 1133 | extern int drbd_md_test_flag(struct drbd_backing_dev *, int); |
969 | #ifndef DRBD_DEBUG_MD_SYNC | ||
970 | extern void drbd_md_mark_dirty(struct drbd_device *device); | 1134 | extern void drbd_md_mark_dirty(struct drbd_device *device); |
971 | #else | ||
972 | #define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ ) | ||
973 | extern void drbd_md_mark_dirty_(struct drbd_device *device, | ||
974 | unsigned int line, const char *func); | ||
975 | #endif | ||
976 | extern void drbd_queue_bitmap_io(struct drbd_device *device, | 1135 | extern void drbd_queue_bitmap_io(struct drbd_device *device, |
977 | int (*io_fn)(struct drbd_device *), | 1136 | int (*io_fn)(struct drbd_device *), |
978 | void (*done)(struct drbd_device *, int), | 1137 | void (*done)(struct drbd_device *, int), |
@@ -983,9 +1142,8 @@ extern int drbd_bitmap_io(struct drbd_device *device, | |||
983 | extern int drbd_bitmap_io_from_worker(struct drbd_device *device, | 1142 | extern int drbd_bitmap_io_from_worker(struct drbd_device *device, |
984 | int (*io_fn)(struct drbd_device *), | 1143 | int (*io_fn)(struct drbd_device *), |
985 | char *why, enum bm_flag flags); | 1144 | char *why, enum bm_flag flags); |
986 | extern int drbd_bmio_set_n_write(struct drbd_device *device); | 1145 | extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local); |
987 | extern int drbd_bmio_clear_n_write(struct drbd_device *device); | 1146 | extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local); |
988 | extern void drbd_ldev_destroy(struct drbd_device *device); | ||
989 | 1147 | ||
990 | /* Meta data layout | 1148 | /* Meta data layout |
991 | * | 1149 | * |
@@ -1105,17 +1263,21 @@ struct bm_extent { | |||
1105 | /* in which _bitmap_ extent (resp. sector) the bit for a certain | 1263 | /* in which _bitmap_ extent (resp. sector) the bit for a certain |
1106 | * _storage_ sector is located in */ | 1264 | * _storage_ sector is located in */ |
1107 | #define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) | 1265 | #define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) |
1266 | #define BM_BIT_TO_EXT(x) ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) | ||
1108 | 1267 | ||
1109 | /* how much _storage_ sectors we have per bitmap sector */ | 1268 | /* first storage sector a bitmap extent corresponds to */ |
1110 | #define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) | 1269 | #define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) |
1270 | /* how many _storage_ sectors we have per bitmap extent */ | ||
1111 | #define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) | 1271 | #define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) |
1272 | /* how many bits are covered by one bitmap extent (resync extent) */ | ||
1273 | #define BM_BITS_PER_EXT (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) | ||
1274 | |||
1275 | #define BM_BLOCKS_PER_BM_EXT_MASK (BM_BITS_PER_EXT - 1) | ||
1276 | |||
1112 | 1277 | ||
1113 | /* in one sector of the bitmap, we have this many activity_log extents. */ | 1278 | /* in one sector of the bitmap, we have this many activity_log extents. */ |
1114 | #define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) | 1279 | #define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) |
1115 | 1280 | ||
1116 | #define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) | ||
1117 | #define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1) | ||
1118 | |||
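Plugging in the usual DRBD constants -- BM_BLOCK_SHIFT = 12 (one bit per 4 KiB) and BM_EXT_SHIFT = 24 (16 MiB per bitmap extent), both assumed here since they are defined elsewhere in this header -- the new and old definitions line up like this:

	/* BM_SECT_TO_EXT(sector)    == sector >> 15  -> 32768 sectors (16 MiB) per extent */
	/* BM_BIT_TO_EXT(bit)        == bit >> 12     -> 4096 bits (4096 * 4 KiB = 16 MiB) */
	/* BM_BITS_PER_EXT           == 1UL << 12 == 4096                                  */
	/* BM_BLOCKS_PER_BM_EXT_MASK == 4095 == 0xfff, the same value the removed          */
	/*                              BM_BLOCKS_PER_BM_EXT_B based definition produced   */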
1119 | /* the extent in "PER_EXTENT" below is an activity log extent | 1281 | /* the extent in "PER_EXTENT" below is an activity log extent |
1120 | * we need that many (long words/bytes) to store the bitmap | 1282 | * we need that many (long words/bytes) to store the bitmap |
1121 | * of one AL_EXTENT_SIZE chunk of storage. | 1283 | * of one AL_EXTENT_SIZE chunk of storage. |
@@ -1195,11 +1357,11 @@ extern void _drbd_bm_set_bits(struct drbd_device *device, | |||
1195 | const unsigned long s, const unsigned long e); | 1357 | const unsigned long s, const unsigned long e); |
1196 | extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr); | 1358 | extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr); |
1197 | extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr); | 1359 | extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr); |
1198 | extern int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local); | ||
1199 | extern int drbd_bm_read(struct drbd_device *device) __must_hold(local); | 1360 | extern int drbd_bm_read(struct drbd_device *device) __must_hold(local); |
1200 | extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr); | 1361 | extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr); |
1201 | extern int drbd_bm_write(struct drbd_device *device) __must_hold(local); | 1362 | extern int drbd_bm_write(struct drbd_device *device) __must_hold(local); |
1202 | extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local); | 1363 | extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local); |
1364 | extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local); | ||
1203 | extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local); | 1365 | extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local); |
1204 | extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local); | 1366 | extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local); |
1205 | extern size_t drbd_bm_words(struct drbd_device *device); | 1367 | extern size_t drbd_bm_words(struct drbd_device *device); |
@@ -1213,7 +1375,6 @@ extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned lon | |||
1213 | extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo); | 1375 | extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo); |
1214 | extern unsigned long _drbd_bm_total_weight(struct drbd_device *device); | 1376 | extern unsigned long _drbd_bm_total_weight(struct drbd_device *device); |
1215 | extern unsigned long drbd_bm_total_weight(struct drbd_device *device); | 1377 | extern unsigned long drbd_bm_total_weight(struct drbd_device *device); |
1216 | extern int drbd_bm_rs_done(struct drbd_device *device); | ||
1217 | /* for receive_bitmap */ | 1378 | /* for receive_bitmap */ |
1218 | extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, | 1379 | extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, |
1219 | size_t number, unsigned long *buffer); | 1380 | size_t number, unsigned long *buffer); |
@@ -1312,7 +1473,7 @@ enum determine_dev_size { | |||
1312 | extern enum determine_dev_size | 1473 | extern enum determine_dev_size |
1313 | drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local); | 1474 | drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local); |
1314 | extern void resync_after_online_grow(struct drbd_device *); | 1475 | extern void resync_after_online_grow(struct drbd_device *); |
1315 | extern void drbd_reconsider_max_bio_size(struct drbd_device *device); | 1476 | extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev); |
1316 | extern enum drbd_state_rv drbd_set_role(struct drbd_device *device, | 1477 | extern enum drbd_state_rv drbd_set_role(struct drbd_device *device, |
1317 | enum drbd_role new_role, | 1478 | enum drbd_role new_role, |
1318 | int force); | 1479 | int force); |
@@ -1333,7 +1494,7 @@ extern void resume_next_sg(struct drbd_device *device); | |||
1333 | extern void suspend_other_sg(struct drbd_device *device); | 1494 | extern void suspend_other_sg(struct drbd_device *device); |
1334 | extern int drbd_resync_finished(struct drbd_device *device); | 1495 | extern int drbd_resync_finished(struct drbd_device *device); |
1335 | /* maybe rather drbd_main.c ? */ | 1496 | /* maybe rather drbd_main.c ? */ |
1336 | extern void *drbd_md_get_buffer(struct drbd_device *device); | 1497 | extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent); |
1337 | extern void drbd_md_put_buffer(struct drbd_device *device); | 1498 | extern void drbd_md_put_buffer(struct drbd_device *device); |
1338 | extern int drbd_md_sync_page_io(struct drbd_device *device, | 1499 | extern int drbd_md_sync_page_io(struct drbd_device *device, |
1339 | struct drbd_backing_dev *bdev, sector_t sector, int rw); | 1500 | struct drbd_backing_dev *bdev, sector_t sector, int rw); |
@@ -1380,7 +1541,8 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); | |||
1380 | extern int drbd_receiver(struct drbd_thread *thi); | 1541 | extern int drbd_receiver(struct drbd_thread *thi); |
1381 | extern int drbd_asender(struct drbd_thread *thi); | 1542 | extern int drbd_asender(struct drbd_thread *thi); |
1382 | extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); | 1543 | extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); |
1383 | extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector); | 1544 | extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, |
1545 | bool throttle_if_app_is_waiting); | ||
1384 | extern int drbd_submit_peer_request(struct drbd_device *, | 1546 | extern int drbd_submit_peer_request(struct drbd_device *, |
1385 | struct drbd_peer_request *, const unsigned, | 1547 | struct drbd_peer_request *, const unsigned, |
1386 | const int); | 1548 | const int); |
@@ -1464,10 +1626,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device, | |||
1464 | { | 1626 | { |
1465 | __release(local); | 1627 | __release(local); |
1466 | if (!bio->bi_bdev) { | 1628 | if (!bio->bi_bdev) { |
1467 | printk(KERN_ERR "drbd%d: drbd_generic_make_request: " | 1629 | drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n"); |
1468 | "bio->bi_bdev == NULL\n", | ||
1469 | device_to_minor(device)); | ||
1470 | dump_stack(); | ||
1471 | bio_endio(bio, -ENODEV); | 1630 | bio_endio(bio, -ENODEV); |
1472 | return; | 1631 | return; |
1473 | } | 1632 | } |
@@ -1478,7 +1637,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device, | |||
1478 | generic_make_request(bio); | 1637 | generic_make_request(bio); |
1479 | } | 1638 | } |
1480 | 1639 | ||
1481 | void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo); | 1640 | void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, |
1641 | enum write_ordering_e wo); | ||
1482 | 1642 | ||
1483 | /* drbd_proc.c */ | 1643 | /* drbd_proc.c */ |
1484 | extern struct proc_dir_entry *drbd_proc; | 1644 | extern struct proc_dir_entry *drbd_proc; |
@@ -1489,9 +1649,9 @@ extern const char *drbd_role_str(enum drbd_role s); | |||
1489 | /* drbd_actlog.c */ | 1649 | /* drbd_actlog.c */ |
1490 | extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i); | 1650 | extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i); |
1491 | extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i); | 1651 | extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i); |
1492 | extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate); | 1652 | extern void drbd_al_begin_io_commit(struct drbd_device *device); |
1493 | extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i); | 1653 | extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i); |
1494 | extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate); | 1654 | extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i); |
1495 | extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i); | 1655 | extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i); |
1496 | extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector); | 1656 | extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector); |
1497 | extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector); | 1657 | extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector); |
@@ -1501,14 +1661,17 @@ extern int drbd_rs_del_all(struct drbd_device *device); | |||
1501 | extern void drbd_rs_failed_io(struct drbd_device *device, | 1661 | extern void drbd_rs_failed_io(struct drbd_device *device, |
1502 | sector_t sector, int size); | 1662 | sector_t sector, int size); |
1503 | extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); | 1663 | extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); |
1504 | extern void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, | 1664 | |
1505 | int size, const char *file, const unsigned int line); | 1665 | enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC }; |
1666 | extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, | ||
1667 | enum update_sync_bits_mode mode, | ||
1668 | const char *file, const unsigned int line); | ||
1506 | #define drbd_set_in_sync(device, sector, size) \ | 1669 | #define drbd_set_in_sync(device, sector, size) \ |
1507 | __drbd_set_in_sync(device, sector, size, __FILE__, __LINE__) | 1670 | __drbd_change_sync(device, sector, size, SET_IN_SYNC, __FILE__, __LINE__) |
1508 | extern int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, | ||
1509 | int size, const char *file, const unsigned int line); | ||
1510 | #define drbd_set_out_of_sync(device, sector, size) \ | 1671 | #define drbd_set_out_of_sync(device, sector, size) \ |
1511 | __drbd_set_out_of_sync(device, sector, size, __FILE__, __LINE__) | 1672 | __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC, __FILE__, __LINE__) |
1673 | #define drbd_rs_failed_io(device, sector, size) \ | ||
1674 | __drbd_change_sync(device, sector, size, RECORD_RS_FAILED, __FILE__, __LINE__) | ||
1512 | extern void drbd_al_shrink(struct drbd_device *device); | 1675 | extern void drbd_al_shrink(struct drbd_device *device); |
1513 | extern int drbd_initialize_al(struct drbd_device *, void *); | 1676 | extern int drbd_initialize_al(struct drbd_device *, void *); |
1514 | 1677 | ||
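All three former entry points now funnel into __drbd_change_sync(); only the mode argument differs. For instance, marking one block as failed during resync expands to the call below (the sector value is illustrative, BM_BLOCK_SIZE is the per-bit granularity):

	/* drbd_rs_failed_io(device, sector, BM_BLOCK_SIZE) expands to: */
	__drbd_change_sync(device, sector, BM_BLOCK_SIZE,
			   RECORD_RS_FAILED, __FILE__, __LINE__);
	/* drbd_set_in_sync() / drbd_set_out_of_sync() pass SET_IN_SYNC and
	 * SET_OUT_OF_SYNC through the same helper. */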
@@ -1764,25 +1927,38 @@ static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev) | |||
1764 | } | 1927 | } |
1765 | 1928 | ||
1766 | static inline void | 1929 | static inline void |
1767 | drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w) | 1930 | drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) |
1768 | { | 1931 | { |
1769 | unsigned long flags; | 1932 | unsigned long flags; |
1770 | spin_lock_irqsave(&q->q_lock, flags); | 1933 | spin_lock_irqsave(&q->q_lock, flags); |
1771 | list_add(&w->list, &q->q); | 1934 | list_add_tail(&w->list, &q->q); |
1772 | spin_unlock_irqrestore(&q->q_lock, flags); | 1935 | spin_unlock_irqrestore(&q->q_lock, flags); |
1773 | wake_up(&q->q_wait); | 1936 | wake_up(&q->q_wait); |
1774 | } | 1937 | } |
1775 | 1938 | ||
1776 | static inline void | 1939 | static inline void |
1777 | drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) | 1940 | drbd_queue_work_if_unqueued(struct drbd_work_queue *q, struct drbd_work *w) |
1778 | { | 1941 | { |
1779 | unsigned long flags; | 1942 | unsigned long flags; |
1780 | spin_lock_irqsave(&q->q_lock, flags); | 1943 | spin_lock_irqsave(&q->q_lock, flags); |
1781 | list_add_tail(&w->list, &q->q); | 1944 | if (list_empty_careful(&w->list)) |
1945 | list_add_tail(&w->list, &q->q); | ||
1782 | spin_unlock_irqrestore(&q->q_lock, flags); | 1946 | spin_unlock_irqrestore(&q->q_lock, flags); |
1783 | wake_up(&q->q_wait); | 1947 | wake_up(&q->q_wait); |
1784 | } | 1948 | } |
1785 | 1949 | ||
1950 | static inline void | ||
1951 | drbd_device_post_work(struct drbd_device *device, int work_bit) | ||
1952 | { | ||
1953 | if (!test_and_set_bit(work_bit, &device->flags)) { | ||
1954 | struct drbd_connection *connection = | ||
1955 | first_peer_device(device)->connection; | ||
1956 | struct drbd_work_queue *q = &connection->sender_work; | ||
1957 | if (!test_and_set_bit(DEVICE_WORK_PENDING, &connection->flags)) | ||
1958 | wake_up(&q->q_wait); | ||
1959 | } | ||
1960 | } | ||
1961 | |||
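With this helper, per-event struct drbd_work members in struct drbd_device become unnecessary: a producer only sets a bit and wakes the sender. How the bits are drained is not visible in this hunk, so the consumer side below is only a sketch of the expected pattern (GO_DISKLESS is a flag used by put_ldev() later in this patch; go_diskless() is a hypothetical handler name):

	/* producer, e.g. on dropping the last local disk reference: */
	drbd_device_post_work(device, GO_DISKLESS);

	/* consumer (sketch): the worker, once DEVICE_WORK_PENDING woke it up */
	if (test_and_clear_bit(GO_DISKLESS, &device->flags))
		go_diskless(device);	/* hypothetical handler */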
1786 | extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue); | 1962 | extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue); |
1787 | 1963 | ||
1788 | static inline void wake_asender(struct drbd_connection *connection) | 1964 | static inline void wake_asender(struct drbd_connection *connection) |
@@ -1859,7 +2035,7 @@ static inline void inc_ap_pending(struct drbd_device *device) | |||
1859 | func, line, \ | 2035 | func, line, \ |
1860 | atomic_read(&device->which)) | 2036 | atomic_read(&device->which)) |
1861 | 2037 | ||
1862 | #define dec_ap_pending(device) _dec_ap_pending(device, __FUNCTION__, __LINE__) | 2038 | #define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__) |
1863 | static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line) | 2039 | static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line) |
1864 | { | 2040 | { |
1865 | if (atomic_dec_and_test(&device->ap_pending_cnt)) | 2041 | if (atomic_dec_and_test(&device->ap_pending_cnt)) |
@@ -1878,7 +2054,7 @@ static inline void inc_rs_pending(struct drbd_device *device) | |||
1878 | atomic_inc(&device->rs_pending_cnt); | 2054 | atomic_inc(&device->rs_pending_cnt); |
1879 | } | 2055 | } |
1880 | 2056 | ||
1881 | #define dec_rs_pending(device) _dec_rs_pending(device, __FUNCTION__, __LINE__) | 2057 | #define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__) |
1882 | static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line) | 2058 | static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line) |
1883 | { | 2059 | { |
1884 | atomic_dec(&device->rs_pending_cnt); | 2060 | atomic_dec(&device->rs_pending_cnt); |
@@ -1899,20 +2075,29 @@ static inline void inc_unacked(struct drbd_device *device) | |||
1899 | atomic_inc(&device->unacked_cnt); | 2075 | atomic_inc(&device->unacked_cnt); |
1900 | } | 2076 | } |
1901 | 2077 | ||
1902 | #define dec_unacked(device) _dec_unacked(device, __FUNCTION__, __LINE__) | 2078 | #define dec_unacked(device) _dec_unacked(device, __func__, __LINE__) |
1903 | static inline void _dec_unacked(struct drbd_device *device, const char *func, int line) | 2079 | static inline void _dec_unacked(struct drbd_device *device, const char *func, int line) |
1904 | { | 2080 | { |
1905 | atomic_dec(&device->unacked_cnt); | 2081 | atomic_dec(&device->unacked_cnt); |
1906 | ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); | 2082 | ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); |
1907 | } | 2083 | } |
1908 | 2084 | ||
1909 | #define sub_unacked(device, n) _sub_unacked(device, n, __FUNCTION__, __LINE__) | 2085 | #define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__) |
1910 | static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line) | 2086 | static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line) |
1911 | { | 2087 | { |
1912 | atomic_sub(n, &device->unacked_cnt); | 2088 | atomic_sub(n, &device->unacked_cnt); |
1913 | ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); | 2089 | ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); |
1914 | } | 2090 | } |
1915 | 2091 | ||
2092 | static inline bool is_sync_state(enum drbd_conns connection_state) | ||
2093 | { | ||
2094 | return | ||
2095 | (connection_state == C_SYNC_SOURCE | ||
2096 | || connection_state == C_SYNC_TARGET | ||
2097 | || connection_state == C_PAUSED_SYNC_S | ||
2098 | || connection_state == C_PAUSED_SYNC_T); | ||
2099 | } | ||
2100 | |||
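is_sync_state() bundles the four "resync running or paused" connection states so callers no longer open-code the comparison chain; a typical (illustrative) use:

	/* e.g. when deciding whether resync-related throttling applies: */
	if (is_sync_state(device->state.conn))
		throttle = true;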
1916 | /** | 2101 | /** |
1917 | * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev | 2102 | * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev |
1918 | * @M: DRBD device. | 2103 | * @M: DRBD device. |
@@ -1924,6 +2109,11 @@ static inline void _sub_unacked(struct drbd_device *device, int n, const char *f | |||
1924 | 2109 | ||
1925 | static inline void put_ldev(struct drbd_device *device) | 2110 | static inline void put_ldev(struct drbd_device *device) |
1926 | { | 2111 | { |
2112 | enum drbd_disk_state ds = device->state.disk; | ||
2113 | /* We must check the state *before* the atomic_dec becomes visible, | ||
2114 | * or we have a theoretical race where someone hitting zero, | ||
2115 | * while the state is still D_FAILED, will then see D_DISKLESS in the | ||
2116 | * condition below and call into destroy, where it must not, yet. */ | ||
1927 | int i = atomic_dec_return(&device->local_cnt); | 2117 | int i = atomic_dec_return(&device->local_cnt); |
1928 | 2118 | ||
1929 | /* This may be called from some endio handler, | 2119 | /* This may be called from some endio handler, |
@@ -1932,15 +2122,13 @@ static inline void put_ldev(struct drbd_device *device) | |||
1932 | __release(local); | 2122 | __release(local); |
1933 | D_ASSERT(device, i >= 0); | 2123 | D_ASSERT(device, i >= 0); |
1934 | if (i == 0) { | 2124 | if (i == 0) { |
1935 | if (device->state.disk == D_DISKLESS) | 2125 | if (ds == D_DISKLESS) |
1936 | /* even internal references gone, safe to destroy */ | 2126 | /* even internal references gone, safe to destroy */ |
1937 | drbd_ldev_destroy(device); | 2127 | drbd_device_post_work(device, DESTROY_DISK); |
1938 | if (device->state.disk == D_FAILED) { | 2128 | if (ds == D_FAILED) |
1939 | /* all application IO references gone. */ | 2129 | /* all application IO references gone. */ |
1940 | if (!test_and_set_bit(GO_DISKLESS, &device->flags)) | 2130 | if (!test_and_set_bit(GOING_DISKLESS, &device->flags)) |
1941 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | 2131 | drbd_device_post_work(device, GO_DISKLESS); |
1942 | &device->go_diskless); | ||
1943 | } | ||
1944 | wake_up(&device->misc_wait); | 2132 | wake_up(&device->misc_wait); |
1945 | } | 2133 | } |
1946 | } | 2134 | } |
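The early read of device->state.disk into ds is what closes the window the comment above describes; a compressed timeline of the interleaving being avoided (assumed scheduling, for illustration only):

	/* CPU A: put_ldev()                      CPU B: state change
	 *   ds = device->state.disk;  (D_FAILED)
	 *   atomic_dec_return() -> 0
	 *                                        disk state becomes D_DISKLESS
	 *   re-reading state.disk here would take the DESTROY_DISK branch
	 *   although the D_FAILED cleanup has not run yet; using ds avoids that.
	 */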
@@ -1964,54 +2152,6 @@ static inline int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_ | |||
1964 | extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins); | 2152 | extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins); |
1965 | #endif | 2153 | #endif |
1966 | 2154 | ||
1967 | /* you must have an "get_ldev" reference */ | ||
1968 | static inline void drbd_get_syncer_progress(struct drbd_device *device, | ||
1969 | unsigned long *bits_left, unsigned int *per_mil_done) | ||
1970 | { | ||
1971 | /* this is to break it at compile time when we change that, in case we | ||
1972 | * want to support more than (1<<32) bits on a 32bit arch. */ | ||
1973 | typecheck(unsigned long, device->rs_total); | ||
1974 | |||
1975 | /* note: both rs_total and rs_left are in bits, i.e. in | ||
1976 | * units of BM_BLOCK_SIZE. | ||
1977 | * for the percentage, we don't care. */ | ||
1978 | |||
1979 | if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) | ||
1980 | *bits_left = device->ov_left; | ||
1981 | else | ||
1982 | *bits_left = drbd_bm_total_weight(device) - device->rs_failed; | ||
1983 | /* >> 10 to prevent overflow, | ||
1984 | * +1 to prevent division by zero */ | ||
1985 | if (*bits_left > device->rs_total) { | ||
1986 | /* doh. maybe a logic bug somewhere. | ||
1987 | * may also be just a race condition | ||
1988 | * between this and a disconnect during sync. | ||
1989 | * for now, just prevent in-kernel buffer overflow. | ||
1990 | */ | ||
1991 | smp_rmb(); | ||
1992 | drbd_warn(device, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", | ||
1993 | drbd_conn_str(device->state.conn), | ||
1994 | *bits_left, device->rs_total, device->rs_failed); | ||
1995 | *per_mil_done = 0; | ||
1996 | } else { | ||
1997 | /* Make sure the division happens in long context. | ||
1998 | * We allow up to one petabyte storage right now, | ||
1999 | * at a granularity of 4k per bit that is 2**38 bits. | ||
2000 | * After shift right and multiplication by 1000, | ||
2001 | * this should still fit easily into a 32bit long, | ||
2002 | * so we don't need a 64bit division on 32bit arch. | ||
2003 | * Note: currently we don't support such large bitmaps on 32bit | ||
2004 | * arch anyways, but no harm done to be prepared for it here. | ||
2005 | */ | ||
2006 | unsigned int shift = device->rs_total > UINT_MAX ? 16 : 10; | ||
2007 | unsigned long left = *bits_left >> shift; | ||
2008 | unsigned long total = 1UL + (device->rs_total >> shift); | ||
2009 | unsigned long tmp = 1000UL - left * 1000UL/total; | ||
2010 | *per_mil_done = tmp; | ||
2011 | } | ||
2012 | } | ||
2013 | |||
2014 | |||
2015 | /* this throttles on-the-fly application requests | 2155 | /* this throttles on-the-fly application requests |
2016 | * according to max_buffers settings; | 2156 | * according to max_buffers settings; |
2017 | * maybe re-implement using semaphores? */ | 2157 | * maybe re-implement using semaphores? */ |
@@ -2201,25 +2341,6 @@ static inline int drbd_queue_order_type(struct drbd_device *device) | |||
2201 | return QUEUE_ORDERED_NONE; | 2341 | return QUEUE_ORDERED_NONE; |
2202 | } | 2342 | } |
2203 | 2343 | ||
2204 | static inline void drbd_md_flush(struct drbd_device *device) | ||
2205 | { | ||
2206 | int r; | ||
2207 | |||
2208 | if (device->ldev == NULL) { | ||
2209 | drbd_warn(device, "device->ldev == NULL in drbd_md_flush\n"); | ||
2210 | return; | ||
2211 | } | ||
2212 | |||
2213 | if (test_bit(MD_NO_FUA, &device->flags)) | ||
2214 | return; | ||
2215 | |||
2216 | r = blkdev_issue_flush(device->ldev->md_bdev, GFP_NOIO, NULL); | ||
2217 | if (r) { | ||
2218 | set_bit(MD_NO_FUA, &device->flags); | ||
2219 | drbd_err(device, "meta data flush failed with status %d, disabling md-flushes\n", r); | ||
2220 | } | ||
2221 | } | ||
2222 | |||
2223 | static inline struct drbd_connection *first_connection(struct drbd_resource *resource) | 2344 | static inline struct drbd_connection *first_connection(struct drbd_resource *resource) |
2224 | { | 2345 | { |
2225 | return list_first_entry_or_null(&resource->connections, | 2346 | return list_first_entry_or_null(&resource->connections, |
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index f38fcb00c10d..f210543f05f4 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h | |||
@@ -10,7 +10,9 @@ struct drbd_interval { | |||
10 | unsigned int size; /* size in bytes */ | 10 | unsigned int size; /* size in bytes */ |
11 | sector_t end; /* highest interval end in subtree */ | 11 | sector_t end; /* highest interval end in subtree */ |
12 | int local:1 /* local or remote request? */; | 12 | int local:1 /* local or remote request? */; |
13 | int waiting:1; | 13 | int waiting:1; /* someone is waiting for this to complete */ |
14 | int completed:1; /* this has been completed already; | ||
15 | * ignore for conflict detection */ | ||
14 | }; | 16 | }; |
15 | 17 | ||
16 | static inline void drbd_clear_interval(struct drbd_interval *i) | 18 | static inline void drbd_clear_interval(struct drbd_interval *i) |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 960645c26e6f..9b465bb68487 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -26,7 +26,10 @@ | |||
26 | 26 | ||
27 | */ | 27 | */ |
28 | 28 | ||
29 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
30 | |||
29 | #include <linux/module.h> | 31 | #include <linux/module.h> |
32 | #include <linux/jiffies.h> | ||
30 | #include <linux/drbd.h> | 33 | #include <linux/drbd.h> |
31 | #include <asm/uaccess.h> | 34 | #include <asm/uaccess.h> |
32 | #include <asm/types.h> | 35 | #include <asm/types.h> |
@@ -54,16 +57,14 @@ | |||
54 | #include "drbd_int.h" | 57 | #include "drbd_int.h" |
55 | #include "drbd_protocol.h" | 58 | #include "drbd_protocol.h" |
56 | #include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ | 59 | #include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ |
57 | |||
58 | #include "drbd_vli.h" | 60 | #include "drbd_vli.h" |
61 | #include "drbd_debugfs.h" | ||
59 | 62 | ||
60 | static DEFINE_MUTEX(drbd_main_mutex); | 63 | static DEFINE_MUTEX(drbd_main_mutex); |
61 | static int drbd_open(struct block_device *bdev, fmode_t mode); | 64 | static int drbd_open(struct block_device *bdev, fmode_t mode); |
62 | static void drbd_release(struct gendisk *gd, fmode_t mode); | 65 | static void drbd_release(struct gendisk *gd, fmode_t mode); |
63 | static int w_md_sync(struct drbd_work *w, int unused); | ||
64 | static void md_sync_timer_fn(unsigned long data); | 66 | static void md_sync_timer_fn(unsigned long data); |
65 | static int w_bitmap_io(struct drbd_work *w, int unused); | 67 | static int w_bitmap_io(struct drbd_work *w, int unused); |
66 | static int w_go_diskless(struct drbd_work *w, int unused); | ||
67 | 68 | ||
68 | MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " | 69 | MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " |
69 | "Lars Ellenberg <lars@linbit.com>"); | 70 | "Lars Ellenberg <lars@linbit.com>"); |
@@ -264,7 +265,7 @@ bail: | |||
264 | 265 | ||
265 | /** | 266 | /** |
266 | * _tl_restart() - Walks the transfer log, and applies an action to all requests | 267 | * _tl_restart() - Walks the transfer log, and applies an action to all requests |
267 | * @device: DRBD device. | 268 | * @connection: DRBD connection to operate on. |
268 | * @what: The action/event to perform with all request objects | 269 | * @what: The action/event to perform with all request objects |
269 | * | 270 | * |
270 | * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, | 271 | * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, |
@@ -662,6 +663,11 @@ static int __send_command(struct drbd_connection *connection, int vnr, | |||
662 | msg_flags); | 663 | msg_flags); |
663 | if (data && !err) | 664 | if (data && !err) |
664 | err = drbd_send_all(connection, sock->socket, data, size, 0); | 665 | err = drbd_send_all(connection, sock->socket, data, size, 0); |
666 | /* DRBD protocol "pings" are latency critical. | ||
667 | * This is supposed to trigger tcp_push_pending_frames() */ | ||
668 | if (!err && (cmd == P_PING || cmd == P_PING_ACK)) | ||
669 | drbd_tcp_nodelay(sock->socket); | ||
670 | |||
665 | return err; | 671 | return err; |
666 | } | 672 | } |
667 | 673 | ||
@@ -1636,7 +1642,10 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * | |||
1636 | if (peer_device->connection->agreed_pro_version >= 100) { | 1642 | if (peer_device->connection->agreed_pro_version >= 100) { |
1637 | if (req->rq_state & RQ_EXP_RECEIVE_ACK) | 1643 | if (req->rq_state & RQ_EXP_RECEIVE_ACK) |
1638 | dp_flags |= DP_SEND_RECEIVE_ACK; | 1644 | dp_flags |= DP_SEND_RECEIVE_ACK; |
1639 | if (req->rq_state & RQ_EXP_WRITE_ACK) | 1645 | /* During resync, request an explicit write ack, |
1646 | * even in protocol != C */ | ||
1647 | if (req->rq_state & RQ_EXP_WRITE_ACK | ||
1648 | || (dp_flags & DP_MAY_SET_IN_SYNC)) | ||
1640 | dp_flags |= DP_SEND_WRITE_ACK; | 1649 | dp_flags |= DP_SEND_WRITE_ACK; |
1641 | } | 1650 | } |
1642 | p->dp_flags = cpu_to_be32(dp_flags); | 1651 | p->dp_flags = cpu_to_be32(dp_flags); |
@@ -1900,6 +1909,7 @@ void drbd_init_set_defaults(struct drbd_device *device) | |||
1900 | drbd_set_defaults(device); | 1909 | drbd_set_defaults(device); |
1901 | 1910 | ||
1902 | atomic_set(&device->ap_bio_cnt, 0); | 1911 | atomic_set(&device->ap_bio_cnt, 0); |
1912 | atomic_set(&device->ap_actlog_cnt, 0); | ||
1903 | atomic_set(&device->ap_pending_cnt, 0); | 1913 | atomic_set(&device->ap_pending_cnt, 0); |
1904 | atomic_set(&device->rs_pending_cnt, 0); | 1914 | atomic_set(&device->rs_pending_cnt, 0); |
1905 | atomic_set(&device->unacked_cnt, 0); | 1915 | atomic_set(&device->unacked_cnt, 0); |
@@ -1908,7 +1918,7 @@ void drbd_init_set_defaults(struct drbd_device *device) | |||
1908 | atomic_set(&device->rs_sect_in, 0); | 1918 | atomic_set(&device->rs_sect_in, 0); |
1909 | atomic_set(&device->rs_sect_ev, 0); | 1919 | atomic_set(&device->rs_sect_ev, 0); |
1910 | atomic_set(&device->ap_in_flight, 0); | 1920 | atomic_set(&device->ap_in_flight, 0); |
1911 | atomic_set(&device->md_io_in_use, 0); | 1921 | atomic_set(&device->md_io.in_use, 0); |
1912 | 1922 | ||
1913 | mutex_init(&device->own_state_mutex); | 1923 | mutex_init(&device->own_state_mutex); |
1914 | device->state_mutex = &device->own_state_mutex; | 1924 | device->state_mutex = &device->own_state_mutex; |
@@ -1924,17 +1934,15 @@ void drbd_init_set_defaults(struct drbd_device *device) | |||
1924 | INIT_LIST_HEAD(&device->resync_reads); | 1934 | INIT_LIST_HEAD(&device->resync_reads); |
1925 | INIT_LIST_HEAD(&device->resync_work.list); | 1935 | INIT_LIST_HEAD(&device->resync_work.list); |
1926 | INIT_LIST_HEAD(&device->unplug_work.list); | 1936 | INIT_LIST_HEAD(&device->unplug_work.list); |
1927 | INIT_LIST_HEAD(&device->go_diskless.list); | ||
1928 | INIT_LIST_HEAD(&device->md_sync_work.list); | ||
1929 | INIT_LIST_HEAD(&device->start_resync_work.list); | ||
1930 | INIT_LIST_HEAD(&device->bm_io_work.w.list); | 1937 | INIT_LIST_HEAD(&device->bm_io_work.w.list); |
1938 | INIT_LIST_HEAD(&device->pending_master_completion[0]); | ||
1939 | INIT_LIST_HEAD(&device->pending_master_completion[1]); | ||
1940 | INIT_LIST_HEAD(&device->pending_completion[0]); | ||
1941 | INIT_LIST_HEAD(&device->pending_completion[1]); | ||
1931 | 1942 | ||
1932 | device->resync_work.cb = w_resync_timer; | 1943 | device->resync_work.cb = w_resync_timer; |
1933 | device->unplug_work.cb = w_send_write_hint; | 1944 | device->unplug_work.cb = w_send_write_hint; |
1934 | device->go_diskless.cb = w_go_diskless; | ||
1935 | device->md_sync_work.cb = w_md_sync; | ||
1936 | device->bm_io_work.w.cb = w_bitmap_io; | 1945 | device->bm_io_work.w.cb = w_bitmap_io; |
1937 | device->start_resync_work.cb = w_start_resync; | ||
1938 | 1946 | ||
1939 | init_timer(&device->resync_timer); | 1947 | init_timer(&device->resync_timer); |
1940 | init_timer(&device->md_sync_timer); | 1948 | init_timer(&device->md_sync_timer); |
@@ -1992,7 +2000,7 @@ void drbd_device_cleanup(struct drbd_device *device) | |||
1992 | drbd_bm_cleanup(device); | 2000 | drbd_bm_cleanup(device); |
1993 | } | 2001 | } |
1994 | 2002 | ||
1995 | drbd_free_bc(device->ldev); | 2003 | drbd_free_ldev(device->ldev); |
1996 | device->ldev = NULL; | 2004 | device->ldev = NULL; |
1997 | 2005 | ||
1998 | clear_bit(AL_SUSPENDED, &device->flags); | 2006 | clear_bit(AL_SUSPENDED, &device->flags); |
@@ -2006,7 +2014,6 @@ void drbd_device_cleanup(struct drbd_device *device) | |||
2006 | D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q)); | 2014 | D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q)); |
2007 | D_ASSERT(device, list_empty(&device->resync_work.list)); | 2015 | D_ASSERT(device, list_empty(&device->resync_work.list)); |
2008 | D_ASSERT(device, list_empty(&device->unplug_work.list)); | 2016 | D_ASSERT(device, list_empty(&device->unplug_work.list)); |
2009 | D_ASSERT(device, list_empty(&device->go_diskless.list)); | ||
2010 | 2017 | ||
2011 | drbd_set_defaults(device); | 2018 | drbd_set_defaults(device); |
2012 | } | 2019 | } |
@@ -2129,20 +2136,6 @@ Enomem: | |||
2129 | return -ENOMEM; | 2136 | return -ENOMEM; |
2130 | } | 2137 | } |
2131 | 2138 | ||
2132 | static int drbd_notify_sys(struct notifier_block *this, unsigned long code, | ||
2133 | void *unused) | ||
2134 | { | ||
2135 | /* just so we have it. you never know what interesting things we | ||
2136 | * might want to do here some day... | ||
2137 | */ | ||
2138 | |||
2139 | return NOTIFY_DONE; | ||
2140 | } | ||
2141 | |||
2142 | static struct notifier_block drbd_notifier = { | ||
2143 | .notifier_call = drbd_notify_sys, | ||
2144 | }; | ||
2145 | |||
2146 | static void drbd_release_all_peer_reqs(struct drbd_device *device) | 2139 | static void drbd_release_all_peer_reqs(struct drbd_device *device) |
2147 | { | 2140 | { |
2148 | int rr; | 2141 | int rr; |
@@ -2173,7 +2166,7 @@ void drbd_destroy_device(struct kref *kref) | |||
2173 | { | 2166 | { |
2174 | struct drbd_device *device = container_of(kref, struct drbd_device, kref); | 2167 | struct drbd_device *device = container_of(kref, struct drbd_device, kref); |
2175 | struct drbd_resource *resource = device->resource; | 2168 | struct drbd_resource *resource = device->resource; |
2176 | struct drbd_connection *connection; | 2169 | struct drbd_peer_device *peer_device, *tmp_peer_device; |
2177 | 2170 | ||
2178 | del_timer_sync(&device->request_timer); | 2171 | del_timer_sync(&device->request_timer); |
2179 | 2172 | ||
@@ -2187,7 +2180,7 @@ void drbd_destroy_device(struct kref *kref) | |||
2187 | if (device->this_bdev) | 2180 | if (device->this_bdev) |
2188 | bdput(device->this_bdev); | 2181 | bdput(device->this_bdev); |
2189 | 2182 | ||
2190 | drbd_free_bc(device->ldev); | 2183 | drbd_free_ldev(device->ldev); |
2191 | device->ldev = NULL; | 2184 | device->ldev = NULL; |
2192 | 2185 | ||
2193 | drbd_release_all_peer_reqs(device); | 2186 | drbd_release_all_peer_reqs(device); |
@@ -2200,15 +2193,20 @@ void drbd_destroy_device(struct kref *kref) | |||
2200 | 2193 | ||
2201 | if (device->bitmap) /* should no longer be there. */ | 2194 | if (device->bitmap) /* should no longer be there. */ |
2202 | drbd_bm_cleanup(device); | 2195 | drbd_bm_cleanup(device); |
2203 | __free_page(device->md_io_page); | 2196 | __free_page(device->md_io.page); |
2204 | put_disk(device->vdisk); | 2197 | put_disk(device->vdisk); |
2205 | blk_cleanup_queue(device->rq_queue); | 2198 | blk_cleanup_queue(device->rq_queue); |
2206 | kfree(device->rs_plan_s); | 2199 | kfree(device->rs_plan_s); |
2207 | kfree(first_peer_device(device)); | ||
2208 | kfree(device); | ||
2209 | 2200 | ||
2210 | for_each_connection(connection, resource) | 2201 | /* not for_each_connection(connection, resource): |
2211 | kref_put(&connection->kref, drbd_destroy_connection); | 2202 | * those may have been cleaned up and disassociated already. |
2203 | */ | ||
2204 | for_each_peer_device_safe(peer_device, tmp_peer_device, device) { | ||
2205 | kref_put(&peer_device->connection->kref, drbd_destroy_connection); | ||
2206 | kfree(peer_device); | ||
2207 | } | ||
2208 | memset(device, 0xfd, sizeof(*device)); | ||
2209 | kfree(device); | ||
2212 | kref_put(&resource->kref, drbd_destroy_resource); | 2210 | kref_put(&resource->kref, drbd_destroy_resource); |
2213 | } | 2211 | } |
2214 | 2212 | ||
@@ -2236,7 +2234,7 @@ static void do_retry(struct work_struct *ws) | |||
2236 | list_for_each_entry_safe(req, tmp, &writes, tl_requests) { | 2234 | list_for_each_entry_safe(req, tmp, &writes, tl_requests) { |
2237 | struct drbd_device *device = req->device; | 2235 | struct drbd_device *device = req->device; |
2238 | struct bio *bio = req->master_bio; | 2236 | struct bio *bio = req->master_bio; |
2239 | unsigned long start_time = req->start_time; | 2237 | unsigned long start_jif = req->start_jif; |
2240 | bool expected; | 2238 | bool expected; |
2241 | 2239 | ||
2242 | expected = | 2240 | expected = |
@@ -2271,10 +2269,12 @@ static void do_retry(struct work_struct *ws) | |||
2271 | /* We are not just doing generic_make_request(), | 2269 | /* We are not just doing generic_make_request(), |
2272 | * as we want to keep the start_time information. */ | 2270 | * as we want to keep the start_time information. */ |
2273 | inc_ap_bio(device); | 2271 | inc_ap_bio(device); |
2274 | __drbd_make_request(device, bio, start_time); | 2272 | __drbd_make_request(device, bio, start_jif); |
2275 | } | 2273 | } |
2276 | } | 2274 | } |
2277 | 2275 | ||
2276 | /* called via drbd_req_put_completion_ref(), | ||
2277 | * holds resource->req_lock */ | ||
2278 | void drbd_restart_request(struct drbd_request *req) | 2278 | void drbd_restart_request(struct drbd_request *req) |
2279 | { | 2279 | { |
2280 | unsigned long flags; | 2280 | unsigned long flags; |
@@ -2298,6 +2298,7 @@ void drbd_destroy_resource(struct kref *kref) | |||
2298 | idr_destroy(&resource->devices); | 2298 | idr_destroy(&resource->devices); |
2299 | free_cpumask_var(resource->cpu_mask); | 2299 | free_cpumask_var(resource->cpu_mask); |
2300 | kfree(resource->name); | 2300 | kfree(resource->name); |
2301 | memset(resource, 0xf2, sizeof(*resource)); | ||
2301 | kfree(resource); | 2302 | kfree(resource); |
2302 | } | 2303 | } |
2303 | 2304 | ||
@@ -2307,8 +2308,10 @@ void drbd_free_resource(struct drbd_resource *resource) | |||
2307 | 2308 | ||
2308 | for_each_connection_safe(connection, tmp, resource) { | 2309 | for_each_connection_safe(connection, tmp, resource) { |
2309 | list_del(&connection->connections); | 2310 | list_del(&connection->connections); |
2311 | drbd_debugfs_connection_cleanup(connection); | ||
2310 | kref_put(&connection->kref, drbd_destroy_connection); | 2312 | kref_put(&connection->kref, drbd_destroy_connection); |
2311 | } | 2313 | } |
2314 | drbd_debugfs_resource_cleanup(resource); | ||
2312 | kref_put(&resource->kref, drbd_destroy_resource); | 2315 | kref_put(&resource->kref, drbd_destroy_resource); |
2313 | } | 2316 | } |
2314 | 2317 | ||
@@ -2318,8 +2321,6 @@ static void drbd_cleanup(void) | |||
2318 | struct drbd_device *device; | 2321 | struct drbd_device *device; |
2319 | struct drbd_resource *resource, *tmp; | 2322 | struct drbd_resource *resource, *tmp; |
2320 | 2323 | ||
2321 | unregister_reboot_notifier(&drbd_notifier); | ||
2322 | |||
2323 | /* first remove proc, | 2324 | /* first remove proc, |
2324 | * drbdsetup uses it's presence to detect | 2325 | * drbdsetup uses it's presence to detect |
2325 | * whether DRBD is loaded. | 2326 | * whether DRBD is loaded. |
@@ -2335,6 +2336,7 @@ static void drbd_cleanup(void) | |||
2335 | destroy_workqueue(retry.wq); | 2336 | destroy_workqueue(retry.wq); |
2336 | 2337 | ||
2337 | drbd_genl_unregister(); | 2338 | drbd_genl_unregister(); |
2339 | drbd_debugfs_cleanup(); | ||
2338 | 2340 | ||
2339 | idr_for_each_entry(&drbd_devices, device, i) | 2341 | idr_for_each_entry(&drbd_devices, device, i) |
2340 | drbd_delete_device(device); | 2342 | drbd_delete_device(device); |
@@ -2350,7 +2352,7 @@ static void drbd_cleanup(void) | |||
2350 | 2352 | ||
2351 | idr_destroy(&drbd_devices); | 2353 | idr_destroy(&drbd_devices); |
2352 | 2354 | ||
2353 | printk(KERN_INFO "drbd: module cleanup done.\n"); | 2355 | pr_info("module cleanup done.\n"); |
2354 | } | 2356 | } |
2355 | 2357 | ||
2356 | /** | 2358 | /** |
@@ -2539,6 +2541,20 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op | |||
2539 | if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { | 2541 | if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { |
2540 | err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE, | 2542 | err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE, |
2541 | cpumask_bits(new_cpu_mask), nr_cpu_ids); | 2543 | cpumask_bits(new_cpu_mask), nr_cpu_ids); |
2544 | if (err == -EOVERFLOW) { | ||
2545 | /* So what. mask it out. */ | ||
2546 | cpumask_var_t tmp_cpu_mask; | ||
2547 | if (zalloc_cpumask_var(&tmp_cpu_mask, GFP_KERNEL)) { | ||
2548 | cpumask_setall(tmp_cpu_mask); | ||
2549 | cpumask_and(new_cpu_mask, new_cpu_mask, tmp_cpu_mask); | ||
2550 | drbd_warn(resource, "Overflow in bitmap_parse(%.12s%s), truncating to %u bits\n", | ||
2551 | res_opts->cpu_mask, | ||
2552 | strlen(res_opts->cpu_mask) > 12 ? "..." : "", | ||
2553 | nr_cpu_ids); | ||
2554 | free_cpumask_var(tmp_cpu_mask); | ||
2555 | err = 0; | ||
2556 | } | ||
2557 | } | ||
2542 | if (err) { | 2558 | if (err) { |
2543 | drbd_warn(resource, "bitmap_parse() failed with %d\n", err); | 2559 | drbd_warn(resource, "bitmap_parse() failed with %d\n", err); |
2544 | /* retcode = ERR_CPU_MASK_PARSE; */ | 2560 | /* retcode = ERR_CPU_MASK_PARSE; */ |
@@ -2579,10 +2595,12 @@ struct drbd_resource *drbd_create_resource(const char *name) | |||
2579 | kref_init(&resource->kref); | 2595 | kref_init(&resource->kref); |
2580 | idr_init(&resource->devices); | 2596 | idr_init(&resource->devices); |
2581 | INIT_LIST_HEAD(&resource->connections); | 2597 | INIT_LIST_HEAD(&resource->connections); |
2598 | resource->write_ordering = WO_bdev_flush; | ||
2582 | list_add_tail_rcu(&resource->resources, &drbd_resources); | 2599 | list_add_tail_rcu(&resource->resources, &drbd_resources); |
2583 | mutex_init(&resource->conf_update); | 2600 | mutex_init(&resource->conf_update); |
2584 | mutex_init(&resource->adm_mutex); | 2601 | mutex_init(&resource->adm_mutex); |
2585 | spin_lock_init(&resource->req_lock); | 2602 | spin_lock_init(&resource->req_lock); |
2603 | drbd_debugfs_resource_add(resource); | ||
2586 | return resource; | 2604 | return resource; |
2587 | 2605 | ||
2588 | fail_free_name: | 2606 | fail_free_name: |
@@ -2593,7 +2611,7 @@ fail: | |||
2593 | return NULL; | 2611 | return NULL; |
2594 | } | 2612 | } |
2595 | 2613 | ||
2596 | /* caller must be under genl_lock() */ | 2614 | /* caller must be under adm_mutex */ |
2597 | struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) | 2615 | struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) |
2598 | { | 2616 | { |
2599 | struct drbd_resource *resource; | 2617 | struct drbd_resource *resource; |
@@ -2617,7 +2635,6 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) | |||
2617 | INIT_LIST_HEAD(&connection->current_epoch->list); | 2635 | INIT_LIST_HEAD(&connection->current_epoch->list); |
2618 | connection->epochs = 1; | 2636 | connection->epochs = 1; |
2619 | spin_lock_init(&connection->epoch_lock); | 2637 | spin_lock_init(&connection->epoch_lock); |
2620 | connection->write_ordering = WO_bdev_flush; | ||
2621 | 2638 | ||
2622 | connection->send.seen_any_write_yet = false; | 2639 | connection->send.seen_any_write_yet = false; |
2623 | connection->send.current_epoch_nr = 0; | 2640 | connection->send.current_epoch_nr = 0; |
@@ -2652,6 +2669,7 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) | |||
2652 | 2669 | ||
2653 | kref_get(&resource->kref); | 2670 | kref_get(&resource->kref); |
2654 | list_add_tail_rcu(&connection->connections, &resource->connections); | 2671 | list_add_tail_rcu(&connection->connections, &resource->connections); |
2672 | drbd_debugfs_connection_add(connection); | ||
2655 | return connection; | 2673 | return connection; |
2656 | 2674 | ||
2657 | fail_resource: | 2675 | fail_resource: |
@@ -2680,6 +2698,7 @@ void drbd_destroy_connection(struct kref *kref) | |||
2680 | drbd_free_socket(&connection->data); | 2698 | drbd_free_socket(&connection->data); |
2681 | kfree(connection->int_dig_in); | 2699 | kfree(connection->int_dig_in); |
2682 | kfree(connection->int_dig_vv); | 2700 | kfree(connection->int_dig_vv); |
2701 | memset(connection, 0xfc, sizeof(*connection)); | ||
2683 | kfree(connection); | 2702 | kfree(connection); |
2684 | kref_put(&resource->kref, drbd_destroy_resource); | 2703 | kref_put(&resource->kref, drbd_destroy_resource); |
2685 | } | 2704 | } |
@@ -2694,7 +2713,6 @@ static int init_submitter(struct drbd_device *device) | |||
2694 | return -ENOMEM; | 2713 | return -ENOMEM; |
2695 | 2714 | ||
2696 | INIT_WORK(&device->submit.worker, do_submit); | 2715 | INIT_WORK(&device->submit.worker, do_submit); |
2697 | spin_lock_init(&device->submit.lock); | ||
2698 | INIT_LIST_HEAD(&device->submit.writes); | 2716 | INIT_LIST_HEAD(&device->submit.writes); |
2699 | return 0; | 2717 | return 0; |
2700 | } | 2718 | } |
@@ -2764,8 +2782,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig | |||
2764 | blk_queue_merge_bvec(q, drbd_merge_bvec); | 2782 | blk_queue_merge_bvec(q, drbd_merge_bvec); |
2765 | q->queue_lock = &resource->req_lock; | 2783 | q->queue_lock = &resource->req_lock; |
2766 | 2784 | ||
2767 | device->md_io_page = alloc_page(GFP_KERNEL); | 2785 | device->md_io.page = alloc_page(GFP_KERNEL); |
2768 | if (!device->md_io_page) | 2786 | if (!device->md_io.page) |
2769 | goto out_no_io_page; | 2787 | goto out_no_io_page; |
2770 | 2788 | ||
2771 | if (drbd_bm_init(device)) | 2789 | if (drbd_bm_init(device)) |
@@ -2794,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig | |||
2794 | kref_get(&device->kref); | 2812 | kref_get(&device->kref); |
2795 | 2813 | ||
2796 | INIT_LIST_HEAD(&device->peer_devices); | 2814 | INIT_LIST_HEAD(&device->peer_devices); |
2815 | INIT_LIST_HEAD(&device->pending_bitmap_io); | ||
2797 | for_each_connection(connection, resource) { | 2816 | for_each_connection(connection, resource) { |
2798 | peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL); | 2817 | peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL); |
2799 | if (!peer_device) | 2818 | if (!peer_device) |
@@ -2829,7 +2848,10 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig | |||
2829 | for_each_peer_device(peer_device, device) | 2848 | for_each_peer_device(peer_device, device) |
2830 | drbd_connected(peer_device); | 2849 | drbd_connected(peer_device); |
2831 | } | 2850 | } |
2832 | 2851 | /* move to create_peer_device() */ | |
2852 | for_each_peer_device(peer_device, device) | ||
2853 | drbd_debugfs_peer_device_add(peer_device); | ||
2854 | drbd_debugfs_device_add(device); | ||
2833 | return NO_ERROR; | 2855 | return NO_ERROR; |
2834 | 2856 | ||
2835 | out_idr_remove_vol: | 2857 | out_idr_remove_vol: |
@@ -2853,7 +2875,7 @@ out_idr_remove_minor: | |||
2853 | out_no_minor_idr: | 2875 | out_no_minor_idr: |
2854 | drbd_bm_cleanup(device); | 2876 | drbd_bm_cleanup(device); |
2855 | out_no_bitmap: | 2877 | out_no_bitmap: |
2856 | __free_page(device->md_io_page); | 2878 | __free_page(device->md_io.page); |
2857 | out_no_io_page: | 2879 | out_no_io_page: |
2858 | put_disk(disk); | 2880 | put_disk(disk); |
2859 | out_no_disk: | 2881 | out_no_disk: |
@@ -2868,8 +2890,13 @@ void drbd_delete_device(struct drbd_device *device) | |||
2868 | { | 2890 | { |
2869 | struct drbd_resource *resource = device->resource; | 2891 | struct drbd_resource *resource = device->resource; |
2870 | struct drbd_connection *connection; | 2892 | struct drbd_connection *connection; |
2893 | struct drbd_peer_device *peer_device; | ||
2871 | int refs = 3; | 2894 | int refs = 3; |
2872 | 2895 | ||
2896 | /* move to free_peer_device() */ | ||
2897 | for_each_peer_device(peer_device, device) | ||
2898 | drbd_debugfs_peer_device_cleanup(peer_device); | ||
2899 | drbd_debugfs_device_cleanup(device); | ||
2873 | for_each_connection(connection, resource) { | 2900 | for_each_connection(connection, resource) { |
2874 | idr_remove(&connection->peer_devices, device->vnr); | 2901 | idr_remove(&connection->peer_devices, device->vnr); |
2875 | refs++; | 2902 | refs++; |
@@ -2881,13 +2908,12 @@ void drbd_delete_device(struct drbd_device *device) | |||
2881 | kref_sub(&device->kref, refs, drbd_destroy_device); | 2908 | kref_sub(&device->kref, refs, drbd_destroy_device); |
2882 | } | 2909 | } |
2883 | 2910 | ||
2884 | int __init drbd_init(void) | 2911 | static int __init drbd_init(void) |
2885 | { | 2912 | { |
2886 | int err; | 2913 | int err; |
2887 | 2914 | ||
2888 | if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { | 2915 | if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { |
2889 | printk(KERN_ERR | 2916 | pr_err("invalid minor_count (%d)\n", minor_count); |
2890 | "drbd: invalid minor_count (%d)\n", minor_count); | ||
2891 | #ifdef MODULE | 2917 | #ifdef MODULE |
2892 | return -EINVAL; | 2918 | return -EINVAL; |
2893 | #else | 2919 | #else |
@@ -2897,14 +2923,11 @@ int __init drbd_init(void) | |||
2897 | 2923 | ||
2898 | err = register_blkdev(DRBD_MAJOR, "drbd"); | 2924 | err = register_blkdev(DRBD_MAJOR, "drbd"); |
2899 | if (err) { | 2925 | if (err) { |
2900 | printk(KERN_ERR | 2926 | pr_err("unable to register block device major %d\n", |
2901 | "drbd: unable to register block device major %d\n", | ||
2902 | DRBD_MAJOR); | 2927 | DRBD_MAJOR); |
2903 | return err; | 2928 | return err; |
2904 | } | 2929 | } |
2905 | 2930 | ||
2906 | register_reboot_notifier(&drbd_notifier); | ||
2907 | |||
2908 | /* | 2931 | /* |
2909 | * allocate all necessary structs | 2932 | * allocate all necessary structs |
2910 | */ | 2933 | */ |
@@ -2918,7 +2941,7 @@ int __init drbd_init(void) | |||
2918 | 2941 | ||
2919 | err = drbd_genl_register(); | 2942 | err = drbd_genl_register(); |
2920 | if (err) { | 2943 | if (err) { |
2921 | printk(KERN_ERR "drbd: unable to register generic netlink family\n"); | 2944 | pr_err("unable to register generic netlink family\n"); |
2922 | goto fail; | 2945 | goto fail; |
2923 | } | 2946 | } |
2924 | 2947 | ||
@@ -2929,38 +2952,39 @@ int __init drbd_init(void) | |||
2929 | err = -ENOMEM; | 2952 | err = -ENOMEM; |
2930 | drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); | 2953 | drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); |
2931 | if (!drbd_proc) { | 2954 | if (!drbd_proc) { |
2932 | printk(KERN_ERR "drbd: unable to register proc file\n"); | 2955 | pr_err("unable to register proc file\n"); |
2933 | goto fail; | 2956 | goto fail; |
2934 | } | 2957 | } |
2935 | 2958 | ||
2936 | retry.wq = create_singlethread_workqueue("drbd-reissue"); | 2959 | retry.wq = create_singlethread_workqueue("drbd-reissue"); |
2937 | if (!retry.wq) { | 2960 | if (!retry.wq) { |
2938 | printk(KERN_ERR "drbd: unable to create retry workqueue\n"); | 2961 | pr_err("unable to create retry workqueue\n"); |
2939 | goto fail; | 2962 | goto fail; |
2940 | } | 2963 | } |
2941 | INIT_WORK(&retry.worker, do_retry); | 2964 | INIT_WORK(&retry.worker, do_retry); |
2942 | spin_lock_init(&retry.lock); | 2965 | spin_lock_init(&retry.lock); |
2943 | INIT_LIST_HEAD(&retry.writes); | 2966 | INIT_LIST_HEAD(&retry.writes); |
2944 | 2967 | ||
2945 | printk(KERN_INFO "drbd: initialized. " | 2968 | if (drbd_debugfs_init()) |
2969 | pr_notice("failed to initialize debugfs -- will not be available\n"); | ||
2970 | |||
2971 | pr_info("initialized. " | ||
2946 | "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", | 2972 | "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", |
2947 | API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); | 2973 | API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); |
2948 | printk(KERN_INFO "drbd: %s\n", drbd_buildtag()); | 2974 | pr_info("%s\n", drbd_buildtag()); |
2949 | printk(KERN_INFO "drbd: registered as block device major %d\n", | 2975 | pr_info("registered as block device major %d\n", DRBD_MAJOR); |
2950 | DRBD_MAJOR); | ||
2951 | |||
2952 | return 0; /* Success! */ | 2976 | return 0; /* Success! */ |
2953 | 2977 | ||
2954 | fail: | 2978 | fail: |
2955 | drbd_cleanup(); | 2979 | drbd_cleanup(); |
2956 | if (err == -ENOMEM) | 2980 | if (err == -ENOMEM) |
2957 | printk(KERN_ERR "drbd: ran out of memory\n"); | 2981 | pr_err("ran out of memory\n"); |
2958 | else | 2982 | else |
2959 | printk(KERN_ERR "drbd: initialization failure\n"); | 2983 | pr_err("initialization failure\n"); |
2960 | return err; | 2984 | return err; |
2961 | } | 2985 | } |
2962 | 2986 | ||
2963 | void drbd_free_bc(struct drbd_backing_dev *ldev) | 2987 | void drbd_free_ldev(struct drbd_backing_dev *ldev) |
2964 | { | 2988 | { |
2965 | if (ldev == NULL) | 2989 | if (ldev == NULL) |
2966 | return; | 2990 | return; |
@@ -2972,24 +2996,29 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) | |||
2972 | kfree(ldev); | 2996 | kfree(ldev); |
2973 | } | 2997 | } |
2974 | 2998 | ||
2975 | void drbd_free_sock(struct drbd_connection *connection) | 2999 | static void drbd_free_one_sock(struct drbd_socket *ds) |
2976 | { | 3000 | { |
2977 | if (connection->data.socket) { | 3001 | struct socket *s; |
2978 | mutex_lock(&connection->data.mutex); | 3002 | mutex_lock(&ds->mutex); |
2979 | kernel_sock_shutdown(connection->data.socket, SHUT_RDWR); | 3003 | s = ds->socket; |
2980 | sock_release(connection->data.socket); | 3004 | ds->socket = NULL; |
2981 | connection->data.socket = NULL; | 3005 | mutex_unlock(&ds->mutex); |
2982 | mutex_unlock(&connection->data.mutex); | 3006 | if (s) { |
2983 | } | 3007 | /* so debugfs does not need to mutex_lock() */ |
2984 | if (connection->meta.socket) { | 3008 | synchronize_rcu(); |
2985 | mutex_lock(&connection->meta.mutex); | 3009 | kernel_sock_shutdown(s, SHUT_RDWR); |
2986 | kernel_sock_shutdown(connection->meta.socket, SHUT_RDWR); | 3010 | sock_release(s); |
2987 | sock_release(connection->meta.socket); | ||
2988 | connection->meta.socket = NULL; | ||
2989 | mutex_unlock(&connection->meta.mutex); | ||
2990 | } | 3011 | } |
2991 | } | 3012 | } |
2992 | 3013 | ||
3014 | void drbd_free_sock(struct drbd_connection *connection) | ||
3015 | { | ||
3016 | if (connection->data.socket) | ||
3017 | drbd_free_one_sock(&connection->data); | ||
3018 | if (connection->meta.socket) | ||
3019 | drbd_free_one_sock(&connection->meta); | ||
3020 | } | ||
3021 | |||
2993 | /* meta data management */ | 3022 | /* meta data management */ |
2994 | 3023 | ||
2995 | void conn_md_sync(struct drbd_connection *connection) | 3024 | void conn_md_sync(struct drbd_connection *connection) |
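drbd_free_one_sock() clears ->socket under the per-socket mutex and only then calls synchronize_rcu() before shutting the socket down, "so debugfs does not need to mutex_lock()": a reader may look at the pointer inside an RCU read-side critical section without touching the mutex, and the grace period guarantees all such readers have finished before the socket is released. A hypothetical reader-side sketch (the helper name and the printed detail are illustrative, not taken from the patch):

	#include <linux/rcupdate.h>
	#include <linux/seq_file.h>
	#include <net/sock.h>

	#include "drbd_int.h"	/* struct drbd_socket, as used above */

	/* Hypothetical sketch of a lock-free reader, e.g. a debugfs show routine. */
	static void demo_show_sock(struct seq_file *m, struct drbd_socket *ds)
	{
		struct socket *s;

		rcu_read_lock();
		s = ds->socket;		/* may already be NULL during teardown */
		if (s)
			seq_printf(m, "sk_state: %u\n", s->sk->sk_state);
		rcu_read_unlock();
		/* drbd_free_one_sock() NULLs ->socket and then synchronize_rcu()s,
		 * so the socket it releases cannot still be in use inside this
		 * read-side critical section. */
	}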
@@ -3093,7 +3122,7 @@ void drbd_md_sync(struct drbd_device *device) | |||
3093 | if (!get_ldev_if_state(device, D_FAILED)) | 3122 | if (!get_ldev_if_state(device, D_FAILED)) |
3094 | return; | 3123 | return; |
3095 | 3124 | ||
3096 | buffer = drbd_md_get_buffer(device); | 3125 | buffer = drbd_md_get_buffer(device, __func__); |
3097 | if (!buffer) | 3126 | if (!buffer) |
3098 | goto out; | 3127 | goto out; |
3099 | 3128 | ||
@@ -3253,7 +3282,7 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev) | |||
3253 | if (device->state.disk != D_DISKLESS) | 3282 | if (device->state.disk != D_DISKLESS) |
3254 | return ERR_DISK_CONFIGURED; | 3283 | return ERR_DISK_CONFIGURED; |
3255 | 3284 | ||
3256 | buffer = drbd_md_get_buffer(device); | 3285 | buffer = drbd_md_get_buffer(device, __func__); |
3257 | if (!buffer) | 3286 | if (!buffer) |
3258 | return ERR_NOMEM; | 3287 | return ERR_NOMEM; |
3259 | 3288 | ||
@@ -3466,23 +3495,19 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) | |||
3466 | * | 3495 | * |
3467 | * Sets all bits in the bitmap and writes the whole bitmap to stable storage. | 3496 | * Sets all bits in the bitmap and writes the whole bitmap to stable storage. |
3468 | */ | 3497 | */ |
3469 | int drbd_bmio_set_n_write(struct drbd_device *device) | 3498 | int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) |
3470 | { | 3499 | { |
3471 | int rv = -EIO; | 3500 | int rv = -EIO; |
3472 | 3501 | ||
3473 | if (get_ldev_if_state(device, D_ATTACHING)) { | 3502 | drbd_md_set_flag(device, MDF_FULL_SYNC); |
3474 | drbd_md_set_flag(device, MDF_FULL_SYNC); | 3503 | drbd_md_sync(device); |
3475 | drbd_md_sync(device); | 3504 | drbd_bm_set_all(device); |
3476 | drbd_bm_set_all(device); | ||
3477 | |||
3478 | rv = drbd_bm_write(device); | ||
3479 | 3505 | ||
3480 | if (!rv) { | 3506 | rv = drbd_bm_write(device); |
3481 | drbd_md_clear_flag(device, MDF_FULL_SYNC); | ||
3482 | drbd_md_sync(device); | ||
3483 | } | ||
3484 | 3507 | ||
3485 | put_ldev(device); | 3508 | if (!rv) { |
3509 | drbd_md_clear_flag(device, MDF_FULL_SYNC); | ||
3510 | drbd_md_sync(device); | ||
3486 | } | 3511 | } |
3487 | 3512 | ||
3488 | return rv; | 3513 | return rv; |
@@ -3494,18 +3519,11 @@ int drbd_bmio_set_n_write(struct drbd_device *device) | |||
3494 | * | 3519 | * |
3495 | * Clears all bits in the bitmap and writes the whole bitmap to stable storage. | 3520 | * Clears all bits in the bitmap and writes the whole bitmap to stable storage. |
3496 | */ | 3521 | */ |
3497 | int drbd_bmio_clear_n_write(struct drbd_device *device) | 3522 | int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local) |
3498 | { | 3523 | { |
3499 | int rv = -EIO; | ||
3500 | |||
3501 | drbd_resume_al(device); | 3524 | drbd_resume_al(device); |
3502 | if (get_ldev_if_state(device, D_ATTACHING)) { | 3525 | drbd_bm_clear_all(device); |
3503 | drbd_bm_clear_all(device); | 3526 | return drbd_bm_write(device); |
3504 | rv = drbd_bm_write(device); | ||
3505 | put_ldev(device); | ||
3506 | } | ||
3507 | |||
3508 | return rv; | ||
3509 | } | 3527 | } |
3510 | 3528 | ||
3511 | static int w_bitmap_io(struct drbd_work *w, int unused) | 3529 | static int w_bitmap_io(struct drbd_work *w, int unused) |
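Both bitmap-IO helpers lose their internal get_ldev_if_state()/put_ldev() pairs and are annotated __must_hold(local): the reference to the backing device is now the caller's responsibility, and the kerneldoc added further down states that the bitmap-IO worker brackets io_fn() with get_ldev()/put_ldev(). A hypothetical caller-side sketch of what that bracketing looks like:

	#include <linux/errno.h>

	#include "drbd_int.h"	/* get_ldev()/put_ldev(), struct drbd_device */

	/* Hypothetical sketch: a direct caller of a __must_hold(local) io_fn
	 * has to pin the backing device itself, roughly like the worker does. */
	static int demo_call_bmio(struct drbd_device *device,
				  int (*io_fn)(struct drbd_device *))
	{
		int rv = -EIO;

		if (get_ldev(device)) {		/* take the local-disk reference */
			rv = io_fn(device);	/* e.g. drbd_bmio_set_n_write() */
			put_ldev(device);
		}
		return rv;
	}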
@@ -3537,61 +3555,6 @@ static int w_bitmap_io(struct drbd_work *w, int unused) | |||
3537 | return 0; | 3555 | return 0; |
3538 | } | 3556 | } |
3539 | 3557 | ||
3540 | void drbd_ldev_destroy(struct drbd_device *device) | ||
3541 | { | ||
3542 | lc_destroy(device->resync); | ||
3543 | device->resync = NULL; | ||
3544 | lc_destroy(device->act_log); | ||
3545 | device->act_log = NULL; | ||
3546 | __no_warn(local, | ||
3547 | drbd_free_bc(device->ldev); | ||
3548 | device->ldev = NULL;); | ||
3549 | |||
3550 | clear_bit(GO_DISKLESS, &device->flags); | ||
3551 | } | ||
3552 | |||
3553 | static int w_go_diskless(struct drbd_work *w, int unused) | ||
3554 | { | ||
3555 | struct drbd_device *device = | ||
3556 | container_of(w, struct drbd_device, go_diskless); | ||
3557 | |||
3558 | D_ASSERT(device, device->state.disk == D_FAILED); | ||
3559 | /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will | ||
3560 | * inc/dec it frequently. Once we are D_DISKLESS, no one will touch | ||
3561 | * the protected members anymore, though, so once put_ldev reaches zero | ||
3562 | * again, it will be safe to free them. */ | ||
3563 | |||
3564 | /* Try to write changed bitmap pages, read errors may have just | ||
3565 | * set some bits outside the area covered by the activity log. | ||
3566 | * | ||
3567 | * If we have an IO error during the bitmap writeout, | ||
3568 | * we will want a full sync next time, just in case. | ||
3569 | * (Do we want a specific meta data flag for this?) | ||
3570 | * | ||
3571 | * If that does not make it to stable storage either, | ||
3572 | * we cannot do anything about that anymore. | ||
3573 | * | ||
3574 | * We still need to check if both bitmap and ldev are present, we may | ||
3575 | * end up here after a failed attach, before ldev was even assigned. | ||
3576 | */ | ||
3577 | if (device->bitmap && device->ldev) { | ||
3578 | /* An interrupted resync or similar is allowed to recounts bits | ||
3579 | * while we detach. | ||
3580 | * Any modifications would not be expected anymore, though. | ||
3581 | */ | ||
3582 | if (drbd_bitmap_io_from_worker(device, drbd_bm_write, | ||
3583 | "detach", BM_LOCKED_TEST_ALLOWED)) { | ||
3584 | if (test_bit(WAS_READ_ERROR, &device->flags)) { | ||
3585 | drbd_md_set_flag(device, MDF_FULL_SYNC); | ||
3586 | drbd_md_sync(device); | ||
3587 | } | ||
3588 | } | ||
3589 | } | ||
3590 | |||
3591 | drbd_force_state(device, NS(disk, D_DISKLESS)); | ||
3592 | return 0; | ||
3593 | } | ||
3594 | |||
3595 | /** | 3558 | /** |
3596 | * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap | 3559 | * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap |
3597 | * @device: DRBD device. | 3560 | * @device: DRBD device. |
@@ -3603,6 +3566,9 @@ static int w_go_diskless(struct drbd_work *w, int unused) | |||
3603 | * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be | 3566 | * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be |
3604 | * called from worker context. It MUST NOT be used while a previous such | 3567 | * called from worker context. It MUST NOT be used while a previous such |
3605 | * work is still pending! | 3568 | * work is still pending! |
3569 | * | ||
3570 | * Its worker function encloses the call of io_fn() by get_ldev() and | ||
3571 | * put_ldev(). | ||
3606 | */ | 3572 | */ |
3607 | void drbd_queue_bitmap_io(struct drbd_device *device, | 3573 | void drbd_queue_bitmap_io(struct drbd_device *device, |
3608 | int (*io_fn)(struct drbd_device *), | 3574 | int (*io_fn)(struct drbd_device *), |
@@ -3685,25 +3651,7 @@ int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag) | |||
3685 | static void md_sync_timer_fn(unsigned long data) | 3651 | static void md_sync_timer_fn(unsigned long data) |
3686 | { | 3652 | { |
3687 | struct drbd_device *device = (struct drbd_device *) data; | 3653 | struct drbd_device *device = (struct drbd_device *) data; |
3688 | 3654 | drbd_device_post_work(device, MD_SYNC); | |
3689 | /* must not double-queue! */ | ||
3690 | if (list_empty(&device->md_sync_work.list)) | ||
3691 | drbd_queue_work_front(&first_peer_device(device)->connection->sender_work, | ||
3692 | &device->md_sync_work); | ||
3693 | } | ||
3694 | |||
3695 | static int w_md_sync(struct drbd_work *w, int unused) | ||
3696 | { | ||
3697 | struct drbd_device *device = | ||
3698 | container_of(w, struct drbd_device, md_sync_work); | ||
3699 | |||
3700 | drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); | ||
3701 | #ifdef DEBUG | ||
3702 | drbd_warn(device, "last md_mark_dirty: %s:%u\n", | ||
3703 | device->last_md_mark_dirty.func, device->last_md_mark_dirty.line); | ||
3704 | #endif | ||
3705 | drbd_md_sync(device); | ||
3706 | return 0; | ||
3707 | } | 3655 | } |
3708 | 3656 | ||
3709 | const char *cmdname(enum drbd_packet cmd) | 3657 | const char *cmdname(enum drbd_packet cmd) |
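The timer callback now merely posts a flag; w_md_sync() and its "must not double-queue!" bookkeeping disappear because setting an already-set bit is a no-op, so repeated posts coalesce for free and no persistent work struct has to be tracked. The same pattern replaces the w_go_diskless work item removed earlier in this file. A rough, hypothetical sketch of the idea (the real drbd_device_post_work() is added in drbd_worker.c; its wake-up target is not shown in this hunk, so it is passed in here):

	#include <linux/bitops.h>
	#include <linux/wait.h>

	#include "drbd_int.h"	/* struct drbd_device, MD_SYNC, drbd_md_sync() */

	/* Hypothetical sketch of the "posted work bit" pattern. */
	static void demo_device_post_work(struct drbd_device *device, int work_bit,
					  wait_queue_head_t *worker_wait)
	{
		if (!test_and_set_bit(work_bit, &device->flags))
			wake_up(worker_wait);	/* kick the worker thread */
	}

	/* Worker side, run once per loop iteration: */
	static void demo_do_device_work(struct drbd_device *device)
	{
		if (test_and_clear_bit(MD_SYNC, &device->flags))
			drbd_md_sync(device);	/* what w_md_sync() used to do */
	}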
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3f2e16738080..1cd47df44bda 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -23,6 +23,8 @@ | |||
23 | 23 | ||
24 | */ | 24 | */ |
25 | 25 | ||
26 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
27 | |||
26 | #include <linux/module.h> | 28 | #include <linux/module.h> |
27 | #include <linux/drbd.h> | 29 | #include <linux/drbd.h> |
28 | #include <linux/in.h> | 30 | #include <linux/in.h> |
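Defining pr_fmt() before any header that pulls in <linux/printk.h> makes every pr_err()/pr_info()/pr_notice() in the file carry the module-name prefix automatically, which is why the literal "drbd: " strings are dropped from the converted messages in drbd_main.c above and in this file. A minimal, self-contained illustration (not code from the patch):

	/* Minimal illustration of the pr_fmt mechanism. The define must come
	 * before <linux/printk.h> is (indirectly) included, or the default
	 * pr_fmt applies. */
	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

	#include <linux/kernel.h>

	static void demo_report(int minor_count)
	{
		/* Built as part of drbd.ko, KBUILD_MODNAME expands to "drbd",
		 * so this logs:  drbd: invalid minor_count (0)  */
		pr_err("invalid minor_count (%d)\n", minor_count);
	}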
@@ -85,7 +87,7 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) | |||
85 | { | 87 | { |
86 | genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb)))); | 88 | genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb)))); |
87 | if (genlmsg_reply(skb, info)) | 89 | if (genlmsg_reply(skb, info)) |
88 | printk(KERN_ERR "drbd: error sending genl reply\n"); | 90 | pr_err("error sending genl reply\n"); |
89 | } | 91 | } |
90 | 92 | ||
91 | /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only | 93 | /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only |
@@ -558,8 +560,10 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection) | |||
558 | } | 560 | } |
559 | 561 | ||
560 | enum drbd_state_rv | 562 | enum drbd_state_rv |
561 | drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) | 563 | drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force) |
562 | { | 564 | { |
565 | struct drbd_peer_device *const peer_device = first_peer_device(device); | ||
566 | struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; | ||
563 | const int max_tries = 4; | 567 | const int max_tries = 4; |
564 | enum drbd_state_rv rv = SS_UNKNOWN_ERROR; | 568 | enum drbd_state_rv rv = SS_UNKNOWN_ERROR; |
565 | struct net_conf *nc; | 569 | struct net_conf *nc; |
@@ -607,7 +611,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) | |||
607 | device->state.disk == D_CONSISTENT && mask.pdsk == 0) { | 611 | device->state.disk == D_CONSISTENT && mask.pdsk == 0) { |
608 | D_ASSERT(device, device->state.pdsk == D_UNKNOWN); | 612 | D_ASSERT(device, device->state.pdsk == D_UNKNOWN); |
609 | 613 | ||
610 | if (conn_try_outdate_peer(first_peer_device(device)->connection)) { | 614 | if (conn_try_outdate_peer(connection)) { |
611 | val.disk = D_UP_TO_DATE; | 615 | val.disk = D_UP_TO_DATE; |
612 | mask.disk = D_MASK; | 616 | mask.disk = D_MASK; |
613 | } | 617 | } |
@@ -617,7 +621,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) | |||
617 | if (rv == SS_NOTHING_TO_DO) | 621 | if (rv == SS_NOTHING_TO_DO) |
618 | goto out; | 622 | goto out; |
619 | if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { | 623 | if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { |
620 | if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) { | 624 | if (!conn_try_outdate_peer(connection) && force) { |
621 | drbd_warn(device, "Forced into split brain situation!\n"); | 625 | drbd_warn(device, "Forced into split brain situation!\n"); |
622 | mask.pdsk = D_MASK; | 626 | mask.pdsk = D_MASK; |
623 | val.pdsk = D_OUTDATED; | 627 | val.pdsk = D_OUTDATED; |
@@ -630,7 +634,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) | |||
630 | retry at most once more in this case. */ | 634 | retry at most once more in this case. */ |
631 | int timeo; | 635 | int timeo; |
632 | rcu_read_lock(); | 636 | rcu_read_lock(); |
633 | nc = rcu_dereference(first_peer_device(device)->connection->net_conf); | 637 | nc = rcu_dereference(connection->net_conf); |
634 | timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; | 638 | timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; |
635 | rcu_read_unlock(); | 639 | rcu_read_unlock(); |
636 | schedule_timeout_interruptible(timeo); | 640 | schedule_timeout_interruptible(timeo); |
@@ -659,19 +663,17 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) | |||
659 | /* FIXME also wait for all pending P_BARRIER_ACK? */ | 663 | /* FIXME also wait for all pending P_BARRIER_ACK? */ |
660 | 664 | ||
661 | if (new_role == R_SECONDARY) { | 665 | if (new_role == R_SECONDARY) { |
662 | set_disk_ro(device->vdisk, true); | ||
663 | if (get_ldev(device)) { | 666 | if (get_ldev(device)) { |
664 | device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; | 667 | device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; |
665 | put_ldev(device); | 668 | put_ldev(device); |
666 | } | 669 | } |
667 | } else { | 670 | } else { |
668 | /* Called from drbd_adm_set_role only. | 671 | mutex_lock(&device->resource->conf_update); |
669 | * We are still holding the conf_update mutex. */ | 672 | nc = connection->net_conf; |
670 | nc = first_peer_device(device)->connection->net_conf; | ||
671 | if (nc) | 673 | if (nc) |
672 | nc->discard_my_data = 0; /* without copy; single bit op is atomic */ | 674 | nc->discard_my_data = 0; /* without copy; single bit op is atomic */ |
675 | mutex_unlock(&device->resource->conf_update); | ||
673 | 676 | ||
674 | set_disk_ro(device->vdisk, false); | ||
675 | if (get_ldev(device)) { | 677 | if (get_ldev(device)) { |
676 | if (((device->state.conn < C_CONNECTED || | 678 | if (((device->state.conn < C_CONNECTED || |
677 | device->state.pdsk <= D_FAILED) | 679 | device->state.pdsk <= D_FAILED) |
@@ -689,12 +691,12 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) | |||
689 | if (device->state.conn >= C_WF_REPORT_PARAMS) { | 691 | if (device->state.conn >= C_WF_REPORT_PARAMS) { |
690 | /* if this was forced, we should consider sync */ | 692 | /* if this was forced, we should consider sync */ |
691 | if (forced) | 693 | if (forced) |
692 | drbd_send_uuids(first_peer_device(device)); | 694 | drbd_send_uuids(peer_device); |
693 | drbd_send_current_state(first_peer_device(device)); | 695 | drbd_send_current_state(peer_device); |
694 | } | 696 | } |
695 | 697 | ||
696 | drbd_md_sync(device); | 698 | drbd_md_sync(device); |
697 | 699 | set_disk_ro(device->vdisk, new_role == R_SECONDARY); | |
698 | kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); | 700 | kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); |
699 | out: | 701 | out: |
700 | mutex_unlock(device->state_mutex); | 702 | mutex_unlock(device->state_mutex); |
@@ -891,7 +893,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct | |||
891 | * still lock the act_log to not trigger ASSERTs there. | 893 | * still lock the act_log to not trigger ASSERTs there. |
892 | */ | 894 | */ |
893 | drbd_suspend_io(device); | 895 | drbd_suspend_io(device); |
894 | buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */ | 896 | buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */ |
895 | if (!buffer) { | 897 | if (!buffer) { |
896 | drbd_resume_io(device); | 898 | drbd_resume_io(device); |
897 | return DS_ERROR; | 899 | return DS_ERROR; |
@@ -971,6 +973,10 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct | |||
971 | if (la_size_changed || md_moved || rs) { | 973 | if (la_size_changed || md_moved || rs) { |
972 | u32 prev_flags; | 974 | u32 prev_flags; |
973 | 975 | ||
976 | /* We do some synchronous IO below, which may take some time. | ||
977 | * Clear the timer, to avoid scary "timer expired!" messages, | ||
978 | * "Superblock" is written out at least twice below, anyways. */ | ||
979 | del_timer(&device->md_sync_timer); | ||
974 | drbd_al_shrink(device); /* All extents inactive. */ | 980 | drbd_al_shrink(device); /* All extents inactive. */ |
975 | 981 | ||
976 | prev_flags = md->flags; | 982 | prev_flags = md->flags; |
@@ -1116,15 +1122,16 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) | |||
1116 | return 0; | 1122 | return 0; |
1117 | } | 1123 | } |
1118 | 1124 | ||
1119 | static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size) | 1125 | static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, |
1126 | unsigned int max_bio_size) | ||
1120 | { | 1127 | { |
1121 | struct request_queue * const q = device->rq_queue; | 1128 | struct request_queue * const q = device->rq_queue; |
1122 | unsigned int max_hw_sectors = max_bio_size >> 9; | 1129 | unsigned int max_hw_sectors = max_bio_size >> 9; |
1123 | unsigned int max_segments = 0; | 1130 | unsigned int max_segments = 0; |
1124 | struct request_queue *b = NULL; | 1131 | struct request_queue *b = NULL; |
1125 | 1132 | ||
1126 | if (get_ldev_if_state(device, D_ATTACHING)) { | 1133 | if (bdev) { |
1127 | b = device->ldev->backing_bdev->bd_disk->queue; | 1134 | b = bdev->backing_bdev->bd_disk->queue; |
1128 | 1135 | ||
1129 | max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); | 1136 | max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); |
1130 | rcu_read_lock(); | 1137 | rcu_read_lock(); |
@@ -1169,11 +1176,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_ | |||
1169 | b->backing_dev_info.ra_pages); | 1176 | b->backing_dev_info.ra_pages); |
1170 | q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; | 1177 | q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; |
1171 | } | 1178 | } |
1172 | put_ldev(device); | ||
1173 | } | 1179 | } |
1174 | } | 1180 | } |
1175 | 1181 | ||
1176 | void drbd_reconsider_max_bio_size(struct drbd_device *device) | 1182 | void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev) |
1177 | { | 1183 | { |
1178 | unsigned int now, new, local, peer; | 1184 | unsigned int now, new, local, peer; |
1179 | 1185 | ||
@@ -1181,10 +1187,9 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device) | |||
1181 | local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ | 1187 | local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ |
1182 | peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ | 1188 | peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ |
1183 | 1189 | ||
1184 | if (get_ldev_if_state(device, D_ATTACHING)) { | 1190 | if (bdev) { |
1185 | local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9; | 1191 | local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9; |
1186 | device->local_max_bio_size = local; | 1192 | device->local_max_bio_size = local; |
1187 | put_ldev(device); | ||
1188 | } | 1193 | } |
1189 | local = min(local, DRBD_MAX_BIO_SIZE); | 1194 | local = min(local, DRBD_MAX_BIO_SIZE); |
1190 | 1195 | ||
@@ -1217,7 +1222,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device) | |||
1217 | if (new != now) | 1222 | if (new != now) |
1218 | drbd_info(device, "max BIO size = %u\n", new); | 1223 | drbd_info(device, "max BIO size = %u\n", new); |
1219 | 1224 | ||
1220 | drbd_setup_queue_param(device, new); | 1225 | drbd_setup_queue_param(device, bdev, new); |
1221 | } | 1226 | } |
1222 | 1227 | ||
1223 | /* Starts the worker thread */ | 1228 | /* Starts the worker thread */ |
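drbd_setup_queue_param() and drbd_reconsider_max_bio_size() now take the backing device as an explicit argument instead of pinning it with get_ldev_if_state(device, D_ATTACHING). The attach path can therefore apply the limits of the candidate backing device directly, and callers without a local disk can pass NULL to fall back to the peer's and the global limits. A hypothetical sketch of the two call forms (only the attach-time form appears verbatim in this patch):

	#include "drbd_int.h"	/* drbd_reconsider_max_bio_size(), as changed above */

	/* Hypothetical sketch; the NULL form is an assumption for illustration. */
	static void demo_update_queue_limits(struct drbd_device *device, bool have_disk)
	{
		if (have_disk)
			drbd_reconsider_max_bio_size(device, device->ldev);
		else
			drbd_reconsider_max_bio_size(device, NULL); /* peer/global limits only */
	}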
@@ -1299,6 +1304,13 @@ static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev) | |||
1299 | return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION; | 1304 | return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION; |
1300 | } | 1305 | } |
1301 | 1306 | ||
1307 | static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b) | ||
1308 | { | ||
1309 | return a->disk_barrier != b->disk_barrier || | ||
1310 | a->disk_flushes != b->disk_flushes || | ||
1311 | a->disk_drain != b->disk_drain; | ||
1312 | } | ||
1313 | |||
1302 | int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) | 1314 | int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) |
1303 | { | 1315 | { |
1304 | struct drbd_config_context adm_ctx; | 1316 | struct drbd_config_context adm_ctx; |
@@ -1405,7 +1417,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) | |||
1405 | else | 1417 | else |
1406 | set_bit(MD_NO_FUA, &device->flags); | 1418 | set_bit(MD_NO_FUA, &device->flags); |
1407 | 1419 | ||
1408 | drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush); | 1420 | if (write_ordering_changed(old_disk_conf, new_disk_conf)) |
1421 | drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush); | ||
1409 | 1422 | ||
1410 | drbd_md_sync(device); | 1423 | drbd_md_sync(device); |
1411 | 1424 | ||
@@ -1440,6 +1453,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1440 | { | 1453 | { |
1441 | struct drbd_config_context adm_ctx; | 1454 | struct drbd_config_context adm_ctx; |
1442 | struct drbd_device *device; | 1455 | struct drbd_device *device; |
1456 | struct drbd_peer_device *peer_device; | ||
1457 | struct drbd_connection *connection; | ||
1443 | int err; | 1458 | int err; |
1444 | enum drbd_ret_code retcode; | 1459 | enum drbd_ret_code retcode; |
1445 | enum determine_dev_size dd; | 1460 | enum determine_dev_size dd; |
@@ -1462,7 +1477,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1462 | 1477 | ||
1463 | device = adm_ctx.device; | 1478 | device = adm_ctx.device; |
1464 | mutex_lock(&adm_ctx.resource->adm_mutex); | 1479 | mutex_lock(&adm_ctx.resource->adm_mutex); |
1465 | conn_reconfig_start(first_peer_device(device)->connection); | 1480 | peer_device = first_peer_device(device); |
1481 | connection = peer_device ? peer_device->connection : NULL; | ||
1482 | conn_reconfig_start(connection); | ||
1466 | 1483 | ||
1467 | /* if you want to reconfigure, please tear down first */ | 1484 | /* if you want to reconfigure, please tear down first */ |
1468 | if (device->state.disk > D_DISKLESS) { | 1485 | if (device->state.disk > D_DISKLESS) { |
@@ -1473,7 +1490,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1473 | * drbd_ldev_destroy is done already, we may end up here very fast, | 1490 | * drbd_ldev_destroy is done already, we may end up here very fast, |
1474 | * e.g. if someone calls attach from the on-io-error handler, | 1491 | * e.g. if someone calls attach from the on-io-error handler, |
1475 | * to realize a "hot spare" feature (not that I'd recommend that) */ | 1492 | * to realize a "hot spare" feature (not that I'd recommend that) */ |
1476 | wait_event(device->misc_wait, !atomic_read(&device->local_cnt)); | 1493 | wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags)); |
1477 | 1494 | ||
1478 | /* make sure there is no leftover from previous force-detach attempts */ | 1495 | /* make sure there is no leftover from previous force-detach attempts */ |
1479 | clear_bit(FORCE_DETACH, &device->flags); | 1496 | clear_bit(FORCE_DETACH, &device->flags); |
@@ -1529,7 +1546,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1529 | goto fail; | 1546 | goto fail; |
1530 | 1547 | ||
1531 | rcu_read_lock(); | 1548 | rcu_read_lock(); |
1532 | nc = rcu_dereference(first_peer_device(device)->connection->net_conf); | 1549 | nc = rcu_dereference(connection->net_conf); |
1533 | if (nc) { | 1550 | if (nc) { |
1534 | if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) { | 1551 | if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) { |
1535 | rcu_read_unlock(); | 1552 | rcu_read_unlock(); |
@@ -1649,7 +1666,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1649 | */ | 1666 | */ |
1650 | wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device)); | 1667 | wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device)); |
1651 | /* and for any other previously queued work */ | 1668 | /* and for any other previously queued work */ |
1652 | drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work); | 1669 | drbd_flush_workqueue(&connection->sender_work); |
1653 | 1670 | ||
1654 | rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE); | 1671 | rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE); |
1655 | retcode = rv; /* FIXME: Type mismatch. */ | 1672 | retcode = rv; /* FIXME: Type mismatch. */ |
@@ -1710,7 +1727,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1710 | new_disk_conf = NULL; | 1727 | new_disk_conf = NULL; |
1711 | new_plan = NULL; | 1728 | new_plan = NULL; |
1712 | 1729 | ||
1713 | drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush); | 1730 | drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush); |
1714 | 1731 | ||
1715 | if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY)) | 1732 | if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY)) |
1716 | set_bit(CRASHED_PRIMARY, &device->flags); | 1733 | set_bit(CRASHED_PRIMARY, &device->flags); |
@@ -1726,7 +1743,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1726 | device->read_cnt = 0; | 1743 | device->read_cnt = 0; |
1727 | device->writ_cnt = 0; | 1744 | device->writ_cnt = 0; |
1728 | 1745 | ||
1729 | drbd_reconsider_max_bio_size(device); | 1746 | drbd_reconsider_max_bio_size(device, device->ldev); |
1730 | 1747 | ||
1731 | /* If I am currently not R_PRIMARY, | 1748 | /* If I am currently not R_PRIMARY, |
1732 | * but meta data primary indicator is set, | 1749 | * but meta data primary indicator is set, |
@@ -1845,7 +1862,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1845 | 1862 | ||
1846 | kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); | 1863 | kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); |
1847 | put_ldev(device); | 1864 | put_ldev(device); |
1848 | conn_reconfig_done(first_peer_device(device)->connection); | 1865 | conn_reconfig_done(connection); |
1849 | mutex_unlock(&adm_ctx.resource->adm_mutex); | 1866 | mutex_unlock(&adm_ctx.resource->adm_mutex); |
1850 | drbd_adm_finish(&adm_ctx, info, retcode); | 1867 | drbd_adm_finish(&adm_ctx, info, retcode); |
1851 | return 0; | 1868 | return 0; |
@@ -1856,7 +1873,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1856 | drbd_force_state(device, NS(disk, D_DISKLESS)); | 1873 | drbd_force_state(device, NS(disk, D_DISKLESS)); |
1857 | drbd_md_sync(device); | 1874 | drbd_md_sync(device); |
1858 | fail: | 1875 | fail: |
1859 | conn_reconfig_done(first_peer_device(device)->connection); | 1876 | conn_reconfig_done(connection); |
1860 | if (nbc) { | 1877 | if (nbc) { |
1861 | if (nbc->backing_bdev) | 1878 | if (nbc->backing_bdev) |
1862 | blkdev_put(nbc->backing_bdev, | 1879 | blkdev_put(nbc->backing_bdev, |
@@ -1888,7 +1905,7 @@ static int adm_detach(struct drbd_device *device, int force) | |||
1888 | } | 1905 | } |
1889 | 1906 | ||
1890 | drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ | 1907 | drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ |
1891 | drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */ | 1908 | drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */ |
1892 | retcode = drbd_request_state(device, NS(disk, D_FAILED)); | 1909 | retcode = drbd_request_state(device, NS(disk, D_FAILED)); |
1893 | drbd_md_put_buffer(device); | 1910 | drbd_md_put_buffer(device); |
1894 | /* D_FAILED will transition to DISKLESS. */ | 1911 | /* D_FAILED will transition to DISKLESS. */ |
@@ -2654,8 +2671,13 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) | |||
2654 | if (retcode != NO_ERROR) | 2671 | if (retcode != NO_ERROR) |
2655 | goto out; | 2672 | goto out; |
2656 | 2673 | ||
2657 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2658 | device = adm_ctx.device; | 2674 | device = adm_ctx.device; |
2675 | if (!get_ldev(device)) { | ||
2676 | retcode = ERR_NO_DISK; | ||
2677 | goto out; | ||
2678 | } | ||
2679 | |||
2680 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2659 | 2681 | ||
2660 | /* If there is still bitmap IO pending, probably because of a previous | 2682 | /* If there is still bitmap IO pending, probably because of a previous |
2661 | * resync just being finished, wait for it before requesting a new resync. | 2683 | * resync just being finished, wait for it before requesting a new resync. |
@@ -2679,6 +2701,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) | |||
2679 | retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T)); | 2701 | retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T)); |
2680 | drbd_resume_io(device); | 2702 | drbd_resume_io(device); |
2681 | mutex_unlock(&adm_ctx.resource->adm_mutex); | 2703 | mutex_unlock(&adm_ctx.resource->adm_mutex); |
2704 | put_ldev(device); | ||
2682 | out: | 2705 | out: |
2683 | drbd_adm_finish(&adm_ctx, info, retcode); | 2706 | drbd_adm_finish(&adm_ctx, info, retcode); |
2684 | return 0; | 2707 | return 0; |
@@ -2704,7 +2727,7 @@ out: | |||
2704 | return 0; | 2727 | return 0; |
2705 | } | 2728 | } |
2706 | 2729 | ||
2707 | static int drbd_bmio_set_susp_al(struct drbd_device *device) | 2730 | static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local) |
2708 | { | 2731 | { |
2709 | int rv; | 2732 | int rv; |
2710 | 2733 | ||
@@ -2725,8 +2748,13 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) | |||
2725 | if (retcode != NO_ERROR) | 2748 | if (retcode != NO_ERROR) |
2726 | goto out; | 2749 | goto out; |
2727 | 2750 | ||
2728 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2729 | device = adm_ctx.device; | 2751 | device = adm_ctx.device; |
2752 | if (!get_ldev(device)) { | ||
2753 | retcode = ERR_NO_DISK; | ||
2754 | goto out; | ||
2755 | } | ||
2756 | |||
2757 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2730 | 2758 | ||
2731 | /* If there is still bitmap IO pending, probably because of a previous | 2759 | /* If there is still bitmap IO pending, probably because of a previous |
2732 | * resync just being finished, wait for it before requesting a new resync. | 2760 | * resync just being finished, wait for it before requesting a new resync. |
@@ -2753,6 +2781,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) | |||
2753 | retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); | 2781 | retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); |
2754 | drbd_resume_io(device); | 2782 | drbd_resume_io(device); |
2755 | mutex_unlock(&adm_ctx.resource->adm_mutex); | 2783 | mutex_unlock(&adm_ctx.resource->adm_mutex); |
2784 | put_ldev(device); | ||
2756 | out: | 2785 | out: |
2757 | drbd_adm_finish(&adm_ctx, info, retcode); | 2786 | drbd_adm_finish(&adm_ctx, info, retcode); |
2758 | return 0; | 2787 | return 0; |
@@ -2892,7 +2921,7 @@ static struct drbd_connection *the_only_connection(struct drbd_resource *resourc | |||
2892 | return list_first_entry(&resource->connections, struct drbd_connection, connections); | 2921 | return list_first_entry(&resource->connections, struct drbd_connection, connections); |
2893 | } | 2922 | } |
2894 | 2923 | ||
2895 | int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, | 2924 | static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, |
2896 | const struct sib_info *sib) | 2925 | const struct sib_info *sib) |
2897 | { | 2926 | { |
2898 | struct drbd_resource *resource = device->resource; | 2927 | struct drbd_resource *resource = device->resource; |
@@ -3622,13 +3651,6 @@ void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib) | |||
3622 | unsigned seq; | 3651 | unsigned seq; |
3623 | int err = -ENOMEM; | 3652 | int err = -ENOMEM; |
3624 | 3653 | ||
3625 | if (sib->sib_reason == SIB_SYNC_PROGRESS) { | ||
3626 | if (time_after(jiffies, device->rs_last_bcast + HZ)) | ||
3627 | device->rs_last_bcast = jiffies; | ||
3628 | else | ||
3629 | return; | ||
3630 | } | ||
3631 | |||
3632 | seq = atomic_inc_return(&drbd_genl_seq); | 3654 | seq = atomic_inc_return(&drbd_genl_seq); |
3633 | msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); | 3655 | msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); |
3634 | if (!msg) | 3656 | if (!msg) |
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 89736bdbbc70..06e6147c7601 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c | |||
@@ -60,20 +60,65 @@ static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) | |||
60 | seq_printf(seq, "%ld", v); | 60 | seq_printf(seq, "%ld", v); |
61 | } | 61 | } |
62 | 62 | ||
63 | static void drbd_get_syncer_progress(struct drbd_device *device, | ||
64 | union drbd_dev_state state, unsigned long *rs_total, | ||
65 | unsigned long *bits_left, unsigned int *per_mil_done) | ||
66 | { | ||
67 | /* this is to break it at compile time when we change that, in case we | ||
68 | * want to support more than (1<<32) bits on a 32bit arch. */ | ||
69 | typecheck(unsigned long, device->rs_total); | ||
70 | *rs_total = device->rs_total; | ||
71 | |||
72 | /* note: both rs_total and rs_left are in bits, i.e. in | ||
73 | * units of BM_BLOCK_SIZE. | ||
74 | * for the percentage, we don't care. */ | ||
75 | |||
76 | if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T) | ||
77 | *bits_left = device->ov_left; | ||
78 | else | ||
79 | *bits_left = drbd_bm_total_weight(device) - device->rs_failed; | ||
80 | /* >> 10 to prevent overflow, | ||
81 | * +1 to prevent division by zero */ | ||
82 | if (*bits_left > *rs_total) { | ||
83 | /* D'oh. Maybe a logic bug somewhere. More likely just a race | ||
84 | * between state change and reset of rs_total. | ||
85 | */ | ||
86 | *bits_left = *rs_total; | ||
87 | *per_mil_done = *rs_total ? 0 : 1000; | ||
88 | } else { | ||
89 | /* Make sure the division happens in long context. | ||
90 | * We allow up to one petabyte storage right now, | ||
91 | * at a granularity of 4k per bit that is 2**38 bits. | ||
92 | * After shift right and multiplication by 1000, | ||
93 | * this should still fit easily into a 32bit long, | ||
94 | * so we don't need a 64bit division on 32bit arch. | ||
95 | * Note: currently we don't support such large bitmaps on 32bit | ||
96 | * arch anyways, but no harm done to be prepared for it here. | ||
97 | */ | ||
98 | unsigned int shift = *rs_total > UINT_MAX ? 16 : 10; | ||
99 | unsigned long left = *bits_left >> shift; | ||
100 | unsigned long total = 1UL + (*rs_total >> shift); | ||
101 | unsigned long tmp = 1000UL - left * 1000UL/total; | ||
102 | *per_mil_done = tmp; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | |||
63 | /*lge | 107 | /*lge |
64 | * progress bars shamelessly adapted from driver/md/md.c | 108 | * progress bars shamelessly adapted from driver/md/md.c |
65 | * output looks like | 109 | * output looks like |
66 | * [=====>..............] 33.5% (23456/123456) | 110 | * [=====>..............] 33.5% (23456/123456) |
67 | * finish: 2:20:20 speed: 6,345 (6,456) K/sec | 111 | * finish: 2:20:20 speed: 6,345 (6,456) K/sec |
68 | */ | 112 | */ |
69 | static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq) | 113 | static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq, |
114 | union drbd_dev_state state) | ||
70 | { | 115 | { |
71 | unsigned long db, dt, dbdt, rt, rs_left; | 116 | unsigned long db, dt, dbdt, rt, rs_total, rs_left; |
72 | unsigned int res; | 117 | unsigned int res; |
73 | int i, x, y; | 118 | int i, x, y; |
74 | int stalled = 0; | 119 | int stalled = 0; |
75 | 120 | ||
76 | drbd_get_syncer_progress(device, &rs_left, &res); | 121 | drbd_get_syncer_progress(device, state, &rs_total, &rs_left, &res); |
77 | 122 | ||
78 | x = res/50; | 123 | x = res/50; |
79 | y = 20-x; | 124 | y = 20-x; |
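The new drbd_get_syncer_progress() keeps the per-mille calculation in unsigned long arithmetic: bits are shifted right by 10 before multiplying by 1000, or by 16 once the bit count exceeds UINT_MAX (2^38 bits for a one-petabyte device at 4 KiB per bit, shifted by 16, times 1000 still stays below 2^32), so no 64-bit division is needed on a 32-bit arch. A standalone, compilable illustration of the same arithmetic (ordinary userspace C, not driver code):

	/* Standalone demonstration of the per-mille arithmetic used above. */
	#include <limits.h>
	#include <stdio.h>

	static unsigned int per_mil_done(unsigned long bits_left, unsigned long rs_total)
	{
		/* >> 10 (or >> 16 for huge bitmaps) keeps left * 1000 in range,
		 * +1 prevents division by zero */
		unsigned int shift  = rs_total > UINT_MAX ? 16 : 10;
		unsigned long left  = bits_left >> shift;
		unsigned long total = 1UL + (rs_total >> shift);

		return 1000UL - left * 1000UL / total;
	}

	int main(void)
	{
		/* 1 TiB device at 4 KiB per bit: 2^28 bits total, 25% still out of sync */
		unsigned long total = 1UL << 28;
		unsigned long left  = total / 4;
		unsigned int res    = per_mil_done(left, total);

		printf("%u.%u%% done\n", res / 10, res % 10);	/* prints "75.1% done" */
		return 0;
	}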
@@ -85,21 +130,21 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
85 | seq_printf(seq, "."); | 130 | seq_printf(seq, "."); |
86 | seq_printf(seq, "] "); | 131 | seq_printf(seq, "] "); |
87 | 132 | ||
88 | if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) | 133 | if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T) |
89 | seq_printf(seq, "verified:"); | 134 | seq_printf(seq, "verified:"); |
90 | else | 135 | else |
91 | seq_printf(seq, "sync'ed:"); | 136 | seq_printf(seq, "sync'ed:"); |
92 | seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); | 137 | seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); |
93 | 138 | ||
94 | /* if more than a few GB, display in MB */ | 139 | /* if more than a few GB, display in MB */ |
95 | if (device->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) | 140 | if (rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) |
96 | seq_printf(seq, "(%lu/%lu)M", | 141 | seq_printf(seq, "(%lu/%lu)M", |
97 | (unsigned long) Bit2KB(rs_left >> 10), | 142 | (unsigned long) Bit2KB(rs_left >> 10), |
98 | (unsigned long) Bit2KB(device->rs_total >> 10)); | 143 | (unsigned long) Bit2KB(rs_total >> 10)); |
99 | else | 144 | else |
100 | seq_printf(seq, "(%lu/%lu)K\n\t", | 145 | seq_printf(seq, "(%lu/%lu)K\n\t", |
101 | (unsigned long) Bit2KB(rs_left), | 146 | (unsigned long) Bit2KB(rs_left), |
102 | (unsigned long) Bit2KB(device->rs_total)); | 147 | (unsigned long) Bit2KB(rs_total)); |
103 | 148 | ||
104 | /* see drivers/md/md.c | 149 | /* see drivers/md/md.c |
105 | * We do not want to overflow, so the order of operands and | 150 | * We do not want to overflow, so the order of operands and |
@@ -150,13 +195,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
150 | dt = (jiffies - device->rs_start - device->rs_paused) / HZ; | 195 | dt = (jiffies - device->rs_start - device->rs_paused) / HZ; |
151 | if (dt == 0) | 196 | if (dt == 0) |
152 | dt = 1; | 197 | dt = 1; |
153 | db = device->rs_total - rs_left; | 198 | db = rs_total - rs_left; |
154 | dbdt = Bit2KB(db/dt); | 199 | dbdt = Bit2KB(db/dt); |
155 | seq_printf_with_thousands_grouping(seq, dbdt); | 200 | seq_printf_with_thousands_grouping(seq, dbdt); |
156 | seq_printf(seq, ")"); | 201 | seq_printf(seq, ")"); |
157 | 202 | ||
158 | if (device->state.conn == C_SYNC_TARGET || | 203 | if (state.conn == C_SYNC_TARGET || |
159 | device->state.conn == C_VERIFY_S) { | 204 | state.conn == C_VERIFY_S) { |
160 | seq_printf(seq, " want: "); | 205 | seq_printf(seq, " want: "); |
161 | seq_printf_with_thousands_grouping(seq, device->c_sync_rate); | 206 | seq_printf_with_thousands_grouping(seq, device->c_sync_rate); |
162 | } | 207 | } |
@@ -168,8 +213,8 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
168 | unsigned long bm_bits = drbd_bm_bits(device); | 213 | unsigned long bm_bits = drbd_bm_bits(device); |
169 | unsigned long bit_pos; | 214 | unsigned long bit_pos; |
170 | unsigned long long stop_sector = 0; | 215 | unsigned long long stop_sector = 0; |
171 | if (device->state.conn == C_VERIFY_S || | 216 | if (state.conn == C_VERIFY_S || |
172 | device->state.conn == C_VERIFY_T) { | 217 | state.conn == C_VERIFY_T) { |
173 | bit_pos = bm_bits - device->ov_left; | 218 | bit_pos = bm_bits - device->ov_left; |
174 | if (verify_can_do_stop_sector(device)) | 219 | if (verify_can_do_stop_sector(device)) |
175 | stop_sector = device->ov_stop_sector; | 220 | stop_sector = device->ov_stop_sector; |
@@ -188,22 +233,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
188 | } | 233 | } |
189 | } | 234 | } |
190 | 235 | ||
191 | static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) | ||
192 | { | ||
193 | struct bm_extent *bme = lc_entry(e, struct bm_extent, lce); | ||
194 | |||
195 | seq_printf(seq, "%5d %s %s\n", bme->rs_left, | ||
196 | bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------", | ||
197 | bme->flags & BME_LOCKED ? "LOCKED" : "------" | ||
198 | ); | ||
199 | } | ||
200 | |||
201 | static int drbd_seq_show(struct seq_file *seq, void *v) | 236 | static int drbd_seq_show(struct seq_file *seq, void *v) |
202 | { | 237 | { |
203 | int i, prev_i = -1; | 238 | int i, prev_i = -1; |
204 | const char *sn; | 239 | const char *sn; |
205 | struct drbd_device *device; | 240 | struct drbd_device *device; |
206 | struct net_conf *nc; | 241 | struct net_conf *nc; |
242 | union drbd_dev_state state; | ||
207 | char wp; | 243 | char wp; |
208 | 244 | ||
209 | static char write_ordering_chars[] = { | 245 | static char write_ordering_chars[] = { |
@@ -241,11 +277,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
241 | seq_printf(seq, "\n"); | 277 | seq_printf(seq, "\n"); |
242 | prev_i = i; | 278 | prev_i = i; |
243 | 279 | ||
244 | sn = drbd_conn_str(device->state.conn); | 280 | state = device->state; |
281 | sn = drbd_conn_str(state.conn); | ||
245 | 282 | ||
246 | if (device->state.conn == C_STANDALONE && | 283 | if (state.conn == C_STANDALONE && |
247 | device->state.disk == D_DISKLESS && | 284 | state.disk == D_DISKLESS && |
248 | device->state.role == R_SECONDARY) { | 285 | state.role == R_SECONDARY) { |
249 | seq_printf(seq, "%2d: cs:Unconfigured\n", i); | 286 | seq_printf(seq, "%2d: cs:Unconfigured\n", i); |
250 | } else { | 287 | } else { |
251 | /* reset device->congestion_reason */ | 288 | /* reset device->congestion_reason */ |
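drbd_seq_show() now copies device->state into a local union once and formats the whole /proc/drbd line from that snapshot, so the cs/ro/ds characters printed for one device cannot mix values from before and after a concurrent state change. A hypothetical illustration of the same snapshot idea:

	#include "drbd_int.h"	/* union drbd_dev_state, C_SYNC_TARGET, D_INCONSISTENT */

	/* Hypothetical sketch: device->state is a small union, so a single
	 * structure assignment captures conn, role, disk, ... from the same
	 * moment before any of them are tested or printed. */
	static bool demo_is_sync_target(struct drbd_device *device)
	{
		union drbd_dev_state state = device->state;	/* one consistent read */

		return state.conn == C_SYNC_TARGET && state.disk == D_INCONSISTENT;
	}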
@@ -258,15 +295,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
258 | " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " | 295 | " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " |
259 | "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", | 296 | "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", |
260 | i, sn, | 297 | i, sn, |
261 | drbd_role_str(device->state.role), | 298 | drbd_role_str(state.role), |
262 | drbd_role_str(device->state.peer), | 299 | drbd_role_str(state.peer), |
263 | drbd_disk_str(device->state.disk), | 300 | drbd_disk_str(state.disk), |
264 | drbd_disk_str(device->state.pdsk), | 301 | drbd_disk_str(state.pdsk), |
265 | wp, | 302 | wp, |
266 | drbd_suspended(device) ? 's' : 'r', | 303 | drbd_suspended(device) ? 's' : 'r', |
267 | device->state.aftr_isp ? 'a' : '-', | 304 | state.aftr_isp ? 'a' : '-', |
268 | device->state.peer_isp ? 'p' : '-', | 305 | state.peer_isp ? 'p' : '-', |
269 | device->state.user_isp ? 'u' : '-', | 306 | state.user_isp ? 'u' : '-', |
270 | device->congestion_reason ?: '-', | 307 | device->congestion_reason ?: '-', |
271 | test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-', | 308 | test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-', |
272 | device->send_cnt/2, | 309 | device->send_cnt/2, |
@@ -281,17 +318,17 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
281 | atomic_read(&device->unacked_cnt), | 318 | atomic_read(&device->unacked_cnt), |
282 | atomic_read(&device->ap_bio_cnt), | 319 | atomic_read(&device->ap_bio_cnt), |
283 | first_peer_device(device)->connection->epochs, | 320 | first_peer_device(device)->connection->epochs, |
284 | write_ordering_chars[first_peer_device(device)->connection->write_ordering] | 321 | write_ordering_chars[device->resource->write_ordering] |
285 | ); | 322 | ); |
286 | seq_printf(seq, " oos:%llu\n", | 323 | seq_printf(seq, " oos:%llu\n", |
287 | Bit2KB((unsigned long long) | 324 | Bit2KB((unsigned long long) |
288 | drbd_bm_total_weight(device))); | 325 | drbd_bm_total_weight(device))); |
289 | } | 326 | } |
290 | if (device->state.conn == C_SYNC_SOURCE || | 327 | if (state.conn == C_SYNC_SOURCE || |
291 | device->state.conn == C_SYNC_TARGET || | 328 | state.conn == C_SYNC_TARGET || |
292 | device->state.conn == C_VERIFY_S || | 329 | state.conn == C_VERIFY_S || |
293 | device->state.conn == C_VERIFY_T) | 330 | state.conn == C_VERIFY_T) |
294 | drbd_syncer_progress(device, seq); | 331 | drbd_syncer_progress(device, seq, state); |
295 | 332 | ||
296 | if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) { | 333 | if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) { |
297 | lc_seq_printf_stats(seq, device->resync); | 334 | lc_seq_printf_stats(seq, device->resync); |
@@ -299,12 +336,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
299 | put_ldev(device); | 336 | put_ldev(device); |
300 | } | 337 | } |
301 | 338 | ||
302 | if (proc_details >= 2) { | 339 | if (proc_details >= 2) |
303 | if (device->resync) { | 340 | seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt)); |
304 | lc_seq_dump_details(seq, device->resync, "rs_left", | ||
305 | resync_dump_detail); | ||
306 | } | ||
307 | } | ||
308 | } | 341 | } |
309 | rcu_read_unlock(); | 342 | rcu_read_unlock(); |
310 | 343 | ||
@@ -316,7 +349,7 @@ static int drbd_proc_open(struct inode *inode, struct file *file) | |||
316 | int err; | 349 | int err; |
317 | 350 | ||
318 | if (try_module_get(THIS_MODULE)) { | 351 | if (try_module_get(THIS_MODULE)) { |
319 | err = single_open(file, drbd_seq_show, PDE_DATA(inode)); | 352 | err = single_open(file, drbd_seq_show, NULL); |
320 | if (err) | 353 | if (err) |
321 | module_put(THIS_MODULE); | 354 | module_put(THIS_MODULE); |
322 | return err; | 355 | return err; |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5b17ec88ea05..9342b8da73ab 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -362,17 +362,14 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto | |||
362 | goto fail; | 362 | goto fail; |
363 | } | 363 | } |
364 | 364 | ||
365 | memset(peer_req, 0, sizeof(*peer_req)); | ||
366 | INIT_LIST_HEAD(&peer_req->w.list); | ||
365 | drbd_clear_interval(&peer_req->i); | 367 | drbd_clear_interval(&peer_req->i); |
366 | peer_req->i.size = data_size; | 368 | peer_req->i.size = data_size; |
367 | peer_req->i.sector = sector; | 369 | peer_req->i.sector = sector; |
368 | peer_req->i.local = false; | 370 | peer_req->submit_jif = jiffies; |
369 | peer_req->i.waiting = false; | ||
370 | |||
371 | peer_req->epoch = NULL; | ||
372 | peer_req->peer_device = peer_device; | 371 | peer_req->peer_device = peer_device; |
373 | peer_req->pages = page; | 372 | peer_req->pages = page; |
374 | atomic_set(&peer_req->pending_bios, 0); | ||
375 | peer_req->flags = 0; | ||
376 | /* | 373 | /* |
377 | * The block_id is opaque to the receiver. It is not endianness | 374 | * The block_id is opaque to the receiver. It is not endianness |
378 | * converted, and sent back to the sender unchanged. | 375 | * converted, and sent back to the sender unchanged. |
@@ -389,11 +386,16 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto | |||
389 | void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, | 386 | void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, |
390 | int is_net) | 387 | int is_net) |
391 | { | 388 | { |
389 | might_sleep(); | ||
392 | if (peer_req->flags & EE_HAS_DIGEST) | 390 | if (peer_req->flags & EE_HAS_DIGEST) |
393 | kfree(peer_req->digest); | 391 | kfree(peer_req->digest); |
394 | drbd_free_pages(device, peer_req->pages, is_net); | 392 | drbd_free_pages(device, peer_req->pages, is_net); |
395 | D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); | 393 | D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); |
396 | D_ASSERT(device, drbd_interval_empty(&peer_req->i)); | 394 | D_ASSERT(device, drbd_interval_empty(&peer_req->i)); |
395 | if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { | ||
396 | peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; | ||
397 | drbd_al_complete_io(device, &peer_req->i); | ||
398 | } | ||
397 | mempool_free(peer_req, drbd_ee_mempool); | 399 | mempool_free(peer_req, drbd_ee_mempool); |
398 | } | 400 | } |
399 | 401 | ||
@@ -791,8 +793,18 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke | |||
791 | { | 793 | { |
792 | unsigned int header_size = drbd_header_size(connection); | 794 | unsigned int header_size = drbd_header_size(connection); |
793 | struct packet_info pi; | 795 | struct packet_info pi; |
796 | struct net_conf *nc; | ||
794 | int err; | 797 | int err; |
795 | 798 | ||
799 | rcu_read_lock(); | ||
800 | nc = rcu_dereference(connection->net_conf); | ||
801 | if (!nc) { | ||
802 | rcu_read_unlock(); | ||
803 | return -EIO; | ||
804 | } | ||
805 | sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10; | ||
806 | rcu_read_unlock(); | ||
807 | |||
796 | err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0); | 808 | err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0); |
797 | if (err != header_size) { | 809 | if (err != header_size) { |
798 | if (err >= 0) | 810 | if (err >= 0) |
@@ -809,7 +821,7 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke | |||
809 | * drbd_socket_okay() - Free the socket if its connection is not okay | 821 | * drbd_socket_okay() - Free the socket if its connection is not okay |
810 | * @sock: pointer to the pointer to the socket. | 822 | * @sock: pointer to the pointer to the socket. |
811 | */ | 823 | */ |
812 | static int drbd_socket_okay(struct socket **sock) | 824 | static bool drbd_socket_okay(struct socket **sock) |
813 | { | 825 | { |
814 | int rr; | 826 | int rr; |
815 | char tb[4]; | 827 | char tb[4]; |
@@ -827,6 +839,30 @@ static int drbd_socket_okay(struct socket **sock) | |||
827 | return false; | 839 | return false; |
828 | } | 840 | } |
829 | } | 841 | } |
842 | |||
843 | static bool connection_established(struct drbd_connection *connection, | ||
844 | struct socket **sock1, | ||
845 | struct socket **sock2) | ||
846 | { | ||
847 | struct net_conf *nc; | ||
848 | int timeout; | ||
849 | bool ok; | ||
850 | |||
851 | if (!*sock1 || !*sock2) | ||
852 | return false; | ||
853 | |||
854 | rcu_read_lock(); | ||
855 | nc = rcu_dereference(connection->net_conf); | ||
856 | timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10; | ||
857 | rcu_read_unlock(); | ||
858 | schedule_timeout_interruptible(timeout); | ||
859 | |||
860 | ok = drbd_socket_okay(sock1); | ||
861 | ok = drbd_socket_okay(sock2) && ok; | ||
862 | |||
863 | return ok; | ||
864 | } | ||
865 | |||
830 | /* Gets called if a connection is established, or if a new minor gets created | 866 | /* Gets called if a connection is established, or if a new minor gets created |
831 | in a connection */ | 867 | in a connection */ |
832 | int drbd_connected(struct drbd_peer_device *peer_device) | 868 | int drbd_connected(struct drbd_peer_device *peer_device) |
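connection_established() centralizes the check that conn_connect() previously open-coded at only one of its two call sites: both sockets must exist, the connect path waits sock_check_timeo (a tunable introduced by this series) before probing, and both sockets must still look healthy afterwards. A tiny hypothetical sketch of the timeout fallback used above:

	#include <linux/jiffies.h>

	#include "drbd_int.h"	/* struct net_conf, as used above */

	/* Hypothetical sketch: "a ?: b" is GCC shorthand for "a ? a : b", so an
	 * unset (zero) sock_check_timeo falls back to ping_timeo; both values
	 * are stored in tenths of a second, hence the * HZ / 10. */
	static long demo_sock_check_jiffies(struct net_conf *nc)
	{
		return (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
	}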
@@ -868,8 +904,8 @@ static int conn_connect(struct drbd_connection *connection) | |||
868 | struct drbd_socket sock, msock; | 904 | struct drbd_socket sock, msock; |
869 | struct drbd_peer_device *peer_device; | 905 | struct drbd_peer_device *peer_device; |
870 | struct net_conf *nc; | 906 | struct net_conf *nc; |
871 | int vnr, timeout, h, ok; | 907 | int vnr, timeout, h; |
872 | bool discard_my_data; | 908 | bool discard_my_data, ok; |
873 | enum drbd_state_rv rv; | 909 | enum drbd_state_rv rv; |
874 | struct accept_wait_data ad = { | 910 | struct accept_wait_data ad = { |
875 | .connection = connection, | 911 | .connection = connection, |
@@ -913,17 +949,8 @@ static int conn_connect(struct drbd_connection *connection) | |||
913 | } | 949 | } |
914 | } | 950 | } |
915 | 951 | ||
916 | if (sock.socket && msock.socket) { | 952 | if (connection_established(connection, &sock.socket, &msock.socket)) |
917 | rcu_read_lock(); | 953 | break; |
918 | nc = rcu_dereference(connection->net_conf); | ||
919 | timeout = nc->ping_timeo * HZ / 10; | ||
920 | rcu_read_unlock(); | ||
921 | schedule_timeout_interruptible(timeout); | ||
922 | ok = drbd_socket_okay(&sock.socket); | ||
923 | ok = drbd_socket_okay(&msock.socket) && ok; | ||
924 | if (ok) | ||
925 | break; | ||
926 | } | ||
927 | 954 | ||
928 | retry: | 955 | retry: |
929 | s = drbd_wait_for_connect(connection, &ad); | 956 | s = drbd_wait_for_connect(connection, &ad); |
@@ -969,8 +996,7 @@ randomize: | |||
969 | goto out_release_sockets; | 996 | goto out_release_sockets; |
970 | } | 997 | } |
971 | 998 | ||
972 | ok = drbd_socket_okay(&sock.socket); | 999 | ok = connection_established(connection, &sock.socket, &msock.socket); |
973 | ok = drbd_socket_okay(&msock.socket) && ok; | ||
974 | } while (!ok); | 1000 | } while (!ok); |
975 | 1001 | ||
976 | if (ad.s_listen) | 1002 | if (ad.s_listen) |
@@ -1151,7 +1177,7 @@ static void drbd_flush(struct drbd_connection *connection) | |||
1151 | struct drbd_peer_device *peer_device; | 1177 | struct drbd_peer_device *peer_device; |
1152 | int vnr; | 1178 | int vnr; |
1153 | 1179 | ||
1154 | if (connection->write_ordering >= WO_bdev_flush) { | 1180 | if (connection->resource->write_ordering >= WO_bdev_flush) { |
1155 | rcu_read_lock(); | 1181 | rcu_read_lock(); |
1156 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | 1182 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { |
1157 | struct drbd_device *device = peer_device->device; | 1183 | struct drbd_device *device = peer_device->device; |
@@ -1161,14 +1187,22 @@ static void drbd_flush(struct drbd_connection *connection) | |||
1161 | kref_get(&device->kref); | 1187 | kref_get(&device->kref); |
1162 | rcu_read_unlock(); | 1188 | rcu_read_unlock(); |
1163 | 1189 | ||
1190 | /* Right now, we have only this one synchronous code path | ||
1191 | * for flushes between request epochs. | ||
1192 | * We may want to make those asynchronous, | ||
1193 | * or at least parallelize the flushes to the volume devices. | ||
1194 | */ | ||
1195 | device->flush_jif = jiffies; | ||
1196 | set_bit(FLUSH_PENDING, &device->flags); | ||
1164 | rv = blkdev_issue_flush(device->ldev->backing_bdev, | 1197 | rv = blkdev_issue_flush(device->ldev->backing_bdev, |
1165 | GFP_NOIO, NULL); | 1198 | GFP_NOIO, NULL); |
1199 | clear_bit(FLUSH_PENDING, &device->flags); | ||
1166 | if (rv) { | 1200 | if (rv) { |
1167 | drbd_info(device, "local disk flush failed with status %d\n", rv); | 1201 | drbd_info(device, "local disk flush failed with status %d\n", rv); |
1168 | /* would rather check on EOPNOTSUPP, but that is not reliable. | 1202 | /* would rather check on EOPNOTSUPP, but that is not reliable. |
1169 | * don't try again for ANY return value != 0 | 1203 | * don't try again for ANY return value != 0 |
1170 | * if (rv == -EOPNOTSUPP) */ | 1204 | * if (rv == -EOPNOTSUPP) */ |
1171 | drbd_bump_write_ordering(connection, WO_drain_io); | 1205 | drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io); |
1172 | } | 1206 | } |
1173 | put_ldev(device); | 1207 | put_ldev(device); |
1174 | kref_put(&device->kref, drbd_destroy_device); | 1208 | kref_put(&device->kref, drbd_destroy_device); |
@@ -1257,15 +1291,30 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio | |||
1257 | return rv; | 1291 | return rv; |
1258 | } | 1292 | } |
1259 | 1293 | ||
1294 | static enum write_ordering_e | ||
1295 | max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo) | ||
1296 | { | ||
1297 | struct disk_conf *dc; | ||
1298 | |||
1299 | dc = rcu_dereference(bdev->disk_conf); | ||
1300 | |||
1301 | if (wo == WO_bdev_flush && !dc->disk_flushes) | ||
1302 | wo = WO_drain_io; | ||
1303 | if (wo == WO_drain_io && !dc->disk_drain) | ||
1304 | wo = WO_none; | ||
1305 | |||
1306 | return wo; | ||
1307 | } | ||
1308 | |||
1260 | /** | 1309 | /** |
1261 | * drbd_bump_write_ordering() - Fall back to another write ordering method | 1310 | * drbd_bump_write_ordering() - Fall back to another write ordering method |
1262 | * @connection: DRBD connection. | 1311 | * @connection: DRBD connection. |
1263 | * @wo: Write ordering method to try. | 1312 | * @wo: Write ordering method to try. |
1264 | */ | 1313 | */ |
1265 | void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo) | 1314 | void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, |
1315 | enum write_ordering_e wo) | ||
1266 | { | 1316 | { |
1267 | struct disk_conf *dc; | 1317 | struct drbd_device *device; |
1268 | struct drbd_peer_device *peer_device; | ||
1269 | enum write_ordering_e pwo; | 1318 | enum write_ordering_e pwo; |
1270 | int vnr; | 1319 | int vnr; |
1271 | static char *write_ordering_str[] = { | 1320 | static char *write_ordering_str[] = { |
@@ -1274,26 +1323,27 @@ void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ord | |||
1274 | [WO_bdev_flush] = "flush", | 1323 | [WO_bdev_flush] = "flush", |
1275 | }; | 1324 | }; |
1276 | 1325 | ||
1277 | pwo = connection->write_ordering; | 1326 | pwo = resource->write_ordering; |
1278 | wo = min(pwo, wo); | 1327 | if (wo != WO_bdev_flush) |
1328 | wo = min(pwo, wo); | ||
1279 | rcu_read_lock(); | 1329 | rcu_read_lock(); |
1280 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | 1330 | idr_for_each_entry(&resource->devices, device, vnr) { |
1281 | struct drbd_device *device = peer_device->device; | 1331 | if (get_ldev(device)) { |
1332 | wo = max_allowed_wo(device->ldev, wo); | ||
1333 | if (device->ldev == bdev) | ||
1334 | bdev = NULL; | ||
1335 | put_ldev(device); | ||
1336 | } | ||
1337 | } | ||
1282 | 1338 | ||
1283 | if (!get_ldev_if_state(device, D_ATTACHING)) | 1339 | if (bdev) |
1284 | continue; | 1340 | wo = max_allowed_wo(bdev, wo); |
1285 | dc = rcu_dereference(device->ldev->disk_conf); | ||
1286 | 1341 | ||
1287 | if (wo == WO_bdev_flush && !dc->disk_flushes) | ||
1288 | wo = WO_drain_io; | ||
1289 | if (wo == WO_drain_io && !dc->disk_drain) | ||
1290 | wo = WO_none; | ||
1291 | put_ldev(device); | ||
1292 | } | ||
1293 | rcu_read_unlock(); | 1342 | rcu_read_unlock(); |
1294 | connection->write_ordering = wo; | 1343 | |
1295 | if (pwo != connection->write_ordering || wo == WO_bdev_flush) | 1344 | resource->write_ordering = wo; |
1296 | drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]); | 1345 | if (pwo != resource->write_ordering || wo == WO_bdev_flush) |
1346 | drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); | ||
1297 | } | 1347 | } |
1298 | 1348 | ||
1299 | /** | 1349 | /** |
@@ -1330,6 +1380,13 @@ int drbd_submit_peer_request(struct drbd_device *device, | |||
1330 | /* wait for all pending IO completions, before we start | 1380 | /* wait for all pending IO completions, before we start |
1331 | * zeroing things out. */ | 1381 | * zeroing things out. */ |
1332 | conn_wait_active_ee_empty(first_peer_device(device)->connection); | 1382 | conn_wait_active_ee_empty(first_peer_device(device)->connection); |
1383 | /* add it to the active list now, | ||
1384 | * so we can find it to present it in debugfs */ | ||
1385 | peer_req->submit_jif = jiffies; | ||
1386 | peer_req->flags |= EE_SUBMITTED; | ||
1387 | spin_lock_irq(&device->resource->req_lock); | ||
1388 | list_add_tail(&peer_req->w.list, &device->active_ee); | ||
1389 | spin_unlock_irq(&device->resource->req_lock); | ||
1333 | if (blkdev_issue_zeroout(device->ldev->backing_bdev, | 1390 | if (blkdev_issue_zeroout(device->ldev->backing_bdev, |
1334 | sector, ds >> 9, GFP_NOIO)) | 1391 | sector, ds >> 9, GFP_NOIO)) |
1335 | peer_req->flags |= EE_WAS_ERROR; | 1392 | peer_req->flags |= EE_WAS_ERROR; |
@@ -1398,6 +1455,9 @@ submit: | |||
1398 | D_ASSERT(device, page == NULL); | 1455 | D_ASSERT(device, page == NULL); |
1399 | 1456 | ||
1400 | atomic_set(&peer_req->pending_bios, n_bios); | 1457 | atomic_set(&peer_req->pending_bios, n_bios); |
1458 | /* for debugfs: update timestamp, mark as submitted */ | ||
1459 | peer_req->submit_jif = jiffies; | ||
1460 | peer_req->flags |= EE_SUBMITTED; | ||
1401 | do { | 1461 | do { |
1402 | bio = bios; | 1462 | bio = bios; |
1403 | bios = bios->bi_next; | 1463 | bios = bios->bi_next; |
@@ -1471,7 +1531,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf | |||
1471 | * R_PRIMARY crashes now. | 1531 | * R_PRIMARY crashes now. |
1472 | * Therefore we must send the barrier_ack after the barrier request was | 1532 | * Therefore we must send the barrier_ack after the barrier request was |
1473 | * completed. */ | 1533 | * completed. */ |
1474 | switch (connection->write_ordering) { | 1534 | switch (connection->resource->write_ordering) { |
1475 | case WO_none: | 1535 | case WO_none: |
1476 | if (rv == FE_RECYCLED) | 1536 | if (rv == FE_RECYCLED) |
1477 | return 0; | 1537 | return 0; |
@@ -1498,7 +1558,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf | |||
1498 | 1558 | ||
1499 | return 0; | 1559 | return 0; |
1500 | default: | 1560 | default: |
1501 | drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering); | 1561 | drbd_err(connection, "Strangeness in connection->write_ordering %d\n", |
1562 | connection->resource->write_ordering); | ||
1502 | return -EIO; | 1563 | return -EIO; |
1503 | } | 1564 | } |
1504 | 1565 | ||
@@ -1531,7 +1592,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | |||
1531 | struct drbd_peer_request *peer_req; | 1592 | struct drbd_peer_request *peer_req; |
1532 | struct page *page; | 1593 | struct page *page; |
1533 | int dgs, ds, err; | 1594 | int dgs, ds, err; |
1534 | int data_size = pi->size; | 1595 | unsigned int data_size = pi->size; |
1535 | void *dig_in = peer_device->connection->int_dig_in; | 1596 | void *dig_in = peer_device->connection->int_dig_in; |
1536 | void *dig_vv = peer_device->connection->int_dig_vv; | 1597 | void *dig_vv = peer_device->connection->int_dig_vv; |
1537 | unsigned long *data; | 1598 | unsigned long *data; |
@@ -1578,6 +1639,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | |||
1578 | if (!peer_req) | 1639 | if (!peer_req) |
1579 | return NULL; | 1640 | return NULL; |
1580 | 1641 | ||
1642 | peer_req->flags |= EE_WRITE; | ||
1581 | if (trim) | 1643 | if (trim) |
1582 | return peer_req; | 1644 | return peer_req; |
1583 | 1645 | ||
@@ -1734,9 +1796,10 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto | |||
1734 | * respective _drbd_clear_done_ee */ | 1796 | * respective _drbd_clear_done_ee */ |
1735 | 1797 | ||
1736 | peer_req->w.cb = e_end_resync_block; | 1798 | peer_req->w.cb = e_end_resync_block; |
1799 | peer_req->submit_jif = jiffies; | ||
1737 | 1800 | ||
1738 | spin_lock_irq(&device->resource->req_lock); | 1801 | spin_lock_irq(&device->resource->req_lock); |
1739 | list_add(&peer_req->w.list, &device->sync_ee); | 1802 | list_add_tail(&peer_req->w.list, &device->sync_ee); |
1740 | spin_unlock_irq(&device->resource->req_lock); | 1803 | spin_unlock_irq(&device->resource->req_lock); |
1741 | 1804 | ||
1742 | atomic_add(pi->size >> 9, &device->rs_sect_ev); | 1805 | atomic_add(pi->size >> 9, &device->rs_sect_ev); |
@@ -1889,6 +1952,7 @@ static int e_end_block(struct drbd_work *w, int cancel) | |||
1889 | } | 1952 | } |
1890 | dec_unacked(device); | 1953 | dec_unacked(device); |
1891 | } | 1954 | } |
1955 | |||
1892 | /* we delete from the conflict detection hash _after_ we sent out the | 1956 | /* we delete from the conflict detection hash _after_ we sent out the |
1893 | * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ | 1957 | * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ |
1894 | if (peer_req->flags & EE_IN_INTERVAL_TREE) { | 1958 | if (peer_req->flags & EE_IN_INTERVAL_TREE) { |
@@ -2115,6 +2179,8 @@ static int handle_write_conflicts(struct drbd_device *device, | |||
2115 | drbd_for_each_overlap(i, &device->write_requests, sector, size) { | 2179 | drbd_for_each_overlap(i, &device->write_requests, sector, size) { |
2116 | if (i == &peer_req->i) | 2180 | if (i == &peer_req->i) |
2117 | continue; | 2181 | continue; |
2182 | if (i->completed) | ||
2183 | continue; | ||
2118 | 2184 | ||
2119 | if (!i->local) { | 2185 | if (!i->local) { |
2120 | /* | 2186 | /* |
@@ -2147,7 +2213,6 @@ static int handle_write_conflicts(struct drbd_device *device, | |||
2147 | (unsigned long long)sector, size, | 2213 | (unsigned long long)sector, size, |
2148 | superseded ? "local" : "remote"); | 2214 | superseded ? "local" : "remote"); |
2149 | 2215 | ||
2150 | inc_unacked(device); | ||
2151 | peer_req->w.cb = superseded ? e_send_superseded : | 2216 | peer_req->w.cb = superseded ? e_send_superseded : |
2152 | e_send_retry_write; | 2217 | e_send_retry_write; |
2153 | list_add_tail(&peer_req->w.list, &device->done_ee); | 2218 | list_add_tail(&peer_req->w.list, &device->done_ee); |
@@ -2206,6 +2271,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2206 | { | 2271 | { |
2207 | struct drbd_peer_device *peer_device; | 2272 | struct drbd_peer_device *peer_device; |
2208 | struct drbd_device *device; | 2273 | struct drbd_device *device; |
2274 | struct net_conf *nc; | ||
2209 | sector_t sector; | 2275 | sector_t sector; |
2210 | struct drbd_peer_request *peer_req; | 2276 | struct drbd_peer_request *peer_req; |
2211 | struct p_data *p = pi->data; | 2277 | struct p_data *p = pi->data; |
@@ -2245,6 +2311,8 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2245 | } | 2311 | } |
2246 | 2312 | ||
2247 | peer_req->w.cb = e_end_block; | 2313 | peer_req->w.cb = e_end_block; |
2314 | peer_req->submit_jif = jiffies; | ||
2315 | peer_req->flags |= EE_APPLICATION; | ||
2248 | 2316 | ||
2249 | dp_flags = be32_to_cpu(p->dp_flags); | 2317 | dp_flags = be32_to_cpu(p->dp_flags); |
2250 | rw |= wire_flags_to_bio(dp_flags); | 2318 | rw |= wire_flags_to_bio(dp_flags); |
@@ -2271,9 +2339,36 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2271 | spin_unlock(&connection->epoch_lock); | 2339 | spin_unlock(&connection->epoch_lock); |
2272 | 2340 | ||
2273 | rcu_read_lock(); | 2341 | rcu_read_lock(); |
2274 | tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries; | 2342 | nc = rcu_dereference(peer_device->connection->net_conf); |
2343 | tp = nc->two_primaries; | ||
2344 | if (peer_device->connection->agreed_pro_version < 100) { | ||
2345 | switch (nc->wire_protocol) { | ||
2346 | case DRBD_PROT_C: | ||
2347 | dp_flags |= DP_SEND_WRITE_ACK; | ||
2348 | break; | ||
2349 | case DRBD_PROT_B: | ||
2350 | dp_flags |= DP_SEND_RECEIVE_ACK; | ||
2351 | break; | ||
2352 | } | ||
2353 | } | ||
2275 | rcu_read_unlock(); | 2354 | rcu_read_unlock(); |
2355 | |||
2356 | if (dp_flags & DP_SEND_WRITE_ACK) { | ||
2357 | peer_req->flags |= EE_SEND_WRITE_ACK; | ||
2358 | inc_unacked(device); | ||
2359 | /* corresponding dec_unacked() in e_end_block() | ||
2360 | * respective _drbd_clear_done_ee */ | ||
2361 | } | ||
2362 | |||
2363 | if (dp_flags & DP_SEND_RECEIVE_ACK) { | ||
2364 | /* I really don't like it that the receiver thread | ||
2365 | * sends on the msock, but anyways */ | ||
2366 | drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req); | ||
2367 | } | ||
2368 | |||
2276 | if (tp) { | 2369 | if (tp) { |
2370 | /* two primaries implies protocol C */ | ||
2371 | D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK); | ||
2277 | peer_req->flags |= EE_IN_INTERVAL_TREE; | 2372 | peer_req->flags |= EE_IN_INTERVAL_TREE; |
2278 | err = wait_for_and_update_peer_seq(peer_device, peer_seq); | 2373 | err = wait_for_and_update_peer_seq(peer_device, peer_seq); |
2279 | if (err) | 2374 | if (err) |
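The hunk above moves the protocol-to-ack mapping in front of the activity-log and conflict handling: for peers older than protocol version 100, the wire protocol alone decides whether a write ack (protocol C) or only a receive ack (protocol B) is expected. A standalone sketch of that mapping (flag and protocol constants here are illustrative, not the wire values):

enum wire_protocol { PROT_A = 1, PROT_B, PROT_C };

#define DP_SEND_RECEIVE_ACK  (1u << 0)
#define DP_SEND_WRITE_ACK    (1u << 1)

unsigned int ack_flags_for(enum wire_protocol proto, unsigned int dp_flags)
{
        switch (proto) {
        case PROT_C: dp_flags |= DP_SEND_WRITE_ACK;   break;   /* ack after stable write */
        case PROT_B: dp_flags |= DP_SEND_RECEIVE_ACK; break;   /* ack on receipt */
        case PROT_A: break;                                     /* fire and forget */
        }
        return dp_flags;
}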
@@ -2297,44 +2392,18 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2297 | * active_ee to become empty in drbd_submit_peer_request(); | 2392 | * active_ee to become empty in drbd_submit_peer_request(); |
2298 | * better not add ourselves here. */ | 2393 | * better not add ourselves here. */ |
2299 | if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) | 2394 | if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) |
2300 | list_add(&peer_req->w.list, &device->active_ee); | 2395 | list_add_tail(&peer_req->w.list, &device->active_ee); |
2301 | spin_unlock_irq(&device->resource->req_lock); | 2396 | spin_unlock_irq(&device->resource->req_lock); |
2302 | 2397 | ||
2303 | if (device->state.conn == C_SYNC_TARGET) | 2398 | if (device->state.conn == C_SYNC_TARGET) |
2304 | wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req)); | 2399 | wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req)); |
2305 | 2400 | ||
2306 | if (peer_device->connection->agreed_pro_version < 100) { | ||
2307 | rcu_read_lock(); | ||
2308 | switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) { | ||
2309 | case DRBD_PROT_C: | ||
2310 | dp_flags |= DP_SEND_WRITE_ACK; | ||
2311 | break; | ||
2312 | case DRBD_PROT_B: | ||
2313 | dp_flags |= DP_SEND_RECEIVE_ACK; | ||
2314 | break; | ||
2315 | } | ||
2316 | rcu_read_unlock(); | ||
2317 | } | ||
2318 | |||
2319 | if (dp_flags & DP_SEND_WRITE_ACK) { | ||
2320 | peer_req->flags |= EE_SEND_WRITE_ACK; | ||
2321 | inc_unacked(device); | ||
2322 | /* corresponding dec_unacked() in e_end_block() | ||
2323 | * respective _drbd_clear_done_ee */ | ||
2324 | } | ||
2325 | |||
2326 | if (dp_flags & DP_SEND_RECEIVE_ACK) { | ||
2327 | /* I really don't like it that the receiver thread | ||
2328 | * sends on the msock, but anyways */ | ||
2329 | drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req); | ||
2330 | } | ||
2331 | |||
2332 | if (device->state.pdsk < D_INCONSISTENT) { | 2401 | if (device->state.pdsk < D_INCONSISTENT) { |
2333 | /* In case we have the only disk of the cluster, */ | 2402 | /* In case we have the only disk of the cluster, */ |
2334 | drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); | 2403 | drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); |
2335 | peer_req->flags |= EE_CALL_AL_COMPLETE_IO; | ||
2336 | peer_req->flags &= ~EE_MAY_SET_IN_SYNC; | 2404 | peer_req->flags &= ~EE_MAY_SET_IN_SYNC; |
2337 | drbd_al_begin_io(device, &peer_req->i, true); | 2405 | drbd_al_begin_io(device, &peer_req->i); |
2406 | peer_req->flags |= EE_CALL_AL_COMPLETE_IO; | ||
2338 | } | 2407 | } |
2339 | 2408 | ||
2340 | err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR); | 2409 | err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR); |
@@ -2347,8 +2416,10 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2347 | list_del(&peer_req->w.list); | 2416 | list_del(&peer_req->w.list); |
2348 | drbd_remove_epoch_entry_interval(device, peer_req); | 2417 | drbd_remove_epoch_entry_interval(device, peer_req); |
2349 | spin_unlock_irq(&device->resource->req_lock); | 2418 | spin_unlock_irq(&device->resource->req_lock); |
2350 | if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) | 2419 | if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) { |
2420 | peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; | ||
2351 | drbd_al_complete_io(device, &peer_req->i); | 2421 | drbd_al_complete_io(device, &peer_req->i); |
2422 | } | ||
2352 | 2423 | ||
2353 | out_interrupted: | 2424 | out_interrupted: |
2354 | drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP); | 2425 | drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP); |
@@ -2368,13 +2439,14 @@ out_interrupted: | |||
2368 | * The current sync rate used here uses only the most recent two step marks, | 2439 | * The current sync rate used here uses only the most recent two step marks, |
2369 | * to have a short time average so we can react faster. | 2440 | * to have a short time average so we can react faster. |
2370 | */ | 2441 | */ |
2371 | bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) | 2442 | bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, |
2443 | bool throttle_if_app_is_waiting) | ||
2372 | { | 2444 | { |
2373 | struct lc_element *tmp; | 2445 | struct lc_element *tmp; |
2374 | bool throttle = true; | 2446 | bool throttle = drbd_rs_c_min_rate_throttle(device); |
2375 | 2447 | ||
2376 | if (!drbd_rs_c_min_rate_throttle(device)) | 2448 | if (!throttle || throttle_if_app_is_waiting) |
2377 | return false; | 2449 | return throttle; |
2378 | 2450 | ||
2379 | spin_lock_irq(&device->al_lock); | 2451 | spin_lock_irq(&device->al_lock); |
2380 | tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); | 2452 | tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); |
@@ -2382,7 +2454,8 @@ bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) | |||
2382 | struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); | 2454 | struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); |
2383 | if (test_bit(BME_PRIORITY, &bm_ext->flags)) | 2455 | if (test_bit(BME_PRIORITY, &bm_ext->flags)) |
2384 | throttle = false; | 2456 | throttle = false; |
2385 | /* Do not slow down if app IO is already waiting for this extent */ | 2457 | /* Do not slow down if app IO is already waiting for this extent, |
2458 | * and our progress is necessary for application IO to complete. */ | ||
2386 | } | 2459 | } |
2387 | spin_unlock_irq(&device->al_lock); | 2460 | spin_unlock_irq(&device->al_lock); |
2388 | 2461 | ||
@@ -2407,7 +2480,9 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) | |||
2407 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + | 2480 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + |
2408 | (int)part_stat_read(&disk->part0, sectors[1]) - | 2481 | (int)part_stat_read(&disk->part0, sectors[1]) - |
2409 | atomic_read(&device->rs_sect_ev); | 2482 | atomic_read(&device->rs_sect_ev); |
2410 | if (!device->rs_last_events || curr_events - device->rs_last_events > 64) { | 2483 | |
2484 | if (atomic_read(&device->ap_actlog_cnt) | ||
2485 | || !device->rs_last_events || curr_events - device->rs_last_events > 64) { | ||
2411 | unsigned long rs_left; | 2486 | unsigned long rs_left; |
2412 | int i; | 2487 | int i; |
2413 | 2488 | ||
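The throttling change above makes the c-min-rate check re-sample not only after 64 sectors of local I/O, but also whenever application requests are already queued behind activity-log updates (ap_actlog_cnt). A minimal sketch of that trigger, with a stand-in stats struct:

#include <stdbool.h>

struct dev_stats {
        int ap_actlog_cnt;     /* app requests waiting on activity-log updates */
        int curr_events;       /* sectors of local I/O, cumulative */
        int rs_last_events;    /* sample taken at the previous check */
};

bool should_resample(const struct dev_stats *s)
{
        return s->ap_actlog_cnt != 0
            || s->rs_last_events == 0
            || s->curr_events - s->rs_last_events > 64;
}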
@@ -2508,6 +2583,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet | |||
2508 | peer_req->w.cb = w_e_end_data_req; | 2583 | peer_req->w.cb = w_e_end_data_req; |
2509 | fault_type = DRBD_FAULT_DT_RD; | 2584 | fault_type = DRBD_FAULT_DT_RD; |
2510 | /* application IO, don't drbd_rs_begin_io */ | 2585 | /* application IO, don't drbd_rs_begin_io */ |
2586 | peer_req->flags |= EE_APPLICATION; | ||
2511 | goto submit; | 2587 | goto submit; |
2512 | 2588 | ||
2513 | case P_RS_DATA_REQUEST: | 2589 | case P_RS_DATA_REQUEST: |
@@ -2538,6 +2614,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet | |||
2538 | peer_req->w.cb = w_e_end_csum_rs_req; | 2614 | peer_req->w.cb = w_e_end_csum_rs_req; |
2539 | /* used in the sector offset progress display */ | 2615 | /* used in the sector offset progress display */ |
2540 | device->bm_resync_fo = BM_SECT_TO_BIT(sector); | 2616 | device->bm_resync_fo = BM_SECT_TO_BIT(sector); |
2617 | /* remember to report stats in drbd_resync_finished */ | ||
2618 | device->use_csums = true; | ||
2541 | } else if (pi->cmd == P_OV_REPLY) { | 2619 | } else if (pi->cmd == P_OV_REPLY) { |
2542 | /* track progress, we may need to throttle */ | 2620 | /* track progress, we may need to throttle */ |
2543 | atomic_add(size >> 9, &device->rs_sect_in); | 2621 | atomic_add(size >> 9, &device->rs_sect_in); |
@@ -2595,8 +2673,20 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet | |||
2595 | * we would also throttle its application reads. | 2673 | * we would also throttle its application reads. |
2596 | * In that case, throttling is done on the SyncTarget only. | 2674 | * In that case, throttling is done on the SyncTarget only. |
2597 | */ | 2675 | */ |
2598 | if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector)) | 2676 | |
2677 | /* Even though this may be a resync request, we do add to "read_ee"; | ||
2678 | * "sync_ee" is only used for resync WRITEs. | ||
2679 | * Add to list early, so debugfs can find this request | ||
2680 | * even if we have to sleep below. */ | ||
2681 | spin_lock_irq(&device->resource->req_lock); | ||
2682 | list_add_tail(&peer_req->w.list, &device->read_ee); | ||
2683 | spin_unlock_irq(&device->resource->req_lock); | ||
2684 | |||
2685 | update_receiver_timing_details(connection, drbd_rs_should_slow_down); | ||
2686 | if (device->state.peer != R_PRIMARY | ||
2687 | && drbd_rs_should_slow_down(device, sector, false)) | ||
2599 | schedule_timeout_uninterruptible(HZ/10); | 2688 | schedule_timeout_uninterruptible(HZ/10); |
2689 | update_receiver_timing_details(connection, drbd_rs_begin_io); | ||
2600 | if (drbd_rs_begin_io(device, sector)) | 2690 | if (drbd_rs_begin_io(device, sector)) |
2601 | goto out_free_e; | 2691 | goto out_free_e; |
2602 | 2692 | ||
@@ -2604,22 +2694,20 @@ submit_for_resync: | |||
2604 | atomic_add(size >> 9, &device->rs_sect_ev); | 2694 | atomic_add(size >> 9, &device->rs_sect_ev); |
2605 | 2695 | ||
2606 | submit: | 2696 | submit: |
2697 | update_receiver_timing_details(connection, drbd_submit_peer_request); | ||
2607 | inc_unacked(device); | 2698 | inc_unacked(device); |
2608 | spin_lock_irq(&device->resource->req_lock); | ||
2609 | list_add_tail(&peer_req->w.list, &device->read_ee); | ||
2610 | spin_unlock_irq(&device->resource->req_lock); | ||
2611 | |||
2612 | if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0) | 2699 | if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0) |
2613 | return 0; | 2700 | return 0; |
2614 | 2701 | ||
2615 | /* don't care for the reason here */ | 2702 | /* don't care for the reason here */ |
2616 | drbd_err(device, "submit failed, triggering re-connect\n"); | 2703 | drbd_err(device, "submit failed, triggering re-connect\n"); |
2704 | |||
2705 | out_free_e: | ||
2617 | spin_lock_irq(&device->resource->req_lock); | 2706 | spin_lock_irq(&device->resource->req_lock); |
2618 | list_del(&peer_req->w.list); | 2707 | list_del(&peer_req->w.list); |
2619 | spin_unlock_irq(&device->resource->req_lock); | 2708 | spin_unlock_irq(&device->resource->req_lock); |
2620 | /* no drbd_rs_complete_io(), we are dropping the connection anyways */ | 2709 | /* no drbd_rs_complete_io(), we are dropping the connection anyways */ |
2621 | 2710 | ||
2622 | out_free_e: | ||
2623 | put_ldev(device); | 2711 | put_ldev(device); |
2624 | drbd_free_peer_req(device, peer_req); | 2712 | drbd_free_peer_req(device, peer_req); |
2625 | return -EIO; | 2713 | return -EIO; |
@@ -2842,8 +2930,10 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, | |||
2842 | -1091 requires proto 91 | 2930 | -1091 requires proto 91 |
2843 | -1096 requires proto 96 | 2931 | -1096 requires proto 96 |
2844 | */ | 2932 | */ |
2845 | static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local) | 2933 | static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local) |
2846 | { | 2934 | { |
2935 | struct drbd_peer_device *const peer_device = first_peer_device(device); | ||
2936 | struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; | ||
2847 | u64 self, peer; | 2937 | u64 self, peer; |
2848 | int i, j; | 2938 | int i, j; |
2849 | 2939 | ||
@@ -2869,7 +2959,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho | |||
2869 | 2959 | ||
2870 | if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) { | 2960 | if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) { |
2871 | 2961 | ||
2872 | if (first_peer_device(device)->connection->agreed_pro_version < 91) | 2962 | if (connection->agreed_pro_version < 91) |
2873 | return -1091; | 2963 | return -1091; |
2874 | 2964 | ||
2875 | if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) && | 2965 | if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) && |
@@ -2892,7 +2982,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho | |||
2892 | 2982 | ||
2893 | if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) { | 2983 | if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) { |
2894 | 2984 | ||
2895 | if (first_peer_device(device)->connection->agreed_pro_version < 91) | 2985 | if (connection->agreed_pro_version < 91) |
2896 | return -1091; | 2986 | return -1091; |
2897 | 2987 | ||
2898 | if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) && | 2988 | if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) && |
@@ -2925,7 +3015,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho | |||
2925 | case 1: /* self_pri && !peer_pri */ return 1; | 3015 | case 1: /* self_pri && !peer_pri */ return 1; |
2926 | case 2: /* !self_pri && peer_pri */ return -1; | 3016 | case 2: /* !self_pri && peer_pri */ return -1; |
2927 | case 3: /* self_pri && peer_pri */ | 3017 | case 3: /* self_pri && peer_pri */ |
2928 | dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags); | 3018 | dc = test_bit(RESOLVE_CONFLICTS, &connection->flags); |
2929 | return dc ? -1 : 1; | 3019 | return dc ? -1 : 1; |
2930 | } | 3020 | } |
2931 | } | 3021 | } |
@@ -2938,14 +3028,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho | |||
2938 | *rule_nr = 51; | 3028 | *rule_nr = 51; |
2939 | peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1); | 3029 | peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1); |
2940 | if (self == peer) { | 3030 | if (self == peer) { |
2941 | if (first_peer_device(device)->connection->agreed_pro_version < 96 ? | 3031 | if (connection->agreed_pro_version < 96 ? |
2942 | (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == | 3032 | (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == |
2943 | (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : | 3033 | (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : |
2944 | peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) { | 3034 | peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) { |
2945 | /* The last P_SYNC_UUID did not get through. Undo the last start of | 3035 | /* The last P_SYNC_UUID did not get through. Undo the last start of |
2946 | resync as sync source modifications of the peer's UUIDs. */ | 3036 | resync as sync source modifications of the peer's UUIDs. */ |
2947 | 3037 | ||
2948 | if (first_peer_device(device)->connection->agreed_pro_version < 91) | 3038 | if (connection->agreed_pro_version < 91) |
2949 | return -1091; | 3039 | return -1091; |
2950 | 3040 | ||
2951 | device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START]; | 3041 | device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START]; |
@@ -2975,14 +3065,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho | |||
2975 | *rule_nr = 71; | 3065 | *rule_nr = 71; |
2976 | self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); | 3066 | self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); |
2977 | if (self == peer) { | 3067 | if (self == peer) { |
2978 | if (first_peer_device(device)->connection->agreed_pro_version < 96 ? | 3068 | if (connection->agreed_pro_version < 96 ? |
2979 | (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == | 3069 | (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == |
2980 | (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) : | 3070 | (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) : |
2981 | self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { | 3071 | self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { |
2982 | /* The last P_SYNC_UUID did not get through. Undo the last start of | 3072 | /* The last P_SYNC_UUID did not get through. Undo the last start of |
2983 | resync as sync source modifications of our UUIDs. */ | 3073 | resync as sync source modifications of our UUIDs. */ |
2984 | 3074 | ||
2985 | if (first_peer_device(device)->connection->agreed_pro_version < 91) | 3075 | if (connection->agreed_pro_version < 91) |
2986 | return -1091; | 3076 | return -1091; |
2987 | 3077 | ||
2988 | __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]); | 3078 | __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]); |
@@ -3352,8 +3442,7 @@ disconnect: | |||
3352 | * return: NULL (alg name was "") | 3442 | * return: NULL (alg name was "") |
3353 | * ERR_PTR(error) if something goes wrong | 3443 | * ERR_PTR(error) if something goes wrong |
3354 | * or the crypto hash ptr, if it worked out ok. */ | 3444 | * or the crypto hash ptr, if it worked out ok. */ |
3355 | static | 3445 | static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device, |
3356 | struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device, | ||
3357 | const char *alg, const char *name) | 3446 | const char *alg, const char *name) |
3358 | { | 3447 | { |
3359 | struct crypto_hash *tfm; | 3448 | struct crypto_hash *tfm; |
@@ -3639,7 +3728,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3639 | struct drbd_device *device; | 3728 | struct drbd_device *device; |
3640 | struct p_sizes *p = pi->data; | 3729 | struct p_sizes *p = pi->data; |
3641 | enum determine_dev_size dd = DS_UNCHANGED; | 3730 | enum determine_dev_size dd = DS_UNCHANGED; |
3642 | sector_t p_size, p_usize, my_usize; | 3731 | sector_t p_size, p_usize, p_csize, my_usize; |
3643 | int ldsc = 0; /* local disk size changed */ | 3732 | int ldsc = 0; /* local disk size changed */ |
3644 | enum dds_flags ddsf; | 3733 | enum dds_flags ddsf; |
3645 | 3734 | ||
@@ -3650,6 +3739,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3650 | 3739 | ||
3651 | p_size = be64_to_cpu(p->d_size); | 3740 | p_size = be64_to_cpu(p->d_size); |
3652 | p_usize = be64_to_cpu(p->u_size); | 3741 | p_usize = be64_to_cpu(p->u_size); |
3742 | p_csize = be64_to_cpu(p->c_size); | ||
3653 | 3743 | ||
3654 | /* just store the peer's disk size for now. | 3744 | /* just store the peer's disk size for now. |
3655 | * we still need to figure out whether we accept that. */ | 3745 | * we still need to figure out whether we accept that. */ |
@@ -3710,7 +3800,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3710 | } | 3800 | } |
3711 | 3801 | ||
3712 | device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); | 3802 | device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); |
3713 | drbd_reconsider_max_bio_size(device); | ||
3714 | /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). | 3803 | /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). |
3715 | In case we cleared the QUEUE_FLAG_DISCARD from our queue in | 3804 | In case we cleared the QUEUE_FLAG_DISCARD from our queue in |
3716 | drbd_reconsider_max_bio_size(), we can be sure that after | 3805 | drbd_reconsider_max_bio_size(), we can be sure that after |
@@ -3718,14 +3807,28 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3718 | 3807 | ||
3719 | ddsf = be16_to_cpu(p->dds_flags); | 3808 | ddsf = be16_to_cpu(p->dds_flags); |
3720 | if (get_ldev(device)) { | 3809 | if (get_ldev(device)) { |
3810 | drbd_reconsider_max_bio_size(device, device->ldev); | ||
3721 | dd = drbd_determine_dev_size(device, ddsf, NULL); | 3811 | dd = drbd_determine_dev_size(device, ddsf, NULL); |
3722 | put_ldev(device); | 3812 | put_ldev(device); |
3723 | if (dd == DS_ERROR) | 3813 | if (dd == DS_ERROR) |
3724 | return -EIO; | 3814 | return -EIO; |
3725 | drbd_md_sync(device); | 3815 | drbd_md_sync(device); |
3726 | } else { | 3816 | } else { |
3727 | /* I am diskless, need to accept the peer's size. */ | 3817 | /* |
3728 | drbd_set_my_capacity(device, p_size); | 3818 | * I am diskless, need to accept the peer's *current* size. |
3819 | * I must NOT accept the peers backing disk size, | ||
3820 | * it may have been larger than mine all along... | ||
3821 | * | ||
3822 | * At this point, the peer knows more about my disk, or at | ||
3823 | * least about what we last agreed upon, than myself. | ||
3824 | * So if his c_size is less than his d_size, the most likely | ||
3825 | * reason is that *my* d_size was smaller last time we checked. | ||
3826 | * | ||
3827 | * However, if he sends a zero current size, | ||
3828 | * take his (user-capped or) backing disk size anyways. | ||
3829 | */ | ||
3830 | drbd_reconsider_max_bio_size(device, NULL); | ||
3831 | drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); | ||
3729 | } | 3832 | } |
3730 | 3833 | ||
3731 | if (get_ldev(device)) { | 3834 | if (get_ldev(device)) { |
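For the diskless case, the node now adopts the peer's current (agreed) size first, then a user-imposed cap, then the raw backing-device size, using the GCC "a ?: b" shorthand as in the hunk above. A tiny sketch of that fallback chain:

typedef unsigned long long sector_t;

/* first non-zero size wins: current size, then user cap, then backing size */
sector_t pick_capacity(sector_t c_size, sector_t u_size, sector_t d_size)
{
        return c_size ?: u_size ?: d_size;   /* GCC conditional with omitted operand */
}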
@@ -4501,6 +4604,7 @@ static void drbdd(struct drbd_connection *connection) | |||
4501 | struct data_cmd *cmd; | 4604 | struct data_cmd *cmd; |
4502 | 4605 | ||
4503 | drbd_thread_current_set_cpu(&connection->receiver); | 4606 | drbd_thread_current_set_cpu(&connection->receiver); |
4607 | update_receiver_timing_details(connection, drbd_recv_header); | ||
4504 | if (drbd_recv_header(connection, &pi)) | 4608 | if (drbd_recv_header(connection, &pi)) |
4505 | goto err_out; | 4609 | goto err_out; |
4506 | 4610 | ||
@@ -4519,12 +4623,14 @@ static void drbdd(struct drbd_connection *connection) | |||
4519 | } | 4623 | } |
4520 | 4624 | ||
4521 | if (shs) { | 4625 | if (shs) { |
4626 | update_receiver_timing_details(connection, drbd_recv_all_warn); | ||
4522 | err = drbd_recv_all_warn(connection, pi.data, shs); | 4627 | err = drbd_recv_all_warn(connection, pi.data, shs); |
4523 | if (err) | 4628 | if (err) |
4524 | goto err_out; | 4629 | goto err_out; |
4525 | pi.size -= shs; | 4630 | pi.size -= shs; |
4526 | } | 4631 | } |
4527 | 4632 | ||
4633 | update_receiver_timing_details(connection, cmd->fn); | ||
4528 | err = cmd->fn(connection, &pi); | 4634 | err = cmd->fn(connection, &pi); |
4529 | if (err) { | 4635 | if (err) { |
4530 | drbd_err(connection, "error receiving %s, e: %d l: %d!\n", | 4636 | drbd_err(connection, "error receiving %s, e: %d l: %d!\n", |
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 09803d0d5207..c67717d572d1 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -52,7 +52,7 @@ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request | |||
52 | static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req) | 52 | static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req) |
53 | { | 53 | { |
54 | int rw = bio_data_dir(req->master_bio); | 54 | int rw = bio_data_dir(req->master_bio); |
55 | unsigned long duration = jiffies - req->start_time; | 55 | unsigned long duration = jiffies - req->start_jif; |
56 | int cpu; | 56 | int cpu; |
57 | cpu = part_stat_lock(); | 57 | cpu = part_stat_lock(); |
58 | part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration); | 58 | part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration); |
@@ -66,7 +66,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, | |||
66 | { | 66 | { |
67 | struct drbd_request *req; | 67 | struct drbd_request *req; |
68 | 68 | ||
69 | req = mempool_alloc(drbd_request_mempool, GFP_NOIO); | 69 | req = mempool_alloc(drbd_request_mempool, GFP_NOIO | __GFP_ZERO); |
70 | if (!req) | 70 | if (!req) |
71 | return NULL; | 71 | return NULL; |
72 | 72 | ||
@@ -84,6 +84,8 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, | |||
84 | 84 | ||
85 | INIT_LIST_HEAD(&req->tl_requests); | 85 | INIT_LIST_HEAD(&req->tl_requests); |
86 | INIT_LIST_HEAD(&req->w.list); | 86 | INIT_LIST_HEAD(&req->w.list); |
87 | INIT_LIST_HEAD(&req->req_pending_master_completion); | ||
88 | INIT_LIST_HEAD(&req->req_pending_local); | ||
87 | 89 | ||
88 | /* one reference to be put by __drbd_make_request */ | 90 | /* one reference to be put by __drbd_make_request */ |
89 | atomic_set(&req->completion_ref, 1); | 91 | atomic_set(&req->completion_ref, 1); |
@@ -92,6 +94,19 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, | |||
92 | return req; | 94 | return req; |
93 | } | 95 | } |
94 | 96 | ||
97 | static void drbd_remove_request_interval(struct rb_root *root, | ||
98 | struct drbd_request *req) | ||
99 | { | ||
100 | struct drbd_device *device = req->device; | ||
101 | struct drbd_interval *i = &req->i; | ||
102 | |||
103 | drbd_remove_interval(root, i); | ||
104 | |||
105 | /* Wake up any processes waiting for this request to complete. */ | ||
106 | if (i->waiting) | ||
107 | wake_up(&device->misc_wait); | ||
108 | } | ||
109 | |||
95 | void drbd_req_destroy(struct kref *kref) | 110 | void drbd_req_destroy(struct kref *kref) |
96 | { | 111 | { |
97 | struct drbd_request *req = container_of(kref, struct drbd_request, kref); | 112 | struct drbd_request *req = container_of(kref, struct drbd_request, kref); |
@@ -107,14 +122,30 @@ void drbd_req_destroy(struct kref *kref) | |||
107 | return; | 122 | return; |
108 | } | 123 | } |
109 | 124 | ||
110 | /* remove it from the transfer log. | 125 | /* If called from mod_rq_state (expected normal case) or |
111 | * well, only if it had been there in the first | 126 | * drbd_send_and_submit (the less likely normal path), this holds the |
112 | * place... if it had not (local only or conflicting | 127 | * req_lock, and req->tl_requests will typically be on ->transfer_log, |
113 | * and never sent), it should still be "empty" as | 128 | * though it may be still empty (never added to the transfer log). |
114 | * initialized in drbd_req_new(), so we can list_del() it | 129 | * |
115 | * here unconditionally */ | 130 | * If called from do_retry(), we do NOT hold the req_lock, but we are |
131 | * still allowed to unconditionally list_del(&req->tl_requests), | ||
132 | * because it will be on a local on-stack list only. */ | ||
116 | list_del_init(&req->tl_requests); | 133 | list_del_init(&req->tl_requests); |
117 | 134 | ||
135 | /* finally remove the request from the conflict detection | ||
136 | * respective block_id verification interval tree. */ | ||
137 | if (!drbd_interval_empty(&req->i)) { | ||
138 | struct rb_root *root; | ||
139 | |||
140 | if (s & RQ_WRITE) | ||
141 | root = &device->write_requests; | ||
142 | else | ||
143 | root = &device->read_requests; | ||
144 | drbd_remove_request_interval(root, req); | ||
145 | } else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0) | ||
146 | drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n", | ||
147 | s, (unsigned long long)req->i.sector, req->i.size); | ||
148 | |||
118 | /* if it was a write, we may have to set the corresponding | 149 | /* if it was a write, we may have to set the corresponding |
119 | * bit(s) out-of-sync first. If it had a local part, we need to | 150 | * bit(s) out-of-sync first. If it had a local part, we need to |
120 | * release the reference to the activity log. */ | 151 | * release the reference to the activity log. */ |
@@ -188,19 +219,6 @@ void complete_master_bio(struct drbd_device *device, | |||
188 | } | 219 | } |
189 | 220 | ||
190 | 221 | ||
191 | static void drbd_remove_request_interval(struct rb_root *root, | ||
192 | struct drbd_request *req) | ||
193 | { | ||
194 | struct drbd_device *device = req->device; | ||
195 | struct drbd_interval *i = &req->i; | ||
196 | |||
197 | drbd_remove_interval(root, i); | ||
198 | |||
199 | /* Wake up any processes waiting for this request to complete. */ | ||
200 | if (i->waiting) | ||
201 | wake_up(&device->misc_wait); | ||
202 | } | ||
203 | |||
204 | /* Helper for __req_mod(). | 222 | /* Helper for __req_mod(). |
205 | * Set m->bio to the master bio, if it is fit to be completed, | 223 | * Set m->bio to the master bio, if it is fit to be completed, |
206 | * or leave it alone (it is initialized to NULL in __req_mod), | 224 | * or leave it alone (it is initialized to NULL in __req_mod), |
@@ -254,18 +272,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) | |||
254 | ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); | 272 | ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); |
255 | error = PTR_ERR(req->private_bio); | 273 | error = PTR_ERR(req->private_bio); |
256 | 274 | ||
257 | /* remove the request from the conflict detection | ||
258 | * respective block_id verification hash */ | ||
259 | if (!drbd_interval_empty(&req->i)) { | ||
260 | struct rb_root *root; | ||
261 | |||
262 | if (rw == WRITE) | ||
263 | root = &device->write_requests; | ||
264 | else | ||
265 | root = &device->read_requests; | ||
266 | drbd_remove_request_interval(root, req); | ||
267 | } | ||
268 | |||
269 | /* Before we can signal completion to the upper layers, | 275 | /* Before we can signal completion to the upper layers, |
270 | * we may need to close the current transfer log epoch. | 276 | * we may need to close the current transfer log epoch. |
271 | * We are within the request lock, so we can simply compare | 277 | * We are within the request lock, so we can simply compare |
@@ -301,9 +307,24 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) | |||
301 | m->error = ok ? 0 : (error ?: -EIO); | 307 | m->error = ok ? 0 : (error ?: -EIO); |
302 | m->bio = req->master_bio; | 308 | m->bio = req->master_bio; |
303 | req->master_bio = NULL; | 309 | req->master_bio = NULL; |
310 | /* We leave it in the tree, to be able to verify later | ||
311 | * write-acks in protocol != C during resync. | ||
312 | * But we mark it as "complete", so it won't be counted as | ||
313 | * conflict in a multi-primary setup. */ | ||
314 | req->i.completed = true; | ||
304 | } | 315 | } |
316 | |||
317 | if (req->i.waiting) | ||
318 | wake_up(&device->misc_wait); | ||
319 | |||
320 | /* Either we are about to complete to upper layers, | ||
321 | * or we will restart this request. | ||
322 | * In either case, the request object will be destroyed soon, | ||
323 | * so better remove it from all lists. */ | ||
324 | list_del_init(&req->req_pending_master_completion); | ||
305 | } | 325 | } |
306 | 326 | ||
327 | /* still holds resource->req_lock */ | ||
307 | static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) | 328 | static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) |
308 | { | 329 | { |
309 | struct drbd_device *device = req->device; | 330 | struct drbd_device *device = req->device; |
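Completed requests now stay in the interval tree, flagged via i.completed, so later write-acks (protocol != C during resync) can still be verified while the conflict scan in handle_write_conflicts() simply skips them. A standalone sketch of that skip, with a stand-in interval type and a linear scan instead of the tree walk:

#include <stdbool.h>
#include <stddef.h>

struct interval {
        unsigned long sector;
        unsigned long nr_sectors;
        bool completed;          /* master bio already completed upwards */
};

static bool overlaps(const struct interval *a, const struct interval *b)
{
        return a->sector < b->sector + b->nr_sectors &&
               b->sector < a->sector + a->nr_sectors;
}

bool has_conflict(const struct interval *mine,
                  const struct interval *others, size_t n)
{
        for (size_t i = 0; i < n; i++) {
                if (&others[i] == mine || others[i].completed)
                        continue;               /* self or already completed: skip */
                if (overlaps(mine, &others[i]))
                        return true;
        }
        return false;
}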
@@ -324,12 +345,91 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_ | |||
324 | return 1; | 345 | return 1; |
325 | } | 346 | } |
326 | 347 | ||
348 | static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) | ||
349 | { | ||
350 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | ||
351 | if (!connection) | ||
352 | return; | ||
353 | if (connection->req_next == NULL) | ||
354 | connection->req_next = req; | ||
355 | } | ||
356 | |||
357 | static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) | ||
358 | { | ||
359 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | ||
360 | if (!connection) | ||
361 | return; | ||
362 | if (connection->req_next != req) | ||
363 | return; | ||
364 | list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) { | ||
365 | const unsigned s = req->rq_state; | ||
366 | if (s & RQ_NET_QUEUED) | ||
367 | break; | ||
368 | } | ||
369 | if (&req->tl_requests == &connection->transfer_log) | ||
370 | req = NULL; | ||
371 | connection->req_next = req; | ||
372 | } | ||
373 | |||
374 | static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req) | ||
375 | { | ||
376 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | ||
377 | if (!connection) | ||
378 | return; | ||
379 | if (connection->req_ack_pending == NULL) | ||
380 | connection->req_ack_pending = req; | ||
381 | } | ||
382 | |||
383 | static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req) | ||
384 | { | ||
385 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | ||
386 | if (!connection) | ||
387 | return; | ||
388 | if (connection->req_ack_pending != req) | ||
389 | return; | ||
390 | list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) { | ||
391 | const unsigned s = req->rq_state; | ||
392 | if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING)) | ||
393 | break; | ||
394 | } | ||
395 | if (&req->tl_requests == &connection->transfer_log) | ||
396 | req = NULL; | ||
397 | connection->req_ack_pending = req; | ||
398 | } | ||
399 | |||
400 | static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req) | ||
401 | { | ||
402 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | ||
403 | if (!connection) | ||
404 | return; | ||
405 | if (connection->req_not_net_done == NULL) | ||
406 | connection->req_not_net_done = req; | ||
407 | } | ||
408 | |||
409 | static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req) | ||
410 | { | ||
411 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | ||
412 | if (!connection) | ||
413 | return; | ||
414 | if (connection->req_not_net_done != req) | ||
415 | return; | ||
416 | list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) { | ||
417 | const unsigned s = req->rq_state; | ||
418 | if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE)) | ||
419 | break; | ||
420 | } | ||
421 | if (&req->tl_requests == &connection->transfer_log) | ||
422 | req = NULL; | ||
423 | connection->req_not_net_done = req; | ||
424 | } | ||
425 | |||
327 | /* I'd like this to be the only place that manipulates | 426 | /* I'd like this to be the only place that manipulates |
328 | * req->completion_ref and req->kref. */ | 427 | * req->completion_ref and req->kref. */ |
329 | static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, | 428 | static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, |
330 | int clear, int set) | 429 | int clear, int set) |
331 | { | 430 | { |
332 | struct drbd_device *device = req->device; | 431 | struct drbd_device *device = req->device; |
432 | struct drbd_peer_device *peer_device = first_peer_device(device); | ||
333 | unsigned s = req->rq_state; | 433 | unsigned s = req->rq_state; |
334 | int c_put = 0; | 434 | int c_put = 0; |
335 | int k_put = 0; | 435 | int k_put = 0; |
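The set_if_null_*/advance_conn_* helpers above maintain per-connection cache pointers to the oldest request in a given state (next to send, ack pending, not yet done on the network), presumably so the oldest requests can be reported (e.g. in debugfs) without walking the whole transfer log. When the cached request leaves that state, the pointer is advanced along the transfer log to the next request still in it, or NULL. A simplified sketch of the pattern on a plain singly linked list:

#include <stddef.h>

struct req  { struct req *next; unsigned queued; };
struct conn { struct req *req_next; };

void set_if_null_req_next(struct conn *c, struct req *r)
{
        if (!c->req_next)
                c->req_next = r;          /* r becomes the oldest queued request */
}

void advance_req_next(struct conn *c, struct req *r)
{
        if (c->req_next != r)
                return;                   /* not the cached one: nothing to do */
        for (r = r->next; r; r = r->next)
                if (r->queued)
                        break;
        c->req_next = r;                  /* next queued request, or NULL */
}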
@@ -356,14 +456,23 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, | |||
356 | atomic_inc(&req->completion_ref); | 456 | atomic_inc(&req->completion_ref); |
357 | } | 457 | } |
358 | 458 | ||
359 | if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) | 459 | if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) { |
360 | atomic_inc(&req->completion_ref); | 460 | atomic_inc(&req->completion_ref); |
461 | set_if_null_req_next(peer_device, req); | ||
462 | } | ||
361 | 463 | ||
362 | if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK)) | 464 | if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK)) |
363 | kref_get(&req->kref); /* wait for the DONE */ | 465 | kref_get(&req->kref); /* wait for the DONE */ |
364 | 466 | ||
365 | if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) | 467 | if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) { |
366 | atomic_add(req->i.size >> 9, &device->ap_in_flight); | 468 | /* potentially already completed in the asender thread */ |
469 | if (!(s & RQ_NET_DONE)) { | ||
470 | atomic_add(req->i.size >> 9, &device->ap_in_flight); | ||
471 | set_if_null_req_not_net_done(peer_device, req); | ||
472 | } | ||
473 | if (s & RQ_NET_PENDING) | ||
474 | set_if_null_req_ack_pending(peer_device, req); | ||
475 | } | ||
367 | 476 | ||
368 | if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP)) | 477 | if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP)) |
369 | atomic_inc(&req->completion_ref); | 478 | atomic_inc(&req->completion_ref); |
@@ -386,20 +495,34 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, | |||
386 | ++k_put; | 495 | ++k_put; |
387 | else | 496 | else |
388 | ++c_put; | 497 | ++c_put; |
498 | list_del_init(&req->req_pending_local); | ||
389 | } | 499 | } |
390 | 500 | ||
391 | if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) { | 501 | if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) { |
392 | dec_ap_pending(device); | 502 | dec_ap_pending(device); |
393 | ++c_put; | 503 | ++c_put; |
504 | req->acked_jif = jiffies; | ||
505 | advance_conn_req_ack_pending(peer_device, req); | ||
394 | } | 506 | } |
395 | 507 | ||
396 | if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) | 508 | if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) { |
397 | ++c_put; | 509 | ++c_put; |
510 | advance_conn_req_next(peer_device, req); | ||
511 | } | ||
398 | 512 | ||
399 | if ((s & RQ_EXP_BARR_ACK) && !(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) { | 513 | if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) { |
400 | if (req->rq_state & RQ_NET_SENT) | 514 | if (s & RQ_NET_SENT) |
401 | atomic_sub(req->i.size >> 9, &device->ap_in_flight); | 515 | atomic_sub(req->i.size >> 9, &device->ap_in_flight); |
402 | ++k_put; | 516 | if (s & RQ_EXP_BARR_ACK) |
517 | ++k_put; | ||
518 | req->net_done_jif = jiffies; | ||
519 | |||
520 | /* in ahead/behind mode, or just in case, | ||
521 | * before we finally destroy this request, | ||
522 | * the caching pointers must not reference it anymore */ | ||
523 | advance_conn_req_next(peer_device, req); | ||
524 | advance_conn_req_ack_pending(peer_device, req); | ||
525 | advance_conn_req_not_net_done(peer_device, req); | ||
403 | } | 526 | } |
404 | 527 | ||
405 | /* potentially complete and destroy */ | 528 | /* potentially complete and destroy */ |
@@ -439,6 +562,19 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request | |||
439 | bdevname(device->ldev->backing_bdev, b)); | 562 | bdevname(device->ldev->backing_bdev, b)); |
440 | } | 563 | } |
441 | 564 | ||
565 | /* Helper for HANDED_OVER_TO_NETWORK. | ||
566 | * Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)? | ||
567 | * Is it also still "PENDING"? | ||
568 | * --> If so, clear PENDING and set NET_OK below. | ||
569 | * If it is a protocol A write, but not RQ_PENDING anymore, neg-ack was faster | ||
570 | * (and we must not set RQ_NET_OK) */ | ||
571 | static inline bool is_pending_write_protocol_A(struct drbd_request *req) | ||
572 | { | ||
573 | return (req->rq_state & | ||
574 | (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK)) | ||
575 | == (RQ_WRITE|RQ_NET_PENDING); | ||
576 | } | ||
577 | |||
442 | /* obviously this could be coded as many single functions | 578 | /* obviously this could be coded as many single functions |
443 | * instead of one huge switch, | 579 | * instead of one huge switch, |
444 | * or by putting the code directly in the respective locations | 580 | * or by putting the code directly in the respective locations |
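is_pending_write_protocol_A() introduced above uses a single mask-and-compare to test four state bits at once: WRITE and NET_PENDING must be set while neither ack-expected bit may be. The same idiom in isolation, with illustrative flag values:

#define RQ_WRITE            (1u << 0)
#define RQ_NET_PENDING      (1u << 1)
#define RQ_EXP_WRITE_ACK    (1u << 2)
#define RQ_EXP_RECEIVE_ACK  (1u << 3)

static inline int is_pending_protocol_a_write(unsigned int rq_state)
{
        return (rq_state &
                (RQ_WRITE | RQ_NET_PENDING | RQ_EXP_WRITE_ACK | RQ_EXP_RECEIVE_ACK))
            == (RQ_WRITE | RQ_NET_PENDING);
}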
@@ -454,7 +590,9 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request | |||
454 | int __req_mod(struct drbd_request *req, enum drbd_req_event what, | 590 | int __req_mod(struct drbd_request *req, enum drbd_req_event what, |
455 | struct bio_and_error *m) | 591 | struct bio_and_error *m) |
456 | { | 592 | { |
457 | struct drbd_device *device = req->device; | 593 | struct drbd_device *const device = req->device; |
594 | struct drbd_peer_device *const peer_device = first_peer_device(device); | ||
595 | struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; | ||
458 | struct net_conf *nc; | 596 | struct net_conf *nc; |
459 | int p, rv = 0; | 597 | int p, rv = 0; |
460 | 598 | ||
@@ -477,7 +615,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
477 | * and from w_read_retry_remote */ | 615 | * and from w_read_retry_remote */ |
478 | D_ASSERT(device, !(req->rq_state & RQ_NET_MASK)); | 616 | D_ASSERT(device, !(req->rq_state & RQ_NET_MASK)); |
479 | rcu_read_lock(); | 617 | rcu_read_lock(); |
480 | nc = rcu_dereference(first_peer_device(device)->connection->net_conf); | 618 | nc = rcu_dereference(connection->net_conf); |
481 | p = nc->wire_protocol; | 619 | p = nc->wire_protocol; |
482 | rcu_read_unlock(); | 620 | rcu_read_unlock(); |
483 | req->rq_state |= | 621 | req->rq_state |= |
@@ -549,7 +687,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
549 | D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0); | 687 | D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0); |
550 | mod_rq_state(req, m, 0, RQ_NET_QUEUED); | 688 | mod_rq_state(req, m, 0, RQ_NET_QUEUED); |
551 | req->w.cb = w_send_read_req; | 689 | req->w.cb = w_send_read_req; |
552 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | 690 | drbd_queue_work(&connection->sender_work, |
553 | &req->w); | 691 | &req->w); |
554 | break; | 692 | break; |
555 | 693 | ||
@@ -585,23 +723,23 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
585 | D_ASSERT(device, req->rq_state & RQ_NET_PENDING); | 723 | D_ASSERT(device, req->rq_state & RQ_NET_PENDING); |
586 | mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK); | 724 | mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK); |
587 | req->w.cb = w_send_dblock; | 725 | req->w.cb = w_send_dblock; |
588 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | 726 | drbd_queue_work(&connection->sender_work, |
589 | &req->w); | 727 | &req->w); |
590 | 728 | ||
591 | /* close the epoch, in case it outgrew the limit */ | 729 | /* close the epoch, in case it outgrew the limit */ |
592 | rcu_read_lock(); | 730 | rcu_read_lock(); |
593 | nc = rcu_dereference(first_peer_device(device)->connection->net_conf); | 731 | nc = rcu_dereference(connection->net_conf); |
594 | p = nc->max_epoch_size; | 732 | p = nc->max_epoch_size; |
595 | rcu_read_unlock(); | 733 | rcu_read_unlock(); |
596 | if (first_peer_device(device)->connection->current_tle_writes >= p) | 734 | if (connection->current_tle_writes >= p) |
597 | start_new_tl_epoch(first_peer_device(device)->connection); | 735 | start_new_tl_epoch(connection); |
598 | 736 | ||
599 | break; | 737 | break; |
600 | 738 | ||
601 | case QUEUE_FOR_SEND_OOS: | 739 | case QUEUE_FOR_SEND_OOS: |
602 | mod_rq_state(req, m, 0, RQ_NET_QUEUED); | 740 | mod_rq_state(req, m, 0, RQ_NET_QUEUED); |
603 | req->w.cb = w_send_out_of_sync; | 741 | req->w.cb = w_send_out_of_sync; |
604 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | 742 | drbd_queue_work(&connection->sender_work, |
605 | &req->w); | 743 | &req->w); |
606 | break; | 744 | break; |
607 | 745 | ||
@@ -615,18 +753,16 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
615 | 753 | ||
616 | case HANDED_OVER_TO_NETWORK: | 754 | case HANDED_OVER_TO_NETWORK: |
617 | /* assert something? */ | 755 | /* assert something? */ |
618 | if (bio_data_dir(req->master_bio) == WRITE && | 756 | if (is_pending_write_protocol_A(req)) |
619 | !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) { | ||
620 | /* this is what is dangerous about protocol A: | 757 | /* this is what is dangerous about protocol A: |
621 | * pretend it was successfully written on the peer. */ | 758 | * pretend it was successfully written on the peer. */ |
622 | if (req->rq_state & RQ_NET_PENDING) | 759 | mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING, |
623 | mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); | 760 | RQ_NET_SENT|RQ_NET_OK); |
624 | /* else: neg-ack was faster... */ | 761 | else |
625 | /* it is still not yet RQ_NET_DONE until the | 762 | mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT); |
626 | * corresponding epoch barrier got acked as well, | 763 | /* It is still not yet RQ_NET_DONE until the |
627 | * so we know what to dirty on connection loss */ | 764 | * corresponding epoch barrier got acked as well, |
628 | } | 765 | * so we know what to dirty on connection loss. */ |
629 | mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT); | ||
630 | break; | 766 | break; |
631 | 767 | ||
632 | case OOS_HANDED_TO_NETWORK: | 768 | case OOS_HANDED_TO_NETWORK: |
@@ -658,12 +794,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
658 | case WRITE_ACKED_BY_PEER_AND_SIS: | 794 | case WRITE_ACKED_BY_PEER_AND_SIS: |
659 | req->rq_state |= RQ_NET_SIS; | 795 | req->rq_state |= RQ_NET_SIS; |
660 | case WRITE_ACKED_BY_PEER: | 796 | case WRITE_ACKED_BY_PEER: |
661 | D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK); | 797 | /* Normal operation protocol C: successfully written on peer. |
662 | /* protocol C; successfully written on peer. | 798 | * During resync, even in protocol != C, |
799 | * we requested an explicit write ack anyway. | ||
800 | * Which means we cannot even assert anything here. | ||
663 | * Nothing more to do here. | 801 | * Nothing more to do here. |
664 | * We want to keep the tl in place for all protocols, to cater | 802 | * We want to keep the tl in place for all protocols, to cater |
665 | * for volatile write-back caches on lower level devices. */ | 803 | * for volatile write-back caches on lower level devices. */ |
666 | |||
667 | goto ack_common; | 804 | goto ack_common; |
668 | case RECV_ACKED_BY_PEER: | 805 | case RECV_ACKED_BY_PEER: |
669 | D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK); | 806 | D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK); |
@@ -671,7 +808,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
671 | * see also notes above in HANDED_OVER_TO_NETWORK about | 808 | * see also notes above in HANDED_OVER_TO_NETWORK about |
672 | * protocol != C */ | 809 | * protocol != C */ |
673 | ack_common: | 810 | ack_common: |
674 | D_ASSERT(device, req->rq_state & RQ_NET_PENDING); | ||
675 | mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); | 811 | mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); |
676 | break; | 812 | break; |
677 | 813 | ||
@@ -714,7 +850,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
714 | 850 | ||
715 | get_ldev(device); /* always succeeds in this call path */ | 851 | get_ldev(device); /* always succeeds in this call path */ |
716 | req->w.cb = w_restart_disk_io; | 852 | req->w.cb = w_restart_disk_io; |
717 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | 853 | drbd_queue_work(&connection->sender_work, |
718 | &req->w); | 854 | &req->w); |
719 | break; | 855 | break; |
720 | 856 | ||
@@ -736,7 +872,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
736 | 872 | ||
737 | mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING); | 873 | mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING); |
738 | if (req->w.cb) { | 874 | if (req->w.cb) { |
739 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | 875 | /* w.cb expected to be w_send_dblock, or w_send_read_req */ |
876 | drbd_queue_work(&connection->sender_work, | ||
740 | &req->w); | 877 | &req->w); |
741 | rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ; | 878 | rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ; |
742 | } /* else: FIXME can this happen? */ | 879 | } /* else: FIXME can this happen? */ |
@@ -769,7 +906,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
769 | break; | 906 | break; |
770 | 907 | ||
771 | case QUEUE_AS_DRBD_BARRIER: | 908 | case QUEUE_AS_DRBD_BARRIER: |
772 | start_new_tl_epoch(first_peer_device(device)->connection); | 909 | start_new_tl_epoch(connection); |
773 | mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE); | 910 | mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE); |
774 | break; | 911 | break; |
775 | }; | 912 | }; |
@@ -886,6 +1023,9 @@ static void maybe_pull_ahead(struct drbd_device *device) | |||
886 | connection->agreed_pro_version < 96) | 1023 | connection->agreed_pro_version < 96) |
887 | return; | 1024 | return; |
888 | 1025 | ||
1026 | if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD) | ||
1027 | return; /* nothing to do ... */ | ||
1028 | |||
889 | /* If I don't even have good local storage, we can not reasonably try | 1029 | /* If I don't even have good local storage, we can not reasonably try |
890 | * to pull ahead of the peer. We also need the local reference to make | 1030 | * to pull ahead of the peer. We also need the local reference to make |
891 | * sure device->act_log is there. | 1031 | * sure device->act_log is there. |
@@ -1021,6 +1161,7 @@ drbd_submit_req_private_bio(struct drbd_request *req) | |||
1021 | * stable storage, and this is a WRITE, we may not even submit | 1161 | * stable storage, and this is a WRITE, we may not even submit |
1022 | * this bio. */ | 1162 | * this bio. */ |
1023 | if (get_ldev(device)) { | 1163 | if (get_ldev(device)) { |
1164 | req->pre_submit_jif = jiffies; | ||
1024 | if (drbd_insert_fault(device, | 1165 | if (drbd_insert_fault(device, |
1025 | rw == WRITE ? DRBD_FAULT_DT_WR | 1166 | rw == WRITE ? DRBD_FAULT_DT_WR |
1026 | : rw == READ ? DRBD_FAULT_DT_RD | 1167 | : rw == READ ? DRBD_FAULT_DT_RD |
@@ -1035,10 +1176,14 @@ drbd_submit_req_private_bio(struct drbd_request *req) | |||
1035 | 1176 | ||
1036 | static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req) | 1177 | static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req) |
1037 | { | 1178 | { |
1038 | spin_lock(&device->submit.lock); | 1179 | spin_lock_irq(&device->resource->req_lock); |
1039 | list_add_tail(&req->tl_requests, &device->submit.writes); | 1180 | list_add_tail(&req->tl_requests, &device->submit.writes); |
1040 | spin_unlock(&device->submit.lock); | 1181 | list_add_tail(&req->req_pending_master_completion, |
1182 | &device->pending_master_completion[1 /* WRITE */]); | ||
1183 | spin_unlock_irq(&device->resource->req_lock); | ||
1041 | queue_work(device->submit.wq, &device->submit.worker); | 1184 | queue_work(device->submit.wq, &device->submit.worker); |
1185 | /* do_submit() may sleep internally on al_wait, too */ | ||
1186 | wake_up(&device->al_wait); | ||
1042 | } | 1187 | } |
1043 | 1188 | ||
1044 | /* returns the new drbd_request pointer, if the caller is expected to | 1189 | /* returns the new drbd_request pointer, if the caller is expected to |
@@ -1047,7 +1192,7 @@ static void drbd_queue_write(struct drbd_device *device, struct drbd_request *re | |||
1047 | * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request. | 1192 | * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request. |
1048 | */ | 1193 | */ |
1049 | static struct drbd_request * | 1194 | static struct drbd_request * |
1050 | drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_time) | 1195 | drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_jif) |
1051 | { | 1196 | { |
1052 | const int rw = bio_data_dir(bio); | 1197 | const int rw = bio_data_dir(bio); |
1053 | struct drbd_request *req; | 1198 | struct drbd_request *req; |
@@ -1062,7 +1207,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long | |||
1062 | bio_endio(bio, -ENOMEM); | 1207 | bio_endio(bio, -ENOMEM); |
1063 | return ERR_PTR(-ENOMEM); | 1208 | return ERR_PTR(-ENOMEM); |
1064 | } | 1209 | } |
1065 | req->start_time = start_time; | 1210 | req->start_jif = start_jif; |
1066 | 1211 | ||
1067 | if (!get_ldev(device)) { | 1212 | if (!get_ldev(device)) { |
1068 | bio_put(req->private_bio); | 1213 | bio_put(req->private_bio); |
@@ -1075,10 +1220,12 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long | |||
1075 | if (rw == WRITE && req->private_bio && req->i.size | 1220 | if (rw == WRITE && req->private_bio && req->i.size |
1076 | && !test_bit(AL_SUSPENDED, &device->flags)) { | 1221 | && !test_bit(AL_SUSPENDED, &device->flags)) { |
1077 | if (!drbd_al_begin_io_fastpath(device, &req->i)) { | 1222 | if (!drbd_al_begin_io_fastpath(device, &req->i)) { |
1223 | atomic_inc(&device->ap_actlog_cnt); | ||
1078 | drbd_queue_write(device, req); | 1224 | drbd_queue_write(device, req); |
1079 | return NULL; | 1225 | return NULL; |
1080 | } | 1226 | } |
1081 | req->rq_state |= RQ_IN_ACT_LOG; | 1227 | req->rq_state |= RQ_IN_ACT_LOG; |
1228 | req->in_actlog_jif = jiffies; | ||
1082 | } | 1229 | } |
1083 | 1230 | ||
1084 | return req; | 1231 | return req; |
@@ -1086,11 +1233,13 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long | |||
1086 | 1233 | ||
1087 | static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) | 1234 | static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) |
1088 | { | 1235 | { |
1236 | struct drbd_resource *resource = device->resource; | ||
1089 | const int rw = bio_rw(req->master_bio); | 1237 | const int rw = bio_rw(req->master_bio); |
1090 | struct bio_and_error m = { NULL, }; | 1238 | struct bio_and_error m = { NULL, }; |
1091 | bool no_remote = false; | 1239 | bool no_remote = false; |
1240 | bool submit_private_bio = false; | ||
1092 | 1241 | ||
1093 | spin_lock_irq(&device->resource->req_lock); | 1242 | spin_lock_irq(&resource->req_lock); |
1094 | if (rw == WRITE) { | 1243 | if (rw == WRITE) { |
1095 | /* This may temporarily give up the req_lock, | 1244 | /* This may temporarily give up the req_lock, |
1096 | * but will re-acquire it before it returns here. | 1245 | * but will re-acquire it before it returns here. |
@@ -1148,13 +1297,18 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request | |||
1148 | no_remote = true; | 1297 | no_remote = true; |
1149 | } | 1298 | } |
1150 | 1299 | ||
1300 | /* If it took the fast path in drbd_request_prepare, add it here. | ||
1301 | * The slow path has added it already. */ | ||
1302 | if (list_empty(&req->req_pending_master_completion)) | ||
1303 | list_add_tail(&req->req_pending_master_completion, | ||
1304 | &device->pending_master_completion[rw == WRITE]); | ||
1151 | if (req->private_bio) { | 1305 | if (req->private_bio) { |
1152 | /* needs to be marked within the same spinlock */ | 1306 | /* needs to be marked within the same spinlock */ |
1307 | list_add_tail(&req->req_pending_local, | ||
1308 | &device->pending_completion[rw == WRITE]); | ||
1153 | _req_mod(req, TO_BE_SUBMITTED); | 1309 | _req_mod(req, TO_BE_SUBMITTED); |
1154 | /* but we need to give up the spinlock to submit */ | 1310 | /* but we need to give up the spinlock to submit */ |
1155 | spin_unlock_irq(&device->resource->req_lock); | 1311 | submit_private_bio = true; |
1156 | drbd_submit_req_private_bio(req); | ||
1157 | spin_lock_irq(&device->resource->req_lock); | ||
1158 | } else if (no_remote) { | 1312 | } else if (no_remote) { |
1159 | nodata: | 1313 | nodata: |
1160 | if (__ratelimit(&drbd_ratelimit_state)) | 1314 | if (__ratelimit(&drbd_ratelimit_state)) |
@@ -1167,15 +1321,23 @@ nodata: | |||
1167 | out: | 1321 | out: |
1168 | if (drbd_req_put_completion_ref(req, &m, 1)) | 1322 | if (drbd_req_put_completion_ref(req, &m, 1)) |
1169 | kref_put(&req->kref, drbd_req_destroy); | 1323 | kref_put(&req->kref, drbd_req_destroy); |
1170 | spin_unlock_irq(&device->resource->req_lock); | 1324 | spin_unlock_irq(&resource->req_lock); |
1171 | 1325 | ||
1326 | /* Even though the above is a kref_put(), this is safe. | ||
1327 | * As long as we still need to submit our private bio, | ||
1328 | * we hold a completion ref, and the request cannot disappear. | ||
1329 | * If however this request did not even have a private bio to submit | ||
1330 | * (e.g. remote read), req may already be invalid now. | ||
1331 | * That's why we cannot check on req->private_bio. */ | ||
1332 | if (submit_private_bio) | ||
1333 | drbd_submit_req_private_bio(req); | ||
1172 | if (m.bio) | 1334 | if (m.bio) |
1173 | complete_master_bio(device, &m); | 1335 | complete_master_bio(device, &m); |
1174 | } | 1336 | } |
1175 | 1337 | ||
1176 | void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_time) | 1338 | void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_jif) |
1177 | { | 1339 | { |
1178 | struct drbd_request *req = drbd_request_prepare(device, bio, start_time); | 1340 | struct drbd_request *req = drbd_request_prepare(device, bio, start_jif); |
1179 | if (IS_ERR_OR_NULL(req)) | 1341 | if (IS_ERR_OR_NULL(req)) |
1180 | return; | 1342 | return; |
1181 | drbd_send_and_submit(device, req); | 1343 | drbd_send_and_submit(device, req); |
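
The rework above replaces the unlock/submit/re-lock dance with a deferred flag: the decision to submit is taken while the req_lock is held, the submission itself happens only after the lock is dropped, and the completion reference taken earlier keeps the request alive across that window. A compact userspace model of the pattern (a pthread mutex standing in for the spinlock; names are invented for the sketch):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;

    static void submit_private_bio(int req_id)
    {
        /* may block, so it must never run with req_lock held */
        printf("submitting private bio of request %d\n", req_id);
    }

    static void send_and_submit(int req_id, bool has_private_bio)
    {
        bool submit_after_unlock = false;

        pthread_mutex_lock(&req_lock);
        if (has_private_bio)
            submit_after_unlock = true;     /* decide under the lock */
        pthread_mutex_unlock(&req_lock);

        if (submit_after_unlock)            /* act outside the lock */
            submit_private_bio(req_id);
    }

    int main(void)
    {
        send_and_submit(1, true);   /* local submit happens after unlock */
        send_and_submit(2, false);  /* remote-only request: nothing to submit */
        return 0;
    }
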
@@ -1194,6 +1356,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom | |||
1194 | continue; | 1356 | continue; |
1195 | 1357 | ||
1196 | req->rq_state |= RQ_IN_ACT_LOG; | 1358 | req->rq_state |= RQ_IN_ACT_LOG; |
1359 | req->in_actlog_jif = jiffies; | ||
1360 | atomic_dec(&device->ap_actlog_cnt); | ||
1197 | } | 1361 | } |
1198 | 1362 | ||
1199 | list_del_init(&req->tl_requests); | 1363 | list_del_init(&req->tl_requests); |
@@ -1203,7 +1367,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom | |||
1203 | 1367 | ||
1204 | static bool prepare_al_transaction_nonblock(struct drbd_device *device, | 1368 | static bool prepare_al_transaction_nonblock(struct drbd_device *device, |
1205 | struct list_head *incoming, | 1369 | struct list_head *incoming, |
1206 | struct list_head *pending) | 1370 | struct list_head *pending, |
1371 | struct list_head *later) | ||
1207 | { | 1372 | { |
1208 | struct drbd_request *req, *tmp; | 1373 | struct drbd_request *req, *tmp; |
1209 | int wake = 0; | 1374 | int wake = 0; |
@@ -1212,45 +1377,105 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device, | |||
1212 | spin_lock_irq(&device->al_lock); | 1377 | spin_lock_irq(&device->al_lock); |
1213 | list_for_each_entry_safe(req, tmp, incoming, tl_requests) { | 1378 | list_for_each_entry_safe(req, tmp, incoming, tl_requests) { |
1214 | err = drbd_al_begin_io_nonblock(device, &req->i); | 1379 | err = drbd_al_begin_io_nonblock(device, &req->i); |
1380 | if (err == -ENOBUFS) | ||
1381 | break; | ||
1215 | if (err == -EBUSY) | 1382 | if (err == -EBUSY) |
1216 | wake = 1; | 1383 | wake = 1; |
1217 | if (err) | 1384 | if (err) |
1218 | continue; | 1385 | list_move_tail(&req->tl_requests, later); |
1219 | req->rq_state |= RQ_IN_ACT_LOG; | 1386 | else |
1220 | list_move_tail(&req->tl_requests, pending); | 1387 | list_move_tail(&req->tl_requests, pending); |
1221 | } | 1388 | } |
1222 | spin_unlock_irq(&device->al_lock); | 1389 | spin_unlock_irq(&device->al_lock); |
1223 | if (wake) | 1390 | if (wake) |
1224 | wake_up(&device->al_wait); | 1391 | wake_up(&device->al_wait); |
1225 | |||
1226 | return !list_empty(pending); | 1392 | return !list_empty(pending); |
1227 | } | 1393 | } |
1228 | 1394 | ||
1395 | void send_and_submit_pending(struct drbd_device *device, struct list_head *pending) | ||
1396 | { | ||
1397 | struct drbd_request *req, *tmp; | ||
1398 | |||
1399 | list_for_each_entry_safe(req, tmp, pending, tl_requests) { | ||
1400 | req->rq_state |= RQ_IN_ACT_LOG; | ||
1401 | req->in_actlog_jif = jiffies; | ||
1402 | atomic_dec(&device->ap_actlog_cnt); | ||
1403 | list_del_init(&req->tl_requests); | ||
1404 | drbd_send_and_submit(device, req); | ||
1405 | } | ||
1406 | } | ||
1407 | |||
1229 | void do_submit(struct work_struct *ws) | 1408 | void do_submit(struct work_struct *ws) |
1230 | { | 1409 | { |
1231 | struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker); | 1410 | struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker); |
1232 | LIST_HEAD(incoming); | 1411 | LIST_HEAD(incoming); /* from drbd_make_request() */ |
1233 | LIST_HEAD(pending); | 1412 | LIST_HEAD(pending); /* to be submitted after next AL-transaction commit */ |
1234 | struct drbd_request *req, *tmp; | 1413 | LIST_HEAD(busy); /* blocked by resync requests */ |
1414 | |||
1415 | /* grab new incoming requests */ | ||
1416 | spin_lock_irq(&device->resource->req_lock); | ||
1417 | list_splice_tail_init(&device->submit.writes, &incoming); | ||
1418 | spin_unlock_irq(&device->resource->req_lock); | ||
1235 | 1419 | ||
1236 | for (;;) { | 1420 | for (;;) { |
1237 | spin_lock(&device->submit.lock); | 1421 | DEFINE_WAIT(wait); |
1238 | list_splice_tail_init(&device->submit.writes, &incoming); | ||
1239 | spin_unlock(&device->submit.lock); | ||
1240 | 1422 | ||
1423 | /* move used-to-be-busy back to front of incoming */ | ||
1424 | list_splice_init(&busy, &incoming); | ||
1241 | submit_fast_path(device, &incoming); | 1425 | submit_fast_path(device, &incoming); |
1242 | if (list_empty(&incoming)) | 1426 | if (list_empty(&incoming)) |
1243 | break; | 1427 | break; |
1244 | 1428 | ||
1245 | skip_fast_path: | ||
1246 | wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending)); | ||
1247 | /* Maybe more was queued, while we prepared the transaction? | ||
1248 | * Try to stuff them into this transaction as well. | ||
1249 | * Be strictly non-blocking here, no wait_event, we already | ||
1250 | * have something to commit. | ||
1251 | * Stop if we don't make any more progres. | ||
1252 | */ | ||
1253 | for (;;) { | 1429 | for (;;) { |
1430 | prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
1431 | |||
1432 | list_splice_init(&busy, &incoming); | ||
1433 | prepare_al_transaction_nonblock(device, &incoming, &pending, &busy); | ||
1434 | if (!list_empty(&pending)) | ||
1435 | break; | ||
1436 | |||
1437 | schedule(); | ||
1438 | |||
1439 | /* If all currently "hot" activity log extents are kept busy by | ||
1440 | * incoming requests, we still must not totally starve new | ||
1441 | * requests to "cold" extents. | ||
1442 | * Something left on &incoming means there had not been | ||
1443 | * enough update slots available, and the activity log | ||
1444 | * has been marked as "starving". | ||
1445 | * | ||
1446 | * Try again now, without looking for new requests, | ||
1447 | * effectively blocking all new requests until we made | ||
1448 | * at least _some_ progress with what we currently have. | ||
1449 | */ | ||
1450 | if (!list_empty(&incoming)) | ||
1451 | continue; | ||
1452 | |||
1453 | /* Nothing moved to pending, but nothing left | ||
1454 | * on incoming: all moved to busy! | ||
1455 | * Grab new and iterate. */ | ||
1456 | spin_lock_irq(&device->resource->req_lock); | ||
1457 | list_splice_tail_init(&device->submit.writes, &incoming); | ||
1458 | spin_unlock_irq(&device->resource->req_lock); | ||
1459 | } | ||
1460 | finish_wait(&device->al_wait, &wait); | ||
1461 | |||
1462 | /* If the transaction was full, before all incoming requests | ||
1463 | * had been processed, skip ahead to commit, and iterate | ||
1464 | * without splicing in more incoming requests from upper layers. | ||
1465 | * | ||
1466 | * Else, if all incoming have been processed, | ||
1467 | * they have become either "pending" (to be submitted after | ||
1468 | * next transaction commit) or "busy" (blocked by resync). | ||
1469 | * | ||
1470 | * Maybe more was queued, while we prepared the transaction? | ||
1471 | * Try to stuff those into this transaction as well. | ||
1472 | * Be strictly non-blocking here, | ||
1473 | * we already have something to commit. | ||
1474 | * | ||
1475 | * Commit if we don't make any more progress. | ||
1476 | */ | ||
1477 | |||
1478 | while (list_empty(&incoming)) { | ||
1254 | LIST_HEAD(more_pending); | 1479 | LIST_HEAD(more_pending); |
1255 | LIST_HEAD(more_incoming); | 1480 | LIST_HEAD(more_incoming); |
1256 | bool made_progress; | 1481 | bool made_progress; |
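
prepare_al_transaction_nonblock() now sorts incoming writes three ways: requests that get an activity-log slot move to "pending" (submitted after the next transaction commit), requests blocked by resync move to "busy" (retried on the next round), and -ENOBUFS stops the scan because the transaction is full. A toy model of just the pending/busy split (fixed-size arrays instead of list_head, and an invented slot-availability rule):

    #include <stdbool.h>
    #include <stdio.h>

    enum { MAX_REQS = 8 };

    struct reqlist { int ids[MAX_REQS]; int n; };

    static void push(struct reqlist *l, int id)
    {
        l->ids[l->n++] = id;
    }

    /* stand-in for drbd_al_begin_io_nonblock(): pretend even ids get a slot */
    static bool al_slot_available(int id)
    {
        return (id % 2) == 0;
    }

    static void classify(struct reqlist *incoming, struct reqlist *pending,
                         struct reqlist *busy)
    {
        for (int i = 0; i < incoming->n; i++) {
            if (al_slot_available(incoming->ids[i]))
                push(pending, incoming->ids[i]);    /* submit after commit */
            else
                push(busy, incoming->ids[i]);       /* retry next round */
        }
        incoming->n = 0;
    }

    int main(void)
    {
        struct reqlist incoming = { { 1, 2, 3, 4 }, 4 };
        struct reqlist pending = { { 0 }, 0 }, busy = { { 0 }, 0 };

        classify(&incoming, &pending, &busy);
        printf("pending: %d, busy: %d\n", pending.n, busy.n);   /* pending: 2, busy: 2 */
        return 0;
    }
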
@@ -1260,55 +1485,32 @@ skip_fast_path: | |||
1260 | if (list_empty(&device->submit.writes)) | 1485 | if (list_empty(&device->submit.writes)) |
1261 | break; | 1486 | break; |
1262 | 1487 | ||
1263 | spin_lock(&device->submit.lock); | 1488 | spin_lock_irq(&device->resource->req_lock); |
1264 | list_splice_tail_init(&device->submit.writes, &more_incoming); | 1489 | list_splice_tail_init(&device->submit.writes, &more_incoming); |
1265 | spin_unlock(&device->submit.lock); | 1490 | spin_unlock_irq(&device->resource->req_lock); |
1266 | 1491 | ||
1267 | if (list_empty(&more_incoming)) | 1492 | if (list_empty(&more_incoming)) |
1268 | break; | 1493 | break; |
1269 | 1494 | ||
1270 | made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending); | 1495 | made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy); |
1271 | 1496 | ||
1272 | list_splice_tail_init(&more_pending, &pending); | 1497 | list_splice_tail_init(&more_pending, &pending); |
1273 | list_splice_tail_init(&more_incoming, &incoming); | 1498 | list_splice_tail_init(&more_incoming, &incoming); |
1274 | |||
1275 | if (!made_progress) | 1499 | if (!made_progress) |
1276 | break; | 1500 | break; |
1277 | } | 1501 | } |
1278 | drbd_al_begin_io_commit(device, false); | ||
1279 | |||
1280 | list_for_each_entry_safe(req, tmp, &pending, tl_requests) { | ||
1281 | list_del_init(&req->tl_requests); | ||
1282 | drbd_send_and_submit(device, req); | ||
1283 | } | ||
1284 | 1502 | ||
1285 | /* If all currently hot activity log extents are kept busy by | 1503 | drbd_al_begin_io_commit(device); |
1286 | * incoming requests, we still must not totally starve new | 1504 | send_and_submit_pending(device, &pending); |
1287 | * requests to cold extents. In that case, prepare one request | ||
1288 | * in blocking mode. */ | ||
1289 | list_for_each_entry_safe(req, tmp, &incoming, tl_requests) { | ||
1290 | list_del_init(&req->tl_requests); | ||
1291 | req->rq_state |= RQ_IN_ACT_LOG; | ||
1292 | if (!drbd_al_begin_io_prepare(device, &req->i)) { | ||
1293 | /* Corresponding extent was hot after all? */ | ||
1294 | drbd_send_and_submit(device, req); | ||
1295 | } else { | ||
1296 | /* Found a request to a cold extent. | ||
1297 | * Put on "pending" list, | ||
1298 | * and try to cumulate with more. */ | ||
1299 | list_add(&req->tl_requests, &pending); | ||
1300 | goto skip_fast_path; | ||
1301 | } | ||
1302 | } | ||
1303 | } | 1505 | } |
1304 | } | 1506 | } |
1305 | 1507 | ||
1306 | void drbd_make_request(struct request_queue *q, struct bio *bio) | 1508 | void drbd_make_request(struct request_queue *q, struct bio *bio) |
1307 | { | 1509 | { |
1308 | struct drbd_device *device = (struct drbd_device *) q->queuedata; | 1510 | struct drbd_device *device = (struct drbd_device *) q->queuedata; |
1309 | unsigned long start_time; | 1511 | unsigned long start_jif; |
1310 | 1512 | ||
1311 | start_time = jiffies; | 1513 | start_jif = jiffies; |
1312 | 1514 | ||
1313 | /* | 1515 | /* |
1314 | * what we "blindly" assume: | 1516 | * what we "blindly" assume: |
@@ -1316,7 +1518,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) | |||
1316 | D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512)); | 1518 | D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512)); |
1317 | 1519 | ||
1318 | inc_ap_bio(device); | 1520 | inc_ap_bio(device); |
1319 | __drbd_make_request(device, bio, start_time); | 1521 | __drbd_make_request(device, bio, start_jif); |
1320 | } | 1522 | } |
1321 | 1523 | ||
1322 | /* This is called by bio_add_page(). | 1524 | /* This is called by bio_add_page(). |
@@ -1353,36 +1555,13 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct | |||
1353 | return limit; | 1555 | return limit; |
1354 | } | 1556 | } |
1355 | 1557 | ||
1356 | static void find_oldest_requests( | ||
1357 | struct drbd_connection *connection, | ||
1358 | struct drbd_device *device, | ||
1359 | struct drbd_request **oldest_req_waiting_for_peer, | ||
1360 | struct drbd_request **oldest_req_waiting_for_disk) | ||
1361 | { | ||
1362 | struct drbd_request *r; | ||
1363 | *oldest_req_waiting_for_peer = NULL; | ||
1364 | *oldest_req_waiting_for_disk = NULL; | ||
1365 | list_for_each_entry(r, &connection->transfer_log, tl_requests) { | ||
1366 | const unsigned s = r->rq_state; | ||
1367 | if (!*oldest_req_waiting_for_peer | ||
1368 | && ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) | ||
1369 | *oldest_req_waiting_for_peer = r; | ||
1370 | |||
1371 | if (!*oldest_req_waiting_for_disk | ||
1372 | && (s & RQ_LOCAL_PENDING) && r->device == device) | ||
1373 | *oldest_req_waiting_for_disk = r; | ||
1374 | |||
1375 | if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk) | ||
1376 | break; | ||
1377 | } | ||
1378 | } | ||
1379 | |||
1380 | void request_timer_fn(unsigned long data) | 1558 | void request_timer_fn(unsigned long data) |
1381 | { | 1559 | { |
1382 | struct drbd_device *device = (struct drbd_device *) data; | 1560 | struct drbd_device *device = (struct drbd_device *) data; |
1383 | struct drbd_connection *connection = first_peer_device(device)->connection; | 1561 | struct drbd_connection *connection = first_peer_device(device)->connection; |
1384 | struct drbd_request *req_disk, *req_peer; /* oldest request */ | 1562 | struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */ |
1385 | struct net_conf *nc; | 1563 | struct net_conf *nc; |
1564 | unsigned long oldest_submit_jif; | ||
1386 | unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ | 1565 | unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ |
1387 | unsigned long now; | 1566 | unsigned long now; |
1388 | 1567 | ||
@@ -1403,14 +1582,31 @@ void request_timer_fn(unsigned long data) | |||
1403 | return; /* Recurring timer stopped */ | 1582 | return; /* Recurring timer stopped */ |
1404 | 1583 | ||
1405 | now = jiffies; | 1584 | now = jiffies; |
1585 | nt = now + et; | ||
1406 | 1586 | ||
1407 | spin_lock_irq(&device->resource->req_lock); | 1587 | spin_lock_irq(&device->resource->req_lock); |
1408 | find_oldest_requests(connection, device, &req_peer, &req_disk); | 1588 | req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local); |
1409 | if (req_peer == NULL && req_disk == NULL) { | 1589 | req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local); |
1410 | spin_unlock_irq(&device->resource->req_lock); | 1590 | req_peer = connection->req_not_net_done; |
1411 | mod_timer(&device->request_timer, now + et); | 1591 | /* maybe the oldest request waiting for the peer is in fact still |
1412 | return; | 1592 | * blocking in tcp sendmsg */ |
1413 | } | 1593 | if (!req_peer && connection->req_next && connection->req_next->pre_send_jif) |
1594 | req_peer = connection->req_next; | ||
1595 | |||
1596 | /* evaluate the oldest peer request only in one timer! */ | ||
1597 | if (req_peer && req_peer->device != device) | ||
1598 | req_peer = NULL; | ||
1599 | |||
1600 | /* do we have something to evaluate? */ | ||
1601 | if (req_peer == NULL && req_write == NULL && req_read == NULL) | ||
1602 | goto out; | ||
1603 | |||
1604 | oldest_submit_jif = | ||
1605 | (req_write && req_read) | ||
1606 | ? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif) | ||
1607 | ? req_write->pre_submit_jif : req_read->pre_submit_jif ) | ||
1608 | : req_write ? req_write->pre_submit_jif | ||
1609 | : req_read ? req_read->pre_submit_jif : now; | ||
1414 | 1610 | ||
1415 | /* The request is considered timed out, if | 1611 | /* The request is considered timed out, if |
1416 | * - we have some effective timeout from the configuration, | 1612 | * - we have some effective timeout from the configuration, |
@@ -1429,13 +1625,13 @@ void request_timer_fn(unsigned long data) | |||
1429 | * to expire twice (worst case) to become effective. Good enough. | 1625 | * to expire twice (worst case) to become effective. Good enough. |
1430 | */ | 1626 | */ |
1431 | if (ent && req_peer && | 1627 | if (ent && req_peer && |
1432 | time_after(now, req_peer->start_time + ent) && | 1628 | time_after(now, req_peer->pre_send_jif + ent) && |
1433 | !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { | 1629 | !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { |
1434 | drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); | 1630 | drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); |
1435 | _drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); | 1631 | _drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); |
1436 | } | 1632 | } |
1437 | if (dt && req_disk && | 1633 | if (dt && oldest_submit_jif != now && |
1438 | time_after(now, req_disk->start_time + dt) && | 1634 | time_after(now, oldest_submit_jif + dt) && |
1439 | !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { | 1635 | !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { |
1440 | drbd_warn(device, "Local backing device failed to meet the disk-timeout\n"); | 1636 | drbd_warn(device, "Local backing device failed to meet the disk-timeout\n"); |
1441 | __drbd_chk_io_error(device, DRBD_FORCE_DETACH); | 1637 | __drbd_chk_io_error(device, DRBD_FORCE_DETACH); |
@@ -1443,11 +1639,12 @@ void request_timer_fn(unsigned long data) | |||
1443 | 1639 | ||
1444 | /* Reschedule timer for the nearest not already expired timeout. | 1640 | /* Reschedule timer for the nearest not already expired timeout. |
1445 | * Fallback to now + min(effective network timeout, disk timeout). */ | 1641 | * Fallback to now + min(effective network timeout, disk timeout). */ |
1446 | ent = (ent && req_peer && time_before(now, req_peer->start_time + ent)) | 1642 | ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent)) |
1447 | ? req_peer->start_time + ent : now + et; | 1643 | ? req_peer->pre_send_jif + ent : now + et; |
1448 | dt = (dt && req_disk && time_before(now, req_disk->start_time + dt)) | 1644 | dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt)) |
1449 | ? req_disk->start_time + dt : now + et; | 1645 | ? oldest_submit_jif + dt : now + et; |
1450 | nt = time_before(ent, dt) ? ent : dt; | 1646 | nt = time_before(ent, dt) ? ent : dt; |
1647 | out: | ||
1451 | spin_unlock_irq(&connection->resource->req_lock); | 1648 | spin_unlock_irq(&connection->resource->req_lock); |
1452 | mod_timer(&device->request_timer, nt); | 1649 | mod_timer(&device->request_timer, nt); |
1453 | } | 1650 | } |
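
request_timer_fn() now works from per-request timestamps: it takes the older of the oldest pending read and write submit times, and a peer request only counts as timed out if an effective timeout is configured, the request is older than that, and we are not still inside the grace window following a reconnect. The comparisons are the usual wrap-safe jiffies macros; a self-contained sketch of both pieces:

    #include <stdbool.h>
    #include <stdio.h>

    /* wrap-safe comparisons, spelled out the way the kernel defines them */
    #define time_after(a, b)       ((long)((b) - (a)) < 0)
    #define time_before(a, b)      time_after(b, a)
    #define time_in_range(a, b, c) (!time_before(a, b) && !time_after(a, c))

    /* older of two optional submit timestamps, falling back to "now" */
    static unsigned long oldest_submit_jif(unsigned long now,
                                           const unsigned long *write_jif,
                                           const unsigned long *read_jif)
    {
        if (write_jif && read_jif)
            return time_before(*write_jif, *read_jif) ? *write_jif : *read_jif;
        if (write_jif)
            return *write_jif;
        return read_jif ? *read_jif : now;
    }

    static bool peer_request_timed_out(unsigned long now, unsigned long ent,
                                       unsigned long pre_send_jif,
                                       unsigned long last_reconnect_jif)
    {
        return ent &&
               time_after(now, pre_send_jif + ent) &&
               !time_in_range(now, last_reconnect_jif, last_reconnect_jif + ent);
    }

    int main(void)
    {
        unsigned long now = 1000, w = 900, r = 950;

        printf("oldest submit: %lu\n", oldest_submit_jif(now, &w, &r));            /* 900 */
        printf("timed out:     %d\n", peer_request_timed_out(200, 50, 100, 0));    /* 1 */
        printf("timed out:     %d\n", peer_request_timed_out(200, 50, 100, 180));  /* 0: reconnect grace */
        return 0;
    }
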
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 8566cd5866b4..9f6a04080e9f 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h | |||
@@ -288,6 +288,7 @@ extern void complete_master_bio(struct drbd_device *device, | |||
288 | extern void request_timer_fn(unsigned long data); | 288 | extern void request_timer_fn(unsigned long data); |
289 | extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what); | 289 | extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what); |
290 | extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what); | 290 | extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what); |
291 | extern void tl_abort_disk_io(struct drbd_device *device); | ||
291 | 292 | ||
292 | /* this is in drbd_main.c */ | 293 | /* this is in drbd_main.c */ |
293 | extern void drbd_restart_request(struct drbd_request *req); | 294 | extern void drbd_restart_request(struct drbd_request *req); |
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index a5d8aae00e04..c35c0f001bb7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c | |||
@@ -410,7 +410,7 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask, | |||
410 | return rv; | 410 | return rv; |
411 | } | 411 | } |
412 | 412 | ||
413 | static void print_st(struct drbd_device *device, char *name, union drbd_state ns) | 413 | static void print_st(struct drbd_device *device, const char *name, union drbd_state ns) |
414 | { | 414 | { |
415 | drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", | 415 | drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", |
416 | name, | 416 | name, |
@@ -952,11 +952,12 @@ enum drbd_state_rv | |||
952 | __drbd_set_state(struct drbd_device *device, union drbd_state ns, | 952 | __drbd_set_state(struct drbd_device *device, union drbd_state ns, |
953 | enum chg_state_flags flags, struct completion *done) | 953 | enum chg_state_flags flags, struct completion *done) |
954 | { | 954 | { |
955 | struct drbd_peer_device *peer_device = first_peer_device(device); | ||
956 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | ||
955 | union drbd_state os; | 957 | union drbd_state os; |
956 | enum drbd_state_rv rv = SS_SUCCESS; | 958 | enum drbd_state_rv rv = SS_SUCCESS; |
957 | enum sanitize_state_warnings ssw; | 959 | enum sanitize_state_warnings ssw; |
958 | struct after_state_chg_work *ascw; | 960 | struct after_state_chg_work *ascw; |
959 | bool did_remote, should_do_remote; | ||
960 | 961 | ||
961 | os = drbd_read_state(device); | 962 | os = drbd_read_state(device); |
962 | 963 | ||
@@ -978,9 +979,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
978 | this happen...*/ | 979 | this happen...*/ |
979 | 980 | ||
980 | if (is_valid_state(device, os) == rv) | 981 | if (is_valid_state(device, os) == rv) |
981 | rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); | 982 | rv = is_valid_soft_transition(os, ns, connection); |
982 | } else | 983 | } else |
983 | rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); | 984 | rv = is_valid_soft_transition(os, ns, connection); |
984 | } | 985 | } |
985 | 986 | ||
986 | if (rv < SS_SUCCESS) { | 987 | if (rv < SS_SUCCESS) { |
@@ -997,7 +998,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
997 | sanitize_state(). Only display it here if we were not called from | 998 | sanitize_state(). Only display it here if we were not called from |
998 | _conn_request_state() */ | 999 | _conn_request_state() */ |
999 | if (!(flags & CS_DC_SUSP)) | 1000 | if (!(flags & CS_DC_SUSP)) |
1000 | conn_pr_state_change(first_peer_device(device)->connection, os, ns, | 1001 | conn_pr_state_change(connection, os, ns, |
1001 | (flags & ~CS_DC_MASK) | CS_DC_SUSP); | 1002 | (flags & ~CS_DC_MASK) | CS_DC_SUSP); |
1002 | 1003 | ||
1003 | /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference | 1004 | /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference |
@@ -1008,28 +1009,35 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
1008 | (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) | 1009 | (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) |
1009 | atomic_inc(&device->local_cnt); | 1010 | atomic_inc(&device->local_cnt); |
1010 | 1011 | ||
1011 | did_remote = drbd_should_do_remote(device->state); | 1012 | if (!is_sync_state(os.conn) && is_sync_state(ns.conn)) |
1013 | clear_bit(RS_DONE, &device->flags); | ||
1014 | |||
1015 | /* changes to local_cnt and device flags should be visible before | ||
1016 | * changes to state, which again should be visible before anything else | ||
1017 | * depending on that change happens. */ | ||
1018 | smp_wmb(); | ||
1012 | device->state.i = ns.i; | 1019 | device->state.i = ns.i; |
1013 | should_do_remote = drbd_should_do_remote(device->state); | ||
1014 | device->resource->susp = ns.susp; | 1020 | device->resource->susp = ns.susp; |
1015 | device->resource->susp_nod = ns.susp_nod; | 1021 | device->resource->susp_nod = ns.susp_nod; |
1016 | device->resource->susp_fen = ns.susp_fen; | 1022 | device->resource->susp_fen = ns.susp_fen; |
1023 | smp_wmb(); | ||
1017 | 1024 | ||
1018 | /* put replicated vs not-replicated requests in separate epochs */ | 1025 | /* put replicated vs not-replicated requests in separate epochs */ |
1019 | if (did_remote != should_do_remote) | 1026 | if (drbd_should_do_remote((union drbd_dev_state)os.i) != |
1020 | start_new_tl_epoch(first_peer_device(device)->connection); | 1027 | drbd_should_do_remote((union drbd_dev_state)ns.i)) |
1028 | start_new_tl_epoch(connection); | ||
1021 | 1029 | ||
1022 | if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) | 1030 | if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) |
1023 | drbd_print_uuids(device, "attached to UUIDs"); | 1031 | drbd_print_uuids(device, "attached to UUIDs"); |
1024 | 1032 | ||
1025 | /* Wake up role changes that were delayed while the connection was being established */ | 1033 | /* Wake up role changes that were delayed while the connection was being established */ |
1026 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && | 1034 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && |
1027 | no_peer_wf_report_params(first_peer_device(device)->connection)) | 1035 | no_peer_wf_report_params(connection)) |
1028 | clear_bit(STATE_SENT, &first_peer_device(device)->connection->flags); | 1036 | clear_bit(STATE_SENT, &connection->flags); |
1029 | 1037 | ||
1030 | wake_up(&device->misc_wait); | 1038 | wake_up(&device->misc_wait); |
1031 | wake_up(&device->state_wait); | 1039 | wake_up(&device->state_wait); |
1032 | wake_up(&first_peer_device(device)->connection->ping_wait); | 1040 | wake_up(&connection->ping_wait); |
1033 | 1041 | ||
1034 | /* Aborted verify run, or we reached the stop sector. | 1042 | /* Aborted verify run, or we reached the stop sector. |
1035 | * Log the last position, unless end-of-device. */ | 1043 | * Log the last position, unless end-of-device. */ |
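
The smp_wmb() calls added in __drbd_set_state() enforce a publish order: the RS_DONE/flag and local_cnt updates become visible before the new device state, and the state and susp fields before anything that reacts to them. The same "prepare, then publish" idea can be rendered in userspace with C11 release/acquire atomics (not the kernel primitives; structure and names are invented for the sketch):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct dev_model {
        atomic_bool rs_done;    /* stands in for the RS_DONE device flag */
        atomic_uint state;      /* stands in for device->state.i */
    };

    static void publish_state(struct dev_model *d, unsigned int new_state)
    {
        atomic_store_explicit(&d->rs_done, false, memory_order_relaxed);
        /* release: the flag update above is ordered before the state change */
        atomic_store_explicit(&d->state, new_state, memory_order_release);
    }

    static bool consistent_view(struct dev_model *d, unsigned int expected)
    {
        /* acquire: a reader that sees the new state also sees everything
         * published before it, including rs_done == false */
        if (atomic_load_explicit(&d->state, memory_order_acquire) != expected)
            return true;    /* state not visible yet; nothing to check */
        return !atomic_load_explicit(&d->rs_done, memory_order_relaxed);
    }

    int main(void)
    {
        struct dev_model d;

        atomic_init(&d.rs_done, true);
        atomic_init(&d.state, 0);
        publish_state(&d, 42);
        printf("%d\n", consistent_view(&d, 42));    /* 1 */
        return 0;
    }
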
@@ -1118,21 +1126,21 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
1118 | 1126 | ||
1119 | /* Receiver should clean up itself */ | 1127 | /* Receiver should clean up itself */ |
1120 | if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) | 1128 | if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) |
1121 | drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver); | 1129 | drbd_thread_stop_nowait(&connection->receiver); |
1122 | 1130 | ||
1123 | /* Now the receiver finished cleaning up itself, it should die */ | 1131 | /* Now the receiver finished cleaning up itself, it should die */ |
1124 | if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) | 1132 | if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) |
1125 | drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver); | 1133 | drbd_thread_stop_nowait(&connection->receiver); |
1126 | 1134 | ||
1127 | /* Upon network failure, we need to restart the receiver. */ | 1135 | /* Upon network failure, we need to restart the receiver. */ |
1128 | if (os.conn > C_WF_CONNECTION && | 1136 | if (os.conn > C_WF_CONNECTION && |
1129 | ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) | 1137 | ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) |
1130 | drbd_thread_restart_nowait(&first_peer_device(device)->connection->receiver); | 1138 | drbd_thread_restart_nowait(&connection->receiver); |
1131 | 1139 | ||
1132 | /* Resume AL writing if we get a connection */ | 1140 | /* Resume AL writing if we get a connection */ |
1133 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { | 1141 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { |
1134 | drbd_resume_al(device); | 1142 | drbd_resume_al(device); |
1135 | first_peer_device(device)->connection->connect_cnt++; | 1143 | connection->connect_cnt++; |
1136 | } | 1144 | } |
1137 | 1145 | ||
1138 | /* remember last attach time so request_timer_fn() won't | 1146 | /* remember last attach time so request_timer_fn() won't |
@@ -1150,7 +1158,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
1150 | ascw->w.cb = w_after_state_ch; | 1158 | ascw->w.cb = w_after_state_ch; |
1151 | ascw->device = device; | 1159 | ascw->device = device; |
1152 | ascw->done = done; | 1160 | ascw->done = done; |
1153 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | 1161 | drbd_queue_work(&connection->sender_work, |
1154 | &ascw->w); | 1162 | &ascw->w); |
1155 | } else { | 1163 | } else { |
1156 | drbd_err(device, "Could not kmalloc an ascw\n"); | 1164 | drbd_err(device, "Could not kmalloc an ascw\n"); |
@@ -1222,13 +1230,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1222 | union drbd_state ns, enum chg_state_flags flags) | 1230 | union drbd_state ns, enum chg_state_flags flags) |
1223 | { | 1231 | { |
1224 | struct drbd_resource *resource = device->resource; | 1232 | struct drbd_resource *resource = device->resource; |
1233 | struct drbd_peer_device *peer_device = first_peer_device(device); | ||
1234 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | ||
1225 | struct sib_info sib; | 1235 | struct sib_info sib; |
1226 | 1236 | ||
1227 | sib.sib_reason = SIB_STATE_CHANGE; | 1237 | sib.sib_reason = SIB_STATE_CHANGE; |
1228 | sib.os = os; | 1238 | sib.os = os; |
1229 | sib.ns = ns; | 1239 | sib.ns = ns; |
1230 | 1240 | ||
1231 | if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { | 1241 | if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE) |
1242 | && (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) { | ||
1232 | clear_bit(CRASHED_PRIMARY, &device->flags); | 1243 | clear_bit(CRASHED_PRIMARY, &device->flags); |
1233 | if (device->p_uuid) | 1244 | if (device->p_uuid) |
1234 | device->p_uuid[UI_FLAGS] &= ~((u64)2); | 1245 | device->p_uuid[UI_FLAGS] &= ~((u64)2); |
@@ -1245,7 +1256,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1245 | state change. This function might sleep */ | 1256 | state change. This function might sleep */ |
1246 | 1257 | ||
1247 | if (ns.susp_nod) { | 1258 | if (ns.susp_nod) { |
1248 | struct drbd_connection *connection = first_peer_device(device)->connection; | ||
1249 | enum drbd_req_event what = NOTHING; | 1259 | enum drbd_req_event what = NOTHING; |
1250 | 1260 | ||
1251 | spin_lock_irq(&device->resource->req_lock); | 1261 | spin_lock_irq(&device->resource->req_lock); |
@@ -1267,8 +1277,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1267 | } | 1277 | } |
1268 | 1278 | ||
1269 | if (ns.susp_fen) { | 1279 | if (ns.susp_fen) { |
1270 | struct drbd_connection *connection = first_peer_device(device)->connection; | ||
1271 | |||
1272 | spin_lock_irq(&device->resource->req_lock); | 1280 | spin_lock_irq(&device->resource->req_lock); |
1273 | if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) { | 1281 | if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) { |
1274 | /* case2: The connection was established again: */ | 1282 | /* case2: The connection was established again: */ |
@@ -1294,8 +1302,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1294 | * which is unexpected. */ | 1302 | * which is unexpected. */ |
1295 | if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && | 1303 | if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && |
1296 | (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && | 1304 | (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && |
1297 | first_peer_device(device)->connection->agreed_pro_version >= 96 && get_ldev(device)) { | 1305 | connection->agreed_pro_version >= 96 && get_ldev(device)) { |
1298 | drbd_gen_and_send_sync_uuid(first_peer_device(device)); | 1306 | drbd_gen_and_send_sync_uuid(peer_device); |
1299 | put_ldev(device); | 1307 | put_ldev(device); |
1300 | } | 1308 | } |
1301 | 1309 | ||
@@ -1309,8 +1317,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1309 | atomic_set(&device->rs_pending_cnt, 0); | 1317 | atomic_set(&device->rs_pending_cnt, 0); |
1310 | drbd_rs_cancel_all(device); | 1318 | drbd_rs_cancel_all(device); |
1311 | 1319 | ||
1312 | drbd_send_uuids(first_peer_device(device)); | 1320 | drbd_send_uuids(peer_device); |
1313 | drbd_send_state(first_peer_device(device), ns); | 1321 | drbd_send_state(peer_device, ns); |
1314 | } | 1322 | } |
1315 | /* No point in queuing send_bitmap if we don't have a connection | 1323 | /* No point in queuing send_bitmap if we don't have a connection |
1316 | * anymore, so check also the _current_ state, not only the new state | 1324 | * anymore, so check also the _current_ state, not only the new state |
@@ -1335,7 +1343,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1335 | set_bit(NEW_CUR_UUID, &device->flags); | 1343 | set_bit(NEW_CUR_UUID, &device->flags); |
1336 | } else { | 1344 | } else { |
1337 | drbd_uuid_new_current(device); | 1345 | drbd_uuid_new_current(device); |
1338 | drbd_send_uuids(first_peer_device(device)); | 1346 | drbd_send_uuids(peer_device); |
1339 | } | 1347 | } |
1340 | } | 1348 | } |
1341 | put_ldev(device); | 1349 | put_ldev(device); |
@@ -1346,7 +1354,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1346 | if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && | 1354 | if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && |
1347 | device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { | 1355 | device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { |
1348 | drbd_uuid_new_current(device); | 1356 | drbd_uuid_new_current(device); |
1349 | drbd_send_uuids(first_peer_device(device)); | 1357 | drbd_send_uuids(peer_device); |
1350 | } | 1358 | } |
1351 | /* D_DISKLESS Peer becomes secondary */ | 1359 | /* D_DISKLESS Peer becomes secondary */ |
1352 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) | 1360 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) |
@@ -1373,16 +1381,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1373 | /* Last part of the attaching process ... */ | 1381 | /* Last part of the attaching process ... */ |
1374 | if (ns.conn >= C_CONNECTED && | 1382 | if (ns.conn >= C_CONNECTED && |
1375 | os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { | 1383 | os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { |
1376 | drbd_send_sizes(first_peer_device(device), 0, 0); /* to start sync... */ | 1384 | drbd_send_sizes(peer_device, 0, 0); /* to start sync... */ |
1377 | drbd_send_uuids(first_peer_device(device)); | 1385 | drbd_send_uuids(peer_device); |
1378 | drbd_send_state(first_peer_device(device), ns); | 1386 | drbd_send_state(peer_device, ns); |
1379 | } | 1387 | } |
1380 | 1388 | ||
1381 | /* We want to pause/continue resync, tell peer. */ | 1389 | /* We want to pause/continue resync, tell peer. */ |
1382 | if (ns.conn >= C_CONNECTED && | 1390 | if (ns.conn >= C_CONNECTED && |
1383 | ((os.aftr_isp != ns.aftr_isp) || | 1391 | ((os.aftr_isp != ns.aftr_isp) || |
1384 | (os.user_isp != ns.user_isp))) | 1392 | (os.user_isp != ns.user_isp))) |
1385 | drbd_send_state(first_peer_device(device), ns); | 1393 | drbd_send_state(peer_device, ns); |
1386 | 1394 | ||
1387 | /* In case one of the isp bits got set, suspend other devices. */ | 1395 | /* In case one of the isp bits got set, suspend other devices. */ |
1388 | if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && | 1396 | if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && |
@@ -1392,10 +1400,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1392 | /* Make sure the peer gets informed about eventual state | 1400 | /* Make sure the peer gets informed about eventual state |
1393 | changes (ISP bits) while we were in WFReportParams. */ | 1401 | changes (ISP bits) while we were in WFReportParams. */ |
1394 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) | 1402 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) |
1395 | drbd_send_state(first_peer_device(device), ns); | 1403 | drbd_send_state(peer_device, ns); |
1396 | 1404 | ||
1397 | if (os.conn != C_AHEAD && ns.conn == C_AHEAD) | 1405 | if (os.conn != C_AHEAD && ns.conn == C_AHEAD) |
1398 | drbd_send_state(first_peer_device(device), ns); | 1406 | drbd_send_state(peer_device, ns); |
1399 | 1407 | ||
1400 | /* We are in the process of starting a full sync... */ | 1408 | /* We are in the process of starting a full sync... */ |
1401 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || | 1409 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || |
@@ -1449,7 +1457,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1449 | drbd_disk_str(device->state.disk)); | 1457 | drbd_disk_str(device->state.disk)); |
1450 | 1458 | ||
1451 | if (ns.conn >= C_CONNECTED) | 1459 | if (ns.conn >= C_CONNECTED) |
1452 | drbd_send_state(first_peer_device(device), ns); | 1460 | drbd_send_state(peer_device, ns); |
1453 | 1461 | ||
1454 | drbd_rs_cancel_all(device); | 1462 | drbd_rs_cancel_all(device); |
1455 | 1463 | ||
@@ -1473,7 +1481,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1473 | drbd_disk_str(device->state.disk)); | 1481 | drbd_disk_str(device->state.disk)); |
1474 | 1482 | ||
1475 | if (ns.conn >= C_CONNECTED) | 1483 | if (ns.conn >= C_CONNECTED) |
1476 | drbd_send_state(first_peer_device(device), ns); | 1484 | drbd_send_state(peer_device, ns); |
1477 | /* corresponding get_ldev in __drbd_set_state | 1485 | /* corresponding get_ldev in __drbd_set_state |
1478 | * this may finally trigger drbd_ldev_destroy. */ | 1486 | * this may finally trigger drbd_ldev_destroy. */ |
1479 | put_ldev(device); | 1487 | put_ldev(device); |
@@ -1481,7 +1489,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1481 | 1489 | ||
1482 | /* Notify peer that I had a local IO error, and did not detach. */ | 1490 | /* Notify peer that I had a local IO error, and did not detach. */ |
1483 | if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) | 1491 | if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) |
1484 | drbd_send_state(first_peer_device(device), ns); | 1492 | drbd_send_state(peer_device, ns); |
1485 | 1493 | ||
1486 | /* Disks got bigger while they were detached */ | 1494 | /* Disks got bigger while they were detached */ |
1487 | if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && | 1495 | if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && |
@@ -1499,14 +1507,14 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1499 | /* sync target done with resync. Explicitly notify peer, even though | 1507 | /* sync target done with resync. Explicitly notify peer, even though |
1500 | * it should (at least for non-empty resyncs) already know itself. */ | 1508 | * it should (at least for non-empty resyncs) already know itself. */ |
1501 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) | 1509 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) |
1502 | drbd_send_state(first_peer_device(device), ns); | 1510 | drbd_send_state(peer_device, ns); |
1503 | 1511 | ||
1504 | /* Verify finished, or reached stop sector. Peer did not know about | 1512 | /* Verify finished, or reached stop sector. Peer did not know about |
1505 | * the stop sector, and we may even have changed the stop sector during | 1513 | * the stop sector, and we may even have changed the stop sector during |
1506 | * verify to interrupt/stop early. Send the new state. */ | 1514 | * verify to interrupt/stop early. Send the new state. */ |
1507 | if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED | 1515 | if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED |
1508 | && verify_can_do_stop_sector(device)) | 1516 | && verify_can_do_stop_sector(device)) |
1509 | drbd_send_state(first_peer_device(device), ns); | 1517 | drbd_send_state(peer_device, ns); |
1510 | 1518 | ||
1511 | /* This triggers bitmap writeout of potentially still unwritten pages | 1519 | /* This triggers bitmap writeout of potentially still unwritten pages |
1512 | * if the resync finished cleanly, or aborted because of peer disk | 1520 | * if the resync finished cleanly, or aborted because of peer disk |
@@ -1563,7 +1571,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) | |||
1563 | old_conf = connection->net_conf; | 1571 | old_conf = connection->net_conf; |
1564 | connection->my_addr_len = 0; | 1572 | connection->my_addr_len = 0; |
1565 | connection->peer_addr_len = 0; | 1573 | connection->peer_addr_len = 0; |
1566 | rcu_assign_pointer(connection->net_conf, NULL); | 1574 | RCU_INIT_POINTER(connection->net_conf, NULL); |
1567 | conn_free_crypto(connection); | 1575 | conn_free_crypto(connection); |
1568 | mutex_unlock(&connection->resource->conf_update); | 1576 | mutex_unlock(&connection->resource->conf_update); |
1569 | 1577 | ||
@@ -1599,7 +1607,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) | |||
1599 | return 0; | 1607 | return 0; |
1600 | } | 1608 | } |
1601 | 1609 | ||
1602 | void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf) | 1610 | static void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf) |
1603 | { | 1611 | { |
1604 | enum chg_state_flags flags = ~0; | 1612 | enum chg_state_flags flags = ~0; |
1605 | struct drbd_peer_device *peer_device; | 1613 | struct drbd_peer_device *peer_device; |
@@ -1688,7 +1696,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma | |||
1688 | return rv; | 1696 | return rv; |
1689 | } | 1697 | } |
1690 | 1698 | ||
1691 | void | 1699 | static void |
1692 | conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, | 1700 | conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, |
1693 | union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) | 1701 | union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) |
1694 | { | 1702 | { |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d8f57b6305cd..50776b362828 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -67,13 +67,10 @@ rwlock_t global_state_lock; | |||
67 | */ | 67 | */ |
68 | void drbd_md_io_complete(struct bio *bio, int error) | 68 | void drbd_md_io_complete(struct bio *bio, int error) |
69 | { | 69 | { |
70 | struct drbd_md_io *md_io; | ||
71 | struct drbd_device *device; | 70 | struct drbd_device *device; |
72 | 71 | ||
73 | md_io = (struct drbd_md_io *)bio->bi_private; | 72 | device = bio->bi_private; |
74 | device = container_of(md_io, struct drbd_device, md_io); | 73 | device->md_io.error = error; |
75 | |||
76 | md_io->error = error; | ||
77 | 74 | ||
78 | /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able | 75 | /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able |
79 | * to timeout on the lower level device, and eventually detach from it. | 76 | * to timeout on the lower level device, and eventually detach from it. |
@@ -87,7 +84,7 @@ void drbd_md_io_complete(struct bio *bio, int error) | |||
87 | * ASSERT(atomic_read(&device->md_io_in_use) == 1) there. | 84 | * ASSERT(atomic_read(&device->md_io_in_use) == 1) there. |
88 | */ | 85 | */ |
89 | drbd_md_put_buffer(device); | 86 | drbd_md_put_buffer(device); |
90 | md_io->done = 1; | 87 | device->md_io.done = 1; |
91 | wake_up(&device->misc_wait); | 88 | wake_up(&device->misc_wait); |
92 | bio_put(bio); | 89 | bio_put(bio); |
93 | if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */ | 90 | if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */ |
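With the former struct drbd_md_io now embedded in struct drbd_device, the completion handler above only records md_io.error, drops the buffer reference and sets md_io.done. For illustration only, a minimal sketch of a matching submit-and-wait path; this is not the actual _drbd_md_sync_page_io(), and submit_md_bio() is a hypothetical stand-in for the real bio setup and submission:

	/* Sketch: pair a submitted meta-data bio with drbd_md_io_complete()
	 * above and wait for it synchronously. */
	static int md_sync_io_sketch(struct drbd_device *device, struct bio *bio)
	{
		device->md_io.done = 0;
		device->md_io.error = -ENODEV;		/* pessimistic default */

		bio->bi_private = device;		/* completion expects the device */
		bio->bi_end_io = drbd_md_io_complete;
		submit_md_bio(bio);			/* hypothetical submit helper */

		/* drbd_md_io_complete() sets ->done and wakes misc_wait */
		wait_event(device->misc_wait, device->md_io.done);
		return device->md_io.error;
	}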
@@ -135,6 +132,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l | |||
135 | i = peer_req->i; | 132 | i = peer_req->i; |
136 | do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; | 133 | do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; |
137 | block_id = peer_req->block_id; | 134 | block_id = peer_req->block_id; |
135 | peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; | ||
138 | 136 | ||
139 | spin_lock_irqsave(&device->resource->req_lock, flags); | 137 | spin_lock_irqsave(&device->resource->req_lock, flags); |
140 | device->writ_cnt += peer_req->i.size >> 9; | 138 | device->writ_cnt += peer_req->i.size >> 9; |
@@ -398,9 +396,6 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, | |||
398 | if (!get_ldev(device)) | 396 | if (!get_ldev(device)) |
399 | return -EIO; | 397 | return -EIO; |
400 | 398 | ||
401 | if (drbd_rs_should_slow_down(device, sector)) | ||
402 | goto defer; | ||
403 | |||
404 | /* GFP_TRY, because if there is no memory available right now, this may | 399 | /* GFP_TRY, because if there is no memory available right now, this may |
405 | * be rescheduled for later. It is "only" background resync, after all. */ | 400 | * be rescheduled for later. It is "only" background resync, after all. */ |
406 | peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, | 401 | peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, |
@@ -410,7 +405,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, | |||
410 | 405 | ||
411 | peer_req->w.cb = w_e_send_csum; | 406 | peer_req->w.cb = w_e_send_csum; |
412 | spin_lock_irq(&device->resource->req_lock); | 407 | spin_lock_irq(&device->resource->req_lock); |
413 | list_add(&peer_req->w.list, &device->read_ee); | 408 | list_add_tail(&peer_req->w.list, &device->read_ee); |
414 | spin_unlock_irq(&device->resource->req_lock); | 409 | spin_unlock_irq(&device->resource->req_lock); |
415 | 410 | ||
416 | atomic_add(size >> 9, &device->rs_sect_ev); | 411 | atomic_add(size >> 9, &device->rs_sect_ev); |
@@ -452,9 +447,9 @@ void resync_timer_fn(unsigned long data) | |||
452 | { | 447 | { |
453 | struct drbd_device *device = (struct drbd_device *) data; | 448 | struct drbd_device *device = (struct drbd_device *) data; |
454 | 449 | ||
455 | if (list_empty(&device->resync_work.list)) | 450 | drbd_queue_work_if_unqueued( |
456 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | 451 | &first_peer_device(device)->connection->sender_work, |
457 | &device->resync_work); | 452 | &device->resync_work); |
458 | } | 453 | } |
459 | 454 | ||
460 | static void fifo_set(struct fifo_buffer *fb, int value) | 455 | static void fifo_set(struct fifo_buffer *fb, int value) |
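resync_timer_fn() now defers the "queue only if not already queued" check to drbd_queue_work_if_unqueued(). Its body is not part of this hunk; presumably it performs the list_empty() test under the queue lock, closing the race the old open-coded check had. A sketch under that assumption (field names q, q_lock, q_wait taken from struct drbd_work_queue; the real helper may differ):

	/* Assumed behaviour of drbd_queue_work_if_unqueued(); sketch only. */
	static void queue_work_if_unqueued_sketch(struct drbd_work_queue *q,
						  struct drbd_work *w)
	{
		unsigned long flags;

		spin_lock_irqsave(&q->q_lock, flags);
		if (list_empty(&w->list))		/* not queued yet */
			list_add_tail(&w->list, &q->q);
		spin_unlock_irqrestore(&q->q_lock, flags);
		wake_up(&q->q_wait);
	}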
@@ -504,9 +499,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size) | |||
504 | static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) | 499 | static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) |
505 | { | 500 | { |
506 | struct disk_conf *dc; | 501 | struct disk_conf *dc; |
507 | unsigned int want; /* The number of sectors we want in the proxy */ | 502 | unsigned int want; /* The number of sectors we want in-flight */ |
508 | int req_sect; /* Number of sectors to request in this turn */ | 503 | int req_sect; /* Number of sectors to request in this turn */ |
509 | int correction; /* Number of sectors more we need in the proxy*/ | 504 | int correction; /* Number of sectors more we need in-flight */ |
510 | int cps; /* correction per invocation of drbd_rs_controller() */ | 505 | int cps; /* correction per invocation of drbd_rs_controller() */ |
511 | int steps; /* Number of time steps to plan ahead */ | 506 | int steps; /* Number of time steps to plan ahead */ |
512 | int curr_corr; | 507 | int curr_corr; |
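The variables above describe a simple feedback loop: compare how many resync sectors we want in flight with how many actually are, and spread the difference over the planning horizon. A simplified sketch of that step follows; the real drbd_rs_controller() additionally maintains a fifo plan and clamps the result, which is omitted here:

	/* Simplified feedback step; fifo plan handling and clamping are left out. */
	static int rs_controller_step_sketch(int want, int in_flight, int planned,
					     int sect_in, int steps)
	{
		int correction = want - in_flight - planned; /* sectors still missing */
		int cps = correction / steps;                /* correction per invocation */
		int req_sect = sect_in + cps;                /* refill what drained, plus correction */

		return req_sect > 0 ? req_sect : 0;
	}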
@@ -577,20 +572,27 @@ static int drbd_rs_number_requests(struct drbd_device *device) | |||
577 | * potentially causing a distributed deadlock on congestion during | 572 | * potentially causing a distributed deadlock on congestion during |
578 | * online-verify or (checksum-based) resync, if max-buffers, | 573 | * online-verify or (checksum-based) resync, if max-buffers, |
579 | * socket buffer sizes and resync rate settings are mis-configured. */ | 574 | * socket buffer sizes and resync rate settings are mis-configured. */ |
580 | if (mxb - device->rs_in_flight < number) | 575 | |
581 | number = mxb - device->rs_in_flight; | 576 | /* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k), |
577 | * mxb (as used here, and in drbd_alloc_pages on the peer) is | ||
578 | * "number of pages" (typically also 4k), | ||
579 | * but "rs_in_flight" is in "sectors" (512 Byte). */ | ||
580 | if (mxb - device->rs_in_flight/8 < number) | ||
581 | number = mxb - device->rs_in_flight/8; | ||
582 | 582 | ||
583 | return number; | 583 | return number; |
584 | } | 584 | } |
585 | 585 | ||
586 | static int make_resync_request(struct drbd_device *device, int cancel) | 586 | static int make_resync_request(struct drbd_device *const device, int cancel) |
587 | { | 587 | { |
588 | struct drbd_peer_device *const peer_device = first_peer_device(device); | ||
589 | struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; | ||
588 | unsigned long bit; | 590 | unsigned long bit; |
589 | sector_t sector; | 591 | sector_t sector; |
590 | const sector_t capacity = drbd_get_capacity(device->this_bdev); | 592 | const sector_t capacity = drbd_get_capacity(device->this_bdev); |
591 | int max_bio_size; | 593 | int max_bio_size; |
592 | int number, rollback_i, size; | 594 | int number, rollback_i, size; |
593 | int align, queued, sndbuf; | 595 | int align, requeue = 0; |
594 | int i = 0; | 596 | int i = 0; |
595 | 597 | ||
596 | if (unlikely(cancel)) | 598 | if (unlikely(cancel)) |
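The new comment above is about mixed units: "number" and mxb are counted in 4 KiB blocks (BM_BLOCK_SIZE resp. pages), while rs_in_flight counts 512-byte sectors, hence the division by 8. A worked example of just that conversion, as a stand-alone sketch:

	/* Illustration of the unit conversion described above:
	 * one 4 KiB block == 8 sectors of 512 bytes. */
	static int resync_requests_left_sketch(int mxb, int rs_in_flight_sectors,
					       int number)
	{
		int in_flight_4k = rs_in_flight_sectors / 8; /* sectors -> 4 KiB blocks */

		if (mxb - in_flight_4k < number)
			number = mxb - in_flight_4k;
		return number;
	}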
@@ -617,17 +619,22 @@ static int make_resync_request(struct drbd_device *device, int cancel) | |||
617 | goto requeue; | 619 | goto requeue; |
618 | 620 | ||
619 | for (i = 0; i < number; i++) { | 621 | for (i = 0; i < number; i++) { |
620 | /* Stop generating RS requests, when half of the send buffer is filled */ | 622 | /* Stop generating RS requests when half of the send buffer is filled, |
621 | mutex_lock(&first_peer_device(device)->connection->data.mutex); | 623 | * but notify TCP that we'd like to have more space. */ |
622 | if (first_peer_device(device)->connection->data.socket) { | 624 | mutex_lock(&connection->data.mutex); |
623 | queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued; | 625 | if (connection->data.socket) { |
624 | sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf; | 626 | struct sock *sk = connection->data.socket->sk; |
625 | } else { | 627 | int queued = sk->sk_wmem_queued; |
626 | queued = 1; | 628 | int sndbuf = sk->sk_sndbuf; |
627 | sndbuf = 0; | 629 | if (queued > sndbuf / 2) { |
628 | } | 630 | requeue = 1; |
629 | mutex_unlock(&first_peer_device(device)->connection->data.mutex); | 631 | if (sk->sk_socket) |
630 | if (queued > sndbuf / 2) | 632 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
633 | } | ||
634 | } else | ||
635 | requeue = 1; | ||
636 | mutex_unlock(&connection->data.mutex); | ||
637 | if (requeue) | ||
631 | goto requeue; | 638 | goto requeue; |
632 | 639 | ||
633 | next_sector: | 640 | next_sector: |
@@ -642,8 +649,7 @@ next_sector: | |||
642 | 649 | ||
643 | sector = BM_BIT_TO_SECT(bit); | 650 | sector = BM_BIT_TO_SECT(bit); |
644 | 651 | ||
645 | if (drbd_rs_should_slow_down(device, sector) || | 652 | if (drbd_try_rs_begin_io(device, sector)) { |
646 | drbd_try_rs_begin_io(device, sector)) { | ||
647 | device->bm_resync_fo = bit; | 653 | device->bm_resync_fo = bit; |
648 | goto requeue; | 654 | goto requeue; |
649 | } | 655 | } |
@@ -696,9 +702,9 @@ next_sector: | |||
696 | /* adjust very last sectors, in case we are oddly sized */ | 702 | /* adjust very last sectors, in case we are oddly sized */ |
697 | if (sector + (size>>9) > capacity) | 703 | if (sector + (size>>9) > capacity) |
698 | size = (capacity-sector)<<9; | 704 | size = (capacity-sector)<<9; |
699 | if (first_peer_device(device)->connection->agreed_pro_version >= 89 && | 705 | |
700 | first_peer_device(device)->connection->csums_tfm) { | 706 | if (device->use_csums) { |
701 | switch (read_for_csum(first_peer_device(device), sector, size)) { | 707 | switch (read_for_csum(peer_device, sector, size)) { |
702 | case -EIO: /* Disk failure */ | 708 | case -EIO: /* Disk failure */ |
703 | put_ldev(device); | 709 | put_ldev(device); |
704 | return -EIO; | 710 | return -EIO; |
@@ -717,7 +723,7 @@ next_sector: | |||
717 | int err; | 723 | int err; |
718 | 724 | ||
719 | inc_rs_pending(device); | 725 | inc_rs_pending(device); |
720 | err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST, | 726 | err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST, |
721 | sector, size, ID_SYNCER); | 727 | sector, size, ID_SYNCER); |
722 | if (err) { | 728 | if (err) { |
723 | drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); | 729 | drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); |
@@ -774,8 +780,7 @@ static int make_ov_request(struct drbd_device *device, int cancel) | |||
774 | 780 | ||
775 | size = BM_BLOCK_SIZE; | 781 | size = BM_BLOCK_SIZE; |
776 | 782 | ||
777 | if (drbd_rs_should_slow_down(device, sector) || | 783 | if (drbd_try_rs_begin_io(device, sector)) { |
778 | drbd_try_rs_begin_io(device, sector)) { | ||
779 | device->ov_position = sector; | 784 | device->ov_position = sector; |
780 | goto requeue; | 785 | goto requeue; |
781 | } | 786 | } |
@@ -911,7 +916,7 @@ int drbd_resync_finished(struct drbd_device *device) | |||
911 | if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) | 916 | if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) |
912 | khelper_cmd = "after-resync-target"; | 917 | khelper_cmd = "after-resync-target"; |
913 | 918 | ||
914 | if (first_peer_device(device)->connection->csums_tfm && device->rs_total) { | 919 | if (device->use_csums && device->rs_total) { |
915 | const unsigned long s = device->rs_same_csum; | 920 | const unsigned long s = device->rs_same_csum; |
916 | const unsigned long t = device->rs_total; | 921 | const unsigned long t = device->rs_total; |
917 | const int ratio = | 922 | const int ratio = |
@@ -1351,13 +1356,15 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) | |||
1351 | { | 1356 | { |
1352 | struct drbd_request *req = container_of(w, struct drbd_request, w); | 1357 | struct drbd_request *req = container_of(w, struct drbd_request, w); |
1353 | struct drbd_device *device = req->device; | 1358 | struct drbd_device *device = req->device; |
1354 | struct drbd_connection *connection = first_peer_device(device)->connection; | 1359 | struct drbd_peer_device *const peer_device = first_peer_device(device); |
1360 | struct drbd_connection *const connection = peer_device->connection; | ||
1355 | int err; | 1361 | int err; |
1356 | 1362 | ||
1357 | if (unlikely(cancel)) { | 1363 | if (unlikely(cancel)) { |
1358 | req_mod(req, SEND_CANCELED); | 1364 | req_mod(req, SEND_CANCELED); |
1359 | return 0; | 1365 | return 0; |
1360 | } | 1366 | } |
1367 | req->pre_send_jif = jiffies; | ||
1361 | 1368 | ||
1362 | /* this time, no connection->send.current_epoch_writes++; | 1369 | /* this time, no connection->send.current_epoch_writes++; |
1363 | * If it was sent, it was the closing barrier for the last | 1370 | * If it was sent, it was the closing barrier for the last |
@@ -1365,7 +1372,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) | |||
1365 | * No more barriers will be sent, until we leave AHEAD mode again. */ | 1372 | * No more barriers will be sent, until we leave AHEAD mode again. */ |
1366 | maybe_send_barrier(connection, req->epoch); | 1373 | maybe_send_barrier(connection, req->epoch); |
1367 | 1374 | ||
1368 | err = drbd_send_out_of_sync(first_peer_device(device), req); | 1375 | err = drbd_send_out_of_sync(peer_device, req); |
1369 | req_mod(req, OOS_HANDED_TO_NETWORK); | 1376 | req_mod(req, OOS_HANDED_TO_NETWORK); |
1370 | 1377 | ||
1371 | return err; | 1378 | return err; |
@@ -1380,19 +1387,21 @@ int w_send_dblock(struct drbd_work *w, int cancel) | |||
1380 | { | 1387 | { |
1381 | struct drbd_request *req = container_of(w, struct drbd_request, w); | 1388 | struct drbd_request *req = container_of(w, struct drbd_request, w); |
1382 | struct drbd_device *device = req->device; | 1389 | struct drbd_device *device = req->device; |
1383 | struct drbd_connection *connection = first_peer_device(device)->connection; | 1390 | struct drbd_peer_device *const peer_device = first_peer_device(device); |
1391 | struct drbd_connection *connection = peer_device->connection; | ||
1384 | int err; | 1392 | int err; |
1385 | 1393 | ||
1386 | if (unlikely(cancel)) { | 1394 | if (unlikely(cancel)) { |
1387 | req_mod(req, SEND_CANCELED); | 1395 | req_mod(req, SEND_CANCELED); |
1388 | return 0; | 1396 | return 0; |
1389 | } | 1397 | } |
1398 | req->pre_send_jif = jiffies; | ||
1390 | 1399 | ||
1391 | re_init_if_first_write(connection, req->epoch); | 1400 | re_init_if_first_write(connection, req->epoch); |
1392 | maybe_send_barrier(connection, req->epoch); | 1401 | maybe_send_barrier(connection, req->epoch); |
1393 | connection->send.current_epoch_writes++; | 1402 | connection->send.current_epoch_writes++; |
1394 | 1403 | ||
1395 | err = drbd_send_dblock(first_peer_device(device), req); | 1404 | err = drbd_send_dblock(peer_device, req); |
1396 | req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); | 1405 | req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); |
1397 | 1406 | ||
1398 | return err; | 1407 | return err; |
@@ -1407,19 +1416,21 @@ int w_send_read_req(struct drbd_work *w, int cancel) | |||
1407 | { | 1416 | { |
1408 | struct drbd_request *req = container_of(w, struct drbd_request, w); | 1417 | struct drbd_request *req = container_of(w, struct drbd_request, w); |
1409 | struct drbd_device *device = req->device; | 1418 | struct drbd_device *device = req->device; |
1410 | struct drbd_connection *connection = first_peer_device(device)->connection; | 1419 | struct drbd_peer_device *const peer_device = first_peer_device(device); |
1420 | struct drbd_connection *connection = peer_device->connection; | ||
1411 | int err; | 1421 | int err; |
1412 | 1422 | ||
1413 | if (unlikely(cancel)) { | 1423 | if (unlikely(cancel)) { |
1414 | req_mod(req, SEND_CANCELED); | 1424 | req_mod(req, SEND_CANCELED); |
1415 | return 0; | 1425 | return 0; |
1416 | } | 1426 | } |
1427 | req->pre_send_jif = jiffies; | ||
1417 | 1428 | ||
1418 | /* Even read requests may close a write epoch, | 1429 | /* Even read requests may close a write epoch, |
1419 | * if there was any yet. */ | 1430 | * if there was any yet. */ |
1420 | maybe_send_barrier(connection, req->epoch); | 1431 | maybe_send_barrier(connection, req->epoch); |
1421 | 1432 | ||
1422 | err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size, | 1433 | err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size, |
1423 | (unsigned long)req); | 1434 | (unsigned long)req); |
1424 | 1435 | ||
1425 | req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); | 1436 | req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); |
@@ -1433,7 +1444,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) | |||
1433 | struct drbd_device *device = req->device; | 1444 | struct drbd_device *device = req->device; |
1434 | 1445 | ||
1435 | if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) | 1446 | if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) |
1436 | drbd_al_begin_io(device, &req->i, false); | 1447 | drbd_al_begin_io(device, &req->i); |
1437 | 1448 | ||
1438 | drbd_req_make_private_bio(req, req->master_bio); | 1449 | drbd_req_make_private_bio(req, req->master_bio); |
1439 | req->private_bio->bi_bdev = device->ldev->backing_bdev; | 1450 | req->private_bio->bi_bdev = device->ldev->backing_bdev; |
@@ -1601,26 +1612,32 @@ void drbd_rs_controller_reset(struct drbd_device *device) | |||
1601 | void start_resync_timer_fn(unsigned long data) | 1612 | void start_resync_timer_fn(unsigned long data) |
1602 | { | 1613 | { |
1603 | struct drbd_device *device = (struct drbd_device *) data; | 1614 | struct drbd_device *device = (struct drbd_device *) data; |
1604 | 1615 | drbd_device_post_work(device, RS_START); | |
1605 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | ||
1606 | &device->start_resync_work); | ||
1607 | } | 1616 | } |
1608 | 1617 | ||
1609 | int w_start_resync(struct drbd_work *w, int cancel) | 1618 | static void do_start_resync(struct drbd_device *device) |
1610 | { | 1619 | { |
1611 | struct drbd_device *device = | ||
1612 | container_of(w, struct drbd_device, start_resync_work); | ||
1613 | |||
1614 | if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) { | 1620 | if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) { |
1615 | drbd_warn(device, "w_start_resync later...\n"); | 1621 | drbd_warn(device, "postponing start_resync ...\n"); |
1616 | device->start_resync_timer.expires = jiffies + HZ/10; | 1622 | device->start_resync_timer.expires = jiffies + HZ/10; |
1617 | add_timer(&device->start_resync_timer); | 1623 | add_timer(&device->start_resync_timer); |
1618 | return 0; | 1624 | return; |
1619 | } | 1625 | } |
1620 | 1626 | ||
1621 | drbd_start_resync(device, C_SYNC_SOURCE); | 1627 | drbd_start_resync(device, C_SYNC_SOURCE); |
1622 | clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); | 1628 | clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); |
1623 | return 0; | 1629 | } |
1630 | |||
1631 | static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device) | ||
1632 | { | ||
1633 | bool csums_after_crash_only; | ||
1634 | rcu_read_lock(); | ||
1635 | csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only; | ||
1636 | rcu_read_unlock(); | ||
1637 | return connection->agreed_pro_version >= 89 && /* supported? */ | ||
1638 | connection->csums_tfm && /* configured? */ | ||
1639 | (csums_after_crash_only == 0 /* use for each resync? */ | ||
1640 | || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ | ||
1624 | } | 1641 | } |
1625 | 1642 | ||
1626 | /** | 1643 | /** |
@@ -1633,6 +1650,8 @@ int w_start_resync(struct drbd_work *w, int cancel) | |||
1633 | */ | 1650 | */ |
1634 | void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | 1651 | void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) |
1635 | { | 1652 | { |
1653 | struct drbd_peer_device *peer_device = first_peer_device(device); | ||
1654 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | ||
1636 | union drbd_state ns; | 1655 | union drbd_state ns; |
1637 | int r; | 1656 | int r; |
1638 | 1657 | ||
@@ -1651,7 +1670,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1651 | if (r > 0) { | 1670 | if (r > 0) { |
1652 | drbd_info(device, "before-resync-target handler returned %d, " | 1671 | drbd_info(device, "before-resync-target handler returned %d, " |
1653 | "dropping connection.\n", r); | 1672 | "dropping connection.\n", r); |
1654 | conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD); | 1673 | conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); |
1655 | return; | 1674 | return; |
1656 | } | 1675 | } |
1657 | } else /* C_SYNC_SOURCE */ { | 1676 | } else /* C_SYNC_SOURCE */ { |
@@ -1664,7 +1683,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1664 | } else { | 1683 | } else { |
1665 | drbd_info(device, "before-resync-source handler returned %d, " | 1684 | drbd_info(device, "before-resync-source handler returned %d, " |
1666 | "dropping connection.\n", r); | 1685 | "dropping connection.\n", r); |
1667 | conn_request_state(first_peer_device(device)->connection, | 1686 | conn_request_state(connection, |
1668 | NS(conn, C_DISCONNECTING), CS_HARD); | 1687 | NS(conn, C_DISCONNECTING), CS_HARD); |
1669 | return; | 1688 | return; |
1670 | } | 1689 | } |
@@ -1672,7 +1691,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1672 | } | 1691 | } |
1673 | } | 1692 | } |
1674 | 1693 | ||
1675 | if (current == first_peer_device(device)->connection->worker.task) { | 1694 | if (current == connection->worker.task) { |
1676 | /* The worker should not sleep waiting for state_mutex, | 1695 | /* The worker should not sleep waiting for state_mutex, |
1677 | that can take long */ | 1696 | that can take long */ |
1678 | if (!mutex_trylock(device->state_mutex)) { | 1697 | if (!mutex_trylock(device->state_mutex)) { |
@@ -1733,11 +1752,20 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1733 | device->rs_mark_time[i] = now; | 1752 | device->rs_mark_time[i] = now; |
1734 | } | 1753 | } |
1735 | _drbd_pause_after(device); | 1754 | _drbd_pause_after(device); |
1755 | /* Forget potentially stale cached per resync extent bit-counts. | ||
1756 | * Open-coded drbd_rs_cancel_all(device); we already have IRQs | ||
1757 | * disabled and know the disk state is ok. */ | ||
1758 | spin_lock(&device->al_lock); | ||
1759 | lc_reset(device->resync); | ||
1760 | device->resync_locked = 0; | ||
1761 | device->resync_wenr = LC_FREE; | ||
1762 | spin_unlock(&device->al_lock); | ||
1736 | } | 1763 | } |
1737 | write_unlock(&global_state_lock); | 1764 | write_unlock(&global_state_lock); |
1738 | spin_unlock_irq(&device->resource->req_lock); | 1765 | spin_unlock_irq(&device->resource->req_lock); |
1739 | 1766 | ||
1740 | if (r == SS_SUCCESS) { | 1767 | if (r == SS_SUCCESS) { |
1768 | wake_up(&device->al_wait); /* for lc_reset() above */ | ||
1741 | /* reset rs_last_bcast when a resync or verify is started, | 1769 | /* reset rs_last_bcast when a resync or verify is started, |
1742 | * to deal with potential jiffies wrap. */ | 1770 | * to deal with potential jiffies wrap. */ |
1743 | device->rs_last_bcast = jiffies - HZ; | 1771 | device->rs_last_bcast = jiffies - HZ; |
@@ -1746,8 +1774,12 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1746 | drbd_conn_str(ns.conn), | 1774 | drbd_conn_str(ns.conn), |
1747 | (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), | 1775 | (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), |
1748 | (unsigned long) device->rs_total); | 1776 | (unsigned long) device->rs_total); |
1749 | if (side == C_SYNC_TARGET) | 1777 | if (side == C_SYNC_TARGET) { |
1750 | device->bm_resync_fo = 0; | 1778 | device->bm_resync_fo = 0; |
1779 | device->use_csums = use_checksum_based_resync(connection, device); | ||
1780 | } else { | ||
1781 | device->use_csums = 0; | ||
1782 | } | ||
1751 | 1783 | ||
1752 | /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid | 1784 | /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid |
1753 | * with w_send_oos, or the sync target will get confused as to | 1785 | * with w_send_oos, or the sync target will get confused as to |
@@ -1756,12 +1788,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1756 | * drbd_resync_finished from here in that case. | 1788 | * drbd_resync_finished from here in that case. |
1757 | * We drbd_gen_and_send_sync_uuid here for protocol < 96, | 1789 | * We drbd_gen_and_send_sync_uuid here for protocol < 96, |
1758 | * and from after_state_ch otherwise. */ | 1790 | * and from after_state_ch otherwise. */ |
1759 | if (side == C_SYNC_SOURCE && | 1791 | if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96) |
1760 | first_peer_device(device)->connection->agreed_pro_version < 96) | 1792 | drbd_gen_and_send_sync_uuid(peer_device); |
1761 | drbd_gen_and_send_sync_uuid(first_peer_device(device)); | ||
1762 | 1793 | ||
1763 | if (first_peer_device(device)->connection->agreed_pro_version < 95 && | 1794 | if (connection->agreed_pro_version < 95 && device->rs_total == 0) { |
1764 | device->rs_total == 0) { | ||
1765 | /* This still has a race (about when exactly the peers | 1795 | /* This still has a race (about when exactly the peers |
1766 | * detect connection loss) that can lead to a full sync | 1796 | * detect connection loss) that can lead to a full sync |
1767 | * on next handshake. In 8.3.9 we fixed this with explicit | 1797 | * on next handshake. In 8.3.9 we fixed this with explicit |
@@ -1777,7 +1807,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1777 | int timeo; | 1807 | int timeo; |
1778 | 1808 | ||
1779 | rcu_read_lock(); | 1809 | rcu_read_lock(); |
1780 | nc = rcu_dereference(first_peer_device(device)->connection->net_conf); | 1810 | nc = rcu_dereference(connection->net_conf); |
1781 | timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; | 1811 | timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; |
1782 | rcu_read_unlock(); | 1812 | rcu_read_unlock(); |
1783 | schedule_timeout_interruptible(timeo); | 1813 | schedule_timeout_interruptible(timeo); |
@@ -1799,10 +1829,165 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1799 | mutex_unlock(device->state_mutex); | 1829 | mutex_unlock(device->state_mutex); |
1800 | } | 1830 | } |
1801 | 1831 | ||
1832 | static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done) | ||
1833 | { | ||
1834 | struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; | ||
1835 | device->rs_last_bcast = jiffies; | ||
1836 | |||
1837 | if (!get_ldev(device)) | ||
1838 | return; | ||
1839 | |||
1840 | drbd_bm_write_lazy(device, 0); | ||
1841 | if (resync_done && is_sync_state(device->state.conn)) | ||
1842 | drbd_resync_finished(device); | ||
1843 | |||
1844 | drbd_bcast_event(device, &sib); | ||
1845 | /* update timestamp, in case it took a while to write out stuff */ | ||
1846 | device->rs_last_bcast = jiffies; | ||
1847 | put_ldev(device); | ||
1848 | } | ||
1849 | |||
1850 | static void drbd_ldev_destroy(struct drbd_device *device) | ||
1851 | { | ||
1852 | lc_destroy(device->resync); | ||
1853 | device->resync = NULL; | ||
1854 | lc_destroy(device->act_log); | ||
1855 | device->act_log = NULL; | ||
1856 | __no_warn(local, | ||
1857 | drbd_free_ldev(device->ldev); | ||
1858 | device->ldev = NULL;); | ||
1859 | clear_bit(GOING_DISKLESS, &device->flags); | ||
1860 | wake_up(&device->misc_wait); | ||
1861 | } | ||
1862 | |||
1863 | static void go_diskless(struct drbd_device *device) | ||
1864 | { | ||
1865 | D_ASSERT(device, device->state.disk == D_FAILED); | ||
1866 | /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will | ||
1867 | * inc/dec it frequently. Once we are D_DISKLESS, no one will touch | ||
1868 | * the protected members anymore, though, so once put_ldev reaches zero | ||
1869 | * again, it will be safe to free them. */ | ||
1870 | |||
1871 | /* Try to write changed bitmap pages, read errors may have just | ||
1872 | * set some bits outside the area covered by the activity log. | ||
1873 | * | ||
1874 | * If we have an IO error during the bitmap writeout, | ||
1875 | * we will want a full sync next time, just in case. | ||
1876 | * (Do we want a specific meta data flag for this?) | ||
1877 | * | ||
1878 | * If that does not make it to stable storage either, | ||
1879 | * we cannot do anything about that anymore. | ||
1880 | * | ||
1881 | * We still need to check if both bitmap and ldev are present, we may | ||
1882 | * end up here after a failed attach, before ldev was even assigned. | ||
1883 | */ | ||
1884 | if (device->bitmap && device->ldev) { | ||
1885 | /* An interrupted resync or similar is allowed to recount bits | ||
1886 | * while we detach. | ||
1887 | * Any modifications would not be expected anymore, though. | ||
1888 | */ | ||
1889 | if (drbd_bitmap_io_from_worker(device, drbd_bm_write, | ||
1890 | "detach", BM_LOCKED_TEST_ALLOWED)) { | ||
1891 | if (test_bit(WAS_READ_ERROR, &device->flags)) { | ||
1892 | drbd_md_set_flag(device, MDF_FULL_SYNC); | ||
1893 | drbd_md_sync(device); | ||
1894 | } | ||
1895 | } | ||
1896 | } | ||
1897 | |||
1898 | drbd_force_state(device, NS(disk, D_DISKLESS)); | ||
1899 | } | ||
1900 | |||
1901 | static int do_md_sync(struct drbd_device *device) | ||
1902 | { | ||
1903 | drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); | ||
1904 | drbd_md_sync(device); | ||
1905 | return 0; | ||
1906 | } | ||
1907 | |||
1908 | /* only called from drbd_worker thread, no locking */ | ||
1909 | void __update_timing_details( | ||
1910 | struct drbd_thread_timing_details *tdp, | ||
1911 | unsigned int *cb_nr, | ||
1912 | void *cb, | ||
1913 | const char *fn, const unsigned int line) | ||
1914 | { | ||
1915 | unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST; | ||
1916 | struct drbd_thread_timing_details *td = tdp + i; | ||
1917 | |||
1918 | td->start_jif = jiffies; | ||
1919 | td->cb_addr = cb; | ||
1920 | td->caller_fn = fn; | ||
1921 | td->line = line; | ||
1922 | td->cb_nr = *cb_nr; | ||
1923 | |||
1924 | i = (i+1) % DRBD_THREAD_DETAILS_HIST; | ||
1925 | td = tdp + i; | ||
1926 | memset(td, 0, sizeof(*td)); | ||
1927 | |||
1928 | ++(*cb_nr); | ||
1929 | } | ||
1930 | |||
1931 | #define WORK_PENDING(work_bit, todo) (todo & (1UL << work_bit)) | ||
1932 | static void do_device_work(struct drbd_device *device, const unsigned long todo) | ||
1933 | { | ||
1934 | if (WORK_PENDING(MD_SYNC, todo)) | ||
1935 | do_md_sync(device); | ||
1936 | if (WORK_PENDING(RS_DONE, todo) || | ||
1937 | WORK_PENDING(RS_PROGRESS, todo)) | ||
1938 | update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo)); | ||
1939 | if (WORK_PENDING(GO_DISKLESS, todo)) | ||
1940 | go_diskless(device); | ||
1941 | if (WORK_PENDING(DESTROY_DISK, todo)) | ||
1942 | drbd_ldev_destroy(device); | ||
1943 | if (WORK_PENDING(RS_START, todo)) | ||
1944 | do_start_resync(device); | ||
1945 | } | ||
1946 | |||
1947 | #define DRBD_DEVICE_WORK_MASK \ | ||
1948 | ((1UL << GO_DISKLESS) \ | ||
1949 | |(1UL << DESTROY_DISK) \ | ||
1950 | |(1UL << MD_SYNC) \ | ||
1951 | |(1UL << RS_START) \ | ||
1952 | |(1UL << RS_PROGRESS) \ | ||
1953 | |(1UL << RS_DONE) \ | ||
1954 | ) | ||
1955 | |||
1956 | static unsigned long get_work_bits(unsigned long *flags) | ||
1957 | { | ||
1958 | unsigned long old, new; | ||
1959 | do { | ||
1960 | old = *flags; | ||
1961 | new = old & ~DRBD_DEVICE_WORK_MASK; | ||
1962 | } while (cmpxchg(flags, old, new) != old); | ||
1963 | return old & DRBD_DEVICE_WORK_MASK; | ||
1964 | } | ||
1965 | |||
1966 | static void do_unqueued_work(struct drbd_connection *connection) | ||
1967 | { | ||
1968 | struct drbd_peer_device *peer_device; | ||
1969 | int vnr; | ||
1970 | |||
1971 | rcu_read_lock(); | ||
1972 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | ||
1973 | struct drbd_device *device = peer_device->device; | ||
1974 | unsigned long todo = get_work_bits(&device->flags); | ||
1975 | if (!todo) | ||
1976 | continue; | ||
1977 | |||
1978 | kref_get(&device->kref); | ||
1979 | rcu_read_unlock(); | ||
1980 | do_device_work(device, todo); | ||
1981 | kref_put(&device->kref, drbd_destroy_device); | ||
1982 | rcu_read_lock(); | ||
1983 | } | ||
1984 | rcu_read_unlock(); | ||
1985 | } | ||
1986 | |||
1802 | static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) | 1987 | static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) |
1803 | { | 1988 | { |
1804 | spin_lock_irq(&queue->q_lock); | 1989 | spin_lock_irq(&queue->q_lock); |
1805 | list_splice_init(&queue->q, work_list); | 1990 | list_splice_tail_init(&queue->q, work_list); |
1806 | spin_unlock_irq(&queue->q_lock); | 1991 | spin_unlock_irq(&queue->q_lock); |
1807 | return !list_empty(work_list); | 1992 | return !list_empty(work_list); |
1808 | } | 1993 | } |
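get_work_bits() above atomically fetches and clears all pending device work bits in a single cmpxchg loop, so no work item can be lost between test and clear. The posting side is not shown in this hunk; a plausible sketch of it follows (the real drbd_device_post_work() lives in drbd_int.h and may differ, in particular in how the worker is woken):

	/* Assumed counterpart to get_work_bits()/do_unqueued_work() above. */
	static void device_post_work_sketch(struct drbd_device *device, int work_bit)
	{
		struct drbd_connection *connection =
			first_peer_device(device)->connection;

		set_bit(work_bit, &device->flags);                /* e.g. RS_START, MD_SYNC */
		set_bit(DEVICE_WORK_PENDING, &connection->flags); /* polled by drbd_worker() */
		wake_up(&connection->sender_work.q_wait);         /* assumption about the wakeup */
	}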
@@ -1851,7 +2036,7 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head * | |||
1851 | /* dequeue single item only, | 2036 | /* dequeue single item only, |
1852 | * we still use drbd_queue_work_front() in some places */ | 2037 | * we still use drbd_queue_work_front() in some places */ |
1853 | if (!list_empty(&connection->sender_work.q)) | 2038 | if (!list_empty(&connection->sender_work.q)) |
1854 | list_move(connection->sender_work.q.next, work_list); | 2039 | list_splice_tail_init(&connection->sender_work.q, work_list); |
1855 | spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ | 2040 | spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ |
1856 | if (!list_empty(work_list) || signal_pending(current)) { | 2041 | if (!list_empty(work_list) || signal_pending(current)) { |
1857 | spin_unlock_irq(&connection->resource->req_lock); | 2042 | spin_unlock_irq(&connection->resource->req_lock); |
@@ -1873,6 +2058,14 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head * | |||
1873 | if (send_barrier) | 2058 | if (send_barrier) |
1874 | maybe_send_barrier(connection, | 2059 | maybe_send_barrier(connection, |
1875 | connection->send.current_epoch_nr + 1); | 2060 | connection->send.current_epoch_nr + 1); |
2061 | |||
2062 | if (test_bit(DEVICE_WORK_PENDING, &connection->flags)) | ||
2063 | break; | ||
2064 | |||
2065 | /* drbd_send() may have called flush_signals() */ | ||
2066 | if (get_t_state(&connection->worker) != RUNNING) | ||
2067 | break; | ||
2068 | |||
1876 | schedule(); | 2069 | schedule(); |
1877 | /* may be woken up for other things but new work, too, | 2070 | /* may be woken up for other things but new work, too, |
1878 | * e.g. if the current epoch got closed. | 2071 | * e.g. if the current epoch got closed. |
@@ -1906,10 +2099,15 @@ int drbd_worker(struct drbd_thread *thi) | |||
1906 | while (get_t_state(thi) == RUNNING) { | 2099 | while (get_t_state(thi) == RUNNING) { |
1907 | drbd_thread_current_set_cpu(thi); | 2100 | drbd_thread_current_set_cpu(thi); |
1908 | 2101 | ||
1909 | /* as long as we use drbd_queue_work_front(), | 2102 | if (list_empty(&work_list)) { |
1910 | * we may only dequeue single work items here, not batches. */ | 2103 | update_worker_timing_details(connection, wait_for_work); |
1911 | if (list_empty(&work_list)) | ||
1912 | wait_for_work(connection, &work_list); | 2104 | wait_for_work(connection, &work_list); |
2105 | } | ||
2106 | |||
2107 | if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) { | ||
2108 | update_worker_timing_details(connection, do_unqueued_work); | ||
2109 | do_unqueued_work(connection); | ||
2110 | } | ||
1913 | 2111 | ||
1914 | if (signal_pending(current)) { | 2112 | if (signal_pending(current)) { |
1915 | flush_signals(current); | 2113 | flush_signals(current); |
@@ -1926,6 +2124,7 @@ int drbd_worker(struct drbd_thread *thi) | |||
1926 | while (!list_empty(&work_list)) { | 2124 | while (!list_empty(&work_list)) { |
1927 | w = list_first_entry(&work_list, struct drbd_work, list); | 2125 | w = list_first_entry(&work_list, struct drbd_work, list); |
1928 | list_del_init(&w->list); | 2126 | list_del_init(&w->list); |
2127 | update_worker_timing_details(connection, w->cb); | ||
1929 | if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0) | 2128 | if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0) |
1930 | continue; | 2129 | continue; |
1931 | if (connection->cstate >= C_WF_REPORT_PARAMS) | 2130 | if (connection->cstate >= C_WF_REPORT_PARAMS) |
@@ -1934,13 +2133,18 @@ int drbd_worker(struct drbd_thread *thi) | |||
1934 | } | 2133 | } |
1935 | 2134 | ||
1936 | do { | 2135 | do { |
2136 | if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) { | ||
2137 | update_worker_timing_details(connection, do_unqueued_work); | ||
2138 | do_unqueued_work(connection); | ||
2139 | } | ||
1937 | while (!list_empty(&work_list)) { | 2140 | while (!list_empty(&work_list)) { |
1938 | w = list_first_entry(&work_list, struct drbd_work, list); | 2141 | w = list_first_entry(&work_list, struct drbd_work, list); |
1939 | list_del_init(&w->list); | 2142 | list_del_init(&w->list); |
2143 | update_worker_timing_details(connection, w->cb); | ||
1940 | w->cb(w, 1); | 2144 | w->cb(w, 1); |
1941 | } | 2145 | } |
1942 | dequeue_work_batch(&connection->sender_work, &work_list); | 2146 | dequeue_work_batch(&connection->sender_work, &work_list); |
1943 | } while (!list_empty(&work_list)); | 2147 | } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags)); |
1944 | 2148 | ||
1945 | rcu_read_lock(); | 2149 | rcu_read_lock(); |
1946 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | 2150 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { |
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index f63d358f3d93..0a581400de0f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -15,17 +15,22 @@ | |||
15 | #include <linux/numa.h> | 15 | #include <linux/numa.h> |
16 | 16 | ||
17 | #define PART_BITS 4 | 17 | #define PART_BITS 4 |
18 | #define VQ_NAME_LEN 16 | ||
18 | 19 | ||
19 | static int major; | 20 | static int major; |
20 | static DEFINE_IDA(vd_index_ida); | 21 | static DEFINE_IDA(vd_index_ida); |
21 | 22 | ||
22 | static struct workqueue_struct *virtblk_wq; | 23 | static struct workqueue_struct *virtblk_wq; |
23 | 24 | ||
25 | struct virtio_blk_vq { | ||
26 | struct virtqueue *vq; | ||
27 | spinlock_t lock; | ||
28 | char name[VQ_NAME_LEN]; | ||
29 | } ____cacheline_aligned_in_smp; | ||
30 | |||
24 | struct virtio_blk | 31 | struct virtio_blk |
25 | { | 32 | { |
26 | struct virtio_device *vdev; | 33 | struct virtio_device *vdev; |
27 | struct virtqueue *vq; | ||
28 | spinlock_t vq_lock; | ||
29 | 34 | ||
30 | /* The disk structure for the kernel. */ | 35 | /* The disk structure for the kernel. */ |
31 | struct gendisk *disk; | 36 | struct gendisk *disk; |
@@ -47,6 +52,10 @@ struct virtio_blk | |||
47 | 52 | ||
48 | /* Ida index - used to track minor number allocations. */ | 53 | /* Ida index - used to track minor number allocations. */ |
49 | int index; | 54 | int index; |
55 | |||
56 | /* num of vqs */ | ||
57 | int num_vqs; | ||
58 | struct virtio_blk_vq *vqs; | ||
50 | }; | 59 | }; |
51 | 60 | ||
52 | struct virtblk_req | 61 | struct virtblk_req |
@@ -133,14 +142,15 @@ static void virtblk_done(struct virtqueue *vq) | |||
133 | { | 142 | { |
134 | struct virtio_blk *vblk = vq->vdev->priv; | 143 | struct virtio_blk *vblk = vq->vdev->priv; |
135 | bool req_done = false; | 144 | bool req_done = false; |
145 | int qid = vq->index; | ||
136 | struct virtblk_req *vbr; | 146 | struct virtblk_req *vbr; |
137 | unsigned long flags; | 147 | unsigned long flags; |
138 | unsigned int len; | 148 | unsigned int len; |
139 | 149 | ||
140 | spin_lock_irqsave(&vblk->vq_lock, flags); | 150 | spin_lock_irqsave(&vblk->vqs[qid].lock, flags); |
141 | do { | 151 | do { |
142 | virtqueue_disable_cb(vq); | 152 | virtqueue_disable_cb(vq); |
143 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { | 153 | while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { |
144 | blk_mq_complete_request(vbr->req); | 154 | blk_mq_complete_request(vbr->req); |
145 | req_done = true; | 155 | req_done = true; |
146 | } | 156 | } |
@@ -151,7 +161,7 @@ static void virtblk_done(struct virtqueue *vq) | |||
151 | /* In case queue is stopped waiting for more buffers. */ | 161 | /* In case queue is stopped waiting for more buffers. */ |
152 | if (req_done) | 162 | if (req_done) |
153 | blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); | 163 | blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); |
154 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | 164 | spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); |
155 | } | 165 | } |
156 | 166 | ||
157 | static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | 167 | static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) |
@@ -160,6 +170,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | |||
160 | struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); | 170 | struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); |
161 | unsigned long flags; | 171 | unsigned long flags; |
162 | unsigned int num; | 172 | unsigned int num; |
173 | int qid = hctx->queue_num; | ||
163 | const bool last = (req->cmd_flags & REQ_END) != 0; | 174 | const bool last = (req->cmd_flags & REQ_END) != 0; |
164 | int err; | 175 | int err; |
165 | bool notify = false; | 176 | bool notify = false; |
@@ -202,12 +213,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | |||
202 | vbr->out_hdr.type |= VIRTIO_BLK_T_IN; | 213 | vbr->out_hdr.type |= VIRTIO_BLK_T_IN; |
203 | } | 214 | } |
204 | 215 | ||
205 | spin_lock_irqsave(&vblk->vq_lock, flags); | 216 | spin_lock_irqsave(&vblk->vqs[qid].lock, flags); |
206 | err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num); | 217 | err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); |
207 | if (err) { | 218 | if (err) { |
208 | virtqueue_kick(vblk->vq); | 219 | virtqueue_kick(vblk->vqs[qid].vq); |
209 | blk_mq_stop_hw_queue(hctx); | 220 | blk_mq_stop_hw_queue(hctx); |
210 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | 221 | spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); |
211 | /* Out of mem doesn't actually happen, since we fall back | 222 | /* Out of mem doesn't actually happen, since we fall back |
212 | * to direct descriptors */ | 223 | * to direct descriptors */ |
213 | if (err == -ENOMEM || err == -ENOSPC) | 224 | if (err == -ENOMEM || err == -ENOSPC) |
@@ -215,12 +226,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | |||
215 | return BLK_MQ_RQ_QUEUE_ERROR; | 226 | return BLK_MQ_RQ_QUEUE_ERROR; |
216 | } | 227 | } |
217 | 228 | ||
218 | if (last && virtqueue_kick_prepare(vblk->vq)) | 229 | if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) |
219 | notify = true; | 230 | notify = true; |
220 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | 231 | spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); |
221 | 232 | ||
222 | if (notify) | 233 | if (notify) |
223 | virtqueue_notify(vblk->vq); | 234 | virtqueue_notify(vblk->vqs[qid].vq); |
224 | return BLK_MQ_RQ_QUEUE_OK; | 235 | return BLK_MQ_RQ_QUEUE_OK; |
225 | } | 236 | } |
226 | 237 | ||
@@ -377,12 +388,64 @@ static void virtblk_config_changed(struct virtio_device *vdev) | |||
377 | static int init_vq(struct virtio_blk *vblk) | 388 | static int init_vq(struct virtio_blk *vblk) |
378 | { | 389 | { |
379 | int err = 0; | 390 | int err = 0; |
391 | int i; | ||
392 | vq_callback_t **callbacks; | ||
393 | const char **names; | ||
394 | struct virtqueue **vqs; | ||
395 | unsigned short num_vqs; | ||
396 | struct virtio_device *vdev = vblk->vdev; | ||
397 | |||
398 | err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, | ||
399 | struct virtio_blk_config, num_queues, | ||
400 | &num_vqs); | ||
401 | if (err) | ||
402 | num_vqs = 1; | ||
403 | |||
404 | vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL); | ||
405 | if (!vblk->vqs) { | ||
406 | err = -ENOMEM; | ||
407 | goto out; | ||
408 | } | ||
409 | |||
410 | names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL); | ||
411 | if (!names) | ||
412 | goto err_names; | ||
413 | |||
414 | callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL); | ||
415 | if (!callbacks) | ||
416 | goto err_callbacks; | ||
417 | |||
418 | vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL); | ||
419 | if (!vqs) | ||
420 | goto err_vqs; | ||
380 | 421 | ||
381 | /* We expect one virtqueue, for output. */ | 422 | for (i = 0; i < num_vqs; i++) { |
382 | vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests"); | 423 | callbacks[i] = virtblk_done; |
383 | if (IS_ERR(vblk->vq)) | 424 | snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); |
384 | err = PTR_ERR(vblk->vq); | 425 | names[i] = vblk->vqs[i].name; |
426 | } | ||
427 | |||
428 | /* Discover virtqueues and write information to configuration. */ | ||
429 | err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); | ||
430 | if (err) | ||
431 | goto err_find_vqs; | ||
385 | 432 | ||
433 | for (i = 0; i < num_vqs; i++) { | ||
434 | spin_lock_init(&vblk->vqs[i].lock); | ||
435 | vblk->vqs[i].vq = vqs[i]; | ||
436 | } | ||
437 | vblk->num_vqs = num_vqs; | ||
438 | |||
439 | err_find_vqs: | ||
440 | kfree(vqs); | ||
441 | err_vqs: | ||
442 | kfree(callbacks); | ||
443 | err_callbacks: | ||
444 | kfree(names); | ||
445 | err_names: | ||
446 | if (err) | ||
447 | kfree(vblk->vqs); | ||
448 | out: | ||
386 | return err; | 449 | return err; |
387 | } | 450 | } |
388 | 451 | ||
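init_vq() above keys everything off the num_queues config field, which is only valid when VIRTIO_BLK_F_MQ was negotiated; on any failure it falls back to a single queue before find_vqs() overwrites err. The same feature-guarded config read in isolation:

	/* Minimal illustration of the config read pattern used by init_vq(). */
	static unsigned short virtblk_num_queues_sketch(struct virtio_device *vdev)
	{
		unsigned short num_vqs;

		if (virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
					 struct virtio_blk_config, num_queues,
					 &num_vqs))
			num_vqs = 1;	/* feature not offered or read failed */
		return num_vqs;
	}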
@@ -551,7 +614,6 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
551 | err = init_vq(vblk); | 614 | err = init_vq(vblk); |
552 | if (err) | 615 | if (err) |
553 | goto out_free_vblk; | 616 | goto out_free_vblk; |
554 | spin_lock_init(&vblk->vq_lock); | ||
555 | 617 | ||
556 | /* FIXME: How many partitions? How long is a piece of string? */ | 618 | /* FIXME: How many partitions? How long is a piece of string? */ |
557 | vblk->disk = alloc_disk(1 << PART_BITS); | 619 | vblk->disk = alloc_disk(1 << PART_BITS); |
@@ -562,7 +624,7 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
562 | 624 | ||
563 | /* Default queue sizing is to fill the ring. */ | 625 | /* Default queue sizing is to fill the ring. */ |
564 | if (!virtblk_queue_depth) { | 626 | if (!virtblk_queue_depth) { |
565 | virtblk_queue_depth = vblk->vq->num_free; | 627 | virtblk_queue_depth = vblk->vqs[0].vq->num_free; |
566 | /* ... but without indirect descs, we use 2 descs per req */ | 628 | /* ... but without indirect descs, we use 2 descs per req */ |
567 | if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) | 629 | if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) |
568 | virtblk_queue_depth /= 2; | 630 | virtblk_queue_depth /= 2; |
@@ -570,7 +632,6 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
570 | 632 | ||
571 | memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); | 633 | memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); |
572 | vblk->tag_set.ops = &virtio_mq_ops; | 634 | vblk->tag_set.ops = &virtio_mq_ops; |
573 | vblk->tag_set.nr_hw_queues = 1; | ||
574 | vblk->tag_set.queue_depth = virtblk_queue_depth; | 635 | vblk->tag_set.queue_depth = virtblk_queue_depth; |
575 | vblk->tag_set.numa_node = NUMA_NO_NODE; | 636 | vblk->tag_set.numa_node = NUMA_NO_NODE; |
576 | vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; | 637 | vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; |
@@ -578,6 +639,7 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
578 | sizeof(struct virtblk_req) + | 639 | sizeof(struct virtblk_req) + |
579 | sizeof(struct scatterlist) * sg_elems; | 640 | sizeof(struct scatterlist) * sg_elems; |
580 | vblk->tag_set.driver_data = vblk; | 641 | vblk->tag_set.driver_data = vblk; |
642 | vblk->tag_set.nr_hw_queues = vblk->num_vqs; | ||
581 | 643 | ||
582 | err = blk_mq_alloc_tag_set(&vblk->tag_set); | 644 | err = blk_mq_alloc_tag_set(&vblk->tag_set); |
583 | if (err) | 645 | if (err) |
@@ -727,6 +789,7 @@ static void virtblk_remove(struct virtio_device *vdev) | |||
727 | refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); | 789 | refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); |
728 | put_disk(vblk->disk); | 790 | put_disk(vblk->disk); |
729 | vdev->config->del_vqs(vdev); | 791 | vdev->config->del_vqs(vdev); |
792 | kfree(vblk->vqs); | ||
730 | kfree(vblk); | 793 | kfree(vblk); |
731 | 794 | ||
732 | /* Only free device id if we don't have any users */ | 795 | /* Only free device id if we don't have any users */ |
@@ -777,7 +840,8 @@ static const struct virtio_device_id id_table[] = { | |||
777 | static unsigned int features[] = { | 840 | static unsigned int features[] = { |
778 | VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, | 841 | VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, |
779 | VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, | 842 | VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, |
780 | VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE | 843 | VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, |
844 | VIRTIO_BLK_F_MQ, | ||
781 | }; | 845 | }; |
782 | 846 | ||
783 | static struct virtio_driver virtio_blk = { | 847 | static struct virtio_driver virtio_blk = { |