commit    641203549a21ba6a701aecd05c3dfc969ec670cc
tree      5e3d177c380ed811b5bf37e0bf9b8098416a9bc6
parent    404a47410c26a115123885977053e9a1a4460929
parent    e93d12ae3be91d18b2a46deebb90a3f516db3d3c
author    Linus Torvalds <torvalds@linux-foundation.org> 2016-01-21 21:19:38 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-01-21 21:19:38 -0500
Merge branch 'for-4.5/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"This is the block driver pull request for 4.5, with the exception of
NVMe, which is in a separate branch and will be posted after this one.
This pull request contains:
- A set of bcache stability fixes, which have been acked by Kent.
These have been used and tested for more than a year by the
community, so it's about time that they got in.
- A set of drbd updates from the drbd team (Andreas, Lars, Philipp),
along with contributions from Markus Elfring and Oleg Drokin.
- A set of fixes for xen blkback/front from the usual suspects (Bob,
Konrad), as well as community-based fixes from Kiri, Julien, and
Peng.
- A year-2038 time fix for sx8 from Shraddha, with a follow-up fix from me.
- A small mtip32xx cleanup from Zhu Yanjun.
- A null_blk division fix from Arnd"
* 'for-4.5/drivers' of git://git.kernel.dk/linux-block: (71 commits)
null_blk: use sector_div instead of do_div
mtip32xx: restrict variables visible in current code module
xen/blkfront: Fix crash if backend doesn't follow the right states.
xen/blkback: Fix two memory leaks.
xen/blkback: make st_ statistics per ring
xen/blkfront: Handle non-indirect grant with 64KB pages
xen-blkfront: Introduce blkif_ring_get_request
xen-blkback: clear PF_NOFREEZE for xen_blkif_schedule()
xen/blkback: Free resources if connect_ring failed.
xen/blocks: Return -EXX instead of -1
xen/blkback: make pool of persistent grants and free pages per-queue
xen/blkback: get the number of hardware queues/rings from blkfront
xen/blkback: pseudo support for multi hardware queues/rings
xen/blkback: separate ring information out of struct xen_blkif
xen/blkfront: correct setting for xen_blkif_max_ring_order
xen/blkfront: make persistent grants pool per-queue
xen/blkfront: Remove duplicate setting of ->xbdev.
xen/blkfront: Cleanup of comments, fix unaligned variables, and syntax errors.
xen/blkfront: negotiate number of queues/rings to be used with backend
xen/blkfront: split per device io_lock
...
33 files changed, 3893 insertions(+), 1326 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 45d2717760fc..b8a717c4f863 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3665,13 +3665,12 @@ F: drivers/scsi/dpt* | |||
3665 | F: drivers/scsi/dpt/ | 3665 | F: drivers/scsi/dpt/ |
3666 | 3666 | ||
3667 | DRBD DRIVER | 3667 | DRBD DRIVER |
3668 | P: Philipp Reisner | 3668 | M: Philipp Reisner <philipp.reisner@linbit.com> |
3669 | P: Lars Ellenberg | 3669 | M: Lars Ellenberg <lars.ellenberg@linbit.com> |
3670 | M: drbd-dev@lists.linbit.com | 3670 | L: drbd-dev@lists.linbit.com |
3671 | L: drbd-user@lists.linbit.com | ||
3672 | W: http://www.drbd.org | 3671 | W: http://www.drbd.org |
3673 | T: git git://git.drbd.org/linux-2.6-drbd.git drbd | 3672 | T: git git://git.linbit.com/linux-drbd.git |
3674 | T: git git://git.drbd.org/drbd-8.3.git | 3673 | T: git git://git.linbit.com/drbd-8.4.git |
3675 | S: Supported | 3674 | S: Supported |
3676 | F: drivers/block/drbd/ | 3675 | F: drivers/block/drbd/ |
3677 | F: lib/lru_cache.c | 3676 | F: lib/lru_cache.c |
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index b3868e7a1ffd..10459a145062 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -288,7 +288,162 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval * | |||
288 | return need_transaction; | 288 | return need_transaction; |
289 | } | 289 | } |
290 | 290 | ||
291 | static int al_write_transaction(struct drbd_device *device); | 291 | #if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) |
292 | /* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT | ||
293 | * are still coupled, or assume too much about their relation. | ||
294 | * Code below will not work if this is violated. | ||
295 | * Will be cleaned up with some followup patch. | ||
296 | */ | ||
297 | # error FIXME | ||
298 | #endif | ||
299 | |||
300 | static unsigned int al_extent_to_bm_page(unsigned int al_enr) | ||
301 | { | ||
302 | return al_enr >> | ||
303 | /* bit to page */ | ||
304 | ((PAGE_SHIFT + 3) - | ||
305 | /* al extent number to bit */ | ||
306 | (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); | ||
307 | } | ||
308 | |||
309 | static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) | ||
310 | { | ||
311 | const unsigned int stripes = device->ldev->md.al_stripes; | ||
312 | const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k; | ||
313 | |||
314 | /* transaction number, modulo on-disk ring buffer wrap around */ | ||
315 | unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k); | ||
316 | |||
317 | /* ... to aligned 4k on disk block */ | ||
318 | t = ((t % stripes) * stripe_size_4kB) + t/stripes; | ||
319 | |||
320 | /* ... to 512 byte sector in activity log */ | ||
321 | t *= 8; | ||
322 | |||
323 | /* ... plus offset to the on disk position */ | ||
324 | return device->ldev->md.md_offset + device->ldev->md.al_offset + t; | ||
325 | } | ||
326 | |||
327 | static int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer) | ||
328 | { | ||
329 | struct lc_element *e; | ||
330 | sector_t sector; | ||
331 | int i, mx; | ||
332 | unsigned extent_nr; | ||
333 | unsigned crc = 0; | ||
334 | int err = 0; | ||
335 | |||
336 | memset(buffer, 0, sizeof(*buffer)); | ||
337 | buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); | ||
338 | buffer->tr_number = cpu_to_be32(device->al_tr_number); | ||
339 | |||
340 | i = 0; | ||
341 | |||
342 | /* Even though no one can start to change this list | ||
343 | * once we set the LC_LOCKED -- from drbd_al_begin_io(), | ||
344 | * lc_try_lock_for_transaction() --, someone may still | ||
345 | * be in the process of changing it. */ | ||
346 | spin_lock_irq(&device->al_lock); | ||
347 | list_for_each_entry(e, &device->act_log->to_be_changed, list) { | ||
348 | if (i == AL_UPDATES_PER_TRANSACTION) { | ||
349 | i++; | ||
350 | break; | ||
351 | } | ||
352 | buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); | ||
353 | buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); | ||
354 | if (e->lc_number != LC_FREE) | ||
355 | drbd_bm_mark_for_writeout(device, | ||
356 | al_extent_to_bm_page(e->lc_number)); | ||
357 | i++; | ||
358 | } | ||
359 | spin_unlock_irq(&device->al_lock); | ||
360 | BUG_ON(i > AL_UPDATES_PER_TRANSACTION); | ||
361 | |||
362 | buffer->n_updates = cpu_to_be16(i); | ||
363 | for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { | ||
364 | buffer->update_slot_nr[i] = cpu_to_be16(-1); | ||
365 | buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); | ||
366 | } | ||
367 | |||
368 | buffer->context_size = cpu_to_be16(device->act_log->nr_elements); | ||
369 | buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle); | ||
370 | |||
371 | mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, | ||
372 | device->act_log->nr_elements - device->al_tr_cycle); | ||
373 | for (i = 0; i < mx; i++) { | ||
374 | unsigned idx = device->al_tr_cycle + i; | ||
375 | extent_nr = lc_element_by_index(device->act_log, idx)->lc_number; | ||
376 | buffer->context[i] = cpu_to_be32(extent_nr); | ||
377 | } | ||
378 | for (; i < AL_CONTEXT_PER_TRANSACTION; i++) | ||
379 | buffer->context[i] = cpu_to_be32(LC_FREE); | ||
380 | |||
381 | device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; | ||
382 | if (device->al_tr_cycle >= device->act_log->nr_elements) | ||
383 | device->al_tr_cycle = 0; | ||
384 | |||
385 | sector = al_tr_number_to_on_disk_sector(device); | ||
386 | |||
387 | crc = crc32c(0, buffer, 4096); | ||
388 | buffer->crc32c = cpu_to_be32(crc); | ||
389 | |||
390 | if (drbd_bm_write_hinted(device)) | ||
391 | err = -EIO; | ||
392 | else { | ||
393 | bool write_al_updates; | ||
394 | rcu_read_lock(); | ||
395 | write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates; | ||
396 | rcu_read_unlock(); | ||
397 | if (write_al_updates) { | ||
398 | if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) { | ||
399 | err = -EIO; | ||
400 | drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); | ||
401 | } else { | ||
402 | device->al_tr_number++; | ||
403 | device->al_writ_cnt++; | ||
404 | } | ||
405 | } | ||
406 | } | ||
407 | |||
408 | return err; | ||
409 | } | ||
410 | |||
411 | static int al_write_transaction(struct drbd_device *device) | ||
412 | { | ||
413 | struct al_transaction_on_disk *buffer; | ||
414 | int err; | ||
415 | |||
416 | if (!get_ldev(device)) { | ||
417 | drbd_err(device, "disk is %s, cannot start al transaction\n", | ||
418 | drbd_disk_str(device->state.disk)); | ||
419 | return -EIO; | ||
420 | } | ||
421 | |||
422 | /* The bitmap write may have failed, causing a state change. */ | ||
423 | if (device->state.disk < D_INCONSISTENT) { | ||
424 | drbd_err(device, | ||
425 | "disk is %s, cannot write al transaction\n", | ||
426 | drbd_disk_str(device->state.disk)); | ||
427 | put_ldev(device); | ||
428 | return -EIO; | ||
429 | } | ||
430 | |||
431 | /* protects md_io_buffer, al_tr_cycle, ... */ | ||
432 | buffer = drbd_md_get_buffer(device, __func__); | ||
433 | if (!buffer) { | ||
434 | drbd_err(device, "disk failed while waiting for md_io buffer\n"); | ||
435 | put_ldev(device); | ||
436 | return -ENODEV; | ||
437 | } | ||
438 | |||
439 | err = __al_write_transaction(device, buffer); | ||
440 | |||
441 | drbd_md_put_buffer(device); | ||
442 | put_ldev(device); | ||
443 | |||
444 | return err; | ||
445 | } | ||
446 | |||
292 | 447 | ||
293 | void drbd_al_begin_io_commit(struct drbd_device *device) | 448 | void drbd_al_begin_io_commit(struct drbd_device *device) |
294 | { | 449 | { |
@@ -420,153 +575,6 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i) | |||
420 | wake_up(&device->al_wait); | 575 | wake_up(&device->al_wait); |
421 | } | 576 | } |
422 | 577 | ||
423 | #if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) | ||
424 | /* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT | ||
425 | * are still coupled, or assume too much about their relation. | ||
426 | * Code below will not work if this is violated. | ||
427 | * Will be cleaned up with some followup patch. | ||
428 | */ | ||
429 | # error FIXME | ||
430 | #endif | ||
431 | |||
432 | static unsigned int al_extent_to_bm_page(unsigned int al_enr) | ||
433 | { | ||
434 | return al_enr >> | ||
435 | /* bit to page */ | ||
436 | ((PAGE_SHIFT + 3) - | ||
437 | /* al extent number to bit */ | ||
438 | (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); | ||
439 | } | ||
440 | |||
441 | static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) | ||
442 | { | ||
443 | const unsigned int stripes = device->ldev->md.al_stripes; | ||
444 | const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k; | ||
445 | |||
446 | /* transaction number, modulo on-disk ring buffer wrap around */ | ||
447 | unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k); | ||
448 | |||
449 | /* ... to aligned 4k on disk block */ | ||
450 | t = ((t % stripes) * stripe_size_4kB) + t/stripes; | ||
451 | |||
452 | /* ... to 512 byte sector in activity log */ | ||
453 | t *= 8; | ||
454 | |||
455 | /* ... plus offset to the on disk position */ | ||
456 | return device->ldev->md.md_offset + device->ldev->md.al_offset + t; | ||
457 | } | ||
458 | |||
459 | int al_write_transaction(struct drbd_device *device) | ||
460 | { | ||
461 | struct al_transaction_on_disk *buffer; | ||
462 | struct lc_element *e; | ||
463 | sector_t sector; | ||
464 | int i, mx; | ||
465 | unsigned extent_nr; | ||
466 | unsigned crc = 0; | ||
467 | int err = 0; | ||
468 | |||
469 | if (!get_ldev(device)) { | ||
470 | drbd_err(device, "disk is %s, cannot start al transaction\n", | ||
471 | drbd_disk_str(device->state.disk)); | ||
472 | return -EIO; | ||
473 | } | ||
474 | |||
475 | /* The bitmap write may have failed, causing a state change. */ | ||
476 | if (device->state.disk < D_INCONSISTENT) { | ||
477 | drbd_err(device, | ||
478 | "disk is %s, cannot write al transaction\n", | ||
479 | drbd_disk_str(device->state.disk)); | ||
480 | put_ldev(device); | ||
481 | return -EIO; | ||
482 | } | ||
483 | |||
484 | /* protects md_io_buffer, al_tr_cycle, ... */ | ||
485 | buffer = drbd_md_get_buffer(device, __func__); | ||
486 | if (!buffer) { | ||
487 | drbd_err(device, "disk failed while waiting for md_io buffer\n"); | ||
488 | put_ldev(device); | ||
489 | return -ENODEV; | ||
490 | } | ||
491 | |||
492 | memset(buffer, 0, sizeof(*buffer)); | ||
493 | buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); | ||
494 | buffer->tr_number = cpu_to_be32(device->al_tr_number); | ||
495 | |||
496 | i = 0; | ||
497 | |||
498 | /* Even though no one can start to change this list | ||
499 | * once we set the LC_LOCKED -- from drbd_al_begin_io(), | ||
500 | * lc_try_lock_for_transaction() --, someone may still | ||
501 | * be in the process of changing it. */ | ||
502 | spin_lock_irq(&device->al_lock); | ||
503 | list_for_each_entry(e, &device->act_log->to_be_changed, list) { | ||
504 | if (i == AL_UPDATES_PER_TRANSACTION) { | ||
505 | i++; | ||
506 | break; | ||
507 | } | ||
508 | buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); | ||
509 | buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); | ||
510 | if (e->lc_number != LC_FREE) | ||
511 | drbd_bm_mark_for_writeout(device, | ||
512 | al_extent_to_bm_page(e->lc_number)); | ||
513 | i++; | ||
514 | } | ||
515 | spin_unlock_irq(&device->al_lock); | ||
516 | BUG_ON(i > AL_UPDATES_PER_TRANSACTION); | ||
517 | |||
518 | buffer->n_updates = cpu_to_be16(i); | ||
519 | for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { | ||
520 | buffer->update_slot_nr[i] = cpu_to_be16(-1); | ||
521 | buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); | ||
522 | } | ||
523 | |||
524 | buffer->context_size = cpu_to_be16(device->act_log->nr_elements); | ||
525 | buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle); | ||
526 | |||
527 | mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, | ||
528 | device->act_log->nr_elements - device->al_tr_cycle); | ||
529 | for (i = 0; i < mx; i++) { | ||
530 | unsigned idx = device->al_tr_cycle + i; | ||
531 | extent_nr = lc_element_by_index(device->act_log, idx)->lc_number; | ||
532 | buffer->context[i] = cpu_to_be32(extent_nr); | ||
533 | } | ||
534 | for (; i < AL_CONTEXT_PER_TRANSACTION; i++) | ||
535 | buffer->context[i] = cpu_to_be32(LC_FREE); | ||
536 | |||
537 | device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; | ||
538 | if (device->al_tr_cycle >= device->act_log->nr_elements) | ||
539 | device->al_tr_cycle = 0; | ||
540 | |||
541 | sector = al_tr_number_to_on_disk_sector(device); | ||
542 | |||
543 | crc = crc32c(0, buffer, 4096); | ||
544 | buffer->crc32c = cpu_to_be32(crc); | ||
545 | |||
546 | if (drbd_bm_write_hinted(device)) | ||
547 | err = -EIO; | ||
548 | else { | ||
549 | bool write_al_updates; | ||
550 | rcu_read_lock(); | ||
551 | write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates; | ||
552 | rcu_read_unlock(); | ||
553 | if (write_al_updates) { | ||
554 | if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) { | ||
555 | err = -EIO; | ||
556 | drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); | ||
557 | } else { | ||
558 | device->al_tr_number++; | ||
559 | device->al_writ_cnt++; | ||
560 | } | ||
561 | } | ||
562 | } | ||
563 | |||
564 | drbd_md_put_buffer(device); | ||
565 | put_ldev(device); | ||
566 | |||
567 | return err; | ||
568 | } | ||
569 | |||
570 | static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext) | 578 | static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext) |
571 | { | 579 | { |
572 | int rv; | 580 | int rv; |
@@ -606,21 +614,24 @@ void drbd_al_shrink(struct drbd_device *device) | |||
606 | wake_up(&device->al_wait); | 614 | wake_up(&device->al_wait); |
607 | } | 615 | } |
608 | 616 | ||
609 | int drbd_initialize_al(struct drbd_device *device, void *buffer) | 617 | int drbd_al_initialize(struct drbd_device *device, void *buffer) |
610 | { | 618 | { |
611 | struct al_transaction_on_disk *al = buffer; | 619 | struct al_transaction_on_disk *al = buffer; |
612 | struct drbd_md *md = &device->ldev->md; | 620 | struct drbd_md *md = &device->ldev->md; |
613 | sector_t al_base = md->md_offset + md->al_offset; | ||
614 | int al_size_4k = md->al_stripes * md->al_stripe_size_4k; | 621 | int al_size_4k = md->al_stripes * md->al_stripe_size_4k; |
615 | int i; | 622 | int i; |
616 | 623 | ||
617 | memset(al, 0, 4096); | 624 | __al_write_transaction(device, al); |
618 | al->magic = cpu_to_be32(DRBD_AL_MAGIC); | 625 | /* There may or may not have been a pending transaction. */ |
619 | al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED); | 626 | spin_lock_irq(&device->al_lock); |
620 | al->crc32c = cpu_to_be32(crc32c(0, al, 4096)); | 627 | lc_committed(device->act_log); |
628 | spin_unlock_irq(&device->al_lock); | ||
621 | 629 | ||
622 | for (i = 0; i < al_size_4k; i++) { | 630 | /* The rest of the transactions will have an empty "updates" list, and |
623 | int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE); | 631 | * are written out only to provide the context, and to initialize the |
632 | * on-disk ring buffer. */ | ||
633 | for (i = 1; i < al_size_4k; i++) { | ||
634 | int err = __al_write_transaction(device, al); | ||
624 | if (err) | 635 | if (err) |
625 | return err; | 636 | return err; |
626 | } | 637 | } |
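The new al_tr_number_to_on_disk_sector() above maps a monotonically increasing transaction number onto a striped ring buffer of 4k blocks inside the metadata area. The userspace sketch below reproduces only that arithmetic so the round-robin placement is easy to see; the geometry values (4 stripes of 16 blocks, md_offset, al_offset) are made up for illustration and in the driver come from the on-disk metadata.

/* Minimal userspace sketch of the activity-log sector mapping added above
 * (al_tr_number_to_on_disk_sector). Geometry values are hypothetical. */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;

struct al_geometry {
    unsigned int al_stripes;        /* number of interleaved stripes */
    unsigned int al_stripe_size_4k; /* stripe size in 4k blocks */
    sector_t md_offset;             /* start of the metadata area, in 512B sectors */
    int al_offset;                  /* offset of the AL within the metadata area */
};

static sector_t al_tr_number_to_sector(const struct al_geometry *g, unsigned int al_tr_number)
{
    const unsigned int al_size_4k = g->al_stripes * g->al_stripe_size_4k;

    /* transaction number, modulo on-disk ring buffer wrap around */
    unsigned int t = al_tr_number % al_size_4k;

    /* ... to aligned 4k on-disk block: round-robin over the stripes */
    t = ((t % g->al_stripes) * g->al_stripe_size_4k) + t / g->al_stripes;

    /* ... to 512 byte sectors (8 per 4k block), plus the base offset */
    return g->md_offset + g->al_offset + (sector_t)t * 8;
}

int main(void)
{
    /* hypothetical geometry: 4 stripes of 16 x 4k blocks each */
    struct al_geometry g = { .al_stripes = 4, .al_stripe_size_4k = 16,
                             .md_offset = 1024, .al_offset = 8 };
    for (unsigned int tr = 0; tr < 6; tr++)
        printf("transaction %u -> sector %llu\n", tr,
               (unsigned long long)al_tr_number_to_sector(&g, tr));
    return 0;
}

Successive transactions land on different stripes (sectors 1032, 1160, 1288, ... for the made-up geometry above) before the ring buffer wraps, which is exactly the layout the refactored __al_write_transaction() writes to.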
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 9462d2752850..0dabc9b93725 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -24,7 +24,7 @@ | |||
24 | 24 | ||
25 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 25 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
26 | 26 | ||
27 | #include <linux/bitops.h> | 27 | #include <linux/bitmap.h> |
28 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
29 | #include <linux/string.h> | 29 | #include <linux/string.h> |
30 | #include <linux/drbd.h> | 30 | #include <linux/drbd.h> |
@@ -479,8 +479,14 @@ void drbd_bm_cleanup(struct drbd_device *device) | |||
479 | * this masks out the remaining bits. | 479 | * this masks out the remaining bits. |
480 | * Returns the number of bits cleared. | 480 | * Returns the number of bits cleared. |
481 | */ | 481 | */ |
482 | #ifndef BITS_PER_PAGE | ||
482 | #define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) | 483 | #define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) |
483 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1) | 484 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1) |
485 | #else | ||
486 | # if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3)) | ||
487 | # error "ambiguous BITS_PER_PAGE" | ||
488 | # endif | ||
489 | #endif | ||
484 | #define BITS_PER_LONG_MASK (BITS_PER_LONG - 1) | 490 | #define BITS_PER_LONG_MASK (BITS_PER_LONG - 1) |
485 | static int bm_clear_surplus(struct drbd_bitmap *b) | 491 | static int bm_clear_surplus(struct drbd_bitmap *b) |
486 | { | 492 | { |
@@ -559,21 +565,19 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b) | |||
559 | unsigned long *p_addr; | 565 | unsigned long *p_addr; |
560 | unsigned long bits = 0; | 566 | unsigned long bits = 0; |
561 | unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1; | 567 | unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1; |
562 | int idx, i, last_word; | 568 | int idx, last_word; |
563 | 569 | ||
564 | /* all but last page */ | 570 | /* all but last page */ |
565 | for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) { | 571 | for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) { |
566 | p_addr = __bm_map_pidx(b, idx); | 572 | p_addr = __bm_map_pidx(b, idx); |
567 | for (i = 0; i < LWPP; i++) | 573 | bits += bitmap_weight(p_addr, BITS_PER_PAGE); |
568 | bits += hweight_long(p_addr[i]); | ||
569 | __bm_unmap(p_addr); | 574 | __bm_unmap(p_addr); |
570 | cond_resched(); | 575 | cond_resched(); |
571 | } | 576 | } |
572 | /* last (or only) page */ | 577 | /* last (or only) page */ |
573 | last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; | 578 | last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; |
574 | p_addr = __bm_map_pidx(b, idx); | 579 | p_addr = __bm_map_pidx(b, idx); |
575 | for (i = 0; i < last_word; i++) | 580 | bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG); |
576 | bits += hweight_long(p_addr[i]); | ||
577 | p_addr[last_word] &= cpu_to_lel(mask); | 581 | p_addr[last_word] &= cpu_to_lel(mask); |
578 | bits += hweight_long(p_addr[last_word]); | 582 | bits += hweight_long(p_addr[last_word]); |
579 | /* 32bit arch, may have an unused padding long */ | 583 | /* 32bit arch, may have an unused padding long */ |
@@ -1419,6 +1423,9 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b, | |||
1419 | int bits; | 1423 | int bits; |
1420 | int changed = 0; | 1424 | int changed = 0; |
1421 | unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]); | 1425 | unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]); |
1426 | |||
1427 | /* I think it is more cache line friendly to hweight_long then set to ~0UL, | ||
1428 | * than to first bitmap_weight() all words, then bitmap_fill() all words */ | ||
1422 | for (i = first_word; i < last_word; i++) { | 1429 | for (i = first_word; i < last_word; i++) { |
1423 | bits = hweight_long(paddr[i]); | 1430 | bits = hweight_long(paddr[i]); |
1424 | paddr[i] = ~0UL; | 1431 | paddr[i] = ~0UL; |
@@ -1628,8 +1635,7 @@ int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr) | |||
1628 | int n = e-s; | 1635 | int n = e-s; |
1629 | p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); | 1636 | p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); |
1630 | bm = p_addr + MLPP(s); | 1637 | bm = p_addr + MLPP(s); |
1631 | while (n--) | 1638 | count += bitmap_weight(bm, n * BITS_PER_LONG); |
1632 | count += hweight_long(*bm++); | ||
1633 | bm_unmap(p_addr); | 1639 | bm_unmap(p_addr); |
1634 | } else { | 1640 | } else { |
1635 | drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s); | 1641 | drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s); |
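The drbd_bitmap.c changes above replace per-word hweight_long() loops with single bitmap_weight() calls over whole pages or word ranges. The sketch below is a userspace analog, assuming GCC/Clang builtins; hweight_long() and bitmap_weight() here are simplified stand-ins for the kernel helpers (the real bitmap_weight() also handles a partial last word) and are shown only to illustrate that the two forms count the same bits.

/* Userspace sketch: counting set bits over a page of longs with one
 * weight call instead of a per-word loop. Simplified stand-ins for the
 * kernel's hweight_long()/bitmap_weight(). */
#include <stdio.h>

#define WORDS_PER_PAGE 512   /* illustrative: 4096-byte page / 8-byte long */

static unsigned long hweight_long(unsigned long w)
{
    return __builtin_popcountl(w);
}

/* counterpart of bitmap_weight(addr, nbits) for whole-word ranges only */
static unsigned long bitmap_weight(const unsigned long *addr, unsigned int nbits)
{
    unsigned long bits = 0;
    for (unsigned int i = 0; i < nbits / (8 * sizeof(long)); i++)
        bits += hweight_long(addr[i]);
    return bits;
}

int main(void)
{
    unsigned long page[WORDS_PER_PAGE] = { 0xff, 0xf0f0, ~0UL };
    unsigned long bits_loop = 0;

    /* old style: one hweight_long() per word */
    for (int i = 0; i < WORDS_PER_PAGE; i++)
        bits_loop += hweight_long(page[i]);

    /* new style: one call over the whole page */
    unsigned long bits_weight = bitmap_weight(page, WORDS_PER_PAGE * 8 * sizeof(long));

    printf("loop=%lu weight=%lu\n", bits_loop, bits_weight);
    return 0;
}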
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index 6b88a35fb048..96a0107a72ea 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -771,6 +771,13 @@ static int device_data_gen_id_show(struct seq_file *m, void *ignored) | |||
771 | return 0; | 771 | return 0; |
772 | } | 772 | } |
773 | 773 | ||
774 | static int device_ed_gen_id_show(struct seq_file *m, void *ignored) | ||
775 | { | ||
776 | struct drbd_device *device = m->private; | ||
777 | seq_printf(m, "0x%016llX\n", (unsigned long long)device->ed_uuid); | ||
778 | return 0; | ||
779 | } | ||
780 | |||
774 | #define drbd_debugfs_device_attr(name) \ | 781 | #define drbd_debugfs_device_attr(name) \ |
775 | static int device_ ## name ## _open(struct inode *inode, struct file *file) \ | 782 | static int device_ ## name ## _open(struct inode *inode, struct file *file) \ |
776 | { \ | 783 | { \ |
@@ -796,6 +803,7 @@ drbd_debugfs_device_attr(oldest_requests) | |||
796 | drbd_debugfs_device_attr(act_log_extents) | 803 | drbd_debugfs_device_attr(act_log_extents) |
797 | drbd_debugfs_device_attr(resync_extents) | 804 | drbd_debugfs_device_attr(resync_extents) |
798 | drbd_debugfs_device_attr(data_gen_id) | 805 | drbd_debugfs_device_attr(data_gen_id) |
806 | drbd_debugfs_device_attr(ed_gen_id) | ||
799 | 807 | ||
800 | void drbd_debugfs_device_add(struct drbd_device *device) | 808 | void drbd_debugfs_device_add(struct drbd_device *device) |
801 | { | 809 | { |
@@ -839,6 +847,7 @@ void drbd_debugfs_device_add(struct drbd_device *device) | |||
839 | DCF(act_log_extents); | 847 | DCF(act_log_extents); |
840 | DCF(resync_extents); | 848 | DCF(resync_extents); |
841 | DCF(data_gen_id); | 849 | DCF(data_gen_id); |
850 | DCF(ed_gen_id); | ||
842 | #undef DCF | 851 | #undef DCF |
843 | return; | 852 | return; |
844 | 853 | ||
@@ -854,6 +863,7 @@ void drbd_debugfs_device_cleanup(struct drbd_device *device) | |||
854 | drbd_debugfs_remove(&device->debugfs_vol_act_log_extents); | 863 | drbd_debugfs_remove(&device->debugfs_vol_act_log_extents); |
855 | drbd_debugfs_remove(&device->debugfs_vol_resync_extents); | 864 | drbd_debugfs_remove(&device->debugfs_vol_resync_extents); |
856 | drbd_debugfs_remove(&device->debugfs_vol_data_gen_id); | 865 | drbd_debugfs_remove(&device->debugfs_vol_data_gen_id); |
866 | drbd_debugfs_remove(&device->debugfs_vol_ed_gen_id); | ||
857 | drbd_debugfs_remove(&device->debugfs_vol); | 867 | drbd_debugfs_remove(&device->debugfs_vol); |
858 | } | 868 | } |
859 | 869 | ||
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e66d453a5f2b..b6844feb9f9b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -77,13 +77,6 @@ extern int fault_devs; | |||
77 | extern char usermode_helper[]; | 77 | extern char usermode_helper[]; |
78 | 78 | ||
79 | 79 | ||
80 | /* I don't remember why XCPU ... | ||
81 | * This is used to wake the asender, | ||
82 | * and to interrupt sending the sending task | ||
83 | * on disconnect. | ||
84 | */ | ||
85 | #define DRBD_SIG SIGXCPU | ||
86 | |||
87 | /* This is used to stop/restart our threads. | 80 | /* This is used to stop/restart our threads. |
88 | * Cannot use SIGTERM nor SIGKILL, since these | 81 | * Cannot use SIGTERM nor SIGKILL, since these |
89 | * are sent out by init on runlevel changes | 82 | * are sent out by init on runlevel changes |
@@ -292,6 +285,9 @@ struct drbd_device_work { | |||
292 | 285 | ||
293 | extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *); | 286 | extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *); |
294 | 287 | ||
288 | extern void lock_all_resources(void); | ||
289 | extern void unlock_all_resources(void); | ||
290 | |||
295 | struct drbd_request { | 291 | struct drbd_request { |
296 | struct drbd_work w; | 292 | struct drbd_work w; |
297 | struct drbd_device *device; | 293 | struct drbd_device *device; |
@@ -504,7 +500,6 @@ enum { | |||
504 | 500 | ||
505 | MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ | 501 | MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ |
506 | 502 | ||
507 | SUSPEND_IO, /* suspend application io */ | ||
508 | BITMAP_IO, /* suspend application io; | 503 | BITMAP_IO, /* suspend application io; |
509 | once no more io in flight, start bitmap io */ | 504 | once no more io in flight, start bitmap io */ |
510 | BITMAP_IO_QUEUED, /* Started bitmap IO */ | 505 | BITMAP_IO_QUEUED, /* Started bitmap IO */ |
@@ -632,12 +627,6 @@ struct bm_io_work { | |||
632 | void (*done)(struct drbd_device *device, int rv); | 627 | void (*done)(struct drbd_device *device, int rv); |
633 | }; | 628 | }; |
634 | 629 | ||
635 | enum write_ordering_e { | ||
636 | WO_none, | ||
637 | WO_drain_io, | ||
638 | WO_bdev_flush, | ||
639 | }; | ||
640 | |||
641 | struct fifo_buffer { | 630 | struct fifo_buffer { |
642 | unsigned int head_index; | 631 | unsigned int head_index; |
643 | unsigned int size; | 632 | unsigned int size; |
@@ -650,8 +639,7 @@ extern struct fifo_buffer *fifo_alloc(int fifo_size); | |||
650 | enum { | 639 | enum { |
651 | NET_CONGESTED, /* The data socket is congested */ | 640 | NET_CONGESTED, /* The data socket is congested */ |
652 | RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */ | 641 | RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */ |
653 | SEND_PING, /* whether asender should send a ping asap */ | 642 | SEND_PING, |
654 | SIGNAL_ASENDER, /* whether asender wants to be interrupted */ | ||
655 | GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ | 643 | GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ |
656 | CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */ | 644 | CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */ |
657 | CONN_WD_ST_CHG_OKAY, | 645 | CONN_WD_ST_CHG_OKAY, |
@@ -670,6 +658,8 @@ enum { | |||
670 | DEVICE_WORK_PENDING, /* tell worker that some device has pending work */ | 658 | DEVICE_WORK_PENDING, /* tell worker that some device has pending work */ |
671 | }; | 659 | }; |
672 | 660 | ||
661 | enum which_state { NOW, OLD = NOW, NEW }; | ||
662 | |||
673 | struct drbd_resource { | 663 | struct drbd_resource { |
674 | char *name; | 664 | char *name; |
675 | #ifdef CONFIG_DEBUG_FS | 665 | #ifdef CONFIG_DEBUG_FS |
@@ -755,7 +745,8 @@ struct drbd_connection { | |||
755 | unsigned long last_reconnect_jif; | 745 | unsigned long last_reconnect_jif; |
756 | struct drbd_thread receiver; | 746 | struct drbd_thread receiver; |
757 | struct drbd_thread worker; | 747 | struct drbd_thread worker; |
758 | struct drbd_thread asender; | 748 | struct drbd_thread ack_receiver; |
749 | struct workqueue_struct *ack_sender; | ||
759 | 750 | ||
760 | /* cached pointers, | 751 | /* cached pointers, |
761 | * so we can look up the oldest pending requests more quickly. | 752 | * so we can look up the oldest pending requests more quickly. |
@@ -774,6 +765,8 @@ struct drbd_connection { | |||
774 | struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST]; | 765 | struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST]; |
775 | 766 | ||
776 | struct { | 767 | struct { |
768 | unsigned long last_sent_barrier_jif; | ||
769 | |||
777 | /* whether this sender thread | 770 | /* whether this sender thread |
778 | * has processed a single write yet. */ | 771 | * has processed a single write yet. */ |
779 | bool seen_any_write_yet; | 772 | bool seen_any_write_yet; |
@@ -788,6 +781,17 @@ struct drbd_connection { | |||
788 | } send; | 781 | } send; |
789 | }; | 782 | }; |
790 | 783 | ||
784 | static inline bool has_net_conf(struct drbd_connection *connection) | ||
785 | { | ||
786 | bool has_net_conf; | ||
787 | |||
788 | rcu_read_lock(); | ||
789 | has_net_conf = rcu_dereference(connection->net_conf); | ||
790 | rcu_read_unlock(); | ||
791 | |||
792 | return has_net_conf; | ||
793 | } | ||
794 | |||
791 | void __update_timing_details( | 795 | void __update_timing_details( |
792 | struct drbd_thread_timing_details *tdp, | 796 | struct drbd_thread_timing_details *tdp, |
793 | unsigned int *cb_nr, | 797 | unsigned int *cb_nr, |
@@ -811,6 +815,7 @@ struct drbd_peer_device { | |||
811 | struct list_head peer_devices; | 815 | struct list_head peer_devices; |
812 | struct drbd_device *device; | 816 | struct drbd_device *device; |
813 | struct drbd_connection *connection; | 817 | struct drbd_connection *connection; |
818 | struct work_struct send_acks_work; | ||
814 | #ifdef CONFIG_DEBUG_FS | 819 | #ifdef CONFIG_DEBUG_FS |
815 | struct dentry *debugfs_peer_dev; | 820 | struct dentry *debugfs_peer_dev; |
816 | #endif | 821 | #endif |
@@ -829,6 +834,7 @@ struct drbd_device { | |||
829 | struct dentry *debugfs_vol_act_log_extents; | 834 | struct dentry *debugfs_vol_act_log_extents; |
830 | struct dentry *debugfs_vol_resync_extents; | 835 | struct dentry *debugfs_vol_resync_extents; |
831 | struct dentry *debugfs_vol_data_gen_id; | 836 | struct dentry *debugfs_vol_data_gen_id; |
837 | struct dentry *debugfs_vol_ed_gen_id; | ||
832 | #endif | 838 | #endif |
833 | 839 | ||
834 | unsigned int vnr; /* volume number within the connection */ | 840 | unsigned int vnr; /* volume number within the connection */ |
@@ -873,6 +879,7 @@ struct drbd_device { | |||
873 | atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ | 879 | atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ |
874 | atomic_t unacked_cnt; /* Need to send replies for */ | 880 | atomic_t unacked_cnt; /* Need to send replies for */ |
875 | atomic_t local_cnt; /* Waiting for local completion */ | 881 | atomic_t local_cnt; /* Waiting for local completion */ |
882 | atomic_t suspend_cnt; | ||
876 | 883 | ||
877 | /* Interval tree of pending local requests */ | 884 | /* Interval tree of pending local requests */ |
878 | struct rb_root read_requests; | 885 | struct rb_root read_requests; |
@@ -1020,6 +1027,12 @@ static inline struct drbd_peer_device *first_peer_device(struct drbd_device *dev | |||
1020 | return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices); | 1027 | return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices); |
1021 | } | 1028 | } |
1022 | 1029 | ||
1030 | static inline struct drbd_peer_device * | ||
1031 | conn_peer_device(struct drbd_connection *connection, int volume_number) | ||
1032 | { | ||
1033 | return idr_find(&connection->peer_devices, volume_number); | ||
1034 | } | ||
1035 | |||
1023 | #define for_each_resource(resource, _resources) \ | 1036 | #define for_each_resource(resource, _resources) \ |
1024 | list_for_each_entry(resource, _resources, resources) | 1037 | list_for_each_entry(resource, _resources, resources) |
1025 | 1038 | ||
@@ -1113,7 +1126,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int | |||
1113 | extern int drbd_send_bitmap(struct drbd_device *device); | 1126 | extern int drbd_send_bitmap(struct drbd_device *device); |
1114 | extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); | 1127 | extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); |
1115 | extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); | 1128 | extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); |
1116 | extern void drbd_free_ldev(struct drbd_backing_dev *ldev); | 1129 | extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev); |
1117 | extern void drbd_device_cleanup(struct drbd_device *device); | 1130 | extern void drbd_device_cleanup(struct drbd_device *device); |
1118 | void drbd_print_uuids(struct drbd_device *device, const char *text); | 1131 | void drbd_print_uuids(struct drbd_device *device, const char *text); |
1119 | 1132 | ||
@@ -1424,7 +1437,7 @@ extern struct bio_set *drbd_md_io_bio_set; | |||
1424 | /* to allocate from that set */ | 1437 | /* to allocate from that set */ |
1425 | extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); | 1438 | extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); |
1426 | 1439 | ||
1427 | extern rwlock_t global_state_lock; | 1440 | extern struct mutex resources_mutex; |
1428 | 1441 | ||
1429 | extern int conn_lowest_minor(struct drbd_connection *connection); | 1442 | extern int conn_lowest_minor(struct drbd_connection *connection); |
1430 | extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor); | 1443 | extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor); |
@@ -1454,6 +1467,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); | |||
1454 | 1467 | ||
1455 | 1468 | ||
1456 | /* drbd_nl.c */ | 1469 | /* drbd_nl.c */ |
1470 | |||
1471 | extern struct mutex notification_mutex; | ||
1472 | |||
1457 | extern void drbd_suspend_io(struct drbd_device *device); | 1473 | extern void drbd_suspend_io(struct drbd_device *device); |
1458 | extern void drbd_resume_io(struct drbd_device *device); | 1474 | extern void drbd_resume_io(struct drbd_device *device); |
1459 | extern char *ppsize(char *buf, unsigned long long size); | 1475 | extern char *ppsize(char *buf, unsigned long long size); |
@@ -1536,7 +1552,9 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); | |||
1536 | 1552 | ||
1537 | /* drbd_receiver.c */ | 1553 | /* drbd_receiver.c */ |
1538 | extern int drbd_receiver(struct drbd_thread *thi); | 1554 | extern int drbd_receiver(struct drbd_thread *thi); |
1539 | extern int drbd_asender(struct drbd_thread *thi); | 1555 | extern int drbd_ack_receiver(struct drbd_thread *thi); |
1556 | extern void drbd_send_ping_wf(struct work_struct *ws); | ||
1557 | extern void drbd_send_acks_wf(struct work_struct *ws); | ||
1540 | extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); | 1558 | extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); |
1541 | extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, | 1559 | extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, |
1542 | bool throttle_if_app_is_waiting); | 1560 | bool throttle_if_app_is_waiting); |
@@ -1649,7 +1667,7 @@ extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int s | |||
1649 | #define drbd_rs_failed_io(device, sector, size) \ | 1667 | #define drbd_rs_failed_io(device, sector, size) \ |
1650 | __drbd_change_sync(device, sector, size, RECORD_RS_FAILED) | 1668 | __drbd_change_sync(device, sector, size, RECORD_RS_FAILED) |
1651 | extern void drbd_al_shrink(struct drbd_device *device); | 1669 | extern void drbd_al_shrink(struct drbd_device *device); |
1652 | extern int drbd_initialize_al(struct drbd_device *, void *); | 1670 | extern int drbd_al_initialize(struct drbd_device *, void *); |
1653 | 1671 | ||
1654 | /* drbd_nl.c */ | 1672 | /* drbd_nl.c */ |
1655 | /* state info broadcast */ | 1673 | /* state info broadcast */ |
@@ -1668,6 +1686,29 @@ struct sib_info { | |||
1668 | }; | 1686 | }; |
1669 | void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib); | 1687 | void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib); |
1670 | 1688 | ||
1689 | extern void notify_resource_state(struct sk_buff *, | ||
1690 | unsigned int, | ||
1691 | struct drbd_resource *, | ||
1692 | struct resource_info *, | ||
1693 | enum drbd_notification_type); | ||
1694 | extern void notify_device_state(struct sk_buff *, | ||
1695 | unsigned int, | ||
1696 | struct drbd_device *, | ||
1697 | struct device_info *, | ||
1698 | enum drbd_notification_type); | ||
1699 | extern void notify_connection_state(struct sk_buff *, | ||
1700 | unsigned int, | ||
1701 | struct drbd_connection *, | ||
1702 | struct connection_info *, | ||
1703 | enum drbd_notification_type); | ||
1704 | extern void notify_peer_device_state(struct sk_buff *, | ||
1705 | unsigned int, | ||
1706 | struct drbd_peer_device *, | ||
1707 | struct peer_device_info *, | ||
1708 | enum drbd_notification_type); | ||
1709 | extern void notify_helper(enum drbd_notification_type, struct drbd_device *, | ||
1710 | struct drbd_connection *, const char *, int); | ||
1711 | |||
1671 | /* | 1712 | /* |
1672 | * inline helper functions | 1713 | * inline helper functions |
1673 | *************************/ | 1714 | *************************/ |
@@ -1694,19 +1735,6 @@ static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_r | |||
1694 | return 0; | 1735 | return 0; |
1695 | } | 1736 | } |
1696 | 1737 | ||
1697 | static inline enum drbd_state_rv | ||
1698 | _drbd_set_state(struct drbd_device *device, union drbd_state ns, | ||
1699 | enum chg_state_flags flags, struct completion *done) | ||
1700 | { | ||
1701 | enum drbd_state_rv rv; | ||
1702 | |||
1703 | read_lock(&global_state_lock); | ||
1704 | rv = __drbd_set_state(device, ns, flags, done); | ||
1705 | read_unlock(&global_state_lock); | ||
1706 | |||
1707 | return rv; | ||
1708 | } | ||
1709 | |||
1710 | static inline union drbd_state drbd_read_state(struct drbd_device *device) | 1738 | static inline union drbd_state drbd_read_state(struct drbd_device *device) |
1711 | { | 1739 | { |
1712 | struct drbd_resource *resource = device->resource; | 1740 | struct drbd_resource *resource = device->resource; |
@@ -1937,16 +1965,21 @@ drbd_device_post_work(struct drbd_device *device, int work_bit) | |||
1937 | 1965 | ||
1938 | extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue); | 1966 | extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue); |
1939 | 1967 | ||
1940 | static inline void wake_asender(struct drbd_connection *connection) | 1968 | /* To get the ack_receiver out of the blocking network stack, |
1969 | * so it can change its sk_rcvtimeo from idle- to ping-timeout, | ||
1970 | * and send a ping, we need to send a signal. | ||
1971 | * Which signal we send is irrelevant. */ | ||
1972 | static inline void wake_ack_receiver(struct drbd_connection *connection) | ||
1941 | { | 1973 | { |
1942 | if (test_bit(SIGNAL_ASENDER, &connection->flags)) | 1974 | struct task_struct *task = connection->ack_receiver.task; |
1943 | force_sig(DRBD_SIG, connection->asender.task); | 1975 | if (task && get_t_state(&connection->ack_receiver) == RUNNING) |
1976 | force_sig(SIGXCPU, task); | ||
1944 | } | 1977 | } |
1945 | 1978 | ||
1946 | static inline void request_ping(struct drbd_connection *connection) | 1979 | static inline void request_ping(struct drbd_connection *connection) |
1947 | { | 1980 | { |
1948 | set_bit(SEND_PING, &connection->flags); | 1981 | set_bit(SEND_PING, &connection->flags); |
1949 | wake_asender(connection); | 1982 | wake_ack_receiver(connection); |
1950 | } | 1983 | } |
1951 | 1984 | ||
1952 | extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *); | 1985 | extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *); |
@@ -2230,7 +2263,7 @@ static inline bool may_inc_ap_bio(struct drbd_device *device) | |||
2230 | 2263 | ||
2231 | if (drbd_suspended(device)) | 2264 | if (drbd_suspended(device)) |
2232 | return false; | 2265 | return false; |
2233 | if (test_bit(SUSPEND_IO, &device->flags)) | 2266 | if (atomic_read(&device->suspend_cnt)) |
2234 | return false; | 2267 | return false; |
2235 | 2268 | ||
2236 | /* to avoid potential deadlock or bitmap corruption, | 2269 | /* to avoid potential deadlock or bitmap corruption, |
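The SUSPEND_IO flag removed from drbd_int.h above is replaced by the atomic suspend_cnt counter because, as the drbd_nl.c hunk further down notes, multiple threads may suspend and resume IO independently, and with a single bit the first resume would re-enable IO while another caller still expects it suspended. The sketch below is a userspace analog using C11 atomics; the function names mirror the driver's but are placeholders, not the actual implementation.

/* Why a counter instead of a flag bit: nested suspend/resume from
 * independent callers. Userspace analog with C11 atomics. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int suspend_cnt;

static void drbd_suspend_io(void)
{
    atomic_fetch_add(&suspend_cnt, 1);
}

static bool drbd_resume_io(void)
{
    /* only the last resume actually lets IO continue (wake_up in the driver) */
    return atomic_fetch_sub(&suspend_cnt, 1) == 1;
}

static bool may_inc_ap_bio(void)
{
    return atomic_load(&suspend_cnt) == 0;
}

int main(void)
{
    drbd_suspend_io();                                   /* caller A */
    drbd_suspend_io();                                   /* caller B, independently */
    printf("IO allowed? %d\n", may_inc_ap_bio());        /* 0 */
    printf("resume A wakes? %d\n", drbd_resume_io());    /* 0: B still suspends IO */
    printf("resume B wakes? %d\n", drbd_resume_io());    /* 1 */
    printf("IO allowed? %d\n", may_inc_ap_bio());        /* 1 */
    return 0;
}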
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 74d97f4bac34..5b43dfb79819 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -117,6 +117,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 | |||
117 | */ | 117 | */ |
118 | struct idr drbd_devices; | 118 | struct idr drbd_devices; |
119 | struct list_head drbd_resources; | 119 | struct list_head drbd_resources; |
120 | struct mutex resources_mutex; | ||
120 | 121 | ||
121 | struct kmem_cache *drbd_request_cache; | 122 | struct kmem_cache *drbd_request_cache; |
122 | struct kmem_cache *drbd_ee_cache; /* peer requests */ | 123 | struct kmem_cache *drbd_ee_cache; /* peer requests */ |
@@ -1435,8 +1436,8 @@ static int we_should_drop_the_connection(struct drbd_connection *connection, str | |||
1435 | /* long elapsed = (long)(jiffies - device->last_received); */ | 1436 | /* long elapsed = (long)(jiffies - device->last_received); */ |
1436 | 1437 | ||
1437 | drop_it = connection->meta.socket == sock | 1438 | drop_it = connection->meta.socket == sock |
1438 | || !connection->asender.task | 1439 | || !connection->ack_receiver.task |
1439 | || get_t_state(&connection->asender) != RUNNING | 1440 | || get_t_state(&connection->ack_receiver) != RUNNING |
1440 | || connection->cstate < C_WF_REPORT_PARAMS; | 1441 | || connection->cstate < C_WF_REPORT_PARAMS; |
1441 | 1442 | ||
1442 | if (drop_it) | 1443 | if (drop_it) |
@@ -1793,15 +1794,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, | |||
1793 | drbd_update_congested(connection); | 1794 | drbd_update_congested(connection); |
1794 | } | 1795 | } |
1795 | do { | 1796 | do { |
1796 | /* STRANGE | ||
1797 | * tcp_sendmsg does _not_ use its size parameter at all ? | ||
1798 | * | ||
1799 | * -EAGAIN on timeout, -EINTR on signal. | ||
1800 | */ | ||
1801 | /* THINK | ||
1802 | * do we need to block DRBD_SIG if sock == &meta.socket ?? | ||
1803 | * otherwise wake_asender() might interrupt some send_*Ack ! | ||
1804 | */ | ||
1805 | rv = kernel_sendmsg(sock, &msg, &iov, 1, size); | 1797 | rv = kernel_sendmsg(sock, &msg, &iov, 1, size); |
1806 | if (rv == -EAGAIN) { | 1798 | if (rv == -EAGAIN) { |
1807 | if (we_should_drop_the_connection(connection, sock)) | 1799 | if (we_should_drop_the_connection(connection, sock)) |
@@ -2000,7 +1992,7 @@ void drbd_device_cleanup(struct drbd_device *device) | |||
2000 | drbd_bm_cleanup(device); | 1992 | drbd_bm_cleanup(device); |
2001 | } | 1993 | } |
2002 | 1994 | ||
2003 | drbd_free_ldev(device->ldev); | 1995 | drbd_backing_dev_free(device, device->ldev); |
2004 | device->ldev = NULL; | 1996 | device->ldev = NULL; |
2005 | 1997 | ||
2006 | clear_bit(AL_SUSPENDED, &device->flags); | 1998 | clear_bit(AL_SUSPENDED, &device->flags); |
@@ -2179,7 +2171,7 @@ void drbd_destroy_device(struct kref *kref) | |||
2179 | if (device->this_bdev) | 2171 | if (device->this_bdev) |
2180 | bdput(device->this_bdev); | 2172 | bdput(device->this_bdev); |
2181 | 2173 | ||
2182 | drbd_free_ldev(device->ldev); | 2174 | drbd_backing_dev_free(device, device->ldev); |
2183 | device->ldev = NULL; | 2175 | device->ldev = NULL; |
2184 | 2176 | ||
2185 | drbd_release_all_peer_reqs(device); | 2177 | drbd_release_all_peer_reqs(device); |
@@ -2563,7 +2555,7 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op | |||
2563 | cpumask_copy(resource->cpu_mask, new_cpu_mask); | 2555 | cpumask_copy(resource->cpu_mask, new_cpu_mask); |
2564 | for_each_connection_rcu(connection, resource) { | 2556 | for_each_connection_rcu(connection, resource) { |
2565 | connection->receiver.reset_cpu_mask = 1; | 2557 | connection->receiver.reset_cpu_mask = 1; |
2566 | connection->asender.reset_cpu_mask = 1; | 2558 | connection->ack_receiver.reset_cpu_mask = 1; |
2567 | connection->worker.reset_cpu_mask = 1; | 2559 | connection->worker.reset_cpu_mask = 1; |
2568 | } | 2560 | } |
2569 | } | 2561 | } |
@@ -2590,7 +2582,7 @@ struct drbd_resource *drbd_create_resource(const char *name) | |||
2590 | kref_init(&resource->kref); | 2582 | kref_init(&resource->kref); |
2591 | idr_init(&resource->devices); | 2583 | idr_init(&resource->devices); |
2592 | INIT_LIST_HEAD(&resource->connections); | 2584 | INIT_LIST_HEAD(&resource->connections); |
2593 | resource->write_ordering = WO_bdev_flush; | 2585 | resource->write_ordering = WO_BDEV_FLUSH; |
2594 | list_add_tail_rcu(&resource->resources, &drbd_resources); | 2586 | list_add_tail_rcu(&resource->resources, &drbd_resources); |
2595 | mutex_init(&resource->conf_update); | 2587 | mutex_init(&resource->conf_update); |
2596 | mutex_init(&resource->adm_mutex); | 2588 | mutex_init(&resource->adm_mutex); |
@@ -2652,8 +2644,8 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) | |||
2652 | connection->receiver.connection = connection; | 2644 | connection->receiver.connection = connection; |
2653 | drbd_thread_init(resource, &connection->worker, drbd_worker, "worker"); | 2645 | drbd_thread_init(resource, &connection->worker, drbd_worker, "worker"); |
2654 | connection->worker.connection = connection; | 2646 | connection->worker.connection = connection; |
2655 | drbd_thread_init(resource, &connection->asender, drbd_asender, "asender"); | 2647 | drbd_thread_init(resource, &connection->ack_receiver, drbd_ack_receiver, "ack_recv"); |
2656 | connection->asender.connection = connection; | 2648 | connection->ack_receiver.connection = connection; |
2657 | 2649 | ||
2658 | kref_init(&connection->kref); | 2650 | kref_init(&connection->kref); |
2659 | 2651 | ||
@@ -2702,8 +2694,8 @@ static int init_submitter(struct drbd_device *device) | |||
2702 | { | 2694 | { |
2703 | /* opencoded create_singlethread_workqueue(), | 2695 | /* opencoded create_singlethread_workqueue(), |
2704 | * to be able to say "drbd%d", ..., minor */ | 2696 | * to be able to say "drbd%d", ..., minor */ |
2705 | device->submit.wq = alloc_workqueue("drbd%u_submit", | 2697 | device->submit.wq = |
2706 | WQ_UNBOUND | WQ_MEM_RECLAIM, 1, device->minor); | 2698 | alloc_ordered_workqueue("drbd%u_submit", WQ_MEM_RECLAIM, device->minor); |
2707 | if (!device->submit.wq) | 2699 | if (!device->submit.wq) |
2708 | return -ENOMEM; | 2700 | return -ENOMEM; |
2709 | 2701 | ||
@@ -2820,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig | |||
2820 | goto out_idr_remove_from_resource; | 2812 | goto out_idr_remove_from_resource; |
2821 | } | 2813 | } |
2822 | kref_get(&connection->kref); | 2814 | kref_get(&connection->kref); |
2815 | INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf); | ||
2823 | } | 2816 | } |
2824 | 2817 | ||
2825 | if (init_submitter(device)) { | 2818 | if (init_submitter(device)) { |
@@ -2923,7 +2916,7 @@ static int __init drbd_init(void) | |||
2923 | drbd_proc = NULL; /* play safe for drbd_cleanup */ | 2916 | drbd_proc = NULL; /* play safe for drbd_cleanup */ |
2924 | idr_init(&drbd_devices); | 2917 | idr_init(&drbd_devices); |
2925 | 2918 | ||
2926 | rwlock_init(&global_state_lock); | 2919 | mutex_init(&resources_mutex); |
2927 | INIT_LIST_HEAD(&drbd_resources); | 2920 | INIT_LIST_HEAD(&drbd_resources); |
2928 | 2921 | ||
2929 | err = drbd_genl_register(); | 2922 | err = drbd_genl_register(); |
@@ -2971,18 +2964,6 @@ fail: | |||
2971 | return err; | 2964 | return err; |
2972 | } | 2965 | } |
2973 | 2966 | ||
2974 | void drbd_free_ldev(struct drbd_backing_dev *ldev) | ||
2975 | { | ||
2976 | if (ldev == NULL) | ||
2977 | return; | ||
2978 | |||
2979 | blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | ||
2980 | blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | ||
2981 | |||
2982 | kfree(ldev->disk_conf); | ||
2983 | kfree(ldev); | ||
2984 | } | ||
2985 | |||
2986 | static void drbd_free_one_sock(struct drbd_socket *ds) | 2967 | static void drbd_free_one_sock(struct drbd_socket *ds) |
2987 | { | 2968 | { |
2988 | struct socket *s; | 2969 | struct socket *s; |
@@ -3277,6 +3258,10 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev) | |||
3277 | * and read it. */ | 3258 | * and read it. */ |
3278 | bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx; | 3259 | bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx; |
3279 | bdev->md.md_offset = drbd_md_ss(bdev); | 3260 | bdev->md.md_offset = drbd_md_ss(bdev); |
3261 | /* Even for (flexible or indexed) external meta data, | ||
3262 | * initially restrict us to the 4k superblock for now. | ||
3263 | * Affects the paranoia out-of-range access check in drbd_md_sync_page_io(). */ | ||
3264 | bdev->md.md_size_sect = 8; | ||
3280 | 3265 | ||
3281 | if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) { | 3266 | if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) { |
3282 | /* NOTE: can't do normal error processing here as this is | 3267 | /* NOTE: can't do normal error processing here as this is |
@@ -3578,7 +3563,9 @@ void drbd_queue_bitmap_io(struct drbd_device *device, | |||
3578 | 3563 | ||
3579 | spin_lock_irq(&device->resource->req_lock); | 3564 | spin_lock_irq(&device->resource->req_lock); |
3580 | set_bit(BITMAP_IO, &device->flags); | 3565 | set_bit(BITMAP_IO, &device->flags); |
3581 | if (atomic_read(&device->ap_bio_cnt) == 0) { | 3566 | /* don't wait for pending application IO if the caller indicates that |
3567 | * application IO does not conflict anyways. */ | ||
3568 | if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) { | ||
3582 | if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags)) | 3569 | if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags)) |
3583 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, | 3570 | drbd_queue_work(&first_peer_device(device)->connection->sender_work, |
3584 | &device->bm_io_work.w); | 3571 | &device->bm_io_work.w); |
@@ -3746,6 +3733,27 @@ int drbd_wait_misc(struct drbd_device *device, struct drbd_interval *i) | |||
3746 | return 0; | 3733 | return 0; |
3747 | } | 3734 | } |
3748 | 3735 | ||
3736 | void lock_all_resources(void) | ||
3737 | { | ||
3738 | struct drbd_resource *resource; | ||
3739 | int __maybe_unused i = 0; | ||
3740 | |||
3741 | mutex_lock(&resources_mutex); | ||
3742 | local_irq_disable(); | ||
3743 | for_each_resource(resource, &drbd_resources) | ||
3744 | spin_lock_nested(&resource->req_lock, i++); | ||
3745 | } | ||
3746 | |||
3747 | void unlock_all_resources(void) | ||
3748 | { | ||
3749 | struct drbd_resource *resource; | ||
3750 | |||
3751 | for_each_resource(resource, &drbd_resources) | ||
3752 | spin_unlock(&resource->req_lock); | ||
3753 | local_irq_enable(); | ||
3754 | mutex_unlock(&resources_mutex); | ||
3755 | } | ||
3756 | |||
3749 | #ifdef CONFIG_DRBD_FAULT_INJECTION | 3757 | #ifdef CONFIG_DRBD_FAULT_INJECTION |
3750 | /* Fault insertion support including random number generator shamelessly | 3758 | /* Fault insertion support including random number generator shamelessly |
3751 | * stolen from kernel/rcutorture.c */ | 3759 | * stolen from kernel/rcutorture.c */ |
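lock_all_resources()/unlock_all_resources() above take resources_mutex and then every resource's req_lock in list order (spin_lock_nested() is a lockdep annotation for that ordering). The sketch below is a userspace analog with pthread mutexes, assuming a fixed array in place of the resource list: the outer mutex serializes callers, and the fixed acquisition order rules out lock-order deadlocks between them.

/* Userspace analog of lock_all_resources()/unlock_all_resources():
 * one outer mutex plus per-resource locks taken in a fixed order. */
#include <pthread.h>
#include <stdio.h>

#define NR_RESOURCES 3

static pthread_mutex_t resources_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t req_lock[NR_RESOURCES] = {
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

static void lock_all_resources(void)
{
    pthread_mutex_lock(&resources_mutex);
    for (int i = 0; i < NR_RESOURCES; i++)   /* fixed order, like the resource list */
        pthread_mutex_lock(&req_lock[i]);
}

static void unlock_all_resources(void)
{
    for (int i = 0; i < NR_RESOURCES; i++)
        pthread_mutex_unlock(&req_lock[i]);
    pthread_mutex_unlock(&resources_mutex);
}

int main(void)
{
    lock_all_resources();
    printf("holding all %d resource locks\n", NR_RESOURCES);
    unlock_all_resources();
    return 0;
}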
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index e80cbefbc2b5..c055c5e12f24 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -36,6 +36,7 @@ | |||
36 | #include "drbd_int.h" | 36 | #include "drbd_int.h" |
37 | #include "drbd_protocol.h" | 37 | #include "drbd_protocol.h" |
38 | #include "drbd_req.h" | 38 | #include "drbd_req.h" |
39 | #include "drbd_state_change.h" | ||
39 | #include <asm/unaligned.h> | 40 | #include <asm/unaligned.h> |
40 | #include <linux/drbd_limits.h> | 41 | #include <linux/drbd_limits.h> |
41 | #include <linux/kthread.h> | 42 | #include <linux/kthread.h> |
@@ -75,11 +76,24 @@ int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info); | |||
75 | int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); | 76 | int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); |
76 | /* .dumpit */ | 77 | /* .dumpit */ |
77 | int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb); | 78 | int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb); |
79 | int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb); | ||
80 | int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb); | ||
81 | int drbd_adm_dump_devices_done(struct netlink_callback *cb); | ||
82 | int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb); | ||
83 | int drbd_adm_dump_connections_done(struct netlink_callback *cb); | ||
84 | int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb); | ||
85 | int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb); | ||
86 | int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb); | ||
78 | 87 | ||
79 | #include <linux/drbd_genl_api.h> | 88 | #include <linux/drbd_genl_api.h> |
80 | #include "drbd_nla.h" | 89 | #include "drbd_nla.h" |
81 | #include <linux/genl_magic_func.h> | 90 | #include <linux/genl_magic_func.h> |
82 | 91 | ||
92 | static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */ | ||
93 | static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */ | ||
94 | |||
95 | DEFINE_MUTEX(notification_mutex); | ||
96 | |||
83 | /* used blkdev_get_by_path, to claim our meta data device(s) */ | 97 | /* used blkdev_get_by_path, to claim our meta data device(s) */ |
84 | static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; | 98 | static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; |
85 | 99 | ||
@@ -349,6 +363,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd) | |||
349 | sib.sib_reason = SIB_HELPER_PRE; | 363 | sib.sib_reason = SIB_HELPER_PRE; |
350 | sib.helper_name = cmd; | 364 | sib.helper_name = cmd; |
351 | drbd_bcast_event(device, &sib); | 365 | drbd_bcast_event(device, &sib); |
366 | notify_helper(NOTIFY_CALL, device, connection, cmd, 0); | ||
352 | ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); | 367 | ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); |
353 | if (ret) | 368 | if (ret) |
354 | drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n", | 369 | drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n", |
@@ -361,6 +376,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd) | |||
361 | sib.sib_reason = SIB_HELPER_POST; | 376 | sib.sib_reason = SIB_HELPER_POST; |
362 | sib.helper_exit_code = ret; | 377 | sib.helper_exit_code = ret; |
363 | drbd_bcast_event(device, &sib); | 378 | drbd_bcast_event(device, &sib); |
379 | notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret); | ||
364 | 380 | ||
365 | if (current == connection->worker.task) | 381 | if (current == connection->worker.task) |
366 | clear_bit(CALLBACK_PENDING, &connection->flags); | 382 | clear_bit(CALLBACK_PENDING, &connection->flags); |
@@ -388,6 +404,7 @@ static int conn_khelper(struct drbd_connection *connection, char *cmd) | |||
388 | 404 | ||
389 | drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name); | 405 | drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name); |
390 | /* TODO: conn_bcast_event() ?? */ | 406 | /* TODO: conn_bcast_event() ?? */ |
407 | notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0); | ||
391 | 408 | ||
392 | ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); | 409 | ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); |
393 | if (ret) | 410 | if (ret) |
@@ -399,6 +416,7 @@ static int conn_khelper(struct drbd_connection *connection, char *cmd) | |||
399 | usermode_helper, cmd, resource_name, | 416 | usermode_helper, cmd, resource_name, |
400 | (ret >> 8) & 0xff, ret); | 417 | (ret >> 8) & 0xff, ret); |
401 | /* TODO: conn_bcast_event() ?? */ | 418 | /* TODO: conn_bcast_event() ?? */ |
419 | notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret); | ||
402 | 420 | ||
403 | if (ret < 0) /* Ignore any ERRNOs we got. */ | 421 | if (ret < 0) /* Ignore any ERRNOs we got. */ |
404 | ret = 0; | 422 | ret = 0; |
@@ -847,9 +865,11 @@ char *ppsize(char *buf, unsigned long long size) | |||
847 | * and can be long lived. | 865 | * and can be long lived. |
848 | * This changes a device->flag, is triggered by drbd internals, | 866 | * This changes a device->flag, is triggered by drbd internals, |
849 | * and should be short-lived. */ | 867 | * and should be short-lived. */ |
868 | /* It needs to be a counter, since multiple threads might | ||
869 | independently suspend and resume IO. */ | ||
850 | void drbd_suspend_io(struct drbd_device *device) | 870 | void drbd_suspend_io(struct drbd_device *device) |
851 | { | 871 | { |
852 | set_bit(SUSPEND_IO, &device->flags); | 872 | atomic_inc(&device->suspend_cnt); |
853 | if (drbd_suspended(device)) | 873 | if (drbd_suspended(device)) |
854 | return; | 874 | return; |
855 | wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt)); | 875 | wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt)); |
@@ -857,8 +877,8 @@ void drbd_suspend_io(struct drbd_device *device) | |||
857 | 877 | ||
858 | void drbd_resume_io(struct drbd_device *device) | 878 | void drbd_resume_io(struct drbd_device *device) |
859 | { | 879 | { |
860 | clear_bit(SUSPEND_IO, &device->flags); | 880 | if (atomic_dec_and_test(&device->suspend_cnt)) |
861 | wake_up(&device->misc_wait); | 881 | wake_up(&device->misc_wait); |
862 | } | 882 | } |
863 | 883 | ||
864 | /** | 884 | /** |
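The hunk above replaces the single SUSPEND_IO flag bit with a suspend counter, so several threads can suspend IO independently and only the final drbd_resume_io() wakes up waiters. A minimal sketch of that counting pattern, using C11 atomics and hypothetical names rather than DRBD's types:

    #include <assert.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    /* Hypothetical stand-in for the per-device state touched by the patch. */
    struct dev_state {
            atomic_int suspend_cnt;   /* replaces the single SUSPEND_IO flag bit */
    };

    static void io_suspend(struct dev_state *s)
    {
            /* Every caller takes a reference; IO stays suspended until all drop it. */
            atomic_fetch_add(&s->suspend_cnt, 1);
    }

    static bool io_resume(struct dev_state *s)
    {
            /* Only the last resume (counter drops to zero) should wake up waiters. */
            return atomic_fetch_sub(&s->suspend_cnt, 1) == 1;
    }

    int main(void)
    {
            struct dev_state s = { 0 };

            io_suspend(&s);
            io_suspend(&s);                 /* second, independent suspend */
            assert(!io_resume(&s));         /* first resume: still suspended */
            assert(io_resume(&s));          /* last resume: wake waiters now */
            return 0;
    }

With a plain flag bit, the first resume would clear the bit and wake waiters even while another suspend was still outstanding; the counter closes that race.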
@@ -871,27 +891,32 @@ void drbd_resume_io(struct drbd_device *device) | |||
871 | enum determine_dev_size | 891 | enum determine_dev_size |
872 | drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local) | 892 | drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local) |
873 | { | 893 | { |
874 | sector_t prev_first_sect, prev_size; /* previous meta location */ | 894 | struct md_offsets_and_sizes { |
875 | sector_t la_size_sect, u_size; | 895 | u64 last_agreed_sect; |
896 | u64 md_offset; | ||
897 | s32 al_offset; | ||
898 | s32 bm_offset; | ||
899 | u32 md_size_sect; | ||
900 | |||
901 | u32 al_stripes; | ||
902 | u32 al_stripe_size_4k; | ||
903 | } prev; | ||
904 | sector_t u_size, size; | ||
876 | struct drbd_md *md = &device->ldev->md; | 905 | struct drbd_md *md = &device->ldev->md; |
877 | u32 prev_al_stripe_size_4k; | ||
878 | u32 prev_al_stripes; | ||
879 | sector_t size; | ||
880 | char ppb[10]; | 906 | char ppb[10]; |
881 | void *buffer; | 907 | void *buffer; |
882 | 908 | ||
883 | int md_moved, la_size_changed; | 909 | int md_moved, la_size_changed; |
884 | enum determine_dev_size rv = DS_UNCHANGED; | 910 | enum determine_dev_size rv = DS_UNCHANGED; |
885 | 911 | ||
886 | /* race: | 912 | /* We may change the on-disk offsets of our meta data below. Lock out |
887 | * application request passes inc_ap_bio, | 913 | * anything that may cause meta data IO, to avoid acting on incomplete |
888 | * but then cannot get an AL-reference. | 914 | * layout changes or scribbling over meta data that is in the process |
889 | * this function later may wait on ap_bio_cnt == 0. -> deadlock. | 915 | * of being moved. |
890 | * | 916 | * |
891 | * to avoid that: | 917 | * Move is not exactly correct, btw: currently we have all our meta |
892 | * Suspend IO right here. | 918 | * data in core memory; to "move" it we just write it all out, there |
893 | * still lock the act_log to not trigger ASSERTs there. | 919 | * are no reads. */ |
894 | */ | ||
895 | drbd_suspend_io(device); | 920 | drbd_suspend_io(device); |
896 | buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */ | 921 | buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */ |
897 | if (!buffer) { | 922 | if (!buffer) { |
@@ -899,19 +924,17 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct | |||
899 | return DS_ERROR; | 924 | return DS_ERROR; |
900 | } | 925 | } |
901 | 926 | ||
902 | /* no wait necessary anymore, actually we could assert that */ | 927 | /* remember current offset and sizes */ |
903 | wait_event(device->al_wait, lc_try_lock(device->act_log)); | 928 | prev.last_agreed_sect = md->la_size_sect; |
904 | 929 | prev.md_offset = md->md_offset; | |
905 | prev_first_sect = drbd_md_first_sector(device->ldev); | 930 | prev.al_offset = md->al_offset; |
906 | prev_size = device->ldev->md.md_size_sect; | 931 | prev.bm_offset = md->bm_offset; |
907 | la_size_sect = device->ldev->md.la_size_sect; | 932 | prev.md_size_sect = md->md_size_sect; |
933 | prev.al_stripes = md->al_stripes; | ||
934 | prev.al_stripe_size_4k = md->al_stripe_size_4k; | ||
908 | 935 | ||
909 | if (rs) { | 936 | if (rs) { |
910 | /* rs is non NULL if we should change the AL layout only */ | 937 | /* rs is non NULL if we should change the AL layout only */ |
911 | |||
912 | prev_al_stripes = md->al_stripes; | ||
913 | prev_al_stripe_size_4k = md->al_stripe_size_4k; | ||
914 | |||
915 | md->al_stripes = rs->al_stripes; | 938 | md->al_stripes = rs->al_stripes; |
916 | md->al_stripe_size_4k = rs->al_stripe_size / 4; | 939 | md->al_stripe_size_4k = rs->al_stripe_size / 4; |
917 | md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4; | 940 | md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4; |
@@ -924,7 +947,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct | |||
924 | rcu_read_unlock(); | 947 | rcu_read_unlock(); |
925 | size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED); | 948 | size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED); |
926 | 949 | ||
927 | if (size < la_size_sect) { | 950 | if (size < prev.last_agreed_sect) { |
928 | if (rs && u_size == 0) { | 951 | if (rs && u_size == 0) { |
929 | /* Remove "rs &&" later. This check should always be active, but | 952 | /* Remove "rs &&" later. This check should always be active, but |
930 | right now the receiver expects the permissive behavior */ | 953 | right now the receiver expects the permissive behavior */ |
@@ -945,30 +968,29 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct | |||
945 | err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC)); | 968 | err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC)); |
946 | if (unlikely(err)) { | 969 | if (unlikely(err)) { |
947 | /* currently there is only one error: ENOMEM! */ | 970 | /* currently there is only one error: ENOMEM! */ |
948 | size = drbd_bm_capacity(device)>>1; | 971 | size = drbd_bm_capacity(device); |
949 | if (size == 0) { | 972 | if (size == 0) { |
950 | drbd_err(device, "OUT OF MEMORY! " | 973 | drbd_err(device, "OUT OF MEMORY! " |
951 | "Could not allocate bitmap!\n"); | 974 | "Could not allocate bitmap!\n"); |
952 | } else { | 975 | } else { |
953 | drbd_err(device, "BM resizing failed. " | 976 | drbd_err(device, "BM resizing failed. " |
954 | "Leaving size unchanged at size = %lu KB\n", | 977 | "Leaving size unchanged\n"); |
955 | (unsigned long)size); | ||
956 | } | 978 | } |
957 | rv = DS_ERROR; | 979 | rv = DS_ERROR; |
958 | } | 980 | } |
959 | /* racy, see comments above. */ | 981 | /* racy, see comments above. */ |
960 | drbd_set_my_capacity(device, size); | 982 | drbd_set_my_capacity(device, size); |
961 | device->ldev->md.la_size_sect = size; | 983 | md->la_size_sect = size; |
962 | drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), | 984 | drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), |
963 | (unsigned long long)size>>1); | 985 | (unsigned long long)size>>1); |
964 | } | 986 | } |
965 | if (rv <= DS_ERROR) | 987 | if (rv <= DS_ERROR) |
966 | goto err_out; | 988 | goto err_out; |
967 | 989 | ||
968 | la_size_changed = (la_size_sect != device->ldev->md.la_size_sect); | 990 | la_size_changed = (prev.last_agreed_sect != md->la_size_sect); |
969 | 991 | ||
970 | md_moved = prev_first_sect != drbd_md_first_sector(device->ldev) | 992 | md_moved = prev.md_offset != md->md_offset |
971 | || prev_size != device->ldev->md.md_size_sect; | 993 | || prev.md_size_sect != md->md_size_sect; |
972 | 994 | ||
973 | if (la_size_changed || md_moved || rs) { | 995 | if (la_size_changed || md_moved || rs) { |
974 | u32 prev_flags; | 996 | u32 prev_flags; |
@@ -977,20 +999,29 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct | |||
977 | * Clear the timer, to avoid scary "timer expired!" messages, | 999 | * Clear the timer, to avoid scary "timer expired!" messages, |
978 | * "Superblock" is written out at least twice below, anyways. */ | 1000 | * "Superblock" is written out at least twice below, anyways. */ |
979 | del_timer(&device->md_sync_timer); | 1001 | del_timer(&device->md_sync_timer); |
980 | drbd_al_shrink(device); /* All extents inactive. */ | ||
981 | 1002 | ||
1003 | /* We won't change the "al-extents" setting, we just may need | ||
1004 | * to move the on-disk location of the activity log ringbuffer. | ||
1005 | * Lock for transaction is good enough, it may well be "dirty" | ||
1006 | * or even "starving". */ | ||
1007 | wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log)); | ||
1008 | |||
1009 | /* mark current on-disk bitmap and activity log as unreliable */ | ||
982 | prev_flags = md->flags; | 1010 | prev_flags = md->flags; |
983 | md->flags &= ~MDF_PRIMARY_IND; | 1011 | md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED; |
984 | drbd_md_write(device, buffer); | 1012 | drbd_md_write(device, buffer); |
985 | 1013 | ||
1014 | drbd_al_initialize(device, buffer); | ||
1015 | |||
986 | drbd_info(device, "Writing the whole bitmap, %s\n", | 1016 | drbd_info(device, "Writing the whole bitmap, %s\n", |
987 | la_size_changed && md_moved ? "size changed and md moved" : | 1017 | la_size_changed && md_moved ? "size changed and md moved" : |
988 | la_size_changed ? "size changed" : "md moved"); | 1018 | la_size_changed ? "size changed" : "md moved"); |
989 | /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ | 1019 | /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ |
990 | drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write, | 1020 | drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write, |
991 | "size changed", BM_LOCKED_MASK); | 1021 | "size changed", BM_LOCKED_MASK); |
992 | drbd_initialize_al(device, buffer); | ||
993 | 1022 | ||
1023 | /* on-disk bitmap and activity log is authoritative again | ||
1024 | * (unless there was an IO error meanwhile...) */ | ||
994 | md->flags = prev_flags; | 1025 | md->flags = prev_flags; |
995 | drbd_md_write(device, buffer); | 1026 | drbd_md_write(device, buffer); |
996 | 1027 | ||
@@ -999,20 +1030,22 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct | |||
999 | md->al_stripes, md->al_stripe_size_4k * 4); | 1030 | md->al_stripes, md->al_stripe_size_4k * 4); |
1000 | } | 1031 | } |
1001 | 1032 | ||
1002 | if (size > la_size_sect) | 1033 | if (size > prev.last_agreed_sect) |
1003 | rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO; | 1034 | rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO; |
1004 | if (size < la_size_sect) | 1035 | if (size < prev.last_agreed_sect) |
1005 | rv = DS_SHRUNK; | 1036 | rv = DS_SHRUNK; |
1006 | 1037 | ||
1007 | if (0) { | 1038 | if (0) { |
1008 | err_out: | 1039 | err_out: |
1009 | if (rs) { | 1040 | /* restore previous offset and sizes */ |
1010 | md->al_stripes = prev_al_stripes; | 1041 | md->la_size_sect = prev.last_agreed_sect; |
1011 | md->al_stripe_size_4k = prev_al_stripe_size_4k; | 1042 | md->md_offset = prev.md_offset; |
1012 | md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k; | 1043 | md->al_offset = prev.al_offset; |
1013 | 1044 | md->bm_offset = prev.bm_offset; | |
1014 | drbd_md_set_sector_offsets(device, device->ldev); | 1045 | md->md_size_sect = prev.md_size_sect; |
1015 | } | 1046 | md->al_stripes = prev.al_stripes; |
1047 | md->al_stripe_size_4k = prev.al_stripe_size_4k; | ||
1048 | md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k; | ||
1016 | } | 1049 | } |
1017 | lc_unlock(device->act_log); | 1050 | lc_unlock(device->act_log); |
1018 | wake_up(&device->al_wait); | 1051 | wake_up(&device->al_wait); |
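The rewritten error path above restores the complete previous metadata layout, not just the activity-log stripe settings, by snapshotting everything that may change into a local prev struct before touching it. A small sketch of that snapshot-and-restore pattern; the struct members mirror the patch, but the types and the do_resize() helper are illustrative stand-ins:

    /* Illustrative layout snapshot; mirrors the "prev" block in the patch. */
    struct md_layout {
            unsigned long long la_size_sect;
            unsigned long long md_offset;
            int al_offset, bm_offset;
            unsigned int md_size_sect, al_stripes, al_stripe_size_4k;
    };

    /* Hypothetical resize step that may rewrite any field of *md. */
    static int do_resize(struct md_layout *md)
    {
            md->la_size_sect += 1;
            return 0;
    }

    static int resize_with_rollback(struct md_layout *md)
    {
            struct md_layout prev = *md;    /* remember everything we may change */
            int err = do_resize(md);

            if (err)
                    *md = prev;             /* error: restore the full layout */
            return err;
    }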
@@ -1115,8 +1148,7 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) | |||
1115 | lc_destroy(n); | 1148 | lc_destroy(n); |
1116 | return -EBUSY; | 1149 | return -EBUSY; |
1117 | } else { | 1150 | } else { |
1118 | if (t) | 1151 | lc_destroy(t); |
1119 | lc_destroy(t); | ||
1120 | } | 1152 | } |
1121 | drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */ | 1153 | drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elements */ |
1122 | return 0; | 1154 | return 0; |
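The `if (t)` guard dropped above is redundant because lc_destroy(), like kfree(), is safe to call with a NULL pointer. A self-contained illustration of that convention (plain C, not the lru_cache API):

    #include <stdlib.h>

    struct cache { int *slots; };

    static void cache_destroy(struct cache *c)
    {
            if (!c)                 /* accept NULL so callers need no guard */
                    return;
            free(c->slots);
            free(c);
    }

    int main(void)
    {
            struct cache *t = NULL;
            cache_destroy(t);       /* fine: no check required at the call site */
            return 0;
    }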
@@ -1151,21 +1183,20 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi | |||
1151 | if (b) { | 1183 | if (b) { |
1152 | struct drbd_connection *connection = first_peer_device(device)->connection; | 1184 | struct drbd_connection *connection = first_peer_device(device)->connection; |
1153 | 1185 | ||
1186 | blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS); | ||
1187 | |||
1154 | if (blk_queue_discard(b) && | 1188 | if (blk_queue_discard(b) && |
1155 | (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) { | 1189 | (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) { |
1156 | /* For now, don't allow more than one activity log extent worth of data | 1190 | /* We don't care, stacking below should fix it for the local device. |
1157 | * to be discarded in one go. We may need to rework drbd_al_begin_io() | 1191 | * Whether or not it is a suitable granularity on the remote device |
1158 | * to allow for even larger discard ranges */ | 1192 | * is not our problem, really. If you care, you need to |
1159 | blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS); | 1193 | * use devices with similar topology on all peers. */ |
1160 | 1194 | q->limits.discard_granularity = 512; | |
1161 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); | 1195 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); |
1162 | /* REALLY? Is stacking secdiscard "legal"? */ | ||
1163 | if (blk_queue_secdiscard(b)) | ||
1164 | queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q); | ||
1165 | } else { | 1196 | } else { |
1166 | blk_queue_max_discard_sectors(q, 0); | 1197 | blk_queue_max_discard_sectors(q, 0); |
1167 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); | 1198 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); |
1168 | queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q); | 1199 | q->limits.discard_granularity = 0; |
1169 | } | 1200 | } |
1170 | 1201 | ||
1171 | blk_queue_stack_limits(q, b); | 1202 | blk_queue_stack_limits(q, b); |
@@ -1177,6 +1208,12 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi | |||
1177 | q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; | 1208 | q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; |
1178 | } | 1209 | } |
1179 | } | 1210 | } |
1211 | /* To avoid confusion, if this queue does not support discard, clear | ||
1212 | * max_discard_sectors, which is what lsblk -D reports to the user. */ | ||
1213 | if (!blk_queue_discard(q)) { | ||
1214 | blk_queue_max_discard_sectors(q, 0); | ||
1215 | q->limits.discard_granularity = 0; | ||
1216 | } | ||
1180 | } | 1217 | } |
1181 | 1218 | ||
1182 | void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev) | 1219 | void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev) |
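The queue setup above now advertises a discard limit up front and only keeps it if the backing queue supports discard and the peer (when connected) has agreed to the TRIM feature; granularity is set coarsely and left to limit stacking, and a final step zeroes both limits whenever the assembled queue ends up without discard support, so lsblk -D does not show stale values. A simplified sketch of that decision, with stand-in types and a placeholder limit instead of struct request_queue:

    #include <stdbool.h>

    /* Placeholder limit; not DRBD's actual DRBD_MAX_DISCARD_SECTORS value. */
    #define SKETCH_MAX_DISCARD_SECTORS (1U << 20)

    /* Stand-in for the few request-queue limits the patch touches. */
    struct q_limits {
            unsigned int max_discard_sectors;
            unsigned int discard_granularity;
            bool discard_flag;              /* QUEUE_FLAG_DISCARD equivalent */
    };

    static void setup_discard(struct q_limits *q, bool backing_discard, bool peer_allows_trim)
    {
            q->max_discard_sectors = SKETCH_MAX_DISCARD_SECTORS;

            if (backing_discard && peer_allows_trim) {
                    /* Granularity is left coarse; stacking the backing queue's
                     * limits below refines it for the local device. */
                    q->discard_granularity = 512;
                    q->discard_flag = true;
            } else {
                    q->max_discard_sectors = 0;
                    q->discard_granularity = 0;
                    q->discard_flag = false;
            }

            /* Final sanity step from the patch: if the queue ends up without
             * discard support, never advertise non-zero limits. */
            if (!q->discard_flag) {
                    q->max_discard_sectors = 0;
                    q->discard_granularity = 0;
            }
    }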
@@ -1241,8 +1278,8 @@ static void conn_reconfig_done(struct drbd_connection *connection) | |||
1241 | connection->cstate == C_STANDALONE; | 1278 | connection->cstate == C_STANDALONE; |
1242 | spin_unlock_irq(&connection->resource->req_lock); | 1279 | spin_unlock_irq(&connection->resource->req_lock); |
1243 | if (stop_threads) { | 1280 | if (stop_threads) { |
1244 | /* asender is implicitly stopped by receiver | 1281 | /* ack_receiver thread and ack_sender workqueue are implicitly |
1245 | * in conn_disconnect() */ | 1282 | * stopped by receiver in conn_disconnect() */ |
1246 | drbd_thread_stop(&connection->receiver); | 1283 | drbd_thread_stop(&connection->receiver); |
1247 | drbd_thread_stop(&connection->worker); | 1284 | drbd_thread_stop(&connection->worker); |
1248 | } | 1285 | } |
@@ -1389,13 +1426,13 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) | |||
1389 | goto fail_unlock; | 1426 | goto fail_unlock; |
1390 | } | 1427 | } |
1391 | 1428 | ||
1392 | write_lock_irq(&global_state_lock); | 1429 | lock_all_resources(); |
1393 | retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after); | 1430 | retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after); |
1394 | if (retcode == NO_ERROR) { | 1431 | if (retcode == NO_ERROR) { |
1395 | rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); | 1432 | rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf); |
1396 | drbd_resync_after_changed(device); | 1433 | drbd_resync_after_changed(device); |
1397 | } | 1434 | } |
1398 | write_unlock_irq(&global_state_lock); | 1435 | unlock_all_resources(); |
1399 | 1436 | ||
1400 | if (retcode != NO_ERROR) | 1437 | if (retcode != NO_ERROR) |
1401 | goto fail_unlock; | 1438 | goto fail_unlock; |
@@ -1418,7 +1455,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) | |||
1418 | set_bit(MD_NO_FUA, &device->flags); | 1455 | set_bit(MD_NO_FUA, &device->flags); |
1419 | 1456 | ||
1420 | if (write_ordering_changed(old_disk_conf, new_disk_conf)) | 1457 | if (write_ordering_changed(old_disk_conf, new_disk_conf)) |
1421 | drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush); | 1458 | drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH); |
1422 | 1459 | ||
1423 | drbd_md_sync(device); | 1460 | drbd_md_sync(device); |
1424 | 1461 | ||
@@ -1449,6 +1486,88 @@ success: | |||
1449 | return 0; | 1486 | return 0; |
1450 | } | 1487 | } |
1451 | 1488 | ||
1489 | static struct block_device *open_backing_dev(struct drbd_device *device, | ||
1490 | const char *bdev_path, void *claim_ptr, bool do_bd_link) | ||
1491 | { | ||
1492 | struct block_device *bdev; | ||
1493 | int err = 0; | ||
1494 | |||
1495 | bdev = blkdev_get_by_path(bdev_path, | ||
1496 | FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr); | ||
1497 | if (IS_ERR(bdev)) { | ||
1498 | drbd_err(device, "open(\"%s\") failed with %ld\n", | ||
1499 | bdev_path, PTR_ERR(bdev)); | ||
1500 | return bdev; | ||
1501 | } | ||
1502 | |||
1503 | if (!do_bd_link) | ||
1504 | return bdev; | ||
1505 | |||
1506 | err = bd_link_disk_holder(bdev, device->vdisk); | ||
1507 | if (err) { | ||
1508 | blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | ||
1509 | drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n", | ||
1510 | bdev_path, err); | ||
1511 | bdev = ERR_PTR(err); | ||
1512 | } | ||
1513 | return bdev; | ||
1514 | } | ||
1515 | |||
1516 | static int open_backing_devices(struct drbd_device *device, | ||
1517 | struct disk_conf *new_disk_conf, | ||
1518 | struct drbd_backing_dev *nbc) | ||
1519 | { | ||
1520 | struct block_device *bdev; | ||
1521 | |||
1522 | bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true); | ||
1523 | if (IS_ERR(bdev)) | ||
1524 | return ERR_OPEN_DISK; | ||
1525 | nbc->backing_bdev = bdev; | ||
1526 | |||
1527 | /* | ||
1528 | * meta_dev_idx >= 0: external fixed size, possibly multiple | ||
1529 | * drbd sharing one meta device. TODO in that case, paranoia | ||
1530 | * check that [md_bdev, meta_dev_idx] is not yet used by some | ||
1531 | * other drbd minor! (if you use drbd.conf + drbdadm, that | ||
1532 | * should check it for you already; but if you don't, or | ||
1533 | * someone fooled it, we need to double check here) | ||
1534 | */ | ||
1535 | bdev = open_backing_dev(device, new_disk_conf->meta_dev, | ||
1536 | /* claim ptr: device, if claimed exclusively; shared drbd_m_holder, | ||
1537 | * if potentially shared with other drbd minors */ | ||
1538 | (new_disk_conf->meta_dev_idx < 0) ? (void*)device : (void*)drbd_m_holder, | ||
1539 | /* avoid double bd_claim_by_disk() for the same (source,target) tuple, | ||
1540 | * as would happen with internal metadata. */ | ||
1541 | (new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT && | ||
1542 | new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL)); | ||
1543 | if (IS_ERR(bdev)) | ||
1544 | return ERR_OPEN_MD_DISK; | ||
1545 | nbc->md_bdev = bdev; | ||
1546 | return NO_ERROR; | ||
1547 | } | ||
1548 | |||
1549 | static void close_backing_dev(struct drbd_device *device, struct block_device *bdev, | ||
1550 | bool do_bd_unlink) | ||
1551 | { | ||
1552 | if (!bdev) | ||
1553 | return; | ||
1554 | if (do_bd_unlink) | ||
1555 | bd_unlink_disk_holder(bdev, device->vdisk); | ||
1556 | blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | ||
1557 | } | ||
1558 | |||
1559 | void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev) | ||
1560 | { | ||
1561 | if (ldev == NULL) | ||
1562 | return; | ||
1563 | |||
1564 | close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev); | ||
1565 | close_backing_dev(device, ldev->backing_bdev, true); | ||
1566 | |||
1567 | kfree(ldev->disk_conf); | ||
1568 | kfree(ldev); | ||
1569 | } | ||
1570 | |||
1452 | int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | 1571 | int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) |
1453 | { | 1572 | { |
1454 | struct drbd_config_context adm_ctx; | 1573 | struct drbd_config_context adm_ctx; |
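The new open_backing_devices() above encodes two decisions for the meta-data device: the exclusive-open claim pointer is the device itself for internal metadata (index < 0) but a holder shared across minors for external metadata, and the bd_link_disk_holder() step is skipped for internal metadata so the same (holder, disk) tuple is not linked twice. A compact sketch of just that decision logic, with simplified stand-in types and names:

    #include <stdbool.h>

    /* Internal meta-data index values, mirroring the DRBD_MD_INDEX_* idea. */
    enum { MD_IDX_INTERNAL = -1, MD_IDX_FLEX_INT = -3 };

    static const char meta_holder[] = "shared meta-data holder";

    struct md_open_args {
            const void *claim_ptr;  /* owner passed to the exclusive open */
            bool link_holder;       /* register an extra holder link? */
    };

    static struct md_open_args md_open_args(const void *device, int meta_dev_idx)
    {
            struct md_open_args a;

            /* index >= 0: external meta device, possibly shared by several minors */
            a.claim_ptr = (meta_dev_idx < 0) ? device : (const void *)meta_holder;

            /* internal metadata: backing and meta bdev are the same device, so
             * avoid registering the same holder link twice */
            a.link_holder = (meta_dev_idx != MD_IDX_FLEX_INT &&
                             meta_dev_idx != MD_IDX_INTERNAL);
            return a;
    }

The matching teardown in the patch mirrors this: close_backing_dev() only unlinks the holder for the meta bdev when it is a separate device from the backing bdev.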
@@ -1462,7 +1581,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1462 | sector_t min_md_device_sectors; | 1581 | sector_t min_md_device_sectors; |
1463 | struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ | 1582 | struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ |
1464 | struct disk_conf *new_disk_conf = NULL; | 1583 | struct disk_conf *new_disk_conf = NULL; |
1465 | struct block_device *bdev; | ||
1466 | struct lru_cache *resync_lru = NULL; | 1584 | struct lru_cache *resync_lru = NULL; |
1467 | struct fifo_buffer *new_plan = NULL; | 1585 | struct fifo_buffer *new_plan = NULL; |
1468 | union drbd_state ns, os; | 1586 | union drbd_state ns, os; |
@@ -1478,7 +1596,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1478 | device = adm_ctx.device; | 1596 | device = adm_ctx.device; |
1479 | mutex_lock(&adm_ctx.resource->adm_mutex); | 1597 | mutex_lock(&adm_ctx.resource->adm_mutex); |
1480 | peer_device = first_peer_device(device); | 1598 | peer_device = first_peer_device(device); |
1481 | connection = peer_device ? peer_device->connection : NULL; | 1599 | connection = peer_device->connection; |
1482 | conn_reconfig_start(connection); | 1600 | conn_reconfig_start(connection); |
1483 | 1601 | ||
1484 | /* if you want to reconfigure, please tear down first */ | 1602 | /* if you want to reconfigure, please tear down first */ |
@@ -1539,12 +1657,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1539 | goto fail; | 1657 | goto fail; |
1540 | } | 1658 | } |
1541 | 1659 | ||
1542 | write_lock_irq(&global_state_lock); | ||
1543 | retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after); | ||
1544 | write_unlock_irq(&global_state_lock); | ||
1545 | if (retcode != NO_ERROR) | ||
1546 | goto fail; | ||
1547 | |||
1548 | rcu_read_lock(); | 1660 | rcu_read_lock(); |
1549 | nc = rcu_dereference(connection->net_conf); | 1661 | nc = rcu_dereference(connection->net_conf); |
1550 | if (nc) { | 1662 | if (nc) { |
@@ -1556,35 +1668,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1556 | } | 1668 | } |
1557 | rcu_read_unlock(); | 1669 | rcu_read_unlock(); |
1558 | 1670 | ||
1559 | bdev = blkdev_get_by_path(new_disk_conf->backing_dev, | 1671 | retcode = open_backing_devices(device, new_disk_conf, nbc); |
1560 | FMODE_READ | FMODE_WRITE | FMODE_EXCL, device); | 1672 | if (retcode != NO_ERROR) |
1561 | if (IS_ERR(bdev)) { | ||
1562 | drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev, | ||
1563 | PTR_ERR(bdev)); | ||
1564 | retcode = ERR_OPEN_DISK; | ||
1565 | goto fail; | ||
1566 | } | ||
1567 | nbc->backing_bdev = bdev; | ||
1568 | |||
1569 | /* | ||
1570 | * meta_dev_idx >= 0: external fixed size, possibly multiple | ||
1571 | * drbd sharing one meta device. TODO in that case, paranoia | ||
1572 | * check that [md_bdev, meta_dev_idx] is not yet used by some | ||
1573 | * other drbd minor! (if you use drbd.conf + drbdadm, that | ||
1574 | * should check it for you already; but if you don't, or | ||
1575 | * someone fooled it, we need to double check here) | ||
1576 | */ | ||
1577 | bdev = blkdev_get_by_path(new_disk_conf->meta_dev, | ||
1578 | FMODE_READ | FMODE_WRITE | FMODE_EXCL, | ||
1579 | (new_disk_conf->meta_dev_idx < 0) ? | ||
1580 | (void *)device : (void *)drbd_m_holder); | ||
1581 | if (IS_ERR(bdev)) { | ||
1582 | drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev, | ||
1583 | PTR_ERR(bdev)); | ||
1584 | retcode = ERR_OPEN_MD_DISK; | ||
1585 | goto fail; | 1673 | goto fail; |
1586 | } | ||
1587 | nbc->md_bdev = bdev; | ||
1588 | 1674 | ||
1589 | if ((nbc->backing_bdev == nbc->md_bdev) != | 1675 | if ((nbc->backing_bdev == nbc->md_bdev) != |
1590 | (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL || | 1676 | (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL || |
@@ -1707,6 +1793,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1707 | goto force_diskless_dec; | 1793 | goto force_diskless_dec; |
1708 | } | 1794 | } |
1709 | 1795 | ||
1796 | lock_all_resources(); | ||
1797 | retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after); | ||
1798 | if (retcode != NO_ERROR) { | ||
1799 | unlock_all_resources(); | ||
1800 | goto force_diskless_dec; | ||
1801 | } | ||
1802 | |||
1710 | /* Reset the "barriers don't work" bits here, then force meta data to | 1803 | /* Reset the "barriers don't work" bits here, then force meta data to |
1711 | * be written, to ensure we determine if barriers are supported. */ | 1804 | * be written, to ensure we determine if barriers are supported. */ |
1712 | if (new_disk_conf->md_flushes) | 1805 | if (new_disk_conf->md_flushes) |
@@ -1727,7 +1820,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1727 | new_disk_conf = NULL; | 1820 | new_disk_conf = NULL; |
1728 | new_plan = NULL; | 1821 | new_plan = NULL; |
1729 | 1822 | ||
1730 | drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush); | 1823 | drbd_resync_after_changed(device); |
1824 | drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH); | ||
1825 | unlock_all_resources(); | ||
1731 | 1826 | ||
1732 | if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY)) | 1827 | if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY)) |
1733 | set_bit(CRASHED_PRIMARY, &device->flags); | 1828 | set_bit(CRASHED_PRIMARY, &device->flags); |
@@ -1875,12 +1970,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1875 | fail: | 1970 | fail: |
1876 | conn_reconfig_done(connection); | 1971 | conn_reconfig_done(connection); |
1877 | if (nbc) { | 1972 | if (nbc) { |
1878 | if (nbc->backing_bdev) | 1973 | close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev); |
1879 | blkdev_put(nbc->backing_bdev, | 1974 | close_backing_dev(device, nbc->backing_bdev, true); |
1880 | FMODE_READ | FMODE_WRITE | FMODE_EXCL); | ||
1881 | if (nbc->md_bdev) | ||
1882 | blkdev_put(nbc->md_bdev, | ||
1883 | FMODE_READ | FMODE_WRITE | FMODE_EXCL); | ||
1884 | kfree(nbc); | 1975 | kfree(nbc); |
1885 | } | 1976 | } |
1886 | kfree(new_disk_conf); | 1977 | kfree(new_disk_conf); |
@@ -1895,6 +1986,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1895 | static int adm_detach(struct drbd_device *device, int force) | 1986 | static int adm_detach(struct drbd_device *device, int force) |
1896 | { | 1987 | { |
1897 | enum drbd_state_rv retcode; | 1988 | enum drbd_state_rv retcode; |
1989 | void *buffer; | ||
1898 | int ret; | 1990 | int ret; |
1899 | 1991 | ||
1900 | if (force) { | 1992 | if (force) { |
@@ -1905,13 +1997,16 @@ static int adm_detach(struct drbd_device *device, int force) | |||
1905 | } | 1997 | } |
1906 | 1998 | ||
1907 | drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ | 1999 | drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ |
1908 | drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */ | 2000 | buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */ |
1909 | retcode = drbd_request_state(device, NS(disk, D_FAILED)); | 2001 | if (buffer) { |
1910 | drbd_md_put_buffer(device); | 2002 | retcode = drbd_request_state(device, NS(disk, D_FAILED)); |
2003 | drbd_md_put_buffer(device); | ||
2004 | } else /* already <= D_FAILED */ | ||
2005 | retcode = SS_NOTHING_TO_DO; | ||
1911 | /* D_FAILED will transition to DISKLESS. */ | 2006 | /* D_FAILED will transition to DISKLESS. */ |
2007 | drbd_resume_io(device); | ||
1912 | ret = wait_event_interruptible(device->misc_wait, | 2008 | ret = wait_event_interruptible(device->misc_wait, |
1913 | device->state.disk != D_FAILED); | 2009 | device->state.disk != D_FAILED); |
1914 | drbd_resume_io(device); | ||
1915 | if ((int)retcode == (int)SS_IS_DISKLESS) | 2010 | if ((int)retcode == (int)SS_IS_DISKLESS) |
1916 | retcode = SS_NOTHING_TO_DO; | 2011 | retcode = SS_NOTHING_TO_DO; |
1917 | if (ret) | 2012 | if (ret) |
@@ -2245,8 +2340,31 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) | |||
2245 | return 0; | 2340 | return 0; |
2246 | } | 2341 | } |
2247 | 2342 | ||
2343 | static void connection_to_info(struct connection_info *info, | ||
2344 | struct drbd_connection *connection) | ||
2345 | { | ||
2346 | info->conn_connection_state = connection->cstate; | ||
2347 | info->conn_role = conn_highest_peer(connection); | ||
2348 | } | ||
2349 | |||
2350 | static void peer_device_to_info(struct peer_device_info *info, | ||
2351 | struct drbd_peer_device *peer_device) | ||
2352 | { | ||
2353 | struct drbd_device *device = peer_device->device; | ||
2354 | |||
2355 | info->peer_repl_state = | ||
2356 | max_t(enum drbd_conns, C_WF_REPORT_PARAMS, device->state.conn); | ||
2357 | info->peer_disk_state = device->state.pdsk; | ||
2358 | info->peer_resync_susp_user = device->state.user_isp; | ||
2359 | info->peer_resync_susp_peer = device->state.peer_isp; | ||
2360 | info->peer_resync_susp_dependency = device->state.aftr_isp; | ||
2361 | } | ||
2362 | |||
2248 | int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) | 2363 | int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) |
2249 | { | 2364 | { |
2365 | struct connection_info connection_info; | ||
2366 | enum drbd_notification_type flags; | ||
2367 | unsigned int peer_devices = 0; | ||
2250 | struct drbd_config_context adm_ctx; | 2368 | struct drbd_config_context adm_ctx; |
2251 | struct drbd_peer_device *peer_device; | 2369 | struct drbd_peer_device *peer_device; |
2252 | struct net_conf *old_net_conf, *new_net_conf = NULL; | 2370 | struct net_conf *old_net_conf, *new_net_conf = NULL; |
@@ -2347,6 +2465,22 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) | |||
2347 | connection->peer_addr_len = nla_len(adm_ctx.peer_addr); | 2465 | connection->peer_addr_len = nla_len(adm_ctx.peer_addr); |
2348 | memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len); | 2466 | memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len); |
2349 | 2467 | ||
2468 | idr_for_each_entry(&connection->peer_devices, peer_device, i) { | ||
2469 | peer_devices++; | ||
2470 | } | ||
2471 | |||
2472 | connection_to_info(&connection_info, connection); | ||
2473 | flags = (peer_devices--) ? NOTIFY_CONTINUES : 0; | ||
2474 | mutex_lock(¬ification_mutex); | ||
2475 | notify_connection_state(NULL, 0, connection, &connection_info, NOTIFY_CREATE | flags); | ||
2476 | idr_for_each_entry(&connection->peer_devices, peer_device, i) { | ||
2477 | struct peer_device_info peer_device_info; | ||
2478 | |||
2479 | peer_device_to_info(&peer_device_info, peer_device); | ||
2480 | flags = (peer_devices--) ? NOTIFY_CONTINUES : 0; | ||
2481 | notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags); | ||
2482 | } | ||
2483 | mutex_unlock(¬ification_mutex); | ||
2350 | mutex_unlock(&adm_ctx.resource->conf_update); | 2484 | mutex_unlock(&adm_ctx.resource->conf_update); |
2351 | 2485 | ||
2352 | rcu_read_lock(); | 2486 | rcu_read_lock(); |
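The notification burst added above marks every event except the last one with NOTIFY_CONTINUES, which lets a listener tell that more events belonging to the same state snapshot follow; the `(peer_devices--)` test counts the remaining events down to zero. A runnable stand-alone sketch of that flagging pattern (plain C, illustrative flag value and names):

    #include <stdio.h>

    enum { NOTIFY_CONTINUES_SKETCH = 0x8000 };      /* illustrative flag value */

    static void emit(const char *what, unsigned int flags)
    {
            printf("%s%s\n", what,
                   (flags & NOTIFY_CONTINUES_SKETCH) ? " (continues)" : "");
    }

    static void broadcast_create(unsigned int peer_devices)
    {
            unsigned int flags;

            /* The connection event "continues" iff at least one peer-device
             * event will follow it. */
            flags = peer_devices ? NOTIFY_CONTINUES_SKETCH : 0;
            emit("connection created", flags);

            while (peer_devices) {
                    /* The last peer device sees zero remaining events and
                     * therefore clears the flag. */
                    flags = --peer_devices ? NOTIFY_CONTINUES_SKETCH : 0;
                    emit("peer device created", flags);
            }
    }

    int main(void)
    {
            broadcast_create(2);
            return 0;
    }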
@@ -2428,6 +2562,8 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection | |||
2428 | drbd_err(connection, | 2562 | drbd_err(connection, |
2429 | "unexpected rv2=%d in conn_try_disconnect()\n", | 2563 | "unexpected rv2=%d in conn_try_disconnect()\n", |
2430 | rv2); | 2564 | rv2); |
2565 | /* Unlike in DRBD 9, the state engine has generated | ||
2566 | * NOTIFY_DESTROY events before clearing connection->net_conf. */ | ||
2431 | } | 2567 | } |
2432 | return rv; | 2568 | return rv; |
2433 | } | 2569 | } |
@@ -2585,6 +2721,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) | |||
2585 | mutex_unlock(&device->resource->conf_update); | 2721 | mutex_unlock(&device->resource->conf_update); |
2586 | synchronize_rcu(); | 2722 | synchronize_rcu(); |
2587 | kfree(old_disk_conf); | 2723 | kfree(old_disk_conf); |
2724 | new_disk_conf = NULL; | ||
2588 | } | 2725 | } |
2589 | 2726 | ||
2590 | ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); | 2727 | ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); |
@@ -2618,6 +2755,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) | |||
2618 | 2755 | ||
2619 | fail_ldev: | 2756 | fail_ldev: |
2620 | put_ldev(device); | 2757 | put_ldev(device); |
2758 | kfree(new_disk_conf); | ||
2621 | goto fail; | 2759 | goto fail; |
2622 | } | 2760 | } |
2623 | 2761 | ||
@@ -2855,7 +2993,30 @@ int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) | |||
2855 | mutex_lock(&adm_ctx.resource->adm_mutex); | 2993 | mutex_lock(&adm_ctx.resource->adm_mutex); |
2856 | device = adm_ctx.device; | 2994 | device = adm_ctx.device; |
2857 | if (test_bit(NEW_CUR_UUID, &device->flags)) { | 2995 | if (test_bit(NEW_CUR_UUID, &device->flags)) { |
2858 | drbd_uuid_new_current(device); | 2996 | if (get_ldev_if_state(device, D_ATTACHING)) { |
2997 | drbd_uuid_new_current(device); | ||
2998 | put_ldev(device); | ||
2999 | } else { | ||
3000 | /* This is effectively a multi-stage "forced down". | ||
3001 | * The NEW_CUR_UUID bit is supposedly only set if we | ||
3002 | * lost the replication connection, and are configured | ||
3003 | * to freeze IO and wait for some fence-peer handler. | ||
3004 | * So we still don't have a replication connection. | ||
3005 | * And now we don't have a local disk either. After | ||
3006 | * resume, we will fail all pending and new IO, because | ||
3007 | * we don't have any data anymore. Which means we will | ||
3008 | * eventually be able to terminate all users of this | ||
3009 | * device, and then take it down. By bumping the | ||
3010 | * "effective" data uuid, we make sure that you really | ||
3011 | * need to tear down before you reconfigure, we will | ||
3012 | * then refuse to re-connect or re-attach (because no | ||
3013 | * matching real data uuid exists). | ||
3014 | */ | ||
3015 | u64 val; | ||
3016 | get_random_bytes(&val, sizeof(u64)); | ||
3017 | drbd_set_ed_uuid(device, val); | ||
3018 | drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n"); | ||
3019 | } | ||
2859 | clear_bit(NEW_CUR_UUID, &device->flags); | 3020 | clear_bit(NEW_CUR_UUID, &device->flags); |
2860 | } | 3021 | } |
2861 | drbd_suspend_io(device); | 3022 | drbd_suspend_io(device); |
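The long comment above explains why, after a forced resume without local data, the device poisons its exposed data UUID with a random value: any later connect or attach will then fail to find a matching UUID and force a proper tear-down first. A trivial userspace stand-in for the UUID generation (the kernel uses get_random_bytes(), not rand()):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    int main(void)
    {
            uint64_t val;

            srand((unsigned)time(NULL));
            /* Assemble 64 pseudo-random bits; enough to illustrate the idea,
             * though no substitute for the kernel's RNG. */
            val = ((uint64_t)rand() << 48) ^ ((uint64_t)rand() << 24) ^ (uint64_t)rand();
            printf("new effective data uuid: 0x%016" PRIx64 "\n", val);
            return 0;
    }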
@@ -2910,6 +3071,486 @@ nla_put_failure: | |||
2910 | } | 3071 | } |
2911 | 3072 | ||
2912 | /* | 3073 | /* |
3074 | * The generic netlink dump callbacks are called outside the genl_lock(), so | ||
3075 | * they cannot use the simple attribute parsing code which uses global | ||
3076 | * attribute tables. | ||
3077 | */ | ||
3078 | static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr) | ||
3079 | { | ||
3080 | const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ; | ||
3081 | const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; | ||
3082 | struct nlattr *nla; | ||
3083 | |||
3084 | nla = nla_find(nlmsg_attrdata(nlh, hdrlen), nlmsg_attrlen(nlh, hdrlen), | ||
3085 | DRBD_NLA_CFG_CONTEXT); | ||
3086 | if (!nla) | ||
3087 | return NULL; | ||
3088 | return drbd_nla_find_nested(maxtype, nla, __nla_type(attr)); | ||
3089 | } | ||
3090 | |||
3091 | static void resource_to_info(struct resource_info *, struct drbd_resource *); | ||
3092 | |||
3093 | int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb) | ||
3094 | { | ||
3095 | struct drbd_genlmsghdr *dh; | ||
3096 | struct drbd_resource *resource; | ||
3097 | struct resource_info resource_info; | ||
3098 | struct resource_statistics resource_statistics; | ||
3099 | int err; | ||
3100 | |||
3101 | rcu_read_lock(); | ||
3102 | if (cb->args[0]) { | ||
3103 | for_each_resource_rcu(resource, &drbd_resources) | ||
3104 | if (resource == (struct drbd_resource *)cb->args[0]) | ||
3105 | goto found_resource; | ||
3106 | err = 0; /* resource was probably deleted */ | ||
3107 | goto out; | ||
3108 | } | ||
3109 | resource = list_entry(&drbd_resources, | ||
3110 | struct drbd_resource, resources); | ||
3111 | |||
3112 | found_resource: | ||
3113 | list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) { | ||
3114 | goto put_result; | ||
3115 | } | ||
3116 | err = 0; | ||
3117 | goto out; | ||
3118 | |||
3119 | put_result: | ||
3120 | dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, | ||
3121 | cb->nlh->nlmsg_seq, &drbd_genl_family, | ||
3122 | NLM_F_MULTI, DRBD_ADM_GET_RESOURCES); | ||
3123 | err = -ENOMEM; | ||
3124 | if (!dh) | ||
3125 | goto out; | ||
3126 | dh->minor = -1U; | ||
3127 | dh->ret_code = NO_ERROR; | ||
3128 | err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL); | ||
3129 | if (err) | ||
3130 | goto out; | ||
3131 | err = res_opts_to_skb(skb, &resource->res_opts, !capable(CAP_SYS_ADMIN)); | ||
3132 | if (err) | ||
3133 | goto out; | ||
3134 | resource_to_info(&resource_info, resource); | ||
3135 | err = resource_info_to_skb(skb, &resource_info, !capable(CAP_SYS_ADMIN)); | ||
3136 | if (err) | ||
3137 | goto out; | ||
3138 | resource_statistics.res_stat_write_ordering = resource->write_ordering; | ||
3139 | err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN)); | ||
3140 | if (err) | ||
3141 | goto out; | ||
3142 | cb->args[0] = (long)resource; | ||
3143 | genlmsg_end(skb, dh); | ||
3144 | err = 0; | ||
3145 | |||
3146 | out: | ||
3147 | rcu_read_unlock(); | ||
3148 | if (err) | ||
3149 | return err; | ||
3150 | return skb->len; | ||
3151 | } | ||
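drbd_adm_dump_resources() above follows the standard netlink dump shape: the callback is invoked repeatedly, emits at most one message per call, and stashes its position in cb->args[0] so the next invocation can continue behind the last entry (here a resource pointer; the kref and RCU details are omitted). A self-contained, simplified model of that resumable-cursor pattern:

    #include <stdio.h>

    /* Minimal stand-in for struct netlink_callback: persistent per-dump state. */
    struct callback { long args[3]; };

    static const char *resources[] = { "r0", "r1", "r2" };

    /* Emit at most one entry per call; return 0 when the dump is complete. */
    static int dump_one(struct callback *cb)
    {
            long i = cb->args[0];               /* cursor: next entry to emit */

            if (i >= (long)(sizeof(resources) / sizeof(resources[0])))
                    return 0;
            printf("resource %s\n", resources[i]);
            cb->args[0] = i + 1;                /* remember position for next call */
            return 1;
    }

    int main(void)
    {
            struct callback cb = { { 0, 0, 0 } };

            while (dump_one(&cb))
                    ;       /* the netlink core would re-invoke the dump callback */
            return 0;
    }

The kernel version stores a pointer instead of an index and must re-validate it under rcu_read_lock(), since the resource may have been deleted between invocations.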
3152 | |||
3153 | static void device_to_statistics(struct device_statistics *s, | ||
3154 | struct drbd_device *device) | ||
3155 | { | ||
3156 | memset(s, 0, sizeof(*s)); | ||
3157 | s->dev_upper_blocked = !may_inc_ap_bio(device); | ||
3158 | if (get_ldev(device)) { | ||
3159 | struct drbd_md *md = &device->ldev->md; | ||
3160 | u64 *history_uuids = (u64 *)s->history_uuids; | ||
3161 | struct request_queue *q; | ||
3162 | int n; | ||
3163 | |||
3164 | spin_lock_irq(&md->uuid_lock); | ||
3165 | s->dev_current_uuid = md->uuid[UI_CURRENT]; | ||
3166 | BUILD_BUG_ON(sizeof(s->history_uuids) < UI_HISTORY_END - UI_HISTORY_START + 1); | ||
3167 | for (n = 0; n < UI_HISTORY_END - UI_HISTORY_START + 1; n++) | ||
3168 | history_uuids[n] = md->uuid[UI_HISTORY_START + n]; | ||
3169 | for (; n < HISTORY_UUIDS; n++) | ||
3170 | history_uuids[n] = 0; | ||
3171 | s->history_uuids_len = HISTORY_UUIDS; | ||
3172 | spin_unlock_irq(&md->uuid_lock); | ||
3173 | |||
3174 | s->dev_disk_flags = md->flags; | ||
3175 | q = bdev_get_queue(device->ldev->backing_bdev); | ||
3176 | s->dev_lower_blocked = | ||
3177 | bdi_congested(&q->backing_dev_info, | ||
3178 | (1 << WB_async_congested) | | ||
3179 | (1 << WB_sync_congested)); | ||
3180 | put_ldev(device); | ||
3181 | } | ||
3182 | s->dev_size = drbd_get_capacity(device->this_bdev); | ||
3183 | s->dev_read = device->read_cnt; | ||
3184 | s->dev_write = device->writ_cnt; | ||
3185 | s->dev_al_writes = device->al_writ_cnt; | ||
3186 | s->dev_bm_writes = device->bm_writ_cnt; | ||
3187 | s->dev_upper_pending = atomic_read(&device->ap_bio_cnt); | ||
3188 | s->dev_lower_pending = atomic_read(&device->local_cnt); | ||
3189 | s->dev_al_suspended = test_bit(AL_SUSPENDED, &device->flags); | ||
3190 | s->dev_exposed_data_uuid = device->ed_uuid; | ||
3191 | } | ||
3192 | |||
3193 | static int put_resource_in_arg0(struct netlink_callback *cb, int holder_nr) | ||
3194 | { | ||
3195 | if (cb->args[0]) { | ||
3196 | struct drbd_resource *resource = | ||
3197 | (struct drbd_resource *)cb->args[0]; | ||
3198 | kref_put(&resource->kref, drbd_destroy_resource); | ||
3199 | } | ||
3200 | |||
3201 | return 0; | ||
3202 | } | ||
3203 | |||
3204 | int drbd_adm_dump_devices_done(struct netlink_callback *cb) { | ||
3205 | return put_resource_in_arg0(cb, 7); | ||
3206 | } | ||
3207 | |||
3208 | static void device_to_info(struct device_info *, struct drbd_device *); | ||
3209 | |||
3210 | int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb) | ||
3211 | { | ||
3212 | struct nlattr *resource_filter; | ||
3213 | struct drbd_resource *resource; | ||
3214 | struct drbd_device *uninitialized_var(device); | ||
3215 | int minor, err, retcode; | ||
3216 | struct drbd_genlmsghdr *dh; | ||
3217 | struct device_info device_info; | ||
3218 | struct device_statistics device_statistics; | ||
3219 | struct idr *idr_to_search; | ||
3220 | |||
3221 | resource = (struct drbd_resource *)cb->args[0]; | ||
3222 | if (!cb->args[0] && !cb->args[1]) { | ||
3223 | resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name); | ||
3224 | if (resource_filter) { | ||
3225 | retcode = ERR_RES_NOT_KNOWN; | ||
3226 | resource = drbd_find_resource(nla_data(resource_filter)); | ||
3227 | if (!resource) | ||
3228 | goto put_result; | ||
3229 | cb->args[0] = (long)resource; | ||
3230 | } | ||
3231 | } | ||
3232 | |||
3233 | rcu_read_lock(); | ||
3234 | minor = cb->args[1]; | ||
3235 | idr_to_search = resource ? &resource->devices : &drbd_devices; | ||
3236 | device = idr_get_next(idr_to_search, &minor); | ||
3237 | if (!device) { | ||
3238 | err = 0; | ||
3239 | goto out; | ||
3240 | } | ||
3241 | idr_for_each_entry_continue(idr_to_search, device, minor) { | ||
3242 | retcode = NO_ERROR; | ||
3243 | goto put_result; /* only one iteration */ | ||
3244 | } | ||
3245 | err = 0; | ||
3246 | goto out; /* no more devices */ | ||
3247 | |||
3248 | put_result: | ||
3249 | dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, | ||
3250 | cb->nlh->nlmsg_seq, &drbd_genl_family, | ||
3251 | NLM_F_MULTI, DRBD_ADM_GET_DEVICES); | ||
3252 | err = -ENOMEM; | ||
3253 | if (!dh) | ||
3254 | goto out; | ||
3255 | dh->ret_code = retcode; | ||
3256 | dh->minor = -1U; | ||
3257 | if (retcode == NO_ERROR) { | ||
3258 | dh->minor = device->minor; | ||
3259 | err = nla_put_drbd_cfg_context(skb, device->resource, NULL, device); | ||
3260 | if (err) | ||
3261 | goto out; | ||
3262 | if (get_ldev(device)) { | ||
3263 | struct disk_conf *disk_conf = | ||
3264 | rcu_dereference(device->ldev->disk_conf); | ||
3265 | |||
3266 | err = disk_conf_to_skb(skb, disk_conf, !capable(CAP_SYS_ADMIN)); | ||
3267 | put_ldev(device); | ||
3268 | if (err) | ||
3269 | goto out; | ||
3270 | } | ||
3271 | device_to_info(&device_info, device); | ||
3272 | err = device_info_to_skb(skb, &device_info, !capable(CAP_SYS_ADMIN)); | ||
3273 | if (err) | ||
3274 | goto out; | ||
3275 | |||
3276 | device_to_statistics(&device_statistics, device); | ||
3277 | err = device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN)); | ||
3278 | if (err) | ||
3279 | goto out; | ||
3280 | cb->args[1] = minor + 1; | ||
3281 | } | ||
3282 | genlmsg_end(skb, dh); | ||
3283 | err = 0; | ||
3284 | |||
3285 | out: | ||
3286 | rcu_read_unlock(); | ||
3287 | if (err) | ||
3288 | return err; | ||
3289 | return skb->len; | ||
3290 | } | ||
3291 | |||
3292 | int drbd_adm_dump_connections_done(struct netlink_callback *cb) | ||
3293 | { | ||
3294 | return put_resource_in_arg0(cb, 6); | ||
3295 | } | ||
3296 | |||
3297 | enum { SINGLE_RESOURCE, ITERATE_RESOURCES }; | ||
3298 | |||
3299 | int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb) | ||
3300 | { | ||
3301 | struct nlattr *resource_filter; | ||
3302 | struct drbd_resource *resource = NULL, *next_resource; | ||
3303 | struct drbd_connection *uninitialized_var(connection); | ||
3304 | int err = 0, retcode; | ||
3305 | struct drbd_genlmsghdr *dh; | ||
3306 | struct connection_info connection_info; | ||
3307 | struct connection_statistics connection_statistics; | ||
3308 | |||
3309 | rcu_read_lock(); | ||
3310 | resource = (struct drbd_resource *)cb->args[0]; | ||
3311 | if (!cb->args[0]) { | ||
3312 | resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name); | ||
3313 | if (resource_filter) { | ||
3314 | retcode = ERR_RES_NOT_KNOWN; | ||
3315 | resource = drbd_find_resource(nla_data(resource_filter)); | ||
3316 | if (!resource) | ||
3317 | goto put_result; | ||
3318 | cb->args[0] = (long)resource; | ||
3319 | cb->args[1] = SINGLE_RESOURCE; | ||
3320 | } | ||
3321 | } | ||
3322 | if (!resource) { | ||
3323 | if (list_empty(&drbd_resources)) | ||
3324 | goto out; | ||
3325 | resource = list_first_entry(&drbd_resources, struct drbd_resource, resources); | ||
3326 | kref_get(&resource->kref); | ||
3327 | cb->args[0] = (long)resource; | ||
3328 | cb->args[1] = ITERATE_RESOURCES; | ||
3329 | } | ||
3330 | |||
3331 | next_resource: | ||
3332 | rcu_read_unlock(); | ||
3333 | mutex_lock(&resource->conf_update); | ||
3334 | rcu_read_lock(); | ||
3335 | if (cb->args[2]) { | ||
3336 | for_each_connection_rcu(connection, resource) | ||
3337 | if (connection == (struct drbd_connection *)cb->args[2]) | ||
3338 | goto found_connection; | ||
3339 | /* connection was probably deleted */ | ||
3340 | goto no_more_connections; | ||
3341 | } | ||
3342 | connection = list_entry(&resource->connections, struct drbd_connection, connections); | ||
3343 | |||
3344 | found_connection: | ||
3345 | list_for_each_entry_continue_rcu(connection, &resource->connections, connections) { | ||
3346 | if (!has_net_conf(connection)) | ||
3347 | continue; | ||
3348 | retcode = NO_ERROR; | ||
3349 | goto put_result; /* only one iteration */ | ||
3350 | } | ||
3351 | |||
3352 | no_more_connections: | ||
3353 | if (cb->args[1] == ITERATE_RESOURCES) { | ||
3354 | for_each_resource_rcu(next_resource, &drbd_resources) { | ||
3355 | if (next_resource == resource) | ||
3356 | goto found_resource; | ||
3357 | } | ||
3358 | /* resource was probably deleted */ | ||
3359 | } | ||
3360 | goto out; | ||
3361 | |||
3362 | found_resource: | ||
3363 | list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) { | ||
3364 | mutex_unlock(&resource->conf_update); | ||
3365 | kref_put(&resource->kref, drbd_destroy_resource); | ||
3366 | resource = next_resource; | ||
3367 | kref_get(&resource->kref); | ||
3368 | cb->args[0] = (long)resource; | ||
3369 | cb->args[2] = 0; | ||
3370 | goto next_resource; | ||
3371 | } | ||
3372 | goto out; /* no more resources */ | ||
3373 | |||
3374 | put_result: | ||
3375 | dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, | ||
3376 | cb->nlh->nlmsg_seq, &drbd_genl_family, | ||
3377 | NLM_F_MULTI, DRBD_ADM_GET_CONNECTIONS); | ||
3378 | err = -ENOMEM; | ||
3379 | if (!dh) | ||
3380 | goto out; | ||
3381 | dh->ret_code = retcode; | ||
3382 | dh->minor = -1U; | ||
3383 | if (retcode == NO_ERROR) { | ||
3384 | struct net_conf *net_conf; | ||
3385 | |||
3386 | err = nla_put_drbd_cfg_context(skb, resource, connection, NULL); | ||
3387 | if (err) | ||
3388 | goto out; | ||
3389 | net_conf = rcu_dereference(connection->net_conf); | ||
3390 | if (net_conf) { | ||
3391 | err = net_conf_to_skb(skb, net_conf, !capable(CAP_SYS_ADMIN)); | ||
3392 | if (err) | ||
3393 | goto out; | ||
3394 | } | ||
3395 | connection_to_info(&connection_info, connection); | ||
3396 | err = connection_info_to_skb(skb, &connection_info, !capable(CAP_SYS_ADMIN)); | ||
3397 | if (err) | ||
3398 | goto out; | ||
3399 | connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags); | ||
3400 | err = connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN)); | ||
3401 | if (err) | ||
3402 | goto out; | ||
3403 | cb->args[2] = (long)connection; | ||
3404 | } | ||
3405 | genlmsg_end(skb, dh); | ||
3406 | err = 0; | ||
3407 | |||
3408 | out: | ||
3409 | rcu_read_unlock(); | ||
3410 | if (resource) | ||
3411 | mutex_unlock(&resource->conf_update); | ||
3412 | if (err) | ||
3413 | return err; | ||
3414 | return skb->len; | ||
3415 | } | ||
3416 | |||
3417 | enum mdf_peer_flag { | ||
3418 | MDF_PEER_CONNECTED = 1 << 0, | ||
3419 | MDF_PEER_OUTDATED = 1 << 1, | ||
3420 | MDF_PEER_FENCING = 1 << 2, | ||
3421 | MDF_PEER_FULL_SYNC = 1 << 3, | ||
3422 | }; | ||
3423 | |||
3424 | static void peer_device_to_statistics(struct peer_device_statistics *s, | ||
3425 | struct drbd_peer_device *peer_device) | ||
3426 | { | ||
3427 | struct drbd_device *device = peer_device->device; | ||
3428 | |||
3429 | memset(s, 0, sizeof(*s)); | ||
3430 | s->peer_dev_received = device->recv_cnt; | ||
3431 | s->peer_dev_sent = device->send_cnt; | ||
3432 | s->peer_dev_pending = atomic_read(&device->ap_pending_cnt) + | ||
3433 | atomic_read(&device->rs_pending_cnt); | ||
3434 | s->peer_dev_unacked = atomic_read(&device->unacked_cnt); | ||
3435 | s->peer_dev_out_of_sync = drbd_bm_total_weight(device) << (BM_BLOCK_SHIFT - 9); | ||
3436 | s->peer_dev_resync_failed = device->rs_failed << (BM_BLOCK_SHIFT - 9); | ||
3437 | if (get_ldev(device)) { | ||
3438 | struct drbd_md *md = &device->ldev->md; | ||
3439 | |||
3440 | spin_lock_irq(&md->uuid_lock); | ||
3441 | s->peer_dev_bitmap_uuid = md->uuid[UI_BITMAP]; | ||
3442 | spin_unlock_irq(&md->uuid_lock); | ||
3443 | s->peer_dev_flags = | ||
3444 | (drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND) ? | ||
3445 | MDF_PEER_CONNECTED : 0) + | ||
3446 | (drbd_md_test_flag(device->ldev, MDF_CONSISTENT) && | ||
3447 | !drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE) ? | ||
3448 | MDF_PEER_OUTDATED : 0) + | ||
3449 | /* FIXME: MDF_PEER_FENCING? */ | ||
3450 | (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ? | ||
3451 | MDF_PEER_FULL_SYNC : 0); | ||
3452 | put_ldev(device); | ||
3453 | } | ||
3454 | } | ||
3455 | |||
3456 | int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb) | ||
3457 | { | ||
3458 | return put_resource_in_arg0(cb, 9); | ||
3459 | } | ||
3460 | |||
3461 | int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb) | ||
3462 | { | ||
3463 | struct nlattr *resource_filter; | ||
3464 | struct drbd_resource *resource; | ||
3465 | struct drbd_device *uninitialized_var(device); | ||
3466 | struct drbd_peer_device *peer_device = NULL; | ||
3467 | int minor, err, retcode; | ||
3468 | struct drbd_genlmsghdr *dh; | ||
3469 | struct idr *idr_to_search; | ||
3470 | |||
3471 | resource = (struct drbd_resource *)cb->args[0]; | ||
3472 | if (!cb->args[0] && !cb->args[1]) { | ||
3473 | resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name); | ||
3474 | if (resource_filter) { | ||
3475 | retcode = ERR_RES_NOT_KNOWN; | ||
3476 | resource = drbd_find_resource(nla_data(resource_filter)); | ||
3477 | if (!resource) | ||
3478 | goto put_result; | ||
3479 | } | ||
3480 | cb->args[0] = (long)resource; | ||
3481 | } | ||
3482 | |||
3483 | rcu_read_lock(); | ||
3484 | minor = cb->args[1]; | ||
3485 | idr_to_search = resource ? &resource->devices : &drbd_devices; | ||
3486 | device = idr_find(idr_to_search, minor); | ||
3487 | if (!device) { | ||
3488 | next_device: | ||
3489 | minor++; | ||
3490 | cb->args[2] = 0; | ||
3491 | device = idr_get_next(idr_to_search, &minor); | ||
3492 | if (!device) { | ||
3493 | err = 0; | ||
3494 | goto out; | ||
3495 | } | ||
3496 | } | ||
3497 | if (cb->args[2]) { | ||
3498 | for_each_peer_device(peer_device, device) | ||
3499 | if (peer_device == (struct drbd_peer_device *)cb->args[2]) | ||
3500 | goto found_peer_device; | ||
3501 | /* peer device was probably deleted */ | ||
3502 | goto next_device; | ||
3503 | } | ||
3504 | /* Make peer_device point to the list head (not the first entry). */ | ||
3505 | peer_device = list_entry(&device->peer_devices, struct drbd_peer_device, peer_devices); | ||
3506 | |||
3507 | found_peer_device: | ||
3508 | list_for_each_entry_continue_rcu(peer_device, &device->peer_devices, peer_devices) { | ||
3509 | if (!has_net_conf(peer_device->connection)) | ||
3510 | continue; | ||
3511 | retcode = NO_ERROR; | ||
3512 | goto put_result; /* only one iteration */ | ||
3513 | } | ||
3514 | goto next_device; | ||
3515 | |||
3516 | put_result: | ||
3517 | dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, | ||
3518 | cb->nlh->nlmsg_seq, &drbd_genl_family, | ||
3519 | NLM_F_MULTI, DRBD_ADM_GET_PEER_DEVICES); | ||
3520 | err = -ENOMEM; | ||
3521 | if (!dh) | ||
3522 | goto out; | ||
3523 | dh->ret_code = retcode; | ||
3524 | dh->minor = -1U; | ||
3525 | if (retcode == NO_ERROR) { | ||
3526 | struct peer_device_info peer_device_info; | ||
3527 | struct peer_device_statistics peer_device_statistics; | ||
3528 | |||
3529 | dh->minor = minor; | ||
3530 | err = nla_put_drbd_cfg_context(skb, device->resource, peer_device->connection, device); | ||
3531 | if (err) | ||
3532 | goto out; | ||
3533 | peer_device_to_info(&peer_device_info, peer_device); | ||
3534 | err = peer_device_info_to_skb(skb, &peer_device_info, !capable(CAP_SYS_ADMIN)); | ||
3535 | if (err) | ||
3536 | goto out; | ||
3537 | peer_device_to_statistics(&peer_device_statistics, peer_device); | ||
3538 | err = peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN)); | ||
3539 | if (err) | ||
3540 | goto out; | ||
3541 | cb->args[1] = minor; | ||
3542 | cb->args[2] = (long)peer_device; | ||
3543 | } | ||
3544 | genlmsg_end(skb, dh); | ||
3545 | err = 0; | ||
3546 | |||
3547 | out: | ||
3548 | rcu_read_unlock(); | ||
3549 | if (err) | ||
3550 | return err; | ||
3551 | return skb->len; | ||
3552 | } | ||
3553 | /* | ||
2913 | * Return the connection of @resource if @resource has exactly one connection. | 3554 | * Return the connection of @resource if @resource has exactly one connection. |
2914 | */ | 3555 | */ |
2915 | static struct drbd_connection *the_only_connection(struct drbd_resource *resource) | 3556 | static struct drbd_connection *the_only_connection(struct drbd_resource *resource) |
@@ -3414,8 +4055,18 @@ drbd_check_resource_name(struct drbd_config_context *adm_ctx) | |||
3414 | return NO_ERROR; | 4055 | return NO_ERROR; |
3415 | } | 4056 | } |
3416 | 4057 | ||
4058 | static void resource_to_info(struct resource_info *info, | ||
4059 | struct drbd_resource *resource) | ||
4060 | { | ||
4061 | info->res_role = conn_highest_role(first_connection(resource)); | ||
4062 | info->res_susp = resource->susp; | ||
4063 | info->res_susp_nod = resource->susp_nod; | ||
4064 | info->res_susp_fen = resource->susp_fen; | ||
4065 | } | ||
4066 | |||
3417 | int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) | 4067 | int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) |
3418 | { | 4068 | { |
4069 | struct drbd_connection *connection; | ||
3419 | struct drbd_config_context adm_ctx; | 4070 | struct drbd_config_context adm_ctx; |
3420 | enum drbd_ret_code retcode; | 4071 | enum drbd_ret_code retcode; |
3421 | struct res_opts res_opts; | 4072 | struct res_opts res_opts; |
@@ -3449,13 +4100,33 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) | |||
3449 | } | 4100 | } |
3450 | 4101 | ||
3451 | /* not yet safe for genl_family.parallel_ops */ | 4102 | /* not yet safe for genl_family.parallel_ops */ |
3452 | if (!conn_create(adm_ctx.resource_name, &res_opts)) | 4103 | mutex_lock(&resources_mutex); |
4104 | connection = conn_create(adm_ctx.resource_name, &res_opts); | ||
4105 | mutex_unlock(&resources_mutex); | ||
4106 | |||
4107 | if (connection) { | ||
4108 | struct resource_info resource_info; | ||
4109 | |||
4110 | mutex_lock(¬ification_mutex); | ||
4111 | resource_to_info(&resource_info, connection->resource); | ||
4112 | notify_resource_state(NULL, 0, connection->resource, | ||
4113 | &resource_info, NOTIFY_CREATE); | ||
4114 | mutex_unlock(¬ification_mutex); | ||
4115 | } else | ||
3453 | retcode = ERR_NOMEM; | 4116 | retcode = ERR_NOMEM; |
4117 | |||
3454 | out: | 4118 | out: |
3455 | drbd_adm_finish(&adm_ctx, info, retcode); | 4119 | drbd_adm_finish(&adm_ctx, info, retcode); |
3456 | return 0; | 4120 | return 0; |
3457 | } | 4121 | } |
3458 | 4122 | ||
4123 | static void device_to_info(struct device_info *info, | ||
4124 | struct drbd_device *device) | ||
4125 | { | ||
4126 | info->dev_disk_state = device->state.disk; | ||
4127 | } | ||
4128 | |||
4129 | |||
3459 | int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) | 4130 | int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) |
3460 | { | 4131 | { |
3461 | struct drbd_config_context adm_ctx; | 4132 | struct drbd_config_context adm_ctx; |
@@ -3490,6 +4161,36 @@ int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) | |||
3490 | 4161 | ||
3491 | mutex_lock(&adm_ctx.resource->adm_mutex); | 4162 | mutex_lock(&adm_ctx.resource->adm_mutex); |
3492 | retcode = drbd_create_device(&adm_ctx, dh->minor); | 4163 | retcode = drbd_create_device(&adm_ctx, dh->minor); |
4164 | if (retcode == NO_ERROR) { | ||
4165 | struct drbd_device *device; | ||
4166 | struct drbd_peer_device *peer_device; | ||
4167 | struct device_info info; | ||
4168 | unsigned int peer_devices = 0; | ||
4169 | enum drbd_notification_type flags; | ||
4170 | |||
4171 | device = minor_to_device(dh->minor); | ||
4172 | for_each_peer_device(peer_device, device) { | ||
4173 | if (!has_net_conf(peer_device->connection)) | ||
4174 | continue; | ||
4175 | peer_devices++; | ||
4176 | } | ||
4177 | |||
4178 | device_to_info(&info, device); | ||
4179 | mutex_lock(¬ification_mutex); | ||
4180 | flags = (peer_devices--) ? NOTIFY_CONTINUES : 0; | ||
4181 | notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags); | ||
4182 | for_each_peer_device(peer_device, device) { | ||
4183 | struct peer_device_info peer_device_info; | ||
4184 | |||
4185 | if (!has_net_conf(peer_device->connection)) | ||
4186 | continue; | ||
4187 | peer_device_to_info(&peer_device_info, peer_device); | ||
4188 | flags = (peer_devices--) ? NOTIFY_CONTINUES : 0; | ||
4189 | notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, | ||
4190 | NOTIFY_CREATE | flags); | ||
4191 | } | ||
4192 | mutex_unlock(¬ification_mutex); | ||
4193 | } | ||
3493 | mutex_unlock(&adm_ctx.resource->adm_mutex); | 4194 | mutex_unlock(&adm_ctx.resource->adm_mutex); |
3494 | out: | 4195 | out: |
3495 | drbd_adm_finish(&adm_ctx, info, retcode); | 4196 | drbd_adm_finish(&adm_ctx, info, retcode); |
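The events emitted for a freshly created minor form one group: the device event and the peer-device events all carry NOTIFY_CONTINUES except the last one, so a listener can tell where the group ends. With one device and two connected peers that is three events: device (CONTINUES), first peer device (CONTINUES), second peer device (final). A generic sketch of the countdown idiom, where struct item and emit_event() are illustrative stand-ins:

    /* Emit "count" related events as a single group: every event except the
     * last one is flagged NOTIFY_CONTINUES, mirroring the countdown above. */
    static void emit_group(struct item *items, unsigned int count)
    {
            enum drbd_notification_type flags;
            unsigned int i;

            for (i = 0; i < count; i++) {
                    flags = (i + 1 < count) ? NOTIFY_CONTINUES : 0;
                    emit_event(&items[i], NOTIFY_CREATE | flags);   /* illustrative */
            }
    }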
@@ -3498,13 +4199,35 @@ out: | |||
3498 | 4199 | ||
3499 | static enum drbd_ret_code adm_del_minor(struct drbd_device *device) | 4200 | static enum drbd_ret_code adm_del_minor(struct drbd_device *device) |
3500 | { | 4201 | { |
4202 | struct drbd_peer_device *peer_device; | ||
4203 | |||
3501 | if (device->state.disk == D_DISKLESS && | 4204 | if (device->state.disk == D_DISKLESS && |
3502 | /* no need to be device->state.conn == C_STANDALONE && | 4205 | /* no need to be device->state.conn == C_STANDALONE && |
3503 | * we may want to delete a minor from a live replication group. | 4206 | * we may want to delete a minor from a live replication group. |
3504 | */ | 4207 | */ |
3505 | device->state.role == R_SECONDARY) { | 4208 | device->state.role == R_SECONDARY) { |
4209 | struct drbd_connection *connection = | ||
4210 | first_connection(device->resource); | ||
4211 | |||
3506 | _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS), | 4212 | _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS), |
3507 | CS_VERBOSE + CS_WAIT_COMPLETE); | 4213 | CS_VERBOSE + CS_WAIT_COMPLETE); |
4214 | |||
4215 | /* If the state engine hasn't stopped the sender thread yet, we | ||
4216 | * need to flush the sender work queue before generating the | ||
4217 | * DESTROY events here. */ | ||
4218 | if (get_t_state(&connection->worker) == RUNNING) | ||
4219 | drbd_flush_workqueue(&connection->sender_work); | ||
4220 | |||
4221 | mutex_lock(¬ification_mutex); | ||
4222 | for_each_peer_device(peer_device, device) { | ||
4223 | if (!has_net_conf(peer_device->connection)) | ||
4224 | continue; | ||
4225 | notify_peer_device_state(NULL, 0, peer_device, NULL, | ||
4226 | NOTIFY_DESTROY | NOTIFY_CONTINUES); | ||
4227 | } | ||
4228 | notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY); | ||
4229 | mutex_unlock(¬ification_mutex); | ||
4230 | |||
3508 | drbd_delete_device(device); | 4231 | drbd_delete_device(device); |
3509 | return NO_ERROR; | 4232 | return NO_ERROR; |
3510 | } else | 4233 | } else |
@@ -3541,7 +4264,16 @@ static int adm_del_resource(struct drbd_resource *resource) | |||
3541 | if (!idr_is_empty(&resource->devices)) | 4264 | if (!idr_is_empty(&resource->devices)) |
3542 | return ERR_RES_IN_USE; | 4265 | return ERR_RES_IN_USE; |
3543 | 4266 | ||
4267 | /* The state engine has stopped the sender thread, so we don't | ||
4268 | * need to flush the sender work queue before generating the | ||
4269 | * DESTROY event here. */ | ||
4270 | mutex_lock(¬ification_mutex); | ||
4271 | notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY); | ||
4272 | mutex_unlock(¬ification_mutex); | ||
4273 | |||
4274 | mutex_lock(&resources_mutex); | ||
3544 | list_del_rcu(&resource->resources); | 4275 | list_del_rcu(&resource->resources); |
4276 | mutex_unlock(&resources_mutex); | ||
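Removal from the global resource list is now done as list_del_rcu() under resources_mutex, the usual writer side for a list whose readers only hold rcu_read_lock(). A generic sketch of that pairing; struct foo, foo_list and foo_mutex are illustrative, and the immediate kfree_rcu() stands in for DRBD's later teardown of the resource:

    #include <linux/mutex.h>
    #include <linux/rculist.h>
    #include <linux/slab.h>

    struct foo {
            struct list_head list;
            struct rcu_head rcu;
            int value;
    };

    static LIST_HEAD(foo_list);
    static DEFINE_MUTEX(foo_mutex);

    static void foo_del(struct foo *f)
    {
            mutex_lock(&foo_mutex);                 /* serializes writers only */
            list_del_rcu(&f->list);
            mutex_unlock(&foo_mutex);
            kfree_rcu(f, rcu);                      /* freed once all readers are done */
    }

    static int foo_sum(void)                        /* lockless reader */
    {
            struct foo *f;
            int sum = 0;

            rcu_read_lock();
            list_for_each_entry_rcu(f, &foo_list, list)
                    sum += f->value;
            rcu_read_unlock();
            return sum;
    }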
3545 | /* Make sure all threads have actually stopped: state handling only | 4277 | /* Make sure all threads have actually stopped: state handling only |
3546 | * does drbd_thread_stop_nowait(). */ | 4278 | * does drbd_thread_stop_nowait(). */ |
3547 | list_for_each_entry(connection, &resource->connections, connections) | 4279 | list_for_each_entry(connection, &resource->connections, connections) |
@@ -3637,7 +4369,6 @@ finish: | |||
3637 | 4369 | ||
3638 | void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib) | 4370 | void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib) |
3639 | { | 4371 | { |
3640 | static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */ | ||
3641 | struct sk_buff *msg; | 4372 | struct sk_buff *msg; |
3642 | struct drbd_genlmsghdr *d_out; | 4373 | struct drbd_genlmsghdr *d_out; |
3643 | unsigned seq; | 4374 | unsigned seq; |
@@ -3658,7 +4389,7 @@ void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib) | |||
3658 | if (nla_put_status_info(msg, device, sib)) | 4389 | if (nla_put_status_info(msg, device, sib)) |
3659 | goto nla_put_failure; | 4390 | goto nla_put_failure; |
3660 | genlmsg_end(msg, d_out); | 4391 | genlmsg_end(msg, d_out); |
3661 | err = drbd_genl_multicast_events(msg, 0); | 4392 | err = drbd_genl_multicast_events(msg, GFP_NOWAIT); |
3662 | /* msg has been consumed or freed in netlink_broadcast() */ | 4393 | /* msg has been consumed or freed in netlink_broadcast() */ |
3663 | if (err && err != -ESRCH) | 4394 | if (err && err != -ESRCH) |
3664 | goto failed; | 4395 | goto failed; |
@@ -3672,3 +4403,405 @@ failed: | |||
3672 | "Event seq:%u sib_reason:%u\n", | 4403 | "Event seq:%u sib_reason:%u\n", |
3673 | err, seq, sib->sib_reason); | 4404 | err, seq, sib->sib_reason); |
3674 | } | 4405 | } |
4406 | |||
4407 | static int nla_put_notification_header(struct sk_buff *msg, | ||
4408 | enum drbd_notification_type type) | ||
4409 | { | ||
4410 | struct drbd_notification_header nh = { | ||
4411 | .nh_type = type, | ||
4412 | }; | ||
4413 | |||
4414 | return drbd_notification_header_to_skb(msg, &nh, true); | ||
4415 | } | ||
4416 | |||
4417 | void notify_resource_state(struct sk_buff *skb, | ||
4418 | unsigned int seq, | ||
4419 | struct drbd_resource *resource, | ||
4420 | struct resource_info *resource_info, | ||
4421 | enum drbd_notification_type type) | ||
4422 | { | ||
4423 | struct resource_statistics resource_statistics; | ||
4424 | struct drbd_genlmsghdr *dh; | ||
4425 | bool multicast = false; | ||
4426 | int err; | ||
4427 | |||
4428 | if (!skb) { | ||
4429 | seq = atomic_inc_return(¬ify_genl_seq); | ||
4430 | skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); | ||
4431 | err = -ENOMEM; | ||
4432 | if (!skb) | ||
4433 | goto failed; | ||
4434 | multicast = true; | ||
4435 | } | ||
4436 | |||
4437 | err = -EMSGSIZE; | ||
4438 | dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE); | ||
4439 | if (!dh) | ||
4440 | goto nla_put_failure; | ||
4441 | dh->minor = -1U; | ||
4442 | dh->ret_code = NO_ERROR; | ||
4443 | if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) || | ||
4444 | nla_put_notification_header(skb, type) || | ||
4445 | ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY && | ||
4446 | resource_info_to_skb(skb, resource_info, true))) | ||
4447 | goto nla_put_failure; | ||
4448 | resource_statistics.res_stat_write_ordering = resource->write_ordering; | ||
4449 | err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN)); | ||
4450 | if (err) | ||
4451 | goto nla_put_failure; | ||
4452 | genlmsg_end(skb, dh); | ||
4453 | if (multicast) { | ||
4454 | err = drbd_genl_multicast_events(skb, GFP_NOWAIT); | ||
4455 | /* skb has been consumed or freed in netlink_broadcast() */ | ||
4456 | if (err && err != -ESRCH) | ||
4457 | goto failed; | ||
4458 | } | ||
4459 | return; | ||
4460 | |||
4461 | nla_put_failure: | ||
4462 | nlmsg_free(skb); | ||
4463 | failed: | ||
4464 | drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n", | ||
4465 | err, seq); | ||
4466 | } | ||
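notify_resource_state() and the notify_*_state() helpers that follow all share one shape: when a dump passes in an skb they only append a message to it, and when called with skb == NULL (a real state change) they allocate a message themselves and multicast it to the events group. A condensed, hedged sketch of that dual-use pattern for a hypothetical object; all MY_* identifiers and my_family are stand-ins, while genlmsg_new/put/end, nla_put_u32 and genlmsg_multicast are real APIs:

    #include <net/genetlink.h>

    static atomic_t my_notify_seq = ATOMIC_INIT(2);

    static void notify_thing_state(struct sk_buff *skb, unsigned int seq,
                                   u32 state, enum drbd_notification_type type)
    {
            bool multicast = false;
            void *hdr;
            int err;

            if (!skb) {                             /* broadcast path */
                    seq = atomic_inc_return(&my_notify_seq);
                    skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
                    err = -ENOMEM;
                    if (!skb)
                            goto failed;
                    multicast = true;
            }

            err = -EMSGSIZE;
            hdr = genlmsg_put(skb, 0, seq, &my_family, 0, MY_CMD_THING_STATE);
            if (!hdr)
                    goto nla_put_failure;
            if (nla_put_u32(skb, MY_ATTR_STATE, state) ||
                nla_put_u32(skb, MY_ATTR_NOTIFICATION_TYPE, type))
                    goto nla_put_failure;
            genlmsg_end(skb, hdr);

            if (multicast) {
                    err = genlmsg_multicast(&my_family, skb, 0, MY_MCGRP_EVENTS, GFP_NOWAIT);
                    if (err && err != -ESRCH)       /* -ESRCH: nobody is listening */
                            goto failed;
            }
            return;

    nla_put_failure:
            nlmsg_free(skb);
    failed:
            pr_err("Error %d while broadcasting event, seq %u\n", err, seq);
    }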
4467 | |||
4468 | void notify_device_state(struct sk_buff *skb, | ||
4469 | unsigned int seq, | ||
4470 | struct drbd_device *device, | ||
4471 | struct device_info *device_info, | ||
4472 | enum drbd_notification_type type) | ||
4473 | { | ||
4474 | struct device_statistics device_statistics; | ||
4475 | struct drbd_genlmsghdr *dh; | ||
4476 | bool multicast = false; | ||
4477 | int err; | ||
4478 | |||
4479 | if (!skb) { | ||
4480 | seq = atomic_inc_return(¬ify_genl_seq); | ||
4481 | skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); | ||
4482 | err = -ENOMEM; | ||
4483 | if (!skb) | ||
4484 | goto failed; | ||
4485 | multicast = true; | ||
4486 | } | ||
4487 | |||
4488 | err = -EMSGSIZE; | ||
4489 | dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE); | ||
4490 | if (!dh) | ||
4491 | goto nla_put_failure; | ||
4492 | dh->minor = device->minor; | ||
4493 | dh->ret_code = NO_ERROR; | ||
4494 | if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) || | ||
4495 | nla_put_notification_header(skb, type) || | ||
4496 | ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY && | ||
4497 | device_info_to_skb(skb, device_info, true))) | ||
4498 | goto nla_put_failure; | ||
4499 | device_to_statistics(&device_statistics, device); | ||
4500 | device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN)); | ||
4501 | genlmsg_end(skb, dh); | ||
4502 | if (multicast) { | ||
4503 | err = drbd_genl_multicast_events(skb, GFP_NOWAIT); | ||
4504 | /* skb has been consumed or freed in netlink_broadcast() */ | ||
4505 | if (err && err != -ESRCH) | ||
4506 | goto failed; | ||
4507 | } | ||
4508 | return; | ||
4509 | |||
4510 | nla_put_failure: | ||
4511 | nlmsg_free(skb); | ||
4512 | failed: | ||
4513 | drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n", | ||
4514 | err, seq); | ||
4515 | } | ||
4516 | |||
4517 | void notify_connection_state(struct sk_buff *skb, | ||
4518 | unsigned int seq, | ||
4519 | struct drbd_connection *connection, | ||
4520 | struct connection_info *connection_info, | ||
4521 | enum drbd_notification_type type) | ||
4522 | { | ||
4523 | struct connection_statistics connection_statistics; | ||
4524 | struct drbd_genlmsghdr *dh; | ||
4525 | bool multicast = false; | ||
4526 | int err; | ||
4527 | |||
4528 | if (!skb) { | ||
4529 | seq = atomic_inc_return(¬ify_genl_seq); | ||
4530 | skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); | ||
4531 | err = -ENOMEM; | ||
4532 | if (!skb) | ||
4533 | goto failed; | ||
4534 | multicast = true; | ||
4535 | } | ||
4536 | |||
4537 | err = -EMSGSIZE; | ||
4538 | dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE); | ||
4539 | if (!dh) | ||
4540 | goto nla_put_failure; | ||
4541 | dh->minor = -1U; | ||
4542 | dh->ret_code = NO_ERROR; | ||
4543 | if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) || | ||
4544 | nla_put_notification_header(skb, type) || | ||
4545 | ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY && | ||
4546 | connection_info_to_skb(skb, connection_info, true))) | ||
4547 | goto nla_put_failure; | ||
4548 | connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags); | ||
4549 | connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN)); | ||
4550 | genlmsg_end(skb, dh); | ||
4551 | if (multicast) { | ||
4552 | err = drbd_genl_multicast_events(skb, GFP_NOWAIT); | ||
4553 | /* skb has been consumed or freed in netlink_broadcast() */ | ||
4554 | if (err && err != -ESRCH) | ||
4555 | goto failed; | ||
4556 | } | ||
4557 | return; | ||
4558 | |||
4559 | nla_put_failure: | ||
4560 | nlmsg_free(skb); | ||
4561 | failed: | ||
4562 | drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n", | ||
4563 | err, seq); | ||
4564 | } | ||
4565 | |||
4566 | void notify_peer_device_state(struct sk_buff *skb, | ||
4567 | unsigned int seq, | ||
4568 | struct drbd_peer_device *peer_device, | ||
4569 | struct peer_device_info *peer_device_info, | ||
4570 | enum drbd_notification_type type) | ||
4571 | { | ||
4572 | struct peer_device_statistics peer_device_statistics; | ||
4573 | struct drbd_resource *resource = peer_device->device->resource; | ||
4574 | struct drbd_genlmsghdr *dh; | ||
4575 | bool multicast = false; | ||
4576 | int err; | ||
4577 | |||
4578 | if (!skb) { | ||
4579 | seq = atomic_inc_return(¬ify_genl_seq); | ||
4580 | skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); | ||
4581 | err = -ENOMEM; | ||
4582 | if (!skb) | ||
4583 | goto failed; | ||
4584 | multicast = true; | ||
4585 | } | ||
4586 | |||
4587 | err = -EMSGSIZE; | ||
4588 | dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE); | ||
4589 | if (!dh) | ||
4590 | goto nla_put_failure; | ||
4591 | dh->minor = -1U; | ||
4592 | dh->ret_code = NO_ERROR; | ||
4593 | if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) || | ||
4594 | nla_put_notification_header(skb, type) || | ||
4595 | ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY && | ||
4596 | peer_device_info_to_skb(skb, peer_device_info, true))) | ||
4597 | goto nla_put_failure; | ||
4598 | peer_device_to_statistics(&peer_device_statistics, peer_device); | ||
4599 | peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN)); | ||
4600 | genlmsg_end(skb, dh); | ||
4601 | if (multicast) { | ||
4602 | err = drbd_genl_multicast_events(skb, GFP_NOWAIT); | ||
4603 | /* skb has been consumed or freed in netlink_broadcast() */ | ||
4604 | if (err && err != -ESRCH) | ||
4605 | goto failed; | ||
4606 | } | ||
4607 | return; | ||
4608 | |||
4609 | nla_put_failure: | ||
4610 | nlmsg_free(skb); | ||
4611 | failed: | ||
4612 | drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n", | ||
4613 | err, seq); | ||
4614 | } | ||
4615 | |||
4616 | void notify_helper(enum drbd_notification_type type, | ||
4617 | struct drbd_device *device, struct drbd_connection *connection, | ||
4618 | const char *name, int status) | ||
4619 | { | ||
4620 | struct drbd_resource *resource = device ? device->resource : connection->resource; | ||
4621 | struct drbd_helper_info helper_info; | ||
4622 | unsigned int seq = atomic_inc_return(¬ify_genl_seq); | ||
4623 | struct sk_buff *skb = NULL; | ||
4624 | struct drbd_genlmsghdr *dh; | ||
4625 | int err; | ||
4626 | |||
4627 | strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name)); | ||
4628 | helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name)); | ||
4629 | helper_info.helper_status = status; | ||
4630 | |||
4631 | skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); | ||
4632 | err = -ENOMEM; | ||
4633 | if (!skb) | ||
4634 | goto fail; | ||
4635 | |||
4636 | err = -EMSGSIZE; | ||
4637 | dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER); | ||
4638 | if (!dh) | ||
4639 | goto fail; | ||
4640 | dh->minor = device ? device->minor : -1; | ||
4641 | dh->ret_code = NO_ERROR; | ||
4642 | mutex_lock(¬ification_mutex); | ||
4643 | if (nla_put_drbd_cfg_context(skb, resource, connection, device) || | ||
4644 | nla_put_notification_header(skb, type) || | ||
4645 | drbd_helper_info_to_skb(skb, &helper_info, true)) | ||
4646 | goto unlock_fail; | ||
4647 | genlmsg_end(skb, dh); | ||
4648 | err = drbd_genl_multicast_events(skb, GFP_NOWAIT); | ||
4649 | skb = NULL; | ||
4650 | /* skb has been consumed or freed in netlink_broadcast() */ | ||
4651 | if (err && err != -ESRCH) | ||
4652 | goto unlock_fail; | ||
4653 | mutex_unlock(¬ification_mutex); | ||
4654 | return; | ||
4655 | |||
4656 | unlock_fail: | ||
4657 | mutex_unlock(¬ification_mutex); | ||
4658 | fail: | ||
4659 | nlmsg_free(skb); | ||
4660 | drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n", | ||
4661 | err, seq); | ||
4662 | } | ||
4663 | |||
4664 | static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) | ||
4665 | { | ||
4666 | struct drbd_genlmsghdr *dh; | ||
4667 | int err; | ||
4668 | |||
4669 | err = -EMSGSIZE; | ||
4670 | dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE); | ||
4671 | if (!dh) | ||
4672 | goto nla_put_failure; | ||
4673 | dh->minor = -1U; | ||
4674 | dh->ret_code = NO_ERROR; | ||
4675 | if (nla_put_notification_header(skb, NOTIFY_EXISTS)) | ||
4676 | goto nla_put_failure; | ||
4677 | genlmsg_end(skb, dh); | ||
4678 | return; | ||
4679 | |||
4680 | nla_put_failure: | ||
4681 | nlmsg_free(skb); | ||
4682 | pr_err("Error %d sending event. Event seq:%u\n", err, seq); | ||
4683 | } | ||
4684 | |||
4685 | static void free_state_changes(struct list_head *list) | ||
4686 | { | ||
4687 | while (!list_empty(list)) { | ||
4688 | struct drbd_state_change *state_change = | ||
4689 | list_first_entry(list, struct drbd_state_change, list); | ||
4690 | list_del(&state_change->list); | ||
4691 | forget_state_change(state_change); | ||
4692 | } | ||
4693 | } | ||
4694 | |||
4695 | static unsigned int notifications_for_state_change(struct drbd_state_change *state_change) | ||
4696 | { | ||
4697 | return 1 + | ||
4698 | state_change->n_connections + | ||
4699 | state_change->n_devices + | ||
4700 | state_change->n_devices * state_change->n_connections; | ||
4701 | } | ||
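For example, a snapshot of a resource with two connections and three devices yields 1 + 2 + 3 + 3*2 = 12 notifications: one for the resource, one per connection, one per device, and one per device/connection pair (peer device).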
4702 | |||
4703 | static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) | ||
4704 | { | ||
4705 | struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0]; | ||
4706 | unsigned int seq = cb->args[2]; | ||
4707 | unsigned int n; | ||
4708 | enum drbd_notification_type flags = 0; | ||
4709 | |||
4711 | /* There is no need to take notification_mutex here: it doesn't | ||
4711 | matter if the initial state events mix with later state change | ||
4712 | events; we can always tell the events apart by the NOTIFY_EXISTS | ||
4713 | flag. */ | ||
4714 | |||
4715 | cb->args[5]--; | ||
4716 | if (cb->args[5] == 1) { | ||
4717 | notify_initial_state_done(skb, seq); | ||
4718 | goto out; | ||
4719 | } | ||
4720 | n = cb->args[4]++; | ||
4721 | if (cb->args[4] < cb->args[3]) | ||
4722 | flags |= NOTIFY_CONTINUES; | ||
4723 | if (n < 1) { | ||
4724 | notify_resource_state_change(skb, seq, state_change->resource, | ||
4725 | NOTIFY_EXISTS | flags); | ||
4726 | goto next; | ||
4727 | } | ||
4728 | n--; | ||
4729 | if (n < state_change->n_connections) { | ||
4730 | notify_connection_state_change(skb, seq, &state_change->connections[n], | ||
4731 | NOTIFY_EXISTS | flags); | ||
4732 | goto next; | ||
4733 | } | ||
4734 | n -= state_change->n_connections; | ||
4735 | if (n < state_change->n_devices) { | ||
4736 | notify_device_state_change(skb, seq, &state_change->devices[n], | ||
4737 | NOTIFY_EXISTS | flags); | ||
4738 | goto next; | ||
4739 | } | ||
4740 | n -= state_change->n_devices; | ||
4741 | if (n < state_change->n_devices * state_change->n_connections) { | ||
4742 | notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], | ||
4743 | NOTIFY_EXISTS | flags); | ||
4744 | goto next; | ||
4745 | } | ||
4746 | |||
4747 | next: | ||
4748 | if (cb->args[4] == cb->args[3]) { | ||
4749 | struct drbd_state_change *next_state_change = | ||
4750 | list_entry(state_change->list.next, | ||
4751 | struct drbd_state_change, list); | ||
4752 | cb->args[0] = (long)next_state_change; | ||
4753 | cb->args[3] = notifications_for_state_change(next_state_change); | ||
4754 | cb->args[4] = 0; | ||
4755 | } | ||
4756 | out: | ||
4757 | return skb->len; | ||
4758 | } | ||
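Continuing that example, get_initial_state() walks the snapshot by index: n = 0 is the resource, n = 1-2 the connections, n = 3-5 the devices, and n = 6-11 the six peer devices, all sent with NOTIFY_EXISTS; cb->args[5] counts the remaining calls down, and once it reaches 1 the closing DRBD_INITIAL_STATE_DONE message is emitted.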
4759 | |||
4760 | int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) | ||
4761 | { | ||
4762 | struct drbd_resource *resource; | ||
4763 | LIST_HEAD(head); | ||
4764 | |||
4765 | if (cb->args[5] >= 1) { | ||
4766 | if (cb->args[5] > 1) | ||
4767 | return get_initial_state(skb, cb); | ||
4768 | if (cb->args[0]) { | ||
4769 | struct drbd_state_change *state_change = | ||
4770 | (struct drbd_state_change *)cb->args[0]; | ||
4771 | |||
4772 | /* connect list to head */ | ||
4773 | list_add(&head, &state_change->list); | ||
4774 | free_state_changes(&head); | ||
4775 | } | ||
4776 | return 0; | ||
4777 | } | ||
4778 | |||
4779 | cb->args[5] = 2; /* number of iterations */ | ||
4780 | mutex_lock(&resources_mutex); | ||
4781 | for_each_resource(resource, &drbd_resources) { | ||
4782 | struct drbd_state_change *state_change; | ||
4783 | |||
4784 | state_change = remember_old_state(resource, GFP_KERNEL); | ||
4785 | if (!state_change) { | ||
4786 | if (!list_empty(&head)) | ||
4787 | free_state_changes(&head); | ||
4788 | mutex_unlock(&resources_mutex); | ||
4789 | return -ENOMEM; | ||
4790 | } | ||
4791 | copy_old_to_new_state_change(state_change); | ||
4792 | list_add_tail(&state_change->list, &head); | ||
4793 | cb->args[5] += notifications_for_state_change(state_change); | ||
4794 | } | ||
4795 | mutex_unlock(&resources_mutex); | ||
4796 | |||
4797 | if (!list_empty(&head)) { | ||
4798 | struct drbd_state_change *state_change = | ||
4799 | list_entry(head.next, struct drbd_state_change, list); | ||
4800 | cb->args[0] = (long)state_change; | ||
4801 | cb->args[3] = notifications_for_state_change(state_change); | ||
4802 | list_del(&head); /* detach list from head */ | ||
4803 | } | ||
4804 | |||
4805 | cb->args[2] = cb->nlh->nlmsg_seq; | ||
4806 | return get_initial_state(skb, cb); | ||
4807 | } | ||
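drbd_adm_get_initial_state() collects its snapshots on an on-stack list head and then detaches that head with list_del(&head), so the entries survive between dump calls as a headless ring reachable only through the pointer stored in cb->args[0]; to free them, a head is simply spliced back in with list_add(). A small self-contained sketch of that trick (struct entry is illustrative):

    #include <linux/list.h>
    #include <linux/slab.h>

    struct entry {
            struct list_head list;
    };

    /* Collect a few entries, then detach the on-stack head: the entries keep
     * pointing at each other and are identified only by the first entry. */
    static struct entry *collect(void)
    {
            LIST_HEAD(head);
            struct entry *first;
            int i;

            for (i = 0; i < 3; i++) {
                    struct entry *e = kzalloc(sizeof(*e), GFP_KERNEL);

                    if (e)
                            list_add_tail(&e->list, &head);
            }
            if (list_empty(&head))
                    return NULL;

            first = list_first_entry(&head, struct entry, list);
            list_del(&head);                        /* detach list from head */
            return first;
    }

    /* Re-attach a temporary head next to "first" and free the whole ring. */
    static void drop(struct entry *first)
    {
            LIST_HEAD(head);
            struct entry *e, *t;

            list_add(&head, &first->list);          /* connect list to head */
            list_for_each_entry_safe(e, t, &head, list) {
                    list_del(&e->list);
                    kfree(e);
            }
    }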
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 3b10fa6cb039..6537b25db9c1 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c | |||
@@ -245,9 +245,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v) | |||
245 | char wp; | 245 | char wp; |
246 | 246 | ||
247 | static char write_ordering_chars[] = { | 247 | static char write_ordering_chars[] = { |
248 | [WO_none] = 'n', | 248 | [WO_NONE] = 'n', |
249 | [WO_drain_io] = 'd', | 249 | [WO_DRAIN_IO] = 'd', |
250 | [WO_bdev_flush] = 'f', | 250 | [WO_BDEV_FLUSH] = 'f', |
251 | }; | 251 | }; |
252 | 252 | ||
253 | seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n", | 253 | seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n", |
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h index 2da9104a3851..ef9245363dcc 100644 --- a/drivers/block/drbd/drbd_protocol.h +++ b/drivers/block/drbd/drbd_protocol.h | |||
@@ -23,7 +23,7 @@ enum drbd_packet { | |||
23 | P_AUTH_RESPONSE = 0x11, | 23 | P_AUTH_RESPONSE = 0x11, |
24 | P_STATE_CHG_REQ = 0x12, | 24 | P_STATE_CHG_REQ = 0x12, |
25 | 25 | ||
26 | /* asender (meta socket */ | 26 | /* (meta socket) */ |
27 | P_PING = 0x13, | 27 | P_PING = 0x13, |
28 | P_PING_ACK = 0x14, | 28 | P_PING_ACK = 0x14, |
29 | P_RECV_ACK = 0x15, /* Used in protocol B */ | 29 | P_RECV_ACK = 0x15, /* Used in protocol B */ |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b4b5680ac6ad..1957fe8601dc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -215,7 +215,7 @@ static void reclaim_finished_net_peer_reqs(struct drbd_device *device, | |||
215 | } | 215 | } |
216 | } | 216 | } |
217 | 217 | ||
218 | static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device) | 218 | static void drbd_reclaim_net_peer_reqs(struct drbd_device *device) |
219 | { | 219 | { |
220 | LIST_HEAD(reclaimed); | 220 | LIST_HEAD(reclaimed); |
221 | struct drbd_peer_request *peer_req, *t; | 221 | struct drbd_peer_request *peer_req, *t; |
@@ -223,11 +223,30 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device) | |||
223 | spin_lock_irq(&device->resource->req_lock); | 223 | spin_lock_irq(&device->resource->req_lock); |
224 | reclaim_finished_net_peer_reqs(device, &reclaimed); | 224 | reclaim_finished_net_peer_reqs(device, &reclaimed); |
225 | spin_unlock_irq(&device->resource->req_lock); | 225 | spin_unlock_irq(&device->resource->req_lock); |
226 | |||
227 | list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) | 226 | list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) |
228 | drbd_free_net_peer_req(device, peer_req); | 227 | drbd_free_net_peer_req(device, peer_req); |
229 | } | 228 | } |
230 | 229 | ||
230 | static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection) | ||
231 | { | ||
232 | struct drbd_peer_device *peer_device; | ||
233 | int vnr; | ||
234 | |||
235 | rcu_read_lock(); | ||
236 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | ||
237 | struct drbd_device *device = peer_device->device; | ||
238 | if (!atomic_read(&device->pp_in_use_by_net)) | ||
239 | continue; | ||
240 | |||
241 | kref_get(&device->kref); | ||
242 | rcu_read_unlock(); | ||
243 | drbd_reclaim_net_peer_reqs(device); | ||
244 | kref_put(&device->kref, drbd_destroy_device); | ||
245 | rcu_read_lock(); | ||
246 | } | ||
247 | rcu_read_unlock(); | ||
248 | } | ||
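conn_reclaim_net_peer_reqs() uses the standard idiom for doing possibly-sleeping work on every member of an RCU-protected IDR: take a reference, leave the RCU read side, do the work, drop the reference, and re-enter the read side before the iteration continues. A generic sketch; struct obj, obj_needs_work(), obj_do_work() and obj_release() are illustrative:

    #include <linux/idr.h>
    #include <linux/kref.h>
    #include <linux/rcupdate.h>

    struct obj {
            struct kref kref;
            /* ... */
    };

    static void for_each_obj_do_work(struct idr *objects)
    {
            struct obj *o;
            int id;

            rcu_read_lock();
            idr_for_each_entry(objects, o, id) {
                    if (!obj_needs_work(o))         /* cheap check, still under RCU */
                            continue;

                    kref_get(&o->kref);             /* pin the object */
                    rcu_read_unlock();              /* now sleeping is allowed */
                    obj_do_work(o);                 /* illustrative, may block */
                    kref_put(&o->kref, obj_release);
                    rcu_read_lock();                /* back inside for the iteration */
            }
            rcu_read_unlock();
    }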
249 | |||
231 | /** | 250 | /** |
232 | * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) | 251 | * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) |
233 | * @device: DRBD device. | 252 | * @device: DRBD device. |
@@ -265,10 +284,15 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int | |||
265 | if (atomic_read(&device->pp_in_use) < mxb) | 284 | if (atomic_read(&device->pp_in_use) < mxb) |
266 | page = __drbd_alloc_pages(device, number); | 285 | page = __drbd_alloc_pages(device, number); |
267 | 286 | ||
287 | /* Try to keep the fast path fast, but occasionally we need | ||
288 | * to reclaim the pages we lent to the network stack. */ | ||
289 | if (page && atomic_read(&device->pp_in_use_by_net) > 512) | ||
290 | drbd_reclaim_net_peer_reqs(device); | ||
291 | |||
268 | while (page == NULL) { | 292 | while (page == NULL) { |
269 | prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); | 293 | prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); |
270 | 294 | ||
271 | drbd_kick_lo_and_reclaim_net(device); | 295 | drbd_reclaim_net_peer_reqs(device); |
272 | 296 | ||
273 | if (atomic_read(&device->pp_in_use) < mxb) { | 297 | if (atomic_read(&device->pp_in_use) < mxb) { |
274 | page = __drbd_alloc_pages(device, number); | 298 | page = __drbd_alloc_pages(device, number); |
@@ -1099,7 +1123,15 @@ randomize: | |||
1099 | return 0; | 1123 | return 0; |
1100 | } | 1124 | } |
1101 | 1125 | ||
1102 | drbd_thread_start(&connection->asender); | 1126 | drbd_thread_start(&connection->ack_receiver); |
1127 | /* opencoded create_singlethread_workqueue(), | ||
1128 | * to be able to use format string arguments */ | ||
1129 | connection->ack_sender = | ||
1130 | alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name); | ||
1131 | if (!connection->ack_sender) { | ||
1132 | drbd_err(connection, "Failed to create workqueue ack_sender\n"); | ||
1133 | return 0; | ||
1134 | } | ||
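The connection now gets a dedicated ordered workqueue for sending acks; alloc_ordered_workqueue() is used instead of create_singlethread_workqueue() because it accepts printf-style name arguments, and the queue is destroyed again in conn_disconnect() further down. A minimal sketch of that per-object lifecycle with hypothetical struct and function names:

    #include <linux/workqueue.h>

    struct my_conn {
            struct workqueue_struct *ack_sender;
            const char *name;
    };

    static int my_conn_start_ack_sender(struct my_conn *c)
    {
            /* One ordered (strictly serialized) queue per connection, named after it. */
            c->ack_sender = alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM,
                                                    c->name);
            return c->ack_sender ? 0 : -ENOMEM;
    }

    static void my_conn_stop_ack_sender(struct my_conn *c)
    {
            if (c->ack_sender) {
                    destroy_workqueue(c->ack_sender);       /* drains pending work */
                    c->ack_sender = NULL;
            }
    }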
1103 | 1135 | ||
1104 | mutex_lock(&connection->resource->conf_update); | 1136 | mutex_lock(&connection->resource->conf_update); |
1105 | /* The discard_my_data flag is a single-shot modifier to the next | 1137 | /* The discard_my_data flag is a single-shot modifier to the next |
@@ -1178,7 +1210,7 @@ static void drbd_flush(struct drbd_connection *connection) | |||
1178 | struct drbd_peer_device *peer_device; | 1210 | struct drbd_peer_device *peer_device; |
1179 | int vnr; | 1211 | int vnr; |
1180 | 1212 | ||
1181 | if (connection->resource->write_ordering >= WO_bdev_flush) { | 1213 | if (connection->resource->write_ordering >= WO_BDEV_FLUSH) { |
1182 | rcu_read_lock(); | 1214 | rcu_read_lock(); |
1183 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | 1215 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { |
1184 | struct drbd_device *device = peer_device->device; | 1216 | struct drbd_device *device = peer_device->device; |
@@ -1203,7 +1235,7 @@ static void drbd_flush(struct drbd_connection *connection) | |||
1203 | /* would rather check on EOPNOTSUPP, but that is not reliable. | 1235 | /* would rather check on EOPNOTSUPP, but that is not reliable. |
1204 | * don't try again for ANY return value != 0 | 1236 | * don't try again for ANY return value != 0 |
1205 | * if (rv == -EOPNOTSUPP) */ | 1237 | * if (rv == -EOPNOTSUPP) */ |
1206 | drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io); | 1238 | drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO); |
1207 | } | 1239 | } |
1208 | put_ldev(device); | 1240 | put_ldev(device); |
1209 | kref_put(&device->kref, drbd_destroy_device); | 1241 | kref_put(&device->kref, drbd_destroy_device); |
@@ -1299,10 +1331,10 @@ max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo) | |||
1299 | 1331 | ||
1300 | dc = rcu_dereference(bdev->disk_conf); | 1332 | dc = rcu_dereference(bdev->disk_conf); |
1301 | 1333 | ||
1302 | if (wo == WO_bdev_flush && !dc->disk_flushes) | 1334 | if (wo == WO_BDEV_FLUSH && !dc->disk_flushes) |
1303 | wo = WO_drain_io; | 1335 | wo = WO_DRAIN_IO; |
1304 | if (wo == WO_drain_io && !dc->disk_drain) | 1336 | if (wo == WO_DRAIN_IO && !dc->disk_drain) |
1305 | wo = WO_none; | 1337 | wo = WO_NONE; |
1306 | 1338 | ||
1307 | return wo; | 1339 | return wo; |
1308 | } | 1340 | } |
@@ -1319,13 +1351,13 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin | |||
1319 | enum write_ordering_e pwo; | 1351 | enum write_ordering_e pwo; |
1320 | int vnr; | 1352 | int vnr; |
1321 | static char *write_ordering_str[] = { | 1353 | static char *write_ordering_str[] = { |
1322 | [WO_none] = "none", | 1354 | [WO_NONE] = "none", |
1323 | [WO_drain_io] = "drain", | 1355 | [WO_DRAIN_IO] = "drain", |
1324 | [WO_bdev_flush] = "flush", | 1356 | [WO_BDEV_FLUSH] = "flush", |
1325 | }; | 1357 | }; |
1326 | 1358 | ||
1327 | pwo = resource->write_ordering; | 1359 | pwo = resource->write_ordering; |
1328 | if (wo != WO_bdev_flush) | 1360 | if (wo != WO_BDEV_FLUSH) |
1329 | wo = min(pwo, wo); | 1361 | wo = min(pwo, wo); |
1330 | rcu_read_lock(); | 1362 | rcu_read_lock(); |
1331 | idr_for_each_entry(&resource->devices, device, vnr) { | 1363 | idr_for_each_entry(&resource->devices, device, vnr) { |
@@ -1343,7 +1375,7 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin | |||
1343 | rcu_read_unlock(); | 1375 | rcu_read_unlock(); |
1344 | 1376 | ||
1345 | resource->write_ordering = wo; | 1377 | resource->write_ordering = wo; |
1346 | if (pwo != resource->write_ordering || wo == WO_bdev_flush) | 1378 | if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH) |
1347 | drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); | 1379 | drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); |
1348 | } | 1380 | } |
1349 | 1381 | ||
@@ -1380,7 +1412,7 @@ int drbd_submit_peer_request(struct drbd_device *device, | |||
1380 | if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) { | 1412 | if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) { |
1381 | /* wait for all pending IO completions, before we start | 1413 | /* wait for all pending IO completions, before we start |
1382 | * zeroing things out. */ | 1414 | * zeroing things out. */ |
1383 | conn_wait_active_ee_empty(first_peer_device(device)->connection); | 1415 | conn_wait_active_ee_empty(peer_req->peer_device->connection); |
1384 | /* add it to the active list now, | 1416 | /* add it to the active list now, |
1385 | * so we can find it to present it in debugfs */ | 1417 | * so we can find it to present it in debugfs */ |
1386 | peer_req->submit_jif = jiffies; | 1418 | peer_req->submit_jif = jiffies; |
@@ -1508,12 +1540,6 @@ static void conn_wait_active_ee_empty(struct drbd_connection *connection) | |||
1508 | rcu_read_unlock(); | 1540 | rcu_read_unlock(); |
1509 | } | 1541 | } |
1510 | 1542 | ||
1511 | static struct drbd_peer_device * | ||
1512 | conn_peer_device(struct drbd_connection *connection, int volume_number) | ||
1513 | { | ||
1514 | return idr_find(&connection->peer_devices, volume_number); | ||
1515 | } | ||
1516 | |||
1517 | static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi) | 1543 | static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi) |
1518 | { | 1544 | { |
1519 | int rv; | 1545 | int rv; |
@@ -1533,7 +1559,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf | |||
1533 | * Therefore we must send the barrier_ack after the barrier request was | 1559 | * Therefore we must send the barrier_ack after the barrier request was |
1534 | * completed. */ | 1560 | * completed. */ |
1535 | switch (connection->resource->write_ordering) { | 1561 | switch (connection->resource->write_ordering) { |
1536 | case WO_none: | 1562 | case WO_NONE: |
1537 | if (rv == FE_RECYCLED) | 1563 | if (rv == FE_RECYCLED) |
1538 | return 0; | 1564 | return 0; |
1539 | 1565 | ||
@@ -1546,8 +1572,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf | |||
1546 | drbd_warn(connection, "Allocation of an epoch failed, slowing down\n"); | 1572 | drbd_warn(connection, "Allocation of an epoch failed, slowing down\n"); |
1547 | /* Fall through */ | 1573 | /* Fall through */ |
1548 | 1574 | ||
1549 | case WO_bdev_flush: | 1575 | case WO_BDEV_FLUSH: |
1550 | case WO_drain_io: | 1576 | case WO_DRAIN_IO: |
1551 | conn_wait_active_ee_empty(connection); | 1577 | conn_wait_active_ee_empty(connection); |
1552 | drbd_flush(connection); | 1578 | drbd_flush(connection); |
1553 | 1579 | ||
@@ -1752,7 +1778,7 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req | |||
1752 | } | 1778 | } |
1753 | 1779 | ||
1754 | /* | 1780 | /* |
1755 | * e_end_resync_block() is called in asender context via | 1781 | * e_end_resync_block() is called in ack_sender context via |
1756 | * drbd_finish_peer_reqs(). | 1782 | * drbd_finish_peer_reqs(). |
1757 | */ | 1783 | */ |
1758 | static int e_end_resync_block(struct drbd_work *w, int unused) | 1784 | static int e_end_resync_block(struct drbd_work *w, int unused) |
@@ -1926,7 +1952,7 @@ static void restart_conflicting_writes(struct drbd_device *device, | |||
1926 | } | 1952 | } |
1927 | 1953 | ||
1928 | /* | 1954 | /* |
1929 | * e_end_block() is called in asender context via drbd_finish_peer_reqs(). | 1955 | * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs(). |
1930 | */ | 1956 | */ |
1931 | static int e_end_block(struct drbd_work *w, int cancel) | 1957 | static int e_end_block(struct drbd_work *w, int cancel) |
1932 | { | 1958 | { |
@@ -1966,7 +1992,7 @@ static int e_end_block(struct drbd_work *w, int cancel) | |||
1966 | } else | 1992 | } else |
1967 | D_ASSERT(device, drbd_interval_empty(&peer_req->i)); | 1993 | D_ASSERT(device, drbd_interval_empty(&peer_req->i)); |
1968 | 1994 | ||
1969 | drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); | 1995 | drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); |
1970 | 1996 | ||
1971 | return err; | 1997 | return err; |
1972 | } | 1998 | } |
@@ -2098,7 +2124,7 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co | |||
2098 | } | 2124 | } |
2099 | 2125 | ||
2100 | rcu_read_lock(); | 2126 | rcu_read_lock(); |
2101 | tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries; | 2127 | tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries; |
2102 | rcu_read_unlock(); | 2128 | rcu_read_unlock(); |
2103 | 2129 | ||
2104 | if (!tp) | 2130 | if (!tp) |
@@ -2217,7 +2243,7 @@ static int handle_write_conflicts(struct drbd_device *device, | |||
2217 | peer_req->w.cb = superseded ? e_send_superseded : | 2243 | peer_req->w.cb = superseded ? e_send_superseded : |
2218 | e_send_retry_write; | 2244 | e_send_retry_write; |
2219 | list_add_tail(&peer_req->w.list, &device->done_ee); | 2245 | list_add_tail(&peer_req->w.list, &device->done_ee); |
2220 | wake_asender(connection); | 2246 | queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work); |
2221 | 2247 | ||
2222 | err = -ENOENT; | 2248 | err = -ENOENT; |
2223 | goto out; | 2249 | goto out; |
@@ -2364,7 +2390,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2364 | if (dp_flags & DP_SEND_RECEIVE_ACK) { | 2390 | if (dp_flags & DP_SEND_RECEIVE_ACK) { |
2365 | /* I really don't like it that the receiver thread | 2391 | /* I really don't like it that the receiver thread |
2366 | * sends on the msock, but anyways */ | 2392 | * sends on the msock, but anyways */ |
2367 | drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req); | 2393 | drbd_send_ack(peer_device, P_RECV_ACK, peer_req); |
2368 | } | 2394 | } |
2369 | 2395 | ||
2370 | if (tp) { | 2396 | if (tp) { |
@@ -4056,7 +4082,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info | |||
4056 | os = ns = drbd_read_state(device); | 4082 | os = ns = drbd_read_state(device); |
4057 | spin_unlock_irq(&device->resource->req_lock); | 4083 | spin_unlock_irq(&device->resource->req_lock); |
4058 | 4084 | ||
4059 | /* If some other part of the code (asender thread, timeout) | 4085 | /* If some other part of the code (ack_receiver thread, timeout) |
4060 | * already decided to close the connection again, | 4086 | * already decided to close the connection again, |
4061 | * we must not "re-establish" it here. */ | 4087 | * we must not "re-establish" it here. */ |
4062 | if (os.conn <= C_TEAR_DOWN) | 4088 | if (os.conn <= C_TEAR_DOWN) |
@@ -4661,8 +4687,12 @@ static void conn_disconnect(struct drbd_connection *connection) | |||
4661 | */ | 4687 | */ |
4662 | conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); | 4688 | conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); |
4663 | 4689 | ||
4664 | /* asender does not clean up anything. it must not interfere, either */ | 4690 | /* ack_receiver does not clean up anything. it must not interfere, either */ |
4665 | drbd_thread_stop(&connection->asender); | 4691 | drbd_thread_stop(&connection->ack_receiver); |
4692 | if (connection->ack_sender) { | ||
4693 | destroy_workqueue(connection->ack_sender); | ||
4694 | connection->ack_sender = NULL; | ||
4695 | } | ||
4666 | drbd_free_sock(connection); | 4696 | drbd_free_sock(connection); |
4667 | 4697 | ||
4668 | rcu_read_lock(); | 4698 | rcu_read_lock(); |
@@ -5431,49 +5461,39 @@ static int got_skip(struct drbd_connection *connection, struct packet_info *pi) | |||
5431 | return 0; | 5461 | return 0; |
5432 | } | 5462 | } |
5433 | 5463 | ||
5434 | static int connection_finish_peer_reqs(struct drbd_connection *connection) | 5464 | struct meta_sock_cmd { |
5465 | size_t pkt_size; | ||
5466 | int (*fn)(struct drbd_connection *connection, struct packet_info *); | ||
5467 | }; | ||
5468 | |||
5469 | static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout) | ||
5435 | { | 5470 | { |
5436 | struct drbd_peer_device *peer_device; | 5471 | long t; |
5437 | int vnr, not_empty = 0; | 5472 | struct net_conf *nc; |
5438 | 5473 | ||
5439 | do { | 5474 | rcu_read_lock(); |
5440 | clear_bit(SIGNAL_ASENDER, &connection->flags); | 5475 | nc = rcu_dereference(connection->net_conf); |
5441 | flush_signals(current); | 5476 | t = ping_timeout ? nc->ping_timeo : nc->ping_int; |
5477 | rcu_read_unlock(); | ||
5442 | 5478 | ||
5443 | rcu_read_lock(); | 5479 | t *= HZ; |
5444 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | 5480 | if (ping_timeout) |
5445 | struct drbd_device *device = peer_device->device; | 5481 | t /= 10; |
5446 | kref_get(&device->kref); | ||
5447 | rcu_read_unlock(); | ||
5448 | if (drbd_finish_peer_reqs(device)) { | ||
5449 | kref_put(&device->kref, drbd_destroy_device); | ||
5450 | return 1; | ||
5451 | } | ||
5452 | kref_put(&device->kref, drbd_destroy_device); | ||
5453 | rcu_read_lock(); | ||
5454 | } | ||
5455 | set_bit(SIGNAL_ASENDER, &connection->flags); | ||
5456 | 5482 | ||
5457 | spin_lock_irq(&connection->resource->req_lock); | 5483 | connection->meta.socket->sk->sk_rcvtimeo = t; |
5458 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | 5484 | } |
5459 | struct drbd_device *device = peer_device->device; | ||
5460 | not_empty = !list_empty(&device->done_ee); | ||
5461 | if (not_empty) | ||
5462 | break; | ||
5463 | } | ||
5464 | spin_unlock_irq(&connection->resource->req_lock); | ||
5465 | rcu_read_unlock(); | ||
5466 | } while (not_empty); | ||
5467 | 5485 | ||
5468 | return 0; | 5486 | static void set_ping_timeout(struct drbd_connection *connection) |
5487 | { | ||
5488 | set_rcvtimeo(connection, 1); | ||
5469 | } | 5489 | } |
5470 | 5490 | ||
5471 | struct asender_cmd { | 5491 | static void set_idle_timeout(struct drbd_connection *connection) |
5472 | size_t pkt_size; | 5492 | { |
5473 | int (*fn)(struct drbd_connection *connection, struct packet_info *); | 5493 | set_rcvtimeo(connection, 0); |
5474 | }; | 5494 | } |
5475 | 5495 | ||
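set_rcvtimeo() above reproduces the old asender timeout handling: ping_timeo is interpreted in tenths of a second (t *= HZ; t /= 10) while ping_int is in whole seconds, so, for example, ping_timeo = 5 arms a 0.5 s PingAck deadline and ping_int = 10 restores a 10 s idle receive timeout once the PingAck arrives.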
5476 | static struct asender_cmd asender_tbl[] = { | 5496 | static struct meta_sock_cmd ack_receiver_tbl[] = { |
5477 | [P_PING] = { 0, got_Ping }, | 5497 | [P_PING] = { 0, got_Ping }, |
5478 | [P_PING_ACK] = { 0, got_PingAck }, | 5498 | [P_PING_ACK] = { 0, got_PingAck }, |
5479 | [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, | 5499 | [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, |
@@ -5493,64 +5513,40 @@ static struct asender_cmd asender_tbl[] = { | |||
5493 | [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, | 5513 | [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, |
5494 | }; | 5514 | }; |
5495 | 5515 | ||
5496 | int drbd_asender(struct drbd_thread *thi) | 5516 | int drbd_ack_receiver(struct drbd_thread *thi) |
5497 | { | 5517 | { |
5498 | struct drbd_connection *connection = thi->connection; | 5518 | struct drbd_connection *connection = thi->connection; |
5499 | struct asender_cmd *cmd = NULL; | 5519 | struct meta_sock_cmd *cmd = NULL; |
5500 | struct packet_info pi; | 5520 | struct packet_info pi; |
5521 | unsigned long pre_recv_jif; | ||
5501 | int rv; | 5522 | int rv; |
5502 | void *buf = connection->meta.rbuf; | 5523 | void *buf = connection->meta.rbuf; |
5503 | int received = 0; | 5524 | int received = 0; |
5504 | unsigned int header_size = drbd_header_size(connection); | 5525 | unsigned int header_size = drbd_header_size(connection); |
5505 | int expect = header_size; | 5526 | int expect = header_size; |
5506 | bool ping_timeout_active = false; | 5527 | bool ping_timeout_active = false; |
5507 | struct net_conf *nc; | ||
5508 | int ping_timeo, tcp_cork, ping_int; | ||
5509 | struct sched_param param = { .sched_priority = 2 }; | 5528 | struct sched_param param = { .sched_priority = 2 }; |
5510 | 5529 | ||
5511 | rv = sched_setscheduler(current, SCHED_RR, ¶m); | 5530 | rv = sched_setscheduler(current, SCHED_RR, ¶m); |
5512 | if (rv < 0) | 5531 | if (rv < 0) |
5513 | drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv); | 5532 | drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv); |
5514 | 5533 | ||
5515 | while (get_t_state(thi) == RUNNING) { | 5534 | while (get_t_state(thi) == RUNNING) { |
5516 | drbd_thread_current_set_cpu(thi); | 5535 | drbd_thread_current_set_cpu(thi); |
5517 | 5536 | ||
5518 | rcu_read_lock(); | 5537 | conn_reclaim_net_peer_reqs(connection); |
5519 | nc = rcu_dereference(connection->net_conf); | ||
5520 | ping_timeo = nc->ping_timeo; | ||
5521 | tcp_cork = nc->tcp_cork; | ||
5522 | ping_int = nc->ping_int; | ||
5523 | rcu_read_unlock(); | ||
5524 | 5538 | ||
5525 | if (test_and_clear_bit(SEND_PING, &connection->flags)) { | 5539 | if (test_and_clear_bit(SEND_PING, &connection->flags)) { |
5526 | if (drbd_send_ping(connection)) { | 5540 | if (drbd_send_ping(connection)) { |
5527 | drbd_err(connection, "drbd_send_ping has failed\n"); | 5541 | drbd_err(connection, "drbd_send_ping has failed\n"); |
5528 | goto reconnect; | 5542 | goto reconnect; |
5529 | } | 5543 | } |
5530 | connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10; | 5544 | set_ping_timeout(connection); |
5531 | ping_timeout_active = true; | 5545 | ping_timeout_active = true; |
5532 | } | 5546 | } |
5533 | 5547 | ||
5534 | /* TODO: conditionally cork; it may hurt latency if we cork without | 5548 | pre_recv_jif = jiffies; |
5535 | much to send */ | ||
5536 | if (tcp_cork) | ||
5537 | drbd_tcp_cork(connection->meta.socket); | ||
5538 | if (connection_finish_peer_reqs(connection)) { | ||
5539 | drbd_err(connection, "connection_finish_peer_reqs() failed\n"); | ||
5540 | goto reconnect; | ||
5541 | } | ||
5542 | /* but unconditionally uncork unless disabled */ | ||
5543 | if (tcp_cork) | ||
5544 | drbd_tcp_uncork(connection->meta.socket); | ||
5545 | |||
5546 | /* short circuit, recv_msg would return EINTR anyways. */ | ||
5547 | if (signal_pending(current)) | ||
5548 | continue; | ||
5549 | |||
5550 | rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0); | 5549 | rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0); |
5551 | clear_bit(SIGNAL_ASENDER, &connection->flags); | ||
5552 | |||
5553 | flush_signals(current); | ||
5554 | 5550 | ||
5555 | /* Note: | 5551 | /* Note: |
5556 | * -EINTR (on meta) we got a signal | 5552 | * -EINTR (on meta) we got a signal |
@@ -5562,7 +5558,6 @@ int drbd_asender(struct drbd_thread *thi) | |||
5562 | * rv < expected: "woken" by signal during receive | 5558 | * rv < expected: "woken" by signal during receive |
5563 | * rv == 0 : "connection shut down by peer" | 5559 | * rv == 0 : "connection shut down by peer" |
5564 | */ | 5560 | */ |
5565 | received_more: | ||
5566 | if (likely(rv > 0)) { | 5561 | if (likely(rv > 0)) { |
5567 | received += rv; | 5562 | received += rv; |
5568 | buf += rv; | 5563 | buf += rv; |
@@ -5584,8 +5579,7 @@ received_more: | |||
5584 | } else if (rv == -EAGAIN) { | 5579 | } else if (rv == -EAGAIN) { |
5585 | /* If the data socket received something meanwhile, | 5580 | /* If the data socket received something meanwhile, |
5586 | * that is good enough: peer is still alive. */ | 5581 | * that is good enough: peer is still alive. */ |
5587 | if (time_after(connection->last_received, | 5582 | if (time_after(connection->last_received, pre_recv_jif)) |
5588 | jiffies - connection->meta.socket->sk->sk_rcvtimeo)) | ||
5589 | continue; | 5583 | continue; |
5590 | if (ping_timeout_active) { | 5584 | if (ping_timeout_active) { |
5591 | drbd_err(connection, "PingAck did not arrive in time.\n"); | 5585 | drbd_err(connection, "PingAck did not arrive in time.\n"); |
@@ -5594,6 +5588,10 @@ received_more: | |||
5594 | set_bit(SEND_PING, &connection->flags); | 5588 | set_bit(SEND_PING, &connection->flags); |
5595 | continue; | 5589 | continue; |
5596 | } else if (rv == -EINTR) { | 5590 | } else if (rv == -EINTR) { |
5591 | /* maybe drbd_thread_stop(): the while condition will notice. | ||
5592 | * maybe woken for send_ping: we'll send a ping above, | ||
5593 | * and change the rcvtimeo */ | ||
5594 | flush_signals(current); | ||
5597 | continue; | 5595 | continue; |
5598 | } else { | 5596 | } else { |
5599 | drbd_err(connection, "sock_recvmsg returned %d\n", rv); | 5597 | drbd_err(connection, "sock_recvmsg returned %d\n", rv); |
@@ -5603,8 +5601,8 @@ received_more: | |||
5603 | if (received == expect && cmd == NULL) { | 5601 | if (received == expect && cmd == NULL) { |
5604 | if (decode_header(connection, connection->meta.rbuf, &pi)) | 5602 | if (decode_header(connection, connection->meta.rbuf, &pi)) |
5605 | goto reconnect; | 5603 | goto reconnect; |
5606 | cmd = &asender_tbl[pi.cmd]; | 5604 | cmd = &ack_receiver_tbl[pi.cmd]; |
5607 | if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) { | 5605 | if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) { |
5608 | drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n", | 5606 | drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n", |
5609 | cmdname(pi.cmd), pi.cmd); | 5607 | cmdname(pi.cmd), pi.cmd); |
5610 | goto disconnect; | 5608 | goto disconnect; |
@@ -5627,9 +5625,8 @@ received_more: | |||
5627 | 5625 | ||
5628 | connection->last_received = jiffies; | 5626 | connection->last_received = jiffies; |
5629 | 5627 | ||
5630 | if (cmd == &asender_tbl[P_PING_ACK]) { | 5628 | if (cmd == &ack_receiver_tbl[P_PING_ACK]) { |
5631 | /* restore idle timeout */ | 5629 | set_idle_timeout(connection); |
5632 | connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ; | ||
5633 | ping_timeout_active = false; | 5630 | ping_timeout_active = false; |
5634 | } | 5631 | } |
5635 | 5632 | ||
@@ -5638,11 +5635,6 @@ received_more: | |||
5638 | expect = header_size; | 5635 | expect = header_size; |
5639 | cmd = NULL; | 5636 | cmd = NULL; |
5640 | } | 5637 | } |
5641 | if (test_bit(SEND_PING, &connection->flags)) | ||
5642 | continue; | ||
5643 | rv = drbd_recv_short(connection->meta.socket, buf, expect-received, MSG_DONTWAIT); | ||
5644 | if (rv > 0) | ||
5645 | goto received_more; | ||
5646 | } | 5638 | } |
5647 | 5639 | ||
5648 | if (0) { | 5640 | if (0) { |
@@ -5654,9 +5646,41 @@ reconnect: | |||
5654 | disconnect: | 5646 | disconnect: |
5655 | conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); | 5647 | conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); |
5656 | } | 5648 | } |
5657 | clear_bit(SIGNAL_ASENDER, &connection->flags); | ||
5658 | 5649 | ||
5659 | drbd_info(connection, "asender terminated\n"); | 5650 | drbd_info(connection, "ack_receiver terminated\n"); |
5660 | 5651 | ||
5661 | return 0; | 5652 | return 0; |
5662 | } | 5653 | } |
5654 | |||
5655 | void drbd_send_acks_wf(struct work_struct *ws) | ||
5656 | { | ||
5657 | struct drbd_peer_device *peer_device = | ||
5658 | container_of(ws, struct drbd_peer_device, send_acks_work); | ||
5659 | struct drbd_connection *connection = peer_device->connection; | ||
5660 | struct drbd_device *device = peer_device->device; | ||
5661 | struct net_conf *nc; | ||
5662 | int tcp_cork, err; | ||
5663 | |||
5664 | rcu_read_lock(); | ||
5665 | nc = rcu_dereference(connection->net_conf); | ||
5666 | tcp_cork = nc->tcp_cork; | ||
5667 | rcu_read_unlock(); | ||
5668 | |||
5669 | if (tcp_cork) | ||
5670 | drbd_tcp_cork(connection->meta.socket); | ||
5671 | |||
5672 | err = drbd_finish_peer_reqs(device); | ||
5673 | kref_put(&device->kref, drbd_destroy_device); | ||
5674 | /* The matching kref_get() is in drbd_endio_write_sec_final(); it is needed to keep the | ||
5675 | struct work_struct send_acks_work alive, which is embedded in the peer_device object. */ | ||
5676 | |||
5677 | if (err) { | ||
5678 | conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD); | ||
5679 | return; | ||
5680 | } | ||
5681 | |||
5682 | if (tcp_cork) | ||
5683 | drbd_tcp_uncork(connection->meta.socket); | ||
5684 | |||
5685 | return; | ||
5686 | } | ||
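With the old asender split into an ack_receiver thread and the per-connection ack_sender workqueue, the write-completion path now just queues the peer device's send_acks_work (as seen in handle_write_conflicts() above), and drbd_send_acks_wf() batches the acks under an optional TCP cork. A hedged sketch of the producer side; queue_acks() is an illustrative wrapper, and in DRBD the kref_get() actually lives in drbd_endio_write_sec_final() while the INIT_WORK() happens during peer_device setup, outside this hunk:

    static void queue_acks(struct drbd_peer_device *peer_device)
    {
            struct drbd_device *device = peer_device->device;

            /* Pin the device; the matching kref_put() is in drbd_send_acks_wf(). */
            kref_get(&device->kref);
            queue_work(peer_device->connection->ack_sender,
                       &peer_device->send_acks_work);
    }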
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3ae2c0086563..2255dcfebd2b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -453,12 +453,12 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, | |||
453 | kref_get(&req->kref); /* wait for the DONE */ | 453 | kref_get(&req->kref); /* wait for the DONE */ |
454 | 454 | ||
455 | if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) { | 455 | if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) { |
456 | /* potentially already completed in the asender thread */ | 456 | /* potentially already completed in the ack_receiver thread */ |
457 | if (!(s & RQ_NET_DONE)) { | 457 | if (!(s & RQ_NET_DONE)) { |
458 | atomic_add(req->i.size >> 9, &device->ap_in_flight); | 458 | atomic_add(req->i.size >> 9, &device->ap_in_flight); |
459 | set_if_null_req_not_net_done(peer_device, req); | 459 | set_if_null_req_not_net_done(peer_device, req); |
460 | } | 460 | } |
461 | if (s & RQ_NET_PENDING) | 461 | if (req->rq_state & RQ_NET_PENDING) |
462 | set_if_null_req_ack_pending(peer_device, req); | 462 | set_if_null_req_ack_pending(peer_device, req); |
463 | } | 463 | } |
464 | 464 | ||
@@ -1095,6 +1095,24 @@ static bool do_remote_read(struct drbd_request *req) | |||
1095 | return false; | 1095 | return false; |
1096 | } | 1096 | } |
1097 | 1097 | ||
1098 | bool drbd_should_do_remote(union drbd_dev_state s) | ||
1099 | { | ||
1100 | return s.pdsk == D_UP_TO_DATE || | ||
1101 | (s.pdsk >= D_INCONSISTENT && | ||
1102 | s.conn >= C_WF_BITMAP_T && | ||
1103 | s.conn < C_AHEAD); | ||
1104 | /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T. | ||
1105 | That is equivalent since before 96 IO was frozen in the C_WF_BITMAP* | ||
1106 | states. */ | ||
1107 | } | ||
1108 | |||
1109 | static bool drbd_should_send_out_of_sync(union drbd_dev_state s) | ||
1110 | { | ||
1111 | return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S; | ||
1112 | /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary | ||
1113 | since we enter state C_AHEAD only if proto >= 96 */ | ||
1114 | } | ||
1115 | |||
1098 | /* returns number of connections (== 1, for drbd 8.4) | 1116 | /* returns number of connections (== 1, for drbd 8.4) |
1099 | * expected to actually write this data, | 1117 | * expected to actually write this data, |
1100 | * which does NOT include those that we are L_AHEAD for. */ | 1118 | * which does NOT include those that we are L_AHEAD for. */ |
@@ -1149,7 +1167,6 @@ drbd_submit_req_private_bio(struct drbd_request *req) | |||
1149 | * stable storage, and this is a WRITE, we may not even submit | 1167 | * stable storage, and this is a WRITE, we may not even submit |
1150 | * this bio. */ | 1168 | * this bio. */ |
1151 | if (get_ldev(device)) { | 1169 | if (get_ldev(device)) { |
1152 | req->pre_submit_jif = jiffies; | ||
1153 | if (drbd_insert_fault(device, | 1170 | if (drbd_insert_fault(device, |
1154 | rw == WRITE ? DRBD_FAULT_DT_WR | 1171 | rw == WRITE ? DRBD_FAULT_DT_WR |
1155 | : rw == READ ? DRBD_FAULT_DT_RD | 1172 | : rw == READ ? DRBD_FAULT_DT_RD |
@@ -1293,6 +1310,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request | |||
1293 | &device->pending_master_completion[rw == WRITE]); | 1310 | &device->pending_master_completion[rw == WRITE]); |
1294 | if (req->private_bio) { | 1311 | if (req->private_bio) { |
1295 | /* needs to be marked within the same spinlock */ | 1312 | /* needs to be marked within the same spinlock */ |
1313 | req->pre_submit_jif = jiffies; | ||
1296 | list_add_tail(&req->req_pending_local, | 1314 | list_add_tail(&req->req_pending_local, |
1297 | &device->pending_completion[rw == WRITE]); | 1315 | &device->pending_completion[rw == WRITE]); |
1298 | _req_mod(req, TO_BE_SUBMITTED); | 1316 | _req_mod(req, TO_BE_SUBMITTED); |
@@ -1513,6 +1531,78 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio) | |||
1513 | return BLK_QC_T_NONE; | 1531 | return BLK_QC_T_NONE; |
1514 | } | 1532 | } |
1515 | 1533 | ||
1534 | static bool net_timeout_reached(struct drbd_request *net_req, | ||
1535 | struct drbd_connection *connection, | ||
1536 | unsigned long now, unsigned long ent, | ||
1537 | unsigned int ko_count, unsigned int timeout) | ||
1538 | { | ||
1539 | struct drbd_device *device = net_req->device; | ||
1540 | |||
1541 | if (!time_after(now, net_req->pre_send_jif + ent)) | ||
1542 | return false; | ||
1543 | |||
1544 | if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) | ||
1545 | return false; | ||
1546 | |||
1547 | if (net_req->rq_state & RQ_NET_PENDING) { | ||
1548 | drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n", | ||
1549 | jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout); | ||
1550 | return true; | ||
1551 | } | ||
1552 | |||
1553 | /* We received an ACK already (or are using protocol A), | ||
1554 | * but are waiting for the epoch closing barrier ack. | ||
1555 | * Check if we sent the barrier already. We should not blame the peer | ||
1556 | * for being unresponsive, if we did not even ask it yet. */ | ||
1557 | if (net_req->epoch == connection->send.current_epoch_nr) { | ||
1558 | drbd_warn(device, | ||
1559 | "We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n", | ||
1560 | jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout); | ||
1561 | return false; | ||
1562 | } | ||
1563 | |||
1564 | /* Worst case: we may have been blocked for whatever reason, then | ||
1565 | * suddenly are able to send a lot of requests (and epoch separating | ||
1566 | * barriers) in quick succession. | ||
1567 | * The timestamp of the net_req may be much too old and not correspond | ||
1568 | * to the sending time of the relevant unack'ed barrier packet, so | ||
1569 | * it would trigger a spurious timeout. Conversely, the latest barrier | ||
1570 | * packet may have a timestamp too recent to trigger the timeout, so we | ||
1571 | * could miss a real one. Right now we don't have a place to conveniently store | ||
1572 | * these timestamps. | ||
1573 | * But in this particular situation, the application requests are still | ||
1574 | * completed to upper layers, so DRBD should still "feel" responsive. | ||
1575 | * No need yet to kill this connection, it may still recover. | ||
1576 | * If not, eventually we will have queued enough into the network for | ||
1577 | * us to block. From that point of view, the timestamp of the last sent | ||
1578 | * barrier packet is relevant enough. | ||
1579 | */ | ||
1580 | if (time_after(now, connection->send.last_sent_barrier_jif + ent)) { | ||
1581 | drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n", | ||
1582 | connection->send.last_sent_barrier_jif, now, | ||
1583 | jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout); | ||
1584 | return true; | ||
1585 | } | ||
1586 | return false; | ||
1587 | } | ||
1588 | |||
1589 | /* A request is considered timed out, if | ||
1590 | * - we have some effective timeout from the configuration, | ||
1591 | * with some state restrictions applied, | ||
1592 | * - the oldest request is waiting for a response from the network | ||
1593 | * resp. the local disk, | ||
1594 | * - the oldest request is in fact older than the effective timeout, | ||
1595 | * - the connection was established (resp. disk was attached) | ||
1596 | * for longer than the timeout already. | ||
1597 | * Note that for 32bit jiffies and very stable connections/disks, | ||
1598 | * we may have a wrap-around, which is caught by | ||
1599 | * !time_in_range(now, last_..._jif, last_..._jif + timeout). | ||
1600 | * | ||
1601 | * Side effect: once per 32bit wrap-around interval, which means every | ||
1602 | * ~198 days with 250 HZ, we have a window where the timeout would need | ||
1603 | * to expire twice (worst case) to become effective. Good enough. | ||
1604 | */ | ||
1605 | |||
1516 | void request_timer_fn(unsigned long data) | 1606 | void request_timer_fn(unsigned long data) |
1517 | { | 1607 | { |
1518 | struct drbd_device *device = (struct drbd_device *) data; | 1608 | struct drbd_device *device = (struct drbd_device *) data; |
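The comment block above relies on the kernel's wrap-around-safe jiffies helpers (time_after(), time_in_range()). A self-contained C sketch of the underlying unsigned-arithmetic trick; ticks_after()/ticks_in_range() mirror the semantics of those helpers for an unsigned long tick counter, and the values in main() are made up for the demonstration:

#include <stdio.h>

static int ticks_after(unsigned long a, unsigned long b)
{
	/* true if a is after b, even across an unsigned wrap-around */
	return (long)(b - a) < 0;
}

static int ticks_in_range(unsigned long now, unsigned long start, unsigned long end)
{
	return !ticks_after(start, now) && !ticks_after(now, end);
}

int main(void)
{
	unsigned long near_wrap = (unsigned long)-10;	/* 10 ticks before the counter wraps */
	unsigned long past_wrap = 5;			/* 15 ticks later, after the wrap */

	printf("%d\n", ticks_after(past_wrap, near_wrap));			/* 1: order preserved across wrap */
	printf("%d\n", ticks_in_range(past_wrap, near_wrap, near_wrap + 30));	/* 1: within the window */
	return 0;
}

The signed cast on the unsigned difference is what keeps the comparison correct across the wrap, which is exactly the property the "!time_in_range(now, last_..._jif, last_..._jif + timeout)" check depends on.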
@@ -1522,11 +1612,14 @@ void request_timer_fn(unsigned long data) | |||
1522 | unsigned long oldest_submit_jif; | 1612 | unsigned long oldest_submit_jif; |
1523 | unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ | 1613 | unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ |
1524 | unsigned long now; | 1614 | unsigned long now; |
1615 | unsigned int ko_count = 0, timeout = 0; | ||
1525 | 1616 | ||
1526 | rcu_read_lock(); | 1617 | rcu_read_lock(); |
1527 | nc = rcu_dereference(connection->net_conf); | 1618 | nc = rcu_dereference(connection->net_conf); |
1528 | if (nc && device->state.conn >= C_WF_REPORT_PARAMS) | 1619 | if (nc && device->state.conn >= C_WF_REPORT_PARAMS) { |
1529 | ent = nc->timeout * HZ/10 * nc->ko_count; | 1620 | ko_count = nc->ko_count; |
1621 | timeout = nc->timeout; | ||
1622 | } | ||
1530 | 1623 | ||
1531 | if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */ | 1624 | if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */ |
1532 | dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10; | 1625 | dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10; |
@@ -1534,6 +1627,8 @@ void request_timer_fn(unsigned long data) | |||
1534 | } | 1627 | } |
1535 | rcu_read_unlock(); | 1628 | rcu_read_unlock(); |
1536 | 1629 | ||
1630 | |||
1631 | ent = timeout * HZ/10 * ko_count; | ||
1537 | et = min_not_zero(dt, ent); | 1632 | et = min_not_zero(dt, ent); |
1538 | 1633 | ||
1539 | if (!et) | 1634 | if (!et) |
@@ -1545,11 +1640,22 @@ void request_timer_fn(unsigned long data) | |||
1545 | spin_lock_irq(&device->resource->req_lock); | 1640 | spin_lock_irq(&device->resource->req_lock); |
1546 | req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local); | 1641 | req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local); |
1547 | req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local); | 1642 | req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local); |
1548 | req_peer = connection->req_not_net_done; | 1643 | |
1549 | /* maybe the oldest request waiting for the peer is in fact still | 1644 | /* maybe the oldest request waiting for the peer is in fact still |
1550 | * blocking in tcp sendmsg */ | 1645 | * blocking in tcp sendmsg. That's ok, though, that's handled via the |
1551 | if (!req_peer && connection->req_next && connection->req_next->pre_send_jif) | 1646 | * socket send timeout, requesting a ping, and bumping ko-count in |
1552 | req_peer = connection->req_next; | 1647 | * we_should_drop_the_connection(). |
1648 | */ | ||
1649 | |||
1650 | /* check the oldest request we successfully sent, | ||
1651 | * but which is still waiting for an ACK. */ | ||
1652 | req_peer = connection->req_ack_pending; | ||
1653 | |||
1654 | /* if we don't have such a request (e.g. protocol A), | ||
1655 | * check the oldest request which is still waiting on its epoch | ||
1656 | * closing barrier ack. */ | ||
1657 | if (!req_peer) | ||
1658 | req_peer = connection->req_not_net_done; | ||
1553 | 1659 | ||
1554 | /* evaluate the oldest peer request only in one timer! */ | 1660 | /* evaluate the oldest peer request only in one timer! */ |
1555 | if (req_peer && req_peer->device != device) | 1661 | if (req_peer && req_peer->device != device) |
@@ -1566,28 +1672,9 @@ void request_timer_fn(unsigned long data) | |||
1566 | : req_write ? req_write->pre_submit_jif | 1672 | : req_write ? req_write->pre_submit_jif |
1567 | : req_read ? req_read->pre_submit_jif : now; | 1673 | : req_read ? req_read->pre_submit_jif : now; |
1568 | 1674 | ||
1569 | /* The request is considered timed out, if | 1675 | if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout)) |
1570 | * - we have some effective timeout from the configuration, | ||
1571 | * with above state restrictions applied, | ||
1572 | * - the oldest request is waiting for a response from the network | ||
1573 | * resp. the local disk, | ||
1574 | * - the oldest request is in fact older than the effective timeout, | ||
1575 | * - the connection was established (resp. disk was attached) | ||
1576 | * for longer than the timeout already. | ||
1577 | * Note that for 32bit jiffies and very stable connections/disks, | ||
1578 | * we may have a wrap around, which is catched by | ||
1579 | * !time_in_range(now, last_..._jif, last_..._jif + timeout). | ||
1580 | * | ||
1581 | * Side effect: once per 32bit wrap-around interval, which means every | ||
1582 | * ~198 days with 250 HZ, we have a window where the timeout would need | ||
1583 | * to expire twice (worst case) to become effective. Good enough. | ||
1584 | */ | ||
1585 | if (ent && req_peer && | ||
1586 | time_after(now, req_peer->pre_send_jif + ent) && | ||
1587 | !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { | ||
1588 | drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); | ||
1589 | _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD); | 1676 | _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD); |
1590 | } | 1677 | |
1591 | if (dt && oldest_submit_jif != now && | 1678 | if (dt && oldest_submit_jif != now && |
1592 | time_after(now, oldest_submit_jif + dt) && | 1679 | time_after(now, oldest_submit_jif + dt) && |
1593 | !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { | 1680 | !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { |
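The timer above derives its effective timeouts as shown in this hunk: the network timeout is ko-count * timeout (timeout is configured in 0.1s units), the disk timeout is taken as configured, and the smaller non-zero of the two drives the timer. A standalone sketch of that arithmetic only; HZ, min_not_zero_ul() and the configuration values here are illustrative stand-ins, not DRBD's actual definitions:

#include <stdio.h>

#define HZ 250UL

static unsigned long min_not_zero_ul(unsigned long a, unsigned long b)
{
	if (a == 0)
		return b;
	if (b == 0)
		return a;
	return a < b ? a : b;
}

int main(void)
{
	unsigned long ko_count = 7;		/* like net_conf->ko_count */
	unsigned long timeout = 60;		/* like net_conf->timeout: 60 * 0.1s = 6s */
	unsigned long disk_timeout = 0;		/* like disk_conf->disk_timeout: 0 = disabled */

	unsigned long ent = timeout * HZ / 10 * ko_count;	/* network timeout in jiffies */
	unsigned long dt = disk_timeout * HZ / 10;		/* disk timeout in jiffies */
	unsigned long et = min_not_zero_ul(dt, ent);

	printf("ent=%lu dt=%lu effective=%lu jiffies (%.1fs)\n",
	       ent, dt, et, (double)et / HZ);
	return 0;
}

With these example numbers the peer gets 42 seconds (7 * 6s) to answer before the connection would be forced to C_TIMEOUT.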
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 9f6a04080e9f..bb2ef78165e5 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h | |||
@@ -331,21 +331,6 @@ static inline int req_mod(struct drbd_request *req, | |||
331 | return rv; | 331 | return rv; |
332 | } | 332 | } |
333 | 333 | ||
334 | static inline bool drbd_should_do_remote(union drbd_dev_state s) | 334 | extern bool drbd_should_do_remote(union drbd_dev_state); |
335 | { | ||
336 | return s.pdsk == D_UP_TO_DATE || | ||
337 | (s.pdsk >= D_INCONSISTENT && | ||
338 | s.conn >= C_WF_BITMAP_T && | ||
339 | s.conn < C_AHEAD); | ||
340 | /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T. | ||
341 | That is equivalent since before 96 IO was frozen in the C_WF_BITMAP* | ||
342 | states. */ | ||
343 | } | ||
344 | static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s) | ||
345 | { | ||
346 | return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S; | ||
347 | /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary | ||
348 | since we enter state C_AHEAD only if proto >= 96 */ | ||
349 | } | ||
350 | 335 | ||
351 | #endif | 336 | #endif |
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 2d7dd269b6a8..5a7ef7873b67 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "drbd_int.h" | 29 | #include "drbd_int.h" |
30 | #include "drbd_protocol.h" | 30 | #include "drbd_protocol.h" |
31 | #include "drbd_req.h" | 31 | #include "drbd_req.h" |
32 | #include "drbd_state_change.h" | ||
32 | 33 | ||
33 | struct after_state_chg_work { | 34 | struct after_state_chg_work { |
34 | struct drbd_work w; | 35 | struct drbd_work w; |
@@ -37,6 +38,7 @@ struct after_state_chg_work { | |||
37 | union drbd_state ns; | 38 | union drbd_state ns; |
38 | enum chg_state_flags flags; | 39 | enum chg_state_flags flags; |
39 | struct completion *done; | 40 | struct completion *done; |
41 | struct drbd_state_change *state_change; | ||
40 | }; | 42 | }; |
41 | 43 | ||
42 | enum sanitize_state_warnings { | 44 | enum sanitize_state_warnings { |
@@ -48,9 +50,248 @@ enum sanitize_state_warnings { | |||
48 | IMPLICITLY_UPGRADED_PDSK, | 50 | IMPLICITLY_UPGRADED_PDSK, |
49 | }; | 51 | }; |
50 | 52 | ||
53 | static void count_objects(struct drbd_resource *resource, | ||
54 | unsigned int *n_devices, | ||
55 | unsigned int *n_connections) | ||
56 | { | ||
57 | struct drbd_device *device; | ||
58 | struct drbd_connection *connection; | ||
59 | int vnr; | ||
60 | |||
61 | *n_devices = 0; | ||
62 | *n_connections = 0; | ||
63 | |||
64 | idr_for_each_entry(&resource->devices, device, vnr) | ||
65 | (*n_devices)++; | ||
66 | for_each_connection(connection, resource) | ||
67 | (*n_connections)++; | ||
68 | } | ||
69 | |||
70 | static struct drbd_state_change *alloc_state_change(unsigned int n_devices, unsigned int n_connections, gfp_t gfp) | ||
71 | { | ||
72 | struct drbd_state_change *state_change; | ||
73 | unsigned int size, n; | ||
74 | |||
75 | size = sizeof(struct drbd_state_change) + | ||
76 | n_devices * sizeof(struct drbd_device_state_change) + | ||
77 | n_connections * sizeof(struct drbd_connection_state_change) + | ||
78 | n_devices * n_connections * sizeof(struct drbd_peer_device_state_change); | ||
79 | state_change = kmalloc(size, gfp); | ||
80 | if (!state_change) | ||
81 | return NULL; | ||
82 | state_change->n_devices = n_devices; | ||
83 | state_change->n_connections = n_connections; | ||
84 | state_change->devices = (void *)(state_change + 1); | ||
85 | state_change->connections = (void *)&state_change->devices[n_devices]; | ||
86 | state_change->peer_devices = (void *)&state_change->connections[n_connections]; | ||
87 | state_change->resource->resource = NULL; | ||
88 | for (n = 0; n < n_devices; n++) | ||
89 | state_change->devices[n].device = NULL; | ||
90 | for (n = 0; n < n_connections; n++) | ||
91 | state_change->connections[n].connection = NULL; | ||
92 | return state_change; | ||
93 | } | ||
94 | |||
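alloc_state_change() above packs the header and all three snapshot arrays into one allocation and then aims the header's pointer members at the trailing regions. A simplified userspace sketch of that layout; the struct definitions here are stand-ins for the real drbd_*_state_change types:

#include <stdlib.h>
#include <string.h>

struct dev_change  { int old_state, new_state; };
struct conn_change { int old_cstate, new_cstate; };
struct peer_change { int old_repl, new_repl; };

struct change_set {
	unsigned int n_devices;
	unsigned int n_connections;
	struct dev_change *devices;
	struct conn_change *connections;
	struct peer_change *peer_devices;	/* n_devices * n_connections entries */
};

struct change_set *alloc_change_set(unsigned int n_devices, unsigned int n_connections)
{
	size_t size = sizeof(struct change_set) +
		      n_devices * sizeof(struct dev_change) +
		      n_connections * sizeof(struct conn_change) +
		      n_devices * n_connections * sizeof(struct peer_change);
	struct change_set *cs = malloc(size);

	if (!cs)
		return NULL;
	memset(cs, 0, size);
	cs->n_devices = n_devices;
	cs->n_connections = n_connections;
	/* the arrays live directly behind the header, in this fixed order */
	cs->devices = (void *)(cs + 1);
	cs->connections = (void *)&cs->devices[n_devices];
	cs->peer_devices = (void *)&cs->connections[n_connections];
	return cs;
}

Because everything lives in one block, a single free() (kfree() in the kernel) releases the whole snapshot once the object references are dropped, which is what forget_state_change() relies on.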
95 | struct drbd_state_change *remember_old_state(struct drbd_resource *resource, gfp_t gfp) | ||
96 | { | ||
97 | struct drbd_state_change *state_change; | ||
98 | struct drbd_device *device; | ||
99 | unsigned int n_devices; | ||
100 | struct drbd_connection *connection; | ||
101 | unsigned int n_connections; | ||
102 | int vnr; | ||
103 | |||
104 | struct drbd_device_state_change *device_state_change; | ||
105 | struct drbd_peer_device_state_change *peer_device_state_change; | ||
106 | struct drbd_connection_state_change *connection_state_change; | ||
107 | |||
108 | /* Caller holds req_lock spinlock. | ||
109 | * No state, no device IDR, no connection lists can change. */ | ||
110 | count_objects(resource, &n_devices, &n_connections); | ||
111 | state_change = alloc_state_change(n_devices, n_connections, gfp); | ||
112 | if (!state_change) | ||
113 | return NULL; | ||
114 | |||
115 | kref_get(&resource->kref); | ||
116 | state_change->resource->resource = resource; | ||
117 | state_change->resource->role[OLD] = | ||
118 | conn_highest_role(first_connection(resource)); | ||
119 | state_change->resource->susp[OLD] = resource->susp; | ||
120 | state_change->resource->susp_nod[OLD] = resource->susp_nod; | ||
121 | state_change->resource->susp_fen[OLD] = resource->susp_fen; | ||
122 | |||
123 | connection_state_change = state_change->connections; | ||
124 | for_each_connection(connection, resource) { | ||
125 | kref_get(&connection->kref); | ||
126 | connection_state_change->connection = connection; | ||
127 | connection_state_change->cstate[OLD] = | ||
128 | connection->cstate; | ||
129 | connection_state_change->peer_role[OLD] = | ||
130 | conn_highest_peer(connection); | ||
131 | connection_state_change++; | ||
132 | } | ||
133 | |||
134 | device_state_change = state_change->devices; | ||
135 | peer_device_state_change = state_change->peer_devices; | ||
136 | idr_for_each_entry(&resource->devices, device, vnr) { | ||
137 | kref_get(&device->kref); | ||
138 | device_state_change->device = device; | ||
139 | device_state_change->disk_state[OLD] = device->state.disk; | ||
140 | |||
141 | /* The peer_devices for each device have to be enumerated in | ||
142 | the order of the connections. We may not use for_each_peer_device() here. */ | ||
143 | for_each_connection(connection, resource) { | ||
144 | struct drbd_peer_device *peer_device; | ||
145 | |||
146 | peer_device = conn_peer_device(connection, device->vnr); | ||
147 | peer_device_state_change->peer_device = peer_device; | ||
148 | peer_device_state_change->disk_state[OLD] = | ||
149 | device->state.pdsk; | ||
150 | peer_device_state_change->repl_state[OLD] = | ||
151 | max_t(enum drbd_conns, | ||
152 | C_WF_REPORT_PARAMS, device->state.conn); | ||
153 | peer_device_state_change->resync_susp_user[OLD] = | ||
154 | device->state.user_isp; | ||
155 | peer_device_state_change->resync_susp_peer[OLD] = | ||
156 | device->state.peer_isp; | ||
157 | peer_device_state_change->resync_susp_dependency[OLD] = | ||
158 | device->state.aftr_isp; | ||
159 | peer_device_state_change++; | ||
160 | } | ||
161 | device_state_change++; | ||
162 | } | ||
163 | |||
164 | return state_change; | ||
165 | } | ||
166 | |||
167 | static void remember_new_state(struct drbd_state_change *state_change) | ||
168 | { | ||
169 | struct drbd_resource_state_change *resource_state_change; | ||
170 | struct drbd_resource *resource; | ||
171 | unsigned int n; | ||
172 | |||
173 | if (!state_change) | ||
174 | return; | ||
175 | |||
176 | resource_state_change = &state_change->resource[0]; | ||
177 | resource = resource_state_change->resource; | ||
178 | |||
179 | resource_state_change->role[NEW] = | ||
180 | conn_highest_role(first_connection(resource)); | ||
181 | resource_state_change->susp[NEW] = resource->susp; | ||
182 | resource_state_change->susp_nod[NEW] = resource->susp_nod; | ||
183 | resource_state_change->susp_fen[NEW] = resource->susp_fen; | ||
184 | |||
185 | for (n = 0; n < state_change->n_devices; n++) { | ||
186 | struct drbd_device_state_change *device_state_change = | ||
187 | &state_change->devices[n]; | ||
188 | struct drbd_device *device = device_state_change->device; | ||
189 | |||
190 | device_state_change->disk_state[NEW] = device->state.disk; | ||
191 | } | ||
192 | |||
193 | for (n = 0; n < state_change->n_connections; n++) { | ||
194 | struct drbd_connection_state_change *connection_state_change = | ||
195 | &state_change->connections[n]; | ||
196 | struct drbd_connection *connection = | ||
197 | connection_state_change->connection; | ||
198 | |||
199 | connection_state_change->cstate[NEW] = connection->cstate; | ||
200 | connection_state_change->peer_role[NEW] = | ||
201 | conn_highest_peer(connection); | ||
202 | } | ||
203 | |||
204 | for (n = 0; n < state_change->n_devices * state_change->n_connections; n++) { | ||
205 | struct drbd_peer_device_state_change *peer_device_state_change = | ||
206 | &state_change->peer_devices[n]; | ||
207 | struct drbd_device *device = | ||
208 | peer_device_state_change->peer_device->device; | ||
209 | union drbd_dev_state state = device->state; | ||
210 | |||
211 | peer_device_state_change->disk_state[NEW] = state.pdsk; | ||
212 | peer_device_state_change->repl_state[NEW] = | ||
213 | max_t(enum drbd_conns, C_WF_REPORT_PARAMS, state.conn); | ||
214 | peer_device_state_change->resync_susp_user[NEW] = | ||
215 | state.user_isp; | ||
216 | peer_device_state_change->resync_susp_peer[NEW] = | ||
217 | state.peer_isp; | ||
218 | peer_device_state_change->resync_susp_dependency[NEW] = | ||
219 | state.aftr_isp; | ||
220 | } | ||
221 | } | ||
222 | |||
223 | void copy_old_to_new_state_change(struct drbd_state_change *state_change) | ||
224 | { | ||
225 | struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; | ||
226 | unsigned int n_device, n_connection, n_peer_device, n_peer_devices; | ||
227 | |||
228 | #define OLD_TO_NEW(x) \ | ||
229 | (x[NEW] = x[OLD]) | ||
230 | |||
231 | OLD_TO_NEW(resource_state_change->role); | ||
232 | OLD_TO_NEW(resource_state_change->susp); | ||
233 | OLD_TO_NEW(resource_state_change->susp_nod); | ||
234 | OLD_TO_NEW(resource_state_change->susp_fen); | ||
235 | |||
236 | for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) { | ||
237 | struct drbd_connection_state_change *connection_state_change = | ||
238 | &state_change->connections[n_connection]; | ||
239 | |||
240 | OLD_TO_NEW(connection_state_change->peer_role); | ||
241 | OLD_TO_NEW(connection_state_change->cstate); | ||
242 | } | ||
243 | |||
244 | for (n_device = 0; n_device < state_change->n_devices; n_device++) { | ||
245 | struct drbd_device_state_change *device_state_change = | ||
246 | &state_change->devices[n_device]; | ||
247 | |||
248 | OLD_TO_NEW(device_state_change->disk_state); | ||
249 | } | ||
250 | |||
251 | n_peer_devices = state_change->n_devices * state_change->n_connections; | ||
252 | for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) { | ||
253 | struct drbd_peer_device_state_change *p = | ||
254 | &state_change->peer_devices[n_peer_device]; | ||
255 | |||
256 | OLD_TO_NEW(p->disk_state); | ||
257 | OLD_TO_NEW(p->repl_state); | ||
258 | OLD_TO_NEW(p->resync_susp_user); | ||
259 | OLD_TO_NEW(p->resync_susp_peer); | ||
260 | OLD_TO_NEW(p->resync_susp_dependency); | ||
261 | } | ||
262 | |||
263 | #undef OLD_TO_NEW | ||
264 | } | ||
265 | |||
266 | void forget_state_change(struct drbd_state_change *state_change) | ||
267 | { | ||
268 | unsigned int n; | ||
269 | |||
270 | if (!state_change) | ||
271 | return; | ||
272 | |||
273 | if (state_change->resource->resource) | ||
274 | kref_put(&state_change->resource->resource->kref, drbd_destroy_resource); | ||
275 | for (n = 0; n < state_change->n_devices; n++) { | ||
276 | struct drbd_device *device = state_change->devices[n].device; | ||
277 | |||
278 | if (device) | ||
279 | kref_put(&device->kref, drbd_destroy_device); | ||
280 | } | ||
281 | for (n = 0; n < state_change->n_connections; n++) { | ||
282 | struct drbd_connection *connection = | ||
283 | state_change->connections[n].connection; | ||
284 | |||
285 | if (connection) | ||
286 | kref_put(&connection->kref, drbd_destroy_connection); | ||
287 | } | ||
288 | kfree(state_change); | ||
289 | } | ||
290 | |||
51 | static int w_after_state_ch(struct drbd_work *w, int unused); | 291 | static int w_after_state_ch(struct drbd_work *w, int unused); |
52 | static void after_state_ch(struct drbd_device *device, union drbd_state os, | 292 | static void after_state_ch(struct drbd_device *device, union drbd_state os, |
53 | union drbd_state ns, enum chg_state_flags flags); | 293 | union drbd_state ns, enum chg_state_flags flags, |
294 | struct drbd_state_change *); | ||
54 | static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state); | 295 | static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state); |
55 | static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *); | 296 | static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *); |
56 | static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); | 297 | static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); |
@@ -93,6 +334,7 @@ static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2) | |||
93 | return R_SECONDARY; | 334 | return R_SECONDARY; |
94 | return R_UNKNOWN; | 335 | return R_UNKNOWN; |
95 | } | 336 | } |
337 | |||
96 | static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2) | 338 | static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2) |
97 | { | 339 | { |
98 | if (role1 == R_UNKNOWN || role2 == R_UNKNOWN) | 340 | if (role1 == R_UNKNOWN || role2 == R_UNKNOWN) |
@@ -937,7 +1179,7 @@ void drbd_resume_al(struct drbd_device *device) | |||
937 | drbd_info(device, "Resumed AL updates\n"); | 1179 | drbd_info(device, "Resumed AL updates\n"); |
938 | } | 1180 | } |
939 | 1181 | ||
940 | /* helper for __drbd_set_state */ | 1182 | /* helper for _drbd_set_state */ |
941 | static void set_ov_position(struct drbd_device *device, enum drbd_conns cs) | 1183 | static void set_ov_position(struct drbd_device *device, enum drbd_conns cs) |
942 | { | 1184 | { |
943 | if (first_peer_device(device)->connection->agreed_pro_version < 90) | 1185 | if (first_peer_device(device)->connection->agreed_pro_version < 90) |
@@ -965,17 +1207,17 @@ static void set_ov_position(struct drbd_device *device, enum drbd_conns cs) | |||
965 | } | 1207 | } |
966 | 1208 | ||
967 | /** | 1209 | /** |
968 | * __drbd_set_state() - Set a new DRBD state | 1210 | * _drbd_set_state() - Set a new DRBD state |
969 | * @device: DRBD device. | 1211 | * @device: DRBD device. |
970 | * @ns: new state. | 1212 | * @ns: new state. |
971 | * @flags: Flags | 1213 | * @flags: Flags |
972 | * @done: Optional completion, that will get completed after the after_state_ch() finished | 1214 | * @done: Optional completion, that will get completed after the after_state_ch() finished |
973 | * | 1215 | * |
974 | * Caller needs to hold req_lock, and global_state_lock. Do not call directly. | 1216 | * Caller needs to hold req_lock. Do not call directly. |
975 | */ | 1217 | */ |
976 | enum drbd_state_rv | 1218 | enum drbd_state_rv |
977 | __drbd_set_state(struct drbd_device *device, union drbd_state ns, | 1219 | _drbd_set_state(struct drbd_device *device, union drbd_state ns, |
978 | enum chg_state_flags flags, struct completion *done) | 1220 | enum chg_state_flags flags, struct completion *done) |
979 | { | 1221 | { |
980 | struct drbd_peer_device *peer_device = first_peer_device(device); | 1222 | struct drbd_peer_device *peer_device = first_peer_device(device); |
981 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | 1223 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; |
@@ -983,6 +1225,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
983 | enum drbd_state_rv rv = SS_SUCCESS; | 1225 | enum drbd_state_rv rv = SS_SUCCESS; |
984 | enum sanitize_state_warnings ssw; | 1226 | enum sanitize_state_warnings ssw; |
985 | struct after_state_chg_work *ascw; | 1227 | struct after_state_chg_work *ascw; |
1228 | struct drbd_state_change *state_change; | ||
986 | 1229 | ||
987 | os = drbd_read_state(device); | 1230 | os = drbd_read_state(device); |
988 | 1231 | ||
@@ -1037,6 +1280,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
1037 | if (!is_sync_state(os.conn) && is_sync_state(ns.conn)) | 1280 | if (!is_sync_state(os.conn) && is_sync_state(ns.conn)) |
1038 | clear_bit(RS_DONE, &device->flags); | 1281 | clear_bit(RS_DONE, &device->flags); |
1039 | 1282 | ||
1283 | /* FIXME: Have any flags been set earlier in this function already? */ | ||
1284 | state_change = remember_old_state(device->resource, GFP_ATOMIC); | ||
1285 | |||
1040 | /* changes to local_cnt and device flags should be visible before | 1286 | /* changes to local_cnt and device flags should be visible before |
1041 | * changes to state, which again should be visible before anything else | 1287 | * changes to state, which again should be visible before anything else |
1042 | * depending on that change happens. */ | 1288 | * depending on that change happens. */ |
@@ -1047,6 +1293,8 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
1047 | device->resource->susp_fen = ns.susp_fen; | 1293 | device->resource->susp_fen = ns.susp_fen; |
1048 | smp_wmb(); | 1294 | smp_wmb(); |
1049 | 1295 | ||
1296 | remember_new_state(state_change); | ||
1297 | |||
1050 | /* put replicated vs not-replicated requests in separate epochs */ | 1298 | /* put replicated vs not-replicated requests in separate epochs */ |
1051 | if (drbd_should_do_remote((union drbd_dev_state)os.i) != | 1299 | if (drbd_should_do_remote((union drbd_dev_state)os.i) != |
1052 | drbd_should_do_remote((union drbd_dev_state)ns.i)) | 1300 | drbd_should_do_remote((union drbd_dev_state)ns.i)) |
@@ -1184,6 +1432,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
1184 | ascw->w.cb = w_after_state_ch; | 1432 | ascw->w.cb = w_after_state_ch; |
1185 | ascw->device = device; | 1433 | ascw->device = device; |
1186 | ascw->done = done; | 1434 | ascw->done = done; |
1435 | ascw->state_change = state_change; | ||
1187 | drbd_queue_work(&connection->sender_work, | 1436 | drbd_queue_work(&connection->sender_work, |
1188 | &ascw->w); | 1437 | &ascw->w); |
1189 | } else { | 1438 | } else { |
@@ -1199,7 +1448,8 @@ static int w_after_state_ch(struct drbd_work *w, int unused) | |||
1199 | container_of(w, struct after_state_chg_work, w); | 1448 | container_of(w, struct after_state_chg_work, w); |
1200 | struct drbd_device *device = ascw->device; | 1449 | struct drbd_device *device = ascw->device; |
1201 | 1450 | ||
1202 | after_state_ch(device, ascw->os, ascw->ns, ascw->flags); | 1451 | after_state_ch(device, ascw->os, ascw->ns, ascw->flags, ascw->state_change); |
1452 | forget_state_change(ascw->state_change); | ||
1203 | if (ascw->flags & CS_WAIT_COMPLETE) | 1453 | if (ascw->flags & CS_WAIT_COMPLETE) |
1204 | complete(ascw->done); | 1454 | complete(ascw->done); |
1205 | kfree(ascw); | 1455 | kfree(ascw); |
@@ -1234,7 +1484,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, | |||
1234 | D_ASSERT(device, current == first_peer_device(device)->connection->worker.task); | 1484 | D_ASSERT(device, current == first_peer_device(device)->connection->worker.task); |
1235 | 1485 | ||
1236 | /* open coded non-blocking drbd_suspend_io(device); */ | 1486 | /* open coded non-blocking drbd_suspend_io(device); */ |
1237 | set_bit(SUSPEND_IO, &device->flags); | 1487 | atomic_inc(&device->suspend_cnt); |
1238 | 1488 | ||
1239 | drbd_bm_lock(device, why, flags); | 1489 | drbd_bm_lock(device, why, flags); |
1240 | rv = io_fn(device); | 1490 | rv = io_fn(device); |
@@ -1245,6 +1495,139 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, | |||
1245 | return rv; | 1495 | return rv; |
1246 | } | 1496 | } |
1247 | 1497 | ||
1498 | void notify_resource_state_change(struct sk_buff *skb, | ||
1499 | unsigned int seq, | ||
1500 | struct drbd_resource_state_change *resource_state_change, | ||
1501 | enum drbd_notification_type type) | ||
1502 | { | ||
1503 | struct drbd_resource *resource = resource_state_change->resource; | ||
1504 | struct resource_info resource_info = { | ||
1505 | .res_role = resource_state_change->role[NEW], | ||
1506 | .res_susp = resource_state_change->susp[NEW], | ||
1507 | .res_susp_nod = resource_state_change->susp_nod[NEW], | ||
1508 | .res_susp_fen = resource_state_change->susp_fen[NEW], | ||
1509 | }; | ||
1510 | |||
1511 | notify_resource_state(skb, seq, resource, &resource_info, type); | ||
1512 | } | ||
1513 | |||
1514 | void notify_connection_state_change(struct sk_buff *skb, | ||
1515 | unsigned int seq, | ||
1516 | struct drbd_connection_state_change *connection_state_change, | ||
1517 | enum drbd_notification_type type) | ||
1518 | { | ||
1519 | struct drbd_connection *connection = connection_state_change->connection; | ||
1520 | struct connection_info connection_info = { | ||
1521 | .conn_connection_state = connection_state_change->cstate[NEW], | ||
1522 | .conn_role = connection_state_change->peer_role[NEW], | ||
1523 | }; | ||
1524 | |||
1525 | notify_connection_state(skb, seq, connection, &connection_info, type); | ||
1526 | } | ||
1527 | |||
1528 | void notify_device_state_change(struct sk_buff *skb, | ||
1529 | unsigned int seq, | ||
1530 | struct drbd_device_state_change *device_state_change, | ||
1531 | enum drbd_notification_type type) | ||
1532 | { | ||
1533 | struct drbd_device *device = device_state_change->device; | ||
1534 | struct device_info device_info = { | ||
1535 | .dev_disk_state = device_state_change->disk_state[NEW], | ||
1536 | }; | ||
1537 | |||
1538 | notify_device_state(skb, seq, device, &device_info, type); | ||
1539 | } | ||
1540 | |||
1541 | void notify_peer_device_state_change(struct sk_buff *skb, | ||
1542 | unsigned int seq, | ||
1543 | struct drbd_peer_device_state_change *p, | ||
1544 | enum drbd_notification_type type) | ||
1545 | { | ||
1546 | struct drbd_peer_device *peer_device = p->peer_device; | ||
1547 | struct peer_device_info peer_device_info = { | ||
1548 | .peer_repl_state = p->repl_state[NEW], | ||
1549 | .peer_disk_state = p->disk_state[NEW], | ||
1550 | .peer_resync_susp_user = p->resync_susp_user[NEW], | ||
1551 | .peer_resync_susp_peer = p->resync_susp_peer[NEW], | ||
1552 | .peer_resync_susp_dependency = p->resync_susp_dependency[NEW], | ||
1553 | }; | ||
1554 | |||
1555 | notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); | ||
1556 | } | ||
1557 | |||
1558 | static void broadcast_state_change(struct drbd_state_change *state_change) | ||
1559 | { | ||
1560 | struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; | ||
1561 | bool resource_state_has_changed; | ||
1562 | unsigned int n_device, n_connection, n_peer_device, n_peer_devices; | ||
1563 | void (*last_func)(struct sk_buff *, unsigned int, void *, | ||
1564 | enum drbd_notification_type) = NULL; | ||
1565 | void *uninitialized_var(last_arg); | ||
1566 | |||
1567 | #define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW]) | ||
1568 | #define FINAL_STATE_CHANGE(type) \ | ||
1569 | ({ if (last_func) \ | ||
1570 | last_func(NULL, 0, last_arg, type); \ | ||
1571 | }) | ||
1572 | #define REMEMBER_STATE_CHANGE(func, arg, type) \ | ||
1573 | ({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \ | ||
1574 | last_func = (typeof(last_func))func; \ | ||
1575 | last_arg = arg; \ | ||
1576 | }) | ||
1577 | |||
1578 | mutex_lock(¬ification_mutex); | ||
1579 | |||
1580 | resource_state_has_changed = | ||
1581 | HAS_CHANGED(resource_state_change->role) || | ||
1582 | HAS_CHANGED(resource_state_change->susp) || | ||
1583 | HAS_CHANGED(resource_state_change->susp_nod) || | ||
1584 | HAS_CHANGED(resource_state_change->susp_fen); | ||
1585 | |||
1586 | if (resource_state_has_changed) | ||
1587 | REMEMBER_STATE_CHANGE(notify_resource_state_change, | ||
1588 | resource_state_change, NOTIFY_CHANGE); | ||
1589 | |||
1590 | for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) { | ||
1591 | struct drbd_connection_state_change *connection_state_change = | ||
1592 | &state_change->connections[n_connection]; | ||
1593 | |||
1594 | if (HAS_CHANGED(connection_state_change->peer_role) || | ||
1595 | HAS_CHANGED(connection_state_change->cstate)) | ||
1596 | REMEMBER_STATE_CHANGE(notify_connection_state_change, | ||
1597 | connection_state_change, NOTIFY_CHANGE); | ||
1598 | } | ||
1599 | |||
1600 | for (n_device = 0; n_device < state_change->n_devices; n_device++) { | ||
1601 | struct drbd_device_state_change *device_state_change = | ||
1602 | &state_change->devices[n_device]; | ||
1603 | |||
1604 | if (HAS_CHANGED(device_state_change->disk_state)) | ||
1605 | REMEMBER_STATE_CHANGE(notify_device_state_change, | ||
1606 | device_state_change, NOTIFY_CHANGE); | ||
1607 | } | ||
1608 | |||
1609 | n_peer_devices = state_change->n_devices * state_change->n_connections; | ||
1610 | for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) { | ||
1611 | struct drbd_peer_device_state_change *p = | ||
1612 | &state_change->peer_devices[n_peer_device]; | ||
1613 | |||
1614 | if (HAS_CHANGED(p->disk_state) || | ||
1615 | HAS_CHANGED(p->repl_state) || | ||
1616 | HAS_CHANGED(p->resync_susp_user) || | ||
1617 | HAS_CHANGED(p->resync_susp_peer) || | ||
1618 | HAS_CHANGED(p->resync_susp_dependency)) | ||
1619 | REMEMBER_STATE_CHANGE(notify_peer_device_state_change, | ||
1620 | p, NOTIFY_CHANGE); | ||
1621 | } | ||
1622 | |||
1623 | FINAL_STATE_CHANGE(NOTIFY_CHANGE); | ||
1624 | mutex_unlock(¬ification_mutex); | ||
1625 | |||
1626 | #undef HAS_CHANGED | ||
1627 | #undef FINAL_STATE_CHANGE | ||
1628 | #undef REMEMBER_STATE_CHANGE | ||
1629 | } | ||
1630 | |||
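broadcast_state_change() above never emits a notification the moment it finds a change; it remembers the change, first flushing the previously remembered one flagged NOTIFY_CONTINUES, and sends the very last one without that flag so a listener can tell where the broadcast for one state transition ends. A small self-contained C sketch of that deferred-emit chaining; the enum values, the emitter struct and all names are invented for this illustration:

#include <stdio.h>

enum { NOTIFY_CHANGE = 1, NOTIFY_CONTINUES = 0x8000 };	/* illustrative values only */

struct emitter {
	void (*last_func)(const char *what, int type);
	const char *last_arg;
};

static void emit(const char *what, int type)
{
	printf("%s%s\n", what, (type & NOTIFY_CONTINUES) ? " (continues)" : "");
}

static void flush_pending(struct emitter *e, int type)
{
	/* emit the previously remembered change, if any */
	if (e->last_func)
		e->last_func(e->last_arg, type);
}

static void remember(struct emitter *e, const char *what)
{
	/* the one we flush now is known not to be the last */
	flush_pending(e, NOTIFY_CHANGE | NOTIFY_CONTINUES);
	e->last_func = emit;
	e->last_arg = what;
}

int main(void)
{
	struct emitter e = { 0 };

	remember(&e, "resource changed");
	remember(&e, "connection changed");
	remember(&e, "device changed");
	flush_pending(&e, NOTIFY_CHANGE);	/* final one: no "continues" flag */
	return 0;
}

The point of the indirection is that you only know an event was the last one after the loop has finished, so each emission is delayed by exactly one step.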
1248 | /** | 1631 | /** |
1249 | * after_state_ch() - Perform after state change actions that may sleep | 1632 | * after_state_ch() - Perform after state change actions that may sleep |
1250 | * @device: DRBD device. | 1633 | * @device: DRBD device. |
@@ -1253,13 +1636,16 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, | |||
1253 | * @flags: Flags | 1636 | * @flags: Flags |
1254 | */ | 1637 | */ |
1255 | static void after_state_ch(struct drbd_device *device, union drbd_state os, | 1638 | static void after_state_ch(struct drbd_device *device, union drbd_state os, |
1256 | union drbd_state ns, enum chg_state_flags flags) | 1639 | union drbd_state ns, enum chg_state_flags flags, |
1640 | struct drbd_state_change *state_change) | ||
1257 | { | 1641 | { |
1258 | struct drbd_resource *resource = device->resource; | 1642 | struct drbd_resource *resource = device->resource; |
1259 | struct drbd_peer_device *peer_device = first_peer_device(device); | 1643 | struct drbd_peer_device *peer_device = first_peer_device(device); |
1260 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; | 1644 | struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; |
1261 | struct sib_info sib; | 1645 | struct sib_info sib; |
1262 | 1646 | ||
1647 | broadcast_state_change(state_change); | ||
1648 | |||
1263 | sib.sib_reason = SIB_STATE_CHANGE; | 1649 | sib.sib_reason = SIB_STATE_CHANGE; |
1264 | sib.os = os; | 1650 | sib.os = os; |
1265 | sib.ns = ns; | 1651 | sib.ns = ns; |
@@ -1377,7 +1763,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1377 | } | 1763 | } |
1378 | 1764 | ||
1379 | if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) { | 1765 | if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) { |
1380 | if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && | 1766 | if (os.peer != R_PRIMARY && ns.peer == R_PRIMARY && |
1381 | device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { | 1767 | device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { |
1382 | drbd_uuid_new_current(device); | 1768 | drbd_uuid_new_current(device); |
1383 | drbd_send_uuids(peer_device); | 1769 | drbd_send_uuids(peer_device); |
@@ -1444,7 +1830,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1444 | if (os.disk != D_FAILED && ns.disk == D_FAILED) { | 1830 | if (os.disk != D_FAILED && ns.disk == D_FAILED) { |
1445 | enum drbd_io_error_p eh = EP_PASS_ON; | 1831 | enum drbd_io_error_p eh = EP_PASS_ON; |
1446 | int was_io_error = 0; | 1832 | int was_io_error = 0; |
1447 | /* corresponding get_ldev was in __drbd_set_state, to serialize | 1833 | /* corresponding get_ldev was in _drbd_set_state, to serialize |
1448 | * our cleanup here with the transition to D_DISKLESS. | 1834 | * our cleanup here with the transition to D_DISKLESS. |
1449 | * But it is still not safe to dereference ldev here, since | 1835 | * But it is still not safe to dereference ldev here, since |
1450 | * we might come from a failed Attach before ldev was set. */ | 1836 | * we might come from a failed Attach before ldev was set. */ |
@@ -1455,6 +1841,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
1455 | 1841 | ||
1456 | was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags); | 1842 | was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags); |
1457 | 1843 | ||
1844 | /* Intentionally call this handler first, before drbd_send_state(). | ||
1845 | * See: 2932204 drbd: call local-io-error handler early | ||
1846 | * People may choose to hard-reset the box from this handler. | ||
1847 | * It is useful if this looks like a "regular node crash". */ | ||
1458 | if (was_io_error && eh == EP_CALL_HELPER) | 1848 | if (was_io_error && eh == EP_CALL_HELPER) |
1459 | drbd_khelper(device, "local-io-error"); | 1849 | drbd_khelper(device, "local-io-error"); |
1460 | 1850 | ||
@@ -1572,6 +1962,7 @@ struct after_conn_state_chg_work { | |||
1572 | union drbd_state ns_max; /* new, max state, over all devices */ | 1962 | union drbd_state ns_max; /* new, max state, over all devices */ |
1573 | enum chg_state_flags flags; | 1963 | enum chg_state_flags flags; |
1574 | struct drbd_connection *connection; | 1964 | struct drbd_connection *connection; |
1965 | struct drbd_state_change *state_change; | ||
1575 | }; | 1966 | }; |
1576 | 1967 | ||
1577 | static int w_after_conn_state_ch(struct drbd_work *w, int unused) | 1968 | static int w_after_conn_state_ch(struct drbd_work *w, int unused) |
@@ -1584,6 +1975,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) | |||
1584 | struct drbd_peer_device *peer_device; | 1975 | struct drbd_peer_device *peer_device; |
1585 | int vnr; | 1976 | int vnr; |
1586 | 1977 | ||
1978 | broadcast_state_change(acscw->state_change); | ||
1979 | forget_state_change(acscw->state_change); | ||
1587 | kfree(acscw); | 1980 | kfree(acscw); |
1588 | 1981 | ||
1589 | /* Upon network configuration, we need to start the receiver */ | 1982 | /* Upon network configuration, we need to start the receiver */ |
@@ -1593,6 +1986,13 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) | |||
1593 | if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) { | 1986 | if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) { |
1594 | struct net_conf *old_conf; | 1987 | struct net_conf *old_conf; |
1595 | 1988 | ||
1989 | mutex_lock(¬ification_mutex); | ||
1990 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) | ||
1991 | notify_peer_device_state(NULL, 0, peer_device, NULL, | ||
1992 | NOTIFY_DESTROY | NOTIFY_CONTINUES); | ||
1993 | notify_connection_state(NULL, 0, connection, NULL, NOTIFY_DESTROY); | ||
1994 | mutex_unlock(¬ification_mutex); | ||
1995 | |||
1596 | mutex_lock(&connection->resource->conf_update); | 1996 | mutex_lock(&connection->resource->conf_update); |
1597 | old_conf = connection->net_conf; | 1997 | old_conf = connection->net_conf; |
1598 | connection->my_addr_len = 0; | 1998 | connection->my_addr_len = 0; |
@@ -1759,7 +2159,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union | |||
1759 | if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) | 2159 | if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) |
1760 | ns.disk = os.disk; | 2160 | ns.disk = os.disk; |
1761 | 2161 | ||
1762 | rv = __drbd_set_state(device, ns, flags, NULL); | 2162 | rv = _drbd_set_state(device, ns, flags, NULL); |
1763 | if (rv < SS_SUCCESS) | 2163 | if (rv < SS_SUCCESS) |
1764 | BUG(); | 2164 | BUG(); |
1765 | 2165 | ||
@@ -1823,6 +2223,7 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u | |||
1823 | enum drbd_conns oc = connection->cstate; | 2223 | enum drbd_conns oc = connection->cstate; |
1824 | union drbd_state ns_max, ns_min, os; | 2224 | union drbd_state ns_max, ns_min, os; |
1825 | bool have_mutex = false; | 2225 | bool have_mutex = false; |
2226 | struct drbd_state_change *state_change; | ||
1826 | 2227 | ||
1827 | if (mask.conn) { | 2228 | if (mask.conn) { |
1828 | rv = is_valid_conn_transition(oc, val.conn); | 2229 | rv = is_valid_conn_transition(oc, val.conn); |
@@ -1868,10 +2269,12 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u | |||
1868 | goto abort; | 2269 | goto abort; |
1869 | } | 2270 | } |
1870 | 2271 | ||
2272 | state_change = remember_old_state(connection->resource, GFP_ATOMIC); | ||
1871 | conn_old_common_state(connection, &os, &flags); | 2273 | conn_old_common_state(connection, &os, &flags); |
1872 | flags |= CS_DC_SUSP; | 2274 | flags |= CS_DC_SUSP; |
1873 | conn_set_state(connection, mask, val, &ns_min, &ns_max, flags); | 2275 | conn_set_state(connection, mask, val, &ns_min, &ns_max, flags); |
1874 | conn_pr_state_change(connection, os, ns_max, flags); | 2276 | conn_pr_state_change(connection, os, ns_max, flags); |
2277 | remember_new_state(state_change); | ||
1875 | 2278 | ||
1876 | acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); | 2279 | acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); |
1877 | if (acscw) { | 2280 | if (acscw) { |
@@ -1882,6 +2285,7 @@ _conn_request_state(struct drbd_connection *connection, union drbd_state mask, u | |||
1882 | acscw->w.cb = w_after_conn_state_ch; | 2285 | acscw->w.cb = w_after_conn_state_ch; |
1883 | kref_get(&connection->kref); | 2286 | kref_get(&connection->kref); |
1884 | acscw->connection = connection; | 2287 | acscw->connection = connection; |
2288 | acscw->state_change = state_change; | ||
1885 | drbd_queue_work(&connection->sender_work, &acscw->w); | 2289 | drbd_queue_work(&connection->sender_work, &acscw->w); |
1886 | } else { | 2290 | } else { |
1887 | drbd_err(connection, "Could not kmalloc an acscw\n"); | 2291 | drbd_err(connection, "Could not kmalloc an acscw\n"); |
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 7f53c40823cd..bd989536f888 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h | |||
@@ -122,9 +122,9 @@ extern enum drbd_state_rv | |||
122 | _drbd_request_state_holding_state_mutex(struct drbd_device *, union drbd_state, | 122 | _drbd_request_state_holding_state_mutex(struct drbd_device *, union drbd_state, |
123 | union drbd_state, enum chg_state_flags); | 123 | union drbd_state, enum chg_state_flags); |
124 | 124 | ||
125 | extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state, | 125 | extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state, |
126 | enum chg_state_flags, | 126 | enum chg_state_flags, |
127 | struct completion *done); | 127 | struct completion *done); |
128 | extern void print_st_err(struct drbd_device *, union drbd_state, | 128 | extern void print_st_err(struct drbd_device *, union drbd_state, |
129 | union drbd_state, int); | 129 | union drbd_state, int); |
130 | 130 | ||
diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h new file mode 100644 index 000000000000..9e503a1a0bfb --- /dev/null +++ b/drivers/block/drbd/drbd_state_change.h | |||
@@ -0,0 +1,63 @@ | |||
1 | #ifndef DRBD_STATE_CHANGE_H | ||
2 | #define DRBD_STATE_CHANGE_H | ||
3 | |||
4 | struct drbd_resource_state_change { | ||
5 | struct drbd_resource *resource; | ||
6 | enum drbd_role role[2]; | ||
7 | bool susp[2]; | ||
8 | bool susp_nod[2]; | ||
9 | bool susp_fen[2]; | ||
10 | }; | ||
11 | |||
12 | struct drbd_device_state_change { | ||
13 | struct drbd_device *device; | ||
14 | enum drbd_disk_state disk_state[2]; | ||
15 | }; | ||
16 | |||
17 | struct drbd_connection_state_change { | ||
18 | struct drbd_connection *connection; | ||
19 | enum drbd_conns cstate[2]; /* drbd9: enum drbd_conn_state */ | ||
20 | enum drbd_role peer_role[2]; | ||
21 | }; | ||
22 | |||
23 | struct drbd_peer_device_state_change { | ||
24 | struct drbd_peer_device *peer_device; | ||
25 | enum drbd_disk_state disk_state[2]; | ||
26 | enum drbd_conns repl_state[2]; /* drbd9: enum drbd_repl_state */ | ||
27 | bool resync_susp_user[2]; | ||
28 | bool resync_susp_peer[2]; | ||
29 | bool resync_susp_dependency[2]; | ||
30 | }; | ||
31 | |||
32 | struct drbd_state_change { | ||
33 | struct list_head list; | ||
34 | unsigned int n_devices; | ||
35 | unsigned int n_connections; | ||
36 | struct drbd_resource_state_change resource[1]; | ||
37 | struct drbd_device_state_change *devices; | ||
38 | struct drbd_connection_state_change *connections; | ||
39 | struct drbd_peer_device_state_change *peer_devices; | ||
40 | }; | ||
41 | |||
42 | extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_t); | ||
43 | extern void copy_old_to_new_state_change(struct drbd_state_change *); | ||
44 | extern void forget_state_change(struct drbd_state_change *); | ||
45 | |||
46 | extern void notify_resource_state_change(struct sk_buff *, | ||
47 | unsigned int, | ||
48 | struct drbd_resource_state_change *, | ||
49 | enum drbd_notification_type type); | ||
50 | extern void notify_connection_state_change(struct sk_buff *, | ||
51 | unsigned int, | ||
52 | struct drbd_connection_state_change *, | ||
53 | enum drbd_notification_type type); | ||
54 | extern void notify_device_state_change(struct sk_buff *, | ||
55 | unsigned int, | ||
56 | struct drbd_device_state_change *, | ||
57 | enum drbd_notification_type type); | ||
58 | extern void notify_peer_device_state_change(struct sk_buff *, | ||
59 | unsigned int, | ||
60 | struct drbd_peer_device_state_change *, | ||
61 | enum drbd_notification_type type); | ||
62 | |||
63 | #endif /* DRBD_STATE_CHANGE_H */ | ||
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 5578c1477ba6..eff716c27b1f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -55,13 +55,6 @@ static int make_resync_request(struct drbd_device *, int); | |||
55 | * | 55 | * |
56 | */ | 56 | */ |
57 | 57 | ||
58 | |||
59 | /* About the global_state_lock | ||
60 | Each state transition on an device holds a read lock. In case we have | ||
61 | to evaluate the resync after dependencies, we grab a write lock, because | ||
62 | we need stable states on all devices for that. */ | ||
63 | rwlock_t global_state_lock; | ||
64 | |||
65 | /* used for synchronous meta data and bitmap IO | 58 | /* used for synchronous meta data and bitmap IO |
66 | * submitted by drbd_md_sync_page_io() | 59 | * submitted by drbd_md_sync_page_io() |
67 | */ | 60 | */ |
@@ -120,6 +113,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l | |||
120 | unsigned long flags = 0; | 113 | unsigned long flags = 0; |
121 | struct drbd_peer_device *peer_device = peer_req->peer_device; | 114 | struct drbd_peer_device *peer_device = peer_req->peer_device; |
122 | struct drbd_device *device = peer_device->device; | 115 | struct drbd_device *device = peer_device->device; |
116 | struct drbd_connection *connection = peer_device->connection; | ||
123 | struct drbd_interval i; | 117 | struct drbd_interval i; |
124 | int do_wake; | 118 | int do_wake; |
125 | u64 block_id; | 119 | u64 block_id; |
@@ -152,6 +146,12 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l | |||
152 | * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ | 146 | * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ |
153 | if (peer_req->flags & EE_WAS_ERROR) | 147 | if (peer_req->flags & EE_WAS_ERROR) |
154 | __drbd_chk_io_error(device, DRBD_WRITE_ERROR); | 148 | __drbd_chk_io_error(device, DRBD_WRITE_ERROR); |
149 | |||
150 | if (connection->cstate >= C_WF_REPORT_PARAMS) { | ||
151 | kref_get(&device->kref); /* put is in drbd_send_acks_wf() */ | ||
152 | if (!queue_work(connection->ack_sender, &peer_device->send_acks_work)) | ||
153 | kref_put(&device->kref, drbd_destroy_device); | ||
154 | } | ||
155 | spin_unlock_irqrestore(&device->resource->req_lock, flags); | 155 | spin_unlock_irqrestore(&device->resource->req_lock, flags); |
156 | 156 | ||
157 | if (block_id == ID_SYNCER) | 157 | if (block_id == ID_SYNCER) |
@@ -163,7 +163,6 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l | |||
163 | if (do_al_complete_io) | 163 | if (do_al_complete_io) |
164 | drbd_al_complete_io(device, &i); | 164 | drbd_al_complete_io(device, &i); |
165 | 165 | ||
166 | wake_asender(peer_device->connection); | ||
167 | put_ldev(device); | 166 | put_ldev(device); |
168 | } | 167 | } |
169 | 168 | ||
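The hunk above takes a kref on the device before queueing send_acks_work and drops it again immediately if queue_work() reports the work as already pending; otherwise the reference is released by drbd_send_acks_wf() (see the kref_put near the top of this diff). A compact userspace C sketch of that get-before-queue / put-in-worker pattern; the toy object, the single-slot "queue" and the function names are purely illustrative:

#include <stdbool.h>
#include <stdio.h>

struct object {
	int refs;
	bool work_queued;
};

static void object_get(struct object *obj) { obj->refs++; }

static void object_put(struct object *obj)
{
	if (--obj->refs == 0)
		printf("object destroyed\n");
}

static bool queue_work_once(struct object *obj)
{
	if (obj->work_queued)
		return false;		/* already pending, nothing new was queued */
	obj->work_queued = true;
	return true;
}

static void work_fn(struct object *obj)
{
	obj->work_queued = false;
	printf("work ran, refs=%d\n", obj->refs);
	object_put(obj);		/* matches the get taken before queueing */
}

int main(void)
{
	struct object obj = { .refs = 1, .work_queued = false };

	object_get(&obj);		/* keep obj alive until the work runs */
	if (!queue_work_once(&obj))
		object_put(&obj);	/* was already queued: drop our extra reference */

	work_fn(&obj);			/* the queue eventually runs the work */
	object_put(&obj);		/* drop the original reference */
	return 0;
}

The invariant is that exactly one reference is held per successfully queued work item, so the object cannot disappear between the endio path and the deferred ACK sender.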
@@ -195,6 +194,12 @@ void drbd_peer_request_endio(struct bio *bio) | |||
195 | } | 194 | } |
196 | } | 195 | } |
197 | 196 | ||
197 | void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device) | ||
198 | { | ||
199 | panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n", | ||
200 | device->minor, device->resource->name, device->vnr); | ||
201 | } | ||
202 | |||
198 | /* read, readA or write requests on R_PRIMARY coming from drbd_make_request | 203 | /* read, readA or write requests on R_PRIMARY coming from drbd_make_request |
199 | */ | 204 | */ |
200 | void drbd_request_endio(struct bio *bio) | 205 | void drbd_request_endio(struct bio *bio) |
@@ -238,7 +243,7 @@ void drbd_request_endio(struct bio *bio) | |||
238 | drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); | 243 | drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); |
239 | 244 | ||
240 | if (!bio->bi_error) | 245 | if (!bio->bi_error) |
241 | panic("possible random memory corruption caused by delayed completion of aborted local request\n"); | 246 | drbd_panic_after_delayed_completion_of_aborted_request(device); |
242 | } | 247 | } |
243 | 248 | ||
244 | /* to avoid recursion in __req_mod */ | 249 | /* to avoid recursion in __req_mod */ |
@@ -1291,6 +1296,7 @@ static int drbd_send_barrier(struct drbd_connection *connection) | |||
1291 | p->barrier = connection->send.current_epoch_nr; | 1296 | p->barrier = connection->send.current_epoch_nr; |
1292 | p->pad = 0; | 1297 | p->pad = 0; |
1293 | connection->send.current_epoch_writes = 0; | 1298 | connection->send.current_epoch_writes = 0; |
1299 | connection->send.last_sent_barrier_jif = jiffies; | ||
1294 | 1300 | ||
1295 | return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0); | 1301 | return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0); |
1296 | } | 1302 | } |
@@ -1315,6 +1321,7 @@ static void re_init_if_first_write(struct drbd_connection *connection, unsigned | |||
1315 | connection->send.seen_any_write_yet = true; | 1321 | connection->send.seen_any_write_yet = true; |
1316 | connection->send.current_epoch_nr = epoch; | 1322 | connection->send.current_epoch_nr = epoch; |
1317 | connection->send.current_epoch_writes = 0; | 1323 | connection->send.current_epoch_writes = 0; |
1324 | connection->send.last_sent_barrier_jif = jiffies; | ||
1318 | } | 1325 | } |
1319 | } | 1326 | } |
1320 | 1327 | ||
@@ -1456,70 +1463,73 @@ static int _drbd_may_sync_now(struct drbd_device *device) | |||
1456 | } | 1463 | } |
1457 | 1464 | ||
1458 | /** | 1465 | /** |
1459 | * _drbd_pause_after() - Pause resync on all devices that may not resync now | 1466 | * drbd_pause_after() - Pause resync on all devices that may not resync now |
1460 | * @device: DRBD device. | 1467 | * @device: DRBD device. |
1461 | * | 1468 | * |
1462 | * Called from process context only (admin command and after_state_ch). | 1469 | * Called from process context only (admin command and after_state_ch). |
1463 | */ | 1470 | */ |
1464 | static int _drbd_pause_after(struct drbd_device *device) | 1471 | static bool drbd_pause_after(struct drbd_device *device) |
1465 | { | 1472 | { |
1473 | bool changed = false; | ||
1466 | struct drbd_device *odev; | 1474 | struct drbd_device *odev; |
1467 | int i, rv = 0; | 1475 | int i; |
1468 | 1476 | ||
1469 | rcu_read_lock(); | 1477 | rcu_read_lock(); |
1470 | idr_for_each_entry(&drbd_devices, odev, i) { | 1478 | idr_for_each_entry(&drbd_devices, odev, i) { |
1471 | if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) | 1479 | if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) |
1472 | continue; | 1480 | continue; |
1473 | if (!_drbd_may_sync_now(odev)) | 1481 | if (!_drbd_may_sync_now(odev) && |
1474 | rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL) | 1482 | _drbd_set_state(_NS(odev, aftr_isp, 1), |
1475 | != SS_NOTHING_TO_DO); | 1483 | CS_HARD, NULL) != SS_NOTHING_TO_DO) |
1484 | changed = true; | ||
1476 | } | 1485 | } |
1477 | rcu_read_unlock(); | 1486 | rcu_read_unlock(); |
1478 | 1487 | ||
1479 | return rv; | 1488 | return changed; |
1480 | } | 1489 | } |
1481 | 1490 | ||
1482 | /** | 1491 | /** |
1483 | * _drbd_resume_next() - Resume resync on all devices that may resync now | 1492 | * drbd_resume_next() - Resume resync on all devices that may resync now |
1484 | * @device: DRBD device. | 1493 | * @device: DRBD device. |
1485 | * | 1494 | * |
1486 | * Called from process context only (admin command and worker). | 1495 | * Called from process context only (admin command and worker). |
1487 | */ | 1496 | */ |
1488 | static int _drbd_resume_next(struct drbd_device *device) | 1497 | static bool drbd_resume_next(struct drbd_device *device) |
1489 | { | 1498 | { |
1499 | bool changed = false; | ||
1490 | struct drbd_device *odev; | 1500 | struct drbd_device *odev; |
1491 | int i, rv = 0; | 1501 | int i; |
1492 | 1502 | ||
1493 | rcu_read_lock(); | 1503 | rcu_read_lock(); |
1494 | idr_for_each_entry(&drbd_devices, odev, i) { | 1504 | idr_for_each_entry(&drbd_devices, odev, i) { |
1495 | if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) | 1505 | if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) |
1496 | continue; | 1506 | continue; |
1497 | if (odev->state.aftr_isp) { | 1507 | if (odev->state.aftr_isp) { |
1498 | if (_drbd_may_sync_now(odev)) | 1508 | if (_drbd_may_sync_now(odev) && |
1499 | rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0), | 1509 | _drbd_set_state(_NS(odev, aftr_isp, 0), |
1500 | CS_HARD, NULL) | 1510 | CS_HARD, NULL) != SS_NOTHING_TO_DO) |
1501 | != SS_NOTHING_TO_DO) ; | 1511 | changed = true; |
1502 | } | 1512 | } |
1503 | } | 1513 | } |
1504 | rcu_read_unlock(); | 1514 | rcu_read_unlock(); |
1505 | return rv; | 1515 | return changed; |
1506 | } | 1516 | } |
1507 | 1517 | ||
1508 | void resume_next_sg(struct drbd_device *device) | 1518 | void resume_next_sg(struct drbd_device *device) |
1509 | { | 1519 | { |
1510 | write_lock_irq(&global_state_lock); | 1520 | lock_all_resources(); |
1511 | _drbd_resume_next(device); | 1521 | drbd_resume_next(device); |
1512 | write_unlock_irq(&global_state_lock); | 1522 | unlock_all_resources(); |
1513 | } | 1523 | } |
1514 | 1524 | ||
1515 | void suspend_other_sg(struct drbd_device *device) | 1525 | void suspend_other_sg(struct drbd_device *device) |
1516 | { | 1526 | { |
1517 | write_lock_irq(&global_state_lock); | 1527 | lock_all_resources(); |
1518 | _drbd_pause_after(device); | 1528 | drbd_pause_after(device); |
1519 | write_unlock_irq(&global_state_lock); | 1529 | unlock_all_resources(); |
1520 | } | 1530 | } |
1521 | 1531 | ||
1522 | /* caller must hold global_state_lock */ | 1532 | /* caller must lock_all_resources() */ |
1523 | enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor) | 1533 | enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor) |
1524 | { | 1534 | { |
1525 | struct drbd_device *odev; | 1535 | struct drbd_device *odev; |
@@ -1557,15 +1567,15 @@ enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_min | |||
1557 | } | 1567 | } |
1558 | } | 1568 | } |
1559 | 1569 | ||
1560 | /* caller must hold global_state_lock */ | 1570 | /* caller must lock_all_resources() */ |
1561 | void drbd_resync_after_changed(struct drbd_device *device) | 1571 | void drbd_resync_after_changed(struct drbd_device *device) |
1562 | { | 1572 | { |
1563 | int changes; | 1573 | int changed; |
1564 | 1574 | ||
1565 | do { | 1575 | do { |
1566 | changes = _drbd_pause_after(device); | 1576 | changed = drbd_pause_after(device); |
1567 | changes |= _drbd_resume_next(device); | 1577 | changed |= drbd_resume_next(device); |
1568 | } while (changes); | 1578 | } while (changed); |
1569 | } | 1579 | } |
1570 | 1580 | ||
1571 | void drbd_rs_controller_reset(struct drbd_device *device) | 1581 | void drbd_rs_controller_reset(struct drbd_device *device) |
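The reworked drbd_resync_after_changed() above simply re-runs drbd_pause_after() and drbd_resume_next() until neither reports a change, iterating the resync-after dependencies to a fixed point. Below is a minimal standalone sketch of that "loop until nothing changed" idiom; the clamp helpers and the level array are invented for illustration and are not DRBD code.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-ins for the real state helpers: each returns true when it
 * changed something, so the caller can loop until the state is stable. */
static int level[3] = { 7, 5, -2 };

static bool pause_lower(void)
{
	bool changed = false;

	for (int i = 0; i < 3; i++)
		if (level[i] > 3) {
			level[i] = 3;
			changed = true;
		}
	return changed;
}

static bool resume_higher(void)
{
	bool changed = false;

	for (int i = 0; i < 3; i++)
		if (level[i] < 1) {
			level[i] = 1;
			changed = true;
		}
	return changed;
}

int main(void)
{
	bool changed;

	/* Same shape as drbd_resync_after_changed(): keep passing over the
	 * devices until a full pass reports no further state change. */
	do {
		changed = pause_lower();
		changed |= resume_higher();
	} while (changed);

	for (int i = 0; i < 3; i++)
		printf("device %d: level %d\n", i, level[i]);
	return 0;
}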
@@ -1685,19 +1695,14 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1685 | } else { | 1695 | } else { |
1686 | mutex_lock(device->state_mutex); | 1696 | mutex_lock(device->state_mutex); |
1687 | } | 1697 | } |
1688 | clear_bit(B_RS_H_DONE, &device->flags); | ||
1689 | 1698 | ||
1690 | /* req_lock: serialize with drbd_send_and_submit() and others | 1699 | lock_all_resources(); |
1691 | * global_state_lock: for stable sync-after dependencies */ | 1700 | clear_bit(B_RS_H_DONE, &device->flags); |
1692 | spin_lock_irq(&device->resource->req_lock); | ||
1693 | write_lock(&global_state_lock); | ||
1694 | /* Did some connection breakage or IO error race with us? */ | 1701 | /* Did some connection breakage or IO error race with us? */ |
1695 | if (device->state.conn < C_CONNECTED | 1702 | if (device->state.conn < C_CONNECTED |
1696 | || !get_ldev_if_state(device, D_NEGOTIATING)) { | 1703 | || !get_ldev_if_state(device, D_NEGOTIATING)) { |
1697 | write_unlock(&global_state_lock); | 1704 | unlock_all_resources(); |
1698 | spin_unlock_irq(&device->resource->req_lock); | 1705 | goto out; |
1699 | mutex_unlock(device->state_mutex); | ||
1700 | return; | ||
1701 | } | 1706 | } |
1702 | 1707 | ||
1703 | ns = drbd_read_state(device); | 1708 | ns = drbd_read_state(device); |
@@ -1711,7 +1716,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1711 | else /* side == C_SYNC_SOURCE */ | 1716 | else /* side == C_SYNC_SOURCE */ |
1712 | ns.pdsk = D_INCONSISTENT; | 1717 | ns.pdsk = D_INCONSISTENT; |
1713 | 1718 | ||
1714 | r = __drbd_set_state(device, ns, CS_VERBOSE, NULL); | 1719 | r = _drbd_set_state(device, ns, CS_VERBOSE, NULL); |
1715 | ns = drbd_read_state(device); | 1720 | ns = drbd_read_state(device); |
1716 | 1721 | ||
1717 | if (ns.conn < C_CONNECTED) | 1722 | if (ns.conn < C_CONNECTED) |
@@ -1732,7 +1737,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1732 | device->rs_mark_left[i] = tw; | 1737 | device->rs_mark_left[i] = tw; |
1733 | device->rs_mark_time[i] = now; | 1738 | device->rs_mark_time[i] = now; |
1734 | } | 1739 | } |
1735 | _drbd_pause_after(device); | 1740 | drbd_pause_after(device); |
1736 | /* Forget potentially stale cached per resync extent bit-counts. | 1741 | /* Forget potentially stale cached per resync extent bit-counts. |
1737 | * Open coded drbd_rs_cancel_all(device), we already have IRQs | 1742 | * Open coded drbd_rs_cancel_all(device), we already have IRQs |
1738 | * disabled, and know the disk state is ok. */ | 1743 | * disabled, and know the disk state is ok. */ |
@@ -1742,8 +1747,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1742 | device->resync_wenr = LC_FREE; | 1747 | device->resync_wenr = LC_FREE; |
1743 | spin_unlock(&device->al_lock); | 1748 | spin_unlock(&device->al_lock); |
1744 | } | 1749 | } |
1745 | write_unlock(&global_state_lock); | 1750 | unlock_all_resources(); |
1746 | spin_unlock_irq(&device->resource->req_lock); | ||
1747 | 1751 | ||
1748 | if (r == SS_SUCCESS) { | 1752 | if (r == SS_SUCCESS) { |
1749 | wake_up(&device->al_wait); /* for lc_reset() above */ | 1753 | wake_up(&device->al_wait); /* for lc_reset() above */ |
@@ -1807,6 +1811,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1807 | drbd_md_sync(device); | 1811 | drbd_md_sync(device); |
1808 | } | 1812 | } |
1809 | put_ldev(device); | 1813 | put_ldev(device); |
1814 | out: | ||
1810 | mutex_unlock(device->state_mutex); | 1815 | mutex_unlock(device->state_mutex); |
1811 | } | 1816 | } |
1812 | 1817 | ||
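The new out: label lets drbd_start_resync() leave through one exit path that always drops the state mutex, instead of repeating the unlock in every early-return branch. A small userspace sketch of that goto-based single-exit cleanup style, assuming only the C standard library (the file name and buffer size are arbitrary):

#include <stdio.h>
#include <stdlib.h>

/* Read the first 4 KiB of a file; every failure path funnels through
 * the same labels so each resource is released exactly once. */
static int read_header(const char *path)
{
	int ret = -1;
	char *buf = NULL;
	FILE *f = fopen(path, "rb");

	if (!f)
		goto out;		/* nothing acquired yet */

	buf = malloc(4096);
	if (!buf)
		goto out_close;

	if (fread(buf, 1, 4096, f) == 0)
		goto out_free;

	ret = 0;			/* success falls through the cleanup too */

out_free:
	free(buf);
out_close:
	fclose(f);
out:
	return ret;
}

int main(int argc, char **argv)
{
	return argc > 1 ? read_header(argv[1]) : 0;
}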
@@ -1836,7 +1841,7 @@ static void drbd_ldev_destroy(struct drbd_device *device) | |||
1836 | device->act_log = NULL; | 1841 | device->act_log = NULL; |
1837 | 1842 | ||
1838 | __acquire(local); | 1843 | __acquire(local); |
1839 | drbd_free_ldev(device->ldev); | 1844 | drbd_backing_dev_free(device, device->ldev); |
1840 | device->ldev = NULL; | 1845 | device->ldev = NULL; |
1841 | __release(local); | 1846 | __release(local); |
1842 | 1847 | ||
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 15bec407ac37..9b180dbbd03c 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c | |||
@@ -104,9 +104,9 @@ | |||
104 | /* Device instance number, incremented each time a device is probed. */ | 104 | /* Device instance number, incremented each time a device is probed. */ |
105 | static int instance; | 105 | static int instance; |
106 | 106 | ||
107 | struct list_head online_list; | 107 | static struct list_head online_list; |
108 | struct list_head removing_list; | 108 | static struct list_head removing_list; |
109 | spinlock_t dev_lock; | 109 | static spinlock_t dev_lock; |
110 | 110 | ||
111 | /* | 111 | /* |
112 | * Global variable used to hold the major block device number | 112 | * Global variable used to hold the major block device number |
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 95dff91135ad..6f9587156569 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c | |||
@@ -495,17 +495,17 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) | |||
495 | id->ppaf.ch_offset = 56; | 495 | id->ppaf.ch_offset = 56; |
496 | id->ppaf.ch_len = 8; | 496 | id->ppaf.ch_len = 8; |
497 | 497 | ||
498 | do_div(size, bs); /* convert size to pages */ | 498 | sector_div(size, bs); /* convert size to pages */ |
499 | do_div(size, 256); /* convert size to pgs per blk */ | 499 | size >>= 8; /* convert size to pgs per blk */ |
500 | grp = &id->groups[0]; | 500 | grp = &id->groups[0]; |
501 | grp->mtype = 0; | 501 | grp->mtype = 0; |
502 | grp->fmtype = 0; | 502 | grp->fmtype = 0; |
503 | grp->num_ch = 1; | 503 | grp->num_ch = 1; |
504 | grp->num_pg = 256; | 504 | grp->num_pg = 256; |
505 | blksize = size; | 505 | blksize = size; |
506 | do_div(size, (1 << 16)); | 506 | size >>= 16; |
507 | grp->num_lun = size + 1; | 507 | grp->num_lun = size + 1; |
508 | do_div(blksize, grp->num_lun); | 508 | sector_div(blksize, grp->num_lun); |
509 | grp->num_blk = blksize; | 509 | grp->num_blk = blksize; |
510 | grp->num_pln = 1; | 510 | grp->num_pln = 1; |
511 | 511 | ||
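The null_blk fix above swaps do_div() for sector_div() and plain shifts: do_div() needs a 64-bit dividend, while sector_t can be only 32 bits wide on some 32-bit configurations, which is presumably what the "use sector_div instead of do_div" patch addresses. Outside the kernel the same geometry calculation is ordinary 64-bit arithmetic; a sketch with made-up device parameters:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t size = 250ULL * 1024 * 1024 * 1024;	/* 250 GiB, invented */
	uint32_t bs = 4096;				/* backing block size */
	uint64_t blksize, num_lun;

	size /= bs;			/* bytes  -> pages */
	size >>= 8;			/* pages  -> blocks (256 pages per block) */
	blksize = size;
	num_lun = (size >> 16) + 1;	/* 64k blocks per LUN, as in the hunk */
	blksize /= num_lun;		/* blocks per LUN */

	printf("luns=%" PRIu64 ", blocks per lun=%" PRIu64 "\n", num_lun, blksize);
	return 0;
}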
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 59c91d49b14b..ba4bfe933276 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <linux/workqueue.h> | 23 | #include <linux/workqueue.h> |
24 | #include <linux/bitops.h> | 24 | #include <linux/bitops.h> |
25 | #include <linux/delay.h> | 25 | #include <linux/delay.h> |
26 | #include <linux/time.h> | 26 | #include <linux/ktime.h> |
27 | #include <linux/hdreg.h> | 27 | #include <linux/hdreg.h> |
28 | #include <linux/dma-mapping.h> | 28 | #include <linux/dma-mapping.h> |
29 | #include <linux/completion.h> | 29 | #include <linux/completion.h> |
@@ -671,16 +671,15 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func) | |||
671 | static unsigned int carm_fill_sync_time(struct carm_host *host, | 671 | static unsigned int carm_fill_sync_time(struct carm_host *host, |
672 | unsigned int idx, void *mem) | 672 | unsigned int idx, void *mem) |
673 | { | 673 | { |
674 | struct timeval tv; | ||
675 | struct carm_msg_sync_time *st = mem; | 674 | struct carm_msg_sync_time *st = mem; |
676 | 675 | ||
677 | do_gettimeofday(&tv); | 676 | time64_t tv = ktime_get_real_seconds(); |
678 | 677 | ||
679 | memset(st, 0, sizeof(*st)); | 678 | memset(st, 0, sizeof(*st)); |
680 | st->type = CARM_MSG_MISC; | 679 | st->type = CARM_MSG_MISC; |
681 | st->subtype = MISC_SET_TIME; | 680 | st->subtype = MISC_SET_TIME; |
682 | st->handle = cpu_to_le32(TAG_ENCODE(idx)); | 681 | st->handle = cpu_to_le32(TAG_ENCODE(idx)); |
683 | st->timestamp = cpu_to_le32(tv.tv_sec); | 682 | st->timestamp = cpu_to_le32(tv); |
684 | 683 | ||
685 | return sizeof(struct carm_msg_sync_time); | 684 | return sizeof(struct carm_msg_sync_time); |
686 | } | 685 | } |
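The sx8 change replaces struct timeval and do_gettimeofday(), whose seconds value overflows in 2038 where time_t is 32 bits, with time64_t and ktime_get_real_seconds(); only the device message, which carries a 32-bit timestamp field, still truncates the value. A standalone sketch of the same "keep 64 bits internally, narrow only at the wire format" approach, with clock_gettime() standing in for the kernel helper:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* Fetch the current time into a 64-bit count of seconds first... */
	clock_gettime(CLOCK_REALTIME, &ts);
	int64_t now = (int64_t)ts.tv_sec;

	/* ...and narrow only where the message format forces a 32-bit field,
	 * mirroring st->timestamp = cpu_to_le32(tv) in the hunk above. */
	uint32_t wire_timestamp = (uint32_t)now;

	printf("now=%lld wire=%u\n", (long long)now, wire_timestamp);
	return 0;
}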
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 41fb1a917b17..4809c1501d7e 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c | |||
@@ -84,6 +84,16 @@ MODULE_PARM_DESC(max_persistent_grants, | |||
84 | "Maximum number of grants to map persistently"); | 84 | "Maximum number of grants to map persistently"); |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Maximum number of rings/queues blkback supports; allow as many queues as there | ||
88 | * are CPUs if the user has not specified a value. | ||
89 | */ | ||
90 | unsigned int xenblk_max_queues; | ||
91 | module_param_named(max_queues, xenblk_max_queues, uint, 0644); | ||
92 | MODULE_PARM_DESC(max_queues, | ||
93 | "Maximum number of hardware queues per virtual disk." \ | ||
94 | "By default it is the number of online CPUs."); | ||
95 | |||
96 | /* | ||
87 | * Maximum order of pages to be used for the shared ring between front and | 97 | * Maximum order of pages to be used for the shared ring between front and |
88 | * backend, 4KB page granularity is used. | 98 | * backend, 4KB page granularity is used. |
89 | */ | 99 | */ |
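The new max_queues parameter is documented as defaulting to the number of online CPUs when the administrator leaves it unset; the actual negotiation with the frontend happens elsewhere in the driver, not in this hunk. A userspace sketch of that "default to the number of online CPUs" policy; the MAX_QUEUES environment variable is only a stand-in for the module parameter:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	long nr_queues = 0;
	const char *opt = getenv("MAX_QUEUES");	/* stand-in for max_queues */

	if (opt)
		nr_queues = strtol(opt, NULL, 10);

	if (nr_queues <= 0)
		nr_queues = ncpus;	/* default: one queue per online CPU */

	printf("using %ld hardware queues (%ld online CPUs)\n", nr_queues, ncpus);
	return 0;
}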
@@ -113,71 +123,71 @@ module_param(log_stats, int, 0644); | |||
113 | /* Number of free pages to remove on each call to gnttab_free_pages */ | 123 | /* Number of free pages to remove on each call to gnttab_free_pages */ |
114 | #define NUM_BATCH_FREE_PAGES 10 | 124 | #define NUM_BATCH_FREE_PAGES 10 |
115 | 125 | ||
116 | static inline int get_free_page(struct xen_blkif *blkif, struct page **page) | 126 | static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page) |
117 | { | 127 | { |
118 | unsigned long flags; | 128 | unsigned long flags; |
119 | 129 | ||
120 | spin_lock_irqsave(&blkif->free_pages_lock, flags); | 130 | spin_lock_irqsave(&ring->free_pages_lock, flags); |
121 | if (list_empty(&blkif->free_pages)) { | 131 | if (list_empty(&ring->free_pages)) { |
122 | BUG_ON(blkif->free_pages_num != 0); | 132 | BUG_ON(ring->free_pages_num != 0); |
123 | spin_unlock_irqrestore(&blkif->free_pages_lock, flags); | 133 | spin_unlock_irqrestore(&ring->free_pages_lock, flags); |
124 | return gnttab_alloc_pages(1, page); | 134 | return gnttab_alloc_pages(1, page); |
125 | } | 135 | } |
126 | BUG_ON(blkif->free_pages_num == 0); | 136 | BUG_ON(ring->free_pages_num == 0); |
127 | page[0] = list_first_entry(&blkif->free_pages, struct page, lru); | 137 | page[0] = list_first_entry(&ring->free_pages, struct page, lru); |
128 | list_del(&page[0]->lru); | 138 | list_del(&page[0]->lru); |
129 | blkif->free_pages_num--; | 139 | ring->free_pages_num--; |
130 | spin_unlock_irqrestore(&blkif->free_pages_lock, flags); | 140 | spin_unlock_irqrestore(&ring->free_pages_lock, flags); |
131 | 141 | ||
132 | return 0; | 142 | return 0; |
133 | } | 143 | } |
134 | 144 | ||
135 | static inline void put_free_pages(struct xen_blkif *blkif, struct page **page, | 145 | static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page, |
136 | int num) | 146 | int num) |
137 | { | 147 | { |
138 | unsigned long flags; | 148 | unsigned long flags; |
139 | int i; | 149 | int i; |
140 | 150 | ||
141 | spin_lock_irqsave(&blkif->free_pages_lock, flags); | 151 | spin_lock_irqsave(&ring->free_pages_lock, flags); |
142 | for (i = 0; i < num; i++) | 152 | for (i = 0; i < num; i++) |
143 | list_add(&page[i]->lru, &blkif->free_pages); | 153 | list_add(&page[i]->lru, &ring->free_pages); |
144 | blkif->free_pages_num += num; | 154 | ring->free_pages_num += num; |
145 | spin_unlock_irqrestore(&blkif->free_pages_lock, flags); | 155 | spin_unlock_irqrestore(&ring->free_pages_lock, flags); |
146 | } | 156 | } |
147 | 157 | ||
148 | static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num) | 158 | static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num) |
149 | { | 159 | { |
150 | /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */ | 160 | /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */ |
151 | struct page *page[NUM_BATCH_FREE_PAGES]; | 161 | struct page *page[NUM_BATCH_FREE_PAGES]; |
152 | unsigned int num_pages = 0; | 162 | unsigned int num_pages = 0; |
153 | unsigned long flags; | 163 | unsigned long flags; |
154 | 164 | ||
155 | spin_lock_irqsave(&blkif->free_pages_lock, flags); | 165 | spin_lock_irqsave(&ring->free_pages_lock, flags); |
156 | while (blkif->free_pages_num > num) { | 166 | while (ring->free_pages_num > num) { |
157 | BUG_ON(list_empty(&blkif->free_pages)); | 167 | BUG_ON(list_empty(&ring->free_pages)); |
158 | page[num_pages] = list_first_entry(&blkif->free_pages, | 168 | page[num_pages] = list_first_entry(&ring->free_pages, |
159 | struct page, lru); | 169 | struct page, lru); |
160 | list_del(&page[num_pages]->lru); | 170 | list_del(&page[num_pages]->lru); |
161 | blkif->free_pages_num--; | 171 | ring->free_pages_num--; |
162 | if (++num_pages == NUM_BATCH_FREE_PAGES) { | 172 | if (++num_pages == NUM_BATCH_FREE_PAGES) { |
163 | spin_unlock_irqrestore(&blkif->free_pages_lock, flags); | 173 | spin_unlock_irqrestore(&ring->free_pages_lock, flags); |
164 | gnttab_free_pages(num_pages, page); | 174 | gnttab_free_pages(num_pages, page); |
165 | spin_lock_irqsave(&blkif->free_pages_lock, flags); | 175 | spin_lock_irqsave(&ring->free_pages_lock, flags); |
166 | num_pages = 0; | 176 | num_pages = 0; |
167 | } | 177 | } |
168 | } | 178 | } |
169 | spin_unlock_irqrestore(&blkif->free_pages_lock, flags); | 179 | spin_unlock_irqrestore(&ring->free_pages_lock, flags); |
170 | if (num_pages != 0) | 180 | if (num_pages != 0) |
171 | gnttab_free_pages(num_pages, page); | 181 | gnttab_free_pages(num_pages, page); |
172 | } | 182 | } |
173 | 183 | ||
174 | #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) | 184 | #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) |
175 | 185 | ||
176 | static int do_block_io_op(struct xen_blkif *blkif); | 186 | static int do_block_io_op(struct xen_blkif_ring *ring); |
177 | static int dispatch_rw_block_io(struct xen_blkif *blkif, | 187 | static int dispatch_rw_block_io(struct xen_blkif_ring *ring, |
178 | struct blkif_request *req, | 188 | struct blkif_request *req, |
179 | struct pending_req *pending_req); | 189 | struct pending_req *pending_req); |
180 | static void make_response(struct xen_blkif *blkif, u64 id, | 190 | static void make_response(struct xen_blkif_ring *ring, u64 id, |
181 | unsigned short op, int st); | 191 | unsigned short op, int st); |
182 | 192 | ||
183 | #define foreach_grant_safe(pos, n, rbtree, node) \ | 193 | #define foreach_grant_safe(pos, n, rbtree, node) \ |
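shrink_free_pagepool() above never calls the comparatively expensive gnttab_free_pages() with the pool lock held: it detaches up to NUM_BATCH_FREE_PAGES entries, drops the lock, frees the batch, then re-acquires the lock. A runnable userspace analogue of that batched-release pattern, with a pthread mutex and malloc'd nodes standing in for the grant pages (all names here are invented):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define BATCH 10

struct node { struct node *next; };

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *pool;
static unsigned int pool_num;

/* Shrink the pool down to 'keep' entries, releasing the lock around
 * the actual free() calls, one batch at a time. */
static void shrink_pool(unsigned int keep)
{
	struct node *batch[BATCH];
	unsigned int n = 0;

	pthread_mutex_lock(&pool_lock);
	while (pool_num > keep) {
		batch[n++] = pool;
		pool = pool->next;
		pool_num--;
		if (n == BATCH) {
			pthread_mutex_unlock(&pool_lock);
			while (n)
				free(batch[--n]);
			pthread_mutex_lock(&pool_lock);
		}
	}
	pthread_mutex_unlock(&pool_lock);
	while (n)
		free(batch[--n]);
}

int main(void)
{
	for (int i = 0; i < 25; i++) {	/* seed the pool with 25 buffers */
		struct node *nd = malloc(sizeof(*nd));

		nd->next = pool;
		pool = nd;
		pool_num++;
	}
	shrink_pool(4);
	printf("pool now holds %u buffers\n", pool_num);
	return 0;
}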
@@ -190,7 +200,7 @@ static void make_response(struct xen_blkif *blkif, u64 id, | |||
190 | 200 | ||
191 | /* | 201 | /* |
192 | * We don't need locking around the persistent grant helpers | 202 | * We don't need locking around the persistent grant helpers |
193 | * because blkback uses a single-thread for each backed, so we | 203 | * because blkback uses a single-thread for each backend, so we |
194 | * can be sure that these functions will never be called recursively. | 204 | * can be sure that these functions will never be called recursively. |
195 | * | 205 | * |
196 | * The only exception to that is put_persistent_grant, which can be called | 206 | * The only exception to that is put_persistent_grant, which can be called |
@@ -198,19 +208,20 @@ static void make_response(struct xen_blkif *blkif, u64 id, | |||
198 | * bit operations to modify the flags of a persistent grant and to count | 208 | * bit operations to modify the flags of a persistent grant and to count |
199 | * the number of used grants. | 209 | * the number of used grants. |
200 | */ | 210 | */ |
201 | static int add_persistent_gnt(struct xen_blkif *blkif, | 211 | static int add_persistent_gnt(struct xen_blkif_ring *ring, |
202 | struct persistent_gnt *persistent_gnt) | 212 | struct persistent_gnt *persistent_gnt) |
203 | { | 213 | { |
204 | struct rb_node **new = NULL, *parent = NULL; | 214 | struct rb_node **new = NULL, *parent = NULL; |
205 | struct persistent_gnt *this; | 215 | struct persistent_gnt *this; |
216 | struct xen_blkif *blkif = ring->blkif; | ||
206 | 217 | ||
207 | if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) { | 218 | if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) { |
208 | if (!blkif->vbd.overflow_max_grants) | 219 | if (!blkif->vbd.overflow_max_grants) |
209 | blkif->vbd.overflow_max_grants = 1; | 220 | blkif->vbd.overflow_max_grants = 1; |
210 | return -EBUSY; | 221 | return -EBUSY; |
211 | } | 222 | } |
212 | /* Figure out where to put new node */ | 223 | /* Figure out where to put new node */ |
213 | new = &blkif->persistent_gnts.rb_node; | 224 | new = &ring->persistent_gnts.rb_node; |
214 | while (*new) { | 225 | while (*new) { |
215 | this = container_of(*new, struct persistent_gnt, node); | 226 | this = container_of(*new, struct persistent_gnt, node); |
216 | 227 | ||
@@ -229,19 +240,19 @@ static int add_persistent_gnt(struct xen_blkif *blkif, | |||
229 | set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); | 240 | set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); |
230 | /* Add new node and rebalance tree. */ | 241 | /* Add new node and rebalance tree. */ |
231 | rb_link_node(&(persistent_gnt->node), parent, new); | 242 | rb_link_node(&(persistent_gnt->node), parent, new); |
232 | rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts); | 243 | rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts); |
233 | blkif->persistent_gnt_c++; | 244 | ring->persistent_gnt_c++; |
234 | atomic_inc(&blkif->persistent_gnt_in_use); | 245 | atomic_inc(&ring->persistent_gnt_in_use); |
235 | return 0; | 246 | return 0; |
236 | } | 247 | } |
237 | 248 | ||
238 | static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif, | 249 | static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring, |
239 | grant_ref_t gref) | 250 | grant_ref_t gref) |
240 | { | 251 | { |
241 | struct persistent_gnt *data; | 252 | struct persistent_gnt *data; |
242 | struct rb_node *node = NULL; | 253 | struct rb_node *node = NULL; |
243 | 254 | ||
244 | node = blkif->persistent_gnts.rb_node; | 255 | node = ring->persistent_gnts.rb_node; |
245 | while (node) { | 256 | while (node) { |
246 | data = container_of(node, struct persistent_gnt, node); | 257 | data = container_of(node, struct persistent_gnt, node); |
247 | 258 | ||
@@ -255,24 +266,24 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif, | |||
255 | return NULL; | 266 | return NULL; |
256 | } | 267 | } |
257 | set_bit(PERSISTENT_GNT_ACTIVE, data->flags); | 268 | set_bit(PERSISTENT_GNT_ACTIVE, data->flags); |
258 | atomic_inc(&blkif->persistent_gnt_in_use); | 269 | atomic_inc(&ring->persistent_gnt_in_use); |
259 | return data; | 270 | return data; |
260 | } | 271 | } |
261 | } | 272 | } |
262 | return NULL; | 273 | return NULL; |
263 | } | 274 | } |
264 | 275 | ||
265 | static void put_persistent_gnt(struct xen_blkif *blkif, | 276 | static void put_persistent_gnt(struct xen_blkif_ring *ring, |
266 | struct persistent_gnt *persistent_gnt) | 277 | struct persistent_gnt *persistent_gnt) |
267 | { | 278 | { |
268 | if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) | 279 | if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) |
269 | pr_alert_ratelimited("freeing a grant already unused\n"); | 280 | pr_alert_ratelimited("freeing a grant already unused\n"); |
270 | set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); | 281 | set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); |
271 | clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); | 282 | clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); |
272 | atomic_dec(&blkif->persistent_gnt_in_use); | 283 | atomic_dec(&ring->persistent_gnt_in_use); |
273 | } | 284 | } |
274 | 285 | ||
275 | static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, | 286 | static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root, |
276 | unsigned int num) | 287 | unsigned int num) |
277 | { | 288 | { |
278 | struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 289 | struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
@@ -303,7 +314,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, | |||
303 | unmap_data.count = segs_to_unmap; | 314 | unmap_data.count = segs_to_unmap; |
304 | BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); | 315 | BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); |
305 | 316 | ||
306 | put_free_pages(blkif, pages, segs_to_unmap); | 317 | put_free_pages(ring, pages, segs_to_unmap); |
307 | segs_to_unmap = 0; | 318 | segs_to_unmap = 0; |
308 | } | 319 | } |
309 | 320 | ||
@@ -320,15 +331,15 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work) | |||
320 | struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 331 | struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
321 | struct persistent_gnt *persistent_gnt; | 332 | struct persistent_gnt *persistent_gnt; |
322 | int segs_to_unmap = 0; | 333 | int segs_to_unmap = 0; |
323 | struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work); | 334 | struct xen_blkif_ring *ring = container_of(work, typeof(*ring), persistent_purge_work); |
324 | struct gntab_unmap_queue_data unmap_data; | 335 | struct gntab_unmap_queue_data unmap_data; |
325 | 336 | ||
326 | unmap_data.pages = pages; | 337 | unmap_data.pages = pages; |
327 | unmap_data.unmap_ops = unmap; | 338 | unmap_data.unmap_ops = unmap; |
328 | unmap_data.kunmap_ops = NULL; | 339 | unmap_data.kunmap_ops = NULL; |
329 | 340 | ||
330 | while(!list_empty(&blkif->persistent_purge_list)) { | 341 | while(!list_empty(&ring->persistent_purge_list)) { |
331 | persistent_gnt = list_first_entry(&blkif->persistent_purge_list, | 342 | persistent_gnt = list_first_entry(&ring->persistent_purge_list, |
332 | struct persistent_gnt, | 343 | struct persistent_gnt, |
333 | remove_node); | 344 | remove_node); |
334 | list_del(&persistent_gnt->remove_node); | 345 | list_del(&persistent_gnt->remove_node); |
@@ -343,7 +354,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work) | |||
343 | if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { | 354 | if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { |
344 | unmap_data.count = segs_to_unmap; | 355 | unmap_data.count = segs_to_unmap; |
345 | BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); | 356 | BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); |
346 | put_free_pages(blkif, pages, segs_to_unmap); | 357 | put_free_pages(ring, pages, segs_to_unmap); |
347 | segs_to_unmap = 0; | 358 | segs_to_unmap = 0; |
348 | } | 359 | } |
349 | kfree(persistent_gnt); | 360 | kfree(persistent_gnt); |
@@ -351,11 +362,11 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work) | |||
351 | if (segs_to_unmap > 0) { | 362 | if (segs_to_unmap > 0) { |
352 | unmap_data.count = segs_to_unmap; | 363 | unmap_data.count = segs_to_unmap; |
353 | BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); | 364 | BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); |
354 | put_free_pages(blkif, pages, segs_to_unmap); | 365 | put_free_pages(ring, pages, segs_to_unmap); |
355 | } | 366 | } |
356 | } | 367 | } |
357 | 368 | ||
358 | static void purge_persistent_gnt(struct xen_blkif *blkif) | 369 | static void purge_persistent_gnt(struct xen_blkif_ring *ring) |
359 | { | 370 | { |
360 | struct persistent_gnt *persistent_gnt; | 371 | struct persistent_gnt *persistent_gnt; |
361 | struct rb_node *n; | 372 | struct rb_node *n; |
@@ -363,23 +374,23 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) | |||
363 | bool scan_used = false, clean_used = false; | 374 | bool scan_used = false, clean_used = false; |
364 | struct rb_root *root; | 375 | struct rb_root *root; |
365 | 376 | ||
366 | if (blkif->persistent_gnt_c < xen_blkif_max_pgrants || | 377 | if (ring->persistent_gnt_c < xen_blkif_max_pgrants || |
367 | (blkif->persistent_gnt_c == xen_blkif_max_pgrants && | 378 | (ring->persistent_gnt_c == xen_blkif_max_pgrants && |
368 | !blkif->vbd.overflow_max_grants)) { | 379 | !ring->blkif->vbd.overflow_max_grants)) { |
369 | return; | 380 | goto out; |
370 | } | 381 | } |
371 | 382 | ||
372 | if (work_busy(&blkif->persistent_purge_work)) { | 383 | if (work_busy(&ring->persistent_purge_work)) { |
373 | pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n"); | 384 | pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n"); |
374 | return; | 385 | goto out; |
375 | } | 386 | } |
376 | 387 | ||
377 | num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; | 388 | num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; |
378 | num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean; | 389 | num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean; |
379 | num_clean = min(blkif->persistent_gnt_c, num_clean); | 390 | num_clean = min(ring->persistent_gnt_c, num_clean); |
380 | if ((num_clean == 0) || | 391 | if ((num_clean == 0) || |
381 | (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use)))) | 392 | (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use)))) |
382 | return; | 393 | goto out; |
383 | 394 | ||
384 | /* | 395 | /* |
385 | * At this point, we can assure that there will be no calls | 396 | * At this point, we can assure that there will be no calls |
@@ -394,8 +405,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) | |||
394 | 405 | ||
395 | pr_debug("Going to purge %u persistent grants\n", num_clean); | 406 | pr_debug("Going to purge %u persistent grants\n", num_clean); |
396 | 407 | ||
397 | BUG_ON(!list_empty(&blkif->persistent_purge_list)); | 408 | BUG_ON(!list_empty(&ring->persistent_purge_list)); |
398 | root = &blkif->persistent_gnts; | 409 | root = &ring->persistent_gnts; |
399 | purge_list: | 410 | purge_list: |
400 | foreach_grant_safe(persistent_gnt, n, root, node) { | 411 | foreach_grant_safe(persistent_gnt, n, root, node) { |
401 | BUG_ON(persistent_gnt->handle == | 412 | BUG_ON(persistent_gnt->handle == |
@@ -414,7 +425,7 @@ purge_list: | |||
414 | 425 | ||
415 | rb_erase(&persistent_gnt->node, root); | 426 | rb_erase(&persistent_gnt->node, root); |
416 | list_add(&persistent_gnt->remove_node, | 427 | list_add(&persistent_gnt->remove_node, |
417 | &blkif->persistent_purge_list); | 428 | &ring->persistent_purge_list); |
418 | if (--num_clean == 0) | 429 | if (--num_clean == 0) |
419 | goto finished; | 430 | goto finished; |
420 | } | 431 | } |
@@ -435,30 +446,32 @@ finished: | |||
435 | goto purge_list; | 446 | goto purge_list; |
436 | } | 447 | } |
437 | 448 | ||
438 | blkif->persistent_gnt_c -= (total - num_clean); | 449 | ring->persistent_gnt_c -= (total - num_clean); |
439 | blkif->vbd.overflow_max_grants = 0; | 450 | ring->blkif->vbd.overflow_max_grants = 0; |
440 | 451 | ||
441 | /* We can defer this work */ | 452 | /* We can defer this work */ |
442 | schedule_work(&blkif->persistent_purge_work); | 453 | schedule_work(&ring->persistent_purge_work); |
443 | pr_debug("Purged %u/%u\n", (total - num_clean), total); | 454 | pr_debug("Purged %u/%u\n", (total - num_clean), total); |
455 | |||
456 | out: | ||
444 | return; | 457 | return; |
445 | } | 458 | } |
446 | 459 | ||
447 | /* | 460 | /* |
448 | * Retrieve from the 'pending_reqs' a free pending_req structure to be used. | 461 | * Retrieve from the 'pending_reqs' a free pending_req structure to be used. |
449 | */ | 462 | */ |
450 | static struct pending_req *alloc_req(struct xen_blkif *blkif) | 463 | static struct pending_req *alloc_req(struct xen_blkif_ring *ring) |
451 | { | 464 | { |
452 | struct pending_req *req = NULL; | 465 | struct pending_req *req = NULL; |
453 | unsigned long flags; | 466 | unsigned long flags; |
454 | 467 | ||
455 | spin_lock_irqsave(&blkif->pending_free_lock, flags); | 468 | spin_lock_irqsave(&ring->pending_free_lock, flags); |
456 | if (!list_empty(&blkif->pending_free)) { | 469 | if (!list_empty(&ring->pending_free)) { |
457 | req = list_entry(blkif->pending_free.next, struct pending_req, | 470 | req = list_entry(ring->pending_free.next, struct pending_req, |
458 | free_list); | 471 | free_list); |
459 | list_del(&req->free_list); | 472 | list_del(&req->free_list); |
460 | } | 473 | } |
461 | spin_unlock_irqrestore(&blkif->pending_free_lock, flags); | 474 | spin_unlock_irqrestore(&ring->pending_free_lock, flags); |
462 | return req; | 475 | return req; |
463 | } | 476 | } |
464 | 477 | ||
@@ -466,17 +479,17 @@ static struct pending_req *alloc_req(struct xen_blkif *blkif) | |||
466 | * Return the 'pending_req' structure back to the freepool. We also | 479 | * Return the 'pending_req' structure back to the freepool. We also |
467 | * wake up the thread if it was waiting for a free page. | 480 | * wake up the thread if it was waiting for a free page. |
468 | */ | 481 | */ |
469 | static void free_req(struct xen_blkif *blkif, struct pending_req *req) | 482 | static void free_req(struct xen_blkif_ring *ring, struct pending_req *req) |
470 | { | 483 | { |
471 | unsigned long flags; | 484 | unsigned long flags; |
472 | int was_empty; | 485 | int was_empty; |
473 | 486 | ||
474 | spin_lock_irqsave(&blkif->pending_free_lock, flags); | 487 | spin_lock_irqsave(&ring->pending_free_lock, flags); |
475 | was_empty = list_empty(&blkif->pending_free); | 488 | was_empty = list_empty(&ring->pending_free); |
476 | list_add(&req->free_list, &blkif->pending_free); | 489 | list_add(&req->free_list, &ring->pending_free); |
477 | spin_unlock_irqrestore(&blkif->pending_free_lock, flags); | 490 | spin_unlock_irqrestore(&ring->pending_free_lock, flags); |
478 | if (was_empty) | 491 | if (was_empty) |
479 | wake_up(&blkif->pending_free_wq); | 492 | wake_up(&ring->pending_free_wq); |
480 | } | 493 | } |
481 | 494 | ||
482 | /* | 495 | /* |
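free_req() above wakes pending_free_wq only when the free list goes from empty to non-empty; if entries were already available nobody can be sleeping on it, so the wakeup is skipped. A small pthread analogue of that "wake only on the empty-to-non-empty transition" detail; the request type and pool are invented, and a real consumer would block on the condition variable when alloc_req() returns NULL:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct req { struct req *next; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  have_free = PTHREAD_COND_INITIALIZER;
static struct req *free_list;

static struct req *alloc_req(void)
{
	struct req *r = NULL;

	pthread_mutex_lock(&lock);
	if (free_list) {
		r = free_list;
		free_list = r->next;
	}
	pthread_mutex_unlock(&lock);
	return r;	/* NULL means empty; a real caller would wait */
}

static void free_req(struct req *r)
{
	bool was_empty;

	pthread_mutex_lock(&lock);
	was_empty = (free_list == NULL);
	r->next = free_list;
	free_list = r;
	pthread_mutex_unlock(&lock);

	/* Only an empty -> non-empty transition can have a blocked waiter. */
	if (was_empty)
		pthread_cond_signal(&have_free);
}

int main(void)
{
	struct req *r = malloc(sizeof(*r));

	free_req(r);			/* empty -> non-empty: signals */
	printf("allocated %p\n", (void *)alloc_req());
	free(r);
	return 0;
}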
@@ -556,10 +569,10 @@ abort: | |||
556 | /* | 569 | /* |
557 | * Notification from the guest OS. | 570 | * Notification from the guest OS. |
558 | */ | 571 | */ |
559 | static void blkif_notify_work(struct xen_blkif *blkif) | 572 | static void blkif_notify_work(struct xen_blkif_ring *ring) |
560 | { | 573 | { |
561 | blkif->waiting_reqs = 1; | 574 | ring->waiting_reqs = 1; |
562 | wake_up(&blkif->wq); | 575 | wake_up(&ring->wq); |
563 | } | 576 | } |
564 | 577 | ||
565 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id) | 578 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id) |
@@ -572,31 +585,33 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) | |||
572 | * SCHEDULER FUNCTIONS | 585 | * SCHEDULER FUNCTIONS |
573 | */ | 586 | */ |
574 | 587 | ||
575 | static void print_stats(struct xen_blkif *blkif) | 588 | static void print_stats(struct xen_blkif_ring *ring) |
576 | { | 589 | { |
577 | pr_info("(%s): oo %3llu | rd %4llu | wr %4llu | f %4llu" | 590 | pr_info("(%s): oo %3llu | rd %4llu | wr %4llu | f %4llu" |
578 | " | ds %4llu | pg: %4u/%4d\n", | 591 | " | ds %4llu | pg: %4u/%4d\n", |
579 | current->comm, blkif->st_oo_req, | 592 | current->comm, ring->st_oo_req, |
580 | blkif->st_rd_req, blkif->st_wr_req, | 593 | ring->st_rd_req, ring->st_wr_req, |
581 | blkif->st_f_req, blkif->st_ds_req, | 594 | ring->st_f_req, ring->st_ds_req, |
582 | blkif->persistent_gnt_c, | 595 | ring->persistent_gnt_c, |
583 | xen_blkif_max_pgrants); | 596 | xen_blkif_max_pgrants); |
584 | blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); | 597 | ring->st_print = jiffies + msecs_to_jiffies(10 * 1000); |
585 | blkif->st_rd_req = 0; | 598 | ring->st_rd_req = 0; |
586 | blkif->st_wr_req = 0; | 599 | ring->st_wr_req = 0; |
587 | blkif->st_oo_req = 0; | 600 | ring->st_oo_req = 0; |
588 | blkif->st_ds_req = 0; | 601 | ring->st_ds_req = 0; |
589 | } | 602 | } |
590 | 603 | ||
591 | int xen_blkif_schedule(void *arg) | 604 | int xen_blkif_schedule(void *arg) |
592 | { | 605 | { |
593 | struct xen_blkif *blkif = arg; | 606 | struct xen_blkif_ring *ring = arg; |
607 | struct xen_blkif *blkif = ring->blkif; | ||
594 | struct xen_vbd *vbd = &blkif->vbd; | 608 | struct xen_vbd *vbd = &blkif->vbd; |
595 | unsigned long timeout; | 609 | unsigned long timeout; |
596 | int ret; | 610 | int ret; |
597 | 611 | ||
598 | xen_blkif_get(blkif); | 612 | xen_blkif_get(blkif); |
599 | 613 | ||
614 | set_freezable(); | ||
600 | while (!kthread_should_stop()) { | 615 | while (!kthread_should_stop()) { |
601 | if (try_to_freeze()) | 616 | if (try_to_freeze()) |
602 | continue; | 617 | continue; |
@@ -606,50 +621,50 @@ int xen_blkif_schedule(void *arg) | |||
606 | timeout = msecs_to_jiffies(LRU_INTERVAL); | 621 | timeout = msecs_to_jiffies(LRU_INTERVAL); |
607 | 622 | ||
608 | timeout = wait_event_interruptible_timeout( | 623 | timeout = wait_event_interruptible_timeout( |
609 | blkif->wq, | 624 | ring->wq, |
610 | blkif->waiting_reqs || kthread_should_stop(), | 625 | ring->waiting_reqs || kthread_should_stop(), |
611 | timeout); | 626 | timeout); |
612 | if (timeout == 0) | 627 | if (timeout == 0) |
613 | goto purge_gnt_list; | 628 | goto purge_gnt_list; |
614 | timeout = wait_event_interruptible_timeout( | 629 | timeout = wait_event_interruptible_timeout( |
615 | blkif->pending_free_wq, | 630 | ring->pending_free_wq, |
616 | !list_empty(&blkif->pending_free) || | 631 | !list_empty(&ring->pending_free) || |
617 | kthread_should_stop(), | 632 | kthread_should_stop(), |
618 | timeout); | 633 | timeout); |
619 | if (timeout == 0) | 634 | if (timeout == 0) |
620 | goto purge_gnt_list; | 635 | goto purge_gnt_list; |
621 | 636 | ||
622 | blkif->waiting_reqs = 0; | 637 | ring->waiting_reqs = 0; |
623 | smp_mb(); /* clear flag *before* checking for work */ | 638 | smp_mb(); /* clear flag *before* checking for work */ |
624 | 639 | ||
625 | ret = do_block_io_op(blkif); | 640 | ret = do_block_io_op(ring); |
626 | if (ret > 0) | 641 | if (ret > 0) |
627 | blkif->waiting_reqs = 1; | 642 | ring->waiting_reqs = 1; |
628 | if (ret == -EACCES) | 643 | if (ret == -EACCES) |
629 | wait_event_interruptible(blkif->shutdown_wq, | 644 | wait_event_interruptible(ring->shutdown_wq, |
630 | kthread_should_stop()); | 645 | kthread_should_stop()); |
631 | 646 | ||
632 | purge_gnt_list: | 647 | purge_gnt_list: |
633 | if (blkif->vbd.feature_gnt_persistent && | 648 | if (blkif->vbd.feature_gnt_persistent && |
634 | time_after(jiffies, blkif->next_lru)) { | 649 | time_after(jiffies, ring->next_lru)) { |
635 | purge_persistent_gnt(blkif); | 650 | purge_persistent_gnt(ring); |
636 | blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL); | 651 | ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL); |
637 | } | 652 | } |
638 | 653 | ||
639 | /* Shrink if we have more than xen_blkif_max_buffer_pages */ | 654 | /* Shrink if we have more than xen_blkif_max_buffer_pages */ |
640 | shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages); | 655 | shrink_free_pagepool(ring, xen_blkif_max_buffer_pages); |
641 | 656 | ||
642 | if (log_stats && time_after(jiffies, blkif->st_print)) | 657 | if (log_stats && time_after(jiffies, ring->st_print)) |
643 | print_stats(blkif); | 658 | print_stats(ring); |
644 | } | 659 | } |
645 | 660 | ||
646 | /* Drain pending purge work */ | 661 | /* Drain pending purge work */ |
647 | flush_work(&blkif->persistent_purge_work); | 662 | flush_work(&ring->persistent_purge_work); |
648 | 663 | ||
649 | if (log_stats) | 664 | if (log_stats) |
650 | print_stats(blkif); | 665 | print_stats(ring); |
651 | 666 | ||
652 | blkif->xenblkd = NULL; | 667 | ring->xenblkd = NULL; |
653 | xen_blkif_put(blkif); | 668 | xen_blkif_put(blkif); |
654 | 669 | ||
655 | return 0; | 670 | return 0; |
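The purge and statistics paths above key off comparisons like time_after(jiffies, ring->next_lru), which stay correct across jiffies wraparound because they test the sign of the difference rather than comparing raw values. A standalone sketch of that wraparound-safe comparison, rewritten for userspace around a 32-bit tick counter:

#include <stdint.h>
#include <stdio.h>

/* True if a is later than b, even if the counter has wrapped, as long as
 * the two values are less than half the counter range apart. */
#define tick_after(a, b)  ((int32_t)((b) - (a)) < 0)

int main(void)
{
	uint32_t next_purge = 0xfffffff0u;	/* deadline set just before the wrap */
	uint32_t now        = 0x00000010u;	/* counter has wrapped since then */

	if (tick_after(now, next_purge))
		printf("deadline passed, purge now\n");
	else
		printf("not yet\n");
	return 0;
}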
@@ -658,22 +673,22 @@ purge_gnt_list: | |||
658 | /* | 673 | /* |
659 | * Remove persistent grants and empty the pool of free pages | 674 | * Remove persistent grants and empty the pool of free pages |
660 | */ | 675 | */ |
661 | void xen_blkbk_free_caches(struct xen_blkif *blkif) | 676 | void xen_blkbk_free_caches(struct xen_blkif_ring *ring) |
662 | { | 677 | { |
663 | /* Free all persistent grant pages */ | 678 | /* Free all persistent grant pages */ |
664 | if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) | 679 | if (!RB_EMPTY_ROOT(&ring->persistent_gnts)) |
665 | free_persistent_gnts(blkif, &blkif->persistent_gnts, | 680 | free_persistent_gnts(ring, &ring->persistent_gnts, |
666 | blkif->persistent_gnt_c); | 681 | ring->persistent_gnt_c); |
667 | 682 | ||
668 | BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); | 683 | BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts)); |
669 | blkif->persistent_gnt_c = 0; | 684 | ring->persistent_gnt_c = 0; |
670 | 685 | ||
671 | /* Since we are shutting down remove all pages from the buffer */ | 686 | /* Since we are shutting down remove all pages from the buffer */ |
672 | shrink_free_pagepool(blkif, 0 /* All */); | 687 | shrink_free_pagepool(ring, 0 /* All */); |
673 | } | 688 | } |
674 | 689 | ||
675 | static unsigned int xen_blkbk_unmap_prepare( | 690 | static unsigned int xen_blkbk_unmap_prepare( |
676 | struct xen_blkif *blkif, | 691 | struct xen_blkif_ring *ring, |
677 | struct grant_page **pages, | 692 | struct grant_page **pages, |
678 | unsigned int num, | 693 | unsigned int num, |
679 | struct gnttab_unmap_grant_ref *unmap_ops, | 694 | struct gnttab_unmap_grant_ref *unmap_ops, |
@@ -683,7 +698,7 @@ static unsigned int xen_blkbk_unmap_prepare( | |||
683 | 698 | ||
684 | for (i = 0; i < num; i++) { | 699 | for (i = 0; i < num; i++) { |
685 | if (pages[i]->persistent_gnt != NULL) { | 700 | if (pages[i]->persistent_gnt != NULL) { |
686 | put_persistent_gnt(blkif, pages[i]->persistent_gnt); | 701 | put_persistent_gnt(ring, pages[i]->persistent_gnt); |
687 | continue; | 702 | continue; |
688 | } | 703 | } |
689 | if (pages[i]->handle == BLKBACK_INVALID_HANDLE) | 704 | if (pages[i]->handle == BLKBACK_INVALID_HANDLE) |
@@ -700,17 +715,18 @@ static unsigned int xen_blkbk_unmap_prepare( | |||
700 | 715 | ||
701 | static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data) | 716 | static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data) |
702 | { | 717 | { |
703 | struct pending_req* pending_req = (struct pending_req*) (data->data); | 718 | struct pending_req *pending_req = (struct pending_req *)(data->data); |
704 | struct xen_blkif *blkif = pending_req->blkif; | 719 | struct xen_blkif_ring *ring = pending_req->ring; |
720 | struct xen_blkif *blkif = ring->blkif; | ||
705 | 721 | ||
706 | /* BUG_ON used to reproduce existing behaviour, | 722 | /* BUG_ON used to reproduce existing behaviour, |
707 | but is this the best way to deal with this? */ | 723 | but is this the best way to deal with this? */ |
708 | BUG_ON(result); | 724 | BUG_ON(result); |
709 | 725 | ||
710 | put_free_pages(blkif, data->pages, data->count); | 726 | put_free_pages(ring, data->pages, data->count); |
711 | make_response(blkif, pending_req->id, | 727 | make_response(ring, pending_req->id, |
712 | pending_req->operation, pending_req->status); | 728 | pending_req->operation, pending_req->status); |
713 | free_req(blkif, pending_req); | 729 | free_req(ring, pending_req); |
714 | /* | 730 | /* |
715 | * Make sure the request is freed before releasing blkif, | 731 | * Make sure the request is freed before releasing blkif, |
716 | * or there could be a race between free_req and the | 732 | * or there could be a race between free_req and the |
@@ -723,7 +739,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_ | |||
723 | * pending_free_wq if there's a drain going on, but it has | 739 | * pending_free_wq if there's a drain going on, but it has |
724 | * to be taken into account if the current model is changed. | 740 | * to be taken into account if the current model is changed. |
725 | */ | 741 | */ |
726 | if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { | 742 | if (atomic_dec_and_test(&ring->inflight) && atomic_read(&blkif->drain)) { |
727 | complete(&blkif->drain_complete); | 743 | complete(&blkif->drain_complete); |
728 | } | 744 | } |
729 | xen_blkif_put(blkif); | 745 | xen_blkif_put(blkif); |
@@ -732,11 +748,11 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_ | |||
732 | static void xen_blkbk_unmap_and_respond(struct pending_req *req) | 748 | static void xen_blkbk_unmap_and_respond(struct pending_req *req) |
733 | { | 749 | { |
734 | struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data; | 750 | struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data; |
735 | struct xen_blkif *blkif = req->blkif; | 751 | struct xen_blkif_ring *ring = req->ring; |
736 | struct grant_page **pages = req->segments; | 752 | struct grant_page **pages = req->segments; |
737 | unsigned int invcount; | 753 | unsigned int invcount; |
738 | 754 | ||
739 | invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs, | 755 | invcount = xen_blkbk_unmap_prepare(ring, pages, req->nr_segs, |
740 | req->unmap, req->unmap_pages); | 756 | req->unmap, req->unmap_pages); |
741 | 757 | ||
742 | work->data = req; | 758 | work->data = req; |
@@ -757,7 +773,7 @@ static void xen_blkbk_unmap_and_respond(struct pending_req *req) | |||
757 | * of hypercalls, but since this is only used in error paths there's | 773 | * of hypercalls, but since this is only used in error paths there's |
758 | * no real need. | 774 | * no real need. |
759 | */ | 775 | */ |
760 | static void xen_blkbk_unmap(struct xen_blkif *blkif, | 776 | static void xen_blkbk_unmap(struct xen_blkif_ring *ring, |
761 | struct grant_page *pages[], | 777 | struct grant_page *pages[], |
762 | int num) | 778 | int num) |
763 | { | 779 | { |
@@ -768,20 +784,20 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif, | |||
768 | 784 | ||
769 | while (num) { | 785 | while (num) { |
770 | unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST); | 786 | unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST); |
771 | 787 | ||
772 | invcount = xen_blkbk_unmap_prepare(blkif, pages, batch, | 788 | invcount = xen_blkbk_unmap_prepare(ring, pages, batch, |
773 | unmap, unmap_pages); | 789 | unmap, unmap_pages); |
774 | if (invcount) { | 790 | if (invcount) { |
775 | ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); | 791 | ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); |
776 | BUG_ON(ret); | 792 | BUG_ON(ret); |
777 | put_free_pages(blkif, unmap_pages, invcount); | 793 | put_free_pages(ring, unmap_pages, invcount); |
778 | } | 794 | } |
779 | pages += batch; | 795 | pages += batch; |
780 | num -= batch; | 796 | num -= batch; |
781 | } | 797 | } |
782 | } | 798 | } |
783 | 799 | ||
784 | static int xen_blkbk_map(struct xen_blkif *blkif, | 800 | static int xen_blkbk_map(struct xen_blkif_ring *ring, |
785 | struct grant_page *pages[], | 801 | struct grant_page *pages[], |
786 | int num, bool ro) | 802 | int num, bool ro) |
787 | { | 803 | { |
@@ -794,6 +810,7 @@ static int xen_blkbk_map(struct xen_blkif *blkif, | |||
794 | int ret = 0; | 810 | int ret = 0; |
795 | int last_map = 0, map_until = 0; | 811 | int last_map = 0, map_until = 0; |
796 | int use_persistent_gnts; | 812 | int use_persistent_gnts; |
813 | struct xen_blkif *blkif = ring->blkif; | ||
797 | 814 | ||
798 | use_persistent_gnts = (blkif->vbd.feature_gnt_persistent); | 815 | use_persistent_gnts = (blkif->vbd.feature_gnt_persistent); |
799 | 816 | ||
@@ -806,10 +823,11 @@ again: | |||
806 | for (i = map_until; i < num; i++) { | 823 | for (i = map_until; i < num; i++) { |
807 | uint32_t flags; | 824 | uint32_t flags; |
808 | 825 | ||
809 | if (use_persistent_gnts) | 826 | if (use_persistent_gnts) { |
810 | persistent_gnt = get_persistent_gnt( | 827 | persistent_gnt = get_persistent_gnt( |
811 | blkif, | 828 | ring, |
812 | pages[i]->gref); | 829 | pages[i]->gref); |
830 | } | ||
813 | 831 | ||
814 | if (persistent_gnt) { | 832 | if (persistent_gnt) { |
815 | /* | 833 | /* |
@@ -819,7 +837,7 @@ again: | |||
819 | pages[i]->page = persistent_gnt->page; | 837 | pages[i]->page = persistent_gnt->page; |
820 | pages[i]->persistent_gnt = persistent_gnt; | 838 | pages[i]->persistent_gnt = persistent_gnt; |
821 | } else { | 839 | } else { |
822 | if (get_free_page(blkif, &pages[i]->page)) | 840 | if (get_free_page(ring, &pages[i]->page)) |
823 | goto out_of_memory; | 841 | goto out_of_memory; |
824 | addr = vaddr(pages[i]->page); | 842 | addr = vaddr(pages[i]->page); |
825 | pages_to_gnt[segs_to_map] = pages[i]->page; | 843 | pages_to_gnt[segs_to_map] = pages[i]->page; |
@@ -852,7 +870,7 @@ again: | |||
852 | BUG_ON(new_map_idx >= segs_to_map); | 870 | BUG_ON(new_map_idx >= segs_to_map); |
853 | if (unlikely(map[new_map_idx].status != 0)) { | 871 | if (unlikely(map[new_map_idx].status != 0)) { |
854 | pr_debug("invalid buffer -- could not remap it\n"); | 872 | pr_debug("invalid buffer -- could not remap it\n"); |
855 | put_free_pages(blkif, &pages[seg_idx]->page, 1); | 873 | put_free_pages(ring, &pages[seg_idx]->page, 1); |
856 | pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE; | 874 | pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE; |
857 | ret |= 1; | 875 | ret |= 1; |
858 | goto next; | 876 | goto next; |
@@ -862,7 +880,7 @@ again: | |||
862 | continue; | 880 | continue; |
863 | } | 881 | } |
864 | if (use_persistent_gnts && | 882 | if (use_persistent_gnts && |
865 | blkif->persistent_gnt_c < xen_blkif_max_pgrants) { | 883 | ring->persistent_gnt_c < xen_blkif_max_pgrants) { |
866 | /* | 884 | /* |
867 | * We are using persistent grants, the grant is | 885 | * We are using persistent grants, the grant is |
868 | * not mapped but we might have room for it. | 886 | * not mapped but we might have room for it. |
@@ -880,7 +898,7 @@ again: | |||
880 | persistent_gnt->gnt = map[new_map_idx].ref; | 898 | persistent_gnt->gnt = map[new_map_idx].ref; |
881 | persistent_gnt->handle = map[new_map_idx].handle; | 899 | persistent_gnt->handle = map[new_map_idx].handle; |
882 | persistent_gnt->page = pages[seg_idx]->page; | 900 | persistent_gnt->page = pages[seg_idx]->page; |
883 | if (add_persistent_gnt(blkif, | 901 | if (add_persistent_gnt(ring, |
884 | persistent_gnt)) { | 902 | persistent_gnt)) { |
885 | kfree(persistent_gnt); | 903 | kfree(persistent_gnt); |
886 | persistent_gnt = NULL; | 904 | persistent_gnt = NULL; |
@@ -888,7 +906,7 @@ again: | |||
888 | } | 906 | } |
889 | pages[seg_idx]->persistent_gnt = persistent_gnt; | 907 | pages[seg_idx]->persistent_gnt = persistent_gnt; |
890 | pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n", | 908 | pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n", |
891 | persistent_gnt->gnt, blkif->persistent_gnt_c, | 909 | persistent_gnt->gnt, ring->persistent_gnt_c, |
892 | xen_blkif_max_pgrants); | 910 | xen_blkif_max_pgrants); |
893 | goto next; | 911 | goto next; |
894 | } | 912 | } |
@@ -913,7 +931,7 @@ next: | |||
913 | 931 | ||
914 | out_of_memory: | 932 | out_of_memory: |
915 | pr_alert("%s: out of memory\n", __func__); | 933 | pr_alert("%s: out of memory\n", __func__); |
916 | put_free_pages(blkif, pages_to_gnt, segs_to_map); | 934 | put_free_pages(ring, pages_to_gnt, segs_to_map); |
917 | return -ENOMEM; | 935 | return -ENOMEM; |
918 | } | 936 | } |
919 | 937 | ||
@@ -921,7 +939,7 @@ static int xen_blkbk_map_seg(struct pending_req *pending_req) | |||
921 | { | 939 | { |
922 | int rc; | 940 | int rc; |
923 | 941 | ||
924 | rc = xen_blkbk_map(pending_req->blkif, pending_req->segments, | 942 | rc = xen_blkbk_map(pending_req->ring, pending_req->segments, |
925 | pending_req->nr_segs, | 943 | pending_req->nr_segs, |
926 | (pending_req->operation != BLKIF_OP_READ)); | 944 | (pending_req->operation != BLKIF_OP_READ)); |
927 | 945 | ||
@@ -934,7 +952,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, | |||
934 | struct phys_req *preq) | 952 | struct phys_req *preq) |
935 | { | 953 | { |
936 | struct grant_page **pages = pending_req->indirect_pages; | 954 | struct grant_page **pages = pending_req->indirect_pages; |
937 | struct xen_blkif *blkif = pending_req->blkif; | 955 | struct xen_blkif_ring *ring = pending_req->ring; |
938 | int indirect_grefs, rc, n, nseg, i; | 956 | int indirect_grefs, rc, n, nseg, i; |
939 | struct blkif_request_segment *segments = NULL; | 957 | struct blkif_request_segment *segments = NULL; |
940 | 958 | ||
@@ -945,7 +963,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, | |||
945 | for (i = 0; i < indirect_grefs; i++) | 963 | for (i = 0; i < indirect_grefs; i++) |
946 | pages[i]->gref = req->u.indirect.indirect_grefs[i]; | 964 | pages[i]->gref = req->u.indirect.indirect_grefs[i]; |
947 | 965 | ||
948 | rc = xen_blkbk_map(blkif, pages, indirect_grefs, true); | 966 | rc = xen_blkbk_map(ring, pages, indirect_grefs, true); |
949 | if (rc) | 967 | if (rc) |
950 | goto unmap; | 968 | goto unmap; |
951 | 969 | ||
@@ -977,15 +995,16 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, | |||
977 | unmap: | 995 | unmap: |
978 | if (segments) | 996 | if (segments) |
979 | kunmap_atomic(segments); | 997 | kunmap_atomic(segments); |
980 | xen_blkbk_unmap(blkif, pages, indirect_grefs); | 998 | xen_blkbk_unmap(ring, pages, indirect_grefs); |
981 | return rc; | 999 | return rc; |
982 | } | 1000 | } |
983 | 1001 | ||
984 | static int dispatch_discard_io(struct xen_blkif *blkif, | 1002 | static int dispatch_discard_io(struct xen_blkif_ring *ring, |
985 | struct blkif_request *req) | 1003 | struct blkif_request *req) |
986 | { | 1004 | { |
987 | int err = 0; | 1005 | int err = 0; |
988 | int status = BLKIF_RSP_OKAY; | 1006 | int status = BLKIF_RSP_OKAY; |
1007 | struct xen_blkif *blkif = ring->blkif; | ||
989 | struct block_device *bdev = blkif->vbd.bdev; | 1008 | struct block_device *bdev = blkif->vbd.bdev; |
990 | unsigned long secure; | 1009 | unsigned long secure; |
991 | struct phys_req preq; | 1010 | struct phys_req preq; |
@@ -1002,7 +1021,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif, | |||
1002 | preq.sector_number + preq.nr_sects, blkif->vbd.pdevice); | 1021 | preq.sector_number + preq.nr_sects, blkif->vbd.pdevice); |
1003 | goto fail_response; | 1022 | goto fail_response; |
1004 | } | 1023 | } |
1005 | blkif->st_ds_req++; | 1024 | ring->st_ds_req++; |
1006 | 1025 | ||
1007 | secure = (blkif->vbd.discard_secure && | 1026 | secure = (blkif->vbd.discard_secure && |
1008 | (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? | 1027 | (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? |
@@ -1018,26 +1037,28 @@ fail_response: | |||
1018 | } else if (err) | 1037 | } else if (err) |
1019 | status = BLKIF_RSP_ERROR; | 1038 | status = BLKIF_RSP_ERROR; |
1020 | 1039 | ||
1021 | make_response(blkif, req->u.discard.id, req->operation, status); | 1040 | make_response(ring, req->u.discard.id, req->operation, status); |
1022 | xen_blkif_put(blkif); | 1041 | xen_blkif_put(blkif); |
1023 | return err; | 1042 | return err; |
1024 | } | 1043 | } |
1025 | 1044 | ||
1026 | static int dispatch_other_io(struct xen_blkif *blkif, | 1045 | static int dispatch_other_io(struct xen_blkif_ring *ring, |
1027 | struct blkif_request *req, | 1046 | struct blkif_request *req, |
1028 | struct pending_req *pending_req) | 1047 | struct pending_req *pending_req) |
1029 | { | 1048 | { |
1030 | free_req(blkif, pending_req); | 1049 | free_req(ring, pending_req); |
1031 | make_response(blkif, req->u.other.id, req->operation, | 1050 | make_response(ring, req->u.other.id, req->operation, |
1032 | BLKIF_RSP_EOPNOTSUPP); | 1051 | BLKIF_RSP_EOPNOTSUPP); |
1033 | return -EIO; | 1052 | return -EIO; |
1034 | } | 1053 | } |
1035 | 1054 | ||
1036 | static void xen_blk_drain_io(struct xen_blkif *blkif) | 1055 | static void xen_blk_drain_io(struct xen_blkif_ring *ring) |
1037 | { | 1056 | { |
1057 | struct xen_blkif *blkif = ring->blkif; | ||
1058 | |||
1038 | atomic_set(&blkif->drain, 1); | 1059 | atomic_set(&blkif->drain, 1); |
1039 | do { | 1060 | do { |
1040 | if (atomic_read(&blkif->inflight) == 0) | 1061 | if (atomic_read(&ring->inflight) == 0) |
1041 | break; | 1062 | break; |
1042 | wait_for_completion_interruptible_timeout( | 1063 | wait_for_completion_interruptible_timeout( |
1043 | &blkif->drain_complete, HZ); | 1064 | &blkif->drain_complete, HZ); |
@@ -1058,12 +1079,12 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) | |||
1058 | if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && | 1079 | if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && |
1059 | (error == -EOPNOTSUPP)) { | 1080 | (error == -EOPNOTSUPP)) { |
1060 | pr_debug("flush diskcache op failed, not supported\n"); | 1081 | pr_debug("flush diskcache op failed, not supported\n"); |
1061 | xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); | 1082 | xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0); |
1062 | pending_req->status = BLKIF_RSP_EOPNOTSUPP; | 1083 | pending_req->status = BLKIF_RSP_EOPNOTSUPP; |
1063 | } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && | 1084 | } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && |
1064 | (error == -EOPNOTSUPP)) { | 1085 | (error == -EOPNOTSUPP)) { |
1065 | pr_debug("write barrier op failed, not supported\n"); | 1086 | pr_debug("write barrier op failed, not supported\n"); |
1066 | xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); | 1087 | xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0); |
1067 | pending_req->status = BLKIF_RSP_EOPNOTSUPP; | 1088 | pending_req->status = BLKIF_RSP_EOPNOTSUPP; |
1068 | } else if (error) { | 1089 | } else if (error) { |
1069 | pr_debug("Buffer not up-to-date at end of operation," | 1090 | pr_debug("Buffer not up-to-date at end of operation," |
@@ -1097,9 +1118,9 @@ static void end_block_io_op(struct bio *bio) | |||
1097 | * and transmute it to the block API to hand it over to the proper block disk. | 1118 | * and transmute it to the block API to hand it over to the proper block disk. |
1098 | */ | 1119 | */ |
1099 | static int | 1120 | static int |
1100 | __do_block_io_op(struct xen_blkif *blkif) | 1121 | __do_block_io_op(struct xen_blkif_ring *ring) |
1101 | { | 1122 | { |
1102 | union blkif_back_rings *blk_rings = &blkif->blk_rings; | 1123 | union blkif_back_rings *blk_rings = &ring->blk_rings; |
1103 | struct blkif_request req; | 1124 | struct blkif_request req; |
1104 | struct pending_req *pending_req; | 1125 | struct pending_req *pending_req; |
1105 | RING_IDX rc, rp; | 1126 | RING_IDX rc, rp; |
@@ -1112,7 +1133,7 @@ __do_block_io_op(struct xen_blkif *blkif) | |||
1112 | if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) { | 1133 | if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) { |
1113 | rc = blk_rings->common.rsp_prod_pvt; | 1134 | rc = blk_rings->common.rsp_prod_pvt; |
1114 | pr_warn("Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n", | 1135 | pr_warn("Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n", |
1115 | rp, rc, rp - rc, blkif->vbd.pdevice); | 1136 | rp, rc, rp - rc, ring->blkif->vbd.pdevice); |
1116 | return -EACCES; | 1137 | return -EACCES; |
1117 | } | 1138 | } |
1118 | while (rc != rp) { | 1139 | while (rc != rp) { |
@@ -1125,14 +1146,14 @@ __do_block_io_op(struct xen_blkif *blkif) | |||
1125 | break; | 1146 | break; |
1126 | } | 1147 | } |
1127 | 1148 | ||
1128 | pending_req = alloc_req(blkif); | 1149 | pending_req = alloc_req(ring); |
1129 | if (NULL == pending_req) { | 1150 | if (NULL == pending_req) { |
1130 | blkif->st_oo_req++; | 1151 | ring->st_oo_req++; |
1131 | more_to_do = 1; | 1152 | more_to_do = 1; |
1132 | break; | 1153 | break; |
1133 | } | 1154 | } |
1134 | 1155 | ||
1135 | switch (blkif->blk_protocol) { | 1156 | switch (ring->blkif->blk_protocol) { |
1136 | case BLKIF_PROTOCOL_NATIVE: | 1157 | case BLKIF_PROTOCOL_NATIVE: |
1137 | memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); | 1158 | memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); |
1138 | break; | 1159 | break; |
@@ -1156,16 +1177,16 @@ __do_block_io_op(struct xen_blkif *blkif) | |||
1156 | case BLKIF_OP_WRITE_BARRIER: | 1177 | case BLKIF_OP_WRITE_BARRIER: |
1157 | case BLKIF_OP_FLUSH_DISKCACHE: | 1178 | case BLKIF_OP_FLUSH_DISKCACHE: |
1158 | case BLKIF_OP_INDIRECT: | 1179 | case BLKIF_OP_INDIRECT: |
1159 | if (dispatch_rw_block_io(blkif, &req, pending_req)) | 1180 | if (dispatch_rw_block_io(ring, &req, pending_req)) |
1160 | goto done; | 1181 | goto done; |
1161 | break; | 1182 | break; |
1162 | case BLKIF_OP_DISCARD: | 1183 | case BLKIF_OP_DISCARD: |
1163 | free_req(blkif, pending_req); | 1184 | free_req(ring, pending_req); |
1164 | if (dispatch_discard_io(blkif, &req)) | 1185 | if (dispatch_discard_io(ring, &req)) |
1165 | goto done; | 1186 | goto done; |
1166 | break; | 1187 | break; |
1167 | default: | 1188 | default: |
1168 | if (dispatch_other_io(blkif, &req, pending_req)) | 1189 | if (dispatch_other_io(ring, &req, pending_req)) |
1169 | goto done; | 1190 | goto done; |
1170 | break; | 1191 | break; |
1171 | } | 1192 | } |
@@ -1178,13 +1199,13 @@ done: | |||
1178 | } | 1199 | } |
1179 | 1200 | ||
1180 | static int | 1201 | static int |
1181 | do_block_io_op(struct xen_blkif *blkif) | 1202 | do_block_io_op(struct xen_blkif_ring *ring) |
1182 | { | 1203 | { |
1183 | union blkif_back_rings *blk_rings = &blkif->blk_rings; | 1204 | union blkif_back_rings *blk_rings = &ring->blk_rings; |
1184 | int more_to_do; | 1205 | int more_to_do; |
1185 | 1206 | ||
1186 | do { | 1207 | do { |
1187 | more_to_do = __do_block_io_op(blkif); | 1208 | more_to_do = __do_block_io_op(ring); |
1188 | if (more_to_do) | 1209 | if (more_to_do) |
1189 | break; | 1210 | break; |
1190 | 1211 | ||
@@ -1197,7 +1218,7 @@ do_block_io_op(struct xen_blkif *blkif) | |||
1197 | * Transmutation of the 'struct blkif_request' to a proper 'struct bio' | 1218 | * Transmutation of the 'struct blkif_request' to a proper 'struct bio' |
1198 | * and call the 'submit_bio' to pass it to the underlying storage. | 1219 | * and call the 'submit_bio' to pass it to the underlying storage. |
1199 | */ | 1220 | */ |
1200 | static int dispatch_rw_block_io(struct xen_blkif *blkif, | 1221 | static int dispatch_rw_block_io(struct xen_blkif_ring *ring, |
1201 | struct blkif_request *req, | 1222 | struct blkif_request *req, |
1202 | struct pending_req *pending_req) | 1223 | struct pending_req *pending_req) |
1203 | { | 1224 | { |
@@ -1225,17 +1246,17 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
1225 | 1246 | ||
1226 | switch (req_operation) { | 1247 | switch (req_operation) { |
1227 | case BLKIF_OP_READ: | 1248 | case BLKIF_OP_READ: |
1228 | blkif->st_rd_req++; | 1249 | ring->st_rd_req++; |
1229 | operation = READ; | 1250 | operation = READ; |
1230 | break; | 1251 | break; |
1231 | case BLKIF_OP_WRITE: | 1252 | case BLKIF_OP_WRITE: |
1232 | blkif->st_wr_req++; | 1253 | ring->st_wr_req++; |
1233 | operation = WRITE_ODIRECT; | 1254 | operation = WRITE_ODIRECT; |
1234 | break; | 1255 | break; |
1235 | case BLKIF_OP_WRITE_BARRIER: | 1256 | case BLKIF_OP_WRITE_BARRIER: |
1236 | drain = true; | 1257 | drain = true; |
1237 | case BLKIF_OP_FLUSH_DISKCACHE: | 1258 | case BLKIF_OP_FLUSH_DISKCACHE: |
1238 | blkif->st_f_req++; | 1259 | ring->st_f_req++; |
1239 | operation = WRITE_FLUSH; | 1260 | operation = WRITE_FLUSH; |
1240 | break; | 1261 | break; |
1241 | default: | 1262 | default: |
@@ -1260,7 +1281,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
1260 | 1281 | ||
1261 | preq.nr_sects = 0; | 1282 | preq.nr_sects = 0; |
1262 | 1283 | ||
1263 | pending_req->blkif = blkif; | 1284 | pending_req->ring = ring; |
1264 | pending_req->id = req->u.rw.id; | 1285 | pending_req->id = req->u.rw.id; |
1265 | pending_req->operation = req_operation; | 1286 | pending_req->operation = req_operation; |
1266 | pending_req->status = BLKIF_RSP_OKAY; | 1287 | pending_req->status = BLKIF_RSP_OKAY; |
@@ -1287,12 +1308,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
1287 | goto fail_response; | 1308 | goto fail_response; |
1288 | } | 1309 | } |
1289 | 1310 | ||
1290 | if (xen_vbd_translate(&preq, blkif, operation) != 0) { | 1311 | if (xen_vbd_translate(&preq, ring->blkif, operation) != 0) { |
1291 | pr_debug("access denied: %s of [%llu,%llu] on dev=%04x\n", | 1312 | pr_debug("access denied: %s of [%llu,%llu] on dev=%04x\n", |
1292 | operation == READ ? "read" : "write", | 1313 | operation == READ ? "read" : "write", |
1293 | preq.sector_number, | 1314 | preq.sector_number, |
1294 | preq.sector_number + preq.nr_sects, | 1315 | preq.sector_number + preq.nr_sects, |
1295 | blkif->vbd.pdevice); | 1316 | ring->blkif->vbd.pdevice); |
1296 | goto fail_response; | 1317 | goto fail_response; |
1297 | } | 1318 | } |
1298 | 1319 | ||
@@ -1304,7 +1325,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
1304 | if (((int)preq.sector_number|(int)seg[i].nsec) & | 1325 | if (((int)preq.sector_number|(int)seg[i].nsec) & |
1305 | ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { | 1326 | ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { |
1306 | pr_debug("Misaligned I/O request from domain %d\n", | 1327 | pr_debug("Misaligned I/O request from domain %d\n", |
1307 | blkif->domid); | 1328 | ring->blkif->domid); |
1308 | goto fail_response; | 1329 | goto fail_response; |
1309 | } | 1330 | } |
1310 | } | 1331 | } |
@@ -1313,7 +1334,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
1313 | * issue the WRITE_FLUSH. | 1334 | * issue the WRITE_FLUSH. |
1314 | */ | 1335 | */ |
1315 | if (drain) | 1336 | if (drain) |
1316 | xen_blk_drain_io(pending_req->blkif); | 1337 | xen_blk_drain_io(pending_req->ring); |
1317 | 1338 | ||
1318 | /* | 1339 | /* |
1319 | * If we have failed at this point, we need to undo the M2P override, | 1340 | * If we have failed at this point, we need to undo the M2P override, |
@@ -1328,8 +1349,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
1328 | * This corresponding xen_blkif_put is done in __end_block_io_op, or | 1349 | * This corresponding xen_blkif_put is done in __end_block_io_op, or |
1329 | * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. | 1350 | * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. |
1330 | */ | 1351 | */ |
1331 | xen_blkif_get(blkif); | 1352 | xen_blkif_get(ring->blkif); |
1332 | atomic_inc(&blkif->inflight); | 1353 | atomic_inc(&ring->inflight); |
1333 | 1354 | ||
1334 | for (i = 0; i < nseg; i++) { | 1355 | for (i = 0; i < nseg; i++) { |
1335 | while ((bio == NULL) || | 1356 | while ((bio == NULL) || |
@@ -1377,19 +1398,19 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
1377 | blk_finish_plug(&plug); | 1398 | blk_finish_plug(&plug); |
1378 | 1399 | ||
1379 | if (operation == READ) | 1400 | if (operation == READ) |
1380 | blkif->st_rd_sect += preq.nr_sects; | 1401 | ring->st_rd_sect += preq.nr_sects; |
1381 | else if (operation & WRITE) | 1402 | else if (operation & WRITE) |
1382 | blkif->st_wr_sect += preq.nr_sects; | 1403 | ring->st_wr_sect += preq.nr_sects; |
1383 | 1404 | ||
1384 | return 0; | 1405 | return 0; |
1385 | 1406 | ||
1386 | fail_flush: | 1407 | fail_flush: |
1387 | xen_blkbk_unmap(blkif, pending_req->segments, | 1408 | xen_blkbk_unmap(ring, pending_req->segments, |
1388 | pending_req->nr_segs); | 1409 | pending_req->nr_segs); |
1389 | fail_response: | 1410 | fail_response: |
1390 | /* Haven't submitted any bio's yet. */ | 1411 | /* Haven't submitted any bio's yet. */ |
1391 | make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR); | 1412 | make_response(ring, req->u.rw.id, req_operation, BLKIF_RSP_ERROR); |
1392 | free_req(blkif, pending_req); | 1413 | free_req(ring, pending_req); |
1393 | msleep(1); /* back off a bit */ | 1414 | msleep(1); /* back off a bit */ |
1394 | return -EIO; | 1415 | return -EIO; |
1395 | 1416 | ||
@@ -1407,21 +1428,22 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, | |||
1407 | /* | 1428 | /* |
1408 | * Put a response on the ring on how the operation fared. | 1429 | * Put a response on the ring on how the operation fared. |
1409 | */ | 1430 | */ |
1410 | static void make_response(struct xen_blkif *blkif, u64 id, | 1431 | static void make_response(struct xen_blkif_ring *ring, u64 id, |
1411 | unsigned short op, int st) | 1432 | unsigned short op, int st) |
1412 | { | 1433 | { |
1413 | struct blkif_response resp; | 1434 | struct blkif_response resp; |
1414 | unsigned long flags; | 1435 | unsigned long flags; |
1415 | union blkif_back_rings *blk_rings = &blkif->blk_rings; | 1436 | union blkif_back_rings *blk_rings; |
1416 | int notify; | 1437 | int notify; |
1417 | 1438 | ||
1418 | resp.id = id; | 1439 | resp.id = id; |
1419 | resp.operation = op; | 1440 | resp.operation = op; |
1420 | resp.status = st; | 1441 | resp.status = st; |
1421 | 1442 | ||
1422 | spin_lock_irqsave(&blkif->blk_ring_lock, flags); | 1443 | spin_lock_irqsave(&ring->blk_ring_lock, flags); |
1444 | blk_rings = &ring->blk_rings; | ||
1423 | /* Place on the response ring for the relevant domain. */ | 1445 | /* Place on the response ring for the relevant domain. */ |
1424 | switch (blkif->blk_protocol) { | 1446 | switch (ring->blkif->blk_protocol) { |
1425 | case BLKIF_PROTOCOL_NATIVE: | 1447 | case BLKIF_PROTOCOL_NATIVE: |
1426 | memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), | 1448 | memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), |
1427 | &resp, sizeof(resp)); | 1449 | &resp, sizeof(resp)); |
@@ -1439,9 +1461,9 @@ static void make_response(struct xen_blkif *blkif, u64 id, | |||
1439 | } | 1461 | } |
1440 | blk_rings->common.rsp_prod_pvt++; | 1462 | blk_rings->common.rsp_prod_pvt++; |
1441 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); | 1463 | RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); |
1442 | spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); | 1464 | spin_unlock_irqrestore(&ring->blk_ring_lock, flags); |
1443 | if (notify) | 1465 | if (notify) |
1444 | notify_remote_via_irq(blkif->irq); | 1466 | notify_remote_via_irq(ring->irq); |
1445 | } | 1467 | } |
1446 | 1468 | ||
1447 | static int __init xen_blkif_init(void) | 1469 | static int __init xen_blkif_init(void) |
@@ -1457,6 +1479,9 @@ static int __init xen_blkif_init(void) | |||
1457 | xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER; | 1479 | xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER; |
1458 | } | 1480 | } |
1459 | 1481 | ||
1482 | if (xenblk_max_queues == 0) | ||
1483 | xenblk_max_queues = num_online_cpus(); | ||
1484 | |||
1460 | rc = xen_blkif_interface_init(); | 1485 | rc = xen_blkif_interface_init(); |
1461 | if (rc) | 1486 | if (rc) |
1462 | goto failed_init; | 1487 | goto failed_init; |
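The blkback.c hunks above all follow one pattern: helpers on the I/O path (xen_blkbk_map/unmap, the dispatch_* functions, __do_block_io_op, make_response) now take a struct xen_blkif_ring * instead of the whole struct xen_blkif *, keep ring-local state (ring buffer, lock, irq, in-flight count, statistics) on that ring, and reach device-wide state such as the VBD or domid through the ring->blkif back pointer. A minimal stand-alone sketch of that shape, using simplified stand-in structures rather than the kernel's definitions:

#include <stdio.h>

struct blkif;                               /* per-device state */

struct blkif_ring {                         /* per-ring (per-queue) state */
	unsigned long long st_rd_req;       /* statistics now live here */
	unsigned long long st_wr_req;
	struct blkif *blkif;                /* back pointer to the device */
};

struct blkif {                              /* device-wide state */
	int domid;
	unsigned int nr_rings;
	struct blkif_ring *rings;
};

/* The patched helpers take a ring; device-wide data comes via ring->blkif. */
static void dispatch(struct blkif_ring *ring, int write)
{
	if (write)
		ring->st_wr_req++;
	else
		ring->st_rd_req++;
	printf("dom%d: %s request on ring %ld\n",
	       ring->blkif->domid, write ? "write" : "read",
	       (long)(ring - ring->blkif->rings));
}

int main(void)
{
	static struct blkif_ring rings[2];
	struct blkif dev = { .domid = 1, .nr_rings = 2, .rings = rings };
	unsigned int i;

	for (i = 0; i < dev.nr_rings; i++)
		rings[i].blkif = &dev;

	dispatch(&rings[0], 0);
	dispatch(&rings[1], 1);
	return 0;
}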
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index c929ae22764c..dea61f6ab8cb 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <xen/interface/io/protocols.h> | 46 | #include <xen/interface/io/protocols.h> |
47 | 47 | ||
48 | extern unsigned int xen_blkif_max_ring_order; | 48 | extern unsigned int xen_blkif_max_ring_order; |
49 | extern unsigned int xenblk_max_queues; | ||
49 | /* | 50 | /* |
50 | * This is the maximum number of segments that would be allowed in indirect | 51 | * This is the maximum number of segments that would be allowed in indirect |
51 | * requests. This value will also be passed to the frontend. | 52 | * requests. This value will also be passed to the frontend. |
@@ -269,68 +270,79 @@ struct persistent_gnt { | |||
269 | struct list_head remove_node; | 270 | struct list_head remove_node; |
270 | }; | 271 | }; |
271 | 272 | ||
272 | struct xen_blkif { | 273 | /* Per-ring information. */ |
273 | /* Unique identifier for this interface. */ | 274 | struct xen_blkif_ring { |
274 | domid_t domid; | ||
275 | unsigned int handle; | ||
276 | /* Physical parameters of the comms window. */ | 275 | /* Physical parameters of the comms window. */ |
277 | unsigned int irq; | 276 | unsigned int irq; |
278 | /* Comms information. */ | ||
279 | enum blkif_protocol blk_protocol; | ||
280 | union blkif_back_rings blk_rings; | 277 | union blkif_back_rings blk_rings; |
281 | void *blk_ring; | 278 | void *blk_ring; |
282 | /* The VBD attached to this interface. */ | ||
283 | struct xen_vbd vbd; | ||
284 | /* Back pointer to the backend_info. */ | ||
285 | struct backend_info *be; | ||
286 | /* Private fields. */ | 279 | /* Private fields. */ |
287 | spinlock_t blk_ring_lock; | 280 | spinlock_t blk_ring_lock; |
288 | atomic_t refcnt; | ||
289 | 281 | ||
290 | wait_queue_head_t wq; | 282 | wait_queue_head_t wq; |
291 | /* for barrier (drain) requests */ | ||
292 | struct completion drain_complete; | ||
293 | atomic_t drain; | ||
294 | atomic_t inflight; | 283 | atomic_t inflight; |
295 | /* One thread per one blkif. */ | 284 | /* One thread per blkif ring. */ |
296 | struct task_struct *xenblkd; | 285 | struct task_struct *xenblkd; |
297 | unsigned int waiting_reqs; | 286 | unsigned int waiting_reqs; |
298 | 287 | ||
299 | /* tree to store persistent grants */ | 288 | /* List of all 'pending_req' available */ |
289 | struct list_head pending_free; | ||
290 | /* And its spinlock. */ | ||
291 | spinlock_t pending_free_lock; | ||
292 | wait_queue_head_t pending_free_wq; | ||
293 | |||
294 | /* Tree to store persistent grants. */ | ||
295 | spinlock_t pers_gnts_lock; | ||
300 | struct rb_root persistent_gnts; | 296 | struct rb_root persistent_gnts; |
301 | unsigned int persistent_gnt_c; | 297 | unsigned int persistent_gnt_c; |
302 | atomic_t persistent_gnt_in_use; | 298 | atomic_t persistent_gnt_in_use; |
303 | unsigned long next_lru; | 299 | unsigned long next_lru; |
304 | 300 | ||
305 | /* used by the kworker that offload work from the persistent purge */ | 301 | /* Statistics. */ |
302 | unsigned long st_print; | ||
303 | unsigned long long st_rd_req; | ||
304 | unsigned long long st_wr_req; | ||
305 | unsigned long long st_oo_req; | ||
306 | unsigned long long st_f_req; | ||
307 | unsigned long long st_ds_req; | ||
308 | unsigned long long st_rd_sect; | ||
309 | unsigned long long st_wr_sect; | ||
310 | |||
311 | /* Used by the kworker that offload work from the persistent purge. */ | ||
306 | struct list_head persistent_purge_list; | 312 | struct list_head persistent_purge_list; |
307 | struct work_struct persistent_purge_work; | 313 | struct work_struct persistent_purge_work; |
308 | 314 | ||
309 | /* buffer of free pages to map grant refs */ | 315 | /* Buffer of free pages to map grant refs. */ |
310 | spinlock_t free_pages_lock; | 316 | spinlock_t free_pages_lock; |
311 | int free_pages_num; | 317 | int free_pages_num; |
312 | struct list_head free_pages; | 318 | struct list_head free_pages; |
313 | 319 | ||
314 | /* List of all 'pending_req' available */ | ||
315 | struct list_head pending_free; | ||
316 | /* And its spinlock. */ | ||
317 | spinlock_t pending_free_lock; | ||
318 | wait_queue_head_t pending_free_wq; | ||
319 | |||
320 | /* statistics */ | ||
321 | unsigned long st_print; | ||
322 | unsigned long long st_rd_req; | ||
323 | unsigned long long st_wr_req; | ||
324 | unsigned long long st_oo_req; | ||
325 | unsigned long long st_f_req; | ||
326 | unsigned long long st_ds_req; | ||
327 | unsigned long long st_rd_sect; | ||
328 | unsigned long long st_wr_sect; | ||
329 | |||
330 | struct work_struct free_work; | 320 | struct work_struct free_work; |
331 | /* Thread shutdown wait queue. */ | 321 | /* Thread shutdown wait queue. */ |
332 | wait_queue_head_t shutdown_wq; | 322 | wait_queue_head_t shutdown_wq; |
333 | unsigned int nr_ring_pages; | 323 | struct xen_blkif *blkif; |
324 | }; | ||
325 | |||
326 | struct xen_blkif { | ||
327 | /* Unique identifier for this interface. */ | ||
328 | domid_t domid; | ||
329 | unsigned int handle; | ||
330 | /* Comms information. */ | ||
331 | enum blkif_protocol blk_protocol; | ||
332 | /* The VBD attached to this interface. */ | ||
333 | struct xen_vbd vbd; | ||
334 | /* Back pointer to the backend_info. */ | ||
335 | struct backend_info *be; | ||
336 | atomic_t refcnt; | ||
337 | /* for barrier (drain) requests */ | ||
338 | struct completion drain_complete; | ||
339 | atomic_t drain; | ||
340 | |||
341 | struct work_struct free_work; | ||
342 | unsigned int nr_ring_pages; | ||
343 | /* All rings for this device. */ | ||
344 | struct xen_blkif_ring *rings; | ||
345 | unsigned int nr_rings; | ||
334 | }; | 346 | }; |
335 | 347 | ||
336 | struct seg_buf { | 348 | struct seg_buf { |
@@ -352,7 +364,7 @@ struct grant_page { | |||
352 | * response queued for it, with the saved 'id' passed back. | 364 | * response queued for it, with the saved 'id' passed back. |
353 | */ | 365 | */ |
354 | struct pending_req { | 366 | struct pending_req { |
355 | struct xen_blkif *blkif; | 367 | struct xen_blkif_ring *ring; |
356 | u64 id; | 368 | u64 id; |
357 | int nr_segs; | 369 | int nr_segs; |
358 | atomic_t pendcnt; | 370 | atomic_t pendcnt; |
@@ -394,7 +406,7 @@ int xen_blkif_xenbus_init(void); | |||
394 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id); | 406 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id); |
395 | int xen_blkif_schedule(void *arg); | 407 | int xen_blkif_schedule(void *arg); |
396 | int xen_blkif_purge_persistent(void *arg); | 408 | int xen_blkif_purge_persistent(void *arg); |
397 | void xen_blkbk_free_caches(struct xen_blkif *blkif); | 409 | void xen_blkbk_free_caches(struct xen_blkif_ring *ring); |
398 | 410 | ||
399 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, | 411 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, |
400 | struct backend_info *be, int state); | 412 | struct backend_info *be, int state); |
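The common.h split above also redraws the lifetime rules: refcnt, drain and free_work stay in the per-device struct xen_blkif, while everything tied to one queue (ring pages, lock, irq, kthread, pending requests, persistent grants, statistics) moves into struct xen_blkif_ring together with a back pointer. In the xenbus.c changes that follow, each ring takes one extra reference on the blkif (xen_blkif_get in xen_blkif_alloc_rings) and drops it in xen_blkif_disconnect, with the allocation-time reference released from xen_blkbk_remove. A small stand-alone sketch of that counting, with a plain int standing in for atomic_t:

#include <stdio.h>
#include <stdlib.h>

struct dev {
	int refcnt;                 /* plain int standing in for atomic_t */
	unsigned int nr_rings;
};

static void dev_put(struct dev *d)
{
	if (--d->refcnt == 0) {
		printf("last reference gone, freeing device\n");
		free(d);
	}
}

int main(void)
{
	unsigned int i, nr_rings = 2;
	struct dev *d = calloc(1, sizeof(*d));

	if (!d)
		return 1;
	d->refcnt = 1;              /* reference taken at allocation time */
	d->nr_rings = nr_rings;
	for (i = 0; i < nr_rings; i++)
		d->refcnt++;        /* alloc_rings: one get per ring */

	for (i = 0; i < nr_rings; i++)
		dev_put(d);         /* disconnect: one put per ring */
	dev_put(d);                 /* remove: drop the allocation reference */
	return 0;
}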
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index f53cff42f8da..876763f7f13e 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c | |||
@@ -86,9 +86,11 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) | |||
86 | { | 86 | { |
87 | int err; | 87 | int err; |
88 | char name[BLKBACK_NAME_LEN]; | 88 | char name[BLKBACK_NAME_LEN]; |
89 | struct xen_blkif_ring *ring; | ||
90 | int i; | ||
89 | 91 | ||
90 | /* Not ready to connect? */ | 92 | /* Not ready to connect? */ |
91 | if (!blkif->irq || !blkif->vbd.bdev) | 93 | if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev) |
92 | return; | 94 | return; |
93 | 95 | ||
94 | /* Already connected? */ | 96 | /* Already connected? */ |
@@ -113,13 +115,55 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) | |||
113 | } | 115 | } |
114 | invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); | 116 | invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); |
115 | 117 | ||
116 | blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name); | 118 | for (i = 0; i < blkif->nr_rings; i++) { |
117 | if (IS_ERR(blkif->xenblkd)) { | 119 | ring = &blkif->rings[i]; |
118 | err = PTR_ERR(blkif->xenblkd); | 120 | ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i); |
119 | blkif->xenblkd = NULL; | 121 | if (IS_ERR(ring->xenblkd)) { |
120 | xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); | 122 | err = PTR_ERR(ring->xenblkd); |
121 | return; | 123 | ring->xenblkd = NULL; |
124 | xenbus_dev_fatal(blkif->be->dev, err, | ||
125 | "start %s-%d xenblkd", name, i); | ||
126 | goto out; | ||
127 | } | ||
128 | } | ||
129 | return; | ||
130 | |||
131 | out: | ||
132 | while (--i >= 0) { | ||
133 | ring = &blkif->rings[i]; | ||
134 | kthread_stop(ring->xenblkd); | ||
135 | } | ||
136 | return; | ||
137 | } | ||
138 | |||
139 | static int xen_blkif_alloc_rings(struct xen_blkif *blkif) | ||
140 | { | ||
141 | unsigned int r; | ||
142 | |||
143 | blkif->rings = kzalloc(blkif->nr_rings * sizeof(struct xen_blkif_ring), GFP_KERNEL); | ||
144 | if (!blkif->rings) | ||
145 | return -ENOMEM; | ||
146 | |||
147 | for (r = 0; r < blkif->nr_rings; r++) { | ||
148 | struct xen_blkif_ring *ring = &blkif->rings[r]; | ||
149 | |||
150 | spin_lock_init(&ring->blk_ring_lock); | ||
151 | init_waitqueue_head(&ring->wq); | ||
152 | INIT_LIST_HEAD(&ring->pending_free); | ||
153 | INIT_LIST_HEAD(&ring->persistent_purge_list); | ||
154 | INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants); | ||
155 | spin_lock_init(&ring->free_pages_lock); | ||
156 | INIT_LIST_HEAD(&ring->free_pages); | ||
157 | |||
158 | spin_lock_init(&ring->pending_free_lock); | ||
159 | init_waitqueue_head(&ring->pending_free_wq); | ||
160 | init_waitqueue_head(&ring->shutdown_wq); | ||
161 | ring->blkif = blkif; | ||
162 | ring->st_print = jiffies; | ||
163 | xen_blkif_get(blkif); | ||
122 | } | 164 | } |
165 | |||
166 | return 0; | ||
123 | } | 167 | } |
124 | 168 | ||
125 | static struct xen_blkif *xen_blkif_alloc(domid_t domid) | 169 | static struct xen_blkif *xen_blkif_alloc(domid_t domid) |
@@ -133,41 +177,25 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) | |||
133 | return ERR_PTR(-ENOMEM); | 177 | return ERR_PTR(-ENOMEM); |
134 | 178 | ||
135 | blkif->domid = domid; | 179 | blkif->domid = domid; |
136 | spin_lock_init(&blkif->blk_ring_lock); | ||
137 | atomic_set(&blkif->refcnt, 1); | 180 | atomic_set(&blkif->refcnt, 1); |
138 | init_waitqueue_head(&blkif->wq); | ||
139 | init_completion(&blkif->drain_complete); | 181 | init_completion(&blkif->drain_complete); |
140 | atomic_set(&blkif->drain, 0); | ||
141 | blkif->st_print = jiffies; | ||
142 | blkif->persistent_gnts.rb_node = NULL; | ||
143 | spin_lock_init(&blkif->free_pages_lock); | ||
144 | INIT_LIST_HEAD(&blkif->free_pages); | ||
145 | INIT_LIST_HEAD(&blkif->persistent_purge_list); | ||
146 | blkif->free_pages_num = 0; | ||
147 | atomic_set(&blkif->persistent_gnt_in_use, 0); | ||
148 | atomic_set(&blkif->inflight, 0); | ||
149 | INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants); | ||
150 | |||
151 | INIT_LIST_HEAD(&blkif->pending_free); | ||
152 | INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); | 182 | INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); |
153 | spin_lock_init(&blkif->pending_free_lock); | ||
154 | init_waitqueue_head(&blkif->pending_free_wq); | ||
155 | init_waitqueue_head(&blkif->shutdown_wq); | ||
156 | 183 | ||
157 | return blkif; | 184 | return blkif; |
158 | } | 185 | } |
159 | 186 | ||
160 | static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, | 187 | static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref, |
161 | unsigned int nr_grefs, unsigned int evtchn) | 188 | unsigned int nr_grefs, unsigned int evtchn) |
162 | { | 189 | { |
163 | int err; | 190 | int err; |
191 | struct xen_blkif *blkif = ring->blkif; | ||
164 | 192 | ||
165 | /* Already connected through? */ | 193 | /* Already connected through? */ |
166 | if (blkif->irq) | 194 | if (ring->irq) |
167 | return 0; | 195 | return 0; |
168 | 196 | ||
169 | err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs, | 197 | err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs, |
170 | &blkif->blk_ring); | 198 | &ring->blk_ring); |
171 | if (err < 0) | 199 | if (err < 0) |
172 | return err; | 200 | return err; |
173 | 201 | ||
@@ -175,24 +203,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, | |||
175 | case BLKIF_PROTOCOL_NATIVE: | 203 | case BLKIF_PROTOCOL_NATIVE: |
176 | { | 204 | { |
177 | struct blkif_sring *sring; | 205 | struct blkif_sring *sring; |
178 | sring = (struct blkif_sring *)blkif->blk_ring; | 206 | sring = (struct blkif_sring *)ring->blk_ring; |
179 | BACK_RING_INIT(&blkif->blk_rings.native, sring, | 207 | BACK_RING_INIT(&ring->blk_rings.native, sring, |
180 | XEN_PAGE_SIZE * nr_grefs); | 208 | XEN_PAGE_SIZE * nr_grefs); |
181 | break; | 209 | break; |
182 | } | 210 | } |
183 | case BLKIF_PROTOCOL_X86_32: | 211 | case BLKIF_PROTOCOL_X86_32: |
184 | { | 212 | { |
185 | struct blkif_x86_32_sring *sring_x86_32; | 213 | struct blkif_x86_32_sring *sring_x86_32; |
186 | sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; | 214 | sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring; |
187 | BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, | 215 | BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32, |
188 | XEN_PAGE_SIZE * nr_grefs); | 216 | XEN_PAGE_SIZE * nr_grefs); |
189 | break; | 217 | break; |
190 | } | 218 | } |
191 | case BLKIF_PROTOCOL_X86_64: | 219 | case BLKIF_PROTOCOL_X86_64: |
192 | { | 220 | { |
193 | struct blkif_x86_64_sring *sring_x86_64; | 221 | struct blkif_x86_64_sring *sring_x86_64; |
194 | sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; | 222 | sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring; |
195 | BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, | 223 | BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64, |
196 | XEN_PAGE_SIZE * nr_grefs); | 224 | XEN_PAGE_SIZE * nr_grefs); |
197 | break; | 225 | break; |
198 | } | 226 | } |
@@ -202,13 +230,13 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, | |||
202 | 230 | ||
203 | err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, | 231 | err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, |
204 | xen_blkif_be_int, 0, | 232 | xen_blkif_be_int, 0, |
205 | "blkif-backend", blkif); | 233 | "blkif-backend", ring); |
206 | if (err < 0) { | 234 | if (err < 0) { |
207 | xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); | 235 | xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring); |
208 | blkif->blk_rings.common.sring = NULL; | 236 | ring->blk_rings.common.sring = NULL; |
209 | return err; | 237 | return err; |
210 | } | 238 | } |
211 | blkif->irq = err; | 239 | ring->irq = err; |
212 | 240 | ||
213 | return 0; | 241 | return 0; |
214 | } | 242 | } |
@@ -216,50 +244,69 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, | |||
216 | static int xen_blkif_disconnect(struct xen_blkif *blkif) | 244 | static int xen_blkif_disconnect(struct xen_blkif *blkif) |
217 | { | 245 | { |
218 | struct pending_req *req, *n; | 246 | struct pending_req *req, *n; |
219 | int i = 0, j; | 247 | unsigned int j, r; |
220 | 248 | ||
221 | if (blkif->xenblkd) { | 249 | for (r = 0; r < blkif->nr_rings; r++) { |
222 | kthread_stop(blkif->xenblkd); | 250 | struct xen_blkif_ring *ring = &blkif->rings[r]; |
223 | wake_up(&blkif->shutdown_wq); | 251 | unsigned int i = 0; |
224 | blkif->xenblkd = NULL; | ||
225 | } | ||
226 | 252 | ||
227 | /* The above kthread_stop() guarantees that at this point we | 253 | if (ring->xenblkd) { |
228 | * don't have any discard_io or other_io requests. So, checking | 254 | kthread_stop(ring->xenblkd); |
229 | * for inflight IO is enough. | 255 | wake_up(&ring->shutdown_wq); |
230 | */ | 256 | ring->xenblkd = NULL; |
231 | if (atomic_read(&blkif->inflight) > 0) | 257 | } |
232 | return -EBUSY; | ||
233 | 258 | ||
234 | if (blkif->irq) { | 259 | /* The above kthread_stop() guarantees that at this point we |
235 | unbind_from_irqhandler(blkif->irq, blkif); | 260 | * don't have any discard_io or other_io requests. So, checking |
236 | blkif->irq = 0; | 261 | * for inflight IO is enough. |
237 | } | 262 | */ |
263 | if (atomic_read(&ring->inflight) > 0) | ||
264 | return -EBUSY; | ||
238 | 265 | ||
239 | if (blkif->blk_rings.common.sring) { | 266 | if (ring->irq) { |
240 | xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); | 267 | unbind_from_irqhandler(ring->irq, ring); |
241 | blkif->blk_rings.common.sring = NULL; | 268 | ring->irq = 0; |
242 | } | 269 | } |
243 | 270 | ||
244 | /* Remove all persistent grants and the cache of ballooned pages. */ | 271 | if (ring->blk_rings.common.sring) { |
245 | xen_blkbk_free_caches(blkif); | 272 | xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring); |
273 | ring->blk_rings.common.sring = NULL; | ||
274 | } | ||
246 | 275 | ||
247 | /* Check that there is no request in use */ | 276 | /* Remove all persistent grants and the cache of ballooned pages. */ |
248 | list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { | 277 | xen_blkbk_free_caches(ring); |
249 | list_del(&req->free_list); | ||
250 | 278 | ||
251 | for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) | 279 | /* Check that there is no request in use */ |
252 | kfree(req->segments[j]); | 280 | list_for_each_entry_safe(req, n, &ring->pending_free, free_list) { |
281 | list_del(&req->free_list); | ||
253 | 282 | ||
254 | for (j = 0; j < MAX_INDIRECT_PAGES; j++) | 283 | for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) |
255 | kfree(req->indirect_pages[j]); | 284 | kfree(req->segments[j]); |
256 | 285 | ||
257 | kfree(req); | 286 | for (j = 0; j < MAX_INDIRECT_PAGES; j++) |
258 | i++; | 287 | kfree(req->indirect_pages[j]); |
259 | } | 288 | |
289 | kfree(req); | ||
290 | i++; | ||
291 | } | ||
260 | 292 | ||
261 | WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); | 293 | BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0); |
294 | BUG_ON(!list_empty(&ring->persistent_purge_list)); | ||
295 | BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts)); | ||
296 | BUG_ON(!list_empty(&ring->free_pages)); | ||
297 | BUG_ON(ring->free_pages_num != 0); | ||
298 | BUG_ON(ring->persistent_gnt_c != 0); | ||
299 | WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); | ||
300 | xen_blkif_put(blkif); | ||
301 | } | ||
262 | blkif->nr_ring_pages = 0; | 302 | blkif->nr_ring_pages = 0; |
303 | /* | ||
304 | * blkif->rings was allocated in connect_ring, so we should free it in | ||
305 | * here. | ||
306 | */ | ||
307 | kfree(blkif->rings); | ||
308 | blkif->rings = NULL; | ||
309 | blkif->nr_rings = 0; | ||
263 | 310 | ||
264 | return 0; | 311 | return 0; |
265 | } | 312 | } |
@@ -271,13 +318,6 @@ static void xen_blkif_free(struct xen_blkif *blkif) | |||
271 | xen_vbd_free(&blkif->vbd); | 318 | xen_vbd_free(&blkif->vbd); |
272 | 319 | ||
273 | /* Make sure everything is drained before shutting down */ | 320 | /* Make sure everything is drained before shutting down */ |
274 | BUG_ON(blkif->persistent_gnt_c != 0); | ||
275 | BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0); | ||
276 | BUG_ON(blkif->free_pages_num != 0); | ||
277 | BUG_ON(!list_empty(&blkif->persistent_purge_list)); | ||
278 | BUG_ON(!list_empty(&blkif->free_pages)); | ||
279 | BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); | ||
280 | |||
281 | kmem_cache_free(xen_blkif_cachep, blkif); | 321 | kmem_cache_free(xen_blkif_cachep, blkif); |
282 | } | 322 | } |
283 | 323 | ||
@@ -296,25 +336,38 @@ int __init xen_blkif_interface_init(void) | |||
296 | * sysfs interface for VBD I/O requests | 336 | * sysfs interface for VBD I/O requests |
297 | */ | 337 | */ |
298 | 338 | ||
299 | #define VBD_SHOW(name, format, args...) \ | 339 | #define VBD_SHOW_ALLRING(name, format) \ |
300 | static ssize_t show_##name(struct device *_dev, \ | 340 | static ssize_t show_##name(struct device *_dev, \ |
301 | struct device_attribute *attr, \ | 341 | struct device_attribute *attr, \ |
302 | char *buf) \ | 342 | char *buf) \ |
303 | { \ | 343 | { \ |
304 | struct xenbus_device *dev = to_xenbus_device(_dev); \ | 344 | struct xenbus_device *dev = to_xenbus_device(_dev); \ |
305 | struct backend_info *be = dev_get_drvdata(&dev->dev); \ | 345 | struct backend_info *be = dev_get_drvdata(&dev->dev); \ |
346 | struct xen_blkif *blkif = be->blkif; \ | ||
347 | unsigned int i; \ | ||
348 | unsigned long long result = 0; \ | ||
306 | \ | 349 | \ |
307 | return sprintf(buf, format, ##args); \ | 350 | if (!blkif->rings) \ |
351 | goto out; \ | ||
352 | \ | ||
353 | for (i = 0; i < blkif->nr_rings; i++) { \ | ||
354 | struct xen_blkif_ring *ring = &blkif->rings[i]; \ | ||
355 | \ | ||
356 | result += ring->st_##name; \ | ||
357 | } \ | ||
358 | \ | ||
359 | out: \ | ||
360 | return sprintf(buf, format, result); \ | ||
308 | } \ | 361 | } \ |
309 | static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) | 362 | static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) |
310 | 363 | ||
311 | VBD_SHOW(oo_req, "%llu\n", be->blkif->st_oo_req); | 364 | VBD_SHOW_ALLRING(oo_req, "%llu\n"); |
312 | VBD_SHOW(rd_req, "%llu\n", be->blkif->st_rd_req); | 365 | VBD_SHOW_ALLRING(rd_req, "%llu\n"); |
313 | VBD_SHOW(wr_req, "%llu\n", be->blkif->st_wr_req); | 366 | VBD_SHOW_ALLRING(wr_req, "%llu\n"); |
314 | VBD_SHOW(f_req, "%llu\n", be->blkif->st_f_req); | 367 | VBD_SHOW_ALLRING(f_req, "%llu\n"); |
315 | VBD_SHOW(ds_req, "%llu\n", be->blkif->st_ds_req); | 368 | VBD_SHOW_ALLRING(ds_req, "%llu\n"); |
316 | VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect); | 369 | VBD_SHOW_ALLRING(rd_sect, "%llu\n"); |
317 | VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect); | 370 | VBD_SHOW_ALLRING(wr_sect, "%llu\n"); |
318 | 371 | ||
319 | static struct attribute *xen_vbdstat_attrs[] = { | 372 | static struct attribute *xen_vbdstat_attrs[] = { |
320 | &dev_attr_oo_req.attr, | 373 | &dev_attr_oo_req.attr, |
@@ -332,6 +385,18 @@ static struct attribute_group xen_vbdstat_group = { | |||
332 | .attrs = xen_vbdstat_attrs, | 385 | .attrs = xen_vbdstat_attrs, |
333 | }; | 386 | }; |
334 | 387 | ||
388 | #define VBD_SHOW(name, format, args...) \ | ||
389 | static ssize_t show_##name(struct device *_dev, \ | ||
390 | struct device_attribute *attr, \ | ||
391 | char *buf) \ | ||
392 | { \ | ||
393 | struct xenbus_device *dev = to_xenbus_device(_dev); \ | ||
394 | struct backend_info *be = dev_get_drvdata(&dev->dev); \ | ||
395 | \ | ||
396 | return sprintf(buf, format, ##args); \ | ||
397 | } \ | ||
398 | static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) | ||
399 | |||
335 | VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); | 400 | VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); |
336 | VBD_SHOW(mode, "%s\n", be->mode); | 401 | VBD_SHOW(mode, "%s\n", be->mode); |
337 | 402 | ||
@@ -440,11 +505,11 @@ static int xen_blkbk_remove(struct xenbus_device *dev) | |||
440 | 505 | ||
441 | dev_set_drvdata(&dev->dev, NULL); | 506 | dev_set_drvdata(&dev->dev, NULL); |
442 | 507 | ||
443 | if (be->blkif) { | 508 | if (be->blkif) |
444 | xen_blkif_disconnect(be->blkif); | 509 | xen_blkif_disconnect(be->blkif); |
445 | xen_blkif_put(be->blkif); | ||
446 | } | ||
447 | 510 | ||
511 | /* Put the reference we set in xen_blkif_alloc(). */ | ||
512 | xen_blkif_put(be->blkif); | ||
448 | kfree(be->mode); | 513 | kfree(be->mode); |
449 | kfree(be); | 514 | kfree(be); |
450 | return 0; | 515 | return 0; |
@@ -553,6 +618,12 @@ static int xen_blkbk_probe(struct xenbus_device *dev, | |||
553 | goto fail; | 618 | goto fail; |
554 | } | 619 | } |
555 | 620 | ||
621 | /* Multi-queue: advertise how many queues are supported by us.*/ | ||
622 | err = xenbus_printf(XBT_NIL, dev->nodename, | ||
623 | "multi-queue-max-queues", "%u", xenblk_max_queues); | ||
624 | if (err) | ||
625 | pr_warn("Error writing multi-queue-max-queues\n"); | ||
626 | |||
556 | /* setup back pointer */ | 627 | /* setup back pointer */ |
557 | be->blkif->be = be; | 628 | be->blkif->be = be; |
558 | 629 | ||
@@ -708,8 +779,14 @@ static void frontend_changed(struct xenbus_device *dev, | |||
708 | } | 779 | } |
709 | 780 | ||
710 | err = connect_ring(be); | 781 | err = connect_ring(be); |
711 | if (err) | 782 | if (err) { |
783 | /* | ||
784 | * Clean up so that memory resources can be used by | ||
785 | * other devices. connect_ring reported already error. | ||
786 | */ | ||
787 | xen_blkif_disconnect(be->blkif); | ||
712 | break; | 788 | break; |
789 | } | ||
713 | xen_update_blkif_status(be->blkif); | 790 | xen_update_blkif_status(be->blkif); |
714 | break; | 791 | break; |
715 | 792 | ||
@@ -825,50 +902,43 @@ again: | |||
825 | xenbus_transaction_end(xbt, 1); | 902 | xenbus_transaction_end(xbt, 1); |
826 | } | 903 | } |
827 | 904 | ||
828 | 905 | /* | |
829 | static int connect_ring(struct backend_info *be) | 906 | * Each ring may span multiple pages, depending on "ring-page-order". |
907 | */ | ||
908 | static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir) | ||
830 | { | 909 | { |
831 | struct xenbus_device *dev = be->dev; | ||
832 | unsigned int ring_ref[XENBUS_MAX_RING_GRANTS]; | 910 | unsigned int ring_ref[XENBUS_MAX_RING_GRANTS]; |
833 | unsigned int evtchn, nr_grefs, ring_page_order; | ||
834 | unsigned int pers_grants; | ||
835 | char protocol[64] = ""; | ||
836 | struct pending_req *req, *n; | 911 | struct pending_req *req, *n; |
837 | int err, i, j; | 912 | int err, i, j; |
913 | struct xen_blkif *blkif = ring->blkif; | ||
914 | struct xenbus_device *dev = blkif->be->dev; | ||
915 | unsigned int ring_page_order, nr_grefs, evtchn; | ||
838 | 916 | ||
839 | pr_debug("%s %s\n", __func__, dev->otherend); | 917 | err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u", |
840 | |||
841 | err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", | ||
842 | &evtchn); | 918 | &evtchn); |
843 | if (err != 1) { | 919 | if (err != 1) { |
844 | err = -EINVAL; | 920 | err = -EINVAL; |
845 | xenbus_dev_fatal(dev, err, "reading %s/event-channel", | 921 | xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir); |
846 | dev->otherend); | ||
847 | return err; | 922 | return err; |
848 | } | 923 | } |
849 | pr_info("event-channel %u\n", evtchn); | ||
850 | 924 | ||
851 | err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", | 925 | err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", |
852 | &ring_page_order); | 926 | &ring_page_order); |
853 | if (err != 1) { | 927 | if (err != 1) { |
854 | err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", | 928 | err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]); |
855 | "%u", &ring_ref[0]); | ||
856 | if (err != 1) { | 929 | if (err != 1) { |
857 | err = -EINVAL; | 930 | err = -EINVAL; |
858 | xenbus_dev_fatal(dev, err, "reading %s/ring-ref", | 931 | xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir); |
859 | dev->otherend); | ||
860 | return err; | 932 | return err; |
861 | } | 933 | } |
862 | nr_grefs = 1; | 934 | nr_grefs = 1; |
863 | pr_info("%s:using single page: ring-ref %d\n", dev->otherend, | ||
864 | ring_ref[0]); | ||
865 | } else { | 935 | } else { |
866 | unsigned int i; | 936 | unsigned int i; |
867 | 937 | ||
868 | if (ring_page_order > xen_blkif_max_ring_order) { | 938 | if (ring_page_order > xen_blkif_max_ring_order) { |
869 | err = -EINVAL; | 939 | err = -EINVAL; |
870 | xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d", | 940 | xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d", |
871 | dev->otherend, ring_page_order, | 941 | dir, ring_page_order, |
872 | xen_blkif_max_ring_order); | 942 | xen_blkif_max_ring_order); |
873 | return err; | 943 | return err; |
874 | } | 944 | } |
@@ -878,52 +948,23 @@ static int connect_ring(struct backend_info *be) | |||
878 | char ring_ref_name[RINGREF_NAME_LEN]; | 948 | char ring_ref_name[RINGREF_NAME_LEN]; |
879 | 949 | ||
880 | snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); | 950 | snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); |
881 | err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name, | 951 | err = xenbus_scanf(XBT_NIL, dir, ring_ref_name, |
882 | "%u", &ring_ref[i]); | 952 | "%u", &ring_ref[i]); |
883 | if (err != 1) { | 953 | if (err != 1) { |
884 | err = -EINVAL; | 954 | err = -EINVAL; |
885 | xenbus_dev_fatal(dev, err, "reading %s/%s", | 955 | xenbus_dev_fatal(dev, err, "reading %s/%s", |
886 | dev->otherend, ring_ref_name); | 956 | dir, ring_ref_name); |
887 | return err; | 957 | return err; |
888 | } | 958 | } |
889 | pr_info("ring-ref%u: %u\n", i, ring_ref[i]); | ||
890 | } | 959 | } |
891 | } | 960 | } |
892 | 961 | blkif->nr_ring_pages = nr_grefs; | |
893 | be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; | ||
894 | err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", | ||
895 | "%63s", protocol, NULL); | ||
896 | if (err) | ||
897 | strcpy(protocol, "unspecified, assuming default"); | ||
898 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) | ||
899 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | ||
900 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) | ||
901 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; | ||
902 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) | ||
903 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; | ||
904 | else { | ||
905 | xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); | ||
906 | return -1; | ||
907 | } | ||
908 | err = xenbus_gather(XBT_NIL, dev->otherend, | ||
909 | "feature-persistent", "%u", | ||
910 | &pers_grants, NULL); | ||
911 | if (err) | ||
912 | pers_grants = 0; | ||
913 | |||
914 | be->blkif->vbd.feature_gnt_persistent = pers_grants; | ||
915 | be->blkif->vbd.overflow_max_grants = 0; | ||
916 | be->blkif->nr_ring_pages = nr_grefs; | ||
917 | |||
918 | pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n", | ||
919 | nr_grefs, evtchn, be->blkif->blk_protocol, protocol, | ||
920 | pers_grants ? "persistent grants" : ""); | ||
921 | 962 | ||
922 | for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) { | 963 | for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) { |
923 | req = kzalloc(sizeof(*req), GFP_KERNEL); | 964 | req = kzalloc(sizeof(*req), GFP_KERNEL); |
924 | if (!req) | 965 | if (!req) |
925 | goto fail; | 966 | goto fail; |
926 | list_add_tail(&req->free_list, &be->blkif->pending_free); | 967 | list_add_tail(&req->free_list, &ring->pending_free); |
927 | for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { | 968 | for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { |
928 | req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL); | 969 | req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL); |
929 | if (!req->segments[j]) | 970 | if (!req->segments[j]) |
@@ -938,7 +979,7 @@ static int connect_ring(struct backend_info *be) | |||
938 | } | 979 | } |
939 | 980 | ||
940 | /* Map the shared frame, irq etc. */ | 981 | /* Map the shared frame, irq etc. */ |
941 | err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn); | 982 | err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn); |
942 | if (err) { | 983 | if (err) { |
943 | xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn); | 984 | xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn); |
944 | return err; | 985 | return err; |
@@ -947,7 +988,7 @@ static int connect_ring(struct backend_info *be) | |||
947 | return 0; | 988 | return 0; |
948 | 989 | ||
949 | fail: | 990 | fail: |
950 | list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) { | 991 | list_for_each_entry_safe(req, n, &ring->pending_free, free_list) { |
951 | list_del(&req->free_list); | 992 | list_del(&req->free_list); |
952 | for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { | 993 | for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { |
953 | if (!req->segments[j]) | 994 | if (!req->segments[j]) |
@@ -962,6 +1003,93 @@ fail: | |||
962 | kfree(req); | 1003 | kfree(req); |
963 | } | 1004 | } |
964 | return -ENOMEM; | 1005 | return -ENOMEM; |
1006 | |||
1007 | } | ||
1008 | |||
1009 | static int connect_ring(struct backend_info *be) | ||
1010 | { | ||
1011 | struct xenbus_device *dev = be->dev; | ||
1012 | unsigned int pers_grants; | ||
1013 | char protocol[64] = ""; | ||
1014 | int err, i; | ||
1015 | char *xspath; | ||
1016 | size_t xspathsize; | ||
1017 | const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */ | ||
1018 | unsigned int requested_num_queues = 0; | ||
1019 | |||
1020 | pr_debug("%s %s\n", __func__, dev->otherend); | ||
1021 | |||
1022 | be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; | ||
1023 | err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", | ||
1024 | "%63s", protocol, NULL); | ||
1025 | if (err) | ||
1026 | strcpy(protocol, "unspecified, assuming default"); | ||
1027 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) | ||
1028 | be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; | ||
1029 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) | ||
1030 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; | ||
1031 | else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) | ||
1032 | be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; | ||
1033 | else { | ||
1034 | xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); | ||
1035 | return -ENOSYS; | ||
1036 | } | ||
1037 | err = xenbus_gather(XBT_NIL, dev->otherend, | ||
1038 | "feature-persistent", "%u", | ||
1039 | &pers_grants, NULL); | ||
1040 | if (err) | ||
1041 | pers_grants = 0; | ||
1042 | |||
1043 | be->blkif->vbd.feature_gnt_persistent = pers_grants; | ||
1044 | be->blkif->vbd.overflow_max_grants = 0; | ||
1045 | |||
1046 | /* | ||
1047 | * Read the number of hardware queues from frontend. | ||
1048 | */ | ||
1049 | err = xenbus_scanf(XBT_NIL, dev->otherend, "multi-queue-num-queues", | ||
1050 | "%u", &requested_num_queues); | ||
1051 | if (err < 0) { | ||
1052 | requested_num_queues = 1; | ||
1053 | } else { | ||
1054 | if (requested_num_queues > xenblk_max_queues | ||
1055 | || requested_num_queues == 0) { | ||
1056 | /* Buggy or malicious guest. */ | ||
1057 | xenbus_dev_fatal(dev, err, | ||
1058 | "guest requested %u queues, exceeding the maximum of %u.", | ||
1059 | requested_num_queues, xenblk_max_queues); | ||
1060 | return -ENOSYS; | ||
1061 | } | ||
1062 | } | ||
1063 | be->blkif->nr_rings = requested_num_queues; | ||
1064 | if (xen_blkif_alloc_rings(be->blkif)) | ||
1065 | return -ENOMEM; | ||
1066 | |||
1067 | pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename, | ||
1068 | be->blkif->nr_rings, be->blkif->blk_protocol, protocol, | ||
1069 | pers_grants ? "persistent grants" : ""); | ||
1070 | |||
1071 | if (be->blkif->nr_rings == 1) | ||
1072 | return read_per_ring_refs(&be->blkif->rings[0], dev->otherend); | ||
1073 | else { | ||
1074 | xspathsize = strlen(dev->otherend) + xenstore_path_ext_size; | ||
1075 | xspath = kmalloc(xspathsize, GFP_KERNEL); | ||
1076 | if (!xspath) { | ||
1077 | xenbus_dev_fatal(dev, -ENOMEM, "reading ring references"); | ||
1078 | return -ENOMEM; | ||
1079 | } | ||
1080 | |||
1081 | for (i = 0; i < be->blkif->nr_rings; i++) { | ||
1082 | memset(xspath, 0, xspathsize); | ||
1083 | snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i); | ||
1084 | err = read_per_ring_refs(&be->blkif->rings[i], xspath); | ||
1085 | if (err) { | ||
1086 | kfree(xspath); | ||
1087 | return err; | ||
1088 | } | ||
1089 | } | ||
1090 | kfree(xspath); | ||
1091 | } | ||
1092 | return 0; | ||
965 | } | 1093 | } |
966 | 1094 | ||
967 | static const struct xenbus_device_id xen_blkbk_ids[] = { | 1095 | static const struct xenbus_device_id xen_blkbk_ids[] = { |
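The connect_ring rework above also spells out the xenstore layout for multi-queue negotiation: the backend advertises multi-queue-max-queues from xen_blkbk_probe, the frontend answers with multi-queue-num-queues, and when more than one queue is agreed on, the per-ring keys (ring-ref*, event-channel) are read from a queue-%u subdirectory instead of the device root. A small sketch of the paths read_per_ring_refs ends up looking at; the otherend directory here is a made-up example:

#include <stdio.h>

int main(void)
{
	/* Example frontend directory; the real path comes from dev->otherend. */
	const char *otherend = "/local/domain/1/device/vbd/51712";
	unsigned int nr_rings = 2;   /* the negotiated multi-queue-num-queues */
	char dir[64];

	for (unsigned int i = 0; i < nr_rings; i++) {
		if (nr_rings == 1)
			/* single queue: keys stay at the device root */
			snprintf(dir, sizeof(dir), "%s", otherend);
		else
			/* several queues: one queue-%u subdirectory per ring */
			snprintf(dir, sizeof(dir), "%s/queue-%u", otherend, i);
		/* multi-page rings use ring-ref0..ring-refN instead of ring-ref */
		printf("%s/ring-ref  %s/event-channel\n", dir, dir);
	}
	return 0;
}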
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2fee2eef988d..8a8dc91c39f7 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -60,6 +60,20 @@ | |||
60 | 60 | ||
61 | #include <asm/xen/hypervisor.h> | 61 | #include <asm/xen/hypervisor.h> |
62 | 62 | ||
63 | /* | ||
64 | * The minimal size of segment supported by the block framework is PAGE_SIZE. | ||
65 | * When Linux is using a different page size than Xen, it may not be possible | ||
66 | * to put all the data in a single segment. | ||
67 | * This can happen when the backend doesn't support indirect descriptors and | ||
68 | * therefore the maximum amount of data that a request can carry is | ||
69 | * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE = 44KB | ||
70 | * | ||
71 | * Note that we only support one extra request. So the Linux page size | ||
72 | * should be <= ( 2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) = | ||
73 | * 88KB. | ||
74 | */ | ||
75 | #define HAS_EXTRA_REQ (BLKIF_MAX_SEGMENTS_PER_REQUEST < XEN_PFN_PER_PAGE) | ||
76 | |||
63 | enum blkif_state { | 77 | enum blkif_state { |
64 | BLKIF_STATE_DISCONNECTED, | 78 | BLKIF_STATE_DISCONNECTED, |
65 | BLKIF_STATE_CONNECTED, | 79 | BLKIF_STATE_CONNECTED, |
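The 44KB and 88KB figures in the comment above follow from the usual Xen block-interface constants: BLKIF_MAX_SEGMENTS_PER_REQUEST is 11 and XEN_PAGE_SIZE is 4 KiB, so a single non-indirect request carries at most 11 * 4 KiB = 44 KiB, and the one permitted extra request doubles that to 88 KiB. The extra request is only needed when a single Linux page spans more Xen pages than one request has segments, e.g. 64 KiB Linux pages over 4 KiB Xen pages (16 > 11). A short sketch of the arithmetic, assuming those constant values:

#include <stdio.h>

#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
#define XEN_PAGE_SIZE                  4096u

int main(void)
{
	unsigned int linux_page_size = 65536;                            /* 64 KiB pages */
	unsigned int xen_pfn_per_page = linux_page_size / XEN_PAGE_SIZE; /* 16 */
	unsigned int per_req = BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE;

	printf("one request carries at most %u bytes (44 KiB)\n", per_req);
	printf("two requests carry at most %u bytes (88 KiB)\n", 2 * per_req);
	printf("extra request needed: %s (Xen pages per Linux page %u > %u segments)\n",
	       xen_pfn_per_page > BLKIF_MAX_SEGMENTS_PER_REQUEST ? "yes" : "no",
	       xen_pfn_per_page, BLKIF_MAX_SEGMENTS_PER_REQUEST);
	return 0;
}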
@@ -72,6 +86,13 @@ struct grant { | |||
72 | struct list_head node; | 86 | struct list_head node; |
73 | }; | 87 | }; |
74 | 88 | ||
89 | enum blk_req_status { | ||
90 | REQ_WAITING, | ||
91 | REQ_DONE, | ||
92 | REQ_ERROR, | ||
93 | REQ_EOPNOTSUPP, | ||
94 | }; | ||
95 | |||
75 | struct blk_shadow { | 96 | struct blk_shadow { |
76 | struct blkif_request req; | 97 | struct blkif_request req; |
77 | struct request *request; | 98 | struct request *request; |
@@ -79,6 +100,14 @@ struct blk_shadow { | |||
79 | struct grant **indirect_grants; | 100 | struct grant **indirect_grants; |
80 | struct scatterlist *sg; | 101 | struct scatterlist *sg; |
81 | unsigned int num_sg; | 102 | unsigned int num_sg; |
103 | enum blk_req_status status; | ||
104 | |||
105 | #define NO_ASSOCIATED_ID ~0UL | ||
106 | /* | ||
107 | * Id of the sibling if we ever need 2 requests when handling a | ||
108 | * block I/O request | ||
109 | */ | ||
110 | unsigned long associated_id; | ||
82 | }; | 111 | }; |
83 | 112 | ||
84 | struct split_bio { | 113 | struct split_bio { |
@@ -99,6 +128,10 @@ static unsigned int xen_blkif_max_segments = 32; | |||
99 | module_param_named(max, xen_blkif_max_segments, int, S_IRUGO); | 128 | module_param_named(max, xen_blkif_max_segments, int, S_IRUGO); |
100 | MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)"); | 129 | MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)"); |
101 | 130 | ||
131 | static unsigned int xen_blkif_max_queues = 4; | ||
132 | module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO); | ||
133 | MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk"); | ||
134 | |||
102 | /* | 135 | /* |
103 | * Maximum order of pages to be used for the shared ring between front and | 136 | * Maximum order of pages to be used for the shared ring between front and |
104 | * backend, 4KB page granularity is used. | 137 | * backend, 4KB page granularity is used. |
@@ -114,10 +147,35 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the | |||
114 | __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS) | 147 | __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS) |
115 | 148 | ||
116 | /* | 149 | /* |
117 | * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 | 150 | * ring-ref%u i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 |
118 | * characters are enough. Define to 20 to keep consist with backend. | 151 | * characters are enough. Define to 20 to keep consistent with backend. |
119 | */ | 152 | */ |
120 | #define RINGREF_NAME_LEN (20) | 153 | #define RINGREF_NAME_LEN (20) |
154 | /* | ||
155 | * queue-%u would take 7 + 10(UINT_MAX) = 17 characters. | ||
156 | */ | ||
157 | #define QUEUE_NAME_LEN (17) | ||
158 | |||
159 | /* | ||
160 | * Per-ring info. | ||
161 | * Every blkfront device can associate with one or more blkfront_ring_info, | ||
162 | * depending on how many hardware queues/rings to be used. | ||
163 | */ | ||
164 | struct blkfront_ring_info { | ||
165 | /* Lock to protect data in every ring buffer. */ | ||
166 | spinlock_t ring_lock; | ||
167 | struct blkif_front_ring ring; | ||
168 | unsigned int ring_ref[XENBUS_MAX_RING_GRANTS]; | ||
169 | unsigned int evtchn, irq; | ||
170 | struct work_struct work; | ||
171 | struct gnttab_free_callback callback; | ||
172 | struct blk_shadow shadow[BLK_MAX_RING_SIZE]; | ||
173 | struct list_head indirect_pages; | ||
174 | struct list_head grants; | ||
175 | unsigned int persistent_gnts_c; | ||
176 | unsigned long shadow_free; | ||
177 | struct blkfront_info *dev_info; | ||
178 | }; | ||
121 | 179 | ||
122 | /* | 180 | /* |
123 | * We have one of these per vbd, whether ide, scsi or 'other'. They | 181 | * We have one of these per vbd, whether ide, scsi or 'other'. They |
@@ -126,25 +184,15 @@ MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the | |||
126 | */ | 184 | */ |
127 | struct blkfront_info | 185 | struct blkfront_info |
128 | { | 186 | { |
129 | spinlock_t io_lock; | ||
130 | struct mutex mutex; | 187 | struct mutex mutex; |
131 | struct xenbus_device *xbdev; | 188 | struct xenbus_device *xbdev; |
132 | struct gendisk *gd; | 189 | struct gendisk *gd; |
133 | int vdevice; | 190 | int vdevice; |
134 | blkif_vdev_t handle; | 191 | blkif_vdev_t handle; |
135 | enum blkif_state connected; | 192 | enum blkif_state connected; |
136 | int ring_ref[XENBUS_MAX_RING_GRANTS]; | 193 | /* Number of pages per ring buffer. */ |
137 | unsigned int nr_ring_pages; | 194 | unsigned int nr_ring_pages; |
138 | struct blkif_front_ring ring; | ||
139 | unsigned int evtchn, irq; | ||
140 | struct request_queue *rq; | 195 | struct request_queue *rq; |
141 | struct work_struct work; | ||
142 | struct gnttab_free_callback callback; | ||
143 | struct blk_shadow shadow[BLK_MAX_RING_SIZE]; | ||
144 | struct list_head grants; | ||
145 | struct list_head indirect_pages; | ||
146 | unsigned int persistent_gnts_c; | ||
147 | unsigned long shadow_free; | ||
148 | unsigned int feature_flush; | 196 | unsigned int feature_flush; |
149 | unsigned int feature_discard:1; | 197 | unsigned int feature_discard:1; |
150 | unsigned int feature_secdiscard:1; | 198 | unsigned int feature_secdiscard:1; |
@@ -155,6 +203,8 @@ struct blkfront_info | |||
155 | unsigned int max_indirect_segments; | 203 | unsigned int max_indirect_segments; |
156 | int is_ready; | 204 | int is_ready; |
157 | struct blk_mq_tag_set tag_set; | 205 | struct blk_mq_tag_set tag_set; |
206 | struct blkfront_ring_info *rinfo; | ||
207 | unsigned int nr_rings; | ||
158 | }; | 208 | }; |
159 | 209 | ||
160 | static unsigned int nr_minors; | 210 | static unsigned int nr_minors; |
@@ -198,38 +248,40 @@ static DEFINE_SPINLOCK(minor_lock); | |||
198 | 248 | ||
199 | #define GREFS(_psegs) ((_psegs) * GRANTS_PER_PSEG) | 249 | #define GREFS(_psegs) ((_psegs) * GRANTS_PER_PSEG) |
200 | 250 | ||
201 | static int blkfront_setup_indirect(struct blkfront_info *info); | 251 | static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo); |
202 | static int blkfront_gather_backend_features(struct blkfront_info *info); | 252 | static void blkfront_gather_backend_features(struct blkfront_info *info); |
203 | 253 | ||
204 | static int get_id_from_freelist(struct blkfront_info *info) | 254 | static int get_id_from_freelist(struct blkfront_ring_info *rinfo) |
205 | { | 255 | { |
206 | unsigned long free = info->shadow_free; | 256 | unsigned long free = rinfo->shadow_free; |
207 | BUG_ON(free >= BLK_RING_SIZE(info)); | 257 | |
208 | info->shadow_free = info->shadow[free].req.u.rw.id; | 258 | BUG_ON(free >= BLK_RING_SIZE(rinfo->dev_info)); |
209 | info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ | 259 | rinfo->shadow_free = rinfo->shadow[free].req.u.rw.id; |
260 | rinfo->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ | ||
210 | return free; | 261 | return free; |
211 | } | 262 | } |
212 | 263 | ||
213 | static int add_id_to_freelist(struct blkfront_info *info, | 264 | static int add_id_to_freelist(struct blkfront_ring_info *rinfo, |
214 | unsigned long id) | 265 | unsigned long id) |
215 | { | 266 | { |
216 | if (info->shadow[id].req.u.rw.id != id) | 267 | if (rinfo->shadow[id].req.u.rw.id != id) |
217 | return -EINVAL; | 268 | return -EINVAL; |
218 | if (info->shadow[id].request == NULL) | 269 | if (rinfo->shadow[id].request == NULL) |
219 | return -EINVAL; | 270 | return -EINVAL; |
220 | info->shadow[id].req.u.rw.id = info->shadow_free; | 271 | rinfo->shadow[id].req.u.rw.id = rinfo->shadow_free; |
221 | info->shadow[id].request = NULL; | 272 | rinfo->shadow[id].request = NULL; |
222 | info->shadow_free = id; | 273 | rinfo->shadow_free = id; |
223 | return 0; | 274 | return 0; |
224 | } | 275 | } |
225 | 276 | ||
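get_id_from_freelist() and add_id_to_freelist() above treat the per-ring shadow array as an intrusive free list: while a slot is unused, its req.u.rw.id field holds the index of the next free slot. A stand-alone sketch of the same idea with simplified types (RING_SIZE, the in_use flag and the function names are illustrative):

#include <stdio.h>

#define RING_SIZE 8

/* While a slot is free, next_free chains it to the next free slot. */
struct shadow_slot {
    unsigned long next_free;
    int in_use;
};

static struct shadow_slot shadow[RING_SIZE];
static unsigned long shadow_free;

static void freelist_init(void)
{
    unsigned long i;

    for (i = 0; i < RING_SIZE; i++)
        shadow[i].next_free = i + 1;  /* chain all slots together */
    shadow_free = 0;
}

/* Pop a free slot; callers must keep at most RING_SIZE ids outstanding. */
static unsigned long get_id(void)
{
    unsigned long id = shadow_free;

    shadow_free = shadow[id].next_free;
    shadow[id].in_use = 1;
    return id;
}

/* Push a slot back onto the free list. */
static void put_id(unsigned long id)
{
    shadow[id].next_free = shadow_free;
    shadow[id].in_use = 0;
    shadow_free = id;
}

int main(void)
{
    unsigned long a, b;

    freelist_init();
    a = get_id();
    b = get_id();
    printf("allocated ids %lu and %lu\n", a, b);
    put_id(b);
    put_id(a);
    return 0;
}

Making the list per ring is what allows each blkfront_ring_info to hand out request ids under its own ring_lock without touching the other rings.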
226 | static int fill_grant_buffer(struct blkfront_info *info, int num) | 277 | static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) |
227 | { | 278 | { |
279 | struct blkfront_info *info = rinfo->dev_info; | ||
228 | struct page *granted_page; | 280 | struct page *granted_page; |
229 | struct grant *gnt_list_entry, *n; | 281 | struct grant *gnt_list_entry, *n; |
230 | int i = 0; | 282 | int i = 0; |
231 | 283 | ||
232 | while(i < num) { | 284 | while (i < num) { |
233 | gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO); | 285 | gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO); |
234 | if (!gnt_list_entry) | 286 | if (!gnt_list_entry) |
235 | goto out_of_memory; | 287 | goto out_of_memory; |
@@ -244,7 +296,7 @@ static int fill_grant_buffer(struct blkfront_info *info, int num) | |||
244 | } | 296 | } |
245 | 297 | ||
246 | gnt_list_entry->gref = GRANT_INVALID_REF; | 298 | gnt_list_entry->gref = GRANT_INVALID_REF; |
247 | list_add(&gnt_list_entry->node, &info->grants); | 299 | list_add(&gnt_list_entry->node, &rinfo->grants); |
248 | i++; | 300 | i++; |
249 | } | 301 | } |
250 | 302 | ||
@@ -252,7 +304,7 @@ static int fill_grant_buffer(struct blkfront_info *info, int num) | |||
252 | 304 | ||
253 | out_of_memory: | 305 | out_of_memory: |
254 | list_for_each_entry_safe(gnt_list_entry, n, | 306 | list_for_each_entry_safe(gnt_list_entry, n, |
255 | &info->grants, node) { | 307 | &rinfo->grants, node) { |
256 | list_del(&gnt_list_entry->node); | 308 | list_del(&gnt_list_entry->node); |
257 | if (info->feature_persistent) | 309 | if (info->feature_persistent) |
258 | __free_page(gnt_list_entry->page); | 310 | __free_page(gnt_list_entry->page); |
@@ -263,17 +315,17 @@ out_of_memory: | |||
263 | return -ENOMEM; | 315 | return -ENOMEM; |
264 | } | 316 | } |
265 | 317 | ||
266 | static struct grant *get_free_grant(struct blkfront_info *info) | 318 | static struct grant *get_free_grant(struct blkfront_ring_info *rinfo) |
267 | { | 319 | { |
268 | struct grant *gnt_list_entry; | 320 | struct grant *gnt_list_entry; |
269 | 321 | ||
270 | BUG_ON(list_empty(&info->grants)); | 322 | BUG_ON(list_empty(&rinfo->grants)); |
271 | gnt_list_entry = list_first_entry(&info->grants, struct grant, | 323 | gnt_list_entry = list_first_entry(&rinfo->grants, struct grant, |
272 | node); | 324 | node); |
273 | list_del(&gnt_list_entry->node); | 325 | list_del(&gnt_list_entry->node); |
274 | 326 | ||
275 | if (gnt_list_entry->gref != GRANT_INVALID_REF) | 327 | if (gnt_list_entry->gref != GRANT_INVALID_REF) |
276 | info->persistent_gnts_c--; | 328 | rinfo->persistent_gnts_c--; |
277 | 329 | ||
278 | return gnt_list_entry; | 330 | return gnt_list_entry; |
279 | } | 331 | } |
@@ -289,9 +341,10 @@ static inline void grant_foreign_access(const struct grant *gnt_list_entry, | |||
289 | 341 | ||
290 | static struct grant *get_grant(grant_ref_t *gref_head, | 342 | static struct grant *get_grant(grant_ref_t *gref_head, |
291 | unsigned long gfn, | 343 | unsigned long gfn, |
292 | struct blkfront_info *info) | 344 | struct blkfront_ring_info *rinfo) |
293 | { | 345 | { |
294 | struct grant *gnt_list_entry = get_free_grant(info); | 346 | struct grant *gnt_list_entry = get_free_grant(rinfo); |
347 | struct blkfront_info *info = rinfo->dev_info; | ||
295 | 348 | ||
296 | if (gnt_list_entry->gref != GRANT_INVALID_REF) | 349 | if (gnt_list_entry->gref != GRANT_INVALID_REF) |
297 | return gnt_list_entry; | 350 | return gnt_list_entry; |
@@ -312,9 +365,10 @@ static struct grant *get_grant(grant_ref_t *gref_head, | |||
312 | } | 365 | } |
313 | 366 | ||
314 | static struct grant *get_indirect_grant(grant_ref_t *gref_head, | 367 | static struct grant *get_indirect_grant(grant_ref_t *gref_head, |
315 | struct blkfront_info *info) | 368 | struct blkfront_ring_info *rinfo) |
316 | { | 369 | { |
317 | struct grant *gnt_list_entry = get_free_grant(info); | 370 | struct grant *gnt_list_entry = get_free_grant(rinfo); |
371 | struct blkfront_info *info = rinfo->dev_info; | ||
318 | 372 | ||
319 | if (gnt_list_entry->gref != GRANT_INVALID_REF) | 373 | if (gnt_list_entry->gref != GRANT_INVALID_REF) |
320 | return gnt_list_entry; | 374 | return gnt_list_entry; |
@@ -326,8 +380,8 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head, | |||
326 | struct page *indirect_page; | 380 | struct page *indirect_page; |
327 | 381 | ||
328 | /* Fetch a pre-allocated page to use for indirect grefs */ | 382 | /* Fetch a pre-allocated page to use for indirect grefs */ |
329 | BUG_ON(list_empty(&info->indirect_pages)); | 383 | BUG_ON(list_empty(&rinfo->indirect_pages)); |
330 | indirect_page = list_first_entry(&info->indirect_pages, | 384 | indirect_page = list_first_entry(&rinfo->indirect_pages, |
331 | struct page, lru); | 385 | struct page, lru); |
332 | list_del(&indirect_page->lru); | 386 | list_del(&indirect_page->lru); |
333 | gnt_list_entry->page = indirect_page; | 387 | gnt_list_entry->page = indirect_page; |
@@ -403,8 +457,8 @@ static void xlbd_release_minors(unsigned int minor, unsigned int nr) | |||
403 | 457 | ||
404 | static void blkif_restart_queue_callback(void *arg) | 458 | static void blkif_restart_queue_callback(void *arg) |
405 | { | 459 | { |
406 | struct blkfront_info *info = (struct blkfront_info *)arg; | 460 | struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)arg; |
407 | schedule_work(&info->work); | 461 | schedule_work(&rinfo->work); |
408 | } | 462 | } |
409 | 463 | ||
410 | static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) | 464 | static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) |
@@ -456,16 +510,33 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode, | |||
456 | return 0; | 510 | return 0; |
457 | } | 511 | } |
458 | 512 | ||
459 | static int blkif_queue_discard_req(struct request *req) | 513 | static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, |
514 | struct request *req, | ||
515 | struct blkif_request **ring_req) | ||
460 | { | 516 | { |
461 | struct blkfront_info *info = req->rq_disk->private_data; | 517 | unsigned long id; |
518 | |||
519 | *ring_req = RING_GET_REQUEST(&rinfo->ring, rinfo->ring.req_prod_pvt); | ||
520 | rinfo->ring.req_prod_pvt++; | ||
521 | |||
522 | id = get_id_from_freelist(rinfo); | ||
523 | rinfo->shadow[id].request = req; | ||
524 | rinfo->shadow[id].status = REQ_WAITING; | ||
525 | rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID; | ||
526 | |||
527 | (*ring_req)->u.rw.id = id; | ||
528 | |||
529 | return id; | ||
530 | } | ||
531 | |||
532 | static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo) | ||
533 | { | ||
534 | struct blkfront_info *info = rinfo->dev_info; | ||
462 | struct blkif_request *ring_req; | 535 | struct blkif_request *ring_req; |
463 | unsigned long id; | 536 | unsigned long id; |
464 | 537 | ||
465 | /* Fill out a communications ring structure. */ | 538 | /* Fill out a communications ring structure. */ |
466 | ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); | 539 | id = blkif_ring_get_request(rinfo, req, &ring_req); |
467 | id = get_id_from_freelist(info); | ||
468 | info->shadow[id].request = req; | ||
469 | 540 | ||
470 | ring_req->operation = BLKIF_OP_DISCARD; | 541 | ring_req->operation = BLKIF_OP_DISCARD; |
471 | ring_req->u.discard.nr_sectors = blk_rq_sectors(req); | 542 | ring_req->u.discard.nr_sectors = blk_rq_sectors(req); |
@@ -476,10 +547,8 @@ static int blkif_queue_discard_req(struct request *req) | |||
476 | else | 547 | else |
477 | ring_req->u.discard.flag = 0; | 548 | ring_req->u.discard.flag = 0; |
478 | 549 | ||
479 | info->ring.req_prod_pvt++; | ||
480 | |||
481 | /* Keep a private copy so we can reissue requests when recovering. */ | 550 | /* Keep a private copy so we can reissue requests when recovering. */ |
482 | info->shadow[id].req = *ring_req; | 551 | rinfo->shadow[id].req = *ring_req; |
483 | 552 | ||
484 | return 0; | 553 | return 0; |
485 | } | 554 | } |
@@ -487,7 +556,7 @@ static int blkif_queue_discard_req(struct request *req) | |||
487 | struct setup_rw_req { | 556 | struct setup_rw_req { |
488 | unsigned int grant_idx; | 557 | unsigned int grant_idx; |
489 | struct blkif_request_segment *segments; | 558 | struct blkif_request_segment *segments; |
490 | struct blkfront_info *info; | 559 | struct blkfront_ring_info *rinfo; |
491 | struct blkif_request *ring_req; | 560 | struct blkif_request *ring_req; |
492 | grant_ref_t gref_head; | 561 | grant_ref_t gref_head; |
493 | unsigned int id; | 562 | unsigned int id; |
@@ -495,6 +564,9 @@ struct setup_rw_req { | |||
495 | bool need_copy; | 564 | bool need_copy; |
496 | unsigned int bvec_off; | 565 | unsigned int bvec_off; |
497 | char *bvec_data; | 566 | char *bvec_data; |
567 | |||
568 | bool require_extra_req; | ||
569 | struct blkif_request *extra_ring_req; | ||
498 | }; | 570 | }; |
499 | 571 | ||
500 | static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset, | 572 | static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset, |
@@ -507,8 +579,24 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset, | |||
507 | /* Convenient aliases */ | 579 | /* Convenient aliases */ |
508 | unsigned int grant_idx = setup->grant_idx; | 580 | unsigned int grant_idx = setup->grant_idx; |
509 | struct blkif_request *ring_req = setup->ring_req; | 581 | struct blkif_request *ring_req = setup->ring_req; |
510 | struct blkfront_info *info = setup->info; | 582 | struct blkfront_ring_info *rinfo = setup->rinfo; |
511 | struct blk_shadow *shadow = &info->shadow[setup->id]; | 583 | /* |
584 | * We always use the shadow of the first request to store the list | ||
585 | * of grants associated with the block I/O request. This makes the | ||
586 | * completion easier to handle even if the block I/O request is | ||
587 | * split. | ||
588 | */ | ||
589 | struct blk_shadow *shadow = &rinfo->shadow[setup->id]; | ||
590 | |||
591 | if (unlikely(setup->require_extra_req && | ||
592 | grant_idx >= BLKIF_MAX_SEGMENTS_PER_REQUEST)) { | ||
593 | /* | ||
594 | * We are using the second request; set up grant_idx | ||
595 | * to be the index into its segment array. | ||
596 | */ | ||
597 | grant_idx -= BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
598 | ring_req = setup->extra_ring_req; | ||
599 | } | ||
512 | 600 | ||
513 | if ((ring_req->operation == BLKIF_OP_INDIRECT) && | 601 | if ((ring_req->operation == BLKIF_OP_INDIRECT) && |
514 | (grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) { | 602 | (grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) { |
@@ -516,15 +604,19 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset, | |||
516 | kunmap_atomic(setup->segments); | 604 | kunmap_atomic(setup->segments); |
517 | 605 | ||
518 | n = grant_idx / GRANTS_PER_INDIRECT_FRAME; | 606 | n = grant_idx / GRANTS_PER_INDIRECT_FRAME; |
519 | gnt_list_entry = get_indirect_grant(&setup->gref_head, info); | 607 | gnt_list_entry = get_indirect_grant(&setup->gref_head, rinfo); |
520 | shadow->indirect_grants[n] = gnt_list_entry; | 608 | shadow->indirect_grants[n] = gnt_list_entry; |
521 | setup->segments = kmap_atomic(gnt_list_entry->page); | 609 | setup->segments = kmap_atomic(gnt_list_entry->page); |
522 | ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; | 610 | ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; |
523 | } | 611 | } |
524 | 612 | ||
525 | gnt_list_entry = get_grant(&setup->gref_head, gfn, info); | 613 | gnt_list_entry = get_grant(&setup->gref_head, gfn, rinfo); |
526 | ref = gnt_list_entry->gref; | 614 | ref = gnt_list_entry->gref; |
527 | shadow->grants_used[grant_idx] = gnt_list_entry; | 615 | /* |
616 | * All the grants are stored in the shadow of the first | ||
617 | * request. Therefore we have to use the global index. | ||
618 | */ | ||
619 | shadow->grants_used[setup->grant_idx] = gnt_list_entry; | ||
528 | 620 | ||
529 | if (setup->need_copy) { | 621 | if (setup->need_copy) { |
530 | void *shared_data; | 622 | void *shared_data; |
@@ -566,16 +658,36 @@ static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset, | |||
566 | (setup->grant_idx)++; | 658 | (setup->grant_idx)++; |
567 | } | 659 | } |
568 | 660 | ||
569 | static int blkif_queue_rw_req(struct request *req) | 661 | static void blkif_setup_extra_req(struct blkif_request *first, |
662 | struct blkif_request *second) | ||
570 | { | 663 | { |
571 | struct blkfront_info *info = req->rq_disk->private_data; | 664 | uint16_t nr_segments = first->u.rw.nr_segments; |
572 | struct blkif_request *ring_req; | 665 | |
573 | unsigned long id; | 666 | /* |
667 | * The second request is only present when the first request uses | ||
668 | * all its segments. It is always a continuation of the first one. | ||
669 | */ | ||
670 | first->u.rw.nr_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
671 | |||
672 | second->u.rw.nr_segments = nr_segments - BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
673 | second->u.rw.sector_number = first->u.rw.sector_number + | ||
674 | (BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / 512; | ||
675 | |||
676 | second->u.rw.handle = first->u.rw.handle; | ||
677 | second->operation = first->operation; | ||
678 | } | ||
679 | |||
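blkif_setup_extra_req() above carves the tail of an oversized I/O into a second ring request: the first request keeps BLKIF_MAX_SEGMENTS_PER_REQUEST segments, and the second one starts at the sector just past the data covered by the first. A small worked example of that sector arithmetic, assuming the usual values of a 4096-byte Xen page, 512-byte sectors and 11 segments per request:

#include <stdint.h>
#include <stdio.h>

/* Assumed interface constants; the real ones come from the Xen headers. */
#define XEN_PAGE_SIZE                  4096
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11

int main(void)
{
    /* Each segment covers one page, i.e. 4096 / 512 = 8 sectors. */
    uint64_t sectors_per_segment = XEN_PAGE_SIZE / 512;
    uint64_t second_req_offset =
        BLKIF_MAX_SEGMENTS_PER_REQUEST * sectors_per_segment;

    /* The second request starts right after the data of the first one. */
    printf("second request starts %llu sectors after the first\n",
           (unsigned long long)second_req_offset);
    return 0;
}

With those values the offset is 88 sectors, which is exactly what the (BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / 512 expression in the patch computes.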
680 | static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *rinfo) | ||
681 | { | ||
682 | struct blkfront_info *info = rinfo->dev_info; | ||
683 | struct blkif_request *ring_req, *extra_ring_req = NULL; | ||
684 | unsigned long id, extra_id = NO_ASSOCIATED_ID; | ||
685 | bool require_extra_req = false; | ||
574 | int i; | 686 | int i; |
575 | struct setup_rw_req setup = { | 687 | struct setup_rw_req setup = { |
576 | .grant_idx = 0, | 688 | .grant_idx = 0, |
577 | .segments = NULL, | 689 | .segments = NULL, |
578 | .info = info, | 690 | .rinfo = rinfo, |
579 | .need_copy = rq_data_dir(req) && info->feature_persistent, | 691 | .need_copy = rq_data_dir(req) && info->feature_persistent, |
580 | }; | 692 | }; |
581 | 693 | ||
@@ -584,7 +696,6 @@ static int blkif_queue_rw_req(struct request *req) | |||
584 | * existing persistent grants, or if we have to get new grants, | 696 | * existing persistent grants, or if we have to get new grants, |
585 | * as there are not sufficiently many free. | 697 | * as there are not sufficiently many free. |
586 | */ | 698 | */ |
587 | bool new_persistent_gnts; | ||
588 | struct scatterlist *sg; | 699 | struct scatterlist *sg; |
589 | int num_sg, max_grefs, num_grant; | 700 | int num_sg, max_grefs, num_grant; |
590 | 701 | ||
@@ -596,41 +707,36 @@ static int blkif_queue_rw_req(struct request *req) | |||
596 | */ | 707 | */ |
597 | max_grefs += INDIRECT_GREFS(max_grefs); | 708 | max_grefs += INDIRECT_GREFS(max_grefs); |
598 | 709 | ||
599 | /* Check if we have enough grants to allocate a requests */ | 710 | /* |
600 | if (info->persistent_gnts_c < max_grefs) { | 711 | * We have to reserve 'max_grefs' grants because persistent |
601 | new_persistent_gnts = 1; | 712 | * grants are shared by all rings. |
602 | if (gnttab_alloc_grant_references( | 713 | */ |
603 | max_grefs - info->persistent_gnts_c, | 714 | if (max_grefs > 0) |
604 | &setup.gref_head) < 0) { | 715 | if (gnttab_alloc_grant_references(max_grefs, &setup.gref_head) < 0) { |
605 | gnttab_request_free_callback( | 716 | gnttab_request_free_callback( |
606 | &info->callback, | 717 | &rinfo->callback, |
607 | blkif_restart_queue_callback, | 718 | blkif_restart_queue_callback, |
608 | info, | 719 | rinfo, |
609 | max_grefs); | 720 | max_grefs); |
610 | return 1; | 721 | return 1; |
611 | } | 722 | } |
612 | } else | ||
613 | new_persistent_gnts = 0; | ||
614 | 723 | ||
615 | /* Fill out a communications ring structure. */ | 724 | /* Fill out a communications ring structure. */ |
616 | ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); | 725 | id = blkif_ring_get_request(rinfo, req, &ring_req); |
617 | id = get_id_from_freelist(info); | ||
618 | info->shadow[id].request = req; | ||
619 | |||
620 | BUG_ON(info->max_indirect_segments == 0 && | ||
621 | GREFS(req->nr_phys_segments) > BLKIF_MAX_SEGMENTS_PER_REQUEST); | ||
622 | BUG_ON(info->max_indirect_segments && | ||
623 | GREFS(req->nr_phys_segments) > info->max_indirect_segments); | ||
624 | 726 | ||
625 | num_sg = blk_rq_map_sg(req->q, req, info->shadow[id].sg); | 727 | num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg); |
626 | num_grant = 0; | 728 | num_grant = 0; |
627 | /* Calculate the number of grants used */ | 729 | /* Calculate the number of grants used */ |
628 | for_each_sg(info->shadow[id].sg, sg, num_sg, i) | 730 | for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) |
629 | num_grant += gnttab_count_grant(sg->offset, sg->length); | 731 | num_grant += gnttab_count_grant(sg->offset, sg->length); |
630 | 732 | ||
631 | ring_req->u.rw.id = id; | 733 | require_extra_req = info->max_indirect_segments == 0 && |
632 | info->shadow[id].num_sg = num_sg; | 734 | num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST; |
633 | if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST) { | 735 | BUG_ON(!HAS_EXTRA_REQ && require_extra_req); |
736 | |||
737 | rinfo->shadow[id].num_sg = num_sg; | ||
738 | if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST && | ||
739 | likely(!require_extra_req)) { | ||
634 | /* | 740 | /* |
635 | * The indirect operation can only be a BLKIF_OP_READ or | 741 | * The indirect operation can only be a BLKIF_OP_READ or |
636 | * BLKIF_OP_WRITE | 742 | * BLKIF_OP_WRITE |
@@ -670,11 +776,31 @@ static int blkif_queue_rw_req(struct request *req) | |||
670 | } | 776 | } |
671 | } | 777 | } |
672 | ring_req->u.rw.nr_segments = num_grant; | 778 | ring_req->u.rw.nr_segments = num_grant; |
779 | if (unlikely(require_extra_req)) { | ||
780 | extra_id = blkif_ring_get_request(rinfo, req, | ||
781 | &extra_ring_req); | ||
782 | /* | ||
783 | * Only the first request contains the scatter-gather | ||
784 | * list. | ||
785 | */ | ||
786 | rinfo->shadow[extra_id].num_sg = 0; | ||
787 | |||
788 | blkif_setup_extra_req(ring_req, extra_ring_req); | ||
789 | |||
790 | /* Link the 2 requests together */ | ||
791 | rinfo->shadow[extra_id].associated_id = id; | ||
792 | rinfo->shadow[id].associated_id = extra_id; | ||
793 | } | ||
673 | } | 794 | } |
674 | 795 | ||
675 | setup.ring_req = ring_req; | 796 | setup.ring_req = ring_req; |
676 | setup.id = id; | 797 | setup.id = id; |
677 | for_each_sg(info->shadow[id].sg, sg, num_sg, i) { | 798 | |
799 | setup.require_extra_req = require_extra_req; | ||
800 | if (unlikely(require_extra_req)) | ||
801 | setup.extra_ring_req = extra_ring_req; | ||
802 | |||
803 | for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) { | ||
678 | BUG_ON(sg->offset + sg->length > PAGE_SIZE); | 804 | BUG_ON(sg->offset + sg->length > PAGE_SIZE); |
679 | 805 | ||
680 | if (setup.need_copy) { | 806 | if (setup.need_copy) { |
@@ -694,12 +820,12 @@ static int blkif_queue_rw_req(struct request *req) | |||
694 | if (setup.segments) | 820 | if (setup.segments) |
695 | kunmap_atomic(setup.segments); | 821 | kunmap_atomic(setup.segments); |
696 | 822 | ||
697 | info->ring.req_prod_pvt++; | ||
698 | |||
699 | /* Keep a private copy so we can reissue requests when recovering. */ | 823 | /* Keep a private copy so we can reissue requests when recovering. */ |
700 | info->shadow[id].req = *ring_req; | 824 | rinfo->shadow[id].req = *ring_req; |
825 | if (unlikely(require_extra_req)) | ||
826 | rinfo->shadow[extra_id].req = *extra_ring_req; | ||
701 | 827 | ||
702 | if (new_persistent_gnts) | 828 | if (max_grefs > 0) |
703 | gnttab_free_grant_references(setup.gref_head); | 829 | gnttab_free_grant_references(setup.gref_head); |
704 | 830 | ||
705 | return 0; | 831 | return 0; |
@@ -711,27 +837,25 @@ static int blkif_queue_rw_req(struct request *req) | |||
711 | * | 837 | * |
712 | * @req: a request struct | 838 | * @req: a request struct |
713 | */ | 839 | */ |
714 | static int blkif_queue_request(struct request *req) | 840 | static int blkif_queue_request(struct request *req, struct blkfront_ring_info *rinfo) |
715 | { | 841 | { |
716 | struct blkfront_info *info = req->rq_disk->private_data; | 842 | if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED)) |
717 | |||
718 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) | ||
719 | return 1; | 843 | return 1; |
720 | 844 | ||
721 | if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) | 845 | if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) |
722 | return blkif_queue_discard_req(req); | 846 | return blkif_queue_discard_req(req, rinfo); |
723 | else | 847 | else |
724 | return blkif_queue_rw_req(req); | 848 | return blkif_queue_rw_req(req, rinfo); |
725 | } | 849 | } |
726 | 850 | ||
727 | static inline void flush_requests(struct blkfront_info *info) | 851 | static inline void flush_requests(struct blkfront_ring_info *rinfo) |
728 | { | 852 | { |
729 | int notify; | 853 | int notify; |
730 | 854 | ||
731 | RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); | 855 | RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rinfo->ring, notify); |
732 | 856 | ||
733 | if (notify) | 857 | if (notify) |
734 | notify_remote_via_irq(info->irq); | 858 | notify_remote_via_irq(rinfo->irq); |
735 | } | 859 | } |
736 | 860 | ||
737 | static inline bool blkif_request_flush_invalid(struct request *req, | 861 | static inline bool blkif_request_flush_invalid(struct request *req, |
@@ -745,38 +869,50 @@ static inline bool blkif_request_flush_invalid(struct request *req, | |||
745 | } | 869 | } |
746 | 870 | ||
747 | static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx, | 871 | static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx, |
748 | const struct blk_mq_queue_data *qd) | 872 | const struct blk_mq_queue_data *qd) |
749 | { | 873 | { |
750 | struct blkfront_info *info = qd->rq->rq_disk->private_data; | 874 | unsigned long flags; |
875 | struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data; | ||
751 | 876 | ||
752 | blk_mq_start_request(qd->rq); | 877 | blk_mq_start_request(qd->rq); |
753 | spin_lock_irq(&info->io_lock); | 878 | spin_lock_irqsave(&rinfo->ring_lock, flags); |
754 | if (RING_FULL(&info->ring)) | 879 | if (RING_FULL(&rinfo->ring)) |
755 | goto out_busy; | 880 | goto out_busy; |
756 | 881 | ||
757 | if (blkif_request_flush_invalid(qd->rq, info)) | 882 | if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info)) |
758 | goto out_err; | 883 | goto out_err; |
759 | 884 | ||
760 | if (blkif_queue_request(qd->rq)) | 885 | if (blkif_queue_request(qd->rq, rinfo)) |
761 | goto out_busy; | 886 | goto out_busy; |
762 | 887 | ||
763 | flush_requests(info); | 888 | flush_requests(rinfo); |
764 | spin_unlock_irq(&info->io_lock); | 889 | spin_unlock_irqrestore(&rinfo->ring_lock, flags); |
765 | return BLK_MQ_RQ_QUEUE_OK; | 890 | return BLK_MQ_RQ_QUEUE_OK; |
766 | 891 | ||
767 | out_err: | 892 | out_err: |
768 | spin_unlock_irq(&info->io_lock); | 893 | spin_unlock_irqrestore(&rinfo->ring_lock, flags); |
769 | return BLK_MQ_RQ_QUEUE_ERROR; | 894 | return BLK_MQ_RQ_QUEUE_ERROR; |
770 | 895 | ||
771 | out_busy: | 896 | out_busy: |
772 | spin_unlock_irq(&info->io_lock); | 897 | spin_unlock_irqrestore(&rinfo->ring_lock, flags); |
773 | blk_mq_stop_hw_queue(hctx); | 898 | blk_mq_stop_hw_queue(hctx); |
774 | return BLK_MQ_RQ_QUEUE_BUSY; | 899 | return BLK_MQ_RQ_QUEUE_BUSY; |
775 | } | 900 | } |
776 | 901 | ||
902 | static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, | ||
903 | unsigned int index) | ||
904 | { | ||
905 | struct blkfront_info *info = (struct blkfront_info *)data; | ||
906 | |||
907 | BUG_ON(info->nr_rings <= index); | ||
908 | hctx->driver_data = &info->rinfo[index]; | ||
909 | return 0; | ||
910 | } | ||
911 | |||
777 | static struct blk_mq_ops blkfront_mq_ops = { | 912 | static struct blk_mq_ops blkfront_mq_ops = { |
778 | .queue_rq = blkif_queue_rq, | 913 | .queue_rq = blkif_queue_rq, |
779 | .map_queue = blk_mq_map_queue, | 914 | .map_queue = blk_mq_map_queue, |
915 | .init_hctx = blk_mq_init_hctx, | ||
780 | }; | 916 | }; |
781 | 917 | ||
782 | static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, | 918 | static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, |
@@ -788,19 +924,28 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, | |||
788 | 924 | ||
789 | memset(&info->tag_set, 0, sizeof(info->tag_set)); | 925 | memset(&info->tag_set, 0, sizeof(info->tag_set)); |
790 | info->tag_set.ops = &blkfront_mq_ops; | 926 | info->tag_set.ops = &blkfront_mq_ops; |
791 | info->tag_set.nr_hw_queues = 1; | 927 | info->tag_set.nr_hw_queues = info->nr_rings; |
792 | info->tag_set.queue_depth = BLK_RING_SIZE(info); | 928 | if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) { |
929 | /* | ||
930 | * When indirect descriptors are not supported, the I/O request | ||
931 | * will be split into multiple requests on the ring. | ||
932 | * To avoid problems when sending the requests, divide the | ||
933 | * queue depth by 2. | ||
934 | */ | ||
935 | info->tag_set.queue_depth = BLK_RING_SIZE(info) / 2; | ||
936 | } else | ||
937 | info->tag_set.queue_depth = BLK_RING_SIZE(info); | ||
793 | info->tag_set.numa_node = NUMA_NO_NODE; | 938 | info->tag_set.numa_node = NUMA_NO_NODE; |
794 | info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; | 939 | info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; |
795 | info->tag_set.cmd_size = 0; | 940 | info->tag_set.cmd_size = 0; |
796 | info->tag_set.driver_data = info; | 941 | info->tag_set.driver_data = info; |
797 | 942 | ||
798 | if (blk_mq_alloc_tag_set(&info->tag_set)) | 943 | if (blk_mq_alloc_tag_set(&info->tag_set)) |
799 | return -1; | 944 | return -EINVAL; |
800 | rq = blk_mq_init_queue(&info->tag_set); | 945 | rq = blk_mq_init_queue(&info->tag_set); |
801 | if (IS_ERR(rq)) { | 946 | if (IS_ERR(rq)) { |
802 | blk_mq_free_tag_set(&info->tag_set); | 947 | blk_mq_free_tag_set(&info->tag_set); |
803 | return -1; | 948 | return PTR_ERR(rq); |
804 | } | 949 | } |
805 | 950 | ||
806 | queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); | 951 | queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); |
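The queue-depth choice a few lines above keeps the blk-mq tag set from over-committing the shared ring: when a request may be split in two (HAS_EXTRA_REQ with no indirect segments), each tag can consume two ring slots, so the advertised depth is halved. A minimal sketch of that sizing decision with an illustrative ring size:

#include <stdio.h>

/* Illustrative ring geometry; the real size depends on nr_ring_pages. */
#define BLK_RING_SIZE 64

int main(void)
{
    int has_extra_req = 1;           /* e.g. 64KB guest pages */
    int max_indirect_segments = 0;   /* backend lacks indirect descriptors */
    unsigned int queue_depth;

    if (has_extra_req && max_indirect_segments == 0)
        queue_depth = BLK_RING_SIZE / 2;  /* each I/O may take two slots */
    else
        queue_depth = BLK_RING_SIZE;

    printf("blk-mq queue depth = %u\n", queue_depth);
    return 0;
}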
@@ -1028,7 +1173,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, | |||
1028 | 1173 | ||
1029 | static void xlvbd_release_gendisk(struct blkfront_info *info) | 1174 | static void xlvbd_release_gendisk(struct blkfront_info *info) |
1030 | { | 1175 | { |
1031 | unsigned int minor, nr_minors; | 1176 | unsigned int minor, nr_minors, i; |
1032 | 1177 | ||
1033 | if (info->rq == NULL) | 1178 | if (info->rq == NULL) |
1034 | return; | 1179 | return; |
@@ -1036,11 +1181,15 @@ static void xlvbd_release_gendisk(struct blkfront_info *info) | |||
1036 | /* No more blkif_request(). */ | 1181 | /* No more blkif_request(). */ |
1037 | blk_mq_stop_hw_queues(info->rq); | 1182 | blk_mq_stop_hw_queues(info->rq); |
1038 | 1183 | ||
1039 | /* No more gnttab callback work. */ | 1184 | for (i = 0; i < info->nr_rings; i++) { |
1040 | gnttab_cancel_free_callback(&info->callback); | 1185 | struct blkfront_ring_info *rinfo = &info->rinfo[i]; |
1041 | 1186 | ||
1042 | /* Flush gnttab callback work. Must be done with no locks held. */ | 1187 | /* No more gnttab callback work. */ |
1043 | flush_work(&info->work); | 1188 | gnttab_cancel_free_callback(&rinfo->callback); |
1189 | |||
1190 | /* Flush gnttab callback work. Must be done with no locks held. */ | ||
1191 | flush_work(&rinfo->work); | ||
1192 | } | ||
1044 | 1193 | ||
1045 | del_gendisk(info->gd); | 1194 | del_gendisk(info->gd); |
1046 | 1195 | ||
@@ -1056,88 +1205,87 @@ static void xlvbd_release_gendisk(struct blkfront_info *info) | |||
1056 | info->gd = NULL; | 1205 | info->gd = NULL; |
1057 | } | 1206 | } |
1058 | 1207 | ||
1059 | /* Must be called with io_lock held */ | 1208 | /* The caller must already hold rinfo->ring_lock. */ |
1060 | static void kick_pending_request_queues(struct blkfront_info *info) | 1209 | static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo) |
1061 | { | 1210 | { |
1062 | if (!RING_FULL(&info->ring)) | 1211 | if (!RING_FULL(&rinfo->ring)) |
1063 | blk_mq_start_stopped_hw_queues(info->rq, true); | 1212 | blk_mq_start_stopped_hw_queues(rinfo->dev_info->rq, true); |
1064 | } | 1213 | } |
1065 | 1214 | ||
1066 | static void blkif_restart_queue(struct work_struct *work) | 1215 | static void kick_pending_request_queues(struct blkfront_ring_info *rinfo) |
1067 | { | 1216 | { |
1068 | struct blkfront_info *info = container_of(work, struct blkfront_info, work); | 1217 | unsigned long flags; |
1069 | 1218 | ||
1070 | spin_lock_irq(&info->io_lock); | 1219 | spin_lock_irqsave(&rinfo->ring_lock, flags); |
1071 | if (info->connected == BLKIF_STATE_CONNECTED) | 1220 | kick_pending_request_queues_locked(rinfo); |
1072 | kick_pending_request_queues(info); | 1221 | spin_unlock_irqrestore(&rinfo->ring_lock, flags); |
1073 | spin_unlock_irq(&info->io_lock); | ||
1074 | } | 1222 | } |
1075 | 1223 | ||
1076 | static void blkif_free(struct blkfront_info *info, int suspend) | 1224 | static void blkif_restart_queue(struct work_struct *work) |
1077 | { | 1225 | { |
1078 | struct grant *persistent_gnt; | 1226 | struct blkfront_ring_info *rinfo = container_of(work, struct blkfront_ring_info, work); |
1079 | struct grant *n; | ||
1080 | int i, j, segs; | ||
1081 | 1227 | ||
1082 | /* Prevent new requests being issued until we fix things up. */ | 1228 | if (rinfo->dev_info->connected == BLKIF_STATE_CONNECTED) |
1083 | spin_lock_irq(&info->io_lock); | 1229 | kick_pending_request_queues(rinfo); |
1084 | info->connected = suspend ? | 1230 | } |
1085 | BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; | ||
1086 | /* No more blkif_request(). */ | ||
1087 | if (info->rq) | ||
1088 | blk_mq_stop_hw_queues(info->rq); | ||
1089 | 1231 | ||
1090 | /* Remove all persistent grants */ | 1232 | static void blkif_free_ring(struct blkfront_ring_info *rinfo) |
1091 | if (!list_empty(&info->grants)) { | 1233 | { |
1092 | list_for_each_entry_safe(persistent_gnt, n, | 1234 | struct grant *persistent_gnt, *n; |
1093 | &info->grants, node) { | 1235 | struct blkfront_info *info = rinfo->dev_info; |
1094 | list_del(&persistent_gnt->node); | 1236 | int i, j, segs; |
1095 | if (persistent_gnt->gref != GRANT_INVALID_REF) { | ||
1096 | gnttab_end_foreign_access(persistent_gnt->gref, | ||
1097 | 0, 0UL); | ||
1098 | info->persistent_gnts_c--; | ||
1099 | } | ||
1100 | if (info->feature_persistent) | ||
1101 | __free_page(persistent_gnt->page); | ||
1102 | kfree(persistent_gnt); | ||
1103 | } | ||
1104 | } | ||
1105 | BUG_ON(info->persistent_gnts_c != 0); | ||
1106 | 1237 | ||
1107 | /* | 1238 | /* |
1108 | * Remove indirect pages, this only happens when using indirect | 1239 | * Remove indirect pages, this only happens when using indirect |
1109 | * descriptors but not persistent grants | 1240 | * descriptors but not persistent grants |
1110 | */ | 1241 | */ |
1111 | if (!list_empty(&info->indirect_pages)) { | 1242 | if (!list_empty(&rinfo->indirect_pages)) { |
1112 | struct page *indirect_page, *n; | 1243 | struct page *indirect_page, *n; |
1113 | 1244 | ||
1114 | BUG_ON(info->feature_persistent); | 1245 | BUG_ON(info->feature_persistent); |
1115 | list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) { | 1246 | list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { |
1116 | list_del(&indirect_page->lru); | 1247 | list_del(&indirect_page->lru); |
1117 | __free_page(indirect_page); | 1248 | __free_page(indirect_page); |
1118 | } | 1249 | } |
1119 | } | 1250 | } |
1120 | 1251 | ||
1252 | /* Remove all persistent grants. */ | ||
1253 | if (!list_empty(&rinfo->grants)) { | ||
1254 | list_for_each_entry_safe(persistent_gnt, n, | ||
1255 | &rinfo->grants, node) { | ||
1256 | list_del(&persistent_gnt->node); | ||
1257 | if (persistent_gnt->gref != GRANT_INVALID_REF) { | ||
1258 | gnttab_end_foreign_access(persistent_gnt->gref, | ||
1259 | 0, 0UL); | ||
1260 | rinfo->persistent_gnts_c--; | ||
1261 | } | ||
1262 | if (info->feature_persistent) | ||
1263 | __free_page(persistent_gnt->page); | ||
1264 | kfree(persistent_gnt); | ||
1265 | } | ||
1266 | } | ||
1267 | BUG_ON(rinfo->persistent_gnts_c != 0); | ||
1268 | |||
1121 | for (i = 0; i < BLK_RING_SIZE(info); i++) { | 1269 | for (i = 0; i < BLK_RING_SIZE(info); i++) { |
1122 | /* | 1270 | /* |
1123 | * Clear persistent grants present in requests already | 1271 | * Clear persistent grants present in requests already |
1124 | * on the shared ring | 1272 | * on the shared ring |
1125 | */ | 1273 | */ |
1126 | if (!info->shadow[i].request) | 1274 | if (!rinfo->shadow[i].request) |
1127 | goto free_shadow; | 1275 | goto free_shadow; |
1128 | 1276 | ||
1129 | segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ? | 1277 | segs = rinfo->shadow[i].req.operation == BLKIF_OP_INDIRECT ? |
1130 | info->shadow[i].req.u.indirect.nr_segments : | 1278 | rinfo->shadow[i].req.u.indirect.nr_segments : |
1131 | info->shadow[i].req.u.rw.nr_segments; | 1279 | rinfo->shadow[i].req.u.rw.nr_segments; |
1132 | for (j = 0; j < segs; j++) { | 1280 | for (j = 0; j < segs; j++) { |
1133 | persistent_gnt = info->shadow[i].grants_used[j]; | 1281 | persistent_gnt = rinfo->shadow[i].grants_used[j]; |
1134 | gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); | 1282 | gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); |
1135 | if (info->feature_persistent) | 1283 | if (info->feature_persistent) |
1136 | __free_page(persistent_gnt->page); | 1284 | __free_page(persistent_gnt->page); |
1137 | kfree(persistent_gnt); | 1285 | kfree(persistent_gnt); |
1138 | } | 1286 | } |
1139 | 1287 | ||
1140 | if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT) | 1288 | if (rinfo->shadow[i].req.operation != BLKIF_OP_INDIRECT) |
1141 | /* | 1289 | /* |
1142 | * If this is not an indirect operation don't try to | 1290 | * If this is not an indirect operation don't try to |
1143 | * free indirect segments | 1291 | * free indirect segments |
@@ -1145,42 +1293,59 @@ static void blkif_free(struct blkfront_info *info, int suspend) | |||
1145 | goto free_shadow; | 1293 | goto free_shadow; |
1146 | 1294 | ||
1147 | for (j = 0; j < INDIRECT_GREFS(segs); j++) { | 1295 | for (j = 0; j < INDIRECT_GREFS(segs); j++) { |
1148 | persistent_gnt = info->shadow[i].indirect_grants[j]; | 1296 | persistent_gnt = rinfo->shadow[i].indirect_grants[j]; |
1149 | gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); | 1297 | gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); |
1150 | __free_page(persistent_gnt->page); | 1298 | __free_page(persistent_gnt->page); |
1151 | kfree(persistent_gnt); | 1299 | kfree(persistent_gnt); |
1152 | } | 1300 | } |
1153 | 1301 | ||
1154 | free_shadow: | 1302 | free_shadow: |
1155 | kfree(info->shadow[i].grants_used); | 1303 | kfree(rinfo->shadow[i].grants_used); |
1156 | info->shadow[i].grants_used = NULL; | 1304 | rinfo->shadow[i].grants_used = NULL; |
1157 | kfree(info->shadow[i].indirect_grants); | 1305 | kfree(rinfo->shadow[i].indirect_grants); |
1158 | info->shadow[i].indirect_grants = NULL; | 1306 | rinfo->shadow[i].indirect_grants = NULL; |
1159 | kfree(info->shadow[i].sg); | 1307 | kfree(rinfo->shadow[i].sg); |
1160 | info->shadow[i].sg = NULL; | 1308 | rinfo->shadow[i].sg = NULL; |
1161 | } | 1309 | } |
1162 | 1310 | ||
1163 | /* No more gnttab callback work. */ | 1311 | /* No more gnttab callback work. */ |
1164 | gnttab_cancel_free_callback(&info->callback); | 1312 | gnttab_cancel_free_callback(&rinfo->callback); |
1165 | spin_unlock_irq(&info->io_lock); | ||
1166 | 1313 | ||
1167 | /* Flush gnttab callback work. Must be done with no locks held. */ | 1314 | /* Flush gnttab callback work. Must be done with no locks held. */ |
1168 | flush_work(&info->work); | 1315 | flush_work(&rinfo->work); |
1169 | 1316 | ||
1170 | /* Free resources associated with old device channel. */ | 1317 | /* Free resources associated with old device channel. */ |
1171 | for (i = 0; i < info->nr_ring_pages; i++) { | 1318 | for (i = 0; i < info->nr_ring_pages; i++) { |
1172 | if (info->ring_ref[i] != GRANT_INVALID_REF) { | 1319 | if (rinfo->ring_ref[i] != GRANT_INVALID_REF) { |
1173 | gnttab_end_foreign_access(info->ring_ref[i], 0, 0); | 1320 | gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0); |
1174 | info->ring_ref[i] = GRANT_INVALID_REF; | 1321 | rinfo->ring_ref[i] = GRANT_INVALID_REF; |
1175 | } | 1322 | } |
1176 | } | 1323 | } |
1177 | free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE)); | 1324 | free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE)); |
1178 | info->ring.sring = NULL; | 1325 | rinfo->ring.sring = NULL; |
1179 | 1326 | ||
1180 | if (info->irq) | 1327 | if (rinfo->irq) |
1181 | unbind_from_irqhandler(info->irq, info); | 1328 | unbind_from_irqhandler(rinfo->irq, rinfo); |
1182 | info->evtchn = info->irq = 0; | 1329 | rinfo->evtchn = rinfo->irq = 0; |
1330 | } | ||
1183 | 1331 | ||
1332 | static void blkif_free(struct blkfront_info *info, int suspend) | ||
1333 | { | ||
1334 | unsigned int i; | ||
1335 | |||
1336 | /* Prevent new requests being issued until we fix things up. */ | ||
1337 | info->connected = suspend ? | ||
1338 | BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; | ||
1339 | /* No more blkif_request(). */ | ||
1340 | if (info->rq) | ||
1341 | blk_mq_stop_hw_queues(info->rq); | ||
1342 | |||
1343 | for (i = 0; i < info->nr_rings; i++) | ||
1344 | blkif_free_ring(&info->rinfo[i]); | ||
1345 | |||
1346 | kfree(info->rinfo); | ||
1347 | info->rinfo = NULL; | ||
1348 | info->nr_rings = 0; | ||
1184 | } | 1349 | } |
1185 | 1350 | ||
1186 | struct copy_from_grant { | 1351 | struct copy_from_grant { |
@@ -1209,19 +1374,93 @@ static void blkif_copy_from_grant(unsigned long gfn, unsigned int offset, | |||
1209 | kunmap_atomic(shared_data); | 1374 | kunmap_atomic(shared_data); |
1210 | } | 1375 | } |
1211 | 1376 | ||
1212 | static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, | 1377 | static enum blk_req_status blkif_rsp_to_req_status(int rsp) |
1378 | { | ||
1379 | switch (rsp) | ||
1380 | { | ||
1381 | case BLKIF_RSP_OKAY: | ||
1382 | return REQ_DONE; | ||
1383 | case BLKIF_RSP_EOPNOTSUPP: | ||
1384 | return REQ_EOPNOTSUPP; | ||
1385 | case BLKIF_RSP_ERROR: | ||
1386 | /* Fallthrough. */ | ||
1387 | default: | ||
1388 | return REQ_ERROR; | ||
1389 | } | ||
1390 | } | ||
1391 | |||
1392 | /* | ||
1393 | * Get the final status of the block request based on the two ring responses. | ||
1394 | */ | ||
1395 | static int blkif_get_final_status(enum blk_req_status s1, | ||
1396 | enum blk_req_status s2) | ||
1397 | { | ||
1398 | BUG_ON(s1 == REQ_WAITING); | ||
1399 | BUG_ON(s2 == REQ_WAITING); | ||
1400 | |||
1401 | if (s1 == REQ_ERROR || s2 == REQ_ERROR) | ||
1402 | return BLKIF_RSP_ERROR; | ||
1403 | else if (s1 == REQ_EOPNOTSUPP || s2 == REQ_EOPNOTSUPP) | ||
1404 | return BLKIF_RSP_EOPNOTSUPP; | ||
1405 | return BLKIF_RSP_OKAY; | ||
1406 | } | ||
1407 | |||
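blkif_rsp_to_req_status() and blkif_get_final_status() above exist because a request may have been split into two ring requests: each half records its own outcome in the shadow, completion is deferred until both halves have answered, and the final status is the worse of the two. A stripped-down, stand-alone sketch of that wait-for-both-halves pattern (only the REQ_* state names mirror the patch; the structures and helper are mock-ups):

#include <stdio.h>

enum blk_req_status { REQ_WAITING, REQ_DONE, REQ_ERROR };

struct half {
    enum blk_req_status status;
    int associated;                  /* index of the other half */
};

/* Returns 1 once both halves have completed and the request can finish. */
static int try_complete(struct half *halves, int idx, enum blk_req_status rsp)
{
    struct half *s = &halves[idx];
    struct half *other = &halves[s->associated];

    s->status = rsp;                 /* remember this half's outcome */
    if (other->status == REQ_WAITING)
        return 0;                    /* first response: keep waiting */
    return 1;                        /* second response: finish the request */
}

int main(void)
{
    struct half halves[2] = { { REQ_WAITING, 1 }, { REQ_WAITING, 0 } };

    printf("response 0 -> complete? %d\n", try_complete(halves, 0, REQ_DONE));
    printf("response 1 -> complete? %d\n", try_complete(halves, 1, REQ_DONE));
    return 0;
}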
1408 | static bool blkif_completion(unsigned long *id, | ||
1409 | struct blkfront_ring_info *rinfo, | ||
1213 | struct blkif_response *bret) | 1410 | struct blkif_response *bret) |
1214 | { | 1411 | { |
1215 | int i = 0; | 1412 | int i = 0; |
1216 | struct scatterlist *sg; | 1413 | struct scatterlist *sg; |
1217 | int num_sg, num_grant; | 1414 | int num_sg, num_grant; |
1415 | struct blkfront_info *info = rinfo->dev_info; | ||
1416 | struct blk_shadow *s = &rinfo->shadow[*id]; | ||
1218 | struct copy_from_grant data = { | 1417 | struct copy_from_grant data = { |
1219 | .s = s, | ||
1220 | .grant_idx = 0, | 1418 | .grant_idx = 0, |
1221 | }; | 1419 | }; |
1222 | 1420 | ||
1223 | num_grant = s->req.operation == BLKIF_OP_INDIRECT ? | 1421 | num_grant = s->req.operation == BLKIF_OP_INDIRECT ? |
1224 | s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments; | 1422 | s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments; |
1423 | |||
1424 | /* The I/O request may be split in two. */ | ||
1425 | if (unlikely(s->associated_id != NO_ASSOCIATED_ID)) { | ||
1426 | struct blk_shadow *s2 = &rinfo->shadow[s->associated_id]; | ||
1427 | |||
1428 | /* Keep the status of the current response in shadow. */ | ||
1429 | s->status = blkif_rsp_to_req_status(bret->status); | ||
1430 | |||
1431 | /* Wait for the second response if it is not here yet. */ | ||
1432 | if (s2->status == REQ_WAITING) | ||
1433 | return 0; | ||
1434 | |||
1435 | bret->status = blkif_get_final_status(s->status, | ||
1436 | s2->status); | ||
1437 | |||
1438 | /* | ||
1439 | * All the grants are stored in the first shadow in order | ||
1440 | * to make the completion code simpler. | ||
1441 | */ | ||
1442 | num_grant += s2->req.u.rw.nr_segments; | ||
1443 | |||
1444 | /* | ||
1445 | * The two responses may not come in order. Only the | ||
1446 | * first request will store the scatter-gather list. | ||
1447 | */ | ||
1448 | if (s2->num_sg != 0) { | ||
1449 | /* Update "id" with the ID of the first response. */ | ||
1450 | *id = s->associated_id; | ||
1451 | s = s2; | ||
1452 | } | ||
1453 | |||
1454 | /* | ||
1455 | * We don't need the second request anymore, so recycle | ||
1456 | * it now. | ||
1457 | */ | ||
1458 | if (add_id_to_freelist(rinfo, s->associated_id)) | ||
1459 | WARN(1, "%s: can't recycle the second part (id = %ld) of the request\n", | ||
1460 | info->gd->disk_name, s->associated_id); | ||
1461 | } | ||
1462 | |||
1463 | data.s = s; | ||
1225 | num_sg = s->num_sg; | 1464 | num_sg = s->num_sg; |
1226 | 1465 | ||
1227 | if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { | 1466 | if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { |
@@ -1252,8 +1491,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, | |||
1252 | if (!info->feature_persistent) | 1491 | if (!info->feature_persistent) |
1253 | pr_alert_ratelimited("backend has not unmapped grant: %u\n", | 1492 | pr_alert_ratelimited("backend has not unmapped grant: %u\n", |
1254 | s->grants_used[i]->gref); | 1493 | s->grants_used[i]->gref); |
1255 | list_add(&s->grants_used[i]->node, &info->grants); | 1494 | list_add(&s->grants_used[i]->node, &rinfo->grants); |
1256 | info->persistent_gnts_c++; | 1495 | rinfo->persistent_gnts_c++; |
1257 | } else { | 1496 | } else { |
1258 | /* | 1497 | /* |
1259 | * If the grant is not mapped by the backend we end the | 1498 | * If the grant is not mapped by the backend we end the |
@@ -1263,7 +1502,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, | |||
1263 | */ | 1502 | */ |
1264 | gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL); | 1503 | gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL); |
1265 | s->grants_used[i]->gref = GRANT_INVALID_REF; | 1504 | s->grants_used[i]->gref = GRANT_INVALID_REF; |
1266 | list_add_tail(&s->grants_used[i]->node, &info->grants); | 1505 | list_add_tail(&s->grants_used[i]->node, &rinfo->grants); |
1267 | } | 1506 | } |
1268 | } | 1507 | } |
1269 | if (s->req.operation == BLKIF_OP_INDIRECT) { | 1508 | if (s->req.operation == BLKIF_OP_INDIRECT) { |
@@ -1272,8 +1511,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, | |||
1272 | if (!info->feature_persistent) | 1511 | if (!info->feature_persistent) |
1273 | pr_alert_ratelimited("backend has not unmapped grant: %u\n", | 1512 | pr_alert_ratelimited("backend has not unmapped grant: %u\n", |
1274 | s->indirect_grants[i]->gref); | 1513 | s->indirect_grants[i]->gref); |
1275 | list_add(&s->indirect_grants[i]->node, &info->grants); | 1514 | list_add(&s->indirect_grants[i]->node, &rinfo->grants); |
1276 | info->persistent_gnts_c++; | 1515 | rinfo->persistent_gnts_c++; |
1277 | } else { | 1516 | } else { |
1278 | struct page *indirect_page; | 1517 | struct page *indirect_page; |
1279 | 1518 | ||
@@ -1284,13 +1523,15 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, | |||
1284 | */ | 1523 | */ |
1285 | if (!info->feature_persistent) { | 1524 | if (!info->feature_persistent) { |
1286 | indirect_page = s->indirect_grants[i]->page; | 1525 | indirect_page = s->indirect_grants[i]->page; |
1287 | list_add(&indirect_page->lru, &info->indirect_pages); | 1526 | list_add(&indirect_page->lru, &rinfo->indirect_pages); |
1288 | } | 1527 | } |
1289 | s->indirect_grants[i]->gref = GRANT_INVALID_REF; | 1528 | s->indirect_grants[i]->gref = GRANT_INVALID_REF; |
1290 | list_add_tail(&s->indirect_grants[i]->node, &info->grants); | 1529 | list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants); |
1291 | } | 1530 | } |
1292 | } | 1531 | } |
1293 | } | 1532 | } |
1533 | |||
1534 | return 1; | ||
1294 | } | 1535 | } |
1295 | 1536 | ||
1296 | static irqreturn_t blkif_interrupt(int irq, void *dev_id) | 1537 | static irqreturn_t blkif_interrupt(int irq, void *dev_id) |
@@ -1299,24 +1540,22 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
1299 | struct blkif_response *bret; | 1540 | struct blkif_response *bret; |
1300 | RING_IDX i, rp; | 1541 | RING_IDX i, rp; |
1301 | unsigned long flags; | 1542 | unsigned long flags; |
1302 | struct blkfront_info *info = (struct blkfront_info *)dev_id; | 1543 | struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; |
1544 | struct blkfront_info *info = rinfo->dev_info; | ||
1303 | int error; | 1545 | int error; |
1304 | 1546 | ||
1305 | spin_lock_irqsave(&info->io_lock, flags); | 1547 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) |
1306 | |||
1307 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { | ||
1308 | spin_unlock_irqrestore(&info->io_lock, flags); | ||
1309 | return IRQ_HANDLED; | 1548 | return IRQ_HANDLED; |
1310 | } | ||
1311 | 1549 | ||
1550 | spin_lock_irqsave(&rinfo->ring_lock, flags); | ||
1312 | again: | 1551 | again: |
1313 | rp = info->ring.sring->rsp_prod; | 1552 | rp = rinfo->ring.sring->rsp_prod; |
1314 | rmb(); /* Ensure we see queued responses up to 'rp'. */ | 1553 | rmb(); /* Ensure we see queued responses up to 'rp'. */ |
1315 | 1554 | ||
1316 | for (i = info->ring.rsp_cons; i != rp; i++) { | 1555 | for (i = rinfo->ring.rsp_cons; i != rp; i++) { |
1317 | unsigned long id; | 1556 | unsigned long id; |
1318 | 1557 | ||
1319 | bret = RING_GET_RESPONSE(&info->ring, i); | 1558 | bret = RING_GET_RESPONSE(&rinfo->ring, i); |
1320 | id = bret->id; | 1559 | id = bret->id; |
1321 | /* | 1560 | /* |
1322 | * The backend has messed up and given us an id that we would | 1561 | * The backend has messed up and given us an id that we would |
@@ -1330,12 +1569,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
1330 | * the id is busted. */ | 1569 | * the id is busted. */ |
1331 | continue; | 1570 | continue; |
1332 | } | 1571 | } |
1333 | req = info->shadow[id].request; | 1572 | req = rinfo->shadow[id].request; |
1334 | 1573 | ||
1335 | if (bret->operation != BLKIF_OP_DISCARD) | 1574 | if (bret->operation != BLKIF_OP_DISCARD) { |
1336 | blkif_completion(&info->shadow[id], info, bret); | 1575 | /* |
1576 | * We may need to wait for an extra response if the | ||
1577 | * I/O request is split in two. | ||
1578 | */ | ||
1579 | if (!blkif_completion(&id, rinfo, bret)) | ||
1580 | continue; | ||
1581 | } | ||
1337 | 1582 | ||
1338 | if (add_id_to_freelist(info, id)) { | 1583 | if (add_id_to_freelist(rinfo, id)) { |
1339 | WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", | 1584 | WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", |
1340 | info->gd->disk_name, op_name(bret->operation), id); | 1585 | info->gd->disk_name, op_name(bret->operation), id); |
1341 | continue; | 1586 | continue; |
@@ -1364,7 +1609,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
1364 | error = -EOPNOTSUPP; | 1609 | error = -EOPNOTSUPP; |
1365 | } | 1610 | } |
1366 | if (unlikely(bret->status == BLKIF_RSP_ERROR && | 1611 | if (unlikely(bret->status == BLKIF_RSP_ERROR && |
1367 | info->shadow[id].req.u.rw.nr_segments == 0)) { | 1612 | rinfo->shadow[id].req.u.rw.nr_segments == 0)) { |
1368 | printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", | 1613 | printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", |
1369 | info->gd->disk_name, op_name(bret->operation)); | 1614 | info->gd->disk_name, op_name(bret->operation)); |
1370 | error = -EOPNOTSUPP; | 1615 | error = -EOPNOTSUPP; |
@@ -1389,34 +1634,35 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) | |||
1389 | } | 1634 | } |
1390 | } | 1635 | } |
1391 | 1636 | ||
1392 | info->ring.rsp_cons = i; | 1637 | rinfo->ring.rsp_cons = i; |
1393 | 1638 | ||
1394 | if (i != info->ring.req_prod_pvt) { | 1639 | if (i != rinfo->ring.req_prod_pvt) { |
1395 | int more_to_do; | 1640 | int more_to_do; |
1396 | RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); | 1641 | RING_FINAL_CHECK_FOR_RESPONSES(&rinfo->ring, more_to_do); |
1397 | if (more_to_do) | 1642 | if (more_to_do) |
1398 | goto again; | 1643 | goto again; |
1399 | } else | 1644 | } else |
1400 | info->ring.sring->rsp_event = i + 1; | 1645 | rinfo->ring.sring->rsp_event = i + 1; |
1401 | 1646 | ||
1402 | kick_pending_request_queues(info); | 1647 | kick_pending_request_queues_locked(rinfo); |
1403 | 1648 | ||
1404 | spin_unlock_irqrestore(&info->io_lock, flags); | 1649 | spin_unlock_irqrestore(&rinfo->ring_lock, flags); |
1405 | 1650 | ||
1406 | return IRQ_HANDLED; | 1651 | return IRQ_HANDLED; |
1407 | } | 1652 | } |
1408 | 1653 | ||
1409 | 1654 | ||
1410 | static int setup_blkring(struct xenbus_device *dev, | 1655 | static int setup_blkring(struct xenbus_device *dev, |
1411 | struct blkfront_info *info) | 1656 | struct blkfront_ring_info *rinfo) |
1412 | { | 1657 | { |
1413 | struct blkif_sring *sring; | 1658 | struct blkif_sring *sring; |
1414 | int err, i; | 1659 | int err, i; |
1660 | struct blkfront_info *info = rinfo->dev_info; | ||
1415 | unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE; | 1661 | unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE; |
1416 | grant_ref_t gref[XENBUS_MAX_RING_GRANTS]; | 1662 | grant_ref_t gref[XENBUS_MAX_RING_GRANTS]; |
1417 | 1663 | ||
1418 | for (i = 0; i < info->nr_ring_pages; i++) | 1664 | for (i = 0; i < info->nr_ring_pages; i++) |
1419 | info->ring_ref[i] = GRANT_INVALID_REF; | 1665 | rinfo->ring_ref[i] = GRANT_INVALID_REF; |
1420 | 1666 | ||
1421 | sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, | 1667 | sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, |
1422 | get_order(ring_size)); | 1668 | get_order(ring_size)); |
@@ -1425,29 +1671,29 @@ static int setup_blkring(struct xenbus_device *dev, | |||
1425 | return -ENOMEM; | 1671 | return -ENOMEM; |
1426 | } | 1672 | } |
1427 | SHARED_RING_INIT(sring); | 1673 | SHARED_RING_INIT(sring); |
1428 | FRONT_RING_INIT(&info->ring, sring, ring_size); | 1674 | FRONT_RING_INIT(&rinfo->ring, sring, ring_size); |
1429 | 1675 | ||
1430 | err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref); | 1676 | err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref); |
1431 | if (err < 0) { | 1677 | if (err < 0) { |
1432 | free_pages((unsigned long)sring, get_order(ring_size)); | 1678 | free_pages((unsigned long)sring, get_order(ring_size)); |
1433 | info->ring.sring = NULL; | 1679 | rinfo->ring.sring = NULL; |
1434 | goto fail; | 1680 | goto fail; |
1435 | } | 1681 | } |
1436 | for (i = 0; i < info->nr_ring_pages; i++) | 1682 | for (i = 0; i < info->nr_ring_pages; i++) |
1437 | info->ring_ref[i] = gref[i]; | 1683 | rinfo->ring_ref[i] = gref[i]; |
1438 | 1684 | ||
1439 | err = xenbus_alloc_evtchn(dev, &info->evtchn); | 1685 | err = xenbus_alloc_evtchn(dev, &rinfo->evtchn); |
1440 | if (err) | 1686 | if (err) |
1441 | goto fail; | 1687 | goto fail; |
1442 | 1688 | ||
1443 | err = bind_evtchn_to_irqhandler(info->evtchn, blkif_interrupt, 0, | 1689 | err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0, |
1444 | "blkif", info); | 1690 | "blkif", rinfo); |
1445 | if (err <= 0) { | 1691 | if (err <= 0) { |
1446 | xenbus_dev_fatal(dev, err, | 1692 | xenbus_dev_fatal(dev, err, |
1447 | "bind_evtchn_to_irqhandler failed"); | 1693 | "bind_evtchn_to_irqhandler failed"); |
1448 | goto fail; | 1694 | goto fail; |
1449 | } | 1695 | } |
1450 | info->irq = err; | 1696 | rinfo->irq = err; |
1451 | 1697 | ||
1452 | return 0; | 1698 | return 0; |
1453 | fail: | 1699 | fail: |
@@ -1455,6 +1701,53 @@ fail: | |||
1455 | return err; | 1701 | return err; |
1456 | } | 1702 | } |
1457 | 1703 | ||
1704 | /* | ||
1705 | * Write out per-ring/queue nodes including ring-ref and event-channel; each | ||
1706 | * ring buffer may have multiple pages depending on ->nr_ring_pages. | ||
1707 | */ | ||
1708 | static int write_per_ring_nodes(struct xenbus_transaction xbt, | ||
1709 | struct blkfront_ring_info *rinfo, const char *dir) | ||
1710 | { | ||
1711 | int err; | ||
1712 | unsigned int i; | ||
1713 | const char *message = NULL; | ||
1714 | struct blkfront_info *info = rinfo->dev_info; | ||
1715 | |||
1716 | if (info->nr_ring_pages == 1) { | ||
1717 | err = xenbus_printf(xbt, dir, "ring-ref", "%u", rinfo->ring_ref[0]); | ||
1718 | if (err) { | ||
1719 | message = "writing ring-ref"; | ||
1720 | goto abort_transaction; | ||
1721 | } | ||
1722 | } else { | ||
1723 | for (i = 0; i < info->nr_ring_pages; i++) { | ||
1724 | char ring_ref_name[RINGREF_NAME_LEN]; | ||
1725 | |||
1726 | snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); | ||
1727 | err = xenbus_printf(xbt, dir, ring_ref_name, | ||
1728 | "%u", rinfo->ring_ref[i]); | ||
1729 | if (err) { | ||
1730 | message = "writing ring-ref"; | ||
1731 | goto abort_transaction; | ||
1732 | } | ||
1733 | } | ||
1734 | } | ||
1735 | |||
1736 | err = xenbus_printf(xbt, dir, "event-channel", "%u", rinfo->evtchn); | ||
1737 | if (err) { | ||
1738 | message = "writing event-channel"; | ||
1739 | goto abort_transaction; | ||
1740 | } | ||
1741 | |||
1742 | return 0; | ||
1743 | |||
1744 | abort_transaction: | ||
1745 | xenbus_transaction_end(xbt, 1); | ||
1746 | if (message) | ||
1747 | xenbus_dev_fatal(info->xbdev, err, "%s", message); | ||
1748 | |||
1749 | return err; | ||
1750 | } | ||
1458 | 1751 | ||
1459 | /* Common code used when first setting up, and when resuming. */ | 1752 | /* Common code used when first setting up, and when resuming. */ |
1460 | static int talk_to_blkback(struct xenbus_device *dev, | 1753 | static int talk_to_blkback(struct xenbus_device *dev, |
@@ -1462,8 +1755,8 @@ static int talk_to_blkback(struct xenbus_device *dev, | |||
1462 | { | 1755 | { |
1463 | const char *message = NULL; | 1756 | const char *message = NULL; |
1464 | struct xenbus_transaction xbt; | 1757 | struct xenbus_transaction xbt; |
1465 | int err, i; | 1758 | int err; |
1466 | unsigned int max_page_order = 0; | 1759 | unsigned int i, max_page_order = 0; |
1467 | unsigned int ring_page_order = 0; | 1760 | unsigned int ring_page_order = 0; |
1468 | 1761 | ||
1469 | err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, | 1762 | err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, |
@@ -1475,10 +1768,14 @@ static int talk_to_blkback(struct xenbus_device *dev, | |||
1475 | info->nr_ring_pages = 1 << ring_page_order; | 1768 | info->nr_ring_pages = 1 << ring_page_order; |
1476 | } | 1769 | } |
1477 | 1770 | ||
1478 | /* Create shared ring, alloc event channel. */ | 1771 | for (i = 0; i < info->nr_rings; i++) { |
1479 | err = setup_blkring(dev, info); | 1772 | struct blkfront_ring_info *rinfo = &info->rinfo[i]; |
1480 | if (err) | 1773 | |
1481 | goto out; | 1774 | /* Create shared ring, alloc event channel. */ |
1775 | err = setup_blkring(dev, rinfo); | ||
1776 | if (err) | ||
1777 | goto destroy_blkring; | ||
1778 | } | ||
1482 | 1779 | ||
1483 | again: | 1780 | again: |
1484 | err = xenbus_transaction_start(&xbt); | 1781 | err = xenbus_transaction_start(&xbt); |
@@ -1487,38 +1784,49 @@ again: | |||
1487 | goto destroy_blkring; | 1784 | goto destroy_blkring; |
1488 | } | 1785 | } |
1489 | 1786 | ||
1490 | if (info->nr_ring_pages == 1) { | 1787 | if (info->nr_ring_pages > 1) { |
1491 | err = xenbus_printf(xbt, dev->nodename, | 1788 | err = xenbus_printf(xbt, dev->nodename, "ring-page-order", "%u", |
1492 | "ring-ref", "%u", info->ring_ref[0]); | 1789 | ring_page_order); |
1493 | if (err) { | 1790 | if (err) { |
1494 | message = "writing ring-ref"; | 1791 | message = "writing ring-page-order"; |
1495 | goto abort_transaction; | 1792 | goto abort_transaction; |
1496 | } | 1793 | } |
1794 | } | ||
1795 | |||
1796 | /* We already got the number of queues/rings in _probe */ | ||
1797 | if (info->nr_rings == 1) { | ||
1798 | err = write_per_ring_nodes(xbt, &info->rinfo[0], dev->nodename); | ||
1799 | if (err) | ||
1800 | goto destroy_blkring; | ||
1497 | } else { | 1801 | } else { |
1498 | err = xenbus_printf(xbt, dev->nodename, | 1802 | char *path; |
1499 | "ring-page-order", "%u", ring_page_order); | 1803 | size_t pathsize; |
1804 | |||
1805 | err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues", "%u", | ||
1806 | info->nr_rings); | ||
1500 | if (err) { | 1807 | if (err) { |
1501 | message = "writing ring-page-order"; | 1808 | message = "writing multi-queue-num-queues"; |
1502 | goto abort_transaction; | 1809 | goto abort_transaction; |
1503 | } | 1810 | } |
1504 | 1811 | ||
1505 | for (i = 0; i < info->nr_ring_pages; i++) { | 1812 | pathsize = strlen(dev->nodename) + QUEUE_NAME_LEN; |
1506 | char ring_ref_name[RINGREF_NAME_LEN]; | 1813 | path = kmalloc(pathsize, GFP_KERNEL); |
1814 | if (!path) { | ||
1815 | err = -ENOMEM; | ||
1816 | message = "ENOMEM while writing ring references"; | ||
1817 | goto abort_transaction; | ||
1818 | } | ||
1507 | 1819 | ||
1508 | snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); | 1820 | for (i = 0; i < info->nr_rings; i++) { |
1509 | err = xenbus_printf(xbt, dev->nodename, ring_ref_name, | 1821 | memset(path, 0, pathsize); |
1510 | "%u", info->ring_ref[i]); | 1822 | snprintf(path, pathsize, "%s/queue-%u", dev->nodename, i); |
1823 | err = write_per_ring_nodes(xbt, &info->rinfo[i], path); | ||
1511 | if (err) { | 1824 | if (err) { |
1512 | message = "writing ring-ref"; | 1825 | kfree(path); |
1513 | goto abort_transaction; | 1826 | goto destroy_blkring; |
1514 | } | 1827 | } |
1515 | } | 1828 | } |
1516 | } | 1829 | kfree(path); |
1517 | err = xenbus_printf(xbt, dev->nodename, | ||
1518 | "event-channel", "%u", info->evtchn); | ||
1519 | if (err) { | ||
1520 | message = "writing event-channel"; | ||
1521 | goto abort_transaction; | ||
1522 | } | 1830 | } |
1523 | err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", | 1831 | err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", |
1524 | XEN_IO_PROTO_ABI_NATIVE); | 1832 | XEN_IO_PROTO_ABI_NATIVE); |
@@ -1540,9 +1848,14 @@ again: | |||
1540 | goto destroy_blkring; | 1848 | goto destroy_blkring; |
1541 | } | 1849 | } |
1542 | 1850 | ||
1543 | for (i = 0; i < BLK_RING_SIZE(info); i++) | 1851 | for (i = 0; i < info->nr_rings; i++) { |
1544 | info->shadow[i].req.u.rw.id = i+1; | 1852 | unsigned int j; |
1545 | info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; | 1853 | struct blkfront_ring_info *rinfo = &info->rinfo[i]; |
1854 | |||
1855 | for (j = 0; j < BLK_RING_SIZE(info); j++) | ||
1856 | rinfo->shadow[j].req.u.rw.id = j + 1; | ||
1857 | rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; | ||
1858 | } | ||
1546 | xenbus_switch_state(dev, XenbusStateInitialised); | 1859 | xenbus_switch_state(dev, XenbusStateInitialised); |
1547 | 1860 | ||
1548 | return 0; | 1861 | return 0; |
@@ -1553,7 +1866,10 @@ again: | |||
1553 | xenbus_dev_fatal(dev, err, "%s", message); | 1866 | xenbus_dev_fatal(dev, err, "%s", message); |
1554 | destroy_blkring: | 1867 | destroy_blkring: |
1555 | blkif_free(info, 0); | 1868 | blkif_free(info, 0); |
1556 | out: | 1869 | |
1870 | kfree(info); | ||
1871 | dev_set_drvdata(&dev->dev, NULL); | ||
1872 | |||
1557 | return err; | 1873 | return err; |
1558 | } | 1874 | } |
1559 | 1875 | ||
@@ -1567,7 +1883,9 @@ static int blkfront_probe(struct xenbus_device *dev, | |||
1567 | const struct xenbus_device_id *id) | 1883 | const struct xenbus_device_id *id) |
1568 | { | 1884 | { |
1569 | int err, vdevice; | 1885 | int err, vdevice; |
1886 | unsigned int r_index; | ||
1570 | struct blkfront_info *info; | 1887 | struct blkfront_info *info; |
1888 | unsigned int backend_max_queues = 0; | ||
1571 | 1889 | ||
1572 | /* FIXME: Use dynamic device id if this is not set. */ | 1890 | /* FIXME: Use dynamic device id if this is not set. */ |
1573 | err = xenbus_scanf(XBT_NIL, dev->nodename, | 1891 | err = xenbus_scanf(XBT_NIL, dev->nodename, |
@@ -1617,15 +1935,39 @@ static int blkfront_probe(struct xenbus_device *dev, | |||
1617 | return -ENOMEM; | 1935 | return -ENOMEM; |
1618 | } | 1936 | } |
1619 | 1937 | ||
1620 | mutex_init(&info->mutex); | ||
1621 | spin_lock_init(&info->io_lock); | ||
1622 | info->xbdev = dev; | 1938 | info->xbdev = dev; |
1939 | /* Check if backend supports multiple queues. */ | ||
1940 | err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, | ||
1941 | "multi-queue-max-queues", "%u", &backend_max_queues); | ||
1942 | if (err < 0) | ||
1943 | backend_max_queues = 1; | ||
1944 | |||
1945 | info->nr_rings = min(backend_max_queues, xen_blkif_max_queues); | ||
1946 | /* We need at least one ring. */ | ||
1947 | if (!info->nr_rings) | ||
1948 | info->nr_rings = 1; | ||
1949 | |||
1950 | info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL); | ||
1951 | if (!info->rinfo) { | ||
1952 | xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure"); | ||
1953 | kfree(info); | ||
1954 | return -ENOMEM; | ||
1955 | } | ||
1956 | |||
1957 | for (r_index = 0; r_index < info->nr_rings; r_index++) { | ||
1958 | struct blkfront_ring_info *rinfo; | ||
1959 | |||
1960 | rinfo = &info->rinfo[r_index]; | ||
1961 | INIT_LIST_HEAD(&rinfo->indirect_pages); | ||
1962 | INIT_LIST_HEAD(&rinfo->grants); | ||
1963 | rinfo->dev_info = info; | ||
1964 | INIT_WORK(&rinfo->work, blkif_restart_queue); | ||
1965 | spin_lock_init(&rinfo->ring_lock); | ||
1966 | } | ||
1967 | |||
1968 | mutex_init(&info->mutex); | ||
1623 | info->vdevice = vdevice; | 1969 | info->vdevice = vdevice; |
1624 | INIT_LIST_HEAD(&info->grants); | ||
1625 | INIT_LIST_HEAD(&info->indirect_pages); | ||
1626 | info->persistent_gnts_c = 0; | ||
1627 | info->connected = BLKIF_STATE_DISCONNECTED; | 1970 | info->connected = BLKIF_STATE_DISCONNECTED; |
1628 | INIT_WORK(&info->work, blkif_restart_queue); | ||
1629 | 1971 | ||
1630 | /* Front end dir is a number, which is used as the id. */ | 1972 | /* Front end dir is a number, which is used as the id. */ |
1631 | info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); | 1973 | info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); |
@@ -1649,7 +1991,7 @@ static void split_bio_end(struct bio *bio) | |||
1649 | 1991 | ||
1650 | static int blkif_recover(struct blkfront_info *info) | 1992 | static int blkif_recover(struct blkfront_info *info) |
1651 | { | 1993 | { |
1652 | int i; | 1994 | unsigned int i, r_index; |
1653 | struct request *req, *n; | 1995 | struct request *req, *n; |
1654 | struct blk_shadow *copy; | 1996 | struct blk_shadow *copy; |
1655 | int rc; | 1997 | int rc; |
@@ -1660,64 +2002,73 @@ static int blkif_recover(struct blkfront_info *info) | |||
1660 | struct split_bio *split_bio; | 2002 | struct split_bio *split_bio; |
1661 | struct list_head requests; | 2003 | struct list_head requests; |
1662 | 2004 | ||
1663 | /* Stage 1: Make a safe copy of the shadow state. */ | 2005 | blkfront_gather_backend_features(info); |
1664 | copy = kmemdup(info->shadow, sizeof(info->shadow), | ||
1665 | GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); | ||
1666 | if (!copy) | ||
1667 | return -ENOMEM; | ||
1668 | |||
1669 | /* Stage 2: Set up free list. */ | ||
1670 | memset(&info->shadow, 0, sizeof(info->shadow)); | ||
1671 | for (i = 0; i < BLK_RING_SIZE(info); i++) | ||
1672 | info->shadow[i].req.u.rw.id = i+1; | ||
1673 | info->shadow_free = info->ring.req_prod_pvt; | ||
1674 | info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; | ||
1675 | |||
1676 | rc = blkfront_gather_backend_features(info); | ||
1677 | if (rc) { | ||
1678 | kfree(copy); | ||
1679 | return rc; | ||
1680 | } | ||
1681 | |||
1682 | segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; | 2006 | segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; |
1683 | blk_queue_max_segments(info->rq, segs); | 2007 | blk_queue_max_segments(info->rq, segs); |
1684 | bio_list_init(&bio_list); | 2008 | bio_list_init(&bio_list); |
1685 | INIT_LIST_HEAD(&requests); | 2009 | INIT_LIST_HEAD(&requests); |
1686 | for (i = 0; i < BLK_RING_SIZE(info); i++) { | ||
1687 | /* Not in use? */ | ||
1688 | if (!copy[i].request) | ||
1689 | continue; | ||
1690 | 2010 | ||
1691 | /* | 2011 | for (r_index = 0; r_index < info->nr_rings; r_index++) { |
1692 | * Get the bios in the request so we can re-queue them. | 2012 | struct blkfront_ring_info *rinfo; |
1693 | */ | 2013 | |
1694 | if (copy[i].request->cmd_flags & | 2014 | rinfo = &info->rinfo[r_index]; |
1695 | (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { | 2015 | /* Stage 1: Make a safe copy of the shadow state. */ |
2016 | copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow), | ||
2017 | GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); | ||
2018 | if (!copy) | ||
2019 | return -ENOMEM; | ||
2020 | |||
2021 | /* Stage 2: Set up free list. */ | ||
2022 | memset(&rinfo->shadow, 0, sizeof(rinfo->shadow)); | ||
2023 | for (i = 0; i < BLK_RING_SIZE(info); i++) | ||
2024 | rinfo->shadow[i].req.u.rw.id = i+1; | ||
2025 | rinfo->shadow_free = rinfo->ring.req_prod_pvt; | ||
2026 | rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; | ||
2027 | |||
2028 | rc = blkfront_setup_indirect(rinfo); | ||
2029 | if (rc) { | ||
2030 | kfree(copy); | ||
2031 | return rc; | ||
2032 | } | ||
2033 | |||
2034 | for (i = 0; i < BLK_RING_SIZE(info); i++) { | ||
2035 | /* Not in use? */ | ||
2036 | if (!copy[i].request) | ||
2037 | continue; | ||
2038 | |||
1696 | /* | 2039 | /* |
1697 | * Flush operations don't contain bios, so | 2040 | * Get the bios in the request so we can re-queue them. |
1698 | * we need to requeue the whole request | ||
1699 | */ | 2041 | */ |
1700 | list_add(©[i].request->queuelist, &requests); | 2042 | if (copy[i].request->cmd_flags & |
1701 | continue; | 2043 | (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { |
2044 | /* | ||
2045 | * Flush operations don't contain bios, so | ||
2046 | * we need to requeue the whole request | ||
2047 | */ | ||
2048 | list_add(©[i].request->queuelist, &requests); | ||
2049 | continue; | ||
2050 | } | ||
2051 | merge_bio.head = copy[i].request->bio; | ||
2052 | merge_bio.tail = copy[i].request->biotail; | ||
2053 | bio_list_merge(&bio_list, &merge_bio); | ||
2054 | copy[i].request->bio = NULL; | ||
2055 | blk_end_request_all(copy[i].request, 0); | ||
1702 | } | 2056 | } |
1703 | merge_bio.head = copy[i].request->bio; | ||
1704 | merge_bio.tail = copy[i].request->biotail; | ||
1705 | bio_list_merge(&bio_list, &merge_bio); | ||
1706 | copy[i].request->bio = NULL; | ||
1707 | blk_end_request_all(copy[i].request, 0); | ||
1708 | } | ||
1709 | |||
1710 | kfree(copy); | ||
1711 | 2057 | ||
2058 | kfree(copy); | ||
2059 | } | ||
1712 | xenbus_switch_state(info->xbdev, XenbusStateConnected); | 2060 | xenbus_switch_state(info->xbdev, XenbusStateConnected); |
1713 | 2061 | ||
1714 | spin_lock_irq(&info->io_lock); | ||
1715 | |||
1716 | /* Now safe for us to use the shared ring */ | 2062 | /* Now safe for us to use the shared ring */ |
1717 | info->connected = BLKIF_STATE_CONNECTED; | 2063 | info->connected = BLKIF_STATE_CONNECTED; |
1718 | 2064 | ||
1719 | /* Kick any other new requests queued since we resumed */ | 2065 | for (r_index = 0; r_index < info->nr_rings; r_index++) { |
1720 | kick_pending_request_queues(info); | 2066 | struct blkfront_ring_info *rinfo; |
2067 | |||
2068 | rinfo = &info->rinfo[r_index]; | ||
2069 | /* Kick any other new requests queued since we resumed */ | ||
2070 | kick_pending_request_queues(rinfo); | ||
2071 | } | ||
1721 | 2072 | ||
1722 | list_for_each_entry_safe(req, n, &requests, queuelist) { | 2073 | list_for_each_entry_safe(req, n, &requests, queuelist) { |
1723 | /* Requeue pending requests (flush or discard) */ | 2074 | /* Requeue pending requests (flush or discard) */ |
@@ -1725,7 +2076,6 @@ static int blkif_recover(struct blkfront_info *info) | |||
1725 | BUG_ON(req->nr_phys_segments > segs); | 2076 | BUG_ON(req->nr_phys_segments > segs); |
1726 | blk_mq_requeue_request(req); | 2077 | blk_mq_requeue_request(req); |
1727 | } | 2078 | } |
1728 | spin_unlock_irq(&info->io_lock); | ||
1729 | blk_mq_kick_requeue_list(info->rq); | 2079 | blk_mq_kick_requeue_list(info->rq); |
1730 | 2080 | ||
1731 | while ((bio = bio_list_pop(&bio_list)) != NULL) { | 2081 | while ((bio = bio_list_pop(&bio_list)) != NULL) { |
@@ -1790,8 +2140,7 @@ static int blkfront_resume(struct xenbus_device *dev) | |||
1790 | return err; | 2140 | return err; |
1791 | } | 2141 | } |
1792 | 2142 | ||
1793 | static void | 2143 | static void blkfront_closing(struct blkfront_info *info) |
1794 | blkfront_closing(struct blkfront_info *info) | ||
1795 | { | 2144 | { |
1796 | struct xenbus_device *xbdev = info->xbdev; | 2145 | struct xenbus_device *xbdev = info->xbdev; |
1797 | struct block_device *bdev = NULL; | 2146 | struct block_device *bdev = NULL; |
@@ -1851,18 +2200,29 @@ static void blkfront_setup_discard(struct blkfront_info *info) | |||
1851 | info->feature_secdiscard = !!discard_secure; | 2200 | info->feature_secdiscard = !!discard_secure; |
1852 | } | 2201 | } |
1853 | 2202 | ||
1854 | static int blkfront_setup_indirect(struct blkfront_info *info) | 2203 | static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) |
1855 | { | 2204 | { |
1856 | unsigned int psegs, grants; | 2205 | unsigned int psegs, grants; |
1857 | int err, i; | 2206 | int err, i; |
2207 | struct blkfront_info *info = rinfo->dev_info; | ||
1858 | 2208 | ||
1859 | if (info->max_indirect_segments == 0) | 2209 | if (info->max_indirect_segments == 0) { |
1860 | grants = BLKIF_MAX_SEGMENTS_PER_REQUEST; | 2210 | if (!HAS_EXTRA_REQ) |
2211 | grants = BLKIF_MAX_SEGMENTS_PER_REQUEST; | ||
2212 | else { | ||
2213 | /* | ||
2214 | * When an extra req is required, the maximum | ||
2215 | * grants supported is related to the size of the | ||
2216 | * Linux block segment. | ||
2217 | */ | ||
2218 | grants = GRANTS_PER_PSEG; | ||
2219 | } | ||
2220 | } | ||
1861 | else | 2221 | else |
1862 | grants = info->max_indirect_segments; | 2222 | grants = info->max_indirect_segments; |
1863 | psegs = grants / GRANTS_PER_PSEG; | 2223 | psegs = grants / GRANTS_PER_PSEG; |
1864 | 2224 | ||
1865 | err = fill_grant_buffer(info, | 2225 | err = fill_grant_buffer(rinfo, |
1866 | (grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info)); | 2226 | (grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info)); |
1867 | if (err) | 2227 | if (err) |
1868 | goto out_of_memory; | 2228 | goto out_of_memory; |
@@ -1875,31 +2235,31 @@ static int blkfront_setup_indirect(struct blkfront_info *info) | |||
1875 | */ | 2235 | */ |
1876 | int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info); | 2236 | int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info); |
1877 | 2237 | ||
1878 | BUG_ON(!list_empty(&info->indirect_pages)); | 2238 | BUG_ON(!list_empty(&rinfo->indirect_pages)); |
1879 | for (i = 0; i < num; i++) { | 2239 | for (i = 0; i < num; i++) { |
1880 | struct page *indirect_page = alloc_page(GFP_NOIO); | 2240 | struct page *indirect_page = alloc_page(GFP_NOIO); |
1881 | if (!indirect_page) | 2241 | if (!indirect_page) |
1882 | goto out_of_memory; | 2242 | goto out_of_memory; |
1883 | list_add(&indirect_page->lru, &info->indirect_pages); | 2243 | list_add(&indirect_page->lru, &rinfo->indirect_pages); |
1884 | } | 2244 | } |
1885 | } | 2245 | } |
1886 | 2246 | ||
1887 | for (i = 0; i < BLK_RING_SIZE(info); i++) { | 2247 | for (i = 0; i < BLK_RING_SIZE(info); i++) { |
1888 | info->shadow[i].grants_used = kzalloc( | 2248 | rinfo->shadow[i].grants_used = kzalloc( |
1889 | sizeof(info->shadow[i].grants_used[0]) * grants, | 2249 | sizeof(rinfo->shadow[i].grants_used[0]) * grants, |
1890 | GFP_NOIO); | 2250 | GFP_NOIO); |
1891 | info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * psegs, GFP_NOIO); | 2251 | rinfo->shadow[i].sg = kzalloc(sizeof(rinfo->shadow[i].sg[0]) * psegs, GFP_NOIO); |
1892 | if (info->max_indirect_segments) | 2252 | if (info->max_indirect_segments) |
1893 | info->shadow[i].indirect_grants = kzalloc( | 2253 | rinfo->shadow[i].indirect_grants = kzalloc( |
1894 | sizeof(info->shadow[i].indirect_grants[0]) * | 2254 | sizeof(rinfo->shadow[i].indirect_grants[0]) * |
1895 | INDIRECT_GREFS(grants), | 2255 | INDIRECT_GREFS(grants), |
1896 | GFP_NOIO); | 2256 | GFP_NOIO); |
1897 | if ((info->shadow[i].grants_used == NULL) || | 2257 | if ((rinfo->shadow[i].grants_used == NULL) || |
1898 | (info->shadow[i].sg == NULL) || | 2258 | (rinfo->shadow[i].sg == NULL) || |
1899 | (info->max_indirect_segments && | 2259 | (info->max_indirect_segments && |
1900 | (info->shadow[i].indirect_grants == NULL))) | 2260 | (rinfo->shadow[i].indirect_grants == NULL))) |
1901 | goto out_of_memory; | 2261 | goto out_of_memory; |
1902 | sg_init_table(info->shadow[i].sg, psegs); | 2262 | sg_init_table(rinfo->shadow[i].sg, psegs); |
1903 | } | 2263 | } |
1904 | 2264 | ||
1905 | 2265 | ||
@@ -1907,16 +2267,16 @@ static int blkfront_setup_indirect(struct blkfront_info *info) | |||
1907 | 2267 | ||
1908 | out_of_memory: | 2268 | out_of_memory: |
1909 | for (i = 0; i < BLK_RING_SIZE(info); i++) { | 2269 | for (i = 0; i < BLK_RING_SIZE(info); i++) { |
1910 | kfree(info->shadow[i].grants_used); | 2270 | kfree(rinfo->shadow[i].grants_used); |
1911 | info->shadow[i].grants_used = NULL; | 2271 | rinfo->shadow[i].grants_used = NULL; |
1912 | kfree(info->shadow[i].sg); | 2272 | kfree(rinfo->shadow[i].sg); |
1913 | info->shadow[i].sg = NULL; | 2273 | rinfo->shadow[i].sg = NULL; |
1914 | kfree(info->shadow[i].indirect_grants); | 2274 | kfree(rinfo->shadow[i].indirect_grants); |
1915 | info->shadow[i].indirect_grants = NULL; | 2275 | rinfo->shadow[i].indirect_grants = NULL; |
1916 | } | 2276 | } |
1917 | if (!list_empty(&info->indirect_pages)) { | 2277 | if (!list_empty(&rinfo->indirect_pages)) { |
1918 | struct page *indirect_page, *n; | 2278 | struct page *indirect_page, *n; |
1919 | list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) { | 2279 | list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { |
1920 | list_del(&indirect_page->lru); | 2280 | list_del(&indirect_page->lru); |
1921 | __free_page(indirect_page); | 2281 | __free_page(indirect_page); |
1922 | } | 2282 | } |
@@ -1927,7 +2287,7 @@ out_of_memory: | |||
1927 | /* | 2287 | /* |
1928 | * Gather all backend feature-* | 2288 | * Gather all backend feature-* |
1929 | */ | 2289 | */ |
1930 | static int blkfront_gather_backend_features(struct blkfront_info *info) | 2290 | static void blkfront_gather_backend_features(struct blkfront_info *info) |
1931 | { | 2291 | { |
1932 | int err; | 2292 | int err; |
1933 | int barrier, flush, discard, persistent; | 2293 | int barrier, flush, discard, persistent; |
@@ -1982,8 +2342,6 @@ static int blkfront_gather_backend_features(struct blkfront_info *info) | |||
1982 | else | 2342 | else |
1983 | info->max_indirect_segments = min(indirect_segments, | 2343 | info->max_indirect_segments = min(indirect_segments, |
1984 | xen_blkif_max_segments); | 2344 | xen_blkif_max_segments); |
1985 | |||
1986 | return blkfront_setup_indirect(info); | ||
1987 | } | 2345 | } |
1988 | 2346 | ||
1989 | /* | 2347 | /* |
@@ -1996,7 +2354,7 @@ static void blkfront_connect(struct blkfront_info *info) | |||
1996 | unsigned long sector_size; | 2354 | unsigned long sector_size; |
1997 | unsigned int physical_sector_size; | 2355 | unsigned int physical_sector_size; |
1998 | unsigned int binfo; | 2356 | unsigned int binfo; |
1999 | int err; | 2357 | int err, i; |
2000 | 2358 | ||
2001 | switch (info->connected) { | 2359 | switch (info->connected) { |
2002 | case BLKIF_STATE_CONNECTED: | 2360 | case BLKIF_STATE_CONNECTED: |
@@ -2053,11 +2411,15 @@ static void blkfront_connect(struct blkfront_info *info) | |||
2053 | if (err != 1) | 2411 | if (err != 1) |
2054 | physical_sector_size = sector_size; | 2412 | physical_sector_size = sector_size; |
2055 | 2413 | ||
2056 | err = blkfront_gather_backend_features(info); | 2414 | blkfront_gather_backend_features(info); |
2057 | if (err) { | 2415 | for (i = 0; i < info->nr_rings; i++) { |
2058 | xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s", | 2416 | err = blkfront_setup_indirect(&info->rinfo[i]); |
2059 | info->xbdev->otherend); | 2417 | if (err) { |
2060 | return; | 2418 | xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s", |
2419 | info->xbdev->otherend); | ||
2420 | blkif_free(info, 0); | ||
2421 | break; | ||
2422 | } | ||
2061 | } | 2423 | } |
2062 | 2424 | ||
2063 | err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, | 2425 | err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, |
@@ -2071,10 +2433,9 @@ static void blkfront_connect(struct blkfront_info *info) | |||
2071 | xenbus_switch_state(info->xbdev, XenbusStateConnected); | 2433 | xenbus_switch_state(info->xbdev, XenbusStateConnected); |
2072 | 2434 | ||
2073 | /* Kick pending requests. */ | 2435 | /* Kick pending requests. */ |
2074 | spin_lock_irq(&info->io_lock); | ||
2075 | info->connected = BLKIF_STATE_CONNECTED; | 2436 | info->connected = BLKIF_STATE_CONNECTED; |
2076 | kick_pending_request_queues(info); | 2437 | for (i = 0; i < info->nr_rings; i++) |
2077 | spin_unlock_irq(&info->io_lock); | 2438 | kick_pending_request_queues(&info->rinfo[i]); |
2078 | 2439 | ||
2079 | add_disk(info->gd); | 2440 | add_disk(info->gd); |
2080 | 2441 | ||
@@ -2095,11 +2456,8 @@ static void blkback_changed(struct xenbus_device *dev, | |||
2095 | case XenbusStateInitWait: | 2456 | case XenbusStateInitWait: |
2096 | if (dev->state != XenbusStateInitialising) | 2457 | if (dev->state != XenbusStateInitialising) |
2097 | break; | 2458 | break; |
2098 | if (talk_to_blkback(dev, info)) { | 2459 | if (talk_to_blkback(dev, info)) |
2099 | kfree(info); | ||
2100 | dev_set_drvdata(&dev->dev, NULL); | ||
2101 | break; | 2460 | break; |
2102 | } | ||
2103 | case XenbusStateInitialising: | 2461 | case XenbusStateInitialising: |
2104 | case XenbusStateInitialised: | 2462 | case XenbusStateInitialised: |
2105 | case XenbusStateReconfiguring: | 2463 | case XenbusStateReconfiguring: |
@@ -2108,6 +2466,10 @@ static void blkback_changed(struct xenbus_device *dev, | |||
2108 | break; | 2466 | break; |
2109 | 2467 | ||
2110 | case XenbusStateConnected: | 2468 | case XenbusStateConnected: |
2469 | if (dev->state != XenbusStateInitialised) { | ||
2470 | if (talk_to_blkback(dev, info)) | ||
2471 | break; | ||
2472 | } | ||
2111 | blkfront_connect(info); | 2473 | blkfront_connect(info); |
2112 | break; | 2474 | break; |
2113 | 2475 | ||
@@ -2281,6 +2643,7 @@ static struct xenbus_driver blkfront_driver = { | |||
2281 | static int __init xlblk_init(void) | 2643 | static int __init xlblk_init(void) |
2282 | { | 2644 | { |
2283 | int ret; | 2645 | int ret; |
2646 | int nr_cpus = num_online_cpus(); | ||
2284 | 2647 | ||
2285 | if (!xen_domain()) | 2648 | if (!xen_domain()) |
2286 | return -ENODEV; | 2649 | return -ENODEV; |
@@ -2288,7 +2651,13 @@ static int __init xlblk_init(void) | |||
2288 | if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) { | 2651 | if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) { |
2289 | pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", | 2652 | pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", |
2290 | xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER); | 2653 | xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER); |
2291 | xen_blkif_max_ring_order = 0; | 2654 | xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER; |
2655 | } | ||
2656 | |||
2657 | if (xen_blkif_max_queues > nr_cpus) { | ||
2658 | pr_info("Invalid max_queues (%d), will use default max: %d.\n", | ||
2659 | xen_blkif_max_queues, nr_cpus); | ||
2660 | xen_blkif_max_queues = nr_cpus; | ||
2292 | } | 2661 | } |
2293 | 2662 | ||
2294 | if (!xen_has_pv_disk_devices()) | 2663 | if (!xen_has_pv_disk_devices()) |
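Note on the xen-blkfront changes above: the number of rings is negotiated in two places. xlblk_init() caps the xen_blkif_max_queues module parameter at the number of online CPUs, and blkfront_probe() then takes the minimum of that cap and the backend's "multi-queue-max-queues" node, falling back to a single ring if either side reports zero. A minimal standalone sketch of that clamping logic follows; the three static inputs are hypothetical stand-ins for the xenstore read, the module parameter, and num_online_cpus().

    #include <stdio.h>

    /* Hypothetical stand-ins for the three inputs blkfront combines. */
    static unsigned int backend_max_queues = 4;     /* backend's "multi-queue-max-queues" */
    static unsigned int xen_blkif_max_queues = 8;   /* max_queues module parameter */
    static unsigned int nr_cpus = 2;                /* num_online_cpus() */

    static unsigned int negotiated_rings(void)
    {
            unsigned int max_queues = xen_blkif_max_queues;
            unsigned int nr_rings;

            /* xlblk_init(): never ask for more queues than there are online CPUs. */
            if (max_queues > nr_cpus)
                    max_queues = nr_cpus;

            /* blkfront_probe(): honour the smaller of the backend and frontend limits. */
            nr_rings = backend_max_queues < max_queues ? backend_max_queues : max_queues;

            /* We always need at least one ring. */
            if (!nr_rings)
                    nr_rings = 1;

            return nr_rings;
    }

    int main(void)
    {
            printf("negotiated nr_rings = %u\n", negotiated_rings());
            return 0;
    }

With the values above this prints "negotiated nr_rings = 2", i.e. the CPU count wins over both the module parameter and the backend limit.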
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 83392f856dfd..22b9e34ceb75 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
@@ -1741,6 +1741,7 @@ static void bch_btree_gc(struct cache_set *c) | |||
1741 | do { | 1741 | do { |
1742 | ret = btree_root(gc_root, c, &op, &writes, &stats); | 1742 | ret = btree_root(gc_root, c, &op, &writes, &stats); |
1743 | closure_sync(&writes); | 1743 | closure_sync(&writes); |
1744 | cond_resched(); | ||
1744 | 1745 | ||
1745 | if (ret && ret != -EAGAIN) | 1746 | if (ret && ret != -EAGAIN) |
1746 | pr_warn("gc failed!"); | 1747 | pr_warn("gc failed!"); |
@@ -2162,8 +2163,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op, | |||
2162 | rw_lock(true, b, b->level); | 2163 | rw_lock(true, b, b->level); |
2163 | 2164 | ||
2164 | if (b->key.ptr[0] != btree_ptr || | 2165 | if (b->key.ptr[0] != btree_ptr || |
2165 | b->seq != seq + 1) | 2166 | b->seq != seq + 1) { |
2167 | op->lock = b->level; | ||
2166 | goto out; | 2168 | goto out; |
2169 | } | ||
2167 | } | 2170 | } |
2168 | 2171 | ||
2169 | SET_KEY_PTRS(check_key, 1); | 2172 | SET_KEY_PTRS(check_key, 1); |
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 679a093a3bf6..8d0ead98eb6e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c | |||
@@ -685,6 +685,8 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, | |||
685 | WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") || | 685 | WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") || |
686 | sysfs_create_link(&c->kobj, &d->kobj, d->name), | 686 | sysfs_create_link(&c->kobj, &d->kobj, d->name), |
687 | "Couldn't create device <-> cache set symlinks"); | 687 | "Couldn't create device <-> cache set symlinks"); |
688 | |||
689 | clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); | ||
688 | } | 690 | } |
689 | 691 | ||
690 | static void bcache_device_detach(struct bcache_device *d) | 692 | static void bcache_device_detach(struct bcache_device *d) |
@@ -847,8 +849,11 @@ void bch_cached_dev_run(struct cached_dev *dc) | |||
847 | buf[SB_LABEL_SIZE] = '\0'; | 849 | buf[SB_LABEL_SIZE] = '\0'; |
848 | env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); | 850 | env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); |
849 | 851 | ||
850 | if (atomic_xchg(&dc->running, 1)) | 852 | if (atomic_xchg(&dc->running, 1)) { |
853 | kfree(env[1]); | ||
854 | kfree(env[2]); | ||
851 | return; | 855 | return; |
856 | } | ||
852 | 857 | ||
853 | if (!d->c && | 858 | if (!d->c && |
854 | BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { | 859 | BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { |
@@ -1933,6 +1938,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, | |||
1933 | else | 1938 | else |
1934 | err = "device busy"; | 1939 | err = "device busy"; |
1935 | mutex_unlock(&bch_register_lock); | 1940 | mutex_unlock(&bch_register_lock); |
1941 | if (attr == &ksysfs_register_quiet) | ||
1942 | goto out; | ||
1936 | } | 1943 | } |
1937 | goto err; | 1944 | goto err; |
1938 | } | 1945 | } |
@@ -1971,8 +1978,7 @@ out: | |||
1971 | err_close: | 1978 | err_close: |
1972 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | 1979 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); |
1973 | err: | 1980 | err: |
1974 | if (attr != &ksysfs_register_quiet) | 1981 | pr_info("error opening %s: %s", path, err); |
1975 | pr_info("error opening %s: %s", path, err); | ||
1976 | ret = -EINVAL; | 1982 | ret = -EINVAL; |
1977 | goto out; | 1983 | goto out; |
1978 | } | 1984 | } |
@@ -2066,8 +2072,10 @@ static int __init bcache_init(void) | |||
2066 | closure_debug_init(); | 2072 | closure_debug_init(); |
2067 | 2073 | ||
2068 | bcache_major = register_blkdev(0, "bcache"); | 2074 | bcache_major = register_blkdev(0, "bcache"); |
2069 | if (bcache_major < 0) | 2075 | if (bcache_major < 0) { |
2076 | unregister_reboot_notifier(&reboot); | ||
2070 | return bcache_major; | 2077 | return bcache_major; |
2078 | } | ||
2071 | 2079 | ||
2072 | if (!(bcache_wq = create_workqueue("bcache")) || | 2080 | if (!(bcache_wq = create_workqueue("bcache")) || |
2073 | !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || | 2081 | !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || |
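The bch_cached_dev_run() hunk above plugs a leak on the early-return path: env[2] visibly comes from kasprintf(), and env[1] is freed alongside it, so both are heap-allocated strings that must be released on every exit; since kfree(NULL) is a no-op, the frees can be unconditional. A hedged kernel-context sketch of that allocate / early-return / free pattern, with a made-up helper and format strings (this is not the bcache code itself):

    #include <linux/types.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>

    /* Hypothetical helper illustrating the pattern fixed above. */
    static void emit_change_event(bool already_running, const char *label)
    {
            /* kasprintf() kmalloc()s the formatted string, or returns NULL. */
            char *sym = kasprintf(GFP_KERNEL, "SYMLINK=%s", "bcache0");
            char *lbl = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", label);

            if (already_running) {
                    /* The early return must still drop both; kfree(NULL) is safe. */
                    kfree(sym);
                    kfree(lbl);
                    return;
            }

            /* ... hand sym/lbl to e.g. kobject_uevent_env() here ... */

            kfree(sym);
            kfree(lbl);
    }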
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index b23f88d9f18c..b9346cd9cda1 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c | |||
@@ -323,6 +323,10 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, | |||
323 | 323 | ||
324 | static bool dirty_pred(struct keybuf *buf, struct bkey *k) | 324 | static bool dirty_pred(struct keybuf *buf, struct bkey *k) |
325 | { | 325 | { |
326 | struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys); | ||
327 | |||
328 | BUG_ON(KEY_INODE(k) != dc->disk.id); | ||
329 | |||
326 | return KEY_DIRTY(k); | 330 | return KEY_DIRTY(k); |
327 | } | 331 | } |
328 | 332 | ||
@@ -372,11 +376,24 @@ next: | |||
372 | } | 376 | } |
373 | } | 377 | } |
374 | 378 | ||
379 | /* | ||
380 | * Returns true if we scanned the entire disk | ||
381 | */ | ||
375 | static bool refill_dirty(struct cached_dev *dc) | 382 | static bool refill_dirty(struct cached_dev *dc) |
376 | { | 383 | { |
377 | struct keybuf *buf = &dc->writeback_keys; | 384 | struct keybuf *buf = &dc->writeback_keys; |
385 | struct bkey start = KEY(dc->disk.id, 0, 0); | ||
378 | struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0); | 386 | struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0); |
379 | bool searched_from_start = false; | 387 | struct bkey start_pos; |
388 | |||
389 | /* | ||
390 | * make sure keybuf pos is inside the range for this disk - at bringup | ||
391 | * we might not be attached yet, so this disk's inode nr isn't | ||
392 | * initialized yet | ||
393 | */ | ||
394 | if (bkey_cmp(&buf->last_scanned, &start) < 0 || | ||
395 | bkey_cmp(&buf->last_scanned, &end) > 0) | ||
396 | buf->last_scanned = start; | ||
380 | 397 | ||
381 | if (dc->partial_stripes_expensive) { | 398 | if (dc->partial_stripes_expensive) { |
382 | refill_full_stripes(dc); | 399 | refill_full_stripes(dc); |
@@ -384,14 +401,20 @@ static bool refill_dirty(struct cached_dev *dc) | |||
384 | return false; | 401 | return false; |
385 | } | 402 | } |
386 | 403 | ||
387 | if (bkey_cmp(&buf->last_scanned, &end) >= 0) { | 404 | start_pos = buf->last_scanned; |
388 | buf->last_scanned = KEY(dc->disk.id, 0, 0); | ||
389 | searched_from_start = true; | ||
390 | } | ||
391 | |||
392 | bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); | 405 | bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); |
393 | 406 | ||
394 | return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start; | 407 | if (bkey_cmp(&buf->last_scanned, &end) < 0) |
408 | return false; | ||
409 | |||
410 | /* | ||
411 | * If we get to the end, start scanning again from the beginning, and | ||
412 | * only scan up to where we initially started scanning from: | ||
413 | */ | ||
414 | buf->last_scanned = start; | ||
415 | bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred); | ||
416 | |||
417 | return bkey_cmp(&buf->last_scanned, &start_pos) >= 0; | ||
395 | } | 418 | } |
396 | 419 | ||
397 | static int bch_writeback_thread(void *arg) | 420 | static int bch_writeback_thread(void *arg) |
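The reworked refill_dirty() above scans in a wrap-around fashion: refill from the current position to the end of the device; if the end is reached, wrap to the start and refill only up to where this pass began, and report whether the full key range was covered. A small self-contained sketch of that pattern over a plain array (keybufs, bkeys and bch_refill_keybuf() are simplified to a bounded scan that collects "dirty" slots):

    #include <stdbool.h>
    #include <stdio.h>

    #define NKEYS 10U

    static bool dirty[NKEYS] = { [3] = true, [7] = true };
    static unsigned int last_scanned;       /* persists like buf->last_scanned */

    /* Collect at most *budget dirty slots in [*pos, end); stands in for the real refill. */
    static void refill(unsigned int *pos, unsigned int end, unsigned int *budget)
    {
            for (; *pos < end && *budget; (*pos)++) {
                    if (dirty[*pos]) {
                            printf("collected dirty key %u\n", *pos);
                            dirty[*pos] = false;
                            (*budget)--;
                    }
            }
    }

    /* Returns true only when the entire range was scanned, as refill_dirty() now does. */
    static bool refill_dirty_sketch(void)
    {
            unsigned int start_pos = last_scanned;
            unsigned int budget = 1;        /* pretend the keybuf holds one key */

            refill(&last_scanned, NKEYS, &budget);
            if (last_scanned < NKEYS)
                    return false;           /* keybuf filled before reaching the end */

            /* Wrap around, but only scan up to where this pass started. */
            last_scanned = 0;
            refill(&last_scanned, start_pos, &budget);

            return last_scanned >= start_pos;
    }

    int main(void)
    {
            while (!refill_dirty_sketch())
                    ;
            printf("scanned the entire range\n");
            return 0;
    }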
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 0a9dab187b79..073a042aed24 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h | |||
@@ -63,7 +63,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, | |||
63 | 63 | ||
64 | static inline void bch_writeback_queue(struct cached_dev *dc) | 64 | static inline void bch_writeback_queue(struct cached_dev *dc) |
65 | { | 65 | { |
66 | wake_up_process(dc->writeback_thread); | 66 | if (!IS_ERR_OR_NULL(dc->writeback_thread)) |
67 | wake_up_process(dc->writeback_thread); | ||
67 | } | 68 | } |
68 | 69 | ||
69 | static inline void bch_writeback_add(struct cached_dev *dc) | 70 | static inline void bch_writeback_add(struct cached_dev *dc) |
diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 8723f2a99e15..d6b3c9943a2c 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h | |||
@@ -25,7 +25,6 @@ | |||
25 | */ | 25 | */ |
26 | #ifndef DRBD_H | 26 | #ifndef DRBD_H |
27 | #define DRBD_H | 27 | #define DRBD_H |
28 | #include <linux/connector.h> | ||
29 | #include <asm/types.h> | 28 | #include <asm/types.h> |
30 | 29 | ||
31 | #ifdef __KERNEL__ | 30 | #ifdef __KERNEL__ |
@@ -52,7 +51,7 @@ | |||
52 | #endif | 51 | #endif |
53 | 52 | ||
54 | extern const char *drbd_buildtag(void); | 53 | extern const char *drbd_buildtag(void); |
55 | #define REL_VERSION "8.4.5" | 54 | #define REL_VERSION "8.4.6" |
56 | #define API_VERSION 1 | 55 | #define API_VERSION 1 |
57 | #define PRO_VERSION_MIN 86 | 56 | #define PRO_VERSION_MIN 86 |
58 | #define PRO_VERSION_MAX 101 | 57 | #define PRO_VERSION_MAX 101 |
@@ -339,6 +338,8 @@ enum drbd_state_rv { | |||
339 | #define MDF_AL_CLEAN (1 << 7) | 338 | #define MDF_AL_CLEAN (1 << 7) |
340 | #define MDF_AL_DISABLED (1 << 8) | 339 | #define MDF_AL_DISABLED (1 << 8) |
341 | 340 | ||
341 | #define MAX_PEERS 32 | ||
342 | |||
342 | enum drbd_uuid_index { | 343 | enum drbd_uuid_index { |
343 | UI_CURRENT, | 344 | UI_CURRENT, |
344 | UI_BITMAP, | 345 | UI_BITMAP, |
@@ -349,14 +350,35 @@ enum drbd_uuid_index { | |||
349 | UI_EXTENDED_SIZE /* Everything. */ | 350 | UI_EXTENDED_SIZE /* Everything. */ |
350 | }; | 351 | }; |
351 | 352 | ||
353 | #define HISTORY_UUIDS MAX_PEERS | ||
354 | |||
352 | enum drbd_timeout_flag { | 355 | enum drbd_timeout_flag { |
353 | UT_DEFAULT = 0, | 356 | UT_DEFAULT = 0, |
354 | UT_DEGRADED = 1, | 357 | UT_DEGRADED = 1, |
355 | UT_PEER_OUTDATED = 2, | 358 | UT_PEER_OUTDATED = 2, |
356 | }; | 359 | }; |
357 | 360 | ||
361 | enum drbd_notification_type { | ||
362 | NOTIFY_EXISTS, | ||
363 | NOTIFY_CREATE, | ||
364 | NOTIFY_CHANGE, | ||
365 | NOTIFY_DESTROY, | ||
366 | NOTIFY_CALL, | ||
367 | NOTIFY_RESPONSE, | ||
368 | |||
369 | NOTIFY_CONTINUES = 0x8000, | ||
370 | NOTIFY_FLAGS = NOTIFY_CONTINUES, | ||
371 | }; | ||
372 | |||
358 | #define UUID_JUST_CREATED ((__u64)4) | 373 | #define UUID_JUST_CREATED ((__u64)4) |
359 | 374 | ||
375 | enum write_ordering_e { | ||
376 | WO_NONE, | ||
377 | WO_DRAIN_IO, | ||
378 | WO_BDEV_FLUSH, | ||
379 | WO_BIO_BARRIER | ||
380 | }; | ||
381 | |||
360 | /* magic numbers used in meta data and network packets */ | 382 | /* magic numbers used in meta data and network packets */ |
361 | #define DRBD_MAGIC 0x83740267 | 383 | #define DRBD_MAGIC 0x83740267 |
362 | #define DRBD_MAGIC_BIG 0x835a | 384 | #define DRBD_MAGIC_BIG 0x835a |
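The drbd.h additions above include enum drbd_notification_type, whose NOTIFY_CONTINUES value is a flag bit (0x8000) rather than another base type, with NOTIFY_FLAGS as the corresponding mask; it is presumably carried together with the base type in the nh_type field of the notification header defined further down in drbd_genl.h. A hedged standalone sketch of splitting such a value back apart (the received nh_type value is made up):

    #include <stdio.h>

    /* Mirrors enum drbd_notification_type from include/linux/drbd.h. */
    enum drbd_notification_type {
            NOTIFY_EXISTS,
            NOTIFY_CREATE,
            NOTIFY_CHANGE,
            NOTIFY_DESTROY,
            NOTIFY_CALL,
            NOTIFY_RESPONSE,

            NOTIFY_CONTINUES = 0x8000,
            NOTIFY_FLAGS = NOTIFY_CONTINUES,
    };

    int main(void)
    {
            /* Hypothetical nh_type as it might arrive in a notification header. */
            unsigned int nh_type = NOTIFY_CHANGE | NOTIFY_CONTINUES;

            unsigned int type  = nh_type & ~NOTIFY_FLAGS;     /* base notification type */
            unsigned int multi = nh_type & NOTIFY_CONTINUES;  /* more messages follow */

            printf("type=%u continues=%s\n", type, multi ? "yes" : "no");
            return 0;
    }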
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 7b131ed8f9c6..2d0e5ad5de9d 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h | |||
@@ -250,6 +250,76 @@ GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, | |||
250 | __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) | 250 | __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) |
251 | ) | 251 | ) |
252 | 252 | ||
253 | GENL_struct(DRBD_NLA_RESOURCE_INFO, 15, resource_info, | ||
254 | __u32_field(1, 0, res_role) | ||
255 | __flg_field(2, 0, res_susp) | ||
256 | __flg_field(3, 0, res_susp_nod) | ||
257 | __flg_field(4, 0, res_susp_fen) | ||
258 | /* __flg_field(5, 0, res_weak) */ | ||
259 | ) | ||
260 | |||
261 | GENL_struct(DRBD_NLA_DEVICE_INFO, 16, device_info, | ||
262 | __u32_field(1, 0, dev_disk_state) | ||
263 | ) | ||
264 | |||
265 | GENL_struct(DRBD_NLA_CONNECTION_INFO, 17, connection_info, | ||
266 | __u32_field(1, 0, conn_connection_state) | ||
267 | __u32_field(2, 0, conn_role) | ||
268 | ) | ||
269 | |||
270 | GENL_struct(DRBD_NLA_PEER_DEVICE_INFO, 18, peer_device_info, | ||
271 | __u32_field(1, 0, peer_repl_state) | ||
272 | __u32_field(2, 0, peer_disk_state) | ||
273 | __u32_field(3, 0, peer_resync_susp_user) | ||
274 | __u32_field(4, 0, peer_resync_susp_peer) | ||
275 | __u32_field(5, 0, peer_resync_susp_dependency) | ||
276 | ) | ||
277 | |||
278 | GENL_struct(DRBD_NLA_RESOURCE_STATISTICS, 19, resource_statistics, | ||
279 | __u32_field(1, 0, res_stat_write_ordering) | ||
280 | ) | ||
281 | |||
282 | GENL_struct(DRBD_NLA_DEVICE_STATISTICS, 20, device_statistics, | ||
283 | __u64_field(1, 0, dev_size) /* (sectors) */ | ||
284 | __u64_field(2, 0, dev_read) /* (sectors) */ | ||
285 | __u64_field(3, 0, dev_write) /* (sectors) */ | ||
286 | __u64_field(4, 0, dev_al_writes) /* activity log writes (count) */ | ||
287 | __u64_field(5, 0, dev_bm_writes) /* bitmap writes (count) */ | ||
288 | __u32_field(6, 0, dev_upper_pending) /* application requests in progress */ | ||
289 | __u32_field(7, 0, dev_lower_pending) /* backing device requests in progress */ | ||
290 | __flg_field(8, 0, dev_upper_blocked) | ||
291 | __flg_field(9, 0, dev_lower_blocked) | ||
292 | __flg_field(10, 0, dev_al_suspended) /* activity log suspended */ | ||
293 | __u64_field(11, 0, dev_exposed_data_uuid) | ||
294 | __u64_field(12, 0, dev_current_uuid) | ||
295 | __u32_field(13, 0, dev_disk_flags) | ||
296 | __bin_field(14, 0, history_uuids, HISTORY_UUIDS * sizeof(__u64)) | ||
297 | ) | ||
298 | |||
299 | GENL_struct(DRBD_NLA_CONNECTION_STATISTICS, 21, connection_statistics, | ||
300 | __flg_field(1, 0, conn_congested) | ||
301 | ) | ||
302 | |||
303 | GENL_struct(DRBD_NLA_PEER_DEVICE_STATISTICS, 22, peer_device_statistics, | ||
304 | __u64_field(1, 0, peer_dev_received) /* sectors */ | ||
305 | __u64_field(2, 0, peer_dev_sent) /* sectors */ | ||
306 | __u32_field(3, 0, peer_dev_pending) /* number of requests */ | ||
307 | __u32_field(4, 0, peer_dev_unacked) /* number of requests */ | ||
308 | __u64_field(5, 0, peer_dev_out_of_sync) /* sectors */ | ||
309 | __u64_field(6, 0, peer_dev_resync_failed) /* sectors */ | ||
310 | __u64_field(7, 0, peer_dev_bitmap_uuid) | ||
311 | __u32_field(9, 0, peer_dev_flags) | ||
312 | ) | ||
313 | |||
314 | GENL_struct(DRBD_NLA_NOTIFICATION_HEADER, 23, drbd_notification_header, | ||
315 | __u32_field(1, DRBD_GENLA_F_MANDATORY, nh_type) | ||
316 | ) | ||
317 | |||
318 | GENL_struct(DRBD_NLA_HELPER, 24, drbd_helper_info, | ||
319 | __str_field(1, DRBD_GENLA_F_MANDATORY, helper_name, 32) | ||
320 | __u32_field(2, DRBD_GENLA_F_MANDATORY, helper_status) | ||
321 | ) | ||
322 | |||
253 | /* | 323 | /* |
254 | * Notifications and commands (genlmsghdr->cmd) | 324 | * Notifications and commands (genlmsghdr->cmd) |
255 | */ | 325 | */ |
@@ -382,3 +452,82 @@ GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), | |||
382 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | 452 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) |
383 | GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), | 453 | GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), |
384 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) | 454 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) |
455 | |||
456 | GENL_op(DRBD_ADM_GET_RESOURCES, 30, | ||
457 | GENL_op_init( | ||
458 | .dumpit = drbd_adm_dump_resources, | ||
459 | ), | ||
460 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) | ||
461 | GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_GENLA_F_MANDATORY) | ||
462 | GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_GENLA_F_MANDATORY)) | ||
463 | |||
464 | GENL_op(DRBD_ADM_GET_DEVICES, 31, | ||
465 | GENL_op_init( | ||
466 | .dumpit = drbd_adm_dump_devices, | ||
467 | .done = drbd_adm_dump_devices_done, | ||
468 | ), | ||
469 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) | ||
470 | GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_GENLA_F_MANDATORY) | ||
471 | GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY)) | ||
472 | |||
473 | GENL_op(DRBD_ADM_GET_CONNECTIONS, 32, | ||
474 | GENL_op_init( | ||
475 | .dumpit = drbd_adm_dump_connections, | ||
476 | .done = drbd_adm_dump_connections_done, | ||
477 | ), | ||
478 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) | ||
479 | GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_GENLA_F_MANDATORY) | ||
480 | GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_GENLA_F_MANDATORY)) | ||
481 | |||
482 | GENL_op(DRBD_ADM_GET_PEER_DEVICES, 33, | ||
483 | GENL_op_init( | ||
484 | .dumpit = drbd_adm_dump_peer_devices, | ||
485 | .done = drbd_adm_dump_peer_devices_done, | ||
486 | ), | ||
487 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) | ||
488 | GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_GENLA_F_MANDATORY) | ||
489 | GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY)) | ||
490 | |||
491 | GENL_notification( | ||
492 | DRBD_RESOURCE_STATE, 34, events, | ||
493 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||
494 | GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED) | ||
495 | GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_F_REQUIRED) | ||
496 | GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_F_REQUIRED)) | ||
497 | |||
498 | GENL_notification( | ||
499 | DRBD_DEVICE_STATE, 35, events, | ||
500 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||
501 | GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED) | ||
502 | GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_F_REQUIRED) | ||
503 | GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_F_REQUIRED)) | ||
504 | |||
505 | GENL_notification( | ||
506 | DRBD_CONNECTION_STATE, 36, events, | ||
507 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||
508 | GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED) | ||
509 | GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_F_REQUIRED) | ||
510 | GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_F_REQUIRED)) | ||
511 | |||
512 | GENL_notification( | ||
513 | DRBD_PEER_DEVICE_STATE, 37, events, | ||
514 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||
515 | GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED) | ||
516 | GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_F_REQUIRED) | ||
517 | GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_F_REQUIRED)) | ||
518 | |||
519 | GENL_op( | ||
520 | DRBD_ADM_GET_INITIAL_STATE, 38, | ||
521 | GENL_op_init( | ||
522 | .dumpit = drbd_adm_get_initial_state, | ||
523 | ), | ||
524 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)) | ||
525 | |||
526 | GENL_notification( | ||
527 | DRBD_HELPER, 40, events, | ||
528 | GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) | ||
529 | GENL_tla_expected(DRBD_NLA_HELPER, DRBD_F_REQUIRED)) | ||
530 | |||
531 | GENL_notification( | ||
532 | DRBD_INITIAL_STATE_DONE, 41, events, | ||
533 | GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)) | ||
diff --git a/include/linux/idr.h b/include/linux/idr.h index 013fd9bc4cb6..083d61e92706 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h | |||
@@ -135,6 +135,20 @@ static inline void *idr_find(struct idr *idr, int id) | |||
135 | #define idr_for_each_entry(idp, entry, id) \ | 135 | #define idr_for_each_entry(idp, entry, id) \ |
136 | for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) | 136 | for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) |
137 | 137 | ||
138 | /** | ||
139 | * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type | ||
140 | * @idp: idr handle | ||
141 | * @entry: the type * to use as cursor | ||
142 | * @id: id entry's key | ||
143 | * | ||
144 | * Continue to iterate over list of given type, continuing after | ||
145 | * the current position. | ||
146 | */ | ||
147 | #define idr_for_each_entry_continue(idp, entry, id) \ | ||
148 | for ((entry) = idr_get_next((idp), &(id)); \ | ||
149 | entry; \ | ||
150 | ++id, (entry) = idr_get_next((idp), &(id))) | ||
151 | |||
138 | /* | 152 | /* |
139 | * IDA - IDR based id allocator, use when translation from id to | 153 | * IDA - IDR based id allocator, use when translation from id to |
140 | * pointer isn't necessary. | 154 | * pointer isn't necessary. |
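The new idr_for_each_entry_continue() above picks up an idr walk at the caller-supplied id instead of restarting from zero, which is what lets a dump be resumed across multiple passes. A kernel-context sketch of the usage pattern (struct item and walk_from() are hypothetical; only the macro itself comes from the patch):

    #include <linux/idr.h>
    #include <linux/printk.h>

    /* Hypothetical element type stored in the idr. */
    struct item {
            int id;
            const char *name;
    };

    static void walk_from(struct idr *idr, int start_id)
    {
            struct item *entry;
            int id = start_id;

            /*
             * idr_for_each_entry() always starts at id 0; the _continue
             * variant starts at 'id' and leaves it advanced, so the walk can
             * be resumed later (e.g. after dropping a lock between batches).
             */
            idr_for_each_entry_continue(idr, entry, id)
                    pr_info("id %d -> %s\n", id, entry->name);
    }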
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 46262284de47..04fc6e6c7ff0 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h | |||
@@ -264,7 +264,7 @@ extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); | |||
264 | extern void lc_committed(struct lru_cache *lc); | 264 | extern void lc_committed(struct lru_cache *lc); |
265 | 265 | ||
266 | struct seq_file; | 266 | struct seq_file; |
267 | extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); | 267 | extern void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); |
268 | 268 | ||
269 | extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, | 269 | extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, |
270 | void (*detail) (struct seq_file *, struct lc_element *)); | 270 | void (*detail) (struct seq_file *, struct lc_element *)); |
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index c33e1c489eb2..8b8cfadf7833 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h | |||
@@ -28,6 +28,54 @@ typedef uint16_t blkif_vdev_t; | |||
28 | typedef uint64_t blkif_sector_t; | 28 | typedef uint64_t blkif_sector_t; |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * Multiple hardware queues/rings: | ||
32 | * If supported, the backend will write the key "multi-queue-max-queues" to | ||
33 | * the directory for that vbd, and set its value to the maximum supported | ||
34 | * number of queues. | ||
35 | * Frontends that are aware of this feature and wish to use it can write the | ||
36 | * key "multi-queue-num-queues" with the number they wish to use, which must be | ||
37 | * greater than zero, and no more than the value reported by the backend in | ||
38 | * "multi-queue-max-queues". | ||
39 | * | ||
40 | * For frontends requesting just one queue, the usual event-channel and | ||
41 | * ring-ref keys are written as before, simplifying the backend processing | ||
42 | * to avoid distinguishing between a frontend that doesn't understand the | ||
43 | * multi-queue feature, and one that does, but requested only one queue. | ||
44 | * | ||
45 | * Frontends requesting two or more queues must not write the toplevel | ||
46 | * event-channel and ring-ref keys, instead writing those keys under sub-keys | ||
47 | * having the name "queue-N" where N is the integer ID of the queue/ring for | ||
48 | * which those keys belong. Queues are indexed from zero. | ||
49 | * For example, a frontend with two queues must write the following set of | ||
50 | * queue-related keys: | ||
51 | * | ||
52 | * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" | ||
53 | * /local/domain/1/device/vbd/0/queue-0 = "" | ||
54 | * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>" | ||
55 | * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" | ||
56 | * /local/domain/1/device/vbd/0/queue-1 = "" | ||
57 | * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>" | ||
58 | * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" | ||
59 | * | ||
60 | * It is also possible to use multiple queues/rings together with the | ||
61 | * multi-page ring buffer feature. | ||
62 | * For example, a frontend requesting two queues/rings, each with a ring | ||
63 | * buffer of two pages, must write the following set of related keys: | ||
64 | * | ||
65 | * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" | ||
66 | * /local/domain/1/device/vbd/0/ring-page-order = "1" | ||
67 | * /local/domain/1/device/vbd/0/queue-0 = "" | ||
68 | * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>" | ||
69 | * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>" | ||
70 | * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" | ||
71 | * /local/domain/1/device/vbd/0/queue-1 = "" | ||
72 | * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>" | ||
73 | * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>" | ||
74 | * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" | ||
75 | * | ||
76 | */ | ||
77 | |||
78 | /* | ||
31 | * REQUEST CODES. | 79 | * REQUEST CODES. |
32 | */ | 80 | */ |
33 | #define BLKIF_OP_READ 0 | 81 | #define BLKIF_OP_READ 0 |
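The comment block added to blkif.h above defines the xenstore layout for multi-queue negotiation. A kernel-context sketch of a frontend emitting those keys for more than one queue, with a single ring page per queue (struct queue_state and write_queue_keys() are illustrative only; the actual frontend code for this is the write_per_ring_nodes()/talk_to_blkback() hunks earlier in this diff):

    #include <linux/kernel.h>
    #include <xen/xenbus.h>

    /* Illustrative per-queue state: one grant reference and one event channel. */
    struct queue_state {
            unsigned int ring_ref;          /* grant_ref_t in the real driver */
            unsigned int evtchn;
    };

    static int write_queue_keys(struct xenbus_transaction xbt,
                                struct xenbus_device *dev,
                                struct queue_state *q, unsigned int nr_queues)
    {
            char path[64];
            unsigned int i;
            int err;

            /* Advertise how many queues this frontend will use. */
            err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues",
                                "%u", nr_queues);
            if (err)
                    return err;

            /* Per-queue sub-directories: queue-N/ring-ref and queue-N/event-channel. */
            for (i = 0; i < nr_queues; i++) {
                    snprintf(path, sizeof(path), "%s/queue-%u", dev->nodename, i);

                    err = xenbus_printf(xbt, path, "ring-ref", "%u", q[i].ring_ref);
                    if (err)
                            return err;

                    err = xenbus_printf(xbt, path, "event-channel", "%u", q[i].evtchn);
                    if (err)
                            return err;
            }

            return 0;
    }

For multi-page rings the per-queue "ring-ref" key would instead be a set of "ring-ref0", "ring-ref1", ... keys, as the key listing in the comment above shows.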
diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 028f5d996eef..28ba40b99337 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c | |||
@@ -238,7 +238,7 @@ void lc_reset(struct lru_cache *lc) | |||
238 | * @seq: the seq_file to print into | 238 | * @seq: the seq_file to print into |
239 | * @lc: the lru cache to print statistics of | 239 | * @lc: the lru cache to print statistics of |
240 | */ | 240 | */ |
241 | size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) | 241 | void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) |
242 | { | 242 | { |
243 | /* NOTE: | 243 | /* NOTE: |
244 | * total calls to lc_get are | 244 | * total calls to lc_get are |
@@ -250,8 +250,6 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) | |||
250 | seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", | 250 | seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", |
251 | lc->name, lc->used, lc->nr_elements, | 251 | lc->name, lc->used, lc->nr_elements, |
252 | lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); | 252 | lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); |
253 | |||
254 | return 0; | ||
255 | } | 253 | } |
256 | 254 | ||
257 | static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) | 255 | static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) |