diff options
author | Philipp Reisner <philipp.reisner@linbit.com> | 2011-01-27 08:07:51 -0500 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2011-09-28 04:26:43 -0400 |
commit | b8907339534b8d17f6aad9e9cc98d490aa0c6137 (patch) | |
tree | 13f8142eccb0e17447d96996a21ee8c2ec1f8704 /drivers/block/drbd | |
parent | db830c464b69e26ea4d371e38bb2320c99c82f41 (diff) |
drbd: Moved the state functions into its own source file
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- | drivers/block/drbd/Makefile | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 46 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 1179 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 1217 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.h | 101 |
5 files changed, 1326 insertions, 1219 deletions
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index cacbb04f285d..06fb4453734c 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile | |||
@@ -1,6 +1,6 @@ | |||
1 | drbd-y := drbd_bitmap.o drbd_proc.o | 1 | drbd-y := drbd_bitmap.o drbd_proc.o |
2 | drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o | 2 | drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o |
3 | drbd-y += drbd_main.o drbd_strings.o drbd_nl.o | 3 | drbd-y += drbd_main.o drbd_strings.o drbd_nl.o |
4 | drbd-y += drbd_interval.o | 4 | drbd-y += drbd_interval.o drbd_state.o |
5 | 5 | ||
6 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o | 6 | obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 302ccc6d9432..98addab2c928 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -43,6 +43,8 @@ | |||
43 | #include <net/tcp.h> | 43 | #include <net/tcp.h> |
44 | #include <linux/lru_cache.h> | 44 | #include <linux/lru_cache.h> |
45 | #include <linux/prefetch.h> | 45 | #include <linux/prefetch.h> |
46 | #include <linux/drbd.h> | ||
47 | #include "drbd_state.h" | ||
46 | 48 | ||
47 | #ifdef __CHECKER__ | 49 | #ifdef __CHECKER__ |
48 | # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) | 50 | # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) |
@@ -1120,35 +1122,12 @@ static inline void drbd_put_data_sock(struct drbd_conf *mdev) | |||
1120 | 1122 | ||
1121 | /* drbd_main.c */ | 1123 | /* drbd_main.c */ |
1122 | 1124 | ||
1123 | enum chg_state_flags { | ||
1124 | CS_HARD = 1, | ||
1125 | CS_VERBOSE = 2, | ||
1126 | CS_WAIT_COMPLETE = 4, | ||
1127 | CS_SERIALIZE = 8, | ||
1128 | CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, | ||
1129 | }; | ||
1130 | |||
1131 | enum dds_flags { | 1125 | enum dds_flags { |
1132 | DDSF_FORCED = 1, | 1126 | DDSF_FORCED = 1, |
1133 | DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */ | 1127 | DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */ |
1134 | }; | 1128 | }; |
1135 | 1129 | ||
1136 | extern void drbd_init_set_defaults(struct drbd_conf *mdev); | 1130 | extern void drbd_init_set_defaults(struct drbd_conf *mdev); |
1137 | extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, | ||
1138 | enum chg_state_flags f, | ||
1139 | union drbd_state mask, | ||
1140 | union drbd_state val); | ||
1141 | extern void drbd_force_state(struct drbd_conf *, union drbd_state, | ||
1142 | union drbd_state); | ||
1143 | extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, | ||
1144 | union drbd_state, | ||
1145 | union drbd_state, | ||
1146 | enum chg_state_flags); | ||
1147 | extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, | ||
1148 | enum chg_state_flags, | ||
1149 | struct completion *done); | ||
1150 | extern void print_st_err(struct drbd_conf *, union drbd_state, | ||
1151 | union drbd_state, int); | ||
1152 | extern int drbd_thread_start(struct drbd_thread *thi); | 1131 | extern int drbd_thread_start(struct drbd_thread *thi); |
1153 | extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); | 1132 | extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); |
1154 | #ifdef CONFIG_SMP | 1133 | #ifdef CONFIG_SMP |
@@ -1712,6 +1691,10 @@ static inline int drbd_ee_has_active_page(struct drbd_peer_request *peer_req) | |||
1712 | } | 1691 | } |
1713 | 1692 | ||
1714 | 1693 | ||
1694 | |||
1695 | |||
1696 | |||
1697 | |||
1715 | static inline void drbd_state_lock(struct drbd_conf *mdev) | 1698 | static inline void drbd_state_lock(struct drbd_conf *mdev) |
1716 | { | 1699 | { |
1717 | wait_event(mdev->misc_wait, | 1700 | wait_event(mdev->misc_wait, |
@@ -1737,23 +1720,6 @@ _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, | |||
1737 | return rv; | 1720 | return rv; |
1738 | } | 1721 | } |
1739 | 1722 | ||
1740 | /** | ||
1741 | * drbd_request_state() - Request a state change | ||
1742 | * @mdev: DRBD device. | ||
1743 | * @mask: mask of state bits to change. | ||
1744 | * @val: value of new state bits. | ||
1745 | * | ||
1746 | * This is the most graceful way of requesting a state change. It is | ||
1747 | * quite verbose in case the state change is not possible, and all those | ||
1748 | * state changes are globally serialized. | ||
1749 | */ | ||
1750 | static inline int drbd_request_state(struct drbd_conf *mdev, | ||
1751 | union drbd_state mask, | ||
1752 | union drbd_state val) | ||
1753 | { | ||
1754 | return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); | ||
1755 | } | ||
1756 | |||
1757 | #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) | 1723 | #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) |
1758 | static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) | 1724 | static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) |
1759 | { | 1725 | { |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7728d1613406..4b39b3d0dd55 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -56,14 +56,6 @@ | |||
56 | 56 | ||
57 | #include "drbd_vli.h" | 57 | #include "drbd_vli.h" |
58 | 58 | ||
59 | struct after_state_chg_work { | ||
60 | struct drbd_work w; | ||
61 | union drbd_state os; | ||
62 | union drbd_state ns; | ||
63 | enum chg_state_flags flags; | ||
64 | struct completion *done; | ||
65 | }; | ||
66 | |||
67 | static DEFINE_MUTEX(drbd_main_mutex); | 59 | static DEFINE_MUTEX(drbd_main_mutex); |
68 | int drbdd_init(struct drbd_thread *); | 60 | int drbdd_init(struct drbd_thread *); |
69 | int drbd_worker(struct drbd_thread *); | 61 | int drbd_worker(struct drbd_thread *); |
@@ -72,9 +64,6 @@ int drbd_asender(struct drbd_thread *); | |||
72 | int drbd_init(void); | 64 | int drbd_init(void); |
73 | static int drbd_open(struct block_device *bdev, fmode_t mode); | 65 | static int drbd_open(struct block_device *bdev, fmode_t mode); |
74 | static int drbd_release(struct gendisk *gd, fmode_t mode); | 66 | static int drbd_release(struct gendisk *gd, fmode_t mode); |
75 | static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); | ||
76 | static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | ||
77 | union drbd_state ns, enum chg_state_flags flags); | ||
78 | static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); | 67 | static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); |
79 | static void md_sync_timer_fn(unsigned long data); | 68 | static void md_sync_timer_fn(unsigned long data); |
80 | static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); | 69 | static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); |
@@ -340,7 +329,7 @@ bail: | |||
340 | * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, | 329 | * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, |
341 | * RESTART_FROZEN_DISK_IO. | 330 | * RESTART_FROZEN_DISK_IO. |
342 | */ | 331 | */ |
343 | static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) | 332 | void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) |
344 | { | 333 | { |
345 | struct drbd_tl_epoch *b, *tmp, **pn; | 334 | struct drbd_tl_epoch *b, *tmp, **pn; |
346 | struct list_head *le, *tle, carry_reads; | 335 | struct list_head *le, *tle, carry_reads; |
@@ -450,1172 +439,6 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) | |||
450 | spin_unlock_irq(&mdev->tconn->req_lock); | 439 | spin_unlock_irq(&mdev->tconn->req_lock); |
451 | } | 440 | } |
452 | 441 | ||
453 | /** | ||
454 | * cl_wide_st_chg() - true if the state change is a cluster wide one | ||
455 | * @mdev: DRBD device. | ||
456 | * @os: old (current) state. | ||
457 | * @ns: new (wanted) state. | ||
458 | */ | ||
459 | static int cl_wide_st_chg(struct drbd_conf *mdev, | ||
460 | union drbd_state os, union drbd_state ns) | ||
461 | { | ||
462 | return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && | ||
463 | ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || | ||
464 | (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || | ||
465 | (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || | ||
466 | (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || | ||
467 | (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || | ||
468 | (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); | ||
469 | } | ||
470 | |||
471 | enum drbd_state_rv | ||
472 | drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, | ||
473 | union drbd_state mask, union drbd_state val) | ||
474 | { | ||
475 | unsigned long flags; | ||
476 | union drbd_state os, ns; | ||
477 | enum drbd_state_rv rv; | ||
478 | |||
479 | spin_lock_irqsave(&mdev->tconn->req_lock, flags); | ||
480 | os = mdev->state; | ||
481 | ns.i = (os.i & ~mask.i) | val.i; | ||
482 | rv = _drbd_set_state(mdev, ns, f, NULL); | ||
483 | ns = mdev->state; | ||
484 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||
485 | |||
486 | return rv; | ||
487 | } | ||
488 | |||
489 | /** | ||
490 | * drbd_force_state() - Impose a change which happens outside our control on our state | ||
491 | * @mdev: DRBD device. | ||
492 | * @mask: mask of state bits to change. | ||
493 | * @val: value of new state bits. | ||
494 | */ | ||
495 | void drbd_force_state(struct drbd_conf *mdev, | ||
496 | union drbd_state mask, union drbd_state val) | ||
497 | { | ||
498 | drbd_change_state(mdev, CS_HARD, mask, val); | ||
499 | } | ||
500 | |||
501 | static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); | ||
502 | static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, | ||
503 | union drbd_state, | ||
504 | union drbd_state); | ||
505 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, | ||
506 | union drbd_state ns, const char **warn_sync_abort); | ||
507 | int drbd_send_state_req(struct drbd_conf *, | ||
508 | union drbd_state, union drbd_state); | ||
509 | |||
510 | static enum drbd_state_rv | ||
511 | _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, | ||
512 | union drbd_state val) | ||
513 | { | ||
514 | union drbd_state os, ns; | ||
515 | unsigned long flags; | ||
516 | enum drbd_state_rv rv; | ||
517 | |||
518 | if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) | ||
519 | return SS_CW_SUCCESS; | ||
520 | |||
521 | if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) | ||
522 | return SS_CW_FAILED_BY_PEER; | ||
523 | |||
524 | rv = 0; | ||
525 | spin_lock_irqsave(&mdev->tconn->req_lock, flags); | ||
526 | os = mdev->state; | ||
527 | ns.i = (os.i & ~mask.i) | val.i; | ||
528 | ns = sanitize_state(mdev, os, ns, NULL); | ||
529 | |||
530 | if (!cl_wide_st_chg(mdev, os, ns)) | ||
531 | rv = SS_CW_NO_NEED; | ||
532 | if (!rv) { | ||
533 | rv = is_valid_state(mdev, ns); | ||
534 | if (rv == SS_SUCCESS) { | ||
535 | rv = is_valid_state_transition(mdev, ns, os); | ||
536 | if (rv == SS_SUCCESS) | ||
537 | rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ | ||
538 | } | ||
539 | } | ||
540 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||
541 | |||
542 | return rv; | ||
543 | } | ||
544 | |||
545 | /** | ||
546 | * drbd_req_state() - Perform an eventually cluster wide state change | ||
547 | * @mdev: DRBD device. | ||
548 | * @mask: mask of state bits to change. | ||
549 | * @val: value of new state bits. | ||
550 | * @f: flags | ||
551 | * | ||
552 | * Should not be called directly, use drbd_request_state() or | ||
553 | * _drbd_request_state(). | ||
554 | */ | ||
555 | static enum drbd_state_rv | ||
556 | drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, | ||
557 | union drbd_state val, enum chg_state_flags f) | ||
558 | { | ||
559 | struct completion done; | ||
560 | unsigned long flags; | ||
561 | union drbd_state os, ns; | ||
562 | enum drbd_state_rv rv; | ||
563 | |||
564 | init_completion(&done); | ||
565 | |||
566 | if (f & CS_SERIALIZE) | ||
567 | mutex_lock(&mdev->state_mutex); | ||
568 | |||
569 | spin_lock_irqsave(&mdev->tconn->req_lock, flags); | ||
570 | os = mdev->state; | ||
571 | ns.i = (os.i & ~mask.i) | val.i; | ||
572 | ns = sanitize_state(mdev, os, ns, NULL); | ||
573 | |||
574 | if (cl_wide_st_chg(mdev, os, ns)) { | ||
575 | rv = is_valid_state(mdev, ns); | ||
576 | if (rv == SS_SUCCESS) | ||
577 | rv = is_valid_state_transition(mdev, ns, os); | ||
578 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||
579 | |||
580 | if (rv < SS_SUCCESS) { | ||
581 | if (f & CS_VERBOSE) | ||
582 | print_st_err(mdev, os, ns, rv); | ||
583 | goto abort; | ||
584 | } | ||
585 | |||
586 | drbd_state_lock(mdev); | ||
587 | if (!drbd_send_state_req(mdev, mask, val)) { | ||
588 | drbd_state_unlock(mdev); | ||
589 | rv = SS_CW_FAILED_BY_PEER; | ||
590 | if (f & CS_VERBOSE) | ||
591 | print_st_err(mdev, os, ns, rv); | ||
592 | goto abort; | ||
593 | } | ||
594 | |||
595 | wait_event(mdev->state_wait, | ||
596 | (rv = _req_st_cond(mdev, mask, val))); | ||
597 | |||
598 | if (rv < SS_SUCCESS) { | ||
599 | drbd_state_unlock(mdev); | ||
600 | if (f & CS_VERBOSE) | ||
601 | print_st_err(mdev, os, ns, rv); | ||
602 | goto abort; | ||
603 | } | ||
604 | spin_lock_irqsave(&mdev->tconn->req_lock, flags); | ||
605 | os = mdev->state; | ||
606 | ns.i = (os.i & ~mask.i) | val.i; | ||
607 | rv = _drbd_set_state(mdev, ns, f, &done); | ||
608 | drbd_state_unlock(mdev); | ||
609 | } else { | ||
610 | rv = _drbd_set_state(mdev, ns, f, &done); | ||
611 | } | ||
612 | |||
613 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||
614 | |||
615 | if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { | ||
616 | D_ASSERT(current != mdev->tconn->worker.task); | ||
617 | wait_for_completion(&done); | ||
618 | } | ||
619 | |||
620 | abort: | ||
621 | if (f & CS_SERIALIZE) | ||
622 | mutex_unlock(&mdev->state_mutex); | ||
623 | |||
624 | return rv; | ||
625 | } | ||
626 | |||
627 | /** | ||
628 | * _drbd_request_state() - Request a state change (with flags) | ||
629 | * @mdev: DRBD device. | ||
630 | * @mask: mask of state bits to change. | ||
631 | * @val: value of new state bits. | ||
632 | * @f: flags | ||
633 | * | ||
634 | * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE | ||
635 | * flag, or when logging of failed state change requests is not desired. | ||
636 | */ | ||
637 | enum drbd_state_rv | ||
638 | _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, | ||
639 | union drbd_state val, enum chg_state_flags f) | ||
640 | { | ||
641 | enum drbd_state_rv rv; | ||
642 | |||
643 | wait_event(mdev->state_wait, | ||
644 | (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); | ||
645 | |||
646 | return rv; | ||
647 | } | ||
648 | |||
649 | static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) | ||
650 | { | ||
651 | dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", | ||
652 | name, | ||
653 | drbd_conn_str(ns.conn), | ||
654 | drbd_role_str(ns.role), | ||
655 | drbd_role_str(ns.peer), | ||
656 | drbd_disk_str(ns.disk), | ||
657 | drbd_disk_str(ns.pdsk), | ||
658 | is_susp(ns) ? 's' : 'r', | ||
659 | ns.aftr_isp ? 'a' : '-', | ||
660 | ns.peer_isp ? 'p' : '-', | ||
661 | ns.user_isp ? 'u' : '-' | ||
662 | ); | ||
663 | } | ||
664 | |||
665 | void print_st_err(struct drbd_conf *mdev, union drbd_state os, | ||
666 | union drbd_state ns, enum drbd_state_rv err) | ||
667 | { | ||
668 | if (err == SS_IN_TRANSIENT_STATE) | ||
669 | return; | ||
670 | dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); | ||
671 | print_st(mdev, " state", os); | ||
672 | print_st(mdev, "wanted", ns); | ||
673 | } | ||
674 | |||
675 | |||
676 | /** | ||
677 | * is_valid_state() - Returns an SS_ error code if ns is not valid | ||
678 | * @mdev: DRBD device. | ||
679 | * @ns: State to consider. | ||
680 | */ | ||
681 | static enum drbd_state_rv | ||
682 | is_valid_state(struct drbd_conf *mdev, union drbd_state ns) | ||
683 | { | ||
684 | /* See drbd_state_sw_errors in drbd_strings.c */ | ||
685 | |||
686 | enum drbd_fencing_p fp; | ||
687 | enum drbd_state_rv rv = SS_SUCCESS; | ||
688 | |||
689 | fp = FP_DONT_CARE; | ||
690 | if (get_ldev(mdev)) { | ||
691 | fp = mdev->ldev->dc.fencing; | ||
692 | put_ldev(mdev); | ||
693 | } | ||
694 | |||
695 | if (get_net_conf(mdev->tconn)) { | ||
696 | if (!mdev->tconn->net_conf->two_primaries && | ||
697 | ns.role == R_PRIMARY && ns.peer == R_PRIMARY) | ||
698 | rv = SS_TWO_PRIMARIES; | ||
699 | put_net_conf(mdev->tconn); | ||
700 | } | ||
701 | |||
702 | if (rv <= 0) | ||
703 | /* already found a reason to abort */; | ||
704 | else if (ns.role == R_SECONDARY && mdev->open_cnt) | ||
705 | rv = SS_DEVICE_IN_USE; | ||
706 | |||
707 | else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) | ||
708 | rv = SS_NO_UP_TO_DATE_DISK; | ||
709 | |||
710 | else if (fp >= FP_RESOURCE && | ||
711 | ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) | ||
712 | rv = SS_PRIMARY_NOP; | ||
713 | |||
714 | else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) | ||
715 | rv = SS_NO_UP_TO_DATE_DISK; | ||
716 | |||
717 | else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) | ||
718 | rv = SS_NO_LOCAL_DISK; | ||
719 | |||
720 | else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) | ||
721 | rv = SS_NO_REMOTE_DISK; | ||
722 | |||
723 | else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) | ||
724 | rv = SS_NO_UP_TO_DATE_DISK; | ||
725 | |||
726 | else if ((ns.conn == C_CONNECTED || | ||
727 | ns.conn == C_WF_BITMAP_S || | ||
728 | ns.conn == C_SYNC_SOURCE || | ||
729 | ns.conn == C_PAUSED_SYNC_S) && | ||
730 | ns.disk == D_OUTDATED) | ||
731 | rv = SS_CONNECTED_OUTDATES; | ||
732 | |||
733 | else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && | ||
734 | (mdev->sync_conf.verify_alg[0] == 0)) | ||
735 | rv = SS_NO_VERIFY_ALG; | ||
736 | |||
737 | else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && | ||
738 | mdev->tconn->agreed_pro_version < 88) | ||
739 | rv = SS_NOT_SUPPORTED; | ||
740 | |||
741 | else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) | ||
742 | rv = SS_CONNECTED_OUTDATES; | ||
743 | |||
744 | return rv; | ||
745 | } | ||
746 | |||
747 | /** | ||
748 | * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible | ||
749 | * @mdev: DRBD device. | ||
750 | * @ns: new state. | ||
751 | * @os: old state. | ||
752 | */ | ||
753 | static enum drbd_state_rv | ||
754 | is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, | ||
755 | union drbd_state os) | ||
756 | { | ||
757 | enum drbd_state_rv rv = SS_SUCCESS; | ||
758 | |||
759 | if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && | ||
760 | os.conn > C_CONNECTED) | ||
761 | rv = SS_RESYNC_RUNNING; | ||
762 | |||
763 | if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) | ||
764 | rv = SS_ALREADY_STANDALONE; | ||
765 | |||
766 | if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) | ||
767 | rv = SS_IS_DISKLESS; | ||
768 | |||
769 | if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) | ||
770 | rv = SS_NO_NET_CONFIG; | ||
771 | |||
772 | if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) | ||
773 | rv = SS_LOWER_THAN_OUTDATED; | ||
774 | |||
775 | if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) | ||
776 | rv = SS_IN_TRANSIENT_STATE; | ||
777 | |||
778 | if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) | ||
779 | rv = SS_IN_TRANSIENT_STATE; | ||
780 | |||
781 | if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) | ||
782 | rv = SS_NEED_CONNECTION; | ||
783 | |||
784 | if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && | ||
785 | ns.conn != os.conn && os.conn > C_CONNECTED) | ||
786 | rv = SS_RESYNC_RUNNING; | ||
787 | |||
788 | if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && | ||
789 | os.conn < C_CONNECTED) | ||
790 | rv = SS_NEED_CONNECTION; | ||
791 | |||
792 | if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE) | ||
793 | && os.conn < C_WF_REPORT_PARAMS) | ||
794 | rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ | ||
795 | |||
796 | return rv; | ||
797 | } | ||
798 | |||
799 | /** | ||
800 | * sanitize_state() - Resolves implicitly necessary additional changes to a state transition | ||
801 | * @mdev: DRBD device. | ||
802 | * @os: old state. | ||
803 | * @ns: new state. | ||
804 | * @warn_sync_abort: | ||
805 | * | ||
806 | * When we lose connection, we have to set the state of the peer's disk (pdsk) | ||
807 | * to D_UNKNOWN. This rule and many more along those lines are in this function. | ||
808 | */ | ||
809 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, | ||
810 | union drbd_state ns, const char **warn_sync_abort) | ||
811 | { | ||
812 | enum drbd_fencing_p fp; | ||
813 | enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; | ||
814 | |||
815 | fp = FP_DONT_CARE; | ||
816 | if (get_ldev(mdev)) { | ||
817 | fp = mdev->ldev->dc.fencing; | ||
818 | put_ldev(mdev); | ||
819 | } | ||
820 | |||
821 | /* Disallow Network errors to configure a device's network part */ | ||
822 | if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && | ||
823 | os.conn <= C_DISCONNECTING) | ||
824 | ns.conn = os.conn; | ||
825 | |||
826 | /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. | ||
827 | * If you try to go into some Sync* state, that shall fail (elsewhere). */ | ||
828 | if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && | ||
829 | ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) | ||
830 | ns.conn = os.conn; | ||
831 | |||
832 | /* we cannot fail (again) if we already detached */ | ||
833 | if (ns.disk == D_FAILED && os.disk == D_DISKLESS) | ||
834 | ns.disk = D_DISKLESS; | ||
835 | |||
836 | /* if we are only D_ATTACHING yet, | ||
837 | * we can (and should) go directly to D_DISKLESS. */ | ||
838 | if (ns.disk == D_FAILED && os.disk == D_ATTACHING) | ||
839 | ns.disk = D_DISKLESS; | ||
840 | |||
841 | /* After C_DISCONNECTING only C_STANDALONE may follow */ | ||
842 | if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) | ||
843 | ns.conn = os.conn; | ||
844 | |||
845 | if (ns.conn < C_CONNECTED) { | ||
846 | ns.peer_isp = 0; | ||
847 | ns.peer = R_UNKNOWN; | ||
848 | if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) | ||
849 | ns.pdsk = D_UNKNOWN; | ||
850 | } | ||
851 | |||
852 | /* Clear the aftr_isp when becoming unconfigured */ | ||
853 | if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) | ||
854 | ns.aftr_isp = 0; | ||
855 | |||
856 | /* Abort resync if a disk fails/detaches */ | ||
857 | if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && | ||
858 | (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { | ||
859 | if (warn_sync_abort) | ||
860 | *warn_sync_abort = | ||
861 | os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? | ||
862 | "Online-verify" : "Resync"; | ||
863 | ns.conn = C_CONNECTED; | ||
864 | } | ||
865 | |||
866 | /* Connection breaks down before we finished "Negotiating" */ | ||
867 | if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && | ||
868 | get_ldev_if_state(mdev, D_NEGOTIATING)) { | ||
869 | if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { | ||
870 | ns.disk = mdev->new_state_tmp.disk; | ||
871 | ns.pdsk = mdev->new_state_tmp.pdsk; | ||
872 | } else { | ||
873 | dev_alert(DEV, "Connection lost while negotiating, no data!\n"); | ||
874 | ns.disk = D_DISKLESS; | ||
875 | ns.pdsk = D_UNKNOWN; | ||
876 | } | ||
877 | put_ldev(mdev); | ||
878 | } | ||
879 | |||
880 | /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ | ||
881 | if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) { | ||
882 | if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) | ||
883 | ns.disk = D_UP_TO_DATE; | ||
884 | if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED) | ||
885 | ns.pdsk = D_UP_TO_DATE; | ||
886 | } | ||
887 | |||
888 | /* Implications of the connection state on the disk states */ | ||
889 | disk_min = D_DISKLESS; | ||
890 | disk_max = D_UP_TO_DATE; | ||
891 | pdsk_min = D_INCONSISTENT; | ||
892 | pdsk_max = D_UNKNOWN; | ||
893 | switch ((enum drbd_conns)ns.conn) { | ||
894 | case C_WF_BITMAP_T: | ||
895 | case C_PAUSED_SYNC_T: | ||
896 | case C_STARTING_SYNC_T: | ||
897 | case C_WF_SYNC_UUID: | ||
898 | case C_BEHIND: | ||
899 | disk_min = D_INCONSISTENT; | ||
900 | disk_max = D_OUTDATED; | ||
901 | pdsk_min = D_UP_TO_DATE; | ||
902 | pdsk_max = D_UP_TO_DATE; | ||
903 | break; | ||
904 | case C_VERIFY_S: | ||
905 | case C_VERIFY_T: | ||
906 | disk_min = D_UP_TO_DATE; | ||
907 | disk_max = D_UP_TO_DATE; | ||
908 | pdsk_min = D_UP_TO_DATE; | ||
909 | pdsk_max = D_UP_TO_DATE; | ||
910 | break; | ||
911 | case C_CONNECTED: | ||
912 | disk_min = D_DISKLESS; | ||
913 | disk_max = D_UP_TO_DATE; | ||
914 | pdsk_min = D_DISKLESS; | ||
915 | pdsk_max = D_UP_TO_DATE; | ||
916 | break; | ||
917 | case C_WF_BITMAP_S: | ||
918 | case C_PAUSED_SYNC_S: | ||
919 | case C_STARTING_SYNC_S: | ||
920 | case C_AHEAD: | ||
921 | disk_min = D_UP_TO_DATE; | ||
922 | disk_max = D_UP_TO_DATE; | ||
923 | pdsk_min = D_INCONSISTENT; | ||
924 | pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/ | ||
925 | break; | ||
926 | case C_SYNC_TARGET: | ||
927 | disk_min = D_INCONSISTENT; | ||
928 | disk_max = D_INCONSISTENT; | ||
929 | pdsk_min = D_UP_TO_DATE; | ||
930 | pdsk_max = D_UP_TO_DATE; | ||
931 | break; | ||
932 | case C_SYNC_SOURCE: | ||
933 | disk_min = D_UP_TO_DATE; | ||
934 | disk_max = D_UP_TO_DATE; | ||
935 | pdsk_min = D_INCONSISTENT; | ||
936 | pdsk_max = D_INCONSISTENT; | ||
937 | break; | ||
938 | case C_STANDALONE: | ||
939 | case C_DISCONNECTING: | ||
940 | case C_UNCONNECTED: | ||
941 | case C_TIMEOUT: | ||
942 | case C_BROKEN_PIPE: | ||
943 | case C_NETWORK_FAILURE: | ||
944 | case C_PROTOCOL_ERROR: | ||
945 | case C_TEAR_DOWN: | ||
946 | case C_WF_CONNECTION: | ||
947 | case C_WF_REPORT_PARAMS: | ||
948 | case C_MASK: | ||
949 | break; | ||
950 | } | ||
951 | if (ns.disk > disk_max) | ||
952 | ns.disk = disk_max; | ||
953 | |||
954 | if (ns.disk < disk_min) { | ||
955 | dev_warn(DEV, "Implicitly set disk from %s to %s\n", | ||
956 | drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); | ||
957 | ns.disk = disk_min; | ||
958 | } | ||
959 | if (ns.pdsk > pdsk_max) | ||
960 | ns.pdsk = pdsk_max; | ||
961 | |||
962 | if (ns.pdsk < pdsk_min) { | ||
963 | dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", | ||
964 | drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); | ||
965 | ns.pdsk = pdsk_min; | ||
966 | } | ||
967 | |||
968 | if (fp == FP_STONITH && | ||
969 | (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && | ||
970 | !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) | ||
971 | ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ | ||
972 | |||
973 | if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && | ||
974 | (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && | ||
975 | !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) | ||
976 | ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ | ||
977 | |||
978 | if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { | ||
979 | if (ns.conn == C_SYNC_SOURCE) | ||
980 | ns.conn = C_PAUSED_SYNC_S; | ||
981 | if (ns.conn == C_SYNC_TARGET) | ||
982 | ns.conn = C_PAUSED_SYNC_T; | ||
983 | } else { | ||
984 | if (ns.conn == C_PAUSED_SYNC_S) | ||
985 | ns.conn = C_SYNC_SOURCE; | ||
986 | if (ns.conn == C_PAUSED_SYNC_T) | ||
987 | ns.conn = C_SYNC_TARGET; | ||
988 | } | ||
989 | |||
990 | return ns; | ||
991 | } | ||
992 | |||
993 | /* helper for __drbd_set_state */ | ||
994 | static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) | ||
995 | { | ||
996 | if (mdev->tconn->agreed_pro_version < 90) | ||
997 | mdev->ov_start_sector = 0; | ||
998 | mdev->rs_total = drbd_bm_bits(mdev); | ||
999 | mdev->ov_position = 0; | ||
1000 | if (cs == C_VERIFY_T) { | ||
1001 | /* starting online verify from an arbitrary position | ||
1002 | * does not fit well into the existing protocol. | ||
1003 | * on C_VERIFY_T, we initialize ov_left and friends | ||
1004 | * implicitly in receive_DataRequest once the | ||
1005 | * first P_OV_REQUEST is received */ | ||
1006 | mdev->ov_start_sector = ~(sector_t)0; | ||
1007 | } else { | ||
1008 | unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); | ||
1009 | if (bit >= mdev->rs_total) { | ||
1010 | mdev->ov_start_sector = | ||
1011 | BM_BIT_TO_SECT(mdev->rs_total - 1); | ||
1012 | mdev->rs_total = 1; | ||
1013 | } else | ||
1014 | mdev->rs_total -= bit; | ||
1015 | mdev->ov_position = mdev->ov_start_sector; | ||
1016 | } | ||
1017 | mdev->ov_left = mdev->rs_total; | ||
1018 | } | ||
1019 | |||
1020 | static void drbd_resume_al(struct drbd_conf *mdev) | ||
1021 | { | ||
1022 | if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags)) | ||
1023 | dev_info(DEV, "Resumed AL updates\n"); | ||
1024 | } | ||
1025 | |||
/**
 * __drbd_set_state() - Set a new DRBD state
 * @mdev:	DRBD device.
 * @ns:		new state.
 * @flags:	Flags
 * @done:	Optional completion, that will get completed after the after_state_ch() finished
 *
 * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
 *
 * Returns SS_SUCCESS (or SS_NOTHING_TO_DO) on success, a negative SS_*
 * code if the requested transition is rejected.  The sleeping follow-up
 * work (after_state_ch()) is queued to the worker, not run here.
 */
enum drbd_state_rv
__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
	         enum chg_state_flags flags, struct completion *done)
{
	union drbd_state os;
	enum drbd_state_rv rv = SS_SUCCESS;
	const char *warn_sync_abort = NULL;
	struct after_state_chg_work *ascw;

	os = mdev->state;

	/* Mask out impossible combinations; may set warn_sync_abort. */
	ns = sanitize_state(mdev, os, ns, &warn_sync_abort);

	if (ns.i == os.i)
		return SS_NOTHING_TO_DO;

	if (!(flags & CS_HARD)) {
		/* pre-state-change checks ; only look at ns  */
		/* See drbd_state_sw_errors in drbd_strings.c */

		rv = is_valid_state(mdev, ns);
		if (rv < SS_SUCCESS) {
			/* If the old state was illegal as well, then let
			   this happen...*/

			if (is_valid_state(mdev, os) == rv)
				rv = is_valid_state_transition(mdev, ns, os);
		} else
			rv = is_valid_state_transition(mdev, ns, os);
	}

	if (rv < SS_SUCCESS) {
		if (flags & CS_VERBOSE)
			print_st_err(mdev, os, ns, rv);
		return rv;
	}

	if (warn_sync_abort)
		dev_warn(DEV, "%s aborted.\n", warn_sync_abort);

	/* Build one log line listing every state field that changes. */
	{
		char *pbp, pb[300];
		pbp = pb;
		*pbp = 0;
		if (ns.role != os.role)
			pbp += sprintf(pbp, "role( %s -> %s ) ",
				       drbd_role_str(os.role),
				       drbd_role_str(ns.role));
		if (ns.peer != os.peer)
			pbp += sprintf(pbp, "peer( %s -> %s ) ",
				       drbd_role_str(os.peer),
				       drbd_role_str(ns.peer));
		if (ns.conn != os.conn)
			pbp += sprintf(pbp, "conn( %s -> %s ) ",
				       drbd_conn_str(os.conn),
				       drbd_conn_str(ns.conn));
		if (ns.disk != os.disk)
			pbp += sprintf(pbp, "disk( %s -> %s ) ",
				       drbd_disk_str(os.disk),
				       drbd_disk_str(ns.disk));
		if (ns.pdsk != os.pdsk)
			pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
				       drbd_disk_str(os.pdsk),
				       drbd_disk_str(ns.pdsk));
		if (is_susp(ns) != is_susp(os))
			pbp += sprintf(pbp, "susp( %d -> %d ) ",
				       is_susp(os),
				       is_susp(ns));
		if (ns.aftr_isp != os.aftr_isp)
			pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
				       os.aftr_isp,
				       ns.aftr_isp);
		if (ns.peer_isp != os.peer_isp)
			pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
				       os.peer_isp,
				       ns.peer_isp);
		if (ns.user_isp != os.user_isp)
			pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
				       os.user_isp,
				       ns.user_isp);
		dev_info(DEV, "%s\n", pb);
	}

	/* solve the race between becoming unconfigured,
	 * worker doing the cleanup, and
	 * admin reconfiguring us:
	 * on (re)configure, first set CONFIG_PENDING,
	 * then wait for a potentially exiting worker,
	 * start the worker, and schedule one no_op.
	 * then proceed with configuration.
	 */
	if (ns.disk == D_DISKLESS &&
	    ns.conn == C_STANDALONE &&
	    ns.role == R_SECONDARY &&
	    !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
		set_bit(DEVICE_DYING, &mdev->flags);

	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
	 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
	 * drbd_ldev_destroy() won't happen before our corresponding
	 * after_state_ch works run, where we put_ldev again. */
	if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
	    (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
		atomic_inc(&mdev->local_cnt);

	/* The actual state switch; everything below reacts to it. */
	mdev->state = ns;

	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
		drbd_print_uuids(mdev, "attached to UUIDs");

	/* Wake anyone waiting for a state change (drbd_req_state() et al). */
	wake_up(&mdev->misc_wait);
	wake_up(&mdev->state_wait);

	/* aborted verify run. log the last position */
	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
	    ns.conn < C_CONNECTED) {
		mdev->ov_start_sector =
			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
		dev_info(DEV, "Online Verify reached sector %llu\n",
			(unsigned long long)mdev->ov_start_sector);
	}

	/* Leaving a paused-sync state: account the paused time,
	 * and re-arm the resync timer if we are the target. */
	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
		dev_info(DEV, "Syncer continues.\n");
		mdev->rs_paused += (long)jiffies
				  -(long)mdev->rs_mark_time[mdev->rs_last_mark];
		if (ns.conn == C_SYNC_TARGET)
			mod_timer(&mdev->resync_timer, jiffies);
	}

	/* Entering a paused-sync state: remember when the pause started. */
	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
		dev_info(DEV, "Resync suspended\n");
		mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
	}

	/* Start of an online verify run: reset all progress accounting. */
	if (os.conn == C_CONNECTED &&
	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
		unsigned long now = jiffies;
		int i;

		set_ov_position(mdev, ns.conn);
		mdev->rs_start = now;
		mdev->rs_last_events = 0;
		mdev->rs_last_sect_ev = 0;
		mdev->ov_last_oos_size = 0;
		mdev->ov_last_oos_start = 0;

		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
			mdev->rs_mark_left[i] = mdev->ov_left;
			mdev->rs_mark_time[i] = now;
		}

		drbd_rs_controller_reset(mdev);

		if (ns.conn == C_VERIFY_S) {
			dev_info(DEV, "Starting Online Verify from sector %llu\n",
					(unsigned long long)mdev->ov_position);
			mod_timer(&mdev->resync_timer, jiffies);
		}
	}

	/* Mirror the relevant parts of the new state into the on-disk
	 * meta-data flags, and mark the meta-data dirty if they changed. */
	if (get_ldev(mdev)) {
		u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);

		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
			mdf |= MDF_CRASHED_PRIMARY;
		if (mdev->state.role == R_PRIMARY ||
		    (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
			mdf |= MDF_PRIMARY_IND;
		if (mdev->state.conn > C_WF_REPORT_PARAMS)
			mdf |= MDF_CONNECTED_IND;
		if (mdev->state.disk > D_INCONSISTENT)
			mdf |= MDF_CONSISTENT;
		if (mdev->state.disk > D_OUTDATED)
			mdf |= MDF_WAS_UP_TO_DATE;
		if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
			mdf |= MDF_PEER_OUT_DATED;
		if (mdf != mdev->ldev->md.flags) {
			mdev->ldev->md.flags = mdf;
			drbd_md_mark_dirty(mdev);
		}
		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
			drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
		put_ldev(mdev);
	}

	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
		set_bit(CONSIDER_RESYNC, &mdev->flags);

	/* Receiver should clean up itself */
	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
		drbd_thread_stop_nowait(&mdev->tconn->receiver);

	/* Now the receiver finished cleaning up itself, it should die */
	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
		drbd_thread_stop_nowait(&mdev->tconn->receiver);

	/* Upon network failure, we need to restart the receiver. */
	if (os.conn > C_TEAR_DOWN &&
	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
		drbd_thread_restart_nowait(&mdev->tconn->receiver);

	/* Resume AL writing if we get a connection */
	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
		drbd_resume_al(mdev);

	/* Queue the sleeping after-state-change work to the worker.
	 * GFP_ATOMIC because we hold req_lock here. */
	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
	if (ascw) {
		ascw->os = os;
		ascw->ns = ns;
		ascw->flags = flags;
		ascw->w.cb = w_after_state_ch;
		ascw->done = done;
		drbd_queue_work(&mdev->tconn->data.work, &ascw->w);
	} else {
		dev_warn(DEV, "Could not kmalloc an ascw\n");
	}

	return rv;
}
1261 | |||
/* Worker callback: run the (possibly sleeping) after-state-change actions
 * queued by __drbd_set_state(), complete the optional waiter, and free the
 * work item allocated there. */
static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
	struct after_state_chg_work *ascw =
		container_of(w, struct after_state_chg_work, w);
	after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
	if (ascw->flags & CS_WAIT_COMPLETE) {
		D_ASSERT(ascw->done != NULL);
		complete(ascw->done);
	}
	kfree(ascw);

	return 1;
}
1275 | |||
1276 | static void abw_start_sync(struct drbd_conf *mdev, int rv) | ||
1277 | { | ||
1278 | if (rv) { | ||
1279 | dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); | ||
1280 | _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); | ||
1281 | return; | ||
1282 | } | ||
1283 | |||
1284 | switch (mdev->state.conn) { | ||
1285 | case C_STARTING_SYNC_T: | ||
1286 | _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); | ||
1287 | break; | ||
1288 | case C_STARTING_SYNC_S: | ||
1289 | drbd_start_resync(mdev, C_SYNC_SOURCE); | ||
1290 | break; | ||
1291 | } | ||
1292 | } | ||
1293 | |||
/**
 * drbd_bitmap_io_from_worker() - Run a bitmap I/O function from worker context
 * @mdev:	DRBD device.
 * @io_fn:	bitmap I/O function to run under the bitmap lock.
 * @why:	textual reason, used for the bitmap lock.
 * @flags:	bitmap locking flags (enum bm_flag).
 *
 * Must be called from the worker thread: a blocking drbd_suspend_io()
 * here could deadlock, since the worker is also the one that resumes
 * suspended I/O.  Returns the result of @io_fn.
 */
int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
		int (*io_fn)(struct drbd_conf *),
		char *why, enum bm_flag flags)
{
	int rv;

	D_ASSERT(current == mdev->tconn->worker.task);

	/* open coded non-blocking drbd_suspend_io(mdev); */
	set_bit(SUSPEND_IO, &mdev->flags);

	drbd_bm_lock(mdev, why, flags);
	rv = io_fn(mdev);
	drbd_bm_unlock(mdev);

	drbd_resume_io(mdev);

	return rv;
}
1313 | |||
/**
 * after_state_ch() - Perform after state change actions that may sleep
 * @mdev:	DRBD device.
 * @os:	old state.
 * @ns:	new state.
 * @flags:	Flags
 *
 * Runs in worker context (queued by __drbd_set_state()); may sleep,
 * talk to the peer, run user-space helpers and do bitmap I/O.
 */
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags)
{
	enum drbd_fencing_p fp;
	enum drbd_req_event what = NOTHING;
	union drbd_state nsm = (union drbd_state){ .i = -1 };

	/* Fresh connection: the crashed-primary handling is over. */
	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
		clear_bit(CRASHED_PRIMARY, &mdev->flags);
		if (mdev->p_uuid)
			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
	}

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	/* Inform userspace about the change... */
	drbd_bcast_state(mdev, ns);

	/* Became a degraded, inconsistent primary: run the helper script. */
	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
		drbd_khelper(mdev, "pri-on-incon-degr");

	/* Here we have the actions that are performed after a
	   state change. This function might sleep */

	nsm.i = -1;
	if (ns.susp_nod) {
		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
			what = RESEND;

		if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
			what = RESTART_FROZEN_DISK_IO;

		if (what != NOTHING)
			nsm.susp_nod = 0;
	}

	if (ns.susp_fen) {
		/* case1: The outdate peer handler is successful: */
		if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
			tl_clear(mdev);
			if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
				drbd_uuid_new_current(mdev);
				clear_bit(NEW_CUR_UUID, &mdev->flags);
			}
			spin_lock_irq(&mdev->tconn->req_lock);
			_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
			spin_unlock_irq(&mdev->tconn->req_lock);
		}
		/* case2: The connection was established again: */
		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
			clear_bit(NEW_CUR_UUID, &mdev->flags);
			what = RESEND;
			nsm.susp_fen = 0;
		}
	}

	/* Restart the transfer log, and clear the suspend bit(s)
	 * we decided on above, under req_lock. */
	if (what != NOTHING) {
		spin_lock_irq(&mdev->tconn->req_lock);
		_tl_restart(mdev, what);
		nsm.i &= mdev->state.i;
		_drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
		spin_unlock_irq(&mdev->tconn->req_lock);
	}

	/* Became sync source. With protocol >= 96, we still need to send out
	 * the sync uuid now. Need to do that before any drbd_send_state, or
	 * the other side may go "paused sync" before receiving the sync uuids,
	 * which is unexpected. */
	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
	    mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) {
		drbd_gen_and_send_sync_uuid(mdev);
		put_ldev(mdev);
	}

	/* Do not change the order of the if above and the two below... */
	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
		drbd_send_uuids(mdev);
		drbd_send_state(mdev);
	}
	/* No point in queuing send_bitmap if we don't have a connection
	 * anymore, so check also the _current_ state, not only the new state
	 * at the time this work was queued. */
	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
	    mdev->state.conn == C_WF_BITMAP_S)
		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
				"send_bitmap (WFBitMapS)",
				BM_LOCKED_TEST_ALLOWED);

	/* Lost contact to peer's copy of the data */
	if ((os.pdsk >= D_INCONSISTENT &&
	     os.pdsk != D_UNKNOWN &&
	     os.pdsk != D_OUTDATED)
	&&  (ns.pdsk < D_INCONSISTENT ||
	     ns.pdsk == D_UNKNOWN ||
	     ns.pdsk == D_OUTDATED)) {
		if (get_ldev(mdev)) {
			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
				if (is_susp(mdev->state)) {
					/* While suspended, defer the new UUID
					 * until I/O is resumed. */
					set_bit(NEW_CUR_UUID, &mdev->flags);
				} else {
					drbd_uuid_new_current(mdev);
					drbd_send_uuids(mdev);
				}
			}
			put_ldev(mdev);
		}
	}

	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) {
			drbd_uuid_new_current(mdev);
			drbd_send_uuids(mdev);
		}

		/* D_DISKLESS Peer becomes secondary */
		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
			/* We may still be Primary ourselves.
			 * No harm done if the bitmap still changes,
			 * redirtied pages will follow later. */
			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
		put_ldev(mdev);
	}

	/* Write out all changed bits on demote.
	 * Though, no need to do that just yet
	 * if there is a resync going on still */
	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
		/* No changes to the bitmap expected this time, so assert that,
		 * even though no harm was done if it did change. */
		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote", BM_LOCKED_TEST_ALLOWED);
		put_ldev(mdev);
	}

	/* Last part of the attaching process ... */
	if (ns.conn >= C_CONNECTED &&
	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
		drbd_send_uuids(mdev);
		drbd_send_state(mdev);
	}

	/* We want to pause/continue resync, tell peer. */
	if (ns.conn >= C_CONNECTED &&
	     ((os.aftr_isp != ns.aftr_isp) ||
	      (os.user_isp != ns.user_isp)))
		drbd_send_state(mdev);

	/* In case one of the isp bits got set, suspend other devices. */
	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
		suspend_other_sg(mdev);

	/* Make sure the peer gets informed about eventual state
	   changes (ISP bits) while we were in WFReportParams. */
	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
		drbd_send_state(mdev);

	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
		drbd_send_state(mdev);

	/* We are in the progress to start a full sync... */
	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
		/* no other bitmap changes expected during this phase */
		drbd_queue_bitmap_io(mdev,
			&drbd_bmio_set_n_write, &abw_start_sync,
			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);

	/* We are invalidating our self... */
	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
		/* other bitmap operation expected during this phase */
		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
			"set_n_write from invalidate", BM_LOCKED_MASK);

	/* first half of local IO error, failure to attach,
	 * or administrative detach */
	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
		enum drbd_io_error_p eh;
		int was_io_error;
		/* corresponding get_ldev was in __drbd_set_state, to serialize
		 * our cleanup here with the transition to D_DISKLESS,
		 * so it is safe to dereference ldev here. */
		eh = mdev->ldev->dc.on_io_error;
		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);

		/* current state still has to be D_FAILED,
		 * there is only one way out: to D_DISKLESS,
		 * and that may only happen after our put_ldev below. */
		if (mdev->state.disk != D_FAILED)
			dev_err(DEV,
				"ASSERT FAILED: disk is %s during detach\n",
				drbd_disk_str(mdev->state.disk));

		if (drbd_send_state(mdev))
			dev_warn(DEV, "Notified peer that I am detaching my disk\n");
		else
			dev_err(DEV, "Sending state for detaching disk failed\n");

		drbd_rs_cancel_all(mdev);

		/* In case we want to get something to stable storage still,
		 * this may be the last chance.
		 * Following put_ldev may transition to D_DISKLESS. */
		drbd_md_sync(mdev);
		put_ldev(mdev);

		if (was_io_error && eh == EP_CALL_HELPER)
			drbd_khelper(mdev, "local-io-error");
	}

	/* second half of local IO error, failure to attach,
	 * or administrative detach,
	 * after local_cnt references have reached zero again */
	if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
		/* We must still be diskless,
		 * re-attach has to be serialized with this! */
		if (mdev->state.disk != D_DISKLESS)
			dev_err(DEV,
				"ASSERT FAILED: disk is %s while going diskless\n",
				drbd_disk_str(mdev->state.disk));

		mdev->rs_total = 0;
		mdev->rs_failed = 0;
		atomic_set(&mdev->rs_pending_cnt, 0);

		if (drbd_send_state(mdev))
			dev_warn(DEV, "Notified peer that I'm now diskless.\n");
		/* corresponding get_ldev in __drbd_set_state
		 * this may finally trigger drbd_ldev_destroy. */
		put_ldev(mdev);
	}

	/* Notify peer that I had a local IO error, and did not detach. */
	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
		drbd_send_state(mdev);

	/* Disks got bigger while they were detached */
	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
		if (ns.conn == C_CONNECTED)
			resync_after_online_grow(mdev);
	}

	/* A resync finished or aborted, wake paused devices... */
	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
	    (os.peer_isp && !ns.peer_isp) ||
	    (os.user_isp && !ns.user_isp))
		resume_next_sg(mdev);

	/* sync target done with resync.  Explicitly notify peer, even though
	 * it should (at least for non-empty resyncs) already know itself. */
	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
		drbd_send_state(mdev);

	/* This triggers bitmap writeout of potentially still unwritten pages
	 * if the resync finished cleanly, or aborted because of peer disk
	 * failure, or because of connection loss.
	 * For resync aborted because of local disk failure, we cannot do
	 * any bitmap writeout anymore.
	 * No harm done if some bits change during this phase.
	 */
	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
			"write from resync_finished", BM_LOCKED_SET_ALLOWED);
		put_ldev(mdev);
	}

	/* Upon network connection, we need to start the receiver */
	if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
		drbd_thread_start(&mdev->tconn->receiver);

	/* Terminate worker thread if we are unconfigured - it will be
	   restarted as needed... */
	if (ns.disk == D_DISKLESS &&
	    ns.conn == C_STANDALONE &&
	    ns.role == R_SECONDARY) {
		if (os.aftr_isp != ns.aftr_isp)
			resume_next_sg(mdev);
		/* set in __drbd_set_state, unless CONFIG_PENDING was set */
		if (test_bit(DEVICE_DYING, &mdev->flags))
			drbd_thread_stop_nowait(&mdev->tconn->worker);
	}

	drbd_md_sync(mdev);
}
1617 | |||
1618 | |||
1619 | static int drbd_thread_setup(void *arg) | 442 | static int drbd_thread_setup(void *arg) |
1620 | { | 443 | { |
1621 | struct drbd_thread *thi = (struct drbd_thread *) arg; | 444 | struct drbd_thread *thi = (struct drbd_thread *) arg; |
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c new file mode 100644 index 000000000000..38d330b7b662 --- /dev/null +++ b/drivers/block/drbd/drbd_state.c | |||
@@ -0,0 +1,1217 @@ | |||
1 | /* | ||
2 | drbd_state.c | ||
3 | |||
4 | This file is part of DRBD by Philipp Reisner and Lars Ellenberg. | ||
5 | |||
6 | Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. | ||
7 | Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. | ||
8 | Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. | ||
9 | |||
10 | Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev | ||
11 | from Logicworks, Inc. for making SDP replication support possible. | ||
12 | |||
13 | drbd is free software; you can redistribute it and/or modify | ||
14 | it under the terms of the GNU General Public License as published by | ||
15 | the Free Software Foundation; either version 2, or (at your option) | ||
16 | any later version. | ||
17 | |||
18 | drbd is distributed in the hope that it will be useful, | ||
19 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
21 | GNU General Public License for more details. | ||
22 | |||
23 | You should have received a copy of the GNU General Public License | ||
24 | along with drbd; see the file COPYING. If not, write to | ||
25 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
26 | */ | ||
27 | |||
28 | #include <linux/drbd_limits.h> | ||
29 | #include "drbd_int.h" | ||
30 | #include "drbd_req.h" | ||
31 | |||
/* Deferred work item for after_state_ch(): carries the old and new state,
 * the change flags, and an optional completion that is signalled once the
 * (possibly sleeping) after-state-change actions have run in the worker. */
struct after_state_chg_work {
	struct drbd_work w;
	union drbd_state os;
	union drbd_state ns;
	enum chg_state_flags flags;
	struct completion *done;
};
39 | |||
40 | |||
41 | extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); | ||
42 | int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); | ||
43 | static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); | ||
44 | static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | ||
45 | union drbd_state ns, enum chg_state_flags flags); | ||
46 | static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, | ||
47 | union drbd_state ns, enum chg_state_flags flags); | ||
48 | static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); | ||
49 | static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); | ||
50 | static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, | ||
51 | union drbd_state ns, const char **warn_sync_abort); | ||
52 | |||
53 | /** | ||
54 | * cl_wide_st_chg() - true if the state change is a cluster wide one | ||
55 | * @mdev: DRBD device. | ||
56 | * @os: old (current) state. | ||
57 | * @ns: new (wanted) state. | ||
58 | */ | ||
59 | static int cl_wide_st_chg(struct drbd_conf *mdev, | ||
60 | union drbd_state os, union drbd_state ns) | ||
61 | { | ||
62 | return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && | ||
63 | ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || | ||
64 | (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || | ||
65 | (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || | ||
66 | (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || | ||
67 | (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || | ||
68 | (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); | ||
69 | } | ||
70 | |||
71 | enum drbd_state_rv | ||
72 | drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, | ||
73 | union drbd_state mask, union drbd_state val) | ||
74 | { | ||
75 | unsigned long flags; | ||
76 | union drbd_state os, ns; | ||
77 | enum drbd_state_rv rv; | ||
78 | |||
79 | spin_lock_irqsave(&mdev->tconn->req_lock, flags); | ||
80 | os = mdev->state; | ||
81 | ns.i = (os.i & ~mask.i) | val.i; | ||
82 | rv = _drbd_set_state(mdev, ns, f, NULL); | ||
83 | ns = mdev->state; | ||
84 | spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); | ||
85 | |||
86 | return rv; | ||
87 | } | ||
88 | |||
/**
 * drbd_force_state() - Impose a change which happens outside our control on our state
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 *
 * CS_HARD skips the validity checks: the change already happened
 * (e.g. disk failure), so it must be reflected in our state regardless.
 */
void drbd_force_state(struct drbd_conf *mdev,
	union drbd_state mask, union drbd_state val)
{
	drbd_change_state(mdev, CS_HARD, mask, val);
}
100 | |||
/* Wait condition for drbd_req_state()'s cluster-wide path: returns a
 * final SS_* verdict once the peer answered (CL_ST_CHG_SUCCESS/FAIL),
 * SS_CW_NO_NEED if the change turned out not to be cluster wide after
 * all, a failure code if the change became invalid meanwhile, or
 * SS_UNKNOWN_ERROR (== 0, "keep waiting") while everything still looks
 * fine but no answer has arrived yet. */
static enum drbd_state_rv
_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
	     union drbd_state val)
{
	union drbd_state os, ns;
	unsigned long flags;
	enum drbd_state_rv rv;

	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
		return SS_CW_SUCCESS;

	if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
		return SS_CW_FAILED_BY_PEER;

	/* rv == 0 (SS_UNKNOWN_ERROR) means "continue waiting". */
	rv = 0;
	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	os = mdev->state;
	ns.i = (os.i & ~mask.i) | val.i;
	ns = sanitize_state(mdev, os, ns, NULL);

	if (!cl_wide_st_chg(mdev, os, ns))
		rv = SS_CW_NO_NEED;
	if (!rv) {
		/* Re-validate: the state may have moved under us. */
		rv = is_valid_state(mdev, ns);
		if (rv == SS_SUCCESS) {
			rv = is_valid_soft_transition(os, ns);
			if (rv == SS_SUCCESS)
				rv = SS_UNKNOWN_ERROR;  /* cont waiting, otherwise fail. */
		}
	}
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	return rv;
}
135 | |||
/**
 * drbd_req_state() - Perform an eventually cluster wide state change
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 * @f:	flags
 *
 * Should not be called directly, use drbd_request_state() or
 * _drbd_request_state().
 *
 * For cluster-wide changes the request is first sent to the peer;
 * we then block on state_wait until _req_st_cond() reports a verdict
 * and only commit locally on success.
 */
static enum drbd_state_rv
drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
	       union drbd_state val, enum chg_state_flags f)
{
	struct completion done;
	unsigned long flags;
	union drbd_state os, ns;
	enum drbd_state_rv rv;

	init_completion(&done);

	/* CS_SERIALIZE: only one such state change in flight per device. */
	if (f & CS_SERIALIZE)
		mutex_lock(&mdev->state_mutex);

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	os = mdev->state;
	ns.i = (os.i & ~mask.i) | val.i;

	ns = sanitize_state(mdev, os, ns, NULL);

	if (cl_wide_st_chg(mdev, os, ns)) {
		/* Validate locally before bothering the peer. */
		rv = is_valid_state(mdev, ns);
		if (rv == SS_SUCCESS)
			rv = is_valid_soft_transition(os, ns);
		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

		if (rv < SS_SUCCESS) {
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}

		drbd_state_lock(mdev);
		if (!drbd_send_state_req(mdev, mask, val)) {
			drbd_state_unlock(mdev);
			rv = SS_CW_FAILED_BY_PEER;
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}

		/* Block until the peer's answer (or a local re-check)
		 * yields a final verdict. */
		wait_event(mdev->state_wait,
			(rv = _req_st_cond(mdev, mask, val)));

		if (rv < SS_SUCCESS) {
			drbd_state_unlock(mdev);
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}
		/* Peer agreed: recompute against the current state and commit. */
		spin_lock_irqsave(&mdev->tconn->req_lock, flags);
		os = mdev->state;
		ns.i = (os.i & ~mask.i) | val.i;
		rv = _drbd_set_state(mdev, ns, f, &done);
		drbd_state_unlock(mdev);
	} else {
		rv = _drbd_set_state(mdev, ns, f, &done);
	}

	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
		/* The worker runs after_state_ch(); waiting here from the
		 * worker itself would deadlock. */
		D_ASSERT(current != mdev->tconn->worker.task);
		wait_for_completion(&done);
	}

abort:
	if (f & CS_SERIALIZE)
		mutex_unlock(&mdev->state_mutex);

	return rv;
}
218 | |||
/**
 * _drbd_request_state() - Request a state change (with flags)
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 * @f:	flags
 *
 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
 * flag, or when logging of failed state change requests is not desired.
 *
 * Retries transparently while the device is in a transient state.
 */
enum drbd_state_rv
_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
		    union drbd_state val, enum chg_state_flags f)
{
	enum drbd_state_rv rv;

	wait_event(mdev->state_wait,
		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);

	return rv;
}
240 | |||
241 | static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) | ||
242 | { | ||
243 | dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", | ||
244 | name, | ||
245 | drbd_conn_str(ns.conn), | ||
246 | drbd_role_str(ns.role), | ||
247 | drbd_role_str(ns.peer), | ||
248 | drbd_disk_str(ns.disk), | ||
249 | drbd_disk_str(ns.pdsk), | ||
250 | is_susp(ns) ? 's' : 'r', | ||
251 | ns.aftr_isp ? 'a' : '-', | ||
252 | ns.peer_isp ? 'p' : '-', | ||
253 | ns.user_isp ? 'u' : '-', | ||
254 | ns.susp_fen ? 'F' : '-', | ||
255 | ns.susp_nod ? 'N' : '-' | ||
256 | ); | ||
257 | } | ||
258 | |||
259 | void print_st_err(struct drbd_conf *mdev, union drbd_state os, | ||
260 | union drbd_state ns, enum drbd_state_rv err) | ||
261 | { | ||
262 | if (err == SS_IN_TRANSIENT_STATE) | ||
263 | return; | ||
264 | dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); | ||
265 | print_st(mdev, " state", os); | ||
266 | print_st(mdev, "wanted", ns); | ||
267 | } | ||
268 | |||
269 | |||
/**
 * is_valid_state() - Returns an SS_ error code if ns is not valid
 * @mdev:	DRBD device.
 * @ns:		State to consider.
 *
 * Checks only the new state @ns on its own (no transition rules; see
 * is_valid_soft_transition() for those).  The if/else-if chain below is
 * order sensitive: the first matching veto wins.
 */
static enum drbd_state_rv
is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
{
	/* See drbd_state_sw_errors in drbd_strings.c */

	enum drbd_fencing_p fp;
	enum drbd_state_rv rv = SS_SUCCESS;

	/* Snapshot the fencing policy while holding a local-disk reference. */
	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	if (get_net_conf(mdev->tconn)) {
		/* Dual-primary is only valid if explicitly configured. */
		if (!mdev->tconn->net_conf->two_primaries &&
		    ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
			rv = SS_TWO_PRIMARIES;
		put_net_conf(mdev->tconn);
	}

	if (rv <= 0)
		/* already found a reason to abort */;
	else if (ns.role == R_SECONDARY && mdev->open_cnt)
		rv = SS_DEVICE_IN_USE;

	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	/* With resource-level fencing, a disconnected primary may not keep
	 * a peer disk state that could still be newer than D_OUTDATED. */
	else if (fp >= FP_RESOURCE &&
		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
		rv = SS_PRIMARY_NOP;

	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
		rv = SS_NO_UP_TO_DATE_DISK;

	/* Resync (ns.conn > C_CONNECTED) needs usable disks on both sides. */
	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
		rv = SS_NO_LOCAL_DISK;

	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
		rv = SS_NO_REMOTE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	/* While connected, a local D_OUTDATED disk is contradictory for
	 * these connection states. */
	else if ((ns.conn == C_CONNECTED ||
		  ns.conn == C_WF_BITMAP_S ||
		  ns.conn == C_SYNC_SOURCE ||
		  ns.conn == C_PAUSED_SYNC_S) &&
		  ns.disk == D_OUTDATED)
		rv = SS_CONNECTED_OUTDATES;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		 (mdev->sync_conf.verify_alg[0] == 0))
		rv = SS_NO_VERIFY_ALG;

	/* Online verify needs protocol 88 or newer on the wire. */
	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		  mdev->tconn->agreed_pro_version < 88)
		rv = SS_NOT_SUPPORTED;

	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
		rv = SS_CONNECTED_OUTDATES;

	return rv;
}
340 | |||
/**
 * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
 * @os:	old state.
 * @ns:	new state.
 *
 * Unlike is_valid_state(), this looks at the pair (os -> ns).  Note that
 * these checks are NOT mutually exclusive: each matching rule overwrites
 * rv, so the LAST matching rule determines the result.
 */
static enum drbd_state_rv
is_valid_soft_transition(union drbd_state os, union drbd_state ns)
{
	enum drbd_state_rv rv = SS_SUCCESS;

	/* May not (re)start a sync while one is already running. */
	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
	    os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
		rv = SS_ALREADY_STANDALONE;

	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
		rv = SS_IS_DISKLESS;

	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
		rv = SS_NO_NET_CONFIG;

	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
		rv = SS_LOWER_THAN_OUTDATED;

	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
		rv = SS_IN_TRANSIENT_STATE;

	if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
		rv = SS_IN_TRANSIENT_STATE;

	/* Online verify requires an established connection ... */
	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	/* ... and must not preempt a resync that is already in progress. */
	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
	    ns.conn != os.conn && os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
	    os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
	    && os.conn < C_WF_REPORT_PARAMS)
		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */

	return rv;
}
391 | |||
/**
 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
 * @mdev:	DRBD device.
 * @os:		old state.
 * @ns:		new state.
 * @warn_sync_abort: if non-NULL, set to "Online-verify" or "Resync" when
 *		a running sync/verify is implicitly aborted; left untouched
 *		otherwise.
 *
 * When we lose connection, we have to set the state of the peer's disk (pdsk)
 * to D_UNKNOWN. This rule and many more along those lines are in this function.
 *
 * The rules below are applied in order and later rules see the effect of
 * earlier ones, so do not reorder them casually.  Returns the adjusted
 * state; @ns is passed and modified by value.
 */
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
				       union drbd_state ns, const char **warn_sync_abort)
{
	enum drbd_fencing_p fp;
	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;

	/* Snapshot the fencing policy while holding a local-disk reference. */
	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	/* Disallow Network errors to configure a device's network part */
	if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
	    os.conn <= C_DISCONNECTING)
		ns.conn = os.conn;

	/* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
	 * If you try to go into some Sync* state, that shall fail (elsewhere). */
	if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN)
		ns.conn = os.conn;

	/* we cannot fail (again) if we already detached */
	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
		ns.disk = D_DISKLESS;

	/* if we are only D_ATTACHING yet,
	 * we can (and should) go directly to D_DISKLESS. */
	if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
		ns.disk = D_DISKLESS;

	/* After C_DISCONNECTING only C_STANDALONE may follow */
	if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
		ns.conn = os.conn;

	/* Without a connection, everything we know about the peer is stale. */
	if (ns.conn < C_CONNECTED) {
		ns.peer_isp = 0;
		ns.peer = R_UNKNOWN;
		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
			ns.pdsk = D_UNKNOWN;
	}

	/* Clear the aftr_isp when becoming unconfigured */
	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
		ns.aftr_isp = 0;

	/* Abort resync if a disk fails/detaches */
	if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
	    (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
		if (warn_sync_abort)
			*warn_sync_abort =
				os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
				"Online-verify" : "Resync";
		ns.conn = C_CONNECTED;
	}

	/* Connection breaks down before we finished "Negotiating" */
	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		/* If the exposed UUID still matches on-disk, fall back to the
		 * disk states remembered in new_state_tmp; otherwise we have
		 * no trustworthy data and must go diskless. */
		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
			ns.disk = mdev->new_state_tmp.disk;
			ns.pdsk = mdev->new_state_tmp.pdsk;
		} else {
			dev_alert(DEV, "Connection lost while negotiating, no data!\n");
			ns.disk = D_DISKLESS;
			ns.pdsk = D_UNKNOWN;
		}
		put_ldev(mdev);
	}

	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
			ns.disk = D_UP_TO_DATE;
		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
			ns.pdsk = D_UP_TO_DATE;
	}

	/* Implications of the connection state on the disk states:
	 * each connection state allows only a certain window of local (disk)
	 * and peer (pdsk) disk states; clamp both into that window below. */
	disk_min = D_DISKLESS;
	disk_max = D_UP_TO_DATE;
	pdsk_min = D_INCONSISTENT;
	pdsk_max = D_UNKNOWN;
	switch ((enum drbd_conns)ns.conn) {
	case C_WF_BITMAP_T:
	case C_PAUSED_SYNC_T:
	case C_STARTING_SYNC_T:
	case C_WF_SYNC_UUID:
	case C_BEHIND:
		disk_min = D_INCONSISTENT;
		disk_max = D_OUTDATED;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_VERIFY_S:
	case C_VERIFY_T:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_CONNECTED:
		disk_min = D_DISKLESS;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_DISKLESS;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_WF_BITMAP_S:
	case C_PAUSED_SYNC_S:
	case C_STARTING_SYNC_S:
	case C_AHEAD:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
		break;
	case C_SYNC_TARGET:
		disk_min = D_INCONSISTENT;
		disk_max = D_INCONSISTENT;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_SYNC_SOURCE:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_INCONSISTENT;
		break;
	case C_STANDALONE:
	case C_DISCONNECTING:
	case C_UNCONNECTED:
	case C_TIMEOUT:
	case C_BROKEN_PIPE:
	case C_NETWORK_FAILURE:
	case C_PROTOCOL_ERROR:
	case C_TEAR_DOWN:
	case C_WF_CONNECTION:
	case C_WF_REPORT_PARAMS:
	case C_MASK:
		/* keep the default (widest) window */
		break;
	}
	if (ns.disk > disk_max)
		ns.disk = disk_max;

	if (ns.disk < disk_min) {
		dev_warn(DEV, "Implicitly set disk from %s to %s\n",
			 drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
		ns.disk = disk_min;
	}
	if (ns.pdsk > pdsk_max)
		ns.pdsk = pdsk_max;

	if (ns.pdsk < pdsk_min) {
		dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
			 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
		ns.pdsk = pdsk_min;
	}

	if (fp == FP_STONITH &&
	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */

	if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */

	/* Any "I/O suspend" bit maps Sync* <-> PausedSync* in both directions. */
	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
		if (ns.conn == C_SYNC_SOURCE)
			ns.conn = C_PAUSED_SYNC_S;
		if (ns.conn == C_SYNC_TARGET)
			ns.conn = C_PAUSED_SYNC_T;
	} else {
		if (ns.conn == C_PAUSED_SYNC_S)
			ns.conn = C_SYNC_SOURCE;
		if (ns.conn == C_PAUSED_SYNC_T)
			ns.conn = C_SYNC_TARGET;
	}

	return ns;
}
585 | |||
586 | void drbd_resume_al(struct drbd_conf *mdev) | ||
587 | { | ||
588 | if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags)) | ||
589 | dev_info(DEV, "Resumed AL updates\n"); | ||
590 | } | ||
591 | |||
592 | /* helper for __drbd_set_state */ | ||
593 | static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) | ||
594 | { | ||
595 | if (mdev->tconn->agreed_pro_version < 90) | ||
596 | mdev->ov_start_sector = 0; | ||
597 | mdev->rs_total = drbd_bm_bits(mdev); | ||
598 | mdev->ov_position = 0; | ||
599 | if (cs == C_VERIFY_T) { | ||
600 | /* starting online verify from an arbitrary position | ||
601 | * does not fit well into the existing protocol. | ||
602 | * on C_VERIFY_T, we initialize ov_left and friends | ||
603 | * implicitly in receive_DataRequest once the | ||
604 | * first P_OV_REQUEST is received */ | ||
605 | mdev->ov_start_sector = ~(sector_t)0; | ||
606 | } else { | ||
607 | unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); | ||
608 | if (bit >= mdev->rs_total) { | ||
609 | mdev->ov_start_sector = | ||
610 | BM_BIT_TO_SECT(mdev->rs_total - 1); | ||
611 | mdev->rs_total = 1; | ||
612 | } else | ||
613 | mdev->rs_total -= bit; | ||
614 | mdev->ov_position = mdev->ov_start_sector; | ||
615 | } | ||
616 | mdev->ov_left = mdev->rs_total; | ||
617 | } | ||
618 | |||
/**
 * __drbd_set_state() - Set a new DRBD state
 * @mdev:	DRBD device.
 * @ns:		new state.
 * @flags:	Flags
 * @done:	Optional completion, that will get completed after the after_state_ch() finished
 *
 * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
 *
 * Sanitizes @ns, validates the transition (unless CS_HARD), commits the
 * new state to mdev->state, performs the state-change side effects that
 * may NOT sleep, and queues an after_state_chg_work for the side effects
 * that may sleep (see after_state_ch()).
 */
enum drbd_state_rv
__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
	         enum chg_state_flags flags, struct completion *done)
{
	union drbd_state os;
	enum drbd_state_rv rv = SS_SUCCESS;
	const char *warn_sync_abort = NULL;
	struct after_state_chg_work *ascw;

	os = mdev->state;

	ns = sanitize_state(mdev, os, ns, &warn_sync_abort);

	/* sanitize_state() may have reduced the request to a no-op */
	if (ns.i == os.i)
		return SS_NOTHING_TO_DO;

	if (!(flags & CS_HARD)) {
		/* pre-state-change checks ; only look at ns  */
		/* See drbd_state_sw_errors in drbd_strings.c */

		rv = is_valid_state(mdev, ns);
		if (rv < SS_SUCCESS) {
			/* If the old state was illegal as well, then let
			   this happen...*/

			if (is_valid_state(mdev, os) == rv)
				rv = is_valid_soft_transition(os, ns);
		} else
			rv = is_valid_soft_transition(os, ns);
	}

	if (rv < SS_SUCCESS) {
		if (flags & CS_VERBOSE)
			print_st_err(mdev, os, ns, rv);
		return rv;
	}

	if (warn_sync_abort)
		dev_warn(DEV, "%s aborted.\n", warn_sync_abort);

	/* Log a human-readable summary of every field that changes. */
	{
		char *pbp, pb[300];
		pbp = pb;
		*pbp = 0;
		if (ns.role != os.role)
			pbp += sprintf(pbp, "role( %s -> %s ) ",
				       drbd_role_str(os.role),
				       drbd_role_str(ns.role));
		if (ns.peer != os.peer)
			pbp += sprintf(pbp, "peer( %s -> %s ) ",
				       drbd_role_str(os.peer),
				       drbd_role_str(ns.peer));
		if (ns.conn != os.conn)
			pbp += sprintf(pbp, "conn( %s -> %s ) ",
				       drbd_conn_str(os.conn),
				       drbd_conn_str(ns.conn));
		if (ns.disk != os.disk)
			pbp += sprintf(pbp, "disk( %s -> %s ) ",
				       drbd_disk_str(os.disk),
				       drbd_disk_str(ns.disk));
		if (ns.pdsk != os.pdsk)
			pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
				       drbd_disk_str(os.pdsk),
				       drbd_disk_str(ns.pdsk));
		if (is_susp(ns) != is_susp(os))
			pbp += sprintf(pbp, "susp( %d -> %d ) ",
				       is_susp(os),
				       is_susp(ns));
		if (ns.aftr_isp != os.aftr_isp)
			pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
				       os.aftr_isp,
				       ns.aftr_isp);
		if (ns.peer_isp != os.peer_isp)
			pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
				       os.peer_isp,
				       ns.peer_isp);
		if (ns.user_isp != os.user_isp)
			pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
				       os.user_isp,
				       ns.user_isp);
		dev_info(DEV, "%s\n", pb);
	}

	/* solve the race between becoming unconfigured,
	 * worker doing the cleanup, and
	 * admin reconfiguring us:
	 * on (re)configure, first set CONFIG_PENDING,
	 * then wait for a potentially exiting worker,
	 * start the worker, and schedule one no_op.
	 * then proceed with configuration.
	 */
	if (ns.disk == D_DISKLESS &&
	    ns.conn == C_STANDALONE &&
	    ns.role == R_SECONDARY &&
	    !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
		set_bit(DEVICE_DYING, &mdev->flags);

	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
	 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
	 * drbd_ldev_destroy() won't happen before our corresponding
	 * after_state_ch works run, where we put_ldev again. */
	if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
	    (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
		atomic_inc(&mdev->local_cnt);

	/* the actual commit point */
	mdev->state = ns;

	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
		drbd_print_uuids(mdev, "attached to UUIDs");

	/* anybody sleeping on mdev->state may now re-evaluate */
	wake_up(&mdev->misc_wait);
	wake_up(&mdev->state_wait);

	/* aborted verify run. log the last position */
	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
	    ns.conn < C_CONNECTED) {
		mdev->ov_start_sector =
			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
		dev_info(DEV, "Online Verify reached sector %llu\n",
			 (unsigned long long)mdev->ov_start_sector);
	}

	/* PausedSync* -> Sync*: account the paused time, restart the syncer */
	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
		dev_info(DEV, "Syncer continues.\n");
		mdev->rs_paused += (long)jiffies
				  -(long)mdev->rs_mark_time[mdev->rs_last_mark];
		if (ns.conn == C_SYNC_TARGET)
			mod_timer(&mdev->resync_timer, jiffies);
	}

	/* Sync* -> PausedSync*: remember when we paused */
	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
		dev_info(DEV, "Resync suspended\n");
		mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
	}

	/* Connected -> Verify*: set up the online-verify bookkeeping */
	if (os.conn == C_CONNECTED &&
	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
		unsigned long now = jiffies;
		int i;

		set_ov_position(mdev, ns.conn);
		mdev->rs_start = now;
		mdev->rs_last_events = 0;
		mdev->rs_last_sect_ev = 0;
		mdev->ov_last_oos_size = 0;
		mdev->ov_last_oos_start = 0;

		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
			mdev->rs_mark_left[i] = mdev->ov_left;
			mdev->rs_mark_time[i] = now;
		}

		drbd_rs_controller_reset(mdev);

		if (ns.conn == C_VERIFY_S) {
			dev_info(DEV, "Starting Online Verify from sector %llu\n",
					(unsigned long long)mdev->ov_position);
			mod_timer(&mdev->resync_timer, jiffies);
		}
	}

	/* Mirror the relevant bits of the new state into the on-disk
	 * meta-data flags, and mark the meta-data dirty if they changed. */
	if (get_ldev(mdev)) {
		u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);

		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
			mdf |= MDF_CRASHED_PRIMARY;
		if (mdev->state.role == R_PRIMARY ||
		    (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
			mdf |= MDF_PRIMARY_IND;
		if (mdev->state.conn > C_WF_REPORT_PARAMS)
			mdf |= MDF_CONNECTED_IND;
		if (mdev->state.disk > D_INCONSISTENT)
			mdf |= MDF_CONSISTENT;
		if (mdev->state.disk > D_OUTDATED)
			mdf |= MDF_WAS_UP_TO_DATE;
		if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
			mdf |= MDF_PEER_OUT_DATED;
		if (mdf != mdev->ldev->md.flags) {
			mdev->ldev->md.flags = mdf;
			drbd_md_mark_dirty(mdev);
		}
		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
			drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
		put_ldev(mdev);
	}

	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
		set_bit(CONSIDER_RESYNC, &mdev->flags);

	/* Receiver should clean up itself */
	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
		drbd_thread_stop_nowait(&mdev->tconn->receiver);

	/* Now the receiver finished cleaning up itself, it should die */
	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
		drbd_thread_stop_nowait(&mdev->tconn->receiver);

	/* Upon network failure, we need to restart the receiver. */
	if (os.conn > C_TEAR_DOWN &&
	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
		drbd_thread_restart_nowait(&mdev->tconn->receiver);

	/* Resume AL writing if we get a connection */
	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
		drbd_resume_al(mdev);

	/* Queue the sleeping side effects (after_state_ch) on the worker.
	 * GFP_ATOMIC: we may be called with the req_lock held and irqs off. */
	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
	if (ascw) {
		ascw->os = os;
		ascw->ns = ns;
		ascw->flags = flags;
		ascw->w.cb = w_after_state_ch;
		ascw->done = done;
		drbd_queue_work(&mdev->tconn->data.work, &ascw->w);
	} else {
		dev_warn(DEV, "Could not kmalloc an ascw\n");
	}

	return rv;
}
854 | |||
855 | static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) | ||
856 | { | ||
857 | struct after_state_chg_work *ascw = | ||
858 | container_of(w, struct after_state_chg_work, w); | ||
859 | |||
860 | after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); | ||
861 | if (ascw->flags & CS_WAIT_COMPLETE) { | ||
862 | D_ASSERT(ascw->done != NULL); | ||
863 | complete(ascw->done); | ||
864 | } | ||
865 | kfree(ascw); | ||
866 | |||
867 | return 1; | ||
868 | } | ||
869 | |||
870 | static void abw_start_sync(struct drbd_conf *mdev, int rv) | ||
871 | { | ||
872 | if (rv) { | ||
873 | dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); | ||
874 | _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); | ||
875 | return; | ||
876 | } | ||
877 | |||
878 | switch (mdev->state.conn) { | ||
879 | case C_STARTING_SYNC_T: | ||
880 | _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); | ||
881 | break; | ||
882 | case C_STARTING_SYNC_S: | ||
883 | drbd_start_resync(mdev, C_SYNC_SOURCE); | ||
884 | break; | ||
885 | } | ||
886 | } | ||
887 | |||
888 | int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, | ||
889 | int (*io_fn)(struct drbd_conf *), | ||
890 | char *why, enum bm_flag flags) | ||
891 | { | ||
892 | int rv; | ||
893 | |||
894 | D_ASSERT(current == mdev->tconn->worker.task); | ||
895 | |||
896 | /* open coded non-blocking drbd_suspend_io(mdev); */ | ||
897 | set_bit(SUSPEND_IO, &mdev->flags); | ||
898 | |||
899 | drbd_bm_lock(mdev, why, flags); | ||
900 | rv = io_fn(mdev); | ||
901 | drbd_bm_unlock(mdev); | ||
902 | |||
903 | drbd_resume_io(mdev); | ||
904 | |||
905 | return rv; | ||
906 | } | ||
907 | |||
908 | /** | ||
909 | * after_state_ch() - Perform after state change actions that may sleep | ||
910 | * @mdev: DRBD device. | ||
911 | * @os: old state. | ||
912 | * @ns: new state. | ||
913 | * @flags: Flags | ||
914 | */ | ||
915 | static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | ||
916 | union drbd_state ns, enum chg_state_flags flags) | ||
917 | { | ||
918 | enum drbd_fencing_p fp; | ||
919 | enum drbd_req_event what = NOTHING; | ||
920 | union drbd_state nsm = (union drbd_state){ .i = -1 }; | ||
921 | |||
922 | if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { | ||
923 | clear_bit(CRASHED_PRIMARY, &mdev->flags); | ||
924 | if (mdev->p_uuid) | ||
925 | mdev->p_uuid[UI_FLAGS] &= ~((u64)2); | ||
926 | } | ||
927 | |||
928 | fp = FP_DONT_CARE; | ||
929 | if (get_ldev(mdev)) { | ||
930 | fp = mdev->ldev->dc.fencing; | ||
931 | put_ldev(mdev); | ||
932 | } | ||
933 | |||
934 | /* Inform userspace about the change... */ | ||
935 | drbd_bcast_state(mdev, ns); | ||
936 | |||
937 | if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && | ||
938 | (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) | ||
939 | drbd_khelper(mdev, "pri-on-incon-degr"); | ||
940 | |||
941 | /* Here we have the actions that are performed after a | ||
942 | state change. This function might sleep */ | ||
943 | |||
944 | nsm.i = -1; | ||
945 | if (ns.susp_nod) { | ||
946 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) | ||
947 | what = RESEND; | ||
948 | |||
949 | if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) | ||
950 | what = RESTART_FROZEN_DISK_IO; | ||
951 | |||
952 | if (what != NOTHING) | ||
953 | nsm.susp_nod = 0; | ||
954 | } | ||
955 | |||
956 | if (ns.susp_fen) { | ||
957 | /* case1: The outdate peer handler is successful: */ | ||
958 | if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { | ||
959 | tl_clear(mdev); | ||
960 | if (test_bit(NEW_CUR_UUID, &mdev->flags)) { | ||
961 | drbd_uuid_new_current(mdev); | ||
962 | clear_bit(NEW_CUR_UUID, &mdev->flags); | ||
963 | } | ||
964 | spin_lock_irq(&mdev->tconn->req_lock); | ||
965 | _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); | ||
966 | spin_unlock_irq(&mdev->tconn->req_lock); | ||
967 | } | ||
968 | /* case2: The connection was established again: */ | ||
969 | if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { | ||
970 | clear_bit(NEW_CUR_UUID, &mdev->flags); | ||
971 | what = RESEND; | ||
972 | nsm.susp_fen = 0; | ||
973 | } | ||
974 | } | ||
975 | |||
976 | if (what != NOTHING) { | ||
977 | spin_lock_irq(&mdev->tconn->req_lock); | ||
978 | _tl_restart(mdev, what); | ||
979 | nsm.i &= mdev->state.i; | ||
980 | _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); | ||
981 | spin_unlock_irq(&mdev->tconn->req_lock); | ||
982 | } | ||
983 | |||
984 | /* Became sync source. With protocol >= 96, we still need to send out | ||
985 | * the sync uuid now. Need to do that before any drbd_send_state, or | ||
986 | * the other side may go "paused sync" before receiving the sync uuids, | ||
987 | * which is unexpected. */ | ||
988 | if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && | ||
989 | (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && | ||
990 | mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) { | ||
991 | drbd_gen_and_send_sync_uuid(mdev); | ||
992 | put_ldev(mdev); | ||
993 | } | ||
994 | |||
995 | /* Do not change the order of the if above and the two below... */ | ||
996 | if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ | ||
997 | drbd_send_uuids(mdev); | ||
998 | drbd_send_state(mdev); | ||
999 | } | ||
1000 | /* No point in queuing send_bitmap if we don't have a connection | ||
1001 | * anymore, so check also the _current_ state, not only the new state | ||
1002 | * at the time this work was queued. */ | ||
1003 | if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && | ||
1004 | mdev->state.conn == C_WF_BITMAP_S) | ||
1005 | drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, | ||
1006 | "send_bitmap (WFBitMapS)", | ||
1007 | BM_LOCKED_TEST_ALLOWED); | ||
1008 | |||
1009 | /* Lost contact to peer's copy of the data */ | ||
1010 | if ((os.pdsk >= D_INCONSISTENT && | ||
1011 | os.pdsk != D_UNKNOWN && | ||
1012 | os.pdsk != D_OUTDATED) | ||
1013 | && (ns.pdsk < D_INCONSISTENT || | ||
1014 | ns.pdsk == D_UNKNOWN || | ||
1015 | ns.pdsk == D_OUTDATED)) { | ||
1016 | if (get_ldev(mdev)) { | ||
1017 | if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && | ||
1018 | mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { | ||
1019 | if (is_susp(mdev->state)) { | ||
1020 | set_bit(NEW_CUR_UUID, &mdev->flags); | ||
1021 | } else { | ||
1022 | drbd_uuid_new_current(mdev); | ||
1023 | drbd_send_uuids(mdev); | ||
1024 | } | ||
1025 | } | ||
1026 | put_ldev(mdev); | ||
1027 | } | ||
1028 | } | ||
1029 | |||
1030 | if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { | ||
1031 | if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) { | ||
1032 | drbd_uuid_new_current(mdev); | ||
1033 | drbd_send_uuids(mdev); | ||
1034 | } | ||
1035 | |||
1036 | /* D_DISKLESS Peer becomes secondary */ | ||
1037 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) | ||
1038 | /* We may still be Primary ourselves. | ||
1039 | * No harm done if the bitmap still changes, | ||
1040 | * redirtied pages will follow later. */ | ||
1041 | drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, | ||
1042 | "demote diskless peer", BM_LOCKED_SET_ALLOWED); | ||
1043 | put_ldev(mdev); | ||
1044 | } | ||
1045 | |||
1046 | /* Write out all changed bits on demote. | ||
1047 | * Though, no need to da that just yet | ||
1048 | * if there is a resync going on still */ | ||
1049 | if (os.role == R_PRIMARY && ns.role == R_SECONDARY && | ||
1050 | mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { | ||
1051 | /* No changes to the bitmap expected this time, so assert that, | ||
1052 | * even though no harm was done if it did change. */ | ||
1053 | drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, | ||
1054 | "demote", BM_LOCKED_TEST_ALLOWED); | ||
1055 | put_ldev(mdev); | ||
1056 | } | ||
1057 | |||
1058 | /* Last part of the attaching process ... */ | ||
1059 | if (ns.conn >= C_CONNECTED && | ||
1060 | os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { | ||
1061 | drbd_send_sizes(mdev, 0, 0); /* to start sync... */ | ||
1062 | drbd_send_uuids(mdev); | ||
1063 | drbd_send_state(mdev); | ||
1064 | } | ||
1065 | |||
1066 | /* We want to pause/continue resync, tell peer. */ | ||
1067 | if (ns.conn >= C_CONNECTED && | ||
1068 | ((os.aftr_isp != ns.aftr_isp) || | ||
1069 | (os.user_isp != ns.user_isp))) | ||
1070 | drbd_send_state(mdev); | ||
1071 | |||
1072 | /* In case one of the isp bits got set, suspend other devices. */ | ||
1073 | if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && | ||
1074 | (ns.aftr_isp || ns.peer_isp || ns.user_isp)) | ||
1075 | suspend_other_sg(mdev); | ||
1076 | |||
1077 | /* Make sure the peer gets informed about eventual state | ||
1078 | changes (ISP bits) while we were in WFReportParams. */ | ||
1079 | if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) | ||
1080 | drbd_send_state(mdev); | ||
1081 | |||
1082 | if (os.conn != C_AHEAD && ns.conn == C_AHEAD) | ||
1083 | drbd_send_state(mdev); | ||
1084 | |||
1085 | /* We are in the progress to start a full sync... */ | ||
1086 | if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || | ||
1087 | (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) | ||
1088 | /* no other bitmap changes expected during this phase */ | ||
1089 | drbd_queue_bitmap_io(mdev, | ||
1090 | &drbd_bmio_set_n_write, &abw_start_sync, | ||
1091 | "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); | ||
1092 | |||
1093 | /* We are invalidating our self... */ | ||
1094 | if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && | ||
1095 | os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) | ||
1096 | /* other bitmap operation expected during this phase */ | ||
1097 | drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, | ||
1098 | "set_n_write from invalidate", BM_LOCKED_MASK); | ||
1099 | |||
1100 | /* first half of local IO error, failure to attach, | ||
1101 | * or administrative detach */ | ||
1102 | if (os.disk != D_FAILED && ns.disk == D_FAILED) { | ||
1103 | enum drbd_io_error_p eh; | ||
1104 | int was_io_error; | ||
1105 | /* corresponding get_ldev was in __drbd_set_state, to serialize | ||
1106 | * our cleanup here with the transition to D_DISKLESS, | ||
1107 | * so it is safe to dreference ldev here. */ | ||
1108 | eh = mdev->ldev->dc.on_io_error; | ||
1109 | was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); | ||
1110 | |||
1111 | /* current state still has to be D_FAILED, | ||
1112 | * there is only one way out: to D_DISKLESS, | ||
1113 | * and that may only happen after our put_ldev below. */ | ||
1114 | if (mdev->state.disk != D_FAILED) | ||
1115 | dev_err(DEV, | ||
1116 | "ASSERT FAILED: disk is %s during detach\n", | ||
1117 | drbd_disk_str(mdev->state.disk)); | ||
1118 | |||
1119 | if (drbd_send_state(mdev)) | ||
1120 | dev_warn(DEV, "Notified peer that I am detaching my disk\n"); | ||
1121 | else | ||
1122 | dev_err(DEV, "Sending state for detaching disk failed\n"); | ||
1123 | |||
1124 | drbd_rs_cancel_all(mdev); | ||
1125 | |||
1126 | /* In case we want to get something to stable storage still, | ||
1127 | * this may be the last chance. | ||
1128 | * Following put_ldev may transition to D_DISKLESS. */ | ||
1129 | drbd_md_sync(mdev); | ||
1130 | put_ldev(mdev); | ||
1131 | |||
1132 | if (was_io_error && eh == EP_CALL_HELPER) | ||
1133 | drbd_khelper(mdev, "local-io-error"); | ||
1134 | } | ||
1135 | |||
1136 | /* second half of local IO error, failure to attach, | ||
1137 | * or administrative detach, | ||
1138 | * after local_cnt references have reached zero again */ | ||
1139 | if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) { | ||
1140 | /* We must still be diskless, | ||
1141 | * re-attach has to be serialized with this! */ | ||
1142 | if (mdev->state.disk != D_DISKLESS) | ||
1143 | dev_err(DEV, | ||
1144 | "ASSERT FAILED: disk is %s while going diskless\n", | ||
1145 | drbd_disk_str(mdev->state.disk)); | ||
1146 | |||
1147 | mdev->rs_total = 0; | ||
1148 | mdev->rs_failed = 0; | ||
1149 | atomic_set(&mdev->rs_pending_cnt, 0); | ||
1150 | |||
1151 | if (drbd_send_state(mdev)) | ||
1152 | dev_warn(DEV, "Notified peer that I'm now diskless.\n"); | ||
1153 | /* corresponding get_ldev in __drbd_set_state | ||
1154 | * this may finally trigger drbd_ldev_destroy. */ | ||
1155 | put_ldev(mdev); | ||
1156 | } | ||
1157 | |||
1158 | /* Notify peer that I had a local IO error, and did not detached.. */ | ||
1159 | if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) | ||
1160 | drbd_send_state(mdev); | ||
1161 | |||
1162 | /* Disks got bigger while they were detached */ | ||
1163 | if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && | ||
1164 | test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { | ||
1165 | if (ns.conn == C_CONNECTED) | ||
1166 | resync_after_online_grow(mdev); | ||
1167 | } | ||
1168 | |||
1169 | /* A resync finished or aborted, wake paused devices... */ | ||
1170 | if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) || | ||
1171 | (os.peer_isp && !ns.peer_isp) || | ||
1172 | (os.user_isp && !ns.user_isp)) | ||
1173 | resume_next_sg(mdev); | ||
1174 | |||
1175 | /* sync target done with resync. Explicitly notify peer, even though | ||
1176 | * it should (at least for non-empty resyncs) already know itself. */ | ||
1177 | if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) | ||
1178 | drbd_send_state(mdev); | ||
1179 | |||
1180 | /* This triggers bitmap writeout of potentially still unwritten pages | ||
1181 | * if the resync finished cleanly, or aborted because of peer disk | ||
1182 | * failure, or because of connection loss. | ||
1183 | * For resync aborted because of local disk failure, we cannot do | ||
1184 | * any bitmap writeout anymore. | ||
1185 | * No harm done if some bits change during this phase. | ||
1186 | */ | ||
1187 | if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { | ||
1188 | drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, | ||
1189 | "write from resync_finished", BM_LOCKED_SET_ALLOWED); | ||
1190 | put_ldev(mdev); | ||
1191 | } | ||
1192 | |||
1193 | if (ns.disk == D_DISKLESS && | ||
1194 | ns.conn == C_STANDALONE && | ||
1195 | ns.role == R_SECONDARY) { | ||
1196 | if (os.aftr_isp != ns.aftr_isp) | ||
1197 | resume_next_sg(mdev); | ||
1198 | } | ||
1199 | |||
1200 | after_conn_state_ch(mdev->tconn, os, ns, flags); | ||
1201 | drbd_md_sync(mdev); | ||
1202 | } | ||
1203 | |||
1204 | static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, | ||
1205 | union drbd_state ns, enum chg_state_flags flags) | ||
1206 | { | ||
1207 | /* Upon network configuration, we need to start the receiver */ | ||
1208 | if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) | ||
1209 | drbd_thread_start(&tconn->receiver); | ||
1210 | |||
1211 | if (ns.disk == D_DISKLESS && | ||
1212 | ns.conn == C_STANDALONE && | ||
1213 | ns.role == R_SECONDARY) { | ||
1214 | /* if (test_bit(DEVICE_DYING, &mdev->flags)) TODO: DEVICE_DYING functionality */ | ||
1215 | drbd_thread_stop_nowait(&tconn->worker); | ||
1216 | } | ||
1217 | } | ||
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h new file mode 100644 index 000000000000..3ec26e2c4c40 --- /dev/null +++ b/drivers/block/drbd/drbd_state.h | |||
@@ -0,0 +1,101 @@ | |||
#ifndef DRBD_STATE_H
#define DRBD_STATE_H

struct drbd_conf;

/**
 * DOC: DRBD State macros
 *
 * These macros are used to express state changes in easily readable form.
 *
 * The NS macros expand to a mask and a value, that can be bit ored onto the
 * current state as soon as the spinlock (req_lock) was taken.
 *
 * The _NS macros are used for state functions that get called with the
 * spinlock. These macros expand directly to the new state value.
 *
 * Besides the basic forms NS() and _NS() additional _?NS[23] are defined
 * to express state changes that affect more than one aspect of the state.
 *
 * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
 * Means that the network connection was established and that the peer
 * is in secondary role.
 */
/* Per-field bit masks: map each union drbd_state field name to the mask
 * covering that field, so the NS()/_NS() macros below can form the
 * (mask, value) pair via token pasting (T##_MASK). */
#define role_MASK R_MASK
#define peer_MASK R_MASK
#define disk_MASK D_MASK
#define pdsk_MASK D_MASK
#define conn_MASK C_MASK
#define susp_MASK 1
#define user_isp_MASK 1
#define aftr_isp_MASK 1
#define susp_nod_MASK 1
#define susp_fen_MASK 1

/* NS*(field, value, ...): expand to the two arguments (mask, val) expected
 * by the drbd_request_state()/drbd_change_state() family.  These rely on
 * GCC statement expressions. */
#define NS(T, S) \
	({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
	({ union drbd_state val; val.i = 0; val.T = (S); val; })
#define NS2(T1, S1, T2, S2) \
	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
	  mask.T2 = T2##_MASK; mask; }), \
	({ union drbd_state val; val.i = 0; val.T1 = (S1); \
	  val.T2 = (S2); val; })
#define NS3(T1, S1, T2, S2, T3, S3) \
	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
	  mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
	({ union drbd_state val; val.i = 0; val.T1 = (S1); \
	  val.T2 = (S2); val.T3 = (S3); val; })

/* _NS*(mdev, field, value, ...): expand to (mdev, new_state) where the new
 * state is the device's current state with the given fields overwritten;
 * for use by callers that already hold the spinlock. */
#define _NS(D, T, S) \
	D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; })
#define _NS2(D, T1, S1, T2, S2) \
	D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
	  __ns.T2 = (S2); __ns; })
#define _NS3(D, T1, S1, T2, S2, T3, S3) \
	D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
	  __ns.T2 = (S2); __ns.T3 = (S3); __ns; })

/* Flags controlling how a state change is carried out; combinable. */
enum chg_state_flags {
	CS_HARD	= 1,
	CS_VERBOSE = 2,
	CS_WAIT_COMPLETE = 4,
	CS_SERIALIZE    = 8,
	CS_ORDERED      = CS_WAIT_COMPLETE + CS_SERIALIZE, /* both of the above */
};

extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
					    enum chg_state_flags f,
					    union drbd_state mask,
					    union drbd_state val);
extern void drbd_force_state(struct drbd_conf *, union drbd_state,
			union drbd_state);
extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
					      union drbd_state,
					      union drbd_state,
					      enum chg_state_flags);
extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
					   enum chg_state_flags,
					   struct completion *done);
extern void print_st_err(struct drbd_conf *, union drbd_state,
			union drbd_state, int);

extern void drbd_resume_al(struct drbd_conf *mdev);

/**
 * drbd_request_state() - Request a state change
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 *
 * This is the most graceful way of requesting a state change. It is quite
 * verbose in case the state change is not possible, and all those
 * state changes are globally serialized.
 *
 * Returns the enum drbd_state_rv result of _drbd_request_state() as an int.
 */
static inline int drbd_request_state(struct drbd_conf *mdev,
				     union drbd_state mask,
				     union drbd_state val)
{
	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
}

#endif