author    Philipp Reisner <philipp.reisner@linbit.com>  2011-01-27 08:07:51 -0500
committer Philipp Reisner <philipp.reisner@linbit.com>  2011-09-28 04:26:43 -0400
commit    b8907339534b8d17f6aad9e9cc98d490aa0c6137 (patch)
tree      13f8142eccb0e17447d96996a21ee8c2ec1f8704 /drivers/block/drbd
parent    db830c464b69e26ea4d371e38bb2320c99c82f41 (diff)
drbd: Moved the state functions into its own source file
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--  drivers/block/drbd/Makefile     |    2
-rw-r--r--  drivers/block/drbd/drbd_int.h   |   46
-rw-r--r--  drivers/block/drbd/drbd_main.c  | 1179
-rw-r--r--  drivers/block/drbd/drbd_state.c | 1217
-rw-r--r--  drivers/block/drbd/drbd_state.h |  101
5 files changed, 1326 insertions(+), 1219 deletions(-)
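
For orientation, the interface being relocated is DRBD's usual state-change entry point. A minimal caller looks roughly like the following (an illustrative sketch only, not part of the patch; it uses the NS() mask/val helper and the drbd_request_state() wrapper that appear in the hunks below, where drbd_request_state() is _drbd_request_state() with CS_VERBOSE + CS_ORDERED):

	/* Sketch: gracefully request a transition of the connection state.
	 * NS(conn, C_CONNECTED) builds the (mask, val) pair for the .conn
	 * field; the request is logged on failure (CS_VERBOSE) and globally
	 * serialized (CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE). */
	enum drbd_state_rv rv = drbd_request_state(mdev, NS(conn, C_CONNECTED));
	if (rv < SS_SUCCESS)
		return rv;	/* refused; print_st_err() already explained why */
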
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index cacbb04f285d..06fb4453734c 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -1,6 +1,6 @@
 drbd-y := drbd_bitmap.o drbd_proc.o
 drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
 drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
-drbd-y += drbd_interval.o
+drbd-y += drbd_interval.o drbd_state.o
 
 obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 302ccc6d9432..98addab2c928 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -43,6 +43,8 @@
 #include <net/tcp.h>
 #include <linux/lru_cache.h>
 #include <linux/prefetch.h>
+#include <linux/drbd.h>
+#include "drbd_state.h"
 
 #ifdef __CHECKER__
 # define __protected_by(x)       __attribute__((require_context(x,1,999,"rdwr")))
@@ -1120,35 +1122,12 @@ static inline void drbd_put_data_sock(struct drbd_conf *mdev)
 
 /* drbd_main.c */
 
-enum chg_state_flags {
-	CS_HARD	= 1,
-	CS_VERBOSE = 2,
-	CS_WAIT_COMPLETE = 4,
-	CS_SERIALIZE    = 8,
-	CS_ORDERED      = CS_WAIT_COMPLETE + CS_SERIALIZE,
-};
-
 enum dds_flags {
 	DDSF_FORCED    = 1,
 	DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */
 };
 
 extern void drbd_init_set_defaults(struct drbd_conf *mdev);
-extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
-					    enum chg_state_flags f,
-					    union drbd_state mask,
-					    union drbd_state val);
-extern void drbd_force_state(struct drbd_conf *, union drbd_state,
-			union drbd_state);
-extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
-					      union drbd_state,
-					      union drbd_state,
-					      enum chg_state_flags);
-extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
-					   enum chg_state_flags,
-					   struct completion *done);
-extern void print_st_err(struct drbd_conf *, union drbd_state,
-			union drbd_state, int);
 extern int drbd_thread_start(struct drbd_thread *thi);
 extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait);
 #ifdef CONFIG_SMP
@@ -1712,6 +1691,10 @@ static inline int drbd_ee_has_active_page(struct drbd_peer_request *peer_req)
 }
 
 
+
+
+
+
 static inline void drbd_state_lock(struct drbd_conf *mdev)
 {
 	wait_event(mdev->misc_wait,
@@ -1737,23 +1720,6 @@ _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 	return rv;
 }
 
-/**
- * drbd_request_state() - Reqest a state change
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- *
- * This is the most graceful way of requesting a state change. It is verbose
- * quite verbose in case the state change is not possible, and all those
- * state changes are globally serialized.
- */
-static inline int drbd_request_state(struct drbd_conf *mdev,
-				     union drbd_state mask,
-				     union drbd_state val)
-{
-	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
-}
-
 #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
 static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where)
 {
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 7728d1613406..4b39b3d0dd55 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -56,14 +56,6 @@
 
 #include "drbd_vli.h"
 
-struct after_state_chg_work {
-	struct drbd_work w;
-	union drbd_state os;
-	union drbd_state ns;
-	enum chg_state_flags flags;
-	struct completion *done;
-};
-
 static DEFINE_MUTEX(drbd_main_mutex);
 int drbdd_init(struct drbd_thread *);
 int drbd_worker(struct drbd_thread *);
@@ -72,9 +64,6 @@ int drbd_asender(struct drbd_thread *);
 int drbd_init(void);
 static int drbd_open(struct block_device *bdev, fmode_t mode);
 static int drbd_release(struct gendisk *gd, fmode_t mode);
-static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
-static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
-			   union drbd_state ns, enum chg_state_flags flags);
 static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
 static void md_sync_timer_fn(unsigned long data);
 static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
@@ -340,7 +329,7 @@ bail:
  * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
  * RESTART_FROZEN_DISK_IO.
  */
-static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
+void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 {
 	struct drbd_tl_epoch *b, *tmp, **pn;
 	struct list_head *le, *tle, carry_reads;
@@ -450,1172 +439,6 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 	spin_unlock_irq(&mdev->tconn->req_lock);
 }
 
-/**
- * cl_wide_st_chg() - true if the state change is a cluster wide one
- * @mdev:	DRBD device.
- * @os:	old (current) state.
- * @ns:	new (wanted) state.
- */
-static int cl_wide_st_chg(struct drbd_conf *mdev,
-			  union drbd_state os, union drbd_state ns)
-{
-	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
-		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
-		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
-		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
-		  (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) ||
-		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
-		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
-}
-
-enum drbd_state_rv
-drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
-		  union drbd_state mask, union drbd_state val)
-{
-	unsigned long flags;
-	union drbd_state os, ns;
-	enum drbd_state_rv rv;
-
-	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
-	os = mdev->state;
-	ns.i = (os.i & ~mask.i) | val.i;
-	rv = _drbd_set_state(mdev, ns, f, NULL);
-	ns = mdev->state;
-	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
-
-	return rv;
-}
-
-/**
- * drbd_force_state() - Impose a change which happens outside our control on our state
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- */
-void drbd_force_state(struct drbd_conf *mdev,
-	union drbd_state mask, union drbd_state val)
-{
-	drbd_change_state(mdev, CS_HARD, mask, val);
-}
-
-static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
-static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
-						    union drbd_state,
-						    union drbd_state);
-static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
-				       union drbd_state ns, const char **warn_sync_abort);
-int drbd_send_state_req(struct drbd_conf *,
-			union drbd_state, union drbd_state);
-
-static enum drbd_state_rv
-_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
-	     union drbd_state val)
-{
-	union drbd_state os, ns;
-	unsigned long flags;
-	enum drbd_state_rv rv;
-
-	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
-		return SS_CW_SUCCESS;
-
-	if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
-		return SS_CW_FAILED_BY_PEER;
-
-	rv = 0;
-	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
-	os = mdev->state;
-	ns.i = (os.i & ~mask.i) | val.i;
-	ns = sanitize_state(mdev, os, ns, NULL);
-
-	if (!cl_wide_st_chg(mdev, os, ns))
-		rv = SS_CW_NO_NEED;
-	if (!rv) {
-		rv = is_valid_state(mdev, ns);
-		if (rv == SS_SUCCESS) {
-			rv = is_valid_state_transition(mdev, ns, os);
-			if (rv == SS_SUCCESS)
-				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
-		}
-	}
-	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
-
-	return rv;
-}
-
-/**
- * drbd_req_state() - Perform an eventually cluster wide state change
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- * @f:		flags
- *
- * Should not be called directly, use drbd_request_state() or
- * _drbd_request_state().
- */
-static enum drbd_state_rv
-drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
-	       union drbd_state val, enum chg_state_flags f)
-{
-	struct completion done;
-	unsigned long flags;
-	union drbd_state os, ns;
-	enum drbd_state_rv rv;
-
-	init_completion(&done);
-
-	if (f & CS_SERIALIZE)
-		mutex_lock(&mdev->state_mutex);
-
-	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
-	os = mdev->state;
-	ns.i = (os.i & ~mask.i) | val.i;
-	ns = sanitize_state(mdev, os, ns, NULL);
-
-	if (cl_wide_st_chg(mdev, os, ns)) {
-		rv = is_valid_state(mdev, ns);
-		if (rv == SS_SUCCESS)
-			rv = is_valid_state_transition(mdev, ns, os);
-		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
-
-		if (rv < SS_SUCCESS) {
-			if (f & CS_VERBOSE)
-				print_st_err(mdev, os, ns, rv);
-			goto abort;
-		}
-
-		drbd_state_lock(mdev);
-		if (!drbd_send_state_req(mdev, mask, val)) {
-			drbd_state_unlock(mdev);
-			rv = SS_CW_FAILED_BY_PEER;
-			if (f & CS_VERBOSE)
-				print_st_err(mdev, os, ns, rv);
-			goto abort;
-		}
-
-		wait_event(mdev->state_wait,
-			(rv = _req_st_cond(mdev, mask, val)));
-
-		if (rv < SS_SUCCESS) {
-			drbd_state_unlock(mdev);
-			if (f & CS_VERBOSE)
-				print_st_err(mdev, os, ns, rv);
-			goto abort;
-		}
-		spin_lock_irqsave(&mdev->tconn->req_lock, flags);
-		os = mdev->state;
-		ns.i = (os.i & ~mask.i) | val.i;
-		rv = _drbd_set_state(mdev, ns, f, &done);
-		drbd_state_unlock(mdev);
-	} else {
-		rv = _drbd_set_state(mdev, ns, f, &done);
-	}
-
-	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
-
-	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
-		D_ASSERT(current != mdev->tconn->worker.task);
-		wait_for_completion(&done);
-	}
-
-abort:
-	if (f & CS_SERIALIZE)
-		mutex_unlock(&mdev->state_mutex);
-
-	return rv;
-}
-
-/**
- * _drbd_request_state() - Request a state change (with flags)
- * @mdev:	DRBD device.
- * @mask:	mask of state bits to change.
- * @val:	value of new state bits.
- * @f:		flags
- *
- * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
- * flag, or when logging of failed state change requests is not desired.
- */
-enum drbd_state_rv
-_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
-		    union drbd_state val, enum chg_state_flags f)
-{
-	enum drbd_state_rv rv;
-
-	wait_event(mdev->state_wait,
-		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
-
-	return rv;
-}
-
-static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
-{
-	dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
-	    name,
-	    drbd_conn_str(ns.conn),
-	    drbd_role_str(ns.role),
-	    drbd_role_str(ns.peer),
-	    drbd_disk_str(ns.disk),
-	    drbd_disk_str(ns.pdsk),
-	    is_susp(ns) ? 's' : 'r',
-	    ns.aftr_isp ? 'a' : '-',
-	    ns.peer_isp ? 'p' : '-',
-	    ns.user_isp ? 'u' : '-'
-	    );
-}
-
-void print_st_err(struct drbd_conf *mdev, union drbd_state os,
-		  union drbd_state ns, enum drbd_state_rv err)
-{
-	if (err == SS_IN_TRANSIENT_STATE)
-		return;
-	dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
-	print_st(mdev, " state", os);
-	print_st(mdev, "wanted", ns);
-}
-
-
-/**
- * is_valid_state() - Returns an SS_ error code if ns is not valid
- * @mdev:	DRBD device.
- * @ns:		State to consider.
- */
-static enum drbd_state_rv
-is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
-{
-	/* See drbd_state_sw_errors in drbd_strings.c */
-
-	enum drbd_fencing_p fp;
-	enum drbd_state_rv rv = SS_SUCCESS;
-
-	fp = FP_DONT_CARE;
-	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	}
-
-	if (get_net_conf(mdev->tconn)) {
-		if (!mdev->tconn->net_conf->two_primaries &&
-		    ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
-			rv = SS_TWO_PRIMARIES;
-		put_net_conf(mdev->tconn);
-	}
-
-	if (rv <= 0)
-		/* already found a reason to abort */;
-	else if (ns.role == R_SECONDARY && mdev->open_cnt)
-		rv = SS_DEVICE_IN_USE;
-
-	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
-		rv = SS_NO_UP_TO_DATE_DISK;
-
-	else if (fp >= FP_RESOURCE &&
-		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
-		rv = SS_PRIMARY_NOP;
-
-	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
-		rv = SS_NO_UP_TO_DATE_DISK;
-
-	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
-		rv = SS_NO_LOCAL_DISK;
-
-	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
-		rv = SS_NO_REMOTE_DISK;
-
-	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
-		rv = SS_NO_UP_TO_DATE_DISK;
-
-	else if ((ns.conn == C_CONNECTED ||
-		  ns.conn == C_WF_BITMAP_S ||
-		  ns.conn == C_SYNC_SOURCE ||
-		  ns.conn == C_PAUSED_SYNC_S) &&
-		  ns.disk == D_OUTDATED)
-		rv = SS_CONNECTED_OUTDATES;
-
-	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-		 (mdev->sync_conf.verify_alg[0] == 0))
-		rv = SS_NO_VERIFY_ALG;
-
-	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-		  mdev->tconn->agreed_pro_version < 88)
-		rv = SS_NOT_SUPPORTED;
-
-	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
-		rv = SS_CONNECTED_OUTDATES;
-
-	return rv;
-}
-
-/**
- * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
- * @mdev:	DRBD device.
- * @ns:		new state.
- * @os:		old state.
- */
-static enum drbd_state_rv
-is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
-			  union drbd_state os)
-{
-	enum drbd_state_rv rv = SS_SUCCESS;
-
-	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
-	    os.conn > C_CONNECTED)
-		rv = SS_RESYNC_RUNNING;
-
-	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
-		rv = SS_ALREADY_STANDALONE;
-
-	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
-		rv = SS_IS_DISKLESS;
-
-	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
-		rv = SS_NO_NET_CONFIG;
-
-	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
-		rv = SS_LOWER_THAN_OUTDATED;
-
-	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
-		rv = SS_IN_TRANSIENT_STATE;
-
-	if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
-		rv = SS_IN_TRANSIENT_STATE;
-
-	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
-		rv = SS_NEED_CONNECTION;
-
-	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-	    ns.conn != os.conn && os.conn > C_CONNECTED)
-		rv = SS_RESYNC_RUNNING;
-
-	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
-	    os.conn < C_CONNECTED)
-		rv = SS_NEED_CONNECTION;
-
-	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
-	    && os.conn < C_WF_REPORT_PARAMS)
-		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
-
-	return rv;
-}
-
-/**
- * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
- * @mdev:	DRBD device.
- * @os:		old state.
- * @ns:		new state.
- * @warn_sync_abort:
- *
- * When we loose connection, we have to set the state of the peers disk (pdsk)
- * to D_UNKNOWN. This rule and many more along those lines are in this function.
- */
-static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
-				       union drbd_state ns, const char **warn_sync_abort)
-{
-	enum drbd_fencing_p fp;
-	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
-
-	fp = FP_DONT_CARE;
-	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	}
-
-	/* Disallow Network errors to configure a device's network part */
-	if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
-	    os.conn <= C_DISCONNECTING)
-		ns.conn = os.conn;
-
-	/* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
-	 * If you try to go into some Sync* state, that shall fail (elsewhere). */
-	if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
-	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN)
-		ns.conn = os.conn;
-
-	/* we cannot fail (again) if we already detached */
-	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
-		ns.disk = D_DISKLESS;
-
-	/* if we are only D_ATTACHING yet,
-	 * we can (and should) go directly to D_DISKLESS. */
-	if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
-		ns.disk = D_DISKLESS;
-
-	/* After C_DISCONNECTING only C_STANDALONE may follow */
-	if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
-		ns.conn = os.conn;
-
-	if (ns.conn < C_CONNECTED) {
-		ns.peer_isp = 0;
-		ns.peer = R_UNKNOWN;
-		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
-			ns.pdsk = D_UNKNOWN;
-	}
-
-	/* Clear the aftr_isp when becoming unconfigured */
-	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
-		ns.aftr_isp = 0;
-
-	/* Abort resync if a disk fails/detaches */
-	if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
-	    (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
-		if (warn_sync_abort)
-			*warn_sync_abort =
-				os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
-				"Online-verify" : "Resync";
-		ns.conn = C_CONNECTED;
-	}
-
-	/* Connection breaks down before we finished "Negotiating" */
-	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
-	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
-		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
-			ns.disk = mdev->new_state_tmp.disk;
-			ns.pdsk = mdev->new_state_tmp.pdsk;
-		} else {
-			dev_alert(DEV, "Connection lost while negotiating, no data!\n");
-			ns.disk = D_DISKLESS;
-			ns.pdsk = D_UNKNOWN;
-		}
-		put_ldev(mdev);
-	}
-
-	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
-	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
-		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
-			ns.disk = D_UP_TO_DATE;
-		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
-			ns.pdsk = D_UP_TO_DATE;
-	}
-
-	/* Implications of the connection stat on the disk states */
-	disk_min = D_DISKLESS;
-	disk_max = D_UP_TO_DATE;
-	pdsk_min = D_INCONSISTENT;
-	pdsk_max = D_UNKNOWN;
-	switch ((enum drbd_conns)ns.conn) {
-	case C_WF_BITMAP_T:
-	case C_PAUSED_SYNC_T:
-	case C_STARTING_SYNC_T:
-	case C_WF_SYNC_UUID:
-	case C_BEHIND:
-		disk_min = D_INCONSISTENT;
-		disk_max = D_OUTDATED;
-		pdsk_min = D_UP_TO_DATE;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_VERIFY_S:
-	case C_VERIFY_T:
-		disk_min = D_UP_TO_DATE;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_UP_TO_DATE;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_CONNECTED:
-		disk_min = D_DISKLESS;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_DISKLESS;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_WF_BITMAP_S:
-	case C_PAUSED_SYNC_S:
-	case C_STARTING_SYNC_S:
-	case C_AHEAD:
-		disk_min = D_UP_TO_DATE;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_INCONSISTENT;
-		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
-		break;
-	case C_SYNC_TARGET:
-		disk_min = D_INCONSISTENT;
-		disk_max = D_INCONSISTENT;
-		pdsk_min = D_UP_TO_DATE;
-		pdsk_max = D_UP_TO_DATE;
-		break;
-	case C_SYNC_SOURCE:
-		disk_min = D_UP_TO_DATE;
-		disk_max = D_UP_TO_DATE;
-		pdsk_min = D_INCONSISTENT;
-		pdsk_max = D_INCONSISTENT;
-		break;
-	case C_STANDALONE:
-	case C_DISCONNECTING:
-	case C_UNCONNECTED:
-	case C_TIMEOUT:
-	case C_BROKEN_PIPE:
-	case C_NETWORK_FAILURE:
-	case C_PROTOCOL_ERROR:
-	case C_TEAR_DOWN:
-	case C_WF_CONNECTION:
-	case C_WF_REPORT_PARAMS:
-	case C_MASK:
-		break;
-	}
-	if (ns.disk > disk_max)
-		ns.disk = disk_max;
-
-	if (ns.disk < disk_min) {
-		dev_warn(DEV, "Implicitly set disk from %s to %s\n",
-			 drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
-		ns.disk = disk_min;
-	}
-	if (ns.pdsk > pdsk_max)
-		ns.pdsk = pdsk_max;
-
-	if (ns.pdsk < pdsk_min) {
-		dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
-			 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
-		ns.pdsk = pdsk_min;
-	}
-
-	if (fp == FP_STONITH &&
-	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
-	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
-		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
-
-	if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
-	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
-	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
-		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
-
-	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
-		if (ns.conn == C_SYNC_SOURCE)
-			ns.conn = C_PAUSED_SYNC_S;
-		if (ns.conn == C_SYNC_TARGET)
-			ns.conn = C_PAUSED_SYNC_T;
-	} else {
-		if (ns.conn == C_PAUSED_SYNC_S)
-			ns.conn = C_SYNC_SOURCE;
-		if (ns.conn == C_PAUSED_SYNC_T)
-			ns.conn = C_SYNC_TARGET;
-	}
-
-	return ns;
-}
-
-/* helper for __drbd_set_state */
-static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
-{
-	if (mdev->tconn->agreed_pro_version < 90)
-		mdev->ov_start_sector = 0;
-	mdev->rs_total = drbd_bm_bits(mdev);
-	mdev->ov_position = 0;
-	if (cs == C_VERIFY_T) {
-		/* starting online verify from an arbitrary position
-		 * does not fit well into the existing protocol.
-		 * on C_VERIFY_T, we initialize ov_left and friends
-		 * implicitly in receive_DataRequest once the
-		 * first P_OV_REQUEST is received */
-		mdev->ov_start_sector = ~(sector_t)0;
-	} else {
-		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
-		if (bit >= mdev->rs_total) {
-			mdev->ov_start_sector =
-				BM_BIT_TO_SECT(mdev->rs_total - 1);
-			mdev->rs_total = 1;
-		} else
-			mdev->rs_total -= bit;
-		mdev->ov_position = mdev->ov_start_sector;
-	}
-	mdev->ov_left = mdev->rs_total;
-}
-
-static void drbd_resume_al(struct drbd_conf *mdev)
-{
-	if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
-		dev_info(DEV, "Resumed AL updates\n");
-}
-
-/**
- * __drbd_set_state() - Set a new DRBD state
- * @mdev:	DRBD device.
- * @ns:		new state.
- * @flags:	Flags
- * @done:	Optional completion, that will get completed after the after_state_ch() finished
- *
- * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
- */
-enum drbd_state_rv
-__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
-	         enum chg_state_flags flags, struct completion *done)
-{
-	union drbd_state os;
-	enum drbd_state_rv rv = SS_SUCCESS;
-	const char *warn_sync_abort = NULL;
-	struct after_state_chg_work *ascw;
-
-	os = mdev->state;
-
-	ns = sanitize_state(mdev, os, ns, &warn_sync_abort);
-
-	if (ns.i == os.i)
-		return SS_NOTHING_TO_DO;
-
-	if (!(flags & CS_HARD)) {
-		/*  pre-state-change checks ; only look at ns  */
-		/* See drbd_state_sw_errors in drbd_strings.c */
-
-		rv = is_valid_state(mdev, ns);
-		if (rv < SS_SUCCESS) {
-			/* If the old state was illegal as well, then let
-			   this happen...*/
-
-			if (is_valid_state(mdev, os) == rv)
-				rv = is_valid_state_transition(mdev, ns, os);
-		} else
-			rv = is_valid_state_transition(mdev, ns, os);
-	}
-
-	if (rv < SS_SUCCESS) {
-		if (flags & CS_VERBOSE)
-			print_st_err(mdev, os, ns, rv);
-		return rv;
-	}
-
-	if (warn_sync_abort)
-		dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
-
-	{
-	char *pbp, pb[300];
-	pbp = pb;
-	*pbp = 0;
-	if (ns.role != os.role)
-		pbp += sprintf(pbp, "role( %s -> %s ) ",
-			       drbd_role_str(os.role),
-			       drbd_role_str(ns.role));
-	if (ns.peer != os.peer)
-		pbp += sprintf(pbp, "peer( %s -> %s ) ",
-			       drbd_role_str(os.peer),
-			       drbd_role_str(ns.peer));
-	if (ns.conn != os.conn)
-		pbp += sprintf(pbp, "conn( %s -> %s ) ",
-			       drbd_conn_str(os.conn),
-			       drbd_conn_str(ns.conn));
-	if (ns.disk != os.disk)
-		pbp += sprintf(pbp, "disk( %s -> %s ) ",
-			       drbd_disk_str(os.disk),
-			       drbd_disk_str(ns.disk));
-	if (ns.pdsk != os.pdsk)
-		pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
-			       drbd_disk_str(os.pdsk),
-			       drbd_disk_str(ns.pdsk));
-	if (is_susp(ns) != is_susp(os))
-		pbp += sprintf(pbp, "susp( %d -> %d ) ",
-			       is_susp(os),
-			       is_susp(ns));
-	if (ns.aftr_isp != os.aftr_isp)
-		pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
-			       os.aftr_isp,
-			       ns.aftr_isp);
-	if (ns.peer_isp != os.peer_isp)
-		pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
-			       os.peer_isp,
-			       ns.peer_isp);
-	if (ns.user_isp != os.user_isp)
-		pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
-			       os.user_isp,
-			       ns.user_isp);
-	dev_info(DEV, "%s\n", pb);
-	}
-
-	/* solve the race between becoming unconfigured,
-	 * worker doing the cleanup, and
-	 * admin reconfiguring us:
-	 * on (re)configure, first set CONFIG_PENDING,
-	 * then wait for a potentially exiting worker,
-	 * start the worker, and schedule one no_op.
-	 * then proceed with configuration.
-	 */
-	if (ns.disk == D_DISKLESS &&
-	    ns.conn == C_STANDALONE &&
-	    ns.role == R_SECONDARY &&
-	    !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
-		set_bit(DEVICE_DYING, &mdev->flags);
-
-	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
-	 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
-	 * drbd_ldev_destroy() won't happen before our corresponding
-	 * after_state_ch works run, where we put_ldev again. */
-	if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
-	    (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
-		atomic_inc(&mdev->local_cnt);
-
-	mdev->state = ns;
-
-	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
-		drbd_print_uuids(mdev, "attached to UUIDs");
-
-	wake_up(&mdev->misc_wait);
-	wake_up(&mdev->state_wait);
-
-	/* aborted verify run. log the last position */
-	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
-	    ns.conn < C_CONNECTED) {
-		mdev->ov_start_sector =
-			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
-		dev_info(DEV, "Online Verify reached sector %llu\n",
-			(unsigned long long)mdev->ov_start_sector);
-	}
-
-	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
-	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
-		dev_info(DEV, "Syncer continues.\n");
-		mdev->rs_paused += (long)jiffies
-				  -(long)mdev->rs_mark_time[mdev->rs_last_mark];
-		if (ns.conn == C_SYNC_TARGET)
-			mod_timer(&mdev->resync_timer, jiffies);
-	}
-
-	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
-	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
-		dev_info(DEV, "Resync suspended\n");
-		mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
-	}
-
-	if (os.conn == C_CONNECTED &&
-	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
-		unsigned long now = jiffies;
-		int i;
-
-		set_ov_position(mdev, ns.conn);
-		mdev->rs_start = now;
-		mdev->rs_last_events = 0;
-		mdev->rs_last_sect_ev = 0;
-		mdev->ov_last_oos_size = 0;
-		mdev->ov_last_oos_start = 0;
-
-		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
-			mdev->rs_mark_left[i] = mdev->ov_left;
-			mdev->rs_mark_time[i] = now;
-		}
-
-		drbd_rs_controller_reset(mdev);
-
-		if (ns.conn == C_VERIFY_S) {
-			dev_info(DEV, "Starting Online Verify from sector %llu\n",
-					(unsigned long long)mdev->ov_position);
-			mod_timer(&mdev->resync_timer, jiffies);
-		}
-	}
-
-	if (get_ldev(mdev)) {
-		u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
-						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
-						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
-
-		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
-			mdf |= MDF_CRASHED_PRIMARY;
-		if (mdev->state.role == R_PRIMARY ||
-		    (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
-			mdf |= MDF_PRIMARY_IND;
-		if (mdev->state.conn > C_WF_REPORT_PARAMS)
-			mdf |= MDF_CONNECTED_IND;
-		if (mdev->state.disk > D_INCONSISTENT)
-			mdf |= MDF_CONSISTENT;
-		if (mdev->state.disk > D_OUTDATED)
-			mdf |= MDF_WAS_UP_TO_DATE;
-		if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
-			mdf |= MDF_PEER_OUT_DATED;
-		if (mdf != mdev->ldev->md.flags) {
-			mdev->ldev->md.flags = mdf;
-			drbd_md_mark_dirty(mdev);
-		}
-		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
-			drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
-		put_ldev(mdev);
-	}
-
-	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
-	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
-	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
-		set_bit(CONSIDER_RESYNC, &mdev->flags);
-
-	/* Receiver should clean up itself */
-	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
-		drbd_thread_stop_nowait(&mdev->tconn->receiver);
-
-	/* Now the receiver finished cleaning up itself, it should die */
-	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
-		drbd_thread_stop_nowait(&mdev->tconn->receiver);
-
-	/* Upon network failure, we need to restart the receiver. */
-	if (os.conn > C_TEAR_DOWN &&
-	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
-		drbd_thread_restart_nowait(&mdev->tconn->receiver);
-
-	/* Resume AL writing if we get a connection */
-	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
-		drbd_resume_al(mdev);
-
-	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
-	if (ascw) {
-		ascw->os = os;
-		ascw->ns = ns;
-		ascw->flags = flags;
-		ascw->w.cb = w_after_state_ch;
-		ascw->done = done;
-		drbd_queue_work(&mdev->tconn->data.work, &ascw->w);
-	} else {
-		dev_warn(DEV, "Could not kmalloc an ascw\n");
-	}
-
-	return rv;
-}
-
-static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
-{
-	struct after_state_chg_work *ascw =
-		container_of(w, struct after_state_chg_work, w);
-	after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
-	if (ascw->flags & CS_WAIT_COMPLETE) {
-		D_ASSERT(ascw->done != NULL);
-		complete(ascw->done);
-	}
-	kfree(ascw);
-
-	return 1;
-}
-
-static void abw_start_sync(struct drbd_conf *mdev, int rv)
-{
-	if (rv) {
-		dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
-		_drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
-		return;
-	}
-
-	switch (mdev->state.conn) {
-	case C_STARTING_SYNC_T:
-		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
-		break;
-	case C_STARTING_SYNC_S:
-		drbd_start_resync(mdev, C_SYNC_SOURCE);
-		break;
-	}
-}
-
-int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
-			       int (*io_fn)(struct drbd_conf *),
-			       char *why, enum bm_flag flags)
-{
-	int rv;
-
-	D_ASSERT(current == mdev->tconn->worker.task);
-
-	/* open coded non-blocking drbd_suspend_io(mdev); */
-	set_bit(SUSPEND_IO, &mdev->flags);
-
-	drbd_bm_lock(mdev, why, flags);
-	rv = io_fn(mdev);
-	drbd_bm_unlock(mdev);
-
-	drbd_resume_io(mdev);
-
-	return rv;
-}
-
-/**
- * after_state_ch() - Perform after state change actions that may sleep
- * @mdev:	DRBD device.
- * @os:		old state.
- * @ns:		new state.
- * @flags:	Flags
- */
-static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
-			   union drbd_state ns, enum chg_state_flags flags)
-{
-	enum drbd_fencing_p fp;
-	enum drbd_req_event what = NOTHING;
-	union drbd_state nsm = (union drbd_state){ .i = -1 };
-
-	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
-		clear_bit(CRASHED_PRIMARY, &mdev->flags);
-		if (mdev->p_uuid)
-			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
-	}
-
-	fp = FP_DONT_CARE;
-	if (get_ldev(mdev)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	}
-
-	/* Inform userspace about the change... */
-	drbd_bcast_state(mdev, ns);
-
-	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
-	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
-		drbd_khelper(mdev, "pri-on-incon-degr");
-
-	/* Here we have the actions that are performed after a
-	   state change. This function might sleep */
-
-	nsm.i = -1;
-	if (ns.susp_nod) {
-		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
-			what = RESEND;
-
-		if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
-			what = RESTART_FROZEN_DISK_IO;
-
-		if (what != NOTHING)
-			nsm.susp_nod = 0;
-	}
-
-	if (ns.susp_fen) {
-		/* case1: The outdate peer handler is successful: */
-		if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
-			tl_clear(mdev);
-			if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
-				drbd_uuid_new_current(mdev);
-				clear_bit(NEW_CUR_UUID, &mdev->flags);
-			}
-			spin_lock_irq(&mdev->tconn->req_lock);
-			_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
-			spin_unlock_irq(&mdev->tconn->req_lock);
-		}
-		/* case2: The connection was established again: */
-		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
-			clear_bit(NEW_CUR_UUID, &mdev->flags);
-			what = RESEND;
-			nsm.susp_fen = 0;
-		}
-	}
-
-	if (what != NOTHING) {
-		spin_lock_irq(&mdev->tconn->req_lock);
-		_tl_restart(mdev, what);
-		nsm.i &= mdev->state.i;
-		_drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
-		spin_unlock_irq(&mdev->tconn->req_lock);
-	}
-
-	/* Became sync source.  With protocol >= 96, we still need to send out
-	 * the sync uuid now. Need to do that before any drbd_send_state, or
-	 * the other side may go "paused sync" before receiving the sync uuids,
-	 * which is unexpected. */
-	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
-	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
-	    mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) {
-		drbd_gen_and_send_sync_uuid(mdev);
-		put_ldev(mdev);
-	}
-
-	/* Do not change the order of the if above and the two below... */
-	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
-		drbd_send_uuids(mdev);
-		drbd_send_state(mdev);
-	}
-	/* No point in queuing send_bitmap if we don't have a connection
-	 * anymore, so check also the _current_ state, not only the new state
-	 * at the time this work was queued. */
-	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
-	    mdev->state.conn == C_WF_BITMAP_S)
-		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
-				"send_bitmap (WFBitMapS)",
-				BM_LOCKED_TEST_ALLOWED);
-
-	/* Lost contact to peer's copy of the data */
-	if ((os.pdsk >= D_INCONSISTENT &&
-	     os.pdsk != D_UNKNOWN &&
-	     os.pdsk != D_OUTDATED)
-	&&  (ns.pdsk < D_INCONSISTENT ||
-	     ns.pdsk == D_UNKNOWN ||
-	     ns.pdsk == D_OUTDATED)) {
-		if (get_ldev(mdev)) {
-			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
-			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
-				if (is_susp(mdev->state)) {
-					set_bit(NEW_CUR_UUID, &mdev->flags);
-				} else {
-					drbd_uuid_new_current(mdev);
-					drbd_send_uuids(mdev);
-				}
-			}
-			put_ldev(mdev);
-		}
-	}
-
-	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
-		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) {
-			drbd_uuid_new_current(mdev);
-			drbd_send_uuids(mdev);
-		}
-
-		/* D_DISKLESS Peer becomes secondary */
-		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
-			/* We may still be Primary ourselves.
-			 * No harm done if the bitmap still changes,
-			 * redirtied pages will follow later. */
-			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
-				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
-		put_ldev(mdev);
-	}
-
-	/* Write out all changed bits on demote.
-	 * Though, no need to da that just yet
-	 * if there is a resync going on still */
-	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
-		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
-		/* No changes to the bitmap expected this time, so assert that,
-		 * even though no harm was done if it did change. */
-		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
-				"demote", BM_LOCKED_TEST_ALLOWED);
-		put_ldev(mdev);
-	}
-
-	/* Last part of the attaching process ... */
-	if (ns.conn >= C_CONNECTED &&
-	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
-		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
-		drbd_send_uuids(mdev);
-		drbd_send_state(mdev);
-	}
-
-	/* We want to pause/continue resync, tell peer. */
-	if (ns.conn >= C_CONNECTED &&
-	     ((os.aftr_isp != ns.aftr_isp) ||
-	      (os.user_isp != ns.user_isp)))
-		drbd_send_state(mdev);
-
-	/* In case one of the isp bits got set, suspend other devices. */
-	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
-	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
-		suspend_other_sg(mdev);
-
-	/* Make sure the peer gets informed about eventual state
-	   changes (ISP bits) while we were in WFReportParams. */
-	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
-		drbd_send_state(mdev);
-
-	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
-		drbd_send_state(mdev);
-
-	/* We are in the progress to start a full sync... */
-	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
-	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
-		/* no other bitmap changes expected during this phase */
-		drbd_queue_bitmap_io(mdev,
-			&drbd_bmio_set_n_write, &abw_start_sync,
-			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
-
-	/* We are invalidating our self... */
-	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
-	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
-		/* other bitmap operation expected during this phase */
-		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
-			"set_n_write from invalidate", BM_LOCKED_MASK);
-
-	/* first half of local IO error, failure to attach,
-	 * or administrative detach */
-	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
-		enum drbd_io_error_p eh;
-		int was_io_error;
-		/* corresponding get_ldev was in __drbd_set_state, to serialize
-		 * our cleanup here with the transition to D_DISKLESS,
-		 * so it is safe to dreference ldev here. */
-		eh = mdev->ldev->dc.on_io_error;
-		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
-
-		/* current state still has to be D_FAILED,
-		 * there is only one way out: to D_DISKLESS,
-		 * and that may only happen after our put_ldev below. */
-		if (mdev->state.disk != D_FAILED)
-			dev_err(DEV,
-				"ASSERT FAILED: disk is %s during detach\n",
-				drbd_disk_str(mdev->state.disk));
-
-		if (drbd_send_state(mdev))
-			dev_warn(DEV, "Notified peer that I am detaching my disk\n");
-		else
-			dev_err(DEV, "Sending state for detaching disk failed\n");
-
-		drbd_rs_cancel_all(mdev);
-
-		/* In case we want to get something to stable storage still,
-		 * this may be the last chance.
-		 * Following put_ldev may transition to D_DISKLESS. */
-		drbd_md_sync(mdev);
-		put_ldev(mdev);
-
-		if (was_io_error && eh == EP_CALL_HELPER)
-			drbd_khelper(mdev, "local-io-error");
-	}
-
-	/* second half of local IO error, failure to attach,
-	 * or administrative detach,
-	 * after local_cnt references have reached zero again */
-	if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
-		/* We must still be diskless,
-		 * re-attach has to be serialized with this! */
-		if (mdev->state.disk != D_DISKLESS)
-			dev_err(DEV,
-				"ASSERT FAILED: disk is %s while going diskless\n",
-				drbd_disk_str(mdev->state.disk));
-
-		mdev->rs_total = 0;
-		mdev->rs_failed = 0;
-		atomic_set(&mdev->rs_pending_cnt, 0);
-
-		if (drbd_send_state(mdev))
-			dev_warn(DEV, "Notified peer that I'm now diskless.\n");
-		/* corresponding get_ldev in __drbd_set_state
-		 * this may finally trigger drbd_ldev_destroy. */
-		put_ldev(mdev);
-	}
-
-	/* Notify peer that I had a local IO error, and did not detached.. */
-	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
-		drbd_send_state(mdev);
-
-	/* Disks got bigger while they were detached */
-	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
-	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
-		if (ns.conn == C_CONNECTED)
-			resync_after_online_grow(mdev);
-	}
-
-	/* A resync finished or aborted, wake paused devices... */
-	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
-	    (os.peer_isp && !ns.peer_isp) ||
-	    (os.user_isp && !ns.user_isp))
-		resume_next_sg(mdev);
-
-	/* sync target done with resync.  Explicitly notify peer, even though
-	 * it should (at least for non-empty resyncs) already know itself. */
-	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
-		drbd_send_state(mdev);
-
-	/* This triggers bitmap writeout of potentially still unwritten pages
-	 * if the resync finished cleanly, or aborted because of peer disk
-	 * failure, or because of connection loss.
-	 * For resync aborted because of local disk failure, we cannot do
-	 * any bitmap writeout anymore.
-	 * No harm done if some bits change during this phase.
-	 */
-	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
-		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
-			"write from resync_finished", BM_LOCKED_SET_ALLOWED);
-		put_ldev(mdev);
-	}
-
-	/* Upon network connection, we need to start the receiver */
-	if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
-		drbd_thread_start(&mdev->tconn->receiver);
-
-	/* Terminate worker thread if we are unconfigured - it will be
-	   restarted as needed... */
-	if (ns.disk == D_DISKLESS &&
-	    ns.conn == C_STANDALONE &&
-	    ns.role == R_SECONDARY) {
-		if (os.aftr_isp != ns.aftr_isp)
-			resume_next_sg(mdev);
-		/* set in __drbd_set_state, unless CONFIG_PENDING was set */
-		if (test_bit(DEVICE_DYING, &mdev->flags))
-			drbd_thread_stop_nowait(&mdev->tconn->worker);
-	}
-
-	drbd_md_sync(mdev);
-}
-
-
 static int drbd_thread_setup(void *arg)
 {
 	struct drbd_thread *thi = (struct drbd_thread *) arg;
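
The hard-change counterpart survives the move unchanged: drbd_force_state() is drbd_change_state() with CS_HARD, which skips the is_valid_state()/transition checks in __drbd_set_state(). A hedged usage sketch (illustrative only, not part of the patch):

	/* Sketch: impose a state we observed rather than chose, e.g. after
	 * losing contact with the peer's disk.  CS_HARD bypasses validation. */
	drbd_force_state(mdev, NS(pdsk, D_UNKNOWN));
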
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
new file mode 100644
index 000000000000..38d330b7b662
--- /dev/null
+++ b/drivers/block/drbd/drbd_state.c
@@ -0,0 +1,1217 @@
1/*
2 drbd_state.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
11 from Logicworks, Inc. for making SDP replication support possible.
12
13 drbd is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2, or (at your option)
16 any later version.
17
18 drbd is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with drbd; see the file COPYING. If not, write to
25 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
26 */
27
28#include <linux/drbd_limits.h>
29#include "drbd_int.h"
30#include "drbd_req.h"
31
32struct after_state_chg_work {
33 struct drbd_work w;
34 union drbd_state os;
35 union drbd_state ns;
36 enum chg_state_flags flags;
37 struct completion *done;
38};
39
40
41extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
42int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state);
43static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
44static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
45 union drbd_state ns, enum chg_state_flags flags);
46static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os,
47 union drbd_state ns, enum chg_state_flags flags);
48static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
49static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state);
50static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
51 union drbd_state ns, const char **warn_sync_abort);
52
53/**
54 * cl_wide_st_chg() - true if the state change is a cluster wide one
55 * @mdev: DRBD device.
56 * @os: old (current) state.
57 * @ns: new (wanted) state.
58 */
59static int cl_wide_st_chg(struct drbd_conf *mdev,
60 union drbd_state os, union drbd_state ns)
61{
62 return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
63 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
64 (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
65 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
66 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) ||
67 (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
68 (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
69}
70
71enum drbd_state_rv
72drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
73 union drbd_state mask, union drbd_state val)
74{
75 unsigned long flags;
76 union drbd_state os, ns;
77 enum drbd_state_rv rv;
78
79 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
80 os = mdev->state;
81 ns.i = (os.i & ~mask.i) | val.i;
82 rv = _drbd_set_state(mdev, ns, f, NULL);
83 ns = mdev->state;
84 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
85
86 return rv;
87}
88
89/**
90 * drbd_force_state() - Impose a change which happens outside our control on our state
91 * @mdev: DRBD device.
92 * @mask: mask of state bits to change.
93 * @val: value of new state bits.
94 */
95void drbd_force_state(struct drbd_conf *mdev,
96 union drbd_state mask, union drbd_state val)
97{
98 drbd_change_state(mdev, CS_HARD, mask, val);
99}
100
101static enum drbd_state_rv
102_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
103 union drbd_state val)
104{
105 union drbd_state os, ns;
106 unsigned long flags;
107 enum drbd_state_rv rv;
108
109 if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
110 return SS_CW_SUCCESS;
111
112 if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
113 return SS_CW_FAILED_BY_PEER;
114
115 rv = 0;
116 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
117 os = mdev->state;
118 ns.i = (os.i & ~mask.i) | val.i;
119 ns = sanitize_state(mdev, os, ns, NULL);
120
121 if (!cl_wide_st_chg(mdev, os, ns))
122 rv = SS_CW_NO_NEED;
123 if (!rv) {
124 rv = is_valid_state(mdev, ns);
125 if (rv == SS_SUCCESS) {
126 rv = is_valid_soft_transition(os, ns);
127 if (rv == SS_SUCCESS)
128 rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
129 }
130 }
131 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
132
133 return rv;
134}
135
136/**
137 * drbd_req_state() - Perform an eventually cluster wide state change
138 * @mdev: DRBD device.
139 * @mask: mask of state bits to change.
140 * @val: value of new state bits.
141 * @f: flags
142 *
143 * Should not be called directly, use drbd_request_state() or
144 * _drbd_request_state().
145 */
146static enum drbd_state_rv
147drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
148 union drbd_state val, enum chg_state_flags f)
149{
150 struct completion done;
151 unsigned long flags;
152 union drbd_state os, ns;
153 enum drbd_state_rv rv;
154
155 init_completion(&done);
156
157 if (f & CS_SERIALIZE)
158 mutex_lock(&mdev->state_mutex);
159
160 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
161 os = mdev->state;
162 ns.i = (os.i & ~mask.i) | val.i;
163
164 ns = sanitize_state(mdev, os, ns, NULL);
165
166 if (cl_wide_st_chg(mdev, os, ns)) {
167 rv = is_valid_state(mdev, ns);
168 if (rv == SS_SUCCESS)
169 rv = is_valid_soft_transition(os, ns);
170 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
171
172 if (rv < SS_SUCCESS) {
173 if (f & CS_VERBOSE)
174 print_st_err(mdev, os, ns, rv);
175 goto abort;
176 }
177
178 drbd_state_lock(mdev);
179 if (!drbd_send_state_req(mdev, mask, val)) {
180 drbd_state_unlock(mdev);
181 rv = SS_CW_FAILED_BY_PEER;
182 if (f & CS_VERBOSE)
183 print_st_err(mdev, os, ns, rv);
184 goto abort;
185 }
186
187 wait_event(mdev->state_wait,
188 (rv = _req_st_cond(mdev, mask, val)));
189
190 if (rv < SS_SUCCESS) {
191 drbd_state_unlock(mdev);
192 if (f & CS_VERBOSE)
193 print_st_err(mdev, os, ns, rv);
194 goto abort;
195 }
196 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
197 os = mdev->state;
198 ns.i = (os.i & ~mask.i) | val.i;
199 rv = _drbd_set_state(mdev, ns, f, &done);
200 drbd_state_unlock(mdev);
201 } else {
202 rv = _drbd_set_state(mdev, ns, f, &done);
203 }
204
205 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
206
207 if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
208 D_ASSERT(current != mdev->tconn->worker.task);
209 wait_for_completion(&done);
210 }
211
212abort:
213 if (f & CS_SERIALIZE)
214 mutex_unlock(&mdev->state_mutex);
215
216 return rv;
217}
218
219/**
220 * _drbd_request_state() - Request a state change (with flags)
221 * @mdev: DRBD device.
222 * @mask: mask of state bits to change.
223 * @val: value of new state bits.
224 * @f: flags
225 *
226 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
227 * flag, or when logging of failed state change requests is not desired.
228 */
229enum drbd_state_rv
230_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
231 union drbd_state val, enum chg_state_flags f)
232{
233 enum drbd_state_rv rv;
234
235 wait_event(mdev->state_wait,
236 (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
237
238 return rv;
239}
240
241static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
242{
243 dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
244 name,
245 drbd_conn_str(ns.conn),
246 drbd_role_str(ns.role),
247 drbd_role_str(ns.peer),
248 drbd_disk_str(ns.disk),
249 drbd_disk_str(ns.pdsk),
250 is_susp(ns) ? 's' : 'r',
251 ns.aftr_isp ? 'a' : '-',
252 ns.peer_isp ? 'p' : '-',
253 ns.user_isp ? 'u' : '-',
254 ns.susp_fen ? 'F' : '-',
255 ns.susp_nod ? 'N' : '-'
256 );
257}
258
259void print_st_err(struct drbd_conf *mdev, union drbd_state os,
260 union drbd_state ns, enum drbd_state_rv err)
261{
262 if (err == SS_IN_TRANSIENT_STATE)
263 return;
264 dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
265 print_st(mdev, " state", os);
266 print_st(mdev, "wanted", ns);
267}
268
269
270/**
271 * is_valid_state() - Returns an SS_ error code if ns is not valid
272 * @mdev: DRBD device.
273 * @ns: State to consider.
274 */
275static enum drbd_state_rv
276is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
277{
278 /* See drbd_state_sw_errors in drbd_strings.c */
279
280 enum drbd_fencing_p fp;
281 enum drbd_state_rv rv = SS_SUCCESS;
282
283 fp = FP_DONT_CARE;
284 if (get_ldev(mdev)) {
285 fp = mdev->ldev->dc.fencing;
286 put_ldev(mdev);
287 }
288
289 if (get_net_conf(mdev->tconn)) {
290 if (!mdev->tconn->net_conf->two_primaries &&
291 ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
292 rv = SS_TWO_PRIMARIES;
293 put_net_conf(mdev->tconn);
294 }
295
296 if (rv <= 0)
297 /* already found a reason to abort */;
298 else if (ns.role == R_SECONDARY && mdev->open_cnt)
299 rv = SS_DEVICE_IN_USE;
300
301 else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
302 rv = SS_NO_UP_TO_DATE_DISK;
303
304 else if (fp >= FP_RESOURCE &&
305 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
306 rv = SS_PRIMARY_NOP;
307
308 else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
309 rv = SS_NO_UP_TO_DATE_DISK;
310
311 else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
312 rv = SS_NO_LOCAL_DISK;
313
314 else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
315 rv = SS_NO_REMOTE_DISK;
316
317 else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
318 rv = SS_NO_UP_TO_DATE_DISK;
319
320 else if ((ns.conn == C_CONNECTED ||
321 ns.conn == C_WF_BITMAP_S ||
322 ns.conn == C_SYNC_SOURCE ||
323 ns.conn == C_PAUSED_SYNC_S) &&
324 ns.disk == D_OUTDATED)
325 rv = SS_CONNECTED_OUTDATES;
326
327 else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
328 (mdev->sync_conf.verify_alg[0] == 0))
329 rv = SS_NO_VERIFY_ALG;
330
331 else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
332 mdev->tconn->agreed_pro_version < 88)
333 rv = SS_NOT_SUPPORTED;
334
335 else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
336 rv = SS_CONNECTED_OUTDATES;
337
338 return rv;
339}
340
341/**
342 * is_valid_soft_transition() - Returns an SS_ error code if the state
343 *	transition from @os to @ns is not possible
344 * @os: old state.
345 * @ns: new state.
346 */
347static enum drbd_state_rv
348is_valid_soft_transition(union drbd_state os, union drbd_state ns)
349{
350 enum drbd_state_rv rv = SS_SUCCESS;
351
352 if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
353 os.conn > C_CONNECTED)
354 rv = SS_RESYNC_RUNNING;
355
356 if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
357 rv = SS_ALREADY_STANDALONE;
358
359 if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
360 rv = SS_IS_DISKLESS;
361
362 if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
363 rv = SS_NO_NET_CONFIG;
364
365 if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
366 rv = SS_LOWER_THAN_OUTDATED;
367
368 if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
369 rv = SS_IN_TRANSIENT_STATE;
370
371 if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
372 rv = SS_IN_TRANSIENT_STATE;
373
374 if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
375 rv = SS_NEED_CONNECTION;
376
377 if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
378 ns.conn != os.conn && os.conn > C_CONNECTED)
379 rv = SS_RESYNC_RUNNING;
380
381 if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
382 os.conn < C_CONNECTED)
383 rv = SS_NEED_CONNECTION;
384
385 if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
386 && os.conn < C_WF_REPORT_PARAMS)
387 rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
388
389 return rv;
390}
391
392/**
393 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
394 * @mdev: DRBD device.
395 * @os: old state.
396 * @ns: new state.
397 * @warn_sync_abort: set to "Resync" or "Online-verify" if the change implicitly aborts one
398 *
399 * When we lose connection, we have to set the state of the peer's disk
400 * (pdsk) to D_UNKNOWN. This rule and many more along those lines are in this function.
401 */
402static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
403 union drbd_state ns, const char **warn_sync_abort)
404{
405 enum drbd_fencing_p fp;
406 enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
407
408 fp = FP_DONT_CARE;
409 if (get_ldev(mdev)) {
410 fp = mdev->ldev->dc.fencing;
411 put_ldev(mdev);
412 }
413
414 /* Do not allow network-error states to reach an unconfigured network part */
415 if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
416 os.conn <= C_DISCONNECTING)
417 ns.conn = os.conn;
418
419 /* After a network error (including C_TEAR_DOWN), only C_UNCONNECTED or C_DISCONNECTING can follow.
420 * An attempt to go into some Sync* state shall fail (elsewhere). */
421 if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
422 ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN)
423 ns.conn = os.conn;
424
425 /* we cannot fail (again) if we already detached */
426 if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
427 ns.disk = D_DISKLESS;
428
429 /* if we are only D_ATTACHING yet,
430 * we can (and should) go directly to D_DISKLESS. */
431 if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
432 ns.disk = D_DISKLESS;
433
434 /* After C_DISCONNECTING only C_STANDALONE may follow */
435 if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
436 ns.conn = os.conn;
437
438 if (ns.conn < C_CONNECTED) {
439 ns.peer_isp = 0;
440 ns.peer = R_UNKNOWN;
441 if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
442 ns.pdsk = D_UNKNOWN;
443 }
444
445 /* Clear the aftr_isp when becoming unconfigured */
446 if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
447 ns.aftr_isp = 0;
448
449 /* Abort resync if a disk fails/detaches */
450 if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
451 (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
452 if (warn_sync_abort)
453 *warn_sync_abort =
454 os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
455 "Online-verify" : "Resync";
456 ns.conn = C_CONNECTED;
457 }
458
459 /* Connection breaks down before we finished "Negotiating" */
460 if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
461 get_ldev_if_state(mdev, D_NEGOTIATING)) {
462 if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
463 ns.disk = mdev->new_state_tmp.disk;
464 ns.pdsk = mdev->new_state_tmp.pdsk;
465 } else {
466 dev_alert(DEV, "Connection lost while negotiating, no data!\n");
467 ns.disk = D_DISKLESS;
468 ns.pdsk = D_UNKNOWN;
469 }
470 put_ldev(mdev);
471 }
472
473 /* D_CONSISTENT and D_OUTDATED vanish when we get connected */
474 if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
475 if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
476 ns.disk = D_UP_TO_DATE;
477 if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
478 ns.pdsk = D_UP_TO_DATE;
479 }
480
481 /* Implications of the connection state on the disk states */
482 disk_min = D_DISKLESS;
483 disk_max = D_UP_TO_DATE;
484 pdsk_min = D_INCONSISTENT;
485 pdsk_max = D_UNKNOWN;
486 switch ((enum drbd_conns)ns.conn) {
487 case C_WF_BITMAP_T:
488 case C_PAUSED_SYNC_T:
489 case C_STARTING_SYNC_T:
490 case C_WF_SYNC_UUID:
491 case C_BEHIND:
492 disk_min = D_INCONSISTENT;
493 disk_max = D_OUTDATED;
494 pdsk_min = D_UP_TO_DATE;
495 pdsk_max = D_UP_TO_DATE;
496 break;
497 case C_VERIFY_S:
498 case C_VERIFY_T:
499 disk_min = D_UP_TO_DATE;
500 disk_max = D_UP_TO_DATE;
501 pdsk_min = D_UP_TO_DATE;
502 pdsk_max = D_UP_TO_DATE;
503 break;
504 case C_CONNECTED:
505 disk_min = D_DISKLESS;
506 disk_max = D_UP_TO_DATE;
507 pdsk_min = D_DISKLESS;
508 pdsk_max = D_UP_TO_DATE;
509 break;
510 case C_WF_BITMAP_S:
511 case C_PAUSED_SYNC_S:
512 case C_STARTING_SYNC_S:
513 case C_AHEAD:
514 disk_min = D_UP_TO_DATE;
515 disk_max = D_UP_TO_DATE;
516 pdsk_min = D_INCONSISTENT;
517 pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary */
518 break;
519 case C_SYNC_TARGET:
520 disk_min = D_INCONSISTENT;
521 disk_max = D_INCONSISTENT;
522 pdsk_min = D_UP_TO_DATE;
523 pdsk_max = D_UP_TO_DATE;
524 break;
525 case C_SYNC_SOURCE:
526 disk_min = D_UP_TO_DATE;
527 disk_max = D_UP_TO_DATE;
528 pdsk_min = D_INCONSISTENT;
529 pdsk_max = D_INCONSISTENT;
530 break;
531 case C_STANDALONE:
532 case C_DISCONNECTING:
533 case C_UNCONNECTED:
534 case C_TIMEOUT:
535 case C_BROKEN_PIPE:
536 case C_NETWORK_FAILURE:
537 case C_PROTOCOL_ERROR:
538 case C_TEAR_DOWN:
539 case C_WF_CONNECTION:
540 case C_WF_REPORT_PARAMS:
541 case C_MASK:
542 break;
543 }
544 if (ns.disk > disk_max)
545 ns.disk = disk_max;
546
547 if (ns.disk < disk_min) {
548 dev_warn(DEV, "Implicitly set disk from %s to %s\n",
549 drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
550 ns.disk = disk_min;
551 }
552 if (ns.pdsk > pdsk_max)
553 ns.pdsk = pdsk_max;
554
555 if (ns.pdsk < pdsk_min) {
556 dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
557 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
558 ns.pdsk = pdsk_min;
559 }
560
561 if (fp == FP_STONITH &&
562 (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
563 !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
564 ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
565
566 if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
567 (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
568 !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
569 ns.susp_nod = 1; /* Suspend IO while no accessible data is available */
570
571 if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
572 if (ns.conn == C_SYNC_SOURCE)
573 ns.conn = C_PAUSED_SYNC_S;
574 if (ns.conn == C_SYNC_TARGET)
575 ns.conn = C_PAUSED_SYNC_T;
576 } else {
577 if (ns.conn == C_PAUSED_SYNC_S)
578 ns.conn = C_SYNC_SOURCE;
579 if (ns.conn == C_PAUSED_SYNC_T)
580 ns.conn = C_SYNC_TARGET;
581 }
582
583 return ns;
584}
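/*
 * Illustrative sketch (not part of this patch) of the implicit corrections
 * above: assuming the device was C_CONNECTED with ns.pdsk == D_UP_TO_DATE,
 * a network error invalidates everything we knew about the peer.
 */
static union drbd_state example_sanitize(struct drbd_conf *mdev)
{
	union drbd_state os = mdev->state;
	union drbd_state ns = os;

	ns.conn = C_TIMEOUT;	/* pretend: network error while connected */
	/* result: ns.peer == R_UNKNOWN, ns.pdsk == D_UNKNOWN, ns.peer_isp == 0 */
	return sanitize_state(mdev, os, ns, NULL);
}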
585
586void drbd_resume_al(struct drbd_conf *mdev)
587{
588 if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
589 dev_info(DEV, "Resumed AL updates\n");
590}
591
592/* helper for __drbd_set_state */
593static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
594{
595 if (mdev->tconn->agreed_pro_version < 90)
596 mdev->ov_start_sector = 0;
597 mdev->rs_total = drbd_bm_bits(mdev);
598 mdev->ov_position = 0;
599 if (cs == C_VERIFY_T) {
600 /* starting online verify from an arbitrary position
601 * does not fit well into the existing protocol.
602 * on C_VERIFY_T, we initialize ov_left and friends
603 * implicitly in receive_DataRequest once the
604 * first P_OV_REQUEST is received */
605 mdev->ov_start_sector = ~(sector_t)0;
606 } else {
607 unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
608 if (bit >= mdev->rs_total) {
609 mdev->ov_start_sector =
610 BM_BIT_TO_SECT(mdev->rs_total - 1);
611 mdev->rs_total = 1;
612 } else
613 mdev->rs_total -= bit;
614 mdev->ov_position = mdev->ov_start_sector;
615 }
616 mdev->ov_left = mdev->rs_total;
617}
618
619/**
620 * __drbd_set_state() - Set a new DRBD state
621 * @mdev: DRBD device.
622 * @ns: new state.
623 * @flags: state change flags
624 * @done: optional completion; completed after after_state_ch() has finished
625 *
626 * Caller needs to hold req_lock and global_state_lock. Do not call directly.
627 */
628enum drbd_state_rv
629__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
630 enum chg_state_flags flags, struct completion *done)
631{
632 union drbd_state os;
633 enum drbd_state_rv rv = SS_SUCCESS;
634 const char *warn_sync_abort = NULL;
635 struct after_state_chg_work *ascw;
636
637 os = mdev->state;
638
639 ns = sanitize_state(mdev, os, ns, &warn_sync_abort);
640
641 if (ns.i == os.i)
642 return SS_NOTHING_TO_DO;
643
644 if (!(flags & CS_HARD)) {
645 /* pre-state-change checks; only look at ns */
646 /* See drbd_state_sw_errors in drbd_strings.c */
647
648 rv = is_valid_state(mdev, ns);
649 if (rv < SS_SUCCESS) {
650 /* If the old state was illegal as well, then let
651 this happen...*/
652
653 if (is_valid_state(mdev, os) == rv)
654 rv = is_valid_soft_transition(os, ns);
655 } else
656 rv = is_valid_soft_transition(os, ns);
657 }
658
659 if (rv < SS_SUCCESS) {
660 if (flags & CS_VERBOSE)
661 print_st_err(mdev, os, ns, rv);
662 return rv;
663 }
664
665 if (warn_sync_abort)
666 dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
667
668 {
669 char *pbp, pb[300];
670 pbp = pb;
671 *pbp = 0;
672 if (ns.role != os.role)
673 pbp += sprintf(pbp, "role( %s -> %s ) ",
674 drbd_role_str(os.role),
675 drbd_role_str(ns.role));
676 if (ns.peer != os.peer)
677 pbp += sprintf(pbp, "peer( %s -> %s ) ",
678 drbd_role_str(os.peer),
679 drbd_role_str(ns.peer));
680 if (ns.conn != os.conn)
681 pbp += sprintf(pbp, "conn( %s -> %s ) ",
682 drbd_conn_str(os.conn),
683 drbd_conn_str(ns.conn));
684 if (ns.disk != os.disk)
685 pbp += sprintf(pbp, "disk( %s -> %s ) ",
686 drbd_disk_str(os.disk),
687 drbd_disk_str(ns.disk));
688 if (ns.pdsk != os.pdsk)
689 pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
690 drbd_disk_str(os.pdsk),
691 drbd_disk_str(ns.pdsk));
692 if (is_susp(ns) != is_susp(os))
693 pbp += sprintf(pbp, "susp( %d -> %d ) ",
694 is_susp(os),
695 is_susp(ns));
696 if (ns.aftr_isp != os.aftr_isp)
697 pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
698 os.aftr_isp,
699 ns.aftr_isp);
700 if (ns.peer_isp != os.peer_isp)
701 pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
702 os.peer_isp,
703 ns.peer_isp);
704 if (ns.user_isp != os.user_isp)
705 pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
706 os.user_isp,
707 ns.user_isp);
708 dev_info(DEV, "%s\n", pb);
709 }
710
711 /* solve the race between becoming unconfigured,
712 * worker doing the cleanup, and
713 * admin reconfiguring us:
714 * on (re)configure, first set CONFIG_PENDING,
715 * then wait for a potentially exiting worker,
716 * start the worker, and schedule one no_op.
717 * then proceed with configuration.
718 */
719 if (ns.disk == D_DISKLESS &&
720 ns.conn == C_STANDALONE &&
721 ns.role == R_SECONDARY &&
722 !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
723 set_bit(DEVICE_DYING, &mdev->flags);
724
725 /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
726 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
727 * drbd_ldev_destroy() won't happen before our corresponding
728 * after_state_ch works run, where we put_ldev again. */
729 if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
730 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
731 atomic_inc(&mdev->local_cnt);
732
733 mdev->state = ns;
734
735 if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
736 drbd_print_uuids(mdev, "attached to UUIDs");
737
738 wake_up(&mdev->misc_wait);
739 wake_up(&mdev->state_wait);
740
741 /* aborted verify run. log the last position */
742 if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
743 ns.conn < C_CONNECTED) {
744 mdev->ov_start_sector =
745 BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
746 dev_info(DEV, "Online Verify reached sector %llu\n",
747 (unsigned long long)mdev->ov_start_sector);
748 }
749
750 if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
751 (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) {
752 dev_info(DEV, "Syncer continues.\n");
753 mdev->rs_paused += (long)jiffies
754 -(long)mdev->rs_mark_time[mdev->rs_last_mark];
755 if (ns.conn == C_SYNC_TARGET)
756 mod_timer(&mdev->resync_timer, jiffies);
757 }
758
759 if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) &&
760 (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
761 dev_info(DEV, "Resync suspended\n");
762 mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
763 }
764
765 if (os.conn == C_CONNECTED &&
766 (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
767 unsigned long now = jiffies;
768 int i;
769
770 set_ov_position(mdev, ns.conn);
771 mdev->rs_start = now;
772 mdev->rs_last_events = 0;
773 mdev->rs_last_sect_ev = 0;
774 mdev->ov_last_oos_size = 0;
775 mdev->ov_last_oos_start = 0;
776
777 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
778 mdev->rs_mark_left[i] = mdev->ov_left;
779 mdev->rs_mark_time[i] = now;
780 }
781
782 drbd_rs_controller_reset(mdev);
783
784 if (ns.conn == C_VERIFY_S) {
785 dev_info(DEV, "Starting Online Verify from sector %llu\n",
786 (unsigned long long)mdev->ov_position);
787 mod_timer(&mdev->resync_timer, jiffies);
788 }
789 }
790
791 if (get_ldev(mdev)) {
792 u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
793 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
794 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
795
796 if (test_bit(CRASHED_PRIMARY, &mdev->flags))
797 mdf |= MDF_CRASHED_PRIMARY;
798 if (mdev->state.role == R_PRIMARY ||
799 (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
800 mdf |= MDF_PRIMARY_IND;
801 if (mdev->state.conn > C_WF_REPORT_PARAMS)
802 mdf |= MDF_CONNECTED_IND;
803 if (mdev->state.disk > D_INCONSISTENT)
804 mdf |= MDF_CONSISTENT;
805 if (mdev->state.disk > D_OUTDATED)
806 mdf |= MDF_WAS_UP_TO_DATE;
807 if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
808 mdf |= MDF_PEER_OUT_DATED;
809 if (mdf != mdev->ldev->md.flags) {
810 mdev->ldev->md.flags = mdf;
811 drbd_md_mark_dirty(mdev);
812 }
813 if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
814 drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
815 put_ldev(mdev);
816 }
817
818 /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider resyncing */
819 if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
820 os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
821 set_bit(CONSIDER_RESYNC, &mdev->flags);
822
823 /* Receiver should clean up itself */
824 if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
825 drbd_thread_stop_nowait(&mdev->tconn->receiver);
826
827 /* Now that the receiver has finished cleaning up after itself, it should die */
828 if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
829 drbd_thread_stop_nowait(&mdev->tconn->receiver);
830
831 /* Upon network failure, we need to restart the receiver. */
832 if (os.conn > C_TEAR_DOWN &&
833 ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
834 drbd_thread_restart_nowait(&mdev->tconn->receiver);
835
836 /* Resume AL writing if we get a connection */
837 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
838 drbd_resume_al(mdev);
839
840 ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
841 if (ascw) {
842 ascw->os = os;
843 ascw->ns = ns;
844 ascw->flags = flags;
845 ascw->w.cb = w_after_state_ch;
846 ascw->done = done;
847 drbd_queue_work(&mdev->tconn->data.work, &ascw->w);
848 } else {
849 dev_warn(DEV, "Could not kmalloc an ascw\n");
850 }
851
852 return rv;
853}
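/*
 * Hedged sketch (illustration only, not part of this patch):
 * __drbd_set_state() is not called directly; callers take the req_lock and
 * go through the _drbd_set_state() wrapper, as in the susp_fen handling
 * further down in this file.  "example_set_state" is hypothetical.
 */
static enum drbd_state_rv example_set_state(struct drbd_conf *mdev)
{
	enum drbd_state_rv rv;

	spin_lock_irq(&mdev->tconn->req_lock);
	rv = _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_VERBOSE, NULL);
	spin_unlock_irq(&mdev->tconn->req_lock);
	return rv;
}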
854
855static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
856{
857 struct after_state_chg_work *ascw =
858 container_of(w, struct after_state_chg_work, w);
859
860 after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
861 if (ascw->flags & CS_WAIT_COMPLETE) {
862 D_ASSERT(ascw->done != NULL);
863 complete(ascw->done);
864 }
865 kfree(ascw);
866
867 return 1;
868}
869
870static void abw_start_sync(struct drbd_conf *mdev, int rv)
871{
872 if (rv) {
873 dev_err(DEV, "Writing the bitmap failed, not starting resync.\n");
874 _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
875 return;
876 }
877
878 switch (mdev->state.conn) {
879 case C_STARTING_SYNC_T:
880 _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
881 break;
882 case C_STARTING_SYNC_S:
883 drbd_start_resync(mdev, C_SYNC_SOURCE);
884 break;
885 }
886}
887
888int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
889 int (*io_fn)(struct drbd_conf *),
890 char *why, enum bm_flag flags)
891{
892 int rv;
893
894 D_ASSERT(current == mdev->tconn->worker.task);
895
896 /* open coded non-blocking drbd_suspend_io(mdev); */
897 set_bit(SUSPEND_IO, &mdev->flags);
898
899 drbd_bm_lock(mdev, why, flags);
900 rv = io_fn(mdev);
901 drbd_bm_unlock(mdev);
902
903 drbd_resume_io(mdev);
904
905 return rv;
906}
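/*
 * Usage sketch: after_state_ch() below uses this helper to flush the
 * bitmap to disk on demote, e.g.
 *
 *	drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
 *				   "demote", BM_LOCKED_TEST_ALLOWED);
 */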
907
908/**
909 * after_state_ch() - Perform after state change actions that may sleep
910 * @mdev: DRBD device.
911 * @os: old state.
912 * @ns: new state.
913 * @flags: state change flags
914 */
915static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
916 union drbd_state ns, enum chg_state_flags flags)
917{
918 enum drbd_fencing_p fp;
919 enum drbd_req_event what = NOTHING;
920 union drbd_state nsm = (union drbd_state){ .i = -1 };
921
922 if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
923 clear_bit(CRASHED_PRIMARY, &mdev->flags);
924 if (mdev->p_uuid)
925 mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
926 }
927
928 fp = FP_DONT_CARE;
929 if (get_ldev(mdev)) {
930 fp = mdev->ldev->dc.fencing;
931 put_ldev(mdev);
932 }
933
934 /* Inform userspace about the change... */
935 drbd_bcast_state(mdev, ns);
936
937 if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
938 (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
939 drbd_khelper(mdev, "pri-on-incon-degr");
940
941 /* Here we have the actions that are performed after a
942 state change. This function might sleep */
943
944 nsm.i = -1;
945 if (ns.susp_nod) {
946 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
947 what = RESEND;
948
949 if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
950 what = RESTART_FROZEN_DISK_IO;
951
952 if (what != NOTHING)
953 nsm.susp_nod = 0;
954 }
955
956 if (ns.susp_fen) {
957 /* case1: The outdate peer handler is successful: */
958 if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
959 tl_clear(mdev);
960 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
961 drbd_uuid_new_current(mdev);
962 clear_bit(NEW_CUR_UUID, &mdev->flags);
963 }
964 spin_lock_irq(&mdev->tconn->req_lock);
965 _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
966 spin_unlock_irq(&mdev->tconn->req_lock);
967 }
968 /* case2: The connection was established again: */
969 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
970 clear_bit(NEW_CUR_UUID, &mdev->flags);
971 what = RESEND;
972 nsm.susp_fen = 0;
973 }
974 }
975
976 if (what != NOTHING) {
977 spin_lock_irq(&mdev->tconn->req_lock);
978 _tl_restart(mdev, what);
979 nsm.i &= mdev->state.i;
980 _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
981 spin_unlock_irq(&mdev->tconn->req_lock);
982 }
983
984 /* Became sync source. With protocol >= 96, we still need to send out
985 * the sync uuid now. Need to do that before any drbd_send_state, or
986 * the other side may go "paused sync" before receiving the sync uuids,
987 * which is unexpected. */
988 if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
989 (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
990 mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) {
991 drbd_gen_and_send_sync_uuid(mdev);
992 put_ldev(mdev);
993 }
994
995 /* Do not change the order of the if above and the two below... */
996 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
997 drbd_send_uuids(mdev);
998 drbd_send_state(mdev);
999 }
1000 /* No point in queuing send_bitmap if we don't have a connection
1001 * anymore, so check also the _current_ state, not only the new state
1002 * at the time this work was queued. */
1003 if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
1004 mdev->state.conn == C_WF_BITMAP_S)
1005 drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
1006 "send_bitmap (WFBitMapS)",
1007 BM_LOCKED_TEST_ALLOWED);
1008
1009 /* Lost contact to peer's copy of the data */
1010 if ((os.pdsk >= D_INCONSISTENT &&
1011 os.pdsk != D_UNKNOWN &&
1012 os.pdsk != D_OUTDATED)
1013 && (ns.pdsk < D_INCONSISTENT ||
1014 ns.pdsk == D_UNKNOWN ||
1015 ns.pdsk == D_OUTDATED)) {
1016 if (get_ldev(mdev)) {
1017 if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
1018 mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
1019 if (is_susp(mdev->state)) {
1020 set_bit(NEW_CUR_UUID, &mdev->flags);
1021 } else {
1022 drbd_uuid_new_current(mdev);
1023 drbd_send_uuids(mdev);
1024 }
1025 }
1026 put_ldev(mdev);
1027 }
1028 }
1029
1030 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
1031 if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) {
1032 drbd_uuid_new_current(mdev);
1033 drbd_send_uuids(mdev);
1034 }
1035
1036 /* D_DISKLESS Peer becomes secondary */
1037 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
1038 /* We may still be Primary ourselves.
1039 * No harm done if the bitmap still changes,
1040 * redirtied pages will follow later. */
1041 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1042 "demote diskless peer", BM_LOCKED_SET_ALLOWED);
1043 put_ldev(mdev);
1044 }
1045
1046 /* Write out all changed bits on demote.
1047 * Though, no need to do that just yet
1048 * if there is a resync still going on */
1049 if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
1050 mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
1051 /* No changes to the bitmap expected this time, so assert that,
1052 * even though no harm was done if it did change. */
1053 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1054 "demote", BM_LOCKED_TEST_ALLOWED);
1055 put_ldev(mdev);
1056 }
1057
1058 /* Last part of the attaching process ... */
1059 if (ns.conn >= C_CONNECTED &&
1060 os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
1061 drbd_send_sizes(mdev, 0, 0); /* to start sync... */
1062 drbd_send_uuids(mdev);
1063 drbd_send_state(mdev);
1064 }
1065
1066 /* We want to pause/continue resync, tell peer. */
1067 if (ns.conn >= C_CONNECTED &&
1068 ((os.aftr_isp != ns.aftr_isp) ||
1069 (os.user_isp != ns.user_isp)))
1070 drbd_send_state(mdev);
1071
1072 /* In case one of the isp bits got set, suspend other devices. */
1073 if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
1074 (ns.aftr_isp || ns.peer_isp || ns.user_isp))
1075 suspend_other_sg(mdev);
1076
1077 /* Make sure the peer gets informed about any state changes
1078 (ISP bits) that happened while we were in WFReportParams. */
1079 if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
1080 drbd_send_state(mdev);
1081
1082 if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
1083 drbd_send_state(mdev);
1084
1085 /* We are in the process of starting a full sync... */
1086 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
1087 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
1088 /* no other bitmap changes expected during this phase */
1089 drbd_queue_bitmap_io(mdev,
1090 &drbd_bmio_set_n_write, &abw_start_sync,
1091 "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
1092
1093 /* We are invalidating ourselves... */
1094 if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
1095 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
1096 /* other bitmap operation expected during this phase */
1097 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
1098 "set_n_write from invalidate", BM_LOCKED_MASK);
1099
1100 /* first half of local IO error, failure to attach,
1101 * or administrative detach */
1102 if (os.disk != D_FAILED && ns.disk == D_FAILED) {
1103 enum drbd_io_error_p eh;
1104 int was_io_error;
1105 /* corresponding get_ldev was in __drbd_set_state, to serialize
1106 * our cleanup here with the transition to D_DISKLESS,
1107 * so it is safe to dereference ldev here. */
1108 eh = mdev->ldev->dc.on_io_error;
1109 was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
1110
1111 /* current state still has to be D_FAILED,
1112 * there is only one way out: to D_DISKLESS,
1113 * and that may only happen after our put_ldev below. */
1114 if (mdev->state.disk != D_FAILED)
1115 dev_err(DEV,
1116 "ASSERT FAILED: disk is %s during detach\n",
1117 drbd_disk_str(mdev->state.disk));
1118
1119 if (drbd_send_state(mdev))
1120 dev_warn(DEV, "Notified peer that I am detaching my disk\n");
1121 else
1122 dev_err(DEV, "Sending state for detaching disk failed\n");
1123
1124 drbd_rs_cancel_all(mdev);
1125
1126 /* In case we want to get something to stable storage still,
1127 * this may be the last chance.
1128 * Following put_ldev may transition to D_DISKLESS. */
1129 drbd_md_sync(mdev);
1130 put_ldev(mdev);
1131
1132 if (was_io_error && eh == EP_CALL_HELPER)
1133 drbd_khelper(mdev, "local-io-error");
1134 }
1135
1136 /* second half of local IO error, failure to attach,
1137 * or administrative detach,
1138 * after local_cnt references have reached zero again */
1139 if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
1140 /* We must still be diskless,
1141 * re-attach has to be serialized with this! */
1142 if (mdev->state.disk != D_DISKLESS)
1143 dev_err(DEV,
1144 "ASSERT FAILED: disk is %s while going diskless\n",
1145 drbd_disk_str(mdev->state.disk));
1146
1147 mdev->rs_total = 0;
1148 mdev->rs_failed = 0;
1149 atomic_set(&mdev->rs_pending_cnt, 0);
1150
1151 if (drbd_send_state(mdev))
1152 dev_warn(DEV, "Notified peer that I'm now diskless.\n");
1153 /* corresponding get_ldev in __drbd_set_state
1154 * this may finally trigger drbd_ldev_destroy. */
1155 put_ldev(mdev);
1156 }
1157
1158 /* Notify peer that I had a local IO error and did not detach. */
1159 if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
1160 drbd_send_state(mdev);
1161
1162 /* Disks got bigger while they were detached */
1163 if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
1164 test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
1165 if (ns.conn == C_CONNECTED)
1166 resync_after_online_grow(mdev);
1167 }
1168
1169 /* A resync finished or aborted, wake paused devices... */
1170 if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
1171 (os.peer_isp && !ns.peer_isp) ||
1172 (os.user_isp && !ns.user_isp))
1173 resume_next_sg(mdev);
1174
1175 /* sync target done with resync. Explicitly notify peer, even though
1176 * it should (at least for non-empty resyncs) already know itself. */
1177 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
1178 drbd_send_state(mdev);
1179
1180 /* This triggers bitmap writeout of potentially still unwritten pages
1181 * if the resync finished cleanly, or aborted because of peer disk
1182 * failure, or because of connection loss.
1183 * For resync aborted because of local disk failure, we cannot do
1184 * any bitmap writeout anymore.
1185 * No harm done if some bits change during this phase.
1186 */
1187 if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
1188 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
1189 "write from resync_finished", BM_LOCKED_SET_ALLOWED);
1190 put_ldev(mdev);
1191 }
1192
1193 if (ns.disk == D_DISKLESS &&
1194 ns.conn == C_STANDALONE &&
1195 ns.role == R_SECONDARY) {
1196 if (os.aftr_isp != ns.aftr_isp)
1197 resume_next_sg(mdev);
1198 }
1199
1200 after_conn_state_ch(mdev->tconn, os, ns, flags);
1201 drbd_md_sync(mdev);
1202}
1203
1204static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os,
1205 union drbd_state ns, enum chg_state_flags flags)
1206{
1207 /* Upon network configuration, we need to start the receiver */
1208 if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
1209 drbd_thread_start(&tconn->receiver);
1210
1211 if (ns.disk == D_DISKLESS &&
1212 ns.conn == C_STANDALONE &&
1213 ns.role == R_SECONDARY) {
1214 /* if (test_bit(DEVICE_DYING, &mdev->flags)) TODO: DEVICE_DYING functionality */
1215 drbd_thread_stop_nowait(&tconn->worker);
1216 }
1217}
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
new file mode 100644
index 000000000000..3ec26e2c4c40
--- /dev/null
+++ b/drivers/block/drbd/drbd_state.h
@@ -0,0 +1,101 @@
1#ifndef DRBD_STATE_H
2#define DRBD_STATE_H
3
4struct drbd_conf;
5
6/**
7 * DOC: DRBD State macros
8 *
9 * These macros are used to express state changes in easily readable form.
10 *
11 * The NS macros expand to a mask and a value that can be OR'ed onto the
12 * current state once the spinlock (req_lock) has been taken.
13 *
14 * The _NS macros are used for state functions that get called with the
15 * spinlock held. These macros expand directly to the new state value.
16 *
17 * Besides the basic forms NS() and _NS(), additional _?NS[23] variants are
18 * defined to express state changes that affect more than one aspect of the state.
19 *
20 * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
21 * means that the network connection was established and that the peer
22 * is in secondary role.
23 */
24#define role_MASK R_MASK
25#define peer_MASK R_MASK
26#define disk_MASK D_MASK
27#define pdsk_MASK D_MASK
28#define conn_MASK C_MASK
29#define susp_MASK 1
30#define user_isp_MASK 1
31#define aftr_isp_MASK 1
32#define susp_nod_MASK 1
33#define susp_fen_MASK 1
34
35#define NS(T, S) \
36 ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
37 ({ union drbd_state val; val.i = 0; val.T = (S); val; })
38#define NS2(T1, S1, T2, S2) \
39 ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
40 mask.T2 = T2##_MASK; mask; }), \
41 ({ union drbd_state val; val.i = 0; val.T1 = (S1); \
42 val.T2 = (S2); val; })
43#define NS3(T1, S1, T2, S2, T3, S3) \
44 ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
45 mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
46 ({ union drbd_state val; val.i = 0; val.T1 = (S1); \
47 val.T2 = (S2); val.T3 = (S3); val; })
48
49#define _NS(D, T, S) \
50 D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; })
51#define _NS2(D, T1, S1, T2, S2) \
52 D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
53 __ns.T2 = (S2); __ns; })
54#define _NS3(D, T1, S1, T2, S2, T3, S3) \
55 D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
56 __ns.T2 = (S2); __ns.T3 = (S3); __ns; })
57
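/*
 * Example (illustration only): NS(conn, C_WF_CONNECTION) expands to the
 * argument pair
 *
 *	({ union drbd_state mask; mask.i = 0; mask.conn = conn_MASK; mask; }),
 *	({ union drbd_state val; val.i = 0; val.conn = C_WF_CONNECTION; val; })
 *
 * so a single macro fills both the "mask" and "val" parameters of the
 * state change functions declared below.
 */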
58enum chg_state_flags {
59 CS_HARD = 1,
60 CS_VERBOSE = 2,
61 CS_WAIT_COMPLETE = 4,
62 CS_SERIALIZE = 8,
63 CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE,
64};
65
66extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
67 enum chg_state_flags f,
68 union drbd_state mask,
69 union drbd_state val);
70extern void drbd_force_state(struct drbd_conf *, union drbd_state,
71 union drbd_state);
72extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
73 union drbd_state,
74 union drbd_state,
75 enum chg_state_flags);
76extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
77 enum chg_state_flags,
78 struct completion *done);
79extern void print_st_err(struct drbd_conf *, union drbd_state,
80 union drbd_state, int);
81
82extern void drbd_resume_al(struct drbd_conf *mdev);
83
84/**
85 * drbd_request_state() - Request a state change
86 * @mdev: DRBD device.
87 * @mask: mask of state bits to change.
88 * @val: value of new state bits.
89 *
90 * This is the most graceful way of requesting a state change. It is
91 * quite verbose in case the state change is not possible, and all those
92 * state changes are globally serialized.
93 */
94static inline int drbd_request_state(struct drbd_conf *mdev,
95 union drbd_state mask,
96 union drbd_state val)
97{
98 return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
99}
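/*
 * Usage sketch (illustration only, not part of this patch): promoting the
 * local device, e.g.
 *
 *	int rv = drbd_request_state(mdev, NS(role, R_PRIMARY));
 *	if (rv < SS_SUCCESS)
 *		return rv;
 */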
100
101#endif