Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/Kconfig              |  22
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c   |  21
-rw-r--r--  drivers/block/drbd/drbd_int.h      | 151
-rw-r--r--  drivers/block/drbd/drbd_main.c     | 158
-rw-r--r--  drivers/block/drbd/drbd_nl.c       |  52
-rw-r--r--  drivers/block/drbd/drbd_proc.c     |  19
-rw-r--r--  drivers/block/drbd/drbd_receiver.c | 666
-rw-r--r--  drivers/block/drbd/drbd_req.c      |  40
-rw-r--r--  drivers/block/drbd/drbd_strings.c  |   2
-rw-r--r--  drivers/block/drbd/drbd_worker.c   | 206
-rw-r--r--  drivers/block/drbd/drbd_wrappers.h |  16
-rw-r--r--  drivers/block/virtio_blk.c         |  46
12 files changed, 957 insertions, 442 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 77bfce52e9ca..de277689da61 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -76,6 +76,17 @@ config BLK_DEV_XD
76 | 76 | ||
77 | It's pretty unlikely that you have one of these: say N. | 77 | It's pretty unlikely that you have one of these: say N. |
78 | 78 | ||
79 | config GDROM | ||
80 | tristate "SEGA Dreamcast GD-ROM drive" | ||
81 | depends on SH_DREAMCAST | ||
82 | help | ||
83 | A standard SEGA Dreamcast comes with a modified CD ROM drive called a | ||
84 | "GD-ROM" by SEGA to signify it is capable of reading special disks | ||
85 | with up to 1 GB of data. This drive will also read standard CD ROM | ||
86 | disks. Select this option to access any disks in your GD ROM drive. | ||
87 | Most users will want to say "Y" here. | ||
88 | You can also build this as a module which will be called gdrom. | ||
89 | |||
79 | config PARIDE | 90 | config PARIDE |
80 | tristate "Parallel port IDE device support" | 91 | tristate "Parallel port IDE device support" |
81 | depends on PARPORT_PC | 92 | depends on PARPORT_PC |
@@ -103,17 +114,6 @@ config PARIDE
103 | "MicroSolutions backpack protocol", "DataStor Commuter protocol" | 114 | "MicroSolutions backpack protocol", "DataStor Commuter protocol" |
104 | etc.). | 115 | etc.). |
105 | 116 | ||
106 | config GDROM | ||
107 | tristate "SEGA Dreamcast GD-ROM drive" | ||
108 | depends on SH_DREAMCAST | ||
109 | help | ||
110 | A standard SEGA Dreamcast comes with a modified CD ROM drive called a | ||
111 | "GD-ROM" by SEGA to signify it is capable of reading special disks | ||
112 | with up to 1 GB of data. This drive will also read standard CD ROM | ||
113 | disks. Select this option to access any disks in your GD ROM drive. | ||
114 | Most users will want to say "Y" here. | ||
115 | You can also build this as a module which will be called gdrom. | ||
116 | |||
117 | source "drivers/block/paride/Kconfig" | 117 | source "drivers/block/paride/Kconfig" |
118 | 118 | ||
119 | config BLK_CPQ_DA | 119 | config BLK_CPQ_DA |
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 3390716898d5..e3f88d6e1412 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -84,6 +84,9 @@ struct drbd_bitmap {
84 | #define BM_MD_IO_ERROR 1 | 84 | #define BM_MD_IO_ERROR 1 |
85 | #define BM_P_VMALLOCED 2 | 85 | #define BM_P_VMALLOCED 2 |
86 | 86 | ||
87 | static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | ||
88 | unsigned long e, int val, const enum km_type km); | ||
89 | |||
87 | static int bm_is_locked(struct drbd_bitmap *b) | 90 | static int bm_is_locked(struct drbd_bitmap *b) |
88 | { | 91 | { |
89 | return test_bit(BM_LOCKED, &b->bm_flags); | 92 | return test_bit(BM_LOCKED, &b->bm_flags); |
@@ -441,7 +444,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
441 | * In case this is actually a resize, we copy the old bitmap into the new one. | 444 | * In case this is actually a resize, we copy the old bitmap into the new one. |
442 | * Otherwise, the bitmap is initialized to all bits set. | 445 | * Otherwise, the bitmap is initialized to all bits set. |
443 | */ | 446 | */ |
444 | int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) | 447 | int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) |
445 | { | 448 | { |
446 | struct drbd_bitmap *b = mdev->bitmap; | 449 | struct drbd_bitmap *b = mdev->bitmap; |
447 | unsigned long bits, words, owords, obits, *p_addr, *bm; | 450 | unsigned long bits, words, owords, obits, *p_addr, *bm; |
@@ -516,7 +519,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
516 | obits = b->bm_bits; | 519 | obits = b->bm_bits; |
517 | 520 | ||
518 | growing = bits > obits; | 521 | growing = bits > obits; |
519 | if (opages) | 522 | if (opages && growing && set_new_bits) |
520 | bm_set_surplus(b); | 523 | bm_set_surplus(b); |
521 | 524 | ||
522 | b->bm_pages = npages; | 525 | b->bm_pages = npages; |
@@ -526,8 +529,12 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity)
526 | b->bm_dev_capacity = capacity; | 529 | b->bm_dev_capacity = capacity; |
527 | 530 | ||
528 | if (growing) { | 531 | if (growing) { |
529 | bm_memset(b, owords, 0xff, words-owords); | 532 | if (set_new_bits) { |
530 | b->bm_set += bits - obits; | 533 | bm_memset(b, owords, 0xff, words-owords); |
534 | b->bm_set += bits - obits; | ||
535 | } else | ||
536 | bm_memset(b, owords, 0x00, words-owords); | ||
537 | |||
531 | } | 538 | } |
532 | 539 | ||
533 | if (want < have) { | 540 | if (want < have) { |
@@ -773,7 +780,7 @@ static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int
773 | /* nothing to do, on disk == in memory */ | 780 | /* nothing to do, on disk == in memory */ |
774 | # define bm_cpu_to_lel(x) ((void)0) | 781 | # define bm_cpu_to_lel(x) ((void)0) |
775 | # else | 782 | # else |
776 | void bm_cpu_to_lel(struct drbd_bitmap *b) | 783 | static void bm_cpu_to_lel(struct drbd_bitmap *b) |
777 | { | 784 | { |
778 | /* need to cpu_to_lel all the pages ... | 785 | /* need to cpu_to_lel all the pages ... |
779 | * this may be optimized by using | 786 | * this may be optimized by using |
@@ -1015,7 +1022,7 @@ unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_f
1015 | * wants bitnr, not sector. | 1022 | * wants bitnr, not sector. |
1016 | * expected to be called for only a few bits (e - s about BITS_PER_LONG). | 1023 | * expected to be called for only a few bits (e - s about BITS_PER_LONG). |
1017 | * Must hold bitmap lock already. */ | 1024 | * Must hold bitmap lock already. */ |
1018 | int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | 1025 | static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, |
1019 | unsigned long e, int val, const enum km_type km) | 1026 | unsigned long e, int val, const enum km_type km) |
1020 | { | 1027 | { |
1021 | struct drbd_bitmap *b = mdev->bitmap; | 1028 | struct drbd_bitmap *b = mdev->bitmap; |
@@ -1053,7 +1060,7 @@ int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
1053 | * for val != 0, we change 0 -> 1, return code positive | 1060 | * for val != 0, we change 0 -> 1, return code positive |
1054 | * for val == 0, we change 1 -> 0, return code negative | 1061 | * for val == 0, we change 1 -> 0, return code negative |
1055 | * wants bitnr, not sector */ | 1062 | * wants bitnr, not sector */ |
1056 | int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | 1063 | static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, |
1057 | const unsigned long e, int val) | 1064 | const unsigned long e, int val) |
1058 | { | 1065 | { |
1059 | unsigned long flags; | 1066 | unsigned long flags; |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e5e86a781820..e9654c8d5b62 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -132,6 +132,7 @@ enum {
132 | DRBD_FAULT_DT_RA = 6, /* data read ahead */ | 132 | DRBD_FAULT_DT_RA = 6, /* data read ahead */ |
133 | DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */ | 133 | DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */ |
134 | DRBD_FAULT_AL_EE = 8, /* alloc ee */ | 134 | DRBD_FAULT_AL_EE = 8, /* alloc ee */ |
135 | DRBD_FAULT_RECEIVE = 9, /* Changes some bytes upon receiving a [rs]data block */ | ||
135 | 136 | ||
136 | DRBD_FAULT_MAX, | 137 | DRBD_FAULT_MAX, |
137 | }; | 138 | }; |
@@ -208,8 +209,11 @@ enum drbd_packets {
208 | P_RS_IS_IN_SYNC = 0x22, /* meta socket */ | 209 | P_RS_IS_IN_SYNC = 0x22, /* meta socket */ |
209 | P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */ | 210 | P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */ |
210 | P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */ | 211 | P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */ |
212 | /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ | ||
213 | /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ | ||
214 | P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ | ||
211 | 215 | ||
212 | P_MAX_CMD = 0x25, | 216 | P_MAX_CMD = 0x28, |
213 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ | 217 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ |
214 | P_MAX_OPT_CMD = 0x101, | 218 | P_MAX_OPT_CMD = 0x101, |
215 | 219 | ||
@@ -264,6 +268,7 @@ static inline const char *cmdname(enum drbd_packets cmd)
264 | [P_CSUM_RS_REQUEST] = "CsumRSRequest", | 268 | [P_CSUM_RS_REQUEST] = "CsumRSRequest", |
265 | [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", | 269 | [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", |
266 | [P_COMPRESSED_BITMAP] = "CBitmap", | 270 | [P_COMPRESSED_BITMAP] = "CBitmap", |
271 | [P_DELAY_PROBE] = "DelayProbe", | ||
267 | [P_MAX_CMD] = NULL, | 272 | [P_MAX_CMD] = NULL, |
268 | }; | 273 | }; |
269 | 274 | ||
@@ -481,7 +486,8 @@ struct p_sizes {
481 | u64 u_size; /* user requested size */ | 486 | u64 u_size; /* user requested size */ |
482 | u64 c_size; /* current exported size */ | 487 | u64 c_size; /* current exported size */ |
483 | u32 max_segment_size; /* Maximal size of a BIO */ | 488 | u32 max_segment_size; /* Maximal size of a BIO */ |
484 | u32 queue_order_type; | 489 | u16 queue_order_type; /* not yet implemented in DRBD*/ |
490 | u16 dds_flags; /* use enum dds_flags here. */ | ||
485 | } __packed; | 491 | } __packed; |
486 | 492 | ||
487 | struct p_state { | 493 | struct p_state { |
@@ -538,6 +544,18 @@ struct p_compressed_bm {
538 | u8 code[0]; | 544 | u8 code[0]; |
539 | } __packed; | 545 | } __packed; |
540 | 546 | ||
547 | struct p_delay_probe { | ||
548 | struct p_header head; | ||
549 | u32 seq_num; /* sequence number to match the two probe packets */ | ||
550 | u32 offset; /* usecs the probe got sent after the reference time point */ | ||
551 | } __packed; | ||
552 | |||
553 | struct delay_probe { | ||
554 | struct list_head list; | ||
555 | unsigned int seq_num; | ||
556 | struct timeval time; | ||
557 | }; | ||
558 | |||
541 | /* DCBP: Drbd Compressed Bitmap Packet ... */ | 559 | /* DCBP: Drbd Compressed Bitmap Packet ... */ |
542 | static inline enum drbd_bitmap_code | 560 | static inline enum drbd_bitmap_code |
543 | DCBP_get_code(struct p_compressed_bm *p) | 561 | DCBP_get_code(struct p_compressed_bm *p) |
@@ -722,22 +740,6 @@ enum epoch_event {
722 | EV_CLEANUP = 32, /* used as flag */ | 740 | EV_CLEANUP = 32, /* used as flag */ |
723 | }; | 741 | }; |
724 | 742 | ||
725 | struct drbd_epoch_entry { | ||
726 | struct drbd_work w; | ||
727 | struct drbd_conf *mdev; | ||
728 | struct bio *private_bio; | ||
729 | struct hlist_node colision; | ||
730 | sector_t sector; | ||
731 | unsigned int size; | ||
732 | struct drbd_epoch *epoch; | ||
733 | |||
734 | /* up to here, the struct layout is identical to drbd_request; | ||
735 | * we might be able to use that to our advantage... */ | ||
736 | |||
737 | unsigned int flags; | ||
738 | u64 block_id; | ||
739 | }; | ||
740 | |||
741 | struct drbd_wq_barrier { | 743 | struct drbd_wq_barrier { |
742 | struct drbd_work w; | 744 | struct drbd_work w; |
743 | struct completion done; | 745 | struct completion done; |
@@ -748,17 +750,49 @@ struct digest_info {
748 | void *digest; | 750 | void *digest; |
749 | }; | 751 | }; |
750 | 752 | ||
751 | /* ee flag bits */ | 753 | struct drbd_epoch_entry { |
754 | struct drbd_work w; | ||
755 | struct hlist_node colision; | ||
756 | struct drbd_epoch *epoch; | ||
757 | struct drbd_conf *mdev; | ||
758 | struct page *pages; | ||
759 | atomic_t pending_bios; | ||
760 | unsigned int size; | ||
761 | /* see comments on ee flag bits below */ | ||
762 | unsigned long flags; | ||
763 | sector_t sector; | ||
764 | u64 block_id; | ||
765 | }; | ||
766 | |||
767 | /* ee flag bits. | ||
768 | * While corresponding bios are in flight, the only modification will be | ||
769 | * set_bit WAS_ERROR, which has to be atomic. | ||
770 | * If no bios are in flight yet, or all have been completed, | ||
771 | * non-atomic modification to ee->flags is ok. | ||
772 | */ | ||
752 | enum { | 773 | enum { |
753 | __EE_CALL_AL_COMPLETE_IO, | 774 | __EE_CALL_AL_COMPLETE_IO, |
754 | __EE_CONFLICT_PENDING, | ||
755 | __EE_MAY_SET_IN_SYNC, | 775 | __EE_MAY_SET_IN_SYNC, |
776 | |||
777 | /* This epoch entry closes an epoch using a barrier. | ||
778 | * On sucessful completion, the epoch is released, | ||
779 | * and the P_BARRIER_ACK send. */ | ||
756 | __EE_IS_BARRIER, | 780 | __EE_IS_BARRIER, |
781 | |||
782 | /* In case a barrier failed, | ||
783 | * we need to resubmit without the barrier flag. */ | ||
784 | __EE_RESUBMITTED, | ||
785 | |||
786 | /* we may have several bios per epoch entry. | ||
787 | * if any of those fail, we set this flag atomically | ||
788 | * from the endio callback */ | ||
789 | __EE_WAS_ERROR, | ||
757 | }; | 790 | }; |
758 | #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) | 791 | #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) |
759 | #define EE_CONFLICT_PENDING (1<<__EE_CONFLICT_PENDING) | ||
760 | #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) | 792 | #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) |
761 | #define EE_IS_BARRIER (1<<__EE_IS_BARRIER) | 793 | #define EE_IS_BARRIER (1<<__EE_IS_BARRIER) |
794 | #define EE_RESUBMITTED (1<<__EE_RESUBMITTED) | ||
795 | #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) | ||
762 | 796 | ||
763 | /* global flag bits */ | 797 | /* global flag bits */ |
764 | enum { | 798 | enum { |
@@ -908,9 +942,12 @@ struct drbd_conf {
908 | unsigned int ko_count; | 942 | unsigned int ko_count; |
909 | struct drbd_work resync_work, | 943 | struct drbd_work resync_work, |
910 | unplug_work, | 944 | unplug_work, |
911 | md_sync_work; | 945 | md_sync_work, |
946 | delay_probe_work, | ||
947 | uuid_work; | ||
912 | struct timer_list resync_timer; | 948 | struct timer_list resync_timer; |
913 | struct timer_list md_sync_timer; | 949 | struct timer_list md_sync_timer; |
950 | struct timer_list delay_probe_timer; | ||
914 | 951 | ||
915 | /* Used after attach while negotiating new disk state. */ | 952 | /* Used after attach while negotiating new disk state. */ |
916 | union drbd_state new_state_tmp; | 953 | union drbd_state new_state_tmp; |
@@ -1026,6 +1063,13 @@ struct drbd_conf {
1026 | u64 ed_uuid; /* UUID of the exposed data */ | 1063 | u64 ed_uuid; /* UUID of the exposed data */ |
1027 | struct mutex state_mutex; | 1064 | struct mutex state_mutex; |
1028 | char congestion_reason; /* Why we where congested... */ | 1065 | char congestion_reason; /* Why we where congested... */ |
1066 | struct list_head delay_probes; /* protected by peer_seq_lock */ | ||
1067 | int data_delay; /* Delay of packets on the data-sock behind meta-sock */ | ||
1068 | unsigned int delay_seq; /* To generate sequence numbers of delay probes */ | ||
1069 | struct timeval dps_time; /* delay-probes-start-time */ | ||
1070 | unsigned int dp_volume_last; /* send_cnt of last delay probe */ | ||
1071 | int c_sync_rate; /* current resync rate after delay_probe magic */ | ||
1072 | atomic_t new_c_uuid; | ||
1029 | }; | 1073 | }; |
1030 | 1074 | ||
1031 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) | 1075 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) |
@@ -1081,6 +1125,11 @@ enum chg_state_flags {
1081 | CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, | 1125 | CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, |
1082 | }; | 1126 | }; |
1083 | 1127 | ||
1128 | enum dds_flags { | ||
1129 | DDSF_FORCED = 1, | ||
1130 | DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */ | ||
1131 | }; | ||
1132 | |||
1084 | extern void drbd_init_set_defaults(struct drbd_conf *mdev); | 1133 | extern void drbd_init_set_defaults(struct drbd_conf *mdev); |
1085 | extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, | 1134 | extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, |
1086 | union drbd_state mask, union drbd_state val); | 1135 | union drbd_state mask, union drbd_state val); |
@@ -1113,7 +1162,7 @@ extern int drbd_send_protocol(struct drbd_conf *mdev);
1113 | extern int drbd_send_uuids(struct drbd_conf *mdev); | 1162 | extern int drbd_send_uuids(struct drbd_conf *mdev); |
1114 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); | 1163 | extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); |
1115 | extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); | 1164 | extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); |
1116 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply); | 1165 | extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); |
1117 | extern int _drbd_send_state(struct drbd_conf *mdev); | 1166 | extern int _drbd_send_state(struct drbd_conf *mdev); |
1118 | extern int drbd_send_state(struct drbd_conf *mdev); | 1167 | extern int drbd_send_state(struct drbd_conf *mdev); |
1119 | extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, | 1168 | extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, |
@@ -1311,7 +1360,7 @@ struct bm_extent {
1311 | #define APP_R_HSIZE 15 | 1360 | #define APP_R_HSIZE 15 |
1312 | 1361 | ||
1313 | extern int drbd_bm_init(struct drbd_conf *mdev); | 1362 | extern int drbd_bm_init(struct drbd_conf *mdev); |
1314 | extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors); | 1363 | extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits); |
1315 | extern void drbd_bm_cleanup(struct drbd_conf *mdev); | 1364 | extern void drbd_bm_cleanup(struct drbd_conf *mdev); |
1316 | extern void drbd_bm_set_all(struct drbd_conf *mdev); | 1365 | extern void drbd_bm_set_all(struct drbd_conf *mdev); |
1317 | extern void drbd_bm_clear_all(struct drbd_conf *mdev); | 1366 | extern void drbd_bm_clear_all(struct drbd_conf *mdev); |
@@ -1383,7 +1432,7 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
1383 | extern char *ppsize(char *buf, unsigned long long size); | 1432 | extern char *ppsize(char *buf, unsigned long long size); |
1384 | extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); | 1433 | extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); |
1385 | enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; | 1434 | enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; |
1386 | extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, int force) __must_hold(local); | 1435 | extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); |
1387 | extern void resync_after_online_grow(struct drbd_conf *); | 1436 | extern void resync_after_online_grow(struct drbd_conf *); |
1388 | extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); | 1437 | extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); |
1389 | extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, | 1438 | extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, |
@@ -1414,7 +1463,8 @@ static inline void ov_oos_print(struct drbd_conf *mdev)
1414 | } | 1463 | } |
1415 | 1464 | ||
1416 | 1465 | ||
1417 | extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); | 1466 | extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); |
1467 | extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_epoch_entry *, void *); | ||
1418 | /* worker callbacks */ | 1468 | /* worker callbacks */ |
1419 | extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); | 1469 | extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); |
1420 | extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); | 1470 | extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); |
@@ -1438,6 +1488,8 @@ extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
1438 | extern void resync_timer_fn(unsigned long data); | 1488 | extern void resync_timer_fn(unsigned long data); |
1439 | 1489 | ||
1440 | /* drbd_receiver.c */ | 1490 | /* drbd_receiver.c */ |
1491 | extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, | ||
1492 | const unsigned rw, const int fault_type); | ||
1441 | extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); | 1493 | extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); |
1442 | extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, | 1494 | extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, |
1443 | u64 id, | 1495 | u64 id, |
@@ -1593,6 +1645,41 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
1593 | * inline helper functions | 1645 | * inline helper functions |
1594 | *************************/ | 1646 | *************************/ |
1595 | 1647 | ||
1648 | /* see also page_chain_add and friends in drbd_receiver.c */ | ||
1649 | static inline struct page *page_chain_next(struct page *page) | ||
1650 | { | ||
1651 | return (struct page *)page_private(page); | ||
1652 | } | ||
1653 | #define page_chain_for_each(page) \ | ||
1654 | for (; page && ({ prefetch(page_chain_next(page)); 1; }); \ | ||
1655 | page = page_chain_next(page)) | ||
1656 | #define page_chain_for_each_safe(page, n) \ | ||
1657 | for (; page && ({ n = page_chain_next(page); 1; }); page = n) | ||
1658 | |||
1659 | static inline int drbd_bio_has_active_page(struct bio *bio) | ||
1660 | { | ||
1661 | struct bio_vec *bvec; | ||
1662 | int i; | ||
1663 | |||
1664 | __bio_for_each_segment(bvec, bio, i, 0) { | ||
1665 | if (page_count(bvec->bv_page) > 1) | ||
1666 | return 1; | ||
1667 | } | ||
1668 | |||
1669 | return 0; | ||
1670 | } | ||
1671 | |||
1672 | static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) | ||
1673 | { | ||
1674 | struct page *page = e->pages; | ||
1675 | page_chain_for_each(page) { | ||
1676 | if (page_count(page) > 1) | ||
1677 | return 1; | ||
1678 | } | ||
1679 | return 0; | ||
1680 | } | ||
1681 | |||
1682 | |||
1596 | static inline void drbd_state_lock(struct drbd_conf *mdev) | 1683 | static inline void drbd_state_lock(struct drbd_conf *mdev) |
1597 | { | 1684 | { |
1598 | wait_event(mdev->misc_wait, | 1685 | wait_event(mdev->misc_wait, |
@@ -2132,13 +2219,15 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
2132 | return 0; | 2219 | return 0; |
2133 | if (test_bit(BITMAP_IO, &mdev->flags)) | 2220 | if (test_bit(BITMAP_IO, &mdev->flags)) |
2134 | return 0; | 2221 | return 0; |
2222 | if (atomic_read(&mdev->new_c_uuid)) | ||
2223 | return 0; | ||
2135 | return 1; | 2224 | return 1; |
2136 | } | 2225 | } |
2137 | 2226 | ||
2138 | /* I'd like to use wait_event_lock_irq, | 2227 | /* I'd like to use wait_event_lock_irq, |
2139 | * but I'm not sure when it got introduced, | 2228 | * but I'm not sure when it got introduced, |
2140 | * and not sure when it has 3 or 4 arguments */ | 2229 | * and not sure when it has 3 or 4 arguments */ |
2141 | static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) | 2230 | static inline void inc_ap_bio(struct drbd_conf *mdev, int count) |
2142 | { | 2231 | { |
2143 | /* compare with after_state_ch, | 2232 | /* compare with after_state_ch, |
2144 | * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ | 2233 | * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ |
@@ -2152,6 +2241,9 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two)
2152 | * to avoid races with the reconnect code, | 2241 | * to avoid races with the reconnect code, |
2153 | * we need to atomic_inc within the spinlock. */ | 2242 | * we need to atomic_inc within the spinlock. */ |
2154 | 2243 | ||
2244 | if (atomic_read(&mdev->new_c_uuid) && atomic_add_unless(&mdev->new_c_uuid, -1, 1)) | ||
2245 | drbd_queue_work_front(&mdev->data.work, &mdev->uuid_work); | ||
2246 | |||
2155 | spin_lock_irq(&mdev->req_lock); | 2247 | spin_lock_irq(&mdev->req_lock); |
2156 | while (!__inc_ap_bio_cond(mdev)) { | 2248 | while (!__inc_ap_bio_cond(mdev)) { |
2157 | prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); | 2249 | prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); |
@@ -2160,7 +2252,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two)
2160 | finish_wait(&mdev->misc_wait, &wait); | 2252 | finish_wait(&mdev->misc_wait, &wait); |
2161 | spin_lock_irq(&mdev->req_lock); | 2253 | spin_lock_irq(&mdev->req_lock); |
2162 | } | 2254 | } |
2163 | atomic_add(one_or_two, &mdev->ap_bio_cnt); | 2255 | atomic_add(count, &mdev->ap_bio_cnt); |
2164 | spin_unlock_irq(&mdev->req_lock); | 2256 | spin_unlock_irq(&mdev->req_lock); |
2165 | } | 2257 | } |
2166 | 2258 | ||
@@ -2251,7 +2343,8 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
2251 | if (test_bit(MD_NO_BARRIER, &mdev->flags)) | 2343 | if (test_bit(MD_NO_BARRIER, &mdev->flags)) |
2252 | return; | 2344 | return; |
2253 | 2345 | ||
2254 | r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL); | 2346 | r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL, |
2347 | BLKDEV_IFL_WAIT); | ||
2255 | if (r) { | 2348 | if (r) { |
2256 | set_bit(MD_NO_BARRIER, &mdev->flags); | 2349 | set_bit(MD_NO_BARRIER, &mdev->flags); |
2257 | dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); | 2350 | dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 93d1f9b469d4..be2d2da9cdba 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -684,6 +684,9 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
684 | else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) | 684 | else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) |
685 | rv = SS_NO_REMOTE_DISK; | 685 | rv = SS_NO_REMOTE_DISK; |
686 | 686 | ||
687 | else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) | ||
688 | rv = SS_NO_UP_TO_DATE_DISK; | ||
689 | |||
687 | else if ((ns.conn == C_CONNECTED || | 690 | else if ((ns.conn == C_CONNECTED || |
688 | ns.conn == C_WF_BITMAP_S || | 691 | ns.conn == C_WF_BITMAP_S || |
689 | ns.conn == C_SYNC_SOURCE || | 692 | ns.conn == C_SYNC_SOURCE || |
@@ -840,7 +843,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
840 | break; | 843 | break; |
841 | case C_WF_BITMAP_S: | 844 | case C_WF_BITMAP_S: |
842 | case C_PAUSED_SYNC_S: | 845 | case C_PAUSED_SYNC_S: |
843 | ns.pdsk = D_OUTDATED; | 846 | /* remap any consistent state to D_OUTDATED, |
847 | * but disallow "upgrade" of not even consistent states. | ||
848 | */ | ||
849 | ns.pdsk = | ||
850 | (D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED) | ||
851 | ? os.pdsk : D_OUTDATED; | ||
844 | break; | 852 | break; |
845 | case C_SYNC_SOURCE: | 853 | case C_SYNC_SOURCE: |
846 | ns.pdsk = D_INCONSISTENT; | 854 | ns.pdsk = D_INCONSISTENT; |
@@ -1205,21 +1213,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1205 | && (ns.pdsk < D_INCONSISTENT || | 1213 | && (ns.pdsk < D_INCONSISTENT || |
1206 | ns.pdsk == D_UNKNOWN || | 1214 | ns.pdsk == D_UNKNOWN || |
1207 | ns.pdsk == D_OUTDATED)) { | 1215 | ns.pdsk == D_OUTDATED)) { |
1208 | kfree(mdev->p_uuid); | ||
1209 | mdev->p_uuid = NULL; | ||
1210 | if (get_ldev(mdev)) { | 1216 | if (get_ldev(mdev)) { |
1211 | if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && | 1217 | if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && |
1212 | mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { | 1218 | mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE && |
1213 | drbd_uuid_new_current(mdev); | 1219 | !atomic_read(&mdev->new_c_uuid)) |
1214 | drbd_send_uuids(mdev); | 1220 | atomic_set(&mdev->new_c_uuid, 2); |
1215 | } | ||
1216 | put_ldev(mdev); | 1221 | put_ldev(mdev); |
1217 | } | 1222 | } |
1218 | } | 1223 | } |
1219 | 1224 | ||
1220 | if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { | 1225 | if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { |
1221 | if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) | 1226 | /* Diskless peer becomes primary or got connected do diskless, primary peer. */ |
1222 | drbd_uuid_new_current(mdev); | 1227 | if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 && |
1228 | !atomic_read(&mdev->new_c_uuid)) | ||
1229 | atomic_set(&mdev->new_c_uuid, 2); | ||
1223 | 1230 | ||
1224 | /* D_DISKLESS Peer becomes secondary */ | 1231 | /* D_DISKLESS Peer becomes secondary */ |
1225 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) | 1232 | if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) |
@@ -1232,7 +1239,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1232 | os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { | 1239 | os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { |
1233 | kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */ | 1240 | kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */ |
1234 | mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */ | 1241 | mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */ |
1235 | drbd_send_sizes(mdev, 0); /* to start sync... */ | 1242 | drbd_send_sizes(mdev, 0, 0); /* to start sync... */ |
1236 | drbd_send_uuids(mdev); | 1243 | drbd_send_uuids(mdev); |
1237 | drbd_send_state(mdev); | 1244 | drbd_send_state(mdev); |
1238 | } | 1245 | } |
@@ -1343,6 +1350,24 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1343 | drbd_md_sync(mdev); | 1350 | drbd_md_sync(mdev); |
1344 | } | 1351 | } |
1345 | 1352 | ||
1353 | static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | ||
1354 | { | ||
1355 | if (get_ldev(mdev)) { | ||
1356 | if (mdev->ldev->md.uuid[UI_BITMAP] == 0) { | ||
1357 | drbd_uuid_new_current(mdev); | ||
1358 | if (get_net_conf(mdev)) { | ||
1359 | drbd_send_uuids(mdev); | ||
1360 | put_net_conf(mdev); | ||
1361 | } | ||
1362 | drbd_md_sync(mdev); | ||
1363 | } | ||
1364 | put_ldev(mdev); | ||
1365 | } | ||
1366 | atomic_dec(&mdev->new_c_uuid); | ||
1367 | wake_up(&mdev->misc_wait); | ||
1368 | |||
1369 | return 1; | ||
1370 | } | ||
1346 | 1371 | ||
1347 | static int drbd_thread_setup(void *arg) | 1372 | static int drbd_thread_setup(void *arg) |
1348 | { | 1373 | { |
@@ -1755,7 +1780,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
1755 | (struct p_header *)&p, sizeof(p)); | 1780 | (struct p_header *)&p, sizeof(p)); |
1756 | } | 1781 | } |
1757 | 1782 | ||
1758 | int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply) | 1783 | int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) |
1759 | { | 1784 | { |
1760 | struct p_sizes p; | 1785 | struct p_sizes p; |
1761 | sector_t d_size, u_size; | 1786 | sector_t d_size, u_size; |
@@ -1767,7 +1792,6 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
1767 | d_size = drbd_get_max_capacity(mdev->ldev); | 1792 | d_size = drbd_get_max_capacity(mdev->ldev); |
1768 | u_size = mdev->ldev->dc.disk_size; | 1793 | u_size = mdev->ldev->dc.disk_size; |
1769 | q_order_type = drbd_queue_order_type(mdev); | 1794 | q_order_type = drbd_queue_order_type(mdev); |
1770 | p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev)); | ||
1771 | put_ldev(mdev); | 1795 | put_ldev(mdev); |
1772 | } else { | 1796 | } else { |
1773 | d_size = 0; | 1797 | d_size = 0; |
@@ -1779,7 +1803,8 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
1779 | p.u_size = cpu_to_be64(u_size); | 1803 | p.u_size = cpu_to_be64(u_size); |
1780 | p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); | 1804 | p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); |
1781 | p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); | 1805 | p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); |
1782 | p.queue_order_type = cpu_to_be32(q_order_type); | 1806 | p.queue_order_type = cpu_to_be16(q_order_type); |
1807 | p.dds_flags = cpu_to_be16(flags); | ||
1783 | 1808 | ||
1784 | ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, | 1809 | ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, |
1785 | (struct p_header *)&p, sizeof(p)); | 1810 | (struct p_header *)&p, sizeof(p)); |
@@ -2180,6 +2205,43 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
2180 | return ok; | 2205 | return ok; |
2181 | } | 2206 | } |
2182 | 2207 | ||
2208 | static int drbd_send_delay_probe(struct drbd_conf *mdev, struct drbd_socket *ds) | ||
2209 | { | ||
2210 | struct p_delay_probe dp; | ||
2211 | int offset, ok = 0; | ||
2212 | struct timeval now; | ||
2213 | |||
2214 | mutex_lock(&ds->mutex); | ||
2215 | if (likely(ds->socket)) { | ||
2216 | do_gettimeofday(&now); | ||
2217 | offset = now.tv_usec - mdev->dps_time.tv_usec + | ||
2218 | (now.tv_sec - mdev->dps_time.tv_sec) * 1000000; | ||
2219 | dp.seq_num = cpu_to_be32(mdev->delay_seq); | ||
2220 | dp.offset = cpu_to_be32(offset); | ||
2221 | |||
2222 | ok = _drbd_send_cmd(mdev, ds->socket, P_DELAY_PROBE, | ||
2223 | (struct p_header *)&dp, sizeof(dp), 0); | ||
2224 | } | ||
2225 | mutex_unlock(&ds->mutex); | ||
2226 | |||
2227 | return ok; | ||
2228 | } | ||
2229 | |||
2230 | static int drbd_send_delay_probes(struct drbd_conf *mdev) | ||
2231 | { | ||
2232 | int ok; | ||
2233 | |||
2234 | mdev->delay_seq++; | ||
2235 | do_gettimeofday(&mdev->dps_time); | ||
2236 | ok = drbd_send_delay_probe(mdev, &mdev->meta); | ||
2237 | ok = ok && drbd_send_delay_probe(mdev, &mdev->data); | ||
2238 | |||
2239 | mdev->dp_volume_last = mdev->send_cnt; | ||
2240 | mod_timer(&mdev->delay_probe_timer, jiffies + mdev->sync_conf.dp_interval * HZ / 10); | ||
2241 | |||
2242 | return ok; | ||
2243 | } | ||
2244 | |||
2183 | /* called on sndtimeo | 2245 | /* called on sndtimeo |
2184 | * returns FALSE if we should retry, | 2246 | * returns FALSE if we should retry, |
2185 | * TRUE if we think connection is dead | 2247 | * TRUE if we think connection is dead |
@@ -2309,6 +2371,44 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
2309 | return 1; | 2371 | return 1; |
2310 | } | 2372 | } |
2311 | 2373 | ||
2374 | static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) | ||
2375 | { | ||
2376 | struct page *page = e->pages; | ||
2377 | unsigned len = e->size; | ||
2378 | page_chain_for_each(page) { | ||
2379 | unsigned l = min_t(unsigned, len, PAGE_SIZE); | ||
2380 | if (!_drbd_send_page(mdev, page, 0, l)) | ||
2381 | return 0; | ||
2382 | len -= l; | ||
2383 | } | ||
2384 | return 1; | ||
2385 | } | ||
2386 | |||
2387 | static void consider_delay_probes(struct drbd_conf *mdev) | ||
2388 | { | ||
2389 | if (mdev->state.conn != C_SYNC_SOURCE || mdev->agreed_pro_version < 93) | ||
2390 | return; | ||
2391 | |||
2392 | if (mdev->dp_volume_last + mdev->sync_conf.dp_volume * 2 < mdev->send_cnt) | ||
2393 | drbd_send_delay_probes(mdev); | ||
2394 | } | ||
2395 | |||
2396 | static int w_delay_probes(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | ||
2397 | { | ||
2398 | if (!cancel && mdev->state.conn == C_SYNC_SOURCE) | ||
2399 | drbd_send_delay_probes(mdev); | ||
2400 | |||
2401 | return 1; | ||
2402 | } | ||
2403 | |||
2404 | static void delay_probe_timer_fn(unsigned long data) | ||
2405 | { | ||
2406 | struct drbd_conf *mdev = (struct drbd_conf *) data; | ||
2407 | |||
2408 | if (list_empty(&mdev->delay_probe_work.list)) | ||
2409 | drbd_queue_work(&mdev->data.work, &mdev->delay_probe_work); | ||
2410 | } | ||
2411 | |||
2312 | /* Used to send write requests | 2412 | /* Used to send write requests |
2313 | * R_PRIMARY -> Peer (P_DATA) | 2413 | * R_PRIMARY -> Peer (P_DATA) |
2314 | */ | 2414 | */ |
@@ -2360,7 +2460,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
2360 | drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); | 2460 | drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); |
2361 | if (ok && dgs) { | 2461 | if (ok && dgs) { |
2362 | dgb = mdev->int_dig_out; | 2462 | dgb = mdev->int_dig_out; |
2363 | drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); | 2463 | drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); |
2364 | ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); | 2464 | ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); |
2365 | } | 2465 | } |
2366 | if (ok) { | 2466 | if (ok) { |
@@ -2371,6 +2471,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
2371 | } | 2471 | } |
2372 | 2472 | ||
2373 | drbd_put_data_sock(mdev); | 2473 | drbd_put_data_sock(mdev); |
2474 | |||
2475 | if (ok) | ||
2476 | consider_delay_probes(mdev); | ||
2477 | |||
2374 | return ok; | 2478 | return ok; |
2375 | } | 2479 | } |
2376 | 2480 | ||
@@ -2409,13 +2513,17 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
2409 | sizeof(p), MSG_MORE); | 2513 | sizeof(p), MSG_MORE); |
2410 | if (ok && dgs) { | 2514 | if (ok && dgs) { |
2411 | dgb = mdev->int_dig_out; | 2515 | dgb = mdev->int_dig_out; |
2412 | drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb); | 2516 | drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); |
2413 | ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); | 2517 | ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); |
2414 | } | 2518 | } |
2415 | if (ok) | 2519 | if (ok) |
2416 | ok = _drbd_send_zc_bio(mdev, e->private_bio); | 2520 | ok = _drbd_send_zc_ee(mdev, e); |
2417 | 2521 | ||
2418 | drbd_put_data_sock(mdev); | 2522 | drbd_put_data_sock(mdev); |
2523 | |||
2524 | if (ok) | ||
2525 | consider_delay_probes(mdev); | ||
2526 | |||
2419 | return ok; | 2527 | return ok; |
2420 | } | 2528 | } |
2421 | 2529 | ||
@@ -2600,6 +2708,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2600 | atomic_set(&mdev->net_cnt, 0); | 2708 | atomic_set(&mdev->net_cnt, 0); |
2601 | atomic_set(&mdev->packet_seq, 0); | 2709 | atomic_set(&mdev->packet_seq, 0); |
2602 | atomic_set(&mdev->pp_in_use, 0); | 2710 | atomic_set(&mdev->pp_in_use, 0); |
2711 | atomic_set(&mdev->new_c_uuid, 0); | ||
2603 | 2712 | ||
2604 | mutex_init(&mdev->md_io_mutex); | 2713 | mutex_init(&mdev->md_io_mutex); |
2605 | mutex_init(&mdev->data.mutex); | 2714 | mutex_init(&mdev->data.mutex); |
@@ -2628,16 +2737,26 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2628 | INIT_LIST_HEAD(&mdev->unplug_work.list); | 2737 | INIT_LIST_HEAD(&mdev->unplug_work.list); |
2629 | INIT_LIST_HEAD(&mdev->md_sync_work.list); | 2738 | INIT_LIST_HEAD(&mdev->md_sync_work.list); |
2630 | INIT_LIST_HEAD(&mdev->bm_io_work.w.list); | 2739 | INIT_LIST_HEAD(&mdev->bm_io_work.w.list); |
2740 | INIT_LIST_HEAD(&mdev->delay_probes); | ||
2741 | INIT_LIST_HEAD(&mdev->delay_probe_work.list); | ||
2742 | INIT_LIST_HEAD(&mdev->uuid_work.list); | ||
2743 | |||
2631 | mdev->resync_work.cb = w_resync_inactive; | 2744 | mdev->resync_work.cb = w_resync_inactive; |
2632 | mdev->unplug_work.cb = w_send_write_hint; | 2745 | mdev->unplug_work.cb = w_send_write_hint; |
2633 | mdev->md_sync_work.cb = w_md_sync; | 2746 | mdev->md_sync_work.cb = w_md_sync; |
2634 | mdev->bm_io_work.w.cb = w_bitmap_io; | 2747 | mdev->bm_io_work.w.cb = w_bitmap_io; |
2748 | mdev->delay_probe_work.cb = w_delay_probes; | ||
2749 | mdev->uuid_work.cb = w_new_current_uuid; | ||
2635 | init_timer(&mdev->resync_timer); | 2750 | init_timer(&mdev->resync_timer); |
2636 | init_timer(&mdev->md_sync_timer); | 2751 | init_timer(&mdev->md_sync_timer); |
2752 | init_timer(&mdev->delay_probe_timer); | ||
2637 | mdev->resync_timer.function = resync_timer_fn; | 2753 | mdev->resync_timer.function = resync_timer_fn; |
2638 | mdev->resync_timer.data = (unsigned long) mdev; | 2754 | mdev->resync_timer.data = (unsigned long) mdev; |
2639 | mdev->md_sync_timer.function = md_sync_timer_fn; | 2755 | mdev->md_sync_timer.function = md_sync_timer_fn; |
2640 | mdev->md_sync_timer.data = (unsigned long) mdev; | 2756 | mdev->md_sync_timer.data = (unsigned long) mdev; |
2757 | mdev->delay_probe_timer.function = delay_probe_timer_fn; | ||
2758 | mdev->delay_probe_timer.data = (unsigned long) mdev; | ||
2759 | |||
2641 | 2760 | ||
2642 | init_waitqueue_head(&mdev->misc_wait); | 2761 | init_waitqueue_head(&mdev->misc_wait); |
2643 | init_waitqueue_head(&mdev->state_wait); | 2762 | init_waitqueue_head(&mdev->state_wait); |
@@ -2680,7 +2799,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
2680 | drbd_set_my_capacity(mdev, 0); | 2799 | drbd_set_my_capacity(mdev, 0); |
2681 | if (mdev->bitmap) { | 2800 | if (mdev->bitmap) { |
2682 | /* maybe never allocated. */ | 2801 | /* maybe never allocated. */ |
2683 | drbd_bm_resize(mdev, 0); | 2802 | drbd_bm_resize(mdev, 0, 1); |
2684 | drbd_bm_cleanup(mdev); | 2803 | drbd_bm_cleanup(mdev); |
2685 | } | 2804 | } |
2686 | 2805 | ||
@@ -3129,7 +3248,7 @@ int __init drbd_init(void)
3129 | if (err) | 3248 | if (err) |
3130 | goto Enomem; | 3249 | goto Enomem; |
3131 | 3250 | ||
3132 | drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops); | 3251 | drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); |
3133 | if (!drbd_proc) { | 3252 | if (!drbd_proc) { |
3134 | printk(KERN_ERR "drbd: unable to register proc file\n"); | 3253 | printk(KERN_ERR "drbd: unable to register proc file\n"); |
3135 | goto Enomem; | 3254 | goto Enomem; |
@@ -3660,7 +3779,8 @@ _drbd_fault_str(unsigned int type) {
3660 | [DRBD_FAULT_DT_RD] = "Data read", | 3779 | [DRBD_FAULT_DT_RD] = "Data read", |
3661 | [DRBD_FAULT_DT_RA] = "Data read ahead", | 3780 | [DRBD_FAULT_DT_RA] = "Data read ahead", |
3662 | [DRBD_FAULT_BM_ALLOC] = "BM allocation", | 3781 | [DRBD_FAULT_BM_ALLOC] = "BM allocation", |
3663 | [DRBD_FAULT_AL_EE] = "EE allocation" | 3782 | [DRBD_FAULT_AL_EE] = "EE allocation", |
3783 | [DRBD_FAULT_RECEIVE] = "receive data corruption", | ||
3664 | }; | 3784 | }; |
3665 | 3785 | ||
3666 | return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**"; | 3786 | return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**"; |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 6429d2b19e06..632e3245d1bb 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -510,7 +510,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
510 | * Returns 0 on success, negative return values indicate errors. | 510 | * Returns 0 on success, negative return values indicate errors. |
511 | * You should call drbd_md_sync() after calling this function. | 511 | * You should call drbd_md_sync() after calling this function. |
512 | */ | 512 | */ |
513 | enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, int force) __must_hold(local) | 513 | enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) |
514 | { | 514 | { |
515 | sector_t prev_first_sect, prev_size; /* previous meta location */ | 515 | sector_t prev_first_sect, prev_size; /* previous meta location */ |
516 | sector_t la_size; | 516 | sector_t la_size; |
@@ -541,12 +541,12 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, int force
541 | /* TODO: should only be some assert here, not (re)init... */ | 541 | /* TODO: should only be some assert here, not (re)init... */ |
542 | drbd_md_set_sector_offsets(mdev, mdev->ldev); | 542 | drbd_md_set_sector_offsets(mdev, mdev->ldev); |
543 | 543 | ||
544 | size = drbd_new_dev_size(mdev, mdev->ldev, force); | 544 | size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED); |
545 | 545 | ||
546 | if (drbd_get_capacity(mdev->this_bdev) != size || | 546 | if (drbd_get_capacity(mdev->this_bdev) != size || |
547 | drbd_bm_capacity(mdev) != size) { | 547 | drbd_bm_capacity(mdev) != size) { |
548 | int err; | 548 | int err; |
549 | err = drbd_bm_resize(mdev, size); | 549 | err = drbd_bm_resize(mdev, size, !(flags & DDSF_NO_RESYNC)); |
550 | if (unlikely(err)) { | 550 | if (unlikely(err)) { |
551 | /* currently there is only one error: ENOMEM! */ | 551 | /* currently there is only one error: ENOMEM! */ |
552 | size = drbd_bm_capacity(mdev)>>1; | 552 | size = drbd_bm_capacity(mdev)>>1; |
@@ -704,9 +704,6 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
704 | struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; | 704 | struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; |
705 | int max_segments = mdev->ldev->dc.max_bio_bvecs; | 705 | int max_segments = mdev->ldev->dc.max_bio_bvecs; |
706 | 706 | ||
707 | if (b->merge_bvec_fn && !mdev->ldev->dc.use_bmbv) | ||
708 | max_seg_s = PAGE_SIZE; | ||
709 | |||
710 | max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s); | 707 | max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s); |
711 | 708 | ||
712 | blk_queue_max_hw_sectors(q, max_seg_s >> 9); | 709 | blk_queue_max_hw_sectors(q, max_seg_s >> 9); |
@@ -1199,13 +1196,12 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1199 | } | 1196 | } |
1200 | 1197 | ||
1201 | /* allocation not in the IO path, cqueue thread context */ | 1198 | /* allocation not in the IO path, cqueue thread context */ |
1202 | new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); | 1199 | new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); |
1203 | if (!new_conf) { | 1200 | if (!new_conf) { |
1204 | retcode = ERR_NOMEM; | 1201 | retcode = ERR_NOMEM; |
1205 | goto fail; | 1202 | goto fail; |
1206 | } | 1203 | } |
1207 | 1204 | ||
1208 | memset(new_conf, 0, sizeof(struct net_conf)); | ||
1209 | new_conf->timeout = DRBD_TIMEOUT_DEF; | 1205 | new_conf->timeout = DRBD_TIMEOUT_DEF; |
1210 | new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; | 1206 | new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; |
1211 | new_conf->ping_int = DRBD_PING_INT_DEF; | 1207 | new_conf->ping_int = DRBD_PING_INT_DEF; |
@@ -1477,8 +1473,8 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1477 | { | 1473 | { |
1478 | struct resize rs; | 1474 | struct resize rs; |
1479 | int retcode = NO_ERROR; | 1475 | int retcode = NO_ERROR; |
1480 | int ldsc = 0; /* local disk size changed */ | ||
1481 | enum determine_dev_size dd; | 1476 | enum determine_dev_size dd; |
1477 | enum dds_flags ddsf; | ||
1482 | 1478 | ||
1483 | memset(&rs, 0, sizeof(struct resize)); | 1479 | memset(&rs, 0, sizeof(struct resize)); |
1484 | if (!resize_from_tags(mdev, nlp->tag_list, &rs)) { | 1480 | if (!resize_from_tags(mdev, nlp->tag_list, &rs)) { |
@@ -1502,13 +1498,17 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1502 | goto fail; | 1498 | goto fail; |
1503 | } | 1499 | } |
1504 | 1500 | ||
1505 | if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { | 1501 | if (rs.no_resync && mdev->agreed_pro_version < 93) { |
1506 | mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); | 1502 | retcode = ERR_NEED_APV_93; |
1507 | ldsc = 1; | 1503 | goto fail; |
1508 | } | 1504 | } |
1509 | 1505 | ||
1506 | if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) | ||
1507 | mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); | ||
1508 | |||
1510 | mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; | 1509 | mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; |
1511 | dd = drbd_determin_dev_size(mdev, rs.resize_force); | 1510 | ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); |
1511 | dd = drbd_determin_dev_size(mdev, ddsf); | ||
1512 | drbd_md_sync(mdev); | 1512 | drbd_md_sync(mdev); |
1513 | put_ldev(mdev); | 1513 | put_ldev(mdev); |
1514 | if (dd == dev_size_error) { | 1514 | if (dd == dev_size_error) { |
@@ -1516,12 +1516,12 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1516 | goto fail; | 1516 | goto fail; |
1517 | } | 1517 | } |
1518 | 1518 | ||
1519 | if (mdev->state.conn == C_CONNECTED && (dd != unchanged || ldsc)) { | 1519 | if (mdev->state.conn == C_CONNECTED) { |
1520 | if (dd == grew) | 1520 | if (dd == grew) |
1521 | set_bit(RESIZE_PENDING, &mdev->flags); | 1521 | set_bit(RESIZE_PENDING, &mdev->flags); |
1522 | 1522 | ||
1523 | drbd_send_uuids(mdev); | 1523 | drbd_send_uuids(mdev); |
1524 | drbd_send_sizes(mdev, 1); | 1524 | drbd_send_sizes(mdev, 1, ddsf); |
1525 | } | 1525 | } |
1526 | 1526 | ||
1527 | fail: | 1527 | fail: |
@@ -1551,6 +1551,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1551 | sc.rate = DRBD_RATE_DEF; | 1551 | sc.rate = DRBD_RATE_DEF; |
1552 | sc.after = DRBD_AFTER_DEF; | 1552 | sc.after = DRBD_AFTER_DEF; |
1553 | sc.al_extents = DRBD_AL_EXTENTS_DEF; | 1553 | sc.al_extents = DRBD_AL_EXTENTS_DEF; |
1554 | sc.dp_volume = DRBD_DP_VOLUME_DEF; | ||
1555 | sc.dp_interval = DRBD_DP_INTERVAL_DEF; | ||
1556 | sc.throttle_th = DRBD_RS_THROTTLE_TH_DEF; | ||
1557 | sc.hold_off_th = DRBD_RS_HOLD_OFF_TH_DEF; | ||
1554 | } else | 1558 | } else |
1555 | memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); | 1559 | memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); |
1556 | 1560 | ||
@@ -2207,9 +2211,9 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
2207 | { | 2211 | { |
2208 | struct cn_msg *cn_reply; | 2212 | struct cn_msg *cn_reply; |
2209 | struct drbd_nl_cfg_reply *reply; | 2213 | struct drbd_nl_cfg_reply *reply; |
2210 | struct bio_vec *bvec; | ||
2211 | unsigned short *tl; | 2214 | unsigned short *tl; |
2212 | int i; | 2215 | struct page *page; |
2216 | unsigned len; | ||
2213 | 2217 | ||
2214 | if (!e) | 2218 | if (!e) |
2215 | return; | 2219 | return; |
@@ -2247,11 +2251,15 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
2247 | put_unaligned(T_ee_data, tl++); | 2251 | put_unaligned(T_ee_data, tl++); |
2248 | put_unaligned(e->size, tl++); | 2252 | put_unaligned(e->size, tl++); |
2249 | 2253 | ||
2250 | __bio_for_each_segment(bvec, e->private_bio, i, 0) { | 2254 | len = e->size; |
2251 | void *d = kmap(bvec->bv_page); | 2255 | page = e->pages; |
2252 | memcpy(tl, d + bvec->bv_offset, bvec->bv_len); | 2256 | page_chain_for_each(page) { |
2253 | kunmap(bvec->bv_page); | 2257 | void *d = kmap_atomic(page, KM_USER0); |
2254 | tl=(unsigned short*)((char*)tl + bvec->bv_len); | 2258 | unsigned l = min_t(unsigned, len, PAGE_SIZE); |
2259 | memcpy(tl, d, l); | ||
2260 | kunmap_atomic(d, KM_USER0); | ||
2261 | tl = (unsigned short*)((char*)tl + l); | ||
2262 | len -= l; | ||
2255 | } | 2263 | } |
2256 | put_unaligned(TT_END, tl++); /* Close the tag list */ | 2264 | put_unaligned(TT_END, tl++); /* Close the tag list */ |
2257 | 2265 | ||
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index be3374b68460..d0f1767ea4c3 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -73,14 +73,21 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
73 | seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10); | 73 | seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10); |
74 | /* if more than 1 GB display in MB */ | 74 | /* if more than 1 GB display in MB */ |
75 | if (mdev->rs_total > 0x100000L) | 75 | if (mdev->rs_total > 0x100000L) |
76 | seq_printf(seq, "(%lu/%lu)M\n\t", | 76 | seq_printf(seq, "(%lu/%lu)M", |
77 | (unsigned long) Bit2KB(rs_left >> 10), | 77 | (unsigned long) Bit2KB(rs_left >> 10), |
78 | (unsigned long) Bit2KB(mdev->rs_total >> 10)); | 78 | (unsigned long) Bit2KB(mdev->rs_total >> 10)); |
79 | else | 79 | else |
80 | seq_printf(seq, "(%lu/%lu)K\n\t", | 80 | seq_printf(seq, "(%lu/%lu)K", |
81 | (unsigned long) Bit2KB(rs_left), | 81 | (unsigned long) Bit2KB(rs_left), |
82 | (unsigned long) Bit2KB(mdev->rs_total)); | 82 | (unsigned long) Bit2KB(mdev->rs_total)); |
83 | 83 | ||
84 | if (mdev->state.conn == C_SYNC_TARGET) | ||
85 | seq_printf(seq, " queue_delay: %d.%d ms\n\t", | ||
86 | mdev->data_delay / 1000, | ||
87 | (mdev->data_delay % 1000) / 100); | ||
88 | else if (mdev->state.conn == C_SYNC_SOURCE) | ||
89 | seq_printf(seq, " delay_probe: %u\n\t", mdev->delay_seq); | ||
90 | |||
84 | /* see drivers/md/md.c | 91 | /* see drivers/md/md.c |
85 | * We do not want to overflow, so the order of operands and | 92 | * We do not want to overflow, so the order of operands and |
86 | * the * 100 / 100 trick are important. We do a +1 to be | 93 | * the * 100 / 100 trick are important. We do a +1 to be |
@@ -128,6 +135,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
128 | else | 135 | else |
129 | seq_printf(seq, " (%ld)", dbdt); | 136 | seq_printf(seq, " (%ld)", dbdt); |
130 | 137 | ||
138 | if (mdev->state.conn == C_SYNC_TARGET) { | ||
139 | if (mdev->c_sync_rate > 1000) | ||
140 | seq_printf(seq, " want: %d,%03d", | ||
141 | mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000); | ||
142 | else | ||
143 | seq_printf(seq, " want: %d", mdev->c_sync_rate); | ||
144 | } | ||
145 | |||
131 | seq_printf(seq, " K/sec\n"); | 146 | seq_printf(seq, " K/sec\n"); |
132 | } | 147 | } |
133 | 148 | ||
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 3f096e7959b4..bc9ab7fb2cc7 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -80,30 +80,128 @@ static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epo
80 | 80 | ||
81 | #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) | 81 | #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) |
82 | 82 | ||
83 | static struct page *drbd_pp_first_page_or_try_alloc(struct drbd_conf *mdev) | 83 | /* |
84 | * some helper functions to deal with single linked page lists, | ||
85 | * page->private being our "next" pointer. | ||
86 | */ | ||
87 | |||
88 | /* If at least n pages are linked at head, get n pages off. | ||
89 | * Otherwise, don't modify head, and return NULL. | ||
90 | * Locking is the responsibility of the caller. | ||
91 | */ | ||
92 | static struct page *page_chain_del(struct page **head, int n) | ||
93 | { | ||
94 | struct page *page; | ||
95 | struct page *tmp; | ||
96 | |||
97 | BUG_ON(!n); | ||
98 | BUG_ON(!head); | ||
99 | |||
100 | page = *head; | ||
101 | |||
102 | if (!page) | ||
103 | return NULL; | ||
104 | |||
105 | while (page) { | ||
106 | tmp = page_chain_next(page); | ||
107 | if (--n == 0) | ||
108 | break; /* found sufficient pages */ | ||
109 | if (tmp == NULL) | ||
110 | /* insufficient pages, don't use any of them. */ | ||
111 | return NULL; | ||
112 | page = tmp; | ||
113 | } | ||
114 | |||
115 | /* add end of list marker for the returned list */ | ||
116 | set_page_private(page, 0); | ||
117 | /* actual return value, and adjustment of head */ | ||
118 | page = *head; | ||
119 | *head = tmp; | ||
120 | return page; | ||
121 | } | ||
122 | |||
123 | /* may be used outside of locks to find the tail of a (usually short) | ||
124 | * "private" page chain, before adding it back to a global chain head | ||
125 | * with page_chain_add() under a spinlock. */ | ||
126 | static struct page *page_chain_tail(struct page *page, int *len) | ||
127 | { | ||
128 | struct page *tmp; | ||
129 | int i = 1; | ||
130 | while ((tmp = page_chain_next(page))) | ||
131 | ++i, page = tmp; | ||
132 | if (len) | ||
133 | *len = i; | ||
134 | return page; | ||
135 | } | ||
136 | |||
137 | static int page_chain_free(struct page *page) | ||
138 | { | ||
139 | struct page *tmp; | ||
140 | int i = 0; | ||
141 | page_chain_for_each_safe(page, tmp) { | ||
142 | put_page(page); | ||
143 | ++i; | ||
144 | } | ||
145 | return i; | ||
146 | } | ||
147 | |||
148 | static void page_chain_add(struct page **head, | ||
149 | struct page *chain_first, struct page *chain_last) | ||
150 | { | ||
151 | #if 1 | ||
152 | struct page *tmp; | ||
153 | tmp = page_chain_tail(chain_first, NULL); | ||
154 | BUG_ON(tmp != chain_last); | ||
155 | #endif | ||
156 | |||
157 | /* add chain to head */ | ||
158 | set_page_private(chain_last, (unsigned long)*head); | ||
159 | *head = chain_first; | ||
160 | } | ||
161 | |||
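The helpers above build an intrusive singly linked list of struct page, with page->private serving as the "next" pointer; page_chain_next(), page_chain_for_each() and page_chain_for_each_safe() are macros defined elsewhere in drbd_int.h and do not appear in this hunk. As a hedged, userspace-only illustration of the same idea, the sketch below models page_chain_del() with an ordinary struct standing in for struct page (fake_page and chain_del are invented names for the example, not kernel code):

#include <assert.h>
#include <stddef.h>

/* Userspace stand-in for struct page: only the "next" link matters here. */
struct fake_page {
	struct fake_page *next;		/* models page_private() used as a next pointer */
};

/* Take n pages off the front of *head, or none at all (mirrors page_chain_del). */
static struct fake_page *chain_del(struct fake_page **head, int n)
{
	struct fake_page *page = *head;
	struct fake_page *tmp = NULL;

	if (!page)
		return NULL;
	while (page) {
		tmp = page->next;
		if (--n == 0)
			break;			/* found enough pages */
		if (!tmp)
			return NULL;		/* too few: leave the chain untouched */
		page = tmp;
	}
	page->next = NULL;			/* terminate the returned sub-chain */
	page = *head;
	*head = tmp;
	return page;
}

int main(void)
{
	struct fake_page p[3] = { { &p[1] }, { &p[2] }, { NULL } };
	struct fake_page *pool = &p[0];

	assert(chain_del(&pool, 4) == NULL);	/* only 3 linked: refuse, pool untouched */
	assert(chain_del(&pool, 2) == &p[0]);	/* take 2 off the front */
	assert(pool == &p[2]);			/* remainder becomes the new head */
	return 0;
}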
162 | static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number) | ||
84 | { | 163 | { |
85 | struct page *page = NULL; | 164 | struct page *page = NULL; |
165 | struct page *tmp = NULL; | ||
166 | int i = 0; | ||
86 | 167 | ||
87 | /* Yes, testing drbd_pp_vacant outside the lock is racy. | 168 | /* Yes, testing drbd_pp_vacant outside the lock is racy. |
88 | * So what. It saves a spin_lock. */ | 169 | * So what. It saves a spin_lock. */ |
89 | if (drbd_pp_vacant > 0) { | 170 | if (drbd_pp_vacant >= number) { |
90 | spin_lock(&drbd_pp_lock); | 171 | spin_lock(&drbd_pp_lock); |
91 | page = drbd_pp_pool; | 172 | page = page_chain_del(&drbd_pp_pool, number); |
92 | if (page) { | 173 | if (page) |
93 | drbd_pp_pool = (struct page *)page_private(page); | 174 | drbd_pp_vacant -= number; |
94 | set_page_private(page, 0); /* just to be polite */ | ||
95 | drbd_pp_vacant--; | ||
96 | } | ||
97 | spin_unlock(&drbd_pp_lock); | 175 | spin_unlock(&drbd_pp_lock); |
176 | if (page) | ||
177 | return page; | ||
98 | } | 178 | } |
179 | |||
99 | /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD | 180 | /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD |
100 | * "criss-cross" setup, that might cause write-out on some other DRBD, | 181 | * "criss-cross" setup, that might cause write-out on some other DRBD, |
101 | * which in turn might block on the other node at this very place. */ | 182 | * which in turn might block on the other node at this very place. */ |
102 | if (!page) | 183 | for (i = 0; i < number; i++) { |
103 | page = alloc_page(GFP_TRY); | 184 | tmp = alloc_page(GFP_TRY); |
104 | if (page) | 185 | if (!tmp) |
105 | atomic_inc(&mdev->pp_in_use); | 186 | break; |
106 | return page; | 187 | set_page_private(tmp, (unsigned long)page); |
188 | page = tmp; | ||
189 | } | ||
190 | |||
191 | if (i == number) | ||
192 | return page; | ||
193 | |||
194 | /* Not enough pages immediately available this time. | ||
195 | * No need to jump around here, drbd_pp_alloc will retry this | ||
196 | * function "soon". */ | ||
197 | if (page) { | ||
198 | tmp = page_chain_tail(page, NULL); | ||
199 | spin_lock(&drbd_pp_lock); | ||
200 | page_chain_add(&drbd_pp_pool, page, tmp); | ||
201 | drbd_pp_vacant += i; | ||
202 | spin_unlock(&drbd_pp_lock); | ||
203 | } | ||
204 | return NULL; | ||
107 | } | 205 | } |
108 | 206 | ||
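drbd_pp_first_pages_or_try_alloc() above is deliberately all-or-nothing: it either returns a chain of exactly @number pages, or returns NULL and pushes any partial result back onto the shared pool for a later attempt. Below is a rough userspace sketch of the same pattern, using malloc() instead of page allocation and freeing (rather than pooling) the partial result; alloc_all_or_nothing is an invented name for illustration only:

#include <stdlib.h>

/* Allocate exactly n buffers or none at all; never leak a partial result. */
static void **alloc_all_or_nothing(size_t n, size_t bufsize)
{
	void **v = calloc(n, sizeof(*v));
	size_t i;

	if (!v)
		return NULL;
	for (i = 0; i < n; i++) {
		v[i] = malloc(bufsize);
		if (!v[i]) {
			while (i--)		/* roll back what we already got */
				free(v[i]);
			free(v);
			return NULL;
		}
	}
	return v;
}

int main(void)
{
	void **bufs = alloc_all_or_nothing(8, 4096);
	size_t i;

	if (bufs) {
		for (i = 0; i < 8; i++)
			free(bufs[i]);
		free(bufs);
	}
	return 0;
}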
109 | /* kick lower level device, if we have more than (arbitrary number) | 207 | /* kick lower level device, if we have more than (arbitrary number) |
@@ -127,7 +225,7 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed | |||
127 | 225 | ||
128 | list_for_each_safe(le, tle, &mdev->net_ee) { | 226 | list_for_each_safe(le, tle, &mdev->net_ee) { |
129 | e = list_entry(le, struct drbd_epoch_entry, w.list); | 227 | e = list_entry(le, struct drbd_epoch_entry, w.list); |
130 | if (drbd_bio_has_active_page(e->private_bio)) | 228 | if (drbd_ee_has_active_page(e)) |
131 | break; | 229 | break; |
132 | list_move(le, to_be_freed); | 230 | list_move(le, to_be_freed); |
133 | } | 231 | } |
@@ -148,32 +246,34 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) | |||
148 | } | 246 | } |
149 | 247 | ||
150 | /** | 248 | /** |
151 | * drbd_pp_alloc() - Returns a page, fails only if a signal comes in | 249 | * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled) |
152 | * @mdev: DRBD device. | 250 | * @mdev: DRBD device. |
153 | * @retry: whether or not to retry allocation forever (or until signalled) | 251 | * @number: number of pages requested |
252 | * @retry: whether to retry, if not enough pages are available right now | ||
253 | * | ||
254 | * Tries to allocate @number pages, first from our own page pool, then from | ||
255 | * the kernel, unless this allocation would exceed the max_buffers setting. | ||
256 | * Possibly retry until DRBD frees sufficient pages somewhere else. | ||
154 | * | 257 | * |
155 | * Tries to allocate a page, first from our own page pool, then from the | 258 | * Returns a page chain linked via page->private. |
156 | * kernel, unless this allocation would exceed the max_buffers setting. | ||
157 | * If @retry is non-zero, retry until DRBD frees a page somewhere else. | ||
158 | */ | 259 | */ |
159 | static struct page *drbd_pp_alloc(struct drbd_conf *mdev, int retry) | 260 | static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry) |
160 | { | 261 | { |
161 | struct page *page = NULL; | 262 | struct page *page = NULL; |
162 | DEFINE_WAIT(wait); | 263 | DEFINE_WAIT(wait); |
163 | 264 | ||
164 | if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) { | 265 | /* Yes, we may run up to @number over max_buffers. If we |
165 | page = drbd_pp_first_page_or_try_alloc(mdev); | 266 | * follow it strictly, the admin will get it wrong anyways. */ |
166 | if (page) | 267 | if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) |
167 | return page; | 268 | page = drbd_pp_first_pages_or_try_alloc(mdev, number); |
168 | } | ||
169 | 269 | ||
170 | for (;;) { | 270 | while (page == NULL) { |
171 | prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); | 271 | prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); |
172 | 272 | ||
173 | drbd_kick_lo_and_reclaim_net(mdev); | 273 | drbd_kick_lo_and_reclaim_net(mdev); |
174 | 274 | ||
175 | if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) { | 275 | if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) { |
176 | page = drbd_pp_first_page_or_try_alloc(mdev); | 276 | page = drbd_pp_first_pages_or_try_alloc(mdev, number); |
177 | if (page) | 277 | if (page) |
178 | break; | 278 | break; |
179 | } | 279 | } |
@@ -190,62 +290,32 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, int retry) | |||
190 | } | 290 | } |
191 | finish_wait(&drbd_pp_wait, &wait); | 291 | finish_wait(&drbd_pp_wait, &wait); |
192 | 292 | ||
293 | if (page) | ||
294 | atomic_add(number, &mdev->pp_in_use); | ||
193 | return page; | 295 | return page; |
194 | } | 296 | } |
195 | 297 | ||
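When neither the pool nor the kernel can satisfy the request, drbd_pp_alloc() above parks the caller on drbd_pp_wait with prepare_to_wait()/schedule() and is later woken by drbd_pp_free(). For readers less familiar with kernel waitqueues, here is a loose userspace analogue of that handshake using a pthread mutex and condition variable; pool_lock, pool_cond, buf_pool and the helper names are invented for this sketch, and the signal/interruptible handling of the kernel version is omitted. The two functions are a compilable fragment meant to be linked into a larger test program:

#include <assert.h>
#include <pthread.h>

#define POOL_CAP 64

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  pool_cond = PTHREAD_COND_INITIALIZER;
static void *buf_pool[POOL_CAP];
static int   buf_count;

/* Block until a buffer is available (models the retry loop in drbd_pp_alloc). */
static void *pool_get_blocking(void)
{
	void *buf;

	pthread_mutex_lock(&pool_lock);
	while (buf_count == 0)
		pthread_cond_wait(&pool_cond, &pool_lock);
	buf = buf_pool[--buf_count];
	pthread_mutex_unlock(&pool_lock);
	return buf;
}

/* Return a buffer and wake one waiter (models drbd_pp_free() plus wake_up()). */
static void pool_put(void *buf)
{
	pthread_mutex_lock(&pool_lock);
	assert(buf_count < POOL_CAP);	/* callers never return more than they took */
	buf_pool[buf_count++] = buf;
	pthread_mutex_unlock(&pool_lock);
	pthread_cond_signal(&pool_cond);
}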
196 | /* Must not be used from irq, as that may deadlock: see drbd_pp_alloc. | 298 | /* Must not be used from irq, as that may deadlock: see drbd_pp_alloc. |
197 | * Is also used from inside another spin_lock_irq(&mdev->req_lock) */ | 299 | * Is also used from inside another spin_lock_irq(&mdev->req_lock); |
300 | * Either links the page chain back to the global pool, | ||
301 | * or returns all pages to the system. */ | ||
198 | static void drbd_pp_free(struct drbd_conf *mdev, struct page *page) | 302 | static void drbd_pp_free(struct drbd_conf *mdev, struct page *page) |
199 | { | 303 | { |
200 | int free_it; | ||
201 | |||
202 | spin_lock(&drbd_pp_lock); | ||
203 | if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) { | ||
204 | free_it = 1; | ||
205 | } else { | ||
206 | set_page_private(page, (unsigned long)drbd_pp_pool); | ||
207 | drbd_pp_pool = page; | ||
208 | drbd_pp_vacant++; | ||
209 | free_it = 0; | ||
210 | } | ||
211 | spin_unlock(&drbd_pp_lock); | ||
212 | |||
213 | atomic_dec(&mdev->pp_in_use); | ||
214 | |||
215 | if (free_it) | ||
216 | __free_page(page); | ||
217 | |||
218 | wake_up(&drbd_pp_wait); | ||
219 | } | ||
220 | |||
221 | static void drbd_pp_free_bio_pages(struct drbd_conf *mdev, struct bio *bio) | ||
222 | { | ||
223 | struct page *p_to_be_freed = NULL; | ||
224 | struct page *page; | ||
225 | struct bio_vec *bvec; | ||
226 | int i; | 304 | int i; |
227 | 305 | if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) | |
228 | spin_lock(&drbd_pp_lock); | 306 | i = page_chain_free(page); |
229 | __bio_for_each_segment(bvec, bio, i, 0) { | 307 | else { |
230 | if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) { | 308 | struct page *tmp; |
231 | set_page_private(bvec->bv_page, (unsigned long)p_to_be_freed); | 309 | tmp = page_chain_tail(page, &i); |
232 | p_to_be_freed = bvec->bv_page; | 310 | spin_lock(&drbd_pp_lock); |
233 | } else { | 311 | page_chain_add(&drbd_pp_pool, page, tmp); |
234 | set_page_private(bvec->bv_page, (unsigned long)drbd_pp_pool); | 312 | drbd_pp_vacant += i; |
235 | drbd_pp_pool = bvec->bv_page; | 313 | spin_unlock(&drbd_pp_lock); |
236 | drbd_pp_vacant++; | ||
237 | } | ||
238 | } | ||
239 | spin_unlock(&drbd_pp_lock); | ||
240 | atomic_sub(bio->bi_vcnt, &mdev->pp_in_use); | ||
241 | |||
242 | while (p_to_be_freed) { | ||
243 | page = p_to_be_freed; | ||
244 | p_to_be_freed = (struct page *)page_private(page); | ||
245 | set_page_private(page, 0); /* just to be polite */ | ||
246 | put_page(page); | ||
247 | } | 314 | } |
248 | 315 | atomic_sub(i, &mdev->pp_in_use); | |
316 | i = atomic_read(&mdev->pp_in_use); | ||
317 | if (i < 0) | ||
318 | dev_warn(DEV, "ASSERTION FAILED: pp_in_use: %d < 0\n", i); | ||
249 | wake_up(&drbd_pp_wait); | 319 | wake_up(&drbd_pp_wait); |
250 | } | 320 | } |
251 | 321 | ||
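drbd_pp_free() above keeps the returned page chain cached in drbd_pp_pool unless the pool already holds more spare pages than the per-device watermark (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE times minor_count), in which case the whole chain goes back to the system. A minimal userspace sketch of that high-water-mark decision; POOL_MAX, pool, pool_vacant and pool_free are invented names, and locking is left out of the fragment:

#include <stdlib.h>

#define POOL_MAX 128	/* stands in for (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count */

static void *pool[POOL_MAX];
static int   pool_vacant;

/* Either cache the buffer for reuse or hand it back to the system. */
static void pool_free(void *buf)
{
	if (pool_vacant >= POOL_MAX)
		free(buf);			/* enough spares cached already */
	else
		pool[pool_vacant++] = buf;	/* keep it for the next allocation */
}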
@@ -270,11 +340,9 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, | |||
270 | unsigned int data_size, | 340 | unsigned int data_size, |
271 | gfp_t gfp_mask) __must_hold(local) | 341 | gfp_t gfp_mask) __must_hold(local) |
272 | { | 342 | { |
273 | struct request_queue *q; | ||
274 | struct drbd_epoch_entry *e; | 343 | struct drbd_epoch_entry *e; |
275 | struct page *page; | 344 | struct page *page; |
276 | struct bio *bio; | 345 | unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; |
277 | unsigned int ds; | ||
278 | 346 | ||
279 | if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE)) | 347 | if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE)) |
280 | return NULL; | 348 | return NULL; |
@@ -286,84 +354,32 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, | |||
286 | return NULL; | 354 | return NULL; |
287 | } | 355 | } |
288 | 356 | ||
289 | bio = bio_alloc(gfp_mask & ~__GFP_HIGHMEM, div_ceil(data_size, PAGE_SIZE)); | 357 | page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT)); |
290 | if (!bio) { | 358 | if (!page) |
291 | if (!(gfp_mask & __GFP_NOWARN)) | 359 | goto fail; |
292 | dev_err(DEV, "alloc_ee: Allocation of a bio failed\n"); | ||
293 | goto fail1; | ||
294 | } | ||
295 | |||
296 | bio->bi_bdev = mdev->ldev->backing_bdev; | ||
297 | bio->bi_sector = sector; | ||
298 | |||
299 | ds = data_size; | ||
300 | while (ds) { | ||
301 | page = drbd_pp_alloc(mdev, (gfp_mask & __GFP_WAIT)); | ||
302 | if (!page) { | ||
303 | if (!(gfp_mask & __GFP_NOWARN)) | ||
304 | dev_err(DEV, "alloc_ee: Allocation of a page failed\n"); | ||
305 | goto fail2; | ||
306 | } | ||
307 | if (!bio_add_page(bio, page, min_t(int, ds, PAGE_SIZE), 0)) { | ||
308 | drbd_pp_free(mdev, page); | ||
309 | dev_err(DEV, "alloc_ee: bio_add_page(s=%llu," | ||
310 | "data_size=%u,ds=%u) failed\n", | ||
311 | (unsigned long long)sector, data_size, ds); | ||
312 | |||
313 | q = bdev_get_queue(bio->bi_bdev); | ||
314 | if (q->merge_bvec_fn) { | ||
315 | struct bvec_merge_data bvm = { | ||
316 | .bi_bdev = bio->bi_bdev, | ||
317 | .bi_sector = bio->bi_sector, | ||
318 | .bi_size = bio->bi_size, | ||
319 | .bi_rw = bio->bi_rw, | ||
320 | }; | ||
321 | int l = q->merge_bvec_fn(q, &bvm, | ||
322 | &bio->bi_io_vec[bio->bi_vcnt]); | ||
323 | dev_err(DEV, "merge_bvec_fn() = %d\n", l); | ||
324 | } | ||
325 | |||
326 | /* dump more of the bio. */ | ||
327 | dev_err(DEV, "bio->bi_max_vecs = %d\n", bio->bi_max_vecs); | ||
328 | dev_err(DEV, "bio->bi_vcnt = %d\n", bio->bi_vcnt); | ||
329 | dev_err(DEV, "bio->bi_size = %d\n", bio->bi_size); | ||
330 | dev_err(DEV, "bio->bi_phys_segments = %d\n", bio->bi_phys_segments); | ||
331 | |||
332 | goto fail2; | ||
333 | break; | ||
334 | } | ||
335 | ds -= min_t(int, ds, PAGE_SIZE); | ||
336 | } | ||
337 | |||
338 | D_ASSERT(data_size == bio->bi_size); | ||
339 | |||
340 | bio->bi_private = e; | ||
341 | e->mdev = mdev; | ||
342 | e->sector = sector; | ||
343 | e->size = bio->bi_size; | ||
344 | 360 | ||
345 | e->private_bio = bio; | ||
346 | e->block_id = id; | ||
347 | INIT_HLIST_NODE(&e->colision); | 361 | INIT_HLIST_NODE(&e->colision); |
348 | e->epoch = NULL; | 362 | e->epoch = NULL; |
363 | e->mdev = mdev; | ||
364 | e->pages = page; | ||
365 | atomic_set(&e->pending_bios, 0); | ||
366 | e->size = data_size; | ||
349 | e->flags = 0; | 367 | e->flags = 0; |
368 | e->sector = sector; | ||
369 | e->sector = sector; | ||
370 | e->block_id = id; | ||
350 | 371 | ||
351 | return e; | 372 | return e; |
352 | 373 | ||
353 | fail2: | 374 | fail: |
354 | drbd_pp_free_bio_pages(mdev, bio); | ||
355 | bio_put(bio); | ||
356 | fail1: | ||
357 | mempool_free(e, drbd_ee_mempool); | 375 | mempool_free(e, drbd_ee_mempool); |
358 | |||
359 | return NULL; | 376 | return NULL; |
360 | } | 377 | } |
361 | 378 | ||
362 | void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) | 379 | void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) |
363 | { | 380 | { |
364 | struct bio *bio = e->private_bio; | 381 | drbd_pp_free(mdev, e->pages); |
365 | drbd_pp_free_bio_pages(mdev, bio); | 382 | D_ASSERT(atomic_read(&e->pending_bios) == 0); |
366 | bio_put(bio); | ||
367 | D_ASSERT(hlist_unhashed(&e->colision)); | 383 | D_ASSERT(hlist_unhashed(&e->colision)); |
368 | mempool_free(e, drbd_ee_mempool); | 384 | mempool_free(e, drbd_ee_mempool); |
369 | } | 385 | } |
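A small aside on the nr_pages computation introduced in drbd_alloc_ee() above: (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT is the usual round-up division by the page size. A standalone check, assuming the common 4 KiB page size purely for illustration:

#include <assert.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)	/* 4096, assumed for this example only */

int main(void)
{
	unsigned data_size = 9000;	/* bytes */
	unsigned long nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	assert(nr_pages == 3);		/* 9000 bytes need three 4 KiB pages */
	assert(((4096u + PAGE_SIZE - 1) >> PAGE_SHIFT) == 1);
	assert(((4097u + PAGE_SIZE - 1) >> PAGE_SHIFT) == 2);
	return 0;
}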
@@ -902,7 +918,7 @@ retry: | |||
902 | if (!drbd_send_protocol(mdev)) | 918 | if (!drbd_send_protocol(mdev)) |
903 | return -1; | 919 | return -1; |
904 | drbd_send_sync_param(mdev, &mdev->sync_conf); | 920 | drbd_send_sync_param(mdev, &mdev->sync_conf); |
905 | drbd_send_sizes(mdev, 0); | 921 | drbd_send_sizes(mdev, 0, 0); |
906 | drbd_send_uuids(mdev); | 922 | drbd_send_uuids(mdev); |
907 | drbd_send_state(mdev); | 923 | drbd_send_state(mdev); |
908 | clear_bit(USE_DEGR_WFC_T, &mdev->flags); | 924 | clear_bit(USE_DEGR_WFC_T, &mdev->flags); |
@@ -946,7 +962,8 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d | |||
946 | int rv; | 962 | int rv; |
947 | 963 | ||
948 | if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { | 964 | if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { |
949 | rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL); | 965 | rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, |
966 | NULL, BLKDEV_IFL_WAIT); | ||
950 | if (rv) { | 967 | if (rv) { |
951 | dev_err(DEV, "local disk flush failed with status %d\n", rv); | 968 | dev_err(DEV, "local disk flush failed with status %d\n", rv); |
952 | /* would rather check on EOPNOTSUPP, but that is not reliable. | 969 | /* would rather check on EOPNOTSUPP, but that is not reliable. |
@@ -1120,6 +1137,101 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) | |||
1120 | } | 1137 | } |
1121 | 1138 | ||
1122 | /** | 1139 | /** |
1140 | * drbd_submit_ee() | ||
1141 | * @mdev: DRBD device. | ||
1142 | * @e: epoch entry | ||
1143 | * @rw: flag field, see bio->bi_rw | ||
1144 | */ | ||
1145 | /* TODO allocate from our own bio_set. */ | ||
1146 | int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, | ||
1147 | const unsigned rw, const int fault_type) | ||
1148 | { | ||
1149 | struct bio *bios = NULL; | ||
1150 | struct bio *bio; | ||
1151 | struct page *page = e->pages; | ||
1152 | sector_t sector = e->sector; | ||
1153 | unsigned ds = e->size; | ||
1154 | unsigned n_bios = 0; | ||
1155 | unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; | ||
1156 | |||
1157 | if (atomic_read(&mdev->new_c_uuid)) { | ||
1158 | if (atomic_add_unless(&mdev->new_c_uuid, -1, 1)) { | ||
1159 | drbd_uuid_new_current(mdev); | ||
1160 | drbd_md_sync(mdev); | ||
1161 | |||
1162 | atomic_dec(&mdev->new_c_uuid); | ||
1163 | wake_up(&mdev->misc_wait); | ||
1164 | } | ||
1165 | wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid)); | ||
1166 | } | ||
1167 | |||
1168 | /* In most cases, we will only need one bio. But in case the lower | ||
1169 | * level restrictions happen to be different at this offset on this | ||
1170 | * side than those of the sending peer, we may need to submit the | ||
1171 | * request in more than one bio. */ | ||
1172 | next_bio: | ||
1173 | bio = bio_alloc(GFP_NOIO, nr_pages); | ||
1174 | if (!bio) { | ||
1175 | dev_err(DEV, "submit_ee: Allocation of a bio failed\n"); | ||
1176 | goto fail; | ||
1177 | } | ||
1178 | /* > e->sector, unless this is the first bio */ | ||
1179 | bio->bi_sector = sector; | ||
1180 | bio->bi_bdev = mdev->ldev->backing_bdev; | ||
1181 | /* we special case some flags in the multi-bio case, see below | ||
1182 | * (BIO_RW_UNPLUG, BIO_RW_BARRIER) */ | ||
1183 | bio->bi_rw = rw; | ||
1184 | bio->bi_private = e; | ||
1185 | bio->bi_end_io = drbd_endio_sec; | ||
1186 | |||
1187 | bio->bi_next = bios; | ||
1188 | bios = bio; | ||
1189 | ++n_bios; | ||
1190 | |||
1191 | page_chain_for_each(page) { | ||
1192 | unsigned len = min_t(unsigned, ds, PAGE_SIZE); | ||
1193 | if (!bio_add_page(bio, page, len, 0)) { | ||
1194 | /* a single page must always be possible! */ | ||
1195 | BUG_ON(bio->bi_vcnt == 0); | ||
1196 | goto next_bio; | ||
1197 | } | ||
1198 | ds -= len; | ||
1199 | sector += len >> 9; | ||
1200 | --nr_pages; | ||
1201 | } | ||
1202 | D_ASSERT(page == NULL); | ||
1203 | D_ASSERT(ds == 0); | ||
1204 | |||
1205 | atomic_set(&e->pending_bios, n_bios); | ||
1206 | do { | ||
1207 | bio = bios; | ||
1208 | bios = bios->bi_next; | ||
1209 | bio->bi_next = NULL; | ||
1210 | |||
1211 | /* strip off BIO_RW_UNPLUG unless it is the last bio */ | ||
1212 | if (bios) | ||
1213 | bio->bi_rw &= ~(1<<BIO_RW_UNPLUG); | ||
1214 | |||
1215 | drbd_generic_make_request(mdev, fault_type, bio); | ||
1216 | |||
1217 | /* strip off BIO_RW_BARRIER, | ||
1218 | * unless it is the first or last bio */ | ||
1219 | if (bios && bios->bi_next) | ||
1220 | bios->bi_rw &= ~(1<<BIO_RW_BARRIER); | ||
1221 | } while (bios); | ||
1222 | maybe_kick_lo(mdev); | ||
1223 | return 0; | ||
1224 | |||
1225 | fail: | ||
1226 | while (bios) { | ||
1227 | bio = bios; | ||
1228 | bios = bios->bi_next; | ||
1229 | bio_put(bio); | ||
1230 | } | ||
1231 | return -ENOMEM; | ||
1232 | } | ||
1233 | |||
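drbd_submit_ee() above packs the epoch entry's page chain into as few bios as the lower-level device will accept, opening a fresh bio whenever bio_add_page() refuses another page (the next_bio: label). Reduced to its essence, that control flow is "fill the current container until it refuses, then start a new one". The userspace sketch below models only that counting logic; BATCH_CAP and pack_into_batches are invented for the example, whereas the real limit comes from the backing device's queue limits:

#include <stdio.h>

#define BATCH_CAP 4	/* stands in for whatever the lower-level queue accepts */

/* Pack n_items into batches of at most BATCH_CAP, opening a new batch
 * whenever the current one is full (models the next_bio: path above). */
static int pack_into_batches(int n_items)
{
	int batches = 0, in_batch = 0;

	for (int i = 0; i < n_items; i++) {
		if (in_batch == BATCH_CAP || batches == 0) {
			batches++;	/* "bio_alloc" a fresh batch */
			in_batch = 0;
		}
		in_batch++;		/* "bio_add_page" succeeded */
	}
	return batches;
}

int main(void)
{
	printf("10 pages -> %d bios of capacity %d\n", pack_into_batches(10), BATCH_CAP);
	return 0;
}

With a capacity of 4, ten pages end up in three batches, which mirrors how a single epoch entry can fan out into several bios when the local queue limits differ from the peer's.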
1234 | /** | ||
1123 | * w_e_reissue() - Worker callback; Resubmit a bio, without BIO_RW_BARRIER set | 1235 | * w_e_reissue() - Worker callback; Resubmit a bio, without BIO_RW_BARRIER set |
1124 | * @mdev: DRBD device. | 1236 | * @mdev: DRBD device. |
1125 | * @w: work object. | 1237 | * @w: work object. |
@@ -1128,8 +1240,6 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) | |||
1128 | int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local) | 1240 | int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local) |
1129 | { | 1241 | { |
1130 | struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; | 1242 | struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; |
1131 | struct bio *bio = e->private_bio; | ||
1132 | |||
1133 | /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place, | 1243 | /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place, |
1134 | (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch) | 1244 | (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch) |
1135 | so that we can finish that epoch in drbd_may_finish_epoch(). | 1245 | so that we can finish that epoch in drbd_may_finish_epoch(). |
@@ -1143,33 +1253,17 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea | |||
1143 | if (previous_epoch(mdev, e->epoch)) | 1253 | if (previous_epoch(mdev, e->epoch)) |
1144 | dev_warn(DEV, "Write ordering was not enforced (one time event)\n"); | 1254 | dev_warn(DEV, "Write ordering was not enforced (one time event)\n"); |
1145 | 1255 | ||
1146 | /* prepare bio for re-submit, | ||
1147 | * re-init volatile members */ | ||
1148 | /* we still have a local reference, | 1256 | /* we still have a local reference, |
1149 | * get_ldev was done in receive_Data. */ | 1257 | * get_ldev was done in receive_Data. */ |
1150 | bio->bi_bdev = mdev->ldev->backing_bdev; | ||
1151 | bio->bi_sector = e->sector; | ||
1152 | bio->bi_size = e->size; | ||
1153 | bio->bi_idx = 0; | ||
1154 | |||
1155 | bio->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
1156 | bio->bi_flags |= 1 << BIO_UPTODATE; | ||
1157 | |||
1158 | /* don't know whether this is necessary: */ | ||
1159 | bio->bi_phys_segments = 0; | ||
1160 | bio->bi_next = NULL; | ||
1161 | |||
1162 | /* these should be unchanged: */ | ||
1163 | /* bio->bi_end_io = drbd_endio_write_sec; */ | ||
1164 | /* bio->bi_vcnt = whatever; */ | ||
1165 | 1258 | ||
1166 | e->w.cb = e_end_block; | 1259 | e->w.cb = e_end_block; |
1167 | 1260 | if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_DT_WR) != 0) { | |
1168 | /* This is no longer a barrier request. */ | 1261 | /* drbd_submit_ee fails for one reason only: |
1169 | bio->bi_rw &= ~(1UL << BIO_RW_BARRIER); | 1262 | * it was not able to allocate sufficient bios. |
1170 | 1263 | * requeue, try again later. */ | |
1171 | drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, bio); | 1264 | e->w.cb = w_e_reissue; |
1172 | 1265 | drbd_queue_work(&mdev->data.work, &e->w); | |
1266 | } | ||
1173 | return 1; | 1267 | return 1; |
1174 | } | 1268 | } |
1175 | 1269 | ||
@@ -1261,13 +1355,13 @@ static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) | |||
1261 | static struct drbd_epoch_entry * | 1355 | static struct drbd_epoch_entry * |
1262 | read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) | 1356 | read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) |
1263 | { | 1357 | { |
1358 | const sector_t capacity = drbd_get_capacity(mdev->this_bdev); | ||
1264 | struct drbd_epoch_entry *e; | 1359 | struct drbd_epoch_entry *e; |
1265 | struct bio_vec *bvec; | ||
1266 | struct page *page; | 1360 | struct page *page; |
1267 | struct bio *bio; | 1361 | int dgs, ds, rr; |
1268 | int dgs, ds, i, rr; | ||
1269 | void *dig_in = mdev->int_dig_in; | 1362 | void *dig_in = mdev->int_dig_in; |
1270 | void *dig_vv = mdev->int_dig_vv; | 1363 | void *dig_vv = mdev->int_dig_vv; |
1364 | unsigned long *data; | ||
1271 | 1365 | ||
1272 | dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? | 1366 | dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? |
1273 | crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; | 1367 | crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; |
@@ -1286,29 +1380,44 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ | |||
1286 | ERR_IF(data_size & 0x1ff) return NULL; | 1380 | ERR_IF(data_size & 0x1ff) return NULL; |
1287 | ERR_IF(data_size > DRBD_MAX_SEGMENT_SIZE) return NULL; | 1381 | ERR_IF(data_size > DRBD_MAX_SEGMENT_SIZE) return NULL; |
1288 | 1382 | ||
1383 | /* even though we trust our peer, | ||
1384 | * we sometimes have to double check. */ | ||
1385 | if (sector + (data_size>>9) > capacity) { | ||
1386 | dev_err(DEV, "capacity: %llus < sector: %llus + size: %u\n", | ||
1387 | (unsigned long long)capacity, | ||
1388 | (unsigned long long)sector, data_size); | ||
1389 | return NULL; | ||
1390 | } | ||
1391 | |||
1289 | /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD | 1392 | /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD |
1290 | * "criss-cross" setup, that might cause write-out on some other DRBD, | 1393 | * "criss-cross" setup, that might cause write-out on some other DRBD, |
1291 | * which in turn might block on the other node at this very place. */ | 1394 | * which in turn might block on the other node at this very place. */ |
1292 | e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); | 1395 | e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); |
1293 | if (!e) | 1396 | if (!e) |
1294 | return NULL; | 1397 | return NULL; |
1295 | bio = e->private_bio; | 1398 | |
1296 | ds = data_size; | 1399 | ds = data_size; |
1297 | bio_for_each_segment(bvec, bio, i) { | 1400 | page = e->pages; |
1298 | page = bvec->bv_page; | 1401 | page_chain_for_each(page) { |
1299 | rr = drbd_recv(mdev, kmap(page), min_t(int, ds, PAGE_SIZE)); | 1402 | unsigned len = min_t(int, ds, PAGE_SIZE); |
1403 | data = kmap(page); | ||
1404 | rr = drbd_recv(mdev, data, len); | ||
1405 | if (FAULT_ACTIVE(mdev, DRBD_FAULT_RECEIVE)) { | ||
1406 | dev_err(DEV, "Fault injection: Corrupting data on receive\n"); | ||
1407 | data[0] = data[0] ^ (unsigned long)-1; | ||
1408 | } | ||
1300 | kunmap(page); | 1409 | kunmap(page); |
1301 | if (rr != min_t(int, ds, PAGE_SIZE)) { | 1410 | if (rr != len) { |
1302 | drbd_free_ee(mdev, e); | 1411 | drbd_free_ee(mdev, e); |
1303 | dev_warn(DEV, "short read receiving data: read %d expected %d\n", | 1412 | dev_warn(DEV, "short read receiving data: read %d expected %d\n", |
1304 | rr, min_t(int, ds, PAGE_SIZE)); | 1413 | rr, len); |
1305 | return NULL; | 1414 | return NULL; |
1306 | } | 1415 | } |
1307 | ds -= rr; | 1416 | ds -= rr; |
1308 | } | 1417 | } |
1309 | 1418 | ||
1310 | if (dgs) { | 1419 | if (dgs) { |
1311 | drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv); | 1420 | drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv); |
1312 | if (memcmp(dig_in, dig_vv, dgs)) { | 1421 | if (memcmp(dig_in, dig_vv, dgs)) { |
1313 | dev_err(DEV, "Digest integrity check FAILED.\n"); | 1422 | dev_err(DEV, "Digest integrity check FAILED.\n"); |
1314 | drbd_bcast_ee(mdev, "digest failed", | 1423 | drbd_bcast_ee(mdev, "digest failed", |
@@ -1330,7 +1439,10 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) | |||
1330 | int rr, rv = 1; | 1439 | int rr, rv = 1; |
1331 | void *data; | 1440 | void *data; |
1332 | 1441 | ||
1333 | page = drbd_pp_alloc(mdev, 1); | 1442 | if (!data_size) |
1443 | return TRUE; | ||
1444 | |||
1445 | page = drbd_pp_alloc(mdev, 1, 1); | ||
1334 | 1446 | ||
1335 | data = kmap(page); | 1447 | data = kmap(page); |
1336 | while (data_size) { | 1448 | while (data_size) { |
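The drbd_drain_block() hunk above now short-circuits a zero-length payload and takes its scratch page through the new two-argument drbd_pp_alloc(). The point of the function is that even an unwanted payload has to be read off the socket, otherwise the stream would fall out of step with the packet framing. A hedged userspace equivalent using read(2) and a single fixed scratch buffer (drain_bytes is an invented name; the fragment is meant to be called from a larger program):

#include <unistd.h>

/* Read and discard exactly "count" bytes from fd using one scratch buffer. */
static int drain_bytes(int fd, size_t count)
{
	char scratch[4096];

	while (count) {
		size_t want = count < sizeof(scratch) ? count : sizeof(scratch);
		ssize_t got = read(fd, scratch, want);

		if (got <= 0)
			return -1;	/* EOF or error: caller tears down the connection */
		count -= (size_t)got;
	}
	return 0;
}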
@@ -1394,7 +1506,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, | |||
1394 | } | 1506 | } |
1395 | 1507 | ||
1396 | if (dgs) { | 1508 | if (dgs) { |
1397 | drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv); | 1509 | drbd_csum_bio(mdev, mdev->integrity_r_tfm, bio, dig_vv); |
1398 | if (memcmp(dig_in, dig_vv, dgs)) { | 1510 | if (memcmp(dig_in, dig_vv, dgs)) { |
1399 | dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n"); | 1511 | dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n"); |
1400 | return 0; | 1512 | return 0; |
@@ -1415,7 +1527,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u | |||
1415 | 1527 | ||
1416 | D_ASSERT(hlist_unhashed(&e->colision)); | 1528 | D_ASSERT(hlist_unhashed(&e->colision)); |
1417 | 1529 | ||
1418 | if (likely(drbd_bio_uptodate(e->private_bio))) { | 1530 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { |
1419 | drbd_set_in_sync(mdev, sector, e->size); | 1531 | drbd_set_in_sync(mdev, sector, e->size); |
1420 | ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e); | 1532 | ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e); |
1421 | } else { | 1533 | } else { |
@@ -1434,30 +1546,28 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si | |||
1434 | struct drbd_epoch_entry *e; | 1546 | struct drbd_epoch_entry *e; |
1435 | 1547 | ||
1436 | e = read_in_block(mdev, ID_SYNCER, sector, data_size); | 1548 | e = read_in_block(mdev, ID_SYNCER, sector, data_size); |
1437 | if (!e) { | 1549 | if (!e) |
1438 | put_ldev(mdev); | 1550 | goto fail; |
1439 | return FALSE; | ||
1440 | } | ||
1441 | 1551 | ||
1442 | dec_rs_pending(mdev); | 1552 | dec_rs_pending(mdev); |
1443 | 1553 | ||
1444 | e->private_bio->bi_end_io = drbd_endio_write_sec; | ||
1445 | e->private_bio->bi_rw = WRITE; | ||
1446 | e->w.cb = e_end_resync_block; | ||
1447 | |||
1448 | inc_unacked(mdev); | 1554 | inc_unacked(mdev); |
1449 | /* corresponding dec_unacked() in e_end_resync_block() | 1555 | /* corresponding dec_unacked() in e_end_resync_block() |
1450 | * respective _drbd_clear_done_ee */ | 1556 | * respective _drbd_clear_done_ee */ |
1451 | 1557 | ||
1558 | e->w.cb = e_end_resync_block; | ||
1559 | |||
1452 | spin_lock_irq(&mdev->req_lock); | 1560 | spin_lock_irq(&mdev->req_lock); |
1453 | list_add(&e->w.list, &mdev->sync_ee); | 1561 | list_add(&e->w.list, &mdev->sync_ee); |
1454 | spin_unlock_irq(&mdev->req_lock); | 1562 | spin_unlock_irq(&mdev->req_lock); |
1455 | 1563 | ||
1456 | drbd_generic_make_request(mdev, DRBD_FAULT_RS_WR, e->private_bio); | 1564 | if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) |
1457 | /* accounting done in endio */ | 1565 | return TRUE; |
1458 | 1566 | ||
1459 | maybe_kick_lo(mdev); | 1567 | drbd_free_ee(mdev, e); |
1460 | return TRUE; | 1568 | fail: |
1569 | put_ldev(mdev); | ||
1570 | return FALSE; | ||
1461 | } | 1571 | } |
1462 | 1572 | ||
1463 | static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) | 1573 | static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) |
@@ -1552,7 +1662,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1552 | } | 1662 | } |
1553 | 1663 | ||
1554 | if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { | 1664 | if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { |
1555 | if (likely(drbd_bio_uptodate(e->private_bio))) { | 1665 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { |
1556 | pcmd = (mdev->state.conn >= C_SYNC_SOURCE && | 1666 | pcmd = (mdev->state.conn >= C_SYNC_SOURCE && |
1557 | mdev->state.conn <= C_PAUSED_SYNC_T && | 1667 | mdev->state.conn <= C_PAUSED_SYNC_T && |
1558 | e->flags & EE_MAY_SET_IN_SYNC) ? | 1668 | e->flags & EE_MAY_SET_IN_SYNC) ? |
@@ -1698,7 +1808,6 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h) | |||
1698 | return FALSE; | 1808 | return FALSE; |
1699 | } | 1809 | } |
1700 | 1810 | ||
1701 | e->private_bio->bi_end_io = drbd_endio_write_sec; | ||
1702 | e->w.cb = e_end_block; | 1811 | e->w.cb = e_end_block; |
1703 | 1812 | ||
1704 | spin_lock(&mdev->epoch_lock); | 1813 | spin_lock(&mdev->epoch_lock); |
@@ -1894,12 +2003,8 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h) | |||
1894 | drbd_al_begin_io(mdev, e->sector); | 2003 | drbd_al_begin_io(mdev, e->sector); |
1895 | } | 2004 | } |
1896 | 2005 | ||
1897 | e->private_bio->bi_rw = rw; | 2006 | if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) |
1898 | drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, e->private_bio); | 2007 | return TRUE; |
1899 | /* accounting done in endio */ | ||
1900 | |||
1901 | maybe_kick_lo(mdev); | ||
1902 | return TRUE; | ||
1903 | 2008 | ||
1904 | out_interrupted: | 2009 | out_interrupted: |
1905 | /* yes, the epoch_size now is imbalanced. | 2010 | /* yes, the epoch_size now is imbalanced. |
@@ -1945,7 +2050,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) | |||
1945 | "no local data.\n"); | 2050 | "no local data.\n"); |
1946 | drbd_send_ack_rp(mdev, h->command == P_DATA_REQUEST ? P_NEG_DREPLY : | 2051 | drbd_send_ack_rp(mdev, h->command == P_DATA_REQUEST ? P_NEG_DREPLY : |
1947 | P_NEG_RS_DREPLY , p); | 2052 | P_NEG_RS_DREPLY , p); |
1948 | return TRUE; | 2053 | return drbd_drain_block(mdev, h->length - brps); |
1949 | } | 2054 | } |
1950 | 2055 | ||
1951 | /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD | 2056 | /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD |
@@ -1957,9 +2062,6 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) | |||
1957 | return FALSE; | 2062 | return FALSE; |
1958 | } | 2063 | } |
1959 | 2064 | ||
1960 | e->private_bio->bi_rw = READ; | ||
1961 | e->private_bio->bi_end_io = drbd_endio_read_sec; | ||
1962 | |||
1963 | switch (h->command) { | 2065 | switch (h->command) { |
1964 | case P_DATA_REQUEST: | 2066 | case P_DATA_REQUEST: |
1965 | e->w.cb = w_e_end_data_req; | 2067 | e->w.cb = w_e_end_data_req; |
@@ -2053,10 +2155,8 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) | |||
2053 | 2155 | ||
2054 | inc_unacked(mdev); | 2156 | inc_unacked(mdev); |
2055 | 2157 | ||
2056 | drbd_generic_make_request(mdev, fault_type, e->private_bio); | 2158 | if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) |
2057 | maybe_kick_lo(mdev); | 2159 | return TRUE; |
2058 | |||
2059 | return TRUE; | ||
2060 | 2160 | ||
2061 | out_free_e: | 2161 | out_free_e: |
2062 | kfree(di); | 2162 | kfree(di); |
@@ -2473,6 +2573,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol | |||
2473 | hg > 0 ? "source" : "target"); | 2573 | hg > 0 ? "source" : "target"); |
2474 | } | 2574 | } |
2475 | 2575 | ||
2576 | if (abs(hg) == 100) | ||
2577 | drbd_khelper(mdev, "initial-split-brain"); | ||
2578 | |||
2476 | if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) { | 2579 | if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) { |
2477 | int pcount = (mdev->state.role == R_PRIMARY) | 2580 | int pcount = (mdev->state.role == R_PRIMARY) |
2478 | + (peer_role == R_PRIMARY); | 2581 | + (peer_role == R_PRIMARY); |
@@ -2518,7 +2621,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol | |||
2518 | * after an attempted attach on a diskless node. | 2621 | * after an attempted attach on a diskless node. |
2519 | * We just refuse to attach -- well, we drop the "connection" | 2622 | * We just refuse to attach -- well, we drop the "connection" |
2520 | * to that disk, in a way... */ | 2623 | * to that disk, in a way... */ |
2521 | dev_alert(DEV, "Split-Brain detected, dropping connection!\n"); | 2624 | dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n"); |
2522 | drbd_khelper(mdev, "split-brain"); | 2625 | drbd_khelper(mdev, "split-brain"); |
2523 | return C_MASK; | 2626 | return C_MASK; |
2524 | } | 2627 | } |
@@ -2849,7 +2952,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) | |||
2849 | unsigned int max_seg_s; | 2952 | unsigned int max_seg_s; |
2850 | sector_t p_size, p_usize, my_usize; | 2953 | sector_t p_size, p_usize, my_usize; |
2851 | int ldsc = 0; /* local disk size changed */ | 2954 | int ldsc = 0; /* local disk size changed */ |
2852 | enum drbd_conns nconn; | 2955 | enum dds_flags ddsf; |
2853 | 2956 | ||
2854 | ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; | 2957 | ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; |
2855 | if (drbd_recv(mdev, h->payload, h->length) != h->length) | 2958 | if (drbd_recv(mdev, h->payload, h->length) != h->length) |
@@ -2905,8 +3008,9 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) | |||
2905 | } | 3008 | } |
2906 | #undef min_not_zero | 3009 | #undef min_not_zero |
2907 | 3010 | ||
3011 | ddsf = be16_to_cpu(p->dds_flags); | ||
2908 | if (get_ldev(mdev)) { | 3012 | if (get_ldev(mdev)) { |
2909 | dd = drbd_determin_dev_size(mdev, 0); | 3013 | dd = drbd_determin_dev_size(mdev, ddsf); |
2910 | put_ldev(mdev); | 3014 | put_ldev(mdev); |
2911 | if (dd == dev_size_error) | 3015 | if (dd == dev_size_error) |
2912 | return FALSE; | 3016 | return FALSE; |
@@ -2916,33 +3020,21 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) | |||
2916 | drbd_set_my_capacity(mdev, p_size); | 3020 | drbd_set_my_capacity(mdev, p_size); |
2917 | } | 3021 | } |
2918 | 3022 | ||
2919 | if (mdev->p_uuid && mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { | ||
2920 | nconn = drbd_sync_handshake(mdev, | ||
2921 | mdev->state.peer, mdev->state.pdsk); | ||
2922 | put_ldev(mdev); | ||
2923 | |||
2924 | if (nconn == C_MASK) { | ||
2925 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | ||
2926 | return FALSE; | ||
2927 | } | ||
2928 | |||
2929 | if (drbd_request_state(mdev, NS(conn, nconn)) < SS_SUCCESS) { | ||
2930 | drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); | ||
2931 | return FALSE; | ||
2932 | } | ||
2933 | } | ||
2934 | |||
2935 | if (get_ldev(mdev)) { | 3023 | if (get_ldev(mdev)) { |
2936 | if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { | 3024 | if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { |
2937 | mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); | 3025 | mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); |
2938 | ldsc = 1; | 3026 | ldsc = 1; |
2939 | } | 3027 | } |
2940 | 3028 | ||
2941 | max_seg_s = be32_to_cpu(p->max_segment_size); | 3029 | if (mdev->agreed_pro_version < 94) |
3030 | max_seg_s = be32_to_cpu(p->max_segment_size); | ||
3031 | else /* drbd 8.3.8 onwards */ | ||
3032 | max_seg_s = DRBD_MAX_SEGMENT_SIZE; | ||
3033 | |||
2942 | if (max_seg_s != queue_max_segment_size(mdev->rq_queue)) | 3034 | if (max_seg_s != queue_max_segment_size(mdev->rq_queue)) |
2943 | drbd_setup_queue_param(mdev, max_seg_s); | 3035 | drbd_setup_queue_param(mdev, max_seg_s); |
2944 | 3036 | ||
2945 | drbd_setup_order_type(mdev, be32_to_cpu(p->queue_order_type)); | 3037 | drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); |
2946 | put_ldev(mdev); | 3038 | put_ldev(mdev); |
2947 | } | 3039 | } |
2948 | 3040 | ||
@@ -2951,14 +3043,17 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) | |||
2951 | drbd_get_capacity(mdev->this_bdev) || ldsc) { | 3043 | drbd_get_capacity(mdev->this_bdev) || ldsc) { |
2952 | /* we have different sizes, probably peer | 3044 | /* we have different sizes, probably peer |
2953 | * needs to know my new size... */ | 3045 | * needs to know my new size... */ |
2954 | drbd_send_sizes(mdev, 0); | 3046 | drbd_send_sizes(mdev, 0, ddsf); |
2955 | } | 3047 | } |
2956 | if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) || | 3048 | if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) || |
2957 | (dd == grew && mdev->state.conn == C_CONNECTED)) { | 3049 | (dd == grew && mdev->state.conn == C_CONNECTED)) { |
2958 | if (mdev->state.pdsk >= D_INCONSISTENT && | 3050 | if (mdev->state.pdsk >= D_INCONSISTENT && |
2959 | mdev->state.disk >= D_INCONSISTENT) | 3051 | mdev->state.disk >= D_INCONSISTENT) { |
2960 | resync_after_online_grow(mdev); | 3052 | if (ddsf & DDSF_NO_RESYNC) |
2961 | else | 3053 | dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n"); |
3054 | else | ||
3055 | resync_after_online_grow(mdev); | ||
3056 | } else | ||
2962 | set_bit(RESYNC_AFTER_NEG, &mdev->flags); | 3057 | set_bit(RESYNC_AFTER_NEG, &mdev->flags); |
2963 | } | 3058 | } |
2964 | } | 3059 | } |
@@ -3490,6 +3585,92 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) | |||
3490 | return TRUE; | 3585 | return TRUE; |
3491 | } | 3586 | } |
3492 | 3587 | ||
3588 | static void timeval_sub_us(struct timeval* tv, unsigned int us) | ||
3589 | { | ||
3590 | tv->tv_sec -= us / 1000000; | ||
3591 | us = us % 1000000; | ||
3592 | if (tv->tv_usec < us) { | ||
3593 | tv->tv_usec += 1000000; | ||
3594 | tv->tv_sec--; | ||
3595 | } | ||
3596 | tv->tv_usec -= us; | ||
3597 | } | ||
3598 | |||
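timeval_sub_us() above subtracts a microsecond offset with a manual borrow: for the final subtraction never to underflow, the borrow has to be taken whenever tv_usec is smaller than the remaining microseconds. A standalone check of that logic with one concrete value (tv_sub_us is a copy written for illustration, not the kernel function):

#include <assert.h>
#include <sys/time.h>

static void tv_sub_us(struct timeval *tv, unsigned int us)
{
	tv->tv_sec -= us / 1000000;
	us %= 1000000;
	if (tv->tv_usec < us) {		/* borrow one second before subtracting */
		tv->tv_usec += 1000000;
		tv->tv_sec--;
	}
	tv->tv_usec -= us;
}

int main(void)
{
	struct timeval tv = { .tv_sec = 10, .tv_usec = 200000 };

	tv_sub_us(&tv, 1500300);	/* subtract 1.5003 seconds */
	assert(tv.tv_sec == 8 && tv.tv_usec == 699700);
	return 0;
}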
3599 | static void got_delay_probe(struct drbd_conf *mdev, int from, struct p_delay_probe *p) | ||
3600 | { | ||
3601 | struct delay_probe *dp; | ||
3602 | struct list_head *le; | ||
3603 | struct timeval now; | ||
3604 | int seq_num; | ||
3605 | int offset; | ||
3606 | int data_delay; | ||
3607 | |||
3608 | seq_num = be32_to_cpu(p->seq_num); | ||
3609 | offset = be32_to_cpu(p->offset); | ||
3610 | |||
3611 | spin_lock(&mdev->peer_seq_lock); | ||
3612 | if (!list_empty(&mdev->delay_probes)) { | ||
3613 | if (from == USE_DATA_SOCKET) | ||
3614 | le = mdev->delay_probes.next; | ||
3615 | else | ||
3616 | le = mdev->delay_probes.prev; | ||
3617 | |||
3618 | dp = list_entry(le, struct delay_probe, list); | ||
3619 | |||
3620 | if (dp->seq_num == seq_num) { | ||
3621 | list_del(le); | ||
3622 | spin_unlock(&mdev->peer_seq_lock); | ||
3623 | do_gettimeofday(&now); | ||
3624 | timeval_sub_us(&now, offset); | ||
3625 | data_delay = | ||
3626 | now.tv_usec - dp->time.tv_usec + | ||
3627 | (now.tv_sec - dp->time.tv_sec) * 1000000; | ||
3628 | |||
3629 | if (data_delay > 0) | ||
3630 | mdev->data_delay = data_delay; | ||
3631 | |||
3632 | kfree(dp); | ||
3633 | return; | ||
3634 | } | ||
3635 | |||
3636 | if (dp->seq_num > seq_num) { | ||
3637 | spin_unlock(&mdev->peer_seq_lock); | ||
3638 | dev_warn(DEV, "Previous allocation failure of struct delay_probe?\n"); | ||
3639 | return; /* Do not allocate a struct delay_probe. */ | ||
3640 | } | ||
3641 | } | ||
3642 | spin_unlock(&mdev->peer_seq_lock); | ||
3643 | |||
3644 | dp = kmalloc(sizeof(struct delay_probe), GFP_NOIO); | ||
3645 | if (!dp) { | ||
3646 | dev_warn(DEV, "Failed to allocate a struct delay_probe, do not worry.\n"); | ||
3647 | return; | ||
3648 | } | ||
3649 | |||
3650 | dp->seq_num = seq_num; | ||
3651 | do_gettimeofday(&dp->time); | ||
3652 | timeval_sub_us(&dp->time, offset); | ||
3653 | |||
3654 | spin_lock(&mdev->peer_seq_lock); | ||
3655 | if (from == USE_DATA_SOCKET) | ||
3656 | list_add(&dp->list, &mdev->delay_probes); | ||
3657 | else | ||
3658 | list_add_tail(&dp->list, &mdev->delay_probes); | ||
3659 | spin_unlock(&mdev->peer_seq_lock); | ||
3660 | } | ||
3661 | |||
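got_delay_probe() above pairs probes by sequence number: the first probe of a pair is timestamped on arrival (offset-corrected), and when its twin arrives on the other socket the difference of the two arrival times becomes data_delay. The arithmetic is simply a timeval difference expressed in microseconds. A small self-contained example with invented values; tv_diff_us is not a kernel helper and the socket-pairing logic is left out:

#include <stdio.h>
#include <sys/time.h>

/* Microseconds from "then" to "now" (models the data_delay computation above). */
static long tv_diff_us(const struct timeval *now, const struct timeval *then)
{
	return (now->tv_sec - then->tv_sec) * 1000000L +
	       (now->tv_usec - then->tv_usec);
}

int main(void)
{
	struct timeval first  = { .tv_sec = 100, .tv_usec = 900000 };	/* meta socket probe */
	struct timeval second = { .tv_sec = 101, .tv_usec = 150000 };	/* data socket probe */

	printf("data_delay = %ld us\n", tv_diff_us(&second, &first));	/* prints 250000 */
	return 0;
}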
3662 | static int receive_delay_probe(struct drbd_conf *mdev, struct p_header *h) | ||
3663 | { | ||
3664 | struct p_delay_probe *p = (struct p_delay_probe *)h; | ||
3665 | |||
3666 | ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; | ||
3667 | if (drbd_recv(mdev, h->payload, h->length) != h->length) | ||
3668 | return FALSE; | ||
3669 | |||
3670 | got_delay_probe(mdev, USE_DATA_SOCKET, p); | ||
3671 | return TRUE; | ||
3672 | } | ||
3673 | |||
3493 | typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *); | 3674 | typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *); |
3494 | 3675 | ||
3495 | static drbd_cmd_handler_f drbd_default_handler[] = { | 3676 | static drbd_cmd_handler_f drbd_default_handler[] = { |
@@ -3513,6 +3694,7 @@ static drbd_cmd_handler_f drbd_default_handler[] = { | |||
3513 | [P_OV_REQUEST] = receive_DataRequest, | 3694 | [P_OV_REQUEST] = receive_DataRequest, |
3514 | [P_OV_REPLY] = receive_DataRequest, | 3695 | [P_OV_REPLY] = receive_DataRequest, |
3515 | [P_CSUM_RS_REQUEST] = receive_DataRequest, | 3696 | [P_CSUM_RS_REQUEST] = receive_DataRequest, |
3697 | [P_DELAY_PROBE] = receive_delay_probe, | ||
3516 | /* anything missing from this table is in | 3698 | /* anything missing from this table is in |
3517 | * the asender_tbl, see get_asender_cmd */ | 3699 | * the asender_tbl, see get_asender_cmd */ |
3518 | [P_MAX_CMD] = NULL, | 3700 | [P_MAX_CMD] = NULL, |
@@ -3739,7 +3921,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) | |||
3739 | dev_info(DEV, "net_ee not empty, killed %u entries\n", i); | 3921 | dev_info(DEV, "net_ee not empty, killed %u entries\n", i); |
3740 | i = atomic_read(&mdev->pp_in_use); | 3922 | i = atomic_read(&mdev->pp_in_use); |
3741 | if (i) | 3923 | if (i) |
3742 | dev_info(DEV, "pp_in_use = %u, expected 0\n", i); | 3924 | dev_info(DEV, "pp_in_use = %d, expected 0\n", i); |
3743 | 3925 | ||
3744 | D_ASSERT(list_empty(&mdev->read_ee)); | 3926 | D_ASSERT(list_empty(&mdev->read_ee)); |
3745 | D_ASSERT(list_empty(&mdev->active_ee)); | 3927 | D_ASSERT(list_empty(&mdev->active_ee)); |
@@ -4232,7 +4414,6 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) | |||
4232 | 4414 | ||
4233 | sector = be64_to_cpu(p->sector); | 4415 | sector = be64_to_cpu(p->sector); |
4234 | size = be32_to_cpu(p->blksize); | 4416 | size = be32_to_cpu(p->blksize); |
4235 | D_ASSERT(p->block_id == ID_SYNCER); | ||
4236 | 4417 | ||
4237 | update_peer_seq(mdev, be32_to_cpu(p->seq_num)); | 4418 | update_peer_seq(mdev, be32_to_cpu(p->seq_num)); |
4238 | 4419 | ||
@@ -4290,6 +4471,14 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) | |||
4290 | return TRUE; | 4471 | return TRUE; |
4291 | } | 4472 | } |
4292 | 4473 | ||
4474 | static int got_delay_probe_m(struct drbd_conf *mdev, struct p_header *h) | ||
4475 | { | ||
4476 | struct p_delay_probe *p = (struct p_delay_probe *)h; | ||
4477 | |||
4478 | got_delay_probe(mdev, USE_META_SOCKET, p); | ||
4479 | return TRUE; | ||
4480 | } | ||
4481 | |||
4293 | struct asender_cmd { | 4482 | struct asender_cmd { |
4294 | size_t pkt_size; | 4483 | size_t pkt_size; |
4295 | int (*process)(struct drbd_conf *mdev, struct p_header *h); | 4484 | int (*process)(struct drbd_conf *mdev, struct p_header *h); |
@@ -4314,6 +4503,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) | |||
4314 | [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, | 4503 | [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, |
4315 | [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, | 4504 | [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, |
4316 | [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, | 4505 | [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, |
4506 | [P_DELAY_PROBE] = { sizeof(struct p_delay_probe), got_delay_probe_m }, | ||
4317 | [P_MAX_CMD] = { 0, NULL }, | 4507 | [P_MAX_CMD] = { 0, NULL }, |
4318 | }; | 4508 | }; |
4319 | if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) | 4509 | if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) |
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index de81ab7b4627..3397f11d0ba9 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -722,6 +722,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) | |||
722 | struct drbd_request *req; | 722 | struct drbd_request *req; |
723 | int local, remote; | 723 | int local, remote; |
724 | int err = -EIO; | 724 | int err = -EIO; |
725 | int ret = 0; | ||
725 | 726 | ||
726 | /* allocate outside of all locks; */ | 727 | /* allocate outside of all locks; */ |
727 | req = drbd_req_new(mdev, bio); | 728 | req = drbd_req_new(mdev, bio); |
@@ -784,7 +785,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) | |||
784 | (mdev->state.pdsk == D_INCONSISTENT && | 785 | (mdev->state.pdsk == D_INCONSISTENT && |
785 | mdev->state.conn >= C_CONNECTED)); | 786 | mdev->state.conn >= C_CONNECTED)); |
786 | 787 | ||
787 | if (!(local || remote)) { | 788 | if (!(local || remote) && !mdev->state.susp) { |
788 | dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); | 789 | dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); |
789 | goto fail_free_complete; | 790 | goto fail_free_complete; |
790 | } | 791 | } |
@@ -810,6 +811,16 @@ allocate_barrier: | |||
810 | /* GOOD, everything prepared, grab the spin_lock */ | 811 | /* GOOD, everything prepared, grab the spin_lock */ |
811 | spin_lock_irq(&mdev->req_lock); | 812 | spin_lock_irq(&mdev->req_lock); |
812 | 813 | ||
814 | if (mdev->state.susp) { | ||
815 | /* If we got suspended, use the retry mechanism of | ||
816 | generic_make_request() to restart processing of this | ||
817 | bio. In the next call to drbd_make_request_26 | ||
818 | we sleep in inc_ap_bio() */ | ||
819 | ret = 1; | ||
820 | spin_unlock_irq(&mdev->req_lock); | ||
821 | goto fail_free_complete; | ||
822 | } | ||
823 | |||
813 | if (remote) { | 824 | if (remote) { |
814 | remote = (mdev->state.pdsk == D_UP_TO_DATE || | 825 | remote = (mdev->state.pdsk == D_UP_TO_DATE || |
815 | (mdev->state.pdsk == D_INCONSISTENT && | 826 | (mdev->state.pdsk == D_INCONSISTENT && |
@@ -947,12 +958,14 @@ fail_and_free_req: | |||
947 | req->private_bio = NULL; | 958 | req->private_bio = NULL; |
948 | put_ldev(mdev); | 959 | put_ldev(mdev); |
949 | } | 960 | } |
950 | bio_endio(bio, err); | 961 | if (!ret) |
962 | bio_endio(bio, err); | ||
963 | |||
951 | drbd_req_free(req); | 964 | drbd_req_free(req); |
952 | dec_ap_bio(mdev); | 965 | dec_ap_bio(mdev); |
953 | kfree(b); | 966 | kfree(b); |
954 | 967 | ||
955 | return 0; | 968 | return ret; |
956 | } | 969 | } |
957 | 970 | ||
958 | /* helper function for drbd_make_request | 971 | /* helper function for drbd_make_request |
@@ -962,11 +975,6 @@ fail_and_free_req: | |||
962 | */ | 975 | */ |
963 | static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) | 976 | static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) |
964 | { | 977 | { |
965 | /* Unconfigured */ | ||
966 | if (mdev->state.conn == C_DISCONNECTING && | ||
967 | mdev->state.disk == D_DISKLESS) | ||
968 | return 1; | ||
969 | |||
970 | if (mdev->state.role != R_PRIMARY && | 978 | if (mdev->state.role != R_PRIMARY && |
971 | (!allow_oos || is_write)) { | 979 | (!allow_oos || is_write)) { |
972 | if (__ratelimit(&drbd_ratelimit_state)) { | 980 | if (__ratelimit(&drbd_ratelimit_state)) { |
@@ -1070,15 +1078,21 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) | |||
1070 | 1078 | ||
1071 | /* we need to get a "reference count" (ap_bio_cnt) | 1079 | /* we need to get a "reference count" (ap_bio_cnt) |
1072 | * to avoid races with the disconnect/reconnect/suspend code. | 1080 | * to avoid races with the disconnect/reconnect/suspend code. |
1073 | * In case we need to split the bio here, we need to get two references | 1081 | * In case we need to split the bio here, we need to get three references |
1074 | * atomically, otherwise we might deadlock when trying to submit the | 1082 | * atomically, otherwise we might deadlock when trying to submit the |
1075 | * second one! */ | 1083 | * second one! */ |
1076 | inc_ap_bio(mdev, 2); | 1084 | inc_ap_bio(mdev, 3); |
1077 | 1085 | ||
1078 | D_ASSERT(e_enr == s_enr + 1); | 1086 | D_ASSERT(e_enr == s_enr + 1); |
1079 | 1087 | ||
1080 | drbd_make_request_common(mdev, &bp->bio1); | 1088 | while (drbd_make_request_common(mdev, &bp->bio1)) |
1081 | drbd_make_request_common(mdev, &bp->bio2); | 1089 | inc_ap_bio(mdev, 1); |
1090 | |||
1091 | while (drbd_make_request_common(mdev, &bp->bio2)) | ||
1092 | inc_ap_bio(mdev, 1); | ||
1093 | |||
1094 | dec_ap_bio(mdev); | ||
1095 | |||
1082 | bio_pair_release(bp); | 1096 | bio_pair_release(bp); |
1083 | } | 1097 | } |
1084 | return 0; | 1098 | return 0; |
@@ -1115,7 +1129,7 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct | |||
1115 | } else if (limit && get_ldev(mdev)) { | 1129 | } else if (limit && get_ldev(mdev)) { |
1116 | struct request_queue * const b = | 1130 | struct request_queue * const b = |
1117 | mdev->ldev->backing_bdev->bd_disk->queue; | 1131 | mdev->ldev->backing_bdev->bd_disk->queue; |
1118 | if (b->merge_bvec_fn && mdev->ldev->dc.use_bmbv) { | 1132 | if (b->merge_bvec_fn) { |
1119 | backing_limit = b->merge_bvec_fn(b, bvm, bvec); | 1133 | backing_limit = b->merge_bvec_fn(b, bvm, bvec); |
1120 | limit = min(limit, backing_limit); | 1134 | limit = min(limit, backing_limit); |
1121 | } | 1135 | } |
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c index 76863e3f05be..85179e1fb50a 100644 --- a/drivers/block/drbd/drbd_strings.c +++ b/drivers/block/drbd/drbd_strings.c | |||
@@ -70,7 +70,7 @@ static const char *drbd_disk_s_names[] = { | |||
70 | 70 | ||
71 | static const char *drbd_state_sw_errors[] = { | 71 | static const char *drbd_state_sw_errors[] = { |
72 | [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config", | 72 | [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config", |
73 | [-SS_NO_UP_TO_DATE_DISK] = "Refusing to be Primary without at least one UpToDate disk", | 73 | [-SS_NO_UP_TO_DATE_DISK] = "Need access to UpToDate data", |
74 | [-SS_NO_LOCAL_DISK] = "Can not resync without local disk", | 74 | [-SS_NO_LOCAL_DISK] = "Can not resync without local disk", |
75 | [-SS_NO_REMOTE_DISK] = "Can not resync without remote disk", | 75 | [-SS_NO_REMOTE_DISK] = "Can not resync without remote disk", |
76 | [-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected", | 76 | [-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected", |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d48a1dfd7b24..727ff6339754 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -47,8 +47,7 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca | |||
47 | 47 | ||
48 | /* defined here: | 48 | /* defined here: |
49 | drbd_md_io_complete | 49 | drbd_md_io_complete |
50 | drbd_endio_write_sec | 50 | drbd_endio_sec |
51 | drbd_endio_read_sec | ||
52 | drbd_endio_pri | 51 | drbd_endio_pri |
53 | 52 | ||
54 | * more endio handlers: | 53 | * more endio handlers: |
@@ -85,27 +84,10 @@ void drbd_md_io_complete(struct bio *bio, int error) | |||
85 | /* reads on behalf of the partner, | 84 | /* reads on behalf of the partner, |
86 | * "submitted" by the receiver | 85 | * "submitted" by the receiver |
87 | */ | 86 | */ |
88 | void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) | 87 | void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) |
89 | { | 88 | { |
90 | unsigned long flags = 0; | 89 | unsigned long flags = 0; |
91 | struct drbd_epoch_entry *e = NULL; | 90 | struct drbd_conf *mdev = e->mdev; |
92 | struct drbd_conf *mdev; | ||
93 | int uptodate = bio_flagged(bio, BIO_UPTODATE); | ||
94 | |||
95 | e = bio->bi_private; | ||
96 | mdev = e->mdev; | ||
97 | |||
98 | if (error) | ||
99 | dev_warn(DEV, "read: error=%d s=%llus\n", error, | ||
100 | (unsigned long long)e->sector); | ||
101 | if (!error && !uptodate) { | ||
102 | dev_warn(DEV, "read: setting error to -EIO s=%llus\n", | ||
103 | (unsigned long long)e->sector); | ||
104 | /* strange behavior of some lower level drivers... | ||
105 | * fail the request by clearing the uptodate flag, | ||
106 | * but do not return any error?! */ | ||
107 | error = -EIO; | ||
108 | } | ||
109 | 91 | ||
110 | D_ASSERT(e->block_id != ID_VACANT); | 92 | D_ASSERT(e->block_id != ID_VACANT); |
111 | 93 | ||
@@ -114,49 +96,38 @@ void drbd_endio_read_sec(struct bio *bio, int error) __releases(local) | |||
114 | list_del(&e->w.list); | 96 | list_del(&e->w.list); |
115 | if (list_empty(&mdev->read_ee)) | 97 | if (list_empty(&mdev->read_ee)) |
116 | wake_up(&mdev->ee_wait); | 98 | wake_up(&mdev->ee_wait); |
99 | if (test_bit(__EE_WAS_ERROR, &e->flags)) | ||
100 | __drbd_chk_io_error(mdev, FALSE); | ||
117 | spin_unlock_irqrestore(&mdev->req_lock, flags); | 101 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
118 | 102 | ||
119 | drbd_chk_io_error(mdev, error, FALSE); | ||
120 | drbd_queue_work(&mdev->data.work, &e->w); | 103 | drbd_queue_work(&mdev->data.work, &e->w); |
121 | put_ldev(mdev); | 104 | put_ldev(mdev); |
122 | } | 105 | } |
123 | 106 | ||
107 | static int is_failed_barrier(int ee_flags) | ||
108 | { | ||
109 | return (ee_flags & (EE_IS_BARRIER|EE_WAS_ERROR|EE_RESUBMITTED)) | ||
110 | == (EE_IS_BARRIER|EE_WAS_ERROR); | ||
111 | } | ||
112 | |||
124 | /* writes on behalf of the partner, or resync writes, | 113 | /* writes on behalf of the partner, or resync writes, |
125 | * "submitted" by the receiver. | 114 | * "submitted" by the receiver, final stage. */ |
126 | */ | 115 | static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local) |
127 | void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) | ||
128 | { | 116 | { |
129 | unsigned long flags = 0; | 117 | unsigned long flags = 0; |
130 | struct drbd_epoch_entry *e = NULL; | 118 | struct drbd_conf *mdev = e->mdev; |
131 | struct drbd_conf *mdev; | ||
132 | sector_t e_sector; | 119 | sector_t e_sector; |
133 | int do_wake; | 120 | int do_wake; |
134 | int is_syncer_req; | 121 | int is_syncer_req; |
135 | int do_al_complete_io; | 122 | int do_al_complete_io; |
136 | int uptodate = bio_flagged(bio, BIO_UPTODATE); | ||
137 | int is_barrier = bio_rw_flagged(bio, BIO_RW_BARRIER); | ||
138 | |||
139 | e = bio->bi_private; | ||
140 | mdev = e->mdev; | ||
141 | 123 | ||
142 | if (error) | 124 | /* if this is a failed barrier request, disable use of barriers, |
143 | dev_warn(DEV, "write: error=%d s=%llus\n", error, | 125 | * and schedule for resubmission */ |
144 | (unsigned long long)e->sector); | 126 | if (is_failed_barrier(e->flags)) { |
145 | if (!error && !uptodate) { | ||
146 | dev_warn(DEV, "write: setting error to -EIO s=%llus\n", | ||
147 | (unsigned long long)e->sector); | ||
148 | /* strange behavior of some lower level drivers... | ||
149 | * fail the request by clearing the uptodate flag, | ||
150 | * but do not return any error?! */ | ||
151 | error = -EIO; | ||
152 | } | ||
153 | |||
154 | /* error == -ENOTSUPP would be a better test, | ||
155 | * alas it is not reliable */ | ||
156 | if (error && is_barrier && e->flags & EE_IS_BARRIER) { | ||
157 | drbd_bump_write_ordering(mdev, WO_bdev_flush); | 127 | drbd_bump_write_ordering(mdev, WO_bdev_flush); |
158 | spin_lock_irqsave(&mdev->req_lock, flags); | 128 | spin_lock_irqsave(&mdev->req_lock, flags); |
159 | list_del(&e->w.list); | 129 | list_del(&e->w.list); |
130 | e->flags = (e->flags & ~EE_WAS_ERROR) | EE_RESUBMITTED; | ||
160 | e->w.cb = w_e_reissue; | 131 | e->w.cb = w_e_reissue; |
161 | /* put_ldev actually happens below, once we come here again. */ | 132 | /* put_ldev actually happens below, once we come here again. */ |
162 | __release(local); | 133 | __release(local); |
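Note on the barrier handling above: is_failed_barrier() only matches when EE_IS_BARRIER and EE_WAS_ERROR are both set while EE_RESUBMITTED is still clear, and the reissue path then clears EE_WAS_ERROR and sets EE_RESUBMITTED, so an entry is downgraded and resubmitted at most once. A minimal sketch of that mask test (the numeric flag values are illustrative, not the ones from drbd_int.h):

    /* illustrative bit values only, not the driver's real flag layout */
    enum { EE_IS_BARRIER = 1, EE_WAS_ERROR = 2, EE_RESUBMITTED = 4 };

    static int is_failed_barrier(int ee_flags)
    {
            return (ee_flags & (EE_IS_BARRIER|EE_WAS_ERROR|EE_RESUBMITTED))
                    == (EE_IS_BARRIER|EE_WAS_ERROR);
    }

    /* first failure:  EE_IS_BARRIER|EE_WAS_ERROR                -> 1, reissue      */
    /* second failure: EE_IS_BARRIER|EE_WAS_ERROR|EE_RESUBMITTED -> 0, normal error */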
@@ -167,17 +138,16 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) | |||
167 | 138 | ||
168 | D_ASSERT(e->block_id != ID_VACANT); | 139 | D_ASSERT(e->block_id != ID_VACANT); |
169 | 140 | ||
170 | spin_lock_irqsave(&mdev->req_lock, flags); | ||
171 | mdev->writ_cnt += e->size >> 9; | ||
172 | is_syncer_req = is_syncer_block_id(e->block_id); | ||
173 | |||
174 | /* after we moved e to done_ee, | 141 | /* after we moved e to done_ee, |
175 | * we may no longer access it, | 142 | * we may no longer access it, |
176 | * it may be freed/reused already! | 143 | * it may be freed/reused already! |
177 | * (as soon as we release the req_lock) */ | 144 | * (as soon as we release the req_lock) */ |
178 | e_sector = e->sector; | 145 | e_sector = e->sector; |
179 | do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; | 146 | do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; |
147 | is_syncer_req = is_syncer_block_id(e->block_id); | ||
180 | 148 | ||
149 | spin_lock_irqsave(&mdev->req_lock, flags); | ||
150 | mdev->writ_cnt += e->size >> 9; | ||
181 | list_del(&e->w.list); /* has been on active_ee or sync_ee */ | 151 | list_del(&e->w.list); /* has been on active_ee or sync_ee */ |
182 | list_add_tail(&e->w.list, &mdev->done_ee); | 152 | list_add_tail(&e->w.list, &mdev->done_ee); |
183 | 153 | ||
@@ -190,7 +160,7 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) | |||
190 | ? list_empty(&mdev->sync_ee) | 160 | ? list_empty(&mdev->sync_ee) |
191 | : list_empty(&mdev->active_ee); | 161 | : list_empty(&mdev->active_ee); |
192 | 162 | ||
193 | if (error) | 163 | if (test_bit(__EE_WAS_ERROR, &e->flags)) |
194 | __drbd_chk_io_error(mdev, FALSE); | 164 | __drbd_chk_io_error(mdev, FALSE); |
195 | spin_unlock_irqrestore(&mdev->req_lock, flags); | 165 | spin_unlock_irqrestore(&mdev->req_lock, flags); |
196 | 166 | ||
@@ -205,7 +175,42 @@ void drbd_endio_write_sec(struct bio *bio, int error) __releases(local) | |||
205 | 175 | ||
206 | wake_asender(mdev); | 176 | wake_asender(mdev); |
207 | put_ldev(mdev); | 177 | put_ldev(mdev); |
178 | } | ||
179 | |||
180 | /* writes on behalf of the partner, or resync writes, | ||
181 | * "submitted" by the receiver. | ||
182 | */ | ||
183 | void drbd_endio_sec(struct bio *bio, int error) | ||
184 | { | ||
185 | struct drbd_epoch_entry *e = bio->bi_private; | ||
186 | struct drbd_conf *mdev = e->mdev; | ||
187 | int uptodate = bio_flagged(bio, BIO_UPTODATE); | ||
188 | int is_write = bio_data_dir(bio) == WRITE; | ||
189 | |||
190 | if (error) | ||
191 | dev_warn(DEV, "%s: error=%d s=%llus\n", | ||
192 | is_write ? "write" : "read", error, | ||
193 | (unsigned long long)e->sector); | ||
194 | if (!error && !uptodate) { | ||
195 | dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", | ||
196 | is_write ? "write" : "read", | ||
197 | (unsigned long long)e->sector); | ||
198 | /* strange behavior of some lower level drivers... | ||
199 | * fail the request by clearing the uptodate flag, | ||
200 | * but do not return any error?! */ | ||
201 | error = -EIO; | ||
202 | } | ||
203 | |||
204 | if (error) | ||
205 | set_bit(__EE_WAS_ERROR, &e->flags); | ||
208 | 206 | ||
207 | bio_put(bio); /* no need for the bio anymore */ | ||
208 | if (atomic_dec_and_test(&e->pending_bios)) { | ||
209 | if (is_write) | ||
210 | drbd_endio_write_sec_final(e); | ||
211 | else | ||
212 | drbd_endio_read_sec_final(e); | ||
213 | } | ||
209 | } | 214 | } |
210 | 215 | ||
211 | /* read, readA or write requests on R_PRIMARY coming from drbd_make_request | 216 | /* read, readA or write requests on R_PRIMARY coming from drbd_make_request |
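The hunk above folds the two secondary completion handlers into a single drbd_endio_sec() plus per-direction final stages; because an epoch entry may now be backed by several bios (one per page of the chain), an atomic pending_bios count decides which completion runs the final handler. A hedged sketch of that pattern in plain C11 (the kernel code uses atomic_dec_and_test() on an atomic_t; the names below are illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct entry {
            atomic_int pending_bios;    /* one reference per submitted bio */
            bool       is_write;
    };

    static void final_write(struct entry *e) { /* move to done_ee, wake asender */ }
    static void final_read(struct entry *e)  { /* queue the work item for the worker */ }

    static void endio_sec(struct entry *e)
    {
            /* only the last completing bio runs the final stage */
            if (atomic_fetch_sub(&e->pending_bios, 1) == 1) {
                    if (e->is_write)
                            final_write(e);
                    else
                            final_read(e);
            }
    }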
@@ -295,7 +300,34 @@ int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
295 | return 1; /* Simply ignore this! */ | 300 | return 1; /* Simply ignore this! */ |
296 | } | 301 | } |
297 | 302 | ||
298 | void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest) | 303 | void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest) |
304 | { | ||
305 | struct hash_desc desc; | ||
306 | struct scatterlist sg; | ||
307 | struct page *page = e->pages; | ||
308 | struct page *tmp; | ||
309 | unsigned len; | ||
310 | |||
311 | desc.tfm = tfm; | ||
312 | desc.flags = 0; | ||
313 | |||
314 | sg_init_table(&sg, 1); | ||
315 | crypto_hash_init(&desc); | ||
316 | |||
317 | while ((tmp = page_chain_next(page))) { | ||
318 | /* all but the last page will be fully used */ | ||
319 | sg_set_page(&sg, page, PAGE_SIZE, 0); | ||
320 | crypto_hash_update(&desc, &sg, sg.length); | ||
321 | page = tmp; | ||
322 | } | ||
323 | /* and now the last, possibly only partially used page */ | ||
324 | len = e->size & (PAGE_SIZE - 1); | ||
325 | sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); | ||
326 | crypto_hash_update(&desc, &sg, sg.length); | ||
327 | crypto_hash_final(&desc, digest); | ||
328 | } | ||
329 | |||
330 | void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest) | ||
299 | { | 331 | { |
300 | struct hash_desc desc; | 332 | struct hash_desc desc; |
301 | struct scatterlist sg; | 333 | struct scatterlist sg; |
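drbd_csum_ee() above walks the entry's page chain: every page except the last is hashed in full, and the last page contributes only e->size modulo PAGE_SIZE bytes, falling back to a full page when the size is an exact multiple. A small standalone illustration of the len ?: PAGE_SIZE idiom, assuming a 4096-byte page size:

    #include <stdio.h>

    int main(void)
    {
            const unsigned page_size = 4096;
            const unsigned sizes[] = { 4096, 6144, 8192, 9000 };

            for (int i = 0; i < 4; i++) {
                    unsigned len = sizes[i] & (page_size - 1);  /* size % PAGE_SIZE */
                    printf("size=%u -> last page hashes %u bytes\n",
                           sizes[i], len ? len : page_size);
            }
            return 0;   /* prints 4096, 2048, 4096, 808 */
    }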
@@ -329,11 +361,11 @@ static int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel | |||
329 | return 1; | 361 | return 1; |
330 | } | 362 | } |
331 | 363 | ||
332 | if (likely(drbd_bio_uptodate(e->private_bio))) { | 364 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { |
333 | digest_size = crypto_hash_digestsize(mdev->csums_tfm); | 365 | digest_size = crypto_hash_digestsize(mdev->csums_tfm); |
334 | digest = kmalloc(digest_size, GFP_NOIO); | 366 | digest = kmalloc(digest_size, GFP_NOIO); |
335 | if (digest) { | 367 | if (digest) { |
336 | drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest); | 368 | drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); |
337 | 369 | ||
338 | inc_rs_pending(mdev); | 370 | inc_rs_pending(mdev); |
339 | ok = drbd_send_drequest_csum(mdev, | 371 | ok = drbd_send_drequest_csum(mdev, |
@@ -369,23 +401,21 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) | |||
369 | /* GFP_TRY, because if there is no memory available right now, this may | 401 | /* GFP_TRY, because if there is no memory available right now, this may |
370 | * be rescheduled for later. It is "only" background resync, after all. */ | 402 | * be rescheduled for later. It is "only" background resync, after all. */ |
371 | e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY); | 403 | e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY); |
372 | if (!e) { | 404 | if (!e) |
373 | put_ldev(mdev); | 405 | goto fail; |
374 | return 2; | ||
375 | } | ||
376 | 406 | ||
377 | spin_lock_irq(&mdev->req_lock); | 407 | spin_lock_irq(&mdev->req_lock); |
378 | list_add(&e->w.list, &mdev->read_ee); | 408 | list_add(&e->w.list, &mdev->read_ee); |
379 | spin_unlock_irq(&mdev->req_lock); | 409 | spin_unlock_irq(&mdev->req_lock); |
380 | 410 | ||
381 | e->private_bio->bi_end_io = drbd_endio_read_sec; | ||
382 | e->private_bio->bi_rw = READ; | ||
383 | e->w.cb = w_e_send_csum; | 411 | e->w.cb = w_e_send_csum; |
412 | if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) | ||
413 | return 1; | ||
384 | 414 | ||
385 | mdev->read_cnt += size >> 9; | 415 | drbd_free_ee(mdev, e); |
386 | drbd_generic_make_request(mdev, DRBD_FAULT_RS_RD, e->private_bio); | 416 | fail: |
387 | 417 | put_ldev(mdev); | |
388 | return 1; | 418 | return 2; |
389 | } | 419 | } |
390 | 420 | ||
391 | void resync_timer_fn(unsigned long data) | 421 | void resync_timer_fn(unsigned long data) |
@@ -414,13 +444,25 @@ void resync_timer_fn(unsigned long data) | |||
414 | drbd_queue_work(&mdev->data.work, &mdev->resync_work); | 444 | drbd_queue_work(&mdev->data.work, &mdev->resync_work); |
415 | } | 445 | } |
416 | 446 | ||
447 | static int calc_resync_rate(struct drbd_conf *mdev) | ||
448 | { | ||
449 | int d = mdev->data_delay / 1000; /* us -> ms */ | ||
450 | int td = mdev->sync_conf.throttle_th * 100; /* 0.1s -> ms */ | ||
451 | int hd = mdev->sync_conf.hold_off_th * 100; /* 0.1s -> ms */ | ||
452 | int cr = mdev->sync_conf.rate; | ||
453 | |||
454 | return d <= td ? cr : | ||
455 | d >= hd ? 0 : | ||
456 | cr + (cr * (td - d) / (hd - td)); | ||
457 | } | ||
458 | |||
417 | int w_make_resync_request(struct drbd_conf *mdev, | 459 | int w_make_resync_request(struct drbd_conf *mdev, |
418 | struct drbd_work *w, int cancel) | 460 | struct drbd_work *w, int cancel) |
419 | { | 461 | { |
420 | unsigned long bit; | 462 | unsigned long bit; |
421 | sector_t sector; | 463 | sector_t sector; |
422 | const sector_t capacity = drbd_get_capacity(mdev->this_bdev); | 464 | const sector_t capacity = drbd_get_capacity(mdev->this_bdev); |
423 | int max_segment_size = queue_max_segment_size(mdev->rq_queue); | 465 | int max_segment_size; |
424 | int number, i, size, pe, mx; | 466 | int number, i, size, pe, mx; |
425 | int align, queued, sndbuf; | 467 | int align, queued, sndbuf; |
426 | 468 | ||
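calc_resync_rate() above derives the momentary resync rate from the measured data_delay: at or below throttle_th the configured rate is used unchanged, at or above hold_off_th resync is held off entirely (rate 0), and in between the rate falls off linearly. A worked example with made-up numbers (1000 ms throttle threshold, 3000 ms hold-off threshold, 10000 KB/s configured rate):

    static int calc(int d, int td, int hd, int cr)
    {
            return d <= td ? cr :
                   d >= hd ? 0 :
                   cr + (cr * (td - d) / (hd - td));
    }

    /* calc( 500, 1000, 3000, 10000) == 10000  (below the throttle threshold)   */
    /* calc(2000, 1000, 3000, 10000) ==  5000  (halfway between the thresholds) */
    /* calc(3500, 1000, 3000, 10000) ==     0  (beyond hold-off, resync paused) */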
@@ -446,7 +488,13 @@ int w_make_resync_request(struct drbd_conf *mdev, | |||
446 | return 1; | 488 | return 1; |
447 | } | 489 | } |
448 | 490 | ||
449 | number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ); | 491 | /* starting with drbd 8.3.8, we can handle multi-bio EEs, |
492 | * if it should be necessary */ | ||
493 | max_segment_size = mdev->agreed_pro_version < 94 ? | ||
494 | queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE; | ||
495 | |||
496 | mdev->c_sync_rate = calc_resync_rate(mdev); | ||
497 | number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); | ||
450 | pe = atomic_read(&mdev->rs_pending_cnt); | 498 | pe = atomic_read(&mdev->rs_pending_cnt); |
451 | 499 | ||
452 | mutex_lock(&mdev->data.mutex); | 500 | mutex_lock(&mdev->data.mutex); |
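The request count per wake-up now follows the throttled c_sync_rate instead of the static sync_conf.rate. Assuming BM_BLOCK_SIZE is 4 KiB and SLEEP_TIME is HZ/10 (both values taken from other parts of the DRBD tree, so treat them as assumptions), a 10000 KB/s rate works out to:

    number = SLEEP_TIME * c_sync_rate / ((BM_BLOCK_SIZE/1024) * HZ)
           = (HZ/10) * 10000 / (4 * HZ)
           = 250 requests of 4 KiB per 100 ms  ~= 10000 KB/s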
@@ -509,12 +557,6 @@ next_sector: | |||
509 | * | 557 | * |
510 | * Additionally always align bigger requests, in order to | 558 | * Additionally always align bigger requests, in order to |
511 | * be prepared for all stripe sizes of software RAIDs. | 559 | * be prepared for all stripe sizes of software RAIDs. |
512 | * | ||
513 | * we _do_ care about the agreed-upon q->max_segment_size | ||
514 | * here, as splitting up the requests on the other side is more | ||
515 | * difficult. the consequence is, that on lvm and md and other | ||
516 | * "indirect" devices, this is dead code, since | ||
517 | * q->max_segment_size will be PAGE_SIZE. | ||
518 | */ | 560 | */ |
519 | align = 1; | 561 | align = 1; |
520 | for (;;) { | 562 | for (;;) { |
@@ -806,7 +848,7 @@ out: | |||
806 | /* helper */ | 848 | /* helper */ |
807 | static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e) | 849 | static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e) |
808 | { | 850 | { |
809 | if (drbd_bio_has_active_page(e->private_bio)) { | 851 | if (drbd_ee_has_active_page(e)) { |
810 | /* This might happen if sendpage() has not finished */ | 852 | /* This might happen if sendpage() has not finished */ |
811 | spin_lock_irq(&mdev->req_lock); | 853 | spin_lock_irq(&mdev->req_lock); |
812 | list_add_tail(&e->w.list, &mdev->net_ee); | 854 | list_add_tail(&e->w.list, &mdev->net_ee); |
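drbd_ee_has_active_page() is the page-chain counterpart of the drbd_bio_has_active_page() helper removed from drbd_wrappers.h further down: a page whose page_count() is still above 1 is still referenced by the network stack after sendpage(), so the entry is parked on net_ee instead of being freed. A hypothetical sketch of such a helper, reusing the page_chain_next() walk seen in drbd_csum_ee() above (the real implementation lives elsewhere in this series and may differ):

    static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)
    {
            struct page *page;

            for (page = e->pages; page; page = page_chain_next(page))
                    if (page_count(page) > 1)
                            return 1;   /* sendpage() still holds a reference */
            return 0;
    }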
@@ -832,7 +874,7 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
832 | return 1; | 874 | return 1; |
833 | } | 875 | } |
834 | 876 | ||
835 | if (likely(drbd_bio_uptodate(e->private_bio))) { | 877 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { |
836 | ok = drbd_send_block(mdev, P_DATA_REPLY, e); | 878 | ok = drbd_send_block(mdev, P_DATA_REPLY, e); |
837 | } else { | 879 | } else { |
838 | if (__ratelimit(&drbd_ratelimit_state)) | 880 | if (__ratelimit(&drbd_ratelimit_state)) |
@@ -873,7 +915,7 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
873 | put_ldev(mdev); | 915 | put_ldev(mdev); |
874 | } | 916 | } |
875 | 917 | ||
876 | if (likely(drbd_bio_uptodate(e->private_bio))) { | 918 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { |
877 | if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { | 919 | if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { |
878 | inc_rs_pending(mdev); | 920 | inc_rs_pending(mdev); |
879 | ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); | 921 | ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); |
@@ -921,7 +963,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
921 | 963 | ||
922 | di = (struct digest_info *)(unsigned long)e->block_id; | 964 | di = (struct digest_info *)(unsigned long)e->block_id; |
923 | 965 | ||
924 | if (likely(drbd_bio_uptodate(e->private_bio))) { | 966 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { |
925 | /* quick hack to try to avoid a race against reconfiguration. | 967 | /* quick hack to try to avoid a race against reconfiguration. |
926 | * a real fix would be much more involved, | 968 | * a real fix would be much more involved, |
927 | * introducing more locking mechanisms */ | 969 | * introducing more locking mechanisms */ |
@@ -931,7 +973,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
931 | digest = kmalloc(digest_size, GFP_NOIO); | 973 | digest = kmalloc(digest_size, GFP_NOIO); |
932 | } | 974 | } |
933 | if (digest) { | 975 | if (digest) { |
934 | drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest); | 976 | drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); |
935 | eq = !memcmp(digest, di->digest, digest_size); | 977 | eq = !memcmp(digest, di->digest, digest_size); |
936 | kfree(digest); | 978 | kfree(digest); |
937 | } | 979 | } |
@@ -973,14 +1015,14 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
973 | if (unlikely(cancel)) | 1015 | if (unlikely(cancel)) |
974 | goto out; | 1016 | goto out; |
975 | 1017 | ||
976 | if (unlikely(!drbd_bio_uptodate(e->private_bio))) | 1018 | if (unlikely((e->flags & EE_WAS_ERROR) != 0)) |
977 | goto out; | 1019 | goto out; |
978 | 1020 | ||
979 | digest_size = crypto_hash_digestsize(mdev->verify_tfm); | 1021 | digest_size = crypto_hash_digestsize(mdev->verify_tfm); |
980 | /* FIXME if this allocation fails, online verify will not terminate! */ | 1022 | /* FIXME if this allocation fails, online verify will not terminate! */ |
981 | digest = kmalloc(digest_size, GFP_NOIO); | 1023 | digest = kmalloc(digest_size, GFP_NOIO); |
982 | if (digest) { | 1024 | if (digest) { |
983 | drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); | 1025 | drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); |
984 | inc_rs_pending(mdev); | 1026 | inc_rs_pending(mdev); |
985 | ok = drbd_send_drequest_csum(mdev, e->sector, e->size, | 1027 | ok = drbd_send_drequest_csum(mdev, e->sector, e->size, |
986 | digest, digest_size, P_OV_REPLY); | 1028 | digest, digest_size, P_OV_REPLY); |
@@ -1029,11 +1071,11 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) | |||
1029 | 1071 | ||
1030 | di = (struct digest_info *)(unsigned long)e->block_id; | 1072 | di = (struct digest_info *)(unsigned long)e->block_id; |
1031 | 1073 | ||
1032 | if (likely(drbd_bio_uptodate(e->private_bio))) { | 1074 | if (likely((e->flags & EE_WAS_ERROR) == 0)) { |
1033 | digest_size = crypto_hash_digestsize(mdev->verify_tfm); | 1075 | digest_size = crypto_hash_digestsize(mdev->verify_tfm); |
1034 | digest = kmalloc(digest_size, GFP_NOIO); | 1076 | digest = kmalloc(digest_size, GFP_NOIO); |
1035 | if (digest) { | 1077 | if (digest) { |
1036 | drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); | 1078 | drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); |
1037 | 1079 | ||
1038 | D_ASSERT(digest_size == di->digest_size); | 1080 | D_ASSERT(digest_size == di->digest_size); |
1039 | eq = !memcmp(digest, di->digest, digest_size); | 1081 | eq = !memcmp(digest, di->digest, digest_size); |
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index f93fa111ce50..defdb5013ea3 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h | |||
@@ -18,23 +18,9 @@ static inline void drbd_set_my_capacity(struct drbd_conf *mdev, | |||
18 | 18 | ||
19 | #define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE) | 19 | #define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE) |
20 | 20 | ||
21 | static inline int drbd_bio_has_active_page(struct bio *bio) | ||
22 | { | ||
23 | struct bio_vec *bvec; | ||
24 | int i; | ||
25 | |||
26 | __bio_for_each_segment(bvec, bio, i, 0) { | ||
27 | if (page_count(bvec->bv_page) > 1) | ||
28 | return 1; | ||
29 | } | ||
30 | |||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | /* bi_end_io handlers */ | 21 | /* bi_end_io handlers */ |
35 | extern void drbd_md_io_complete(struct bio *bio, int error); | 22 | extern void drbd_md_io_complete(struct bio *bio, int error); |
36 | extern void drbd_endio_read_sec(struct bio *bio, int error); | 23 | extern void drbd_endio_sec(struct bio *bio, int error); |
37 | extern void drbd_endio_write_sec(struct bio *bio, int error); | ||
38 | extern void drbd_endio_pri(struct bio *bio, int error); | 24 | extern void drbd_endio_pri(struct bio *bio, int error); |
39 | 25 | ||
40 | /* | 26 | /* |
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2138a7ae050c..83fa09a836ca 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -50,7 +50,7 @@ static void blk_done(struct virtqueue *vq) | |||
50 | unsigned long flags; | 50 | unsigned long flags; |
51 | 51 | ||
52 | spin_lock_irqsave(&vblk->lock, flags); | 52 | spin_lock_irqsave(&vblk->lock, flags); |
53 | while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) { | 53 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { |
54 | int error; | 54 | int error; |
55 | 55 | ||
56 | switch (vbr->status) { | 56 | switch (vbr->status) { |
@@ -70,6 +70,8 @@ static void blk_done(struct virtqueue *vq) | |||
70 | vbr->req->sense_len = vbr->in_hdr.sense_len; | 70 | vbr->req->sense_len = vbr->in_hdr.sense_len; |
71 | vbr->req->errors = vbr->in_hdr.errors; | 71 | vbr->req->errors = vbr->in_hdr.errors; |
72 | } | 72 | } |
73 | if (blk_special_request(vbr->req)) | ||
74 | vbr->req->errors = (error != 0); | ||
73 | 75 | ||
74 | __blk_end_request_all(vbr->req, error); | 76 | __blk_end_request_all(vbr->req, error); |
75 | list_del(&vbr->list); | 77 | list_del(&vbr->list); |
@@ -103,6 +105,11 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
103 | vbr->out_hdr.sector = 0; | 105 | vbr->out_hdr.sector = 0; |
104 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); | 106 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); |
105 | break; | 107 | break; |
108 | case REQ_TYPE_SPECIAL: | ||
109 | vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID; | ||
110 | vbr->out_hdr.sector = 0; | ||
111 | vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); | ||
112 | break; | ||
106 | case REQ_TYPE_LINUX_BLOCK: | 113 | case REQ_TYPE_LINUX_BLOCK: |
107 | if (req->cmd[0] == REQ_LB_OP_FLUSH) { | 114 | if (req->cmd[0] == REQ_LB_OP_FLUSH) { |
108 | vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; | 115 | vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; |
@@ -151,7 +158,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | |||
151 | } | 158 | } |
152 | } | 159 | } |
153 | 160 | ||
154 | if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { | 161 | if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { |
155 | mempool_free(vbr, vblk->pool); | 162 | mempool_free(vbr, vblk->pool); |
156 | return false; | 163 | return false; |
157 | } | 164 | } |
@@ -180,7 +187,7 @@ static void do_virtblk_request(struct request_queue *q) | |||
180 | } | 187 | } |
181 | 188 | ||
182 | if (issued) | 189 | if (issued) |
183 | vblk->vq->vq_ops->kick(vblk->vq); | 190 | virtqueue_kick(vblk->vq); |
184 | } | 191 | } |
185 | 192 | ||
186 | static void virtblk_prepare_flush(struct request_queue *q, struct request *req) | 193 | static void virtblk_prepare_flush(struct request_queue *q, struct request *req) |
@@ -189,12 +196,45 @@ static void virtblk_prepare_flush(struct request_queue *q, struct request *req) | |||
189 | req->cmd[0] = REQ_LB_OP_FLUSH; | 196 | req->cmd[0] = REQ_LB_OP_FLUSH; |
190 | } | 197 | } |
191 | 198 | ||
199 | /* return id (s/n) string for *disk to *id_str | ||
200 | */ | ||
201 | static int virtblk_get_id(struct gendisk *disk, char *id_str) | ||
202 | { | ||
203 | struct virtio_blk *vblk = disk->private_data; | ||
204 | struct request *req; | ||
205 | struct bio *bio; | ||
206 | |||
207 | bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES, | ||
208 | GFP_KERNEL); | ||
209 | if (IS_ERR(bio)) | ||
210 | return PTR_ERR(bio); | ||
211 | |||
212 | req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL); | ||
213 | if (IS_ERR(req)) { | ||
214 | bio_put(bio); | ||
215 | return PTR_ERR(req); | ||
216 | } | ||
217 | |||
218 | req->cmd_type = REQ_TYPE_SPECIAL; | ||
219 | return blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); | ||
220 | } | ||
221 | |||
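virtblk_get_id() above builds a REQ_TYPE_SPECIAL request (mapped to VIRTIO_BLK_T_GET_ID by the do_req() hunk earlier, with the result propagated through req->errors in blk_done()) and lets blk_execute_rq() fill id_str with the device serial; the 'VBID' ioctl case below hands it to user space. A hypothetical user-space caller, assuming the conventional 20-byte VIRTIO_BLK_ID_BYTES and a /dev/vda device node:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    #define VBID_IOCTL          0x56424944   /* 'V' 'B' 'I' 'D' */
    #define VIRTIO_BLK_ID_BYTES 20           /* assumed; not visible in this hunk */

    int main(void)
    {
            char id[VIRTIO_BLK_ID_BYTES + 1];
            int fd = open("/dev/vda", O_RDONLY);

            if (fd < 0)
                    return 1;
            memset(id, 0, sizeof(id));       /* the serial may not be NUL-terminated */
            if (ioctl(fd, VBID_IOCTL, id) == 0)
                    printf("serial: %s\n", id);
            close(fd);
            return 0;
    }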
192 | static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, | 222 | static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, |
193 | unsigned cmd, unsigned long data) | 223 | unsigned cmd, unsigned long data) |
194 | { | 224 | { |
195 | struct gendisk *disk = bdev->bd_disk; | 225 | struct gendisk *disk = bdev->bd_disk; |
196 | struct virtio_blk *vblk = disk->private_data; | 226 | struct virtio_blk *vblk = disk->private_data; |
197 | 227 | ||
228 | if (cmd == 0x56424944) { /* 'VBID' */ | ||
229 | void __user *usr_data = (void __user *)data; | ||
230 | char id_str[VIRTIO_BLK_ID_BYTES]; | ||
231 | int err; | ||
232 | |||
233 | err = virtblk_get_id(disk, id_str); | ||
234 | if (!err && copy_to_user(usr_data, id_str, VIRTIO_BLK_ID_BYTES)) | ||
235 | err = -EFAULT; | ||
236 | return err; | ||
237 | } | ||
198 | /* | 238 | /* |
199 | * Only allow the generic SCSI ioctls if the host can support it. | 239 | * Only allow the generic SCSI ioctls if the host can support it. |
200 | */ | 240 | */ |